diff mbox

[dpdk-dev,v4] net/kni: add KNI PMD

Message ID 20161130181228.25380-1-ferruh.yigit@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Headers show

Checks

Context Check Description
checkpatch/checkpatch success coding style OK

Commit Message

Ferruh Yigit Nov. 30, 2016, 6:12 p.m. UTC
Add KNI PMD which wraps librte_kni for ease of use.

KNI PMD can be used as any regular PMD to send / receive packets to the
Linux networking stack.

Signed-off-by: Ferruh Yigit <ferruh.yigit@intel.com>
---

v4:
* allow only single queue
* use driver.name as name

v3:
* rebase on top of latest master

v2:
* updated driver name eth_kni -> net_kni
---
 config/common_base                      |   1 +
 config/common_linuxapp                  |   1 +
 drivers/net/Makefile                    |   1 +
 drivers/net/kni/Makefile                |  63 +++++
 drivers/net/kni/rte_eth_kni.c           | 462 ++++++++++++++++++++++++++++++++
 drivers/net/kni/rte_pmd_kni_version.map |   4 +
 mk/rte.app.mk                           |  10 +-
 7 files changed, 537 insertions(+), 5 deletions(-)
 create mode 100644 drivers/net/kni/Makefile
 create mode 100644 drivers/net/kni/rte_eth_kni.c
 create mode 100644 drivers/net/kni/rte_pmd_kni_version.map

Comments

Yong Wang Dec. 12, 2016, 9:59 p.m. UTC | #1
> -----Original Message-----
> From: Ferruh Yigit [mailto:ferruh.yigit@intel.com]
> Sent: Wednesday, November 30, 2016 10:12 AM
> To: dev@dpdk.org
> Cc: Ferruh Yigit <ferruh.yigit@intel.com>; Yong Wang
> <yongwang@vmware.com>
> Subject: [PATCH v4] net/kni: add KNI PMD
> 
> Add KNI PMD which wraps librte_kni for ease of use.
> 
> KNI PMD can be used as any regular PMD to send / receive packets to the
> Linux networking stack.
> 
> Signed-off-by: Ferruh Yigit <ferruh.yigit@intel.com>
> ---
> 
> v4:
> * allow only single queue
> * use driver.name as name
> 
> v3:
> * rebase on top of latest master
> 
> v2:
> * updated driver name eth_kni -> net_kni
> ---
>  config/common_base                      |   1 +
>  config/common_linuxapp                  |   1 +
>  drivers/net/Makefile                    |   1 +
>  drivers/net/kni/Makefile                |  63 +++++
>  drivers/net/kni/rte_eth_kni.c           | 462
> ++++++++++++++++++++++++++++++++
>  drivers/net/kni/rte_pmd_kni_version.map |   4 +
>  mk/rte.app.mk                           |  10 +-
>  7 files changed, 537 insertions(+), 5 deletions(-)
>  create mode 100644 drivers/net/kni/Makefile
>  create mode 100644 drivers/net/kni/rte_eth_kni.c
>  create mode 100644 drivers/net/kni/rte_pmd_kni_version.map
> 
> diff --git a/config/common_base b/config/common_base
> index 4bff83a..3385879 100644
> --- a/config/common_base
> +++ b/config/common_base
> @@ -543,6 +543,7 @@ CONFIG_RTE_PIPELINE_STATS_COLLECT=n
>  # Compile librte_kni
>  #
>  CONFIG_RTE_LIBRTE_KNI=n
> +CONFIG_RTE_LIBRTE_PMD_KNI=n
>  CONFIG_RTE_KNI_KMOD=n
>  CONFIG_RTE_KNI_PREEMPT_DEFAULT=y
>  CONFIG_RTE_KNI_VHOST=n
> diff --git a/config/common_linuxapp b/config/common_linuxapp
> index 2483dfa..2ecd510 100644
> --- a/config/common_linuxapp
> +++ b/config/common_linuxapp
> @@ -39,6 +39,7 @@ CONFIG_RTE_EAL_IGB_UIO=y
>  CONFIG_RTE_EAL_VFIO=y
>  CONFIG_RTE_KNI_KMOD=y
>  CONFIG_RTE_LIBRTE_KNI=y
> +CONFIG_RTE_LIBRTE_PMD_KNI=y
>  CONFIG_RTE_LIBRTE_VHOST=y
>  CONFIG_RTE_LIBRTE_PMD_VHOST=y
>  CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
> diff --git a/drivers/net/Makefile b/drivers/net/Makefile
> index bc93230..c4771cd 100644
> --- a/drivers/net/Makefile
> +++ b/drivers/net/Makefile
> @@ -41,6 +41,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += enic
>  DIRS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += fm10k
>  DIRS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += i40e
>  DIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe
> +DIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += kni
>  DIRS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4
>  DIRS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5
>  DIRS-$(CONFIG_RTE_LIBRTE_MPIPE_PMD) += mpipe
> diff --git a/drivers/net/kni/Makefile b/drivers/net/kni/Makefile
> new file mode 100644
> index 0000000..0b7cf91
> --- /dev/null
> +++ b/drivers/net/kni/Makefile
> @@ -0,0 +1,63 @@
> +#   BSD LICENSE
> +#
> +#   Copyright(c) 2016 Intel Corporation. All rights reserved.
> +#
> +#   Redistribution and use in source and binary forms, with or without
> +#   modification, are permitted provided that the following conditions
> +#   are met:
> +#
> +#     * Redistributions of source code must retain the above copyright
> +#       notice, this list of conditions and the following disclaimer.
> +#     * Redistributions in binary form must reproduce the above copyright
> +#       notice, this list of conditions and the following disclaimer in
> +#       the documentation and/or other materials provided with the
> +#       distribution.
> +#     * Neither the name of Intel Corporation nor the names of its
> +#       contributors may be used to endorse or promote products derived
> +#       from this software without specific prior written permission.
> +#
> +#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
> CONTRIBUTORS
> +#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
> NOT
> +#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
> FITNESS FOR
> +#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
> COPYRIGHT
> +#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> INCIDENTAL,
> +#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
> NOT
> +#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
> OF USE,
> +#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
> AND ON ANY
> +#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
> TORT
> +#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
> THE USE
> +#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> DAMAGE.
> +
> +include $(RTE_SDK)/mk/rte.vars.mk
> +
> +#
> +# library name
> +#
> +LIB = librte_pmd_kni.a
> +
> +CFLAGS += -O3
> +CFLAGS += $(WERROR_FLAGS)
> +LDLIBS += -lpthread
> +
> +EXPORT_MAP := rte_pmd_kni_version.map
> +
> +LIBABIVER := 1
> +
> +#
> +# all source are stored in SRCS-y
> +#
> +SRCS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += rte_eth_kni.c
> +
> +#
> +# Export include files
> +#
> +SYMLINK-y-include +=
> +
> +# this lib depends upon:
> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_eal
> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_ether
> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_kni
> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_mbuf
> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_mempool
> +
> +include $(RTE_SDK)/mk/rte.lib.mk
> diff --git a/drivers/net/kni/rte_eth_kni.c b/drivers/net/kni/rte_eth_kni.c
> new file mode 100644
> index 0000000..6c4df96
> --- /dev/null
> +++ b/drivers/net/kni/rte_eth_kni.c
> @@ -0,0 +1,462 @@
> +/*-
> + *   BSD LICENSE
> + *
> + *   Copyright(c) 2016 Intel Corporation. All rights reserved.
> + *   All rights reserved.
> + *
> + *   Redistribution and use in source and binary forms, with or without
> + *   modification, are permitted provided that the following conditions
> + *   are met:
> + *
> + *     * Redistributions of source code must retain the above copyright
> + *       notice, this list of conditions and the following disclaimer.
> + *     * Redistributions in binary form must reproduce the above copyright
> + *       notice, this list of conditions and the following disclaimer in
> + *       the documentation and/or other materials provided with the
> + *       distribution.
> + *     * Neither the name of Intel Corporation nor the names of its
> + *       contributors may be used to endorse or promote products derived
> + *       from this software without specific prior written permission.
> + *
> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
> CONTRIBUTORS
> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
> NOT
> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
> FITNESS FOR
> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
> COPYRIGHT
> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> INCIDENTAL,
> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
> NOT
> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
> OF USE,
> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
> AND ON ANY
> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
> TORT
> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
> THE USE
> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> DAMAGE.
> + */
> +
> +#include <fcntl.h>
> +#include <pthread.h>
> +#include <unistd.h>
> +
> +#include <rte_ethdev.h>
> +#include <rte_kni.h>
> +#include <rte_malloc.h>
> +#include <rte_vdev.h>
> +
> +/* Only single queue supported */
> +#define KNI_MAX_QUEUE_PER_PORT 1
> +
> +#define MAX_PACKET_SZ 2048
> +#define MAX_KNI_PORTS 8
> +
> +struct pmd_queue_stats {
> +	uint64_t pkts;
> +	uint64_t bytes;
> +	uint64_t err_pkts;
> +};
> +
> +struct pmd_queue {
> +	struct pmd_internals *internals;
> +	struct rte_mempool *mb_pool;
> +
> +	struct pmd_queue_stats rx;
> +	struct pmd_queue_stats tx;
> +};
> +
> +struct pmd_internals {
> +	struct rte_kni *kni;
> +	int is_kni_started;
> +
> +	pthread_t thread;
> +	int stop_thread;
> +
> +	struct pmd_queue rx_queues[KNI_MAX_QUEUE_PER_PORT];
> +	struct pmd_queue tx_queues[KNI_MAX_QUEUE_PER_PORT];
> +};
> +
> +static struct ether_addr eth_addr;
> +static struct rte_eth_link pmd_link = {
> +		.link_speed = ETH_SPEED_NUM_10G,
> +		.link_duplex = ETH_LINK_FULL_DUPLEX,
> +		.link_status = 0
> +};
> +static int is_kni_initialized;
> +
> +static uint16_t
> +eth_kni_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
> +{
> +	struct pmd_queue *kni_q = q;
> +	struct rte_kni *kni = kni_q->internals->kni;
> +	uint16_t nb_pkts;
> +
> +	nb_pkts = rte_kni_rx_burst(kni, bufs, nb_bufs);
> +
> +	kni_q->rx.pkts += nb_pkts;
> +	kni_q->rx.err_pkts += nb_bufs - nb_pkts;
> +
> +	return nb_pkts;
> +}
> +
> +static uint16_t
> +eth_kni_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
> +{
> +	struct pmd_queue *kni_q = q;
> +	struct rte_kni *kni = kni_q->internals->kni;
> +	uint16_t nb_pkts;
> +
> +	nb_pkts =  rte_kni_tx_burst(kni, bufs, nb_bufs);
> +
> +	kni_q->tx.pkts += nb_pkts;
> +	kni_q->tx.err_pkts += nb_bufs - nb_pkts;
> +
> +	return nb_pkts;
> +}
> +
> +static void *
> +kni_handle_request(void *param)
> +{
> +	struct pmd_internals *internals = param;
> +#define MS 1000
> +
> +	while (!internals->stop_thread) {
> +		rte_kni_handle_request(internals->kni);
> +		usleep(500 * MS);
> +	}
> +
> +	return param;
> +}
> +

Do we really need a thread to handle request by default? I know there are apps that handle request their own way and having a separate thread could add synchronization problems.  Can we at least add an option to disable this?

> +static int
> +eth_kni_start(struct rte_eth_dev *dev)
> +{
> +	struct pmd_internals *internals = dev->data->dev_private;
> +	uint16_t port_id = dev->data->port_id;
> +	struct rte_mempool *mb_pool;
> +	struct rte_kni_conf conf;
> +	const char *name = dev->data->name + 4; /* remove net_ */
> +
> +	snprintf(conf.name, RTE_KNI_NAMESIZE, "%s", name);
> +	conf.force_bind = 0;
> +	conf.group_id = port_id;
> +	conf.mbuf_size = MAX_PACKET_SZ;
> +	mb_pool = internals->rx_queues[0].mb_pool;
> +
> +	internals->kni = rte_kni_alloc(mb_pool, &conf, NULL);
> +	if (internals->kni == NULL) {
> +		RTE_LOG(ERR, PMD,
> +			"Fail to create kni for port: %d\n", port_id);
> +		return -1;
> +	}
> +
> +	return 0;
> +}
> +
> +static int
> +eth_kni_dev_start(struct rte_eth_dev *dev)
> +{
> +	struct pmd_internals *internals = dev->data->dev_private;
> +	int ret;
> +
> +	if (internals->is_kni_started == 0) {
> +		ret = eth_kni_start(dev);
> +		if (ret)
> +			return -1;
> +		internals->is_kni_started = 1;
> +	}
> +

In case is_kni_started is 1 already,  shouldn't we return directly instead of proceeding?

> +	ret = pthread_create(&internals->thread, NULL, kni_handle_request,
> +			internals);
> +	if (ret) {
> +		RTE_LOG(ERR, PMD, "Fail to create kni request thread\n");
> +		return -1;
> +	}
> +
> +	dev->data->dev_link.link_status = 1;
> +
> +	return 0;
> +}
> +
> +static void
> +eth_kni_dev_stop(struct rte_eth_dev *dev)
> +{
> +	struct pmd_internals *internals = dev->data->dev_private;
> +	int ret;
> +
> +	internals->stop_thread = 1;
> +
> +	ret = pthread_cancel(internals->thread);
> +	if (ret)
> +		RTE_LOG(ERR, PMD, "Can't cancel the thread\n");
> +
> +	ret = pthread_join(internals->thread, NULL);
> +	if (ret)
> +		RTE_LOG(ERR, PMD, "Can't join the thread\n");
> +
> +	internals->stop_thread = 0;
> +
> +	dev->data->dev_link.link_status = 0;
> +}
> +
> +static int
> +eth_kni_dev_configure(struct rte_eth_dev *dev __rte_unused)
> +{
> +	return 0;
> +}
> +
> +static void
> +eth_kni_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info
> *dev_info)
> +{
> +	struct rte_eth_dev_data *data = dev->data;
> +
> +	dev_info->driver_name = data->drv_name;
> +	dev_info->max_mac_addrs = 1;
> +	dev_info->max_rx_pktlen = (uint32_t)-1;
> +	dev_info->max_rx_queues = KNI_MAX_QUEUE_PER_PORT;
> +	dev_info->max_tx_queues = KNI_MAX_QUEUE_PER_PORT;
> +	dev_info->min_rx_bufsize = 0;
> +	dev_info->pci_dev = NULL;
> +}
> +
> +static int
> +eth_kni_rx_queue_setup(struct rte_eth_dev *dev,
> +		uint16_t rx_queue_id,
> +		uint16_t nb_rx_desc __rte_unused,
> +		unsigned int socket_id __rte_unused,
> +		const struct rte_eth_rxconf *rx_conf __rte_unused,
> +		struct rte_mempool *mb_pool)
> +{
> +	struct pmd_internals *internals = dev->data->dev_private;
> +	struct pmd_queue *q;
> +
> +	q = &internals->rx_queues[rx_queue_id];
> +	q->internals = internals;
> +	q->mb_pool = mb_pool;
> +
> +	dev->data->rx_queues[rx_queue_id] = q;
> +
> +	return 0;
> +}
> +
> +static int
> +eth_kni_tx_queue_setup(struct rte_eth_dev *dev,
> +		uint16_t tx_queue_id,
> +		uint16_t nb_tx_desc __rte_unused,
> +		unsigned int socket_id __rte_unused,
> +		const struct rte_eth_txconf *tx_conf __rte_unused)
> +{
> +	struct pmd_internals *internals = dev->data->dev_private;
> +	struct pmd_queue *q;
> +
> +	q = &internals->tx_queues[tx_queue_id];
> +	q->internals = internals;
> +
> +	dev->data->tx_queues[tx_queue_id] = q;
> +
> +	return 0;
> +}
> +
> +static void
> +eth_kni_queue_release(void *q __rte_unused)
> +{
> +}
> +
> +static int
> +eth_kni_link_update(struct rte_eth_dev *dev __rte_unused,
> +		int wait_to_complete __rte_unused)
> +{
> +	return 0;
> +}
> +
> +static void
> +eth_kni_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
> +{
> +	unsigned long rx_packets_total = 0, rx_bytes_total = 0;
> +	unsigned long tx_packets_total = 0, tx_bytes_total = 0;
> +	struct rte_eth_dev_data *data = dev->data;
> +	unsigned long tx_packets_err_total = 0;
> +	unsigned int i, num_stats;
> +	struct pmd_queue *q;
> +
> +	num_stats = RTE_MIN((unsigned
> int)RTE_ETHDEV_QUEUE_STAT_CNTRS,
> +			data->nb_rx_queues);
> +	for (i = 0; i < num_stats; i++) {
> +		q = data->rx_queues[i];
> +		stats->q_ipackets[i] = q->rx.pkts;
> +		stats->q_ibytes[i] = q->rx.bytes;
> +		rx_packets_total += stats->q_ipackets[i];
> +		rx_bytes_total += stats->q_ibytes[i];
> +	}
> +
> +	num_stats = RTE_MIN((unsigned
> int)RTE_ETHDEV_QUEUE_STAT_CNTRS,
> +			data->nb_tx_queues);
> +	for (i = 0; i < num_stats; i++) {
> +		q = data->tx_queues[i];
> +		stats->q_opackets[i] = q->tx.pkts;
> +		stats->q_obytes[i] = q->tx.bytes;
> +		stats->q_errors[i] = q->tx.err_pkts;
> +		tx_packets_total += stats->q_opackets[i];
> +		tx_bytes_total += stats->q_obytes[i];
> +		tx_packets_err_total += stats->q_errors[i];
> +	}
> +
> +	stats->ipackets = rx_packets_total;
> +	stats->ibytes = rx_bytes_total;
> +	stats->opackets = tx_packets_total;
> +	stats->obytes = tx_bytes_total;
> +	stats->oerrors = tx_packets_err_total;
> +}
> +
> +static void
> +eth_kni_stats_reset(struct rte_eth_dev *dev)
> +{
> +	struct rte_eth_dev_data *data = dev->data;
> +	struct pmd_queue *q;
> +	unsigned int i;
> +
> +	for (i = 0; i < data->nb_rx_queues; i++) {
> +		q = data->rx_queues[i];
> +		q->rx.pkts = 0;
> +		q->rx.bytes = 0;
> +	}
> +	for (i = 0; i < data->nb_tx_queues; i++) {
> +		q = data->tx_queues[i];
> +		q->tx.pkts = 0;
> +		q->tx.bytes = 0;
> +		q->tx.err_pkts = 0;
> +	}
> +}
> +
> +static const struct eth_dev_ops eth_kni_ops = {
> +	.dev_start = eth_kni_dev_start,
> +	.dev_stop = eth_kni_dev_stop,
> +	.dev_configure = eth_kni_dev_configure,
> +	.dev_infos_get = eth_kni_dev_info,
> +	.rx_queue_setup = eth_kni_rx_queue_setup,
> +	.tx_queue_setup = eth_kni_tx_queue_setup,
> +	.rx_queue_release = eth_kni_queue_release,
> +	.tx_queue_release = eth_kni_queue_release,
> +	.link_update = eth_kni_link_update,
> +	.stats_get = eth_kni_stats_get,
> +	.stats_reset = eth_kni_stats_reset,
> +};
> +
> +static struct rte_vdev_driver eth_kni_drv;
> +
> +static struct rte_eth_dev *
> +eth_kni_create(const char *name, unsigned int numa_node)
> +{
> +	struct pmd_internals *internals = NULL;
> +	struct rte_eth_dev_data *data;
> +	struct rte_eth_dev *eth_dev;
> +
> +	RTE_LOG(INFO, PMD, "Creating kni ethdev on numa socket %u\n",
> +			numa_node);
> +
> +	data = rte_zmalloc_socket(name, sizeof(*data), 0, numa_node);
> +	if (data == NULL)
> +		goto error;
> +
> +	internals = rte_zmalloc_socket(name, sizeof(*internals), 0,
> numa_node);
> +	if (internals == NULL)
> +		goto error;
> +
> +	/* reserve an ethdev entry */
> +	eth_dev = rte_eth_dev_allocate(name);
> +	if (eth_dev == NULL)
> +		goto error;
> +
> +	data->dev_private = internals;
> +	data->port_id = eth_dev->data->port_id;
> +	memmove(data->name, eth_dev->data->name, sizeof(data-
> >name));
> +	data->nb_rx_queues = 1;
> +	data->nb_tx_queues = 1;
> +	data->dev_link = pmd_link;
> +	data->mac_addrs = &eth_addr;
> +
> +	eth_dev->data = data;
> +	eth_dev->dev_ops = &eth_kni_ops;
> +	eth_dev->driver = NULL;
> +
> +	data->dev_flags = RTE_ETH_DEV_DETACHABLE;
> +	data->kdrv = RTE_KDRV_NONE;
> +	data->drv_name = eth_kni_drv.driver.name;
> +	data->numa_node = numa_node;
> +
> +	return eth_dev;
> +
> +error:
> +	rte_free(data);
> +	rte_free(internals);
> +
> +	return NULL;
> +}
> +
> +static int
> +kni_init(void)
> +{
> +	if (is_kni_initialized == 0)
> +		rte_kni_init(MAX_KNI_PORTS);
> +
> +	is_kni_initialized += 1;
> +
> +	return 0;
> +}
> +
> +static int
> +eth_kni_probe(const char *name, const char *params __rte_unused)
> +{
> +	struct rte_eth_dev *eth_dev;
> +	int ret;
> +
> +	RTE_LOG(INFO, PMD, "Initializing eth_kni for %s\n", name);
> +
> +	ret = kni_init();
> +	if (ret < 0)
> +		/* Not return error to prevent panic in rte_eal_init() */
> +		return 0;

If we don't return error here, the application that needs to add KNI ports eventually will fail.  If it's a fail-stop situation, isn't it better to return error where the it happened?
 
> +	eth_dev = eth_kni_create(name, rte_socket_id());
> +	if (eth_dev == NULL)
> +		return -1;
> +
> +	eth_dev->rx_pkt_burst = eth_kni_rx;
> +	eth_dev->tx_pkt_burst = eth_kni_tx;
> +
> +	return 0;
> +}
> +
> +static int
> +eth_kni_remove(const char *name)
> +{
> +	struct rte_eth_dev *eth_dev;
> +	struct pmd_internals *internals;
> +
> +	RTE_LOG(INFO, PMD, "Un-Initializing eth_kni for %s\n", name);
> +
> +	/* find the ethdev entry */
> +	eth_dev = rte_eth_dev_allocated(name);
> +	if (eth_dev == NULL)
> +		return -1;
> +
> +	eth_kni_dev_stop(eth_dev);
> +
> +	if (eth_dev->data) {
> +		internals = eth_dev->data->dev_private;
> +		rte_kni_release(internals->kni);
> +
> +		rte_free(internals);
> +	}
> +	rte_free(eth_dev->data);
> +
> +	rte_eth_dev_release_port(eth_dev);
> +
> +	is_kni_initialized -= 1;
> +	if (is_kni_initialized == 0)
> +		rte_kni_close();
> +
> +	return 0;
> +}
> +
> +static struct rte_vdev_driver eth_kni_drv = {
> +	.probe = eth_kni_probe,
> +	.remove = eth_kni_remove,
> +};
> +
> +RTE_PMD_REGISTER_VDEV(net_kni, eth_kni_drv);
> diff --git a/drivers/net/kni/rte_pmd_kni_version.map
> b/drivers/net/kni/rte_pmd_kni_version.map
> new file mode 100644
> index 0000000..31eca32
> --- /dev/null
> +++ b/drivers/net/kni/rte_pmd_kni_version.map
> @@ -0,0 +1,4 @@
> +DPDK_17.02 {
> +
> +	local: *;
> +};
> diff --git a/mk/rte.app.mk b/mk/rte.app.mk
> index f75f0e2..af02816 100644
> --- a/mk/rte.app.mk
> +++ b/mk/rte.app.mk
> @@ -59,11 +59,6 @@ _LDLIBS-y += -L$(RTE_SDK_BIN)/lib
>  #
>  # Order is important: from higher level to lower level
>  #
> -
> -ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
> -_LDLIBS-$(CONFIG_RTE_LIBRTE_KNI)            += -lrte_kni
> -endif
> -
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_PIPELINE)       += -lrte_pipeline
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_TABLE)          += -lrte_table
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_PORT)           += -lrte_port
> @@ -84,6 +79,10 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_POWER)          += -
> lrte_power
> 
>  _LDLIBS-y += --whole-archive
> 
> +ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
> +_LDLIBS-$(CONFIG_RTE_LIBRTE_KNI)            += -lrte_kni
> +endif
> +
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_TIMER)          += -lrte_timer
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_HASH)           += -lrte_hash
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_VHOST)          += -lrte_vhost
> @@ -115,6 +114,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_ENIC_PMD)       += -
> lrte_pmd_enic
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_FM10K_PMD)      += -lrte_pmd_fm10k
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_I40E_PMD)       += -lrte_pmd_i40e
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD)      += -lrte_pmd_ixgbe
> +_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_KNI)        += -lrte_pmd_kni
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX4_PMD)       += -lrte_pmd_mlx4 -
> libverbs
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX5_PMD)       += -lrte_pmd_mlx5 -
> libverbs
>  _LDLIBS-$(CONFIG_RTE_LIBRTE_MPIPE_PMD)      += -lrte_pmd_mpipe -lgxio
> --
> 2.9.3
Ferruh Yigit Dec. 14, 2016, 3:59 p.m. UTC | #2
On 12/12/2016 9:59 PM, Yong Wang wrote:
>> -----Original Message-----
>> From: Ferruh Yigit [mailto:ferruh.yigit@intel.com]
>> Sent: Wednesday, November 30, 2016 10:12 AM
>> To: dev@dpdk.org
>> Cc: Ferruh Yigit <ferruh.yigit@intel.com>; Yong Wang
>> <yongwang@vmware.com>
>> Subject: [PATCH v4] net/kni: add KNI PMD
>>
>> Add KNI PMD which wraps librte_kni for ease of use.
>>
>> KNI PMD can be used as any regular PMD to send / receive packets to the
>> Linux networking stack.
>>
>> Signed-off-by: Ferruh Yigit <ferruh.yigit@intel.com>
>> ---
>>
>> v4:
>> * allow only single queue
>> * use driver.name as name
>>
>> v3:
>> * rebase on top of latest master
>>
>> v2:
>> * updated driver name eth_kni -> net_kni
>> ---
>>  config/common_base                      |   1 +
>>  config/common_linuxapp                  |   1 +
>>  drivers/net/Makefile                    |   1 +
>>  drivers/net/kni/Makefile                |  63 +++++
>>  drivers/net/kni/rte_eth_kni.c           | 462
>> ++++++++++++++++++++++++++++++++
>>  drivers/net/kni/rte_pmd_kni_version.map |   4 +
>>  mk/rte.app.mk                           |  10 +-
>>  7 files changed, 537 insertions(+), 5 deletions(-)
>>  create mode 100644 drivers/net/kni/Makefile
>>  create mode 100644 drivers/net/kni/rte_eth_kni.c
>>  create mode 100644 drivers/net/kni/rte_pmd_kni_version.map
>>
>> diff --git a/config/common_base b/config/common_base
>> index 4bff83a..3385879 100644
>> --- a/config/common_base
>> +++ b/config/common_base
>> @@ -543,6 +543,7 @@ CONFIG_RTE_PIPELINE_STATS_COLLECT=n
>>  # Compile librte_kni
>>  #
>>  CONFIG_RTE_LIBRTE_KNI=n
>> +CONFIG_RTE_LIBRTE_PMD_KNI=n
>>  CONFIG_RTE_KNI_KMOD=n
>>  CONFIG_RTE_KNI_PREEMPT_DEFAULT=y
>>  CONFIG_RTE_KNI_VHOST=n
>> diff --git a/config/common_linuxapp b/config/common_linuxapp
>> index 2483dfa..2ecd510 100644
>> --- a/config/common_linuxapp
>> +++ b/config/common_linuxapp
>> @@ -39,6 +39,7 @@ CONFIG_RTE_EAL_IGB_UIO=y
>>  CONFIG_RTE_EAL_VFIO=y
>>  CONFIG_RTE_KNI_KMOD=y
>>  CONFIG_RTE_LIBRTE_KNI=y
>> +CONFIG_RTE_LIBRTE_PMD_KNI=y
>>  CONFIG_RTE_LIBRTE_VHOST=y
>>  CONFIG_RTE_LIBRTE_PMD_VHOST=y
>>  CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
>> diff --git a/drivers/net/Makefile b/drivers/net/Makefile
>> index bc93230..c4771cd 100644
>> --- a/drivers/net/Makefile
>> +++ b/drivers/net/Makefile
>> @@ -41,6 +41,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += enic
>>  DIRS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += fm10k
>>  DIRS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += i40e
>>  DIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe
>> +DIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += kni
>>  DIRS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4
>>  DIRS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5
>>  DIRS-$(CONFIG_RTE_LIBRTE_MPIPE_PMD) += mpipe
>> diff --git a/drivers/net/kni/Makefile b/drivers/net/kni/Makefile
>> new file mode 100644
>> index 0000000..0b7cf91
>> --- /dev/null
>> +++ b/drivers/net/kni/Makefile
>> @@ -0,0 +1,63 @@
>> +#   BSD LICENSE
>> +#
>> +#   Copyright(c) 2016 Intel Corporation. All rights reserved.
>> +#
>> +#   Redistribution and use in source and binary forms, with or without
>> +#   modification, are permitted provided that the following conditions
>> +#   are met:
>> +#
>> +#     * Redistributions of source code must retain the above copyright
>> +#       notice, this list of conditions and the following disclaimer.
>> +#     * Redistributions in binary form must reproduce the above copyright
>> +#       notice, this list of conditions and the following disclaimer in
>> +#       the documentation and/or other materials provided with the
>> +#       distribution.
>> +#     * Neither the name of Intel Corporation nor the names of its
>> +#       contributors may be used to endorse or promote products derived
>> +#       from this software without specific prior written permission.
>> +#
>> +#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
>> CONTRIBUTORS
>> +#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
>> NOT
>> +#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
>> FITNESS FOR
>> +#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
>> COPYRIGHT
>> +#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
>> INCIDENTAL,
>> +#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
>> NOT
>> +#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
>> OF USE,
>> +#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
>> AND ON ANY
>> +#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
>> TORT
>> +#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
>> THE USE
>> +#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
>> DAMAGE.
>> +
>> +include $(RTE_SDK)/mk/rte.vars.mk
>> +
>> +#
>> +# library name
>> +#
>> +LIB = librte_pmd_kni.a
>> +
>> +CFLAGS += -O3
>> +CFLAGS += $(WERROR_FLAGS)
>> +LDLIBS += -lpthread
>> +
>> +EXPORT_MAP := rte_pmd_kni_version.map
>> +
>> +LIBABIVER := 1
>> +
>> +#
>> +# all source are stored in SRCS-y
>> +#
>> +SRCS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += rte_eth_kni.c
>> +
>> +#
>> +# Export include files
>> +#
>> +SYMLINK-y-include +=
>> +
>> +# this lib depends upon:
>> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_eal
>> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_ether
>> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_kni
>> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_mbuf
>> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_mempool
>> +
>> +include $(RTE_SDK)/mk/rte.lib.mk
>> diff --git a/drivers/net/kni/rte_eth_kni.c b/drivers/net/kni/rte_eth_kni.c
>> new file mode 100644
>> index 0000000..6c4df96
>> --- /dev/null
>> +++ b/drivers/net/kni/rte_eth_kni.c
>> @@ -0,0 +1,462 @@
>> +/*-
>> + *   BSD LICENSE
>> + *
>> + *   Copyright(c) 2016 Intel Corporation. All rights reserved.
>> + *   All rights reserved.
>> + *
>> + *   Redistribution and use in source and binary forms, with or without
>> + *   modification, are permitted provided that the following conditions
>> + *   are met:
>> + *
>> + *     * Redistributions of source code must retain the above copyright
>> + *       notice, this list of conditions and the following disclaimer.
>> + *     * Redistributions in binary form must reproduce the above copyright
>> + *       notice, this list of conditions and the following disclaimer in
>> + *       the documentation and/or other materials provided with the
>> + *       distribution.
>> + *     * Neither the name of Intel Corporation nor the names of its
>> + *       contributors may be used to endorse or promote products derived
>> + *       from this software without specific prior written permission.
>> + *
>> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
>> CONTRIBUTORS
>> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
>> NOT
>> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
>> FITNESS FOR
>> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
>> COPYRIGHT
>> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
>> INCIDENTAL,
>> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
>> NOT
>> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
>> OF USE,
>> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
>> AND ON ANY
>> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
>> TORT
>> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
>> THE USE
>> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
>> DAMAGE.
>> + */
>> +
>> +#include <fcntl.h>
>> +#include <pthread.h>
>> +#include <unistd.h>
>> +
>> +#include <rte_ethdev.h>
>> +#include <rte_kni.h>
>> +#include <rte_malloc.h>
>> +#include <rte_vdev.h>
>> +
>> +/* Only single queue supported */
>> +#define KNI_MAX_QUEUE_PER_PORT 1
>> +
>> +#define MAX_PACKET_SZ 2048
>> +#define MAX_KNI_PORTS 8
>> +
>> +struct pmd_queue_stats {
>> +	uint64_t pkts;
>> +	uint64_t bytes;
>> +	uint64_t err_pkts;
>> +};
>> +
>> +struct pmd_queue {
>> +	struct pmd_internals *internals;
>> +	struct rte_mempool *mb_pool;
>> +
>> +	struct pmd_queue_stats rx;
>> +	struct pmd_queue_stats tx;
>> +};
>> +
>> +struct pmd_internals {
>> +	struct rte_kni *kni;
>> +	int is_kni_started;
>> +
>> +	pthread_t thread;
>> +	int stop_thread;
>> +
>> +	struct pmd_queue rx_queues[KNI_MAX_QUEUE_PER_PORT];
>> +	struct pmd_queue tx_queues[KNI_MAX_QUEUE_PER_PORT];
>> +};
>> +
>> +static struct ether_addr eth_addr;
>> +static struct rte_eth_link pmd_link = {
>> +		.link_speed = ETH_SPEED_NUM_10G,
>> +		.link_duplex = ETH_LINK_FULL_DUPLEX,
>> +		.link_status = 0
>> +};
>> +static int is_kni_initialized;
>> +
>> +static uint16_t
>> +eth_kni_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
>> +{
>> +	struct pmd_queue *kni_q = q;
>> +	struct rte_kni *kni = kni_q->internals->kni;
>> +	uint16_t nb_pkts;
>> +
>> +	nb_pkts = rte_kni_rx_burst(kni, bufs, nb_bufs);
>> +
>> +	kni_q->rx.pkts += nb_pkts;
>> +	kni_q->rx.err_pkts += nb_bufs - nb_pkts;
>> +
>> +	return nb_pkts;
>> +}
>> +
>> +static uint16_t
>> +eth_kni_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
>> +{
>> +	struct pmd_queue *kni_q = q;
>> +	struct rte_kni *kni = kni_q->internals->kni;
>> +	uint16_t nb_pkts;
>> +
>> +	nb_pkts =  rte_kni_tx_burst(kni, bufs, nb_bufs);
>> +
>> +	kni_q->tx.pkts += nb_pkts;
>> +	kni_q->tx.err_pkts += nb_bufs - nb_pkts;
>> +
>> +	return nb_pkts;
>> +}
>> +
>> +static void *
>> +kni_handle_request(void *param)
>> +{
>> +	struct pmd_internals *internals = param;
>> +#define MS 1000
>> +
>> +	while (!internals->stop_thread) {
>> +		rte_kni_handle_request(internals->kni);
>> +		usleep(500 * MS);
>> +	}
>> +
>> +	return param;
>> +}
>> +
> 
> Do we really need a thread to handle request by default? I know there are apps that handle request their own way and having a separate thread could add synchronization problems.  Can we at least add an option to disable this?

I didn't think about there can be a use case that requires own request
handling.

But, kni requests should be handled to make kni interface run properly,
and to handle interface "kni" handler (internals->kni) required, which
this PMD doesn't expose.

So, just disabling this thread won't work on its own.

A solution can be found, like callback registraion, or get_handler API,
but if an application has custom request handling, perhaps it may prefer
to use kni library directly instead of this wrapper, since wrapper
already doesn't expose all kni features.

> 
>> +static int
>> +eth_kni_start(struct rte_eth_dev *dev)
>> +{
>> +	struct pmd_internals *internals = dev->data->dev_private;
>> +	uint16_t port_id = dev->data->port_id;
>> +	struct rte_mempool *mb_pool;
>> +	struct rte_kni_conf conf;
>> +	const char *name = dev->data->name + 4; /* remove net_ */
>> +
>> +	snprintf(conf.name, RTE_KNI_NAMESIZE, "%s", name);
>> +	conf.force_bind = 0;
>> +	conf.group_id = port_id;
>> +	conf.mbuf_size = MAX_PACKET_SZ;
>> +	mb_pool = internals->rx_queues[0].mb_pool;
>> +
>> +	internals->kni = rte_kni_alloc(mb_pool, &conf, NULL);
>> +	if (internals->kni == NULL) {
>> +		RTE_LOG(ERR, PMD,
>> +			"Fail to create kni for port: %d\n", port_id);
>> +		return -1;
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>> +static int
>> +eth_kni_dev_start(struct rte_eth_dev *dev)
>> +{
>> +	struct pmd_internals *internals = dev->data->dev_private;
>> +	int ret;
>> +
>> +	if (internals->is_kni_started == 0) {
>> +		ret = eth_kni_start(dev);
>> +		if (ret)
>> +			return -1;
>> +		internals->is_kni_started = 1;
>> +	}
>> +
> 
> In case is_kni_started is 1 already,  shouldn't we return directly instead of proceeding?

"is_kni_started" is just to protect "eth_kni_start()", as you can see it
doesn't have a counterpart in eth_kni_dev_stop(). This flag is to be
sure "eth_kni_start()" called only once during PMD life cycle.

The check you mentioned already done, start() / stop() functions already
balanced by APIs calling these functions.

> 
>> +	ret = pthread_create(&internals->thread, NULL, kni_handle_request,
>> +			internals);
>> +	if (ret) {
>> +		RTE_LOG(ERR, PMD, "Fail to create kni request thread\n");
>> +		return -1;
>> +	}
>> +
>> +	dev->data->dev_link.link_status = 1;
>> +
>> +	return 0;
>> +}
>> +
>> +static void
>> +eth_kni_dev_stop(struct rte_eth_dev *dev)
>> +{
>> +	struct pmd_internals *internals = dev->data->dev_private;
>> +	int ret;
>> +
>> +	internals->stop_thread = 1;
>> +
>> +	ret = pthread_cancel(internals->thread);
>> +	if (ret)
>> +		RTE_LOG(ERR, PMD, "Can't cancel the thread\n");
>> +
>> +	ret = pthread_join(internals->thread, NULL);
>> +	if (ret)
>> +		RTE_LOG(ERR, PMD, "Can't join the thread\n");
>> +
>> +	internals->stop_thread = 0;
>> +
>> +	dev->data->dev_link.link_status = 0;
>> +}
>> +
>> +static int
>> +eth_kni_dev_configure(struct rte_eth_dev *dev __rte_unused)
>> +{
>> +	return 0;
>> +}
>> +
>> +static void
>> +eth_kni_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info
>> *dev_info)
>> +{
>> +	struct rte_eth_dev_data *data = dev->data;
>> +
>> +	dev_info->driver_name = data->drv_name;
>> +	dev_info->max_mac_addrs = 1;
>> +	dev_info->max_rx_pktlen = (uint32_t)-1;
>> +	dev_info->max_rx_queues = KNI_MAX_QUEUE_PER_PORT;
>> +	dev_info->max_tx_queues = KNI_MAX_QUEUE_PER_PORT;
>> +	dev_info->min_rx_bufsize = 0;
>> +	dev_info->pci_dev = NULL;
>> +}
>> +
>> +static int
>> +eth_kni_rx_queue_setup(struct rte_eth_dev *dev,
>> +		uint16_t rx_queue_id,
>> +		uint16_t nb_rx_desc __rte_unused,
>> +		unsigned int socket_id __rte_unused,
>> +		const struct rte_eth_rxconf *rx_conf __rte_unused,
>> +		struct rte_mempool *mb_pool)
>> +{
>> +	struct pmd_internals *internals = dev->data->dev_private;
>> +	struct pmd_queue *q;
>> +
>> +	q = &internals->rx_queues[rx_queue_id];
>> +	q->internals = internals;
>> +	q->mb_pool = mb_pool;
>> +
>> +	dev->data->rx_queues[rx_queue_id] = q;
>> +
>> +	return 0;
>> +}
>> +
>> +static int
>> +eth_kni_tx_queue_setup(struct rte_eth_dev *dev,
>> +		uint16_t tx_queue_id,
>> +		uint16_t nb_tx_desc __rte_unused,
>> +		unsigned int socket_id __rte_unused,
>> +		const struct rte_eth_txconf *tx_conf __rte_unused)
>> +{
>> +	struct pmd_internals *internals = dev->data->dev_private;
>> +	struct pmd_queue *q;
>> +
>> +	q = &internals->tx_queues[tx_queue_id];
>> +	q->internals = internals;
>> +
>> +	dev->data->tx_queues[tx_queue_id] = q;
>> +
>> +	return 0;
>> +}
>> +
>> +static void
>> +eth_kni_queue_release(void *q __rte_unused)
>> +{
>> +}
>> +
>> +static int
>> +eth_kni_link_update(struct rte_eth_dev *dev __rte_unused,
>> +		int wait_to_complete __rte_unused)
>> +{
>> +	return 0;
>> +}
>> +
>> +static void
>> +eth_kni_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
>> +{
>> +	unsigned long rx_packets_total = 0, rx_bytes_total = 0;
>> +	unsigned long tx_packets_total = 0, tx_bytes_total = 0;
>> +	struct rte_eth_dev_data *data = dev->data;
>> +	unsigned long tx_packets_err_total = 0;
>> +	unsigned int i, num_stats;
>> +	struct pmd_queue *q;
>> +
>> +	num_stats = RTE_MIN((unsigned
>> int)RTE_ETHDEV_QUEUE_STAT_CNTRS,
>> +			data->nb_rx_queues);
>> +	for (i = 0; i < num_stats; i++) {
>> +		q = data->rx_queues[i];
>> +		stats->q_ipackets[i] = q->rx.pkts;
>> +		stats->q_ibytes[i] = q->rx.bytes;
>> +		rx_packets_total += stats->q_ipackets[i];
>> +		rx_bytes_total += stats->q_ibytes[i];
>> +	}
>> +
>> +	num_stats = RTE_MIN((unsigned
>> int)RTE_ETHDEV_QUEUE_STAT_CNTRS,
>> +			data->nb_tx_queues);
>> +	for (i = 0; i < num_stats; i++) {
>> +		q = data->tx_queues[i];
>> +		stats->q_opackets[i] = q->tx.pkts;
>> +		stats->q_obytes[i] = q->tx.bytes;
>> +		stats->q_errors[i] = q->tx.err_pkts;
>> +		tx_packets_total += stats->q_opackets[i];
>> +		tx_bytes_total += stats->q_obytes[i];
>> +		tx_packets_err_total += stats->q_errors[i];
>> +	}
>> +
>> +	stats->ipackets = rx_packets_total;
>> +	stats->ibytes = rx_bytes_total;
>> +	stats->opackets = tx_packets_total;
>> +	stats->obytes = tx_bytes_total;
>> +	stats->oerrors = tx_packets_err_total;
>> +}
>> +
>> +static void
>> +eth_kni_stats_reset(struct rte_eth_dev *dev)
>> +{
>> +	struct rte_eth_dev_data *data = dev->data;
>> +	struct pmd_queue *q;
>> +	unsigned int i;
>> +
>> +	for (i = 0; i < data->nb_rx_queues; i++) {
>> +		q = data->rx_queues[i];
>> +		q->rx.pkts = 0;
>> +		q->rx.bytes = 0;
>> +	}
>> +	for (i = 0; i < data->nb_tx_queues; i++) {
>> +		q = data->tx_queues[i];
>> +		q->tx.pkts = 0;
>> +		q->tx.bytes = 0;
>> +		q->tx.err_pkts = 0;
>> +	}
>> +}
>> +
>> +static const struct eth_dev_ops eth_kni_ops = {
>> +	.dev_start = eth_kni_dev_start,
>> +	.dev_stop = eth_kni_dev_stop,
>> +	.dev_configure = eth_kni_dev_configure,
>> +	.dev_infos_get = eth_kni_dev_info,
>> +	.rx_queue_setup = eth_kni_rx_queue_setup,
>> +	.tx_queue_setup = eth_kni_tx_queue_setup,
>> +	.rx_queue_release = eth_kni_queue_release,
>> +	.tx_queue_release = eth_kni_queue_release,
>> +	.link_update = eth_kni_link_update,
>> +	.stats_get = eth_kni_stats_get,
>> +	.stats_reset = eth_kni_stats_reset,
>> +};
>> +
>> +static struct rte_vdev_driver eth_kni_drv;
>> +
>> +static struct rte_eth_dev *
>> +eth_kni_create(const char *name, unsigned int numa_node)
>> +{
>> +	struct pmd_internals *internals = NULL;
>> +	struct rte_eth_dev_data *data;
>> +	struct rte_eth_dev *eth_dev;
>> +
>> +	RTE_LOG(INFO, PMD, "Creating kni ethdev on numa socket %u\n",
>> +			numa_node);
>> +
>> +	data = rte_zmalloc_socket(name, sizeof(*data), 0, numa_node);
>> +	if (data == NULL)
>> +		goto error;
>> +
>> +	internals = rte_zmalloc_socket(name, sizeof(*internals), 0,
>> numa_node);
>> +	if (internals == NULL)
>> +		goto error;
>> +
>> +	/* reserve an ethdev entry */
>> +	eth_dev = rte_eth_dev_allocate(name);
>> +	if (eth_dev == NULL)
>> +		goto error;
>> +
>> +	data->dev_private = internals;
>> +	data->port_id = eth_dev->data->port_id;
>> +	memmove(data->name, eth_dev->data->name, sizeof(data-
>>> name));
>> +	data->nb_rx_queues = 1;
>> +	data->nb_tx_queues = 1;
>> +	data->dev_link = pmd_link;
>> +	data->mac_addrs = &eth_addr;
>> +
>> +	eth_dev->data = data;
>> +	eth_dev->dev_ops = &eth_kni_ops;
>> +	eth_dev->driver = NULL;
>> +
>> +	data->dev_flags = RTE_ETH_DEV_DETACHABLE;
>> +	data->kdrv = RTE_KDRV_NONE;
>> +	data->drv_name = eth_kni_drv.driver.name;
>> +	data->numa_node = numa_node;
>> +
>> +	return eth_dev;
>> +
>> +error:
>> +	rte_free(data);
>> +	rte_free(internals);
>> +
>> +	return NULL;
>> +}
>> +
>> +static int
>> +kni_init(void)
>> +{
>> +	if (is_kni_initialized == 0)
>> +		rte_kni_init(MAX_KNI_PORTS);
>> +
>> +	is_kni_initialized += 1;
>> +
>> +	return 0;
>> +}
>> +
>> +static int
>> +eth_kni_probe(const char *name, const char *params __rte_unused)
>> +{
>> +	struct rte_eth_dev *eth_dev;
>> +	int ret;
>> +
>> +	RTE_LOG(INFO, PMD, "Initializing eth_kni for %s\n", name);
>> +
>> +	ret = kni_init();
>> +	if (ret < 0)
>> +		/* Not return error to prevent panic in rte_eal_init() */
>> +		return 0;
> 
> If we don't return error here, the application that needs to add KNI ports eventually will fail.  If it's a fail-stop situation, isn't it better to return error where the it happened?

I am not sure this is fail-stop situation, but instead this gives a
chance to applicaton for a graceful exit.

If an error value returned here, it will lead to a rte_panic() and
application terminated abnormally!

But if we return a success at this point, since no ethernet device
created, there is no handler in application to use, which also means no
KNI interface created.
Application can check number of ports and recognize KNI port is missing,
app may chose to terminate or not, also it prefers to terminate, can do
it properly.

>  
>> +	eth_dev = eth_kni_create(name, rte_socket_id());
>> +	if (eth_dev == NULL)
>> +		return -1;
>> +
>> +	eth_dev->rx_pkt_burst = eth_kni_rx;
>> +	eth_dev->tx_pkt_burst = eth_kni_tx;
>> +
>> +	return 0;
>> +}
>> +
>> +static int
>> +eth_kni_remove(const char *name)
>> +{
>> +	struct rte_eth_dev *eth_dev;
>> +	struct pmd_internals *internals;
>> +
>> +	RTE_LOG(INFO, PMD, "Un-Initializing eth_kni for %s\n", name);
>> +
>> +	/* find the ethdev entry */
>> +	eth_dev = rte_eth_dev_allocated(name);
>> +	if (eth_dev == NULL)
>> +		return -1;
>> +
>> +	eth_kni_dev_stop(eth_dev);
>> +
>> +	if (eth_dev->data) {
>> +		internals = eth_dev->data->dev_private;
>> +		rte_kni_release(internals->kni);
>> +
>> +		rte_free(internals);
>> +	}
>> +	rte_free(eth_dev->data);
>> +
>> +	rte_eth_dev_release_port(eth_dev);
>> +
>> +	is_kni_initialized -= 1;
>> +	if (is_kni_initialized == 0)
>> +		rte_kni_close();
>> +
>> +	return 0;
>> +}
>> +
>> +static struct rte_vdev_driver eth_kni_drv = {
>> +	.probe = eth_kni_probe,
>> +	.remove = eth_kni_remove,
>> +};
>> +
>> +RTE_PMD_REGISTER_VDEV(net_kni, eth_kni_drv);
>> diff --git a/drivers/net/kni/rte_pmd_kni_version.map
>> b/drivers/net/kni/rte_pmd_kni_version.map
>> new file mode 100644
>> index 0000000..31eca32
>> --- /dev/null
>> +++ b/drivers/net/kni/rte_pmd_kni_version.map
>> @@ -0,0 +1,4 @@
>> +DPDK_17.02 {
>> +
>> +	local: *;
>> +};
>> diff --git a/mk/rte.app.mk b/mk/rte.app.mk
>> index f75f0e2..af02816 100644
>> --- a/mk/rte.app.mk
>> +++ b/mk/rte.app.mk
>> @@ -59,11 +59,6 @@ _LDLIBS-y += -L$(RTE_SDK_BIN)/lib
>>  #
>>  # Order is important: from higher level to lower level
>>  #
>> -
>> -ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
>> -_LDLIBS-$(CONFIG_RTE_LIBRTE_KNI)            += -lrte_kni
>> -endif
>> -
>>  _LDLIBS-$(CONFIG_RTE_LIBRTE_PIPELINE)       += -lrte_pipeline
>>  _LDLIBS-$(CONFIG_RTE_LIBRTE_TABLE)          += -lrte_table
>>  _LDLIBS-$(CONFIG_RTE_LIBRTE_PORT)           += -lrte_port
>> @@ -84,6 +79,10 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_POWER)          += -
>> lrte_power
>>
>>  _LDLIBS-y += --whole-archive
>>
>> +ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
>> +_LDLIBS-$(CONFIG_RTE_LIBRTE_KNI)            += -lrte_kni
>> +endif
>> +
>>  _LDLIBS-$(CONFIG_RTE_LIBRTE_TIMER)          += -lrte_timer
>>  _LDLIBS-$(CONFIG_RTE_LIBRTE_HASH)           += -lrte_hash
>>  _LDLIBS-$(CONFIG_RTE_LIBRTE_VHOST)          += -lrte_vhost
>> @@ -115,6 +114,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_ENIC_PMD)       += -
>> lrte_pmd_enic
>>  _LDLIBS-$(CONFIG_RTE_LIBRTE_FM10K_PMD)      += -lrte_pmd_fm10k
>>  _LDLIBS-$(CONFIG_RTE_LIBRTE_I40E_PMD)       += -lrte_pmd_i40e
>>  _LDLIBS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD)      += -lrte_pmd_ixgbe
>> +_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_KNI)        += -lrte_pmd_kni
>>  _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX4_PMD)       += -lrte_pmd_mlx4 -
>> libverbs
>>  _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX5_PMD)       += -lrte_pmd_mlx5 -
>> libverbs
>>  _LDLIBS-$(CONFIG_RTE_LIBRTE_MPIPE_PMD)      += -lrte_pmd_mpipe -lgxio
>> --
>> 2.9.3
>
Yong Wang Dec. 14, 2016, 7:25 p.m. UTC | #3
> -----Original Message-----
> From: Ferruh Yigit [mailto:ferruh.yigit@intel.com]
> Sent: Wednesday, December 14, 2016 8:00 AM
> To: Yong Wang <yongwang@vmware.com>; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v4] net/kni: add KNI PMD
> 
> On 12/12/2016 9:59 PM, Yong Wang wrote:
> >> -----Original Message-----
> >> From: Ferruh Yigit [mailto:ferruh.yigit@intel.com]
> >> Sent: Wednesday, November 30, 2016 10:12 AM
> >> To: dev@dpdk.org
> >> Cc: Ferruh Yigit <ferruh.yigit@intel.com>; Yong Wang
> >> <yongwang@vmware.com>
> >> Subject: [PATCH v4] net/kni: add KNI PMD
> >>
> >> Add KNI PMD which wraps librte_kni for ease of use.
> >>
> >> KNI PMD can be used as any regular PMD to send / receive packets to the
> >> Linux networking stack.
> >>
> >> Signed-off-by: Ferruh Yigit <ferruh.yigit@intel.com>
> >> ---
> >>
> >> v4:
> >> * allow only single queue
> >> * use driver.name as name
> >>
> >> v3:
> >> * rebase on top of latest master
> >>
> >> v2:
> >> * updated driver name eth_kni -> net_kni
> >> ---
> >>  config/common_base                      |   1 +
> >>  config/common_linuxapp                  |   1 +
> >>  drivers/net/Makefile                    |   1 +
> >>  drivers/net/kni/Makefile                |  63 +++++
> >>  drivers/net/kni/rte_eth_kni.c           | 462
> >> ++++++++++++++++++++++++++++++++
> >>  drivers/net/kni/rte_pmd_kni_version.map |   4 +
> >>  mk/rte.app.mk                           |  10 +-
> >>  7 files changed, 537 insertions(+), 5 deletions(-)
> >>  create mode 100644 drivers/net/kni/Makefile
> >>  create mode 100644 drivers/net/kni/rte_eth_kni.c
> >>  create mode 100644 drivers/net/kni/rte_pmd_kni_version.map
> >>
> >> diff --git a/config/common_base b/config/common_base
> >> index 4bff83a..3385879 100644
> >> --- a/config/common_base
> >> +++ b/config/common_base
> >> @@ -543,6 +543,7 @@ CONFIG_RTE_PIPELINE_STATS_COLLECT=n
> >>  # Compile librte_kni
> >>  #
> >>  CONFIG_RTE_LIBRTE_KNI=n
> >> +CONFIG_RTE_LIBRTE_PMD_KNI=n
> >>  CONFIG_RTE_KNI_KMOD=n
> >>  CONFIG_RTE_KNI_PREEMPT_DEFAULT=y
> >>  CONFIG_RTE_KNI_VHOST=n
> >> diff --git a/config/common_linuxapp b/config/common_linuxapp
> >> index 2483dfa..2ecd510 100644
> >> --- a/config/common_linuxapp
> >> +++ b/config/common_linuxapp
> >> @@ -39,6 +39,7 @@ CONFIG_RTE_EAL_IGB_UIO=y
> >>  CONFIG_RTE_EAL_VFIO=y
> >>  CONFIG_RTE_KNI_KMOD=y
> >>  CONFIG_RTE_LIBRTE_KNI=y
> >> +CONFIG_RTE_LIBRTE_PMD_KNI=y
> >>  CONFIG_RTE_LIBRTE_VHOST=y
> >>  CONFIG_RTE_LIBRTE_PMD_VHOST=y
> >>  CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
> >> diff --git a/drivers/net/Makefile b/drivers/net/Makefile
> >> index bc93230..c4771cd 100644
> >> --- a/drivers/net/Makefile
> >> +++ b/drivers/net/Makefile
> >> @@ -41,6 +41,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += enic
> >>  DIRS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += fm10k
> >>  DIRS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += i40e
> >>  DIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe
> >> +DIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += kni
> >>  DIRS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4
> >>  DIRS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5
> >>  DIRS-$(CONFIG_RTE_LIBRTE_MPIPE_PMD) += mpipe
> >> diff --git a/drivers/net/kni/Makefile b/drivers/net/kni/Makefile
> >> new file mode 100644
> >> index 0000000..0b7cf91
> >> --- /dev/null
> >> +++ b/drivers/net/kni/Makefile
> >> @@ -0,0 +1,63 @@
> >> +#   BSD LICENSE
> >> +#
> >> +#   Copyright(c) 2016 Intel Corporation. All rights reserved.
> >> +#
> >> +#   Redistribution and use in source and binary forms, with or without
> >> +#   modification, are permitted provided that the following conditions
> >> +#   are met:
> >> +#
> >> +#     * Redistributions of source code must retain the above copyright
> >> +#       notice, this list of conditions and the following disclaimer.
> >> +#     * Redistributions in binary form must reproduce the above copyright
> >> +#       notice, this list of conditions and the following disclaimer in
> >> +#       the documentation and/or other materials provided with the
> >> +#       distribution.
> >> +#     * Neither the name of Intel Corporation nor the names of its
> >> +#       contributors may be used to endorse or promote products derived
> >> +#       from this software without specific prior written permission.
> >> +#
> >> +#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
> >> CONTRIBUTORS
> >> +#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
> BUT
> >> NOT
> >> +#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
> >> FITNESS FOR
> >> +#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
> >> COPYRIGHT
> >> +#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> >> INCIDENTAL,
> >> +#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
> BUT
> >> NOT
> >> +#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
> LOSS
> >> OF USE,
> >> +#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
> >> AND ON ANY
> >> +#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
> >> TORT
> >> +#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
> OF
> >> THE USE
> >> +#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> >> DAMAGE.
> >> +
> >> +include $(RTE_SDK)/mk/rte.vars.mk
> >> +
> >> +#
> >> +# library name
> >> +#
> >> +LIB = librte_pmd_kni.a
> >> +
> >> +CFLAGS += -O3
> >> +CFLAGS += $(WERROR_FLAGS)
> >> +LDLIBS += -lpthread
> >> +
> >> +EXPORT_MAP := rte_pmd_kni_version.map
> >> +
> >> +LIBABIVER := 1
> >> +
> >> +#
> >> +# all source are stored in SRCS-y
> >> +#
> >> +SRCS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += rte_eth_kni.c
> >> +
> >> +#
> >> +# Export include files
> >> +#
> >> +SYMLINK-y-include +=
> >> +
> >> +# this lib depends upon:
> >> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_eal
> >> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_ether
> >> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_kni
> >> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_mbuf
> >> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_mempool
> >> +
> >> +include $(RTE_SDK)/mk/rte.lib.mk
> >> diff --git a/drivers/net/kni/rte_eth_kni.c b/drivers/net/kni/rte_eth_kni.c
> >> new file mode 100644
> >> index 0000000..6c4df96
> >> --- /dev/null
> >> +++ b/drivers/net/kni/rte_eth_kni.c
> >> @@ -0,0 +1,462 @@
> >> +/*-
> >> + *   BSD LICENSE
> >> + *
> >> + *   Copyright(c) 2016 Intel Corporation. All rights reserved.
> >> + *   All rights reserved.
> >> + *
> >> + *   Redistribution and use in source and binary forms, with or without
> >> + *   modification, are permitted provided that the following conditions
> >> + *   are met:
> >> + *
> >> + *     * Redistributions of source code must retain the above copyright
> >> + *       notice, this list of conditions and the following disclaimer.
> >> + *     * Redistributions in binary form must reproduce the above copyright
> >> + *       notice, this list of conditions and the following disclaimer in
> >> + *       the documentation and/or other materials provided with the
> >> + *       distribution.
> >> + *     * Neither the name of Intel Corporation nor the names of its
> >> + *       contributors may be used to endorse or promote products derived
> >> + *       from this software without specific prior written permission.
> >> + *
> >> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
> >> CONTRIBUTORS
> >> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
> BUT
> >> NOT
> >> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
> >> FITNESS FOR
> >> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
> >> COPYRIGHT
> >> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> >> INCIDENTAL,
> >> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
> BUT
> >> NOT
> >> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
> LOSS
> >> OF USE,
> >> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
> >> AND ON ANY
> >> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
> >> TORT
> >> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
> OF
> >> THE USE
> >> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> >> DAMAGE.
> >> + */
> >> +
> >> +#include <fcntl.h>
> >> +#include <pthread.h>
> >> +#include <unistd.h>
> >> +
> >> +#include <rte_ethdev.h>
> >> +#include <rte_kni.h>
> >> +#include <rte_malloc.h>
> >> +#include <rte_vdev.h>
> >> +
> >> +/* Only single queue supported */
> >> +#define KNI_MAX_QUEUE_PER_PORT 1
> >> +
> >> +#define MAX_PACKET_SZ 2048
> >> +#define MAX_KNI_PORTS 8
> >> +
> >> +struct pmd_queue_stats {
> >> +	uint64_t pkts;
> >> +	uint64_t bytes;
> >> +	uint64_t err_pkts;
> >> +};
> >> +
> >> +struct pmd_queue {
> >> +	struct pmd_internals *internals;
> >> +	struct rte_mempool *mb_pool;
> >> +
> >> +	struct pmd_queue_stats rx;
> >> +	struct pmd_queue_stats tx;
> >> +};
> >> +
> >> +struct pmd_internals {
> >> +	struct rte_kni *kni;
> >> +	int is_kni_started;
> >> +
> >> +	pthread_t thread;
> >> +	int stop_thread;
> >> +
> >> +	struct pmd_queue rx_queues[KNI_MAX_QUEUE_PER_PORT];
> >> +	struct pmd_queue tx_queues[KNI_MAX_QUEUE_PER_PORT];
> >> +};
> >> +
> >> +static struct ether_addr eth_addr;
> >> +static struct rte_eth_link pmd_link = {
> >> +		.link_speed = ETH_SPEED_NUM_10G,
> >> +		.link_duplex = ETH_LINK_FULL_DUPLEX,
> >> +		.link_status = 0
> >> +};
> >> +static int is_kni_initialized;
> >> +
> >> +static uint16_t
> >> +eth_kni_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
> >> +{
> >> +	struct pmd_queue *kni_q = q;
> >> +	struct rte_kni *kni = kni_q->internals->kni;
> >> +	uint16_t nb_pkts;
> >> +
> >> +	nb_pkts = rte_kni_rx_burst(kni, bufs, nb_bufs);
> >> +
> >> +	kni_q->rx.pkts += nb_pkts;
> >> +	kni_q->rx.err_pkts += nb_bufs - nb_pkts;
> >> +
> >> +	return nb_pkts;
> >> +}
> >> +
> >> +static uint16_t
> >> +eth_kni_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
> >> +{
> >> +	struct pmd_queue *kni_q = q;
> >> +	struct rte_kni *kni = kni_q->internals->kni;
> >> +	uint16_t nb_pkts;
> >> +
> >> +	nb_pkts =  rte_kni_tx_burst(kni, bufs, nb_bufs);
> >> +
> >> +	kni_q->tx.pkts += nb_pkts;
> >> +	kni_q->tx.err_pkts += nb_bufs - nb_pkts;
> >> +
> >> +	return nb_pkts;
> >> +}
> >> +
> >> +static void *
> >> +kni_handle_request(void *param)
> >> +{
> >> +	struct pmd_internals *internals = param;
> >> +#define MS 1000
> >> +
> >> +	while (!internals->stop_thread) {
> >> +		rte_kni_handle_request(internals->kni);
> >> +		usleep(500 * MS);
> >> +	}
> >> +
> >> +	return param;
> >> +}
> >> +
> >
> > Do we really need a thread to handle request by default? I know there are
> apps that handle request their own way and having a separate thread could
> add synchronization problems.  Can we at least add an option to disable this?
> 
> I didn't think about there can be a use case that requires own request
> handling.
> 
> But, kni requests should be handled to make kni interface run properly,
> and to handle interface "kni" handler (internals->kni) required, which
> this PMD doesn't expose.
> 
> So, just disabling this thread won't work on its own.

I understand that and what I am asking is a way to at least disable this without having to make code changes for applications that have their own way of handling KNI request and the callback mentioned below sounds good to me.  I am fine with adding this capability with this commit or in a separate commit after you have this commit checked in.
 
> A solution can be found, like callback registraion, or get_handler API,
> but if an application has custom request handling, perhaps it may prefer
> to use kni library directly instead of this wrapper, since wrapper
> already doesn't expose all kni features.

I think one of the motivation of having KNI pmd is that it's abstracted the same way as other physical or virtual devices.  I think it makes sense to achieve  feature parity with the KNI library as much as possible.  What's currently supported in KNI library but missing in KNI PMD and any specific reason they are not supported?

> >
> >> +static int
> >> +eth_kni_start(struct rte_eth_dev *dev)
> >> +{
> >> +	struct pmd_internals *internals = dev->data->dev_private;
> >> +	uint16_t port_id = dev->data->port_id;
> >> +	struct rte_mempool *mb_pool;
> >> +	struct rte_kni_conf conf;
> >> +	const char *name = dev->data->name + 4; /* remove net_ */
> >> +
> >> +	snprintf(conf.name, RTE_KNI_NAMESIZE, "%s", name);
> >> +	conf.force_bind = 0;
> >> +	conf.group_id = port_id;
> >> +	conf.mbuf_size = MAX_PACKET_SZ;
> >> +	mb_pool = internals->rx_queues[0].mb_pool;
> >> +
> >> +	internals->kni = rte_kni_alloc(mb_pool, &conf, NULL);
> >> +	if (internals->kni == NULL) {
> >> +		RTE_LOG(ERR, PMD,
> >> +			"Fail to create kni for port: %d\n", port_id);
> >> +		return -1;
> >> +	}
> >> +
> >> +	return 0;
> >> +}
> >> +
> >> +static int
> >> +eth_kni_dev_start(struct rte_eth_dev *dev)
> >> +{
> >> +	struct pmd_internals *internals = dev->data->dev_private;
> >> +	int ret;
> >> +
> >> +	if (internals->is_kni_started == 0) {
> >> +		ret = eth_kni_start(dev);
> >> +		if (ret)
> >> +			return -1;
> >> +		internals->is_kni_started = 1;
> >> +	}
> >> +
> >
> > In case is_kni_started is 1 already,  shouldn't we return directly instead of
> proceeding?
> 
> "is_kni_started" is just to protect "eth_kni_start()", as you can see it
> doesn't have a counterpart in eth_kni_dev_stop(). This flag is to be
> sure "eth_kni_start()" called only once during PMD life cycle.
> 
> The check you mentioned already done, start() / stop() functions already
> balanced by APIs calling these functions.

What about KNI request handing thread then?  Is it safe to have multiple threads calling into rte_kni_handle_request()? My understanding is that this is not safe as kni_fifo is not multi-thread safe.  It's also a bit wasteful to create multiple threads here.

> >
> >> +	ret = pthread_create(&internals->thread, NULL, kni_handle_request,
> >> +			internals);
> >> +	if (ret) {
> >> +		RTE_LOG(ERR, PMD, "Fail to create kni request thread\n");
> >> +		return -1;
> >> +	}
> >> +
> >> +	dev->data->dev_link.link_status = 1;
> >> +
> >> +	return 0;
> >> +}
> >> +
> >> +static void
> >> +eth_kni_dev_stop(struct rte_eth_dev *dev)
> >> +{
> >> +	struct pmd_internals *internals = dev->data->dev_private;
> >> +	int ret;
> >> +
> >> +	internals->stop_thread = 1;
> >> +
> >> +	ret = pthread_cancel(internals->thread);
> >> +	if (ret)
> >> +		RTE_LOG(ERR, PMD, "Can't cancel the thread\n");
> >> +
> >> +	ret = pthread_join(internals->thread, NULL);
> >> +	if (ret)
> >> +		RTE_LOG(ERR, PMD, "Can't join the thread\n");
> >> +
> >> +	internals->stop_thread = 0;
> >> +
> >> +	dev->data->dev_link.link_status = 0;
> >> +}
> >> +
> >> +static int
> >> +eth_kni_dev_configure(struct rte_eth_dev *dev __rte_unused)
> >> +{
> >> +	return 0;
> >> +}
> >> +
> >> +static void
> >> +eth_kni_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info
> >> *dev_info)
> >> +{
> >> +	struct rte_eth_dev_data *data = dev->data;
> >> +
> >> +	dev_info->driver_name = data->drv_name;
> >> +	dev_info->max_mac_addrs = 1;
> >> +	dev_info->max_rx_pktlen = (uint32_t)-1;
> >> +	dev_info->max_rx_queues = KNI_MAX_QUEUE_PER_PORT;
> >> +	dev_info->max_tx_queues = KNI_MAX_QUEUE_PER_PORT;
> >> +	dev_info->min_rx_bufsize = 0;
> >> +	dev_info->pci_dev = NULL;
> >> +}
> >> +
> >> +static int
> >> +eth_kni_rx_queue_setup(struct rte_eth_dev *dev,
> >> +		uint16_t rx_queue_id,
> >> +		uint16_t nb_rx_desc __rte_unused,
> >> +		unsigned int socket_id __rte_unused,
> >> +		const struct rte_eth_rxconf *rx_conf __rte_unused,
> >> +		struct rte_mempool *mb_pool)
> >> +{
> >> +	struct pmd_internals *internals = dev->data->dev_private;
> >> +	struct pmd_queue *q;
> >> +
> >> +	q = &internals->rx_queues[rx_queue_id];
> >> +	q->internals = internals;
> >> +	q->mb_pool = mb_pool;
> >> +
> >> +	dev->data->rx_queues[rx_queue_id] = q;
> >> +
> >> +	return 0;
> >> +}
> >> +
> >> +static int
> >> +eth_kni_tx_queue_setup(struct rte_eth_dev *dev,
> >> +		uint16_t tx_queue_id,
> >> +		uint16_t nb_tx_desc __rte_unused,
> >> +		unsigned int socket_id __rte_unused,
> >> +		const struct rte_eth_txconf *tx_conf __rte_unused)
> >> +{
> >> +	struct pmd_internals *internals = dev->data->dev_private;
> >> +	struct pmd_queue *q;
> >> +
> >> +	q = &internals->tx_queues[tx_queue_id];
> >> +	q->internals = internals;
> >> +
> >> +	dev->data->tx_queues[tx_queue_id] = q;
> >> +
> >> +	return 0;
> >> +}
> >> +
> >> +static void
> >> +eth_kni_queue_release(void *q __rte_unused)
> >> +{
> >> +}
> >> +
> >> +static int
> >> +eth_kni_link_update(struct rte_eth_dev *dev __rte_unused,
> >> +		int wait_to_complete __rte_unused)
> >> +{
> >> +	return 0;
> >> +}
> >> +
> >> +static void
> >> +eth_kni_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
> >> +{
> >> +	unsigned long rx_packets_total = 0, rx_bytes_total = 0;
> >> +	unsigned long tx_packets_total = 0, tx_bytes_total = 0;
> >> +	struct rte_eth_dev_data *data = dev->data;
> >> +	unsigned long tx_packets_err_total = 0;
> >> +	unsigned int i, num_stats;
> >> +	struct pmd_queue *q;
> >> +
> >> +	num_stats = RTE_MIN((unsigned
> >> int)RTE_ETHDEV_QUEUE_STAT_CNTRS,
> >> +			data->nb_rx_queues);
> >> +	for (i = 0; i < num_stats; i++) {
> >> +		q = data->rx_queues[i];
> >> +		stats->q_ipackets[i] = q->rx.pkts;
> >> +		stats->q_ibytes[i] = q->rx.bytes;
> >> +		rx_packets_total += stats->q_ipackets[i];
> >> +		rx_bytes_total += stats->q_ibytes[i];
> >> +	}
> >> +
> >> +	num_stats = RTE_MIN((unsigned
> >> int)RTE_ETHDEV_QUEUE_STAT_CNTRS,
> >> +			data->nb_tx_queues);
> >> +	for (i = 0; i < num_stats; i++) {
> >> +		q = data->tx_queues[i];
> >> +		stats->q_opackets[i] = q->tx.pkts;
> >> +		stats->q_obytes[i] = q->tx.bytes;
> >> +		stats->q_errors[i] = q->tx.err_pkts;
> >> +		tx_packets_total += stats->q_opackets[i];
> >> +		tx_bytes_total += stats->q_obytes[i];
> >> +		tx_packets_err_total += stats->q_errors[i];
> >> +	}
> >> +
> >> +	stats->ipackets = rx_packets_total;
> >> +	stats->ibytes = rx_bytes_total;
> >> +	stats->opackets = tx_packets_total;
> >> +	stats->obytes = tx_bytes_total;
> >> +	stats->oerrors = tx_packets_err_total;
> >> +}
> >> +
> >> +static void
> >> +eth_kni_stats_reset(struct rte_eth_dev *dev)
> >> +{
> >> +	struct rte_eth_dev_data *data = dev->data;
> >> +	struct pmd_queue *q;
> >> +	unsigned int i;
> >> +
> >> +	for (i = 0; i < data->nb_rx_queues; i++) {
> >> +		q = data->rx_queues[i];
> >> +		q->rx.pkts = 0;
> >> +		q->rx.bytes = 0;
> >> +	}
> >> +	for (i = 0; i < data->nb_tx_queues; i++) {
> >> +		q = data->tx_queues[i];
> >> +		q->tx.pkts = 0;
> >> +		q->tx.bytes = 0;
> >> +		q->tx.err_pkts = 0;
> >> +	}
> >> +}
> >> +
> >> +static const struct eth_dev_ops eth_kni_ops = {
> >> +	.dev_start = eth_kni_dev_start,
> >> +	.dev_stop = eth_kni_dev_stop,
> >> +	.dev_configure = eth_kni_dev_configure,
> >> +	.dev_infos_get = eth_kni_dev_info,
> >> +	.rx_queue_setup = eth_kni_rx_queue_setup,
> >> +	.tx_queue_setup = eth_kni_tx_queue_setup,
> >> +	.rx_queue_release = eth_kni_queue_release,
> >> +	.tx_queue_release = eth_kni_queue_release,
> >> +	.link_update = eth_kni_link_update,
> >> +	.stats_get = eth_kni_stats_get,
> >> +	.stats_reset = eth_kni_stats_reset,
> >> +};
> >> +
> >> +static struct rte_vdev_driver eth_kni_drv;
> >> +
> >> +static struct rte_eth_dev *
> >> +eth_kni_create(const char *name, unsigned int numa_node)
> >> +{
> >> +	struct pmd_internals *internals = NULL;
> >> +	struct rte_eth_dev_data *data;
> >> +	struct rte_eth_dev *eth_dev;
> >> +
> >> +	RTE_LOG(INFO, PMD, "Creating kni ethdev on numa socket %u\n",
> >> +			numa_node);
> >> +
> >> +	data = rte_zmalloc_socket(name, sizeof(*data), 0, numa_node);
> >> +	if (data == NULL)
> >> +		goto error;
> >> +
> >> +	internals = rte_zmalloc_socket(name, sizeof(*internals), 0,
> >> numa_node);
> >> +	if (internals == NULL)
> >> +		goto error;
> >> +
> >> +	/* reserve an ethdev entry */
> >> +	eth_dev = rte_eth_dev_allocate(name);
> >> +	if (eth_dev == NULL)
> >> +		goto error;
> >> +
> >> +	data->dev_private = internals;
> >> +	data->port_id = eth_dev->data->port_id;
> >> +	memmove(data->name, eth_dev->data->name, sizeof(data-
> >>> name));
> >> +	data->nb_rx_queues = 1;
> >> +	data->nb_tx_queues = 1;
> >> +	data->dev_link = pmd_link;
> >> +	data->mac_addrs = &eth_addr;
> >> +
> >> +	eth_dev->data = data;
> >> +	eth_dev->dev_ops = &eth_kni_ops;
> >> +	eth_dev->driver = NULL;
> >> +
> >> +	data->dev_flags = RTE_ETH_DEV_DETACHABLE;
> >> +	data->kdrv = RTE_KDRV_NONE;
> >> +	data->drv_name = eth_kni_drv.driver.name;
> >> +	data->numa_node = numa_node;
> >> +
> >> +	return eth_dev;
> >> +
> >> +error:
> >> +	rte_free(data);
> >> +	rte_free(internals);
> >> +
> >> +	return NULL;
> >> +}
> >> +
> >> +static int
> >> +kni_init(void)
> >> +{
> >> +	if (is_kni_initialized == 0)
> >> +		rte_kni_init(MAX_KNI_PORTS);
> >> +
> >> +	is_kni_initialized += 1;
> >> +
> >> +	return 0;
> >> +}
> >> +
> >> +static int
> >> +eth_kni_probe(const char *name, const char *params __rte_unused)
> >> +{
> >> +	struct rte_eth_dev *eth_dev;
> >> +	int ret;
> >> +
> >> +	RTE_LOG(INFO, PMD, "Initializing eth_kni for %s\n", name);
> >> +
> >> +	ret = kni_init();
> >> +	if (ret < 0)
> >> +		/* Not return error to prevent panic in rte_eal_init() */
> >> +		return 0;
> >
> > If we don't return error here, the application that needs to add KNI ports
> eventually will fail.  If it's a fail-stop situation, isn't it better to return error
> where the it happened?
> 
> I am not sure this is fail-stop situation, but instead this gives a
> chance to applicaton for a graceful exit.
> 
> If an error value returned here, it will lead to a rte_panic() and
> application terminated abnormally!
> 
> But if we return a success at this point, since no ethernet device
> created, there is no handler in application to use, which also means no
> KNI interface created.
> Application can check number of ports and recognize KNI port is missing,
> app may chose to terminate or not, also it prefers to terminate, can do
> it properly.

I might be wrong but as far as I know,  other virtual or physical PMDS do not have this behavior.  What you proposed makes sense but it also means that the application needs extra logic (checking if all ports are successfully initialized) to handle such failures (depending on the application, it might be able to proceed or it might need to fail-stop).  Personally I would prefer consistency across all PMDs here no matter what behavior we choose here as that's the "contract" the application needs to know.
 
> >
> >> +	eth_dev = eth_kni_create(name, rte_socket_id());
> >> +	if (eth_dev == NULL)
> >> +		return -1;
> >> +
> >> +	eth_dev->rx_pkt_burst = eth_kni_rx;
> >> +	eth_dev->tx_pkt_burst = eth_kni_tx;
> >> +
> >> +	return 0;
> >> +}
> >> +
> >> +static int
> >> +eth_kni_remove(const char *name)
> >> +{
> >> +	struct rte_eth_dev *eth_dev;
> >> +	struct pmd_internals *internals;
> >> +
> >> +	RTE_LOG(INFO, PMD, "Un-Initializing eth_kni for %s\n", name);
> >> +
> >> +	/* find the ethdev entry */
> >> +	eth_dev = rte_eth_dev_allocated(name);
> >> +	if (eth_dev == NULL)
> >> +		return -1;
> >> +
> >> +	eth_kni_dev_stop(eth_dev);
> >> +
> >> +	if (eth_dev->data) {
> >> +		internals = eth_dev->data->dev_private;
> >> +		rte_kni_release(internals->kni);
> >> +
> >> +		rte_free(internals);
> >> +	}
> >> +	rte_free(eth_dev->data);
> >> +
> >> +	rte_eth_dev_release_port(eth_dev);
> >> +
> >> +	is_kni_initialized -= 1;
> >> +	if (is_kni_initialized == 0)
> >> +		rte_kni_close();
> >> +
> >> +	return 0;
> >> +}
> >> +
> >> +static struct rte_vdev_driver eth_kni_drv = {
> >> +	.probe = eth_kni_probe,
> >> +	.remove = eth_kni_remove,
> >> +};
> >> +
> >> +RTE_PMD_REGISTER_VDEV(net_kni, eth_kni_drv);
> >> diff --git a/drivers/net/kni/rte_pmd_kni_version.map
> >> b/drivers/net/kni/rte_pmd_kni_version.map
> >> new file mode 100644
> >> index 0000000..31eca32
> >> --- /dev/null
> >> +++ b/drivers/net/kni/rte_pmd_kni_version.map
> >> @@ -0,0 +1,4 @@
> >> +DPDK_17.02 {
> >> +
> >> +	local: *;
> >> +};
> >> diff --git a/mk/rte.app.mk b/mk/rte.app.mk
> >> index f75f0e2..af02816 100644
> >> --- a/mk/rte.app.mk
> >> +++ b/mk/rte.app.mk
> >> @@ -59,11 +59,6 @@ _LDLIBS-y += -L$(RTE_SDK_BIN)/lib
> >>  #
> >>  # Order is important: from higher level to lower level
> >>  #
> >> -
> >> -ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
> >> -_LDLIBS-$(CONFIG_RTE_LIBRTE_KNI)            += -lrte_kni
> >> -endif
> >> -
> >>  _LDLIBS-$(CONFIG_RTE_LIBRTE_PIPELINE)       += -lrte_pipeline
> >>  _LDLIBS-$(CONFIG_RTE_LIBRTE_TABLE)          += -lrte_table
> >>  _LDLIBS-$(CONFIG_RTE_LIBRTE_PORT)           += -lrte_port
> >> @@ -84,6 +79,10 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_POWER)          += -
> >> lrte_power
> >>
> >>  _LDLIBS-y += --whole-archive
> >>
> >> +ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
> >> +_LDLIBS-$(CONFIG_RTE_LIBRTE_KNI)            += -lrte_kni
> >> +endif
> >> +
> >>  _LDLIBS-$(CONFIG_RTE_LIBRTE_TIMER)          += -lrte_timer
> >>  _LDLIBS-$(CONFIG_RTE_LIBRTE_HASH)           += -lrte_hash
> >>  _LDLIBS-$(CONFIG_RTE_LIBRTE_VHOST)          += -lrte_vhost
> >> @@ -115,6 +114,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_ENIC_PMD)       +=
> -
> >> lrte_pmd_enic
> >>  _LDLIBS-$(CONFIG_RTE_LIBRTE_FM10K_PMD)      += -lrte_pmd_fm10k
> >>  _LDLIBS-$(CONFIG_RTE_LIBRTE_I40E_PMD)       += -lrte_pmd_i40e
> >>  _LDLIBS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD)      += -lrte_pmd_ixgbe
> >> +_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_KNI)        += -lrte_pmd_kni
> >>  _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX4_PMD)       += -lrte_pmd_mlx4 -
> >> libverbs
> >>  _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX5_PMD)       += -lrte_pmd_mlx5 -
> >> libverbs
> >>  _LDLIBS-$(CONFIG_RTE_LIBRTE_MPIPE_PMD)      += -lrte_pmd_mpipe -
> lgxio
> >> --
> >> 2.9.3
> >
Ferruh Yigit Dec. 15, 2016, 3:55 p.m. UTC | #4
On 12/14/2016 7:25 PM, Yong Wang wrote:
>> -----Original Message-----
>> From: Ferruh Yigit [mailto:ferruh.yigit@intel.com]
>> Sent: Wednesday, December 14, 2016 8:00 AM
>> To: Yong Wang <yongwang@vmware.com>; dev@dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH v4] net/kni: add KNI PMD
>>
>> On 12/12/2016 9:59 PM, Yong Wang wrote:
>>>> -----Original Message-----
>>>> From: Ferruh Yigit [mailto:ferruh.yigit@intel.com]
>>>> Sent: Wednesday, November 30, 2016 10:12 AM
>>>> To: dev@dpdk.org
>>>> Cc: Ferruh Yigit <ferruh.yigit@intel.com>; Yong Wang
>>>> <yongwang@vmware.com>
>>>> Subject: [PATCH v4] net/kni: add KNI PMD
>>>>
>>>> Add KNI PMD which wraps librte_kni for ease of use.
>>>>
>>>> KNI PMD can be used as any regular PMD to send / receive packets to the
>>>> Linux networking stack.
>>>>
>>>> Signed-off-by: Ferruh Yigit <ferruh.yigit@intel.com>
>>>> ---
>>>>
>>>> v4:
>>>> * allow only single queue
>>>> * use driver.name as name
>>>>
>>>> v3:
>>>> * rebase on top of latest master
>>>>
>>>> v2:
>>>> * updated driver name eth_kni -> net_kni
>>>> ---
>>>>  config/common_base                      |   1 +
>>>>  config/common_linuxapp                  |   1 +
>>>>  drivers/net/Makefile                    |   1 +
>>>>  drivers/net/kni/Makefile                |  63 +++++
>>>>  drivers/net/kni/rte_eth_kni.c           | 462
>>>> ++++++++++++++++++++++++++++++++
>>>>  drivers/net/kni/rte_pmd_kni_version.map |   4 +
>>>>  mk/rte.app.mk                           |  10 +-
>>>>  7 files changed, 537 insertions(+), 5 deletions(-)
>>>>  create mode 100644 drivers/net/kni/Makefile
>>>>  create mode 100644 drivers/net/kni/rte_eth_kni.c
>>>>  create mode 100644 drivers/net/kni/rte_pmd_kni_version.map
>>>>
>>>> diff --git a/config/common_base b/config/common_base
>>>> index 4bff83a..3385879 100644
>>>> --- a/config/common_base
>>>> +++ b/config/common_base
>>>> @@ -543,6 +543,7 @@ CONFIG_RTE_PIPELINE_STATS_COLLECT=n
>>>>  # Compile librte_kni
>>>>  #
>>>>  CONFIG_RTE_LIBRTE_KNI=n
>>>> +CONFIG_RTE_LIBRTE_PMD_KNI=n
>>>>  CONFIG_RTE_KNI_KMOD=n
>>>>  CONFIG_RTE_KNI_PREEMPT_DEFAULT=y
>>>>  CONFIG_RTE_KNI_VHOST=n
>>>> diff --git a/config/common_linuxapp b/config/common_linuxapp
>>>> index 2483dfa..2ecd510 100644
>>>> --- a/config/common_linuxapp
>>>> +++ b/config/common_linuxapp
>>>> @@ -39,6 +39,7 @@ CONFIG_RTE_EAL_IGB_UIO=y
>>>>  CONFIG_RTE_EAL_VFIO=y
>>>>  CONFIG_RTE_KNI_KMOD=y
>>>>  CONFIG_RTE_LIBRTE_KNI=y
>>>> +CONFIG_RTE_LIBRTE_PMD_KNI=y
>>>>  CONFIG_RTE_LIBRTE_VHOST=y
>>>>  CONFIG_RTE_LIBRTE_PMD_VHOST=y
>>>>  CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
>>>> diff --git a/drivers/net/Makefile b/drivers/net/Makefile
>>>> index bc93230..c4771cd 100644
>>>> --- a/drivers/net/Makefile
>>>> +++ b/drivers/net/Makefile
>>>> @@ -41,6 +41,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += enic
>>>>  DIRS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += fm10k
>>>>  DIRS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += i40e
>>>>  DIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe
>>>> +DIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += kni
>>>>  DIRS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4
>>>>  DIRS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5
>>>>  DIRS-$(CONFIG_RTE_LIBRTE_MPIPE_PMD) += mpipe
>>>> diff --git a/drivers/net/kni/Makefile b/drivers/net/kni/Makefile
>>>> new file mode 100644
>>>> index 0000000..0b7cf91
>>>> --- /dev/null
>>>> +++ b/drivers/net/kni/Makefile
>>>> @@ -0,0 +1,63 @@
>>>> +#   BSD LICENSE
>>>> +#
>>>> +#   Copyright(c) 2016 Intel Corporation. All rights reserved.
>>>> +#
>>>> +#   Redistribution and use in source and binary forms, with or without
>>>> +#   modification, are permitted provided that the following conditions
>>>> +#   are met:
>>>> +#
>>>> +#     * Redistributions of source code must retain the above copyright
>>>> +#       notice, this list of conditions and the following disclaimer.
>>>> +#     * Redistributions in binary form must reproduce the above copyright
>>>> +#       notice, this list of conditions and the following disclaimer in
>>>> +#       the documentation and/or other materials provided with the
>>>> +#       distribution.
>>>> +#     * Neither the name of Intel Corporation nor the names of its
>>>> +#       contributors may be used to endorse or promote products derived
>>>> +#       from this software without specific prior written permission.
>>>> +#
>>>> +#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
>>>> CONTRIBUTORS
>>>> +#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
>> BUT
>>>> NOT
>>>> +#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
>>>> FITNESS FOR
>>>> +#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
>>>> COPYRIGHT
>>>> +#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
>>>> INCIDENTAL,
>>>> +#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
>> BUT
>>>> NOT
>>>> +#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
>> LOSS
>>>> OF USE,
>>>> +#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
>>>> AND ON ANY
>>>> +#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
>>>> TORT
>>>> +#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
>> OF
>>>> THE USE
>>>> +#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
>>>> DAMAGE.
>>>> +
>>>> +include $(RTE_SDK)/mk/rte.vars.mk
>>>> +
>>>> +#
>>>> +# library name
>>>> +#
>>>> +LIB = librte_pmd_kni.a
>>>> +
>>>> +CFLAGS += -O3
>>>> +CFLAGS += $(WERROR_FLAGS)
>>>> +LDLIBS += -lpthread
>>>> +
>>>> +EXPORT_MAP := rte_pmd_kni_version.map
>>>> +
>>>> +LIBABIVER := 1
>>>> +
>>>> +#
>>>> +# all source are stored in SRCS-y
>>>> +#
>>>> +SRCS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += rte_eth_kni.c
>>>> +
>>>> +#
>>>> +# Export include files
>>>> +#
>>>> +SYMLINK-y-include +=
>>>> +
>>>> +# this lib depends upon:
>>>> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_eal
>>>> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_ether
>>>> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_kni
>>>> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_mbuf
>>>> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_mempool
>>>> +
>>>> +include $(RTE_SDK)/mk/rte.lib.mk
>>>> diff --git a/drivers/net/kni/rte_eth_kni.c b/drivers/net/kni/rte_eth_kni.c
>>>> new file mode 100644
>>>> index 0000000..6c4df96
>>>> --- /dev/null
>>>> +++ b/drivers/net/kni/rte_eth_kni.c
>>>> @@ -0,0 +1,462 @@
>>>> +/*-
>>>> + *   BSD LICENSE
>>>> + *
>>>> + *   Copyright(c) 2016 Intel Corporation. All rights reserved.
>>>> + *   All rights reserved.
>>>> + *
>>>> + *   Redistribution and use in source and binary forms, with or without
>>>> + *   modification, are permitted provided that the following conditions
>>>> + *   are met:
>>>> + *
>>>> + *     * Redistributions of source code must retain the above copyright
>>>> + *       notice, this list of conditions and the following disclaimer.
>>>> + *     * Redistributions in binary form must reproduce the above copyright
>>>> + *       notice, this list of conditions and the following disclaimer in
>>>> + *       the documentation and/or other materials provided with the
>>>> + *       distribution.
>>>> + *     * Neither the name of Intel Corporation nor the names of its
>>>> + *       contributors may be used to endorse or promote products derived
>>>> + *       from this software without specific prior written permission.
>>>> + *
>>>> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
>>>> CONTRIBUTORS
>>>> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
>> BUT
>>>> NOT
>>>> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
>>>> FITNESS FOR
>>>> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
>>>> COPYRIGHT
>>>> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
>>>> INCIDENTAL,
>>>> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
>> BUT
>>>> NOT
>>>> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
>> LOSS
>>>> OF USE,
>>>> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
>>>> AND ON ANY
>>>> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
>>>> TORT
>>>> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
>> OF
>>>> THE USE
>>>> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
>>>> DAMAGE.
>>>> + */
>>>> +
>>>> +#include <fcntl.h>
>>>> +#include <pthread.h>
>>>> +#include <unistd.h>
>>>> +
>>>> +#include <rte_ethdev.h>
>>>> +#include <rte_kni.h>
>>>> +#include <rte_malloc.h>
>>>> +#include <rte_vdev.h>
>>>> +
>>>> +/* Only single queue supported */
>>>> +#define KNI_MAX_QUEUE_PER_PORT 1
>>>> +
>>>> +#define MAX_PACKET_SZ 2048
>>>> +#define MAX_KNI_PORTS 8
>>>> +
>>>> +struct pmd_queue_stats {
>>>> +	uint64_t pkts;
>>>> +	uint64_t bytes;
>>>> +	uint64_t err_pkts;
>>>> +};
>>>> +
>>>> +struct pmd_queue {
>>>> +	struct pmd_internals *internals;
>>>> +	struct rte_mempool *mb_pool;
>>>> +
>>>> +	struct pmd_queue_stats rx;
>>>> +	struct pmd_queue_stats tx;
>>>> +};
>>>> +
>>>> +struct pmd_internals {
>>>> +	struct rte_kni *kni;
>>>> +	int is_kni_started;
>>>> +
>>>> +	pthread_t thread;
>>>> +	int stop_thread;
>>>> +
>>>> +	struct pmd_queue rx_queues[KNI_MAX_QUEUE_PER_PORT];
>>>> +	struct pmd_queue tx_queues[KNI_MAX_QUEUE_PER_PORT];
>>>> +};
>>>> +
>>>> +static struct ether_addr eth_addr;
>>>> +static struct rte_eth_link pmd_link = {
>>>> +		.link_speed = ETH_SPEED_NUM_10G,
>>>> +		.link_duplex = ETH_LINK_FULL_DUPLEX,
>>>> +		.link_status = 0
>>>> +};
>>>> +static int is_kni_initialized;
>>>> +
>>>> +static uint16_t
>>>> +eth_kni_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
>>>> +{
>>>> +	struct pmd_queue *kni_q = q;
>>>> +	struct rte_kni *kni = kni_q->internals->kni;
>>>> +	uint16_t nb_pkts;
>>>> +
>>>> +	nb_pkts = rte_kni_rx_burst(kni, bufs, nb_bufs);
>>>> +
>>>> +	kni_q->rx.pkts += nb_pkts;
>>>> +	kni_q->rx.err_pkts += nb_bufs - nb_pkts;
>>>> +
>>>> +	return nb_pkts;
>>>> +}
>>>> +
>>>> +static uint16_t
>>>> +eth_kni_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
>>>> +{
>>>> +	struct pmd_queue *kni_q = q;
>>>> +	struct rte_kni *kni = kni_q->internals->kni;
>>>> +	uint16_t nb_pkts;
>>>> +
>>>> +	nb_pkts =  rte_kni_tx_burst(kni, bufs, nb_bufs);
>>>> +
>>>> +	kni_q->tx.pkts += nb_pkts;
>>>> +	kni_q->tx.err_pkts += nb_bufs - nb_pkts;
>>>> +
>>>> +	return nb_pkts;
>>>> +}
>>>> +
>>>> +static void *
>>>> +kni_handle_request(void *param)
>>>> +{
>>>> +	struct pmd_internals *internals = param;
>>>> +#define MS 1000
>>>> +
>>>> +	while (!internals->stop_thread) {
>>>> +		rte_kni_handle_request(internals->kni);
>>>> +		usleep(500 * MS);
>>>> +	}
>>>> +
>>>> +	return param;
>>>> +}
>>>> +
>>>
>>> Do we really need a thread to handle request by default? I know there are
>> apps that handle request their own way and having a separate thread could
>> add synchronization problems.  Can we at least add an option to disable this?
>>
>> I didn't think about there can be a use case that requires own request
>> handling.
>>
>> But, kni requests should be handled to make kni interface run properly,
>> and to handle interface "kni" handler (internals->kni) required, which
>> this PMD doesn't expose.
>>
>> So, just disabling this thread won't work on its own.
> 
> I understand that and what I am asking is a way to at least disable this without having to make code changes for applications that have their own way of handling KNI request and the callback mentioned below sounds good to me.  I am fine with adding this capability with this commit or in a separate commit after you have this commit checked in.

I don't mind adding in new version, only I am trying to understand it.

Normally what it does is calling KNI library rte_kni_handle_request()
API periodically on KNI handler. What an app may be doing own its way,
other than tweaking the period?

>  
>> A solution can be found, like callback registraion, or get_handler API,
>> but if an application has custom request handling, perhaps it may prefer
>> to use kni library directly instead of this wrapper, since wrapper
>> already doesn't expose all kni features.
> 
> I think one of the motivation of having KNI pmd is that it's abstracted the same way as other physical or virtual devices.  I think it makes sense to achieve  feature parity with the KNI library as much as possible.  What's currently supported in KNI library but missing in KNI PMD and any specific reason they are not supported?

Mainly what missing is rte_kni_conf and some APIs has default values,
instead of being variable.
And ethtool (kni control path) is not supported with PMD.

Default values used (instead of configurable devargs) , to make PMD simple.
And ethtool support is a) hard to add, b) doesn't quite fit to KNI PMD
logic.

> 
>>>
>>>> +static int
>>>> +eth_kni_start(struct rte_eth_dev *dev)
>>>> +{
>>>> +	struct pmd_internals *internals = dev->data->dev_private;
>>>> +	uint16_t port_id = dev->data->port_id;
>>>> +	struct rte_mempool *mb_pool;
>>>> +	struct rte_kni_conf conf;
>>>> +	const char *name = dev->data->name + 4; /* remove net_ */
>>>> +
>>>> +	snprintf(conf.name, RTE_KNI_NAMESIZE, "%s", name);
>>>> +	conf.force_bind = 0;
>>>> +	conf.group_id = port_id;
>>>> +	conf.mbuf_size = MAX_PACKET_SZ;
>>>> +	mb_pool = internals->rx_queues[0].mb_pool;
>>>> +
>>>> +	internals->kni = rte_kni_alloc(mb_pool, &conf, NULL);
>>>> +	if (internals->kni == NULL) {
>>>> +		RTE_LOG(ERR, PMD,
>>>> +			"Fail to create kni for port: %d\n", port_id);
>>>> +		return -1;
>>>> +	}
>>>> +
>>>> +	return 0;
>>>> +}
>>>> +
>>>> +static int
>>>> +eth_kni_dev_start(struct rte_eth_dev *dev)
>>>> +{
>>>> +	struct pmd_internals *internals = dev->data->dev_private;
>>>> +	int ret;
>>>> +
>>>> +	if (internals->is_kni_started == 0) {
>>>> +		ret = eth_kni_start(dev);
>>>> +		if (ret)
>>>> +			return -1;
>>>> +		internals->is_kni_started = 1;
>>>> +	}
>>>> +
>>>
>>> In case is_kni_started is 1 already,  shouldn't we return directly instead of
>> proceeding?
>>
>> "is_kni_started" is just to protect "eth_kni_start()", as you can see it
>> doesn't have a counterpart in eth_kni_dev_stop(). This flag is to be
>> sure "eth_kni_start()" called only once during PMD life cycle.
>>
>> The check you mentioned already done, start() / stop() functions already
>> balanced by APIs calling these functions.
> 
> What about KNI request handing thread then?  Is it safe to have multiple threads calling into rte_kni_handle_request()? My understanding is that this is not safe as kni_fifo is not multi-thread safe.  It's also a bit wasteful to create multiple threads here.

That thread created within start() and canceled in stop().
And it is not possible to have start() call twice, the API that calls
start(), rte_eth_dev_start(), prevents multiple calls already. Same for
stop().

> 
>>>
>>>> +	ret = pthread_create(&internals->thread, NULL, kni_handle_request,
>>>> +			internals);
>>>> +	if (ret) {
>>>> +		RTE_LOG(ERR, PMD, "Fail to create kni request thread\n");
>>>> +		return -1;
>>>> +	}
>>>> +
>>>> +	dev->data->dev_link.link_status = 1;
>>>> +
>>>> +	return 0;
>>>> +}
>>>> +
>>>> +static void
>>>> +eth_kni_dev_stop(struct rte_eth_dev *dev)
>>>> +{
>>>> +	struct pmd_internals *internals = dev->data->dev_private;
>>>> +	int ret;
>>>> +
>>>> +	internals->stop_thread = 1;
>>>> +
>>>> +	ret = pthread_cancel(internals->thread);
>>>> +	if (ret)
>>>> +		RTE_LOG(ERR, PMD, "Can't cancel the thread\n");
>>>> +
>>>> +	ret = pthread_join(internals->thread, NULL);
>>>> +	if (ret)
>>>> +		RTE_LOG(ERR, PMD, "Can't join the thread\n");
>>>> +
>>>> +	internals->stop_thread = 0;
>>>> +
>>>> +	dev->data->dev_link.link_status = 0;
>>>> +}
>>>> +
>>>> +static int
>>>> +eth_kni_dev_configure(struct rte_eth_dev *dev __rte_unused)
>>>> +{
>>>> +	return 0;
>>>> +}
>>>> +
>>>> +static void
>>>> +eth_kni_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info
>>>> *dev_info)
>>>> +{
>>>> +	struct rte_eth_dev_data *data = dev->data;
>>>> +
>>>> +	dev_info->driver_name = data->drv_name;
>>>> +	dev_info->max_mac_addrs = 1;
>>>> +	dev_info->max_rx_pktlen = (uint32_t)-1;
>>>> +	dev_info->max_rx_queues = KNI_MAX_QUEUE_PER_PORT;
>>>> +	dev_info->max_tx_queues = KNI_MAX_QUEUE_PER_PORT;
>>>> +	dev_info->min_rx_bufsize = 0;
>>>> +	dev_info->pci_dev = NULL;
>>>> +}
>>>> +
>>>> +static int
>>>> +eth_kni_rx_queue_setup(struct rte_eth_dev *dev,
>>>> +		uint16_t rx_queue_id,
>>>> +		uint16_t nb_rx_desc __rte_unused,
>>>> +		unsigned int socket_id __rte_unused,
>>>> +		const struct rte_eth_rxconf *rx_conf __rte_unused,
>>>> +		struct rte_mempool *mb_pool)
>>>> +{
>>>> +	struct pmd_internals *internals = dev->data->dev_private;
>>>> +	struct pmd_queue *q;
>>>> +
>>>> +	q = &internals->rx_queues[rx_queue_id];
>>>> +	q->internals = internals;
>>>> +	q->mb_pool = mb_pool;
>>>> +
>>>> +	dev->data->rx_queues[rx_queue_id] = q;
>>>> +
>>>> +	return 0;
>>>> +}
>>>> +
>>>> +static int
>>>> +eth_kni_tx_queue_setup(struct rte_eth_dev *dev,
>>>> +		uint16_t tx_queue_id,
>>>> +		uint16_t nb_tx_desc __rte_unused,
>>>> +		unsigned int socket_id __rte_unused,
>>>> +		const struct rte_eth_txconf *tx_conf __rte_unused)
>>>> +{
>>>> +	struct pmd_internals *internals = dev->data->dev_private;
>>>> +	struct pmd_queue *q;
>>>> +
>>>> +	q = &internals->tx_queues[tx_queue_id];
>>>> +	q->internals = internals;
>>>> +
>>>> +	dev->data->tx_queues[tx_queue_id] = q;
>>>> +
>>>> +	return 0;
>>>> +}
>>>> +
>>>> +static void
>>>> +eth_kni_queue_release(void *q __rte_unused)
>>>> +{
>>>> +}
>>>> +
>>>> +static int
>>>> +eth_kni_link_update(struct rte_eth_dev *dev __rte_unused,
>>>> +		int wait_to_complete __rte_unused)
>>>> +{
>>>> +	return 0;
>>>> +}
>>>> +
>>>> +static void
>>>> +eth_kni_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
>>>> +{
>>>> +	unsigned long rx_packets_total = 0, rx_bytes_total = 0;
>>>> +	unsigned long tx_packets_total = 0, tx_bytes_total = 0;
>>>> +	struct rte_eth_dev_data *data = dev->data;
>>>> +	unsigned long tx_packets_err_total = 0;
>>>> +	unsigned int i, num_stats;
>>>> +	struct pmd_queue *q;
>>>> +
>>>> +	num_stats = RTE_MIN((unsigned
>>>> int)RTE_ETHDEV_QUEUE_STAT_CNTRS,
>>>> +			data->nb_rx_queues);
>>>> +	for (i = 0; i < num_stats; i++) {
>>>> +		q = data->rx_queues[i];
>>>> +		stats->q_ipackets[i] = q->rx.pkts;
>>>> +		stats->q_ibytes[i] = q->rx.bytes;
>>>> +		rx_packets_total += stats->q_ipackets[i];
>>>> +		rx_bytes_total += stats->q_ibytes[i];
>>>> +	}
>>>> +
>>>> +	num_stats = RTE_MIN((unsigned
>>>> int)RTE_ETHDEV_QUEUE_STAT_CNTRS,
>>>> +			data->nb_tx_queues);
>>>> +	for (i = 0; i < num_stats; i++) {
>>>> +		q = data->tx_queues[i];
>>>> +		stats->q_opackets[i] = q->tx.pkts;
>>>> +		stats->q_obytes[i] = q->tx.bytes;
>>>> +		stats->q_errors[i] = q->tx.err_pkts;
>>>> +		tx_packets_total += stats->q_opackets[i];
>>>> +		tx_bytes_total += stats->q_obytes[i];
>>>> +		tx_packets_err_total += stats->q_errors[i];
>>>> +	}
>>>> +
>>>> +	stats->ipackets = rx_packets_total;
>>>> +	stats->ibytes = rx_bytes_total;
>>>> +	stats->opackets = tx_packets_total;
>>>> +	stats->obytes = tx_bytes_total;
>>>> +	stats->oerrors = tx_packets_err_total;
>>>> +}
>>>> +
>>>> +static void
>>>> +eth_kni_stats_reset(struct rte_eth_dev *dev)
>>>> +{
>>>> +	struct rte_eth_dev_data *data = dev->data;
>>>> +	struct pmd_queue *q;
>>>> +	unsigned int i;
>>>> +
>>>> +	for (i = 0; i < data->nb_rx_queues; i++) {
>>>> +		q = data->rx_queues[i];
>>>> +		q->rx.pkts = 0;
>>>> +		q->rx.bytes = 0;
>>>> +	}
>>>> +	for (i = 0; i < data->nb_tx_queues; i++) {
>>>> +		q = data->tx_queues[i];
>>>> +		q->tx.pkts = 0;
>>>> +		q->tx.bytes = 0;
>>>> +		q->tx.err_pkts = 0;
>>>> +	}
>>>> +}
>>>> +
>>>> +static const struct eth_dev_ops eth_kni_ops = {
>>>> +	.dev_start = eth_kni_dev_start,
>>>> +	.dev_stop = eth_kni_dev_stop,
>>>> +	.dev_configure = eth_kni_dev_configure,
>>>> +	.dev_infos_get = eth_kni_dev_info,
>>>> +	.rx_queue_setup = eth_kni_rx_queue_setup,
>>>> +	.tx_queue_setup = eth_kni_tx_queue_setup,
>>>> +	.rx_queue_release = eth_kni_queue_release,
>>>> +	.tx_queue_release = eth_kni_queue_release,
>>>> +	.link_update = eth_kni_link_update,
>>>> +	.stats_get = eth_kni_stats_get,
>>>> +	.stats_reset = eth_kni_stats_reset,
>>>> +};
>>>> +
>>>> +static struct rte_vdev_driver eth_kni_drv;
>>>> +
>>>> +static struct rte_eth_dev *
>>>> +eth_kni_create(const char *name, unsigned int numa_node)
>>>> +{
>>>> +	struct pmd_internals *internals = NULL;
>>>> +	struct rte_eth_dev_data *data;
>>>> +	struct rte_eth_dev *eth_dev;
>>>> +
>>>> +	RTE_LOG(INFO, PMD, "Creating kni ethdev on numa socket %u\n",
>>>> +			numa_node);
>>>> +
>>>> +	data = rte_zmalloc_socket(name, sizeof(*data), 0, numa_node);
>>>> +	if (data == NULL)
>>>> +		goto error;
>>>> +
>>>> +	internals = rte_zmalloc_socket(name, sizeof(*internals), 0,
>>>> numa_node);
>>>> +	if (internals == NULL)
>>>> +		goto error;
>>>> +
>>>> +	/* reserve an ethdev entry */
>>>> +	eth_dev = rte_eth_dev_allocate(name);
>>>> +	if (eth_dev == NULL)
>>>> +		goto error;
>>>> +
>>>> +	data->dev_private = internals;
>>>> +	data->port_id = eth_dev->data->port_id;
>>>> +	memmove(data->name, eth_dev->data->name, sizeof(data-
>>>>> name));
>>>> +	data->nb_rx_queues = 1;
>>>> +	data->nb_tx_queues = 1;
>>>> +	data->dev_link = pmd_link;
>>>> +	data->mac_addrs = &eth_addr;
>>>> +
>>>> +	eth_dev->data = data;
>>>> +	eth_dev->dev_ops = &eth_kni_ops;
>>>> +	eth_dev->driver = NULL;
>>>> +
>>>> +	data->dev_flags = RTE_ETH_DEV_DETACHABLE;
>>>> +	data->kdrv = RTE_KDRV_NONE;
>>>> +	data->drv_name = eth_kni_drv.driver.name;
>>>> +	data->numa_node = numa_node;
>>>> +
>>>> +	return eth_dev;
>>>> +
>>>> +error:
>>>> +	rte_free(data);
>>>> +	rte_free(internals);
>>>> +
>>>> +	return NULL;
>>>> +}
>>>> +
>>>> +static int
>>>> +kni_init(void)
>>>> +{
>>>> +	if (is_kni_initialized == 0)
>>>> +		rte_kni_init(MAX_KNI_PORTS);
>>>> +
>>>> +	is_kni_initialized += 1;
>>>> +
>>>> +	return 0;
>>>> +}
>>>> +
>>>> +static int
>>>> +eth_kni_probe(const char *name, const char *params __rte_unused)
>>>> +{
>>>> +	struct rte_eth_dev *eth_dev;
>>>> +	int ret;
>>>> +
>>>> +	RTE_LOG(INFO, PMD, "Initializing eth_kni for %s\n", name);
>>>> +
>>>> +	ret = kni_init();
>>>> +	if (ret < 0)
>>>> +		/* Not return error to prevent panic in rte_eal_init() */
>>>> +		return 0;
>>>
>>> If we don't return error here, the application that needs to add KNI ports
>> eventually will fail.  If it's a fail-stop situation, isn't it better to return error
>> where the it happened?
>>
>> I am not sure this is fail-stop situation, but instead this gives a
>> chance to applicaton for a graceful exit.
>>
>> If an error value returned here, it will lead to a rte_panic() and
>> application terminated abnormally!
>>
>> But if we return a success at this point, since no ethernet device
>> created, there is no handler in application to use, which also means no
>> KNI interface created.
>> Application can check number of ports and recognize KNI port is missing,
>> app may chose to terminate or not, also it prefers to terminate, can do
>> it properly.
> 
> I might be wrong but as far as I know,  other virtual or physical PMDS do not have this behavior.  What you proposed makes sense but it also means that the application needs extra logic (checking if all ports are successfully initialized) to handle such failures (depending on the application, it might be able to proceed or it might need to fail-stop).  Personally I would prefer consistency across all PMDs here no matter what behavior we choose here as that's the "contract" the application needs to know.

Right, other PMDs don't have this behavior, I will update this to be
consistent with others.

>  
>>>
>>>> +	eth_dev = eth_kni_create(name, rte_socket_id());
>>>> +	if (eth_dev == NULL)
>>>> +		return -1;
>>>> +
>>>> +	eth_dev->rx_pkt_burst = eth_kni_rx;
>>>> +	eth_dev->tx_pkt_burst = eth_kni_tx;
>>>> +
>>>> +	return 0;
>>>> +}
>>>> +
>>>> +static int
>>>> +eth_kni_remove(const char *name)
>>>> +{
>>>> +	struct rte_eth_dev *eth_dev;
>>>> +	struct pmd_internals *internals;
>>>> +
>>>> +	RTE_LOG(INFO, PMD, "Un-Initializing eth_kni for %s\n", name);
>>>> +
>>>> +	/* find the ethdev entry */
>>>> +	eth_dev = rte_eth_dev_allocated(name);
>>>> +	if (eth_dev == NULL)
>>>> +		return -1;
>>>> +
>>>> +	eth_kni_dev_stop(eth_dev);
>>>> +
>>>> +	if (eth_dev->data) {
>>>> +		internals = eth_dev->data->dev_private;
>>>> +		rte_kni_release(internals->kni);
>>>> +
>>>> +		rte_free(internals);
>>>> +	}
>>>> +	rte_free(eth_dev->data);
>>>> +
>>>> +	rte_eth_dev_release_port(eth_dev);
>>>> +
>>>> +	is_kni_initialized -= 1;
>>>> +	if (is_kni_initialized == 0)
>>>> +		rte_kni_close();
>>>> +
>>>> +	return 0;
>>>> +}
>>>> +
>>>> +static struct rte_vdev_driver eth_kni_drv = {
>>>> +	.probe = eth_kni_probe,
>>>> +	.remove = eth_kni_remove,
>>>> +};
>>>> +
>>>> +RTE_PMD_REGISTER_VDEV(net_kni, eth_kni_drv);
>>>> diff --git a/drivers/net/kni/rte_pmd_kni_version.map
>>>> b/drivers/net/kni/rte_pmd_kni_version.map
>>>> new file mode 100644
>>>> index 0000000..31eca32
>>>> --- /dev/null
>>>> +++ b/drivers/net/kni/rte_pmd_kni_version.map
>>>> @@ -0,0 +1,4 @@
>>>> +DPDK_17.02 {
>>>> +
>>>> +	local: *;
>>>> +};
>>>> diff --git a/mk/rte.app.mk b/mk/rte.app.mk
>>>> index f75f0e2..af02816 100644
>>>> --- a/mk/rte.app.mk
>>>> +++ b/mk/rte.app.mk
>>>> @@ -59,11 +59,6 @@ _LDLIBS-y += -L$(RTE_SDK_BIN)/lib
>>>>  #
>>>>  # Order is important: from higher level to lower level
>>>>  #
>>>> -
>>>> -ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
>>>> -_LDLIBS-$(CONFIG_RTE_LIBRTE_KNI)            += -lrte_kni
>>>> -endif
>>>> -
>>>>  _LDLIBS-$(CONFIG_RTE_LIBRTE_PIPELINE)       += -lrte_pipeline
>>>>  _LDLIBS-$(CONFIG_RTE_LIBRTE_TABLE)          += -lrte_table
>>>>  _LDLIBS-$(CONFIG_RTE_LIBRTE_PORT)           += -lrte_port
>>>> @@ -84,6 +79,10 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_POWER)          += -
>>>> lrte_power
>>>>
>>>>  _LDLIBS-y += --whole-archive
>>>>
>>>> +ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
>>>> +_LDLIBS-$(CONFIG_RTE_LIBRTE_KNI)            += -lrte_kni
>>>> +endif
>>>> +
>>>>  _LDLIBS-$(CONFIG_RTE_LIBRTE_TIMER)          += -lrte_timer
>>>>  _LDLIBS-$(CONFIG_RTE_LIBRTE_HASH)           += -lrte_hash
>>>>  _LDLIBS-$(CONFIG_RTE_LIBRTE_VHOST)          += -lrte_vhost
>>>> @@ -115,6 +114,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_ENIC_PMD)       +=
>> -
>>>> lrte_pmd_enic
>>>>  _LDLIBS-$(CONFIG_RTE_LIBRTE_FM10K_PMD)      += -lrte_pmd_fm10k
>>>>  _LDLIBS-$(CONFIG_RTE_LIBRTE_I40E_PMD)       += -lrte_pmd_i40e
>>>>  _LDLIBS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD)      += -lrte_pmd_ixgbe
>>>> +_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_KNI)        += -lrte_pmd_kni
>>>>  _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX4_PMD)       += -lrte_pmd_mlx4 -
>>>> libverbs
>>>>  _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX5_PMD)       += -lrte_pmd_mlx5 -
>>>> libverbs
>>>>  _LDLIBS-$(CONFIG_RTE_LIBRTE_MPIPE_PMD)      += -lrte_pmd_mpipe -
>> lgxio
>>>> --
>>>> 2.9.3
>>>
>
Yong Wang Dec. 19, 2016, 5:52 p.m. UTC | #5
> -----Original Message-----
> From: Ferruh Yigit [mailto:ferruh.yigit@intel.com]
> Sent: Thursday, December 15, 2016 7:56 AM
> To: Yong Wang <yongwang@vmware.com>; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v4] net/kni: add KNI PMD
> 
> On 12/14/2016 7:25 PM, Yong Wang wrote:
> >> -----Original Message-----
> >> From: Ferruh Yigit [mailto:ferruh.yigit@intel.com]
> >> Sent: Wednesday, December 14, 2016 8:00 AM
> >> To: Yong Wang <yongwang@vmware.com>; dev@dpdk.org
> >> Subject: Re: [dpdk-dev] [PATCH v4] net/kni: add KNI PMD
> >>
> >> On 12/12/2016 9:59 PM, Yong Wang wrote:
> >>>> -----Original Message-----
> >>>> From: Ferruh Yigit [mailto:ferruh.yigit@intel.com]
> >>>> Sent: Wednesday, November 30, 2016 10:12 AM
> >>>> To: dev@dpdk.org
> >>>> Cc: Ferruh Yigit <ferruh.yigit@intel.com>; Yong Wang
> >>>> <yongwang@vmware.com>
> >>>> Subject: [PATCH v4] net/kni: add KNI PMD
> >>>>
> >>>> Add KNI PMD which wraps librte_kni for ease of use.
> >>>>
> >>>> KNI PMD can be used as any regular PMD to send / receive packets to
> the
> >>>> Linux networking stack.
> >>>>
> >>>> Signed-off-by: Ferruh Yigit <ferruh.yigit@intel.com>
> >>>> ---
> >>>>
> >>>> v4:
> >>>> * allow only single queue
> >>>> * use driver.name as name
> >>>>
> >>>> v3:
> >>>> * rebase on top of latest master
> >>>>
> >>>> v2:
> >>>> * updated driver name eth_kni -> net_kni
> >>>> ---
> >>>>  config/common_base                      |   1 +
> >>>>  config/common_linuxapp                  |   1 +
> >>>>  drivers/net/Makefile                    |   1 +
> >>>>  drivers/net/kni/Makefile                |  63 +++++
> >>>>  drivers/net/kni/rte_eth_kni.c           | 462
> >>>> ++++++++++++++++++++++++++++++++
> >>>>  drivers/net/kni/rte_pmd_kni_version.map |   4 +
> >>>>  mk/rte.app.mk                           |  10 +-
> >>>>  7 files changed, 537 insertions(+), 5 deletions(-)
> >>>>  create mode 100644 drivers/net/kni/Makefile
> >>>>  create mode 100644 drivers/net/kni/rte_eth_kni.c
> >>>>  create mode 100644 drivers/net/kni/rte_pmd_kni_version.map
> >>>>
> >>>> diff --git a/config/common_base b/config/common_base
> >>>> index 4bff83a..3385879 100644
> >>>> --- a/config/common_base
> >>>> +++ b/config/common_base
> >>>> @@ -543,6 +543,7 @@ CONFIG_RTE_PIPELINE_STATS_COLLECT=n
> >>>>  # Compile librte_kni
> >>>>  #
> >>>>  CONFIG_RTE_LIBRTE_KNI=n
> >>>> +CONFIG_RTE_LIBRTE_PMD_KNI=n
> >>>>  CONFIG_RTE_KNI_KMOD=n
> >>>>  CONFIG_RTE_KNI_PREEMPT_DEFAULT=y
> >>>>  CONFIG_RTE_KNI_VHOST=n
> >>>> diff --git a/config/common_linuxapp b/config/common_linuxapp
> >>>> index 2483dfa..2ecd510 100644
> >>>> --- a/config/common_linuxapp
> >>>> +++ b/config/common_linuxapp
> >>>> @@ -39,6 +39,7 @@ CONFIG_RTE_EAL_IGB_UIO=y
> >>>>  CONFIG_RTE_EAL_VFIO=y
> >>>>  CONFIG_RTE_KNI_KMOD=y
> >>>>  CONFIG_RTE_LIBRTE_KNI=y
> >>>> +CONFIG_RTE_LIBRTE_PMD_KNI=y
> >>>>  CONFIG_RTE_LIBRTE_VHOST=y
> >>>>  CONFIG_RTE_LIBRTE_PMD_VHOST=y
> >>>>  CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
> >>>> diff --git a/drivers/net/Makefile b/drivers/net/Makefile
> >>>> index bc93230..c4771cd 100644
> >>>> --- a/drivers/net/Makefile
> >>>> +++ b/drivers/net/Makefile
> >>>> @@ -41,6 +41,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += enic
> >>>>  DIRS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += fm10k
> >>>>  DIRS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += i40e
> >>>>  DIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe
> >>>> +DIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += kni
> >>>>  DIRS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4
> >>>>  DIRS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5
> >>>>  DIRS-$(CONFIG_RTE_LIBRTE_MPIPE_PMD) += mpipe
> >>>> diff --git a/drivers/net/kni/Makefile b/drivers/net/kni/Makefile
> >>>> new file mode 100644
> >>>> index 0000000..0b7cf91
> >>>> --- /dev/null
> >>>> +++ b/drivers/net/kni/Makefile
> >>>> @@ -0,0 +1,63 @@
> >>>> +#   BSD LICENSE
> >>>> +#
> >>>> +#   Copyright(c) 2016 Intel Corporation. All rights reserved.
> >>>> +#
> >>>> +#   Redistribution and use in source and binary forms, with or without
> >>>> +#   modification, are permitted provided that the following conditions
> >>>> +#   are met:
> >>>> +#
> >>>> +#     * Redistributions of source code must retain the above copyright
> >>>> +#       notice, this list of conditions and the following disclaimer.
> >>>> +#     * Redistributions in binary form must reproduce the above
> copyright
> >>>> +#       notice, this list of conditions and the following disclaimer in
> >>>> +#       the documentation and/or other materials provided with the
> >>>> +#       distribution.
> >>>> +#     * Neither the name of Intel Corporation nor the names of its
> >>>> +#       contributors may be used to endorse or promote products
> derived
> >>>> +#       from this software without specific prior written permission.
> >>>> +#
> >>>> +#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
> >>>> CONTRIBUTORS
> >>>> +#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
> >> BUT
> >>>> NOT
> >>>> +#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
> >>>> FITNESS FOR
> >>>> +#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
> >>>> COPYRIGHT
> >>>> +#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> >>>> INCIDENTAL,
> >>>> +#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
> >> BUT
> >>>> NOT
> >>>> +#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
> >> LOSS
> >>>> OF USE,
> >>>> +#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
> CAUSED
> >>>> AND ON ANY
> >>>> +#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
> OR
> >>>> TORT
> >>>> +#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> OUT
> >> OF
> >>>> THE USE
> >>>> +#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> >>>> DAMAGE.
> >>>> +
> >>>> +include $(RTE_SDK)/mk/rte.vars.mk
> >>>> +
> >>>> +#
> >>>> +# library name
> >>>> +#
> >>>> +LIB = librte_pmd_kni.a
> >>>> +
> >>>> +CFLAGS += -O3
> >>>> +CFLAGS += $(WERROR_FLAGS)
> >>>> +LDLIBS += -lpthread
> >>>> +
> >>>> +EXPORT_MAP := rte_pmd_kni_version.map
> >>>> +
> >>>> +LIBABIVER := 1
> >>>> +
> >>>> +#
> >>>> +# all source are stored in SRCS-y
> >>>> +#
> >>>> +SRCS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += rte_eth_kni.c
> >>>> +
> >>>> +#
> >>>> +# Export include files
> >>>> +#
> >>>> +SYMLINK-y-include +=
> >>>> +
> >>>> +# this lib depends upon:
> >>>> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_eal
> >>>> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_ether
> >>>> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_kni
> >>>> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_mbuf
> >>>> +DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_mempool
> >>>> +
> >>>> +include $(RTE_SDK)/mk/rte.lib.mk
> >>>> diff --git a/drivers/net/kni/rte_eth_kni.c
> b/drivers/net/kni/rte_eth_kni.c
> >>>> new file mode 100644
> >>>> index 0000000..6c4df96
> >>>> --- /dev/null
> >>>> +++ b/drivers/net/kni/rte_eth_kni.c
> >>>> @@ -0,0 +1,462 @@
> >>>> +/*-
> >>>> + *   BSD LICENSE
> >>>> + *
> >>>> + *   Copyright(c) 2016 Intel Corporation. All rights reserved.
> >>>> + *   All rights reserved.
> >>>> + *
> >>>> + *   Redistribution and use in source and binary forms, with or without
> >>>> + *   modification, are permitted provided that the following conditions
> >>>> + *   are met:
> >>>> + *
> >>>> + *     * Redistributions of source code must retain the above copyright
> >>>> + *       notice, this list of conditions and the following disclaimer.
> >>>> + *     * Redistributions in binary form must reproduce the above
> copyright
> >>>> + *       notice, this list of conditions and the following disclaimer in
> >>>> + *       the documentation and/or other materials provided with the
> >>>> + *       distribution.
> >>>> + *     * Neither the name of Intel Corporation nor the names of its
> >>>> + *       contributors may be used to endorse or promote products
> derived
> >>>> + *       from this software without specific prior written permission.
> >>>> + *
> >>>> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
> >>>> CONTRIBUTORS
> >>>> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
> >> BUT
> >>>> NOT
> >>>> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
> AND
> >>>> FITNESS FOR
> >>>> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
> THE
> >>>> COPYRIGHT
> >>>> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
> >>>> INCIDENTAL,
> >>>> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
> (INCLUDING,
> >> BUT
> >>>> NOT
> >>>> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
> >> LOSS
> >>>> OF USE,
> >>>> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
> CAUSED
> >>>> AND ON ANY
> >>>> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
> OR
> >>>> TORT
> >>>> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
> OUT
> >> OF
> >>>> THE USE
> >>>> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> >>>> DAMAGE.
> >>>> + */
> >>>> +
> >>>> +#include <fcntl.h>
> >>>> +#include <pthread.h>
> >>>> +#include <unistd.h>
> >>>> +
> >>>> +#include <rte_ethdev.h>
> >>>> +#include <rte_kni.h>
> >>>> +#include <rte_malloc.h>
> >>>> +#include <rte_vdev.h>
> >>>> +
> >>>> +/* Only single queue supported */
> >>>> +#define KNI_MAX_QUEUE_PER_PORT 1
> >>>> +
> >>>> +#define MAX_PACKET_SZ 2048
> >>>> +#define MAX_KNI_PORTS 8
> >>>> +
> >>>> +struct pmd_queue_stats {
> >>>> +	uint64_t pkts;
> >>>> +	uint64_t bytes;
> >>>> +	uint64_t err_pkts;
> >>>> +};
> >>>> +
> >>>> +struct pmd_queue {
> >>>> +	struct pmd_internals *internals;
> >>>> +	struct rte_mempool *mb_pool;
> >>>> +
> >>>> +	struct pmd_queue_stats rx;
> >>>> +	struct pmd_queue_stats tx;
> >>>> +};
> >>>> +
> >>>> +struct pmd_internals {
> >>>> +	struct rte_kni *kni;
> >>>> +	int is_kni_started;
> >>>> +
> >>>> +	pthread_t thread;
> >>>> +	int stop_thread;
> >>>> +
> >>>> +	struct pmd_queue rx_queues[KNI_MAX_QUEUE_PER_PORT];
> >>>> +	struct pmd_queue tx_queues[KNI_MAX_QUEUE_PER_PORT];
> >>>> +};
> >>>> +
> >>>> +static struct ether_addr eth_addr;
> >>>> +static struct rte_eth_link pmd_link = {
> >>>> +		.link_speed = ETH_SPEED_NUM_10G,
> >>>> +		.link_duplex = ETH_LINK_FULL_DUPLEX,
> >>>> +		.link_status = 0
> >>>> +};
> >>>> +static int is_kni_initialized;
> >>>> +
> >>>> +static uint16_t
> >>>> +eth_kni_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
> >>>> +{
> >>>> +	struct pmd_queue *kni_q = q;
> >>>> +	struct rte_kni *kni = kni_q->internals->kni;
> >>>> +	uint16_t nb_pkts;
> >>>> +
> >>>> +	nb_pkts = rte_kni_rx_burst(kni, bufs, nb_bufs);
> >>>> +
> >>>> +	kni_q->rx.pkts += nb_pkts;
> >>>> +	kni_q->rx.err_pkts += nb_bufs - nb_pkts;
> >>>> +
> >>>> +	return nb_pkts;
> >>>> +}
> >>>> +
> >>>> +static uint16_t
> >>>> +eth_kni_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
> >>>> +{
> >>>> +	struct pmd_queue *kni_q = q;
> >>>> +	struct rte_kni *kni = kni_q->internals->kni;
> >>>> +	uint16_t nb_pkts;
> >>>> +
> >>>> +	nb_pkts =  rte_kni_tx_burst(kni, bufs, nb_bufs);
> >>>> +
> >>>> +	kni_q->tx.pkts += nb_pkts;
> >>>> +	kni_q->tx.err_pkts += nb_bufs - nb_pkts;
> >>>> +
> >>>> +	return nb_pkts;
> >>>> +}
> >>>> +
> >>>> +static void *
> >>>> +kni_handle_request(void *param)
> >>>> +{
> >>>> +	struct pmd_internals *internals = param;
> >>>> +#define MS 1000
> >>>> +
> >>>> +	while (!internals->stop_thread) {
> >>>> +		rte_kni_handle_request(internals->kni);
> >>>> +		usleep(500 * MS);
> >>>> +	}
> >>>> +
> >>>> +	return param;
> >>>> +}
> >>>> +
> >>>
> >>> Do we really need a thread to handle request by default? I know there
> are
> >> apps that handle request their own way and having a separate thread
> could
> >> add synchronization problems.  Can we at least add an option to disable
> this?
> >>
> >> I didn't think about there can be a use case that requires own request
> >> handling.
> >>
> >> But, kni requests should be handled to make kni interface run properly,
> >> and to handle interface "kni" handler (internals->kni) required, which
> >> this PMD doesn't expose.
> >>
> >> So, just disabling this thread won't work on its own.
> >
> > I understand that and what I am asking is a way to at least disable this
> without having to make code changes for applications that have their own
> way of handling KNI request and the callback mentioned below sounds good
> to me.  I am fine with adding this capability with this commit or in a separate
> commit after you have this commit checked in.
> 
> I don't mind adding in new version, only I am trying to understand it.
> 
> Normally what it does is calling KNI library rte_kni_handle_request()
> API periodically on KNI handler. What an app may be doing own its way,
> other than tweaking the period?

It's the context that calls into rte_kni_handle_request() that I am referring to.  For applications that already handle this in their own thread or in the pmd thread, they don't need the extra thread created here.  It's not a big deal as they can just change the behavior by modifying the source code but I think it's reasonable to opt out of this default thread without making any source code changes to kni pmd.
diff mbox

Patch

diff --git a/config/common_base b/config/common_base
index 4bff83a..3385879 100644
--- a/config/common_base
+++ b/config/common_base
@@ -543,6 +543,7 @@  CONFIG_RTE_PIPELINE_STATS_COLLECT=n
 # Compile librte_kni
 #
 CONFIG_RTE_LIBRTE_KNI=n
+CONFIG_RTE_LIBRTE_PMD_KNI=n
 CONFIG_RTE_KNI_KMOD=n
 CONFIG_RTE_KNI_PREEMPT_DEFAULT=y
 CONFIG_RTE_KNI_VHOST=n
diff --git a/config/common_linuxapp b/config/common_linuxapp
index 2483dfa..2ecd510 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -39,6 +39,7 @@  CONFIG_RTE_EAL_IGB_UIO=y
 CONFIG_RTE_EAL_VFIO=y
 CONFIG_RTE_KNI_KMOD=y
 CONFIG_RTE_LIBRTE_KNI=y
+CONFIG_RTE_LIBRTE_PMD_KNI=y
 CONFIG_RTE_LIBRTE_VHOST=y
 CONFIG_RTE_LIBRTE_PMD_VHOST=y
 CONFIG_RTE_LIBRTE_PMD_AF_PACKET=y
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index bc93230..c4771cd 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -41,6 +41,7 @@  DIRS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += enic
 DIRS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += fm10k
 DIRS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += i40e
 DIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += ixgbe
+DIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += kni
 DIRS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4
 DIRS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5
 DIRS-$(CONFIG_RTE_LIBRTE_MPIPE_PMD) += mpipe
diff --git a/drivers/net/kni/Makefile b/drivers/net/kni/Makefile
new file mode 100644
index 0000000..0b7cf91
--- /dev/null
+++ b/drivers/net/kni/Makefile
@@ -0,0 +1,63 @@ 
+#   BSD LICENSE
+#
+#   Copyright(c) 2016 Intel Corporation. All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_kni.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+LDLIBS += -lpthread
+
+EXPORT_MAP := rte_pmd_kni_version.map
+
+LIBABIVER := 1
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += rte_eth_kni.c
+
+#
+# Export include files
+#
+SYMLINK-y-include +=
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_eal
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_kni
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_PMD_KNI) += lib/librte_mempool
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/kni/rte_eth_kni.c b/drivers/net/kni/rte_eth_kni.c
new file mode 100644
index 0000000..6c4df96
--- /dev/null
+++ b/drivers/net/kni/rte_eth_kni.c
@@ -0,0 +1,462 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <fcntl.h>
+#include <pthread.h>
+#include <unistd.h>
+
+#include <rte_ethdev.h>
+#include <rte_kni.h>
+#include <rte_malloc.h>
+#include <rte_vdev.h>
+
+/* Only single queue supported */
+#define KNI_MAX_QUEUE_PER_PORT 1
+
+#define MAX_PACKET_SZ 2048
+#define MAX_KNI_PORTS 8
+
+struct pmd_queue_stats {
+	uint64_t pkts;
+	uint64_t bytes;
+	uint64_t err_pkts;
+};
+
+struct pmd_queue {
+	struct pmd_internals *internals;
+	struct rte_mempool *mb_pool;
+
+	struct pmd_queue_stats rx;
+	struct pmd_queue_stats tx;
+};
+
+struct pmd_internals {
+	struct rte_kni *kni;
+	int is_kni_started;
+
+	pthread_t thread;
+	int stop_thread;
+
+	struct pmd_queue rx_queues[KNI_MAX_QUEUE_PER_PORT];
+	struct pmd_queue tx_queues[KNI_MAX_QUEUE_PER_PORT];
+};
+
+static struct ether_addr eth_addr;
+static struct rte_eth_link pmd_link = {
+		.link_speed = ETH_SPEED_NUM_10G,
+		.link_duplex = ETH_LINK_FULL_DUPLEX,
+		.link_status = 0
+};
+static int is_kni_initialized;
+
+static uint16_t
+eth_kni_rx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
+{
+	struct pmd_queue *kni_q = q;
+	struct rte_kni *kni = kni_q->internals->kni;
+	uint16_t nb_pkts;
+
+	nb_pkts = rte_kni_rx_burst(kni, bufs, nb_bufs);
+
+	kni_q->rx.pkts += nb_pkts;
+	kni_q->rx.err_pkts += nb_bufs - nb_pkts;
+
+	return nb_pkts;
+}
+
+static uint16_t
+eth_kni_tx(void *q, struct rte_mbuf **bufs, uint16_t nb_bufs)
+{
+	struct pmd_queue *kni_q = q;
+	struct rte_kni *kni = kni_q->internals->kni;
+	uint16_t nb_pkts;
+
+	nb_pkts =  rte_kni_tx_burst(kni, bufs, nb_bufs);
+
+	kni_q->tx.pkts += nb_pkts;
+	kni_q->tx.err_pkts += nb_bufs - nb_pkts;
+
+	return nb_pkts;
+}
+
+static void *
+kni_handle_request(void *param)
+{
+	struct pmd_internals *internals = param;
+#define MS 1000
+
+	while (!internals->stop_thread) {
+		rte_kni_handle_request(internals->kni);
+		usleep(500 * MS);
+	}
+
+	return param;
+}
+
+static int
+eth_kni_start(struct rte_eth_dev *dev)
+{
+	struct pmd_internals *internals = dev->data->dev_private;
+	uint16_t port_id = dev->data->port_id;
+	struct rte_mempool *mb_pool;
+	struct rte_kni_conf conf;
+	const char *name = dev->data->name + 4; /* remove net_ */
+
+	snprintf(conf.name, RTE_KNI_NAMESIZE, "%s", name);
+	conf.force_bind = 0;
+	conf.group_id = port_id;
+	conf.mbuf_size = MAX_PACKET_SZ;
+	mb_pool = internals->rx_queues[0].mb_pool;
+
+	internals->kni = rte_kni_alloc(mb_pool, &conf, NULL);
+	if (internals->kni == NULL) {
+		RTE_LOG(ERR, PMD,
+			"Fail to create kni for port: %d\n", port_id);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int
+eth_kni_dev_start(struct rte_eth_dev *dev)
+{
+	struct pmd_internals *internals = dev->data->dev_private;
+	int ret;
+
+	if (internals->is_kni_started == 0) {
+		ret = eth_kni_start(dev);
+		if (ret)
+			return -1;
+		internals->is_kni_started = 1;
+	}
+
+	ret = pthread_create(&internals->thread, NULL, kni_handle_request,
+			internals);
+	if (ret) {
+		RTE_LOG(ERR, PMD, "Fail to create kni request thread\n");
+		return -1;
+	}
+
+	dev->data->dev_link.link_status = 1;
+
+	return 0;
+}
+
+static void
+eth_kni_dev_stop(struct rte_eth_dev *dev)
+{
+	struct pmd_internals *internals = dev->data->dev_private;
+	int ret;
+
+	internals->stop_thread = 1;
+
+	ret = pthread_cancel(internals->thread);
+	if (ret)
+		RTE_LOG(ERR, PMD, "Can't cancel the thread\n");
+
+	ret = pthread_join(internals->thread, NULL);
+	if (ret)
+		RTE_LOG(ERR, PMD, "Can't join the thread\n");
+
+	internals->stop_thread = 0;
+
+	dev->data->dev_link.link_status = 0;
+}
+
+static int
+eth_kni_dev_configure(struct rte_eth_dev *dev __rte_unused)
+{
+	return 0;
+}
+
+static void
+eth_kni_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
+{
+	struct rte_eth_dev_data *data = dev->data;
+
+	dev_info->driver_name = data->drv_name;
+	dev_info->max_mac_addrs = 1;
+	dev_info->max_rx_pktlen = (uint32_t)-1;
+	dev_info->max_rx_queues = KNI_MAX_QUEUE_PER_PORT;
+	dev_info->max_tx_queues = KNI_MAX_QUEUE_PER_PORT;
+	dev_info->min_rx_bufsize = 0;
+	dev_info->pci_dev = NULL;
+}
+
+static int
+eth_kni_rx_queue_setup(struct rte_eth_dev *dev,
+		uint16_t rx_queue_id,
+		uint16_t nb_rx_desc __rte_unused,
+		unsigned int socket_id __rte_unused,
+		const struct rte_eth_rxconf *rx_conf __rte_unused,
+		struct rte_mempool *mb_pool)
+{
+	struct pmd_internals *internals = dev->data->dev_private;
+	struct pmd_queue *q;
+
+	q = &internals->rx_queues[rx_queue_id];
+	q->internals = internals;
+	q->mb_pool = mb_pool;
+
+	dev->data->rx_queues[rx_queue_id] = q;
+
+	return 0;
+}
+
+static int
+eth_kni_tx_queue_setup(struct rte_eth_dev *dev,
+		uint16_t tx_queue_id,
+		uint16_t nb_tx_desc __rte_unused,
+		unsigned int socket_id __rte_unused,
+		const struct rte_eth_txconf *tx_conf __rte_unused)
+{
+	struct pmd_internals *internals = dev->data->dev_private;
+	struct pmd_queue *q;
+
+	q = &internals->tx_queues[tx_queue_id];
+	q->internals = internals;
+
+	dev->data->tx_queues[tx_queue_id] = q;
+
+	return 0;
+}
+
+static void
+eth_kni_queue_release(void *q __rte_unused)
+{
+}
+
+static int
+eth_kni_link_update(struct rte_eth_dev *dev __rte_unused,
+		int wait_to_complete __rte_unused)
+{
+	return 0;
+}
+
+static void
+eth_kni_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
+{
+	unsigned long rx_packets_total = 0, rx_bytes_total = 0;
+	unsigned long tx_packets_total = 0, tx_bytes_total = 0;
+	struct rte_eth_dev_data *data = dev->data;
+	unsigned long tx_packets_err_total = 0;
+	unsigned int i, num_stats;
+	struct pmd_queue *q;
+
+	num_stats = RTE_MIN((unsigned int)RTE_ETHDEV_QUEUE_STAT_CNTRS,
+			data->nb_rx_queues);
+	for (i = 0; i < num_stats; i++) {
+		q = data->rx_queues[i];
+		stats->q_ipackets[i] = q->rx.pkts;
+		stats->q_ibytes[i] = q->rx.bytes;
+		rx_packets_total += stats->q_ipackets[i];
+		rx_bytes_total += stats->q_ibytes[i];
+	}
+
+	num_stats = RTE_MIN((unsigned int)RTE_ETHDEV_QUEUE_STAT_CNTRS,
+			data->nb_tx_queues);
+	for (i = 0; i < num_stats; i++) {
+		q = data->tx_queues[i];
+		stats->q_opackets[i] = q->tx.pkts;
+		stats->q_obytes[i] = q->tx.bytes;
+		stats->q_errors[i] = q->tx.err_pkts;
+		tx_packets_total += stats->q_opackets[i];
+		tx_bytes_total += stats->q_obytes[i];
+		tx_packets_err_total += stats->q_errors[i];
+	}
+
+	stats->ipackets = rx_packets_total;
+	stats->ibytes = rx_bytes_total;
+	stats->opackets = tx_packets_total;
+	stats->obytes = tx_bytes_total;
+	stats->oerrors = tx_packets_err_total;
+}
+
+static void
+eth_kni_stats_reset(struct rte_eth_dev *dev)
+{
+	struct rte_eth_dev_data *data = dev->data;
+	struct pmd_queue *q;
+	unsigned int i;
+
+	for (i = 0; i < data->nb_rx_queues; i++) {
+		q = data->rx_queues[i];
+		q->rx.pkts = 0;
+		q->rx.bytes = 0;
+	}
+	for (i = 0; i < data->nb_tx_queues; i++) {
+		q = data->tx_queues[i];
+		q->tx.pkts = 0;
+		q->tx.bytes = 0;
+		q->tx.err_pkts = 0;
+	}
+}
+
+static const struct eth_dev_ops eth_kni_ops = {
+	.dev_start = eth_kni_dev_start,
+	.dev_stop = eth_kni_dev_stop,
+	.dev_configure = eth_kni_dev_configure,
+	.dev_infos_get = eth_kni_dev_info,
+	.rx_queue_setup = eth_kni_rx_queue_setup,
+	.tx_queue_setup = eth_kni_tx_queue_setup,
+	.rx_queue_release = eth_kni_queue_release,
+	.tx_queue_release = eth_kni_queue_release,
+	.link_update = eth_kni_link_update,
+	.stats_get = eth_kni_stats_get,
+	.stats_reset = eth_kni_stats_reset,
+};
+
+static struct rte_vdev_driver eth_kni_drv;
+
+static struct rte_eth_dev *
+eth_kni_create(const char *name, unsigned int numa_node)
+{
+	struct pmd_internals *internals = NULL;
+	struct rte_eth_dev_data *data;
+	struct rte_eth_dev *eth_dev;
+
+	RTE_LOG(INFO, PMD, "Creating kni ethdev on numa socket %u\n",
+			numa_node);
+
+	data = rte_zmalloc_socket(name, sizeof(*data), 0, numa_node);
+	if (data == NULL)
+		goto error;
+
+	internals = rte_zmalloc_socket(name, sizeof(*internals), 0, numa_node);
+	if (internals == NULL)
+		goto error;
+
+	/* reserve an ethdev entry */
+	eth_dev = rte_eth_dev_allocate(name);
+	if (eth_dev == NULL)
+		goto error;
+
+	data->dev_private = internals;
+	data->port_id = eth_dev->data->port_id;
+	memmove(data->name, eth_dev->data->name, sizeof(data->name));
+	data->nb_rx_queues = 1;
+	data->nb_tx_queues = 1;
+	data->dev_link = pmd_link;
+	data->mac_addrs = &eth_addr;
+
+	eth_dev->data = data;
+	eth_dev->dev_ops = &eth_kni_ops;
+	eth_dev->driver = NULL;
+
+	data->dev_flags = RTE_ETH_DEV_DETACHABLE;
+	data->kdrv = RTE_KDRV_NONE;
+	data->drv_name = eth_kni_drv.driver.name;
+	data->numa_node = numa_node;
+
+	return eth_dev;
+
+error:
+	rte_free(data);
+	rte_free(internals);
+
+	return NULL;
+}
+
+static int
+kni_init(void)
+{
+	if (is_kni_initialized == 0)
+		rte_kni_init(MAX_KNI_PORTS);
+
+	is_kni_initialized += 1;
+
+	return 0;
+}
+
+static int
+eth_kni_probe(const char *name, const char *params __rte_unused)
+{
+	struct rte_eth_dev *eth_dev;
+	int ret;
+
+	RTE_LOG(INFO, PMD, "Initializing eth_kni for %s\n", name);
+
+	ret = kni_init();
+	if (ret < 0)
+		/* Not return error to prevent panic in rte_eal_init() */
+		return 0;
+
+	eth_dev = eth_kni_create(name, rte_socket_id());
+	if (eth_dev == NULL)
+		return -1;
+
+	eth_dev->rx_pkt_burst = eth_kni_rx;
+	eth_dev->tx_pkt_burst = eth_kni_tx;
+
+	return 0;
+}
+
+static int
+eth_kni_remove(const char *name)
+{
+	struct rte_eth_dev *eth_dev;
+	struct pmd_internals *internals;
+
+	RTE_LOG(INFO, PMD, "Un-Initializing eth_kni for %s\n", name);
+
+	/* find the ethdev entry */
+	eth_dev = rte_eth_dev_allocated(name);
+	if (eth_dev == NULL)
+		return -1;
+
+	eth_kni_dev_stop(eth_dev);
+
+	if (eth_dev->data) {
+		internals = eth_dev->data->dev_private;
+		rte_kni_release(internals->kni);
+
+		rte_free(internals);
+	}
+	rte_free(eth_dev->data);
+
+	rte_eth_dev_release_port(eth_dev);
+
+	is_kni_initialized -= 1;
+	if (is_kni_initialized == 0)
+		rte_kni_close();
+
+	return 0;
+}
+
+static struct rte_vdev_driver eth_kni_drv = {
+	.probe = eth_kni_probe,
+	.remove = eth_kni_remove,
+};
+
+RTE_PMD_REGISTER_VDEV(net_kni, eth_kni_drv);
diff --git a/drivers/net/kni/rte_pmd_kni_version.map b/drivers/net/kni/rte_pmd_kni_version.map
new file mode 100644
index 0000000..31eca32
--- /dev/null
+++ b/drivers/net/kni/rte_pmd_kni_version.map
@@ -0,0 +1,4 @@ 
+DPDK_17.02 {
+
+	local: *;
+};
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index f75f0e2..af02816 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -59,11 +59,6 @@  _LDLIBS-y += -L$(RTE_SDK_BIN)/lib
 #
 # Order is important: from higher level to lower level
 #
-
-ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
-_LDLIBS-$(CONFIG_RTE_LIBRTE_KNI)            += -lrte_kni
-endif
-
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PIPELINE)       += -lrte_pipeline
 _LDLIBS-$(CONFIG_RTE_LIBRTE_TABLE)          += -lrte_table
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PORT)           += -lrte_port
@@ -84,6 +79,10 @@  _LDLIBS-$(CONFIG_RTE_LIBRTE_POWER)          += -lrte_power
 
 _LDLIBS-y += --whole-archive
 
+ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
+_LDLIBS-$(CONFIG_RTE_LIBRTE_KNI)            += -lrte_kni
+endif
+
 _LDLIBS-$(CONFIG_RTE_LIBRTE_TIMER)          += -lrte_timer
 _LDLIBS-$(CONFIG_RTE_LIBRTE_HASH)           += -lrte_hash
 _LDLIBS-$(CONFIG_RTE_LIBRTE_VHOST)          += -lrte_vhost
@@ -115,6 +114,7 @@  _LDLIBS-$(CONFIG_RTE_LIBRTE_ENIC_PMD)       += -lrte_pmd_enic
 _LDLIBS-$(CONFIG_RTE_LIBRTE_FM10K_PMD)      += -lrte_pmd_fm10k
 _LDLIBS-$(CONFIG_RTE_LIBRTE_I40E_PMD)       += -lrte_pmd_i40e
 _LDLIBS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD)      += -lrte_pmd_ixgbe
+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_KNI)        += -lrte_pmd_kni
 _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX4_PMD)       += -lrte_pmd_mlx4 -libverbs
 _LDLIBS-$(CONFIG_RTE_LIBRTE_MLX5_PMD)       += -lrte_pmd_mlx5 -libverbs
 _LDLIBS-$(CONFIG_RTE_LIBRTE_MPIPE_PMD)      += -lrte_pmd_mpipe -lgxio