[dpdk-dev,v5,2/5] vhost: support selective datapath

Message ID 20180402114656.17090-3-zhihong.wang@intel.com (mailing list archive)
State Accepted, archived
Delegated to: Maxime Coquelin
Headers

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Zhihong Wang April 2, 2018, 11:46 a.m. UTC
  This patch set introduces support for selective datapath in DPDK vhost-user
lib. vDPA stands for vhost Data Path Acceleration. The idea is to support
virtio ring compatible devices to serve virtio driver directly to enable
datapath acceleration.

A set of device ops is defined for device specific operations:

     a. get_queue_num: Called to get supported queue number of the device.

     b. get_features: Called to get supported features of the device.

     c. get_protocol_features: Called to get supported protocol features of
        the device.

     d. dev_conf: Called to configure the actual device when the virtio
        device becomes ready.

     e. dev_close: Called to close the actual device when the virtio device
        is stopped.

     f. set_vring_state: Called to change the state of the vring in the
        actual device when vring state changes.

     g. set_features: Called to set the negotiated features to device.

     h. migration_done: Called to allow the device to respond to RARP
        sending.

     i. get_vfio_group_fd: Called to get the VFIO group fd of the device.

     j. get_vfio_device_fd: Called to get the VFIO device fd of the device.

     k. get_notify_area: Called to get the notify area info of the queue.

Signed-off-by: Zhihong Wang <zhihong.wang@intel.com>
---
Changes in v5:

 1. Rename the vDPA device ops to follow convention.

 2. Improve sanity check.

---
Changes in v4:

 1. Remove the "engine" concept in the lib.

---
Changes in v2:

 1. Add VFIO related vDPA device ops.

 lib/librte_vhost/Makefile              |   4 +-
 lib/librte_vhost/rte_vdpa.h            |  87 +++++++++++++++++++++++++
 lib/librte_vhost/rte_vhost_version.map |   7 ++
 lib/librte_vhost/vdpa.c                | 115 +++++++++++++++++++++++++++++++++
 4 files changed, 211 insertions(+), 2 deletions(-)
 create mode 100644 lib/librte_vhost/rte_vdpa.h
 create mode 100644 lib/librte_vhost/vdpa.c
  

Comments

Maxime Coquelin April 3, 2018, 8:02 a.m. UTC | #1
On 04/02/2018 01:46 PM, Zhihong Wang wrote:
> This patch set introduces support for selective datapath in DPDK vhost-user
> lib. vDPA stands for vhost Data Path Acceleration. The idea is to support
> virtio ring compatible devices to serve virtio driver directly to enable
> datapath acceleration.
> 
> A set of device ops is defined for device specific operations:
> 
>       a. get_queue_num: Called to get supported queue number of the device.
> 
>       b. get_features: Called to get supported features of the device.
> 
>       c. get_protocol_features: Called to get supported protocol features of
>          the device.
> 
>       d. dev_conf: Called to configure the actual device when the virtio
>          device becomes ready.
> 
>       e. dev_close: Called to close the actual device when the virtio device
>          is stopped.
> 
>       f. set_vring_state: Called to change the state of the vring in the
>          actual device when vring state changes.
> 
>       g. set_features: Called to set the negotiated features to device.
> 
>       h. migration_done: Called to allow the device to response to RARP
>          sending.
> 
>       i. get_vfio_group_fd: Called to get the VFIO group fd of the device.
> 
>       j. get_vfio_device_fd: Called to get the VFIO device fd of the device.
> 
>       k. get_notify_area: Called to get the notify area info of the queue.
> 
> Signed-off-by: Zhihong Wang <zhihong.wang@intel.com>
> ---
> Changes in v5:
> 
>   1. Rename the vDPA device ops to follow convention.
> 
>   2. Improve sanity check.
> 
> ---
> Changes in v4:
> 
>   1. Remove the "engine" concept in the lib.
> 
> ---
> Changes in v2:
> 
>   1. Add VFIO related vDPA device ops.
> 
>   lib/librte_vhost/Makefile              |   4 +-
>   lib/librte_vhost/rte_vdpa.h            |  87 +++++++++++++++++++++++++
>   lib/librte_vhost/rte_vhost_version.map |   7 ++
>   lib/librte_vhost/vdpa.c                | 115 +++++++++++++++++++++++++++++++++
>   4 files changed, 211 insertions(+), 2 deletions(-)
>   create mode 100644 lib/librte_vhost/rte_vdpa.h
>   create mode 100644 lib/librte_vhost/vdpa.c
> 


With the fix you suggested:
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>

Thanks!
Maxime
  
Maxime Coquelin April 3, 2018, 8:19 a.m. UTC | #2
On 04/02/2018 01:46 PM, Zhihong Wang wrote:
> +int
> +rte_vdpa_register_device(struct rte_vdpa_dev_addr *addr,
> +		struct rte_vdpa_dev_ops *ops)
> +{
> +	struct rte_vdpa_device *dev;
> +	char device_name[MAX_VDPA_NAME_LEN];
> +	int i;
> +
> +	if (vdpa_device_num >= MAX_VHOST_DEVICE)
> +		return -1;
> +
> +	for (i = 0; i < MAX_VHOST_DEVICE; i++) {
> +		if (vdpa_devices[i] && is_same_vdpa_device(addr,
> +					&vdpa_devices[i]->addr))
> +			return -1;
> +	}

For consistency, I changed the above check to look like the one in
_find_device_id:

	for (i = 0; i < MAX_VHOST_DEVICE; i++) {
		dev = vdpa_devices[i];
		if (dev && is_same_vdpa_device(&dev->addr, addr))
			return -1;
	}

> +
> +	for (i = 0; i < MAX_VHOST_DEVICE; i++) {
> +		if (vdpa_devices[i] == NULL)
> +			break;
> +	}
> +
> +	sprintf(device_name, "vdpa-dev-%d", i);
> +	dev = rte_zmalloc(device_name, sizeof(struct rte_vdpa_device),
> +			RTE_CACHE_LINE_SIZE);
> +	if (!dev)
> +		return -1;
> +
> +	memcpy(&dev->addr, addr, sizeof(struct rte_vdpa_dev_addr));
> +	dev->ops = ops;
> +	vdpa_devices[i] = dev;
> +	vdpa_device_num++;
> +
> +	return i;
> +}
> +
> +int
> +rte_vdpa_unregister_device(int did)
> +{
> +	if (did < 0 || did >= MAX_VHOST_DEVICE || vdpa_devices[did] == NULL)
> +		return -1;
> +
> +	rte_free(vdpa_devices[did]);
> +	vdpa_devices[did] = NULL;
> +	vdpa_device_num--;
> +
> +	return did;
> +}
> +
> +int
> +rte_vdpa_find_device_id(struct rte_vdpa_dev_addr *addr)
> +{
> +	struct rte_vdpa_device *dev;
> +	int i;
> +
> +	for (i = 0; i < MAX_VHOST_DEVICE; ++i) {
> +		dev = vdpa_devices[i];
> +		if (dev && is_same_vdpa_device(&dev->addr, addr) == 0)
> +			return i;
> +	}
> +
> +	return -1;
> +}
> +
  
Zhihong Wang April 3, 2018, 2:35 p.m. UTC | #3
> -----Original Message-----

> From: Maxime Coquelin [mailto:maxime.coquelin@redhat.com]

> Sent: Tuesday, April 3, 2018 4:19 PM

> To: Wang, Zhihong <zhihong.wang@intel.com>; dev@dpdk.org

> Cc: Tan, Jianfeng <jianfeng.tan@intel.com>; Bie, Tiwei <tiwei.bie@intel.com>;

> yliu@fridaylinux.org; Liang, Cunming <cunming.liang@intel.com>; Wang, Xiao

> W <xiao.w.wang@intel.com>; Daly, Dan <dan.daly@intel.com>

> Subject: Re: [PATCH v5 2/5] vhost: support selective datapath

> 

> 

> 

> On 04/02/2018 01:46 PM, Zhihong Wang wrote:

> > +int

> > +rte_vdpa_register_device(struct rte_vdpa_dev_addr *addr,

> > +		struct rte_vdpa_dev_ops *ops)

> > +{

> > +	struct rte_vdpa_device *dev;

> > +	char device_name[MAX_VDPA_NAME_LEN];

> > +	int i;

> > +

> > +	if (vdpa_device_num >= MAX_VHOST_DEVICE)

> > +		return -1;

> > +

> > +	for (i = 0; i < MAX_VHOST_DEVICE; i++) {

> > +		if (vdpa_devices[i] && is_same_vdpa_device(addr,

> > +					&vdpa_devices[i]->addr))

> > +			return -1;

> > +	}

> 

> For consistency, I changed above check to look like same one in

> _find_device_id:


That's better. Thanks.

> 

> 	for (i = 0; i < MAX_VHOST_DEVICE; i++) {

> 		dev = vdpa_devices[i];

> 		if (dev && is_same_vdpa_device(&dev->addr, addr))

> 			return -1;

> 	}

> 

> > +

> > +	for (i = 0; i < MAX_VHOST_DEVICE; i++) {

> > +		if (vdpa_devices[i] == NULL)

> > +			break;

> > +	}

> > +

> > +	sprintf(device_name, "vdpa-dev-%d", i);

> > +	dev = rte_zmalloc(device_name, sizeof(struct rte_vdpa_device),

> > +			RTE_CACHE_LINE_SIZE);

> > +	if (!dev)

> > +		return -1;

> > +

> > +	memcpy(&dev->addr, addr, sizeof(struct rte_vdpa_dev_addr));

> > +	dev->ops = ops;

> > +	vdpa_devices[i] = dev;

> > +	vdpa_device_num++;

> > +

> > +	return i;

> > +}

> > +

> > +int

> > +rte_vdpa_unregister_device(int did)

> > +{

> > +	if (did < 0 || did >= MAX_VHOST_DEVICE || vdpa_devices[did] == NULL)

> > +		return -1;

> > +

> > +	rte_free(vdpa_devices[did]);

> > +	vdpa_devices[did] = NULL;

> > +	vdpa_device_num--;

> > +

> > +	return did;

> > +}

> > +

> > +int

> > +rte_vdpa_find_device_id(struct rte_vdpa_dev_addr *addr)

> > +{

> > +	struct rte_vdpa_device *dev;

> > +	int i;

> > +

> > +	for (i = 0; i < MAX_VHOST_DEVICE; ++i) {

> > +		dev = vdpa_devices[i];

> > +		if (dev && is_same_vdpa_device(&dev->addr, addr) == 0)

> > +			return i;

> > +	}

> > +

> > +	return -1;

> > +}

> > +
  

Patch

diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile
index 5d6c6abae..37044ac03 100644
--- a/lib/librte_vhost/Makefile
+++ b/lib/librte_vhost/Makefile
@@ -22,9 +22,9 @@  LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev -lrte_net
 
 # all source are stored in SRCS-y
 SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := fd_man.c iotlb.c socket.c vhost.c \
-					vhost_user.c virtio_net.c
+					vhost_user.c virtio_net.c vdpa.c
 
 # install includes
-SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_vhost.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_vhost.h rte_vdpa.h
 
 include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_vhost/rte_vdpa.h b/lib/librte_vhost/rte_vdpa.h
new file mode 100644
index 000000000..90465ca26
--- /dev/null
+++ b/lib/librte_vhost/rte_vdpa.h
@@ -0,0 +1,87 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef _RTE_VDPA_H_
+#define _RTE_VDPA_H_
+
+/**
+ * @file
+ *
+ * Device specific vhost lib
+ */
+
+#include <rte_pci.h>
+#include "rte_vhost.h"
+
+#define MAX_VDPA_NAME_LEN 128
+
+/* Kinds of address that can identify a vDPA device. */
+enum vdpa_addr_type {
+	PCI_ADDR,	/* device is identified by its PCI address */
+	VDPA_ADDR_MAX	/* presumably an end marker, not a real type -- confirm */
+};
+
+/* Address identifying a vDPA device; type selects the valid union member. */
+struct rte_vdpa_dev_addr {
+	enum vdpa_addr_type type;
+	union {
+		/* keeps the union at least 64 bytes; presumably room for
+		 * future address types -- confirm
+		 */
+		uint8_t __dummy[64];
+		struct rte_pci_addr pci_addr;	/* used when type is PCI_ADDR */
+	};
+};
+
+/*
+ * Device specific operations a vDPA driver hands to
+ * rte_vdpa_register_device(). Some callbacks take a did and the others a
+ * vid: did is presumably the id returned at registration and vid the
+ * vhost device id -- confirm against the callers.
+ */
+struct rte_vdpa_dev_ops {
+	/* Get capabilities of this device */
+	/* supported queue number of the device */
+	int (*get_queue_num)(int did, uint32_t *queue_num);
+	/* supported features of the device */
+	int (*get_features)(int did, uint64_t *features);
+	/* supported protocol features of the device */
+	int (*get_protocol_features)(int did, uint64_t *protocol_features);
+
+	/* Driver configure/close the device */
+	/* called to configure the actual device when the virtio device
+	 * becomes ready
+	 */
+	int (*dev_conf)(int vid);
+	/* called to close the actual device when the virtio device is
+	 * stopped
+	 */
+	int (*dev_close)(int vid);
+
+	/* Enable/disable this vring; called when the vring state changes */
+	int (*set_vring_state)(int vid, int vring, int state);
+
+	/* Set features when changed; passes the negotiated features on to
+	 * the device
+	 */
+	int (*set_features)(int vid);
+
+	/* Destination operations when migration done; lets the device
+	 * respond to RARP sending
+	 */
+	int (*migration_done)(int vid);
+
+	/* Get the vfio group fd */
+	int (*get_vfio_group_fd)(int vid);
+
+	/* Get the vfio device fd */
+	int (*get_vfio_device_fd)(int vid);
+
+	/* Get the notify area info of the queue */
+	int (*get_notify_area)(int vid, int qid,
+			uint64_t *offset, uint64_t *size);
+
+	/* Reserved for future extension */
+	void *reserved[5];
+};
+
+/* A registered vDPA device: its address plus the driver supplied ops. */
+struct rte_vdpa_device {
+	struct rte_vdpa_dev_addr addr;	/* copied from the caller at registration */
+	struct rte_vdpa_dev_ops *ops;	/* caller's pointer, stored without copying */
+} __rte_cache_aligned;
+
+/*
+ * Register a vdpa device, return did if successful, -1 on failure.
+ * Failure cases include a full device table, an address that is already
+ * registered and a failed allocation. The contents of addr are copied;
+ * only the ops pointer is stored.
+ */
+int __rte_experimental
+rte_vdpa_register_device(struct rte_vdpa_dev_addr *addr,
+		struct rte_vdpa_dev_ops *ops);
+
+/*
+ * Unregister a vdpa device, return -1 on failure.
+ * Returns did on success; fails when did is out of range or no device is
+ * registered under it.
+ */
+int __rte_experimental
+rte_vdpa_unregister_device(int did);
+
+/* Find did of a vdpa device, return -1 on failure */
+int __rte_experimental
+rte_vdpa_find_device_id(struct rte_vdpa_dev_addr *addr);
+
+/*
+ * Find a vdpa device based on did.
+ * Returns NULL when did is out of range or no device is registered
+ * under it.
+ */
+struct rte_vdpa_device * __rte_experimental
+rte_vdpa_get_device(int did);
+
+#endif /* _RTE_VDPA_H_ */
diff --git a/lib/librte_vhost/rte_vhost_version.map b/lib/librte_vhost/rte_vhost_version.map
index df0103129..d3453a2a7 100644
--- a/lib/librte_vhost/rte_vhost_version.map
+++ b/lib/librte_vhost/rte_vhost_version.map
@@ -59,3 +59,10 @@  DPDK_18.02 {
 	rte_vhost_vring_call;
 
 } DPDK_17.08;
+
+EXPERIMENTAL {
+	rte_vdpa_register_device;
+	rte_vdpa_unregister_device;
+	rte_vdpa_find_device_id;
+	rte_vdpa_get_device;
+} DPDK_18.02;
diff --git a/lib/librte_vhost/vdpa.c b/lib/librte_vhost/vdpa.c
new file mode 100644
index 000000000..4b339b1c2
--- /dev/null
+++ b/lib/librte_vhost/vdpa.c
@@ -0,0 +1,115 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+/**
+ * @file
+ *
+ * Device specific vhost lib
+ */
+
+#include <stdbool.h>
+
+#include <rte_malloc.h>
+#include "rte_vdpa.h"
+#include "vhost.h"
+
+/* Table of registered vDPA devices, indexed by did; NULL marks a free slot. */
+static struct rte_vdpa_device *vdpa_devices[MAX_VHOST_DEVICE];
+/* Number of devices currently held in vdpa_devices. */
+static uint32_t vdpa_device_num;
+
+/*
+ * Tell whether two vDPA device addresses designate the same device.
+ *
+ * Addresses of different types never match. PCI addresses match when
+ * domain, bus, devid and function are all equal. For any other type,
+ * equal types are reported as a match because no further fields are
+ * compared.
+ */
+static bool
+is_same_vdpa_device(struct rte_vdpa_dev_addr *a,
+		struct rte_vdpa_dev_addr *b)
+{
+	if (a->type != b->type)
+		return false;
+
+	/* Only PCI addresses carry fields that need comparing. */
+	if (a->type != PCI_ADDR)
+		return true;
+
+	return a->pci_addr.domain == b->pci_addr.domain &&
+			a->pci_addr.bus == b->pci_addr.bus &&
+			a->pci_addr.devid == b->pci_addr.devid &&
+			a->pci_addr.function == b->pci_addr.function;
+}
+
+/*
+ * Register a vDPA device and return its device id (did).
+ *
+ * addr: address identifying the device; its contents are copied.
+ * ops:  device operations; only the pointer is stored, so the structure
+ *       has to stay valid for as long as the device is registered.
+ *
+ * Returns the index of the table slot now holding the device, or -1 when
+ * an argument is NULL, the table is full, a device with the same address
+ * is already registered, or the allocation fails.
+ */
+int
+rte_vdpa_register_device(struct rte_vdpa_dev_addr *addr,
+		struct rte_vdpa_dev_ops *ops)
+{
+	struct rte_vdpa_device *dev;
+	char device_name[MAX_VDPA_NAME_LEN];
+	int i;
+
+	if (addr == NULL || ops == NULL)
+		return -1;
+
+	if (vdpa_device_num >= MAX_VHOST_DEVICE)
+		return -1;
+
+	/* Refuse to register the same address twice. */
+	for (i = 0; i < MAX_VHOST_DEVICE; i++) {
+		dev = vdpa_devices[i];
+		if (dev && is_same_vdpa_device(&dev->addr, addr))
+			return -1;
+	}
+
+	/* Take the lowest free slot; its index becomes the did. */
+	for (i = 0; i < MAX_VHOST_DEVICE; i++) {
+		if (vdpa_devices[i] == NULL)
+			break;
+	}
+
+	/* Never write past the table, even if the counter is out of sync. */
+	if (i == MAX_VHOST_DEVICE)
+		return -1;
+
+	snprintf(device_name, sizeof(device_name), "vdpa-dev-%d", i);
+	dev = rte_zmalloc(device_name, sizeof(struct rte_vdpa_device),
+			RTE_CACHE_LINE_SIZE);
+	if (!dev)
+		return -1;
+
+	memcpy(&dev->addr, addr, sizeof(struct rte_vdpa_dev_addr));
+	dev->ops = ops;
+	vdpa_devices[i] = dev;
+	vdpa_device_num++;
+
+	return i;
+}
+
+/*
+ * Unregister the vDPA device registered under did.
+ *
+ * Frees the device entry, clears its table slot and returns did.
+ * Returns -1 when did is out of range or no device is registered there.
+ */
+int
+rte_vdpa_unregister_device(int did)
+{
+	struct rte_vdpa_device *dev;
+
+	if (did < 0 || did >= MAX_VHOST_DEVICE)
+		return -1;
+
+	dev = vdpa_devices[did];
+	if (dev == NULL)
+		return -1;
+
+	vdpa_devices[did] = NULL;
+	vdpa_device_num--;
+	rte_free(dev);
+
+	return did;
+}
+
+/*
+ * Find the device id (did) of the registered vDPA device whose address
+ * matches addr.
+ *
+ * Returns the index of the first matching table entry, or -1 when addr
+ * is NULL or no registered device has that address.
+ */
+int
+rte_vdpa_find_device_id(struct rte_vdpa_dev_addr *addr)
+{
+	struct rte_vdpa_device *dev;
+	int i;
+
+	if (addr == NULL)
+		return -1;
+
+	for (i = 0; i < MAX_VHOST_DEVICE; ++i) {
+		dev = vdpa_devices[i];
+		/*
+		 * is_same_vdpa_device() returns true on a match, so its
+		 * result is tested directly rather than compared with 0.
+		 */
+		if (dev && is_same_vdpa_device(&dev->addr, addr))
+			return i;
+	}
+
+	return -1;
+}
+
+/*
+ * Look up a vDPA device by its device id.
+ *
+ * Returns the table entry stored under did, which is NULL when nothing
+ * is registered there, or NULL when did is out of range.
+ */
+struct rte_vdpa_device *
+rte_vdpa_get_device(int did)
+{
+	if (did >= 0 && did < MAX_VHOST_DEVICE)
+		return vdpa_devices[did];
+
+	return NULL;
+}