diff mbox series

[v3,4/4] examples/vhost: support vhost async dequeue data path

Message ID 20210928185641.86601-5-wenwux.ma@intel.com (mailing list archive)
State New
Delegated to: Maxime Coquelin
Headers show
Series support async dequeue for split ring | expand

Checks

Context Check Description
ci/intel-Testing success Testing PASS
ci/Intel-compilation success Compilation OK
ci/github-robot: build success github build: passed
ci/checkpatch success coding style OK

Commit Message

Wenwu Ma Sept. 28, 2021, 6:56 p.m. UTC
This patch adds the vhost async dequeue data path to the vhost sample.
The vswitch can leverage IOAT to accelerate the vhost async dequeue data path.

Signed-off-by: Wenwu Ma <wenwux.ma@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
Tested-by: Yvonne Yang <yvonnex.yang@intel.com>
---
 doc/guides/sample_app_ug/vhost.rst |   9 +-
 examples/vhost/ioat.c              |  61 +++++++--
 examples/vhost/ioat.h              |  25 ++++
 examples/vhost/main.c              | 201 +++++++++++++++++++----------
 examples/vhost/main.h              |   6 +-
 5 files changed, 219 insertions(+), 83 deletions(-)
diff mbox series

Patch

diff --git a/doc/guides/sample_app_ug/vhost.rst b/doc/guides/sample_app_ug/vhost.rst
index 9afde9c7f5..63dcf181e1 100644
--- a/doc/guides/sample_app_ug/vhost.rst
+++ b/doc/guides/sample_app_ug/vhost.rst
@@ -169,9 +169,12 @@  demonstrates how to use the async vhost APIs. It's used in combination with dmas
 **--dmas**
 This parameter is used to specify the assigned DMA device of a vhost device.
 Async vhost-user net driver will be used if --dmas is set. For example
---dmas [txd0@00:04.0,txd1@00:04.1] means use DMA channel 00:04.0 for vhost
-device 0 enqueue operation and use DMA channel 00:04.1 for vhost device 1
-enqueue operation.
+--dmas [txd0@00:04.0,txd1@00:04.1,rxd0@00:04.2,rxd1@00:04.3] means use
+DMA channel 00:04.0/00:04.2 for vhost device 0 enqueue/dequeue operation
+and use DMA channel 00:04.1/00:04.3 for vhost device 1 enqueue/dequeue
+operation. The device index corresponds to the socket file order: vhost
+device 0 is created through the first socket file, vhost device 1 through
+the second socket file, and so on.
 
 Common Issues
 -------------
diff --git a/examples/vhost/ioat.c b/examples/vhost/ioat.c
index 6adc30b622..3a256b0f4c 100644
--- a/examples/vhost/ioat.c
+++ b/examples/vhost/ioat.c
@@ -21,6 +21,8 @@  struct packet_tracker {
 
 struct packet_tracker cb_tracker[MAX_VHOST_DEVICE];
 
+int vid2socketid[MAX_VHOST_DEVICE];
+
 int
 open_ioat(const char *value)
 {
@@ -29,7 +31,7 @@  open_ioat(const char *value)
 	char *addrs = input;
 	char *ptrs[2];
 	char *start, *end, *substr;
-	int64_t vid, vring_id;
+	int64_t socketid, vring_id;
 	struct rte_ioat_rawdev_config config;
 	struct rte_rawdev_info info = { .dev_private = &config };
 	char name[32];
@@ -60,6 +62,7 @@  open_ioat(const char *value)
 		goto out;
 	}
 	while (i < args_nr) {
+		bool is_txd;
 		char *arg_temp = dma_arg[i];
 		uint8_t sub_nr;
 		sub_nr = rte_strsplit(arg_temp, strlen(arg_temp), ptrs, 2, '@');
@@ -68,27 +71,39 @@  open_ioat(const char *value)
 			goto out;
 		}
 
-		start = strstr(ptrs[0], "txd");
-		if (start == NULL) {
+		int async_flag;
+		char *txd, *rxd;
+		txd = strstr(ptrs[0], "txd");
+		rxd = strstr(ptrs[0], "rxd");
+		if (txd) {
+			is_txd = true;
+			start = txd;
+			async_flag = ASYNC_ENQUEUE_VHOST;
+		} else if (rxd) {
+			is_txd = false;
+			start = rxd;
+			async_flag = ASYNC_DEQUEUE_VHOST;
+		} else {
 			ret = -1;
 			goto out;
 		}
 
 		start += 3;
-		vid = strtol(start, &end, 0);
+		socketid = strtol(start, &end, 0);
 		if (end == start) {
 			ret = -1;
 			goto out;
 		}
 
-		vring_id = 0 + VIRTIO_RXQ;
+		vring_id = is_txd ? VIRTIO_RXQ : VIRTIO_TXQ;
+
 		if (rte_pci_addr_parse(ptrs[1],
-				&(dma_info + vid)->dmas[vring_id].addr) < 0) {
+			&(dma_info + socketid)->dmas[vring_id].addr) < 0) {
 			ret = -1;
 			goto out;
 		}
 
-		rte_pci_device_name(&(dma_info + vid)->dmas[vring_id].addr,
+		rte_pci_device_name(&(dma_info + socketid)->dmas[vring_id].addr,
 				name, sizeof(name));
 		dev_id = rte_rawdev_get_dev_id(name);
 		if (dev_id == (uint16_t)(-ENODEV) ||
@@ -103,8 +118,9 @@  open_ioat(const char *value)
 			goto out;
 		}
 
-		(dma_info + vid)->dmas[vring_id].dev_id = dev_id;
-		(dma_info + vid)->dmas[vring_id].is_valid = true;
+		(dma_info + socketid)->dmas[vring_id].dev_id = dev_id;
+		(dma_info + socketid)->dmas[vring_id].is_valid = true;
+		(dma_info + socketid)->async_flag |= async_flag;
 		config.ring_size = IOAT_RING_SIZE;
 		config.hdls_disable = true;
 		if (rte_rawdev_configure(dev_id, &info, sizeof(config)) < 0) {
@@ -126,13 +142,16 @@  ioat_transfer_data_cb(int vid, uint16_t queue_id,
 		struct rte_vhost_async_status *opaque_data, uint16_t count)
 {
 	uint32_t i_desc;
-	uint16_t dev_id = dma_bind[vid].dmas[queue_id * 2 + VIRTIO_RXQ].dev_id;
 	struct rte_vhost_iov_iter *src = NULL;
 	struct rte_vhost_iov_iter *dst = NULL;
 	unsigned long i_seg;
 	unsigned short mask = MAX_ENQUEUED_SIZE - 1;
-	unsigned short write = cb_tracker[dev_id].next_write;
 
+	if (queue_id >= MAX_RING_COUNT)
+		return -1;
+
+	uint16_t dev_id = dma_bind[vid2socketid[vid]].dmas[queue_id].dev_id;
+	unsigned short write = cb_tracker[dev_id].next_write;
 	if (!opaque_data) {
 		for (i_desc = 0; i_desc < count; i_desc++) {
 			src = descs[i_desc].src;
@@ -170,16 +189,16 @@  ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
 		struct rte_vhost_async_status *opaque_data,
 		uint16_t max_packets)
 {
-	if (!opaque_data) {
+	if (!opaque_data && queue_id < MAX_RING_COUNT) {
 		uintptr_t dump[255];
 		int n_seg;
 		unsigned short read, write;
 		unsigned short nb_packet = 0;
 		unsigned short mask = MAX_ENQUEUED_SIZE - 1;
 		unsigned short i;
+		uint16_t dev_id;
 
-		uint16_t dev_id = dma_bind[vid].dmas[queue_id * 2
-				+ VIRTIO_RXQ].dev_id;
+		dev_id = dma_bind[vid2socketid[vid]].dmas[queue_id].dev_id;
 		n_seg = rte_ioat_completed_ops(dev_id, 255, NULL, NULL, dump, dump);
 		if (n_seg < 0) {
 			RTE_LOG(ERR,
@@ -215,4 +234,18 @@  ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
 	return -1;
 }
 
+uint32_t get_async_flag_by_vid(int vid)
+{
+	return dma_bind[vid2socketid[vid]].async_flag;
+}
+
+uint32_t get_async_flag_by_socketid(int socketid)
+{
+	return dma_bind[socketid].async_flag;
+}
+
+void init_vid2socketid_array(int vid, int socketid)
+{
+	vid2socketid[vid] = socketid;
+}
 #endif /* RTE_RAW_IOAT */
diff --git a/examples/vhost/ioat.h b/examples/vhost/ioat.h
index 62e163c585..105cee556d 100644
--- a/examples/vhost/ioat.h
+++ b/examples/vhost/ioat.h
@@ -12,6 +12,9 @@ 
 #define MAX_VHOST_DEVICE 1024
 #define IOAT_RING_SIZE 4096
 #define MAX_ENQUEUED_SIZE 4096
+#define MAX_RING_COUNT	2
+#define ASYNC_ENQUEUE_VHOST	1
+#define ASYNC_DEQUEUE_VHOST	2
 
 struct dma_info {
 	struct rte_pci_addr addr;
@@ -20,6 +23,7 @@  struct dma_info {
 };
 
 struct dma_for_vhost {
+	uint32_t async_flag;
 	struct dma_info dmas[RTE_MAX_QUEUES_PER_PORT * 2];
 	uint16_t nr;
 };
@@ -36,6 +40,10 @@  int32_t
 ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
 		struct rte_vhost_async_status *opaque_data,
 		uint16_t max_packets);
+
+uint32_t get_async_flag_by_vid(int vid);
+uint32_t get_async_flag_by_socketid(int socketid);
+void init_vid2socketid_array(int vid, int socketid);
 #else
 static int open_ioat(const char *value __rte_unused)
 {
@@ -59,5 +67,22 @@  ioat_check_completed_copies_cb(int vid __rte_unused,
 {
 	return -1;
 }
+
+static uint32_t
+get_async_flag_by_vid(int vid __rte_unused)
+{
+	return 0;
+}
+
+static uint32_t
+get_async_flag_by_socketid(int socketid __rte_unused)
+{
+	return 0;
+}
+
+static void
+init_vid2socketid_array(int vid __rte_unused, int socketid __rte_unused)
+{
+}
 #endif
 #endif /* _IOAT_H_ */
diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index 254f7097bc..572ffc12ae 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -93,8 +93,6 @@  static int client_mode;
 
 static int builtin_net_driver;
 
-static int async_vhost_driver;
-
 static char *dma_type;
 
 /* Specify timeout (in useconds) between retries on RX. */
@@ -673,7 +671,6 @@  us_vhost_parse_args(int argc, char **argv)
 				us_vhost_usage(prgname);
 				return -1;
 			}
-			async_vhost_driver = 1;
 			break;
 
 		case OPT_CLIENT_NUM:
@@ -846,7 +843,8 @@  complete_async_pkts(struct vhost_dev *vdev)
 					VIRTIO_RXQ, p_cpl, MAX_PKT_BURST);
 	if (complete_count) {
 		free_pkts(p_cpl, complete_count);
-		__atomic_sub_fetch(&vdev->pkts_inflight, complete_count, __ATOMIC_SEQ_CST);
+		__atomic_sub_fetch(&vdev->pkts_enq_inflight,
+				complete_count, __ATOMIC_SEQ_CST);
 	}
 
 }
@@ -891,7 +889,7 @@  drain_vhost(struct vhost_dev *vdev)
 				__ATOMIC_SEQ_CST);
 	}
 
-	if (!async_vhost_driver)
+	if ((get_async_flag_by_vid(vdev->vid) & ASYNC_ENQUEUE_VHOST) == 0)
 		free_pkts(m, nr_xmit);
 }
 
@@ -1171,8 +1169,8 @@  async_enqueue_pkts(struct vhost_dev *vdev, uint16_t queue_id,
 	complete_async_pkts(vdev);
 	enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
 				queue_id, pkts, rx_count);
-	__atomic_add_fetch(&vdev->pkts_inflight, enqueue_count,
-					__ATOMIC_SEQ_CST);
+	__atomic_add_fetch(&vdev->pkts_enq_inflight,
+			enqueue_count, __ATOMIC_SEQ_CST);
 
 	enqueue_fail = rx_count - enqueue_count;
 	if (enqueue_fail)
@@ -1228,10 +1226,23 @@  drain_eth_rx(struct vhost_dev *vdev)
 				__ATOMIC_SEQ_CST);
 	}
 
-	if (!async_vhost_driver)
+	if ((get_async_flag_by_vid(vdev->vid) & ASYNC_ENQUEUE_VHOST) == 0)
 		free_pkts(pkts, rx_count);
 }
 
+uint16_t async_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
+				struct rte_mempool *mbuf_pool,
+				struct rte_mbuf **pkts, uint16_t count)
+{
+	int nr_inflight;
+	uint16_t dequeue_count;
+	dequeue_count = rte_vhost_async_try_dequeue_burst(dev->vid, queue_id,
+			mbuf_pool, pkts, count, &nr_inflight);
+	if (likely(nr_inflight != -1))
+		dev->pkts_deq_inflight = nr_inflight;
+	return dequeue_count;
+}
+
 uint16_t sync_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
 			struct rte_mempool *mbuf_pool,
 			struct rte_mbuf **pkts, uint16_t count)
@@ -1327,6 +1338,32 @@  switch_worker(void *arg __rte_unused)
 	return 0;
 }
 
+static void
+vhost_clear_queue_thread_unsafe(struct vhost_dev *vdev, uint16_t queue_id)
+{
+	uint16_t n_pkt = 0;
+	struct rte_mbuf *m_enq_cpl[vdev->pkts_enq_inflight];
+	struct rte_mbuf *m_deq_cpl[vdev->pkts_deq_inflight];
+
+	if (queue_id % 2 == 0) {
+		while (vdev->pkts_enq_inflight) {
+			n_pkt = rte_vhost_clear_queue_thread_unsafe(vdev->vid,
+				queue_id, m_enq_cpl, vdev->pkts_enq_inflight);
+			free_pkts(m_enq_cpl, n_pkt);
+			__atomic_sub_fetch(&vdev->pkts_enq_inflight,
+					n_pkt, __ATOMIC_SEQ_CST);
+		}
+	} else {
+		while (vdev->pkts_deq_inflight) {
+			n_pkt = rte_vhost_clear_queue_thread_unsafe(vdev->vid,
+				queue_id, m_deq_cpl, vdev->pkts_deq_inflight);
+			free_pkts(m_deq_cpl, n_pkt);
+			__atomic_sub_fetch(&vdev->pkts_deq_inflight,
+					n_pkt, __ATOMIC_SEQ_CST);
+		}
+	}
+}
+
 /*
  * Remove a device from the specific data core linked list and from the
  * main linked list. Synchonization  occurs through the use of the
@@ -1383,21 +1420,91 @@  destroy_device(int vid)
 		"(%d) device has been removed from data core\n",
 		vdev->vid);
 
-	if (async_vhost_driver) {
-		uint16_t n_pkt = 0;
-		struct rte_mbuf *m_cpl[vdev->pkts_inflight];
+	if (get_async_flag_by_vid(vid) & ASYNC_ENQUEUE_VHOST) {
+		vhost_clear_queue_thread_unsafe(vdev, VIRTIO_RXQ);
+		rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ);
+	}
+	if (get_async_flag_by_vid(vid) & ASYNC_DEQUEUE_VHOST) {
+		vhost_clear_queue_thread_unsafe(vdev, VIRTIO_TXQ);
+		rte_vhost_async_channel_unregister(vid, VIRTIO_TXQ);
+	}
+
+	rte_free(vdev);
+}
+
+static int
+get_socketid_by_vid(int vid)
+{
+	int i;
+	char ifname[PATH_MAX];
+	rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
+
+	for (i = 0; i < nb_sockets; i++) {
+		char *file = socket_files + i * PATH_MAX;
+		if (strcmp(file, ifname) == 0)
+			return i;
+	}
+
+	return -1;
+}
+
+static int
+init_vhost_queue_ops(int vid)
+{
+	int socketid = get_socketid_by_vid(vid);
+	if (socketid == -1)
+		return -1;
+
+	init_vid2socketid_array(vid, socketid);
+	if (builtin_net_driver) {
+		vdev_queue_ops[vid].enqueue_pkt_burst = builtin_enqueue_pkts;
+		vdev_queue_ops[vid].dequeue_pkt_burst = builtin_dequeue_pkts;
+	} else {
+		if (get_async_flag_by_vid(vid) & ASYNC_ENQUEUE_VHOST) {
+			vdev_queue_ops[vid].enqueue_pkt_burst =
+						async_enqueue_pkts;
+		} else {
+			vdev_queue_ops[vid].enqueue_pkt_burst =
+						sync_enqueue_pkts;
+		}
 
-		while (vdev->pkts_inflight) {
-			n_pkt = rte_vhost_clear_queue_thread_unsafe(vid, VIRTIO_RXQ,
-						m_cpl, vdev->pkts_inflight);
-			free_pkts(m_cpl, n_pkt);
-			__atomic_sub_fetch(&vdev->pkts_inflight, n_pkt, __ATOMIC_SEQ_CST);
+		if (get_async_flag_by_vid(vid) & ASYNC_DEQUEUE_VHOST) {
+			vdev_queue_ops[vid].dequeue_pkt_burst =
+						async_dequeue_pkts;
+		} else {
+			vdev_queue_ops[vid].dequeue_pkt_burst =
+						sync_dequeue_pkts;
 		}
+	}
 
-		rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ);
+	return 0;
+}
+
+static int
+vhost_async_channel_register(int vid)
+{
+	int ret = 0;
+	struct rte_vhost_async_config config = {0};
+	struct rte_vhost_async_channel_ops channel_ops;
+
+	if (dma_type != NULL && strncmp(dma_type, "ioat", 4) == 0) {
+		channel_ops.transfer_data = ioat_transfer_data_cb;
+		channel_ops.check_completed_copies =
+			ioat_check_completed_copies_cb;
+
+		config.features = RTE_VHOST_ASYNC_INORDER;
+
+		if (get_async_flag_by_vid(vid) & ASYNC_ENQUEUE_VHOST) {
+			ret |= rte_vhost_async_channel_register(vid, VIRTIO_RXQ,
+					config, &channel_ops);
+		}
+		if (get_async_flag_by_vid(vid) & ASYNC_DEQUEUE_VHOST) {
+			ret |= rte_vhost_async_channel_register(vid, VIRTIO_TXQ,
+					config, &channel_ops);
+		}
 	}
 
-	rte_free(vdev);
+	return ret;
 }
 
 /*
@@ -1433,20 +1540,8 @@  new_device(int vid)
 		}
 	}
 
-	if (builtin_net_driver) {
-		vdev_queue_ops[vid].enqueue_pkt_burst = builtin_enqueue_pkts;
-		vdev_queue_ops[vid].dequeue_pkt_burst = builtin_dequeue_pkts;
-	} else {
-		if (async_vhost_driver) {
-			vdev_queue_ops[vid].enqueue_pkt_burst =
-							async_enqueue_pkts;
-		} else {
-			vdev_queue_ops[vid].enqueue_pkt_burst =
-							sync_enqueue_pkts;
-		}
-
-		vdev_queue_ops[vid].dequeue_pkt_burst = sync_dequeue_pkts;
-	}
+	if (init_vhost_queue_ops(vid) != 0)
+		return -1;
 
 	if (builtin_net_driver)
 		vs_vhost_net_setup(vdev);
@@ -1475,27 +1570,13 @@  new_device(int vid)
 	rte_vhost_enable_guest_notification(vid, VIRTIO_RXQ, 0);
 	rte_vhost_enable_guest_notification(vid, VIRTIO_TXQ, 0);
 
+	int ret = vhost_async_channel_register(vid);
+
 	RTE_LOG(INFO, VHOST_DATA,
 		"(%d) device has been added to data core %d\n",
 		vid, vdev->coreid);
 
-	if (async_vhost_driver) {
-		struct rte_vhost_async_config config = {0};
-		struct rte_vhost_async_channel_ops channel_ops;
-
-		if (dma_type != NULL && strncmp(dma_type, "ioat", 4) == 0) {
-			channel_ops.transfer_data = ioat_transfer_data_cb;
-			channel_ops.check_completed_copies =
-				ioat_check_completed_copies_cb;
-
-			config.features = RTE_VHOST_ASYNC_INORDER;
-
-			return rte_vhost_async_channel_register(vid, VIRTIO_RXQ,
-				config, &channel_ops);
-		}
-	}
-
-	return 0;
+	return ret;
 }
 
 static int
@@ -1513,19 +1594,8 @@  vring_state_changed(int vid, uint16_t queue_id, int enable)
 	if (queue_id != VIRTIO_RXQ)
 		return 0;
 
-	if (async_vhost_driver) {
-		if (!enable) {
-			uint16_t n_pkt = 0;
-			struct rte_mbuf *m_cpl[vdev->pkts_inflight];
-
-			while (vdev->pkts_inflight) {
-				n_pkt = rte_vhost_clear_queue_thread_unsafe(vid, queue_id,
-							m_cpl, vdev->pkts_inflight);
-				free_pkts(m_cpl, n_pkt);
-				__atomic_sub_fetch(&vdev->pkts_inflight, n_pkt, __ATOMIC_SEQ_CST);
-			}
-		}
-	}
+	if (!enable)
+		vhost_clear_queue_thread_unsafe(vdev, queue_id);
 
 	return 0;
 }
@@ -1769,10 +1839,11 @@  main(int argc, char *argv[])
 	for (i = 0; i < nb_sockets; i++) {
 		char *file = socket_files + i * PATH_MAX;
 
-		if (async_vhost_driver)
-			flags = flags | RTE_VHOST_USER_ASYNC_COPY;
+		uint64_t flag = flags;
+		if (get_async_flag_by_socketid(i) != 0)
+			flag |= RTE_VHOST_USER_ASYNC_COPY;
 
-		ret = rte_vhost_driver_register(file, flags);
+		ret = rte_vhost_driver_register(file, flag);
 		if (ret != 0) {
 			unregister_drivers(i);
 			rte_exit(EXIT_FAILURE,
diff --git a/examples/vhost/main.h b/examples/vhost/main.h
index 2c5a558f12..5af7e7d97f 100644
--- a/examples/vhost/main.h
+++ b/examples/vhost/main.h
@@ -51,7 +51,8 @@  struct vhost_dev {
 	uint64_t features;
 	size_t hdr_len;
 	uint16_t nr_vrings;
-	uint16_t pkts_inflight;
+	uint16_t pkts_enq_inflight;
+	uint16_t pkts_deq_inflight;
 	struct rte_vhost_memory *mem;
 	struct device_statistics stats;
 	TAILQ_ENTRY(vhost_dev) global_vdev_entry;
@@ -112,4 +113,7 @@  uint16_t sync_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
 			struct rte_mbuf **pkts, uint16_t count);
 uint16_t async_enqueue_pkts(struct vhost_dev *dev, uint16_t queue_id,
 			 struct rte_mbuf **pkts, uint32_t count);
+uint16_t async_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
+			struct rte_mempool *mbuf_pool,
+			struct rte_mbuf **pkts, uint16_t count);
 #endif /* _MAIN_H_ */