diff mbox series

[v6,5/5] examples/vhost: support async dequeue data path

Message ID 20220513025058.12898-6-xuan.ding@intel.com (mailing list archive)
State Superseded
Delegated to: Maxime Coquelin
Headers show
Series vhost: support async dequeue data path | expand

Checks

Context Check Description
ci/intel-Testing success Testing PASS
ci/Intel-compilation success Compilation OK
ci/checkpatch success coding style OK

Commit Message

Ding, Xuan May 13, 2022, 2:50 a.m. UTC
From: Xuan Ding <xuan.ding@intel.com>

This patch adds the use case for async dequeue API. Vswitch can
leverage DMA device to accelerate vhost async dequeue path.

Signed-off-by: Wenwu Ma <wenwux.ma@intel.com>
Signed-off-by: Yuan Wang <yuanx.wang@intel.com>
Signed-off-by: Xuan Ding <xuan.ding@intel.com>
Tested-by: Yvonne Yang <yvonnex.yang@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 doc/guides/sample_app_ug/vhost.rst |   9 +-
 examples/vhost/main.c              | 284 ++++++++++++++++++++---------
 examples/vhost/main.h              |  32 +++-
 examples/vhost/virtio_net.c        |  16 +-
 4 files changed, 243 insertions(+), 98 deletions(-)

Comments

Xia, Chenbo May 13, 2022, 3:27 a.m. UTC | #1
> -----Original Message-----
> From: Ding, Xuan <xuan.ding@intel.com>
> Sent: Friday, May 13, 2022 10:51 AM
> To: maxime.coquelin@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>
> Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Jiang, Cheng1
> <cheng1.jiang@intel.com>; Pai G, Sunil <sunil.pai.g@intel.com>;
> liangma@liangbit.com; Ding, Xuan <xuan.ding@intel.com>; Ma, WenwuX
> <wenwux.ma@intel.com>; Wang, YuanX <yuanx.wang@intel.com>
> Subject: [PATCH v6 5/5] examples/vhost: support async dequeue data path
> 
> From: Xuan Ding <xuan.ding@intel.com>
> 
> This patch adds the use case for async dequeue API. Vswitch can
> leverage DMA device to accelerate vhost async dequeue path.
> 
> Signed-off-by: Wenwu Ma <wenwux.ma@intel.com>
> Signed-off-by: Yuan Wang <yuanx.wang@intel.com>
> Signed-off-by: Xuan Ding <xuan.ding@intel.com>
> Tested-by: Yvonne Yang <yvonnex.yang@intel.com>
> Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
>  doc/guides/sample_app_ug/vhost.rst |   9 +-
>  examples/vhost/main.c              | 284 ++++++++++++++++++++---------
>  examples/vhost/main.h              |  32 +++-
>  examples/vhost/virtio_net.c        |  16 +-
>  4 files changed, 243 insertions(+), 98 deletions(-)
> 
> diff --git a/doc/guides/sample_app_ug/vhost.rst
> b/doc/guides/sample_app_ug/vhost.rst
> index a6ce4bc8ac..09db965e70 100644
> --- a/doc/guides/sample_app_ug/vhost.rst
> +++ b/doc/guides/sample_app_ug/vhost.rst
> @@ -169,9 +169,12 @@ demonstrates how to use the async vhost APIs. It's
> used in combination with dmas
>  **--dmas**
>  This parameter is used to specify the assigned DMA device of a vhost
> device.
>  Async vhost-user net driver will be used if --dmas is set. For example
> ---dmas [txd0@00:04.0,txd1@00:04.1] means use DMA channel 00:04.0 for
> vhost
> -device 0 enqueue operation and use DMA channel 00:04.1 for vhost device 1
> -enqueue operation.
> +--dmas [txd0@00:04.0,txd1@00:04.1,rxd0@00:04.2,rxd1@00:04.3] means use
> +DMA channel 00:04.0/00:04.2 for vhost device 0 enqueue/dequeue operation
> +and use DMA channel 00:04.1/00:04.3 for vhost device 1 enqueue/dequeue
> +operation. The index of the device corresponds to the socket file in
> order,
> +that means vhost device 0 is created through the first socket file, vhost
> +device 1 is created through the second socket file, and so on.
> 
>  Common Issues
>  -------------
> diff --git a/examples/vhost/main.c b/examples/vhost/main.c
> index c4d46de1c5..d070391727 100644
> --- a/examples/vhost/main.c
> +++ b/examples/vhost/main.c
> @@ -63,6 +63,9 @@
> 
>  #define DMA_RING_SIZE 4096
> 
> +#define ASYNC_ENQUEUE_VHOST 1
> +#define ASYNC_DEQUEUE_VHOST 2
> +
>  /* number of mbufs in all pools - if specified on command-line. */
>  static int total_num_mbufs = NUM_MBUFS_DEFAULT;
> 
> @@ -116,6 +119,8 @@ static uint32_t burst_rx_retry_num = BURST_RX_RETRIES;
>  static char *socket_files;
>  static int nb_sockets;
> 
> +static struct vhost_queue_ops vdev_queue_ops[RTE_MAX_VHOST_DEVICE];
> +
>  /* empty VMDq configuration structure. Filled in programmatically */
>  static struct rte_eth_conf vmdq_conf_default = {
>  	.rxmode = {
> @@ -205,6 +210,18 @@ struct vhost_bufftable *vhost_txbuff[RTE_MAX_LCORE *
> RTE_MAX_VHOST_DEVICE];
>  #define MBUF_TABLE_DRAIN_TSC	((rte_get_tsc_hz() + US_PER_S - 1) \
>  				 / US_PER_S * BURST_TX_DRAIN_US)
> 
> +static int vid2socketid[RTE_MAX_VHOST_DEVICE];
> +
> +static uint32_t get_async_flag_by_socketid(int socketid)
> +{
> +	return dma_bind[socketid].async_flag;
> +}
> +
> +static void init_vid2socketid_array(int vid, int socketid)
> +{
> +	vid2socketid[vid] = socketid;
> +}

Return value and func name should be on separate lines as per coding style.
And above func can be inline, same suggestion for short func below, especially
ones in data path.

Thanks,
Chenbo

> +
>  static inline bool
>  is_dma_configured(int16_t dev_id)
>  {
> @@ -224,7 +241,7 @@ open_dma(const char *value)
>  	char *addrs = input;
>  	char *ptrs[2];
>  	char *start, *end, *substr;
> -	int64_t vid;
> +	int64_t socketid, vring_id;
> 
>  	struct rte_dma_info info;
>  	struct rte_dma_conf dev_config = { .nb_vchans = 1 };
> @@ -262,7 +279,9 @@ open_dma(const char *value)
> 
>  	while (i < args_nr) {
>  		char *arg_temp = dma_arg[i];
> +		char *txd, *rxd;
>  		uint8_t sub_nr;
> +		int async_flag;
> 
>  		sub_nr = rte_strsplit(arg_temp, strlen(arg_temp), ptrs, 2,
> '@');
>  		if (sub_nr != 2) {
> @@ -270,14 +289,23 @@ open_dma(const char *value)
>  			goto out;
>  		}
> 
> -		start = strstr(ptrs[0], "txd");
> -		if (start == NULL) {
> +		txd = strstr(ptrs[0], "txd");
> +		rxd = strstr(ptrs[0], "rxd");
> +		if (txd) {
> +			start = txd;
> +			vring_id = VIRTIO_RXQ;
> +			async_flag = ASYNC_ENQUEUE_VHOST;
> +		} else if (rxd) {
> +			start = rxd;
> +			vring_id = VIRTIO_TXQ;
> +			async_flag = ASYNC_DEQUEUE_VHOST;
> +		} else {
>  			ret = -1;
>  			goto out;
>  		}
> 
>  		start += 3;
> -		vid = strtol(start, &end, 0);
> +		socketid = strtol(start, &end, 0);
>  		if (end == start) {
>  			ret = -1;
>  			goto out;
> @@ -338,7 +366,8 @@ open_dma(const char *value)
>  		dmas_id[dma_count++] = dev_id;
> 
>  done:
> -		(dma_info + vid)->dmas[VIRTIO_RXQ].dev_id = dev_id;
> +		(dma_info + socketid)->dmas[vring_id].dev_id = dev_id;
> +		(dma_info + socketid)->async_flag |= async_flag;
>  		i++;
>  	}
>  out:
> @@ -990,7 +1019,7 @@ complete_async_pkts(struct vhost_dev *vdev)
>  {
>  	struct rte_mbuf *p_cpl[MAX_PKT_BURST];
>  	uint16_t complete_count;
> -	int16_t dma_id = dma_bind[vdev->vid].dmas[VIRTIO_RXQ].dev_id;
> +	int16_t dma_id = dma_bind[vid2socketid[vdev-
> >vid]].dmas[VIRTIO_RXQ].dev_id;
> 
>  	complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
>  					VIRTIO_RXQ, p_cpl, MAX_PKT_BURST, dma_id, 0);
> @@ -1029,22 +1058,7 @@ drain_vhost(struct vhost_dev *vdev)
>  	uint16_t nr_xmit = vhost_txbuff[buff_idx]->len;
>  	struct rte_mbuf **m = vhost_txbuff[buff_idx]->m_table;
> 
> -	if (builtin_net_driver) {
> -		ret = vs_enqueue_pkts(vdev, VIRTIO_RXQ, m, nr_xmit);
> -	} else if (dma_bind[vdev->vid].dmas[VIRTIO_RXQ].async_enabled) {
> -		uint16_t enqueue_fail = 0;
> -		int16_t dma_id = dma_bind[vdev->vid].dmas[VIRTIO_RXQ].dev_id;
> -
> -		complete_async_pkts(vdev);
> -		ret = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ, m,
> nr_xmit, dma_id, 0);
> -
> -		enqueue_fail = nr_xmit - ret;
> -		if (enqueue_fail)
> -			free_pkts(&m[ret], nr_xmit - ret);
> -	} else {
> -		ret = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
> -						m, nr_xmit);
> -	}
> +	ret = vdev_queue_ops[vdev->vid].enqueue_pkt_burst(vdev, VIRTIO_RXQ,
> m, nr_xmit);
> 
>  	if (enable_stats) {
>  		__atomic_add_fetch(&vdev->stats.rx_total_atomic, nr_xmit,
> @@ -1053,7 +1067,7 @@ drain_vhost(struct vhost_dev *vdev)
>  				__ATOMIC_SEQ_CST);
>  	}
> 
> -	if (!dma_bind[vdev->vid].dmas[VIRTIO_RXQ].async_enabled)
> +	if (!dma_bind[vid2socketid[vdev-
> >vid]].dmas[VIRTIO_RXQ].async_enabled)
>  		free_pkts(m, nr_xmit);
>  }
> 
> @@ -1325,6 +1339,32 @@ drain_mbuf_table(struct mbuf_table *tx_q)
>  	}
>  }
> 
> +uint16_t
> +async_enqueue_pkts(struct vhost_dev *dev, uint16_t queue_id,
> +		struct rte_mbuf **pkts, uint32_t rx_count)
> +{
> +	uint16_t enqueue_count;
> +	uint16_t enqueue_fail = 0;
> +	uint16_t dma_id = dma_bind[vid2socketid[dev-
> >vid]].dmas[VIRTIO_RXQ].dev_id;
> +
> +	complete_async_pkts(dev);
> +	enqueue_count = rte_vhost_submit_enqueue_burst(dev->vid, queue_id,
> +					pkts, rx_count, dma_id, 0);
> +
> +	enqueue_fail = rx_count - enqueue_count;
> +	if (enqueue_fail)
> +		free_pkts(&pkts[enqueue_count], enqueue_fail);
> +
> +	return enqueue_count;
> +}
> +
> +uint16_t
> +sync_enqueue_pkts(struct vhost_dev *dev, uint16_t queue_id,
> +		struct rte_mbuf **pkts, uint32_t rx_count)
> +{
> +	return rte_vhost_enqueue_burst(dev->vid, queue_id, pkts, rx_count);
> +}
> +
>  static __rte_always_inline void
>  drain_eth_rx(struct vhost_dev *vdev)
>  {
> @@ -1355,25 +1395,8 @@ drain_eth_rx(struct vhost_dev *vdev)
>  		}
>  	}
> 
> -	if (builtin_net_driver) {
> -		enqueue_count = vs_enqueue_pkts(vdev, VIRTIO_RXQ,
> -						pkts, rx_count);
> -	} else if (dma_bind[vdev->vid].dmas[VIRTIO_RXQ].async_enabled) {
> -		uint16_t enqueue_fail = 0;
> -		int16_t dma_id = dma_bind[vdev->vid].dmas[VIRTIO_RXQ].dev_id;
> -
> -		complete_async_pkts(vdev);
> -		enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
> -					VIRTIO_RXQ, pkts, rx_count, dma_id, 0);
> -
> -		enqueue_fail = rx_count - enqueue_count;
> -		if (enqueue_fail)
> -			free_pkts(&pkts[enqueue_count], enqueue_fail);
> -
> -	} else {
> -		enqueue_count = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
> -						pkts, rx_count);
> -	}
> +	enqueue_count = vdev_queue_ops[vdev->vid].enqueue_pkt_burst(vdev,
> +					VIRTIO_RXQ, pkts, rx_count);
> 
>  	if (enable_stats) {
>  		__atomic_add_fetch(&vdev->stats.rx_total_atomic, rx_count,
> @@ -1382,10 +1405,31 @@ drain_eth_rx(struct vhost_dev *vdev)
>  				__ATOMIC_SEQ_CST);
>  	}
> 
> -	if (!dma_bind[vdev->vid].dmas[VIRTIO_RXQ].async_enabled)
> +	if (!dma_bind[vid2socketid[vdev-
> >vid]].dmas[VIRTIO_RXQ].async_enabled)
>  		free_pkts(pkts, rx_count);
>  }
> 
> +uint16_t async_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
> +			    struct rte_mempool *mbuf_pool,
> +			    struct rte_mbuf **pkts, uint16_t count)
> +{
> +	int nr_inflight;
> +	uint16_t dequeue_count;
> +	uint16_t dma_id = dma_bind[vid2socketid[dev-
> >vid]].dmas[VIRTIO_TXQ].dev_id;
> +
> +	dequeue_count = rte_vhost_async_try_dequeue_burst(dev->vid, queue_id,
> +			mbuf_pool, pkts, count, &nr_inflight, dma_id, 0);
> +
> +	return dequeue_count;
> +}
> +
> +uint16_t sync_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
> +			   struct rte_mempool *mbuf_pool,
> +			   struct rte_mbuf **pkts, uint16_t count)
> +{
> +	return rte_vhost_dequeue_burst(dev->vid, queue_id, mbuf_pool, pkts,
> count);
> +}
> +
>  static __rte_always_inline void
>  drain_virtio_tx(struct vhost_dev *vdev)
>  {
> @@ -1393,13 +1437,8 @@ drain_virtio_tx(struct vhost_dev *vdev)
>  	uint16_t count;
>  	uint16_t i;
> 
> -	if (builtin_net_driver) {
> -		count = vs_dequeue_pkts(vdev, VIRTIO_TXQ, mbuf_pool,
> -					pkts, MAX_PKT_BURST);
> -	} else {
> -		count = rte_vhost_dequeue_burst(vdev->vid, VIRTIO_TXQ,
> -					mbuf_pool, pkts, MAX_PKT_BURST);
> -	}
> +	count = vdev_queue_ops[vdev->vid].dequeue_pkt_burst(vdev,
> +				VIRTIO_TXQ, mbuf_pool, pkts, MAX_PKT_BURST);
> 
>  	/* setup VMDq for the first packet */
>  	if (unlikely(vdev->ready == DEVICE_MAC_LEARNING) && count) {
> @@ -1478,6 +1517,26 @@ switch_worker(void *arg __rte_unused)
>  	return 0;
>  }
> 
> +static void
> +vhost_clear_queue_thread_unsafe(struct vhost_dev *vdev, uint16_t queue_id)
> +{
> +	uint16_t n_pkt = 0;
> +	int pkts_inflight;
> +
> +	int16_t dma_id = dma_bind[vid2socketid[vdev-
> >vid]].dmas[queue_id].dev_id;
> +	pkts_inflight = rte_vhost_async_get_inflight_thread_unsafe(vdev->vid,
> queue_id);
> +
> +	struct rte_mbuf *m_cpl[pkts_inflight];
> +
> +	while (pkts_inflight) {
> +		n_pkt = rte_vhost_clear_queue_thread_unsafe(vdev->vid,
> queue_id, m_cpl,
> +							pkts_inflight, dma_id, 0);
> +		free_pkts(m_cpl, n_pkt);
> +		pkts_inflight =
> rte_vhost_async_get_inflight_thread_unsafe(vdev->vid,
> +									queue_id);
> +	}
> +}
> +
>  /*
>   * Remove a device from the specific data core linked list and from the
>   * main linked list. Synchronization  occurs through the use of the
> @@ -1535,27 +1594,79 @@ destroy_device(int vid)
>  		vdev->vid);
> 
>  	if (dma_bind[vid].dmas[VIRTIO_RXQ].async_enabled) {
> -		uint16_t n_pkt = 0;
> -		int pkts_inflight;
> -		int16_t dma_id = dma_bind[vid].dmas[VIRTIO_RXQ].dev_id;
> -		pkts_inflight = rte_vhost_async_get_inflight_thread_unsafe(vid,
> VIRTIO_RXQ);
> -		struct rte_mbuf *m_cpl[pkts_inflight];
> -
> -		while (pkts_inflight) {
> -			n_pkt = rte_vhost_clear_queue_thread_unsafe(vid,
> VIRTIO_RXQ,
> -						m_cpl, pkts_inflight, dma_id, 0);
> -			free_pkts(m_cpl, n_pkt);
> -			pkts_inflight =
> rte_vhost_async_get_inflight_thread_unsafe(vid,
> -										VIRTIO_RXQ);
> -		}
> -
> +		vhost_clear_queue_thread_unsafe(vdev, VIRTIO_RXQ);
>  		rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ);
>  		dma_bind[vid].dmas[VIRTIO_RXQ].async_enabled = false;
>  	}
> 
> +	if (dma_bind[vid].dmas[VIRTIO_TXQ].async_enabled) {
> +		vhost_clear_queue_thread_unsafe(vdev, VIRTIO_TXQ);
> +		rte_vhost_async_channel_unregister(vid, VIRTIO_TXQ);
> +		dma_bind[vid].dmas[VIRTIO_TXQ].async_enabled = false;
> +	}
> +
>  	rte_free(vdev);
>  }
> 
> +static int
> +get_socketid_by_vid(int vid)
> +{
> +	int i;
> +	char ifname[PATH_MAX];
> +	rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
> +
> +	for (i = 0; i < nb_sockets; i++) {
> +		char *file = socket_files + i * PATH_MAX;
> +		if (strcmp(file, ifname) == 0)
> +			return i;
> +	}
> +
> +	return -1;
> +}
> +
> +static int
> +init_vhost_queue_ops(int vid)
> +{
> +	if (builtin_net_driver) {
> +		vdev_queue_ops[vid].enqueue_pkt_burst = builtin_enqueue_pkts;
> +		vdev_queue_ops[vid].dequeue_pkt_burst = builtin_dequeue_pkts;
> +	} else {
> +		if (dma_bind[vid2socketid[vid]].dmas[VIRTIO_RXQ].async_enabled)
> +			vdev_queue_ops[vid].enqueue_pkt_burst =
> async_enqueue_pkts;
> +		else
> +			vdev_queue_ops[vid].enqueue_pkt_burst =
> sync_enqueue_pkts;
> +
> +		if (dma_bind[vid2socketid[vid]].dmas[VIRTIO_TXQ].async_enabled)
> +			vdev_queue_ops[vid].dequeue_pkt_burst =
> async_dequeue_pkts;
> +		else
> +			vdev_queue_ops[vid].dequeue_pkt_burst =
> sync_dequeue_pkts;
> +	}
> +
> +	return 0;
> +}
> +
> +static int
> +vhost_async_channel_register(int vid)
> +{
> +	int rx_ret = 0, tx_ret = 0;
> +
> +	if (dma_bind[vid2socketid[vid]].dmas[VIRTIO_RXQ].dev_id !=
> INVALID_DMA_ID) {
> +		rx_ret = rte_vhost_async_channel_register(vid, VIRTIO_RXQ);
> +		if (rx_ret == 0)
> +
> 	dma_bind[vid2socketid[vid]].dmas[VIRTIO_RXQ].async_enabled = true;
> +	}
> +
> +	if (dma_bind[vid2socketid[vid]].dmas[VIRTIO_TXQ].dev_id !=
> INVALID_DMA_ID) {
> +		tx_ret = rte_vhost_async_channel_register(vid, VIRTIO_TXQ);
> +		if (tx_ret == 0)
> +
> 	dma_bind[vid2socketid[vid]].dmas[VIRTIO_TXQ].async_enabled = true;
> +	}
> +
> +	return rx_ret | tx_ret;
> +}
> +
> +
> +
>  /*
>   * A new device is added to a data core. First the device is added to the
> main linked list
>   * and then allocated to a specific data core.
> @@ -1567,6 +1678,8 @@ new_device(int vid)
>  	uint16_t i;
>  	uint32_t device_num_min = num_devices;
>  	struct vhost_dev *vdev;
> +	int ret;
> +
>  	vdev = rte_zmalloc("vhost device", sizeof(*vdev),
> RTE_CACHE_LINE_SIZE);
>  	if (vdev == NULL) {
>  		RTE_LOG(INFO, VHOST_DATA,
> @@ -1589,6 +1702,17 @@ new_device(int vid)
>  		}
>  	}
> 
> +	int socketid = get_socketid_by_vid(vid);
> +	if (socketid == -1)
> +		return -1;
> +
> +	init_vid2socketid_array(vid, socketid);
> +
> +	ret =  vhost_async_channel_register(vid);
> +
> +	if (init_vhost_queue_ops(vid) != 0)
> +		return -1;
> +
>  	if (builtin_net_driver)
>  		vs_vhost_net_setup(vdev);
> 
> @@ -1620,16 +1744,7 @@ new_device(int vid)
>  		"(%d) device has been added to data core %d\n",
>  		vid, vdev->coreid);
> 
> -	if (dma_bind[vid].dmas[VIRTIO_RXQ].dev_id != INVALID_DMA_ID) {
> -		int ret;
> -
> -		ret = rte_vhost_async_channel_register(vid, VIRTIO_RXQ);
> -		if (ret == 0)
> -			dma_bind[vid].dmas[VIRTIO_RXQ].async_enabled = true;
> -		return ret;
> -	}
> -
> -	return 0;
> +	return ret;
>  }
> 
>  static int
> @@ -1647,22 +1762,9 @@ vring_state_changed(int vid, uint16_t queue_id, int
> enable)
>  	if (queue_id != VIRTIO_RXQ)
>  		return 0;
> 
> -	if (dma_bind[vid].dmas[queue_id].async_enabled) {
> -		if (!enable) {
> -			uint16_t n_pkt = 0;
> -			int pkts_inflight;
> -			pkts_inflight =
> rte_vhost_async_get_inflight_thread_unsafe(vid, queue_id);
> -			int16_t dma_id = dma_bind[vid].dmas[VIRTIO_RXQ].dev_id;
> -			struct rte_mbuf *m_cpl[pkts_inflight];
> -
> -			while (pkts_inflight) {
> -				n_pkt = rte_vhost_clear_queue_thread_unsafe(vid,
> queue_id,
> -							m_cpl, pkts_inflight, dma_id, 0);
> -				free_pkts(m_cpl, n_pkt);
> -				pkts_inflight =
> rte_vhost_async_get_inflight_thread_unsafe(vid,
> -
> 	queue_id);
> -			}
> -		}
> +	if (dma_bind[vid2socketid[vid]].dmas[queue_id].async_enabled) {
> +		if (!enable)
> +			vhost_clear_queue_thread_unsafe(vdev, queue_id);
>  	}
> 
>  	return 0;
> @@ -1887,7 +1989,7 @@ main(int argc, char *argv[])
>  	for (i = 0; i < nb_sockets; i++) {
>  		char *file = socket_files + i * PATH_MAX;
> 
> -		if (dma_count)
> +		if (dma_count && get_async_flag_by_socketid(i) != 0)
>  			flags = flags | RTE_VHOST_USER_ASYNC_COPY;
> 
>  		ret = rte_vhost_driver_register(file, flags);
> diff --git a/examples/vhost/main.h b/examples/vhost/main.h
> index e7f395c3c9..2fcb8376c5 100644
> --- a/examples/vhost/main.h
> +++ b/examples/vhost/main.h
> @@ -61,6 +61,19 @@ struct vhost_dev {
>  	struct vhost_queue queues[MAX_QUEUE_PAIRS * 2];
>  } __rte_cache_aligned;
> 
> +typedef uint16_t (*vhost_enqueue_burst_t)(struct vhost_dev *dev,
> +			uint16_t queue_id, struct rte_mbuf **pkts,
> +			uint32_t count);
> +
> +typedef uint16_t (*vhost_dequeue_burst_t)(struct vhost_dev *dev,
> +			uint16_t queue_id, struct rte_mempool *mbuf_pool,
> +			struct rte_mbuf **pkts, uint16_t count);
> +
> +struct vhost_queue_ops {
> +	vhost_enqueue_burst_t enqueue_pkt_burst;
> +	vhost_dequeue_burst_t dequeue_pkt_burst;
> +};
> +
>  TAILQ_HEAD(vhost_dev_tailq_list, vhost_dev);
> 
> 
> @@ -87,6 +100,7 @@ struct dma_info {
> 
>  struct dma_for_vhost {
>  	struct dma_info dmas[RTE_MAX_QUEUES_PER_PORT * 2];
> +	uint32_t async_flag;
>  };
> 
>  /* we implement non-extra virtio net features */
> @@ -97,7 +111,19 @@ void vs_vhost_net_remove(struct vhost_dev *dev);
>  uint16_t vs_enqueue_pkts(struct vhost_dev *dev, uint16_t queue_id,
>  			 struct rte_mbuf **pkts, uint32_t count);
> 
> -uint16_t vs_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
> -			 struct rte_mempool *mbuf_pool,
> -			 struct rte_mbuf **pkts, uint16_t count);
> +uint16_t builtin_enqueue_pkts(struct vhost_dev *dev, uint16_t queue_id,
> +			struct rte_mbuf **pkts, uint32_t count);
> +uint16_t builtin_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
> +			struct rte_mempool *mbuf_pool,
> +			struct rte_mbuf **pkts, uint16_t count);
> +uint16_t sync_enqueue_pkts(struct vhost_dev *dev, uint16_t queue_id,
> +			 struct rte_mbuf **pkts, uint32_t count);
> +uint16_t sync_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
> +			struct rte_mempool *mbuf_pool,
> +			struct rte_mbuf **pkts, uint16_t count);
> +uint16_t async_enqueue_pkts(struct vhost_dev *dev, uint16_t queue_id,
> +			 struct rte_mbuf **pkts, uint32_t count);
> +uint16_t async_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
> +			struct rte_mempool *mbuf_pool,
> +			struct rte_mbuf **pkts, uint16_t count);
>  #endif /* _MAIN_H_ */
> diff --git a/examples/vhost/virtio_net.c b/examples/vhost/virtio_net.c
> index 9064fc3a82..2432a96566 100644
> --- a/examples/vhost/virtio_net.c
> +++ b/examples/vhost/virtio_net.c
> @@ -238,6 +238,13 @@ vs_enqueue_pkts(struct vhost_dev *dev, uint16_t
> queue_id,
>  	return count;
>  }
> 
> +uint16_t
> +builtin_enqueue_pkts(struct vhost_dev *dev, uint16_t queue_id,
> +		struct rte_mbuf **pkts, uint32_t count)
> +{
> +	return vs_enqueue_pkts(dev, queue_id, pkts, count);
> +}
> +
>  static __rte_always_inline int
>  dequeue_pkt(struct vhost_dev *dev, struct rte_vhost_vring *vr,
>  	    struct rte_mbuf *m, uint16_t desc_idx,
> @@ -363,7 +370,7 @@ dequeue_pkt(struct vhost_dev *dev, struct
> rte_vhost_vring *vr,
>  	return 0;
>  }
> 
> -uint16_t
> +static uint16_t
>  vs_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
>  	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t
> count)
>  {
> @@ -440,3 +447,10 @@ vs_dequeue_pkts(struct vhost_dev *dev, uint16_t
> queue_id,
> 
>  	return i;
>  }
> +
> +uint16_t
> +builtin_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
> +	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t
> count)
> +{
> +	return vs_dequeue_pkts(dev, queue_id, mbuf_pool, pkts, count);
> +}
> --
> 2.17.1
Ding, Xuan May 13, 2022, 3:51 a.m. UTC | #2
> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Friday, May 13, 2022 11:27 AM
> To: Ding, Xuan <xuan.ding@intel.com>; maxime.coquelin@redhat.com
> Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Jiang, Cheng1
> <cheng1.jiang@intel.com>; Pai G, Sunil <sunil.pai.g@intel.com>;
> liangma@liangbit.com; Ma, WenwuX <wenwux.ma@intel.com>; Wang,
> YuanX <yuanx.wang@intel.com>
> Subject: RE: [PATCH v6 5/5] examples/vhost: support async dequeue data
> path
> 
> > -----Original Message-----
> > From: Ding, Xuan <xuan.ding@intel.com>
> > Sent: Friday, May 13, 2022 10:51 AM
> > To: maxime.coquelin@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>
> > Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Jiang, Cheng1
> > <cheng1.jiang@intel.com>; Pai G, Sunil <sunil.pai.g@intel.com>;
> > liangma@liangbit.com; Ding, Xuan <xuan.ding@intel.com>; Ma, WenwuX
> > <wenwux.ma@intel.com>; Wang, YuanX <yuanx.wang@intel.com>
> > Subject: [PATCH v6 5/5] examples/vhost: support async dequeue data
> > path
> >
> > From: Xuan Ding <xuan.ding@intel.com>
> >
> > This patch adds the use case for async dequeue API. Vswitch can
> > leverage DMA device to accelerate vhost async dequeue path.
> >
> > Signed-off-by: Wenwu Ma <wenwux.ma@intel.com>
> > Signed-off-by: Yuan Wang <yuanx.wang@intel.com>
> > Signed-off-by: Xuan Ding <xuan.ding@intel.com>
> > Tested-by: Yvonne Yang <yvonnex.yang@intel.com>
> > Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> > ---
> >  doc/guides/sample_app_ug/vhost.rst |   9 +-
> >  examples/vhost/main.c              | 284 ++++++++++++++++++++---------
> >  examples/vhost/main.h              |  32 +++-
> >  examples/vhost/virtio_net.c        |  16 +-
> >  4 files changed, 243 insertions(+), 98 deletions(-)
> >
> > diff --git a/doc/guides/sample_app_ug/vhost.rst
> > b/doc/guides/sample_app_ug/vhost.rst
> > index a6ce4bc8ac..09db965e70 100644
> > --- a/doc/guides/sample_app_ug/vhost.rst
> > +++ b/doc/guides/sample_app_ug/vhost.rst
> > @@ -169,9 +169,12 @@ demonstrates how to use the async vhost APIs.
> > It's used in combination with dmas
> >  **--dmas**
> >  This parameter is used to specify the assigned DMA device of a vhost
> > device.
> >  Async vhost-user net driver will be used if --dmas is set. For
> > example ---dmas [txd0@00:04.0,txd1@00:04.1] means use DMA channel
> > 00:04.0 for vhost -device 0 enqueue operation and use DMA channel
> > 00:04.1 for vhost device 1 -enqueue operation.
> > +--dmas [txd0@00:04.0,txd1@00:04.1,rxd0@00:04.2,rxd1@00:04.3] means
> > +use DMA channel 00:04.0/00:04.2 for vhost device 0 enqueue/dequeue
> > +operation and use DMA channel 00:04.1/00:04.3 for vhost device 1
> > +enqueue/dequeue operation. The index of the device corresponds to the
> > +socket file in
> > order,
> > +that means vhost device 0 is created through the first socket file,
> > +vhost device 1 is created through the second socket file, and so on.
> >
> >  Common Issues
> >  -------------
> > diff --git a/examples/vhost/main.c b/examples/vhost/main.c index
> > c4d46de1c5..d070391727 100644
> > --- a/examples/vhost/main.c
> > +++ b/examples/vhost/main.c
> > @@ -63,6 +63,9 @@
> >
> >  #define DMA_RING_SIZE 4096
> >
> > +#define ASYNC_ENQUEUE_VHOST 1
> > +#define ASYNC_DEQUEUE_VHOST 2
> > +
> >  /* number of mbufs in all pools - if specified on command-line. */
> > static int total_num_mbufs = NUM_MBUFS_DEFAULT;
> >
> > @@ -116,6 +119,8 @@ static uint32_t burst_rx_retry_num =
> > BURST_RX_RETRIES;  static char *socket_files;  static int nb_sockets;
> >
> > +static struct vhost_queue_ops
> vdev_queue_ops[RTE_MAX_VHOST_DEVICE];
> > +
> >  /* empty VMDq configuration structure. Filled in programmatically */
> > static struct rte_eth_conf vmdq_conf_default = {
> >  	.rxmode = {
> > @@ -205,6 +210,18 @@ struct vhost_bufftable
> > *vhost_txbuff[RTE_MAX_LCORE * RTE_MAX_VHOST_DEVICE];
> >  #define MBUF_TABLE_DRAIN_TSC	((rte_get_tsc_hz() + US_PER_S - 1) \
> >  				 / US_PER_S * BURST_TX_DRAIN_US)
> >
> > +static int vid2socketid[RTE_MAX_VHOST_DEVICE];
> > +
> > +static uint32_t get_async_flag_by_socketid(int socketid) {
> > +	return dma_bind[socketid].async_flag; }
> > +
> > +static void init_vid2socketid_array(int vid, int socketid) {
> > +	vid2socketid[vid] = socketid;
> > +}
> 
> Return value and func name should be on separate lines as per coding style.
> And above func can be inline, same suggestion for short func below,
> especially ones in data path.

Thanks Chenbo, will fix it in next version.

Regards,
Xuan

> 
> Thanks,
> Chenbo
> 
> > +
> >  static inline bool
> >  is_dma_configured(int16_t dev_id)
> >  {
> > @@ -224,7 +241,7 @@ open_dma(const char *value)
> >  	char *addrs = input;
> >  	char *ptrs[2];
> >  	char *start, *end, *substr;
> > -	int64_t vid;
> > +	int64_t socketid, vring_id;
> >
> >  	struct rte_dma_info info;
> >  	struct rte_dma_conf dev_config = { .nb_vchans = 1 }; @@ -262,7
> > +279,9 @@ open_dma(const char *value)
> >
> >  	while (i < args_nr) {
> >  		char *arg_temp = dma_arg[i];
> > +		char *txd, *rxd;
> >  		uint8_t sub_nr;
> > +		int async_flag;
> >
> >  		sub_nr = rte_strsplit(arg_temp, strlen(arg_temp), ptrs, 2, '@');
> >  		if (sub_nr != 2) {
> > @@ -270,14 +289,23 @@ open_dma(const char *value)
> >  			goto out;
> >  		}
> >
> > -		start = strstr(ptrs[0], "txd");
> > -		if (start == NULL) {
> > +		txd = strstr(ptrs[0], "txd");
> > +		rxd = strstr(ptrs[0], "rxd");
> > +		if (txd) {
> > +			start = txd;
> > +			vring_id = VIRTIO_RXQ;
> > +			async_flag = ASYNC_ENQUEUE_VHOST;
> > +		} else if (rxd) {
> > +			start = rxd;
> > +			vring_id = VIRTIO_TXQ;
> > +			async_flag = ASYNC_DEQUEUE_VHOST;
> > +		} else {
> >  			ret = -1;
> >  			goto out;
> >  		}
> >
> >  		start += 3;
> > -		vid = strtol(start, &end, 0);
> > +		socketid = strtol(start, &end, 0);
> >  		if (end == start) {
> >  			ret = -1;
> >  			goto out;
> > @@ -338,7 +366,8 @@ open_dma(const char *value)
> >  		dmas_id[dma_count++] = dev_id;
> >
> >  done:
> > -		(dma_info + vid)->dmas[VIRTIO_RXQ].dev_id = dev_id;
> > +		(dma_info + socketid)->dmas[vring_id].dev_id = dev_id;
> > +		(dma_info + socketid)->async_flag |= async_flag;
> >  		i++;
> >  	}
> >  out:
> > @@ -990,7 +1019,7 @@ complete_async_pkts(struct vhost_dev *vdev)  {
> >  	struct rte_mbuf *p_cpl[MAX_PKT_BURST];
> >  	uint16_t complete_count;
> > -	int16_t dma_id = dma_bind[vdev->vid].dmas[VIRTIO_RXQ].dev_id;
> > +	int16_t dma_id = dma_bind[vid2socketid[vdev-
> > >vid]].dmas[VIRTIO_RXQ].dev_id;
> >
> >  	complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
> >  					VIRTIO_RXQ, p_cpl, MAX_PKT_BURST,
> dma_id, 0); @@ -1029,22
> > +1058,7 @@ drain_vhost(struct vhost_dev *vdev)
> >  	uint16_t nr_xmit = vhost_txbuff[buff_idx]->len;
> >  	struct rte_mbuf **m = vhost_txbuff[buff_idx]->m_table;
> >
> > -	if (builtin_net_driver) {
> > -		ret = vs_enqueue_pkts(vdev, VIRTIO_RXQ, m, nr_xmit);
> > -	} else if (dma_bind[vdev->vid].dmas[VIRTIO_RXQ].async_enabled) {
> > -		uint16_t enqueue_fail = 0;
> > -		int16_t dma_id = dma_bind[vdev-
> >vid].dmas[VIRTIO_RXQ].dev_id;
> > -
> > -		complete_async_pkts(vdev);
> > -		ret = rte_vhost_submit_enqueue_burst(vdev->vid,
> VIRTIO_RXQ, m,
> > nr_xmit, dma_id, 0);
> > -
> > -		enqueue_fail = nr_xmit - ret;
> > -		if (enqueue_fail)
> > -			free_pkts(&m[ret], nr_xmit - ret);
> > -	} else {
> > -		ret = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
> > -						m, nr_xmit);
> > -	}
> > +	ret = vdev_queue_ops[vdev->vid].enqueue_pkt_burst(vdev,
> VIRTIO_RXQ,
> > m, nr_xmit);
> >
> >  	if (enable_stats) {
> >  		__atomic_add_fetch(&vdev->stats.rx_total_atomic, nr_xmit,
> @@
> > -1053,7 +1067,7 @@ drain_vhost(struct vhost_dev *vdev)
> >  				__ATOMIC_SEQ_CST);
> >  	}
> >
> > -	if (!dma_bind[vdev->vid].dmas[VIRTIO_RXQ].async_enabled)
> > +	if (!dma_bind[vid2socketid[vdev-
> > >vid]].dmas[VIRTIO_RXQ].async_enabled)
> >  		free_pkts(m, nr_xmit);
> >  }
> >
> > @@ -1325,6 +1339,32 @@ drain_mbuf_table(struct mbuf_table *tx_q)
> >  	}
> >  }
> >
> > +uint16_t
> > +async_enqueue_pkts(struct vhost_dev *dev, uint16_t queue_id,
> > +		struct rte_mbuf **pkts, uint32_t rx_count) {
> > +	uint16_t enqueue_count;
> > +	uint16_t enqueue_fail = 0;
> > +	uint16_t dma_id = dma_bind[vid2socketid[dev-
> > >vid]].dmas[VIRTIO_RXQ].dev_id;
> > +
> > +	complete_async_pkts(dev);
> > +	enqueue_count = rte_vhost_submit_enqueue_burst(dev->vid,
> queue_id,
> > +					pkts, rx_count, dma_id, 0);
> > +
> > +	enqueue_fail = rx_count - enqueue_count;
> > +	if (enqueue_fail)
> > +		free_pkts(&pkts[enqueue_count], enqueue_fail);
> > +
> > +	return enqueue_count;
> > +}
> > +
> > +uint16_t
> > +sync_enqueue_pkts(struct vhost_dev *dev, uint16_t queue_id,
> > +		struct rte_mbuf **pkts, uint32_t rx_count) {
> > +	return rte_vhost_enqueue_burst(dev->vid, queue_id, pkts, rx_count);
> > +}
> > +
> >  static __rte_always_inline void
> >  drain_eth_rx(struct vhost_dev *vdev)
> >  {
> > @@ -1355,25 +1395,8 @@ drain_eth_rx(struct vhost_dev *vdev)
> >  		}
> >  	}
> >
> > -	if (builtin_net_driver) {
> > -		enqueue_count = vs_enqueue_pkts(vdev, VIRTIO_RXQ,
> > -						pkts, rx_count);
> > -	} else if (dma_bind[vdev->vid].dmas[VIRTIO_RXQ].async_enabled) {
> > -		uint16_t enqueue_fail = 0;
> > -		int16_t dma_id = dma_bind[vdev-
> >vid].dmas[VIRTIO_RXQ].dev_id;
> > -
> > -		complete_async_pkts(vdev);
> > -		enqueue_count = rte_vhost_submit_enqueue_burst(vdev-
> >vid,
> > -					VIRTIO_RXQ, pkts, rx_count, dma_id,
> 0);
> > -
> > -		enqueue_fail = rx_count - enqueue_count;
> > -		if (enqueue_fail)
> > -			free_pkts(&pkts[enqueue_count], enqueue_fail);
> > -
> > -	} else {
> > -		enqueue_count = rte_vhost_enqueue_burst(vdev->vid,
> VIRTIO_RXQ,
> > -						pkts, rx_count);
> > -	}
> > +	enqueue_count = vdev_queue_ops[vdev-
> >vid].enqueue_pkt_burst(vdev,
> > +					VIRTIO_RXQ, pkts, rx_count);
> >
> >  	if (enable_stats) {
> >  		__atomic_add_fetch(&vdev->stats.rx_total_atomic, rx_count,
> @@
> > -1382,10 +1405,31 @@ drain_eth_rx(struct vhost_dev *vdev)
> >  				__ATOMIC_SEQ_CST);
> >  	}
> >
> > -	if (!dma_bind[vdev->vid].dmas[VIRTIO_RXQ].async_enabled)
> > +	if (!dma_bind[vid2socketid[vdev-
> > >vid]].dmas[VIRTIO_RXQ].async_enabled)
> >  		free_pkts(pkts, rx_count);
> >  }
> >
> > +uint16_t async_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
> > +			    struct rte_mempool *mbuf_pool,
> > +			    struct rte_mbuf **pkts, uint16_t count) {
> > +	int nr_inflight;
> > +	uint16_t dequeue_count;
> > +	uint16_t dma_id = dma_bind[vid2socketid[dev-
> > >vid]].dmas[VIRTIO_TXQ].dev_id;
> > +
> > +	dequeue_count = rte_vhost_async_try_dequeue_burst(dev->vid,
> queue_id,
> > +			mbuf_pool, pkts, count, &nr_inflight, dma_id, 0);
> > +
> > +	return dequeue_count;
> > +}
> > +
> > +uint16_t sync_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
> > +			   struct rte_mempool *mbuf_pool,
> > +			   struct rte_mbuf **pkts, uint16_t count) {
> > +	return rte_vhost_dequeue_burst(dev->vid, queue_id, mbuf_pool,
> pkts,
> > count);
> > +}
> > +
> >  static __rte_always_inline void
> >  drain_virtio_tx(struct vhost_dev *vdev)  { @@ -1393,13 +1437,8 @@
> > drain_virtio_tx(struct vhost_dev *vdev)
> >  	uint16_t count;
> >  	uint16_t i;
> >
> > -	if (builtin_net_driver) {
> > -		count = vs_dequeue_pkts(vdev, VIRTIO_TXQ, mbuf_pool,
> > -					pkts, MAX_PKT_BURST);
> > -	} else {
> > -		count = rte_vhost_dequeue_burst(vdev->vid, VIRTIO_TXQ,
> > -					mbuf_pool, pkts, MAX_PKT_BURST);
> > -	}
> > +	count = vdev_queue_ops[vdev->vid].dequeue_pkt_burst(vdev,
> > +				VIRTIO_TXQ, mbuf_pool, pkts,
> MAX_PKT_BURST);
> >
> >  	/* setup VMDq for the first packet */
> >  	if (unlikely(vdev->ready == DEVICE_MAC_LEARNING) && count)
> { @@
> > -1478,6 +1517,26 @@ switch_worker(void *arg __rte_unused)
> >  	return 0;
> >  }
> >
> > +static void
> > +vhost_clear_queue_thread_unsafe(struct vhost_dev *vdev, uint16_t
> > +queue_id) {
> > +	uint16_t n_pkt = 0;
> > +	int pkts_inflight;
> > +
> > +	int16_t dma_id = dma_bind[vid2socketid[vdev-
> > >vid]].dmas[queue_id].dev_id;
> > +	pkts_inflight =
> > +rte_vhost_async_get_inflight_thread_unsafe(vdev->vid,
> > queue_id);
> > +
> > +	struct rte_mbuf *m_cpl[pkts_inflight];
> > +
> > +	while (pkts_inflight) {
> > +		n_pkt = rte_vhost_clear_queue_thread_unsafe(vdev->vid,
> > queue_id, m_cpl,
> > +							pkts_inflight, dma_id,
> 0);
> > +		free_pkts(m_cpl, n_pkt);
> > +		pkts_inflight =
> > rte_vhost_async_get_inflight_thread_unsafe(vdev->vid,
> > +
> 	queue_id);
> > +	}
> > +}
> > +
> >  /*
> >   * Remove a device from the specific data core linked list and from the
> >   * main linked list. Synchronization  occurs through the use of the
> > @@ -1535,27 +1594,79 @@ destroy_device(int vid)
> >  		vdev->vid);
> >
> >  	if (dma_bind[vid].dmas[VIRTIO_RXQ].async_enabled) {
> > -		uint16_t n_pkt = 0;
> > -		int pkts_inflight;
> > -		int16_t dma_id = dma_bind[vid].dmas[VIRTIO_RXQ].dev_id;
> > -		pkts_inflight =
> rte_vhost_async_get_inflight_thread_unsafe(vid,
> > VIRTIO_RXQ);
> > -		struct rte_mbuf *m_cpl[pkts_inflight];
> > -
> > -		while (pkts_inflight) {
> > -			n_pkt = rte_vhost_clear_queue_thread_unsafe(vid,
> > VIRTIO_RXQ,
> > -						m_cpl, pkts_inflight, dma_id,
> 0);
> > -			free_pkts(m_cpl, n_pkt);
> > -			pkts_inflight =
> > rte_vhost_async_get_inflight_thread_unsafe(vid,
> > -
> 	VIRTIO_RXQ);
> > -		}
> > -
> > +		vhost_clear_queue_thread_unsafe(vdev, VIRTIO_RXQ);
> >  		rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ);
> >  		dma_bind[vid].dmas[VIRTIO_RXQ].async_enabled = false;
> >  	}
> >
> > +	if (dma_bind[vid].dmas[VIRTIO_TXQ].async_enabled) {
> > +		vhost_clear_queue_thread_unsafe(vdev, VIRTIO_TXQ);
> > +		rte_vhost_async_channel_unregister(vid, VIRTIO_TXQ);
> > +		dma_bind[vid].dmas[VIRTIO_TXQ].async_enabled = false;
> > +	}
> > +
> >  	rte_free(vdev);
> >  }
> >
> > +static int
> > +get_socketid_by_vid(int vid)
> > +{
> > +	int i;
> > +	char ifname[PATH_MAX];
> > +	rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
> > +
> > +	for (i = 0; i < nb_sockets; i++) {
> > +		char *file = socket_files + i * PATH_MAX;
> > +		if (strcmp(file, ifname) == 0)
> > +			return i;
> > +	}
> > +
> > +	return -1;
> > +}
> > +
> > +static int
> > +init_vhost_queue_ops(int vid)
> > +{
> > +	if (builtin_net_driver) {
> > +		vdev_queue_ops[vid].enqueue_pkt_burst =
> builtin_enqueue_pkts;
> > +		vdev_queue_ops[vid].dequeue_pkt_burst =
> builtin_dequeue_pkts;
> > +	} else {
> > +		if
> (dma_bind[vid2socketid[vid]].dmas[VIRTIO_RXQ].async_enabled)
> > +			vdev_queue_ops[vid].enqueue_pkt_burst =
> > async_enqueue_pkts;
> > +		else
> > +			vdev_queue_ops[vid].enqueue_pkt_burst =
> > sync_enqueue_pkts;
> > +
> > +		if
> (dma_bind[vid2socketid[vid]].dmas[VIRTIO_TXQ].async_enabled)
> > +			vdev_queue_ops[vid].dequeue_pkt_burst =
> > async_dequeue_pkts;
> > +		else
> > +			vdev_queue_ops[vid].dequeue_pkt_burst =
> > sync_dequeue_pkts;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static int
> > +vhost_async_channel_register(int vid) {
> > +	int rx_ret = 0, tx_ret = 0;
> > +
> > +	if (dma_bind[vid2socketid[vid]].dmas[VIRTIO_RXQ].dev_id !=
> > INVALID_DMA_ID) {
> > +		rx_ret = rte_vhost_async_channel_register(vid, VIRTIO_RXQ);
> > +		if (rx_ret == 0)
> > +
> > 	dma_bind[vid2socketid[vid]].dmas[VIRTIO_RXQ].async_enabled =
> true;
> > +	}
> > +
> > +	if (dma_bind[vid2socketid[vid]].dmas[VIRTIO_TXQ].dev_id !=
> > INVALID_DMA_ID) {
> > +		tx_ret = rte_vhost_async_channel_register(vid, VIRTIO_TXQ);
> > +		if (tx_ret == 0)
> > +
> > 	dma_bind[vid2socketid[vid]].dmas[VIRTIO_TXQ].async_enabled =
> true;
> > +	}
> > +
> > +	return rx_ret | tx_ret;
> > +}
> > +
> > +
> > +
> >  /*
> >   * A new device is added to a data core. First the device is added to
> > the main linked list
> >   * and then allocated to a specific data core.
> > @@ -1567,6 +1678,8 @@ new_device(int vid)
> >  	uint16_t i;
> >  	uint32_t device_num_min = num_devices;
> >  	struct vhost_dev *vdev;
> > +	int ret;
> > +
> >  	vdev = rte_zmalloc("vhost device", sizeof(*vdev),
> > RTE_CACHE_LINE_SIZE);
> >  	if (vdev == NULL) {
> >  		RTE_LOG(INFO, VHOST_DATA,
> > @@ -1589,6 +1702,17 @@ new_device(int vid)
> >  		}
> >  	}
> >
> > +	int socketid = get_socketid_by_vid(vid);
> > +	if (socketid == -1)
> > +		return -1;
> > +
> > +	init_vid2socketid_array(vid, socketid);
> > +
> > +	ret =  vhost_async_channel_register(vid);
> > +
> > +	if (init_vhost_queue_ops(vid) != 0)
> > +		return -1;
> > +
> >  	if (builtin_net_driver)
> >  		vs_vhost_net_setup(vdev);
> >
> > @@ -1620,16 +1744,7 @@ new_device(int vid)
> >  		"(%d) device has been added to data core %d\n",
> >  		vid, vdev->coreid);
> >
> > -	if (dma_bind[vid].dmas[VIRTIO_RXQ].dev_id != INVALID_DMA_ID) {
> > -		int ret;
> > -
> > -		ret = rte_vhost_async_channel_register(vid, VIRTIO_RXQ);
> > -		if (ret == 0)
> > -			dma_bind[vid].dmas[VIRTIO_RXQ].async_enabled =
> true;
> > -		return ret;
> > -	}
> > -
> > -	return 0;
> > +	return ret;
> >  }
> >
> >  static int
> > @@ -1647,22 +1762,9 @@ vring_state_changed(int vid, uint16_t queue_id,
> > int
> > enable)
> >  	if (queue_id != VIRTIO_RXQ)
> >  		return 0;
> >
> > -	if (dma_bind[vid].dmas[queue_id].async_enabled) {
> > -		if (!enable) {
> > -			uint16_t n_pkt = 0;
> > -			int pkts_inflight;
> > -			pkts_inflight =
> > rte_vhost_async_get_inflight_thread_unsafe(vid, queue_id);
> > -			int16_t dma_id =
> dma_bind[vid].dmas[VIRTIO_RXQ].dev_id;
> > -			struct rte_mbuf *m_cpl[pkts_inflight];
> > -
> > -			while (pkts_inflight) {
> > -				n_pkt =
> rte_vhost_clear_queue_thread_unsafe(vid,
> > queue_id,
> > -							m_cpl, pkts_inflight,
> dma_id, 0);
> > -				free_pkts(m_cpl, n_pkt);
> > -				pkts_inflight =
> > rte_vhost_async_get_inflight_thread_unsafe(vid,
> > -
> > 	queue_id);
> > -			}
> > -		}
> > +	if (dma_bind[vid2socketid[vid]].dmas[queue_id].async_enabled) {
> > +		if (!enable)
> > +			vhost_clear_queue_thread_unsafe(vdev, queue_id);
> >  	}
> >
> >  	return 0;
> > @@ -1887,7 +1989,7 @@ main(int argc, char *argv[])
> >  	for (i = 0; i < nb_sockets; i++) {
> >  		char *file = socket_files + i * PATH_MAX;
> >
> > -		if (dma_count)
> > +		if (dma_count && get_async_flag_by_socketid(i) != 0)
> >  			flags = flags | RTE_VHOST_USER_ASYNC_COPY;
> >
> >  		ret = rte_vhost_driver_register(file, flags); diff --git
> > a/examples/vhost/main.h b/examples/vhost/main.h index
> > e7f395c3c9..2fcb8376c5 100644
> > --- a/examples/vhost/main.h
> > +++ b/examples/vhost/main.h
> > @@ -61,6 +61,19 @@ struct vhost_dev {
> >  	struct vhost_queue queues[MAX_QUEUE_PAIRS * 2];  }
> > __rte_cache_aligned;
> >
> > +typedef uint16_t (*vhost_enqueue_burst_t)(struct vhost_dev *dev,
> > +			uint16_t queue_id, struct rte_mbuf **pkts,
> > +			uint32_t count);
> > +
> > +typedef uint16_t (*vhost_dequeue_burst_t)(struct vhost_dev *dev,
> > +			uint16_t queue_id, struct rte_mempool *mbuf_pool,
> > +			struct rte_mbuf **pkts, uint16_t count);
> > +
> > +struct vhost_queue_ops {
> > +	vhost_enqueue_burst_t enqueue_pkt_burst;
> > +	vhost_dequeue_burst_t dequeue_pkt_burst; };
> > +
> >  TAILQ_HEAD(vhost_dev_tailq_list, vhost_dev);
> >
> >
> > @@ -87,6 +100,7 @@ struct dma_info {
> >
> >  struct dma_for_vhost {
> >  	struct dma_info dmas[RTE_MAX_QUEUES_PER_PORT * 2];
> > +	uint32_t async_flag;
> >  };
> >
> >  /* we implement non-extra virtio net features */ @@ -97,7 +111,19 @@
> > void vs_vhost_net_remove(struct vhost_dev *dev);  uint16_t
> > vs_enqueue_pkts(struct vhost_dev *dev, uint16_t queue_id,
> >  			 struct rte_mbuf **pkts, uint32_t count);
> >
> > -uint16_t vs_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
> > -			 struct rte_mempool *mbuf_pool,
> > -			 struct rte_mbuf **pkts, uint16_t count);
> > +uint16_t builtin_enqueue_pkts(struct vhost_dev *dev, uint16_t queue_id,
> > +			struct rte_mbuf **pkts, uint32_t count); uint16_t
> > +builtin_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
> > +			struct rte_mempool *mbuf_pool,
> > +			struct rte_mbuf **pkts, uint16_t count); uint16_t
> > +sync_enqueue_pkts(struct vhost_dev *dev, uint16_t queue_id,
> > +			 struct rte_mbuf **pkts, uint32_t count); uint16_t
> > +sync_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
> > +			struct rte_mempool *mbuf_pool,
> > +			struct rte_mbuf **pkts, uint16_t count); uint16_t
> > +async_enqueue_pkts(struct vhost_dev *dev, uint16_t queue_id,
> > +			 struct rte_mbuf **pkts, uint32_t count); uint16_t
> > +async_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
> > +			struct rte_mempool *mbuf_pool,
> > +			struct rte_mbuf **pkts, uint16_t count);
> >  #endif /* _MAIN_H_ */
> > diff --git a/examples/vhost/virtio_net.c b/examples/vhost/virtio_net.c
> > index 9064fc3a82..2432a96566 100644
> > --- a/examples/vhost/virtio_net.c
> > +++ b/examples/vhost/virtio_net.c
> > @@ -238,6 +238,13 @@ vs_enqueue_pkts(struct vhost_dev *dev, uint16_t
> > queue_id,
> >  	return count;
> >  }
> >
> > +uint16_t
> > +builtin_enqueue_pkts(struct vhost_dev *dev, uint16_t queue_id,
> > +		struct rte_mbuf **pkts, uint32_t count) {
> > +	return vs_enqueue_pkts(dev, queue_id, pkts, count); }
> > +
> >  static __rte_always_inline int
> >  dequeue_pkt(struct vhost_dev *dev, struct rte_vhost_vring *vr,
> >  	    struct rte_mbuf *m, uint16_t desc_idx, @@ -363,7 +370,7 @@
> > dequeue_pkt(struct vhost_dev *dev, struct rte_vhost_vring *vr,
> >  	return 0;
> >  }
> >
> > -uint16_t
> > +static uint16_t
> >  vs_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
> >  	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t
> > count)
> >  {
> > @@ -440,3 +447,10 @@ vs_dequeue_pkts(struct vhost_dev *dev, uint16_t
> > queue_id,
> >
> >  	return i;
> >  }
> > +
> > +uint16_t
> > +builtin_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
> > +	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t
> > count)
> > +{
> > +	return vs_dequeue_pkts(dev, queue_id, mbuf_pool, pkts, count); }
> > --
> > 2.17.1
diff mbox series

Patch

diff --git a/doc/guides/sample_app_ug/vhost.rst b/doc/guides/sample_app_ug/vhost.rst
index a6ce4bc8ac..09db965e70 100644
--- a/doc/guides/sample_app_ug/vhost.rst
+++ b/doc/guides/sample_app_ug/vhost.rst
@@ -169,9 +169,12 @@  demonstrates how to use the async vhost APIs. It's used in combination with dmas
 **--dmas**
 This parameter is used to specify the assigned DMA device of a vhost device.
 Async vhost-user net driver will be used if --dmas is set. For example
---dmas [txd0@00:04.0,txd1@00:04.1] means use DMA channel 00:04.0 for vhost
-device 0 enqueue operation and use DMA channel 00:04.1 for vhost device 1
-enqueue operation.
+--dmas [txd0@00:04.0,txd1@00:04.1,rxd0@00:04.2,rxd1@00:04.3] means use
+DMA channel 00:04.0/00:04.2 for vhost device 0 enqueue/dequeue operation
+and use DMA channel 00:04.1/00:04.3 for vhost device 1 enqueue/dequeue
+operation. The index of the device corresponds to the socket file in order,
+that means vhost device 0 is created through the first socket file, vhost
+device 1 is created through the second socket file, and so on.
 
 Common Issues
 -------------
diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index c4d46de1c5..d070391727 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -63,6 +63,9 @@ 
 
 #define DMA_RING_SIZE 4096
 
+#define ASYNC_ENQUEUE_VHOST 1
+#define ASYNC_DEQUEUE_VHOST 2
+
 /* number of mbufs in all pools - if specified on command-line. */
 static int total_num_mbufs = NUM_MBUFS_DEFAULT;
 
@@ -116,6 +119,8 @@  static uint32_t burst_rx_retry_num = BURST_RX_RETRIES;
 static char *socket_files;
 static int nb_sockets;
 
+static struct vhost_queue_ops vdev_queue_ops[RTE_MAX_VHOST_DEVICE];
+
 /* empty VMDq configuration structure. Filled in programmatically */
 static struct rte_eth_conf vmdq_conf_default = {
 	.rxmode = {
@@ -205,6 +210,18 @@  struct vhost_bufftable *vhost_txbuff[RTE_MAX_LCORE * RTE_MAX_VHOST_DEVICE];
 #define MBUF_TABLE_DRAIN_TSC	((rte_get_tsc_hz() + US_PER_S - 1) \
 				 / US_PER_S * BURST_TX_DRAIN_US)
 
+static int vid2socketid[RTE_MAX_VHOST_DEVICE];
+
+static uint32_t get_async_flag_by_socketid(int socketid)
+{
+	return dma_bind[socketid].async_flag;
+}
+
+static void init_vid2socketid_array(int vid, int socketid)
+{
+	vid2socketid[vid] = socketid;
+}
+
 static inline bool
 is_dma_configured(int16_t dev_id)
 {
@@ -224,7 +241,7 @@  open_dma(const char *value)
 	char *addrs = input;
 	char *ptrs[2];
 	char *start, *end, *substr;
-	int64_t vid;
+	int64_t socketid, vring_id;
 
 	struct rte_dma_info info;
 	struct rte_dma_conf dev_config = { .nb_vchans = 1 };
@@ -262,7 +279,9 @@  open_dma(const char *value)
 
 	while (i < args_nr) {
 		char *arg_temp = dma_arg[i];
+		char *txd, *rxd;
 		uint8_t sub_nr;
+		int async_flag;
 
 		sub_nr = rte_strsplit(arg_temp, strlen(arg_temp), ptrs, 2, '@');
 		if (sub_nr != 2) {
@@ -270,14 +289,23 @@  open_dma(const char *value)
 			goto out;
 		}
 
-		start = strstr(ptrs[0], "txd");
-		if (start == NULL) {
+		txd = strstr(ptrs[0], "txd");
+		rxd = strstr(ptrs[0], "rxd");
+		if (txd) {
+			start = txd;
+			vring_id = VIRTIO_RXQ;
+			async_flag = ASYNC_ENQUEUE_VHOST;
+		} else if (rxd) {
+			start = rxd;
+			vring_id = VIRTIO_TXQ;
+			async_flag = ASYNC_DEQUEUE_VHOST;
+		} else {
 			ret = -1;
 			goto out;
 		}
 
 		start += 3;
-		vid = strtol(start, &end, 0);
+		socketid = strtol(start, &end, 0);
 		if (end == start) {
 			ret = -1;
 			goto out;
@@ -338,7 +366,8 @@  open_dma(const char *value)
 		dmas_id[dma_count++] = dev_id;
 
 done:
-		(dma_info + vid)->dmas[VIRTIO_RXQ].dev_id = dev_id;
+		(dma_info + socketid)->dmas[vring_id].dev_id = dev_id;
+		(dma_info + socketid)->async_flag |= async_flag;
 		i++;
 	}
 out:
@@ -990,7 +1019,7 @@  complete_async_pkts(struct vhost_dev *vdev)
 {
 	struct rte_mbuf *p_cpl[MAX_PKT_BURST];
 	uint16_t complete_count;
-	int16_t dma_id = dma_bind[vdev->vid].dmas[VIRTIO_RXQ].dev_id;
+	int16_t dma_id = dma_bind[vid2socketid[vdev->vid]].dmas[VIRTIO_RXQ].dev_id;
 
 	complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
 					VIRTIO_RXQ, p_cpl, MAX_PKT_BURST, dma_id, 0);
@@ -1029,22 +1058,7 @@  drain_vhost(struct vhost_dev *vdev)
 	uint16_t nr_xmit = vhost_txbuff[buff_idx]->len;
 	struct rte_mbuf **m = vhost_txbuff[buff_idx]->m_table;
 
-	if (builtin_net_driver) {
-		ret = vs_enqueue_pkts(vdev, VIRTIO_RXQ, m, nr_xmit);
-	} else if (dma_bind[vdev->vid].dmas[VIRTIO_RXQ].async_enabled) {
-		uint16_t enqueue_fail = 0;
-		int16_t dma_id = dma_bind[vdev->vid].dmas[VIRTIO_RXQ].dev_id;
-
-		complete_async_pkts(vdev);
-		ret = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ, m, nr_xmit, dma_id, 0);
-
-		enqueue_fail = nr_xmit - ret;
-		if (enqueue_fail)
-			free_pkts(&m[ret], nr_xmit - ret);
-	} else {
-		ret = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
-						m, nr_xmit);
-	}
+	ret = vdev_queue_ops[vdev->vid].enqueue_pkt_burst(vdev, VIRTIO_RXQ, m, nr_xmit);
 
 	if (enable_stats) {
 		__atomic_add_fetch(&vdev->stats.rx_total_atomic, nr_xmit,
@@ -1053,7 +1067,7 @@  drain_vhost(struct vhost_dev *vdev)
 				__ATOMIC_SEQ_CST);
 	}
 
-	if (!dma_bind[vdev->vid].dmas[VIRTIO_RXQ].async_enabled)
+	if (!dma_bind[vid2socketid[vdev->vid]].dmas[VIRTIO_RXQ].async_enabled)
 		free_pkts(m, nr_xmit);
 }
 
@@ -1325,6 +1339,32 @@  drain_mbuf_table(struct mbuf_table *tx_q)
 	}
 }
 
+uint16_t
+async_enqueue_pkts(struct vhost_dev *dev, uint16_t queue_id,
+		struct rte_mbuf **pkts, uint32_t rx_count)
+{
+	uint16_t enqueue_count;
+	uint16_t enqueue_fail = 0;
+	uint16_t dma_id = dma_bind[vid2socketid[dev->vid]].dmas[VIRTIO_RXQ].dev_id;
+
+	complete_async_pkts(dev);
+	enqueue_count = rte_vhost_submit_enqueue_burst(dev->vid, queue_id,
+					pkts, rx_count, dma_id, 0);
+
+	enqueue_fail = rx_count - enqueue_count;
+	if (enqueue_fail)
+		free_pkts(&pkts[enqueue_count], enqueue_fail);
+
+	return enqueue_count;
+}
+
+uint16_t
+sync_enqueue_pkts(struct vhost_dev *dev, uint16_t queue_id,
+		struct rte_mbuf **pkts, uint32_t rx_count)
+{
+	return rte_vhost_enqueue_burst(dev->vid, queue_id, pkts, rx_count);
+}
+
 static __rte_always_inline void
 drain_eth_rx(struct vhost_dev *vdev)
 {
@@ -1355,25 +1395,8 @@  drain_eth_rx(struct vhost_dev *vdev)
 		}
 	}
 
-	if (builtin_net_driver) {
-		enqueue_count = vs_enqueue_pkts(vdev, VIRTIO_RXQ,
-						pkts, rx_count);
-	} else if (dma_bind[vdev->vid].dmas[VIRTIO_RXQ].async_enabled) {
-		uint16_t enqueue_fail = 0;
-		int16_t dma_id = dma_bind[vdev->vid].dmas[VIRTIO_RXQ].dev_id;
-
-		complete_async_pkts(vdev);
-		enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
-					VIRTIO_RXQ, pkts, rx_count, dma_id, 0);
-
-		enqueue_fail = rx_count - enqueue_count;
-		if (enqueue_fail)
-			free_pkts(&pkts[enqueue_count], enqueue_fail);
-
-	} else {
-		enqueue_count = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
-						pkts, rx_count);
-	}
+	enqueue_count = vdev_queue_ops[vdev->vid].enqueue_pkt_burst(vdev,
+					VIRTIO_RXQ, pkts, rx_count);
 
 	if (enable_stats) {
 		__atomic_add_fetch(&vdev->stats.rx_total_atomic, rx_count,
@@ -1382,10 +1405,31 @@  drain_eth_rx(struct vhost_dev *vdev)
 				__ATOMIC_SEQ_CST);
 	}
 
-	if (!dma_bind[vdev->vid].dmas[VIRTIO_RXQ].async_enabled)
+	if (!dma_bind[vid2socketid[vdev->vid]].dmas[VIRTIO_RXQ].async_enabled)
 		free_pkts(pkts, rx_count);
 }
 
+uint16_t async_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
+			    struct rte_mempool *mbuf_pool,
+			    struct rte_mbuf **pkts, uint16_t count)
+{
+	int nr_inflight;
+	uint16_t dequeue_count;
+	uint16_t dma_id = dma_bind[vid2socketid[dev->vid]].dmas[VIRTIO_TXQ].dev_id;
+
+	dequeue_count = rte_vhost_async_try_dequeue_burst(dev->vid, queue_id,
+			mbuf_pool, pkts, count, &nr_inflight, dma_id, 0);
+
+	return dequeue_count;
+}
+
+uint16_t sync_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
+			   struct rte_mempool *mbuf_pool,
+			   struct rte_mbuf **pkts, uint16_t count)
+{
+	return rte_vhost_dequeue_burst(dev->vid, queue_id, mbuf_pool, pkts, count);
+}
+
 static __rte_always_inline void
 drain_virtio_tx(struct vhost_dev *vdev)
 {
@@ -1393,13 +1437,8 @@  drain_virtio_tx(struct vhost_dev *vdev)
 	uint16_t count;
 	uint16_t i;
 
-	if (builtin_net_driver) {
-		count = vs_dequeue_pkts(vdev, VIRTIO_TXQ, mbuf_pool,
-					pkts, MAX_PKT_BURST);
-	} else {
-		count = rte_vhost_dequeue_burst(vdev->vid, VIRTIO_TXQ,
-					mbuf_pool, pkts, MAX_PKT_BURST);
-	}
+	count = vdev_queue_ops[vdev->vid].dequeue_pkt_burst(vdev,
+				VIRTIO_TXQ, mbuf_pool, pkts, MAX_PKT_BURST);
 
 	/* setup VMDq for the first packet */
 	if (unlikely(vdev->ready == DEVICE_MAC_LEARNING) && count) {
@@ -1478,6 +1517,26 @@  switch_worker(void *arg __rte_unused)
 	return 0;
 }
 
+static void
+vhost_clear_queue_thread_unsafe(struct vhost_dev *vdev, uint16_t queue_id)
+{
+	uint16_t n_pkt = 0;
+	int pkts_inflight;
+
+	int16_t dma_id = dma_bind[vid2socketid[vdev->vid]].dmas[queue_id].dev_id;
+	pkts_inflight = rte_vhost_async_get_inflight_thread_unsafe(vdev->vid, queue_id);
+
+	struct rte_mbuf *m_cpl[pkts_inflight];
+
+	while (pkts_inflight) {
+		n_pkt = rte_vhost_clear_queue_thread_unsafe(vdev->vid, queue_id, m_cpl,
+							pkts_inflight, dma_id, 0);
+		free_pkts(m_cpl, n_pkt);
+		pkts_inflight = rte_vhost_async_get_inflight_thread_unsafe(vdev->vid,
+									queue_id);
+	}
+}
+
 /*
  * Remove a device from the specific data core linked list and from the
  * main linked list. Synchronization  occurs through the use of the
@@ -1535,27 +1594,79 @@  destroy_device(int vid)
 		vdev->vid);
 
 	if (dma_bind[vid].dmas[VIRTIO_RXQ].async_enabled) {
-		uint16_t n_pkt = 0;
-		int pkts_inflight;
-		int16_t dma_id = dma_bind[vid].dmas[VIRTIO_RXQ].dev_id;
-		pkts_inflight = rte_vhost_async_get_inflight_thread_unsafe(vid, VIRTIO_RXQ);
-		struct rte_mbuf *m_cpl[pkts_inflight];
-
-		while (pkts_inflight) {
-			n_pkt = rte_vhost_clear_queue_thread_unsafe(vid, VIRTIO_RXQ,
-						m_cpl, pkts_inflight, dma_id, 0);
-			free_pkts(m_cpl, n_pkt);
-			pkts_inflight = rte_vhost_async_get_inflight_thread_unsafe(vid,
-										VIRTIO_RXQ);
-		}
-
+		vhost_clear_queue_thread_unsafe(vdev, VIRTIO_RXQ);
 		rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ);
 		dma_bind[vid].dmas[VIRTIO_RXQ].async_enabled = false;
 	}
 
+	if (dma_bind[vid].dmas[VIRTIO_TXQ].async_enabled) {
+		vhost_clear_queue_thread_unsafe(vdev, VIRTIO_TXQ);
+		rte_vhost_async_channel_unregister(vid, VIRTIO_TXQ);
+		dma_bind[vid].dmas[VIRTIO_TXQ].async_enabled = false;
+	}
+
 	rte_free(vdev);
 }
 
+static int
+get_socketid_by_vid(int vid)
+{
+	int i;
+	char ifname[PATH_MAX];
+	rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
+
+	for (i = 0; i < nb_sockets; i++) {
+		char *file = socket_files + i * PATH_MAX;
+		if (strcmp(file, ifname) == 0)
+			return i;
+	}
+
+	return -1;
+}
+
+static int
+init_vhost_queue_ops(int vid)
+{
+	if (builtin_net_driver) {
+		vdev_queue_ops[vid].enqueue_pkt_burst = builtin_enqueue_pkts;
+		vdev_queue_ops[vid].dequeue_pkt_burst = builtin_dequeue_pkts;
+	} else {
+		if (dma_bind[vid2socketid[vid]].dmas[VIRTIO_RXQ].async_enabled)
+			vdev_queue_ops[vid].enqueue_pkt_burst = async_enqueue_pkts;
+		else
+			vdev_queue_ops[vid].enqueue_pkt_burst = sync_enqueue_pkts;
+
+		if (dma_bind[vid2socketid[vid]].dmas[VIRTIO_TXQ].async_enabled)
+			vdev_queue_ops[vid].dequeue_pkt_burst = async_dequeue_pkts;
+		else
+			vdev_queue_ops[vid].dequeue_pkt_burst = sync_dequeue_pkts;
+	}
+
+	return 0;
+}
+
+static int
+vhost_async_channel_register(int vid)
+{
+	int rx_ret = 0, tx_ret = 0;
+
+	if (dma_bind[vid2socketid[vid]].dmas[VIRTIO_RXQ].dev_id != INVALID_DMA_ID) {
+		rx_ret = rte_vhost_async_channel_register(vid, VIRTIO_RXQ);
+		if (rx_ret == 0)
+			dma_bind[vid2socketid[vid]].dmas[VIRTIO_RXQ].async_enabled = true;
+	}
+
+	if (dma_bind[vid2socketid[vid]].dmas[VIRTIO_TXQ].dev_id != INVALID_DMA_ID) {
+		tx_ret = rte_vhost_async_channel_register(vid, VIRTIO_TXQ);
+		if (tx_ret == 0)
+			dma_bind[vid2socketid[vid]].dmas[VIRTIO_TXQ].async_enabled = true;
+	}
+
+	return rx_ret | tx_ret;
+}
+
+
+
 /*
  * A new device is added to a data core. First the device is added to the main linked list
  * and then allocated to a specific data core.
@@ -1567,6 +1678,8 @@  new_device(int vid)
 	uint16_t i;
 	uint32_t device_num_min = num_devices;
 	struct vhost_dev *vdev;
+	int ret;
+
 	vdev = rte_zmalloc("vhost device", sizeof(*vdev), RTE_CACHE_LINE_SIZE);
 	if (vdev == NULL) {
 		RTE_LOG(INFO, VHOST_DATA,
@@ -1589,6 +1702,17 @@  new_device(int vid)
 		}
 	}
 
+	int socketid = get_socketid_by_vid(vid);
+	if (socketid == -1)
+		return -1;
+
+	init_vid2socketid_array(vid, socketid);
+
+	ret =  vhost_async_channel_register(vid);
+
+	if (init_vhost_queue_ops(vid) != 0)
+		return -1;
+
 	if (builtin_net_driver)
 		vs_vhost_net_setup(vdev);
 
@@ -1620,16 +1744,7 @@  new_device(int vid)
 		"(%d) device has been added to data core %d\n",
 		vid, vdev->coreid);
 
-	if (dma_bind[vid].dmas[VIRTIO_RXQ].dev_id != INVALID_DMA_ID) {
-		int ret;
-
-		ret = rte_vhost_async_channel_register(vid, VIRTIO_RXQ);
-		if (ret == 0)
-			dma_bind[vid].dmas[VIRTIO_RXQ].async_enabled = true;
-		return ret;
-	}
-
-	return 0;
+	return ret;
 }
 
 static int
@@ -1647,22 +1762,9 @@  vring_state_changed(int vid, uint16_t queue_id, int enable)
 	if (queue_id != VIRTIO_RXQ)
 		return 0;
 
-	if (dma_bind[vid].dmas[queue_id].async_enabled) {
-		if (!enable) {
-			uint16_t n_pkt = 0;
-			int pkts_inflight;
-			pkts_inflight = rte_vhost_async_get_inflight_thread_unsafe(vid, queue_id);
-			int16_t dma_id = dma_bind[vid].dmas[VIRTIO_RXQ].dev_id;
-			struct rte_mbuf *m_cpl[pkts_inflight];
-
-			while (pkts_inflight) {
-				n_pkt = rte_vhost_clear_queue_thread_unsafe(vid, queue_id,
-							m_cpl, pkts_inflight, dma_id, 0);
-				free_pkts(m_cpl, n_pkt);
-				pkts_inflight = rte_vhost_async_get_inflight_thread_unsafe(vid,
-											queue_id);
-			}
-		}
+	if (dma_bind[vid2socketid[vid]].dmas[queue_id].async_enabled) {
+		if (!enable)
+			vhost_clear_queue_thread_unsafe(vdev, queue_id);
 	}
 
 	return 0;
@@ -1887,7 +1989,7 @@  main(int argc, char *argv[])
 	for (i = 0; i < nb_sockets; i++) {
 		char *file = socket_files + i * PATH_MAX;
 
-		if (dma_count)
+		if (dma_count && get_async_flag_by_socketid(i) != 0)
 			flags = flags | RTE_VHOST_USER_ASYNC_COPY;
 
 		ret = rte_vhost_driver_register(file, flags);
diff --git a/examples/vhost/main.h b/examples/vhost/main.h
index e7f395c3c9..2fcb8376c5 100644
--- a/examples/vhost/main.h
+++ b/examples/vhost/main.h
@@ -61,6 +61,19 @@  struct vhost_dev {
 	struct vhost_queue queues[MAX_QUEUE_PAIRS * 2];
 } __rte_cache_aligned;
 
+typedef uint16_t (*vhost_enqueue_burst_t)(struct vhost_dev *dev,
+			uint16_t queue_id, struct rte_mbuf **pkts,
+			uint32_t count);
+
+typedef uint16_t (*vhost_dequeue_burst_t)(struct vhost_dev *dev,
+			uint16_t queue_id, struct rte_mempool *mbuf_pool,
+			struct rte_mbuf **pkts, uint16_t count);
+
+struct vhost_queue_ops {
+	vhost_enqueue_burst_t enqueue_pkt_burst;
+	vhost_dequeue_burst_t dequeue_pkt_burst;
+};
+
 TAILQ_HEAD(vhost_dev_tailq_list, vhost_dev);
 
 
@@ -87,6 +100,7 @@  struct dma_info {
 
 struct dma_for_vhost {
 	struct dma_info dmas[RTE_MAX_QUEUES_PER_PORT * 2];
+	uint32_t async_flag;
 };
 
 /* we implement non-extra virtio net features */
@@ -97,7 +111,19 @@  void vs_vhost_net_remove(struct vhost_dev *dev);
 uint16_t vs_enqueue_pkts(struct vhost_dev *dev, uint16_t queue_id,
 			 struct rte_mbuf **pkts, uint32_t count);
 
-uint16_t vs_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
-			 struct rte_mempool *mbuf_pool,
-			 struct rte_mbuf **pkts, uint16_t count);
+uint16_t builtin_enqueue_pkts(struct vhost_dev *dev, uint16_t queue_id,
+			struct rte_mbuf **pkts, uint32_t count);
+uint16_t builtin_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
+			struct rte_mempool *mbuf_pool,
+			struct rte_mbuf **pkts, uint16_t count);
+uint16_t sync_enqueue_pkts(struct vhost_dev *dev, uint16_t queue_id,
+			 struct rte_mbuf **pkts, uint32_t count);
+uint16_t sync_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
+			struct rte_mempool *mbuf_pool,
+			struct rte_mbuf **pkts, uint16_t count);
+uint16_t async_enqueue_pkts(struct vhost_dev *dev, uint16_t queue_id,
+			 struct rte_mbuf **pkts, uint32_t count);
+uint16_t async_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
+			struct rte_mempool *mbuf_pool,
+			struct rte_mbuf **pkts, uint16_t count);
 #endif /* _MAIN_H_ */
diff --git a/examples/vhost/virtio_net.c b/examples/vhost/virtio_net.c
index 9064fc3a82..2432a96566 100644
--- a/examples/vhost/virtio_net.c
+++ b/examples/vhost/virtio_net.c
@@ -238,6 +238,13 @@  vs_enqueue_pkts(struct vhost_dev *dev, uint16_t queue_id,
 	return count;
 }
 
+uint16_t
+builtin_enqueue_pkts(struct vhost_dev *dev, uint16_t queue_id,
+		struct rte_mbuf **pkts, uint32_t count)
+{
+	return vs_enqueue_pkts(dev, queue_id, pkts, count);
+}
+
 static __rte_always_inline int
 dequeue_pkt(struct vhost_dev *dev, struct rte_vhost_vring *vr,
 	    struct rte_mbuf *m, uint16_t desc_idx,
@@ -363,7 +370,7 @@  dequeue_pkt(struct vhost_dev *dev, struct rte_vhost_vring *vr,
 	return 0;
 }
 
-uint16_t
+static uint16_t
 vs_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
 	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
 {
@@ -440,3 +447,10 @@  vs_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
 
 	return i;
 }
+
+uint16_t
+builtin_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
+	struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
+{
+	return vs_dequeue_pkts(dev, queue_id, mbuf_pool, pkts, count);
+}