[v6,5/5] examples/vhost: support async dequeue data path
Commit Message
From: Xuan Ding <xuan.ding@intel.com>
This patch adds a use case for the async dequeue API. A vswitch can
leverage DMA devices to accelerate the vhost async dequeue path.
Signed-off-by: Wenwu Ma <wenwux.ma@intel.com>
Signed-off-by: Yuan Wang <yuanx.wang@intel.com>
Signed-off-by: Xuan Ding <xuan.ding@intel.com>
Tested-by: Yvonne Yang <yvonnex.yang@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
doc/guides/sample_app_ug/vhost.rst | 9 +-
examples/vhost/main.c | 284 ++++++++++++++++++++---------
examples/vhost/main.h | 32 +++-
examples/vhost/virtio_net.c | 16 +-
4 files changed, 243 insertions(+), 98 deletions(-)
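For context, an illustrative launch line using the extended --dmas syntax documented below; the dpdk-vhost binary name, the -p and --socket-file options, and the 00:04.x DMA addresses follow the sample's existing user guide and are assumptions here, not something this patch introduces:

    ./dpdk-vhost -l 1-3 -n 4 -- -p 0x1 \
        --socket-file /tmp/vhost-net0 --socket-file /tmp/vhost-net1 \
        --dmas [txd0@00:04.0,txd1@00:04.1,rxd0@00:04.2,rxd1@00:04.3]

With this mapping, txd0/rxd0 assign the enqueue/dequeue DMA channels of the vhost device created from the first socket file, and txd1/rxd1 those of the device created from the second.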
Comments
> -----Original Message-----
> From: Ding, Xuan <xuan.ding@intel.com>
> Sent: Friday, May 13, 2022 10:51 AM
> To: maxime.coquelin@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>
> Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Jiang, Cheng1
> <cheng1.jiang@intel.com>; Pai G, Sunil <sunil.pai.g@intel.com>;
> liangma@liangbit.com; Ding, Xuan <xuan.ding@intel.com>; Ma, WenwuX
> <wenwux.ma@intel.com>; Wang, YuanX <yuanx.wang@intel.com>
> Subject: [PATCH v6 5/5] examples/vhost: support async dequeue data path
>
> From: Xuan Ding <xuan.ding@intel.com>
>
> This patch adds the use case for async dequeue API. Vswitch can
> leverage DMA device to accelerate vhost async dequeue path.
>
> Signed-off-by: Wenwu Ma <wenwux.ma@intel.com>
> Signed-off-by: Yuan Wang <yuanx.wang@intel.com>
> Signed-off-by: Xuan Ding <xuan.ding@intel.com>
> Tested-by: Yvonne Yang <yvonnex.yang@intel.com>
> Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
> doc/guides/sample_app_ug/vhost.rst | 9 +-
> examples/vhost/main.c | 284 ++++++++++++++++++++---------
> examples/vhost/main.h | 32 +++-
> examples/vhost/virtio_net.c | 16 +-
> 4 files changed, 243 insertions(+), 98 deletions(-)
>
> diff --git a/doc/guides/sample_app_ug/vhost.rst
> b/doc/guides/sample_app_ug/vhost.rst
> index a6ce4bc8ac..09db965e70 100644
> --- a/doc/guides/sample_app_ug/vhost.rst
> +++ b/doc/guides/sample_app_ug/vhost.rst
> @@ -169,9 +169,12 @@ demonstrates how to use the async vhost APIs. It's
> used in combination with dmas
> **--dmas**
> This parameter is used to specify the assigned DMA device of a vhost
> device.
> Async vhost-user net driver will be used if --dmas is set. For example
> ---dmas [txd0@00:04.0,txd1@00:04.1] means use DMA channel 00:04.0 for
> vhost
> -device 0 enqueue operation and use DMA channel 00:04.1 for vhost device 1
> -enqueue operation.
> +--dmas [txd0@00:04.0,txd1@00:04.1,rxd0@00:04.2,rxd1@00:04.3] means use
> +DMA channel 00:04.0/00:04.2 for vhost device 0 enqueue/dequeue operation
> +and use DMA channel 00:04.1/00:04.3 for vhost device 1 enqueue/dequeue
> +operation. The index of the device corresponds to the socket file in
> order,
> +that means vhost device 0 is created through the first socket file, vhost
> +device 1 is created through the second socket file, and so on.
>
> Common Issues
> -------------
> diff --git a/examples/vhost/main.c b/examples/vhost/main.c
> index c4d46de1c5..d070391727 100644
> --- a/examples/vhost/main.c
> +++ b/examples/vhost/main.c
> @@ -63,6 +63,9 @@
>
> #define DMA_RING_SIZE 4096
>
> +#define ASYNC_ENQUEUE_VHOST 1
> +#define ASYNC_DEQUEUE_VHOST 2
> +
> /* number of mbufs in all pools - if specified on command-line. */
> static int total_num_mbufs = NUM_MBUFS_DEFAULT;
>
> @@ -116,6 +119,8 @@ static uint32_t burst_rx_retry_num = BURST_RX_RETRIES;
> static char *socket_files;
> static int nb_sockets;
>
> +static struct vhost_queue_ops vdev_queue_ops[RTE_MAX_VHOST_DEVICE];
> +
> /* empty VMDq configuration structure. Filled in programmatically */
> static struct rte_eth_conf vmdq_conf_default = {
> .rxmode = {
> @@ -205,6 +210,18 @@ struct vhost_bufftable *vhost_txbuff[RTE_MAX_LCORE *
> RTE_MAX_VHOST_DEVICE];
> #define MBUF_TABLE_DRAIN_TSC ((rte_get_tsc_hz() + US_PER_S - 1) \
> / US_PER_S * BURST_TX_DRAIN_US)
>
> +static int vid2socketid[RTE_MAX_VHOST_DEVICE];
> +
> +static uint32_t get_async_flag_by_socketid(int socketid)
> +{
> + return dma_bind[socketid].async_flag;
> +}
> +
> +static void init_vid2socketid_array(int vid, int socketid)
> +{
> + vid2socketid[vid] = socketid;
> +}
Return value and func name should be on separate lines as per coding style.
And above func can be inline, same suggestion for short func below, especially
ones in data path.
Thanks,
Chenbo
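For reference, a minimal sketch of the two helpers with both suggestions applied (return type on its own line, short helpers marked inline); the same two-line declaration style would extend to the other short wrappers added by this patch:

static inline uint32_t
get_async_flag_by_socketid(int socketid)
{
	return dma_bind[socketid].async_flag;
}

static inline void
init_vid2socketid_array(int vid, int socketid)
{
	vid2socketid[vid] = socketid;
}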
> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Friday, May 13, 2022 11:27 AM
> To: Ding, Xuan <xuan.ding@intel.com>; maxime.coquelin@redhat.com
> Cc: dev@dpdk.org; Hu, Jiayu <jiayu.hu@intel.com>; Jiang, Cheng1
> <cheng1.jiang@intel.com>; Pai G, Sunil <sunil.pai.g@intel.com>;
> liangma@liangbit.com; Ma, WenwuX <wenwux.ma@intel.com>; Wang,
> YuanX <yuanx.wang@intel.com>
> Subject: RE: [PATCH v6 5/5] examples/vhost: support async dequeue data
> path
>
> > +static int vid2socketid[RTE_MAX_VHOST_DEVICE];
> > +
> > +static uint32_t get_async_flag_by_socketid(int socketid)
> > +{
> > + return dma_bind[socketid].async_flag;
> > +}
> > +
> > +static void init_vid2socketid_array(int vid, int socketid)
> > +{
> > + vid2socketid[vid] = socketid;
> > +}
>
> Return value and func name should be on separate lines as per coding style.
> And above func can be inline, same suggestion for short func below,
> especially ones in data path.
Thanks Chenbo, will fix it in next version.
Regards,
Xuan
diff --git a/doc/guides/sample_app_ug/vhost.rst b/doc/guides/sample_app_ug/vhost.rst
index a6ce4bc8ac..09db965e70 100644
--- a/doc/guides/sample_app_ug/vhost.rst
+++ b/doc/guides/sample_app_ug/vhost.rst
@@ -169,9 +169,12 @@ demonstrates how to use the async vhost APIs. It's used in combination with dmas
**--dmas**
This parameter is used to specify the assigned DMA device of a vhost device.
Async vhost-user net driver will be used if --dmas is set. For example
---dmas [txd0@00:04.0,txd1@00:04.1] means use DMA channel 00:04.0 for vhost
-device 0 enqueue operation and use DMA channel 00:04.1 for vhost device 1
-enqueue operation.
+--dmas [txd0@00:04.0,txd1@00:04.1,rxd0@00:04.2,rxd1@00:04.3] means use
+DMA channel 00:04.0/00:04.2 for vhost device 0 enqueue/dequeue operation
+and use DMA channel 00:04.1/00:04.3 for vhost device 1 enqueue/dequeue
+operation. The index of the device corresponds to the socket file in order,
+that means vhost device 0 is created through the first socket file, vhost
+device 1 is created through the second socket file, and so on.
Common Issues
-------------
diff --git a/examples/vhost/main.c b/examples/vhost/main.c
index c4d46de1c5..d070391727 100644
--- a/examples/vhost/main.c
+++ b/examples/vhost/main.c
@@ -63,6 +63,9 @@
#define DMA_RING_SIZE 4096
+#define ASYNC_ENQUEUE_VHOST 1
+#define ASYNC_DEQUEUE_VHOST 2
+
/* number of mbufs in all pools - if specified on command-line. */
static int total_num_mbufs = NUM_MBUFS_DEFAULT;
@@ -116,6 +119,8 @@ static uint32_t burst_rx_retry_num = BURST_RX_RETRIES;
static char *socket_files;
static int nb_sockets;
+static struct vhost_queue_ops vdev_queue_ops[RTE_MAX_VHOST_DEVICE];
+
/* empty VMDq configuration structure. Filled in programmatically */
static struct rte_eth_conf vmdq_conf_default = {
.rxmode = {
@@ -205,6 +210,18 @@ struct vhost_bufftable *vhost_txbuff[RTE_MAX_LCORE * RTE_MAX_VHOST_DEVICE];
#define MBUF_TABLE_DRAIN_TSC ((rte_get_tsc_hz() + US_PER_S - 1) \
/ US_PER_S * BURST_TX_DRAIN_US)
+static int vid2socketid[RTE_MAX_VHOST_DEVICE];
+
+static uint32_t get_async_flag_by_socketid(int socketid)
+{
+ return dma_bind[socketid].async_flag;
+}
+
+static void init_vid2socketid_array(int vid, int socketid)
+{
+ vid2socketid[vid] = socketid;
+}
+
static inline bool
is_dma_configured(int16_t dev_id)
{
@@ -224,7 +241,7 @@ open_dma(const char *value)
char *addrs = input;
char *ptrs[2];
char *start, *end, *substr;
- int64_t vid;
+ int64_t socketid, vring_id;
struct rte_dma_info info;
struct rte_dma_conf dev_config = { .nb_vchans = 1 };
@@ -262,7 +279,9 @@ open_dma(const char *value)
while (i < args_nr) {
char *arg_temp = dma_arg[i];
+ char *txd, *rxd;
uint8_t sub_nr;
+ int async_flag;
sub_nr = rte_strsplit(arg_temp, strlen(arg_temp), ptrs, 2, '@');
if (sub_nr != 2) {
@@ -270,14 +289,23 @@ open_dma(const char *value)
goto out;
}
- start = strstr(ptrs[0], "txd");
- if (start == NULL) {
+ txd = strstr(ptrs[0], "txd");
+ rxd = strstr(ptrs[0], "rxd");
+ if (txd) {
+ start = txd;
+ vring_id = VIRTIO_RXQ;
+ async_flag = ASYNC_ENQUEUE_VHOST;
+ } else if (rxd) {
+ start = rxd;
+ vring_id = VIRTIO_TXQ;
+ async_flag = ASYNC_DEQUEUE_VHOST;
+ } else {
ret = -1;
goto out;
}
start += 3;
- vid = strtol(start, &end, 0);
+ socketid = strtol(start, &end, 0);
if (end == start) {
ret = -1;
goto out;
@@ -338,7 +366,8 @@ open_dma(const char *value)
dmas_id[dma_count++] = dev_id;
done:
- (dma_info + vid)->dmas[VIRTIO_RXQ].dev_id = dev_id;
+ (dma_info + socketid)->dmas[vring_id].dev_id = dev_id;
+ (dma_info + socketid)->async_flag |= async_flag;
i++;
}
out:
@@ -990,7 +1019,7 @@ complete_async_pkts(struct vhost_dev *vdev)
{
struct rte_mbuf *p_cpl[MAX_PKT_BURST];
uint16_t complete_count;
- int16_t dma_id = dma_bind[vdev->vid].dmas[VIRTIO_RXQ].dev_id;
+ int16_t dma_id = dma_bind[vid2socketid[vdev->vid]].dmas[VIRTIO_RXQ].dev_id;
complete_count = rte_vhost_poll_enqueue_completed(vdev->vid,
VIRTIO_RXQ, p_cpl, MAX_PKT_BURST, dma_id, 0);
@@ -1029,22 +1058,7 @@ drain_vhost(struct vhost_dev *vdev)
uint16_t nr_xmit = vhost_txbuff[buff_idx]->len;
struct rte_mbuf **m = vhost_txbuff[buff_idx]->m_table;
- if (builtin_net_driver) {
- ret = vs_enqueue_pkts(vdev, VIRTIO_RXQ, m, nr_xmit);
- } else if (dma_bind[vdev->vid].dmas[VIRTIO_RXQ].async_enabled) {
- uint16_t enqueue_fail = 0;
- int16_t dma_id = dma_bind[vdev->vid].dmas[VIRTIO_RXQ].dev_id;
-
- complete_async_pkts(vdev);
- ret = rte_vhost_submit_enqueue_burst(vdev->vid, VIRTIO_RXQ, m, nr_xmit, dma_id, 0);
-
- enqueue_fail = nr_xmit - ret;
- if (enqueue_fail)
- free_pkts(&m[ret], nr_xmit - ret);
- } else {
- ret = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
- m, nr_xmit);
- }
+ ret = vdev_queue_ops[vdev->vid].enqueue_pkt_burst(vdev, VIRTIO_RXQ, m, nr_xmit);
if (enable_stats) {
__atomic_add_fetch(&vdev->stats.rx_total_atomic, nr_xmit,
@@ -1053,7 +1067,7 @@ drain_vhost(struct vhost_dev *vdev)
__ATOMIC_SEQ_CST);
}
- if (!dma_bind[vdev->vid].dmas[VIRTIO_RXQ].async_enabled)
+ if (!dma_bind[vid2socketid[vdev->vid]].dmas[VIRTIO_RXQ].async_enabled)
free_pkts(m, nr_xmit);
}
@@ -1325,6 +1339,32 @@ drain_mbuf_table(struct mbuf_table *tx_q)
}
}
+uint16_t
+async_enqueue_pkts(struct vhost_dev *dev, uint16_t queue_id,
+ struct rte_mbuf **pkts, uint32_t rx_count)
+{
+ uint16_t enqueue_count;
+ uint16_t enqueue_fail = 0;
+ uint16_t dma_id = dma_bind[vid2socketid[dev->vid]].dmas[VIRTIO_RXQ].dev_id;
+
+ complete_async_pkts(dev);
+ enqueue_count = rte_vhost_submit_enqueue_burst(dev->vid, queue_id,
+ pkts, rx_count, dma_id, 0);
+
+ enqueue_fail = rx_count - enqueue_count;
+ if (enqueue_fail)
+ free_pkts(&pkts[enqueue_count], enqueue_fail);
+
+ return enqueue_count;
+}
+
+uint16_t
+sync_enqueue_pkts(struct vhost_dev *dev, uint16_t queue_id,
+ struct rte_mbuf **pkts, uint32_t rx_count)
+{
+ return rte_vhost_enqueue_burst(dev->vid, queue_id, pkts, rx_count);
+}
+
static __rte_always_inline void
drain_eth_rx(struct vhost_dev *vdev)
{
@@ -1355,25 +1395,8 @@ drain_eth_rx(struct vhost_dev *vdev)
}
}
- if (builtin_net_driver) {
- enqueue_count = vs_enqueue_pkts(vdev, VIRTIO_RXQ,
- pkts, rx_count);
- } else if (dma_bind[vdev->vid].dmas[VIRTIO_RXQ].async_enabled) {
- uint16_t enqueue_fail = 0;
- int16_t dma_id = dma_bind[vdev->vid].dmas[VIRTIO_RXQ].dev_id;
-
- complete_async_pkts(vdev);
- enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
- VIRTIO_RXQ, pkts, rx_count, dma_id, 0);
-
- enqueue_fail = rx_count - enqueue_count;
- if (enqueue_fail)
- free_pkts(&pkts[enqueue_count], enqueue_fail);
-
- } else {
- enqueue_count = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
- pkts, rx_count);
- }
+ enqueue_count = vdev_queue_ops[vdev->vid].enqueue_pkt_burst(vdev,
+ VIRTIO_RXQ, pkts, rx_count);
if (enable_stats) {
__atomic_add_fetch(&vdev->stats.rx_total_atomic, rx_count,
@@ -1382,10 +1405,31 @@ drain_eth_rx(struct vhost_dev *vdev)
__ATOMIC_SEQ_CST);
}
- if (!dma_bind[vdev->vid].dmas[VIRTIO_RXQ].async_enabled)
+ if (!dma_bind[vid2socketid[vdev->vid]].dmas[VIRTIO_RXQ].async_enabled)
free_pkts(pkts, rx_count);
}
+uint16_t async_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
+ struct rte_mempool *mbuf_pool,
+ struct rte_mbuf **pkts, uint16_t count)
+{
+ int nr_inflight;
+ uint16_t dequeue_count;
+ uint16_t dma_id = dma_bind[vid2socketid[dev->vid]].dmas[VIRTIO_TXQ].dev_id;
+
+ dequeue_count = rte_vhost_async_try_dequeue_burst(dev->vid, queue_id,
+ mbuf_pool, pkts, count, &nr_inflight, dma_id, 0);
+
+ return dequeue_count;
+}
+
+uint16_t sync_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
+ struct rte_mempool *mbuf_pool,
+ struct rte_mbuf **pkts, uint16_t count)
+{
+ return rte_vhost_dequeue_burst(dev->vid, queue_id, mbuf_pool, pkts, count);
+}
+
static __rte_always_inline void
drain_virtio_tx(struct vhost_dev *vdev)
{
@@ -1393,13 +1437,8 @@ drain_virtio_tx(struct vhost_dev *vdev)
uint16_t count;
uint16_t i;
- if (builtin_net_driver) {
- count = vs_dequeue_pkts(vdev, VIRTIO_TXQ, mbuf_pool,
- pkts, MAX_PKT_BURST);
- } else {
- count = rte_vhost_dequeue_burst(vdev->vid, VIRTIO_TXQ,
- mbuf_pool, pkts, MAX_PKT_BURST);
- }
+ count = vdev_queue_ops[vdev->vid].dequeue_pkt_burst(vdev,
+ VIRTIO_TXQ, mbuf_pool, pkts, MAX_PKT_BURST);
/* setup VMDq for the first packet */
if (unlikely(vdev->ready == DEVICE_MAC_LEARNING) && count) {
@@ -1478,6 +1517,26 @@ switch_worker(void *arg __rte_unused)
return 0;
}
+static void
+vhost_clear_queue_thread_unsafe(struct vhost_dev *vdev, uint16_t queue_id)
+{
+ uint16_t n_pkt = 0;
+ int pkts_inflight;
+
+ int16_t dma_id = dma_bind[vid2socketid[vdev->vid]].dmas[queue_id].dev_id;
+ pkts_inflight = rte_vhost_async_get_inflight_thread_unsafe(vdev->vid, queue_id);
+
+ struct rte_mbuf *m_cpl[pkts_inflight];
+
+ while (pkts_inflight) {
+ n_pkt = rte_vhost_clear_queue_thread_unsafe(vdev->vid, queue_id, m_cpl,
+ pkts_inflight, dma_id, 0);
+ free_pkts(m_cpl, n_pkt);
+ pkts_inflight = rte_vhost_async_get_inflight_thread_unsafe(vdev->vid,
+ queue_id);
+ }
+}
+
/*
* Remove a device from the specific data core linked list and from the
* main linked list. Synchronization occurs through the use of the
@@ -1535,27 +1594,79 @@ destroy_device(int vid)
vdev->vid);
if (dma_bind[vid].dmas[VIRTIO_RXQ].async_enabled) {
- uint16_t n_pkt = 0;
- int pkts_inflight;
- int16_t dma_id = dma_bind[vid].dmas[VIRTIO_RXQ].dev_id;
- pkts_inflight = rte_vhost_async_get_inflight_thread_unsafe(vid, VIRTIO_RXQ);
- struct rte_mbuf *m_cpl[pkts_inflight];
-
- while (pkts_inflight) {
- n_pkt = rte_vhost_clear_queue_thread_unsafe(vid, VIRTIO_RXQ,
- m_cpl, pkts_inflight, dma_id, 0);
- free_pkts(m_cpl, n_pkt);
- pkts_inflight = rte_vhost_async_get_inflight_thread_unsafe(vid,
- VIRTIO_RXQ);
- }
-
+ vhost_clear_queue_thread_unsafe(vdev, VIRTIO_RXQ);
rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ);
dma_bind[vid].dmas[VIRTIO_RXQ].async_enabled = false;
}
+ if (dma_bind[vid].dmas[VIRTIO_TXQ].async_enabled) {
+ vhost_clear_queue_thread_unsafe(vdev, VIRTIO_TXQ);
+ rte_vhost_async_channel_unregister(vid, VIRTIO_TXQ);
+ dma_bind[vid].dmas[VIRTIO_TXQ].async_enabled = false;
+ }
+
rte_free(vdev);
}
+static int
+get_socketid_by_vid(int vid)
+{
+ int i;
+ char ifname[PATH_MAX];
+ rte_vhost_get_ifname(vid, ifname, sizeof(ifname));
+
+ for (i = 0; i < nb_sockets; i++) {
+ char *file = socket_files + i * PATH_MAX;
+ if (strcmp(file, ifname) == 0)
+ return i;
+ }
+
+ return -1;
+}
+
+static int
+init_vhost_queue_ops(int vid)
+{
+ if (builtin_net_driver) {
+ vdev_queue_ops[vid].enqueue_pkt_burst = builtin_enqueue_pkts;
+ vdev_queue_ops[vid].dequeue_pkt_burst = builtin_dequeue_pkts;
+ } else {
+ if (dma_bind[vid2socketid[vid]].dmas[VIRTIO_RXQ].async_enabled)
+ vdev_queue_ops[vid].enqueue_pkt_burst = async_enqueue_pkts;
+ else
+ vdev_queue_ops[vid].enqueue_pkt_burst = sync_enqueue_pkts;
+
+ if (dma_bind[vid2socketid[vid]].dmas[VIRTIO_TXQ].async_enabled)
+ vdev_queue_ops[vid].dequeue_pkt_burst = async_dequeue_pkts;
+ else
+ vdev_queue_ops[vid].dequeue_pkt_burst = sync_dequeue_pkts;
+ }
+
+ return 0;
+}
+
+static int
+vhost_async_channel_register(int vid)
+{
+ int rx_ret = 0, tx_ret = 0;
+
+ if (dma_bind[vid2socketid[vid]].dmas[VIRTIO_RXQ].dev_id != INVALID_DMA_ID) {
+ rx_ret = rte_vhost_async_channel_register(vid, VIRTIO_RXQ);
+ if (rx_ret == 0)
+ dma_bind[vid2socketid[vid]].dmas[VIRTIO_RXQ].async_enabled = true;
+ }
+
+ if (dma_bind[vid2socketid[vid]].dmas[VIRTIO_TXQ].dev_id != INVALID_DMA_ID) {
+ tx_ret = rte_vhost_async_channel_register(vid, VIRTIO_TXQ);
+ if (tx_ret == 0)
+ dma_bind[vid2socketid[vid]].dmas[VIRTIO_TXQ].async_enabled = true;
+ }
+
+ return rx_ret | tx_ret;
+}
+
+
+
/*
* A new device is added to a data core. First the device is added to the main linked list
* and then allocated to a specific data core.
@@ -1567,6 +1678,8 @@ new_device(int vid)
uint16_t i;
uint32_t device_num_min = num_devices;
struct vhost_dev *vdev;
+ int ret;
+
vdev = rte_zmalloc("vhost device", sizeof(*vdev), RTE_CACHE_LINE_SIZE);
if (vdev == NULL) {
RTE_LOG(INFO, VHOST_DATA,
@@ -1589,6 +1702,17 @@ new_device(int vid)
}
}
+ int socketid = get_socketid_by_vid(vid);
+ if (socketid == -1)
+ return -1;
+
+ init_vid2socketid_array(vid, socketid);
+
+ ret = vhost_async_channel_register(vid);
+
+ if (init_vhost_queue_ops(vid) != 0)
+ return -1;
+
if (builtin_net_driver)
vs_vhost_net_setup(vdev);
@@ -1620,16 +1744,7 @@ new_device(int vid)
"(%d) device has been added to data core %d\n",
vid, vdev->coreid);
- if (dma_bind[vid].dmas[VIRTIO_RXQ].dev_id != INVALID_DMA_ID) {
- int ret;
-
- ret = rte_vhost_async_channel_register(vid, VIRTIO_RXQ);
- if (ret == 0)
- dma_bind[vid].dmas[VIRTIO_RXQ].async_enabled = true;
- return ret;
- }
-
- return 0;
+ return ret;
}
static int
@@ -1647,22 +1762,9 @@ vring_state_changed(int vid, uint16_t queue_id, int enable)
if (queue_id != VIRTIO_RXQ)
return 0;
- if (dma_bind[vid].dmas[queue_id].async_enabled) {
- if (!enable) {
- uint16_t n_pkt = 0;
- int pkts_inflight;
- pkts_inflight = rte_vhost_async_get_inflight_thread_unsafe(vid, queue_id);
- int16_t dma_id = dma_bind[vid].dmas[VIRTIO_RXQ].dev_id;
- struct rte_mbuf *m_cpl[pkts_inflight];
-
- while (pkts_inflight) {
- n_pkt = rte_vhost_clear_queue_thread_unsafe(vid, queue_id,
- m_cpl, pkts_inflight, dma_id, 0);
- free_pkts(m_cpl, n_pkt);
- pkts_inflight = rte_vhost_async_get_inflight_thread_unsafe(vid,
- queue_id);
- }
- }
+ if (dma_bind[vid2socketid[vid]].dmas[queue_id].async_enabled) {
+ if (!enable)
+ vhost_clear_queue_thread_unsafe(vdev, queue_id);
}
return 0;
@@ -1887,7 +1989,7 @@ main(int argc, char *argv[])
for (i = 0; i < nb_sockets; i++) {
char *file = socket_files + i * PATH_MAX;
- if (dma_count)
+ if (dma_count && get_async_flag_by_socketid(i) != 0)
flags = flags | RTE_VHOST_USER_ASYNC_COPY;
ret = rte_vhost_driver_register(file, flags);
diff --git a/examples/vhost/main.h b/examples/vhost/main.h
index e7f395c3c9..2fcb8376c5 100644
--- a/examples/vhost/main.h
+++ b/examples/vhost/main.h
@@ -61,6 +61,19 @@ struct vhost_dev {
struct vhost_queue queues[MAX_QUEUE_PAIRS * 2];
} __rte_cache_aligned;
+typedef uint16_t (*vhost_enqueue_burst_t)(struct vhost_dev *dev,
+ uint16_t queue_id, struct rte_mbuf **pkts,
+ uint32_t count);
+
+typedef uint16_t (*vhost_dequeue_burst_t)(struct vhost_dev *dev,
+ uint16_t queue_id, struct rte_mempool *mbuf_pool,
+ struct rte_mbuf **pkts, uint16_t count);
+
+struct vhost_queue_ops {
+ vhost_enqueue_burst_t enqueue_pkt_burst;
+ vhost_dequeue_burst_t dequeue_pkt_burst;
+};
+
TAILQ_HEAD(vhost_dev_tailq_list, vhost_dev);
@@ -87,6 +100,7 @@ struct dma_info {
struct dma_for_vhost {
struct dma_info dmas[RTE_MAX_QUEUES_PER_PORT * 2];
+ uint32_t async_flag;
};
/* we implement non-extra virtio net features */
@@ -97,7 +111,19 @@ void vs_vhost_net_remove(struct vhost_dev *dev);
uint16_t vs_enqueue_pkts(struct vhost_dev *dev, uint16_t queue_id,
struct rte_mbuf **pkts, uint32_t count);
-uint16_t vs_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
- struct rte_mempool *mbuf_pool,
- struct rte_mbuf **pkts, uint16_t count);
+uint16_t builtin_enqueue_pkts(struct vhost_dev *dev, uint16_t queue_id,
+ struct rte_mbuf **pkts, uint32_t count);
+uint16_t builtin_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
+ struct rte_mempool *mbuf_pool,
+ struct rte_mbuf **pkts, uint16_t count);
+uint16_t sync_enqueue_pkts(struct vhost_dev *dev, uint16_t queue_id,
+ struct rte_mbuf **pkts, uint32_t count);
+uint16_t sync_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
+ struct rte_mempool *mbuf_pool,
+ struct rte_mbuf **pkts, uint16_t count);
+uint16_t async_enqueue_pkts(struct vhost_dev *dev, uint16_t queue_id,
+ struct rte_mbuf **pkts, uint32_t count);
+uint16_t async_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
+ struct rte_mempool *mbuf_pool,
+ struct rte_mbuf **pkts, uint16_t count);
#endif /* _MAIN_H_ */
diff --git a/examples/vhost/virtio_net.c b/examples/vhost/virtio_net.c
index 9064fc3a82..2432a96566 100644
--- a/examples/vhost/virtio_net.c
+++ b/examples/vhost/virtio_net.c
@@ -238,6 +238,13 @@ vs_enqueue_pkts(struct vhost_dev *dev, uint16_t queue_id,
return count;
}
+uint16_t
+builtin_enqueue_pkts(struct vhost_dev *dev, uint16_t queue_id,
+ struct rte_mbuf **pkts, uint32_t count)
+{
+ return vs_enqueue_pkts(dev, queue_id, pkts, count);
+}
+
static __rte_always_inline int
dequeue_pkt(struct vhost_dev *dev, struct rte_vhost_vring *vr,
struct rte_mbuf *m, uint16_t desc_idx,
@@ -363,7 +370,7 @@ dequeue_pkt(struct vhost_dev *dev, struct rte_vhost_vring *vr,
return 0;
}
-uint16_t
+static uint16_t
vs_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
{
@@ -440,3 +447,10 @@ vs_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
return i;
}
+
+uint16_t
+builtin_dequeue_pkts(struct vhost_dev *dev, uint16_t queue_id,
+ struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count)
+{
+ return vs_dequeue_pkts(dev, queue_id, mbuf_pool, pkts, count);
+}