@@ -24,11 +24,15 @@
#include <rte_ip.h>
#include <rte_tcp.h>
#include <rte_pause.h>
+#include <rte_vhost_async.h>
+#include <rte_rawdev.h>
+#include <rte_ioat_rawdev.h>
+#include <rte_pci.h>
#include "main.h"
#ifndef MAX_QUEUES
-#define MAX_QUEUES 128
+#define MAX_QUEUES 512
#endif
/* the maximum number of external ports supported */
@@ -58,6 +62,12 @@
/* Maximum long option length for option parsing. */
#define MAX_LONG_OPT_SZ 64
+#define IOAT_RING_SIZE 4096
+
+#define MAX_ENQUEUED_SIZE 2048
+
+#define MAX_VHOST_DEVICE 1024
+
/* mask of enabled ports */
static uint32_t enabled_port_mask = 0;
@@ -96,6 +106,20 @@ static int dequeue_zero_copy;
static int builtin_net_driver;
+static int async_vhost_driver;
+
+struct dma_info {
+ struct rte_pci_addr addr;
+ uint16_t dev_id;
+ bool is_valid;
+};
+
+struct dma_info_input {
+ struct dma_info dmas[RTE_MAX_QUEUES_PER_PORT * 2];
+ uint16_t nr;
+};
+
+static struct dma_info_input dma_bind[MAX_VHOST_DEVICE];
/* Specify timeout (in useconds) between retries on RX. */
static uint32_t burst_rx_delay_time = BURST_RX_WAIT_US;
/* Specify the number of retries on RX. */
@@ -141,6 +165,61 @@ static struct rte_eth_conf vmdq_conf_default = {
},
};
+/* Callback for rte_vhost_async: copy the scatter-gather segments of
+ * @count descriptors through the IOAT rawdev bound to this vring and
+ * ring the doorbell once.  Returns the number of descriptors walked
+ * (segment enqueue stops early if the IOAT ring is full).
+ */
+static int
+ioat_transfer_data_cb(int vid, uint16_t queue_id, struct rte_vhost_async_desc *descs,
+	struct rte_vhost_async_status *opaque_data, uint16_t count)
+{
+	struct rte_vhost_iov_iter *src;
+	struct rte_vhost_iov_iter *dst;
+	unsigned long i_seg;
+	uint16_t i_desc;
+	int dev_id = dma_bind[vid].dmas[queue_id * 2 + VIRTIO_RXQ].dev_id;
+
+	/* Opaque data is not supported */
+	if (unlikely(opaque_data != NULL))
+		return -1;
+
+	for (i_desc = 0; i_desc < count; i_desc++) {
+		src = descs[i_desc].src;
+		dst = descs[i_desc].dst;
+		for (i_seg = 0; i_seg < src->nr_segs; i_seg++) {
+			int ret = rte_ioat_enqueue_copy(dev_id,
+				(uintptr_t)(src->iov[i_seg].iov_base)
+				+ src->offset,
+				(uintptr_t)(dst->iov[i_seg].iov_base)
+				+ dst->offset,
+				src->iov[i_seg].iov_len,
+				0,
+				0,
+				0);
+			if (ret != 1)
+				break;
+		}
+	}
+	/* ring the doorbell */
+	rte_ioat_do_copies(dev_id);
+	return i_desc;
+}
+
+/* Drain up to 255 completed IOAT copies on this vring's channel;
+ * the returned src/dst handles are discarded (dump reused for both).
+ */
+static int
+ioat_check_completed_copies_cb(int vid, uint16_t queue_id,
+	struct rte_vhost_async_status *opaque_data,
+	uint16_t max_packets __rte_unused)
+{
+	uintptr_t dump[255];
+
+	/* Opaque data is not supported */
+	if (opaque_data)
+		return -1;
+	return rte_ioat_completed_copies(dma_bind[vid].dmas[queue_id * 2
+		+ VIRTIO_RXQ].dev_id, 255, dump, dump);
+}
static unsigned lcore_ids[RTE_MAX_LCORE];
static uint16_t ports[RTE_MAX_ETHPORTS];
@@ -186,6 +265,94 @@ struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE];
* Builds up the correct configuration for VMDQ VLAN pool map
* according to the pool & queue limits.
*/
+
+static inline int
+open_dma(const char *value, void *dma_bind_info)
+{
+	struct dma_info_input *dma_info = dma_bind_info;
+	char *input = strndup(value, strlen(value) + 1);
+	char *addrs = input;
+	char *ptrs[2];
+	char *start, *end, *substr;
+	int64_t qid, vring_id;
+	struct rte_ioat_rawdev_config config;
+	struct rte_rawdev_info info = { .dev_private = &config };
+	char name[32];
+	int dev_id;
+	int ret = 0;
+
+	/* Expected syntax: [txqN@BDF;txqM@BDF;...] */
+	while (isblank(*addrs))
+		addrs++;
+	if (*addrs == '\0') {
+		ret = -1;
+		goto out;
+	}
+
+	/* process DMA devices within bracket. */
+	addrs++;
+	substr = strtok(addrs, ";]");
+	if (!substr) {
+		ret = -1;
+		goto out;
+	}
+
+	do {
+		/* need both txqN and a PCI BDF, else ptrs[1] is garbage */
+		if (rte_strsplit(substr, strlen(substr),
+				ptrs, 2, '@') < 2) {
+			ret = -1;
+			goto out;
+		}
+		start = strstr(ptrs[0], "txq");
+		if (start == NULL) {
+			ret = -1;
+			goto out;
+		}
+		start += 3;
+		qid = strtol(start, &end, 0);
+		if (end == start) {
+			ret = -1;
+			goto out;
+		}
+
+		vring_id = qid * 2 + VIRTIO_RXQ;
+		if (rte_pci_addr_parse(ptrs[1],
+			&dma_info->dmas[vring_id].addr) < 0) {
+			ret = -1;
+			goto out;
+		}
+		rte_pci_device_name(&dma_info->dmas[vring_id].addr,
+			name, sizeof(name));
+		dev_id = rte_rawdev_get_dev_id(name);
+		if (dev_id == (uint16_t)(-ENODEV) ||
+		dev_id == (uint16_t)(-EINVAL)) {
+			ret = -1;
+			goto out;
+		}
+		/* only IOAT rawdevs can back the async data path */
+		if (rte_rawdev_info_get(dev_id, &info) < 0 ||
+		strstr(info.driver_name, "ioat") == NULL) {
+			ret = -1;
+			goto out;
+		}
+		dma_info->dmas[vring_id].dev_id = dev_id;
+		dma_info->dmas[vring_id].is_valid = true;
+		config.ring_size = IOAT_RING_SIZE;
+		if (rte_rawdev_configure(dev_id, &info) < 0) {
+			ret = -1;
+			goto out;
+		}
+		rte_rawdev_start(dev_id);
+		dma_info->nr++;
+		substr = strtok(NULL, ";]");
+	} while (substr);
+
+out:
+	free(input);
+	return ret;
+}
+
static inline int
get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_devices)
{
@@ -488,6 +655,8 @@ us_vhost_parse_args(int argc, char **argv)
{"client", no_argument, &client_mode, 1},
{"dequeue-zero-copy", no_argument, &dequeue_zero_copy, 1},
{"builtin-net-driver", no_argument, &builtin_net_driver, 1},
+ {"async_vhost_driver", no_argument, &async_vhost_driver, 1},
+ {"dmas", required_argument, NULL, 0},
{NULL, 0, 0, 0},
};
@@ -623,13 +792,25 @@ us_vhost_parse_args(int argc, char **argv)
"socket-file", MAX_LONG_OPT_SZ)) {
if (us_vhost_parse_socket_path(optarg) == -1) {
RTE_LOG(INFO, VHOST_CONFIG,
- "Invalid argument for socket name (Max %d characters)\n",
- PATH_MAX);
+ "Invalid argument for socket name (Max %d characters)\n",
+ PATH_MAX);
us_vhost_usage(prgname);
return -1;
}
}
+			if (!strncmp(long_option[option_index].name,
+				"dmas", MAX_LONG_OPT_SZ)) {
+				/* Parse the --dmas device list; a malformed
+				 * list is fatal, so report usage and quit. */
+				if (open_dma(optarg, &(dma_bind[0])) == -1) {
+					RTE_LOG(INFO, VHOST_CONFIG,
+						"Wrong DMA args\n");
+					us_vhost_usage(prgname);
+					return -1;
+				}
+			}
+
break;
/* Invalid option - print options. */
@@ -785,9 +966,26 @@ virtio_xmit(struct vhost_dev *dst_vdev, struct vhost_dev *src_vdev,
struct rte_mbuf *m)
{
uint16_t ret;
+ struct rte_mbuf *m_cpl[1];
if (builtin_net_driver) {
ret = vs_enqueue_pkts(dst_vdev, VIRTIO_RXQ, &m, 1);
+ } else if (async_vhost_driver) {
+ ret = rte_vhost_submit_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ,
+ &m, 1);
+
+ if (likely(ret)) {
+ dst_vdev->nr_async_pkts++;
+ rte_mbuf_refcnt_update(m, 1);
+ }
+
+ while (likely(dst_vdev->nr_async_pkts)) {
+ if (rte_vhost_poll_enqueue_completed(dst_vdev->vid,
+ VIRTIO_RXQ, m_cpl, 1)) {
+ dst_vdev->nr_async_pkts--;
+ rte_pktmbuf_free(*m_cpl);
+ }
+ }
} else {
ret = rte_vhost_enqueue_burst(dst_vdev->vid, VIRTIO_RXQ, &m, 1);
}
@@ -1036,6 +1234,19 @@ drain_mbuf_table(struct mbuf_table *tx_q)
}
}
+/* Reap finished async enqueues on @qid and free their mbufs. */
+static __rte_always_inline void
+complete_async_pkts(struct vhost_dev *vdev, uint16_t qid)
+{
+	struct rte_mbuf *done[MAX_PKT_BURST];
+	uint16_t n_done = rte_vhost_poll_enqueue_completed(vdev->vid,
+		qid, done, MAX_PKT_BURST);
+
+	vdev->nr_async_pkts -= n_done;
+	if (n_done)
+		free_pkts(done, n_done);
+}
+
static __rte_always_inline void
drain_eth_rx(struct vhost_dev *vdev)
{
@@ -1044,6 +1255,10 @@ drain_eth_rx(struct vhost_dev *vdev)
rx_count = rte_eth_rx_burst(ports[0], vdev->vmdq_rx_q,
pkts, MAX_PKT_BURST);
+
+ while (likely(vdev->nr_async_pkts))
+ complete_async_pkts(vdev, VIRTIO_RXQ);
+
if (!rx_count)
return;
@@ -1068,16 +1283,22 @@ drain_eth_rx(struct vhost_dev *vdev)
if (builtin_net_driver) {
enqueue_count = vs_enqueue_pkts(vdev, VIRTIO_RXQ,
pkts, rx_count);
+ } else if (async_vhost_driver) {
+ enqueue_count = rte_vhost_submit_enqueue_burst(vdev->vid,
+ VIRTIO_RXQ, pkts, rx_count);
+ vdev->nr_async_pkts += enqueue_count;
} else {
enqueue_count = rte_vhost_enqueue_burst(vdev->vid, VIRTIO_RXQ,
pkts, rx_count);
}
+
if (enable_stats) {
rte_atomic64_add(&vdev->stats.rx_total_atomic, rx_count);
rte_atomic64_add(&vdev->stats.rx_atomic, enqueue_count);
}
- free_pkts(pkts, rx_count);
+ if (!async_vhost_driver)
+ free_pkts(pkts, rx_count);
}
static __rte_always_inline void
@@ -1224,6 +1445,9 @@ destroy_device(int vid)
"(%d) device has been removed from data core\n",
vdev->vid);
+ if (async_vhost_driver)
+ rte_vhost_async_channel_unregister(vid, VIRTIO_RXQ);
+
rte_free(vdev);
}
@@ -1238,6 +1462,12 @@ new_device(int vid)
uint32_t device_num_min = num_devices;
struct vhost_dev *vdev;
+ struct rte_vhost_async_channel_ops channel_ops = {
+ .transfer_data = ioat_transfer_data_cb,
+ .check_completed_copies = ioat_check_completed_copies_cb
+ };
+ struct rte_vhost_async_features f;
+
vdev = rte_zmalloc("vhost device", sizeof(*vdev), RTE_CACHE_LINE_SIZE);
if (vdev == NULL) {
RTE_LOG(INFO, VHOST_DATA,
@@ -1278,6 +1508,13 @@ new_device(int vid)
"(%d) device has been added to data core %d\n",
vid, vdev->coreid);
+ if (async_vhost_driver) {
+ f.async_inorder = 1;
+ f.async_threshold = 256;
+ return rte_vhost_async_channel_register(vid, VIRTIO_RXQ,
+ f.intval, &channel_ops);
+ }
+
return 0;
}
@@ -1519,6 +1756,9 @@ main(int argc, char *argv[])
/* Register vhost user driver to handle vhost messages. */
for (i = 0; i < nb_sockets; i++) {
char *file = socket_files + i * PATH_MAX;
+ if (async_vhost_driver)
+ flags = flags | RTE_VHOST_USER_ASYNC_COPY;
+
ret = rte_vhost_driver_register(file, flags);
if (ret != 0) {
unregister_drivers(i);
@@ -51,6 +51,7 @@ struct vhost_dev {
uint64_t features;
size_t hdr_len;
uint16_t nr_vrings;
+ uint16_t nr_async_pkts;
struct rte_vhost_memory *mem;
struct device_statistics stats;
TAILQ_ENTRY(vhost_dev) global_vdev_entry;