@@ -106,8 +106,39 @@ Run-time configuration
- **ethtool** operations on related kernel interfaces also affect the PMD.
+Driver options
+^^^^^^^^^^^^^^
+
- ``class`` parameter [string]
Select the class of the driver that should probe the device.
`vdpa` for the mlx5 vDPA driver.
+- ``event_mode`` parameter [int]
+
+ - 0, Completion queue scheduling will be managed by a timer thread which
+ automatically adjusts its delays to the incoming traffic rate.
+
+ - 1, Completion queue scheduling will be managed by a timer thread with fixed
+ delay time.
+
+ - 2, Completion queue scheduling will be managed by interrupts. Each CQ burst
+ arms the CQ in order to get an interrupt event in the next traffic burst.
+
+ - Default mode is 0.
+
+- ``event_us`` parameter [int]
+
+ Per-mode microseconds parameter, relevant only for event modes 0 and 1:
+
+ - 0, A nonzero value sets the timer step in microseconds. The timer thread
+ adjusts its dynamic delay in steps of this value. Default value is 1us.
+
+ - 1, A nonzero value to set fixed timer delay in micro-seconds. Default value
+ is 100us.
+
+- ``no_traffic_time`` parameter [int]
+
+ A nonzero value defines the traffic off time, in seconds, that moves the
+ driver to no-traffic mode. In this mode the timer events are stopped and
+ interrupts are armed on the device so that it notifies the driver when
+ traffic resumes. Default value is 2s.
@@ -43,6 +43,7 @@
#define MLX5_VDPA_MAX_RETRIES 20
#define MLX5_VDPA_USEC 1000
+#define MLX5_VDPA_DEFAULT_NO_TRAFFIC_TIME_S 2LLU /* Default no_traffic_time, in seconds. */
TAILQ_HEAD(mlx5_vdpa_privs, mlx5_vdpa_priv) priv_list =
TAILQ_HEAD_INITIALIZER(priv_list);
@@ -605,6 +606,61 @@
return -rte_errno;
}
+/**
+ * Devargs handler: parse a single key/value pair into the private structure.
+ *
+ * Only "event_mode", "event_us" and "no_traffic_time" are parsed; "class" is
+ * skipped silently and any other key is reported and skipped.
+ *
+ * @param[in] key
+ *   Device argument name.
+ * @param[in] val
+ *   Device argument value, expected to be an unsigned integer string.
+ * @param[out] opaque
+ *   Pointer to the struct mlx5_vdpa_priv receiving the parsed value.
+ *
+ * @return
+ *   0 when the pair was accepted or skipped, a negative errno value when the
+ *   value of a known key is missing, malformed or does not fit in 32 bits.
+ */
+static int
+mlx5_vdpa_args_check_handler(const char *key, const char *val, void *opaque)
+{
+	struct mlx5_vdpa_priv *priv = opaque;
+	unsigned long long tmp;
+	char *end = NULL;
+	int err;
+
+	if (strcmp(key, "class") == 0)
+		return 0;
+	if (strcmp(key, "event_mode") != 0 && strcmp(key, "event_us") != 0 &&
+	    strcmp(key, "no_traffic_time") != 0) {
+		/* Not ours: report it without trying to parse its value. */
+		DRV_LOG(WARNING, "Invalid key %s.", key);
+		return 0;
+	}
+	if (val == NULL) {
+		DRV_LOG(WARNING, "%s: missing value.", key);
+		return -EINVAL;
+	}
+	errno = 0;
+	tmp = strtoull(val, &end, 0);
+	/* Save errno before logging, the log call may overwrite it. */
+	err = errno;
+	if (err == 0 && (end == val || *end != '\0'))
+		err = EINVAL; /* No digits at all, or trailing garbage. */
+	if (err == 0 && tmp > UINT32_MAX)
+		err = ERANGE; /* Would be silently truncated by the casts. */
+	if (err != 0) {
+		DRV_LOG(WARNING, "%s: \"%s\" is an invalid integer.", key, val);
+		return -err;
+	}
+	if (strcmp(key, "event_mode") == 0) {
+		if (tmp <= MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT)
+			priv->event_mode = (int)tmp;
+		else
+			DRV_LOG(WARNING, "Invalid event_mode %s.", val);
+	} else if (strcmp(key, "event_us") == 0) {
+		priv->event_us = (uint32_t)tmp;
+	} else {
+		/* Only "no_traffic_time" can reach this point. */
+		priv->no_traffic_time_s = (uint32_t)tmp;
+	}
+	return 0;
+}
+
+/**
+ * Fill the event configuration of a device from its devargs.
+ *
+ * Defaults are set first, then overridden by whatever the devargs provide.
+ * The mode-dependent default of event_us is resolved on every path, including
+ * when there are no devargs or they cannot be parsed, so that event_us is
+ * never left at 0 for the timer modes.
+ *
+ * @param[in] devargs
+ *   Device arguments, may be NULL.
+ * @param[out] priv
+ *   Private structure whose event_mode, event_us and no_traffic_time_s
+ *   fields are written.
+ */
+static void
+mlx5_vdpa_config_get(struct rte_devargs *devargs, struct mlx5_vdpa_priv *priv)
+{
+	struct rte_kvargs *kvlist = NULL;
+
+	priv->event_mode = MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER;
+	priv->event_us = 0;
+	priv->no_traffic_time_s = MLX5_VDPA_DEFAULT_NO_TRAFFIC_TIME_S;
+	if (devargs != NULL)
+		kvlist = rte_kvargs_parse(devargs->args, NULL);
+	if (kvlist != NULL) {
+		if (rte_kvargs_process(kvlist, NULL,
+				       mlx5_vdpa_args_check_handler, priv) != 0)
+			DRV_LOG(WARNING,
+				"Device arguments were not fully processed.");
+		rte_kvargs_free(kvlist);
+	}
+	/* event_us == 0 means "not set": pick the default of the mode. */
+	if (priv->event_us == 0) {
+		if (priv->event_mode == MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER)
+			priv->event_us = MLX5_VDPA_DEFAULT_TIMER_STEP_US;
+		else if (priv->event_mode == MLX5_VDPA_EVENT_MODE_FIXED_TIMER)
+			priv->event_us = MLX5_VDPA_DEFAULT_TIMER_DELAY_US;
+	}
+	DRV_LOG(DEBUG, "event mode is %d.", priv->event_mode);
+	DRV_LOG(DEBUG, "event_us is %u us.", priv->event_us);
+	DRV_LOG(DEBUG, "no traffic time is %u s.", priv->no_traffic_time_s);
+}
+
/**
* DPDK callback to register a PCI device.
*
@@ -694,6 +750,7 @@
rte_errno = rte_errno ? rte_errno : EINVAL;
goto error;
}
+ mlx5_vdpa_config_get(pci_dev->device.devargs, priv);
SLIST_INIT(&priv->mr_list);
pthread_mutex_lock(&priv_list_lock);
TAILQ_INSERT_TAIL(&priv_list, priv, next);
@@ -35,6 +35,9 @@
#define VIRTIO_F_RING_PACKED 34
#endif
+#define MLX5_VDPA_DEFAULT_TIMER_DELAY_US 100u /* Fixed-timer default event_us; initial dynamic delay. */
+#define MLX5_VDPA_DEFAULT_TIMER_STEP_US 1u /* Dynamic-timer default event_us (delay step). */
+
struct mlx5_vdpa_cq {
uint16_t log_desc_n;
uint32_t cq_ci:24;
@@ -100,6 +103,12 @@ struct mlx5_vdpa_steer {
} rss[7];
};
+/* Completion event handling modes; the values match the "event_mode" devarg. */
+enum {
+	/* Timer thread whose delay follows the traffic rate. */
+	MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER = 0,
+	/* Timer thread with a fixed delay. */
+	MLX5_VDPA_EVENT_MODE_FIXED_TIMER = 1,
+	/* No timer thread, completion queues are served by interrupts. */
+	MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT = 2
+};
+
struct mlx5_vdpa_priv {
TAILQ_ENTRY(mlx5_vdpa_priv) next;
uint8_t configured;
@@ -109,7 +118,10 @@ struct mlx5_vdpa_priv {
pthread_mutex_t timer_lock;
pthread_cond_t timer_cond;
volatile uint8_t timer_on;
+ int event_mode;
+ uint32_t event_us;
uint32_t timer_delay_us;
+ uint32_t no_traffic_time_s;
int id; /* vDPA device id. */
int vid; /* vhost device id. */
struct ibv_context *ctx; /* Device context. */
@@ -20,9 +20,6 @@
#include "mlx5_vdpa.h"
-#define MLX5_VDPA_DEFAULT_TIMER_DELAY_US 500u
-#define MLX5_VDPA_NO_TRAFFIC_TIME_S 2LLU
-
void
mlx5_vdpa_event_qp_global_release(struct mlx5_vdpa_priv *priv)
{
@@ -175,7 +172,8 @@
rte_errno = errno;
goto error;
}
- if (callfd != -1) {
+ if (callfd != -1 &&
+ priv->event_mode != MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT) {
ret = mlx5_glue->devx_subscribe_devx_event_fd(priv->eventc,
callfd,
cq->cq->obj, 0);
@@ -253,21 +251,43 @@
}
}
+/**
+ * Sleep between two polling passes of the timer thread.
+ *
+ * In dynamic timer mode the delay is adapted first: it grows by event_us when
+ * the last pass reported nothing, is kept when it reported 1 and is divided
+ * by the reported value otherwise. In the other modes the delay is untouched.
+ *
+ * @param[in, out] priv
+ *   Private structure holding the event mode and the current delay.
+ * @param[in] max
+ *   Largest count collected by the caller during its last polling pass.
+ */
+static void
+mlx5_vdpa_timer_sleep(struct mlx5_vdpa_priv *priv, uint32_t max)
+{
+	const int dynamic =
+		priv->event_mode == MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER;
+
+	if (dynamic && max == 0)
+		priv->timer_delay_us += priv->event_us;
+	else if (dynamic && max > 1)
+		priv->timer_delay_us /= max;
+	/* max == 1 in dynamic mode leaves the delay as it is. */
+	usleep(priv->timer_delay_us);
+}
+
static void *
mlx5_vdpa_poll_handle(void *arg)
{
struct mlx5_vdpa_priv *priv = arg;
int i;
struct mlx5_vdpa_cq *cq;
- uint32_t total;
+ uint32_t max;
uint64_t current_tic;
pthread_mutex_lock(&priv->timer_lock);
while (!priv->timer_on)
pthread_cond_wait(&priv->timer_cond, &priv->timer_lock);
pthread_mutex_unlock(&priv->timer_lock);
+ priv->timer_delay_us = priv->event_mode ==
+ MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER ?
+ MLX5_VDPA_DEFAULT_TIMER_DELAY_US :
+ priv->event_us;
while (1) {
- total = 0;
+ max = 0;
for (i = 0; i < priv->nr_virtqs; i++) {
cq = &priv->virtqs[i].eqp.cq;
if (cq->cq && !cq->armed) {
@@ -278,15 +298,16 @@
if (cq->callfd != -1)
eventfd_write(cq->callfd,
(eventfd_t)1);
- total += comp;
+ if (comp > max)
+ max = comp;
}
}
}
current_tic = rte_rdtsc();
- if (!total) {
+ if (!max) {
/* No traffic ? stop timer and load interrupts. */
if (current_tic - priv->last_traffic_tic >=
- rte_get_timer_hz() * MLX5_VDPA_NO_TRAFFIC_TIME_S) {
+ rte_get_timer_hz() * priv->no_traffic_time_s) {
DRV_LOG(DEBUG, "Device %d traffic was stopped.",
priv->id);
mlx5_vdpa_arm_all_cqs(priv);
@@ -296,12 +317,16 @@
pthread_cond_wait(&priv->timer_cond,
&priv->timer_lock);
pthread_mutex_unlock(&priv->timer_lock);
+ priv->timer_delay_us = priv->event_mode ==
+ MLX5_VDPA_EVENT_MODE_DYNAMIC_TIMER ?
+ MLX5_VDPA_DEFAULT_TIMER_DELAY_US :
+ priv->event_us;
continue;
}
} else {
priv->last_traffic_tic = current_tic;
}
- usleep(priv->timer_delay_us);
+ mlx5_vdpa_timer_sleep(priv, max);
}
return NULL;
}
@@ -327,6 +352,13 @@
struct mlx5_vdpa_virtq, eqp);
mlx5_vdpa_cq_poll(cq);
+ if (priv->event_mode == MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT) {
+ mlx5_vdpa_cq_arm(priv, cq);
+ /* Notify guest for descs consuming. */
+ if (cq->callfd != -1)
+ eventfd_write(cq->callfd, (eventfd_t)1);
+ return;
+ }
/* Don't arm again - timer will take control. */
DRV_LOG(DEBUG, "Device %d virtq %d cq %d event was captured."
" Timer is %s, cq ci is %u.\n", priv->id,
@@ -355,15 +387,16 @@
if (!priv->eventc)
/* All virtqs are in poll mode. */
return 0;
- pthread_mutex_init(&priv->timer_lock, NULL);
- pthread_cond_init(&priv->timer_cond, NULL);
- priv->timer_on = 0;
- priv->timer_delay_us = MLX5_VDPA_DEFAULT_TIMER_DELAY_US;
- ret = pthread_create(&priv->timer_tid, NULL, mlx5_vdpa_poll_handle,
- (void *)priv);
- if (ret) {
- DRV_LOG(ERR, "Failed to create timer thread.");
- return -1;
+ if (priv->event_mode != MLX5_VDPA_EVENT_MODE_ONLY_INTERRUPT) {
+ pthread_mutex_init(&priv->timer_lock, NULL);
+ pthread_cond_init(&priv->timer_cond, NULL);
+ priv->timer_on = 0;
+ ret = pthread_create(&priv->timer_tid, NULL,
+ mlx5_vdpa_poll_handle, (void *)priv);
+ if (ret) {
+ DRV_LOG(ERR, "Failed to create timer thread.");
+ return -1;
+ }
}
flags = fcntl(priv->eventc->fd, F_GETFL);
ret = fcntl(priv->eventc->fd, F_SETFL, flags | O_NONBLOCK);