@@ -464,6 +464,10 @@ Limitations
- In order to achieve best insertion rate, application should manage the flows per lcore.
- Better to disable memory reclaim by setting ``reclaim_mem_mode`` to 0 to accelerate the flow object allocation and release with cache.
+- HW hashed bonding
+
+ - TXQ affinity subjects to HW hash once enabled.
+
Statistics
----------
@@ -878,7 +878,6 @@ mlx5_representor_match(struct mlx5_dev_spawn_data *spawn,
return false;
}
-
/**
* Spawn an Ethernet device from Verbs information.
*
@@ -1668,6 +1667,7 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
*/
MLX5_ASSERT(spawn->ifindex);
priv->if_index = spawn->ifindex;
+ priv->lag_affinity_idx = sh->refcnt - 1;
eth_dev->data->dev_private = priv;
priv->dev_data = eth_dev->data;
eth_dev->data->mac_addrs = priv->mac;
@@ -1256,6 +1256,68 @@ mlx5_dev_ctx_shared_mempool_subscribe(struct rte_eth_dev *dev)
return 0;
}
+/**
+ * Set up multiple TISs with different affinities according to
+ * number of bonding ports
+ *
+ * @param priv
+ * Pointer of shared context.
+ *
+ * @return
+ * Zero on success, -1 otherwise.
+ */
+static int
+mlx5_setup_tis(struct mlx5_dev_ctx_shared *sh)
+{
+ int i;
+ struct mlx5_devx_lag_context lag_ctx = { 0 };
+ struct mlx5_devx_tis_attr tis_attr = { 0 };
+
+ tis_attr.transport_domain = sh->td->id;
+ if (sh->bond.n_port) {
+ if (!mlx5_devx_cmd_query_lag(sh->ctx, &lag_ctx)) {
+ sh->lag.tx_remap_affinity[0] =
+ lag_ctx.tx_remap_affinity_1;
+ sh->lag.tx_remap_affinity[1] =
+ lag_ctx.tx_remap_affinity_2;
+ sh->lag.affinity_mode = lag_ctx.port_select_mode;
+ } else {
+ DRV_LOG(ERR, "Failed to query lag affinity.");
+ return -1;
+ }
+ if (sh->lag.affinity_mode == MLX5_LAG_MODE_TIS) {
+ for (i = 0; i < sh->bond.n_port; i++) {
+ tis_attr.lag_tx_port_affinity =
+ MLX5_IFC_LAG_MAP_TIS_AFFINITY(i,
+ sh->bond.n_port);
+ sh->tis[i] = mlx5_devx_cmd_create_tis(sh->ctx,
+ &tis_attr);
+ if (!sh->tis[i]) {
+ DRV_LOG(ERR, "Failed to TIS %d/%d for bonding device"
+ " %s.", i, sh->bond.n_port,
+ sh->ibdev_name);
+ return -1;
+ }
+ }
+ DRV_LOG(DEBUG, "LAG number of ports : %d, affinity_1 & 2 : pf%d & %d.\n",
+ sh->bond.n_port, lag_ctx.tx_remap_affinity_1,
+ lag_ctx.tx_remap_affinity_2);
+ return 0;
+ }
+ if (sh->lag.affinity_mode == MLX5_LAG_MODE_HASH)
+ DRV_LOG(INFO, "Device %s enabled HW hash based LAG.",
+ sh->ibdev_name);
+ }
+ tis_attr.lag_tx_port_affinity = 0;
+ sh->tis[0] = mlx5_devx_cmd_create_tis(sh->ctx, &tis_attr);
+ if (!sh->tis[0]) {
+ DRV_LOG(ERR, "Failed to TIS 0 for bonding device"
+ " %s.", sh->ibdev_name);
+ return -1;
+ }
+ return 0;
+}
+
/**
* Allocate shared device context. If there is multiport device the
* master and representors will share this context, if there is single
@@ -1283,7 +1345,6 @@ mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn,
struct mlx5_dev_ctx_shared *sh;
int err = 0;
uint32_t i;
- struct mlx5_devx_tis_attr tis_attr = { 0 };
MLX5_ASSERT(spawn);
/* Secondary process should not create the shared context. */
@@ -1354,9 +1415,7 @@ mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn,
err = ENOMEM;
goto error;
}
- tis_attr.transport_domain = sh->td->id;
- sh->tis = mlx5_devx_cmd_create_tis(sh->ctx, &tis_attr);
- if (!sh->tis) {
+ if (mlx5_setup_tis(sh)) {
DRV_LOG(ERR, "TIS allocation failure");
err = ENOMEM;
goto error;
@@ -1420,10 +1479,13 @@ mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn,
MLX5_ASSERT(sh);
if (sh->share_cache.cache.table)
mlx5_mr_btree_free(&sh->share_cache.cache);
- if (sh->tis)
- claim_zero(mlx5_devx_cmd_destroy(sh->tis));
if (sh->td)
claim_zero(mlx5_devx_cmd_destroy(sh->td));
+ i = 0;
+ do {
+ if (sh->tis[i])
+ claim_zero(mlx5_devx_cmd_destroy(sh->tis[i]));
+ } while (++i < (uint32_t)sh->bond.n_port);
if (sh->devx_rx_uar)
mlx5_glue->devx_free_uar(sh->devx_rx_uar);
if (sh->tx_uar)
@@ -1449,6 +1511,7 @@ void
mlx5_free_shared_dev_ctx(struct mlx5_dev_ctx_shared *sh)
{
int ret;
+ int i = 0;
pthread_mutex_lock(&mlx5_dev_ctx_list_mutex);
#ifdef RTE_LIBRTE_MLX5_DEBUG
@@ -1510,8 +1573,10 @@ mlx5_free_shared_dev_ctx(struct mlx5_dev_ctx_shared *sh)
}
if (sh->pd)
claim_zero(mlx5_os_dealloc_pd(sh->pd));
- if (sh->tis)
- claim_zero(mlx5_devx_cmd_destroy(sh->tis));
+ do {
+ if (sh->tis[i])
+ claim_zero(mlx5_devx_cmd_destroy(sh->tis[i]));
+ } while (++i < sh->bond.n_port);
if (sh->td)
claim_zero(mlx5_devx_cmd_destroy(sh->td));
if (sh->devx_rx_uar)
@@ -1120,6 +1120,12 @@ struct mlx5_aso_ct_pools_mng {
struct mlx5_aso_sq aso_sq; /* ASO queue objects. */
};
+/* LAG attr. */
+struct mlx5_lag {
+ uint8_t tx_remap_affinity[16]; /* The PF port number of affinity */
+ uint8_t affinity_mode; /* TIS or hash based affinity */
+};
+
/*
* Shared Infiniband device context for Master/Representors
* which belong to same IB device with multiple IB ports.
@@ -1187,8 +1193,9 @@ struct mlx5_dev_ctx_shared {
struct rte_intr_handle intr_handle; /* Interrupt handler for device. */
struct rte_intr_handle intr_handle_devx; /* DEVX interrupt handler. */
void *devx_comp; /* DEVX async comp obj. */
- struct mlx5_devx_obj *tis; /* TIS object. */
+ struct mlx5_devx_obj *tis[16]; /* TIS object. */
struct mlx5_devx_obj *td; /* Transport domain. */
+ struct mlx5_lag lag; /* LAG attributes */
void *tx_uar; /* Tx/packet pacing shared UAR. */
struct mlx5_flex_parser_profiles fp[MLX5_FLEX_PARSER_MAX];
/* Flex parser profiles information. */
@@ -1454,6 +1461,7 @@ struct mlx5_priv {
uint32_t rss_shared_actions; /* RSS shared actions. */
struct mlx5_devx_obj *q_counters; /* DevX queue counter object. */
uint32_t counter_set_id; /* Queue counter ID to set in DevX objects. */
+ uint32_t lag_affinity_idx; /* LAG mode queue 0 affinity starting. */
};
#define PORT_ID(priv) ((priv)->dev_data->port_id)
@@ -888,6 +888,37 @@ mlx5_devx_drop_action_destroy(struct rte_eth_dev *dev)
rte_errno = ENOTSUP;
}
+/**
+ * Select TXQ TIS number.
+ *
+ * @param dev
+ * Pointer to Ethernet device.
+ * @param queue_idx
+ * Queue index in DPDK Tx queue array.
+ *
+ * @return
+ * > 0 on success, a negative errno value otherwise.
+ */
+static uint32_t
+mlx5_get_txq_tis_num(struct rte_eth_dev *dev, uint16_t queue_idx)
+{
+ struct mlx5_priv *priv = dev->data->dev_private;
+ int tis_idx;
+
+ if (priv->sh->bond.n_port && priv->sh->lag.affinity_mode ==
+ MLX5_LAG_MODE_TIS) {
+ tis_idx = (priv->lag_affinity_idx + queue_idx) %
+ priv->sh->bond.n_port;
+ DRV_LOG(INFO, "port %d txq %d gets affinity %d and maps to PF %d.",
+ dev->data->port_id, queue_idx, tis_idx + 1,
+ priv->sh->lag.tx_remap_affinity[tis_idx]);
+ } else {
+ tis_idx = 0;
+ }
+ MLX5_ASSERT(priv->sh->tis[tis_idx]);
+ return priv->sh->tis[tis_idx]->id;
+}
+
/**
* Create the Tx hairpin queue object.
*
@@ -935,7 +966,8 @@ mlx5_txq_obj_hairpin_new(struct rte_eth_dev *dev, uint16_t idx)
attr.wq_attr.log_hairpin_num_packets =
attr.wq_attr.log_hairpin_data_sz -
MLX5_HAIRPIN_QUEUE_STRIDE;
- attr.tis_num = priv->sh->tis->id;
+
+ attr.tis_num = mlx5_get_txq_tis_num(dev, idx);
tmpl->sq = mlx5_devx_cmd_create_sq(priv->sh->ctx, &attr);
if (!tmpl->sq) {
DRV_LOG(ERR,
@@ -992,14 +1024,15 @@ mlx5_txq_create_devx_sq_resources(struct rte_eth_dev *dev, uint16_t idx,
.allow_swp = !!priv->config.swp,
.cqn = txq_obj->cq_obj.cq->id,
.tis_lst_sz = 1,
- .tis_num = priv->sh->tis->id,
.wq_attr = (struct mlx5_devx_wq_attr){
.pd = priv->sh->pdn,
.uar_page =
mlx5_os_get_devx_uar_page_id(priv->sh->tx_uar),
},
.ts_format = mlx5_ts_format_conv(priv->sh->sq_ts_format),
+ .tis_num = mlx5_get_txq_tis_num(dev, idx),
};
+
/* Create Send Queue object with DevX. */
return mlx5_devx_sq_create(priv->sh->ctx, &txq_obj->sq_obj, log_desc_n,
&sq_attr, priv->sh->numa_node);
@@ -230,7 +230,7 @@ mlx5_txpp_create_rearm_queue(struct mlx5_dev_ctx_shared *sh)
.cd_master = 1,
.state = MLX5_SQC_STATE_RST,
.tis_lst_sz = 1,
- .tis_num = sh->tis->id,
+ .tis_num = sh->tis[0]->id,
.wq_attr = (struct mlx5_devx_wq_attr){
.pd = sh->pdn,
.uar_page = mlx5_os_get_devx_uar_page_id(sh->tx_uar),
@@ -433,7 +433,7 @@ mlx5_txpp_create_clock_queue(struct mlx5_dev_ctx_shared *sh)
/* Create send queue object for Clock Queue. */
if (sh->txpp.test) {
sq_attr.tis_lst_sz = 1;
- sq_attr.tis_num = sh->tis->id;
+ sq_attr.tis_num = sh->tis[0]->id;
sq_attr.non_wire = 0;
sq_attr.static_sq_wq = 1;
} else {