@@ -2765,3 +2765,41 @@ mlx5_devx_cmd_create_crypto_login_obj(void *ctx,
crypto_login_obj->id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
return crypto_login_obj;
}
+
+/**
+ * Query LAG context.
+ *
+ * @param[in] ctx
+ *   Pointer to ibv_context, returned from mlx5dv_open_device.
+ * @param[out] lag_ctx
+ *   Pointer to struct mlx5_devx_lag_context, to be set by the routine.
+ *
+ * @return
+ *   0 on success, a negative value otherwise.
+ */
+int
+mlx5_devx_cmd_query_lag(struct ibv_context *ctx,
+			struct mlx5_devx_lag_context *lag_ctx)
+{
+	uint32_t in[MLX5_ST_SZ_DW(query_lag_in)] = {0};
+	uint32_t out[MLX5_ST_SZ_DW(query_lag_out)] = {0};
+	void *lctx;
+	int ret;
+
+	MLX5_SET(query_lag_in, in, opcode, MLX5_CMD_OP_QUERY_LAG);
+	ret = mlx5_glue->devx_general_cmd(ctx, in, sizeof(in),
+					  out, sizeof(out));
+	if (ret != 0)
+		/* Normalize a positive errno-style code to negative. */
+		return (ret > 0) ? -ret : ret;
+	lctx = MLX5_ADDR_OF(query_lag_out, out, context);
+	lag_ctx->fdb_selection_mode =
+		MLX5_GET(lag_context, lctx, fdb_selection_mode);
+	lag_ctx->lag_state = MLX5_GET(lag_context, lctx, lag_state);
+	lag_ctx->tx_remap_affinity_1 =
+		MLX5_GET(lag_context, lctx, tx_remap_affinity_1);
+	lag_ctx->tx_remap_affinity_2 =
+		MLX5_GET(lag_context, lctx, tx_remap_affinity_2);
+	return 0;
+}
@@ -184,6 +184,14 @@ struct mlx5_hca_attr {
uint32_t umr_indirect_mkey_disabled:1;
};
+/* LAG context, as returned by the QUERY_LAG DevX command. */
+struct mlx5_devx_lag_context {
+	uint32_t fdb_selection_mode:1; /* FDB selection mode. */
+	uint32_t lag_state:3; /* Current LAG state. */
+	uint32_t tx_remap_affinity_1:4; /* PF port of Tx remap affinity 1. */
+	uint32_t tx_remap_affinity_2:4; /* PF port of Tx remap affinity 2. */
+};
+
struct mlx5_devx_wq_attr {
uint32_t wq_type:4;
uint32_t wq_signature:1;
@@ -666,4 +674,8 @@ struct mlx5_devx_obj *
mlx5_devx_cmd_create_crypto_login_obj(void *ctx,
struct mlx5_devx_crypto_login_attr *attr);
+__rte_internal
+int
+mlx5_devx_cmd_query_lag(struct ibv_context *ctx,
+ struct mlx5_devx_lag_context *lag_ctx);
#endif /* RTE_PMD_MLX5_DEVX_CMDS_H_ */
@@ -1048,6 +1048,7 @@ enum {
MLX5_CMD_OP_DEALLOC_PD = 0x801,
MLX5_CMD_OP_ACCESS_REGISTER = 0x805,
MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN = 0x816,
+ MLX5_CMD_OP_QUERY_LAG = 0x842,
MLX5_CMD_OP_CREATE_TIR = 0x900,
MLX5_CMD_OP_MODIFY_TIR = 0x901,
MLX5_CMD_OP_CREATE_SQ = 0X904,
@@ -2000,6 +2001,31 @@ struct mlx5_ifc_query_tis_in_bits {
u8 reserved_at_60[0x20];
};
+/* PRM layout of the LAG context; array sizes are field widths in bits. */
+struct mlx5_ifc_lag_context_bits {
+	u8 fdb_selection_mode[0x1];
+	u8 reserved_at_1[0x1c];
+	u8 lag_state[0x3];
+	u8 reserved_at_20[0x14];
+	u8 tx_remap_affinity_2[0x4];
+	u8 reserved_at_38[0x4];
+	u8 tx_remap_affinity_1[0x4];
+};
+
+/* Input of the QUERY_LAG command: opcode only, no extra parameters. */
+struct mlx5_ifc_query_lag_in_bits {
+	u8 opcode[0x10];
+	u8 uid[0x10];
+	u8 reserved_at_20[0x10];
+	u8 op_mod[0x10];
+	u8 reserved_at_40[0x40];
+};
+
+/* Output of the QUERY_LAG command: status header followed by the context. */
+struct mlx5_ifc_query_lag_out_bits {
+	u8 status[0x8];
+	u8 reserved_at_8[0x18];
+	u8 syndrome[0x20];
+	struct mlx5_ifc_lag_context_bits context; /* Starts at bit 0x40. */
+};
+
struct mlx5_ifc_alloc_transport_domain_out_bits {
u8 status[0x8];
u8 reserved_at_8[0x18];
@@ -53,6 +53,7 @@ INTERNAL {
mlx5_devx_cmd_modify_virtq;
mlx5_devx_cmd_qp_query_tis_td;
mlx5_devx_cmd_query_hca_attr;
+ mlx5_devx_cmd_query_lag;
mlx5_devx_cmd_query_parse_samples;
mlx5_devx_cmd_query_virtio_q_counters; # WINDOWS_NO_EXPORT
mlx5_devx_cmd_query_virtq;
@@ -977,6 +977,8 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
uint16_t port_id;
struct mlx5_port_info vport_info = { .query_flags = 0 };
int i;
+ struct mlx5_devx_tis_attr tis_attr = { 0 };
+ struct mlx5_devx_lag_context lag_ctx = {0};
/* Determine if this port representor is supposed to be spawned. */
if (switch_info->representor && dpdk_dev->devargs &&
@@ -1679,6 +1681,41 @@ mlx5_dev_spawn(struct rte_device *dpdk_dev,
eth_dev->data->representor_id = priv->representor_id;
}
priv->mp_id.port_id = eth_dev->data->port_id;
+ tis_attr.transport_domain = sh->td->id;
+ if (sh->bond.n_port) {
+ for (i = 0; i < sh->bond.n_port; i++) {
+ /*
+ * 0 is auto affinity, non-zero value
+ * to propose port.
+ */
+ tis_attr.lag_tx_port_affinity = (eth_dev->data->port_id
+ + i) % sh->bond.n_port + 1;
+ sh->tis[i] = mlx5_devx_cmd_create_tis(sh->ctx,
+ &tis_attr);
+ if (!sh->tis[i]) {
+ DRV_LOG(ERR, "TIS allocation failure %d", i);
+ err = ENOMEM;
+ goto error;
+ }
+ }
+ if (!mlx5_devx_cmd_query_lag(sh->ctx, &lag_ctx)) {
+ sh->lag.tx_remap_affinity[0] =
+ lag_ctx.tx_remap_affinity_1;
+ sh->lag.tx_remap_affinity[1] =
+ lag_ctx.tx_remap_affinity_2;
+ }
+ DRV_LOG(DEBUG, "LAG number of ports : %d, affinity_1 & 2 : %d & %d\n",
+ sh->bond.n_port, lag_ctx.tx_remap_affinity_1,
+ lag_ctx.tx_remap_affinity_2);
+ } else {
+ tis_attr.lag_tx_port_affinity = 0;
+ sh->tis[0] = mlx5_devx_cmd_create_tis(sh->ctx, &tis_attr);
+ if (!sh->tis[0]) {
+ DRV_LOG(ERR, "TIS allocation failure");
+ err = ENOMEM;
+ goto error;
+ }
+ }
strlcpy(priv->mp_id.name, MLX5_MP_NAME, RTE_MP_MAX_NAME_LEN);
/*
* Store associated network device interface index. This index
@@ -1112,7 +1112,6 @@ mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn,
struct mlx5_dev_ctx_shared *sh;
int err = 0;
uint32_t i;
- struct mlx5_devx_tis_attr tis_attr = { 0 };
MLX5_ASSERT(spawn);
/* Secondary process should not create the shared context. */
@@ -1183,13 +1182,6 @@ mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn,
err = ENOMEM;
goto error;
}
- tis_attr.transport_domain = sh->td->id;
- sh->tis = mlx5_devx_cmd_create_tis(sh->ctx, &tis_attr);
- if (!sh->tis) {
- DRV_LOG(ERR, "TIS allocation failure");
- err = ENOMEM;
- goto error;
- }
err = mlx5_alloc_rxtx_uars(sh, config);
if (err)
goto error;
@@ -1254,8 +1246,6 @@ mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn,
MLX5_ASSERT(sh);
if (sh->cnt_id_tbl)
mlx5_l3t_destroy(sh->cnt_id_tbl);
- if (sh->tis)
- claim_zero(mlx5_devx_cmd_destroy(sh->tis));
if (sh->td)
claim_zero(mlx5_devx_cmd_destroy(sh->td));
if (sh->devx_rx_uar)
@@ -1282,6 +1272,7 @@ mlx5_alloc_shared_dev_ctx(const struct mlx5_dev_spawn_data *spawn,
void
mlx5_free_shared_dev_ctx(struct mlx5_dev_ctx_shared *sh)
{
+ int i = 0;
pthread_mutex_lock(&mlx5_dev_ctx_list_mutex);
#ifdef RTE_LIBRTE_MLX5_DEBUG
/* Check the object presence in the list. */
@@ -1337,8 +1328,10 @@ mlx5_free_shared_dev_ctx(struct mlx5_dev_ctx_shared *sh)
}
if (sh->pd)
claim_zero(mlx5_os_dealloc_pd(sh->pd));
- if (sh->tis)
- claim_zero(mlx5_devx_cmd_destroy(sh->tis));
+ do {
+ if (sh->tis[i])
+ claim_zero(mlx5_devx_cmd_destroy(sh->tis[i]));
+ } while (++i < sh->bond.n_port);
if (sh->td)
claim_zero(mlx5_devx_cmd_destroy(sh->td));
if (sh->devx_rx_uar)
@@ -1118,6 +1118,11 @@ struct mlx5_aso_ct_pools_mng {
struct mlx5_aso_sq aso_sq; /* ASO queue objects. */
};
+/* LAG attributes of the shared device context. */
+struct mlx5_lag {
+	uint8_t tx_remap_affinity[16]; /* PF port number per affinity slot. */
+};
+
/*
* Shared Infiniband device context for Master/Representors
* which belong to same IB device with multiple IB ports.
@@ -1187,8 +1192,9 @@ struct mlx5_dev_ctx_shared {
struct rte_intr_handle intr_handle; /* Interrupt handler for device. */
struct rte_intr_handle intr_handle_devx; /* DEVX interrupt handler. */
void *devx_comp; /* DEVX async comp obj. */
- struct mlx5_devx_obj *tis; /* TIS object. */
+ struct mlx5_devx_obj *tis[16]; /* TIS object. */
struct mlx5_devx_obj *td; /* Transport domain. */
+ struct mlx5_lag lag; /* LAG attributes */
void *tx_uar; /* Tx/packet pacing shared UAR. */
struct mlx5_flex_parser_profiles fp[MLX5_FLEX_PARSER_MAX];
/* Flex parser profiles information. */
@@ -888,6 +888,41 @@ mlx5_devx_drop_action_destroy(struct rte_eth_dev *dev)
rte_errno = ENOTSUP;
}
+/**
+ * Set TXQ affinity via TIS round-robin.
+ *
+ * In bonding mode one TIS was created per PF port; Tx queues are
+ * spread over those TIS objects round-robin so each queue's traffic
+ * sticks to a single PF port.
+ *
+ * @param priv
+ *   Pointer to device private data.
+ * @param idx
+ *   TX queue index.
+ * @param attr
+ *   Pointer to DevX SQ attribute; tis_num is filled in by this routine.
+ */
+static void
+__mlx5_set_txq_affinity(struct mlx5_priv *priv, uint16_t idx,
+			struct mlx5_devx_create_sq_attr *attr)
+{
+	struct mlx5_dev_ctx_shared *sh = priv->sh;
+	struct mlx5_txq_data *txq_data = (*priv->txqs)[idx];
+	int i, min_tis;
+
+	if (sh->bond.n_port) {
+		/*
+		 * The starting TIS is round-robin on all ports like
+		 * port 0: TIS 0, port 1: TIS 1 and so on.
+		 * Find the TIS created first (lowest id) - it belongs to
+		 * the first port. Assumes TIS ids increase by creation.
+		 */
+		for (min_tis = 0, i = 1; i < sh->bond.n_port; i++)
+			if (sh->tis[i]->id < sh->tis[min_tis]->id)
+				min_tis = i;
+		attr->tis_num = sh->tis[idx % sh->bond.n_port]->id;
+		txq_data->lag_port_affinity = (idx + sh->bond.n_port -
+					       min_tis) % sh->bond.n_port + 1;
+	} else {
+		/*
+		 * No bonding: single TIS, affinity 0 means "unset".
+		 * NOTE(review): callers log
+		 * lag.tx_remap_affinity[lag_port_affinity - 1], which
+		 * underflows to index -1 here - confirm the call sites
+		 * guard the non-bonding case.
+		 */
+		txq_data->lag_port_affinity = 0;
+		attr->tis_num = sh->tis[0]->id;
+	}
+}
+
/**
* Create the Tx hairpin queue object.
*
@@ -935,7 +970,11 @@ mlx5_txq_obj_hairpin_new(struct rte_eth_dev *dev, uint16_t idx)
attr.wq_attr.log_hairpin_num_packets =
attr.wq_attr.log_hairpin_data_sz -
MLX5_HAIRPIN_QUEUE_STRIDE;
- attr.tis_num = priv->sh->tis->id;
+ __mlx5_set_txq_affinity(priv, idx, &attr);
+ DRV_LOG(INFO, "queue %d tis number %d with affinity %d maps to PF port %d",
+ idx, attr.tis_num, txq_data->lag_port_affinity,
+ priv->sh->lag.tx_remap_affinity
+ [txq_data->lag_port_affinity - 1]);
tmpl->sq = mlx5_devx_cmd_create_sq(priv->sh->ctx, &attr);
if (!tmpl->sq) {
DRV_LOG(ERR,
@@ -992,7 +1031,6 @@ mlx5_txq_create_devx_sq_resources(struct rte_eth_dev *dev, uint16_t idx,
.allow_swp = !!priv->config.swp,
.cqn = txq_obj->cq_obj.cq->id,
.tis_lst_sz = 1,
- .tis_num = priv->sh->tis->id,
.wq_attr = (struct mlx5_devx_wq_attr){
.pd = priv->sh->pdn,
.uar_page =
@@ -1000,6 +1038,11 @@ mlx5_txq_create_devx_sq_resources(struct rte_eth_dev *dev, uint16_t idx,
},
.ts_format = mlx5_ts_format_conv(priv->sh->sq_ts_format),
};
+ __mlx5_set_txq_affinity(priv, idx, &sq_attr);
+ DRV_LOG(INFO, "queue %d tis number %d with affinity %d maps to PF port %d",
+ idx, sq_attr.tis_num, txq_data->lag_port_affinity,
+ priv->sh->lag.tx_remap_affinity
+ [txq_data->lag_port_affinity - 1]);
/* Create Send Queue object with DevX. */
return mlx5_devx_sq_create(priv->sh->ctx, &txq_obj->sq_obj, log_desc_n,
&sq_attr, priv->sh->numa_node);
@@ -161,6 +161,7 @@ struct mlx5_txq_data {
int32_t ts_offset; /* Timestamp field dynamic offset. */
struct mlx5_dev_ctx_shared *sh; /* Shared context. */
struct mlx5_txq_stats stats; /* TX queue counters. */
+ uint8_t lag_port_affinity; /* TXQ affinity */
#ifndef RTE_ARCH_64
rte_spinlock_t *uar_lock;
/* UAR access lock required for 32bit implementations */
@@ -230,7 +230,7 @@ mlx5_txpp_create_rearm_queue(struct mlx5_dev_ctx_shared *sh)
.cd_master = 1,
.state = MLX5_SQC_STATE_RST,
.tis_lst_sz = 1,
- .tis_num = sh->tis->id,
+ .tis_num = sh->tis[0]->id,
.wq_attr = (struct mlx5_devx_wq_attr){
.pd = sh->pdn,
.uar_page = mlx5_os_get_devx_uar_page_id(sh->tx_uar),
@@ -433,7 +433,7 @@ mlx5_txpp_create_clock_queue(struct mlx5_dev_ctx_shared *sh)
/* Create send queue object for Clock Queue. */
if (sh->txpp.test) {
sq_attr.tis_lst_sz = 1;
- sq_attr.tis_num = sh->tis->id;
+ sq_attr.tis_num = sh->tis[0]->id;
sq_attr.non_wire = 0;
sq_attr.static_sq_wq = 1;
} else {