[v2,3/3] net/mlx5: enhance the Tx queue affinity

Message ID 20230222122628.29627-4-jiaweiw@nvidia.com (mailing list archive)
State Accepted, archived
Delegated to: Raslan Darawsheh
Headers
Series Add Tx queue mapping of aggregated ports in MLX5 PMD |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/loongarch-compilation success Compilation OK
ci/loongarch-unit-testing success Unit Testing PASS
ci/iol-broadcom-Functional success Functional Testing PASS
ci/Intel-compilation success Compilation OK
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/intel-Testing success Testing PASS
ci/github-robot: build success github build: passed
ci/iol-x86_64-unit-testing success Testing PASS
ci/iol-x86_64-compile-testing success Testing PASS
ci/iol-testing warning Testing issues
ci/iol-abi-testing success Testing PASS

Commit Message

Jiawei Wang Feb. 22, 2023, 12:26 p.m. UTC
  The rte_eth_dev_map_aggr_tx_affinity() was introduced in
ethdev lib, it was used to set the affinity value per Tx queue.

This patch adds the MLX5 PMD support for two device ops:
 - map_aggr_tx_affinity
 - count_aggr_ports

After maps a Tx queue with an aggregated port by call
map_aggr_tx_affinity() and starts sending traffic, the MLX5 PMD
updates TIS creation with tx_aggr_affinity value of Tx queue.
TIS index 1 goes to first physical port, TIS index 2 goes to second
physical port, and so on, TIS index 0 is reserved for default
HW hash mode.

Signed-off-by: Jiawei Wang <jiaweiw@nvidia.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
---
 drivers/common/mlx5/mlx5_prm.h |  8 ------
 drivers/net/mlx5/mlx5.c        | 49 +++++++++++++++++-----------------
 drivers/net/mlx5/mlx5_devx.c   | 24 +++++++++--------
 drivers/net/mlx5/mlx5_tx.h     |  4 +++
 drivers/net/mlx5/mlx5_txq.c    | 38 ++++++++++++++++++++++++++
 5 files changed, 80 insertions(+), 43 deletions(-)
  

Patch

diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index 26a1f0717d..2f5aeecaa9 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -2363,14 +2363,6 @@  struct mlx5_ifc_query_nic_vport_context_in_bits {
 	u8 reserved_at_68[0x18];
 };
 
-/*
- * lag_tx_port_affinity: 0 auto-selection, 1 PF1, 2 PF2 vice versa.
- * Each TIS binds to one PF by setting lag_tx_port_affinity (>0).
- * Once LAG enabled, we create multiple TISs and bind each one to
- * different PFs, then TIS[i] gets affinity i+1 and goes to PF i+1.
- */
-#define MLX5_IFC_LAG_MAP_TIS_AFFINITY(index, num) ((num) ? \
-						    (index) % (num) + 1 : 0)
 struct mlx5_ifc_tisc_bits {
 	u8 strict_lag_tx_port_affinity[0x1];
 	u8 reserved_at_1[0x3];
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index f55c1caca0..8c8f71d508 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -1257,9 +1257,9 @@  mlx5_dev_ctx_shared_mempool_subscribe(struct rte_eth_dev *dev)
 static int
 mlx5_setup_tis(struct mlx5_dev_ctx_shared *sh)
 {
-	int i;
 	struct mlx5_devx_lag_context lag_ctx = { 0 };
 	struct mlx5_devx_tis_attr tis_attr = { 0 };
+	int i;
 
 	tis_attr.transport_domain = sh->td->id;
 	if (sh->bond.n_port) {
@@ -1273,35 +1273,30 @@  mlx5_setup_tis(struct mlx5_dev_ctx_shared *sh)
 			DRV_LOG(ERR, "Failed to query lag affinity.");
 			return -1;
 		}
-		if (sh->lag.affinity_mode == MLX5_LAG_MODE_TIS) {
-			for (i = 0; i < sh->bond.n_port; i++) {
-				tis_attr.lag_tx_port_affinity =
-					MLX5_IFC_LAG_MAP_TIS_AFFINITY(i,
-							sh->bond.n_port);
-				sh->tis[i] = mlx5_devx_cmd_create_tis(sh->cdev->ctx,
-						&tis_attr);
-				if (!sh->tis[i]) {
-					DRV_LOG(ERR, "Failed to TIS %d/%d for bonding device"
-						" %s.", i, sh->bond.n_port,
-						sh->ibdev_name);
-					return -1;
-				}
-			}
+		if (sh->lag.affinity_mode == MLX5_LAG_MODE_TIS)
 			DRV_LOG(DEBUG, "LAG number of ports : %d, affinity_1 & 2 : pf%d & %d.\n",
 				sh->bond.n_port, lag_ctx.tx_remap_affinity_1,
 				lag_ctx.tx_remap_affinity_2);
-			return 0;
-		}
-		if (sh->lag.affinity_mode == MLX5_LAG_MODE_HASH)
+		else if (sh->lag.affinity_mode == MLX5_LAG_MODE_HASH)
 			DRV_LOG(INFO, "Device %s enabled HW hash based LAG.",
 					sh->ibdev_name);
 	}
-	tis_attr.lag_tx_port_affinity = 0;
-	sh->tis[0] = mlx5_devx_cmd_create_tis(sh->cdev->ctx, &tis_attr);
-	if (!sh->tis[0]) {
-		DRV_LOG(ERR, "Failed to TIS 0 for bonding device"
-			" %s.", sh->ibdev_name);
-		return -1;
+	for (i = 0; i <= sh->bond.n_port; i++) {
+		/*
+		 * lag_tx_port_affinity: 0 auto-selection, 1 PF1, 2 PF2 vice versa.
+		 * Each TIS binds to one PF by setting lag_tx_port_affinity (> 0).
+		 * Once LAG enabled, we create multiple TISs and bind each one to
+		 * different PFs, then TIS[i+1] gets affinity i+1 and goes to PF i+1.
+		 * TIS[0] is reserved for HW Hash mode.
+		 */
+		tis_attr.lag_tx_port_affinity = i;
+		sh->tis[i] = mlx5_devx_cmd_create_tis(sh->cdev->ctx, &tis_attr);
+		if (!sh->tis[i]) {
+			DRV_LOG(ERR, "Failed to create TIS %d/%d for [bonding] device"
+				" %s.", i, sh->bond.n_port,
+				sh->ibdev_name);
+			return -1;
+		}
 	}
 	return 0;
 }
@@ -2335,6 +2330,8 @@  const struct eth_dev_ops mlx5_dev_ops = {
 	.hairpin_queue_peer_bind = mlx5_hairpin_queue_peer_bind,
 	.hairpin_queue_peer_unbind = mlx5_hairpin_queue_peer_unbind,
 	.get_monitor_addr = mlx5_get_monitor_addr,
+	.count_aggr_ports = mlx5_count_aggr_ports,
+	.map_aggr_tx_affinity = mlx5_map_aggr_tx_affinity,
 };
 
 /* Available operations from secondary process. */
@@ -2358,6 +2355,8 @@  const struct eth_dev_ops mlx5_dev_sec_ops = {
 	.tx_burst_mode_get = mlx5_tx_burst_mode_get,
 	.get_module_info = mlx5_get_module_info,
 	.get_module_eeprom = mlx5_get_module_eeprom,
+	.count_aggr_ports = mlx5_count_aggr_ports,
+	.map_aggr_tx_affinity = mlx5_map_aggr_tx_affinity,
 };
 
 /* Available operations in flow isolated mode. */
@@ -2422,6 +2421,8 @@  const struct eth_dev_ops mlx5_dev_ops_isolate = {
 	.hairpin_queue_peer_bind = mlx5_hairpin_queue_peer_bind,
 	.hairpin_queue_peer_unbind = mlx5_hairpin_queue_peer_unbind,
 	.get_monitor_addr = mlx5_get_monitor_addr,
+	.count_aggr_ports = mlx5_count_aggr_ports,
+	.map_aggr_tx_affinity = mlx5_map_aggr_tx_affinity,
 };
 
 /**
diff --git a/drivers/net/mlx5/mlx5_devx.c b/drivers/net/mlx5/mlx5_devx.c
index f6e1943fd7..d02cedb202 100644
--- a/drivers/net/mlx5/mlx5_devx.c
+++ b/drivers/net/mlx5/mlx5_devx.c
@@ -1190,17 +1190,19 @@  static uint32_t
 mlx5_get_txq_tis_num(struct rte_eth_dev *dev, uint16_t queue_idx)
 {
 	struct mlx5_priv *priv = dev->data->dev_private;
-	int tis_idx;
-
-	if (priv->sh->bond.n_port && priv->sh->lag.affinity_mode ==
-			MLX5_LAG_MODE_TIS) {
-		tis_idx = (priv->lag_affinity_idx + queue_idx) %
-			priv->sh->bond.n_port;
-		DRV_LOG(INFO, "port %d txq %d gets affinity %d and maps to PF %d.",
-			dev->data->port_id, queue_idx, tis_idx + 1,
-			priv->sh->lag.tx_remap_affinity[tis_idx]);
-	} else {
-		tis_idx = 0;
+	struct mlx5_txq_data *txq_data = (*priv->txqs)[queue_idx];
+	int tis_idx = 0;
+
+	if (priv->sh->bond.n_port) {
+		if (txq_data->tx_aggr_affinity) {
+			tis_idx = txq_data->tx_aggr_affinity;
+		} else if (priv->sh->lag.affinity_mode == MLX5_LAG_MODE_TIS) {
+			tis_idx = (priv->lag_affinity_idx + queue_idx) %
+				priv->sh->bond.n_port + 1;
+			DRV_LOG(INFO, "port %d txq %d gets affinity %d and maps to PF %d.",
+				dev->data->port_id, queue_idx, tis_idx,
+				priv->sh->lag.tx_remap_affinity[tis_idx - 1]);
+		}
 	}
 	MLX5_ASSERT(priv->sh->tis[tis_idx]);
 	return priv->sh->tis[tis_idx]->id;
diff --git a/drivers/net/mlx5/mlx5_tx.h b/drivers/net/mlx5/mlx5_tx.h
index a056be7ca8..d0c6303a2d 100644
--- a/drivers/net/mlx5/mlx5_tx.h
+++ b/drivers/net/mlx5/mlx5_tx.h
@@ -144,6 +144,7 @@  struct mlx5_txq_data {
 	uint16_t inlen_send; /* Ordinary send data inline size. */
 	uint16_t inlen_empw; /* eMPW max packet size to inline. */
 	uint16_t inlen_mode; /* Minimal data length to inline. */
+	uint8_t tx_aggr_affinity; /* TxQ affinity configuration. */
 	uint32_t qp_num_8s; /* QP number shifted by 8. */
 	uint64_t offloads; /* Offloads for Tx Queue. */
 	struct mlx5_mr_ctrl mr_ctrl; /* MR control descriptor. */
@@ -218,6 +219,9 @@  void txq_alloc_elts(struct mlx5_txq_ctrl *txq_ctrl);
 void txq_free_elts(struct mlx5_txq_ctrl *txq_ctrl);
 uint64_t mlx5_get_tx_port_offloads(struct rte_eth_dev *dev);
 void mlx5_txq_dynf_timestamp_set(struct rte_eth_dev *dev);
+int mlx5_count_aggr_ports(struct rte_eth_dev *dev);
+int mlx5_map_aggr_tx_affinity(struct rte_eth_dev *dev, uint16_t tx_queue_id,
+			      uint8_t affinity);
 
 /* mlx5_tx.c */
 
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index 419e913559..1e0e61a620 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -1365,3 +1365,41 @@  mlx5_txq_dynf_timestamp_set(struct rte_eth_dev *dev)
 				     ts_mask : 0;
 	}
 }
+
+int mlx5_count_aggr_ports(struct rte_eth_dev *dev)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+
+	return priv->sh->bond.n_port;
+}
+
+int mlx5_map_aggr_tx_affinity(struct rte_eth_dev *dev, uint16_t tx_queue_id,
+			      uint8_t affinity)
+{
+	struct mlx5_txq_ctrl *txq_ctrl;
+	struct mlx5_txq_data *txq;
+	struct mlx5_priv *priv;
+
+	priv = dev->data->dev_private;
+	txq = (*priv->txqs)[tx_queue_id];
+	if (!txq)
+		return -1;
+	txq_ctrl = container_of(txq, struct mlx5_txq_ctrl, txq);
+	if (tx_queue_id >= priv->txqs_n) {
+		DRV_LOG(ERR, "port %u Tx queue index out of range (%u >= %u)",
+			dev->data->port_id, tx_queue_id, priv->txqs_n);
+		rte_errno = EOVERFLOW;
+		return -rte_errno;
+	}
+	if (affinity > priv->num_lag_ports) {
+		DRV_LOG(ERR, "port %u unable to setup Tx queue index %u"
+			" affinity is %u exceeds the maximum %u", dev->data->port_id,
+			tx_queue_id, affinity, priv->num_lag_ports);
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+	DRV_LOG(DEBUG, "port %u configuring queue %u for aggregated affinity %u",
+		dev->data->port_id, tx_queue_id, affinity);
+	txq_ctrl->txq.tx_aggr_affinity = affinity;
+	return 0;
+}