[RFC,v1,5/7] net/mlx5: support Rx queue based limit watermark

Message ID 20220506035645.4101714-6-spiked@nvidia.com (mailing list archive)
State Changes Requested, archived
Delegated to: Andrew Rybchenko
Headers
Series net/mlx5: introduce limit watermark and host shaper |

Checks

Context Check Description
ci/checkpatch warning coding style issues

Commit Message

Spike Du May 6, 2022, 3:56 a.m. UTC
  Add mlx5 specific LWM (limit watermark) configuration handler.
When the Rx queue fullness reaches the LWM limit, the driver catches
a HW event and invokes the user callback.

Signed-off-by: Spike Du <spiked@nvidia.com>
---
 doc/guides/nics/mlx5.rst               |   4 ++
 doc/guides/rel_notes/release_22_07.rst |   1 +
 drivers/common/mlx5/mlx5_prm.h         |   1 +
 drivers/net/mlx5/mlx5.c                |   1 +
 drivers/net/mlx5/mlx5_rx.c             | 123 +++++++++++++++++++++++++++++++++
 drivers/net/mlx5/mlx5_rx.h             |   3 +
 6 files changed, 133 insertions(+)
  

Patch

diff --git a/doc/guides/nics/mlx5.rst b/doc/guides/nics/mlx5.rst
index 4805d08..a7698c9 100644
--- a/doc/guides/nics/mlx5.rst
+++ b/doc/guides/nics/mlx5.rst
@@ -92,6 +92,7 @@  Features
 - Connection tracking.
 - Sub-Function representors.
 - Sub-Function.
+- Rx queue LWM (Limit WaterMark) configuration.
 
 
 Limitations
@@ -518,6 +519,9 @@  Limitations
 - The NIC egress flow rules on representor port are not supported.
 
 
+- LWM:
+  - Shared Rx queues and hairpin Rx queues are not supported.
+
 Statistics
 ----------
 
diff --git a/doc/guides/rel_notes/release_22_07.rst b/doc/guides/rel_notes/release_22_07.rst
index 88d6e96..f3cf2f1 100644
--- a/doc/guides/rel_notes/release_22_07.rst
+++ b/doc/guides/rel_notes/release_22_07.rst
@@ -64,6 +64,7 @@  New Features
 
   * Added support for promiscuous mode on Windows.
   * Added support for MTU on Windows.
+  * Added Rx queue LWM (Limit WaterMark) support.
 
 
 Removed Items
diff --git a/drivers/common/mlx5/mlx5_prm.h b/drivers/common/mlx5/mlx5_prm.h
index 44b1822..23b13e3 100644
--- a/drivers/common/mlx5/mlx5_prm.h
+++ b/drivers/common/mlx5/mlx5_prm.h
@@ -3290,6 +3290,7 @@  struct mlx5_aso_wqe {
 
 enum {
 	MLX5_EVENT_TYPE_OBJECT_CHANGE = 0x27,
+	MLX5_EVENT_TYPE_SRQ_LIMIT_REACHED = 0x14, /* Rx queue LWM event. */
 };
 
 enum {
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index 334223e..628003d 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -2062,6 +2062,7 @@  struct mlx5_dev_ctx_shared *
 	.dev_supported_ptypes_get = mlx5_dev_supported_ptypes_get,
 	.vlan_filter_set = mlx5_vlan_filter_set,
 	.rx_queue_setup = mlx5_rx_queue_setup,
+	.rx_queue_set_lwm = mlx5_rx_queue_set_lwm,
 	.rx_hairpin_queue_setup = mlx5_rx_hairpin_queue_setup,
 	.tx_queue_setup = mlx5_tx_queue_setup,
 	.tx_hairpin_queue_setup = mlx5_tx_hairpin_queue_setup,
diff --git a/drivers/net/mlx5/mlx5_rx.c b/drivers/net/mlx5/mlx5_rx.c
index 6b2ef45..68564ea 100644
--- a/drivers/net/mlx5/mlx5_rx.c
+++ b/drivers/net/mlx5/mlx5_rx.c
@@ -19,12 +19,14 @@ 
 #include <mlx5_prm.h>
 #include <mlx5_common.h>
 #include <mlx5_common_mr.h>
+#include <rte_pmd_mlx5.h>
 
 #include "mlx5_autoconf.h"
 #include "mlx5_defs.h"
 #include "mlx5.h"
 #include "mlx5_utils.h"
 #include "mlx5_rxtx.h"
+#include "mlx5_devx.h"
 #include "mlx5_rx.h"
 
 
@@ -128,6 +130,16 @@ 
 	return RTE_ETH_RX_DESC_AVAIL;
 }
 
+/* Express the LWM saved in rxq (a WQE count) as a percentage of the queue. */
+static uint8_t
+mlx5_rxq_lwm_to_percentage(struct mlx5_rxq_priv *rxq)
+{
+	const struct mlx5_rxq_data *data = &rxq->ctrl->rxq;
+	const uint32_t nb_wqe = 1 << data->elts_n;
+
+	return (rxq->lwm * 100 / nb_wqe);
+}
+
 /**
  * DPDK callback to get the RX queue information.
  *
@@ -150,6 +162,7 @@ 
 {
 	struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_ctrl_get(dev, rx_queue_id);
 	struct mlx5_rxq_data *rxq = mlx5_rxq_data_get(dev, rx_queue_id);
+	struct mlx5_rxq_priv *rxq_priv = mlx5_rxq_get(dev, rx_queue_id);
 
 	if (!rxq)
 		return;
@@ -169,6 +182,7 @@ 
 	qinfo->nb_desc = mlx5_rxq_mprq_enabled(rxq) ?
 		RTE_BIT32(rxq->elts_n) * RTE_BIT32(rxq->log_strd_num) :
 		RTE_BIT32(rxq->elts_n);
+	qinfo->conf.lwm = mlx5_rxq_lwm_to_percentage(rxq_priv);
 }
 
 /**
@@ -1214,3 +1228,112 @@  int mlx5_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc)
 	rte_eth_dev_callback_process(dev, RTE_ETH_EVENT_RXQ_LIMIT_REACHED,
 				     (void *)(uintptr_t)rxq_idx);
 }
+
+/**
+ * DPDK callback to arm an Rx queue LWM (limit watermark) event.
+ * When the Rx queue fullness reaches the LWM limit, the driver catches
+ * a HW event and invokes the user event callback.
+ * After the last event handling, the user needs to call this API again
+ * to arm an additional event.
+ *
+ * @param dev
+ *   Pointer to the device structure.
+ * @param[in] rx_queue_id
+ *   Rx queue identifier.
+ * @param[in] lwm
+ *   The LWM value, defined as a percentage of the Rx queue size.
+ *   [1-99] to set a new LWM (update the old value).
+ *   0 to unarm the event.
+ *
+ * @return
+ *   0 : operation success.
+ *   Otherwise a negative value, the saved LWM is left unchanged:
+ *   - ENOMEM - not enough memory to create LWM event channel.
+ *   - EINVAL - no such Rxq, Rxq not created by devx or too small for lwm.
+ *   - E2BIG  - lwm is bigger than 99.
+ */
+int
+mlx5_rx_queue_set_lwm(struct rte_eth_dev *dev, uint16_t rx_queue_id,
+		      uint8_t lwm)
+{
+	struct mlx5_priv *priv = dev->data->dev_private;
+	uint16_t port_id = PORT_ID(priv);
+	struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, rx_queue_id);
+	uint16_t event_nums[1] = {MLX5_EVENT_TYPE_SRQ_LIMIT_REACHED};
+	uint32_t wqe_cnt;
+	uint32_t new_lwm;
+	uint32_t old_lwm;
+	uint64_t cookie;
+	int ret = 0;
+
+	if (!rxq) {
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+	/* Ensure the Rq is created by devx. */
+	if (priv->obj_ops.rxq_obj_new != devx_obj_ops.rxq_obj_new) {
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+	if (lwm > 99) {
+		DRV_LOG(WARNING, "Too big LWM configuration.");
+		rte_errno = E2BIG;
+		return -rte_errno;
+	}
+	/* Start config LWM. */
+	pthread_mutex_lock(&priv->sh->lwm_config_lock);
+	/* Both old/new values are 0, do nothing. */
+	if (rxq->lwm == 0 && lwm == 0)
+		goto end;
+	wqe_cnt = UINT32_C(1) << rxq->ctrl->rxq.elts_n;
+	/* Convert the percentage to a WQE number in a local first, rounding */
+	/* up (but staying below wqe_cnt) to prevent integer division loss. */
+	new_lwm = lwm * wqe_cnt / 100;
+	if ((lwm * wqe_cnt % 100) && new_lwm + 1 < wqe_cnt)
+		new_lwm++;
+	if (lwm && !new_lwm) {
+		/* With mprq, wqe_cnt may be < 100. */
+		DRV_LOG(WARNING, "Too small LWM configuration.");
+		rte_errno = EINVAL;
+		ret = -rte_errno;
+		goto end;
+	}
+	if (lwm && !priv->sh->devx_channel_lwm) {
+		ret = mlx5_lwm_setup(priv);
+		if (ret) {
+			DRV_LOG(WARNING, "Failed to create shared_lwm.");
+			rte_errno = ENOMEM;
+			ret = -rte_errno;
+			goto end;
+		}
+	}
+	if (lwm && !rxq->lwm_devx_subscribed) {
+		/* Cast before shifting: uint16_t would promote to signed int. */
+		cookie = ((uint32_t)port_id << LWM_COOKIE_PORTID_OFFSET) |
+			 ((uint32_t)rx_queue_id << LWM_COOKIE_RXQID_OFFSET);
+		ret = mlx5_os_devx_subscribe_devx_event
+			(priv->sh->devx_channel_lwm,
+			 rxq->devx_rq.rq->obj,
+			 sizeof(event_nums),
+			 event_nums,
+			 cookie);
+		if (ret) {
+			rte_errno = rte_errno ? rte_errno : EINVAL;
+			ret = -rte_errno;
+			goto end;
+		}
+		rxq->lwm_devx_subscribed = 1;
+	}
+	/* Save LWM to rxq and send modify_rq devx command. */
+	old_lwm = rxq->lwm;
+	rxq->lwm = new_lwm;
+	ret = mlx5_devx_modify_rq(rxq, MLX5_RXQ_MOD_RDY2RDY);
+	/* On failure restore the old value, so that a later call with */
+	/* lwm == 0 is not mistaken for "nothing to do". */
+	if (ret)
+		rxq->lwm = old_lwm;
+end:
+	pthread_mutex_unlock(&priv->sh->lwm_config_lock);
+	return ret;
+}
+
diff --git a/drivers/net/mlx5/mlx5_rx.h b/drivers/net/mlx5/mlx5_rx.h
index 103509f..483ca12 100644
--- a/drivers/net/mlx5/mlx5_rx.h
+++ b/drivers/net/mlx5/mlx5_rx.h
@@ -176,6 +176,7 @@  struct mlx5_rxq_priv {
 	struct rte_eth_hairpin_conf hairpin_conf; /* Hairpin configuration. */
 	uint32_t hairpin_status; /* Hairpin binding status. */
 	uint32_t lwm:16;
+	uint32_t lwm_devx_subscribed:1;
 	void (*lwm_event_rxq_limit_reached)(uint16_t port_id, uint16_t rxq_id);
 };
 
@@ -297,6 +298,8 @@  int mlx5_rx_burst_mode_get(struct rte_eth_dev *dev, uint16_t rx_queue_id,
 			   struct rte_eth_burst_mode *mode);
 int mlx5_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc);
 void mlx5_dev_interrupt_handler_lwm(void *args);
+int mlx5_rx_queue_set_lwm(struct rte_eth_dev *dev, uint16_t rx_queue_id,
+			  uint8_t lwm);
 
 /* Vectorized version of mlx5_rx.c */
 int mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq_data);