[2/2] net/mlx5: add mbuf fast free offload support

Message ID 1608311697-31529-3-git-send-email-viacheslavo@nvidia.com (mailing list archive)
State Superseded, archived
Delegated to: Raslan Darawsheh
Series net/mlx5: add mbuf fast free Tx offload

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-broadcom-Functional success Functional Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-abi-testing success Testing PASS
ci/iol-testing success Testing PASS
ci/Intel-compilation success Compilation OK

Commit Message

Slava Ovsiienko Dec. 18, 2020, 5:14 p.m. UTC
  This patch adds support for the mbuf fast free offload to the
transmit datapath. This offload allows freeing the mbufs on
transmit completion in the most efficient way. It requires that
all mbufs be allocated from the same pool, have a reference
count of 1, and have no externally attached buffers.

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
---
 drivers/net/mlx5/mlx5_rxtx.c | 35 +++++++++++++++++++++++++++--------
 drivers/net/mlx5/mlx5_rxtx.h |  1 +
 drivers/net/mlx5/mlx5_txq.c  |  6 ++++++
 3 files changed, 34 insertions(+), 8 deletions(-)
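
For context, here is a minimal sketch (not part of the patch) of how an
application could request this offload when setting up a Tx queue. The
function and variable names (setup_tx_with_fast_free, port_id, queue_id,
nb_txd, socket_id) are placeholders for illustration only:

    #include <rte_ethdev.h>

    /* Sketch: enable the fast free Tx offload if the PMD reports it.
     * The application must then guarantee that, per Tx queue, all
     * transmitted mbufs come from a single mempool, have refcnt == 1
     * and carry no externally attached buffers.
     */
    static int
    setup_tx_with_fast_free(uint16_t port_id, uint16_t queue_id,
                            uint16_t nb_txd, unsigned int socket_id)
    {
            struct rte_eth_dev_info dev_info;
            struct rte_eth_conf port_conf = { 0 };
            struct rte_eth_txconf txconf;

            rte_eth_dev_info_get(port_id, &dev_info);
            if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
                    port_conf.txmode.offloads |= DEV_TX_OFFLOAD_MBUF_FAST_FREE;
            if (rte_eth_dev_configure(port_id, 1, 1, &port_conf) < 0)
                    return -1;
            /* Inherit the device-level Tx offloads for this queue. */
            txconf = dev_info.default_txconf;
            txconf.offloads = port_conf.txmode.offloads;
            return rte_eth_tx_queue_setup(port_id, queue_id, nb_txd,
                                          socket_id, &txconf);
    }

With the offload engaged, the PMD can return the whole completed batch to
its mempool with a single rte_mempool_put_bulk() call instead of freeing
each mbuf individually, which is exactly what the new branch in
mlx5_tx_free_mbuf() below does.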
  

Comments

Thomas Monjalon Dec. 18, 2020, 5:59 p.m. UTC | #1
18/12/2020 18:14, Viacheslav Ovsiienko:
> This patch adds support for the mbuf fast free offload to the
> transmit datapath. This offload allows freeing the mbufs on
> transmit completion in the most efficient way. It requires that
> all mbufs be allocated from the same pool, have a reference
> count of 1, and have no externally attached buffers.
> 
> Signed-off-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
> ---
>  drivers/net/mlx5/mlx5_rxtx.c | 35 +++++++++++++++++++++++++++--------
>  drivers/net/mlx5/mlx5_rxtx.h |  1 +
>  drivers/net/mlx5/mlx5_txq.c  |  6 ++++++

The doc update is missing:
	- release notes
	- mlx5 guide
	- mlx5 features file
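
For reference, a sketch of what such a doc update would typically contain
(paths follow the usual DPDK layout; the exact wording is illustrative and
not taken from a later revision of this series):

    doc/guides/nics/features/mlx5.ini:
        Fast mbuf free       = Y

    doc/guides/nics/mlx5.rst:
        Note that the offload is supported only when Multi-Packet Rx
        queue (MPRQ) is disabled and multi-segment Tx is not requested.

    doc/guides/rel_notes/<target release>.rst:
        A bullet under the mlx5 updates announcing support for
        DEV_TX_OFFLOAD_MBUF_FAST_FREE on the Tx datapath.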
  

Patch

diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index e8c8783..9545283 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -1839,6 +1839,8 @@  enum mlx5_txcmp_code {
 /**
  * Free the mbufs from the linear array of pointers.
  *
+ * @param txq
+ *   Pointer to Tx queue structure.
  * @param pkts
  *   Pointer to array of packets to be free.
  * @param pkts_n
@@ -1848,7 +1850,8 @@  enum mlx5_txcmp_code {
  *   compile time and may be used for optimization.
  */
 static __rte_always_inline void
-mlx5_tx_free_mbuf(struct rte_mbuf **__rte_restrict pkts,
+mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq,
+		  struct rte_mbuf **__rte_restrict pkts,
 		  unsigned int pkts_n,
 		  unsigned int olx __rte_unused)
 {
@@ -1864,6 +1867,16 @@  enum mlx5_txcmp_code {
 	 */
 	MLX5_ASSERT(pkts);
 	MLX5_ASSERT(pkts_n);
+	/*
+	 * Free mbufs directly to the pool in bulk
+	 * if fast free offload is engaged
+	 */
+	if (!MLX5_TXOFF_CONFIG(MULTI) && txq->fast_free) {
+		mbuf = *pkts;
+		pool = mbuf->pool;
+		rte_mempool_put_bulk(pool, (void *)pkts, pkts_n);
+		return;
+	}
 	for (;;) {
 		for (;;) {
 			/*
@@ -1945,11 +1958,12 @@  enum mlx5_txcmp_code {
  * on the tx_burst completion.
  */
 static __rte_noinline void
-__mlx5_tx_free_mbuf(struct rte_mbuf **__rte_restrict pkts,
+__mlx5_tx_free_mbuf(struct mlx5_txq_data *__rte_restrict txq,
+		    struct rte_mbuf **__rte_restrict pkts,
 		    unsigned int pkts_n,
 		    unsigned int olx __rte_unused)
 {
-	mlx5_tx_free_mbuf(pkts, pkts_n, olx);
+	mlx5_tx_free_mbuf(txq, pkts, pkts_n, olx);
 }
 
 /**
@@ -1983,7 +1997,8 @@  enum mlx5_txcmp_code {
 		part = RTE_MIN(part, n_elts);
 		MLX5_ASSERT(part);
 		MLX5_ASSERT(part <= txq->elts_s);
-		mlx5_tx_free_mbuf(&txq->elts[txq->elts_tail & txq->elts_m],
+		mlx5_tx_free_mbuf(txq,
+				  &txq->elts[txq->elts_tail & txq->elts_m],
 				  part, olx);
 		txq->elts_tail += part;
 		n_elts -= part;
@@ -5172,7 +5187,7 @@  enum mlx5_txcmp_code {
 	txq->stats.opackets += loc.pkts_sent;
 #endif
 	if (MLX5_TXOFF_CONFIG(INLINE) && loc.mbuf_free)
-		__mlx5_tx_free_mbuf(pkts, loc.mbuf_free, olx);
+		__mlx5_tx_free_mbuf(txq, pkts, loc.mbuf_free, olx);
 	return loc.pkts_sent;
 }
 
@@ -5827,17 +5842,19 @@  enum mlx5_txcmp_code {
 
 int
 mlx5_tx_burst_mode_get(struct rte_eth_dev *dev,
-		       uint16_t tx_queue_id __rte_unused,
+		       uint16_t tx_queue_id,
 		       struct rte_eth_burst_mode *mode)
 {
 	eth_tx_burst_t pkt_burst = dev->tx_pkt_burst;
+	struct mlx5_priv *priv = dev->data->dev_private;
+	struct mlx5_txq_data *txq = (*priv->txqs)[tx_queue_id];
 	unsigned int i, olx;
 
 	for (i = 0; i < RTE_DIM(txoff_func); i++) {
 		if (pkt_burst == txoff_func[i].func) {
 			olx = txoff_func[i].olx;
 			snprintf(mode->info, sizeof(mode->info),
-				 "%s%s%s%s%s%s%s%s%s",
+				 "%s%s%s%s%s%s%s%s%s%s",
 				 (olx & MLX5_TXOFF_CONFIG_EMPW) ?
 				 ((olx & MLX5_TXOFF_CONFIG_MPW) ?
 				 "Legacy MPW" : "Enhanced MPW") : "No MPW",
@@ -5856,7 +5873,9 @@  enum mlx5_txcmp_code {
 				 (olx & MLX5_TXOFF_CONFIG_METADATA) ?
 				 " + METADATA" : "",
 				 (olx & MLX5_TXOFF_CONFIG_TXPP) ?
-				 " + TXPP" : "");
+				 " + TXPP" : "",
+				 (txq && txq->fast_free) ?
+				 " + Fast Free" : "");
 			return 0;
 		}
 	}
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index fc5cc2e..1b2983d 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -251,6 +251,7 @@  struct mlx5_txq_data {
 	uint16_t vlan_en:1; /* VLAN insertion in WQE is supported. */
 	uint16_t db_nc:1; /* Doorbell mapped to non-cached region. */
 	uint16_t db_heu:1; /* Doorbell heuristic write barrier. */
+	uint16_t fast_free:1; /* mbuf fast free on Tx is enabled. */
 	uint16_t inlen_send; /* Ordinary send data inline size. */
 	uint16_t inlen_empw; /* eMPW max packet size to inline. */
 	uint16_t inlen_mode; /* Minimal data length to inline. */
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index d96abef..c51c85b 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -123,6 +123,8 @@ 
 				     DEV_TX_OFFLOAD_GRE_TNL_TSO |
 				     DEV_TX_OFFLOAD_GENEVE_TNL_TSO);
 	}
+	if (!config->mprq.enabled)
+		offloads |= DEV_TX_OFFLOAD_MBUF_FAST_FREE;
 	return offloads;
 }
 
@@ -800,6 +802,10 @@ 
 	bool vlan_inline;
 	unsigned int temp;
 
+	txq_ctrl->txq.fast_free =
+		!!((txq_ctrl->txq.offloads & DEV_TX_OFFLOAD_MBUF_FAST_FREE) &&
+		   !(txq_ctrl->txq.offloads & DEV_TX_OFFLOAD_MULTI_SEGS) &&
+		   !config->mprq.enabled);
 	if (config->txqs_inline == MLX5_ARG_UNSET)
 		txqs_inline =
 #if defined(RTE_ARCH_ARM64)