[v11,2/4] net/i40e: implement mbufs recycle mode

Message ID 20230822072710.1945027-3-feifei.wang2@arm.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Headers
Series Recycle mbufs from Tx queue into Rx queue |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Feifei Wang Aug. 22, 2023, 7:27 a.m. UTC
  Define specific function implementation for i40e driver.
Currently, mbufs recycle mode can support 128bit
vector path and avx2 path. And can be enabled both in
fast free and no fast free mode.

Suggested-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
 drivers/net/i40e/i40e_ethdev.c                |   1 +
 drivers/net/i40e/i40e_ethdev.h                |   2 +
 .../net/i40e/i40e_recycle_mbufs_vec_common.c  | 147 ++++++++++++++++++
 drivers/net/i40e/i40e_rxtx.c                  |  32 ++++
 drivers/net/i40e/i40e_rxtx.h                  |   4 +
 drivers/net/i40e/meson.build                  |   1 +
 6 files changed, 187 insertions(+)
 create mode 100644 drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
  

Comments

Konstantin Ananyev Aug. 22, 2023, 11:43 p.m. UTC | #1
22/08/2023 08:27, Feifei Wang пишет:
> Define specific function implementation for i40e driver.
> Currently, mbufs recycle mode can support 128bit
> vector path and avx2 path. And can be enabled both in
> fast free and no fast free mode.
> 
> Suggested-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> ---
>   drivers/net/i40e/i40e_ethdev.c                |   1 +
>   drivers/net/i40e/i40e_ethdev.h                |   2 +
>   .../net/i40e/i40e_recycle_mbufs_vec_common.c  | 147 ++++++++++++++++++
>   drivers/net/i40e/i40e_rxtx.c                  |  32 ++++
>   drivers/net/i40e/i40e_rxtx.h                  |   4 +
>   drivers/net/i40e/meson.build                  |   1 +
>   6 files changed, 187 insertions(+)
>   create mode 100644 drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> 
> diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
> index 8271bbb394..50ba9aac94 100644
> --- a/drivers/net/i40e/i40e_ethdev.c
> +++ b/drivers/net/i40e/i40e_ethdev.c
> @@ -496,6 +496,7 @@ static const struct eth_dev_ops i40e_eth_dev_ops = {
>   	.flow_ops_get                 = i40e_dev_flow_ops_get,
>   	.rxq_info_get                 = i40e_rxq_info_get,
>   	.txq_info_get                 = i40e_txq_info_get,
> +	.recycle_rxq_info_get         = i40e_recycle_rxq_info_get,
>   	.rx_burst_mode_get            = i40e_rx_burst_mode_get,
>   	.tx_burst_mode_get            = i40e_tx_burst_mode_get,
>   	.timesync_enable              = i40e_timesync_enable,
> diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
> index 6f65d5e0ac..af758798e1 100644
> --- a/drivers/net/i40e/i40e_ethdev.h
> +++ b/drivers/net/i40e/i40e_ethdev.h
> @@ -1355,6 +1355,8 @@ void i40e_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
>   	struct rte_eth_rxq_info *qinfo);
>   void i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
>   	struct rte_eth_txq_info *qinfo);
> +void i40e_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
> +	struct rte_eth_recycle_rxq_info *recycle_rxq_info);
>   int i40e_rx_burst_mode_get(struct rte_eth_dev *dev, uint16_t queue_id,
>   			   struct rte_eth_burst_mode *mode);
>   int i40e_tx_burst_mode_get(struct rte_eth_dev *dev, uint16_t queue_id,
> diff --git a/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> new file mode 100644
> index 0000000000..5663ecccde
> --- /dev/null
> +++ b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> @@ -0,0 +1,147 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright (c) 2023 Arm Limited.
> + */
> +
> +#include <stdint.h>
> +#include <ethdev_driver.h>
> +
> +#include "base/i40e_prototype.h"
> +#include "base/i40e_type.h"
> +#include "i40e_ethdev.h"
> +#include "i40e_rxtx.h"
> +
> +#pragma GCC diagnostic ignored "-Wcast-qual"
> +
> +void
> +i40e_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t nb_mbufs)
> +{
> +	struct i40e_rx_queue *rxq = rx_queue;
> +	struct i40e_rx_entry *rxep;
> +	volatile union i40e_rx_desc *rxdp;
> +	uint16_t rx_id;
> +	uint64_t paddr;
> +	uint64_t dma_addr;
> +	uint16_t i;
> +
> +	rxdp = rxq->rx_ring + rxq->rxrearm_start;
> +	rxep = &rxq->sw_ring[rxq->rxrearm_start];
> +
> +	for (i = 0; i < nb_mbufs; i++) {
> +		/* Initialize rxdp descs. */
> +		paddr = (rxep[i].mbuf)->buf_iova + RTE_PKTMBUF_HEADROOM;
> +		dma_addr = rte_cpu_to_le_64(paddr);
> +		/* flush desc with pa dma_addr */
> +		rxdp[i].read.hdr_addr = 0;
> +		rxdp[i].read.pkt_addr = dma_addr;
> +	}
> +
> +	/* Update the descriptor initializer index */
> +	rxq->rxrearm_start += nb_mbufs;
> +	rx_id = rxq->rxrearm_start - 1;
> +
> +	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
> +		rxq->rxrearm_start = 0;
> +		rx_id = rxq->nb_rx_desc - 1;
> +	}
> +
> +	rxq->rxrearm_nb -= nb_mbufs;
> +
> +	rte_io_wmb();
> +	/* Update the tail pointer on the NIC */
> +	I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id);
> +}
> +
> +uint16_t
> +i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
> +	struct rte_eth_recycle_rxq_info *recycle_rxq_info)
> +{
> +	struct i40e_tx_queue *txq = tx_queue;
> +	struct i40e_tx_entry *txep;
> +	struct rte_mbuf **rxep;
> +	int i, n;
> +	uint16_t nb_recycle_mbufs;
> +	uint16_t avail = 0;
> +	uint16_t mbuf_ring_size = recycle_rxq_info->mbuf_ring_size;
> +	uint16_t mask = recycle_rxq_info->mbuf_ring_size - 1;
> +	uint16_t refill_requirement = recycle_rxq_info->refill_requirement;
> +	uint16_t refill_head = *recycle_rxq_info->refill_head;
> +	uint16_t receive_tail = *recycle_rxq_info->receive_tail;
> +
> +	/* Get available recycling Rx buffers. */
> +	avail = (mbuf_ring_size - (refill_head - receive_tail)) & mask;
> +
> +	/* Check Tx free thresh and Rx available space. */
> +	if (txq->nb_tx_free > txq->tx_free_thresh || avail <= txq->tx_rs_thresh)
> +		return 0;
> +
> +	/* check DD bits on threshold descriptor */
> +	if ((txq->tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
> +				rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
> +			rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
> +		return 0;
> +
> +	n = txq->tx_rs_thresh;
> +	nb_recycle_mbufs = n;
> +
> +	/* Mbufs recycle mode can only support no ring buffer wrapping around.
> +	 * Two case for this:
> +	 *
> +	 * case 1: The refill head of Rx buffer ring needs to be aligned with
> +	 * mbuf ring size. In this case, the number of Tx freeing buffers
> +	 * should be equal to refill_requirement.
> +	 *
> +	 * case 2: The refill head of Rx ring buffer does not need to be aligned
> +	 * with mbuf ring size. In this case, the update of refill head can not
> +	 * exceed the Rx mbuf ring size.
> +	 */
> +	if (refill_requirement != n ||
> +		(!refill_requirement && (refill_head + n > mbuf_ring_size)))
> +		return 0;
> +
> +	/* First buffer to free from S/W ring is at index
> +	 * tx_next_dd - (tx_rs_thresh-1).
> +	 */
> +	txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
> +	rxep = recycle_rxq_info->mbuf_ring;
> +	rxep += refill_head;
> +
> +	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
> +		/* Avoid txq contains buffers from unexpected mempool. */
> +		if (unlikely(recycle_rxq_info->mp
> +					!= txep[0].mbuf->pool))
> +			return 0;
> +
> +		/* Directly put mbufs from Tx to Rx. */
> +		for (i = 0; i < n; i++)
> +			rxep[i] = txep[i].mbuf;
> +	} else {
> +		for (i = 0; i < n; i++) {
> +			rxep[i] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> +
> +			/* If Tx buffers are not the last reference or from
> +			 * unexpected mempool, previous copied buffers are
> +			 * considered as invalid.
> +			 */
> +			if (unlikely((rxep[i] == NULL && refill_requirement) ||

Could you pls remind me why it is ok to have rxep[i]==NULL when 
refill_requirement is not set?

> +					recycle_rxq_info->mp != txep[i].mbuf->pool))
> +				nb_recycle_mbufs = 0;
> +		}
> +		/* If Tx buffers are not the last reference or
> +		 * from unexpected mempool, all recycled buffers
> +		 * are put into mempool.
> +		 */
> +		if (nb_recycle_mbufs == 0)
> +			for (i = 0; i < n; i++) {
> +				if (rxep[i] != NULL)
> +					rte_mempool_put(rxep[i]->pool, rxep[i]);
> +			}
> +	}
> +
> +	/* Update counters for Tx. */
> +	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
> +	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
> +	if (txq->tx_next_dd >= txq->nb_tx_desc)
> +		txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
> +
> +	return nb_recycle_mbufs;
> +}
  
Feifei Wang Aug. 24, 2023, 6:10 a.m. UTC | #2
For konstantin

> -----Original Message-----
> From: Feifei Wang <feifei.wang2@arm.com>
> Sent: Tuesday, August 22, 2023 3:27 PM
> To: Yuying Zhang <Yuying.Zhang@intel.com>; Beilei Xing
> <beilei.xing@intel.com>
> Cc: dev@dpdk.org; nd <nd@arm.com>; Feifei Wang
> <Feifei.Wang2@arm.com>; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>
> Subject: [PATCH v11 2/4] net/i40e: implement mbufs recycle mode
> 
> Define specific function implementation for i40e driver.
> Currently, mbufs recycle mode can support 128bit vector path and avx2 path.
> And can be enabled both in fast free and no fast free mode.
> 
> Suggested-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> ---
>  drivers/net/i40e/i40e_ethdev.c                |   1 +
>  drivers/net/i40e/i40e_ethdev.h                |   2 +
>  .../net/i40e/i40e_recycle_mbufs_vec_common.c  | 147
> ++++++++++++++++++
>  drivers/net/i40e/i40e_rxtx.c                  |  32 ++++
>  drivers/net/i40e/i40e_rxtx.h                  |   4 +
>  drivers/net/i40e/meson.build                  |   1 +
>  6 files changed, 187 insertions(+)
>  create mode 100644 drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> 
> diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
> index 8271bbb394..50ba9aac94 100644
> --- a/drivers/net/i40e/i40e_ethdev.c
> +++ b/drivers/net/i40e/i40e_ethdev.c
> @@ -496,6 +496,7 @@ static const struct eth_dev_ops i40e_eth_dev_ops = {
>  	.flow_ops_get                 = i40e_dev_flow_ops_get,
>  	.rxq_info_get                 = i40e_rxq_info_get,
>  	.txq_info_get                 = i40e_txq_info_get,
> +	.recycle_rxq_info_get         = i40e_recycle_rxq_info_get,
>  	.rx_burst_mode_get            = i40e_rx_burst_mode_get,
>  	.tx_burst_mode_get            = i40e_tx_burst_mode_get,
>  	.timesync_enable              = i40e_timesync_enable,
> diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
> index 6f65d5e0ac..af758798e1 100644
> --- a/drivers/net/i40e/i40e_ethdev.h
> +++ b/drivers/net/i40e/i40e_ethdev.h
> @@ -1355,6 +1355,8 @@ void i40e_rxq_info_get(struct rte_eth_dev *dev,
> uint16_t queue_id,
>  	struct rte_eth_rxq_info *qinfo);
>  void i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
>  	struct rte_eth_txq_info *qinfo);
> +void i40e_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
> +	struct rte_eth_recycle_rxq_info *recycle_rxq_info);
>  int i40e_rx_burst_mode_get(struct rte_eth_dev *dev, uint16_t queue_id,
>  			   struct rte_eth_burst_mode *mode);  int
> i40e_tx_burst_mode_get(struct rte_eth_dev *dev, uint16_t queue_id, diff --
> git a/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> new file mode 100644
> index 0000000000..5663ecccde
> --- /dev/null
> +++ b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> @@ -0,0 +1,147 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright (c) 2023 Arm Limited.
> + */
> +
> +#include <stdint.h>
> +#include <ethdev_driver.h>
> +
> +#include "base/i40e_prototype.h"
> +#include "base/i40e_type.h"
> +#include "i40e_ethdev.h"
> +#include "i40e_rxtx.h"
> +
> +#pragma GCC diagnostic ignored "-Wcast-qual"
> +
> +void
> +i40e_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t
> +nb_mbufs) {
> +	struct i40e_rx_queue *rxq = rx_queue;
> +	struct i40e_rx_entry *rxep;
> +	volatile union i40e_rx_desc *rxdp;
> +	uint16_t rx_id;
> +	uint64_t paddr;
> +	uint64_t dma_addr;
> +	uint16_t i;
> +
> +	rxdp = rxq->rx_ring + rxq->rxrearm_start;
> +	rxep = &rxq->sw_ring[rxq->rxrearm_start];
> +
> +	for (i = 0; i < nb_mbufs; i++) {
> +		/* Initialize rxdp descs. */
> +		paddr = (rxep[i].mbuf)->buf_iova +
> RTE_PKTMBUF_HEADROOM;
> +		dma_addr = rte_cpu_to_le_64(paddr);
> +		/* flush desc with pa dma_addr */
> +		rxdp[i].read.hdr_addr = 0;
> +		rxdp[i].read.pkt_addr = dma_addr;
> +	}
> +
> +	/* Update the descriptor initializer index */
> +	rxq->rxrearm_start += nb_mbufs;
> +	rx_id = rxq->rxrearm_start - 1;
> +
> +	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
> +		rxq->rxrearm_start = 0;
> +		rx_id = rxq->nb_rx_desc - 1;
> +	}
> +
> +	rxq->rxrearm_nb -= nb_mbufs;
> +
> +	rte_io_wmb();
> +	/* Update the tail pointer on the NIC */
> +	I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id); }
> +
> +uint16_t
> +i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
> +	struct rte_eth_recycle_rxq_info *recycle_rxq_info) {
> +	struct i40e_tx_queue *txq = tx_queue;
> +	struct i40e_tx_entry *txep;
> +	struct rte_mbuf **rxep;
> +	int i, n;
> +	uint16_t nb_recycle_mbufs;
> +	uint16_t avail = 0;
> +	uint16_t mbuf_ring_size = recycle_rxq_info->mbuf_ring_size;
> +	uint16_t mask = recycle_rxq_info->mbuf_ring_size - 1;
> +	uint16_t refill_requirement = recycle_rxq_info->refill_requirement;
> +	uint16_t refill_head = *recycle_rxq_info->refill_head;
> +	uint16_t receive_tail = *recycle_rxq_info->receive_tail;
> +
> +	/* Get available recycling Rx buffers. */
> +	avail = (mbuf_ring_size - (refill_head - receive_tail)) & mask;
> +
> +	/* Check Tx free thresh and Rx available space. */
> +	if (txq->nb_tx_free > txq->tx_free_thresh || avail <= txq->tx_rs_thresh)
> +		return 0;
> +
> +	/* check DD bits on threshold descriptor */
> +	if ((txq->tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
> +
> 	rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
> +
> 	rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
> +		return 0;
> +
> +	n = txq->tx_rs_thresh;
> +	nb_recycle_mbufs = n;
> +
> +	/* Mbufs recycle mode can only support no ring buffer wrapping
> around.
> +	 * Two case for this:
> +	 *
> +	 * case 1: The refill head of Rx buffer ring needs to be aligned with
> +	 * mbuf ring size. In this case, the number of Tx freeing buffers
> +	 * should be equal to refill_requirement.
> +	 *
> +	 * case 2: The refill head of Rx ring buffer does not need to be aligned
> +	 * with mbuf ring size. In this case, the update of refill head can not
> +	 * exceed the Rx mbuf ring size.
> +	 */
> +	if (refill_requirement != n ||
> +		(!refill_requirement && (refill_head + n > mbuf_ring_size)))
> +		return 0;
> +
> +	/* First buffer to free from S/W ring is at index
> +	 * tx_next_dd - (tx_rs_thresh-1).
> +	 */
> +	txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
> +	rxep = recycle_rxq_info->mbuf_ring;
> +	rxep += refill_head;
> +
> +	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
> +		/* Avoid txq contains buffers from unexpected mempool. */
> +		if (unlikely(recycle_rxq_info->mp
> +					!= txep[0].mbuf->pool))
> +			return 0;
> +
> +		/* Directly put mbufs from Tx to Rx. */
> +		for (i = 0; i < n; i++)
> +			rxep[i] = txep[i].mbuf;
> +	} else {
> +		for (i = 0; i < n; i++) {
> +			rxep[i] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> +
> +			/* If Tx buffers are not the last reference or from
> +			 * unexpected mempool, previous copied buffers are
> +			 * considered as invalid.
> +			 */
> +			if (unlikely((rxep[i] == NULL && refill_requirement) ||
[Konstantin]
Could you pls remind me why it is ok to have rxep[i]==NULL when 
refill_requirement is not set?

If refill_requirement is not zero, it means each Tx freed buffer must be valid and can be put into the Rx
sw_ring. Then the refill head of the Rx buffer ring can be aligned with the mbuf ring size. Briefly speaking,
the number of valid Tx freed buffers must be equal to the Rx refill_requirement. For example, the i40e driver.

If refill_requirement is zero, it means that the refill head of the Rx buffer ring does not need to be aligned
with the mbuf ring size; thus if Tx has n valid freed buffers, we just need to put these n buffers into the Rx sw-ring,
and the count does not have to be equal to the Rx rearm number setting. For example, the mlx5 driver.

In conclusion, the above difference is due to PMD drivers having different strategies to update their Rx rearm (refill) head.
For the i40e driver, if rearm_head exceeds 1024, it will be reset to 0, because the number of buffers per rearm is a fixed value by default.
For the mlx5 driver, its rearm_head can exceed 1024, and a mask is used to obtain the real index. Thus its rearm number can be a different value.

> +					recycle_rxq_info->mp != txep[i].mbuf-
> >pool))
> +				nb_recycle_mbufs = 0;
> +		}
> +		/* If Tx buffers are not the last reference or
> +		 * from unexpected mempool, all recycled buffers
> +		 * are put into mempool.
> +		 */
> +		if (nb_recycle_mbufs == 0)
> +			for (i = 0; i < n; i++) {
> +				if (rxep[i] != NULL)
> +					rte_mempool_put(rxep[i]->pool,
> rxep[i]);
> +			}
> +	}
> +
> +	/* Update counters for Tx. */
> +	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
> +	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
> +	if (txq->tx_next_dd >= txq->nb_tx_desc)
> +		txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
> +
> +	return nb_recycle_mbufs;
> +}
> diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c index
> b4f65b58fa..a9c9eb331c 100644
> --- a/drivers/net/i40e/i40e_rxtx.c
> +++ b/drivers/net/i40e/i40e_rxtx.c
> @@ -3199,6 +3199,30 @@ i40e_txq_info_get(struct rte_eth_dev *dev,
> uint16_t queue_id,
>  	qinfo->conf.offloads = txq->offloads;
>  }
> 
> +void
> +i40e_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
> +	struct rte_eth_recycle_rxq_info *recycle_rxq_info) {
> +	struct i40e_rx_queue *rxq;
> +	struct i40e_adapter *ad =
> +		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
> +
> +	rxq = dev->data->rx_queues[queue_id];
> +
> +	recycle_rxq_info->mbuf_ring = (void *)rxq->sw_ring;
> +	recycle_rxq_info->mp = rxq->mp;
> +	recycle_rxq_info->mbuf_ring_size = rxq->nb_rx_desc;
> +	recycle_rxq_info->receive_tail = &rxq->rx_tail;
> +
> +	if (ad->rx_vec_allowed) {
> +		recycle_rxq_info->refill_requirement =
> RTE_I40E_RXQ_REARM_THRESH;
> +		recycle_rxq_info->refill_head = &rxq->rxrearm_start;
> +	} else {
> +		recycle_rxq_info->refill_requirement = rxq->rx_free_thresh;
> +		recycle_rxq_info->refill_head = &rxq->rx_free_trigger;
> +	}
> +}
> +
>  #ifdef RTE_ARCH_X86
>  static inline bool
>  get_avx_supported(bool request_avx512)
> @@ -3293,6 +3317,8 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
>  				dev->rx_pkt_burst = ad->rx_use_avx2 ?
>  					i40e_recv_scattered_pkts_vec_avx2 :
>  					i40e_recv_scattered_pkts_vec;
> +				dev->recycle_rx_descriptors_refill =
> +					i40e_recycle_rx_descriptors_refill_vec;
>  			}
>  		} else {
>  			if (ad->rx_use_avx512) {
> @@ -3311,9 +3337,12 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
>  				dev->rx_pkt_burst = ad->rx_use_avx2 ?
>  					i40e_recv_pkts_vec_avx2 :
>  					i40e_recv_pkts_vec;
> +				dev->recycle_rx_descriptors_refill =
> +					i40e_recycle_rx_descriptors_refill_vec;
>  			}
>  		}
>  #else /* RTE_ARCH_X86 */
> +		dev->recycle_rx_descriptors_refill =
> +i40e_recycle_rx_descriptors_refill_vec;
>  		if (dev->data->scattered_rx) {
>  			PMD_INIT_LOG(DEBUG,
>  				     "Using Vector Scattered Rx (port %d).", @@
> -3481,15 +3510,18 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
>  				dev->tx_pkt_burst = ad->tx_use_avx2 ?
>  						    i40e_xmit_pkts_vec_avx2 :
>  						    i40e_xmit_pkts_vec;
> +				dev->recycle_tx_mbufs_reuse =
> i40e_recycle_tx_mbufs_reuse_vec;
>  			}
>  #else /* RTE_ARCH_X86 */
>  			PMD_INIT_LOG(DEBUG, "Using Vector Tx (port %d).",
>  				     dev->data->port_id);
>  			dev->tx_pkt_burst = i40e_xmit_pkts_vec;
> +			dev->recycle_tx_mbufs_reuse =
> i40e_recycle_tx_mbufs_reuse_vec;
>  #endif /* RTE_ARCH_X86 */
>  		} else {
>  			PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
>  			dev->tx_pkt_burst = i40e_xmit_pkts_simple;
> +			dev->recycle_tx_mbufs_reuse =
> i40e_recycle_tx_mbufs_reuse_vec;
>  		}
>  		dev->tx_pkt_prepare = i40e_simple_prep_pkts;
>  	} else {
> diff --git a/drivers/net/i40e/i40e_rxtx.h b/drivers/net/i40e/i40e_rxtx.h index
> a8686224e5..b191f23e1f 100644
> --- a/drivers/net/i40e/i40e_rxtx.h
> +++ b/drivers/net/i40e/i40e_rxtx.h
> @@ -236,6 +236,10 @@ uint32_t i40e_dev_rx_queue_count(void
> *rx_queue);  int i40e_dev_rx_descriptor_status(void *rx_queue, uint16_t
> offset);  int i40e_dev_tx_descriptor_status(void *tx_queue, uint16_t offset);
> 
> +uint16_t i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
> +		struct rte_eth_recycle_rxq_info *recycle_rxq_info); void
> +i40e_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t
> +nb_mbufs);
> +
>  uint16_t i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
>  			    uint16_t nb_pkts);
>  uint16_t i40e_recv_scattered_pkts_vec(void *rx_queue, diff --git
> a/drivers/net/i40e/meson.build b/drivers/net/i40e/meson.build index
> 8e53b87a65..3b1a233c84 100644
> --- a/drivers/net/i40e/meson.build
> +++ b/drivers/net/i40e/meson.build
> @@ -34,6 +34,7 @@ sources = files(
>          'i40e_tm.c',
>          'i40e_hash.c',
>          'i40e_vf_representor.c',
> +	'i40e_recycle_mbufs_vec_common.c',
>          'rte_pmd_i40e.c',
>  )
> 
> --
> 2.25.1
  
Konstantin Ananyev Aug. 31, 2023, 5:24 p.m. UTC | #3
> >
> > Define specific function implementation for i40e driver.
> > Currently, mbufs recycle mode can support 128bit vector path and avx2 path.
> > And can be enabled both in fast free and no fast free mode.
> >
> > Suggested-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> > ---
> >  drivers/net/i40e/i40e_ethdev.c                |   1 +
> >  drivers/net/i40e/i40e_ethdev.h                |   2 +
> >  .../net/i40e/i40e_recycle_mbufs_vec_common.c  | 147
> > ++++++++++++++++++
> >  drivers/net/i40e/i40e_rxtx.c                  |  32 ++++
> >  drivers/net/i40e/i40e_rxtx.h                  |   4 +
> >  drivers/net/i40e/meson.build                  |   1 +
> >  6 files changed, 187 insertions(+)
> >  create mode 100644 drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> >
> > diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
> > index 8271bbb394..50ba9aac94 100644
> > --- a/drivers/net/i40e/i40e_ethdev.c
> > +++ b/drivers/net/i40e/i40e_ethdev.c
> > @@ -496,6 +496,7 @@ static const struct eth_dev_ops i40e_eth_dev_ops = {
> >  	.flow_ops_get                 = i40e_dev_flow_ops_get,
> >  	.rxq_info_get                 = i40e_rxq_info_get,
> >  	.txq_info_get                 = i40e_txq_info_get,
> > +	.recycle_rxq_info_get         = i40e_recycle_rxq_info_get,
> >  	.rx_burst_mode_get            = i40e_rx_burst_mode_get,
> >  	.tx_burst_mode_get            = i40e_tx_burst_mode_get,
> >  	.timesync_enable              = i40e_timesync_enable,
> > diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
> > index 6f65d5e0ac..af758798e1 100644
> > --- a/drivers/net/i40e/i40e_ethdev.h
> > +++ b/drivers/net/i40e/i40e_ethdev.h
> > @@ -1355,6 +1355,8 @@ void i40e_rxq_info_get(struct rte_eth_dev *dev,
> > uint16_t queue_id,
> >  	struct rte_eth_rxq_info *qinfo);
> >  void i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
> >  	struct rte_eth_txq_info *qinfo);
> > +void i40e_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
> > +	struct rte_eth_recycle_rxq_info *recycle_rxq_info);
> >  int i40e_rx_burst_mode_get(struct rte_eth_dev *dev, uint16_t queue_id,
> >  			   struct rte_eth_burst_mode *mode);  int
> > i40e_tx_burst_mode_get(struct rte_eth_dev *dev, uint16_t queue_id, diff --
> > git a/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > new file mode 100644
> > index 0000000000..5663ecccde
> > --- /dev/null
> > +++ b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > @@ -0,0 +1,147 @@
> > +/* SPDX-License-Identifier: BSD-3-Clause
> > + * Copyright (c) 2023 Arm Limited.
> > + */
> > +
> > +#include <stdint.h>
> > +#include <ethdev_driver.h>
> > +
> > +#include "base/i40e_prototype.h"
> > +#include "base/i40e_type.h"
> > +#include "i40e_ethdev.h"
> > +#include "i40e_rxtx.h"
> > +
> > +#pragma GCC diagnostic ignored "-Wcast-qual"
> > +
> > +void
> > +i40e_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t
> > +nb_mbufs) {
> > +	struct i40e_rx_queue *rxq = rx_queue;
> > +	struct i40e_rx_entry *rxep;
> > +	volatile union i40e_rx_desc *rxdp;
> > +	uint16_t rx_id;
> > +	uint64_t paddr;
> > +	uint64_t dma_addr;
> > +	uint16_t i;
> > +
> > +	rxdp = rxq->rx_ring + rxq->rxrearm_start;
> > +	rxep = &rxq->sw_ring[rxq->rxrearm_start];
> > +
> > +	for (i = 0; i < nb_mbufs; i++) {
> > +		/* Initialize rxdp descs. */
> > +		paddr = (rxep[i].mbuf)->buf_iova +
> > RTE_PKTMBUF_HEADROOM;
> > +		dma_addr = rte_cpu_to_le_64(paddr);
> > +		/* flush desc with pa dma_addr */
> > +		rxdp[i].read.hdr_addr = 0;
> > +		rxdp[i].read.pkt_addr = dma_addr;
> > +	}
> > +
> > +	/* Update the descriptor initializer index */
> > +	rxq->rxrearm_start += nb_mbufs;
> > +	rx_id = rxq->rxrearm_start - 1;
> > +
> > +	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
> > +		rxq->rxrearm_start = 0;
> > +		rx_id = rxq->nb_rx_desc - 1;
> > +	}
> > +
> > +	rxq->rxrearm_nb -= nb_mbufs;
> > +
> > +	rte_io_wmb();
> > +	/* Update the tail pointer on the NIC */
> > +	I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id); }
> > +
> > +uint16_t
> > +i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
> > +	struct rte_eth_recycle_rxq_info *recycle_rxq_info) {
> > +	struct i40e_tx_queue *txq = tx_queue;
> > +	struct i40e_tx_entry *txep;
> > +	struct rte_mbuf **rxep;
> > +	int i, n;
> > +	uint16_t nb_recycle_mbufs;
> > +	uint16_t avail = 0;
> > +	uint16_t mbuf_ring_size = recycle_rxq_info->mbuf_ring_size;
> > +	uint16_t mask = recycle_rxq_info->mbuf_ring_size - 1;
> > +	uint16_t refill_requirement = recycle_rxq_info->refill_requirement;
> > +	uint16_t refill_head = *recycle_rxq_info->refill_head;
> > +	uint16_t receive_tail = *recycle_rxq_info->receive_tail;
> > +
> > +	/* Get available recycling Rx buffers. */
> > +	avail = (mbuf_ring_size - (refill_head - receive_tail)) & mask;
> > +
> > +	/* Check Tx free thresh and Rx available space. */
> > +	if (txq->nb_tx_free > txq->tx_free_thresh || avail <= txq->tx_rs_thresh)
> > +		return 0;
> > +
> > +	/* check DD bits on threshold descriptor */
> > +	if ((txq->tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
> > +
> > 	rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
> > +
> > 	rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
> > +		return 0;
> > +
> > +	n = txq->tx_rs_thresh;
> > +	nb_recycle_mbufs = n;
> > +
> > +	/* Mbufs recycle mode can only support no ring buffer wrapping
> > around.
> > +	 * Two case for this:
> > +	 *
> > +	 * case 1: The refill head of Rx buffer ring needs to be aligned with
> > +	 * mbuf ring size. In this case, the number of Tx freeing buffers
> > +	 * should be equal to refill_requirement.
> > +	 *
> > +	 * case 2: The refill head of Rx ring buffer does not need to be aligned
> > +	 * with mbuf ring size. In this case, the update of refill head can not
> > +	 * exceed the Rx mbuf ring size.
> > +	 */
> > +	if (refill_requirement != n ||
> > +		(!refill_requirement && (refill_head + n > mbuf_ring_size)))
> > +		return 0;
> > +
> > +	/* First buffer to free from S/W ring is at index
> > +	 * tx_next_dd - (tx_rs_thresh-1).
> > +	 */
> > +	txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
> > +	rxep = recycle_rxq_info->mbuf_ring;
> > +	rxep += refill_head;
> > +
> > +	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
> > +		/* Avoid txq contains buffers from unexpected mempool. */
> > +		if (unlikely(recycle_rxq_info->mp
> > +					!= txep[0].mbuf->pool))
> > +			return 0;
> > +
> > +		/* Directly put mbufs from Tx to Rx. */
> > +		for (i = 0; i < n; i++)
> > +			rxep[i] = txep[i].mbuf;
> > +	} else {
> > +		for (i = 0; i < n; i++) {
> > +			rxep[i] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> > +
> > +			/* If Tx buffers are not the last reference or from
> > +			 * unexpected mempool, previous copied buffers are
> > +			 * considered as invalid.
> > +			 */
> > +			if (unlikely((rxep[i] == NULL && refill_requirement) ||
> [Konstantin]
> Could you pls remind me why it is ok to have rxep[i]==NULL when
> refill_requirement is not set?
> 
> If refill_requirement is not zero, it means each Tx freed buffer must be valid and can be put into the Rx
> sw_ring. Then the refill head of the Rx buffer ring can be aligned with the mbuf ring size. Briefly speaking,
> the number of valid Tx freed buffers must be equal to the Rx refill_requirement. For example, the i40e driver.
> 
> If refill_requirement is zero, it means that the refill head of the Rx buffer ring does not need to be aligned
> with the mbuf ring size; thus if Tx has n valid freed buffers, we just need to put these n buffers into the Rx sw-ring,
> and the count does not have to be equal to the Rx rearm number setting. For example, the mlx5 driver.
> 
> In conclusion, the above difference is due to PMD drivers having different strategies to update their Rx rearm (refill) head.
> For the i40e driver, if rearm_head exceeds 1024, it will be reset to 0, because the number of buffers per rearm is a fixed value by default.
> For the mlx5 driver, its rearm_head can exceed 1024, and a mask is used to obtain the real index. Thus its rearm number can be a different value.

Ok, but if rte_pktmbuf_prefree_seg(txep[i].mbuf) returns NULL, it means that this mbuf is not freed yet and can't be reused.
Shouldn't we then set nb_recycle_mbufs = 0 in that case too?
Or probably would be enough to skip that mbuf?
Might be something like that:

for (i = 0, j = 0; i < n; i++) {

	rxep[j] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
	if (rxep[j] == NULL || recycle_rxq_info->mp != rxep[j].mbuf->pool))  {
		if (refill_requirement) {
			nb_recycle_mbufs = 0;
			break; 
		}
	} else
		j++;
}

/* now j contains actual number of recycled mbufs */

?

> 
> > +					recycle_rxq_info->mp != txep[i].mbuf-
> > >pool))
> > +				nb_recycle_mbufs = 0;
> > +		}
> > +		/* If Tx buffers are not the last reference or
> > +		 * from unexpected mempool, all recycled buffers
> > +		 * are put into mempool.
> > +		 */
> > +		if (nb_recycle_mbufs == 0)
> > +			for (i = 0; i < n; i++) {
> > +				if (rxep[i] != NULL)
> > +					rte_mempool_put(rxep[i]->pool,
> > rxep[i]);
> > +			}
> > +	}
> > +
> > +	/* Update counters for Tx. */
> > +	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
> > +	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
> > +	if (txq->tx_next_dd >= txq->nb_tx_desc)
> > +		txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
> > +
> > +	return nb_recycle_mbufs;
> > +}
> > diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c index
> > b4f65b58fa..a9c9eb331c 100644
> > --- a/drivers/net/i40e/i40e_rxtx.c
> > +++ b/drivers/net/i40e/i40e_rxtx.c
> > @@ -3199,6 +3199,30 @@ i40e_txq_info_get(struct rte_eth_dev *dev,
> > uint16_t queue_id,
> >  	qinfo->conf.offloads = txq->offloads;
> >  }
> >
> > +void
> > +i40e_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
> > +	struct rte_eth_recycle_rxq_info *recycle_rxq_info) {
> > +	struct i40e_rx_queue *rxq;
> > +	struct i40e_adapter *ad =
> > +		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
> > +
> > +	rxq = dev->data->rx_queues[queue_id];
> > +
> > +	recycle_rxq_info->mbuf_ring = (void *)rxq->sw_ring;
> > +	recycle_rxq_info->mp = rxq->mp;
> > +	recycle_rxq_info->mbuf_ring_size = rxq->nb_rx_desc;
> > +	recycle_rxq_info->receive_tail = &rxq->rx_tail;
> > +
> > +	if (ad->rx_vec_allowed) {
> > +		recycle_rxq_info->refill_requirement =
> > RTE_I40E_RXQ_REARM_THRESH;
> > +		recycle_rxq_info->refill_head = &rxq->rxrearm_start;
> > +	} else {
> > +		recycle_rxq_info->refill_requirement = rxq->rx_free_thresh;
> > +		recycle_rxq_info->refill_head = &rxq->rx_free_trigger;
> > +	}
> > +}
> > +
> >  #ifdef RTE_ARCH_X86
> >  static inline bool
> >  get_avx_supported(bool request_avx512)
> > @@ -3293,6 +3317,8 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
> >  				dev->rx_pkt_burst = ad->rx_use_avx2 ?
> >  					i40e_recv_scattered_pkts_vec_avx2 :
> >  					i40e_recv_scattered_pkts_vec;
> > +				dev->recycle_rx_descriptors_refill =
> > +					i40e_recycle_rx_descriptors_refill_vec;
> >  			}
> >  		} else {
> >  			if (ad->rx_use_avx512) {
> > @@ -3311,9 +3337,12 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
> >  				dev->rx_pkt_burst = ad->rx_use_avx2 ?
> >  					i40e_recv_pkts_vec_avx2 :
> >  					i40e_recv_pkts_vec;
> > +				dev->recycle_rx_descriptors_refill =
> > +					i40e_recycle_rx_descriptors_refill_vec;
> >  			}
> >  		}
> >  #else /* RTE_ARCH_X86 */
> > +		dev->recycle_rx_descriptors_refill =
> > +i40e_recycle_rx_descriptors_refill_vec;
> >  		if (dev->data->scattered_rx) {
> >  			PMD_INIT_LOG(DEBUG,
> >  				     "Using Vector Scattered Rx (port %d).", @@
> > -3481,15 +3510,18 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
> >  				dev->tx_pkt_burst = ad->tx_use_avx2 ?
> >  						    i40e_xmit_pkts_vec_avx2 :
> >  						    i40e_xmit_pkts_vec;
> > +				dev->recycle_tx_mbufs_reuse =
> > i40e_recycle_tx_mbufs_reuse_vec;
> >  			}
> >  #else /* RTE_ARCH_X86 */
> >  			PMD_INIT_LOG(DEBUG, "Using Vector Tx (port %d).",
> >  				     dev->data->port_id);
> >  			dev->tx_pkt_burst = i40e_xmit_pkts_vec;
> > +			dev->recycle_tx_mbufs_reuse =
> > i40e_recycle_tx_mbufs_reuse_vec;
> >  #endif /* RTE_ARCH_X86 */
> >  		} else {
> >  			PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
> >  			dev->tx_pkt_burst = i40e_xmit_pkts_simple;
> > +			dev->recycle_tx_mbufs_reuse =
> > i40e_recycle_tx_mbufs_reuse_vec;
> >  		}
> >  		dev->tx_pkt_prepare = i40e_simple_prep_pkts;
> >  	} else {
> > diff --git a/drivers/net/i40e/i40e_rxtx.h b/drivers/net/i40e/i40e_rxtx.h index
> > a8686224e5..b191f23e1f 100644
> > --- a/drivers/net/i40e/i40e_rxtx.h
> > +++ b/drivers/net/i40e/i40e_rxtx.h
> > @@ -236,6 +236,10 @@ uint32_t i40e_dev_rx_queue_count(void
> > *rx_queue);  int i40e_dev_rx_descriptor_status(void *rx_queue, uint16_t
> > offset);  int i40e_dev_tx_descriptor_status(void *tx_queue, uint16_t offset);
> >
> > +uint16_t i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
> > +		struct rte_eth_recycle_rxq_info *recycle_rxq_info); void
> > +i40e_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t
> > +nb_mbufs);
> > +
> >  uint16_t i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
> >  			    uint16_t nb_pkts);
> >  uint16_t i40e_recv_scattered_pkts_vec(void *rx_queue, diff --git
> > a/drivers/net/i40e/meson.build b/drivers/net/i40e/meson.build index
> > 8e53b87a65..3b1a233c84 100644
> > --- a/drivers/net/i40e/meson.build
> > +++ b/drivers/net/i40e/meson.build
> > @@ -34,6 +34,7 @@ sources = files(
> >          'i40e_tm.c',
> >          'i40e_hash.c',
> >          'i40e_vf_representor.c',
> > +	'i40e_recycle_mbufs_vec_common.c',
> >          'rte_pmd_i40e.c',
> >  )
> >
> > --
> > 2.25.1
  
Konstantin Ananyev Aug. 31, 2023, 11:49 p.m. UTC | #4
31/08/2023 18:24, Konstantin Ananyev пишет:
> 
> 
>>>
>>> Define specific function implementation for i40e driver.
>>> Currently, mbufs recycle mode can support 128bit vector path and avx2 path.
>>> And can be enabled both in fast free and no fast free mode.
>>>
>>> Suggested-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
>>> Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
>>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
>>> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
>>> ---
>>>   drivers/net/i40e/i40e_ethdev.c                |   1 +
>>>   drivers/net/i40e/i40e_ethdev.h                |   2 +
>>>   .../net/i40e/i40e_recycle_mbufs_vec_common.c  | 147
>>> ++++++++++++++++++
>>>   drivers/net/i40e/i40e_rxtx.c                  |  32 ++++
>>>   drivers/net/i40e/i40e_rxtx.h                  |   4 +
>>>   drivers/net/i40e/meson.build                  |   1 +
>>>   6 files changed, 187 insertions(+)
>>>   create mode 100644 drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
>>>
>>> diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
>>> index 8271bbb394..50ba9aac94 100644
>>> --- a/drivers/net/i40e/i40e_ethdev.c
>>> +++ b/drivers/net/i40e/i40e_ethdev.c
>>> @@ -496,6 +496,7 @@ static const struct eth_dev_ops i40e_eth_dev_ops = {
>>>   	.flow_ops_get                 = i40e_dev_flow_ops_get,
>>>   	.rxq_info_get                 = i40e_rxq_info_get,
>>>   	.txq_info_get                 = i40e_txq_info_get,
>>> +	.recycle_rxq_info_get         = i40e_recycle_rxq_info_get,
>>>   	.rx_burst_mode_get            = i40e_rx_burst_mode_get,
>>>   	.tx_burst_mode_get            = i40e_tx_burst_mode_get,
>>>   	.timesync_enable              = i40e_timesync_enable,
>>> diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
>>> index 6f65d5e0ac..af758798e1 100644
>>> --- a/drivers/net/i40e/i40e_ethdev.h
>>> +++ b/drivers/net/i40e/i40e_ethdev.h
>>> @@ -1355,6 +1355,8 @@ void i40e_rxq_info_get(struct rte_eth_dev *dev,
>>> uint16_t queue_id,
>>>   	struct rte_eth_rxq_info *qinfo);
>>>   void i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
>>>   	struct rte_eth_txq_info *qinfo);
>>> +void i40e_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
>>> +	struct rte_eth_recycle_rxq_info *recycle_rxq_info);
>>>   int i40e_rx_burst_mode_get(struct rte_eth_dev *dev, uint16_t queue_id,
>>>   			   struct rte_eth_burst_mode *mode);  int
>>> i40e_tx_burst_mode_get(struct rte_eth_dev *dev, uint16_t queue_id, diff --
>>> git a/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
>>> b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
>>> new file mode 100644
>>> index 0000000000..5663ecccde
>>> --- /dev/null
>>> +++ b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
>>> @@ -0,0 +1,147 @@
>>> +/* SPDX-License-Identifier: BSD-3-Clause
>>> + * Copyright (c) 2023 Arm Limited.
>>> + */
>>> +
>>> +#include <stdint.h>
>>> +#include <ethdev_driver.h>
>>> +
>>> +#include "base/i40e_prototype.h"
>>> +#include "base/i40e_type.h"
>>> +#include "i40e_ethdev.h"
>>> +#include "i40e_rxtx.h"
>>> +
>>> +#pragma GCC diagnostic ignored "-Wcast-qual"
>>> +
>>> +void
>>> +i40e_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t
>>> +nb_mbufs) {
>>> +	struct i40e_rx_queue *rxq = rx_queue;
>>> +	struct i40e_rx_entry *rxep;
>>> +	volatile union i40e_rx_desc *rxdp;
>>> +	uint16_t rx_id;
>>> +	uint64_t paddr;
>>> +	uint64_t dma_addr;
>>> +	uint16_t i;
>>> +
>>> +	rxdp = rxq->rx_ring + rxq->rxrearm_start;
>>> +	rxep = &rxq->sw_ring[rxq->rxrearm_start];
>>> +
>>> +	for (i = 0; i < nb_mbufs; i++) {
>>> +		/* Initialize rxdp descs. */
>>> +		paddr = (rxep[i].mbuf)->buf_iova +
>>> RTE_PKTMBUF_HEADROOM;
>>> +		dma_addr = rte_cpu_to_le_64(paddr);
>>> +		/* flush desc with pa dma_addr */
>>> +		rxdp[i].read.hdr_addr = 0;
>>> +		rxdp[i].read.pkt_addr = dma_addr;
>>> +	}
>>> +
>>> +	/* Update the descriptor initializer index */
>>> +	rxq->rxrearm_start += nb_mbufs;
>>> +	rx_id = rxq->rxrearm_start - 1;
>>> +
>>> +	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
>>> +		rxq->rxrearm_start = 0;
>>> +		rx_id = rxq->nb_rx_desc - 1;
>>> +	}
>>> +
>>> +	rxq->rxrearm_nb -= nb_mbufs;
>>> +
>>> +	rte_io_wmb();
>>> +	/* Update the tail pointer on the NIC */
>>> +	I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id); }
>>> +
>>> +uint16_t
>>> +i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
>>> +	struct rte_eth_recycle_rxq_info *recycle_rxq_info) {
>>> +	struct i40e_tx_queue *txq = tx_queue;
>>> +	struct i40e_tx_entry *txep;
>>> +	struct rte_mbuf **rxep;
>>> +	int i, n;
>>> +	uint16_t nb_recycle_mbufs;
>>> +	uint16_t avail = 0;
>>> +	uint16_t mbuf_ring_size = recycle_rxq_info->mbuf_ring_size;
>>> +	uint16_t mask = recycle_rxq_info->mbuf_ring_size - 1;
>>> +	uint16_t refill_requirement = recycle_rxq_info->refill_requirement;
>>> +	uint16_t refill_head = *recycle_rxq_info->refill_head;
>>> +	uint16_t receive_tail = *recycle_rxq_info->receive_tail;
>>> +
>>> +	/* Get available recycling Rx buffers. */
>>> +	avail = (mbuf_ring_size - (refill_head - receive_tail)) & mask;
>>> +
>>> +	/* Check Tx free thresh and Rx available space. */
>>> +	if (txq->nb_tx_free > txq->tx_free_thresh || avail <= txq->tx_rs_thresh)
>>> +		return 0;
>>> +
>>> +	/* check DD bits on threshold descriptor */
>>> +	if ((txq->tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
>>> +
>>> 	rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
>>> +
>>> 	rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
>>> +		return 0;
>>> +
>>> +	n = txq->tx_rs_thresh;
>>> +	nb_recycle_mbufs = n;
>>> +
>>> +	/* Mbufs recycle mode can only support no ring buffer wrapping
>>> around.
>>> +	 * Two case for this:
>>> +	 *
>>> +	 * case 1: The refill head of Rx buffer ring needs to be aligned with
>>> +	 * mbuf ring size. In this case, the number of Tx freeing buffers
>>> +	 * should be equal to refill_requirement.
>>> +	 *
>>> +	 * case 2: The refill head of Rx ring buffer does not need to be aligned
>>> +	 * with mbuf ring size. In this case, the update of refill head can not
>>> +	 * exceed the Rx mbuf ring size.
>>> +	 */
>>> +	if (refill_requirement != n ||
>>> +		(!refill_requirement && (refill_head + n > mbuf_ring_size)))
>>> +		return 0;
>>> +
>>> +	/* First buffer to free from S/W ring is at index
>>> +	 * tx_next_dd - (tx_rs_thresh-1).
>>> +	 */
>>> +	txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
>>> +	rxep = recycle_rxq_info->mbuf_ring;
>>> +	rxep += refill_head;
>>> +
>>> +	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
>>> +		/* Avoid txq contains buffers from unexpected mempool. */
>>> +		if (unlikely(recycle_rxq_info->mp
>>> +					!= txep[0].mbuf->pool))
>>> +			return 0;
>>> +
>>> +		/* Directly put mbufs from Tx to Rx. */
>>> +		for (i = 0; i < n; i++)
>>> +			rxep[i] = txep[i].mbuf;
>>> +	} else {
>>> +		for (i = 0; i < n; i++) {
>>> +			rxep[i] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
>>> +
>>> +			/* If Tx buffers are not the last reference or from
>>> +			 * unexpected mempool, previous copied buffers are
>>> +			 * considered as invalid.
>>> +			 */
>>> +			if (unlikely((rxep[i] == NULL && refill_requirement) ||
>> [Konstantin]
>> Could you pls remind me why it is ok to have rxep[i]==NULL when
>> refill_requirement is not set?
>>
>> If reill_requirement is not zero, it means each tx freed buffer must be valid and can be put into Rx
>> sw_ring. Then  the refill head of Rx buffer ring can be aligned with mbuf ring size. Briefly speaking
>> the number of Tx valid freed buffer must be equal to Rx refill_requirement. For example, i40e driver.
>>
>> If reill_requirement is zero, it means that the refill head of Rx buffer ring does not need to be aligned
>> with mbuf ring size, thus if Tx have n valid freed buffers, we just need to put these n buffers into Rx sw-ring,
>> and not to be equal to the Rx setting rearm number. For example, mlx5 driver.
>>
>> In conclusion, above difference is due to pmd drivers have different strategies to update their Rx rearm(refill) head.
>> For i40e driver, if rearm_head exceed 1024, it will be set as 0 due to  the number of each rearm is a fixed value by default.
>> For mlx5 driver. Its rearm_head can exceed 1024, and use mask to achieve real index. Thus its rearm number can be a different value.
> 
> Ok, but if rte_pktmbuf_prefree_seg(txep[i].mbuf), it means that this mbuf is not free yet and can't be reused.
> Shouldn't we then set nb_recycle_mbufs = 0 in that case too?
> Or probably would be enough to skip that mbuf?
> Might be something like that:
> 
> for (i = 0, j = 0; i < n; i++) {
> 
> 	rxep[j] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> 	if (rxep[j] == NULL || recycle_rxq_info->mp != rxep[j].mbuf->pool))  {
> 		if (refill_requirement) {
> 			nb_recycle_mbufs = 0;
> 			break;
> 		}
> 	} else
> 		j++;
> }
> 
> /* now j contains actual number of recycled mbufs */
> 
> ?


After another thought, it might be easier and cleaner just to:
if (rxep[j] == NULL || recycle_rxq_info->mp != rxep[j]->pool)
	nb_recycle_mbufs = 0;

Anyway, from my understanding - if rte_pktmbuf_prefree_seg(mbuf) returns 
NULL, then we can't recycle that mbuf.	


> 
>>
>>> +					recycle_rxq_info->mp != txep[i].mbuf-
>>>> pool))
>>> +				nb_recycle_mbufs = 0;
>>> +		}
>>> +		/* If Tx buffers are not the last reference or
>>> +		 * from unexpected mempool, all recycled buffers
>>> +		 * are put into mempool.
>>> +		 */
>>> +		if (nb_recycle_mbufs == 0)
>>> +			for (i = 0; i < n; i++) {
>>> +				if (rxep[i] != NULL)
>>> +					rte_mempool_put(rxep[i]->pool,
>>> rxep[i]);
>>> +			}
>>> +	}
>>> +
>>> +	/* Update counters for Tx. */
>>> +	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
>>> +	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
>>> +	if (txq->tx_next_dd >= txq->nb_tx_desc)
>>> +		txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
>>> +
>>> +	return nb_recycle_mbufs;
>>> +}
>>> diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c index
>>> b4f65b58fa..a9c9eb331c 100644
>>> --- a/drivers/net/i40e/i40e_rxtx.c
>>> +++ b/drivers/net/i40e/i40e_rxtx.c
>>> @@ -3199,6 +3199,30 @@ i40e_txq_info_get(struct rte_eth_dev *dev,
>>> uint16_t queue_id,
>>>   	qinfo->conf.offloads = txq->offloads;
>>>   }
>>>
>>> +void
>>> +i40e_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
>>> +	struct rte_eth_recycle_rxq_info *recycle_rxq_info) {
>>> +	struct i40e_rx_queue *rxq;
>>> +	struct i40e_adapter *ad =
>>> +		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
>>> +
>>> +	rxq = dev->data->rx_queues[queue_id];
>>> +
>>> +	recycle_rxq_info->mbuf_ring = (void *)rxq->sw_ring;
>>> +	recycle_rxq_info->mp = rxq->mp;
>>> +	recycle_rxq_info->mbuf_ring_size = rxq->nb_rx_desc;
>>> +	recycle_rxq_info->receive_tail = &rxq->rx_tail;
>>> +
>>> +	if (ad->rx_vec_allowed) {
>>> +		recycle_rxq_info->refill_requirement =
>>> RTE_I40E_RXQ_REARM_THRESH;
>>> +		recycle_rxq_info->refill_head = &rxq->rxrearm_start;
>>> +	} else {
>>> +		recycle_rxq_info->refill_requirement = rxq->rx_free_thresh;
>>> +		recycle_rxq_info->refill_head = &rxq->rx_free_trigger;
>>> +	}
>>> +}
>>> +
>>>   #ifdef RTE_ARCH_X86
>>>   static inline bool
>>>   get_avx_supported(bool request_avx512)
>>> @@ -3293,6 +3317,8 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
>>>   				dev->rx_pkt_burst = ad->rx_use_avx2 ?
>>>   					i40e_recv_scattered_pkts_vec_avx2 :
>>>   					i40e_recv_scattered_pkts_vec;
>>> +				dev->recycle_rx_descriptors_refill =
>>> +					i40e_recycle_rx_descriptors_refill_vec;
>>>   			}
>>>   		} else {
>>>   			if (ad->rx_use_avx512) {
>>> @@ -3311,9 +3337,12 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
>>>   				dev->rx_pkt_burst = ad->rx_use_avx2 ?
>>>   					i40e_recv_pkts_vec_avx2 :
>>>   					i40e_recv_pkts_vec;
>>> +				dev->recycle_rx_descriptors_refill =
>>> +					i40e_recycle_rx_descriptors_refill_vec;
>>>   			}
>>>   		}
>>>   #else /* RTE_ARCH_X86 */
>>> +		dev->recycle_rx_descriptors_refill =
>>> +i40e_recycle_rx_descriptors_refill_vec;
>>>   		if (dev->data->scattered_rx) {
>>>   			PMD_INIT_LOG(DEBUG,
>>>   				     "Using Vector Scattered Rx (port %d).", @@
>>> -3481,15 +3510,18 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
>>>   				dev->tx_pkt_burst = ad->tx_use_avx2 ?
>>>   						    i40e_xmit_pkts_vec_avx2 :
>>>   						    i40e_xmit_pkts_vec;
>>> +				dev->recycle_tx_mbufs_reuse =
>>> i40e_recycle_tx_mbufs_reuse_vec;
>>>   			}
>>>   #else /* RTE_ARCH_X86 */
>>>   			PMD_INIT_LOG(DEBUG, "Using Vector Tx (port %d).",
>>>   				     dev->data->port_id);
>>>   			dev->tx_pkt_burst = i40e_xmit_pkts_vec;
>>> +			dev->recycle_tx_mbufs_reuse =
>>> i40e_recycle_tx_mbufs_reuse_vec;
>>>   #endif /* RTE_ARCH_X86 */
>>>   		} else {
>>>   			PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
>>>   			dev->tx_pkt_burst = i40e_xmit_pkts_simple;
>>> +			dev->recycle_tx_mbufs_reuse =
>>> i40e_recycle_tx_mbufs_reuse_vec;
>>>   		}
>>>   		dev->tx_pkt_prepare = i40e_simple_prep_pkts;
>>>   	} else {
>>> diff --git a/drivers/net/i40e/i40e_rxtx.h b/drivers/net/i40e/i40e_rxtx.h index
>>> a8686224e5..b191f23e1f 100644
>>> --- a/drivers/net/i40e/i40e_rxtx.h
>>> +++ b/drivers/net/i40e/i40e_rxtx.h
>>> @@ -236,6 +236,10 @@ uint32_t i40e_dev_rx_queue_count(void
>>> *rx_queue);  int i40e_dev_rx_descriptor_status(void *rx_queue, uint16_t
>>> offset);  int i40e_dev_tx_descriptor_status(void *tx_queue, uint16_t offset);
>>>
>>> +uint16_t i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
>>> +		struct rte_eth_recycle_rxq_info *recycle_rxq_info); void
>>> +i40e_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t
>>> +nb_mbufs);
>>> +
>>>   uint16_t i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
>>>   			    uint16_t nb_pkts);
>>>   uint16_t i40e_recv_scattered_pkts_vec(void *rx_queue, diff --git
>>> a/drivers/net/i40e/meson.build b/drivers/net/i40e/meson.build index
>>> 8e53b87a65..3b1a233c84 100644
>>> --- a/drivers/net/i40e/meson.build
>>> +++ b/drivers/net/i40e/meson.build
>>> @@ -34,6 +34,7 @@ sources = files(
>>>           'i40e_tm.c',
>>>           'i40e_hash.c',
>>>           'i40e_vf_representor.c',
>>> +	'i40e_recycle_mbufs_vec_common.c',
>>>           'rte_pmd_i40e.c',
>>>   )
>>>
>>> --
>>> 2.25.1
>
  
Feifei Wang Sept. 1, 2023, 12:22 p.m. UTC | #5
> -----Original Message-----
> From: Konstantin Ananyev <konstantin.ananyev@huawei.com>
> Sent: Friday, September 1, 2023 1:25 AM
> To: Feifei Wang <Feifei.Wang2@arm.com>; Konstantin Ananyev
> <konstantin.v.ananyev@yandex.ru>
> Cc: dev@dpdk.org; nd <nd@arm.com>; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>; Yuying Zhang <Yuying.Zhang@intel.com>; Beilei
> Xing <beilei.xing@intel.com>; nd <nd@arm.com>
> Subject: RE: [PATCH v11 2/4] net/i40e: implement mbufs recycle mode
> 
> 
> 
> > >
> > > Define specific function implementation for i40e driver.
> > > Currently, mbufs recycle mode can support 128bit vector path and avx2
> path.
> > > And can be enabled both in fast free and no fast free mode.
> > >
> > > Suggested-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> > > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > > Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> > > ---
> > >  drivers/net/i40e/i40e_ethdev.c                |   1 +
> > >  drivers/net/i40e/i40e_ethdev.h                |   2 +
> > >  .../net/i40e/i40e_recycle_mbufs_vec_common.c  | 147
> > > ++++++++++++++++++
> > >  drivers/net/i40e/i40e_rxtx.c                  |  32 ++++
> > >  drivers/net/i40e/i40e_rxtx.h                  |   4 +
> > >  drivers/net/i40e/meson.build                  |   1 +
> > >  6 files changed, 187 insertions(+)
> > >  create mode 100644 drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > >
> > > diff --git a/drivers/net/i40e/i40e_ethdev.c
> > > b/drivers/net/i40e/i40e_ethdev.c index 8271bbb394..50ba9aac94 100644
> > > --- a/drivers/net/i40e/i40e_ethdev.c
> > > +++ b/drivers/net/i40e/i40e_ethdev.c
> > > @@ -496,6 +496,7 @@ static const struct eth_dev_ops i40e_eth_dev_ops
> = {
> > >  	.flow_ops_get                 = i40e_dev_flow_ops_get,
> > >  	.rxq_info_get                 = i40e_rxq_info_get,
> > >  	.txq_info_get                 = i40e_txq_info_get,
> > > +	.recycle_rxq_info_get         = i40e_recycle_rxq_info_get,
> > >  	.rx_burst_mode_get            = i40e_rx_burst_mode_get,
> > >  	.tx_burst_mode_get            = i40e_tx_burst_mode_get,
> > >  	.timesync_enable              = i40e_timesync_enable,
> > > diff --git a/drivers/net/i40e/i40e_ethdev.h
> > > b/drivers/net/i40e/i40e_ethdev.h index 6f65d5e0ac..af758798e1 100644
> > > --- a/drivers/net/i40e/i40e_ethdev.h
> > > +++ b/drivers/net/i40e/i40e_ethdev.h
> > > @@ -1355,6 +1355,8 @@ void i40e_rxq_info_get(struct rte_eth_dev
> > > *dev, uint16_t queue_id,
> > >  	struct rte_eth_rxq_info *qinfo);
> > >  void i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
> > >  	struct rte_eth_txq_info *qinfo);
> > > +void i40e_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t
> queue_id,
> > > +	struct rte_eth_recycle_rxq_info *recycle_rxq_info);
> > >  int i40e_rx_burst_mode_get(struct rte_eth_dev *dev, uint16_t queue_id,
> > >  			   struct rte_eth_burst_mode *mode);  int
> > > i40e_tx_burst_mode_get(struct rte_eth_dev *dev, uint16_t queue_id,
> > > diff -- git a/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > > b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > > new file mode 100644
> > > index 0000000000..5663ecccde
> > > --- /dev/null
> > > +++ b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > > @@ -0,0 +1,147 @@
> > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > + * Copyright (c) 2023 Arm Limited.
> > > + */
> > > +
> > > +#include <stdint.h>
> > > +#include <ethdev_driver.h>
> > > +
> > > +#include "base/i40e_prototype.h"
> > > +#include "base/i40e_type.h"
> > > +#include "i40e_ethdev.h"
> > > +#include "i40e_rxtx.h"
> > > +
> > > +#pragma GCC diagnostic ignored "-Wcast-qual"
> > > +
> > > +void
> > > +i40e_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t
> > > +nb_mbufs) {
> > > +	struct i40e_rx_queue *rxq = rx_queue;
> > > +	struct i40e_rx_entry *rxep;
> > > +	volatile union i40e_rx_desc *rxdp;
> > > +	uint16_t rx_id;
> > > +	uint64_t paddr;
> > > +	uint64_t dma_addr;
> > > +	uint16_t i;
> > > +
> > > +	rxdp = rxq->rx_ring + rxq->rxrearm_start;
> > > +	rxep = &rxq->sw_ring[rxq->rxrearm_start];
> > > +
> > > +	for (i = 0; i < nb_mbufs; i++) {
> > > +		/* Initialize rxdp descs. */
> > > +		paddr = (rxep[i].mbuf)->buf_iova +
> > > RTE_PKTMBUF_HEADROOM;
> > > +		dma_addr = rte_cpu_to_le_64(paddr);
> > > +		/* flush desc with pa dma_addr */
> > > +		rxdp[i].read.hdr_addr = 0;
> > > +		rxdp[i].read.pkt_addr = dma_addr;
> > > +	}
> > > +
> > > +	/* Update the descriptor initializer index */
> > > +	rxq->rxrearm_start += nb_mbufs;
> > > +	rx_id = rxq->rxrearm_start - 1;
> > > +
> > > +	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
> > > +		rxq->rxrearm_start = 0;
> > > +		rx_id = rxq->nb_rx_desc - 1;
> > > +	}
> > > +
> > > +	rxq->rxrearm_nb -= nb_mbufs;
> > > +
> > > +	rte_io_wmb();
> > > +	/* Update the tail pointer on the NIC */
> > > +	I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id); }
> > > +
> > > +uint16_t
> > > +i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
> > > +	struct rte_eth_recycle_rxq_info *recycle_rxq_info) {
> > > +	struct i40e_tx_queue *txq = tx_queue;
> > > +	struct i40e_tx_entry *txep;
> > > +	struct rte_mbuf **rxep;
> > > +	int i, n;
> > > +	uint16_t nb_recycle_mbufs;
> > > +	uint16_t avail = 0;
> > > +	uint16_t mbuf_ring_size = recycle_rxq_info->mbuf_ring_size;
> > > +	uint16_t mask = recycle_rxq_info->mbuf_ring_size - 1;
> > > +	uint16_t refill_requirement = recycle_rxq_info->refill_requirement;
> > > +	uint16_t refill_head = *recycle_rxq_info->refill_head;
> > > +	uint16_t receive_tail = *recycle_rxq_info->receive_tail;
> > > +
> > > +	/* Get available recycling Rx buffers. */
> > > +	avail = (mbuf_ring_size - (refill_head - receive_tail)) & mask;
> > > +
> > > +	/* Check Tx free thresh and Rx available space. */
> > > +	if (txq->nb_tx_free > txq->tx_free_thresh || avail <= txq->tx_rs_thresh)
> > > +		return 0;
> > > +
> > > +	/* check DD bits on threshold descriptor */
> > > +	if ((txq->tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
> > > +
> > > 	rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
> > > +
> > > 	rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
> > > +		return 0;
> > > +
> > > +	n = txq->tx_rs_thresh;
> > > +	nb_recycle_mbufs = n;
> > > +
> > > +	/* Mbufs recycle mode can only support no ring buffer wrapping
> > > around.
> > > +	 * Two case for this:
> > > +	 *
> > > +	 * case 1: The refill head of Rx buffer ring needs to be aligned with
> > > +	 * mbuf ring size. In this case, the number of Tx freeing buffers
> > > +	 * should be equal to refill_requirement.
> > > +	 *
> > > +	 * case 2: The refill head of Rx ring buffer does not need to be aligned
> > > +	 * with mbuf ring size. In this case, the update of refill head can not
> > > +	 * exceed the Rx mbuf ring size.
> > > +	 */
> > > +	if (refill_requirement != n ||
> > > +		(!refill_requirement && (refill_head + n > mbuf_ring_size)))
> > > +		return 0;
> > > +
> > > +	/* First buffer to free from S/W ring is at index
> > > +	 * tx_next_dd - (tx_rs_thresh-1).
> > > +	 */
> > > +	txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
> > > +	rxep = recycle_rxq_info->mbuf_ring;
> > > +	rxep += refill_head;
> > > +
> > > +	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
> > > +		/* Avoid txq contains buffers from unexpected mempool. */
> > > +		if (unlikely(recycle_rxq_info->mp
> > > +					!= txep[0].mbuf->pool))
> > > +			return 0;
> > > +
> > > +		/* Directly put mbufs from Tx to Rx. */
> > > +		for (i = 0; i < n; i++)
> > > +			rxep[i] = txep[i].mbuf;
> > > +	} else {
> > > +		for (i = 0; i < n; i++) {
> > > +			rxep[i] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> > > +
> > > +			/* If Tx buffers are not the last reference or from
> > > +			 * unexpected mempool, previous copied buffers are
> > > +			 * considered as invalid.
> > > +			 */
> > > +			if (unlikely((rxep[i] == NULL && refill_requirement) ||
> > [Konstantin]
> > Could you pls remind me why it is ok to have rxep[i]==NULL when
> > refill_requirement is not set?
> >
> > If reill_requirement is not zero, it means each tx freed buffer must
> > be valid and can be put into Rx sw_ring. Then  the refill head of Rx
> > buffer ring can be aligned with mbuf ring size. Briefly speaking the number
> of Tx valid freed buffer must be equal to Rx refill_requirement. For example,
> i40e driver.
> >
> > If reill_requirement is zero, it means that the refill head of Rx
> > buffer ring does not need to be aligned with mbuf ring size, thus if
> > Tx have n valid freed buffers, we just need to put these n buffers into Rx sw-
> ring, and not to be equal to the Rx setting rearm number. For example, mlx5
> driver.
> >
> > In conclusion, above difference is due to pmd drivers have different
> strategies to update their Rx rearm(refill) head.
> > For i40e driver, if rearm_head exceed 1024, it will be set as 0 due to  the
> number of each rearm is a fixed value by default.
> > For mlx5 driver. Its rearm_head can exceed 1024, and use mask to achieve
> real index. Thus its rearm number can be a different value.
> 
> Ok, but if rte_pktmbuf_prefree_seg(txep[i].mbuf), it means that this mbuf is
> not free yet and can't be reused.
> Shouldn't we then set nb_recycle_mbufs = 0 in that case too?
> Or probably would be enough to skip that mbuf?
> Might be something like that:
> 
> for (i = 0, j = 0; i < n; i++) {
> 
> 	rxep[j] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> 	if (rxep[j] == NULL || recycle_rxq_info->mp != rxep[j].mbuf->pool))  {
> 		if (refill_requirement) {
> 			nb_recycle_mbufs = 0;
> 			break;
> 		}
> 	} else
> 		j++;
> }
> 
> /* now j contains actual number of recycled mbufs */
> 
> ?
> 
> >
> > > +					recycle_rxq_info->mp != txep[i].mbuf-
> > > >pool))
> > > +				nb_recycle_mbufs = 0;
> > > +		}
> > > +		/* If Tx buffers are not the last reference or
> > > +		 * from unexpected mempool, all recycled buffers
> > > +		 * are put into mempool.
> > > +		 */
> > > +		if (nb_recycle_mbufs == 0)
> > > +			for (i = 0; i < n; i++) {
> > > +				if (rxep[i] != NULL)
> > > +					rte_mempool_put(rxep[i]->pool,
> > > rxep[i]);
> > > +			}
> > > +	}
> > > +
[Konstantin] After another thought, it might be easier and cleaner just to:
if (rxep[j] == NULL || recycle_rxq_info->mp != rxep[j].mbuf->pool)
	nb_recycle_mbufs = 0;

Anyway, from my understanding - if rte_pktmbuf_prefree_seg(mbuf) returns 
NULL, then we can't recycle that mbuf.	

[Feifei] Agree with that 'rte_pktmbuf_prefree_seg(mbuf) returns NULL, then we can't recycle that mbuf'.

Firstly, we should know that for the i40e driver, the number of mbufs to free is fixed: it must be equal to 'tx_rs_thresh'.
This means that once we start to free Tx mbufs, the process cannot be interrupted until the given amount of mbufs are freed.
Meanwhile, doing the prefree operation for a Tx mbuf can be regarded as freeing this mbuf from the Tx sw-ring
even if the API returns NULL, because calling the 'prefree_seg' function updates the mbuf refcnt.

So let's come back our recycle mbuf case.
Here we consider the case where 32 mbufs need to be freed, and we firstly do the pre-free.
The first 8 mbufs are good and the return value is not NULL. But the 9th mbuf is bad: its refcnt is more than 1.
So we just update its refcnt and cannot put it into the Rx sw-ring. From the Tx sw-ring's point of view, this mbuf has been freed.
Then we should continue the pre-free operation for the remaining Tx mbufs to ensure the given amount of
mbufs are freed.

To conclude: once we start the pre-free operation, the Tx mbuf refcnt values
may be changed, so we cannot stop or break until all the pre-free operations are finished.

Finally, in the case that Rx refill_requirement is not zero but the number of valid mbufs is less than this value,
we must put these Tx mbufs back into the mempool.

Above is the reason why I do not directly jump out of the loop if some mbuf's prefree return value is NULL.

> > > +	/* Update counters for Tx. */
> > > +	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
> > > +	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
> > > +	if (txq->tx_next_dd >= txq->nb_tx_desc)
> > > +		txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
> > > +
> > > +	return nb_recycle_mbufs;
> > > +}
> > > diff --git a/drivers/net/i40e/i40e_rxtx.c
> > > b/drivers/net/i40e/i40e_rxtx.c index b4f65b58fa..a9c9eb331c 100644
> > > --- a/drivers/net/i40e/i40e_rxtx.c
> > > +++ b/drivers/net/i40e/i40e_rxtx.c
> > > @@ -3199,6 +3199,30 @@ i40e_txq_info_get(struct rte_eth_dev *dev,
> > > uint16_t queue_id,
> > >  	qinfo->conf.offloads = txq->offloads;  }
> > >
> > > +void
> > > +i40e_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
> > > +	struct rte_eth_recycle_rxq_info *recycle_rxq_info) {
> > > +	struct i40e_rx_queue *rxq;
> > > +	struct i40e_adapter *ad =
> > > +		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
> > > +
> > > +	rxq = dev->data->rx_queues[queue_id];
> > > +
> > > +	recycle_rxq_info->mbuf_ring = (void *)rxq->sw_ring;
> > > +	recycle_rxq_info->mp = rxq->mp;
> > > +	recycle_rxq_info->mbuf_ring_size = rxq->nb_rx_desc;
> > > +	recycle_rxq_info->receive_tail = &rxq->rx_tail;
> > > +
> > > +	if (ad->rx_vec_allowed) {
> > > +		recycle_rxq_info->refill_requirement =
> > > RTE_I40E_RXQ_REARM_THRESH;
> > > +		recycle_rxq_info->refill_head = &rxq->rxrearm_start;
> > > +	} else {
> > > +		recycle_rxq_info->refill_requirement = rxq->rx_free_thresh;
> > > +		recycle_rxq_info->refill_head = &rxq->rx_free_trigger;
> > > +	}
> > > +}
> > > +
> > >  #ifdef RTE_ARCH_X86
> > >  static inline bool
> > >  get_avx_supported(bool request_avx512) @@ -3293,6 +3317,8 @@
> > > i40e_set_rx_function(struct rte_eth_dev *dev)
> > >  				dev->rx_pkt_burst = ad->rx_use_avx2 ?
> > >  					i40e_recv_scattered_pkts_vec_avx2 :
> > >  					i40e_recv_scattered_pkts_vec;
> > > +				dev->recycle_rx_descriptors_refill =
> > > +					i40e_recycle_rx_descriptors_refill_vec;
> > >  			}
> > >  		} else {
> > >  			if (ad->rx_use_avx512) {
> > > @@ -3311,9 +3337,12 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
> > >  				dev->rx_pkt_burst = ad->rx_use_avx2 ?
> > >  					i40e_recv_pkts_vec_avx2 :
> > >  					i40e_recv_pkts_vec;
> > > +				dev->recycle_rx_descriptors_refill =
> > > +					i40e_recycle_rx_descriptors_refill_vec;
> > >  			}
> > >  		}
> > >  #else /* RTE_ARCH_X86 */
> > > +		dev->recycle_rx_descriptors_refill =
> > > +i40e_recycle_rx_descriptors_refill_vec;
> > >  		if (dev->data->scattered_rx) {
> > >  			PMD_INIT_LOG(DEBUG,
> > >  				     "Using Vector Scattered Rx (port %d).", @@
> > > -3481,15 +3510,18 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
> > >  				dev->tx_pkt_burst = ad->tx_use_avx2 ?
> > >  						    i40e_xmit_pkts_vec_avx2 :
> > >  						    i40e_xmit_pkts_vec;
> > > +				dev->recycle_tx_mbufs_reuse =
> > > i40e_recycle_tx_mbufs_reuse_vec;
> > >  			}
> > >  #else /* RTE_ARCH_X86 */
> > >  			PMD_INIT_LOG(DEBUG, "Using Vector Tx (port %d).",
> > >  				     dev->data->port_id);
> > >  			dev->tx_pkt_burst = i40e_xmit_pkts_vec;
> > > +			dev->recycle_tx_mbufs_reuse =
> > > i40e_recycle_tx_mbufs_reuse_vec;
> > >  #endif /* RTE_ARCH_X86 */
> > >  		} else {
> > >  			PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
> > >  			dev->tx_pkt_burst = i40e_xmit_pkts_simple;
> > > +			dev->recycle_tx_mbufs_reuse =
> > > i40e_recycle_tx_mbufs_reuse_vec;
> > >  		}
> > >  		dev->tx_pkt_prepare = i40e_simple_prep_pkts;
> > >  	} else {
> > > diff --git a/drivers/net/i40e/i40e_rxtx.h
> > > b/drivers/net/i40e/i40e_rxtx.h index a8686224e5..b191f23e1f 100644
> > > --- a/drivers/net/i40e/i40e_rxtx.h
> > > +++ b/drivers/net/i40e/i40e_rxtx.h
> > > @@ -236,6 +236,10 @@ uint32_t i40e_dev_rx_queue_count(void
> > > *rx_queue);  int i40e_dev_rx_descriptor_status(void *rx_queue,
> > > uint16_t offset);  int i40e_dev_tx_descriptor_status(void *tx_queue,
> > > uint16_t offset);
> > >
> > > +uint16_t i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
> > > +		struct rte_eth_recycle_rxq_info *recycle_rxq_info); void
> > > +i40e_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t
> > > +nb_mbufs);
> > > +
> > >  uint16_t i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
> > >  			    uint16_t nb_pkts);
> > >  uint16_t i40e_recv_scattered_pkts_vec(void *rx_queue, diff --git
> > > a/drivers/net/i40e/meson.build b/drivers/net/i40e/meson.build index
> > > 8e53b87a65..3b1a233c84 100644
> > > --- a/drivers/net/i40e/meson.build
> > > +++ b/drivers/net/i40e/meson.build
> > > @@ -34,6 +34,7 @@ sources = files(
> > >          'i40e_tm.c',
> > >          'i40e_hash.c',
> > >          'i40e_vf_representor.c',
> > > +	'i40e_recycle_mbufs_vec_common.c',
> > >          'rte_pmd_i40e.c',
> > >  )
> > >
> > > --
> > > 2.25.1
  
Konstantin Ananyev Sept. 1, 2023, 2:22 p.m. UTC | #6
> > > >
> > > > Define specific function implementation for i40e driver.
> > > > Currently, mbufs recycle mode can support 128bit vector path and avx2
> > path.
> > > > And can be enabled both in fast free and no fast free mode.
> > > >
> > > > Suggested-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> > > > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > > > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > > > Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> > > > ---
> > > >  drivers/net/i40e/i40e_ethdev.c                |   1 +
> > > >  drivers/net/i40e/i40e_ethdev.h                |   2 +
> > > >  .../net/i40e/i40e_recycle_mbufs_vec_common.c  | 147
> > > > ++++++++++++++++++
> > > >  drivers/net/i40e/i40e_rxtx.c                  |  32 ++++
> > > >  drivers/net/i40e/i40e_rxtx.h                  |   4 +
> > > >  drivers/net/i40e/meson.build                  |   1 +
> > > >  6 files changed, 187 insertions(+)
> > > >  create mode 100644 drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > > >
> > > > diff --git a/drivers/net/i40e/i40e_ethdev.c
> > > > b/drivers/net/i40e/i40e_ethdev.c index 8271bbb394..50ba9aac94 100644
> > > > --- a/drivers/net/i40e/i40e_ethdev.c
> > > > +++ b/drivers/net/i40e/i40e_ethdev.c
> > > > @@ -496,6 +496,7 @@ static const struct eth_dev_ops i40e_eth_dev_ops
> > = {
> > > >  	.flow_ops_get                 = i40e_dev_flow_ops_get,
> > > >  	.rxq_info_get                 = i40e_rxq_info_get,
> > > >  	.txq_info_get                 = i40e_txq_info_get,
> > > > +	.recycle_rxq_info_get         = i40e_recycle_rxq_info_get,
> > > >  	.rx_burst_mode_get            = i40e_rx_burst_mode_get,
> > > >  	.tx_burst_mode_get            = i40e_tx_burst_mode_get,
> > > >  	.timesync_enable              = i40e_timesync_enable,
> > > > diff --git a/drivers/net/i40e/i40e_ethdev.h
> > > > b/drivers/net/i40e/i40e_ethdev.h index 6f65d5e0ac..af758798e1 100644
> > > > --- a/drivers/net/i40e/i40e_ethdev.h
> > > > +++ b/drivers/net/i40e/i40e_ethdev.h
> > > > @@ -1355,6 +1355,8 @@ void i40e_rxq_info_get(struct rte_eth_dev
> > > > *dev, uint16_t queue_id,
> > > >  	struct rte_eth_rxq_info *qinfo);
> > > >  void i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
> > > >  	struct rte_eth_txq_info *qinfo);
> > > > +void i40e_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t
> > queue_id,
> > > > +	struct rte_eth_recycle_rxq_info *recycle_rxq_info);
> > > >  int i40e_rx_burst_mode_get(struct rte_eth_dev *dev, uint16_t queue_id,
> > > >  			   struct rte_eth_burst_mode *mode);  int
> > > > i40e_tx_burst_mode_get(struct rte_eth_dev *dev, uint16_t queue_id,
> > > > diff -- git a/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > > > b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > > > new file mode 100644
> > > > index 0000000000..5663ecccde
> > > > --- /dev/null
> > > > +++ b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > > > @@ -0,0 +1,147 @@
> > > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > > + * Copyright (c) 2023 Arm Limited.
> > > > + */
> > > > +
> > > > +#include <stdint.h>
> > > > +#include <ethdev_driver.h>
> > > > +
> > > > +#include "base/i40e_prototype.h"
> > > > +#include "base/i40e_type.h"
> > > > +#include "i40e_ethdev.h"
> > > > +#include "i40e_rxtx.h"
> > > > +
> > > > +#pragma GCC diagnostic ignored "-Wcast-qual"
> > > > +
> > > > +void
> > > > +i40e_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t
> > > > +nb_mbufs) {
> > > > +	struct i40e_rx_queue *rxq = rx_queue;
> > > > +	struct i40e_rx_entry *rxep;
> > > > +	volatile union i40e_rx_desc *rxdp;
> > > > +	uint16_t rx_id;
> > > > +	uint64_t paddr;
> > > > +	uint64_t dma_addr;
> > > > +	uint16_t i;
> > > > +
> > > > +	rxdp = rxq->rx_ring + rxq->rxrearm_start;
> > > > +	rxep = &rxq->sw_ring[rxq->rxrearm_start];
> > > > +
> > > > +	for (i = 0; i < nb_mbufs; i++) {
> > > > +		/* Initialize rxdp descs. */
> > > > +		paddr = (rxep[i].mbuf)->buf_iova +
> > > > RTE_PKTMBUF_HEADROOM;
> > > > +		dma_addr = rte_cpu_to_le_64(paddr);
> > > > +		/* flush desc with pa dma_addr */
> > > > +		rxdp[i].read.hdr_addr = 0;
> > > > +		rxdp[i].read.pkt_addr = dma_addr;
> > > > +	}
> > > > +
> > > > +	/* Update the descriptor initializer index */
> > > > +	rxq->rxrearm_start += nb_mbufs;
> > > > +	rx_id = rxq->rxrearm_start - 1;
> > > > +
> > > > +	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
> > > > +		rxq->rxrearm_start = 0;
> > > > +		rx_id = rxq->nb_rx_desc - 1;
> > > > +	}
> > > > +
> > > > +	rxq->rxrearm_nb -= nb_mbufs;
> > > > +
> > > > +	rte_io_wmb();
> > > > +	/* Update the tail pointer on the NIC */
> > > > +	I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id); }
> > > > +
> > > > +uint16_t
> > > > +i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
> > > > +	struct rte_eth_recycle_rxq_info *recycle_rxq_info) {
> > > > +	struct i40e_tx_queue *txq = tx_queue;
> > > > +	struct i40e_tx_entry *txep;
> > > > +	struct rte_mbuf **rxep;
> > > > +	int i, n;
> > > > +	uint16_t nb_recycle_mbufs;
> > > > +	uint16_t avail = 0;
> > > > +	uint16_t mbuf_ring_size = recycle_rxq_info->mbuf_ring_size;
> > > > +	uint16_t mask = recycle_rxq_info->mbuf_ring_size - 1;
> > > > +	uint16_t refill_requirement = recycle_rxq_info->refill_requirement;
> > > > +	uint16_t refill_head = *recycle_rxq_info->refill_head;
> > > > +	uint16_t receive_tail = *recycle_rxq_info->receive_tail;
> > > > +
> > > > +	/* Get available recycling Rx buffers. */
> > > > +	avail = (mbuf_ring_size - (refill_head - receive_tail)) & mask;
> > > > +
> > > > +	/* Check Tx free thresh and Rx available space. */
> > > > +	if (txq->nb_tx_free > txq->tx_free_thresh || avail <= txq->tx_rs_thresh)
> > > > +		return 0;
> > > > +
> > > > +	/* check DD bits on threshold descriptor */
> > > > +	if ((txq->tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
> > > > +
> > > > 	rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
> > > > +
> > > > 	rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
> > > > +		return 0;
> > > > +
> > > > +	n = txq->tx_rs_thresh;
> > > > +	nb_recycle_mbufs = n;
> > > > +
> > > > +	/* Mbufs recycle mode can only support no ring buffer wrapping
> > > > around.
> > > > +	 * Two case for this:
> > > > +	 *
> > > > +	 * case 1: The refill head of Rx buffer ring needs to be aligned with
> > > > +	 * mbuf ring size. In this case, the number of Tx freeing buffers
> > > > +	 * should be equal to refill_requirement.
> > > > +	 *
> > > > +	 * case 2: The refill head of Rx ring buffer does not need to be aligned
> > > > +	 * with mbuf ring size. In this case, the update of refill head can not
> > > > +	 * exceed the Rx mbuf ring size.
> > > > +	 */
> > > > +	if (refill_requirement != n ||
> > > > +		(!refill_requirement && (refill_head + n > mbuf_ring_size)))
> > > > +		return 0;
> > > > +
> > > > +	/* First buffer to free from S/W ring is at index
> > > > +	 * tx_next_dd - (tx_rs_thresh-1).
> > > > +	 */
> > > > +	txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
> > > > +	rxep = recycle_rxq_info->mbuf_ring;
> > > > +	rxep += refill_head;
> > > > +
> > > > +	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
> > > > +		/* Avoid txq contains buffers from unexpected mempool. */
> > > > +		if (unlikely(recycle_rxq_info->mp
> > > > +					!= txep[0].mbuf->pool))
> > > > +			return 0;
> > > > +
> > > > +		/* Directly put mbufs from Tx to Rx. */
> > > > +		for (i = 0; i < n; i++)
> > > > +			rxep[i] = txep[i].mbuf;
> > > > +	} else {
> > > > +		for (i = 0; i < n; i++) {
> > > > +			rxep[i] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> > > > +
> > > > +			/* If Tx buffers are not the last reference or from
> > > > +			 * unexpected mempool, previous copied buffers are
> > > > +			 * considered as invalid.
> > > > +			 */
> > > > +			if (unlikely((rxep[i] == NULL && refill_requirement) ||
> > > [Konstantin]
> > > Could you pls remind me why it is ok to have rxep[i]==NULL when
> > > refill_requirement is not set?
> > >
> > > If reill_requirement is not zero, it means each tx freed buffer must
> > > be valid and can be put into Rx sw_ring. Then  the refill head of Rx
> > > buffer ring can be aligned with mbuf ring size. Briefly speaking the number
> > of Tx valid freed buffer must be equal to Rx refill_requirement. For example,
> > i40e driver.
> > >
> > > If reill_requirement is zero, it means that the refill head of Rx
> > > buffer ring does not need to be aligned with mbuf ring size, thus if
> > > Tx have n valid freed buffers, we just need to put these n buffers into Rx sw-
> > ring, and not to be equal to the Rx setting rearm number. For example, mlx5
> > driver.
> > >
> > > In conclusion, above difference is due to pmd drivers have different
> > strategies to update their Rx rearm(refill) head.
> > > For i40e driver, if rearm_head exceed 1024, it will be set as 0 due to  the
> > number of each rearm is a fixed value by default.
> > > For mlx5 driver. Its rearm_head can exceed 1024, and use mask to achieve
> > real index. Thus its rearm number can be a different value.
> >
> > Ok, but if rte_pktmbuf_prefree_seg(txep[i].mbuf), it means that this mbuf is
> > not free yet and can't be reused.
> > Shouldn't we then set nb_recycle_mbufs = 0 in that case too?
> > Or probably would be enough to skip that mbuf?
> > Might be something like that:
> >
> > for (i = 0, j = 0; i < n; i++) {
> >
> > 	rxep[j] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> > 	if (rxep[j] == NULL || recycle_rxq_info->mp != rxep[j].mbuf->pool))  {
> > 		if (refill_requirement) {
> > 			nb_recycle_mbufs = 0;
> > 			break;
> > 		}
> > 	} else
> > 		j++;
> > }
> >
> > /* now j contains actual number of recycled mbufs */
> >
> > ?
> >
> > >
> > > > +					recycle_rxq_info->mp != txep[i].mbuf-
> > > > >pool))
> > > > +				nb_recycle_mbufs = 0;
> > > > +		}
> > > > +		/* If Tx buffers are not the last reference or
> > > > +		 * from unexpected mempool, all recycled buffers
> > > > +		 * are put into mempool.
> > > > +		 */
> > > > +		if (nb_recycle_mbufs == 0)
> > > > +			for (i = 0; i < n; i++) {
> > > > +				if (rxep[i] != NULL)
> > > > +					rte_mempool_put(rxep[i]->pool,
> > > > rxep[i]);
> > > > +			}
> > > > +	}
> > > > +
> [Konstantin] After another thought, it might be easier and cleaner just to:
> if (rxep[j] == NULL || recycle_rxq_info->mp != rxep[j].mbuf->pool)
> 	nb_recycle_mbufs = 0;
> 
> Anyway, from my understanding - if rte_pktmbuf_prefree_seg(mbuf) returns
> NULL, then we can't recycle that mbuf.
> 
> [Feifei] Agree with that 'rte_pktmbuf_prefree_seg(mbuf) returns NULL, then we can't recycle that mbuf'.
> 
> Firstly, we should know for i40e driver,  the number of free mbufs is fixed, it must equal to 'tx_rs_thresh'
> This means if we start to free Tx mbufs, it cannot be interrupted until the given amount of mbufs are freed.
> In the meanwhile, doing prefree operation for a Tx mbuf can be looked at this mbuf is freed from this TX sw-ring
>  if the API returns NULL. This is due to that call 'prefree_seg' function means update the mbuf refcnt.
> 
> So let's come back our recycle mbuf case.
> Here we consider that the case if we have 32 mbufs need to be freed, and we firstly do the pre-free.
> And then first 8 mbufs is good and return value is not none. But the 9th mbuf is bad, its refcnt is more than 1.
> So we just update its refcnt and cannot put it into Rx sw-ring. For Tx sw-ring, this mbuf has been freed.
> Then we should continue to do pre-free operation for the next Tx mbufs to ensure the given amount of
> mbufs are freed.
> 
> Do a conclusion for this, this is because if we start to do pre-free operation,  the Tx mbuf refcnt value
> maybe changed, so we cannot stop or break until finish all the pre-free operation.
> 
> Finally, in the case that Rx refill_request is not zero, but the valid mbuf amount is less than this value,
> we must put back this Tx mbufs into mempool.
> 
> Above is the reason why I do not directly jump out of the loop if some mbuf return value is NULL.

Yep, I already realized that it is a bit more complicated and we need to continue with prefree()
for all packets even when we get NULL in the middle.
Anyway the code has to be changed, right? 
 
> 
> > > > +	/* Update counters for Tx. */
> > > > +	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
> > > > +	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
> > > > +	if (txq->tx_next_dd >= txq->nb_tx_desc)
> > > > +		txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
> > > > +
> > > > +	return nb_recycle_mbufs;
> > > > +}
> > > > diff --git a/drivers/net/i40e/i40e_rxtx.c
> > > > b/drivers/net/i40e/i40e_rxtx.c index b4f65b58fa..a9c9eb331c 100644
> > > > --- a/drivers/net/i40e/i40e_rxtx.c
> > > > +++ b/drivers/net/i40e/i40e_rxtx.c
> > > > @@ -3199,6 +3199,30 @@ i40e_txq_info_get(struct rte_eth_dev *dev,
> > > > uint16_t queue_id,
> > > >  	qinfo->conf.offloads = txq->offloads;  }
> > > >
> > > > +void
> > > > +i40e_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
> > > > +	struct rte_eth_recycle_rxq_info *recycle_rxq_info) {
> > > > +	struct i40e_rx_queue *rxq;
> > > > +	struct i40e_adapter *ad =
> > > > +		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
> > > > +
> > > > +	rxq = dev->data->rx_queues[queue_id];
> > > > +
> > > > +	recycle_rxq_info->mbuf_ring = (void *)rxq->sw_ring;
> > > > +	recycle_rxq_info->mp = rxq->mp;
> > > > +	recycle_rxq_info->mbuf_ring_size = rxq->nb_rx_desc;
> > > > +	recycle_rxq_info->receive_tail = &rxq->rx_tail;
> > > > +
> > > > +	if (ad->rx_vec_allowed) {
> > > > +		recycle_rxq_info->refill_requirement =
> > > > RTE_I40E_RXQ_REARM_THRESH;
> > > > +		recycle_rxq_info->refill_head = &rxq->rxrearm_start;
> > > > +	} else {
> > > > +		recycle_rxq_info->refill_requirement = rxq->rx_free_thresh;
> > > > +		recycle_rxq_info->refill_head = &rxq->rx_free_trigger;
> > > > +	}
> > > > +}
> > > > +
> > > >  #ifdef RTE_ARCH_X86
> > > >  static inline bool
> > > >  get_avx_supported(bool request_avx512) @@ -3293,6 +3317,8 @@
> > > > i40e_set_rx_function(struct rte_eth_dev *dev)
> > > >  				dev->rx_pkt_burst = ad->rx_use_avx2 ?
> > > >  					i40e_recv_scattered_pkts_vec_avx2 :
> > > >  					i40e_recv_scattered_pkts_vec;
> > > > +				dev->recycle_rx_descriptors_refill =
> > > > +					i40e_recycle_rx_descriptors_refill_vec;
> > > >  			}
> > > >  		} else {
> > > >  			if (ad->rx_use_avx512) {
> > > > @@ -3311,9 +3337,12 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
> > > >  				dev->rx_pkt_burst = ad->rx_use_avx2 ?
> > > >  					i40e_recv_pkts_vec_avx2 :
> > > >  					i40e_recv_pkts_vec;
> > > > +				dev->recycle_rx_descriptors_refill =
> > > > +					i40e_recycle_rx_descriptors_refill_vec;
> > > >  			}
> > > >  		}
> > > >  #else /* RTE_ARCH_X86 */
> > > > +		dev->recycle_rx_descriptors_refill =
> > > > +i40e_recycle_rx_descriptors_refill_vec;
> > > >  		if (dev->data->scattered_rx) {
> > > >  			PMD_INIT_LOG(DEBUG,
> > > >  				     "Using Vector Scattered Rx (port %d).", @@
> > > > -3481,15 +3510,18 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
> > > >  				dev->tx_pkt_burst = ad->tx_use_avx2 ?
> > > >  						    i40e_xmit_pkts_vec_avx2 :
> > > >  						    i40e_xmit_pkts_vec;
> > > > +				dev->recycle_tx_mbufs_reuse =
> > > > i40e_recycle_tx_mbufs_reuse_vec;
> > > >  			}
> > > >  #else /* RTE_ARCH_X86 */
> > > >  			PMD_INIT_LOG(DEBUG, "Using Vector Tx (port %d).",
> > > >  				     dev->data->port_id);
> > > >  			dev->tx_pkt_burst = i40e_xmit_pkts_vec;
> > > > +			dev->recycle_tx_mbufs_reuse =
> > > > i40e_recycle_tx_mbufs_reuse_vec;
> > > >  #endif /* RTE_ARCH_X86 */
> > > >  		} else {
> > > >  			PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
> > > >  			dev->tx_pkt_burst = i40e_xmit_pkts_simple;
> > > > +			dev->recycle_tx_mbufs_reuse =
> > > > i40e_recycle_tx_mbufs_reuse_vec;
> > > >  		}
> > > >  		dev->tx_pkt_prepare = i40e_simple_prep_pkts;
> > > >  	} else {
> > > > diff --git a/drivers/net/i40e/i40e_rxtx.h
> > > > b/drivers/net/i40e/i40e_rxtx.h index a8686224e5..b191f23e1f 100644
> > > > --- a/drivers/net/i40e/i40e_rxtx.h
> > > > +++ b/drivers/net/i40e/i40e_rxtx.h
> > > > @@ -236,6 +236,10 @@ uint32_t i40e_dev_rx_queue_count(void
> > > > *rx_queue);  int i40e_dev_rx_descriptor_status(void *rx_queue,
> > > > uint16_t offset);  int i40e_dev_tx_descriptor_status(void *tx_queue,
> > > > uint16_t offset);
> > > >
> > > > +uint16_t i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
> > > > +		struct rte_eth_recycle_rxq_info *recycle_rxq_info); void
> > > > +i40e_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t
> > > > +nb_mbufs);
> > > > +
> > > >  uint16_t i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
> > > >  			    uint16_t nb_pkts);
> > > >  uint16_t i40e_recv_scattered_pkts_vec(void *rx_queue, diff --git
> > > > a/drivers/net/i40e/meson.build b/drivers/net/i40e/meson.build index
> > > > 8e53b87a65..3b1a233c84 100644
> > > > --- a/drivers/net/i40e/meson.build
> > > > +++ b/drivers/net/i40e/meson.build
> > > > @@ -34,6 +34,7 @@ sources = files(
> > > >          'i40e_tm.c',
> > > >          'i40e_hash.c',
> > > >          'i40e_vf_representor.c',
> > > > +	'i40e_recycle_mbufs_vec_common.c',
> > > >          'rte_pmd_i40e.c',
> > > >  )
> > > >
> > > > --
> > > > 2.25.1
  
Feifei Wang Sept. 4, 2023, 6:59 a.m. UTC | #7
> -----Original Message-----
> From: Konstantin Ananyev <konstantin.ananyev@huawei.com>
> Sent: Friday, September 1, 2023 10:23 PM
> To: Feifei Wang <Feifei.Wang2@arm.com>; Konstantin Ananyev
> <konstantin.v.ananyev@yandex.ru>
> Cc: dev@dpdk.org; nd <nd@arm.com>; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>; Yuying Zhang <Yuying.Zhang@intel.com>; Beilei
> Xing <beilei.xing@intel.com>; nd <nd@arm.com>; nd <nd@arm.com>
> Subject: RE: [PATCH v11 2/4] net/i40e: implement mbufs recycle mode
> 
> 
> 
> > > > >
> > > > > Define specific function implementation for i40e driver.
> > > > > Currently, mbufs recycle mode can support 128bit vector path and
> > > > > avx2
> > > path.
> > > > > And can be enabled both in fast free and no fast free mode.
> > > > >
> > > > > Suggested-by: Honnappa Nagarahalli
> > > > > <honnappa.nagarahalli@arm.com>
> > > > > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > > > > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > > > > Reviewed-by: Honnappa Nagarahalli
> <honnappa.nagarahalli@arm.com>
> > > > > ---
> > > > >  drivers/net/i40e/i40e_ethdev.c                |   1 +
> > > > >  drivers/net/i40e/i40e_ethdev.h                |   2 +
> > > > >  .../net/i40e/i40e_recycle_mbufs_vec_common.c  | 147
> > > > > ++++++++++++++++++
> > > > >  drivers/net/i40e/i40e_rxtx.c                  |  32 ++++
> > > > >  drivers/net/i40e/i40e_rxtx.h                  |   4 +
> > > > >  drivers/net/i40e/meson.build                  |   1 +
> > > > >  6 files changed, 187 insertions(+)  create mode 100644
> > > > > drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > > > >
> > > > > diff --git a/drivers/net/i40e/i40e_ethdev.c
> > > > > b/drivers/net/i40e/i40e_ethdev.c index 8271bbb394..50ba9aac94
> > > > > 100644
> > > > > --- a/drivers/net/i40e/i40e_ethdev.c
> > > > > +++ b/drivers/net/i40e/i40e_ethdev.c
> > > > > @@ -496,6 +496,7 @@ static const struct eth_dev_ops
> > > > > i40e_eth_dev_ops
> > > = {
> > > > >  	.flow_ops_get                 = i40e_dev_flow_ops_get,
> > > > >  	.rxq_info_get                 = i40e_rxq_info_get,
> > > > >  	.txq_info_get                 = i40e_txq_info_get,
> > > > > +	.recycle_rxq_info_get         = i40e_recycle_rxq_info_get,
> > > > >  	.rx_burst_mode_get            = i40e_rx_burst_mode_get,
> > > > >  	.tx_burst_mode_get            = i40e_tx_burst_mode_get,
> > > > >  	.timesync_enable              = i40e_timesync_enable,
> > > > > diff --git a/drivers/net/i40e/i40e_ethdev.h
> > > > > b/drivers/net/i40e/i40e_ethdev.h index 6f65d5e0ac..af758798e1
> > > > > 100644
> > > > > --- a/drivers/net/i40e/i40e_ethdev.h
> > > > > +++ b/drivers/net/i40e/i40e_ethdev.h
> > > > > @@ -1355,6 +1355,8 @@ void i40e_rxq_info_get(struct rte_eth_dev
> > > > > *dev, uint16_t queue_id,
> > > > >  	struct rte_eth_rxq_info *qinfo);  void
> > > > > i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
> > > > >  	struct rte_eth_txq_info *qinfo);
> > > > > +void i40e_recycle_rxq_info_get(struct rte_eth_dev *dev,
> > > > > +uint16_t
> > > queue_id,
> > > > > +	struct rte_eth_recycle_rxq_info *recycle_rxq_info);
> > > > >  int i40e_rx_burst_mode_get(struct rte_eth_dev *dev, uint16_t
> queue_id,
> > > > >  			   struct rte_eth_burst_mode *mode);  int
> > > > > i40e_tx_burst_mode_get(struct rte_eth_dev *dev, uint16_t
> > > > > queue_id, diff -- git
> > > > > a/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > > > > b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > > > > new file mode 100644
> > > > > index 0000000000..5663ecccde
> > > > > --- /dev/null
> > > > > +++ b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > > > > @@ -0,0 +1,147 @@
> > > > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > > > + * Copyright (c) 2023 Arm Limited.
> > > > > + */
> > > > > +
> > > > > +#include <stdint.h>
> > > > > +#include <ethdev_driver.h>
> > > > > +
> > > > > +#include "base/i40e_prototype.h"
> > > > > +#include "base/i40e_type.h"
> > > > > +#include "i40e_ethdev.h"
> > > > > +#include "i40e_rxtx.h"
> > > > > +
> > > > > +#pragma GCC diagnostic ignored "-Wcast-qual"
> > > > > +
> > > > > +void
> > > > > +i40e_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t
> > > > > +nb_mbufs) {
> > > > > +	struct i40e_rx_queue *rxq = rx_queue;
> > > > > +	struct i40e_rx_entry *rxep;
> > > > > +	volatile union i40e_rx_desc *rxdp;
> > > > > +	uint16_t rx_id;
> > > > > +	uint64_t paddr;
> > > > > +	uint64_t dma_addr;
> > > > > +	uint16_t i;
> > > > > +
> > > > > +	rxdp = rxq->rx_ring + rxq->rxrearm_start;
> > > > > +	rxep = &rxq->sw_ring[rxq->rxrearm_start];
> > > > > +
> > > > > +	for (i = 0; i < nb_mbufs; i++) {
> > > > > +		/* Initialize rxdp descs. */
> > > > > +		paddr = (rxep[i].mbuf)->buf_iova +
> > > > > RTE_PKTMBUF_HEADROOM;
> > > > > +		dma_addr = rte_cpu_to_le_64(paddr);
> > > > > +		/* flush desc with pa dma_addr */
> > > > > +		rxdp[i].read.hdr_addr = 0;
> > > > > +		rxdp[i].read.pkt_addr = dma_addr;
> > > > > +	}
> > > > > +
> > > > > +	/* Update the descriptor initializer index */
> > > > > +	rxq->rxrearm_start += nb_mbufs;
> > > > > +	rx_id = rxq->rxrearm_start - 1;
> > > > > +
> > > > > +	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
> > > > > +		rxq->rxrearm_start = 0;
> > > > > +		rx_id = rxq->nb_rx_desc - 1;
> > > > > +	}
> > > > > +
> > > > > +	rxq->rxrearm_nb -= nb_mbufs;
> > > > > +
> > > > > +	rte_io_wmb();
> > > > > +	/* Update the tail pointer on the NIC */
> > > > > +	I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id); }
> > > > > +
> > > > > +uint16_t
> > > > > +i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
> > > > > +	struct rte_eth_recycle_rxq_info *recycle_rxq_info) {
> > > > > +	struct i40e_tx_queue *txq = tx_queue;
> > > > > +	struct i40e_tx_entry *txep;
> > > > > +	struct rte_mbuf **rxep;
> > > > > +	int i, n;
> > > > > +	uint16_t nb_recycle_mbufs;
> > > > > +	uint16_t avail = 0;
> > > > > +	uint16_t mbuf_ring_size = recycle_rxq_info->mbuf_ring_size;
> > > > > +	uint16_t mask = recycle_rxq_info->mbuf_ring_size - 1;
> > > > > +	uint16_t refill_requirement = recycle_rxq_info-
> >refill_requirement;
> > > > > +	uint16_t refill_head = *recycle_rxq_info->refill_head;
> > > > > +	uint16_t receive_tail = *recycle_rxq_info->receive_tail;
> > > > > +
> > > > > +	/* Get available recycling Rx buffers. */
> > > > > +	avail = (mbuf_ring_size - (refill_head - receive_tail)) &
> > > > > +mask;
> > > > > +
> > > > > +	/* Check Tx free thresh and Rx available space. */
> > > > > +	if (txq->nb_tx_free > txq->tx_free_thresh || avail <= txq-
> >tx_rs_thresh)
> > > > > +		return 0;
> > > > > +
> > > > > +	/* check DD bits on threshold descriptor */
> > > > > +	if ((txq->tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
> > > > > +
> > > > > 	rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
> > > > > +
> > > > > 	rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
> > > > > +		return 0;
> > > > > +
> > > > > +	n = txq->tx_rs_thresh;
> > > > > +	nb_recycle_mbufs = n;
> > > > > +
> > > > > +	/* Mbufs recycle mode can only support no ring buffer
> wrapping
> > > > > around.
> > > > > +	 * Two case for this:
> > > > > +	 *
> > > > > +	 * case 1: The refill head of Rx buffer ring needs to be aligned
> with
> > > > > +	 * mbuf ring size. In this case, the number of Tx freeing buffers
> > > > > +	 * should be equal to refill_requirement.
> > > > > +	 *
> > > > > +	 * case 2: The refill head of Rx ring buffer does not need to be
> aligned
> > > > > +	 * with mbuf ring size. In this case, the update of refill head
> can not
> > > > > +	 * exceed the Rx mbuf ring size.
> > > > > +	 */
> > > > > +	if (refill_requirement != n ||
> > > > > +		(!refill_requirement && (refill_head + n >
> mbuf_ring_size)))
> > > > > +		return 0;
> > > > > +
> > > > > +	/* First buffer to free from S/W ring is at index
> > > > > +	 * tx_next_dd - (tx_rs_thresh-1).
> > > > > +	 */
> > > > > +	txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
> > > > > +	rxep = recycle_rxq_info->mbuf_ring;
> > > > > +	rxep += refill_head;
> > > > > +
> > > > > +	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
> > > > > +		/* Avoid txq contains buffers from unexpected
> mempool. */
> > > > > +		if (unlikely(recycle_rxq_info->mp
> > > > > +					!= txep[0].mbuf->pool))
> > > > > +			return 0;
> > > > > +
> > > > > +		/* Directly put mbufs from Tx to Rx. */
> > > > > +		for (i = 0; i < n; i++)
> > > > > +			rxep[i] = txep[i].mbuf;
> > > > > +	} else {
> > > > > +		for (i = 0; i < n; i++) {
> > > > > +			rxep[i] =
> rte_pktmbuf_prefree_seg(txep[i].mbuf);
> > > > > +
> > > > > +			/* If Tx buffers are not the last reference or
> from
> > > > > +			 * unexpected mempool, previous copied
> buffers are
> > > > > +			 * considered as invalid.
> > > > > +			 */
> > > > > +			if (unlikely((rxep[i] == NULL &&
> refill_requirement) ||
> > > > [Konstantin]
> > > > Could you pls remind me why it is ok to have rxep[i]==NULL when
> > > > refill_requirement is not set?
> > > >
> > > > If reill_requirement is not zero, it means each tx freed buffer
> > > > must be valid and can be put into Rx sw_ring. Then  the refill
> > > > head of Rx buffer ring can be aligned with mbuf ring size. Briefly
> > > > speaking the number
> > > of Tx valid freed buffer must be equal to Rx refill_requirement. For
> > > example, i40e driver.
> > > >
> > > > If reill_requirement is zero, it means that the refill head of Rx
> > > > buffer ring does not need to be aligned with mbuf ring size, thus
> > > > if Tx have n valid freed buffers, we just need to put these n
> > > > buffers into Rx sw-
> > > ring, and not to be equal to the Rx setting rearm number. For
> > > example, mlx5 driver.
> > > >
> > > > In conclusion, above difference is due to pmd drivers have
> > > > different
> > > strategies to update their Rx rearm(refill) head.
> > > > For i40e driver, if rearm_head exceed 1024, it will be set as 0
> > > > due to  the
> > > number of each rearm is a fixed value by default.
> > > > For mlx5 driver. Its rearm_head can exceed 1024, and use mask to
> > > > achieve
> > > real index. Thus its rearm number can be a different value.
> > >
> > > Ok, but if rte_pktmbuf_prefree_seg(txep[i].mbuf), it means that this
> > > mbuf is not free yet and can't be reused.
> > > Shouldn't we then set nb_recycle_mbufs = 0 in that case too?
> > > Or probably would be enough to skip that mbuf?
> > > Might be something like that:
> > >
> > > for (i = 0, j = 0; i < n; i++) {
> > >
> > > 	rxep[j] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> > > 	if (rxep[j] == NULL || recycle_rxq_info->mp != rxep[j].mbuf->pool))  {
> > > 		if (refill_requirement) {
> > > 			nb_recycle_mbufs = 0;
> > > 			break;
> > > 		}
> > > 	} else
> > > 		j++;
> > > }
> > >
> > > /* now j contains actual number of recycled mbufs */
> > >
> > > ?
> > >
> > > >
> > > > > +					recycle_rxq_info->mp !=
> txep[i].mbuf-
> > > > > >pool))
> > > > > +				nb_recycle_mbufs = 0;
> > > > > +		}
> > > > > +		/* If Tx buffers are not the last reference or
> > > > > +		 * from unexpected mempool, all recycled buffers
> > > > > +		 * are put into mempool.
> > > > > +		 */
> > > > > +		if (nb_recycle_mbufs == 0)
> > > > > +			for (i = 0; i < n; i++) {
> > > > > +				if (rxep[i] != NULL)
> > > > > +					rte_mempool_put(rxep[i]-
> >pool,
> > > > > rxep[i]);
> > > > > +			}
> > > > > +	}
> > > > > +
> > [Konstantin] After another thought, it might be easier and cleaner just to:
> > if (rxep[j] == NULL || recycle_rxq_info->mp != rxep[j].mbuf->pool)
> > 	nb_recycle_mbufs = 0;
> >
> > Anyway, from my understanding - if rte_pktmbuf_prefree_seg(mbuf)
> > returns NULL, then we can't recycle that mbuf.
> >
> > [Feifei] Agree with that 'rte_pktmbuf_prefree_seg(mbuf) returns NULL, then
> we can't recycle that mbuf'.
> >
> > Firstly, we should know for i40e driver,  the number of free mbufs is fixed, it
> must equal to 'tx_rs_thresh'
> > This means if we start to free Tx mbufs, it cannot be interrupted until the
> given amount of mbufs are freed.
> > In the meanwhile, doing prefree operation for a Tx mbuf can be looked
> > at this mbuf is freed from this TX sw-ring  if the API returns NULL. This is due
> to that call 'prefree_seg' function means update the mbuf refcnt.
> >
> > So let's come back our recycle mbuf case.
> > Here we consider that the case if we have 32 mbufs need to be freed, and
> we firstly do the pre-free.
> > And then first 8 mbufs is good and return value is not none. But the 9th
> mbuf is bad, its refcnt is more than 1.
> > So we just update its refcnt and cannot put it into Rx sw-ring. For Tx sw-ring,
> this mbuf has been freed.
> > Then we should continue to do pre-free operation for the next Tx mbufs
> > to ensure the given amount of mbufs are freed.
> >
> > Do a conclusion for this, this is because if we start to do pre-free
> > operation,  the Tx mbuf refcnt value maybe changed, so we cannot stop or
> break until finish all the pre-free operation.
> >
> > Finally, in the case that Rx refill_request is not zero, but the valid
> > mbuf amount is less than this value, we must put back this Tx mbufs into
> mempool.
> >
> > Above is the reason why I do not directly jump out of the loop if some mbuf
> return value is NULL.
> 
> Yep, I already realized that it is a bit more complicated and we need to
> continue with prefree() for all packets even when we get NULL in the middle.
> Anyway the code has to be changed, right?
> 
Sorry, but I think the code does not need to be changed.
For the no-fast-free path, currently:
1.   We check whether each mbuf is OK and call the pre_free function
------------------------------------------------------------------------------------------------------------------------------------
2.1 For the mbuf return value is not NULL, it is put into Rx sw-ring.
2.2 For the mbuf return value is zero and refill-request, it will also firstly put into Rx sw-ring, and we set nb_recycle = 0
------------------------------------------------------------------------------------------------------------------------------------
3.1 We check nb_recycle; if it is not 0, we continue to rearm the Rx descs and update their index by calling the descs_refill function.
3.2 If nb_recycle is 0, we put the valid recycled mbufs back into the mempool, as in the general path. This is necessary because we need
to ensure the number of freed Tx mbufs is fixed. (Buffers whose return value is NULL can be considered freed; the others need to be put back into the mempool.)

Or maybe I ignore something?

> >
> > > > > +	/* Update counters for Tx. */
> > > > > +	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq-
> >tx_rs_thresh);
> > > > > +	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq-
> >tx_rs_thresh);
> > > > > +	if (txq->tx_next_dd >= txq->nb_tx_desc)
> > > > > +		txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
> > > > > +
> > > > > +	return nb_recycle_mbufs;
> > > > > +}
> > > > > diff --git a/drivers/net/i40e/i40e_rxtx.c
> > > > > b/drivers/net/i40e/i40e_rxtx.c index b4f65b58fa..a9c9eb331c
> > > > > 100644
> > > > > --- a/drivers/net/i40e/i40e_rxtx.c
> > > > > +++ b/drivers/net/i40e/i40e_rxtx.c
> > > > > @@ -3199,6 +3199,30 @@ i40e_txq_info_get(struct rte_eth_dev
> > > > > *dev, uint16_t queue_id,
> > > > >  	qinfo->conf.offloads = txq->offloads;  }
> > > > >
> > > > > +void
> > > > > +i40e_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t
> queue_id,
> > > > > +	struct rte_eth_recycle_rxq_info *recycle_rxq_info) {
> > > > > +	struct i40e_rx_queue *rxq;
> > > > > +	struct i40e_adapter *ad =
> > > > > +		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data-
> >dev_private);
> > > > > +
> > > > > +	rxq = dev->data->rx_queues[queue_id];
> > > > > +
> > > > > +	recycle_rxq_info->mbuf_ring = (void *)rxq->sw_ring;
> > > > > +	recycle_rxq_info->mp = rxq->mp;
> > > > > +	recycle_rxq_info->mbuf_ring_size = rxq->nb_rx_desc;
> > > > > +	recycle_rxq_info->receive_tail = &rxq->rx_tail;
> > > > > +
> > > > > +	if (ad->rx_vec_allowed) {
> > > > > +		recycle_rxq_info->refill_requirement =
> > > > > RTE_I40E_RXQ_REARM_THRESH;
> > > > > +		recycle_rxq_info->refill_head = &rxq->rxrearm_start;
> > > > > +	} else {
> > > > > +		recycle_rxq_info->refill_requirement = rxq-
> >rx_free_thresh;
> > > > > +		recycle_rxq_info->refill_head = &rxq->rx_free_trigger;
> > > > > +	}
> > > > > +}
> > > > > +
> > > > >  #ifdef RTE_ARCH_X86
> > > > >  static inline bool
> > > > >  get_avx_supported(bool request_avx512) @@ -3293,6 +3317,8 @@
> > > > > i40e_set_rx_function(struct rte_eth_dev *dev)
> > > > >  				dev->rx_pkt_burst = ad->rx_use_avx2 ?
> > > > >  					i40e_recv_scattered_pkts_vec_avx2 :
> > > > >  					i40e_recv_scattered_pkts_vec;
> > > > > +				dev->recycle_rx_descriptors_refill =
> > > > > +
> 	i40e_recycle_rx_descriptors_refill_vec;
> > > > >  			}
> > > > >  		} else {
> > > > >  			if (ad->rx_use_avx512) {
> > > > > @@ -3311,9 +3337,12 @@ i40e_set_rx_function(struct rte_eth_dev
> *dev)
> > > > >  				dev->rx_pkt_burst = ad->rx_use_avx2 ?
> > > > >  					i40e_recv_pkts_vec_avx2 :
> > > > >  					i40e_recv_pkts_vec;
> > > > > +				dev->recycle_rx_descriptors_refill =
> > > > > +
> 	i40e_recycle_rx_descriptors_refill_vec;
> > > > >  			}
> > > > >  		}
> > > > >  #else /* RTE_ARCH_X86 */
> > > > > +		dev->recycle_rx_descriptors_refill =
> > > > > +i40e_recycle_rx_descriptors_refill_vec;
> > > > >  		if (dev->data->scattered_rx) {
> > > > >  			PMD_INIT_LOG(DEBUG,
> > > > >  				     "Using Vector Scattered Rx (port %d).", @@
> > > > > -3481,15 +3510,18 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
> > > > >  				dev->tx_pkt_burst = ad->tx_use_avx2 ?
> > > > >  						    i40e_xmit_pkts_vec_avx2 :
> > > > >  						    i40e_xmit_pkts_vec;
> > > > > +				dev->recycle_tx_mbufs_reuse =
> > > > > i40e_recycle_tx_mbufs_reuse_vec;
> > > > >  			}
> > > > >  #else /* RTE_ARCH_X86 */
> > > > >  			PMD_INIT_LOG(DEBUG, "Using Vector Tx (port %d).",
> > > > >  				     dev->data->port_id);
> > > > >  			dev->tx_pkt_burst = i40e_xmit_pkts_vec;
> > > > > +			dev->recycle_tx_mbufs_reuse =
> > > > > i40e_recycle_tx_mbufs_reuse_vec;  #endif /* RTE_ARCH_X86 */
> > > > >  		} else {
> > > > >  			PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
> > > > >  			dev->tx_pkt_burst = i40e_xmit_pkts_simple;
> > > > > +			dev->recycle_tx_mbufs_reuse =
> > > > > i40e_recycle_tx_mbufs_reuse_vec;
> > > > >  		}
> > > > >  		dev->tx_pkt_prepare = i40e_simple_prep_pkts;
> > > > >  	} else {
> > > > > diff --git a/drivers/net/i40e/i40e_rxtx.h
> > > > > b/drivers/net/i40e/i40e_rxtx.h index a8686224e5..b191f23e1f
> > > > > 100644
> > > > > --- a/drivers/net/i40e/i40e_rxtx.h
> > > > > +++ b/drivers/net/i40e/i40e_rxtx.h
> > > > > @@ -236,6 +236,10 @@ uint32_t i40e_dev_rx_queue_count(void
> > > > > *rx_queue);  int i40e_dev_rx_descriptor_status(void *rx_queue,
> > > > > uint16_t offset);  int i40e_dev_tx_descriptor_status(void
> > > > > *tx_queue, uint16_t offset);
> > > > >
> > > > > +uint16_t i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
> > > > > +		struct rte_eth_recycle_rxq_info *recycle_rxq_info);
> void
> > > > > +i40e_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t
> > > > > +nb_mbufs);
> > > > > +
> > > > >  uint16_t i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf
> **rx_pkts,
> > > > >  			    uint16_t nb_pkts);
> > > > >  uint16_t i40e_recv_scattered_pkts_vec(void *rx_queue, diff
> > > > > --git a/drivers/net/i40e/meson.build
> > > > > b/drivers/net/i40e/meson.build index
> > > > > 8e53b87a65..3b1a233c84 100644
> > > > > --- a/drivers/net/i40e/meson.build
> > > > > +++ b/drivers/net/i40e/meson.build
> > > > > @@ -34,6 +34,7 @@ sources = files(
> > > > >          'i40e_tm.c',
> > > > >          'i40e_hash.c',
> > > > >          'i40e_vf_representor.c',
> > > > > +	'i40e_recycle_mbufs_vec_common.c',
> > > > >          'rte_pmd_i40e.c',
> > > > >  )
> > > > >
> > > > > --
> > > > > 2.25.1
  
Konstantin Ananyev Sept. 4, 2023, 7:49 a.m. UTC | #8
Hi Feifei,

> > > > > > Define specific function implementation for i40e driver.
> > > > > > Currently, mbufs recycle mode can support 128bit vector path and
> > > > > > avx2
> > > > path.
> > > > > > And can be enabled both in fast free and no fast free mode.
> > > > > >
> > > > > > Suggested-by: Honnappa Nagarahalli
> > > > > > <honnappa.nagarahalli@arm.com>
> > > > > > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > > > > > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > > > > > Reviewed-by: Honnappa Nagarahalli
> > <honnappa.nagarahalli@arm.com>
> > > > > > ---
> > > > > >  drivers/net/i40e/i40e_ethdev.c                |   1 +
> > > > > >  drivers/net/i40e/i40e_ethdev.h                |   2 +
> > > > > >  .../net/i40e/i40e_recycle_mbufs_vec_common.c  | 147
> > > > > > ++++++++++++++++++
> > > > > >  drivers/net/i40e/i40e_rxtx.c                  |  32 ++++
> > > > > >  drivers/net/i40e/i40e_rxtx.h                  |   4 +
> > > > > >  drivers/net/i40e/meson.build                  |   1 +
> > > > > >  6 files changed, 187 insertions(+)  create mode 100644
> > > > > > drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > > > > >
> > > > > > diff --git a/drivers/net/i40e/i40e_ethdev.c
> > > > > > b/drivers/net/i40e/i40e_ethdev.c index 8271bbb394..50ba9aac94
> > > > > > 100644
> > > > > > --- a/drivers/net/i40e/i40e_ethdev.c
> > > > > > +++ b/drivers/net/i40e/i40e_ethdev.c
> > > > > > @@ -496,6 +496,7 @@ static const struct eth_dev_ops
> > > > > > i40e_eth_dev_ops
> > > > = {
> > > > > >  	.flow_ops_get                 = i40e_dev_flow_ops_get,
> > > > > >  	.rxq_info_get                 = i40e_rxq_info_get,
> > > > > >  	.txq_info_get                 = i40e_txq_info_get,
> > > > > > +	.recycle_rxq_info_get         = i40e_recycle_rxq_info_get,
> > > > > >  	.rx_burst_mode_get            = i40e_rx_burst_mode_get,
> > > > > >  	.tx_burst_mode_get            = i40e_tx_burst_mode_get,
> > > > > >  	.timesync_enable              = i40e_timesync_enable,
> > > > > > diff --git a/drivers/net/i40e/i40e_ethdev.h
> > > > > > b/drivers/net/i40e/i40e_ethdev.h index 6f65d5e0ac..af758798e1
> > > > > > 100644
> > > > > > --- a/drivers/net/i40e/i40e_ethdev.h
> > > > > > +++ b/drivers/net/i40e/i40e_ethdev.h
> > > > > > @@ -1355,6 +1355,8 @@ void i40e_rxq_info_get(struct rte_eth_dev
> > > > > > *dev, uint16_t queue_id,
> > > > > >  	struct rte_eth_rxq_info *qinfo);  void
> > > > > > i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
> > > > > >  	struct rte_eth_txq_info *qinfo);
> > > > > > +void i40e_recycle_rxq_info_get(struct rte_eth_dev *dev,
> > > > > > +uint16_t
> > > > queue_id,
> > > > > > +	struct rte_eth_recycle_rxq_info *recycle_rxq_info);
> > > > > >  int i40e_rx_burst_mode_get(struct rte_eth_dev *dev, uint16_t
> > queue_id,
> > > > > >  			   struct rte_eth_burst_mode *mode);  int
> > > > > > i40e_tx_burst_mode_get(struct rte_eth_dev *dev, uint16_t
> > > > > > queue_id, diff -- git
> > > > > > a/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > > > > > b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > > > > > new file mode 100644
> > > > > > index 0000000000..5663ecccde
> > > > > > --- /dev/null
> > > > > > +++ b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > > > > > @@ -0,0 +1,147 @@
> > > > > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > > > > + * Copyright (c) 2023 Arm Limited.
> > > > > > + */
> > > > > > +
> > > > > > +#include <stdint.h>
> > > > > > +#include <ethdev_driver.h>
> > > > > > +
> > > > > > +#include "base/i40e_prototype.h"
> > > > > > +#include "base/i40e_type.h"
> > > > > > +#include "i40e_ethdev.h"
> > > > > > +#include "i40e_rxtx.h"
> > > > > > +
> > > > > > +#pragma GCC diagnostic ignored "-Wcast-qual"
> > > > > > +
> > > > > > +void
> > > > > > +i40e_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t
> > > > > > +nb_mbufs) {
> > > > > > +	struct i40e_rx_queue *rxq = rx_queue;
> > > > > > +	struct i40e_rx_entry *rxep;
> > > > > > +	volatile union i40e_rx_desc *rxdp;
> > > > > > +	uint16_t rx_id;
> > > > > > +	uint64_t paddr;
> > > > > > +	uint64_t dma_addr;
> > > > > > +	uint16_t i;
> > > > > > +
> > > > > > +	rxdp = rxq->rx_ring + rxq->rxrearm_start;
> > > > > > +	rxep = &rxq->sw_ring[rxq->rxrearm_start];
> > > > > > +
> > > > > > +	for (i = 0; i < nb_mbufs; i++) {
> > > > > > +		/* Initialize rxdp descs. */
> > > > > > +		paddr = (rxep[i].mbuf)->buf_iova +
> > > > > > RTE_PKTMBUF_HEADROOM;
> > > > > > +		dma_addr = rte_cpu_to_le_64(paddr);
> > > > > > +		/* flush desc with pa dma_addr */
> > > > > > +		rxdp[i].read.hdr_addr = 0;
> > > > > > +		rxdp[i].read.pkt_addr = dma_addr;
> > > > > > +	}
> > > > > > +
> > > > > > +	/* Update the descriptor initializer index */
> > > > > > +	rxq->rxrearm_start += nb_mbufs;
> > > > > > +	rx_id = rxq->rxrearm_start - 1;
> > > > > > +
> > > > > > +	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
> > > > > > +		rxq->rxrearm_start = 0;
> > > > > > +		rx_id = rxq->nb_rx_desc - 1;
> > > > > > +	}
> > > > > > +
> > > > > > +	rxq->rxrearm_nb -= nb_mbufs;
> > > > > > +
> > > > > > +	rte_io_wmb();
> > > > > > +	/* Update the tail pointer on the NIC */
> > > > > > +	I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id); }
> > > > > > +
> > > > > > +uint16_t
> > > > > > +i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
> > > > > > +	struct rte_eth_recycle_rxq_info *recycle_rxq_info) {
> > > > > > +	struct i40e_tx_queue *txq = tx_queue;
> > > > > > +	struct i40e_tx_entry *txep;
> > > > > > +	struct rte_mbuf **rxep;
> > > > > > +	int i, n;
> > > > > > +	uint16_t nb_recycle_mbufs;
> > > > > > +	uint16_t avail = 0;
> > > > > > +	uint16_t mbuf_ring_size = recycle_rxq_info->mbuf_ring_size;
> > > > > > +	uint16_t mask = recycle_rxq_info->mbuf_ring_size - 1;
> > > > > > +	uint16_t refill_requirement = recycle_rxq_info-
> > >refill_requirement;
> > > > > > +	uint16_t refill_head = *recycle_rxq_info->refill_head;
> > > > > > +	uint16_t receive_tail = *recycle_rxq_info->receive_tail;
> > > > > > +
> > > > > > +	/* Get available recycling Rx buffers. */
> > > > > > +	avail = (mbuf_ring_size - (refill_head - receive_tail)) &
> > > > > > +mask;
> > > > > > +
> > > > > > +	/* Check Tx free thresh and Rx available space. */
> > > > > > +	if (txq->nb_tx_free > txq->tx_free_thresh || avail <= txq-
> > >tx_rs_thresh)
> > > > > > +		return 0;
> > > > > > +
> > > > > > +	/* check DD bits on threshold descriptor */
> > > > > > +	if ((txq->tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
> > > > > > +
> > > > > > 	rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
> > > > > > +
> > > > > > 	rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
> > > > > > +		return 0;
> > > > > > +
> > > > > > +	n = txq->tx_rs_thresh;
> > > > > > +	nb_recycle_mbufs = n;
> > > > > > +
> > > > > > +	/* Mbufs recycle mode can only support no ring buffer
> > wrapping
> > > > > > around.
> > > > > > +	 * Two case for this:
> > > > > > +	 *
> > > > > > +	 * case 1: The refill head of Rx buffer ring needs to be aligned
> > with
> > > > > > +	 * mbuf ring size. In this case, the number of Tx freeing buffers
> > > > > > +	 * should be equal to refill_requirement.
> > > > > > +	 *
> > > > > > +	 * case 2: The refill head of Rx ring buffer does not need to be
> > aligned
> > > > > > +	 * with mbuf ring size. In this case, the update of refill head
> > can not
> > > > > > +	 * exceed the Rx mbuf ring size.
> > > > > > +	 */
> > > > > > +	if (refill_requirement != n ||
> > > > > > +		(!refill_requirement && (refill_head + n >
> > mbuf_ring_size)))
> > > > > > +		return 0;
> > > > > > +
> > > > > > +	/* First buffer to free from S/W ring is at index
> > > > > > +	 * tx_next_dd - (tx_rs_thresh-1).
> > > > > > +	 */
> > > > > > +	txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
> > > > > > +	rxep = recycle_rxq_info->mbuf_ring;
> > > > > > +	rxep += refill_head;
> > > > > > +
> > > > > > +	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
> > > > > > +		/* Avoid txq contains buffers from unexpected
> > mempool. */
> > > > > > +		if (unlikely(recycle_rxq_info->mp
> > > > > > +					!= txep[0].mbuf->pool))
> > > > > > +			return 0;
> > > > > > +
> > > > > > +		/* Directly put mbufs from Tx to Rx. */
> > > > > > +		for (i = 0; i < n; i++)
> > > > > > +			rxep[i] = txep[i].mbuf;
> > > > > > +	} else {
> > > > > > +		for (i = 0; i < n; i++) {
> > > > > > +			rxep[i] =
> > rte_pktmbuf_prefree_seg(txep[i].mbuf);
> > > > > > +
> > > > > > +			/* If Tx buffers are not the last reference or
> > from
> > > > > > +			 * unexpected mempool, previous copied
> > buffers are
> > > > > > +			 * considered as invalid.
> > > > > > +			 */
> > > > > > +			if (unlikely((rxep[i] == NULL &&
> > refill_requirement) ||
> > > > > [Konstantin]
> > > > > Could you pls remind me why it is ok to have rxep[i]==NULL when
> > > > > refill_requirement is not set?
> > > > >
> > > > > If reill_requirement is not zero, it means each tx freed buffer
> > > > > must be valid and can be put into Rx sw_ring. Then  the refill
> > > > > head of Rx buffer ring can be aligned with mbuf ring size. Briefly
> > > > > speaking the number
> > > > of Tx valid freed buffer must be equal to Rx refill_requirement. For
> > > > example, i40e driver.
> > > > >
> > > > > If reill_requirement is zero, it means that the refill head of Rx
> > > > > buffer ring does not need to be aligned with mbuf ring size, thus
> > > > > if Tx have n valid freed buffers, we just need to put these n
> > > > > buffers into Rx sw-
> > > > ring, and not to be equal to the Rx setting rearm number. For
> > > > example, mlx5 driver.
> > > > >
> > > > > In conclusion, above difference is due to pmd drivers have
> > > > > different
> > > > strategies to update their Rx rearm(refill) head.
> > > > > For i40e driver, if rearm_head exceed 1024, it will be set as 0
> > > > > due to  the
> > > > number of each rearm is a fixed value by default.
> > > > > For mlx5 driver. Its rearm_head can exceed 1024, and use mask to
> > > > > achieve
> > > > real index. Thus its rearm number can be a different value.
> > > >
> > > > Ok, but if rte_pktmbuf_prefree_seg(txep[i].mbuf), it means that this
> > > > mbuf is not free yet and can't be reused.
> > > > Shouldn't we then set nb_recycle_mbufs = 0 in that case too?
> > > > Or probably would be enough to skip that mbuf?
> > > > Might be something like that:
> > > >
> > > > for (i = 0, j = 0; i < n; i++) {
> > > >
> > > > 	rxep[j] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> > > > 	if (rxep[j] == NULL || recycle_rxq_info->mp != rxep[j].mbuf->pool))  {
> > > > 		if (refill_requirement) {
> > > > 			nb_recycle_mbufs = 0;
> > > > 			break;
> > > > 		}
> > > > 	} else
> > > > 		j++;
> > > > }
> > > >
> > > > /* now j contains actual number of recycled mbufs */
> > > >
> > > > ?
> > > >
> > > > >
> > > > > > +					recycle_rxq_info->mp !=
> > txep[i].mbuf-
> > > > > > >pool))
> > > > > > +				nb_recycle_mbufs = 0;
> > > > > > +		}
> > > > > > +		/* If Tx buffers are not the last reference or
> > > > > > +		 * from unexpected mempool, all recycled buffers
> > > > > > +		 * are put into mempool.
> > > > > > +		 */
> > > > > > +		if (nb_recycle_mbufs == 0)
> > > > > > +			for (i = 0; i < n; i++) {
> > > > > > +				if (rxep[i] != NULL)
> > > > > > +					rte_mempool_put(rxep[i]-
> > >pool,
> > > > > > rxep[i]);
> > > > > > +			}
> > > > > > +	}
> > > > > > +
> > > [Konstantin] After another thought, it might be easier and cleaner just to:
> > > if (rxep[j] == NULL || recycle_rxq_info->mp != rxep[j].mbuf->pool)
> > > 	nb_recycle_mbufs = 0;
> > >
> > > Anyway, from my understanding - if rte_pktmbuf_prefree_seg(mbuf)
> > > returns NULL, then we can't recycle that mbuf.
> > >
> > > [Feifei] Agree with that 'rte_pktmbuf_prefree_seg(mbuf) returns NULL, then
> > we can't recycle that mbuf'.
> > >
> > > Firstly, we should know for i40e driver,  the number of free mbufs is fixed, it
> > must equal to 'tx_rs_thresh'
> > > This means if we start to free Tx mbufs, it cannot be interrupted until the
> > given amount of mbufs are freed.
> > > In the meanwhile, doing prefree operation for a Tx mbuf can be looked
> > > at this mbuf is freed from this TX sw-ring  if the API returns NULL. This is due
> > to that call 'prefree_seg' function means update the mbuf refcnt.
> > >
> > > So let's come back our recycle mbuf case.
> > > Here we consider that the case if we have 32 mbufs need to be freed, and
> > we firstly do the pre-free.
> > > And then first 8 mbufs is good and return value is not none. But the 9th
> > mbuf is bad, its refcnt is more than 1.
> > > So we just update its refcnt and cannot put it into Rx sw-ring. For Tx sw-ring,
> > this mbuf has been freed.
> > > Then we should continue to do pre-free operation for the next Tx mbufs
> > > to ensure the given amount of mbufs are freed.
> > >
> > > Do a conclusion for this, this is because if we start to do pre-free
> > > operation,  the Tx mbuf refcnt value maybe changed, so we cannot stop or
> > break until finish all the pre-free operation.
> > >
> > > Finally, in the case that Rx refill_request is not zero, but the valid
> > > mbuf amount is less than this value, we must put back this Tx mbufs into
> > mempool.
> > >
> > > Above is the reason why I do not directly jump out of the loop if some mbuf
> > return value is NULL.
> >
> > Yep, I already realized that it is a bit more complicated and we need to
> > continue with prefree() for all packets even when we get NULL in the middle.
> > Anyway the code has to be changed, right?
> >
> Sorry, I think for the code, it is unnecessary to be changed.
> For no fast free path, currently:
> 1.   We check whether each mbuf is Ok and call pre_free function
> ------------------------------------------------------------------------------------------------------------------------------------
> 2.1 For the mbuf return value is not NULL, it is put into Rx sw-ring.
> 2.2 For the mbuf return value is zero and refill-request, it will also firstly put into Rx sw-ring, and we set nb_recycle = 0
> ------------------------------------------------------------------------------------------------------------------------------------
> 3.1 We check nb_recycle, if it is not 0, we will continue to rearm Rx descs and update its index by call descs_refill function.
> 3.2 if nb_recycle is 0, we will put valid recycle mbufs back into mempool as general path. This is necessary, because we need
> to ensure the freed Tx number is fixed.(Some buffers return is null can be seen as freed, others need to be put into mempool)
> 
> Or maybe I ignore something?


I am talking about the case when both refill_requirement and the mbuf return values are zero:
if (unlikely((rxep[i] == NULL && refill_requirement) ||         // ???  rxep[i] == 0 AND refill_requirement == 0 ???
 		recycle_rxq_info->mp != txep[i].mbuf->pool))
	nb_recycle_mbufs = 0;

As I read the code, you treat such a situation as valid, while I think we should reset
nb_recycle_mbufs to zero when rxep[i] == NULL, no matter what value refill_requirement has.
. 
 

> > >
> > > > > > +	/* Update counters for Tx. */
> > > > > > +	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq-
> > >tx_rs_thresh);
> > > > > > +	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq-
> > >tx_rs_thresh);
> > > > > > +	if (txq->tx_next_dd >= txq->nb_tx_desc)
> > > > > > +		txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
> > > > > > +
> > > > > > +	return nb_recycle_mbufs;
> > > > > > +}
> > > > > > diff --git a/drivers/net/i40e/i40e_rxtx.c
> > > > > > b/drivers/net/i40e/i40e_rxtx.c index b4f65b58fa..a9c9eb331c
> > > > > > 100644
> > > > > > --- a/drivers/net/i40e/i40e_rxtx.c
> > > > > > +++ b/drivers/net/i40e/i40e_rxtx.c
> > > > > > @@ -3199,6 +3199,30 @@ i40e_txq_info_get(struct rte_eth_dev
> > > > > > *dev, uint16_t queue_id,
> > > > > >  	qinfo->conf.offloads = txq->offloads;  }
> > > > > >
> > > > > > +void
> > > > > > +i40e_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t
> > queue_id,
> > > > > > +	struct rte_eth_recycle_rxq_info *recycle_rxq_info) {
> > > > > > +	struct i40e_rx_queue *rxq;
> > > > > > +	struct i40e_adapter *ad =
> > > > > > +		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data-
> > >dev_private);
> > > > > > +
> > > > > > +	rxq = dev->data->rx_queues[queue_id];
> > > > > > +
> > > > > > +	recycle_rxq_info->mbuf_ring = (void *)rxq->sw_ring;
> > > > > > +	recycle_rxq_info->mp = rxq->mp;
> > > > > > +	recycle_rxq_info->mbuf_ring_size = rxq->nb_rx_desc;
> > > > > > +	recycle_rxq_info->receive_tail = &rxq->rx_tail;
> > > > > > +
> > > > > > +	if (ad->rx_vec_allowed) {
> > > > > > +		recycle_rxq_info->refill_requirement =
> > > > > > RTE_I40E_RXQ_REARM_THRESH;
> > > > > > +		recycle_rxq_info->refill_head = &rxq->rxrearm_start;
> > > > > > +	} else {
> > > > > > +		recycle_rxq_info->refill_requirement = rxq-
> > >rx_free_thresh;
> > > > > > +		recycle_rxq_info->refill_head = &rxq->rx_free_trigger;
> > > > > > +	}
> > > > > > +}
> > > > > > +
> > > > > >  #ifdef RTE_ARCH_X86
> > > > > >  static inline bool
> > > > > >  get_avx_supported(bool request_avx512) @@ -3293,6 +3317,8 @@
> > > > > > i40e_set_rx_function(struct rte_eth_dev *dev)
> > > > > >  				dev->rx_pkt_burst = ad->rx_use_avx2 ?
> > > > > >  					i40e_recv_scattered_pkts_vec_avx2 :
> > > > > >  					i40e_recv_scattered_pkts_vec;
> > > > > > +				dev->recycle_rx_descriptors_refill =
> > > > > > +
> > 	i40e_recycle_rx_descriptors_refill_vec;
> > > > > >  			}
> > > > > >  		} else {
> > > > > >  			if (ad->rx_use_avx512) {
> > > > > > @@ -3311,9 +3337,12 @@ i40e_set_rx_function(struct rte_eth_dev
> > *dev)
> > > > > >  				dev->rx_pkt_burst = ad->rx_use_avx2 ?
> > > > > >  					i40e_recv_pkts_vec_avx2 :
> > > > > >  					i40e_recv_pkts_vec;
> > > > > > +				dev->recycle_rx_descriptors_refill =
> > > > > > +
> > 	i40e_recycle_rx_descriptors_refill_vec;
> > > > > >  			}
> > > > > >  		}
> > > > > >  #else /* RTE_ARCH_X86 */
> > > > > > +		dev->recycle_rx_descriptors_refill =
> > > > > > +i40e_recycle_rx_descriptors_refill_vec;
> > > > > >  		if (dev->data->scattered_rx) {
> > > > > >  			PMD_INIT_LOG(DEBUG,
> > > > > >  				     "Using Vector Scattered Rx (port %d).", @@
> > > > > > -3481,15 +3510,18 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
> > > > > >  				dev->tx_pkt_burst = ad->tx_use_avx2 ?
> > > > > >  						    i40e_xmit_pkts_vec_avx2 :
> > > > > >  						    i40e_xmit_pkts_vec;
> > > > > > +				dev->recycle_tx_mbufs_reuse =
> > > > > > i40e_recycle_tx_mbufs_reuse_vec;
> > > > > >  			}
> > > > > >  #else /* RTE_ARCH_X86 */
> > > > > >  			PMD_INIT_LOG(DEBUG, "Using Vector Tx (port %d).",
> > > > > >  				     dev->data->port_id);
> > > > > >  			dev->tx_pkt_burst = i40e_xmit_pkts_vec;
> > > > > > +			dev->recycle_tx_mbufs_reuse =
> > > > > > i40e_recycle_tx_mbufs_reuse_vec;  #endif /* RTE_ARCH_X86 */
> > > > > >  		} else {
> > > > > >  			PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
> > > > > >  			dev->tx_pkt_burst = i40e_xmit_pkts_simple;
> > > > > > +			dev->recycle_tx_mbufs_reuse =
> > > > > > i40e_recycle_tx_mbufs_reuse_vec;
> > > > > >  		}
> > > > > >  		dev->tx_pkt_prepare = i40e_simple_prep_pkts;
> > > > > >  	} else {
> > > > > > diff --git a/drivers/net/i40e/i40e_rxtx.h
> > > > > > b/drivers/net/i40e/i40e_rxtx.h index a8686224e5..b191f23e1f
> > > > > > 100644
> > > > > > --- a/drivers/net/i40e/i40e_rxtx.h
> > > > > > +++ b/drivers/net/i40e/i40e_rxtx.h
> > > > > > @@ -236,6 +236,10 @@ uint32_t i40e_dev_rx_queue_count(void
> > > > > > *rx_queue);  int i40e_dev_rx_descriptor_status(void *rx_queue,
> > > > > > uint16_t offset);  int i40e_dev_tx_descriptor_status(void
> > > > > > *tx_queue, uint16_t offset);
> > > > > >
> > > > > > +uint16_t i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
> > > > > > +		struct rte_eth_recycle_rxq_info *recycle_rxq_info);
> > void
> > > > > > +i40e_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t
> > > > > > +nb_mbufs);
> > > > > > +
> > > > > >  uint16_t i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf
> > **rx_pkts,
> > > > > >  			    uint16_t nb_pkts);
> > > > > >  uint16_t i40e_recv_scattered_pkts_vec(void *rx_queue, diff
> > > > > > --git a/drivers/net/i40e/meson.build
> > > > > > b/drivers/net/i40e/meson.build index
> > > > > > 8e53b87a65..3b1a233c84 100644
> > > > > > --- a/drivers/net/i40e/meson.build
> > > > > > +++ b/drivers/net/i40e/meson.build
> > > > > > @@ -34,6 +34,7 @@ sources = files(
> > > > > >          'i40e_tm.c',
> > > > > >          'i40e_hash.c',
> > > > > >          'i40e_vf_representor.c',
> > > > > > +	'i40e_recycle_mbufs_vec_common.c',
> > > > > >          'rte_pmd_i40e.c',
> > > > > >  )
> > > > > >
> > > > > > --
> > > > > > 2.25.1
  
Feifei Wang Sept. 4, 2023, 9:24 a.m. UTC | #9
> -----Original Message-----
> From: Konstantin Ananyev <konstantin.ananyev@huawei.com>
> Sent: Monday, September 4, 2023 3:50 PM
> To: Feifei Wang <Feifei.Wang2@arm.com>; Konstantin Ananyev
> <konstantin.v.ananyev@yandex.ru>
> Cc: dev@dpdk.org; nd <nd@arm.com>; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>; Yuying Zhang <Yuying.Zhang@intel.com>; Beilei
> Xing <beilei.xing@intel.com>; nd <nd@arm.com>; nd <nd@arm.com>; nd
> <nd@arm.com>
> Subject: RE: [PATCH v11 2/4] net/i40e: implement mbufs recycle mode
> 
> 
> Hi Feifei,
> 
> > > > > > > Define specific function implementation for i40e driver.
> > > > > > > Currently, mbufs recycle mode can support 128bit vector path
> > > > > > > and
> > > > > > > avx2
> > > > > path.
> > > > > > > And can be enabled both in fast free and no fast free mode.
> > > > > > >
> > > > > > > Suggested-by: Honnappa Nagarahalli
> > > > > > > <honnappa.nagarahalli@arm.com>
> > > > > > > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > > > > > > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > > > > > > Reviewed-by: Honnappa Nagarahalli
> > > <honnappa.nagarahalli@arm.com>
> > > > > > > ---
> > > > > > >  drivers/net/i40e/i40e_ethdev.c                |   1 +
> > > > > > >  drivers/net/i40e/i40e_ethdev.h                |   2 +
> > > > > > >  .../net/i40e/i40e_recycle_mbufs_vec_common.c  | 147
> > > > > > > ++++++++++++++++++
> > > > > > >  drivers/net/i40e/i40e_rxtx.c                  |  32 ++++
> > > > > > >  drivers/net/i40e/i40e_rxtx.h                  |   4 +
> > > > > > >  drivers/net/i40e/meson.build                  |   1 +
> > > > > > >  6 files changed, 187 insertions(+)  create mode 100644
> > > > > > > drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > > > > > >
> > > > > > > diff --git a/drivers/net/i40e/i40e_ethdev.c
> > > > > > > b/drivers/net/i40e/i40e_ethdev.c index
> > > > > > > 8271bbb394..50ba9aac94
> > > > > > > 100644
> > > > > > > --- a/drivers/net/i40e/i40e_ethdev.c
> > > > > > > +++ b/drivers/net/i40e/i40e_ethdev.c
> > > > > > > @@ -496,6 +496,7 @@ static const struct eth_dev_ops
> > > > > > > i40e_eth_dev_ops
> > > > > = {
> > > > > > >  	.flow_ops_get                 = i40e_dev_flow_ops_get,
> > > > > > >  	.rxq_info_get                 = i40e_rxq_info_get,
> > > > > > >  	.txq_info_get                 = i40e_txq_info_get,
> > > > > > > +	.recycle_rxq_info_get         = i40e_recycle_rxq_info_get,
> > > > > > >  	.rx_burst_mode_get            = i40e_rx_burst_mode_get,
> > > > > > >  	.tx_burst_mode_get            = i40e_tx_burst_mode_get,
> > > > > > >  	.timesync_enable              = i40e_timesync_enable,
> > > > > > > diff --git a/drivers/net/i40e/i40e_ethdev.h
> > > > > > > b/drivers/net/i40e/i40e_ethdev.h index
> > > > > > > 6f65d5e0ac..af758798e1
> > > > > > > 100644
> > > > > > > --- a/drivers/net/i40e/i40e_ethdev.h
> > > > > > > +++ b/drivers/net/i40e/i40e_ethdev.h
> > > > > > > @@ -1355,6 +1355,8 @@ void i40e_rxq_info_get(struct
> > > > > > > rte_eth_dev *dev, uint16_t queue_id,
> > > > > > >  	struct rte_eth_rxq_info *qinfo);  void
> > > > > > > i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
> > > > > > >  	struct rte_eth_txq_info *qinfo);
> > > > > > > +void i40e_recycle_rxq_info_get(struct rte_eth_dev *dev,
> > > > > > > +uint16_t
> > > > > queue_id,
> > > > > > > +	struct rte_eth_recycle_rxq_info *recycle_rxq_info);
> > > > > > >  int i40e_rx_burst_mode_get(struct rte_eth_dev *dev,
> > > > > > > uint16_t
> > > queue_id,
> > > > > > >  			   struct rte_eth_burst_mode *mode);  int
> > > > > > > i40e_tx_burst_mode_get(struct rte_eth_dev *dev, uint16_t
> > > > > > > queue_id, diff --git
> > > > > > > a/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > > > > > > b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > > > > > > new file mode 100644
> > > > > > > index 0000000000..5663ecccde
> > > > > > > --- /dev/null
> > > > > > > +++ b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > > > > > > @@ -0,0 +1,147 @@
> > > > > > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > > > > > + * Copyright (c) 2023 Arm Limited.
> > > > > > > + */
> > > > > > > +
> > > > > > > +#include <stdint.h>
> > > > > > > +#include <ethdev_driver.h>
> > > > > > > +
> > > > > > > +#include "base/i40e_prototype.h"
> > > > > > > +#include "base/i40e_type.h"
> > > > > > > +#include "i40e_ethdev.h"
> > > > > > > +#include "i40e_rxtx.h"
> > > > > > > +
> > > > > > > +#pragma GCC diagnostic ignored "-Wcast-qual"
> > > > > > > +
> > > > > > > +void
> > > > > > > +i40e_recycle_rx_descriptors_refill_vec(void *rx_queue,
> > > > > > > +uint16_t
> > > > > > > +nb_mbufs) {
> > > > > > > +	struct i40e_rx_queue *rxq = rx_queue;
> > > > > > > +	struct i40e_rx_entry *rxep;
> > > > > > > +	volatile union i40e_rx_desc *rxdp;
> > > > > > > +	uint16_t rx_id;
> > > > > > > +	uint64_t paddr;
> > > > > > > +	uint64_t dma_addr;
> > > > > > > +	uint16_t i;
> > > > > > > +
> > > > > > > +	rxdp = rxq->rx_ring + rxq->rxrearm_start;
> > > > > > > +	rxep = &rxq->sw_ring[rxq->rxrearm_start];
> > > > > > > +
> > > > > > > +	for (i = 0; i < nb_mbufs; i++) {
> > > > > > > +		/* Initialize rxdp descs. */
> > > > > > > +		paddr = (rxep[i].mbuf)->buf_iova +
> > > > > > > RTE_PKTMBUF_HEADROOM;
> > > > > > > +		dma_addr = rte_cpu_to_le_64(paddr);
> > > > > > > +		/* flush desc with pa dma_addr */
> > > > > > > +		rxdp[i].read.hdr_addr = 0;
> > > > > > > +		rxdp[i].read.pkt_addr = dma_addr;
> > > > > > > +	}
> > > > > > > +
> > > > > > > +	/* Update the descriptor initializer index */
> > > > > > > +	rxq->rxrearm_start += nb_mbufs;
> > > > > > > +	rx_id = rxq->rxrearm_start - 1;
> > > > > > > +
> > > > > > > +	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
> > > > > > > +		rxq->rxrearm_start = 0;
> > > > > > > +		rx_id = rxq->nb_rx_desc - 1;
> > > > > > > +	}
> > > > > > > +
> > > > > > > +	rxq->rxrearm_nb -= nb_mbufs;
> > > > > > > +
> > > > > > > +	rte_io_wmb();
> > > > > > > +	/* Update the tail pointer on the NIC */
> > > > > > > +	I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id); }
> > > > > > > +
> > > > > > > +uint16_t
> > > > > > > +i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
> > > > > > > +	struct rte_eth_recycle_rxq_info *recycle_rxq_info) {
> > > > > > > +	struct i40e_tx_queue *txq = tx_queue;
> > > > > > > +	struct i40e_tx_entry *txep;
> > > > > > > +	struct rte_mbuf **rxep;
> > > > > > > +	int i, n;
> > > > > > > +	uint16_t nb_recycle_mbufs;
> > > > > > > +	uint16_t avail = 0;
> > > > > > > +	uint16_t mbuf_ring_size = recycle_rxq_info->mbuf_ring_size;
> > > > > > > +	uint16_t mask = recycle_rxq_info->mbuf_ring_size - 1;
> > > > > > > +	uint16_t refill_requirement = recycle_rxq_info-
> > > >refill_requirement;
> > > > > > > +	uint16_t refill_head = *recycle_rxq_info->refill_head;
> > > > > > > +	uint16_t receive_tail = *recycle_rxq_info->receive_tail;
> > > > > > > +
> > > > > > > +	/* Get available recycling Rx buffers. */
> > > > > > > +	avail = (mbuf_ring_size - (refill_head - receive_tail)) &
> > > > > > > +mask;
> > > > > > > +
> > > > > > > +	/* Check Tx free thresh and Rx available space. */
> > > > > > > +	if (txq->nb_tx_free > txq->tx_free_thresh || avail <= txq-
> > > >tx_rs_thresh)
> > > > > > > +		return 0;
> > > > > > > +
> > > > > > > +	/* check DD bits on threshold descriptor */
> > > > > > > +	if ((txq->tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
> > > > > > > +
> > > > > > > 	rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
> > > > > > > +
> > > > > > > 	rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
> > > > > > > +		return 0;
> > > > > > > +
> > > > > > > +	n = txq->tx_rs_thresh;
> > > > > > > +	nb_recycle_mbufs = n;
> > > > > > > +
> > > > > > > +	/* Mbufs recycle mode can only support no ring buffer
> > > wrapping
> > > > > > > around.
> > > > > > > +	 * Two case for this:
> > > > > > > +	 *
> > > > > > > +	 * case 1: The refill head of Rx buffer ring needs to be
> > > > > > > +aligned
> > > with
> > > > > > > +	 * mbuf ring size. In this case, the number of Tx freeing buffers
> > > > > > > +	 * should be equal to refill_requirement.
> > > > > > > +	 *
> > > > > > > +	 * case 2: The refill head of Rx ring buffer does not need
> > > > > > > +to be
> > > aligned
> > > > > > > +	 * with mbuf ring size. In this case, the update of refill
> > > > > > > +head
> > > can not
> > > > > > > +	 * exceed the Rx mbuf ring size.
> > > > > > > +	 */
> > > > > > > +	if (refill_requirement != n ||
> > > > > > > +		(!refill_requirement && (refill_head + n >
> > > mbuf_ring_size)))
> > > > > > > +		return 0;
> > > > > > > +
> > > > > > > +	/* First buffer to free from S/W ring is at index
> > > > > > > +	 * tx_next_dd - (tx_rs_thresh-1).
> > > > > > > +	 */
> > > > > > > +	txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
> > > > > > > +	rxep = recycle_rxq_info->mbuf_ring;
> > > > > > > +	rxep += refill_head;
> > > > > > > +
> > > > > > > +	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
> > > > > > > +		/* Avoid txq contains buffers from unexpected
> > > mempool. */
> > > > > > > +		if (unlikely(recycle_rxq_info->mp
> > > > > > > +					!= txep[0].mbuf->pool))
> > > > > > > +			return 0;
> > > > > > > +
> > > > > > > +		/* Directly put mbufs from Tx to Rx. */
> > > > > > > +		for (i = 0; i < n; i++)
> > > > > > > +			rxep[i] = txep[i].mbuf;
> > > > > > > +	} else {
> > > > > > > +		for (i = 0; i < n; i++) {
> > > > > > > +			rxep[i] =
> > > rte_pktmbuf_prefree_seg(txep[i].mbuf);
> > > > > > > +
> > > > > > > +			/* If Tx buffers are not the last reference or
> > > from
> > > > > > > +			 * unexpected mempool, previous copied
> > > buffers are
> > > > > > > +			 * considered as invalid.
> > > > > > > +			 */
> > > > > > > +			if (unlikely((rxep[i] == NULL &&
> > > refill_requirement) ||
> > > > > > [Konstantin]
> > > > > > Could you pls remind me why it is ok to have rxep[i]==NULL
> > > > > > when refill_requirement is not set?
> > > > > >
> > > > > > > If refill_requirement is not zero, it means each tx freed
> > > > > > buffer must be valid and can be put into Rx sw_ring. Then  the
> > > > > > refill head of Rx buffer ring can be aligned with mbuf ring
> > > > > > size. Briefly speaking the number
> > > > > of Tx valid freed buffer must be equal to Rx refill_requirement.
> > > > > For example, i40e driver.
> > > > > >
> > > > > > > If refill_requirement is zero, it means that the refill head of
> > > > > > Rx buffer ring does not need to be aligned with mbuf ring
> > > > > > size, thus if Tx have n valid freed buffers, we just need to
> > > > > > put these n buffers into Rx sw-
> > > > > ring, and not to be equal to the Rx setting rearm number. For
> > > > > example, mlx5 driver.
> > > > > >
> > > > > > In conclusion, above difference is due to pmd drivers have
> > > > > > different
> > > > > strategies to update their Rx rearm(refill) head.
> > > > > > For i40e driver, if rearm_head exceed 1024, it will be set as
> > > > > > 0 due to  the
> > > > > number of each rearm is a fixed value by default.
> > > > > > For mlx5 driver. Its rearm_head can exceed 1024, and use mask
> > > > > > to achieve
> > > > > real index. Thus its rearm number can be a different value.
> > > > >
> > > > > Ok, but if rte_pktmbuf_prefree_seg(txep[i].mbuf), it means that
> > > > > this mbuf is not free yet and can't be reused.
> > > > > Shouldn't we then set nb_recycle_mbufs = 0 in that case too?
> > > > > Or probably would be enough to skip that mbuf?
> > > > > Might be something like that:
> > > > >
> > > > > for (i = 0, j = 0; i < n; i++) {
> > > > >
> > > > > 	rxep[j] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> > > > > 	if (rxep[j] == NULL || recycle_rxq_info->mp != rxep[j].mbuf->pool))  {
> > > > > 		if (refill_requirement) {
> > > > > 			nb_recycle_mbufs = 0;
> > > > > 			break;
> > > > > 		}
> > > > > 	} else
> > > > > 		j++;
> > > > > }
> > > > >
> > > > > /* now j contains actual number of recycled mbufs */
> > > > >
> > > > > ?
> > > > >
> > > > > >
> > > > > > > +					recycle_rxq_info->mp !=
> > > txep[i].mbuf-
> > > > > > > >pool))
> > > > > > > +				nb_recycle_mbufs = 0;
> > > > > > > +		}
> > > > > > > +		/* If Tx buffers are not the last reference or
> > > > > > > +		 * from unexpected mempool, all recycled buffers
> > > > > > > +		 * are put into mempool.
> > > > > > > +		 */
> > > > > > > +		if (nb_recycle_mbufs == 0)
> > > > > > > +			for (i = 0; i < n; i++) {
> > > > > > > +				if (rxep[i] != NULL)
> > > > > > > +					rte_mempool_put(rxep[i]-
> > > >pool,
> > > > > > > rxep[i]);
> > > > > > > +			}
> > > > > > > +	}
> > > > > > > +
> > > > [Konstantin] After another thought, it might be easier and cleaner just to:
> > > > if (rxep[j] == NULL || recycle_rxq_info->mp != rxep[j].mbuf->pool)
> > > > 	nb_recycle_mbufs = 0;
> > > >
> > > > Anyway, from my understanding - if rte_pktmbuf_prefree_seg(mbuf)
> > > > returns NULL, then we can't recycle that mbuf.
> > > >
> > > > [Feifei] Agree with that 'rte_pktmbuf_prefree_seg(mbuf) returns
> > > > NULL, then
> > > we can't recycle that mbuf'.
> > > >
> > > > Firstly, we should know for i40e driver,  the number of free mbufs
> > > > is fixed, it
> > > must equal to 'tx_rs_thresh'
> > > > This means if we start to free Tx mbufs, it cannot be interrupted
> > > > until the
> > > given amount of mbufs are freed.
> > > > In the meanwhile, doing prefree operation for a Tx mbuf can be
> > > > looked at this mbuf is freed from this TX sw-ring  if the API
> > > > returns NULL. This is due
> > > to that call 'prefree_seg' function means update the mbuf refcnt.
> > > >
> > > > So let's come back our recycle mbuf case.
> > > > Here we consider that the case if we have 32 mbufs need to be
> > > > freed, and
> > > we firstly do the pre-free.
> > > > And then first 8 mbufs is good and return value is not none. But
> > > > the 9th
> > > mbuf is bad, its refcnt is more than 1.
> > > > So we just update its refcnt and cannot put it into Rx sw-ring.
> > > > For Tx sw-ring,
> > > this mbuf has been freed.
> > > > Then we should continue to do pre-free operation for the next Tx
> > > > mbufs to ensure the given amount of mbufs are freed.
> > > >
> > > > Do a conclusion for this, this is because if we start to do
> > > > pre-free operation,  the Tx mbuf refcnt value maybe changed, so we
> > > > cannot stop or
> > > break until finish all the pre-free operation.
> > > >
> > > > Finally, in the case that Rx refill_request is not zero, but the
> > > > valid mbuf amount is less than this value, we must put back this
> > > > Tx mbufs into
> > > mempool.
> > > >
> > > > Above is the reason why I do not directly jump out of the loop if
> > > > some mbuf
> > > return value is NULL.
> > >
> > > Yep, I already realized that it is a bit more complicated and we
> > > need to continue with prefree() for all packets even when we get NULL in
> the middle.
> > > Anyway the code has to be changed, right?
> > >
> > Sorry, I think for the code, it is unnecessary to be changed.
> > For no fast free path, currently:
> > 1.   We check whether each mbuf is Ok and call pre_free function
> > ----------------------------------------------------------------------
> > --------------------------------------------------------------
> > 2.1 For the mbuf return value is not NULL, it is put into Rx sw-ring.
> > 2.2 For the mbuf return value is zero and refill-request, it will also
> > firstly put into Rx sw-ring, and we set nb_recycle = 0
> > ----------------------------------------------------------------------
> > --------------------------------------------------------------
> > 3.1 We check nb_recycle, if it is not 0, we will continue to rearm Rx descs
> and update its index by call descs_refill function.
> > 3.2 if nb_recycle is 0, we will put valid recycle mbufs back into
> > mempool as general path. This is necessary, because we need to ensure
> > the freed Tx number is fixed.(Some buffers return is null can be seen
> > as freed, others need to be put into mempool)
> >
> > Or maybe I ignore something?
> 
> 
> I am talking about the case when both refill_requirement and mbuf return
> values are zero:
> if (unlikely((rxep[i] == NULL && refill_requirement) ||         // ???  rxep[i] == 0
> AND refill_requirement == 0 ???
>  		recycle_rxq_info->mp != txep[i].mbuf->pool))
> 	nb_recycle_mbufs = 0;
> 
> As I can read the code you treat such situation as valid, while I think we
> should reset nb_recycle_mbufs to zero when rxep[i] == NULL, no matter what
> value refill_requirement is.

So this means that for a driver such as MLX5, refill_request = 0. And if some mbufs' return
value is zero, the other mbufs cannot be recycled into the Rx sw-ring? Because nb_recycle = 0,
and they need to be put back into the mempool.

I think for a driver such as MLX5, we can allow recycling some valid mbufs into the Rx ring,
because there is no constraint on its refill number. Is my suggestion reasonable?
> .
> 
> 
> > > >
> > > > > > > +	/* Update counters for Tx. */
> > > > > > > +	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq-
> > > >tx_rs_thresh);
> > > > > > > +	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq-
> > > >tx_rs_thresh);
> > > > > > > +	if (txq->tx_next_dd >= txq->nb_tx_desc)
> > > > > > > +		txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
> > > > > > > +
> > > > > > > +	return nb_recycle_mbufs;
> > > > > > > +}
> > > > > > > diff --git a/drivers/net/i40e/i40e_rxtx.c
> > > > > > > b/drivers/net/i40e/i40e_rxtx.c index b4f65b58fa..a9c9eb331c
> > > > > > > 100644
> > > > > > > --- a/drivers/net/i40e/i40e_rxtx.c
> > > > > > > +++ b/drivers/net/i40e/i40e_rxtx.c
> > > > > > > @@ -3199,6 +3199,30 @@ i40e_txq_info_get(struct rte_eth_dev
> > > > > > > *dev, uint16_t queue_id,
> > > > > > >  	qinfo->conf.offloads = txq->offloads;  }
> > > > > > >
> > > > > > > +void
> > > > > > > +i40e_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t
> > > queue_id,
> > > > > > > +	struct rte_eth_recycle_rxq_info *recycle_rxq_info) {
> > > > > > > +	struct i40e_rx_queue *rxq;
> > > > > > > +	struct i40e_adapter *ad =
> > > > > > > +		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data-
> > > >dev_private);
> > > > > > > +
> > > > > > > +	rxq = dev->data->rx_queues[queue_id];
> > > > > > > +
> > > > > > > +	recycle_rxq_info->mbuf_ring = (void *)rxq->sw_ring;
> > > > > > > +	recycle_rxq_info->mp = rxq->mp;
> > > > > > > +	recycle_rxq_info->mbuf_ring_size = rxq->nb_rx_desc;
> > > > > > > +	recycle_rxq_info->receive_tail = &rxq->rx_tail;
> > > > > > > +
> > > > > > > +	if (ad->rx_vec_allowed) {
> > > > > > > +		recycle_rxq_info->refill_requirement =
> > > > > > > RTE_I40E_RXQ_REARM_THRESH;
> > > > > > > +		recycle_rxq_info->refill_head = &rxq->rxrearm_start;
> > > > > > > +	} else {
> > > > > > > +		recycle_rxq_info->refill_requirement = rxq-
> > > >rx_free_thresh;
> > > > > > > +		recycle_rxq_info->refill_head = &rxq->rx_free_trigger;
> > > > > > > +	}
> > > > > > > +}
> > > > > > > +
> > > > > > >  #ifdef RTE_ARCH_X86
> > > > > > >  static inline bool
> > > > > > >  get_avx_supported(bool request_avx512) @@ -3293,6 +3317,8
> @@
> > > > > > > i40e_set_rx_function(struct rte_eth_dev *dev)
> > > > > > >  				dev->rx_pkt_burst = ad-
> >rx_use_avx2 ?
> > > > > > >
> 	i40e_recv_scattered_pkts_vec_avx2 :
> > > > > > >  					i40e_recv_scattered_pkts_vec;
> > > > > > > +				dev->recycle_rx_descriptors_refill =
> > > > > > > +
> > > 	i40e_recycle_rx_descriptors_refill_vec;
> > > > > > >  			}
> > > > > > >  		} else {
> > > > > > >  			if (ad->rx_use_avx512) {
> > > > > > > @@ -3311,9 +3337,12 @@ i40e_set_rx_function(struct rte_eth_dev
> > > *dev)
> > > > > > >  				dev->rx_pkt_burst = ad-
> >rx_use_avx2 ?
> > > > > > >  					i40e_recv_pkts_vec_avx2 :
> > > > > > >  					i40e_recv_pkts_vec;
> > > > > > > +				dev->recycle_rx_descriptors_refill =
> > > > > > > +
> > > 	i40e_recycle_rx_descriptors_refill_vec;
> > > > > > >  			}
> > > > > > >  		}
> > > > > > >  #else /* RTE_ARCH_X86 */
> > > > > > > +		dev->recycle_rx_descriptors_refill =
> > > > > > > +i40e_recycle_rx_descriptors_refill_vec;
> > > > > > >  		if (dev->data->scattered_rx) {
> > > > > > >  			PMD_INIT_LOG(DEBUG,
> > > > > > >  				     "Using Vector Scattered Rx
> (port %d).", @@
> > > > > > > -3481,15 +3510,18 @@ i40e_set_tx_function(struct rte_eth_dev
> *dev)
> > > > > > >  				dev->tx_pkt_burst = ad-
> >tx_use_avx2 ?
> > > > > > >
> i40e_xmit_pkts_vec_avx2 :
> > > > > > >
> i40e_xmit_pkts_vec;
> > > > > > > +				dev->recycle_tx_mbufs_reuse =
> > > > > > > i40e_recycle_tx_mbufs_reuse_vec;
> > > > > > >  			}
> > > > > > >  #else /* RTE_ARCH_X86 */
> > > > > > >  			PMD_INIT_LOG(DEBUG, "Using Vector Tx
> (port %d).",
> > > > > > >  				     dev->data->port_id);
> > > > > > >  			dev->tx_pkt_burst = i40e_xmit_pkts_vec;
> > > > > > > +			dev->recycle_tx_mbufs_reuse =
> > > > > > > i40e_recycle_tx_mbufs_reuse_vec;  #endif /* RTE_ARCH_X86 */
> > > > > > >  		} else {
> > > > > > >  			PMD_INIT_LOG(DEBUG, "Simple tx finally be
> used.");
> > > > > > >  			dev->tx_pkt_burst = i40e_xmit_pkts_simple;
> > > > > > > +			dev->recycle_tx_mbufs_reuse =
> > > > > > > i40e_recycle_tx_mbufs_reuse_vec;
> > > > > > >  		}
> > > > > > >  		dev->tx_pkt_prepare = i40e_simple_prep_pkts;
> > > > > > >  	} else {
> > > > > > > diff --git a/drivers/net/i40e/i40e_rxtx.h
> > > > > > > b/drivers/net/i40e/i40e_rxtx.h index a8686224e5..b191f23e1f
> > > > > > > 100644
> > > > > > > --- a/drivers/net/i40e/i40e_rxtx.h
> > > > > > > +++ b/drivers/net/i40e/i40e_rxtx.h
> > > > > > > @@ -236,6 +236,10 @@ uint32_t i40e_dev_rx_queue_count(void
> > > > > > > *rx_queue);  int i40e_dev_rx_descriptor_status(void *rx_queue,
> > > > > > > uint16_t offset);  int i40e_dev_tx_descriptor_status(void
> > > > > > > *tx_queue, uint16_t offset);
> > > > > > >
> > > > > > > +uint16_t i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
> > > > > > > +		struct rte_eth_recycle_rxq_info *recycle_rxq_info);
> > > void
> > > > > > > +i40e_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t
> > > > > > > +nb_mbufs);
> > > > > > > +
> > > > > > >  uint16_t i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf
> > > **rx_pkts,
> > > > > > >  			    uint16_t nb_pkts);
> > > > > > >  uint16_t i40e_recv_scattered_pkts_vec(void *rx_queue, diff
> > > > > > > --git a/drivers/net/i40e/meson.build
> > > > > > > b/drivers/net/i40e/meson.build index
> > > > > > > 8e53b87a65..3b1a233c84 100644
> > > > > > > --- a/drivers/net/i40e/meson.build
> > > > > > > +++ b/drivers/net/i40e/meson.build
> > > > > > > @@ -34,6 +34,7 @@ sources = files(
> > > > > > >          'i40e_tm.c',
> > > > > > >          'i40e_hash.c',
> > > > > > >          'i40e_vf_representor.c',
> > > > > > > +	'i40e_recycle_mbufs_vec_common.c',
> > > > > > >          'rte_pmd_i40e.c',
> > > > > > >  )
> > > > > > >
> > > > > > > --
> > > > > > > 2.25.1
  
Konstantin Ananyev Sept. 4, 2023, 10:21 a.m. UTC | #10
> > > > > > > > Define specific function implementation for i40e driver.
> > > > > > > > Currently, mbufs recycle mode can support 128bit vector path
> > > > > > > > and
> > > > > > > > avx2
> > > > > > path.
> > > > > > > > And can be enabled both in fast free and no fast free mode.
> > > > > > > >
> > > > > > > > Suggested-by: Honnappa Nagarahalli
> > > > > > > > <honnappa.nagarahalli@arm.com>
> > > > > > > > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > > > > > > > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > > > > > > > Reviewed-by: Honnappa Nagarahalli
> > > > <honnappa.nagarahalli@arm.com>
> > > > > > > > ---
> > > > > > > >  drivers/net/i40e/i40e_ethdev.c                |   1 +
> > > > > > > >  drivers/net/i40e/i40e_ethdev.h                |   2 +
> > > > > > > >  .../net/i40e/i40e_recycle_mbufs_vec_common.c  | 147
> > > > > > > > ++++++++++++++++++
> > > > > > > >  drivers/net/i40e/i40e_rxtx.c                  |  32 ++++
> > > > > > > >  drivers/net/i40e/i40e_rxtx.h                  |   4 +
> > > > > > > >  drivers/net/i40e/meson.build                  |   1 +
> > > > > > > >  6 files changed, 187 insertions(+)  create mode 100644
> > > > > > > > drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > > > > > > >
> > > > > > > > diff --git a/drivers/net/i40e/i40e_ethdev.c
> > > > > > > > b/drivers/net/i40e/i40e_ethdev.c index
> > > > > > > > 8271bbb394..50ba9aac94
> > > > > > > > 100644
> > > > > > > > --- a/drivers/net/i40e/i40e_ethdev.c
> > > > > > > > +++ b/drivers/net/i40e/i40e_ethdev.c
> > > > > > > > @@ -496,6 +496,7 @@ static const struct eth_dev_ops
> > > > > > > > i40e_eth_dev_ops
> > > > > > = {
> > > > > > > >  	.flow_ops_get                 = i40e_dev_flow_ops_get,
> > > > > > > >  	.rxq_info_get                 = i40e_rxq_info_get,
> > > > > > > >  	.txq_info_get                 = i40e_txq_info_get,
> > > > > > > > +	.recycle_rxq_info_get         = i40e_recycle_rxq_info_get,
> > > > > > > >  	.rx_burst_mode_get            = i40e_rx_burst_mode_get,
> > > > > > > >  	.tx_burst_mode_get            = i40e_tx_burst_mode_get,
> > > > > > > >  	.timesync_enable              = i40e_timesync_enable,
> > > > > > > > diff --git a/drivers/net/i40e/i40e_ethdev.h
> > > > > > > > b/drivers/net/i40e/i40e_ethdev.h index
> > > > > > > > 6f65d5e0ac..af758798e1
> > > > > > > > 100644
> > > > > > > > --- a/drivers/net/i40e/i40e_ethdev.h
> > > > > > > > +++ b/drivers/net/i40e/i40e_ethdev.h
> > > > > > > > @@ -1355,6 +1355,8 @@ void i40e_rxq_info_get(struct
> > > > > > > > rte_eth_dev *dev, uint16_t queue_id,
> > > > > > > >  	struct rte_eth_rxq_info *qinfo);  void
> > > > > > > > i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
> > > > > > > >  	struct rte_eth_txq_info *qinfo);
> > > > > > > > +void i40e_recycle_rxq_info_get(struct rte_eth_dev *dev,
> > > > > > > > +uint16_t
> > > > > > queue_id,
> > > > > > > > +	struct rte_eth_recycle_rxq_info *recycle_rxq_info);
> > > > > > > >  int i40e_rx_burst_mode_get(struct rte_eth_dev *dev,
> > > > > > > > uint16_t
> > > > queue_id,
> > > > > > > >  			   struct rte_eth_burst_mode *mode);  int
> > > > > > > > i40e_tx_burst_mode_get(struct rte_eth_dev *dev, uint16_t
> > > > > > > > queue_id, diff --git
> > > > > > > > a/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > > > > > > > b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > > > > > > > new file mode 100644
> > > > > > > > index 0000000000..5663ecccde
> > > > > > > > --- /dev/null
> > > > > > > > +++ b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > > > > > > > @@ -0,0 +1,147 @@
> > > > > > > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > > > > > > + * Copyright (c) 2023 Arm Limited.
> > > > > > > > + */
> > > > > > > > +
> > > > > > > > +#include <stdint.h>
> > > > > > > > +#include <ethdev_driver.h>
> > > > > > > > +
> > > > > > > > +#include "base/i40e_prototype.h"
> > > > > > > > +#include "base/i40e_type.h"
> > > > > > > > +#include "i40e_ethdev.h"
> > > > > > > > +#include "i40e_rxtx.h"
> > > > > > > > +
> > > > > > > > +#pragma GCC diagnostic ignored "-Wcast-qual"
> > > > > > > > +
> > > > > > > > +void
> > > > > > > > +i40e_recycle_rx_descriptors_refill_vec(void *rx_queue,
> > > > > > > > +uint16_t
> > > > > > > > +nb_mbufs) {
> > > > > > > > +	struct i40e_rx_queue *rxq = rx_queue;
> > > > > > > > +	struct i40e_rx_entry *rxep;
> > > > > > > > +	volatile union i40e_rx_desc *rxdp;
> > > > > > > > +	uint16_t rx_id;
> > > > > > > > +	uint64_t paddr;
> > > > > > > > +	uint64_t dma_addr;
> > > > > > > > +	uint16_t i;
> > > > > > > > +
> > > > > > > > +	rxdp = rxq->rx_ring + rxq->rxrearm_start;
> > > > > > > > +	rxep = &rxq->sw_ring[rxq->rxrearm_start];
> > > > > > > > +
> > > > > > > > +	for (i = 0; i < nb_mbufs; i++) {
> > > > > > > > +		/* Initialize rxdp descs. */
> > > > > > > > +		paddr = (rxep[i].mbuf)->buf_iova +
> > > > > > > > RTE_PKTMBUF_HEADROOM;
> > > > > > > > +		dma_addr = rte_cpu_to_le_64(paddr);
> > > > > > > > +		/* flush desc with pa dma_addr */
> > > > > > > > +		rxdp[i].read.hdr_addr = 0;
> > > > > > > > +		rxdp[i].read.pkt_addr = dma_addr;
> > > > > > > > +	}
> > > > > > > > +
> > > > > > > > +	/* Update the descriptor initializer index */
> > > > > > > > +	rxq->rxrearm_start += nb_mbufs;
> > > > > > > > +	rx_id = rxq->rxrearm_start - 1;
> > > > > > > > +
> > > > > > > > +	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
> > > > > > > > +		rxq->rxrearm_start = 0;
> > > > > > > > +		rx_id = rxq->nb_rx_desc - 1;
> > > > > > > > +	}
> > > > > > > > +
> > > > > > > > +	rxq->rxrearm_nb -= nb_mbufs;
> > > > > > > > +
> > > > > > > > +	rte_io_wmb();
> > > > > > > > +	/* Update the tail pointer on the NIC */
> > > > > > > > +	I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id); }
> > > > > > > > +
> > > > > > > > +uint16_t
> > > > > > > > +i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
> > > > > > > > +	struct rte_eth_recycle_rxq_info *recycle_rxq_info) {
> > > > > > > > +	struct i40e_tx_queue *txq = tx_queue;
> > > > > > > > +	struct i40e_tx_entry *txep;
> > > > > > > > +	struct rte_mbuf **rxep;
> > > > > > > > +	int i, n;
> > > > > > > > +	uint16_t nb_recycle_mbufs;
> > > > > > > > +	uint16_t avail = 0;
> > > > > > > > +	uint16_t mbuf_ring_size = recycle_rxq_info->mbuf_ring_size;
> > > > > > > > +	uint16_t mask = recycle_rxq_info->mbuf_ring_size - 1;
> > > > > > > > +	uint16_t refill_requirement = recycle_rxq_info-
> > > > >refill_requirement;
> > > > > > > > +	uint16_t refill_head = *recycle_rxq_info->refill_head;
> > > > > > > > +	uint16_t receive_tail = *recycle_rxq_info->receive_tail;
> > > > > > > > +
> > > > > > > > +	/* Get available recycling Rx buffers. */
> > > > > > > > +	avail = (mbuf_ring_size - (refill_head - receive_tail)) &
> > > > > > > > +mask;
> > > > > > > > +
> > > > > > > > +	/* Check Tx free thresh and Rx available space. */
> > > > > > > > +	if (txq->nb_tx_free > txq->tx_free_thresh || avail <= txq-
> > > > >tx_rs_thresh)
> > > > > > > > +		return 0;
> > > > > > > > +
> > > > > > > > +	/* check DD bits on threshold descriptor */
> > > > > > > > +	if ((txq->tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
> > > > > > > > +
> > > > > > > > 	rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
> > > > > > > > +
> > > > > > > > 	rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
> > > > > > > > +		return 0;
> > > > > > > > +
> > > > > > > > +	n = txq->tx_rs_thresh;
> > > > > > > > +	nb_recycle_mbufs = n;
> > > > > > > > +
> > > > > > > > +	/* Mbufs recycle mode can only support no ring buffer
> > > > wrapping
> > > > > > > > around.
> > > > > > > > +	 * Two case for this:
> > > > > > > > +	 *
> > > > > > > > +	 * case 1: The refill head of Rx buffer ring needs to be
> > > > > > > > +aligned
> > > > with
> > > > > > > > +	 * mbuf ring size. In this case, the number of Tx freeing buffers
> > > > > > > > +	 * should be equal to refill_requirement.
> > > > > > > > +	 *
> > > > > > > > +	 * case 2: The refill head of Rx ring buffer does not need
> > > > > > > > +to be
> > > > aligned
> > > > > > > > +	 * with mbuf ring size. In this case, the update of refill
> > > > > > > > +head
> > > > can not
> > > > > > > > +	 * exceed the Rx mbuf ring size.
> > > > > > > > +	 */
> > > > > > > > +	if (refill_requirement != n ||
> > > > > > > > +		(!refill_requirement && (refill_head + n >
> > > > mbuf_ring_size)))
> > > > > > > > +		return 0;
> > > > > > > > +
> > > > > > > > +	/* First buffer to free from S/W ring is at index
> > > > > > > > +	 * tx_next_dd - (tx_rs_thresh-1).
> > > > > > > > +	 */
> > > > > > > > +	txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
> > > > > > > > +	rxep = recycle_rxq_info->mbuf_ring;
> > > > > > > > +	rxep += refill_head;
> > > > > > > > +
> > > > > > > > +	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
> > > > > > > > +		/* Avoid txq contains buffers from unexpected
> > > > mempool. */
> > > > > > > > +		if (unlikely(recycle_rxq_info->mp
> > > > > > > > +					!= txep[0].mbuf->pool))
> > > > > > > > +			return 0;
> > > > > > > > +
> > > > > > > > +		/* Directly put mbufs from Tx to Rx. */
> > > > > > > > +		for (i = 0; i < n; i++)
> > > > > > > > +			rxep[i] = txep[i].mbuf;
> > > > > > > > +	} else {
> > > > > > > > +		for (i = 0; i < n; i++) {
> > > > > > > > +			rxep[i] =
> > > > rte_pktmbuf_prefree_seg(txep[i].mbuf);
> > > > > > > > +
> > > > > > > > +			/* If Tx buffers are not the last reference or
> > > > from
> > > > > > > > +			 * unexpected mempool, previous copied
> > > > buffers are
> > > > > > > > +			 * considered as invalid.
> > > > > > > > +			 */
> > > > > > > > +			if (unlikely((rxep[i] == NULL &&
> > > > refill_requirement) ||
> > > > > > > [Konstantin]
> > > > > > > Could you pls remind me why it is ok to have rxep[i]==NULL
> > > > > > > when refill_requirement is not set?
> > > > > > >
> > > > > > > If refill_requirement is not zero, it means each tx freed
> > > > > > > buffer must be valid and can be put into Rx sw_ring. Then  the
> > > > > > > refill head of Rx buffer ring can be aligned with mbuf ring
> > > > > > > size. Briefly speaking the number
> > > > > > of Tx valid freed buffer must be equal to Rx refill_requirement.
> > > > > > For example, i40e driver.
> > > > > > >
> > > > > > > If refill_requirement is zero, it means that the refill head of
> > > > > > > Rx buffer ring does not need to be aligned with mbuf ring
> > > > > > > size, thus if Tx have n valid freed buffers, we just need to
> > > > > > > put these n buffers into Rx sw-
> > > > > > ring, and not to be equal to the Rx setting rearm number. For
> > > > > > example, mlx5 driver.
> > > > > > >
> > > > > > > In conclusion, above difference is due to pmd drivers have
> > > > > > > different
> > > > > > strategies to update their Rx rearm(refill) head.
> > > > > > > For i40e driver, if rearm_head exceed 1024, it will be set as
> > > > > > > 0 due to  the
> > > > > > number of each rearm is a fixed value by default.
> > > > > > > For mlx5 driver. Its rearm_head can exceed 1024, and use mask
> > > > > > > to achieve
> > > > > > real index. Thus its rearm number can be a different value.
> > > > > >
> > > > > > Ok, but if rte_pktmbuf_prefree_seg(txep[i].mbuf), it means that
> > > > > > this mbuf is not free yet and can't be reused.
> > > > > > Shouldn't we then set nb_recycle_mbufs = 0 in that case too?
> > > > > > Or probably would be enough to skip that mbuf?
> > > > > > Might be something like that:
> > > > > >
> > > > > > for (i = 0, j = 0; i < n; i++) {
> > > > > >
> > > > > > 	rxep[j] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> > > > > > 	if (rxep[j] == NULL || recycle_rxq_info->mp != rxep[j].mbuf->pool))  {
> > > > > > 		if (refill_requirement) {
> > > > > > 			nb_recycle_mbufs = 0;
> > > > > > 			break;
> > > > > > 		}
> > > > > > 	} else
> > > > > > 		j++;
> > > > > > }
> > > > > >
> > > > > > /* now j contains actual number of recycled mbufs */
> > > > > >
> > > > > > ?
> > > > > >
> > > > > > >
> > > > > > > > +					recycle_rxq_info->mp !=
> > > > txep[i].mbuf-
> > > > > > > > >pool))
> > > > > > > > +				nb_recycle_mbufs = 0;
> > > > > > > > +		}
> > > > > > > > +		/* If Tx buffers are not the last reference or
> > > > > > > > +		 * from unexpected mempool, all recycled buffers
> > > > > > > > +		 * are put into mempool.
> > > > > > > > +		 */
> > > > > > > > +		if (nb_recycle_mbufs == 0)
> > > > > > > > +			for (i = 0; i < n; i++) {
> > > > > > > > +				if (rxep[i] != NULL)
> > > > > > > > +					rte_mempool_put(rxep[i]-
> > > > >pool,
> > > > > > > > rxep[i]);
> > > > > > > > +			}
> > > > > > > > +	}
> > > > > > > > +
> > > > > [Konstantin] After another thought, it might be easier and cleaner just to:
> > > > > if (rxep[j] == NULL || recycle_rxq_info->mp != rxep[j].mbuf->pool)
> > > > > 	nb_recycle_mbufs = 0;
> > > > >
> > > > > Anyway, from my understanding - if rte_pktmbuf_prefree_seg(mbuf)
> > > > > returns NULL, then we can't recycle that mbuf.
> > > > >
> > > > > [Feifei] Agree with that 'rte_pktmbuf_prefree_seg(mbuf) returns
> > > > > NULL, then
> > > > we can't recycle that mbuf'.
> > > > >
> > > > > Firstly, we should know for i40e driver,  the number of free mbufs
> > > > > is fixed, it
> > > > must equal to 'tx_rs_thresh'
> > > > > This means if we start to free Tx mbufs, it cannot be interrupted
> > > > > until the
> > > > given amount of mbufs are freed.
> > > > > In the meanwhile, doing prefree operation for a Tx mbuf can be
> > > > > looked at this mbuf is freed from this TX sw-ring  if the API
> > > > > returns NULL. This is due
> > > > to that call 'prefree_seg' function means update the mbuf refcnt.
> > > > >
> > > > > So let's come back our recycle mbuf case.
> > > > > Here we consider that the case if we have 32 mbufs need to be
> > > > > freed, and
> > > > we firstly do the pre-free.
> > > > > And then first 8 mbufs is good and return value is not none. But
> > > > > the 9th
> > > > mbuf is bad, its refcnt is more than 1.
> > > > > So we just update its refcnt and cannot put it into Rx sw-ring.
> > > > > For Tx sw-ring,
> > > > this mbuf has been freed.
> > > > > Then we should continue to do pre-free operation for the next Tx
> > > > > mbufs to ensure the given amount of mbufs are freed.
> > > > >
> > > > > Do a conclusion for this, this is because if we start to do
> > > > > pre-free operation,  the Tx mbuf refcnt value maybe changed, so we
> > > > > cannot stop or
> > > > break until finish all the pre-free operation.
> > > > >
> > > > > Finally, in the case that Rx refill_request is not zero, but the
> > > > > valid mbuf amount is less than this value, we must put back this
> > > > > Tx mbufs into
> > > > mempool.
> > > > >
> > > > > Above is the reason why I do not directly jump out of the loop if
> > > > > some mbuf
> > > > return value is NULL.
> > > >
> > > > Yep, I already realized that it is a bit more complicated and we
> > > > need to continue with prefree() for all packets even when we get NULL in
> > the middle.
> > > > Anyway the code has to be changed, right?
> > > >
> > > Sorry, I think for the code, it is unnecessary to be changed.
> > > For no fast free path, currently:
> > > 1.   We check whether each mbuf is Ok and call pre_free function
> > > ----------------------------------------------------------------------
> > > --------------------------------------------------------------
> > > 2.1 For the mbuf return value is not NULL, it is put into Rx sw-ring.
> > > 2.2 For the mbuf return value is zero and refill-request, it will also
> > > firstly put into Rx sw-ring, and we set nb_recycle = 0
> > > ----------------------------------------------------------------------
> > > --------------------------------------------------------------
> > > 3.1 We check nb_recycle, if it is not 0, we will continue to rearm Rx descs
> > and update its index by call descs_refill function.
> > > 3.2 if nb_recycle is 0, we will put valid recycle mbufs back into
> > > mempool as general path. This is necessary, because we need to ensure
> > > the freed Tx number is fixed.(Some buffers return is null can be seen
> > > as freed, others need to be put into mempool)
> > >
> > > Or maybe I ignore something?
> >
> >
> > I am talking about the case when both refill_requirement and mbuf return
> > values iare zero:
> > if (unlikely((rxep[i] == NULL && refill_requirement) ||         // ???  rxep[i] == 0
> > AND refill_requirement == 0 ???
> >  		recycle_rxq_info->mp != txep[i].mbuf->pool))
> > 	nb_recycle_mbufs = 0;
> >
> > As I can read the code you treat such situation as valid, while I think we
> > should reset nb_recycle_mbufs to zero when rxep[i] == NULL, no matter what
> > value refill_requirement is.
> 
> So this means that for a driver such as MLX5, its refill_request = 0. And if the return value for some
> mbufs is NULL, the other mbufs cannot be recycled into the Rx sw-ring? Because nb_recycle = 0, they
> would need to be put back into the mempool.
> 
> I think for a driver such as MLX5, we can allow recycling some valid mbufs into the Rx ring,
> because there is no constraint on its refill number. Is my suggestion reasonable?

I suppose yes: if refill_request is zero we can potentially skip 'bad' mbufs and continue with recycling
the remaining ones.
It would probably require more changes in the current code, but it sounds ok to me in general.

> >
> >
> > > > >
> > > > > > > > +	/* Update counters for Tx. */
> > > > > > > > +	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq-
> > > > >tx_rs_thresh);
> > > > > > > > +	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq-
> > > > >tx_rs_thresh);
> > > > > > > > +	if (txq->tx_next_dd >= txq->nb_tx_desc)
> > > > > > > > +		txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
> > > > > > > > +
> > > > > > > > +	return nb_recycle_mbufs;
> > > > > > > > +}
> > > > > > > > diff --git a/drivers/net/i40e/i40e_rxtx.c
> > > > > > > > b/drivers/net/i40e/i40e_rxtx.c index b4f65b58fa..a9c9eb331c
> > > > > > > > 100644
> > > > > > > > --- a/drivers/net/i40e/i40e_rxtx.c
> > > > > > > > +++ b/drivers/net/i40e/i40e_rxtx.c
> > > > > > > > @@ -3199,6 +3199,30 @@ i40e_txq_info_get(struct rte_eth_dev
> > > > > > > > *dev, uint16_t queue_id,
> > > > > > > >  	qinfo->conf.offloads = txq->offloads;  }
> > > > > > > >
> > > > > > > > +void
> > > > > > > > +i40e_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t
> > > > queue_id,
> > > > > > > > +	struct rte_eth_recycle_rxq_info *recycle_rxq_info) {
> > > > > > > > +	struct i40e_rx_queue *rxq;
> > > > > > > > +	struct i40e_adapter *ad =
> > > > > > > > +		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data-
> > > > >dev_private);
> > > > > > > > +
> > > > > > > > +	rxq = dev->data->rx_queues[queue_id];
> > > > > > > > +
> > > > > > > > +	recycle_rxq_info->mbuf_ring = (void *)rxq->sw_ring;
> > > > > > > > +	recycle_rxq_info->mp = rxq->mp;
> > > > > > > > +	recycle_rxq_info->mbuf_ring_size = rxq->nb_rx_desc;
> > > > > > > > +	recycle_rxq_info->receive_tail = &rxq->rx_tail;
> > > > > > > > +
> > > > > > > > +	if (ad->rx_vec_allowed) {
> > > > > > > > +		recycle_rxq_info->refill_requirement =
> > > > > > > > RTE_I40E_RXQ_REARM_THRESH;
> > > > > > > > +		recycle_rxq_info->refill_head = &rxq->rxrearm_start;
> > > > > > > > +	} else {
> > > > > > > > +		recycle_rxq_info->refill_requirement = rxq-
> > > > >rx_free_thresh;
> > > > > > > > +		recycle_rxq_info->refill_head = &rxq->rx_free_trigger;
> > > > > > > > +	}
> > > > > > > > +}
> > > > > > > > +
> > > > > > > >  #ifdef RTE_ARCH_X86
> > > > > > > >  static inline bool
> > > > > > > >  get_avx_supported(bool request_avx512) @@ -3293,6 +3317,8
> > @@
> > > > > > > > i40e_set_rx_function(struct rte_eth_dev *dev)
> > > > > > > >  				dev->rx_pkt_burst = ad-
> > >rx_use_avx2 ?
> > > > > > > >
> > 	i40e_recv_scattered_pkts_vec_avx2 :
> > > > > > > >  					i40e_recv_scattered_pkts_vec;
> > > > > > > > +				dev->recycle_rx_descriptors_refill =
> > > > > > > > +
> > > > 	i40e_recycle_rx_descriptors_refill_vec;
> > > > > > > >  			}
> > > > > > > >  		} else {
> > > > > > > >  			if (ad->rx_use_avx512) {
> > > > > > > > @@ -3311,9 +3337,12 @@ i40e_set_rx_function(struct rte_eth_dev
> > > > *dev)
> > > > > > > >  				dev->rx_pkt_burst = ad-
> > >rx_use_avx2 ?
> > > > > > > >  					i40e_recv_pkts_vec_avx2 :
> > > > > > > >  					i40e_recv_pkts_vec;
> > > > > > > > +				dev->recycle_rx_descriptors_refill =
> > > > > > > > +
> > > > 	i40e_recycle_rx_descriptors_refill_vec;
> > > > > > > >  			}
> > > > > > > >  		}
> > > > > > > >  #else /* RTE_ARCH_X86 */
> > > > > > > > +		dev->recycle_rx_descriptors_refill =
> > > > > > > > +i40e_recycle_rx_descriptors_refill_vec;
> > > > > > > >  		if (dev->data->scattered_rx) {
> > > > > > > >  			PMD_INIT_LOG(DEBUG,
> > > > > > > >  				     "Using Vector Scattered Rx
> > (port %d).", @@
> > > > > > > > -3481,15 +3510,18 @@ i40e_set_tx_function(struct rte_eth_dev
> > *dev)
> > > > > > > >  				dev->tx_pkt_burst = ad-
> > >tx_use_avx2 ?
> > > > > > > >
> > i40e_xmit_pkts_vec_avx2 :
> > > > > > > >
> > i40e_xmit_pkts_vec;
> > > > > > > > +				dev->recycle_tx_mbufs_reuse =
> > > > > > > > i40e_recycle_tx_mbufs_reuse_vec;
> > > > > > > >  			}
> > > > > > > >  #else /* RTE_ARCH_X86 */
> > > > > > > >  			PMD_INIT_LOG(DEBUG, "Using Vector Tx
> > (port %d).",
> > > > > > > >  				     dev->data->port_id);
> > > > > > > >  			dev->tx_pkt_burst = i40e_xmit_pkts_vec;
> > > > > > > > +			dev->recycle_tx_mbufs_reuse =
> > > > > > > > i40e_recycle_tx_mbufs_reuse_vec;  #endif /* RTE_ARCH_X86 */
> > > > > > > >  		} else {
> > > > > > > >  			PMD_INIT_LOG(DEBUG, "Simple tx finally be
> > used.");
> > > > > > > >  			dev->tx_pkt_burst = i40e_xmit_pkts_simple;
> > > > > > > > +			dev->recycle_tx_mbufs_reuse =
> > > > > > > > i40e_recycle_tx_mbufs_reuse_vec;
> > > > > > > >  		}
> > > > > > > >  		dev->tx_pkt_prepare = i40e_simple_prep_pkts;
> > > > > > > >  	} else {
> > > > > > > > diff --git a/drivers/net/i40e/i40e_rxtx.h
> > > > > > > > b/drivers/net/i40e/i40e_rxtx.h index a8686224e5..b191f23e1f
> > > > > > > > 100644
> > > > > > > > --- a/drivers/net/i40e/i40e_rxtx.h
> > > > > > > > +++ b/drivers/net/i40e/i40e_rxtx.h
> > > > > > > > @@ -236,6 +236,10 @@ uint32_t i40e_dev_rx_queue_count(void
> > > > > > > > *rx_queue);  int i40e_dev_rx_descriptor_status(void *rx_queue,
> > > > > > > > uint16_t offset);  int i40e_dev_tx_descriptor_status(void
> > > > > > > > *tx_queue, uint16_t offset);
> > > > > > > >
> > > > > > > > +uint16_t i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
> > > > > > > > +		struct rte_eth_recycle_rxq_info *recycle_rxq_info);
> > > > void
> > > > > > > > +i40e_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t
> > > > > > > > +nb_mbufs);
> > > > > > > > +
> > > > > > > >  uint16_t i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf
> > > > **rx_pkts,
> > > > > > > >  			    uint16_t nb_pkts);
> > > > > > > >  uint16_t i40e_recv_scattered_pkts_vec(void *rx_queue, diff
> > > > > > > > --git a/drivers/net/i40e/meson.build
> > > > > > > > b/drivers/net/i40e/meson.build index
> > > > > > > > 8e53b87a65..3b1a233c84 100644
> > > > > > > > --- a/drivers/net/i40e/meson.build
> > > > > > > > +++ b/drivers/net/i40e/meson.build
> > > > > > > > @@ -34,6 +34,7 @@ sources = files(
> > > > > > > >          'i40e_tm.c',
> > > > > > > >          'i40e_hash.c',
> > > > > > > >          'i40e_vf_representor.c',
> > > > > > > > +	'i40e_recycle_mbufs_vec_common.c',
> > > > > > > >          'rte_pmd_i40e.c',
> > > > > > > >  )
> > > > > > > >
> > > > > > > > --
> > > > > > > > 2.25.1
  
Feifei Wang Sept. 5, 2023, 3:11 a.m. UTC | #11
> -----Original Message-----
> From: Konstantin Ananyev <konstantin.ananyev@huawei.com>
> Sent: Monday, September 4, 2023 6:22 PM
> To: Feifei Wang <Feifei.Wang2@arm.com>; Konstantin Ananyev
> <konstantin.v.ananyev@yandex.ru>
> Cc: dev@dpdk.org; nd <nd@arm.com>; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>; Yuying Zhang <Yuying.Zhang@intel.com>; Beilei
> Xing <beilei.xing@intel.com>; nd <nd@arm.com>; nd <nd@arm.com>; nd
> <nd@arm.com>; nd <nd@arm.com>
> Subject: RE: [PATCH v11 2/4] net/i40e: implement mbufs recycle mode
> 
> 
> 
> > > > > > > > > Define specific function implementation for i40e driver.
> > > > > > > > > Currently, mbufs recycle mode can support 128bit vector
> > > > > > > > > path and
> > > > > > > > > avx2
> > > > > > > path.
> > > > > > > > > And can be enabled both in fast free and no fast free mode.
> > > > > > > > >
> > > > > > > > > Suggested-by: Honnappa Nagarahalli
> > > > > > > > > <honnappa.nagarahalli@arm.com>
> > > > > > > > > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > > > > > > > > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > > > > > > > > Reviewed-by: Honnappa Nagarahalli
> > > > > <honnappa.nagarahalli@arm.com>
> > > > > > > > > ---
> > > > > > > > >  drivers/net/i40e/i40e_ethdev.c                |   1 +
> > > > > > > > >  drivers/net/i40e/i40e_ethdev.h                |   2 +
> > > > > > > > >  .../net/i40e/i40e_recycle_mbufs_vec_common.c  | 147
> > > > > > > > > ++++++++++++++++++
> > > > > > > > >  drivers/net/i40e/i40e_rxtx.c                  |  32 ++++
> > > > > > > > >  drivers/net/i40e/i40e_rxtx.h                  |   4 +
> > > > > > > > >  drivers/net/i40e/meson.build                  |   1 +
> > > > > > > > >  6 files changed, 187 insertions(+)  create mode 100644
> > > > > > > > > drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > > > > > > > >
> > > > > > > > > diff --git a/drivers/net/i40e/i40e_ethdev.c
> > > > > > > > > b/drivers/net/i40e/i40e_ethdev.c index
> > > > > > > > > 8271bbb394..50ba9aac94
> > > > > > > > > 100644
> > > > > > > > > --- a/drivers/net/i40e/i40e_ethdev.c
> > > > > > > > > +++ b/drivers/net/i40e/i40e_ethdev.c
> > > > > > > > > @@ -496,6 +496,7 @@ static const struct eth_dev_ops
> > > > > > > > > i40e_eth_dev_ops
> > > > > > > = {
> > > > > > > > >  	.flow_ops_get                 = i40e_dev_flow_ops_get,
> > > > > > > > >  	.rxq_info_get                 = i40e_rxq_info_get,
> > > > > > > > >  	.txq_info_get                 = i40e_txq_info_get,
> > > > > > > > > +	.recycle_rxq_info_get         = i40e_recycle_rxq_info_get,
> > > > > > > > >  	.rx_burst_mode_get            = i40e_rx_burst_mode_get,
> > > > > > > > >  	.tx_burst_mode_get            = i40e_tx_burst_mode_get,
> > > > > > > > >  	.timesync_enable              = i40e_timesync_enable,
> > > > > > > > > diff --git a/drivers/net/i40e/i40e_ethdev.h
> > > > > > > > > b/drivers/net/i40e/i40e_ethdev.h index
> > > > > > > > > 6f65d5e0ac..af758798e1
> > > > > > > > > 100644
> > > > > > > > > --- a/drivers/net/i40e/i40e_ethdev.h
> > > > > > > > > +++ b/drivers/net/i40e/i40e_ethdev.h
> > > > > > > > > @@ -1355,6 +1355,8 @@ void i40e_rxq_info_get(struct
> > > > > > > > > rte_eth_dev *dev, uint16_t queue_id,
> > > > > > > > >  	struct rte_eth_rxq_info *qinfo);  void
> > > > > > > > > i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
> > > > > > > > >  	struct rte_eth_txq_info *qinfo);
> > > > > > > > > +void i40e_recycle_rxq_info_get(struct rte_eth_dev *dev,
> > > > > > > > > +uint16_t
> > > > > > > queue_id,
> > > > > > > > > +	struct rte_eth_recycle_rxq_info *recycle_rxq_info);
> > > > > > > > >  int i40e_rx_burst_mode_get(struct rte_eth_dev *dev,
> > > > > > > > > uint16_t
> > > > > queue_id,
> > > > > > > > >  			   struct rte_eth_burst_mode *mode);  int
> > > > > > > > > i40e_tx_burst_mode_get(struct rte_eth_dev *dev, uint16_t
> > > > > > > > > queue_id, diff -- git
> > > > > > > > > a/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > > > > > > > > b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > > > > > > > > new file mode 100644
> > > > > > > > > index 0000000000..5663ecccde
> > > > > > > > > --- /dev/null
> > > > > > > > > +++ b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > > > > > > > > @@ -0,0 +1,147 @@
> > > > > > > > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > > > > > > > + * Copyright (c) 2023 Arm Limited.
> > > > > > > > > + */
> > > > > > > > > +
> > > > > > > > > +#include <stdint.h>
> > > > > > > > > +#include <ethdev_driver.h>
> > > > > > > > > +
> > > > > > > > > +#include "base/i40e_prototype.h"
> > > > > > > > > +#include "base/i40e_type.h"
> > > > > > > > > +#include "i40e_ethdev.h"
> > > > > > > > > +#include "i40e_rxtx.h"
> > > > > > > > > +
> > > > > > > > > +#pragma GCC diagnostic ignored "-Wcast-qual"
> > > > > > > > > +
> > > > > > > > > +void
> > > > > > > > > +i40e_recycle_rx_descriptors_refill_vec(void *rx_queue,
> > > > > > > > > +uint16_t
> > > > > > > > > +nb_mbufs) {
> > > > > > > > > +	struct i40e_rx_queue *rxq = rx_queue;
> > > > > > > > > +	struct i40e_rx_entry *rxep;
> > > > > > > > > +	volatile union i40e_rx_desc *rxdp;
> > > > > > > > > +	uint16_t rx_id;
> > > > > > > > > +	uint64_t paddr;
> > > > > > > > > +	uint64_t dma_addr;
> > > > > > > > > +	uint16_t i;
> > > > > > > > > +
> > > > > > > > > +	rxdp = rxq->rx_ring + rxq->rxrearm_start;
> > > > > > > > > +	rxep = &rxq->sw_ring[rxq->rxrearm_start];
> > > > > > > > > +
> > > > > > > > > +	for (i = 0; i < nb_mbufs; i++) {
> > > > > > > > > +		/* Initialize rxdp descs. */
> > > > > > > > > +		paddr = (rxep[i].mbuf)->buf_iova +
> > > > > > > > > RTE_PKTMBUF_HEADROOM;
> > > > > > > > > +		dma_addr = rte_cpu_to_le_64(paddr);
> > > > > > > > > +		/* flush desc with pa dma_addr */
> > > > > > > > > +		rxdp[i].read.hdr_addr = 0;
> > > > > > > > > +		rxdp[i].read.pkt_addr = dma_addr;
> > > > > > > > > +	}
> > > > > > > > > +
> > > > > > > > > +	/* Update the descriptor initializer index */
> > > > > > > > > +	rxq->rxrearm_start += nb_mbufs;
> > > > > > > > > +	rx_id = rxq->rxrearm_start - 1;
> > > > > > > > > +
> > > > > > > > > +	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
> > > > > > > > > +		rxq->rxrearm_start = 0;
> > > > > > > > > +		rx_id = rxq->nb_rx_desc - 1;
> > > > > > > > > +	}
> > > > > > > > > +
> > > > > > > > > +	rxq->rxrearm_nb -= nb_mbufs;
> > > > > > > > > +
> > > > > > > > > +	rte_io_wmb();
> > > > > > > > > +	/* Update the tail pointer on the NIC */
> > > > > > > > > +	I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id); }
> > > > > > > > > +
> > > > > > > > > +uint16_t
> > > > > > > > > +i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
> > > > > > > > > +	struct rte_eth_recycle_rxq_info *recycle_rxq_info) {
> > > > > > > > > +	struct i40e_tx_queue *txq = tx_queue;
> > > > > > > > > +	struct i40e_tx_entry *txep;
> > > > > > > > > +	struct rte_mbuf **rxep;
> > > > > > > > > +	int i, n;
> > > > > > > > > +	uint16_t nb_recycle_mbufs;
> > > > > > > > > +	uint16_t avail = 0;
> > > > > > > > > +	uint16_t mbuf_ring_size = recycle_rxq_info->mbuf_ring_size;
> > > > > > > > > +	uint16_t mask = recycle_rxq_info->mbuf_ring_size - 1;
> > > > > > > > > +	uint16_t refill_requirement = recycle_rxq_info-
> > > > > >refill_requirement;
> > > > > > > > > +	uint16_t refill_head = *recycle_rxq_info->refill_head;
> > > > > > > > > +	uint16_t receive_tail =
> > > > > > > > > +*recycle_rxq_info->receive_tail;
> > > > > > > > > +
> > > > > > > > > +	/* Get available recycling Rx buffers. */
> > > > > > > > > +	avail = (mbuf_ring_size - (refill_head -
> > > > > > > > > +receive_tail)) & mask;
> > > > > > > > > +
> > > > > > > > > +	/* Check Tx free thresh and Rx available space. */
> > > > > > > > > +	if (txq->nb_tx_free > txq->tx_free_thresh || avail <=
> > > > > > > > > +txq-
> > > > > >tx_rs_thresh)
> > > > > > > > > +		return 0;
> > > > > > > > > +
> > > > > > > > > +	/* check DD bits on threshold descriptor */
> > > > > > > > > +	if ((txq->tx_ring[txq->tx_next_dd].cmd_type_offset_bsz
> > > > > > > > > +&
> > > > > > > > > +
> > > > > > > > > 	rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
> > > > > > > > > +
> > > > > > > > > 	rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
> > > > > > > > > +		return 0;
> > > > > > > > > +
> > > > > > > > > +	n = txq->tx_rs_thresh;
> > > > > > > > > +	nb_recycle_mbufs = n;
> > > > > > > > > +
> > > > > > > > > +	/* Mbufs recycle mode can only support no ring buffer
> > > > > wrapping
> > > > > > > > > around.
> > > > > > > > > +	 * Two case for this:
> > > > > > > > > +	 *
> > > > > > > > > +	 * case 1: The refill head of Rx buffer ring needs to
> > > > > > > > > +be aligned
> > > > > with
> > > > > > > > > +	 * mbuf ring size. In this case, the number of Tx freeing buffers
> > > > > > > > > +	 * should be equal to refill_requirement.
> > > > > > > > > +	 *
> > > > > > > > > +	 * case 2: The refill head of Rx ring buffer does not
> > > > > > > > > +need to be
> > > > > aligned
> > > > > > > > > +	 * with mbuf ring size. In this case, the update of
> > > > > > > > > +refill head
> > > > > can not
> > > > > > > > > +	 * exceed the Rx mbuf ring size.
> > > > > > > > > +	 */
> > > > > > > > > +	if (refill_requirement != n ||
> > > > > > > > > +		(!refill_requirement && (refill_head + n >
> > > > > mbuf_ring_size)))
> > > > > > > > > +		return 0;
> > > > > > > > > +
> > > > > > > > > +	/* First buffer to free from S/W ring is at index
> > > > > > > > > +	 * tx_next_dd - (tx_rs_thresh-1).
> > > > > > > > > +	 */
> > > > > > > > > +	txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
> > > > > > > > > +	rxep = recycle_rxq_info->mbuf_ring;
> > > > > > > > > +	rxep += refill_head;
> > > > > > > > > +
> > > > > > > > > +	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
> > > > > > > > > +		/* Avoid txq contains buffers from unexpected
> > > > > mempool. */
> > > > > > > > > +		if (unlikely(recycle_rxq_info->mp
> > > > > > > > > +					!= txep[0].mbuf->pool))
> > > > > > > > > +			return 0;
> > > > > > > > > +
> > > > > > > > > +		/* Directly put mbufs from Tx to Rx. */
> > > > > > > > > +		for (i = 0; i < n; i++)
> > > > > > > > > +			rxep[i] = txep[i].mbuf;
> > > > > > > > > +	} else {
> > > > > > > > > +		for (i = 0; i < n; i++) {
> > > > > > > > > +			rxep[i] =
> > > > > rte_pktmbuf_prefree_seg(txep[i].mbuf);
> > > > > > > > > +
> > > > > > > > > +			/* If Tx buffers are not the last reference or
> > > > > from
> > > > > > > > > +			 * unexpected mempool, previous copied
> > > > > buffers are
> > > > > > > > > +			 * considered as invalid.
> > > > > > > > > +			 */
> > > > > > > > > +			if (unlikely((rxep[i] == NULL &&
> > > > > refill_requirement) ||
> > > > > > > > [Konstantin]
> > > > > > > > Could you pls remind me why it is ok to have rxep[i]==NULL
> > > > > > > > when refill_requirement is not set?
> > > > > > > >
> > > > > > > > If reill_requirement is not zero, it means each tx freed
> > > > > > > > buffer must be valid and can be put into Rx sw_ring. Then
> > > > > > > > the refill head of Rx buffer ring can be aligned with mbuf
> > > > > > > > ring size. Briefly speaking the number
> > > > > > > of Tx valid freed buffer must be equal to Rx refill_requirement.
> > > > > > > For example, i40e driver.
> > > > > > > >
> > > > > > > > If reill_requirement is zero, it means that the refill
> > > > > > > > head of Rx buffer ring does not need to be aligned with
> > > > > > > > mbuf ring size, thus if Tx have n valid freed buffers, we
> > > > > > > > just need to put these n buffers into Rx sw-
> > > > > > > ring, and not to be equal to the Rx setting rearm number.
> > > > > > > For example, mlx5 driver.
> > > > > > > >
> > > > > > > > In conclusion, above difference is due to pmd drivers have
> > > > > > > > different
> > > > > > > strategies to update their Rx rearm(refill) head.
> > > > > > > > For i40e driver, if rearm_head exceed 1024, it will be set
> > > > > > > > as
> > > > > > > > 0 due to  the
> > > > > > > number of each rearm is a fixed value by default.
> > > > > > > > For mlx5 driver. Its rearm_head can exceed 1024, and use
> > > > > > > > mask to achieve
> > > > > > > real index. Thus its rearm number can be a different value.
> > > > > > >
> > > > > > > Ok, but if rte_pktmbuf_prefree_seg(txep[i].mbuf), it means
> > > > > > > that this mbuf is not free yet and can't be reused.
> > > > > > > Shouldn't we then set nb_recycle_mbufs = 0 in that case too?
> > > > > > > Or probably would be enough to skip that mbuf?
> > > > > > > Might be something like that:
> > > > > > >
> > > > > > > for (i = 0, j = 0; i < n; i++) {
> > > > > > >
> > > > > > > 	rxep[j] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> > > > > > > 	if (rxep[j] == NULL || recycle_rxq_info->mp != rxep[j].mbuf-
> >pool))  {
> > > > > > > 		if (refill_requirement) {
> > > > > > > 			nb_recycle_mbufs = 0;
> > > > > > > 			break;
> > > > > > > 		}
> > > > > > > 	} else
> > > > > > > 		j++;
> > > > > > > }
> > > > > > >
> > > > > > > /* now j contains actual number of recycled mbufs */
> > > > > > >
> > > > > > > ?
> > > > > > >
> > > > > > > >
> > > > > > > > > +					recycle_rxq_info->mp !=
> > > > > txep[i].mbuf-
> > > > > > > > > >pool))
> > > > > > > > > +				nb_recycle_mbufs = 0;
> > > > > > > > > +		}
> > > > > > > > > +		/* If Tx buffers are not the last reference or
> > > > > > > > > +		 * from unexpected mempool, all recycled buffers
> > > > > > > > > +		 * are put into mempool.
> > > > > > > > > +		 */
> > > > > > > > > +		if (nb_recycle_mbufs == 0)
> > > > > > > > > +			for (i = 0; i < n; i++) {
> > > > > > > > > +				if (rxep[i] != NULL)
> > > > > > > > > +					rte_mempool_put(rxep[i]-
> > > > > >pool,
> > > > > > > > > rxep[i]);
> > > > > > > > > +			}
> > > > > > > > > +	}
> > > > > > > > > +
> > > > > > [Konstantin] After another thought, it might be easier and cleaner
> just to:
> > > > > > if (rxep[j] == NULL || recycle_rxq_info->mp != rxep[j].mbuf->pool)
> > > > > > 	nb_recycle_mbufs = 0;
> > > > > >
> > > > > > Anyway, from my understanding - if
> > > > > > rte_pktmbuf_prefree_seg(mbuf) returns NULL, then we can't recycle
> that mbuf.
> > > > > >
> > > > > > [Feifei] Agree with that 'rte_pktmbuf_prefree_seg(mbuf)
> > > > > > returns NULL, then
> > > > > we can't recycle that mbuf'.
> > > > > >
> > > > > > Firstly, we should know for i40e driver,  the number of free
> > > > > > mbufs is fixed, it
> > > > > must equal to 'tx_rs_thresh'
> > > > > > This means if we start to free Tx mbufs, it cannot be
> > > > > > interrupted until the
> > > > > given amount of mbufs are freed.
> > > > > > In the meanwhile, doing prefree operation for a Tx mbuf can be
> > > > > > looked at this mbuf is freed from this TX sw-ring  if the API
> > > > > > returns NULL. This is due
> > > > > to that call 'prefree_seg' function means update the mbuf refcnt.
> > > > > >
> > > > > > So let's come back our recycle mbuf case.
> > > > > > Here we consider that the case if we have 32 mbufs need to be
> > > > > > freed, and
> > > > > we firstly do the pre-free.
> > > > > > And then first 8 mbufs is good and return value is not none.
> > > > > > But the 9th
> > > > > mbuf is bad, its refcnt is more than 1.
> > > > > > So we just update its refcnt and cannot put it into Rx sw-ring.
> > > > > > For Tx sw-ring,
> > > > > this mbuf has been freed.
> > > > > > Then we should continue to do pre-free operation for the next
> > > > > > Tx mbufs to ensure the given amount of mbufs are freed.
> > > > > >
> > > > > > Do a conclusion for this, this is because if we start to do
> > > > > > pre-free operation,  the Tx mbuf refcnt value maybe changed,
> > > > > > so we cannot stop or
> > > > > break until finish all the pre-free operation.
> > > > > >
> > > > > > Finally, in the case that Rx refill_request is not zero, but
> > > > > > the valid mbuf amount is less than this value, we must put
> > > > > > back this Tx mbufs into
> > > > > mempool.
> > > > > >
> > > > > > Above is the reason why I do not directly jump out of the loop
> > > > > > if some mbuf
> > > > > return value is NULL.
> > > > >
> > > > > Yep, I already realized that it is a bit more complicated and we
> > > > > need to continue with prefree() for all packets even when we get
> > > > > NULL in
> > > the middle.
> > > > > Anyway the code has to be changed, right?
> > > > >
> > > > Sorry, I think for the code, it is unnecessary to be changed.
> > > > For no fast free path, currently:
> > > > 1.   We check whether each mbuf is Ok and call pre_free function
> > > > ------------------------------------------------------------------
> > > > ----
> > > > --------------------------------------------------------------
> > > > 2.1 For the mbuf return value is not NULL, it is put into Rx sw-ring.
> > > > 2.2 For the mbuf return value is zero and refill-request, it will
> > > > also firstly put into Rx sw-ring, and we set nb_recycle = 0
> > > > ------------------------------------------------------------------
> > > > ----
> > > > --------------------------------------------------------------
> > > > 3.1 We check nb_recycle, if it is not 0, we will continue to rearm
> > > > Rx descs
> > > and update its index by call descs_refill function.
> > > > 3.2 if nb_recycle is 0, we will put valid recycle mbufs back into
> > > > mempool as general path. This is necessary, because we need to
> > > > ensure the freed Tx number is fixed.(Some buffers return is null
> > > > can be seen as freed, others need to be put into mempool)
> > > >
> > > > Or maybe I ignore something?
> > >
> > >
> > > I am talking about the case when both refill_requirement and mbuf
> > > return values iare zero:
> > > if (unlikely((rxep[i] == NULL && refill_requirement) ||         // ???  rxep[i] ==
> 0
> > > AND refill_requirement == 0 ???
> > >  		recycle_rxq_info->mp != txep[i].mbuf->pool))
> > > 	nb_recycle_mbufs = 0;
> > >
> > > As I can read the code you treat such situation as valid, while I
> > > think we should reset nb_recycle_mbufs to zero when rxep[i] == NULL,
> > > no matter what value refill_requirement is.
> >
> > So this means for maybe MLX5 driver, its refill_request = 0. And if
> > some mbufs return value is zero, the other mbufs can not be recycled
> > into Rx sw-ring? Because nb_recycle=0, and they need to be put into
> mempool.
> >
> > I think for such as MLX5 driver, we can allow recycle some valid mbufs into
> Rx ring.
> > Because no constraint for its refill number. Is my suggestion reasonable?
> 
> I suppose yes: if refill_request is zero we potentially can skip 'bad' mbufs and
> continue with recycling for remaining ones.
> It would probably require more changes in current code, but sounds ok to me
> in general.
That's OK. Thanks for your careful review.
> 
> > >
> > >
> > > > > >
> > > > > > > > > +	/* Update counters for Tx. */
> > > > > > > > > +	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq-
> > > > > >tx_rs_thresh);
> > > > > > > > > +	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq-
> > > > > >tx_rs_thresh);
> > > > > > > > > +	if (txq->tx_next_dd >= txq->nb_tx_desc)
> > > > > > > > > +		txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
> > > > > > > > > +
> > > > > > > > > +	return nb_recycle_mbufs; }
> > > > > > > > > diff --git a/drivers/net/i40e/i40e_rxtx.c
> > > > > > > > > b/drivers/net/i40e/i40e_rxtx.c index
> > > > > > > > > b4f65b58fa..a9c9eb331c
> > > > > > > > > 100644
> > > > > > > > > --- a/drivers/net/i40e/i40e_rxtx.c
> > > > > > > > > +++ b/drivers/net/i40e/i40e_rxtx.c
> > > > > > > > > @@ -3199,6 +3199,30 @@ i40e_txq_info_get(struct
> > > > > > > > > rte_eth_dev *dev, uint16_t queue_id,
> > > > > > > > >  	qinfo->conf.offloads = txq->offloads;  }
> > > > > > > > >
> > > > > > > > > +void
> > > > > > > > > +i40e_recycle_rxq_info_get(struct rte_eth_dev *dev,
> > > > > > > > > +uint16_t
> > > > > queue_id,
> > > > > > > > > +	struct rte_eth_recycle_rxq_info *recycle_rxq_info) {
> > > > > > > > > +	struct i40e_rx_queue *rxq;
> > > > > > > > > +	struct i40e_adapter *ad =
> > > > > > > > > +		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data-
> > > > > >dev_private);
> > > > > > > > > +
> > > > > > > > > +	rxq = dev->data->rx_queues[queue_id];
> > > > > > > > > +
> > > > > > > > > +	recycle_rxq_info->mbuf_ring = (void *)rxq->sw_ring;
> > > > > > > > > +	recycle_rxq_info->mp = rxq->mp;
> > > > > > > > > +	recycle_rxq_info->mbuf_ring_size = rxq->nb_rx_desc;
> > > > > > > > > +	recycle_rxq_info->receive_tail = &rxq->rx_tail;
> > > > > > > > > +
> > > > > > > > > +	if (ad->rx_vec_allowed) {
> > > > > > > > > +		recycle_rxq_info->refill_requirement =
> > > > > > > > > RTE_I40E_RXQ_REARM_THRESH;
> > > > > > > > > +		recycle_rxq_info->refill_head = &rxq->rxrearm_start;
> > > > > > > > > +	} else {
> > > > > > > > > +		recycle_rxq_info->refill_requirement = rxq-
> > > > > >rx_free_thresh;
> > > > > > > > > +		recycle_rxq_info->refill_head = &rxq->rx_free_trigger;
> > > > > > > > > +	}
> > > > > > > > > +}
> > > > > > > > > +
> > > > > > > > >  #ifdef RTE_ARCH_X86
> > > > > > > > >  static inline bool
> > > > > > > > >  get_avx_supported(bool request_avx512) @@ -3293,6
> > > > > > > > > +3317,8
> > > @@
> > > > > > > > > i40e_set_rx_function(struct rte_eth_dev *dev)
> > > > > > > > >  				dev->rx_pkt_burst = ad-
> > > >rx_use_avx2 ?
> > > > > > > > >
> > > 	i40e_recv_scattered_pkts_vec_avx2 :
> > > > > > > > >  					i40e_recv_scattered_pkts_vec;
> > > > > > > > > +				dev->recycle_rx_descriptors_refill =
> > > > > > > > > +
> > > > > 	i40e_recycle_rx_descriptors_refill_vec;
> > > > > > > > >  			}
> > > > > > > > >  		} else {
> > > > > > > > >  			if (ad->rx_use_avx512) { @@ -3311,9 +3337,12
> @@
> > > > > > > > > i40e_set_rx_function(struct rte_eth_dev
> > > > > *dev)
> > > > > > > > >  				dev->rx_pkt_burst = ad-
> > > >rx_use_avx2 ?
> > > > > > > > >  					i40e_recv_pkts_vec_avx2 :
> > > > > > > > >  					i40e_recv_pkts_vec;
> > > > > > > > > +				dev->recycle_rx_descriptors_refill =
> > > > > > > > > +
> > > > > 	i40e_recycle_rx_descriptors_refill_vec;
> > > > > > > > >  			}
> > > > > > > > >  		}
> > > > > > > > >  #else /* RTE_ARCH_X86 */
> > > > > > > > > +		dev->recycle_rx_descriptors_refill =
> > > > > > > > > +i40e_recycle_rx_descriptors_refill_vec;
> > > > > > > > >  		if (dev->data->scattered_rx) {
> > > > > > > > >  			PMD_INIT_LOG(DEBUG,
> > > > > > > > >  				     "Using Vector Scattered Rx
> > > (port %d).", @@
> > > > > > > > > -3481,15 +3510,18 @@ i40e_set_tx_function(struct
> > > > > > > > > rte_eth_dev
> > > *dev)
> > > > > > > > >  				dev->tx_pkt_burst = ad-
> > > >tx_use_avx2 ?
> > > > > > > > >
> > > i40e_xmit_pkts_vec_avx2 :
> > > > > > > > >
> > > i40e_xmit_pkts_vec;
> > > > > > > > > +				dev->recycle_tx_mbufs_reuse =
> > > > > > > > > i40e_recycle_tx_mbufs_reuse_vec;
> > > > > > > > >  			}
> > > > > > > > >  #else /* RTE_ARCH_X86 */
> > > > > > > > >  			PMD_INIT_LOG(DEBUG, "Using Vector Tx
> > > (port %d).",
> > > > > > > > >  				     dev->data->port_id);
> > > > > > > > >  			dev->tx_pkt_burst = i40e_xmit_pkts_vec;
> > > > > > > > > +			dev->recycle_tx_mbufs_reuse =
> > > > > > > > > i40e_recycle_tx_mbufs_reuse_vec;  #endif /* RTE_ARCH_X86 */
> > > > > > > > >  		} else {
> > > > > > > > >  			PMD_INIT_LOG(DEBUG, "Simple tx finally be
> > > used.");
> > > > > > > > >  			dev->tx_pkt_burst = i40e_xmit_pkts_simple;
> > > > > > > > > +			dev->recycle_tx_mbufs_reuse =
> > > > > > > > > i40e_recycle_tx_mbufs_reuse_vec;
> > > > > > > > >  		}
> > > > > > > > >  		dev->tx_pkt_prepare = i40e_simple_prep_pkts;
> > > > > > > > >  	} else {
> > > > > > > > > diff --git a/drivers/net/i40e/i40e_rxtx.h
> > > > > > > > > b/drivers/net/i40e/i40e_rxtx.h index
> > > > > > > > > a8686224e5..b191f23e1f
> > > > > > > > > 100644
> > > > > > > > > --- a/drivers/net/i40e/i40e_rxtx.h
> > > > > > > > > +++ b/drivers/net/i40e/i40e_rxtx.h
> > > > > > > > > @@ -236,6 +236,10 @@ uint32_t
> > > > > > > > > i40e_dev_rx_queue_count(void *rx_queue);  int
> > > > > > > > > i40e_dev_rx_descriptor_status(void *rx_queue, uint16_t
> > > > > > > > > offset);  int i40e_dev_tx_descriptor_status(void
> > > > > > > > > *tx_queue, uint16_t offset);
> > > > > > > > >
> > > > > > > > > +uint16_t i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
> > > > > > > > > +		struct rte_eth_recycle_rxq_info *recycle_rxq_info);
> > > > > void
> > > > > > > > > +i40e_recycle_rx_descriptors_refill_vec(void *rx_queue,
> > > > > > > > > +uint16_t nb_mbufs);
> > > > > > > > > +
> > > > > > > > >  uint16_t i40e_recv_pkts_vec(void *rx_queue, struct
> > > > > > > > > rte_mbuf
> > > > > **rx_pkts,
> > > > > > > > >  			    uint16_t nb_pkts);  uint16_t
> > > > > > > > > i40e_recv_scattered_pkts_vec(void *rx_queue, diff --git
> > > > > > > > > a/drivers/net/i40e/meson.build
> > > > > > > > > b/drivers/net/i40e/meson.build index
> > > > > > > > > 8e53b87a65..3b1a233c84 100644
> > > > > > > > > --- a/drivers/net/i40e/meson.build
> > > > > > > > > +++ b/drivers/net/i40e/meson.build
> > > > > > > > > @@ -34,6 +34,7 @@ sources = files(
> > > > > > > > >          'i40e_tm.c',
> > > > > > > > >          'i40e_hash.c',
> > > > > > > > >          'i40e_vf_representor.c',
> > > > > > > > > +	'i40e_recycle_mbufs_vec_common.c',
> > > > > > > > >          'rte_pmd_i40e.c',
> > > > > > > > >  )
> > > > > > > > >
> > > > > > > > > --
> > > > > > > > > 2.25.1
  
Feifei Wang Sept. 22, 2023, 2:58 p.m. UTC | #12
Hi, Konstantin

> -----Original Message-----
> From: Feifei Wang
> Sent: Tuesday, September 5, 2023 11:11 AM
> To: Konstantin Ananyev <konstantin.ananyev@huawei.com>; Konstantin
> Ananyev <konstantin.v.ananyev@yandex.ru>
> Cc: dev@dpdk.org; nd <nd@arm.com>; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>; Yuying Zhang <Yuying.Zhang@intel.com>; Beilei
> Xing <beilei.xing@intel.com>; nd <nd@arm.com>; nd <nd@arm.com>; nd
> <nd@arm.com>; nd <nd@arm.com>; nd <nd@arm.com>
> Subject: RE: [PATCH v11 2/4] net/i40e: implement mbufs recycle mode
> 
> 
> 
> > -----Original Message-----
> > From: Konstantin Ananyev <konstantin.ananyev@huawei.com>
> > Sent: Monday, September 4, 2023 6:22 PM
> > To: Feifei Wang <Feifei.Wang2@arm.com>; Konstantin Ananyev
> > <konstantin.v.ananyev@yandex.ru>
> > Cc: dev@dpdk.org; nd <nd@arm.com>; Honnappa Nagarahalli
> > <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>;
> > Yuying Zhang <Yuying.Zhang@intel.com>; Beilei Xing
> > <beilei.xing@intel.com>; nd <nd@arm.com>; nd <nd@arm.com>; nd
> > <nd@arm.com>; nd <nd@arm.com>
> > Subject: RE: [PATCH v11 2/4] net/i40e: implement mbufs recycle mode
> >
> >
> >
> > > > > > > > > > Define specific function implementation for i40e driver.
> > > > > > > > > > Currently, mbufs recycle mode can support 128bit
> > > > > > > > > > vector path and
> > > > > > > > > > avx2
> > > > > > > > path.
> > > > > > > > > > And can be enabled both in fast free and no fast free mode.
> > > > > > > > > >
> > > > > > > > > > Suggested-by: Honnappa Nagarahalli
> > > > > > > > > > <honnappa.nagarahalli@arm.com>
> > > > > > > > > > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > > > > > > > > > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > > > > > > > > > Reviewed-by: Honnappa Nagarahalli
> > > > > > <honnappa.nagarahalli@arm.com>
> > > > > > > > > > ---
> > > > > > > > > >  drivers/net/i40e/i40e_ethdev.c                |   1 +
> > > > > > > > > >  drivers/net/i40e/i40e_ethdev.h                |   2 +
> > > > > > > > > >  .../net/i40e/i40e_recycle_mbufs_vec_common.c  | 147
> > > > > > > > > > ++++++++++++++++++
> > > > > > > > > >  drivers/net/i40e/i40e_rxtx.c                  |  32 ++++
> > > > > > > > > >  drivers/net/i40e/i40e_rxtx.h                  |   4 +
> > > > > > > > > >  drivers/net/i40e/meson.build                  |   1 +
> > > > > > > > > >  6 files changed, 187 insertions(+)  create mode
> > > > > > > > > > 100644
> > > > > > > > > > drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > > > > > > > > >
> > > > > > > > > > diff --git a/drivers/net/i40e/i40e_ethdev.c
> > > > > > > > > > b/drivers/net/i40e/i40e_ethdev.c index
> > > > > > > > > > 8271bbb394..50ba9aac94
> > > > > > > > > > 100644
> > > > > > > > > > --- a/drivers/net/i40e/i40e_ethdev.c
> > > > > > > > > > +++ b/drivers/net/i40e/i40e_ethdev.c
> > > > > > > > > > @@ -496,6 +496,7 @@ static const struct eth_dev_ops
> > > > > > > > > > i40e_eth_dev_ops
> > > > > > > > = {
> > > > > > > > > >  	.flow_ops_get                 = i40e_dev_flow_ops_get,
> > > > > > > > > >  	.rxq_info_get                 = i40e_rxq_info_get,
> > > > > > > > > >  	.txq_info_get                 = i40e_txq_info_get,
> > > > > > > > > > +	.recycle_rxq_info_get         =
> i40e_recycle_rxq_info_get,
> > > > > > > > > >  	.rx_burst_mode_get            =
> i40e_rx_burst_mode_get,
> > > > > > > > > >  	.tx_burst_mode_get            =
> i40e_tx_burst_mode_get,
> > > > > > > > > >  	.timesync_enable              = i40e_timesync_enable,
> > > > > > > > > > diff --git a/drivers/net/i40e/i40e_ethdev.h
> > > > > > > > > > b/drivers/net/i40e/i40e_ethdev.h index
> > > > > > > > > > 6f65d5e0ac..af758798e1
> > > > > > > > > > 100644
> > > > > > > > > > --- a/drivers/net/i40e/i40e_ethdev.h
> > > > > > > > > > +++ b/drivers/net/i40e/i40e_ethdev.h
> > > > > > > > > > @@ -1355,6 +1355,8 @@ void i40e_rxq_info_get(struct
> > > > > > > > > > rte_eth_dev *dev, uint16_t queue_id,
> > > > > > > > > >  	struct rte_eth_rxq_info *qinfo);  void
> > > > > > > > > > i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
> > > > > > > > > >  	struct rte_eth_txq_info *qinfo);
> > > > > > > > > > +void i40e_recycle_rxq_info_get(struct rte_eth_dev
> > > > > > > > > > +*dev, uint16_t
> > > > > > > > queue_id,
> > > > > > > > > > +	struct rte_eth_recycle_rxq_info *recycle_rxq_info);
> > > > > > > > > >  int i40e_rx_burst_mode_get(struct rte_eth_dev *dev,
> > > > > > > > > > uint16_t
> > > > > > queue_id,
> > > > > > > > > >  			   struct rte_eth_burst_mode *mode);
> int
> > > > > > > > > > i40e_tx_burst_mode_get(struct rte_eth_dev *dev,
> > > > > > > > > > uint16_t queue_id, diff -- git
> > > > > > > > > > a/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > > > > > > > > > b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > > > > > > > > > new file mode 100644
> > > > > > > > > > index 0000000000..5663ecccde
> > > > > > > > > > --- /dev/null
> > > > > > > > > > +++ b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > > > > > > > > > @@ -0,0 +1,147 @@
> > > > > > > > > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > > > > > > > > + * Copyright (c) 2023 Arm Limited.
> > > > > > > > > > + */
> > > > > > > > > > +
> > > > > > > > > > +#include <stdint.h>
> > > > > > > > > > +#include <ethdev_driver.h>
> > > > > > > > > > +
> > > > > > > > > > +#include "base/i40e_prototype.h"
> > > > > > > > > > +#include "base/i40e_type.h"
> > > > > > > > > > +#include "i40e_ethdev.h"
> > > > > > > > > > +#include "i40e_rxtx.h"
> > > > > > > > > > +
> > > > > > > > > > +#pragma GCC diagnostic ignored "-Wcast-qual"
> > > > > > > > > > +
> > > > > > > > > > +void
> > > > > > > > > > +i40e_recycle_rx_descriptors_refill_vec(void
> > > > > > > > > > +*rx_queue, uint16_t
> > > > > > > > > > +nb_mbufs) {
> > > > > > > > > > +	struct i40e_rx_queue *rxq = rx_queue;
> > > > > > > > > > +	struct i40e_rx_entry *rxep;
> > > > > > > > > > +	volatile union i40e_rx_desc *rxdp;
> > > > > > > > > > +	uint16_t rx_id;
> > > > > > > > > > +	uint64_t paddr;
> > > > > > > > > > +	uint64_t dma_addr;
> > > > > > > > > > +	uint16_t i;
> > > > > > > > > > +
> > > > > > > > > > +	rxdp = rxq->rx_ring + rxq->rxrearm_start;
> > > > > > > > > > +	rxep = &rxq->sw_ring[rxq->rxrearm_start];
> > > > > > > > > > +
> > > > > > > > > > +	for (i = 0; i < nb_mbufs; i++) {
> > > > > > > > > > +		/* Initialize rxdp descs. */
> > > > > > > > > > +		paddr = (rxep[i].mbuf)->buf_iova +
> > > > > > > > > > RTE_PKTMBUF_HEADROOM;
> > > > > > > > > > +		dma_addr = rte_cpu_to_le_64(paddr);
> > > > > > > > > > +		/* flush desc with pa dma_addr */
> > > > > > > > > > +		rxdp[i].read.hdr_addr = 0;
> > > > > > > > > > +		rxdp[i].read.pkt_addr = dma_addr;
> > > > > > > > > > +	}
> > > > > > > > > > +
> > > > > > > > > > +	/* Update the descriptor initializer index */
> > > > > > > > > > +	rxq->rxrearm_start += nb_mbufs;
> > > > > > > > > > +	rx_id = rxq->rxrearm_start - 1;
> > > > > > > > > > +
> > > > > > > > > > +	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
> > > > > > > > > > +		rxq->rxrearm_start = 0;
> > > > > > > > > > +		rx_id = rxq->nb_rx_desc - 1;
> > > > > > > > > > +	}
> > > > > > > > > > +
> > > > > > > > > > +	rxq->rxrearm_nb -= nb_mbufs;
> > > > > > > > > > +
> > > > > > > > > > +	rte_io_wmb();
> > > > > > > > > > +	/* Update the tail pointer on the NIC */
> > > > > > > > > > +	I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id); }
> > > > > > > > > > +
> > > > > > > > > > +uint16_t
> > > > > > > > > > +i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
> > > > > > > > > > +	struct rte_eth_recycle_rxq_info *recycle_rxq_info) {
> > > > > > > > > > +	struct i40e_tx_queue *txq = tx_queue;
> > > > > > > > > > +	struct i40e_tx_entry *txep;
> > > > > > > > > > +	struct rte_mbuf **rxep;
> > > > > > > > > > +	int i, n;
> > > > > > > > > > +	uint16_t nb_recycle_mbufs;
> > > > > > > > > > +	uint16_t avail = 0;
> > > > > > > > > > +	uint16_t mbuf_ring_size = recycle_rxq_info-
> >mbuf_ring_size;
> > > > > > > > > > +	uint16_t mask = recycle_rxq_info->mbuf_ring_size - 1;
> > > > > > > > > > +	uint16_t refill_requirement = recycle_rxq_info-
> > > > > > >refill_requirement;
> > > > > > > > > > +	uint16_t refill_head = *recycle_rxq_info->refill_head;
> > > > > > > > > > +	uint16_t receive_tail =
> > > > > > > > > > +*recycle_rxq_info->receive_tail;
> > > > > > > > > > +
> > > > > > > > > > +	/* Get available recycling Rx buffers. */
> > > > > > > > > > +	avail = (mbuf_ring_size - (refill_head -
> > > > > > > > > > +receive_tail)) & mask;
> > > > > > > > > > +
> > > > > > > > > > +	/* Check Tx free thresh and Rx available space. */
> > > > > > > > > > +	if (txq->nb_tx_free > txq->tx_free_thresh || avail
> > > > > > > > > > +<=
> > > > > > > > > > +txq-
> > > > > > >tx_rs_thresh)
> > > > > > > > > > +		return 0;
> > > > > > > > > > +
> > > > > > > > > > +	/* check DD bits on threshold descriptor */
> > > > > > > > > > +	if
> > > > > > > > > > +((txq->tx_ring[txq->tx_next_dd].cmd_type_offset_bsz
> > > > > > > > > > +&
> > > > > > > > > > +
> > > > > > > > > > 	rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
> > > > > > > > > > +
> > > > > > > > > > 	rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
> > > > > > > > > > +		return 0;
> > > > > > > > > > +
> > > > > > > > > > +	n = txq->tx_rs_thresh;
> > > > > > > > > > +	nb_recycle_mbufs = n;
> > > > > > > > > > +
> > > > > > > > > > +	/* Mbufs recycle mode can only support no ring
> > > > > > > > > > +buffer
> > > > > > wrapping
> > > > > > > > > > around.
> > > > > > > > > > +	 * Two case for this:
> > > > > > > > > > +	 *
> > > > > > > > > > +	 * case 1: The refill head of Rx buffer ring needs
> > > > > > > > > > +to be aligned
> > > > > > with
> > > > > > > > > > +	 * mbuf ring size. In this case, the number of Tx
> freeing buffers
> > > > > > > > > > +	 * should be equal to refill_requirement.
> > > > > > > > > > +	 *
> > > > > > > > > > +	 * case 2: The refill head of Rx ring buffer does
> > > > > > > > > > +not need to be
> > > > > > aligned
> > > > > > > > > > +	 * with mbuf ring size. In this case, the update of
> > > > > > > > > > +refill head
> > > > > > can not
> > > > > > > > > > +	 * exceed the Rx mbuf ring size.
> > > > > > > > > > +	 */
> > > > > > > > > > +	if (refill_requirement != n ||
> > > > > > > > > > +		(!refill_requirement && (refill_head + n >
> > > > > > mbuf_ring_size)))
> > > > > > > > > > +		return 0;
> > > > > > > > > > +
> > > > > > > > > > +	/* First buffer to free from S/W ring is at index
> > > > > > > > > > +	 * tx_next_dd - (tx_rs_thresh-1).
> > > > > > > > > > +	 */
> > > > > > > > > > +	txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
> > > > > > > > > > +	rxep = recycle_rxq_info->mbuf_ring;
> > > > > > > > > > +	rxep += refill_head;
> > > > > > > > > > +
> > > > > > > > > > +	if (txq->offloads &
> RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
> > > > > > > > > > +		/* Avoid txq contains buffers from unexpected
> > > > > > mempool. */
> > > > > > > > > > +		if (unlikely(recycle_rxq_info->mp
> > > > > > > > > > +					!= txep[0].mbuf-
> >pool))
> > > > > > > > > > +			return 0;
> > > > > > > > > > +
> > > > > > > > > > +		/* Directly put mbufs from Tx to Rx. */
> > > > > > > > > > +		for (i = 0; i < n; i++)
> > > > > > > > > > +			rxep[i] = txep[i].mbuf;
> > > > > > > > > > +	} else {
> > > > > > > > > > +		for (i = 0; i < n; i++) {
> > > > > > > > > > +			rxep[i] =
> > > > > > rte_pktmbuf_prefree_seg(txep[i].mbuf);
> > > > > > > > > > +
> > > > > > > > > > +			/* If Tx buffers are not the last
> reference or
> > > > > > from
> > > > > > > > > > +			 * unexpected mempool, previous
> copied
> > > > > > buffers are
> > > > > > > > > > +			 * considered as invalid.
> > > > > > > > > > +			 */
> > > > > > > > > > +			if (unlikely((rxep[i] == NULL &&
> > > > > > refill_requirement) ||
> > > > > > > > > [Konstantin]
> > > > > > > > > Could you pls remind me why it is ok to have
> > > > > > > > > rxep[i]==NULL when refill_requirement is not set?
> > > > > > > > >
> > > > > > > > > If reill_requirement is not zero, it means each tx freed
> > > > > > > > > buffer must be valid and can be put into Rx sw_ring.
> > > > > > > > > Then the refill head of Rx buffer ring can be aligned
> > > > > > > > > with mbuf ring size. Briefly speaking the number
> > > > > > > > of Tx valid freed buffer must be equal to Rx refill_requirement.
> > > > > > > > For example, i40e driver.
> > > > > > > > >
> > > > > > > > > If reill_requirement is zero, it means that the refill
> > > > > > > > > head of Rx buffer ring does not need to be aligned with
> > > > > > > > > mbuf ring size, thus if Tx have n valid freed buffers,
> > > > > > > > > we just need to put these n buffers into Rx sw-
> > > > > > > > ring, and not to be equal to the Rx setting rearm number.
> > > > > > > > For example, mlx5 driver.
> > > > > > > > >
> > > > > > > > > In conclusion, above difference is due to pmd drivers
> > > > > > > > > have different
> > > > > > > > strategies to update their Rx rearm(refill) head.
> > > > > > > > > For i40e driver, if rearm_head exceed 1024, it will be
> > > > > > > > > set as
> > > > > > > > > 0 due to  the
> > > > > > > > number of each rearm is a fixed value by default.
> > > > > > > > > For mlx5 driver. Its rearm_head can exceed 1024, and use
> > > > > > > > > mask to achieve
> > > > > > > > real index. Thus its rearm number can be a different value.
> > > > > > > >
> > > > > > > > Ok, but if rte_pktmbuf_prefree_seg(txep[i].mbuf), it means
> > > > > > > > that this mbuf is not free yet and can't be reused.
> > > > > > > > Shouldn't we then set nb_recycle_mbufs = 0 in that case too?
> > > > > > > > Or probably would be enough to skip that mbuf?
> > > > > > > > Might be something like that:
> > > > > > > >
> > > > > > > > for (i = 0, j = 0; i < n; i++) {
> > > > > > > >
> > > > > > > > 	rxep[j] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> > > > > > > > 	if (rxep[j] == NULL || recycle_rxq_info->mp !=
> > > > > > > > rxep[j].mbuf-
> > >pool))  {
> > > > > > > > 		if (refill_requirement) {
> > > > > > > > 			nb_recycle_mbufs = 0;
> > > > > > > > 			break;
> > > > > > > > 		}
> > > > > > > > 	} else
> > > > > > > > 		j++;
> > > > > > > > }
> > > > > > > >
> > > > > > > > /* now j contains actual number of recycled mbufs */
> > > > > > > >
> > > > > > > > ?
> > > > > > > >
> > > > > > > > >
> > > > > > > > > > +					recycle_rxq_info-
> >mp !=
> > > > > > txep[i].mbuf-
> > > > > > > > > > >pool))
> > > > > > > > > > +				nb_recycle_mbufs = 0;
> > > > > > > > > > +		}
> > > > > > > > > > +		/* If Tx buffers are not the last reference or
> > > > > > > > > > +		 * from unexpected mempool, all recycled
> buffers
> > > > > > > > > > +		 * are put into mempool.
> > > > > > > > > > +		 */
> > > > > > > > > > +		if (nb_recycle_mbufs == 0)
> > > > > > > > > > +			for (i = 0; i < n; i++) {
> > > > > > > > > > +				if (rxep[i] != NULL)
> > > > > > > > > > +
> 	rte_mempool_put(rxep[i]-
> > > > > > >pool,
> > > > > > > > > > rxep[i]);
> > > > > > > > > > +			}
> > > > > > > > > > +	}
> > > > > > > > > > +
> > > > > > > [Konstantin] After another thought, it might be easier and
> > > > > > > cleaner
> > just to:
> > > > > > > if (rxep[j] == NULL || recycle_rxq_info->mp != rxep[j].mbuf->pool)
> > > > > > > 	nb_recycle_mbufs = 0;
> > > > > > >
> > > > > > > Anyway, from my understanding - if
> > > > > > > rte_pktmbuf_prefree_seg(mbuf) returns NULL, then we can't
> > > > > > > recycle
> > that mbuf.
> > > > > > >
> > > > > > > [Feifei] Agree with that 'rte_pktmbuf_prefree_seg(mbuf)
> > > > > > > returns NULL, then
> > > > > > we can't recycle that mbuf'.
> > > > > > >
> > > > > > > Firstly, we should know for i40e driver,  the number of free
> > > > > > > mbufs is fixed, it
> > > > > > must equal to 'tx_rs_thresh'
> > > > > > > This means if we start to free Tx mbufs, it cannot be
> > > > > > > interrupted until the
> > > > > > given amount of mbufs are freed.
> > > > > > > In the meanwhile, doing prefree operation for a Tx mbuf can
> > > > > > > be looked at this mbuf is freed from this TX sw-ring  if the
> > > > > > > API returns NULL. This is due
> > > > > > to that call 'prefree_seg' function means update the mbuf refcnt.
> > > > > > >
> > > > > > > So let's come back our recycle mbuf case.
> > > > > > > Here we consider that the case if we have 32 mbufs need to
> > > > > > > be freed, and
> > > > > > we firstly do the pre-free.
> > > > > > > And then first 8 mbufs is good and return value is not none.
> > > > > > > But the 9th
> > > > > > mbuf is bad, its refcnt is more than 1.
> > > > > > > So we just update its refcnt and cannot put it into Rx sw-ring.
> > > > > > > For Tx sw-ring,
> > > > > > this mbuf has been freed.
> > > > > > > Then we should continue to do pre-free operation for the
> > > > > > > next Tx mbufs to ensure the given amount of mbufs are freed.
> > > > > > >
> > > > > > > Do a conclusion for this, this is because if we start to do
> > > > > > > pre-free operation,  the Tx mbuf refcnt value maybe changed,
> > > > > > > so we cannot stop or
> > > > > > break until finish all the pre-free operation.
> > > > > > >
> > > > > > > Finally, in the case that Rx refill_request is not zero, but
> > > > > > > the valid mbuf amount is less than this value, we must put
> > > > > > > back this Tx mbufs into
> > > > > > mempool.
> > > > > > >
> > > > > > > Above is the reason why I do not directly jump out of the
> > > > > > > loop if some mbuf
> > > > > > return value is NULL.
> > > > > >
> > > > > > Yep, I already realized that it is a bit more complicated and
> > > > > > we need to continue with prefree() for all packets even when
> > > > > > we get NULL in
> > > > the middle.
> > > > > > Anyway the code has to be changed, right?
> > > > > >
> > > > > Sorry, I think for the code, it is unnecessary to be changed.
> > > > > For no fast free path, currently:
> > > > > 1.   We check whether each mbuf is Ok and call pre_free function
> > > > > ----------------------------------------------------------------
> > > > > --
> > > > > ----
> > > > > --------------------------------------------------------------
> > > > > 2.1 For the mbuf return value is not NULL, it is put into Rx sw-ring.
> > > > > 2.2 For the mbuf return value is zero and refill-request, it
> > > > > will also firstly put into Rx sw-ring, and we set nb_recycle = 0
> > > > > ----------------------------------------------------------------
> > > > > --
> > > > > ----
> > > > > --------------------------------------------------------------
> > > > > 3.1 We check nb_recycle, if it is not 0, we will continue to
> > > > > rearm Rx descs
> > > > and update its index by call descs_refill function.
> > > > > 3.2 if nb_recycle is 0, we will put valid recycle mbufs back
> > > > > into mempool as general path. This is necessary, because we need
> > > > > to ensure the freed Tx number is fixed.(Some buffers return is
> > > > > null can be seen as freed, others need to be put into mempool)
> > > > >
> > > > > Or maybe I ignore something?
> > > >
> > > >
> > > > I am talking about the case when both refill_requirement and mbuf
> > > > return values iare zero:
> > > > if (unlikely((rxep[i] == NULL && refill_requirement) ||         // ???  rxep[i]
> ==
> > 0
> > > > AND refill_requirement == 0 ???
> > > >  		recycle_rxq_info->mp != txep[i].mbuf->pool))
> > > > 	nb_recycle_mbufs = 0;
> > > >
> > > > As I can read the code you treat such situation as valid, while I
> > > > think we should reset nb_recycle_mbufs to zero when rxep[i] ==
> > > > NULL, no matter what value refill_requirement is.
> > >
> > > So this means for maybe MLX5 driver, its refill_request = 0. And if
> > > some mbufs return value is zero, the other mbufs can not be recycled
> > > into Rx sw-ring? Because nb_recycle=0, and they need to be put into
> > mempool.
> > >
> > > I think for such as MLX5 driver, we can allow recycle some valid
> > > mbufs into
> > Rx ring.
> > > Because no constraint for its refill number. Is my suggestion reasonable?
> >
> > I suppose yes: if refill_request is zero we potentially can skip 'bad'
> > mbufs and continue with recycling for remaining ones.
> > It would probably require more changes in current code, but sounds ok
> > to me in general.
> That's Ok. Thanks for your careful reviewing.

I'm very sorry I missed your e-mail; only now do I realize we need to make some code changes
for the i40e driver. Also, thanks to Ferruh for kindly reminding me of this.

I agree with you that we need to handle the case where (refill_requirement == 0 && rxep[i] == NULL).
Thus maybe we can make a change as follows:

for (i = 0; i < n; i++) {
	rxep[0] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
	if (unlikely((rxep[0] == NULL && refill_requirement) ||
			recycle_rxq_info->mp != txep[i].mbuf->pool))
		nb_recycle_mbufs = 0;
	if (likely(rxep[0]))
		rxep++;
}	

Is the above change OK?

> >
> > > >
> > > >
> > > > > > >
> > > > > > > > > > +	/* Update counters for Tx. */
> > > > > > > > > > +	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq-
> > > > > > >tx_rs_thresh);
> > > > > > > > > > +	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq-
> > > > > > >tx_rs_thresh);
> > > > > > > > > > +	if (txq->tx_next_dd >= txq->nb_tx_desc)
> > > > > > > > > > +		txq->tx_next_dd = (uint16_t)(txq-
> >tx_rs_thresh -
> > > > > > > > > > +1);
> > > > > > > > > > +
> > > > > > > > > > +	return nb_recycle_mbufs; }
> > > > > > > > > > diff --git a/drivers/net/i40e/i40e_rxtx.c
> > > > > > > > > > b/drivers/net/i40e/i40e_rxtx.c index
> > > > > > > > > > b4f65b58fa..a9c9eb331c
> > > > > > > > > > 100644
> > > > > > > > > > --- a/drivers/net/i40e/i40e_rxtx.c
> > > > > > > > > > +++ b/drivers/net/i40e/i40e_rxtx.c
> > > > > > > > > > @@ -3199,6 +3199,30 @@ i40e_txq_info_get(struct
> > > > > > > > > > rte_eth_dev *dev, uint16_t queue_id,
> > > > > > > > > >  	qinfo->conf.offloads = txq->offloads;  }
> > > > > > > > > >
> > > > > > > > > > +void
> > > > > > > > > > +i40e_recycle_rxq_info_get(struct rte_eth_dev *dev,
> > > > > > > > > > +uint16_t
> > > > > > queue_id,
> > > > > > > > > > +	struct rte_eth_recycle_rxq_info *recycle_rxq_info) {
> > > > > > > > > > +	struct i40e_rx_queue *rxq;
> > > > > > > > > > +	struct i40e_adapter *ad =
> > > > > > > > > > +		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data-
> > > > > > >dev_private);
> > > > > > > > > > +
> > > > > > > > > > +	rxq = dev->data->rx_queues[queue_id];
> > > > > > > > > > +
> > > > > > > > > > +	recycle_rxq_info->mbuf_ring = (void *)rxq->sw_ring;
> > > > > > > > > > +	recycle_rxq_info->mp = rxq->mp;
> > > > > > > > > > +	recycle_rxq_info->mbuf_ring_size = rxq->nb_rx_desc;
> > > > > > > > > > +	recycle_rxq_info->receive_tail = &rxq->rx_tail;
> > > > > > > > > > +
> > > > > > > > > > +	if (ad->rx_vec_allowed) {
> > > > > > > > > > +		recycle_rxq_info->refill_requirement =
> > > > > > > > > > RTE_I40E_RXQ_REARM_THRESH;
> > > > > > > > > > +		recycle_rxq_info->refill_head = &rxq-
> >rxrearm_start;
> > > > > > > > > > +	} else {
> > > > > > > > > > +		recycle_rxq_info->refill_requirement = rxq-
> > > > > > >rx_free_thresh;
> > > > > > > > > > +		recycle_rxq_info->refill_head = &rxq-
> >rx_free_trigger;
> > > > > > > > > > +	}
> > > > > > > > > > +}
> > > > > > > > > > +
> > > > > > > > > >  #ifdef RTE_ARCH_X86
> > > > > > > > > >  static inline bool
> > > > > > > > > >  get_avx_supported(bool request_avx512) @@ -3293,6
> > > > > > > > > > +3317,8
> > > > @@
> > > > > > > > > > i40e_set_rx_function(struct rte_eth_dev *dev)
> > > > > > > > > >  				dev->rx_pkt_burst = ad-
> > > > >rx_use_avx2 ?
> > > > > > > > > >
> > > > 	i40e_recv_scattered_pkts_vec_avx2 :
> > > > > > > > > >
> 	i40e_recv_scattered_pkts_vec;
> > > > > > > > > > +				dev-
> >recycle_rx_descriptors_refill =
> > > > > > > > > > +
> > > > > > 	i40e_recycle_rx_descriptors_refill_vec;
> > > > > > > > > >  			}
> > > > > > > > > >  		} else {
> > > > > > > > > >  			if (ad->rx_use_avx512) { @@ -3311,9
> +3337,12
> > @@
> > > > > > > > > > i40e_set_rx_function(struct rte_eth_dev
> > > > > > *dev)
> > > > > > > > > >  				dev->rx_pkt_burst = ad-
> > > > >rx_use_avx2 ?
> > > > > > > > > >
> 	i40e_recv_pkts_vec_avx2 :
> > > > > > > > > >  					i40e_recv_pkts_vec;
> > > > > > > > > > +				dev-
> >recycle_rx_descriptors_refill =
> > > > > > > > > > +
> > > > > > 	i40e_recycle_rx_descriptors_refill_vec;
> > > > > > > > > >  			}
> > > > > > > > > >  		}
> > > > > > > > > >  #else /* RTE_ARCH_X86 */
> > > > > > > > > > +		dev->recycle_rx_descriptors_refill =
> > > > > > > > > > +i40e_recycle_rx_descriptors_refill_vec;
> > > > > > > > > >  		if (dev->data->scattered_rx) {
> > > > > > > > > >  			PMD_INIT_LOG(DEBUG,
> > > > > > > > > >  				     "Using Vector Scattered Rx
> > > > (port %d).", @@
> > > > > > > > > > -3481,15 +3510,18 @@ i40e_set_tx_function(struct
> > > > > > > > > > rte_eth_dev
> > > > *dev)
> > > > > > > > > >  				dev->tx_pkt_burst = ad-
> > > > >tx_use_avx2 ?
> > > > > > > > > >
> > > > i40e_xmit_pkts_vec_avx2 :
> > > > > > > > > >
> > > > i40e_xmit_pkts_vec;
> > > > > > > > > > +				dev->recycle_tx_mbufs_reuse
> =
> > > > > > > > > > i40e_recycle_tx_mbufs_reuse_vec;
> > > > > > > > > >  			}
> > > > > > > > > >  #else /* RTE_ARCH_X86 */
> > > > > > > > > >  			PMD_INIT_LOG(DEBUG, "Using Vector
> Tx
> > > > (port %d).",
> > > > > > > > > >  				     dev->data->port_id);
> > > > > > > > > >  			dev->tx_pkt_burst =
> i40e_xmit_pkts_vec;
> > > > > > > > > > +			dev->recycle_tx_mbufs_reuse =
> > > > > > > > > > i40e_recycle_tx_mbufs_reuse_vec;  #endif /* RTE_ARCH_X86
> */
> > > > > > > > > >  		} else {
> > > > > > > > > >  			PMD_INIT_LOG(DEBUG, "Simple tx
> finally be
> > > > used.");
> > > > > > > > > >  			dev->tx_pkt_burst =
> i40e_xmit_pkts_simple;
> > > > > > > > > > +			dev->recycle_tx_mbufs_reuse =
> > > > > > > > > > i40e_recycle_tx_mbufs_reuse_vec;
> > > > > > > > > >  		}
> > > > > > > > > >  		dev->tx_pkt_prepare =
> i40e_simple_prep_pkts;
> > > > > > > > > >  	} else {
> > > > > > > > > > diff --git a/drivers/net/i40e/i40e_rxtx.h
> > > > > > > > > > b/drivers/net/i40e/i40e_rxtx.h index
> > > > > > > > > > a8686224e5..b191f23e1f
> > > > > > > > > > 100644
> > > > > > > > > > --- a/drivers/net/i40e/i40e_rxtx.h
> > > > > > > > > > +++ b/drivers/net/i40e/i40e_rxtx.h
> > > > > > > > > > @@ -236,6 +236,10 @@ uint32_t
> > > > > > > > > > i40e_dev_rx_queue_count(void *rx_queue);  int
> > > > > > > > > > i40e_dev_rx_descriptor_status(void *rx_queue, uint16_t
> > > > > > > > > > offset);  int i40e_dev_tx_descriptor_status(void
> > > > > > > > > > *tx_queue, uint16_t offset);
> > > > > > > > > >
> > > > > > > > > > +uint16_t i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
> > > > > > > > > > +		struct rte_eth_recycle_rxq_info
> *recycle_rxq_info);
> > > > > > void
> > > > > > > > > > +i40e_recycle_rx_descriptors_refill_vec(void
> > > > > > > > > > +*rx_queue, uint16_t nb_mbufs);
> > > > > > > > > > +
> > > > > > > > > >  uint16_t i40e_recv_pkts_vec(void *rx_queue, struct
> > > > > > > > > > rte_mbuf
> > > > > > **rx_pkts,
> > > > > > > > > >  			    uint16_t nb_pkts);  uint16_t
> > > > > > > > > > i40e_recv_scattered_pkts_vec(void *rx_queue, diff
> > > > > > > > > > --git a/drivers/net/i40e/meson.build
> > > > > > > > > > b/drivers/net/i40e/meson.build index
> > > > > > > > > > 8e53b87a65..3b1a233c84 100644
> > > > > > > > > > --- a/drivers/net/i40e/meson.build
> > > > > > > > > > +++ b/drivers/net/i40e/meson.build
> > > > > > > > > > @@ -34,6 +34,7 @@ sources = files(
> > > > > > > > > >          'i40e_tm.c',
> > > > > > > > > >          'i40e_hash.c',
> > > > > > > > > >          'i40e_vf_representor.c',
> > > > > > > > > > +	'i40e_recycle_mbufs_vec_common.c',
> > > > > > > > > >          'rte_pmd_i40e.c',
> > > > > > > > > >  )
> > > > > > > > > >
> > > > > > > > > > --
> > > > > > > > > > 2.25.1
  
Feifei Wang Sept. 22, 2023, 3:46 p.m. UTC | #13
> -----Original Message-----
> From: Feifei Wang
> Sent: Friday, September 22, 2023 10:59 PM
> To: Konstantin Ananyev <konstantin.ananyev@huawei.com>; Konstantin
> Ananyev <konstantin.v.ananyev@yandex.ru>
> Cc: dev@dpdk.org; nd <nd@arm.com>; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>; Yuying Zhang <Yuying.Zhang@intel.com>; Beilei
> Xing <beilei.xing@intel.com>; nd <nd@arm.com>; nd <nd@arm.com>; nd
> <nd@arm.com>; nd <nd@arm.com>; nd <nd@arm.com>; nd <nd@arm.com>
> Subject: RE: [PATCH v11 2/4] net/i40e: implement mbufs recycle mode
> 
> 
> Hi, Konstantin
> 
> > -----Original Message-----
> > From: Feifei Wang
> > Sent: Tuesday, September 5, 2023 11:11 AM
> > To: Konstantin Ananyev <konstantin.ananyev@huawei.com>; Konstantin
> > Ananyev <konstantin.v.ananyev@yandex.ru>
> > Cc: dev@dpdk.org; nd <nd@arm.com>; Honnappa Nagarahalli
> > <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>;
> > Yuying Zhang <Yuying.Zhang@intel.com>; Beilei Xing
> > <beilei.xing@intel.com>; nd <nd@arm.com>; nd <nd@arm.com>; nd
> > <nd@arm.com>; nd <nd@arm.com>; nd <nd@arm.com>
> > Subject: RE: [PATCH v11 2/4] net/i40e: implement mbufs recycle mode
> >
> >
> >
> > > -----Original Message-----
> > > From: Konstantin Ananyev <konstantin.ananyev@huawei.com>
> > > Sent: Monday, September 4, 2023 6:22 PM
> > > To: Feifei Wang <Feifei.Wang2@arm.com>; Konstantin Ananyev
> > > <konstantin.v.ananyev@yandex.ru>
> > > Cc: dev@dpdk.org; nd <nd@arm.com>; Honnappa Nagarahalli
> > > <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang
> > <Ruifeng.Wang@arm.com>;
> > > Yuying Zhang <Yuying.Zhang@intel.com>; Beilei Xing
> > > <beilei.xing@intel.com>; nd <nd@arm.com>; nd <nd@arm.com>; nd
> > > <nd@arm.com>; nd <nd@arm.com>
> > > Subject: RE: [PATCH v11 2/4] net/i40e: implement mbufs recycle mode
> > >
> > >
> > >
> > > > > > > > > > > Define specific function implementation for i40e driver.
> > > > > > > > > > > Currently, mbufs recycle mode can support 128bit
> > > > > > > > > > > vector path and
> > > > > > > > > > > avx2
> > > > > > > > > path.
> > > > > > > > > > > And can be enabled both in fast free and no fast free mode.
> > > > > > > > > > >
> > > > > > > > > > > Suggested-by: Honnappa Nagarahalli
> > > > > > > > > > > <honnappa.nagarahalli@arm.com>
> > > > > > > > > > > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > > > > > > > > > > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > > > > > > > > > > Reviewed-by: Honnappa Nagarahalli
> > > > > > > <honnappa.nagarahalli@arm.com>
> > > > > > > > > > > ---
> > > > > > > > > > >  drivers/net/i40e/i40e_ethdev.c                |   1 +
> > > > > > > > > > >  drivers/net/i40e/i40e_ethdev.h                |   2 +
> > > > > > > > > > >  .../net/i40e/i40e_recycle_mbufs_vec_common.c  | 147
> > > > > > > > > > > ++++++++++++++++++
> > > > > > > > > > >  drivers/net/i40e/i40e_rxtx.c                  |  32 ++++
> > > > > > > > > > >  drivers/net/i40e/i40e_rxtx.h                  |   4 +
> > > > > > > > > > >  drivers/net/i40e/meson.build                  |   1 +
> > > > > > > > > > >  6 files changed, 187 insertions(+)  create mode
> > > > > > > > > > > 100644
> > > > > > > > > > > drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > > > > > > > > > >
> > > > > > > > > > > diff --git a/drivers/net/i40e/i40e_ethdev.c
> > > > > > > > > > > b/drivers/net/i40e/i40e_ethdev.c index
> > > > > > > > > > > 8271bbb394..50ba9aac94
> > > > > > > > > > > 100644
> > > > > > > > > > > --- a/drivers/net/i40e/i40e_ethdev.c
> > > > > > > > > > > +++ b/drivers/net/i40e/i40e_ethdev.c
> > > > > > > > > > > @@ -496,6 +496,7 @@ static const struct eth_dev_ops
> > > > > > > > > > > i40e_eth_dev_ops
> > > > > > > > > = {
> > > > > > > > > > >  	.flow_ops_get                 = i40e_dev_flow_ops_get,
> > > > > > > > > > >  	.rxq_info_get                 = i40e_rxq_info_get,
> > > > > > > > > > >  	.txq_info_get                 = i40e_txq_info_get,
> > > > > > > > > > > +	.recycle_rxq_info_get         =
> > i40e_recycle_rxq_info_get,
> > > > > > > > > > >  	.rx_burst_mode_get            =
> > i40e_rx_burst_mode_get,
> > > > > > > > > > >  	.tx_burst_mode_get            =
> > i40e_tx_burst_mode_get,
> > > > > > > > > > >  	.timesync_enable              = i40e_timesync_enable,
> > > > > > > > > > > diff --git a/drivers/net/i40e/i40e_ethdev.h
> > > > > > > > > > > b/drivers/net/i40e/i40e_ethdev.h index
> > > > > > > > > > > 6f65d5e0ac..af758798e1
> > > > > > > > > > > 100644
> > > > > > > > > > > --- a/drivers/net/i40e/i40e_ethdev.h
> > > > > > > > > > > +++ b/drivers/net/i40e/i40e_ethdev.h
> > > > > > > > > > > @@ -1355,6 +1355,8 @@ void i40e_rxq_info_get(struct
> > > > > > > > > > > rte_eth_dev *dev, uint16_t queue_id,
> > > > > > > > > > >  	struct rte_eth_rxq_info *qinfo);  void
> > > > > > > > > > > i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t
> queue_id,
> > > > > > > > > > >  	struct rte_eth_txq_info *qinfo);
> > > > > > > > > > > +void i40e_recycle_rxq_info_get(struct rte_eth_dev
> > > > > > > > > > > +*dev, uint16_t
> > > > > > > > > queue_id,
> > > > > > > > > > > +	struct rte_eth_recycle_rxq_info
> > > > > > > > > > > +*recycle_rxq_info);
> > > > > > > > > > >  int i40e_rx_burst_mode_get(struct rte_eth_dev *dev,
> > > > > > > > > > > uint16_t
> > > > > > > queue_id,
> > > > > > > > > > >  			   struct rte_eth_burst_mode *mode);
> > int
> > > > > > > > > > > i40e_tx_burst_mode_get(struct rte_eth_dev *dev,
> > > > > > > > > > > uint16_t queue_id, diff -- git
> > > > > > > > > > > a/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > > > > > > > > > > b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > > > > > > > > > > new file mode 100644 index 0000000000..5663ecccde
> > > > > > > > > > > --- /dev/null
> > > > > > > > > > > +++ b/drivers/net/i40e/i40e_recycle_mbufs_vec_common
> > > > > > > > > > > +++ .c
> > > > > > > > > > > @@ -0,0 +1,147 @@
> > > > > > > > > > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > > > > > > > > > + * Copyright (c) 2023 Arm Limited.
> > > > > > > > > > > + */
> > > > > > > > > > > +
> > > > > > > > > > > +#include <stdint.h> #include <ethdev_driver.h>
> > > > > > > > > > > +
> > > > > > > > > > > +#include "base/i40e_prototype.h"
> > > > > > > > > > > +#include "base/i40e_type.h"
> > > > > > > > > > > +#include "i40e_ethdev.h"
> > > > > > > > > > > +#include "i40e_rxtx.h"
> > > > > > > > > > > +
> > > > > > > > > > > +#pragma GCC diagnostic ignored "-Wcast-qual"
> > > > > > > > > > > +
> > > > > > > > > > > +void
> > > > > > > > > > > +i40e_recycle_rx_descriptors_refill_vec(void
> > > > > > > > > > > +*rx_queue, uint16_t
> > > > > > > > > > > +nb_mbufs) {
> > > > > > > > > > > +	struct i40e_rx_queue *rxq = rx_queue;
> > > > > > > > > > > +	struct i40e_rx_entry *rxep;
> > > > > > > > > > > +	volatile union i40e_rx_desc *rxdp;
> > > > > > > > > > > +	uint16_t rx_id;
> > > > > > > > > > > +	uint64_t paddr;
> > > > > > > > > > > +	uint64_t dma_addr;
> > > > > > > > > > > +	uint16_t i;
> > > > > > > > > > > +
> > > > > > > > > > > +	rxdp = rxq->rx_ring + rxq->rxrearm_start;
> > > > > > > > > > > +	rxep = &rxq->sw_ring[rxq->rxrearm_start];
> > > > > > > > > > > +
> > > > > > > > > > > +	for (i = 0; i < nb_mbufs; i++) {
> > > > > > > > > > > +		/* Initialize rxdp descs. */
> > > > > > > > > > > +		paddr = (rxep[i].mbuf)->buf_iova +
> > > > > > > > > > > RTE_PKTMBUF_HEADROOM;
> > > > > > > > > > > +		dma_addr = rte_cpu_to_le_64(paddr);
> > > > > > > > > > > +		/* flush desc with pa dma_addr */
> > > > > > > > > > > +		rxdp[i].read.hdr_addr = 0;
> > > > > > > > > > > +		rxdp[i].read.pkt_addr = dma_addr;
> > > > > > > > > > > +	}
> > > > > > > > > > > +
> > > > > > > > > > > +	/* Update the descriptor initializer index */
> > > > > > > > > > > +	rxq->rxrearm_start += nb_mbufs;
> > > > > > > > > > > +	rx_id = rxq->rxrearm_start - 1;
> > > > > > > > > > > +
> > > > > > > > > > > +	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
> > > > > > > > > > > +		rxq->rxrearm_start = 0;
> > > > > > > > > > > +		rx_id = rxq->nb_rx_desc - 1;
> > > > > > > > > > > +	}
> > > > > > > > > > > +
> > > > > > > > > > > +	rxq->rxrearm_nb -= nb_mbufs;
> > > > > > > > > > > +
> > > > > > > > > > > +	rte_io_wmb();
> > > > > > > > > > > +	/* Update the tail pointer on the NIC */
> > > > > > > > > > > +	I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id);
> > > > > > > > > > > +}
> > > > > > > > > > > +
> > > > > > > > > > > +uint16_t
> > > > > > > > > > > +i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
> > > > > > > > > > > +	struct rte_eth_recycle_rxq_info *recycle_rxq_info) {
> > > > > > > > > > > +	struct i40e_tx_queue *txq = tx_queue;
> > > > > > > > > > > +	struct i40e_tx_entry *txep;
> > > > > > > > > > > +	struct rte_mbuf **rxep;
> > > > > > > > > > > +	int i, n;
> > > > > > > > > > > +	uint16_t nb_recycle_mbufs;
> > > > > > > > > > > +	uint16_t avail = 0;
> > > > > > > > > > > +	uint16_t mbuf_ring_size = recycle_rxq_info-
> > >mbuf_ring_size;
> > > > > > > > > > > +	uint16_t mask = recycle_rxq_info->mbuf_ring_size - 1;
> > > > > > > > > > > +	uint16_t refill_requirement = recycle_rxq_info-
> > > > > > > >refill_requirement;
> > > > > > > > > > > +	uint16_t refill_head = *recycle_rxq_info->refill_head;
> > > > > > > > > > > +	uint16_t receive_tail =
> > > > > > > > > > > +*recycle_rxq_info->receive_tail;
> > > > > > > > > > > +
> > > > > > > > > > > +	/* Get available recycling Rx buffers. */
> > > > > > > > > > > +	avail = (mbuf_ring_size - (refill_head -
> > > > > > > > > > > +receive_tail)) & mask;
> > > > > > > > > > > +
> > > > > > > > > > > +	/* Check Tx free thresh and Rx available space. */
> > > > > > > > > > > +	if (txq->nb_tx_free > txq->tx_free_thresh || avail
> > > > > > > > > > > +<=
> > > > > > > > > > > +txq-
> > > > > > > >tx_rs_thresh)
> > > > > > > > > > > +		return 0;
> > > > > > > > > > > +
> > > > > > > > > > > +	/* check DD bits on threshold descriptor */
> > > > > > > > > > > +	if
> > > > > > > > > > > +((txq->tx_ring[txq->tx_next_dd].cmd_type_offset_bsz
> > > > > > > > > > > +&
> > > > > > > > > > > +
> > > > > > > > > > > 	rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
> > > > > > > > > > > +
> > > > > > > > > > > 	rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
> > > > > > > > > > > +		return 0;
> > > > > > > > > > > +
> > > > > > > > > > > +	n = txq->tx_rs_thresh;
> > > > > > > > > > > +	nb_recycle_mbufs = n;
> > > > > > > > > > > +
> > > > > > > > > > > +	/* Mbufs recycle mode can only support no ring
> > > > > > > > > > > +buffer
> > > > > > > wrapping
> > > > > > > > > > > around.
> > > > > > > > > > > +	 * Two case for this:
> > > > > > > > > > > +	 *
> > > > > > > > > > > +	 * case 1: The refill head of Rx buffer ring needs
> > > > > > > > > > > +to be aligned
> > > > > > > with
> > > > > > > > > > > +	 * mbuf ring size. In this case, the number of Tx
> > freeing buffers
> > > > > > > > > > > +	 * should be equal to refill_requirement.
> > > > > > > > > > > +	 *
> > > > > > > > > > > +	 * case 2: The refill head of Rx ring buffer does
> > > > > > > > > > > +not need to be
> > > > > > > aligned
> > > > > > > > > > > +	 * with mbuf ring size. In this case, the update
> > > > > > > > > > > +of refill head
> > > > > > > can not
> > > > > > > > > > > +	 * exceed the Rx mbuf ring size.
> > > > > > > > > > > +	 */
> > > > > > > > > > > +	if (refill_requirement != n ||
> > > > > > > > > > > +		(!refill_requirement && (refill_head + n >
> > > > > > > mbuf_ring_size)))
> > > > > > > > > > > +		return 0;
> > > > > > > > > > > +
> > > > > > > > > > > +	/* First buffer to free from S/W ring is at index
> > > > > > > > > > > +	 * tx_next_dd - (tx_rs_thresh-1).
> > > > > > > > > > > +	 */
> > > > > > > > > > > +	txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
> > > > > > > > > > > +	rxep = recycle_rxq_info->mbuf_ring;
> > > > > > > > > > > +	rxep += refill_head;
> > > > > > > > > > > +
> > > > > > > > > > > +	if (txq->offloads &
> > RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
> > > > > > > > > > > +		/* Avoid txq contains buffers from unexpected
> > > > > > > mempool. */
> > > > > > > > > > > +		if (unlikely(recycle_rxq_info->mp
> > > > > > > > > > > +					!= txep[0].mbuf-
> > >pool))
> > > > > > > > > > > +			return 0;
> > > > > > > > > > > +
> > > > > > > > > > > +		/* Directly put mbufs from Tx to Rx. */
> > > > > > > > > > > +		for (i = 0; i < n; i++)
> > > > > > > > > > > +			rxep[i] = txep[i].mbuf;
> > > > > > > > > > > +	} else {
> > > > > > > > > > > +		for (i = 0; i < n; i++) {
> > > > > > > > > > > +			rxep[i] =
> > > > > > > rte_pktmbuf_prefree_seg(txep[i].mbuf);
> > > > > > > > > > > +
> > > > > > > > > > > +			/* If Tx buffers are not the last
> > reference or
> > > > > > > from
> > > > > > > > > > > +			 * unexpected mempool, previous
> > copied
> > > > > > > buffers are
> > > > > > > > > > > +			 * considered as invalid.
> > > > > > > > > > > +			 */
> > > > > > > > > > > +			if (unlikely((rxep[i] == NULL &&
> > > > > > > refill_requirement) ||
> > > > > > > > > > [Konstantin]
> > > > > > > > > > Could you pls remind me why it is ok to have
> > > > > > > > > > rxep[i]==NULL when refill_requirement is not set?
> > > > > > > > > >
> > > > > > > > > > If reill_requirement is not zero, it means each tx
> > > > > > > > > > freed buffer must be valid and can be put into Rx sw_ring.
> > > > > > > > > > Then the refill head of Rx buffer ring can be aligned
> > > > > > > > > > with mbuf ring size. Briefly speaking the number
> > > > > > > > > of Tx valid freed buffer must be equal to Rx refill_requirement.
> > > > > > > > > For example, i40e driver.
> > > > > > > > > >
> > > > > > > > > > If reill_requirement is zero, it means that the refill
> > > > > > > > > > head of Rx buffer ring does not need to be aligned
> > > > > > > > > > with mbuf ring size, thus if Tx have n valid freed
> > > > > > > > > > buffers, we just need to put these n buffers into Rx
> > > > > > > > > > sw-
> > > > > > > > > ring, and not to be equal to the Rx setting rearm number.
> > > > > > > > > For example, mlx5 driver.
> > > > > > > > > >
> > > > > > > > > > In conclusion, above difference is due to pmd drivers
> > > > > > > > > > have different
> > > > > > > > > strategies to update their Rx rearm(refill) head.
> > > > > > > > > > For i40e driver, if rearm_head exceed 1024, it will be
> > > > > > > > > > set as
> > > > > > > > > > 0 due to  the
> > > > > > > > > number of each rearm is a fixed value by default.
> > > > > > > > > > For mlx5 driver. Its rearm_head can exceed 1024, and
> > > > > > > > > > use mask to achieve
> > > > > > > > > real index. Thus its rearm number can be a different value.
> > > > > > > > >
> > > > > > > > > Ok, but if rte_pktmbuf_prefree_seg(txep[i].mbuf), it
> > > > > > > > > means that this mbuf is not free yet and can't be reused.
> > > > > > > > > Shouldn't we then set nb_recycle_mbufs = 0 in that case too?
> > > > > > > > > Or probably would be enough to skip that mbuf?
> > > > > > > > > Might be something like that:
> > > > > > > > >
> > > > > > > > > for (i = 0, j = 0; i < n; i++) {
> > > > > > > > >
> > > > > > > > > 	rxep[j] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> > > > > > > > > 	if (rxep[j] == NULL || recycle_rxq_info->mp !=
> > > > > > > > > rxep[j].mbuf-
> > > >pool))  {
> > > > > > > > > 		if (refill_requirement) {
> > > > > > > > > 			nb_recycle_mbufs = 0;
> > > > > > > > > 			break;
> > > > > > > > > 		}
> > > > > > > > > 	} else
> > > > > > > > > 		j++;
> > > > > > > > > }
> > > > > > > > >
> > > > > > > > > /* now j contains actual number of recycled mbufs */
> > > > > > > > >
> > > > > > > > > ?
> > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > > +					recycle_rxq_info-
> > >mp !=
> > > > > > > txep[i].mbuf-
> > > > > > > > > > > >pool))
> > > > > > > > > > > +				nb_recycle_mbufs = 0;
> > > > > > > > > > > +		}
> > > > > > > > > > > +		/* If Tx buffers are not the last reference or
> > > > > > > > > > > +		 * from unexpected mempool, all recycled
> > buffers
> > > > > > > > > > > +		 * are put into mempool.
> > > > > > > > > > > +		 */
> > > > > > > > > > > +		if (nb_recycle_mbufs == 0)
> > > > > > > > > > > +			for (i = 0; i < n; i++) {
> > > > > > > > > > > +				if (rxep[i] != NULL)
> > > > > > > > > > > +
> > 	rte_mempool_put(rxep[i]-
> > > > > > > >pool,
> > > > > > > > > > > rxep[i]);
> > > > > > > > > > > +			}
> > > > > > > > > > > +	}
> > > > > > > > > > > +
> > > > > > > > [Konstantin] After another thought, it might be easier and
> > > > > > > > cleaner
> > > just to:
> > > > > > > > if (rxep[j] == NULL || recycle_rxq_info->mp != rxep[j].mbuf->pool)
> > > > > > > > 	nb_recycle_mbufs = 0;
> > > > > > > >
> > > > > > > > Anyway, from my understanding - if
> > > > > > > > rte_pktmbuf_prefree_seg(mbuf) returns NULL, then we can't
> > > > > > > > recycle
> > > that mbuf.
> > > > > > > >
> > > > > > > > [Feifei] Agree with that 'rte_pktmbuf_prefree_seg(mbuf)
> > > > > > > > returns NULL, then
> > > > > > > we can't recycle that mbuf'.
> > > > > > > >
> > > > > > > > Firstly, we should know for i40e driver,  the number of
> > > > > > > > free mbufs is fixed, it
> > > > > > > must equal to 'tx_rs_thresh'
> > > > > > > > This means if we start to free Tx mbufs, it cannot be
> > > > > > > > interrupted until the
> > > > > > > given amount of mbufs are freed.
> > > > > > > > In the meanwhile, doing prefree operation for a Tx mbuf
> > > > > > > > can be looked at this mbuf is freed from this TX sw-ring
> > > > > > > > if the API returns NULL. This is due
> > > > > > > to that call 'prefree_seg' function means update the mbuf refcnt.
> > > > > > > >
> > > > > > > > So let's come back our recycle mbuf case.
> > > > > > > > Here we consider that the case if we have 32 mbufs need to
> > > > > > > > be freed, and
> > > > > > > we firstly do the pre-free.
> > > > > > > > And then first 8 mbufs is good and return value is not none.
> > > > > > > > But the 9th
> > > > > > > mbuf is bad, its refcnt is more than 1.
> > > > > > > > So we just update its refcnt and cannot put it into Rx sw-ring.
> > > > > > > > For Tx sw-ring,
> > > > > > > this mbuf has been freed.
> > > > > > > > Then we should continue to do pre-free operation for the
> > > > > > > > next Tx mbufs to ensure the given amount of mbufs are freed.
> > > > > > > >
> > > > > > > > Do a conclusion for this, this is because if we start to
> > > > > > > > do pre-free operation,  the Tx mbuf refcnt value maybe
> > > > > > > > changed, so we cannot stop or
> > > > > > > break until finish all the pre-free operation.
> > > > > > > >
> > > > > > > > Finally, in the case that Rx refill_request is not zero,
> > > > > > > > but the valid mbuf amount is less than this value, we must
> > > > > > > > put back this Tx mbufs into
> > > > > > > mempool.
> > > > > > > >
> > > > > > > > Above is the reason why I do not directly jump out of the
> > > > > > > > loop if some mbuf
> > > > > > > return value is NULL.
> > > > > > >
> > > > > > > Yep, I already realized that it is a bit more complicated
> > > > > > > and we need to continue with prefree() for all packets even
> > > > > > > when we get NULL in
> > > > > the middle.
> > > > > > > Anyway the code has to be changed, right?
> > > > > > >
> > > > > > Sorry, I think for the code, it is unnecessary to be changed.
> > > > > > For no fast free path, currently:
> > > > > > 1.   We check whether each mbuf is Ok and call pre_free function
> > > > > > --------------------------------------------------------------
> > > > > > --
> > > > > > --
> > > > > > ----
> > > > > > --------------------------------------------------------------
> > > > > > 2.1 For the mbuf return value is not NULL, it is put into Rx sw-ring.
> > > > > > 2.2 For the mbuf return value is zero and refill-request, it
> > > > > > will also firstly put into Rx sw-ring, and we set nb_recycle =
> > > > > > 0
> > > > > > --------------------------------------------------------------
> > > > > > --
> > > > > > --
> > > > > > ----
> > > > > > --------------------------------------------------------------
> > > > > > 3.1 We check nb_recycle, if it is not 0, we will continue to
> > > > > > rearm Rx descs
> > > > > and update its index by call descs_refill function.
> > > > > > 3.2 if nb_recycle is 0, we will put valid recycle mbufs back
> > > > > > into mempool as general path. This is necessary, because we
> > > > > > need to ensure the freed Tx number is fixed.(Some buffers
> > > > > > return is null can be seen as freed, others need to be put
> > > > > > into mempool)
> > > > > >
> > > > > > Or maybe I ignore something?
> > > > >
> > > > >
> > > > > I am talking about the case when both refill_requirement and
> > > > > mbuf return values iare zero:
> > > > > if (unlikely((rxep[i] == NULL && refill_requirement) ||         // ???
> rxep[i]
> > ==
> > > 0
> > > > > AND refill_requirement == 0 ???
> > > > >  		recycle_rxq_info->mp != txep[i].mbuf->pool))
> > > > > 	nb_recycle_mbufs = 0;
> > > > >
> > > > > As I can read the code you treat such situation as valid, while
> > > > > I think we should reset nb_recycle_mbufs to zero when rxep[i] ==
> > > > > NULL, no matter what value refill_requirement is.
> > > >
> > > > So this means for maybe MLX5 driver, its refill_request = 0. And
> > > > if some mbufs return value is zero, the other mbufs can not be
> > > > recycled into Rx sw-ring? Because nb_recycle=0, and they need to
> > > > be put into
> > > mempool.
> > > >
> > > > I think for such as MLX5 driver, we can allow recycle some valid
> > > > mbufs into
> > > Rx ring.
> > > > Because no constraint for its refill number. Is my suggestion reasonable?
> > >
> > > I suppose yes: if refill_request is zero we potentially can skip 'bad'
> > > mbufs and continue with recycling for remaining ones.
> > > It would probably require more changes in current code, but sounds
> > > ok to me in general.
> > That's Ok. Thanks for your careful reviewing.
> 
> I'm very sorry not to receive your e-mail and until now I realize we need to do
> some code change for i40e driver. Also thanks ferruh to kindly remind this.
> 
> Agree with you we need some operation for the case that (refill_requirement
> == 0 &&  rxep[i] == 0).
> Thus maybe we can do a change as follows:
> 
> for (i = 0; i < n; i++) {
> 	rxep[0] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> 	if (unlikely((rxep[0] == NULL && refill_requirement) ||
> 			recycle_rxq_info->mp != txep[i].mbuf->pool))
> 		nb_recycle_mbufs = 0;
> 	if (likely(rxep[0]))
> 		rxep++;
> }
> 
> Is above change is Ok?
> 

Here is a revised version of the code proposed above:
-----------------------------------------------------------------------------------------------------
int j = 0;
......

for (i = 0; i < n; i++) {
	rxep[j] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
	if (unlikely(rxep[j] == NULL && !refill_requirement))
		continue;
	if (unlikely((rxep[j] == NULL && refill_requirement) ||
			recycle_rxq_info->mp != txep[i].mbuf->pool))
		nb_recycle_mbufs = 0;
	j++;
}

if (nb_recycle_mbufs == 0)
	for (i = 0; i < j; i++) {
		if (rxep[i] != NULL)
			rte_mempool_put(rxep[i]->pool, rxep[i]);
	}
---------------------------------------------------------------------------------------------------------
> > >
> > > > >
> > > > >
> > > > > > > >
> > > > > > > > > > > +	/* Update counters for Tx. */
> > > > > > > > > > > +	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free +
> > > > > > > > > > > +txq-
> > > > > > > >tx_rs_thresh);
> > > > > > > > > > > +	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd +
> > > > > > > > > > > +txq-
> > > > > > > >tx_rs_thresh);
> > > > > > > > > > > +	if (txq->tx_next_dd >= txq->nb_tx_desc)
> > > > > > > > > > > +		txq->tx_next_dd = (uint16_t)(txq-
> > >tx_rs_thresh -
> > > > > > > > > > > +1);
> > > > > > > > > > > +
> > > > > > > > > > > +	return nb_recycle_mbufs; }
> > > > > > > > > > > diff --git a/drivers/net/i40e/i40e_rxtx.c
> > > > > > > > > > > b/drivers/net/i40e/i40e_rxtx.c index
> > > > > > > > > > > b4f65b58fa..a9c9eb331c
> > > > > > > > > > > 100644
> > > > > > > > > > > --- a/drivers/net/i40e/i40e_rxtx.c
> > > > > > > > > > > +++ b/drivers/net/i40e/i40e_rxtx.c
> > > > > > > > > > > @@ -3199,6 +3199,30 @@ i40e_txq_info_get(struct
> > > > > > > > > > > rte_eth_dev *dev, uint16_t queue_id,
> > > > > > > > > > >  	qinfo->conf.offloads = txq->offloads;  }
> > > > > > > > > > >
> > > > > > > > > > > +void
> > > > > > > > > > > +i40e_recycle_rxq_info_get(struct rte_eth_dev *dev,
> > > > > > > > > > > +uint16_t
> > > > > > > queue_id,
> > > > > > > > > > > +	struct rte_eth_recycle_rxq_info *recycle_rxq_info) {
> > > > > > > > > > > +	struct i40e_rx_queue *rxq;
> > > > > > > > > > > +	struct i40e_adapter *ad =
> > > > > > > > > > > +		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data-
> > > > > > > >dev_private);
> > > > > > > > > > > +
> > > > > > > > > > > +	rxq = dev->data->rx_queues[queue_id];
> > > > > > > > > > > +
> > > > > > > > > > > +	recycle_rxq_info->mbuf_ring = (void *)rxq->sw_ring;
> > > > > > > > > > > +	recycle_rxq_info->mp = rxq->mp;
> > > > > > > > > > > +	recycle_rxq_info->mbuf_ring_size = rxq->nb_rx_desc;
> > > > > > > > > > > +	recycle_rxq_info->receive_tail = &rxq->rx_tail;
> > > > > > > > > > > +
> > > > > > > > > > > +	if (ad->rx_vec_allowed) {
> > > > > > > > > > > +		recycle_rxq_info->refill_requirement =
> > > > > > > > > > > RTE_I40E_RXQ_REARM_THRESH;
> > > > > > > > > > > +		recycle_rxq_info->refill_head = &rxq-
> > >rxrearm_start;
> > > > > > > > > > > +	} else {
> > > > > > > > > > > +		recycle_rxq_info->refill_requirement = rxq-
> > > > > > > >rx_free_thresh;
> > > > > > > > > > > +		recycle_rxq_info->refill_head = &rxq-
> > >rx_free_trigger;
> > > > > > > > > > > +	}
> > > > > > > > > > > +}
> > > > > > > > > > > +
> > > > > > > > > > >  #ifdef RTE_ARCH_X86  static inline bool
> > > > > > > > > > > get_avx_supported(bool request_avx512) @@ -3293,6
> > > > > > > > > > > +3317,8
> > > > > @@
> > > > > > > > > > > i40e_set_rx_function(struct rte_eth_dev *dev)
> > > > > > > > > > >  				dev->rx_pkt_burst = ad-
> > > > > >rx_use_avx2 ?
> > > > > > > > > > >
> > > > > 	i40e_recv_scattered_pkts_vec_avx2 :
> > > > > > > > > > >
> > 	i40e_recv_scattered_pkts_vec;
> > > > > > > > > > > +				dev-
> > >recycle_rx_descriptors_refill =
> > > > > > > > > > > +
> > > > > > > 	i40e_recycle_rx_descriptors_refill_vec;
> > > > > > > > > > >  			}
> > > > > > > > > > >  		} else {
> > > > > > > > > > >  			if (ad->rx_use_avx512) { @@ -3311,9
> > +3337,12
> > > @@
> > > > > > > > > > > i40e_set_rx_function(struct rte_eth_dev
> > > > > > > *dev)
> > > > > > > > > > >  				dev->rx_pkt_burst = ad-
> > > > > >rx_use_avx2 ?
> > > > > > > > > > >
> > 	i40e_recv_pkts_vec_avx2 :
> > > > > > > > > > >  					i40e_recv_pkts_vec;
> > > > > > > > > > > +				dev-
> > >recycle_rx_descriptors_refill =
> > > > > > > > > > > +
> > > > > > > 	i40e_recycle_rx_descriptors_refill_vec;
> > > > > > > > > > >  			}
> > > > > > > > > > >  		}
> > > > > > > > > > >  #else /* RTE_ARCH_X86 */
> > > > > > > > > > > +		dev->recycle_rx_descriptors_refill =
> > > > > > > > > > > +i40e_recycle_rx_descriptors_refill_vec;
> > > > > > > > > > >  		if (dev->data->scattered_rx) {
> > > > > > > > > > >  			PMD_INIT_LOG(DEBUG,
> > > > > > > > > > >  				     "Using Vector Scattered Rx
> > > > > (port %d).", @@
> > > > > > > > > > > -3481,15 +3510,18 @@ i40e_set_tx_function(struct
> > > > > > > > > > > rte_eth_dev
> > > > > *dev)
> > > > > > > > > > >  				dev->tx_pkt_burst = ad-
> > > > > >tx_use_avx2 ?
> > > > > > > > > > >
> > > > > i40e_xmit_pkts_vec_avx2 :
> > > > > > > > > > >
> > > > > i40e_xmit_pkts_vec;
> > > > > > > > > > > +				dev->recycle_tx_mbufs_reuse
> > =
> > > > > > > > > > > i40e_recycle_tx_mbufs_reuse_vec;
> > > > > > > > > > >  			}
> > > > > > > > > > >  #else /* RTE_ARCH_X86 */
> > > > > > > > > > >  			PMD_INIT_LOG(DEBUG, "Using Vector
> > Tx
> > > > > (port %d).",
> > > > > > > > > > >  				     dev->data->port_id);
> > > > > > > > > > >  			dev->tx_pkt_burst =
> > i40e_xmit_pkts_vec;
> > > > > > > > > > > +			dev->recycle_tx_mbufs_reuse =
> > > > > > > > > > > i40e_recycle_tx_mbufs_reuse_vec;  #endif /*
> > > > > > > > > > > RTE_ARCH_X86
> > */
> > > > > > > > > > >  		} else {
> > > > > > > > > > >  			PMD_INIT_LOG(DEBUG, "Simple tx
> > finally be
> > > > > used.");
> > > > > > > > > > >  			dev->tx_pkt_burst =
> > i40e_xmit_pkts_simple;
> > > > > > > > > > > +			dev->recycle_tx_mbufs_reuse =
> > > > > > > > > > > i40e_recycle_tx_mbufs_reuse_vec;
> > > > > > > > > > >  		}
> > > > > > > > > > >  		dev->tx_pkt_prepare =
> > i40e_simple_prep_pkts;
> > > > > > > > > > >  	} else {
> > > > > > > > > > > diff --git a/drivers/net/i40e/i40e_rxtx.h
> > > > > > > > > > > b/drivers/net/i40e/i40e_rxtx.h index
> > > > > > > > > > > a8686224e5..b191f23e1f
> > > > > > > > > > > 100644
> > > > > > > > > > > --- a/drivers/net/i40e/i40e_rxtx.h
> > > > > > > > > > > +++ b/drivers/net/i40e/i40e_rxtx.h
> > > > > > > > > > > @@ -236,6 +236,10 @@ uint32_t
> > > > > > > > > > > i40e_dev_rx_queue_count(void *rx_queue);  int
> > > > > > > > > > > i40e_dev_rx_descriptor_status(void *rx_queue,
> > > > > > > > > > > uint16_t offset);  int
> > > > > > > > > > > i40e_dev_tx_descriptor_status(void
> > > > > > > > > > > *tx_queue, uint16_t offset);
> > > > > > > > > > >
> > > > > > > > > > > +uint16_t i40e_recycle_tx_mbufs_reuse_vec(void
> *tx_queue,
> > > > > > > > > > > +		struct rte_eth_recycle_rxq_info
> > *recycle_rxq_info);
> > > > > > > void
> > > > > > > > > > > +i40e_recycle_rx_descriptors_refill_vec(void
> > > > > > > > > > > +*rx_queue, uint16_t nb_mbufs);
> > > > > > > > > > > +
> > > > > > > > > > >  uint16_t i40e_recv_pkts_vec(void *rx_queue, struct
> > > > > > > > > > > rte_mbuf
> > > > > > > **rx_pkts,
> > > > > > > > > > >  			    uint16_t nb_pkts);  uint16_t
> > > > > > > > > > > i40e_recv_scattered_pkts_vec(void *rx_queue, diff
> > > > > > > > > > > --git a/drivers/net/i40e/meson.build
> > > > > > > > > > > b/drivers/net/i40e/meson.build index
> > > > > > > > > > > 8e53b87a65..3b1a233c84 100644
> > > > > > > > > > > --- a/drivers/net/i40e/meson.build
> > > > > > > > > > > +++ b/drivers/net/i40e/meson.build
> > > > > > > > > > > @@ -34,6 +34,7 @@ sources = files(
> > > > > > > > > > >          'i40e_tm.c',
> > > > > > > > > > >          'i40e_hash.c',
> > > > > > > > > > >          'i40e_vf_representor.c',
> > > > > > > > > > > +	'i40e_recycle_mbufs_vec_common.c',
> > > > > > > > > > >          'rte_pmd_i40e.c',
> > > > > > > > > > >  )
> > > > > > > > > > >
> > > > > > > > > > > --
> > > > > > > > > > > 2.25.1
  
Konstantin Ananyev Sept. 22, 2023, 4:40 p.m. UTC | #14
Hi Feifei,

> > > -----Original Message-----
> > > From: Feifei Wang
> > > Sent: Tuesday, September 5, 2023 11:11 AM
> > > To: Konstantin Ananyev <konstantin.ananyev@huawei.com>; Konstantin
> > > Ananyev <konstantin.v.ananyev@yandex.ru>
> > > Cc: dev@dpdk.org; nd <nd@arm.com>; Honnappa Nagarahalli
> > > <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang
> > <Ruifeng.Wang@arm.com>;
> > > Yuying Zhang <Yuying.Zhang@intel.com>; Beilei Xing
> > > <beilei.xing@intel.com>; nd <nd@arm.com>; nd <nd@arm.com>; nd
> > > <nd@arm.com>; nd <nd@arm.com>; nd <nd@arm.com>
> > > Subject: RE: [PATCH v11 2/4] net/i40e: implement mbufs recycle mode
> > >
> > >
> > >
> > > > -----Original Message-----
> > > > From: Konstantin Ananyev <konstantin.ananyev@huawei.com>
> > > > Sent: Monday, September 4, 2023 6:22 PM
> > > > To: Feifei Wang <Feifei.Wang2@arm.com>; Konstantin Ananyev
> > > > <konstantin.v.ananyev@yandex.ru>
> > > > Cc: dev@dpdk.org; nd <nd@arm.com>; Honnappa Nagarahalli
> > > > <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang
> > > <Ruifeng.Wang@arm.com>;
> > > > Yuying Zhang <Yuying.Zhang@intel.com>; Beilei Xing
> > > > <beilei.xing@intel.com>; nd <nd@arm.com>; nd <nd@arm.com>; nd
> > > > <nd@arm.com>; nd <nd@arm.com>
> > > > Subject: RE: [PATCH v11 2/4] net/i40e: implement mbufs recycle mode
> > > >
> > > >
> > > >
> > > > > > > > > > > > Define specific function implementation for i40e driver.
> > > > > > > > > > > > Currently, mbufs recycle mode can support 128bit
> > > > > > > > > > > > vector path and
> > > > > > > > > > > > avx2
> > > > > > > > > > path.
> > > > > > > > > > > > And can be enabled both in fast free and no fast free mode.
> > > > > > > > > > > >
> > > > > > > > > > > > Suggested-by: Honnappa Nagarahalli
> > > > > > > > > > > > <honnappa.nagarahalli@arm.com>
> > > > > > > > > > > > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > > > > > > > > > > > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > > > > > > > > > > > Reviewed-by: Honnappa Nagarahalli
> > > > > > > > <honnappa.nagarahalli@arm.com>
> > > > > > > > > > > > ---
> > > > > > > > > > > >  drivers/net/i40e/i40e_ethdev.c                |   1 +
> > > > > > > > > > > >  drivers/net/i40e/i40e_ethdev.h                |   2 +
> > > > > > > > > > > >  .../net/i40e/i40e_recycle_mbufs_vec_common.c  | 147
> > > > > > > > > > > > ++++++++++++++++++
> > > > > > > > > > > >  drivers/net/i40e/i40e_rxtx.c                  |  32 ++++
> > > > > > > > > > > >  drivers/net/i40e/i40e_rxtx.h                  |   4 +
> > > > > > > > > > > >  drivers/net/i40e/meson.build                  |   1 +
> > > > > > > > > > > >  6 files changed, 187 insertions(+)  create mode
> > > > > > > > > > > > 100644
> > > > > > > > > > > > drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > > > > > > > > > > >
> > > > > > > > > > > > diff --git a/drivers/net/i40e/i40e_ethdev.c
> > > > > > > > > > > > b/drivers/net/i40e/i40e_ethdev.c index
> > > > > > > > > > > > 8271bbb394..50ba9aac94
> > > > > > > > > > > > 100644
> > > > > > > > > > > > --- a/drivers/net/i40e/i40e_ethdev.c
> > > > > > > > > > > > +++ b/drivers/net/i40e/i40e_ethdev.c
> > > > > > > > > > > > @@ -496,6 +496,7 @@ static const struct eth_dev_ops
> > > > > > > > > > > > i40e_eth_dev_ops
> > > > > > > > > > = {
> > > > > > > > > > > >  	.flow_ops_get                 = i40e_dev_flow_ops_get,
> > > > > > > > > > > >  	.rxq_info_get                 = i40e_rxq_info_get,
> > > > > > > > > > > >  	.txq_info_get                 = i40e_txq_info_get,
> > > > > > > > > > > > +	.recycle_rxq_info_get         =
> > > i40e_recycle_rxq_info_get,
> > > > > > > > > > > >  	.rx_burst_mode_get            =
> > > i40e_rx_burst_mode_get,
> > > > > > > > > > > >  	.tx_burst_mode_get            =
> > > i40e_tx_burst_mode_get,
> > > > > > > > > > > >  	.timesync_enable              = i40e_timesync_enable,
> > > > > > > > > > > > diff --git a/drivers/net/i40e/i40e_ethdev.h
> > > > > > > > > > > > b/drivers/net/i40e/i40e_ethdev.h index
> > > > > > > > > > > > 6f65d5e0ac..af758798e1
> > > > > > > > > > > > 100644
> > > > > > > > > > > > --- a/drivers/net/i40e/i40e_ethdev.h
> > > > > > > > > > > > +++ b/drivers/net/i40e/i40e_ethdev.h
> > > > > > > > > > > > @@ -1355,6 +1355,8 @@ void i40e_rxq_info_get(struct
> > > > > > > > > > > > rte_eth_dev *dev, uint16_t queue_id,
> > > > > > > > > > > >  	struct rte_eth_rxq_info *qinfo);  void
> > > > > > > > > > > > i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t
> > queue_id,
> > > > > > > > > > > >  	struct rte_eth_txq_info *qinfo);
> > > > > > > > > > > > +void i40e_recycle_rxq_info_get(struct rte_eth_dev
> > > > > > > > > > > > +*dev, uint16_t
> > > > > > > > > > queue_id,
> > > > > > > > > > > > +	struct rte_eth_recycle_rxq_info
> > > > > > > > > > > > +*recycle_rxq_info);
> > > > > > > > > > > >  int i40e_rx_burst_mode_get(struct rte_eth_dev *dev,
> > > > > > > > > > > > uint16_t
> > > > > > > > queue_id,
> > > > > > > > > > > >  			   struct rte_eth_burst_mode *mode);
> > > int
> > > > > > > > > > > > i40e_tx_burst_mode_get(struct rte_eth_dev *dev,
> > > > > > > > > > > > uint16_t queue_id, diff -- git
> > > > > > > > > > > > a/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > > > > > > > > > > > b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > > > > > > > > > > > new file mode 100644 index 0000000000..5663ecccde
> > > > > > > > > > > > --- /dev/null
> > > > > > > > > > > > +++ b/drivers/net/i40e/i40e_recycle_mbufs_vec_common
> > > > > > > > > > > > +++ .c
> > > > > > > > > > > > @@ -0,0 +1,147 @@
> > > > > > > > > > > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > > > > > > > > > > + * Copyright (c) 2023 Arm Limited.
> > > > > > > > > > > > + */
> > > > > > > > > > > > +
> > > > > > > > > > > > +#include <stdint.h> #include <ethdev_driver.h>
> > > > > > > > > > > > +
> > > > > > > > > > > > +#include "base/i40e_prototype.h"
> > > > > > > > > > > > +#include "base/i40e_type.h"
> > > > > > > > > > > > +#include "i40e_ethdev.h"
> > > > > > > > > > > > +#include "i40e_rxtx.h"
> > > > > > > > > > > > +
> > > > > > > > > > > > +#pragma GCC diagnostic ignored "-Wcast-qual"
> > > > > > > > > > > > +
> > > > > > > > > > > > +void
> > > > > > > > > > > > +i40e_recycle_rx_descriptors_refill_vec(void
> > > > > > > > > > > > +*rx_queue, uint16_t
> > > > > > > > > > > > +nb_mbufs) {
> > > > > > > > > > > > +	struct i40e_rx_queue *rxq = rx_queue;
> > > > > > > > > > > > +	struct i40e_rx_entry *rxep;
> > > > > > > > > > > > +	volatile union i40e_rx_desc *rxdp;
> > > > > > > > > > > > +	uint16_t rx_id;
> > > > > > > > > > > > +	uint64_t paddr;
> > > > > > > > > > > > +	uint64_t dma_addr;
> > > > > > > > > > > > +	uint16_t i;
> > > > > > > > > > > > +
> > > > > > > > > > > > +	rxdp = rxq->rx_ring + rxq->rxrearm_start;
> > > > > > > > > > > > +	rxep = &rxq->sw_ring[rxq->rxrearm_start];
> > > > > > > > > > > > +
> > > > > > > > > > > > +	for (i = 0; i < nb_mbufs; i++) {
> > > > > > > > > > > > +		/* Initialize rxdp descs. */
> > > > > > > > > > > > +		paddr = (rxep[i].mbuf)->buf_iova +
> > > > > > > > > > > > RTE_PKTMBUF_HEADROOM;
> > > > > > > > > > > > +		dma_addr = rte_cpu_to_le_64(paddr);
> > > > > > > > > > > > +		/* flush desc with pa dma_addr */
> > > > > > > > > > > > +		rxdp[i].read.hdr_addr = 0;
> > > > > > > > > > > > +		rxdp[i].read.pkt_addr = dma_addr;
> > > > > > > > > > > > +	}
> > > > > > > > > > > > +
> > > > > > > > > > > > +	/* Update the descriptor initializer index */
> > > > > > > > > > > > +	rxq->rxrearm_start += nb_mbufs;
> > > > > > > > > > > > +	rx_id = rxq->rxrearm_start - 1;
> > > > > > > > > > > > +
> > > > > > > > > > > > +	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
> > > > > > > > > > > > +		rxq->rxrearm_start = 0;
> > > > > > > > > > > > +		rx_id = rxq->nb_rx_desc - 1;
> > > > > > > > > > > > +	}
> > > > > > > > > > > > +
> > > > > > > > > > > > +	rxq->rxrearm_nb -= nb_mbufs;
> > > > > > > > > > > > +
> > > > > > > > > > > > +	rte_io_wmb();
> > > > > > > > > > > > +	/* Update the tail pointer on the NIC */
> > > > > > > > > > > > +	I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id);
> > > > > > > > > > > > +}
> > > > > > > > > > > > +
> > > > > > > > > > > > +uint16_t
> > > > > > > > > > > > +i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
> > > > > > > > > > > > +	struct rte_eth_recycle_rxq_info *recycle_rxq_info) {
> > > > > > > > > > > > +	struct i40e_tx_queue *txq = tx_queue;
> > > > > > > > > > > > +	struct i40e_tx_entry *txep;
> > > > > > > > > > > > +	struct rte_mbuf **rxep;
> > > > > > > > > > > > +	int i, n;
> > > > > > > > > > > > +	uint16_t nb_recycle_mbufs;
> > > > > > > > > > > > +	uint16_t avail = 0;
> > > > > > > > > > > > +	uint16_t mbuf_ring_size = recycle_rxq_info-
> > > >mbuf_ring_size;
> > > > > > > > > > > > +	uint16_t mask = recycle_rxq_info->mbuf_ring_size - 1;
> > > > > > > > > > > > +	uint16_t refill_requirement = recycle_rxq_info-
> > > > > > > > >refill_requirement;
> > > > > > > > > > > > +	uint16_t refill_head = *recycle_rxq_info->refill_head;
> > > > > > > > > > > > +	uint16_t receive_tail =
> > > > > > > > > > > > +*recycle_rxq_info->receive_tail;
> > > > > > > > > > > > +
> > > > > > > > > > > > +	/* Get available recycling Rx buffers. */
> > > > > > > > > > > > +	avail = (mbuf_ring_size - (refill_head -
> > > > > > > > > > > > +receive_tail)) & mask;
> > > > > > > > > > > > +
> > > > > > > > > > > > +	/* Check Tx free thresh and Rx available space. */
> > > > > > > > > > > > +	if (txq->nb_tx_free > txq->tx_free_thresh || avail
> > > > > > > > > > > > +<=
> > > > > > > > > > > > +txq-
> > > > > > > > >tx_rs_thresh)
> > > > > > > > > > > > +		return 0;
> > > > > > > > > > > > +
> > > > > > > > > > > > +	/* check DD bits on threshold descriptor */
> > > > > > > > > > > > +	if
> > > > > > > > > > > > +((txq->tx_ring[txq->tx_next_dd].cmd_type_offset_bsz
> > > > > > > > > > > > +&
> > > > > > > > > > > > +
> > > > > > > > > > > > 	rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
> > > > > > > > > > > > +
> > > > > > > > > > > > 	rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
> > > > > > > > > > > > +		return 0;
> > > > > > > > > > > > +
> > > > > > > > > > > > +	n = txq->tx_rs_thresh;
> > > > > > > > > > > > +	nb_recycle_mbufs = n;
> > > > > > > > > > > > +
> > > > > > > > > > > > +	/* Mbufs recycle mode can only support no ring
> > > > > > > > > > > > +buffer
> > > > > > > > wrapping
> > > > > > > > > > > > around.
> > > > > > > > > > > > +	 * Two case for this:
> > > > > > > > > > > > +	 *
> > > > > > > > > > > > +	 * case 1: The refill head of Rx buffer ring needs
> > > > > > > > > > > > +to be aligned
> > > > > > > > with
> > > > > > > > > > > > +	 * mbuf ring size. In this case, the number of Tx
> > > freeing buffers
> > > > > > > > > > > > +	 * should be equal to refill_requirement.
> > > > > > > > > > > > +	 *
> > > > > > > > > > > > +	 * case 2: The refill head of Rx ring buffer does
> > > > > > > > > > > > +not need to be
> > > > > > > > aligned
> > > > > > > > > > > > +	 * with mbuf ring size. In this case, the update
> > > > > > > > > > > > +of refill head
> > > > > > > > can not
> > > > > > > > > > > > +	 * exceed the Rx mbuf ring size.
> > > > > > > > > > > > +	 */
> > > > > > > > > > > > +	if (refill_requirement != n ||
> > > > > > > > > > > > +		(!refill_requirement && (refill_head + n >
> > > > > > > > mbuf_ring_size)))
> > > > > > > > > > > > +		return 0;
> > > > > > > > > > > > +
> > > > > > > > > > > > +	/* First buffer to free from S/W ring is at index
> > > > > > > > > > > > +	 * tx_next_dd - (tx_rs_thresh-1).
> > > > > > > > > > > > +	 */
> > > > > > > > > > > > +	txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
> > > > > > > > > > > > +	rxep = recycle_rxq_info->mbuf_ring;
> > > > > > > > > > > > +	rxep += refill_head;
> > > > > > > > > > > > +
> > > > > > > > > > > > +	if (txq->offloads &
> > > RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
> > > > > > > > > > > > +		/* Avoid txq contains buffers from unexpected
> > > > > > > > mempool. */
> > > > > > > > > > > > +		if (unlikely(recycle_rxq_info->mp
> > > > > > > > > > > > +					!= txep[0].mbuf-
> > > >pool))
> > > > > > > > > > > > +			return 0;
> > > > > > > > > > > > +
> > > > > > > > > > > > +		/* Directly put mbufs from Tx to Rx. */
> > > > > > > > > > > > +		for (i = 0; i < n; i++)
> > > > > > > > > > > > +			rxep[i] = txep[i].mbuf;
> > > > > > > > > > > > +	} else {
> > > > > > > > > > > > +		for (i = 0; i < n; i++) {
> > > > > > > > > > > > +			rxep[i] =
> > > > > > > > rte_pktmbuf_prefree_seg(txep[i].mbuf);
> > > > > > > > > > > > +
> > > > > > > > > > > > +			/* If Tx buffers are not the last
> > > reference or
> > > > > > > > from
> > > > > > > > > > > > +			 * unexpected mempool, previous
> > > copied
> > > > > > > > buffers are
> > > > > > > > > > > > +			 * considered as invalid.
> > > > > > > > > > > > +			 */
> > > > > > > > > > > > +			if (unlikely((rxep[i] == NULL &&
> > > > > > > > refill_requirement) ||
> > > > > > > > > > > [Konstantin]
> > > > > > > > > > > Could you pls remind me why it is ok to have
> > > > > > > > > > > rxep[i]==NULL when refill_requirement is not set?
> > > > > > > > > > >
> > > > > > > > > > > If reill_requirement is not zero, it means each tx
> > > > > > > > > > > freed buffer must be valid and can be put into Rx sw_ring.
> > > > > > > > > > > Then the refill head of Rx buffer ring can be aligned
> > > > > > > > > > > with mbuf ring size. Briefly speaking the number
> > > > > > > > > > of Tx valid freed buffer must be equal to Rx refill_requirement.
> > > > > > > > > > For example, i40e driver.
> > > > > > > > > > >
> > > > > > > > > > > If reill_requirement is zero, it means that the refill
> > > > > > > > > > > head of Rx buffer ring does not need to be aligned
> > > > > > > > > > > with mbuf ring size, thus if Tx have n valid freed
> > > > > > > > > > > buffers, we just need to put these n buffers into Rx
> > > > > > > > > > > sw-
> > > > > > > > > > ring, and not to be equal to the Rx setting rearm number.
> > > > > > > > > > For example, mlx5 driver.
> > > > > > > > > > >
> > > > > > > > > > > In conclusion, above difference is due to pmd drivers
> > > > > > > > > > > have different
> > > > > > > > > > strategies to update their Rx rearm(refill) head.
> > > > > > > > > > > For i40e driver, if rearm_head exceed 1024, it will be
> > > > > > > > > > > set as
> > > > > > > > > > > 0 due to  the
> > > > > > > > > > number of each rearm is a fixed value by default.
> > > > > > > > > > > For mlx5 driver. Its rearm_head can exceed 1024, and
> > > > > > > > > > > use mask to achieve
> > > > > > > > > > real index. Thus its rearm number can be a different value.
> > > > > > > > > >
> > > > > > > > > > Ok, but if rte_pktmbuf_prefree_seg(txep[i].mbuf), it
> > > > > > > > > > means that this mbuf is not free yet and can't be reused.
> > > > > > > > > > Shouldn't we then set nb_recycle_mbufs = 0 in that case too?
> > > > > > > > > > Or probably would be enough to skip that mbuf?
> > > > > > > > > > Might be something like that:
> > > > > > > > > >
> > > > > > > > > > for (i = 0, j = 0; i < n; i++) {
> > > > > > > > > >
> > > > > > > > > > 	rxep[j] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> > > > > > > > > > 	if (rxep[j] == NULL || recycle_rxq_info->mp !=
> > > > > > > > > > rxep[j].mbuf-
> > > > >pool))  {
> > > > > > > > > > 		if (refill_requirement) {
> > > > > > > > > > 			nb_recycle_mbufs = 0;
> > > > > > > > > > 			break;
> > > > > > > > > > 		}
> > > > > > > > > > 	} else
> > > > > > > > > > 		j++;
> > > > > > > > > > }
> > > > > > > > > >
> > > > > > > > > > /* now j contains actual number of recycled mbufs */
> > > > > > > > > >
> > > > > > > > > > ?
> > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > > +					recycle_rxq_info-
> > > >mp !=
> > > > > > > > txep[i].mbuf-
> > > > > > > > > > > > >pool))
> > > > > > > > > > > > +				nb_recycle_mbufs = 0;
> > > > > > > > > > > > +		}
> > > > > > > > > > > > +		/* If Tx buffers are not the last reference or
> > > > > > > > > > > > +		 * from unexpected mempool, all recycled
> > > buffers
> > > > > > > > > > > > +		 * are put into mempool.
> > > > > > > > > > > > +		 */
> > > > > > > > > > > > +		if (nb_recycle_mbufs == 0)
> > > > > > > > > > > > +			for (i = 0; i < n; i++) {
> > > > > > > > > > > > +				if (rxep[i] != NULL)
> > > > > > > > > > > > +
> > > 	rte_mempool_put(rxep[i]-
> > > > > > > > >pool,
> > > > > > > > > > > > rxep[i]);
> > > > > > > > > > > > +			}
> > > > > > > > > > > > +	}
> > > > > > > > > > > > +
> > > > > > > > > [Konstantin] After another thought, it might be easier and
> > > > > > > > > cleaner
> > > > just to:
> > > > > > > > > if (rxep[j] == NULL || recycle_rxq_info->mp != rxep[j].mbuf->pool)
> > > > > > > > > 	nb_recycle_mbufs = 0;
> > > > > > > > >
> > > > > > > > > Anyway, from my understanding - if
> > > > > > > > > rte_pktmbuf_prefree_seg(mbuf) returns NULL, then we can't
> > > > > > > > > recycle
> > > > that mbuf.
> > > > > > > > >
> > > > > > > > > [Feifei] Agree with that 'rte_pktmbuf_prefree_seg(mbuf)
> > > > > > > > > returns NULL, then
> > > > > > > > we can't recycle that mbuf'.
> > > > > > > > >
> > > > > > > > > Firstly, we should know for i40e driver,  the number of
> > > > > > > > > free mbufs is fixed, it
> > > > > > > > must equal to 'tx_rs_thresh'
> > > > > > > > > This means if we start to free Tx mbufs, it cannot be
> > > > > > > > > interrupted until the
> > > > > > > > given amount of mbufs are freed.
> > > > > > > > > In the meanwhile, doing prefree operation for a Tx mbuf
> > > > > > > > > can be looked at this mbuf is freed from this TX sw-ring
> > > > > > > > > if the API returns NULL. This is due
> > > > > > > > to that call 'prefree_seg' function means update the mbuf refcnt.
> > > > > > > > >
> > > > > > > > > So let's come back our recycle mbuf case.
> > > > > > > > > Here we consider that the case if we have 32 mbufs need to
> > > > > > > > > be freed, and
> > > > > > > > we firstly do the pre-free.
> > > > > > > > > And then first 8 mbufs is good and return value is not none.
> > > > > > > > > But the 9th
> > > > > > > > mbuf is bad, its refcnt is more than 1.
> > > > > > > > > So we just update its refcnt and cannot put it into Rx sw-ring.
> > > > > > > > > For Tx sw-ring,
> > > > > > > > this mbuf has been freed.
> > > > > > > > > Then we should continue to do pre-free operation for the
> > > > > > > > > next Tx mbufs to ensure the given amount of mbufs are freed.
> > > > > > > > >
> > > > > > > > > Do a conclusion for this, this is because if we start to
> > > > > > > > > do pre-free operation,  the Tx mbuf refcnt value maybe
> > > > > > > > > changed, so we cannot stop or
> > > > > > > > break until finish all the pre-free operation.
> > > > > > > > >
> > > > > > > > > Finally, in the case that Rx refill_request is not zero,
> > > > > > > > > but the valid mbuf amount is less than this value, we must
> > > > > > > > > put back this Tx mbufs into
> > > > > > > > mempool.
> > > > > > > > >
> > > > > > > > > Above is the reason why I do not directly jump out of the
> > > > > > > > > loop if some mbuf
> > > > > > > > return value is NULL.
> > > > > > > >
> > > > > > > > Yep, I already realized that it is a bit more complicated
> > > > > > > > and we need to continue with prefree() for all packets even
> > > > > > > > when we get NULL in
> > > > > > the middle.
> > > > > > > > Anyway the code has to be changed, right?
> > > > > > > >
> > > > > > > Sorry, I think for the code, it is unnecessary to be changed.
> > > > > > > For no fast free path, currently:
> > > > > > > 1.   We check whether each mbuf is Ok and call pre_free function
> > > > > > > --------------------------------------------------------------
> > > > > > > --
> > > > > > > --
> > > > > > > ----
> > > > > > > --------------------------------------------------------------
> > > > > > > 2.1 For the mbuf return value is not NULL, it is put into Rx sw-ring.
> > > > > > > 2.2 For the mbuf return value is zero and refill-request, it
> > > > > > > will also firstly put into Rx sw-ring, and we set nb_recycle =
> > > > > > > 0
> > > > > > > --------------------------------------------------------------
> > > > > > > --
> > > > > > > --
> > > > > > > ----
> > > > > > > --------------------------------------------------------------
> > > > > > > 3.1 We check nb_recycle, if it is not 0, we will continue to
> > > > > > > rearm Rx descs
> > > > > > and update its index by call descs_refill function.
> > > > > > > 3.2 if nb_recycle is 0, we will put valid recycle mbufs back
> > > > > > > into mempool as general path. This is necessary, because we
> > > > > > > need to ensure the freed Tx number is fixed.(Some buffers
> > > > > > > return is null can be seen as freed, others need to be put
> > > > > > > into mempool)
> > > > > > >
> > > > > > > Or maybe I ignore something?
> > > > > >
> > > > > >
> > > > > > I am talking about the case when both refill_requirement and
> > > > > > mbuf return values iare zero:
> > > > > > if (unlikely((rxep[i] == NULL && refill_requirement) ||         // ???
> > rxep[i]
> > > ==
> > > > 0
> > > > > > AND refill_requirement == 0 ???
> > > > > >  		recycle_rxq_info->mp != txep[i].mbuf->pool))
> > > > > > 	nb_recycle_mbufs = 0;
> > > > > >
> > > > > > As I can read the code you treat such situation as valid, while
> > > > > > I think we should reset nb_recycle_mbufs to zero when rxep[i] ==
> > > > > > NULL, no matter what value refill_requirement is.
> > > > >
> > > > > So this means for maybe MLX5 driver, its refill_request = 0. And
> > > > > if some mbufs return value is zero, the other mbufs can not be
> > > > > recycled into Rx sw-ring? Because nb_recycle=0, and they need to
> > > > > be put into
> > > > mempool.
> > > > >
> > > > > I think for such as MLX5 driver, we can allow recycle some valid
> > > > > mbufs into
> > > > Rx ring.
> > > > > Because no constraint for its refill number. Is my suggestion reasonable?
> > > >
> > > > I suppose yes: if refill_request is zero we potentially can skip 'bad'
> > > > mbufs and continue with recycling for remaining ones.
> > > > It would probably require more changes in current code, but sounds
> > > > ok to me in general.
> > > That's Ok. Thanks for your careful reviewing.
> >
> > I'm very sorry not to receive your e-mail and until now I realize we need to do
> > some code change for i40e driver. Also thanks ferruh to kindly remind this.

No worries at all, and thanks to you and Ferruh for the fast reaction.
 
> >
> > Agree with you we need some operation for the case that (refill_requirement
> > == 0 &&  rxep[i] == 0).
> > Thus maybe we can do a change as follows:
> >
> > for (i = 0; i < n; i++) {
> > 	rxep[0] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> > 	if (unlikely((rxep[0] == NULL && refill_requirement) ||
> > 			recycle_rxq_info->mp != txep[i].mbuf->pool))
> > 		nb_recycle_mbufs = 0;
> > 	if (likely(rxep[0]))
> > 		rxep++;
> > }
> >
> > Is above change is Ok?
> >
> 
> Just do a change for the above code:
> -----------------------------------------------------------------------------------------------------
> int j = 0;
> ......
> 
>  for (i = 0; i < n; i++) {
>  	rxep[j] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> 	if (unlikely(rxep[j] == NULL  && !refill_requirement))
>  		continue;
> 	if (unlikely((rxep[j] == NULL && refill_requirement) ||
>  			recycle_rxq_info->mp != txep[i].mbuf->pool))
>  		nb_recycle_mbufs  = 0;
> 	j++;
>  }
> 
> if (nb_recycle_mbufs  == 0)
> 	for (i = 0; i < j; i++) {
> 		if (rxep[i] != NULL)
> 			rte_mempool_put(rxep[i]->pool, rxep[i]);
> 	}

I think it should work...
One more thing, shouldn't we reset
nb_recycle_mbufs  = j;

Something like:
 if (nb_recycle_mbufs  == 0) {
 	for (i = 0; i < j; i++) {
 		if (rxep[i] != NULL)
 			rte_mempool_put(rxep[i]->pool, rxep[i]);
 	}
} else
  nb_recycle_mbufs  = j;

? 



 


> ---------------------------------------------------------------------------------------------------------
> > > >
> > > > > >
> > > > > >
> > > > > > > > >
> > > > > > > > > > > > +	/* Update counters for Tx. */
> > > > > > > > > > > > +	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free +
> > > > > > > > > > > > +txq-
> > > > > > > > >tx_rs_thresh);
> > > > > > > > > > > > +	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd +
> > > > > > > > > > > > +txq-
> > > > > > > > >tx_rs_thresh);
> > > > > > > > > > > > +	if (txq->tx_next_dd >= txq->nb_tx_desc)
> > > > > > > > > > > > +		txq->tx_next_dd = (uint16_t)(txq-
> > > >tx_rs_thresh -
> > > > > > > > > > > > +1);
> > > > > > > > > > > > +
> > > > > > > > > > > > +	return nb_recycle_mbufs; }
> > > > > > > > > > > > diff --git a/drivers/net/i40e/i40e_rxtx.c
> > > > > > > > > > > > b/drivers/net/i40e/i40e_rxtx.c index
> > > > > > > > > > > > b4f65b58fa..a9c9eb331c
> > > > > > > > > > > > 100644
> > > > > > > > > > > > --- a/drivers/net/i40e/i40e_rxtx.c
> > > > > > > > > > > > +++ b/drivers/net/i40e/i40e_rxtx.c
> > > > > > > > > > > > @@ -3199,6 +3199,30 @@ i40e_txq_info_get(struct
> > > > > > > > > > > > rte_eth_dev *dev, uint16_t queue_id,
> > > > > > > > > > > >  	qinfo->conf.offloads = txq->offloads;  }
> > > > > > > > > > > >
> > > > > > > > > > > > +void
> > > > > > > > > > > > +i40e_recycle_rxq_info_get(struct rte_eth_dev *dev,
> > > > > > > > > > > > +uint16_t
> > > > > > > > queue_id,
> > > > > > > > > > > > +	struct rte_eth_recycle_rxq_info *recycle_rxq_info) {
> > > > > > > > > > > > +	struct i40e_rx_queue *rxq;
> > > > > > > > > > > > +	struct i40e_adapter *ad =
> > > > > > > > > > > > +		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data-
> > > > > > > > >dev_private);
> > > > > > > > > > > > +
> > > > > > > > > > > > +	rxq = dev->data->rx_queues[queue_id];
> > > > > > > > > > > > +
> > > > > > > > > > > > +	recycle_rxq_info->mbuf_ring = (void *)rxq->sw_ring;
> > > > > > > > > > > > +	recycle_rxq_info->mp = rxq->mp;
> > > > > > > > > > > > +	recycle_rxq_info->mbuf_ring_size = rxq->nb_rx_desc;
> > > > > > > > > > > > +	recycle_rxq_info->receive_tail = &rxq->rx_tail;
> > > > > > > > > > > > +
> > > > > > > > > > > > +	if (ad->rx_vec_allowed) {
> > > > > > > > > > > > +		recycle_rxq_info->refill_requirement =
> > > > > > > > > > > > RTE_I40E_RXQ_REARM_THRESH;
> > > > > > > > > > > > +		recycle_rxq_info->refill_head = &rxq-
> > > >rxrearm_start;
> > > > > > > > > > > > +	} else {
> > > > > > > > > > > > +		recycle_rxq_info->refill_requirement = rxq-
> > > > > > > > >rx_free_thresh;
> > > > > > > > > > > > +		recycle_rxq_info->refill_head = &rxq-
> > > >rx_free_trigger;
> > > > > > > > > > > > +	}
> > > > > > > > > > > > +}
> > > > > > > > > > > > +
> > > > > > > > > > > >  #ifdef RTE_ARCH_X86  static inline bool
> > > > > > > > > > > > get_avx_supported(bool request_avx512) @@ -3293,6
> > > > > > > > > > > > +3317,8
> > > > > > @@
> > > > > > > > > > > > i40e_set_rx_function(struct rte_eth_dev *dev)
> > > > > > > > > > > >  				dev->rx_pkt_burst = ad-
> > > > > > >rx_use_avx2 ?
> > > > > > > > > > > >
> > > > > > 	i40e_recv_scattered_pkts_vec_avx2 :
> > > > > > > > > > > >
> > > 	i40e_recv_scattered_pkts_vec;
> > > > > > > > > > > > +				dev-
> > > >recycle_rx_descriptors_refill =
> > > > > > > > > > > > +
> > > > > > > > 	i40e_recycle_rx_descriptors_refill_vec;
> > > > > > > > > > > >  			}
> > > > > > > > > > > >  		} else {
> > > > > > > > > > > >  			if (ad->rx_use_avx512) { @@ -3311,9
> > > +3337,12
> > > > @@
> > > > > > > > > > > > i40e_set_rx_function(struct rte_eth_dev
> > > > > > > > *dev)
> > > > > > > > > > > >  				dev->rx_pkt_burst = ad-
> > > > > > >rx_use_avx2 ?
> > > > > > > > > > > >
> > > 	i40e_recv_pkts_vec_avx2 :
> > > > > > > > > > > >  					i40e_recv_pkts_vec;
> > > > > > > > > > > > +				dev-
> > > >recycle_rx_descriptors_refill =
> > > > > > > > > > > > +
> > > > > > > > 	i40e_recycle_rx_descriptors_refill_vec;
> > > > > > > > > > > >  			}
> > > > > > > > > > > >  		}
> > > > > > > > > > > >  #else /* RTE_ARCH_X86 */
> > > > > > > > > > > > +		dev->recycle_rx_descriptors_refill =
> > > > > > > > > > > > +i40e_recycle_rx_descriptors_refill_vec;
> > > > > > > > > > > >  		if (dev->data->scattered_rx) {
> > > > > > > > > > > >  			PMD_INIT_LOG(DEBUG,
> > > > > > > > > > > >  				     "Using Vector Scattered Rx
> > > > > > (port %d).", @@
> > > > > > > > > > > > -3481,15 +3510,18 @@ i40e_set_tx_function(struct
> > > > > > > > > > > > rte_eth_dev
> > > > > > *dev)
> > > > > > > > > > > >  				dev->tx_pkt_burst = ad-
> > > > > > >tx_use_avx2 ?
> > > > > > > > > > > >
> > > > > > i40e_xmit_pkts_vec_avx2 :
> > > > > > > > > > > >
> > > > > > i40e_xmit_pkts_vec;
> > > > > > > > > > > > +				dev->recycle_tx_mbufs_reuse
> > > =
> > > > > > > > > > > > i40e_recycle_tx_mbufs_reuse_vec;
> > > > > > > > > > > >  			}
> > > > > > > > > > > >  #else /* RTE_ARCH_X86 */
> > > > > > > > > > > >  			PMD_INIT_LOG(DEBUG, "Using Vector
> > > Tx
> > > > > > (port %d).",
> > > > > > > > > > > >  				     dev->data->port_id);
> > > > > > > > > > > >  			dev->tx_pkt_burst =
> > > i40e_xmit_pkts_vec;
> > > > > > > > > > > > +			dev->recycle_tx_mbufs_reuse =
> > > > > > > > > > > > i40e_recycle_tx_mbufs_reuse_vec;  #endif /*
> > > > > > > > > > > > RTE_ARCH_X86
> > > */
> > > > > > > > > > > >  		} else {
> > > > > > > > > > > >  			PMD_INIT_LOG(DEBUG, "Simple tx
> > > finally be
> > > > > > used.");
> > > > > > > > > > > >  			dev->tx_pkt_burst =
> > > i40e_xmit_pkts_simple;
> > > > > > > > > > > > +			dev->recycle_tx_mbufs_reuse =
> > > > > > > > > > > > i40e_recycle_tx_mbufs_reuse_vec;
> > > > > > > > > > > >  		}
> > > > > > > > > > > >  		dev->tx_pkt_prepare =
> > > i40e_simple_prep_pkts;
> > > > > > > > > > > >  	} else {
> > > > > > > > > > > > diff --git a/drivers/net/i40e/i40e_rxtx.h
> > > > > > > > > > > > b/drivers/net/i40e/i40e_rxtx.h index
> > > > > > > > > > > > a8686224e5..b191f23e1f
> > > > > > > > > > > > 100644
> > > > > > > > > > > > --- a/drivers/net/i40e/i40e_rxtx.h
> > > > > > > > > > > > +++ b/drivers/net/i40e/i40e_rxtx.h
> > > > > > > > > > > > @@ -236,6 +236,10 @@ uint32_t
> > > > > > > > > > > > i40e_dev_rx_queue_count(void *rx_queue);  int
> > > > > > > > > > > > i40e_dev_rx_descriptor_status(void *rx_queue,
> > > > > > > > > > > > uint16_t offset);  int
> > > > > > > > > > > > i40e_dev_tx_descriptor_status(void
> > > > > > > > > > > > *tx_queue, uint16_t offset);
> > > > > > > > > > > >
> > > > > > > > > > > > +uint16_t i40e_recycle_tx_mbufs_reuse_vec(void
> > *tx_queue,
> > > > > > > > > > > > +		struct rte_eth_recycle_rxq_info
> > > *recycle_rxq_info);
> > > > > > > > void
> > > > > > > > > > > > +i40e_recycle_rx_descriptors_refill_vec(void
> > > > > > > > > > > > +*rx_queue, uint16_t nb_mbufs);
> > > > > > > > > > > > +
> > > > > > > > > > > >  uint16_t i40e_recv_pkts_vec(void *rx_queue, struct
> > > > > > > > > > > > rte_mbuf
> > > > > > > > **rx_pkts,
> > > > > > > > > > > >  			    uint16_t nb_pkts);  uint16_t
> > > > > > > > > > > > i40e_recv_scattered_pkts_vec(void *rx_queue, diff
> > > > > > > > > > > > --git a/drivers/net/i40e/meson.build
> > > > > > > > > > > > b/drivers/net/i40e/meson.build index
> > > > > > > > > > > > 8e53b87a65..3b1a233c84 100644
> > > > > > > > > > > > --- a/drivers/net/i40e/meson.build
> > > > > > > > > > > > +++ b/drivers/net/i40e/meson.build
> > > > > > > > > > > > @@ -34,6 +34,7 @@ sources = files(
> > > > > > > > > > > >          'i40e_tm.c',
> > > > > > > > > > > >          'i40e_hash.c',
> > > > > > > > > > > >          'i40e_vf_representor.c',
> > > > > > > > > > > > +	'i40e_recycle_mbufs_vec_common.c',
> > > > > > > > > > > >          'rte_pmd_i40e.c',
> > > > > > > > > > > >  )
> > > > > > > > > > > >
> > > > > > > > > > > > --
> > > > > > > > > > > > 2.25.1
  
Feifei Wang Sept. 23, 2023, 5:52 a.m. UTC | #15
> -----Original Message-----
> From: Konstantin Ananyev <konstantin.ananyev@huawei.com>
> Sent: Saturday, September 23, 2023 12:41 AM
> To: Feifei Wang <Feifei.Wang2@arm.com>; Konstantin Ananyev
> <konstantin.v.ananyev@yandex.ru>
> Cc: dev@dpdk.org; nd <nd@arm.com>; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>; Yuying Zhang <Yuying.Zhang@intel.com>; Beilei
> Xing <beilei.xing@intel.com>; nd <nd@arm.com>; nd <nd@arm.com>; nd
> <nd@arm.com>; nd <nd@arm.com>; nd <nd@arm.com>; nd <nd@arm.com>;
> nd <nd@arm.com>
> Subject: RE: [PATCH v11 2/4] net/i40e: implement mbufs recycle mode
> 
> 
> Hi Feifei,
> 
> > > > -----Original Message-----
> > > > From: Feifei Wang
> > > > Sent: Tuesday, September 5, 2023 11:11 AM
> > > > To: Konstantin Ananyev <konstantin.ananyev@huawei.com>; Konstantin
> > > > Ananyev <konstantin.v.ananyev@yandex.ru>
> > > > Cc: dev@dpdk.org; nd <nd@arm.com>; Honnappa Nagarahalli
> > > > <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang
> > > <Ruifeng.Wang@arm.com>;
> > > > Yuying Zhang <Yuying.Zhang@intel.com>; Beilei Xing
> > > > <beilei.xing@intel.com>; nd <nd@arm.com>; nd <nd@arm.com>; nd
> > > > <nd@arm.com>; nd <nd@arm.com>; nd <nd@arm.com>
> > > > Subject: RE: [PATCH v11 2/4] net/i40e: implement mbufs recycle
> > > > mode
> > > >
> > > >
> > > >
> > > > > -----Original Message-----
> > > > > From: Konstantin Ananyev <konstantin.ananyev@huawei.com>
> > > > > Sent: Monday, September 4, 2023 6:22 PM
> > > > > To: Feifei Wang <Feifei.Wang2@arm.com>; Konstantin Ananyev
> > > > > <konstantin.v.ananyev@yandex.ru>
> > > > > Cc: dev@dpdk.org; nd <nd@arm.com>; Honnappa Nagarahalli
> > > > > <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang
> > > > <Ruifeng.Wang@arm.com>;
> > > > > Yuying Zhang <Yuying.Zhang@intel.com>; Beilei Xing
> > > > > <beilei.xing@intel.com>; nd <nd@arm.com>; nd <nd@arm.com>; nd
> > > > > <nd@arm.com>; nd <nd@arm.com>
> > > > > Subject: RE: [PATCH v11 2/4] net/i40e: implement mbufs recycle
> > > > > mode
> > > > >
> > > > >
> > > > >
> > > > > > > > > > > > > Define specific function implementation for i40e driver.
> > > > > > > > > > > > > Currently, mbufs recycle mode can support 128bit
> > > > > > > > > > > > > vector path and
> > > > > > > > > > > > > avx2
> > > > > > > > > > > path.
> > > > > > > > > > > > > And can be enabled both in fast free and no fast free
> mode.
> > > > > > > > > > > > >
> > > > > > > > > > > > > Suggested-by: Honnappa Nagarahalli
> > > > > > > > > > > > > <honnappa.nagarahalli@arm.com>
> > > > > > > > > > > > > Signed-off-by: Feifei Wang
> > > > > > > > > > > > > <feifei.wang2@arm.com>
> > > > > > > > > > > > > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > > > > > > > > > > > > Reviewed-by: Honnappa Nagarahalli
> > > > > > > > > <honnappa.nagarahalli@arm.com>
> > > > > > > > > > > > > ---
> > > > > > > > > > > > >  drivers/net/i40e/i40e_ethdev.c                |   1 +
> > > > > > > > > > > > >  drivers/net/i40e/i40e_ethdev.h                |   2 +
> > > > > > > > > > > > >  .../net/i40e/i40e_recycle_mbufs_vec_common.c  |
> > > > > > > > > > > > > 147
> > > > > > > > > > > > > ++++++++++++++++++
> > > > > > > > > > > > >  drivers/net/i40e/i40e_rxtx.c                  |  32 ++++
> > > > > > > > > > > > >  drivers/net/i40e/i40e_rxtx.h                  |   4 +
> > > > > > > > > > > > >  drivers/net/i40e/meson.build                  |   1 +
> > > > > > > > > > > > >  6 files changed, 187 insertions(+)  create mode
> > > > > > > > > > > > > 100644
> > > > > > > > > > > > > drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
> > > > > > > > > > > > >
> > > > > > > > > > > > > diff --git a/drivers/net/i40e/i40e_ethdev.c
> > > > > > > > > > > > > b/drivers/net/i40e/i40e_ethdev.c index
> > > > > > > > > > > > > 8271bbb394..50ba9aac94
> > > > > > > > > > > > > 100644
> > > > > > > > > > > > > --- a/drivers/net/i40e/i40e_ethdev.c
> > > > > > > > > > > > > +++ b/drivers/net/i40e/i40e_ethdev.c
> > > > > > > > > > > > > @@ -496,6 +496,7 @@ static const struct
> > > > > > > > > > > > > eth_dev_ops i40e_eth_dev_ops
> > > > > > > > > > > = {
> > > > > > > > > > > > >  	.flow_ops_get                 = i40e_dev_flow_ops_get,
> > > > > > > > > > > > >  	.rxq_info_get                 = i40e_rxq_info_get,
> > > > > > > > > > > > >  	.txq_info_get                 = i40e_txq_info_get,
> > > > > > > > > > > > > +	.recycle_rxq_info_get         =
> > > > i40e_recycle_rxq_info_get,
> > > > > > > > > > > > >  	.rx_burst_mode_get            =
> > > > i40e_rx_burst_mode_get,
> > > > > > > > > > > > >  	.tx_burst_mode_get            =
> > > > i40e_tx_burst_mode_get,
> > > > > > > > > > > > >  	.timesync_enable              = i40e_timesync_enable,
> > > > > > > > > > > > > diff --git a/drivers/net/i40e/i40e_ethdev.h
> > > > > > > > > > > > > b/drivers/net/i40e/i40e_ethdev.h index
> > > > > > > > > > > > > 6f65d5e0ac..af758798e1
> > > > > > > > > > > > > 100644
> > > > > > > > > > > > > --- a/drivers/net/i40e/i40e_ethdev.h
> > > > > > > > > > > > > +++ b/drivers/net/i40e/i40e_ethdev.h
> > > > > > > > > > > > > @@ -1355,6 +1355,8 @@ void
> > > > > > > > > > > > > i40e_rxq_info_get(struct rte_eth_dev *dev, uint16_t
> queue_id,
> > > > > > > > > > > > >  	struct rte_eth_rxq_info *qinfo);  void
> > > > > > > > > > > > > i40e_txq_info_get(struct rte_eth_dev *dev,
> > > > > > > > > > > > > uint16_t
> > > queue_id,
> > > > > > > > > > > > >  	struct rte_eth_txq_info *qinfo);
> > > > > > > > > > > > > +void i40e_recycle_rxq_info_get(struct
> > > > > > > > > > > > > +rte_eth_dev *dev, uint16_t
> > > > > > > > > > > queue_id,
> > > > > > > > > > > > > +	struct rte_eth_recycle_rxq_info
> > > > > > > > > > > > > +*recycle_rxq_info);
> > > > > > > > > > > > >  int i40e_rx_burst_mode_get(struct rte_eth_dev
> > > > > > > > > > > > > *dev, uint16_t
> > > > > > > > > queue_id,
> > > > > > > > > > > > >  			   struct rte_eth_burst_mode *mode);
> > > > int
> > > > > > > > > > > > > i40e_tx_burst_mode_get(struct rte_eth_dev *dev,
> > > > > > > > > > > > > uint16_t queue_id, diff -- git
> > > > > > > > > > > > > a/drivers/net/i40e/i40e_recycle_mbufs_vec_common
> > > > > > > > > > > > > .c
> > > > > > > > > > > > > b/drivers/net/i40e/i40e_recycle_mbufs_vec_common
> > > > > > > > > > > > > .c new file mode 100644 index
> > > > > > > > > > > > > 0000000000..5663ecccde
> > > > > > > > > > > > > --- /dev/null
> > > > > > > > > > > > > +++ b/drivers/net/i40e/i40e_recycle_mbufs_vec_co
> > > > > > > > > > > > > +++ mmon
> > > > > > > > > > > > > +++ .c
> > > > > > > > > > > > > @@ -0,0 +1,147 @@
> > > > > > > > > > > > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > > > > > > > > > > > + * Copyright (c) 2023 Arm Limited.
> > > > > > > > > > > > > + */
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +#include <stdint.h> #include <ethdev_driver.h>
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +#include "base/i40e_prototype.h"
> > > > > > > > > > > > > +#include "base/i40e_type.h"
> > > > > > > > > > > > > +#include "i40e_ethdev.h"
> > > > > > > > > > > > > +#include "i40e_rxtx.h"
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +#pragma GCC diagnostic ignored "-Wcast-qual"
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +void
> > > > > > > > > > > > > +i40e_recycle_rx_descriptors_refill_vec(void
> > > > > > > > > > > > > +*rx_queue, uint16_t
> > > > > > > > > > > > > +nb_mbufs) {
> > > > > > > > > > > > > +	struct i40e_rx_queue *rxq = rx_queue;
> > > > > > > > > > > > > +	struct i40e_rx_entry *rxep;
> > > > > > > > > > > > > +	volatile union i40e_rx_desc *rxdp;
> > > > > > > > > > > > > +	uint16_t rx_id;
> > > > > > > > > > > > > +	uint64_t paddr;
> > > > > > > > > > > > > +	uint64_t dma_addr;
> > > > > > > > > > > > > +	uint16_t i;
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +	rxdp = rxq->rx_ring + rxq->rxrearm_start;
> > > > > > > > > > > > > +	rxep = &rxq->sw_ring[rxq->rxrearm_start];
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +	for (i = 0; i < nb_mbufs; i++) {
> > > > > > > > > > > > > +		/* Initialize rxdp descs. */
> > > > > > > > > > > > > +		paddr = (rxep[i].mbuf)->buf_iova +
> > > > > > > > > > > > > RTE_PKTMBUF_HEADROOM;
> > > > > > > > > > > > > +		dma_addr = rte_cpu_to_le_64(paddr);
> > > > > > > > > > > > > +		/* flush desc with pa dma_addr */
> > > > > > > > > > > > > +		rxdp[i].read.hdr_addr = 0;
> > > > > > > > > > > > > +		rxdp[i].read.pkt_addr = dma_addr;
> > > > > > > > > > > > > +	}
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +	/* Update the descriptor initializer index */
> > > > > > > > > > > > > +	rxq->rxrearm_start += nb_mbufs;
> > > > > > > > > > > > > +	rx_id = rxq->rxrearm_start - 1;
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
> > > > > > > > > > > > > +		rxq->rxrearm_start = 0;
> > > > > > > > > > > > > +		rx_id = rxq->nb_rx_desc - 1;
> > > > > > > > > > > > > +	}
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +	rxq->rxrearm_nb -= nb_mbufs;
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +	rte_io_wmb();
> > > > > > > > > > > > > +	/* Update the tail pointer on the NIC */
> > > > > > > > > > > > > +	I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail,
> > > > > > > > > > > > > +rx_id); }
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +uint16_t
> > > > > > > > > > > > > +i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
> > > > > > > > > > > > > +	struct rte_eth_recycle_rxq_info *recycle_rxq_info) {
> > > > > > > > > > > > > +	struct i40e_tx_queue *txq = tx_queue;
> > > > > > > > > > > > > +	struct i40e_tx_entry *txep;
> > > > > > > > > > > > > +	struct rte_mbuf **rxep;
> > > > > > > > > > > > > +	int i, n;
> > > > > > > > > > > > > +	uint16_t nb_recycle_mbufs;
> > > > > > > > > > > > > +	uint16_t avail = 0;
> > > > > > > > > > > > > +	uint16_t mbuf_ring_size = recycle_rxq_info-
> > > > >mbuf_ring_size;
> > > > > > > > > > > > > +	uint16_t mask = recycle_rxq_info->mbuf_ring_size - 1;
> > > > > > > > > > > > > +	uint16_t refill_requirement =
> > > > > > > > > > > > > +recycle_rxq_info-
> > > > > > > > > >refill_requirement;
> > > > > > > > > > > > > +	uint16_t refill_head = *recycle_rxq_info->refill_head;
> > > > > > > > > > > > > +	uint16_t receive_tail =
> > > > > > > > > > > > > +*recycle_rxq_info->receive_tail;
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +	/* Get available recycling Rx buffers. */
> > > > > > > > > > > > > +	avail = (mbuf_ring_size - (refill_head -
> > > > > > > > > > > > > +receive_tail)) & mask;
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +	/* Check Tx free thresh and Rx available space. */
> > > > > > > > > > > > > +	if (txq->nb_tx_free > txq->tx_free_thresh ||
> > > > > > > > > > > > > +avail <=
> > > > > > > > > > > > > +txq-
> > > > > > > > > >tx_rs_thresh)
> > > > > > > > > > > > > +		return 0;
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +	/* check DD bits on threshold descriptor */
> > > > > > > > > > > > > +	if
> > > > > > > > > > > > > +((txq->tx_ring[txq->tx_next_dd].cmd_type_offset
> > > > > > > > > > > > > +_bsz
> > > > > > > > > > > > > +&
> > > > > > > > > > > > > +
> > > > > > > > > > > > > 	rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
> > > > > > > > > > > > > +
> > > > > > > > > > > > > 	rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
> > > > > > > > > > > > > +		return 0;
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +	n = txq->tx_rs_thresh;
> > > > > > > > > > > > > +	nb_recycle_mbufs = n;
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +	/* Mbufs recycle mode can only support no ring
> > > > > > > > > > > > > +buffer
> > > > > > > > > wrapping
> > > > > > > > > > > > > around.
> > > > > > > > > > > > > +	 * Two case for this:
> > > > > > > > > > > > > +	 *
> > > > > > > > > > > > > +	 * case 1: The refill head of Rx buffer ring
> > > > > > > > > > > > > +needs to be aligned
> > > > > > > > > with
> > > > > > > > > > > > > +	 * mbuf ring size. In this case, the number of
> > > > > > > > > > > > > +Tx
> > > > freeing buffers
> > > > > > > > > > > > > +	 * should be equal to refill_requirement.
> > > > > > > > > > > > > +	 *
> > > > > > > > > > > > > +	 * case 2: The refill head of Rx ring buffer
> > > > > > > > > > > > > +does not need to be
> > > > > > > > > aligned
> > > > > > > > > > > > > +	 * with mbuf ring size. In this case, the
> > > > > > > > > > > > > +update of refill head
> > > > > > > > > can not
> > > > > > > > > > > > > +	 * exceed the Rx mbuf ring size.
> > > > > > > > > > > > > +	 */
> > > > > > > > > > > > > +	if (refill_requirement != n ||
> > > > > > > > > > > > > +		(!refill_requirement && (refill_head + n >
> > > > > > > > > mbuf_ring_size)))
> > > > > > > > > > > > > +		return 0;
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +	/* First buffer to free from S/W ring is at index
> > > > > > > > > > > > > +	 * tx_next_dd - (tx_rs_thresh-1).
> > > > > > > > > > > > > +	 */
> > > > > > > > > > > > > +	txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
> > > > > > > > > > > > > +	rxep = recycle_rxq_info->mbuf_ring;
> > > > > > > > > > > > > +	rxep += refill_head;
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +	if (txq->offloads &
> > > > RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
> > > > > > > > > > > > > +		/* Avoid txq contains buffers from unexpected
> > > > > > > > > mempool. */
> > > > > > > > > > > > > +		if (unlikely(recycle_rxq_info->mp
> > > > > > > > > > > > > +					!= txep[0].mbuf-
> > > > >pool))
> > > > > > > > > > > > > +			return 0;
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +		/* Directly put mbufs from Tx to Rx. */
> > > > > > > > > > > > > +		for (i = 0; i < n; i++)
> > > > > > > > > > > > > +			rxep[i] = txep[i].mbuf;
> > > > > > > > > > > > > +	} else {
> > > > > > > > > > > > > +		for (i = 0; i < n; i++) {
> > > > > > > > > > > > > +			rxep[i] =
> > > > > > > > > rte_pktmbuf_prefree_seg(txep[i].mbuf);
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +			/* If Tx buffers are not the last
> > > > reference or
> > > > > > > > > from
> > > > > > > > > > > > > +			 * unexpected mempool, previous
> > > > copied
> > > > > > > > > buffers are
> > > > > > > > > > > > > +			 * considered as invalid.
> > > > > > > > > > > > > +			 */
> > > > > > > > > > > > > +			if (unlikely((rxep[i] == NULL &&
> > > > > > > > > refill_requirement) ||
> > > > > > > > > > > > [Konstantin]
> > > > > > > > > > > > Could you pls remind me why it is ok to have
> > > > > > > > > > > > rxep[i]==NULL when refill_requirement is not set?
> > > > > > > > > > > >
> > > > > > > > > > > > If reill_requirement is not zero, it means each tx
> > > > > > > > > > > > freed buffer must be valid and can be put into Rx sw_ring.
> > > > > > > > > > > > Then the refill head of Rx buffer ring can be
> > > > > > > > > > > > aligned with mbuf ring size. Briefly speaking the
> > > > > > > > > > > > number
> > > > > > > > > > > of Tx valid freed buffer must be equal to Rx
> refill_requirement.
> > > > > > > > > > > For example, i40e driver.
> > > > > > > > > > > >
> > > > > > > > > > > > If reill_requirement is zero, it means that the
> > > > > > > > > > > > refill head of Rx buffer ring does not need to be
> > > > > > > > > > > > aligned with mbuf ring size, thus if Tx have n
> > > > > > > > > > > > valid freed buffers, we just need to put these n
> > > > > > > > > > > > buffers into Rx
> > > > > > > > > > > > sw-
> > > > > > > > > > > ring, and not to be equal to the Rx setting rearm number.
> > > > > > > > > > > For example, mlx5 driver.
> > > > > > > > > > > >
> > > > > > > > > > > > In conclusion, above difference is due to pmd
> > > > > > > > > > > > drivers have different
> > > > > > > > > > > strategies to update their Rx rearm(refill) head.
> > > > > > > > > > > > For i40e driver, if rearm_head exceed 1024, it
> > > > > > > > > > > > will be set as
> > > > > > > > > > > > 0 due to  the
> > > > > > > > > > > number of each rearm is a fixed value by default.
> > > > > > > > > > > > For mlx5 driver. Its rearm_head can exceed 1024,
> > > > > > > > > > > > and use mask to achieve
> > > > > > > > > > > real index. Thus its rearm number can be a different value.
> > > > > > > > > > >
> > > > > > > > > > > Ok, but if rte_pktmbuf_prefree_seg(txep[i].mbuf), it
> > > > > > > > > > > means that this mbuf is not free yet and can't be reused.
> > > > > > > > > > > Shouldn't we then set nb_recycle_mbufs = 0 in that case
> too?
> > > > > > > > > > > Or probably would be enough to skip that mbuf?
> > > > > > > > > > > Might be something like that:
> > > > > > > > > > >
> > > > > > > > > > > for (i = 0, j = 0; i < n; i++) {
> > > > > > > > > > >
> > > > > > > > > > > 	rxep[j] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> > > > > > > > > > > 	if (rxep[j] == NULL || recycle_rxq_info->mp !=
> > > > > > > > > > > rxep[j].mbuf-
> > > > > >pool))  {
> > > > > > > > > > > 		if (refill_requirement) {
> > > > > > > > > > > 			nb_recycle_mbufs = 0;
> > > > > > > > > > > 			break;
> > > > > > > > > > > 		}
> > > > > > > > > > > 	} else
> > > > > > > > > > > 		j++;
> > > > > > > > > > > }
> > > > > > > > > > >
> > > > > > > > > > > /* now j contains actual number of recycled mbufs */
> > > > > > > > > > >
> > > > > > > > > > > ?
> > > > > > > > > > >
> > > > > > > > > > > >
> > > > > > > > > > > > > +					recycle_rxq_info-
> > > > >mp !=
> > > > > > > > > txep[i].mbuf-
> > > > > > > > > > > > > >pool))
> > > > > > > > > > > > > +				nb_recycle_mbufs = 0;
> > > > > > > > > > > > > +		}
> > > > > > > > > > > > > +		/* If Tx buffers are not the last reference or
> > > > > > > > > > > > > +		 * from unexpected mempool, all recycled
> > > > buffers
> > > > > > > > > > > > > +		 * are put into mempool.
> > > > > > > > > > > > > +		 */
> > > > > > > > > > > > > +		if (nb_recycle_mbufs == 0)
> > > > > > > > > > > > > +			for (i = 0; i < n; i++) {
> > > > > > > > > > > > > +				if (rxep[i] != NULL)
> > > > > > > > > > > > > +
> > > > 	rte_mempool_put(rxep[i]-
> > > > > > > > > >pool,
> > > > > > > > > > > > > rxep[i]);
> > > > > > > > > > > > > +			}
> > > > > > > > > > > > > +	}
> > > > > > > > > > > > > +
> > > > > > > > > > [Konstantin] After another thought, it might be easier
> > > > > > > > > > and cleaner
> > > > > just to:
> > > > > > > > > > if (rxep[j] == NULL || recycle_rxq_info->mp != rxep[j].mbuf-
> >pool)
> > > > > > > > > > 	nb_recycle_mbufs = 0;
> > > > > > > > > >
> > > > > > > > > > Anyway, from my understanding - if
> > > > > > > > > > rte_pktmbuf_prefree_seg(mbuf) returns NULL, then we
> > > > > > > > > > can't recycle
> > > > > that mbuf.
> > > > > > > > > >
> > > > > > > > > > [Feifei] Agree with that
> > > > > > > > > > 'rte_pktmbuf_prefree_seg(mbuf) returns NULL, then
> > > > > > > > > we can't recycle that mbuf'.
> > > > > > > > > >
> > > > > > > > > > Firstly, we should know for i40e driver,  the number
> > > > > > > > > > of free mbufs is fixed, it
> > > > > > > > > must equal to 'tx_rs_thresh'
> > > > > > > > > > This means if we start to free Tx mbufs, it cannot be
> > > > > > > > > > interrupted until the
> > > > > > > > > given amount of mbufs are freed.
> > > > > > > > > > In the meanwhile, doing prefree operation for a Tx
> > > > > > > > > > mbuf can be looked at this mbuf is freed from this TX
> > > > > > > > > > sw-ring if the API returns NULL. This is due
> > > > > > > > > to that call 'prefree_seg' function means update the mbuf
> refcnt.
> > > > > > > > > >
> > > > > > > > > > So let's come back our recycle mbuf case.
> > > > > > > > > > Here we consider that the case if we have 32 mbufs
> > > > > > > > > > need to be freed, and
> > > > > > > > > we firstly do the pre-free.
> > > > > > > > > > And then first 8 mbufs is good and return value is not none.
> > > > > > > > > > But the 9th
> > > > > > > > > mbuf is bad, its refcnt is more than 1.
> > > > > > > > > > So we just update its refcnt and cannot put it into Rx sw-ring.
> > > > > > > > > > For Tx sw-ring,
> > > > > > > > > this mbuf has been freed.
> > > > > > > > > > Then we should continue to do pre-free operation for
> > > > > > > > > > the next Tx mbufs to ensure the given amount of mbufs are
> freed.
> > > > > > > > > >
> > > > > > > > > > Do a conclusion for this, this is because if we start
> > > > > > > > > > to do pre-free operation,  the Tx mbuf refcnt value
> > > > > > > > > > maybe changed, so we cannot stop or
> > > > > > > > > break until finish all the pre-free operation.
> > > > > > > > > >
> > > > > > > > > > Finally, in the case that Rx refill_request is not
> > > > > > > > > > zero, but the valid mbuf amount is less than this
> > > > > > > > > > value, we must put back this Tx mbufs into
> > > > > > > > > mempool.
> > > > > > > > > >
> > > > > > > > > > Above is the reason why I do not directly jump out of
> > > > > > > > > > the loop if some mbuf
> > > > > > > > > return value is NULL.
> > > > > > > > >
> > > > > > > > > Yep, I already realized that it is a bit more
> > > > > > > > > complicated and we need to continue with prefree() for
> > > > > > > > > all packets even when we get NULL in
> > > > > > > the middle.
> > > > > > > > > Anyway the code has to be changed, right?
> > > > > > > > >
> > > > > > > > Sorry, I think for the code, it is unnecessary to be changed.
> > > > > > > > For no fast free path, currently:
> > > > > > > > 1.   We check whether each mbuf is Ok and call pre_free function
> > > > > > > > ----------------------------------------------------------
> > > > > > > > ----
> > > > > > > > --
> > > > > > > > --
> > > > > > > > ----
> > > > > > > > ----------------------------------------------------------
> > > > > > > > ----
> > > > > > > > 2.1 For the mbuf return value is not NULL, it is put into Rx sw-
> ring.
> > > > > > > > 2.2 For the mbuf return value is zero and refill-request,
> > > > > > > > it will also firstly put into Rx sw-ring, and we set
> > > > > > > > nb_recycle =
> > > > > > > > 0
> > > > > > > > ----------------------------------------------------------
> > > > > > > > ----
> > > > > > > > --
> > > > > > > > --
> > > > > > > > ----
> > > > > > > > ----------------------------------------------------------
> > > > > > > > ----
> > > > > > > > 3.1 We check nb_recycle, if it is not 0, we will continue
> > > > > > > > to rearm Rx descs
> > > > > > > and update its index by call descs_refill function.
> > > > > > > > 3.2 if nb_recycle is 0, we will put valid recycle mbufs
> > > > > > > > back into mempool as general path. This is necessary,
> > > > > > > > because we need to ensure the freed Tx number is
> > > > > > > > fixed.(Some buffers return is null can be seen as freed,
> > > > > > > > others need to be put into mempool)
> > > > > > > >
> > > > > > > > Or maybe I ignore something?
> > > > > > >
> > > > > > >
> > > > > > > I am talking about the case when both refill_requirement and
> > > > > > > mbuf return values iare zero:
> > > > > > > if (unlikely((rxep[i] == NULL && refill_requirement) ||         // ???
> > > rxep[i]
> > > > ==
> > > > > 0
> > > > > > > AND refill_requirement == 0 ???
> > > > > > >  		recycle_rxq_info->mp != txep[i].mbuf->pool))
> > > > > > > 	nb_recycle_mbufs = 0;
> > > > > > >
> > > > > > > As I can read the code you treat such situation as valid,
> > > > > > > while I think we should reset nb_recycle_mbufs to zero when
> > > > > > > rxep[i] == NULL, no matter what value refill_requirement is.
> > > > > >
> > > > > > So this means for maybe MLX5 driver, its refill_request = 0.
> > > > > > And if some mbufs return value is zero, the other mbufs can
> > > > > > not be recycled into Rx sw-ring? Because nb_recycle=0, and
> > > > > > they need to be put into
> > > > > mempool.
> > > > > >
> > > > > > I think for such as MLX5 driver, we can allow recycle some
> > > > > > valid mbufs into
> > > > > Rx ring.
> > > > > > Because no constraint for its refill number. Is my suggestion
> reasonable?
> > > > >
> > > > > I suppose yes: if refill_request is zero we potentially can skip 'bad'
> > > > > mbufs and continue with recycling for remaining ones.
> > > > > It would probably require more changes in current code, but
> > > > > sounds ok to me in general.
> > > > That's Ok. Thanks for your careful reviewing.
> > >
> > > I'm very sorry not to receive your e-mail and until now I realize we
> > > need to do some code change for i40e driver. Also thanks ferruh to kindly
> remind this.
> 
> No worries at all, and thanks to you and Ferruh for the fast reaction.
> 
> > >
> > > Agree with you we need some operation for the case that
> > > (refill_requirement == 0 &&  rxep[i] == 0).
> > > Thus maybe we can do a change as follows:
> > >
> > > for (i = 0; i < n; i++) {
> > > 	rxep[0] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> > > 	if (unlikely((rxep[0] == NULL && refill_requirement) ||
> > > 			recycle_rxq_info->mp != txep[i].mbuf->pool))
> > > 		nb_recycle_mbufs = 0;
> > > 	if (likely(rxep[0]))
> > > 		rxep++;
> > > }
> > >
> > > Is above change is Ok?
> > >
> >
> > Just do a change for the above code:
> > ----------------------------------------------------------------------
> > -------------------------------
> > int j = 0;
> > ......
> >
> >  for (i = 0; i < n; i++) {
> >  	rxep[j] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> > 	if (unlikely(rxep[j] == NULL  && !refill_requirement))
> >  		continue;
> > 	if (unlikely((rxep[j] == NULL && refill_requirement) ||
> >  			recycle_rxq_info->mp != txep[i].mbuf->pool))
> >  		nb_recycle_mbufs  = 0;
> > 	j++;
> >  }
> >
> > if (nb_recycle_mbufs  == 0)
> > 	for (i = 0; i < j; i++) {
> > 		if (rxep[i] != NULL)
> > 			rte_mempool_put(rxep[i]->pool, rxep[i]);
> > 	}
> 
> I think it should work...
> One more thing, shouldn't we reset
> nb_recycle_mbufs  = j;
> 
> Something like:
>  if (nb_recycle_mbufs  == 0) {
>  	for (i = 0; i < j; i++) {
>  		if (rxep[i] != NULL)
>  			rte_mempool_put(rxep[i]->pool, rxep[i]);
>  	}
> } else
>   nb_recycle_mbufs  = j;
> 
> ?
> 
> 
Yes, we should reset it.

I just did a performance test with the new code and found that performance decreased by 8% compared
with the old version.

Thus I think we should go back to your previous idea: 
"we should reset nb_recycle_mbufs to zero when rxep[i] == NULL,
no matter what value refill_requirement is":

for (i = 0; i < n; i++) {
	rxep[i] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
	if (unlikely((rxep[i] == NULL) ||
			recycle_rxq_info->mp != txep[i].mbuf->pool))
		nb_recycle_mbufs  = 0;
}

Thus we drop the case of putting the remaining valid mbufs into the Rx sw-ring
when there is no refill_request, but we maintain good performance.

> 
> 
> 
> > ----------------------------------------------------------------------
> > -----------------------------------

> > > > >
> > > > > > >
> > > > > > >
> > > > > > > > > >
> > > > > > > > > > > > > +	/* Update counters for Tx. */
> > > > > > > > > > > > > +	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free +
> > > > > > > > > > > > > +txq-
> > > > > > > > > >tx_rs_thresh);
> > > > > > > > > > > > > +	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd +
> > > > > > > > > > > > > +txq-
> > > > > > > > > >tx_rs_thresh);
> > > > > > > > > > > > > +	if (txq->tx_next_dd >= txq->nb_tx_desc)
> > > > > > > > > > > > > +		txq->tx_next_dd = (uint16_t)(txq-
> > > > >tx_rs_thresh -
> > > > > > > > > > > > > +1);
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +	return nb_recycle_mbufs; }
> > > > > > > > > > > > > diff --git a/drivers/net/i40e/i40e_rxtx.c
> > > > > > > > > > > > > b/drivers/net/i40e/i40e_rxtx.c index
> > > > > > > > > > > > > b4f65b58fa..a9c9eb331c
> > > > > > > > > > > > > 100644
> > > > > > > > > > > > > --- a/drivers/net/i40e/i40e_rxtx.c
> > > > > > > > > > > > > +++ b/drivers/net/i40e/i40e_rxtx.c
> > > > > > > > > > > > > @@ -3199,6 +3199,30 @@ i40e_txq_info_get(struct
> > > > > > > > > > > > > rte_eth_dev *dev, uint16_t queue_id,
> > > > > > > > > > > > >  	qinfo->conf.offloads = txq->offloads;  }
> > > > > > > > > > > > >
> > > > > > > > > > > > > +void
> > > > > > > > > > > > > +i40e_recycle_rxq_info_get(struct rte_eth_dev
> > > > > > > > > > > > > +*dev, uint16_t
> > > > > > > > > queue_id,
> > > > > > > > > > > > > +	struct rte_eth_recycle_rxq_info *recycle_rxq_info) {
> > > > > > > > > > > > > +	struct i40e_rx_queue *rxq;
> > > > > > > > > > > > > +	struct i40e_adapter *ad =
> > > > > > > > > > > > > +		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data-
> > > > > > > > > >dev_private);
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +	rxq = dev->data->rx_queues[queue_id];
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +	recycle_rxq_info->mbuf_ring = (void *)rxq->sw_ring;
> > > > > > > > > > > > > +	recycle_rxq_info->mp = rxq->mp;
> > > > > > > > > > > > > +	recycle_rxq_info->mbuf_ring_size = rxq->nb_rx_desc;
> > > > > > > > > > > > > +	recycle_rxq_info->receive_tail =
> > > > > > > > > > > > > +&rxq->rx_tail;
> > > > > > > > > > > > > +
> > > > > > > > > > > > > +	if (ad->rx_vec_allowed) {
> > > > > > > > > > > > > +		recycle_rxq_info->refill_requirement =
> > > > > > > > > > > > > RTE_I40E_RXQ_REARM_THRESH;
> > > > > > > > > > > > > +		recycle_rxq_info->refill_head = &rxq-
> > > > >rxrearm_start;
> > > > > > > > > > > > > +	} else {
> > > > > > > > > > > > > +		recycle_rxq_info->refill_requirement = rxq-
> > > > > > > > > >rx_free_thresh;
> > > > > > > > > > > > > +		recycle_rxq_info->refill_head = &rxq-
> > > > >rx_free_trigger;
> > > > > > > > > > > > > +	}
> > > > > > > > > > > > > +}
> > > > > > > > > > > > > +
> > > > > > > > > > > > >  #ifdef RTE_ARCH_X86  static inline bool
> > > > > > > > > > > > > get_avx_supported(bool request_avx512) @@
> > > > > > > > > > > > > -3293,6
> > > > > > > > > > > > > +3317,8
> > > > > > > @@
> > > > > > > > > > > > > i40e_set_rx_function(struct rte_eth_dev *dev)
> > > > > > > > > > > > >  				dev->rx_pkt_burst = ad-
> > > > > > > >rx_use_avx2 ?
> > > > > > > > > > > > >
> > > > > > > 	i40e_recv_scattered_pkts_vec_avx2 :
> > > > > > > > > > > > >
> > > > 	i40e_recv_scattered_pkts_vec;
> > > > > > > > > > > > > +				dev-
> > > > >recycle_rx_descriptors_refill =
> > > > > > > > > > > > > +
> > > > > > > > > 	i40e_recycle_rx_descriptors_refill_vec;
> > > > > > > > > > > > >  			}
> > > > > > > > > > > > >  		} else {
> > > > > > > > > > > > >  			if (ad->rx_use_avx512) { @@ -3311,9
> > > > +3337,12
> > > > > @@
> > > > > > > > > > > > > i40e_set_rx_function(struct rte_eth_dev
> > > > > > > > > *dev)
> > > > > > > > > > > > >  				dev->rx_pkt_burst = ad-
> > > > > > > >rx_use_avx2 ?
> > > > > > > > > > > > >
> > > > 	i40e_recv_pkts_vec_avx2 :
> > > > > > > > > > > > >  					i40e_recv_pkts_vec;
> > > > > > > > > > > > > +				dev-
> > > > >recycle_rx_descriptors_refill =
> > > > > > > > > > > > > +
> > > > > > > > > 	i40e_recycle_rx_descriptors_refill_vec;
> > > > > > > > > > > > >  			}
> > > > > > > > > > > > >  		}
> > > > > > > > > > > > >  #else /* RTE_ARCH_X86 */
> > > > > > > > > > > > > +		dev->recycle_rx_descriptors_refill =
> > > > > > > > > > > > > +i40e_recycle_rx_descriptors_refill_vec;
> > > > > > > > > > > > >  		if (dev->data->scattered_rx) {
> > > > > > > > > > > > >  			PMD_INIT_LOG(DEBUG,
> > > > > > > > > > > > >  				     "Using Vector Scattered Rx
> > > > > > > (port %d).", @@
> > > > > > > > > > > > > -3481,15 +3510,18 @@ i40e_set_tx_function(struct
> > > > > > > > > > > > > rte_eth_dev
> > > > > > > *dev)
> > > > > > > > > > > > >  				dev->tx_pkt_burst = ad-
> > > > > > > >tx_use_avx2 ?
> > > > > > > > > > > > >
> > > > > > > i40e_xmit_pkts_vec_avx2 :
> > > > > > > > > > > > >
> > > > > > > i40e_xmit_pkts_vec;
> > > > > > > > > > > > > +				dev->recycle_tx_mbufs_reuse
> > > > =
> > > > > > > > > > > > > i40e_recycle_tx_mbufs_reuse_vec;
> > > > > > > > > > > > >  			}
> > > > > > > > > > > > >  #else /* RTE_ARCH_X86 */
> > > > > > > > > > > > >  			PMD_INIT_LOG(DEBUG, "Using Vector
> > > > Tx
> > > > > > > (port %d).",
> > > > > > > > > > > > >  				     dev->data->port_id);
> > > > > > > > > > > > >  			dev->tx_pkt_burst =
> > > > i40e_xmit_pkts_vec;
> > > > > > > > > > > > > +			dev->recycle_tx_mbufs_reuse =
> > > > > > > > > > > > > i40e_recycle_tx_mbufs_reuse_vec;  #endif /*
> > > > > > > > > > > > > RTE_ARCH_X86
> > > > */
> > > > > > > > > > > > >  		} else {
> > > > > > > > > > > > >  			PMD_INIT_LOG(DEBUG, "Simple tx
> > > > finally be
> > > > > > > used.");
> > > > > > > > > > > > >  			dev->tx_pkt_burst =
> > > > i40e_xmit_pkts_simple;
> > > > > > > > > > > > > +			dev->recycle_tx_mbufs_reuse =
> > > > > > > > > > > > > i40e_recycle_tx_mbufs_reuse_vec;
> > > > > > > > > > > > >  		}
> > > > > > > > > > > > >  		dev->tx_pkt_prepare =
> > > > i40e_simple_prep_pkts;
> > > > > > > > > > > > >  	} else {
> > > > > > > > > > > > > diff --git a/drivers/net/i40e/i40e_rxtx.h
> > > > > > > > > > > > > b/drivers/net/i40e/i40e_rxtx.h index
> > > > > > > > > > > > > a8686224e5..b191f23e1f
> > > > > > > > > > > > > 100644
> > > > > > > > > > > > > --- a/drivers/net/i40e/i40e_rxtx.h
> > > > > > > > > > > > > +++ b/drivers/net/i40e/i40e_rxtx.h
> > > > > > > > > > > > > @@ -236,6 +236,10 @@ uint32_t
> > > > > > > > > > > > > i40e_dev_rx_queue_count(void *rx_queue);  int
> > > > > > > > > > > > > i40e_dev_rx_descriptor_status(void *rx_queue,
> > > > > > > > > > > > > uint16_t offset);  int
> > > > > > > > > > > > > i40e_dev_tx_descriptor_status(void
> > > > > > > > > > > > > *tx_queue, uint16_t offset);
> > > > > > > > > > > > >
> > > > > > > > > > > > > +uint16_t i40e_recycle_tx_mbufs_reuse_vec(void
> > > *tx_queue,
> > > > > > > > > > > > > +		struct rte_eth_recycle_rxq_info
> > > > *recycle_rxq_info);
> > > > > > > > > void
> > > > > > > > > > > > > +i40e_recycle_rx_descriptors_refill_vec(void
> > > > > > > > > > > > > +*rx_queue, uint16_t nb_mbufs);
> > > > > > > > > > > > > +
> > > > > > > > > > > > >  uint16_t i40e_recv_pkts_vec(void *rx_queue,
> > > > > > > > > > > > > struct rte_mbuf
> > > > > > > > > **rx_pkts,
> > > > > > > > > > > > >  			    uint16_t nb_pkts);  uint16_t
> > > > > > > > > > > > > i40e_recv_scattered_pkts_vec(void *rx_queue,
> > > > > > > > > > > > > diff --git a/drivers/net/i40e/meson.build
> > > > > > > > > > > > > b/drivers/net/i40e/meson.build index
> > > > > > > > > > > > > 8e53b87a65..3b1a233c84 100644
> > > > > > > > > > > > > --- a/drivers/net/i40e/meson.build
> > > > > > > > > > > > > +++ b/drivers/net/i40e/meson.build
> > > > > > > > > > > > > @@ -34,6 +34,7 @@ sources = files(
> > > > > > > > > > > > >          'i40e_tm.c',
> > > > > > > > > > > > >          'i40e_hash.c',
> > > > > > > > > > > > >          'i40e_vf_representor.c',
> > > > > > > > > > > > > +	'i40e_recycle_mbufs_vec_common.c',
> > > > > > > > > > > > >          'rte_pmd_i40e.c',
> > > > > > > > > > > > >  )
> > > > > > > > > > > > >
> > > > > > > > > > > > > --
> > > > > > > > > > > > > 2.25.1
  
Konstantin Ananyev Sept. 23, 2023, 8:40 p.m. UTC | #16
>>>>>>>>>>>>>> Define specific function implementation for i40e driver.
>>>>>>>>>>>>>> Currently, mbufs recycle mode can support 128bit
>>>>>>>>>>>>>> vector path and
>>>>>>>>>>>>>> avx2
>>>>>>>>>>>> path.
>>>>>>>>>>>>>> And can be enabled both in fast free and no fast free
>> mode.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Suggested-by: Honnappa Nagarahalli
>>>>>>>>>>>>>> <honnappa.nagarahalli@arm.com>
>>>>>>>>>>>>>> Signed-off-by: Feifei Wang
>>>>>>>>>>>>>> <feifei.wang2@arm.com>
>>>>>>>>>>>>>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
>>>>>>>>>>>>>> Reviewed-by: Honnappa Nagarahalli
>>>>>>>>>> <honnappa.nagarahalli@arm.com>
>>>>>>>>>>>>>> ---
>>>>>>>>>>>>>>   drivers/net/i40e/i40e_ethdev.c                |   1 +
>>>>>>>>>>>>>>   drivers/net/i40e/i40e_ethdev.h                |   2 +
>>>>>>>>>>>>>>   .../net/i40e/i40e_recycle_mbufs_vec_common.c  |
>>>>>>>>>>>>>> 147
>>>>>>>>>>>>>> ++++++++++++++++++
>>>>>>>>>>>>>>   drivers/net/i40e/i40e_rxtx.c                  |  32 ++++
>>>>>>>>>>>>>>   drivers/net/i40e/i40e_rxtx.h                  |   4 +
>>>>>>>>>>>>>>   drivers/net/i40e/meson.build                  |   1 +
>>>>>>>>>>>>>>   6 files changed, 187 insertions(+)  create mode
>>>>>>>>>>>>>> 100644
>>>>>>>>>>>>>> drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> diff --git a/drivers/net/i40e/i40e_ethdev.c
>>>>>>>>>>>>>> b/drivers/net/i40e/i40e_ethdev.c index
>>>>>>>>>>>>>> 8271bbb394..50ba9aac94
>>>>>>>>>>>>>> 100644
>>>>>>>>>>>>>> --- a/drivers/net/i40e/i40e_ethdev.c
>>>>>>>>>>>>>> +++ b/drivers/net/i40e/i40e_ethdev.c
>>>>>>>>>>>>>> @@ -496,6 +496,7 @@ static const struct
>>>>>>>>>>>>>> eth_dev_ops i40e_eth_dev_ops
>>>>>>>>>>>> = {
>>>>>>>>>>>>>>   	.flow_ops_get                 = i40e_dev_flow_ops_get,
>>>>>>>>>>>>>>   	.rxq_info_get                 = i40e_rxq_info_get,
>>>>>>>>>>>>>>   	.txq_info_get                 = i40e_txq_info_get,
>>>>>>>>>>>>>> +	.recycle_rxq_info_get         =
>>>>> i40e_recycle_rxq_info_get,
>>>>>>>>>>>>>>   	.rx_burst_mode_get            =
>>>>> i40e_rx_burst_mode_get,
>>>>>>>>>>>>>>   	.tx_burst_mode_get            =
>>>>> i40e_tx_burst_mode_get,
>>>>>>>>>>>>>>   	.timesync_enable              = i40e_timesync_enable,
>>>>>>>>>>>>>> diff --git a/drivers/net/i40e/i40e_ethdev.h
>>>>>>>>>>>>>> b/drivers/net/i40e/i40e_ethdev.h index
>>>>>>>>>>>>>> 6f65d5e0ac..af758798e1
>>>>>>>>>>>>>> 100644
>>>>>>>>>>>>>> --- a/drivers/net/i40e/i40e_ethdev.h
>>>>>>>>>>>>>> +++ b/drivers/net/i40e/i40e_ethdev.h
>>>>>>>>>>>>>> @@ -1355,6 +1355,8 @@ void
>>>>>>>>>>>>>> i40e_rxq_info_get(struct rte_eth_dev *dev, uint16_t
>> queue_id,
>>>>>>>>>>>>>>   	struct rte_eth_rxq_info *qinfo);  void
>>>>>>>>>>>>>> i40e_txq_info_get(struct rte_eth_dev *dev,
>>>>>>>>>>>>>> uint16_t
>>>> queue_id,
>>>>>>>>>>>>>>   	struct rte_eth_txq_info *qinfo);
>>>>>>>>>>>>>> +void i40e_recycle_rxq_info_get(struct
>>>>>>>>>>>>>> +rte_eth_dev *dev, uint16_t
>>>>>>>>>>>> queue_id,
>>>>>>>>>>>>>> +	struct rte_eth_recycle_rxq_info
>>>>>>>>>>>>>> +*recycle_rxq_info);
>>>>>>>>>>>>>>   int i40e_rx_burst_mode_get(struct rte_eth_dev
>>>>>>>>>>>>>> *dev, uint16_t
>>>>>>>>>> queue_id,
>>>>>>>>>>>>>>   			   struct rte_eth_burst_mode *mode);
>>>>> int
>>>>>>>>>>>>>> i40e_tx_burst_mode_get(struct rte_eth_dev *dev,
>>>>>>>>>>>>>> uint16_t queue_id, diff -- git
>>>>>>>>>>>>>> a/drivers/net/i40e/i40e_recycle_mbufs_vec_common
>>>>>>>>>>>>>> .c
>>>>>>>>>>>>>> b/drivers/net/i40e/i40e_recycle_mbufs_vec_common
>>>>>>>>>>>>>> .c new file mode 100644 index
>>>>>>>>>>>>>> 0000000000..5663ecccde
>>>>>>>>>>>>>> --- /dev/null
>>>>>>>>>>>>>> +++ b/drivers/net/i40e/i40e_recycle_mbufs_vec_co
>>>>>>>>>>>>>> +++ mmon
>>>>>>>>>>>>>> +++ .c
>>>>>>>>>>>>>> @@ -0,0 +1,147 @@
>>>>>>>>>>>>>> +/* SPDX-License-Identifier: BSD-3-Clause
>>>>>>>>>>>>>> + * Copyright (c) 2023 Arm Limited.
>>>>>>>>>>>>>> + */
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +#include <stdint.h> #include <ethdev_driver.h>
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +#include "base/i40e_prototype.h"
>>>>>>>>>>>>>> +#include "base/i40e_type.h"
>>>>>>>>>>>>>> +#include "i40e_ethdev.h"
>>>>>>>>>>>>>> +#include "i40e_rxtx.h"
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +#pragma GCC diagnostic ignored "-Wcast-qual"
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +void
>>>>>>>>>>>>>> +i40e_recycle_rx_descriptors_refill_vec(void
>>>>>>>>>>>>>> +*rx_queue, uint16_t
>>>>>>>>>>>>>> +nb_mbufs) {
>>>>>>>>>>>>>> +	struct i40e_rx_queue *rxq = rx_queue;
>>>>>>>>>>>>>> +	struct i40e_rx_entry *rxep;
>>>>>>>>>>>>>> +	volatile union i40e_rx_desc *rxdp;
>>>>>>>>>>>>>> +	uint16_t rx_id;
>>>>>>>>>>>>>> +	uint64_t paddr;
>>>>>>>>>>>>>> +	uint64_t dma_addr;
>>>>>>>>>>>>>> +	uint16_t i;
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +	rxdp = rxq->rx_ring + rxq->rxrearm_start;
>>>>>>>>>>>>>> +	rxep = &rxq->sw_ring[rxq->rxrearm_start];
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +	for (i = 0; i < nb_mbufs; i++) {
>>>>>>>>>>>>>> +		/* Initialize rxdp descs. */
>>>>>>>>>>>>>> +		paddr = (rxep[i].mbuf)->buf_iova +
>>>>>>>>>>>>>> RTE_PKTMBUF_HEADROOM;
>>>>>>>>>>>>>> +		dma_addr = rte_cpu_to_le_64(paddr);
>>>>>>>>>>>>>> +		/* flush desc with pa dma_addr */
>>>>>>>>>>>>>> +		rxdp[i].read.hdr_addr = 0;
>>>>>>>>>>>>>> +		rxdp[i].read.pkt_addr = dma_addr;
>>>>>>>>>>>>>> +	}
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +	/* Update the descriptor initializer index */
>>>>>>>>>>>>>> +	rxq->rxrearm_start += nb_mbufs;
>>>>>>>>>>>>>> +	rx_id = rxq->rxrearm_start - 1;
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
>>>>>>>>>>>>>> +		rxq->rxrearm_start = 0;
>>>>>>>>>>>>>> +		rx_id = rxq->nb_rx_desc - 1;
>>>>>>>>>>>>>> +	}
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +	rxq->rxrearm_nb -= nb_mbufs;
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +	rte_io_wmb();
>>>>>>>>>>>>>> +	/* Update the tail pointer on the NIC */
>>>>>>>>>>>>>> +	I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail,
>>>>>>>>>>>>>> +rx_id); }
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +uint16_t
>>>>>>>>>>>>>> +i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
>>>>>>>>>>>>>> +	struct rte_eth_recycle_rxq_info *recycle_rxq_info) {
>>>>>>>>>>>>>> +	struct i40e_tx_queue *txq = tx_queue;
>>>>>>>>>>>>>> +	struct i40e_tx_entry *txep;
>>>>>>>>>>>>>> +	struct rte_mbuf **rxep;
>>>>>>>>>>>>>> +	int i, n;
>>>>>>>>>>>>>> +	uint16_t nb_recycle_mbufs;
>>>>>>>>>>>>>> +	uint16_t avail = 0;
>>>>>>>>>>>>>> +	uint16_t mbuf_ring_size = recycle_rxq_info-
>>>>>> mbuf_ring_size;
>>>>>>>>>>>>>> +	uint16_t mask = recycle_rxq_info->mbuf_ring_size - 1;
>>>>>>>>>>>>>> +	uint16_t refill_requirement =
>>>>>>>>>>>>>> +recycle_rxq_info-
>>>>>>>>>>> refill_requirement;
>>>>>>>>>>>>>> +	uint16_t refill_head = *recycle_rxq_info->refill_head;
>>>>>>>>>>>>>> +	uint16_t receive_tail =
>>>>>>>>>>>>>> +*recycle_rxq_info->receive_tail;
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +	/* Get available recycling Rx buffers. */
>>>>>>>>>>>>>> +	avail = (mbuf_ring_size - (refill_head -
>>>>>>>>>>>>>> +receive_tail)) & mask;
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +	/* Check Tx free thresh and Rx available space. */
>>>>>>>>>>>>>> +	if (txq->nb_tx_free > txq->tx_free_thresh ||
>>>>>>>>>>>>>> +avail <=
>>>>>>>>>>>>>> +txq-
>>>>>>>>>>> tx_rs_thresh)
>>>>>>>>>>>>>> +		return 0;
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +	/* check DD bits on threshold descriptor */
>>>>>>>>>>>>>> +	if
>>>>>>>>>>>>>> +((txq->tx_ring[txq->tx_next_dd].cmd_type_offset
>>>>>>>>>>>>>> +_bsz
>>>>>>>>>>>>>> +&
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> 	rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> 	rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
>>>>>>>>>>>>>> +		return 0;
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +	n = txq->tx_rs_thresh;
>>>>>>>>>>>>>> +	nb_recycle_mbufs = n;
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +	/* Mbufs recycle mode can only support no ring
>>>>>>>>>>>>>> +buffer
>>>>>>>>>> wrapping
>>>>>>>>>>>>>> around.
>>>>>>>>>>>>>> +	 * Two case for this:
>>>>>>>>>>>>>> +	 *
>>>>>>>>>>>>>> +	 * case 1: The refill head of Rx buffer ring
>>>>>>>>>>>>>> +needs to be aligned
>>>>>>>>>> with
>>>>>>>>>>>>>> +	 * mbuf ring size. In this case, the number of
>>>>>>>>>>>>>> +Tx
>>>>> freeing buffers
>>>>>>>>>>>>>> +	 * should be equal to refill_requirement.
>>>>>>>>>>>>>> +	 *
>>>>>>>>>>>>>> +	 * case 2: The refill head of Rx ring buffer
>>>>>>>>>>>>>> +does not need to be
>>>>>>>>>> aligned
>>>>>>>>>>>>>> +	 * with mbuf ring size. In this case, the
>>>>>>>>>>>>>> +update of refill head
>>>>>>>>>> can not
>>>>>>>>>>>>>> +	 * exceed the Rx mbuf ring size.
>>>>>>>>>>>>>> +	 */
>>>>>>>>>>>>>> +	if (refill_requirement != n ||
>>>>>>>>>>>>>> +		(!refill_requirement && (refill_head + n >
>>>>>>>>>> mbuf_ring_size)))
>>>>>>>>>>>>>> +		return 0;
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +	/* First buffer to free from S/W ring is at index
>>>>>>>>>>>>>> +	 * tx_next_dd - (tx_rs_thresh-1).
>>>>>>>>>>>>>> +	 */
>>>>>>>>>>>>>> +	txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
>>>>>>>>>>>>>> +	rxep = recycle_rxq_info->mbuf_ring;
>>>>>>>>>>>>>> +	rxep += refill_head;
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +	if (txq->offloads &
>>>>> RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
>>>>>>>>>>>>>> +		/* Avoid txq contains buffers from unexpected
>>>>>>>>>> mempool. */
>>>>>>>>>>>>>> +		if (unlikely(recycle_rxq_info->mp
>>>>>>>>>>>>>> +					!= txep[0].mbuf-
>>>>>> pool))
>>>>>>>>>>>>>> +			return 0;
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +		/* Directly put mbufs from Tx to Rx. */
>>>>>>>>>>>>>> +		for (i = 0; i < n; i++)
>>>>>>>>>>>>>> +			rxep[i] = txep[i].mbuf;
>>>>>>>>>>>>>> +	} else {
>>>>>>>>>>>>>> +		for (i = 0; i < n; i++) {
>>>>>>>>>>>>>> +			rxep[i] =
>>>>>>>>>> rte_pktmbuf_prefree_seg(txep[i].mbuf);
>>>>>>>>>>>>>> +
>>>>>>>>>>>>>> +			/* If Tx buffers are not the last
>>>>> reference or
>>>>>>>>>> from
>>>>>>>>>>>>>> +			 * unexpected mempool, previous
>>>>> copied
>>>>>>>>>> buffers are
>>>>>>>>>>>>>> +			 * considered as invalid.
>>>>>>>>>>>>>> +			 */
>>>>>>>>>>>>>> +			if (unlikely((rxep[i] == NULL &&
>>>>>>>>>> refill_requirement) ||
>>>>>>>>>>>>> [Konstantin]
>>>>>>>>>>>>> Could you pls remind me why it is ok to have
>>>>>>>>>>>>> rxep[i]==NULL when refill_requirement is not set?
>>>>>>>>>>>>>
>>>>>>>>>>>>> If refill_requirement is not zero, it means each tx
>>>>>>>>>>>>> freed buffer must be valid and can be put into Rx sw_ring.
>>>>>>>>>>>>> Then the refill head of Rx buffer ring can be
>>>>>>>>>>>>> aligned with mbuf ring size. Briefly speaking the
>>>>>>>>>>>>> number
>>>>>>>>>>>> of Tx valid freed buffer must be equal to Rx
>> refill_requirement.
>>>>>>>>>>>> For example, i40e driver.
>>>>>>>>>>>>>
>>>>>>>>>>>>> If refill_requirement is zero, it means that the
>>>>>>>>>>>>> refill head of Rx buffer ring does not need to be
>>>>>>>>>>>>> aligned with mbuf ring size, thus if Tx have n
>>>>>>>>>>>>> valid freed buffers, we just need to put these n
>>>>>>>>>>>>> buffers into Rx
>>>>>>>>>>>>> sw-
>>>>>>>>>>>> ring, and not to be equal to the Rx setting rearm number.
>>>>>>>>>>>> For example, mlx5 driver.
>>>>>>>>>>>>>
>>>>>>>>>>>>> In conclusion, above difference is due to pmd
>>>>>>>>>>>>> drivers have different
>>>>>>>>>>>> strategies to update their Rx rearm(refill) head.
>>>>>>>>>>>>> For i40e driver, if rearm_head exceed 1024, it
>>>>>>>>>>>>> will be set as
>>>>>>>>>>>>> 0 due to  the
>>>>>>>>>>>> number of each rearm is a fixed value by default.
>>>>>>>>>>>>> For mlx5 driver. Its rearm_head can exceed 1024,
>>>>>>>>>>>>> and use mask to achieve
>>>>>>>>>>>> real index. Thus its rearm number can be a different value.
>>>>>>>>>>>>
>>>>>>>>>>>> Ok, but if rte_pktmbuf_prefree_seg(txep[i].mbuf) returns NULL, it
>>>>>>>>>>>> means that this mbuf is not free yet and can't be reused.
>>>>>>>>>>>> Shouldn't we then set nb_recycle_mbufs = 0 in that case
>> too?
>>>>>>>>>>>> Or probably would be enough to skip that mbuf?
>>>>>>>>>>>> Might be something like that:
>>>>>>>>>>>>
>>>>>>>>>>>> for (i = 0, j = 0; i < n; i++) {
>>>>>>>>>>>>
>>>>>>>>>>>> 	rxep[j] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
>>>>>>>>>>>> 	if (rxep[j] == NULL || recycle_rxq_info->mp !=
>>>>>>>>>>>> rxep[j].mbuf-
>>>>>>> pool))  {
>>>>>>>>>>>> 		if (refill_requirement) {
>>>>>>>>>>>> 			nb_recycle_mbufs = 0;
>>>>>>>>>>>> 			break;
>>>>>>>>>>>> 		}
>>>>>>>>>>>> 	} else
>>>>>>>>>>>> 		j++;
>>>>>>>>>>>> }
>>>>>>>>>>>>
>>>>>>>>>>>> /* now j contains actual number of recycled mbufs */
>>>>>>>>>>>>
>>>>>>>>>>>> ?
>>>>>>>>>>>>
>>>>>>>>>>>>>
>>>>>>>>>>>>>> +					recycle_rxq_info-
>>>>>> mp !=
>>>>>>>>>> txep[i].mbuf-
>>>>>>>>>>>>>>> pool))
>>>>>>>>>>>>>> +				nb_recycle_mbufs = 0;
>>>>>>>>>>>>>> +		}
>>>>>>>>>>>>>> +		/* If Tx buffers are not the last reference or
>>>>>>>>>>>>>> +		 * from unexpected mempool, all recycled
>>>>> buffers
>>>>>>>>>>>>>> +		 * are put into mempool.
>>>>>>>>>>>>>> +		 */
>>>>>>>>>>>>>> +		if (nb_recycle_mbufs == 0)
>>>>>>>>>>>>>> +			for (i = 0; i < n; i++) {
>>>>>>>>>>>>>> +				if (rxep[i] != NULL)
>>>>>>>>>>>>>> +
>>>>> 	rte_mempool_put(rxep[i]-
>>>>>>>>>>> pool,
>>>>>>>>>>>>>> rxep[i]);
>>>>>>>>>>>>>> +			}
>>>>>>>>>>>>>> +	}
>>>>>>>>>>>>>> +
>>>>>>>>>>> [Konstantin] After another thought, it might be easier
>>>>>>>>>>> and cleaner
>>>>>> just to:
>>>>>>>>>>> if (rxep[j] == NULL || recycle_rxq_info->mp != rxep[j].mbuf-
>>> pool)
>>>>>>>>>>> 	nb_recycle_mbufs = 0;
>>>>>>>>>>>
>>>>>>>>>>> Anyway, from my understanding - if
>>>>>>>>>>> rte_pktmbuf_prefree_seg(mbuf) returns NULL, then we
>>>>>>>>>>> can't recycle
>>>>>> that mbuf.
>>>>>>>>>>>
>>>>>>>>>>> [Feifei] Agree with that
>>>>>>>>>>> 'rte_pktmbuf_prefree_seg(mbuf) returns NULL, then
>>>>>>>>>> we can't recycle that mbuf'.
>>>>>>>>>>>
>>>>>>>>>>> Firstly, we should know for i40e driver,  the number
>>>>>>>>>>> of free mbufs is fixed, it
>>>>>>>>>> must equal to 'tx_rs_thresh'
>>>>>>>>>>> This means if we start to free Tx mbufs, it cannot be
>>>>>>>>>>> interrupted until the
>>>>>>>>>> given amount of mbufs are freed.
>>>>>>>>>>> In the meanwhile, doing prefree operation for a Tx
>>>>>>>>>>> mbuf can be looked at this mbuf is freed from this TX
>>>>>>>>>>> sw-ring if the API returns NULL. This is due
>>>>>>>>>> to that call 'prefree_seg' function means update the mbuf
>> refcnt.
>>>>>>>>>>>
>>>>>>>>>>> So let's come back our recycle mbuf case.
>>>>>>>>>>> Here we consider that the case if we have 32 mbufs
>>>>>>>>>>> need to be freed, and
>>>>>>>>>> we firstly do the pre-free.
>>>>>>>>>>> And then first 8 mbufs is good and return value is not none.
>>>>>>>>>>> But the 9th
>>>>>>>>>> mbuf is bad, its refcnt is more than 1.
>>>>>>>>>>> So we just update its refcnt and cannot put it into Rx sw-ring.
>>>>>>>>>>> For Tx sw-ring,
>>>>>>>>>> this mbuf has been freed.
>>>>>>>>>>> Then we should continue to do pre-free operation for
>>>>>>>>>>> the next Tx mbufs to ensure the given amount of mbufs are
>> freed.
>>>>>>>>>>>
>>>>>>>>>>> Do a conclusion for this, this is because if we start
>>>>>>>>>>> to do pre-free operation,  the Tx mbuf refcnt value
>>>>>>>>>>> maybe changed, so we cannot stop or
>>>>>>>>>> break until finish all the pre-free operation.
>>>>>>>>>>>
>>>>>>>>>>> Finally, in the case that Rx refill_request is not
>>>>>>>>>>> zero, but the valid mbuf amount is less than this
>>>>>>>>>>> value, we must put back this Tx mbufs into
>>>>>>>>>> mempool.
>>>>>>>>>>>
>>>>>>>>>>> Above is the reason why I do not directly jump out of
>>>>>>>>>>> the loop if some mbuf
>>>>>>>>>> return value is NULL.
>>>>>>>>>>
>>>>>>>>>> Yep, I already realized that it is a bit more
>>>>>>>>>> complicated and we need to continue with prefree() for
>>>>>>>>>> all packets even when we get NULL in
>>>>>>>> the middle.
>>>>>>>>>> Anyway the code has to be changed, right?
>>>>>>>>>>
>>>>>>>>> Sorry, I think the code does not need to be changed.
>>>>>>>>> For no fast free path, currently:
>>>>>>>>> 1.   We check whether each mbuf is Ok and call pre_free function
>>>>>>>>> ----------------------------------------------------------
>>>>>>>>> ----
>>>>>>>>> --
>>>>>>>>> --
>>>>>>>>> ----
>>>>>>>>> ----------------------------------------------------------
>>>>>>>>> ----
>>>>>>>>> 2.1 For the mbuf return value is not NULL, it is put into Rx sw-
>> ring.
>>>>>>>>> 2.2 For the mbuf return value is zero and refill-request,
>>>>>>>>> it will also firstly put into Rx sw-ring, and we set
>>>>>>>>> nb_recycle =
>>>>>>>>> 0
>>>>>>>>> ----------------------------------------------------------
>>>>>>>>> ----
>>>>>>>>> --
>>>>>>>>> --
>>>>>>>>> ----
>>>>>>>>> ----------------------------------------------------------
>>>>>>>>> ----
>>>>>>>>> 3.1 We check nb_recycle, if it is not 0, we will continue
>>>>>>>>> to rearm Rx descs
>>>>>>>> and update its index by call descs_refill function.
>>>>>>>>> 3.2 if nb_recycle is 0, we will put valid recycle mbufs
>>>>>>>>> back into mempool as general path. This is necessary,
>>>>>>>>> because we need to ensure the freed Tx number is
>>>>>>>>> fixed.(Some buffers return is null can be seen as freed,
>>>>>>>>> others need to be put into mempool)
>>>>>>>>>
>>>>>>>>> Or maybe I ignore something?
>>>>>>>>
>>>>>>>>
>>>>>>>> I am talking about the case when both refill_requirement and
>>>>>>>> mbuf return values iare zero:
>>>>>>>> if (unlikely((rxep[i] == NULL && refill_requirement) ||         // ???
>>>> rxep[i]
>>>>> ==
>>>>>> 0
>>>>>>>> AND refill_requirement == 0 ???
>>>>>>>>   		recycle_rxq_info->mp != txep[i].mbuf->pool))
>>>>>>>> 	nb_recycle_mbufs = 0;
>>>>>>>>
>>>>>>>> As I read the code, you treat such a situation as valid,
>>>>>>>> while I think we should reset nb_recycle_mbufs to zero when
>>>>>>>> rxep[i] == NULL, no matter what value refill_requirement is.
>>>>>>>
>>>>>>> So this means for maybe MLX5 driver, its refill_request = 0.
>>>>>>> And if some mbufs return value is zero, the other mbufs can
>>>>>>> not be recycled into Rx sw-ring? Because nb_recycle=0, and
>>>>>>> they need to be put into
>>>>>> mempool.
>>>>>>>
>>>>>>> I think for such as MLX5 driver, we can allow recycle some
>>>>>>> valid mbufs into
>>>>>> Rx ring.
>>>>>>> Because no constraint for its refill number. Is my suggestion
>> reasonable?
>>>>>>
>>>>>> I suppose yes: if refill_request is zero we potentially can skip 'bad'
>>>>>> mbufs and continue with recycling for remaining ones.
>>>>>> It would probably require more changes in current code, but
>>>>>> sounds ok to me in general.
>>>>> That's Ok. Thanks for your careful reviewing.
>>>>
>>>> I'm very sorry not to receive your e-mail and until now I realize we
>>>> need to do some code change for i40e driver. Also thanks ferruh to kindly
>> remind this.
>>
>> No worries at all, and thanks to you and Ferruh for the fast reaction.
>>
>>>>
>>>> Agree with you we need some operation for the case that
>>>> (refill_requirement == 0 &&  rxep[i] == 0).
>>>> Thus maybe we can do a change as follows:
>>>>
>>>> for (i = 0; i < n; i++) {
>>>> 	rxep[0] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
>>>> 	if (unlikely((rxep[0] == NULL && refill_requirement) ||
>>>> 			recycle_rxq_info->mp != txep[i].mbuf->pool))
>>>> 		nb_recycle_mbufs = 0;
>>>> 	if (likely(rxep[0]))
>>>> 		rxep++;
>>>> }
>>>>
>>>> Is above change is Ok?
>>>>
>>>
>>> Just do a change for the above code:
>>> ----------------------------------------------------------------------
>>> -------------------------------
>>> int j = 0;
>>> ......
>>>
>>>   for (i = 0; i < n; i++) {
>>>   	rxep[j] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
>>> 	if (unlikely(rxep[j] == NULL  && !refill_requirement))
>>>   		continue;
>>> 	if (unlikely((rxep[j] == NULL && refill_requirement) ||
>>>   			recycle_rxq_info->mp != txep[i].mbuf->pool))
>>>   		nb_recycle_mbufs  = 0;
>>> 	j++;
>>>   }
>>>
>>> if (nb_recycle_mbufs  == 0)
>>> 	for (i = 0; i < j; i++) {
>>> 		if (rxep[i] != NULL)
>>> 			rte_mempool_put(rxep[i]->pool, rxep[i]);
>>> 	}
>>
>> I think it should work...
>> One more thing, shouldn't we reset
>> nb_recycle_mbufs  = j;
>>
>> Something like:
>>   if (nb_recycle_mbufs  == 0) {
>>   	for (i = 0; i < j; i++) {
>>   		if (rxep[i] != NULL)
>>   			rte_mempool_put(rxep[i]->pool, rxep[i]);
>>   	}
>> } else
>>    nb_recycle_mbufs  = j;
>>
>> ?
>>
>>
> Yes, we should reset it.
> 
> I just did a performance test with the new code and found that performance decreases by 8% compared
> with the old version.
> 
> Thus I think we should come back your previous idea:
> "we should reset nb_recycle_mbufs to zero when rxep[i] == NULL,
> no matter what value refill_requirement is":
> 
> for (i = 0; i < n; i++) {
> 	rxep[i] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> 	if (unlikely((rxep[i] == NULL) ||
> 			recycle_rxq_info->mp != txep[i].mbuf->pool))
> 		nb_recycle_mbufs  = 0;
> }
> 
> Thus we drop the case that putting the remaining valid mbuf into Rx sw-ring
> when no refill_request, but maintain good performance.

I am ok with that way too.
Again that way, it looks simpler and probably less error-prone.
Thanks
Konstantin
  
Feifei Wang Sept. 25, 2023, 3:26 a.m. UTC | #17
For Konstantin

> -----Original Message-----
> From: Feifei Wang
> Sent: Saturday, September 23, 2023 1:52 PM
> To: Konstantin Ananyev <konstantin.ananyev@huawei.com>; Konstantin
> Ananyev <konstantin.v.ananyev@yandex.ru>
> Cc: dev@dpdk.org; nd <nd@arm.com>; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>; Yuying Zhang <Yuying.Zhang@intel.com>; Beilei
> Xing <beilei.xing@intel.com>; nd <nd@arm.com>; nd <nd@arm.com>; nd
> <nd@arm.com>; nd <nd@arm.com>; nd <nd@arm.com>; nd <nd@arm.com>;
> nd <nd@arm.com>; nd <nd@arm.com>
> Subject: RE: [PATCH v11 2/4] net/i40e: implement mbufs recycle mode
> 
> 
> 
> > -----Original Message-----
> > From: Konstantin Ananyev <konstantin.ananyev@huawei.com>
> > Sent: Saturday, September 23, 2023 12:41 AM
> > To: Feifei Wang <Feifei.Wang2@arm.com>; Konstantin Ananyev
> > <konstantin.v.ananyev@yandex.ru>
> > Cc: dev@dpdk.org; nd <nd@arm.com>; Honnappa Nagarahalli
> > <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>;
> > Yuying Zhang <Yuying.Zhang@intel.com>; Beilei Xing
> > <beilei.xing@intel.com>; nd <nd@arm.com>; nd <nd@arm.com>; nd
> > <nd@arm.com>; nd <nd@arm.com>; nd <nd@arm.com>; nd
> <nd@arm.com>; nd
> > <nd@arm.com>
> > Subject: RE: [PATCH v11 2/4] net/i40e: implement mbufs recycle mode
> >
> >
> > Hi Feifei,
> >
> > > > > -----Original Message-----
> > > > > From: Feifei Wang
> > > > > Sent: Tuesday, September 5, 2023 11:11 AM
> > > > > To: Konstantin Ananyev <konstantin.ananyev@huawei.com>;
> > > > > Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>
> > > > > Cc: dev@dpdk.org; nd <nd@arm.com>; Honnappa Nagarahalli
> > > > > <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang
> > > > <Ruifeng.Wang@arm.com>;
> > > > > Yuying Zhang <Yuying.Zhang@intel.com>; Beilei Xing
> > > > > <beilei.xing@intel.com>; nd <nd@arm.com>; nd <nd@arm.com>; nd
> > > > > <nd@arm.com>; nd <nd@arm.com>; nd <nd@arm.com>
> > > > > Subject: RE: [PATCH v11 2/4] net/i40e: implement mbufs recycle
> > > > > mode
> > > > >
> > > > >
> > > > >
> > > > > > -----Original Message-----
> > > > > > From: Konstantin Ananyev <konstantin.ananyev@huawei.com>
> > > > > > Sent: Monday, September 4, 2023 6:22 PM
> > > > > > To: Feifei Wang <Feifei.Wang2@arm.com>; Konstantin Ananyev
> > > > > > <konstantin.v.ananyev@yandex.ru>
> > > > > > Cc: dev@dpdk.org; nd <nd@arm.com>; Honnappa Nagarahalli
> > > > > > <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang
> > > > > <Ruifeng.Wang@arm.com>;
> > > > > > Yuying Zhang <Yuying.Zhang@intel.com>; Beilei Xing
> > > > > > <beilei.xing@intel.com>; nd <nd@arm.com>; nd <nd@arm.com>; nd
> > > > > > <nd@arm.com>; nd <nd@arm.com>
> > > > > > Subject: RE: [PATCH v11 2/4] net/i40e: implement mbufs recycle
> > > > > > mode
> > > > > >
> > > > > >
> > > > > >
> > > > > > > > > > > > > > Define specific function implementation for i40e driver.
> > > > > > > > > > > > > > Currently, mbufs recycle mode can support
> > > > > > > > > > > > > > 128bit vector path and
> > > > > > > > > > > > > > avx2
> > > > > > > > > > > > path.
> > > > > > > > > > > > > > And can be enabled both in fast free and no
> > > > > > > > > > > > > > fast free
> > mode.
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > Suggested-by: Honnappa Nagarahalli
> > > > > > > > > > > > > > <honnappa.nagarahalli@arm.com>
> > > > > > > > > > > > > > Signed-off-by: Feifei Wang
> > > > > > > > > > > > > > <feifei.wang2@arm.com>
> > > > > > > > > > > > > > Reviewed-by: Ruifeng Wang
> > > > > > > > > > > > > > <ruifeng.wang@arm.com>
> > > > > > > > > > > > > > Reviewed-by: Honnappa Nagarahalli
> > > > > > > > > > <honnappa.nagarahalli@arm.com>
> > > > > > > > > > > > > > ---
> > > > > > > > > > > > > >  drivers/net/i40e/i40e_ethdev.c                |   1 +
> > > > > > > > > > > > > >  drivers/net/i40e/i40e_ethdev.h                |   2 +
> > > > > > > > > > > > > >  .../net/i40e/i40e_recycle_mbufs_vec_common.c
> > > > > > > > > > > > > > |
> > > > > > > > > > > > > > 147
> > > > > > > > > > > > > > ++++++++++++++++++
> > > > > > > > > > > > > >  drivers/net/i40e/i40e_rxtx.c                  |  32 ++++
> > > > > > > > > > > > > >  drivers/net/i40e/i40e_rxtx.h                  |   4 +
> > > > > > > > > > > > > >  drivers/net/i40e/meson.build                  |   1 +
> > > > > > > > > > > > > >  6 files changed, 187 insertions(+)  create
> > > > > > > > > > > > > > mode
> > > > > > > > > > > > > > 100644
> > > > > > > > > > > > > > drivers/net/i40e/i40e_recycle_mbufs_vec_common
> > > > > > > > > > > > > > .c
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > diff --git a/drivers/net/i40e/i40e_ethdev.c
> > > > > > > > > > > > > > b/drivers/net/i40e/i40e_ethdev.c index
> > > > > > > > > > > > > > 8271bbb394..50ba9aac94
> > > > > > > > > > > > > > 100644
> > > > > > > > > > > > > > --- a/drivers/net/i40e/i40e_ethdev.c
> > > > > > > > > > > > > > +++ b/drivers/net/i40e/i40e_ethdev.c
> > > > > > > > > > > > > > @@ -496,6 +496,7 @@ static const struct
> > > > > > > > > > > > > > eth_dev_ops i40e_eth_dev_ops
> > > > > > > > > > > > = {
> > > > > > > > > > > > > >  	.flow_ops_get                 =
> i40e_dev_flow_ops_get,
> > > > > > > > > > > > > >  	.rxq_info_get                 = i40e_rxq_info_get,
> > > > > > > > > > > > > >  	.txq_info_get                 = i40e_txq_info_get,
> > > > > > > > > > > > > > +	.recycle_rxq_info_get         =
> > > > > i40e_recycle_rxq_info_get,
> > > > > > > > > > > > > >  	.rx_burst_mode_get            =
> > > > > i40e_rx_burst_mode_get,
> > > > > > > > > > > > > >  	.tx_burst_mode_get            =
> > > > > i40e_tx_burst_mode_get,
> > > > > > > > > > > > > >  	.timesync_enable              =
> i40e_timesync_enable,
> > > > > > > > > > > > > > diff --git a/drivers/net/i40e/i40e_ethdev.h
> > > > > > > > > > > > > > b/drivers/net/i40e/i40e_ethdev.h index
> > > > > > > > > > > > > > 6f65d5e0ac..af758798e1
> > > > > > > > > > > > > > 100644
> > > > > > > > > > > > > > --- a/drivers/net/i40e/i40e_ethdev.h
> > > > > > > > > > > > > > +++ b/drivers/net/i40e/i40e_ethdev.h
> > > > > > > > > > > > > > @@ -1355,6 +1355,8 @@ void
> > > > > > > > > > > > > > i40e_rxq_info_get(struct rte_eth_dev *dev,
> > > > > > > > > > > > > > uint16_t
> > queue_id,
> > > > > > > > > > > > > >  	struct rte_eth_rxq_info *qinfo);  void
> > > > > > > > > > > > > > i40e_txq_info_get(struct rte_eth_dev *dev,
> > > > > > > > > > > > > > uint16_t
> > > > queue_id,
> > > > > > > > > > > > > >  	struct rte_eth_txq_info *qinfo);
> > > > > > > > > > > > > > +void i40e_recycle_rxq_info_get(struct
> > > > > > > > > > > > > > +rte_eth_dev *dev, uint16_t
> > > > > > > > > > > > queue_id,
> > > > > > > > > > > > > > +	struct rte_eth_recycle_rxq_info
> > > > > > > > > > > > > > +*recycle_rxq_info);
> > > > > > > > > > > > > >  int i40e_rx_burst_mode_get(struct rte_eth_dev
> > > > > > > > > > > > > > *dev, uint16_t
> > > > > > > > > > queue_id,
> > > > > > > > > > > > > >  			   struct rte_eth_burst_mode
> *mode);
> > > > > int
> > > > > > > > > > > > > > i40e_tx_burst_mode_get(struct rte_eth_dev
> > > > > > > > > > > > > > *dev, uint16_t queue_id, diff -- git
> > > > > > > > > > > > > > a/drivers/net/i40e/i40e_recycle_mbufs_vec_comm
> > > > > > > > > > > > > > on
> > > > > > > > > > > > > > .c
> > > > > > > > > > > > > > b/drivers/net/i40e/i40e_recycle_mbufs_vec_comm
> > > > > > > > > > > > > > on .c new file mode 100644 index
> > > > > > > > > > > > > > 0000000000..5663ecccde
> > > > > > > > > > > > > > --- /dev/null
> > > > > > > > > > > > > > +++ b/drivers/net/i40e/i40e_recycle_mbufs_vec_
> > > > > > > > > > > > > > +++ co
> > > > > > > > > > > > > > +++ mmon
> > > > > > > > > > > > > > +++ .c
> > > > > > > > > > > > > > @@ -0,0 +1,147 @@
> > > > > > > > > > > > > > +/* SPDX-License-Identifier: BSD-3-Clause
> > > > > > > > > > > > > > + * Copyright (c) 2023 Arm Limited.
> > > > > > > > > > > > > > + */
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > > +#include <stdint.h> #include
> > > > > > > > > > > > > > +<ethdev_driver.h>
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > > +#include "base/i40e_prototype.h"
> > > > > > > > > > > > > > +#include "base/i40e_type.h"
> > > > > > > > > > > > > > +#include "i40e_ethdev.h"
> > > > > > > > > > > > > > +#include "i40e_rxtx.h"
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > > +#pragma GCC diagnostic ignored "-Wcast-qual"
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > > +void
> > > > > > > > > > > > > > +i40e_recycle_rx_descriptors_refill_vec(void
> > > > > > > > > > > > > > +*rx_queue, uint16_t
> > > > > > > > > > > > > > +nb_mbufs) {
> > > > > > > > > > > > > > +	struct i40e_rx_queue *rxq = rx_queue;
> > > > > > > > > > > > > > +	struct i40e_rx_entry *rxep;
> > > > > > > > > > > > > > +	volatile union i40e_rx_desc *rxdp;
> > > > > > > > > > > > > > +	uint16_t rx_id;
> > > > > > > > > > > > > > +	uint64_t paddr;
> > > > > > > > > > > > > > +	uint64_t dma_addr;
> > > > > > > > > > > > > > +	uint16_t i;
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > > +	rxdp = rxq->rx_ring + rxq->rxrearm_start;
> > > > > > > > > > > > > > +	rxep = &rxq->sw_ring[rxq->rxrearm_start];
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > > +	for (i = 0; i < nb_mbufs; i++) {
> > > > > > > > > > > > > > +		/* Initialize rxdp descs. */
> > > > > > > > > > > > > > +		paddr = (rxep[i].mbuf)->buf_iova +
> > > > > > > > > > > > > > RTE_PKTMBUF_HEADROOM;
> > > > > > > > > > > > > > +		dma_addr = rte_cpu_to_le_64(paddr);
> > > > > > > > > > > > > > +		/* flush desc with pa dma_addr */
> > > > > > > > > > > > > > +		rxdp[i].read.hdr_addr = 0;
> > > > > > > > > > > > > > +		rxdp[i].read.pkt_addr = dma_addr;
> > > > > > > > > > > > > > +	}
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > > +	/* Update the descriptor initializer index */
> > > > > > > > > > > > > > +	rxq->rxrearm_start += nb_mbufs;
> > > > > > > > > > > > > > +	rx_id = rxq->rxrearm_start - 1;
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > > +	if (unlikely(rxq->rxrearm_start >= rxq-
> >nb_rx_desc)) {
> > > > > > > > > > > > > > +		rxq->rxrearm_start = 0;
> > > > > > > > > > > > > > +		rx_id = rxq->nb_rx_desc - 1;
> > > > > > > > > > > > > > +	}
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > > +	rxq->rxrearm_nb -= nb_mbufs;
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > > +	rte_io_wmb();
> > > > > > > > > > > > > > +	/* Update the tail pointer on the NIC */
> > > > > > > > > > > > > > +	I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail,
> > > > > > > > > > > > > > +rx_id); }
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > > +uint16_t
> > > > > > > > > > > > > > +i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
> > > > > > > > > > > > > > +	struct rte_eth_recycle_rxq_info
> *recycle_rxq_info) {
> > > > > > > > > > > > > > +	struct i40e_tx_queue *txq = tx_queue;
> > > > > > > > > > > > > > +	struct i40e_tx_entry *txep;
> > > > > > > > > > > > > > +	struct rte_mbuf **rxep;
> > > > > > > > > > > > > > +	int i, n;
> > > > > > > > > > > > > > +	uint16_t nb_recycle_mbufs;
> > > > > > > > > > > > > > +	uint16_t avail = 0;
> > > > > > > > > > > > > > +	uint16_t mbuf_ring_size = recycle_rxq_info-
> > > > > >mbuf_ring_size;
> > > > > > > > > > > > > > +	uint16_t mask = recycle_rxq_info-
> >mbuf_ring_size - 1;
> > > > > > > > > > > > > > +	uint16_t refill_requirement =
> > > > > > > > > > > > > > +recycle_rxq_info-
> > > > > > > > > > >refill_requirement;
> > > > > > > > > > > > > > +	uint16_t refill_head = *recycle_rxq_info-
> >refill_head;
> > > > > > > > > > > > > > +	uint16_t receive_tail =
> > > > > > > > > > > > > > +*recycle_rxq_info->receive_tail;
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > > +	/* Get available recycling Rx buffers. */
> > > > > > > > > > > > > > +	avail = (mbuf_ring_size - (refill_head -
> > > > > > > > > > > > > > +receive_tail)) & mask;
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > > +	/* Check Tx free thresh and Rx available
> space. */
> > > > > > > > > > > > > > +	if (txq->nb_tx_free > txq->tx_free_thresh ||
> > > > > > > > > > > > > > +avail <=
> > > > > > > > > > > > > > +txq-
> > > > > > > > > > >tx_rs_thresh)
> > > > > > > > > > > > > > +		return 0;
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > > +	/* check DD bits on threshold descriptor */
> > > > > > > > > > > > > > +	if
> > > > > > > > > > > > > > +((txq->tx_ring[txq->tx_next_dd].cmd_type_offs
> > > > > > > > > > > > > > +et
> > > > > > > > > > > > > > +_bsz
> > > > > > > > > > > > > > +&
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > > 	rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE)
> > > > > > > > > > > > > > )
> > > > > > > > > > > > > > +		return 0;
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > > +	n = txq->tx_rs_thresh;
> > > > > > > > > > > > > > +	nb_recycle_mbufs = n;
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > > +	/* Mbufs recycle mode can only support no
> > > > > > > > > > > > > > +ring buffer
> > > > > > > > > > wrapping
> > > > > > > > > > > > > > around.
> > > > > > > > > > > > > > +	 * Two case for this:
> > > > > > > > > > > > > > +	 *
> > > > > > > > > > > > > > +	 * case 1: The refill head of Rx buffer ring
> > > > > > > > > > > > > > +needs to be aligned
> > > > > > > > > > with
> > > > > > > > > > > > > > +	 * mbuf ring size. In this case, the number
> > > > > > > > > > > > > > +of Tx
> > > > > freeing buffers
> > > > > > > > > > > > > > +	 * should be equal to refill_requirement.
> > > > > > > > > > > > > > +	 *
> > > > > > > > > > > > > > +	 * case 2: The refill head of Rx ring buffer
> > > > > > > > > > > > > > +does not need to be
> > > > > > > > > > aligned
> > > > > > > > > > > > > > +	 * with mbuf ring size. In this case, the
> > > > > > > > > > > > > > +update of refill head
> > > > > > > > > > can not
> > > > > > > > > > > > > > +	 * exceed the Rx mbuf ring size.
> > > > > > > > > > > > > > +	 */
> > > > > > > > > > > > > > +	if (refill_requirement != n ||
> > > > > > > > > > > > > > +		(!refill_requirement && (refill_head +
> n >
> > > > > > > > > > mbuf_ring_size)))
> > > > > > > > > > > > > > +		return 0;
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > > +	/* First buffer to free from S/W ring is at index
> > > > > > > > > > > > > > +	 * tx_next_dd - (tx_rs_thresh-1).
> > > > > > > > > > > > > > +	 */
> > > > > > > > > > > > > > +	txep = &txq->sw_ring[txq->tx_next_dd - (n -
> 1)];
> > > > > > > > > > > > > > +	rxep = recycle_rxq_info->mbuf_ring;
> > > > > > > > > > > > > > +	rxep += refill_head;
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > > +	if (txq->offloads &
> > > > > RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
> > > > > > > > > > > > > > +		/* Avoid txq contains buffers from
> > > > > > > > > > > > > > +unexpected
> > > > > > > > > > mempool. */
> > > > > > > > > > > > > > +		if (unlikely(recycle_rxq_info->mp
> > > > > > > > > > > > > > +					!=
> txep[0].mbuf-
> > > > > >pool))
> > > > > > > > > > > > > > +			return 0;
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > > +		/* Directly put mbufs from Tx to Rx. */
> > > > > > > > > > > > > > +		for (i = 0; i < n; i++)
> > > > > > > > > > > > > > +			rxep[i] = txep[i].mbuf;
> > > > > > > > > > > > > > +	} else {
> > > > > > > > > > > > > > +		for (i = 0; i < n; i++) {
> > > > > > > > > > > > > > +			rxep[i] =
> > > > > > > > > > rte_pktmbuf_prefree_seg(txep[i].mbuf);
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > > +			/* If Tx buffers are not the last
> > > > > reference or
> > > > > > > > > > from
> > > > > > > > > > > > > > +			 * unexpected mempool,
> previous
> > > > > copied
> > > > > > > > > > buffers are
> > > > > > > > > > > > > > +			 * considered as invalid.
> > > > > > > > > > > > > > +			 */
> > > > > > > > > > > > > > +			if (unlikely((rxep[i] == NULL
> &&
> > > > > > > > > > refill_requirement) ||
> > > > > > > > > > > > > [Konstantin]
> > > > > > > > > > > > > Could you pls remind me why it is ok to have
> > > > > > > > > > > > > rxep[i]==NULL when refill_requirement is not set?
> > > > > > > > > > > > >
> > > > > > > > > > > > > If reill_requirement is not zero, it means each
> > > > > > > > > > > > > tx freed buffer must be valid and can be put into Rx
> sw_ring.
> > > > > > > > > > > > > Then the refill head of Rx buffer ring can be
> > > > > > > > > > > > > aligned with mbuf ring size. Briefly speaking
> > > > > > > > > > > > > the number
> > > > > > > > > > > > of Tx valid freed buffer must be equal to Rx
> > refill_requirement.
> > > > > > > > > > > > For example, i40e driver.
> > > > > > > > > > > > >
> > > > > > > > > > > > > If reill_requirement is zero, it means that the
> > > > > > > > > > > > > refill head of Rx buffer ring does not need to
> > > > > > > > > > > > > be aligned with mbuf ring size, thus if Tx have
> > > > > > > > > > > > > n valid freed buffers, we just need to put these
> > > > > > > > > > > > > n buffers into Rx
> > > > > > > > > > > > > sw-
> > > > > > > > > > > > ring, and not to be equal to the Rx setting rearm number.
> > > > > > > > > > > > For example, mlx5 driver.
> > > > > > > > > > > > >
> > > > > > > > > > > > > In conclusion, above difference is due to pmd
> > > > > > > > > > > > > drivers have different
> > > > > > > > > > > > strategies to update their Rx rearm(refill) head.
> > > > > > > > > > > > > For i40e driver, if rearm_head exceed 1024, it
> > > > > > > > > > > > > will be set as
> > > > > > > > > > > > > 0 due to  the
> > > > > > > > > > > > number of each rearm is a fixed value by default.
> > > > > > > > > > > > > For mlx5 driver. Its rearm_head can exceed 1024,
> > > > > > > > > > > > > and use mask to achieve
> > > > > > > > > > > > real index. Thus its rearm number can be a different value.
> > > > > > > > > > > >
> > > > > > > > > > > > Ok, but if rte_pktmbuf_prefree_seg(txep[i].mbuf),
> > > > > > > > > > > > it means that this mbuf is not free yet and can't be
> reused.
> > > > > > > > > > > > Shouldn't we then set nb_recycle_mbufs = 0 in that
> > > > > > > > > > > > case
> > too?
> > > > > > > > > > > > Or probably would be enough to skip that mbuf?
> > > > > > > > > > > > Might be something like that:
> > > > > > > > > > > >
> > > > > > > > > > > > for (i = 0, j = 0; i < n; i++) {
> > > > > > > > > > > >
> > > > > > > > > > > > 	rxep[j] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> > > > > > > > > > > > 	if (rxep[j] == NULL || recycle_rxq_info->mp !=
> > > > > > > > > > > > rxep[j].mbuf-
> > > > > > >pool))  {
> > > > > > > > > > > > 		if (refill_requirement) {
> > > > > > > > > > > > 			nb_recycle_mbufs = 0;
> > > > > > > > > > > > 			break;
> > > > > > > > > > > > 		}
> > > > > > > > > > > > 	} else
> > > > > > > > > > > > 		j++;
> > > > > > > > > > > > }
> > > > > > > > > > > >
> > > > > > > > > > > > /* now j contains actual number of recycled mbufs
> > > > > > > > > > > > */
> > > > > > > > > > > >
> > > > > > > > > > > > ?
> > > > > > > > > > > >
> > > > > > > > > > > > >
> > > > > > > > > > > > > > +
> 	recycle_rxq_info-
> > > > > >mp !=
> > > > > > > > > > txep[i].mbuf-
> > > > > > > > > > > > > > >pool))
> > > > > > > > > > > > > > +				nb_recycle_mbufs = 0;
> > > > > > > > > > > > > > +		}
> > > > > > > > > > > > > > +		/* If Tx buffers are not the last
> reference or
> > > > > > > > > > > > > > +		 * from unexpected mempool, all
> recycled
> > > > > buffers
> > > > > > > > > > > > > > +		 * are put into mempool.
> > > > > > > > > > > > > > +		 */
> > > > > > > > > > > > > > +		if (nb_recycle_mbufs == 0)
> > > > > > > > > > > > > > +			for (i = 0; i < n; i++) {
> > > > > > > > > > > > > > +				if (rxep[i] != NULL)
> > > > > > > > > > > > > > +
> > > > > 	rte_mempool_put(rxep[i]-
> > > > > > > > > > >pool,
> > > > > > > > > > > > > > rxep[i]);
> > > > > > > > > > > > > > +			}
> > > > > > > > > > > > > > +	}
> > > > > > > > > > > > > > +
> > > > > > > > > > > [Konstantin] After another thought, it might be
> > > > > > > > > > > easier and cleaner
> > > > > > just to:
> > > > > > > > > > > if (rxep[j] == NULL || recycle_rxq_info->mp !=
> > > > > > > > > > > rxep[j].mbuf-
> > >pool)
> > > > > > > > > > > 	nb_recycle_mbufs = 0;
> > > > > > > > > > >
> > > > > > > > > > > Anyway, from my understanding - if
> > > > > > > > > > > rte_pktmbuf_prefree_seg(mbuf) returns NULL, then we
> > > > > > > > > > > can't recycle
> > > > > > that mbuf.
> > > > > > > > > > >
> > > > > > > > > > > [Feifei] Agree with that
> > > > > > > > > > > 'rte_pktmbuf_prefree_seg(mbuf) returns NULL, then
> > > > > > > > > > we can't recycle that mbuf'.
> > > > > > > > > > >
> > > > > > > > > > > Firstly, we should know for i40e driver,  the number
> > > > > > > > > > > of free mbufs is fixed, it
> > > > > > > > > > must equal to 'tx_rs_thresh'
> > > > > > > > > > > This means if we start to free Tx mbufs, it cannot
> > > > > > > > > > > be interrupted until the
> > > > > > > > > > given amount of mbufs are freed.
> > > > > > > > > > > In the meanwhile, doing prefree operation for a Tx
> > > > > > > > > > > mbuf can be looked at this mbuf is freed from this
> > > > > > > > > > > TX sw-ring if the API returns NULL. This is due
> > > > > > > > > > to that call 'prefree_seg' function means update the
> > > > > > > > > > mbuf
> > refcnt.
> > > > > > > > > > >
> > > > > > > > > > > So let's come back our recycle mbuf case.
> > > > > > > > > > > Here we consider that the case if we have 32 mbufs
> > > > > > > > > > > need to be freed, and
> > > > > > > > > > we firstly do the pre-free.
> > > > > > > > > > > And then first 8 mbufs is good and return value is not none.
> > > > > > > > > > > But the 9th
> > > > > > > > > > mbuf is bad, its refcnt is more than 1.
> > > > > > > > > > > So we just update its refcnt and cannot put it into Rx sw-
> ring.
> > > > > > > > > > > For Tx sw-ring,
> > > > > > > > > > this mbuf has been freed.
> > > > > > > > > > > Then we should continue to do pre-free operation for
> > > > > > > > > > > the next Tx mbufs to ensure the given amount of
> > > > > > > > > > > mbufs are
> > freed.
> > > > > > > > > > >
> > > > > > > > > > > Do a conclusion for this, this is because if we
> > > > > > > > > > > start to do pre-free operation,  the Tx mbuf refcnt
> > > > > > > > > > > value maybe changed, so we cannot stop or
> > > > > > > > > > break until finish all the pre-free operation.
> > > > > > > > > > >
> > > > > > > > > > > Finally, in the case that Rx refill_request is not
> > > > > > > > > > > zero, but the valid mbuf amount is less than this
> > > > > > > > > > > value, we must put back this Tx mbufs into
> > > > > > > > > > mempool.
> > > > > > > > > > >
> > > > > > > > > > > Above is the reason why I do not directly jump out
> > > > > > > > > > > of the loop if some mbuf
> > > > > > > > > > return value is NULL.
> > > > > > > > > >
> > > > > > > > > > Yep, I already realized that it is a bit more
> > > > > > > > > > complicated and we need to continue with prefree() for
> > > > > > > > > > all packets even when we get NULL in
> > > > > > > > the middle.
> > > > > > > > > > Anyway the code has to be changed, right?
> > > > > > > > > >
> > > > > > > > > Sorry, I think for the code, it is unnecessary to be changed.
> > > > > > > > > For no fast free path, currently:
> > > > > > > > > 1.   We check whether each mbuf is Ok and call pre_free
> function
> > > > > > > > > --------------------------------------------------------
> > > > > > > > > --
> > > > > > > > > ----
> > > > > > > > > --
> > > > > > > > > --
> > > > > > > > > ----
> > > > > > > > > --------------------------------------------------------
> > > > > > > > > --
> > > > > > > > > ----
> > > > > > > > > 2.1 For the mbuf return value is not NULL, it is put
> > > > > > > > > into Rx sw-
> > ring.
> > > > > > > > > 2.2 For the mbuf return value is zero and
> > > > > > > > > refill-request, it will also firstly put into Rx
> > > > > > > > > sw-ring, and we set nb_recycle =
> > > > > > > > > 0
> > > > > > > > > --------------------------------------------------------
> > > > > > > > > --
> > > > > > > > > ----
> > > > > > > > > --
> > > > > > > > > --
> > > > > > > > > ----
> > > > > > > > > --------------------------------------------------------
> > > > > > > > > --
> > > > > > > > > ----
> > > > > > > > > 3.1 We check nb_recycle, if it is not 0, we will
> > > > > > > > > continue to rearm Rx descs
> > > > > > > > and update its index by call descs_refill function.
> > > > > > > > > 3.2 if nb_recycle is 0, we will put valid recycle mbufs
> > > > > > > > > back into mempool as general path. This is necessary,
> > > > > > > > > because we need to ensure the freed Tx number is
> > > > > > > > > fixed.(Some buffers return is null can be seen as freed,
> > > > > > > > > others need to be put into mempool)
> > > > > > > > >
> > > > > > > > > Or maybe I ignore something?
> > > > > > > >
> > > > > > > >
> > > > > > > > I am talking about the case when both refill_requirement
> > > > > > > > and mbuf return values iare zero:
> > > > > > > > if (unlikely((rxep[i] == NULL && refill_requirement) ||         // ???
> > > > rxep[i]
> > > > > ==
> > > > > > 0
> > > > > > > > AND refill_requirement == 0 ???
> > > > > > > >  		recycle_rxq_info->mp != txep[i].mbuf->pool))
> > > > > > > > 	nb_recycle_mbufs = 0;
> > > > > > > >
> > > > > > > > As I can read the code you treat such situation as valid,
> > > > > > > > while I think we should reset nb_recycle_mbufs to zero
> > > > > > > > when rxep[i] == NULL, no matter what value refill_requirement is.
> > > > > > >
> > > > > > > So this means for maybe MLX5 driver, its refill_request = 0.
> > > > > > > And if some mbufs return value is zero, the other mbufs can
> > > > > > > not be recycled into Rx sw-ring? Because nb_recycle=0, and
> > > > > > > they need to be put into
> > > > > > mempool.
> > > > > > >
> > > > > > > I think for such as MLX5 driver, we can allow recycle some
> > > > > > > valid mbufs into
> > > > > > Rx ring.
> > > > > > > Because no constraint for its refill number. Is my
> > > > > > > suggestion
> > reasonable?
> > > > > >
> > > > > > I suppose yes: if refill_request is zero we potentially can skip 'bad'
> > > > > > mbufs and continue with recycling for remaining ones.
> > > > > > It would probably require more changes in current code, but
> > > > > > sounds ok to me in general.
> > > > > That's Ok. Thanks for your careful reviewing.
> > > >
> > > > I'm very sorry not to receive your e-mail and until now I realize
> > > > we need to do some code change for i40e driver. Also thanks ferruh
> > > > to kindly
> > remind this.
> >
> > No worries at all, and thanks to you and Ferruh for the fast reaction.
> >
> > > >
> > > > Agree with you we need some operation for the case that
> > > > (refill_requirement == 0 &&  rxep[i] == 0).
> > > > Thus maybe we can do a change as follows:
> > > >
> > > > for (i = 0; i < n; i++) {
> > > > 	rxep[0] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> > > > 	if (unlikely((rxep[0] == NULL && refill_requirement) ||
> > > > 			recycle_rxq_info->mp != txep[i].mbuf->pool))
> > > > 		nb_recycle_mbufs = 0;
> > > > 	if (likely(rxep[0]))
> > > > 		rxep++;
> > > > }
> > > >
> > > > Is above change is Ok?
> > > >
> > >
> > > Just do a change for the above code:
> > > --------------------------------------------------------------------
> > > --
> > > -------------------------------
> > > int j = 0;
> > > ......
> > >
> > >  for (i = 0; i < n; i++) {
> > >  	rxep[j] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> > > 	if (unlikely(rxep[j] == NULL  && !refill_requirement))
> > >  		continue;
> > > 	if (unlikely((rxep[j] == NULL && refill_requirement) ||
> > >  			recycle_rxq_info->mp != txep[i].mbuf->pool))
> > >  		nb_recycle_mbufs  = 0;
> > > 	j++;
> > >  }
> > >
> > > if (nb_recycle_mbufs  == 0)
> > > 	for (i = 0; i < j; i++) {
> > > 		if (rxep[i] != NULL)
> > > 			rte_mempool_put(rxep[i]->pool, rxep[i]);
> > > 	}
> >
> > I think it should work...
> > One more thing, shouldn't we reset
> > nb_recycle_mbufs  = j;
> >
> > Something like:
> >  if (nb_recycle_mbufs  == 0) {
> >  	for (i = 0; i < j; i++) {
> >  		if (rxep[i] != NULL)
> >  			rte_mempool_put(rxep[i]->pool, rxep[i]);
> >  	}
> > } else
> >   nb_recycle_mbufs  = j;
> >
> > ?
> >
> >
> Yes, we should reset it.
> 
> I just did a performance test with the new code and found that performance
> decreases by 8% compared with the old version.
> 
> Thus I think we should come back your previous idea:
> "we should reset nb_recycle_mbufs to zero when rxep[i] == NULL, no matter
> what value refill_requirement is":
> 
> for (i = 0; i < n; i++) {
> 	rxep[i] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
> 	if (unlikely((rxep[i] == NULL) ||
> 			recycle_rxq_info->mp != txep[i].mbuf->pool))
> 		nb_recycle_mbufs  = 0;
> }
> 
> Thus we drop the case of putting the remaining valid mbufs into the Rx sw-ring
> when there is no refill_request, but we maintain good performance.

[Konstantin] I am ok with that way too.
Again, that way it looks simpler and is probably less error-prone.

Thanks
Konstantin
[Feifei] Agree with this, I will update a new version to the community.
> 
> >
> >
> >
> > > --------------------------------------------------------------------
> > > --
> > > -----------------------------------
> 
> > > > > >
> > > > > > > >
> > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > > > > +	/* Update counters for Tx. */
> > > > > > > > > > > > > > +	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free
> > > > > > > > > > > > > > ++
> > > > > > > > > > > > > > +txq-
> > > > > > > > > > >tx_rs_thresh);
> > > > > > > > > > > > > > +	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd
> > > > > > > > > > > > > > ++
> > > > > > > > > > > > > > +txq-
> > > > > > > > > > >tx_rs_thresh);
> > > > > > > > > > > > > > +	if (txq->tx_next_dd >= txq->nb_tx_desc)
> > > > > > > > > > > > > > +		txq->tx_next_dd = (uint16_t)(txq-
> > > > > >tx_rs_thresh -
> > > > > > > > > > > > > > +1);
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > > +	return nb_recycle_mbufs; }
> > > > > > > > > > > > > > diff --git a/drivers/net/i40e/i40e_rxtx.c
> > > > > > > > > > > > > > b/drivers/net/i40e/i40e_rxtx.c index
> > > > > > > > > > > > > > b4f65b58fa..a9c9eb331c
> > > > > > > > > > > > > > 100644
> > > > > > > > > > > > > > --- a/drivers/net/i40e/i40e_rxtx.c
> > > > > > > > > > > > > > +++ b/drivers/net/i40e/i40e_rxtx.c
> > > > > > > > > > > > > > @@ -3199,6 +3199,30 @@
> > > > > > > > > > > > > > i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t
> queue_id,
> > > > > > > > > > > > > >  	qinfo->conf.offloads = txq->offloads;  }
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > +void
> > > > > > > > > > > > > > +i40e_recycle_rxq_info_get(struct rte_eth_dev
> > > > > > > > > > > > > > +*dev, uint16_t
> > > > > > > > > > queue_id,
> > > > > > > > > > > > > > +	struct rte_eth_recycle_rxq_info
> *recycle_rxq_info) {
> > > > > > > > > > > > > > +	struct i40e_rx_queue *rxq;
> > > > > > > > > > > > > > +	struct i40e_adapter *ad =
> > > > > > > > > > > > > > +		I40E_DEV_PRIVATE_TO_ADAPTER(dev-
> >data-
> > > > > > > > > > >dev_private);
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > > +	rxq = dev->data->rx_queues[queue_id];
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > > +	recycle_rxq_info->mbuf_ring = (void *)rxq-
> >sw_ring;
> > > > > > > > > > > > > > +	recycle_rxq_info->mp = rxq->mp;
> > > > > > > > > > > > > > +	recycle_rxq_info->mbuf_ring_size = rxq-
> >nb_rx_desc;
> > > > > > > > > > > > > > +	recycle_rxq_info->receive_tail =
> > > > > > > > > > > > > > +&rxq->rx_tail;
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > > +	if (ad->rx_vec_allowed) {
> > > > > > > > > > > > > > +		recycle_rxq_info->refill_requirement =
> > > > > > > > > > > > > > RTE_I40E_RXQ_REARM_THRESH;
> > > > > > > > > > > > > > +		recycle_rxq_info->refill_head = &rxq-
> > > > > >rxrearm_start;
> > > > > > > > > > > > > > +	} else {
> > > > > > > > > > > > > > +		recycle_rxq_info->refill_requirement =
> rxq-
> > > > > > > > > > >rx_free_thresh;
> > > > > > > > > > > > > > +		recycle_rxq_info->refill_head = &rxq-
> > > > > >rx_free_trigger;
> > > > > > > > > > > > > > +	}
> > > > > > > > > > > > > > +}
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > >  #ifdef RTE_ARCH_X86  static inline bool
> > > > > > > > > > > > > > get_avx_supported(bool request_avx512) @@
> > > > > > > > > > > > > > -3293,6
> > > > > > > > > > > > > > +3317,8
> > > > > > > > @@
> > > > > > > > > > > > > > i40e_set_rx_function(struct rte_eth_dev *dev)
> > > > > > > > > > > > > >  				dev->rx_pkt_burst =
> ad-
> > > > > > > > >rx_use_avx2 ?
> > > > > > > > > > > > > >
> > > > > > > > 	i40e_recv_scattered_pkts_vec_avx2 :
> > > > > > > > > > > > > >
> > > > > 	i40e_recv_scattered_pkts_vec;
> > > > > > > > > > > > > > +				dev-
> > > > > >recycle_rx_descriptors_refill =
> > > > > > > > > > > > > > +
> > > > > > > > > > 	i40e_recycle_rx_descriptors_refill_vec;
> > > > > > > > > > > > > >  			}
> > > > > > > > > > > > > >  		} else {
> > > > > > > > > > > > > >  			if (ad->rx_use_avx512) { @@ -
> 3311,9
> > > > > +3337,12
> > > > > > @@
> > > > > > > > > > > > > > i40e_set_rx_function(struct rte_eth_dev
> > > > > > > > > > *dev)
> > > > > > > > > > > > > >  				dev->rx_pkt_burst =
> ad-
> > > > > > > > >rx_use_avx2 ?
> > > > > > > > > > > > > >
> > > > > 	i40e_recv_pkts_vec_avx2 :
> > > > > > > > > > > > > >
> 	i40e_recv_pkts_vec;
> > > > > > > > > > > > > > +				dev-
> > > > > >recycle_rx_descriptors_refill =
> > > > > > > > > > > > > > +
> > > > > > > > > > 	i40e_recycle_rx_descriptors_refill_vec;
> > > > > > > > > > > > > >  			}
> > > > > > > > > > > > > >  		}
> > > > > > > > > > > > > >  #else /* RTE_ARCH_X86 */
> > > > > > > > > > > > > > +		dev->recycle_rx_descriptors_refill =
> > > > > > > > > > > > > > +i40e_recycle_rx_descriptors_refill_vec;
> > > > > > > > > > > > > >  		if (dev->data->scattered_rx) {
> > > > > > > > > > > > > >  			PMD_INIT_LOG(DEBUG,
> > > > > > > > > > > > > >  				     "Using Vector
> Scattered Rx
> > > > > > > > (port %d).", @@
> > > > > > > > > > > > > > -3481,15 +3510,18 @@
> > > > > > > > > > > > > > i40e_set_tx_function(struct rte_eth_dev
> > > > > > > > *dev)
> > > > > > > > > > > > > >  				dev->tx_pkt_burst =
> ad-
> > > > > > > > >tx_use_avx2 ?
> > > > > > > > > > > > > >
> > > > > > > > i40e_xmit_pkts_vec_avx2 :
> > > > > > > > > > > > > >
> > > > > > > > i40e_xmit_pkts_vec;
> > > > > > > > > > > > > > +				dev-
> >recycle_tx_mbufs_reuse
> > > > > =
> > > > > > > > > > > > > > i40e_recycle_tx_mbufs_reuse_vec;
> > > > > > > > > > > > > >  			}
> > > > > > > > > > > > > >  #else /* RTE_ARCH_X86 */
> > > > > > > > > > > > > >  			PMD_INIT_LOG(DEBUG,
> "Using Vector
> > > > > Tx
> > > > > > > > (port %d).",
> > > > > > > > > > > > > >  				     dev->data-
> >port_id);
> > > > > > > > > > > > > >  			dev->tx_pkt_burst =
> > > > > i40e_xmit_pkts_vec;
> > > > > > > > > > > > > > +			dev->recycle_tx_mbufs_reuse
> =
> > > > > > > > > > > > > > i40e_recycle_tx_mbufs_reuse_vec;  #endif /*
> > > > > > > > > > > > > > RTE_ARCH_X86
> > > > > */
> > > > > > > > > > > > > >  		} else {
> > > > > > > > > > > > > >  			PMD_INIT_LOG(DEBUG,
> "Simple tx
> > > > > finally be
> > > > > > > > used.");
> > > > > > > > > > > > > >  			dev->tx_pkt_burst =
> > > > > i40e_xmit_pkts_simple;
> > > > > > > > > > > > > > +			dev->recycle_tx_mbufs_reuse
> =
> > > > > > > > > > > > > > i40e_recycle_tx_mbufs_reuse_vec;
> > > > > > > > > > > > > >  		}
> > > > > > > > > > > > > >  		dev->tx_pkt_prepare =
> > > > > i40e_simple_prep_pkts;
> > > > > > > > > > > > > >  	} else {
> > > > > > > > > > > > > > diff --git a/drivers/net/i40e/i40e_rxtx.h
> > > > > > > > > > > > > > b/drivers/net/i40e/i40e_rxtx.h index
> > > > > > > > > > > > > > a8686224e5..b191f23e1f
> > > > > > > > > > > > > > 100644
> > > > > > > > > > > > > > --- a/drivers/net/i40e/i40e_rxtx.h
> > > > > > > > > > > > > > +++ b/drivers/net/i40e/i40e_rxtx.h
> > > > > > > > > > > > > > @@ -236,6 +236,10 @@ uint32_t
> > > > > > > > > > > > > > i40e_dev_rx_queue_count(void *rx_queue);  int
> > > > > > > > > > > > > > i40e_dev_rx_descriptor_status(void *rx_queue,
> > > > > > > > > > > > > > uint16_t offset);  int
> > > > > > > > > > > > > > i40e_dev_tx_descriptor_status(void
> > > > > > > > > > > > > > *tx_queue, uint16_t offset);
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > +uint16_t i40e_recycle_tx_mbufs_reuse_vec(void
> > > > *tx_queue,
> > > > > > > > > > > > > > +		struct rte_eth_recycle_rxq_info
> > > > > *recycle_rxq_info);
> > > > > > > > > > void
> > > > > > > > > > > > > > +i40e_recycle_rx_descriptors_refill_vec(void
> > > > > > > > > > > > > > +*rx_queue, uint16_t nb_mbufs);
> > > > > > > > > > > > > > +
> > > > > > > > > > > > > >  uint16_t i40e_recv_pkts_vec(void *rx_queue,
> > > > > > > > > > > > > > struct rte_mbuf
> > > > > > > > > > **rx_pkts,
> > > > > > > > > > > > > >  			    uint16_t nb_pkts);  uint16_t
> > > > > > > > > > > > > > i40e_recv_scattered_pkts_vec(void *rx_queue,
> > > > > > > > > > > > > > diff --git a/drivers/net/i40e/meson.build
> > > > > > > > > > > > > > b/drivers/net/i40e/meson.build index
> > > > > > > > > > > > > > 8e53b87a65..3b1a233c84 100644
> > > > > > > > > > > > > > --- a/drivers/net/i40e/meson.build
> > > > > > > > > > > > > > +++ b/drivers/net/i40e/meson.build
> > > > > > > > > > > > > > @@ -34,6 +34,7 @@ sources = files(
> > > > > > > > > > > > > >          'i40e_tm.c',
> > > > > > > > > > > > > >          'i40e_hash.c',
> > > > > > > > > > > > > >          'i40e_vf_representor.c',
> > > > > > > > > > > > > > +	'i40e_recycle_mbufs_vec_common.c',
> > > > > > > > > > > > > >          'rte_pmd_i40e.c',
> > > > > > > > > > > > > >  )
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > --
> > > > > > > > > > > > > > 2.25.1
  

Patch

diff --git a/drivers/net/i40e/i40e_ethdev.c b/drivers/net/i40e/i40e_ethdev.c
index 8271bbb394..50ba9aac94 100644
--- a/drivers/net/i40e/i40e_ethdev.c
+++ b/drivers/net/i40e/i40e_ethdev.c
@@ -496,6 +496,7 @@  static const struct eth_dev_ops i40e_eth_dev_ops = {
 	.flow_ops_get                 = i40e_dev_flow_ops_get,
 	.rxq_info_get                 = i40e_rxq_info_get,
 	.txq_info_get                 = i40e_txq_info_get,
+	.recycle_rxq_info_get         = i40e_recycle_rxq_info_get,
 	.rx_burst_mode_get            = i40e_rx_burst_mode_get,
 	.tx_burst_mode_get            = i40e_tx_burst_mode_get,
 	.timesync_enable              = i40e_timesync_enable,
diff --git a/drivers/net/i40e/i40e_ethdev.h b/drivers/net/i40e/i40e_ethdev.h
index 6f65d5e0ac..af758798e1 100644
--- a/drivers/net/i40e/i40e_ethdev.h
+++ b/drivers/net/i40e/i40e_ethdev.h
@@ -1355,6 +1355,8 @@  void i40e_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	struct rte_eth_rxq_info *qinfo);
 void i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	struct rte_eth_txq_info *qinfo);
+void i40e_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
+	struct rte_eth_recycle_rxq_info *recycle_rxq_info);
 int i40e_rx_burst_mode_get(struct rte_eth_dev *dev, uint16_t queue_id,
 			   struct rte_eth_burst_mode *mode);
 int i40e_tx_burst_mode_get(struct rte_eth_dev *dev, uint16_t queue_id,
diff --git a/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
new file mode 100644
index 0000000000..5663ecccde
--- /dev/null
+++ b/drivers/net/i40e/i40e_recycle_mbufs_vec_common.c
@@ -0,0 +1,147 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright (c) 2023 Arm Limited.
+ */
+
+#include <stdint.h>
+#include <ethdev_driver.h>
+
+#include "base/i40e_prototype.h"
+#include "base/i40e_type.h"
+#include "i40e_ethdev.h"
+#include "i40e_rxtx.h"
+
+#pragma GCC diagnostic ignored "-Wcast-qual"
+
+void
+i40e_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t nb_mbufs)
+{
+	struct i40e_rx_queue *rxq = rx_queue;
+	struct i40e_rx_entry *rxep;
+	volatile union i40e_rx_desc *rxdp;
+	uint16_t rx_id;
+	uint64_t paddr;
+	uint64_t dma_addr;
+	uint16_t i;
+
+	rxdp = rxq->rx_ring + rxq->rxrearm_start;
+	rxep = &rxq->sw_ring[rxq->rxrearm_start];
+
+	for (i = 0; i < nb_mbufs; i++) {
+		/* Initialize rxdp descs. */
+		paddr = (rxep[i].mbuf)->buf_iova + RTE_PKTMBUF_HEADROOM;
+		dma_addr = rte_cpu_to_le_64(paddr);
+		/* flush desc with pa dma_addr */
+		rxdp[i].read.hdr_addr = 0;
+		rxdp[i].read.pkt_addr = dma_addr;
+	}
+
+	/* Update the descriptor initializer index */
+	rxq->rxrearm_start += nb_mbufs;
+	rx_id = rxq->rxrearm_start - 1;
+
+	if (unlikely(rxq->rxrearm_start >= rxq->nb_rx_desc)) {
+		rxq->rxrearm_start = 0;
+		rx_id = rxq->nb_rx_desc - 1;
+	}
+
+	rxq->rxrearm_nb -= nb_mbufs;
+
+	rte_io_wmb();
+	/* Update the tail pointer on the NIC */
+	I40E_PCI_REG_WRITE_RELAXED(rxq->qrx_tail, rx_id);
+}
+
+uint16_t
+i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
+	struct rte_eth_recycle_rxq_info *recycle_rxq_info)
+{
+	struct i40e_tx_queue *txq = tx_queue;
+	struct i40e_tx_entry *txep;
+	struct rte_mbuf **rxep;
+	int i, n;
+	uint16_t nb_recycle_mbufs;
+	uint16_t avail = 0;
+	uint16_t mbuf_ring_size = recycle_rxq_info->mbuf_ring_size;
+	uint16_t mask = recycle_rxq_info->mbuf_ring_size - 1;
+	uint16_t refill_requirement = recycle_rxq_info->refill_requirement;
+	uint16_t refill_head = *recycle_rxq_info->refill_head;
+	uint16_t receive_tail = *recycle_rxq_info->receive_tail;
+
+	/* Get available recycling Rx buffers. */
+	avail = (mbuf_ring_size - (refill_head - receive_tail)) & mask;
+
+	/* Check Tx free thresh and Rx available space. */
+	if (txq->nb_tx_free > txq->tx_free_thresh || avail <= txq->tx_rs_thresh)
+		return 0;
+
+	/* check DD bits on threshold descriptor */
+	if ((txq->tx_ring[txq->tx_next_dd].cmd_type_offset_bsz &
+				rte_cpu_to_le_64(I40E_TXD_QW1_DTYPE_MASK)) !=
+			rte_cpu_to_le_64(I40E_TX_DESC_DTYPE_DESC_DONE))
+		return 0;
+
+	n = txq->tx_rs_thresh;
+	nb_recycle_mbufs = n;
+
+	/* Mbufs recycle mode can only support no ring buffer wrapping around.
+	 * Two case for this:
+	 *
+	 * case 1: The refill head of Rx buffer ring needs to be aligned with
+	 * mbuf ring size. In this case, the number of Tx freeing buffers
+	 * should be equal to refill_requirement.
+	 *
+	 * case 2: The refill head of Rx ring buffer does not need to be aligned
+	 * with mbuf ring size. In this case, the update of refill head can not
+	 * exceed the Rx mbuf ring size.
+	 */
+	if (refill_requirement != n ||
+		(!refill_requirement && (refill_head + n > mbuf_ring_size)))
+		return 0;
+
+	/* First buffer to free from S/W ring is at index
+	 * tx_next_dd - (tx_rs_thresh-1).
+	 */
+	txep = &txq->sw_ring[txq->tx_next_dd - (n - 1)];
+	rxep = recycle_rxq_info->mbuf_ring;
+	rxep += refill_head;
+
+	if (txq->offloads & RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE) {
+		/* Avoid txq contains buffers from unexpected mempool. */
+		if (unlikely(recycle_rxq_info->mp
+					!= txep[0].mbuf->pool))
+			return 0;
+
+		/* Directly put mbufs from Tx to Rx. */
+		for (i = 0; i < n; i++)
+			rxep[i] = txep[i].mbuf;
+	} else {
+		for (i = 0; i < n; i++) {
+			rxep[i] = rte_pktmbuf_prefree_seg(txep[i].mbuf);
+
+			/* If Tx buffers are not the last reference or from
+			 * unexpected mempool, previous copied buffers are
+			 * considered as invalid.
+			 */
+			if (unlikely((rxep[i] == NULL && refill_requirement) ||
+					recycle_rxq_info->mp != txep[i].mbuf->pool))
+				nb_recycle_mbufs = 0;
+		}
+		/* If Tx buffers are not the last reference or
+		 * from unexpected mempool, all recycled buffers
+		 * are put into mempool.
+		 */
+		if (nb_recycle_mbufs == 0)
+			for (i = 0; i < n; i++) {
+				if (rxep[i] != NULL)
+					rte_mempool_put(rxep[i]->pool, rxep[i]);
+			}
+	}
+
+	/* Update counters for Tx. */
+	txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
+	txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);
+	if (txq->tx_next_dd >= txq->nb_tx_desc)
+		txq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);
+
+	return nb_recycle_mbufs;
+}
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index b4f65b58fa..a9c9eb331c 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -3199,6 +3199,30 @@  i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 	qinfo->conf.offloads = txq->offloads;
 }
 
+void
+i40e_recycle_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
+	struct rte_eth_recycle_rxq_info *recycle_rxq_info)
+{
+	struct i40e_rx_queue *rxq;
+	struct i40e_adapter *ad =
+		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
+
+	rxq = dev->data->rx_queues[queue_id];
+
+	recycle_rxq_info->mbuf_ring = (void *)rxq->sw_ring;
+	recycle_rxq_info->mp = rxq->mp;
+	recycle_rxq_info->mbuf_ring_size = rxq->nb_rx_desc;
+	recycle_rxq_info->receive_tail = &rxq->rx_tail;
+
+	if (ad->rx_vec_allowed) {
+		recycle_rxq_info->refill_requirement = RTE_I40E_RXQ_REARM_THRESH;
+		recycle_rxq_info->refill_head = &rxq->rxrearm_start;
+	} else {
+		recycle_rxq_info->refill_requirement = rxq->rx_free_thresh;
+		recycle_rxq_info->refill_head = &rxq->rx_free_trigger;
+	}
+}
+
 #ifdef RTE_ARCH_X86
 static inline bool
 get_avx_supported(bool request_avx512)
@@ -3293,6 +3317,8 @@  i40e_set_rx_function(struct rte_eth_dev *dev)
 				dev->rx_pkt_burst = ad->rx_use_avx2 ?
 					i40e_recv_scattered_pkts_vec_avx2 :
 					i40e_recv_scattered_pkts_vec;
+				dev->recycle_rx_descriptors_refill =
+					i40e_recycle_rx_descriptors_refill_vec;
 			}
 		} else {
 			if (ad->rx_use_avx512) {
@@ -3311,9 +3337,12 @@  i40e_set_rx_function(struct rte_eth_dev *dev)
 				dev->rx_pkt_burst = ad->rx_use_avx2 ?
 					i40e_recv_pkts_vec_avx2 :
 					i40e_recv_pkts_vec;
+				dev->recycle_rx_descriptors_refill =
+					i40e_recycle_rx_descriptors_refill_vec;
 			}
 		}
 #else /* RTE_ARCH_X86 */
+		dev->recycle_rx_descriptors_refill = i40e_recycle_rx_descriptors_refill_vec;
 		if (dev->data->scattered_rx) {
 			PMD_INIT_LOG(DEBUG,
 				     "Using Vector Scattered Rx (port %d).",
@@ -3481,15 +3510,18 @@  i40e_set_tx_function(struct rte_eth_dev *dev)
 				dev->tx_pkt_burst = ad->tx_use_avx2 ?
 						    i40e_xmit_pkts_vec_avx2 :
 						    i40e_xmit_pkts_vec;
+				dev->recycle_tx_mbufs_reuse = i40e_recycle_tx_mbufs_reuse_vec;
 			}
 #else /* RTE_ARCH_X86 */
 			PMD_INIT_LOG(DEBUG, "Using Vector Tx (port %d).",
 				     dev->data->port_id);
 			dev->tx_pkt_burst = i40e_xmit_pkts_vec;
+			dev->recycle_tx_mbufs_reuse = i40e_recycle_tx_mbufs_reuse_vec;
 #endif /* RTE_ARCH_X86 */
 		} else {
 			PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
 			dev->tx_pkt_burst = i40e_xmit_pkts_simple;
+			dev->recycle_tx_mbufs_reuse = i40e_recycle_tx_mbufs_reuse_vec;
 		}
 		dev->tx_pkt_prepare = i40e_simple_prep_pkts;
 	} else {
diff --git a/drivers/net/i40e/i40e_rxtx.h b/drivers/net/i40e/i40e_rxtx.h
index a8686224e5..b191f23e1f 100644
--- a/drivers/net/i40e/i40e_rxtx.h
+++ b/drivers/net/i40e/i40e_rxtx.h
@@ -236,6 +236,10 @@  uint32_t i40e_dev_rx_queue_count(void *rx_queue);
 int i40e_dev_rx_descriptor_status(void *rx_queue, uint16_t offset);
 int i40e_dev_tx_descriptor_status(void *tx_queue, uint16_t offset);
 
+uint16_t i40e_recycle_tx_mbufs_reuse_vec(void *tx_queue,
+		struct rte_eth_recycle_rxq_info *recycle_rxq_info);
+void i40e_recycle_rx_descriptors_refill_vec(void *rx_queue, uint16_t nb_mbufs);
+
 uint16_t i40e_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
 			    uint16_t nb_pkts);
 uint16_t i40e_recv_scattered_pkts_vec(void *rx_queue,
diff --git a/drivers/net/i40e/meson.build b/drivers/net/i40e/meson.build
index 8e53b87a65..3b1a233c84 100644
--- a/drivers/net/i40e/meson.build
+++ b/drivers/net/i40e/meson.build
@@ -34,6 +34,7 @@  sources = files(
         'i40e_tm.c',
         'i40e_hash.c',
         'i40e_vf_representor.c',
+        'i40e_recycle_mbufs_vec_common.c',
         'rte_pmd_i40e.c',
 )