[v5,1/3] ethdev: add API for buffer recycle mode

Message ID 20230330062939.1206267-2-feifei.wang2@arm.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Headers
Series Recycle buffers from Tx to Rx |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Feifei Wang March 30, 2023, 6:29 a.m. UTC
  There are 4 upper APIs for buffer recycle mode:
1. 'rte_eth_rx_queue_buf_recycle_info_get'
This is to retrieve buffer ring information about given ports's Rx
queue in buffer recycle mode. And due to this, buffer recycle can
be no longer limited to the same driver in Rx and Tx.

2. 'rte_eth_dev_buf_recycle'
Users can call this API to enable buffer recycle mode in data path.
There are 2 internal APIs in it, which is separately for Rx and TX.

3. 'rte_eth_tx_buf_stash'
Internal API for buffer recycle mode. This is to stash Tx used
buffers into Rx buffer ring.

4. 'rte_eth_rx_descriptors_refill'
Internal API for buffer recycle mode. This is to refill Rx
descriptors.

Above all APIs are just implemented at the upper level.
For different APIs, we need to define specific functions separately.

Suggested-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Suggested-by: Ruifeng Wang <ruifeng.wang@arm.com>
Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
---
 lib/ethdev/ethdev_driver.h   |  10 ++
 lib/ethdev/ethdev_private.c  |   2 +
 lib/ethdev/rte_ethdev.c      |  33 +++++
 lib/ethdev/rte_ethdev.h      | 230 +++++++++++++++++++++++++++++++++++
 lib/ethdev/rte_ethdev_core.h |  15 ++-
 lib/ethdev/version.map       |   6 +
 6 files changed, 294 insertions(+), 2 deletions(-)
  

Comments

Morten Brørup March 30, 2023, 7:19 a.m. UTC | #1
> From: Feifei Wang [mailto:feifei.wang2@arm.com]
> Sent: Thursday, 30 March 2023 08.30
> 

[...]

> +/**
> + * @internal
> + * Rx routine for rte_eth_dev_buf_recycle().
> + * Refill Rx descriptors in buffer recycle mode.
> + *
> + * @note
> + * This API can only be called by rte_eth_dev_buf_recycle().
> + * Before calling this API, rte_eth_tx_buf_stash() should be
> + * called to stash Tx used buffers into Rx buffer ring.
> + *
> + * When this functionality is not implemented in the driver, the return
> + * buffer number is 0.
> + *
> + * @param port_id
> + *   The port identifier of the Ethernet device.
> + * @param queue_id
> + *   The index of the receive queue.
> + *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
> + *   to rte_eth_dev_configure().
> + *@param nb
> + *   The number of Rx descriptors to be refilled.
> + * @return
> + *   The number Rx descriptors correct to be refilled.
> + *   - ENODEV: bad port or queue (only if compiled with debug).

If you want errors reported by the return value, the function return type cannot be uint16_t.

> + */
> +static inline uint16_t rte_eth_rx_descriptors_refill(uint16_t port_id,
> +		uint16_t queue_id, uint16_t nb)
> +{
> +	struct rte_eth_fp_ops *p;
> +	void *qd;
> +
> +#ifdef RTE_ETHDEV_DEBUG_RX
> +	if (port_id >= RTE_MAX_ETHPORTS ||
> +			queue_id >= RTE_MAX_QUEUES_PER_PORT) {
> +		RTE_ETHDEV_LOG(ERR,
> +			"Invalid port_id=%u or queue_id=%u\n",
> +			port_id, queue_id);
> +		rte_errno = ENODEV;
> +		return 0;

If p->rx_descriptors_refill() is likely to return 0, this function should not use 0 as return value to indicate errors.

> +	}
> +#endif
> +
> +	p = &rte_eth_fp_ops[port_id];
> +	qd = p->rxq.data[queue_id];
> +
> +#ifdef RTE_ETHDEV_DEBUG_RX
> +	if (!rte_eth_dev_is_valid_port(port_id)) {
> +		RTE_ETHDEV_LOG(ERR, "Invalid Rx port_id=%u\n", port_id);
> +		rte_errno = ENODEV;
> +		return 0;
> +
> +	if (qd == NULL) {
> +		RTE_ETHDEV_LOG(ERR, "Invalid Rx queue_id=%u for port_id=%u\n",
> +			queue_id, port_id);
> +		rte_errno = ENODEV;
> +		return 0;
> +	}
> +#endif
> +
> +	if (p->rx_descriptors_refill == NULL)
> +		return 0;
> +
> +	return p->rx_descriptors_refill(qd, nb);
> +}
> +
>  /**@{@name Rx hardware descriptor states
>   * @see rte_eth_rx_descriptor_status
>   */
> @@ -6483,6 +6597,122 @@ rte_eth_tx_buffer(uint16_t port_id, uint16_t queue_id,
>  	return rte_eth_tx_buffer_flush(port_id, queue_id, buffer);
>  }
> 
> +/**
> + * @internal
> + * Tx routine for rte_eth_dev_buf_recycle().
> + * Stash Tx used buffers into Rx buffer ring in buffer recycle mode.
> + *
> + * @note
> + * This API can only be called by rte_eth_dev_buf_recycle().
> + * After calling this API, rte_eth_rx_descriptors_refill() should be
> + * called to refill Rx ring descriptors.
> + *
> + * When this functionality is not implemented in the driver, the return
> + * buffer number is 0.
> + *
> + * @param port_id
> + *   The port identifier of the Ethernet device.
> + * @param queue_id
> + *   The index of the transmit queue.
> + *   The value must be in the range [0, nb_tx_queue - 1] previously supplied
> + *   to rte_eth_dev_configure().
> + * @param rxq_buf_recycle_info
> + *   A pointer to a structure of Rx queue buffer ring information in buffer
> + *   recycle mode.
> + *
> + * @return
> + *   The number buffers correct to be filled in the Rx buffer ring.
> + *   - ENODEV: bad port or queue (only if compiled with debug).

If you want errors reported by the return value, the function return type cannot be uint16_t.

> + */
> +static inline uint16_t rte_eth_tx_buf_stash(uint16_t port_id, uint16_t
> queue_id,
> +		struct rte_eth_rxq_buf_recycle_info *rxq_buf_recycle_info)
> +{
> +	struct rte_eth_fp_ops *p;
> +	void *qd;
> +
> +#ifdef RTE_ETHDEV_DEBUG_TX
> +	if (port_id >= RTE_MAX_ETHPORTS ||
> +			queue_id >= RTE_MAX_QUEUES_PER_PORT) {
> +		RTE_ETHDEV_LOG(ERR,
> +			"Invalid port_id=%u or queue_id=%u\n",
> +			port_id, queue_id);
> +		rte_errno = ENODEV;
> +		return 0;

If p->tx_buf_stash() is likely to return 0, this function should not use 0 as return value to indicate errors.

> +	}
> +#endif
> +
> +	p = &rte_eth_fp_ops[port_id];
> +	qd = p->txq.data[queue_id];
> +
> +#ifdef RTE_ETHDEV_DEBUG_TX
> +	if (!rte_eth_dev_is_valid_port(port_id)) {
> +		RTE_ETHDEV_LOG(ERR, "Invalid Tx port_id=%u\n", port_id);
> +		rte_errno = ENODEV;
> +		return 0;
> +
> +	if (qd == NULL) {
> +		RTE_ETHDEV_LOG(ERR, "Invalid Tx queue_id=%u for port_id=%u\n",
> +			queue_id, port_id);
> +		rte_erno = ENODEV;
> +		return 0;
> +	}
> +#endif
> +
> +	if (p->tx_buf_stash == NULL)
> +		return 0;
> +
> +	return p->tx_buf_stash(qd, rxq_buf_recycle_info);
> +}
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
> + *
> + * Buffer recycle mode can let Tx queue directly put used buffers into Rx
> buffer
> + * ring. This avoids freeing buffers into mempool and allocating buffers from
> + * mempool.

This function description generally describes the buffer recycle mode.

Please update it to describe what this specific function does.

> + *
> + * @param rx_port_id
> + *   Port identifying the receive side.
> + * @param rx_queue_id
> + *   The index of the receive queue identifying the receive side.
> + *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
> + *   to rte_eth_dev_configure().
> + * @param tx_port_id
> + *   Port identifying the transmit side.
> + * @param tx_queue_id
> + *   The index of the transmit queue identifying the transmit side.
> + *   The value must be in the range [0, nb_tx_queue - 1] previously supplied
> + *   to rte_eth_dev_configure().
> + * @param rxq_recycle_info
> + *   A pointer to a structure of type *rte_eth_txq_rearm_data* to be filled.
> + * @return
> + *   - (0) on success or no recycling buffer.

Why not return the return value of rte_eth_rx_descriptors_refill() instead of 0 on success? (This is a question, not a suggestion.)

Or, if rxq_buf_recycle_info must be valid, the function return type could be void instead of int.

> + *   - (-EINVAL) rxq_recycle_info is NULL.
> + */
> +__rte_experimental
> +static inline int
> +rte_eth_dev_buf_recycle(uint16_t rx_port_id, uint16_t rx_queue_id,
> +		uint16_t tx_port_id, uint16_t tx_queue_id,
> +		struct rte_eth_rxq_buf_recycle_info *rxq_buf_recycle_info)
> +{
> +	/* The number of recycling buffers. */
> +	uint16_t nb_buf;
> +
> +	if (!rxq_buf_recycle_info)
> +		return -EINVAL;

This is a fast path function. In which situation is this function called with rxq_buf_recycle_info == NULL?

If this function can genuinely be called with rxq_buf_recycle_info == NULL, you should test for (rxq_buf_recycle_info == NULL), not (! rxq_buf_recycle_info). Otherwise, I think RTE_ASSERT(rxq_buf_recycle_info != NULL) is more appropriate.

> +
> +	/* Stash Tx used buffers into Rx buffer ring */
> +	nb_buf = rte_eth_tx_buf_stash(tx_port_id, tx_queue_id,
> +				rxq_buf_recycle_info);
> +	/* If there are recycling buffers, refill Rx queue descriptors. */
> +	if (nb_buf)
> +		rte_eth_rx_descriptors_refill(rx_port_id, rx_queue_id,
> +					nb_buf);
> +
> +	return 0;
> +}
> +
>  /**
>   * @warning
>   * @b EXPERIMENTAL: this API may change without prior notice
> diff --git a/lib/ethdev/rte_ethdev_core.h b/lib/ethdev/rte_ethdev_core.h
> index dcf8adab92..a138fd4dbc 100644
> --- a/lib/ethdev/rte_ethdev_core.h
> +++ b/lib/ethdev/rte_ethdev_core.h
> @@ -56,6 +56,13 @@ typedef int (*eth_rx_descriptor_status_t)(void *rxq,
> uint16_t offset);
>  /** @internal Check the status of a Tx descriptor */
>  typedef int (*eth_tx_descriptor_status_t)(void *txq, uint16_t offset);
> 
> +/** @internal Stash Tx used buffers into RX ring in buffer recycle mode */
> +typedef uint16_t (*eth_tx_buf_stash_t)(void *txq,
> +		struct rte_eth_rxq_buf_recycle_info *rxq_buf_recycle_info);
> +
> +/** @internal Refill Rx descriptors in buffer recycle mode */
> +typedef uint16_t (*eth_rx_descriptors_refill_t)(void *rxq, uint16_t nb);

Please add proper function descriptions for the two callbacks above.

> +
>  /**
>   * @internal
>   * Structure used to hold opaque pointers to internal ethdev Rx/Tx
> @@ -90,9 +97,11 @@ struct rte_eth_fp_ops {
>  	eth_rx_queue_count_t rx_queue_count;
>  	/** Check the status of a Rx descriptor. */
>  	eth_rx_descriptor_status_t rx_descriptor_status;
> +	/** Refill Rx descriptors in buffer recycle mode */
> +	eth_rx_descriptors_refill_t rx_descriptors_refill;
>  	/** Rx queues data. */
>  	struct rte_ethdev_qdata rxq;
> -	uintptr_t reserved1[3];
> +	uintptr_t reserved1[4];

You added a function pointer above, so to keep the structure alignment, you must remove one here, not add one:

-	uintptr_t reserved1[3];
+	uintptr_t reserved1[2];

>  	/**@}*/
> 
>  	/**@{*/
> @@ -106,9 +115,11 @@ struct rte_eth_fp_ops {
>  	eth_tx_prep_t tx_pkt_prepare;
>  	/** Check the status of a Tx descriptor. */
>  	eth_tx_descriptor_status_t tx_descriptor_status;
> +	/** Stash Tx used buffers into RX ring in buffer recycle mode */
> +	eth_tx_buf_stash_t tx_buf_stash;
>  	/** Tx queues data. */
>  	struct rte_ethdev_qdata txq;
> -	uintptr_t reserved2[3];
> +	uintptr_t reserved2[4];

You added a function pointer above, so to keep the structure alignment, you must remove one here, not add one:

-	uintptr_t reserved1[3];
+	uintptr_t reserved1[2];

>  	/**@}*/
> 
>  } __rte_cache_aligned;
  
Feifei Wang March 30, 2023, 9:31 a.m. UTC | #2
> -----Original Message-----
> From: Morten Brørup <mb@smartsharesystems.com>
> Sent: Thursday, March 30, 2023 3:19 PM
> To: Feifei Wang <Feifei.Wang2@arm.com>; thomas@monjalon.net; Ferruh
> Yigit <ferruh.yigit@amd.com>; Andrew Rybchenko
> <andrew.rybchenko@oktetlabs.ru>
> Cc: dev@dpdk.org; konstantin.v.ananyev@yandex.ru; nd <nd@arm.com>;
> Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>
> Subject: RE: [PATCH v5 1/3] ethdev: add API for buffer recycle mode
> 
> > From: Feifei Wang [mailto:feifei.wang2@arm.com]
> > Sent: Thursday, 30 March 2023 08.30
> >
> 
> [...]
> 
> > +/**
> > + * @internal
> > + * Rx routine for rte_eth_dev_buf_recycle().
> > + * Refill Rx descriptors in buffer recycle mode.
> > + *
> > + * @note
> > + * This API can only be called by rte_eth_dev_buf_recycle().
> > + * Before calling this API, rte_eth_tx_buf_stash() should be
> > + * called to stash Tx used buffers into Rx buffer ring.
> > + *
> > + * When this functionality is not implemented in the driver, the
> > +return
> > + * buffer number is 0.
> > + *
> > + * @param port_id
> > + *   The port identifier of the Ethernet device.
> > + * @param queue_id
> > + *   The index of the receive queue.
> > + *   The value must be in the range [0, nb_rx_queue - 1] previously
> supplied
> > + *   to rte_eth_dev_configure().
> > + *@param nb
> > + *   The number of Rx descriptors to be refilled.
> > + * @return
> > + *   The number Rx descriptors correct to be refilled.
> > + *   - ENODEV: bad port or queue (only if compiled with debug).
> 
> If you want errors reported by the return value, the function return type
> cannot be uint16_t.
Agree. Actually, in the code path, if errors happen, the function will return 0.
For this description line, I refer to 'rte_eth_tx_prepare' notes. Maybe we should delete
this line.

> 
> > + */
> > +static inline uint16_t rte_eth_rx_descriptors_refill(uint16_t port_id,
> > +		uint16_t queue_id, uint16_t nb)
> > +{
> > +	struct rte_eth_fp_ops *p;
> > +	void *qd;
> > +
> > +#ifdef RTE_ETHDEV_DEBUG_RX
> > +	if (port_id >= RTE_MAX_ETHPORTS ||
> > +			queue_id >= RTE_MAX_QUEUES_PER_PORT) {
> > +		RTE_ETHDEV_LOG(ERR,
> > +			"Invalid port_id=%u or queue_id=%u\n",
> > +			port_id, queue_id);
> > +		rte_errno = ENODEV;
> > +		return 0;
> 
> If p->rx_descriptors_refill() is likely to return 0, this function should not use 0
> as return value to indicate errors.
However, refer to dpdk code style in ethdev, most of API write like this.
For example, 'rte_eth_rx/tx_burst', 'rte_eth_tx_prep'. 

I'm also confused what's return type for this due to I want
to indicate errors and show the processed buffer number.

> 
> > +	}
> > +#endif
> > +
> > +	p = &rte_eth_fp_ops[port_id];
> > +	qd = p->rxq.data[queue_id];
> > +
> > +#ifdef RTE_ETHDEV_DEBUG_RX
> > +	if (!rte_eth_dev_is_valid_port(port_id)) {
> > +		RTE_ETHDEV_LOG(ERR, "Invalid Rx port_id=%u\n", port_id);
> > +		rte_errno = ENODEV;
> > +		return 0;
> > +
> > +	if (qd == NULL) {
> > +		RTE_ETHDEV_LOG(ERR, "Invalid Rx queue_id=%u for
> port_id=%u\n",
> > +			queue_id, port_id);
> > +		rte_errno = ENODEV;
> > +		return 0;
> > +	}
> > +#endif
> > +
> > +	if (p->rx_descriptors_refill == NULL)
> > +		return 0;
> > +
> > +	return p->rx_descriptors_refill(qd, nb); }
> > +
> >  /**@{@name Rx hardware descriptor states
> >   * @see rte_eth_rx_descriptor_status
> >   */
> > @@ -6483,6 +6597,122 @@ rte_eth_tx_buffer(uint16_t port_id, uint16_t
> queue_id,
> >  	return rte_eth_tx_buffer_flush(port_id, queue_id, buffer);  }
> >
> > +/**
> > + * @internal
> > + * Tx routine for rte_eth_dev_buf_recycle().
> > + * Stash Tx used buffers into Rx buffer ring in buffer recycle mode.
> > + *
> > + * @note
> > + * This API can only be called by rte_eth_dev_buf_recycle().
> > + * After calling this API, rte_eth_rx_descriptors_refill() should be
> > + * called to refill Rx ring descriptors.
> > + *
> > + * When this functionality is not implemented in the driver, the
> > +return
> > + * buffer number is 0.
> > + *
> > + * @param port_id
> > + *   The port identifier of the Ethernet device.
> > + * @param queue_id
> > + *   The index of the transmit queue.
> > + *   The value must be in the range [0, nb_tx_queue - 1] previously
> supplied
> > + *   to rte_eth_dev_configure().
> > + * @param rxq_buf_recycle_info
> > + *   A pointer to a structure of Rx queue buffer ring information in buffer
> > + *   recycle mode.
> > + *
> > + * @return
> > + *   The number buffers correct to be filled in the Rx buffer ring.
> > + *   - ENODEV: bad port or queue (only if compiled with debug).
> 
> If you want errors reported by the return value, the function return type
> cannot be uint16_t.
> 
> > + */
> > +static inline uint16_t rte_eth_tx_buf_stash(uint16_t port_id,
> > +uint16_t
> > queue_id,
> > +		struct rte_eth_rxq_buf_recycle_info *rxq_buf_recycle_info)
> {
> > +	struct rte_eth_fp_ops *p;
> > +	void *qd;
> > +
> > +#ifdef RTE_ETHDEV_DEBUG_TX
> > +	if (port_id >= RTE_MAX_ETHPORTS ||
> > +			queue_id >= RTE_MAX_QUEUES_PER_PORT) {
> > +		RTE_ETHDEV_LOG(ERR,
> > +			"Invalid port_id=%u or queue_id=%u\n",
> > +			port_id, queue_id);
> > +		rte_errno = ENODEV;
> > +		return 0;
> 
> If p->tx_buf_stash() is likely to return 0, this function should not use 0 as
> return value to indicate errors.
> 
> > +	}
> > +#endif
> > +
> > +	p = &rte_eth_fp_ops[port_id];
> > +	qd = p->txq.data[queue_id];
> > +
> > +#ifdef RTE_ETHDEV_DEBUG_TX
> > +	if (!rte_eth_dev_is_valid_port(port_id)) {
> > +		RTE_ETHDEV_LOG(ERR, "Invalid Tx port_id=%u\n", port_id);
> > +		rte_errno = ENODEV;
> > +		return 0;
> > +
> > +	if (qd == NULL) {
> > +		RTE_ETHDEV_LOG(ERR, "Invalid Tx queue_id=%u for
> port_id=%u\n",
> > +			queue_id, port_id);
> > +		rte_erno = ENODEV;
> > +		return 0;
> > +	}
> > +#endif
> > +
> > +	if (p->tx_buf_stash == NULL)
> > +		return 0;
> > +
> > +	return p->tx_buf_stash(qd, rxq_buf_recycle_info); }
> > +
> > +/**
> > + * @warning
> > + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> > +notice
> > + *
> > + * Buffer recycle mode can let Tx queue directly put used buffers
> > +into Rx
> > buffer
> > + * ring. This avoids freeing buffers into mempool and allocating
> > + buffers from
> > + * mempool.
> 
> This function description generally describes the buffer recycle mode.
> 
> Please update it to describe what this specific function does.
Ack.
> 
> > + *
> > + * @param rx_port_id
> > + *   Port identifying the receive side.
> > + * @param rx_queue_id
> > + *   The index of the receive queue identifying the receive side.
> > + *   The value must be in the range [0, nb_rx_queue - 1] previously
> supplied
> > + *   to rte_eth_dev_configure().
> > + * @param tx_port_id
> > + *   Port identifying the transmit side.
> > + * @param tx_queue_id
> > + *   The index of the transmit queue identifying the transmit side.
> > + *   The value must be in the range [0, nb_tx_queue - 1] previously
> supplied
> > + *   to rte_eth_dev_configure().
> > + * @param rxq_recycle_info
> > + *   A pointer to a structure of type *rte_eth_txq_rearm_data* to be filled.
> > + * @return
> > + *   - (0) on success or no recycling buffer.
> 
> Why not return the return value of rte_eth_rx_descriptors_refill() instead of
> 0 on success? (This is a question, not a suggestion.)
> 
> Or, if rxq_buf_recycle_info must be valid, the function return type could be
> void instead of int.
> 
In some time, users may forget to allocate the room for ' rxq_buf_recycle_info '
and call 'rte_rxq_buf_recycle_info_get' API. Thus, I think we need to check with this.

Furthermore, the return value of this API should return success or not.

> > + *   - (-EINVAL) rxq_recycle_info is NULL.
> > + */
> > +__rte_experimental
> > +static inline int
> > +rte_eth_dev_buf_recycle(uint16_t rx_port_id, uint16_t rx_queue_id,
> > +		uint16_t tx_port_id, uint16_t tx_queue_id,
> > +		struct rte_eth_rxq_buf_recycle_info *rxq_buf_recycle_info)
> {
> > +	/* The number of recycling buffers. */
> > +	uint16_t nb_buf;
> > +
> > +	if (!rxq_buf_recycle_info)
> > +		return -EINVAL;
> 
> This is a fast path function. In which situation is this function called with
> rxq_buf_recycle_info == NULL?
> 
> If this function can genuinely be called with rxq_buf_recycle_info == NULL,
> you should test for (rxq_buf_recycle_info == NULL), not (!
> rxq_buf_recycle_info). Otherwise, I think
> RTE_ASSERT(rxq_buf_recycle_info != NULL) is more appropriate.
Agree. We should use ' RTE_ASSERT(rxq_buf_recycle_info != NULL)'.
> 
> > +
> > +	/* Stash Tx used buffers into Rx buffer ring */
> > +	nb_buf = rte_eth_tx_buf_stash(tx_port_id, tx_queue_id,
> > +				rxq_buf_recycle_info);
> > +	/* If there are recycling buffers, refill Rx queue descriptors. */
> > +	if (nb_buf)
> > +		rte_eth_rx_descriptors_refill(rx_port_id, rx_queue_id,
> > +					nb_buf);
> > +
> > +	return 0;
> > +}
> > +
> >  /**
> >   * @warning
> >   * @b EXPERIMENTAL: this API may change without prior notice diff
> > --git a/lib/ethdev/rte_ethdev_core.h b/lib/ethdev/rte_ethdev_core.h
> > index dcf8adab92..a138fd4dbc 100644
> > --- a/lib/ethdev/rte_ethdev_core.h
> > +++ b/lib/ethdev/rte_ethdev_core.h
> > @@ -56,6 +56,13 @@ typedef int (*eth_rx_descriptor_status_t)(void
> > *rxq, uint16_t offset);
> >  /** @internal Check the status of a Tx descriptor */  typedef int
> > (*eth_tx_descriptor_status_t)(void *txq, uint16_t offset);
> >
> > +/** @internal Stash Tx used buffers into RX ring in buffer recycle
> > +mode */ typedef uint16_t (*eth_tx_buf_stash_t)(void *txq,
> > +		struct rte_eth_rxq_buf_recycle_info *rxq_buf_recycle_info);
> > +
> > +/** @internal Refill Rx descriptors in buffer recycle mode */ typedef
> > +uint16_t (*eth_rx_descriptors_refill_t)(void *rxq, uint16_t nb);
> 
> Please add proper function descriptions for the two callbacks above.
Ack.
> 
> > +
> >  /**
> >   * @internal
> >   * Structure used to hold opaque pointers to internal ethdev Rx/Tx @@
> > -90,9 +97,11 @@ struct rte_eth_fp_ops {
> >  	eth_rx_queue_count_t rx_queue_count;
> >  	/** Check the status of a Rx descriptor. */
> >  	eth_rx_descriptor_status_t rx_descriptor_status;
> > +	/** Refill Rx descriptors in buffer recycle mode */
> > +	eth_rx_descriptors_refill_t rx_descriptors_refill;
> >  	/** Rx queues data. */
> >  	struct rte_ethdev_qdata rxq;
> > -	uintptr_t reserved1[3];
> > +	uintptr_t reserved1[4];
> 
> You added a function pointer above, so to keep the structure alignment, you
> must remove one here, not add one:
> 
> -	uintptr_t reserved1[3];
> +	uintptr_t reserved1[2];
> 
Ack.
> >  	/**@}*/
> >
> >  	/**@{*/
> > @@ -106,9 +115,11 @@ struct rte_eth_fp_ops {
> >  	eth_tx_prep_t tx_pkt_prepare;
> >  	/** Check the status of a Tx descriptor. */
> >  	eth_tx_descriptor_status_t tx_descriptor_status;
> > +	/** Stash Tx used buffers into RX ring in buffer recycle mode */
> > +	eth_tx_buf_stash_t tx_buf_stash;
> >  	/** Tx queues data. */
> >  	struct rte_ethdev_qdata txq;
> > -	uintptr_t reserved2[3];
> > +	uintptr_t reserved2[4];
> 
> You added a function pointer above, so to keep the structure alignment, you
> must remove one here, not add one:
> 
> -	uintptr_t reserved1[3];
> +	uintptr_t reserved1[2];
> 
Ack.
> >  	/**@}*/
> >
> >  } __rte_cache_aligned;
  
Morten Brørup March 30, 2023, 3:15 p.m. UTC | #3
> From: Feifei Wang [mailto:Feifei.Wang2@arm.com]
> Sent: Thursday, 30 March 2023 11.31
> 
> > From: Morten Brørup <mb@smartsharesystems.com>
> > Sent: Thursday, March 30, 2023 3:19 PM
> >
> > > From: Feifei Wang [mailto:feifei.wang2@arm.com]
> > > Sent: Thursday, 30 March 2023 08.30
> > >
> >
> > [...]
> >
> > > +/**
> > > + * @internal
> > > + * Rx routine for rte_eth_dev_buf_recycle().
> > > + * Refill Rx descriptors in buffer recycle mode.
> > > + *
> > > + * @note
> > > + * This API can only be called by rte_eth_dev_buf_recycle().
> > > + * Before calling this API, rte_eth_tx_buf_stash() should be
> > > + * called to stash Tx used buffers into Rx buffer ring.
> > > + *
> > > + * When this functionality is not implemented in the driver, the
> > > +return
> > > + * buffer number is 0.
> > > + *
> > > + * @param port_id
> > > + *   The port identifier of the Ethernet device.
> > > + * @param queue_id
> > > + *   The index of the receive queue.
> > > + *   The value must be in the range [0, nb_rx_queue - 1] previously
> > supplied
> > > + *   to rte_eth_dev_configure().
> > > + *@param nb
> > > + *   The number of Rx descriptors to be refilled.
> > > + * @return
> > > + *   The number Rx descriptors correct to be refilled.
> > > + *   - ENODEV: bad port or queue (only if compiled with debug).
> >
> > If you want errors reported by the return value, the function return type
> > cannot be uint16_t.
> Agree. Actually, in the code path, if errors happen, the function will return
> 0.
> For this description line, I refer to 'rte_eth_tx_prepare' notes. Maybe we
> should delete
> this line.
> 
> >
> > > + */
> > > +static inline uint16_t rte_eth_rx_descriptors_refill(uint16_t port_id,
> > > +		uint16_t queue_id, uint16_t nb)
> > > +{
> > > +	struct rte_eth_fp_ops *p;
> > > +	void *qd;
> > > +
> > > +#ifdef RTE_ETHDEV_DEBUG_RX
> > > +	if (port_id >= RTE_MAX_ETHPORTS ||
> > > +			queue_id >= RTE_MAX_QUEUES_PER_PORT) {
> > > +		RTE_ETHDEV_LOG(ERR,
> > > +			"Invalid port_id=%u or queue_id=%u\n",
> > > +			port_id, queue_id);
> > > +		rte_errno = ENODEV;
> > > +		return 0;
> >
> > If p->rx_descriptors_refill() is likely to return 0, this function should
> not use 0
> > as return value to indicate errors.
> However, refer to dpdk code style in ethdev, most of API write like this.
> For example, 'rte_eth_rx/tx_burst', 'rte_eth_tx_prep'.
> 
> I'm also confused what's return type for this due to I want
> to indicate errors and show the processed buffer number.

OK. Thanks for the references.

Looking at rte_eth_rx/tx_burst(), you could follow the same conventions here, i.e.:
- Use uint16_t as return type.
- Return 0 on error.
- Do not set rte_errno.
- Remove the "ENODEV" line from the @return description.
- Use RTE_ETHDEV_LOG(ERR,...) as the only method to indicate errors.

I now see that you follow the convention of rte_eth_tx_prepare(). This is also perfectly fine; then you just need to update the description of @return to mention that the error value is set in rte_errno if a value less than 'nb' is returned.

> 
> >
> > > +	}
> > > +#endif
> > > +
> > > +	p = &rte_eth_fp_ops[port_id];
> > > +	qd = p->rxq.data[queue_id];
> > > +
> > > +#ifdef RTE_ETHDEV_DEBUG_RX
> > > +	if (!rte_eth_dev_is_valid_port(port_id)) {
> > > +		RTE_ETHDEV_LOG(ERR, "Invalid Rx port_id=%u\n", port_id);
> > > +		rte_errno = ENODEV;
> > > +		return 0;
> > > +
> > > +	if (qd == NULL) {
> > > +		RTE_ETHDEV_LOG(ERR, "Invalid Rx queue_id=%u for
> > port_id=%u\n",
> > > +			queue_id, port_id);
> > > +		rte_errno = ENODEV;
> > > +		return 0;
> > > +	}
> > > +#endif
> > > +
> > > +	if (p->rx_descriptors_refill == NULL)
> > > +		return 0;
> > > +
> > > +	return p->rx_descriptors_refill(qd, nb); }

When does p->rx_descriptors_refill() return anything else than 'nb'?

If p->rx_descriptors_refill() always succeeds (and thus always returns 'nb'), you could make its return type void. And thus, you could also make the return type of rte_eth_rx_descriptors_refill() void.

> > > +
> > >  /**@{@name Rx hardware descriptor states
> > >   * @see rte_eth_rx_descriptor_status
> > >   */
> > > @@ -6483,6 +6597,122 @@ rte_eth_tx_buffer(uint16_t port_id, uint16_t
> > queue_id,
> > >  	return rte_eth_tx_buffer_flush(port_id, queue_id, buffer);  }
> > >
> > > +/**
> > > + * @internal
> > > + * Tx routine for rte_eth_dev_buf_recycle().
> > > + * Stash Tx used buffers into Rx buffer ring in buffer recycle mode.
> > > + *
> > > + * @note
> > > + * This API can only be called by rte_eth_dev_buf_recycle().
> > > + * After calling this API, rte_eth_rx_descriptors_refill() should be
> > > + * called to refill Rx ring descriptors.
> > > + *
> > > + * When this functionality is not implemented in the driver, the
> > > +return
> > > + * buffer number is 0.
> > > + *
> > > + * @param port_id
> > > + *   The port identifier of the Ethernet device.
> > > + * @param queue_id
> > > + *   The index of the transmit queue.
> > > + *   The value must be in the range [0, nb_tx_queue - 1] previously
> > supplied
> > > + *   to rte_eth_dev_configure().
> > > + * @param rxq_buf_recycle_info
> > > + *   A pointer to a structure of Rx queue buffer ring information in
> buffer
> > > + *   recycle mode.
> > > + *
> > > + * @return
> > > + *   The number buffers correct to be filled in the Rx buffer ring.
> > > + *   - ENODEV: bad port or queue (only if compiled with debug).
> >
> > If you want errors reported by the return value, the function return type
> > cannot be uint16_t.

I now see that you follow the convention of rte_eth_tx_prepare() here too.

This is perfectly fine; then you just need to update the description of @return to mention that the error value is set in rte_errno if a value less than 'nb' is returned.

> >
> > > + */
> > > +static inline uint16_t rte_eth_tx_buf_stash(uint16_t port_id,
> > > +uint16_t
> > > queue_id,
> > > +		struct rte_eth_rxq_buf_recycle_info *rxq_buf_recycle_info)
> > {
> > > +	struct rte_eth_fp_ops *p;
> > > +	void *qd;
> > > +
> > > +#ifdef RTE_ETHDEV_DEBUG_TX
> > > +	if (port_id >= RTE_MAX_ETHPORTS ||
> > > +			queue_id >= RTE_MAX_QUEUES_PER_PORT) {
> > > +		RTE_ETHDEV_LOG(ERR,
> > > +			"Invalid port_id=%u or queue_id=%u\n",
> > > +			port_id, queue_id);
> > > +		rte_errno = ENODEV;
> > > +		return 0;
> >
> > If p->tx_buf_stash() is likely to return 0, this function should not use 0
> as
> > return value to indicate errors.

I now see that you follow the convention of rte_eth_tx_prepare() here too. Then please ignore my comment about using 0 as return value on errors for this function.

> >
> > > +	}
> > > +#endif
> > > +
> > > +	p = &rte_eth_fp_ops[port_id];
> > > +	qd = p->txq.data[queue_id];
> > > +
> > > +#ifdef RTE_ETHDEV_DEBUG_TX
> > > +	if (!rte_eth_dev_is_valid_port(port_id)) {
> > > +		RTE_ETHDEV_LOG(ERR, "Invalid Tx port_id=%u\n", port_id);
> > > +		rte_errno = ENODEV;
> > > +		return 0;
> > > +
> > > +	if (qd == NULL) {
> > > +		RTE_ETHDEV_LOG(ERR, "Invalid Tx queue_id=%u for
> > port_id=%u\n",
> > > +			queue_id, port_id);
> > > +		rte_erno = ENODEV;
> > > +		return 0;
> > > +	}
> > > +#endif
> > > +
> > > +	if (p->tx_buf_stash == NULL)
> > > +		return 0;
> > > +
> > > +	return p->tx_buf_stash(qd, rxq_buf_recycle_info); }
> > > +
> > > +/**
> > > + * @warning
> > > + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> > > +notice
> > > + *
> > > + * Buffer recycle mode can let Tx queue directly put used buffers
> > > +into Rx
> > > buffer
> > > + * ring. This avoids freeing buffers into mempool and allocating
> > > + buffers from
> > > + * mempool.
> >
> > This function description generally describes the buffer recycle mode.
> >
> > Please update it to describe what this specific function does.
> Ack.
> >
> > > + *
> > > + * @param rx_port_id
> > > + *   Port identifying the receive side.
> > > + * @param rx_queue_id
> > > + *   The index of the receive queue identifying the receive side.
> > > + *   The value must be in the range [0, nb_rx_queue - 1] previously
> > supplied
> > > + *   to rte_eth_dev_configure().
> > > + * @param tx_port_id
> > > + *   Port identifying the transmit side.
> > > + * @param tx_queue_id
> > > + *   The index of the transmit queue identifying the transmit side.
> > > + *   The value must be in the range [0, nb_tx_queue - 1] previously
> > supplied
> > > + *   to rte_eth_dev_configure().
> > > + * @param rxq_recycle_info
> > > + *   A pointer to a structure of type *rte_eth_txq_rearm_data* to be
> filled.
> > > + * @return
> > > + *   - (0) on success or no recycling buffer.
> >
> > Why not return the return value of rte_eth_rx_descriptors_refill() instead
> of
> > 0 on success? (This is a question, not a suggestion.)
> >
> > Or, if rxq_buf_recycle_info must be valid, the function return type could be
> > void instead of int.
> >
> In some time, users may forget to allocate the room for ' rxq_buf_recycle_info
> '
> and call 'rte_rxq_buf_recycle_info_get' API. Thus, I think we need to check
> with this.

If the user forgets to allocate the rxq_buf_recycle_info, it is a serious bug in the user's application.

We don't need to handle such bugs at runtime.

> 
> Furthermore, the return value of this API should return success or not.

If rxq_buf_recycle_info is not NULL, this function will always succeed. So there is no need for a return value.

If you want this function to return something, it could return nb_buf, so the application can it use for telemetry purposes or similar.

> 
> > > + *   - (-EINVAL) rxq_recycle_info is NULL.
> > > + */
> > > +__rte_experimental
> > > +static inline int
> > > +rte_eth_dev_buf_recycle(uint16_t rx_port_id, uint16_t rx_queue_id,
> > > +		uint16_t tx_port_id, uint16_t tx_queue_id,
> > > +		struct rte_eth_rxq_buf_recycle_info *rxq_buf_recycle_info)
> > {
> > > +	/* The number of recycling buffers. */
> > > +	uint16_t nb_buf;
> > > +
> > > +	if (!rxq_buf_recycle_info)
> > > +		return -EINVAL;
> >
> > This is a fast path function. In which situation is this function called
> with
> > rxq_buf_recycle_info == NULL?
> >
> > If this function can genuinely be called with rxq_buf_recycle_info == NULL,
> > you should test for (rxq_buf_recycle_info == NULL), not (!
> > rxq_buf_recycle_info). Otherwise, I think
> > RTE_ASSERT(rxq_buf_recycle_info != NULL) is more appropriate.
> Agree. We should use ' RTE_ASSERT(rxq_buf_recycle_info != NULL)'.
> >
> > > +
> > > +	/* Stash Tx used buffers into Rx buffer ring */
> > > +	nb_buf = rte_eth_tx_buf_stash(tx_port_id, tx_queue_id,
> > > +				rxq_buf_recycle_info);
> > > +	/* If there are recycling buffers, refill Rx queue descriptors. */
> > > +	if (nb_buf)
> > > +		rte_eth_rx_descriptors_refill(rx_port_id, rx_queue_id,
> > > +					nb_buf);
> > > +
> > > +	return 0;
> > > +}
> > > +
> > >  /**
> > >   * @warning
> > >   * @b EXPERIMENTAL: this API may change without prior notice diff
> > > --git a/lib/ethdev/rte_ethdev_core.h b/lib/ethdev/rte_ethdev_core.h
> > > index dcf8adab92..a138fd4dbc 100644
> > > --- a/lib/ethdev/rte_ethdev_core.h
> > > +++ b/lib/ethdev/rte_ethdev_core.h
> > > @@ -56,6 +56,13 @@ typedef int (*eth_rx_descriptor_status_t)(void
> > > *rxq, uint16_t offset);
> > >  /** @internal Check the status of a Tx descriptor */  typedef int
> > > (*eth_tx_descriptor_status_t)(void *txq, uint16_t offset);
> > >
> > > +/** @internal Stash Tx used buffers into RX ring in buffer recycle
> > > +mode */ typedef uint16_t (*eth_tx_buf_stash_t)(void *txq,
> > > +		struct rte_eth_rxq_buf_recycle_info *rxq_buf_recycle_info);
> > > +
> > > +/** @internal Refill Rx descriptors in buffer recycle mode */ typedef
> > > +uint16_t (*eth_rx_descriptors_refill_t)(void *rxq, uint16_t nb);
> >
> > Please add proper function descriptions for the two callbacks above.
> Ack.
> >
> > > +
> > >  /**
> > >   * @internal
> > >   * Structure used to hold opaque pointers to internal ethdev Rx/Tx @@
> > > -90,9 +97,11 @@ struct rte_eth_fp_ops {
> > >  	eth_rx_queue_count_t rx_queue_count;
> > >  	/** Check the status of a Rx descriptor. */
> > >  	eth_rx_descriptor_status_t rx_descriptor_status;
> > > +	/** Refill Rx descriptors in buffer recycle mode */
> > > +	eth_rx_descriptors_refill_t rx_descriptors_refill;
> > >  	/** Rx queues data. */
> > >  	struct rte_ethdev_qdata rxq;
> > > -	uintptr_t reserved1[3];
> > > +	uintptr_t reserved1[4];
> >
> > You added a function pointer above, so to keep the structure alignment, you
> > must remove one here, not add one:
> >
> > -	uintptr_t reserved1[3];
> > +	uintptr_t reserved1[2];
> >
> Ack.
> > >  	/**@}*/
> > >
> > >  	/**@{*/
> > > @@ -106,9 +115,11 @@ struct rte_eth_fp_ops {
> > >  	eth_tx_prep_t tx_pkt_prepare;
> > >  	/** Check the status of a Tx descriptor. */
> > >  	eth_tx_descriptor_status_t tx_descriptor_status;
> > > +	/** Stash Tx used buffers into RX ring in buffer recycle mode */
> > > +	eth_tx_buf_stash_t tx_buf_stash;
> > >  	/** Tx queues data. */
> > >  	struct rte_ethdev_qdata txq;
> > > -	uintptr_t reserved2[3];
> > > +	uintptr_t reserved2[4];
> >
> > You added a function pointer above, so to keep the structure alignment, you
> > must remove one here, not add one:
> >
> > -	uintptr_t reserved1[3];
> > +	uintptr_t reserved1[2];
> >
> Ack.
> > >  	/**@}*/
> > >
> > >  } __rte_cache_aligned;
  
Morten Brørup March 30, 2023, 3:58 p.m. UTC | #4
> From: Morten Brørup
> Sent: Thursday, 30 March 2023 17.15
> 
> > From: Feifei Wang [mailto:Feifei.Wang2@arm.com]
> > Sent: Thursday, 30 March 2023 11.31
> >
> > > From: Morten Brørup <mb@smartsharesystems.com>
> > > Sent: Thursday, March 30, 2023 3:19 PM
> > >
> > > > From: Feifei Wang [mailto:feifei.wang2@arm.com]
> > > > Sent: Thursday, 30 March 2023 08.30
> > > >
> > >
> > > [...]
> > >
> > > > +/**
> > > > + * @internal
> > > > + * Rx routine for rte_eth_dev_buf_recycle().
> > > > + * Refill Rx descriptors in buffer recycle mode.
> > > > + *
> > > > + * @note
> > > > + * This API can only be called by rte_eth_dev_buf_recycle().
> > > > + * Before calling this API, rte_eth_tx_buf_stash() should be
> > > > + * called to stash Tx used buffers into Rx buffer ring.
> > > > + *
> > > > + * When this functionality is not implemented in the driver, the
> > > > +return
> > > > + * buffer number is 0.
> > > > + *
> > > > + * @param port_id
> > > > + *   The port identifier of the Ethernet device.
> > > > + * @param queue_id
> > > > + *   The index of the receive queue.
> > > > + *   The value must be in the range [0, nb_rx_queue - 1] previously
> > > supplied
> > > > + *   to rte_eth_dev_configure().
> > > > + *@param nb
> > > > + *   The number of Rx descriptors to be refilled.
> > > > + * @return
> > > > + *   The number Rx descriptors correct to be refilled.
> > > > + *   - ENODEV: bad port or queue (only if compiled with debug).
> > >
> > > If you want errors reported by the return value, the function return type
> > > cannot be uint16_t.
> > Agree. Actually, in the code path, if errors happen, the function will
> return
> > 0.
> > For this description line, I refer to 'rte_eth_tx_prepare' notes. Maybe we
> > should delete
> > this line.
> >
> > >
> > > > + */
> > > > +static inline uint16_t rte_eth_rx_descriptors_refill(uint16_t port_id,
> > > > +		uint16_t queue_id, uint16_t nb)
> > > > +{
> > > > +	struct rte_eth_fp_ops *p;
> > > > +	void *qd;
> > > > +
> > > > +#ifdef RTE_ETHDEV_DEBUG_RX
> > > > +	if (port_id >= RTE_MAX_ETHPORTS ||
> > > > +			queue_id >= RTE_MAX_QUEUES_PER_PORT) {
> > > > +		RTE_ETHDEV_LOG(ERR,
> > > > +			"Invalid port_id=%u or queue_id=%u\n",
> > > > +			port_id, queue_id);
> > > > +		rte_errno = ENODEV;
> > > > +		return 0;
> > >
> > > If p->rx_descriptors_refill() is likely to return 0, this function should
> > not use 0
> > > as return value to indicate errors.
> > However, refer to dpdk code style in ethdev, most of API write like this.
> > For example, 'rte_eth_rx/tx_burst', 'rte_eth_tx_prep'.
> >
> > I'm also confused what's return type for this due to I want
> > to indicate errors and show the processed buffer number.
> 
> OK. Thanks for the references.
> 
> Looking at rte_eth_rx/tx_burst(), you could follow the same conventions here,
> i.e.:
> - Use uint16_t as return type.
> - Return 0 on error.
> - Do not set rte_errno.
> - Remove the "ENODEV" line from the @return description.
> - Use RTE_ETHDEV_LOG(ERR,...) as the only method to indicate errors.
> 
> I now see that you follow the convention of rte_eth_tx_prepare(). This is also
> perfectly fine; then you just need to update the description of @return to
> mention that the error value is set in rte_errno if a value less than 'nb' is
> returned.

After further consideration, I have changed my mind:

The primary purpose of rte_eth_tx_prepare() is to test if a packet burst is valid, so the ability to return an error value is a natural requirement.

This is not the purpose your functions. The purpose of your functions resemble rte_eth_rx/tx_burst(), where there is no requirement to return an error value. So you should follow the convention of rte_eth_rx/tx_burst(), as I just suggested.

> 
> >
> > >
> > > > +	}
> > > > +#endif
> > > > +
> > > > +	p = &rte_eth_fp_ops[port_id];
> > > > +	qd = p->rxq.data[queue_id];
> > > > +
> > > > +#ifdef RTE_ETHDEV_DEBUG_RX
> > > > +	if (!rte_eth_dev_is_valid_port(port_id)) {
> > > > +		RTE_ETHDEV_LOG(ERR, "Invalid Rx port_id=%u\n", port_id);
> > > > +		rte_errno = ENODEV;
> > > > +		return 0;
> > > > +
> > > > +	if (qd == NULL) {
> > > > +		RTE_ETHDEV_LOG(ERR, "Invalid Rx queue_id=%u for
> > > port_id=%u\n",
> > > > +			queue_id, port_id);
> > > > +		rte_errno = ENODEV;
> > > > +		return 0;
> > > > +	}
> > > > +#endif
> > > > +
> > > > +	if (p->rx_descriptors_refill == NULL)
> > > > +		return 0;
> > > > +
> > > > +	return p->rx_descriptors_refill(qd, nb); }
> 
> When does p->rx_descriptors_refill() return anything else than 'nb'?
> 
> If p->rx_descriptors_refill() always succeeds (and thus always returns 'nb'),
> you could make its return type void. And thus, you could also make the return
> type of rte_eth_rx_descriptors_refill() void.
> 
> > > > +
> > > >  /**@{@name Rx hardware descriptor states
> > > >   * @see rte_eth_rx_descriptor_status
> > > >   */
> > > > @@ -6483,6 +6597,122 @@ rte_eth_tx_buffer(uint16_t port_id, uint16_t
> > > queue_id,
> > > >  	return rte_eth_tx_buffer_flush(port_id, queue_id, buffer);  }
> > > >
> > > > +/**
> > > > + * @internal
> > > > + * Tx routine for rte_eth_dev_buf_recycle().
> > > > + * Stash Tx used buffers into Rx buffer ring in buffer recycle mode.
> > > > + *
> > > > + * @note
> > > > + * This API can only be called by rte_eth_dev_buf_recycle().
> > > > + * After calling this API, rte_eth_rx_descriptors_refill() should be
> > > > + * called to refill Rx ring descriptors.
> > > > + *
> > > > + * When this functionality is not implemented in the driver, the
> > > > +return
> > > > + * buffer number is 0.
> > > > + *
> > > > + * @param port_id
> > > > + *   The port identifier of the Ethernet device.
> > > > + * @param queue_id
> > > > + *   The index of the transmit queue.
> > > > + *   The value must be in the range [0, nb_tx_queue - 1] previously
> > > supplied
> > > > + *   to rte_eth_dev_configure().
> > > > + * @param rxq_buf_recycle_info
> > > > + *   A pointer to a structure of Rx queue buffer ring information in
> > buffer
> > > > + *   recycle mode.
> > > > + *
> > > > + * @return
> > > > + *   The number buffers correct to be filled in the Rx buffer ring.
> > > > + *   - ENODEV: bad port or queue (only if compiled with debug).
> > >
> > > If you want errors reported by the return value, the function return type
> > > cannot be uint16_t.
> 
> I now see that you follow the convention of rte_eth_tx_prepare() here too.
> 
> This is perfectly fine; then you just need to update the description of
> @return to mention that the error value is set in rte_errno if a value less
> than 'nb' is returned.

Same comment about conventions as above: This function is more like rte_eth_rx/tx_burst() than rte_eth_tx_prepare(), so follow the convention of rte_eth_rx/tx_burst() instead.

> 
> > >
> > > > + */
> > > > +static inline uint16_t rte_eth_tx_buf_stash(uint16_t port_id,
> > > > +uint16_t
> > > > queue_id,
> > > > +		struct rte_eth_rxq_buf_recycle_info *rxq_buf_recycle_info)
> > > {
> > > > +	struct rte_eth_fp_ops *p;
> > > > +	void *qd;
> > > > +
> > > > +#ifdef RTE_ETHDEV_DEBUG_TX
> > > > +	if (port_id >= RTE_MAX_ETHPORTS ||
> > > > +			queue_id >= RTE_MAX_QUEUES_PER_PORT) {
> > > > +		RTE_ETHDEV_LOG(ERR,
> > > > +			"Invalid port_id=%u or queue_id=%u\n",
> > > > +			port_id, queue_id);
> > > > +		rte_errno = ENODEV;
> > > > +		return 0;
> > >
> > > If p->tx_buf_stash() is likely to return 0, this function should not use 0
> > as
> > > return value to indicate errors.
> 
> I now see that you follow the convention of rte_eth_tx_prepare() here too.
> Then please ignore my comment about using 0 as return value on errors for this
> function.

Same comment about conventions as above: This function is more like rte_eth_rx/tx_burst() than rte_eth_tx_prepare(), so follow the convention of rte_eth_rx/tx_burst() instead.

> 
> > >
> > > > +	}
> > > > +#endif
> > > > +
> > > > +	p = &rte_eth_fp_ops[port_id];
> > > > +	qd = p->txq.data[queue_id];
> > > > +
> > > > +#ifdef RTE_ETHDEV_DEBUG_TX
> > > > +	if (!rte_eth_dev_is_valid_port(port_id)) {
> > > > +		RTE_ETHDEV_LOG(ERR, "Invalid Tx port_id=%u\n", port_id);
> > > > +		rte_errno = ENODEV;
> > > > +		return 0;
> > > > +
> > > > +	if (qd == NULL) {
> > > > +		RTE_ETHDEV_LOG(ERR, "Invalid Tx queue_id=%u for
> > > port_id=%u\n",
> > > > +			queue_id, port_id);
> > > > +		rte_erno = ENODEV;
> > > > +		return 0;
> > > > +	}
> > > > +#endif
> > > > +
> > > > +	if (p->tx_buf_stash == NULL)
> > > > +		return 0;
> > > > +
> > > > +	return p->tx_buf_stash(qd, rxq_buf_recycle_info); }
> > > > +
> > > > +/**
> > > > + * @warning
> > > > + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> > > > +notice
> > > > + *
> > > > + * Buffer recycle mode can let Tx queue directly put used buffers
> > > > +into Rx
> > > > buffer
> > > > + * ring. This avoids freeing buffers into mempool and allocating
> > > > + buffers from
> > > > + * mempool.
> > >
> > > This function description generally describes the buffer recycle mode.
> > >
> > > Please update it to describe what this specific function does.
> > Ack.
> > >
> > > > + *
> > > > + * @param rx_port_id
> > > > + *   Port identifying the receive side.
> > > > + * @param rx_queue_id
> > > > + *   The index of the receive queue identifying the receive side.
> > > > + *   The value must be in the range [0, nb_rx_queue - 1] previously
> > > supplied
> > > > + *   to rte_eth_dev_configure().
> > > > + * @param tx_port_id
> > > > + *   Port identifying the transmit side.
> > > > + * @param tx_queue_id
> > > > + *   The index of the transmit queue identifying the transmit side.
> > > > + *   The value must be in the range [0, nb_tx_queue - 1] previously
> > > supplied
> > > > + *   to rte_eth_dev_configure().
> > > > + * @param rxq_recycle_info
> > > > + *   A pointer to a structure of type *rte_eth_txq_rearm_data* to be
> > filled.
> > > > + * @return
> > > > + *   - (0) on success or no recycling buffer.
> > >
> > > Why not return the return value of rte_eth_rx_descriptors_refill() instead
> > of
> > > 0 on success? (This is a question, not a suggestion.)
> > >
> > > Or, if rxq_buf_recycle_info must be valid, the function return type could
> be
> > > void instead of int.
> > >
> > In some time, users may forget to allocate the room for '
> rxq_buf_recycle_info
> > '
> > and call 'rte_rxq_buf_recycle_info_get' API. Thus, I think we need to check
> > with this.
> 
> If the user forgets to allocate the rxq_buf_recycle_info, it is a serious bug
> in the user's application.
> 
> We don't need to handle such bugs at runtime.
> 
> >
> > Furthermore, the return value of this API should return success or not.
> 
> If rxq_buf_recycle_info is not NULL, this function will always succeed. So
> there is no need for a return value.
> 
> If you want this function to return something, it could return nb_buf, so the
> application can it use for telemetry purposes or similar.
> 
> >
> > > > + *   - (-EINVAL) rxq_recycle_info is NULL.
> > > > + */
> > > > +__rte_experimental
> > > > +static inline int
> > > > +rte_eth_dev_buf_recycle(uint16_t rx_port_id, uint16_t rx_queue_id,
> > > > +		uint16_t tx_port_id, uint16_t tx_queue_id,
> > > > +		struct rte_eth_rxq_buf_recycle_info *rxq_buf_recycle_info)
> > > {
> > > > +	/* The number of recycling buffers. */
> > > > +	uint16_t nb_buf;
> > > > +
> > > > +	if (!rxq_buf_recycle_info)
> > > > +		return -EINVAL;
> > >
> > > This is a fast path function. In which situation is this function called
> > with
> > > rxq_buf_recycle_info == NULL?
> > >
> > > If this function can genuinely be called with rxq_buf_recycle_info ==
> NULL,
> > > you should test for (rxq_buf_recycle_info == NULL), not (!
> > > rxq_buf_recycle_info). Otherwise, I think
> > > RTE_ASSERT(rxq_buf_recycle_info != NULL) is more appropriate.
> > Agree. We should use ' RTE_ASSERT(rxq_buf_recycle_info != NULL)'.
> > >
> > > > +
> > > > +	/* Stash Tx used buffers into Rx buffer ring */
> > > > +	nb_buf = rte_eth_tx_buf_stash(tx_port_id, tx_queue_id,
> > > > +				rxq_buf_recycle_info);
> > > > +	/* If there are recycling buffers, refill Rx queue descriptors.
> */
> > > > +	if (nb_buf)
> > > > +		rte_eth_rx_descriptors_refill(rx_port_id, rx_queue_id,
> > > > +					nb_buf);
> > > > +
> > > > +	return 0;
> > > > +}
> > > > +
> > > >  /**
> > > >   * @warning
> > > >   * @b EXPERIMENTAL: this API may change without prior notice diff
> > > > --git a/lib/ethdev/rte_ethdev_core.h b/lib/ethdev/rte_ethdev_core.h
> > > > index dcf8adab92..a138fd4dbc 100644
> > > > --- a/lib/ethdev/rte_ethdev_core.h
> > > > +++ b/lib/ethdev/rte_ethdev_core.h
> > > > @@ -56,6 +56,13 @@ typedef int (*eth_rx_descriptor_status_t)(void
> > > > *rxq, uint16_t offset);
> > > >  /** @internal Check the status of a Tx descriptor */  typedef int
> > > > (*eth_tx_descriptor_status_t)(void *txq, uint16_t offset);
> > > >
> > > > +/** @internal Stash Tx used buffers into RX ring in buffer recycle
> > > > +mode */ typedef uint16_t (*eth_tx_buf_stash_t)(void *txq,
> > > > +		struct rte_eth_rxq_buf_recycle_info
> *rxq_buf_recycle_info);
> > > > +
> > > > +/** @internal Refill Rx descriptors in buffer recycle mode */ typedef
> > > > +uint16_t (*eth_rx_descriptors_refill_t)(void *rxq, uint16_t nb);
> > >
> > > Please add proper function descriptions for the two callbacks above.
> > Ack.
> > >
> > > > +
> > > >  /**
> > > >   * @internal
> > > >   * Structure used to hold opaque pointers to internal ethdev Rx/Tx @@
> > > > -90,9 +97,11 @@ struct rte_eth_fp_ops {
> > > >  	eth_rx_queue_count_t rx_queue_count;
> > > >  	/** Check the status of a Rx descriptor. */
> > > >  	eth_rx_descriptor_status_t rx_descriptor_status;
> > > > +	/** Refill Rx descriptors in buffer recycle mode */
> > > > +	eth_rx_descriptors_refill_t rx_descriptors_refill;
> > > >  	/** Rx queues data. */
> > > >  	struct rte_ethdev_qdata rxq;
> > > > -	uintptr_t reserved1[3];
> > > > +	uintptr_t reserved1[4];
> > >
> > > You added a function pointer above, so to keep the structure alignment,
> you
> > > must remove one here, not add one:
> > >
> > > -	uintptr_t reserved1[3];
> > > +	uintptr_t reserved1[2];
> > >
> > Ack.
> > > >  	/**@}*/
> > > >
> > > >  	/**@{*/
> > > > @@ -106,9 +115,11 @@ struct rte_eth_fp_ops {
> > > >  	eth_tx_prep_t tx_pkt_prepare;
> > > >  	/** Check the status of a Tx descriptor. */
> > > >  	eth_tx_descriptor_status_t tx_descriptor_status;
> > > > +	/** Stash Tx used buffers into RX ring in buffer recycle mode */
> > > > +	eth_tx_buf_stash_t tx_buf_stash;
> > > >  	/** Tx queues data. */
> > > >  	struct rte_ethdev_qdata txq;
> > > > -	uintptr_t reserved2[3];
> > > > +	uintptr_t reserved2[4];
> > >
> > > You added a function pointer above, so to keep the structure alignment,
> you
> > > must remove one here, not add one:
> > >
> > > -	uintptr_t reserved1[3];
> > > +	uintptr_t reserved1[2];
> > >
> > Ack.
> > > >  	/**@}*/
> > > >
> > > >  } __rte_cache_aligned;
  
Ferruh Yigit April 19, 2023, 2:46 p.m. UTC | #5
On 3/30/2023 7:29 AM, Feifei Wang wrote:
> There are 4 upper APIs for buffer recycle mode:
> 1. 'rte_eth_rx_queue_buf_recycle_info_get'
> This is to retrieve buffer ring information about given ports's Rx
> queue in buffer recycle mode. And due to this, buffer recycle can
> be no longer limited to the same driver in Rx and Tx.
> 
> 2. 'rte_eth_dev_buf_recycle'
> Users can call this API to enable buffer recycle mode in data path.
> There are 2 internal APIs in it, which is separately for Rx and TX.
> 

Overall, can we have a namespace in the functions related to the buffer
recycle, to clarify API usage, something like (just putting as sample to
clarify my point):

rte_eth_recycle_buf
rte_eth_recycle_tx_buf_stash
rte_eth_recycle_rx_descriptors_refill
rte_eth_recycle_rx_queue_info_get


> 3. 'rte_eth_tx_buf_stash'
> Internal API for buffer recycle mode. This is to stash Tx used
> buffers into Rx buffer ring.
> 

This API is to move/recycle descriptors from Tx queue to Rx queue, but
name on its own, 'rte_eth_tx_buf_stash', reads like we are stashing
something to Tx queue. What do you think, can naming be improved?

> 4. 'rte_eth_rx_descriptors_refill'
> Internal API for buffer recycle mode. This is to refill Rx
> descriptors.
> 
> Above all APIs are just implemented at the upper level.
> For different APIs, we need to define specific functions separately.
> 
> Suggested-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Suggested-by: Ruifeng Wang <ruifeng.wang@arm.com>
> Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>

<...>

>  
> +int
> +rte_eth_rx_queue_buf_recycle_info_get(uint16_t port_id, uint16_t queue_id,
> +		struct rte_eth_rxq_buf_recycle_info *rxq_buf_recycle_info)
> +{
> +	struct rte_eth_dev *dev;
> +
> +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
> +	dev = &rte_eth_devices[port_id];
> +
> +	if (queue_id >= dev->data->nb_rx_queues) {
> +		RTE_ETHDEV_LOG(ERR, "Invalid Rx queue_id=%u\n", queue_id);
> +		return -EINVAL;
> +	}
> +
> +	RTE_ASSERT(rxq_buf_recycle_info != NULL);
> +

This is slow path API, I think better to validate parameter and return
an error instead of assert().

<...>

> --- a/lib/ethdev/rte_ethdev_core.h
> +++ b/lib/ethdev/rte_ethdev_core.h
> @@ -56,6 +56,13 @@ typedef int (*eth_rx_descriptor_status_t)(void *rxq, uint16_t offset);
>  /** @internal Check the status of a Tx descriptor */
>  typedef int (*eth_tx_descriptor_status_t)(void *txq, uint16_t offset);
>  
> +/** @internal Stash Tx used buffers into RX ring in buffer recycle mode */
> +typedef uint16_t (*eth_tx_buf_stash_t)(void *txq,
> +		struct rte_eth_rxq_buf_recycle_info *rxq_buf_recycle_info);
> +
> +/** @internal Refill Rx descriptors in buffer recycle mode */
> +typedef uint16_t (*eth_rx_descriptors_refill_t)(void *rxq, uint16_t nb);
> +

Since there is only single API exposed to the application, is it really
required to have two dev_ops, why not have a single one?
  
Feifei Wang April 26, 2023, 6:59 a.m. UTC | #6
> -----Original Message-----
> From: Morten Brørup <mb@smartsharesystems.com>
> Sent: Thursday, March 30, 2023 11:59 PM
> To: Feifei Wang <Feifei.Wang2@arm.com>; thomas@monjalon.net; Ferruh
> Yigit <ferruh.yigit@amd.com>; Andrew Rybchenko
> <andrew.rybchenko@oktetlabs.ru>
> Cc: dev@dpdk.org; konstantin.v.ananyev@yandex.ru; nd <nd@arm.com>;
> Honnappa Nagarahalli <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>; nd <nd@arm.com>
> Subject: RE: [PATCH v5 1/3] ethdev: add API for buffer recycle mode
> 
> > From: Morten Brørup
> > Sent: Thursday, 30 March 2023 17.15
> >
> > > From: Feifei Wang [mailto:Feifei.Wang2@arm.com]
> > > Sent: Thursday, 30 March 2023 11.31
> > >
> > > > From: Morten Brørup <mb@smartsharesystems.com>
> > > > Sent: Thursday, March 30, 2023 3:19 PM
> > > >
> > > > > From: Feifei Wang [mailto:feifei.wang2@arm.com]
> > > > > Sent: Thursday, 30 March 2023 08.30
> > > > >
> > > >
> > > > [...]
> > > >
> > > > > +/**
> > > > > + * @internal
> > > > > + * Rx routine for rte_eth_dev_buf_recycle().
> > > > > + * Refill Rx descriptors in buffer recycle mode.
> > > > > + *
> > > > > + * @note
> > > > > + * This API can only be called by rte_eth_dev_buf_recycle().
> > > > > + * Before calling this API, rte_eth_tx_buf_stash() should be
> > > > > + * called to stash Tx used buffers into Rx buffer ring.
> > > > > + *
> > > > > + * When this functionality is not implemented in the driver,
> > > > > +the return
> > > > > + * buffer number is 0.
> > > > > + *
> > > > > + * @param port_id
> > > > > + *   The port identifier of the Ethernet device.
> > > > > + * @param queue_id
> > > > > + *   The index of the receive queue.
> > > > > + *   The value must be in the range [0, nb_rx_queue - 1] previously
> > > > supplied
> > > > > + *   to rte_eth_dev_configure().
> > > > > + *@param nb
> > > > > + *   The number of Rx descriptors to be refilled.
> > > > > + * @return
> > > > > + *   The number Rx descriptors correct to be refilled.
> > > > > + *   - ENODEV: bad port or queue (only if compiled with debug).
> > > >
> > > > If you want errors reported by the return value, the function
> > > > return type cannot be uint16_t.
> > > Agree. Actually, in the code path, if errors happen, the function
> > > will
> > return
> > > 0.
> > > For this description line, I refer to 'rte_eth_tx_prepare' notes.
> > > Maybe we should delete this line.
> > >
> > > >
> > > > > + */
> > > > > +static inline uint16_t rte_eth_rx_descriptors_refill(uint16_t port_id,
> > > > > +		uint16_t queue_id, uint16_t nb) {
> > > > > +	struct rte_eth_fp_ops *p;
> > > > > +	void *qd;
> > > > > +
> > > > > +#ifdef RTE_ETHDEV_DEBUG_RX
> > > > > +	if (port_id >= RTE_MAX_ETHPORTS ||
> > > > > +			queue_id >= RTE_MAX_QUEUES_PER_PORT) {
> > > > > +		RTE_ETHDEV_LOG(ERR,
> > > > > +			"Invalid port_id=%u or queue_id=%u\n",
> > > > > +			port_id, queue_id);
> > > > > +		rte_errno = ENODEV;
> > > > > +		return 0;
> > > >
> > > > If p->rx_descriptors_refill() is likely to return 0, this function
> > > > should
> > > not use 0
> > > > as return value to indicate errors.
> > > However, refer to dpdk code style in ethdev, most of API write like this.
> > > For example, 'rte_eth_rx/tx_burst', 'rte_eth_tx_prep'.
> > >
> > > I'm also confused what's return type for this due to I want to
> > > indicate errors and show the processed buffer number.
> >
> > OK. Thanks for the references.
> >
> > Looking at rte_eth_rx/tx_burst(), you could follow the same
> > conventions here,
> > i.e.:
> > - Use uint16_t as return type.
> > - Return 0 on error.
> > - Do not set rte_errno.
> > - Remove the "ENODEV" line from the @return description.
> > - Use RTE_ETHDEV_LOG(ERR,...) as the only method to indicate errors.
> >
> > I now see that you follow the convention of rte_eth_tx_prepare(). This
> > is also perfectly fine; then you just need to update the description
> > of @return to mention that the error value is set in rte_errno if a
> > value less than 'nb' is returned.
> 
> After further consideration, I have changed my mind:
> 
> The primary purpose of rte_eth_tx_prepare() is to test if a packet burst is valid,
> so the ability to return an error value is a natural requirement.
> 
> This is not the purpose your functions. The purpose of your functions
> resemble rte_eth_rx/tx_burst(), where there is no requirement to return an
> error value. So you should follow the convention of rte_eth_rx/tx_burst(), as I
> just suggested.

Agree.
> 
> >
> > >
> > > >
> > > > > +	}
> > > > > +#endif
> > > > > +
> > > > > +	p = &rte_eth_fp_ops[port_id];
> > > > > +	qd = p->rxq.data[queue_id];
> > > > > +
> > > > > +#ifdef RTE_ETHDEV_DEBUG_RX
> > > > > +	if (!rte_eth_dev_is_valid_port(port_id)) {
> > > > > +		RTE_ETHDEV_LOG(ERR, "Invalid Rx port_id=%u\n",
> port_id);
> > > > > +		rte_errno = ENODEV;
> > > > > +		return 0;
> > > > > +
> > > > > +	if (qd == NULL) {
> > > > > +		RTE_ETHDEV_LOG(ERR, "Invalid Rx queue_id=%u for
> > > > port_id=%u\n",
> > > > > +			queue_id, port_id);
> > > > > +		rte_errno = ENODEV;
> > > > > +		return 0;
> > > > > +	}
> > > > > +#endif
> > > > > +
> > > > > +	if (p->rx_descriptors_refill == NULL)
> > > > > +		return 0;
> > > > > +
> > > > > +	return p->rx_descriptors_refill(qd, nb); }
> >
> > When does p->rx_descriptors_refill() return anything else than 'nb'?
> >
> > If p->rx_descriptors_refill() always succeeds (and thus always returns
> > 'nb'), you could make its return type void. And thus, you could also
> > make the return type of rte_eth_rx_descriptors_refill() void.
> >
> > > > > +
> > > > >  /**@{@name Rx hardware descriptor states
> > > > >   * @see rte_eth_rx_descriptor_status
> > > > >   */
> > > > > @@ -6483,6 +6597,122 @@ rte_eth_tx_buffer(uint16_t port_id,
> > > > > uint16_t
> > > > queue_id,
> > > > >  	return rte_eth_tx_buffer_flush(port_id, queue_id, buffer);  }
> > > > >
> > > > > +/**
> > > > > + * @internal
> > > > > + * Tx routine for rte_eth_dev_buf_recycle().
> > > > > + * Stash Tx used buffers into Rx buffer ring in buffer recycle mode.
> > > > > + *
> > > > > + * @note
> > > > > + * This API can only be called by rte_eth_dev_buf_recycle().
> > > > > + * After calling this API, rte_eth_rx_descriptors_refill()
> > > > > +should be
> > > > > + * called to refill Rx ring descriptors.
> > > > > + *
> > > > > + * When this functionality is not implemented in the driver,
> > > > > +the return
> > > > > + * buffer number is 0.
> > > > > + *
> > > > > + * @param port_id
> > > > > + *   The port identifier of the Ethernet device.
> > > > > + * @param queue_id
> > > > > + *   The index of the transmit queue.
> > > > > + *   The value must be in the range [0, nb_tx_queue - 1] previously
> > > > supplied
> > > > > + *   to rte_eth_dev_configure().
> > > > > + * @param rxq_buf_recycle_info
> > > > > + *   A pointer to a structure of Rx queue buffer ring information in
> > > buffer
> > > > > + *   recycle mode.
> > > > > + *
> > > > > + * @return
> > > > > + *   The number buffers correct to be filled in the Rx buffer ring.
> > > > > + *   - ENODEV: bad port or queue (only if compiled with debug).
> > > >
> > > > If you want errors reported by the return value, the function
> > > > return type cannot be uint16_t.
> >
> > I now see that you follow the convention of rte_eth_tx_prepare() here too.
> >
> > This is perfectly fine; then you just need to update the description
> > of @return to mention that the error value is set in rte_errno if a
> > value less than 'nb' is returned.
> 
> Same comment about conventions as above: This function is more like
> rte_eth_rx/tx_burst() than rte_eth_tx_prepare(), so follow the convention of
> rte_eth_rx/tx_burst() instead.
> 
> >
> > > >
> > > > > + */
> > > > > +static inline uint16_t rte_eth_tx_buf_stash(uint16_t port_id,
> > > > > +uint16_t
> > > > > queue_id,
> > > > > +		struct rte_eth_rxq_buf_recycle_info
> *rxq_buf_recycle_info)
> > > > {
> > > > > +	struct rte_eth_fp_ops *p;
> > > > > +	void *qd;
> > > > > +
> > > > > +#ifdef RTE_ETHDEV_DEBUG_TX
> > > > > +	if (port_id >= RTE_MAX_ETHPORTS ||
> > > > > +			queue_id >= RTE_MAX_QUEUES_PER_PORT) {
> > > > > +		RTE_ETHDEV_LOG(ERR,
> > > > > +			"Invalid port_id=%u or queue_id=%u\n",
> > > > > +			port_id, queue_id);
> > > > > +		rte_errno = ENODEV;
> > > > > +		return 0;
> > > >
> > > > If p->tx_buf_stash() is likely to return 0, this function should
> > > > not use 0
> > > as
> > > > return value to indicate errors.
> >
> > I now see that you follow the convention of rte_eth_tx_prepare() here too.
> > Then please ignore my comment about using 0 as return value on errors
> > for this function.
> 
> Same comment about conventions as above: This function is more like
> rte_eth_rx/tx_burst() than rte_eth_tx_prepare(), so follow the convention of
> rte_eth_rx/tx_burst() instead.
> 
> >
> > > >
> > > > > +	}
> > > > > +#endif
> > > > > +
> > > > > +	p = &rte_eth_fp_ops[port_id];
> > > > > +	qd = p->txq.data[queue_id];
> > > > > +
> > > > > +#ifdef RTE_ETHDEV_DEBUG_TX
> > > > > +	if (!rte_eth_dev_is_valid_port(port_id)) {
> > > > > +		RTE_ETHDEV_LOG(ERR, "Invalid Tx port_id=%u\n",
> port_id);
> > > > > +		rte_errno = ENODEV;
> > > > > +		return 0;
> > > > > +
> > > > > +	if (qd == NULL) {
> > > > > +		RTE_ETHDEV_LOG(ERR, "Invalid Tx queue_id=%u for
> > > > port_id=%u\n",
> > > > > +			queue_id, port_id);
> > > > > +		rte_erno = ENODEV;
> > > > > +		return 0;
> > > > > +	}
> > > > > +#endif
> > > > > +
> > > > > +	if (p->tx_buf_stash == NULL)
> > > > > +		return 0;
> > > > > +
> > > > > +	return p->tx_buf_stash(qd, rxq_buf_recycle_info); }
> > > > > +
> > > > > +/**
> > > > > + * @warning
> > > > > + * @b EXPERIMENTAL: this API may change, or be removed, without
> > > > > +prior notice
> > > > > + *
> > > > > + * Buffer recycle mode can let Tx queue directly put used
> > > > > +buffers into Rx
> > > > > buffer
> > > > > + * ring. This avoids freeing buffers into mempool and
> > > > > + allocating buffers from
> > > > > + * mempool.
> > > >
> > > > This function description generally describes the buffer recycle mode.
> > > >
> > > > Please update it to describe what this specific function does.
> > > Ack.
> > > >
> > > > > + *
> > > > > + * @param rx_port_id
> > > > > + *   Port identifying the receive side.
> > > > > + * @param rx_queue_id
> > > > > + *   The index of the receive queue identifying the receive side.
> > > > > + *   The value must be in the range [0, nb_rx_queue - 1] previously
> > > > supplied
> > > > > + *   to rte_eth_dev_configure().
> > > > > + * @param tx_port_id
> > > > > + *   Port identifying the transmit side.
> > > > > + * @param tx_queue_id
> > > > > + *   The index of the transmit queue identifying the transmit side.
> > > > > + *   The value must be in the range [0, nb_tx_queue - 1] previously
> > > > supplied
> > > > > + *   to rte_eth_dev_configure().
> > > > > + * @param rxq_recycle_info
> > > > > + *   A pointer to a structure of type *rte_eth_txq_rearm_data* to be
> > > filled.
> > > > > + * @return
> > > > > + *   - (0) on success or no recycling buffer.
> > > >
> > > > Why not return the return value of rte_eth_rx_descriptors_refill()
> > > > instead
> > > of
> > > > 0 on success? (This is a question, not a suggestion.)
> > > >
> > > > Or, if rxq_buf_recycle_info must be valid, the function return
> > > > type could
> > be
> > > > void instead of int.
> > > >
> > > In some time, users may forget to allocate the room for '
> > rxq_buf_recycle_info
> > > '
> > > and call 'rte_rxq_buf_recycle_info_get' API. Thus, I think we need
> > > to check with this.
> >
> > If the user forgets to allocate the rxq_buf_recycle_info, it is a
> > serious bug in the user's application.
> >
> > We don't need to handle such bugs at runtime.
> >
> > >
> > > Furthermore, the return value of this API should return success or not.
> >
> > If rxq_buf_recycle_info is not NULL, this function will always
> > succeed. So there is no need for a return value.
> >
> > If you want this function to return something, it could return nb_buf,
> > so the application can it use for telemetry purposes or similar.
> >
> > >
> > > > > + *   - (-EINVAL) rxq_recycle_info is NULL.
> > > > > + */
> > > > > +__rte_experimental
> > > > > +static inline int
> > > > > +rte_eth_dev_buf_recycle(uint16_t rx_port_id, uint16_t rx_queue_id,
> > > > > +		uint16_t tx_port_id, uint16_t tx_queue_id,
> > > > > +		struct rte_eth_rxq_buf_recycle_info
> *rxq_buf_recycle_info)
> > > > {
> > > > > +	/* The number of recycling buffers. */
> > > > > +	uint16_t nb_buf;
> > > > > +
> > > > > +	if (!rxq_buf_recycle_info)
> > > > > +		return -EINVAL;
> > > >
> > > > This is a fast path function. In which situation is this function
> > > > called
> > > with
> > > > rxq_buf_recycle_info == NULL?
> > > >
> > > > If this function can genuinely be called with rxq_buf_recycle_info
> > > > ==
> > NULL,
> > > > you should test for (rxq_buf_recycle_info == NULL), not (!
> > > > rxq_buf_recycle_info). Otherwise, I think
> > > > RTE_ASSERT(rxq_buf_recycle_info != NULL) is more appropriate.
> > > Agree. We should use ' RTE_ASSERT(rxq_buf_recycle_info != NULL)'.
> > > >
> > > > > +
> > > > > +	/* Stash Tx used buffers into Rx buffer ring */
> > > > > +	nb_buf = rte_eth_tx_buf_stash(tx_port_id, tx_queue_id,
> > > > > +				rxq_buf_recycle_info);
> > > > > +	/* If there are recycling buffers, refill Rx queue descriptors.
> > */
> > > > > +	if (nb_buf)
> > > > > +		rte_eth_rx_descriptors_refill(rx_port_id, rx_queue_id,
> > > > > +					nb_buf);
> > > > > +
> > > > > +	return 0;
> > > > > +}
> > > > > +
> > > > >  /**
> > > > >   * @warning
> > > > >   * @b EXPERIMENTAL: this API may change without prior notice
> > > > > diff --git a/lib/ethdev/rte_ethdev_core.h
> > > > > b/lib/ethdev/rte_ethdev_core.h index dcf8adab92..a138fd4dbc
> > > > > 100644
> > > > > --- a/lib/ethdev/rte_ethdev_core.h
> > > > > +++ b/lib/ethdev/rte_ethdev_core.h
> > > > > @@ -56,6 +56,13 @@ typedef int
> > > > > (*eth_rx_descriptor_status_t)(void
> > > > > *rxq, uint16_t offset);
> > > > >  /** @internal Check the status of a Tx descriptor */  typedef
> > > > > int (*eth_tx_descriptor_status_t)(void *txq, uint16_t offset);
> > > > >
> > > > > +/** @internal Stash Tx used buffers into RX ring in buffer
> > > > > +recycle mode */ typedef uint16_t (*eth_tx_buf_stash_t)(void *txq,
> > > > > +		struct rte_eth_rxq_buf_recycle_info
> > *rxq_buf_recycle_info);
> > > > > +
> > > > > +/** @internal Refill Rx descriptors in buffer recycle mode */
> > > > > +typedef uint16_t (*eth_rx_descriptors_refill_t)(void *rxq,
> > > > > +uint16_t nb);
> > > >
> > > > Please add proper function descriptions for the two callbacks above.
> > > Ack.
> > > >
> > > > > +
> > > > >  /**
> > > > >   * @internal
> > > > >   * Structure used to hold opaque pointers to internal ethdev
> > > > > Rx/Tx @@
> > > > > -90,9 +97,11 @@ struct rte_eth_fp_ops {
> > > > >  	eth_rx_queue_count_t rx_queue_count;
> > > > >  	/** Check the status of a Rx descriptor. */
> > > > >  	eth_rx_descriptor_status_t rx_descriptor_status;
> > > > > +	/** Refill Rx descriptors in buffer recycle mode */
> > > > > +	eth_rx_descriptors_refill_t rx_descriptors_refill;
> > > > >  	/** Rx queues data. */
> > > > >  	struct rte_ethdev_qdata rxq;
> > > > > -	uintptr_t reserved1[3];
> > > > > +	uintptr_t reserved1[4];
> > > >
> > > > You added a function pointer above, so to keep the structure
> > > > alignment,
> > you
> > > > must remove one here, not add one:
> > > >
> > > > -	uintptr_t reserved1[3];
> > > > +	uintptr_t reserved1[2];
> > > >
> > > Ack.
> > > > >  	/**@}*/
> > > > >
> > > > >  	/**@{*/
> > > > > @@ -106,9 +115,11 @@ struct rte_eth_fp_ops {
> > > > >  	eth_tx_prep_t tx_pkt_prepare;
> > > > >  	/** Check the status of a Tx descriptor. */
> > > > >  	eth_tx_descriptor_status_t tx_descriptor_status;
> > > > > +	/** Stash Tx used buffers into RX ring in buffer recycle mode */
> > > > > +	eth_tx_buf_stash_t tx_buf_stash;
> > > > >  	/** Tx queues data. */
> > > > >  	struct rte_ethdev_qdata txq;
> > > > > -	uintptr_t reserved2[3];
> > > > > +	uintptr_t reserved2[4];
> > > >
> > > > You added a function pointer above, so to keep the structure
> > > > alignment,
> > you
> > > > must remove one here, not add one:
> > > >
> > > > -	uintptr_t reserved1[3];
> > > > +	uintptr_t reserved1[2];
> > > >
> > > Ack.
> > > > >  	/**@}*/
> > > > >
> > > > >  } __rte_cache_aligned;
  
Feifei Wang April 26, 2023, 7:29 a.m. UTC | #7
> -----Original Message-----
> From: Ferruh Yigit <ferruh.yigit@amd.com>
> Sent: Wednesday, April 19, 2023 10:46 PM
> To: Feifei Wang <Feifei.Wang2@arm.com>; thomas@monjalon.net; Andrew
> Rybchenko <andrew.rybchenko@oktetlabs.ru>
> Cc: dev@dpdk.org; konstantin.v.ananyev@yandex.ru;
> mb@smartsharesystems.com; nd <nd@arm.com>; Honnappa Nagarahalli
> <Honnappa.Nagarahalli@arm.com>; Ruifeng Wang
> <Ruifeng.Wang@arm.com>
> Subject: Re: [PATCH v5 1/3] ethdev: add API for buffer recycle mode
> 
> On 3/30/2023 7:29 AM, Feifei Wang wrote:
> > There are 4 upper APIs for buffer recycle mode:
> > 1. 'rte_eth_rx_queue_buf_recycle_info_get'
> > This is to retrieve buffer ring information about given ports's Rx
> > queue in buffer recycle mode. And due to this, buffer recycle can be
> > no longer limited to the same driver in Rx and Tx.
> >
> > 2. 'rte_eth_dev_buf_recycle'
> > Users can call this API to enable buffer recycle mode in data path.
> > There are 2 internal APIs in it, which is separately for Rx and TX.
> >
> 
> Overall, can we have a namespace in the functions related to the buffer
> recycle, to clarify API usage, something like (just putting as sample to clarify
> my point):
> 
> rte_eth_recycle_buf
> rte_eth_recycle_tx_buf_stash
> rte_eth_recycle_rx_descriptors_refill
> rte_eth_recycle_rx_queue_info_get
> 
Agree.

> 
> > 3. 'rte_eth_tx_buf_stash'
> > Internal API for buffer recycle mode. This is to stash Tx used buffers
> > into Rx buffer ring.
> >
> 
> This API is to move/recycle descriptors from Tx queue to Rx queue, but name
> on its own, 'rte_eth_tx_buf_stash', reads like we are stashing something to Tx
> queue. What do you think, can naming be improved?
> 
Agree with this. And new name is 'rte_eth_tx_mbuf_reuse'

> > 4. 'rte_eth_rx_descriptors_refill'
> > Internal API for buffer recycle mode. This is to refill Rx
> > descriptors.
> >
> > Above all APIs are just implemented at the upper level.
> > For different APIs, we need to define specific functions separately.
> >
> > Suggested-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> > Suggested-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > Signed-off-by: Feifei Wang <feifei.wang2@arm.com>
> > Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> > Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> 
> <...>
> 
> >
> > +int
> > +rte_eth_rx_queue_buf_recycle_info_get(uint16_t port_id, uint16_t
> queue_id,
> > +		struct rte_eth_rxq_buf_recycle_info *rxq_buf_recycle_info) {
> > +	struct rte_eth_dev *dev;
> > +
> > +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
> > +	dev = &rte_eth_devices[port_id];
> > +
> > +	if (queue_id >= dev->data->nb_rx_queues) {
> > +		RTE_ETHDEV_LOG(ERR, "Invalid Rx queue_id=%u\n",
> queue_id);
> > +		return -EINVAL;
> > +	}
> > +
> > +	RTE_ASSERT(rxq_buf_recycle_info != NULL);
> > +
> 
> This is slow path API, I think better to validate parameter and return an error
> instead of assert().

Thanks for the remind. Here I'll delete this check due to I realize it is unnecessary to check if users
have allocated memory for 'rxq_buf_recycle_info', which is an input parameter.

> 
> <...>
> 
> > --- a/lib/ethdev/rte_ethdev_core.h
> > +++ b/lib/ethdev/rte_ethdev_core.h
> > @@ -56,6 +56,13 @@ typedef int (*eth_rx_descriptor_status_t)(void
> > *rxq, uint16_t offset);
> >  /** @internal Check the status of a Tx descriptor */  typedef int
> > (*eth_tx_descriptor_status_t)(void *txq, uint16_t offset);
> >
> > +/** @internal Stash Tx used buffers into RX ring in buffer recycle
> > +mode */ typedef uint16_t (*eth_tx_buf_stash_t)(void *txq,
> > +		struct rte_eth_rxq_buf_recycle_info *rxq_buf_recycle_info);
> > +
> > +/** @internal Refill Rx descriptors in buffer recycle mode */ typedef
> > +uint16_t (*eth_rx_descriptors_refill_t)(void *rxq, uint16_t nb);
> > +
> 
> Since there is only single API exposed to the application, is it really required to
> have two dev_ops, why not have a single one?

Two API is due to we need to separate Rx/TX path.  Then buffer recycle can support
the case of different pmds. For example, Rx port is i40e, and Tx port is ixgbe.
  

Patch

diff --git a/lib/ethdev/ethdev_driver.h b/lib/ethdev/ethdev_driver.h
index 2c9d615fb5..412f064975 100644
--- a/lib/ethdev/ethdev_driver.h
+++ b/lib/ethdev/ethdev_driver.h
@@ -59,6 +59,10 @@  struct rte_eth_dev {
 	eth_rx_descriptor_status_t rx_descriptor_status;
 	/** Check the status of a Tx descriptor */
 	eth_tx_descriptor_status_t tx_descriptor_status;
+	/** Stash Tx used buffers into RX ring in buffer recycle mode */
+	eth_tx_buf_stash_t tx_buf_stash;
+	/** Refill Rx descriptors in buffer recycle mode */
+	eth_rx_descriptors_refill_t rx_descriptors_refill;
 
 	/**
 	 * Device data that is shared between primary and secondary processes
@@ -504,6 +508,10 @@  typedef void (*eth_rxq_info_get_t)(struct rte_eth_dev *dev,
 typedef void (*eth_txq_info_get_t)(struct rte_eth_dev *dev,
 	uint16_t tx_queue_id, struct rte_eth_txq_info *qinfo);
 
+typedef void (*eth_rxq_buf_recycle_info_get_t)(struct rte_eth_dev *dev,
+	uint16_t rx_queue_id,
+	struct rte_eth_rxq_buf_recycle_info *rxq_buf_recycle_info);
+
 typedef int (*eth_burst_mode_get_t)(struct rte_eth_dev *dev,
 	uint16_t queue_id, struct rte_eth_burst_mode *mode);
 
@@ -1247,6 +1255,8 @@  struct eth_dev_ops {
 	eth_rxq_info_get_t         rxq_info_get;
 	/** Retrieve Tx queue information */
 	eth_txq_info_get_t         txq_info_get;
+	/** Get Rx queue buffer recycle information */
+	eth_rxq_buf_recycle_info_get_t rxq_buf_recycle_info_get;
 	eth_burst_mode_get_t       rx_burst_mode_get; /**< Get Rx burst mode */
 	eth_burst_mode_get_t       tx_burst_mode_get; /**< Get Tx burst mode */
 	eth_fw_version_get_t       fw_version_get; /**< Get firmware version */
diff --git a/lib/ethdev/ethdev_private.c b/lib/ethdev/ethdev_private.c
index 14ec8c6ccf..f8d0ae9226 100644
--- a/lib/ethdev/ethdev_private.c
+++ b/lib/ethdev/ethdev_private.c
@@ -277,6 +277,8 @@  eth_dev_fp_ops_setup(struct rte_eth_fp_ops *fpo,
 	fpo->rx_queue_count = dev->rx_queue_count;
 	fpo->rx_descriptor_status = dev->rx_descriptor_status;
 	fpo->tx_descriptor_status = dev->tx_descriptor_status;
+	fpo->tx_buf_stash = dev->tx_buf_stash;
+	fpo->rx_descriptors_refill = dev->rx_descriptors_refill;
 
 	fpo->rxq.data = dev->data->rx_queues;
 	fpo->rxq.clbk = (void **)(uintptr_t)dev->post_rx_burst_cbs;
diff --git a/lib/ethdev/rte_ethdev.c b/lib/ethdev/rte_ethdev.c
index 4d03255683..36c3a17588 100644
--- a/lib/ethdev/rte_ethdev.c
+++ b/lib/ethdev/rte_ethdev.c
@@ -5784,6 +5784,39 @@  rte_eth_tx_queue_info_get(uint16_t port_id, uint16_t queue_id,
 	return 0;
 }
 
+int
+rte_eth_rx_queue_buf_recycle_info_get(uint16_t port_id, uint16_t queue_id,
+		struct rte_eth_rxq_buf_recycle_info *rxq_buf_recycle_info)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV);
+	dev = &rte_eth_devices[port_id];
+
+	if (queue_id >= dev->data->nb_rx_queues) {
+		RTE_ETHDEV_LOG(ERR, "Invalid Rx queue_id=%u\n", queue_id);
+		return -EINVAL;
+	}
+
+	RTE_ASSERT(rxq_buf_recycle_info != NULL);
+
+	if (dev->data->rx_queues == NULL ||
+			dev->data->rx_queues[queue_id] == NULL) {
+		RTE_ETHDEV_LOG(ERR,
+			   "Rx queue %"PRIu16" of device with port_id=%"
+			   PRIu16" has not been setup\n",
+			   queue_id, port_id);
+		return -EINVAL;
+	}
+
+	if (*dev->dev_ops->rxq_buf_recycle_info_get == NULL)
+		return -ENOTSUP;
+
+	dev->dev_ops->rxq_buf_recycle_info_get(dev, queue_id, rxq_buf_recycle_info);
+
+	return 0;
+}
+
 int
 rte_eth_rx_burst_mode_get(uint16_t port_id, uint16_t queue_id,
 			  struct rte_eth_burst_mode *mode)
diff --git a/lib/ethdev/rte_ethdev.h b/lib/ethdev/rte_ethdev.h
index 99fe9e238b..016c16615d 100644
--- a/lib/ethdev/rte_ethdev.h
+++ b/lib/ethdev/rte_ethdev.h
@@ -1820,6 +1820,29 @@  struct rte_eth_txq_info {
 	uint8_t queue_state;        /**< one of RTE_ETH_QUEUE_STATE_*. */
 } __rte_cache_min_aligned;
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this structure may change without prior notice.
+ *
+ * Ethernet device Rx queue buffer ring information structure in buffer recycle mode.
+ * Used to retrieve Rx queue buffer ring information when Tx queue stashing used buffers
+ * in Rx buffer ring.
+ */
+struct rte_eth_rxq_buf_recycle_info {
+	struct rte_mbuf **buf_ring; /**< buffer ring of Rx queue. */
+	struct rte_mempool *mp;     /**< mempool of Rx queue. */
+	uint16_t *refill_head;      /**< head of buffer ring refilling descriptors. */
+	uint16_t *receive_tail;     /**< tail of buffer ring receiving pkts. */
+	uint16_t buf_ring_size;     /**< configured number of buffer ring size. */
+	/**
+	 *  request for number of Rx refill buffers.
+	 *   For some PMD drivers, Rx refiil buffers number should be aligned with
+	 *   its buffer ring size. This is to simplify ring wraparound.
+	 *   Value 0 means that no request for this.
+	 */
+	uint16_t refill_request;
+} __rte_cache_min_aligned;
+
 /* Generic Burst mode flag definition, values can be ORed. */
 
 /**
@@ -4809,6 +4832,32 @@  int rte_eth_rx_queue_info_get(uint16_t port_id, uint16_t queue_id,
 int rte_eth_tx_queue_info_get(uint16_t port_id, uint16_t queue_id,
 	struct rte_eth_txq_info *qinfo);
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Retrieve buffer ring information about given ports's Rx queue in buffer recycle
+ * mode.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The Rx queue on the Ethernet device for which buffer ring information
+ *   will be retrieved.
+ * @param rxq_buf_recycle_info
+ *   A pointer to a structure of type *rte_eth_rxq_buf_recycle_info* to be filled.
+ *
+ * @return
+ *   - 0: Success
+ *   - -ENODEV:  If *port_id* is invalid.
+ *   - -ENOTSUP: routine is not supported by the device PMD.
+ *   - -EINVAL:  The queue_id is out of range.
+ */
+__rte_experimental
+int rte_eth_rx_queue_buf_recycle_info_get(uint16_t port_id,
+		uint16_t queue_id,
+		struct rte_eth_rxq_buf_recycle_info *rxq_buf_recycle_info);
+
 /**
  * Retrieve information about the Rx packet burst mode.
  *
@@ -5987,6 +6036,71 @@  rte_eth_rx_queue_count(uint16_t port_id, uint16_t queue_id)
 	return (int)(*p->rx_queue_count)(qd);
 }
 
+/**
+ * @internal
+ * Rx routine for rte_eth_dev_buf_recycle().
+ * Refill Rx descriptors in buffer recycle mode.
+ *
+ * @note
+ * This API can only be called by rte_eth_dev_buf_recycle().
+ * Before calling this API, rte_eth_tx_buf_stash() should be
+ * called to stash Tx used buffers into Rx buffer ring.
+ *
+ * When this functionality is not implemented in the driver, the return
+ * buffer number is 0.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The index of the receive queue.
+ *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ *@param nb
+ *   The number of Rx descriptors to be refilled.
+ * @return
+ *   The number Rx descriptors correct to be refilled.
+ *   - ENODEV: bad port or queue (only if compiled with debug).
+ */
+static inline uint16_t rte_eth_rx_descriptors_refill(uint16_t port_id,
+		uint16_t queue_id, uint16_t nb)
+{
+	struct rte_eth_fp_ops *p;
+	void *qd;
+
+#ifdef RTE_ETHDEV_DEBUG_RX
+	if (port_id >= RTE_MAX_ETHPORTS ||
+			queue_id >= RTE_MAX_QUEUES_PER_PORT) {
+		RTE_ETHDEV_LOG(ERR,
+			"Invalid port_id=%u or queue_id=%u\n",
+			port_id, queue_id);
+		rte_errno = ENODEV;
+		return 0;
+	}
+#endif
+
+	p = &rte_eth_fp_ops[port_id];
+	qd = p->rxq.data[queue_id];
+
+#ifdef RTE_ETHDEV_DEBUG_RX
+	if (!rte_eth_dev_is_valid_port(port_id)) {
+		RTE_ETHDEV_LOG(ERR, "Invalid Rx port_id=%u\n", port_id);
+		rte_errno = ENODEV;
+		return 0;
+
+	if (qd == NULL) {
+		RTE_ETHDEV_LOG(ERR, "Invalid Rx queue_id=%u for port_id=%u\n",
+			queue_id, port_id);
+		rte_errno = ENODEV;
+		return 0;
+	}
+#endif
+
+	if (p->rx_descriptors_refill == NULL)
+		return 0;
+
+	return p->rx_descriptors_refill(qd, nb);
+}
+
 /**@{@name Rx hardware descriptor states
  * @see rte_eth_rx_descriptor_status
  */
@@ -6483,6 +6597,122 @@  rte_eth_tx_buffer(uint16_t port_id, uint16_t queue_id,
 	return rte_eth_tx_buffer_flush(port_id, queue_id, buffer);
 }
 
+/**
+ * @internal
+ * Tx routine for rte_eth_dev_buf_recycle().
+ * Stash Tx used buffers into Rx buffer ring in buffer recycle mode.
+ *
+ * @note
+ * This API can only be called by rte_eth_dev_buf_recycle().
+ * After calling this API, rte_eth_rx_descriptors_refill() should be
+ * called to refill Rx ring descriptors.
+ *
+ * When this functionality is not implemented in the driver, the return
+ * buffer number is 0.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The index of the transmit queue.
+ *   The value must be in the range [0, nb_tx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @param rxq_buf_recycle_info
+ *   A pointer to a structure of Rx queue buffer ring information in buffer
+ *   recycle mode.
+ *
+ * @return
+ *   The number buffers correct to be filled in the Rx buffer ring.
+ *   - ENODEV: bad port or queue (only if compiled with debug).
+ */
+static inline uint16_t rte_eth_tx_buf_stash(uint16_t port_id, uint16_t queue_id,
+		struct rte_eth_rxq_buf_recycle_info *rxq_buf_recycle_info)
+{
+	struct rte_eth_fp_ops *p;
+	void *qd;
+
+#ifdef RTE_ETHDEV_DEBUG_TX
+	if (port_id >= RTE_MAX_ETHPORTS ||
+			queue_id >= RTE_MAX_QUEUES_PER_PORT) {
+		RTE_ETHDEV_LOG(ERR,
+			"Invalid port_id=%u or queue_id=%u\n",
+			port_id, queue_id);
+		rte_errno = ENODEV;
+		return 0;
+	}
+#endif
+
+	p = &rte_eth_fp_ops[port_id];
+	qd = p->txq.data[queue_id];
+
+#ifdef RTE_ETHDEV_DEBUG_TX
+	if (!rte_eth_dev_is_valid_port(port_id)) {
+		RTE_ETHDEV_LOG(ERR, "Invalid Tx port_id=%u\n", port_id);
+		rte_errno = ENODEV;
+		return 0;
+
+	if (qd == NULL) {
+		RTE_ETHDEV_LOG(ERR, "Invalid Tx queue_id=%u for port_id=%u\n",
+			queue_id, port_id);
+		rte_erno = ENODEV;
+		return 0;
+	}
+#endif
+
+	if (p->tx_buf_stash == NULL)
+		return 0;
+
+	return p->tx_buf_stash(qd, rxq_buf_recycle_info);
+}
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Buffer recycle mode can let Tx queue directly put used buffers into Rx buffer
+ * ring. This avoids freeing buffers into mempool and allocating buffers from
+ * mempool.
+ *
+ * @param rx_port_id
+ *   Port identifying the receive side.
+ * @param rx_queue_id
+ *   The index of the receive queue identifying the receive side.
+ *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @param tx_port_id
+ *   Port identifying the transmit side.
+ * @param tx_queue_id
+ *   The index of the transmit queue identifying the transmit side.
+ *   The value must be in the range [0, nb_tx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @param rxq_recycle_info
+ *   A pointer to a structure of type *rte_eth_txq_rearm_data* to be filled.
+ * @return
+ *   - (0) on success or no recycling buffer.
+ *   - (-EINVAL) rxq_recycle_info is NULL.
+ */
+__rte_experimental
+static inline int
+rte_eth_dev_buf_recycle(uint16_t rx_port_id, uint16_t rx_queue_id,
+		uint16_t tx_port_id, uint16_t tx_queue_id,
+		struct rte_eth_rxq_buf_recycle_info *rxq_buf_recycle_info)
+{
+	/* The number of recycling buffers. */
+	uint16_t nb_buf;
+
+	if (!rxq_buf_recycle_info)
+		return -EINVAL;
+
+	/* Stash Tx used buffers into Rx buffer ring */
+	nb_buf = rte_eth_tx_buf_stash(tx_port_id, tx_queue_id,
+				rxq_buf_recycle_info);
+	/* If there are recycling buffers, refill Rx queue descriptors. */
+	if (nb_buf)
+		rte_eth_rx_descriptors_refill(rx_port_id, rx_queue_id,
+					nb_buf);
+
+	return 0;
+}
+
 /**
  * @warning
  * @b EXPERIMENTAL: this API may change without prior notice
diff --git a/lib/ethdev/rte_ethdev_core.h b/lib/ethdev/rte_ethdev_core.h
index dcf8adab92..a138fd4dbc 100644
--- a/lib/ethdev/rte_ethdev_core.h
+++ b/lib/ethdev/rte_ethdev_core.h
@@ -56,6 +56,13 @@  typedef int (*eth_rx_descriptor_status_t)(void *rxq, uint16_t offset);
 /** @internal Check the status of a Tx descriptor */
 typedef int (*eth_tx_descriptor_status_t)(void *txq, uint16_t offset);
 
+/** @internal Stash Tx used buffers into RX ring in buffer recycle mode */
+typedef uint16_t (*eth_tx_buf_stash_t)(void *txq,
+		struct rte_eth_rxq_buf_recycle_info *rxq_buf_recycle_info);
+
+/** @internal Refill Rx descriptors in buffer recycle mode */
+typedef uint16_t (*eth_rx_descriptors_refill_t)(void *rxq, uint16_t nb);
+
 /**
  * @internal
  * Structure used to hold opaque pointers to internal ethdev Rx/Tx
@@ -90,9 +97,11 @@  struct rte_eth_fp_ops {
 	eth_rx_queue_count_t rx_queue_count;
 	/** Check the status of a Rx descriptor. */
 	eth_rx_descriptor_status_t rx_descriptor_status;
+	/** Refill Rx descriptors in buffer recycle mode */
+	eth_rx_descriptors_refill_t rx_descriptors_refill;
 	/** Rx queues data. */
 	struct rte_ethdev_qdata rxq;
-	uintptr_t reserved1[3];
+	uintptr_t reserved1[4];
 	/**@}*/
 
 	/**@{*/
@@ -106,9 +115,11 @@  struct rte_eth_fp_ops {
 	eth_tx_prep_t tx_pkt_prepare;
 	/** Check the status of a Tx descriptor. */
 	eth_tx_descriptor_status_t tx_descriptor_status;
+	/** Stash Tx used buffers into RX ring in buffer recycle mode */
+	eth_tx_buf_stash_t tx_buf_stash;
 	/** Tx queues data. */
 	struct rte_ethdev_qdata txq;
-	uintptr_t reserved2[3];
+	uintptr_t reserved2[4];
 	/**@}*/
 
 } __rte_cache_aligned;
diff --git a/lib/ethdev/version.map b/lib/ethdev/version.map
index 357d1a88c0..8a4b1dac80 100644
--- a/lib/ethdev/version.map
+++ b/lib/ethdev/version.map
@@ -299,6 +299,10 @@  EXPERIMENTAL {
 	rte_flow_action_handle_query_update;
 	rte_flow_async_action_handle_query_update;
 	rte_flow_async_create_by_index;
+
+	# added in 23.07
+	rte_eth_dev_buf_recycle;
+	rte_eth_rx_queue_buf_recycle_info_get;
 };
 
 INTERNAL {
@@ -328,4 +332,6 @@  INTERNAL {
 	rte_eth_representor_id_get;
 	rte_eth_switch_domain_alloc;
 	rte_eth_switch_domain_free;
+	rte_eth_tx_buf_stash;
+	rte_eth_rx_descriptors_refill;
 };