[v2,01/14] ethdev: add support for hairpin queue

Message ID 1570218855-73478-2-git-send-email-orika@mellanox.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Headers
Series add hairpin feature |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/iol-compilation success Compile Testing PASS
ci/Intel-compilation success Compilation OK
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-mellanox-Performance fail Performance Testing issues

Commit Message

Ori Kam Oct. 4, 2019, 7:54 p.m. UTC
This commit introduce hairpin queue type.

The hairpin queue in build from Rx queue binded to Tx queue.
It is used to offload traffic coming from the wire and redirect it back
to the wire.

There are 3 new functions:
- rte_eth_dev_hairpin_capability_get
- rte_eth_rx_hairpin_queue_setup
- rte_eth_tx_hairpin_queue_setup

In order to use the queue, there is a need to create rte_flow
with queue / RSS action that targets one or more of the Rx queues.

Signed-off-by: Ori Kam <orika@mellanox.com>
---
V2:
 - update according to ML comments.

---
 lib/librte_ethdev/rte_ethdev.c           | 214 ++++++++++++++++++++++++++++++-
 lib/librte_ethdev/rte_ethdev.h           | 126 ++++++++++++++++++
 lib/librte_ethdev/rte_ethdev_core.h      |  27 +++-
 lib/librte_ethdev/rte_ethdev_version.map |   5 +
 4 files changed, 368 insertions(+), 4 deletions(-)
  

Comments

Andrew Rybchenko Oct. 8, 2019, 4:11 p.m. UTC | #1
Hi Ori,

thanks for updated version. See my notes below.

There are few style notes about line breaks which are not defined in
coding style. Of course, it may be ignored.

On 10/4/19 10:54 PM, Ori Kam wrote:
> This commit introduce hairpin queue type.
>
> The hairpin queue in build from Rx queue binded to Tx queue.
> It is used to offload traffic coming from the wire and redirect it back
> to the wire.
>
> There are 3 new functions:
> - rte_eth_dev_hairpin_capability_get
> - rte_eth_rx_hairpin_queue_setup
> - rte_eth_tx_hairpin_queue_setup
>
> In order to use the queue, there is a need to create rte_flow
> with queue / RSS action that targets one or more of the Rx queues.
>
> Signed-off-by: Ori Kam <orika@mellanox.com>

rte_eth_dev_[rt]x_queue_stop() should return error if used for hairpin 
queue.
Right now rte_eth_dev_[rt]x_queue_start() will return 0. Not sure about it.
What about rte_eth_rx_queue_info_get() and rte_eth_tx_queue_info_get()?
Any other Rx/Tx queue functions?

> ---
> V2:
>   - update according to ML comments.
>
> ---
>   lib/librte_ethdev/rte_ethdev.c           | 214 ++++++++++++++++++++++++++++++-
>   lib/librte_ethdev/rte_ethdev.h           | 126 ++++++++++++++++++
>   lib/librte_ethdev/rte_ethdev_core.h      |  27 +++-
>   lib/librte_ethdev/rte_ethdev_version.map |   5 +
>   4 files changed, 368 insertions(+), 4 deletions(-)
>
> diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c
> index af82360..ee8af42 100644
> --- a/lib/librte_ethdev/rte_ethdev.c
> +++ b/lib/librte_ethdev/rte_ethdev.c
> @@ -1752,12 +1752,102 @@ struct rte_eth_dev *
>   		if (!dev->data->min_rx_buf_size ||
>   		    dev->data->min_rx_buf_size > mbp_buf_size)
>   			dev->data->min_rx_buf_size = mbp_buf_size;
> +		if (dev->data->rx_queue_state[rx_queue_id] ==
> +		    RTE_ETH_QUEUE_STATE_HAIRPIN)
> +			dev->data->rx_queue_state[rx_queue_id] =
> +				RTE_ETH_QUEUE_STATE_STOPPED;

I don't understand it. Why is rte_eth_rx_queue_setup() changed?

>   	}
>   
>   	return eth_err(port_id, ret);
>   }
>   
>   int
> +rte_eth_rx_hairpin_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
> +			       uint16_t nb_rx_desc,
> +			       const struct rte_eth_hairpin_conf *conf)
> +{
> +	int ret;
> +	struct rte_eth_dev *dev;
> +	struct rte_eth_hairpin_cap cap;
> +	struct rte_eth_dev_info dev_info;
> +	void **rxq;
> +	int i;
> +	int count = 0;
> +
> +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
> +
> +	dev = &rte_eth_devices[port_id];
> +	if (rx_queue_id >= dev->data->nb_rx_queues) {
> +		RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n", rx_queue_id);
> +		return -EINVAL;
> +	}
> +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP);
> +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->hairpin_cap_get,
> +				-ENOTSUP);
> +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_hairpin_queue_setup,
> +				-ENOTSUP);
> +	rte_eth_dev_hairpin_capability_get(port_id, &cap);

Return value should be checked. It makes  hairpin_cap_get check above
unnecessary.

> +	/* Use default specified by driver, if nb_rx_desc is zero */
> +	if (nb_rx_desc == 0)
> +		nb_rx_desc = cap.max_nb_desc;
> +	if (nb_rx_desc > cap.max_nb_desc) {
> +		RTE_ETHDEV_LOG(ERR,
> +			       "Invalid value for nb_rx_desc(=%hu), should be: "
> +			       "<= %hu", nb_rx_desc, cap.max_nb_desc);
> +		return -EINVAL;
> +	}
> +	ret = rte_eth_dev_info_get(port_id, &dev_info);
> +	if (ret != 0)
> +		return ret;
> +	if (dev->data->dev_started &&
> +		!(dev_info.dev_capa &
> +		  RTE_ETH_DEV_CAPA_RUNTIME_RX_QUEUE_SETUP))
> +		return -EBUSY;
> +	if (dev->data->dev_started &&
> +		(dev->data->rx_queue_state[rx_queue_id] !=
> +		 RTE_ETH_QUEUE_STATE_STOPPED))
> +		return -EBUSY;
> +	if (conf->peer_n > cap.max_rx_2_tx) {
> +		RTE_ETHDEV_LOG(ERR,
> +			       "Invalid value for number of peers(=%hu), "
> +			       "should be: <= %hu", conf->peer_n,
> +			       cap.max_rx_2_tx);
> +		return -EINVAL;
> +	}
> +	if (conf->peer_n == 0) {
> +		RTE_ETHDEV_LOG(ERR,
> +			       "Invalid value for number of peers(=%hu), "
> +			       "should be: > 0", conf->peer_n);
> +		return -EINVAL;
> +	}
> +	if (cap.max_n_queues != -1) {
> +		for (i = 0; i < dev->data->nb_rx_queues; i++) {
> +			if (dev->data->rx_queue_state[i] ==
> +			    RTE_ETH_QUEUE_STATE_HAIRPIN)
> +				count++;
> +		}
> +		if (count > cap.max_n_queues) {
> +			RTE_ETHDEV_LOG(ERR,
> +				       "To many Rx hairpin queues %d", count);
> +			return -EINVAL;
> +		}
> +	}
> +	rxq = dev->data->rx_queues;
> +	if (rxq[rx_queue_id]) {

Please, compare with NULL (I know that rte_eth_rx_queue_setup() does 
like above).

> +		RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_release,
> +					-ENOTSUP);
> +		(*dev->dev_ops->rx_queue_release)(rxq[rx_queue_id]);
> +		rxq[rx_queue_id] = NULL;
> +	}
> +	ret = (*dev->dev_ops->rx_hairpin_queue_setup)(dev, rx_queue_id,
> +						      nb_rx_desc, conf);
> +	if (!ret)

Please, compare with 0

> +		dev->data->rx_queue_state[rx_queue_id] =
> +			RTE_ETH_QUEUE_STATE_HAIRPIN;
> +	return eth_err(port_id, ret);
> +}
> +
> +int
>   rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
>   		       uint16_t nb_tx_desc, unsigned int socket_id,
>   		       const struct rte_eth_txconf *tx_conf)
> @@ -1851,9 +1941,97 @@ struct rte_eth_dev *
>   			__func__);
>   		return -EINVAL;
>   	}
> +	ret = (*dev->dev_ops->tx_queue_setup)(dev, tx_queue_id, nb_tx_desc,
> +					      socket_id, &local_conf);
> +	if (!ret)
> +		if (dev->data->tx_queue_state[tx_queue_id] ==
> +		    RTE_ETH_QUEUE_STATE_HAIRPIN)
> +			dev->data->tx_queue_state[tx_queue_id] =
> +				RTE_ETH_QUEUE_STATE_STOPPED;

Why is it changed?

> +	return eth_err(port_id, ret);
> +}
> +
> +int
> +rte_eth_tx_hairpin_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
> +			       uint16_t nb_tx_desc,
> +			       const struct rte_eth_hairpin_conf *conf)
> +{
> +	struct rte_eth_dev *dev;
> +	struct rte_eth_hairpin_cap cap;
> +	struct rte_eth_dev_info dev_info;
> +	void **txq;
> +	int i;
> +	int count = 0;
> +	int ret;
>   
> -	return eth_err(port_id, (*dev->dev_ops->tx_queue_setup)(dev,
> -		       tx_queue_id, nb_tx_desc, socket_id, &local_conf));
> +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
> +	dev = &rte_eth_devices[port_id];
> +	if (tx_queue_id >= dev->data->nb_tx_queues) {
> +		RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n", tx_queue_id);
> +		return -EINVAL;
> +	}
> +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP);
> +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->hairpin_cap_get,
> +				-ENOTSUP);
> +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_hairpin_queue_setup,
> +				-ENOTSUP);
> +	rte_eth_dev_info_get(port_id, &dev_info);

return value should be checked.

> +	rte_eth_dev_hairpin_capability_get(port_id, &cap);

Check return value and you can rely on  hairpin_cap_get check inside.

> +	/* Use default specified by driver, if nb_tx_desc is zero */
> +	if (nb_tx_desc == 0)
> +		nb_tx_desc = cap.max_nb_desc;
> +	if (nb_tx_desc > cap.max_nb_desc) {
> +		RTE_ETHDEV_LOG(ERR,
> +			       "Invalid value for nb_tx_desc(=%hu), should be: "
> +			       "<= %hu", nb_tx_desc, cap.max_nb_desc);
> +		return -EINVAL;
> +	}
> +	if (conf->peer_n > cap.max_tx_2_rx) {
> +		RTE_ETHDEV_LOG(ERR,
> +			       "Invalid value for number of peers(=%hu), "
> +			       "should be: <= %hu", conf->peer_n,
> +			       cap.max_tx_2_rx);
> +		return -EINVAL;
> +	}
> +	if (conf->peer_n == 0) {
> +		RTE_ETHDEV_LOG(ERR,
> +			       "Invalid value for number of peers(=%hu), "
> +			       "should be: > 0", conf->peer_n);
> +		return -EINVAL;
> +	}
> +	if (cap.max_n_queues != -1) {
> +		for (i = 0; i < dev->data->nb_tx_queues; i++) {
> +			if (dev->data->tx_queue_state[i] ==
> +			    RTE_ETH_QUEUE_STATE_HAIRPIN)
> +				count++;
> +		}
> +		if (count > cap.max_n_queues) {
> +			RTE_ETHDEV_LOG(ERR,
> +				       "To many Rx hairpin queues %d", count);
> +			return -EINVAL;
> +		}
> +	}
> +	if (dev->data->dev_started &&
> +		!(dev_info.dev_capa &
> +		  RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP))
> +		return -EBUSY;
> +	if (dev->data->dev_started &&
> +		(dev->data->tx_queue_state[tx_queue_id] !=
> +		 RTE_ETH_QUEUE_STATE_STOPPED))
> +		return -EBUSY;
> +	txq = dev->data->tx_queues;
> +	if (txq[tx_queue_id]) {

Please, compare with NULL

> +		RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_release,
> +					-ENOTSUP);
> +		(*dev->dev_ops->tx_queue_release)(txq[tx_queue_id]);
> +		txq[tx_queue_id] = NULL;
> +	}
> +	ret = (*dev->dev_ops->tx_hairpin_queue_setup)
> +		(dev, tx_queue_id, nb_tx_desc, conf);
> +	if (!ret)

Please, compare with 0

> +		dev->data->tx_queue_state[tx_queue_id] =
> +			RTE_ETH_QUEUE_STATE_HAIRPIN;
> +	return eth_err(port_id, ret);
>   }
>   
>   void
> @@ -3981,12 +4159,20 @@ int rte_eth_set_queue_rate_limit(uint16_t port_id, uint16_t queue_idx,
>   	rte_errno = ENOTSUP;
>   	return NULL;
>   #endif
> +	struct rte_eth_dev *dev;
> +
>   	/* check input parameters */
>   	if (!rte_eth_dev_is_valid_port(port_id) || fn == NULL ||
>   		    queue_id >= rte_eth_devices[port_id].data->nb_rx_queues) {
>   		rte_errno = EINVAL;
>   		return NULL;
>   	}
> +	dev = &rte_eth_devices[port_id];
> +	if (dev->data->rx_queue_state[queue_id] ==
> +	    RTE_ETH_QUEUE_STATE_HAIRPIN) {

It looks like line break is not required above. Just to make code a bit 
shorter.

> +		rte_errno = EINVAL;
> +		return NULL;
> +	}
>   	struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0);
>   
>   	if (cb == NULL) {
> @@ -4058,6 +4244,8 @@ int rte_eth_set_queue_rate_limit(uint16_t port_id, uint16_t queue_idx,
>   	rte_errno = ENOTSUP;
>   	return NULL;
>   #endif
> +	struct rte_eth_dev *dev;
> +
>   	/* check input parameters */
>   	if (!rte_eth_dev_is_valid_port(port_id) || fn == NULL ||
>   		    queue_id >= rte_eth_devices[port_id].data->nb_tx_queues) {
> @@ -4065,6 +4253,13 @@ int rte_eth_set_queue_rate_limit(uint16_t port_id, uint16_t queue_idx,
>   		return NULL;
>   	}
>   
> +	dev = &rte_eth_devices[port_id];
> +	if (dev->data->tx_queue_state[queue_id] ==
> +	    RTE_ETH_QUEUE_STATE_HAIRPIN) {

It looks like line break is not required above. Just to make code a bit 
shorter.

> +		rte_errno = EINVAL;
> +		return NULL;
> +	}
> +
>   	struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0);
>   
>   	if (cb == NULL) {
> @@ -4510,6 +4705,21 @@ int rte_eth_set_queue_rate_limit(uint16_t port_id, uint16_t queue_idx,
>   }
>   
>   int
> +rte_eth_dev_hairpin_capability_get(uint16_t port_id,
> +				   struct rte_eth_hairpin_cap *cap)
> +{
> +	struct rte_eth_dev *dev;
> +
> +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
> +
> +	dev = &rte_eth_devices[port_id];
> +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->hairpin_cap_get,
> +				-ENOTSUP);

I think it would be useful to memset() cap with 0 to be sure that it is 
initialized.

> +	return eth_err(port_id, (*dev->dev_ops->hairpin_cap_get)
> +		       (dev, cap));


Please, consider to avoid line breaks above to make code a bit shorter.
Of course, it should not exceed line length limit.

> +}
> +
> +int
>   rte_eth_dev_pool_ops_supported(uint16_t port_id, const char *pool)
>   {
>   	struct rte_eth_dev *dev;
> diff --git a/lib/librte_ethdev/rte_ethdev.h b/lib/librte_ethdev/rte_ethdev.h
> index d937fb4..29dcfea 100644
> --- a/lib/librte_ethdev/rte_ethdev.h
> +++ b/lib/librte_ethdev/rte_ethdev.h
> @@ -804,6 +804,46 @@ struct rte_eth_txconf {
>   };
>   
>   /**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
> + *
> + * A structure used to return the hairpin capabilities that are supportd.

supportd -> supported

> + */
> +struct rte_eth_hairpin_cap {
> +	int16_t max_n_queues;
> +	/**< The max number of hairpin queuesi. -1 no limit. */

I'm not sure that I like type difference from max_rx_queues here.
I think it would be better to use uint16_t. I would say there is point
to highlight no limit (first of all I'm not sure that it makes sense,
second, UINT16_MAX will obviously do the job in uint16_t.

Is it both Rx and Tx?

> +	int16_t max_rx_2_tx;
> +	/**< Max number of Rx queues to be connected to one Tx queue. */
> +	int16_t max_tx_2_rx;
> +	/**< Max number of Tx queues to be connected to one Rx queue. */

I would prefer to have uint16_t here as well. Mainly for consistency.

> +	uint16_t max_nb_desc; /**< The max num of descriptors. */

Is it common for Rx and Tx? What about min?

> +};
> +
> +#define RTE_ETH_MAX_HAIRPIN_PEERS 32
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
> + *
> + * A structure used to hold hairpin peer data.
> + */
> +struct rte_eth_hairpin_peer {
> +	uint16_t port; /**< Peer port. */
> +	uint16_t queue; /**< Peer queue. */
> +};
> +
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
> + *
> + * A structure used to configure hairpin binding.

It should be explained what happens if Rx queue has many Tx queues.
Is packet duplicated? Or distributed?

> + */
> +struct rte_eth_hairpin_conf {
> +	uint16_t peer_n; /**< The number of peers. */
> +	struct rte_eth_hairpin_peer peers[RTE_ETH_MAX_HAIRPIN_PEERS];
> +};
> +
> +/**
>    * A structure contains information about HW descriptor ring limitations.
>    */
>   struct rte_eth_desc_lim {
> @@ -1080,6 +1120,8 @@ struct rte_eth_conf {
>   /**< Device supports Rx queue setup after device started*/
>   #define RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP 0x00000002
>   /**< Device supports Tx queue setup after device started*/
> +#define RTE_ETH_DEV_CAPA_HAIRPIN_SUPPORT 0x00000004
> +/**< Device supports hairpin queues. */

Do we really need it? Isn't rte_eth_dev_hairpin_capability_get() returning
-ENOTSUP sufficient?

>   /*
>    * If new Tx offload capabilities are defined, they also must be
> @@ -1277,6 +1319,7 @@ struct rte_eth_dcb_info {
>    */
>   #define RTE_ETH_QUEUE_STATE_STOPPED 0
>   #define RTE_ETH_QUEUE_STATE_STARTED 1
> +#define RTE_ETH_QUEUE_STATE_HAIRPIN 2
>   
>   #define RTE_ETH_ALL RTE_MAX_ETHPORTS
>   
> @@ -1771,6 +1814,34 @@ int rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
>   		struct rte_mempool *mb_pool);
>   
>   /**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
> + *
> + * Allocate and set up a hairpin receive queue for an Ethernet device.
> + *
> + * The function set up the selected queue to be used in hairpin.
> + *
> + * @param port_id
> + *   The port identifier of the Ethernet device.
> + * @param rx_queue_id
> + *   The index of the receive queue to set up.
> + *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
> + *   to rte_eth_dev_configure().
> + * @param nb_rx_desc
> + *   The number of receive descriptors to allocate for the receive ring.

Is it allows to specify 0 to pick driver recommended value?

> + * @param conf
> + *   The pointer to the hairpin configuration.
> + * @return
> + *   - 0: Success, receive queue correctly set up.
> + *   - -EINVAL: Selected Queue can't be configured for hairpin.
> + *   - -ENOMEM: Unable to allocate the resources required for the queue.

Please, follow return value description style similar to 
rte_eth_dev_info_get()
which is more common to the file.

> + */
> +__rte_experimental
> +int rte_eth_rx_hairpin_queue_setup
> +	(uint16_t port_id, uint16_t rx_queue_id, uint16_t nb_rx_desc,
> +	 const struct rte_eth_hairpin_conf *conf);
> +
> +/**
>    * Allocate and set up a transmit queue for an Ethernet device.
>    *
>    * @param port_id
> @@ -1823,6 +1894,33 @@ int rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
>   		const struct rte_eth_txconf *tx_conf);
>   
>   /**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
> + *
> + * Allocate and set up a transmit hairpin queue for an Ethernet device.
> + *
> + * @param port_id
> + *   The port identifier of the Ethernet device.
> + * @param tx_queue_id
> + *   The index of the transmit queue to set up.
> + *   The value must be in the range [0, nb_tx_queue - 1] previously supplied
> + *   to rte_eth_dev_configure().
> + * @param nb_tx_desc
> + *   The number of transmit descriptors to allocate for the transmit ring.

Is it allows to specify 0 to pick driver recommended value?

> + * @param conf
> + *   The hairpin configuration.
> + *
> + * @return
> + *   - 0: Success, transmit queue correctly set up.
> + *   - -EINVAL: Selected Queue can't be configured for hairpin.
> + *   - -ENOMEM: Unable to allocate the resources required for the queue.

Please, follow return value description style similar to 
rte_eth_dev_info_get()
which is more common to the file.

> + */
> +__rte_experimental
> +int rte_eth_tx_hairpin_queue_setup
> +	(uint16_t port_id, uint16_t tx_queue_id, uint16_t nb_tx_desc,
> +	 const struct rte_eth_hairpin_conf *conf);
> +
> +/**
>    * Return the NUMA socket to which an Ethernet device is connected
>    *
>    * @param port_id
> @@ -4037,6 +4135,22 @@ int rte_eth_dev_adjust_nb_rx_tx_desc(uint16_t port_id,
>   void *
>   rte_eth_dev_get_sec_ctx(uint16_t port_id);
>   
> +/**
> + * @warning
> + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
> + *
> + * Query the device hairpin capabilities.
> + *
> + * @param port_id
> + *   The port identifier of the Ethernet device.
> + * @param cap
> + *   Pointer to a structure that will hold the hairpin capabilities.
> + * @return
> + *   - 0 on success, -ENOTSUP if the device doesn't support hairpin.

Please, follow return value description style similar to 
rte_eth_dev_info_get()
which is more common to the file.

> + */
> +__rte_experimental
> +int rte_eth_dev_hairpin_capability_get(uint16_t port_id,
> +				       struct rte_eth_hairpin_cap *cap);
>   
>   #include <rte_ethdev_core.h>
>   
> @@ -4137,6 +4251,12 @@ int rte_eth_dev_adjust_nb_rx_tx_desc(uint16_t port_id,
>   		RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n", queue_id);
>   		return 0;
>   	}
> +	if (dev->data->rx_queue_state[queue_id] ==
> +	    RTE_ETH_QUEUE_STATE_HAIRPIN) {
> +		RTE_ETHDEV_LOG(ERR, "RX queue_id=%u is hairpin queue\n",
> +			       queue_id);
> +		return 0;
> +	}
>   #endif
>   	nb_rx = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id],
>   				     rx_pkts, nb_pkts);
> @@ -4403,6 +4523,12 @@ static inline int rte_eth_tx_descriptor_status(uint16_t port_id,
>   		RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n", queue_id);
>   		return 0;
>   	}
> +	if (dev->data->tx_queue_state[queue_id] ==
> +	    RTE_ETH_QUEUE_STATE_HAIRPIN) {
> +		RTE_ETHDEV_LOG(ERR, "TX queue_id=%u is hairpin queue\n",
> +			       queue_id);
> +		return 0;
> +	}
>   #endif
>   
>   #ifdef RTE_ETHDEV_RXTX_CALLBACKS
> diff --git a/lib/librte_ethdev/rte_ethdev_core.h b/lib/librte_ethdev/rte_ethdev_core.h
> index dcb5ae6..ef46e71 100644
> --- a/lib/librte_ethdev/rte_ethdev_core.h
> +++ b/lib/librte_ethdev/rte_ethdev_core.h
> @@ -250,6 +250,12 @@ typedef int (*eth_rx_queue_setup_t)(struct rte_eth_dev *dev,
>   				    struct rte_mempool *mb_pool);
>   /**< @internal Set up a receive queue of an Ethernet device. */
>   
> +typedef int (*eth_rx_hairpin_queue_setup_t)
> +	(struct rte_eth_dev *dev, uint16_t rx_queue_id,
> +	 uint16_t nb_rx_desc,
> +	 const struct rte_eth_hairpin_conf *conf);
> +/**< @internal Set up a receive hairpin queue of an Ethernet device. */
> +
>   typedef int (*eth_tx_queue_setup_t)(struct rte_eth_dev *dev,
>   				    uint16_t tx_queue_id,
>   				    uint16_t nb_tx_desc,
> @@ -257,6 +263,12 @@ typedef int (*eth_tx_queue_setup_t)(struct rte_eth_dev *dev,
>   				    const struct rte_eth_txconf *tx_conf);
>   /**< @internal Setup a transmit queue of an Ethernet device. */
>   
> +typedef int (*eth_tx_hairpin_queue_setup_t)
> +	(struct rte_eth_dev *dev, uint16_t tx_queue_id,
> +	 uint16_t nb_tx_desc,
> +	 const struct rte_eth_hairpin_conf *hairpin_conf);
> +/**< @internal Setup a transmit hairpin queue of an Ethernet device. */
> +
>   typedef int (*eth_rx_enable_intr_t)(struct rte_eth_dev *dev,
>   				    uint16_t rx_queue_id);
>   /**< @internal Enable interrupt of a receive queue of an Ethernet device. */
> @@ -505,6 +517,10 @@ typedef int (*eth_pool_ops_supported_t)(struct rte_eth_dev *dev,
>   						const char *pool);
>   /**< @internal Test if a port supports specific mempool ops */
>   
> +typedef int (*eth_hairpin_cap_get_t)(struct rte_eth_dev *dev,
> +				     struct rte_eth_hairpin_cap *cap);
> +/**< @internal get the hairpin capabilities. */
> +
>   /**
>    * @internal A structure containing the functions exported by an Ethernet driver.
>    */
> @@ -557,6 +573,8 @@ struct eth_dev_ops {
>   	eth_queue_start_t          tx_queue_start;/**< Start TX for a queue. */
>   	eth_queue_stop_t           tx_queue_stop; /**< Stop TX for a queue. */
>   	eth_rx_queue_setup_t       rx_queue_setup;/**< Set up device RX queue. */
> +	eth_rx_hairpin_queue_setup_t rx_hairpin_queue_setup;
> +	/**< Set up device RX hairpin queue. */
>   	eth_queue_release_t        rx_queue_release; /**< Release RX queue. */
>   	eth_rx_queue_count_t       rx_queue_count;
>   	/**< Get the number of used RX descriptors. */
> @@ -568,6 +586,8 @@ struct eth_dev_ops {
>   	eth_rx_enable_intr_t       rx_queue_intr_enable;  /**< Enable Rx queue interrupt. */
>   	eth_rx_disable_intr_t      rx_queue_intr_disable; /**< Disable Rx queue interrupt. */
>   	eth_tx_queue_setup_t       tx_queue_setup;/**< Set up device TX queue. */
> +	eth_tx_hairpin_queue_setup_t tx_hairpin_queue_setup;
> +	/**< Set up device TX hairpin queue. */
>   	eth_queue_release_t        tx_queue_release; /**< Release TX queue. */
>   	eth_tx_done_cleanup_t      tx_done_cleanup;/**< Free tx ring mbufs */
>   
> @@ -639,6 +659,9 @@ struct eth_dev_ops {
>   
>   	eth_pool_ops_supported_t pool_ops_supported;
>   	/**< Test if a port supports specific mempool ops */
> +
> +	eth_hairpin_cap_get_t hairpin_cap_get;
> +	/**< Returns the hairpin capabilities. */
>   };
>   
>   /**
> @@ -746,9 +769,9 @@ struct rte_eth_dev_data {
>   		dev_started : 1,   /**< Device state: STARTED(1) / STOPPED(0). */
>   		lro         : 1;   /**< RX LRO is ON(1) / OFF(0) */
>   	uint8_t rx_queue_state[RTE_MAX_QUEUES_PER_PORT];
> -			/**< Queues state: STARTED(1) / STOPPED(0). */
> +		/**< Queues state: HAIRPIN(2) / STARTED(1) / STOPPED(0). */
>   	uint8_t tx_queue_state[RTE_MAX_QUEUES_PER_PORT];
> -			/**< Queues state: STARTED(1) / STOPPED(0). */
> +		/**< Queues state: HAIRPIN(2) STARTED(1) / STOPPED(0). */
>   	uint32_t dev_flags;             /**< Capabilities. */
>   	enum rte_kernel_driver kdrv;    /**< Kernel driver passthrough. */
>   	int numa_node;                  /**< NUMA node connection. */
> diff --git a/lib/librte_ethdev/rte_ethdev_version.map b/lib/librte_ethdev/rte_ethdev_version.map
> index 6df42a4..77b0a86 100644
> --- a/lib/librte_ethdev/rte_ethdev_version.map
> +++ b/lib/librte_ethdev/rte_ethdev_version.map
> @@ -283,4 +283,9 @@ EXPERIMENTAL {
>   
>   	# added in 19.08
>   	rte_eth_read_clock;
> +
> +	# added in 19.11
> +	rte_eth_rx_hairpin_queue_setup;
> +	rte_eth_tx_hairpin_queue_setup;
> +	rte_eth_dev_hairpin_capability_get;
>   };
  
Ori Kam Oct. 10, 2019, 9:07 p.m. UTC | #2
Hi Andrew,

Thanks for your comments,

PSB,
Ori

> -----Original Message-----
> From: Andrew Rybchenko <arybchenko@solarflare.com>
> Sent: Tuesday, October 8, 2019 7:11 PM
> To: Ori Kam <orika@mellanox.com>; Thomas Monjalon
> <thomas@monjalon.net>; Ferruh Yigit <ferruh.yigit@intel.com>
> Cc: dev@dpdk.org; jingjing.wu@intel.com; stephen@networkplumber.org
> Subject: Re: [PATCH v2 01/14] ethdev: add support for hairpin queue
> 
> Hi Ori,
> 
> thanks for updated version. See my notes below.
> 
> There are few style notes about line breaks which are not defined in
> coding style. Of course, it may be ignored.
> 

I will fix what I can.

> On 10/4/19 10:54 PM, Ori Kam wrote:
> > This commit introduce hairpin queue type.
> >
> > The hairpin queue in build from Rx queue binded to Tx queue.
> > It is used to offload traffic coming from the wire and redirect it back
> > to the wire.
> >
> > There are 3 new functions:
> > - rte_eth_dev_hairpin_capability_get
> > - rte_eth_rx_hairpin_queue_setup
> > - rte_eth_tx_hairpin_queue_setup
> >
> > In order to use the queue, there is a need to create rte_flow
> > with queue / RSS action that targets one or more of the Rx queues.
> >
> > Signed-off-by: Ori Kam <orika@mellanox.com>
> 
> rte_eth_dev_[rt]x_queue_stop() should return error if used for hairpin
> queue.
> Right now rte_eth_dev_[rt]x_queue_start() will return 0. Not sure about it.
> What about rte_eth_rx_queue_info_get() and rte_eth_tx_queue_info_get()?
> Any other Rx/Tx queue functions?
> 

I will add error to both functions (Tx/Rx)
I don't see any other function (only the info_get and queue_stop)

> > ---
> > V2:
> >   - update according to ML comments.
> >
> > ---
> >   lib/librte_ethdev/rte_ethdev.c           | 214
> ++++++++++++++++++++++++++++++-
> >   lib/librte_ethdev/rte_ethdev.h           | 126 ++++++++++++++++++
> >   lib/librte_ethdev/rte_ethdev_core.h      |  27 +++-
> >   lib/librte_ethdev/rte_ethdev_version.map |   5 +
> >   4 files changed, 368 insertions(+), 4 deletions(-)
> >
> > diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c
> > index af82360..ee8af42 100644
> > --- a/lib/librte_ethdev/rte_ethdev.c
> > +++ b/lib/librte_ethdev/rte_ethdev.c
> > @@ -1752,12 +1752,102 @@ struct rte_eth_dev *
> >   		if (!dev->data->min_rx_buf_size ||
> >   		    dev->data->min_rx_buf_size > mbp_buf_size)
> >   			dev->data->min_rx_buf_size = mbp_buf_size;
> > +		if (dev->data->rx_queue_state[rx_queue_id] ==
> > +		    RTE_ETH_QUEUE_STATE_HAIRPIN)
> > +			dev->data->rx_queue_state[rx_queue_id] =
> > +				RTE_ETH_QUEUE_STATE_STOPPED;
> 
> I don't understand it. Why is rte_eth_rx_queue_setup() changed?
> 

This was done so user can reset the queue back to normal queue.

> >   	}
> >
> >   	return eth_err(port_id, ret);
> >   }
> >
> >   int
> > +rte_eth_rx_hairpin_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
> > +			       uint16_t nb_rx_desc,
> > +			       const struct rte_eth_hairpin_conf *conf)
> > +{
> > +	int ret;
> > +	struct rte_eth_dev *dev;
> > +	struct rte_eth_hairpin_cap cap;
> > +	struct rte_eth_dev_info dev_info;
> > +	void **rxq;
> > +	int i;
> > +	int count = 0;
> > +
> > +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
> > +
> > +	dev = &rte_eth_devices[port_id];
> > +	if (rx_queue_id >= dev->data->nb_rx_queues) {
> > +		RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n",
> rx_queue_id);
> > +		return -EINVAL;
> > +	}
> > +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -
> ENOTSUP);
> > +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->hairpin_cap_get,
> > +				-ENOTSUP);
> > +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops-
> >rx_hairpin_queue_setup,
> > +				-ENOTSUP);
> > +	rte_eth_dev_hairpin_capability_get(port_id, &cap);
> 
> Return value should be checked. It makes  hairpin_cap_get check above
> unnecessary.
> 
I will add a check.
 
> > +	/* Use default specified by driver, if nb_rx_desc is zero */
> > +	if (nb_rx_desc == 0)
> > +		nb_rx_desc = cap.max_nb_desc;
> > +	if (nb_rx_desc > cap.max_nb_desc) {
> > +		RTE_ETHDEV_LOG(ERR,
> > +			       "Invalid value for nb_rx_desc(=%hu), should be: "
> > +			       "<= %hu", nb_rx_desc, cap.max_nb_desc);
> > +		return -EINVAL;
> > +	}
> > +	ret = rte_eth_dev_info_get(port_id, &dev_info);
> > +	if (ret != 0)
> > +		return ret;
> > +	if (dev->data->dev_started &&
> > +		!(dev_info.dev_capa &
> > +		  RTE_ETH_DEV_CAPA_RUNTIME_RX_QUEUE_SETUP))
> > +		return -EBUSY;
> > +	if (dev->data->dev_started &&
> > +		(dev->data->rx_queue_state[rx_queue_id] !=
> > +		 RTE_ETH_QUEUE_STATE_STOPPED))
> > +		return -EBUSY;
> > +	if (conf->peer_n > cap.max_rx_2_tx) {
> > +		RTE_ETHDEV_LOG(ERR,
> > +			       "Invalid value for number of peers(=%hu), "
> > +			       "should be: <= %hu", conf->peer_n,
> > +			       cap.max_rx_2_tx);
> > +		return -EINVAL;
> > +	}
> > +	if (conf->peer_n == 0) {
> > +		RTE_ETHDEV_LOG(ERR,
> > +			       "Invalid value for number of peers(=%hu), "
> > +			       "should be: > 0", conf->peer_n);
> > +		return -EINVAL;
> > +	}
> > +	if (cap.max_n_queues != -1) {
> > +		for (i = 0; i < dev->data->nb_rx_queues; i++) {
> > +			if (dev->data->rx_queue_state[i] ==
> > +			    RTE_ETH_QUEUE_STATE_HAIRPIN)
> > +				count++;
> > +		}
> > +		if (count > cap.max_n_queues) {
> > +			RTE_ETHDEV_LOG(ERR,
> > +				       "To many Rx hairpin queues %d", count);
> > +			return -EINVAL;
> > +		}
> > +	}
> > +	rxq = dev->data->rx_queues;
> > +	if (rxq[rx_queue_id]) {
> 
> Please, compare with NULL (I know that rte_eth_rx_queue_setup() does
> like above).
>

O.K. I will change, but may I ask why? If in the rte_eth_rx_queue_setup function it is written the same way,
why do you want this change?

> > +		RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops-
> >rx_queue_release,
> > +					-ENOTSUP);
> > +		(*dev->dev_ops->rx_queue_release)(rxq[rx_queue_id]);
> > +		rxq[rx_queue_id] = NULL;
> > +	}
> > +	ret = (*dev->dev_ops->rx_hairpin_queue_setup)(dev, rx_queue_id,
> > +						      nb_rx_desc, conf);
> > +	if (!ret)
> 
> Please, compare with 0
> 

Will do, but again just for my knowledge why?

> > +		dev->data->rx_queue_state[rx_queue_id] =
> > +			RTE_ETH_QUEUE_STATE_HAIRPIN;
> > +	return eth_err(port_id, ret);
> > +}
> > +
> > +int
> >   rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
> >   		       uint16_t nb_tx_desc, unsigned int socket_id,
> >   		       const struct rte_eth_txconf *tx_conf)
> > @@ -1851,9 +1941,97 @@ struct rte_eth_dev *
> >   			__func__);
> >   		return -EINVAL;
> >   	}
> > +	ret = (*dev->dev_ops->tx_queue_setup)(dev, tx_queue_id, nb_tx_desc,
> > +					      socket_id, &local_conf);
> > +	if (!ret)
> > +		if (dev->data->tx_queue_state[tx_queue_id] ==
> > +		    RTE_ETH_QUEUE_STATE_HAIRPIN)
> > +			dev->data->tx_queue_state[tx_queue_id] =
> > +				RTE_ETH_QUEUE_STATE_STOPPED;
> 
> Why is it changed?
> 

Like in the Rx to enable switching back to normal queue.

> > +	return eth_err(port_id, ret);
> > +}
> > +
> > +int
> > +rte_eth_tx_hairpin_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
> > +			       uint16_t nb_tx_desc,
> > +			       const struct rte_eth_hairpin_conf *conf)
> > +{
> > +	struct rte_eth_dev *dev;
> > +	struct rte_eth_hairpin_cap cap;
> > +	struct rte_eth_dev_info dev_info;
> > +	void **txq;
> > +	int i;
> > +	int count = 0;
> > +	int ret;
> >
> > -	return eth_err(port_id, (*dev->dev_ops->tx_queue_setup)(dev,
> > -		       tx_queue_id, nb_tx_desc, socket_id, &local_conf));
> > +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
> > +	dev = &rte_eth_devices[port_id];
> > +	if (tx_queue_id >= dev->data->nb_tx_queues) {
> > +		RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n",
> tx_queue_id);
> > +		return -EINVAL;
> > +	}
> > +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -
> ENOTSUP);
> > +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->hairpin_cap_get,
> > +				-ENOTSUP);
> > +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops-
> >tx_hairpin_queue_setup,
> > +				-ENOTSUP);
> > +	rte_eth_dev_info_get(port_id, &dev_info);
> 
> return value should be checked.
> 
> > +	rte_eth_dev_hairpin_capability_get(port_id, &cap);
> 
> Check return value and you can rely on  hairpin_cap_get check inside.
> 

Same as above I will add a check but I'm not sure I understand the second part
Just to make sure I understand, you are talking about that if I don't check
the return value I'm not allowed to check the cap right?

> > +	/* Use default specified by driver, if nb_tx_desc is zero */
> > +	if (nb_tx_desc == 0)
> > +		nb_tx_desc = cap.max_nb_desc;
> > +	if (nb_tx_desc > cap.max_nb_desc) {
> > +		RTE_ETHDEV_LOG(ERR,
> > +			       "Invalid value for nb_tx_desc(=%hu), should be: "
> > +			       "<= %hu", nb_tx_desc, cap.max_nb_desc);
> > +		return -EINVAL;
> > +	}
> > +	if (conf->peer_n > cap.max_tx_2_rx) {
> > +		RTE_ETHDEV_LOG(ERR,
> > +			       "Invalid value for number of peers(=%hu), "
> > +			       "should be: <= %hu", conf->peer_n,
> > +			       cap.max_tx_2_rx);
> > +		return -EINVAL;
> > +	}
> > +	if (conf->peer_n == 0) {
> > +		RTE_ETHDEV_LOG(ERR,
> > +			       "Invalid value for number of peers(=%hu), "
> > +			       "should be: > 0", conf->peer_n);
> > +		return -EINVAL;
> > +	}
> > +	if (cap.max_n_queues != -1) {
> > +		for (i = 0; i < dev->data->nb_tx_queues; i++) {
> > +			if (dev->data->tx_queue_state[i] ==
> > +			    RTE_ETH_QUEUE_STATE_HAIRPIN)
> > +				count++;
> > +		}
> > +		if (count > cap.max_n_queues) {
> > +			RTE_ETHDEV_LOG(ERR,
> > +				       "To many Rx hairpin queues %d", count);
> > +			return -EINVAL;
> > +		}
> > +	}
> > +	if (dev->data->dev_started &&
> > +		!(dev_info.dev_capa &
> > +		  RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP))
> > +		return -EBUSY;
> > +	if (dev->data->dev_started &&
> > +		(dev->data->tx_queue_state[tx_queue_id] !=
> > +		 RTE_ETH_QUEUE_STATE_STOPPED))
> > +		return -EBUSY;
> > +	txq = dev->data->tx_queues;
> > +	if (txq[tx_queue_id]) {
> 
> Please, compare with NULL
> 

O.K.

> > +		RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops-
> >tx_queue_release,
> > +					-ENOTSUP);
> > +		(*dev->dev_ops->tx_queue_release)(txq[tx_queue_id]);
> > +		txq[tx_queue_id] = NULL;
> > +	}
> > +	ret = (*dev->dev_ops->tx_hairpin_queue_setup)
> > +		(dev, tx_queue_id, nb_tx_desc, conf);
> > +	if (!ret)
> 
> Please, compare with 0
> 

O.K.

> > +		dev->data->tx_queue_state[tx_queue_id] =
> > +			RTE_ETH_QUEUE_STATE_HAIRPIN;
> > +	return eth_err(port_id, ret);
> >   }
> >
> >   void
> > @@ -3981,12 +4159,20 @@ int rte_eth_set_queue_rate_limit(uint16_t
> port_id, uint16_t queue_idx,
> >   	rte_errno = ENOTSUP;
> >   	return NULL;
> >   #endif
> > +	struct rte_eth_dev *dev;
> > +
> >   	/* check input parameters */
> >   	if (!rte_eth_dev_is_valid_port(port_id) || fn == NULL ||
> >   		    queue_id >= rte_eth_devices[port_id].data->nb_rx_queues) {
> >   		rte_errno = EINVAL;
> >   		return NULL;
> >   	}
> > +	dev = &rte_eth_devices[port_id];
> > +	if (dev->data->rx_queue_state[queue_id] ==
> > +	    RTE_ETH_QUEUE_STATE_HAIRPIN) {
> 
> It looks like line break is not required above. Just to make code a bit
> shorter.
> 

Will fix if possible.

> > +		rte_errno = EINVAL;
> > +		return NULL;
> > +	}
> >   	struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0);
> >
> >   	if (cb == NULL) {
> > @@ -4058,6 +4244,8 @@ int rte_eth_set_queue_rate_limit(uint16_t port_id,
> uint16_t queue_idx,
> >   	rte_errno = ENOTSUP;
> >   	return NULL;
> >   #endif
> > +	struct rte_eth_dev *dev;
> > +
> >   	/* check input parameters */
> >   	if (!rte_eth_dev_is_valid_port(port_id) || fn == NULL ||
> >   		    queue_id >= rte_eth_devices[port_id].data->nb_tx_queues) {
> > @@ -4065,6 +4253,13 @@ int rte_eth_set_queue_rate_limit(uint16_t
> port_id, uint16_t queue_idx,
> >   		return NULL;
> >   	}
> >
> > +	dev = &rte_eth_devices[port_id];
> > +	if (dev->data->tx_queue_state[queue_id] ==
> > +	    RTE_ETH_QUEUE_STATE_HAIRPIN) {
> 
> It looks like line break is not required above. Just to make code a bit
> shorter.
> 

Will fix.

> > +		rte_errno = EINVAL;
> > +		return NULL;
> > +	}
> > +
> >   	struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0);
> >
> >   	if (cb == NULL) {
> > @@ -4510,6 +4705,21 @@ int rte_eth_set_queue_rate_limit(uint16_t
> port_id, uint16_t queue_idx,
> >   }
> >
> >   int
> > +rte_eth_dev_hairpin_capability_get(uint16_t port_id,
> > +				   struct rte_eth_hairpin_cap *cap)
> > +{
> > +	struct rte_eth_dev *dev;
> > +
> > +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
> > +
> > +	dev = &rte_eth_devices[port_id];
> > +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->hairpin_cap_get,
> > +				-ENOTSUP);
> 
> I think it would be useful to memset() cap with 0 to be sure that it is
> initialized.
> 

O.K but if we follow your comments from above, the result is not valid if we return
error, so it is not a must, but like I said I don't care to add it.

> > +	return eth_err(port_id, (*dev->dev_ops->hairpin_cap_get)
> > +		       (dev, cap));
> 
> 
> Please, consider to avoid line breaks above to make code a bit shorter.
> Of course, it should not exceed line length limit.
> 

Will check.

> > +}
> > +
> > +int
> >   rte_eth_dev_pool_ops_supported(uint16_t port_id, const char *pool)
> >   {
> >   	struct rte_eth_dev *dev;
> > diff --git a/lib/librte_ethdev/rte_ethdev.h b/lib/librte_ethdev/rte_ethdev.h
> > index d937fb4..29dcfea 100644
> > --- a/lib/librte_ethdev/rte_ethdev.h
> > +++ b/lib/librte_ethdev/rte_ethdev.h
> > @@ -804,6 +804,46 @@ struct rte_eth_txconf {
> >   };
> >
> >   /**
> > + * @warning
> > + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> > + *
> > + * A structure used to return the hairpin capabilities that are supportd.
> 
> supportd -> supported
>

Will fix.
 
> > + */
> > +struct rte_eth_hairpin_cap {
> > +	int16_t max_n_queues;
> > +	/**< The max number of hairpin queuesi. -1 no limit. */
> 
> I'm not sure that I like type difference from max_rx_queues here.
> I think it would be better to use uint16_t. I would say there is point
> to highlight no limit (first of all I'm not sure that it makes sense,
> second, UINT16_MAX will obviously do the job in uint16_t.
> 
> Is it both Rx and Tx?
> 
> > +	int16_t max_rx_2_tx;
> > +	/**< Max number of Rx queues to be connected to one Tx queue. */
> > +	int16_t max_tx_2_rx;
> > +	/**< Max number of Tx queues to be connected to one Rx queue. */
> 
> I would prefer to have uint16_t here as well. Mainly for consistency.
> 

Agree, will fix,

> > +	uint16_t max_nb_desc; /**< The max num of descriptors. */
> 
> Is it common for Rx and Tx? What about min?
> 

I think min is always 1, by  definition.

> > +};
> > +
> > +#define RTE_ETH_MAX_HAIRPIN_PEERS 32
> > +
> > +/**
> > + * @warning
> > + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> > + *
> > + * A structure used to hold hairpin peer data.
> > + */
> > +struct rte_eth_hairpin_peer {
> > +	uint16_t port; /**< Peer port. */
> > +	uint16_t queue; /**< Peer queue. */
> > +};
> > +
> > +/**
> > + * @warning
> > + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> > + *
> > + * A structure used to configure hairpin binding.
> 
> It should be explained what happens if Rx queue has many Tx queues.
> Is packet duplicated? Or distributed?
> 

Like we said before, I don't know it depends on the Nic. 
In case of Mellanox we don't support 1 to many.

> > + */
> > +struct rte_eth_hairpin_conf {
> > +	uint16_t peer_n; /**< The number of peers. */
> > +	struct rte_eth_hairpin_peer peers[RTE_ETH_MAX_HAIRPIN_PEERS];
> > +};
> > +
> > +/**
> >    * A structure contains information about HW descriptor ring limitations.
> >    */
> >   struct rte_eth_desc_lim {
> > @@ -1080,6 +1120,8 @@ struct rte_eth_conf {
> >   /**< Device supports Rx queue setup after device started*/
> >   #define RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP 0x00000002
> >   /**< Device supports Tx queue setup after device started*/
> > +#define RTE_ETH_DEV_CAPA_HAIRPIN_SUPPORT 0x00000004
> > +/**< Device supports hairpin queues. */
> 
> Do we really need it? Isn't rte_eth_dev_hairpin_capability_get() returning
> -ENOTSUP sufficient?
>

I agree with you, but my thinking was that using the cap the application 
can avoid calling to function that will fail.
If you approve I will remove this cap.
 
> >   /*
> >    * If new Tx offload capabilities are defined, they also must be
> > @@ -1277,6 +1319,7 @@ struct rte_eth_dcb_info {
> >    */
> >   #define RTE_ETH_QUEUE_STATE_STOPPED 0
> >   #define RTE_ETH_QUEUE_STATE_STARTED 1
> > +#define RTE_ETH_QUEUE_STATE_HAIRPIN 2
> >
> >   #define RTE_ETH_ALL RTE_MAX_ETHPORTS
> >
> > @@ -1771,6 +1814,34 @@ int rte_eth_rx_queue_setup(uint16_t port_id,
> uint16_t rx_queue_id,
> >   		struct rte_mempool *mb_pool);
> >
> >   /**
> > + * @warning
> > + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> > + *
> > + * Allocate and set up a hairpin receive queue for an Ethernet device.
> > + *
> > + * The function set up the selected queue to be used in hairpin.
> > + *
> > + * @param port_id
> > + *   The port identifier of the Ethernet device.
> > + * @param rx_queue_id
> > + *   The index of the receive queue to set up.
> > + *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
> > + *   to rte_eth_dev_configure().
> > + * @param nb_rx_desc
> > + *   The number of receive descriptors to allocate for the receive ring.
> 
> Is it allows to specify 0 to pick driver recommended value?
> 

Yes, (assuming you are talking about the nb_rx_desc)

> > + * @param conf
> > + *   The pointer to the hairpin configuration.
> > + * @return
> > + *   - 0: Success, receive queue correctly set up.
> > + *   - -EINVAL: Selected Queue can't be configured for hairpin.
> > + *   - -ENOMEM: Unable to allocate the resources required for the queue.
> 
> Please, follow return value description style similar to
> rte_eth_dev_info_get()
> which is more common to the file.
> 

O.K

> > + */
> > +__rte_experimental
> > +int rte_eth_rx_hairpin_queue_setup
> > +	(uint16_t port_id, uint16_t rx_queue_id, uint16_t nb_rx_desc,
> > +	 const struct rte_eth_hairpin_conf *conf);
> > +
> > +/**
> >    * Allocate and set up a transmit queue for an Ethernet device.
> >    *
> >    * @param port_id
> > @@ -1823,6 +1894,33 @@ int rte_eth_tx_queue_setup(uint16_t port_id,
> uint16_t tx_queue_id,
> >   		const struct rte_eth_txconf *tx_conf);
> >
> >   /**
> > + * @warning
> > + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> > + *
> > + * Allocate and set up a transmit hairpin queue for an Ethernet device.
> > + *
> > + * @param port_id
> > + *   The port identifier of the Ethernet device.
> > + * @param tx_queue_id
> > + *   The index of the transmit queue to set up.
> > + *   The value must be in the range [0, nb_tx_queue - 1] previously supplied
> > + *   to rte_eth_dev_configure().
> > + * @param nb_tx_desc
> > + *   The number of transmit descriptors to allocate for the transmit ring.
> 
> Is it allows to specify 0 to pick driver recommended value?
> 

Like above yes, 

> > + * @param conf
> > + *   The hairpin configuration.
> > + *
> > + * @return
> > + *   - 0: Success, transmit queue correctly set up.
> > + *   - -EINVAL: Selected Queue can't be configured for hairpin.
> > + *   - -ENOMEM: Unable to allocate the resources required for the queue.
> 
> Please, follow return value description style similar to
> rte_eth_dev_info_get()
> which is more common to the file.
> 

Will fix.

> > + */
> > +__rte_experimental
> > +int rte_eth_tx_hairpin_queue_setup
> > +	(uint16_t port_id, uint16_t tx_queue_id, uint16_t nb_tx_desc,
> > +	 const struct rte_eth_hairpin_conf *conf);
> > +
> > +/**
> >    * Return the NUMA socket to which an Ethernet device is connected
> >    *
> >    * @param port_id
> > @@ -4037,6 +4135,22 @@ int rte_eth_dev_adjust_nb_rx_tx_desc(uint16_t
> port_id,
> >   void *
> >   rte_eth_dev_get_sec_ctx(uint16_t port_id);
> >
> > +/**
> > + * @warning
> > + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> notice
> > + *
> > + * Query the device hairpin capabilities.
> > + *
> > + * @param port_id
> > + *   The port identifier of the Ethernet device.
> > + * @param cap
> > + *   Pointer to a structure that will hold the hairpin capabilities.
> > + * @return
> > + *   - 0 on success, -ENOTSUP if the device doesn't support hairpin.
> 
> Please, follow return value description style similar to
> rte_eth_dev_info_get()
> which is more common to the file.
> 

Will fix.

> > + */
> > +__rte_experimental
> > +int rte_eth_dev_hairpin_capability_get(uint16_t port_id,
> > +				       struct rte_eth_hairpin_cap *cap);
> >
> >   #include <rte_ethdev_core.h>
> >
> > @@ -4137,6 +4251,12 @@ int rte_eth_dev_adjust_nb_rx_tx_desc(uint16_t
> port_id,
> >   		RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n",
> queue_id);
> >   		return 0;
> >   	}
> > +	if (dev->data->rx_queue_state[queue_id] ==
> > +	    RTE_ETH_QUEUE_STATE_HAIRPIN) {
> > +		RTE_ETHDEV_LOG(ERR, "RX queue_id=%u is hairpin queue\n",
> > +			       queue_id);
> > +		return 0;
> > +	}
> >   #endif
> >   	nb_rx = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id],
> >   				     rx_pkts, nb_pkts);
> > @@ -4403,6 +4523,12 @@ static inline int
> rte_eth_tx_descriptor_status(uint16_t port_id,
> >   		RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n",
> queue_id);
> >   		return 0;
> >   	}
> > +	if (dev->data->tx_queue_state[queue_id] ==
> > +	    RTE_ETH_QUEUE_STATE_HAIRPIN) {
> > +		RTE_ETHDEV_LOG(ERR, "TX queue_id=%u is hairpin queue\n",
> > +			       queue_id);
> > +		return 0;
> > +	}
> >   #endif
> >
> >   #ifdef RTE_ETHDEV_RXTX_CALLBACKS
> > diff --git a/lib/librte_ethdev/rte_ethdev_core.h
> b/lib/librte_ethdev/rte_ethdev_core.h
> > index dcb5ae6..ef46e71 100644
> > --- a/lib/librte_ethdev/rte_ethdev_core.h
> > +++ b/lib/librte_ethdev/rte_ethdev_core.h
> > @@ -250,6 +250,12 @@ typedef int (*eth_rx_queue_setup_t)(struct
> rte_eth_dev *dev,
> >   				    struct rte_mempool *mb_pool);
> >   /**< @internal Set up a receive queue of an Ethernet device. */
> >
> > +typedef int (*eth_rx_hairpin_queue_setup_t)
> > +	(struct rte_eth_dev *dev, uint16_t rx_queue_id,
> > +	 uint16_t nb_rx_desc,
> > +	 const struct rte_eth_hairpin_conf *conf);
> > +/**< @internal Set up a receive hairpin queue of an Ethernet device. */
> > +
> >   typedef int (*eth_tx_queue_setup_t)(struct rte_eth_dev *dev,
> >   				    uint16_t tx_queue_id,
> >   				    uint16_t nb_tx_desc,
> > @@ -257,6 +263,12 @@ typedef int (*eth_tx_queue_setup_t)(struct
> rte_eth_dev *dev,
> >   				    const struct rte_eth_txconf *tx_conf);
> >   /**< @internal Setup a transmit queue of an Ethernet device. */
> >
> > +typedef int (*eth_tx_hairpin_queue_setup_t)
> > +	(struct rte_eth_dev *dev, uint16_t tx_queue_id,
> > +	 uint16_t nb_tx_desc,
> > +	 const struct rte_eth_hairpin_conf *hairpin_conf);
> > +/**< @internal Setup a transmit hairpin queue of an Ethernet device. */
> > +
> >   typedef int (*eth_rx_enable_intr_t)(struct rte_eth_dev *dev,
> >   				    uint16_t rx_queue_id);
> >   /**< @internal Enable interrupt of a receive queue of an Ethernet device. */
> > @@ -505,6 +517,10 @@ typedef int (*eth_pool_ops_supported_t)(struct
> rte_eth_dev *dev,
> >   						const char *pool);
> >   /**< @internal Test if a port supports specific mempool ops */
> >
> > +typedef int (*eth_hairpin_cap_get_t)(struct rte_eth_dev *dev,
> > +				     struct rte_eth_hairpin_cap *cap);
> > +/**< @internal get the hairpin capabilities. */
> > +
> >   /**
> >    * @internal A structure containing the functions exported by an Ethernet
> driver.
> >    */
> > @@ -557,6 +573,8 @@ struct eth_dev_ops {
> >   	eth_queue_start_t          tx_queue_start;/**< Start TX for a queue. */
> >   	eth_queue_stop_t           tx_queue_stop; /**< Stop TX for a queue. */
> >   	eth_rx_queue_setup_t       rx_queue_setup;/**< Set up device RX
> queue. */
> > +	eth_rx_hairpin_queue_setup_t rx_hairpin_queue_setup;
> > +	/**< Set up device RX hairpin queue. */
> >   	eth_queue_release_t        rx_queue_release; /**< Release RX queue.
> */
> >   	eth_rx_queue_count_t       rx_queue_count;
> >   	/**< Get the number of used RX descriptors. */
> > @@ -568,6 +586,8 @@ struct eth_dev_ops {
> >   	eth_rx_enable_intr_t       rx_queue_intr_enable;  /**< Enable Rx queue
> interrupt. */
> >   	eth_rx_disable_intr_t      rx_queue_intr_disable; /**< Disable Rx queue
> interrupt. */
> >   	eth_tx_queue_setup_t       tx_queue_setup;/**< Set up device TX
> queue. */
> > +	eth_tx_hairpin_queue_setup_t tx_hairpin_queue_setup;
> > +	/**< Set up device TX hairpin queue. */
> >   	eth_queue_release_t        tx_queue_release; /**< Release TX queue. */
> >   	eth_tx_done_cleanup_t      tx_done_cleanup;/**< Free tx ring mbufs */
> >
> > @@ -639,6 +659,9 @@ struct eth_dev_ops {
> >
> >   	eth_pool_ops_supported_t pool_ops_supported;
> >   	/**< Test if a port supports specific mempool ops */
> > +
> > +	eth_hairpin_cap_get_t hairpin_cap_get;
> > +	/**< Returns the hairpin capabilities. */
> >   };
> >
> >   /**
> > @@ -746,9 +769,9 @@ struct rte_eth_dev_data {
> >   		dev_started : 1,   /**< Device state: STARTED(1) / STOPPED(0).
> */
> >   		lro         : 1;   /**< RX LRO is ON(1) / OFF(0) */
> >   	uint8_t rx_queue_state[RTE_MAX_QUEUES_PER_PORT];
> > -			/**< Queues state: STARTED(1) / STOPPED(0). */
> > +		/**< Queues state: HAIRPIN(2) / STARTED(1) / STOPPED(0). */
> >   	uint8_t tx_queue_state[RTE_MAX_QUEUES_PER_PORT];
> > -			/**< Queues state: STARTED(1) / STOPPED(0). */
> > +		/**< Queues state: HAIRPIN(2) STARTED(1) / STOPPED(0). */
> >   	uint32_t dev_flags;             /**< Capabilities. */
> >   	enum rte_kernel_driver kdrv;    /**< Kernel driver passthrough. */
> >   	int numa_node;                  /**< NUMA node connection. */
> > diff --git a/lib/librte_ethdev/rte_ethdev_version.map
> b/lib/librte_ethdev/rte_ethdev_version.map
> > index 6df42a4..77b0a86 100644
> > --- a/lib/librte_ethdev/rte_ethdev_version.map
> > +++ b/lib/librte_ethdev/rte_ethdev_version.map
> > @@ -283,4 +283,9 @@ EXPERIMENTAL {
> >
> >   	# added in 19.08
> >   	rte_eth_read_clock;
> > +
> > +	# added in 19.11
> > +	rte_eth_rx_hairpin_queue_setup;
> > +	rte_eth_tx_hairpin_queue_setup;
> > +	rte_eth_dev_hairpin_capability_get;
> >   };
  
Andrew Rybchenko Oct. 14, 2019, 9:37 a.m. UTC | #3
Hi Ori,

see my answers below.

On 10/11/19 12:07 AM, Ori Kam wrote:
> Hi Andrew,
>
> Thanks for your comments,
>
> PSB,
> Ori
>
>> -----Original Message-----
>> From: Andrew Rybchenko <arybchenko@solarflare.com>
>> Sent: Tuesday, October 8, 2019 7:11 PM
>> To: Ori Kam <orika@mellanox.com>; Thomas Monjalon
>> <thomas@monjalon.net>; Ferruh Yigit <ferruh.yigit@intel.com>
>> Cc: dev@dpdk.org; jingjing.wu@intel.com; stephen@networkplumber.org
>> Subject: Re: [PATCH v2 01/14] ethdev: add support for hairpin queue
>>
>> Hi Ori,
>>
>> thanks for updated version. See my notes below.
>>
>> There are few style notes about line breaks which are not defined in
>> coding style. Of course, it may be ignored.
>>
> I will fix what I can.
>
>> On 10/4/19 10:54 PM, Ori Kam wrote:
>>> This commit introduce hairpin queue type.
>>>
>>> The hairpin queue in build from Rx queue binded to Tx queue.
>>> It is used to offload traffic coming from the wire and redirect it back
>>> to the wire.
>>>
>>> There are 3 new functions:
>>> - rte_eth_dev_hairpin_capability_get
>>> - rte_eth_rx_hairpin_queue_setup
>>> - rte_eth_tx_hairpin_queue_setup
>>>
>>> In order to use the queue, there is a need to create rte_flow
>>> with queue / RSS action that targets one or more of the Rx queues.
>>>
>>> Signed-off-by: Ori Kam <orika@mellanox.com>
>> rte_eth_dev_[rt]x_queue_stop() should return error if used for hairpin
>> queue.
>> Right now rte_eth_dev_[rt]x_queue_start() will return 0. Not sure about it.
>> What about rte_eth_rx_queue_info_get() and rte_eth_tx_queue_info_get()?
>> Any other Rx/Tx queue functions?
>>
> I will add error to both functions (Tx/Rx)
> I don't see any other function (only the info_get and queue_stop)
>
>>> ---
>>> V2:
>>>    - update according to ML comments.
>>>
>>> ---
>>>    lib/librte_ethdev/rte_ethdev.c           | 214
>> ++++++++++++++++++++++++++++++-
>>>    lib/librte_ethdev/rte_ethdev.h           | 126 ++++++++++++++++++
>>>    lib/librte_ethdev/rte_ethdev_core.h      |  27 +++-
>>>    lib/librte_ethdev/rte_ethdev_version.map |   5 +
>>>    4 files changed, 368 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c
>>> index af82360..ee8af42 100644
>>> --- a/lib/librte_ethdev/rte_ethdev.c
>>> +++ b/lib/librte_ethdev/rte_ethdev.c
>>> @@ -1752,12 +1752,102 @@ struct rte_eth_dev *
>>>    		if (!dev->data->min_rx_buf_size ||
>>>    		    dev->data->min_rx_buf_size > mbp_buf_size)
>>>    			dev->data->min_rx_buf_size = mbp_buf_size;
>>> +		if (dev->data->rx_queue_state[rx_queue_id] ==
>>> +		    RTE_ETH_QUEUE_STATE_HAIRPIN)
>>> +			dev->data->rx_queue_state[rx_queue_id] =
>>> +				RTE_ETH_QUEUE_STATE_STOPPED;
>> I don't understand it. Why is rte_eth_rx_queue_setup() changed?
>>
> This was done so user can reset the queue back to normal queue.

I think it should be done in queue release.

>>>    	}
>>>
>>>    	return eth_err(port_id, ret);
>>>    }
>>>
>>>    int
>>> +rte_eth_rx_hairpin_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
>>> +			       uint16_t nb_rx_desc,
>>> +			       const struct rte_eth_hairpin_conf *conf)
>>> +{
>>> +	int ret;
>>> +	struct rte_eth_dev *dev;
>>> +	struct rte_eth_hairpin_cap cap;
>>> +	struct rte_eth_dev_info dev_info;
>>> +	void **rxq;
>>> +	int i;
>>> +	int count = 0;
>>> +
>>> +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
>>> +
>>> +	dev = &rte_eth_devices[port_id];
>>> +	if (rx_queue_id >= dev->data->nb_rx_queues) {
>>> +		RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n",
>> rx_queue_id);
>>> +		return -EINVAL;
>>> +	}
>>> +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -
>> ENOTSUP);
>>> +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->hairpin_cap_get,
>>> +				-ENOTSUP);
>>> +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops-
>>> rx_hairpin_queue_setup,
>>> +				-ENOTSUP);
>>> +	rte_eth_dev_hairpin_capability_get(port_id, &cap);
>> Return value should be checked. It makes  hairpin_cap_get check above
>> unnecessary.
>>
> I will add a check.
>   
>>> +	/* Use default specified by driver, if nb_rx_desc is zero */
>>> +	if (nb_rx_desc == 0)
>>> +		nb_rx_desc = cap.max_nb_desc;
>>> +	if (nb_rx_desc > cap.max_nb_desc) {
>>> +		RTE_ETHDEV_LOG(ERR,
>>> +			       "Invalid value for nb_rx_desc(=%hu), should be: "
>>> +			       "<= %hu", nb_rx_desc, cap.max_nb_desc);
>>> +		return -EINVAL;
>>> +	}
>>> +	ret = rte_eth_dev_info_get(port_id, &dev_info);
>>> +	if (ret != 0)
>>> +		return ret;
>>> +	if (dev->data->dev_started &&
>>> +		!(dev_info.dev_capa &
>>> +		  RTE_ETH_DEV_CAPA_RUNTIME_RX_QUEUE_SETUP))
>>> +		return -EBUSY;
>>> +	if (dev->data->dev_started &&
>>> +		(dev->data->rx_queue_state[rx_queue_id] !=
>>> +		 RTE_ETH_QUEUE_STATE_STOPPED))
>>> +		return -EBUSY;
>>> +	if (conf->peer_n > cap.max_rx_2_tx) {
>>> +		RTE_ETHDEV_LOG(ERR,
>>> +			       "Invalid value for number of peers(=%hu), "
>>> +			       "should be: <= %hu", conf->peer_n,
>>> +			       cap.max_rx_2_tx);
>>> +		return -EINVAL;
>>> +	}
>>> +	if (conf->peer_n == 0) {
>>> +		RTE_ETHDEV_LOG(ERR,
>>> +			       "Invalid value for number of peers(=%hu), "
>>> +			       "should be: > 0", conf->peer_n);
>>> +		return -EINVAL;
>>> +	}
>>> +	if (cap.max_n_queues != -1) {
>>> +		for (i = 0; i < dev->data->nb_rx_queues; i++) {
>>> +			if (dev->data->rx_queue_state[i] ==
>>> +			    RTE_ETH_QUEUE_STATE_HAIRPIN)
>>> +				count++;
>>> +		}
>>> +		if (count > cap.max_n_queues) {
>>> +			RTE_ETHDEV_LOG(ERR,
>>> +				       "To many Rx hairpin queues %d", count);
>>> +			return -EINVAL;
>>> +		}
>>> +	}
>>> +	rxq = dev->data->rx_queues;
>>> +	if (rxq[rx_queue_id]) {
>> Please, compare with NULL (I know that rte_eth_rx_queue_setup() does
>> like above).
>>
> O.K. I will change, but may I ask why? If in the rte_eth_rx_queue_setup function it is written the same way,
> why do you want this change?

https://doc.dpdk.org/guides/contributing/coding_style.html#null-pointers

>>> +		RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops-
>>> rx_queue_release,
>>> +					-ENOTSUP);
>>> +		(*dev->dev_ops->rx_queue_release)(rxq[rx_queue_id]);
>>> +		rxq[rx_queue_id] = NULL;
>>> +	}
>>> +	ret = (*dev->dev_ops->rx_hairpin_queue_setup)(dev, rx_queue_id,
>>> +						      nb_rx_desc, conf);
>>> +	if (!ret)
>> Please, compare with 0
>>
> Will do, but again just for my knowledge why?

https://doc.dpdk.org/guides/contributing/coding_style.html#function-calls

>>> +		dev->data->rx_queue_state[rx_queue_id] =
>>> +			RTE_ETH_QUEUE_STATE_HAIRPIN;
>>> +	return eth_err(port_id, ret);
>>> +}
>>> +
>>> +int
>>>    rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
>>>    		       uint16_t nb_tx_desc, unsigned int socket_id,
>>>    		       const struct rte_eth_txconf *tx_conf)
>>> @@ -1851,9 +1941,97 @@ struct rte_eth_dev *
>>>    			__func__);
>>>    		return -EINVAL;
>>>    	}
>>> +	ret = (*dev->dev_ops->tx_queue_setup)(dev, tx_queue_id, nb_tx_desc,
>>> +					      socket_id, &local_conf);
>>> +	if (!ret)
>>> +		if (dev->data->tx_queue_state[tx_queue_id] ==
>>> +		    RTE_ETH_QUEUE_STATE_HAIRPIN)
>>> +			dev->data->tx_queue_state[tx_queue_id] =
>>> +				RTE_ETH_QUEUE_STATE_STOPPED;
>> Why is it changed?
>>
> Like in the Rx to enable switching back to normal queue.
>
>>> +	return eth_err(port_id, ret);
>>> +}
>>> +
>>> +int
>>> +rte_eth_tx_hairpin_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
>>> +			       uint16_t nb_tx_desc,
>>> +			       const struct rte_eth_hairpin_conf *conf)
>>> +{
>>> +	struct rte_eth_dev *dev;
>>> +	struct rte_eth_hairpin_cap cap;
>>> +	struct rte_eth_dev_info dev_info;
>>> +	void **txq;
>>> +	int i;
>>> +	int count = 0;
>>> +	int ret;
>>>
>>> -	return eth_err(port_id, (*dev->dev_ops->tx_queue_setup)(dev,
>>> -		       tx_queue_id, nb_tx_desc, socket_id, &local_conf));
>>> +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
>>> +	dev = &rte_eth_devices[port_id];
>>> +	if (tx_queue_id >= dev->data->nb_tx_queues) {
>>> +		RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n",
>> tx_queue_id);
>>> +		return -EINVAL;
>>> +	}
>>> +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -
>> ENOTSUP);
>>> +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->hairpin_cap_get,
>>> +				-ENOTSUP);
>>> +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops-
>>> tx_hairpin_queue_setup,
>>> +				-ENOTSUP);
>>> +	rte_eth_dev_info_get(port_id, &dev_info);
>> return value should be checked.
>>
>>> +	rte_eth_dev_hairpin_capability_get(port_id, &cap);
>> Check return value and you can rely on  hairpin_cap_get check inside.
>>
> Same as above I will add a check but I'm not sure I understand the second part
> Just to make sure I understand, you are talking about that if I don't check
> the return value I'm not allowed to check the cap right?

It is about dev_ops->hairpin_cap_get vs NULL check.
It is checked inside rte_eth_dev_hairpin_capability_get() and we should
not duplicate the check above.

>>> +	/* Use default specified by driver, if nb_tx_desc is zero */
>>> +	if (nb_tx_desc == 0)
>>> +		nb_tx_desc = cap.max_nb_desc;
>>> +	if (nb_tx_desc > cap.max_nb_desc) {
>>> +		RTE_ETHDEV_LOG(ERR,
>>> +			       "Invalid value for nb_tx_desc(=%hu), should be: "
>>> +			       "<= %hu", nb_tx_desc, cap.max_nb_desc);
>>> +		return -EINVAL;
>>> +	}
>>> +	if (conf->peer_n > cap.max_tx_2_rx) {
>>> +		RTE_ETHDEV_LOG(ERR,
>>> +			       "Invalid value for number of peers(=%hu), "
>>> +			       "should be: <= %hu", conf->peer_n,
>>> +			       cap.max_tx_2_rx);
>>> +		return -EINVAL;
>>> +	}
>>> +	if (conf->peer_n == 0) {
>>> +		RTE_ETHDEV_LOG(ERR,
>>> +			       "Invalid value for number of peers(=%hu), "
>>> +			       "should be: > 0", conf->peer_n);
>>> +		return -EINVAL;
>>> +	}
>>> +	if (cap.max_n_queues != -1) {
>>> +		for (i = 0; i < dev->data->nb_tx_queues; i++) {
>>> +			if (dev->data->tx_queue_state[i] ==
>>> +			    RTE_ETH_QUEUE_STATE_HAIRPIN)
>>> +				count++;
>>> +		}
>>> +		if (count > cap.max_n_queues) {
>>> +			RTE_ETHDEV_LOG(ERR,
>>> +				       "To many Rx hairpin queues %d", count);
>>> +			return -EINVAL;
>>> +		}
>>> +	}
>>> +	if (dev->data->dev_started &&
>>> +		!(dev_info.dev_capa &
>>> +		  RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP))
>>> +		return -EBUSY;
>>> +	if (dev->data->dev_started &&
>>> +		(dev->data->tx_queue_state[tx_queue_id] !=
>>> +		 RTE_ETH_QUEUE_STATE_STOPPED))
>>> +		return -EBUSY;
>>> +	txq = dev->data->tx_queues;
>>> +	if (txq[tx_queue_id]) {
>> Please, compare with NULL
>>
> O.K.
>
>>> +		RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops-
>>> tx_queue_release,
>>> +					-ENOTSUP);
>>> +		(*dev->dev_ops->tx_queue_release)(txq[tx_queue_id]);
>>> +		txq[tx_queue_id] = NULL;
>>> +	}
>>> +	ret = (*dev->dev_ops->tx_hairpin_queue_setup)
>>> +		(dev, tx_queue_id, nb_tx_desc, conf);
>>> +	if (!ret)
>> Please, compare with 0
>>
> O.K.
>
>>> +		dev->data->tx_queue_state[tx_queue_id] =
>>> +			RTE_ETH_QUEUE_STATE_HAIRPIN;
>>> +	return eth_err(port_id, ret);
>>>    }
>>>
>>>    void
>>> @@ -3981,12 +4159,20 @@ int rte_eth_set_queue_rate_limit(uint16_t
>> port_id, uint16_t queue_idx,
>>>    	rte_errno = ENOTSUP;
>>>    	return NULL;
>>>    #endif
>>> +	struct rte_eth_dev *dev;
>>> +
>>>    	/* check input parameters */
>>>    	if (!rte_eth_dev_is_valid_port(port_id) || fn == NULL ||
>>>    		    queue_id >= rte_eth_devices[port_id].data->nb_rx_queues) {
>>>    		rte_errno = EINVAL;
>>>    		return NULL;
>>>    	}
>>> +	dev = &rte_eth_devices[port_id];
>>> +	if (dev->data->rx_queue_state[queue_id] ==
>>> +	    RTE_ETH_QUEUE_STATE_HAIRPIN) {
>> It looks like line break is not required above. Just to make code a bit
>> shorter.
>>
> Will fix if possible.
>
>>> +		rte_errno = EINVAL;
>>> +		return NULL;
>>> +	}
>>>    	struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0);
>>>
>>>    	if (cb == NULL) {
>>> @@ -4058,6 +4244,8 @@ int rte_eth_set_queue_rate_limit(uint16_t port_id,
>> uint16_t queue_idx,
>>>    	rte_errno = ENOTSUP;
>>>    	return NULL;
>>>    #endif
>>> +	struct rte_eth_dev *dev;
>>> +
>>>    	/* check input parameters */
>>>    	if (!rte_eth_dev_is_valid_port(port_id) || fn == NULL ||
>>>    		    queue_id >= rte_eth_devices[port_id].data->nb_tx_queues) {
>>> @@ -4065,6 +4253,13 @@ int rte_eth_set_queue_rate_limit(uint16_t
>> port_id, uint16_t queue_idx,
>>>    		return NULL;
>>>    	}
>>>
>>> +	dev = &rte_eth_devices[port_id];
>>> +	if (dev->data->tx_queue_state[queue_id] ==
>>> +	    RTE_ETH_QUEUE_STATE_HAIRPIN) {
>> It looks like line break is not required above. Just to make code a bit
>> shorter.
>>
> Will fix.
>
>>> +		rte_errno = EINVAL;
>>> +		return NULL;
>>> +	}
>>> +
>>>    	struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0);
>>>
>>>    	if (cb == NULL) {
>>> @@ -4510,6 +4705,21 @@ int rte_eth_set_queue_rate_limit(uint16_t
>> port_id, uint16_t queue_idx,
>>>    }
>>>
>>>    int
>>> +rte_eth_dev_hairpin_capability_get(uint16_t port_id,
>>> +				   struct rte_eth_hairpin_cap *cap)
>>> +{
>>> +	struct rte_eth_dev *dev;
>>> +
>>> +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
>>> +
>>> +	dev = &rte_eth_devices[port_id];
>>> +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->hairpin_cap_get,
>>> +				-ENOTSUP);
>> I think it would be useful to memset() cap with 0 to be sure that it is
>> initialized.
>>
> O.K but if we follow your comments from above, the result is not valid if we return
> error, so it is not a must, but like I said I don't care to add it.
>
>>> +	return eth_err(port_id, (*dev->dev_ops->hairpin_cap_get)
>>> +		       (dev, cap));
>>
>> Please, consider to avoid line breaks above to make code a bit shorter.
>> Of course, it should not exceed line length limit.
>>
> Will check.
>
>>> +}
>>> +
>>> +int
>>>    rte_eth_dev_pool_ops_supported(uint16_t port_id, const char *pool)
>>>    {
>>>    	struct rte_eth_dev *dev;
>>> diff --git a/lib/librte_ethdev/rte_ethdev.h b/lib/librte_ethdev/rte_ethdev.h
>>> index d937fb4..29dcfea 100644
>>> --- a/lib/librte_ethdev/rte_ethdev.h
>>> +++ b/lib/librte_ethdev/rte_ethdev.h
>>> @@ -804,6 +804,46 @@ struct rte_eth_txconf {
>>>    };
>>>
>>>    /**
>>> + * @warning
>>> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
>> notice
>>> + *
>>> + * A structure used to return the hairpin capabilities that are supportd.
>> supportd -> supported
>>
> Will fix.
>   
>>> + */
>>> +struct rte_eth_hairpin_cap {
>>> +	int16_t max_n_queues;
>>> +	/**< The max number of hairpin queuesi. -1 no limit. */
>> I'm not sure that I like type difference from max_rx_queues here.
>> I think it would be better to use uint16_t. I would say there is point
>> to highlight no limit (first of all I'm not sure that it makes sense,
>> second, UINT16_MAX will obviously do the job in uint16_t.
>>
>> Is it both Rx and Tx?
>>
>>> +	int16_t max_rx_2_tx;
>>> +	/**< Max number of Rx queues to be connected to one Tx queue. */
>>> +	int16_t max_tx_2_rx;
>>> +	/**< Max number of Tx queues to be connected to one Rx queue. */
>> I would prefer to have uint16_t here as well. Mainly for consistency.
>>
> Agree, will fix,
>
>>> +	uint16_t max_nb_desc; /**< The max num of descriptors. */
>> Is it common for Rx and Tx? What about min?
>>
> I think min is always 1, by  definition.

OK

>>> +};
>>> +
>>> +#define RTE_ETH_MAX_HAIRPIN_PEERS 32
>>> +
>>> +/**
>>> + * @warning
>>> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
>> notice
>>> + *
>>> + * A structure used to hold hairpin peer data.
>>> + */
>>> +struct rte_eth_hairpin_peer {
>>> +	uint16_t port; /**< Peer port. */
>>> +	uint16_t queue; /**< Peer queue. */
>>> +};
>>> +
>>> +/**
>>> + * @warning
>>> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
>> notice
>>> + *
>>> + * A structure used to configure hairpin binding.
>> It should be explained what happens if Rx queue has many Tx queues.
>> Is packet duplicated? Or distributed?
>>
> Like we said before, I don't know it depends on the Nic.
> In case of Mellanox we don't support 1 to many.

I see, but what application should expect?

>>> + */
>>> +struct rte_eth_hairpin_conf {
>>> +	uint16_t peer_n; /**< The number of peers. */
>>> +	struct rte_eth_hairpin_peer peers[RTE_ETH_MAX_HAIRPIN_PEERS];
>>> +};
>>> +
>>> +/**
>>>     * A structure contains information about HW descriptor ring limitations.
>>>     */
>>>    struct rte_eth_desc_lim {
>>> @@ -1080,6 +1120,8 @@ struct rte_eth_conf {
>>>    /**< Device supports Rx queue setup after device started*/
>>>    #define RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP 0x00000002
>>>    /**< Device supports Tx queue setup after device started*/
>>> +#define RTE_ETH_DEV_CAPA_HAIRPIN_SUPPORT 0x00000004
>>> +/**< Device supports hairpin queues. */
>> Do we really need it? Isn't rte_eth_dev_hairpin_capability_get() returning
>> -ENOTSUP sufficient?
>>
> I agree with you, but my thinking was that using the cap the application
> can avoid calling to function that will fail.
> If you approve I will remove this cap.

Runtime setup flags are required since these functions are always
supported, but it is not alway possible to use these function when
device is started. In this case I think there is no necessity to add
the capability since we have no similar caps for anything else.

>>>    /*
>>>     * If new Tx offload capabilities are defined, they also must be
>>> @@ -1277,6 +1319,7 @@ struct rte_eth_dcb_info {
>>>     */
>>>    #define RTE_ETH_QUEUE_STATE_STOPPED 0
>>>    #define RTE_ETH_QUEUE_STATE_STARTED 1
>>> +#define RTE_ETH_QUEUE_STATE_HAIRPIN 2
>>>
>>>    #define RTE_ETH_ALL RTE_MAX_ETHPORTS
>>>
>>> @@ -1771,6 +1814,34 @@ int rte_eth_rx_queue_setup(uint16_t port_id,
>> uint16_t rx_queue_id,
>>>    		struct rte_mempool *mb_pool);
>>>
>>>    /**
>>> + * @warning
>>> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
>> notice
>>> + *
>>> + * Allocate and set up a hairpin receive queue for an Ethernet device.
>>> + *
>>> + * The function set up the selected queue to be used in hairpin.
>>> + *
>>> + * @param port_id
>>> + *   The port identifier of the Ethernet device.
>>> + * @param rx_queue_id
>>> + *   The index of the receive queue to set up.
>>> + *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
>>> + *   to rte_eth_dev_configure().
>>> + * @param nb_rx_desc
>>> + *   The number of receive descriptors to allocate for the receive ring.
>> Is it allows to specify 0 to pick driver recommended value?
>>
> Yes, (assuming you are talking about the nb_rx_desc)

Please, highlight it in the description.

>>> + * @param conf
>>> + *   The pointer to the hairpin configuration.
>>> + * @return
>>> + *   - 0: Success, receive queue correctly set up.
>>> + *   - -EINVAL: Selected Queue can't be configured for hairpin.
>>> + *   - -ENOMEM: Unable to allocate the resources required for the queue.
>> Please, follow return value description style similar to
>> rte_eth_dev_info_get()
>> which is more common to the file.
>>
> O.K
>
>>> + */
>>> +__rte_experimental
>>> +int rte_eth_rx_hairpin_queue_setup
>>> +	(uint16_t port_id, uint16_t rx_queue_id, uint16_t nb_rx_desc,
>>> +	 const struct rte_eth_hairpin_conf *conf);
>>> +
>>> +/**
>>>     * Allocate and set up a transmit queue for an Ethernet device.
>>>     *
>>>     * @param port_id
>>> @@ -1823,6 +1894,33 @@ int rte_eth_tx_queue_setup(uint16_t port_id,
>> uint16_t tx_queue_id,
>>>    		const struct rte_eth_txconf *tx_conf);
>>>
>>>    /**
>>> + * @warning
>>> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
>> notice
>>> + *
>>> + * Allocate and set up a transmit hairpin queue for an Ethernet device.
>>> + *
>>> + * @param port_id
>>> + *   The port identifier of the Ethernet device.
>>> + * @param tx_queue_id
>>> + *   The index of the transmit queue to set up.
>>> + *   The value must be in the range [0, nb_tx_queue - 1] previously supplied
>>> + *   to rte_eth_dev_configure().
>>> + * @param nb_tx_desc
>>> + *   The number of transmit descriptors to allocate for the transmit ring.
>> Is it allows to specify 0 to pick driver recommended value?
>>
> Like above yes,
>
>>> + * @param conf
>>> + *   The hairpin configuration.
>>> + *
>>> + * @return
>>> + *   - 0: Success, transmit queue correctly set up.
>>> + *   - -EINVAL: Selected Queue can't be configured for hairpin.
>>> + *   - -ENOMEM: Unable to allocate the resources required for the queue.
>> Please, follow return value description style similar to
>> rte_eth_dev_info_get()
>> which is more common to the file.
>>
> Will fix.
>
>>> + */
>>> +__rte_experimental
>>> +int rte_eth_tx_hairpin_queue_setup
>>> +	(uint16_t port_id, uint16_t tx_queue_id, uint16_t nb_tx_desc,
>>> +	 const struct rte_eth_hairpin_conf *conf);
>>> +
>>> +/**
>>>     * Return the NUMA socket to which an Ethernet device is connected
>>>     *
>>>     * @param port_id
>>> @@ -4037,6 +4135,22 @@ int rte_eth_dev_adjust_nb_rx_tx_desc(uint16_t
>> port_id,
>>>    void *
>>>    rte_eth_dev_get_sec_ctx(uint16_t port_id);
>>>
>>> +/**
>>> + * @warning
>>> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
>> notice
>>> + *
>>> + * Query the device hairpin capabilities.
>>> + *
>>> + * @param port_id
>>> + *   The port identifier of the Ethernet device.
>>> + * @param cap
>>> + *   Pointer to a structure that will hold the hairpin capabilities.
>>> + * @return
>>> + *   - 0 on success, -ENOTSUP if the device doesn't support hairpin.
>> Please, follow return value description style similar to
>> rte_eth_dev_info_get()
>> which is more common to the file.
>>
> Will fix.
>
>>> + */
>>> +__rte_experimental
>>> +int rte_eth_dev_hairpin_capability_get(uint16_t port_id,
>>> +				       struct rte_eth_hairpin_cap *cap);
>>>
>>>    #include <rte_ethdev_core.h>
>>>
>>> @@ -4137,6 +4251,12 @@ int rte_eth_dev_adjust_nb_rx_tx_desc(uint16_t
>> port_id,
>>>    		RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n",
>> queue_id);
>>>    		return 0;
>>>    	}
>>> +	if (dev->data->rx_queue_state[queue_id] ==
>>> +	    RTE_ETH_QUEUE_STATE_HAIRPIN) {
>>> +		RTE_ETHDEV_LOG(ERR, "RX queue_id=%u is hairpin queue\n",
>>> +			       queue_id);
>>> +		return 0;
>>> +	}
>>>    #endif
>>>    	nb_rx = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id],
>>>    				     rx_pkts, nb_pkts);
>>> @@ -4403,6 +4523,12 @@ static inline int
>> rte_eth_tx_descriptor_status(uint16_t port_id,
>>>    		RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n",
>> queue_id);
>>>    		return 0;
>>>    	}
>>> +	if (dev->data->tx_queue_state[queue_id] ==
>>> +	    RTE_ETH_QUEUE_STATE_HAIRPIN) {
>>> +		RTE_ETHDEV_LOG(ERR, "TX queue_id=%u is hairpin queue\n",
>>> +			       queue_id);
>>> +		return 0;
>>> +	}
>>>    #endif
>>>
>>>    #ifdef RTE_ETHDEV_RXTX_CALLBACKS
>>> diff --git a/lib/librte_ethdev/rte_ethdev_core.h
>> b/lib/librte_ethdev/rte_ethdev_core.h
>>> index dcb5ae6..ef46e71 100644
>>> --- a/lib/librte_ethdev/rte_ethdev_core.h
>>> +++ b/lib/librte_ethdev/rte_ethdev_core.h
>>> @@ -250,6 +250,12 @@ typedef int (*eth_rx_queue_setup_t)(struct
>> rte_eth_dev *dev,
>>>    				    struct rte_mempool *mb_pool);
>>>    /**< @internal Set up a receive queue of an Ethernet device. */
>>>
>>> +typedef int (*eth_rx_hairpin_queue_setup_t)
>>> +	(struct rte_eth_dev *dev, uint16_t rx_queue_id,
>>> +	 uint16_t nb_rx_desc,
>>> +	 const struct rte_eth_hairpin_conf *conf);
>>> +/**< @internal Set up a receive hairpin queue of an Ethernet device. */
>>> +
>>>    typedef int (*eth_tx_queue_setup_t)(struct rte_eth_dev *dev,
>>>    				    uint16_t tx_queue_id,
>>>    				    uint16_t nb_tx_desc,
>>> @@ -257,6 +263,12 @@ typedef int (*eth_tx_queue_setup_t)(struct
>> rte_eth_dev *dev,
>>>    				    const struct rte_eth_txconf *tx_conf);
>>>    /**< @internal Setup a transmit queue of an Ethernet device. */
>>>
>>> +typedef int (*eth_tx_hairpin_queue_setup_t)
>>> +	(struct rte_eth_dev *dev, uint16_t tx_queue_id,
>>> +	 uint16_t nb_tx_desc,
>>> +	 const struct rte_eth_hairpin_conf *hairpin_conf);
>>> +/**< @internal Setup a transmit hairpin queue of an Ethernet device. */
>>> +
>>>    typedef int (*eth_rx_enable_intr_t)(struct rte_eth_dev *dev,
>>>    				    uint16_t rx_queue_id);
>>>    /**< @internal Enable interrupt of a receive queue of an Ethernet device. */
>>> @@ -505,6 +517,10 @@ typedef int (*eth_pool_ops_supported_t)(struct
>> rte_eth_dev *dev,
>>>    						const char *pool);
>>>    /**< @internal Test if a port supports specific mempool ops */
>>>
>>> +typedef int (*eth_hairpin_cap_get_t)(struct rte_eth_dev *dev,
>>> +				     struct rte_eth_hairpin_cap *cap);
>>> +/**< @internal get the hairpin capabilities. */
>>> +
>>>    /**
>>>     * @internal A structure containing the functions exported by an Ethernet
>> driver.
>>>     */
>>> @@ -557,6 +573,8 @@ struct eth_dev_ops {
>>>    	eth_queue_start_t          tx_queue_start;/**< Start TX for a queue. */
>>>    	eth_queue_stop_t           tx_queue_stop; /**< Stop TX for a queue. */
>>>    	eth_rx_queue_setup_t       rx_queue_setup;/**< Set up device RX
>> queue. */
>>> +	eth_rx_hairpin_queue_setup_t rx_hairpin_queue_setup;
>>> +	/**< Set up device RX hairpin queue. */
>>>    	eth_queue_release_t        rx_queue_release; /**< Release RX queue.
>> */
>>>    	eth_rx_queue_count_t       rx_queue_count;
>>>    	/**< Get the number of used RX descriptors. */
>>> @@ -568,6 +586,8 @@ struct eth_dev_ops {
>>>    	eth_rx_enable_intr_t       rx_queue_intr_enable;  /**< Enable Rx queue
>> interrupt. */
>>>    	eth_rx_disable_intr_t      rx_queue_intr_disable; /**< Disable Rx queue
>> interrupt. */
>>>    	eth_tx_queue_setup_t       tx_queue_setup;/**< Set up device TX
>> queue. */
>>> +	eth_tx_hairpin_queue_setup_t tx_hairpin_queue_setup;
>>> +	/**< Set up device TX hairpin queue. */
>>>    	eth_queue_release_t        tx_queue_release; /**< Release TX queue. */
>>>    	eth_tx_done_cleanup_t      tx_done_cleanup;/**< Free tx ring mbufs */
>>>
>>> @@ -639,6 +659,9 @@ struct eth_dev_ops {
>>>
>>>    	eth_pool_ops_supported_t pool_ops_supported;
>>>    	/**< Test if a port supports specific mempool ops */
>>> +
>>> +	eth_hairpin_cap_get_t hairpin_cap_get;
>>> +	/**< Returns the hairpin capabilities. */
>>>    };
>>>
>>>    /**
>>> @@ -746,9 +769,9 @@ struct rte_eth_dev_data {
>>>    		dev_started : 1,   /**< Device state: STARTED(1) / STOPPED(0).
>> */
>>>    		lro         : 1;   /**< RX LRO is ON(1) / OFF(0) */
>>>    	uint8_t rx_queue_state[RTE_MAX_QUEUES_PER_PORT];
>>> -			/**< Queues state: STARTED(1) / STOPPED(0). */
>>> +		/**< Queues state: HAIRPIN(2) / STARTED(1) / STOPPED(0). */
>>>    	uint8_t tx_queue_state[RTE_MAX_QUEUES_PER_PORT];
>>> -			/**< Queues state: STARTED(1) / STOPPED(0). */
>>> +		/**< Queues state: HAIRPIN(2) STARTED(1) / STOPPED(0). */
>>>    	uint32_t dev_flags;             /**< Capabilities. */
>>>    	enum rte_kernel_driver kdrv;    /**< Kernel driver passthrough. */
>>>    	int numa_node;                  /**< NUMA node connection. */
>>> diff --git a/lib/librte_ethdev/rte_ethdev_version.map
>> b/lib/librte_ethdev/rte_ethdev_version.map
>>> index 6df42a4..77b0a86 100644
>>> --- a/lib/librte_ethdev/rte_ethdev_version.map
>>> +++ b/lib/librte_ethdev/rte_ethdev_version.map
>>> @@ -283,4 +283,9 @@ EXPERIMENTAL {
>>>
>>>    	# added in 19.08
>>>    	rte_eth_read_clock;
>>> +
>>> +	# added in 19.11
>>> +	rte_eth_rx_hairpin_queue_setup;
>>> +	rte_eth_tx_hairpin_queue_setup;
>>> +	rte_eth_dev_hairpin_capability_get;
>>>    };
  
Ori Kam Oct. 14, 2019, 10:19 a.m. UTC | #4
Thanks for your comments,

I will start working on V3

Ori

> -----Original Message-----
> From: Andrew Rybchenko <arybchenko@solarflare.com>
> Sent: Monday, October 14, 2019 12:37 PM
> To: Ori Kam <orika@mellanox.com>; Thomas Monjalon
> <thomas@monjalon.net>; Ferruh Yigit <ferruh.yigit@intel.com>
> Cc: dev@dpdk.org; jingjing.wu@intel.com; stephen@networkplumber.org
> Subject: Re: [PATCH v2 01/14] ethdev: add support for hairpin queue
> 
> Hi Ori,
> 
> see my answers below.
> 
> On 10/11/19 12:07 AM, Ori Kam wrote:
> > Hi Andrew,
> >
> > Thanks for your comments,
> >
> > PSB,
> > Ori
> >
> >> -----Original Message-----
> >> From: Andrew Rybchenko <arybchenko@solarflare.com>
> >> Sent: Tuesday, October 8, 2019 7:11 PM
> >> To: Ori Kam <orika@mellanox.com>; Thomas Monjalon
> >> <thomas@monjalon.net>; Ferruh Yigit <ferruh.yigit@intel.com>
> >> Cc: dev@dpdk.org; jingjing.wu@intel.com; stephen@networkplumber.org
> >> Subject: Re: [PATCH v2 01/14] ethdev: add support for hairpin queue
> >>
> >> Hi Ori,
> >>
> >> thanks for updated version. See my notes below.
> >>
> >> There are few style notes about line breaks which are not defined in
> >> coding style. Of course, it may be ignored.
> >>
> > I will fix what I can.
> >
> >> On 10/4/19 10:54 PM, Ori Kam wrote:
> >>> This commit introduce hairpin queue type.
> >>>
> >>> The hairpin queue in build from Rx queue binded to Tx queue.
> >>> It is used to offload traffic coming from the wire and redirect it back
> >>> to the wire.
> >>>
> >>> There are 3 new functions:
> >>> - rte_eth_dev_hairpin_capability_get
> >>> - rte_eth_rx_hairpin_queue_setup
> >>> - rte_eth_tx_hairpin_queue_setup
> >>>
> >>> In order to use the queue, there is a need to create rte_flow
> >>> with queue / RSS action that targets one or more of the Rx queues.
> >>>
> >>> Signed-off-by: Ori Kam <orika@mellanox.com>
> >> rte_eth_dev_[rt]x_queue_stop() should return error if used for hairpin
> >> queue.
> >> Right now rte_eth_dev_[rt]x_queue_start() will return 0. Not sure about it.
> >> What about rte_eth_rx_queue_info_get() and rte_eth_tx_queue_info_get()?
> >> Any other Rx/Tx queue functions?
> >>
> > I will add error to both functions (Tx/Rx)
> > I don't see any other function (only the info_get and queue_stop)
> >
> >>> ---
> >>> V2:
> >>>    - update according to ML comments.
> >>>
> >>> ---
> >>>    lib/librte_ethdev/rte_ethdev.c           | 214
> >> ++++++++++++++++++++++++++++++-
> >>>    lib/librte_ethdev/rte_ethdev.h           | 126 ++++++++++++++++++
> >>>    lib/librte_ethdev/rte_ethdev_core.h      |  27 +++-
> >>>    lib/librte_ethdev/rte_ethdev_version.map |   5 +
> >>>    4 files changed, 368 insertions(+), 4 deletions(-)
> >>>
> >>> diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c
> >>> index af82360..ee8af42 100644
> >>> --- a/lib/librte_ethdev/rte_ethdev.c
> >>> +++ b/lib/librte_ethdev/rte_ethdev.c
> >>> @@ -1752,12 +1752,102 @@ struct rte_eth_dev *
> >>>    		if (!dev->data->min_rx_buf_size ||
> >>>    		    dev->data->min_rx_buf_size > mbp_buf_size)
> >>>    			dev->data->min_rx_buf_size = mbp_buf_size;
> >>> +		if (dev->data->rx_queue_state[rx_queue_id] ==
> >>> +		    RTE_ETH_QUEUE_STATE_HAIRPIN)
> >>> +			dev->data->rx_queue_state[rx_queue_id] =
> >>> +				RTE_ETH_QUEUE_STATE_STOPPED;
> >> I don't understand it. Why is rte_eth_rx_queue_setup() changed?
> >>
> > This was done so user can reset the queue back to normal queue.
> 
> I think it should be done in queue release.
> 
> >>>    	}
> >>>
> >>>    	return eth_err(port_id, ret);
> >>>    }
> >>>
> >>>    int
> >>> +rte_eth_rx_hairpin_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
> >>> +			       uint16_t nb_rx_desc,
> >>> +			       const struct rte_eth_hairpin_conf *conf)
> >>> +{
> >>> +	int ret;
> >>> +	struct rte_eth_dev *dev;
> >>> +	struct rte_eth_hairpin_cap cap;
> >>> +	struct rte_eth_dev_info dev_info;
> >>> +	void **rxq;
> >>> +	int i;
> >>> +	int count = 0;
> >>> +
> >>> +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
> >>> +
> >>> +	dev = &rte_eth_devices[port_id];
> >>> +	if (rx_queue_id >= dev->data->nb_rx_queues) {
> >>> +		RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n",
> >> rx_queue_id);
> >>> +		return -EINVAL;
> >>> +	}
> >>> +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -
> >> ENOTSUP);
> >>> +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->hairpin_cap_get,
> >>> +				-ENOTSUP);
> >>> +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops-
> >>> rx_hairpin_queue_setup,
> >>> +				-ENOTSUP);
> >>> +	rte_eth_dev_hairpin_capability_get(port_id, &cap);
> >> Return value should be checked. It makes  hairpin_cap_get check above
> >> unnecessary.
> >>
> > I will add a check.
> >
> >>> +	/* Use default specified by driver, if nb_rx_desc is zero */
> >>> +	if (nb_rx_desc == 0)
> >>> +		nb_rx_desc = cap.max_nb_desc;
> >>> +	if (nb_rx_desc > cap.max_nb_desc) {
> >>> +		RTE_ETHDEV_LOG(ERR,
> >>> +			       "Invalid value for nb_rx_desc(=%hu), should be: "
> >>> +			       "<= %hu", nb_rx_desc, cap.max_nb_desc);
> >>> +		return -EINVAL;
> >>> +	}
> >>> +	ret = rte_eth_dev_info_get(port_id, &dev_info);
> >>> +	if (ret != 0)
> >>> +		return ret;
> >>> +	if (dev->data->dev_started &&
> >>> +		!(dev_info.dev_capa &
> >>> +		  RTE_ETH_DEV_CAPA_RUNTIME_RX_QUEUE_SETUP))
> >>> +		return -EBUSY;
> >>> +	if (dev->data->dev_started &&
> >>> +		(dev->data->rx_queue_state[rx_queue_id] !=
> >>> +		 RTE_ETH_QUEUE_STATE_STOPPED))
> >>> +		return -EBUSY;
> >>> +	if (conf->peer_n > cap.max_rx_2_tx) {
> >>> +		RTE_ETHDEV_LOG(ERR,
> >>> +			       "Invalid value for number of peers(=%hu), "
> >>> +			       "should be: <= %hu", conf->peer_n,
> >>> +			       cap.max_rx_2_tx);
> >>> +		return -EINVAL;
> >>> +	}
> >>> +	if (conf->peer_n == 0) {
> >>> +		RTE_ETHDEV_LOG(ERR,
> >>> +			       "Invalid value for number of peers(=%hu), "
> >>> +			       "should be: > 0", conf->peer_n);
> >>> +		return -EINVAL;
> >>> +	}
> >>> +	if (cap.max_n_queues != -1) {
> >>> +		for (i = 0; i < dev->data->nb_rx_queues; i++) {
> >>> +			if (dev->data->rx_queue_state[i] ==
> >>> +			    RTE_ETH_QUEUE_STATE_HAIRPIN)
> >>> +				count++;
> >>> +		}
> >>> +		if (count > cap.max_n_queues) {
> >>> +			RTE_ETHDEV_LOG(ERR,
> >>> +				       "To many Rx hairpin queues %d", count);
> >>> +			return -EINVAL;
> >>> +		}
> >>> +	}
> >>> +	rxq = dev->data->rx_queues;
> >>> +	if (rxq[rx_queue_id]) {
> >> Please, compare with NULL (I know that rte_eth_rx_queue_setup() does
> >> like above).
> >>
> > O.K. I will change, but may I ask why? If in the rte_eth_rx_queue_setup
> function it is written the same way,
> > why do you want this change?
> 
> https://eur03.safelinks.protection.outlook.com/?url=https%3A%2F%2Fdoc.dpdk.
> org%2Fguides%2Fcontributing%2Fcoding_style.html%23null-
> pointers&amp;data=02%7C01%7Corika%40mellanox.com%7C022cd953964f4a2
> 0d50508d7508a259f%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C
> 637066426584029629&amp;sdata=IBOo6AyTGn4DiNLHBCC5MbwJkRuqdsGqdrl
> vm0xS9Vw%3D&amp;reserved=0
> 


Thanks,

> >>> +		RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops-
> >>> rx_queue_release,
> >>> +					-ENOTSUP);
> >>> +		(*dev->dev_ops->rx_queue_release)(rxq[rx_queue_id]);
> >>> +		rxq[rx_queue_id] = NULL;
> >>> +	}
> >>> +	ret = (*dev->dev_ops->rx_hairpin_queue_setup)(dev, rx_queue_id,
> >>> +						      nb_rx_desc, conf);
> >>> +	if (!ret)
> >> Please, compare with 0
> >>
> > Will do, but again just for my knowledge why?
> 
> https://eur03.safelinks.protection.outlook.com/?url=https%3A%2F%2Fdoc.dpdk.
> org%2Fguides%2Fcontributing%2Fcoding_style.html%23function-
> calls&amp;data=02%7C01%7Corika%40mellanox.com%7C022cd953964f4a20d5
> 0508d7508a259f%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C637
> 066426584029629&amp;sdata=G8ZxEWFFsv1kLWc6L7sKT8O6CSiBcj5ZuwQqmK
> 0Q6nY%3D&amp;reserved=0
> 

Thanks,

> >>> +		dev->data->rx_queue_state[rx_queue_id] =
> >>> +			RTE_ETH_QUEUE_STATE_HAIRPIN;
> >>> +	return eth_err(port_id, ret);
> >>> +}
> >>> +
> >>> +int
> >>>    rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
> >>>    		       uint16_t nb_tx_desc, unsigned int socket_id,
> >>>    		       const struct rte_eth_txconf *tx_conf)
> >>> @@ -1851,9 +1941,97 @@ struct rte_eth_dev *
> >>>    			__func__);
> >>>    		return -EINVAL;
> >>>    	}
> >>> +	ret = (*dev->dev_ops->tx_queue_setup)(dev, tx_queue_id, nb_tx_desc,
> >>> +					      socket_id, &local_conf);
> >>> +	if (!ret)
> >>> +		if (dev->data->tx_queue_state[tx_queue_id] ==
> >>> +		    RTE_ETH_QUEUE_STATE_HAIRPIN)
> >>> +			dev->data->tx_queue_state[tx_queue_id] =
> >>> +				RTE_ETH_QUEUE_STATE_STOPPED;
> >> Why is it changed?
> >>
> > Like in the Rx to enable switching back to normal queue.
> >
> >>> +	return eth_err(port_id, ret);
> >>> +}
> >>> +
> >>> +int
> >>> +rte_eth_tx_hairpin_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
> >>> +			       uint16_t nb_tx_desc,
> >>> +			       const struct rte_eth_hairpin_conf *conf)
> >>> +{
> >>> +	struct rte_eth_dev *dev;
> >>> +	struct rte_eth_hairpin_cap cap;
> >>> +	struct rte_eth_dev_info dev_info;
> >>> +	void **txq;
> >>> +	int i;
> >>> +	int count = 0;
> >>> +	int ret;
> >>>
> >>> -	return eth_err(port_id, (*dev->dev_ops->tx_queue_setup)(dev,
> >>> -		       tx_queue_id, nb_tx_desc, socket_id, &local_conf));
> >>> +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
> >>> +	dev = &rte_eth_devices[port_id];
> >>> +	if (tx_queue_id >= dev->data->nb_tx_queues) {
> >>> +		RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n",
> >> tx_queue_id);
> >>> +		return -EINVAL;
> >>> +	}
> >>> +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -
> >> ENOTSUP);
> >>> +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->hairpin_cap_get,
> >>> +				-ENOTSUP);
> >>> +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops-
> >>> tx_hairpin_queue_setup,
> >>> +				-ENOTSUP);
> >>> +	rte_eth_dev_info_get(port_id, &dev_info);
> >> return value should be checked.
> >>
> >>> +	rte_eth_dev_hairpin_capability_get(port_id, &cap);
> >> Check return value and you can rely on  hairpin_cap_get check inside.
> >>
> > Same as above I will add a check but I'm not sure I understand the second
> part
> > Just to make sure I understand, you are talking about that if I don't check
> > the return value I'm not allowed to check the cap right?
> 
> It is about dev_ops->hairpin_cap_get vs NULL check.
> It is checked inside rte_eth_dev_hairpin_capability_get() and we should
> not duplicate the check above.
> 

O.K.

> >>> +	/* Use default specified by driver, if nb_tx_desc is zero */
> >>> +	if (nb_tx_desc == 0)
> >>> +		nb_tx_desc = cap.max_nb_desc;
> >>> +	if (nb_tx_desc > cap.max_nb_desc) {
> >>> +		RTE_ETHDEV_LOG(ERR,
> >>> +			       "Invalid value for nb_tx_desc(=%hu), should be: "
> >>> +			       "<= %hu", nb_tx_desc, cap.max_nb_desc);
> >>> +		return -EINVAL;
> >>> +	}
> >>> +	if (conf->peer_n > cap.max_tx_2_rx) {
> >>> +		RTE_ETHDEV_LOG(ERR,
> >>> +			       "Invalid value for number of peers(=%hu), "
> >>> +			       "should be: <= %hu", conf->peer_n,
> >>> +			       cap.max_tx_2_rx);
> >>> +		return -EINVAL;
> >>> +	}
> >>> +	if (conf->peer_n == 0) {
> >>> +		RTE_ETHDEV_LOG(ERR,
> >>> +			       "Invalid value for number of peers(=%hu), "
> >>> +			       "should be: > 0", conf->peer_n);
> >>> +		return -EINVAL;
> >>> +	}
> >>> +	if (cap.max_n_queues != -1) {
> >>> +		for (i = 0; i < dev->data->nb_tx_queues; i++) {
> >>> +			if (dev->data->tx_queue_state[i] ==
> >>> +			    RTE_ETH_QUEUE_STATE_HAIRPIN)
> >>> +				count++;
> >>> +		}
> >>> +		if (count > cap.max_n_queues) {
> >>> +			RTE_ETHDEV_LOG(ERR,
> >>> +				       "To many Rx hairpin queues %d", count);
> >>> +			return -EINVAL;
> >>> +		}
> >>> +	}
> >>> +	if (dev->data->dev_started &&
> >>> +		!(dev_info.dev_capa &
> >>> +		  RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP))
> >>> +		return -EBUSY;
> >>> +	if (dev->data->dev_started &&
> >>> +		(dev->data->tx_queue_state[tx_queue_id] !=
> >>> +		 RTE_ETH_QUEUE_STATE_STOPPED))
> >>> +		return -EBUSY;
> >>> +	txq = dev->data->tx_queues;
> >>> +	if (txq[tx_queue_id]) {
> >> Please, compare with NULL
> >>
> > O.K.
> >
> >>> +		RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops-
> >>> tx_queue_release,
> >>> +					-ENOTSUP);
> >>> +		(*dev->dev_ops->tx_queue_release)(txq[tx_queue_id]);
> >>> +		txq[tx_queue_id] = NULL;
> >>> +	}
> >>> +	ret = (*dev->dev_ops->tx_hairpin_queue_setup)
> >>> +		(dev, tx_queue_id, nb_tx_desc, conf);
> >>> +	if (!ret)
> >> Please, compare with 0
> >>
> > O.K.
> >
> >>> +		dev->data->tx_queue_state[tx_queue_id] =
> >>> +			RTE_ETH_QUEUE_STATE_HAIRPIN;
> >>> +	return eth_err(port_id, ret);
> >>>    }
> >>>
> >>>    void
> >>> @@ -3981,12 +4159,20 @@ int rte_eth_set_queue_rate_limit(uint16_t
> >> port_id, uint16_t queue_idx,
> >>>    	rte_errno = ENOTSUP;
> >>>    	return NULL;
> >>>    #endif
> >>> +	struct rte_eth_dev *dev;
> >>> +
> >>>    	/* check input parameters */
> >>>    	if (!rte_eth_dev_is_valid_port(port_id) || fn == NULL ||
> >>>    		    queue_id >= rte_eth_devices[port_id].data->nb_rx_queues) {
> >>>    		rte_errno = EINVAL;
> >>>    		return NULL;
> >>>    	}
> >>> +	dev = &rte_eth_devices[port_id];
> >>> +	if (dev->data->rx_queue_state[queue_id] ==
> >>> +	    RTE_ETH_QUEUE_STATE_HAIRPIN) {
> >> It looks like line break is not required above. Just to make code a bit
> >> shorter.
> >>
> > Will fix if possible.
> >
> >>> +		rte_errno = EINVAL;
> >>> +		return NULL;
> >>> +	}
> >>>    	struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0);
> >>>
> >>>    	if (cb == NULL) {
> >>> @@ -4058,6 +4244,8 @@ int rte_eth_set_queue_rate_limit(uint16_t
> port_id,
> >> uint16_t queue_idx,
> >>>    	rte_errno = ENOTSUP;
> >>>    	return NULL;
> >>>    #endif
> >>> +	struct rte_eth_dev *dev;
> >>> +
> >>>    	/* check input parameters */
> >>>    	if (!rte_eth_dev_is_valid_port(port_id) || fn == NULL ||
> >>>    		    queue_id >= rte_eth_devices[port_id].data->nb_tx_queues) {
> >>> @@ -4065,6 +4253,13 @@ int rte_eth_set_queue_rate_limit(uint16_t
> >> port_id, uint16_t queue_idx,
> >>>    		return NULL;
> >>>    	}
> >>>
> >>> +	dev = &rte_eth_devices[port_id];
> >>> +	if (dev->data->tx_queue_state[queue_id] ==
> >>> +	    RTE_ETH_QUEUE_STATE_HAIRPIN) {
> >> It looks like line break is not required above. Just to make code a bit
> >> shorter.
> >>
> > Will fix.
> >
> >>> +		rte_errno = EINVAL;
> >>> +		return NULL;
> >>> +	}
> >>> +
> >>>    	struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0);
> >>>
> >>>    	if (cb == NULL) {
> >>> @@ -4510,6 +4705,21 @@ int rte_eth_set_queue_rate_limit(uint16_t
> >> port_id, uint16_t queue_idx,
> >>>    }
> >>>
> >>>    int
> >>> +rte_eth_dev_hairpin_capability_get(uint16_t port_id,
> >>> +				   struct rte_eth_hairpin_cap *cap)
> >>> +{
> >>> +	struct rte_eth_dev *dev;
> >>> +
> >>> +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
> >>> +
> >>> +	dev = &rte_eth_devices[port_id];
> >>> +	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->hairpin_cap_get,
> >>> +				-ENOTSUP);
> >> I think it would be useful to memset() cap with 0 to be sure that it is
> >> initialized.
> >>
> > O.K but if we follow your comments from above, the result is not valid if we
> return
> > error, so it is not a must, but like I said I don't care to add it.
> >
> >>> +	return eth_err(port_id, (*dev->dev_ops->hairpin_cap_get)
> >>> +		       (dev, cap));
> >>
> >> Please, consider to avoid line breaks above to make code a bit shorter.
> >> Of course, it should not exceed line length limit.
> >>
> > Will check.
> >
> >>> +}
> >>> +
> >>> +int
> >>>    rte_eth_dev_pool_ops_supported(uint16_t port_id, const char *pool)
> >>>    {
> >>>    	struct rte_eth_dev *dev;
> >>> diff --git a/lib/librte_ethdev/rte_ethdev.h b/lib/librte_ethdev/rte_ethdev.h
> >>> index d937fb4..29dcfea 100644
> >>> --- a/lib/librte_ethdev/rte_ethdev.h
> >>> +++ b/lib/librte_ethdev/rte_ethdev.h
> >>> @@ -804,6 +804,46 @@ struct rte_eth_txconf {
> >>>    };
> >>>
> >>>    /**
> >>> + * @warning
> >>> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> >> notice
> >>> + *
> >>> + * A structure used to return the hairpin capabilities that are supportd.
> >> supportd -> supported
> >>
> > Will fix.
> >
> >>> + */
> >>> +struct rte_eth_hairpin_cap {
> >>> +	int16_t max_n_queues;
> >>> +	/**< The max number of hairpin queuesi. -1 no limit. */
> >> I'm not sure that I like type difference from max_rx_queues here.
> >> I think it would be better to use uint16_t. I would say there is point
> >> to highlight no limit (first of all I'm not sure that it makes sense,
> >> second, UINT16_MAX will obviously do the job in uint16_t.
> >>
> >> Is it both Rx and Tx?
> >>
> >>> +	int16_t max_rx_2_tx;
> >>> +	/**< Max number of Rx queues to be connected to one Tx queue. */
> >>> +	int16_t max_tx_2_rx;
> >>> +	/**< Max number of Tx queues to be connected to one Rx queue. */
> >> I would prefer to have uint16_t here as well. Mainly for consistency.
> >>
> > Agree, will fix,
> >
> >>> +	uint16_t max_nb_desc; /**< The max num of descriptors. */
> >> Is it common for Rx and Tx? What about min?
> >>
> > I think min is always 1, by  definition.
> 
> OK
> 
> >>> +};
> >>> +
> >>> +#define RTE_ETH_MAX_HAIRPIN_PEERS 32
> >>> +
> >>> +/**
> >>> + * @warning
> >>> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> >> notice
> >>> + *
> >>> + * A structure used to hold hairpin peer data.
> >>> + */
> >>> +struct rte_eth_hairpin_peer {
> >>> +	uint16_t port; /**< Peer port. */
> >>> +	uint16_t queue; /**< Peer queue. */
> >>> +};
> >>> +
> >>> +/**
> >>> + * @warning
> >>> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> >> notice
> >>> + *
> >>> + * A structure used to configure hairpin binding.
> >> It should be explained what happens if Rx queue has many Tx queues.
> >> Is packet duplicated? Or distributed?
> >>
> > Like we said before, I don't know it depends on the Nic.
> > In case of Mellanox we don't support 1 to many.
> 
> I see, but what application should expect?

I think that when some Nic supports such connection, it should add in the capabilities 
or in documentation the expected results. 
Personally I think that the common use case will be distributed (RSS on the Tx) but it is only 
my personal thinking, your question will be answered when someone will implement it.
 
> 
> >>> + */
> >>> +struct rte_eth_hairpin_conf {
> >>> +	uint16_t peer_n; /**< The number of peers. */
> >>> +	struct rte_eth_hairpin_peer peers[RTE_ETH_MAX_HAIRPIN_PEERS];
> >>> +};
> >>> +
> >>> +/**
> >>>     * A structure contains information about HW descriptor ring limitations.
> >>>     */
> >>>    struct rte_eth_desc_lim {
> >>> @@ -1080,6 +1120,8 @@ struct rte_eth_conf {
> >>>    /**< Device supports Rx queue setup after device started*/
> >>>    #define RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP 0x00000002
> >>>    /**< Device supports Tx queue setup after device started*/
> >>> +#define RTE_ETH_DEV_CAPA_HAIRPIN_SUPPORT 0x00000004
> >>> +/**< Device supports hairpin queues. */
> >> Do we really need it? Isn't rte_eth_dev_hairpin_capability_get() returning
> >> -ENOTSUP sufficient?
> >>
> > I agree with you, but my thinking was that using the cap the application
> > can avoid calling to function that will fail.
> > If you approve I will remove this cap.
> 
> Runtime setup flags are required since these functions are always
> supported, but it is not alway possible to use these function when
> device is started. In this case I think there is no necessity to add
> the capability since we have no similar caps for anything else.
> 

O.K. will remove it.

> >>>    /*
> >>>     * If new Tx offload capabilities are defined, they also must be
> >>> @@ -1277,6 +1319,7 @@ struct rte_eth_dcb_info {
> >>>     */
> >>>    #define RTE_ETH_QUEUE_STATE_STOPPED 0
> >>>    #define RTE_ETH_QUEUE_STATE_STARTED 1
> >>> +#define RTE_ETH_QUEUE_STATE_HAIRPIN 2
> >>>
> >>>    #define RTE_ETH_ALL RTE_MAX_ETHPORTS
> >>>
> >>> @@ -1771,6 +1814,34 @@ int rte_eth_rx_queue_setup(uint16_t port_id,
> >> uint16_t rx_queue_id,
> >>>    		struct rte_mempool *mb_pool);
> >>>
> >>>    /**
> >>> + * @warning
> >>> + * @b EXPERIMENTAL: this API may change, or be removed, without prior
> >> notice
> >>> + *
> >>> + * Allocate and set up a hairpin receive queue for an Ethernet device.
> >>> + *
> >>> + * The function set up the selected queue to be used in hairpin.
> >>> + *
> >>> + * @param port_id
> >>> + *   The port identifier of the Ethernet device.
> >>> + * @param rx_queue_id
> >>> + *   The index of the receive queue to set up.
> >>> + *   The value must be in the range [0, nb_rx_queue - 1] previously
> supplied
> >>> + *   to rte_eth_dev_configure().
> >>> + * @param nb_rx_desc
> >>> + *   The number of receive descriptors to allocate for the receive ring.
> >> Is it allows to specify 0 to pick driver recommended value?
> >>
> > Yes, (assuming you are talking about the nb_rx_desc)
> 
> Please, highlight it in the description.
> 

O.K.

[nip]

Thanks,
Ori
  

Patch

diff --git a/lib/librte_ethdev/rte_ethdev.c b/lib/librte_ethdev/rte_ethdev.c
index af82360..ee8af42 100644
--- a/lib/librte_ethdev/rte_ethdev.c
+++ b/lib/librte_ethdev/rte_ethdev.c
@@ -1752,12 +1752,102 @@  struct rte_eth_dev *
 		if (!dev->data->min_rx_buf_size ||
 		    dev->data->min_rx_buf_size > mbp_buf_size)
 			dev->data->min_rx_buf_size = mbp_buf_size;
+		if (dev->data->rx_queue_state[rx_queue_id] ==
+		    RTE_ETH_QUEUE_STATE_HAIRPIN)
+			dev->data->rx_queue_state[rx_queue_id] =
+				RTE_ETH_QUEUE_STATE_STOPPED;
 	}
 
 	return eth_err(port_id, ret);
 }
 
 int
+rte_eth_rx_hairpin_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
+			       uint16_t nb_rx_desc,
+			       const struct rte_eth_hairpin_conf *conf)
+{
+	int ret;
+	struct rte_eth_dev *dev;
+	struct rte_eth_hairpin_cap cap;
+	struct rte_eth_dev_info dev_info;
+	void **rxq;
+	int i;
+	int count = 0;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+
+	dev = &rte_eth_devices[port_id];
+	if (rx_queue_id >= dev->data->nb_rx_queues) {
+		RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n", rx_queue_id);
+		return -EINVAL;
+	}
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP);
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->hairpin_cap_get,
+				-ENOTSUP);
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_hairpin_queue_setup,
+				-ENOTSUP);
+	rte_eth_dev_hairpin_capability_get(port_id, &cap);
+	/* Use default specified by driver, if nb_rx_desc is zero */
+	if (nb_rx_desc == 0)
+		nb_rx_desc = cap.max_nb_desc;
+	if (nb_rx_desc > cap.max_nb_desc) {
+		RTE_ETHDEV_LOG(ERR,
+			       "Invalid value for nb_rx_desc(=%hu), should be: "
+			       "<= %hu", nb_rx_desc, cap.max_nb_desc);
+		return -EINVAL;
+	}
+	ret = rte_eth_dev_info_get(port_id, &dev_info);
+	if (ret != 0)
+		return ret;
+	if (dev->data->dev_started &&
+		!(dev_info.dev_capa &
+		  RTE_ETH_DEV_CAPA_RUNTIME_RX_QUEUE_SETUP))
+		return -EBUSY;
+	if (dev->data->dev_started &&
+		(dev->data->rx_queue_state[rx_queue_id] !=
+		 RTE_ETH_QUEUE_STATE_STOPPED))
+		return -EBUSY;
+	if (conf->peer_n > cap.max_rx_2_tx) {
+		RTE_ETHDEV_LOG(ERR,
+			       "Invalid value for number of peers(=%hu), "
+			       "should be: <= %hu", conf->peer_n,
+			       cap.max_rx_2_tx);
+		return -EINVAL;
+	}
+	if (conf->peer_n == 0) {
+		RTE_ETHDEV_LOG(ERR,
+			       "Invalid value for number of peers(=%hu), "
+			       "should be: > 0", conf->peer_n);
+		return -EINVAL;
+	}
+	if (cap.max_n_queues != -1) {
+		for (i = 0; i < dev->data->nb_rx_queues; i++) {
+			if (dev->data->rx_queue_state[i] ==
+			    RTE_ETH_QUEUE_STATE_HAIRPIN)
+				count++;
+		}
+		if (count > cap.max_n_queues) {
+			RTE_ETHDEV_LOG(ERR,
+				       "To many Rx hairpin queues %d", count);
+			return -EINVAL;
+		}
+	}
+	rxq = dev->data->rx_queues;
+	if (rxq[rx_queue_id]) {
+		RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_release,
+					-ENOTSUP);
+		(*dev->dev_ops->rx_queue_release)(rxq[rx_queue_id]);
+		rxq[rx_queue_id] = NULL;
+	}
+	ret = (*dev->dev_ops->rx_hairpin_queue_setup)(dev, rx_queue_id,
+						      nb_rx_desc, conf);
+	if (!ret)
+		dev->data->rx_queue_state[rx_queue_id] =
+			RTE_ETH_QUEUE_STATE_HAIRPIN;
+	return eth_err(port_id, ret);
+}
+
+int
 rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
 		       uint16_t nb_tx_desc, unsigned int socket_id,
 		       const struct rte_eth_txconf *tx_conf)
@@ -1851,9 +1941,97 @@  struct rte_eth_dev *
 			__func__);
 		return -EINVAL;
 	}
+	ret = (*dev->dev_ops->tx_queue_setup)(dev, tx_queue_id, nb_tx_desc,
+					      socket_id, &local_conf);
+	if (!ret)
+		if (dev->data->tx_queue_state[tx_queue_id] ==
+		    RTE_ETH_QUEUE_STATE_HAIRPIN)
+			dev->data->tx_queue_state[tx_queue_id] =
+				RTE_ETH_QUEUE_STATE_STOPPED;
+	return eth_err(port_id, ret);
+}
+
+int
+rte_eth_tx_hairpin_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
+			       uint16_t nb_tx_desc,
+			       const struct rte_eth_hairpin_conf *conf)
+{
+	struct rte_eth_dev *dev;
+	struct rte_eth_hairpin_cap cap;
+	struct rte_eth_dev_info dev_info;
+	void **txq;
+	int i;
+	int count = 0;
+	int ret;
 
-	return eth_err(port_id, (*dev->dev_ops->tx_queue_setup)(dev,
-		       tx_queue_id, nb_tx_desc, socket_id, &local_conf));
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+	dev = &rte_eth_devices[port_id];
+	if (tx_queue_id >= dev->data->nb_tx_queues) {
+		RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n", tx_queue_id);
+		return -EINVAL;
+	}
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP);
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->hairpin_cap_get,
+				-ENOTSUP);
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_hairpin_queue_setup,
+				-ENOTSUP);
+	rte_eth_dev_info_get(port_id, &dev_info);
+	rte_eth_dev_hairpin_capability_get(port_id, &cap);
+	/* Use default specified by driver, if nb_tx_desc is zero */
+	if (nb_tx_desc == 0)
+		nb_tx_desc = cap.max_nb_desc;
+	if (nb_tx_desc > cap.max_nb_desc) {
+		RTE_ETHDEV_LOG(ERR,
+			       "Invalid value for nb_tx_desc(=%hu), should be: "
+			       "<= %hu", nb_tx_desc, cap.max_nb_desc);
+		return -EINVAL;
+	}
+	if (conf->peer_n > cap.max_tx_2_rx) {
+		RTE_ETHDEV_LOG(ERR,
+			       "Invalid value for number of peers(=%hu), "
+			       "should be: <= %hu", conf->peer_n,
+			       cap.max_tx_2_rx);
+		return -EINVAL;
+	}
+	if (conf->peer_n == 0) {
+		RTE_ETHDEV_LOG(ERR,
+			       "Invalid value for number of peers(=%hu), "
+			       "should be: > 0", conf->peer_n);
+		return -EINVAL;
+	}
+	if (cap.max_n_queues != -1) {
+		for (i = 0; i < dev->data->nb_tx_queues; i++) {
+			if (dev->data->tx_queue_state[i] ==
+			    RTE_ETH_QUEUE_STATE_HAIRPIN)
+				count++;
+		}
+		if (count > cap.max_n_queues) {
+			RTE_ETHDEV_LOG(ERR,
+				       "To many Rx hairpin queues %d", count);
+			return -EINVAL;
+		}
+	}
+	if (dev->data->dev_started &&
+		!(dev_info.dev_capa &
+		  RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP))
+		return -EBUSY;
+	if (dev->data->dev_started &&
+		(dev->data->tx_queue_state[tx_queue_id] !=
+		 RTE_ETH_QUEUE_STATE_STOPPED))
+		return -EBUSY;
+	txq = dev->data->tx_queues;
+	if (txq[tx_queue_id]) {
+		RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_release,
+					-ENOTSUP);
+		(*dev->dev_ops->tx_queue_release)(txq[tx_queue_id]);
+		txq[tx_queue_id] = NULL;
+	}
+	ret = (*dev->dev_ops->tx_hairpin_queue_setup)
+		(dev, tx_queue_id, nb_tx_desc, conf);
+	if (!ret)
+		dev->data->tx_queue_state[tx_queue_id] =
+			RTE_ETH_QUEUE_STATE_HAIRPIN;
+	return eth_err(port_id, ret);
 }
 
 void
@@ -3981,12 +4159,20 @@  int rte_eth_set_queue_rate_limit(uint16_t port_id, uint16_t queue_idx,
 	rte_errno = ENOTSUP;
 	return NULL;
 #endif
+	struct rte_eth_dev *dev;
+
 	/* check input parameters */
 	if (!rte_eth_dev_is_valid_port(port_id) || fn == NULL ||
 		    queue_id >= rte_eth_devices[port_id].data->nb_rx_queues) {
 		rte_errno = EINVAL;
 		return NULL;
 	}
+	dev = &rte_eth_devices[port_id];
+	if (dev->data->rx_queue_state[queue_id] ==
+	    RTE_ETH_QUEUE_STATE_HAIRPIN) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
 	struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0);
 
 	if (cb == NULL) {
@@ -4058,6 +4244,8 @@  int rte_eth_set_queue_rate_limit(uint16_t port_id, uint16_t queue_idx,
 	rte_errno = ENOTSUP;
 	return NULL;
 #endif
+	struct rte_eth_dev *dev;
+
 	/* check input parameters */
 	if (!rte_eth_dev_is_valid_port(port_id) || fn == NULL ||
 		    queue_id >= rte_eth_devices[port_id].data->nb_tx_queues) {
@@ -4065,6 +4253,13 @@  int rte_eth_set_queue_rate_limit(uint16_t port_id, uint16_t queue_idx,
 		return NULL;
 	}
 
+	dev = &rte_eth_devices[port_id];
+	if (dev->data->tx_queue_state[queue_id] ==
+	    RTE_ETH_QUEUE_STATE_HAIRPIN) {
+		rte_errno = EINVAL;
+		return NULL;
+	}
+
 	struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0);
 
 	if (cb == NULL) {
@@ -4510,6 +4705,21 @@  int rte_eth_set_queue_rate_limit(uint16_t port_id, uint16_t queue_idx,
 }
 
 int
+rte_eth_dev_hairpin_capability_get(uint16_t port_id,
+				   struct rte_eth_hairpin_cap *cap)
+{
+	struct rte_eth_dev *dev;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+
+	dev = &rte_eth_devices[port_id];
+	RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->hairpin_cap_get,
+				-ENOTSUP);
+	return eth_err(port_id, (*dev->dev_ops->hairpin_cap_get)
+		       (dev, cap));
+}
+
+int
 rte_eth_dev_pool_ops_supported(uint16_t port_id, const char *pool)
 {
 	struct rte_eth_dev *dev;
diff --git a/lib/librte_ethdev/rte_ethdev.h b/lib/librte_ethdev/rte_ethdev.h
index d937fb4..29dcfea 100644
--- a/lib/librte_ethdev/rte_ethdev.h
+++ b/lib/librte_ethdev/rte_ethdev.h
@@ -804,6 +804,46 @@  struct rte_eth_txconf {
 };
 
 /**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * A structure used to return the hairpin capabilities that are supportd.
+ */
+struct rte_eth_hairpin_cap {
+	int16_t max_n_queues;
+	/**< The max number of hairpin queuesi. -1 no limit. */
+	int16_t max_rx_2_tx;
+	/**< Max number of Rx queues to be connected to one Tx queue. */
+	int16_t max_tx_2_rx;
+	/**< Max number of Tx queues to be connected to one Rx queue. */
+	uint16_t max_nb_desc; /**< The max num of descriptors. */
+};
+
+#define RTE_ETH_MAX_HAIRPIN_PEERS 32
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * A structure used to hold hairpin peer data.
+ */
+struct rte_eth_hairpin_peer {
+	uint16_t port; /**< Peer port. */
+	uint16_t queue; /**< Peer queue. */
+};
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * A structure used to configure hairpin binding.
+ */
+struct rte_eth_hairpin_conf {
+	uint16_t peer_n; /**< The number of peers. */
+	struct rte_eth_hairpin_peer peers[RTE_ETH_MAX_HAIRPIN_PEERS];
+};
+
+/**
  * A structure contains information about HW descriptor ring limitations.
  */
 struct rte_eth_desc_lim {
@@ -1080,6 +1120,8 @@  struct rte_eth_conf {
 /**< Device supports Rx queue setup after device started*/
 #define RTE_ETH_DEV_CAPA_RUNTIME_TX_QUEUE_SETUP 0x00000002
 /**< Device supports Tx queue setup after device started*/
+#define RTE_ETH_DEV_CAPA_HAIRPIN_SUPPORT 0x00000004
+/**< Device supports hairpin queues. */
 
 /*
  * If new Tx offload capabilities are defined, they also must be
@@ -1277,6 +1319,7 @@  struct rte_eth_dcb_info {
  */
 #define RTE_ETH_QUEUE_STATE_STOPPED 0
 #define RTE_ETH_QUEUE_STATE_STARTED 1
+#define RTE_ETH_QUEUE_STATE_HAIRPIN 2
 
 #define RTE_ETH_ALL RTE_MAX_ETHPORTS
 
@@ -1771,6 +1814,34 @@  int rte_eth_rx_queue_setup(uint16_t port_id, uint16_t rx_queue_id,
 		struct rte_mempool *mb_pool);
 
 /**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Allocate and set up a hairpin receive queue for an Ethernet device.
+ *
+ * The function set up the selected queue to be used in hairpin.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param rx_queue_id
+ *   The index of the receive queue to set up.
+ *   The value must be in the range [0, nb_rx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @param nb_rx_desc
+ *   The number of receive descriptors to allocate for the receive ring.
+ * @param conf
+ *   The pointer to the hairpin configuration.
+ * @return
+ *   - 0: Success, receive queue correctly set up.
+ *   - -EINVAL: Selected Queue can't be configured for hairpin.
+ *   - -ENOMEM: Unable to allocate the resources required for the queue.
+ */
+__rte_experimental
+int rte_eth_rx_hairpin_queue_setup
+	(uint16_t port_id, uint16_t rx_queue_id, uint16_t nb_rx_desc,
+	 const struct rte_eth_hairpin_conf *conf);
+
+/**
  * Allocate and set up a transmit queue for an Ethernet device.
  *
  * @param port_id
@@ -1823,6 +1894,33 @@  int rte_eth_tx_queue_setup(uint16_t port_id, uint16_t tx_queue_id,
 		const struct rte_eth_txconf *tx_conf);
 
 /**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Allocate and set up a transmit hairpin queue for an Ethernet device.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param tx_queue_id
+ *   The index of the transmit queue to set up.
+ *   The value must be in the range [0, nb_tx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @param nb_tx_desc
+ *   The number of transmit descriptors to allocate for the transmit ring.
+ * @param conf
+ *   The hairpin configuration.
+ *
+ * @return
+ *   - 0: Success, transmit queue correctly set up.
+ *   - -EINVAL: Selected Queue can't be configured for hairpin.
+ *   - -ENOMEM: Unable to allocate the resources required for the queue.
+ */
+__rte_experimental
+int rte_eth_tx_hairpin_queue_setup
+	(uint16_t port_id, uint16_t tx_queue_id, uint16_t nb_tx_desc,
+	 const struct rte_eth_hairpin_conf *conf);
+
+/**
  * Return the NUMA socket to which an Ethernet device is connected
  *
  * @param port_id
@@ -4037,6 +4135,22 @@  int rte_eth_dev_adjust_nb_rx_tx_desc(uint16_t port_id,
 void *
 rte_eth_dev_get_sec_ctx(uint16_t port_id);
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice
+ *
+ * Query the device hairpin capabilities.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param cap
+ *   Pointer to a structure that will hold the hairpin capabilities.
+ * @return
+ *   - 0 on success, -ENOTSUP if the device doesn't support hairpin.
+ */
+__rte_experimental
+int rte_eth_dev_hairpin_capability_get(uint16_t port_id,
+				       struct rte_eth_hairpin_cap *cap);
 
 #include <rte_ethdev_core.h>
 
@@ -4137,6 +4251,12 @@  int rte_eth_dev_adjust_nb_rx_tx_desc(uint16_t port_id,
 		RTE_ETHDEV_LOG(ERR, "Invalid RX queue_id=%u\n", queue_id);
 		return 0;
 	}
+	if (dev->data->rx_queue_state[queue_id] ==
+	    RTE_ETH_QUEUE_STATE_HAIRPIN) {
+		RTE_ETHDEV_LOG(ERR, "RX queue_id=%u is hairpin queue\n",
+			       queue_id);
+		return 0;
+	}
 #endif
 	nb_rx = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id],
 				     rx_pkts, nb_pkts);
@@ -4403,6 +4523,12 @@  static inline int rte_eth_tx_descriptor_status(uint16_t port_id,
 		RTE_ETHDEV_LOG(ERR, "Invalid TX queue_id=%u\n", queue_id);
 		return 0;
 	}
+	if (dev->data->tx_queue_state[queue_id] ==
+	    RTE_ETH_QUEUE_STATE_HAIRPIN) {
+		RTE_ETHDEV_LOG(ERR, "TX queue_id=%u is hairpin queue\n",
+			       queue_id);
+		return 0;
+	}
 #endif
 
 #ifdef RTE_ETHDEV_RXTX_CALLBACKS
diff --git a/lib/librte_ethdev/rte_ethdev_core.h b/lib/librte_ethdev/rte_ethdev_core.h
index dcb5ae6..ef46e71 100644
--- a/lib/librte_ethdev/rte_ethdev_core.h
+++ b/lib/librte_ethdev/rte_ethdev_core.h
@@ -250,6 +250,12 @@  typedef int (*eth_rx_queue_setup_t)(struct rte_eth_dev *dev,
 				    struct rte_mempool *mb_pool);
 /**< @internal Set up a receive queue of an Ethernet device. */
 
+typedef int (*eth_rx_hairpin_queue_setup_t)
+	(struct rte_eth_dev *dev, uint16_t rx_queue_id,
+	 uint16_t nb_rx_desc,
+	 const struct rte_eth_hairpin_conf *conf);
+/**< @internal Set up a receive hairpin queue of an Ethernet device. */
+
 typedef int (*eth_tx_queue_setup_t)(struct rte_eth_dev *dev,
 				    uint16_t tx_queue_id,
 				    uint16_t nb_tx_desc,
@@ -257,6 +263,12 @@  typedef int (*eth_tx_queue_setup_t)(struct rte_eth_dev *dev,
 				    const struct rte_eth_txconf *tx_conf);
 /**< @internal Setup a transmit queue of an Ethernet device. */
 
+typedef int (*eth_tx_hairpin_queue_setup_t)
+	(struct rte_eth_dev *dev, uint16_t tx_queue_id,
+	 uint16_t nb_tx_desc,
+	 const struct rte_eth_hairpin_conf *hairpin_conf);
+/**< @internal Setup a transmit hairpin queue of an Ethernet device. */
+
 typedef int (*eth_rx_enable_intr_t)(struct rte_eth_dev *dev,
 				    uint16_t rx_queue_id);
 /**< @internal Enable interrupt of a receive queue of an Ethernet device. */
@@ -505,6 +517,10 @@  typedef int (*eth_pool_ops_supported_t)(struct rte_eth_dev *dev,
 						const char *pool);
 /**< @internal Test if a port supports specific mempool ops */
 
+typedef int (*eth_hairpin_cap_get_t)(struct rte_eth_dev *dev,
+				     struct rte_eth_hairpin_cap *cap);
+/**< @internal get the hairpin capabilities. */
+
 /**
  * @internal A structure containing the functions exported by an Ethernet driver.
  */
@@ -557,6 +573,8 @@  struct eth_dev_ops {
 	eth_queue_start_t          tx_queue_start;/**< Start TX for a queue. */
 	eth_queue_stop_t           tx_queue_stop; /**< Stop TX for a queue. */
 	eth_rx_queue_setup_t       rx_queue_setup;/**< Set up device RX queue. */
+	eth_rx_hairpin_queue_setup_t rx_hairpin_queue_setup;
+	/**< Set up device RX hairpin queue. */
 	eth_queue_release_t        rx_queue_release; /**< Release RX queue. */
 	eth_rx_queue_count_t       rx_queue_count;
 	/**< Get the number of used RX descriptors. */
@@ -568,6 +586,8 @@  struct eth_dev_ops {
 	eth_rx_enable_intr_t       rx_queue_intr_enable;  /**< Enable Rx queue interrupt. */
 	eth_rx_disable_intr_t      rx_queue_intr_disable; /**< Disable Rx queue interrupt. */
 	eth_tx_queue_setup_t       tx_queue_setup;/**< Set up device TX queue. */
+	eth_tx_hairpin_queue_setup_t tx_hairpin_queue_setup;
+	/**< Set up device TX hairpin queue. */
 	eth_queue_release_t        tx_queue_release; /**< Release TX queue. */
 	eth_tx_done_cleanup_t      tx_done_cleanup;/**< Free tx ring mbufs */
 
@@ -639,6 +659,9 @@  struct eth_dev_ops {
 
 	eth_pool_ops_supported_t pool_ops_supported;
 	/**< Test if a port supports specific mempool ops */
+
+	eth_hairpin_cap_get_t hairpin_cap_get;
+	/**< Returns the hairpin capabilities. */
 };
 
 /**
@@ -746,9 +769,9 @@  struct rte_eth_dev_data {
 		dev_started : 1,   /**< Device state: STARTED(1) / STOPPED(0). */
 		lro         : 1;   /**< RX LRO is ON(1) / OFF(0) */
 	uint8_t rx_queue_state[RTE_MAX_QUEUES_PER_PORT];
-			/**< Queues state: STARTED(1) / STOPPED(0). */
+		/**< Queues state: HAIRPIN(2) / STARTED(1) / STOPPED(0). */
 	uint8_t tx_queue_state[RTE_MAX_QUEUES_PER_PORT];
-			/**< Queues state: STARTED(1) / STOPPED(0). */
+		/**< Queues state: HAIRPIN(2) STARTED(1) / STOPPED(0). */
 	uint32_t dev_flags;             /**< Capabilities. */
 	enum rte_kernel_driver kdrv;    /**< Kernel driver passthrough. */
 	int numa_node;                  /**< NUMA node connection. */
diff --git a/lib/librte_ethdev/rte_ethdev_version.map b/lib/librte_ethdev/rte_ethdev_version.map
index 6df42a4..77b0a86 100644
--- a/lib/librte_ethdev/rte_ethdev_version.map
+++ b/lib/librte_ethdev/rte_ethdev_version.map
@@ -283,4 +283,9 @@  EXPERIMENTAL {
 
 	# added in 19.08
 	rte_eth_read_clock;
+
+	# added in 19.11
+	rte_eth_rx_hairpin_queue_setup;
+	rte_eth_tx_hairpin_queue_setup;
+	rte_eth_dev_hairpin_capability_get;
 };