[v2] net/netvsc: fix number Tx queues > Rx queues

Message ID DU0PR83MB05321DB2ADBEDAE7687773DF97272@DU0PR83MB0532.EURPRD83.prod.outlook.com (mailing list archive)
State Changes Requested, archived
Delegated to: Ferruh Yigit
Series [v2] net/netvsc: fix number Tx queues > Rx queues

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK
ci/loongarch-compilation success Compilation OK
ci/loongarch-unit-testing success Unit Testing PASS
ci/intel-Testing success Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/intel-Functional success Functional PASS
ci/iol-unit-amd64-testing success Testing PASS
ci/github-robot: build success github build: passed
ci/iol-abi-testing success Testing PASS
ci/iol-compile-amd64-testing success Testing PASS
ci/iol-unit-arm64-testing success Testing PASS
ci/iol-sample-apps-testing success Testing PASS
ci/iol-compile-arm64-testing success Testing PASS
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-broadcom-Functional success Functional Testing PASS

Commit Message

Alan Elder March 8, 2024, 6:09 p.m. UTC
  The previous code allowed the number of Tx queues to be set higher than
the number of Rx queues.  If a packet was sent on a Tx queue with index
>= the number of Rx queues, there was a segfault.

This commit fixes the issue by creating an Rx queue for every Tx queue
meaning that an event buffer is allocated to handle receiving Tx
completion messages.

mbuf pool and Rx ring are not allocated for these additional Rx queues
and RSS configuration ensures that no packets are received on them.

Fixes: 4e9c73e96e83 ("net/netvsc: add Hyper-V network device")
Cc: sthemmin@microsoft.com
Cc: stable@dpdk.org

Signed-off-by: Alan Elder <alan.elder@microsoft.com>
---
v2:
* Remove function declaration for static non-member function

---
 drivers/net/netvsc/hn_ethdev.c |  9 +++++++
 drivers/net/netvsc/hn_rxtx.c   | 46 +++++++++++++++++++++++++++++++---
 2 files changed, 52 insertions(+), 3 deletions(-)
  

Comments

Ferruh Yigit March 11, 2024, 10:31 p.m. UTC | #1
On 3/8/2024 6:09 PM, Alan Elder wrote:
> The previous code allowed the number of Tx queues to be set higher than
> the number of Rx queues.  If a packet was sent on a Tx queue with index
> >= the number of Rx queues, there was a segfault.
> 
> This commit fixes the issue by creating an Rx queue for every Tx queue
> meaning that an event buffer is allocated to handle receiving Tx
> completion messages.
> 
> mbuf pool and Rx ring are not allocated for these additional Rx queues
> and RSS configuration ensures that no packets are received on them.
> 
> Fixes: 4e9c73e96e83 ("net/netvsc: add Hyper-V network device")
> Cc: sthemmin@microsoft.com
> Cc: stable@dpdk.org
> 
> Signed-off-by: Alan Elder <alan.elder@microsoft.com>
> ---
> v2:
> * Remove function declaration for static non-member function
> 

Hi Long,

There was a request to hold the previous version; what is the latest status?
If you agree with this patch, can you please ack/review it?
  
Long Li March 12, 2024, 7:08 p.m. UTC | #2
> diff --git a/drivers/net/netvsc/hn_rxtx.c b/drivers/net/netvsc/hn_rxtx.c
> index 9bf1ec5509..c0aaeaa972 100644
> --- a/drivers/net/netvsc/hn_rxtx.c
> +++ b/drivers/net/netvsc/hn_rxtx.c
> @@ -243,6 +243,7 @@ hn_dev_tx_queue_setup(struct rte_eth_dev *dev,
>  {
>  	struct hn_data *hv = dev->data->dev_private;
>  	struct hn_tx_queue *txq;
> +	struct hn_rx_queue *rxq;
>  	char name[RTE_MEMPOOL_NAMESIZE];
>  	uint32_t tx_free_thresh;
>  	int err = -ENOMEM;
> @@ -301,6 +302,22 @@ hn_dev_tx_queue_setup(struct rte_eth_dev *dev,
>  		goto error;
>  	}
> 
> +	/*
> +	 * If there are more Tx queues than Rx queues, allocate rx_queues
> +	 * with event buffer so that Tx completion messages can still be
> +	 * received
> +	 */
> +	if (queue_idx >= dev->data->nb_rx_queues) {
> +		rxq = hn_rx_queue_alloc(hv, queue_idx, socket_id);

Need to check if rxq is NULL.
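
For example, reusing the error path that hn_dev_tx_queue_setup() already has (just a sketch, the exact handling is up to you):

if (queue_idx >= dev->data->nb_rx_queues) {
	rxq = hn_rx_queue_alloc(hv, queue_idx, socket_id);
	if (!rxq) {
		err = -ENOMEM;
		goto error;
	}
	/* Still no mbuf pool or Rx ring for this queue */
	rxq->mb_pool = NULL;
	rxq->rx_ring = NULL;
	dev->data->rx_queues[queue_idx] = rxq;
}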

> +		/*
> +		 * Don't allocate mbuf pool or rx ring.  RSS is always configured
> +		 * to ensure packets aren't received by this Rx queue.
> +		 */
> +		rxq->mb_pool = NULL;
> +		rxq->rx_ring = NULL;
> +		dev->data->rx_queues[queue_idx] = rxq;
> +	}
> +
>  	txq->agg_szmax  = RTE_MIN(hv->chim_szmax, hv->rndis_agg_size);
>  	txq->agg_pktmax = hv->rndis_agg_pkts;
>  	txq->agg_align  = hv->rndis_agg_align;
> @@ -354,6 +371,17 @@ static void hn_txd_put(struct hn_tx_queue *txq, struct hn_txdesc *txd)
>  	rte_mempool_put(txq->txdesc_pool, txd);
>  }
> 
> +static void
> +hn_rx_queue_free_common(struct hn_rx_queue *rxq)
> +{
> +	if (!rxq)
> +		return;
> +
> +	rte_free(rxq->rxbuf_info);
> +	rte_free(rxq->event_buf);
> +	rte_free(rxq);
> +}
> +
>  void
>  hn_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
>  {
> @@ -364,6 +392,13 @@ hn_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
>  	if (!txq)
>  		return;
> 
> +	/*
> +	 * Free any Rx queues allocated for a Tx queue without a corresponding
> +	 * Rx queue
> +	 */
> +	if (qid >= dev->data->nb_rx_queues)
> +		hn_rx_queue_free_common(dev->data->rx_queues[qid]);
> +
>  	rte_mempool_free(txq->txdesc_pool);
> 
>  	rte_memzone_free(txq->tx_rndis_mz);
> @@ -942,6 +977,13 @@ hn_dev_rx_queue_setup(struct rte_eth_dev *dev,
>  	if (queue_idx == 0) {
>  		rxq = hv->primary;
>  	} else {
> +		/*
> +		 * If the number of Tx queues was previously greater than
> +		 * the number of Rx queues, we may already have allocated
> +		 * an rxq. If so, free it now before allocating a new one.
> +		 */
> +		hn_rx_queue_free_common(dev->data->rx_queues[queue_idx]);

This logic seems strange. How about checking whether rxq is already allocated, and only allocating it if not?

Something like:

if (!dev->data->rx_queues[queue_idx])
	rxq = hn_rx_queue_alloc(hv, queue_idx, socket_id);
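
Or, keeping the existing NULL/-ENOMEM check that the patch already has (again just a sketch):

rxq = dev->data->rx_queues[queue_idx];
if (!rxq)
	rxq = hn_rx_queue_alloc(hv, queue_idx, socket_id);
if (!rxq)
	return -ENOMEM;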



Thanks,

Long
  
Alan Elder March 19, 2024, 2:19 p.m. UTC | #3
Thanks for the feedback, Long.

I've made both changes you suggested, plus an additional one: don't try to allocate an mbuf if the pool is null.

This means that if a packet is received on an Rx queue that isn't being polled, we will see it appear as "mbuf allocation failed" rather than causing a segfault.
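
Roughly speaking the guard looks like the sketch below; the exact v3 hunk may differ, and the function context and counter name here are only illustrative:

	/* Rx queue was created only to receive Tx completions, so it has
	 * no mbuf pool; count the drop instead of handing a NULL pool to
	 * rte_pktmbuf_alloc(). */
	if (unlikely(rxq->mb_pool == NULL)) {
		++rxq->stats.errors;	/* illustrative counter */
		return;
	}
	m = rte_pktmbuf_alloc(rxq->mb_pool);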

Cheers,
Alan

  

Patch

diff --git a/drivers/net/netvsc/hn_ethdev.c b/drivers/net/netvsc/hn_ethdev.c
index b8a32832d7..d7e3f12346 100644
--- a/drivers/net/netvsc/hn_ethdev.c
+++ b/drivers/net/netvsc/hn_ethdev.c
@@ -313,6 +313,15 @@  static int hn_rss_reta_update(struct rte_eth_dev *dev,
 
 		if (reta_conf[idx].mask & mask)
 			hv->rss_ind[i] = reta_conf[idx].reta[shift];
+
+		/*
+		 * Ensure we don't allow config that directs traffic to an Rx
+		 * queue that we aren't going to poll
+		 */
+		if (hv->rss_ind[i] >=  dev->data->nb_rx_queues) {
+			PMD_DRV_LOG(ERR, "RSS distributing traffic to invalid Rx queue");
+			return -EINVAL;
+		}
 	}
 
 	err = hn_rndis_conf_rss(hv, NDIS_RSS_FLAG_DISABLE);
diff --git a/drivers/net/netvsc/hn_rxtx.c b/drivers/net/netvsc/hn_rxtx.c
index 9bf1ec5509..c0aaeaa972 100644
--- a/drivers/net/netvsc/hn_rxtx.c
+++ b/drivers/net/netvsc/hn_rxtx.c
@@ -243,6 +243,7 @@  hn_dev_tx_queue_setup(struct rte_eth_dev *dev,
 {
 	struct hn_data *hv = dev->data->dev_private;
 	struct hn_tx_queue *txq;
+	struct hn_rx_queue *rxq;
 	char name[RTE_MEMPOOL_NAMESIZE];
 	uint32_t tx_free_thresh;
 	int err = -ENOMEM;
@@ -301,6 +302,22 @@  hn_dev_tx_queue_setup(struct rte_eth_dev *dev,
 		goto error;
 	}
 
+	/*
+	 * If there are more Tx queues than Rx queues, allocate rx_queues
+	 * with event buffer so that Tx completion messages can still be
+	 * received
+	 */
+	if (queue_idx >= dev->data->nb_rx_queues) {
+		rxq = hn_rx_queue_alloc(hv, queue_idx, socket_id);
+		/*
+		 * Don't allocate mbuf pool or rx ring.  RSS is always configured
+		 * to ensure packets aren't received by this Rx queue.
+		 */
+		rxq->mb_pool = NULL;
+		rxq->rx_ring = NULL;
+		dev->data->rx_queues[queue_idx] = rxq;
+	}
+
 	txq->agg_szmax  = RTE_MIN(hv->chim_szmax, hv->rndis_agg_size);
 	txq->agg_pktmax = hv->rndis_agg_pkts;
 	txq->agg_align  = hv->rndis_agg_align;
@@ -354,6 +371,17 @@  static void hn_txd_put(struct hn_tx_queue *txq, struct hn_txdesc *txd)
 	rte_mempool_put(txq->txdesc_pool, txd);
 }
 
+static void
+hn_rx_queue_free_common(struct hn_rx_queue *rxq)
+{
+	if (!rxq)
+		return;
+
+	rte_free(rxq->rxbuf_info);
+	rte_free(rxq->event_buf);
+	rte_free(rxq);
+}
+
 void
 hn_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
 {
@@ -364,6 +392,13 @@  hn_dev_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
 	if (!txq)
 		return;
 
+	/*
+	 * Free any Rx queues allocated for a Tx queue without a corresponding
+	 * Rx queue
+	 */
+	if (qid >= dev->data->nb_rx_queues)
+		hn_rx_queue_free_common(dev->data->rx_queues[qid]);
+
 	rte_mempool_free(txq->txdesc_pool);
 
 	rte_memzone_free(txq->tx_rndis_mz);
@@ -942,6 +977,13 @@  hn_dev_rx_queue_setup(struct rte_eth_dev *dev,
 	if (queue_idx == 0) {
 		rxq = hv->primary;
 	} else {
+		/*
+		 * If the number of Tx queues was previously greater than
+		 * the number of Rx queues, we may already have allocated
+		 * an rxq. If so, free it now before allocating a new one.
+		 */
+		hn_rx_queue_free_common(dev->data->rx_queues[queue_idx]);
+
 		rxq = hn_rx_queue_alloc(hv, queue_idx, socket_id);
 		if (!rxq)
 			return -ENOMEM;
@@ -998,9 +1040,7 @@  hn_rx_queue_free(struct hn_rx_queue *rxq, bool keep_primary)
 	if (keep_primary && rxq == rxq->hv->primary)
 		return;
 
-	rte_free(rxq->rxbuf_info);
-	rte_free(rxq->event_buf);
-	rte_free(rxq);
+	hn_rx_queue_free_common(rxq);
 }
 
 void