net/mlx5: fix the Rx queue control management

Message ID 20241104161541.255086-1-bingz@nvidia.com (mailing list archive)
State Accepted, archived
Delegated to: Raslan Darawsheh
Series net/mlx5: fix the Rx queue control management

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/loongarch-compilation success Compilation OK
ci/loongarch-unit-testing success Unit Testing PASS
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS
ci/intel-Functional success Functional PASS
ci/github-robot: build success github build: passed
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-marvell-Functional success Functional Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-unit-amd64-testing success Testing PASS
ci/iol-compile-amd64-testing success Testing PASS
ci/iol-compile-arm64-testing success Testing PASS
ci/iol-unit-arm64-testing success Testing PASS
ci/iol-sample-apps-testing success Testing PASS

Commit Message

Bing Zhao Nov. 4, 2024, 4:15 p.m. UTC
Since the shared Rx queue feature was introduced, the control and private
Rx queue structures have been decoupled; each control structure can be
shared by multiple queues across all representors inside a domain.

Therefore, the control structure should be managed only by the shared
context, not by the private data of each device. The previous workaround
used a flag to record the owner (allocator) of the structure and released
it only when that owning device was closed.

The proper solution is to add a reference count to each control structure
and free the structure only when no reference to it remains, which
eliminates the use-after-free (UAF) issue.

Fixes: f957ac996435 ("net/mlx5: workaround list management of Rx queue control")
Fixes: bcc220cb57d7 ("net/mlx5: fix shared Rx queue list management")
CC: stable@dpdk.org

Signed-off-by: Bing Zhao <bingz@nvidia.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
Acked-by: Dariusz Sosnowski <dsosnowski@nvidia.com>
---
 drivers/net/mlx5/mlx5.h      |  1 -
 drivers/net/mlx5/mlx5_flow.c |  4 ++--
 drivers/net/mlx5/mlx5_rx.h   |  3 +--
 drivers/net/mlx5/mlx5_rxq.c  | 20 ++++++++++----------
 4 files changed, 13 insertions(+), 15 deletions(-)
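
Below is a minimal sketch of the reference-count pattern the commit message
describes, written with plain C11 atomics in place of DPDK's
rte_atomic_*_explicit() wrappers; the struct ctrl, ctrl_create(), ctrl_get()
and ctrl_put() names are illustrative only and do not exist in the driver.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdlib.h>

/* Hypothetical stand-in for the shared Rx queue control structure. */
struct ctrl {
	atomic_uint ctrl_ref; /* one count per owning Rx queue */
	/* ... shared queue state ... */
};

/* The allocator takes the first reference. */
static struct ctrl *
ctrl_create(void)
{
	struct ctrl *c = calloc(1, sizeof(*c));

	if (c != NULL)
		atomic_store_explicit(&c->ctrl_ref, 1, memory_order_relaxed);
	return c;
}

/* Every additional owner (e.g. a representor's Rx queue) takes a reference. */
static void
ctrl_get(struct ctrl *c)
{
	atomic_fetch_add_explicit(&c->ctrl_ref, 1, memory_order_relaxed);
}

/*
 * Dropping the last reference frees the structure, regardless of which
 * device originally allocated it; returns true when the object was freed.
 */
static bool
ctrl_put(struct ctrl *c)
{
	if (atomic_fetch_sub_explicit(&c->ctrl_ref, 1,
				      memory_order_relaxed) == 1) {
		free(c);
		return true;
	}
	return false;
}

In the patch itself, ctrl_ref lives in struct mlx5_rxq_ctrl, is incremented
in mlx5_rx_queue_setup() when a queue joins the owner list, and is
decremented in mlx5_rxq_release(); LIST_REMOVE() and mlx5_free() run only
when the counter drops to zero.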
  

Comments

Raslan Darawsheh Nov. 13, 2024, 1:38 p.m. UTC | #1
Hi,

From: Bing Zhao <bingz@nvidia.com>
Sent: Monday, November 4, 2024 6:15 PM
To: Dariusz Sosnowski; Slava Ovsiienko; dev@dpdk.org; Raslan Darawsheh
Cc: Ori Kam; Suanming Mou; Matan Azrad; stable@dpdk.org
Subject: [PATCH] net/mlx5: fix the Rx queue control management

Patch applied to next-net-mlx,

Kindest regards,
Raslan Darawsheh
  

Patch

diff --git a/drivers/net/mlx5/mlx5.h b/drivers/net/mlx5/mlx5.h
index b6be4646ef..6db02da3b8 100644
--- a/drivers/net/mlx5/mlx5.h
+++ b/drivers/net/mlx5/mlx5.h
@@ -1978,7 +1978,6 @@  struct mlx5_priv {
 	uint32_t ctrl_flows; /* Control flow rules. */
 	rte_spinlock_t flow_list_lock;
 	struct mlx5_obj_ops obj_ops; /* HW objects operations. */
-	LIST_HEAD(rxq, mlx5_rxq_ctrl) rxqsctrl; /* DPDK Rx queues. */
 	LIST_HEAD(rxqobj, mlx5_rxq_obj) rxqsobj; /* Verbs/DevX Rx queues. */
 	struct mlx5_list *hrxqs; /* Hash Rx queues. */
 	LIST_HEAD(txq, mlx5_txq_ctrl) txqsctrl; /* DPDK Tx queues. */
diff --git a/drivers/net/mlx5/mlx5_flow.c b/drivers/net/mlx5/mlx5_flow.c
index 9c43201e05..acae2bb063 100644
--- a/drivers/net/mlx5/mlx5_flow.c
+++ b/drivers/net/mlx5/mlx5_flow.c
@@ -1648,13 +1648,13 @@  flow_rxq_mark_flag_set(struct rte_eth_dev *dev)
 			    opriv->domain_id != priv->domain_id ||
 			    opriv->mark_enabled)
 				continue;
-			LIST_FOREACH(rxq_ctrl, &opriv->rxqsctrl, next) {
+			LIST_FOREACH(rxq_ctrl, &opriv->sh->shared_rxqs, share_entry) {
 				rxq_ctrl->rxq.mark = 1;
 			}
 			opriv->mark_enabled = 1;
 		}
 	} else {
-		LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) {
+		LIST_FOREACH(rxq_ctrl, &priv->sh->shared_rxqs, share_entry) {
 			rxq_ctrl->rxq.mark = 1;
 		}
 		priv->mark_enabled = 1;
diff --git a/drivers/net/mlx5/mlx5_rx.h b/drivers/net/mlx5/mlx5_rx.h
index 9bcb43b007..da7c448948 100644
--- a/drivers/net/mlx5/mlx5_rx.h
+++ b/drivers/net/mlx5/mlx5_rx.h
@@ -151,13 +151,13 @@  struct __rte_cache_aligned mlx5_rxq_data {
 /* RX queue control descriptor. */
 struct mlx5_rxq_ctrl {
 	struct mlx5_rxq_data rxq; /* Data path structure. */
-	LIST_ENTRY(mlx5_rxq_ctrl) next; /* Pointer to the next element. */
 	LIST_HEAD(priv, mlx5_rxq_priv) owners; /* Owner rxq list. */
 	struct mlx5_rxq_obj *obj; /* Verbs/DevX elements. */
 	struct mlx5_dev_ctx_shared *sh; /* Shared context. */
 	bool is_hairpin; /* Whether RxQ type is Hairpin. */
 	unsigned int socket; /* CPU socket ID for allocations. */
 	LIST_ENTRY(mlx5_rxq_ctrl) share_entry; /* Entry in shared RXQ list. */
+	RTE_ATOMIC(uint32_t) ctrl_ref; /* Reference counter. */
 	uint32_t share_group; /* Group ID of shared RXQ. */
 	uint16_t share_qid; /* Shared RxQ ID in group. */
 	unsigned int started:1; /* Whether (shared) RXQ has been started. */
@@ -173,7 +173,6 @@  struct mlx5_rxq_ctrl {
 /* RX queue private data. */
 struct mlx5_rxq_priv {
 	uint16_t idx; /* Queue index. */
-	bool possessor; /* Shared rxq_ctrl allocated for the 1st time. */
 	RTE_ATOMIC(uint32_t) refcnt; /* Reference counter. */
 	struct mlx5_rxq_ctrl *ctrl; /* Shared Rx Queue. */
 	LIST_ENTRY(mlx5_rxq_priv) owner_entry; /* Entry in shared rxq_ctrl. */
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index 5eac224b76..d437835b73 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -946,7 +946,6 @@  mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 			rte_errno = ENOMEM;
 			return -rte_errno;
 		}
-		rxq->possessor = true;
 	}
 	rxq->priv = priv;
 	rxq->idx = idx;
@@ -954,6 +953,7 @@  mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 	/* Join owner list. */
 	LIST_INSERT_HEAD(&rxq_ctrl->owners, rxq, owner_entry);
 	rxq->ctrl = rxq_ctrl;
+	rte_atomic_fetch_add_explicit(&rxq_ctrl->ctrl_ref, 1, rte_memory_order_relaxed);
 	mlx5_rxq_ref(dev, idx);
 	DRV_LOG(DEBUG, "port %u adding Rx queue %u to list",
 		dev->data->port_id, idx);
@@ -1970,9 +1970,9 @@  mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
 		tmpl->rxq.shared = 1;
 		tmpl->share_group = conf->share_group;
 		tmpl->share_qid = conf->share_qid;
-		LIST_INSERT_HEAD(&priv->sh->shared_rxqs, tmpl, share_entry);
 	}
-	LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
+	LIST_INSERT_HEAD(&priv->sh->shared_rxqs, tmpl, share_entry);
+	rte_atomic_store_explicit(&tmpl->ctrl_ref, 1, rte_memory_order_relaxed);
 	return tmpl;
 error:
 	mlx5_mr_btree_free(&tmpl->rxq.mr_ctrl.cache_bh);
@@ -2024,9 +2024,9 @@  mlx5_rxq_hairpin_new(struct rte_eth_dev *dev, struct mlx5_rxq_priv *rxq,
 	tmpl->rxq.mr_ctrl.cache_bh = (struct mlx5_mr_btree) { 0 };
 	tmpl->rxq.idx = idx;
 	rxq->hairpin_conf = *hairpin_conf;
-	rxq->possessor = true;
 	mlx5_rxq_ref(dev, idx);
-	LIST_INSERT_HEAD(&priv->rxqsctrl, tmpl, next);
+	LIST_INSERT_HEAD(&priv->sh->shared_rxqs, tmpl, share_entry);
+	rte_atomic_store_explicit(&tmpl->ctrl_ref, 1, rte_memory_order_relaxed);
 	return tmpl;
 }
 
@@ -2292,16 +2292,16 @@  mlx5_rxq_release(struct rte_eth_dev *dev, uint16_t idx)
 					RTE_ETH_QUEUE_STATE_STOPPED;
 		}
 	} else { /* Refcnt zero, closing device. */
-		if (rxq->possessor)
-			LIST_REMOVE(rxq_ctrl, next);
 		LIST_REMOVE(rxq, owner_entry);
 		if (LIST_EMPTY(&rxq_ctrl->owners)) {
 			if (!rxq_ctrl->is_hairpin)
 				mlx5_mr_btree_free
 					(&rxq_ctrl->rxq.mr_ctrl.cache_bh);
-			if (rxq_ctrl->rxq.shared)
+			if (rte_atomic_fetch_sub_explicit(&rxq_ctrl->ctrl_ref, 1,
+			    rte_memory_order_relaxed) == 1) {
 				LIST_REMOVE(rxq_ctrl, share_entry);
-			mlx5_free(rxq_ctrl);
+				mlx5_free(rxq_ctrl);
+			}
 		}
 		dev->data->rx_queues[idx] = NULL;
 		mlx5_free(rxq);
@@ -2326,7 +2326,7 @@  mlx5_rxq_verify(struct rte_eth_dev *dev)
 	struct mlx5_rxq_ctrl *rxq_ctrl;
 	int ret = 0;
 
-	LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) {
+	LIST_FOREACH(rxq_ctrl, &priv->sh->shared_rxqs, share_entry) {
 		DRV_LOG(DEBUG, "port %u Rx Queue %u still referenced",
 			dev->data->port_id, rxq_ctrl->rxq.idx);
 		++ret;