common/mlx5: fix mempool registration

Message ID 20211117184936.2581314-1-dkozlyuk@nvidia.com (mailing list archive)
State Superseded, archived
Delegated to: Raslan Darawsheh
Headers
Series common/mlx5: fix mempool registration |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation warning apply issues
ci/iol-testing warning apply patch failure

Commit Message

Dmitry Kozlyuk Nov. 17, 2021, 6:49 p.m. UTC
  Mempool registration was not correctly processing
mempools with RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF flag set
("pinned mempools" for short), because it is not known
at registration time whether the mempool is a pktmbuf one,
and its elements may not yet be initialized to analyze them.
Attempts had been made to recognize such pools,
but there was no robust solution: only the owner of a mempool
(the application or a device) knows its type.
This patch extends common/mlx5 registration code
to accept a hint that the mempool is a pinned one
and uses this capability from net/mlx5 driver.

1. Remove all code assuming pktmbuf pool type
   or trying to recognize the type of a pool.
2. Register pinned mempools used for Rx
   and their external memory on port start.
3. Change Tx slow path logic as follows:
   3.1. Search the mempool database for a memory region (MR)
        by the mbuf pool and its buffer address.
   3.2. If no MR for the address is found for the mempool,
	and the mempool contains only pinned external buffers,
	register the mempool and its external pinned memory.
   3.3. Fall back to using page-based MRs in other cases
	(for example, a buffer with externally attached memory,
	but not from a pinned mempool).

Fixes: 690b2a88c2f7 ("common/mlx5: add mempool registration facilities")
Fixes: fec28ca0e3a9 ("net/mlx5: support mempool registration")

Signed-off-by: Dmitry Kozlyuk <dkozlyuk@nvidia.com>
Reviewed-by: Matan Azrad <matan@nvidia.com>
---
Applies to next-net-mlx.

 drivers/common/mlx5/mlx5_common.c    |  11 +--
 drivers/common/mlx5/mlx5_common_mp.c |   4 +-
 drivers/common/mlx5/mlx5_common_mp.h |  10 ++-
 drivers/common/mlx5/mlx5_common_mr.c | 109 ++++++++++++++++-----------
 drivers/common/mlx5/mlx5_common_mr.h |  12 +--
 drivers/net/mlx5/linux/mlx5_mp_os.c  |   3 +-
 drivers/net/mlx5/mlx5_rxq.c          |   2 +-
 drivers/net/mlx5/mlx5_trigger.c      |   7 +-
 8 files changed, 88 insertions(+), 70 deletions(-)
  

Patch

diff --git a/drivers/common/mlx5/mlx5_common.c b/drivers/common/mlx5/mlx5_common.c
index 66c2c08b7d..f1650f94c6 100644
--- a/drivers/common/mlx5/mlx5_common.c
+++ b/drivers/common/mlx5/mlx5_common.c
@@ -317,9 +317,9 @@  mlx5_dev_to_pci_str(const struct rte_device *dev, char *addr, size_t size)
  */
 static int
 mlx5_dev_mempool_register(struct mlx5_common_device *cdev,
-			  struct rte_mempool *mp)
+			  struct rte_mempool *mp, bool is_extmem)
 {
-	return mlx5_mr_mempool_register(cdev, mp);
+	return mlx5_mr_mempool_register(cdev, mp, is_extmem);
 }
 
 /**
@@ -353,7 +353,7 @@  mlx5_dev_mempool_register_cb(struct rte_mempool *mp, void *arg)
 	struct mlx5_common_device *cdev = arg;
 	int ret;
 
-	ret = mlx5_dev_mempool_register(cdev, mp);
+	ret = mlx5_dev_mempool_register(cdev, mp, false);
 	if (ret < 0 && rte_errno != EEXIST)
 		DRV_LOG(ERR,
 			"Failed to register existing mempool %s for PD %p: %s",
@@ -390,13 +390,10 @@  mlx5_dev_mempool_event_cb(enum rte_mempool_event event, struct rte_mempool *mp,
 			  void *arg)
 {
 	struct mlx5_common_device *cdev = arg;
-	bool extmem = mlx5_mempool_is_extmem(mp);
 
 	switch (event) {
 	case RTE_MEMPOOL_EVENT_READY:
-		if (extmem)
-			break;
-		if (mlx5_dev_mempool_register(cdev, mp) < 0)
+		if (mlx5_dev_mempool_register(cdev, mp, false) < 0)
 			DRV_LOG(ERR,
 				"Failed to register new mempool %s for PD %p: %s",
 				mp->name, cdev->pd, rte_strerror(rte_errno));
diff --git a/drivers/common/mlx5/mlx5_common_mp.c b/drivers/common/mlx5/mlx5_common_mp.c
index 536d61f66c..a7a671b7c5 100644
--- a/drivers/common/mlx5/mlx5_common_mp.c
+++ b/drivers/common/mlx5/mlx5_common_mp.c
@@ -65,7 +65,8 @@  mlx5_mp_req_mr_create(struct mlx5_common_device *cdev, uintptr_t addr)
  */
 int
 mlx5_mp_req_mempool_reg(struct mlx5_common_device *cdev,
-			struct rte_mempool *mempool, bool reg)
+			struct rte_mempool *mempool, bool reg,
+			bool is_extmem)
 {
 	struct rte_mp_msg mp_req;
 	struct rte_mp_msg *mp_res;
@@ -82,6 +83,7 @@  mlx5_mp_req_mempool_reg(struct mlx5_common_device *cdev,
 		     MLX5_MP_REQ_MEMPOOL_UNREGISTER;
 	mp_init_port_agnostic_msg(&mp_req, type);
 	arg->mempool = mempool;
+	arg->is_extmem = is_extmem;
 	arg->cdev = cdev;
 	ret = rte_mp_request_sync(&mp_req, &mp_rep, &ts);
 	if (ret) {
diff --git a/drivers/common/mlx5/mlx5_common_mp.h b/drivers/common/mlx5/mlx5_common_mp.h
index b1e3a41a20..4599ba8f92 100644
--- a/drivers/common/mlx5/mlx5_common_mp.h
+++ b/drivers/common/mlx5/mlx5_common_mp.h
@@ -37,9 +37,12 @@  struct mlx5_mp_arg_queue_id {
 
 struct mlx5_mp_arg_mr_manage {
 	struct mlx5_common_device *cdev;
+	RTE_STD_C11
 	union {
-		struct rte_mempool *mempool;
-		/* MLX5_MP_REQ_MEMPOOL_(UN)REGISTER */
+		struct {
+			struct rte_mempool *mempool;
+			bool is_extmem;
+		}; /* MLX5_MP_REQ_MEMPOOL_(UN)REGISTER */
 		uintptr_t addr; /* MLX5_MP_REQ_CREATE_MR */
 	};
 };
@@ -134,7 +137,8 @@  __rte_internal
 int mlx5_mp_req_mr_create(struct mlx5_common_device *cdev, uintptr_t addr);
 __rte_internal
 int mlx5_mp_req_mempool_reg(struct mlx5_common_device *cdev,
-			    struct rte_mempool *mempool, bool reg);
+			    struct rte_mempool *mempool, bool reg,
+			    bool is_extmem);
 __rte_internal
 int mlx5_mp_req_queue_state_modify(struct mlx5_mp_id *mp_id,
 				   struct mlx5_mp_arg_queue_state_modify *sm);
diff --git a/drivers/common/mlx5/mlx5_common_mr.c b/drivers/common/mlx5/mlx5_common_mr.c
index a7a499f6f9..498fa7513f 100644
--- a/drivers/common/mlx5/mlx5_common_mr.c
+++ b/drivers/common/mlx5/mlx5_common_mr.c
@@ -47,6 +47,8 @@  struct mlx5_mempool_reg {
 	struct mlx5_mempool_mr *mrs;
 	/** Number of memory regions. */
 	unsigned int mrs_n;
+	/** Whether the MRs were created for external pinned memory. */
+	bool is_extmem;
 };
 
 void
@@ -1400,6 +1402,8 @@  mlx5_mempool_get_extmem(struct rte_mempool *mp, struct mlx5_range **out,
  *
  * @param[in] mp
  *   Analyzed mempool.
+ * @param[in] is_extmem
+ *   Whether the pool contains only external pinned buffers.
  * @param[out] out
  *   Receives the ranges, caller must release it with free().
  * @param[out] ount_n
@@ -1409,17 +1413,16 @@  mlx5_mempool_get_extmem(struct rte_mempool *mp, struct mlx5_range **out,
  *   0 on success, (-1) on failure.
  */
 static int
-mlx5_get_mempool_ranges(struct rte_mempool *mp, struct mlx5_range **out,
-			unsigned int *out_n)
+mlx5_get_mempool_ranges(struct rte_mempool *mp, bool is_extmem,
+			struct mlx5_range **out, unsigned int *out_n)
 {
 	struct mlx5_range *chunks;
 	unsigned int chunks_n, contig_n, i;
 	int ret;
 
 	/* Collect the pool underlying memory. */
-	ret = mlx5_mempool_is_extmem(mp) ?
-	      mlx5_mempool_get_extmem(mp, &chunks, &chunks_n) :
-	      mlx5_mempool_get_chunks(mp, &chunks, &chunks_n);
+	ret = is_extmem ? mlx5_mempool_get_extmem(mp, &chunks, &chunks_n) :
+			  mlx5_mempool_get_chunks(mp, &chunks, &chunks_n);
 	if (ret < 0)
 		return ret;
 	/* Merge adjacent chunks and place them at the beginning. */
@@ -1443,6 +1446,8 @@  mlx5_get_mempool_ranges(struct rte_mempool *mp, struct mlx5_range **out,
  *
  * @param[in] mp
  *   Mempool to analyze.
+ * @param[in] is_extmem
+ *   Whether the pool contains only external pinned buffers.
  * @param[out] out
  *   Receives memory ranges to register, aligned to the system page size.
  *   The caller must release them with free().
@@ -1455,14 +1460,15 @@  mlx5_get_mempool_ranges(struct rte_mempool *mp, struct mlx5_range **out,
  *   0 on success, (-1) on failure.
  */
 static int
-mlx5_mempool_reg_analyze(struct rte_mempool *mp, struct mlx5_range **out,
-			 unsigned int *out_n, bool *share_hugepage)
+mlx5_mempool_reg_analyze(struct rte_mempool *mp, bool is_extmem,
+			 struct mlx5_range **out, unsigned int *out_n,
+			 bool *share_hugepage)
 {
 	struct mlx5_range *ranges = NULL;
 	unsigned int i, ranges_n = 0;
 	struct rte_memseg_list *msl;
 
-	if (mlx5_get_mempool_ranges(mp, &ranges, &ranges_n) < 0) {
+	if (mlx5_get_mempool_ranges(mp, is_extmem, &ranges, &ranges_n) < 0) {
 		DRV_LOG(ERR, "Cannot get address ranges for mempool %s",
 			mp->name);
 		return -1;
@@ -1504,7 +1510,8 @@  mlx5_mempool_reg_analyze(struct rte_mempool *mp, struct mlx5_range **out,
 
 /** Create a registration object for the mempool. */
 static struct mlx5_mempool_reg *
-mlx5_mempool_reg_create(struct rte_mempool *mp, unsigned int mrs_n)
+mlx5_mempool_reg_create(struct rte_mempool *mp, unsigned int mrs_n,
+			bool is_extmem)
 {
 	struct mlx5_mempool_reg *mpr = NULL;
 
@@ -1519,6 +1526,7 @@  mlx5_mempool_reg_create(struct rte_mempool *mp, unsigned int mrs_n)
 	mpr->mp = mp;
 	mpr->mrs = (struct mlx5_mempool_mr *)(mpr + 1);
 	mpr->mrs_n = mrs_n;
+	mpr->is_extmem = is_extmem;
 	return mpr;
 }
 
@@ -1583,31 +1591,32 @@  mlx5_mempool_reg_detach(struct mlx5_mempool_reg *mpr)
 
 static int
 mlx5_mr_mempool_register_primary(struct mlx5_mr_share_cache *share_cache,
-				 void *pd, struct rte_mempool *mp)
+				 void *pd, struct rte_mempool *mp,
+				 bool is_extmem)
 {
 	struct mlx5_range *ranges = NULL;
-	struct mlx5_mempool_reg *mpr, *new_mpr;
+	struct mlx5_mempool_reg *mpr, *old_mpr, *new_mpr;
 	unsigned int i, ranges_n;
-	bool share_hugepage;
+	bool share_hugepage, standalone = false;
 	int ret = -1;
 
 	/* Early check to avoid unnecessary creation of MRs. */
 	rte_rwlock_read_lock(&share_cache->rwlock);
-	mpr = mlx5_mempool_reg_lookup(share_cache, mp);
+	old_mpr = mlx5_mempool_reg_lookup(share_cache, mp);
 	rte_rwlock_read_unlock(&share_cache->rwlock);
-	if (mpr != NULL) {
+	if (old_mpr != NULL && (!is_extmem || old_mpr->is_extmem)) {
 		DRV_LOG(DEBUG, "Mempool %s is already registered for PD %p",
 			mp->name, pd);
 		rte_errno = EEXIST;
 		goto exit;
 	}
-	if (mlx5_mempool_reg_analyze(mp, &ranges, &ranges_n,
+	if (mlx5_mempool_reg_analyze(mp, is_extmem, &ranges, &ranges_n,
 				     &share_hugepage) < 0) {
 		DRV_LOG(ERR, "Cannot get mempool %s memory ranges", mp->name);
 		rte_errno = ENOMEM;
 		goto exit;
 	}
-	new_mpr = mlx5_mempool_reg_create(mp, ranges_n);
+	new_mpr = mlx5_mempool_reg_create(mp, ranges_n, is_extmem);
 	if (new_mpr == NULL) {
 		DRV_LOG(ERR,
 			"Cannot create a registration object for mempool %s in PD %p",
@@ -1667,6 +1676,12 @@  mlx5_mr_mempool_register_primary(struct mlx5_mr_share_cache *share_cache,
 	/* Concurrent registration is not supposed to happen. */
 	rte_rwlock_write_lock(&share_cache->rwlock);
 	mpr = mlx5_mempool_reg_lookup(share_cache, mp);
+	if (mpr == old_mpr && old_mpr != NULL) {
+		LIST_REMOVE(old_mpr, next);
+		standalone = mlx5_mempool_reg_detach(mpr);
+		/* No need to flush the cache: old MRs cannot be in use. */
+		mpr = NULL;
+	}
 	if (mpr == NULL) {
 		mlx5_mempool_reg_attach(new_mpr);
 		LIST_INSERT_HEAD(&share_cache->mempool_reg_list, new_mpr, next);
@@ -1679,6 +1694,10 @@  mlx5_mr_mempool_register_primary(struct mlx5_mr_share_cache *share_cache,
 		mlx5_mempool_reg_destroy(share_cache, new_mpr, true);
 		rte_errno = EEXIST;
 		goto exit;
+	} else if (old_mpr != NULL) {
+		DRV_LOG(DEBUG, "Mempool %s registration for PD %p updated for external memory",
+			mp->name, pd);
+		mlx5_mempool_reg_destroy(share_cache, old_mpr, standalone);
 	}
 exit:
 	free(ranges);
@@ -1687,9 +1706,9 @@  mlx5_mr_mempool_register_primary(struct mlx5_mr_share_cache *share_cache,
 
 static int
 mlx5_mr_mempool_register_secondary(struct mlx5_common_device *cdev,
-				   struct rte_mempool *mp)
+				   struct rte_mempool *mp, bool is_extmem)
 {
-	return mlx5_mp_req_mempool_reg(cdev, mp, true);
+	return mlx5_mp_req_mempool_reg(cdev, mp, true, is_extmem);
 }
 
 /**
@@ -1705,16 +1724,17 @@  mlx5_mr_mempool_register_secondary(struct mlx5_common_device *cdev,
  */
 int
 mlx5_mr_mempool_register(struct mlx5_common_device *cdev,
-			 struct rte_mempool *mp)
+			 struct rte_mempool *mp, bool is_extmem)
 {
 	if (mp->flags & RTE_MEMPOOL_F_NON_IO)
 		return 0;
 	switch (rte_eal_process_type()) {
 	case RTE_PROC_PRIMARY:
 		return mlx5_mr_mempool_register_primary(&cdev->mr_scache,
-							cdev->pd, mp);
+							cdev->pd, mp,
+							is_extmem);
 	case RTE_PROC_SECONDARY:
-		return mlx5_mr_mempool_register_secondary(cdev, mp);
+		return mlx5_mr_mempool_register_secondary(cdev, mp, is_extmem);
 	default:
 		return -1;
 	}
@@ -1753,7 +1773,7 @@  static int
 mlx5_mr_mempool_unregister_secondary(struct mlx5_common_device *cdev,
 				     struct rte_mempool *mp)
 {
-	return mlx5_mp_req_mempool_reg(cdev, mp, false);
+	return mlx5_mp_req_mempool_reg(cdev, mp, false, false /* is_extmem */);
 }
 
 /**
@@ -1910,32 +1930,33 @@  mlx5_mr_mempool2mr_bh(struct mlx5_mr_share_cache *share_cache,
 uint32_t
 mlx5_mr_mb2mr_bh(struct mlx5_mr_ctrl *mr_ctrl, struct rte_mbuf *mb)
 {
+	struct mlx5_mprq_buf *buf;
 	uint32_t lkey;
 	uintptr_t addr = (uintptr_t)mb->buf_addr;
 	struct mlx5_common_device *cdev = mr_ctrl->cdev;
+	struct rte_mempool *mp;
 
-	if (cdev->config.mr_mempool_reg_en) {
-		struct rte_mempool *mp = NULL;
-		struct mlx5_mprq_buf *buf;
-
-		if (!RTE_MBUF_HAS_EXTBUF(mb)) {
-			mp = mlx5_mb2mp(mb);
-		} else if (mb->shinfo->free_cb == mlx5_mprq_buf_free_cb) {
-			/* Recover MPRQ mempool. */
-			buf = mb->shinfo->fcb_opaque;
-			mp = buf->mp;
-		}
-		if (mp != NULL) {
-			lkey = mlx5_mr_mempool2mr_bh(&cdev->mr_scache,
-						     mr_ctrl, mp, addr);
-			/*
-			 * Lookup can only fail on invalid input, e.g. "addr"
-			 * is not from "mp" or "mp" has MEMPOOL_F_NON_IO set.
-			 */
-			if (lkey != UINT32_MAX)
-				return lkey;
-		}
-		/* Fallback for generic mechanism in corner cases. */
+	/* Recover MPRQ mempool. */
+	if (RTE_MBUF_HAS_EXTBUF(mb) &&
+	    mb->shinfo->free_cb == mlx5_mprq_buf_free_cb) {
+		buf = mb->shinfo->fcb_opaque;
+		mp = buf->mp;
+	} else {
+		mp = mlx5_mb2mp(mb);
+	}
+	lkey = mlx5_mr_mempool2mr_bh(&cdev->mr_scache,
+				     mr_ctrl, mp, addr);
+	if (lkey != UINT32_MAX)
+		return lkey;
+	/* Register pinned external memory if the mempool is not used for Rx. */
+	if (cdev->config.mr_mempool_reg_en &&
+	    (rte_pktmbuf_priv_flags(mp) & RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF)) {
+		if (mlx5_mr_mempool_register(mr_ctrl->cdev, mp, true) < 0)
+			return UINT32_MAX;
+		lkey = mlx5_mr_mempool2mr_bh(&cdev->mr_scache,
+					     mr_ctrl, mp, addr);
+		MLX5_ASSERT(lkey != UINT32_MAX);
 	}
+	/* Fallback to generic mechanism in corner cases. */
 	return mlx5_mr_addr2mr_bh(mr_ctrl, addr);
 }
diff --git a/drivers/common/mlx5/mlx5_common_mr.h b/drivers/common/mlx5/mlx5_common_mr.h
index 442b9d4694..08035f48ee 100644
--- a/drivers/common/mlx5/mlx5_common_mr.h
+++ b/drivers/common/mlx5/mlx5_common_mr.h
@@ -257,20 +257,10 @@  mlx5_os_set_reg_mr_cb(mlx5_reg_mr_t *reg_mr_cb, mlx5_dereg_mr_t *dereg_mr_cb);
 __rte_internal
 int
 mlx5_mr_mempool_register(struct mlx5_common_device *cdev,
-			 struct rte_mempool *mp);
+			 struct rte_mempool *mp, bool is_extmem);
 __rte_internal
 int
 mlx5_mr_mempool_unregister(struct mlx5_common_device *cdev,
 			   struct rte_mempool *mp);
 
-/** Check if @p mp has buffers pinned in external memory. */
-static inline bool
-mlx5_mempool_is_extmem(struct rte_mempool *mp)
-{
-	return (mp->private_data_size ==
-		sizeof(struct rte_pktmbuf_pool_private)) &&
-	       (mp->elt_size >= sizeof(struct rte_mbuf)) &&
-	       (rte_pktmbuf_priv_flags(mp) & RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF);
-}
-
 #endif /* RTE_PMD_MLX5_COMMON_MR_H_ */
diff --git a/drivers/net/mlx5/linux/mlx5_mp_os.c b/drivers/net/mlx5/linux/mlx5_mp_os.c
index edc5203dd6..c448a3e9eb 100644
--- a/drivers/net/mlx5/linux/mlx5_mp_os.c
+++ b/drivers/net/mlx5/linux/mlx5_mp_os.c
@@ -48,7 +48,8 @@  mlx5_mp_os_handle_port_agnostic(const struct rte_mp_msg *mp_msg,
 		return rte_mp_reply(&mp_res, peer);
 	case MLX5_MP_REQ_MEMPOOL_REGISTER:
 		mp_init_port_agnostic_msg(&mp_res, param->type);
-		res->result = mlx5_mr_mempool_register(mng->cdev, mng->mempool);
+		res->result = mlx5_mr_mempool_register(mng->cdev, mng->mempool,
+						       mng->is_extmem);
 		return rte_mp_reply(&mp_res, peer);
 	case MLX5_MP_REQ_MEMPOOL_UNREGISTER:
 		mp_init_port_agnostic_msg(&mp_res, param->type);
diff --git a/drivers/net/mlx5/mlx5_rxq.c b/drivers/net/mlx5/mlx5_rxq.c
index d5a7155392..a8ef21c6f1 100644
--- a/drivers/net/mlx5/mlx5_rxq.c
+++ b/drivers/net/mlx5/mlx5_rxq.c
@@ -1458,7 +1458,7 @@  mlx5_mprq_alloc_mp(struct rte_eth_dev *dev)
 		rte_errno = ENOMEM;
 		return -rte_errno;
 	}
-	ret = mlx5_mr_mempool_register(priv->sh->cdev, mp);
+	ret = mlx5_mr_mempool_register(priv->sh->cdev, mp, false);
 	if (ret < 0 && rte_errno != EEXIST) {
 		ret = rte_errno;
 		DRV_LOG(ERR, "port %u failed to register a mempool for Multi-Packet RQ",
diff --git a/drivers/net/mlx5/mlx5_trigger.c b/drivers/net/mlx5/mlx5_trigger.c
index e2bfde19c7..bafb41d9cf 100644
--- a/drivers/net/mlx5/mlx5_trigger.c
+++ b/drivers/net/mlx5/mlx5_trigger.c
@@ -147,14 +147,17 @@  mlx5_rxq_mempool_register(struct mlx5_rxq_ctrl *rxq_ctrl)
 	}
 	for (s = 0; s < rxq_ctrl->rxq.rxseg_n; s++) {
 		uint32_t flags;
+		bool is_extmem;
 
 		mp = rxq_ctrl->rxq.rxseg[s].mp;
 		flags = mp != rxq_ctrl->rxq.mprq_mp ?
 			rte_pktmbuf_priv_flags(mp) : 0;
-		ret = mlx5_mr_mempool_register(rxq_ctrl->sh->cdev, mp);
+		is_extmem = (flags & RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF) != 0;
+		ret = mlx5_mr_mempool_register(rxq_ctrl->sh->cdev, mp,
+					       is_extmem);
 		if (ret < 0 && rte_errno != EEXIST)
 			return ret;
-		if ((flags & RTE_PKTMBUF_POOL_F_PINNED_EXT_BUF) == 0)
+		if (!is_extmem)
 			rte_mempool_mem_iter(mp, mlx5_rxq_mempool_register_cb,
 					     &rxq_ctrl->rxq);
 	}