[v3,15/18] common/mlx5: share MR top-half search function

Message ID 20211019205602.3188203-16-michaelba@nvidia.com (mailing list archive)
State Accepted, archived
Delegated to: Thomas Monjalon
Headers
Series mlx5: sharing global MR cache between drivers |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Michael Baum Oct. 19, 2021, 8:55 p.m. UTC
From: Michael Baum <michaelba@oss.nvidia.com>

Add function to search in local liniar cache and use it in the drivers
instead of their functions.

Signed-off-by: Michael Baum <michaelba@oss.nvidia.com>
Acked-by: Matan Azrad <matan@nvidia.com>
---
 drivers/common/mlx5/mlx5_common.h        |  9 ++++
 drivers/common/mlx5/mlx5_common_mr.c     | 52 ++++++++++++++++++++++++
 drivers/common/mlx5/version.map          |  1 +
 drivers/compress/mlx5/mlx5_compress.c    | 38 +----------------
 drivers/crypto/mlx5/mlx5_crypto.c        | 38 +----------------
 drivers/regex/mlx5/mlx5_regex_fastpath.c | 34 +++++-----------
 6 files changed, 77 insertions(+), 95 deletions(-)
  

Patch

diff --git a/drivers/common/mlx5/mlx5_common.h b/drivers/common/mlx5/mlx5_common.h
index a863fb2b26..8df4f32aa2 100644
--- a/drivers/common/mlx5/mlx5_common.h
+++ b/drivers/common/mlx5/mlx5_common.h
@@ -22,6 +22,7 @@ 
 #include "mlx5_prm.h"
 #include "mlx5_devx_cmds.h"
 #include "mlx5_common_os.h"
+#include "mlx5_common_mr.h"
 
 /* Reported driver name. */
 #define MLX5_PCI_DRIVER_NAME "mlx5_pci"
@@ -447,6 +448,14 @@  __rte_internal
 bool
 mlx5_dev_is_pci(const struct rte_device *dev);
 
+/* mlx5_common_mr.c */
+
+__rte_internal
+uint32_t
+mlx5_mr_mb2mr(struct mlx5_common_device *cdev, struct mlx5_mp_id *mp_id,
+	      struct mlx5_mr_ctrl *mr_ctrl, struct rte_mbuf *mbuf,
+	      struct mlx5_mr_share_cache *share_cache);
+
 /* mlx5_common_os.c */
 
 int mlx5_os_open_device(struct mlx5_common_device *cdev, uint32_t classes);
diff --git a/drivers/common/mlx5/mlx5_common_mr.c b/drivers/common/mlx5/mlx5_common_mr.c
index 43dc3d88ce..4de1c25f2a 100644
--- a/drivers/common/mlx5/mlx5_common_mr.c
+++ b/drivers/common/mlx5/mlx5_common_mr.c
@@ -576,6 +576,8 @@  mr_find_contig_memsegs_cb(const struct rte_memseg_list *msl,
  *
  * @param pd
  *   Pointer to pd of a device (net, regex, vdpa,...).
+ * @param mp_id
+ *   Multi-process identifier, may be NULL for the primary process.
  * @param share_cache
  *   Pointer to a global shared MR cache.
  * @param[out] entry
@@ -867,6 +869,8 @@  mlx5_mr_create_primary(void *pd,
  *
  * @param pd
  *   Pointer to pd handle of a device (net, regex, vdpa,...).
+ * @param mp_id
+ *   Multi-process identifier, may be NULL for the primary process.
  * @param share_cache
  *   Pointer to a global shared MR cache.
  * @param[out] entry
@@ -874,6 +878,8 @@  mlx5_mr_create_primary(void *pd,
  *   created. If failed to create one, this will not be updated.
  * @param addr
  *   Target virtual address to register.
+ * @param mr_ext_memseg_en
+ *   Configurable flag about external memory segment enable or not.
  *
  * @return
  *   Searched LKey on success, UINT32_MAX on failure and rte_errno is set.
@@ -907,6 +913,8 @@  mlx5_mr_create(void *pd, struct mlx5_mp_id *mp_id,
  *
  * @param pd
  *   Pointer to pd of a device (net, regex, vdpa,...).
+ * @param mp_id
+ *   Multi-process identifier, may be NULL for the primary process.
  * @param share_cache
  *   Pointer to a global shared MR cache.
  * @param mr_ctrl
@@ -916,6 +924,8 @@  mlx5_mr_create(void *pd, struct mlx5_mp_id *mp_id,
  *   created. If failed to create one, this is not written.
  * @param addr
  *   Search key.
+ * @param mr_ext_memseg_en
+ *   Configurable flag about external memory segment enable or not.
  *
  * @return
  *   Searched LKey on success, UINT32_MAX on no match.
@@ -971,12 +981,16 @@  mr_lookup_caches(void *pd, struct mlx5_mp_id *mp_id,
  *
  * @param pd
  *   Pointer to pd of a device (net, regex, vdpa,...).
+ * @param mp_id
+ *   Multi-process identifier, may be NULL for the primary process.
  * @param share_cache
  *   Pointer to a global shared MR cache.
  * @param mr_ctrl
  *   Pointer to per-queue MR control structure.
  * @param addr
  *   Search key.
+ * @param mr_ext_memseg_en
+ *   Configurable flag about external memory segment enable or not.
  *
  * @return
  *   Searched LKey on success, UINT32_MAX on no match.
@@ -1822,3 +1836,41 @@  mlx5_mr_mempool2mr_bh(struct mlx5_mr_share_cache *share_cache,
 	mr_ctrl->head = (mr_ctrl->head + 1) % MLX5_MR_CACHE_N;
 	return lkey;
 }
+
+/**
+ * Query LKey from a packet buffer.
+ *
+ * @param cdev
+ *   Pointer to the mlx5 device structure.
+ * @param mp_id
+ *   Multi-process identifier, may be NULL for the primary process.
+ * @param mr_ctrl
+ *   Pointer to per-queue MR control structure.
+ * @param mbuf
+ *   Pointer to mbuf.
+ * @param share_cache
+ *   Pointer to a global shared MR cache.
+ *
+ * @return
+ *   Searched LKey on success, UINT32_MAX on no match.
+ */
+uint32_t
+mlx5_mr_mb2mr(struct mlx5_common_device *cdev, struct mlx5_mp_id *mp_id,
+	      struct mlx5_mr_ctrl *mr_ctrl, struct rte_mbuf *mbuf,
+	      struct mlx5_mr_share_cache *share_cache)
+{
+	uint32_t lkey;
+	uintptr_t addr = (uintptr_t)mbuf->buf_addr;
+
+	/* Check generation bit to see if there's any change on existing MRs. */
+	if (unlikely(*mr_ctrl->dev_gen_ptr != mr_ctrl->cur_gen))
+		mlx5_mr_flush_local_cache(mr_ctrl);
+	/* Linear search on MR cache array. */
+	lkey = mlx5_mr_lookup_lkey(mr_ctrl->cache, &mr_ctrl->mru,
+				   MLX5_MR_CACHE_N, (uintptr_t)mbuf->buf_addr);
+	if (likely(lkey != UINT32_MAX))
+		return lkey;
+	/* Take slower bottom-half on miss. */
+	return mlx5_mr_addr2mr_bh(cdev->pd, mp_id, share_cache, mr_ctrl,
+				  addr, cdev->config.mr_ext_memseg_en);
+}
diff --git a/drivers/common/mlx5/version.map b/drivers/common/mlx5/version.map
index abe5c12cd8..292c5ede89 100644
--- a/drivers/common/mlx5/version.map
+++ b/drivers/common/mlx5/version.map
@@ -118,6 +118,7 @@  INTERNAL {
 	mlx5_mr_insert_cache;
 	mlx5_mr_lookup_cache;
 	mlx5_mr_lookup_list;
+	mlx5_mr_mb2mr;
 	mlx5_free_mr_by_addr;
 	mlx5_mr_rebuild_cache;
 	mlx5_mr_release_cache;
diff --git a/drivers/compress/mlx5/mlx5_compress.c b/drivers/compress/mlx5/mlx5_compress.c
index c36db0c062..a5cec27894 100644
--- a/drivers/compress/mlx5/mlx5_compress.c
+++ b/drivers/compress/mlx5/mlx5_compress.c
@@ -435,40 +435,6 @@  static struct rte_compressdev_ops mlx5_compress_ops = {
 	.stream_free		= NULL,
 };
 
-/**
- * Query LKey from a packet buffer for QP. If not found, add the mempool.
- *
- * @param priv
- *   Pointer to the priv object.
- * @param addr
- *   Search key.
- * @param mr_ctrl
- *   Pointer to per-queue MR control structure.
- * @param ol_flags
- *   Mbuf offload features.
- *
- * @return
- *   Searched LKey on success, UINT32_MAX on no match.
- */
-static __rte_always_inline uint32_t
-mlx5_compress_addr2mr(struct mlx5_compress_priv *priv, uintptr_t addr,
-		      struct mlx5_mr_ctrl *mr_ctrl, uint64_t ol_flags)
-{
-	uint32_t lkey;
-
-	/* Check generation bit to see if there's any change on existing MRs. */
-	if (unlikely(*mr_ctrl->dev_gen_ptr != mr_ctrl->cur_gen))
-		mlx5_mr_flush_local_cache(mr_ctrl);
-	/* Linear search on MR cache array. */
-	lkey = mlx5_mr_lookup_lkey(mr_ctrl->cache, &mr_ctrl->mru,
-				   MLX5_MR_CACHE_N, addr);
-	if (likely(lkey != UINT32_MAX))
-		return lkey;
-	/* Take slower bottom-half on miss. */
-	return mlx5_mr_addr2mr_bh(priv->cdev->pd, 0, &priv->mr_scache, mr_ctrl,
-				  addr, !!(ol_flags & EXT_ATTACHED_MBUF));
-}
-
 static __rte_always_inline uint32_t
 mlx5_compress_dseg_set(struct mlx5_compress_qp *qp,
 		       volatile struct mlx5_wqe_dseg *restrict dseg,
@@ -478,8 +444,8 @@  mlx5_compress_dseg_set(struct mlx5_compress_qp *qp,
 	uintptr_t addr = rte_pktmbuf_mtod_offset(mbuf, uintptr_t, offset);
 
 	dseg->bcount = rte_cpu_to_be_32(len);
-	dseg->lkey = mlx5_compress_addr2mr(qp->priv, addr, &qp->mr_ctrl,
-					   mbuf->ol_flags);
+	dseg->lkey = mlx5_mr_mb2mr(qp->priv->cdev, 0, &qp->mr_ctrl, mbuf,
+				   &qp->priv->mr_scache);
 	dseg->pbuf = rte_cpu_to_be_64(addr);
 	return dseg->lkey;
 }
diff --git a/drivers/crypto/mlx5/mlx5_crypto.c b/drivers/crypto/mlx5/mlx5_crypto.c
index 6cf6889d21..1105d3fcd5 100644
--- a/drivers/crypto/mlx5/mlx5_crypto.c
+++ b/drivers/crypto/mlx5/mlx5_crypto.c
@@ -303,40 +303,6 @@  mlx5_crypto_get_block_size(struct rte_crypto_op *op)
 	}
 }
 
-/**
- * Query LKey from a packet buffer for QP. If not found, add the mempool.
- *
- * @param priv
- *   Pointer to the priv object.
- * @param addr
- *   Search key.
- * @param mr_ctrl
- *   Pointer to per-queue MR control structure.
- * @param ol_flags
- *   Mbuf offload features.
- *
- * @return
- *   Searched LKey on success, UINT32_MAX on no match.
- */
-static __rte_always_inline uint32_t
-mlx5_crypto_addr2mr(struct mlx5_crypto_priv *priv, uintptr_t addr,
-		    struct mlx5_mr_ctrl *mr_ctrl, uint64_t ol_flags)
-{
-	uint32_t lkey;
-
-	/* Check generation bit to see if there's any change on existing MRs. */
-	if (unlikely(*mr_ctrl->dev_gen_ptr != mr_ctrl->cur_gen))
-		mlx5_mr_flush_local_cache(mr_ctrl);
-	/* Linear search on MR cache array. */
-	lkey = mlx5_mr_lookup_lkey(mr_ctrl->cache, &mr_ctrl->mru,
-				   MLX5_MR_CACHE_N, addr);
-	if (likely(lkey != UINT32_MAX))
-		return lkey;
-	/* Take slower bottom-half on miss. */
-	return mlx5_mr_addr2mr_bh(priv->cdev->pd, 0, &priv->mr_scache, mr_ctrl,
-				  addr, !!(ol_flags & EXT_ATTACHED_MBUF));
-}
-
 static __rte_always_inline uint32_t
 mlx5_crypto_klm_set(struct mlx5_crypto_priv *priv, struct mlx5_crypto_qp *qp,
 		      struct rte_mbuf *mbuf, struct mlx5_wqe_dseg *klm,
@@ -350,8 +316,8 @@  mlx5_crypto_klm_set(struct mlx5_crypto_priv *priv, struct mlx5_crypto_qp *qp,
 	*remain -= data_len;
 	klm->bcount = rte_cpu_to_be_32(data_len);
 	klm->pbuf = rte_cpu_to_be_64(addr);
-	klm->lkey = mlx5_crypto_addr2mr(priv, addr, &qp->mr_ctrl,
-					mbuf->ol_flags);
+	klm->lkey = mlx5_mr_mb2mr(priv->cdev, 0, &qp->mr_ctrl, mbuf,
+				  &priv->mr_scache);
 	return klm->lkey;
 
 }
diff --git a/drivers/regex/mlx5/mlx5_regex_fastpath.c b/drivers/regex/mlx5/mlx5_regex_fastpath.c
index 575b639752..8817e2e074 100644
--- a/drivers/regex/mlx5/mlx5_regex_fastpath.c
+++ b/drivers/regex/mlx5/mlx5_regex_fastpath.c
@@ -123,26 +123,12 @@  set_wqe_ctrl_seg(struct mlx5_wqe_ctrl_seg *seg, uint16_t pi, uint8_t opcode,
  *   Searched LKey on success, UINT32_MAX on no match.
  */
 static inline uint32_t
-mlx5_regex_addr2mr(struct mlx5_regex_priv *priv, struct mlx5_mr_ctrl *mr_ctrl,
-		   struct rte_mbuf *mbuf)
+mlx5_regex_mb2mr(struct mlx5_regex_priv *priv, struct mlx5_mr_ctrl *mr_ctrl,
+		 struct rte_mbuf *mbuf)
 {
-	uintptr_t addr = rte_pktmbuf_mtod(mbuf, uintptr_t);
-	uint32_t lkey;
-
-	/* Check generation bit to see if there's any change on existing MRs. */
-	if (unlikely(*mr_ctrl->dev_gen_ptr != mr_ctrl->cur_gen))
-		mlx5_mr_flush_local_cache(mr_ctrl);
-	/* Linear search on MR cache array. */
-	lkey = mlx5_mr_lookup_lkey(mr_ctrl->cache, &mr_ctrl->mru,
-				   MLX5_MR_CACHE_N, addr);
-	if (likely(lkey != UINT32_MAX))
-		return lkey;
-	/* Take slower bottom-half on miss. */
-	return mlx5_mr_addr2mr_bh(priv->cdev->pd, 0, &priv->mr_scache, mr_ctrl,
-				  addr, !!(mbuf->ol_flags & EXT_ATTACHED_MBUF));
+	return mlx5_mr_mb2mr(priv->cdev, 0, mr_ctrl, mbuf, &priv->mr_scache);
 }
 
-
 static inline void
 __prep_one(struct mlx5_regex_priv *priv, struct mlx5_regex_hw_qp *qp_obj,
 	   struct rte_regex_ops *op, struct mlx5_regex_job *job,
@@ -194,7 +180,7 @@  prep_one(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *qp,
 	struct mlx5_klm klm;
 
 	klm.byte_count = rte_pktmbuf_data_len(op->mbuf);
-	klm.mkey = mlx5_regex_addr2mr(priv, &qp->mr_ctrl, op->mbuf);
+	klm.mkey = mlx5_regex_mb2mr(priv, &qp->mr_ctrl, op->mbuf);
 	klm.address = rte_pktmbuf_mtod(op->mbuf, uintptr_t);
 	__prep_one(priv, qp_obj, op, job, qp_obj->pi, &klm);
 	qp_obj->db_pi = qp_obj->pi;
@@ -317,6 +303,7 @@  prep_regex_umr_wqe_set(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *qp,
 	uint32_t len = 0;
 	struct mlx5_klm *mkey_klm = NULL;
 	struct mlx5_klm klm;
+	uintptr_t addr;
 
 	while (left_ops--)
 		rte_prefetch0(op[left_ops]);
@@ -360,11 +347,12 @@  prep_regex_umr_wqe_set(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *qp,
 			klm.mkey = rte_cpu_to_be_32
 					(qp->jobs[mkey_job_id].imkey->id);
 			while (mbuf) {
+				addr = rte_pktmbuf_mtod(mbuf, uintptr_t);
 				/* Build indirect mkey seg's KLM. */
-				mkey_klm->mkey = mlx5_regex_addr2mr
-						(priv, &qp->mr_ctrl, mbuf);
-				mkey_klm->address = rte_cpu_to_be_64
-					(rte_pktmbuf_mtod(mbuf, uintptr_t));
+				mkey_klm->mkey = mlx5_regex_mb2mr(priv,
+								  &qp->mr_ctrl,
+								  mbuf);
+				mkey_klm->address = rte_cpu_to_be_64(addr);
 				mkey_klm->byte_count = rte_cpu_to_be_32
 						(rte_pktmbuf_data_len(mbuf));
 				/*
@@ -380,7 +368,7 @@  prep_regex_umr_wqe_set(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *qp,
 			klm.byte_count = scatter_size;
 		} else {
 			/* The single mubf case. Build the KLM directly. */
-			klm.mkey = mlx5_regex_addr2mr(priv, &qp->mr_ctrl, mbuf);
+			klm.mkey = mlx5_regex_mb2mr(priv, &qp->mr_ctrl, mbuf);
 			klm.address = rte_pktmbuf_mtod(mbuf, uintptr_t);
 			klm.byte_count = rte_pktmbuf_data_len(mbuf);
 		}