[dpdk-dev,v2,3/5] net/mlx5: use buffer address for LKEY search

Message ID 1342e608a5a7c45b7af17e9228d6ce643e7ae40e.1498850005.git.yskoh@mellanox.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Headers

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Yongseok Koh June 30, 2017, 7:23 p.m. UTC
  When the LKEY search key is a mempool pointer, the 2nd cacheline of the mbuf
has to be accessed, and every search also has to check whether the buffer is
indirect. Using the buffer address as the search key instead reduces the
cycles taken. Caching the last hit entry is beneficial as well.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
 drivers/net/mlx5/mlx5_mr.c   | 17 ++++++++++++++---
 drivers/net/mlx5/mlx5_rxtx.c | 39 +++++++++++++++++++++------------------
 drivers/net/mlx5/mlx5_rxtx.h |  4 +++-
 drivers/net/mlx5/mlx5_txq.c  |  3 +--
 4 files changed, 39 insertions(+), 24 deletions(-)
  

Comments

Nélio Laranjeiro July 3, 2017, 2:06 p.m. UTC | #1
On Fri, Jun 30, 2017 at 12:23:31PM -0700, Yongseok Koh wrote:
> When searching LKEY, if search key is mempool pointer, the 2nd cacheline
> has to be accessed and it even requires to check whether a buffer is
> indirect per every search. Instead, using address for search key can reduce
> cycles taken. And caching the last hit entry is beneficial as well.
> 
> Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
> ---
>  drivers/net/mlx5/mlx5_mr.c   | 17 ++++++++++++++---
>  drivers/net/mlx5/mlx5_rxtx.c | 39 +++++++++++++++++++++------------------
>  drivers/net/mlx5/mlx5_rxtx.h |  4 +++-
>  drivers/net/mlx5/mlx5_txq.c  |  3 +--
>  4 files changed, 39 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c
> index 0a3638460..287335179 100644
> --- a/drivers/net/mlx5/mlx5_mr.c
> +++ b/drivers/net/mlx5/mlx5_mr.c
> @@ -265,18 +266,28 @@ txq_mp2mr_iter(struct rte_mempool *mp, void *arg)
>  	struct txq_mp2mr_mbuf_check_data data = {
>  		.ret = 0,
>  	};
> +	uintptr_t start;
> +	uintptr_t end;
>  	unsigned int i;
>  
>  	/* Register mempool only if the first element looks like a mbuf. */
>  	if (rte_mempool_obj_iter(mp, txq_mp2mr_mbuf_check, &data) == 0 ||
>  			data.ret == -1)
>  		return;
> +	if (mlx5_check_mempool(mp, &start, &end) != 0) {
> +		ERROR("mempool %p: not virtually contiguous",
> +		      (void *)mp);
> +		return;
> +	}
>  	for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i) {
> -		if (unlikely(txq_ctrl->txq.mp2mr[i].mp == NULL)) {
> +		struct ibv_mr *mr = txq_ctrl->txq.mp2mr[i].mr;
> +
> +		if (unlikely(mr == NULL)) {
>  			/* Unknown MP, add a new MR for it. */
>  			break;
>  		}
> -		if (txq_ctrl->txq.mp2mr[i].mp == mp)
> +		if (start >= (uintptr_t)mr->addr &&
> +		    end <= (uintptr_t)mr->addr + mr->length)
>  			return;
>  	}
>  	txq_mp2mr_reg(&txq_ctrl->txq, mp, i);

 if (start >= (uintptr_t)mr->addr &&
     end <= (uintptr_t)mr->addr + mr->length)

Is the memory region expected to be bigger than the memory pool space?
I was expecting strict equality between the addresses.

Regards,
  
Yongseok Koh July 3, 2017, 8:54 p.m. UTC | #2
> On Jul 3, 2017, at 7:06 AM, Nélio Laranjeiro <nelio.laranjeiro@6wind.com> wrote:

> 

> On Fri, Jun 30, 2017 at 12:23:31PM -0700, Yongseok Koh wrote:

>> When searching LKEY, if search key is mempool pointer, the 2nd cacheline

>> has to be accessed and it even requires to check whether a buffer is

>> indirect per every search. Instead, using address for search key can reduce

>> cycles taken. And caching the last hit entry is beneficial as well.

>> 

>> Signed-off-by: Yongseok Koh <yskoh@mellanox.com>

>> ---

>> drivers/net/mlx5/mlx5_mr.c   | 17 ++++++++++++++---

>> drivers/net/mlx5/mlx5_rxtx.c | 39 +++++++++++++++++++++------------------

>> drivers/net/mlx5/mlx5_rxtx.h |  4 +++-

>> drivers/net/mlx5/mlx5_txq.c  |  3 +--

>> 4 files changed, 39 insertions(+), 24 deletions(-)

>> 

>> diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c

>> index 0a3638460..287335179 100644

>> --- a/drivers/net/mlx5/mlx5_mr.c

>> +++ b/drivers/net/mlx5/mlx5_mr.c

>> @@ -265,18 +266,28 @@ txq_mp2mr_iter(struct rte_mempool *mp, void *arg)

>> 	struct txq_mp2mr_mbuf_check_data data = {

>> 		.ret = 0,

>> 	};

>> +	uintptr_t start;

>> +	uintptr_t end;

>> 	unsigned int i;

>> 

>> 	/* Register mempool only if the first element looks like a mbuf. */

>> 	if (rte_mempool_obj_iter(mp, txq_mp2mr_mbuf_check, &data) == 0 ||

>> 			data.ret == -1)

>> 		return;

>> +	if (mlx5_check_mempool(mp, &start, &end) != 0) {

>> +		ERROR("mempool %p: not virtually contiguous",

>> +		      (void *)mp);

>> +		return;

>> +	}

>> 	for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i) {

>> -		if (unlikely(txq_ctrl->txq.mp2mr[i].mp == NULL)) {

>> +		struct ibv_mr *mr = txq_ctrl->txq.mp2mr[i].mr;

>> +

>> +		if (unlikely(mr == NULL)) {

>> 			/* Unknown MP, add a new MR for it. */

>> 			break;

>> 		}

>> -		if (txq_ctrl->txq.mp2mr[i].mp == mp)

>> +		if (start >= (uintptr_t)mr->addr &&

>> +		    end <= (uintptr_t)mr->addr + mr->length)

>> 			return;

>> 	}

>> 	txq_mp2mr_reg(&txq_ctrl->txq, mp, i);

> 

> if (start >= (uintptr_t)mr->addr &&

>     end <= (uintptr_t)mr->addr + mr->length)

> 

> Is this expected to have a memory region bigger than the memory pool

> space?  I mean I was expecting to see strict equality in the addresses.

In mlx5_mp2mr(), the start of a memory region is rounded down and its end is
rounded up to the hugepage size, so the region can be larger than the mempool.

struct ibv_mr *
mlx5_mp2mr(struct ibv_pd *pd, struct rte_mempool *mp)
{
[...]
        /* Round start and end to page boundary if found in memory segments. */
        for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i) {
                uintptr_t addr = (uintptr_t)ms[i].addr;
                size_t len = ms[i].len;
                unsigned int align = ms[i].hugepage_sz;

                if ((start > addr) && (start < addr + len))
                        start = RTE_ALIGN_FLOOR(start, align);
                if ((end > addr) && (end < addr + len))
                        end = RTE_ALIGN_CEIL(end, align);
        }

Thanks,
Yongseok
  
Nélio Laranjeiro July 4, 2017, 6:54 a.m. UTC | #3
On Mon, Jul 03, 2017 at 08:54:43PM +0000, Yongseok Koh wrote:
> 
> > On Jul 3, 2017, at 7:06 AM, Nélio Laranjeiro <nelio.laranjeiro@6wind.com> wrote:
> > 
> > On Fri, Jun 30, 2017 at 12:23:31PM -0700, Yongseok Koh wrote:
> >> When searching LKEY, if search key is mempool pointer, the 2nd cacheline
> >> has to be accessed and it even requires to check whether a buffer is
> >> indirect per every search. Instead, using address for search key can reduce
> >> cycles taken. And caching the last hit entry is beneficial as well.
> >> 
> >> Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
> >> ---
> >> drivers/net/mlx5/mlx5_mr.c   | 17 ++++++++++++++---
> >> drivers/net/mlx5/mlx5_rxtx.c | 39 +++++++++++++++++++++------------------
> >> drivers/net/mlx5/mlx5_rxtx.h |  4 +++-
> >> drivers/net/mlx5/mlx5_txq.c  |  3 +--
> >> 4 files changed, 39 insertions(+), 24 deletions(-)
> >> 
> >> diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c
> >> index 0a3638460..287335179 100644
> >> --- a/drivers/net/mlx5/mlx5_mr.c
> >> +++ b/drivers/net/mlx5/mlx5_mr.c
> >> @@ -265,18 +266,28 @@ txq_mp2mr_iter(struct rte_mempool *mp, void *arg)
> >> 	struct txq_mp2mr_mbuf_check_data data = {
> >> 		.ret = 0,
> >> 	};
> >> +	uintptr_t start;
> >> +	uintptr_t end;
> >> 	unsigned int i;
> >> 
> >> 	/* Register mempool only if the first element looks like a mbuf. */
> >> 	if (rte_mempool_obj_iter(mp, txq_mp2mr_mbuf_check, &data) == 0 ||
> >> 			data.ret == -1)
> >> 		return;
> >> +	if (mlx5_check_mempool(mp, &start, &end) != 0) {
> >> +		ERROR("mempool %p: not virtually contiguous",
> >> +		      (void *)mp);
> >> +		return;
> >> +	}
> >> 	for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i) {
> >> -		if (unlikely(txq_ctrl->txq.mp2mr[i].mp == NULL)) {
> >> +		struct ibv_mr *mr = txq_ctrl->txq.mp2mr[i].mr;
> >> +
> >> +		if (unlikely(mr == NULL)) {
> >> 			/* Unknown MP, add a new MR for it. */
> >> 			break;
> >> 		}
> >> -		if (txq_ctrl->txq.mp2mr[i].mp == mp)
> >> +		if (start >= (uintptr_t)mr->addr &&
> >> +		    end <= (uintptr_t)mr->addr + mr->length)
> >> 			return;
> >> 	}
> >> 	txq_mp2mr_reg(&txq_ctrl->txq, mp, i);
> > 
> > if (start >= (uintptr_t)mr->addr &&
> >     end <= (uintptr_t)mr->addr + mr->length)
> > 
> > Is this expected to have a memory region bigger than the memory pool
> > space?  I mean I was expecting to see strict equality in the addresses.
> In mlx5_mp2mr(), start/end of a memory region are rounded up to make it
> aligned to its hugepage size.
> 
> struct ibv_mr *
> mlx5_mp2mr(struct ibv_pd *pd, struct rte_mempool *mp)
> {
> [...]
>         /* Round start and end to page boundary if found in memory segments. */
>         for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i) {
>                 uintptr_t addr = (uintptr_t)ms[i].addr;
>                 size_t len = ms[i].len;
>                 unsigned int align = ms[i].hugepage_sz;
> 
>                 if ((start > addr) && (start < addr + len))
>                         start = RTE_ALIGN_FLOOR(start, align);
>                 if ((end > addr) && (end < addr + len))
>                         end = RTE_ALIGN_CEIL(end, align);
>         }
> 

Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
  

Patch

diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c
index 0a3638460..287335179 100644
--- a/drivers/net/mlx5/mlx5_mr.c
+++ b/drivers/net/mlx5/mlx5_mr.c
@@ -207,7 +207,8 @@  txq_mp2mr_reg(struct txq *txq, struct rte_mempool *mp, unsigned int idx)
 			 sizeof(txq_ctrl->txq.mp2mr[0])));
 	}
 	/* Store the new entry. */
-	txq_ctrl->txq.mp2mr[idx].mp = mp;
+	txq_ctrl->txq.mp2mr[idx].start = (uintptr_t)mr->addr;
+	txq_ctrl->txq.mp2mr[idx].end = (uintptr_t)mr->addr + mr->length;
 	txq_ctrl->txq.mp2mr[idx].mr = mr;
 	txq_ctrl->txq.mp2mr[idx].lkey = htonl(mr->lkey);
 	DEBUG("%p: new MR lkey for MP \"%s\" (%p): 0x%08" PRIu32,
@@ -265,18 +266,28 @@  txq_mp2mr_iter(struct rte_mempool *mp, void *arg)
 	struct txq_mp2mr_mbuf_check_data data = {
 		.ret = 0,
 	};
+	uintptr_t start;
+	uintptr_t end;
 	unsigned int i;
 
 	/* Register mempool only if the first element looks like a mbuf. */
 	if (rte_mempool_obj_iter(mp, txq_mp2mr_mbuf_check, &data) == 0 ||
 			data.ret == -1)
 		return;
+	if (mlx5_check_mempool(mp, &start, &end) != 0) {
+		ERROR("mempool %p: not virtually contiguous",
+		      (void *)mp);
+		return;
+	}
 	for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i) {
-		if (unlikely(txq_ctrl->txq.mp2mr[i].mp == NULL)) {
+		struct ibv_mr *mr = txq_ctrl->txq.mp2mr[i].mr;
+
+		if (unlikely(mr == NULL)) {
 			/* Unknown MP, add a new MR for it. */
 			break;
 		}
-		if (txq_ctrl->txq.mp2mr[i].mp == mp)
+		if (start >= (uintptr_t)mr->addr &&
+		    end <= (uintptr_t)mr->addr + mr->length)
 			return;
 	}
 	txq_mp2mr_reg(&txq_ctrl->txq, mp, i);
diff --git a/drivers/net/mlx5/mlx5_rxtx.c b/drivers/net/mlx5/mlx5_rxtx.c
index 66593679f..688ee9028 100644
--- a/drivers/net/mlx5/mlx5_rxtx.c
+++ b/drivers/net/mlx5/mlx5_rxtx.c
@@ -77,7 +77,7 @@  static __rte_always_inline void
 txq_complete(struct txq *txq);
 
 static __rte_always_inline uint32_t
-txq_mp2mr(struct txq *txq, struct rte_mempool *mp);
+txq_mb2mr(struct txq *txq, struct rte_mbuf *mb);
 
 static __rte_always_inline void
 mlx5_tx_dbrec(struct txq *txq, volatile struct mlx5_wqe *wqe);
@@ -352,7 +352,7 @@  txq_mb2mp(struct rte_mbuf *buf)
 }
 
 /**
- * Get Memory Region (MR) <-> Memory Pool (MP) association from txq->mp2mr[].
+ * Get Memory Region (MR) <-> rte_mbuf association from txq->mp2mr[].
  * Add MP to txq->mp2mr[] if it's not registered yet. If mp2mr[] is full,
  * remove an entry first.
  *
@@ -365,27 +365,30 @@  txq_mb2mp(struct rte_mbuf *buf)
  *   mr->lkey on success, (uint32_t)-1 on failure.
  */
 static inline uint32_t
-txq_mp2mr(struct txq *txq, struct rte_mempool *mp)
+txq_mb2mr(struct txq *txq, struct rte_mbuf *mb)
 {
-	unsigned int i;
-	uint32_t lkey = (uint32_t)-1;
+	uint16_t i = txq->mr_cache_idx;
+	uintptr_t addr = rte_pktmbuf_mtod(mb, uintptr_t);
 
+	assert(i < RTE_DIM(txq->mp2mr));
+	if (likely(txq->mp2mr[i].start <= addr && txq->mp2mr[i].end >= addr))
+		return txq->mp2mr[i].lkey;
 	for (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) {
-		if (unlikely(txq->mp2mr[i].mp == NULL)) {
+		if (unlikely(txq->mp2mr[i].mr == NULL)) {
 			/* Unknown MP, add a new MR for it. */
 			break;
 		}
-		if (txq->mp2mr[i].mp == mp) {
+		if (txq->mp2mr[i].start <= addr &&
+		    txq->mp2mr[i].end >= addr) {
 			assert(txq->mp2mr[i].lkey != (uint32_t)-1);
 			assert(htonl(txq->mp2mr[i].mr->lkey) ==
 			       txq->mp2mr[i].lkey);
-			lkey = txq->mp2mr[i].lkey;
-			break;
+			txq->mr_cache_idx = i;
+			return txq->mp2mr[i].lkey;
 		}
 	}
-	if (unlikely(lkey == (uint32_t)-1))
-		lkey = txq_mp2mr_reg(txq, mp, i);
-	return lkey;
+	txq->mr_cache_idx = 0;
+	return txq_mp2mr_reg(txq, txq_mb2mp(mb), i);
 }
 
 /**
@@ -770,7 +773,7 @@  mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 			naddr = htonll(addr);
 			*dseg = (rte_v128u32_t){
 				htonl(length),
-				txq_mp2mr(txq, txq_mb2mp(buf)),
+				txq_mb2mr(txq, buf),
 				naddr,
 				naddr >> 32,
 			};
@@ -809,7 +812,7 @@  mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 		naddr = htonll(rte_pktmbuf_mtod(buf, uintptr_t));
 		*dseg = (rte_v128u32_t){
 			htonl(length),
-			txq_mp2mr(txq, txq_mb2mp(buf)),
+			txq_mb2mr(txq, buf),
 			naddr,
 			naddr >> 32,
 		};
@@ -1051,7 +1054,7 @@  mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 			addr = rte_pktmbuf_mtod(buf, uintptr_t);
 			*dseg = (struct mlx5_wqe_data_seg){
 				.byte_count = htonl(DATA_LEN(buf)),
-				.lkey = txq_mp2mr(txq, txq_mb2mp(buf)),
+				.lkey = txq_mb2mr(txq, buf),
 				.addr = htonll(addr),
 			};
 #if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
@@ -1297,7 +1300,7 @@  mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
 				addr = rte_pktmbuf_mtod(buf, uintptr_t);
 				*dseg = (struct mlx5_wqe_data_seg){
 					.byte_count = htonl(DATA_LEN(buf)),
-					.lkey = txq_mp2mr(txq, txq_mb2mp(buf)),
+					.lkey = txq_mb2mr(txq, buf),
 					.addr = htonll(addr),
 				};
 #if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
@@ -1604,7 +1607,7 @@  mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 				addr = rte_pktmbuf_mtod(buf, uintptr_t);
 				*dseg = (struct mlx5_wqe_data_seg){
 					.byte_count = htonl(DATA_LEN(buf)),
-					.lkey = txq_mp2mr(txq, txq_mb2mp(buf)),
+					.lkey = txq_mb2mr(txq, buf),
 					.addr = htonll(addr),
 				};
 #if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
@@ -1687,7 +1690,7 @@  mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
 			naddr = htonll(addr);
 			*dseg = (rte_v128u32_t) {
 				htonl(length),
-				txq_mp2mr(txq, txq_mb2mp(buf)),
+				txq_mb2mr(txq, buf),
 				naddr,
 				naddr >> 32,
 			};
diff --git a/drivers/net/mlx5/mlx5_rxtx.h b/drivers/net/mlx5/mlx5_rxtx.h
index f9b738b4e..51e258a15 100644
--- a/drivers/net/mlx5/mlx5_rxtx.h
+++ b/drivers/net/mlx5/mlx5_rxtx.h
@@ -267,10 +267,12 @@  struct txq {
 	volatile uint32_t *cq_db; /* Completion queue doorbell. */
 	volatile void *bf_reg; /* Blueflame register. */
 	struct {
-		const struct rte_mempool *mp; /* Cached Memory Pool. */
+		uintptr_t start; /* Start address of MR */
+		uintptr_t end; /* End address of MR */
 		struct ibv_mr *mr; /* Memory Region (for mp). */
 		uint32_t lkey; /* htonl(mr->lkey) */
 	} mp2mr[MLX5_PMD_TX_MP_CACHE]; /* MP to MR translation table. */
+	uint16_t mr_cache_idx; /* Index of last hit entry. */
 	struct rte_mbuf *(*elts)[]; /* TX elements. */
 	struct mlx5_txq_stats stats; /* TX queue counters. */
 } __rte_cache_aligned;
diff --git a/drivers/net/mlx5/mlx5_txq.c b/drivers/net/mlx5/mlx5_txq.c
index f0729a2a8..ac9dfc5f0 100644
--- a/drivers/net/mlx5/mlx5_txq.c
+++ b/drivers/net/mlx5/mlx5_txq.c
@@ -149,9 +149,8 @@  txq_cleanup(struct txq_ctrl *txq_ctrl)
 	if (txq_ctrl->cq != NULL)
 		claim_zero(ibv_destroy_cq(txq_ctrl->cq));
 	for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i) {
-		if (txq_ctrl->txq.mp2mr[i].mp == NULL)
+		if (txq_ctrl->txq.mp2mr[i].mr == NULL)
 			break;
-		assert(txq_ctrl->txq.mp2mr[i].mr != NULL);
 		claim_zero(ibv_dereg_mr(txq_ctrl->txq.mp2mr[i].mr));
 	}
 	memset(txq_ctrl, 0, sizeof(*txq_ctrl));