[dpdk-dev,v2,3/5] net/mlx5: use buffer address for LKEY search
Checks
Commit Message
When searching for an LKEY, if the search key is the mempool pointer, the 2nd
cacheline has to be accessed and it even requires checking whether a buffer is
indirect on every search. Instead, using the buffer address as the search key
can reduce the cycles taken. And caching the last-hit entry is beneficial as well.
Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
---
drivers/net/mlx5/mlx5_mr.c | 17 ++++++++++++++---
drivers/net/mlx5/mlx5_rxtx.c | 39 +++++++++++++++++++++------------------
drivers/net/mlx5/mlx5_rxtx.h | 4 +++-
drivers/net/mlx5/mlx5_txq.c | 3 +--
4 files changed, 39 insertions(+), 24 deletions(-)
Comments
On Fri, Jun 30, 2017 at 12:23:31PM -0700, Yongseok Koh wrote:
> When searching LKEY, if search key is mempool pointer, the 2nd cacheline
> has to be accessed and it even requires to check whether a buffer is
> indirect per every search. Instead, using address for search key can reduce
> cycles taken. And caching the last hit entry is beneficial as well.
>
> Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
> ---
> drivers/net/mlx5/mlx5_mr.c | 17 ++++++++++++++---
> drivers/net/mlx5/mlx5_rxtx.c | 39 +++++++++++++++++++++------------------
> drivers/net/mlx5/mlx5_rxtx.h | 4 +++-
> drivers/net/mlx5/mlx5_txq.c | 3 +--
> 4 files changed, 39 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c
> index 0a3638460..287335179 100644
> --- a/drivers/net/mlx5/mlx5_mr.c
> +++ b/drivers/net/mlx5/mlx5_mr.c
> @@ -265,18 +266,28 @@ txq_mp2mr_iter(struct rte_mempool *mp, void *arg)
> struct txq_mp2mr_mbuf_check_data data = {
> .ret = 0,
> };
> + uintptr_t start;
> + uintptr_t end;
> unsigned int i;
>
> /* Register mempool only if the first element looks like a mbuf. */
> if (rte_mempool_obj_iter(mp, txq_mp2mr_mbuf_check, &data) == 0 ||
> data.ret == -1)
> return;
> + if (mlx5_check_mempool(mp, &start, &end) != 0) {
> + ERROR("mempool %p: not virtually contiguous",
> + (void *)mp);
> + return;
> + }
> for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i) {
> - if (unlikely(txq_ctrl->txq.mp2mr[i].mp == NULL)) {
> + struct ibv_mr *mr = txq_ctrl->txq.mp2mr[i].mr;
> +
> + if (unlikely(mr == NULL)) {
> /* Unknown MP, add a new MR for it. */
> break;
> }
> - if (txq_ctrl->txq.mp2mr[i].mp == mp)
> + if (start >= (uintptr_t)mr->addr &&
> + end <= (uintptr_t)mr->addr + mr->length)
> return;
> }
> txq_mp2mr_reg(&txq_ctrl->txq, mp, i);
if (start >= (uintptr_t)mr->addr &&
end <= (uintptr_t)mr->addr + mr->length)
Is it expected to have a memory region bigger than the memory pool
space? I mean, I was expecting to see strict equality in the addresses.
Regards,
> On Jul 3, 2017, at 7:06 AM, Nélio Laranjeiro <nelio.laranjeiro@6wind.com> wrote:
>
> On Fri, Jun 30, 2017 at 12:23:31PM -0700, Yongseok Koh wrote:
>> When searching LKEY, if search key is mempool pointer, the 2nd cacheline
>> has to be accessed and it even requires to check whether a buffer is
>> indirect per every search. Instead, using address for search key can reduce
>> cycles taken. And caching the last hit entry is beneficial as well.
>>
>> Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
>> ---
>> drivers/net/mlx5/mlx5_mr.c | 17 ++++++++++++++---
>> drivers/net/mlx5/mlx5_rxtx.c | 39 +++++++++++++++++++++------------------
>> drivers/net/mlx5/mlx5_rxtx.h | 4 +++-
>> drivers/net/mlx5/mlx5_txq.c | 3 +--
>> 4 files changed, 39 insertions(+), 24 deletions(-)
>>
>> diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c
>> index 0a3638460..287335179 100644
>> --- a/drivers/net/mlx5/mlx5_mr.c
>> +++ b/drivers/net/mlx5/mlx5_mr.c
>> @@ -265,18 +266,28 @@ txq_mp2mr_iter(struct rte_mempool *mp, void *arg)
>> struct txq_mp2mr_mbuf_check_data data = {
>> .ret = 0,
>> };
>> + uintptr_t start;
>> + uintptr_t end;
>> unsigned int i;
>>
>> /* Register mempool only if the first element looks like a mbuf. */
>> if (rte_mempool_obj_iter(mp, txq_mp2mr_mbuf_check, &data) == 0 ||
>> data.ret == -1)
>> return;
>> + if (mlx5_check_mempool(mp, &start, &end) != 0) {
>> + ERROR("mempool %p: not virtually contiguous",
>> + (void *)mp);
>> + return;
>> + }
>> for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i) {
>> - if (unlikely(txq_ctrl->txq.mp2mr[i].mp == NULL)) {
>> + struct ibv_mr *mr = txq_ctrl->txq.mp2mr[i].mr;
>> +
>> + if (unlikely(mr == NULL)) {
>> /* Unknown MP, add a new MR for it. */
>> break;
>> }
>> - if (txq_ctrl->txq.mp2mr[i].mp == mp)
>> + if (start >= (uintptr_t)mr->addr &&
>> + end <= (uintptr_t)mr->addr + mr->length)
>> return;
>> }
>> txq_mp2mr_reg(&txq_ctrl->txq, mp, i);
>
> if (start >= (uintptr_t)mr->addr &&
> end <= (uintptr_t)mr->addr + mr->length)
>
> Is this expected to have a memory region bigger than the memory pool
> space? I mean I was expecting to see strict equality in the addresses.
In mlx5_mp2mr(), the start/end addresses of a memory region are rounded
(start down, end up) to the hugepage size boundary, so the MR can cover
more than the mempool space.
struct ibv_mr *
mlx5_mp2mr(struct ibv_pd *pd, struct rte_mempool *mp)
{
[...]
/* Round start and end to page boundary if found in memory segments. */
for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i) {
uintptr_t addr = (uintptr_t)ms[i].addr;
size_t len = ms[i].len;
unsigned int align = ms[i].hugepage_sz;
if ((start > addr) && (start < addr + len))
start = RTE_ALIGN_FLOOR(start, align);
if ((end > addr) && (end < addr + len))
end = RTE_ALIGN_CEIL(end, align);
}
Thanks,
Yongseok
On Mon, Jul 03, 2017 at 08:54:43PM +0000, Yongseok Koh wrote:
>
> > On Jul 3, 2017, at 7:06 AM, Nélio Laranjeiro <nelio.laranjeiro@6wind.com> wrote:
> >
> > On Fri, Jun 30, 2017 at 12:23:31PM -0700, Yongseok Koh wrote:
> >> When searching LKEY, if search key is mempool pointer, the 2nd cacheline
> >> has to be accessed and it even requires to check whether a buffer is
> >> indirect per every search. Instead, using address for search key can reduce
> >> cycles taken. And caching the last hit entry is beneficial as well.
> >>
> >> Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
> >> ---
> >> drivers/net/mlx5/mlx5_mr.c | 17 ++++++++++++++---
> >> drivers/net/mlx5/mlx5_rxtx.c | 39 +++++++++++++++++++++------------------
> >> drivers/net/mlx5/mlx5_rxtx.h | 4 +++-
> >> drivers/net/mlx5/mlx5_txq.c | 3 +--
> >> 4 files changed, 39 insertions(+), 24 deletions(-)
> >>
> >> diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c
> >> index 0a3638460..287335179 100644
> >> --- a/drivers/net/mlx5/mlx5_mr.c
> >> +++ b/drivers/net/mlx5/mlx5_mr.c
> >> @@ -265,18 +266,28 @@ txq_mp2mr_iter(struct rte_mempool *mp, void *arg)
> >> struct txq_mp2mr_mbuf_check_data data = {
> >> .ret = 0,
> >> };
> >> + uintptr_t start;
> >> + uintptr_t end;
> >> unsigned int i;
> >>
> >> /* Register mempool only if the first element looks like a mbuf. */
> >> if (rte_mempool_obj_iter(mp, txq_mp2mr_mbuf_check, &data) == 0 ||
> >> data.ret == -1)
> >> return;
> >> + if (mlx5_check_mempool(mp, &start, &end) != 0) {
> >> + ERROR("mempool %p: not virtually contiguous",
> >> + (void *)mp);
> >> + return;
> >> + }
> >> for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i) {
> >> - if (unlikely(txq_ctrl->txq.mp2mr[i].mp == NULL)) {
> >> + struct ibv_mr *mr = txq_ctrl->txq.mp2mr[i].mr;
> >> +
> >> + if (unlikely(mr == NULL)) {
> >> /* Unknown MP, add a new MR for it. */
> >> break;
> >> }
> >> - if (txq_ctrl->txq.mp2mr[i].mp == mp)
> >> + if (start >= (uintptr_t)mr->addr &&
> >> + end <= (uintptr_t)mr->addr + mr->length)
> >> return;
> >> }
> >> txq_mp2mr_reg(&txq_ctrl->txq, mp, i);
> >
> > if (start >= (uintptr_t)mr->addr &&
> > end <= (uintptr_t)mr->addr + mr->length)
> >
> > Is this expected to have a memory region bigger than the memory pool
> > space? I mean I was expecting to see strict equality in the addresses.
> In mlx5_mp2mr(), start/end of a memory region are rounded up to make it
> aligned to its hugepage size.
>
> struct ibv_mr *
> mlx5_mp2mr(struct ibv_pd *pd, struct rte_mempool *mp)
> {
> [...]
> /* Round start and end to page boundary if found in memory segments. */
> for (i = 0; (i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL); ++i) {
> uintptr_t addr = (uintptr_t)ms[i].addr;
> size_t len = ms[i].len;
> unsigned int align = ms[i].hugepage_sz;
>
> if ((start > addr) && (start < addr + len))
> start = RTE_ALIGN_FLOOR(start, align);
> if ((end > addr) && (end < addr + len))
> end = RTE_ALIGN_CEIL(end, align);
> }
>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
@@ -207,7 +207,8 @@ txq_mp2mr_reg(struct txq *txq, struct rte_mempool *mp, unsigned int idx)
sizeof(txq_ctrl->txq.mp2mr[0])));
}
/* Store the new entry. */
- txq_ctrl->txq.mp2mr[idx].mp = mp;
+ txq_ctrl->txq.mp2mr[idx].start = (uintptr_t)mr->addr;
+ txq_ctrl->txq.mp2mr[idx].end = (uintptr_t)mr->addr + mr->length;
txq_ctrl->txq.mp2mr[idx].mr = mr;
txq_ctrl->txq.mp2mr[idx].lkey = htonl(mr->lkey);
DEBUG("%p: new MR lkey for MP \"%s\" (%p): 0x%08" PRIu32,
@@ -265,18 +266,28 @@ txq_mp2mr_iter(struct rte_mempool *mp, void *arg)
struct txq_mp2mr_mbuf_check_data data = {
.ret = 0,
};
+ uintptr_t start;
+ uintptr_t end;
unsigned int i;
/* Register mempool only if the first element looks like a mbuf. */
if (rte_mempool_obj_iter(mp, txq_mp2mr_mbuf_check, &data) == 0 ||
data.ret == -1)
return;
+ if (mlx5_check_mempool(mp, &start, &end) != 0) {
+ ERROR("mempool %p: not virtually contiguous",
+ (void *)mp);
+ return;
+ }
for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i) {
- if (unlikely(txq_ctrl->txq.mp2mr[i].mp == NULL)) {
+ struct ibv_mr *mr = txq_ctrl->txq.mp2mr[i].mr;
+
+ if (unlikely(mr == NULL)) {
/* Unknown MP, add a new MR for it. */
break;
}
- if (txq_ctrl->txq.mp2mr[i].mp == mp)
+ if (start >= (uintptr_t)mr->addr &&
+ end <= (uintptr_t)mr->addr + mr->length)
return;
}
txq_mp2mr_reg(&txq_ctrl->txq, mp, i);
@@ -77,7 +77,7 @@ static __rte_always_inline void
txq_complete(struct txq *txq);
static __rte_always_inline uint32_t
-txq_mp2mr(struct txq *txq, struct rte_mempool *mp);
+txq_mb2mr(struct txq *txq, struct rte_mbuf *mb);
static __rte_always_inline void
mlx5_tx_dbrec(struct txq *txq, volatile struct mlx5_wqe *wqe);
@@ -352,7 +352,7 @@ txq_mb2mp(struct rte_mbuf *buf)
}
/**
- * Get Memory Region (MR) <-> Memory Pool (MP) association from txq->mp2mr[].
+ * Get Memory Region (MR) <-> rte_mbuf association from txq->mp2mr[].
* Add MP to txq->mp2mr[] if it's not registered yet. If mp2mr[] is full,
* remove an entry first.
*
@@ -365,27 +365,30 @@ txq_mb2mp(struct rte_mbuf *buf)
* mr->lkey on success, (uint32_t)-1 on failure.
*/
static inline uint32_t
-txq_mp2mr(struct txq *txq, struct rte_mempool *mp)
+txq_mb2mr(struct txq *txq, struct rte_mbuf *mb)
{
- unsigned int i;
- uint32_t lkey = (uint32_t)-1;
+ uint16_t i = txq->mr_cache_idx;
+ uintptr_t addr = rte_pktmbuf_mtod(mb, uintptr_t);
+ assert(i < RTE_DIM(txq->mp2mr));
+ if (likely(txq->mp2mr[i].start <= addr && txq->mp2mr[i].end >= addr))
+ return txq->mp2mr[i].lkey;
for (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) {
- if (unlikely(txq->mp2mr[i].mp == NULL)) {
+ if (unlikely(txq->mp2mr[i].mr == NULL)) {
/* Unknown MP, add a new MR for it. */
break;
}
- if (txq->mp2mr[i].mp == mp) {
+ if (txq->mp2mr[i].start <= addr &&
+ txq->mp2mr[i].end >= addr) {
assert(txq->mp2mr[i].lkey != (uint32_t)-1);
assert(htonl(txq->mp2mr[i].mr->lkey) ==
txq->mp2mr[i].lkey);
- lkey = txq->mp2mr[i].lkey;
- break;
+ txq->mr_cache_idx = i;
+ return txq->mp2mr[i].lkey;
}
}
- if (unlikely(lkey == (uint32_t)-1))
- lkey = txq_mp2mr_reg(txq, mp, i);
- return lkey;
+ txq->mr_cache_idx = 0;
+ return txq_mp2mr_reg(txq, txq_mb2mp(mb), i);
}
/**
@@ -770,7 +773,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
naddr = htonll(addr);
*dseg = (rte_v128u32_t){
htonl(length),
- txq_mp2mr(txq, txq_mb2mp(buf)),
+ txq_mb2mr(txq, buf),
naddr,
naddr >> 32,
};
@@ -809,7 +812,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
naddr = htonll(rte_pktmbuf_mtod(buf, uintptr_t));
*dseg = (rte_v128u32_t){
htonl(length),
- txq_mp2mr(txq, txq_mb2mp(buf)),
+ txq_mb2mr(txq, buf),
naddr,
naddr >> 32,
};
@@ -1051,7 +1054,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
addr = rte_pktmbuf_mtod(buf, uintptr_t);
*dseg = (struct mlx5_wqe_data_seg){
.byte_count = htonl(DATA_LEN(buf)),
- .lkey = txq_mp2mr(txq, txq_mb2mp(buf)),
+ .lkey = txq_mb2mr(txq, buf),
.addr = htonll(addr),
};
#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
@@ -1297,7 +1300,7 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
addr = rte_pktmbuf_mtod(buf, uintptr_t);
*dseg = (struct mlx5_wqe_data_seg){
.byte_count = htonl(DATA_LEN(buf)),
- .lkey = txq_mp2mr(txq, txq_mb2mp(buf)),
+ .lkey = txq_mb2mr(txq, buf),
.addr = htonll(addr),
};
#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
@@ -1604,7 +1607,7 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
addr = rte_pktmbuf_mtod(buf, uintptr_t);
*dseg = (struct mlx5_wqe_data_seg){
.byte_count = htonl(DATA_LEN(buf)),
- .lkey = txq_mp2mr(txq, txq_mb2mp(buf)),
+ .lkey = txq_mb2mr(txq, buf),
.addr = htonll(addr),
};
#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
@@ -1687,7 +1690,7 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
naddr = htonll(addr);
*dseg = (rte_v128u32_t) {
htonl(length),
- txq_mp2mr(txq, txq_mb2mp(buf)),
+ txq_mb2mr(txq, buf),
naddr,
naddr >> 32,
};
@@ -267,10 +267,12 @@ struct txq {
volatile uint32_t *cq_db; /* Completion queue doorbell. */
volatile void *bf_reg; /* Blueflame register. */
struct {
- const struct rte_mempool *mp; /* Cached Memory Pool. */
+ uintptr_t start; /* Start address of MR */
+ uintptr_t end; /* End address of MR */
struct ibv_mr *mr; /* Memory Region (for mp). */
uint32_t lkey; /* htonl(mr->lkey) */
} mp2mr[MLX5_PMD_TX_MP_CACHE]; /* MP to MR translation table. */
+ uint16_t mr_cache_idx; /* Index of last hit entry. */
struct rte_mbuf *(*elts)[]; /* TX elements. */
struct mlx5_txq_stats stats; /* TX queue counters. */
} __rte_cache_aligned;
@@ -149,9 +149,8 @@ txq_cleanup(struct txq_ctrl *txq_ctrl)
if (txq_ctrl->cq != NULL)
claim_zero(ibv_destroy_cq(txq_ctrl->cq));
for (i = 0; (i != RTE_DIM(txq_ctrl->txq.mp2mr)); ++i) {
- if (txq_ctrl->txq.mp2mr[i].mp == NULL)
+ if (txq_ctrl->txq.mp2mr[i].mr == NULL)
break;
- assert(txq_ctrl->txq.mp2mr[i].mr != NULL);
claim_zero(ibv_dereg_mr(txq_ctrl->txq.mp2mr[i].mr));
}
memset(txq_ctrl, 0, sizeof(*txq_ctrl));