net/mana: use rte_pktmbuf_alloc_bulk for allocating RX WQEs
Checks
Commit Message
From: Long Li <longli@microsoft.com>
Instead of allocating mbufs one by one during RX, use
rte_pktmbuf_alloc_bulk() to allocate them in a batch.
Signed-off-by: Long Li <longli@microsoft.com>
---
drivers/net/mana/rx.c | 67 +++++++++++++++++++++++++++----------------
1 file changed, 43 insertions(+), 24 deletions(-)
Comments
On Wed, 24 Jan 2024 18:42:42 -0800
longli@linuxonhyperv.com wrote:
> + struct rte_mbuf **mbufs;
> +
> + mbufs = rte_calloc("mana_rx_mbufs", count, sizeof(struct rte_mbuf *), 0);
> + if (!mbufs)
> + return -ENOMEM;
Looks good, you might want to make this numa aware in some future version.
Reviewed-by: Stephen Hemminger <stephen@networkplumber.org>
> Subject: Re: [PATCH] net/mana: use rte_pktmbuf_alloc_bulk for allocating RX
> WQEs
>
> On Wed, 24 Jan 2024 18:42:42 -0800
> longli@linuxonhyperv.com wrote:
>
> > + struct rte_mbuf **mbufs;
> > +
> > + mbufs = rte_calloc("mana_rx_mbufs", count, sizeof(struct rte_mbuf *), 0);
> > + if (!mbufs)
> > + return -ENOMEM;
>
> Looks good, you might want to make this numa aware in some future version.
>
> Reviewed-by: Stephen Hemminger <stephen@networkplumber.org>
Thank you!
I'm sending v2 to fix this.
There are a couple of other places in MANA doing memory allocation that should be NUMA aware. I will send another patch to make them NUMA aware.
Long
@@ -2,6 +2,7 @@
* Copyright 2022 Microsoft Corporation
*/
#include <ethdev_driver.h>
+#include <rte_malloc.h>
#include <infiniband/verbs.h>
#include <infiniband/manadv.h>
@@ -59,9 +60,8 @@ mana_rq_ring_doorbell(struct mana_rxq *rxq)
}
static int
-mana_alloc_and_post_rx_wqe(struct mana_rxq *rxq)
+mana_post_rx_wqe(struct mana_rxq *rxq, struct rte_mbuf *mbuf)
{
- struct rte_mbuf *mbuf = NULL;
struct gdma_sgl_element sgl[1];
struct gdma_work_request request;
uint32_t wqe_size_in_bu;
@@ -69,12 +69,6 @@ mana_alloc_and_post_rx_wqe(struct mana_rxq *rxq)
int ret;
struct mana_mr_cache *mr;
- mbuf = rte_pktmbuf_alloc(rxq->mp);
- if (!mbuf) {
- rxq->stats.nombuf++;
- return -ENOMEM;
- }
-
mr = mana_alloc_pmd_mr(&rxq->mr_btree, priv, mbuf);
if (!mr) {
DP_LOG(ERR, "failed to register RX MR");
@@ -121,19 +115,31 @@ mana_alloc_and_post_rx_wqe(struct mana_rxq *rxq)
* Post work requests for a Rx queue.
*/
static int
-mana_alloc_and_post_rx_wqes(struct mana_rxq *rxq)
+mana_alloc_and_post_rx_wqes(struct mana_rxq *rxq, uint32_t count)
{
int ret;
uint32_t i;
+ struct rte_mbuf **mbufs;
+
+ mbufs = rte_calloc("mana_rx_mbufs", count, sizeof(struct rte_mbuf *), 0);
+ if (!mbufs)
+ return -ENOMEM;
+
+ ret = rte_pktmbuf_alloc_bulk(rxq->mp, mbufs, count);
+ if (ret) {
+ DP_LOG(ERR, "failed to allocate mbufs for RX");
+ rxq->stats.nombuf += count;
+ goto fail;
+ }
#ifdef RTE_ARCH_32
rxq->wqe_cnt_to_short_db = 0;
#endif
- for (i = 0; i < rxq->num_desc; i++) {
- ret = mana_alloc_and_post_rx_wqe(rxq);
+ for (i = 0; i < count; i++) {
+ ret = mana_post_rx_wqe(rxq, mbufs[i]);
if (ret) {
DP_LOG(ERR, "failed to post RX ret = %d", ret);
- return ret;
+ goto fail;
}
#ifdef RTE_ARCH_32
@@ -146,6 +152,8 @@ mana_alloc_and_post_rx_wqes(struct mana_rxq *rxq)
mana_rq_ring_doorbell(rxq);
+fail:
+ rte_free(mbufs);
return ret;
}
@@ -404,7 +412,9 @@ mana_start_rx_queues(struct rte_eth_dev *dev)
}
for (i = 0; i < priv->num_queues; i++) {
- ret = mana_alloc_and_post_rx_wqes(dev->data->rx_queues[i]);
+ struct mana_rxq *rxq = dev->data->rx_queues[i];
+
+ ret = mana_alloc_and_post_rx_wqes(rxq, rxq->num_desc);
if (ret)
goto fail;
}
@@ -423,7 +433,7 @@ uint16_t
mana_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
uint16_t pkt_received = 0;
- uint16_t wqe_posted = 0;
+ uint16_t wqe_consumed = 0;
struct mana_rxq *rxq = dpdk_rxq;
struct mana_priv *priv = rxq->priv;
struct rte_mbuf *mbuf;
@@ -535,18 +545,23 @@ mana_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
rxq->gdma_rq.tail += desc->wqe_size_in_bu;
- /* Consume this request and post another request */
- ret = mana_alloc_and_post_rx_wqe(rxq);
- if (ret) {
- DP_LOG(ERR, "failed to post rx wqe ret=%d", ret);
- break;
- }
-
- wqe_posted++;
+ /* Record the number of the RX WQE we need to post to replenish
+ * consumed RX requests
+ */
+ wqe_consumed++;
if (pkt_received == pkts_n)
break;
#ifdef RTE_ARCH_32
+ /* Always post WQE as soon as it's consumed for short DB */
+ ret = mana_alloc_and_post_rx_wqes(rxq, wqe_consumed);
+ if (ret) {
+ DRV_LOG(ERR, "failed to post %d WQEs, ret %d",
+ wqe_consumed, ret);
+ return pkt_received;
+ }
+ wqe_consumed = 0;
+
/* Ring short doorbell if approaching the wqe increment
* limit.
*/
@@ -569,8 +584,12 @@ mana_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
goto repoll;
}
- if (wqe_posted)
- mana_rq_ring_doorbell(rxq);
+ if (wqe_consumed) {
+ ret = mana_alloc_and_post_rx_wqes(rxq, wqe_consumed);
+ if (ret)
+ DRV_LOG(ERR, "failed to post %d WQEs, ret %d",
+ wqe_consumed, ret);
+ }
return pkt_received;
}