[v4,1/2] net/memif: add a Rx fast path

Message ID: 20220915065825.26169-2-joyce.kong@arm.com
State: Accepted, archived
Delegated to: Ferruh Yigit
Series: add a fast path for memif Rx/Tx

Checks

ci/checkpatch: success (coding style OK)

Commit Message

Joyce Kong Sept. 15, 2022, 6:58 a.m. UTC
  For memif non-zero-copy mode, there is a branch that compares
the mbuf and memif buffer sizes during memory copying. Both
sizes are defined at compile time. If the memif buffer size is
less than or equal to the mbuf size, take a fast Rx memory copy
path that removes this branch and allocates mbufs in bulk.

Removing the branch and allocating mbufs in bulk lead to a
considerable performance uplift.
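
The shape of the change, in isolation: when every memif descriptor
is guaranteed to fit in a single mbuf, the per-segment size check
disappears and mbufs can be taken from the mempool in bulk. Below is
a minimal sketch of that idea, not the driver code; desc_len() and
copy_desc() are hypothetical stand-ins for the real descriptor
accessors.

#include <rte_common.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>

#define BURST 32	/* mirrors MAX_PKT_BURST in the patch */

/* Hypothetical descriptor accessors, for illustration only. */
uint16_t desc_len(uint16_t slot);
void copy_desc(void *dst, uint16_t slot, uint16_t len);

static uint16_t
rx_fast_sketch(struct rte_mempool *mp, struct rte_mbuf **bufs, uint16_t n)
{
	struct rte_mbuf *mbufs[BURST];
	uint16_t i, count = RTE_MIN(n, (uint16_t)BURST);

	/* One bulk allocation replaces 'count' per-packet allocations. */
	if (rte_pktmbuf_alloc_bulk(mp, mbufs, count) < 0)
		return 0;

	for (i = 0; i < count; i++) {
		uint16_t len = desc_len(i);

		/* The descriptor always fits in one mbuf: a single
		 * unconditional copy, no dst_len/src_len bookkeeping.
		 */
		copy_desc(rte_pktmbuf_mtod(mbufs[i], void *), i, len);
		mbufs[i]->data_len = len;
		mbufs[i]->pkt_len = len;
		bufs[i] = mbufs[i];
	}
	return count;
}

The actual patch additionally handles chained descriptors
(MEMIF_DESC_FLAG_NEXT) and frees the unused tail of the bulk array on
early exit; see eth_memif_rx() in the diff below.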

Test with 1p1q (1 port, 1 queue) on an N1SDP AArch64 server:
--------------------------------------------
  buf size  | memif <= mbuf | memif > mbuf |
--------------------------------------------
non-zc gain |     26.85%    |    -0.37%    |
--------------------------------------------
   zc gain  |      8.57%    |     3.04%    |
--------------------------------------------

Test with 1p1q on a Cascade Lake Xeon x86 server:
--------------------------------------------
  buf size  | memif <= mbuf | memif > mbuf |
--------------------------------------------
non-zc gain |     17.54%    |    -0.42%    |
--------------------------------------------
   zc gain  |     10.67%    |     0.26%    |
--------------------------------------------

Signed-off-by: Joyce Kong <joyce.kong@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Acked-by: Morten Brørup <mb@smartsharesystems.com>
---
 drivers/net/memif/rte_eth_memif.c | 146 +++++++++++++++++++++---------
 drivers/net/memif/rte_eth_memif.h |   2 +
 2 files changed, 107 insertions(+), 41 deletions(-)
  

Patch

diff --git a/drivers/net/memif/rte_eth_memif.c b/drivers/net/memif/rte_eth_memif.c
index dd951b8296..762293f636 100644
--- a/drivers/net/memif/rte_eth_memif.c
+++ b/drivers/net/memif/rte_eth_memif.c
@@ -297,7 +297,7 @@  eth_memif_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		rte_eth_devices[mq->in_port].process_private;
 	memif_ring_t *ring = memif_get_ring_from_queue(proc_private, mq);
 	uint16_t cur_slot, last_slot, n_slots, ring_size, mask, s0;
-	uint16_t n_rx_pkts = 0;
+	uint16_t pkts, rx_pkts, n_rx_pkts = 0;
 	uint16_t mbuf_size = rte_pktmbuf_data_room_size(mq->mempool) -
 		RTE_PKTMBUF_HEADROOM;
 	uint16_t src_len, src_off, dst_len, dst_off, cp_len;
@@ -342,66 +342,131 @@  eth_memif_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		goto refill;
 	n_slots = last_slot - cur_slot;
 
-	while (n_slots && n_rx_pkts < nb_pkts) {
-		mbuf_head = rte_pktmbuf_alloc(mq->mempool);
-		if (unlikely(mbuf_head == NULL))
+	if (likely(mbuf_size >= pmd->cfg.pkt_buffer_size)) {
+		struct rte_mbuf *mbufs[MAX_PKT_BURST];
+next_bulk:
+		ret = rte_pktmbuf_alloc_bulk(mq->mempool, mbufs, MAX_PKT_BURST);
+		if (unlikely(ret < 0))
 			goto no_free_bufs;
-		mbuf = mbuf_head;
-		mbuf->port = mq->in_port;
-		dst_off = 0;
 
-next_slot:
-		s0 = cur_slot & mask;
-		d0 = &ring->desc[s0];
+		rx_pkts = 0;
+		pkts = nb_pkts < MAX_PKT_BURST ? nb_pkts : MAX_PKT_BURST;
+		while (n_slots && rx_pkts < pkts) {
+			mbuf_head = mbufs[n_rx_pkts];
+			mbuf = mbuf_head;
 
-		src_len = d0->length;
-		src_off = 0;
+next_slot1:
+			mbuf->port = mq->in_port;
+			s0 = cur_slot & mask;
+			d0 = &ring->desc[s0];
 
-		do {
-			dst_len = mbuf_size - dst_off;
-			if (dst_len == 0) {
-				dst_off = 0;
-				dst_len = mbuf_size;
+			cp_len = d0->length;
+
+			rte_pktmbuf_data_len(mbuf) = cp_len;
+			rte_pktmbuf_pkt_len(mbuf) = cp_len;
+			if (mbuf != mbuf_head)
+				rte_pktmbuf_pkt_len(mbuf_head) += cp_len;
+
+			rte_memcpy(rte_pktmbuf_mtod(mbuf, void *),
+				(uint8_t *)memif_get_buffer(proc_private, d0), cp_len);
 
-				/* store pointer to tail */
+			cur_slot++;
+			n_slots--;
+
+			if (d0->flags & MEMIF_DESC_FLAG_NEXT) {
 				mbuf_tail = mbuf;
 				mbuf = rte_pktmbuf_alloc(mq->mempool);
-				if (unlikely(mbuf == NULL))
+				if (unlikely(mbuf == NULL)) {
+					rte_pktmbuf_free_bulk(mbufs + rx_pkts,
+							MAX_PKT_BURST - rx_pkts);
 					goto no_free_bufs;
-				mbuf->port = mq->in_port;
+				}
 				ret = memif_pktmbuf_chain(mbuf_head, mbuf_tail, mbuf);
 				if (unlikely(ret < 0)) {
 					MIF_LOG(ERR, "number-of-segments-overflow");
 					rte_pktmbuf_free(mbuf);
+					rte_pktmbuf_free_bulk(mbufs + rx_pkts,
+							MAX_PKT_BURST - rx_pkts);
 					goto no_free_bufs;
 				}
+				goto next_slot1;
 			}
-			cp_len = RTE_MIN(dst_len, src_len);
 
-			rte_pktmbuf_data_len(mbuf) += cp_len;
-			rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf);
-			if (mbuf != mbuf_head)
-				rte_pktmbuf_pkt_len(mbuf_head) += cp_len;
+			mq->n_bytes += rte_pktmbuf_pkt_len(mbuf_head);
+			*bufs++ = mbuf_head;
+			rx_pkts++;
+			n_rx_pkts++;
+		}
 
-			rte_memcpy(rte_pktmbuf_mtod_offset(mbuf, void *,
-							   dst_off),
-				(uint8_t *)memif_get_buffer(proc_private, d0) +
-				src_off, cp_len);
+		if (rx_pkts < MAX_PKT_BURST) {
+			rte_pktmbuf_free_bulk(mbufs + rx_pkts, MAX_PKT_BURST - rx_pkts);
+		} else {
+			nb_pkts -= rx_pkts;
+			if (nb_pkts)
+				goto next_bulk;
+		}
+	} else {
+		while (n_slots && n_rx_pkts < nb_pkts) {
+			mbuf_head = rte_pktmbuf_alloc(mq->mempool);
+			if (unlikely(mbuf_head == NULL))
+				goto no_free_bufs;
+			mbuf = mbuf_head;
+			mbuf->port = mq->in_port;
+
+next_slot2:
+			s0 = cur_slot & mask;
+			d0 = &ring->desc[s0];
 
-			src_off += cp_len;
-			dst_off += cp_len;
-			src_len -= cp_len;
-		} while (src_len);
+			src_len = d0->length;
+			dst_off = 0;
+			src_off = 0;
 
-		cur_slot++;
-		n_slots--;
+			do {
+				dst_len = mbuf_size - dst_off;
+				if (dst_len == 0) {
+					dst_off = 0;
+					dst_len = mbuf_size;
+
+					/* store pointer to tail */
+					mbuf_tail = mbuf;
+					mbuf = rte_pktmbuf_alloc(mq->mempool);
+					if (unlikely(mbuf == NULL))
+						goto no_free_bufs;
+					mbuf->port = mq->in_port;
+					ret = memif_pktmbuf_chain(mbuf_head, mbuf_tail, mbuf);
+					if (unlikely(ret < 0)) {
+						MIF_LOG(ERR, "number-of-segments-overflow");
+						rte_pktmbuf_free(mbuf);
+						goto no_free_bufs;
+					}
+				}
+				cp_len = RTE_MIN(dst_len, src_len);
 
-		if (d0->flags & MEMIF_DESC_FLAG_NEXT)
-			goto next_slot;
+				rte_pktmbuf_data_len(mbuf) += cp_len;
+				rte_pktmbuf_pkt_len(mbuf) = rte_pktmbuf_data_len(mbuf);
+				if (mbuf != mbuf_head)
+					rte_pktmbuf_pkt_len(mbuf_head) += cp_len;
 
-		mq->n_bytes += rte_pktmbuf_pkt_len(mbuf_head);
-		*bufs++ = mbuf_head;
-		n_rx_pkts++;
+				rte_memcpy(rte_pktmbuf_mtod_offset(mbuf, void *,
+								   dst_off),
+					(uint8_t *)memif_get_buffer(proc_private, d0) +
+					src_off, cp_len);
+
+				src_off += cp_len;
+				dst_off += cp_len;
+				src_len -= cp_len;
+			} while (src_len);
+
+			cur_slot++;
+			n_slots--;
+
+			if (d0->flags & MEMIF_DESC_FLAG_NEXT)
+				goto next_slot2;
+
+			mq->n_bytes += rte_pktmbuf_pkt_len(mbuf_head);
+			*bufs++ = mbuf_head;
+			n_rx_pkts++;
+		}
 	}
 
 no_free_bufs:
@@ -694,7 +759,6 @@  eth_memif_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	return n_tx_pkts;
 }
 
-
 static int
 memif_tx_one_zc(struct pmd_process_private *proc_private, struct memif_queue *mq,
 		memif_ring_t *ring, struct rte_mbuf *mbuf, const uint16_t mask,
diff --git a/drivers/net/memif/rte_eth_memif.h b/drivers/net/memif/rte_eth_memif.h
index 81e7dceae0..09928ecc86 100644
--- a/drivers/net/memif/rte_eth_memif.h
+++ b/drivers/net/memif/rte_eth_memif.h
@@ -25,6 +25,8 @@ 
 #define ETH_MEMIF_DISC_STRING_SIZE		96
 #define ETH_MEMIF_SECRET_SIZE			24
 
+#define MAX_PKT_BURST				32
+
 extern int memif_logtype;
 
 #define MIF_LOG(level, fmt, args...) \