[19/35] net/ionic: overhaul receive side for performance

Message ID 20221007174336.54354-20-andrew.boyer@amd.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Series net/ionic: updates for 22.11 release

Checks

Context         Check     Description
ci/checkpatch   success   coding style OK

Commit Message

Andrew Boyer Oct. 7, 2022, 5:43 p.m. UTC
Linearize RX mbuf chains in the expanded info array.
Clean one and fill one per CQE (completions are not coalesced).
Touch the mbufs as little as possible in the fill stage.
When touching the mbuf in the clean stage, use the rearm_data unions.
Ring the doorbell once at the end of the bulk clean/fill.

Signed-off-by: Neel Patel <neel.patel@amd.com>
Signed-off-by: Andrew Boyer <andrew.boyer@amd.com>
---
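The rearm_data marker in struct rte_mbuf overlays data_off, refcnt,
nb_segs, and port, so a single 64-bit store re-initializes all four
fields at once. A minimal standalone sketch of the technique, assuming
the DPDK 22.11 mbuf layout (the example_* names are illustrative, not
part of this patch):

#include <string.h>
#include <rte_mbuf.h>

/* Precompute the 8-byte rearm word once, at queue setup time. */
static uint64_t
example_rearm_template(uint16_t port_id)
{
	struct rte_mbuf rxm;

	memset(&rxm, 0, sizeof(rxm));
	rte_mbuf_refcnt_set(&rxm, 1);
	rxm.data_off = RTE_PKTMBUF_HEADROOM;
	rxm.nb_segs = 1;
	rxm.port = port_id;

	/* data_off, refcnt, nb_segs, and port share this word */
	return rxm.rearm_data[0];
}

/* Hot path: one store replaces four per-field writes per mbuf. */
static inline void
example_rearm(struct rte_mbuf *m, uint64_t tmpl)
{
	m->rearm_data[0] = tmpl;
}

This is why ionic_rx_clean_one() below only has to touch rearm_data,
pkt_len, data_len, nb_segs, next, and the offload fields.
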
 doc/guides/rel_notes/release_22_11.rst |   1 +
 drivers/net/ionic/ionic_dev.h          |   2 +-
 drivers/net/ionic/ionic_lif.c          |  49 +++++-
 drivers/net/ionic/ionic_lif.h          |   3 +-
 drivers/net/ionic/ionic_rxtx.c         | 225 ++++++++++++-------------
 drivers/net/ionic/ionic_rxtx.h         |   1 -
 6 files changed, 159 insertions(+), 122 deletions(-)
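
The IONIC_INFO_IDX() change is what enables the linearized layout: the
per-queue info array now reserves num_segs slots per descriptor, so the
mbufs for one scattered frame sit in consecutive info[] entries and are
only chained through mbuf->next once the completion arrives. A sketch of
the indexing, with illustrative names:

#include <stddef.h>
#include <stdint.h>

struct example_queue {
	void **info;        /* num_descs * num_segs entries */
	uint16_t num_segs;  /* worst-case segments per frame */
};

static inline void **
example_info_ptr(const struct example_queue *q, uint32_t desc_idx)
{
	/* Slot 0 holds the head mbuf; slots 1..num_segs-1 hold the
	 * SG segment mbufs for the same descriptor.
	 */
	return &q->info[(size_t)desc_idx * q->num_segs];
}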

Patch

diff --git a/doc/guides/rel_notes/release_22_11.rst b/doc/guides/rel_notes/release_22_11.rst
index 552cc5b62c..d7eced510e 100644
--- a/doc/guides/rel_notes/release_22_11.rst
+++ b/doc/guides/rel_notes/release_22_11.rst
@@ -83,6 +83,7 @@  New Features
   Updated the ionic PMD with new features and improvements, including:
 
   * Updated to reflect that Pensando has been acquired by AMD.
+  * Enhanced data path to provide substantial performance improvements.
 
 Removed Items
 -------------
diff --git a/drivers/net/ionic/ionic_dev.h b/drivers/net/ionic/ionic_dev.h
index 55a9485bff..6a80ebc71b 100644
--- a/drivers/net/ionic/ionic_dev.h
+++ b/drivers/net/ionic/ionic_dev.h
@@ -132,7 +132,7 @@  struct ionic_dev {
 #define Q_NEXT_TO_POST(_q, _n)	(((_q)->head_idx + (_n)) & ((_q)->size_mask))
 #define Q_NEXT_TO_SRVC(_q, _n)	(((_q)->tail_idx + (_n)) & ((_q)->size_mask))
 
-#define IONIC_INFO_IDX(_q, _i)	(_i)
+#define IONIC_INFO_IDX(_q, _i)	((_i) * (_q)->num_segs)
 #define IONIC_INFO_PTR(_q, _i)	(&(_q)->info[IONIC_INFO_IDX((_q), _i)])
 
 struct ionic_queue {
diff --git a/drivers/net/ionic/ionic_lif.c b/drivers/net/ionic/ionic_lif.c
index cc64aedaa1..db5d42dda6 100644
--- a/drivers/net/ionic/ionic_lif.c
+++ b/drivers/net/ionic/ionic_lif.c
@@ -116,7 +116,6 @@  ionic_lif_get_abs_stats(const struct ionic_lif *lif, struct rte_eth_stats *stats
 		struct ionic_rx_stats *rx_stats = &lif->rxqcqs[i]->stats;
 		stats->ierrors +=
 			rx_stats->bad_cq_status +
-			rx_stats->no_room +
 			rx_stats->bad_len;
 	}
 
@@ -136,7 +135,6 @@  ionic_lif_get_abs_stats(const struct ionic_lif *lif, struct rte_eth_stats *stats
 		stats->q_ibytes[i] = rx_stats->bytes;
 		stats->q_errors[i] =
 			rx_stats->bad_cq_status +
-			rx_stats->no_room +
 			rx_stats->bad_len;
 	}
 
@@ -608,8 +606,9 @@  ionic_qcq_alloc(struct ionic_lif *lif,
 
 	new->lif = lif;
 
+	/* Most queue types will store 1 ptr per descriptor */
 	new->q.info = rte_calloc_socket("ionic",
-				num_descs, sizeof(void *),
+				num_descs * num_segs, sizeof(void *),
 				rte_mem_page_size(), socket_id);
 	if (!new->q.info) {
 		IONIC_PRINT(ERR, "Cannot allocate queue info");
@@ -698,6 +697,42 @@  ionic_qcq_free(struct ionic_qcq *qcq)
 	rte_free(qcq);
 }
 
+static uint64_t
+ionic_rx_rearm_data(struct ionic_lif *lif)
+{
+	struct rte_mbuf rxm;
+
+	memset(&rxm, 0, sizeof(rxm));
+
+	rte_mbuf_refcnt_set(&rxm, 1);
+	rxm.data_off = RTE_PKTMBUF_HEADROOM;
+	rxm.nb_segs = 1;
+	rxm.port = lif->port_id;
+
+	rte_compiler_barrier();
+
+	RTE_BUILD_BUG_ON(sizeof(rxm.rearm_data[0]) != sizeof(uint64_t));
+	return rxm.rearm_data[0];
+}
+
+static uint64_t
+ionic_rx_seg_rearm_data(struct ionic_lif *lif)
+{
+	struct rte_mbuf rxm;
+
+	memset(&rxm, 0, sizeof(rxm));
+
+	rte_mbuf_refcnt_set(&rxm, 1);
+	rxm.data_off = 0;  /* no headroom */
+	rxm.nb_segs = 1;
+	rxm.port = lif->port_id;
+
+	rte_compiler_barrier();
+
+	RTE_BUILD_BUG_ON(sizeof(rxm.rearm_data[0]) != sizeof(uint64_t));
+	return rxm.rearm_data[0];
+}
+
 int
 ionic_rx_qcq_alloc(struct ionic_lif *lif, uint32_t socket_id, uint32_t index,
 		uint16_t nrxq_descs, struct rte_mempool *mb_pool,
@@ -721,11 +756,13 @@  ionic_rx_qcq_alloc(struct ionic_lif *lif, uint32_t socket_id, uint32_t index,
 
 	/*
 	 * Calculate how many fragment pointers might be stored in queue.
+	 * This is the worst-case number, so that there's enough room in
+	 * the info array.
 	 */
 	max_segs = 1 + (max_mtu + RTE_PKTMBUF_HEADROOM - 1) / seg_size;
 
-	IONIC_PRINT(DEBUG, "rxq %u frame_size %u seg_size %u max_segs %u",
-		index, lif->frame_size, seg_size, max_segs);
+	IONIC_PRINT(DEBUG, "rxq %u max_mtu %u seg_size %u max_segs %u",
+		index, max_mtu, seg_size, max_segs);
 	if (max_segs > max_segs_fw) {
 		IONIC_PRINT(ERR, "Rx mbuf size insufficient (%d > %d avail)",
 			max_segs, max_segs_fw);
@@ -751,6 +788,8 @@  ionic_rx_qcq_alloc(struct ionic_lif *lif, uint32_t socket_id, uint32_t index,
 	rxq->flags = flags;
 	rxq->seg_size = seg_size;
 	rxq->hdr_seg_size = hdr_seg_size;
+	rxq->rearm_data = ionic_rx_rearm_data(lif);
+	rxq->rearm_seg_data = ionic_rx_seg_rearm_data(lif);
 
 	lif->rxqcqs[index] = rxq;
 	*rxq_out = rxq;
diff --git a/drivers/net/ionic/ionic_lif.h b/drivers/net/ionic/ionic_lif.h
index 237fd0a2ef..b0bd721b06 100644
--- a/drivers/net/ionic/ionic_lif.h
+++ b/drivers/net/ionic/ionic_lif.h
@@ -40,7 +40,6 @@  struct ionic_rx_stats {
 	uint64_t packets;
 	uint64_t bytes;
 	uint64_t bad_cq_status;
-	uint64_t no_room;
 	uint64_t bad_len;
 	uint64_t mtods;
 };
@@ -80,6 +79,8 @@  struct ionic_rx_qcq {
 
 	/* cacheline2 */
 	struct rte_mempool *mb_pool;
+	uint64_t rearm_data;
+	uint64_t rearm_seg_data;
 	uint16_t frame_size;	/* Based on configured MTU */
 	uint16_t hdr_seg_size;	/* Length of first segment of RX chain */
 	uint16_t seg_size;	/* Length of all subsequent segments */
diff --git a/drivers/net/ionic/ionic_rxtx.c b/drivers/net/ionic/ionic_rxtx.c
index 2a34465e46..bb6ca019d9 100644
--- a/drivers/net/ionic/ionic_rxtx.c
+++ b/drivers/net/ionic/ionic_rxtx.c
@@ -72,7 +72,11 @@  ionic_rx_empty(struct ionic_rx_qcq *rxq)
 {
 	struct ionic_queue *q = &rxq->qcq.q;
 
-	ionic_empty_array(q->info, q->num_descs, 0);
+	/*
+	 * Walk the full info array so that the clean up includes any
+	 * fragments that were left dangling for later reuse
+	 */
+	ionic_empty_array(q->info, q->num_descs * q->num_segs, 0);
 }
 
 /*********************************************************************
@@ -658,9 +662,6 @@  ionic_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
  *
  **********************************************************************/
 
-static void ionic_rx_recycle(struct ionic_queue *q, uint32_t q_desc_index,
-		struct rte_mbuf *mbuf);
-
 void
 ionic_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
 		struct rte_eth_rxq_info *qinfo)
@@ -763,64 +764,67 @@  ionic_dev_rx_queue_setup(struct rte_eth_dev *eth_dev,
 	return 0;
 }
 
+/*
+ * Cleans one descriptor. Connects the filled mbufs into a chain.
+ * Does not advance the tail index.
+ */
 static __rte_always_inline void
-ionic_rx_clean(struct ionic_rx_qcq *rxq,
-		uint32_t q_desc_index, uint32_t cq_desc_index,
+ionic_rx_clean_one(struct ionic_rx_qcq *rxq,
+		struct ionic_rxq_comp *cq_desc,
 		struct ionic_rx_service *rx_svc)
 {
 	struct ionic_queue *q = &rxq->qcq.q;
-	struct ionic_cq *cq = &rxq->qcq.cq;
-	struct ionic_rxq_comp *cq_desc_base = cq->base;
-	struct ionic_rxq_comp *cq_desc = &cq_desc_base[cq_desc_index];
-	struct rte_mbuf *rxm, *rxm_seg;
+	struct rte_mbuf *rxm, *rxm_seg, *prev_rxm;
+	struct ionic_rx_stats *stats = &rxq->stats;
 	uint64_t pkt_flags = 0;
 	uint32_t pkt_type;
-	struct ionic_rx_stats *stats = &rxq->stats;
-	uint32_t left;
+	uint32_t left, i;
+	uint16_t cq_desc_len;
 	void **info;
 
-	assert(q_desc_index == cq_desc->comp_index);
+	cq_desc_len = rte_le_to_cpu_16(cq_desc->len);
 
-	info = IONIC_INFO_PTR(q, cq_desc->comp_index);
+	info = IONIC_INFO_PTR(q, q->tail_idx);
 
 	rxm = info[0];
 
 	if (cq_desc->status) {
 		stats->bad_cq_status++;
-		ionic_rx_recycle(q, q_desc_index, rxm);
-		return;
-	}
-
-	if (rx_svc->nb_rx >= rx_svc->nb_pkts) {
-		stats->no_room++;
-		ionic_rx_recycle(q, q_desc_index, rxm);
 		return;
 	}
 
-	if (cq_desc->len > rxq->frame_size || cq_desc->len == 0) {
+	if (cq_desc_len > rxq->frame_size || cq_desc_len == 0) {
 		stats->bad_len++;
-		ionic_rx_recycle(q, q_desc_index, rxm);
 		return;
 	}
 
-	rxm->data_off = RTE_PKTMBUF_HEADROOM;
-	rte_prefetch1((char *)rxm->buf_addr + rxm->data_off);
-	rxm->nb_segs = 1; /* cq_desc->num_sg_elems */
-	rxm->pkt_len = cq_desc->len;
-	rxm->port = rxq->qcq.lif->port_id;
+	info[0] = NULL;
 
-	rxm->data_len = RTE_MIN(rxq->hdr_seg_size, cq_desc->len);
-	left = cq_desc->len - rxm->data_len;
+	/* Set the mbuf metadata based on the cq entry */
+	rxm->rearm_data[0] = rxq->rearm_data;
+	rxm->pkt_len = cq_desc_len;
+	rxm->data_len = RTE_MIN(rxq->hdr_seg_size, cq_desc_len);
+	left = cq_desc_len - rxm->data_len;
+	rxm->nb_segs = cq_desc->num_sg_elems + 1;
+	prev_rxm = rxm;
 
-	rxm_seg = rxm->next;
-	while (rxm_seg && left) {
+	for (i = 1; i < rxm->nb_segs && left; i++) {
+		rxm_seg = info[i];
+		info[i] = NULL;
+
+		/* Set the chained mbuf metadata */
+		rxm_seg->rearm_data[0] = rxq->rearm_seg_data;
 		rxm_seg->data_len = RTE_MIN(rxq->seg_size, left);
 		left -= rxm_seg->data_len;
 
-		rxm_seg = rxm_seg->next;
-		rxm->nb_segs++;
+		/* Link the mbuf */
+		prev_rxm->next = rxm_seg;
+		prev_rxm = rxm_seg;
 	}
 
+	/* Terminate the mbuf chain */
+	prev_rxm->next = NULL;
+
 	/* RSS */
 	pkt_flags |= RTE_MBUF_F_RX_RSS_HASH;
 	rxm->hash.rss = rte_le_to_cpu_32(cq_desc->rss_hash);
@@ -897,77 +901,74 @@  ionic_rx_clean(struct ionic_rx_qcq *rxq,
 	stats->bytes += rxm->pkt_len;
 }
 
-static void
-ionic_rx_recycle(struct ionic_queue *q, uint32_t q_desc_index,
-		 struct rte_mbuf *mbuf)
-{
-	struct ionic_rxq_desc *desc_base = q->base;
-	struct ionic_rxq_desc *old = &desc_base[q_desc_index];
-	struct ionic_rxq_desc *new = &desc_base[q->head_idx];
-
-	new->addr = old->addr;
-	new->len = old->len;
-
-	q->info[q->head_idx] = mbuf;
-
-	q->head_idx = Q_NEXT_TO_POST(q, 1);
-
-	ionic_q_flush(q);
-}
-
+/*
+ * Fills one descriptor with mbufs. Does not advance the head index.
+ */
 static __rte_always_inline int
-ionic_rx_fill(struct ionic_rx_qcq *rxq)
+ionic_rx_fill_one(struct ionic_rx_qcq *rxq)
 {
 	struct ionic_queue *q = &rxq->qcq.q;
+	struct rte_mbuf *rxm, *rxm_seg;
 	struct ionic_rxq_desc *desc, *desc_base = q->base;
 	struct ionic_rxq_sg_desc *sg_desc, *sg_desc_base = q->sg_base;
-	struct ionic_rxq_sg_elem *elem;
+	rte_iova_t data_iova;
+	uint32_t i;
 	void **info;
-	rte_iova_t dma_addr;
-	uint32_t i, j;
 
-	/* Initialize software ring entries */
-	for (i = ionic_q_space_avail(q); i; i--) {
-		struct rte_mbuf *rxm = rte_mbuf_raw_alloc(rxq->mb_pool);
-		struct rte_mbuf *prev_rxm_seg;
+	info = IONIC_INFO_PTR(q, q->head_idx);
+	desc = &desc_base[q->head_idx];
+	sg_desc = &sg_desc_base[q->head_idx];
+
+	/* mbuf is unused => whole chain is unused */
+	if (unlikely(info[0]))
+		return 0;
+
+	rxm = rte_mbuf_raw_alloc(rxq->mb_pool);
+	if (unlikely(rxm == NULL)) {
+		assert(0);
+		return -ENOMEM;
+	}
+
+	info[0] = rxm;
+
+	data_iova = rte_mbuf_data_iova_default(rxm);
+	desc->addr = rte_cpu_to_le_64(data_iova);
 
-		if (rxm == NULL) {
-			IONIC_PRINT(ERR, "RX mbuf alloc failed");
+	for (i = 1; i < q->num_segs; i++) {
+		/* mbuf is unused => rest of the chain is unused */
+		if (info[i])
+			return 0;
+
+		rxm_seg = rte_mbuf_raw_alloc(rxq->mb_pool);
+		if (rxm_seg == NULL) {
+			assert(0);
 			return -ENOMEM;
 		}
 
-		info = IONIC_INFO_PTR(q, q->head_idx);
+		info[i] = rxm_seg;
 
-		desc = &desc_base[q->head_idx];
-		dma_addr = rte_cpu_to_le_64(rte_mbuf_data_iova_default(rxm));
-		desc->addr = dma_addr;
-		rxm->next = NULL;
-
-		prev_rxm_seg = rxm;
-		sg_desc = &sg_desc_base[q->head_idx];
-		elem = sg_desc->elems;
-		for (j = 0; j < q->num_segs - 1u; j++) {
-			struct rte_mbuf *rxm_seg;
-			rte_iova_t data_iova;
-
-			rxm_seg = rte_mbuf_raw_alloc(rxq->mb_pool);
-			if (rxm_seg == NULL) {
-				IONIC_PRINT(ERR, "RX mbuf alloc failed");
-				return -ENOMEM;
-			}
+		/* The data_off does not get set to 0 until later */
+		data_iova = rxm_seg->buf_iova;
+		sg_desc->elems[i - 1].addr = rte_cpu_to_le_64(data_iova);
+	}
 
-			rxm_seg->data_off = 0;
-			data_iova = rte_mbuf_data_iova(rxm_seg);
-			dma_addr = rte_cpu_to_le_64(data_iova);
-			elem->addr = dma_addr;
-			elem++;
+	return 0;
+}
 
-			rxm_seg->next = NULL;
-			prev_rxm_seg->next = rxm_seg;
-			prev_rxm_seg = rxm_seg;
-		}
+/*
+ * Fills all descriptors with mbufs.
+ */
+static int __rte_cold
+ionic_rx_fill(struct ionic_rx_qcq *rxq)
+{
+	struct ionic_queue *q = &rxq->qcq.q;
+	uint32_t i;
+	int err;
 
-		info[0] = rxm;
+	for (i = 1; i < q->num_descs; i++) {
+		err = ionic_rx_fill_one(rxq);
+		if (err)
+			return err;
 
 		q->head_idx = Q_NEXT_TO_POST(q, 1);
 	}
@@ -1056,53 +1057,52 @@  ionic_dev_rx_queue_start(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id)
 	return 0;
 }
 
+/*
+ * Walk the CQ to find completed receive descriptors.
+ * Any completed descriptor found is refilled.
+ */
 static __rte_always_inline void
 ionic_rxq_service(struct ionic_rx_qcq *rxq, uint32_t work_to_do,
 		struct ionic_rx_service *rx_svc)
 {
 	struct ionic_cq *cq = &rxq->qcq.cq;
 	struct ionic_queue *q = &rxq->qcq.q;
+	struct ionic_rxq_desc *q_desc_base = q->base;
 	struct ionic_rxq_comp *cq_desc, *cq_desc_base = cq->base;
-	bool more;
-	uint32_t curr_q_tail_idx, curr_cq_tail_idx;
 	uint32_t work_done = 0;
 
-	if (work_to_do == 0)
-		return;
-
 	cq_desc = &cq_desc_base[cq->tail_idx];
+
 	while (color_match(cq_desc->pkt_type_color, cq->done_color)) {
-		curr_cq_tail_idx = cq->tail_idx;
 		cq->tail_idx = Q_NEXT_TO_SRVC(cq, 1);
 
 		if (cq->tail_idx == 0)
 			cq->done_color = !cq->done_color;
 
-		/* Prefetch the next 4 descriptors */
-		if ((cq->tail_idx & 0x3) == 0)
-			rte_prefetch0(&cq_desc_base[cq->tail_idx]);
-
-		do {
-			more = (q->tail_idx != cq_desc->comp_index);
+		/* Prefetch 8 x 8B bufinfo */
+		rte_prefetch0(IONIC_INFO_PTR(q, Q_NEXT_TO_SRVC(q, 8)));
+		/* Prefetch 4 x 16B comp */
+		rte_prefetch0(&cq_desc_base[Q_NEXT_TO_SRVC(cq, 4)]);
+		/* Prefetch 4 x 16B descriptors */
+		rte_prefetch0(&q_desc_base[Q_NEXT_TO_POST(q, 4)]);
 
-			curr_q_tail_idx = q->tail_idx;
-			q->tail_idx = Q_NEXT_TO_SRVC(q, 1);
+		ionic_rx_clean_one(rxq, cq_desc, rx_svc);
 
-			/* Prefetch the next 4 descriptors */
-			if ((q->tail_idx & 0x3) == 0)
-				/* q desc info */
-				rte_prefetch0(&q->info[q->tail_idx]);
+		q->tail_idx = Q_NEXT_TO_SRVC(q, 1);
 
-			ionic_rx_clean(rxq, curr_q_tail_idx, curr_cq_tail_idx,
-				rx_svc);
+		(void)ionic_rx_fill_one(rxq);
 
-		} while (more);
+		q->head_idx = Q_NEXT_TO_POST(q, 1);
 
 		if (++work_done == work_to_do)
 			break;
 
 		cq_desc = &cq_desc_base[cq->tail_idx];
 	}
+
+	/* Update the queue indices and ring the doorbell */
+	if (work_done)
+		ionic_q_flush(q);
 }
 
 /*
@@ -1141,12 +1141,9 @@  ionic_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
 	struct ionic_rx_service rx_svc;
 
 	rx_svc.rx_pkts = rx_pkts;
-	rx_svc.nb_pkts = nb_pkts;
 	rx_svc.nb_rx = 0;
 
 	ionic_rxq_service(rxq, nb_pkts, &rx_svc);
 
-	ionic_rx_fill(rxq);
-
 	return rx_svc.nb_rx;
 }
diff --git a/drivers/net/ionic/ionic_rxtx.h b/drivers/net/ionic/ionic_rxtx.h
index 91a9073803..79ec1112de 100644
--- a/drivers/net/ionic/ionic_rxtx.h
+++ b/drivers/net/ionic/ionic_rxtx.h
@@ -10,7 +10,6 @@ 
 struct ionic_rx_service {
 	/* cb in */
 	struct rte_mbuf **rx_pkts;
-	uint16_t nb_pkts;
 	/* cb out */
 	uint16_t nb_rx;
 };
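
From the application side nothing changes: the reworked path is still
driven through rte_eth_rx_burst(), scattered frames still arrive as a
NULL-terminated mbuf chain, and the single doorbell per burst is
internal to the driver. A short usage sketch (example_rx_poll is
illustrative, not part of this patch):

#include <rte_ethdev.h>
#include <rte_mbuf.h>

static void
example_rx_poll(uint16_t port_id, uint16_t queue_id)
{
	struct rte_mbuf *pkts[32];
	uint16_t i, nb;

	/* One call may clean and refill many descriptors, but the
	 * driver now rings the RX doorbell at most once per burst.
	 */
	nb = rte_eth_rx_burst(port_id, queue_id, pkts, RTE_DIM(pkts));
	for (i = 0; i < nb; i++) {
		/* rte_pktmbuf_free() releases the whole chain */
		rte_pktmbuf_free(pkts[i]);
	}
}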