[dpdk-dev,09/11] enic: Tx perf - optimize the transmit function

Message ID 1463771054-16861-9-git-send-email-johndale@cisco.com (mailing list archive)
State Superseded, archived
Delegated to: Bruce Richardson
Headers

Commit Message

John Daley (johndale) May 20, 2016, 7:04 p.m. UTC
Reduce host CPU overhead of Tx packet processing:
* Use local variables inside per packet loop instead of fields in structs.
* Factor book keeping and conditionals out of the per packet loop where
  possible.
* Post buffers to the nic at a maximum of every 64 packets

Signed-off-by: Nelson Escobar <neescoba@cisco.com>
Signed-off-by: John Daley <johndale@cisco.com>
---
 drivers/net/enic/base/vnic_wq.h |   1 +
 drivers/net/enic/enic_res.h     |   2 +-
 drivers/net/enic/enic_rxtx.c    | 167 +++++++++++++++++++---------------------
 3 files changed, 83 insertions(+), 87 deletions(-)
  

Patch

diff --git a/drivers/net/enic/base/vnic_wq.h b/drivers/net/enic/base/vnic_wq.h
index 689b81c..7a66813 100644
--- a/drivers/net/enic/base/vnic_wq.h
+++ b/drivers/net/enic/base/vnic_wq.h
@@ -67,6 +67,7 @@  struct vnic_wq_ctrl {
 
 /* 16 bytes */
 struct vnic_wq_buf {
+	struct rte_mempool *pool;
 	void *mb;
 };
 
diff --git a/drivers/net/enic/enic_res.h b/drivers/net/enic/enic_res.h
index 955db71..3c8e303 100644
--- a/drivers/net/enic/enic_res.h
+++ b/drivers/net/enic/enic_res.h
@@ -53,7 +53,7 @@ 
 
 #define ENIC_NON_TSO_MAX_DESC		16
 #define ENIC_DEFAULT_RX_FREE_THRESH	32
-#define ENIC_TX_POST_THRESH		(ENIC_MIN_WQ_DESCS / 2)
+#define ENIC_TX_XMIT_MAX		64
 
 #define ENIC_SETTING(enic, f) ((enic->config.flags & VENETF_##f) ? 1 : 0)
 
diff --git a/drivers/net/enic/enic_rxtx.c b/drivers/net/enic/enic_rxtx.c
index 8bf7640..336cfdf 100644
--- a/drivers/net/enic/enic_rxtx.c
+++ b/drivers/net/enic/enic_rxtx.c
@@ -380,114 +380,109 @@  unsigned int enic_cleanup_wq(__rte_unused struct enic *enic, struct vnic_wq *wq)
 	return 0;
 }
 
-void enic_post_wq_index(struct vnic_wq *wq)
-{
-	enic_vnic_post_wq_index(wq);
-}
-
-void enic_send_pkt(struct enic *enic, struct vnic_wq *wq,
-		   struct rte_mbuf *tx_pkt, unsigned short len,
-		   uint8_t sop, uint8_t eop, uint8_t cq_entry,
-		   uint16_t ol_flags, uint16_t vlan_tag)
-{
-	struct wq_enet_desc *desc, *descs;
-	uint16_t mss = 0;
-	uint8_t vlan_tag_insert = 0;
-	uint64_t bus_addr = (dma_addr_t)
-	    (tx_pkt->buf_physaddr + tx_pkt->data_off);
-
-	descs = (struct wq_enet_desc *)wq->ring.descs;
-	desc = descs + wq->head_idx;
-
-	if (sop) {
-		if (ol_flags & PKT_TX_VLAN_PKT)
-			vlan_tag_insert = 1;
-
-		if (enic->hw_ip_checksum) {
-			if (ol_flags & PKT_TX_IP_CKSUM)
-				mss |= ENIC_CALC_IP_CKSUM;
-
-			if (ol_flags & PKT_TX_TCP_UDP_CKSUM)
-				mss |= ENIC_CALC_TCP_UDP_CKSUM;
-		}
-	}
-
-	wq_enet_desc_enc(desc,
-		bus_addr,
-		len,
-		mss,
-		0 /* header_length */,
-		0 /* offload_mode WQ_ENET_OFFLOAD_MODE_CSUM */,
-		eop,
-		cq_entry,
-		0 /* fcoe_encap */,
-		vlan_tag_insert,
-		vlan_tag,
-		0 /* loopback */);
-
-	enic_vnic_post_wq(wq, (void *)tx_pkt, cq_entry);
-}
-
 uint16_t enic_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 	uint16_t nb_pkts)
 {
 	uint16_t index;
-	unsigned int frags;
-	unsigned int pkt_len;
-	unsigned int seg_len;
-	unsigned int inc_len;
+	unsigned int pkt_len, data_len;
 	unsigned int nb_segs;
-	struct rte_mbuf *tx_pkt, *next_tx_pkt;
+	struct rte_mbuf *tx_pkt;
 	struct vnic_wq *wq = (struct vnic_wq *)tx_queue;
 	struct enic *enic = vnic_dev_priv(wq->vdev);
 	unsigned short vlan_id;
 	unsigned short ol_flags;
-	uint8_t last_seg, eop;
-	unsigned int host_tx_descs = 0;
+	unsigned int wq_desc_avail;
+	int head_idx;
+	struct vnic_wq_buf *buf;
+	unsigned int hw_ip_cksum_enabled;
+	unsigned int desc_count;
+	struct wq_enet_desc *descs, *desc_p, desc_tmp;
+	uint16_t mss;
+	uint8_t vlan_tag_insert;
+	uint8_t eop;
+	uint64_t bus_addr;
 
+	enic_cleanup_wq(enic, wq);
+	wq_desc_avail = vnic_wq_desc_avail(wq);
+	head_idx = wq->head_idx;
+	desc_count = wq->ring.desc_count;
+
+	nb_pkts = RTE_MIN(nb_pkts, ENIC_TX_XMIT_MAX);
+
+	hw_ip_cksum_enabled = enic->hw_ip_checksum;
 	for (index = 0; index < nb_pkts; index++) {
 		tx_pkt = *tx_pkts++;
-		inc_len = 0;
 		nb_segs = tx_pkt->nb_segs;
-		if (nb_segs > vnic_wq_desc_avail(wq)) {
+		if (nb_segs > wq_desc_avail) {
 			if (index > 0)
-				enic_post_wq_index(wq);
-
-			/* wq cleanup and try again */
-			if (!enic_cleanup_wq(enic, wq) ||
-				(nb_segs > vnic_wq_desc_avail(wq))) {
-				return index;
-			}
+				goto post;
+			goto done;
 		}
 
 		pkt_len = tx_pkt->pkt_len;
+		data_len = tx_pkt->data_len;
 		vlan_id = tx_pkt->vlan_tci;
 		ol_flags = tx_pkt->ol_flags;
-		for (frags = 0; inc_len < pkt_len; frags++) {
-			if (!tx_pkt)
-				break;
-			next_tx_pkt = tx_pkt->next;
-			seg_len = tx_pkt->data_len;
-			inc_len += seg_len;
-
-			host_tx_descs++;
-			last_seg = 0;
-			eop = 0;
-			if ((pkt_len == inc_len) || !next_tx_pkt) {
-				eop = 1;
-				/* post if last packet in batch or > thresh */
-				if ((index == (nb_pkts - 1)) ||
-				   (host_tx_descs > ENIC_TX_POST_THRESH)) {
-					last_seg = 1;
-					host_tx_descs = 0;
-				}
+
+		mss = 0;
+		vlan_tag_insert = 0;
+		bus_addr = (dma_addr_t)
+			   (tx_pkt->buf_physaddr + tx_pkt->data_off);
+
+		descs = (struct wq_enet_desc *)wq->ring.descs;
+		desc_p = descs + head_idx;
+
+		eop = (data_len == pkt_len);
+
+		if (ol_flags & PKT_TX_VLAN_PKT)
+			vlan_tag_insert = 1;
+
+		if (hw_ip_cksum_enabled && (ol_flags & PKT_TX_IP_CKSUM))
+			mss |= ENIC_CALC_IP_CKSUM;
+
+		if (hw_ip_cksum_enabled && (ol_flags & PKT_TX_TCP_UDP_CKSUM))
+			mss |= ENIC_CALC_TCP_UDP_CKSUM;
+
+		wq_enet_desc_enc(&desc_tmp, bus_addr, data_len, mss, 0, 0, eop,
+				 eop, 0, vlan_tag_insert, vlan_id, 0);
+
+		*desc_p = desc_tmp;
+		buf = &wq->bufs[head_idx];
+		buf->mb = (void *)tx_pkt;
+		head_idx = enic_ring_incr(desc_count, head_idx);
+		wq_desc_avail--;
+
+		if (!eop) {
+			for (tx_pkt = tx_pkt->next; tx_pkt; tx_pkt =
+			    tx_pkt->next) {
+				data_len = tx_pkt->data_len;
+
+				if (tx_pkt->next == NULL)
+					eop = 1;
+				desc_p = descs + head_idx;
+				bus_addr = (dma_addr_t)(tx_pkt->buf_physaddr
+					   + tx_pkt->data_off);
+				wq_enet_desc_enc((struct wq_enet_desc *)
+						 &desc_tmp, bus_addr, data_len,
+						 mss, 0, 0, eop, eop, 0,
+						 vlan_tag_insert, vlan_id, 0);
+
+				*desc_p = desc_tmp;
+				buf = &wq->bufs[head_idx];
+				buf->mb = (void *)tx_pkt;
+				head_idx = enic_ring_incr(desc_count, head_idx);
+				wq_desc_avail--;
 			}
-			enic_send_pkt(enic, wq, tx_pkt, (unsigned short)seg_len,
-				      !frags, eop, last_seg, ol_flags, vlan_id);
-			tx_pkt = next_tx_pkt;
 		}
 	}
+ post:
+	rte_wmb();
+	iowrite32(head_idx, &wq->ctrl->posted_index);
+ done:
+	wq->ring.desc_avail = wq_desc_avail;
+	wq->head_idx = head_idx;
 
-	enic_cleanup_wq(enic, wq);
 	return index;
 }
+
+