[2/5] net/hns3: fix the order of NEON Rx code

Message ID 20230711102448.11627-3-liudongdong3@huawei.com (mailing list archive)
State Accepted, archived
Delegated to: Ferruh Yigit
Headers
Series net/hns3: some performance optimizations |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Dongdong Liu July 11, 2023, 10:24 a.m. UTC
From: Huisong Li <lihuisong@huawei.com>

This patch reorders the order of the NEON Rx for better maintenance
and easier understanding.

Fixes: a3d4f4d291d7 ("net/hns3: support NEON Rx")
Cc: stable@dpdk.org

Signed-off-by: Huisong Li <lihuisong@huawei.com>
Signed-off-by: Dongdong Liu <liudongdong3@huawei.com>
---
 drivers/net/hns3/hns3_rxtx_vec_neon.h | 78 +++++++++++----------------
 1 file changed, 31 insertions(+), 47 deletions(-)
  

Patch

diff --git a/drivers/net/hns3/hns3_rxtx_vec_neon.h b/drivers/net/hns3/hns3_rxtx_vec_neon.h
index 564d831a48..0dc6b9f0a2 100644
--- a/drivers/net/hns3/hns3_rxtx_vec_neon.h
+++ b/drivers/net/hns3/hns3_rxtx_vec_neon.h
@@ -180,19 +180,12 @@  hns3_recv_burst_vec(struct hns3_rx_queue *__restrict rxq,
 		bd_vld = vset_lane_u16(rxdp[2].rx.bdtype_vld_udp0, bd_vld, 2);
 		bd_vld = vset_lane_u16(rxdp[3].rx.bdtype_vld_udp0, bd_vld, 3);
 
-		/* load 2 mbuf pointer */
-		mbp1 = vld1q_u64((uint64_t *)&sw_ring[pos]);
-
 		bd_vld = vshl_n_u16(bd_vld,
 				    HNS3_UINT16_BIT - 1 - HNS3_RXD_VLD_B);
 		bd_vld = vreinterpret_u16_s16(
 				vshr_n_s16(vreinterpret_s16_u16(bd_vld),
 					   HNS3_UINT16_BIT - 1));
 		stat = ~vget_lane_u64(vreinterpret_u64_u16(bd_vld), 0);
-
-		/* load 2 mbuf pointer again */
-		mbp2 = vld1q_u64((uint64_t *)&sw_ring[pos + 2]);
-
 		if (likely(stat == 0))
 			bd_valid_num = HNS3_DEFAULT_DESCS_PER_LOOP;
 		else
@@ -200,20 +193,20 @@  hns3_recv_burst_vec(struct hns3_rx_queue *__restrict rxq,
 		if (bd_valid_num == 0)
 			break;
 
-		/* use offset to control below data load oper ordering */
-		offset = rxq->offset_table[bd_valid_num];
+		/* load 4 mbuf pointer */
+		mbp1 = vld1q_u64((uint64_t *)&sw_ring[pos]);
+		mbp2 = vld1q_u64((uint64_t *)&sw_ring[pos + 2]);
 
-		/* store 2 mbuf pointer into rx_pkts */
+		/* store 4 mbuf pointer into rx_pkts */
 		vst1q_u64((uint64_t *)&rx_pkts[pos], mbp1);
+		vst1q_u64((uint64_t *)&rx_pkts[pos + 2], mbp2);
 
-		/* read first two descs */
+		/* use offset to control below data load oper ordering */
+		offset = rxq->offset_table[bd_valid_num];
+
+		/* read 4 descs */
 		descs[0] = vld2q_u64((uint64_t *)(rxdp + offset));
 		descs[1] = vld2q_u64((uint64_t *)(rxdp + offset + 1));
-
-		/* store 2 mbuf pointer into rx_pkts again */
-		vst1q_u64((uint64_t *)&rx_pkts[pos + 2], mbp2);
-
-		/* read remains two descs */
 		descs[2] = vld2q_u64((uint64_t *)(rxdp + offset + 2));
 		descs[3] = vld2q_u64((uint64_t *)(rxdp + offset + 3));
 
@@ -221,56 +214,47 @@  hns3_recv_burst_vec(struct hns3_rx_queue *__restrict rxq,
 		pkt_mbuf1.val[1] = vreinterpretq_u8_u64(descs[0].val[1]);
 		pkt_mbuf2.val[0] = vreinterpretq_u8_u64(descs[1].val[0]);
 		pkt_mbuf2.val[1] = vreinterpretq_u8_u64(descs[1].val[1]);
+		pkt_mbuf3.val[0] = vreinterpretq_u8_u64(descs[2].val[0]);
+		pkt_mbuf3.val[1] = vreinterpretq_u8_u64(descs[2].val[1]);
+		pkt_mbuf4.val[0] = vreinterpretq_u8_u64(descs[3].val[0]);
+		pkt_mbuf4.val[1] = vreinterpretq_u8_u64(descs[3].val[1]);
 
-		/* pkt 1,2 convert format from desc to pktmbuf */
+		/* 4 packets convert format from desc to pktmbuf */
 		pkt_mb1 = vqtbl2q_u8(pkt_mbuf1, shuf_desc_fields_msk);
 		pkt_mb2 = vqtbl2q_u8(pkt_mbuf2, shuf_desc_fields_msk);
+		pkt_mb3 = vqtbl2q_u8(pkt_mbuf3, shuf_desc_fields_msk);
+		pkt_mb4 = vqtbl2q_u8(pkt_mbuf4, shuf_desc_fields_msk);
 
-		/* store the first 8 bytes of pkt 1,2 mbuf's rearm_data */
-		*(uint64_t *)&sw_ring[pos + 0].mbuf->rearm_data =
-			rxq->mbuf_initializer;
-		*(uint64_t *)&sw_ring[pos + 1].mbuf->rearm_data =
-			rxq->mbuf_initializer;
-
-		/* pkt 1,2 remove crc */
+		/* 4 packets remove crc */
 		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb1), crc_adjust);
 		pkt_mb1 = vreinterpretq_u8_u16(tmp);
 		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb2), crc_adjust);
 		pkt_mb2 = vreinterpretq_u8_u16(tmp);
+		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb3), crc_adjust);
+		pkt_mb3 = vreinterpretq_u8_u16(tmp);
+		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb4), crc_adjust);
+		pkt_mb4 = vreinterpretq_u8_u16(tmp);
 
-		pkt_mbuf3.val[0] = vreinterpretq_u8_u64(descs[2].val[0]);
-		pkt_mbuf3.val[1] = vreinterpretq_u8_u64(descs[2].val[1]);
-		pkt_mbuf4.val[0] = vreinterpretq_u8_u64(descs[3].val[0]);
-		pkt_mbuf4.val[1] = vreinterpretq_u8_u64(descs[3].val[1]);
-
-		/* pkt 3,4 convert format from desc to pktmbuf */
-		pkt_mb3 = vqtbl2q_u8(pkt_mbuf3, shuf_desc_fields_msk);
-		pkt_mb4 = vqtbl2q_u8(pkt_mbuf4, shuf_desc_fields_msk);
-
-		/* pkt 1,2 save to rx_pkts mbuf */
+		/* save packet info to rx_pkts mbuf */
 		vst1q_u8((void *)&sw_ring[pos + 0].mbuf->rx_descriptor_fields1,
 			 pkt_mb1);
 		vst1q_u8((void *)&sw_ring[pos + 1].mbuf->rx_descriptor_fields1,
 			 pkt_mb2);
+		vst1q_u8((void *)&sw_ring[pos + 2].mbuf->rx_descriptor_fields1,
+			 pkt_mb3);
+		vst1q_u8((void *)&sw_ring[pos + 3].mbuf->rx_descriptor_fields1,
+			 pkt_mb4);
 
-		/* pkt 3,4 remove crc */
-		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb3), crc_adjust);
-		pkt_mb3 = vreinterpretq_u8_u16(tmp);
-		tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb4), crc_adjust);
-		pkt_mb4 = vreinterpretq_u8_u16(tmp);
-
-		/* store the first 8 bytes of pkt 3,4 mbuf's rearm_data */
+		/* store the first 8 bytes of packets mbuf's rearm_data */
+		*(uint64_t *)&sw_ring[pos + 0].mbuf->rearm_data =
+			rxq->mbuf_initializer;
+		*(uint64_t *)&sw_ring[pos + 1].mbuf->rearm_data =
+			rxq->mbuf_initializer;
 		*(uint64_t *)&sw_ring[pos + 2].mbuf->rearm_data =
 			rxq->mbuf_initializer;
 		*(uint64_t *)&sw_ring[pos + 3].mbuf->rearm_data =
 			rxq->mbuf_initializer;
 
-		/* pkt 3,4 save to rx_pkts mbuf */
-		vst1q_u8((void *)&sw_ring[pos + 2].mbuf->rx_descriptor_fields1,
-			 pkt_mb3);
-		vst1q_u8((void *)&sw_ring[pos + 3].mbuf->rx_descriptor_fields1,
-			 pkt_mb4);
-
 		rte_prefetch_non_temporal(rxdp + HNS3_DEFAULT_DESCS_PER_LOOP);
 
 		parse_retcode = hns3_desc_parse_field(rxq, &sw_ring[pos],