Message ID | 20201022185051.183164-1-lance.richardson@broadcom.com (mailing list archive) |
---|---|
State | Accepted, archived |
Delegated to: | Ajit Khaparde |
Headers | show |
Series | net/bnxt: use shorter SIMD initializers | expand |

Context | Check | Description |
---|---|---|
ci/iol-mellanox-Performance | success | Performance Testing PASS |
ci/travis-robot | success | Travis build: passed |
ci/Intel-compilation | success | Compilation OK |
ci/iol-testing | success | Testing PASS |
ci/iol-intel-Performance | success | Performance Testing PASS |
ci/iol-intel-Functional | success | Functional Testing PASS |
ci/iol-broadcom-Functional | success | Functional Testing PASS |
ci/iol-broadcom-Performance | success | Performance Testing PASS |
ci/checkpatch | success | coding style OK |

On Thu, Oct 22, 2020 at 11:51 AM Lance Richardson <lance.richardson@broadcom.com> wrote:
>
> Make SIMD initialization code less verbose by using appropriate
> intrinsics when all lanes of a vector are initialized to the
> same value.
>
> Signed-off-by: Lance Richardson <lance.richardson@broadcom.com>
> Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>

Patch applied to dpdk-next-net-brcm.

> ---
> drivers/net/bnxt/bnxt_rxtx_vec_neon.c | 58 +++++++--------------------
> drivers/net/bnxt/bnxt_rxtx_vec_sse.c | 37 +++++------------
> 2 files changed, 23 insertions(+), 72 deletions(-)
>
> diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
> index f49e29ccb..de1d96570 100644
> --- a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
> +++ b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c
> @@ -67,40 +67,17 @@ descs_to_mbufs(uint32x4_t mm_rxcmp[4], uint32x4_t mm_rxcmp1[4],
> 0xFF, 0xFF, /* vlan_tci (zeroes) */
> 12, 13, 14, 15 /* rss hash */
> };
> - const uint32x4_t flags_type_mask = {
> - RX_PKT_CMPL_FLAGS_ITYPE_MASK,
> - RX_PKT_CMPL_FLAGS_ITYPE_MASK,
> - RX_PKT_CMPL_FLAGS_ITYPE_MASK,
> - RX_PKT_CMPL_FLAGS_ITYPE_MASK
> - };
> - const uint32x4_t flags2_mask1 = {
> - RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
> - RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC,
> - RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
> - RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC,
> - RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
> - RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC,
> - RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
> - RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC
> - };
> - const uint32x4_t flags2_mask2 = {
> - RX_PKT_CMPL_FLAGS2_IP_TYPE,
> - RX_PKT_CMPL_FLAGS2_IP_TYPE,
> - RX_PKT_CMPL_FLAGS2_IP_TYPE,
> - RX_PKT_CMPL_FLAGS2_IP_TYPE
> - };
> - const uint32x4_t rss_mask = {
> - RX_PKT_CMPL_FLAGS_RSS_VALID,
> - RX_PKT_CMPL_FLAGS_RSS_VALID,
> - RX_PKT_CMPL_FLAGS_RSS_VALID,
> - RX_PKT_CMPL_FLAGS_RSS_VALID
> - };
> - const uint32x4_t flags2_index_mask = {
> - 0x1F, 0x1F, 0x1F, 0x1F
> - };
> - const uint32x4_t flags2_error_mask = {
> - 0xF, 0xF, 0xF, 0xF
> - };
> + const uint32x4_t flags_type_mask =
> + vdupq_n_u32(RX_PKT_CMPL_FLAGS_ITYPE_MASK);
> + const uint32x4_t flags2_mask1 =
> + vdupq_n_u32(RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
> + RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC);
> + const uint32x4_t flags2_mask2 =
> + vdupq_n_u32(RX_PKT_CMPL_FLAGS2_IP_TYPE);
> + const uint32x4_t rss_mask =
> + vdupq_n_u32(RX_PKT_CMPL_FLAGS_RSS_VALID);
> + const uint32x4_t flags2_index_mask = vdupq_n_u32(0x1F);
> + const uint32x4_t flags2_error_mask = vdupq_n_u32(0x0F);
> uint32x4_t flags_type, flags2, index, errors, rss_flags;
> uint32x4_t tmp, ptype_idx;
> uint64x2_t t0, t1;
> @@ -180,20 +157,13 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
> uint16_t rx_ring_size = rxr->rx_ring_struct->ring_size;
> struct cmpl_base *cp_desc_ring = cpr->cp_desc_ring;
> uint64_t valid, desc_valid_mask = ~0UL;
> - const uint32x4_t info3_v_mask = {
> - CMPL_BASE_V, CMPL_BASE_V,
> - CMPL_BASE_V, CMPL_BASE_V
> - };
> + const uint32x4_t info3_v_mask = vdupq_n_u32(CMPL_BASE_V);
> uint32_t raw_cons = cpr->cp_raw_cons;
> uint32_t cons, mbcons;
> int nb_rx_pkts = 0;
> const uint64x2_t mb_init = {rxq->mbuf_initializer, 0};
> - const uint32x4_t valid_target = {
> - !!(raw_cons & cp_ring_size),
> - !!(raw_cons & cp_ring_size),
> - !!(raw_cons & cp_ring_size),
> - !!(raw_cons & cp_ring_size)
> - };
> + const uint32x4_t valid_target =
> + vdupq_n_u32(!!(raw_cons & cp_ring_size));
> int i;
>
> /* If Rx Q was stopped return */
> diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
> index e4ba63551..e12bf8bb7 100644
> --- a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
> +++ b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c
> @@ -63,29 +63,14 @@ descs_to_mbufs(__m128i mm_rxcmp[4], __m128i mm_rxcmp1[4],
> 0xFF, 0xFF, 3, 2, /* pkt_len */
> 0xFF, 0xFF, 0xFF, 0xFF); /* pkt_type (zeroes) */
> const __m128i flags_type_mask =
> - _mm_set_epi32(RX_PKT_CMPL_FLAGS_ITYPE_MASK,
> - RX_PKT_CMPL_FLAGS_ITYPE_MASK,
> - RX_PKT_CMPL_FLAGS_ITYPE_MASK,
> - RX_PKT_CMPL_FLAGS_ITYPE_MASK);
> + _mm_set1_epi32(RX_PKT_CMPL_FLAGS_ITYPE_MASK);
> const __m128i flags2_mask1 =
> - _mm_set_epi32(RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
> - RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC,
> - RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
> - RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC,
> - RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
> - RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC,
> - RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
> - RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC);
> + _mm_set1_epi32(RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN |
> + RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC);
> const __m128i flags2_mask2 =
> - _mm_set_epi32(RX_PKT_CMPL_FLAGS2_IP_TYPE,
> - RX_PKT_CMPL_FLAGS2_IP_TYPE,
> - RX_PKT_CMPL_FLAGS2_IP_TYPE,
> - RX_PKT_CMPL_FLAGS2_IP_TYPE);
> + _mm_set1_epi32(RX_PKT_CMPL_FLAGS2_IP_TYPE);
> const __m128i rss_mask =
> - _mm_set_epi32(RX_PKT_CMPL_FLAGS_RSS_VALID,
> - RX_PKT_CMPL_FLAGS_RSS_VALID,
> - RX_PKT_CMPL_FLAGS_RSS_VALID,
> - RX_PKT_CMPL_FLAGS_RSS_VALID);
> + _mm_set1_epi32(RX_PKT_CMPL_FLAGS_RSS_VALID);
> __m128i t0, t1, flags_type, flags2, index, errors, rss_flags;
> __m128i ptype_idx;
> uint32_t ol_flags;
> @@ -114,10 +99,10 @@ descs_to_mbufs(__m128i mm_rxcmp[4], __m128i mm_rxcmp1[4],
> t1 = _mm_unpackhi_epi32(mm_rxcmp1[2], mm_rxcmp1[3]);
>
> /* Compute ol_flags and checksum error indexes for four packets. */
> - flags2 = _mm_and_si128(flags2, _mm_set_epi32(0x1F, 0x1F, 0x1F, 0x1F));
> + flags2 = _mm_and_si128(flags2, _mm_set1_epi32(0x1F));
>
> errors = _mm_srli_epi32(_mm_unpacklo_epi64(t0, t1), 4);
> - errors = _mm_and_si128(errors, _mm_set_epi32(0xF, 0xF, 0xF, 0xF));
> + errors = _mm_and_si128(errors, _mm_set1_epi32(0xF));
> errors = _mm_and_si128(errors, flags2);
>
> index = _mm_andnot_si128(errors, flags2);
> @@ -165,16 +150,12 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
> uint16_t rx_ring_size = rxr->rx_ring_struct->ring_size;
> struct cmpl_base *cp_desc_ring = cpr->cp_desc_ring;
> uint64_t valid, desc_valid_mask = ~0ULL;
> - const __m128i info3_v_mask = _mm_set_epi32(CMPL_BASE_V, CMPL_BASE_V,
> - CMPL_BASE_V, CMPL_BASE_V);
> + const __m128i info3_v_mask = _mm_set1_epi32(CMPL_BASE_V);
> uint32_t raw_cons = cpr->cp_raw_cons;
> uint32_t cons, mbcons;
> int nb_rx_pkts = 0;
> const __m128i valid_target =
> - _mm_set_epi32(!!(raw_cons & cp_ring_size),
> - !!(raw_cons & cp_ring_size),
> - !!(raw_cons & cp_ring_size),
> - !!(raw_cons & cp_ring_size));
> + _mm_set1_epi32(!!(raw_cons & cp_ring_size));
> int i;
>
> /* If Rx Q was stopped return */
> --
> 2.25.1
>
diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c index f49e29ccb..de1d96570 100644 --- a/drivers/net/bnxt/bnxt_rxtx_vec_neon.c +++ b/drivers/net/bnxt/bnxt_rxtx_vec_neon.c @@ -67,40 +67,17 @@ descs_to_mbufs(uint32x4_t mm_rxcmp[4], uint32x4_t mm_rxcmp1[4], 0xFF, 0xFF, /* vlan_tci (zeroes) */ 12, 13, 14, 15 /* rss hash */ }; - const uint32x4_t flags_type_mask = { - RX_PKT_CMPL_FLAGS_ITYPE_MASK, - RX_PKT_CMPL_FLAGS_ITYPE_MASK, - RX_PKT_CMPL_FLAGS_ITYPE_MASK, - RX_PKT_CMPL_FLAGS_ITYPE_MASK - }; - const uint32x4_t flags2_mask1 = { - RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN | - RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC, - RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN | - RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC, - RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN | - RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC, - RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN | - RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC - }; - const uint32x4_t flags2_mask2 = { - RX_PKT_CMPL_FLAGS2_IP_TYPE, - RX_PKT_CMPL_FLAGS2_IP_TYPE, - RX_PKT_CMPL_FLAGS2_IP_TYPE, - RX_PKT_CMPL_FLAGS2_IP_TYPE - }; - const uint32x4_t rss_mask = { - RX_PKT_CMPL_FLAGS_RSS_VALID, - RX_PKT_CMPL_FLAGS_RSS_VALID, - RX_PKT_CMPL_FLAGS_RSS_VALID, - RX_PKT_CMPL_FLAGS_RSS_VALID - }; - const uint32x4_t flags2_index_mask = { - 0x1F, 0x1F, 0x1F, 0x1F - }; - const uint32x4_t flags2_error_mask = { - 0xF, 0xF, 0xF, 0xF - }; + const uint32x4_t flags_type_mask = + vdupq_n_u32(RX_PKT_CMPL_FLAGS_ITYPE_MASK); + const uint32x4_t flags2_mask1 = + vdupq_n_u32(RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN | + RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC); + const uint32x4_t flags2_mask2 = + vdupq_n_u32(RX_PKT_CMPL_FLAGS2_IP_TYPE); + const uint32x4_t rss_mask = + vdupq_n_u32(RX_PKT_CMPL_FLAGS_RSS_VALID); + const uint32x4_t flags2_index_mask = vdupq_n_u32(0x1F); + const uint32x4_t flags2_error_mask = vdupq_n_u32(0x0F); uint32x4_t flags_type, flags2, index, errors, rss_flags; uint32x4_t tmp, ptype_idx; uint64x2_t t0, t1; @@ -180,20 +157,13 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, 
uint16_t rx_ring_size = rxr->rx_ring_struct->ring_size; struct cmpl_base *cp_desc_ring = cpr->cp_desc_ring; uint64_t valid, desc_valid_mask = ~0UL; - const uint32x4_t info3_v_mask = { - CMPL_BASE_V, CMPL_BASE_V, - CMPL_BASE_V, CMPL_BASE_V - }; + const uint32x4_t info3_v_mask = vdupq_n_u32(CMPL_BASE_V); uint32_t raw_cons = cpr->cp_raw_cons; uint32_t cons, mbcons; int nb_rx_pkts = 0; const uint64x2_t mb_init = {rxq->mbuf_initializer, 0}; - const uint32x4_t valid_target = { - !!(raw_cons & cp_ring_size), - !!(raw_cons & cp_ring_size), - !!(raw_cons & cp_ring_size), - !!(raw_cons & cp_ring_size) - }; + const uint32x4_t valid_target = + vdupq_n_u32(!!(raw_cons & cp_ring_size)); int i; /* If Rx Q was stopped return */ diff --git a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c index e4ba63551..e12bf8bb7 100644 --- a/drivers/net/bnxt/bnxt_rxtx_vec_sse.c +++ b/drivers/net/bnxt/bnxt_rxtx_vec_sse.c @@ -63,29 +63,14 @@ descs_to_mbufs(__m128i mm_rxcmp[4], __m128i mm_rxcmp1[4], 0xFF, 0xFF, 3, 2, /* pkt_len */ 0xFF, 0xFF, 0xFF, 0xFF); /* pkt_type (zeroes) */ const __m128i flags_type_mask = - _mm_set_epi32(RX_PKT_CMPL_FLAGS_ITYPE_MASK, - RX_PKT_CMPL_FLAGS_ITYPE_MASK, - RX_PKT_CMPL_FLAGS_ITYPE_MASK, - RX_PKT_CMPL_FLAGS_ITYPE_MASK); + _mm_set1_epi32(RX_PKT_CMPL_FLAGS_ITYPE_MASK); const __m128i flags2_mask1 = - _mm_set_epi32(RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN | - RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC, - RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN | - RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC, - RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN | - RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC, - RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN | - RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC); + _mm_set1_epi32(RX_PKT_CMPL_FLAGS2_META_FORMAT_VLAN | + RX_PKT_CMPL_FLAGS2_T_IP_CS_CALC); const __m128i flags2_mask2 = - _mm_set_epi32(RX_PKT_CMPL_FLAGS2_IP_TYPE, - RX_PKT_CMPL_FLAGS2_IP_TYPE, - RX_PKT_CMPL_FLAGS2_IP_TYPE, - RX_PKT_CMPL_FLAGS2_IP_TYPE); + _mm_set1_epi32(RX_PKT_CMPL_FLAGS2_IP_TYPE); const __m128i rss_mask = - 
_mm_set_epi32(RX_PKT_CMPL_FLAGS_RSS_VALID, - RX_PKT_CMPL_FLAGS_RSS_VALID, - RX_PKT_CMPL_FLAGS_RSS_VALID, - RX_PKT_CMPL_FLAGS_RSS_VALID); + _mm_set1_epi32(RX_PKT_CMPL_FLAGS_RSS_VALID); __m128i t0, t1, flags_type, flags2, index, errors, rss_flags; __m128i ptype_idx; uint32_t ol_flags; @@ -114,10 +99,10 @@ descs_to_mbufs(__m128i mm_rxcmp[4], __m128i mm_rxcmp1[4], t1 = _mm_unpackhi_epi32(mm_rxcmp1[2], mm_rxcmp1[3]); /* Compute ol_flags and checksum error indexes for four packets. */ - flags2 = _mm_and_si128(flags2, _mm_set_epi32(0x1F, 0x1F, 0x1F, 0x1F)); + flags2 = _mm_and_si128(flags2, _mm_set1_epi32(0x1F)); errors = _mm_srli_epi32(_mm_unpacklo_epi64(t0, t1), 4); - errors = _mm_and_si128(errors, _mm_set_epi32(0xF, 0xF, 0xF, 0xF)); + errors = _mm_and_si128(errors, _mm_set1_epi32(0xF)); errors = _mm_and_si128(errors, flags2); index = _mm_andnot_si128(errors, flags2); @@ -165,16 +150,12 @@ bnxt_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t rx_ring_size = rxr->rx_ring_struct->ring_size; struct cmpl_base *cp_desc_ring = cpr->cp_desc_ring; uint64_t valid, desc_valid_mask = ~0ULL; - const __m128i info3_v_mask = _mm_set_epi32(CMPL_BASE_V, CMPL_BASE_V, - CMPL_BASE_V, CMPL_BASE_V); + const __m128i info3_v_mask = _mm_set1_epi32(CMPL_BASE_V); uint32_t raw_cons = cpr->cp_raw_cons; uint32_t cons, mbcons; int nb_rx_pkts = 0; const __m128i valid_target = - _mm_set_epi32(!!(raw_cons & cp_ring_size), - !!(raw_cons & cp_ring_size), - !!(raw_cons & cp_ring_size), - !!(raw_cons & cp_ring_size)); + _mm_set1_epi32(!!(raw_cons & cp_ring_size)); int i; /* If Rx Q was stopped return */