@@ -474,7 +474,7 @@ _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
* will cause performance drop to get into this context.
*/
if (rxq->vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
- RTE_ETH_RX_OFFLOAD_RSS_HASH) {
+ (RTE_ETH_RX_OFFLOAD_RSS_HASH | RTE_ETH_RX_OFFLOAD_VLAN)) {
/* load bottom half of every 32B desc */
const __m128i raw_desc_bh7 =
_mm_load_si128
@@ -529,33 +529,112 @@ _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
* to shift the 32b RSS hash value to the
* highest 32b of each 128b before mask
*/
- __m256i rss_hash6_7 =
- _mm256_slli_epi64(raw_desc_bh6_7, 32);
- __m256i rss_hash4_5 =
- _mm256_slli_epi64(raw_desc_bh4_5, 32);
- __m256i rss_hash2_3 =
- _mm256_slli_epi64(raw_desc_bh2_3, 32);
- __m256i rss_hash0_1 =
- _mm256_slli_epi64(raw_desc_bh0_1, 32);
-
- __m256i rss_hash_msk =
- _mm256_set_epi32(0xFFFFFFFF, 0, 0, 0,
- 0xFFFFFFFF, 0, 0, 0);
-
- rss_hash6_7 = _mm256_and_si256
- (rss_hash6_7, rss_hash_msk);
- rss_hash4_5 = _mm256_and_si256
- (rss_hash4_5, rss_hash_msk);
- rss_hash2_3 = _mm256_and_si256
- (rss_hash2_3, rss_hash_msk);
- rss_hash0_1 = _mm256_and_si256
- (rss_hash0_1, rss_hash_msk);
-
- mb6_7 = _mm256_or_si256(mb6_7, rss_hash6_7);
- mb4_5 = _mm256_or_si256(mb4_5, rss_hash4_5);
- mb2_3 = _mm256_or_si256(mb2_3, rss_hash2_3);
- mb0_1 = _mm256_or_si256(mb0_1, rss_hash0_1);
- } /* if() on RSS hash parsing */
+ if (rxq->vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
+ RTE_ETH_RX_OFFLOAD_RSS_HASH) {
+ __m256i rss_hash6_7 =
+ _mm256_slli_epi64(raw_desc_bh6_7, 32);
+ __m256i rss_hash4_5 =
+ _mm256_slli_epi64(raw_desc_bh4_5, 32);
+ __m256i rss_hash2_3 =
+ _mm256_slli_epi64(raw_desc_bh2_3, 32);
+ __m256i rss_hash0_1 =
+ _mm256_slli_epi64(raw_desc_bh0_1, 32);
+
+ __m256i rss_hash_msk =
+ _mm256_set_epi32(0xFFFFFFFF, 0, 0, 0,
+ 0xFFFFFFFF, 0, 0, 0);
+
+ rss_hash6_7 = _mm256_and_si256
+ (rss_hash6_7, rss_hash_msk);
+ rss_hash4_5 = _mm256_and_si256
+ (rss_hash4_5, rss_hash_msk);
+ rss_hash2_3 = _mm256_and_si256
+ (rss_hash2_3, rss_hash_msk);
+ rss_hash0_1 = _mm256_and_si256
+ (rss_hash0_1, rss_hash_msk);
+
+ mb6_7 = _mm256_or_si256(mb6_7, rss_hash6_7);
+ mb4_5 = _mm256_or_si256(mb4_5, rss_hash4_5);
+ mb2_3 = _mm256_or_si256(mb2_3, rss_hash2_3);
+ mb0_1 = _mm256_or_si256(mb0_1, rss_hash0_1);
+ } /* if() on RSS hash parsing */
+
+ if (rxq->vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
+ RTE_ETH_RX_OFFLOAD_VLAN) {
+ /* merge the status/error-1 bits into one register */
+ const __m256i status1_4_7 =
+ _mm256_unpacklo_epi32(raw_desc_bh6_7,
+ raw_desc_bh4_5);
+ const __m256i status1_0_3 =
+ _mm256_unpacklo_epi32(raw_desc_bh2_3,
+ raw_desc_bh0_1);
+
+ const __m256i status1_0_7 =
+ _mm256_unpacklo_epi64(status1_4_7,
+ status1_0_3);
+
+ const __m256i l2tag2p_flag_mask =
+ _mm256_set1_epi32(1 << 11);
+
+ __m256i l2tag2p_flag_bits =
+ _mm256_and_si256
+ (status1_0_7, l2tag2p_flag_mask);
+
+ l2tag2p_flag_bits =
+ _mm256_srli_epi32(l2tag2p_flag_bits,
+ 11);
+
+ __m256i vlan_flags = _mm256_setzero_si256();
+ const __m256i l2tag2_flags_shuf =
+ _mm256_set_epi8(0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ /* end up 128-bits */
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0,
+ RTE_MBUF_F_RX_VLAN |
+ RTE_MBUF_F_RX_VLAN_STRIPPED,
+ 0);
+ vlan_flags =
+ _mm256_shuffle_epi8(l2tag2_flags_shuf,
+ l2tag2p_flag_bits);
+
+ /* merge with vlan_flags */
+ mbuf_flags = _mm256_or_si256
+ (mbuf_flags, vlan_flags);
+
+ /* L2TAG2_2 */
+ __m256i vlan_tci6_7 =
+ _mm256_slli_si256(raw_desc_bh6_7, 4);
+ __m256i vlan_tci4_5 =
+ _mm256_slli_si256(raw_desc_bh4_5, 4);
+ __m256i vlan_tci2_3 =
+ _mm256_slli_si256(raw_desc_bh2_3, 4);
+ __m256i vlan_tci0_1 =
+ _mm256_slli_si256(raw_desc_bh0_1, 4);
+
+ const __m256i vlan_tci_msk =
+ _mm256_set_epi32(0, 0xFFFF0000, 0, 0,
+ 0, 0xFFFF0000, 0, 0);
+
+ vlan_tci6_7 = _mm256_and_si256
+ (vlan_tci6_7, vlan_tci_msk);
+ vlan_tci4_5 = _mm256_and_si256
+ (vlan_tci4_5, vlan_tci_msk);
+ vlan_tci2_3 = _mm256_and_si256
+ (vlan_tci2_3, vlan_tci_msk);
+ vlan_tci0_1 = _mm256_and_si256
+ (vlan_tci0_1, vlan_tci_msk);
+
+ mb6_7 = _mm256_or_si256(mb6_7, vlan_tci6_7);
+ mb4_5 = _mm256_or_si256(mb4_5, vlan_tci4_5);
+ mb2_3 = _mm256_or_si256(mb2_3, vlan_tci2_3);
+ mb0_1 = _mm256_or_si256(mb0_1, vlan_tci0_1);
+ }
+ }
#endif
}
@@ -585,7 +585,7 @@ _ice_recv_raw_pkts_vec_avx512(struct ice_rx_queue *rxq,
* will cause performance drop to get into this context.
*/
if (rxq->vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
- RTE_ETH_RX_OFFLOAD_RSS_HASH) {
+ (RTE_ETH_RX_OFFLOAD_RSS_HASH | RTE_ETH_RX_OFFLOAD_VLAN)) {
/* load bottom half of every 32B desc */
const __m128i raw_desc_bh7 =
_mm_load_si128
@@ -640,33 +640,131 @@ _ice_recv_raw_pkts_vec_avx512(struct ice_rx_queue *rxq,
* to shift the 32b RSS hash value to the
* highest 32b of each 128b before mask
*/
- __m256i rss_hash6_7 =
- _mm256_slli_epi64(raw_desc_bh6_7, 32);
- __m256i rss_hash4_5 =
- _mm256_slli_epi64(raw_desc_bh4_5, 32);
- __m256i rss_hash2_3 =
- _mm256_slli_epi64(raw_desc_bh2_3, 32);
- __m256i rss_hash0_1 =
- _mm256_slli_epi64(raw_desc_bh0_1, 32);
-
- __m256i rss_hash_msk =
- _mm256_set_epi32(0xFFFFFFFF, 0, 0, 0,
- 0xFFFFFFFF, 0, 0, 0);
-
- rss_hash6_7 = _mm256_and_si256
- (rss_hash6_7, rss_hash_msk);
- rss_hash4_5 = _mm256_and_si256
- (rss_hash4_5, rss_hash_msk);
- rss_hash2_3 = _mm256_and_si256
- (rss_hash2_3, rss_hash_msk);
- rss_hash0_1 = _mm256_and_si256
- (rss_hash0_1, rss_hash_msk);
-
- mb6_7 = _mm256_or_si256(mb6_7, rss_hash6_7);
- mb4_5 = _mm256_or_si256(mb4_5, rss_hash4_5);
- mb2_3 = _mm256_or_si256(mb2_3, rss_hash2_3);
- mb0_1 = _mm256_or_si256(mb0_1, rss_hash0_1);
- } /* if() on RSS hash parsing */
+ if (rxq->vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
+ RTE_ETH_RX_OFFLOAD_RSS_HASH) {
+ __m256i rss_hash6_7 =
+ _mm256_slli_epi64(raw_desc_bh6_7, 32);
+ __m256i rss_hash4_5 =
+ _mm256_slli_epi64(raw_desc_bh4_5, 32);
+ __m256i rss_hash2_3 =
+ _mm256_slli_epi64(raw_desc_bh2_3, 32);
+ __m256i rss_hash0_1 =
+ _mm256_slli_epi64(raw_desc_bh0_1, 32);
+
+ __m256i rss_hash_msk =
+ _mm256_set_epi32(0xFFFFFFFF, 0, 0, 0,
+ 0xFFFFFFFF, 0, 0, 0);
+
+ rss_hash6_7 = _mm256_and_si256
+ (rss_hash6_7, rss_hash_msk);
+ rss_hash4_5 = _mm256_and_si256
+ (rss_hash4_5, rss_hash_msk);
+ rss_hash2_3 = _mm256_and_si256
+ (rss_hash2_3, rss_hash_msk);
+ rss_hash0_1 = _mm256_and_si256
+ (rss_hash0_1, rss_hash_msk);
+
+ mb6_7 = _mm256_or_si256(mb6_7, rss_hash6_7);
+ mb4_5 = _mm256_or_si256(mb4_5, rss_hash4_5);
+ mb2_3 = _mm256_or_si256(mb2_3, rss_hash2_3);
+ mb0_1 = _mm256_or_si256(mb0_1, rss_hash0_1);
+ } /* if() on RSS hash parsing */
+
+ if (rxq->vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
+ RTE_ETH_RX_OFFLOAD_VLAN) {
+ /* merge the status/error-1 bits into one register */
+ const __m256i status1_4_7 =
+ _mm256_unpacklo_epi32
+ (raw_desc_bh6_7,
+ raw_desc_bh4_5);
+ const __m256i status1_0_3 =
+ _mm256_unpacklo_epi32
+ (raw_desc_bh2_3,
+ raw_desc_bh0_1);
+
+ const __m256i status1_0_7 =
+ _mm256_unpacklo_epi64
+ (status1_4_7, status1_0_3);
+
+ const __m256i l2tag2p_flag_mask =
+ _mm256_set1_epi32
+ (1 << 11);
+
+ __m256i l2tag2p_flag_bits =
+ _mm256_and_si256
+ (status1_0_7,
+ l2tag2p_flag_mask);
+
+ l2tag2p_flag_bits =
+ _mm256_srli_epi32
+ (l2tag2p_flag_bits,
+ 11);
+ const __m256i l2tag2_flags_shuf =
+ _mm256_set_epi8
+ (0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ /* end up 128-bits */
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0,
+ RTE_MBUF_F_RX_VLAN |
+ RTE_MBUF_F_RX_VLAN_STRIPPED,
+ 0);
+ __m256i vlan_flags =
+ _mm256_shuffle_epi8
+ (l2tag2_flags_shuf,
+ l2tag2p_flag_bits);
+
+ /* merge with vlan_flags */
+ mbuf_flags = _mm256_or_si256
+ (mbuf_flags,
+ vlan_flags);
+
+ /* L2TAG2_2 */
+ __m256i vlan_tci6_7 =
+ _mm256_slli_si256
+ (raw_desc_bh6_7, 4);
+ __m256i vlan_tci4_5 =
+ _mm256_slli_si256
+ (raw_desc_bh4_5, 4);
+ __m256i vlan_tci2_3 =
+ _mm256_slli_si256
+ (raw_desc_bh2_3, 4);
+ __m256i vlan_tci0_1 =
+ _mm256_slli_si256
+ (raw_desc_bh0_1, 4);
+
+ const __m256i vlan_tci_msk =
+ _mm256_set_epi32
+ (0, 0xFFFF0000, 0, 0,
+ 0, 0xFFFF0000, 0, 0);
+
+ vlan_tci6_7 = _mm256_and_si256
+ (vlan_tci6_7,
+ vlan_tci_msk);
+ vlan_tci4_5 = _mm256_and_si256
+ (vlan_tci4_5,
+ vlan_tci_msk);
+ vlan_tci2_3 = _mm256_and_si256
+ (vlan_tci2_3,
+ vlan_tci_msk);
+ vlan_tci0_1 = _mm256_and_si256
+ (vlan_tci0_1,
+ vlan_tci_msk);
+
+ mb6_7 = _mm256_or_si256
+ (mb6_7, vlan_tci6_7);
+ mb4_5 = _mm256_or_si256
+ (mb4_5, vlan_tci4_5);
+ mb2_3 = _mm256_or_si256
+ (mb2_3, vlan_tci2_3);
+ mb0_1 = _mm256_or_si256
+ (mb0_1, vlan_tci0_1);
+ }
+ }
#endif
}
@@ -100,9 +100,15 @@ ice_rxq_rearm(struct ice_rx_queue *rxq)
ICE_PCI_REG_WC_WRITE(rxq->qrx_tail, rx_id);
}
+#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+static inline void
+ice_rx_desc_to_olflags_v(struct ice_rx_queue *rxq, __m128i descs[4], __m128i descs_bh[4],
+ struct rte_mbuf **rx_pkts)
+#else
static inline void
ice_rx_desc_to_olflags_v(struct ice_rx_queue *rxq, __m128i descs[4],
struct rte_mbuf **rx_pkts)
+#endif
{
const __m128i mbuf_init = _mm_set_epi64x(0, rxq->mbuf_initializer);
__m128i rearm0, rearm1, rearm2, rearm3;
@@ -214,6 +220,38 @@ ice_rx_desc_to_olflags_v(struct ice_rx_queue *rxq, __m128i descs[4],
/* merge the flags */
flags = _mm_or_si128(flags, rss_vlan);
+ #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+ if (rxq->vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
+ RTE_ETH_RX_OFFLOAD_VLAN) {
+ const __m128i l2tag2_mask =
+ _mm_set1_epi32(1 << 11);
+ const __m128i vlan_tci0_1 =
+ _mm_unpacklo_epi32(descs_bh[0], descs_bh[1]);
+ const __m128i vlan_tci2_3 =
+ _mm_unpacklo_epi32(descs_bh[2], descs_bh[3]);
+ const __m128i vlan_tci0_3 =
+ _mm_unpacklo_epi64(vlan_tci0_1, vlan_tci2_3);
+
+ __m128i vlan_bits = _mm_and_si128(vlan_tci0_3, l2tag2_mask);
+
+ vlan_bits = _mm_srli_epi32(vlan_bits, 11);
+
+ const __m128i vlan_flags_shuf =
+ _mm_set_epi8(0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0,
+ RTE_MBUF_F_RX_VLAN |
+ RTE_MBUF_F_RX_VLAN_STRIPPED,
+ 0);
+
+ const __m128i vlan_flags = _mm_shuffle_epi8(vlan_flags_shuf, vlan_bits);
+
+ /* merge with vlan_flags */
+ flags = _mm_or_si128(flags, vlan_flags);
+ }
+#endif
+
if (rxq->fdir_enabled) {
const __m128i fdir_id0_1 =
_mm_unpackhi_epi32(descs[0], descs[1]);
@@ -405,6 +443,9 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
pos += ICE_DESCS_PER_LOOP,
rxdp += ICE_DESCS_PER_LOOP) {
__m128i descs[ICE_DESCS_PER_LOOP];
+ #ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
+ __m128i descs_bh[ICE_DESCS_PER_LOOP];
+ #endif
__m128i pkt_mb0, pkt_mb1, pkt_mb2, pkt_mb3;
__m128i staterr, sterr_tmp1, sterr_tmp2;
/* 2 64 bit or 4 32 bit mbuf pointers in one XMM reg. */
@@ -463,8 +504,6 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
/* C.1 4=>2 filter staterr info only */
sterr_tmp1 = _mm_unpackhi_epi32(descs[1], descs[0]);
- ice_rx_desc_to_olflags_v(rxq, descs, &rx_pkts[pos]);
-
/* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
pkt_mb3 = _mm_add_epi16(pkt_mb3, crc_adjust);
pkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust);
@@ -479,21 +518,21 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
* will cause performance drop to get into this context.
*/
if (rxq->vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
- RTE_ETH_RX_OFFLOAD_RSS_HASH) {
+ (RTE_ETH_RX_OFFLOAD_RSS_HASH | RTE_ETH_RX_OFFLOAD_VLAN)) {
/* load bottom half of every 32B desc */
- const __m128i raw_desc_bh3 =
+ descs_bh[3] =
_mm_load_si128
((void *)(&rxdp[3].wb.status_error1));
rte_compiler_barrier();
- const __m128i raw_desc_bh2 =
+ descs_bh[2] =
_mm_load_si128
((void *)(&rxdp[2].wb.status_error1));
rte_compiler_barrier();
- const __m128i raw_desc_bh1 =
+ descs_bh[1] =
_mm_load_si128
((void *)(&rxdp[1].wb.status_error1));
rte_compiler_barrier();
- const __m128i raw_desc_bh0 =
+ descs_bh[0] =
_mm_load_si128
((void *)(&rxdp[0].wb.status_error1));
@@ -501,32 +540,59 @@ _ice_recv_raw_pkts_vec(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
* to shift the 32b RSS hash value to the
* highest 32b of each 128b before mask
*/
- __m128i rss_hash3 =
- _mm_slli_epi64(raw_desc_bh3, 32);
- __m128i rss_hash2 =
- _mm_slli_epi64(raw_desc_bh2, 32);
- __m128i rss_hash1 =
- _mm_slli_epi64(raw_desc_bh1, 32);
- __m128i rss_hash0 =
- _mm_slli_epi64(raw_desc_bh0, 32);
-
- __m128i rss_hash_msk =
- _mm_set_epi32(0xFFFFFFFF, 0, 0, 0);
-
- rss_hash3 = _mm_and_si128
- (rss_hash3, rss_hash_msk);
- rss_hash2 = _mm_and_si128
- (rss_hash2, rss_hash_msk);
- rss_hash1 = _mm_and_si128
- (rss_hash1, rss_hash_msk);
- rss_hash0 = _mm_and_si128
- (rss_hash0, rss_hash_msk);
-
- pkt_mb3 = _mm_or_si128(pkt_mb3, rss_hash3);
- pkt_mb2 = _mm_or_si128(pkt_mb2, rss_hash2);
- pkt_mb1 = _mm_or_si128(pkt_mb1, rss_hash1);
- pkt_mb0 = _mm_or_si128(pkt_mb0, rss_hash0);
- } /* if() on RSS hash parsing */
+ if (rxq->vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
+ RTE_ETH_RX_OFFLOAD_RSS_HASH) {
+ __m128i rss_hash3 =
+ _mm_slli_epi64(descs_bh[3], 32);
+ __m128i rss_hash2 =
+ _mm_slli_epi64(descs_bh[2], 32);
+ __m128i rss_hash1 =
+ _mm_slli_epi64(descs_bh[1], 32);
+ __m128i rss_hash0 =
+ _mm_slli_epi64(descs_bh[0], 32);
+
+ __m128i rss_hash_msk =
+ _mm_set_epi32(0xFFFFFFFF, 0, 0, 0);
+
+ rss_hash3 = _mm_and_si128
+ (rss_hash3, rss_hash_msk);
+ rss_hash2 = _mm_and_si128
+ (rss_hash2, rss_hash_msk);
+ rss_hash1 = _mm_and_si128
+ (rss_hash1, rss_hash_msk);
+ rss_hash0 = _mm_and_si128
+ (rss_hash0, rss_hash_msk);
+
+ pkt_mb3 = _mm_or_si128(pkt_mb3, rss_hash3);
+ pkt_mb2 = _mm_or_si128(pkt_mb2, rss_hash2);
+ pkt_mb1 = _mm_or_si128(pkt_mb1, rss_hash1);
+ pkt_mb0 = _mm_or_si128(pkt_mb0, rss_hash0);
+ } /* if() on RSS hash parsing */
+
+ if (rxq->vsi->adapter->pf.dev_data->dev_conf.rxmode.offloads &
+ RTE_ETH_RX_OFFLOAD_VLAN) {
+ /* L2TAG2_2 */
+ __m128i vlan_tci3 = _mm_slli_si128(descs_bh[3], 4);
+ __m128i vlan_tci2 = _mm_slli_si128(descs_bh[2], 4);
+ __m128i vlan_tci1 = _mm_slli_si128(descs_bh[1], 4);
+ __m128i vlan_tci0 = _mm_slli_si128(descs_bh[0], 4);
+
+ const __m128i vlan_tci_msk = _mm_set_epi32(0, 0xFFFF0000, 0, 0);
+
+ vlan_tci3 = _mm_and_si128(vlan_tci3, vlan_tci_msk);
+ vlan_tci2 = _mm_and_si128(vlan_tci2, vlan_tci_msk);
+ vlan_tci1 = _mm_and_si128(vlan_tci1, vlan_tci_msk);
+ vlan_tci0 = _mm_and_si128(vlan_tci0, vlan_tci_msk);
+
+ pkt_mb3 = _mm_or_si128(pkt_mb3, vlan_tci3);
+ pkt_mb2 = _mm_or_si128(pkt_mb2, vlan_tci2);
+ pkt_mb1 = _mm_or_si128(pkt_mb1, vlan_tci1);
+ pkt_mb0 = _mm_or_si128(pkt_mb0, vlan_tci0);
+ }
+ ice_rx_desc_to_olflags_v(rxq, descs, descs_bh, &rx_pkts[pos]);
+ }
+#else
+ ice_rx_desc_to_olflags_v(rxq, descs, &rx_pkts[pos]);
#endif
/* C.2 get 4 pkts staterr value */