diff mbox series

[v7] net/ice: fix outer checksum on cvl unknown

Message ID 20201123083455.87600-1-murphyx.yang@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Qi Zhang
Headers show
Series [v7] net/ice: fix outer checksum on cvl unknown | expand

Checks

Context Check Description
ci/travis-robot success Travis build: passed
ci/iol-testing warning Testing issues
ci/Intel-compilation success Compilation OK
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-broadcom-Functional success Functional Testing PASS
ci/iol-broadcom-Performance success Performance Testing PASS
ci/checkpatch success coding style OK

Commit Message

Murphy Yang Nov. 23, 2020, 8:34 a.m. UTC
When received tunneled packets, the testpmd output log shows 'ol_flags'
value always is 'PKT_RX_OUTER_L4_CKSUM_UNKNOWN', but expected value is
'PKT_RX_OUTER_L4_CKSUM_GOOD' or 'PKT_RX_OUTER_L4_CKSUM_BAD'.

Add the 'PKT_RX_OUTER_L4_CKSUM_GOOD' and 'PKT_RX_OUTER_L4_CKSUM_BAD' to
'flags' for normal path, 'l3_l4_flags_shuf' for AVX2 and AVX512 vector
path and 'cksum_flags' for SSE vector path to ensure that the 'ol_flags'
can match correct flags.

Fixes: dbf3c0e77a22 ("net/ice: handle Rx flex descriptor")
Fixes: 4ab7dbb0a0f6 ("net/ice: switch to Rx flexible descriptor in AVX path")
Fixes: ece1f8a8f1c8 ("net/ice: switch to flexible descriptor in SSE path")

Signed-off-by: Murphy Yang <murphyx.yang@intel.com>
---
v7:
- fix compile error with default target on SSE vector path.
v6:
- rename variable name.
- update comments.
v5:
- fix outer L4 checksum mask for vector path.
v4:
- cover AVX512 vector path.
v3:
- add PKT_RX_OUTER_L4_CKSUM_GOOD in AVX2 and SSE vector path.
- rename variable name.
v2:
- cover AVX2 and SSE vector path
 drivers/net/ice/ice_rxtx.c            |   5 ++
 drivers/net/ice/ice_rxtx_vec_avx2.c   | 118 +++++++++++++++++++-------
 drivers/net/ice/ice_rxtx_vec_avx512.c | 117 ++++++++++++++++++-------
 drivers/net/ice/ice_rxtx_vec_sse.c    |  78 ++++++++++++-----
 4 files changed, 233 insertions(+), 85 deletions(-)

Comments

Xie, WeiX Nov. 23, 2020, 8:56 a.m. UTC | #1
Tested-by:  Xie,WeiX < weix.xie@intel.com>

Regards,
Xie Wei


> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Murphy Yang
> Sent: Monday, November 23, 2020 4:35 PM
> To: dev@dpdk.org
> Cc: Yang, Qiming <qiming.yang@intel.com>; Zhang, Qi Z
> <qi.z.zhang@intel.com>; Yang, SteveX <stevex.yang@intel.com>; Rong, Leyi
> <leyi.rong@intel.com>; Lu, Wenzhuo <wenzhuo.lu@intel.com>; Yang,
> MurphyX <murphyx.yang@intel.com>
> Subject: [dpdk-dev] [PATCH v7] net/ice: fix outer checksum on cvl unknown
> 
> When received tunneled packets, the testpmd output log shows 'ol_flags'
> value always is 'PKT_RX_OUTER_L4_CKSUM_UNKNOWN', but expected
> value is 'PKT_RX_OUTER_L4_CKSUM_GOOD' or
> 'PKT_RX_OUTER_L4_CKSUM_BAD'.
> 
> Add the 'PKT_RX_OUTER_L4_CKSUM_GOOD' and
> 'PKT_RX_OUTER_L4_CKSUM_BAD' to 'flags' for normal path,
> 'l3_l4_flags_shuf' for AVX2 and AVX512 vector path and 'cksum_flags' for SSE
> vector path to ensure that the 'ol_flags'
> can match correct flags.
> 
> Fixes: dbf3c0e77a22 ("net/ice: handle Rx flex descriptor")
> Fixes: 4ab7dbb0a0f6 ("net/ice: switch to Rx flexible descriptor in AVX path")
> Fixes: ece1f8a8f1c8 ("net/ice: switch to flexible descriptor in SSE path")
> 
> Signed-off-by: Murphy Yang <murphyx.yang@intel.com>
> ---
> v7:
> - fix compile error with default target on SSE vector path.
> v6:
> - rename variable name.
> - update comments.
> v5:
> - fix outer L4 checksum mask for vector path.
> v4:
> - cover AVX512 vector path.
> v3:
> - add PKT_RX_OUTER_L4_CKSUM_GOOD in AVX2 and SSE vector path.
> - rename variable name.
> v2:
> - cover AVX2 and SSE vector path
>  drivers/net/ice/ice_rxtx.c            |   5 ++
>  drivers/net/ice/ice_rxtx_vec_avx2.c   | 118 +++++++++++++++++++-------
>  drivers/net/ice/ice_rxtx_vec_avx512.c | 117 ++++++++++++++++++-------
>  drivers/net/ice/ice_rxtx_vec_sse.c    |  78 ++++++++++++-----
>  4 files changed, 233 insertions(+), 85 deletions(-)
> 
> diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c index
> 5fbd68eafc..24a7caeb98 100644
> --- a/drivers/net/ice/ice_rxtx.c
> +++ b/drivers/net/ice/ice_rxtx.c
> @@ -1451,6 +1451,11 @@ ice_rxd_error_to_pkt_flags(uint16_t stat_err0)
>  	if (unlikely(stat_err0 & (1 <<
> ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S)))
>  		flags |= PKT_RX_EIP_CKSUM_BAD;
> 
> +	if (unlikely(stat_err0 & (1 <<
> ICE_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S)))
> +		flags |= PKT_RX_OUTER_L4_CKSUM_BAD;
> +	else
> +		flags |= PKT_RX_OUTER_L4_CKSUM_GOOD;
> +
>  	return flags;
>  }
> 
> diff --git a/drivers/net/ice/ice_rxtx_vec_avx2.c
> b/drivers/net/ice/ice_rxtx_vec_avx2.c
> index b72a9e7025..7838e17787 100644
> --- a/drivers/net/ice/ice_rxtx_vec_avx2.c
> +++ b/drivers/net/ice/ice_rxtx_vec_avx2.c
> @@ -251,43 +251,88 @@ _ice_recv_raw_pkts_vec_avx2(struct
> ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
>  	 * bit13 is for VLAN indication.
>  	 */
>  	const __m256i flags_mask =
> -		 _mm256_set1_epi32((7 << 4) | (1 << 12) | (1 << 13));
> +		 _mm256_set1_epi32((0xF << 4) | (1 << 12) | (1 << 13));
>  	/**
>  	 * data to be shuffled by the result of the flags mask shifted by 4
>  	 * bits.  This gives use the l3_l4 flags.
>  	 */
> -	const __m256i l3_l4_flags_shuf = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0,
> 0,
> -			/* shift right 1 bit to make sure it not exceed 255 */
> -			(PKT_RX_EIP_CKSUM_BAD |
> PKT_RX_L4_CKSUM_BAD |
> -			 PKT_RX_IP_CKSUM_BAD) >> 1,
> -			(PKT_RX_EIP_CKSUM_BAD |
> PKT_RX_L4_CKSUM_BAD |
> -			 PKT_RX_IP_CKSUM_GOOD) >> 1,
> -			(PKT_RX_EIP_CKSUM_BAD |
> PKT_RX_L4_CKSUM_GOOD |
> -			 PKT_RX_IP_CKSUM_BAD) >> 1,
> -			(PKT_RX_EIP_CKSUM_BAD |
> PKT_RX_L4_CKSUM_GOOD |
> -			 PKT_RX_IP_CKSUM_GOOD) >> 1,
> -			(PKT_RX_L4_CKSUM_BAD |
> PKT_RX_IP_CKSUM_BAD) >> 1,
> -			(PKT_RX_L4_CKSUM_BAD |
> PKT_RX_IP_CKSUM_GOOD) >> 1,
> -			(PKT_RX_L4_CKSUM_GOOD |
> PKT_RX_IP_CKSUM_BAD) >> 1,
> -			(PKT_RX_L4_CKSUM_GOOD |
> PKT_RX_IP_CKSUM_GOOD) >> 1,
> -			/* second 128-bits */
> -			0, 0, 0, 0, 0, 0, 0, 0,
> -			(PKT_RX_EIP_CKSUM_BAD |
> PKT_RX_L4_CKSUM_BAD |
> -			 PKT_RX_IP_CKSUM_BAD) >> 1,
> -			(PKT_RX_EIP_CKSUM_BAD |
> PKT_RX_L4_CKSUM_BAD |
> -			 PKT_RX_IP_CKSUM_GOOD) >> 1,
> -			(PKT_RX_EIP_CKSUM_BAD |
> PKT_RX_L4_CKSUM_GOOD |
> -			 PKT_RX_IP_CKSUM_BAD) >> 1,
> -			(PKT_RX_EIP_CKSUM_BAD |
> PKT_RX_L4_CKSUM_GOOD |
> -			 PKT_RX_IP_CKSUM_GOOD) >> 1,
> -			(PKT_RX_L4_CKSUM_BAD |
> PKT_RX_IP_CKSUM_BAD) >> 1,
> -			(PKT_RX_L4_CKSUM_BAD |
> PKT_RX_IP_CKSUM_GOOD) >> 1,
> -			(PKT_RX_L4_CKSUM_GOOD |
> PKT_RX_IP_CKSUM_BAD) >> 1,
> -			(PKT_RX_L4_CKSUM_GOOD |
> PKT_RX_IP_CKSUM_GOOD) >> 1);
> +	const __m256i l3_l4_flags_shuf =
> +		_mm256_set_epi8((PKT_RX_OUTER_L4_CKSUM_BAD >> 20
> |
> +		 PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
> +		  PKT_RX_IP_CKSUM_BAD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
> PKT_RX_EIP_CKSUM_BAD |
> +		 PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
> PKT_RX_EIP_CKSUM_BAD |
> +		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
> PKT_RX_EIP_CKSUM_BAD |
> +		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >>
> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
> PKT_RX_L4_CKSUM_BAD  |
> +		 PKT_RX_IP_CKSUM_BAD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
> PKT_RX_L4_CKSUM_BAD  |
> +		 PKT_RX_IP_CKSUM_GOOD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
> PKT_RX_L4_CKSUM_GOOD |
> +		 PKT_RX_IP_CKSUM_BAD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
> PKT_RX_L4_CKSUM_GOOD |
> +		 PKT_RX_IP_CKSUM_GOOD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
> PKT_RX_EIP_CKSUM_BAD |
> +		 PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
> PKT_RX_EIP_CKSUM_BAD |
> +		 PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
> PKT_RX_EIP_CKSUM_BAD |
> +		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
> PKT_RX_EIP_CKSUM_BAD |
> +		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >>
> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
> PKT_RX_L4_CKSUM_BAD |
> +		 PKT_RX_IP_CKSUM_BAD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
> PKT_RX_L4_CKSUM_BAD |
> +		 PKT_RX_IP_CKSUM_GOOD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
> PKT_RX_L4_CKSUM_GOOD |
> +		 PKT_RX_IP_CKSUM_BAD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
> PKT_RX_L4_CKSUM_GOOD |
> +		 PKT_RX_IP_CKSUM_GOOD) >> 1,
> +		/**
> +		 * second 128-bits
> +		 * shift right 20 bits to use the low two bits to indicate
> +		 * outer checksum status
> +		 * shift right 1 bit to make sure it not exceed 255
> +		 */
> +		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
> PKT_RX_EIP_CKSUM_BAD |
> +		 PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
> PKT_RX_EIP_CKSUM_BAD |
> +		 PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
> PKT_RX_EIP_CKSUM_BAD |
> +		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
> PKT_RX_EIP_CKSUM_BAD |
> +		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >>
> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
> PKT_RX_L4_CKSUM_BAD  |
> +		 PKT_RX_IP_CKSUM_BAD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
> PKT_RX_L4_CKSUM_BAD  |
> +		 PKT_RX_IP_CKSUM_GOOD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
> PKT_RX_L4_CKSUM_GOOD |
> +		 PKT_RX_IP_CKSUM_BAD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
> PKT_RX_L4_CKSUM_GOOD |
> +		 PKT_RX_IP_CKSUM_GOOD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
> PKT_RX_EIP_CKSUM_BAD |
> +		 PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
> PKT_RX_EIP_CKSUM_BAD |
> +		 PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
> PKT_RX_EIP_CKSUM_BAD |
> +		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
> PKT_RX_EIP_CKSUM_BAD |
> +		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >>
> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
> PKT_RX_L4_CKSUM_BAD |
> +		 PKT_RX_IP_CKSUM_BAD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
> PKT_RX_L4_CKSUM_BAD |
> +		 PKT_RX_IP_CKSUM_GOOD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
> PKT_RX_L4_CKSUM_GOOD |
> +		 PKT_RX_IP_CKSUM_BAD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
> PKT_RX_L4_CKSUM_GOOD |
> +		 PKT_RX_IP_CKSUM_GOOD) >> 1);
>  	const __m256i cksum_mask =
> -		 _mm256_set1_epi32(PKT_RX_IP_CKSUM_GOOD |
> PKT_RX_IP_CKSUM_BAD |
> -				   PKT_RX_L4_CKSUM_GOOD |
> PKT_RX_L4_CKSUM_BAD |
> -				   PKT_RX_EIP_CKSUM_BAD);
> +		 _mm256_set1_epi32(PKT_RX_IP_CKSUM_MASK |
> +				   PKT_RX_L4_CKSUM_MASK |
> +				   PKT_RX_EIP_CKSUM_BAD |
> +				   PKT_RX_OUTER_L4_CKSUM_MASK);
>  	/**
>  	 * data to be shuffled by result of flag mask, shifted down 12.
>  	 * If RSS(bit12)/VLAN(bit13) are set,
> @@ -469,6 +514,15 @@ _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue
> *rxq, struct rte_mbuf **rx_pkts,
>  		__m256i l3_l4_flags =
> _mm256_shuffle_epi8(l3_l4_flags_shuf,
>  				_mm256_srli_epi32(flag_bits, 4));
>  		l3_l4_flags = _mm256_slli_epi32(l3_l4_flags, 1);
> +
> +		__m256i l4_outer_mask = _mm256_set1_epi32(0x6);
> +		__m256i l4_outer_flags =
> +				_mm256_and_si256(l3_l4_flags,
> l4_outer_mask);
> +		l4_outer_flags = _mm256_slli_epi32(l4_outer_flags, 20);
> +
> +		__m256i l3_l4_mask = _mm256_set1_epi32(~0x6);
> +		l3_l4_flags = _mm256_and_si256(l3_l4_flags, l3_l4_mask);
> +		l3_l4_flags = _mm256_or_si256(l3_l4_flags, l4_outer_flags);
>  		l3_l4_flags = _mm256_and_si256(l3_l4_flags, cksum_mask);
>  		/* set rss and vlan flags */
>  		const __m256i rss_vlan_flag_bits =
> diff --git a/drivers/net/ice/ice_rxtx_vec_avx512.c
> b/drivers/net/ice/ice_rxtx_vec_avx512.c
> index df5d2be1e6..fd5d724329 100644
> --- a/drivers/net/ice/ice_rxtx_vec_avx512.c
> +++ b/drivers/net/ice/ice_rxtx_vec_avx512.c
> @@ -230,43 +230,88 @@ _ice_recv_raw_pkts_vec_avx512(struct
> ice_rx_queue *rxq,
>  	 * bit13 is for VLAN indication.
>  	 */
>  	const __m256i flags_mask =
> -		 _mm256_set1_epi32((7 << 4) | (1 << 12) | (1 << 13));
> +		 _mm256_set1_epi32((0xF << 4) | (1 << 12) | (1 << 13));
>  	/**
>  	 * data to be shuffled by the result of the flags mask shifted by 4
>  	 * bits.  This gives use the l3_l4 flags.
>  	 */
> -	const __m256i l3_l4_flags_shuf = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0,
> 0,
> -			/* shift right 1 bit to make sure it not exceed 255 */
> -			(PKT_RX_EIP_CKSUM_BAD |
> PKT_RX_L4_CKSUM_BAD |
> -			 PKT_RX_IP_CKSUM_BAD) >> 1,
> -			(PKT_RX_EIP_CKSUM_BAD |
> PKT_RX_L4_CKSUM_BAD |
> -			 PKT_RX_IP_CKSUM_GOOD) >> 1,
> -			(PKT_RX_EIP_CKSUM_BAD |
> PKT_RX_L4_CKSUM_GOOD |
> -			 PKT_RX_IP_CKSUM_BAD) >> 1,
> -			(PKT_RX_EIP_CKSUM_BAD |
> PKT_RX_L4_CKSUM_GOOD |
> -			 PKT_RX_IP_CKSUM_GOOD) >> 1,
> -			(PKT_RX_L4_CKSUM_BAD |
> PKT_RX_IP_CKSUM_BAD) >> 1,
> -			(PKT_RX_L4_CKSUM_BAD |
> PKT_RX_IP_CKSUM_GOOD) >> 1,
> -			(PKT_RX_L4_CKSUM_GOOD |
> PKT_RX_IP_CKSUM_BAD) >> 1,
> -			(PKT_RX_L4_CKSUM_GOOD |
> PKT_RX_IP_CKSUM_GOOD) >> 1,
> -			/* 2nd 128-bits */
> -			0, 0, 0, 0, 0, 0, 0, 0,
> -			(PKT_RX_EIP_CKSUM_BAD |
> PKT_RX_L4_CKSUM_BAD |
> -			 PKT_RX_IP_CKSUM_BAD) >> 1,
> -			(PKT_RX_EIP_CKSUM_BAD |
> PKT_RX_L4_CKSUM_BAD |
> -			 PKT_RX_IP_CKSUM_GOOD) >> 1,
> -			(PKT_RX_EIP_CKSUM_BAD |
> PKT_RX_L4_CKSUM_GOOD |
> -			 PKT_RX_IP_CKSUM_BAD) >> 1,
> -			(PKT_RX_EIP_CKSUM_BAD |
> PKT_RX_L4_CKSUM_GOOD |
> -			 PKT_RX_IP_CKSUM_GOOD) >> 1,
> -			(PKT_RX_L4_CKSUM_BAD |
> PKT_RX_IP_CKSUM_BAD) >> 1,
> -			(PKT_RX_L4_CKSUM_BAD |
> PKT_RX_IP_CKSUM_GOOD) >> 1,
> -			(PKT_RX_L4_CKSUM_GOOD |
> PKT_RX_IP_CKSUM_BAD) >> 1,
> -			(PKT_RX_L4_CKSUM_GOOD |
> PKT_RX_IP_CKSUM_GOOD) >> 1);
> +	const __m256i l3_l4_flags_shuf =
> +		_mm256_set_epi8((PKT_RX_OUTER_L4_CKSUM_BAD >> 20
> |
> +		 PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
> +		  PKT_RX_IP_CKSUM_BAD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
> PKT_RX_EIP_CKSUM_BAD |
> +		 PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
> PKT_RX_EIP_CKSUM_BAD |
> +		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
> PKT_RX_EIP_CKSUM_BAD |
> +		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >>
> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
> PKT_RX_L4_CKSUM_BAD  |
> +		 PKT_RX_IP_CKSUM_BAD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
> PKT_RX_L4_CKSUM_BAD  |
> +		 PKT_RX_IP_CKSUM_GOOD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
> PKT_RX_L4_CKSUM_GOOD |
> +		 PKT_RX_IP_CKSUM_BAD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
> PKT_RX_L4_CKSUM_GOOD |
> +		 PKT_RX_IP_CKSUM_GOOD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
> PKT_RX_EIP_CKSUM_BAD |
> +		 PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
> PKT_RX_EIP_CKSUM_BAD |
> +		 PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
> PKT_RX_EIP_CKSUM_BAD |
> +		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
> PKT_RX_EIP_CKSUM_BAD |
> +		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >>
> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
> PKT_RX_L4_CKSUM_BAD |
> +		 PKT_RX_IP_CKSUM_BAD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
> PKT_RX_L4_CKSUM_BAD |
> +		 PKT_RX_IP_CKSUM_GOOD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
> PKT_RX_L4_CKSUM_GOOD |
> +		 PKT_RX_IP_CKSUM_BAD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
> PKT_RX_L4_CKSUM_GOOD |
> +		 PKT_RX_IP_CKSUM_GOOD) >> 1,
> +		/**
> +		 * second 128-bits
> +		 * shift right 20 bits to use the low two bits to indicate
> +		 * outer checksum status
> +		 * shift right 1 bit to make sure it not exceed 255
> +		 */
> +		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
> PKT_RX_EIP_CKSUM_BAD |
> +		 PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
> PKT_RX_EIP_CKSUM_BAD |
> +		 PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
> PKT_RX_EIP_CKSUM_BAD |
> +		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
> PKT_RX_EIP_CKSUM_BAD |
> +		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >>
> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
> PKT_RX_L4_CKSUM_BAD  |
> +		 PKT_RX_IP_CKSUM_BAD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
> PKT_RX_L4_CKSUM_BAD  |
> +		 PKT_RX_IP_CKSUM_GOOD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
> PKT_RX_L4_CKSUM_GOOD |
> +		 PKT_RX_IP_CKSUM_BAD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
> PKT_RX_L4_CKSUM_GOOD |
> +		 PKT_RX_IP_CKSUM_GOOD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
> PKT_RX_EIP_CKSUM_BAD |
> +		 PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
> PKT_RX_EIP_CKSUM_BAD |
> +		 PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
> PKT_RX_EIP_CKSUM_BAD |
> +		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
> PKT_RX_EIP_CKSUM_BAD |
> +		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >>
> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
> PKT_RX_L4_CKSUM_BAD |
> +		 PKT_RX_IP_CKSUM_BAD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
> PKT_RX_L4_CKSUM_BAD |
> +		 PKT_RX_IP_CKSUM_GOOD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
> PKT_RX_L4_CKSUM_GOOD |
> +		 PKT_RX_IP_CKSUM_BAD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
> PKT_RX_L4_CKSUM_GOOD |
> +		 PKT_RX_IP_CKSUM_GOOD) >> 1);
>  	const __m256i cksum_mask =
> -		 _mm256_set1_epi32(PKT_RX_IP_CKSUM_GOOD |
> PKT_RX_IP_CKSUM_BAD |
> -				   PKT_RX_L4_CKSUM_GOOD |
> PKT_RX_L4_CKSUM_BAD |
> -				   PKT_RX_EIP_CKSUM_BAD);
> +		 _mm256_set1_epi32(PKT_RX_IP_CKSUM_MASK |
> +				   PKT_RX_L4_CKSUM_MASK |
> +				   PKT_RX_EIP_CKSUM_BAD |
> +				   PKT_RX_OUTER_L4_CKSUM_MASK);
>  	/**
>  	 * data to be shuffled by result of flag mask, shifted down 12.
>  	 * If RSS(bit12)/VLAN(bit13) are set,
> @@ -451,6 +496,14 @@ _ice_recv_raw_pkts_vec_avx512(struct
> ice_rx_queue *rxq,
>  		__m256i l3_l4_flags =
> _mm256_shuffle_epi8(l3_l4_flags_shuf,
>  				_mm256_srli_epi32(flag_bits, 4));
>  		l3_l4_flags = _mm256_slli_epi32(l3_l4_flags, 1);
> +		__m256i l4_outer_mask = _mm256_set1_epi32(0x6);
> +		__m256i l4_outer_flags =
> +				_mm256_and_si256(l3_l4_flags,
> l4_outer_mask);
> +		l4_outer_flags = _mm256_slli_epi32(l4_outer_flags, 20);
> +
> +		__m256i l3_l4_mask = _mm256_set1_epi32(~0x6);
> +		l3_l4_flags = _mm256_and_si256(l3_l4_flags, l3_l4_mask);
> +		l3_l4_flags = _mm256_or_si256(l3_l4_flags, l4_outer_flags);
>  		l3_l4_flags = _mm256_and_si256(l3_l4_flags, cksum_mask);
>  		/* set rss and vlan flags */
>  		const __m256i rss_vlan_flag_bits =
> diff --git a/drivers/net/ice/ice_rxtx_vec_sse.c
> b/drivers/net/ice/ice_rxtx_vec_sse.c
> index 626364719b..87e0c3db2e 100644
> --- a/drivers/net/ice/ice_rxtx_vec_sse.c
> +++ b/drivers/net/ice/ice_rxtx_vec_sse.c
> @@ -114,39 +114,67 @@ ice_rx_desc_to_olflags_v(struct ice_rx_queue *rxq,
> __m128i descs[4],
>  	 * bit12 for RSS indication.
>  	 * bit13 for VLAN indication.
>  	 */
> -	const __m128i desc_mask = _mm_set_epi32(0x3070, 0x3070,
> -						0x3070, 0x3070);
> -
> +	const __m128i desc_mask = _mm_set_epi32(0x30f0, 0x30f0,
> +						0x30f0, 0x30f0);
>  	const __m128i cksum_mask =
> _mm_set_epi32(PKT_RX_IP_CKSUM_MASK |
>  						 PKT_RX_L4_CKSUM_MASK |
> +
> PKT_RX_OUTER_L4_CKSUM_MASK |
>  						 PKT_RX_EIP_CKSUM_BAD,
>  						 PKT_RX_IP_CKSUM_MASK |
>  						 PKT_RX_L4_CKSUM_MASK |
> +
> PKT_RX_OUTER_L4_CKSUM_MASK |
>  						 PKT_RX_EIP_CKSUM_BAD,
>  						 PKT_RX_IP_CKSUM_MASK |
>  						 PKT_RX_L4_CKSUM_MASK |
> +
> PKT_RX_OUTER_L4_CKSUM_MASK |
>  						 PKT_RX_EIP_CKSUM_BAD,
>  						 PKT_RX_IP_CKSUM_MASK |
>  						 PKT_RX_L4_CKSUM_MASK |
> +
> PKT_RX_OUTER_L4_CKSUM_MASK |
>  						 PKT_RX_EIP_CKSUM_BAD);
> 
>  	/* map the checksum, rss and vlan fields to the checksum, rss
>  	 * and vlan flag
>  	 */
> -	const __m128i cksum_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
> -			/* shift right 1 bit to make sure it not exceed 255 */
> -			(PKT_RX_EIP_CKSUM_BAD |
> PKT_RX_L4_CKSUM_BAD |
> -			 PKT_RX_IP_CKSUM_BAD) >> 1,
> -			(PKT_RX_EIP_CKSUM_BAD |
> PKT_RX_L4_CKSUM_BAD |
> -			 PKT_RX_IP_CKSUM_GOOD) >> 1,
> -			(PKT_RX_EIP_CKSUM_BAD |
> PKT_RX_L4_CKSUM_GOOD |
> -			 PKT_RX_IP_CKSUM_BAD) >> 1,
> -			(PKT_RX_EIP_CKSUM_BAD |
> PKT_RX_L4_CKSUM_GOOD |
> -			 PKT_RX_IP_CKSUM_GOOD) >> 1,
> -			(PKT_RX_L4_CKSUM_BAD |
> PKT_RX_IP_CKSUM_BAD) >> 1,
> -			(PKT_RX_L4_CKSUM_BAD |
> PKT_RX_IP_CKSUM_GOOD) >> 1,
> -			(PKT_RX_L4_CKSUM_GOOD |
> PKT_RX_IP_CKSUM_BAD) >> 1,
> -			(PKT_RX_L4_CKSUM_GOOD |
> PKT_RX_IP_CKSUM_GOOD) >> 1);
> +	const __m128i cksum_flags =
> +		_mm_set_epi8((PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
> +		 PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
> +		  PKT_RX_IP_CKSUM_BAD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
> PKT_RX_EIP_CKSUM_BAD |
> +		 PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
> PKT_RX_EIP_CKSUM_BAD |
> +		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
> PKT_RX_EIP_CKSUM_BAD |
> +		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >>
> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
> PKT_RX_L4_CKSUM_BAD |
> +		 PKT_RX_IP_CKSUM_BAD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
> PKT_RX_L4_CKSUM_BAD |
> +		 PKT_RX_IP_CKSUM_GOOD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
> PKT_RX_L4_CKSUM_GOOD |
> +		 PKT_RX_IP_CKSUM_BAD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
> PKT_RX_L4_CKSUM_GOOD |
> +		 PKT_RX_IP_CKSUM_GOOD) >> 1,
> +		/**
> +		 * shift right 20 bits to use the low two bits to indicate
> +		 * outer checksum status
> +		 * shift right 1 bit to make sure it not exceed 255
> +		 */
> +		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
> PKT_RX_EIP_CKSUM_BAD |
> +		 PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
> PKT_RX_EIP_CKSUM_BAD |
> +		 PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
> PKT_RX_EIP_CKSUM_BAD |
> +		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
> PKT_RX_EIP_CKSUM_BAD |
> +		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >>
> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
> PKT_RX_L4_CKSUM_BAD |
> +		 PKT_RX_IP_CKSUM_BAD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
> PKT_RX_L4_CKSUM_BAD |
> +		 PKT_RX_IP_CKSUM_GOOD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
> PKT_RX_L4_CKSUM_GOOD |
> +		 PKT_RX_IP_CKSUM_BAD) >> 1,
> +		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 |
> PKT_RX_L4_CKSUM_GOOD |
> +		 PKT_RX_IP_CKSUM_GOOD) >> 1);
> 
>  	const __m128i rss_vlan_flags = _mm_set_epi8(0, 0, 0, 0,
>  			0, 0, 0, 0,
> @@ -166,6 +194,14 @@ ice_rx_desc_to_olflags_v(struct ice_rx_queue *rxq,
> __m128i descs[4],
>  	flags = _mm_shuffle_epi8(cksum_flags, tmp_desc);
>  	/* then we shift left 1 bit */
>  	flags = _mm_slli_epi32(flags, 1);
> +
> +	__m128i l4_outer_mask = _mm_set_epi32(0x6, 0x6, 0x6, 0x6);
> +	__m128i l4_outer_flags = _mm_and_si128(flags, l4_outer_mask);
> +	l4_outer_flags = _mm_slli_epi32(l4_outer_flags, 20);
> +
> +	__m128i l3_l4_mask = _mm_set_epi32(~0x6, ~0x6, ~0x6, ~0x6);
> +	__m128i l3_l4_flags = _mm_and_si128(flags, l3_l4_mask);
> +	flags = _mm_or_si128(l3_l4_flags, l4_outer_flags);
>  	/* we need to mask out the reduntant bits introduced by RSS or
>  	 * VLAN fields.
>  	 */
> @@ -217,10 +253,10 @@ ice_rx_desc_to_olflags_v(struct ice_rx_queue *rxq,
> __m128i descs[4],
>  	 * appropriate flags means that we have to do a shift and blend for
>  	 * each mbuf before we do the write.
>  	 */
> -	rearm0 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 8),
> 0x10);
> -	rearm1 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 4),
> 0x10);
> -	rearm2 = _mm_blend_epi16(mbuf_init, flags, 0x10);
> -	rearm3 = _mm_blend_epi16(mbuf_init, _mm_srli_si128(flags, 4),
> 0x10);
> +	rearm0 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 8),
> 0x30);
> +	rearm1 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 4),
> 0x30);
> +	rearm2 = _mm_blend_epi16(mbuf_init, flags, 0x30);
> +	rearm3 = _mm_blend_epi16(mbuf_init, _mm_srli_si128(flags, 4),
> 0x30);
> 
>  	/* write the rearm data and the olflags in one write */
>  	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) !=
> --
> 2.17.1
diff mbox series

Patch

diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c
index 5fbd68eafc..24a7caeb98 100644
--- a/drivers/net/ice/ice_rxtx.c
+++ b/drivers/net/ice/ice_rxtx.c
@@ -1451,6 +1451,11 @@  ice_rxd_error_to_pkt_flags(uint16_t stat_err0)
 	if (unlikely(stat_err0 & (1 << ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S)))
 		flags |= PKT_RX_EIP_CKSUM_BAD;
 
+	if (unlikely(stat_err0 & (1 << ICE_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S)))
+		flags |= PKT_RX_OUTER_L4_CKSUM_BAD;
+	else
+		flags |= PKT_RX_OUTER_L4_CKSUM_GOOD;
+
 	return flags;
 }
 
diff --git a/drivers/net/ice/ice_rxtx_vec_avx2.c b/drivers/net/ice/ice_rxtx_vec_avx2.c
index b72a9e7025..7838e17787 100644
--- a/drivers/net/ice/ice_rxtx_vec_avx2.c
+++ b/drivers/net/ice/ice_rxtx_vec_avx2.c
@@ -251,43 +251,88 @@  _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 	 * bit13 is for VLAN indication.
 	 */
 	const __m256i flags_mask =
-		 _mm256_set1_epi32((7 << 4) | (1 << 12) | (1 << 13));
+		 _mm256_set1_epi32((0xF << 4) | (1 << 12) | (1 << 13));
 	/**
 	 * data to be shuffled by the result of the flags mask shifted by 4
 	 * bits.  This gives use the l3_l4 flags.
 	 */
-	const __m256i l3_l4_flags_shuf = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
-			/* shift right 1 bit to make sure it not exceed 255 */
-			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
-			 PKT_RX_IP_CKSUM_BAD) >> 1,
-			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
-			 PKT_RX_IP_CKSUM_GOOD) >> 1,
-			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
-			 PKT_RX_IP_CKSUM_BAD) >> 1,
-			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
-			 PKT_RX_IP_CKSUM_GOOD) >> 1,
-			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
-			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
-			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
-			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
-			/* second 128-bits */
-			0, 0, 0, 0, 0, 0, 0, 0,
-			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
-			 PKT_RX_IP_CKSUM_BAD) >> 1,
-			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
-			 PKT_RX_IP_CKSUM_GOOD) >> 1,
-			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
-			 PKT_RX_IP_CKSUM_BAD) >> 1,
-			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
-			 PKT_RX_IP_CKSUM_GOOD) >> 1,
-			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
-			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
-			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
-			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1);
+	const __m256i l3_l4_flags_shuf =
+		_mm256_set_epi8((PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
+		 PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+		  PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_BAD  |
+		 PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_BAD  |
+		 PKT_RX_IP_CKSUM_GOOD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD |
+		 PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD |
+		 PKT_RX_IP_CKSUM_GOOD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD |
+		 PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD |
+		 PKT_RX_IP_CKSUM_GOOD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD |
+		 PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD |
+		 PKT_RX_IP_CKSUM_GOOD) >> 1,
+		/**
+		 * second 128-bits
+		 * shift right 20 bits to use the low two bits to indicate
+		 * outer checksum status
+		 * shift right 1 bit to make sure it not exceed 255
+		 */
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_BAD  |
+		 PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_BAD  |
+		 PKT_RX_IP_CKSUM_GOOD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD |
+		 PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD |
+		 PKT_RX_IP_CKSUM_GOOD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD |
+		 PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD |
+		 PKT_RX_IP_CKSUM_GOOD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD |
+		 PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD |
+		 PKT_RX_IP_CKSUM_GOOD) >> 1);
 	const __m256i cksum_mask =
-		 _mm256_set1_epi32(PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD |
-				   PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD |
-				   PKT_RX_EIP_CKSUM_BAD);
+		 _mm256_set1_epi32(PKT_RX_IP_CKSUM_MASK |
+				   PKT_RX_L4_CKSUM_MASK |
+				   PKT_RX_EIP_CKSUM_BAD |
+				   PKT_RX_OUTER_L4_CKSUM_MASK);
 	/**
 	 * data to be shuffled by result of flag mask, shifted down 12.
 	 * If RSS(bit12)/VLAN(bit13) are set,
@@ -469,6 +514,15 @@  _ice_recv_raw_pkts_vec_avx2(struct ice_rx_queue *rxq, struct rte_mbuf **rx_pkts,
 		__m256i l3_l4_flags = _mm256_shuffle_epi8(l3_l4_flags_shuf,
 				_mm256_srli_epi32(flag_bits, 4));
 		l3_l4_flags = _mm256_slli_epi32(l3_l4_flags, 1);
+
+		__m256i l4_outer_mask = _mm256_set1_epi32(0x6);
+		__m256i l4_outer_flags =
+				_mm256_and_si256(l3_l4_flags, l4_outer_mask);
+		l4_outer_flags = _mm256_slli_epi32(l4_outer_flags, 20);
+
+		__m256i l3_l4_mask = _mm256_set1_epi32(~0x6);
+		l3_l4_flags = _mm256_and_si256(l3_l4_flags, l3_l4_mask);
+		l3_l4_flags = _mm256_or_si256(l3_l4_flags, l4_outer_flags);
 		l3_l4_flags = _mm256_and_si256(l3_l4_flags, cksum_mask);
 		/* set rss and vlan flags */
 		const __m256i rss_vlan_flag_bits =
diff --git a/drivers/net/ice/ice_rxtx_vec_avx512.c b/drivers/net/ice/ice_rxtx_vec_avx512.c
index df5d2be1e6..fd5d724329 100644
--- a/drivers/net/ice/ice_rxtx_vec_avx512.c
+++ b/drivers/net/ice/ice_rxtx_vec_avx512.c
@@ -230,43 +230,88 @@  _ice_recv_raw_pkts_vec_avx512(struct ice_rx_queue *rxq,
 	 * bit13 is for VLAN indication.
 	 */
 	const __m256i flags_mask =
-		 _mm256_set1_epi32((7 << 4) | (1 << 12) | (1 << 13));
+		 _mm256_set1_epi32((0xF << 4) | (1 << 12) | (1 << 13));
 	/**
 	 * data to be shuffled by the result of the flags mask shifted by 4
 	 * bits.  This gives use the l3_l4 flags.
 	 */
-	const __m256i l3_l4_flags_shuf = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
-			/* shift right 1 bit to make sure it not exceed 255 */
-			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
-			 PKT_RX_IP_CKSUM_BAD) >> 1,
-			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
-			 PKT_RX_IP_CKSUM_GOOD) >> 1,
-			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
-			 PKT_RX_IP_CKSUM_BAD) >> 1,
-			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
-			 PKT_RX_IP_CKSUM_GOOD) >> 1,
-			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
-			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
-			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
-			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
-			/* 2nd 128-bits */
-			0, 0, 0, 0, 0, 0, 0, 0,
-			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
-			 PKT_RX_IP_CKSUM_BAD) >> 1,
-			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
-			 PKT_RX_IP_CKSUM_GOOD) >> 1,
-			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
-			 PKT_RX_IP_CKSUM_BAD) >> 1,
-			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
-			 PKT_RX_IP_CKSUM_GOOD) >> 1,
-			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
-			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
-			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
-			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1);
+	const __m256i l3_l4_flags_shuf =
+		_mm256_set_epi8((PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
+		 PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+		  PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_BAD  |
+		 PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_BAD  |
+		 PKT_RX_IP_CKSUM_GOOD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD |
+		 PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD |
+		 PKT_RX_IP_CKSUM_GOOD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD |
+		 PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD |
+		 PKT_RX_IP_CKSUM_GOOD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD |
+		 PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD |
+		 PKT_RX_IP_CKSUM_GOOD) >> 1,
+		/**
+		 * second 128-bits
+		 * shift right 20 bits to use the low two bits to indicate
+		 * outer checksum status
+		 * shift right 1 bit to make sure it not exceed 255
+		 */
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_BAD  |
+		 PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_BAD  |
+		 PKT_RX_IP_CKSUM_GOOD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD |
+		 PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD |
+		 PKT_RX_IP_CKSUM_GOOD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD |
+		 PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD |
+		 PKT_RX_IP_CKSUM_GOOD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD |
+		 PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD |
+		 PKT_RX_IP_CKSUM_GOOD) >> 1);
 	const __m256i cksum_mask =
-		 _mm256_set1_epi32(PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD |
-				   PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD |
-				   PKT_RX_EIP_CKSUM_BAD);
+		 _mm256_set1_epi32(PKT_RX_IP_CKSUM_MASK |
+				   PKT_RX_L4_CKSUM_MASK |
+				   PKT_RX_EIP_CKSUM_BAD |
+				   PKT_RX_OUTER_L4_CKSUM_MASK);
 	/**
 	 * data to be shuffled by result of flag mask, shifted down 12.
 	 * If RSS(bit12)/VLAN(bit13) are set,
@@ -451,6 +496,14 @@  _ice_recv_raw_pkts_vec_avx512(struct ice_rx_queue *rxq,
 		__m256i l3_l4_flags = _mm256_shuffle_epi8(l3_l4_flags_shuf,
 				_mm256_srli_epi32(flag_bits, 4));
 		l3_l4_flags = _mm256_slli_epi32(l3_l4_flags, 1);
+		__m256i l4_outer_mask = _mm256_set1_epi32(0x6);
+		__m256i l4_outer_flags =
+				_mm256_and_si256(l3_l4_flags, l4_outer_mask);
+		l4_outer_flags = _mm256_slli_epi32(l4_outer_flags, 20);
+
+		__m256i l3_l4_mask = _mm256_set1_epi32(~0x6);
+		l3_l4_flags = _mm256_and_si256(l3_l4_flags, l3_l4_mask);
+		l3_l4_flags = _mm256_or_si256(l3_l4_flags, l4_outer_flags);
 		l3_l4_flags = _mm256_and_si256(l3_l4_flags, cksum_mask);
 		/* set rss and vlan flags */
 		const __m256i rss_vlan_flag_bits =
diff --git a/drivers/net/ice/ice_rxtx_vec_sse.c b/drivers/net/ice/ice_rxtx_vec_sse.c
index 626364719b..87e0c3db2e 100644
--- a/drivers/net/ice/ice_rxtx_vec_sse.c
+++ b/drivers/net/ice/ice_rxtx_vec_sse.c
@@ -114,39 +114,67 @@  ice_rx_desc_to_olflags_v(struct ice_rx_queue *rxq, __m128i descs[4],
 	 * bit12 for RSS indication.
 	 * bit13 for VLAN indication.
 	 */
-	const __m128i desc_mask = _mm_set_epi32(0x3070, 0x3070,
-						0x3070, 0x3070);
-
+	const __m128i desc_mask = _mm_set_epi32(0x30f0, 0x30f0,
+						0x30f0, 0x30f0);
 	const __m128i cksum_mask = _mm_set_epi32(PKT_RX_IP_CKSUM_MASK |
 						 PKT_RX_L4_CKSUM_MASK |
+						 PKT_RX_OUTER_L4_CKSUM_MASK |
 						 PKT_RX_EIP_CKSUM_BAD,
 						 PKT_RX_IP_CKSUM_MASK |
 						 PKT_RX_L4_CKSUM_MASK |
+						 PKT_RX_OUTER_L4_CKSUM_MASK |
 						 PKT_RX_EIP_CKSUM_BAD,
 						 PKT_RX_IP_CKSUM_MASK |
 						 PKT_RX_L4_CKSUM_MASK |
+						 PKT_RX_OUTER_L4_CKSUM_MASK |
 						 PKT_RX_EIP_CKSUM_BAD,
 						 PKT_RX_IP_CKSUM_MASK |
 						 PKT_RX_L4_CKSUM_MASK |
+						 PKT_RX_OUTER_L4_CKSUM_MASK |
 						 PKT_RX_EIP_CKSUM_BAD);
 
 	/* map the checksum, rss and vlan fields to the checksum, rss
 	 * and vlan flag
 	 */
-	const __m128i cksum_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
-			/* shift right 1 bit to make sure it not exceed 255 */
-			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
-			 PKT_RX_IP_CKSUM_BAD) >> 1,
-			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
-			 PKT_RX_IP_CKSUM_GOOD) >> 1,
-			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
-			 PKT_RX_IP_CKSUM_BAD) >> 1,
-			(PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
-			 PKT_RX_IP_CKSUM_GOOD) >> 1,
-			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
-			(PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
-			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
-			(PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1);
+	const __m128i cksum_flags =
+		_mm_set_epi8((PKT_RX_OUTER_L4_CKSUM_BAD >> 20 |
+		 PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+		  PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_BAD |
+		 PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_BAD |
+		 PKT_RX_IP_CKSUM_GOOD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD |
+		 PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_BAD >> 20 | PKT_RX_L4_CKSUM_GOOD |
+		 PKT_RX_IP_CKSUM_GOOD) >> 1,
+		/**
+		 * shift right 20 bits to use the low two bits to indicate
+		 * outer checksum status
+		 * shift right 1 bit to make sure it not exceed 255
+		 */
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_EIP_CKSUM_BAD |
+		 PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD |
+		 PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_BAD |
+		 PKT_RX_IP_CKSUM_GOOD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD |
+		 PKT_RX_IP_CKSUM_BAD) >> 1,
+		(PKT_RX_OUTER_L4_CKSUM_GOOD >> 20 | PKT_RX_L4_CKSUM_GOOD |
+		 PKT_RX_IP_CKSUM_GOOD) >> 1);
 
 	const __m128i rss_vlan_flags = _mm_set_epi8(0, 0, 0, 0,
 			0, 0, 0, 0,
@@ -166,6 +194,14 @@  ice_rx_desc_to_olflags_v(struct ice_rx_queue *rxq, __m128i descs[4],
 	flags = _mm_shuffle_epi8(cksum_flags, tmp_desc);
 	/* then we shift left 1 bit */
 	flags = _mm_slli_epi32(flags, 1);
+
+	__m128i l4_outer_mask = _mm_set_epi32(0x6, 0x6, 0x6, 0x6);
+	__m128i l4_outer_flags = _mm_and_si128(flags, l4_outer_mask);
+	l4_outer_flags = _mm_slli_epi32(l4_outer_flags, 20);
+
+	__m128i l3_l4_mask = _mm_set_epi32(~0x6, ~0x6, ~0x6, ~0x6);
+	__m128i l3_l4_flags = _mm_and_si128(flags, l3_l4_mask);
+	flags = _mm_or_si128(l3_l4_flags, l4_outer_flags);
 	/* we need to mask out the reduntant bits introduced by RSS or
 	 * VLAN fields.
 	 */
@@ -217,10 +253,10 @@  ice_rx_desc_to_olflags_v(struct ice_rx_queue *rxq, __m128i descs[4],
 	 * appropriate flags means that we have to do a shift and blend for
 	 * each mbuf before we do the write.
 	 */
-	rearm0 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 8), 0x10);
-	rearm1 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 4), 0x10);
-	rearm2 = _mm_blend_epi16(mbuf_init, flags, 0x10);
-	rearm3 = _mm_blend_epi16(mbuf_init, _mm_srli_si128(flags, 4), 0x10);
+	rearm0 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 8), 0x30);
+	rearm1 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 4), 0x30);
+	rearm2 = _mm_blend_epi16(mbuf_init, flags, 0x30);
+	rearm3 = _mm_blend_epi16(mbuf_init, _mm_srli_si128(flags, 4), 0x30);
 
 	/* write the rearm data and the olflags in one write */
 	RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) !=