net/i40e: fix incorrect checksum flag of L4 checksum
Checks
Commit Message
When a tunneled packet whose inner L4 checksum is correct is sent,
the test_pmd output log shows the 'ol_flags' value as
'PKT_RX_L4_CKSUM_UNKNOWN', but the expected value is
'PKT_RX_L4_CKSUM_GOOD'.
Add 'PKT_RX_L4_CKSUM_GOOD' to the entries of 'l3_l4e_flags' for SSE and
'l3_l4_flags_shuf' for AVX2 that were missing it, so that 'ol_flags'
reports the correct flags.
Fixes: 9966a00a0688 ("net/i40e: enable bad checksum flags in vector Rx")
Fixes: dafadd73762e ("net/i40e: add AVX2 Rx function")
Signed-off-by: Murphy Yang <murphyx.yang@intel.com>
---
drivers/net/i40e/i40e_rxtx_vec_avx2.c | 40 ++++++++++++++++-----------
drivers/net/i40e/i40e_rxtx_vec_sse.c | 20 ++++++++------
2 files changed, 35 insertions(+), 25 deletions(-)
Comments
Hi, murphy
> -----Original Message-----
> From: Murphy Yang <murphyx.yang@intel.com>
> Sent: Wednesday, November 11, 2020 5:11 PM
> To: dev@dpdk.org
> Cc: Yang, Qiming <qiming.yang@intel.com>; Yang, SteveX
> <stevex.yang@intel.com>; Xing, Beilei <beilei.xing@intel.com>; Guo, Jia
> <jia.guo@intel.com>; Yang, MurphyX <murphyx.yang@intel.com>
> Subject: [PATCH] net/i40e: fix incorrect checksum flag of L4 checksum
>
> When send tunneled packet that inner L4 checksum value is correct, the
> test_pmd output log shows 'ol_flags' value is
> 'PKT_RX_L4_CKSUM_UNKNOWN', but expected value is
> 'PKT_RX_L4_CKSUM_GOOD'.
>
> Add the 'PKT_RX_L4_CKSUM_GOOD' to 'l3_l4e_flags' for sse and
> 'l3_l4_flags_shuf' for avx2 to ensure that the 'ol_flags' can match correct flags.
>
It seems that 'PKT_RX_L4_CKSUM_GOOD' was already present but not set correctly, so maybe the commit message should
not say "Add the 'PKT_RX_L4_CKSUM_GOOD' to 'l3_l4e_flags' ...".
In addition, could you please check whether the other Rx vector paths, such as vec_altivec and vec_neon, also need this fix?
> Fixes: 9966a00a0688 ("net/i40e: enable bad checksum flags in vector Rx")
> Fixes: dafadd73762e ("net/i40e: add AVX2 Rx function")
>
> Signed-off-by: Murphy Yang <murphyx.yang@intel.com>
> ---
> drivers/net/i40e/i40e_rxtx_vec_avx2.c | 40 ++++++++++++++++-----------
> drivers/net/i40e/i40e_rxtx_vec_sse.c | 20 ++++++++------
> 2 files changed, 35 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx2.c
> b/drivers/net/i40e/i40e_rxtx_vec_avx2.c
> index 7a558fc73a..fe6ec7deef 100644
> --- a/drivers/net/i40e/i40e_rxtx_vec_avx2.c
> +++ b/drivers/net/i40e/i40e_rxtx_vec_avx2.c
> @@ -342,24 +342,32 @@ _recv_raw_pkts_vec_avx2(struct i40e_rx_queue
> *rxq, struct rte_mbuf **rx_pkts,
> */
> const __m256i l3_l4_flags_shuf = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0,
> 0,
> /* shift right 1 bit to make sure it not exceed 255 */
> - (PKT_RX_EIP_CKSUM_BAD |
> PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
> - (PKT_RX_IP_CKSUM_GOOD |
> PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD) >> 1,
> - (PKT_RX_EIP_CKSUM_BAD |
> PKT_RX_IP_CKSUM_BAD) >> 1,
> - (PKT_RX_IP_CKSUM_GOOD |
> PKT_RX_EIP_CKSUM_BAD) >> 1,
> - (PKT_RX_L4_CKSUM_BAD |
> PKT_RX_IP_CKSUM_BAD) >> 1,
> - (PKT_RX_IP_CKSUM_GOOD |
> PKT_RX_L4_CKSUM_BAD) >> 1,
> - PKT_RX_IP_CKSUM_BAD >> 1,
> - (PKT_RX_IP_CKSUM_GOOD |
> PKT_RX_L4_CKSUM_GOOD) >> 1,
> + (PKT_RX_EIP_CKSUM_BAD |
> PKT_RX_L4_CKSUM_BAD |
> + PKT_RX_IP_CKSUM_BAD) >> 1,
> + (PKT_RX_EIP_CKSUM_BAD |
> PKT_RX_L4_CKSUM_BAD |
> + PKT_RX_IP_CKSUM_GOOD) >> 1,
> + (PKT_RX_EIP_CKSUM_BAD |
> PKT_RX_L4_CKSUM_GOOD |
> + PKT_RX_IP_CKSUM_BAD) >> 1,
> + (PKT_RX_EIP_CKSUM_BAD |
> PKT_RX_L4_CKSUM_GOOD |
> + PKT_RX_IP_CKSUM_GOOD) >> 1,
> + (PKT_RX_L4_CKSUM_BAD |
> PKT_RX_IP_CKSUM_BAD) >> 1,
> + (PKT_RX_L4_CKSUM_BAD |
> PKT_RX_IP_CKSUM_GOOD) >> 1,
> + (PKT_RX_L4_CKSUM_GOOD |
> PKT_RX_IP_CKSUM_BAD) >> 1,
> + (PKT_RX_L4_CKSUM_GOOD |
> PKT_RX_IP_CKSUM_GOOD) >> 1,
> /* second 128-bits */
> 0, 0, 0, 0, 0, 0, 0, 0,
> - (PKT_RX_EIP_CKSUM_BAD |
> PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
> - (PKT_RX_IP_CKSUM_GOOD |
> PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD) >> 1,
> - (PKT_RX_EIP_CKSUM_BAD |
> PKT_RX_IP_CKSUM_BAD) >> 1,
> - (PKT_RX_IP_CKSUM_GOOD |
> PKT_RX_EIP_CKSUM_BAD) >> 1,
> - (PKT_RX_L4_CKSUM_BAD |
> PKT_RX_IP_CKSUM_BAD) >> 1,
> - (PKT_RX_IP_CKSUM_GOOD |
> PKT_RX_L4_CKSUM_BAD) >> 1,
> - PKT_RX_IP_CKSUM_BAD >> 1,
> - (PKT_RX_IP_CKSUM_GOOD |
> PKT_RX_L4_CKSUM_GOOD) >> 1);
> + (PKT_RX_EIP_CKSUM_BAD |
> PKT_RX_L4_CKSUM_BAD |
> + PKT_RX_IP_CKSUM_BAD) >> 1,
> + (PKT_RX_EIP_CKSUM_BAD |
> PKT_RX_L4_CKSUM_BAD |
> + PKT_RX_IP_CKSUM_GOOD) >> 1,
> + (PKT_RX_EIP_CKSUM_BAD |
> PKT_RX_L4_CKSUM_GOOD |
> + PKT_RX_IP_CKSUM_BAD) >> 1,
> + (PKT_RX_EIP_CKSUM_BAD |
> PKT_RX_L4_CKSUM_GOOD |
> + PKT_RX_IP_CKSUM_GOOD) >> 1,
> + (PKT_RX_L4_CKSUM_BAD |
> PKT_RX_IP_CKSUM_BAD) >> 1,
> + (PKT_RX_L4_CKSUM_BAD |
> PKT_RX_IP_CKSUM_GOOD) >> 1,
> + (PKT_RX_L4_CKSUM_GOOD |
> PKT_RX_IP_CKSUM_BAD) >> 1,
> + (PKT_RX_L4_CKSUM_GOOD |
> PKT_RX_IP_CKSUM_GOOD) >> 1);
>
Could you double-check whether it is reasonable that "PKT_RX_EIP_CKSUM_BAD" is always set, but "PKT_RX_EIP_CKSUM_GOOD" never is?
> const __m256i cksum_mask = _mm256_set1_epi32(
> PKT_RX_IP_CKSUM_GOOD |
> PKT_RX_IP_CKSUM_BAD | diff --git a/drivers/net/i40e/i40e_rxtx_vec_sse.c
> b/drivers/net/i40e/i40e_rxtx_vec_sse.c
> index 4b2b6a28fc..0bcb48e24e 100644
> --- a/drivers/net/i40e/i40e_rxtx_vec_sse.c
> +++ b/drivers/net/i40e/i40e_rxtx_vec_sse.c
> @@ -254,16 +254,18 @@ desc_to_olflags_v(struct i40e_rx_queue *rxq,
> volatile union i40e_rx_desc *rxdp,
>
> const __m128i l3_l4e_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
> /* shift right 1 bit to make sure it not exceed 255 */
> - (PKT_RX_EIP_CKSUM_BAD |
> PKT_RX_L4_CKSUM_BAD |
> + (PKT_RX_EIP_CKSUM_BAD |
> PKT_RX_L4_CKSUM_BAD |
> PKT_RX_IP_CKSUM_BAD) >> 1,
> - (PKT_RX_IP_CKSUM_GOOD |
> PKT_RX_EIP_CKSUM_BAD |
> - PKT_RX_L4_CKSUM_BAD) >> 1,
> - (PKT_RX_EIP_CKSUM_BAD |
> PKT_RX_IP_CKSUM_BAD) >> 1,
> - (PKT_RX_IP_CKSUM_GOOD |
> PKT_RX_EIP_CKSUM_BAD) >> 1,
> - (PKT_RX_L4_CKSUM_BAD |
> PKT_RX_IP_CKSUM_BAD) >> 1,
> - (PKT_RX_IP_CKSUM_GOOD |
> PKT_RX_L4_CKSUM_BAD) >> 1,
> - PKT_RX_IP_CKSUM_BAD >> 1,
> - (PKT_RX_IP_CKSUM_GOOD |
> PKT_RX_L4_CKSUM_GOOD) >> 1);
> + (PKT_RX_EIP_CKSUM_BAD |
> PKT_RX_L4_CKSUM_BAD |
> + PKT_RX_IP_CKSUM_GOOD) >> 1,
> + (PKT_RX_EIP_CKSUM_BAD |
> PKT_RX_L4_CKSUM_GOOD |
> + PKT_RX_IP_CKSUM_BAD) >> 1,
> + (PKT_RX_EIP_CKSUM_BAD |
> PKT_RX_L4_CKSUM_GOOD |
> + PKT_RX_IP_CKSUM_GOOD) >> 1,
> + (PKT_RX_L4_CKSUM_BAD |
> PKT_RX_IP_CKSUM_BAD) >> 1,
> + (PKT_RX_L4_CKSUM_BAD |
> PKT_RX_IP_CKSUM_GOOD) >> 1,
> + (PKT_RX_L4_CKSUM_GOOD |
> PKT_RX_IP_CKSUM_BAD) >> 1,
> + (PKT_RX_L4_CKSUM_GOOD |
> PKT_RX_IP_CKSUM_GOOD) >> 1);
>
> /* Unpack "status" from quadword 1, bits 0:32 */
> vlan0 = _mm_unpackhi_epi32(descs[0], descs[1]);
> --
> 2.17.1
@@ -342,24 +342,32 @@ _recv_raw_pkts_vec_avx2(struct i40e_rx_queue *rxq, struct rte_mbuf **rx_pkts,
*/
const __m256i l3_l4_flags_shuf = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
/* shift right 1 bit to make sure it not exceed 255 */
- (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
- (PKT_RX_IP_CKSUM_GOOD | PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD) >> 1,
- (PKT_RX_EIP_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
- (PKT_RX_IP_CKSUM_GOOD | PKT_RX_EIP_CKSUM_BAD) >> 1,
- (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
- (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD) >> 1,
- PKT_RX_IP_CKSUM_BAD >> 1,
- (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD) >> 1,
+ (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+ PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+ PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+ PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+ PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1,
/* second 128-bits */
0, 0, 0, 0, 0, 0, 0, 0,
- (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
- (PKT_RX_IP_CKSUM_GOOD | PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD) >> 1,
- (PKT_RX_EIP_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
- (PKT_RX_IP_CKSUM_GOOD | PKT_RX_EIP_CKSUM_BAD) >> 1,
- (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
- (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD) >> 1,
- PKT_RX_IP_CKSUM_BAD >> 1,
- (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD) >> 1);
+ (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+ PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+ PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+ PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+ PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1);
const __m256i cksum_mask = _mm256_set1_epi32(
PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD |
@@ -254,16 +254,18 @@ desc_to_olflags_v(struct i40e_rx_queue *rxq, volatile union i40e_rx_desc *rxdp,
const __m128i l3_l4e_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
/* shift right 1 bit to make sure it not exceed 255 */
- (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+ (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
PKT_RX_IP_CKSUM_BAD) >> 1,
- (PKT_RX_IP_CKSUM_GOOD | PKT_RX_EIP_CKSUM_BAD |
- PKT_RX_L4_CKSUM_BAD) >> 1,
- (PKT_RX_EIP_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
- (PKT_RX_IP_CKSUM_GOOD | PKT_RX_EIP_CKSUM_BAD) >> 1,
- (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
- (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD) >> 1,
- PKT_RX_IP_CKSUM_BAD >> 1,
- (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD) >> 1);
+ (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD |
+ PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+ PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD |
+ PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1,
+ (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1,
+ (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1);
/* Unpack "status" from quadword 1, bits 0:32 */
vlan0 = _mm_unpackhi_epi32(descs[0], descs[1]);