diff mbox

[dpdk-dev] i40e: Use one bit flag for all hardware detected RX packet errors

Message ID 1416982032-28519-1-git-send-email-helin.zhang@intel.com (mailing list archive)
State Superseded, archived
Headers show

Commit Message

Helin Zhang Nov. 26, 2014, 6:07 a.m. UTC
Several RX packet error flags for hardware-detected errors were defined as 0.
Actually a single error flag bit is enough for all hardware-detected
RX packet errors.

Signed-off-by: Helin Zhang <helin.zhang@intel.com>
---
 lib/librte_mbuf/rte_mbuf.h      |  6 +-----
 lib/librte_pmd_i40e/i40e_rxtx.c | 31 +++----------------------------
 2 files changed, 4 insertions(+), 33 deletions(-)

Comments

Ananyev, Konstantin Nov. 26, 2014, 10:49 a.m. UTC | #1
Hi Helin,

> -----Original Message-----
> From: Zhang, Helin
> Sent: Wednesday, November 26, 2014 6:07 AM
> To: dev@dpdk.org
> Cc: Cao, Waterman; Cao, Min; Ananyev, Konstantin; olivier.matz@6wind.com; Zhang, Helin
> Subject: [PATCH] i40e: Use one bit flag for all hardware detected RX packet errors
> 
> There were some bit flags of 0 for RX packet errors detected by hardware.
> Actually only one bit of error flag is enough for all hardware detected
> RX packet errors.
> 
> Signed-off-by: Helin Zhang <helin.zhang@intel.com>
> ---
>  lib/librte_mbuf/rte_mbuf.h      |  6 +-----
>  lib/librte_pmd_i40e/i40e_rxtx.c | 31 +++----------------------------
>  2 files changed, 4 insertions(+), 33 deletions(-)
> 
> diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
> index 5899e5c..897fd26 100644
> --- a/lib/librte_mbuf/rte_mbuf.h
> +++ b/lib/librte_mbuf/rte_mbuf.h
> @@ -80,11 +80,6 @@ extern "C" {
>  #define PKT_RX_FDIR          (1ULL << 2)  /**< RX packet with FDIR match indicate. */
>  #define PKT_RX_L4_CKSUM_BAD  (1ULL << 3)  /**< L4 cksum of RX pkt. is not OK. */
>  #define PKT_RX_IP_CKSUM_BAD  (1ULL << 4)  /**< IP cksum of RX pkt. is not OK. */
> -#define PKT_RX_EIP_CKSUM_BAD (0ULL << 0)  /**< External IP header checksum error. */
> -#define PKT_RX_OVERSIZE      (0ULL << 0)  /**< Num of desc of an RX pkt oversize. */
> -#define PKT_RX_HBUF_OVERFLOW (0ULL << 0)  /**< Header buffer overflow. */
> -#define PKT_RX_RECIP_ERR     (0ULL << 0)  /**< Hardware processing error. */
> -#define PKT_RX_MAC_ERR       (0ULL << 0)  /**< MAC error. */
>  #define PKT_RX_IPV4_HDR      (1ULL << 5)  /**< RX packet with IPv4 header. */
>  #define PKT_RX_IPV4_HDR_EXT  (1ULL << 6)  /**< RX packet with extended IPv4 header. */
>  #define PKT_RX_IPV6_HDR      (1ULL << 7)  /**< RX packet with IPv6 header. */
> @@ -95,6 +90,7 @@ extern "C" {
>  #define PKT_RX_TUNNEL_IPV6_HDR (1ULL << 12) /**< RX tunnel packet with IPv6 header. */
>  #define PKT_RX_FDIR_ID       (1ULL << 13) /**< FD id reported if FDIR match. */
>  #define PKT_RX_FDIR_FLX      (1ULL << 14) /**< Flexible bytes reported if FDIR match. */
> +#define PKT_RX_ERR_HW        (1ULL << 15) /**< RX packet error detected by hardware. */
> 
>  #define PKT_TX_VLAN_PKT      (1ULL << 55) /**< TX packet is a 802.1q VLAN packet. */
>  #define PKT_TX_IP_CKSUM      (1ULL << 54) /**< IP cksum of TX pkt. computed by NIC. */
> diff --git a/lib/librte_pmd_i40e/i40e_rxtx.c b/lib/librte_pmd_i40e/i40e_rxtx.c
> index cce6911..3b2195d 100644
> --- a/lib/librte_pmd_i40e/i40e_rxtx.c
> +++ b/lib/librte_pmd_i40e/i40e_rxtx.c
> @@ -115,35 +115,10 @@ i40e_rxd_status_to_pkt_flags(uint64_t qword)
>  static inline uint64_t
>  i40e_rxd_error_to_pkt_flags(uint64_t qword)
>  {
> -	uint64_t flags = 0;
> -	uint64_t error_bits = (qword >> I40E_RXD_QW1_ERROR_SHIFT);
> -
> -#define I40E_RX_ERR_BITS 0x3f
> -	if (likely((error_bits & I40E_RX_ERR_BITS) == 0))
> -		return flags;
> -	/* If RXE bit set, all other status bits are meaningless */
> -	if (unlikely(error_bits & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
> -		flags |= PKT_RX_MAC_ERR;
> -		return flags;
> -	}
> -
> -	/* If RECIPE bit set, all other status indications should be ignored */
> -	if (unlikely(error_bits & (1 << I40E_RX_DESC_ERROR_RECIPE_SHIFT))) {
> -		flags |= PKT_RX_RECIP_ERR;
> -		return flags;
> -	}
> -	if (unlikely(error_bits & (1 << I40E_RX_DESC_ERROR_HBO_SHIFT)))
> -		flags |= PKT_RX_HBUF_OVERFLOW;
> -	if (unlikely(error_bits & (1 << I40E_RX_DESC_ERROR_IPE_SHIFT)))
> -		flags |= PKT_RX_IP_CKSUM_BAD;
> -	if (unlikely(error_bits & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT)))
> -		flags |= PKT_RX_L4_CKSUM_BAD;
> -	if (unlikely(error_bits & (1 << I40E_RX_DESC_ERROR_EIPE_SHIFT)))
> -		flags |= PKT_RX_EIP_CKSUM_BAD;
> -	if (unlikely(error_bits & (1 << I40E_RX_DESC_ERROR_OVERSIZE_SHIFT)))
> -		flags |= PKT_RX_OVERSIZE;
> +	if (unlikely(qword & I40E_RXD_QW1_ERROR_MASK))
> +		return PKT_RX_ERR_HW;

Probably I didn't explain myself clearly enough, sorry.
I didn't suggest getting rid of setting the bits that indicate L3/L4 checksum errors:
PKT_RX_IP_CKSUM_BAD, PKT_RX_L4_CKSUM_BAD, PKT_RX_EIP_CKSUM_BAD.
I think these flags should be set as before.

I was talking about collapsing only these 4 RX error flags into one:

#define PKT_RX_OVERSIZE      (0ULL << 0)  /**< Num of desc of an RX pkt oversize. */
#define PKT_RX_HBUF_OVERFLOW (0ULL << 0)  /**< Header buffer overflow. */
#define PKT_RX_RECIP_ERR     (0ULL << 0)  /**< Hardware processing error. */
#define PKT_RX_MAC_ERR       (0ULL << 0)  /**< MAC error. */ 

From my point of view, the difference between these 2 groups is:
First - HW was able to receive the whole packet without a problem, but the L3/L4 checksum check failed.

Second - HW was not able to receive the whole packet properly, for whatever reason.
From the upper layer SW perspective, it probably makes little difference what caused it,
as most likely SW has to throw away the erroneous packet.
And for debugging purposes, we can add PMD_LOG(DEBUG, ...) that would print exactly which HW error happened.

Thanks
Konstantin

> 
> -	return flags;
> +	return 0;
>  }
> 
>  /* Translate pkt types to pkt flags */
> --
> 1.8.1.4
Olivier Matz Nov. 26, 2014, 11:22 a.m. UTC | #2
Hi Konstantin, Hi Helin,

On 11/26/2014 11:49 AM, Ananyev, Konstantin wrote:
> Hi Helin,
>
>> -----Original Message-----
>> From: Zhang, Helin
>> Sent: Wednesday, November 26, 2014 6:07 AM
>> To: dev@dpdk.org
>> Cc: Cao, Waterman; Cao, Min; Ananyev, Konstantin; olivier.matz@6wind.com; Zhang, Helin
>> Subject: [PATCH] i40e: Use one bit flag for all hardware detected RX packet errors
>>
>> There were some bit flags of 0 for RX packet errors detected by hardware.
>> Actually only one bit of error flag is enough for all hardware detected
>> RX packet errors.
>>
>> Signed-off-by: Helin Zhang <helin.zhang@intel.com>
>> ---
>>   lib/librte_mbuf/rte_mbuf.h      |  6 +-----
>>   lib/librte_pmd_i40e/i40e_rxtx.c | 31 +++----------------------------
>>   2 files changed, 4 insertions(+), 33 deletions(-)
>>
>> diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
>> index 5899e5c..897fd26 100644
>> --- a/lib/librte_mbuf/rte_mbuf.h
>> +++ b/lib/librte_mbuf/rte_mbuf.h
>> @@ -80,11 +80,6 @@ extern "C" {
>>   #define PKT_RX_FDIR          (1ULL << 2)  /**< RX packet with FDIR match indicate. */
>>   #define PKT_RX_L4_CKSUM_BAD  (1ULL << 3)  /**< L4 cksum of RX pkt. is not OK. */
>>   #define PKT_RX_IP_CKSUM_BAD  (1ULL << 4)  /**< IP cksum of RX pkt. is not OK. */
>> -#define PKT_RX_EIP_CKSUM_BAD (0ULL << 0)  /**< External IP header checksum error. */
>> -#define PKT_RX_OVERSIZE      (0ULL << 0)  /**< Num of desc of an RX pkt oversize. */
>> -#define PKT_RX_HBUF_OVERFLOW (0ULL << 0)  /**< Header buffer overflow. */
>> -#define PKT_RX_RECIP_ERR     (0ULL << 0)  /**< Hardware processing error. */
>> -#define PKT_RX_MAC_ERR       (0ULL << 0)  /**< MAC error. */
>>   #define PKT_RX_IPV4_HDR      (1ULL << 5)  /**< RX packet with IPv4 header. */
>>   #define PKT_RX_IPV4_HDR_EXT  (1ULL << 6)  /**< RX packet with extended IPv4 header. */
>>   #define PKT_RX_IPV6_HDR      (1ULL << 7)  /**< RX packet with IPv6 header. */
>> @@ -95,6 +90,7 @@ extern "C" {
>>   #define PKT_RX_TUNNEL_IPV6_HDR (1ULL << 12) /**< RX tunnel packet with IPv6 header. */
>>   #define PKT_RX_FDIR_ID       (1ULL << 13) /**< FD id reported if FDIR match. */
>>   #define PKT_RX_FDIR_FLX      (1ULL << 14) /**< Flexible bytes reported if FDIR match. */
>> +#define PKT_RX_ERR_HW        (1ULL << 15) /**< RX packet error detected by hardware. */
>>
>>   #define PKT_TX_VLAN_PKT      (1ULL << 55) /**< TX packet is a 802.1q VLAN packet. */
>>   #define PKT_TX_IP_CKSUM      (1ULL << 54) /**< IP cksum of TX pkt. computed by NIC. */
>> diff --git a/lib/librte_pmd_i40e/i40e_rxtx.c b/lib/librte_pmd_i40e/i40e_rxtx.c
>> index cce6911..3b2195d 100644
>> --- a/lib/librte_pmd_i40e/i40e_rxtx.c
>> +++ b/lib/librte_pmd_i40e/i40e_rxtx.c
>> @@ -115,35 +115,10 @@ i40e_rxd_status_to_pkt_flags(uint64_t qword)
>>   static inline uint64_t
>>   i40e_rxd_error_to_pkt_flags(uint64_t qword)
>>   {
>> -	uint64_t flags = 0;
>> -	uint64_t error_bits = (qword >> I40E_RXD_QW1_ERROR_SHIFT);
>> -
>> -#define I40E_RX_ERR_BITS 0x3f
>> -	if (likely((error_bits & I40E_RX_ERR_BITS) == 0))
>> -		return flags;
>> -	/* If RXE bit set, all other status bits are meaningless */
>> -	if (unlikely(error_bits & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
>> -		flags |= PKT_RX_MAC_ERR;
>> -		return flags;
>> -	}
>> -
>> -	/* If RECIPE bit set, all other status indications should be ignored */
>> -	if (unlikely(error_bits & (1 << I40E_RX_DESC_ERROR_RECIPE_SHIFT))) {
>> -		flags |= PKT_RX_RECIP_ERR;
>> -		return flags;
>> -	}
>> -	if (unlikely(error_bits & (1 << I40E_RX_DESC_ERROR_HBO_SHIFT)))
>> -		flags |= PKT_RX_HBUF_OVERFLOW;
>> -	if (unlikely(error_bits & (1 << I40E_RX_DESC_ERROR_IPE_SHIFT)))
>> -		flags |= PKT_RX_IP_CKSUM_BAD;
>> -	if (unlikely(error_bits & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT)))
>> -		flags |= PKT_RX_L4_CKSUM_BAD;
>> -	if (unlikely(error_bits & (1 << I40E_RX_DESC_ERROR_EIPE_SHIFT)))
>> -		flags |= PKT_RX_EIP_CKSUM_BAD;
>> -	if (unlikely(error_bits & (1 << I40E_RX_DESC_ERROR_OVERSIZE_SHIFT)))
>> -		flags |= PKT_RX_OVERSIZE;
>> +	if (unlikely(qword & I40E_RXD_QW1_ERROR_MASK))
>> +		return PKT_RX_ERR_HW;
>
> Probably I didn't explain myself clear enough, sorry.
> I didn't suggest to get rid of setting bits that indicate L3/L4 checksum errors:
> PKT_RX_IP_CKSUM_BAD, PKT_RX_L4_CKSUM_BAD, PKT_RX_EIP_CKSUM_BAD.
> I think these flags should be set as before.
>
> I was talking only about collapsing only these 4 RX error flags into one:
>
> #define PKT_RX_OVERSIZE      (0ULL << 0)  /**< Num of desc of an RX pkt oversize. */
> #define PKT_RX_HBUF_OVERFLOW (0ULL << 0)  /**< Header buffer overflow. */
> #define PKT_RX_RECIP_ERR     (0ULL << 0)  /**< Hardware processing error. */
> #define PKT_RX_MAC_ERR       (0ULL << 0)  /**< MAC error. */
>
>  From my point of view the difference of these 2 groups are:
> First - HW was able to receive whole packet without a problem, but L3/L4 checksum check failed.
>
> Second - HW was not able to receive whole packet properly by whatever reason.
>  From upper layer SW perspective - there it probably makes little difference, what caused it,
> as most likely SW has to throw away erroneous packet.
> And for debugging purposes, we can add PMD_LOG(DEBUG, ...) that would print what exactly HW error happened.

I agree with Konstantin that there are 2 different cases:

a) the packet is properly received by the hardware, but has a bad
    checksum (or another protocol error, for instance an invalid ip len,
    an ip_version == 8 :))

    in this case, it is useful to the application to have the mbuf with
    the data + an error flag. Then using a tcpdump-like tool could help
    to debug what is the cause of the error and what equipment generates
    a bad packet.

b) the packet is not properly received by the hardware. In this case
    the data in the mbuf is invalid and not usable by the application.
    I suggest only having a stats counter in this case, as receiving the
    mbuf consumes cpu time and the only thing the application can do
    is to drop the packet.

Regards,
Olivier
Ananyev, Konstantin Nov. 26, 2014, 1:38 p.m. UTC | #3
> -----Original Message-----
> From: Olivier MATZ [mailto:olivier.matz@6wind.com]
> Sent: Wednesday, November 26, 2014 11:22 AM
> To: Ananyev, Konstantin; Zhang, Helin; dev@dpdk.org
> Cc: Cao, Waterman; Cao, Min
> Subject: Re: [PATCH] i40e: Use one bit flag for all hardware detected RX packet errors
> 
> Hi Konstantin, Hi Helin,
> 
> On 11/26/2014 11:49 AM, Ananyev, Konstantin wrote:
> > Hi Helin,
> >
> >> -----Original Message-----
> >> From: Zhang, Helin
> >> Sent: Wednesday, November 26, 2014 6:07 AM
> >> To: dev@dpdk.org
> >> Cc: Cao, Waterman; Cao, Min; Ananyev, Konstantin; olivier.matz@6wind.com; Zhang, Helin
> >> Subject: [PATCH] i40e: Use one bit flag for all hardware detected RX packet errors
> >>
> >> There were some bit flags of 0 for RX packet errors detected by hardware.
> >> Actually only one bit of error flag is enough for all hardware detected
> >> RX packet errors.
> >>
> >> Signed-off-by: Helin Zhang <helin.zhang@intel.com>
> >> ---
> >>   lib/librte_mbuf/rte_mbuf.h      |  6 +-----
> >>   lib/librte_pmd_i40e/i40e_rxtx.c | 31 +++----------------------------
> >>   2 files changed, 4 insertions(+), 33 deletions(-)
> >>
> >> diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
> >> index 5899e5c..897fd26 100644
> >> --- a/lib/librte_mbuf/rte_mbuf.h
> >> +++ b/lib/librte_mbuf/rte_mbuf.h
> >> @@ -80,11 +80,6 @@ extern "C" {
> >>   #define PKT_RX_FDIR          (1ULL << 2)  /**< RX packet with FDIR match indicate. */
> >>   #define PKT_RX_L4_CKSUM_BAD  (1ULL << 3)  /**< L4 cksum of RX pkt. is not OK. */
> >>   #define PKT_RX_IP_CKSUM_BAD  (1ULL << 4)  /**< IP cksum of RX pkt. is not OK. */
> >> -#define PKT_RX_EIP_CKSUM_BAD (0ULL << 0)  /**< External IP header checksum error. */
> >> -#define PKT_RX_OVERSIZE      (0ULL << 0)  /**< Num of desc of an RX pkt oversize. */
> >> -#define PKT_RX_HBUF_OVERFLOW (0ULL << 0)  /**< Header buffer overflow. */
> >> -#define PKT_RX_RECIP_ERR     (0ULL << 0)  /**< Hardware processing error. */
> >> -#define PKT_RX_MAC_ERR       (0ULL << 0)  /**< MAC error. */
> >>   #define PKT_RX_IPV4_HDR      (1ULL << 5)  /**< RX packet with IPv4 header. */
> >>   #define PKT_RX_IPV4_HDR_EXT  (1ULL << 6)  /**< RX packet with extended IPv4 header. */
> >>   #define PKT_RX_IPV6_HDR      (1ULL << 7)  /**< RX packet with IPv6 header. */
> >> @@ -95,6 +90,7 @@ extern "C" {
> >>   #define PKT_RX_TUNNEL_IPV6_HDR (1ULL << 12) /**< RX tunnel packet with IPv6 header. */
> >>   #define PKT_RX_FDIR_ID       (1ULL << 13) /**< FD id reported if FDIR match. */
> >>   #define PKT_RX_FDIR_FLX      (1ULL << 14) /**< Flexible bytes reported if FDIR match. */
> >> +#define PKT_RX_ERR_HW        (1ULL << 15) /**< RX packet error detected by hardware. */
> >>
> >>   #define PKT_TX_VLAN_PKT      (1ULL << 55) /**< TX packet is a 802.1q VLAN packet. */
> >>   #define PKT_TX_IP_CKSUM      (1ULL << 54) /**< IP cksum of TX pkt. computed by NIC. */
> >> diff --git a/lib/librte_pmd_i40e/i40e_rxtx.c b/lib/librte_pmd_i40e/i40e_rxtx.c
> >> index cce6911..3b2195d 100644
> >> --- a/lib/librte_pmd_i40e/i40e_rxtx.c
> >> +++ b/lib/librte_pmd_i40e/i40e_rxtx.c
> >> @@ -115,35 +115,10 @@ i40e_rxd_status_to_pkt_flags(uint64_t qword)
> >>   static inline uint64_t
> >>   i40e_rxd_error_to_pkt_flags(uint64_t qword)
> >>   {
> >> -	uint64_t flags = 0;
> >> -	uint64_t error_bits = (qword >> I40E_RXD_QW1_ERROR_SHIFT);
> >> -
> >> -#define I40E_RX_ERR_BITS 0x3f
> >> -	if (likely((error_bits & I40E_RX_ERR_BITS) == 0))
> >> -		return flags;
> >> -	/* If RXE bit set, all other status bits are meaningless */
> >> -	if (unlikely(error_bits & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
> >> -		flags |= PKT_RX_MAC_ERR;
> >> -		return flags;
> >> -	}
> >> -
> >> -	/* If RECIPE bit set, all other status indications should be ignored */
> >> -	if (unlikely(error_bits & (1 << I40E_RX_DESC_ERROR_RECIPE_SHIFT))) {
> >> -		flags |= PKT_RX_RECIP_ERR;
> >> -		return flags;
> >> -	}
> >> -	if (unlikely(error_bits & (1 << I40E_RX_DESC_ERROR_HBO_SHIFT)))
> >> -		flags |= PKT_RX_HBUF_OVERFLOW;
> >> -	if (unlikely(error_bits & (1 << I40E_RX_DESC_ERROR_IPE_SHIFT)))
> >> -		flags |= PKT_RX_IP_CKSUM_BAD;
> >> -	if (unlikely(error_bits & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT)))
> >> -		flags |= PKT_RX_L4_CKSUM_BAD;
> >> -	if (unlikely(error_bits & (1 << I40E_RX_DESC_ERROR_EIPE_SHIFT)))
> >> -		flags |= PKT_RX_EIP_CKSUM_BAD;
> >> -	if (unlikely(error_bits & (1 << I40E_RX_DESC_ERROR_OVERSIZE_SHIFT)))
> >> -		flags |= PKT_RX_OVERSIZE;
> >> +	if (unlikely(qword & I40E_RXD_QW1_ERROR_MASK))
> >> +		return PKT_RX_ERR_HW;
> >
> > Probably I didn't explain myself clear enough, sorry.
> > I didn't suggest to get rid of setting bits that indicate L3/L4 checksum errors:
> > PKT_RX_IP_CKSUM_BAD, PKT_RX_L4_CKSUM_BAD, PKT_RX_EIP_CKSUM_BAD.
> > I think these flags should be set as before.
> >
> > I was talking only about collapsing only these 4 RX error flags into one:
> >
> > #define PKT_RX_OVERSIZE      (0ULL << 0)  /**< Num of desc of an RX pkt oversize. */
> > #define PKT_RX_HBUF_OVERFLOW (0ULL << 0)  /**< Header buffer overflow. */
> > #define PKT_RX_RECIP_ERR     (0ULL << 0)  /**< Hardware processing error. */
> > #define PKT_RX_MAC_ERR       (0ULL << 0)  /**< MAC error. */
> >
> >  From my point of view the difference of these 2 groups are:
> > First - HW was able to receive whole packet without a problem, but L3/L4 checksum check failed.
> >
> > Second - HW was not able to receive whole packet properly by whatever reason.
> >  From upper layer SW perspective - there it probably makes little difference, what caused it,
> > as most likely SW has to throw away erroneous packet.
> > And for debugging purposes, we can add PMD_LOG(DEBUG, ...) that would print what exactly HW error happened.
> 
> I agree with Konstantin that there are 2 different cases:
> 
> a) the packet is properly received by the hardware, but has a bad
>     checksum (or another protocol error, for instance an invalid ip len,
>     a ip_version == 8 :))
> 
>     in this case, it is useful to the application to have the mbuf with
>     the data + an error flag. Then using a tcpdump-like tool could help
>     to debug what is the cause of the error and what equipment generates
>     a bad packet.
> 
> b) the packet is not properly received by the hardware. In this case
>     the data is invalid in the mbuf and not useable by the application.
>     I suggest to only have a stats counter in this case, as receiving the
>     mbuf is cpu time consuming and the only thing the application can do
>     is to drop the packet.

So for b) you suggest dropping the packet straight in the PMD RX function?
Something like:
if (unlikely(error_bits & ...)) {
        PMD_LOG(DEBUG, ...);
         rte_pktmbuf_free(mb);
}
?

That's probably a bit too radical.
Yes, the mbuf doesn't contain the whole packet, but it may contain at least part of it, let's say in the case of 'packet oversize'.
So for debugging purposes the user may still like to examine the mbuf contents.

Konstantin

> 
> Regards,
> Olivier
Olivier Matz Nov. 26, 2014, 2:12 p.m. UTC | #4
Hi Konstantin,

On 11/26/2014 02:38 PM, Ananyev, Konstantin wrote:
>>> Probably I didn't explain myself clear enough, sorry.
>>> I didn't suggest to get rid of setting bits that indicate L3/L4 checksum errors:
>>> PKT_RX_IP_CKSUM_BAD, PKT_RX_L4_CKSUM_BAD, PKT_RX_EIP_CKSUM_BAD.
>>> I think these flags should be set as before.
>>>
>>> I was talking only about collapsing only these 4 RX error flags into one:
>>>
>>> #define PKT_RX_OVERSIZE      (0ULL << 0)  /**< Num of desc of an RX pkt oversize. */
>>> #define PKT_RX_HBUF_OVERFLOW (0ULL << 0)  /**< Header buffer overflow. */
>>> #define PKT_RX_RECIP_ERR     (0ULL << 0)  /**< Hardware processing error. */
>>> #define PKT_RX_MAC_ERR       (0ULL << 0)  /**< MAC error. */
>>>
>>>   From my point of view the difference of these 2 groups are:
>>> First - HW was able to receive whole packet without a problem, but L3/L4 checksum check failed.
>>>
>>> Second - HW was not able to receive whole packet properly by whatever reason.
>>>   From upper layer SW perspective - there it probably makes little difference, what caused it,
>>> as most likely SW has to throw away erroneous packet.
>>> And for debugging purposes, we can add PMD_LOG(DEBUG, ...) that would print what exactly HW error happened.
>>
>> I agree with Konstantin that there are 2 different cases:
>>
>> a) the packet is properly received by the hardware, but has a bad
>>      checksum (or another protocol error, for instance an invalid ip len,
>>      a ip_version == 8 :))
>>
>>      in this case, it is useful to the application to have the mbuf with
>>      the data + an error flag. Then using a tcpdump-like tool could help
>>      to debug what is the cause of the error and what equipment generates
>>      a bad packet.
>>
>> b) the packet is not properly received by the hardware. In this case
>>      the data is invalid in the mbuf and not useable by the application.
>>      I suggest to only have a stats counter in this case, as receiving the
>>      mbuf is cpu time consuming and the only thing the application can do
>>      is to drop the packet.
>
> So for b) you suggest to drop the packet straight in PMD RX function?
> Something like:
> if (unlikely(error_bits & ...)) {
>          PMD_LOG(DEBUG, ...);
>           rte_pktmbuf_free(mb);
> }
> ?

Yes

> That's probably a bit too radical.
> Yes, mbuf doesn't contain the whole packet, but it may contain at least part of it, let say in case of 'packet oversize'.
> So for debugging purposes the user may still like to examine the mbuf contents.

As long as there is some usable data in the mbuf, I agree it can
be transferred to the application (ex: bad header, bad len, bad
checksum...).

But if the hardware is not able to provide any usable data, it
looks like a bit of overkill to give an mbuf with an error flag.

But grouping the flags as you suggest is already a good clean-up to me,
I don't want to be more catholic than the Pope ;)

Regards,
Olivier
Helin Zhang Nov. 28, 2014, 8:07 a.m. UTC | #5
Hi Olivier, Konstantin

> -----Original Message-----
> From: Olivier MATZ [mailto:olivier.matz@6wind.com]
> Sent: Wednesday, November 26, 2014 10:12 PM
> To: Ananyev, Konstantin; Zhang, Helin; dev@dpdk.org
> Cc: Cao, Waterman; Cao, Min
> Subject: Re: [PATCH] i40e: Use one bit flag for all hardware detected RX packet
> errors
> 
> Hi Konstantin,
> 
> On 11/26/2014 02:38 PM, Ananyev, Konstantin wrote:
> >>> Probably I didn't explain myself clear enough, sorry.
> >>> I didn't suggest to get rid of setting bits that indicate L3/L4 checksum errors:
> >>> PKT_RX_IP_CKSUM_BAD, PKT_RX_L4_CKSUM_BAD,
> PKT_RX_EIP_CKSUM_BAD.
> >>> I think these flags should be set as before.
> >>>
> >>> I was talking only about collapsing only these 4 RX error flags into one:
> >>>
> >>> #define PKT_RX_OVERSIZE      (0ULL << 0)  /**< Num of desc of an RX
> pkt oversize. */
> >>> #define PKT_RX_HBUF_OVERFLOW (0ULL << 0)  /**< Header buffer
> overflow. */
> >>> #define PKT_RX_RECIP_ERR     (0ULL << 0)  /**< Hardware processing
> error. */
> >>> #define PKT_RX_MAC_ERR       (0ULL << 0)  /**< MAC error. */
> >>>
> >>>   From my point of view the difference of these 2 groups are:
> >>> First - HW was able to receive whole packet without a problem, but L3/L4
> checksum check failed.
> >>>
> >>> Second - HW was not able to receive whole packet properly by whatever
> reason.
> >>>   From upper layer SW perspective - there it probably makes little
> >>> difference, what caused it, as most likely SW has to throw away erroneous
> packet.
> >>> And for debugging purposes, we can add PMD_LOG(DEBUG, ...) that would
> print what exactly HW error happened.
> >>
> >> I agree with Konstantin that there are 2 different cases:
> >>
> >> a) the packet is properly received by the hardware, but has a bad
> >>      checksum (or another protocol error, for instance an invalid ip len,
> >>      a ip_version == 8 :))
> >>
> >>      in this case, it is useful to the application to have the mbuf with
> >>      the data + an error flag. Then using a tcpdump-like tool could help
> >>      to debug what is the cause of the error and what equipment generates
> >>      a bad packet.
> >>
> >> b) the packet is not properly received by the hardware. In this case
> >>      the data is invalid in the mbuf and not useable by the application.
> >>      I suggest to only have a stats counter in this case, as receiving the
> >>      mbuf is cpu time consuming and the only thing the application can do
> >>      is to drop the packet.
> >
> > So for b) you suggest to drop the packet straight in PMD RX function?
> > Something like:
> > if (unlikely(error_bits & ...)) {
> >          PMD_LOG(DEBUG, ...);
> >           rte_pktmbuf_free(mb);
> > }
> > ?
> 
> Yes
> 
> > That's probably a bit too radical.
> > Yes, mbuf doesn't contain the whole packet, but it may contain at least part of it,
> let say in case of 'packet oversize'.
> > So for debugging purposes the user may still like to examine the mbuf
> contents.
> 
> As soon as there is some exploitable data in the mbuf, I agree it can be transfered
> to the application (ex: bad header, bad len, bad checksum...).
> 
> But if the hardware is not able to provide any exploitable data, it looks a bit
> overkill to give an mbuf with an error flag.
> 
> But grouping the flags as you suggest is already a good clean-up to me, I don't
> want to be more catholic than the Pope ;)

After completing another task, I read the datasheet carefully again. For those 5
error bits I introduced a long time ago, I'd like to explain them one by one below.

#define PKT_RX_EIP_CKSUM_BAD (0ULL << 0)  /**< External IP header checksum error. */
[Helin] Nobody has complained about it, so we will keep it there, and just assign a new value to it.

#define PKT_RX_OVERSIZE      (0ULL << 0)  /**< Num of desc of an RX pkt oversize. */
[Helin] I don't think it can be merged with other hardware errors. It indicates that the packet
received needs more descriptors than the hardware allows, and that part of the packet can
still be stored in the mbufs provided. It is a good hint for users that a larger mbuf size
might be needed. If we just report it as a hardware error, users will lose this information. So I
prefer to keep it there, and just assign a new value to it.

#define PKT_RX_HBUF_OVERFLOW (0ULL << 0)  /**< Header buffer overflow. */
[Helin] It indicates that the header buffer size is not enough, but it does not mean the hardware cannot
process the packet received. It is a good hint for users to provide larger header
buffers. I also prefer to keep it there, and just assign a new value to it.

#define PKT_RX_RECIP_ERR     (0ULL << 0)  /**< Hardware processing error. */
[Helin] In the latest datasheet, it is not exposed to external users. So we can just remove
it from here.

#define PKT_RX_MAC_ERR       (0ULL << 0)  /**< MAC error. */
[Helin] This indicates a real hardware error happens.

So my point is to just remove PKT_RX_RECIP_ERR, and we still need other new bit flags.
Any thought from you guys?

Regards,
Helin

> 
> Regards,
> Olivier
Olivier Matz Nov. 28, 2014, 8:47 a.m. UTC | #6
Hi Helin,

On 11/28/2014 09:07 AM, Zhang, Helin wrote:
> After I have completed another task, I read the datasheet carefully again. For those 5
> error bits I introduced for a long time, I'd like to explain one by one as below.
> 
> #define PKT_RX_EIP_CKSUM_BAD (0ULL << 0)  /**< External IP header checksum error. */
> [Helin] Nobody complains it, so we will keep it there, and just assign a new value to it.

ok.

But it would be nice to have a better definition of this flag: does
external mean outer header? For instance, when you receive a
Ether/IP1/UDP/vxlan/Ether/IP2/xxx, does the flag concerns IP1 or IP2?

If it's IP1, it's really strange compared to the current behavior (the
flag PKT_RX_IP_CKSUM_BAD refers to IP1).

> #define PKT_RX_OVERSIZE      (0ULL << 0)  /**< Num of desc of an RX pkt oversize. */
> [Helin] I don't think it can be merge with other hardware errors. It indicates the packet
> received needs more descriptors than hardware allowed, and the part of packets can
> still be stored in the mbufs provided. It is a good hint for users that larger size of mbuf
> might be needed. If just put it as hardware error, users will lose this information. So I
> prefer to keep it there, and just assign a new value to it.

Again, a statistic counter would do the job if it's just to
provide a hint to the application.

I wonder in which case this flag can happen. If you fill the ring with
mbufs that are large enough compared to your ethernet network, this
should not happen in normal conditions. I really don't believe that
an application receiving an mbuf with this flag would stop the driver,
then refill the rings with larger mbufs.

Last but not least: If it's really useful, should we have the same
behavior on other drivers like ixgbe? I think we really need to care
about not having different ways to use the different drivers.

To me, the only argument in favor of keeping this flag is when the mbuf
contains a part of the data that could be dumped by a user for debug
purposes.

> #define PKT_RX_HBUF_OVERFLOW (0ULL << 0)  /**< Header buffer overflow. */
> [Helin] It indicates the header buff size is not enough, but not means hardware cannot
> process the packet received. It is a good hint for the users to provide larger size of header
> buffers. I also prefer to keep it there, and just assign new value to it.

Same for this one.

> #define PKT_RX_RECIP_ERR     (0ULL << 0)  /**< Hardware processing error. */
> [Helin] In the latest data sheet, it is not opened to external users. So we can just remove
> it from here.

ok

> #define PKT_RX_MAC_ERR       (0ULL << 0)  /**< MAC error. */
> [Helin] This indicates a real hardware error happens.

And what is the content of the mbuf data in this case? Does the
application really need an mbuf?


Regards,
Olivier
Helin Zhang Dec. 1, 2014, 1:57 a.m. UTC | #7
Hi Olivier

> -----Original Message-----
> From: Olivier MATZ [mailto:olivier.matz@6wind.com]
> Sent: Friday, November 28, 2014 4:48 PM
> To: Zhang, Helin; Ananyev, Konstantin; dev@dpdk.org
> Cc: Cao, Waterman; Cao, Min
> Subject: Re: [PATCH] i40e: Use one bit flag for all hardware detected RX packet
> errors
> 
> Hi Helin,
> 
> On 11/28/2014 09:07 AM, Zhang, Helin wrote:
> > After I have completed another task, I read the datasheet carefully
> > again. For those 5 error bits I introduced for a long time, I'd like to explain one
> by one as below.
> >
> > #define PKT_RX_EIP_CKSUM_BAD (0ULL << 0)  /**< External IP header
> > checksum error. */ [Helin] Nobody complains it, so we will keep it there, and
> just assign a new value to it.
> 
> ok.
> 
> But it would be nice to have a better definition of this flag: does external mean
> outer header? For instance, when you receive a
> Ether/IP1/UDP/vxlan/Ether/IP2/xxx, does the flag concerns IP1 or IP2?
'E' means 'external'; it indicates a checksum error in the outermost IP header. If you
think this name is not clear enough, I can change it to 'PKT_RX_OUTER_IP_CHSUM_BAD'.
For inner IP header checksum error, it will be indicated by PKT_RX_IP_CKSUM_BAD.

> 
> If it's IP1, it's really strange compared to the current behavior (the flag
> PKT_RX_IP_CKSUM_BAD refers to IP1).
> 
> > #define PKT_RX_OVERSIZE      (0ULL << 0)  /**< Num of desc of an RX pkt
> oversize. */
> > [Helin] I don't think it can be merge with other hardware errors. It
> > indicates the packet received needs more descriptors than hardware
> > allowed, and the part of packets can still be stored in the mbufs
> > provided. It is a good hint for users that larger size of mbuf might
> > be needed. If just put it as hardware error, users will lose this information. So I
> prefer to keep it there, and just assign a new value to it.
> 
> Again, a statistic counter would do the job which if it's just to provide a hint to the
> application.
It seems that we do not maintain a counter for packets in PMD, if I am not wrong. Two
ways current DPDK is using.
One is hardware provide registers to do that, we can read it directly when needed.
The other one is that applications or middle layer sw maintain its own statistics.

> 
> I wonder in which case this flag can happen. If you fill the ring with mbufs that are
> large enough compared to your ethernet network, this should not happen in
> normal conditions. I really don't believe that an application receiving an mbuf
> with this flag would stop the driver, then refill the rings it with larger mbufs.
This is not caused by a lack of available RX descriptors. It is caused by a hardware
requirement: FVL hardware must not use more than 5 RX descriptors
for receiving a single packet.

> Last but not least: If it's really useful, should we have the same behavior on other
> drivers like ixgbe? I think we really need to care about not having different ways
> to use the different drivers.
I don't see the similar bit in ixgbe datasheet, but this restriction could be common
for some other NICs, as it is reasonable from hardware perspective.

> 
> To me, the only argument in favor of keeping this flag is when the mbuf contains
> a part of the data that could be dumped by a user for debug purposes.
> 
> > #define PKT_RX_HBUF_OVERFLOW (0ULL << 0)  /**< Header buffer overflow.
> > */ [Helin] It indicates the header buff size is not enough, but not
> > means hardware cannot process the packet received. It is a good hint
> > for the users to provide larger size of header buffers. I also prefer to keep it
> there, and just assign new value to it.
> 
> Same for this one.
It is a bit different from previous one, it always has one header buffer, this flag means
the buffer size is not enough for the header.
These two flags are because of buffer size or number of buffers. The mbufs are prepared
in application or up layer software. If these two flags occur, it is easier for up layer software
to debug, and know different buffers are needed. They do not need to debug PMD, as they
generally don't want to do.

> 
> > #define PKT_RX_RECIP_ERR     (0ULL << 0)  /**< Hardware processing error.
> */
> > [Helin] In the latest data sheet, it is not opened to external users.
> > So we can just remove it from here.
> 
> ok
> 
> > #define PKT_RX_MAC_ERR       (0ULL << 0)  /**< MAC error. */
> > [Helin] This indicates a real hardware error happens.
> 
> And what is the content of the mbuf data in this case? Does the application really
> need an mbuf?
Mbuf contains both the data and other information. I prefer to let the up layer software to
decide how to deal with the packet, no matter it is correct or bad. In addition, even hardware
errors happened, it still can set a special bit to enable storing the whole packet to the mbuf,
for debug purpose. Hardware error bit can be used for all hardware errors. As we do not have
one there, why not add one?

> 
> 
> Regards,
> Olivier

Regards,
Helin
Olivier Matz Dec. 1, 2014, 9:58 a.m. UTC | #8
Hi Helin,

On 12/01/2014 02:57 AM, Zhang, Helin wrote:
>>> #define PKT_RX_EIP_CKSUM_BAD (0ULL << 0)  /**< External IP header
>>> checksum error. */ [Helin] Nobody complains it, so we will keep it there, and
>> just assign a new value to it.
>>
>> ok.
>>
>> But it would be nice to have a better definition of this flag: does external mean
>> outer header? For instance, when you receive a
>> Ether/IP1/UDP/vxlan/Ether/IP2/xxx, does the flag concerns IP1 or IP2?
> 'E' means 'external', it indicates the (most) outer IP header checksum error. If you
> don't think this name is not so clear, I can change it to 'PKT_RX_OUTER_IP_CHSUM_BAD'.
> For inner IP header checksum error, it will be indicated by PKT_RX_IP_CKSUM_BAD.
> 
>>
>> If it's IP1, it's really strange compared to the current behavior (the flag
>> PKT_RX_IP_CKSUM_BAD refers to IP1).

Ok.
But the real sense of my question was about the behavior which seems
different than with previous hardware. Today, if you receive the packet
Ether/IP1/UDP/vxlan/Ether/IP2/xxx on an ixgbe, the flag
PKT_RX_IP_CKSUM_BAD can be set if the checksum of IP1 is wrong. From
your explanation, I understand that PKT_RX_EIP_CKSUM_BAD would be set
for the same thing on i40e. Is it correct?


>>> #define PKT_RX_OVERSIZE      (0ULL << 0)  /**< Num of desc of an RX pkt
>> oversize. */
>>> [Helin] I don't think it can be merge with other hardware errors. It
>>> indicates the packet received needs more descriptors than hardware
>>> allowed, and the part of packets can still be stored in the mbufs
>>> provided. It is a good hint for users that larger size of mbuf might
>>> be needed. If just put it as hardware error, users will lose this information. So I
>> prefer to keep it there, and just assign a new value to it.
>>
>> Again, a statistic counter would do the job which if it's just to provide a hint to the
>> application.
> It seems that we do not maintain a counter for packets in PMD, if I am not wrong. Two
> ways current DPDK is using.
> One is hardware provide registers to do that, we can read it directly when needed.
> The other one is that applications or middle layer sw maintain its own statistics.

rte_eth_stats_get() gives the generic statistics
For specific error stats, rte_eth_xstats_get() can be used from an
application (the driver has to provide the full list of statistics).

>> I wonder in which case this flag can happen. If you fill the ring with mbufs that are
>> large enough compared to your ethernet network, this should not happen in
>> normal conditions. I really don't believe that an application receiving an mbuf
>> with this flag would stop the driver, then refill the rings it with larger mbufs.
> This is not because of it is lack of available RX descriptors. It is because of a hardware
> requirement. FVL hardware requires that it should not use more than 5 rx descriptors
> for receiving a single packet.

I still don't understand what the application should do when the flag
is set. Maybe you could provide an example in l2fwd or testpmd?

>> Last but not least: If it's really useful, should we have the same behavior on other
>> drivers like ixgbe? I think we really need to care about not having different ways
>> to use the different drivers.
> I don't see the similar bit in ixgbe datasheet, but this restriction could be common
> for some other NICs, as it is reasonable from hardware perspective.

In ixgbe, there are other error cases:
- frames shorter than 64 bytes
- oversize (frames larger than MAXFRS)
- ... maybe others?

Should we have a flag for each situation? I think not.


>> To me, the only argument in favor of keeping this flag is when the mbuf contains
>> a part of the data that could be dumped by a user for debug purposes.
>>
>>> #define PKT_RX_HBUF_OVERFLOW (0ULL << 0)  /**< Header buffer overflow.
>>> */ [Helin] It indicates the header buff size is not enough, but not
>>> means hardware cannot process the packet received. It is a good hint
>>> for the users to provide larger size of header buffers. I also prefer to keep it
>> there, and just assign new value to it.
>>
>> Same for this one.
> It is a bit different from previous one, it always has one header buffer, this flag means
> the buffer size is not enough for the header.
> These two flags are because of buffer size or number of buffers. The mbufs are prepared
> in application or up layer software. If these two flags occur, it is easier for up layer software
> to debug, and know different buffers are needed. They do not need to debug PMD, as they
> generally don't want to do.

You say it's easier for the software to debug, but I cannot see the
difference. When it's a statistics counter, you just have to use
rte_eth_xstats_get(), which is an equivalent of "ethtool -S iface"
which gives all the hardware statistics. It will work for any driver
and any application.

If we add these flags, the application have to know about all these
specific flags and how to handle them.

>>> #define PKT_RX_RECIP_ERR     (0ULL << 0)  /**< Hardware processing error.
>> */
>>> [Helin] In the latest data sheet, it is not opened to external users.
>>> So we can just remove it from here.
>>
>> ok
>>
>>> #define PKT_RX_MAC_ERR       (0ULL << 0)  /**< MAC error. */
>>> [Helin] This indicates a real hardware error happens.
>>
>> And what is the content of the mbuf data in this case? Does the application really
>> need an mbuf?
> Mbuf contains both the data and other information. I prefer to let the up layer software to
> decide how to deal with the packet, no matter it is correct or bad. In addition, even hardware
> errors happened, it still can set a special bit to enable storing the whole packet to the mbuf,
> for debug purpose. Hardware error bit can be used for all hardware errors. As we do not have
> one there, why not add one?

You say "let the up layer software to decide how to deal with the
packet, no matter it is correct or bad". But what can do an application
with a packet if it does not know if the data is correct or bad?

Regards,
Olivier
Helin Zhang Dec. 2, 2014, 7:25 a.m. UTC | #9
> -----Original Message-----
> From: Olivier MATZ [mailto:olivier.matz@6wind.com]
> Sent: Monday, December 1, 2014 5:58 PM
> To: Zhang, Helin; Ananyev, Konstantin; dev@dpdk.org
> Cc: Cao, Waterman; Cao, Min
> Subject: Re: [PATCH] i40e: Use one bit flag for all hardware detected RX packet
> errors
> 
> Hi Helin,
> 
> On 12/01/2014 02:57 AM, Zhang, Helin wrote:
> >>> #define PKT_RX_EIP_CKSUM_BAD (0ULL << 0)  /**< External IP header
> >>> checksum error. */ [Helin] Nobody complains it, so we will keep it
> >>> there, and
> >> just assign a new value to it.
> >>
> >> ok.
> >>
> >> But it would be nice to have a better definition of this flag: does
> >> external mean outer header? For instance, when you receive a
> >> Ether/IP1/UDP/vxlan/Ether/IP2/xxx, does the flag concerns IP1 or IP2?
> > 'E' means 'external', it indicates the (most) outer IP header checksum
> > error. If you don't think this name is not so clear, I can change it to
> 'PKT_RX_OUTER_IP_CHSUM_BAD'.
> > For inner IP header checksum error, it will be indicated by
> PKT_RX_IP_CKSUM_BAD.
> >
> >>
> >> If it's IP1, it's really strange compared to the current behavior
> >> (the flag PKT_RX_IP_CKSUM_BAD refers to IP1).
> 
> Ok.
> But the real sense of my question was about the behavior which seems different
> than with previous hardware. Today, if you receive the packet
> Ether/IP1/UDP/vxlan/Ether/IP2/xxx on an ixgbe, the flag
> PKT_RX_IP_CKSUM_BAD can be set if the checksum of IP1 is wrong. From your
> explanation, I understand that PKT_RX_EIP_CKSUM_BAD would be set for the
> same thing on i40e. Is it correct?
Yes, it would be strange if the ixgbe hardware checksum logic knew that the packet is tunneled.
But from another point of view, which seems to be what the ixgbe hardware does, if the
checksum logic is not aware of tunneling, it may treat everything else as data. In that case it is
reasonable to report the error with PKT_RX_IP_CKSUM_BAD.

> 
> 
> >>> #define PKT_RX_OVERSIZE      (0ULL << 0)  /**< Num of desc of an RX
> pkt
> >> oversize. */
> >>> [Helin] I don't think it can be merge with other hardware errors. It
> >>> indicates the packet received needs more descriptors than hardware
> >>> allowed, and the part of packets can still be stored in the mbufs
> >>> provided. It is a good hint for users that larger size of mbuf might
> >>> be needed. If just put it as hardware error, users will lose this
> >>> information. So I
> >> prefer to keep it there, and just assign a new value to it.
> >>
> >> Again, a statistic counter would do the job which if it's just to
> >> provide a hint to the application.
> > It seems that we do not maintain a counter for packets in PMD, if I am
> > not wrong. Two ways current DPDK is using.
> > One is hardware provide registers to do that, we can read it directly when
> needed.
> > The other one is that applications or middle layer sw maintain its own statistics.
> 
> rte_eth_stats_get() gives the generic statistics For specific error stats,
> rte_eth_xstats_get() can be used from an application (the driver has to provide
> the full list of statistics).
Yes, that function read all the statistics from registers directly. I think there might
already have the corresponding registers for it.

> 
> >> I wonder in which case this flag can happen. If you fill the ring
> >> with mbufs that are large enough compared to your ethernet network,
> >> this should not happen in normal conditions. I really don't believe
> >> that an application receiving an mbuf with this flag would stop the driver, then
> refill the rings it with larger mbufs.
> > This is not because of it is lack of available RX descriptors. It is
> > because of a hardware requirement. FVL hardware requires that it
> > should not use more than 5 rx descriptors for receiving a single packet.
> 
> I still don't understand what the application should do when the flag is set.
> Maybe you could provide an example in l2fwd or testpmd?
If an application is notified of this oversize error, it can easily tell that
the mbuf size might not be enough. If it is notified of just a generic hardware error, then
the application developer needs to dig into the PMD to see what happened. I think most
application developers do not want to debug the PMD too much, as it might be outside their scope.

> 
> >> Last but not least: If it's really useful, should we have the same
> >> behavior on other drivers like ixgbe? I think we really need to care
> >> about not having different ways to use the different drivers.
> > I don't see the similar bit in ixgbe datasheet, but this restriction
> > could be common for some other NICs, as it is reasonable from hardware
> perspective.
> 
> In ixgbe, there are other error cases:
> - frames shorter than 64 bytes
> - oversize (frames larger than MAXFRS)
> - ... maybe others?
> 
> Should we have a flag for each situation? I think not.
The more the better, but we may need a tradeoff, as the number of flag bits is limited.

> 
> 
> >> To me, the only argument in favor of keeping this flag is when the
> >> mbuf contains a part of the data that could be dumped by a user for debug
> purposes.
> >>
> >>> #define PKT_RX_HBUF_OVERFLOW (0ULL << 0)  /**< Header buffer
> overflow.
> >>> */ [Helin] It indicates the header buff size is not enough, but not
> >>> means hardware cannot process the packet received. It is a good hint
> >>> for the users to provide larger size of header buffers. I also
> >>> prefer to keep it
> >> there, and just assign new value to it.
> >>
> >> Same for this one.
> > It is a bit different from previous one, it always has one header
> > buffer, this flag means the buffer size is not enough for the header.
> > These two flags are because of buffer size or number of buffers. The
> > mbufs are prepared in application or up layer software. If these two
> > flags occur, it is easier for up layer software to debug, and know
> > different buffers are needed. They do not need to debug PMD, as they
> generally don't want to do.
> 
> You say it's easier for the software to debug, but I cannot see the difference.
> When it's a statistics counter, you just have to use rte_eth_xstats_get(), which is
> an equivalent of "ethtool -S iface"
> which gives all the hardware statistics. It will work for any driver and any
> application.
> 
> If we add these flags, the application have to know about all these specific flags
> and how to handle them.
OK. I agree with you not all errors need to be reported as flags. But I'd prefer to
add those flags which is useful for applications to bypass the errors.

> 
> >>> #define PKT_RX_RECIP_ERR     (0ULL << 0)  /**< Hardware processing
> error.
> >> */
> >>> [Helin] In the latest data sheet, it is not opened to external users.
> >>> So we can just remove it from here.
> >>
> >> ok
> >>
> >>> #define PKT_RX_MAC_ERR       (0ULL << 0)  /**< MAC error. */
> >>> [Helin] This indicates a real hardware error happens.
> >>
> >> And what is the content of the mbuf data in this case? Does the
> >> application really need an mbuf?
> > Mbuf contains both the data and other information. I prefer to let the
> > up layer software to decide how to deal with the packet, no matter it
> > is correct or bad. In addition, even hardware errors happened, it
> > still can set a special bit to enable storing the whole packet to the
> > mbuf, for debug purpose. Hardware error bit can be used for all hardware
> errors. As we do not have one there, why not add one?
> 
> You say "let the up layer software to decide how to deal with the packet, no
> matter it is correct or bad". But what can do an application with a packet if it does
> not know if the data is correct or bad?
Mbuf flags can tell him if the data is good or bad, if good, more info can be seen in flags.

> 
> Regards,
> Olivier

Yes, flags should be a tradeoff, we can provide useful flags in mbuf, but not all.

Regards,
Helin
Helin Zhang Dec. 5, 2014, 1:46 a.m. UTC | #10
Before the mbuf structure was redefined, there was a lack of free bits in 'ol_flags'
(32 bits in total) for new RX or TX flags. So the code tried to reuse existing bits
as much as possible, or even assigned 0 to some of the bit flags. With the new mbuf
structure, there are quite a lot of free bits. So those newly added
bit flags should be assigned correct and valid bit values, and getting
their names should be enabled as well. Note that 'RECIP' should be removed,
as it will not be used anywhere.

v2 changes:
* Removed only the 'RECIPE' error flag processing, in both the i40e PMD and mbuf. All
  other error flags were added back.
* Assigned error flags with correct and valid values, as their previous values
  were invalid.
* Enabled getting all error flag names.

Helin Zhang (2):
  i40e: remove checking rxd flag which is not public
  mbuf: assign valid bit values for some RX and TX flags

 lib/librte_mbuf/rte_mbuf.c      |  9 ++++-----
 lib/librte_mbuf/rte_mbuf.h      | 19 +++++++++----------
 lib/librte_pmd_i40e/i40e_rxtx.c |  6 ------
 3 files changed, 13 insertions(+), 21 deletions(-)
diff mbox

Patch

diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index 5899e5c..897fd26 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -80,11 +80,6 @@  extern "C" {
 #define PKT_RX_FDIR          (1ULL << 2)  /**< RX packet with FDIR match indicate. */
 #define PKT_RX_L4_CKSUM_BAD  (1ULL << 3)  /**< L4 cksum of RX pkt. is not OK. */
 #define PKT_RX_IP_CKSUM_BAD  (1ULL << 4)  /**< IP cksum of RX pkt. is not OK. */
-#define PKT_RX_EIP_CKSUM_BAD (0ULL << 0)  /**< External IP header checksum error. */
-#define PKT_RX_OVERSIZE      (0ULL << 0)  /**< Num of desc of an RX pkt oversize. */
-#define PKT_RX_HBUF_OVERFLOW (0ULL << 0)  /**< Header buffer overflow. */
-#define PKT_RX_RECIP_ERR     (0ULL << 0)  /**< Hardware processing error. */
-#define PKT_RX_MAC_ERR       (0ULL << 0)  /**< MAC error. */
 #define PKT_RX_IPV4_HDR      (1ULL << 5)  /**< RX packet with IPv4 header. */
 #define PKT_RX_IPV4_HDR_EXT  (1ULL << 6)  /**< RX packet with extended IPv4 header. */
 #define PKT_RX_IPV6_HDR      (1ULL << 7)  /**< RX packet with IPv6 header. */
@@ -95,6 +90,7 @@  extern "C" {
 #define PKT_RX_TUNNEL_IPV6_HDR (1ULL << 12) /**< RX tunnel packet with IPv6 header. */
 #define PKT_RX_FDIR_ID       (1ULL << 13) /**< FD id reported if FDIR match. */
 #define PKT_RX_FDIR_FLX      (1ULL << 14) /**< Flexible bytes reported if FDIR match. */
+#define PKT_RX_ERR_HW        (1ULL << 15) /**< RX packet error detected by hardware. */
 
 #define PKT_TX_VLAN_PKT      (1ULL << 55) /**< TX packet is a 802.1q VLAN packet. */
 #define PKT_TX_IP_CKSUM      (1ULL << 54) /**< IP cksum of TX pkt. computed by NIC. */
diff --git a/lib/librte_pmd_i40e/i40e_rxtx.c b/lib/librte_pmd_i40e/i40e_rxtx.c
index cce6911..3b2195d 100644
--- a/lib/librte_pmd_i40e/i40e_rxtx.c
+++ b/lib/librte_pmd_i40e/i40e_rxtx.c
@@ -115,35 +115,10 @@  i40e_rxd_status_to_pkt_flags(uint64_t qword)
 static inline uint64_t
 i40e_rxd_error_to_pkt_flags(uint64_t qword)
 {
-	uint64_t flags = 0;
-	uint64_t error_bits = (qword >> I40E_RXD_QW1_ERROR_SHIFT);
-
-#define I40E_RX_ERR_BITS 0x3f
-	if (likely((error_bits & I40E_RX_ERR_BITS) == 0))
-		return flags;
-	/* If RXE bit set, all other status bits are meaningless */
-	if (unlikely(error_bits & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
-		flags |= PKT_RX_MAC_ERR;
-		return flags;
-	}
-
-	/* If RECIPE bit set, all other status indications should be ignored */
-	if (unlikely(error_bits & (1 << I40E_RX_DESC_ERROR_RECIPE_SHIFT))) {
-		flags |= PKT_RX_RECIP_ERR;
-		return flags;
-	}
-	if (unlikely(error_bits & (1 << I40E_RX_DESC_ERROR_HBO_SHIFT)))
-		flags |= PKT_RX_HBUF_OVERFLOW;
-	if (unlikely(error_bits & (1 << I40E_RX_DESC_ERROR_IPE_SHIFT)))
-		flags |= PKT_RX_IP_CKSUM_BAD;
-	if (unlikely(error_bits & (1 << I40E_RX_DESC_ERROR_L4E_SHIFT)))
-		flags |= PKT_RX_L4_CKSUM_BAD;
-	if (unlikely(error_bits & (1 << I40E_RX_DESC_ERROR_EIPE_SHIFT)))
-		flags |= PKT_RX_EIP_CKSUM_BAD;
-	if (unlikely(error_bits & (1 << I40E_RX_DESC_ERROR_OVERSIZE_SHIFT)))
-		flags |= PKT_RX_OVERSIZE;
+	if (unlikely(qword & I40E_RXD_QW1_ERROR_MASK))
+		return PKT_RX_ERR_HW;
 
-	return flags;
+	return 0;
 }
 
 /* Translate pkt types to pkt flags */