[dpdk-dev,RFC,01/17] mbuf: add definitions of unified packet types

Message ID 1421637803-17034-2-git-send-email-helin.zhang@intel.com (mailing list archive)
State Superseded, archived
Headers

Commit Message

Zhang, Helin Jan. 19, 2015, 3:23 a.m. UTC
  There are only 6 bit flags in ol_flags for indicating packet types,
which is not enough to describe all the possible packet types hardware
can recognize. For example, i40e hardware can recognize more than 150
packet types. Unified packet type is composed of tunnel type, L3 type,
L4 type and inner L3 type fields, and can be stored in the 16-bit mbuf
field 'packet_type'.

Signed-off-by: Helin Zhang <helin.zhang@intel.com>
Signed-off-by: Cunming Liang <cunming.liang@intel.com>
Signed-off-by: Jijiang Liu <jijiang.liu@intel.com>
---
 lib/librte_mbuf/rte_mbuf.h | 68 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)
  

Comments

Ananyev, Konstantin Jan. 19, 2015, 4:19 p.m. UTC | #1
> -----Original Message-----
> From: Zhang, Helin
> Sent: Monday, January 19, 2015 3:23 AM
> To: dev@dpdk.org
> Cc: Liang, Cunming; Liu, Jijiang; Ananyev, Konstantin; Zhang, Helin
> Subject: [RFC 01/17] mbuf: add definitions of unified packet types
> 
> As there are only 6 bit flags in ol_flags for indicating packet types,
> which is not enough to describe all the possible packet types hardware
> can recognize. For example, i40e hardware can recognize more than 150
> packet types. Unified packet type is composed of tunnel type, L3 type,
> L4 type and inner L3 type fields, and can be stored in 16 bits mbuf
> field of 'packet_type'.
> 
> Signed-off-by: Helin Zhang <helin.zhang@intel.com>
> Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> Signed-off-by: Jijiang Liu <jijiang.liu@intel.com>
> ---
>  lib/librte_mbuf/rte_mbuf.h | 68 ++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 68 insertions(+)
> 
> diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
> index 16059c6..94eb38f 100644
> --- a/lib/librte_mbuf/rte_mbuf.h
> +++ b/lib/librte_mbuf/rte_mbuf.h
> @@ -165,6 +165,74 @@ extern "C" {
>  /* Use final bit of flags to indicate a control mbuf */
>  #define CTRL_MBUF_FLAG       (1ULL << 63) /**< Mbuf contains control data */
> 
> +/*
> + * Sixteen bits are divided into several fields to mark packet types. Note that
> + * each field is indexical.
> + * - Bit 3:0 is for tunnel types.
> + * - Bit 7:4 is for L3 or outer L3 (for tunneling case) types.
> + * - Bit 10:8 is for L4 types. It can also be used for inner L4 types for
> + *   tunneling packets.
> + * - Bit 13:11 is for inner L3 types.
> + * - Bit 15:14 is reserved.
> + *
> + * To be compitable with Vector PMD, RTE_PTYPE_L3_IPV4, RTE_PTYPE_L3_IPV4_EXT,
> + * RTE_PTYPE_L3_IPV6, RTE_PTYPE_L3_IPV6_EXT, RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP
> + * and RTE_PTYPE_L4_SCTP should be kept as below in a contiguous 7 bits.
> + */
> +#define RTE_PTYPE_UNKNOWN                   0x0000 /* 0b0000000000000000 */
> +/* bit 3:0 for tunnel types */
> +#define RTE_PTYPE_TUNNEL_IP                 0x0001 /* 0b0000000000000001 */
> +#define RTE_PTYPE_TUNNEL_TCP                0x0002 /* 0b0000000000000010 */
> +#define RTE_PTYPE_TUNNEL_UDP                0x0003 /* 0b0000000000000011 */
> +#define RTE_PTYPE_TUNNEL_GRE                0x0004 /* 0b0000000000000100 */
> +#define RTE_PTYPE_TUNNEL_VXLAN              0x0005 /* 0b0000000000000101 */
> +#define RTE_PTYPE_TUNNEL_NVGRE              0x0006 /* 0b0000000000000110 */
> +#define RTE_PTYPE_TUNNEL_GENEVE             0x0007 /* 0b0000000000000111 */
> +#define RTE_PTYPE_TUNNEL_GRENAT             0x0008 /* 0b0000000000001000 */
> +#define RTE_PTYPE_TUNNEL_GRENAT_MAC         0x0009 /* 0b0000000000001001 */
> +#define RTE_PTYPE_TUNNEL_GRENAT_MACVLAN     0x000a /* 0b0000000000001010 */
> +#define RTE_PTYPE_TUNNEL_MASK               0x000f /* 0b0000000000001111 */
> +/* bit 7:4 for L3 types */
> +#define RTE_PTYPE_L3_IPV4                   0x0010 /* 0b0000000000010000 */
> +#define RTE_PTYPE_L3_IPV4_EXT               0x0030 /* 0b0000000000110000 */
> +#define RTE_PTYPE_L3_IPV6                   0x0040 /* 0b0000000001000000 */
> +#define RTE_PTYPE_L3_IPV6_EXT               0x00c0 /* 0b0000000011000000 */
> +#define RTE_PTYPE_L3_IPV4_EXT_UNKNOWN       0x00d0 /* 0b0000000011010000 */
> +#define RTE_PTYPE_L3_IPV6_EXT_UNKNOWN       0x00e0 /* 0b0000000011100000 */
> +#define RTE_PTYPE_L3_MASK                   0x00f0 /* 0b0000000011110000 */

I still think it would be better to use enum not bit-set for IPv4/IPv6 distinction, but if you set it that way,
can you at least take advantage of it and make RTE_ETH_IS_IPV4_HDR() not require 3 comparisons?
I think it is doable if you set bit 4 for IPv4 types only (you already do that)
and bit 6 for IPv6 types only.
For that, I think, you can make RTE_PTYPE_L3_IPV4_EXT_UNKNOWN == 0xb0  /* 0b0000000010110000 */
Then you can:

#define  RTE_ETH_IS_IPV4_HDR(ptype)    (((ptype) & RTE_PTYPE_L3_IPV4) != 0)
#define  RTE_ETH_IS_IPV6_HDR(ptype)    (((ptype) & RTE_PTYPE_L3_IPV6) != 0)

I suppose that would be faster than what you propose below,
and would probably require fewer changes in our sample apps.

Konstantin

> +/* bit 10:8 for L4 types */
> +#define RTE_PTYPE_L4_TCP                    0x0100 /* 0b0000000100000000 */
> +#define RTE_PTYPE_L4_UDP                    0x0200 /* 0b0000001000000000 */
> +#define RTE_PTYPE_L4_FRAG                   0x0300 /* 0b0000001100000000 */
> +#define RTE_PTYPE_L4_SCTP                   0x0400 /* 0b0000010000000000 */
> +#define RTE_PTYPE_L4_ICMP                   0x0500 /* 0b0000010100000000 */
> +#define RTE_PTYPE_L4_NONFRAG                0x0600 /* 0b0000011000000000 */
> +#define RTE_PTYPE_L4_MASK                   0x0700 /* 0b0000011100000000 */
> +/* bit 13:11 for inner L3 types */
> +#define RTE_PTYPE_INNER_L3_IPV4             0x0800 /* 0b0000100000000000 */
> +#define RTE_PTYPE_INNER_L3_IPV4_EXT         0x1000 /* 0b0001000000000000 */
> +#define RTE_PTYPE_INNER_L3_IPV6             0x1800 /* 0b0001100000000000 */
> +#define RTE_PTYPE_INNER_L3_IPV6_EXT         0x2000 /* 0b0010000000000000 */
> +#define RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN 0x2800 /* 0b0010100000000000 */
> +#define RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN 0x3000 /* 0b0011000000000000 */
> +#define RTE_PTYPE_INNER_L3_MASK             0x3800 /* 0b0011100000000000 */
> +/* bit 15:14 reserved */
> +
> +/* Check if the (outer) L3 header is IPv4 */
> +#define  RTE_ETH_IS_IPV4_HDR(ptype) \
> +	(((ptype) & RTE_PTYPE_L3_MASK) == RTE_PTYPE_L3_IPV4 || \
> +	((ptype) & RTE_PTYPE_L3_MASK) == RTE_PTYPE_L3_IPV4_EXT || \
> +	((ptype) & RTE_PTYPE_L3_MASK) == RTE_PTYPE_L3_IPV4_EXT_UNKNOWN)
> +
> +/* Check if the (outer) L3 header is IPv6 */
> +#define  RTE_ETH_IS_IPV6_HDR(ptype) \
> +	(((ptype) & RTE_PTYPE_L3_MASK) == RTE_PTYPE_L3_IPV6 || \
> +	((ptype) & RTE_PTYPE_L3_MASK) == RTE_PTYPE_L3_IPV6_EXT || \
> +	((ptype) & RTE_PTYPE_L3_MASK) == RTE_PTYPE_L3_IPV6_EXT_UNKNOWN)
> +
> +/* Check if it is a tunneling packet */
> +#define RTE_ETH_IS_TUNNEL_PKT(ptype) ((ptype) & RTE_PTYPE_TUNNEL_MASK)
> +
>  /**
>   * Get the name of a RX offload flag
>   *
> --
> 1.8.1.4
  
Neil Horman Jan. 19, 2015, 4:33 p.m. UTC | #2
On Mon, Jan 19, 2015 at 11:23:07AM +0800, Helin Zhang wrote:
> As there are only 6 bit flags in ol_flags for indicating packet types,
> which is not enough to describe all the possible packet types hardware
> can recognize. For example, i40e hardware can recognize more than 150
> packet types. Unified packet type is composed of tunnel type, L3 type,
> L4 type and inner L3 type fields, and can be stored in 16 bits mbuf
> field of 'packet_type'.
> 
> Signed-off-by: Helin Zhang <helin.zhang@intel.com>
> Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> Signed-off-by: Jijiang Liu <jijiang.liu@intel.com>
> ---
>  lib/librte_mbuf/rte_mbuf.h | 68 ++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 68 insertions(+)
> 
> diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
> index 16059c6..94eb38f 100644
> --- a/lib/librte_mbuf/rte_mbuf.h
> +++ b/lib/librte_mbuf/rte_mbuf.h
> @@ -165,6 +165,74 @@ extern "C" {
>  /* Use final bit of flags to indicate a control mbuf */
>  #define CTRL_MBUF_FLAG       (1ULL << 63) /**< Mbuf contains control data */
>  
> +/*
> + * Sixteen bits are divided into several fields to mark packet types. Note that
> + * each field is indexical.
> + * - Bit 3:0 is for tunnel types.
> + * - Bit 7:4 is for L3 or outer L3 (for tunneling case) types.
> + * - Bit 10:8 is for L4 types. It can also be used for inner L4 types for
> + *   tunneling packets.
This seems a bit sparse, in that the protocol field is 8 bits wide in a packet.
There are several common protocols that you don't have listed, and you've already
exhausted your namespace with the list you have.
Neil
  
Olivier Matz Jan. 19, 2015, 5:27 p.m. UTC | #3
Hi,

On 01/19/2015 05:33 PM, Neil Horman wrote:
> On Mon, Jan 19, 2015 at 11:23:07AM +0800, Helin Zhang wrote:
>> As there are only 6 bit flags in ol_flags for indicating packet types,
>> which is not enough to describe all the possible packet types hardware
>> can recognize. For example, i40e hardware can recognize more than 150
>> packet types. Unified packet type is composed of tunnel type, L3 type,
>> L4 type and inner L3 type fields, and can be stored in 16 bits mbuf
>> field of 'packet_type'.
>>
>> Signed-off-by: Helin Zhang <helin.zhang@intel.com>
>> Signed-off-by: Cunming Liang <cunming.liang@intel.com>
>> Signed-off-by: Jijiang Liu <jijiang.liu@intel.com>
>> ---
>>  lib/librte_mbuf/rte_mbuf.h | 68 ++++++++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 68 insertions(+)
>>
>> diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
>> index 16059c6..94eb38f 100644
>> --- a/lib/librte_mbuf/rte_mbuf.h
>> +++ b/lib/librte_mbuf/rte_mbuf.h
>> @@ -165,6 +165,74 @@ extern "C" {
>>  /* Use final bit of flags to indicate a control mbuf */
>>  #define CTRL_MBUF_FLAG       (1ULL << 63) /**< Mbuf contains control data */
>>  
>> +/*
>> + * Sixteen bits are divided into several fields to mark packet types. Note that
>> + * each field is indexical.
>> + * - Bit 3:0 is for tunnel types.
>> + * - Bit 7:4 is for L3 or outer L3 (for tunneling case) types.
>> + * - Bit 10:8 is for L4 types. It can also be used for inner L4 types for
>> + *   tunneling packets.
> This seems a bit sparse, in that the protocol field is 8 bits wide in a packet.
> There are several common protocls that you don't have listed, and you've already
> exhausted your namespace with the list you have.
> Neil

Another question I've asked several times[1][2] : what does having
RTE_PTYPE_TUNNEL_IP mean? What fields are checked by the hardware
(or the driver) and what fields should be checked by the application?
Are you sure that all the drivers (ixgbe, i40e, vmxnet3, enic) check
the same fields? (ethertype, ip version, ip len correct, ip checksum
correct, flags, ...)

To be clearer: Let's say I have a network stack that parses and
validates an IP packet. What tests can I remove if I get
RTE_PTYPE_TUNNEL_IP?

This question can be asked for all defined packet type. To be usable by
an application, I think a formal definition would be needed. This is
also important to know this for people wanting to develop a new PMD
based on a new hardware. If the hardware does not behave exactly like
ixgbe, i40e (I hope all drivers you implemented behave exactly the
same), some work has to be done in the driver or the feature cannot be
used.

One naïve question: are we sure that at the end, using these complex
packet types is faster than parsing the packet?

Regards,
Olivier


[1] http://dpdk.org/ml/archives/dev/2014-November/008534.html
[2] http://dpdk.org/ml/archives/dev/2014-November/008367.html
  
Neil Horman Jan. 19, 2015, 6:15 p.m. UTC | #4
On Mon, Jan 19, 2015 at 06:27:02PM +0100, Olivier MATZ wrote:
> Hi,
> 
> On 01/19/2015 05:33 PM, Neil Horman wrote:
> > On Mon, Jan 19, 2015 at 11:23:07AM +0800, Helin Zhang wrote:
> >> As there are only 6 bit flags in ol_flags for indicating packet types,
> >> which is not enough to describe all the possible packet types hardware
> >> can recognize. For example, i40e hardware can recognize more than 150
> >> packet types. Unified packet type is composed of tunnel type, L3 type,
> >> L4 type and inner L3 type fields, and can be stored in 16 bits mbuf
> >> field of 'packet_type'.
> >>
> >> Signed-off-by: Helin Zhang <helin.zhang@intel.com>
> >> Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> >> Signed-off-by: Jijiang Liu <jijiang.liu@intel.com>
> >> ---
> >>  lib/librte_mbuf/rte_mbuf.h | 68 ++++++++++++++++++++++++++++++++++++++++++++++
> >>  1 file changed, 68 insertions(+)
> >>
> >> diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
> >> index 16059c6..94eb38f 100644
> >> --- a/lib/librte_mbuf/rte_mbuf.h
> >> +++ b/lib/librte_mbuf/rte_mbuf.h
> >> @@ -165,6 +165,74 @@ extern "C" {
> >>  /* Use final bit of flags to indicate a control mbuf */
> >>  #define CTRL_MBUF_FLAG       (1ULL << 63) /**< Mbuf contains control data */
> >>  
> >> +/*
> >> + * Sixteen bits are divided into several fields to mark packet types. Note that
> >> + * each field is indexical.
> >> + * - Bit 3:0 is for tunnel types.
> >> + * - Bit 7:4 is for L3 or outer L3 (for tunneling case) types.
> >> + * - Bit 10:8 is for L4 types. It can also be used for inner L4 types for
> >> + *   tunneling packets.
> > This seems a bit sparse, in that the protocol field is 8 bits wide in a packet.
> > There are several common protocls that you don't have listed, and you've already
> > exhausted your namespace with the list you have.
> > Neil
> 
> Another question I've asked several times[1][2] : what does having
> RTE_PTYPE_TUNNEL_IP mean? What fields are checked by the hardware
> (or the driver) and what fields should be checked by the application?
> Are you sure that all the drivers (ixgbe, i40e, vmxnet3, enic) check
> the same fields? (ethertype, ip version, ip len correct, ip checksum
> correct, flags, ...)
> 
> To be clearer: Let's say I have a network stack that parses and
> validates an IP packet. What tests can I remove if I get
> RTE_PTYPE_TUNNEL_IP?
> 
> This question can be asked for all defined packet type. To be usable by
> an application, I think a formal definition would be needed. This is
> also important to know this for people wanting to develop a new PMD
> based on a new hardware. If the hardware does not behave exactly like
> ixgbe, i40e (I hope all drivers you implemented behave exactly the
> same), some work has to be done in the driver or the feature cannot be
> used.
> 
> One naïve question: are we sure that at the end, using these complex
> packet types is faster than parsing the packet?
> 
That's an excellent question, especially when you start considering that
higher-layer stack functions will want to isolate themselves from these complex
packet types.

Neil

> Regards,
> Olivier
> 
> 
> [1] http://dpdk.org/ml/archives/dev/2014-November/008534.html
> [2] http://dpdk.org/ml/archives/dev/2014-November/008367.html
>
  
Zhang, Helin Jan. 20, 2015, 2:28 a.m. UTC | #5
> -----Original Message-----
> From: Olivier MATZ [mailto:olivier.matz@6wind.com]
> Sent: Tuesday, January 20, 2015 1:27 AM
> To: Neil Horman; Zhang, Helin
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [RFC 01/17] mbuf: add definitions of unified packet
> types
> 
> Hi,
> 
> On 01/19/2015 05:33 PM, Neil Horman wrote:
> > On Mon, Jan 19, 2015 at 11:23:07AM +0800, Helin Zhang wrote:
> >> As there are only 6 bit flags in ol_flags for indicating packet
> >> types, which is not enough to describe all the possible packet types
> >> hardware can recognize. For example, i40e hardware can recognize more
> >> than 150 packet types. Unified packet type is composed of tunnel
> >> type, L3 type,
> >> L4 type and inner L3 type fields, and can be stored in 16 bits mbuf
> >> field of 'packet_type'.
> >>
> >> Signed-off-by: Helin Zhang <helin.zhang@intel.com>
> >> Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> >> Signed-off-by: Jijiang Liu <jijiang.liu@intel.com>
> >> ---
> >>  lib/librte_mbuf/rte_mbuf.h | 68
> >> ++++++++++++++++++++++++++++++++++++++++++++++
> >>  1 file changed, 68 insertions(+)
> >>
> >> diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
> >> index 16059c6..94eb38f 100644
> >> --- a/lib/librte_mbuf/rte_mbuf.h
> >> +++ b/lib/librte_mbuf/rte_mbuf.h
> >> @@ -165,6 +165,74 @@ extern "C" {
> >>  /* Use final bit of flags to indicate a control mbuf */
> >>  #define CTRL_MBUF_FLAG       (1ULL << 63) /**< Mbuf contains
> control data */
> >>
> >> +/*
> >> + * Sixteen bits are divided into several fields to mark packet
> >> +types. Note that
> >> + * each field is indexical.
> >> + * - Bit 3:0 is for tunnel types.
> >> + * - Bit 7:4 is for L3 or outer L3 (for tunneling case) types.
> >> + * - Bit 10:8 is for L4 types. It can also be used for inner L4 types for
> >> + *   tunneling packets.
> > This seems a bit sparse, in that the protocol field is 8 bits wide in a packet.
> > There are several common protocls that you don't have listed, and
> > you've already exhausted your namespace with the list you have.
> > Neil
I have reviewed all packet types supported in igb, ixgbe and i40e, and read the
code to get the packet types used in vmxnet3, bond, enic, etc.
The current design can support all packet types used in the above PMDs.
Yes, we don't have much space reserved for the future, but we can try to make
more bits available for the packet_type field later, as we can save 6 bits in
ol_flags with this patch set.

> 
> Another question I've asked several times[1][2] : what does having
> RTE_PTYPE_TUNNEL_IP mean? What fields are checked by the hardware (or
> the driver) and what fields should be checked by the application?
> Are you sure that all the drivers (ixgbe, i40e, vmxnet3, enic) check the same
> fields? (ethertype, ip version, ip len correct, ip checksum correct, flags, ...)
RTE_PTYPE_TUNNEL_IP means the hardware recognizes the received packet as an
IP-in-IP packet.
All the fields are filled in by the PMD according to what the hardware
recognizes. The application can just use them, which saves the CPU cycles it
would otherwise spend recognizing the packet type in software.
Drivers are responsible for filling in the correct values according to the
packet types recognized by their hardware. Different PMDs may fill in different
values based on their different capabilities.

> 
> To be clearer: Let's say I have a network stack that parses and validates an IP
> packet. What tests can I remove if I get RTE_PTYPE_TUNNEL_IP?
That means it is an IP-in-IP tunnel packet, and not any other tunnel type. You
can also check the other fields in packet_type to get more information about
the packet (e.g. L4 type).

> 
> This question can be asked for all defined packet type. To be usable by an
> application, I think a formal definition would be needed. This is also important
> to know this for people wanting to develop a new PMD based on a new
> hardware. If the hardware does not behave exactly like ixgbe, i40e (I hope all
> drivers you implemented behave exactly the same), some work has to be done
> in the driver or the feature cannot be used.
The unified packet type defined here aims to support all hardware. I40e has
different values from ixgbe. We can add more in the future if needed for future NICs.

> 
> One naïve question: are we sure that at the end, using these complex packet
> types is faster than parsing the packet?
I guess yes for almost all cases, as the hardware reports the packet types, and
the PMD just puts the corresponding values into the packet_type field.
Later, we will try to measure the difference.

Regards,
Helin

> 
> Regards,
> Olivier
> 
> 
> [1] http://dpdk.org/ml/archives/dev/2014-November/008534.html
> [2] http://dpdk.org/ml/archives/dev/2014-November/008367.html
  
Zhang, Helin Jan. 20, 2015, 3:47 a.m. UTC | #6
> -----Original Message-----
> From: Ananyev, Konstantin
> Sent: Tuesday, January 20, 2015 12:20 AM
> To: Zhang, Helin; dev@dpdk.org
> Cc: Liang, Cunming; Liu, Jijiang
> Subject: RE: [RFC 01/17] mbuf: add definitions of unified packet types
> 
> 
> 
> > -----Original Message-----
> > From: Zhang, Helin
> > Sent: Monday, January 19, 2015 3:23 AM
> > To: dev@dpdk.org
> > Cc: Liang, Cunming; Liu, Jijiang; Ananyev, Konstantin; Zhang, Helin
> > Subject: [RFC 01/17] mbuf: add definitions of unified packet types
> >
> > As there are only 6 bit flags in ol_flags for indicating packet types,
> > which is not enough to describe all the possible packet types hardware
> > can recognize. For example, i40e hardware can recognize more than 150
> > packet types. Unified packet type is composed of tunnel type, L3 type,
> > L4 type and inner L3 type fields, and can be stored in 16 bits mbuf
> > field of 'packet_type'.
> >
> > Signed-off-by: Helin Zhang <helin.zhang@intel.com>
> > Signed-off-by: Cunming Liang <cunming.liang@intel.com>
> > Signed-off-by: Jijiang Liu <jijiang.liu@intel.com>
> > ---
> >  lib/librte_mbuf/rte_mbuf.h | 68
> > ++++++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 68 insertions(+)
> >
> > diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
> > index 16059c6..94eb38f 100644
> > --- a/lib/librte_mbuf/rte_mbuf.h
> > +++ b/lib/librte_mbuf/rte_mbuf.h
> > @@ -165,6 +165,74 @@ extern "C" {
> >  /* Use final bit of flags to indicate a control mbuf */
> >  #define CTRL_MBUF_FLAG       (1ULL << 63) /**< Mbuf contains control
> data */
> >
> > +/*
> > + * Sixteen bits are divided into several fields to mark packet types.
> > +Note that
> > + * each field is indexical.
> > + * - Bit 3:0 is for tunnel types.
> > + * - Bit 7:4 is for L3 or outer L3 (for tunneling case) types.
> > + * - Bit 10:8 is for L4 types. It can also be used for inner L4 types for
> > + *   tunneling packets.
> > + * - Bit 13:11 is for inner L3 types.
> > + * - Bit 15:14 is reserved.
> > + *
> > + * To be compitable with Vector PMD, RTE_PTYPE_L3_IPV4,
> > +RTE_PTYPE_L3_IPV4_EXT,
> > + * RTE_PTYPE_L3_IPV6, RTE_PTYPE_L3_IPV6_EXT, RTE_PTYPE_L4_TCP,
> > +RTE_PTYPE_L4_UDP
> > + * and RTE_PTYPE_L4_SCTP should be kept as below in a contiguous 7 bits.
> > + */
> > +#define RTE_PTYPE_UNKNOWN                   0x0000 /*
> 0b0000000000000000 */
> > +/* bit 3:0 for tunnel types */
> > +#define RTE_PTYPE_TUNNEL_IP                 0x0001 /*
> 0b0000000000000001 */
> > +#define RTE_PTYPE_TUNNEL_TCP                0x0002 /*
> 0b0000000000000010 */
> > +#define RTE_PTYPE_TUNNEL_UDP                0x0003 /*
> 0b0000000000000011 */
> > +#define RTE_PTYPE_TUNNEL_GRE                0x0004 /*
> 0b0000000000000100 */
> > +#define RTE_PTYPE_TUNNEL_VXLAN              0x0005 /*
> 0b0000000000000101 */
> > +#define RTE_PTYPE_TUNNEL_NVGRE              0x0006 /*
> 0b0000000000000110 */
> > +#define RTE_PTYPE_TUNNEL_GENEVE             0x0007 /*
> 0b0000000000000111 */
> > +#define RTE_PTYPE_TUNNEL_GRENAT             0x0008 /*
> 0b0000000000001000 */
> > +#define RTE_PTYPE_TUNNEL_GRENAT_MAC         0x0009 /*
> 0b0000000000001001 */
> > +#define RTE_PTYPE_TUNNEL_GRENAT_MACVLAN     0x000a /*
> 0b0000000000001010 */
> > +#define RTE_PTYPE_TUNNEL_MASK               0x000f /*
> 0b0000000000001111 */
> > +/* bit 7:4 for L3 types */
> > +#define RTE_PTYPE_L3_IPV4                   0x0010 /*
> 0b0000000000010000 */
> > +#define RTE_PTYPE_L3_IPV4_EXT               0x0030 /*
> 0b0000000000110000 */
> > +#define RTE_PTYPE_L3_IPV6                   0x0040 /*
> 0b0000000001000000 */
> > +#define RTE_PTYPE_L3_IPV6_EXT               0x00c0 /*
> 0b0000000011000000 */
> > +#define RTE_PTYPE_L3_IPV4_EXT_UNKNOWN       0x00d0 /*
> 0b0000000011010000 */
> > +#define RTE_PTYPE_L3_IPV6_EXT_UNKNOWN       0x00e0 /*
> 0b0000000011100000 */
> > +#define RTE_PTYPE_L3_MASK                   0x00f0 /*
> 0b0000000011110000 */
> 
> I still think it would be better to use enum not bit-set for IPv4/IPv6 distinction,
> but if you set it that way, can you at least take advantage of it and make
> RTE_ETH_IS_IPV4_HDR() not require 3 comparisons?
> I think it is doable if you set bit 4 for IPv4 types only (you already do that) and bit
> 6 for IPv6 types only.
> For that, I think, you can make RTE_PTYPE_L3_IPV4_EXT_UNKNOWN == 0xb0
> /* 0b0000000010110000 */ Then you can:
> 
> #define  RTE_ETH_IS_IPV4_HDR(ptype)    (((ptype) &
> RTE_PTYPE_L3_IPV4) != 0)
> #define  RTE_ETH_IS_IPV6_HDR(ptype)    (((ptype) &
> RTE_PTYPE_L3_IPV6) != 0)
> 
> I suppose that would be faster then what you propose below, and would
> probably require less changes in our sample apps.
As wasting one bit lets us support the Vector PMD well, I prefer to keep it here.
Thank you very much for the good idea about bit selection, which may give higher
performance. I will add the idea in the next version. Thanks a lot!

Regards,
Helin

> 
> Konstantin
> 
> > +/* bit 10:8 for L4 types */
> > +#define RTE_PTYPE_L4_TCP                    0x0100 /*
> 0b0000000100000000 */
> > +#define RTE_PTYPE_L4_UDP                    0x0200 /*
> 0b0000001000000000 */
> > +#define RTE_PTYPE_L4_FRAG                   0x0300 /*
> 0b0000001100000000 */
> > +#define RTE_PTYPE_L4_SCTP                   0x0400 /*
> 0b0000010000000000 */
> > +#define RTE_PTYPE_L4_ICMP                   0x0500 /*
> 0b0000010100000000 */
> > +#define RTE_PTYPE_L4_NONFRAG                0x0600 /*
> 0b0000011000000000 */
> > +#define RTE_PTYPE_L4_MASK                   0x0700 /*
> 0b0000011100000000 */
> > +/* bit 13:11 for inner L3 types */
> > +#define RTE_PTYPE_INNER_L3_IPV4             0x0800 /*
> 0b0000100000000000 */
> > +#define RTE_PTYPE_INNER_L3_IPV4_EXT         0x1000 /*
> 0b0001000000000000 */
> > +#define RTE_PTYPE_INNER_L3_IPV6             0x1800 /*
> 0b0001100000000000 */
> > +#define RTE_PTYPE_INNER_L3_IPV6_EXT         0x2000 /*
> 0b0010000000000000 */
> > +#define RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN 0x2800 /*
> > +0b0010100000000000 */ #define
> RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN 0x3000 /* 0b0011000000000000
> */
> > +#define RTE_PTYPE_INNER_L3_MASK             0x3800 /*
> 0b0011100000000000 */
> > +/* bit 15:14 reserved */
> > +
> > +/* Check if the (outer) L3 header is IPv4 */ #define
> > +RTE_ETH_IS_IPV4_HDR(ptype) \
> > +	(((ptype) & RTE_PTYPE_L3_MASK) == RTE_PTYPE_L3_IPV4 || \
> > +	((ptype) & RTE_PTYPE_L3_MASK) == RTE_PTYPE_L3_IPV4_EXT || \
> > +	((ptype) & RTE_PTYPE_L3_MASK) ==
> RTE_PTYPE_L3_IPV4_EXT_UNKNOWN)
> > +
> > +/* Check if the (outer) L3 header is IPv6 */ #define
> > +RTE_ETH_IS_IPV6_HDR(ptype) \
> > +	(((ptype) & RTE_PTYPE_L3_MASK) == RTE_PTYPE_L3_IPV6 || \
> > +	((ptype) & RTE_PTYPE_L3_MASK) == RTE_PTYPE_L3_IPV6_EXT || \
> > +	((ptype) & RTE_PTYPE_L3_MASK) ==
> RTE_PTYPE_L3_IPV6_EXT_UNKNOWN)
> > +
> > +/* Check if it is a tunneling packet */ #define
> > +RTE_ETH_IS_TUNNEL_PKT(ptype) ((ptype) & RTE_PTYPE_TUNNEL_MASK)
> > +
> >  /**
> >   * Get the name of a RX offload flag
> >   *
> > --
> > 1.8.1.4
  
Olivier Matz Jan. 20, 2015, 9:53 a.m. UTC | #7
Hi Helin,

On 01/20/2015 03:28 AM, Zhang, Helin wrote:
>> Another question I've asked several times[1][2] : what does having
>> RTE_PTYPE_TUNNEL_IP mean? What fields are checked by the hardware (or
>> the driver) and what fields should be checked by the application?
>> Are you sure that all the drivers (ixgbe, i40e, vmxnet3, enic) check the same
>> fields? (ethertype, ip version, ip len correct, ip checksum correct, flags, ...)
> RTE_PTYPE_TUNNEL_IP means hardware recognizes the received packet as an
> IP-in-IP packet.
> All the fields are filled by PMD which is recognized by hardware. The application
> can just use it which can save some cpu cycles to recognize the packet type by
> software.
> Drivers is responsible for filling with correct values according to the packet types
> recognized by its hardware. Different PMDs may fill with different values based on
> different capabilities.

Sorry, that does not answer my question.

Let's take a simple example. Imagine a hardware-1 that is able to
recognize an IP packet by checking the ethertype and that the IP
version is set to 4.
Another hardware-2 recognizes an IP packet by checking the ethertype,
the IP version and that the IP length is correct compared to m_len(m).

For the same packet, both devices will return RTE_PTYPE_L3_IPV4, but
they don't do the same checks on the packet. As I want my application
to behave exactly the same whatever the hardware, I need to know what
checks are done in hardware, so I can decide what checks must be
done in my application.

Example of definition: RTE_PTYPE_L3_IPV4 means that ethertype is
0x0800 and IP.version is 4.

It means that I can skip these 2 tests in my application if I have
this packet_type, but all other checks must be done in software
(ip length, flags, checksum, ...)

For each packet type, we need a definition like above, and we must
check that all drivers setting a packet type behave like described.

Regards,
Olivier
  

Patch

diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index 16059c6..94eb38f 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -165,6 +165,74 @@  extern "C" {
 /* Use final bit of flags to indicate a control mbuf */
 #define CTRL_MBUF_FLAG       (1ULL << 63) /**< Mbuf contains control data */
 
+/*
+ * Sixteen bits are divided into several fields to mark packet types. Note that
+ * each field is indexical.
+ * - Bit 3:0 is for tunnel types.
+ * - Bit 7:4 is for L3 or outer L3 (for tunneling case) types.
+ * - Bit 10:8 is for L4 types. It can also be used for inner L4 types for
+ *   tunneling packets.
+ * - Bit 13:11 is for inner L3 types.
+ * - Bit 15:14 is reserved.
+ *
+ * To be compitable with Vector PMD, RTE_PTYPE_L3_IPV4, RTE_PTYPE_L3_IPV4_EXT,
+ * RTE_PTYPE_L3_IPV6, RTE_PTYPE_L3_IPV6_EXT, RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP
+ * and RTE_PTYPE_L4_SCTP should be kept as below in a contiguous 7 bits.
+ */
+#define RTE_PTYPE_UNKNOWN                   0x0000 /* 0b0000000000000000 */
+/* bit 3:0 for tunnel types */
+#define RTE_PTYPE_TUNNEL_IP                 0x0001 /* 0b0000000000000001 */
+#define RTE_PTYPE_TUNNEL_TCP                0x0002 /* 0b0000000000000010 */
+#define RTE_PTYPE_TUNNEL_UDP                0x0003 /* 0b0000000000000011 */
+#define RTE_PTYPE_TUNNEL_GRE                0x0004 /* 0b0000000000000100 */
+#define RTE_PTYPE_TUNNEL_VXLAN              0x0005 /* 0b0000000000000101 */
+#define RTE_PTYPE_TUNNEL_NVGRE              0x0006 /* 0b0000000000000110 */
+#define RTE_PTYPE_TUNNEL_GENEVE             0x0007 /* 0b0000000000000111 */
+#define RTE_PTYPE_TUNNEL_GRENAT             0x0008 /* 0b0000000000001000 */
+#define RTE_PTYPE_TUNNEL_GRENAT_MAC         0x0009 /* 0b0000000000001001 */
+#define RTE_PTYPE_TUNNEL_GRENAT_MACVLAN     0x000a /* 0b0000000000001010 */
+#define RTE_PTYPE_TUNNEL_MASK               0x000f /* 0b0000000000001111 */
+/* bit 7:4 for L3 types */
+#define RTE_PTYPE_L3_IPV4                   0x0010 /* 0b0000000000010000 */
+#define RTE_PTYPE_L3_IPV4_EXT               0x0030 /* 0b0000000000110000 */
+#define RTE_PTYPE_L3_IPV6                   0x0040 /* 0b0000000001000000 */
+#define RTE_PTYPE_L3_IPV6_EXT               0x00c0 /* 0b0000000011000000 */
+#define RTE_PTYPE_L3_IPV4_EXT_UNKNOWN       0x00d0 /* 0b0000000011010000 */
+#define RTE_PTYPE_L3_IPV6_EXT_UNKNOWN       0x00e0 /* 0b0000000011100000 */
+#define RTE_PTYPE_L3_MASK                   0x00f0 /* 0b0000000011110000 */
+/* bit 10:8 for L4 types */
+#define RTE_PTYPE_L4_TCP                    0x0100 /* 0b0000000100000000 */
+#define RTE_PTYPE_L4_UDP                    0x0200 /* 0b0000001000000000 */
+#define RTE_PTYPE_L4_FRAG                   0x0300 /* 0b0000001100000000 */
+#define RTE_PTYPE_L4_SCTP                   0x0400 /* 0b0000010000000000 */
+#define RTE_PTYPE_L4_ICMP                   0x0500 /* 0b0000010100000000 */
+#define RTE_PTYPE_L4_NONFRAG                0x0600 /* 0b0000011000000000 */
+#define RTE_PTYPE_L4_MASK                   0x0700 /* 0b0000011100000000 */
+/* bit 13:11 for inner L3 types */
+#define RTE_PTYPE_INNER_L3_IPV4             0x0800 /* 0b0000100000000000 */
+#define RTE_PTYPE_INNER_L3_IPV4_EXT         0x1000 /* 0b0001000000000000 */
+#define RTE_PTYPE_INNER_L3_IPV6             0x1800 /* 0b0001100000000000 */
+#define RTE_PTYPE_INNER_L3_IPV6_EXT         0x2000 /* 0b0010000000000000 */
+#define RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN 0x2800 /* 0b0010100000000000 */
+#define RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN 0x3000 /* 0b0011000000000000 */
+#define RTE_PTYPE_INNER_L3_MASK             0x3800 /* 0b0011100000000000 */
+/* bit 15:14 reserved */
+
+/* Check if the (outer) L3 header is IPv4 */
+#define  RTE_ETH_IS_IPV4_HDR(ptype) \
+	(((ptype) & RTE_PTYPE_L3_MASK) == RTE_PTYPE_L3_IPV4 || \
+	((ptype) & RTE_PTYPE_L3_MASK) == RTE_PTYPE_L3_IPV4_EXT || \
+	((ptype) & RTE_PTYPE_L3_MASK) == RTE_PTYPE_L3_IPV4_EXT_UNKNOWN)
+
+/* Check if the (outer) L3 header is IPv6 */
+#define  RTE_ETH_IS_IPV6_HDR(ptype) \
+	(((ptype) & RTE_PTYPE_L3_MASK) == RTE_PTYPE_L3_IPV6 || \
+	((ptype) & RTE_PTYPE_L3_MASK) == RTE_PTYPE_L3_IPV6_EXT || \
+	((ptype) & RTE_PTYPE_L3_MASK) == RTE_PTYPE_L3_IPV6_EXT_UNKNOWN)
+
+/* Check if it is a tunneling packet */
+#define RTE_ETH_IS_TUNNEL_PKT(ptype) ((ptype) & RTE_PTYPE_TUNNEL_MASK)
+
 /**
  * Get the name of a RX offload flag
  *