diff mbox

[dpdk-dev,1/3] mbuf:add two TX offload flags and change three fields

Message ID 1417076319-629-2-git-send-email-jijiang.liu@intel.com (mailing list archive)
State Changes Requested, archived
Headers show

Commit Message

Jijiang Liu Nov. 27, 2014, 8:18 a.m. UTC
In place of removing the PKT_TX_VXLAN_CKSUM, we introduce 2 new flags: PKT_TX_OUT_IP_CKSUM, PKT_TX_UDP_TUNNEL_PKT, and a new field: l4_tun_len.
Replace the inner_l2_len and the inner_l3_len field with the outer_l2_len and outer_l3_len field.

PKT_TX_OUT_IP_CKSUM: is not used for non-tunnelling packet;hardware outer checksum for tunnelling packet.
PKT_TX_UDP_TUNNEL_PKT: is used to tell PMD that the transmit packet is a UDP tunneling packet.
l4_tun_len: for VXLAN packet, it should be udp header length plus VXLAN header length. 

Signed-off-by: Jijiang Liu <jijiang.liu@intel.com>
---
 lib/librte_mbuf/rte_mbuf.c |    2 +-
 lib/librte_mbuf/rte_mbuf.h |   23 ++++++++++++++---------
 2 files changed, 15 insertions(+), 10 deletions(-)

Comments

Olivier Matz Nov. 27, 2014, 10 a.m. UTC | #1
Hi Jijiang,

Please see some comments below.

On 11/27/2014 09:18 AM, Jijiang Liu wrote:
> In place of removing the PKT_TX_VXLAN_CKSUM, we introduce 2 new flags: PKT_TX_OUT_IP_CKSUM, PKT_TX_UDP_TUNNEL_PKT, and a new field: l4_tun_len.
> Replace the inner_l2_len and the inner_l3_len field with the outer_l2_len and outer_l3_len field.
>
> PKT_TX_OUT_IP_CKSUM: is not used for non-tunnelling packet;hardware outer checksum for tunnelling packet.
> PKT_TX_UDP_TUNNEL_PKT: is used to tell PMD that the transmit packet is a UDP tunneling packet.
> l4_tun_len: for VXLAN packet, it should be udp header length plus VXLAN header length.
>
> Signed-off-by: Jijiang Liu <jijiang.liu@intel.com>
> ---
>   lib/librte_mbuf/rte_mbuf.c |    2 +-
>   lib/librte_mbuf/rte_mbuf.h |   23 ++++++++++++++---------
>   2 files changed, 15 insertions(+), 10 deletions(-)
>
> diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c
> index 87c2963..e89c310 100644
> --- a/lib/librte_mbuf/rte_mbuf.c
> +++ b/lib/librte_mbuf/rte_mbuf.c
> @@ -240,7 +240,7 @@ const char *rte_get_tx_ol_flag_name(uint64_t mask)
>   	case PKT_TX_SCTP_CKSUM: return "PKT_TX_SCTP_CKSUM";
>   	case PKT_TX_UDP_CKSUM: return "PKT_TX_UDP_CKSUM";
>   	case PKT_TX_IEEE1588_TMST: return "PKT_TX_IEEE1588_TMST";
> -	case PKT_TX_VXLAN_CKSUM: return "PKT_TX_VXLAN_CKSUM";
> +	case PKT_TX_UDP_TUNNEL_PKT: return "PKT_TX_UDP_TUNNEL_PKT";
>   	case PKT_TX_TCP_SEG: return "PKT_TX_TCP_SEG";
>   	default: return NULL;

As I said as a reply to the cover letter, I suggest to use
PKT_TX_OUT_UDP_CKSUM instead of PKT_TX_UDP_TUNNEL_PKT.

Also, the PKT_TX_OUT_IP_CKSUM case is missing here.

> diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
> index 367fc56..48cd8e1 100644
> --- a/lib/librte_mbuf/rte_mbuf.h
> +++ b/lib/librte_mbuf/rte_mbuf.h
> @@ -99,10 +99,9 @@ extern "C" {
>   #define PKT_RX_TUNNEL_IPV6_HDR (1ULL << 12) /**< RX tunnel packet with IPv6 header. */
>   #define PKT_RX_FDIR_ID       (1ULL << 13) /**< FD id reported if FDIR match. */
>   #define PKT_RX_FDIR_FLX      (1ULL << 14) /**< Flexible bytes reported if FDIR match. */
> -/* add new RX flags here */
>

We should probably not remove this line.


>   /* add new TX flags here */
> -#define PKT_TX_VXLAN_CKSUM   (1ULL << 50) /**< TX checksum of VXLAN computed by NIC */
> +#define PKT_TX_UDP_TUNNEL_PKT (1ULL << 50) /**< TX packet is an UDP tunneling packet */
>   #define PKT_TX_IEEE1588_TMST (1ULL << 51) /**< TX IEEE1588 packet to timestamp. */
>
>   /**
> @@ -125,13 +124,20 @@ extern "C" {
>   #define PKT_TX_IP_CKSUM      (1ULL << 54) /**< IP cksum of TX pkt. computed by NIC. */
>   #define PKT_TX_IPV4_CSUM     PKT_TX_IP_CKSUM /**< Alias of PKT_TX_IP_CKSUM. */
>
> +#define PKT_TX_VLAN_PKT      (1ULL << 55) /**< TX packet is a 802.1q VLAN packet. */
> +
>   /** Tell the NIC it's an IPv4 packet. Required for L4 checksum offload or TSO. */
> -#define PKT_TX_IPV4          PKT_RX_IPV4_HDR
> +#define PKT_TX_IPV4          (1ULL << 56)
>
>   /** Tell the NIC it's an IPv6 packet. Required for L4 checksum offload or TSO. */
> -#define PKT_TX_IPV6          PKT_RX_IPV6_HDR
> +#define PKT_TX_IPV6          (1ULL << 57)

The description in comment does not match the description in the cover
letter.

Also, I think replacing PKT_RX_IPV[46]_HDR by the value may go in
another commit.


> -#define PKT_TX_VLAN_PKT      (1ULL << 55) /**< TX packet is a 802.1q VLAN packet. */
> +/** Outer IP cksum of TX pkt. computed by NIC for tunneling packet */
> +#define PKT_TX_OUTER_IP_CKSUM   (1ULL << 58)
> +#define PKT_TX_OUTER_IPV4_CSUM  PKT_TX_OUTER_IP_CKSUM /**< Alias of PKT_TX_OUTER_IP_CKSUM. */

Why do we need an alias?

By the way, I think the alias of PKT_TX_IP_CKSUM is also uneeded and
can be removed. But it's not the topic of your series.

Also, the name PKT_TX_OUTER_IP_CKSUM does not match the name in the
cover letter and commit logs.


> +
> +/** Tell the NIC it's an outer IPv6 packet for tunneling packet.*/
> +#define PKT_TX_OUTER_IPV6    (1ULL << 59)
>

This flag is not in the cover letter or commit log. What is its purpose?


>   /**
>    * TCP segmentation offload. To enable this offload feature for a
> @@ -266,10 +272,9 @@ struct rte_mbuf {
>   			uint64_t tso_segsz:16; /**< TCP TSO segment size */
>
>   			/* fields for TX offloading of tunnels */
> -			uint64_t inner_l3_len:9; /**< inner L3 (IP) Hdr Length. */
> -			uint64_t inner_l2_len:7; /**< inner L2 (MAC) Hdr Length. */
> -
> -			/* uint64_t unused:8; */
> +			uint64_t outer_l3_len:9; /**< outer L3 (IP) Hdr Length. */
> +			uint64_t outer_l2_len:7; /**< outer L2 (MAC) Hdr Length. */
> +			uint64_t l4_tun_len:8; /**< L4 tunnelling header length */
>   		};
>   	};
>   } __rte_cache_aligned;
>

About l4_tun_len, I have another comment I forgot to add in the cover
letter. Can we remove it and include its length in outer_l2_len
instead? For instance, replace:

      mb->l2_len =  eth_hdr_in;
      mb->l3_len = ipv4_hdr_in;
      mb->outer_l2_len = eth_hdr_out;
      mb->outer_l3_len = ipv4_hdr_out;
      mb->l4tun_len = vxlan_hdr;
      mb->ol_flags |= PKT_TX_OUT_IP_CKSUM  | PKT_TX_UDP_TUNNEL |
        PKT_TX_IP_CKSUM |  PKT_TX_TCP_CKSUM;

by:

      mb->l2_len =  eth_hdr_in;
      mb->l3_len = ipv4_hdr_in;
      mb->outer_l2_len = eth_hdr_out + vxlan_hdr;
      mb->outer_l3_len = ipv4_hdr_out;
      mb->ol_flags |= PKT_TX_OUT_IP_CKSUM  | PKT_TX_UDP_TUNNEL |
        PKT_TX_IP_CKSUM |  PKT_TX_TCP_CKSUM;

I think it won't bother the driver, and it's coherent with case B.2 of
your cover letter.

Regards,
Olivier
Jijiang Liu Nov. 27, 2014, 1:14 p.m. UTC | #2
> -----Original Message-----
> From: Olivier MATZ [mailto:olivier.matz@6wind.com]
> Sent: Thursday, November 27, 2014 6:00 PM
> To: Liu, Jijiang; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH 1/3] mbuf:add two TX offload flags and change
> three fields
> 
> Hi Jijiang,
> 
> Please see some comments below.
> 
> On 11/27/2014 09:18 AM, Jijiang Liu wrote:
> > In place of removing the PKT_TX_VXLAN_CKSUM, we introduce 2 new flags:
> PKT_TX_OUT_IP_CKSUM, PKT_TX_UDP_TUNNEL_PKT, and a new field:
> l4_tun_len.
> > Replace the inner_l2_len and the inner_l3_len field with the outer_l2_len and
> outer_l3_len field.
> >
> > PKT_TX_OUT_IP_CKSUM: is not used for non-tunnelling packet;hardware outer
> checksum for tunnelling packet.
> > PKT_TX_UDP_TUNNEL_PKT: is used to tell PMD that the transmit packet is a
> UDP tunneling packet.
> > l4_tun_len: for VXLAN packet, it should be udp header length plus VXLAN
> header length.
> >
> > Signed-off-by: Jijiang Liu <jijiang.liu@intel.com>
> > ---
> >   lib/librte_mbuf/rte_mbuf.c |    2 +-
> >   lib/librte_mbuf/rte_mbuf.h |   23 ++++++++++++++---------
> >   2 files changed, 15 insertions(+), 10 deletions(-)
> >
> > diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c
> > index 87c2963..e89c310 100644
> > --- a/lib/librte_mbuf/rte_mbuf.c
> > +++ b/lib/librte_mbuf/rte_mbuf.c
> > @@ -240,7 +240,7 @@ const char *rte_get_tx_ol_flag_name(uint64_t mask)
> >   	case PKT_TX_SCTP_CKSUM: return "PKT_TX_SCTP_CKSUM";
> >   	case PKT_TX_UDP_CKSUM: return "PKT_TX_UDP_CKSUM";
> >   	case PKT_TX_IEEE1588_TMST: return "PKT_TX_IEEE1588_TMST";
> > -	case PKT_TX_VXLAN_CKSUM: return "PKT_TX_VXLAN_CKSUM";
> > +	case PKT_TX_UDP_TUNNEL_PKT: return "PKT_TX_UDP_TUNNEL_PKT";
> >   	case PKT_TX_TCP_SEG: return "PKT_TX_TCP_SEG";
> >   	default: return NULL;
> 
> As I said as a reply to the cover letter, I suggest to use
> PKT_TX_OUT_UDP_CKSUM instead of PKT_TX_UDP_TUNNEL_PKT.
> 
> Also, the PKT_TX_OUT_IP_CKSUM case is missing here.
Yes.

> > diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
> > index 367fc56..48cd8e1 100644
> > --- a/lib/librte_mbuf/rte_mbuf.h
> > +++ b/lib/librte_mbuf/rte_mbuf.h
> > @@ -99,10 +99,9 @@ extern "C" {
> >   #define PKT_RX_TUNNEL_IPV6_HDR (1ULL << 12) /**< RX tunnel packet with
> IPv6 header. */
> >   #define PKT_RX_FDIR_ID       (1ULL << 13) /**< FD id reported if FDIR match.
> */
> >   #define PKT_RX_FDIR_FLX      (1ULL << 14) /**< Flexible bytes reported if FDIR
> match. */
> > -/* add new RX flags here */
> >
> 
> We should probably not remove this line.

Why?
There are two lines "/* add new RX flags here */" in rte_mbuf.h file.

> 
> >   /* add new TX flags here */
> > -#define PKT_TX_VXLAN_CKSUM   (1ULL << 50) /**< TX checksum of VXLAN
> computed by NIC */
> > +#define PKT_TX_UDP_TUNNEL_PKT (1ULL << 50) /**< TX packet is an UDP
> > +tunneling packet */
> >   #define PKT_TX_IEEE1588_TMST (1ULL << 51) /**< TX IEEE1588 packet to
> > timestamp. */
> >
> >   /**
> > @@ -125,13 +124,20 @@ extern "C" {
> >   #define PKT_TX_IP_CKSUM      (1ULL << 54) /**< IP cksum of TX pkt. computed
> by NIC. */
> >   #define PKT_TX_IPV4_CSUM     PKT_TX_IP_CKSUM /**< Alias of
> PKT_TX_IP_CKSUM. */
> >
> > +#define PKT_TX_VLAN_PKT      (1ULL << 55) /**< TX packet is a 802.1q VLAN
> packet. */
> > +
> >   /** Tell the NIC it's an IPv4 packet. Required for L4 checksum offload or TSO.
> */
> > -#define PKT_TX_IPV4          PKT_RX_IPV4_HDR
> > +#define PKT_TX_IPV4          (1ULL << 56)
> >
> >   /** Tell the NIC it's an IPv6 packet. Required for L4 checksum offload or TSO.
> */
> > -#define PKT_TX_IPV6          PKT_RX_IPV6_HDR
> > +#define PKT_TX_IPV6          (1ULL << 57)
> 
> The description in comment does not match the description in the cover letter.
Change cover letter.
> Also, I think replacing PKT_RX_IPV[46]_HDR by the value may go in another
> commit.
Ok.
> 
> > -#define PKT_TX_VLAN_PKT      (1ULL << 55) /**< TX packet is a 802.1q VLAN
> packet. */
> > +/** Outer IP cksum of TX pkt. computed by NIC for tunneling packet */
> > +#define PKT_TX_OUTER_IP_CKSUM   (1ULL << 58)
> > +#define PKT_TX_OUTER_IPV4_CSUM  PKT_TX_OUTER_IP_CKSUM /**< Alias
> of
> > +PKT_TX_OUTER_IP_CKSUM. */
> 
> Why do we need an alias?
Ok, remove it.
> By the way, I think the alias of PKT_TX_IP_CKSUM is also uneeded and can be
> removed. But it's not the topic of your series.
> 
> Also, the name PKT_TX_OUTER_IP_CKSUM does not match the name in the cover
> letter and commit logs.
> 
> 
> > +
> > +/** Tell the NIC it's an outer IPv6 packet for tunneling packet.*/
> > +#define PKT_TX_OUTER_IPV6    (1ULL << 59)
> >
> 
> This flag is not in the cover letter or commit log. What is its purpose?
> 
With FVL, if outer L3 header is IPv6, to make HW TX checksum offload work , SW must be responsible to tell hardware this information.
Ananyev, Konstantin Nov. 27, 2014, 2:56 p.m. UTC | #3
> 
> -----Original Message-----
> From: Olivier MATZ [mailto:olivier.matz@6wind.com]
> Sent: Thursday, November 27, 2014 6:00 PM
> To: Liu, Jijiang; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH 1/3] mbuf:add two TX offload flags and change three fields
> 
> Hi Jijiang,
> 
> Please see some comments below.
> 
> On 11/27/2014 09:18 AM, Jijiang Liu wrote:
> > In place of removing the PKT_TX_VXLAN_CKSUM, we introduce 2 new flags: PKT_TX_OUT_IP_CKSUM, PKT_TX_UDP_TUNNEL_PKT,
> and a new field: l4_tun_len.
> > Replace the inner_l2_len and the inner_l3_len field with the outer_l2_len and outer_l3_len field.
> >
> > PKT_TX_OUT_IP_CKSUM: is not used for non-tunnelling packet;hardware outer checksum for tunnelling packet.
> > PKT_TX_UDP_TUNNEL_PKT: is used to tell PMD that the transmit packet is a UDP tunneling packet.
> > l4_tun_len: for VXLAN packet, it should be udp header length plus VXLAN header length.
> >
> > Signed-off-by: Jijiang Liu <jijiang.liu@intel.com>
> > ---
> >   lib/librte_mbuf/rte_mbuf.c |    2 +-
> >   lib/librte_mbuf/rte_mbuf.h |   23 ++++++++++++++---------
> >   2 files changed, 15 insertions(+), 10 deletions(-)
> >
> > diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c
> > index 87c2963..e89c310 100644
> > --- a/lib/librte_mbuf/rte_mbuf.c
> > +++ b/lib/librte_mbuf/rte_mbuf.c
> > @@ -240,7 +240,7 @@ const char *rte_get_tx_ol_flag_name(uint64_t mask)
> >   	case PKT_TX_SCTP_CKSUM: return "PKT_TX_SCTP_CKSUM";
> >   	case PKT_TX_UDP_CKSUM: return "PKT_TX_UDP_CKSUM";
> >   	case PKT_TX_IEEE1588_TMST: return "PKT_TX_IEEE1588_TMST";
> > -	case PKT_TX_VXLAN_CKSUM: return "PKT_TX_VXLAN_CKSUM";
> > +	case PKT_TX_UDP_TUNNEL_PKT: return "PKT_TX_UDP_TUNNEL_PKT";
> >   	case PKT_TX_TCP_SEG: return "PKT_TX_TCP_SEG";
> >   	default: return NULL;
> 
> As I said as a reply to the cover letter, I suggest to use PKT_TX_OUT_UDP_CKSUM instead of PKT_TX_UDP_TUNNEL_PKT.

HW don't support outer L4 checksum offload.
But to calculate inner checksums correctly, it needs a hint from SW about L4 Tunneling Type.
Currently the following values are recognised by HW:

L4 Tunneling Type (Teredo / GRE header / VXLAN header) indication:
00b - No UDP / GRE tunneling (field must be set to zero if EIPT equals to zero)
01b - UDP tunneling header (any UDP tunneling, VXLAN and Geneve).
10b - GRE tunneling header
Else - reserved

You can check yourself:
http://www.intel.com/content/www/us/en/embedded/products/networking/xl710-10-40-controller-datasheet.html
Sections 8.4.2.2.1 and 8.4.4.2

> 
> Also, the PKT_TX_OUT_IP_CKSUM case is missing here.
> 
> > diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
> > index 367fc56..48cd8e1 100644
> > --- a/lib/librte_mbuf/rte_mbuf.h
> > +++ b/lib/librte_mbuf/rte_mbuf.h
> > @@ -99,10 +99,9 @@ extern "C" {
> >   #define PKT_RX_TUNNEL_IPV6_HDR (1ULL << 12) /**< RX tunnel packet with IPv6 header. */
> >   #define PKT_RX_FDIR_ID       (1ULL << 13) /**< FD id reported if FDIR match. */
> >   #define PKT_RX_FDIR_FLX      (1ULL << 14) /**< Flexible bytes reported if FDIR match. */
> > -/* add new RX flags here */
> >
> 
> We should probably not remove this line.
> 
> 
> >   /* add new TX flags here */
> > -#define PKT_TX_VXLAN_CKSUM   (1ULL << 50) /**< TX checksum of VXLAN computed by NIC */
> > +#define PKT_TX_UDP_TUNNEL_PKT (1ULL << 50) /**< TX packet is an UDP
> > +tunneling packet */
> >   #define PKT_TX_IEEE1588_TMST (1ULL << 51) /**< TX IEEE1588 packet to
> > timestamp. */
> >
> >   /**
> > @@ -125,13 +124,20 @@ extern "C" {
> >   #define PKT_TX_IP_CKSUM      (1ULL << 54) /**< IP cksum of TX pkt. computed by NIC. */
> >   #define PKT_TX_IPV4_CSUM     PKT_TX_IP_CKSUM /**< Alias of PKT_TX_IP_CKSUM. */
> >
> > +#define PKT_TX_VLAN_PKT      (1ULL << 55) /**< TX packet is a 802.1q VLAN packet. */
> > +
> >   /** Tell the NIC it's an IPv4 packet. Required for L4 checksum offload or TSO. */
> > -#define PKT_TX_IPV4          PKT_RX_IPV4_HDR
> > +#define PKT_TX_IPV4          (1ULL << 56)
> >
> >   /** Tell the NIC it's an IPv6 packet. Required for L4 checksum offload or TSO. */
> > -#define PKT_TX_IPV6          PKT_RX_IPV6_HDR
> > +#define PKT_TX_IPV6          (1ULL << 57)
> 
> The description in comment does not match the description in the cover letter.
> 
> Also, I think replacing PKT_RX_IPV[46]_HDR by the value may go in another commit.
> 
> 
> > -#define PKT_TX_VLAN_PKT      (1ULL << 55) /**< TX packet is a 802.1q VLAN packet. */
> > +/** Outer IP cksum of TX pkt. computed by NIC for tunneling packet */
> > +#define PKT_TX_OUTER_IP_CKSUM   (1ULL << 58)
> > +#define PKT_TX_OUTER_IPV4_CSUM  PKT_TX_OUTER_IP_CKSUM /**< Alias of
> > +PKT_TX_OUTER_IP_CKSUM. */
> 
> Why do we need an alias?
> 
> By the way, I think the alias of PKT_TX_IP_CKSUM is also uneeded and can be removed. But it's not the topic of your series.
> 
> Also, the name PKT_TX_OUTER_IP_CKSUM does not match the name in the cover letter and commit logs.
> 
> 
> > +
> > +/** Tell the NIC it's an outer IPv6 packet for tunneling packet.*/
> > +#define PKT_TX_OUTER_IPV6    (1ULL << 59)
> >
> 
> This flag is not in the cover letter or commit log. What is its purpose?


My bad, forgot that for outer IP, will also need to specify it's type.
So same story here as for inner IP.
So in total, we might need 3 flags for outer IP:

/* Tells HW that outer IP is IPV4 and checksum for it should be calculated by HW. */
PKT_TX_OUTER_IP_CKSUM

/* Tells HW that outer IP is IPV4 and checksum for it should not be calculated by HW. */
PKT_TX_OUTER_IPV4

/* Tells HW that outer IP is IPV6. */
PKT_TX_OUTER_IPV6

> 
> 
> >   /**
> >    * TCP segmentation offload. To enable this offload feature for a @@
> > -266,10 +272,9 @@ struct rte_mbuf {
> >   			uint64_t tso_segsz:16; /**< TCP TSO segment size */
> >
> >   			/* fields for TX offloading of tunnels */
> > -			uint64_t inner_l3_len:9; /**< inner L3 (IP) Hdr Length. */
> > -			uint64_t inner_l2_len:7; /**< inner L2 (MAC) Hdr Length. */
> > -
> > -			/* uint64_t unused:8; */
> > +			uint64_t outer_l3_len:9; /**< outer L3 (IP) Hdr Length. */
> > +			uint64_t outer_l2_len:7; /**< outer L2 (MAC) Hdr Length. */
> > +			uint64_t l4_tun_len:8; /**< L4 tunnelling header length */
> >   		};
> >   	};
> >   } __rte_cache_aligned;
> >
> 
> About l4_tun_len, I have another comment I forgot to add in the cover letter. Can we remove it and include its length in outer_l2_len
> instead? For instance, replace:
> 
>       mb->l2_len =  eth_hdr_in;
>       mb->l3_len = ipv4_hdr_in;
>       mb->outer_l2_len = eth_hdr_out;
>       mb->outer_l3_len = ipv4_hdr_out;
>       mb->l4tun_len = vxlan_hdr;
>       mb->ol_flags |= PKT_TX_OUT_IP_CKSUM  | PKT_TX_UDP_TUNNEL |
>         PKT_TX_IP_CKSUM |  PKT_TX_TCP_CKSUM;
> 
> by:
> 
>       mb->l2_len =  eth_hdr_in;
>       mb->l3_len = ipv4_hdr_in;
>       mb->outer_l2_len = eth_hdr_out + vxlan_hdr;
>       mb->outer_l3_len = ipv4_hdr_out;
>       mb->ol_flags |= PKT_TX_OUT_IP_CKSUM  | PKT_TX_UDP_TUNNEL |
>         PKT_TX_IP_CKSUM |  PKT_TX_TCP_CKSUM;
> 
> I think it won't bother the driver, and it's coherent with case B.2 of your cover letter.

You probably meant:
mb->l2_len =  eth_hdr_in + vxlan_hdr;
?
Yes, I think it could be done that way too.
Though I still prefer to keep l4tun_len - it makes things a bit cleaner (at least to me). 
After all  we do have space for it in mbuf's tx_offload.
Konstantin

> 
> Regards,
> Olivier
Ananyev, Konstantin Nov. 27, 2014, 5:01 p.m. UTC | #4
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Ananyev, Konstantin
> Sent: Thursday, November 27, 2014 2:56 PM
> To: Liu, Jijiang; Olivier Matz (olivier.matz@6wind.com)
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH 1/3] mbuf:add two TX offload flags and change three fields
> 
> 
> 
> >
> > -----Original Message-----
> > From: Olivier MATZ [mailto:olivier.matz@6wind.com]
> > Sent: Thursday, November 27, 2014 6:00 PM
> > To: Liu, Jijiang; dev@dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH 1/3] mbuf:add two TX offload flags and change three fields
> >
> > Hi Jijiang,
> >
> > Please see some comments below.
> >
> > On 11/27/2014 09:18 AM, Jijiang Liu wrote:
> > > In place of removing the PKT_TX_VXLAN_CKSUM, we introduce 2 new flags: PKT_TX_OUT_IP_CKSUM,
> PKT_TX_UDP_TUNNEL_PKT,
> > and a new field: l4_tun_len.
> > > Replace the inner_l2_len and the inner_l3_len field with the outer_l2_len and outer_l3_len field.
> > >
> > > PKT_TX_OUT_IP_CKSUM: is not used for non-tunnelling packet;hardware outer checksum for tunnelling packet.
> > > PKT_TX_UDP_TUNNEL_PKT: is used to tell PMD that the transmit packet is a UDP tunneling packet.
> > > l4_tun_len: for VXLAN packet, it should be udp header length plus VXLAN header length.
> > >
> > > Signed-off-by: Jijiang Liu <jijiang.liu@intel.com>
> > > ---
> > >   lib/librte_mbuf/rte_mbuf.c |    2 +-
> > >   lib/librte_mbuf/rte_mbuf.h |   23 ++++++++++++++---------
> > >   2 files changed, 15 insertions(+), 10 deletions(-)
> > >
> > > diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c
> > > index 87c2963..e89c310 100644
> > > --- a/lib/librte_mbuf/rte_mbuf.c
> > > +++ b/lib/librte_mbuf/rte_mbuf.c
> > > @@ -240,7 +240,7 @@ const char *rte_get_tx_ol_flag_name(uint64_t mask)
> > >   	case PKT_TX_SCTP_CKSUM: return "PKT_TX_SCTP_CKSUM";
> > >   	case PKT_TX_UDP_CKSUM: return "PKT_TX_UDP_CKSUM";
> > >   	case PKT_TX_IEEE1588_TMST: return "PKT_TX_IEEE1588_TMST";
> > > -	case PKT_TX_VXLAN_CKSUM: return "PKT_TX_VXLAN_CKSUM";
> > > +	case PKT_TX_UDP_TUNNEL_PKT: return "PKT_TX_UDP_TUNNEL_PKT";
> > >   	case PKT_TX_TCP_SEG: return "PKT_TX_TCP_SEG";
> > >   	default: return NULL;
> >
> > As I said as a reply to the cover letter, I suggest to use PKT_TX_OUT_UDP_CKSUM instead of PKT_TX_UDP_TUNNEL_PKT.
> 
> HW don't support outer L4 checksum offload.
> But to calculate inner checksums correctly, it needs a hint from SW about L4 Tunneling Type.
> Currently the following values are recognised by HW:
> 
> L4 Tunneling Type (Teredo / GRE header / VXLAN header) indication:
> 00b - No UDP / GRE tunneling (field must be set to zero if EIPT equals to zero)
> 01b - UDP tunneling header (any UDP tunneling, VXLAN and Geneve).
> 10b - GRE tunneling header
> Else - reserved
> 
> You can check yourself:
> http://www.intel.com/content/www/us/en/embedded/products/networking/xl710-10-40-controller-datasheet.html
> Sections 8.4.2.2.1 and 8.4.4.2
> 
> >
> > Also, the PKT_TX_OUT_IP_CKSUM case is missing here.
> >
> > > diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
> > > index 367fc56..48cd8e1 100644
> > > --- a/lib/librte_mbuf/rte_mbuf.h
> > > +++ b/lib/librte_mbuf/rte_mbuf.h
> > > @@ -99,10 +99,9 @@ extern "C" {
> > >   #define PKT_RX_TUNNEL_IPV6_HDR (1ULL << 12) /**< RX tunnel packet with IPv6 header. */
> > >   #define PKT_RX_FDIR_ID       (1ULL << 13) /**< FD id reported if FDIR match. */
> > >   #define PKT_RX_FDIR_FLX      (1ULL << 14) /**< Flexible bytes reported if FDIR match. */
> > > -/* add new RX flags here */
> > >
> >
> > We should probably not remove this line.
> >
> >
> > >   /* add new TX flags here */
> > > -#define PKT_TX_VXLAN_CKSUM   (1ULL << 50) /**< TX checksum of VXLAN computed by NIC */
> > > +#define PKT_TX_UDP_TUNNEL_PKT (1ULL << 50) /**< TX packet is an UDP
> > > +tunneling packet */
> > >   #define PKT_TX_IEEE1588_TMST (1ULL << 51) /**< TX IEEE1588 packet to
> > > timestamp. */
> > >
> > >   /**
> > > @@ -125,13 +124,20 @@ extern "C" {
> > >   #define PKT_TX_IP_CKSUM      (1ULL << 54) /**< IP cksum of TX pkt. computed by NIC. */
> > >   #define PKT_TX_IPV4_CSUM     PKT_TX_IP_CKSUM /**< Alias of PKT_TX_IP_CKSUM. */
> > >
> > > +#define PKT_TX_VLAN_PKT      (1ULL << 55) /**< TX packet is a 802.1q VLAN packet. */
> > > +
> > >   /** Tell the NIC it's an IPv4 packet. Required for L4 checksum offload or TSO. */
> > > -#define PKT_TX_IPV4          PKT_RX_IPV4_HDR
> > > +#define PKT_TX_IPV4          (1ULL << 56)
> > >
> > >   /** Tell the NIC it's an IPv6 packet. Required for L4 checksum offload or TSO. */
> > > -#define PKT_TX_IPV6          PKT_RX_IPV6_HDR
> > > +#define PKT_TX_IPV6          (1ULL << 57)
> >
> > The description in comment does not match the description in the cover letter.
> >
> > Also, I think replacing PKT_RX_IPV[46]_HDR by the value may go in another commit.
> >
> >
> > > -#define PKT_TX_VLAN_PKT      (1ULL << 55) /**< TX packet is a 802.1q VLAN packet. */
> > > +/** Outer IP cksum of TX pkt. computed by NIC for tunneling packet */
> > > +#define PKT_TX_OUTER_IP_CKSUM   (1ULL << 58)
> > > +#define PKT_TX_OUTER_IPV4_CSUM  PKT_TX_OUTER_IP_CKSUM /**< Alias of
> > > +PKT_TX_OUTER_IP_CKSUM. */
> >
> > Why do we need an alias?
> >
> > By the way, I think the alias of PKT_TX_IP_CKSUM is also uneeded and can be removed. But it's not the topic of your series.
> >
> > Also, the name PKT_TX_OUTER_IP_CKSUM does not match the name in the cover letter and commit logs.
> >
> >
> > > +
> > > +/** Tell the NIC it's an outer IPv6 packet for tunneling packet.*/
> > > +#define PKT_TX_OUTER_IPV6    (1ULL << 59)
> > >
> >
> > This flag is not in the cover letter or commit log. What is its purpose?
> 
> 
> My bad, forgot that for outer IP, will also need to specify it's type.
> So same story here as for inner IP.
> So in total, we might need 3 flags for outer IP:
> 
> /* Tells HW that outer IP is IPV4 and checksum for it should be calculated by HW. */
> PKT_TX_OUTER_IP_CKSUM
> 
> /* Tells HW that outer IP is IPV4 and checksum for it should not be calculated by HW. */
> PKT_TX_OUTER_IPV4
> 
> /* Tells HW that outer IP is IPV6. */
> PKT_TX_OUTER_IPV6
> 
> >
> >
> > >   /**
> > >    * TCP segmentation offload. To enable this offload feature for a @@
> > > -266,10 +272,9 @@ struct rte_mbuf {
> > >   			uint64_t tso_segsz:16; /**< TCP TSO segment size */
> > >
> > >   			/* fields for TX offloading of tunnels */
> > > -			uint64_t inner_l3_len:9; /**< inner L3 (IP) Hdr Length. */
> > > -			uint64_t inner_l2_len:7; /**< inner L2 (MAC) Hdr Length. */
> > > -
> > > -			/* uint64_t unused:8; */
> > > +			uint64_t outer_l3_len:9; /**< outer L3 (IP) Hdr Length. */
> > > +			uint64_t outer_l2_len:7; /**< outer L2 (MAC) Hdr Length. */
> > > +			uint64_t l4_tun_len:8; /**< L4 tunnelling header length */
> > >   		};
> > >   	};
> > >   } __rte_cache_aligned;
> > >
> >
> > About l4_tun_len, I have another comment I forgot to add in the cover letter. Can we remove it and include its length in
> outer_l2_len
> > instead? For instance, replace:
> >
> >       mb->l2_len =  eth_hdr_in;
> >       mb->l3_len = ipv4_hdr_in;
> >       mb->outer_l2_len = eth_hdr_out;
> >       mb->outer_l3_len = ipv4_hdr_out;
> >       mb->l4tun_len = vxlan_hdr;
> >       mb->ol_flags |= PKT_TX_OUT_IP_CKSUM  | PKT_TX_UDP_TUNNEL |
> >         PKT_TX_IP_CKSUM |  PKT_TX_TCP_CKSUM;
> >
> > by:
> >
> >       mb->l2_len =  eth_hdr_in;
> >       mb->l3_len = ipv4_hdr_in;
> >       mb->outer_l2_len = eth_hdr_out + vxlan_hdr;
> >       mb->outer_l3_len = ipv4_hdr_out;
> >       mb->ol_flags |= PKT_TX_OUT_IP_CKSUM  | PKT_TX_UDP_TUNNEL |
> >         PKT_TX_IP_CKSUM |  PKT_TX_TCP_CKSUM;
> >
> > I think it won't bother the driver, and it's coherent with case B.2 of your cover letter.
> 
> You probably meant:
> mb->l2_len =  eth_hdr_in + vxlan_hdr;
> ?
> Yes, I think it could be done that way too.
> Though I still prefer to keep l4tun_len - it makes things a bit cleaner (at least to me).
> After all  we do have space for it in mbuf's tx_offload.

As one more thing in favour of separate l4tun_len field:
l2_len is 7 bit long, so in theory it might be not enough, as for FVL:
12:18 L4TUNLEN L4 Tunneling Length (Teredo / GRE header / VXLAN header) defined in Words. 


> Konstantin
> 
> >
> > Regards,
> > Olivier
Olivier Matz Nov. 28, 2014, 9:17 a.m. UTC | #5
Hi Jijiang,

On 11/27/2014 02:14 PM, Liu, Jijiang wrote:
>>> diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
>>> index 367fc56..48cd8e1 100644
>>> --- a/lib/librte_mbuf/rte_mbuf.h
>>> +++ b/lib/librte_mbuf/rte_mbuf.h
>>> @@ -99,10 +99,9 @@ extern "C" {
>>>   #define PKT_RX_TUNNEL_IPV6_HDR (1ULL << 12) /**< RX tunnel packet with
>> IPv6 header. */
>>>   #define PKT_RX_FDIR_ID       (1ULL << 13) /**< FD id reported if FDIR match.
>> */
>>>   #define PKT_RX_FDIR_FLX      (1ULL << 14) /**< Flexible bytes reported if FDIR
>> match. */
>>> -/* add new RX flags here */
>>>
>>
>> We should probably not remove this line.
> 
> Why?
> There are two lines "/* add new RX flags here */" in rte_mbuf.h file.

No, one is RX, the other is TX.


>>> +/** Tell the NIC it's an outer IPv6 packet for tunneling packet.*/
>>> +#define PKT_TX_OUTER_IPV6    (1ULL << 59)
>>>
>>
>> This flag is not in the cover letter or commit log. What is its purpose?
>>
> With FVL, if outer L3 header is IPv6, to make HW TX checksum offload work , SW must be responsible to tell hardware this information.


What hardware checksum are you talking about?
I understand that outer L4 checksum is not supported from one of
Konstantin's mail.
And there is no L3 checksum in IPv6.


Regards,
Olivier
Olivier Matz Nov. 28, 2014, 10:45 a.m. UTC | #6
Hi Konstantin,

On 11/27/2014 06:01 PM, Ananyev, Konstantin wrote:
>> Yes, I think it could be done that way too.
>> Though I still prefer to keep l4tun_len - it makes things a bit cleaner (at least to me).
>> After all  we do have space for it in mbuf's tx_offload.
> 
> As one more thing in favour of separate l4tun_len field:
> l2_len is 7 bit long, so in theory it might be not enough, as for FVL:
> 12:18 L4TUNLEN L4 Tunneling Length (Teredo / GRE header / VXLAN header) defined in Words. 

The l2_len field is 7 bits long because it was mapped to ixgbe hardware.
If it's not enough (although I'm not sure it's possible to have a header
larger than 128 bytes in this case), it's probably because we should
not have been doing that.
Maybe these generic fields should be generic :)
If it's not enough, what about changing l2_len to 8 bits?

Often in the recent discussions, I see as an argument "fortville needs
this so we need to add it in the mbuf". I agree that currently
fortville is the only hardware supported for the new features, so it
can make sense to act like this, but we have to accept to come back
to this API in the future if it makes less sense with other drivers.

Also, application developers can be annoyed to see that the mbuf flags
and meta data are just duplicating information that is already present
in the packet.

About the l4tun_len, it's another field the application has to fill,
but it's maybe cleaner. I just wanted to clarify why I'm discussing
these points.

Regards,
Olivier
Ananyev, Konstantin Nov. 28, 2014, 11:16 a.m. UTC | #7
Hi Olver,

> -----Original Message-----
> From: Olivier MATZ [mailto:olivier.matz@6wind.com]
> Sent: Friday, November 28, 2014 10:46 AM
> To: Ananyev, Konstantin; Liu, Jijiang
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH 1/3] mbuf:add two TX offload flags and change three fields
> 
> Hi Konstantin,
> 
> On 11/27/2014 06:01 PM, Ananyev, Konstantin wrote:
> >> Yes, I think it could be done that way too.
> >> Though I still prefer to keep l4tun_len - it makes things a bit cleaner (at least to me).
> >> After all  we do have space for it in mbuf's tx_offload.
> >
> > As one more thing in favour of separate l4tun_len field:
> > l2_len is 7 bit long, so in theory it might be not enough, as for FVL:
> > 12:18 L4TUNLEN L4 Tunneling Length (Teredo / GRE header / VXLAN header) defined in Words.
> 
> The l2_len field is 7 bits long because it was mapped to ixgbe hardware.

Yes.

> If it's not enough (although I'm not sure it's possible to have a header
> larger than 128 bytes in this case), it's probably because we should
> not have been doing that.

I also can't imagine the L2 header being that long.
From other side - I just don't like an idea of PMD stripping off HW capabilities.

> Maybe these generic fields should be generic :)
> If it's not enough, what about changing l2_len to 8 bits?

Yes, was thinking about the same thing.
Maybe instead of introducing l4tun_len, we should increase l2_len and outer_l2_len sizes to 8bits each? 
Though that would break the pair:
l2_len : 7
l3_len :9 
Which is quite useful, as it fits into 2B, and maps exactly to ixgbe TCD layout.
Wonder if we change it, would be there any performance penalty for ixgbe, and if yes how big.

> 
> Often in the recent discussions, I see as an argument "fortville needs
> this so we need to add it in the mbuf". I agree that currently
> fortville is the only hardware supported for the new features, so it
> can make sense to act like this, but we have to accept to come back
> to this API in the future if it makes less sense with other drivers.

Yes, it is sometime hard to keep a balance.
From one side people would like to have a  clean and generic API, from other side
people would like to have ab ability to use all features that HW supports. 

> 
> Also, application developers can be annoyed to see that the mbuf flags
> and meta data are just duplicating information that is already present
> in the packet.
> 
> About the l4tun_len, it's another field the application has to fill,
> but it's maybe cleaner. I just wanted to clarify why I'm discussing
> these points.
> 
> Regards,
> Olivier
Ananyev, Konstantin Nov. 30, 2014, 2:50 p.m. UTC | #8
Hi Olver,

> -----Original Message-----
> From: Olivier MATZ [mailto:olivier.matz@6wind.com]
> Sent: Friday, November 28, 2014 10:46 AM
> To: Ananyev, Konstantin; Liu, Jijiang
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH 1/3] mbuf:add two TX offload flags and change three fields
> 
> Hi Konstantin,
> 
> On 11/27/2014 06:01 PM, Ananyev, Konstantin wrote:
> >> Yes, I think it could be done that way too.
> >> Though I still prefer to keep l4tun_len - it makes things a bit cleaner (at least to me).
> >> After all  we do have space for it in mbuf's tx_offload.
> >
> > As one more thing in favour of separate l4tun_len field:
> > l2_len is 7 bit long, so in theory it might be not enough, as for FVL:
> > 12:18 L4TUNLEN L4 Tunneling Length (Teredo / GRE header / VXLAN header) defined in Words.
> 
> The l2_len field is 7 bits long because it was mapped to ixgbe hardware.
> If it's not enough (although I'm not sure it's possible to have a header
> larger than 128 bytes in this case), it's probably because we should
> not have been doing that.
> Maybe these generic fields should be generic :)
> If it's not enough, what about changing l2_len to 8 bits?
> 
> Often in the recent discussions, I see as an argument "fortville needs
> this so we need to add it in the mbuf". I agree that currently
> fortville is the only hardware supported for the new features, so it
> can make sense to act like this, but we have to accept to come back
> to this API in the future if it makes less sense with other drivers.
> 
> Also, application developers can be annoyed to see that the mbuf flags
> and meta data are just duplicating information that is already present
> in the packet.
> 
> About the l4tun_len, it's another field the application has to fill,
> but it's maybe cleaner. I just wanted to clarify why I'm discussing
> these points.

After another thought, I think that the way you proposed is a better one.
I gives us more flexibility:  
let's say for now we'll keep both l2_len and outer_l2_len as 7 bit fields,
and upper layer would have to:
mb->l2_len =  eth_hdr_in + vxlan_hdr;
for VXLAN packets.
Then if in the future, we'll realise that 7 bit is not enough we can either:
- increase size of l2_len and outer_l2_len
- introduce new field (l4tun_len as we planned now)
In both cases backward compatibility wouldn't be affected.
From other side - if we'' introduce a new field now (l4tun_len), it would be very hard to get rid of it in the future.  
So, I think we'd better follow your approach here.

Thanks
Konstantin


> 
> Regards,
> Olivier
Jijiang Liu Dec. 1, 2014, 2:30 a.m. UTC | #9
> -----Original Message-----
> From: Ananyev, Konstantin
> Sent: Sunday, November 30, 2014 10:51 PM
> To: Olivier MATZ; Liu, Jijiang
> Cc: dev@dpdk.org
> Subject: RE: [dpdk-dev] [PATCH 1/3] mbuf:add two TX offload flags and change
> three fields
> 
> Hi Olver,
> 
> > -----Original Message-----
> > From: Olivier MATZ [mailto:olivier.matz@6wind.com]
> > Sent: Friday, November 28, 2014 10:46 AM
> > To: Ananyev, Konstantin; Liu, Jijiang
> > Cc: dev@dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH 1/3] mbuf:add two TX offload flags and
> > change three fields
> >
> > Hi Konstantin,
> >
> > On 11/27/2014 06:01 PM, Ananyev, Konstantin wrote:
> > >> Yes, I think it could be done that way too.
> > >> Though I still prefer to keep l4tun_len - it makes things a bit cleaner (at least
> to me).
> > >> After all  we do have space for it in mbuf's tx_offload.
> > >
> > > As one more thing in favour of separate l4tun_len field:
> > > l2_len is 7 bit long, so in theory it might be not enough, as for FVL:
> > > 12:18 L4TUNLEN L4 Tunneling Length (Teredo / GRE header / VXLAN header)
> defined in Words.
> >
> > The l2_len field is 7 bits long because it was mapped to ixgbe hardware.
> > If it's not enough (although I'm not sure it's possible to have a
> > header larger than 128 bytes in this case), it's probably because we
> > should not have been doing that.
> > Maybe these generic fields should be generic :) If it's not enough,
> > what about changing l2_len to 8 bits?
> >
> > Often in the recent discussions, I see as an argument "fortville needs
> > this so we need to add it in the mbuf". I agree that currently
> > fortville is the only hardware supported for the new features, so it
> > can make sense to act like this, but we have to accept to come back to
> > this API in the future if it makes less sense with other drivers.
> >
> > Also, application developers can be annoyed to see that the mbuf flags
> > and meta data are just duplicating information that is already present
> > in the packet.
> >
> > About the l4tun_len, it's another field the application has to fill,
> > but it's maybe cleaner. I just wanted to clarify why I'm discussing
> > these points.
> 
> After another thought, I think that the way you proposed is a better one.
> I gives us more flexibility:
> let's say for now we'll keep both l2_len and outer_l2_len as 7 bit fields, and upper
> layer would have to:
> mb->l2_len =  eth_hdr_in + vxlan_hdr;

A question, how to fill the mb->l2_len  for IP in IP, IP in GRE tunneling packet that is no inner L2 header?
What  is the rule?
Or you think it should be mb->l2_len =  0 + l4_tun_len;  ??

> for VXLAN packets.
> Then if in the future, we'll realise that 7 bit is not enough we can either:
> - increase size of l2_len and outer_l2_len
> - introduce new field (l4tun_len as we planned now) In both cases backward
> compatibility wouldn't be affected.
> From other side - if we'' introduce a new field now (l4tun_len), it would be very
> hard to get rid of it in the future.
> So, I think we'd better follow your approach here.
> 
> Thanks
> Konstantin
> 
> 
> >
> > Regards,
> > Olivier
Olivier Matz Dec. 1, 2014, 9:52 a.m. UTC | #10
Hi Jijiang,

On 12/01/2014 03:30 AM, Liu, Jijiang wrote:
>> After another thought, I think that the way you proposed is a better one.
>> I gives us more flexibility:
>> let's say for now we'll keep both l2_len and outer_l2_len as 7 bit fields, and upper
>> layer would have to:
>> mb->l2_len =  eth_hdr_in + vxlan_hdr;
> 
> A question, how to fill the mb->l2_len  for IP in IP, IP in GRE tunneling packet that is no inner L2 header?
> What  is the rule?
> Or you think it should be mb->l2_len =  0 + l4_tun_len;  ??

We could have the following rule:
- the l2_len starts after the end of outer_l2_len + outer_l3_len (if
  they are not 0)
- l2_len is the length of all headers up to the first ip or ipv6
  header

Examples:

Ether/IP/UDP/xxx
  m->outer_l2_len = 0
  m->outer_l3_len = 0
  m->l2_len = sizeof(ether)
  m->l3_len = sizeof(ip)
  m->l4_len = sizeof(udp)

Ether/IP/IP/UDP/xxx
  m->outer_l2_len = sizeof(ether)
  m->outer_l3_len = sizeof(ip)
  m->l2_len = 0
  m->l3_len = sizeof(ip)
  m->l4_len = sizeof(udp)

Ether/IP/GRE/IP/UDP/xxx
  m->outer_l2_len = sizeof(ether)
  m->outer_l3_len = sizeof(ip)
  m->l2_len = sizeof(gre)
  m->l3_len = sizeof(ip)
  m->l4_len = sizeof(udp)

Ether/IP/UDP/vxlan/Ether/IP/UDP/xxx
  m->outer_l2_len = sizeof(ether)
  m->outer_l3_len = sizeof(ip)
  m->l2_len = sizeof(udp) + sizeof(vxlan) + sizeof(ether)
  m->l3_len = sizeof(ip)
  m->l4_len = sizeof(udp)

Regards,
Olivier
Ananyev, Konstantin Dec. 1, 2014, 11:58 a.m. UTC | #11
> -----Original Message-----
> From: Olivier MATZ [mailto:olivier.matz@6wind.com]
> Sent: Monday, December 01, 2014 9:52 AM
> To: Liu, Jijiang; Ananyev, Konstantin
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH 1/3] mbuf:add two TX offload flags and change three fields
> 
> Hi Jijiang,
> 
> On 12/01/2014 03:30 AM, Liu, Jijiang wrote:
> >> After another thought, I think that the way you proposed is a better one.
> >> I gives us more flexibility:
> >> let's say for now we'll keep both l2_len and outer_l2_len as 7 bit fields, and upper
> >> layer would have to:
> >> mb->l2_len =  eth_hdr_in + vxlan_hdr;
> >
> > A question, how to fill the mb->l2_len  for IP in IP, IP in GRE tunneling packet that is no inner L2 header?
> > What  is the rule?
> > Or you think it should be mb->l2_len =  0 + l4_tun_len;  ??

I think it should be:
l2_len =  0 + l4_tun_len;

From your patch:
l4_tunnel_len = l4_tun_len + l2_len;

> 
> We could have the following rule:
> - the l2_len starts after the end of outer_l2_len + outer_l3_len (if
>   they are not 0)

I think it should start after the outer_l4_hdr.
I.e. right now we don't include udp header length into it.

> - l2_len is the length of all headers up to the first ip or ipv6
>   header

Yes, same thought here.

> 
> Examples:
> 
> Ether/IP/UDP/xxx
>   m->outer_l2_len = 0
>   m->outer_l3_len = 0
>   m->l2_len = sizeof(ether)
>   m->l3_len = sizeof(ip)
>   m->l4_len = sizeof(udp)
> 
> Ether/IP/IP/UDP/xxx
>   m->outer_l2_len = sizeof(ether)
>   m->outer_l3_len = sizeof(ip)
>   m->l2_len = 0
>   m->l3_len = sizeof(ip)
>   m->l4_len = sizeof(udp)
> 
> Ether/IP/GRE/IP/UDP/xxx
>   m->outer_l2_len = sizeof(ether)
>   m->outer_l3_len = sizeof(ip)
>   m->l2_len = sizeof(gre)
>   m->l3_len = sizeof(ip)
>   m->l4_len = sizeof(udp)
> 
> Ether/IP/UDP/vxlan/Ether/IP/UDP/xxx
>   m->outer_l2_len = sizeof(ether)
>   m->outer_l3_len = sizeof(ip)
>   m->l2_len = sizeof(udp) + sizeof(vxlan) + sizeof(ether)

I think it should be:
m->l2_len = sizeof(vxlan) + sizeof(ether)

We don't need to add sizeof(udp) as we already say to the HW that I t will be UDP TUNNELING vi the ol_flag: PKT_TX_UDP_TUNNEL_PKT.

>   m->l3_len = sizeof(ip)
>   m->l4_len = sizeof(udp)
> 
> Regards,
> Olivier
Olivier Matz Dec. 1, 2014, 12:28 p.m. UTC | #12
Hi Konstantin,

On 12/01/2014 12:58 PM, Ananyev, Konstantin wrote:
>> Ether/IP/UDP/vxlan/Ether/IP/UDP/xxx
>>   m->outer_l2_len = sizeof(ether)
>>   m->outer_l3_len = sizeof(ip)
>>   m->l2_len = sizeof(udp) + sizeof(vxlan) + sizeof(ether)
> 
> I think it should be:
> m->l2_len = sizeof(vxlan) + sizeof(ether)
> 
> We don't need to add sizeof(udp) as we already say to the HW that I t will be UDP TUNNELING vi the ol_flag: PKT_TX_UDP_TUNNEL_PKT.
> 
>>   m->l3_len = sizeof(ip)
>>   m->l4_len = sizeof(udp)

I would agree if we had a m->outer_l4_len. Maybe we should add
it to be coherent with lX_len fields?

I think that to process the inner IP checksum, we should be able
to use these 2 notations:

Ether/IP/GRE/IP/UDP/xxx
  m->flags = OUTER_IP_CKSUM
  m->outer_l2_len = sizeof(ether)
  m->outer_l3_len = sizeof(ip)
  m->l2_len = sizeof(gre)
  m->l3_len = sizeof(ip)
  m->l4_len = sizeof(udp)

Ether/IP/GRE/IP/UDP/xxx
  m->flags = IP_CKSUM
  /* sum of all outer_lX_len and l2_len from above: */
  m->l2_len = sizeof(ether) + sizeof(ip) + sizeof(gre)
  m->l3_len = sizeof(ip)
  m->l4_len = sizeof(udp)

So, in case of vxlan, I suggest that either we include the size of
UDP in l2_len, or we add a outer_l4_len. What do you think?

Maybe adding outer_l4_len makes more sense.

Regards,
Olivier
Jijiang Liu Dec. 1, 2014, 1:07 p.m. UTC | #13
> -----Original Message-----
> From: Olivier MATZ [mailto:olivier.matz@6wind.com]
> Sent: Monday, December 1, 2014 8:28 PM
> To: Ananyev, Konstantin; Liu, Jijiang
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH 1/3] mbuf:add two TX offload flags and change
> three fields
> 
> Hi Konstantin,
> 
> On 12/01/2014 12:58 PM, Ananyev, Konstantin wrote:
> >> Ether/IP/UDP/vxlan/Ether/IP/UDP/xxx
> >>   m->outer_l2_len = sizeof(ether)
> >>   m->outer_l3_len = sizeof(ip)
> >>   m->l2_len = sizeof(udp) + sizeof(vxlan) + sizeof(ether)
> >
> > I think it should be:
> > m->l2_len = sizeof(vxlan) + sizeof(ether)
> >
> > We don't need to add sizeof(udp) as we already say to the HW that I t will be
> UDP TUNNELING vi the ol_flag: PKT_TX_UDP_TUNNEL_PKT.
> >
> >>   m->l3_len = sizeof(ip)
> >>   m->l4_len = sizeof(udp)
> 
> I would agree if we had a m->outer_l4_len. Maybe we should add it to be
> coherent with lX_len fields?
> 
> I think that to process the inner IP checksum, we should be able to use these 2
> notations:
> 
> Ether/IP/GRE/IP/UDP/xxx
>   m->flags = OUTER_IP_CKSUM
>   m->outer_l2_len = sizeof(ether)
>   m->outer_l3_len = sizeof(ip)
>   m->l2_len = sizeof(gre)
>   m->l3_len = sizeof(ip)
>   m->l4_len = sizeof(udp)
> 
> Ether/IP/GRE/IP/UDP/xxx
>   m->flags = IP_CKSUM
>   /* sum of all outer_lX_len and l2_len from above: */
>   m->l2_len = sizeof(ether) + sizeof(ip) + sizeof(gre)
>   m->l3_len = sizeof(ip)
>   m->l4_len = sizeof(udp)
> 
> So, in case of vxlan, I suggest that either we include the size of UDP in l2_len, or
> we add a outer_l4_len. What do you think?
I agree to include the size of UDP in l2_len, for VXLAN, the UDP header is a part of VXLAN tunneling header as the UDP destination port indicate if a packet is VXLAN packet.
> Maybe adding outer_l4_len makes more sense.

> 
> Regards,
> Olivier
Ananyev, Konstantin Dec. 1, 2014, 2:31 p.m. UTC | #14
Hi lads,

> -----Original Message-----
> From: Liu, Jijiang
> Sent: Monday, December 01, 2014 1:08 PM
> To: Olivier MATZ; Ananyev, Konstantin
> Cc: dev@dpdk.org
> Subject: RE: [dpdk-dev] [PATCH 1/3] mbuf:add two TX offload flags and change three fields
> 
> 
> 
> > -----Original Message-----
> > From: Olivier MATZ [mailto:olivier.matz@6wind.com]
> > Sent: Monday, December 1, 2014 8:28 PM
> > To: Ananyev, Konstantin; Liu, Jijiang
> > Cc: dev@dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH 1/3] mbuf:add two TX offload flags and change
> > three fields
> >
> > Hi Konstantin,
> >
> > On 12/01/2014 12:58 PM, Ananyev, Konstantin wrote:
> > >> Ether/IP/UDP/vxlan/Ether/IP/UDP/xxx
> > >>   m->outer_l2_len = sizeof(ether)
> > >>   m->outer_l3_len = sizeof(ip)
> > >>   m->l2_len = sizeof(udp) + sizeof(vxlan) + sizeof(ether)
> > >
> > > I think it should be:
> > > m->l2_len = sizeof(vxlan) + sizeof(ether)
> > >
> > > We don't need to add sizeof(udp) as we already say to the HW that I t will be
> > UDP TUNNELING vi the ol_flag: PKT_TX_UDP_TUNNEL_PKT.
> > >
> > >>   m->l3_len = sizeof(ip)
> > >>   m->l4_len = sizeof(udp)
> >
> > I would agree if we had a m->outer_l4_len. Maybe we should add it to be
> > coherent with lX_len fields?
> >
> > I think that to process the inner IP checksum, we should be able to use these 2
> > notations:
> >
> > Ether/IP/GRE/IP/UDP/xxx
> >   m->flags = OUTER_IP_CKSUM
> >   m->outer_l2_len = sizeof(ether)
> >   m->outer_l3_len = sizeof(ip)
> >   m->l2_len = sizeof(gre)
> >   m->l3_len = sizeof(ip)
> >   m->l4_len = sizeof(udp)
> >
> > Ether/IP/GRE/IP/UDP/xxx
> >   m->flags = IP_CKSUM
> >   /* sum of all outer_lX_len and l2_len from above: */
> >   m->l2_len = sizeof(ether) + sizeof(ip) + sizeof(gre)
> >   m->l3_len = sizeof(ip)
> >   m->l4_len = sizeof(udp)
> >
> > So, in case of vxlan, I suggest that either we include the size of UDP in l2_len, or
> > we add a outer_l4_len. What do you think?
> I agree to include the size of UDP in l2_len, for VXLAN, the UDP header is a part of VXLAN tunnelling header as the UDP destination
> port indicate if a packet is VXLAN packet.

Actually it is my bad.
While looking at current implementation, I didn't realise that: ETHER_VXLAN_HLEN == (sizeof(struct udp_hdr) + sizeof(struct vxlan_hdr)).
So yes you are right for VXLAN packet it should be:
m->l2_len = sizeof(udp) + sizeof(vxlan) + sizeof(ether);
Sorry for confusing everyone.
Konstantin

> > Maybe adding outer_l4_len makes more sense.
> 
> >
> > Regards,
> > Olivier
diff mbox

Patch

diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c
index 87c2963..e89c310 100644
--- a/lib/librte_mbuf/rte_mbuf.c
+++ b/lib/librte_mbuf/rte_mbuf.c
@@ -240,7 +240,7 @@  const char *rte_get_tx_ol_flag_name(uint64_t mask)
 	case PKT_TX_SCTP_CKSUM: return "PKT_TX_SCTP_CKSUM";
 	case PKT_TX_UDP_CKSUM: return "PKT_TX_UDP_CKSUM";
 	case PKT_TX_IEEE1588_TMST: return "PKT_TX_IEEE1588_TMST";
-	case PKT_TX_VXLAN_CKSUM: return "PKT_TX_VXLAN_CKSUM";
+	case PKT_TX_UDP_TUNNEL_PKT: return "PKT_TX_UDP_TUNNEL_PKT";
 	case PKT_TX_TCP_SEG: return "PKT_TX_TCP_SEG";
 	default: return NULL;
 	}
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index 367fc56..48cd8e1 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -99,10 +99,9 @@  extern "C" {
 #define PKT_RX_TUNNEL_IPV6_HDR (1ULL << 12) /**< RX tunnel packet with IPv6 header. */
 #define PKT_RX_FDIR_ID       (1ULL << 13) /**< FD id reported if FDIR match. */
 #define PKT_RX_FDIR_FLX      (1ULL << 14) /**< Flexible bytes reported if FDIR match. */
-/* add new RX flags here */
 
 /* add new TX flags here */
-#define PKT_TX_VXLAN_CKSUM   (1ULL << 50) /**< TX checksum of VXLAN computed by NIC */
+#define PKT_TX_UDP_TUNNEL_PKT (1ULL << 50) /**< TX packet is an UDP tunneling packet */
 #define PKT_TX_IEEE1588_TMST (1ULL << 51) /**< TX IEEE1588 packet to timestamp. */
 
 /**
@@ -125,13 +124,20 @@  extern "C" {
 #define PKT_TX_IP_CKSUM      (1ULL << 54) /**< IP cksum of TX pkt. computed by NIC. */
 #define PKT_TX_IPV4_CSUM     PKT_TX_IP_CKSUM /**< Alias of PKT_TX_IP_CKSUM. */
 
+#define PKT_TX_VLAN_PKT      (1ULL << 55) /**< TX packet is a 802.1q VLAN packet. */
+
 /** Tell the NIC it's an IPv4 packet. Required for L4 checksum offload or TSO. */
-#define PKT_TX_IPV4          PKT_RX_IPV4_HDR
+#define PKT_TX_IPV4          (1ULL << 56)
 
 /** Tell the NIC it's an IPv6 packet. Required for L4 checksum offload or TSO. */
-#define PKT_TX_IPV6          PKT_RX_IPV6_HDR
+#define PKT_TX_IPV6          (1ULL << 57)
 
-#define PKT_TX_VLAN_PKT      (1ULL << 55) /**< TX packet is a 802.1q VLAN packet. */
+/** Outer IP cksum of TX pkt. computed by NIC for tunneling packet */
+#define PKT_TX_OUTER_IP_CKSUM   (1ULL << 58)
+#define PKT_TX_OUTER_IPV4_CSUM  PKT_TX_OUTER_IP_CKSUM /**< Alias of PKT_TX_OUTER_IP_CKSUM. */
+
+/** Tell the NIC it's an outer IPv6 packet for tunneling packet.*/
+#define PKT_TX_OUTER_IPV6    (1ULL << 59)
 
 /**
  * TCP segmentation offload. To enable this offload feature for a
@@ -266,10 +272,9 @@  struct rte_mbuf {
 			uint64_t tso_segsz:16; /**< TCP TSO segment size */
 
 			/* fields for TX offloading of tunnels */
-			uint64_t inner_l3_len:9; /**< inner L3 (IP) Hdr Length. */
-			uint64_t inner_l2_len:7; /**< inner L2 (MAC) Hdr Length. */
-
-			/* uint64_t unused:8; */
+			uint64_t outer_l3_len:9; /**< outer L3 (IP) Hdr Length. */
+			uint64_t outer_l2_len:7; /**< outer L2 (MAC) Hdr Length. */
+			uint64_t l4_tun_len:8; /**< L4 tunnelling header length */
 		};
 	};
 } __rte_cache_aligned;