[dpdk-dev,v2,5/6] ixgbe: support VxLAN & NVGRE TX checksum off-load

Message ID 1452735516-4527-6-git-send-email-wenzhuo.lu@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Bruce Richardson

Commit Message

Wenzhuo Lu Jan. 14, 2016, 1:38 a.m. UTC
  This patch adds VxLAN & NVGRE TX checksum off-load. When the outer IP
header checksum offload flag is set, the driver sets up the context
descriptor to enable this checksum off-load.
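
For context, an application would typically request this offload roughly as
follows (illustrative sketch only, not part of the patch; the helper name and
the header lengths are example VxLAN-over-IPv4 values):

  #include <rte_mbuf.h>

  /* Hypothetical helper: mark an encapsulated packet for outer-IP
   * checksum offload before handing it to the ixgbe TX path. */
  static void
  request_outer_ip_cksum(struct rte_mbuf *m)
  {
          m->outer_l2_len = 14;        /* outer Ethernet */
          m->outer_l3_len = 20;        /* outer IPv4 */
          m->l2_len = 8 + 8 + 14;      /* outer UDP + VxLAN + inner Ethernet */
          m->l3_len = 20;              /* inner IPv4 */

          m->ol_flags |= PKT_TX_OUTER_IP_CKSUM   /* outer IPv4 checksum */
                      |  PKT_TX_IP_CKSUM;        /* inner IPv4 checksum */
  }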

Signed-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>
---
 drivers/net/ixgbe/ixgbe_rxtx.c | 52 ++++++++++++++++++++++++++++++++++--------
 drivers/net/ixgbe/ixgbe_rxtx.h |  6 ++++-
 lib/librte_mbuf/rte_mbuf.h     |  2 ++
 3 files changed, 49 insertions(+), 11 deletions(-)
  

Comments

Ananyev, Konstantin Feb. 4, 2016, 6:55 p.m. UTC | #1
Hi Wenzhuo,

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Wenzhuo Lu
> Sent: Thursday, January 14, 2016 1:39 AM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH v2 5/6] ixgbe: support VxLAN & NVGRE TX checksum off-load
> 
> The patch add VxLAN & NVGRE TX checksum off-load. When the flag of
> outer IP header checksum offload is set, we'll set the context
> descriptor to enable this checksum off-load.
> 
> Signed-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>
> ---
>  drivers/net/ixgbe/ixgbe_rxtx.c | 52 ++++++++++++++++++++++++++++++++++--------
>  drivers/net/ixgbe/ixgbe_rxtx.h |  6 ++++-
>  lib/librte_mbuf/rte_mbuf.h     |  2 ++
>  3 files changed, 49 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
> index 512ac3a..fea2495 100644
> --- a/drivers/net/ixgbe/ixgbe_rxtx.c
> +++ b/drivers/net/ixgbe/ixgbe_rxtx.c
> @@ -85,7 +85,8 @@
>  		PKT_TX_VLAN_PKT |		 \
>  		PKT_TX_IP_CKSUM |		 \
>  		PKT_TX_L4_MASK |		 \
> -		PKT_TX_TCP_SEG)
> +		PKT_TX_TCP_SEG |		 \
> +		PKT_TX_OUTER_IP_CKSUM)


I think you also need to update dev_info.tx_offload_capa;
I couldn't find where you do it.
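
For reference, something along these lines in ixgbe_dev_info_get() (sketch
only; DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM is assumed to exist in rte_ethdev.h or
to be added by this series):

	dev_info->tx_offload_capa =
		DEV_TX_OFFLOAD_VLAN_INSERT |
		DEV_TX_OFFLOAD_IPV4_CKSUM |
		DEV_TX_OFFLOAD_UDP_CKSUM |
		DEV_TX_OFFLOAD_TCP_CKSUM |
		DEV_TX_OFFLOAD_SCTP_CKSUM |
		DEV_TX_OFFLOAD_TCP_TSO |
		DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM; /* new: outer IP checksum */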

> 
>  static inline struct rte_mbuf *
>  rte_rxmbuf_alloc(struct rte_mempool *mp)
> @@ -364,9 +365,11 @@ ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
>  	uint32_t ctx_idx;
>  	uint32_t vlan_macip_lens;
>  	union ixgbe_tx_offload tx_offload_mask;
> +	uint32_t seqnum_seed = 0;
> 
>  	ctx_idx = txq->ctx_curr;
> -	tx_offload_mask.data = 0;
> +	tx_offload_mask.data[0] = 0;
> +	tx_offload_mask.data[1] = 0;
>  	type_tucmd_mlhl = 0;
> 
>  	/* Specify which HW CTX to upload. */
> @@ -430,9 +433,20 @@ ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
>  		}
>  	}
> 
> +	if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
> +		tx_offload_mask.outer_l3_len |= ~0;
> +		tx_offload_mask.outer_l2_len |= ~0;
> +		seqnum_seed |= tx_offload.outer_l3_len
> +			       << IXGBE_ADVTXD_OUTER_IPLEN;
> +		seqnum_seed |= tx_offload.outer_l2_len
> +			       << IXGBE_ADVTXD_TUNNEL_LEN;
> +	}

I don't have an X550 card off-hand, but reading through the datasheet this doesn't seem right.
When OUTER_IP_CKSUM is enabled, MACLEN becomes outer_l2_len and
TUNNEL_LEN should be: outer_l4_len + tunnel_hdr_len + inner_l2_len.
So I think that in our case TUNNEL_LEN should be set to l2_len.
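
I.e. roughly the following (untested sketch, using the shift names from this
patch; MACLEN in vlan_macip_lens would then be built from outer_l2_len
instead of l2_len):

	if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
		tx_offload_mask.outer_l2_len |= ~0;
		tx_offload_mask.outer_l3_len |= ~0;
		tx_offload_mask.l2_len |= ~0;
		/* OUTERIPLEN: outer L3 header length */
		seqnum_seed |= tx_offload.outer_l3_len
			       << IXGBE_ADVTXD_OUTER_IPLEN;
		/* TUNNELLEN: outer L4 + tunnel hdr + inner L2, which is
		 * what l2_len holds for a tunnelled packet */
		seqnum_seed |= tx_offload.l2_len
			       << IXGBE_ADVTXD_TUNNEL_LEN;
	}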

> +
>  	txq->ctx_cache[ctx_idx].flags = ol_flags;
> -	txq->ctx_cache[ctx_idx].tx_offload.data  =
> -		tx_offload_mask.data & tx_offload.data;
> +	txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
> +		tx_offload_mask.data[0] & tx_offload.data[0];
> +	txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
> +		tx_offload_mask.data[1] & tx_offload.data[1];
>  	txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
> 
>  	ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
> @@ -441,7 +455,7 @@ ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
>  	vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
>  	ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
>  	ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
> -	ctx_txd->seqnum_seed     = 0;
> +	ctx_txd->seqnum_seed     = seqnum_seed;
>  }
> 
>  /*
> @@ -454,16 +468,24 @@ what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
>  {
>  	/* If match with the current used context */
>  	if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
> -		(txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
> -		(txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
> +		(txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
> +		(txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
> +		 & tx_offload.data[0])) &&
> +		(txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
> +		(txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
> +		 & tx_offload.data[1])))) {
>  			return txq->ctx_curr;
>  	}
> 
>  	/* What if match with the next context  */
>  	txq->ctx_curr ^= 1;
>  	if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
> -		(txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
> -		(txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
> +		(txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
> +		(txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
> +		 & tx_offload.data[0])) &&
> +		(txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
> +		(txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
> +		 & tx_offload.data[1])))) {
>  			return txq->ctx_curr;
>  	}
> 
> @@ -492,6 +514,12 @@ tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
>  		cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
>  	if (ol_flags & PKT_TX_TCP_SEG)
>  		cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
> +	if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
> +		cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
> +	if (ol_flags & PKT_TX_VXLAN_PKT)
> +		cmdtype &= ~(1 << IXGBE_ADVTXD_TUNNEL_TYPE_NVGRE);
> +	else
> +		cmdtype |= (1 << IXGBE_ADVTXD_TUNNEL_TYPE_NVGRE);
>  	return cmdtype;
>  }
> 
> @@ -588,8 +616,10 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
>  	uint64_t tx_ol_req;
>  	uint32_t ctx = 0;
>  	uint32_t new_ctx;
> -	union ixgbe_tx_offload tx_offload = {0};
> +	union ixgbe_tx_offload tx_offload;
> 
> +	tx_offload.data[0] = 0;
> +	tx_offload.data[1] = 0;
>  	txq = tx_queue;
>  	sw_ring = txq->sw_ring;
>  	txr     = txq->tx_ring;
> @@ -623,6 +653,8 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
>  			tx_offload.l4_len = tx_pkt->l4_len;
>  			tx_offload.vlan_tci = tx_pkt->vlan_tci;
>  			tx_offload.tso_segsz = tx_pkt->tso_segsz;
> +			tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
> +			tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
> 
>  			/* If new context need be built or reuse the exist ctx. */
>  			ctx = what_advctx_update(txq, tx_ol_req,
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx.h b/drivers/net/ixgbe/ixgbe_rxtx.h
> index 475a800..c15f9fa 100644
> --- a/drivers/net/ixgbe/ixgbe_rxtx.h
> +++ b/drivers/net/ixgbe/ixgbe_rxtx.h
> @@ -163,7 +163,7 @@ enum ixgbe_advctx_num {
> 
>  /** Offload features */
>  union ixgbe_tx_offload {
> -	uint64_t data;
> +	uint64_t data[2];

I wonder whether there is any performance impact from increasing the size of tx_offload?

>  	struct {
>  		uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
>  		uint64_t l3_len:9; /**< L3 (IP) Header Length. */
> @@ -171,6 +171,10 @@ union ixgbe_tx_offload {
>  		uint64_t tso_segsz:16; /**< TCP TSO segment size */
>  		uint64_t vlan_tci:16;
>  		/**< VLAN Tag Control Identifier (CPU order). */
> +
> +		/* fields for TX offloading of tunnels */
> +		uint64_t outer_l3_len:8; /**< Outer L3 (IP) Hdr Length. */
> +		uint64_t outer_l2_len:8; /**< Outer L2 (MAC) Hdr Length. */
>  	};
>  };
> 
> diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
> index 5ad5e59..1bda00e 100644
> --- a/lib/librte_mbuf/rte_mbuf.h
> +++ b/lib/librte_mbuf/rte_mbuf.h
> @@ -103,6 +103,8 @@ extern "C" {
> 
>  /* add new TX flags here */
> 
> +#define PKT_TX_VXLAN_PKT      (1ULL << 48) /**< TX packet is a VxLAN packet. */
> +

From reading the X550 spec, I don't really understand why we need to specify whether it
is a GRE or VXLAN packet, so probably we don't need that flag for now at all?
If we really do, it might be worth organising it like PKT_TX_L4_CKSUM (as an enum)
and reserving a few values for future expansion (2 or 3 bits?).
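
For example (purely hypothetical layout, mirroring how the PKT_TX_L4_MASK /
PKT_TX_*_CKSUM flags use a multi-bit field; names and bit positions are
placeholders, not existing mbuf flags):

	/* 2-bit tunnel-type field, leaving room for two more types */
	#define PKT_TX_TUNNEL_VXLAN   (1ULL << 48)
	#define PKT_TX_TUNNEL_GRE     (2ULL << 48)
	#define PKT_TX_TUNNEL_MASK    (3ULL << 48)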

Konstantin
  
Wenzhuo Lu Feb. 15, 2016, 5:32 a.m. UTC | #2
Hi Konstantin,

> -----Original Message-----
> From: Ananyev, Konstantin
> Sent: Friday, February 5, 2016 2:55 AM
> To: Lu, Wenzhuo; dev@dpdk.org
> Subject: RE: [dpdk-dev] [PATCH v2 5/6] ixgbe: support VxLAN & NVGRE TX
> checksum off-load
> 
> Hi Wenzhuo,
> 
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Wenzhuo Lu
> > Sent: Thursday, January 14, 2016 1:39 AM
> > To: dev@dpdk.org
> > Subject: [dpdk-dev] [PATCH v2 5/6] ixgbe: support VxLAN & NVGRE TX
> > checksum off-load
> >
> > The patch add VxLAN & NVGRE TX checksum off-load. When the flag of
> > outer IP header checksum offload is set, we'll set the context
> > descriptor to enable this checksum off-load.
> >
> > Signed-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>
> > ---
> >  drivers/net/ixgbe/ixgbe_rxtx.c | 52
> > ++++++++++++++++++++++++++++++++++--------
> >  drivers/net/ixgbe/ixgbe_rxtx.h |  6 ++++-
> >  lib/librte_mbuf/rte_mbuf.h     |  2 ++
> >  3 files changed, 49 insertions(+), 11 deletions(-)
> >
> > diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c
> > b/drivers/net/ixgbe/ixgbe_rxtx.c index 512ac3a..fea2495 100644
> > --- a/drivers/net/ixgbe/ixgbe_rxtx.c
> > +++ b/drivers/net/ixgbe/ixgbe_rxtx.c
> > @@ -85,7 +85,8 @@
> >  		PKT_TX_VLAN_PKT |		 \
> >  		PKT_TX_IP_CKSUM |		 \
> >  		PKT_TX_L4_MASK |		 \
> > -		PKT_TX_TCP_SEG)
> > +		PKT_TX_TCP_SEG |		 \
> > +		PKT_TX_OUTER_IP_CKSUM)
> 
> 
> I think you also need to update dev_info.tx_offload_capa, couldn't find where
> you doing it.
My bad. I'll add it. Thanks.

> 
> >
> >  static inline struct rte_mbuf *
> >  rte_rxmbuf_alloc(struct rte_mempool *mp) @@ -364,9 +365,11 @@
> > ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
> >  	uint32_t ctx_idx;
> >  	uint32_t vlan_macip_lens;
> >  	union ixgbe_tx_offload tx_offload_mask;
> > +	uint32_t seqnum_seed = 0;
> >
> >  	ctx_idx = txq->ctx_curr;
> > -	tx_offload_mask.data = 0;
> > +	tx_offload_mask.data[0] = 0;
> > +	tx_offload_mask.data[1] = 0;
> >  	type_tucmd_mlhl = 0;
> >
> >  	/* Specify which HW CTX to upload. */ @@ -430,9 +433,20 @@
> > ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
> >  		}
> >  	}
> >
> > +	if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
> > +		tx_offload_mask.outer_l3_len |= ~0;
> > +		tx_offload_mask.outer_l2_len |= ~0;
> > +		seqnum_seed |= tx_offload.outer_l3_len
> > +			       << IXGBE_ADVTXD_OUTER_IPLEN;
> > +		seqnum_seed |= tx_offload.outer_l2_len
> > +			       << IXGBE_ADVTXD_TUNNEL_LEN;
> > +	}
> 
> I don't have an X550 card off-hand, but reading through datasheet - it doesn't
> seem right.
> When OUTER_IP_CKSUM is enabled MACLEN becomes outer_l2_len and
> TUNNEL_LEN should be: outer_l4_len + tunnel_hdr_len + inner_l2_len.
> So I think that in our case TUNNEL_LEN should be set to l2_len.
Yes, you're right. I made a mistake here. Will correct it. Thanks.

> 
> > +
> >  	txq->ctx_cache[ctx_idx].flags = ol_flags;
> > -	txq->ctx_cache[ctx_idx].tx_offload.data  =
> > -		tx_offload_mask.data & tx_offload.data;
> > +	txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
> > +		tx_offload_mask.data[0] & tx_offload.data[0];
> > +	txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
> > +		tx_offload_mask.data[1] & tx_offload.data[1];
> >  	txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
> >
> >  	ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
> > @@ -441,7 +455,7 @@ ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
> >  	vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci <<
> IXGBE_ADVTXD_VLAN_SHIFT);
> >  	ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
> >  	ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
> > -	ctx_txd->seqnum_seed     = 0;
> > +	ctx_txd->seqnum_seed     = seqnum_seed;
> >  }
> >
> >  /*
> > @@ -454,16 +468,24 @@ what_advctx_update(struct ixgbe_tx_queue *txq,
> > uint64_t flags,  {
> >  	/* If match with the current used context */
> >  	if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
> > -		(txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
> > -		(txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data &
> tx_offload.data)))) {
> > +		(txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
> > +		(txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
> > +		 & tx_offload.data[0])) &&
> > +		(txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
> > +		(txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
> > +		 & tx_offload.data[1])))) {
> >  			return txq->ctx_curr;
> >  	}
> >
> >  	/* What if match with the next context  */
> >  	txq->ctx_curr ^= 1;
> >  	if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
> > -		(txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
> > -		(txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data &
> tx_offload.data)))) {
> > +		(txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
> > +		(txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
> > +		 & tx_offload.data[0])) &&
> > +		(txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
> > +		(txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
> > +		 & tx_offload.data[1])))) {
> >  			return txq->ctx_curr;
> >  	}
> >
> > @@ -492,6 +514,12 @@ tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
> >  		cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
> >  	if (ol_flags & PKT_TX_TCP_SEG)
> >  		cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
> > +	if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
> > +		cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
> > +	if (ol_flags & PKT_TX_VXLAN_PKT)
> > +		cmdtype &= ~(1 << IXGBE_ADVTXD_TUNNEL_TYPE_NVGRE);
> > +	else
> > +		cmdtype |= (1 << IXGBE_ADVTXD_TUNNEL_TYPE_NVGRE);
> >  	return cmdtype;
> >  }
> >
> > @@ -588,8 +616,10 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf
> **tx_pkts,
> >  	uint64_t tx_ol_req;
> >  	uint32_t ctx = 0;
> >  	uint32_t new_ctx;
> > -	union ixgbe_tx_offload tx_offload = {0};
> > +	union ixgbe_tx_offload tx_offload;
> >
> > +	tx_offload.data[0] = 0;
> > +	tx_offload.data[1] = 0;
> >  	txq = tx_queue;
> >  	sw_ring = txq->sw_ring;
> >  	txr     = txq->tx_ring;
> > @@ -623,6 +653,8 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf
> **tx_pkts,
> >  			tx_offload.l4_len = tx_pkt->l4_len;
> >  			tx_offload.vlan_tci = tx_pkt->vlan_tci;
> >  			tx_offload.tso_segsz = tx_pkt->tso_segsz;
> > +			tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
> > +			tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
> >
> >  			/* If new context need be built or reuse the exist ctx. */
> >  			ctx = what_advctx_update(txq, tx_ol_req, diff --git
> > a/drivers/net/ixgbe/ixgbe_rxtx.h b/drivers/net/ixgbe/ixgbe_rxtx.h
> > index 475a800..c15f9fa 100644
> > --- a/drivers/net/ixgbe/ixgbe_rxtx.h
> > +++ b/drivers/net/ixgbe/ixgbe_rxtx.h
> > @@ -163,7 +163,7 @@ enum ixgbe_advctx_num {
> >
> >  /** Offload features */
> >  union ixgbe_tx_offload {
> > -	uint64_t data;
> > +	uint64_t data[2];
> 
> I wonder what is there any performance impact of increasing size of tx_offload?
I believe there's some impact, but it's small enough to be ignored. In my tests there's no performance drop :)

> 
> >  	struct {
> >  		uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
> >  		uint64_t l3_len:9; /**< L3 (IP) Header Length. */ @@ -171,6
> +171,10
> > @@ union ixgbe_tx_offload {
> >  		uint64_t tso_segsz:16; /**< TCP TSO segment size */
> >  		uint64_t vlan_tci:16;
> >  		/**< VLAN Tag Control Identifier (CPU order). */
> > +
> > +		/* fields for TX offloading of tunnels */
> > +		uint64_t outer_l3_len:8; /**< Outer L3 (IP) Hdr Length. */
> > +		uint64_t outer_l2_len:8; /**< Outer L2 (MAC) Hdr Length. */
> >  	};
> >  };
> >
> > diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
> > index 5ad5e59..1bda00e 100644
> > --- a/lib/librte_mbuf/rte_mbuf.h
> > +++ b/lib/librte_mbuf/rte_mbuf.h
> > @@ -103,6 +103,8 @@ extern "C" {
> >
> >  /* add new TX flags here */
> >
> > +#define PKT_TX_VXLAN_PKT      (1ULL << 48) /**< TX packet is a VxLAN
> packet. */
> > +
> 
> From reading X550 spec, I don't really understand what for we need to specify is
> it GRE or VXLAN packet, so probably we don't need that flag for now at all?
The reason is that we need to set the tunnel type in the Advanced Transmit Data Descriptor.

> If we really do, might bw worth to organise it like KT_TX_L4_CKSUM (as enum)
> and reserve few values for future expansion (2 or 3 bits?).
Currently only VxLAN and NVGRE are supported, so only 1 bit is occupied for that. I'm not sure whether more types will be supported in the future. Any suggestions? Thanks.

> 
> Konstantin
  
Thomas Monjalon Feb. 15, 2016, 10:03 a.m. UTC | #3
2016-02-15 05:32, Lu, Wenzhuo:
> From: Ananyev, Konstantin
> > From reading X550 spec, I don't really understand what for we need to specify is
> > it GRE or VXLAN packet, so probably we don't need that flag for now at all?
> The reason is we need to set the tunnel type in the Advanced Transmit Data Descriptor.

Why don't we use packet type?

> > If we really do, might bw worth to organise it like KT_TX_L4_CKSUM (as enum)
> > and reserve few values for future expansion (2 or 3 bits?).
> Now there're only VxLAN and NVGRE supported. So, only 1 bit is occupied for that. Not sure if more types will be supported in the future. Any suggestions? Thanks.

Yes, there can be a lot of tunnel types.
Please check RTE_PTYPE_TUNNEL_*
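
For reference, rte_mbuf.h already defines, for example (values as at the time
of writing; check the header for the authoritative list):

	#define RTE_PTYPE_TUNNEL_IP      0x00001000
	#define RTE_PTYPE_TUNNEL_GRE     0x00002000
	#define RTE_PTYPE_TUNNEL_VXLAN   0x00003000
	#define RTE_PTYPE_TUNNEL_NVGRE   0x00004000
	#define RTE_PTYPE_TUNNEL_GENEVE  0x00005000
	#define RTE_PTYPE_TUNNEL_GRENAT  0x00006000
	#define RTE_PTYPE_TUNNEL_MASK    0x0000f000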
  
Ananyev, Konstantin Feb. 15, 2016, 1:15 p.m. UTC | #4
Hi lads,

> -----Original Message-----
> From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> Sent: Monday, February 15, 2016 10:03 AM
> To: Lu, Wenzhuo
> Cc: dev@dpdk.org; Ananyev, Konstantin
> Subject: Re: [dpdk-dev] [PATCH v2 5/6] ixgbe: support VxLAN & NVGRE TX checksum off-load
> 
> 2016-02-15 05:32, Lu, Wenzhuo:
> > From: Ananyev, Konstantin
> > > From reading X550 spec, I don't really understand what for we need to specify is
> > > it GRE or VXLAN packet, so probably we don't need that flag for now at all?
> > The reason is we need to set the tunnel type in the Advanced Transmit Data Descriptor.

Yes, I saw that in the x550 spec, though I really doubt it is needed to calculate either
the outer IP or the inner IP/L4 checksum.
After all, TUNNELLEN includes all tunnel headers plus the inner L2 length.
The only reason I can think it might be useful for HW is when packets are sent from one
VF to another.
In that case, HW probably would try to avoid extra packet scanning and rely on information from SW.
As mentioned in a note in the x550 spec:
"...In virtualization mode, packets might be received from other local VMs. The X550 does not
check the L5 header for these packets and does not report NFS header. If such packets carry
IP tunneling (IPv4 - IPv6), they are reported as IPV4E. The packets received from local VM
are indicated by the LB bit in the status field. To be identified, the CC bit in the transmit
descriptor must be set and if it is a tunnel packet, the TUNNEL.OUTERIPCS must be set also."
Though I am not sure we need to support that case.
That's why the easiest way would probably be to avoid setting the 'Tunnel Type' field at all.
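
I.e. the data-descriptor path could then simply be (sketch):

	if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
		cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
	/* no tunnel-type bit programmed at all, so the PKT_TX_VXLAN_PKT
	 * mbuf flag would no longer be needed */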

> 
> Why don't we use packet type?

Yes, that's probably another possible way, if we really decide to set up this info in the TX descriptor.

BTW, reading the x550 spec, I think the ixgbe RX code that sets up packet_type needs to be updated too.
Right now it can't handle the vxlan/gre tunnelling case.

Konstantin

> 
> > > If we really do, might bw worth to organise it like KT_TX_L4_CKSUM (as enum)
> > > and reserve few values for future expansion (2 or 3 bits?).
> > Now there're only VxLAN and NVGRE supported. So, only 1 bit is occupied for that. Not sure if more types will be supported in the
> future. Any suggestions? Thanks.
> 
> Yes there can be a lot of tunnel types.
> Please check RTE_PTYPE_TUNNEL_*
>
  
Wenzhuo Lu Feb. 17, 2016, 5:47 a.m. UTC | #5
Hi Konstantin,

> -----Original Message-----
> From: Ananyev, Konstantin
> Sent: Monday, February 15, 2016 9:16 PM
> To: Thomas Monjalon; Lu, Wenzhuo
> Cc: dev@dpdk.org
> Subject: RE: [dpdk-dev] [PATCH v2 5/6] ixgbe: support VxLAN & NVGRE TX
> checksum off-load
> 
> Hi lads,
> 
> > -----Original Message-----
> > From: Thomas Monjalon [mailto:thomas.monjalon@6wind.com]
> > Sent: Monday, February 15, 2016 10:03 AM
> > To: Lu, Wenzhuo
> > Cc: dev@dpdk.org; Ananyev, Konstantin
> > Subject: Re: [dpdk-dev] [PATCH v2 5/6] ixgbe: support VxLAN & NVGRE TX
> > checksum off-load
> >
> > 2016-02-15 05:32, Lu, Wenzhuo:
> > > From: Ananyev, Konstantin
> > > > From reading X550 spec, I don't really understand what for we need
> > > > to specify is it GRE or VXLAN packet, so probably we don't need that flag
> for now at all?
> > > The reason is we need to set the tunnel type in the Advanced Transmit Data
> Descriptor.
> 
> Yes, I saw that in x550 spec, though I really doubt it is needed to calculate etiher
> outer IP, or inner IP/L4 checksum.
> After all TUNNELLEN includes all tunnel headers plus inner L2 length.
> The only reason I can think it might be useful for  HW is when packets are sent
> from one VF to another.
> In that case, HW probably would try to avoid extra packet scanning and rely on
> information from SW.
> As mentioned in x550 spec, section's  note:
> "...In virtualization mode, packets might be received from other local VMs. The
> X550 does not check the L5 header for these packets and does not report NFS
> header. If such packets carry IP tunneling (IPv4 - IPv6), they are reported as
> IPV4E. The packets received from local VM are indicated by the LB bit in the
> status field. To be identified, the CC bit in the transmit descriptor must be set
> and if it is a tunnel packet, the TUNNEL.OUTERIPCS must be set also."
> Though I am not sure we do need to support that case.
> That's why probably the easiest way would be to avoid setting 'Tunnel Type' field
> at all.
Many thanks for your suggestion. I think you're right.
For outer IP header checksum offload, we don't need to set the tunnel type bit. I'll choose this option and remove the vxlan ol_flag.

> 
> >
> > Why don't we use packet type?
> 
> Yes, that's probably another possible way, if we really decide to setup this info
> at TX descriptor.
> 
> BTW, reading x550 spec, I think ixgbe RX code that setups packet_type need to
> be updated too.
> Right now it can't handle vxlan/gre tunnelling case.
> 
> Konstantin
> 
> >
> > > > If we really do, might bw worth to organise it like KT_TX_L4_CKSUM
> > > > (as enum) and reserve few values for future expansion (2 or 3 bits?).
> > > Now there're only VxLAN and NVGRE supported. So, only 1 bit is
> > > occupied for that. Not sure if more types will be supported in the
> > future. Any suggestions? Thanks.
> >
> > Yes there can be a lot of tunnel types.
> > Please check RTE_PTYPE_TUNNEL_*
> >
  

Patch

diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index 512ac3a..fea2495 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -85,7 +85,8 @@ 
 		PKT_TX_VLAN_PKT |		 \
 		PKT_TX_IP_CKSUM |		 \
 		PKT_TX_L4_MASK |		 \
-		PKT_TX_TCP_SEG)
+		PKT_TX_TCP_SEG |		 \
+		PKT_TX_OUTER_IP_CKSUM)
 
 static inline struct rte_mbuf *
 rte_rxmbuf_alloc(struct rte_mempool *mp)
@@ -364,9 +365,11 @@  ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
 	uint32_t ctx_idx;
 	uint32_t vlan_macip_lens;
 	union ixgbe_tx_offload tx_offload_mask;
+	uint32_t seqnum_seed = 0;
 
 	ctx_idx = txq->ctx_curr;
-	tx_offload_mask.data = 0;
+	tx_offload_mask.data[0] = 0;
+	tx_offload_mask.data[1] = 0;
 	type_tucmd_mlhl = 0;
 
 	/* Specify which HW CTX to upload. */
@@ -430,9 +433,20 @@  ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
 		}
 	}
 
+	if (ol_flags & PKT_TX_OUTER_IP_CKSUM) {
+		tx_offload_mask.outer_l3_len |= ~0;
+		tx_offload_mask.outer_l2_len |= ~0;
+		seqnum_seed |= tx_offload.outer_l3_len
+			       << IXGBE_ADVTXD_OUTER_IPLEN;
+		seqnum_seed |= tx_offload.outer_l2_len
+			       << IXGBE_ADVTXD_TUNNEL_LEN;
+	}
+
 	txq->ctx_cache[ctx_idx].flags = ol_flags;
-	txq->ctx_cache[ctx_idx].tx_offload.data  =
-		tx_offload_mask.data & tx_offload.data;
+	txq->ctx_cache[ctx_idx].tx_offload.data[0]  =
+		tx_offload_mask.data[0] & tx_offload.data[0];
+	txq->ctx_cache[ctx_idx].tx_offload.data[1]  =
+		tx_offload_mask.data[1] & tx_offload.data[1];
 	txq->ctx_cache[ctx_idx].tx_offload_mask    = tx_offload_mask;
 
 	ctx_txd->type_tucmd_mlhl = rte_cpu_to_le_32(type_tucmd_mlhl);
@@ -441,7 +455,7 @@  ixgbe_set_xmit_ctx(struct ixgbe_tx_queue *txq,
 	vlan_macip_lens |= ((uint32_t)tx_offload.vlan_tci << IXGBE_ADVTXD_VLAN_SHIFT);
 	ctx_txd->vlan_macip_lens = rte_cpu_to_le_32(vlan_macip_lens);
 	ctx_txd->mss_l4len_idx   = rte_cpu_to_le_32(mss_l4len_idx);
-	ctx_txd->seqnum_seed     = 0;
+	ctx_txd->seqnum_seed     = seqnum_seed;
 }
 
 /*
@@ -454,16 +468,24 @@  what_advctx_update(struct ixgbe_tx_queue *txq, uint64_t flags,
 {
 	/* If match with the current used context */
 	if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
-		(txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
-		(txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
+		(txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
+		(txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
+		 & tx_offload.data[0])) &&
+		(txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
+		(txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
+		 & tx_offload.data[1])))) {
 			return txq->ctx_curr;
 	}
 
 	/* What if match with the next context  */
 	txq->ctx_curr ^= 1;
 	if (likely((txq->ctx_cache[txq->ctx_curr].flags == flags) &&
-		(txq->ctx_cache[txq->ctx_curr].tx_offload.data ==
-		(txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data & tx_offload.data)))) {
+		(txq->ctx_cache[txq->ctx_curr].tx_offload.data[0] ==
+		(txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[0]
+		 & tx_offload.data[0])) &&
+		(txq->ctx_cache[txq->ctx_curr].tx_offload.data[1] ==
+		(txq->ctx_cache[txq->ctx_curr].tx_offload_mask.data[1]
+		 & tx_offload.data[1])))) {
 			return txq->ctx_curr;
 	}
 
@@ -492,6 +514,12 @@  tx_desc_ol_flags_to_cmdtype(uint64_t ol_flags)
 		cmdtype |= IXGBE_ADVTXD_DCMD_VLE;
 	if (ol_flags & PKT_TX_TCP_SEG)
 		cmdtype |= IXGBE_ADVTXD_DCMD_TSE;
+	if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
+		cmdtype |= (1 << IXGBE_ADVTXD_OUTERIPCS_SHIFT);
+	if (ol_flags & PKT_TX_VXLAN_PKT)
+		cmdtype &= ~(1 << IXGBE_ADVTXD_TUNNEL_TYPE_NVGRE);
+	else
+		cmdtype |= (1 << IXGBE_ADVTXD_TUNNEL_TYPE_NVGRE);
 	return cmdtype;
 }
 
@@ -588,8 +616,10 @@  ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 	uint64_t tx_ol_req;
 	uint32_t ctx = 0;
 	uint32_t new_ctx;
-	union ixgbe_tx_offload tx_offload = {0};
+	union ixgbe_tx_offload tx_offload;
 
+	tx_offload.data[0] = 0;
+	tx_offload.data[1] = 0;
 	txq = tx_queue;
 	sw_ring = txq->sw_ring;
 	txr     = txq->tx_ring;
@@ -623,6 +653,8 @@  ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 			tx_offload.l4_len = tx_pkt->l4_len;
 			tx_offload.vlan_tci = tx_pkt->vlan_tci;
 			tx_offload.tso_segsz = tx_pkt->tso_segsz;
+			tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
+			tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
 
 			/* If new context need be built or reuse the exist ctx. */
 			ctx = what_advctx_update(txq, tx_ol_req,
diff --git a/drivers/net/ixgbe/ixgbe_rxtx.h b/drivers/net/ixgbe/ixgbe_rxtx.h
index 475a800..c15f9fa 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.h
+++ b/drivers/net/ixgbe/ixgbe_rxtx.h
@@ -163,7 +163,7 @@  enum ixgbe_advctx_num {
 
 /** Offload features */
 union ixgbe_tx_offload {
-	uint64_t data;
+	uint64_t data[2];
 	struct {
 		uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
 		uint64_t l3_len:9; /**< L3 (IP) Header Length. */
@@ -171,6 +171,10 @@  union ixgbe_tx_offload {
 		uint64_t tso_segsz:16; /**< TCP TSO segment size */
 		uint64_t vlan_tci:16;
 		/**< VLAN Tag Control Identifier (CPU order). */
+
+		/* fields for TX offloading of tunnels */
+		uint64_t outer_l3_len:8; /**< Outer L3 (IP) Hdr Length. */
+		uint64_t outer_l2_len:8; /**< Outer L2 (MAC) Hdr Length. */
 	};
 };
 
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index 5ad5e59..1bda00e 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -103,6 +103,8 @@  extern "C" {
 
 /* add new TX flags here */
 
+#define PKT_TX_VXLAN_PKT      (1ULL << 48) /**< TX packet is a VxLAN packet. */
+
 /**
  * Second VLAN insertion (QinQ) flag.
  */