diff mbox

[dpdk-dev,07/12] mbuf: generic support for TCP segmentation offload

Message ID 1415635166-1364-8-git-send-email-olivier.matz@6wind.com (mailing list archive)
State Superseded, archived
Headers show

Commit Message

Olivier Matz Nov. 10, 2014, 3:59 p.m. UTC
Some of the NICs supported by DPDK have a possibility to accelerate TCP
traffic by using segmentation offload. The application prepares a packet
with a valid TCP header with size up to 64K and delegates the
segmentation to the NIC.

Implement the generic part of TCP segmentation offload in rte_mbuf. It
introduces 2 new fields in rte_mbuf: l4_len (length of L4 header in bytes)
and tso_segsz (MSS of packets).

To delegate the TCP segmentation to the hardware, the user has to:

- set the PKT_TX_TCP_SEG flag in mbuf->ol_flags (this flag implies
  PKT_TX_TCP_CKSUM)
- set PKT_TX_IP_CKSUM if it's IPv4, and set the IP checksum to 0 in
  the packet
- fill the mbuf offload information: l2_len, l3_len, l4_len, tso_segsz
- calculate the pseudo header checksum and set it in the TCP header,
  as required when doing hardware TCP checksum offload

The API is inspired by the ixgbe hardware (the next commit adds the
support for ixgbe), but it seems generic enough to be used for other
hw/drivers in the future.

This commit also reworks the way l2_len and l3_len are used in igb
and ixgbe drivers as the l2_l3_len is not available anymore in mbuf.

Signed-off-by: Mirek Walukiewicz <miroslaw.walukiewicz@intel.com>
Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
---
 app/test-pmd/testpmd.c            |  3 ++-
 examples/ipv4_multicast/main.c    |  3 ++-
 lib/librte_mbuf/rte_mbuf.h        | 44 +++++++++++++++++++++++----------------
 lib/librte_pmd_e1000/igb_rxtx.c   | 11 +++++++++-
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 11 +++++++++-
 5 files changed, 50 insertions(+), 22 deletions(-)

Comments

Jijiang Liu Nov. 11, 2014, 3:17 a.m. UTC | #1
> -----Original Message-----
> From: Olivier Matz [mailto:olivier.matz@6wind.com]
> Sent: Monday, November 10, 2014 11:59 PM
> To: dev@dpdk.org
> Cc: olivier.matz@6wind.com; Walukiewicz, Miroslaw; Liu, Jijiang; Liu, Yong;
> jigsaw@gmail.com; Richardson, Bruce; Ananyev, Konstantin
> Subject: [PATCH 07/12] mbuf: generic support for TCP segmentation offload
> 
> Some of the NICs supported by DPDK have a possibility to accelerate TCP traffic
> by using segmentation offload. The application prepares a packet with valid TCP
> header with size up to 64K and deleguates the segmentation to the NIC.
> 
> Implement the generic part of TCP segmentation offload in rte_mbuf. It
> introduces 2 new fields in rte_mbuf: l4_len (length of L4 header in bytes) and
> tso_segsz (MSS of packets).
> 
> To delegate the TCP segmentation to the hardware, the user has to:
> 
> - set the PKT_TX_TCP_SEG flag in mbuf->ol_flags (this flag implies
>   PKT_TX_TCP_CKSUM)
> - set PKT_TX_IP_CKSUM if it's IPv4, and set the IP checksum to 0 in
>   the packet
> - fill the mbuf offload information: l2_len, l3_len, l4_len, tso_segsz
> - calculate the pseudo header checksum and set it in the TCP header,
>   as required when doing hardware TCP checksum offload
> 
> The API is inspired from ixgbe hardware (the next commit adds the support for
> ixgbe), but it seems generic enough to be used for other hw/drivers in the future.
> 
> This commit also reworks the way l2_len and l3_len are used in igb and ixgbe
> drivers as the l2_l3_len is not available anymore in mbuf.
> 
> Signed-off-by: Mirek Walukiewicz <miroslaw.walukiewicz@intel.com>
> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> ---
>  app/test-pmd/testpmd.c            |  3 ++-
>  examples/ipv4_multicast/main.c    |  3 ++-
>  lib/librte_mbuf/rte_mbuf.h        | 44 +++++++++++++++++++++++----------------
>  lib/librte_pmd_e1000/igb_rxtx.c   | 11 +++++++++-
>  lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 11 +++++++++-
>  5 files changed, 50 insertions(+), 22 deletions(-)
> 
> diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c index
> 12adafa..a831e31 100644
> --- a/app/test-pmd/testpmd.c
> +++ b/app/test-pmd/testpmd.c
> @@ -408,7 +408,8 @@ testpmd_mbuf_ctor(struct rte_mempool *mp,
>  	mb->ol_flags     = 0;
>  	mb->data_off     = RTE_PKTMBUF_HEADROOM;
>  	mb->nb_segs      = 1;
> -	mb->l2_l3_len       = 0;
> +	mb->l2_len       = 0;
> +	mb->l3_len       = 0;

The mb->inner_l2_len and mb->inner_l3_len fields are missing here; I can also add them later.

>  	mb->vlan_tci     = 0;
>  	mb->hash.rss     = 0;
>  }
> diff --git a/examples/ipv4_multicast/main.c b/examples/ipv4_multicast/main.c
> index de5e6be..a31d43d 100644
> --- a/examples/ipv4_multicast/main.c
> +++ b/examples/ipv4_multicast/main.c
> @@ -302,7 +302,8 @@ mcast_out_pkt(struct rte_mbuf *pkt, int use_clone)
>  	/* copy metadata from source packet*/
>  	hdr->port = pkt->port;
>  	hdr->vlan_tci = pkt->vlan_tci;
> -	hdr->l2_l3_len = pkt->l2_l3_len;
> +	hdr->l2_len = pkt->l2_len;
> +	hdr->l3_len = pkt->l3_len;

The mb->inner_l2_len and mb->inner_l3_len fields are missing here, too.
    
>  	hdr->hash = pkt->hash;
> 
>  	hdr->ol_flags = pkt->ol_flags;
> diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h index
> bcd8996..f76b768 100644
> --- a/lib/librte_mbuf/rte_mbuf.h
> +++ b/lib/librte_mbuf/rte_mbuf.h
> @@ -126,6 +126,19 @@ extern "C" {
> 
>  #define PKT_TX_VXLAN_CKSUM   (1ULL << 50) /**< TX checksum of VXLAN
> computed by NIC */
> 
> +/**
> + * TCP segmentation offload. To enable this offload feature for a
> + * packet to be transmitted on hardware supporting TSO:
> + *  - set the PKT_TX_TCP_SEG flag in mbuf->ol_flags (this flag implies
> + *    PKT_TX_TCP_CKSUM)
> + *  - if it's IPv4, set the PKT_TX_IP_CKSUM flag and write the IP checksum
> + *    to 0 in the packet
> + *  - fill the mbuf offload information: l2_len, l3_len, l4_len,
> +tso_segsz
> + *  - calculate the pseudo header checksum and set it in the TCP header,
> + *    as required when doing hardware TCP checksum offload
> + */
> +#define PKT_TX_TCP_SEG       (1ULL << 49)
> +
>  /* Use final bit of flags to indicate a control mbuf */
>  #define CTRL_MBUF_FLAG       (1ULL << 63) /**< Mbuf contains control data */
> 
> @@ -185,6 +198,7 @@ static inline const char
> *rte_get_tx_ol_flag_name(uint64_t mask)
>  	case PKT_TX_UDP_CKSUM: return "PKT_TX_UDP_CKSUM";
>  	case PKT_TX_IEEE1588_TMST: return "PKT_TX_IEEE1588_TMST";
>  	case PKT_TX_VXLAN_CKSUM: return "PKT_TX_VXLAN_CKSUM";
> +	case PKT_TX_TCP_SEG: return "PKT_TX_TCP_SEG";
>  	default: return NULL;
>  	}
>  }
> @@ -264,22 +278,18 @@ struct rte_mbuf {
> 
>  	/* fields to support TX offloads */
>  	union {
> -		uint16_t l2_l3_len; /**< combined l2/l3 lengths as single var */
> +		uint64_t tx_offload;       /**< combined for easy fetch */
>  		struct {
> -			uint16_t l3_len:9;      /**< L3 (IP) Header Length. */
> -			uint16_t l2_len:7;      /**< L2 (MAC) Header Length. */
> -		};
> -	};
> +			uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
> +			uint64_t l3_len:9; /**< L3 (IP) Header Length. */
> +			uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
> +			uint64_t tso_segsz:16; /**< TCP TSO segment size */
> 
> -	/* fields for TX offloading of tunnels */
> -	union {
> -		uint16_t inner_l2_l3_len;
> -		/**< combined inner l2/l3 lengths as single var */
> -		struct {
> -			uint16_t inner_l3_len:9;
> -			/**< inner L3 (IP) Header Length. */
> -			uint16_t inner_l2_len:7;
> -			/**< inner L2 (MAC) Header Length. */
> +			/* fields for TX offloading of tunnels */
> +			uint16_t inner_l3_len:9; /**< inner L3 (IP) Hdr Length.
> */
> +			uint16_t inner_l2_len:7; /**< inner L2 (MAC) Hdr
> Length. */
> +
> +			/* uint64_t unused:8; */
>  		};
>  	};
>  } __rte_cache_aligned;
> @@ -631,8 +641,7 @@ static inline void rte_pktmbuf_reset(struct rte_mbuf *m)
> {
>  	m->next = NULL;
>  	m->pkt_len = 0;
> -	m->l2_l3_len = 0;
> -	m->inner_l2_l3_len = 0;
> +	m->tx_offload = 0;
>  	m->vlan_tci = 0;
>  	m->nb_segs = 1;
>  	m->port = 0xff;
> @@ -701,8 +710,7 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf
> *mi, struct rte_mbuf *md)
>  	mi->data_len = md->data_len;
>  	mi->port = md->port;
>  	mi->vlan_tci = md->vlan_tci;
> -	mi->l2_l3_len = md->l2_l3_len;
> -	mi->inner_l2_l3_len = md->inner_l2_l3_len;
> +	mi->tx_offload = md->tx_offload;
>  	mi->hash = md->hash;
> 
>  	mi->next = NULL;
> diff --git a/lib/librte_pmd_e1000/igb_rxtx.c b/lib/librte_pmd_e1000/igb_rxtx.c
> index dbf5074..0a9447e 100644
> --- a/lib/librte_pmd_e1000/igb_rxtx.c
> +++ b/lib/librte_pmd_e1000/igb_rxtx.c
> @@ -361,6 +361,13 @@ eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf
> **tx_pkts,
>  	struct rte_mbuf     *tx_pkt;
>  	struct rte_mbuf     *m_seg;
>  	union igb_vlan_macip vlan_macip_lens;
> +	union {
> +		uint16_t u16;
> +		struct {
> +			uint16_t l3_len:9;
> +			uint16_t l2_len:7;
> +		};
> +	} l2_l3_len;
>  	uint64_t buf_dma_addr;
>  	uint32_t olinfo_status;
>  	uint32_t cmd_type_len;
> @@ -398,8 +405,10 @@ eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf
> **tx_pkts,
>  		tx_last = (uint16_t) (tx_id + tx_pkt->nb_segs - 1);
> 
>  		ol_flags = tx_pkt->ol_flags;
> +		l2_l3_len.l2_len = tx_pkt->l2_len;
> +		l2_l3_len.l3_len = tx_pkt->l3_len;
>  		vlan_macip_lens.f.vlan_tci = tx_pkt->vlan_tci;
> -		vlan_macip_lens.f.l2_l3_len = tx_pkt->l2_l3_len;
> +		vlan_macip_lens.f.l2_l3_len = l2_l3_len.u16;
>  		tx_ol_req = ol_flags & (PKT_TX_VLAN_PKT | PKT_TX_IP_CKSUM
> |
>  			PKT_TX_L4_MASK);
> 
> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> index 70ca254..54a0fc1 100644
> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> @@ -540,6 +540,13 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf
> **tx_pkts,
>  	struct rte_mbuf     *tx_pkt;
>  	struct rte_mbuf     *m_seg;
>  	union ixgbe_vlan_macip vlan_macip_lens;
> +	union {
> +		uint16_t u16;
> +		struct {
> +			uint16_t l3_len:9;
> +			uint16_t l2_len:7;
> +		};
> +	} l2_l3_len;
>  	uint64_t buf_dma_addr;
>  	uint32_t olinfo_status;
>  	uint32_t cmd_type_len;
> @@ -583,8 +590,10 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf
> **tx_pkts,
>  		tx_ol_req = ol_flags & (PKT_TX_VLAN_PKT | PKT_TX_IP_CKSUM
> |
>  			PKT_TX_L4_MASK);
>  		if (tx_ol_req) {
> +			l2_l3_len.l2_len = tx_pkt->l2_len;
> +			l2_l3_len.l3_len = tx_pkt->l3_len;
>  			vlan_macip_lens.f.vlan_tci = tx_pkt->vlan_tci;
> -			vlan_macip_lens.f.l2_l3_len = tx_pkt->l2_l3_len;
> +			vlan_macip_lens.f.l2_l3_len = l2_l3_len.u16;
> 
>  			/* If new context need be built or reuse the exist ctx. */
>  			ctx = what_advctx_update(txq, tx_ol_req,
> --
> 2.1.0
Konstantin Ananyev Nov. 12, 2014, 1:09 p.m. UTC | #2
> -----Original Message-----
> From: Olivier Matz [mailto:olivier.matz@6wind.com]
> Sent: Monday, November 10, 2014 3:59 PM
> To: dev@dpdk.org
> Cc: olivier.matz@6wind.com; Walukiewicz, Miroslaw; Liu, Jijiang; Liu, Yong; jigsaw@gmail.com; Richardson, Bruce; Ananyev, Konstantin
> Subject: [PATCH 07/12] mbuf: generic support for TCP segmentation offload
> 
> Some of the NICs supported by DPDK have a possibility to accelerate TCP
> traffic by using segmentation offload. The application prepares a packet
> with valid TCP header with size up to 64K and deleguates the
> segmentation to the NIC.
> 
> Implement the generic part of TCP segmentation offload in rte_mbuf. It
> introduces 2 new fields in rte_mbuf: l4_len (length of L4 header in bytes)
> and tso_segsz (MSS of packets).
> 
> To delegate the TCP segmentation to the hardware, the user has to:
> 
> - set the PKT_TX_TCP_SEG flag in mbuf->ol_flags (this flag implies
>   PKT_TX_TCP_CKSUM)
> - set PKT_TX_IP_CKSUM if it's IPv4, and set the IP checksum to 0 in
>   the packet
> - fill the mbuf offload information: l2_len, l3_len, l4_len, tso_segsz
> - calculate the pseudo header checksum and set it in the TCP header,
>   as required when doing hardware TCP checksum offload
> 
> The API is inspired from ixgbe hardware (the next commit adds the
> support for ixgbe), but it seems generic enough to be used for other
> hw/drivers in the future.
> 
> This commit also reworks the way l2_len and l3_len are used in igb
> and ixgbe drivers as the l2_l3_len is not available anymore in mbuf.
> 
> Signed-off-by: Mirek Walukiewicz <miroslaw.walukiewicz@intel.com>
> Signed-off-by: Olivier Matz <olivier.matz@6wind.com>
> ---
>  app/test-pmd/testpmd.c            |  3 ++-
>  examples/ipv4_multicast/main.c    |  3 ++-
>  lib/librte_mbuf/rte_mbuf.h        | 44 +++++++++++++++++++++++----------------
>  lib/librte_pmd_e1000/igb_rxtx.c   | 11 +++++++++-
>  lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 11 +++++++++-
>  5 files changed, 50 insertions(+), 22 deletions(-)
> 
> diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
> index 12adafa..a831e31 100644
> --- a/app/test-pmd/testpmd.c
> +++ b/app/test-pmd/testpmd.c
> @@ -408,7 +408,8 @@ testpmd_mbuf_ctor(struct rte_mempool *mp,
>  	mb->ol_flags     = 0;
>  	mb->data_off     = RTE_PKTMBUF_HEADROOM;
>  	mb->nb_segs      = 1;
> -	mb->l2_l3_len       = 0;
> +	mb->l2_len       = 0;
> +	mb->l3_len       = 0;
>  	mb->vlan_tci     = 0;
>  	mb->hash.rss     = 0;
>  }
> diff --git a/examples/ipv4_multicast/main.c b/examples/ipv4_multicast/main.c
> index de5e6be..a31d43d 100644
> --- a/examples/ipv4_multicast/main.c
> +++ b/examples/ipv4_multicast/main.c
> @@ -302,7 +302,8 @@ mcast_out_pkt(struct rte_mbuf *pkt, int use_clone)
>  	/* copy metadata from source packet*/
>  	hdr->port = pkt->port;
>  	hdr->vlan_tci = pkt->vlan_tci;
> -	hdr->l2_l3_len = pkt->l2_l3_len;
> +	hdr->l2_len = pkt->l2_len;
> +	hdr->l3_len = pkt->l3_len;
>  	hdr->hash = pkt->hash;
> 
>  	hdr->ol_flags = pkt->ol_flags;
> diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
> index bcd8996..f76b768 100644
> --- a/lib/librte_mbuf/rte_mbuf.h
> +++ b/lib/librte_mbuf/rte_mbuf.h
> @@ -126,6 +126,19 @@ extern "C" {
> 
>  #define PKT_TX_VXLAN_CKSUM   (1ULL << 50) /**< TX checksum of VXLAN computed by NIC */
> 
> +/**
> + * TCP segmentation offload. To enable this offload feature for a
> + * packet to be transmitted on hardware supporting TSO:
> + *  - set the PKT_TX_TCP_SEG flag in mbuf->ol_flags (this flag implies
> + *    PKT_TX_TCP_CKSUM)
> + *  - if it's IPv4, set the PKT_TX_IP_CKSUM flag and write the IP checksum
> + *    to 0 in the packet
> + *  - fill the mbuf offload information: l2_len, l3_len, l4_len, tso_segsz
> + *  - calculate the pseudo header checksum and set it in the TCP header,
> + *    as required when doing hardware TCP checksum offload
> + */
> +#define PKT_TX_TCP_SEG       (1ULL << 49)
> +
>  /* Use final bit of flags to indicate a control mbuf */
>  #define CTRL_MBUF_FLAG       (1ULL << 63) /**< Mbuf contains control data */
> 
> @@ -185,6 +198,7 @@ static inline const char *rte_get_tx_ol_flag_name(uint64_t mask)
>  	case PKT_TX_UDP_CKSUM: return "PKT_TX_UDP_CKSUM";
>  	case PKT_TX_IEEE1588_TMST: return "PKT_TX_IEEE1588_TMST";
>  	case PKT_TX_VXLAN_CKSUM: return "PKT_TX_VXLAN_CKSUM";
> +	case PKT_TX_TCP_SEG: return "PKT_TX_TCP_SEG";
>  	default: return NULL;
>  	}
>  }
> @@ -264,22 +278,18 @@ struct rte_mbuf {
> 
>  	/* fields to support TX offloads */
>  	union {
> -		uint16_t l2_l3_len; /**< combined l2/l3 lengths as single var */
> +		uint64_t tx_offload;       /**< combined for easy fetch */
>  		struct {
> -			uint16_t l3_len:9;      /**< L3 (IP) Header Length. */
> -			uint16_t l2_len:7;      /**< L2 (MAC) Header Length. */
> -		};
> -	};
> +			uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
> +			uint64_t l3_len:9; /**< L3 (IP) Header Length. */
> +			uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
> +			uint64_t tso_segsz:16; /**< TCP TSO segment size */
> 
> -	/* fields for TX offloading of tunnels */
> -	union {
> -		uint16_t inner_l2_l3_len;
> -		/**< combined inner l2/l3 lengths as single var */
> -		struct {
> -			uint16_t inner_l3_len:9;
> -			/**< inner L3 (IP) Header Length. */
> -			uint16_t inner_l2_len:7;
> -			/**< inner L2 (MAC) Header Length. */
> +			/* fields for TX offloading of tunnels */
> +			uint16_t inner_l3_len:9; /**< inner L3 (IP) Hdr Length. */
> +			uint16_t inner_l2_len:7; /**< inner L2 (MAC) Hdr Length. */
> +

Shouldn't these 2 fields be bit fields of uint64_t too?
uint64_t inner_l3_len:9;
uint64_t inner_l2_len:7;  
So it fits into one uint64_t?

Konstantin

> +			/* uint64_t unused:8; */
>  		};
>  	};
>  } __rte_cache_aligned;
> @@ -631,8 +641,7 @@ static inline void rte_pktmbuf_reset(struct rte_mbuf *m)
>  {
>  	m->next = NULL;
>  	m->pkt_len = 0;
> -	m->l2_l3_len = 0;
> -	m->inner_l2_l3_len = 0;
> +	m->tx_offload = 0;
>  	m->vlan_tci = 0;
>  	m->nb_segs = 1;
>  	m->port = 0xff;
> @@ -701,8 +710,7 @@ static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *md)
>  	mi->data_len = md->data_len;
>  	mi->port = md->port;
>  	mi->vlan_tci = md->vlan_tci;
> -	mi->l2_l3_len = md->l2_l3_len;
> -	mi->inner_l2_l3_len = md->inner_l2_l3_len;
> +	mi->tx_offload = md->tx_offload;
>  	mi->hash = md->hash;
> 
>  	mi->next = NULL;
> diff --git a/lib/librte_pmd_e1000/igb_rxtx.c b/lib/librte_pmd_e1000/igb_rxtx.c
> index dbf5074..0a9447e 100644
> --- a/lib/librte_pmd_e1000/igb_rxtx.c
> +++ b/lib/librte_pmd_e1000/igb_rxtx.c
> @@ -361,6 +361,13 @@ eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
>  	struct rte_mbuf     *tx_pkt;
>  	struct rte_mbuf     *m_seg;
>  	union igb_vlan_macip vlan_macip_lens;
> +	union {
> +		uint16_t u16;
> +		struct {
> +			uint16_t l3_len:9;
> +			uint16_t l2_len:7;
> +		};
> +	} l2_l3_len;
>  	uint64_t buf_dma_addr;
>  	uint32_t olinfo_status;
>  	uint32_t cmd_type_len;
> @@ -398,8 +405,10 @@ eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
>  		tx_last = (uint16_t) (tx_id + tx_pkt->nb_segs - 1);
> 
>  		ol_flags = tx_pkt->ol_flags;
> +		l2_l3_len.l2_len = tx_pkt->l2_len;
> +		l2_l3_len.l3_len = tx_pkt->l3_len;
>  		vlan_macip_lens.f.vlan_tci = tx_pkt->vlan_tci;
> -		vlan_macip_lens.f.l2_l3_len = tx_pkt->l2_l3_len;
> +		vlan_macip_lens.f.l2_l3_len = l2_l3_len.u16;
>  		tx_ol_req = ol_flags & (PKT_TX_VLAN_PKT | PKT_TX_IP_CKSUM |
>  			PKT_TX_L4_MASK);
> 
> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> index 70ca254..54a0fc1 100644
> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> @@ -540,6 +540,13 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
>  	struct rte_mbuf     *tx_pkt;
>  	struct rte_mbuf     *m_seg;
>  	union ixgbe_vlan_macip vlan_macip_lens;
> +	union {
> +		uint16_t u16;
> +		struct {
> +			uint16_t l3_len:9;
> +			uint16_t l2_len:7;
> +		};
> +	} l2_l3_len;
>  	uint64_t buf_dma_addr;
>  	uint32_t olinfo_status;
>  	uint32_t cmd_type_len;
> @@ -583,8 +590,10 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
>  		tx_ol_req = ol_flags & (PKT_TX_VLAN_PKT | PKT_TX_IP_CKSUM |
>  			PKT_TX_L4_MASK);
>  		if (tx_ol_req) {
> +			l2_l3_len.l2_len = tx_pkt->l2_len;
> +			l2_l3_len.l3_len = tx_pkt->l3_len;
>  			vlan_macip_lens.f.vlan_tci = tx_pkt->vlan_tci;
> -			vlan_macip_lens.f.l2_l3_len = tx_pkt->l2_l3_len;
> +			vlan_macip_lens.f.l2_l3_len = l2_l3_len.u16;
> 
>  			/* If new context need be built or reuse the exist ctx. */
>  			ctx = what_advctx_update(txq, tx_ol_req,
> --
> 2.1.0
diff mbox

Patch

diff --git a/app/test-pmd/testpmd.c b/app/test-pmd/testpmd.c
index 12adafa..a831e31 100644
--- a/app/test-pmd/testpmd.c
+++ b/app/test-pmd/testpmd.c
@@ -408,7 +408,8 @@  testpmd_mbuf_ctor(struct rte_mempool *mp,
 	mb->ol_flags     = 0;
 	mb->data_off     = RTE_PKTMBUF_HEADROOM;
 	mb->nb_segs      = 1;
-	mb->l2_l3_len       = 0;
+	mb->l2_len       = 0;
+	mb->l3_len       = 0;
 	mb->vlan_tci     = 0;
 	mb->hash.rss     = 0;
 }
diff --git a/examples/ipv4_multicast/main.c b/examples/ipv4_multicast/main.c
index de5e6be..a31d43d 100644
--- a/examples/ipv4_multicast/main.c
+++ b/examples/ipv4_multicast/main.c
@@ -302,7 +302,8 @@  mcast_out_pkt(struct rte_mbuf *pkt, int use_clone)
 	/* copy metadata from source packet*/
 	hdr->port = pkt->port;
 	hdr->vlan_tci = pkt->vlan_tci;
-	hdr->l2_l3_len = pkt->l2_l3_len;
+	hdr->l2_len = pkt->l2_len;
+	hdr->l3_len = pkt->l3_len;
 	hdr->hash = pkt->hash;
 
 	hdr->ol_flags = pkt->ol_flags;
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index bcd8996..f76b768 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -126,6 +126,19 @@  extern "C" {
 
 #define PKT_TX_VXLAN_CKSUM   (1ULL << 50) /**< TX checksum of VXLAN computed by NIC */
 
+/**
+ * TCP segmentation offload. To enable this offload feature for a
+ * packet to be transmitted on hardware supporting TSO:
+ *  - set the PKT_TX_TCP_SEG flag in mbuf->ol_flags (this flag implies
+ *    PKT_TX_TCP_CKSUM)
+ *  - if it's IPv4, set the PKT_TX_IP_CKSUM flag and write the IP checksum
+ *    to 0 in the packet
+ *  - fill the mbuf offload information: l2_len, l3_len, l4_len, tso_segsz
+ *  - calculate the pseudo header checksum and set it in the TCP header,
+ *    as required when doing hardware TCP checksum offload
+ */
+#define PKT_TX_TCP_SEG       (1ULL << 49)
+
 /* Use final bit of flags to indicate a control mbuf */
 #define CTRL_MBUF_FLAG       (1ULL << 63) /**< Mbuf contains control data */
 
@@ -185,6 +198,7 @@  static inline const char *rte_get_tx_ol_flag_name(uint64_t mask)
 	case PKT_TX_UDP_CKSUM: return "PKT_TX_UDP_CKSUM";
 	case PKT_TX_IEEE1588_TMST: return "PKT_TX_IEEE1588_TMST";
 	case PKT_TX_VXLAN_CKSUM: return "PKT_TX_VXLAN_CKSUM";
+	case PKT_TX_TCP_SEG: return "PKT_TX_TCP_SEG";
 	default: return NULL;
 	}
 }
@@ -264,22 +278,18 @@  struct rte_mbuf {
 
 	/* fields to support TX offloads */
 	union {
-		uint16_t l2_l3_len; /**< combined l2/l3 lengths as single var */
+		uint64_t tx_offload;       /**< combined for easy fetch */
 		struct {
-			uint16_t l3_len:9;      /**< L3 (IP) Header Length. */
-			uint16_t l2_len:7;      /**< L2 (MAC) Header Length. */
-		};
-	};
+			uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
+			uint64_t l3_len:9; /**< L3 (IP) Header Length. */
+			uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
+			uint64_t tso_segsz:16; /**< TCP TSO segment size */
 
-	/* fields for TX offloading of tunnels */
-	union {
-		uint16_t inner_l2_l3_len;
-		/**< combined inner l2/l3 lengths as single var */
-		struct {
-			uint16_t inner_l3_len:9;
-			/**< inner L3 (IP) Header Length. */
-			uint16_t inner_l2_len:7;
-			/**< inner L2 (MAC) Header Length. */
+			/* fields for TX offloading of tunnels */
+			uint16_t inner_l3_len:9; /**< inner L3 (IP) Hdr Length. */
+			uint16_t inner_l2_len:7; /**< inner L2 (MAC) Hdr Length. */
+
+			/* uint64_t unused:8; */
 		};
 	};
 } __rte_cache_aligned;
@@ -631,8 +641,7 @@  static inline void rte_pktmbuf_reset(struct rte_mbuf *m)
 {
 	m->next = NULL;
 	m->pkt_len = 0;
-	m->l2_l3_len = 0;
-	m->inner_l2_l3_len = 0;
+	m->tx_offload = 0;
 	m->vlan_tci = 0;
 	m->nb_segs = 1;
 	m->port = 0xff;
@@ -701,8 +710,7 @@  static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *md)
 	mi->data_len = md->data_len;
 	mi->port = md->port;
 	mi->vlan_tci = md->vlan_tci;
-	mi->l2_l3_len = md->l2_l3_len;
-	mi->inner_l2_l3_len = md->inner_l2_l3_len;
+	mi->tx_offload = md->tx_offload;
 	mi->hash = md->hash;
 
 	mi->next = NULL;
diff --git a/lib/librte_pmd_e1000/igb_rxtx.c b/lib/librte_pmd_e1000/igb_rxtx.c
index dbf5074..0a9447e 100644
--- a/lib/librte_pmd_e1000/igb_rxtx.c
+++ b/lib/librte_pmd_e1000/igb_rxtx.c
@@ -361,6 +361,13 @@  eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 	struct rte_mbuf     *tx_pkt;
 	struct rte_mbuf     *m_seg;
 	union igb_vlan_macip vlan_macip_lens;
+	union {
+		uint16_t u16;
+		struct {
+			uint16_t l3_len:9;
+			uint16_t l2_len:7;
+		};
+	} l2_l3_len;
 	uint64_t buf_dma_addr;
 	uint32_t olinfo_status;
 	uint32_t cmd_type_len;
@@ -398,8 +405,10 @@  eth_igb_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		tx_last = (uint16_t) (tx_id + tx_pkt->nb_segs - 1);
 
 		ol_flags = tx_pkt->ol_flags;
+		l2_l3_len.l2_len = tx_pkt->l2_len;
+		l2_l3_len.l3_len = tx_pkt->l3_len;
 		vlan_macip_lens.f.vlan_tci = tx_pkt->vlan_tci;
-		vlan_macip_lens.f.l2_l3_len = tx_pkt->l2_l3_len;
+		vlan_macip_lens.f.l2_l3_len = l2_l3_len.u16;
 		tx_ol_req = ol_flags & (PKT_TX_VLAN_PKT | PKT_TX_IP_CKSUM |
 			PKT_TX_L4_MASK);
 
diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
index 70ca254..54a0fc1 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
@@ -540,6 +540,13 @@  ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 	struct rte_mbuf     *tx_pkt;
 	struct rte_mbuf     *m_seg;
 	union ixgbe_vlan_macip vlan_macip_lens;
+	union {
+		uint16_t u16;
+		struct {
+			uint16_t l3_len:9;
+			uint16_t l2_len:7;
+		};
+	} l2_l3_len;
 	uint64_t buf_dma_addr;
 	uint32_t olinfo_status;
 	uint32_t cmd_type_len;
@@ -583,8 +590,10 @@  ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		tx_ol_req = ol_flags & (PKT_TX_VLAN_PKT | PKT_TX_IP_CKSUM |
 			PKT_TX_L4_MASK);
 		if (tx_ol_req) {
+			l2_l3_len.l2_len = tx_pkt->l2_len;
+			l2_l3_len.l3_len = tx_pkt->l3_len;
 			vlan_macip_lens.f.vlan_tci = tx_pkt->vlan_tci;
-			vlan_macip_lens.f.l2_l3_len = tx_pkt->l2_l3_len;
+			vlan_macip_lens.f.l2_l3_len = l2_l3_len.u16;
 
 			/* If new context need be built or reuse the exist ctx. */
 			ctx = what_advctx_update(txq, tx_ol_req,