[dpdk-dev,2/2] i40e:enable TSO support

Message ID 1423463570-12947-3-git-send-email-jijiang.liu@intel.com (mailing list archive)
State Superseded, archived
Headers

Commit Message

Jijiang Liu Feb. 9, 2015, 6:32 a.m. UTC
  This patch enables i40e TSO feature for both non-tunneling packet and tunneling packet.

Signed-off-by: Jijiang Liu <jijiang.liu@intel.com>
Signed-off-by: Miroslaw Walukiewicz <miroslaw.walukiewicz@intel.com>
---
 lib/librte_pmd_i40e/i40e_rxtx.c |   99 ++++++++++++++++++++++++++++-----------
 lib/librte_pmd_i40e/i40e_rxtx.h |   13 +++++
 2 files changed, 85 insertions(+), 27 deletions(-)
  

Comments

David Marchand Feb. 9, 2015, 9:10 a.m. UTC | #1
Hello,

This patch does two things at the same time.
Please split this to make it easier to understand (see comments below).


On Mon, Feb 9, 2015 at 7:32 AM, Jijiang Liu <jijiang.liu@intel.com> wrote:

> This patch enables i40e TSO feature for both non-tunneling packet and
> tunneling packet.
>
> Signed-off-by: Jijiang Liu <jijiang.liu@intel.com>
> Signed-off-by: Miroslaw Walukiewicz <miroslaw.walukiewicz@intel.com>
> ---
>  lib/librte_pmd_i40e/i40e_rxtx.c |   99
> ++++++++++++++++++++++++++++-----------
>  lib/librte_pmd_i40e/i40e_rxtx.h |   13 +++++
>  2 files changed, 85 insertions(+), 27 deletions(-)
>
> diff --git a/lib/librte_pmd_i40e/i40e_rxtx.c
> b/lib/librte_pmd_i40e/i40e_rxtx.c
> index 349c1e5..9b9bdcd 100644
> --- a/lib/librte_pmd_i40e/i40e_rxtx.c
> +++ b/lib/librte_pmd_i40e/i40e_rxtx.c
> @@ -465,16 +465,13 @@ static inline void
>  i40e_txd_enable_checksum(uint64_t ol_flags,
>                         uint32_t *td_cmd,
>                         uint32_t *td_offset,
> -                       uint8_t l2_len,
> -                       uint16_t l3_len,
> -                       uint8_t outer_l2_len,
> -                       uint16_t outer_l3_len,
> +                       union i40e_tx_offload tx_offload,
>                         uint32_t *cd_tunneling)
>  {
>         /* UDP tunneling packet TX checksum offload */
>         if (unlikely(ol_flags & PKT_TX_OUTER_IP_CKSUM)) {
>
> -               *td_offset |= (outer_l2_len >> 1)
> +               *td_offset |= (tx_offload.outer_l2_len >> 1)
>                                 << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
>
>                 if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
> @@ -485,25 +482,35 @@ i40e_txd_enable_checksum(uint64_t ol_flags,
>                         *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6;
>
>                 /* Now set the ctx descriptor fields */
> -               *cd_tunneling |= (outer_l3_len >> 2) <<
> +               *cd_tunneling |= (tx_offload.outer_l3_len >> 2) <<
>                                 I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT |
> -                               (l2_len >> 1) <<
> +                               (tx_offload.l2_len >> 1) <<
>                                 I40E_TXD_CTX_QW0_NATLEN_SHIFT;
>
>         } else
> -               *td_offset |= (l2_len >> 1)
> +               *td_offset |= (tx_offload.l2_len >> 1)
>                         << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
>
>         /* Enable L3 checksum offloads */
>         if (ol_flags & PKT_TX_IP_CKSUM) {
>                 *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
> -               *td_offset |= (l3_len >> 2) <<
> I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
> +               *td_offset |= (tx_offload.l3_len >> 2)
> +                       << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
>         } else if (ol_flags & PKT_TX_IPV4) {
>                 *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
> -               *td_offset |= (l3_len >> 2) <<
> I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
> +               *td_offset |= (tx_offload.l3_len >> 2)
> +                        << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
>         } else if (ol_flags & PKT_TX_IPV6) {
>                 *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
> -               *td_offset |= (l3_len >> 2) <<
> I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
> +               *td_offset |= (tx_offload.l3_len >> 2)
> +                        << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
> +       }
>

This first part is a rework to me.
Please split.



> +
> +       if (ol_flags & PKT_TX_TCP_SEG) {
> +               *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
> +               *td_offset |= (tx_offload.l4_len >> 2)
> +                       << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
> +               return;
>         }
>
>         /* Enable L4 checksum offloads */
> @@ -1154,7 +1161,7 @@ i40e_calc_context_desc(uint64_t flags)
>  {
>         uint64_t mask = 0ULL;
>
> -       mask |= PKT_TX_OUTER_IP_CKSUM;
> +       mask |= (PKT_TX_OUTER_IP_CKSUM | PKT_TX_TCP_SEG);
>

You are adding this offload flag in an unconditional way.
Is this intended ?


>  #ifdef RTE_LIBRTE_IEEE1588
>         mask |= PKT_TX_IEEE1588_TMST;
> @@ -1165,6 +1172,41 @@ i40e_calc_context_desc(uint64_t flags)
>         return 0;
>  }
>
> +/* set i40e TSO context descriptor */
> +static inline uint64_t
> +i40e_set_tso_ctx(struct rte_mbuf *mbuf, union i40e_tx_offload tx_offload)
> +{
> +
> +       uint64_t ctx_desc = 0;
> +       uint32_t cd_cmd, hdr_len, cd_tso_len;
> +
> +
> +       if (!tx_offload.l4_len) {
> +               PMD_DRV_LOG(DEBUG, "L4 length set to 0");
> +               return ctx_desc;
> +       }
> +
> +       /**
> +        * in case of tunneling packet, the outer_l2_len and
> +        * outer_l3_len must be 0.
> +        */
> +       hdr_len = tx_offload.outer_l2_len +
> +               tx_offload.outer_l3_len +
> +               tx_offload.l2_len +
> +               tx_offload.l3_len +
> +               tx_offload.l4_len;
> +
> +       cd_cmd = I40E_TX_CTX_DESC_TSO;
> +       cd_tso_len = mbuf->pkt_len - hdr_len;
> +       ctx_desc |= ((uint64_t)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
> +                               ((uint64_t)cd_tso_len <<
> +                                I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
> +                               ((uint64_t)mbuf->tso_segsz <<
> +                               I40E_TXD_CTX_QW1_MSS_SHIFT);
> +
> +       return ctx_desc;
> +}
> +
>



>  uint16_t
>  i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t
> nb_pkts)
>  {
> @@ -1183,15 +1225,12 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf
> **tx_pkts, uint16_t nb_pkts)
>         uint32_t tx_flags;
>         uint32_t td_tag;
>         uint64_t ol_flags;
> -       uint8_t l2_len;
> -       uint16_t l3_len;
> -       uint8_t outer_l2_len;
> -       uint16_t outer_l3_len;
>         uint16_t nb_used;
>         uint16_t nb_ctx;
>         uint16_t tx_last;
>         uint16_t slen;
>         uint64_t buf_dma_addr;
> +       union i40e_tx_offload tx_offload = { .data = 0 };
>
>         txq = tx_queue;
>         sw_ring = txq->sw_ring;
> @@ -1213,10 +1252,12 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf
> **tx_pkts, uint16_t nb_pkts)
>                 RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
>
>                 ol_flags = tx_pkt->ol_flags;
> -               l2_len = tx_pkt->l2_len;
> -               l3_len = tx_pkt->l3_len;
> -               outer_l2_len = tx_pkt->outer_l2_len;
> -               outer_l3_len = tx_pkt->outer_l3_len;
> +               tx_offload.l2_len = tx_pkt->l2_len;
> +               tx_offload.l3_len = tx_pkt->l3_len;
> +               tx_offload.l4_len = tx_pkt->l4_len;
> +               tx_offload.tso_segsz = tx_pkt->tso_segsz;
> +               tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
> +               tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
>


Idem, with the split stuff.


>
>                 /* Calculate the number of context descriptors needed. */
>                 nb_ctx = i40e_calc_context_desc(ol_flags);
> @@ -1267,9 +1308,7 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf
> **tx_pkts, uint16_t nb_pkts)
>                 cd_tunneling_params = 0;
>                 if (ol_flags & I40E_TX_CKSUM_OFFLOAD_MASK) {
>                         i40e_txd_enable_checksum(ol_flags, &td_cmd,
> &td_offset,
> -                               l2_len, l3_len, outer_l2_len,
> -                               outer_l3_len,
> -                               &cd_tunneling_params);
> +                               tx_offload, &cd_tunneling_params);
>                 }
>
>                 if (unlikely(nb_ctx)) {
>

Idem.


> @@ -1287,12 +1326,18 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf
> **tx_pkts, uint16_t nb_pkts)
>                                 rte_pktmbuf_free_seg(txe->mbuf);
>                                 txe->mbuf = NULL;
>                         }
> -#ifdef RTE_LIBRTE_IEEE1588
> -                       if (ol_flags & PKT_TX_IEEE1588_TMST)
> +                       /* TSO enabled means no timestamp */
> +                       if (ol_flags & PKT_TX_TCP_SEG) {
>                                 cd_type_cmd_tso_mss |=
> -                                       ((uint64_t)I40E_TX_CTX_DESC_TSYN <<
> -
>  I40E_TXD_CTX_QW1_CMD_SHIFT);
> +                                       i40e_set_tso_ctx(tx_pkt,
> tx_offload);
> +                       } else {
> +#ifdef RTE_LIBRTE_IEEE1588
> +                               if (ol_flags & PKT_TX_IEEE1588_TMST)
> +                                       cd_type_cmd_tso_mss |=
> +
>  ((uint64_t)I40E_TX_CTX_DESC_TSYN <<
> +
>  I40E_TXD_CTX_QW1_CMD_SHIFT);
>  #endif
> +                       }
>                         ctx_txd->tunneling_params =
>                                 rte_cpu_to_le_32(cd_tunneling_params);
>                         ctx_txd->l2tag2 = rte_cpu_to_le_16(cd_l2tag2);
> diff --git a/lib/librte_pmd_i40e/i40e_rxtx.h
> b/lib/librte_pmd_i40e/i40e_rxtx.h
> index af932e3..f1033ef 100644
> --- a/lib/librte_pmd_i40e/i40e_rxtx.h
> +++ b/lib/librte_pmd_i40e/i40e_rxtx.h
> @@ -154,6 +154,19 @@ struct i40e_tx_queue {
>         bool tx_deferred_start; /**< don't start this queue in dev start */
>  };
>
> +/** Offload features */
> +union i40e_tx_offload {
> +       uint64_t data;
> +       struct {
> +               uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
> +               uint64_t l3_len:9; /**< L3 (IP) Header Length. */
> +               uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
> +               uint64_t tso_segsz:16; /**< TCP TSO segment size */
> +               uint64_t outer_l2_len:8; /**< L2 outer Header Length */
> +               uint64_t outer_l3_len:16; /**< L3 outer Header Length */
> +       };
> +};
> +
>

Idem.
  
miroslaw.walukiewicz@intel.com Feb. 20, 2015, 3:54 p.m. UTC | #2
Hello, 

> -----Original Message-----

> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of David Marchand

> Sent: Monday, February 9, 2015 10:11 AM

> To: Liu, Jijiang

> Cc: dev@dpdk.org

> Subject: Re: [dpdk-dev] [PATCH 2/2] i40e:enable TSO support

> 

> Hello,

> 

> This patch does two things at the same time.

> Please split this to make it easier to understand (see comments below).

> 

> 

> On Mon, Feb 9, 2015 at 7:32 AM, Jijiang Liu <jijiang.liu@intel.com> wrote:

> 

> > This patch enables i40e TSO feature for both non-tunneling packet and

> > tunneling packet.

> >

> > Signed-off-by: Jijiang Liu <jijiang.liu@intel.com>

> > Signed-off-by: Miroslaw Walukiewicz <miroslaw.walukiewicz@intel.com>

> > ---

> >  lib/librte_pmd_i40e/i40e_rxtx.c |   99

> > ++++++++++++++++++++++++++++-----------

> >  lib/librte_pmd_i40e/i40e_rxtx.h |   13 +++++

> >  2 files changed, 85 insertions(+), 27 deletions(-)

> >

> > diff --git a/lib/librte_pmd_i40e/i40e_rxtx.c

> > b/lib/librte_pmd_i40e/i40e_rxtx.c

> > index 349c1e5..9b9bdcd 100644

> > --- a/lib/librte_pmd_i40e/i40e_rxtx.c

> > +++ b/lib/librte_pmd_i40e/i40e_rxtx.c

> > @@ -465,16 +465,13 @@ static inline void

> >  i40e_txd_enable_checksum(uint64_t ol_flags,

> >                         uint32_t *td_cmd,

> >                         uint32_t *td_offset,

> > -                       uint8_t l2_len,

> > -                       uint16_t l3_len,

> > -                       uint8_t outer_l2_len,

> > -                       uint16_t outer_l3_len,

> > +                       union i40e_tx_offload tx_offload,

> >                         uint32_t *cd_tunneling)

> >  {

> >         /* UDP tunneling packet TX checksum offload */

> >         if (unlikely(ol_flags & PKT_TX_OUTER_IP_CKSUM)) {

> >

> > -               *td_offset |= (outer_l2_len >> 1)

> > +               *td_offset |= (tx_offload.outer_l2_len >> 1)

> >                                 << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;

> >

> >                 if (ol_flags & PKT_TX_OUTER_IP_CKSUM)

> > @@ -485,25 +482,35 @@ i40e_txd_enable_checksum(uint64_t ol_flags,

> >                         *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6;

> >

> >                 /* Now set the ctx descriptor fields */

> > -               *cd_tunneling |= (outer_l3_len >> 2) <<

> > +               *cd_tunneling |= (tx_offload.outer_l3_len >> 2) <<

> >                                 I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT |

> > -                               (l2_len >> 1) <<

> > +                               (tx_offload.l2_len >> 1) <<

> >                                 I40E_TXD_CTX_QW0_NATLEN_SHIFT;

> >

> >         } else

> > -               *td_offset |= (l2_len >> 1)

> > +               *td_offset |= (tx_offload.l2_len >> 1)

> >                         << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;

> >

> >         /* Enable L3 checksum offloads */

> >         if (ol_flags & PKT_TX_IP_CKSUM) {

> >                 *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;

> > -               *td_offset |= (l3_len >> 2) <<

> > I40E_TX_DESC_LENGTH_IPLEN_SHIFT;

> > +               *td_offset |= (tx_offload.l3_len >> 2)

> > +                       << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;

> >         } else if (ol_flags & PKT_TX_IPV4) {

> >                 *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;

> > -               *td_offset |= (l3_len >> 2) <<

> > I40E_TX_DESC_LENGTH_IPLEN_SHIFT;

> > +               *td_offset |= (tx_offload.l3_len >> 2)

> > +                        << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;

> >         } else if (ol_flags & PKT_TX_IPV6) {

> >                 *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;

> > -               *td_offset |= (l3_len >> 2) <<

> > I40E_TX_DESC_LENGTH_IPLEN_SHIFT;

> > +               *td_offset |= (tx_offload.l3_len >> 2)

> > +                        << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;

> > +       }

> >

> 

> This first part is a rework to me.

> Please split.

> 

> 

> 

> > +

> > +       if (ol_flags & PKT_TX_TCP_SEG) {

> > +               *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;

> > +               *td_offset |= (tx_offload.l4_len >> 2)

> > +                       << I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;

> > +               return;

> >         }

> >

> >         /* Enable L4 checksum offloads */

> > @@ -1154,7 +1161,7 @@ i40e_calc_context_desc(uint64_t flags)

> >  {

> >         uint64_t mask = 0ULL;

> >

> > -       mask |= PKT_TX_OUTER_IP_CKSUM;

> > +       mask |= (PKT_TX_OUTER_IP_CKSUM | PKT_TX_TCP_SEG);

> >

> 

> You are adding this offload flag in an unconditional way.

> Is this intended ?


It is intended. The function i40e_calc_context_desc decides whether the driver should allocate the additional offload descriptor for the NIC, based on the ol_flags provided by the application. 

For performance reasons the code does not check whether the application set the flag 
correctly (for example, TSO is enabled in the flags but not in the NIC).

The assumption is that the application first reads the TX offload flags for the NIC and then sets 
  the ol_flags correctly in the rte_mbuf. 


> 

> 

> >  #ifdef RTE_LIBRTE_IEEE1588

> >         mask |= PKT_TX_IEEE1588_TMST;

> > @@ -1165,6 +1172,41 @@ i40e_calc_context_desc(uint64_t flags)

> >         return 0;

> >  }

> >

> > +/* set i40e TSO context descriptor */

> > +static inline uint64_t

> > +i40e_set_tso_ctx(struct rte_mbuf *mbuf, union i40e_tx_offload

> tx_offload)

> > +{

> > +

> > +       uint64_t ctx_desc = 0;

> > +       uint32_t cd_cmd, hdr_len, cd_tso_len;

> > +

> > +

> > +       if (!tx_offload.l4_len) {

> > +               PMD_DRV_LOG(DEBUG, "L4 length set to 0");

> > +               return ctx_desc;

> > +       }

> > +

> > +       /**

> > +        * in case of tunneling packet, the outer_l2_len and

> > +        * outer_l3_len must be 0.

> > +        */

> > +       hdr_len = tx_offload.outer_l2_len +

> > +               tx_offload.outer_l3_len +

> > +               tx_offload.l2_len +

> > +               tx_offload.l3_len +

> > +               tx_offload.l4_len;

> > +

> > +       cd_cmd = I40E_TX_CTX_DESC_TSO;

> > +       cd_tso_len = mbuf->pkt_len - hdr_len;

> > +       ctx_desc |= ((uint64_t)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT)

> |

> > +                               ((uint64_t)cd_tso_len <<

> > +                                I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |

> > +                               ((uint64_t)mbuf->tso_segsz <<

> > +                               I40E_TXD_CTX_QW1_MSS_SHIFT);

> > +

> > +       return ctx_desc;

> > +}

> > +

> >

> 

> 

> 

> >  uint16_t

> >  i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t

> > nb_pkts)

> >  {

> > @@ -1183,15 +1225,12 @@ i40e_xmit_pkts(void *tx_queue, struct

> rte_mbuf

> > **tx_pkts, uint16_t nb_pkts)

> >         uint32_t tx_flags;

> >         uint32_t td_tag;

> >         uint64_t ol_flags;

> > -       uint8_t l2_len;

> > -       uint16_t l3_len;

> > -       uint8_t outer_l2_len;

> > -       uint16_t outer_l3_len;

> >         uint16_t nb_used;

> >         uint16_t nb_ctx;

> >         uint16_t tx_last;

> >         uint16_t slen;

> >         uint64_t buf_dma_addr;

> > +       union i40e_tx_offload tx_offload = { .data = 0 };

> >

> >         txq = tx_queue;

> >         sw_ring = txq->sw_ring;

> > @@ -1213,10 +1252,12 @@ i40e_xmit_pkts(void *tx_queue, struct

> rte_mbuf

> > **tx_pkts, uint16_t nb_pkts)

> >                 RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);

> >

> >                 ol_flags = tx_pkt->ol_flags;

> > -               l2_len = tx_pkt->l2_len;

> > -               l3_len = tx_pkt->l3_len;

> > -               outer_l2_len = tx_pkt->outer_l2_len;

> > -               outer_l3_len = tx_pkt->outer_l3_len;

> > +               tx_offload.l2_len = tx_pkt->l2_len;

> > +               tx_offload.l3_len = tx_pkt->l3_len;

> > +               tx_offload.l4_len = tx_pkt->l4_len;

> > +               tx_offload.tso_segsz = tx_pkt->tso_segsz;

> > +               tx_offload.outer_l2_len = tx_pkt->outer_l2_len;

> > +               tx_offload.outer_l3_len = tx_pkt->outer_l3_len;

> >

> 

> 

> Idem, with the split stuff.

> 

> 

> >

> >                 /* Calculate the number of context descriptors needed. */

> >                 nb_ctx = i40e_calc_context_desc(ol_flags);

> > @@ -1267,9 +1308,7 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf

> > **tx_pkts, uint16_t nb_pkts)

> >                 cd_tunneling_params = 0;

> >                 if (ol_flags & I40E_TX_CKSUM_OFFLOAD_MASK) {

> >                         i40e_txd_enable_checksum(ol_flags, &td_cmd,

> > &td_offset,

> > -                               l2_len, l3_len, outer_l2_len,

> > -                               outer_l3_len,

> > -                               &cd_tunneling_params);

> > +                               tx_offload, &cd_tunneling_params);

> >                 }

> >

> >                 if (unlikely(nb_ctx)) {

> >

> 

> Idem.

> 

> 

> > @@ -1287,12 +1326,18 @@ i40e_xmit_pkts(void *tx_queue, struct

> rte_mbuf

> > **tx_pkts, uint16_t nb_pkts)

> >                                 rte_pktmbuf_free_seg(txe->mbuf);

> >                                 txe->mbuf = NULL;

> >                         }

> > -#ifdef RTE_LIBRTE_IEEE1588

> > -                       if (ol_flags & PKT_TX_IEEE1588_TMST)

> > +                       /* TSO enabled means no timestamp */

> > +                       if (ol_flags & PKT_TX_TCP_SEG) {

> >                                 cd_type_cmd_tso_mss |=

> > -                                       ((uint64_t)I40E_TX_CTX_DESC_TSYN <<

> > -

> >  I40E_TXD_CTX_QW1_CMD_SHIFT);

> > +                                       i40e_set_tso_ctx(tx_pkt,

> > tx_offload);

> > +                       } else {

> > +#ifdef RTE_LIBRTE_IEEE1588

> > +                               if (ol_flags & PKT_TX_IEEE1588_TMST)

> > +                                       cd_type_cmd_tso_mss |=

> > +

> >  ((uint64_t)I40E_TX_CTX_DESC_TSYN <<

> > +

> >  I40E_TXD_CTX_QW1_CMD_SHIFT);

> >  #endif

> > +                       }

> >                         ctx_txd->tunneling_params =

> >                                 rte_cpu_to_le_32(cd_tunneling_params);

> >                         ctx_txd->l2tag2 = rte_cpu_to_le_16(cd_l2tag2);

> > diff --git a/lib/librte_pmd_i40e/i40e_rxtx.h

> > b/lib/librte_pmd_i40e/i40e_rxtx.h

> > index af932e3..f1033ef 100644

> > --- a/lib/librte_pmd_i40e/i40e_rxtx.h

> > +++ b/lib/librte_pmd_i40e/i40e_rxtx.h

> > @@ -154,6 +154,19 @@ struct i40e_tx_queue {

> >         bool tx_deferred_start; /**< don't start this queue in dev start */

> >  };

> >

> > +/** Offload features */

> > +union i40e_tx_offload {

> > +       uint64_t data;

> > +       struct {

> > +               uint64_t l2_len:7; /**< L2 (MAC) Header Length. */

> > +               uint64_t l3_len:9; /**< L3 (IP) Header Length. */

> > +               uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */

> > +               uint64_t tso_segsz:16; /**< TCP TSO segment size */

> > +               uint64_t outer_l2_len:8; /**< L2 outer Header Length */

> > +               uint64_t outer_l3_len:16; /**< L3 outer Header Length */

> > +       };

> > +};

> > +

> >

> 

> Idem.

> 

> 

> 

> --

> David Marchand
  

Patch

diff --git a/lib/librte_pmd_i40e/i40e_rxtx.c b/lib/librte_pmd_i40e/i40e_rxtx.c
index 349c1e5..9b9bdcd 100644
--- a/lib/librte_pmd_i40e/i40e_rxtx.c
+++ b/lib/librte_pmd_i40e/i40e_rxtx.c
@@ -465,16 +465,13 @@  static inline void
 i40e_txd_enable_checksum(uint64_t ol_flags,
 			uint32_t *td_cmd,
 			uint32_t *td_offset,
-			uint8_t l2_len,
-			uint16_t l3_len,
-			uint8_t outer_l2_len,
-			uint16_t outer_l3_len,
+			union i40e_tx_offload tx_offload,
 			uint32_t *cd_tunneling)
 {
 	/* UDP tunneling packet TX checksum offload */
 	if (unlikely(ol_flags & PKT_TX_OUTER_IP_CKSUM)) {
 
-		*td_offset |= (outer_l2_len >> 1)
+		*td_offset |= (tx_offload.outer_l2_len >> 1)
 				<< I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
 
 		if (ol_flags & PKT_TX_OUTER_IP_CKSUM)
@@ -485,25 +482,35 @@  i40e_txd_enable_checksum(uint64_t ol_flags,
 			*cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6;
 
 		/* Now set the ctx descriptor fields */
-		*cd_tunneling |= (outer_l3_len >> 2) <<
+		*cd_tunneling |= (tx_offload.outer_l3_len >> 2) <<
 				I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT |
-				(l2_len >> 1) <<
+				(tx_offload.l2_len >> 1) <<
 				I40E_TXD_CTX_QW0_NATLEN_SHIFT;
 
 	} else
-		*td_offset |= (l2_len >> 1)
+		*td_offset |= (tx_offload.l2_len >> 1)
 			<< I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
 
 	/* Enable L3 checksum offloads */
 	if (ol_flags & PKT_TX_IP_CKSUM) {
 		*td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
-		*td_offset |= (l3_len >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
+		*td_offset |= (tx_offload.l3_len >> 2)
+			<< I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
 	} else if (ol_flags & PKT_TX_IPV4) {
 		*td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
-		*td_offset |= (l3_len >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
+		*td_offset |= (tx_offload.l3_len >> 2)
+			 << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
 	} else if (ol_flags & PKT_TX_IPV6) {
 		*td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
-		*td_offset |= (l3_len >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
+		*td_offset |= (tx_offload.l3_len >> 2)
+			 << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
+	}
+
+	if (ol_flags & PKT_TX_TCP_SEG) {
+		*td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
+		*td_offset |= (tx_offload.l4_len >> 2)
+			<< I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
+		return;
 	}
 
 	/* Enable L4 checksum offloads */
@@ -1154,7 +1161,7 @@  i40e_calc_context_desc(uint64_t flags)
 {
 	uint64_t mask = 0ULL;
 
-	mask |= PKT_TX_OUTER_IP_CKSUM;
+	mask |= (PKT_TX_OUTER_IP_CKSUM | PKT_TX_TCP_SEG);
 
 #ifdef RTE_LIBRTE_IEEE1588
 	mask |= PKT_TX_IEEE1588_TMST;
@@ -1165,6 +1172,41 @@  i40e_calc_context_desc(uint64_t flags)
 	return 0;
 }
 
+/* set i40e TSO context descriptor */
+static inline uint64_t
+i40e_set_tso_ctx(struct rte_mbuf *mbuf, union i40e_tx_offload tx_offload)
+{
+
+	uint64_t ctx_desc = 0;
+	uint32_t cd_cmd, hdr_len, cd_tso_len;
+
+
+	if (!tx_offload.l4_len) {
+		PMD_DRV_LOG(DEBUG, "L4 length set to 0");
+		return ctx_desc;
+	}
+
+	/**
+	 * in case of tunneling packet, the outer_l2_len and
+	 * outer_l3_len must be 0.
+	 */
+	hdr_len = tx_offload.outer_l2_len +
+		tx_offload.outer_l3_len +
+		tx_offload.l2_len +
+		tx_offload.l3_len +
+		tx_offload.l4_len;
+
+	cd_cmd = I40E_TX_CTX_DESC_TSO;
+	cd_tso_len = mbuf->pkt_len - hdr_len;
+	ctx_desc |= ((uint64_t)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
+				((uint64_t)cd_tso_len <<
+				 I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
+				((uint64_t)mbuf->tso_segsz <<
+				I40E_TXD_CTX_QW1_MSS_SHIFT);
+
+	return ctx_desc;
+}
+
 uint16_t
 i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 {
@@ -1183,15 +1225,12 @@  i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 	uint32_t tx_flags;
 	uint32_t td_tag;
 	uint64_t ol_flags;
-	uint8_t l2_len;
-	uint16_t l3_len;
-	uint8_t outer_l2_len;
-	uint16_t outer_l3_len;
 	uint16_t nb_used;
 	uint16_t nb_ctx;
 	uint16_t tx_last;
 	uint16_t slen;
 	uint64_t buf_dma_addr;
+	union i40e_tx_offload tx_offload = { .data = 0 };
 
 	txq = tx_queue;
 	sw_ring = txq->sw_ring;
@@ -1213,10 +1252,12 @@  i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 		RTE_MBUF_PREFETCH_TO_FREE(txe->mbuf);
 
 		ol_flags = tx_pkt->ol_flags;
-		l2_len = tx_pkt->l2_len;
-		l3_len = tx_pkt->l3_len;
-		outer_l2_len = tx_pkt->outer_l2_len;
-		outer_l3_len = tx_pkt->outer_l3_len;
+		tx_offload.l2_len = tx_pkt->l2_len;
+		tx_offload.l3_len = tx_pkt->l3_len;
+		tx_offload.l4_len = tx_pkt->l4_len;
+		tx_offload.tso_segsz = tx_pkt->tso_segsz;
+		tx_offload.outer_l2_len = tx_pkt->outer_l2_len;
+		tx_offload.outer_l3_len = tx_pkt->outer_l3_len;
 
 		/* Calculate the number of context descriptors needed. */
 		nb_ctx = i40e_calc_context_desc(ol_flags);
@@ -1267,9 +1308,7 @@  i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 		cd_tunneling_params = 0;
 		if (ol_flags & I40E_TX_CKSUM_OFFLOAD_MASK) {
 			i40e_txd_enable_checksum(ol_flags, &td_cmd, &td_offset,
-				l2_len, l3_len, outer_l2_len,
-				outer_l3_len,
-				&cd_tunneling_params);
+				tx_offload, &cd_tunneling_params);
 		}
 
 		if (unlikely(nb_ctx)) {
@@ -1287,12 +1326,18 @@  i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 				rte_pktmbuf_free_seg(txe->mbuf);
 				txe->mbuf = NULL;
 			}
-#ifdef RTE_LIBRTE_IEEE1588
-			if (ol_flags & PKT_TX_IEEE1588_TMST)
+			/* TSO enabled means no timestamp */
+			if (ol_flags & PKT_TX_TCP_SEG) {
 				cd_type_cmd_tso_mss |=
-					((uint64_t)I40E_TX_CTX_DESC_TSYN <<
-						I40E_TXD_CTX_QW1_CMD_SHIFT);
+					i40e_set_tso_ctx(tx_pkt, tx_offload);
+			} else {
+#ifdef RTE_LIBRTE_IEEE1588
+				if (ol_flags & PKT_TX_IEEE1588_TMST)
+					cd_type_cmd_tso_mss |=
+						((uint64_t)I40E_TX_CTX_DESC_TSYN <<
+							I40E_TXD_CTX_QW1_CMD_SHIFT);
 #endif
+			}
 			ctx_txd->tunneling_params =
 				rte_cpu_to_le_32(cd_tunneling_params);
 			ctx_txd->l2tag2 = rte_cpu_to_le_16(cd_l2tag2);
diff --git a/lib/librte_pmd_i40e/i40e_rxtx.h b/lib/librte_pmd_i40e/i40e_rxtx.h
index af932e3..f1033ef 100644
--- a/lib/librte_pmd_i40e/i40e_rxtx.h
+++ b/lib/librte_pmd_i40e/i40e_rxtx.h
@@ -154,6 +154,19 @@  struct i40e_tx_queue {
 	bool tx_deferred_start; /**< don't start this queue in dev start */
 };
 
+/** Offload features */
+union i40e_tx_offload {
+	uint64_t data;
+	struct {
+		uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
+		uint64_t l3_len:9; /**< L3 (IP) Header Length. */
+		uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */
+		uint64_t tso_segsz:16; /**< TCP TSO segment size */
+		uint64_t outer_l2_len:8; /**< L2 outer Header Length */
+		uint64_t outer_l3_len:16; /**< L3 outer Header Length */
+	};
+};
+
 int i40e_dev_rx_queue_start(struct rte_eth_dev *dev, uint16_t rx_queue_id);
 int i40e_dev_rx_queue_stop(struct rte_eth_dev *dev, uint16_t rx_queue_id);
 int i40e_dev_tx_queue_start(struct rte_eth_dev *dev, uint16_t tx_queue_id);