net/octeontx2: use runtime lso format indices

Message ID 20210618130606.21646-1-ndabilpuram@marvell.com (mailing list archive)
State Accepted, archived
Delegated to: Jerin Jacob
Headers
Series net/octeontx2: use runtime lso format indices |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS
ci/github-robot success github build: passed
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-abi-testing success Testing PASS
ci/iol-testing fail Testing issues
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-mellanox-Functional fail Functional Testing issues

Commit Message

Nithin Dabilpuram June 18, 2021, 1:06 p.m. UTC
  Currently, the LSO formats set up at initialization are expected to be
compile-time constants that start from 0.

Change the logic in the slow and fast paths so that LSO format indexes
are determined only at runtime.

Fixes: 3b635472a998 ("net/octeontx2: support TSO offload")
Cc: stable@dpdk.org

Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com>
---
 drivers/event/octeontx2/otx2_worker.h |  2 +-
 drivers/net/octeontx2/otx2_ethdev.c   | 72 ++++++++++++++++++++---------------
 drivers/net/octeontx2/otx2_ethdev.h   | 13 ++++++-
 drivers/net/octeontx2/otx2_tx.c       |  8 +++-
 drivers/net/octeontx2/otx2_tx.h       | 12 +++---
 5 files changed, 67 insertions(+), 40 deletions(-)
  

Comments

Jerin Jacob June 27, 2021, 6:52 a.m. UTC | #1
On Fri, Jun 18, 2021 at 6:36 PM Nithin Dabilpuram
<ndabilpuram@marvell.com> wrote:
>
> Currently lso formats setup initially are expected to be
> compile time constants and start from 0.
>
> Change the logic in slow and fast path so that LSO format indexes
> are only determined runtime.
>
> Fixes: 3b635472a998 ("net/octeontx2: support TSO offload")
> Cc: stable@dpdk.org
>
> Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com>


Applied to dpdk-next-net-mrvl/for-next-net. Thanks


> ---
>  drivers/event/octeontx2/otx2_worker.h |  2 +-
>  drivers/net/octeontx2/otx2_ethdev.c   | 72 ++++++++++++++++++++---------------
>  drivers/net/octeontx2/otx2_ethdev.h   | 13 ++++++-
>  drivers/net/octeontx2/otx2_tx.c       |  8 +++-
>  drivers/net/octeontx2/otx2_tx.h       | 12 +++---
>  5 files changed, 67 insertions(+), 40 deletions(-)
>
> diff --git a/drivers/event/octeontx2/otx2_worker.h b/drivers/event/octeontx2/otx2_worker.h
> index fd149be..3e36dce 100644
> --- a/drivers/event/octeontx2/otx2_worker.h
> +++ b/drivers/event/octeontx2/otx2_worker.h
> @@ -264,7 +264,7 @@ otx2_ssogws_prepare_pkt(const struct otx2_eth_txq *txq, struct rte_mbuf *m,
>                         uint64_t *cmd, const uint32_t flags)
>  {
>         otx2_lmt_mov(cmd, txq->cmd, otx2_nix_tx_ext_subs(flags));
> -       otx2_nix_xmit_prepare(m, cmd, flags);
> +       otx2_nix_xmit_prepare(m, cmd, flags, txq->lso_tun_fmt);
>  }
>
>  static __rte_always_inline uint16_t
> diff --git a/drivers/net/octeontx2/otx2_ethdev.c b/drivers/net/octeontx2/otx2_ethdev.c
> index 0834de0..0a420c1 100644
> --- a/drivers/net/octeontx2/otx2_ethdev.c
> +++ b/drivers/net/octeontx2/otx2_ethdev.c
> @@ -1326,6 +1326,7 @@ otx2_nix_tx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t sq,
>         txq->qconf.nb_desc = nb_desc;
>         memcpy(&txq->qconf.conf.tx, tx_conf, sizeof(struct rte_eth_txconf));
>
> +       txq->lso_tun_fmt = dev->lso_tun_fmt;
>         otx2_nix_form_default_desc(txq);
>
>         otx2_nix_dbg("sq=%d fc=%p offload=0x%" PRIx64 " sqb=0x%" PRIx64 ""
> @@ -1676,7 +1677,7 @@ nix_setup_lso_formats(struct otx2_eth_dev *dev)
>         struct otx2_mbox *mbox = dev->mbox;
>         struct nix_lso_format_cfg_rsp *rsp;
>         struct nix_lso_format_cfg *req;
> -       uint8_t base;
> +       uint8_t *fmt;
>         int rc;
>
>         /* Skip if TSO was not requested */
> @@ -1691,11 +1692,9 @@ nix_setup_lso_formats(struct otx2_eth_dev *dev)
>         if (rc)
>                 return rc;
>
> -       base = rsp->lso_format_idx;
> -       if (base != NIX_LSO_FORMAT_IDX_TSOV4)
> +       if (rsp->lso_format_idx != NIX_LSO_FORMAT_IDX_TSOV4)
>                 return -EFAULT;
> -       dev->lso_base_idx = base;
> -       otx2_nix_dbg("tcpv4 lso fmt=%u", base);
> +       otx2_nix_dbg("tcpv4 lso fmt=%u", rsp->lso_format_idx);
>
>
>         /*
> @@ -1707,9 +1706,9 @@ nix_setup_lso_formats(struct otx2_eth_dev *dev)
>         if (rc)
>                 return rc;
>
> -       if (rsp->lso_format_idx != base + 1)
> +       if (rsp->lso_format_idx != NIX_LSO_FORMAT_IDX_TSOV6)
>                 return -EFAULT;
> -       otx2_nix_dbg("tcpv6 lso fmt=%u\n", base + 1);
> +       otx2_nix_dbg("tcpv6 lso fmt=%u\n", rsp->lso_format_idx);
>
>         /*
>          * IPv4/UDP/TUN HDR/IPv4/TCP LSO
> @@ -1720,9 +1719,8 @@ nix_setup_lso_formats(struct otx2_eth_dev *dev)
>         if (rc)
>                 return rc;
>
> -       if (rsp->lso_format_idx != base + 2)
> -               return -EFAULT;
> -       otx2_nix_dbg("udp tun v4v4 fmt=%u\n", base + 2);
> +       dev->lso_udp_tun_idx[NIX_LSO_TUN_V4V4] = rsp->lso_format_idx;
> +       otx2_nix_dbg("udp tun v4v4 fmt=%u\n", rsp->lso_format_idx);
>
>         /*
>          * IPv4/UDP/TUN HDR/IPv6/TCP LSO
> @@ -1733,9 +1731,8 @@ nix_setup_lso_formats(struct otx2_eth_dev *dev)
>         if (rc)
>                 return rc;
>
> -       if (rsp->lso_format_idx != base + 3)
> -               return -EFAULT;
> -       otx2_nix_dbg("udp tun v4v6 fmt=%u\n", base + 3);
> +       dev->lso_udp_tun_idx[NIX_LSO_TUN_V4V6] = rsp->lso_format_idx;
> +       otx2_nix_dbg("udp tun v4v6 fmt=%u\n", rsp->lso_format_idx);
>
>         /*
>          * IPv6/UDP/TUN HDR/IPv4/TCP LSO
> @@ -1746,9 +1743,8 @@ nix_setup_lso_formats(struct otx2_eth_dev *dev)
>         if (rc)
>                 return rc;
>
> -       if (rsp->lso_format_idx != base + 4)
> -               return -EFAULT;
> -       otx2_nix_dbg("udp tun v6v4 fmt=%u\n", base + 4);
> +       dev->lso_udp_tun_idx[NIX_LSO_TUN_V6V4] = rsp->lso_format_idx;
> +       otx2_nix_dbg("udp tun v6v4 fmt=%u\n", rsp->lso_format_idx);
>
>         /*
>          * IPv6/UDP/TUN HDR/IPv6/TCP LSO
> @@ -1758,9 +1754,9 @@ nix_setup_lso_formats(struct otx2_eth_dev *dev)
>         rc = otx2_mbox_process_msg(mbox, (void *)&rsp);
>         if (rc)
>                 return rc;
> -       if (rsp->lso_format_idx != base + 5)
> -               return -EFAULT;
> -       otx2_nix_dbg("udp tun v6v6 fmt=%u\n", base + 5);
> +
> +       dev->lso_udp_tun_idx[NIX_LSO_TUN_V6V6] = rsp->lso_format_idx;
> +       otx2_nix_dbg("udp tun v6v6 fmt=%u\n", rsp->lso_format_idx);
>
>         /*
>          * IPv4/TUN HDR/IPv4/TCP LSO
> @@ -1771,9 +1767,8 @@ nix_setup_lso_formats(struct otx2_eth_dev *dev)
>         if (rc)
>                 return rc;
>
> -       if (rsp->lso_format_idx != base + 6)
> -               return -EFAULT;
> -       otx2_nix_dbg("tun v4v4 fmt=%u\n", base + 6);
> +       dev->lso_tun_idx[NIX_LSO_TUN_V4V4] = rsp->lso_format_idx;
> +       otx2_nix_dbg("tun v4v4 fmt=%u\n", rsp->lso_format_idx);
>
>         /*
>          * IPv4/TUN HDR/IPv6/TCP LSO
> @@ -1784,9 +1779,8 @@ nix_setup_lso_formats(struct otx2_eth_dev *dev)
>         if (rc)
>                 return rc;
>
> -       if (rsp->lso_format_idx != base + 7)
> -               return -EFAULT;
> -       otx2_nix_dbg("tun v4v6 fmt=%u\n", base + 7);
> +       dev->lso_tun_idx[NIX_LSO_TUN_V4V6] = rsp->lso_format_idx;
> +       otx2_nix_dbg("tun v4v6 fmt=%u\n", rsp->lso_format_idx);
>
>         /*
>          * IPv6/TUN HDR/IPv4/TCP LSO
> @@ -1797,9 +1791,8 @@ nix_setup_lso_formats(struct otx2_eth_dev *dev)
>         if (rc)
>                 return rc;
>
> -       if (rsp->lso_format_idx != base + 8)
> -               return -EFAULT;
> -       otx2_nix_dbg("tun v6v4 fmt=%u\n", base + 8);
> +       dev->lso_tun_idx[NIX_LSO_TUN_V6V4] = rsp->lso_format_idx;
> +       otx2_nix_dbg("tun v6v4 fmt=%u\n", rsp->lso_format_idx);
>
>         /*
>          * IPv6/TUN HDR/IPv6/TCP LSO
> @@ -1809,9 +1802,26 @@ nix_setup_lso_formats(struct otx2_eth_dev *dev)
>         rc = otx2_mbox_process_msg(mbox, (void *)&rsp);
>         if (rc)
>                 return rc;
> -       if (rsp->lso_format_idx != base + 9)
> -               return -EFAULT;
> -       otx2_nix_dbg("tun v6v6 fmt=%u\n", base + 9);
> +
> +       dev->lso_tun_idx[NIX_LSO_TUN_V6V6] = rsp->lso_format_idx;
> +       otx2_nix_dbg("tun v6v6 fmt=%u\n", rsp->lso_format_idx);
> +
> +       /* Save all tun formats into u64 for fast path.
> +        * Lower 32bit has non-udp tunnel formats.
> +        * Upper 32bit has udp tunnel formats.
> +        */
> +       fmt = dev->lso_tun_idx;
> +       dev->lso_tun_fmt = ((uint64_t)fmt[NIX_LSO_TUN_V4V4] |
> +                           (uint64_t)fmt[NIX_LSO_TUN_V4V6] << 8 |
> +                           (uint64_t)fmt[NIX_LSO_TUN_V6V4] << 16 |
> +                           (uint64_t)fmt[NIX_LSO_TUN_V6V6] << 24);
> +
> +       fmt = dev->lso_udp_tun_idx;
> +       dev->lso_tun_fmt |= ((uint64_t)fmt[NIX_LSO_TUN_V4V4] << 32 |
> +                            (uint64_t)fmt[NIX_LSO_TUN_V4V6] << 40 |
> +                            (uint64_t)fmt[NIX_LSO_TUN_V6V4] << 48 |
> +                            (uint64_t)fmt[NIX_LSO_TUN_V6V6] << 56);
> +
>         return 0;
>  }
>
> diff --git a/drivers/net/octeontx2/otx2_ethdev.h b/drivers/net/octeontx2/otx2_ethdev.h
> index ac50da7..381e6b6 100644
> --- a/drivers/net/octeontx2/otx2_ethdev.h
> +++ b/drivers/net/octeontx2/otx2_ethdev.h
> @@ -182,6 +182,14 @@ enum nix_q_size_e {
>         nix_q_size_max
>  };
>
> +enum nix_lso_tun_type {
> +       NIX_LSO_TUN_V4V4,
> +       NIX_LSO_TUN_V4V6,
> +       NIX_LSO_TUN_V6V4,
> +       NIX_LSO_TUN_V6V6,
> +       NIX_LSO_TUN_MAX,
> +};
> +
>  struct otx2_qint {
>         struct rte_eth_dev *eth_dev;
>         uint8_t qintx;
> @@ -276,7 +284,9 @@ struct otx2_eth_dev {
>         uint8_t tx_chan_cnt;
>         uint8_t lso_tsov4_idx;
>         uint8_t lso_tsov6_idx;
> -       uint8_t lso_base_idx;
> +       uint8_t lso_udp_tun_idx[NIX_LSO_TUN_MAX];
> +       uint8_t lso_tun_idx[NIX_LSO_TUN_MAX];
> +       uint64_t lso_tun_fmt;
>         uint8_t mac_addr[RTE_ETHER_ADDR_LEN];
>         uint8_t mkex_pfl_name[MKEX_NAME_LEN];
>         uint8_t max_mac_entries;
> @@ -359,6 +369,7 @@ struct otx2_eth_txq {
>         rte_iova_t fc_iova;
>         uint16_t sqes_per_sqb_log2;
>         int16_t nb_sqb_bufs_adj;
> +       uint64_t lso_tun_fmt;
>         RTE_MARKER slow_path_start;
>         uint16_t nb_sqb_bufs;
>         uint16_t sq;
> diff --git a/drivers/net/octeontx2/otx2_tx.c b/drivers/net/octeontx2/otx2_tx.c
> index 439c46f..ff299f0 100644
> --- a/drivers/net/octeontx2/otx2_tx.c
> +++ b/drivers/net/octeontx2/otx2_tx.c
> @@ -27,6 +27,7 @@ nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
>         struct otx2_eth_txq *txq = tx_queue; uint16_t i;
>         const rte_iova_t io_addr = txq->io_addr;
>         void *lmt_addr = txq->lmt_addr;
> +       uint64_t lso_tun_fmt;
>
>         NIX_XMIT_FC_OR_RETURN(txq, pkts);
>
> @@ -34,6 +35,7 @@ nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
>
>         /* Perform header writes before barrier for TSO */
>         if (flags & NIX_TX_OFFLOAD_TSO_F) {
> +               lso_tun_fmt = txq->lso_tun_fmt;
>                 for (i = 0; i < pkts; i++)
>                         otx2_nix_xmit_prepare_tso(tx_pkts[i], flags);
>         }
> @@ -45,7 +47,7 @@ nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
>                 rte_io_wmb();
>
>         for (i = 0; i < pkts; i++) {
> -               otx2_nix_xmit_prepare(tx_pkts[i], cmd, flags);
> +               otx2_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt);
>                 /* Passing no of segdw as 4: HDR + EXT + SG + SMEM */
>                 otx2_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0],
>                                              tx_pkts[i]->ol_flags, 4, flags);
> @@ -65,6 +67,7 @@ nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
>         struct otx2_eth_txq *txq = tx_queue; uint64_t i;
>         const rte_iova_t io_addr = txq->io_addr;
>         void *lmt_addr = txq->lmt_addr;
> +       uint64_t lso_tun_fmt;
>         uint16_t segdw;
>
>         NIX_XMIT_FC_OR_RETURN(txq, pkts);
> @@ -73,6 +76,7 @@ nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
>
>         /* Perform header writes before barrier for TSO */
>         if (flags & NIX_TX_OFFLOAD_TSO_F) {
> +               lso_tun_fmt = txq->lso_tun_fmt;
>                 for (i = 0; i < pkts; i++)
>                         otx2_nix_xmit_prepare_tso(tx_pkts[i], flags);
>         }
> @@ -84,7 +88,7 @@ nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
>                 rte_io_wmb();
>
>         for (i = 0; i < pkts; i++) {
> -               otx2_nix_xmit_prepare(tx_pkts[i], cmd, flags);
> +               otx2_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt);
>                 segdw = otx2_nix_prepare_mseg(tx_pkts[i], cmd, flags);
>                 otx2_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0],
>                                              tx_pkts[i]->ol_flags, segdw,
> diff --git a/drivers/net/octeontx2/otx2_tx.h b/drivers/net/octeontx2/otx2_tx.h
> index a97b160..486248d 100644
> --- a/drivers/net/octeontx2/otx2_tx.h
> +++ b/drivers/net/octeontx2/otx2_tx.h
> @@ -197,7 +197,8 @@ otx2_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
>  }
>
>  static __rte_always_inline void
> -otx2_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
> +otx2_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,
> +                     const uint64_t lso_tun_fmt)
>  {
>         struct nix_send_ext_s *send_hdr_ext;
>         struct nix_send_hdr_s *send_hdr;
> @@ -339,14 +340,15 @@ otx2_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
>                     (ol_flags & PKT_TX_TUNNEL_MASK)) {
>                         const uint8_t is_udp_tun = (NIX_UDP_TUN_BITMASK >>
>                                 ((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) & 0x1;
> +                       uint8_t shift = is_udp_tun ? 32 : 0;
> +
> +                       shift += (!!(ol_flags & PKT_TX_OUTER_IPV6) << 4);
> +                       shift += (!!(ol_flags & PKT_TX_IPV6) << 3);
>
>                         w1.il4type = NIX_SENDL4TYPE_TCP_CKSUM;
>                         w1.ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
>                         /* Update format for UDP tunneled packet */
> -                       send_hdr_ext->w0.lso_format += is_udp_tun ? 2 : 6;
> -
> -                       send_hdr_ext->w0.lso_format +=
> -                               !!(ol_flags & PKT_TX_OUTER_IPV6) << 1;
> +                       send_hdr_ext->w0.lso_format = (lso_tun_fmt >> shift);
>                 }
>         }
>
> --
> 2.8.4
>
  

Patch

diff --git a/drivers/event/octeontx2/otx2_worker.h b/drivers/event/octeontx2/otx2_worker.h
index fd149be..3e36dce 100644
--- a/drivers/event/octeontx2/otx2_worker.h
+++ b/drivers/event/octeontx2/otx2_worker.h
@@ -264,7 +264,7 @@  otx2_ssogws_prepare_pkt(const struct otx2_eth_txq *txq, struct rte_mbuf *m,
 			uint64_t *cmd, const uint32_t flags)
 {
 	otx2_lmt_mov(cmd, txq->cmd, otx2_nix_tx_ext_subs(flags));
-	otx2_nix_xmit_prepare(m, cmd, flags);
+	otx2_nix_xmit_prepare(m, cmd, flags, txq->lso_tun_fmt);
 }
 
 static __rte_always_inline uint16_t
diff --git a/drivers/net/octeontx2/otx2_ethdev.c b/drivers/net/octeontx2/otx2_ethdev.c
index 0834de0..0a420c1 100644
--- a/drivers/net/octeontx2/otx2_ethdev.c
+++ b/drivers/net/octeontx2/otx2_ethdev.c
@@ -1326,6 +1326,7 @@  otx2_nix_tx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t sq,
 	txq->qconf.nb_desc = nb_desc;
 	memcpy(&txq->qconf.conf.tx, tx_conf, sizeof(struct rte_eth_txconf));
 
+	txq->lso_tun_fmt = dev->lso_tun_fmt;
 	otx2_nix_form_default_desc(txq);
 
 	otx2_nix_dbg("sq=%d fc=%p offload=0x%" PRIx64 " sqb=0x%" PRIx64 ""
@@ -1676,7 +1677,7 @@  nix_setup_lso_formats(struct otx2_eth_dev *dev)
 	struct otx2_mbox *mbox = dev->mbox;
 	struct nix_lso_format_cfg_rsp *rsp;
 	struct nix_lso_format_cfg *req;
-	uint8_t base;
+	uint8_t *fmt;
 	int rc;
 
 	/* Skip if TSO was not requested */
@@ -1691,11 +1692,9 @@  nix_setup_lso_formats(struct otx2_eth_dev *dev)
 	if (rc)
 		return rc;
 
-	base = rsp->lso_format_idx;
-	if (base != NIX_LSO_FORMAT_IDX_TSOV4)
+	if (rsp->lso_format_idx != NIX_LSO_FORMAT_IDX_TSOV4)
 		return -EFAULT;
-	dev->lso_base_idx = base;
-	otx2_nix_dbg("tcpv4 lso fmt=%u", base);
+	otx2_nix_dbg("tcpv4 lso fmt=%u", rsp->lso_format_idx);
 
 
 	/*
@@ -1707,9 +1706,9 @@  nix_setup_lso_formats(struct otx2_eth_dev *dev)
 	if (rc)
 		return rc;
 
-	if (rsp->lso_format_idx != base + 1)
+	if (rsp->lso_format_idx != NIX_LSO_FORMAT_IDX_TSOV6)
 		return -EFAULT;
-	otx2_nix_dbg("tcpv6 lso fmt=%u\n", base + 1);
+	otx2_nix_dbg("tcpv6 lso fmt=%u\n", rsp->lso_format_idx);
 
 	/*
 	 * IPv4/UDP/TUN HDR/IPv4/TCP LSO
@@ -1720,9 +1719,8 @@  nix_setup_lso_formats(struct otx2_eth_dev *dev)
 	if (rc)
 		return rc;
 
-	if (rsp->lso_format_idx != base + 2)
-		return -EFAULT;
-	otx2_nix_dbg("udp tun v4v4 fmt=%u\n", base + 2);
+	dev->lso_udp_tun_idx[NIX_LSO_TUN_V4V4] = rsp->lso_format_idx;
+	otx2_nix_dbg("udp tun v4v4 fmt=%u\n", rsp->lso_format_idx);
 
 	/*
 	 * IPv4/UDP/TUN HDR/IPv6/TCP LSO
@@ -1733,9 +1731,8 @@  nix_setup_lso_formats(struct otx2_eth_dev *dev)
 	if (rc)
 		return rc;
 
-	if (rsp->lso_format_idx != base + 3)
-		return -EFAULT;
-	otx2_nix_dbg("udp tun v4v6 fmt=%u\n", base + 3);
+	dev->lso_udp_tun_idx[NIX_LSO_TUN_V4V6] = rsp->lso_format_idx;
+	otx2_nix_dbg("udp tun v4v6 fmt=%u\n", rsp->lso_format_idx);
 
 	/*
 	 * IPv6/UDP/TUN HDR/IPv4/TCP LSO
@@ -1746,9 +1743,8 @@  nix_setup_lso_formats(struct otx2_eth_dev *dev)
 	if (rc)
 		return rc;
 
-	if (rsp->lso_format_idx != base + 4)
-		return -EFAULT;
-	otx2_nix_dbg("udp tun v6v4 fmt=%u\n", base + 4);
+	dev->lso_udp_tun_idx[NIX_LSO_TUN_V6V4] = rsp->lso_format_idx;
+	otx2_nix_dbg("udp tun v6v4 fmt=%u\n", rsp->lso_format_idx);
 
 	/*
 	 * IPv6/UDP/TUN HDR/IPv6/TCP LSO
@@ -1758,9 +1754,9 @@  nix_setup_lso_formats(struct otx2_eth_dev *dev)
 	rc = otx2_mbox_process_msg(mbox, (void *)&rsp);
 	if (rc)
 		return rc;
-	if (rsp->lso_format_idx != base + 5)
-		return -EFAULT;
-	otx2_nix_dbg("udp tun v6v6 fmt=%u\n", base + 5);
+
+	dev->lso_udp_tun_idx[NIX_LSO_TUN_V6V6] = rsp->lso_format_idx;
+	otx2_nix_dbg("udp tun v6v6 fmt=%u\n", rsp->lso_format_idx);
 
 	/*
 	 * IPv4/TUN HDR/IPv4/TCP LSO
@@ -1771,9 +1767,8 @@  nix_setup_lso_formats(struct otx2_eth_dev *dev)
 	if (rc)
 		return rc;
 
-	if (rsp->lso_format_idx != base + 6)
-		return -EFAULT;
-	otx2_nix_dbg("tun v4v4 fmt=%u\n", base + 6);
+	dev->lso_tun_idx[NIX_LSO_TUN_V4V4] = rsp->lso_format_idx;
+	otx2_nix_dbg("tun v4v4 fmt=%u\n", rsp->lso_format_idx);
 
 	/*
 	 * IPv4/TUN HDR/IPv6/TCP LSO
@@ -1784,9 +1779,8 @@  nix_setup_lso_formats(struct otx2_eth_dev *dev)
 	if (rc)
 		return rc;
 
-	if (rsp->lso_format_idx != base + 7)
-		return -EFAULT;
-	otx2_nix_dbg("tun v4v6 fmt=%u\n", base + 7);
+	dev->lso_tun_idx[NIX_LSO_TUN_V4V6] = rsp->lso_format_idx;
+	otx2_nix_dbg("tun v4v6 fmt=%u\n", rsp->lso_format_idx);
 
 	/*
 	 * IPv6/TUN HDR/IPv4/TCP LSO
@@ -1797,9 +1791,8 @@  nix_setup_lso_formats(struct otx2_eth_dev *dev)
 	if (rc)
 		return rc;
 
-	if (rsp->lso_format_idx != base + 8)
-		return -EFAULT;
-	otx2_nix_dbg("tun v6v4 fmt=%u\n", base + 8);
+	dev->lso_tun_idx[NIX_LSO_TUN_V6V4] = rsp->lso_format_idx;
+	otx2_nix_dbg("tun v6v4 fmt=%u\n", rsp->lso_format_idx);
 
 	/*
 	 * IPv6/TUN HDR/IPv6/TCP LSO
@@ -1809,9 +1802,26 @@  nix_setup_lso_formats(struct otx2_eth_dev *dev)
 	rc = otx2_mbox_process_msg(mbox, (void *)&rsp);
 	if (rc)
 		return rc;
-	if (rsp->lso_format_idx != base + 9)
-		return -EFAULT;
-	otx2_nix_dbg("tun v6v6 fmt=%u\n", base + 9);
+
+	dev->lso_tun_idx[NIX_LSO_TUN_V6V6] = rsp->lso_format_idx;
+	otx2_nix_dbg("tun v6v6 fmt=%u\n", rsp->lso_format_idx);
+
+	/* Save all tun formats into u64 for fast path.
+	 * Lower 32bit has non-udp tunnel formats.
+	 * Upper 32bit has udp tunnel formats.
+	 */
+	fmt = dev->lso_tun_idx;
+	dev->lso_tun_fmt = ((uint64_t)fmt[NIX_LSO_TUN_V4V4] |
+			    (uint64_t)fmt[NIX_LSO_TUN_V4V6] << 8 |
+			    (uint64_t)fmt[NIX_LSO_TUN_V6V4] << 16 |
+			    (uint64_t)fmt[NIX_LSO_TUN_V6V6] << 24);
+
+	fmt = dev->lso_udp_tun_idx;
+	dev->lso_tun_fmt |= ((uint64_t)fmt[NIX_LSO_TUN_V4V4] << 32 |
+			     (uint64_t)fmt[NIX_LSO_TUN_V4V6] << 40 |
+			     (uint64_t)fmt[NIX_LSO_TUN_V6V4] << 48 |
+			     (uint64_t)fmt[NIX_LSO_TUN_V6V6] << 56);
+
 	return 0;
 }
 
diff --git a/drivers/net/octeontx2/otx2_ethdev.h b/drivers/net/octeontx2/otx2_ethdev.h
index ac50da7..381e6b6 100644
--- a/drivers/net/octeontx2/otx2_ethdev.h
+++ b/drivers/net/octeontx2/otx2_ethdev.h
@@ -182,6 +182,14 @@  enum nix_q_size_e {
 	nix_q_size_max
 };
 
+enum nix_lso_tun_type {
+	NIX_LSO_TUN_V4V4,
+	NIX_LSO_TUN_V4V6,
+	NIX_LSO_TUN_V6V4,
+	NIX_LSO_TUN_V6V6,
+	NIX_LSO_TUN_MAX,
+};
+
 struct otx2_qint {
 	struct rte_eth_dev *eth_dev;
 	uint8_t qintx;
@@ -276,7 +284,9 @@  struct otx2_eth_dev {
 	uint8_t tx_chan_cnt;
 	uint8_t lso_tsov4_idx;
 	uint8_t lso_tsov6_idx;
-	uint8_t lso_base_idx;
+	uint8_t lso_udp_tun_idx[NIX_LSO_TUN_MAX];
+	uint8_t lso_tun_idx[NIX_LSO_TUN_MAX];
+	uint64_t lso_tun_fmt;
 	uint8_t mac_addr[RTE_ETHER_ADDR_LEN];
 	uint8_t mkex_pfl_name[MKEX_NAME_LEN];
 	uint8_t max_mac_entries;
@@ -359,6 +369,7 @@  struct otx2_eth_txq {
 	rte_iova_t fc_iova;
 	uint16_t sqes_per_sqb_log2;
 	int16_t nb_sqb_bufs_adj;
+	uint64_t lso_tun_fmt;
 	RTE_MARKER slow_path_start;
 	uint16_t nb_sqb_bufs;
 	uint16_t sq;
diff --git a/drivers/net/octeontx2/otx2_tx.c b/drivers/net/octeontx2/otx2_tx.c
index 439c46f..ff299f0 100644
--- a/drivers/net/octeontx2/otx2_tx.c
+++ b/drivers/net/octeontx2/otx2_tx.c
@@ -27,6 +27,7 @@  nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 	struct otx2_eth_txq *txq = tx_queue; uint16_t i;
 	const rte_iova_t io_addr = txq->io_addr;
 	void *lmt_addr = txq->lmt_addr;
+	uint64_t lso_tun_fmt;
 
 	NIX_XMIT_FC_OR_RETURN(txq, pkts);
 
@@ -34,6 +35,7 @@  nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 
 	/* Perform header writes before barrier for TSO */
 	if (flags & NIX_TX_OFFLOAD_TSO_F) {
+		lso_tun_fmt = txq->lso_tun_fmt;
 		for (i = 0; i < pkts; i++)
 			otx2_nix_xmit_prepare_tso(tx_pkts[i], flags);
 	}
@@ -45,7 +47,7 @@  nix_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		rte_io_wmb();
 
 	for (i = 0; i < pkts; i++) {
-		otx2_nix_xmit_prepare(tx_pkts[i], cmd, flags);
+		otx2_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt);
 		/* Passing no of segdw as 4: HDR + EXT + SG + SMEM */
 		otx2_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0],
 					     tx_pkts[i]->ol_flags, 4, flags);
@@ -65,6 +67,7 @@  nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
 	struct otx2_eth_txq *txq = tx_queue; uint64_t i;
 	const rte_iova_t io_addr = txq->io_addr;
 	void *lmt_addr = txq->lmt_addr;
+	uint64_t lso_tun_fmt;
 	uint16_t segdw;
 
 	NIX_XMIT_FC_OR_RETURN(txq, pkts);
@@ -73,6 +76,7 @@  nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
 
 	/* Perform header writes before barrier for TSO */
 	if (flags & NIX_TX_OFFLOAD_TSO_F) {
+		lso_tun_fmt = txq->lso_tun_fmt;
 		for (i = 0; i < pkts; i++)
 			otx2_nix_xmit_prepare_tso(tx_pkts[i], flags);
 	}
@@ -84,7 +88,7 @@  nix_xmit_pkts_mseg(void *tx_queue, struct rte_mbuf **tx_pkts,
 		rte_io_wmb();
 
 	for (i = 0; i < pkts; i++) {
-		otx2_nix_xmit_prepare(tx_pkts[i], cmd, flags);
+		otx2_nix_xmit_prepare(tx_pkts[i], cmd, flags, lso_tun_fmt);
 		segdw = otx2_nix_prepare_mseg(tx_pkts[i], cmd, flags);
 		otx2_nix_xmit_prepare_tstamp(cmd, &txq->cmd[0],
 					     tx_pkts[i]->ol_flags, segdw,
diff --git a/drivers/net/octeontx2/otx2_tx.h b/drivers/net/octeontx2/otx2_tx.h
index a97b160..486248d 100644
--- a/drivers/net/octeontx2/otx2_tx.h
+++ b/drivers/net/octeontx2/otx2_tx.h
@@ -197,7 +197,8 @@  otx2_nix_xmit_prepare_tso(struct rte_mbuf *m, const uint64_t flags)
 }
 
 static __rte_always_inline void
-otx2_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
+otx2_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags,
+		      const uint64_t lso_tun_fmt)
 {
 	struct nix_send_ext_s *send_hdr_ext;
 	struct nix_send_hdr_s *send_hdr;
@@ -339,14 +340,15 @@  otx2_nix_xmit_prepare(struct rte_mbuf *m, uint64_t *cmd, const uint16_t flags)
 		    (ol_flags & PKT_TX_TUNNEL_MASK)) {
 			const uint8_t is_udp_tun = (NIX_UDP_TUN_BITMASK >>
 				((ol_flags & PKT_TX_TUNNEL_MASK) >> 45)) & 0x1;
+			uint8_t shift = is_udp_tun ? 32 : 0;
+
+			shift += (!!(ol_flags & PKT_TX_OUTER_IPV6) << 4);
+			shift += (!!(ol_flags & PKT_TX_IPV6) << 3);
 
 			w1.il4type = NIX_SENDL4TYPE_TCP_CKSUM;
 			w1.ol4type = is_udp_tun ? NIX_SENDL4TYPE_UDP_CKSUM : 0;
 			/* Update format for UDP tunneled packet */
-			send_hdr_ext->w0.lso_format += is_udp_tun ? 2 : 6;
-
-			send_hdr_ext->w0.lso_format +=
-				!!(ol_flags & PKT_TX_OUTER_IPV6) << 1;
+			send_hdr_ext->w0.lso_format = (lso_tun_fmt >> shift);
 		}
 	}