[v5,1/6] net/cnxk: add multi seg Rx vector routine

Message ID 20210629074424.264-1-pbhagavatula@marvell.com (mailing list archive)
State Accepted, archived
Delegated to: Jerin Jacob
Headers
Series [v5,1/6] net/cnxk: add multi seg Rx vector routine |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/iol-testing warning apply patch failure

Commit Message

Pavan Nikhilesh Bhagavatula June 29, 2021, 7:44 a.m. UTC
  From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add a multi-segment Rx vector routine: form the primary mbufs using the
vector path, then switch to the scalar path when extracting segments.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
Series-acked-by:  Nithin Dabilpuram <ndabilpuram@marvell.com>
---
 v5 Changes:
 - Fix incorrect mbuf assignment.
 v4 Changes:
 - Split patches for easier merge.
 - Rebase on dpdk-next-net-mrvl.
 v3 Changes:
 - Spell check.

 drivers/net/cnxk/cn10k_rx.c          | 31 +++++++++++------
 drivers/net/cnxk/cn10k_rx.h          | 51 +++++++++++++++++++++-------
 drivers/net/cnxk/cn10k_rx_vec_mseg.c | 17 ++++++++++
 drivers/net/cnxk/cn9k_rx.c           | 31 +++++++++++------
 drivers/net/cnxk/cn9k_rx.h           | 51 +++++++++++++++++++++-------
 drivers/net/cnxk/cn9k_rx_vec_mseg.c  | 18 ++++++++++
 drivers/net/cnxk/meson.build         |  2 ++
 7 files changed, 157 insertions(+), 44 deletions(-)
 create mode 100644 drivers/net/cnxk/cn10k_rx_vec_mseg.c
 create mode 100644 drivers/net/cnxk/cn9k_rx_vec_mseg.c

--
2.17.1
  

Comments

Jerin Jacob June 29, 2021, 4:20 p.m. UTC | #1
On Tue, Jun 29, 2021 at 1:14 PM <pbhagavatula@marvell.com> wrote:
>
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>
> Add a multi-segment Rx vector routine: form the primary mbufs using the
> vector path, then switch to the scalar path when extracting segments.
>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> Series-acked-by:  Nithin Dabilpuram <ndabilpuram@marvell.com>


Series applied to dpdk-next-net-mrvl/for-dpdk-main. Thanks.


> ---
>  v5 Changes:
>  - Fix incorrect mbuf assignment.
>  v4 Changes:
>  - Split patches for easier merge.
>  - Rebase on dpdk-next-net-mrvl.
>  v3 Changes:
>  - Spell check.
>
>  drivers/net/cnxk/cn10k_rx.c          | 31 +++++++++++------
>  drivers/net/cnxk/cn10k_rx.h          | 51 +++++++++++++++++++++-------
>  drivers/net/cnxk/cn10k_rx_vec_mseg.c | 17 ++++++++++
>  drivers/net/cnxk/cn9k_rx.c           | 31 +++++++++++------
>  drivers/net/cnxk/cn9k_rx.h           | 51 +++++++++++++++++++++-------
>  drivers/net/cnxk/cn9k_rx_vec_mseg.c  | 18 ++++++++++
>  drivers/net/cnxk/meson.build         |  2 ++
>  7 files changed, 157 insertions(+), 44 deletions(-)
>  create mode 100644 drivers/net/cnxk/cn10k_rx_vec_mseg.c
>  create mode 100644 drivers/net/cnxk/cn9k_rx_vec_mseg.c
>
> diff --git a/drivers/net/cnxk/cn10k_rx.c b/drivers/net/cnxk/cn10k_rx.c
> index 5c956c06b..3a9fd7130 100644
> --- a/drivers/net/cnxk/cn10k_rx.c
> +++ b/drivers/net/cnxk/cn10k_rx.c
> @@ -29,6 +29,8 @@ pick_rx_func(struct rte_eth_dev *eth_dev,
>                 [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_CHECKSUM_F)]
>                 [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_PTYPE_F)]
>                 [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_RSS_F)];
> +
> +       rte_atomic_thread_fence(__ATOMIC_RELEASE);
>  }
>
>  void
> @@ -60,20 +62,29 @@ cn10k_eth_set_rx_function(struct rte_eth_dev *eth_dev)
>  #undef R
>         };
>
> -       /* For PTP enabled, scalar rx function should be chosen as most of the
> -        * PTP apps are implemented to rx burst 1 pkt.
> -        */
> -       if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP)
> -               pick_rx_func(eth_dev, nix_eth_rx_burst);
> -       else
> -               pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
> +       const eth_rx_burst_t nix_eth_rx_vec_burst_mseg[2][2][2][2][2][2] = {
> +#define R(name, f5, f4, f3, f2, f1, f0, flags)                                 \
> +       [f5][f4][f3][f2][f1][f0] = cn10k_nix_recv_pkts_vec_mseg_##name,
>
> -       if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
> -               pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
> +               NIX_RX_FASTPATH_MODES
> +#undef R
> +       };
>
>         /* Copy multi seg version with no offload for tear down sequence */
>         if (rte_eal_process_type() == RTE_PROC_PRIMARY)
>                 dev->rx_pkt_burst_no_offload =
>                         nix_eth_rx_burst_mseg[0][0][0][0][0][0];
> -       rte_mb();
> +
> +       /* For PTP enabled, scalar rx function should be chosen as most of the
> +        * PTP apps are implemented to rx burst 1 pkt.
> +        */
> +       if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) {
> +               if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
> +                       return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
> +               return pick_rx_func(eth_dev, nix_eth_rx_burst);
> +       }
> +
> +       if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
> +               return pick_rx_func(eth_dev, nix_eth_rx_vec_burst_mseg);
> +       return pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
>  }
> diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h
> index 1cc37cbaa..5926ff7f4 100644
> --- a/drivers/net/cnxk/cn10k_rx.h
> +++ b/drivers/net/cnxk/cn10k_rx.h
> @@ -119,8 +119,15 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
>
>         sg = *(const uint64_t *)(rx + 1);
>         nb_segs = (sg >> 48) & 0x3;
> -       mbuf->nb_segs = nb_segs;
> +
> +       if (nb_segs == 1) {
> +               mbuf->next = NULL;
> +               return;
> +       }
> +
> +       mbuf->pkt_len = rx->pkt_lenm1 + 1;
>         mbuf->data_len = sg & 0xFFFF;
> +       mbuf->nb_segs = nb_segs;
>         sg = sg >> 16;
>
>         eol = ((const rte_iova_t *)(rx + 1) + ((rx->desc_sizem1 + 1) << 1));
> @@ -195,15 +202,14 @@ cn10k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag,
>                 ol_flags = nix_update_match_id(rx->match_id, ol_flags, mbuf);
>
>         mbuf->ol_flags = ol_flags;
> -       *(uint64_t *)(&mbuf->rearm_data) = val;
>         mbuf->pkt_len = len;
> +       mbuf->data_len = len;
> +       *(uint64_t *)(&mbuf->rearm_data) = val;
>
> -       if (flag & NIX_RX_MULTI_SEG_F) {
> +       if (flag & NIX_RX_MULTI_SEG_F)
>                 nix_cqe_xtract_mseg(rx, mbuf, val);
> -       } else {
> -               mbuf->data_len = len;
> +       else
>                 mbuf->next = NULL;
> -       }
>  }
>
>  static inline uint16_t
> @@ -481,16 +487,34 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
>                 vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2);
>                 vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3);
>
> -               /* Update that no more segments */
> -               mbuf0->next = NULL;
> -               mbuf1->next = NULL;
> -               mbuf2->next = NULL;
> -               mbuf3->next = NULL;
> -
>                 /* Store the mbufs to rx_pkts */
>                 vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01);
>                 vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23);
>
> +               if (flags & NIX_RX_MULTI_SEG_F) {
> +                       /* Multi segment is enable build mseg list for
> +                        * individual mbufs in scalar mode.
> +                        */
> +                       nix_cqe_xtract_mseg((union nix_rx_parse_u *)
> +                                           (cq0 + CQE_SZ(0) + 8), mbuf0,
> +                                           mbuf_initializer);
> +                       nix_cqe_xtract_mseg((union nix_rx_parse_u *)
> +                                           (cq0 + CQE_SZ(1) + 8), mbuf1,
> +                                           mbuf_initializer);
> +                       nix_cqe_xtract_mseg((union nix_rx_parse_u *)
> +                                           (cq0 + CQE_SZ(2) + 8), mbuf2,
> +                                           mbuf_initializer);
> +                       nix_cqe_xtract_mseg((union nix_rx_parse_u *)
> +                                           (cq0 + CQE_SZ(3) + 8), mbuf3,
> +                                           mbuf_initializer);
> +               } else {
> +                       /* Update that no more segments */
> +                       mbuf0->next = NULL;
> +                       mbuf1->next = NULL;
> +                       mbuf2->next = NULL;
> +                       mbuf3->next = NULL;
> +               }
> +
>                 /* Prefetch mbufs */
>                 roc_prefetch_store_keep(mbuf0);
>                 roc_prefetch_store_keep(mbuf1);
> @@ -645,6 +669,9 @@ R(vlan_ts_mark_cksum_ptype_rss,     1, 1, 1, 1, 1, 1,                              \
>                 void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);     \
>                                                                                \
>         uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_##name(      \
> +               void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);     \
> +                                                                              \
> +       uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \
>                 void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);
>
>  NIX_RX_FASTPATH_MODES
> diff --git a/drivers/net/cnxk/cn10k_rx_vec_mseg.c b/drivers/net/cnxk/cn10k_rx_vec_mseg.c
> new file mode 100644
> index 000000000..04d1e46c8
> --- /dev/null
> +++ b/drivers/net/cnxk/cn10k_rx_vec_mseg.c
> @@ -0,0 +1,17 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(C) 2021 Marvell.
> + */
> +
> +#include "cn10k_ethdev.h"
> +#include "cn10k_rx.h"
> +
> +#define R(name, f5, f4, f3, f2, f1, f0, flags)                                 \
> +       uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \
> +               void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts)      \
> +       {                                                                      \
> +               return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts,     \
> +                                         (flags) | NIX_RX_MULTI_SEG_F);       \
> +       }
> +
> +NIX_RX_FASTPATH_MODES
> +#undef R
> diff --git a/drivers/net/cnxk/cn9k_rx.c b/drivers/net/cnxk/cn9k_rx.c
> index 0acedd0a1..d293d4eac 100644
> --- a/drivers/net/cnxk/cn9k_rx.c
> +++ b/drivers/net/cnxk/cn9k_rx.c
> @@ -29,6 +29,8 @@ pick_rx_func(struct rte_eth_dev *eth_dev,
>                 [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_CHECKSUM_F)]
>                 [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_PTYPE_F)]
>                 [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_RSS_F)];
> +
> +       rte_atomic_thread_fence(__ATOMIC_RELEASE);
>  }
>
>  void
> @@ -60,20 +62,29 @@ cn9k_eth_set_rx_function(struct rte_eth_dev *eth_dev)
>  #undef R
>         };
>
> -       /* For PTP enabled, scalar rx function should be chosen as most of the
> -        * PTP apps are implemented to rx burst 1 pkt.
> -        */
> -       if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP)
> -               pick_rx_func(eth_dev, nix_eth_rx_burst);
> -       else
> -               pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
> +       const eth_rx_burst_t nix_eth_rx_vec_burst_mseg[2][2][2][2][2][2] = {
> +#define R(name, f5, f4, f3, f2, f1, f0, flags)                                 \
> +       [f5][f4][f3][f2][f1][f0] = cn9k_nix_recv_pkts_vec_mseg_##name,
>
> -       if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
> -               pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
> +               NIX_RX_FASTPATH_MODES
> +#undef R
> +       };
>
>         /* Copy multi seg version with no offload for tear down sequence */
>         if (rte_eal_process_type() == RTE_PROC_PRIMARY)
>                 dev->rx_pkt_burst_no_offload =
>                         nix_eth_rx_burst_mseg[0][0][0][0][0][0];
> -       rte_mb();
> +
> +       /* For PTP enabled, scalar rx function should be chosen as most of the
> +        * PTP apps are implemented to rx burst 1 pkt.
> +        */
> +       if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) {
> +               if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
> +                       return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
> +               return pick_rx_func(eth_dev, nix_eth_rx_burst);
> +       }
> +
> +       if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
> +               return pick_rx_func(eth_dev, nix_eth_rx_vec_burst_mseg);
> +       return pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
>  }
> diff --git a/drivers/net/cnxk/cn9k_rx.h b/drivers/net/cnxk/cn9k_rx.h
> index 10ef5c690..5ae9e8195 100644
> --- a/drivers/net/cnxk/cn9k_rx.h
> +++ b/drivers/net/cnxk/cn9k_rx.h
> @@ -120,8 +120,15 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
>
>         sg = *(const uint64_t *)(rx + 1);
>         nb_segs = (sg >> 48) & 0x3;
> -       mbuf->nb_segs = nb_segs;
> +
> +       if (nb_segs == 1) {
> +               mbuf->next = NULL;
> +               return;
> +       }
> +
> +       mbuf->pkt_len = rx->pkt_lenm1 + 1;
>         mbuf->data_len = sg & 0xFFFF;
> +       mbuf->nb_segs = nb_segs;
>         sg = sg >> 16;
>
>         eol = ((const rte_iova_t *)(rx + 1) +
> @@ -198,15 +205,14 @@ cn9k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag,
>                         nix_update_match_id(rx->cn9k.match_id, ol_flags, mbuf);
>
>         mbuf->ol_flags = ol_flags;
> -       *(uint64_t *)(&mbuf->rearm_data) = val;
>         mbuf->pkt_len = len;
> +       mbuf->data_len = len;
> +       *(uint64_t *)(&mbuf->rearm_data) = val;
>
> -       if (flag & NIX_RX_MULTI_SEG_F) {
> +       if (flag & NIX_RX_MULTI_SEG_F)
>                 nix_cqe_xtract_mseg(rx, mbuf, val);
> -       } else {
> -               mbuf->data_len = len;
> +       else
>                 mbuf->next = NULL;
> -       }
>  }
>
>  static inline uint16_t
> @@ -484,16 +490,34 @@ cn9k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
>                 vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2);
>                 vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3);
>
> -               /* Update that no more segments */
> -               mbuf0->next = NULL;
> -               mbuf1->next = NULL;
> -               mbuf2->next = NULL;
> -               mbuf3->next = NULL;
> -
>                 /* Store the mbufs to rx_pkts */
>                 vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01);
>                 vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23);
>
> +               if (flags & NIX_RX_MULTI_SEG_F) {
> +                       /* Multi segment is enable build mseg list for
> +                        * individual mbufs in scalar mode.
> +                        */
> +                       nix_cqe_xtract_mseg((union nix_rx_parse_u *)
> +                                           (cq0 + CQE_SZ(0) + 8), mbuf0,
> +                                           mbuf_initializer);
> +                       nix_cqe_xtract_mseg((union nix_rx_parse_u *)
> +                                           (cq0 + CQE_SZ(1) + 8), mbuf1,
> +                                           mbuf_initializer);
> +                       nix_cqe_xtract_mseg((union nix_rx_parse_u *)
> +                                           (cq0 + CQE_SZ(2) + 8), mbuf2,
> +                                           mbuf_initializer);
> +                       nix_cqe_xtract_mseg((union nix_rx_parse_u *)
> +                                           (cq0 + CQE_SZ(3) + 8), mbuf3,
> +                                           mbuf_initializer);
> +               } else {
> +                       /* Update that no more segments */
> +                       mbuf0->next = NULL;
> +                       mbuf1->next = NULL;
> +                       mbuf2->next = NULL;
> +                       mbuf3->next = NULL;
> +               }
> +
>                 /* Prefetch mbufs */
>                 roc_prefetch_store_keep(mbuf0);
>                 roc_prefetch_store_keep(mbuf1);
> @@ -647,6 +671,9 @@ R(vlan_ts_mark_cksum_ptype_rss,     1, 1, 1, 1, 1, 1,                              \
>                 void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);     \
>                                                                                \
>         uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_##name(       \
> +               void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);     \
> +                                                                              \
> +       uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_mseg_##name(  \
>                 void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);
>
>  NIX_RX_FASTPATH_MODES
> diff --git a/drivers/net/cnxk/cn9k_rx_vec_mseg.c b/drivers/net/cnxk/cn9k_rx_vec_mseg.c
> new file mode 100644
> index 000000000..e46d8a474
> --- /dev/null
> +++ b/drivers/net/cnxk/cn9k_rx_vec_mseg.c
> @@ -0,0 +1,18 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(C) 2021 Marvell.
> + */
> +
> +#include "cn9k_ethdev.h"
> +#include "cn9k_rx.h"
> +
> +#define R(name, f5, f4, f3, f2, f1, f0, flags)                                 \
> +       uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_mseg_##name(  \
> +               void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts)      \
> +       {                                                                      \
> +               return cn9k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts,      \
> +                                                (flags) |                     \
> +                                                        NIX_RX_MULTI_SEG_F);  \
> +       }
> +
> +NIX_RX_FASTPATH_MODES
> +#undef R
> diff --git a/drivers/net/cnxk/meson.build b/drivers/net/cnxk/meson.build
> index 2071d0dcb..aa8c7253f 100644
> --- a/drivers/net/cnxk/meson.build
> +++ b/drivers/net/cnxk/meson.build
> @@ -23,6 +23,7 @@ sources += files('cn9k_ethdev.c',
>                  'cn9k_rx.c',
>                  'cn9k_rx_mseg.c',
>                  'cn9k_rx_vec.c',
> +                'cn9k_rx_vec_mseg.c',
>                  'cn9k_tx.c',
>                  'cn9k_tx_mseg.c',
>                  'cn9k_tx_vec.c')
> @@ -32,6 +33,7 @@ sources += files('cn10k_ethdev.c',
>                  'cn10k_rx.c',
>                  'cn10k_rx_mseg.c',
>                  'cn10k_rx_vec.c',
> +                'cn10k_rx_vec_mseg.c',
>                  'cn10k_tx.c',
>                  'cn10k_tx_mseg.c',
>                  'cn10k_tx_vec.c')
> --
> 2.17.1
>
  

Patch

diff --git a/drivers/net/cnxk/cn10k_rx.c b/drivers/net/cnxk/cn10k_rx.c
index 5c956c06b..3a9fd7130 100644
--- a/drivers/net/cnxk/cn10k_rx.c
+++ b/drivers/net/cnxk/cn10k_rx.c
@@ -29,6 +29,8 @@  pick_rx_func(struct rte_eth_dev *eth_dev,
 		[!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_CHECKSUM_F)]
 		[!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_PTYPE_F)]
 		[!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_RSS_F)];
+
+	rte_atomic_thread_fence(__ATOMIC_RELEASE);
 }

 void
@@ -60,20 +62,29 @@  cn10k_eth_set_rx_function(struct rte_eth_dev *eth_dev)
 #undef R
 	};

-	/* For PTP enabled, scalar rx function should be chosen as most of the
-	 * PTP apps are implemented to rx burst 1 pkt.
-	 */
-	if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP)
-		pick_rx_func(eth_dev, nix_eth_rx_burst);
-	else
-		pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
+	const eth_rx_burst_t nix_eth_rx_vec_burst_mseg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags)                                 \
+	[f5][f4][f3][f2][f1][f0] = cn10k_nix_recv_pkts_vec_mseg_##name,

-	if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
-		pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};

 	/* Copy multi seg version with no offload for tear down sequence */
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
 		dev->rx_pkt_burst_no_offload =
 			nix_eth_rx_burst_mseg[0][0][0][0][0][0];
-	rte_mb();
+
+	/* For PTP enabled, scalar rx function should be chosen as most of the
+	 * PTP apps are implemented to rx burst 1 pkt.
+	 */
+	if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) {
+		if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
+			return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
+		return pick_rx_func(eth_dev, nix_eth_rx_burst);
+	}
+
+	if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
+		return pick_rx_func(eth_dev, nix_eth_rx_vec_burst_mseg);
+	return pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
 }
diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h
index 1cc37cbaa..5926ff7f4 100644
--- a/drivers/net/cnxk/cn10k_rx.h
+++ b/drivers/net/cnxk/cn10k_rx.h
@@ -119,8 +119,15 @@  nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,

 	sg = *(const uint64_t *)(rx + 1);
 	nb_segs = (sg >> 48) & 0x3;
-	mbuf->nb_segs = nb_segs;
+
+	if (nb_segs == 1) {
+		mbuf->next = NULL;
+		return;
+	}
+
+	mbuf->pkt_len = rx->pkt_lenm1 + 1;
 	mbuf->data_len = sg & 0xFFFF;
+	mbuf->nb_segs = nb_segs;
 	sg = sg >> 16;

 	eol = ((const rte_iova_t *)(rx + 1) + ((rx->desc_sizem1 + 1) << 1));
@@ -195,15 +202,14 @@  cn10k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag,
 		ol_flags = nix_update_match_id(rx->match_id, ol_flags, mbuf);

 	mbuf->ol_flags = ol_flags;
-	*(uint64_t *)(&mbuf->rearm_data) = val;
 	mbuf->pkt_len = len;
+	mbuf->data_len = len;
+	*(uint64_t *)(&mbuf->rearm_data) = val;

-	if (flag & NIX_RX_MULTI_SEG_F) {
+	if (flag & NIX_RX_MULTI_SEG_F)
 		nix_cqe_xtract_mseg(rx, mbuf, val);
-	} else {
-		mbuf->data_len = len;
+	else
 		mbuf->next = NULL;
-	}
 }

 static inline uint16_t
@@ -481,16 +487,34 @@  cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
 		vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2);
 		vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3);

-		/* Update that no more segments */
-		mbuf0->next = NULL;
-		mbuf1->next = NULL;
-		mbuf2->next = NULL;
-		mbuf3->next = NULL;
-
 		/* Store the mbufs to rx_pkts */
 		vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01);
 		vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23);

+		if (flags & NIX_RX_MULTI_SEG_F) {
+			/* Multi-segment is enabled; build the mseg list for
+			 * individual mbufs in scalar mode.
+			 */
+			nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+					    (cq0 + CQE_SZ(0) + 8), mbuf0,
+					    mbuf_initializer);
+			nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+					    (cq0 + CQE_SZ(1) + 8), mbuf1,
+					    mbuf_initializer);
+			nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+					    (cq0 + CQE_SZ(2) + 8), mbuf2,
+					    mbuf_initializer);
+			nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+					    (cq0 + CQE_SZ(3) + 8), mbuf3,
+					    mbuf_initializer);
+		} else {
+			/* Update that no more segments */
+			mbuf0->next = NULL;
+			mbuf1->next = NULL;
+			mbuf2->next = NULL;
+			mbuf3->next = NULL;
+		}
+
 		/* Prefetch mbufs */
 		roc_prefetch_store_keep(mbuf0);
 		roc_prefetch_store_keep(mbuf1);
@@ -645,6 +669,9 @@  R(vlan_ts_mark_cksum_ptype_rss,	1, 1, 1, 1, 1, 1,			       \
 		void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);     \
 									       \
 	uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_##name(      \
+		void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);     \
+									       \
+	uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \
 		void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);

 NIX_RX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn10k_rx_vec_mseg.c b/drivers/net/cnxk/cn10k_rx_vec_mseg.c
new file mode 100644
index 000000000..04d1e46c8
--- /dev/null
+++ b/drivers/net/cnxk/cn10k_rx_vec_mseg.c
@@ -0,0 +1,17 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_ethdev.h"
+#include "cn10k_rx.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags)                                 \
+	uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \
+		void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts)      \
+	{                                                                      \
+		return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts,     \
+					  (flags) | NIX_RX_MULTI_SEG_F);       \
+	}
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/net/cnxk/cn9k_rx.c b/drivers/net/cnxk/cn9k_rx.c
index 0acedd0a1..d293d4eac 100644
--- a/drivers/net/cnxk/cn9k_rx.c
+++ b/drivers/net/cnxk/cn9k_rx.c
@@ -29,6 +29,8 @@  pick_rx_func(struct rte_eth_dev *eth_dev,
 		[!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_CHECKSUM_F)]
 		[!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_PTYPE_F)]
 		[!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_RSS_F)];
+
+	rte_atomic_thread_fence(__ATOMIC_RELEASE);
 }

 void
@@ -60,20 +62,29 @@  cn9k_eth_set_rx_function(struct rte_eth_dev *eth_dev)
 #undef R
 	};

-	/* For PTP enabled, scalar rx function should be chosen as most of the
-	 * PTP apps are implemented to rx burst 1 pkt.
-	 */
-	if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP)
-		pick_rx_func(eth_dev, nix_eth_rx_burst);
-	else
-		pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
+	const eth_rx_burst_t nix_eth_rx_vec_burst_mseg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags)                                 \
+	[f5][f4][f3][f2][f1][f0] = cn9k_nix_recv_pkts_vec_mseg_##name,

-	if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
-		pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
+		NIX_RX_FASTPATH_MODES
+#undef R
+	};

 	/* Copy multi seg version with no offload for tear down sequence */
 	if (rte_eal_process_type() == RTE_PROC_PRIMARY)
 		dev->rx_pkt_burst_no_offload =
 			nix_eth_rx_burst_mseg[0][0][0][0][0][0];
-	rte_mb();
+
+	/* For PTP enabled, scalar rx function should be chosen as most of the
+	 * PTP apps are implemented to rx burst 1 pkt.
+	 */
+	if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) {
+		if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
+			return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
+		return pick_rx_func(eth_dev, nix_eth_rx_burst);
+	}
+
+	if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
+		return pick_rx_func(eth_dev, nix_eth_rx_vec_burst_mseg);
+	return pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
 }
diff --git a/drivers/net/cnxk/cn9k_rx.h b/drivers/net/cnxk/cn9k_rx.h
index 10ef5c690..5ae9e8195 100644
--- a/drivers/net/cnxk/cn9k_rx.h
+++ b/drivers/net/cnxk/cn9k_rx.h
@@ -120,8 +120,15 @@  nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,

 	sg = *(const uint64_t *)(rx + 1);
 	nb_segs = (sg >> 48) & 0x3;
-	mbuf->nb_segs = nb_segs;
+
+	if (nb_segs == 1) {
+		mbuf->next = NULL;
+		return;
+	}
+
+	mbuf->pkt_len = rx->pkt_lenm1 + 1;
 	mbuf->data_len = sg & 0xFFFF;
+	mbuf->nb_segs = nb_segs;
 	sg = sg >> 16;

 	eol = ((const rte_iova_t *)(rx + 1) +
@@ -198,15 +205,14 @@  cn9k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag,
 			nix_update_match_id(rx->cn9k.match_id, ol_flags, mbuf);

 	mbuf->ol_flags = ol_flags;
-	*(uint64_t *)(&mbuf->rearm_data) = val;
 	mbuf->pkt_len = len;
+	mbuf->data_len = len;
+	*(uint64_t *)(&mbuf->rearm_data) = val;

-	if (flag & NIX_RX_MULTI_SEG_F) {
+	if (flag & NIX_RX_MULTI_SEG_F)
 		nix_cqe_xtract_mseg(rx, mbuf, val);
-	} else {
-		mbuf->data_len = len;
+	else
 		mbuf->next = NULL;
-	}
 }

 static inline uint16_t
@@ -484,16 +490,34 @@  cn9k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
 		vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2);
 		vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3);

-		/* Update that no more segments */
-		mbuf0->next = NULL;
-		mbuf1->next = NULL;
-		mbuf2->next = NULL;
-		mbuf3->next = NULL;
-
 		/* Store the mbufs to rx_pkts */
 		vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01);
 		vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23);

+		if (flags & NIX_RX_MULTI_SEG_F) {
+			/* Multi-segment is enabled; build the mseg list for
+			 * individual mbufs in scalar mode.
+			 */
+			nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+					    (cq0 + CQE_SZ(0) + 8), mbuf0,
+					    mbuf_initializer);
+			nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+					    (cq0 + CQE_SZ(1) + 8), mbuf1,
+					    mbuf_initializer);
+			nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+					    (cq0 + CQE_SZ(2) + 8), mbuf2,
+					    mbuf_initializer);
+			nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+					    (cq0 + CQE_SZ(3) + 8), mbuf3,
+					    mbuf_initializer);
+		} else {
+			/* Update that no more segments */
+			mbuf0->next = NULL;
+			mbuf1->next = NULL;
+			mbuf2->next = NULL;
+			mbuf3->next = NULL;
+		}
+
 		/* Prefetch mbufs */
 		roc_prefetch_store_keep(mbuf0);
 		roc_prefetch_store_keep(mbuf1);
@@ -647,6 +671,9 @@  R(vlan_ts_mark_cksum_ptype_rss,	1, 1, 1, 1, 1, 1,			       \
 		void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);     \
 									       \
 	uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_##name(       \
+		void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);     \
+									       \
+	uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_mseg_##name(  \
 		void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);

 NIX_RX_FASTPATH_MODES
diff --git a/drivers/net/cnxk/cn9k_rx_vec_mseg.c b/drivers/net/cnxk/cn9k_rx_vec_mseg.c
new file mode 100644
index 000000000..e46d8a474
--- /dev/null
+++ b/drivers/net/cnxk/cn9k_rx_vec_mseg.c
@@ -0,0 +1,18 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_ethdev.h"
+#include "cn9k_rx.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags)                                 \
+	uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_mseg_##name(  \
+		void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts)      \
+	{                                                                      \
+		return cn9k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts,      \
+						 (flags) |                     \
+							 NIX_RX_MULTI_SEG_F);  \
+	}
+
+NIX_RX_FASTPATH_MODES
+#undef R
diff --git a/drivers/net/cnxk/meson.build b/drivers/net/cnxk/meson.build
index 2071d0dcb..aa8c7253f 100644
--- a/drivers/net/cnxk/meson.build
+++ b/drivers/net/cnxk/meson.build
@@ -23,6 +23,7 @@  sources += files('cn9k_ethdev.c',
 		 'cn9k_rx.c',
 		 'cn9k_rx_mseg.c',
 		 'cn9k_rx_vec.c',
+		 'cn9k_rx_vec_mseg.c',
 		 'cn9k_tx.c',
 		 'cn9k_tx_mseg.c',
 		 'cn9k_tx_vec.c')
@@ -32,6 +33,7 @@  sources += files('cn10k_ethdev.c',
 		 'cn10k_rx.c',
 		 'cn10k_rx_mseg.c',
 		 'cn10k_rx_vec.c',
+		 'cn10k_rx_vec_mseg.c',
 		 'cn10k_tx.c',
 		 'cn10k_tx_mseg.c',
 		 'cn10k_tx_vec.c')