[v5,1/6] net/cnxk: add multi seg Rx vector routine
Checks
Commit Message
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Add a multi-segment Rx vector routine that forms the primary mbufs using
the vector path and switches to the scalar path when extracting segments.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
Series-acked-by: Nithin Dabilpuram <ndabilpuram@marvell.com>
---
v5 Changes:
- Fix incorrect mbuf assignment.
v4 Changes:
- Split patches for easier merge.
- Rebase on dpdk-next-net-mrvl.
v3 Changes:
- Spell check.
drivers/net/cnxk/cn10k_rx.c | 31 +++++++++++------
drivers/net/cnxk/cn10k_rx.h | 51 +++++++++++++++++++++-------
drivers/net/cnxk/cn10k_rx_vec_mseg.c | 17 ++++++++++
drivers/net/cnxk/cn9k_rx.c | 31 +++++++++++------
drivers/net/cnxk/cn9k_rx.h | 51 +++++++++++++++++++++-------
drivers/net/cnxk/cn9k_rx_vec_mseg.c | 18 ++++++++++
drivers/net/cnxk/meson.build | 2 ++
7 files changed, 157 insertions(+), 44 deletions(-)
create mode 100644 drivers/net/cnxk/cn10k_rx_vec_mseg.c
create mode 100644 drivers/net/cnxk/cn9k_rx_vec_mseg.c
--
2.17.1
Comments
On Tue, Jun 29, 2021 at 1:14 PM <pbhagavatula@marvell.com> wrote:
>
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>
> Add a multi-segment Rx vector routine that forms the primary mbufs using
> the vector path and switches to the scalar path when extracting segments.
>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> Series-acked-by: Nithin Dabilpuram <ndabilpuram@marvell.com>
Series applied to dpdk-next-net-mrvl/for-dpdk-main. Thanks.
> ---
> v5 Changes:
> - Fix incorrect mbuf assignment.
> v4 Changes:
> - Split patches for easier merge.
> - Rebase on dpdk-next-net-mrvl.
> v3 Changes:
> - Spell check.
>
> drivers/net/cnxk/cn10k_rx.c | 31 +++++++++++------
> drivers/net/cnxk/cn10k_rx.h | 51 +++++++++++++++++++++-------
> drivers/net/cnxk/cn10k_rx_vec_mseg.c | 17 ++++++++++
> drivers/net/cnxk/cn9k_rx.c | 31 +++++++++++------
> drivers/net/cnxk/cn9k_rx.h | 51 +++++++++++++++++++++-------
> drivers/net/cnxk/cn9k_rx_vec_mseg.c | 18 ++++++++++
> drivers/net/cnxk/meson.build | 2 ++
> 7 files changed, 157 insertions(+), 44 deletions(-)
> create mode 100644 drivers/net/cnxk/cn10k_rx_vec_mseg.c
> create mode 100644 drivers/net/cnxk/cn9k_rx_vec_mseg.c
>
> diff --git a/drivers/net/cnxk/cn10k_rx.c b/drivers/net/cnxk/cn10k_rx.c
> index 5c956c06b..3a9fd7130 100644
> --- a/drivers/net/cnxk/cn10k_rx.c
> +++ b/drivers/net/cnxk/cn10k_rx.c
> @@ -29,6 +29,8 @@ pick_rx_func(struct rte_eth_dev *eth_dev,
> [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_CHECKSUM_F)]
> [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_PTYPE_F)]
> [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_RSS_F)];
> +
> + rte_atomic_thread_fence(__ATOMIC_RELEASE);
> }
>
> void
> @@ -60,20 +62,29 @@ cn10k_eth_set_rx_function(struct rte_eth_dev *eth_dev)
> #undef R
> };
>
> - /* For PTP enabled, scalar rx function should be chosen as most of the
> - * PTP apps are implemented to rx burst 1 pkt.
> - */
> - if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP)
> - pick_rx_func(eth_dev, nix_eth_rx_burst);
> - else
> - pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
> + const eth_rx_burst_t nix_eth_rx_vec_burst_mseg[2][2][2][2][2][2] = {
> +#define R(name, f5, f4, f3, f2, f1, f0, flags) \
> + [f5][f4][f3][f2][f1][f0] = cn10k_nix_recv_pkts_vec_mseg_##name,
>
> - if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
> - pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
> + NIX_RX_FASTPATH_MODES
> +#undef R
> + };
>
> /* Copy multi seg version with no offload for tear down sequence */
> if (rte_eal_process_type() == RTE_PROC_PRIMARY)
> dev->rx_pkt_burst_no_offload =
> nix_eth_rx_burst_mseg[0][0][0][0][0][0];
> - rte_mb();
> +
> + /* For PTP enabled, scalar rx function should be chosen as most of the
> + * PTP apps are implemented to rx burst 1 pkt.
> + */
> + if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) {
> + if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
> + return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
> + return pick_rx_func(eth_dev, nix_eth_rx_burst);
> + }
> +
> + if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
> + return pick_rx_func(eth_dev, nix_eth_rx_vec_burst_mseg);
> + return pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
> }
> diff --git a/drivers/net/cnxk/cn10k_rx.h b/drivers/net/cnxk/cn10k_rx.h
> index 1cc37cbaa..5926ff7f4 100644
> --- a/drivers/net/cnxk/cn10k_rx.h
> +++ b/drivers/net/cnxk/cn10k_rx.h
> @@ -119,8 +119,15 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
>
> sg = *(const uint64_t *)(rx + 1);
> nb_segs = (sg >> 48) & 0x3;
> - mbuf->nb_segs = nb_segs;
> +
> + if (nb_segs == 1) {
> + mbuf->next = NULL;
> + return;
> + }
> +
> + mbuf->pkt_len = rx->pkt_lenm1 + 1;
> mbuf->data_len = sg & 0xFFFF;
> + mbuf->nb_segs = nb_segs;
> sg = sg >> 16;
>
> eol = ((const rte_iova_t *)(rx + 1) + ((rx->desc_sizem1 + 1) << 1));
> @@ -195,15 +202,14 @@ cn10k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag,
> ol_flags = nix_update_match_id(rx->match_id, ol_flags, mbuf);
>
> mbuf->ol_flags = ol_flags;
> - *(uint64_t *)(&mbuf->rearm_data) = val;
> mbuf->pkt_len = len;
> + mbuf->data_len = len;
> + *(uint64_t *)(&mbuf->rearm_data) = val;
>
> - if (flag & NIX_RX_MULTI_SEG_F) {
> + if (flag & NIX_RX_MULTI_SEG_F)
> nix_cqe_xtract_mseg(rx, mbuf, val);
> - } else {
> - mbuf->data_len = len;
> + else
> mbuf->next = NULL;
> - }
> }
>
> static inline uint16_t
> @@ -481,16 +487,34 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
> vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2);
> vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3);
>
> - /* Update that no more segments */
> - mbuf0->next = NULL;
> - mbuf1->next = NULL;
> - mbuf2->next = NULL;
> - mbuf3->next = NULL;
> -
> /* Store the mbufs to rx_pkts */
> vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01);
> vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23);
>
> + if (flags & NIX_RX_MULTI_SEG_F) {
> + /* Multi segment is enable build mseg list for
> + * individual mbufs in scalar mode.
> + */
> + nix_cqe_xtract_mseg((union nix_rx_parse_u *)
> + (cq0 + CQE_SZ(0) + 8), mbuf0,
> + mbuf_initializer);
> + nix_cqe_xtract_mseg((union nix_rx_parse_u *)
> + (cq0 + CQE_SZ(1) + 8), mbuf1,
> + mbuf_initializer);
> + nix_cqe_xtract_mseg((union nix_rx_parse_u *)
> + (cq0 + CQE_SZ(2) + 8), mbuf2,
> + mbuf_initializer);
> + nix_cqe_xtract_mseg((union nix_rx_parse_u *)
> + (cq0 + CQE_SZ(3) + 8), mbuf3,
> + mbuf_initializer);
> + } else {
> + /* Update that no more segments */
> + mbuf0->next = NULL;
> + mbuf1->next = NULL;
> + mbuf2->next = NULL;
> + mbuf3->next = NULL;
> + }
> +
> /* Prefetch mbufs */
> roc_prefetch_store_keep(mbuf0);
> roc_prefetch_store_keep(mbuf1);
> @@ -645,6 +669,9 @@ R(vlan_ts_mark_cksum_ptype_rss, 1, 1, 1, 1, 1, 1, \
> void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \
> \
> uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_##name( \
> + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \
> + \
> + uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \
> void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);
>
> NIX_RX_FASTPATH_MODES
> diff --git a/drivers/net/cnxk/cn10k_rx_vec_mseg.c b/drivers/net/cnxk/cn10k_rx_vec_mseg.c
> new file mode 100644
> index 000000000..04d1e46c8
> --- /dev/null
> +++ b/drivers/net/cnxk/cn10k_rx_vec_mseg.c
> @@ -0,0 +1,17 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(C) 2021 Marvell.
> + */
> +
> +#include "cn10k_ethdev.h"
> +#include "cn10k_rx.h"
> +
> +#define R(name, f5, f4, f3, f2, f1, f0, flags) \
> + uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \
> + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \
> + { \
> + return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \
> + (flags) | NIX_RX_MULTI_SEG_F); \
> + }
> +
> +NIX_RX_FASTPATH_MODES
> +#undef R
> diff --git a/drivers/net/cnxk/cn9k_rx.c b/drivers/net/cnxk/cn9k_rx.c
> index 0acedd0a1..d293d4eac 100644
> --- a/drivers/net/cnxk/cn9k_rx.c
> +++ b/drivers/net/cnxk/cn9k_rx.c
> @@ -29,6 +29,8 @@ pick_rx_func(struct rte_eth_dev *eth_dev,
> [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_CHECKSUM_F)]
> [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_PTYPE_F)]
> [!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_RSS_F)];
> +
> + rte_atomic_thread_fence(__ATOMIC_RELEASE);
> }
>
> void
> @@ -60,20 +62,29 @@ cn9k_eth_set_rx_function(struct rte_eth_dev *eth_dev)
> #undef R
> };
>
> - /* For PTP enabled, scalar rx function should be chosen as most of the
> - * PTP apps are implemented to rx burst 1 pkt.
> - */
> - if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP)
> - pick_rx_func(eth_dev, nix_eth_rx_burst);
> - else
> - pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
> + const eth_rx_burst_t nix_eth_rx_vec_burst_mseg[2][2][2][2][2][2] = {
> +#define R(name, f5, f4, f3, f2, f1, f0, flags) \
> + [f5][f4][f3][f2][f1][f0] = cn9k_nix_recv_pkts_vec_mseg_##name,
>
> - if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
> - pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
> + NIX_RX_FASTPATH_MODES
> +#undef R
> + };
>
> /* Copy multi seg version with no offload for tear down sequence */
> if (rte_eal_process_type() == RTE_PROC_PRIMARY)
> dev->rx_pkt_burst_no_offload =
> nix_eth_rx_burst_mseg[0][0][0][0][0][0];
> - rte_mb();
> +
> + /* For PTP enabled, scalar rx function should be chosen as most of the
> + * PTP apps are implemented to rx burst 1 pkt.
> + */
> + if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) {
> + if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
> + return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
> + return pick_rx_func(eth_dev, nix_eth_rx_burst);
> + }
> +
> + if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
> + return pick_rx_func(eth_dev, nix_eth_rx_vec_burst_mseg);
> + return pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
> }
> diff --git a/drivers/net/cnxk/cn9k_rx.h b/drivers/net/cnxk/cn9k_rx.h
> index 10ef5c690..5ae9e8195 100644
> --- a/drivers/net/cnxk/cn9k_rx.h
> +++ b/drivers/net/cnxk/cn9k_rx.h
> @@ -120,8 +120,15 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
>
> sg = *(const uint64_t *)(rx + 1);
> nb_segs = (sg >> 48) & 0x3;
> - mbuf->nb_segs = nb_segs;
> +
> + if (nb_segs == 1) {
> + mbuf->next = NULL;
> + return;
> + }
> +
> + mbuf->pkt_len = rx->pkt_lenm1 + 1;
> mbuf->data_len = sg & 0xFFFF;
> + mbuf->nb_segs = nb_segs;
> sg = sg >> 16;
>
> eol = ((const rte_iova_t *)(rx + 1) +
> @@ -198,15 +205,14 @@ cn9k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag,
> nix_update_match_id(rx->cn9k.match_id, ol_flags, mbuf);
>
> mbuf->ol_flags = ol_flags;
> - *(uint64_t *)(&mbuf->rearm_data) = val;
> mbuf->pkt_len = len;
> + mbuf->data_len = len;
> + *(uint64_t *)(&mbuf->rearm_data) = val;
>
> - if (flag & NIX_RX_MULTI_SEG_F) {
> + if (flag & NIX_RX_MULTI_SEG_F)
> nix_cqe_xtract_mseg(rx, mbuf, val);
> - } else {
> - mbuf->data_len = len;
> + else
> mbuf->next = NULL;
> - }
> }
>
> static inline uint16_t
> @@ -484,16 +490,34 @@ cn9k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
> vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2);
> vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3);
>
> - /* Update that no more segments */
> - mbuf0->next = NULL;
> - mbuf1->next = NULL;
> - mbuf2->next = NULL;
> - mbuf3->next = NULL;
> -
> /* Store the mbufs to rx_pkts */
> vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01);
> vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23);
>
> + if (flags & NIX_RX_MULTI_SEG_F) {
> + /* Multi segment is enable build mseg list for
> + * individual mbufs in scalar mode.
> + */
> + nix_cqe_xtract_mseg((union nix_rx_parse_u *)
> + (cq0 + CQE_SZ(0) + 8), mbuf0,
> + mbuf_initializer);
> + nix_cqe_xtract_mseg((union nix_rx_parse_u *)
> + (cq0 + CQE_SZ(1) + 8), mbuf1,
> + mbuf_initializer);
> + nix_cqe_xtract_mseg((union nix_rx_parse_u *)
> + (cq0 + CQE_SZ(2) + 8), mbuf2,
> + mbuf_initializer);
> + nix_cqe_xtract_mseg((union nix_rx_parse_u *)
> + (cq0 + CQE_SZ(3) + 8), mbuf3,
> + mbuf_initializer);
> + } else {
> + /* Update that no more segments */
> + mbuf0->next = NULL;
> + mbuf1->next = NULL;
> + mbuf2->next = NULL;
> + mbuf3->next = NULL;
> + }
> +
> /* Prefetch mbufs */
> roc_prefetch_store_keep(mbuf0);
> roc_prefetch_store_keep(mbuf1);
> @@ -647,6 +671,9 @@ R(vlan_ts_mark_cksum_ptype_rss, 1, 1, 1, 1, 1, 1, \
> void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \
> \
> uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_##name( \
> + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \
> + \
> + uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_mseg_##name( \
> void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);
>
> NIX_RX_FASTPATH_MODES
> diff --git a/drivers/net/cnxk/cn9k_rx_vec_mseg.c b/drivers/net/cnxk/cn9k_rx_vec_mseg.c
> new file mode 100644
> index 000000000..e46d8a474
> --- /dev/null
> +++ b/drivers/net/cnxk/cn9k_rx_vec_mseg.c
> @@ -0,0 +1,18 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(C) 2021 Marvell.
> + */
> +
> +#include "cn9k_ethdev.h"
> +#include "cn9k_rx.h"
> +
> +#define R(name, f5, f4, f3, f2, f1, f0, flags) \
> + uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_mseg_##name( \
> + void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \
> + { \
> + return cn9k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \
> + (flags) | \
> + NIX_RX_MULTI_SEG_F); \
> + }
> +
> +NIX_RX_FASTPATH_MODES
> +#undef R
> diff --git a/drivers/net/cnxk/meson.build b/drivers/net/cnxk/meson.build
> index 2071d0dcb..aa8c7253f 100644
> --- a/drivers/net/cnxk/meson.build
> +++ b/drivers/net/cnxk/meson.build
> @@ -23,6 +23,7 @@ sources += files('cn9k_ethdev.c',
> 'cn9k_rx.c',
> 'cn9k_rx_mseg.c',
> 'cn9k_rx_vec.c',
> + 'cn9k_rx_vec_mseg.c',
> 'cn9k_tx.c',
> 'cn9k_tx_mseg.c',
> 'cn9k_tx_vec.c')
> @@ -32,6 +33,7 @@ sources += files('cn10k_ethdev.c',
> 'cn10k_rx.c',
> 'cn10k_rx_mseg.c',
> 'cn10k_rx_vec.c',
> + 'cn10k_rx_vec_mseg.c',
> 'cn10k_tx.c',
> 'cn10k_tx_mseg.c',
> 'cn10k_tx_vec.c')
> --
> 2.17.1
>
@@ -29,6 +29,8 @@ pick_rx_func(struct rte_eth_dev *eth_dev,
[!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_CHECKSUM_F)]
[!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_PTYPE_F)]
[!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_RSS_F)];
+
+ rte_atomic_thread_fence(__ATOMIC_RELEASE);
}
void
@@ -60,20 +62,29 @@ cn10k_eth_set_rx_function(struct rte_eth_dev *eth_dev)
#undef R
};
- /* For PTP enabled, scalar rx function should be chosen as most of the
- * PTP apps are implemented to rx burst 1 pkt.
- */
- if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP)
- pick_rx_func(eth_dev, nix_eth_rx_burst);
- else
- pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
+ const eth_rx_burst_t nix_eth_rx_vec_burst_mseg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn10k_nix_recv_pkts_vec_mseg_##name,
- if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
- pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
/* Copy multi seg version with no offload for tear down sequence */
if (rte_eal_process_type() == RTE_PROC_PRIMARY)
dev->rx_pkt_burst_no_offload =
nix_eth_rx_burst_mseg[0][0][0][0][0][0];
- rte_mb();
+
+ /* For PTP enabled, scalar rx function should be chosen as most of the
+ * PTP apps are implemented to rx burst 1 pkt.
+ */
+ if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) {
+ if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
+ return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
+ return pick_rx_func(eth_dev, nix_eth_rx_burst);
+ }
+
+ if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
+ return pick_rx_func(eth_dev, nix_eth_rx_vec_burst_mseg);
+ return pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
}
@@ -119,8 +119,15 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
sg = *(const uint64_t *)(rx + 1);
nb_segs = (sg >> 48) & 0x3;
- mbuf->nb_segs = nb_segs;
+
+ if (nb_segs == 1) {
+ mbuf->next = NULL;
+ return;
+ }
+
+ mbuf->pkt_len = rx->pkt_lenm1 + 1;
mbuf->data_len = sg & 0xFFFF;
+ mbuf->nb_segs = nb_segs;
sg = sg >> 16;
eol = ((const rte_iova_t *)(rx + 1) + ((rx->desc_sizem1 + 1) << 1));
@@ -195,15 +202,14 @@ cn10k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag,
ol_flags = nix_update_match_id(rx->match_id, ol_flags, mbuf);
mbuf->ol_flags = ol_flags;
- *(uint64_t *)(&mbuf->rearm_data) = val;
mbuf->pkt_len = len;
+ mbuf->data_len = len;
+ *(uint64_t *)(&mbuf->rearm_data) = val;
- if (flag & NIX_RX_MULTI_SEG_F) {
+ if (flag & NIX_RX_MULTI_SEG_F)
nix_cqe_xtract_mseg(rx, mbuf, val);
- } else {
- mbuf->data_len = len;
+ else
mbuf->next = NULL;
- }
}
static inline uint16_t
@@ -481,16 +487,34 @@ cn10k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2);
vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3);
- /* Update that no more segments */
- mbuf0->next = NULL;
- mbuf1->next = NULL;
- mbuf2->next = NULL;
- mbuf3->next = NULL;
-
/* Store the mbufs to rx_pkts */
vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01);
vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23);
+ if (flags & NIX_RX_MULTI_SEG_F) {
+ /* Multi segment is enable build mseg list for
+ * individual mbufs in scalar mode.
+ */
+ nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+ (cq0 + CQE_SZ(0) + 8), mbuf0,
+ mbuf_initializer);
+ nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+ (cq0 + CQE_SZ(1) + 8), mbuf1,
+ mbuf_initializer);
+ nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+ (cq0 + CQE_SZ(2) + 8), mbuf2,
+ mbuf_initializer);
+ nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+ (cq0 + CQE_SZ(3) + 8), mbuf3,
+ mbuf_initializer);
+ } else {
+ /* Update that no more segments */
+ mbuf0->next = NULL;
+ mbuf1->next = NULL;
+ mbuf2->next = NULL;
+ mbuf3->next = NULL;
+ }
+
/* Prefetch mbufs */
roc_prefetch_store_keep(mbuf0);
roc_prefetch_store_keep(mbuf1);
@@ -645,6 +669,9 @@ R(vlan_ts_mark_cksum_ptype_rss, 1, 1, 1, 1, 1, 1, \
void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \
\
uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_##name( \
+ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \
+ \
+ uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \
void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);
NIX_RX_FASTPATH_MODES
new file mode 100644
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_ethdev.h"
+#include "cn10k_rx.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_noinline __rte_hot cn10k_nix_recv_pkts_vec_mseg_##name( \
+ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \
+ { \
+ return cn10k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \
+ (flags) | NIX_RX_MULTI_SEG_F); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
@@ -29,6 +29,8 @@ pick_rx_func(struct rte_eth_dev *eth_dev,
[!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_CHECKSUM_F)]
[!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_PTYPE_F)]
[!!(dev->rx_offload_flags & NIX_RX_OFFLOAD_RSS_F)];
+
+ rte_atomic_thread_fence(__ATOMIC_RELEASE);
}
void
@@ -60,20 +62,29 @@ cn9k_eth_set_rx_function(struct rte_eth_dev *eth_dev)
#undef R
};
- /* For PTP enabled, scalar rx function should be chosen as most of the
- * PTP apps are implemented to rx burst 1 pkt.
- */
- if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP)
- pick_rx_func(eth_dev, nix_eth_rx_burst);
- else
- pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
+ const eth_rx_burst_t nix_eth_rx_vec_burst_mseg[2][2][2][2][2][2] = {
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ [f5][f4][f3][f2][f1][f0] = cn9k_nix_recv_pkts_vec_mseg_##name,
- if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
- pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
+ NIX_RX_FASTPATH_MODES
+#undef R
+ };
/* Copy multi seg version with no offload for tear down sequence */
if (rte_eal_process_type() == RTE_PROC_PRIMARY)
dev->rx_pkt_burst_no_offload =
nix_eth_rx_burst_mseg[0][0][0][0][0][0];
- rte_mb();
+
+ /* For PTP enabled, scalar rx function should be chosen as most of the
+ * PTP apps are implemented to rx burst 1 pkt.
+ */
+ if (dev->scalar_ena || dev->rx_offloads & DEV_RX_OFFLOAD_TIMESTAMP) {
+ if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
+ return pick_rx_func(eth_dev, nix_eth_rx_burst_mseg);
+ return pick_rx_func(eth_dev, nix_eth_rx_burst);
+ }
+
+ if (dev->rx_offloads & DEV_RX_OFFLOAD_SCATTER)
+ return pick_rx_func(eth_dev, nix_eth_rx_vec_burst_mseg);
+ return pick_rx_func(eth_dev, nix_eth_rx_vec_burst);
}
@@ -120,8 +120,15 @@ nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf,
sg = *(const uint64_t *)(rx + 1);
nb_segs = (sg >> 48) & 0x3;
- mbuf->nb_segs = nb_segs;
+
+ if (nb_segs == 1) {
+ mbuf->next = NULL;
+ return;
+ }
+
+ mbuf->pkt_len = rx->pkt_lenm1 + 1;
mbuf->data_len = sg & 0xFFFF;
+ mbuf->nb_segs = nb_segs;
sg = sg >> 16;
eol = ((const rte_iova_t *)(rx + 1) +
@@ -198,15 +205,14 @@ cn9k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag,
nix_update_match_id(rx->cn9k.match_id, ol_flags, mbuf);
mbuf->ol_flags = ol_flags;
- *(uint64_t *)(&mbuf->rearm_data) = val;
mbuf->pkt_len = len;
+ mbuf->data_len = len;
+ *(uint64_t *)(&mbuf->rearm_data) = val;
- if (flag & NIX_RX_MULTI_SEG_F) {
+ if (flag & NIX_RX_MULTI_SEG_F)
nix_cqe_xtract_mseg(rx, mbuf, val);
- } else {
- mbuf->data_len = len;
+ else
mbuf->next = NULL;
- }
}
static inline uint16_t
@@ -484,16 +490,34 @@ cn9k_nix_recv_pkts_vector(void *rx_queue, struct rte_mbuf **rx_pkts,
vst1q_u64((uint64_t *)mbuf2->rearm_data, rearm2);
vst1q_u64((uint64_t *)mbuf3->rearm_data, rearm3);
- /* Update that no more segments */
- mbuf0->next = NULL;
- mbuf1->next = NULL;
- mbuf2->next = NULL;
- mbuf3->next = NULL;
-
/* Store the mbufs to rx_pkts */
vst1q_u64((uint64_t *)&rx_pkts[packets], mbuf01);
vst1q_u64((uint64_t *)&rx_pkts[packets + 2], mbuf23);
+ if (flags & NIX_RX_MULTI_SEG_F) {
+ /* Multi segment is enable build mseg list for
+ * individual mbufs in scalar mode.
+ */
+ nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+ (cq0 + CQE_SZ(0) + 8), mbuf0,
+ mbuf_initializer);
+ nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+ (cq0 + CQE_SZ(1) + 8), mbuf1,
+ mbuf_initializer);
+ nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+ (cq0 + CQE_SZ(2) + 8), mbuf2,
+ mbuf_initializer);
+ nix_cqe_xtract_mseg((union nix_rx_parse_u *)
+ (cq0 + CQE_SZ(3) + 8), mbuf3,
+ mbuf_initializer);
+ } else {
+ /* Update that no more segments */
+ mbuf0->next = NULL;
+ mbuf1->next = NULL;
+ mbuf2->next = NULL;
+ mbuf3->next = NULL;
+ }
+
/* Prefetch mbufs */
roc_prefetch_store_keep(mbuf0);
roc_prefetch_store_keep(mbuf1);
@@ -647,6 +671,9 @@ R(vlan_ts_mark_cksum_ptype_rss, 1, 1, 1, 1, 1, 1, \
void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \
\
uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_##name( \
+ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts); \
+ \
+ uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_mseg_##name( \
void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts);
NIX_RX_FASTPATH_MODES
new file mode 100644
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_ethdev.h"
+#include "cn9k_rx.h"
+
+#define R(name, f5, f4, f3, f2, f1, f0, flags) \
+ uint16_t __rte_noinline __rte_hot cn9k_nix_recv_pkts_vec_mseg_##name( \
+ void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts) \
+ { \
+ return cn9k_nix_recv_pkts_vector(rx_queue, rx_pkts, pkts, \
+ (flags) | \
+ NIX_RX_MULTI_SEG_F); \
+ }
+
+NIX_RX_FASTPATH_MODES
+#undef R
@@ -23,6 +23,7 @@ sources += files('cn9k_ethdev.c',
'cn9k_rx.c',
'cn9k_rx_mseg.c',
'cn9k_rx_vec.c',
+ 'cn9k_rx_vec_mseg.c',
'cn9k_tx.c',
'cn9k_tx_mseg.c',
'cn9k_tx_vec.c')
@@ -32,6 +33,7 @@ sources += files('cn10k_ethdev.c',
'cn10k_rx.c',
'cn10k_rx_mseg.c',
'cn10k_rx_vec.c',
+ 'cn10k_rx_vec_mseg.c',
'cn10k_tx.c',
'cn10k_tx_mseg.c',
'cn10k_tx_vec.c')