[2/2] event/cnxk: add Rx event vector fastpath

Message ID 20210524130617.1621-2-pbhagavatula@marvell.com (mailing list archive)
State Superseded, archived
Delegated to: Jerin Jacob
Headers
Series [1/2] event/cnxk: add Rx adapter vector support

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation warning apply issues

Commit Message

Pavan Nikhilesh Bhagavatula May 24, 2021, 1:06 p.m. UTC
  From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add Rx event vector fastpath to convert HW defined metadata into
rte_mbuf and rte_event_vector.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn10k_worker.h | 50 +++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

--
2.17.1
  

Comments

Jerin Jacob June 13, 2021, 1:14 p.m. UTC | #1
On Mon, May 24, 2021 at 6:36 PM <pbhagavatula@marvell.com> wrote:
>
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>
> Add Rx event vector fastpath to convert HW defined metadata into
> rte_mbuf and rte_event_vector.
>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> ---
>  drivers/event/cnxk/cn10k_worker.h | 50 +++++++++++++++++++++++++++++++
>  1 file changed, 50 insertions(+)
>
> diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
> index b3f71202ad..8c2cd72873 100644
> --- a/drivers/event/cnxk/cn10k_worker.h
> +++ b/drivers/event/cnxk/cn10k_worker.h
> @@ -5,6 +5,8 @@
>  #ifndef __CN10K_WORKER_H__
>  #define __CN10K_WORKER_H__
>
> +#include <rte_vect.h>
> +
>  #include "cnxk_ethdev.h"
>  #include "cnxk_eventdev.h"
>  #include "cnxk_worker.h"
> @@ -101,6 +103,44 @@ cn10k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id,
>                               mbuf_init.value, flags);
>  }
>
> +static __rte_always_inline void
> +cn10k_process_vwqe(uintptr_t vwqe, uint16_t port_id, const uint32_t flags,
> +                  void *lookup_mem)
> +{
> +       union mbuf_initializer mbuf_init = {
> +               .fields = {.data_off = RTE_PKTMBUF_HEADROOM,
> +                          .refcnt = 1,
> +                          .nb_segs = 1,
> +                          .port = port_id},
> +       };


If there is a performance improvement, move the mbuf_init population to the
slow path and store it in the HWS software structure (or a similar per-port
structure).

> +       struct rte_event_vector *vec;
> +       uint16_t nb_mbufs, non_vec;
> +       uint64_t **wqe;
> +
> +       vec = (struct rte_event_vector *)vwqe;
> +       wqe = vec->u64s;
> +
> +       nb_mbufs = RTE_ALIGN_FLOOR(vec->nb_elem, NIX_DESCS_PER_LOOP);
> +       nb_mbufs = cn10k_nix_recv_pkts_vector(&mbuf_init.value, vec->mbufs,
> +                                             nb_mbufs, flags | NIX_RX_VWQE_F,
> +                                             lookup_mem);
> +       wqe += nb_mbufs;
> +       non_vec = vec->nb_elem - nb_mbufs;
> +
> +       while (non_vec) {
> +               struct nix_cqe_hdr_s *cqe = (struct nix_cqe_hdr_s *)wqe[0];
> +               struct rte_mbuf *mbuf;
> +
> +               mbuf = (struct rte_mbuf *)((char *)cqe -
> +                                          sizeof(struct rte_mbuf));
> +               cn10k_nix_cqe_to_mbuf(cqe, cqe->tag, mbuf, lookup_mem,
> +                                     mbuf_init.value, flags);
> +               wqe[0] = (uint64_t *)mbuf;
> +               non_vec--;
> +               wqe++;
> +       }
> +}
> +
>  static __rte_always_inline uint16_t
>  cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
>                        const uint32_t flags, void *lookup_mem)
> @@ -141,6 +181,16 @@ cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
>                                           gw.u64[0] & 0xFFFFF, flags,
>                                           lookup_mem);
>                         gw.u64[1] = mbuf;
> +               } else if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
> +                          RTE_EVENT_TYPE_ETHDEV_VECTOR) {
> +                       uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
> +                       __uint128_t vwqe_hdr = *(__uint128_t *)gw.u64[1];
> +
> +                       vwqe_hdr = ((vwqe_hdr >> 64) & 0xFFF) | BIT_ULL(31) |
> +                                  ((vwqe_hdr & 0xFFFF) << 48) |
> +                                  ((uint64_t)port << 32);
> +                       *(uint64_t *)gw.u64[1] = (uint64_t)vwqe_hdr;
> +                       cn10k_process_vwqe(gw.u64[1], port, flags, lookup_mem);
>                 }
>         }
>
> --
> 2.17.1
>
  

Patch

diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index b3f71202ad..8c2cd72873 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -5,6 +5,8 @@ 
 #ifndef __CN10K_WORKER_H__
 #define __CN10K_WORKER_H__

+#include <rte_vect.h>
+
 #include "cnxk_ethdev.h"
 #include "cnxk_eventdev.h"
 #include "cnxk_worker.h"
@@ -101,6 +103,44 @@  cn10k_wqe_to_mbuf(uint64_t wqe, const uint64_t mbuf, uint8_t port_id,
 			      mbuf_init.value, flags);
 }

+static __rte_always_inline void
+cn10k_process_vwqe(uintptr_t vwqe, uint16_t port_id, const uint32_t flags,
+		   void *lookup_mem)
+{
+	union mbuf_initializer mbuf_init = {
+		.fields = {.data_off = RTE_PKTMBUF_HEADROOM,
+			   .refcnt = 1,
+			   .nb_segs = 1,
+			   .port = port_id},
+	};
+	struct rte_event_vector *vec;
+	uint16_t nb_mbufs, non_vec;
+	uint64_t **wqe;
+
+	vec = (struct rte_event_vector *)vwqe;
+	wqe = vec->u64s;
+
+	nb_mbufs = RTE_ALIGN_FLOOR(vec->nb_elem, NIX_DESCS_PER_LOOP);
+	nb_mbufs = cn10k_nix_recv_pkts_vector(&mbuf_init.value, vec->mbufs,
+					      nb_mbufs, flags | NIX_RX_VWQE_F,
+					      lookup_mem);
+	wqe += nb_mbufs;
+	non_vec = vec->nb_elem - nb_mbufs;
+
+	while (non_vec) {
+		struct nix_cqe_hdr_s *cqe = (struct nix_cqe_hdr_s *)wqe[0];
+		struct rte_mbuf *mbuf;
+
+		mbuf = (struct rte_mbuf *)((char *)cqe -
+					   sizeof(struct rte_mbuf));
+		cn10k_nix_cqe_to_mbuf(cqe, cqe->tag, mbuf, lookup_mem,
+				      mbuf_init.value, flags);
+		wqe[0] = (uint64_t *)mbuf;
+		non_vec--;
+		wqe++;
+	}
+}
+
 static __rte_always_inline uint16_t
 cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
 		       const uint32_t flags, void *lookup_mem)
@@ -141,6 +181,16 @@  cn10k_sso_hws_get_work(struct cn10k_sso_hws *ws, struct rte_event *ev,
 					  gw.u64[0] & 0xFFFFF, flags,
 					  lookup_mem);
 			gw.u64[1] = mbuf;
+		} else if (CNXK_EVENT_TYPE_FROM_TAG(gw.u64[0]) ==
+			   RTE_EVENT_TYPE_ETHDEV_VECTOR) {
+			uint8_t port = CNXK_SUB_EVENT_FROM_TAG(gw.u64[0]);
+			__uint128_t vwqe_hdr = *(__uint128_t *)gw.u64[1];
+
+			vwqe_hdr = ((vwqe_hdr >> 64) & 0xFFF) | BIT_ULL(31) |
+				   ((vwqe_hdr & 0xFFFF) << 48) |
+				   ((uint64_t)port << 32);
+			*(uint64_t *)gw.u64[1] = (uint64_t)vwqe_hdr;
+			cn10k_process_vwqe(gw.u64[1], port, flags, lookup_mem);
 		}
 	}