[v5,3/5] examples/l3fwd: use lpm vector path for event vector
Checks
Commit Message
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Use lpm vector path to process event vector.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
examples/l3fwd/l3fwd_altivec.h | 29 ++++++++++++++
examples/l3fwd/l3fwd_event.h | 71 ++++++++++++++++++++++++++++++++++
examples/l3fwd/l3fwd_lpm.c | 39 +++++++++++--------
examples/l3fwd/l3fwd_neon.h | 47 ++++++++++++++++++++++
examples/l3fwd/l3fwd_sse.h | 44 +++++++++++++++++++++
5 files changed, 214 insertions(+), 16 deletions(-)
Comments
>
>Use lpm vector path to process event vector.
>
>Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
Acked-by: Shijith Thotton <sthotton@marvell.com>
>---
> examples/l3fwd/l3fwd_altivec.h | 29 ++++++++++++++
> examples/l3fwd/l3fwd_event.h | 71
>++++++++++++++++++++++++++++++++++
> examples/l3fwd/l3fwd_lpm.c | 39 +++++++++++--------
> examples/l3fwd/l3fwd_neon.h | 47 ++++++++++++++++++++++
> examples/l3fwd/l3fwd_sse.h | 44 +++++++++++++++++++++
> 5 files changed, 214 insertions(+), 16 deletions(-)
>
>diff --git a/examples/l3fwd/l3fwd_altivec.h b/examples/l3fwd/l3fwd_altivec.h
>index 87018f5dbe..e45e138e59 100644
>--- a/examples/l3fwd/l3fwd_altivec.h
>+++ b/examples/l3fwd/l3fwd_altivec.h
>@@ -222,4 +222,33 @@ send_packets_multi(struct lcore_conf *qconf, struct
>rte_mbuf **pkts_burst,
> }
> }
>
>+static __rte_always_inline uint16_t
>+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
>+{
>+ uint16_t i = 0, res;
>+
>+ while (nb_elem > 7) {
>+ __vector unsigned short dp1;
>+ __vector unsigned short dp;
>+
>+ dp = (__vector unsigned short)vec_splats((short)dst_ports[0]);
>+ dp1 = *((__vector unsigned short *)&dst_ports[i]);
>+ res = vec_all_eq(dp1, dp);
>+ if (!res)
>+ return BAD_PORT;
>+
>+ nb_elem -= 8;
>+ i += 8;
>+ }
>+
>+ while (nb_elem) {
>+ if (dst_ports[i] != dst_ports[0])
>+ return BAD_PORT;
>+ nb_elem--;
>+ i++;
>+ }
>+
>+ return dst_ports[0];
>+}
>+
> #endif /* _L3FWD_ALTIVEC_H_ */
>diff --git a/examples/l3fwd/l3fwd_event.h b/examples/l3fwd/l3fwd_event.h
>index b93841a16f..3fe38aada0 100644
>--- a/examples/l3fwd/l3fwd_event.h
>+++ b/examples/l3fwd/l3fwd_event.h
>@@ -82,6 +82,27 @@ struct l3fwd_event_resources {
> uint64_t vector_tmo_ns;
> };
>
>+#if defined(RTE_ARCH_X86)
>+#include "l3fwd_sse.h"
>+#elif defined __ARM_NEON
>+#include "l3fwd_neon.h"
>+#elif defined(RTE_ARCH_PPC_64)
>+#include "l3fwd_altivec.h"
>+#else
>+static inline uint16_t
>+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
>+{
>+ int i;
>+
>+ for (i = 0; i < nb_elem; i++) {
>+ if (dst_ports[i] != dst_ports[0])
>+ return BAD_PORT;
>+ }
>+
>+ return dst_ports[0];
>+}
>+#endif
>+
> static inline void
> event_vector_attr_validate(struct rte_event_vector *vec, struct rte_mbuf
>*mbuf)
> {
>@@ -103,7 +124,57 @@ event_vector_txq_set(struct rte_event_vector *vec,
>uint16_t txq)
> }
> }
>
>+static inline uint16_t
>+filter_bad_packets(struct rte_mbuf **mbufs, uint16_t *dst_port,
>+ uint16_t nb_pkts)
>+{
>+ uint16_t *des_pos, free = 0;
>+ struct rte_mbuf **pos;
>+ int i;
>+
>+ /* Filter out and free bad packets */
>+ for (i = 0; i < nb_pkts; i++) {
>+ if (dst_port[i] == BAD_PORT) {
>+ rte_pktmbuf_free(mbufs[i]);
>+ if (!free) {
>+ pos = &mbufs[i];
>+ des_pos = &dst_port[i];
>+ }
>+ free++;
>+ continue;
>+ }
>+
>+ if (free) {
>+ *pos = mbufs[i];
>+ pos++;
>+ *des_pos = dst_port[i];
>+ des_pos++;
>+ }
>+ }
>
>+ return nb_pkts - free;
>+}
>+
>+static inline void
>+process_event_vector(struct rte_event_vector *vec, uint16_t *dst_port)
>+{
>+ uint16_t port, i;
>+
>+ vec->nb_elem = filter_bad_packets(vec->mbufs, dst_port, vec-
>>nb_elem);
>+ /* Verify destination array */
>+ port = process_dst_port(dst_port, vec->nb_elem);
>+ if (port == BAD_PORT) {
>+ vec->attr_valid = 0;
>+ for (i = 0; i < vec->nb_elem; i++) {
>+ vec->mbufs[i]->port = dst_port[i];
>+ rte_event_eth_tx_adapter_txq_set(vec->mbufs[i], 0);
>+ }
>+ } else {
>+ vec->attr_valid = 1;
>+ vec->port = port;
>+ vec->queue = 0;
>+ }
>+}
>
> struct l3fwd_event_resources *l3fwd_get_eventdev_rsrc(void);
> void l3fwd_event_resource_setup(struct rte_eth_conf *port_conf);
>diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c
>index 22d7f61a42..5172979c72 100644
>--- a/examples/l3fwd/l3fwd_lpm.c
>+++ b/examples/l3fwd/l3fwd_lpm.c
>@@ -425,24 +425,27 @@ lpm_event_main_loop_tx_q_burst(__rte_unused void
>*dummy)
> }
>
> static __rte_always_inline void
>-lpm_process_event_vector(struct rte_event_vector *vec, struct lcore_conf
>*lconf)
>+lpm_process_event_vector(struct rte_event_vector *vec, struct lcore_conf
>*lconf,
>+ uint16_t *dst_port)
> {
> struct rte_mbuf **mbufs = vec->mbufs;
> int i;
>
>- /* Process first packet to init vector attributes */
>- lpm_process_event_pkt(lconf, mbufs[0]);
>+#if defined RTE_ARCH_X86 || defined __ARM_NEON || defined
>RTE_ARCH_PPC_64
> if (vec->attr_valid) {
>- if (mbufs[0]->port != BAD_PORT)
>- vec->port = mbufs[0]->port;
>- else
>- vec->attr_valid = 0;
>+ l3fwd_lpm_process_packets(vec->nb_elem, mbufs, vec->port,
>+ dst_port, lconf, 1);
>+ } else {
>+ for (i = 0; i < vec->nb_elem; i++)
>+ l3fwd_lpm_process_packets(1, &mbufs[i], mbufs[i]->port,
>+ &dst_port[i], lconf, 1);
> }
>+#else
>+ for (i = 0; i < vec->nb_elem; i++)
>+ dst_port[i] = lpm_process_event_pkt(lconf, mbufs[i]);
>+#endif
>
>- for (i = 1; i < vec->nb_elem; i++) {
>- lpm_process_event_pkt(lconf, mbufs[i]);
>- event_vector_attr_validate(vec, mbufs[i]);
>- }
>+ process_event_vector(vec, dst_port);
> }
>
> /* Same eventdev loop for single and burst of vector */
>@@ -458,6 +461,7 @@ lpm_event_loop_vector(struct l3fwd_event_resources
>*evt_rsrc,
> struct rte_event events[MAX_PKT_BURST];
> int i, nb_enq = 0, nb_deq = 0;
> struct lcore_conf *lconf;
>+ uint16_t *dst_port_list;
> unsigned int lcore_id;
>
> if (event_p_id < 0)
>@@ -465,7 +469,11 @@ lpm_event_loop_vector(struct l3fwd_event_resources
>*evt_rsrc,
>
> lcore_id = rte_lcore_id();
> lconf = &lcore_conf[lcore_id];
>-
>+ dst_port_list =
>+ rte_zmalloc("", sizeof(uint16_t) * evt_rsrc->vector_size,
>+ RTE_CACHE_LINE_SIZE);
>+ if (dst_port_list == NULL)
>+ return;
> RTE_LOG(INFO, L3FWD, "entering %s on lcore %u\n", __func__, lcore_id);
>
> while (!force_quit) {
>@@ -483,10 +491,8 @@ lpm_event_loop_vector(struct l3fwd_event_resources
>*evt_rsrc,
> events[i].op = RTE_EVENT_OP_FORWARD;
> }
>
>- lpm_process_event_vector(events[i].vec, lconf);
>-
>- if (flags & L3FWD_EVENT_TX_DIRECT)
>- event_vector_txq_set(events[i].vec, 0);
>+ lpm_process_event_vector(events[i].vec, lconf,
>+ dst_port_list);
> }
>
> if (flags & L3FWD_EVENT_TX_ENQ) {
>@@ -510,6 +516,7 @@ lpm_event_loop_vector(struct l3fwd_event_resources
>*evt_rsrc,
>
> l3fwd_event_worker_cleanup(event_d_id, event_p_id, events, nb_enq,
> nb_deq, 1);
>+ rte_free(dst_port_list);
> }
>
> int __rte_noinline
>diff --git a/examples/l3fwd/l3fwd_neon.h b/examples/l3fwd/l3fwd_neon.h
>index ce515e0bc4..bf365341fb 100644
>--- a/examples/l3fwd/l3fwd_neon.h
>+++ b/examples/l3fwd/l3fwd_neon.h
>@@ -194,4 +194,51 @@ send_packets_multi(struct lcore_conf *qconf, struct
>rte_mbuf **pkts_burst,
> }
> }
>
>+static __rte_always_inline uint16_t
>+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
>+{
>+ uint16_t i = 0;
>+
>+#if defined(RTE_ARCH_ARM64)
>+ uint16_t res;
>+
>+ while (nb_elem > 7) {
>+ uint16x8_t dp = vdupq_n_u16(dst_ports[0]);
>+ uint16x8_t dp1;
>+
>+ dp1 = vld1q_u16(&dst_ports[i]);
>+ dp1 = vceqq_u16(dp1, dp);
>+ res = vminvq_u16(dp1);
>+ if (!res)
>+ return BAD_PORT;
>+
>+ nb_elem -= 8;
>+ i += 8;
>+ }
>+
>+ while (nb_elem > 3) {
>+ uint16x4_t dp = vdup_n_u16(dst_ports[0]);
>+ uint16x4_t dp1;
>+
>+ dp1 = vld1_u16(&dst_ports[i]);
>+ dp1 = vceq_u16(dp1, dp);
>+ res = vminv_u16(dp1);
>+ if (!res)
>+ return BAD_PORT;
>+
>+ nb_elem -= 4;
>+ i += 4;
>+ }
>+#endif
>+
>+ while (nb_elem) {
>+ if (dst_ports[i] != dst_ports[0])
>+ return BAD_PORT;
>+ nb_elem--;
>+ i++;
>+ }
>+
>+ return dst_ports[0];
>+}
>+
> #endif /* _L3FWD_NEON_H_ */
>diff --git a/examples/l3fwd/l3fwd_sse.h b/examples/l3fwd/l3fwd_sse.h
>index 0f0d0323a2..083729cdef 100644
>--- a/examples/l3fwd/l3fwd_sse.h
>+++ b/examples/l3fwd/l3fwd_sse.h
>@@ -194,4 +194,48 @@ send_packets_multi(struct lcore_conf *qconf, struct
>rte_mbuf **pkts_burst,
> }
> }
>
>+static __rte_always_inline uint16_t
>+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
>+{
>+ uint16_t i = 0, res;
>+
>+ while (nb_elem > 7) {
>+ __m128i dp = _mm_set1_epi16(dst_ports[0]);
>+ __m128i dp1;
>+
>+ dp1 = _mm_loadu_si128((__m128i *)&dst_ports[i]);
>+ dp1 = _mm_cmpeq_epi16(dp1, dp);
>+ res = _mm_movemask_epi8(dp1);
>+ if (res != 0xFFFF)
>+ return BAD_PORT;
>+
>+ nb_elem -= 8;
>+ i += 8;
>+ }
>+
>+ while (nb_elem > 3) {
>+ __m128i dp = _mm_set1_epi16(dst_ports[0]);
>+ __m128i dp1;
>+
>+ dp1 = _mm_loadu_si128((__m128i *)&dst_ports[i]);
>+ dp1 = _mm_cmpeq_epi16(dp1, dp);
>+ dp1 = _mm_unpacklo_epi16(dp1, dp1);
>+ res = _mm_movemask_ps((__m128)dp1);
>+ if (res != 0xF)
>+ return BAD_PORT;
>+
>+ nb_elem -= 4;
>+ i += 4;
>+ }
>+
>+ while (nb_elem) {
>+ if (dst_ports[i] != dst_ports[0])
>+ return BAD_PORT;
>+ nb_elem--;
>+ i++;
>+ }
>+
>+ return dst_ports[0];
>+}
>+
> #endif /* _L3FWD_SSE_H_ */
>--
>2.25.1
@@ -222,4 +222,33 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
}
}
+static __rte_always_inline uint16_t
+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
+{
+ uint16_t i = 0, res;
+
+ while (nb_elem > 7) {
+ __vector unsigned short dp1;
+ __vector unsigned short dp;
+
+ dp = (__vector unsigned short)vec_splats((short)dst_ports[0]);
+ dp1 = *((__vector unsigned short *)&dst_ports[i]);
+ res = vec_all_eq(dp1, dp);
+ if (!res)
+ return BAD_PORT;
+
+ nb_elem -= 8;
+ i += 8;
+ }
+
+ while (nb_elem) {
+ if (dst_ports[i] != dst_ports[0])
+ return BAD_PORT;
+ nb_elem--;
+ i++;
+ }
+
+ return dst_ports[0];
+}
+
#endif /* _L3FWD_ALTIVEC_H_ */
@@ -82,6 +82,27 @@ struct l3fwd_event_resources {
uint64_t vector_tmo_ns;
};
+#if defined(RTE_ARCH_X86)
+#include "l3fwd_sse.h"
+#elif defined __ARM_NEON
+#include "l3fwd_neon.h"
+#elif defined(RTE_ARCH_PPC_64)
+#include "l3fwd_altivec.h"
+#else
+static inline uint16_t
+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
+{
+ int i;
+
+ for (i = 0; i < nb_elem; i++) {
+ if (dst_ports[i] != dst_ports[0])
+ return BAD_PORT;
+ }
+
+ return dst_ports[0];
+}
+#endif
+
static inline void
event_vector_attr_validate(struct rte_event_vector *vec, struct rte_mbuf *mbuf)
{
@@ -103,7 +124,57 @@ event_vector_txq_set(struct rte_event_vector *vec, uint16_t txq)
}
}
+static inline uint16_t
+filter_bad_packets(struct rte_mbuf **mbufs, uint16_t *dst_port,
+ uint16_t nb_pkts)
+{
+ uint16_t *des_pos, free = 0;
+ struct rte_mbuf **pos;
+ int i;
+
+ /* Filter out and free bad packets */
+ for (i = 0; i < nb_pkts; i++) {
+ if (dst_port[i] == BAD_PORT) {
+ rte_pktmbuf_free(mbufs[i]);
+ if (!free) {
+ pos = &mbufs[i];
+ des_pos = &dst_port[i];
+ }
+ free++;
+ continue;
+ }
+
+ if (free) {
+ *pos = mbufs[i];
+ pos++;
+ *des_pos = dst_port[i];
+ des_pos++;
+ }
+ }
+ return nb_pkts - free;
+}
+
+static inline void
+process_event_vector(struct rte_event_vector *vec, uint16_t *dst_port)
+{
+ uint16_t port, i;
+
+ vec->nb_elem = filter_bad_packets(vec->mbufs, dst_port, vec->nb_elem);
+ /* Verify destination array */
+ port = process_dst_port(dst_port, vec->nb_elem);
+ if (port == BAD_PORT) {
+ vec->attr_valid = 0;
+ for (i = 0; i < vec->nb_elem; i++) {
+ vec->mbufs[i]->port = dst_port[i];
+ rte_event_eth_tx_adapter_txq_set(vec->mbufs[i], 0);
+ }
+ } else {
+ vec->attr_valid = 1;
+ vec->port = port;
+ vec->queue = 0;
+ }
+}
struct l3fwd_event_resources *l3fwd_get_eventdev_rsrc(void);
void l3fwd_event_resource_setup(struct rte_eth_conf *port_conf);
@@ -425,24 +425,27 @@ lpm_event_main_loop_tx_q_burst(__rte_unused void *dummy)
}
static __rte_always_inline void
-lpm_process_event_vector(struct rte_event_vector *vec, struct lcore_conf *lconf)
+lpm_process_event_vector(struct rte_event_vector *vec, struct lcore_conf *lconf,
+ uint16_t *dst_port)
{
struct rte_mbuf **mbufs = vec->mbufs;
int i;
- /* Process first packet to init vector attributes */
- lpm_process_event_pkt(lconf, mbufs[0]);
+#if defined RTE_ARCH_X86 || defined __ARM_NEON || defined RTE_ARCH_PPC_64
if (vec->attr_valid) {
- if (mbufs[0]->port != BAD_PORT)
- vec->port = mbufs[0]->port;
- else
- vec->attr_valid = 0;
+ l3fwd_lpm_process_packets(vec->nb_elem, mbufs, vec->port,
+ dst_port, lconf, 1);
+ } else {
+ for (i = 0; i < vec->nb_elem; i++)
+ l3fwd_lpm_process_packets(1, &mbufs[i], mbufs[i]->port,
+ &dst_port[i], lconf, 1);
}
+#else
+ for (i = 0; i < vec->nb_elem; i++)
+ dst_port[i] = lpm_process_event_pkt(lconf, mbufs[i]);
+#endif
- for (i = 1; i < vec->nb_elem; i++) {
- lpm_process_event_pkt(lconf, mbufs[i]);
- event_vector_attr_validate(vec, mbufs[i]);
- }
+ process_event_vector(vec, dst_port);
}
/* Same eventdev loop for single and burst of vector */
@@ -458,6 +461,7 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
struct rte_event events[MAX_PKT_BURST];
int i, nb_enq = 0, nb_deq = 0;
struct lcore_conf *lconf;
+ uint16_t *dst_port_list;
unsigned int lcore_id;
if (event_p_id < 0)
@@ -465,7 +469,11 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
lcore_id = rte_lcore_id();
lconf = &lcore_conf[lcore_id];
-
+ dst_port_list =
+ rte_zmalloc("", sizeof(uint16_t) * evt_rsrc->vector_size,
+ RTE_CACHE_LINE_SIZE);
+ if (dst_port_list == NULL)
+ return;
RTE_LOG(INFO, L3FWD, "entering %s on lcore %u\n", __func__, lcore_id);
while (!force_quit) {
@@ -483,10 +491,8 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
events[i].op = RTE_EVENT_OP_FORWARD;
}
- lpm_process_event_vector(events[i].vec, lconf);
-
- if (flags & L3FWD_EVENT_TX_DIRECT)
- event_vector_txq_set(events[i].vec, 0);
+ lpm_process_event_vector(events[i].vec, lconf,
+ dst_port_list);
}
if (flags & L3FWD_EVENT_TX_ENQ) {
@@ -510,6 +516,7 @@ lpm_event_loop_vector(struct l3fwd_event_resources *evt_rsrc,
l3fwd_event_worker_cleanup(event_d_id, event_p_id, events, nb_enq,
nb_deq, 1);
+ rte_free(dst_port_list);
}
int __rte_noinline
@@ -194,4 +194,51 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
}
}
+static __rte_always_inline uint16_t
+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
+{
+ uint16_t i = 0;
+
+#if defined(RTE_ARCH_ARM64)
+ uint16_t res;
+
+ while (nb_elem > 7) {
+ uint16x8_t dp = vdupq_n_u16(dst_ports[0]);
+ uint16x8_t dp1;
+
+ dp1 = vld1q_u16(&dst_ports[i]);
+ dp1 = vceqq_u16(dp1, dp);
+ res = vminvq_u16(dp1);
+ if (!res)
+ return BAD_PORT;
+
+ nb_elem -= 8;
+ i += 8;
+ }
+
+ while (nb_elem > 3) {
+ uint16x4_t dp = vdup_n_u16(dst_ports[0]);
+ uint16x4_t dp1;
+
+ dp1 = vld1_u16(&dst_ports[i]);
+ dp1 = vceq_u16(dp1, dp);
+ res = vminv_u16(dp1);
+ if (!res)
+ return BAD_PORT;
+
+ nb_elem -= 4;
+ i += 4;
+ }
+#endif
+
+ while (nb_elem) {
+ if (dst_ports[i] != dst_ports[0])
+ return BAD_PORT;
+ nb_elem--;
+ i++;
+ }
+
+ return dst_ports[0];
+}
+
#endif /* _L3FWD_NEON_H_ */
@@ -194,4 +194,48 @@ send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
}
}
+static __rte_always_inline uint16_t
+process_dst_port(uint16_t *dst_ports, uint16_t nb_elem)
+{
+ uint16_t i = 0, res;
+
+ while (nb_elem > 7) {
+ __m128i dp = _mm_set1_epi16(dst_ports[0]);
+ __m128i dp1;
+
+ dp1 = _mm_loadu_si128((__m128i *)&dst_ports[i]);
+ dp1 = _mm_cmpeq_epi16(dp1, dp);
+ res = _mm_movemask_epi8(dp1);
+ if (res != 0xFFFF)
+ return BAD_PORT;
+
+ nb_elem -= 8;
+ i += 8;
+ }
+
+ while (nb_elem > 3) {
+ __m128i dp = _mm_set1_epi16(dst_ports[0]);
+ __m128i dp1;
+
+ dp1 = _mm_loadu_si128((__m128i *)&dst_ports[i]);
+ dp1 = _mm_cmpeq_epi16(dp1, dp);
+ dp1 = _mm_unpacklo_epi16(dp1, dp1);
+ res = _mm_movemask_ps((__m128)dp1);
+ if (res != 0xF)
+ return BAD_PORT;
+
+ nb_elem -= 4;
+ i += 4;
+ }
+
+ while (nb_elem) {
+ if (dst_ports[i] != dst_ports[0])
+ return BAD_PORT;
+ nb_elem--;
+ i++;
+ }
+
+ return dst_ports[0];
+}
+
#endif /* _L3FWD_SSE_H_ */