[09/11] examples/l3fwd: add event em main loop

Message ID 20190926100558.24348-10-pbhagavatula@marvell.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series example/l3fwd: introduce event device support |

Checks

Context Check Description
ci/Intel-compilation success Compilation OK
ci/checkpatch warning coding style issues

Commit Message

Pavan Nikhilesh Bhagavatula Sept. 26, 2019, 10:05 a.m. UTC
  From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add the exact-match (em) main loop for handling events; the loop
variant is selected based on the capabilities of the event device.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 examples/l3fwd/l3fwd.h               |  13 +++
 examples/l3fwd/l3fwd_em.c            | 151 +++++++++++++++++++++++++
 examples/l3fwd/l3fwd_em.h            | 159 +++++++++++++++++++--------
 examples/l3fwd/l3fwd_em_hlm.h        | 131 ++++++++++++++++++++++
 examples/l3fwd/l3fwd_em_sequential.h |  26 +++++
 examples/l3fwd/l3fwd_eventdev.c      |   9 ++
 examples/l3fwd/main.c                |   5 +-
 7 files changed, 447 insertions(+), 47 deletions(-)
  

Comments

Stephen Hemminger Sept. 27, 2019, 5:29 p.m. UTC | #1
On Thu, 26 Sep 2019 15:35:56 +0530
<pbhagavatula@marvell.com> wrote:

> +#define L3FWD_EM_EVENT_MODE						  \
> +EM_FP(tx_d,		0, 0, L3FWD_EVENT_TX_DIRECT | L3FWD_EVENT_SINGLE) \
> +EM_FP(tx_d_burst,	0, 1, L3FWD_EVENT_TX_DIRECT | L3FWD_EVENT_BURST)  \
> +EM_FP(tx_q,		1, 0, L3FWD_EVENT_TX_ENQ | L3FWD_EVENT_SINGLE)	  \
> +EM_FP(tx_q_burst,	1, 1, L3FWD_EVENT_TX_ENQ | L3FWD_EVENT_BURST)	  \
> +
> +#define EM_FP(_name, _f2, _f1, flags)					\
> +int									\
> +em_event_main_loop_ ## _name(__attribute__((unused)) void *dummy);
> +L3FWD_EM_EVENT_MODE
> +#undef EM_FP

Not a fan of this style of macro programming.

First off, macros should be set up so that they do not end with a semicolon.
That is why checkpatch is grumbling about it.

And it is quite hard to tell what this template expands to.
Why not just expand it as real code?
  
Stephen Hemminger Sept. 27, 2019, 5:30 p.m. UTC | #2
On Thu, 26 Sep 2019 15:35:56 +0530
<pbhagavatula@marvell.com> wrote:

> +static __rte_always_inline void
> +em_event_loop_single(struct l3fwd_eventdev_resources *evdev_rsrc,
> +		const uint8_t flags)

Do not use always_inline except for cases where the compiler
will get it wrong.  This function should not have inline at all;
the compiler will be smart enough to do it.
  

Patch

diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h
index 2cee544a5..ff1f14225 100644
--- a/examples/l3fwd/l3fwd.h
+++ b/examples/l3fwd/l3fwd.h
@@ -221,6 +221,19 @@  lpm_event_main_loop_ ## _name(__attribute__((unused)) void *dummy);
 L3FWD_LPM_EVENT_MODE
 #undef LPM_FP
 
+#define L3FWD_EM_EVENT_MODE						  \
+EM_FP(tx_d,		0, 0, L3FWD_EVENT_TX_DIRECT | L3FWD_EVENT_SINGLE) \
+EM_FP(tx_d_burst,	0, 1, L3FWD_EVENT_TX_DIRECT | L3FWD_EVENT_BURST)  \
+EM_FP(tx_q,		1, 0, L3FWD_EVENT_TX_ENQ | L3FWD_EVENT_SINGLE)	  \
+EM_FP(tx_q_burst,	1, 1, L3FWD_EVENT_TX_ENQ | L3FWD_EVENT_BURST)	  \
+
+#define EM_FP(_name, _f2, _f1, flags)					\
+int									\
+em_event_main_loop_ ## _name(__attribute__((unused)) void *dummy);
+L3FWD_EM_EVENT_MODE
+#undef EM_FP
+
+
 /* Return ipv4/ipv6 fwd lookup struct for LPM or EM. */
 void *
 em_get_ipv4_l3fwd_lookup_struct(const int socketid);
diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c
index 74a7c8fa4..e572d5b95 100644
--- a/examples/l3fwd/l3fwd_em.c
+++ b/examples/l3fwd/l3fwd_em.c
@@ -26,6 +26,7 @@ 
 #include <rte_hash.h>
 
 #include "l3fwd.h"
+#include "l3fwd_eventdev.h"
 
 #if defined(RTE_ARCH_X86) || defined(RTE_MACHINE_CPUFLAG_CRC32)
 #define EM_HASH_CRC 1
@@ -699,6 +700,156 @@  em_main_loop(__attribute__((unused)) void *dummy)
 	return 0;
 }
 
+static __rte_always_inline void
+em_event_loop_single(struct l3fwd_eventdev_resources *evdev_rsrc,
+		const uint8_t flags)
+{
+	const int event_p_id = l3fwd_get_free_event_port(evdev_rsrc);
+	const uint8_t tx_q_id = evdev_rsrc->evq.event_q_id[
+		evdev_rsrc->evq.nb_queues - 1];
+	const uint8_t event_d_id = evdev_rsrc->event_d_id;
+	struct lcore_conf *lconf;
+	unsigned int lcore_id;
+	struct rte_event ev;
+
+	if (event_p_id < 0)
+		return;
+
+	lcore_id = rte_lcore_id();
+	lconf = &lcore_conf[lcore_id];
+
+	RTE_LOG(INFO, L3FWD, "entering %s on lcore %u\n", __func__, lcore_id);
+	while (!force_quit) {
+		if (!rte_event_dequeue_burst(event_d_id, event_p_id, &ev, 1, 0))
+			continue;
+
+		struct rte_mbuf *mbuf = ev.mbuf;
+
+#if defined RTE_ARCH_X86 || defined RTE_MACHINE_CPUFLAG_NEON
+		mbuf->port = em_get_dst_port(lconf, mbuf, mbuf->port);
+		process_packet(mbuf, &mbuf->port);
+#else
+		l3fwd_em_simple_process(mbuf, lconf);
+#endif
+		if (mbuf->port == BAD_PORT) {
+			rte_pktmbuf_free(mbuf);
+			continue;
+		}
+
+		if (flags & L3FWD_EVENT_TX_ENQ) {
+			ev.queue_id = tx_q_id;
+			ev.op = RTE_EVENT_OP_FORWARD;
+			while (rte_event_enqueue_burst(event_d_id, event_p_id,
+						&ev, 1) && !force_quit)
+				;
+		}
+
+		if (flags & L3FWD_EVENT_TX_DIRECT) {
+			rte_event_eth_tx_adapter_txq_set(mbuf, 0);
+			while (!rte_event_eth_tx_adapter_enqueue(event_d_id,
+						event_p_id, &ev, 1) &&
+					!force_quit)
+				;
+		}
+	}
+}
+
+static __rte_always_inline void
+em_event_loop_burst(struct l3fwd_eventdev_resources *evdev_rsrc,
+		const uint8_t flags)
+{
+	const int event_p_id = l3fwd_get_free_event_port(evdev_rsrc);
+	const uint8_t tx_q_id = evdev_rsrc->evq.event_q_id[
+		evdev_rsrc->evq.nb_queues - 1];
+	const uint8_t event_d_id = evdev_rsrc->event_d_id;
+	const uint16_t deq_len = evdev_rsrc->deq_depth;
+	struct rte_event events[MAX_PKT_BURST];
+	struct lcore_conf *lconf;
+	unsigned int lcore_id;
+	int i, nb_enq, nb_deq;
+
+	if (event_p_id < 0)
+		return;
+
+	lcore_id = rte_lcore_id();
+
+	lconf = &lcore_conf[lcore_id];
+
+	RTE_LOG(INFO, L3FWD, "entering %s on lcore %u\n", __func__, lcore_id);
+
+	while (!force_quit) {
+		/* Read events from RX queues */
+		nb_deq = rte_event_dequeue_burst(event_d_id, event_p_id,
+				events, deq_len, 0);
+		if (nb_deq == 0) {
+			rte_pause();
+			continue;
+		}
+
+#if defined RTE_ARCH_X86 || defined RTE_MACHINE_CPUFLAG_NEON
+		l3fwd_em_process_events(nb_deq, (struct rte_event **)&events,
+					lconf);
+#else
+		l3fwd_em_no_opt_process_events(nb_deq,
+					       (struct rte_event **)&events,
+					       lconf);
+#endif
+		for (i = 0; i < nb_deq; i++) {
+			if (flags & L3FWD_EVENT_TX_ENQ) {
+				events[i].queue_id = tx_q_id;
+				events[i].op = RTE_EVENT_OP_FORWARD;
+			}
+
+			if (flags & L3FWD_EVENT_TX_DIRECT)
+				rte_event_eth_tx_adapter_txq_set(events[i].mbuf,
+								 0);
+		}
+
+		if (flags & L3FWD_EVENT_TX_ENQ) {
+			nb_enq = rte_event_enqueue_burst(event_d_id, event_p_id,
+					events, nb_deq);
+			while (nb_enq < nb_deq && !force_quit)
+				nb_enq += rte_event_enqueue_burst(event_d_id,
+						event_p_id, events + nb_enq,
+						nb_deq - nb_enq);
+		}
+
+		if (flags & L3FWD_EVENT_TX_DIRECT) {
+			nb_enq = rte_event_eth_tx_adapter_enqueue(event_d_id,
+					event_p_id, events, nb_deq);
+			while (nb_enq < nb_deq && !force_quit)
+				nb_enq += rte_event_eth_tx_adapter_enqueue(
+						event_d_id, event_p_id,
+						events + nb_enq,
+						nb_deq - nb_enq);
+		}
+	}
+}
+
+static __rte_always_inline void
+em_event_loop(struct l3fwd_eventdev_resources *evdev_rsrc,
+		 const uint8_t flags)
+{
+	if (flags & L3FWD_EVENT_SINGLE)
+		em_event_loop_single(evdev_rsrc, flags);
+	if (flags & L3FWD_EVENT_BURST)
+		em_event_loop_burst(evdev_rsrc, flags);
+}
+
+#define EM_FP(_name, _f2, _f1, flags)					\
+int __rte_noinline							\
+em_event_main_loop_ ## _name(__attribute__((unused)) void *dummy)	\
+{									\
+	struct l3fwd_eventdev_resources *evdev_rsrc =			\
+					l3fwd_get_eventdev_rsrc();	\
+									\
+	em_event_loop(evdev_rsrc, flags);				\
+	return 0;							\
+}
+
+L3FWD_EM_EVENT_MODE
+#undef EM_FP
+
 /*
  * Initialize exact match (hash) parameters.
  */
diff --git a/examples/l3fwd/l3fwd_em.h b/examples/l3fwd/l3fwd_em.h
index 090c1b448..b992a21da 100644
--- a/examples/l3fwd/l3fwd_em.h
+++ b/examples/l3fwd/l3fwd_em.h
@@ -5,73 +5,92 @@ 
 #ifndef __L3FWD_EM_H__
 #define __L3FWD_EM_H__
 
-static __rte_always_inline void
-l3fwd_em_simple_forward(struct rte_mbuf *m, uint16_t portid,
-		struct lcore_conf *qconf)
+static __rte_always_inline uint16_t
+l3fwd_em_handle_ipv4(struct rte_mbuf *m, uint16_t portid,
+		     struct rte_ether_hdr *eth_hdr, struct lcore_conf *qconf)
 {
-	struct rte_ether_hdr *eth_hdr;
 	struct rte_ipv4_hdr *ipv4_hdr;
 	uint16_t dst_port;
-	uint32_t tcp_or_udp;
-	uint32_t l3_ptypes;
-
-	eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
-	tcp_or_udp = m->packet_type & (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP);
-	l3_ptypes = m->packet_type & RTE_PTYPE_L3_MASK;
 
-	if (tcp_or_udp && (l3_ptypes == RTE_PTYPE_L3_IPV4)) {
-		/* Handle IPv4 headers.*/
-		ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *,
-						sizeof(struct rte_ether_hdr));
+	/* Handle IPv4 headers.*/
+	ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv4_hdr *,
+			sizeof(struct rte_ether_hdr));
 
 #ifdef DO_RFC_1812_CHECKS
-		/* Check to make sure the packet is valid (RFC1812) */
-		if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt_len) < 0) {
-			rte_pktmbuf_free(m);
-			return;
-		}
+	/* Check to make sure the packet is valid (RFC1812) */
+	if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt_len) < 0) {
+		rte_pktmbuf_free(m);
+		return BAD_PORT;
+	}
 #endif
-		dst_port = em_get_ipv4_dst_port(ipv4_hdr, portid,
-						qconf->ipv4_lookup_struct);
+	dst_port = em_get_ipv4_dst_port(ipv4_hdr, portid,
+			qconf->ipv4_lookup_struct);
 
-		if (dst_port >= RTE_MAX_ETHPORTS ||
+	if (dst_port >= RTE_MAX_ETHPORTS ||
 			(enabled_port_mask & 1 << dst_port) == 0)
-			dst_port = portid;
+		dst_port = portid;
 
 #ifdef DO_RFC_1812_CHECKS
-		/* Update time to live and header checksum */
-		--(ipv4_hdr->time_to_live);
-		++(ipv4_hdr->hdr_checksum);
+	/* Update time to live and header checksum */
+	--(ipv4_hdr->time_to_live);
+	++(ipv4_hdr->hdr_checksum);
 #endif
-		/* dst addr */
-		*(uint64_t *)&eth_hdr->d_addr = dest_eth_addr[dst_port];
+	/* dst addr */
+	*(uint64_t *)&eth_hdr->d_addr = dest_eth_addr[dst_port];
 
-		/* src addr */
-		rte_ether_addr_copy(&ports_eth_addr[dst_port],
-				&eth_hdr->s_addr);
+	/* src addr */
+	rte_ether_addr_copy(&ports_eth_addr[dst_port],
+			&eth_hdr->s_addr);
 
-		send_single_packet(qconf, m, dst_port);
-	} else if (tcp_or_udp && (l3_ptypes == RTE_PTYPE_L3_IPV6)) {
-		/* Handle IPv6 headers.*/
-		struct rte_ipv6_hdr *ipv6_hdr;
+	return dst_port;
+}
 
-		ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *,
-						sizeof(struct rte_ether_hdr));
+static __rte_always_inline uint16_t
+l3fwd_em_handle_ipv6(struct rte_mbuf *m, uint16_t portid,
+		struct rte_ether_hdr *eth_hdr, struct lcore_conf *qconf)
+{
+	/* Handle IPv6 headers.*/
+	struct rte_ipv6_hdr *ipv6_hdr;
+	uint16_t dst_port;
 
-		dst_port = em_get_ipv6_dst_port(ipv6_hdr, portid,
-					qconf->ipv6_lookup_struct);
+	ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct rte_ipv6_hdr *,
+			sizeof(struct rte_ether_hdr));
 
-		if (dst_port >= RTE_MAX_ETHPORTS ||
+	dst_port = em_get_ipv6_dst_port(ipv6_hdr, portid,
+			qconf->ipv6_lookup_struct);
+
+	if (dst_port >= RTE_MAX_ETHPORTS ||
 			(enabled_port_mask & 1 << dst_port) == 0)
-			dst_port = portid;
+		dst_port = portid;
+
+	/* dst addr */
+	*(uint64_t *)&eth_hdr->d_addr = dest_eth_addr[dst_port];
 
-		/* dst addr */
-		*(uint64_t *)&eth_hdr->d_addr = dest_eth_addr[dst_port];
+	/* src addr */
+	rte_ether_addr_copy(&ports_eth_addr[dst_port],
+			&eth_hdr->s_addr);
 
-		/* src addr */
-		rte_ether_addr_copy(&ports_eth_addr[dst_port],
-				&eth_hdr->s_addr);
+	return dst_port;
+}
 
+static __rte_always_inline void
+l3fwd_em_simple_forward(struct rte_mbuf *m, uint16_t portid,
+		struct lcore_conf *qconf)
+{
+	struct rte_ether_hdr *eth_hdr;
+	uint16_t dst_port;
+	uint32_t tcp_or_udp;
+	uint32_t l3_ptypes;
+
+	eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
+	tcp_or_udp = m->packet_type & (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP);
+	l3_ptypes = m->packet_type & RTE_PTYPE_L3_MASK;
+
+	if (tcp_or_udp && (l3_ptypes == RTE_PTYPE_L3_IPV4)) {
+		dst_port = l3fwd_em_handle_ipv4(m, portid, eth_hdr, qconf);
+		send_single_packet(qconf, m, dst_port);
+	} else if (tcp_or_udp && (l3_ptypes == RTE_PTYPE_L3_IPV6)) {
+		dst_port = l3fwd_em_handle_ipv6(m, portid, eth_hdr, qconf);
 		send_single_packet(qconf, m, dst_port);
 	} else {
 		/* Free the mbuf that contains non-IPV4/IPV6 packet */
@@ -79,6 +98,25 @@  l3fwd_em_simple_forward(struct rte_mbuf *m, uint16_t portid,
 	}
 }
 
+static __rte_always_inline void
+l3fwd_em_simple_process(struct rte_mbuf *m, struct lcore_conf *qconf)
+{
+	struct rte_ether_hdr *eth_hdr;
+	uint32_t tcp_or_udp;
+	uint32_t l3_ptypes;
+
+	eth_hdr = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
+	tcp_or_udp = m->packet_type & (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP);
+	l3_ptypes = m->packet_type & RTE_PTYPE_L3_MASK;
+
+	if (tcp_or_udp && (l3_ptypes == RTE_PTYPE_L3_IPV4))
+		m->port = l3fwd_em_handle_ipv4(m, m->port, eth_hdr, qconf);
+	else if (tcp_or_udp && (l3_ptypes == RTE_PTYPE_L3_IPV6))
+		m->port = l3fwd_em_handle_ipv6(m, m->port, eth_hdr, qconf);
+	else
+		m->port = BAD_PORT;
+}
+
 /*
  * Buffer non-optimized handling of packets, invoked
  * from main_loop.
@@ -108,4 +146,33 @@  l3fwd_em_no_opt_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
 		l3fwd_em_simple_forward(pkts_burst[j], portid, qconf);
 }
 
+/*
+ * Buffer non-optimized handling of events, invoked
+ * from main_loop.
+ */
+static inline void
+l3fwd_em_no_opt_process_events(int nb_rx, struct rte_event **events,
+			       struct lcore_conf *qconf)
+{
+	int32_t j;
+
+	/* Prefetch first packets */
+	for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++)
+		rte_prefetch0(rte_pktmbuf_mtod(events[j]->mbuf, void *));
+
+	/*
+	 * Prefetch and forward already prefetched
+	 * packets.
+	 */
+	for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
+		rte_prefetch0(rte_pktmbuf_mtod(events[
+				j + PREFETCH_OFFSET]->mbuf, void *));
+		l3fwd_em_simple_process(events[j]->mbuf, qconf);
+	}
+
+	/* Forward remaining prefetched packets */
+	for (; j < nb_rx; j++)
+		l3fwd_em_simple_process(events[j]->mbuf, qconf);
+}
+
 #endif /* __L3FWD_EM_H__ */
diff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h
index ad8b9ce87..79812716c 100644
--- a/examples/l3fwd/l3fwd_em_hlm.h
+++ b/examples/l3fwd/l3fwd_em_hlm.h
@@ -75,6 +75,60 @@  em_get_dst_port_ipv6xN(struct lcore_conf *qconf, struct rte_mbuf *m[],
 	}
 }
 
+static __rte_always_inline void
+em_get_dst_port_ipv4xN_events(struct lcore_conf *qconf, struct rte_mbuf *m[],
+			      uint16_t dst_port[])
+{
+	int i;
+	int32_t ret[EM_HASH_LOOKUP_COUNT];
+	union ipv4_5tuple_host key[EM_HASH_LOOKUP_COUNT];
+	const void *key_array[EM_HASH_LOOKUP_COUNT];
+
+	for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) {
+		get_ipv4_5tuple(m[i], mask0.x, &key[i]);
+		key_array[i] = &key[i];
+	}
+
+	rte_hash_lookup_bulk(qconf->ipv4_lookup_struct, &key_array[0],
+			     EM_HASH_LOOKUP_COUNT, ret);
+
+	for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) {
+		dst_port[i] = ((ret[i] < 0) ?
+				m[i]->port : ipv4_l3fwd_out_if[ret[i]]);
+
+		if (dst_port[i] >= RTE_MAX_ETHPORTS ||
+				(enabled_port_mask & 1 << dst_port[i]) == 0)
+			dst_port[i] = m[i]->port;
+	}
+}
+
+static __rte_always_inline void
+em_get_dst_port_ipv6xN_events(struct lcore_conf *qconf, struct rte_mbuf *m[],
+			      uint16_t dst_port[])
+{
+	int i;
+	int32_t ret[EM_HASH_LOOKUP_COUNT];
+	union ipv6_5tuple_host key[EM_HASH_LOOKUP_COUNT];
+	const void *key_array[EM_HASH_LOOKUP_COUNT];
+
+	for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) {
+		get_ipv6_5tuple(m[i], mask1.x, mask2.x, &key[i]);
+		key_array[i] = &key[i];
+	}
+
+	rte_hash_lookup_bulk(qconf->ipv6_lookup_struct, &key_array[0],
+			     EM_HASH_LOOKUP_COUNT, ret);
+
+	for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) {
+		dst_port[i] = ((ret[i] < 0) ?
+				m[i]->port : ipv6_l3fwd_out_if[ret[i]]);
+
+		if (dst_port[i] >= RTE_MAX_ETHPORTS ||
+				(enabled_port_mask & 1 << dst_port[i]) == 0)
+			dst_port[i] = m[i]->port;
+	}
+}
+
 static __rte_always_inline uint16_t
 em_get_dst_port(const struct lcore_conf *qconf, struct rte_mbuf *pkt,
 		uint16_t portid)
@@ -187,4 +241,81 @@  l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
 	send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
 
 }
+
+/*
+ * Buffer optimized handling of events, invoked
+ * from main_loop.
+ */
+static inline void
+l3fwd_em_process_events(int nb_rx, struct rte_event **ev,
+		     struct lcore_conf *qconf)
+{
+	int32_t i, j, pos;
+	uint16_t dst_port[MAX_PKT_BURST];
+	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+
+	/*
+	 * Send nb_rx - nb_rx % EM_HASH_LOOKUP_COUNT packets
+	 * in groups of EM_HASH_LOOKUP_COUNT.
+	 */
+	int32_t n = RTE_ALIGN_FLOOR(nb_rx, EM_HASH_LOOKUP_COUNT);
+
+	for (j = 0; j < EM_HASH_LOOKUP_COUNT && j < nb_rx; j++) {
+		pkts_burst[j] = ev[j]->mbuf;
+		rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j],
+					       struct rte_ether_hdr *) + 1);
+	}
+
+	for (j = 0; j < n; j += EM_HASH_LOOKUP_COUNT) {
+
+		uint32_t pkt_type = RTE_PTYPE_L3_MASK |
+				    RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP;
+		uint32_t l3_type, tcp_or_udp;
+
+		for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++)
+			pkt_type &= pkts_burst[j + i]->packet_type;
+
+		l3_type = pkt_type & RTE_PTYPE_L3_MASK;
+		tcp_or_udp = pkt_type & (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP);
+
+		for (i = 0, pos = j + EM_HASH_LOOKUP_COUNT;
+		     i < EM_HASH_LOOKUP_COUNT && pos < nb_rx; i++, pos++) {
+			rte_prefetch0(rte_pktmbuf_mtod(
+					pkts_burst[pos],
+					struct rte_ether_hdr *) + 1);
+		}
+
+		if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV4)) {
+
+			em_get_dst_port_ipv4xN_events(qconf, &pkts_burst[j],
+					       &dst_port[j]);
+
+		} else if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV6)) {
+
+			em_get_dst_port_ipv6xN_events(qconf, &pkts_burst[j],
+					       &dst_port[j]);
+
+		} else {
+			for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++) {
+				pkts_burst[j + i]->port = em_get_dst_port(qconf,
+						pkts_burst[j + i],
+						pkts_burst[j + i]->port);
+				process_packet(pkts_burst[j + i],
+						&pkts_burst[j + i]->port);
+			}
+			continue;
+		}
+		processx4_step3(&pkts_burst[j], &dst_port[j]);
+
+		for (i = 0; i < EM_HASH_LOOKUP_COUNT; i++)
+			pkts_burst[j + i]->port = dst_port[j + i];
+
+	}
+
+	for (; j < nb_rx; j++) {
+		pkts_burst[j]->port = em_get_dst_port(qconf, pkts_burst[j],
+						      pkts_burst[j]->port);
+		process_packet(pkts_burst[j], &pkts_burst[j]->port);
+	}
+}
 #endif /* __L3FWD_EM_HLM_H__ */
diff --git a/examples/l3fwd/l3fwd_em_sequential.h b/examples/l3fwd/l3fwd_em_sequential.h
index 23fe9dec8..b231b9994 100644
--- a/examples/l3fwd/l3fwd_em_sequential.h
+++ b/examples/l3fwd/l3fwd_em_sequential.h
@@ -95,4 +95,30 @@  l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
 
 	send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
 }
+
+/*
+ * Buffer optimized handling of events, invoked
+ * from main_loop.
+ */
+static inline void
+l3fwd_em_process_events(int nb_rx, struct rte_event **events,
+		     struct lcore_conf *qconf)
+{
+	int32_t i, j;
+
+	rte_prefetch0(rte_pktmbuf_mtod(events[0]->mbuf,
+		      struct rte_ether_hdr *) + 1);
+
+	for (i = 1, j = 0; j < nb_rx; i++, j++) {
+		struct rte_mbuf *mbuf = events[j]->mbuf;
+
+		if (i < nb_rx) {
+			rte_prefetch0(rte_pktmbuf_mtod(
+					events[i]->mbuf,
+					struct rte_ether_hdr *) + 1);
+		}
+		mbuf->port = em_get_dst_port(qconf, mbuf, mbuf->port);
+		process_packet(mbuf, &mbuf->port);
+	}
+}
 #endif /* __L3FWD_EM_SEQUENTIAL_H__ */
diff --git a/examples/l3fwd/l3fwd_eventdev.c b/examples/l3fwd/l3fwd_eventdev.c
index 8cb12d661..047c04356 100644
--- a/examples/l3fwd/l3fwd_eventdev.c
+++ b/examples/l3fwd/l3fwd_eventdev.c
@@ -306,6 +306,12 @@  l3fwd_eventdev_resource_setup(struct rte_eth_conf *port_conf)
 		[_f2][_f1] = lpm_event_main_loop_ ## _name,
 		L3FWD_LPM_EVENT_MODE
 #undef LPM_FP
+	};
+	const event_loop_cb em_event_loop[2][2] = {
+#define EM_FP(_name, _f2, _f1, flags) \
+		[_f2][_f1] = em_event_main_loop_ ## _name,
+		L3FWD_EM_EVENT_MODE
+#undef EM_FP
 	};
 	uint16_t ethdev_count = rte_eth_dev_count_avail();
 	uint32_t event_queue_cfg;
@@ -344,4 +350,7 @@  l3fwd_eventdev_resource_setup(struct rte_eth_conf *port_conf)
 
 	evdev_rsrc->ops.lpm_event_loop = lpm_event_loop[evdev_rsrc->tx_mode_q]
 						       [evdev_rsrc->has_burst];
+
+	evdev_rsrc->ops.em_event_loop = em_event_loop[evdev_rsrc->tx_mode_q]
+						       [evdev_rsrc->has_burst];
 }
diff --git a/examples/l3fwd/main.c b/examples/l3fwd/main.c
index dd371b945..e31afe045 100644
--- a/examples/l3fwd/main.c
+++ b/examples/l3fwd/main.c
@@ -951,7 +951,10 @@  main(int argc, char **argv)
 	/* Configure eventdev parameters if user has requested */
 	l3fwd_eventdev_resource_setup(&port_conf);
 	if (evdev_rsrc->enabled) {
-		l3fwd_lkp.main_loop = evdev_rsrc->ops.lpm_event_loop;
+		if (l3fwd_em_on)
+			l3fwd_lkp.main_loop = evdev_rsrc->ops.em_event_loop;
+		else
+			l3fwd_lkp.main_loop = evdev_rsrc->ops.lpm_event_loop;
 		goto skip_port_config;
 	}