diff mbox series

[v7,4/7] event/cnxk: add Tx adapter fastpath ops

Message ID 20210703220022.1387-4-pbhagavatula@marvell.com (mailing list archive)
State Superseded, archived
Delegated to: Jerin Jacob
Headers show
Series [v7,1/7] event/cnxk: add Rx adapter support | expand

Checks

Context Check Description
ci/checkpatch warning coding style issues

Commit Message

Pavan Nikhilesh Bhagavatula July 3, 2021, 10 p.m. UTC
From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Add support for event eth Tx adapter fastpath operations.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/event/cnxk/cn10k_eventdev.c           | 38 ++++++++
 drivers/event/cnxk/cn10k_worker.h             | 67 +++++++++++++
 drivers/event/cnxk/cn10k_worker_tx_enq.c      | 23 +++++
 drivers/event/cnxk/cn10k_worker_tx_enq_seg.c  | 23 +++++
 drivers/event/cnxk/cn9k_eventdev.c            | 81 ++++++++++++++++
 drivers/event/cnxk/cn9k_worker.h              | 97 +++++++++++++++++++
 drivers/event/cnxk/cn9k_worker_dual_tx_enq.c  | 23 +++++
 .../event/cnxk/cn9k_worker_dual_tx_enq_seg.c  | 23 +++++
 drivers/event/cnxk/cn9k_worker_tx_enq.c       | 23 +++++
 drivers/event/cnxk/cn9k_worker_tx_enq_seg.c   | 23 +++++
 drivers/event/cnxk/cnxk_worker.h              | 27 +++---
 drivers/event/cnxk/meson.build                |  6 ++
 12 files changed, 440 insertions(+), 14 deletions(-)
 create mode 100644 drivers/event/cnxk/cn10k_worker_tx_enq.c
 create mode 100644 drivers/event/cnxk/cn10k_worker_tx_enq_seg.c
 create mode 100644 drivers/event/cnxk/cn9k_worker_dual_tx_enq.c
 create mode 100644 drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c
 create mode 100644 drivers/event/cnxk/cn9k_worker_tx_enq.c
 create mode 100644 drivers/event/cnxk/cn9k_worker_tx_enq_seg.c
diff mbox series

Patch

diff --git a/drivers/event/cnxk/cn10k_eventdev.c b/drivers/event/cnxk/cn10k_eventdev.c
index 8a9b04a3d..e462f770c 100644
--- a/drivers/event/cnxk/cn10k_eventdev.c
+++ b/drivers/event/cnxk/cn10k_eventdev.c
@@ -328,6 +328,23 @@  cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 #undef R
 		};
 
+	/* Tx modes */
+	const event_tx_adapter_enqueue
+		sso_hws_tx_adptr_enq[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)                             \
+	[f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_tx_adptr_enq_##name,
+			NIX_TX_FASTPATH_MODES
+#undef T
+		};
+
+	const event_tx_adapter_enqueue
+		sso_hws_tx_adptr_enq_seg[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)                             \
+	[f5][f4][f3][f2][f1][f0] = cn10k_sso_hws_tx_adptr_enq_seg_##name,
+			NIX_TX_FASTPATH_MODES
+#undef T
+		};
+
 	event_dev->enqueue = cn10k_sso_hws_enq;
 	event_dev->enqueue_burst = cn10k_sso_hws_enq_burst;
 	event_dev->enqueue_new_burst = cn10k_sso_hws_enq_new_burst;
@@ -407,6 +424,27 @@  cn10k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 				[!!(dev->rx_offloads & NIX_RX_OFFLOAD_RSS_F)];
 		}
 	}
+
+	if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) {
+		/* [TSMP] [TSO] [MBUF_NOFF] [VLAN] [OL3_OL4_CSUM] [L3_L4_CSUM] */
+		event_dev->txa_enqueue = sso_hws_tx_adptr_enq_seg
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+	} else {
+		event_dev->txa_enqueue = sso_hws_tx_adptr_enq
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+	}
+
+	event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue;
 }
 
 static void
diff --git a/drivers/event/cnxk/cn10k_worker.h b/drivers/event/cnxk/cn10k_worker.h
index b724083ca..3c90c8500 100644
--- a/drivers/event/cnxk/cn10k_worker.h
+++ b/drivers/event/cnxk/cn10k_worker.h
@@ -11,6 +11,7 @@ 
 
 #include "cn10k_ethdev.h"
 #include "cn10k_rx.h"
+#include "cn10k_tx.h"
 
 /* SSO Operations */
 
@@ -251,4 +252,70 @@  uint16_t __rte_hot cn10k_sso_hws_enq_fwd_burst(void *port,
 NIX_RX_FASTPATH_MODES
 #undef R
 
+static __rte_always_inline const struct cn10k_eth_txq *
+cn10k_sso_hws_xtract_meta(struct rte_mbuf *m,
+			  const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT])
+{
+	return (const struct cn10k_eth_txq *)
+		txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)];
+}
+
+static __rte_always_inline uint16_t
+cn10k_sso_hws_event_tx(struct cn10k_sso_hws *ws, struct rte_event *ev,
+		       uint64_t *cmd,
+		       const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
+		       const uint32_t flags)
+{
+	const struct cn10k_eth_txq *txq;
+	struct rte_mbuf *m = ev->mbuf;
+	uint16_t ref_cnt = m->refcnt;
+	uintptr_t lmt_addr;
+	uint16_t lmt_id;
+	uintptr_t pa;
+
+	lmt_addr = ws->lmt_base;
+	ROC_LMT_BASE_ID_GET(lmt_addr, lmt_id);
+	txq = cn10k_sso_hws_xtract_meta(m, txq_data);
+	cn10k_nix_tx_skeleton(txq, cmd, flags);
+	/* Perform header writes before barrier for TSO */
+	if (flags & NIX_TX_OFFLOAD_TSO_F)
+		cn10k_nix_xmit_prepare_tso(m, flags);
+
+	cn10k_nix_xmit_prepare(m, cmd, lmt_addr, flags, txq->lso_tun_fmt);
+	if (flags & NIX_TX_MULTI_SEG_F) {
+		const uint16_t segdw =
+			cn10k_nix_prepare_mseg(m, (uint64_t *)lmt_addr, flags);
+		pa = txq->io_addr | ((segdw - 1) << 4);
+	} else {
+		pa = txq->io_addr | (cn10k_nix_tx_ext_subs(flags) + 1) << 4;
+	}
+	if (!ev->sched_type)
+		cnxk_sso_hws_head_wait(ws->tx_base + SSOW_LF_GWS_TAG);
+
+	roc_lmt_submit_steorl(lmt_id, pa);
+
+	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
+		if (ref_cnt > 1)
+			return 1;
+	}
+
+	cnxk_sso_hws_swtag_flush(ws->tx_base + SSOW_LF_GWS_TAG,
+				 ws->tx_base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
+
+	return 1;
+}
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)                             \
+	uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_##name(                  \
+		void *port, struct rte_event ev[], uint16_t nb_events);        \
+	uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_seg_##name(              \
+		void *port, struct rte_event ev[], uint16_t nb_events);        \
+	uint16_t __rte_hot cn10k_sso_hws_dual_tx_adptr_enq_##name(             \
+		void *port, struct rte_event ev[], uint16_t nb_events);        \
+	uint16_t __rte_hot cn10k_sso_hws_dual_tx_adptr_enq_seg_##name(         \
+		void *port, struct rte_event ev[], uint16_t nb_events);
+
+NIX_TX_FASTPATH_MODES
+#undef T
+
 #endif
diff --git a/drivers/event/cnxk/cn10k_worker_tx_enq.c b/drivers/event/cnxk/cn10k_worker_tx_enq.c
new file mode 100644
index 000000000..f9968ac0d
--- /dev/null
+++ b/drivers/event/cnxk/cn10k_worker_tx_enq.c
@@ -0,0 +1,23 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)                             \
+	uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_##name(                  \
+		void *port, struct rte_event ev[], uint16_t nb_events)         \
+	{                                                                      \
+		struct cn10k_sso_hws *ws = port;                               \
+		uint64_t cmd[sz];                                              \
+									       \
+		RTE_SET_USED(nb_events);                                       \
+		return cn10k_sso_hws_event_tx(                                 \
+			ws, &ev[0], cmd,                                       \
+			(const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) &         \
+				ws->tx_adptr_data,                             \
+			flags);                                                \
+	}
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c b/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c
new file mode 100644
index 000000000..a24fc42e5
--- /dev/null
+++ b/drivers/event/cnxk/cn10k_worker_tx_enq_seg.c
@@ -0,0 +1,23 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn10k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)                             \
+	uint16_t __rte_hot cn10k_sso_hws_tx_adptr_enq_seg_##name(              \
+		void *port, struct rte_event ev[], uint16_t nb_events)         \
+	{                                                                      \
+		uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];           \
+		struct cn10k_sso_hws *ws = port;                               \
+									       \
+		RTE_SET_USED(nb_events);                                       \
+		return cn10k_sso_hws_event_tx(                                 \
+			ws, &ev[0], cmd,                                       \
+			(const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) &         \
+				ws->tx_adptr_data,                             \
+			(flags) | NIX_TX_MULTI_SEG_F);                         \
+	}
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn9k_eventdev.c b/drivers/event/cnxk/cn9k_eventdev.c
index 21f80323d..a69edff19 100644
--- a/drivers/event/cnxk/cn9k_eventdev.c
+++ b/drivers/event/cnxk/cn9k_eventdev.c
@@ -430,6 +430,39 @@  cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 #undef R
 		};
 
+	/* Tx modes */
+	const event_tx_adapter_enqueue
+		sso_hws_tx_adptr_enq[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)                             \
+	[f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_tx_adptr_enq_##name,
+			NIX_TX_FASTPATH_MODES
+#undef T
+		};
+
+	const event_tx_adapter_enqueue
+		sso_hws_tx_adptr_enq_seg[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)                             \
+	[f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_tx_adptr_enq_seg_##name,
+			NIX_TX_FASTPATH_MODES
+#undef T
+		};
+
+	const event_tx_adapter_enqueue
+		sso_hws_dual_tx_adptr_enq[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)                             \
+	[f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_tx_adptr_enq_##name,
+			NIX_TX_FASTPATH_MODES
+#undef T
+		};
+
+	const event_tx_adapter_enqueue
+		sso_hws_dual_tx_adptr_enq_seg[2][2][2][2][2][2] = {
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)                             \
+	[f5][f4][f3][f2][f1][f0] = cn9k_sso_hws_dual_tx_adptr_enq_seg_##name,
+			NIX_TX_FASTPATH_MODES
+#undef T
+		};
+
 	event_dev->enqueue = cn9k_sso_hws_enq;
 	event_dev->enqueue_burst = cn9k_sso_hws_enq_burst;
 	event_dev->enqueue_new_burst = cn9k_sso_hws_enq_new_burst;
@@ -510,6 +543,25 @@  cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 		}
 	}
 
+	if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) {
+		/* [TSMP] [TSO] [MBUF_NOFF] [VLAN] [OL3_OL4_CSUM] [L3_L4_CSUM] */
+		event_dev->txa_enqueue = sso_hws_tx_adptr_enq_seg
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+	} else {
+		event_dev->txa_enqueue = sso_hws_tx_adptr_enq
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+			[!!(dev->tx_offloads & NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+	}
+
 	if (dev->dual_ws) {
 		event_dev->enqueue = cn9k_sso_hws_dual_enq;
 		event_dev->enqueue_burst = cn9k_sso_hws_dual_enq_burst;
@@ -618,8 +670,37 @@  cn9k_sso_fp_fns_set(struct rte_eventdev *event_dev)
 						  NIX_RX_OFFLOAD_RSS_F)];
 			}
 		}
+
+		if (dev->tx_offloads & NIX_TX_MULTI_SEG_F) {
+			/* [TSMP] [TSO] [MBUF_NOFF] [VLAN] [OL3_OL4_CSUM]
+			 * [L3_L4_CSUM] */
+			event_dev->txa_enqueue = sso_hws_dual_tx_adptr_enq_seg
+				[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+				[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+				[!!(dev->tx_offloads &
+				    NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+				[!!(dev->tx_offloads &
+				    NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+				[!!(dev->tx_offloads &
+				    NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+				[!!(dev->tx_offloads &
+				    NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+		} else {
+			event_dev->txa_enqueue = sso_hws_dual_tx_adptr_enq
+				[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSTAMP_F)]
+				[!!(dev->tx_offloads & NIX_TX_OFFLOAD_TSO_F)]
+				[!!(dev->tx_offloads &
+				    NIX_TX_OFFLOAD_MBUF_NOFF_F)]
+				[!!(dev->tx_offloads &
+				    NIX_TX_OFFLOAD_VLAN_QINQ_F)]
+				[!!(dev->tx_offloads &
+				    NIX_TX_OFFLOAD_OL3_OL4_CSUM_F)]
+				[!!(dev->tx_offloads &
+				    NIX_TX_OFFLOAD_L3_L4_CSUM_F)];
+		}
 	}
 
+	event_dev->txa_enqueue_same_dest = event_dev->txa_enqueue;
 	rte_mb();
 }
 
diff --git a/drivers/event/cnxk/cn9k_worker.h b/drivers/event/cnxk/cn9k_worker.h
index c01c00e1d..3f9751211 100644
--- a/drivers/event/cnxk/cn9k_worker.h
+++ b/drivers/event/cnxk/cn9k_worker.h
@@ -11,6 +11,7 @@ 
 
 #include "cn9k_ethdev.h"
 #include "cn9k_rx.h"
+#include "cn9k_tx.h"
 
 /* SSO Operations */
 
@@ -416,4 +417,100 @@  NIX_RX_FASTPATH_MODES
 NIX_RX_FASTPATH_MODES
 #undef R
 
+static __rte_always_inline void
+cn9k_sso_txq_fc_wait(const struct cn9k_eth_txq *txq)
+{
+	while (!(((txq)->nb_sqb_bufs_adj - *(txq)->fc_mem)
+		 << (txq)->sqes_per_sqb_log2))
+		;
+}
+
+static __rte_always_inline const struct cn9k_eth_txq *
+cn9k_sso_hws_xtract_meta(struct rte_mbuf *m,
+			 const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT])
+{
+	return (const struct cn9k_eth_txq *)
+		txq_data[m->port][rte_event_eth_tx_adapter_txq_get(m)];
+}
+
+static __rte_always_inline void
+cn9k_sso_hws_prepare_pkt(const struct cn9k_eth_txq *txq, struct rte_mbuf *m,
+			 uint64_t *cmd, const uint32_t flags)
+{
+	roc_lmt_mov(cmd, txq->cmd, cn9k_nix_tx_ext_subs(flags));
+	cn9k_nix_xmit_prepare(m, cmd, flags, txq->lso_tun_fmt);
+}
+
+static __rte_always_inline uint16_t
+cn9k_sso_hws_event_tx(uint64_t base, struct rte_event *ev, uint64_t *cmd,
+		      const uint64_t txq_data[][RTE_MAX_QUEUES_PER_PORT],
+		      const uint32_t flags)
+{
+	struct rte_mbuf *m = ev->mbuf;
+	const struct cn9k_eth_txq *txq;
+	uint16_t ref_cnt = m->refcnt;
+
+	/* Perform header writes before barrier for TSO */
+	cn9k_nix_xmit_prepare_tso(m, flags);
+	/* Commit any changes to the packet here when fast free is set, as
+	 * no further changes will be made to the mbuf. When fast free is
+	 * not set, both cn9k_nix_prepare_mseg() and cn9k_nix_xmit_prepare()
+	 * have a barrier after the refcnt update.
+	 */
+	if (!(flags & NIX_TX_OFFLOAD_MBUF_NOFF_F))
+		rte_io_wmb();
+	txq = cn9k_sso_hws_xtract_meta(m, txq_data);
+	cn9k_sso_hws_prepare_pkt(txq, m, cmd, flags);
+
+	if (flags & NIX_TX_MULTI_SEG_F) {
+		const uint16_t segdw = cn9k_nix_prepare_mseg(m, cmd, flags);
+		if (!CNXK_TT_FROM_EVENT(ev->event)) {
+			cn9k_nix_xmit_mseg_prep_lmt(cmd, txq->lmt_addr, segdw);
+			cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
+			cn9k_sso_txq_fc_wait(txq);
+			if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
+				cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr,
+						       txq->io_addr, segdw);
+		} else {
+			cn9k_nix_xmit_mseg_one(cmd, txq->lmt_addr, txq->io_addr,
+					       segdw);
+		}
+	} else {
+		if (!CNXK_TT_FROM_EVENT(ev->event)) {
+			cn9k_nix_xmit_prep_lmt(cmd, txq->lmt_addr, flags);
+			cnxk_sso_hws_head_wait(base + SSOW_LF_GWS_TAG);
+			cn9k_sso_txq_fc_wait(txq);
+			if (cn9k_nix_xmit_submit_lmt(txq->io_addr) == 0)
+				cn9k_nix_xmit_one(cmd, txq->lmt_addr,
+						  txq->io_addr, flags);
+		} else {
+			cn9k_nix_xmit_one(cmd, txq->lmt_addr, txq->io_addr,
+					  flags);
+		}
+	}
+
+	if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
+		if (ref_cnt > 1)
+			return 1;
+	}
+
+	cnxk_sso_hws_swtag_flush(base + SSOW_LF_GWS_TAG,
+				 base + SSOW_LF_GWS_OP_SWTAG_FLUSH);
+
+	return 1;
+}
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)                             \
+	uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_##name(                   \
+		void *port, struct rte_event ev[], uint16_t nb_events);        \
+	uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_seg_##name(               \
+		void *port, struct rte_event ev[], uint16_t nb_events);        \
+	uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_##name(              \
+		void *port, struct rte_event ev[], uint16_t nb_events);        \
+	uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_seg_##name(          \
+		void *port, struct rte_event ev[], uint16_t nb_events);
+
+NIX_TX_FASTPATH_MODES
+#undef T
+
 #endif
diff --git a/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c b/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c
new file mode 100644
index 000000000..92e2981f0
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_dual_tx_enq.c
@@ -0,0 +1,23 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)                             \
+	uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_##name(              \
+		void *port, struct rte_event ev[], uint16_t nb_events)         \
+	{                                                                      \
+		struct cn9k_sso_hws_dual *ws = port;                           \
+		uint64_t cmd[sz];                                              \
+									       \
+		RTE_SET_USED(nb_events);                                       \
+		return cn9k_sso_hws_event_tx(                                  \
+			ws->base[!ws->vws], &ev[0], cmd,                       \
+			(const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) &         \
+				ws->tx_adptr_data,                             \
+			flags);                                                \
+	}
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c b/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c
new file mode 100644
index 000000000..dfb574cf9
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_dual_tx_enq_seg.c
@@ -0,0 +1,23 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)                             \
+	uint16_t __rte_hot cn9k_sso_hws_dual_tx_adptr_enq_seg_##name(          \
+		void *port, struct rte_event ev[], uint16_t nb_events)         \
+	{                                                                      \
+		uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];           \
+		struct cn9k_sso_hws_dual *ws = port;                           \
+									       \
+		RTE_SET_USED(nb_events);                                       \
+		return cn9k_sso_hws_event_tx(                                  \
+			ws->base[!ws->vws], &ev[0], cmd,                       \
+			(const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) &         \
+				ws->tx_adptr_data,                             \
+			(flags) | NIX_TX_MULTI_SEG_F);                         \
+	}
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn9k_worker_tx_enq.c b/drivers/event/cnxk/cn9k_worker_tx_enq.c
new file mode 100644
index 000000000..3df649c0c
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_tx_enq.c
@@ -0,0 +1,23 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)                             \
+	uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_##name(                   \
+		void *port, struct rte_event ev[], uint16_t nb_events)         \
+	{                                                                      \
+		struct cn9k_sso_hws *ws = port;                                \
+		uint64_t cmd[sz];                                              \
+									       \
+		RTE_SET_USED(nb_events);                                       \
+		return cn9k_sso_hws_event_tx(                                  \
+			ws->base, &ev[0], cmd,                                 \
+			(const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) &         \
+				ws->tx_adptr_data,                             \
+			flags);                                                \
+	}
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c b/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c
new file mode 100644
index 000000000..0efe29113
--- /dev/null
+++ b/drivers/event/cnxk/cn9k_worker_tx_enq_seg.c
@@ -0,0 +1,23 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(C) 2021 Marvell.
+ */
+
+#include "cn9k_worker.h"
+
+#define T(name, f5, f4, f3, f2, f1, f0, sz, flags)                             \
+	uint16_t __rte_hot cn9k_sso_hws_tx_adptr_enq_seg_##name(               \
+		void *port, struct rte_event ev[], uint16_t nb_events)         \
+	{                                                                      \
+		uint64_t cmd[(sz) + CNXK_NIX_TX_MSEG_SG_DWORDS - 2];           \
+		struct cn9k_sso_hws *ws = port;                                \
+									       \
+		RTE_SET_USED(nb_events);                                       \
+		return cn9k_sso_hws_event_tx(                                  \
+			ws->base, &ev[0], cmd,                                 \
+			(const uint64_t(*)[RTE_MAX_QUEUES_PER_PORT]) &         \
+				ws->tx_adptr_data,                             \
+			(flags) | NIX_TX_MULTI_SEG_F);                         \
+	}
+
+NIX_TX_FASTPATH_MODES
+#undef T
diff --git a/drivers/event/cnxk/cnxk_worker.h b/drivers/event/cnxk/cnxk_worker.h
index 4eb46ae16..7891b749d 100644
--- a/drivers/event/cnxk/cnxk_worker.h
+++ b/drivers/event/cnxk/cnxk_worker.h
@@ -79,21 +79,20 @@  static __rte_always_inline void
 cnxk_sso_hws_head_wait(uintptr_t tag_op)
 {
 #ifdef RTE_ARCH_ARM64
-	uint64_t swtp;
-
-	asm volatile(PLT_CPU_FEATURE_PREAMBLE
-		     "		ldr %[swtb], [%[swtp_loc]]	\n"
-		     "		tbz %[swtb], 35, done%=		\n"
-		     "		sevl				\n"
-		     "rty%=:	wfe				\n"
-		     "		ldr %[swtb], [%[swtp_loc]]	\n"
-		     "		tbnz %[swtb], 35, rty%=		\n"
-		     "done%=:					\n"
-		     : [swtb] "=&r"(swtp)
-		     : [swtp_loc] "r"(tag_op));
+	uint64_t tag;
+
+	asm volatile("       ldr %[tag], [%[tag_op]]         \n"
+		     "       tbnz %[tag], 35, done%=         \n"
+		     "       sevl                            \n"
+		     "rty%=: wfe                             \n"
+		     "       ldr %[tag], [%[tag_op]]         \n"
+		     "       tbz %[tag], 35, rty%=           \n"
+		     "done%=:                                \n"
+		     : [tag] "=&r"(tag)
+		     : [tag_op] "r"(tag_op));
 #else
-	/* Wait for the SWTAG/SWTAG_FULL operation */
-	while (plt_read64(tag_op) & BIT_ULL(35))
+	/* Wait for the HEAD to be set */
+	while (!(plt_read64(tag_op) & BIT_ULL(35)))
 		;
 #endif
 }
diff --git a/drivers/event/cnxk/meson.build b/drivers/event/cnxk/meson.build
index c5c1c0ee8..13e0634e8 100644
--- a/drivers/event/cnxk/meson.build
+++ b/drivers/event/cnxk/meson.build
@@ -17,11 +17,17 @@  sources = files(
         'cn9k_worker_dual_deq.c',
         'cn9k_worker_dual_deq_burst.c',
         'cn9k_worker_dual_deq_tmo.c',
+        'cn9k_worker_tx_enq.c',
+        'cn9k_worker_tx_enq_seg.c',
+        'cn9k_worker_dual_tx_enq.c',
+        'cn9k_worker_dual_tx_enq_seg.c',
         'cn10k_eventdev.c',
         'cn10k_worker.c',
         'cn10k_worker_deq.c',
         'cn10k_worker_deq_burst.c',
         'cn10k_worker_deq_tmo.c',
+        'cn10k_worker_tx_enq.c',
+        'cn10k_worker_tx_enq_seg.c',
         'cnxk_eventdev.c',
         'cnxk_eventdev_adptr.c',
         'cnxk_eventdev_selftest.c',