[v5,3/6] net/cnxk: enable VLAN processing in vector Tx

Message ID 20210629074424.264-3-pbhagavatula@marvell.com (mailing list archive)
State Accepted, archived
Delegated to: Jerin Jacob
Series [v5,1/6] net/cnxk: add multi seg Rx vector routine

Checks

Context        Check    Description
ci/checkpatch  success  coding style OK

Commit Message

Pavan Nikhilesh Bhagavatula June 29, 2021, 7:44 a.m. UTC
  From: Pavan Nikhilesh <pbhagavatula@marvell.com>

Enable VLAN offload in the vector Tx burst function.

Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/net/cnxk/cn10k_tx.c     |   3 +-
 drivers/net/cnxk/cn10k_tx.h     | 125 +++++++++++++++++++++++++++----
 drivers/net/cnxk/cn10k_tx_vec.c |   3 +-
 drivers/net/cnxk/cn9k_tx.c      |   3 +-
 drivers/net/cnxk/cn9k_tx.h      | 128 ++++++++++++++++++++++++++++----
 drivers/net/cnxk/cn9k_tx_vec.c  |   3 +-
 6 files changed, 227 insertions(+), 38 deletions(-)
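
A note for readers: the vector path programs VLAN insertion through NIX_SEND_EXT_S word 1. Below is a minimal scalar sketch of the packing the NEON code in this patch performs; the bit positions are read off the shifts and BIT_ULL(48)/BIT_ULL(49) masks in the diff, and the helper name is illustrative, not a driver API. The outer TCI goes into the VLAN0 field at bits 8-23 with enable bit 48, the inner TCI into the VLAN1 field at bits 32-47 with enable bit 49, and both insert pointers default to byte offset 12, i.e. right after the Ethernet destination/source MACs.

#include <stdint.h>

/* Illustrative scalar equivalent of the SEND_EXT_W1 packing done by
 * the vector code in this patch. Not driver API; field offsets are
 * inferred from the shifts and enable-bit masks in the diff below. */
static inline uint64_t
nix_send_ext_w1_sketch(uint16_t vlan_tci_outer, uint16_t vlan_tci,
		       int has_qinq, int has_vlan)
{
	uint64_t w1 = 12 | 12U << 24;	      /* VLAN0/VLAN1 insert pointers */

	w1 |= (uint64_t)vlan_tci_outer << 8;  /* VLAN0 (outer) TCI */
	w1 |= (uint64_t)vlan_tci << 32;	      /* VLAN1 (inner) TCI */
	if (has_qinq)
		w1 |= 1ULL << 48;	      /* VLAN0 insert enable */
	if (has_vlan)
		w1 |= 1ULL << 49;	      /* VLAN1 insert enable */
	return w1;
}

This extra descriptor word is also why the burst layout changes: with the ext header each packet takes three 16 B stores (cmd0, cmd2, cmd1 = 48 B), so a 128 B LMTLINE holds two packets instead of four. Hence cn10k drops from 8 to 6 dwords per line and halves packets per burst, and cn9k splits the four packets into two LMT submissions.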
  

Patch

diff --git a/drivers/net/cnxk/cn10k_tx.c b/drivers/net/cnxk/cn10k_tx.c
index 18694dc70..05bc163a4 100644
--- a/drivers/net/cnxk/cn10k_tx.c
+++ b/drivers/net/cnxk/cn10k_tx.c
@@ -69,8 +69,7 @@  cn10k_eth_set_tx_function(struct rte_eth_dev *eth_dev)
 
 	if (dev->scalar_ena ||
 	    (dev->tx_offload_flags &
-	     (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F |
-	      NIX_TX_OFFLOAD_TSO_F)))
+	     (NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_TSO_F)))
 		pick_tx_func(eth_dev, nix_eth_tx_burst);
 	else
 		pick_tx_func(eth_dev, nix_eth_tx_vec_burst);
diff --git a/drivers/net/cnxk/cn10k_tx.h b/drivers/net/cnxk/cn10k_tx.h
index 8b1446f25..1e1697858 100644
--- a/drivers/net/cnxk/cn10k_tx.h
+++ b/drivers/net/cnxk/cn10k_tx.h
@@ -62,9 +62,14 @@  cn10k_nix_tx_ext_subs(const uint16_t flags)
 static __rte_always_inline uint8_t
 cn10k_nix_pkts_per_vec_brst(const uint16_t flags)
 {
-	RTE_SET_USED(flags);
-	/* We can pack up to 4 packets per LMTLINE if there are no offloads. */
-	return 4 << ROC_LMT_LINES_PER_CORE_LOG2;
+	return ((flags & NIX_TX_NEED_EXT_HDR) ? 2 : 4)
+	       << ROC_LMT_LINES_PER_CORE_LOG2;
+}
+
+static __rte_always_inline uint8_t
+cn10k_nix_tx_dwords_per_line(const uint16_t flags)
+{
+	return (flags & NIX_TX_NEED_EXT_HDR) ? 6 : 8;
 }
 
 static __rte_always_inline uint64_t
@@ -98,10 +103,9 @@  cn10k_nix_tx_steor_data(const uint16_t flags)
 static __rte_always_inline uint64_t
 cn10k_nix_tx_steor_vec_data(const uint16_t flags)
 {
-	const uint64_t dw_m1 = 0x7;
+	const uint64_t dw_m1 = cn10k_nix_tx_dwords_per_line(flags) - 1;
 	uint64_t data;
 
-	RTE_SET_USED(flags);
 	/* This will be moved to addr area */
 	data = dw_m1;
 	/* 15 vector sizes for single seg */
@@ -690,11 +694,14 @@  cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
 {
 	uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
 	uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
-	uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP];
+	uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
+		cmd2[NIX_DESCS_PER_LOOP];
 	uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3, data, pa;
 	uint64x2_t senddesc01_w0, senddesc23_w0;
 	uint64x2_t senddesc01_w1, senddesc23_w1;
 	uint16_t left, scalar, burst, i, lmt_id;
+	uint64x2_t sendext01_w0, sendext23_w0;
+	uint64x2_t sendext01_w1, sendext23_w1;
 	uint64x2_t sgdesc01_w0, sgdesc23_w0;
 	uint64x2_t sgdesc01_w1, sgdesc23_w1;
 	struct cn10k_eth_txq *txq = tx_queue;
@@ -720,6 +727,14 @@  cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
 	sgdesc01_w0 = vld1q_dup_u64(&txq->sg_w0);
 	sgdesc23_w0 = sgdesc01_w0;
 
+	/* Load command defaults into vector variables. */
+	if (flags & NIX_TX_NEED_EXT_HDR) {
+		sendext01_w0 = vld1q_dup_u64(&txq->cmd[0]);
+		sendext23_w0 = sendext01_w0;
+		sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
+		sendext23_w1 = sendext01_w1;
+	}
+
 	/* Get LMT base address and LMT ID as lcore id */
 	ROC_LMT_BASE_ID_GET(laddr, lmt_id);
 	left = pkts;
@@ -738,6 +753,13 @@  cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
 		senddesc23_w0 = senddesc01_w0;
 		sgdesc23_w0 = sgdesc01_w0;
 
+		/* Clear vlan enables. */
+		if (flags & NIX_TX_NEED_EXT_HDR) {
+			sendext01_w1 = vbicq_u64(sendext01_w1,
+						 vdupq_n_u64(0x3FFFF00FFFF00));
+			sendext23_w1 = sendext01_w1;
+		}
+
 		/* Move mbufs to iova */
 		mbuf0 = (uint64_t *)tx_pkts[0];
 		mbuf1 = (uint64_t *)tx_pkts[1];
@@ -1303,6 +1325,52 @@  cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
 		senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
 		senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
 
+		if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
+			/* Tx ol_flag for vlan. */
+			const uint64x2_t olv = {PKT_TX_VLAN, PKT_TX_VLAN};
+			/* Bit enable for VLAN1 */
+			const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)};
+			/* Tx ol_flag for QinQ. */
+			const uint64x2_t olq = {PKT_TX_QINQ, PKT_TX_QINQ};
+			/* Bit enable for VLAN0 */
+			const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)};
+			/* Load vlan values from packet. outer is VLAN 0 */
+			uint64x2_t ext01 = {
+				((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 |
+					((uint64_t)tx_pkts[0]->vlan_tci) << 32,
+				((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 |
+					((uint64_t)tx_pkts[1]->vlan_tci) << 32,
+			};
+			uint64x2_t ext23 = {
+				((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 |
+					((uint64_t)tx_pkts[2]->vlan_tci) << 32,
+				((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 |
+					((uint64_t)tx_pkts[3]->vlan_tci) << 32,
+			};
+
+			/* Get ol_flags of the packets. */
+			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
+			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
+
+			/* ORR vlan outer/inner values into cmd. */
+			sendext01_w1 = vorrq_u64(sendext01_w1, ext01);
+			sendext23_w1 = vorrq_u64(sendext23_w1, ext23);
+
+			/* Test for offload enable bits and generate masks. */
+			xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv),
+						      mlv),
+					    vandq_u64(vtstq_u64(xtmp128, olq),
+						      mlq));
+			ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv),
+						      mlv),
+					    vandq_u64(vtstq_u64(ytmp128, olq),
+						      mlq));
+
+			/* Set vlan enable bits into cmd based on mask. */
+			sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128);
+			sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
+		}
+
 		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
 			/* Set don't free bit if reference count > 1 */
 			xmask01 = vdupq_n_u64(0);
@@ -1381,16 +1449,41 @@  cn10k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
 		cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
 		cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);
 
-		/* Store the prepared send desc to LMT lines */
-		vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
-		vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]);
-		vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]);
-		vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]);
-		vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]);
-		vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]);
-		vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]);
-		vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]);
-		lnum += 1;
+		if (flags & NIX_TX_NEED_EXT_HDR) {
+			cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1);
+			cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1);
+			cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1);
+			cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
+		}
+
+		if (flags & NIX_TX_NEED_EXT_HDR) {
+			/* Store the prepared send desc to LMT lines */
+			vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
+			vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[0]);
+			vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[0]);
+			vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[1]);
+			vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[1]);
+			vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[1]);
+			lnum += 1;
+			vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[2]);
+			vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd2[2]);
+			vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd1[2]);
+			vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd0[3]);
+			vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd2[3]);
+			vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[3]);
+			lnum += 1;
+		} else {
+			/* Store the prepared send desc to LMT lines */
+			vst1q_u64(LMT_OFF(laddr, lnum, 0), cmd0[0]);
+			vst1q_u64(LMT_OFF(laddr, lnum, 16), cmd1[0]);
+			vst1q_u64(LMT_OFF(laddr, lnum, 32), cmd0[1]);
+			vst1q_u64(LMT_OFF(laddr, lnum, 48), cmd1[1]);
+			vst1q_u64(LMT_OFF(laddr, lnum, 64), cmd0[2]);
+			vst1q_u64(LMT_OFF(laddr, lnum, 80), cmd1[2]);
+			vst1q_u64(LMT_OFF(laddr, lnum, 96), cmd0[3]);
+			vst1q_u64(LMT_OFF(laddr, lnum, 112), cmd1[3]);
+			lnum += 1;
+		}
 
 		tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
 	}
diff --git a/drivers/net/cnxk/cn10k_tx_vec.c b/drivers/net/cnxk/cn10k_tx_vec.c
index 7453f3bc9..beb5c649b 100644
--- a/drivers/net/cnxk/cn10k_tx_vec.c
+++ b/drivers/net/cnxk/cn10k_tx_vec.c
@@ -14,8 +14,7 @@ 
 		uint64_t cmd[sz];                                              \
 									       \
 		/* VLAN, TSTMP, TSO is not supported by vec */                 \
-		if ((flags) & NIX_TX_OFFLOAD_VLAN_QINQ_F ||		       \
-		    (flags) & NIX_TX_OFFLOAD_TSTAMP_F ||		       \
+		if ((flags) & NIX_TX_OFFLOAD_TSTAMP_F ||		       \
 		    (flags) & NIX_TX_OFFLOAD_TSO_F)			       \
 			return 0;                                              \
 		return cn10k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd,\
diff --git a/drivers/net/cnxk/cn9k_tx.c b/drivers/net/cnxk/cn9k_tx.c
index b80260607..4b43cdaff 100644
--- a/drivers/net/cnxk/cn9k_tx.c
+++ b/drivers/net/cnxk/cn9k_tx.c
@@ -68,8 +68,7 @@  cn9k_eth_set_tx_function(struct rte_eth_dev *eth_dev)
 
 	if (dev->scalar_ena ||
 	    (dev->tx_offload_flags &
-	     (NIX_TX_OFFLOAD_VLAN_QINQ_F | NIX_TX_OFFLOAD_TSTAMP_F |
-	      NIX_TX_OFFLOAD_TSO_F)))
+	     (NIX_TX_OFFLOAD_TSTAMP_F | NIX_TX_OFFLOAD_TSO_F)))
 		pick_tx_func(eth_dev, nix_eth_tx_burst);
 	else
 		pick_tx_func(eth_dev, nix_eth_tx_vec_burst);
diff --git a/drivers/net/cnxk/cn9k_tx.h b/drivers/net/cnxk/cn9k_tx.h
index 1899d6670..d5715bb52 100644
--- a/drivers/net/cnxk/cn9k_tx.h
+++ b/drivers/net/cnxk/cn9k_tx.h
@@ -552,10 +552,13 @@  cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
 {
 	uint64x2_t dataoff_iova0, dataoff_iova1, dataoff_iova2, dataoff_iova3;
 	uint64x2_t len_olflags0, len_olflags1, len_olflags2, len_olflags3;
-	uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP];
+	uint64x2_t cmd0[NIX_DESCS_PER_LOOP], cmd1[NIX_DESCS_PER_LOOP],
+		cmd2[NIX_DESCS_PER_LOOP];
 	uint64_t *mbuf0, *mbuf1, *mbuf2, *mbuf3;
 	uint64x2_t senddesc01_w0, senddesc23_w0;
 	uint64x2_t senddesc01_w1, senddesc23_w1;
+	uint64x2_t sendext01_w0, sendext23_w0;
+	uint64x2_t sendext01_w1, sendext23_w1;
 	uint64x2_t sgdesc01_w0, sgdesc23_w0;
 	uint64x2_t sgdesc01_w1, sgdesc23_w1;
 	struct cn9k_eth_txq *txq = tx_queue;
@@ -585,8 +588,19 @@  cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
 	senddesc23_w0 = senddesc01_w0;
 	senddesc01_w1 = vdupq_n_u64(0);
 	senddesc23_w1 = senddesc01_w1;
-	sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[2]);
-	sgdesc23_w0 = sgdesc01_w0;
+
+	/* Load command defaults into vector variables. */
+	if (flags & NIX_TX_NEED_EXT_HDR) {
+		sendext01_w0 = vld1q_dup_u64(&txq->cmd[2]);
+		sendext23_w0 = sendext01_w0;
+		sendext01_w1 = vdupq_n_u64(12 | 12U << 24);
+		sendext23_w1 = sendext01_w1;
+		sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[4]);
+		sgdesc23_w0 = sgdesc01_w0;
+	} else {
+		sgdesc01_w0 = vld1q_dup_u64(&txq->cmd[2]);
+		sgdesc23_w0 = sgdesc01_w0;
+	}
 
 	for (i = 0; i < pkts; i += NIX_DESCS_PER_LOOP) {
 		/* Clear lower 32bit of SEND_HDR_W0 and SEND_SG_W0 */
@@ -597,6 +611,13 @@  cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
 		senddesc23_w0 = senddesc01_w0;
 		sgdesc23_w0 = sgdesc01_w0;
 
+		/* Clear vlan enables. */
+		if (flags & NIX_TX_NEED_EXT_HDR) {
+			sendext01_w1 = vbicq_u64(sendext01_w1,
+						 vdupq_n_u64(0x3FFFF00FFFF00));
+			sendext23_w1 = sendext01_w1;
+		}
+
 		/* Move mbufs to iova */
 		mbuf0 = (uint64_t *)tx_pkts[0];
 		mbuf1 = (uint64_t *)tx_pkts[1];
@@ -1162,6 +1183,52 @@  cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
 		senddesc01_w0 = vorrq_u64(senddesc01_w0, xmask01);
 		senddesc23_w0 = vorrq_u64(senddesc23_w0, xmask23);
 
+		if (flags & NIX_TX_OFFLOAD_VLAN_QINQ_F) {
+			/* Tx ol_flag for vlan. */
+			const uint64x2_t olv = {PKT_TX_VLAN, PKT_TX_VLAN};
+			/* Bit enable for VLAN1 */
+			const uint64x2_t mlv = {BIT_ULL(49), BIT_ULL(49)};
+			/* Tx ol_flag for QinQ. */
+			const uint64x2_t olq = {PKT_TX_QINQ, PKT_TX_QINQ};
+			/* Bit enable for VLAN0 */
+			const uint64x2_t mlq = {BIT_ULL(48), BIT_ULL(48)};
+			/* Load vlan values from packet. outer is VLAN 0 */
+			uint64x2_t ext01 = {
+				((uint32_t)tx_pkts[0]->vlan_tci_outer) << 8 |
+					((uint64_t)tx_pkts[0]->vlan_tci) << 32,
+				((uint32_t)tx_pkts[1]->vlan_tci_outer) << 8 |
+					((uint64_t)tx_pkts[1]->vlan_tci) << 32,
+			};
+			uint64x2_t ext23 = {
+				((uint32_t)tx_pkts[2]->vlan_tci_outer) << 8 |
+					((uint64_t)tx_pkts[2]->vlan_tci) << 32,
+				((uint32_t)tx_pkts[3]->vlan_tci_outer) << 8 |
+					((uint64_t)tx_pkts[3]->vlan_tci) << 32,
+			};
+
+			/* Get ol_flags of the packets. */
+			xtmp128 = vzip1q_u64(len_olflags0, len_olflags1);
+			ytmp128 = vzip1q_u64(len_olflags2, len_olflags3);
+
+			/* ORR vlan outer/inner values into cmd. */
+			sendext01_w1 = vorrq_u64(sendext01_w1, ext01);
+			sendext23_w1 = vorrq_u64(sendext23_w1, ext23);
+
+			/* Test for offload enable bits and generate masks. */
+			xtmp128 = vorrq_u64(vandq_u64(vtstq_u64(xtmp128, olv),
+						      mlv),
+					    vandq_u64(vtstq_u64(xtmp128, olq),
+						      mlq));
+			ytmp128 = vorrq_u64(vandq_u64(vtstq_u64(ytmp128, olv),
+						      mlv),
+					    vandq_u64(vtstq_u64(ytmp128, olq),
+						      mlq));
+
+			/* Set vlan enable bits into cmd based on mask. */
+			sendext01_w1 = vorrq_u64(sendext01_w1, xtmp128);
+			sendext23_w1 = vorrq_u64(sendext23_w1, ytmp128);
+		}
+
 		if (flags & NIX_TX_OFFLOAD_MBUF_NOFF_F) {
 			/* Set don't free bit if reference count > 1 */
 			xmask01 = vdupq_n_u64(0);
@@ -1247,17 +1314,50 @@  cn9k_nix_xmit_pkts_vector(void *tx_queue, struct rte_mbuf **tx_pkts,
 		cmd1[2] = vzip1q_u64(sgdesc23_w0, sgdesc23_w1);
 		cmd1[3] = vzip2q_u64(sgdesc23_w0, sgdesc23_w1);
 
-		do {
-			vst1q_u64(lmt_addr, cmd0[0]);
-			vst1q_u64(lmt_addr + 2, cmd1[0]);
-			vst1q_u64(lmt_addr + 4, cmd0[1]);
-			vst1q_u64(lmt_addr + 6, cmd1[1]);
-			vst1q_u64(lmt_addr + 8, cmd0[2]);
-			vst1q_u64(lmt_addr + 10, cmd1[2]);
-			vst1q_u64(lmt_addr + 12, cmd0[3]);
-			vst1q_u64(lmt_addr + 14, cmd1[3]);
-			lmt_status = roc_lmt_submit_ldeor(io_addr);
-		} while (lmt_status == 0);
+		if (flags & NIX_TX_NEED_EXT_HDR) {
+			cmd2[0] = vzip1q_u64(sendext01_w0, sendext01_w1);
+			cmd2[1] = vzip2q_u64(sendext01_w0, sendext01_w1);
+			cmd2[2] = vzip1q_u64(sendext23_w0, sendext23_w1);
+			cmd2[3] = vzip2q_u64(sendext23_w0, sendext23_w1);
+		}
+
+		if (flags & NIX_TX_NEED_EXT_HDR) {
+			/* With ext header in the command we can no longer send
+			 * all 4 packets together since LMTLINE is 128bytes.
+			 * Split and Tx twice.
+			 */
+			do {
+				vst1q_u64(lmt_addr, cmd0[0]);
+				vst1q_u64(lmt_addr + 2, cmd2[0]);
+				vst1q_u64(lmt_addr + 4, cmd1[0]);
+				vst1q_u64(lmt_addr + 6, cmd0[1]);
+				vst1q_u64(lmt_addr + 8, cmd2[1]);
+				vst1q_u64(lmt_addr + 10, cmd1[1]);
+				lmt_status = roc_lmt_submit_ldeor(io_addr);
+			} while (lmt_status == 0);
+
+			do {
+				vst1q_u64(lmt_addr, cmd0[2]);
+				vst1q_u64(lmt_addr + 2, cmd2[2]);
+				vst1q_u64(lmt_addr + 4, cmd1[2]);
+				vst1q_u64(lmt_addr + 6, cmd0[3]);
+				vst1q_u64(lmt_addr + 8, cmd2[3]);
+				vst1q_u64(lmt_addr + 10, cmd1[3]);
+				lmt_status = roc_lmt_submit_ldeor(io_addr);
+			} while (lmt_status == 0);
+		} else {
+			do {
+				vst1q_u64(lmt_addr, cmd0[0]);
+				vst1q_u64(lmt_addr + 2, cmd1[0]);
+				vst1q_u64(lmt_addr + 4, cmd0[1]);
+				vst1q_u64(lmt_addr + 6, cmd1[1]);
+				vst1q_u64(lmt_addr + 8, cmd0[2]);
+				vst1q_u64(lmt_addr + 10, cmd1[2]);
+				vst1q_u64(lmt_addr + 12, cmd0[3]);
+				vst1q_u64(lmt_addr + 14, cmd1[3]);
+				lmt_status = roc_lmt_submit_ldeor(io_addr);
+			} while (lmt_status == 0);
+		}
 		tx_pkts = tx_pkts + NIX_DESCS_PER_LOOP;
 	}
 
diff --git a/drivers/net/cnxk/cn9k_tx_vec.c b/drivers/net/cnxk/cn9k_tx_vec.c
index a6e7c9e54..5842facb5 100644
--- a/drivers/net/cnxk/cn9k_tx_vec.c
+++ b/drivers/net/cnxk/cn9k_tx_vec.c
@@ -14,8 +14,7 @@ 
 		uint64_t cmd[sz];                                              \
 									       \
 		/* VLAN, TSTMP, TSO is not supported by vec */                 \
-		if ((flags) & NIX_TX_OFFLOAD_VLAN_QINQ_F ||		       \
-		    (flags) & NIX_TX_OFFLOAD_TSTAMP_F ||		       \
+		if ((flags) & NIX_TX_OFFLOAD_TSTAMP_F ||		       \
 		    (flags) & NIX_TX_OFFLOAD_TSO_F)			       \
 			return 0;                                              \
 		return cn9k_nix_xmit_pkts_vector(tx_queue, tx_pkts, pkts, cmd, \
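
Both the cn9k and cn10k hunks generate the enable bits with the same NEON idiom: vtstq_u64() turns "this ol_flag bit is set" into an all-ones lane, vandq_u64() narrows that to the corresponding SEND_EXT_W1 enable bit, and vorrq_u64() merges the VLAN1/VLAN0 results. A self-contained AArch64 sketch of that idiom follows, using made-up flag values in place of PKT_TX_VLAN/PKT_TX_QINQ (the real values live in rte_mbuf_core.h):

#include <arm_neon.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for the mbuf Tx offload flags. */
#define TX_VLAN (1ULL << 57)
#define TX_QINQ (1ULL << 58)

int main(void)
{
	/* ol_flags of two packets, one per 64-bit lane. */
	uint64x2_t olf = {TX_VLAN, TX_VLAN | TX_QINQ};
	const uint64x2_t olv = vdupq_n_u64(TX_VLAN);
	const uint64x2_t mlv = vdupq_n_u64(1ULL << 49); /* VLAN1 enable */
	const uint64x2_t olq = vdupq_n_u64(TX_QINQ);
	const uint64x2_t mlq = vdupq_n_u64(1ULL << 48); /* VLAN0 enable */

	/* vtstq_u64 yields an all-ones lane where (olf & flag) != 0;
	 * AND selects the enable bit, ORR merges both offloads into
	 * one mask that can be ORed into sendextXX_w1. */
	uint64x2_t mask = vorrq_u64(vandq_u64(vtstq_u64(olf, olv), mlv),
				    vandq_u64(vtstq_u64(olf, olq), mlq));

	printf("pkt0 enables: 0x%016llx\n",
	       (unsigned long long)vgetq_lane_u64(mask, 0));
	printf("pkt1 enables: 0x%016llx\n",
	       (unsigned long long)vgetq_lane_u64(mask, 1));
	return 0;
}

Expected output: pkt0 sets only bit 49 (VLAN1 insert), pkt1 sets bits 48 and 49, matching what the patch ORs into sendext01_w1/sendext23_w1 after the TCI values themselves have been merged in.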