[21/33] net/cnxk: support Rx burst scalar for cn20k

Message ID: 20240910085909.1514457-22-ndabilpuram@marvell.com (mailing list archive)
State: Changes Requested
Delegated to: Jerin Jacob
Series: add Marvell cn20k SOC support for mempool and net

Checks

Context         Check     Description
ci/checkpatch   warning   coding style issues

Commit Message

Nithin Dabilpuram Sept. 10, 2024, 8:58 a.m. UTC
Add scalar version of Rx burst support for cn20k.

Signed-off-by: Nithin Dabilpuram <ndabilpuram@marvell.com>
Signed-off-by: Jerin Jacob <jerinj@marvell.com>
Signed-off-by: Rahul Bhansali <rbhansali@marvell.com>
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
 drivers/net/cnxk/cn20k_ethdev.c    | 126 +++++++++
 drivers/net/cnxk/cn20k_rx.h        | 394 ++++++++++++++++++++++++++++-
 drivers/net/cnxk/cn20k_rx_select.c |   6 +-
 drivers/net/cnxk/cn20k_rxtx.h      | 156 ++++++++++++
 4 files changed, 674 insertions(+), 8 deletions(-)
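
The scalar Rx path added below is reached through the standard ethdev burst
API: cn20k_eth_set_rx_function() installs the per-offload variant behind
eth_dev->rx_pkt_burst. As a minimal consumer sketch (the port/queue ids and
burst size here are illustrative, not part of this patch):

    #include <rte_ethdev.h>
    #include <rte_mbuf.h>

    #define BURST_SZ 32 /* illustrative burst size */

    static void
    rx_poll_loop(uint16_t port_id, uint16_t queue_id)
    {
        struct rte_mbuf *pkts[BURST_SZ];
        uint16_t nb_rx, i;

        for (;;) {
            /* Dispatches to the cn20k scalar Rx burst picked at dev start */
            nb_rx = rte_eth_rx_burst(port_id, queue_id, pkts, BURST_SZ);
            for (i = 0; i < nb_rx; i++)
                rte_pktmbuf_free(pkts[i]); /* stand-in for app processing */
        }
    }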
  

Patch

diff --git a/drivers/net/cnxk/cn20k_ethdev.c b/drivers/net/cnxk/cn20k_ethdev.c
index 4b2f04ba31..cad7b1316a 100644
--- a/drivers/net/cnxk/cn20k_ethdev.c
+++ b/drivers/net/cnxk/cn20k_ethdev.c
@@ -330,6 +330,33 @@  cn20k_nix_rx_queue_setup(struct rte_eth_dev *eth_dev, uint16_t qid, uint16_t nb_
 	return 0;
 }
 
+static void
+cn20k_nix_rx_queue_meta_aura_update(struct rte_eth_dev *eth_dev)
+{
+	struct cnxk_eth_dev *dev = cnxk_eth_pmd_priv(eth_dev);
+	struct cnxk_eth_rxq_sp *rxq_sp;
+	struct cn20k_eth_rxq *rxq;
+	struct roc_nix_rq *rq;
+	int i;
+
+	/* Update Aura handle for fastpath rx queues */
+	for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
+		rq = &dev->rqs[i];
+		rxq = eth_dev->data->rx_queues[i];
+		rxq->meta_aura = rq->meta_aura_handle;
+		rxq->meta_pool = dev->nix.meta_mempool;
+		/* Assume the meta packet comes from the normal aura if the
+		 * meta aura is not set up.
+		 */
+		if (!rxq->meta_aura) {
+			rxq_sp = cnxk_eth_rxq_to_sp(rxq);
+			rxq->meta_aura = rxq_sp->qconf.mp->pool_id;
+			rxq->meta_pool = (uintptr_t)rxq_sp->qconf.mp;
+		}
+	}
+	/* Store mempool in lookup mem */
+	cnxk_nix_lookup_mem_metapool_set(dev);
+}
+
 static int
 cn20k_nix_tx_queue_stop(struct rte_eth_dev *eth_dev, uint16_t qidx)
 {
@@ -371,6 +398,74 @@  cn20k_nix_configure(struct rte_eth_dev *eth_dev)
 	return 0;
 }
 
+/* Function to enable ptp config for VFs */
+static void
+nix_ptp_enable_vf(struct rte_eth_dev *eth_dev)
+{
+	struct cnxk_eth_dev *dev = cnxk_eth_pmd_priv(eth_dev);
+
+	if (nix_recalc_mtu(eth_dev))
+		plt_err("Failed to set MTU size for ptp");
+
+	dev->rx_offload_flags |= NIX_RX_OFFLOAD_TSTAMP_F;
+
+	/* Setting up the function pointers as per new offload flags */
+	cn20k_eth_set_rx_function(eth_dev);
+	cn20k_eth_set_tx_function(eth_dev);
+}
+
+static uint16_t
+nix_ptp_vf_burst(void *queue, struct rte_mbuf **mbufs, uint16_t pkts)
+{
+	struct cn20k_eth_rxq *rxq = queue;
+	struct cnxk_eth_rxq_sp *rxq_sp;
+	struct rte_eth_dev *eth_dev;
+
+	RTE_SET_USED(mbufs);
+	RTE_SET_USED(pkts);
+
+	rxq_sp = cnxk_eth_rxq_to_sp(rxq);
+	eth_dev = rxq_sp->dev->eth_dev;
+	nix_ptp_enable_vf(eth_dev);
+
+	return 0;
+}
+
+static int
+cn20k_nix_ptp_info_update_cb(struct roc_nix *nix, bool ptp_en)
+{
+	struct cnxk_eth_dev *dev = (struct cnxk_eth_dev *)nix;
+	struct rte_eth_dev *eth_dev;
+	struct cn20k_eth_rxq *rxq;
+	int i;
+
+	if (!dev)
+		return -EINVAL;
+
+	eth_dev = dev->eth_dev;
+	if (!eth_dev)
+		return -EINVAL;
+
+	dev->ptp_en = ptp_en;
+
+	for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
+		rxq = eth_dev->data->rx_queues[i];
+		rxq->mbuf_initializer = cnxk_nix_rxq_mbuf_setup(dev);
+	}
+
+	if (roc_nix_is_vf_or_sdp(nix) && !(roc_nix_is_sdp(nix)) && !(roc_nix_is_lbk(nix))) {
+		/* In case of VF, the MTU cannot be set directly in this
+		 * function as it runs in PF->VF MBOX request context, and
+		 * setting the MTU also requires an MBOX message to be sent
+		 * (VF->PF).
+		 */
+		eth_dev->rx_pkt_burst = nix_ptp_vf_burst;
+		rte_mb();
+	}
+
+	return 0;
+}
+
 static int
 cn20k_nix_timesync_enable(struct rte_eth_dev *eth_dev)
 {
@@ -451,11 +546,21 @@  cn20k_nix_dev_start(struct rte_eth_dev *eth_dev)
 	if (rc)
 		return rc;
 
+	/* Update VF about the data offset shifted by 8 bytes if PTP is
+	 * already enabled in the PF owning this VF.
+	 */
+	if (dev->ptp_en && (!roc_nix_is_pf(nix) && (!roc_nix_is_sdp(nix))))
+		nix_ptp_enable_vf(eth_dev);
+
 	/* Setting up the rx[tx]_offload_flags due to change
 	 * in rx[tx]_offloads.
 	 */
 	dev->rx_offload_flags |= nix_rx_offload_flags(eth_dev);
 	dev->tx_offload_flags |= nix_tx_offload_flags(eth_dev);
+
+	if (dev->rx_offload_flags & NIX_RX_OFFLOAD_SECURITY_F)
+		cn20k_nix_rx_queue_meta_aura_update(eth_dev);
+
 	/* Set flags for Rx Inject feature */
 	if (roc_idev_nix_rx_inject_get(nix->port_id))
 		dev->rx_offload_flags |= NIX_RX_SEC_REASSEMBLY_F;
@@ -621,6 +726,20 @@  nix_tm_ops_override(void)
 	if (init_once)
 		return;
 	init_once = 1;
+
+	/* Update platform specific ops */
+}
+
+static void
+npc_flow_ops_override(void)
+{
+	static int init_once;
+
+	if (init_once)
+		return;
+	init_once = 1;
+
+	/* Update platform specific ops */
 }
 
 static int
@@ -633,6 +752,7 @@  static int
 cn20k_nix_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 {
 	struct rte_eth_dev *eth_dev;
+	struct cnxk_eth_dev *dev;
 	int rc;
 
 	rc = roc_plt_init();
@@ -643,6 +763,7 @@  cn20k_nix_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 
 	nix_eth_dev_ops_override();
 	nix_tm_ops_override();
+	npc_flow_ops_override();
 
 	/* Common probe */
 	rc = cnxk_nix_probe(pci_drv, pci_dev);
@@ -665,6 +786,11 @@  cn20k_nix_probe(struct rte_pci_driver *pci_drv, struct rte_pci_device *pci_dev)
 		return 0;
 	}
 
+	dev = cnxk_eth_pmd_priv(eth_dev);
+
+	/* Register up msg callbacks for PTP information */
+	roc_nix_ptp_info_cb_register(&dev->nix, cn20k_nix_ptp_info_update_cb);
+
 	return 0;
 }
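
Note the trick in nix_ptp_vf_burst() above: the PTP notification arrives in
PF->VF MBOX context, where the VF cannot issue its own VF->PF MBOX to
recalculate the MTU, so the callback only installs a one-shot burst function
and defers the real reconfiguration to the next data-path poll. A reduced
sketch of that pattern, with illustrative type and helper names (my_rxq and
my_dev_reconfigure are not from the driver):

    #include <rte_common.h>
    #include <rte_mbuf.h>

    struct my_dev;                          /* illustrative */
    struct my_rxq { struct my_dev *dev; };  /* illustrative rxq type */

    /* Performs the deferred work and re-installs the real burst fn */
    void my_dev_reconfigure(struct my_dev *dev);

    /* One-shot trampoline: delivers no packets, only reconfigures */
    static uint16_t
    deferred_reconfig_burst(void *queue, struct rte_mbuf **mbufs, uint16_t pkts)
    {
        struct my_rxq *rxq = queue;

        RTE_SET_USED(mbufs);
        RTE_SET_USED(pkts);
        my_dev_reconfigure(rxq->dev);
        return 0;
    }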
 
diff --git a/drivers/net/cnxk/cn20k_rx.h b/drivers/net/cnxk/cn20k_rx.h
index 2cb77c0b46..22abf7bbd8 100644
--- a/drivers/net/cnxk/cn20k_rx.h
+++ b/drivers/net/cnxk/cn20k_rx.h
@@ -29,8 +29,397 @@ 
 #define NIX_RX_VWQE_F	   BIT(13)
 #define NIX_RX_MULTI_SEG_F BIT(14)
 
+#define CNXK_NIX_CQ_ENTRY_SZ 128
+#define NIX_DESCS_PER_LOOP   4
+#define CQE_CAST(x)	     ((struct nix_cqe_hdr_s *)(x))
+#define CQE_SZ(x)	     ((x) * CNXK_NIX_CQ_ENTRY_SZ)
+
+#define CQE_PTR_OFF(b, i, o, f)                                                                    \
+	(((f) & NIX_RX_VWQE_F) ? (uint64_t *)(((uintptr_t)((uint64_t *)(b))[i]) + (o)) :           \
+				 (uint64_t *)(((uintptr_t)(b)) + CQE_SZ(i) + (o)))
+#define CQE_PTR_DIFF(b, i, o, f)                                                                   \
+	(((f) & NIX_RX_VWQE_F) ? (uint64_t *)(((uintptr_t)((uint64_t *)(b))[i]) - (o)) :           \
+				 (uint64_t *)(((uintptr_t)(b)) + CQE_SZ(i) - (o)))
+
+#define NIX_RX_SEC_UCC_CONST                                                                       \
+	((RTE_MBUF_F_RX_IP_CKSUM_BAD >> 1) |                                                       \
+	 ((RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_GOOD) >> 1) << 8 |                 \
+	 ((RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_BAD) >> 1) << 16 |                 \
+	 ((RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_GOOD) >> 1) << 32 |                \
+	 ((RTE_MBUF_F_RX_IP_CKSUM_GOOD | RTE_MBUF_F_RX_L4_CKSUM_GOOD) >> 1) << 48)
+
+#ifdef RTE_LIBRTE_MEMPOOL_DEBUG
+static inline void
+nix_mbuf_validate_next(struct rte_mbuf *m)
+{
+	if (m->nb_segs == 1 && m->next) {
+		rte_panic("mbuf->next[%p] valid when mbuf->nb_segs is %d", m->next, m->nb_segs);
+	}
+}
+#else
+static inline void
+nix_mbuf_validate_next(struct rte_mbuf *m)
+{
+	RTE_SET_USED(m);
+}
+#endif
+
 #define NIX_RX_SEC_REASSEMBLY_F (NIX_RX_REAS_F | NIX_RX_OFFLOAD_SECURITY_F)
 
+static inline rte_eth_ip_reassembly_dynfield_t *
+cnxk_ip_reassembly_dynfield(struct rte_mbuf *mbuf, int ip_reassembly_dynfield_offset)
+{
+	return RTE_MBUF_DYNFIELD(mbuf, ip_reassembly_dynfield_offset,
+				 rte_eth_ip_reassembly_dynfield_t *);
+}
+
+union mbuf_initializer {
+	struct {
+		uint16_t data_off;
+		uint16_t refcnt;
+		uint16_t nb_segs;
+		uint16_t port;
+	} fields;
+	uint64_t value;
+};
+
+static __rte_always_inline uint64_t
+nix_clear_data_off(uint64_t oldval)
+{
+	union mbuf_initializer mbuf_init = {.value = oldval};
+
+	mbuf_init.fields.data_off = 0;
+	return mbuf_init.value;
+}
+
+static __rte_always_inline struct rte_mbuf *
+nix_get_mbuf_from_cqe(void *cq, const uint64_t data_off)
+{
+	rte_iova_t buff;
+
+	/* Skip CQE, NIX_RX_PARSE_S and SG HDR (9 DWORDs) and peek buff addr */
+	buff = *((rte_iova_t *)((uint64_t *)cq + 9));
+	return (struct rte_mbuf *)(buff - data_off);
+}
+
+static __rte_always_inline uint32_t
+nix_ptype_get(const void *const lookup_mem, const uint64_t in)
+{
+	const uint16_t *const ptype = lookup_mem;
+	const uint16_t lh_lg_lf = (in & 0xFFF0000000000000) >> 52;
+	const uint16_t tu_l2 = ptype[(in & 0x000FFFF000000000) >> 36];
+	const uint16_t il4_tu = ptype[PTYPE_NON_TUNNEL_ARRAY_SZ + lh_lg_lf];
+
+	return (il4_tu << PTYPE_NON_TUNNEL_WIDTH) | tu_l2;
+}
+
+static __rte_always_inline uint32_t
+nix_rx_olflags_get(const void *const lookup_mem, const uint64_t in)
+{
+	const uint32_t *const ol_flags =
+		(const uint32_t *)((const uint8_t *)lookup_mem + PTYPE_ARRAY_SZ);
+
+	return ol_flags[(in & 0xfff00000) >> 20];
+}
+
+static inline uint64_t
+nix_update_match_id(const uint16_t match_id, uint64_t ol_flags, struct rte_mbuf *mbuf)
+{
+	/* There is no separate bit to check whether match_id
+	 * is valid, and no flag to identify whether it is an
+	 * RTE_FLOW_ACTION_TYPE_FLAG or an RTE_FLOW_ACTION_TYPE_MARK
+	 * action. The former is addressed by treating 0 as an
+	 * invalid value and incrementing/decrementing the match_id
+	 * pair when MARK is activated. The latter is addressed by
+	 * defining CNXK_FLOW_MARK_DEFAULT as the value for
+	 * RTE_FLOW_ACTION_TYPE_MARK.
+	 * This translates to not using
+	 * CNXK_FLOW_ACTION_FLAG_DEFAULT - 1 and
+	 * CNXK_FLOW_ACTION_FLAG_DEFAULT for match_id,
+	 * i.e. valid mark_ids range from
+	 * 0 to CNXK_FLOW_ACTION_FLAG_DEFAULT - 2.
+	 */
+	if (likely(match_id)) {
+		ol_flags |= RTE_MBUF_F_RX_FDIR;
+		if (match_id != CNXK_FLOW_ACTION_FLAG_DEFAULT) {
+			ol_flags |= RTE_MBUF_F_RX_FDIR_ID;
+			mbuf->hash.fdir.hi = match_id - 1;
+		}
+	}
+
+	return ol_flags;
+}
+
+static __rte_always_inline void
+nix_cqe_xtract_mseg(const union nix_rx_parse_u *rx, struct rte_mbuf *mbuf, uint64_t rearm,
+		    uintptr_t cpth, uintptr_t sa_base, const uint16_t flags)
+{
+	const rte_iova_t *iova_list;
+	uint16_t later_skip = 0;
+	struct rte_mbuf *head;
+	const rte_iova_t *eol;
+	uint8_t nb_segs;
+	uint16_t sg_len;
+	int64_t len;
+	uint64_t sg;
+	uintptr_t p;
+
+	(void)cpth;
+	(void)sa_base;
+
+	sg = *(const uint64_t *)(rx + 1);
+	nb_segs = (sg >> 48) & 0x3;
+
+	if (nb_segs == 1)
+		return;
+
+	len = rx->pkt_lenm1 + 1;
+
+	mbuf->pkt_len = len - (flags & NIX_RX_OFFLOAD_TSTAMP_F ? CNXK_NIX_TIMESYNC_RX_OFFSET : 0);
+	mbuf->nb_segs = nb_segs;
+	head = mbuf;
+	mbuf->data_len =
+		(sg & 0xFFFF) - (flags & NIX_RX_OFFLOAD_TSTAMP_F ? CNXK_NIX_TIMESYNC_RX_OFFSET : 0);
+	eol = ((const rte_iova_t *)(rx + 1) + ((rx->desc_sizem1 + 1) << 1));
+
+	len -= mbuf->data_len;
+	sg = sg >> 16;
+	/* Skip SG_S and first IOVA */
+	iova_list = ((const rte_iova_t *)(rx + 1)) + 2;
+	nb_segs--;
+
+	later_skip = (uintptr_t)mbuf->buf_addr - (uintptr_t)mbuf;
+
+	while (nb_segs) {
+		mbuf->next = (struct rte_mbuf *)(*iova_list - later_skip);
+		mbuf = mbuf->next;
+
+		RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1, 1);
+
+		sg_len = sg & 0xFFFF;
+
+		mbuf->data_len = sg_len;
+		sg = sg >> 16;
+		p = (uintptr_t)&mbuf->rearm_data;
+		*(uint64_t *)p = rearm & ~0xFFFF;
+		nb_segs--;
+		iova_list++;
+
+		if (!nb_segs && (iova_list + 1 < eol)) {
+			sg = *(const uint64_t *)(iova_list);
+			nb_segs = (sg >> 48) & 0x3;
+			head->nb_segs += nb_segs;
+			iova_list = (const rte_iova_t *)(iova_list + 1);
+		}
+	}
+}
+
+static __rte_always_inline void
+cn20k_nix_cqe_to_mbuf(const struct nix_cqe_hdr_s *cq, const uint32_t tag, struct rte_mbuf *mbuf,
+		      const void *lookup_mem, const uint64_t val, const uintptr_t cpth,
+		      const uintptr_t sa_base, const uint16_t flag)
+{
+	const union nix_rx_parse_u *rx = (const union nix_rx_parse_u *)((const uint64_t *)cq + 1);
+	const uint64_t w1 = *(const uint64_t *)rx;
+	uint16_t len = rx->pkt_lenm1 + 1;
+	uint64_t ol_flags = 0;
+	uintptr_t p;
+
+	if (flag & NIX_RX_OFFLOAD_PTYPE_F)
+		mbuf->packet_type = nix_ptype_get(lookup_mem, w1);
+	else
+		mbuf->packet_type = 0;
+
+	if (flag & NIX_RX_OFFLOAD_RSS_F) {
+		mbuf->hash.rss = tag;
+		ol_flags |= RTE_MBUF_F_RX_RSS_HASH;
+	}
+
+	/* Skip rx ol flags extraction for Security packets */
+	ol_flags |= (uint64_t)nix_rx_olflags_get(lookup_mem, w1);
+
+	if (flag & NIX_RX_OFFLOAD_VLAN_STRIP_F) {
+		if (rx->vtag0_gone) {
+			ol_flags |= RTE_MBUF_F_RX_VLAN | RTE_MBUF_F_RX_VLAN_STRIPPED;
+			mbuf->vlan_tci = rx->vtag0_tci;
+		}
+		if (rx->vtag1_gone) {
+			ol_flags |= RTE_MBUF_F_RX_QINQ | RTE_MBUF_F_RX_QINQ_STRIPPED;
+			mbuf->vlan_tci_outer = rx->vtag1_tci;
+		}
+	}
+
+	if (flag & NIX_RX_OFFLOAD_MARK_UPDATE_F)
+		ol_flags = nix_update_match_id(rx->match_id, ol_flags, mbuf);
+
+	mbuf->ol_flags = ol_flags;
+	mbuf->pkt_len = len;
+	mbuf->data_len = len;
+	p = (uintptr_t)&mbuf->rearm_data;
+	*(uint64_t *)p = val;
+
+	if (flag & NIX_RX_MULTI_SEG_F)
+		/*
+		 * For multi segment packets, mbuf length correction according
+		 * to Rx timestamp length will be handled later during
+		 * timestamp data process.
+		 * Hence, timestamp flag argument is not required.
+		 */
+		nix_cqe_xtract_mseg(rx, mbuf, val, cpth, sa_base, flag & ~NIX_RX_OFFLOAD_TSTAMP_F);
+}
+
+static inline uint16_t
+nix_rx_nb_pkts(struct cn20k_eth_rxq *rxq, const uint64_t wdata, const uint16_t pkts,
+	       const uint32_t qmask)
+{
+	uint32_t available = rxq->available;
+
+	/* Update the available count if cached value is not enough */
+	if (unlikely(available < pkts)) {
+		uint64_t reg, head, tail;
+
+		/* Use LDADDA version to avoid reorder */
+		reg = roc_atomic64_add_sync(wdata, rxq->cq_status);
+		/* CQ_OP_STATUS operation error */
+		if (reg & BIT_ULL(NIX_CQ_OP_STAT_OP_ERR) || reg & BIT_ULL(NIX_CQ_OP_STAT_CQ_ERR))
+			return 0;
+
+		tail = reg & 0xFFFFF;
+		head = (reg >> 20) & 0xFFFFF;
+		if (tail < head)
+			available = tail - head + qmask + 1;
+		else
+			available = tail - head;
+
+		rxq->available = available;
+	}
+
+	return RTE_MIN(pkts, available);
+}
+
+static __rte_always_inline void
+cn20k_nix_mbuf_to_tstamp(struct rte_mbuf *mbuf, struct cnxk_timesync_info *tstamp,
+			 const uint8_t ts_enable, uint64_t *tstamp_ptr)
+{
+	if (ts_enable) {
+		mbuf->pkt_len -= CNXK_NIX_TIMESYNC_RX_OFFSET;
+		mbuf->data_len -= CNXK_NIX_TIMESYNC_RX_OFFSET;
+
+		/* Read the Rx timestamp inserted by CGX, i.e. at the
+		 * start of the packet data.
+		 */
+		*tstamp_ptr = ((*tstamp_ptr >> 32) * NSEC_PER_SEC) + (*tstamp_ptr & 0xFFFFFFFFUL);
+		*cnxk_nix_timestamp_dynfield(mbuf, tstamp) = rte_be_to_cpu_64(*tstamp_ptr);
+		/* RTE_MBUF_F_RX_IEEE1588_TMST flag needs to be set only in case
+		 * PTP packets are received.
+		 */
+		if (mbuf->packet_type == RTE_PTYPE_L2_ETHER_TIMESYNC) {
+			tstamp->rx_tstamp = *cnxk_nix_timestamp_dynfield(mbuf, tstamp);
+			tstamp->rx_ready = 1;
+			mbuf->ol_flags |= RTE_MBUF_F_RX_IEEE1588_PTP | RTE_MBUF_F_RX_IEEE1588_TMST |
+					  tstamp->rx_tstamp_dynflag;
+		}
+	}
+}
+
+static __rte_always_inline uint16_t
+cn20k_nix_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts, const uint16_t flags)
+{
+	struct cn20k_eth_rxq *rxq = rx_queue;
+	const uint64_t mbuf_init = rxq->mbuf_initializer;
+	const void *lookup_mem = rxq->lookup_mem;
+	const uint64_t data_off = rxq->data_off;
+	const uintptr_t desc = rxq->desc;
+	const uint64_t wdata = rxq->wdata;
+	const uint32_t qmask = rxq->qmask;
+	uint16_t packets = 0, nb_pkts;
+	uint32_t head = rxq->head;
+	struct nix_cqe_hdr_s *cq;
+	struct rte_mbuf *mbuf;
+	uint64_t sa_base = 0;
+	uintptr_t cpth = 0;
+
+	nb_pkts = nix_rx_nb_pkts(rxq, wdata, pkts, qmask);
+
+	while (packets < nb_pkts) {
+		/* Prefetch N desc ahead */
+		rte_prefetch_non_temporal((void *)(desc + (CQE_SZ((head + 2) & qmask))));
+		cq = (struct nix_cqe_hdr_s *)(desc + CQE_SZ(head));
+
+		mbuf = nix_get_mbuf_from_cqe(cq, data_off);
+
+		/* Mark mempool obj as "get" as it is alloc'ed by NIX */
+		RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1, 1);
+
+		cn20k_nix_cqe_to_mbuf(cq, cq->tag, mbuf, lookup_mem, mbuf_init, cpth, sa_base,
+				      flags);
+		cn20k_nix_mbuf_to_tstamp(mbuf, rxq->tstamp, (flags & NIX_RX_OFFLOAD_TSTAMP_F),
+					 (uint64_t *)((uint8_t *)mbuf + data_off));
+		rx_pkts[packets++] = mbuf;
+		roc_prefetch_store_keep(mbuf);
+		head++;
+		head &= qmask;
+	}
+
+	rxq->head = head;
+	rxq->available -= nb_pkts;
+
+	/* Free all the CQs that we've processed */
+	plt_write64((wdata | nb_pkts), rxq->cq_door);
+
+	return nb_pkts;
+}
+
+static __rte_always_inline uint16_t
+cn20k_nix_flush_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts,
+			  const uint16_t flags)
+{
+	struct cn20k_eth_rxq *rxq = rx_queue;
+	const uint64_t mbuf_init = rxq->mbuf_initializer;
+	const void *lookup_mem = rxq->lookup_mem;
+	const uint64_t data_off = rxq->data_off;
+	const uint64_t wdata = rxq->wdata;
+	const uint32_t qmask = rxq->qmask;
+	const uintptr_t desc = rxq->desc;
+	uint16_t packets = 0, nb_pkts;
+	uint16_t lmt_id __rte_unused;
+	uint32_t head = rxq->head;
+	struct nix_cqe_hdr_s *cq;
+	struct rte_mbuf *mbuf;
+	uint64_t sa_base = 0;
+	uintptr_t cpth = 0;
+
+	nb_pkts = nix_rx_nb_pkts(rxq, wdata, pkts, qmask);
+
+	while (packets < nb_pkts) {
+		/* Prefetch N desc ahead */
+		rte_prefetch_non_temporal((void *)(desc + (CQE_SZ((head + 2) & qmask))));
+		cq = (struct nix_cqe_hdr_s *)(desc + CQE_SZ(head));
+
+		mbuf = nix_get_mbuf_from_cqe(cq, data_off);
+
+		/* Mark mempool obj as "get" as it is alloc'ed by NIX */
+		RTE_MEMPOOL_CHECK_COOKIES(mbuf->pool, (void **)&mbuf, 1, 1);
+
+		cn20k_nix_cqe_to_mbuf(cq, cq->tag, mbuf, lookup_mem, mbuf_init, cpth, sa_base,
+				      flags);
+		cn20k_nix_mbuf_to_tstamp(mbuf, rxq->tstamp, (flags & NIX_RX_OFFLOAD_TSTAMP_F),
+					 (uint64_t *)((uint8_t *)mbuf + data_off));
+		rx_pkts[packets++] = mbuf;
+		roc_prefetch_store_keep(mbuf);
+		head++;
+		head &= qmask;
+	}
+
+	rxq->head = head;
+	rxq->available -= nb_pkts;
+
+	/* Free all the CQs that we've processed */
+	plt_write64((wdata | nb_pkts), rxq->cq_door);
+
+	return nb_pkts;
+}
+
 #define RSS_F	  NIX_RX_OFFLOAD_RSS_F
 #define PTYPE_F	  NIX_RX_OFFLOAD_PTYPE_F
 #define CKSUM_F	  NIX_RX_OFFLOAD_CHECKSUM_F
@@ -220,10 +609,7 @@  NIX_RX_FASTPATH_MODES
 	uint16_t __rte_noinline __rte_hot fn(void *rx_queue, struct rte_mbuf **rx_pkts,            \
 					     uint16_t pkts)                                        \
 	{                                                                                          \
-		RTE_SET_USED(rx_queue);                                                            \
-		RTE_SET_USED(rx_pkts);                                                             \
-		RTE_SET_USED(pkts);                                                                \
-		return 0;                                                                          \
+		return cn20k_nix_recv_pkts(rx_queue, rx_pkts, pkts, (flags));                      \
 	}
 
 #define NIX_RX_RECV_MSEG(fn, flags) NIX_RX_RECV(fn, flags | NIX_RX_MULTI_SEG_F)
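
On the CQ accounting in nix_rx_nb_pkts() above: CQ_OP_STATUS is fetched with
an atomic LDADDA, the tail pointer sits in bits [19:0], the head pointer in
bits [39:20], and the wrapped case (tail < head) is folded back with
qmask + 1. A standalone sketch of just that decode, assuming the same bit
layout:

    #include <stdint.h>

    /* Pending CQEs from a CQ_OP_STATUS-style word; qmask is ring size - 1.
     * Mirrors the arithmetic in nix_rx_nb_pkts().
     */
    static inline uint32_t
    cq_available(uint64_t reg, uint32_t qmask)
    {
        uint64_t tail = reg & 0xFFFFF;
        uint64_t head = (reg >> 20) & 0xFFFFF;

        /* Modular distance on the ring; unsigned wraparound is benign */
        return (uint32_t)(tail < head ? tail - head + qmask + 1 : tail - head);
    }

    /* e.g. head = 0xFFFFE, tail = 0x1, qmask = 0xFFFFF -> 3 pending CQEs */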
diff --git a/drivers/net/cnxk/cn20k_rx_select.c b/drivers/net/cnxk/cn20k_rx_select.c
index 82e06a62ef..25c79434cd 100644
--- a/drivers/net/cnxk/cn20k_rx_select.c
+++ b/drivers/net/cnxk/cn20k_rx_select.c
@@ -22,10 +22,8 @@  pick_rx_func(struct rte_eth_dev *eth_dev, const eth_rx_burst_t rx_burst[NIX_RX_O
 static uint16_t __rte_noinline __rte_hot __rte_unused
 cn20k_nix_flush_rx(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t pkts)
 {
-	RTE_SET_USED(rx_queue);
-	RTE_SET_USED(rx_pkts);
-	RTE_SET_USED(pkts);
-	return 0;
+	const uint16_t flags = NIX_RX_MULTI_SEG_F | NIX_RX_REAS_F | NIX_RX_OFFLOAD_SECURITY_F;
+	return cn20k_nix_flush_recv_pkts(rx_queue, rx_pkts, pkts, flags);
 }
 
 #if defined(RTE_ARCH_ARM64)
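
With NIX_RX_RECV() now forwarding to cn20k_nix_recv_pkts(), every generated
fastpath symbol becomes a thin wrapper over one flags-templated worker, and
the compiler constant-folds the (flags & ...) checks per variant. A reduced
sketch of that template-by-macro technique (the flag value and function names
are illustrative):

    #include <rte_common.h>
    #include <rte_mbuf.h>

    #define F_RSS 0x1 /* illustrative offload flag */

    static __rte_always_inline uint16_t
    recv_worker(void *rxq, struct rte_mbuf **pkts, uint16_t n, const uint16_t flags)
    {
        RTE_SET_USED(rxq);
        RTE_SET_USED(pkts);
        if (flags & F_RSS) {
            /* RSS handling: compiled in only for instantiations that
             * pass F_RSS, dead-code-eliminated everywhere else.
             */
        }
        return n;
    }

    /* Each instantiation is a dedicated, fully specialized symbol */
    #define RECV_INST(fn, flags)                                          \
        uint16_t __rte_noinline __rte_hot fn(void *rxq,                   \
                                             struct rte_mbuf **pkts,      \
                                             uint16_t n)                  \
        {                                                                 \
            return recv_worker(rxq, pkts, n, (flags));                    \
        }

    RECV_INST(recv_rss, F_RSS)
    RECV_INST(recv_plain, 0)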
diff --git a/drivers/net/cnxk/cn20k_rxtx.h b/drivers/net/cnxk/cn20k_rxtx.h
index 5cc445d4b1..03eaf34d64 100644
--- a/drivers/net/cnxk/cn20k_rxtx.h
+++ b/drivers/net/cnxk/cn20k_rxtx.h
@@ -83,7 +83,163 @@  struct cn20k_eth_rxq {
 	struct cnxk_timesync_info *tstamp;
 } __plt_cache_aligned;
 
+/* Private data in sw rsvd area of struct roc_ot_ipsec_inb_sa */
+struct cn20k_inb_priv_data {
+	void *userdata;
+	int reass_dynfield_off;
+	int reass_dynflag_bit;
+	struct cnxk_eth_sec_sess *eth_sec;
+};
+
+struct cn20k_sec_sess_priv {
+	union {
+		struct {
+			uint32_t sa_idx;
+			uint8_t inb_sa : 1;
+			uint8_t outer_ip_ver : 1;
+			uint8_t mode : 1;
+			uint8_t roundup_byte : 5;
+			uint8_t roundup_len;
+			uint16_t partial_len : 10;
+			uint16_t chksum : 2;
+			uint16_t dec_ttl : 1;
+			uint16_t nixtx_off : 1;
+			uint16_t rsvd : 2;
+		};
+
+		uint64_t u64;
+	};
+} __rte_packed;
+
 #define LMT_OFF(lmt_addr, lmt_num, offset)                                                         \
 	(void *)((uintptr_t)(lmt_addr) + ((uint64_t)(lmt_num) << ROC_LMT_LINE_SIZE_LOG2) + (offset))
 
+static inline uint16_t
+nix_tx_compl_nb_pkts(struct cn20k_eth_txq *txq, const uint64_t wdata, const uint32_t qmask)
+{
+	uint16_t available = txq->tx_compl.available;
+
+	/* Refresh the available count when the cached value is exhausted */
+	if (unlikely(!available)) {
+		uint64_t reg, head, tail;
+
+		/* Use LDADDA version to avoid reorder */
+		reg = roc_atomic64_add_sync(wdata, txq->tx_compl.cq_status);
+		/* CQ_OP_STATUS operation error */
+		if (reg & BIT_ULL(NIX_CQ_OP_STAT_OP_ERR) || reg & BIT_ULL(NIX_CQ_OP_STAT_CQ_ERR))
+			return 0;
+
+		tail = reg & 0xFFFFF;
+		head = (reg >> 20) & 0xFFFFF;
+		if (tail < head)
+			available = tail - head + qmask + 1;
+		else
+			available = tail - head;
+
+		txq->tx_compl.available = available;
+	}
+	return available;
+}
+
+static inline void
+handle_tx_completion_pkts(struct cn20k_eth_txq *txq, uint8_t mt_safe)
+{
+#define CNXK_NIX_CQ_ENTRY_SZ 128
+#define CQE_SZ(x)	     ((x) * CNXK_NIX_CQ_ENTRY_SZ)
+
+	uint16_t tx_pkts = 0, nb_pkts;
+	const uintptr_t desc = txq->tx_compl.desc_base;
+	const uint64_t wdata = txq->tx_compl.wdata;
+	const uint32_t qmask = txq->tx_compl.qmask;
+	uint32_t head = txq->tx_compl.head;
+	struct nix_cqe_hdr_s *tx_compl_cq;
+	struct nix_send_comp_s *tx_compl_s0;
+	struct rte_mbuf *m_next, *m;
+
+	if (mt_safe)
+		rte_spinlock_lock(&txq->tx_compl.ext_buf_lock);
+
+	nb_pkts = nix_tx_compl_nb_pkts(txq, wdata, qmask);
+	while (tx_pkts < nb_pkts) {
+		rte_prefetch_non_temporal((void *)(desc + (CQE_SZ((head + 2) & qmask))));
+		tx_compl_cq = (struct nix_cqe_hdr_s *)(desc + CQE_SZ(head));
+		tx_compl_s0 = (struct nix_send_comp_s *)((uint64_t *)tx_compl_cq + 1);
+		m = txq->tx_compl.ptr[tx_compl_s0->sqe_id];
+		while (m->next != NULL) {
+			m_next = m->next;
+			rte_pktmbuf_free_seg(m);
+			m = m_next;
+		}
+		rte_pktmbuf_free_seg(m);
+		txq->tx_compl.ptr[tx_compl_s0->sqe_id] = NULL;
+
+		head++;
+		head &= qmask;
+		tx_pkts++;
+	}
+	txq->tx_compl.head = head;
+	txq->tx_compl.available -= nb_pkts;
+
+	plt_write64((wdata | nb_pkts), txq->tx_compl.cq_door);
+
+	if (mt_safe)
+		rte_spinlock_unlock(&txq->tx_compl.ext_buf_lock);
+}
+
+static __rte_always_inline uint64_t
+cn20k_cpt_tx_steor_data(void)
+{
+	/* We have two CPT instructions per LMTLine TODO */
+	const uint64_t dw_m1 = ROC_CN10K_TWO_CPT_INST_DW_M1;
+	uint64_t data;
+
+	/* This will be moved to addr area */
+	data = dw_m1 << 16;
+	data |= dw_m1 << 19;
+	data |= dw_m1 << 22;
+	data |= dw_m1 << 25;
+	data |= dw_m1 << 28;
+	data |= dw_m1 << 31;
+	data |= dw_m1 << 34;
+	data |= dw_m1 << 37;
+	data |= dw_m1 << 40;
+	data |= dw_m1 << 43;
+	data |= dw_m1 << 46;
+	data |= dw_m1 << 49;
+	data |= dw_m1 << 52;
+	data |= dw_m1 << 55;
+	data |= dw_m1 << 58;
+	data |= dw_m1 << 61;
+
+	return data;
+}
+
+static __rte_always_inline void
+cn20k_nix_sec_steorl(uintptr_t io_addr, uint32_t lmt_id, uint8_t lnum, uint8_t loff, uint8_t shft)
+{
+	uint64_t data;
+	uintptr_t pa;
+
+	/* Check if there is any CPT instruction to submit */
+	if (!lnum && !loff)
+		return;
+
+	data = cn20k_cpt_tx_steor_data();
+	/* Update lmtline use for partial end line */
+	if (loff) {
+		data &= ~(0x7ULL << shft);
+		/* Update it to half full, i.e. 64B */
+		data |= (0x3UL << shft);
+	}
+
+	pa = io_addr | ((data >> 16) & 0x7) << 4;
+	data &= ~(0x7ULL << 16);
+	/* Update lines - 1 that contain valid data */
+	data |= ((uint64_t)(lnum + loff - 1)) << 12;
+	data |= (uint64_t)lmt_id;
+
+	/* STEOR */
+	roc_lmt_submit_steorl(data, pa);
+}
+
 #endif /* __CN20K_RXTX_H__ */
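
For reference, the doorbell built by cn20k_nix_sec_steorl() packs everything
into one 64-bit word: a 3-bit size field per LMT line starting at bit 16,
(lines - 1) shifted to bit 12, and the LMT ID in the low bits, while the
first line's size field is carried in the I/O address instead. A sketch
computing that word under the same DW-minus-1 encoding (the helper name is
illustrative, and the partial-line loff adjustment is left out for brevity):

    #include <stdint.h>

    /* Build the STEOR data word for 'lines' full LMT lines that each
     * carry 'dw_m1' (data words - 1), released from LMT line 'lmt_id'.
     * Mirrors cn20k_cpt_tx_steor_data() plus the fixups applied in
     * cn20k_nix_sec_steorl().
     */
    static uint64_t
    steor_data(uint8_t lines, uint64_t dw_m1, uint16_t lmt_id)
    {
        uint64_t data = 0;
        int shft;

        /* One 3-bit size field per line, first line at bits [18:16] */
        for (shft = 16; shft <= 61; shft += 3)
            data |= dw_m1 << shft;

        data &= ~(0x7ULL << 16);               /* line 0's size rides in the PA */
        data |= ((uint64_t)(lines - 1)) << 12; /* number of lines - 1 */
        data |= lmt_id;                        /* base LMT line id */

        return data;
    }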