[v2,4/6] raw/octeontx2_ep: add enqueue operation

Message ID 1577450386-16966-5-git-send-email-mchalla@marvell.com (mailing list archive)
State Superseded, archived
Headers
Series OCTEON TX2 End Point Driver |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Mahipal Challa Dec. 27, 2019, 12:39 p.m. UTC
  Add rawdev enqueue operation for SDP VF devices.

Signed-off-by: Mahipal Challa <mchalla@marvell.com>
---
 doc/guides/rawdevs/octeontx2_ep.rst       |   6 +
 drivers/raw/octeontx2_ep/otx2_ep_enqdeq.c | 242 ++++++++++++++++++++++++++++++
 drivers/raw/octeontx2_ep/otx2_ep_enqdeq.h |  39 +++++
 drivers/raw/octeontx2_ep/otx2_ep_rawdev.c |   1 +
 drivers/raw/octeontx2_ep/otx2_ep_rawdev.h |  20 +++
 drivers/raw/octeontx2_ep/otx2_ep_vf.c     |  24 +++
 6 files changed, 332 insertions(+)
  

Comments

Gavin Hu Jan. 2, 2020, 10:43 a.m. UTC | #1
Hi Mahipal,

> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Mahipal Challa
> Sent: Friday, December 27, 2019 8:40 PM
> To: dev@dpdk.org
> Cc: jerinj@marvell.com; pathreya@marvell.com; snilla@marvell.com;
> venkatn@marvell.com
> Subject: [dpdk-dev] [dpdk-dev PATCH v2 4/6] raw/octeontx2_ep: add enqueue
> operation
> 
> Add rawdev enqueue operation for SDP VF devices.
> 
> Signed-off-by: Mahipal Challa <mchalla@marvell.com>
> ---
>  doc/guides/rawdevs/octeontx2_ep.rst       |   6 +
>  drivers/raw/octeontx2_ep/otx2_ep_enqdeq.c | 242
> ++++++++++++++++++++++++++++++
>  drivers/raw/octeontx2_ep/otx2_ep_enqdeq.h |  39 +++++
>  drivers/raw/octeontx2_ep/otx2_ep_rawdev.c |   1 +
>  drivers/raw/octeontx2_ep/otx2_ep_rawdev.h |  20 +++
>  drivers/raw/octeontx2_ep/otx2_ep_vf.c     |  24 +++
>  6 files changed, 332 insertions(+)
> 
> diff --git a/doc/guides/rawdevs/octeontx2_ep.rst
> b/doc/guides/rawdevs/octeontx2_ep.rst
> index 2507fcf..39a7c29 100644
> --- a/doc/guides/rawdevs/octeontx2_ep.rst
> +++ b/doc/guides/rawdevs/octeontx2_ep.rst
> @@ -68,3 +68,9 @@ The following code shows how the device is configured
> 
>     rte_rawdev_configure(dev_id, (rte_rawdev_obj_t)&rdev_info);
> 
> +Performing Data Transfer
> +------------------------
> +
> +To perform data transfer using SDP VF EP rawdev devices use standard
> +``rte_rawdev_enqueue_buffers()`` and ``rte_rawdev_dequeue_buffers()``
> APIs.
> +
> diff --git a/drivers/raw/octeontx2_ep/otx2_ep_enqdeq.c
> b/drivers/raw/octeontx2_ep/otx2_ep_enqdeq.c
> index 584b818..6910f08 100644
> --- a/drivers/raw/octeontx2_ep/otx2_ep_enqdeq.c
> +++ b/drivers/raw/octeontx2_ep/otx2_ep_enqdeq.c
> @@ -403,3 +403,245 @@
>  	return -ENOMEM;
>  }
> 
> +static inline void
> +sdp_iqreq_delete(struct sdp_device *sdpvf,
> +		struct sdp_instr_queue *iq, uint32_t idx)
> +{
> +	uint32_t reqtype;
> +	void *buf;
> +
> +	buf     = iq->req_list[idx].buf;
> +	reqtype = iq->req_list[idx].reqtype;
> +
> +	switch (reqtype) {
> +	case SDP_REQTYPE_NORESP:
> +		rte_mempool_put(sdpvf->enqdeq_mpool, buf);
> +		otx2_sdp_dbg("IQ buffer freed at idx[%d]", idx);
> +		break;
> +
> +	case SDP_REQTYPE_NORESP_GATHER:
> +	case SDP_REQTYPE_NONE:
> +	default:
> +		otx2_info("This iqreq mode is not supported:%d", reqtype);
> +
> +	}
> +
> +	/* Reset the request list at this index */
> +	iq->req_list[idx].buf = NULL;
> +	iq->req_list[idx].reqtype = 0;
> +}
> +
> +static inline void
> +sdp_iqreq_add(struct sdp_instr_queue *iq, void *buf,
> +		uint32_t reqtype)
> +{
> +	iq->req_list[iq->host_write_index].buf = buf;
> +	iq->req_list[iq->host_write_index].reqtype = reqtype;
> +
> +	otx2_sdp_dbg("IQ buffer added at idx[%d]", iq->host_write_index);
> +
> +}
> +
> +static void
> +sdp_flush_iq(struct sdp_device *sdpvf,
> +		struct sdp_instr_queue *iq,
> +		uint32_t pending_thresh __rte_unused)
> +{
> +	uint32_t instr_processed = 0;
> +
> +	rte_spinlock_lock(&iq->lock);
> +
> +	iq->otx_read_index = sdpvf->fn_list.update_iq_read_idx(iq);
> +	while (iq->flush_index != iq->otx_read_index) {
> +		/* Free the IQ data buffer to the pool */
> +		sdp_iqreq_delete(sdpvf, iq, iq->flush_index);
> +		iq->flush_index =
> +			sdp_incr_index(iq->flush_index, 1, iq->nb_desc);
> +
> +		instr_processed++;
> +	}
> +
> +	iq->stats.instr_processed = instr_processed;
> +	rte_atomic64_sub(&iq->instr_pending, instr_processed);
> +
> +	rte_spinlock_unlock(&iq->lock);
> +}
> +
> +static inline void
> +sdp_ring_doorbell(struct sdp_device *sdpvf __rte_unused,
> +		struct sdp_instr_queue *iq)
> +{
> +	otx2_write64(iq->fill_cnt, iq->doorbell_reg);
> +
> +	/* Make sure doorbell write goes through */
> +	rte_cio_wmb();
This rte_cio_wmb() should be moved before the doorbell ring to guarantee the IQ command
submitted in post_iqcmd is observed by the peer HW before observing the doorbell ring.
> +	iq->fill_cnt = 0;
> +
> +}
> +
> +static inline int
> +post_iqcmd(struct sdp_instr_queue *iq, uint8_t *iqcmd)
> +{
> +	uint8_t *iqptr, cmdsize;
> +
> +	/* This ensures that the read index does not wrap around to
> +	 * the same position if queue gets full before OCTEON TX2 could
> +	 * fetch any instr.
> +	 */
> +	if (rte_atomic64_read(&iq->instr_pending) >=
> +			      (int32_t)(iq->nb_desc - 1)) {
> +		otx2_err("IQ is full, pending:%ld",
> +			 (long)rte_atomic64_read(&iq->instr_pending));
> +
> +		return SDP_IQ_SEND_FAILED;
> +	}
> +
> +	/* Copy cmd into iq */
> +	cmdsize = ((iq->iqcmd_64B) ? 64 : 32);
> +	iqptr   = iq->base_addr + (cmdsize * iq->host_write_index);
> +
> +	rte_memcpy(iqptr, iqcmd, cmdsize);
This is the DMA memory must be observed by the peer HW before observing the doorbell ring.

> +
> +	otx2_sdp_dbg("IQ cmd posted @ index:%d", iq->host_write_index);
> +
> +	/* Increment the host write index */
> +	iq->host_write_index =
> +		sdp_incr_index(iq->host_write_index, 1, iq->nb_desc);
> +
> +	iq->fill_cnt++;
> +
> +	/* Flush the command into memory. We need to be sure the data
> +	 * is in memory before indicating that the instruction is
> +	 * pending.
> +	 */
> +	rte_io_wmb();
Who is the observer of instr_pending? If it is the peer HW, then it should be rte_cio_wmb.
Otherwise if it is a peer lcore, rte_smp_wmb is the right one, or if both are observers, then cio is correct one.
> +	rte_atomic64_inc(&iq->instr_pending);
> +
> +	/* SDP_IQ_SEND_SUCCESS */
> +	return 0;
> +}
> +
> +
> +static int
> +sdp_send_data(struct sdp_device *sdpvf,
> +	      struct sdp_instr_queue *iq, void *cmd)
> +{
> +	uint32_t ret;
> +
> +	/* Lock this IQ command queue before posting instruction */
> +	rte_spinlock_lock(&iq->post_lock);
> +
> +	/* Submit IQ command */
> +	ret = post_iqcmd(iq, cmd);
> +
> +	if (ret == SDP_IQ_SEND_SUCCESS) {
> +		sdp_ring_doorbell(sdpvf, iq);
> +
> +		iq->stats.instr_posted++;
> +		otx2_sdp_dbg("Instr submit success posted: %ld\n",
> +			     (long)iq->stats.instr_posted);
> +
> +	} else {
> +		iq->stats.instr_dropped++;
> +		otx2_err("Instr submit failled, dropped: %ld\n",
> +			 (long)iq->stats.instr_dropped);
> +
> +	}
> +
> +	rte_spinlock_unlock(&iq->post_lock);
> +
> +	return ret;
> +}
> +
> +
> +/* Enqueue requests/packets to SDP IQ queue.
> + * returns number of requests enqueued successfully
> + */
> +int
> +sdp_rawdev_enqueue(struct rte_rawdev *rawdev,
> +		   struct rte_rawdev_buf **buffers __rte_unused,
> +		   unsigned int count, rte_rawdev_obj_t context)
> +{
> +	struct sdp_instr_64B *iqcmd;
> +	struct sdp_instr_queue *iq;
> +	struct sdp_soft_instr *si;
> +	struct sdp_device *sdpvf;
> +
> +	struct sdp_instr_ih ihx;
> +
> +	sdpvf = (struct sdp_device *)rawdev->dev_private;
> +	si = (struct sdp_soft_instr *)context;
> +
> +	iq = sdpvf->instr_queue[si->q_no];
> +
> +	if ((count > 1) || (count < 1)) {
> +		otx2_err("This mode not supported: req[%d]", count);
> +		goto enq_fail;
> +	}
> +
> +	memset(&ihx, 0, sizeof(struct sdp_instr_ih));
> +
> +	iqcmd = &si->command;
> +	memset(iqcmd, 0, sizeof(struct sdp_instr_64B));
> +
> +	iqcmd->dptr = (uint64_t)si->dptr;
> +
> +	/* Populate SDP IH */
> +	ihx.pkind  = sdpvf->pkind;
> +	ihx.fsz    = si->ih.fsz + 8; /* 8B for NIX IH */
> +	ihx.gather = si->ih.gather;
> +
> +	/* Direct data instruction */
> +	ihx.tlen   = si->ih.tlen + ihx.fsz;
> +
> +	switch (ihx.gather) {
> +	case 0: /* Direct data instr */
> +		ihx.tlen = si->ih.tlen + ihx.fsz;
> +		break;
> +
> +	default: /* Gather */
> +		switch (si->ih.gsz) {
> +		case 0: /* Direct gather instr */
> +			otx2_err("Direct Gather instr : not supported");
> +			goto enq_fail;
> +
> +		default: /* Indirect gather instr */
> +			otx2_err("Indirect Gather instr : not supported");
> +			goto enq_fail;
> +		}
> +	}
> +
> +	rte_memcpy(&iqcmd->ih, &ihx, sizeof(uint64_t));
> +	iqcmd->rptr = (uint64_t)si->rptr;
> +	rte_memcpy(&iqcmd->irh, &si->irh, sizeof(uint64_t));
> +
> +	/* Swap FSZ(front data) here, to avoid swapping on OCTEON TX2 side
> */
> +	sdp_swap_8B_data(&iqcmd->rptr, 1);
> +	sdp_swap_8B_data(&iqcmd->irh, 1);
> +
> +	otx2_sdp_dbg("After swapping");
> +	otx2_sdp_dbg("Word0 [dptr]: 0x%016lx", (unsigned long)iqcmd->dptr);
> +	otx2_sdp_dbg("Word1 [ihtx]: 0x%016lx", (unsigned long)iqcmd->ih);
> +	otx2_sdp_dbg("Word2 [rptr]: 0x%016lx", (unsigned long)iqcmd->rptr);
> +	otx2_sdp_dbg("Word3 [irh]: 0x%016lx", (unsigned long)iqcmd->irh);
> +	otx2_sdp_dbg("Word4 [exhdr[0]]: 0x%016lx",
> +			(unsigned long)iqcmd->exhdr[0]);
> +
> +	sdp_iqreq_add(iq, si->dptr, si->reqtype);
> +
> +	if (sdp_send_data(sdpvf, iq, iqcmd)) {
> +		otx2_err("Data send failled :");
> +		sdp_iqreq_delete(sdpvf, iq, iq->host_write_index);
> +		goto enq_fail;
> +	}
> +
> +	if (rte_atomic64_read(&iq->instr_pending) >= 1)
> +		sdp_flush_iq(sdpvf, iq, 1 /*(iq->nb_desc / 2)*/);
> +
> +	/* Return no# of instructions posted successfully. */
> +	return count;
> +
> +enq_fail:
> +	return SDP_IQ_SEND_FAILED;
> +}
> +
> diff --git a/drivers/raw/octeontx2_ep/otx2_ep_enqdeq.h
> b/drivers/raw/octeontx2_ep/otx2_ep_enqdeq.h
> index 4c28283..b9b7c0b 100644
> --- a/drivers/raw/octeontx2_ep/otx2_ep_enqdeq.h
> +++ b/drivers/raw/octeontx2_ep/otx2_ep_enqdeq.h
> @@ -8,4 +8,43 @@
>  #include <rte_byteorder.h>
>  #include "otx2_ep_rawdev.h"
> 
> +#define SDP_IQ_SEND_FAILED      (-1)
> +#define SDP_IQ_SEND_SUCCESS     (0)
> +
> +
> +static inline uint64_t
> +sdp_endian_swap_8B(uint64_t _d)
> +{
> +	return ((((((uint64_t)(_d)) >>  0) & (uint64_t)0xff) << 56) |
> +		(((((uint64_t)(_d)) >>  8) & (uint64_t)0xff) << 48) |
> +		(((((uint64_t)(_d)) >> 16) & (uint64_t)0xff) << 40) |
> +		(((((uint64_t)(_d)) >> 24) & (uint64_t)0xff) << 32) |
> +		(((((uint64_t)(_d)) >> 32) & (uint64_t)0xff) << 24) |
> +		(((((uint64_t)(_d)) >> 40) & (uint64_t)0xff) << 16) |
> +		(((((uint64_t)(_d)) >> 48) & (uint64_t)0xff) <<  8) |
> +		(((((uint64_t)(_d)) >> 56) & (uint64_t)0xff) <<  0));
> +}
> +
> +static inline void
> +sdp_swap_8B_data(uint64_t *data, uint32_t blocks)
> +{
> +	/* Swap 8B blocks */
> +	while (blocks) {
> +		*data = sdp_endian_swap_8B(*data);
> +		blocks--;
> +		data++;
> +	}
> +}
> +
> +static inline uint32_t
> +sdp_incr_index(uint32_t index, uint32_t count, uint32_t max)
> +{
> +	if ((index + count) >= max)
> +		index = index + count - max;
> +	else
> +		index += count;
> +
> +	return index;
> +}
> +
>  #endif /* _OTX2_EP_ENQDEQ_H_ */
> diff --git a/drivers/raw/octeontx2_ep/otx2_ep_rawdev.c
> b/drivers/raw/octeontx2_ep/otx2_ep_rawdev.c
> index 3db5a74..22a6beb 100644
> --- a/drivers/raw/octeontx2_ep/otx2_ep_rawdev.c
> +++ b/drivers/raw/octeontx2_ep/otx2_ep_rawdev.c
> @@ -251,6 +251,7 @@
>  	.dev_start      = sdp_rawdev_start,
>  	.dev_stop       = sdp_rawdev_stop,
>  	.dev_close      = sdp_rawdev_close,
> +	.enqueue_bufs   = sdp_rawdev_enqueue,
>  };
> 
>  static int
> diff --git a/drivers/raw/octeontx2_ep/otx2_ep_rawdev.h
> b/drivers/raw/octeontx2_ep/otx2_ep_rawdev.h
> index a01f48d..8fd06fb 100644
> --- a/drivers/raw/octeontx2_ep/otx2_ep_rawdev.h
> +++ b/drivers/raw/octeontx2_ep/otx2_ep_rawdev.h
> @@ -8,6 +8,10 @@
>  #include <rte_byteorder.h>
>  #include <rte_spinlock.h>
> 
> +/* IQ instruction req types */
> +#define SDP_REQTYPE_NONE             (0)
> +#define SDP_REQTYPE_NORESP           (1)
> +#define SDP_REQTYPE_NORESP_GATHER    (2)
> 
>  /* Input Request Header format */
>  struct sdp_instr_irh {
> @@ -128,6 +132,13 @@ struct sdp_instr_list {
>  };
>  #define SDP_IQREQ_LIST_SIZE	(sizeof(struct sdp_instr_list))
> 
> +/* Input Queue statistics. Each input queue has four stats fields. */
> +struct sdp_iq_stats {
> +	uint64_t instr_posted; /* Instructions posted to this queue. */
> +	uint64_t instr_processed; /* Instructions processed in this queue. */
> +	uint64_t instr_dropped; /* Instructions that could not be processed */
> +};
> +
>  /* Structure to define the configuration attributes for each Input queue. */
>  struct sdp_iq_config {
>  	/* Max number of IQs available */
> @@ -195,6 +206,9 @@ struct sdp_instr_queue {
>  	/* Number of instructions pending to be posted to OCTEON TX2. */
>  	uint32_t fill_cnt;
> 
> +	/* Statistics for this input queue. */
> +	struct sdp_iq_stats stats;
> +
>  	/* DMA mapped base address of the input descriptor ring. */
>  	uint64_t base_addr_dma;
> 
> @@ -380,6 +394,8 @@ struct sdp_fn_list {
>  	void (*setup_oq_regs)(struct sdp_device *sdpvf, uint32_t q_no);
> 
>  	int (*setup_device_regs)(struct sdp_device *sdpvf);
> +	uint32_t (*update_iq_read_idx)(struct sdp_instr_queue *iq);
> +
>  	void (*enable_io_queues)(struct sdp_device *sdpvf);
>  	void (*disable_io_queues)(struct sdp_device *sdpvf);
> 
> @@ -458,4 +474,8 @@ struct sdp_device {
>  int sdp_setup_oqs(struct sdp_device *sdpvf, uint32_t oq_no);
>  int sdp_delete_oqs(struct sdp_device *sdpvf, uint32_t oq_no);
> 
> +int sdp_rawdev_enqueue(struct rte_rawdev *dev, struct rte_rawdev_buf
> **buffers,
> +		       unsigned int count, rte_rawdev_obj_t context);
> +
> +
>  #endif /* _OTX2_EP_RAWDEV_H_ */
> diff --git a/drivers/raw/octeontx2_ep/otx2_ep_vf.c
> b/drivers/raw/octeontx2_ep/otx2_ep_vf.c
> index 8e79fe8..c5c0bc3 100644
> --- a/drivers/raw/octeontx2_ep/otx2_ep_vf.c
> +++ b/drivers/raw/octeontx2_ep/otx2_ep_vf.c
> @@ -409,6 +409,28 @@
>  		sdp_vf_disable_oq(sdpvf, q_no);
>  }
> 
> +static uint32_t
> +sdp_vf_update_read_index(struct sdp_instr_queue *iq)
> +{
> +	uint32_t new_idx = rte_read32(iq->inst_cnt_reg);
> +
> +	/* The new instr cnt reg is a 32-bit counter that can roll over.
> +	 * We have noted the counter's initial value at init time into
> +	 * reset_instr_cnt
> +	 */
> +	if (iq->reset_instr_cnt < new_idx)
> +		new_idx -= iq->reset_instr_cnt;
> +	else
> +		new_idx += (0xffffffff - iq->reset_instr_cnt) + 1;
> +
> +	/* Modulo of the new index with the IQ size will give us
> +	 * the new index.
> +	 */
> +	new_idx %= iq->nb_desc;
> +
> +	return new_idx;
> +}
> +
>  int
>  sdp_vf_setup_device(struct sdp_device *sdpvf)
>  {
> @@ -436,6 +458,8 @@
>  	sdpvf->fn_list.setup_oq_regs       = sdp_vf_setup_oq_regs;
> 
>  	sdpvf->fn_list.setup_device_regs   = sdp_vf_setup_device_regs;
> +	sdpvf->fn_list.update_iq_read_idx  = sdp_vf_update_read_index;
> +
>  	sdpvf->fn_list.enable_io_queues    = sdp_vf_enable_io_queues;
>  	sdpvf->fn_list.disable_io_queues   = sdp_vf_disable_io_queues;
> 
> --
> 1.8.3.1
  
Mahipal Challa Jan. 3, 2020, 11:29 a.m. UTC | #2
Hi Gavin,
Please see response inline.



> -----Original Message-----
> From: Gavin Hu <Gavin.Hu@arm.com>
> Sent: Thursday, January 2, 2020 4:13 PM
> To: Mahipal Challa <mchalla@marvell.com>; dev@dpdk.org
> Cc: Jerin Jacob Kollanukkaran <jerinj@marvell.com>; Narayana Prasad Raju
> Athreya <pathreya@marvell.com>; Subrahmanyam Nilla
> <snilla@marvell.com>; Venkateshwarlu Nalla <venkatn@marvell.com>; nd
> <nd@arm.com>
> Subject: [EXT] RE: [dpdk-dev] [dpdk-dev PATCH v2 4/6] raw/octeontx2_ep:
> add enqueue operation
> 
> External Email
> 
> ----------------------------------------------------------------------
> Hi Mahipal,
> 
> > -----Original Message-----
> > From: dev <dev-bounces@dpdk.org> On Behalf Of Mahipal Challa
> > Sent: Friday, December 27, 2019 8:40 PM
> > To: dev@dpdk.org
> > Cc: jerinj@marvell.com; pathreya@marvell.com; snilla@marvell.com;
> > venkatn@marvell.com
> > Subject: [dpdk-dev] [dpdk-dev PATCH v2 4/6] raw/octeontx2_ep: add
> > enqueue operation
> >
> > Add rawdev enqueue operation for SDP VF devices.
> >
> > Signed-off-by: Mahipal Challa <mchalla@marvell.com>
> > ---
> >  doc/guides/rawdevs/octeontx2_ep.rst       |   6 +
> >  drivers/raw/octeontx2_ep/otx2_ep_enqdeq.c | 242
> > ++++++++++++++++++++++++++++++
> >  drivers/raw/octeontx2_ep/otx2_ep_enqdeq.h |  39 +++++
> >  drivers/raw/octeontx2_ep/otx2_ep_rawdev.c |   1 +
> >  drivers/raw/octeontx2_ep/otx2_ep_rawdev.h |  20 +++
> >  drivers/raw/octeontx2_ep/otx2_ep_vf.c     |  24 +++
> >  6 files changed, 332 insertions(+)
> >
> > diff --git a/doc/guides/rawdevs/octeontx2_ep.rst
> > b/doc/guides/rawdevs/octeontx2_ep.rst
> > index 2507fcf..39a7c29 100644
> > --- a/doc/guides/rawdevs/octeontx2_ep.rst
> > +++ b/doc/guides/rawdevs/octeontx2_ep.rst
> > @@ -68,3 +68,9 @@ The following code shows how the device is
> > configured
> >
> >     rte_rawdev_configure(dev_id, (rte_rawdev_obj_t)&rdev_info);
> >
> > +Performing Data Transfer
> > +------------------------
> > +
> > +To perform data transfer using SDP VF EP rawdev devices use standard
> > +``rte_rawdev_enqueue_buffers()`` and ``rte_rawdev_dequeue_buffers()``
> > APIs.
> > +
> > diff --git a/drivers/raw/octeontx2_ep/otx2_ep_enqdeq.c
> > b/drivers/raw/octeontx2_ep/otx2_ep_enqdeq.c
> > index 584b818..6910f08 100644
> > --- a/drivers/raw/octeontx2_ep/otx2_ep_enqdeq.c
> > +++ b/drivers/raw/octeontx2_ep/otx2_ep_enqdeq.c
> > @@ -403,3 +403,245 @@
> >  	return -ENOMEM;
> >  }
> >
> > +static inline void
> > +sdp_iqreq_delete(struct sdp_device *sdpvf,
> > +		struct sdp_instr_queue *iq, uint32_t idx) {
> > +	uint32_t reqtype;
> > +	void *buf;
> > +
> > +	buf     = iq->req_list[idx].buf;
> > +	reqtype = iq->req_list[idx].reqtype;
> > +
> > +	switch (reqtype) {
> > +	case SDP_REQTYPE_NORESP:
> > +		rte_mempool_put(sdpvf->enqdeq_mpool, buf);
> > +		otx2_sdp_dbg("IQ buffer freed at idx[%d]", idx);
> > +		break;
> > +
> > +	case SDP_REQTYPE_NORESP_GATHER:
> > +	case SDP_REQTYPE_NONE:
> > +	default:
> > +		otx2_info("This iqreq mode is not supported:%d", reqtype);
> > +
> > +	}
> > +
> > +	/* Reset the request list at this index */
> > +	iq->req_list[idx].buf = NULL;
> > +	iq->req_list[idx].reqtype = 0;
> > +}
> > +
> > +static inline void
> > +sdp_iqreq_add(struct sdp_instr_queue *iq, void *buf,
> > +		uint32_t reqtype)
> > +{
> > +	iq->req_list[iq->host_write_index].buf = buf;
> > +	iq->req_list[iq->host_write_index].reqtype = reqtype;
> > +
> > +	otx2_sdp_dbg("IQ buffer added at idx[%d]", iq->host_write_index);
> > +
> > +}
> > +
> > +static void
> > +sdp_flush_iq(struct sdp_device *sdpvf,
> > +		struct sdp_instr_queue *iq,
> > +		uint32_t pending_thresh __rte_unused) {
> > +	uint32_t instr_processed = 0;
> > +
> > +	rte_spinlock_lock(&iq->lock);
> > +
> > +	iq->otx_read_index = sdpvf->fn_list.update_iq_read_idx(iq);
> > +	while (iq->flush_index != iq->otx_read_index) {
> > +		/* Free the IQ data buffer to the pool */
> > +		sdp_iqreq_delete(sdpvf, iq, iq->flush_index);
> > +		iq->flush_index =
> > +			sdp_incr_index(iq->flush_index, 1, iq->nb_desc);
> > +
> > +		instr_processed++;
> > +	}
> > +
> > +	iq->stats.instr_processed = instr_processed;
> > +	rte_atomic64_sub(&iq->instr_pending, instr_processed);
> > +
> > +	rte_spinlock_unlock(&iq->lock);
> > +}
> > +
> > +static inline void
> > +sdp_ring_doorbell(struct sdp_device *sdpvf __rte_unused,
> > +		struct sdp_instr_queue *iq)
> > +{
> > +	otx2_write64(iq->fill_cnt, iq->doorbell_reg);
> > +
> > +	/* Make sure doorbell write goes through */
> > +	rte_cio_wmb();
> This rte_cio_wmb() should be moved before the doorbell ring to guarantee
> the IQ command submitted in post_iqcmd is observed by the peer HW
> before observing the doorbell ring.

[Mahipal]: Yes, IQ command is ensured to be in memory well before the doorbell ring operation, this is done by "post_iqcmd" routine(please see below).
And this "rte_cio_wmb()" is just to ensure the doorbell write operation completion(As per your suggestion in v1, we changed this from rte_wmb() to rte_cio_wmb()).

> > +	iq->fill_cnt = 0;
> > +
> > +}
> > +
> > +static inline int
> > +post_iqcmd(struct sdp_instr_queue *iq, uint8_t *iqcmd) {
> > +	uint8_t *iqptr, cmdsize;
> > +
> > +	/* This ensures that the read index does not wrap around to
> > +	 * the same position if queue gets full before OCTEON TX2 could
> > +	 * fetch any instr.
> > +	 */
> > +	if (rte_atomic64_read(&iq->instr_pending) >=
> > +			      (int32_t)(iq->nb_desc - 1)) {
> > +		otx2_err("IQ is full, pending:%ld",
> > +			 (long)rte_atomic64_read(&iq->instr_pending));
> > +
> > +		return SDP_IQ_SEND_FAILED;
> > +	}
> > +
> > +	/* Copy cmd into iq */
> > +	cmdsize = ((iq->iqcmd_64B) ? 64 : 32);
> > +	iqptr   = iq->base_addr + (cmdsize * iq->host_write_index);
> > +
> > +	rte_memcpy(iqptr, iqcmd, cmdsize);
> This is the DMA memory must be observed by the peer HW before observing
> the doorbell ring.
> 
[Mahipal]: Yes, this is ensured, please see the below comment.
> > +
> > +	otx2_sdp_dbg("IQ cmd posted @ index:%d", iq->host_write_index);
> > +
> > +	/* Increment the host write index */
> > +	iq->host_write_index =
> > +		sdp_incr_index(iq->host_write_index, 1, iq->nb_desc);
> > +
> > +	iq->fill_cnt++;
> > +
> > +	/* Flush the command into memory. We need to be sure the data
> > +	 * is in memory before indicating that the instruction is
> > +	 * pending.
> > +	 */
> > +	rte_io_wmb();
> Who is the observer of instr_pending? If it is the peer HW, then it should be
> rte_cio_wmb.
> Otherwise if it is a peer lcore, rte_smp_wmb is the right one, or if both are
> observers, then cio is correct one.

[Mahipal]: This is to ensure all the write operation completions(including the above IQ command memory write operation) before the doorbell ring operation.
Just to be very sure, would like to go with rte_io_wmb() instead of rte_cio_wmb().

> > +	rte_atomic64_inc(&iq->instr_pending);
> > +
> > +	/* SDP_IQ_SEND_SUCCESS */
> > +	return 0;
> > +}
> > +
> > +
> > +static int
> > +sdp_send_data(struct sdp_device *sdpvf,
> > +	      struct sdp_instr_queue *iq, void *cmd) {
> > +	uint32_t ret;
> > +
> > +	/* Lock this IQ command queue before posting instruction */
> > +	rte_spinlock_lock(&iq->post_lock);
> > +
> > +	/* Submit IQ command */
> > +	ret = post_iqcmd(iq, cmd);
> > +
> > +	if (ret == SDP_IQ_SEND_SUCCESS) {
> > +		sdp_ring_doorbell(sdpvf, iq);
> > +
> > +		iq->stats.instr_posted++;
> > +		otx2_sdp_dbg("Instr submit success posted: %ld\n",
> > +			     (long)iq->stats.instr_posted);
> > +
> > +	} else {
> > +		iq->stats.instr_dropped++;
> > +		otx2_err("Instr submit failled, dropped: %ld\n",
> > +			 (long)iq->stats.instr_dropped);
> > +
> > +	}
> > +
> > +	rte_spinlock_unlock(&iq->post_lock);
> > +
> > +	return ret;
> > +}
> > +
> > +
> > +/* Enqueue requests/packets to SDP IQ queue.
> > + * returns number of requests enqueued successfully  */ int
> > +sdp_rawdev_enqueue(struct rte_rawdev *rawdev,
> > +		   struct rte_rawdev_buf **buffers __rte_unused,
> > +		   unsigned int count, rte_rawdev_obj_t context) {
> > +	struct sdp_instr_64B *iqcmd;
> > +	struct sdp_instr_queue *iq;
> > +	struct sdp_soft_instr *si;
> > +	struct sdp_device *sdpvf;
> > +
> > +	struct sdp_instr_ih ihx;
> > +
> > +	sdpvf = (struct sdp_device *)rawdev->dev_private;
> > +	si = (struct sdp_soft_instr *)context;
> > +
> > +	iq = sdpvf->instr_queue[si->q_no];
> > +
> > +	if ((count > 1) || (count < 1)) {
> > +		otx2_err("This mode not supported: req[%d]", count);
> > +		goto enq_fail;
> > +	}
> > +
> > +	memset(&ihx, 0, sizeof(struct sdp_instr_ih));
> > +
> > +	iqcmd = &si->command;
> > +	memset(iqcmd, 0, sizeof(struct sdp_instr_64B));
> > +
> > +	iqcmd->dptr = (uint64_t)si->dptr;
> > +
> > +	/* Populate SDP IH */
> > +	ihx.pkind  = sdpvf->pkind;
> > +	ihx.fsz    = si->ih.fsz + 8; /* 8B for NIX IH */
> > +	ihx.gather = si->ih.gather;
> > +
> > +	/* Direct data instruction */
> > +	ihx.tlen   = si->ih.tlen + ihx.fsz;
> > +
> > +	switch (ihx.gather) {
> > +	case 0: /* Direct data instr */
> > +		ihx.tlen = si->ih.tlen + ihx.fsz;
> > +		break;
> > +
> > +	default: /* Gather */
> > +		switch (si->ih.gsz) {
> > +		case 0: /* Direct gather instr */
> > +			otx2_err("Direct Gather instr : not supported");
> > +			goto enq_fail;
> > +
> > +		default: /* Indirect gather instr */
> > +			otx2_err("Indirect Gather instr : not supported");
> > +			goto enq_fail;
> > +		}
> > +	}
> > +
> > +	rte_memcpy(&iqcmd->ih, &ihx, sizeof(uint64_t));
> > +	iqcmd->rptr = (uint64_t)si->rptr;
> > +	rte_memcpy(&iqcmd->irh, &si->irh, sizeof(uint64_t));
> > +
> > +	/* Swap FSZ(front data) here, to avoid swapping on OCTEON TX2 side
> > */
> > +	sdp_swap_8B_data(&iqcmd->rptr, 1);
> > +	sdp_swap_8B_data(&iqcmd->irh, 1);
> > +
> > +	otx2_sdp_dbg("After swapping");
> > +	otx2_sdp_dbg("Word0 [dptr]: 0x%016lx", (unsigned long)iqcmd-
> >dptr);
> > +	otx2_sdp_dbg("Word1 [ihtx]: 0x%016lx", (unsigned long)iqcmd->ih);
> > +	otx2_sdp_dbg("Word2 [rptr]: 0x%016lx", (unsigned long)iqcmd-
> >rptr);
> > +	otx2_sdp_dbg("Word3 [irh]: 0x%016lx", (unsigned long)iqcmd->irh);
> > +	otx2_sdp_dbg("Word4 [exhdr[0]]: 0x%016lx",
> > +			(unsigned long)iqcmd->exhdr[0]);
> > +
> > +	sdp_iqreq_add(iq, si->dptr, si->reqtype);
> > +
> > +	if (sdp_send_data(sdpvf, iq, iqcmd)) {
> > +		otx2_err("Data send failled :");
> > +		sdp_iqreq_delete(sdpvf, iq, iq->host_write_index);
> > +		goto enq_fail;
> > +	}
> > +
> > +	if (rte_atomic64_read(&iq->instr_pending) >= 1)
> > +		sdp_flush_iq(sdpvf, iq, 1 /*(iq->nb_desc / 2)*/);
> > +
> > +	/* Return no# of instructions posted successfully. */
> > +	return count;
> > +
> > +enq_fail:
> > +	return SDP_IQ_SEND_FAILED;
> > +}
> > +
> > diff --git a/drivers/raw/octeontx2_ep/otx2_ep_enqdeq.h
> > b/drivers/raw/octeontx2_ep/otx2_ep_enqdeq.h
> > index 4c28283..b9b7c0b 100644
> > --- a/drivers/raw/octeontx2_ep/otx2_ep_enqdeq.h
> > +++ b/drivers/raw/octeontx2_ep/otx2_ep_enqdeq.h
> > @@ -8,4 +8,43 @@
> >  #include <rte_byteorder.h>
> >  #include "otx2_ep_rawdev.h"
> >
> > +#define SDP_IQ_SEND_FAILED      (-1)
> > +#define SDP_IQ_SEND_SUCCESS     (0)
> > +
> > +
> > +static inline uint64_t
> > +sdp_endian_swap_8B(uint64_t _d)
> > +{
> > +	return ((((((uint64_t)(_d)) >>  0) & (uint64_t)0xff) << 56) |
> > +		(((((uint64_t)(_d)) >>  8) & (uint64_t)0xff) << 48) |
> > +		(((((uint64_t)(_d)) >> 16) & (uint64_t)0xff) << 40) |
> > +		(((((uint64_t)(_d)) >> 24) & (uint64_t)0xff) << 32) |
> > +		(((((uint64_t)(_d)) >> 32) & (uint64_t)0xff) << 24) |
> > +		(((((uint64_t)(_d)) >> 40) & (uint64_t)0xff) << 16) |
> > +		(((((uint64_t)(_d)) >> 48) & (uint64_t)0xff) <<  8) |
> > +		(((((uint64_t)(_d)) >> 56) & (uint64_t)0xff) <<  0)); }
> > +
> > +static inline void
> > +sdp_swap_8B_data(uint64_t *data, uint32_t blocks) {
> > +	/* Swap 8B blocks */
> > +	while (blocks) {
> > +		*data = sdp_endian_swap_8B(*data);
> > +		blocks--;
> > +		data++;
> > +	}
> > +}
> > +
> > +static inline uint32_t
> > +sdp_incr_index(uint32_t index, uint32_t count, uint32_t max) {
> > +	if ((index + count) >= max)
> > +		index = index + count - max;
> > +	else
> > +		index += count;
> > +
> > +	return index;
> > +}
> > +
> >  #endif /* _OTX2_EP_ENQDEQ_H_ */
> > diff --git a/drivers/raw/octeontx2_ep/otx2_ep_rawdev.c
> > b/drivers/raw/octeontx2_ep/otx2_ep_rawdev.c
> > index 3db5a74..22a6beb 100644
> > --- a/drivers/raw/octeontx2_ep/otx2_ep_rawdev.c
> > +++ b/drivers/raw/octeontx2_ep/otx2_ep_rawdev.c
> > @@ -251,6 +251,7 @@
> >  	.dev_start      = sdp_rawdev_start,
> >  	.dev_stop       = sdp_rawdev_stop,
> >  	.dev_close      = sdp_rawdev_close,
> > +	.enqueue_bufs   = sdp_rawdev_enqueue,
> >  };
> >
> >  static int
> > diff --git a/drivers/raw/octeontx2_ep/otx2_ep_rawdev.h
> > b/drivers/raw/octeontx2_ep/otx2_ep_rawdev.h
> > index a01f48d..8fd06fb 100644
> > --- a/drivers/raw/octeontx2_ep/otx2_ep_rawdev.h
> > +++ b/drivers/raw/octeontx2_ep/otx2_ep_rawdev.h
> > @@ -8,6 +8,10 @@
> >  #include <rte_byteorder.h>
> >  #include <rte_spinlock.h>
> >
> > +/* IQ instruction req types */
> > +#define SDP_REQTYPE_NONE             (0)
> > +#define SDP_REQTYPE_NORESP           (1)
> > +#define SDP_REQTYPE_NORESP_GATHER    (2)
> >
> >  /* Input Request Header format */
> >  struct sdp_instr_irh {
> > @@ -128,6 +132,13 @@ struct sdp_instr_list {  };
> >  #define SDP_IQREQ_LIST_SIZE	(sizeof(struct sdp_instr_list))
> >
> > +/* Input Queue statistics. Each input queue has four stats fields. */
> > +struct sdp_iq_stats {
> > +	uint64_t instr_posted; /* Instructions posted to this queue. */
> > +	uint64_t instr_processed; /* Instructions processed in this queue. */
> > +	uint64_t instr_dropped; /* Instructions that could not be processed
> > +*/ };
> > +
> >  /* Structure to define the configuration attributes for each Input
> > queue. */  struct sdp_iq_config {
> >  	/* Max number of IQs available */
> > @@ -195,6 +206,9 @@ struct sdp_instr_queue {
> >  	/* Number of instructions pending to be posted to OCTEON TX2. */
> >  	uint32_t fill_cnt;
> >
> > +	/* Statistics for this input queue. */
> > +	struct sdp_iq_stats stats;
> > +
> >  	/* DMA mapped base address of the input descriptor ring. */
> >  	uint64_t base_addr_dma;
> >
> > @@ -380,6 +394,8 @@ struct sdp_fn_list {
> >  	void (*setup_oq_regs)(struct sdp_device *sdpvf, uint32_t q_no);
> >
> >  	int (*setup_device_regs)(struct sdp_device *sdpvf);
> > +	uint32_t (*update_iq_read_idx)(struct sdp_instr_queue *iq);
> > +
> >  	void (*enable_io_queues)(struct sdp_device *sdpvf);
> >  	void (*disable_io_queues)(struct sdp_device *sdpvf);
> >
> > @@ -458,4 +474,8 @@ struct sdp_device {  int sdp_setup_oqs(struct
> > sdp_device *sdpvf, uint32_t oq_no);  int sdp_delete_oqs(struct
> > sdp_device *sdpvf, uint32_t oq_no);
> >
> > +int sdp_rawdev_enqueue(struct rte_rawdev *dev, struct rte_rawdev_buf
> > **buffers,
> > +		       unsigned int count, rte_rawdev_obj_t context);
> > +
> > +
> >  #endif /* _OTX2_EP_RAWDEV_H_ */
> > diff --git a/drivers/raw/octeontx2_ep/otx2_ep_vf.c
> > b/drivers/raw/octeontx2_ep/otx2_ep_vf.c
> > index 8e79fe8..c5c0bc3 100644
> > --- a/drivers/raw/octeontx2_ep/otx2_ep_vf.c
> > +++ b/drivers/raw/octeontx2_ep/otx2_ep_vf.c
> > @@ -409,6 +409,28 @@
> >  		sdp_vf_disable_oq(sdpvf, q_no);
> >  }
> >
> > +static uint32_t
> > +sdp_vf_update_read_index(struct sdp_instr_queue *iq) {
> > +	uint32_t new_idx = rte_read32(iq->inst_cnt_reg);
> > +
> > +	/* The new instr cnt reg is a 32-bit counter that can roll over.
> > +	 * We have noted the counter's initial value at init time into
> > +	 * reset_instr_cnt
> > +	 */
> > +	if (iq->reset_instr_cnt < new_idx)
> > +		new_idx -= iq->reset_instr_cnt;
> > +	else
> > +		new_idx += (0xffffffff - iq->reset_instr_cnt) + 1;
> > +
> > +	/* Modulo of the new index with the IQ size will give us
> > +	 * the new index.
> > +	 */
> > +	new_idx %= iq->nb_desc;
> > +
> > +	return new_idx;
> > +}
> > +
> >  int
> >  sdp_vf_setup_device(struct sdp_device *sdpvf)  { @@ -436,6 +458,8 @@
> >  	sdpvf->fn_list.setup_oq_regs       = sdp_vf_setup_oq_regs;
> >
> >  	sdpvf->fn_list.setup_device_regs   = sdp_vf_setup_device_regs;
> > +	sdpvf->fn_list.update_iq_read_idx  = sdp_vf_update_read_index;
> > +
> >  	sdpvf->fn_list.enable_io_queues    = sdp_vf_enable_io_queues;
> >  	sdpvf->fn_list.disable_io_queues   = sdp_vf_disable_io_queues;
> >
> > --
> > 1.8.3.1

Thanks,
Mahipal
  

Patch

diff --git a/doc/guides/rawdevs/octeontx2_ep.rst b/doc/guides/rawdevs/octeontx2_ep.rst
index 2507fcf..39a7c29 100644
--- a/doc/guides/rawdevs/octeontx2_ep.rst
+++ b/doc/guides/rawdevs/octeontx2_ep.rst
@@ -68,3 +68,9 @@  The following code shows how the device is configured
 
    rte_rawdev_configure(dev_id, (rte_rawdev_obj_t)&rdev_info);
 
+Performing Data Transfer
+------------------------
+
+To perform data transfer using SDP VF EP rawdev devices use standard
+``rte_rawdev_enqueue_buffers()`` and ``rte_rawdev_dequeue_buffers()`` APIs.
+
diff --git a/drivers/raw/octeontx2_ep/otx2_ep_enqdeq.c b/drivers/raw/octeontx2_ep/otx2_ep_enqdeq.c
index 584b818..6910f08 100644
--- a/drivers/raw/octeontx2_ep/otx2_ep_enqdeq.c
+++ b/drivers/raw/octeontx2_ep/otx2_ep_enqdeq.c
@@ -403,3 +403,245 @@ 
 	return -ENOMEM;
 }
 
+static inline void
+sdp_iqreq_delete(struct sdp_device *sdpvf,
+		struct sdp_instr_queue *iq, uint32_t idx)
+{
+	uint32_t reqtype;
+	void *buf;
+
+	buf     = iq->req_list[idx].buf;
+	reqtype = iq->req_list[idx].reqtype;
+
+	switch (reqtype) {
+	case SDP_REQTYPE_NORESP:
+		rte_mempool_put(sdpvf->enqdeq_mpool, buf);
+		otx2_sdp_dbg("IQ buffer freed at idx[%d]", idx);
+		break;
+
+	case SDP_REQTYPE_NORESP_GATHER:
+	case SDP_REQTYPE_NONE:
+	default:
+		otx2_info("This iqreq mode is not supported:%d", reqtype);
+
+	}
+
+	/* Reset the request list at this index */
+	iq->req_list[idx].buf = NULL;
+	iq->req_list[idx].reqtype = 0;
+}
+
+static inline void
+sdp_iqreq_add(struct sdp_instr_queue *iq, void *buf,
+		uint32_t reqtype)
+{
+	iq->req_list[iq->host_write_index].buf = buf;
+	iq->req_list[iq->host_write_index].reqtype = reqtype;
+
+	otx2_sdp_dbg("IQ buffer added at idx[%d]", iq->host_write_index);
+
+}
+
+static void
+sdp_flush_iq(struct sdp_device *sdpvf,
+		struct sdp_instr_queue *iq,
+		uint32_t pending_thresh __rte_unused)
+{
+	uint32_t instr_processed = 0;
+
+	rte_spinlock_lock(&iq->lock);
+
+	iq->otx_read_index = sdpvf->fn_list.update_iq_read_idx(iq);
+	while (iq->flush_index != iq->otx_read_index) {
+		/* Free the IQ data buffer to the pool */
+		sdp_iqreq_delete(sdpvf, iq, iq->flush_index);
+		iq->flush_index =
+			sdp_incr_index(iq->flush_index, 1, iq->nb_desc);
+
+		instr_processed++;
+	}
+
+	iq->stats.instr_processed = instr_processed;
+	rte_atomic64_sub(&iq->instr_pending, instr_processed);
+
+	rte_spinlock_unlock(&iq->lock);
+}
+
+static inline void
+sdp_ring_doorbell(struct sdp_device *sdpvf __rte_unused,
+		struct sdp_instr_queue *iq)
+{
+	otx2_write64(iq->fill_cnt, iq->doorbell_reg);
+
+	/* Make sure doorbell write goes through */
+	rte_cio_wmb();
+	iq->fill_cnt = 0;
+
+}
+
+static inline int
+post_iqcmd(struct sdp_instr_queue *iq, uint8_t *iqcmd)
+{
+	uint8_t *iqptr, cmdsize;
+
+	/* This ensures that the read index does not wrap around to
+	 * the same position if queue gets full before OCTEON TX2 could
+	 * fetch any instr.
+	 */
+	if (rte_atomic64_read(&iq->instr_pending) >=
+			      (int32_t)(iq->nb_desc - 1)) {
+		otx2_err("IQ is full, pending:%ld",
+			 (long)rte_atomic64_read(&iq->instr_pending));
+
+		return SDP_IQ_SEND_FAILED;
+	}
+
+	/* Copy cmd into iq */
+	cmdsize = ((iq->iqcmd_64B) ? 64 : 32);
+	iqptr   = iq->base_addr + (cmdsize * iq->host_write_index);
+
+	rte_memcpy(iqptr, iqcmd, cmdsize);
+
+	otx2_sdp_dbg("IQ cmd posted @ index:%d", iq->host_write_index);
+
+	/* Increment the host write index */
+	iq->host_write_index =
+		sdp_incr_index(iq->host_write_index, 1, iq->nb_desc);
+
+	iq->fill_cnt++;
+
+	/* Flush the command into memory. We need to be sure the data
+	 * is in memory before indicating that the instruction is
+	 * pending.
+	 */
+	rte_io_wmb();
+	rte_atomic64_inc(&iq->instr_pending);
+
+	/* SDP_IQ_SEND_SUCCESS */
+	return 0;
+}
+
+
+static int
+sdp_send_data(struct sdp_device *sdpvf,
+	      struct sdp_instr_queue *iq, void *cmd)
+{
+	uint32_t ret;
+
+	/* Lock this IQ command queue before posting instruction */
+	rte_spinlock_lock(&iq->post_lock);
+
+	/* Submit IQ command */
+	ret = post_iqcmd(iq, cmd);
+
+	if (ret == SDP_IQ_SEND_SUCCESS) {
+		sdp_ring_doorbell(sdpvf, iq);
+
+		iq->stats.instr_posted++;
+		otx2_sdp_dbg("Instr submit success posted: %ld\n",
+			     (long)iq->stats.instr_posted);
+
+	} else {
+		iq->stats.instr_dropped++;
+		otx2_err("Instr submit failled, dropped: %ld\n",
+			 (long)iq->stats.instr_dropped);
+
+	}
+
+	rte_spinlock_unlock(&iq->post_lock);
+
+	return ret;
+}
+
+
+/* Enqueue requests/packets to SDP IQ queue.
+ * returns number of requests enqueued successfully
+ */
+int
+sdp_rawdev_enqueue(struct rte_rawdev *rawdev,
+		   struct rte_rawdev_buf **buffers __rte_unused,
+		   unsigned int count, rte_rawdev_obj_t context)
+{
+	struct sdp_instr_64B *iqcmd;
+	struct sdp_instr_queue *iq;
+	struct sdp_soft_instr *si;
+	struct sdp_device *sdpvf;
+
+	struct sdp_instr_ih ihx;
+
+	sdpvf = (struct sdp_device *)rawdev->dev_private;
+	si = (struct sdp_soft_instr *)context;
+
+	iq = sdpvf->instr_queue[si->q_no];
+
+	if ((count > 1) || (count < 1)) {
+		otx2_err("This mode not supported: req[%d]", count);
+		goto enq_fail;
+	}
+
+	memset(&ihx, 0, sizeof(struct sdp_instr_ih));
+
+	iqcmd = &si->command;
+	memset(iqcmd, 0, sizeof(struct sdp_instr_64B));
+
+	iqcmd->dptr = (uint64_t)si->dptr;
+
+	/* Populate SDP IH */
+	ihx.pkind  = sdpvf->pkind;
+	ihx.fsz    = si->ih.fsz + 8; /* 8B for NIX IH */
+	ihx.gather = si->ih.gather;
+
+	/* Direct data instruction */
+	ihx.tlen   = si->ih.tlen + ihx.fsz;
+
+	switch (ihx.gather) {
+	case 0: /* Direct data instr */
+		ihx.tlen = si->ih.tlen + ihx.fsz;
+		break;
+
+	default: /* Gather */
+		switch (si->ih.gsz) {
+		case 0: /* Direct gather instr */
+			otx2_err("Direct Gather instr : not supported");
+			goto enq_fail;
+
+		default: /* Indirect gather instr */
+			otx2_err("Indirect Gather instr : not supported");
+			goto enq_fail;
+		}
+	}
+
+	rte_memcpy(&iqcmd->ih, &ihx, sizeof(uint64_t));
+	iqcmd->rptr = (uint64_t)si->rptr;
+	rte_memcpy(&iqcmd->irh, &si->irh, sizeof(uint64_t));
+
+	/* Swap FSZ(front data) here, to avoid swapping on OCTEON TX2 side */
+	sdp_swap_8B_data(&iqcmd->rptr, 1);
+	sdp_swap_8B_data(&iqcmd->irh, 1);
+
+	otx2_sdp_dbg("After swapping");
+	otx2_sdp_dbg("Word0 [dptr]: 0x%016lx", (unsigned long)iqcmd->dptr);
+	otx2_sdp_dbg("Word1 [ihtx]: 0x%016lx", (unsigned long)iqcmd->ih);
+	otx2_sdp_dbg("Word2 [rptr]: 0x%016lx", (unsigned long)iqcmd->rptr);
+	otx2_sdp_dbg("Word3 [irh]: 0x%016lx", (unsigned long)iqcmd->irh);
+	otx2_sdp_dbg("Word4 [exhdr[0]]: 0x%016lx",
+			(unsigned long)iqcmd->exhdr[0]);
+
+	sdp_iqreq_add(iq, si->dptr, si->reqtype);
+
+	if (sdp_send_data(sdpvf, iq, iqcmd)) {
+		otx2_err("Data send failled :");
+		sdp_iqreq_delete(sdpvf, iq, iq->host_write_index);
+		goto enq_fail;
+	}
+
+	if (rte_atomic64_read(&iq->instr_pending) >= 1)
+		sdp_flush_iq(sdpvf, iq, 1 /*(iq->nb_desc / 2)*/);
+
+	/* Return no# of instructions posted successfully. */
+	return count;
+
+enq_fail:
+	return SDP_IQ_SEND_FAILED;
+}
+
diff --git a/drivers/raw/octeontx2_ep/otx2_ep_enqdeq.h b/drivers/raw/octeontx2_ep/otx2_ep_enqdeq.h
index 4c28283..b9b7c0b 100644
--- a/drivers/raw/octeontx2_ep/otx2_ep_enqdeq.h
+++ b/drivers/raw/octeontx2_ep/otx2_ep_enqdeq.h
@@ -8,4 +8,43 @@ 
 #include <rte_byteorder.h>
 #include "otx2_ep_rawdev.h"
 
+#define SDP_IQ_SEND_FAILED      (-1)
+#define SDP_IQ_SEND_SUCCESS     (0)
+
+
+static inline uint64_t
+sdp_endian_swap_8B(uint64_t _d)
+{
+	return ((((((uint64_t)(_d)) >>  0) & (uint64_t)0xff) << 56) |
+		(((((uint64_t)(_d)) >>  8) & (uint64_t)0xff) << 48) |
+		(((((uint64_t)(_d)) >> 16) & (uint64_t)0xff) << 40) |
+		(((((uint64_t)(_d)) >> 24) & (uint64_t)0xff) << 32) |
+		(((((uint64_t)(_d)) >> 32) & (uint64_t)0xff) << 24) |
+		(((((uint64_t)(_d)) >> 40) & (uint64_t)0xff) << 16) |
+		(((((uint64_t)(_d)) >> 48) & (uint64_t)0xff) <<  8) |
+		(((((uint64_t)(_d)) >> 56) & (uint64_t)0xff) <<  0));
+}
+
+static inline void
+sdp_swap_8B_data(uint64_t *data, uint32_t blocks)
+{
+	/* Swap 8B blocks */
+	while (blocks) {
+		*data = sdp_endian_swap_8B(*data);
+		blocks--;
+		data++;
+	}
+}
+
+static inline uint32_t
+sdp_incr_index(uint32_t index, uint32_t count, uint32_t max)
+{
+	if ((index + count) >= max)
+		index = index + count - max;
+	else
+		index += count;
+
+	return index;
+}
+
 #endif /* _OTX2_EP_ENQDEQ_H_ */
diff --git a/drivers/raw/octeontx2_ep/otx2_ep_rawdev.c b/drivers/raw/octeontx2_ep/otx2_ep_rawdev.c
index 3db5a74..22a6beb 100644
--- a/drivers/raw/octeontx2_ep/otx2_ep_rawdev.c
+++ b/drivers/raw/octeontx2_ep/otx2_ep_rawdev.c
@@ -251,6 +251,7 @@ 
 	.dev_start      = sdp_rawdev_start,
 	.dev_stop       = sdp_rawdev_stop,
 	.dev_close      = sdp_rawdev_close,
+	.enqueue_bufs   = sdp_rawdev_enqueue,
 };
 
 static int
diff --git a/drivers/raw/octeontx2_ep/otx2_ep_rawdev.h b/drivers/raw/octeontx2_ep/otx2_ep_rawdev.h
index a01f48d..8fd06fb 100644
--- a/drivers/raw/octeontx2_ep/otx2_ep_rawdev.h
+++ b/drivers/raw/octeontx2_ep/otx2_ep_rawdev.h
@@ -8,6 +8,10 @@ 
 #include <rte_byteorder.h>
 #include <rte_spinlock.h>
 
+/* IQ instruction req types */
+#define SDP_REQTYPE_NONE             (0)
+#define SDP_REQTYPE_NORESP           (1)
+#define SDP_REQTYPE_NORESP_GATHER    (2)
 
 /* Input Request Header format */
 struct sdp_instr_irh {
@@ -128,6 +132,13 @@  struct sdp_instr_list {
 };
 #define SDP_IQREQ_LIST_SIZE	(sizeof(struct sdp_instr_list))
 
+/* Input Queue statistics. Each input queue has four stats fields. */
+struct sdp_iq_stats {
+	uint64_t instr_posted; /* Instructions posted to this queue. */
+	uint64_t instr_processed; /* Instructions processed in this queue. */
+	uint64_t instr_dropped; /* Instructions that could not be processed */
+};
+
 /* Structure to define the configuration attributes for each Input queue. */
 struct sdp_iq_config {
 	/* Max number of IQs available */
@@ -195,6 +206,9 @@  struct sdp_instr_queue {
 	/* Number of instructions pending to be posted to OCTEON TX2. */
 	uint32_t fill_cnt;
 
+	/* Statistics for this input queue. */
+	struct sdp_iq_stats stats;
+
 	/* DMA mapped base address of the input descriptor ring. */
 	uint64_t base_addr_dma;
 
@@ -380,6 +394,8 @@  struct sdp_fn_list {
 	void (*setup_oq_regs)(struct sdp_device *sdpvf, uint32_t q_no);
 
 	int (*setup_device_regs)(struct sdp_device *sdpvf);
+	uint32_t (*update_iq_read_idx)(struct sdp_instr_queue *iq);
+
 	void (*enable_io_queues)(struct sdp_device *sdpvf);
 	void (*disable_io_queues)(struct sdp_device *sdpvf);
 
@@ -458,4 +474,8 @@  struct sdp_device {
 int sdp_setup_oqs(struct sdp_device *sdpvf, uint32_t oq_no);
 int sdp_delete_oqs(struct sdp_device *sdpvf, uint32_t oq_no);
 
+int sdp_rawdev_enqueue(struct rte_rawdev *dev, struct rte_rawdev_buf **buffers,
+		       unsigned int count, rte_rawdev_obj_t context);
+
+
 #endif /* _OTX2_EP_RAWDEV_H_ */
diff --git a/drivers/raw/octeontx2_ep/otx2_ep_vf.c b/drivers/raw/octeontx2_ep/otx2_ep_vf.c
index 8e79fe8..c5c0bc3 100644
--- a/drivers/raw/octeontx2_ep/otx2_ep_vf.c
+++ b/drivers/raw/octeontx2_ep/otx2_ep_vf.c
@@ -409,6 +409,28 @@ 
 		sdp_vf_disable_oq(sdpvf, q_no);
 }
 
+static uint32_t
+sdp_vf_update_read_index(struct sdp_instr_queue *iq)
+{
+	uint32_t new_idx = rte_read32(iq->inst_cnt_reg);
+
+	/* The new instr cnt reg is a 32-bit counter that can roll over.
+	 * We have noted the counter's initial value at init time into
+	 * reset_instr_cnt
+	 */
+	if (iq->reset_instr_cnt < new_idx)
+		new_idx -= iq->reset_instr_cnt;
+	else
+		new_idx += (0xffffffff - iq->reset_instr_cnt) + 1;
+
+	/* Modulo of the new index with the IQ size will give us
+	 * the new index.
+	 */
+	new_idx %= iq->nb_desc;
+
+	return new_idx;
+}
+
 int
 sdp_vf_setup_device(struct sdp_device *sdpvf)
 {
@@ -436,6 +458,8 @@ 
 	sdpvf->fn_list.setup_oq_regs       = sdp_vf_setup_oq_regs;
 
 	sdpvf->fn_list.setup_device_regs   = sdp_vf_setup_device_regs;
+	sdpvf->fn_list.update_iq_read_idx  = sdp_vf_update_read_index;
+
 	sdpvf->fn_list.enable_io_queues    = sdp_vf_enable_io_queues;
 	sdpvf->fn_list.disable_io_queues   = sdp_vf_disable_io_queues;