[v4,2/8] dma/cnxk: changes for dmadev driver

Message ID 20230821174942.3165191-2-amitprakashs@marvell.com (mailing list archive)
State Superseded, archived
Delegated to: Jerin Jacob
Headers
Series [v4,1/8] common/cnxk: use unique name for DPI memzone |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Amit Prakash Shukla Aug. 21, 2023, 5:49 p.m. UTC
  Dmadev driver changes to align with the DPDK specification.

Fixes: 681851b347ad ("dma/cnxk: support CN10K DMA engine")
Cc: stable@dpdk.org

Signed-off-by: Amit Prakash Shukla <amitprakashs@marvell.com>
---
v2:
- Fixed bugs observed in v1.
- Squashed a few commits.

v3:
- Resolved review suggestions.
- Code improvement.

v4:
- Resolved checkpatch warnings.

 drivers/dma/cnxk/cnxk_dmadev.c | 464 ++++++++++++++++++++-------------
 drivers/dma/cnxk/cnxk_dmadev.h |  24 +-
 2 files changed, 294 insertions(+), 194 deletions(-)
  

Patch

diff --git a/drivers/dma/cnxk/cnxk_dmadev.c b/drivers/dma/cnxk/cnxk_dmadev.c
index a6f4a31e0e..a0152fc6df 100644
--- a/drivers/dma/cnxk/cnxk_dmadev.c
+++ b/drivers/dma/cnxk/cnxk_dmadev.c
@@ -7,68 +7,76 @@ 
 
 #include <bus_pci_driver.h>
 #include <rte_common.h>
+#include <rte_dmadev.h>
+#include <rte_dmadev_pmd.h>
 #include <rte_eal.h>
 #include <rte_lcore.h>
 #include <rte_mempool.h>
 #include <rte_pci.h>
-#include <rte_dmadev.h>
-#include <rte_dmadev_pmd.h>
 
-#include <roc_api.h>
 #include <cnxk_dmadev.h>
 
 static int
-cnxk_dmadev_info_get(const struct rte_dma_dev *dev,
-		     struct rte_dma_info *dev_info, uint32_t size)
+cnxk_dmadev_info_get(const struct rte_dma_dev *dev, struct rte_dma_info *dev_info, uint32_t size)
 {
 	RTE_SET_USED(dev);
 	RTE_SET_USED(size);
 
 	dev_info->max_vchans = 1;
 	dev_info->nb_vchans = 1;
-	dev_info->dev_capa = RTE_DMA_CAPA_MEM_TO_MEM |
-		RTE_DMA_CAPA_MEM_TO_DEV | RTE_DMA_CAPA_DEV_TO_MEM |
-		RTE_DMA_CAPA_DEV_TO_DEV | RTE_DMA_CAPA_OPS_COPY |
-		RTE_DMA_CAPA_OPS_COPY_SG;
+	dev_info->dev_capa = RTE_DMA_CAPA_MEM_TO_MEM | RTE_DMA_CAPA_MEM_TO_DEV |
+			     RTE_DMA_CAPA_DEV_TO_MEM | RTE_DMA_CAPA_DEV_TO_DEV |
+			     RTE_DMA_CAPA_OPS_COPY | RTE_DMA_CAPA_OPS_COPY_SG;
 	dev_info->max_desc = DPI_MAX_DESC;
-	dev_info->min_desc = 1;
+	dev_info->min_desc = 2;
 	dev_info->max_sges = DPI_MAX_POINTER;
 
 	return 0;
 }
 
 static int
-cnxk_dmadev_configure(struct rte_dma_dev *dev,
-		      const struct rte_dma_conf *conf, uint32_t conf_sz)
+cnxk_dmadev_configure(struct rte_dma_dev *dev, const struct rte_dma_conf *conf, uint32_t conf_sz)
 {
 	struct cnxk_dpi_vf_s *dpivf = NULL;
 	int rc = 0;
 
 	RTE_SET_USED(conf);
-	RTE_SET_USED(conf);
-	RTE_SET_USED(conf_sz);
 	RTE_SET_USED(conf_sz);
+
 	dpivf = dev->fp_obj->dev_private;
+
+	if (dpivf->flag & CNXK_DPI_DEV_CONFIG)
+		return rc;
+
 	rc = roc_dpi_configure(&dpivf->rdpi);
-	if (rc < 0)
+	if (rc < 0) {
 		plt_err("DMA configure failed err = %d", rc);
+		goto done;
+	}
 
+	dpivf->flag |= CNXK_DPI_DEV_CONFIG;
+
+done:
 	return rc;
 }
 
 static int
 cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,
-			const struct rte_dma_vchan_conf *conf,
-			uint32_t conf_sz)
+			const struct rte_dma_vchan_conf *conf, uint32_t conf_sz)
 {
 	struct cnxk_dpi_vf_s *dpivf = dev->fp_obj->dev_private;
-	struct cnxk_dpi_compl_s *comp_data;
-	union dpi_instr_hdr_s *header = &dpivf->conf.hdr;
+	struct cnxk_dpi_conf *dpi_conf = &dpivf->conf;
+	union dpi_instr_hdr_s *header = &dpi_conf->hdr;
+	uint16_t max_desc;
+	uint32_t size;
 	int i;
 
 	RTE_SET_USED(vchan);
 	RTE_SET_USED(conf_sz);
 
+	if (dpivf->flag & CNXK_DPI_VCHAN_CONFIG)
+		return 0;
+
 	header->cn9k.pt = DPI_HDR_PT_ZBW_CA;
 
 	switch (conf->direction) {
@@ -96,35 +104,54 @@  cnxk_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,
 		header->cn9k.fport = conf->dst_port.pcie.coreid;
 	};
 
-	for (i = 0; i < conf->nb_desc; i++) {
-		comp_data = rte_zmalloc(NULL, sizeof(*comp_data), 0);
-		if (comp_data == NULL) {
-			plt_err("Failed to allocate for comp_data");
-			return -ENOMEM;
-		}
-		comp_data->cdata = DPI_REQ_CDATA;
-		dpivf->conf.c_desc.compl_ptr[i] = comp_data;
-	};
-	dpivf->conf.c_desc.max_cnt = DPI_MAX_DESC;
-	dpivf->conf.c_desc.head = 0;
-	dpivf->conf.c_desc.tail = 0;
+	max_desc = conf->nb_desc;
+	if (!rte_is_power_of_2(max_desc))
+		max_desc = rte_align32pow2(max_desc);
+
+	if (max_desc > DPI_MAX_DESC)
+		max_desc = DPI_MAX_DESC;
+
+	size = (max_desc * sizeof(struct cnxk_dpi_compl_s *));
+	dpi_conf->c_desc.compl_ptr = rte_zmalloc(NULL, size, 0);
+
+	if (dpi_conf->c_desc.compl_ptr == NULL) {
+		plt_err("Failed to allocate for comp_data");
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < max_desc; i++) {
+		dpi_conf->c_desc.compl_ptr[i] =
+			rte_zmalloc(NULL, sizeof(struct cnxk_dpi_compl_s), 0);
+		dpi_conf->c_desc.compl_ptr[i]->cdata = DPI_REQ_CDATA;
+	}
+
+	dpi_conf->c_desc.max_cnt = (max_desc - 1);
+	dpi_conf->c_desc.head = 0;
+	dpi_conf->c_desc.tail = 0;
+	dpivf->pnum_words = 0;
+	dpivf->pending = 0;
+	dpivf->flag |= CNXK_DPI_VCHAN_CONFIG;
 
 	return 0;
 }
 
 static int
 cn10k_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,
-			 const struct rte_dma_vchan_conf *conf,
-			 uint32_t conf_sz)
+			 const struct rte_dma_vchan_conf *conf, uint32_t conf_sz)
 {
 	struct cnxk_dpi_vf_s *dpivf = dev->fp_obj->dev_private;
-	struct cnxk_dpi_compl_s *comp_data;
-	union dpi_instr_hdr_s *header = &dpivf->conf.hdr;
+	struct cnxk_dpi_conf *dpi_conf = &dpivf->conf;
+	union dpi_instr_hdr_s *header = &dpi_conf->hdr;
+	uint16_t max_desc;
+	uint32_t size;
 	int i;
 
 	RTE_SET_USED(vchan);
 	RTE_SET_USED(conf_sz);
 
+	if (dpivf->flag & CNXK_DPI_VCHAN_CONFIG)
+		return 0;
+
 	header->cn10k.pt = DPI_HDR_PT_ZBW_CA;
 
 	switch (conf->direction) {
@@ -152,18 +179,33 @@  cn10k_dmadev_vchan_setup(struct rte_dma_dev *dev, uint16_t vchan,
 		header->cn10k.fport = conf->dst_port.pcie.coreid;
 	};
 
-	for (i = 0; i < conf->nb_desc; i++) {
-		comp_data = rte_zmalloc(NULL, sizeof(*comp_data), 0);
-		if (comp_data == NULL) {
-			plt_err("Failed to allocate for comp_data");
-			return -ENOMEM;
-		}
-		comp_data->cdata = DPI_REQ_CDATA;
-		dpivf->conf.c_desc.compl_ptr[i] = comp_data;
-	};
-	dpivf->conf.c_desc.max_cnt = DPI_MAX_DESC;
-	dpivf->conf.c_desc.head = 0;
-	dpivf->conf.c_desc.tail = 0;
+	max_desc = conf->nb_desc;
+	if (!rte_is_power_of_2(max_desc))
+		max_desc = rte_align32pow2(max_desc);
+
+	if (max_desc > DPI_MAX_DESC)
+		max_desc = DPI_MAX_DESC;
+
+	size = (max_desc * sizeof(struct cnxk_dpi_compl_s *));
+	dpi_conf->c_desc.compl_ptr = rte_zmalloc(NULL, size, 0);
+
+	if (dpi_conf->c_desc.compl_ptr == NULL) {
+		plt_err("Failed to allocate for comp_data");
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < max_desc; i++) {
+		dpi_conf->c_desc.compl_ptr[i] =
+			rte_zmalloc(NULL, sizeof(struct cnxk_dpi_compl_s), 0);
+		dpi_conf->c_desc.compl_ptr[i]->cdata = DPI_REQ_CDATA;
+	}
+
+	dpi_conf->c_desc.max_cnt = (max_desc - 1);
+	dpi_conf->c_desc.head = 0;
+	dpi_conf->c_desc.tail = 0;
+	dpivf->pnum_words = 0;
+	dpivf->pending = 0;
+	dpivf->flag |= CNXK_DPI_VCHAN_CONFIG;
 
 	return 0;
 }
@@ -173,10 +215,16 @@  cnxk_dmadev_start(struct rte_dma_dev *dev)
 {
 	struct cnxk_dpi_vf_s *dpivf = dev->fp_obj->dev_private;
 
+	if (dpivf->flag & CNXK_DPI_DEV_START)
+		return 0;
+
 	dpivf->desc_idx = 0;
-	dpivf->num_words = 0;
+	dpivf->pending = 0;
+	dpivf->pnum_words = 0;
 	roc_dpi_enable(&dpivf->rdpi);
 
+	dpivf->flag |= CNXK_DPI_DEV_START;
+
 	return 0;
 }
 
@@ -187,6 +235,8 @@  cnxk_dmadev_stop(struct rte_dma_dev *dev)
 
 	roc_dpi_disable(&dpivf->rdpi);
 
+	dpivf->flag &= ~CNXK_DPI_DEV_START;
+
 	return 0;
 }
 
@@ -198,6 +248,8 @@  cnxk_dmadev_close(struct rte_dma_dev *dev)
 	roc_dpi_disable(&dpivf->rdpi);
 	roc_dpi_dev_fini(&dpivf->rdpi);
 
+	dpivf->flag = 0;
+
 	return 0;
 }
 
@@ -206,8 +258,7 @@  __dpi_queue_write(struct roc_dpi *dpi, uint64_t *cmds, int cmd_count)
 {
 	uint64_t *ptr = dpi->chunk_base;
 
-	if ((cmd_count < DPI_MIN_CMD_SIZE) || (cmd_count > DPI_MAX_CMD_SIZE) ||
-	    cmds == NULL)
+	if ((cmd_count < DPI_MIN_CMD_SIZE) || (cmd_count > DPI_MAX_CMD_SIZE) || cmds == NULL)
 		return -EINVAL;
 
 	/*
@@ -223,11 +274,15 @@  __dpi_queue_write(struct roc_dpi *dpi, uint64_t *cmds, int cmd_count)
 		int count;
 		uint64_t *new_buff = dpi->chunk_next;
 
-		dpi->chunk_next =
-			(void *)roc_npa_aura_op_alloc(dpi->aura_handle, 0);
+		dpi->chunk_next = (void *)roc_npa_aura_op_alloc(dpi->aura_handle, 0);
 		if (!dpi->chunk_next) {
-			plt_err("Failed to alloc next buffer from NPA");
-			return -ENOMEM;
+			plt_dp_dbg("Failed to alloc next buffer from NPA");
+
+			/* NPA failed to allocate a buffer. Restoring chunk_next
+			 * to its original address.
+			 */
+			dpi->chunk_next = new_buff;
+			return -ENOSPC;
 		}
 
 		/*
@@ -261,13 +316,17 @@  __dpi_queue_write(struct roc_dpi *dpi, uint64_t *cmds, int cmd_count)
 		/* queue index may be greater than pool size */
 		if (dpi->chunk_head >= dpi->pool_size_m1) {
 			new_buff = dpi->chunk_next;
-			dpi->chunk_next =
-				(void *)roc_npa_aura_op_alloc(dpi->aura_handle,
-							      0);
+			dpi->chunk_next = (void *)roc_npa_aura_op_alloc(dpi->aura_handle, 0);
 			if (!dpi->chunk_next) {
-				plt_err("Failed to alloc next buffer from NPA");
-				return -ENOMEM;
+				plt_dp_dbg("Failed to alloc next buffer from NPA");
+
+				/* NPA failed to allocate a buffer. Restoring chunk_next
+				 * to its original address.
+				 */
+				dpi->chunk_next = new_buff;
+				return -ENOSPC;
 			}
+
 			/* Write next buffer address */
 			*ptr = (uint64_t)new_buff;
 			dpi->chunk_base = new_buff;
@@ -279,12 +338,13 @@  __dpi_queue_write(struct roc_dpi *dpi, uint64_t *cmds, int cmd_count)
 }
 
 static int
-cnxk_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src,
-		 rte_iova_t dst, uint32_t length, uint64_t flags)
+cnxk_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t dst, uint32_t length,
+		 uint64_t flags)
 {
 	struct cnxk_dpi_vf_s *dpivf = dev_private;
 	union dpi_instr_hdr_s *header = &dpivf->conf.hdr;
 	struct cnxk_dpi_compl_s *comp_ptr;
+	uint64_t cmd[DPI_MAX_CMD_SIZE];
 	rte_iova_t fptr, lptr;
 	int num_words = 0;
 	int rc;
@@ -292,9 +352,8 @@  cnxk_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src,
 	RTE_SET_USED(vchan);
 
 	comp_ptr = dpivf->conf.c_desc.compl_ptr[dpivf->conf.c_desc.tail];
-	comp_ptr->cdata = DPI_REQ_CDATA;
 	header->cn9k.ptr = (uint64_t)comp_ptr;
-	STRM_INC(dpivf->conf.c_desc);
+	STRM_INC(dpivf->conf.c_desc, tail);
 
 	header->cn9k.nfst = 1;
 	header->cn9k.nlst = 1;
@@ -311,103 +370,110 @@  cnxk_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src,
 		lptr = dst;
 	}
 
-	dpivf->cmd[0] = header->u[0];
-	dpivf->cmd[1] = header->u[1];
-	dpivf->cmd[2] = header->u[2];
+	cmd[0] = header->u[0];
+	cmd[1] = header->u[1];
+	cmd[2] = header->u[2];
 	/* word3 is always 0 */
 	num_words += 4;
-	dpivf->cmd[num_words++] = length;
-	dpivf->cmd[num_words++] = fptr;
-	dpivf->cmd[num_words++] = length;
-	dpivf->cmd[num_words++] = lptr;
-
-	rc = __dpi_queue_write(&dpivf->rdpi, dpivf->cmd, num_words);
-	if (!rc) {
-		if (flags & RTE_DMA_OP_FLAG_SUBMIT) {
-			rte_wmb();
-			plt_write64(num_words,
-				    dpivf->rdpi.rbase + DPI_VDMA_DBELL);
-			dpivf->stats.submitted++;
-		}
-		dpivf->num_words += num_words;
+	cmd[num_words++] = length;
+	cmd[num_words++] = fptr;
+	cmd[num_words++] = length;
+	cmd[num_words++] = lptr;
+
+	rc = __dpi_queue_write(&dpivf->rdpi, cmd, num_words);
+	if (unlikely(rc)) {
+		STRM_DEC(dpivf->conf.c_desc, tail);
+		return rc;
 	}
 
-	return dpivf->desc_idx++;
+	rte_wmb();
+	if (flags & RTE_DMA_OP_FLAG_SUBMIT) {
+		plt_write64(num_words, dpivf->rdpi.rbase + DPI_VDMA_DBELL);
+		dpivf->stats.submitted++;
+	} else {
+		dpivf->pnum_words += num_words;
+		dpivf->pending++;
+	}
+
+	return (dpivf->desc_idx++);
 }
 
 static int
-cnxk_dmadev_copy_sg(void *dev_private, uint16_t vchan,
-		    const struct rte_dma_sge *src,
-		    const struct rte_dma_sge *dst,
-		    uint16_t nb_src, uint16_t nb_dst, uint64_t flags)
+cnxk_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge *src,
+		    const struct rte_dma_sge *dst, uint16_t nb_src, uint16_t nb_dst, uint64_t flags)
 {
 	struct cnxk_dpi_vf_s *dpivf = dev_private;
 	union dpi_instr_hdr_s *header = &dpivf->conf.hdr;
 	const struct rte_dma_sge *fptr, *lptr;
 	struct cnxk_dpi_compl_s *comp_ptr;
+	uint64_t cmd[DPI_MAX_CMD_SIZE];
 	int num_words = 0;
 	int i, rc;
 
 	RTE_SET_USED(vchan);
 
 	comp_ptr = dpivf->conf.c_desc.compl_ptr[dpivf->conf.c_desc.tail];
-	comp_ptr->cdata = DPI_REQ_CDATA;
 	header->cn9k.ptr = (uint64_t)comp_ptr;
-	STRM_INC(dpivf->conf.c_desc);
+	STRM_INC(dpivf->conf.c_desc, tail);
 
 	/*
 	 * For inbound case, src pointers are last pointers.
 	 * For all other cases, src pointers are first pointers.
 	 */
 	if (header->cn9k.xtype == DPI_XTYPE_INBOUND) {
-		header->cn9k.nfst = nb_dst & 0xf;
-		header->cn9k.nlst = nb_src & 0xf;
+		header->cn9k.nfst = nb_dst & DPI_MAX_POINTER;
+		header->cn9k.nlst = nb_src & DPI_MAX_POINTER;
 		fptr = &dst[0];
 		lptr = &src[0];
 	} else {
-		header->cn9k.nfst = nb_src & 0xf;
-		header->cn9k.nlst = nb_dst & 0xf;
+		header->cn9k.nfst = nb_src & DPI_MAX_POINTER;
+		header->cn9k.nlst = nb_dst & DPI_MAX_POINTER;
 		fptr = &src[0];
 		lptr = &dst[0];
 	}
 
-	dpivf->cmd[0] = header->u[0];
-	dpivf->cmd[1] = header->u[1];
-	dpivf->cmd[2] = header->u[2];
+	cmd[0] = header->u[0];
+	cmd[1] = header->u[1];
+	cmd[2] = header->u[2];
 	num_words += 4;
 	for (i = 0; i < header->cn9k.nfst; i++) {
-		dpivf->cmd[num_words++] = (uint64_t)fptr->length;
-		dpivf->cmd[num_words++] = fptr->addr;
+		cmd[num_words++] = (uint64_t)fptr->length;
+		cmd[num_words++] = fptr->addr;
 		fptr++;
 	}
 
 	for (i = 0; i < header->cn9k.nlst; i++) {
-		dpivf->cmd[num_words++] = (uint64_t)lptr->length;
-		dpivf->cmd[num_words++] = lptr->addr;
+		cmd[num_words++] = (uint64_t)lptr->length;
+		cmd[num_words++] = lptr->addr;
 		lptr++;
 	}
 
-	rc = __dpi_queue_write(&dpivf->rdpi, dpivf->cmd, num_words);
-	if (!rc) {
-		if (flags & RTE_DMA_OP_FLAG_SUBMIT) {
-			rte_wmb();
-			plt_write64(num_words,
-				    dpivf->rdpi.rbase + DPI_VDMA_DBELL);
-			dpivf->stats.submitted += nb_src;
-		}
-		dpivf->num_words += num_words;
+	rc = __dpi_queue_write(&dpivf->rdpi, cmd, num_words);
+	if (unlikely(rc)) {
+		STRM_DEC(dpivf->conf.c_desc, tail);
+		return rc;
 	}
 
-	return (rc < 0) ? rc : dpivf->desc_idx++;
+	if (flags & RTE_DMA_OP_FLAG_SUBMIT) {
+		rte_wmb();
+		plt_write64(num_words, dpivf->rdpi.rbase + DPI_VDMA_DBELL);
+		dpivf->stats.submitted += nb_src;
+	} else {
+		dpivf->pnum_words += num_words;
+		dpivf->pending++;
+	}
+
+	return (dpivf->desc_idx++);
 }
 
 static int
-cn10k_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src,
-		  rte_iova_t dst, uint32_t length, uint64_t flags)
+cn10k_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t dst,
+		  uint32_t length, uint64_t flags)
 {
 	struct cnxk_dpi_vf_s *dpivf = dev_private;
 	union dpi_instr_hdr_s *header = &dpivf->conf.hdr;
 	struct cnxk_dpi_compl_s *comp_ptr;
+	uint64_t cmd[DPI_MAX_CMD_SIZE];
 	rte_iova_t fptr, lptr;
 	int num_words = 0;
 	int rc;
@@ -415,9 +481,8 @@  cn10k_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src,
 	RTE_SET_USED(vchan);
 
 	comp_ptr = dpivf->conf.c_desc.compl_ptr[dpivf->conf.c_desc.tail];
-	comp_ptr->cdata = DPI_REQ_CDATA;
 	header->cn10k.ptr = (uint64_t)comp_ptr;
-	STRM_INC(dpivf->conf.c_desc);
+	STRM_INC(dpivf->conf.c_desc, tail);
 
 	header->cn10k.nfst = 1;
 	header->cn10k.nlst = 1;
@@ -425,131 +490,140 @@  cn10k_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src,
 	fptr = src;
 	lptr = dst;
 
-	dpivf->cmd[0] = header->u[0];
-	dpivf->cmd[1] = header->u[1];
-	dpivf->cmd[2] = header->u[2];
+	cmd[0] = header->u[0];
+	cmd[1] = header->u[1];
+	cmd[2] = header->u[2];
 	/* word3 is always 0 */
 	num_words += 4;
-	dpivf->cmd[num_words++] = length;
-	dpivf->cmd[num_words++] = fptr;
-	dpivf->cmd[num_words++] = length;
-	dpivf->cmd[num_words++] = lptr;
-
-	rc = __dpi_queue_write(&dpivf->rdpi, dpivf->cmd, num_words);
-	if (!rc) {
-		if (flags & RTE_DMA_OP_FLAG_SUBMIT) {
-			rte_wmb();
-			plt_write64(num_words,
-				    dpivf->rdpi.rbase + DPI_VDMA_DBELL);
-			dpivf->stats.submitted++;
-		}
-		dpivf->num_words += num_words;
+	cmd[num_words++] = length;
+	cmd[num_words++] = fptr;
+	cmd[num_words++] = length;
+	cmd[num_words++] = lptr;
+
+	rc = __dpi_queue_write(&dpivf->rdpi, cmd, num_words);
+	if (unlikely(rc)) {
+		STRM_DEC(dpivf->conf.c_desc, tail);
+		return rc;
+	}
+
+	if (flags & RTE_DMA_OP_FLAG_SUBMIT) {
+		rte_wmb();
+		plt_write64(num_words, dpivf->rdpi.rbase + DPI_VDMA_DBELL);
+		dpivf->stats.submitted++;
+	} else {
+		dpivf->pnum_words += num_words;
+		dpivf->pending++;
 	}
 
 	return dpivf->desc_idx++;
 }
 
 static int
-cn10k_dmadev_copy_sg(void *dev_private, uint16_t vchan,
-		     const struct rte_dma_sge *src,
-		     const struct rte_dma_sge *dst, uint16_t nb_src,
-		     uint16_t nb_dst, uint64_t flags)
+cn10k_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge *src,
+		     const struct rte_dma_sge *dst, uint16_t nb_src, uint16_t nb_dst,
+		     uint64_t flags)
 {
 	struct cnxk_dpi_vf_s *dpivf = dev_private;
 	union dpi_instr_hdr_s *header = &dpivf->conf.hdr;
 	const struct rte_dma_sge *fptr, *lptr;
 	struct cnxk_dpi_compl_s *comp_ptr;
+	uint64_t cmd[DPI_MAX_CMD_SIZE];
 	int num_words = 0;
 	int i, rc;
 
 	RTE_SET_USED(vchan);
 
 	comp_ptr = dpivf->conf.c_desc.compl_ptr[dpivf->conf.c_desc.tail];
-	comp_ptr->cdata = DPI_REQ_CDATA;
 	header->cn10k.ptr = (uint64_t)comp_ptr;
-	STRM_INC(dpivf->conf.c_desc);
+	STRM_INC(dpivf->conf.c_desc, tail);
 
-	header->cn10k.nfst = nb_src & 0xf;
-	header->cn10k.nlst = nb_dst & 0xf;
+	header->cn10k.nfst = nb_src & DPI_MAX_POINTER;
+	header->cn10k.nlst = nb_dst & DPI_MAX_POINTER;
 	fptr = &src[0];
 	lptr = &dst[0];
 
-	dpivf->cmd[0] = header->u[0];
-	dpivf->cmd[1] = header->u[1];
-	dpivf->cmd[2] = header->u[2];
+	cmd[0] = header->u[0];
+	cmd[1] = header->u[1];
+	cmd[2] = header->u[2];
 	num_words += 4;
 
 	for (i = 0; i < header->cn10k.nfst; i++) {
-		dpivf->cmd[num_words++] = (uint64_t)fptr->length;
-		dpivf->cmd[num_words++] = fptr->addr;
+		cmd[num_words++] = (uint64_t)fptr->length;
+		cmd[num_words++] = fptr->addr;
 		fptr++;
 	}
 
 	for (i = 0; i < header->cn10k.nlst; i++) {
-		dpivf->cmd[num_words++] = (uint64_t)lptr->length;
-		dpivf->cmd[num_words++] = lptr->addr;
+		cmd[num_words++] = (uint64_t)lptr->length;
+		cmd[num_words++] = lptr->addr;
 		lptr++;
 	}
 
-	rc = __dpi_queue_write(&dpivf->rdpi, dpivf->cmd, num_words);
-	if (!rc) {
-		if (flags & RTE_DMA_OP_FLAG_SUBMIT) {
-			rte_wmb();
-			plt_write64(num_words,
-				    dpivf->rdpi.rbase + DPI_VDMA_DBELL);
-			dpivf->stats.submitted += nb_src;
-		}
-		dpivf->num_words += num_words;
+	rc = __dpi_queue_write(&dpivf->rdpi, cmd, num_words);
+	if (unlikely(rc)) {
+		STRM_DEC(dpivf->conf.c_desc, tail);
+		return rc;
+	}
+
+	if (flags & RTE_DMA_OP_FLAG_SUBMIT) {
+		rte_wmb();
+		plt_write64(num_words, dpivf->rdpi.rbase + DPI_VDMA_DBELL);
+		dpivf->stats.submitted += nb_src;
+	} else {
+		dpivf->pnum_words += num_words;
+		dpivf->pending++;
 	}
 
-	return (rc < 0) ? rc : dpivf->desc_idx++;
+	return (dpivf->desc_idx++);
 }
 
 static uint16_t
-cnxk_dmadev_completed(void *dev_private, uint16_t vchan, const uint16_t nb_cpls,
-		      uint16_t *last_idx, bool *has_error)
+cnxk_dmadev_completed(void *dev_private, uint16_t vchan, const uint16_t nb_cpls, uint16_t *last_idx,
+		      bool *has_error)
 {
 	struct cnxk_dpi_vf_s *dpivf = dev_private;
+	struct cnxk_dpi_cdesc_data_s *c_desc = &dpivf->conf.c_desc;
+	struct cnxk_dpi_compl_s *comp_ptr;
 	int cnt;
 
 	RTE_SET_USED(vchan);
 
-	if (dpivf->stats.submitted == dpivf->stats.completed)
-		return 0;
-
 	for (cnt = 0; cnt < nb_cpls; cnt++) {
-		struct cnxk_dpi_compl_s *comp_ptr =
-			dpivf->conf.c_desc.compl_ptr[cnt];
+		comp_ptr = c_desc->compl_ptr[c_desc->head];
 
 		if (comp_ptr->cdata) {
 			if (comp_ptr->cdata == DPI_REQ_CDATA)
 				break;
 			*has_error = 1;
 			dpivf->stats.errors++;
+			STRM_INC(*c_desc, head);
 			break;
 		}
+
+		comp_ptr->cdata = DPI_REQ_CDATA;
+		STRM_INC(*c_desc, head);
 	}
 
-	*last_idx = cnt - 1;
-	dpivf->conf.c_desc.tail = cnt;
 	dpivf->stats.completed += cnt;
+	*last_idx = dpivf->stats.completed - 1;
 
 	return cnt;
 }
 
 static uint16_t
-cnxk_dmadev_completed_status(void *dev_private, uint16_t vchan,
-			     const uint16_t nb_cpls, uint16_t *last_idx,
-			     enum rte_dma_status_code *status)
+cnxk_dmadev_completed_status(void *dev_private, uint16_t vchan, const uint16_t nb_cpls,
+			     uint16_t *last_idx, enum rte_dma_status_code *status)
 {
 	struct cnxk_dpi_vf_s *dpivf = dev_private;
+	struct cnxk_dpi_cdesc_data_s *c_desc = &dpivf->conf.c_desc;
+	struct cnxk_dpi_compl_s *comp_ptr;
 	int cnt;
 
 	RTE_SET_USED(vchan);
 	RTE_SET_USED(last_idx);
+
 	for (cnt = 0; cnt < nb_cpls; cnt++) {
-		struct cnxk_dpi_compl_s *comp_ptr =
-			dpivf->conf.c_desc.compl_ptr[cnt];
+		comp_ptr = c_desc->compl_ptr[c_desc->head];
 		status[cnt] = comp_ptr->cdata;
 		if (status[cnt]) {
 			if (status[cnt] == DPI_REQ_CDATA)
@@ -557,30 +631,52 @@  cnxk_dmadev_completed_status(void *dev_private, uint16_t vchan,
 
 			dpivf->stats.errors++;
 		}
+		comp_ptr->cdata = DPI_REQ_CDATA;
+		STRM_INC(*c_desc, head);
 	}
 
-	*last_idx = cnt - 1;
-	dpivf->conf.c_desc.tail = 0;
 	dpivf->stats.completed += cnt;
+	*last_idx = dpivf->stats.completed - 1;
 
 	return cnt;
 }
 
+static uint16_t
+cnxk_damdev_burst_capacity(const void *dev_private, uint16_t vchan)
+{
+	const struct cnxk_dpi_vf_s *dpivf = (const struct cnxk_dpi_vf_s *)dev_private;
+	uint16_t burst_cap;
+
+	RTE_SET_USED(vchan);
+
+	burst_cap = dpivf->conf.c_desc.max_cnt -
+		    ((dpivf->stats.submitted - dpivf->stats.completed) + dpivf->pending) + 1;
+
+	return burst_cap;
+}
+
 static int
 cnxk_dmadev_submit(void *dev_private, uint16_t vchan __rte_unused)
 {
 	struct cnxk_dpi_vf_s *dpivf = dev_private;
+	uint32_t num_words = dpivf->pnum_words;
+
+	if (!dpivf->pnum_words)
+		return 0;
 
 	rte_wmb();
-	plt_write64(dpivf->num_words, dpivf->rdpi.rbase + DPI_VDMA_DBELL);
-	dpivf->stats.submitted++;
+	plt_write64(num_words, dpivf->rdpi.rbase + DPI_VDMA_DBELL);
+
+	dpivf->stats.submitted += dpivf->pending;
+	dpivf->pnum_words = 0;
+	dpivf->pending = 0;
 
 	return 0;
 }
 
 static int
-cnxk_stats_get(const struct rte_dma_dev *dev, uint16_t vchan,
-	       struct rte_dma_stats *rte_stats, uint32_t size)
+cnxk_stats_get(const struct rte_dma_dev *dev, uint16_t vchan, struct rte_dma_stats *rte_stats,
+	       uint32_t size)
 {
 	struct cnxk_dpi_vf_s *dpivf = dev->fp_obj->dev_private;
 	struct rte_dma_stats *stats = &dpivf->stats;
@@ -628,8 +724,7 @@  static const struct rte_dma_dev_ops cnxk_dmadev_ops = {
 };
 
 static int
-cnxk_dmadev_probe(struct rte_pci_driver *pci_drv __rte_unused,
-		  struct rte_pci_device *pci_dev)
+cnxk_dmadev_probe(struct rte_pci_driver *pci_drv __rte_unused, struct rte_pci_device *pci_dev)
 {
 	struct cnxk_dpi_vf_s *dpivf = NULL;
 	char name[RTE_DEV_NAME_MAX_LEN];
@@ -648,8 +743,7 @@  cnxk_dmadev_probe(struct rte_pci_driver *pci_drv __rte_unused,
 	memset(name, 0, sizeof(name));
 	rte_pci_device_name(&pci_dev->addr, name, sizeof(name));
 
-	dmadev = rte_dma_pmd_allocate(name, pci_dev->device.numa_node,
-				      sizeof(*dpivf));
+	dmadev = rte_dma_pmd_allocate(name, pci_dev->device.numa_node, sizeof(*dpivf));
 	if (dmadev == NULL) {
 		plt_err("dma device allocation failed for %s", name);
 		return -ENOMEM;
@@ -666,6 +760,7 @@  cnxk_dmadev_probe(struct rte_pci_driver *pci_drv __rte_unused,
 	dmadev->fp_obj->submit = cnxk_dmadev_submit;
 	dmadev->fp_obj->completed = cnxk_dmadev_completed;
 	dmadev->fp_obj->completed_status = cnxk_dmadev_completed_status;
+	dmadev->fp_obj->burst_capacity = cnxk_damdev_burst_capacity;
 
 	if (pci_dev->id.subsystem_device_id == PCI_SUBSYSTEM_DEVID_CN10KA ||
 	    pci_dev->id.subsystem_device_id == PCI_SUBSYSTEM_DEVID_CNF10KA ||
@@ -682,6 +777,8 @@  cnxk_dmadev_probe(struct rte_pci_driver *pci_drv __rte_unused,
 	if (rc < 0)
 		goto err_out_free;
 
+	dmadev->state = RTE_DMA_DEV_READY;
+
 	return 0;
 
 err_out_free:
@@ -703,20 +800,17 @@  cnxk_dmadev_remove(struct rte_pci_device *pci_dev)
 }
 
 static const struct rte_pci_id cnxk_dma_pci_map[] = {
-	{
-		RTE_PCI_DEVICE(PCI_VENDOR_ID_CAVIUM,
-			       PCI_DEVID_CNXK_DPI_VF)
-	},
+	{RTE_PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_CNXK_DPI_VF)},
 	{
 		.vendor_id = 0,
 	},
 };
 
 static struct rte_pci_driver cnxk_dmadev = {
-	.id_table  = cnxk_dma_pci_map,
+	.id_table = cnxk_dma_pci_map,
 	.drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_NEED_IOVA_AS_VA,
-	.probe     = cnxk_dmadev_probe,
-	.remove    = cnxk_dmadev_remove,
+	.probe = cnxk_dmadev_probe,
+	.remove = cnxk_dmadev_remove,
 };
 
 RTE_PMD_REGISTER_PCI(cnxk_dmadev_pci_driver, cnxk_dmadev);
diff --git a/drivers/dma/cnxk/cnxk_dmadev.h b/drivers/dma/cnxk/cnxk_dmadev.h
index e1f5694f50..9563295af0 100644
--- a/drivers/dma/cnxk/cnxk_dmadev.h
+++ b/drivers/dma/cnxk/cnxk_dmadev.h
@@ -4,16 +4,21 @@ 
 #ifndef CNXK_DMADEV_H
 #define CNXK_DMADEV_H
 
-#define DPI_MAX_POINTER		15
-#define DPI_QUEUE_STOP		0x0
-#define DPI_QUEUE_START		0x1
-#define STRM_INC(s)		((s).tail = ((s).tail + 1) % (s).max_cnt)
-#define DPI_MAX_DESC		1024
+#include <roc_api.h>
+
+#define DPI_MAX_POINTER	 15
+#define STRM_INC(s, var) ((s).var = ((s).var + 1) & (s).max_cnt)
+#define STRM_DEC(s, var) ((s).var = ((s).var - 1) == -1 ? (s).max_cnt : ((s).var - 1))
+#define DPI_MAX_DESC	 1024
 
 /* Set Completion data to 0xFF when request submitted,
  * upon successful request completion engine reset to completion status
  */
-#define DPI_REQ_CDATA		0xFF
+#define DPI_REQ_CDATA 0xFF
+
+#define CNXK_DPI_DEV_CONFIG   (1ULL << 0)
+#define CNXK_DPI_VCHAN_CONFIG (1ULL << 1)
+#define CNXK_DPI_DEV_START    (1ULL << 2)
 
 struct cnxk_dpi_compl_s {
 	uint64_t cdata;
@@ -21,7 +26,7 @@  struct cnxk_dpi_compl_s {
 };
 
 struct cnxk_dpi_cdesc_data_s {
-	struct cnxk_dpi_compl_s *compl_ptr[DPI_MAX_DESC];
+	struct cnxk_dpi_compl_s **compl_ptr;
 	uint16_t max_cnt;
 	uint16_t head;
 	uint16_t tail;
@@ -36,9 +41,10 @@  struct cnxk_dpi_vf_s {
 	struct roc_dpi rdpi;
 	struct cnxk_dpi_conf conf;
 	struct rte_dma_stats stats;
-	uint64_t cmd[DPI_MAX_CMD_SIZE];
-	uint32_t num_words;
+	uint16_t pending;
+	uint16_t pnum_words;
 	uint16_t desc_idx;
+	uint16_t flag;
 };
 
 #endif