[v0,1/1] dma/cnxk: support pending count per HW DMA channel

Message ID 20250211170532.1650221-1-vattunuru@marvell.com (mailing list archive)
State Accepted
Delegated to: Jerin Jacob
Headers
Series [v0,1/1] dma/cnxk: support pending count per HW DMA channel |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/loongarch-compilation success Compilation OK
ci/loongarch-unit-testing success Unit Testing PASS
ci/github-robot: build success github build: passed
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-marvell-Functional success Functional Testing PASS
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-abi-testing success Testing PASS
ci/Intel-compilation success Compilation OK
ci/iol-unit-amd64-testing success Testing PASS
ci/intel-Testing success Testing PASS
ci/intel-Functional success Functional PASS
ci/iol-compile-amd64-testing success Testing PASS
ci/iol-unit-arm64-testing success Testing PASS
ci/iol-sample-apps-testing success Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-intel-Functional success Functional Testing PASS

Commit Message

Vamsi Krishna Feb. 11, 2025, 5:05 p.m. UTC
From: Vamsi Attunuru <vattunuru@marvell.com>

Adds code changes to maintain pending count per hw dma channel
instead of per vchan. This enables ringing dbell for exact number
of dma commands present in the DPI queue.

Signed-off-by: Vamsi Attunuru <vattunuru@marvell.com>
---
 drivers/dma/cnxk/cnxk_dmadev.c    | 17 +++----
 drivers/dma/cnxk/cnxk_dmadev.h    |  3 +-
 drivers/dma/cnxk/cnxk_dmadev_fp.c | 74 +++++++++++++------------------
 3 files changed, 40 insertions(+), 54 deletions(-)
  

Comments

Jerin Jacob Feb. 24, 2025, 5:05 p.m. UTC | #1
> -----Original Message-----
> From: Vamsi Krishna <vattunuru@marvell.com>
> Sent: Tuesday, February 11, 2025 10:36 PM
> To: dev@dpdk.org
> Cc: Jerin Jacob <jerinj@marvell.com>; Vamsi Krishna Attunuru
> <vattunuru@marvell.com>
> Subject: [PATCH v0 1/1] dma/cnxk: support pending count per HW DMA
> channel
> 
> From: Vamsi Attunuru <vattunuru@marvell.com>
> 
> Adds code changes to maintain pending count per hw dma channel instead of
> per vchan. This enables ringing dbell for exact number of dma commands
> present in the DPI queue.
> 
> Signed-off-by: Vamsi Attunuru <vattunuru@marvell.com>


Applied to dpdk-next-net-mrvl/for-main. Thanks
  

Patch

diff --git a/drivers/dma/cnxk/cnxk_dmadev.c b/drivers/dma/cnxk/cnxk_dmadev.c
index e7be3767b2..90bb69011f 100644
--- a/drivers/dma/cnxk/cnxk_dmadev.c
+++ b/drivers/dma/cnxk/cnxk_dmadev.c
@@ -288,12 +288,12 @@  cnxk_dmadev_start(struct rte_dma_dev *dev)
 	int i, j, rc = 0;
 	void *chunk;
 
+	dpivf->total_pnum_words = 0;
+
 	for (i = 0; i < dpivf->num_vchans; i++) {
 		dpi_conf = &dpivf->conf[i];
 		dpi_conf->c_desc.head = 0;
 		dpi_conf->c_desc.tail = 0;
-		dpi_conf->pnum_words = 0;
-		dpi_conf->pending = 0;
 		dpi_conf->desc_idx = 0;
 		for (j = 0; j < dpi_conf->c_desc.max_cnt + 1; j++)
 			dpi_conf->c_desc.compl_ptr[j * CNXK_DPI_COMPL_OFFSET] = CNXK_DPI_REQ_CDATA;
@@ -442,8 +442,7 @@  cnxk_damdev_burst_capacity(const void *dev_private, uint16_t vchan)
 	uint16_t burst_cap;
 
 	burst_cap = dpi_conf->c_desc.max_cnt -
-		    ((dpi_conf->stats.submitted - dpi_conf->stats.completed) + dpi_conf->pending) +
-		    1;
+		    (dpi_conf->stats.submitted - dpi_conf->stats.completed) + 1;
 
 	return burst_cap;
 }
@@ -452,18 +451,16 @@  static int
 cnxk_dmadev_submit(void *dev_private, uint16_t vchan)
 {
 	struct cnxk_dpi_vf_s *dpivf = dev_private;
-	struct cnxk_dpi_conf *dpi_conf = &dpivf->conf[vchan];
-	uint32_t num_words = dpi_conf->pnum_words;
+	uint32_t num_words = dpivf->total_pnum_words;
+	RTE_SET_USED(vchan);
 
-	if (!dpi_conf->pnum_words)
+	if (!num_words)
 		return 0;
 
 	rte_wmb();
 	plt_write64(num_words, dpivf->rdpi.rbase + DPI_VDMA_DBELL);
 
-	dpi_conf->stats.submitted += dpi_conf->pending;
-	dpi_conf->pnum_words = 0;
-	dpi_conf->pending = 0;
+	dpivf->total_pnum_words = 0;
 
 	return 0;
 }
diff --git a/drivers/dma/cnxk/cnxk_dmadev.h b/drivers/dma/cnxk/cnxk_dmadev.h
index a95ce13f62..9a232a5464 100644
--- a/drivers/dma/cnxk/cnxk_dmadev.h
+++ b/drivers/dma/cnxk/cnxk_dmadev.h
@@ -98,8 +98,6 @@  struct cnxk_dpi_cdesc_data_s {
 struct cnxk_dpi_conf {
 	union cnxk_dpi_instr_cmd cmd;
 	struct cnxk_dpi_cdesc_data_s c_desc;
-	uint16_t pnum_words;
-	uint16_t pending;
 	uint16_t desc_idx;
 	struct rte_dma_stats stats;
 	uint64_t completed_offset;
@@ -111,6 +109,7 @@  struct cnxk_dpi_vf_s {
 	uint64_t *chunk_base;
 	uint16_t chunk_head;
 	uint16_t chunk_size_m1;
+	uint16_t total_pnum_words;
 	struct rte_mempool *chunk_pool;
 	struct cnxk_dpi_conf conf[CNXK_DPI_MAX_VCHANS_PER_QUEUE];
 	RTE_ATOMIC(rte_mcslock_t *) mcs_lock;
diff --git a/drivers/dma/cnxk/cnxk_dmadev_fp.c b/drivers/dma/cnxk/cnxk_dmadev_fp.c
index 26591235c6..36fc40c7e0 100644
--- a/drivers/dma/cnxk/cnxk_dmadev_fp.c
+++ b/drivers/dma/cnxk/cnxk_dmadev_fp.c
@@ -281,16 +281,15 @@  cnxk_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t d
 
 	if (flags & RTE_DMA_OP_FLAG_SUBMIT) {
 		rte_wmb();
-		plt_write64(dpi_conf->pnum_words + CNXK_DPI_DW_PER_SINGLE_CMD,
+		plt_write64(dpivf->total_pnum_words + CNXK_DPI_DW_PER_SINGLE_CMD,
 			    dpivf->rdpi.rbase + DPI_VDMA_DBELL);
-		dpi_conf->stats.submitted += dpi_conf->pending + 1;
-		dpi_conf->pnum_words = 0;
-		dpi_conf->pending = 0;
+		dpivf->total_pnum_words = 0;
 	} else {
-		dpi_conf->pnum_words += CNXK_DPI_DW_PER_SINGLE_CMD;
-		dpi_conf->pending++;
+		dpivf->total_pnum_words += CNXK_DPI_DW_PER_SINGLE_CMD;
 	}
 
+	dpi_conf->stats.submitted += 1;
+
 	return dpi_conf->desc_idx++;
 }
 
@@ -337,16 +336,15 @@  cnxk_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge
 
 	if (flags & RTE_DMA_OP_FLAG_SUBMIT) {
 		rte_wmb();
-		plt_write64(dpi_conf->pnum_words + CNXK_DPI_CMD_LEN(nb_src, nb_dst),
+		plt_write64(dpivf->total_pnum_words + CNXK_DPI_CMD_LEN(nb_src, nb_dst),
 			    dpivf->rdpi.rbase + DPI_VDMA_DBELL);
-		dpi_conf->stats.submitted += dpi_conf->pending + 1;
-		dpi_conf->pnum_words = 0;
-		dpi_conf->pending = 0;
+		dpivf->total_pnum_words = 0;
 	} else {
-		dpi_conf->pnum_words += CNXK_DPI_CMD_LEN(nb_src, nb_dst);
-		dpi_conf->pending++;
+		dpivf->total_pnum_words += CNXK_DPI_CMD_LEN(nb_src, nb_dst);
 	}
 
+	dpi_conf->stats.submitted += 1;
+
 	return dpi_conf->desc_idx++;
 }
 
@@ -383,16 +381,15 @@  cn10k_dmadev_copy(void *dev_private, uint16_t vchan, rte_iova_t src, rte_iova_t
 
 	if (flags & RTE_DMA_OP_FLAG_SUBMIT) {
 		rte_wmb();
-		plt_write64(dpi_conf->pnum_words + CNXK_DPI_DW_PER_SINGLE_CMD,
+		plt_write64(dpivf->total_pnum_words + CNXK_DPI_DW_PER_SINGLE_CMD,
 			    dpivf->rdpi.rbase + DPI_VDMA_DBELL);
-		dpi_conf->stats.submitted += dpi_conf->pending + 1;
-		dpi_conf->pnum_words = 0;
-		dpi_conf->pending = 0;
+		dpivf->total_pnum_words = 0;
 	} else {
-		dpi_conf->pnum_words += 8;
-		dpi_conf->pending++;
+		dpivf->total_pnum_words += CNXK_DPI_DW_PER_SINGLE_CMD;
 	}
 
+	dpi_conf->stats.submitted += 1;
+
 	return dpi_conf->desc_idx++;
 }
 
@@ -426,16 +423,15 @@  cn10k_dmadev_copy_sg(void *dev_private, uint16_t vchan, const struct rte_dma_sge
 
 	if (flags & RTE_DMA_OP_FLAG_SUBMIT) {
 		rte_wmb();
-		plt_write64(dpi_conf->pnum_words + CNXK_DPI_CMD_LEN(nb_src, nb_dst),
+		plt_write64(dpivf->total_pnum_words + CNXK_DPI_CMD_LEN(nb_src, nb_dst),
 			    dpivf->rdpi.rbase + DPI_VDMA_DBELL);
-		dpi_conf->stats.submitted += dpi_conf->pending + 1;
-		dpi_conf->pnum_words = 0;
-		dpi_conf->pending = 0;
+		dpivf->total_pnum_words = 0;
 	} else {
-		dpi_conf->pnum_words += CNXK_DPI_CMD_LEN(nb_src, nb_dst);
-		dpi_conf->pending++;
+		dpivf->total_pnum_words += CNXK_DPI_CMD_LEN(nb_src, nb_dst);
 	}
 
+	dpi_conf->stats.submitted += 1;
+
 	return dpi_conf->desc_idx++;
 }
 
@@ -495,15 +491,13 @@  cn10k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 
 		if (op->flags & RTE_DMA_OP_FLAG_SUBMIT) {
 			rte_wmb();
-			plt_write64(dpi_conf->pnum_words + CNXK_DPI_CMD_LEN(nb_src, nb_dst),
+			plt_write64(dpivf->total_pnum_words + CNXK_DPI_CMD_LEN(nb_src, nb_dst),
 				    dpivf->rdpi.rbase + DPI_VDMA_DBELL);
-			dpi_conf->stats.submitted += dpi_conf->pending + 1;
-			dpi_conf->pnum_words = 0;
-			dpi_conf->pending = 0;
+			dpivf->total_pnum_words = 0;
 		} else {
-			dpi_conf->pnum_words += CNXK_DPI_CMD_LEN(nb_src, nb_dst);
-			dpi_conf->pending++;
+			dpivf->total_pnum_words += CNXK_DPI_CMD_LEN(nb_src, nb_dst);
 		}
+		dpi_conf->stats.submitted += 1;
 		rte_mcslock_unlock(&dpivf->mcs_lock, &mcs_lock_me);
 	}
 
@@ -567,15 +561,13 @@  cn9k_dma_adapter_dual_enqueue(void *ws, struct rte_event ev[], uint16_t nb_event
 
 		if (op->flags & RTE_DMA_OP_FLAG_SUBMIT) {
 			rte_wmb();
-			plt_write64(dpi_conf->pnum_words + CNXK_DPI_CMD_LEN(nb_src, nb_dst),
+			plt_write64(dpivf->total_pnum_words + CNXK_DPI_CMD_LEN(nb_src, nb_dst),
 				    dpivf->rdpi.rbase + DPI_VDMA_DBELL);
-			dpi_conf->stats.submitted += dpi_conf->pending + 1;
-			dpi_conf->pnum_words = 0;
-			dpi_conf->pending = 0;
+			dpivf->total_pnum_words = 0;
 		} else {
-			dpi_conf->pnum_words += CNXK_DPI_CMD_LEN(nb_src, nb_dst);
-			dpi_conf->pending++;
+			dpivf->total_pnum_words += CNXK_DPI_CMD_LEN(nb_src, nb_dst);
 		}
+		dpi_conf->stats.submitted += 1;
 		rte_mcslock_unlock(&dpivf->mcs_lock, &mcs_lock_me);
 	}
 
@@ -636,15 +628,13 @@  cn9k_dma_adapter_enqueue(void *ws, struct rte_event ev[], uint16_t nb_events)
 
 		if (op->flags & RTE_DMA_OP_FLAG_SUBMIT) {
 			rte_wmb();
-			plt_write64(dpi_conf->pnum_words + CNXK_DPI_CMD_LEN(nb_src, nb_dst),
+			plt_write64(dpivf->total_pnum_words + CNXK_DPI_CMD_LEN(nb_src, nb_dst),
 				    dpivf->rdpi.rbase + DPI_VDMA_DBELL);
-			dpi_conf->stats.submitted += dpi_conf->pending + 1;
-			dpi_conf->pnum_words = 0;
-			dpi_conf->pending = 0;
+			dpivf->total_pnum_words = 0;
 		} else {
-			dpi_conf->pnum_words += CNXK_DPI_CMD_LEN(nb_src, nb_dst);
-			dpi_conf->pending++;
+			dpivf->total_pnum_words += CNXK_DPI_CMD_LEN(nb_src, nb_dst);
 		}
+		dpi_conf->stats.submitted += 1;
 		rte_mcslock_unlock(&dpivf->mcs_lock, &mcs_lock_me);
 	}