diff mbox series

[v3,15/52] common/cnxk: add npa batch alloc/free support

Message ID: 20210401123817.14348-16-ndabilpuram@marvell.com (mailing list archive)
State: Changes Requested, archived
Delegated to: Jerin Jacob
Series: Add Marvell CNXK common driver

Checks

Context        Check     Description
ci/checkpatch  warning   coding style issues

Commit Message

Nithin Kumar Dabilpuram April 1, 2021, 12:37 p.m. UTC
From: Ashwin Sekhar T K <asekhar@marvell.com>

Add APIs to perform batch allocations and frees from an NPA pool.
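
For reference, below is a minimal caller sketch (illustrative only, not part of
the patch: the function name, EXAMPLE_NUM_BUFS and the roc_api.h include are
hypothetical, and the aura handle plus the LMT line described by
lmt_addr/lmt_id are assumed to be set up elsewhere) showing how the new batch
alloc and batch free helpers are meant to be used together:

/* Illustrative sketch only -- not part of this patch. */
#include "roc_api.h"

#define EXAMPLE_NUM_BUFS 256

static int
example_batch_alloc_free(uint64_t aura_handle, uint64_t lmt_addr,
			 uint64_t lmt_id)
{
	/* Scratch area the hardware fills with batch-alloc results; it must
	 * be 128-byte (ROC_ALIGN) aligned. Sizing it for the maximum batch
	 * keeps it valid for any chunk the helper issues.
	 */
	uint64_t aligned_buf[ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS]
		__attribute__((aligned(ROC_ALIGN)));
	uint64_t bufs[EXAMPLE_NUM_BUFS];
	unsigned int n;

	/* dis_wait = 0 and drop = 0 are passed straight into the batch-alloc
	 * compare word; partial = 0 requests all-or-nothing semantics, so a
	 * short allocation is freed back and 0 is returned.
	 */
	n = roc_npa_aura_op_batch_alloc(aura_handle, bufs, aligned_buf,
					EXAMPLE_NUM_BUFS, 0, 0, 0);
	if (n != EXAMPLE_NUM_BUFS)
		return -1;

	/* Free the pointers back. The helper splits the request into LMTST
	 * chunks of at most ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS pointers; the
	 * fabs flag is forwarded to the FABS bit of NPA_LF_AURA_BATCH_FREE0.
	 */
	roc_npa_aura_op_batch_free(aura_handle, bufs, n, 0, lmt_addr, lmt_id);

	return 0;
}

The scratch buffer is sized for ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS entries,
which is enough for any single chunk; roc_npa_aura_op_batch_alloc itself
splits larger requests into chunks of at most that many pointers.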

Signed-off-by: Ashwin Sekhar T K <asekhar@marvell.com>
---
 drivers/common/cnxk/roc_npa.h | 217 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 217 insertions(+)

Patch

diff --git a/drivers/common/cnxk/roc_npa.h b/drivers/common/cnxk/roc_npa.h
index 7c6f78d..89f5c6f 100644
--- a/drivers/common/cnxk/roc_npa.h
+++ b/drivers/common/cnxk/roc_npa.h
@@ -8,6 +8,9 @@ 
 #define ROC_AURA_ID_MASK       (BIT_ULL(16) - 1)
 #define ROC_AURA_OP_LIMIT_MASK (BIT_ULL(36) - 1)
 
+#define ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS 512
+#define ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS  15
+
 /* 16 CASP instructions can be outstanding in CN9k, but we use only 15
  * outstanding CASPs as we run out of registers.
  */
@@ -180,6 +183,114 @@  roc_npa_pool_op_performance_counter(uint64_t aura_handle, const int drop)
 		return reg & 0xFFFFFFFFFFFF;
 }
 
+static inline int
+roc_npa_aura_batch_alloc_issue(uint64_t aura_handle, uint64_t *buf,
+			       unsigned int num, const int dis_wait,
+			       const int drop)
+{
+	unsigned int i;
+	int64_t *addr;
+	uint64_t res;
+	union {
+		uint64_t u;
+		struct npa_batch_alloc_compare_s compare_s;
+	} cmp;
+
+	if (num > ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS)
+		return -1;
+
+	/* Zero first word of every cache line */
+	for (i = 0; i < num; i += (ROC_ALIGN / sizeof(uint64_t)))
+		buf[i] = 0;
+
+	addr = (int64_t *)(roc_npa_aura_handle_to_base(aura_handle) +
+			   NPA_LF_AURA_BATCH_ALLOC);
+	cmp.u = 0;
+	cmp.compare_s.aura = roc_npa_aura_handle_to_aura(aura_handle);
+	cmp.compare_s.drop = drop;
+	cmp.compare_s.stype = ALLOC_STYPE_STSTP;
+	cmp.compare_s.dis_wait = dis_wait;
+	cmp.compare_s.count = num;
+
+	res = roc_atomic64_cas(cmp.u, (uint64_t)buf, addr);
+	if (res != ALLOC_RESULT_ACCEPTED && res != ALLOC_RESULT_NOCORE)
+		return -1;
+
+	return 0;
+}
+
+static inline unsigned int
+roc_npa_aura_batch_alloc_count(uint64_t *aligned_buf, unsigned int num)
+{
+	unsigned int count, i;
+
+	if (num > ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS)
+		return 0;
+
+	count = 0;
+	/* Check each ROC cache line one by one */
+	for (i = 0; i < num; i += (ROC_ALIGN >> 3)) {
+		struct npa_batch_alloc_status_s *status;
+		int ccode;
+
+		status = (struct npa_batch_alloc_status_s *)&aligned_buf[i];
+
+		/* Status is updated in first 7 bits of each 128 byte cache
+		 * line. Wait until the status gets updated.
+		 */
+		do {
+			ccode = (volatile int)status->ccode;
+		} while (ccode == ALLOC_CCODE_INVAL);
+
+		count += status->count;
+	}
+
+	return count;
+}
+
+static inline unsigned int
+roc_npa_aura_batch_alloc_extract(uint64_t *buf, uint64_t *aligned_buf,
+				 unsigned int num)
+{
+	unsigned int count, i;
+
+	if (num > ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS)
+		return 0;
+
+	count = 0;
+	/* Check each ROC cache line one by one */
+	for (i = 0; i < num; i += (ROC_ALIGN >> 3)) {
+		struct npa_batch_alloc_status_s *status;
+		int line_count, ccode;
+
+		status = (struct npa_batch_alloc_status_s *)&aligned_buf[i];
+
+		/* Status is updated in first 7 bits of each 128 byte cache
+		 * line. Wait until the status gets updated.
+		 */
+		do {
+			ccode = (volatile int)status->ccode;
+		} while (ccode == ALLOC_CCODE_INVAL);
+
+		line_count = status->count;
+
+		/* Clear the status from the cache line */
+		status->ccode = 0;
+		status->count = 0;
+
+		/* 'Compress' the allocated buffers as there can
+		 * be 'holes' at the end of the 128 byte cache
+		 * lines.
+		 */
+		memmove(&buf[count], &aligned_buf[i],
+			line_count * sizeof(uint64_t));
+
+		count += line_count;
+	}
+
+	return count;
+}
+
 static inline void
 roc_npa_aura_op_bulk_free(uint64_t aura_handle, uint64_t const *buf,
 			  unsigned int num, const int fabs)
@@ -194,6 +305,112 @@  roc_npa_aura_op_bulk_free(uint64_t aura_handle, uint64_t const *buf,
 }
 
 static inline unsigned int
+roc_npa_aura_op_batch_alloc(uint64_t aura_handle, uint64_t *buf,
+			    uint64_t *aligned_buf, unsigned int num,
+			    const int dis_wait, const int drop,
+			    const int partial)
+{
+	unsigned int count, chunk, num_alloc;
+
+	/* The buffer should be 128 byte cache line aligned */
+	if (((uint64_t)aligned_buf & (ROC_ALIGN - 1)) != 0)
+		return 0;
+
+	count = 0;
+	while (num) {
+		chunk = (num > ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS) ?
+				      ROC_CN10K_NPA_BATCH_ALLOC_MAX_PTRS :
+				      num;
+
+		if (roc_npa_aura_batch_alloc_issue(aura_handle, aligned_buf,
+						   chunk, dis_wait, drop))
+			break;
+
+		num_alloc = roc_npa_aura_batch_alloc_extract(buf, aligned_buf,
+							     chunk);
+
+		count += num_alloc;
+		buf += num_alloc;
+		num -= num_alloc;
+
+		if (num_alloc != chunk)
+			break;
+	}
+
+	/* If the requested number of pointers was not allocated and if partial
+	 * alloc is not desired, then free allocated pointers.
+	 */
+	if (unlikely(num != 0 && !partial)) {
+		roc_npa_aura_op_bulk_free(aura_handle, buf - count, count, 1);
+		count = 0;
+	}
+
+	return count;
+}
+
+static inline void
+roc_npa_aura_batch_free(uint64_t aura_handle, uint64_t const *buf,
+			unsigned int num, const int fabs, uint64_t lmt_addr,
+			uint64_t lmt_id)
+{
+	uint64_t addr, tar_addr, free0;
+	volatile uint64_t *lmt_data;
+	unsigned int i;
+
+	if (num > ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS)
+		return;
+
+	lmt_data = (uint64_t *)lmt_addr;
+
+	addr = roc_npa_aura_handle_to_base(aura_handle) +
+	       NPA_LF_AURA_BATCH_FREE0;
+
+	/*
+	 * NPA_LF_AURA_BATCH_FREE0
+	 *
+	 * 63   63 62  33 32       32 31  20 19    0
+	 * -----------------------------------------
+	 * | FABS | Rsvd | COUNT_EOT | Rsvd | AURA |
+	 * -----------------------------------------
+	 */
+	free0 = roc_npa_aura_handle_to_aura(aura_handle);
+	if (fabs)
+		free0 |= (0x1UL << 63);
+	if (num & 0x1)
+		free0 |= (0x1UL << 32);
+
+	/* tar_addr[4:6] is LMTST size-1 in units of 128b */
+	tar_addr = addr | ((num >> 1) << 4);
+
+	lmt_data[0] = free0;
+	for (i = 0; i < num; i++)
+		lmt_data[i + 1] = buf[i];
+
+	roc_lmt_submit_steorl(lmt_id, tar_addr);
+	plt_io_wmb();
+}
+
+static inline void
+roc_npa_aura_op_batch_free(uint64_t aura_handle, uint64_t const *buf,
+			   unsigned int num, const int fabs, uint64_t lmt_addr,
+			   uint64_t lmt_id)
+{
+	unsigned int chunk;
+
+	while (num) {
+		chunk = (num >= ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS) ?
+				      ROC_CN10K_NPA_BATCH_FREE_MAX_PTRS :
+				      num;
+
+		roc_npa_aura_batch_free(aura_handle, buf, chunk, fabs, lmt_addr,
+					lmt_id);
+
+		buf += chunk;
+		num -= chunk;
+	}
+}
+
+static inline unsigned int
 roc_npa_aura_bulk_alloc(uint64_t aura_handle, uint64_t *buf, unsigned int num,
 			const int drop)
 {