From patchwork Thu Apr 1 09:47:00 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Nithin Dabilpuram X-Patchwork-Id: 90302 Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id B56BEA0548; Thu, 1 Apr 2021 11:50:12 +0200 (CEST) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 302E7140E8F; Thu, 1 Apr 2021 11:48:48 +0200 (CEST) Received: from mx0b-0016f401.pphosted.com (mx0b-0016f401.pphosted.com [67.231.156.173]) by mails.dpdk.org (Postfix) with ESMTP id 64902140F36 for ; Thu, 1 Apr 2021 11:48:47 +0200 (CEST) Received: from pps.filterd (m0045851.ppops.net [127.0.0.1]) by mx0b-0016f401.pphosted.com (8.16.0.43/8.16.0.43) with SMTP id 1319fDM8031149 for ; Thu, 1 Apr 2021 02:48:46 -0700 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=marvell.com; h=from : to : cc : subject : date : message-id : in-reply-to : references : mime-version : content-type; s=pfpt0220; bh=gdDiD/G5riP/g6w3YN+RkOmtzhY7RuFUTae+0DVOug0=; b=UFx7iI409XiuSFbEDrEPwWUYw6/gNBu2csstlzsNhHAEr3NVjCF18v5kI5e4XfW/v58l EYkaSpwXNWl9oruN1jkypx3O0E2KqdHF9b1TVRMHzCprMQB4dn/0VTC3TyPJL6LX2cuL +zNslu3kdH7Ubysif/3ZnAFkeH0eezO8jl7OAT64ibs1061lDzxeF6Cdp8eGW/pBcGy0 EEfdlshgaavrdckFfzF0sR8YoV7wILeA1STKQ6eQJihqWOL2n9pNPQajq3Sppf6qPYue 7FuxYRxJ63vC82ts17BZFYjiRapAvsf1v1+um92VadCiZm+59uZi5PcKkL/YuWZ8pdTf kg== Received: from dc5-exch01.marvell.com ([199.233.59.181]) by mx0b-0016f401.pphosted.com with ESMTP id 37n28jhw68-1 (version=TLSv1.2 cipher=ECDHE-RSA-AES256-SHA384 bits=256 verify=NOT) for ; Thu, 01 Apr 2021 02:48:46 -0700 Received: from DC5-EXCH01.marvell.com (10.69.176.38) by DC5-EXCH01.marvell.com (10.69.176.38) with Microsoft SMTP Server (TLS) id 15.0.1497.2; Thu, 1 Apr 2021 02:48:44 -0700 Received: from maili.marvell.com (10.69.176.80) by DC5-EXCH01.marvell.com (10.69.176.38) with Microsoft SMTP Server id 15.0.1497.2 via Frontend Transport; Thu, 1 Apr 2021 02:48:44 -0700 Received: from hyd1588t430.marvell.com (unknown [10.29.52.204]) by maili.marvell.com (Postfix) with ESMTP id 03A703F703F; Thu, 1 Apr 2021 02:48:41 -0700 (PDT) From: Nithin Dabilpuram To: CC: , , , , , , Date: Thu, 1 Apr 2021 15:17:00 +0530 Message-ID: <20210401094739.22714-14-ndabilpuram@marvell.com> X-Mailer: git-send-email 2.8.4 In-Reply-To: <20210401094739.22714-1-ndabilpuram@marvell.com> References: <20210305133918.8005-1-ndabilpuram@marvell.com> <20210401094739.22714-1-ndabilpuram@marvell.com> MIME-Version: 1.0 X-Proofpoint-GUID: 1YZjnQwnaax3zVB5HSxiJTqYK0aHZ5pf X-Proofpoint-ORIG-GUID: 1YZjnQwnaax3zVB5HSxiJTqYK0aHZ5pf X-Proofpoint-Virus-Version: vendor=fsecure engine=2.50.10434:6.0.369, 18.0.761 definitions=2021-04-01_04:2021-03-31, 2021-04-01 signatures=0 Subject: [dpdk-dev] [PATCH v2 13/52] common/cnxk: add npa bulk alloc/free support X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" From: Ashwin Sekhar T K Add APIs to alloc/free in bulk from NPA pool. Signed-off-by: Ashwin Sekhar T K --- drivers/common/cnxk/roc_npa.h | 229 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 229 insertions(+) diff --git a/drivers/common/cnxk/roc_npa.h b/drivers/common/cnxk/roc_npa.h index 6983849..b829b23 100644 --- a/drivers/common/cnxk/roc_npa.h +++ b/drivers/common/cnxk/roc_npa.h @@ -8,6 +8,11 @@ #define ROC_AURA_ID_MASK (BIT_ULL(16) - 1) #define ROC_AURA_OP_LIMIT_MASK (BIT_ULL(36) - 1) +/* 16 CASP instructions can be outstanding in CN9k, but we use only 15 + * outstanding CASPs as we run out of registers. + */ +#define ROC_CN9K_NPA_BULK_ALLOC_MAX_PTRS 30 + /* * Generate 64bit handle to have optimized alloc and free aura operation. * 0 - ROC_AURA_ID_MASK for storing the aura_id. @@ -141,6 +146,230 @@ roc_npa_aura_op_available(uint64_t aura_handle) return reg & 0xFFFFFFFFF; } +static inline void +roc_npa_aura_op_bulk_free(uint64_t aura_handle, uint64_t const *buf, + unsigned int num, const int fabs) +{ + unsigned int i; + + for (i = 0; i < num; i++) { + const uint64_t inbuf = buf[i]; + + roc_npa_aura_op_free(aura_handle, fabs, inbuf); + } +} + +static inline unsigned int +roc_npa_aura_bulk_alloc(uint64_t aura_handle, uint64_t *buf, unsigned int num, + const int drop) +{ +#if defined(__aarch64__) + uint64_t wdata = roc_npa_aura_handle_to_aura(aura_handle); + unsigned int i, count; + uint64_t addr; + + if (drop) + wdata |= BIT_ULL(63); /* DROP */ + + addr = roc_npa_aura_handle_to_base(aura_handle) + + NPA_LF_AURA_OP_ALLOCX(0); + + switch (num) { + case 30: + asm volatile( + ".cpu generic+lse\n" + "mov v18.d[0], %[dst]\n" + "mov v18.d[1], %[loc]\n" + "mov v19.d[0], %[wdata]\n" + "mov v19.d[1], x30\n" + "mov v20.d[0], x24\n" + "mov v20.d[1], x25\n" + "mov v21.d[0], x26\n" + "mov v21.d[1], x27\n" + "mov v22.d[0], x28\n" + "mov v22.d[1], x29\n" + "mov x28, v19.d[0]\n" + "mov x29, v19.d[0]\n" + "mov x30, v18.d[1]\n" + "casp x0, x1, x28, x29, [x30]\n" + "casp x2, x3, x28, x29, [x30]\n" + "casp x4, x5, x28, x29, [x30]\n" + "casp x6, x7, x28, x29, [x30]\n" + "casp x8, x9, x28, x29, [x30]\n" + "casp x10, x11, x28, x29, [x30]\n" + "casp x12, x13, x28, x29, [x30]\n" + "casp x14, x15, x28, x29, [x30]\n" + "casp x16, x17, x28, x29, [x30]\n" + "casp x18, x19, x28, x29, [x30]\n" + "casp x20, x21, x28, x29, [x30]\n" + "casp x22, x23, x28, x29, [x30]\n" + "casp x24, x25, x28, x29, [x30]\n" + "casp x26, x27, x28, x29, [x30]\n" + "casp x28, x29, x28, x29, [x30]\n" + "mov x30, v18.d[0]\n" + "stp x0, x1, [x30]\n" + "stp x2, x3, [x30, #16]\n" + "stp x4, x5, [x30, #32]\n" + "stp x6, x7, [x30, #48]\n" + "stp x8, x9, [x30, #64]\n" + "stp x10, x11, [x30, #80]\n" + "stp x12, x13, [x30, #96]\n" + "stp x14, x15, [x30, #112]\n" + "stp x16, x17, [x30, #128]\n" + "stp x18, x19, [x30, #144]\n" + "stp x20, x21, [x30, #160]\n" + "stp x22, x23, [x30, #176]\n" + "stp x24, x25, [x30, #192]\n" + "stp x26, x27, [x30, #208]\n" + "stp x28, x29, [x30, #224]\n" + "mov %[dst], v18.d[0]\n" + "mov %[loc], v18.d[1]\n" + "mov %[wdata], v19.d[0]\n" + "mov x30, v19.d[1]\n" + "mov x24, v20.d[0]\n" + "mov x25, v20.d[1]\n" + "mov x26, v21.d[0]\n" + "mov x27, v21.d[1]\n" + "mov x28, v22.d[0]\n" + "mov x29, v22.d[1]\n" + : + : [wdata] "r"(wdata), [loc] "r"(addr), [dst] "r"(buf) + : "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6", + "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", + "x15", "x16", "x17", "x18", "x19", "x20", "x21", + "x22", "x23", "v18", "v19", "v20", "v21", "v22"); + break; + case 16: + asm volatile( + ".cpu generic+lse\n" + "mov x16, %[wdata]\n" + "mov x17, %[wdata]\n" + "casp x0, x1, x16, x17, [%[loc]]\n" + "casp x2, x3, x16, x17, [%[loc]]\n" + "casp x4, x5, x16, x17, [%[loc]]\n" + "casp x6, x7, x16, x17, [%[loc]]\n" + "casp x8, x9, x16, x17, [%[loc]]\n" + "casp x10, x11, x16, x17, [%[loc]]\n" + "casp x12, x13, x16, x17, [%[loc]]\n" + "casp x14, x15, x16, x17, [%[loc]]\n" + "stp x0, x1, [%[dst]]\n" + "stp x2, x3, [%[dst], #16]\n" + "stp x4, x5, [%[dst], #32]\n" + "stp x6, x7, [%[dst], #48]\n" + "stp x8, x9, [%[dst], #64]\n" + "stp x10, x11, [%[dst], #80]\n" + "stp x12, x13, [%[dst], #96]\n" + "stp x14, x15, [%[dst], #112]\n" + : + : [wdata] "r" (wdata), [dst] "r" (buf), [loc] "r" (addr) + : "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6", + "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", + "x15", "x16", "x17" + ); + break; + case 8: + asm volatile( + ".cpu generic+lse\n" + "mov x16, %[wdata]\n" + "mov x17, %[wdata]\n" + "casp x0, x1, x16, x17, [%[loc]]\n" + "casp x2, x3, x16, x17, [%[loc]]\n" + "casp x4, x5, x16, x17, [%[loc]]\n" + "casp x6, x7, x16, x17, [%[loc]]\n" + "stp x0, x1, [%[dst]]\n" + "stp x2, x3, [%[dst], #16]\n" + "stp x4, x5, [%[dst], #32]\n" + "stp x6, x7, [%[dst], #48]\n" + : + : [wdata] "r" (wdata), [dst] "r" (buf), [loc] "r" (addr) + : "memory", "x0", "x1", "x2", "x3", "x4", "x5", "x6", + "x7", "x16", "x17" + ); + break; + case 4: + asm volatile( + ".cpu generic+lse\n" + "mov x16, %[wdata]\n" + "mov x17, %[wdata]\n" + "casp x0, x1, x16, x17, [%[loc]]\n" + "casp x2, x3, x16, x17, [%[loc]]\n" + "stp x0, x1, [%[dst]]\n" + "stp x2, x3, [%[dst], #16]\n" + : + : [wdata] "r" (wdata), [dst] "r" (buf), [loc] "r" (addr) + : "memory", "x0", "x1", "x2", "x3", "x16", "x17" + ); + break; + case 2: + asm volatile( + ".cpu generic+lse\n" + "mov x16, %[wdata]\n" + "mov x17, %[wdata]\n" + "casp x0, x1, x16, x17, [%[loc]]\n" + "stp x0, x1, [%[dst]]\n" + : + : [wdata] "r" (wdata), [dst] "r" (buf), [loc] "r" (addr) + : "memory", "x0", "x1", "x16", "x17" + ); + break; + case 1: + buf[0] = roc_npa_aura_op_alloc(aura_handle, drop); + return !!buf[0]; + } + + /* Pack the pointers */ + for (i = 0, count = 0; i < num; i++) + if (buf[i]) + buf[count++] = buf[i]; + + return count; +#else + unsigned int i, count; + + for (i = 0, count = 0; i < num; i++) { + buf[count] = roc_npa_aura_op_alloc(aura_handle, drop); + if (buf[count]) + count++; + } + + return count; +#endif +} + +static inline unsigned int +roc_npa_aura_op_bulk_alloc(uint64_t aura_handle, uint64_t *buf, + unsigned int num, const int drop, const int partial) +{ + unsigned int chunk, count, num_alloc; + + count = 0; + while (num) { + chunk = (num >= ROC_CN9K_NPA_BULK_ALLOC_MAX_PTRS) ? + ROC_CN9K_NPA_BULK_ALLOC_MAX_PTRS : + plt_align32prevpow2(num); + + num_alloc = + roc_npa_aura_bulk_alloc(aura_handle, buf, chunk, drop); + + count += num_alloc; + buf += num_alloc; + num -= num_alloc; + + if (unlikely(num_alloc != chunk)) + break; + } + + /* If the requested number of pointers was not allocated and if partial + * alloc is not desired, then free allocated pointers. + */ + if (unlikely(num != 0 && !partial)) { + roc_npa_aura_op_bulk_free(aura_handle, buf - count, count, 1); + count = 0; + } + + return count; +} + struct roc_npa { struct plt_pci_device *pci_dev;