From patchwork Mon Apr 26 09:52:48 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Bruce Richardson X-Patchwork-Id: 92152 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id BE622A0548; Mon, 26 Apr 2021 11:53:17 +0200 (CEST) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 3F06841110; Mon, 26 Apr 2021 11:53:17 +0200 (CEST) Received: from mga02.intel.com (mga02.intel.com [134.134.136.20]) by mails.dpdk.org (Postfix) with ESMTP id 0041B40140 for ; Mon, 26 Apr 2021 11:53:13 +0200 (CEST) IronPort-SDR: BXzefYvaQlHoVEFXCCamc2f6Vhhnn4JNSqm8JZDIkmaYDoUZvDH6eCbC2fUoY+wxk1B6PwhEul bxqmOsBL9ahg== X-IronPort-AV: E=McAfee;i="6200,9189,9965"; a="183442897" X-IronPort-AV: E=Sophos;i="5.82,252,1613462400"; d="scan'208";a="183442897" Received: from orsmga008.jf.intel.com ([10.7.209.65]) by orsmga101.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 26 Apr 2021 02:53:12 -0700 IronPort-SDR: RSWaQ9/WTaN4DfENnVns1rG0AckEMAL/wQoP+/625LG6lmXHtsAO41i6qK/xrPf2yC2Hy7Wnnd gDZxSyFbyBUg== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.82,252,1613462400"; d="scan'208";a="429336891" Received: from silpixa00399126.ir.intel.com ([10.237.223.81]) by orsmga008.jf.intel.com with ESMTP; 26 Apr 2021 02:53:10 -0700 From: Bruce Richardson To: dev@dpdk.org Cc: kevin.laatz@intel.com, jiayu.hu@intel.com, Bruce Richardson Date: Mon, 26 Apr 2021 10:52:48 +0100 Message-Id: <20210426095259.225354-2-bruce.richardson@intel.com> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20210426095259.225354-1-bruce.richardson@intel.com> References: <20210318182042.43658-2-bruce.richardson@intel.com> <20210426095259.225354-1-bruce.richardson@intel.com> MIME-Version: 1.0 Subject: [dpdk-dev] [PATCH v2 01/12] raw/ioat: add unit tests for completion batching X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Add in additional unit tests to verify that we can get completion reports of multiple batches in a single completed_ops() call. Also verify we can get smaller number of completions if that is requested too. Signed-off-by: Bruce Richardson --- drivers/raw/ioat/ioat_rawdev_test.c | 199 +++++++++++++++++++--------- 1 file changed, 137 insertions(+), 62 deletions(-) diff --git a/drivers/raw/ioat/ioat_rawdev_test.c b/drivers/raw/ioat/ioat_rawdev_test.c index 101f24a677..a5064d739d 100644 --- a/drivers/raw/ioat/ioat_rawdev_test.c +++ b/drivers/raw/ioat/ioat_rawdev_test.c @@ -11,6 +11,7 @@ #define MAX_SUPPORTED_RAWDEVS 64 #define TEST_SKIPPED 77 +#define COPY_LEN 1024 int ioat_rawdev_test(uint16_t dev_id); /* pre-define to keep compiler happy */ @@ -34,32 +35,114 @@ print_err(const char *func, int lineno, const char *format, ...) 
return ret; } +static int +do_multi_copies(int dev_id, int split_batches, int split_completions) +{ + struct rte_mbuf *srcs[32], *dsts[32]; + struct rte_mbuf *completed_src[64]; + struct rte_mbuf *completed_dst[64]; + unsigned int i, j; + + for (i = 0; i < RTE_DIM(srcs); i++) { + char *src_data; + + if (split_batches && i == RTE_DIM(srcs) / 2) + rte_ioat_perform_ops(dev_id); + + srcs[i] = rte_pktmbuf_alloc(pool); + dsts[i] = rte_pktmbuf_alloc(pool); + src_data = rte_pktmbuf_mtod(srcs[i], char *); + + for (j = 0; j < COPY_LEN; j++) + src_data[j] = rand() & 0xFF; + + if (rte_ioat_enqueue_copy(dev_id, + srcs[i]->buf_iova + srcs[i]->data_off, + dsts[i]->buf_iova + dsts[i]->data_off, + COPY_LEN, + (uintptr_t)srcs[i], + (uintptr_t)dsts[i]) != 1) { + PRINT_ERR("Error with rte_ioat_enqueue_copy for buffer %u\n", + i); + return -1; + } + } + rte_ioat_perform_ops(dev_id); + usleep(100); + + if (split_completions) { + /* gather completions in two halves */ + uint16_t half_len = RTE_DIM(srcs) / 2; + if (rte_ioat_completed_ops(dev_id, half_len, (void *)completed_src, + (void *)completed_dst) != half_len) { + PRINT_ERR("Error with rte_ioat_completed_ops - first half request\n"); + rte_rawdev_dump(dev_id, stdout); + return -1; + } + if (rte_ioat_completed_ops(dev_id, half_len, (void *)&completed_src[half_len], + (void *)&completed_dst[half_len]) != half_len) { + PRINT_ERR("Error with rte_ioat_completed_ops - second half request\n"); + rte_rawdev_dump(dev_id, stdout); + return -1; + } + } else { + /* gather all completions in one go */ + if (rte_ioat_completed_ops(dev_id, 64, (void *)completed_src, + (void *)completed_dst) != RTE_DIM(srcs)) { + PRINT_ERR("Error with rte_ioat_completed_ops\n"); + rte_rawdev_dump(dev_id, stdout); + return -1; + } + } + for (i = 0; i < RTE_DIM(srcs); i++) { + char *src_data, *dst_data; + + if (completed_src[i] != srcs[i]) { + PRINT_ERR("Error with source pointer %u\n", i); + return -1; + } + if (completed_dst[i] != dsts[i]) { + PRINT_ERR("Error with dest pointer %u\n", i); + return -1; + } + + src_data = rte_pktmbuf_mtod(srcs[i], char *); + dst_data = rte_pktmbuf_mtod(dsts[i], char *); + for (j = 0; j < COPY_LEN; j++) + if (src_data[j] != dst_data[j]) { + PRINT_ERR("Error with copy of packet %u, byte %u\n", + i, j); + return -1; + } + rte_pktmbuf_free(srcs[i]); + rte_pktmbuf_free(dsts[i]); + } + return 0; +} + static int test_enqueue_copies(int dev_id) { - const unsigned int length = 1024; unsigned int i; + /* test doing a single copy */ do { struct rte_mbuf *src, *dst; char *src_data, *dst_data; struct rte_mbuf *completed[2] = {0}; - /* test doing a single copy */ src = rte_pktmbuf_alloc(pool); dst = rte_pktmbuf_alloc(pool); - src->data_len = src->pkt_len = length; - dst->data_len = dst->pkt_len = length; src_data = rte_pktmbuf_mtod(src, char *); dst_data = rte_pktmbuf_mtod(dst, char *); - for (i = 0; i < length; i++) + for (i = 0; i < COPY_LEN; i++) src_data[i] = rand() & 0xFF; if (rte_ioat_enqueue_copy(dev_id, src->buf_iova + src->data_off, dst->buf_iova + dst->data_off, - length, + COPY_LEN, (uintptr_t)src, (uintptr_t)dst) != 1) { PRINT_ERR("Error with rte_ioat_enqueue_copy\n"); @@ -79,99 +162,91 @@ test_enqueue_copies(int dev_id) return -1; } - for (i = 0; i < length; i++) + for (i = 0; i < COPY_LEN; i++) if (dst_data[i] != src_data[i]) { - PRINT_ERR("Data mismatch at char %u\n", i); + PRINT_ERR("Data mismatch at char %u [Got %02x not %02x]\n", + i, dst_data[i], src_data[i]); return -1; } rte_pktmbuf_free(src); rte_pktmbuf_free(dst); } while (0); - /* test doing 
multiple copies */ + /* test doing a multiple single copies */ do { - struct rte_mbuf *srcs[32], *dsts[32]; - struct rte_mbuf *completed_src[64]; - struct rte_mbuf *completed_dst[64]; - unsigned int j; - - for (i = 0; i < RTE_DIM(srcs); i++) { - char *src_data; + const uint16_t max_ops = 4; + struct rte_mbuf *src, *dst; + char *src_data, *dst_data; + struct rte_mbuf *completed[32] = {0}; + const uint16_t max_completions = RTE_DIM(completed) / 2; - srcs[i] = rte_pktmbuf_alloc(pool); - dsts[i] = rte_pktmbuf_alloc(pool); - srcs[i]->data_len = srcs[i]->pkt_len = length; - dsts[i]->data_len = dsts[i]->pkt_len = length; - src_data = rte_pktmbuf_mtod(srcs[i], char *); + src = rte_pktmbuf_alloc(pool); + dst = rte_pktmbuf_alloc(pool); + src_data = rte_pktmbuf_mtod(src, char *); + dst_data = rte_pktmbuf_mtod(dst, char *); - for (j = 0; j < length; j++) - src_data[j] = rand() & 0xFF; + for (i = 0; i < COPY_LEN; i++) + src_data[i] = rand() & 0xFF; + /* perform the same copy times */ + for (i = 0; i < max_ops; i++) { if (rte_ioat_enqueue_copy(dev_id, - srcs[i]->buf_iova + srcs[i]->data_off, - dsts[i]->buf_iova + dsts[i]->data_off, - length, - (uintptr_t)srcs[i], - (uintptr_t)dsts[i]) != 1) { - PRINT_ERR("Error with rte_ioat_enqueue_copy for buffer %u\n", - i); + src->buf_iova + src->data_off, + dst->buf_iova + dst->data_off, + COPY_LEN, + (uintptr_t)src, + (uintptr_t)dst) != 1) { + PRINT_ERR("Error with rte_ioat_enqueue_copy\n"); return -1; } + rte_ioat_perform_ops(dev_id); } - rte_ioat_perform_ops(dev_id); - usleep(100); + usleep(10); - if (rte_ioat_completed_ops(dev_id, 64, (void *)completed_src, - (void *)completed_dst) != RTE_DIM(srcs)) { + if (rte_ioat_completed_ops(dev_id, max_completions, (void *)&completed[0], + (void *)&completed[max_completions]) != max_ops) { PRINT_ERR("Error with rte_ioat_completed_ops\n"); return -1; } - for (i = 0; i < RTE_DIM(srcs); i++) { - char *src_data, *dst_data; + if (completed[0] != src || completed[max_completions] != dst) { + PRINT_ERR("Error with completions: got (%p, %p), not (%p,%p)\n", + completed[0], completed[max_completions], src, dst); + return -1; + } - if (completed_src[i] != srcs[i]) { - PRINT_ERR("Error with source pointer %u\n", i); - return -1; - } - if (completed_dst[i] != dsts[i]) { - PRINT_ERR("Error with dest pointer %u\n", i); + for (i = 0; i < COPY_LEN; i++) + if (dst_data[i] != src_data[i]) { + PRINT_ERR("Data mismatch at char %u\n", i); return -1; } - - src_data = rte_pktmbuf_mtod(srcs[i], char *); - dst_data = rte_pktmbuf_mtod(dsts[i], char *); - for (j = 0; j < length; j++) - if (src_data[j] != dst_data[j]) { - PRINT_ERR("Error with copy of packet %u, byte %u\n", - i, j); - return -1; - } - rte_pktmbuf_free(srcs[i]); - rte_pktmbuf_free(dsts[i]); - } - + rte_pktmbuf_free(src); + rte_pktmbuf_free(dst); } while (0); + /* test doing multiple copies */ + do_multi_copies(dev_id, 0, 0); /* enqueue and complete one batch at a time */ + do_multi_copies(dev_id, 1, 0); /* enqueue 2 batches and then complete both */ + do_multi_copies(dev_id, 0, 1); /* enqueue 1 batch, then complete in two halves */ return 0; } static int test_enqueue_fill(int dev_id) { - const unsigned int length[] = {8, 64, 1024, 50, 100, 89}; + const unsigned int lengths[] = {8, 64, 1024, 50, 100, 89}; struct rte_mbuf *dst = rte_pktmbuf_alloc(pool); char *dst_data = rte_pktmbuf_mtod(dst, char *); struct rte_mbuf *completed[2] = {0}; uint64_t pattern = 0xfedcba9876543210; unsigned int i, j; - for (i = 0; i < RTE_DIM(length); i++) { + for (i = 0; i < RTE_DIM(lengths); i++) { /* reset 
dst_data */ - memset(dst_data, 0, length[i]); + memset(dst_data, 0, lengths[i]); /* perform the fill operation */ if (rte_ioat_enqueue_fill(dev_id, pattern, - dst->buf_iova + dst->data_off, length[i], + dst->buf_iova + dst->data_off, lengths[i], (uintptr_t)dst) != 1) { PRINT_ERR("Error with rte_ioat_enqueue_fill\n"); return -1; @@ -186,11 +261,11 @@ test_enqueue_fill(int dev_id) return -1; } /* check the result */ - for (j = 0; j < length[i]; j++) { + for (j = 0; j < lengths[i]; j++) { char pat_byte = ((char *)&pattern)[j % 8]; if (dst_data[j] != pat_byte) { - PRINT_ERR("Error with fill operation (length = %u): got (%x), not (%x)\n", - length[i], dst_data[j], + PRINT_ERR("Error with fill operation (lengths = %u): got (%x), not (%x)\n", + lengths[i], dst_data[j], pat_byte); return -1; } From patchwork Mon Apr 26 09:52:49 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Bruce Richardson X-Patchwork-Id: 92155 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id 1C6ADA0548; Mon, 26 Apr 2021 11:53:34 +0200 (CEST) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id BBA4E411B5; Mon, 26 Apr 2021 11:53:21 +0200 (CEST) Received: from mga02.intel.com (mga02.intel.com [134.134.136.20]) by mails.dpdk.org (Postfix) with ESMTP id 5B6E7411A0 for ; Mon, 26 Apr 2021 11:53:15 +0200 (CEST) IronPort-SDR: 6ai5+yjQlJynqOolcqEbnibzQ0PycgWitzTqJhq/dbqnmQotWHt2J77d9YxGGvSt9wzU9TdPUR DWiEXtmwxclg== X-IronPort-AV: E=McAfee;i="6200,9189,9965"; a="183442899" X-IronPort-AV: E=Sophos;i="5.82,252,1613462400"; d="scan'208";a="183442899" Received: from orsmga008.jf.intel.com ([10.7.209.65]) by orsmga101.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 26 Apr 2021 02:53:13 -0700 IronPort-SDR: tRBovQnnugG9lxPj3QFUz1t/dOD0xtbwAe5qePwU9bc6N36Z0L97DjTVzEdqMZzQ7QFC5Enngb 9x0jOqUB+isA== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.82,252,1613462400"; d="scan'208";a="429336898" Received: from silpixa00399126.ir.intel.com ([10.237.223.81]) by orsmga008.jf.intel.com with ESMTP; 26 Apr 2021 02:53:11 -0700 From: Bruce Richardson To: dev@dpdk.org Cc: kevin.laatz@intel.com, jiayu.hu@intel.com, Bruce Richardson Date: Mon, 26 Apr 2021 10:52:49 +0100 Message-Id: <20210426095259.225354-3-bruce.richardson@intel.com> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20210426095259.225354-1-bruce.richardson@intel.com> References: <20210318182042.43658-2-bruce.richardson@intel.com> <20210426095259.225354-1-bruce.richardson@intel.com> MIME-Version: 1.0 Subject: [dpdk-dev] [PATCH v2 02/12] raw/ioat: support limiting queues for idxd PCI device X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" When using a full device instance via vfio, allow the user to specify a maximum number of queues to configure rather than always using the max number of supported queues. 
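To make the new parameter handling concrete, below is a minimal, self-contained sketch of the single-parameter parsing style this patch adds (the driver keeps to sscanf() until more than one devarg exists, at which point rte_kvargs would be used instead). parse_max_queues() is a hypothetical helper written purely for illustration, not a function from the driver; the string it parses is the "max_queues=N" devargs supplied on the EAL allowlist entry for the device.

    #include <stdio.h>

    /* Hypothetical helper: parse an optional "max_queues=N" devargs string.
     * Returns 0 on success (with *max_queues set, 0 meaning "no limit"),
     * or -1 if a devargs string is present but not recognised. */
    static int
    parse_max_queues(const char *args, unsigned int *max_queues)
    {
            *max_queues = 0;                  /* default: use all queues */
            if (args == NULL || args[0] == '\0')
                    return 0;                 /* no devargs given */
            if (sscanf(args, "max_queues=%u", max_queues) != 1)
                    return -1;                /* unknown parameter */
            return 0;
    }

A result of 0 with *max_queues left at zero keeps the default behaviour of configuring every available workqueue, matching the documentation update in this patch.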
Signed-off-by: Bruce Richardson --- doc/guides/rawdevs/ioat.rst | 8 ++++++++ drivers/raw/ioat/idxd_pci.c | 28 ++++++++++++++++++++++++++-- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/doc/guides/rawdevs/ioat.rst b/doc/guides/rawdevs/ioat.rst index 250cfc48a6..60438cc3bc 100644 --- a/doc/guides/rawdevs/ioat.rst +++ b/doc/guides/rawdevs/ioat.rst @@ -106,6 +106,14 @@ For devices bound to a suitable DPDK-supported VFIO/UIO driver, the HW devices w be found as part of the device scan done at application initialization time without the need to pass parameters to the application. +For Intel\ |reg| DSA devices, DPDK will automatically configure the device with the +maximum number of workqueues available on it, partitioning all resources equally +among the queues. +If fewer workqueues are required, then the ``max_queues`` parameter may be passed to +the device driver on the EAL commandline, via the ``allowlist`` or ``-a`` flag e.g.:: + + $ dpdk-test -a ,max_queues=4 + If the device is bound to the IDXD kernel driver (and previously configured with sysfs), then a specific work queue needs to be passed to the application via a vdev parameter. This vdev parameter take the driver name and work queue name as parameters. diff --git a/drivers/raw/ioat/idxd_pci.c b/drivers/raw/ioat/idxd_pci.c index 01623f33f6..b48e565b4c 100644 --- a/drivers/raw/ioat/idxd_pci.c +++ b/drivers/raw/ioat/idxd_pci.c @@ -4,6 +4,7 @@ #include #include +#include #include "ioat_private.h" #include "ioat_spec.h" @@ -123,7 +124,8 @@ static const struct rte_rawdev_ops idxd_pci_ops = { #define IDXD_PORTAL_SIZE (4096 * 4) static int -init_pci_device(struct rte_pci_device *dev, struct idxd_rawdev *idxd) +init_pci_device(struct rte_pci_device *dev, struct idxd_rawdev *idxd, + unsigned int max_queues) { struct idxd_pci_common *pci; uint8_t nb_groups, nb_engines, nb_wqs; @@ -179,6 +181,16 @@ init_pci_device(struct rte_pci_device *dev, struct idxd_rawdev *idxd) for (i = 0; i < nb_wqs; i++) idxd_get_wq_cfg(pci, i)[0] = 0; + /* limit queues if necessary */ + if (max_queues != 0 && nb_wqs > max_queues) { + nb_wqs = max_queues; + if (nb_engines > max_queues) + nb_engines = max_queues; + if (nb_groups > max_queues) + nb_engines = max_queues; + IOAT_PMD_DEBUG("Limiting queues to %u", nb_wqs); + } + /* put each engine into a separate group to avoid reordering */ if (nb_groups > nb_engines) nb_groups = nb_engines; @@ -242,12 +254,23 @@ idxd_rawdev_probe_pci(struct rte_pci_driver *drv, struct rte_pci_device *dev) uint8_t nb_wqs; int qid, ret = 0; char name[PCI_PRI_STR_SIZE]; + unsigned int max_queues = 0; rte_pci_device_name(&dev->addr, name, sizeof(name)); IOAT_PMD_INFO("Init %s on NUMA node %d", name, dev->device.numa_node); dev->device.driver = &drv->driver; - ret = init_pci_device(dev, &idxd); + if (dev->device.devargs && dev->device.devargs->args[0] != '\0') { + /* if the number of devargs grows beyond just 1, use rte_kvargs */ + if (sscanf(dev->device.devargs->args, + "max_queues=%u", &max_queues) != 1) { + IOAT_PMD_ERR("Invalid device parameter: '%s'", + dev->device.devargs->args); + return -1; + } + } + + ret = init_pci_device(dev, &idxd, max_queues); if (ret < 0) { IOAT_PMD_ERR("Error initializing PCI hardware"); return ret; @@ -353,3 +376,4 @@ RTE_PMD_REGISTER_PCI(IDXD_PMD_RAWDEV_NAME_PCI, idxd_pmd_drv_pci); RTE_PMD_REGISTER_PCI_TABLE(IDXD_PMD_RAWDEV_NAME_PCI, pci_id_idxd_map); RTE_PMD_REGISTER_KMOD_DEP(IDXD_PMD_RAWDEV_NAME_PCI, "* igb_uio | uio_pci_generic | vfio-pci"); +RTE_PMD_REGISTER_PARAM_STRING(rawdev_idxd_pci, 
"max_queues=0"); From patchwork Mon Apr 26 09:52:50 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Bruce Richardson X-Patchwork-Id: 92154 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id CA0AFA0548; Mon, 26 Apr 2021 11:53:27 +0200 (CEST) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 6B5A8411AE; Mon, 26 Apr 2021 11:53:20 +0200 (CEST) Received: from mga02.intel.com (mga02.intel.com [134.134.136.20]) by mails.dpdk.org (Postfix) with ESMTP id 583C44119B for ; Mon, 26 Apr 2021 11:53:15 +0200 (CEST) IronPort-SDR: luiGD9V6Vfpnt1pm2Q0pqfJSB7i7N9Z+TX2JnH1rrTiJLmh9mCAqQd8bQZRnZ/RI/100iqgS1+ MDv+2gZ37L3g== X-IronPort-AV: E=McAfee;i="6200,9189,9965"; a="183442901" X-IronPort-AV: E=Sophos;i="5.82,252,1613462400"; d="scan'208";a="183442901" Received: from orsmga008.jf.intel.com ([10.7.209.65]) by orsmga101.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 26 Apr 2021 02:53:14 -0700 IronPort-SDR: T1CYxOH6/6lD1wawK8BOqqsyVPb1RxHjfotnUEHMVrHOM4egg9DU6K5sdr71CElxvPqOkP3p7O FoFZXa+/aVGg== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.82,252,1613462400"; d="scan'208";a="429336904" Received: from silpixa00399126.ir.intel.com ([10.237.223.81]) by orsmga008.jf.intel.com with ESMTP; 26 Apr 2021 02:53:13 -0700 From: Bruce Richardson To: dev@dpdk.org Cc: kevin.laatz@intel.com, jiayu.hu@intel.com, Bruce Richardson Date: Mon, 26 Apr 2021 10:52:50 +0100 Message-Id: <20210426095259.225354-4-bruce.richardson@intel.com> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20210426095259.225354-1-bruce.richardson@intel.com> References: <20210318182042.43658-2-bruce.richardson@intel.com> <20210426095259.225354-1-bruce.richardson@intel.com> MIME-Version: 1.0 Subject: [dpdk-dev] [PATCH v2 03/12] raw/ioat: add component prefix to log messages X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Add the driver prefix "IOAT" to log messages for the driver. Signed-off-by: Bruce Richardson --- drivers/raw/ioat/ioat_private.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/raw/ioat/ioat_private.h b/drivers/raw/ioat/ioat_private.h index 6c423811ec..f032d5fe3d 100644 --- a/drivers/raw/ioat/ioat_private.h +++ b/drivers/raw/ioat/ioat_private.h @@ -21,7 +21,7 @@ extern int ioat_pmd_logtype; #define IOAT_PMD_LOG(level, fmt, args...) rte_log(RTE_LOG_ ## level, \ - ioat_pmd_logtype, "%s(): " fmt "\n", __func__, ##args) + ioat_pmd_logtype, "IOAT: %s(): " fmt "\n", __func__, ##args) #define IOAT_PMD_DEBUG(fmt, args...) IOAT_PMD_LOG(DEBUG, fmt, ## args) #define IOAT_PMD_INFO(fmt, args...) 
IOAT_PMD_LOG(INFO, fmt, ## args) From patchwork Mon Apr 26 09:52:51 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Bruce Richardson X-Patchwork-Id: 92156 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id 25722A0548; Mon, 26 Apr 2021 11:53:40 +0200 (CEST) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 0CEF0411BA; Mon, 26 Apr 2021 11:53:23 +0200 (CEST) Received: from mga02.intel.com (mga02.intel.com [134.134.136.20]) by mails.dpdk.org (Postfix) with ESMTP id 72400411A5 for ; Mon, 26 Apr 2021 11:53:16 +0200 (CEST) IronPort-SDR: ninjjuDyKP9BXEMaquGTsed1VBTUUXYsYDZfRcxf4GXE7joDrhKTLymN/ry6efZxfFQiXEKH1z uesWeVPwh+0g== X-IronPort-AV: E=McAfee;i="6200,9189,9965"; a="183442902" X-IronPort-AV: E=Sophos;i="5.82,252,1613462400"; d="scan'208";a="183442902" Received: from orsmga008.jf.intel.com ([10.7.209.65]) by orsmga101.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 26 Apr 2021 02:53:15 -0700 IronPort-SDR: XGzi0n8Tg6zqYHLKnt7ZbFx0ZML7hs6u1uStmrGZkCx7P4Cbn4Il0BBA17QgWLG3p4L8FLhGAX iZ4eAULCiN0Q== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.82,252,1613462400"; d="scan'208";a="429336912" Received: from silpixa00399126.ir.intel.com ([10.237.223.81]) by orsmga008.jf.intel.com with ESMTP; 26 Apr 2021 02:53:14 -0700 From: Bruce Richardson To: dev@dpdk.org Cc: kevin.laatz@intel.com, jiayu.hu@intel.com, Bruce Richardson Date: Mon, 26 Apr 2021 10:52:51 +0100 Message-Id: <20210426095259.225354-5-bruce.richardson@intel.com> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20210426095259.225354-1-bruce.richardson@intel.com> References: <20210318182042.43658-2-bruce.richardson@intel.com> <20210426095259.225354-1-bruce.richardson@intel.com> MIME-Version: 1.0 Subject: [dpdk-dev] [PATCH v2 04/12] raw/ioat: add explicit padding to descriptor struct X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Add an explicit padding field to the end of the descriptor structure so that when the batch descriptor is defined on the stack for perform-ops, the unused space is all zeroed appropriately. 
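As an illustration of the resulting 64-byte layout, here is a stand-alone sketch with the trailing padding made explicit. Field names follow the patch, but rte_iova_t is replaced by plain uint64_t and the struct is renamed so the example compiles on its own; treat it as an approximation used to check the size, not as the driver header itself.

    #include <assert.h>
    #include <stdint.h>

    /* sketch of the DSA hardware descriptor with explicit trailing padding */
    struct idxd_hw_desc_sketch {
            uint32_t pasid;
            uint32_t op_flags;
            uint64_t completion;        /* rte_iova_t in the real header */
            uint64_t src;               /* source address, or batch pointer */
            uint64_t dst;
            uint32_t size;              /* data length, or batch size */
            uint16_t intr_handle;       /* completion interrupt handle */
            uint16_t __reserved[13];    /* previously 26 bytes of implicit padding */
    } __attribute__((aligned(64)));     /* __rte_aligned(64) in the real header */

    /* with the padding held in named fields, a stack initialiser such as
     * "= {0}" is guaranteed to zero those bytes as well */
    static_assert(sizeof(struct idxd_hw_desc_sketch) == 64,
                  "descriptor must occupy exactly one 64-byte cache line");

Because every byte of the structure is now a named field, the batch descriptor defined on the stack in perform-ops has its reserved area zeroed along with the rest, which is the behaviour the commit message describes.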
Signed-off-by: Bruce Richardson --- drivers/raw/ioat/rte_ioat_rawdev_fns.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/raw/ioat/rte_ioat_rawdev_fns.h b/drivers/raw/ioat/rte_ioat_rawdev_fns.h index c2c4601ca7..e96edc9053 100644 --- a/drivers/raw/ioat/rte_ioat_rawdev_fns.h +++ b/drivers/raw/ioat/rte_ioat_rawdev_fns.h @@ -140,7 +140,10 @@ struct rte_idxd_hw_desc { uint32_t size; /* length of data for op, or batch size */ - /* 28 bytes of padding here */ + uint16_t intr_handle; /* completion interrupt handle */ + + /* remaining 26 bytes are reserved */ + uint16_t __reserved[13]; } __rte_aligned(64); /** From patchwork Mon Apr 26 09:52:52 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Bruce Richardson X-Patchwork-Id: 92157 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id D32EBA0548; Mon, 26 Apr 2021 11:53:46 +0200 (CEST) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 7D92B411C2; Mon, 26 Apr 2021 11:53:24 +0200 (CEST) Received: from mga02.intel.com (mga02.intel.com [134.134.136.20]) by mails.dpdk.org (Postfix) with ESMTP id 04E6D4119B; Mon, 26 Apr 2021 11:53:17 +0200 (CEST) IronPort-SDR: ZQNTwDt0iuwEaElVhfw6s23bgHSjwG5N0XTo0joKTCxP4wn4x63ua+Q5sbZaqu8/yLHLznXvMw XF23ES85UXpg== X-IronPort-AV: E=McAfee;i="6200,9189,9965"; a="183442903" X-IronPort-AV: E=Sophos;i="5.82,252,1613462400"; d="scan'208";a="183442903" Received: from orsmga008.jf.intel.com ([10.7.209.65]) by orsmga101.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 26 Apr 2021 02:53:16 -0700 IronPort-SDR: FdFWIbmufXpf65T1lPYVFEMMJX8LWp4IR7a/NvNkseDbHLSw5xopSnszqjBSm7IHEkb/UaeTYL 25O22PY5PLhg== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.82,252,1613462400"; d="scan'208";a="429336917" Received: from silpixa00399126.ir.intel.com ([10.237.223.81]) by orsmga008.jf.intel.com with ESMTP; 26 Apr 2021 02:53:15 -0700 From: Bruce Richardson To: dev@dpdk.org Cc: kevin.laatz@intel.com, jiayu.hu@intel.com, Bruce Richardson , stable@dpdk.org Date: Mon, 26 Apr 2021 10:52:52 +0100 Message-Id: <20210426095259.225354-6-bruce.richardson@intel.com> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20210426095259.225354-1-bruce.richardson@intel.com> References: <20210318182042.43658-2-bruce.richardson@intel.com> <20210426095259.225354-1-bruce.richardson@intel.com> MIME-Version: 1.0 Subject: [dpdk-dev] [PATCH v2 05/12] raw/ioat: fix script for configuring small number of queues X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" The dpdk_idxd_cfg.py script included with the driver for convenience did not work properly where the number of queues to be configured was less than the number of groups or engines. This was because there would be configured groups/engines not assigned to queues. Fix this by limiting the engine and group counts to be no bigger than the number of queues. 
Fixes: 01863b9d2354 ("raw/ioat: include example configuration script") Cc: stable@dpdk.org Signed-off-by: Bruce Richardson --- drivers/raw/ioat/dpdk_idxd_cfg.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/raw/ioat/dpdk_idxd_cfg.py b/drivers/raw/ioat/dpdk_idxd_cfg.py index bce4bb5bd4..56e44063e4 100755 --- a/drivers/raw/ioat/dpdk_idxd_cfg.py +++ b/drivers/raw/ioat/dpdk_idxd_cfg.py @@ -39,15 +39,15 @@ def configure_dsa(dsa_id, queues): max_queues = dsa_dir.read_int("max_work_queues") max_tokens = dsa_dir.read_int("max_tokens") - # we want one engine per group - nb_groups = min(max_engines, max_groups) - for grp in range(nb_groups): - dsa_dir.write_values({f"engine{dsa_id}.{grp}/group_id": grp}) - nb_queues = min(queues, max_queues) if queues > nb_queues: print(f"Setting number of queues to max supported value: {max_queues}") + # we want one engine per group, and no more engines than queues + nb_groups = min(max_engines, max_groups, nb_queues) + for grp in range(nb_groups): + dsa_dir.write_values({f"engine{dsa_id}.{grp}/group_id": grp}) + # configure each queue for q in range(nb_queues): wq_dir = SysfsDir(os.path.join(dsa_dir.path, f"wq{dsa_id}.{q}")) From patchwork Mon Apr 26 09:52:53 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Bruce Richardson X-Patchwork-Id: 92158 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id E8B6AA0548; Mon, 26 Apr 2021 11:53:52 +0200 (CEST) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id CD5F1411C5; Mon, 26 Apr 2021 11:53:25 +0200 (CEST) Received: from mga02.intel.com (mga02.intel.com [134.134.136.20]) by mails.dpdk.org (Postfix) with ESMTP id 55017411AD for ; Mon, 26 Apr 2021 11:53:19 +0200 (CEST) IronPort-SDR: H1cRusOxlT72Q9qMBHxpGncChahi/YBGTU5fhgv1ja/57WPA0gmhAFSNAwTtOOJUGF1Ox/CdrJ SPxWCkwd69cg== X-IronPort-AV: E=McAfee;i="6200,9189,9965"; a="183442904" X-IronPort-AV: E=Sophos;i="5.82,252,1613462400"; d="scan'208";a="183442904" Received: from orsmga008.jf.intel.com ([10.7.209.65]) by orsmga101.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 26 Apr 2021 02:53:17 -0700 IronPort-SDR: x336wUojXYuirtpCueKbZhTXPih07AcDDkZYM2VCsitHdf+EJQYy9Y0Vz2pKjAKC5Ie2amE1JX 31OKBVci9NlA== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.82,252,1613462400"; d="scan'208";a="429336925" Received: from silpixa00399126.ir.intel.com ([10.237.223.81]) by orsmga008.jf.intel.com with ESMTP; 26 Apr 2021 02:53:16 -0700 From: Bruce Richardson To: dev@dpdk.org Cc: kevin.laatz@intel.com, jiayu.hu@intel.com, Bruce Richardson Date: Mon, 26 Apr 2021 10:52:53 +0100 Message-Id: <20210426095259.225354-7-bruce.richardson@intel.com> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20210426095259.225354-1-bruce.richardson@intel.com> References: <20210318182042.43658-2-bruce.richardson@intel.com> <20210426095259.225354-1-bruce.richardson@intel.com> MIME-Version: 1.0 Subject: [dpdk-dev] [PATCH v2 06/12] raw/ioat: make workqueue name configurable in script X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Add a "--name-prefix" parameter to the quick configuration script for DSA. 
This allows the queues configured on a DSA instance to be made available to only one DPDK process in a setup with multiple DPDK process instances. Signed-off-by: Bruce Richardson --- drivers/raw/ioat/dpdk_idxd_cfg.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/raw/ioat/dpdk_idxd_cfg.py b/drivers/raw/ioat/dpdk_idxd_cfg.py index 56e44063e4..ff06d9e240 100755 --- a/drivers/raw/ioat/dpdk_idxd_cfg.py +++ b/drivers/raw/ioat/dpdk_idxd_cfg.py @@ -29,7 +29,7 @@ def write_values(self, values): f.write(str(contents)) -def configure_dsa(dsa_id, queues): +def configure_dsa(dsa_id, queues, prefix): "Configure the DSA instance with appropriate number of queues" dsa_dir = SysfsDir(f"/sys/bus/dsa/devices/dsa{dsa_id}") drv_dir = SysfsDir("/sys/bus/dsa/drivers/dsa") @@ -54,7 +54,7 @@ def configure_dsa(dsa_id, queues): wq_dir.write_values({"group_id": q % nb_groups, "type": "user", "mode": "dedicated", - "name": f"dpdk_wq{dsa_id}.{q}", + "name": f"{prefix}_wq{dsa_id}.{q}", "priority": 1, "size": int(max_tokens / nb_queues)}) @@ -71,8 +71,11 @@ def main(args): arg_p.add_argument('dsa_id', type=int, help="DSA instance number") arg_p.add_argument('-q', metavar='queues', type=int, default=255, help="Number of queues to set up") + arg_p.add_argument('--name-prefix', metavar='prefix', dest='prefix', + default="dpdk", + help="Prefix for workqueue name to mark for DPDK use [default: 'dpdk']") parsed_args = arg_p.parse_args(args[1:]) - configure_dsa(parsed_args.dsa_id, parsed_args.q) + configure_dsa(parsed_args.dsa_id, parsed_args.q, parsed_args.prefix) if __name__ == "__main__": From patchwork Mon Apr 26 09:52:54 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Bruce Richardson X-Patchwork-Id: 92159 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id 7E753A0548; Mon, 26 Apr 2021 11:54:00 +0200 (CEST) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 8341B411D0; Mon, 26 Apr 2021 11:53:27 +0200 (CEST) Received: from mga02.intel.com (mga02.intel.com [134.134.136.20]) by mails.dpdk.org (Postfix) with ESMTP id 25A9D411B2 for ; Mon, 26 Apr 2021 11:53:20 +0200 (CEST) IronPort-SDR: vEua89YeOxqJ0wiRWqWprcmZ3BUJrjNGKATM1VoCCI2R9mVIcyz7E5RXY7SWq5SpAf7UqhPCLZ BVVS+eb7SmUA== X-IronPort-AV: E=McAfee;i="6200,9189,9965"; a="183442906" X-IronPort-AV: E=Sophos;i="5.82,252,1613462400"; d="scan'208";a="183442906" Received: from orsmga008.jf.intel.com ([10.7.209.65]) by orsmga101.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 26 Apr 2021 02:53:19 -0700 IronPort-SDR: zU9znVpLU8cIJGki6uLDWMoxbpa1kgRGxFMu2HChcD9ijtuHRmYasg0NnIOTyw5TAJV3YW29O1 GQlHSI/ATEEw== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.82,252,1613462400"; d="scan'208";a="429336930" Received: from silpixa00399126.ir.intel.com ([10.237.223.81]) by orsmga008.jf.intel.com with ESMTP; 26 Apr 2021 02:53:17 -0700 From: Bruce Richardson To: dev@dpdk.org Cc: kevin.laatz@intel.com, jiayu.hu@intel.com Date: Mon, 26 Apr 2021 10:52:54 +0100 Message-Id: <20210426095259.225354-8-bruce.richardson@intel.com> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20210426095259.225354-1-bruce.richardson@intel.com> References: <20210318182042.43658-2-bruce.richardson@intel.com> <20210426095259.225354-1-bruce.richardson@intel.com> MIME-Version: 1.0 
Subject: [dpdk-dev] [PATCH v2 07/12] raw/ioat: allow perform operations function to return error X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" From: Kevin Laatz Change the return type for the rte_ioat_perform_ops() function from void to int to allow the possibility of returning an error code in future, should it be necessary. Signed-off-by: Kevin Laatz --- drivers/raw/ioat/rte_ioat_rawdev.h | 4 +++- drivers/raw/ioat/rte_ioat_rawdev_fns.h | 11 +++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/raw/ioat/rte_ioat_rawdev.h b/drivers/raw/ioat/rte_ioat_rawdev.h index f9e8425a7f..e5a22a0799 100644 --- a/drivers/raw/ioat/rte_ioat_rawdev.h +++ b/drivers/raw/ioat/rte_ioat_rawdev.h @@ -124,8 +124,10 @@ rte_ioat_fence(int dev_id); * * @param dev_id * The rawdev device id of the ioat instance + * @return + * 0 on success. Non-zero return on error. */ -static inline void +static inline int __rte_experimental rte_ioat_perform_ops(int dev_id); diff --git a/drivers/raw/ioat/rte_ioat_rawdev_fns.h b/drivers/raw/ioat/rte_ioat_rawdev_fns.h index e96edc9053..477c1b7b41 100644 --- a/drivers/raw/ioat/rte_ioat_rawdev_fns.h +++ b/drivers/raw/ioat/rte_ioat_rawdev_fns.h @@ -291,7 +291,7 @@ __ioat_fence(int dev_id) /* * Trigger hardware to begin performing enqueued operations */ -static __rte_always_inline void +static __rte_always_inline int __ioat_perform_ops(int dev_id) { struct rte_ioat_rawdev *ioat = @@ -301,6 +301,8 @@ __ioat_perform_ops(int dev_id) rte_compiler_barrier(); *ioat->doorbell = ioat->next_write; ioat->xstats.started = ioat->xstats.enqueued; + + return 0; } /** @@ -462,7 +464,7 @@ __idxd_movdir64b(volatile void *dst, const void *src) : "a" (dst), "d" (src)); } -static __rte_always_inline void +static __rte_always_inline int __idxd_perform_ops(int dev_id) { struct rte_idxd_rawdev *idxd = @@ -470,7 +472,7 @@ __idxd_perform_ops(int dev_id) struct rte_idxd_desc_batch *b = &idxd->batch_ring[idxd->next_batch]; if (b->submitted || b->op_count == 0) - return; + return 0; b->hdl_end = idxd->next_free_hdl; b->comp.status = 0; b->submitted = 1; @@ -480,6 +482,7 @@ __idxd_perform_ops(int dev_id) if (++idxd->next_batch == idxd->batch_ring_sz) idxd->next_batch = 0; idxd->xstats.started = idxd->xstats.enqueued; + return 0; } static __rte_always_inline int @@ -558,7 +561,7 @@ rte_ioat_fence(int dev_id) return __ioat_fence(dev_id); } -static inline void +static inline int rte_ioat_perform_ops(int dev_id) { enum rte_ioat_dev_type *type = From patchwork Mon Apr 26 09:52:55 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Bruce Richardson X-Patchwork-Id: 92160 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id EFCDAA0548; Mon, 26 Apr 2021 11:54:05 +0200 (CEST) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id C2D3B411D3; Mon, 26 Apr 2021 11:53:28 +0200 (CEST) Received: from mga02.intel.com (mga02.intel.com [134.134.136.20]) by mails.dpdk.org (Postfix) with ESMTP id 9A0B8411B4 for ; Mon, 26 Apr 2021 11:53:21 +0200 (CEST) IronPort-SDR: waxD/1vYW2BJlOnSuZQhSqAA9akYHIB5qcnSFJ85wJdYP0W/Z4zPQLuc66k+ucoh99CkTVE/p/ 
x89yXYnjqFvQ== X-IronPort-AV: E=McAfee;i="6200,9189,9965"; a="183442908" X-IronPort-AV: E=Sophos;i="5.82,252,1613462400"; d="scan'208";a="183442908" Received: from orsmga008.jf.intel.com ([10.7.209.65]) by orsmga101.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 26 Apr 2021 02:53:20 -0700 IronPort-SDR: RINl9GwdV+5ZSePLMLrh6oGN/BqiGfa6HfCFv6uhIiOfERcrWCmjLzKzeDnLrN3+Gtj+4NMW2Y h0EgUxhAlg+w== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.82,252,1613462400"; d="scan'208";a="429336937" Received: from silpixa00399126.ir.intel.com ([10.237.223.81]) by orsmga008.jf.intel.com with ESMTP; 26 Apr 2021 02:53:19 -0700 From: Bruce Richardson To: dev@dpdk.org Cc: kevin.laatz@intel.com, jiayu.hu@intel.com, Bruce Richardson Date: Mon, 26 Apr 2021 10:52:55 +0100 Message-Id: <20210426095259.225354-9-bruce.richardson@intel.com> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20210426095259.225354-1-bruce.richardson@intel.com> References: <20210318182042.43658-2-bruce.richardson@intel.com> <20210426095259.225354-1-bruce.richardson@intel.com> MIME-Version: 1.0 Subject: [dpdk-dev] [PATCH v2 08/12] raw/ioat: add bus driver for device scanning automatically X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Rather than using a vdev with args, DPDK can scan and initialize the devices automatically using a bus-type driver. This bus does not need to worry about registering device drivers, rather it can initialize the devices directly on probe. The device instances (queues) to use are detected from /dev with the additional info about them got from /sys. Signed-off-by: Bruce Richardson --- doc/guides/rawdevs/ioat.rst | 23 ++- drivers/raw/ioat/idxd_bus.c | 359 +++++++++++++++++++++++++++++++++++ drivers/raw/ioat/idxd_vdev.c | 231 ---------------------- drivers/raw/ioat/meson.build | 4 +- 4 files changed, 374 insertions(+), 243 deletions(-) create mode 100644 drivers/raw/ioat/idxd_bus.c delete mode 100644 drivers/raw/ioat/idxd_vdev.c diff --git a/doc/guides/rawdevs/ioat.rst b/doc/guides/rawdevs/ioat.rst index 60438cc3bc..2ad13db294 100644 --- a/doc/guides/rawdevs/ioat.rst +++ b/doc/guides/rawdevs/ioat.rst @@ -78,7 +78,7 @@ Example configuration for a work queue:: $ accel-config config-wq dsa0/wq0.0 --group-id=0 \ --mode=dedicated --priority=10 --wq-size=8 \ - --type=user --name=app1 + --type=user --name=dpdk_app1 Once the devices have been configured, they need to be enabled:: @@ -114,15 +114,18 @@ the device driver on the EAL commandline, via the ``allowlist`` or ``-a`` flag e $ dpdk-test -a ,max_queues=4 -If the device is bound to the IDXD kernel driver (and previously configured with sysfs), -then a specific work queue needs to be passed to the application via a vdev parameter. -This vdev parameter take the driver name and work queue name as parameters. -For example, to use work queue 0 on Intel\ |reg| DSA instance 0:: - - $ dpdk-test --no-pci --vdev=rawdev_idxd,wq=0.0 - -Once probed successfully, the device will appear as a ``rawdev``, that is a -"raw device type" inside DPDK, and can be accessed using APIs from the +For devices bound to the IDXD kernel driver, +the DPDK ioat driver will automatically perform a scan for available workqueues to use. 
+Any workqueues found listed in ``/dev/dsa`` on the system will be checked in ``/sys``, +and any which have ``dpdk_`` prefix in their name will be automatically probed by the +driver to make them available to the application. +Alternatively, to support use by multiple DPDK processes simultaneously, +the value used as the DPDK ``--file-prefix`` parameter may be used as a workqueue name prefix, +instead of ``dpdk_``, +allowing each DPDK application instance to only use a subset of configured queues. + +Once probed successfully, irrespective of kernel driver, the device will appear as a ``rawdev``, +that is a "raw device type" inside DPDK, and can be accessed using APIs from the ``rte_rawdev`` library. Using IOAT Rawdev Devices diff --git a/drivers/raw/ioat/idxd_bus.c b/drivers/raw/ioat/idxd_bus.c new file mode 100644 index 0000000000..1f11a0d6ca --- /dev/null +++ b/drivers/raw/ioat/idxd_bus.c @@ -0,0 +1,359 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2021 Intel Corporation + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include "ioat_private.h" + +/* default value for DSA paths, but allow override in environment for testing */ +#define DSA_DEV_PATH "/dev/dsa" +#define DSA_SYSFS_PATH "/sys/bus/dsa/devices" + +/** unique identifier for a DSA device/WQ instance */ +struct dsa_wq_addr { + uint16_t device_id; + uint16_t wq_id; +}; + +/** a DSA device instance */ +struct rte_dsa_device { + struct rte_device device; /**< Inherit core device */ + TAILQ_ENTRY(rte_dsa_device) next; /**< next dev in list */ + + char wq_name[32]; /**< the workqueue name/number e.g. wq0.1 */ + struct dsa_wq_addr addr; /**< Identifies the specific WQ */ +}; + +/* forward prototypes */ +struct dsa_bus; +static int dsa_scan(void); +static int dsa_probe(void); +static struct rte_device *dsa_find_device(const struct rte_device *start, + rte_dev_cmp_t cmp, const void *data); +static enum rte_iova_mode dsa_get_iommu_class(void); +static int dsa_addr_parse(const char *name, void *addr); + +/** List of devices */ +TAILQ_HEAD(dsa_device_list, rte_dsa_device); + +/** + * Structure describing the DSA bus + */ +struct dsa_bus { + struct rte_bus bus; /**< Inherit the generic class */ + struct rte_driver driver; /**< Driver struct for devices to point to */ + struct dsa_device_list device_list; /**< List of PCI devices */ +}; + +struct dsa_bus dsa_bus = { + .bus = { + .scan = dsa_scan, + .probe = dsa_probe, + .find_device = dsa_find_device, + .get_iommu_class = dsa_get_iommu_class, + .parse = dsa_addr_parse, + }, + .driver = { + .name = "rawdev_idxd" + }, + .device_list = TAILQ_HEAD_INITIALIZER(dsa_bus.device_list), +}; + +static inline const char * +dsa_get_dev_path(void) +{ + const char *path = getenv("DSA_DEV_PATH"); + return path ? path : DSA_DEV_PATH; +} + +static inline const char * +dsa_get_sysfs_path(void) +{ + const char *path = getenv("DSA_SYSFS_PATH"); + return path ? 
path : DSA_SYSFS_PATH; +} + +static const struct rte_rawdev_ops idxd_vdev_ops = { + .dev_close = idxd_rawdev_close, + .dev_selftest = ioat_rawdev_test, + .dump = idxd_dev_dump, + .dev_configure = idxd_dev_configure, + .dev_info_get = idxd_dev_info_get, + .xstats_get = ioat_xstats_get, + .xstats_get_names = ioat_xstats_get_names, + .xstats_reset = ioat_xstats_reset, +}; + +static void * +idxd_vdev_mmap_wq(struct rte_dsa_device *dev) +{ + void *addr; + char path[PATH_MAX]; + int fd; + + snprintf(path, sizeof(path), "%s/%s", dsa_get_dev_path(), dev->wq_name); + fd = open(path, O_RDWR); + if (fd < 0) { + IOAT_PMD_ERR("Failed to open device path: %s", path); + return NULL; + } + + addr = mmap(NULL, 0x1000, PROT_WRITE, MAP_SHARED, fd, 0); + close(fd); + if (addr == MAP_FAILED) { + IOAT_PMD_ERR("Failed to mmap device %s", path); + return NULL; + } + + return addr; +} + +static int +read_wq_string(struct rte_dsa_device *dev, const char *filename, + char *value, size_t valuelen) +{ + char sysfs_node[PATH_MAX]; + int len; + int fd; + + snprintf(sysfs_node, sizeof(sysfs_node), "%s/%s/%s", + dsa_get_sysfs_path(), dev->wq_name, filename); + fd = open(sysfs_node, O_RDONLY); + if (fd < 0) { + IOAT_PMD_ERR("%s(): opening file '%s' failed: %s", + __func__, sysfs_node, strerror(errno)); + return -1; + } + + len = read(fd, value, valuelen - 1); + close(fd); + if (len < 0) { + IOAT_PMD_ERR("%s(): error reading file '%s': %s", + __func__, sysfs_node, strerror(errno)); + return -1; + } + value[len] = '\0'; + return 0; +} + +static int +read_wq_int(struct rte_dsa_device *dev, const char *filename, + int *value) +{ + char sysfs_node[PATH_MAX]; + FILE *f; + int ret = 0; + + snprintf(sysfs_node, sizeof(sysfs_node), "%s/%s/%s", + dsa_get_sysfs_path(), dev->wq_name, filename); + f = fopen(sysfs_node, "r"); + if (f == NULL) { + IOAT_PMD_ERR("%s(): opening file '%s' failed: %s", + __func__, sysfs_node, strerror(errno)); + return -1; + } + + if (fscanf(f, "%d", value) != 1) { + IOAT_PMD_ERR("%s(): error reading file '%s': %s", + __func__, sysfs_node, strerror(errno)); + ret = -1; + } + + fclose(f); + return ret; +} + +static int +read_device_int(struct rte_dsa_device *dev, const char *filename, + int *value) +{ + char sysfs_node[PATH_MAX]; + FILE *f; + int ret = 0; + + snprintf(sysfs_node, sizeof(sysfs_node), "%s/dsa%d/%s", + dsa_get_sysfs_path(), dev->addr.device_id, filename); + f = fopen(sysfs_node, "r"); + if (f == NULL) { + IOAT_PMD_ERR("%s(): opening file '%s' failed: %s", + __func__, sysfs_node, strerror(errno)); + return -1; + } + + if (fscanf(f, "%d", value) != 1) { + IOAT_PMD_ERR("%s(): error reading file '%s': %s", + __func__, sysfs_node, strerror(errno)); + ret = -1; + } + + fclose(f); + return ret; +} + +static int +idxd_rawdev_probe_dsa(struct rte_dsa_device *dev) +{ + struct idxd_rawdev idxd = {{0}}; /* double {} to avoid error on BSD12 */ + int ret = 0; + + IOAT_PMD_INFO("Probing device %s on numa node %d", + dev->wq_name, dev->device.numa_node); + if (read_wq_int(dev, "size", &ret) < 0) + return -1; + idxd.max_batches = ret; + idxd.qid = dev->addr.wq_id; + idxd.u.vdev.dsa_id = dev->addr.device_id; + + idxd.public.portal = idxd_vdev_mmap_wq(dev); + if (idxd.public.portal == NULL) { + IOAT_PMD_ERR("WQ mmap failed"); + return -ENOENT; + } + + ret = idxd_rawdev_create(dev->wq_name, &dev->device, &idxd, &idxd_vdev_ops); + if (ret) { + IOAT_PMD_ERR("Failed to create rawdev %s", dev->wq_name); + return ret; + } + + return 0; +} + +static int +is_for_this_process_use(const char *name) +{ + char *runtime_dir = 
strdup(rte_eal_get_runtime_dir()); + char *prefix = basename(runtime_dir); + int prefixlen = strlen(prefix); + int retval = 0; + + if (strncmp(name, "dpdk_", 5) == 0) + retval = 1; + if (strncmp(name, prefix, prefixlen) == 0 && name[prefixlen] == '_') + retval = 1; + + free(runtime_dir); + return retval; +} + +static int +dsa_probe(void) +{ + struct rte_dsa_device *dev; + + TAILQ_FOREACH(dev, &dsa_bus.device_list, next) { + char type[64], name[64]; + + if (read_wq_string(dev, "type", type, sizeof(type)) < 0 || + read_wq_string(dev, "name", name, sizeof(name)) < 0) + continue; + + if (strncmp(type, "user", 4) == 0 && is_for_this_process_use(name)) { + dev->device.driver = &dsa_bus.driver; + idxd_rawdev_probe_dsa(dev); + continue; + } + IOAT_PMD_DEBUG("WQ '%s', not allocated to DPDK", dev->wq_name); + } + + return 0; +} + +static int +dsa_scan(void) +{ + const char *path = dsa_get_dev_path(); + struct dirent *wq; + DIR *dev_dir; + + dev_dir = opendir(path); + if (dev_dir == NULL) { + if (errno == ENOENT) + return 0; /* no bus, return witout error */ + IOAT_PMD_ERR("%s(): opendir '%s' failed: %s", + __func__, path, strerror(errno)); + return -1; + } + + while ((wq = readdir(dev_dir)) != NULL) { + struct rte_dsa_device *dev; + int numa_node = -1; + + if (strncmp(wq->d_name, "wq", 2) != 0) + continue; + if (strnlen(wq->d_name, sizeof(dev->wq_name)) == sizeof(dev->wq_name)) { + IOAT_PMD_ERR("%s(): wq name too long: '%s', skipping", + __func__, wq->d_name); + continue; + } + IOAT_PMD_DEBUG("%s(): found %s/%s", __func__, path, wq->d_name); + + dev = malloc(sizeof(*dev)); + if (dsa_addr_parse(wq->d_name, &dev->addr) < 0) { + IOAT_PMD_ERR("Error parsing WQ name: %s", wq->d_name); + free(dev); + continue; + } + dev->device.bus = &dsa_bus.bus; + strlcpy(dev->wq_name, wq->d_name, sizeof(dev->wq_name)); + TAILQ_INSERT_TAIL(&dsa_bus.device_list, dev, next); + + read_device_int(dev, "numa_node", &numa_node); + dev->device.numa_node = numa_node; + } + + return 0; +} + +static struct rte_device * +dsa_find_device(const struct rte_device *start, rte_dev_cmp_t cmp, + const void *data) +{ + struct rte_dsa_device *dev = TAILQ_FIRST(&dsa_bus.device_list); + + /* the rte_device struct must be at start of dsa structure */ + RTE_BUILD_BUG_ON(offsetof(struct rte_dsa_device, device) != 0); + + if (start != NULL) /* jump to start point if given */ + dev = TAILQ_NEXT((const struct rte_dsa_device *)start, next); + while (dev != NULL) { + if (cmp(&dev->device, data) == 0) + return &dev->device; + dev = TAILQ_NEXT(dev, next); + } + return NULL; +} + +static enum rte_iova_mode +dsa_get_iommu_class(void) +{ + return RTE_IOVA_VA; +} + +static int +dsa_addr_parse(const char *name, void *addr) +{ + struct dsa_wq_addr *wq = addr; + unsigned int device_id, wq_id; + + if (sscanf(name, "wq%u.%u", &device_id, &wq_id) != 2) { + IOAT_PMD_DEBUG("Parsing WQ name failed: %s", name); + return -1; + } + + wq->device_id = device_id; + wq->wq_id = wq_id; + return 0; +} + +RTE_REGISTER_BUS(dsa, dsa_bus.bus); diff --git a/drivers/raw/ioat/idxd_vdev.c b/drivers/raw/ioat/idxd_vdev.c deleted file mode 100644 index 30a53b3b82..0000000000 --- a/drivers/raw/ioat/idxd_vdev.c +++ /dev/null @@ -1,231 +0,0 @@ -/* SPDX-License-Identifier: BSD-3-Clause - * Copyright(c) 2020 Intel Corporation - */ - -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "ioat_private.h" - -/** Name of the device driver */ -#define IDXD_PMD_RAWDEV_NAME rawdev_idxd -/* takes a work queue(WQ) as parameter */ -#define 
IDXD_ARG_WQ "wq" - -static const char * const valid_args[] = { - IDXD_ARG_WQ, - NULL -}; - -struct idxd_vdev_args { - uint8_t device_id; - uint8_t wq_id; -}; - -static const struct rte_rawdev_ops idxd_vdev_ops = { - .dev_close = idxd_rawdev_close, - .dev_selftest = ioat_rawdev_test, - .dump = idxd_dev_dump, - .dev_configure = idxd_dev_configure, - .dev_info_get = idxd_dev_info_get, - .xstats_get = ioat_xstats_get, - .xstats_get_names = ioat_xstats_get_names, - .xstats_reset = ioat_xstats_reset, -}; - -static void * -idxd_vdev_mmap_wq(struct idxd_vdev_args *args) -{ - void *addr; - char path[PATH_MAX]; - int fd; - - snprintf(path, sizeof(path), "/dev/dsa/wq%u.%u", - args->device_id, args->wq_id); - fd = open(path, O_RDWR); - if (fd < 0) { - IOAT_PMD_ERR("Failed to open device path"); - return NULL; - } - - addr = mmap(NULL, 0x1000, PROT_WRITE, MAP_SHARED, fd, 0); - close(fd); - if (addr == MAP_FAILED) { - IOAT_PMD_ERR("Failed to mmap device"); - return NULL; - } - - return addr; -} - -static int -idxd_rawdev_parse_wq(const char *key __rte_unused, const char *value, - void *extra_args) -{ - struct idxd_vdev_args *args = (struct idxd_vdev_args *)extra_args; - int dev, wq, bytes = -1; - int read = sscanf(value, "%d.%d%n", &dev, &wq, &bytes); - - if (read != 2 || bytes != (int)strlen(value)) { - IOAT_PMD_ERR("Error parsing work-queue id. Must be in . format"); - return -EINVAL; - } - - if (dev >= UINT8_MAX || wq >= UINT8_MAX) { - IOAT_PMD_ERR("Device or work queue id out of range"); - return -EINVAL; - } - - args->device_id = dev; - args->wq_id = wq; - - return 0; -} - -static int -idxd_vdev_parse_params(struct rte_kvargs *kvlist, struct idxd_vdev_args *args) -{ - int ret = 0; - - if (rte_kvargs_count(kvlist, IDXD_ARG_WQ) == 1) { - if (rte_kvargs_process(kvlist, IDXD_ARG_WQ, - &idxd_rawdev_parse_wq, args) < 0) { - IOAT_PMD_ERR("Error parsing %s", IDXD_ARG_WQ); - ret = -EINVAL; - } - } else { - IOAT_PMD_ERR("%s is a mandatory arg", IDXD_ARG_WQ); - ret = -EINVAL; - } - - rte_kvargs_free(kvlist); - return ret; -} - -static int -idxd_vdev_get_max_batches(struct idxd_vdev_args *args) -{ - char sysfs_path[PATH_MAX]; - FILE *f; - int ret; - - snprintf(sysfs_path, sizeof(sysfs_path), - "/sys/bus/dsa/devices/wq%u.%u/size", - args->device_id, args->wq_id); - f = fopen(sysfs_path, "r"); - if (f == NULL) - return -1; - - if (fscanf(f, "%d", &ret) != 1) - ret = -1; - - fclose(f); - return ret; -} - -static int -idxd_rawdev_probe_vdev(struct rte_vdev_device *vdev) -{ - struct rte_kvargs *kvlist; - struct idxd_rawdev idxd = {{0}}; /* double {} to avoid error on BSD12 */ - struct idxd_vdev_args vdev_args; - const char *name; - int ret = 0; - - name = rte_vdev_device_name(vdev); - if (name == NULL) - return -EINVAL; - - IOAT_PMD_INFO("Initializing pmd_idxd for %s", name); - - kvlist = rte_kvargs_parse(rte_vdev_device_args(vdev), valid_args); - if (kvlist == NULL) { - IOAT_PMD_ERR("Invalid kvargs key"); - return -EINVAL; - } - - ret = idxd_vdev_parse_params(kvlist, &vdev_args); - if (ret) { - IOAT_PMD_ERR("Failed to parse kvargs"); - return -EINVAL; - } - - idxd.qid = vdev_args.wq_id; - idxd.u.vdev.dsa_id = vdev_args.device_id; - idxd.max_batches = idxd_vdev_get_max_batches(&vdev_args); - - idxd.public.portal = idxd_vdev_mmap_wq(&vdev_args); - if (idxd.public.portal == NULL) { - IOAT_PMD_ERR("WQ mmap failed"); - return -ENOENT; - } - - ret = idxd_rawdev_create(name, &vdev->device, &idxd, &idxd_vdev_ops); - if (ret) { - IOAT_PMD_ERR("Failed to create rawdev %s", name); - return ret; - } - - return 0; -} - -static 
int -idxd_rawdev_remove_vdev(struct rte_vdev_device *vdev) -{ - struct idxd_rawdev *idxd; - const char *name; - struct rte_rawdev *rdev; - int ret = 0; - - name = rte_vdev_device_name(vdev); - if (name == NULL) - return -EINVAL; - - IOAT_PMD_INFO("Remove DSA vdev %p", name); - - rdev = rte_rawdev_pmd_get_named_dev(name); - if (!rdev) { - IOAT_PMD_ERR("Invalid device name (%s)", name); - return -EINVAL; - } - - idxd = rdev->dev_private; - - /* free context and memory */ - if (rdev->dev_private != NULL) { - IOAT_PMD_DEBUG("Freeing device driver memory"); - rdev->dev_private = NULL; - - if (munmap(idxd->public.portal, 0x1000) < 0) { - IOAT_PMD_ERR("Error unmapping portal"); - ret = -errno; - } - - rte_free(idxd->public.batch_ring); - rte_free(idxd->public.hdl_ring); - - rte_memzone_free(idxd->mz); - } - - if (rte_rawdev_pmd_release(rdev)) - IOAT_PMD_ERR("Device cleanup failed"); - - return ret; -} - -struct rte_vdev_driver idxd_rawdev_drv_vdev = { - .probe = idxd_rawdev_probe_vdev, - .remove = idxd_rawdev_remove_vdev, -}; - -RTE_PMD_REGISTER_VDEV(IDXD_PMD_RAWDEV_NAME, idxd_rawdev_drv_vdev); -RTE_PMD_REGISTER_PARAM_STRING(IDXD_PMD_RAWDEV_NAME, - "wq="); diff --git a/drivers/raw/ioat/meson.build b/drivers/raw/ioat/meson.build index 3b8ea65398..6382a826e7 100644 --- a/drivers/raw/ioat/meson.build +++ b/drivers/raw/ioat/meson.build @@ -4,13 +4,13 @@ build = dpdk_conf.has('RTE_ARCH_X86') reason = 'only supported on x86' sources = files( + 'idxd_bus.c', 'idxd_pci.c', - 'idxd_vdev.c', 'ioat_common.c', 'ioat_rawdev.c', 'ioat_rawdev_test.c', ) -deps += ['bus_pci', 'bus_vdev', 'mbuf', 'rawdev'] +deps += ['bus_pci', 'mbuf', 'rawdev'] headers = files( 'rte_ioat_rawdev.h', 'rte_ioat_rawdev_fns.h', From patchwork Mon Apr 26 09:52:56 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Bruce Richardson X-Patchwork-Id: 92161 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id C6A1EA0548; Mon, 26 Apr 2021 11:54:11 +0200 (CEST) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 0C623411D6; Mon, 26 Apr 2021 11:53:30 +0200 (CEST) Received: from mga02.intel.com (mga02.intel.com [134.134.136.20]) by mails.dpdk.org (Postfix) with ESMTP id 54821411BF for ; Mon, 26 Apr 2021 11:53:26 +0200 (CEST) IronPort-SDR: hcVdG781aXVTmtCJJsZ21SDxEUnKRKBC8tgdIxiOUtwze1YhdTxydUKgKidoW7pX4XryYKyyOh XlrGlcCErPmw== X-IronPort-AV: E=McAfee;i="6200,9189,9965"; a="183442911" X-IronPort-AV: E=Sophos;i="5.82,252,1613462400"; d="scan'208";a="183442911" Received: from orsmga008.jf.intel.com ([10.7.209.65]) by orsmga101.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 26 Apr 2021 02:53:21 -0700 IronPort-SDR: ATalc/ylTmbMwtD+r/F9tx0QGNEdp0ctvgdMMBgaUPZ12UGaIGtqg58DR3uJ8fEDsDw8+NGiqO TCXFXt3cnIDQ== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.82,252,1613462400"; d="scan'208";a="429336944" Received: from silpixa00399126.ir.intel.com ([10.237.223.81]) by orsmga008.jf.intel.com with ESMTP; 26 Apr 2021 02:53:20 -0700 From: Bruce Richardson To: dev@dpdk.org Cc: kevin.laatz@intel.com, jiayu.hu@intel.com, Bruce Richardson Date: Mon, 26 Apr 2021 10:52:56 +0100 Message-Id: <20210426095259.225354-10-bruce.richardson@intel.com> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20210426095259.225354-1-bruce.richardson@intel.com> References: 
<20210318182042.43658-2-bruce.richardson@intel.com> <20210426095259.225354-1-bruce.richardson@intel.com> MIME-Version: 1.0 Subject: [dpdk-dev] [PATCH v2 09/12] raw/ioat: move idxd functions to separate file X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Split the rte_ioat_rawdev_fns.h file into two separate headers, so that the data structures for the original ioat devices and the newer idxd ones can be kept separate from each other. This makes code management and rework easier. Signed-off-by: Bruce Richardson --- drivers/raw/ioat/meson.build | 1 + drivers/raw/ioat/rte_idxd_rawdev_fns.h | 275 ++++++++++++++++++++++ drivers/raw/ioat/rte_ioat_rawdev_fns.h | 310 ++----------------------- 3 files changed, 299 insertions(+), 287 deletions(-) create mode 100644 drivers/raw/ioat/rte_idxd_rawdev_fns.h diff --git a/drivers/raw/ioat/meson.build b/drivers/raw/ioat/meson.build index 6382a826e7..0e81cb5951 100644 --- a/drivers/raw/ioat/meson.build +++ b/drivers/raw/ioat/meson.build @@ -13,5 +13,6 @@ sources = files( deps += ['bus_pci', 'mbuf', 'rawdev'] headers = files( 'rte_ioat_rawdev.h', + 'rte_idxd_rawdev_fns.h', 'rte_ioat_rawdev_fns.h', ) diff --git a/drivers/raw/ioat/rte_idxd_rawdev_fns.h b/drivers/raw/ioat/rte_idxd_rawdev_fns.h new file mode 100644 index 0000000000..c2a12ebef0 --- /dev/null +++ b/drivers/raw/ioat/rte_idxd_rawdev_fns.h @@ -0,0 +1,275 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2021 Intel Corporation + */ +#ifndef _RTE_IDXD_RAWDEV_FNS_H_ +#define _RTE_IDXD_RAWDEV_FNS_H_ + +#include + +/* + * Defines used in the data path for interacting with hardware. + */ +#define IDXD_CMD_OP_SHIFT 24 +enum rte_idxd_ops { + idxd_op_nop = 0, + idxd_op_batch, + idxd_op_drain, + idxd_op_memmove, + idxd_op_fill +}; + +#define IDXD_FLAG_FENCE (1 << 0) +#define IDXD_FLAG_COMPLETION_ADDR_VALID (1 << 2) +#define IDXD_FLAG_REQUEST_COMPLETION (1 << 3) +#define IDXD_FLAG_CACHE_CONTROL (1 << 8) + +#define IOAT_COMP_UPDATE_SHIFT 3 +#define IOAT_CMD_OP_SHIFT 24 +enum rte_ioat_ops { + ioat_op_copy = 0, /* Standard DMA Operation */ + ioat_op_fill /* Block Fill */ +}; + +/** + * Hardware descriptor used by DSA hardware, for both bursts and + * for individual operations. + */ +struct rte_idxd_hw_desc { + uint32_t pasid; + uint32_t op_flags; + rte_iova_t completion; + + RTE_STD_C11 + union { + rte_iova_t src; /* source address for copy ops etc. */ + rte_iova_t desc_addr; /* descriptor pointer for batch */ + }; + rte_iova_t dst; + + uint32_t size; /* length of data for op, or batch size */ + + uint16_t intr_handle; /* completion interrupt handle */ + + /* remaining 26 bytes are reserved */ + uint16_t __reserved[13]; +} __rte_aligned(64); + +/** + * Completion record structure written back by DSA + */ +struct rte_idxd_completion { + uint8_t status; + uint8_t result; + /* 16-bits pad here */ + uint32_t completed_size; /* data length, or descriptors for batch */ + + rte_iova_t fault_address; + uint32_t invalid_flags; +} __rte_aligned(32); + +#define BATCH_SIZE 64 + +/** + * Structure used inside the driver for building up and submitting + * a batch of operations to the DSA hardware. 
+ */ +struct rte_idxd_desc_batch { + struct rte_idxd_completion comp; /* the completion record for batch */ + + uint16_t submitted; + uint16_t op_count; + uint16_t hdl_end; + + struct rte_idxd_hw_desc batch_desc; + + /* batches must always have 2 descriptors, so put a null at the start */ + struct rte_idxd_hw_desc null_desc; + struct rte_idxd_hw_desc ops[BATCH_SIZE]; +}; + +/** + * structure used to save the "handles" provided by the user to be + * returned to the user on job completion. + */ +struct rte_idxd_user_hdl { + uint64_t src; + uint64_t dst; +}; + +/** + * @internal + * Structure representing an IDXD device instance + */ +struct rte_idxd_rawdev { + enum rte_ioat_dev_type type; + struct rte_ioat_xstats xstats; + + void *portal; /* address to write the batch descriptor */ + + /* counters to track the batches and the individual op handles */ + uint16_t batch_ring_sz; /* size of batch ring */ + uint16_t hdl_ring_sz; /* size of the user hdl ring */ + + uint16_t next_batch; /* where we write descriptor ops */ + uint16_t next_completed; /* batch where we read completions */ + uint16_t next_ret_hdl; /* the next user hdl to return */ + uint16_t last_completed_hdl; /* the last user hdl that has completed */ + uint16_t next_free_hdl; /* where the handle for next op will go */ + uint16_t hdls_disable; /* disable tracking completion handles */ + + struct rte_idxd_user_hdl *hdl_ring; + struct rte_idxd_desc_batch *batch_ring; +}; + +static __rte_always_inline int +__idxd_write_desc(int dev_id, const struct rte_idxd_hw_desc *desc, + const struct rte_idxd_user_hdl *hdl) +{ + struct rte_idxd_rawdev *idxd = + (struct rte_idxd_rawdev *)rte_rawdevs[dev_id].dev_private; + struct rte_idxd_desc_batch *b = &idxd->batch_ring[idxd->next_batch]; + + /* check for room in the handle ring */ + if (((idxd->next_free_hdl + 1) & (idxd->hdl_ring_sz - 1)) == idxd->next_ret_hdl) + goto failed; + + /* check for space in current batch */ + if (b->op_count >= BATCH_SIZE) + goto failed; + + /* check that we can actually use the current batch */ + if (b->submitted) + goto failed; + + /* write the descriptor */ + b->ops[b->op_count++] = *desc; + + /* store the completion details */ + if (!idxd->hdls_disable) + idxd->hdl_ring[idxd->next_free_hdl] = *hdl; + if (++idxd->next_free_hdl == idxd->hdl_ring_sz) + idxd->next_free_hdl = 0; + + idxd->xstats.enqueued++; + return 1; + +failed: + idxd->xstats.enqueue_failed++; + rte_errno = ENOSPC; + return 0; +} + +static __rte_always_inline int +__idxd_enqueue_fill(int dev_id, uint64_t pattern, rte_iova_t dst, + unsigned int length, uintptr_t dst_hdl) +{ + const struct rte_idxd_hw_desc desc = { + .op_flags = (idxd_op_fill << IDXD_CMD_OP_SHIFT) | + IDXD_FLAG_CACHE_CONTROL, + .src = pattern, + .dst = dst, + .size = length + }; + const struct rte_idxd_user_hdl hdl = { + .dst = dst_hdl + }; + return __idxd_write_desc(dev_id, &desc, &hdl); +} + +static __rte_always_inline int +__idxd_enqueue_copy(int dev_id, rte_iova_t src, rte_iova_t dst, + unsigned int length, uintptr_t src_hdl, uintptr_t dst_hdl) +{ + const struct rte_idxd_hw_desc desc = { + .op_flags = (idxd_op_memmove << IDXD_CMD_OP_SHIFT) | + IDXD_FLAG_CACHE_CONTROL, + .src = src, + .dst = dst, + .size = length + }; + const struct rte_idxd_user_hdl hdl = { + .src = src_hdl, + .dst = dst_hdl + }; + return __idxd_write_desc(dev_id, &desc, &hdl); +} + +static __rte_always_inline int +__idxd_fence(int dev_id) +{ + static const struct rte_idxd_hw_desc fence = { + .op_flags = IDXD_FLAG_FENCE + }; + static const struct rte_idxd_user_hdl 
null_hdl; + return __idxd_write_desc(dev_id, &fence, &null_hdl); +} + +static __rte_always_inline void +__idxd_movdir64b(volatile void *dst, const void *src) +{ + asm volatile (".byte 0x66, 0x0f, 0x38, 0xf8, 0x02" + : + : "a" (dst), "d" (src)); +} + +static __rte_always_inline int +__idxd_perform_ops(int dev_id) +{ + struct rte_idxd_rawdev *idxd = + (struct rte_idxd_rawdev *)rte_rawdevs[dev_id].dev_private; + struct rte_idxd_desc_batch *b = &idxd->batch_ring[idxd->next_batch]; + + if (b->submitted || b->op_count == 0) + return 0; + b->hdl_end = idxd->next_free_hdl; + b->comp.status = 0; + b->submitted = 1; + b->batch_desc.size = b->op_count + 1; + __idxd_movdir64b(idxd->portal, &b->batch_desc); + + if (++idxd->next_batch == idxd->batch_ring_sz) + idxd->next_batch = 0; + idxd->xstats.started = idxd->xstats.enqueued; + return 0; +} + +static __rte_always_inline int +__idxd_completed_ops(int dev_id, uint8_t max_ops, + uintptr_t *src_hdls, uintptr_t *dst_hdls) +{ + struct rte_idxd_rawdev *idxd = + (struct rte_idxd_rawdev *)rte_rawdevs[dev_id].dev_private; + struct rte_idxd_desc_batch *b = &idxd->batch_ring[idxd->next_completed]; + uint16_t h_idx = idxd->next_ret_hdl; + int n = 0; + + while (b->submitted && b->comp.status != 0) { + idxd->last_completed_hdl = b->hdl_end; + b->submitted = 0; + b->op_count = 0; + if (++idxd->next_completed == idxd->batch_ring_sz) + idxd->next_completed = 0; + b = &idxd->batch_ring[idxd->next_completed]; + } + + if (!idxd->hdls_disable) + for (n = 0; n < max_ops && h_idx != idxd->last_completed_hdl; n++) { + src_hdls[n] = idxd->hdl_ring[h_idx].src; + dst_hdls[n] = idxd->hdl_ring[h_idx].dst; + if (++h_idx == idxd->hdl_ring_sz) + h_idx = 0; + } + else + while (h_idx != idxd->last_completed_hdl) { + n++; + if (++h_idx == idxd->hdl_ring_sz) + h_idx = 0; + } + + idxd->next_ret_hdl = h_idx; + + idxd->xstats.completed += n; + return n; +} + +#endif diff --git a/drivers/raw/ioat/rte_ioat_rawdev_fns.h b/drivers/raw/ioat/rte_ioat_rawdev_fns.h index 477c1b7b41..598852b1fa 100644 --- a/drivers/raw/ioat/rte_ioat_rawdev_fns.h +++ b/drivers/raw/ioat/rte_ioat_rawdev_fns.h @@ -9,6 +9,29 @@ #include #include +/** + * @internal + * Identify the data path to use. + * Must be first field of rte_ioat_rawdev and rte_idxd_rawdev structs + */ +enum rte_ioat_dev_type { + RTE_IOAT_DEV, + RTE_IDXD_DEV, +}; + +/** + * @internal + * some statistics for tracking, if added/changed update xstats fns + */ +struct rte_ioat_xstats { + uint64_t enqueue_failed; + uint64_t enqueued; + uint64_t started; + uint64_t completed; +}; + +#include "rte_idxd_rawdev_fns.h" + /** * @internal * Structure representing a device descriptor @@ -39,27 +62,6 @@ struct rte_ioat_generic_hw_desc { uint64_t op_specific[4]; }; -/** - * @internal - * Identify the data path to use. - * Must be first field of rte_ioat_rawdev and rte_idxd_rawdev structs - */ -enum rte_ioat_dev_type { - RTE_IOAT_DEV, - RTE_IDXD_DEV, -}; - -/** - * @internal - * some statistics for tracking, if added/changed update xstats fns - */ -struct rte_ioat_xstats { - uint64_t enqueue_failed; - uint64_t enqueued; - uint64_t started; - uint64_t completed; -}; - /** * @internal * Structure representing an IOAT device instance @@ -98,121 +100,6 @@ struct rte_ioat_rawdev { #define RTE_IOAT_CHANSTS_HALTED 0x3 #define RTE_IOAT_CHANSTS_ARMED 0x4 -/* - * Defines used in the data path for interacting with hardware. 
- */ -#define IDXD_CMD_OP_SHIFT 24 -enum rte_idxd_ops { - idxd_op_nop = 0, - idxd_op_batch, - idxd_op_drain, - idxd_op_memmove, - idxd_op_fill -}; - -#define IDXD_FLAG_FENCE (1 << 0) -#define IDXD_FLAG_COMPLETION_ADDR_VALID (1 << 2) -#define IDXD_FLAG_REQUEST_COMPLETION (1 << 3) -#define IDXD_FLAG_CACHE_CONTROL (1 << 8) - -#define IOAT_COMP_UPDATE_SHIFT 3 -#define IOAT_CMD_OP_SHIFT 24 -enum rte_ioat_ops { - ioat_op_copy = 0, /* Standard DMA Operation */ - ioat_op_fill /* Block Fill */ -}; - -/** - * Hardware descriptor used by DSA hardware, for both bursts and - * for individual operations. - */ -struct rte_idxd_hw_desc { - uint32_t pasid; - uint32_t op_flags; - rte_iova_t completion; - - RTE_STD_C11 - union { - rte_iova_t src; /* source address for copy ops etc. */ - rte_iova_t desc_addr; /* descriptor pointer for batch */ - }; - rte_iova_t dst; - - uint32_t size; /* length of data for op, or batch size */ - - uint16_t intr_handle; /* completion interrupt handle */ - - /* remaining 26 bytes are reserved */ - uint16_t __reserved[13]; -} __rte_aligned(64); - -/** - * Completion record structure written back by DSA - */ -struct rte_idxd_completion { - uint8_t status; - uint8_t result; - /* 16-bits pad here */ - uint32_t completed_size; /* data length, or descriptors for batch */ - - rte_iova_t fault_address; - uint32_t invalid_flags; -} __rte_aligned(32); - -#define BATCH_SIZE 64 - -/** - * Structure used inside the driver for building up and submitting - * a batch of operations to the DSA hardware. - */ -struct rte_idxd_desc_batch { - struct rte_idxd_completion comp; /* the completion record for batch */ - - uint16_t submitted; - uint16_t op_count; - uint16_t hdl_end; - - struct rte_idxd_hw_desc batch_desc; - - /* batches must always have 2 descriptors, so put a null at the start */ - struct rte_idxd_hw_desc null_desc; - struct rte_idxd_hw_desc ops[BATCH_SIZE]; -}; - -/** - * structure used to save the "handles" provided by the user to be - * returned to the user on job completion. 
- */ -struct rte_idxd_user_hdl { - uint64_t src; - uint64_t dst; -}; - -/** - * @internal - * Structure representing an IDXD device instance - */ -struct rte_idxd_rawdev { - enum rte_ioat_dev_type type; - struct rte_ioat_xstats xstats; - - void *portal; /* address to write the batch descriptor */ - - /* counters to track the batches and the individual op handles */ - uint16_t batch_ring_sz; /* size of batch ring */ - uint16_t hdl_ring_sz; /* size of the user hdl ring */ - - uint16_t next_batch; /* where we write descriptor ops */ - uint16_t next_completed; /* batch where we read completions */ - uint16_t next_ret_hdl; /* the next user hdl to return */ - uint16_t last_completed_hdl; /* the last user hdl that has completed */ - uint16_t next_free_hdl; /* where the handle for next op will go */ - uint16_t hdls_disable; /* disable tracking completion handles */ - - struct rte_idxd_user_hdl *hdl_ring; - struct rte_idxd_desc_batch *batch_ring; -}; - static __rte_always_inline int __ioat_write_desc(int dev_id, uint32_t op, uint64_t src, phys_addr_t dst, unsigned int length, uintptr_t src_hdl, uintptr_t dst_hdl) @@ -373,157 +260,6 @@ __ioat_completed_ops(int dev_id, uint8_t max_copies, return count; } -static __rte_always_inline int -__idxd_write_desc(int dev_id, const struct rte_idxd_hw_desc *desc, - const struct rte_idxd_user_hdl *hdl) -{ - struct rte_idxd_rawdev *idxd = - (struct rte_idxd_rawdev *)rte_rawdevs[dev_id].dev_private; - struct rte_idxd_desc_batch *b = &idxd->batch_ring[idxd->next_batch]; - - /* check for room in the handle ring */ - if (((idxd->next_free_hdl + 1) & (idxd->hdl_ring_sz - 1)) == idxd->next_ret_hdl) - goto failed; - - /* check for space in current batch */ - if (b->op_count >= BATCH_SIZE) - goto failed; - - /* check that we can actually use the current batch */ - if (b->submitted) - goto failed; - - /* write the descriptor */ - b->ops[b->op_count++] = *desc; - - /* store the completion details */ - if (!idxd->hdls_disable) - idxd->hdl_ring[idxd->next_free_hdl] = *hdl; - if (++idxd->next_free_hdl == idxd->hdl_ring_sz) - idxd->next_free_hdl = 0; - - idxd->xstats.enqueued++; - return 1; - -failed: - idxd->xstats.enqueue_failed++; - rte_errno = ENOSPC; - return 0; -} - -static __rte_always_inline int -__idxd_enqueue_fill(int dev_id, uint64_t pattern, rte_iova_t dst, - unsigned int length, uintptr_t dst_hdl) -{ - const struct rte_idxd_hw_desc desc = { - .op_flags = (idxd_op_fill << IDXD_CMD_OP_SHIFT) | - IDXD_FLAG_CACHE_CONTROL, - .src = pattern, - .dst = dst, - .size = length - }; - const struct rte_idxd_user_hdl hdl = { - .dst = dst_hdl - }; - return __idxd_write_desc(dev_id, &desc, &hdl); -} - -static __rte_always_inline int -__idxd_enqueue_copy(int dev_id, rte_iova_t src, rte_iova_t dst, - unsigned int length, uintptr_t src_hdl, uintptr_t dst_hdl) -{ - const struct rte_idxd_hw_desc desc = { - .op_flags = (idxd_op_memmove << IDXD_CMD_OP_SHIFT) | - IDXD_FLAG_CACHE_CONTROL, - .src = src, - .dst = dst, - .size = length - }; - const struct rte_idxd_user_hdl hdl = { - .src = src_hdl, - .dst = dst_hdl - }; - return __idxd_write_desc(dev_id, &desc, &hdl); -} - -static __rte_always_inline int -__idxd_fence(int dev_id) -{ - static const struct rte_idxd_hw_desc fence = { - .op_flags = IDXD_FLAG_FENCE - }; - static const struct rte_idxd_user_hdl null_hdl; - return __idxd_write_desc(dev_id, &fence, &null_hdl); -} - -static __rte_always_inline void -__idxd_movdir64b(volatile void *dst, const void *src) -{ - asm volatile (".byte 0x66, 0x0f, 0x38, 0xf8, 0x02" - : - : "a" (dst), "d" 
(src)); -} - -static __rte_always_inline int -__idxd_perform_ops(int dev_id) -{ - struct rte_idxd_rawdev *idxd = - (struct rte_idxd_rawdev *)rte_rawdevs[dev_id].dev_private; - struct rte_idxd_desc_batch *b = &idxd->batch_ring[idxd->next_batch]; - - if (b->submitted || b->op_count == 0) - return 0; - b->hdl_end = idxd->next_free_hdl; - b->comp.status = 0; - b->submitted = 1; - b->batch_desc.size = b->op_count + 1; - __idxd_movdir64b(idxd->portal, &b->batch_desc); - - if (++idxd->next_batch == idxd->batch_ring_sz) - idxd->next_batch = 0; - idxd->xstats.started = idxd->xstats.enqueued; - return 0; -} - -static __rte_always_inline int -__idxd_completed_ops(int dev_id, uint8_t max_ops, - uintptr_t *src_hdls, uintptr_t *dst_hdls) -{ - struct rte_idxd_rawdev *idxd = - (struct rte_idxd_rawdev *)rte_rawdevs[dev_id].dev_private; - struct rte_idxd_desc_batch *b = &idxd->batch_ring[idxd->next_completed]; - uint16_t h_idx = idxd->next_ret_hdl; - int n = 0; - - while (b->submitted && b->comp.status != 0) { - idxd->last_completed_hdl = b->hdl_end; - b->submitted = 0; - b->op_count = 0; - if (++idxd->next_completed == idxd->batch_ring_sz) - idxd->next_completed = 0; - b = &idxd->batch_ring[idxd->next_completed]; - } - - if (!idxd->hdls_disable) - for (n = 0; n < max_ops && h_idx != idxd->last_completed_hdl; n++) { - src_hdls[n] = idxd->hdl_ring[h_idx].src; - dst_hdls[n] = idxd->hdl_ring[h_idx].dst; - if (++h_idx == idxd->hdl_ring_sz) - h_idx = 0; - } - else - while (h_idx != idxd->last_completed_hdl) { - n++; - if (++h_idx == idxd->hdl_ring_sz) - h_idx = 0; - } - - idxd->next_ret_hdl = h_idx; - - idxd->xstats.completed += n; - return n; -} - static inline int rte_ioat_enqueue_fill(int dev_id, uint64_t pattern, phys_addr_t dst, unsigned int len, uintptr_t dst_hdl) From patchwork Mon Apr 26 09:52:57 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Bruce Richardson X-Patchwork-Id: 92163 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id 60EBEA0548; Mon, 26 Apr 2021 11:54:22 +0200 (CEST) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 7150A411E0; Mon, 26 Apr 2021 11:53:32 +0200 (CEST) Received: from mga02.intel.com (mga02.intel.com [134.134.136.20]) by mails.dpdk.org (Postfix) with ESMTP id 31DB1411CD for ; Mon, 26 Apr 2021 11:53:27 +0200 (CEST) IronPort-SDR: Qkrr33YXYbELxFAKIZ0M6lzRySZfdlLKR7Pmz0Z4rW08AbIsMZLo7E59ztZKfCA9I29PvnYUOh fwxBMFJqVBLg== X-IronPort-AV: E=McAfee;i="6200,9189,9965"; a="183442914" X-IronPort-AV: E=Sophos;i="5.82,252,1613462400"; d="scan'208";a="183442914" Received: from orsmga008.jf.intel.com ([10.7.209.65]) by orsmga101.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 26 Apr 2021 02:53:23 -0700 IronPort-SDR: LCKesTzLWPfCJVT/lG+QX5wwGr0WcLk9iiIjkTsQNtEzT4VCIhORa06D4uLCrhWGXFFSWsJbcs 6KGk97XU6CVA== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.82,252,1613462400"; d="scan'208";a="429336951" Received: from silpixa00399126.ir.intel.com ([10.237.223.81]) by orsmga008.jf.intel.com with ESMTP; 26 Apr 2021 02:53:22 -0700 From: Bruce Richardson To: dev@dpdk.org Cc: kevin.laatz@intel.com, jiayu.hu@intel.com, Bruce Richardson Date: Mon, 26 Apr 2021 10:52:57 +0100 Message-Id: <20210426095259.225354-11-bruce.richardson@intel.com> X-Mailer: git-send-email 2.30.2 In-Reply-To: 
<20210426095259.225354-1-bruce.richardson@intel.com> References: <20210318182042.43658-2-bruce.richardson@intel.com> <20210426095259.225354-1-bruce.richardson@intel.com> MIME-Version: 1.0 Subject: [dpdk-dev] [PATCH v2 10/12] raw/ioat: rework SW ring layout X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" The ring management in the idxd part of the driver is more complex than it needs to be, tracking individual batches in a ring and having null descriptors as padding to avoid having single-operation batches. This can be simplified by using a regular ring-based layout, with additional overflow at the end to ensure that the one does not need to wrap within a batch. Signed-off-by: Bruce Richardson --- drivers/raw/ioat/idxd_pci.c | 5 +- drivers/raw/ioat/ioat_common.c | 99 +++++------ drivers/raw/ioat/ioat_rawdev_test.c | 1 + drivers/raw/ioat/rte_idxd_rawdev_fns.h | 233 +++++++++++++------------ 4 files changed, 179 insertions(+), 159 deletions(-) diff --git a/drivers/raw/ioat/idxd_pci.c b/drivers/raw/ioat/idxd_pci.c index b48e565b4c..13515dbc6c 100644 --- a/drivers/raw/ioat/idxd_pci.c +++ b/drivers/raw/ioat/idxd_pci.c @@ -90,7 +90,7 @@ idxd_pci_dev_start(struct rte_rawdev *dev) return 0; } - if (idxd->public.batch_ring == NULL) { + if (idxd->public.desc_ring == NULL) { IOAT_PMD_ERR("WQ %d has not been fully configured", idxd->qid); return -EINVAL; } @@ -337,7 +337,8 @@ idxd_rawdev_destroy(const char *name) /* free device memory */ IOAT_PMD_DEBUG("Freeing device driver memory"); rdev->dev_private = NULL; - rte_free(idxd->public.batch_ring); + rte_free(idxd->public.batch_idx_ring); + rte_free(idxd->public.desc_ring); rte_free(idxd->public.hdl_ring); rte_memzone_free(idxd->mz); diff --git a/drivers/raw/ioat/ioat_common.c b/drivers/raw/ioat/ioat_common.c index d055c36a2a..fcb30572e6 100644 --- a/drivers/raw/ioat/ioat_common.c +++ b/drivers/raw/ioat/ioat_common.c @@ -84,21 +84,21 @@ idxd_dev_dump(struct rte_rawdev *dev, FILE *f) fprintf(f, "Driver: %s\n\n", dev->driver_name); fprintf(f, "Portal: %p\n", rte_idxd->portal); - fprintf(f, "Batch Ring size: %u\n", rte_idxd->batch_ring_sz); - fprintf(f, "Comp Handle Ring size: %u\n\n", rte_idxd->hdl_ring_sz); - - fprintf(f, "Next batch: %u\n", rte_idxd->next_batch); - fprintf(f, "Next batch to be completed: %u\n", rte_idxd->next_completed); - for (i = 0; i < rte_idxd->batch_ring_sz; i++) { - struct rte_idxd_desc_batch *b = &rte_idxd->batch_ring[i]; - fprintf(f, "Batch %u @%p: submitted=%u, op_count=%u, hdl_end=%u\n", - i, b, b->submitted, b->op_count, b->hdl_end); - } - - fprintf(f, "\n"); - fprintf(f, "Next free hdl: %u\n", rte_idxd->next_free_hdl); - fprintf(f, "Last completed hdl: %u\n", rte_idxd->last_completed_hdl); - fprintf(f, "Next returned hdl: %u\n", rte_idxd->next_ret_hdl); + fprintf(f, "Config: {ring_size: %u, hdls_disable: %u}\n\n", + rte_idxd->cfg.ring_size, rte_idxd->cfg.hdls_disable); + + fprintf(f, "max batches: %u\n", rte_idxd->max_batches); + fprintf(f, "batch idx read: %u\n", rte_idxd->batch_idx_read); + fprintf(f, "batch idx write: %u\n", rte_idxd->batch_idx_write); + fprintf(f, "batch idxes:"); + for (i = 0; i < rte_idxd->max_batches + 1; i++) + fprintf(f, "%u ", rte_idxd->batch_idx_ring[i]); + fprintf(f, "\n\n"); + + fprintf(f, "hdls read: %u\n", rte_idxd->max_batches); + fprintf(f, "hdls avail: %u\n", rte_idxd->hdls_avail); + fprintf(f, "batch start: %u\n", 
rte_idxd->batch_start); + fprintf(f, "batch size: %u\n", rte_idxd->batch_size); return 0; } @@ -114,10 +114,8 @@ idxd_dev_info_get(struct rte_rawdev *dev, rte_rawdev_obj_t dev_info, if (info_size != sizeof(*cfg)) return -EINVAL; - if (cfg != NULL) { - cfg->ring_size = rte_idxd->hdl_ring_sz; - cfg->hdls_disable = rte_idxd->hdls_disable; - } + if (cfg != NULL) + *cfg = rte_idxd->cfg; return 0; } @@ -129,8 +127,6 @@ idxd_dev_configure(const struct rte_rawdev *dev, struct rte_idxd_rawdev *rte_idxd = &idxd->public; struct rte_ioat_rawdev_config *cfg = config; uint16_t max_desc = cfg->ring_size; - uint16_t max_batches = max_desc / BATCH_SIZE; - uint16_t i; if (config_size != sizeof(*cfg)) return -EINVAL; @@ -140,47 +136,34 @@ idxd_dev_configure(const struct rte_rawdev *dev, return -EAGAIN; } - rte_idxd->hdls_disable = cfg->hdls_disable; + rte_idxd->cfg = *cfg; - /* limit the batches to what can be stored in hardware */ - if (max_batches > idxd->max_batches) { - IOAT_PMD_DEBUG("Ring size of %u is too large for this device, need to limit to %u batches of %u", - max_desc, idxd->max_batches, BATCH_SIZE); - max_batches = idxd->max_batches; - max_desc = max_batches * BATCH_SIZE; - } if (!rte_is_power_of_2(max_desc)) max_desc = rte_align32pow2(max_desc); - IOAT_PMD_DEBUG("Rawdev %u using %u descriptors in %u batches", - dev->dev_id, max_desc, max_batches); + IOAT_PMD_DEBUG("Rawdev %u using %u descriptors", + dev->dev_id, max_desc); + rte_idxd->desc_ring_mask = max_desc - 1; /* in case we are reconfiguring a device, free any existing memory */ - rte_free(rte_idxd->batch_ring); + rte_free(rte_idxd->desc_ring); rte_free(rte_idxd->hdl_ring); - rte_idxd->batch_ring = rte_zmalloc(NULL, - sizeof(*rte_idxd->batch_ring) * max_batches, 0); - if (rte_idxd->batch_ring == NULL) + /* allocate the descriptor ring at 2x size as batches can't wrap */ + rte_idxd->desc_ring = rte_zmalloc(NULL, + sizeof(*rte_idxd->desc_ring) * max_desc * 2, 0); + if (rte_idxd->desc_ring == NULL) return -ENOMEM; + rte_idxd->desc_iova = rte_mem_virt2iova(rte_idxd->desc_ring); rte_idxd->hdl_ring = rte_zmalloc(NULL, sizeof(*rte_idxd->hdl_ring) * max_desc, 0); if (rte_idxd->hdl_ring == NULL) { - rte_free(rte_idxd->batch_ring); - rte_idxd->batch_ring = NULL; + rte_free(rte_idxd->desc_ring); + rte_idxd->desc_ring = NULL; return -ENOMEM; } - rte_idxd->batch_ring_sz = max_batches; - rte_idxd->hdl_ring_sz = max_desc; - - for (i = 0; i < rte_idxd->batch_ring_sz; i++) { - struct rte_idxd_desc_batch *b = &rte_idxd->batch_ring[i]; - b->batch_desc.completion = rte_mem_virt2iova(&b->comp); - b->batch_desc.desc_addr = rte_mem_virt2iova(&b->null_desc); - b->batch_desc.op_flags = (idxd_op_batch << IDXD_CMD_OP_SHIFT) | - IDXD_FLAG_COMPLETION_ADDR_VALID | - IDXD_FLAG_REQUEST_COMPLETION; - } + rte_idxd->hdls_read = rte_idxd->batch_start = 0; + rte_idxd->batch_size = 0; return 0; } @@ -191,6 +174,7 @@ idxd_rawdev_create(const char *name, struct rte_device *dev, const struct rte_rawdev_ops *ops) { struct idxd_rawdev *idxd; + struct rte_idxd_rawdev *public; struct rte_rawdev *rawdev = NULL; const struct rte_memzone *mz = NULL; char mz_name[RTE_MEMZONE_NAMESIZE]; @@ -245,13 +229,30 @@ idxd_rawdev_create(const char *name, struct rte_device *dev, idxd = rawdev->dev_private; *idxd = *base_idxd; /* copy over the main fields already passed in */ - idxd->public.type = RTE_IDXD_DEV; idxd->rawdev = rawdev; idxd->mz = mz; + public = &idxd->public; + public->type = RTE_IDXD_DEV; + public->max_batches = idxd->max_batches; + public->batch_idx_read = 0; + 
public->batch_idx_write = 0; + /* allocate batch index ring. The +1 is because we can never fully use + * the ring, otherwise read == write means both full and empty. + */ + public->batch_idx_ring = rte_zmalloc(NULL, + sizeof(uint16_t) * (idxd->max_batches + 1), 0); + if (public->batch_idx_ring == NULL) { + IOAT_PMD_ERR("Unable to reserve memory for batch data\n"); + ret = -ENOMEM; + goto cleanup; + } + return 0; cleanup: + if (mz) + rte_memzone_free(mz); if (rawdev) rte_rawdev_pmd_release(rawdev); diff --git a/drivers/raw/ioat/ioat_rawdev_test.c b/drivers/raw/ioat/ioat_rawdev_test.c index a5064d739d..51eebe152f 100644 --- a/drivers/raw/ioat/ioat_rawdev_test.c +++ b/drivers/raw/ioat/ioat_rawdev_test.c @@ -206,6 +206,7 @@ test_enqueue_copies(int dev_id) if (rte_ioat_completed_ops(dev_id, max_completions, (void *)&completed[0], (void *)&completed[max_completions]) != max_ops) { PRINT_ERR("Error with rte_ioat_completed_ops\n"); + rte_rawdev_dump(dev_id, stdout); return -1; } if (completed[0] != src || completed[max_completions] != dst) { diff --git a/drivers/raw/ioat/rte_idxd_rawdev_fns.h b/drivers/raw/ioat/rte_idxd_rawdev_fns.h index c2a12ebef0..4c49d2b84a 100644 --- a/drivers/raw/ioat/rte_idxd_rawdev_fns.h +++ b/drivers/raw/ioat/rte_idxd_rawdev_fns.h @@ -7,7 +7,7 @@ #include /* - * Defines used in the data path for interacting with hardware. + * Defines used in the data path for interacting with IDXD hardware. */ #define IDXD_CMD_OP_SHIFT 24 enum rte_idxd_ops { @@ -67,26 +67,6 @@ struct rte_idxd_completion { uint32_t invalid_flags; } __rte_aligned(32); -#define BATCH_SIZE 64 - -/** - * Structure used inside the driver for building up and submitting - * a batch of operations to the DSA hardware. - */ -struct rte_idxd_desc_batch { - struct rte_idxd_completion comp; /* the completion record for batch */ - - uint16_t submitted; - uint16_t op_count; - uint16_t hdl_end; - - struct rte_idxd_hw_desc batch_desc; - - /* batches must always have 2 descriptors, so put a null at the start */ - struct rte_idxd_hw_desc null_desc; - struct rte_idxd_hw_desc ops[BATCH_SIZE]; -}; - /** * structure used to save the "handles" provided by the user to be * returned to the user on job completion. 
@@ -106,51 +86,65 @@ struct rte_idxd_rawdev { void *portal; /* address to write the batch descriptor */ - /* counters to track the batches and the individual op handles */ - uint16_t batch_ring_sz; /* size of batch ring */ - uint16_t hdl_ring_sz; /* size of the user hdl ring */ + struct rte_ioat_rawdev_config cfg; + rte_iova_t desc_iova; /* base address of desc ring, needed for completions */ - uint16_t next_batch; /* where we write descriptor ops */ - uint16_t next_completed; /* batch where we read completions */ - uint16_t next_ret_hdl; /* the next user hdl to return */ - uint16_t last_completed_hdl; /* the last user hdl that has completed */ - uint16_t next_free_hdl; /* where the handle for next op will go */ - uint16_t hdls_disable; /* disable tracking completion handles */ + /* counters to track the batches */ + unsigned short max_batches; + unsigned short batch_idx_read; + unsigned short batch_idx_write; + unsigned short *batch_idx_ring; /* store where each batch ends */ + /* track descriptors and handles */ + unsigned short desc_ring_mask; + unsigned short hdls_avail; /* handles for ops completed */ + unsigned short hdls_read; /* the read pointer for hdls/desc rings */ + unsigned short batch_start; /* start+size == write pointer for hdls/desc */ + unsigned short batch_size; + + struct rte_idxd_hw_desc *desc_ring; struct rte_idxd_user_hdl *hdl_ring; - struct rte_idxd_desc_batch *batch_ring; }; +static __rte_always_inline rte_iova_t +__desc_idx_to_iova(struct rte_idxd_rawdev *idxd, uint16_t n) +{ + return idxd->desc_iova + (n * sizeof(struct rte_idxd_hw_desc)); +} + static __rte_always_inline int -__idxd_write_desc(int dev_id, const struct rte_idxd_hw_desc *desc, +__idxd_write_desc(int dev_id, + const uint32_t op_flags, + const rte_iova_t src, + const rte_iova_t dst, + const uint32_t size, const struct rte_idxd_user_hdl *hdl) { struct rte_idxd_rawdev *idxd = (struct rte_idxd_rawdev *)rte_rawdevs[dev_id].dev_private; - struct rte_idxd_desc_batch *b = &idxd->batch_ring[idxd->next_batch]; + uint16_t write_idx = idxd->batch_start + idxd->batch_size; - /* check for room in the handle ring */ - if (((idxd->next_free_hdl + 1) & (idxd->hdl_ring_sz - 1)) == idxd->next_ret_hdl) + /* first check batch ring space then desc ring space */ + if ((idxd->batch_idx_read == 0 && idxd->batch_idx_write == idxd->max_batches) || + idxd->batch_idx_write + 1 == idxd->batch_idx_read) goto failed; - - /* check for space in current batch */ - if (b->op_count >= BATCH_SIZE) - goto failed; - - /* check that we can actually use the current batch */ - if (b->submitted) + if (((write_idx + 1) & idxd->desc_ring_mask) == idxd->hdls_read) goto failed; - /* write the descriptor */ - b->ops[b->op_count++] = *desc; + /* write desc and handle. 
Note, descriptors don't wrap */ + idxd->desc_ring[write_idx].pasid = 0; + idxd->desc_ring[write_idx].op_flags = op_flags | IDXD_FLAG_COMPLETION_ADDR_VALID; + idxd->desc_ring[write_idx].completion = __desc_idx_to_iova(idxd, write_idx); + idxd->desc_ring[write_idx].src = src; + idxd->desc_ring[write_idx].dst = dst; + idxd->desc_ring[write_idx].size = size; - /* store the completion details */ - if (!idxd->hdls_disable) - idxd->hdl_ring[idxd->next_free_hdl] = *hdl; - if (++idxd->next_free_hdl == idxd->hdl_ring_sz) - idxd->next_free_hdl = 0; + idxd->hdl_ring[write_idx & idxd->desc_ring_mask] = *hdl; + idxd->batch_size++; idxd->xstats.enqueued++; + + rte_prefetch0_write(&idxd->desc_ring[write_idx + 1]); return 1; failed: @@ -163,53 +157,42 @@ static __rte_always_inline int __idxd_enqueue_fill(int dev_id, uint64_t pattern, rte_iova_t dst, unsigned int length, uintptr_t dst_hdl) { - const struct rte_idxd_hw_desc desc = { - .op_flags = (idxd_op_fill << IDXD_CMD_OP_SHIFT) | - IDXD_FLAG_CACHE_CONTROL, - .src = pattern, - .dst = dst, - .size = length - }; const struct rte_idxd_user_hdl hdl = { .dst = dst_hdl }; - return __idxd_write_desc(dev_id, &desc, &hdl); + return __idxd_write_desc(dev_id, + (idxd_op_fill << IDXD_CMD_OP_SHIFT) | IDXD_FLAG_CACHE_CONTROL, + pattern, dst, length, &hdl); } static __rte_always_inline int __idxd_enqueue_copy(int dev_id, rte_iova_t src, rte_iova_t dst, unsigned int length, uintptr_t src_hdl, uintptr_t dst_hdl) { - const struct rte_idxd_hw_desc desc = { - .op_flags = (idxd_op_memmove << IDXD_CMD_OP_SHIFT) | - IDXD_FLAG_CACHE_CONTROL, - .src = src, - .dst = dst, - .size = length - }; const struct rte_idxd_user_hdl hdl = { .src = src_hdl, .dst = dst_hdl }; - return __idxd_write_desc(dev_id, &desc, &hdl); + return __idxd_write_desc(dev_id, + (idxd_op_memmove << IDXD_CMD_OP_SHIFT) | IDXD_FLAG_CACHE_CONTROL, + src, dst, length, &hdl); } static __rte_always_inline int __idxd_fence(int dev_id) { - static const struct rte_idxd_hw_desc fence = { - .op_flags = IDXD_FLAG_FENCE - }; static const struct rte_idxd_user_hdl null_hdl; - return __idxd_write_desc(dev_id, &fence, &null_hdl); + /* only op field needs filling - zero src, dst and length */ + return __idxd_write_desc(dev_id, IDXD_FLAG_FENCE, 0, 0, 0, &null_hdl); } static __rte_always_inline void -__idxd_movdir64b(volatile void *dst, const void *src) +__idxd_movdir64b(volatile void *dst, const struct rte_idxd_hw_desc *src) { asm volatile (".byte 0x66, 0x0f, 0x38, 0xf8, 0x02" : - : "a" (dst), "d" (src)); + : "a" (dst), "d" (src) + : "memory"); } static __rte_always_inline int @@ -217,19 +200,49 @@ __idxd_perform_ops(int dev_id) { struct rte_idxd_rawdev *idxd = (struct rte_idxd_rawdev *)rte_rawdevs[dev_id].dev_private; - struct rte_idxd_desc_batch *b = &idxd->batch_ring[idxd->next_batch]; + /* write completion to last desc in the batch */ + uint16_t comp_idx = idxd->batch_start + idxd->batch_size - 1; + if (comp_idx > idxd->desc_ring_mask) { + comp_idx &= idxd->desc_ring_mask; + *((uint64_t *)&idxd->desc_ring[comp_idx]) = 0; /* zero start of desc */ + } - if (b->submitted || b->op_count == 0) + if (idxd->batch_size == 0) return 0; - b->hdl_end = idxd->next_free_hdl; - b->comp.status = 0; - b->submitted = 1; - b->batch_desc.size = b->op_count + 1; - __idxd_movdir64b(idxd->portal, &b->batch_desc); - - if (++idxd->next_batch == idxd->batch_ring_sz) - idxd->next_batch = 0; - idxd->xstats.started = idxd->xstats.enqueued; + + _mm_sfence(); /* fence before writing desc to device */ + if (idxd->batch_size > 1) { + struct rte_idxd_hw_desc 
batch_desc = { + .op_flags = (idxd_op_batch << IDXD_CMD_OP_SHIFT) | + IDXD_FLAG_COMPLETION_ADDR_VALID | + IDXD_FLAG_REQUEST_COMPLETION, + .desc_addr = __desc_idx_to_iova(idxd, idxd->batch_start), + .completion = __desc_idx_to_iova(idxd, comp_idx), + .size = idxd->batch_size, + }; + + __idxd_movdir64b(idxd->portal, &batch_desc); + } else { + /* special case batch size of 1, as not allowed by HW */ + /* comp_idx == batch_start */ + struct rte_idxd_hw_desc *desc = &idxd->desc_ring[comp_idx]; + desc->op_flags |= IDXD_FLAG_COMPLETION_ADDR_VALID | + IDXD_FLAG_REQUEST_COMPLETION; + desc->completion = __desc_idx_to_iova(idxd, comp_idx); + + __idxd_movdir64b(idxd->portal, desc); + } + + idxd->xstats.started += idxd->batch_size; + + idxd->batch_start += idxd->batch_size; + idxd->batch_start &= idxd->desc_ring_mask; + idxd->batch_size = 0; + + idxd->batch_idx_ring[idxd->batch_idx_write++] = comp_idx; + if (idxd->batch_idx_write > idxd->max_batches) + idxd->batch_idx_write = 0; + return 0; } @@ -239,35 +252,39 @@ __idxd_completed_ops(int dev_id, uint8_t max_ops, { struct rte_idxd_rawdev *idxd = (struct rte_idxd_rawdev *)rte_rawdevs[dev_id].dev_private; - struct rte_idxd_desc_batch *b = &idxd->batch_ring[idxd->next_completed]; - uint16_t h_idx = idxd->next_ret_hdl; - int n = 0; - - while (b->submitted && b->comp.status != 0) { - idxd->last_completed_hdl = b->hdl_end; - b->submitted = 0; - b->op_count = 0; - if (++idxd->next_completed == idxd->batch_ring_sz) - idxd->next_completed = 0; - b = &idxd->batch_ring[idxd->next_completed]; + unsigned short n, h_idx; + + while (idxd->batch_idx_read != idxd->batch_idx_write) { + uint16_t idx_to_chk = idxd->batch_idx_ring[idxd->batch_idx_read]; + volatile struct rte_idxd_completion *comp_to_chk = + (struct rte_idxd_completion *)&idxd->desc_ring[idx_to_chk]; + if (comp_to_chk->status == 0) + break; + /* avail points to one after the last one written */ + idxd->hdls_avail = (idx_to_chk + 1) & idxd->desc_ring_mask; + idxd->batch_idx_read++; + if (idxd->batch_idx_read > idxd->max_batches) + idxd->batch_idx_read = 0; } - if (!idxd->hdls_disable) - for (n = 0; n < max_ops && h_idx != idxd->last_completed_hdl; n++) { - src_hdls[n] = idxd->hdl_ring[h_idx].src; - dst_hdls[n] = idxd->hdl_ring[h_idx].dst; - if (++h_idx == idxd->hdl_ring_sz) - h_idx = 0; - } - else - while (h_idx != idxd->last_completed_hdl) { - n++; - if (++h_idx == idxd->hdl_ring_sz) - h_idx = 0; - } - - idxd->next_ret_hdl = h_idx; + if (idxd->cfg.hdls_disable) { + n = (idxd->hdls_avail < idxd->hdls_read) ? 
+ (idxd->hdls_avail + idxd->desc_ring_mask + 1 - idxd->hdls_read) : + (idxd->hdls_avail - idxd->hdls_read); + idxd->hdls_read = idxd->hdls_avail; + goto out; + } + + for (n = 0, h_idx = idxd->hdls_read; + n < max_ops && h_idx != idxd->hdls_avail; n++) { + src_hdls[n] = idxd->hdl_ring[h_idx].src; + dst_hdls[n] = idxd->hdl_ring[h_idx].dst; + if (++h_idx > idxd->desc_ring_mask) + h_idx = 0; + } + idxd->hdls_read = h_idx; +out: idxd->xstats.completed += n; return n; } From patchwork Mon Apr 26 09:52:58 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Bruce Richardson X-Patchwork-Id: 92162 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id DA526A0548; Mon, 26 Apr 2021 11:54:17 +0200 (CEST) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 3305F411DC; Mon, 26 Apr 2021 11:53:31 +0200 (CEST) Received: from mga02.intel.com (mga02.intel.com [134.134.136.20]) by mails.dpdk.org (Postfix) with ESMTP id 52AA2411BF for ; Mon, 26 Apr 2021 11:53:27 +0200 (CEST) IronPort-SDR: KXuP5S4aKTce9u2ymefa78PwBPU2sU0Nar0HByDIx3jkHPvdTQojwDhzbRsjVqs7aauIbBIu42 s+CgGZwIzPoQ== X-IronPort-AV: E=McAfee;i="6200,9189,9965"; a="183442915" X-IronPort-AV: E=Sophos;i="5.82,252,1613462400"; d="scan'208";a="183442915" Received: from orsmga008.jf.intel.com ([10.7.209.65]) by orsmga101.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 26 Apr 2021 02:53:25 -0700 IronPort-SDR: ZrYR/GX6f0ilbmtfOAM5sZv8u+JiQlbihqeHQt4NoSykWtq4GNcM51ZEghjoJxrfRH8U9dapIr KUWjDSUZneKA== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.82,252,1613462400"; d="scan'208";a="429336959" Received: from silpixa00399126.ir.intel.com ([10.237.223.81]) by orsmga008.jf.intel.com with ESMTP; 26 Apr 2021 02:53:23 -0700 From: Bruce Richardson To: dev@dpdk.org Cc: kevin.laatz@intel.com, jiayu.hu@intel.com, Bruce Richardson Date: Mon, 26 Apr 2021 10:52:58 +0100 Message-Id: <20210426095259.225354-12-bruce.richardson@intel.com> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20210426095259.225354-1-bruce.richardson@intel.com> References: <20210318182042.43658-2-bruce.richardson@intel.com> <20210426095259.225354-1-bruce.richardson@intel.com> MIME-Version: 1.0 Subject: [dpdk-dev] [PATCH v2 11/12] raw/ioat: add API to query remaining ring space X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" From: Kevin Laatz Add a new API to query remaining descriptor ring capacity. This API is useful, for example, when an application needs to enqueue a fragmented packet and wants to ensure that all segments of the packet will be enqueued together. 
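As a rough usage sketch (illustrative only, not part of this patch: the helper name below is made up, while rte_ioat_burst_capacity(), rte_ioat_enqueue_copy() and rte_ioat_perform_ops() are the actual calls), an application might gate the enqueue of all of a packet's segments on the reported capacity so the segments are never split across a full ring:

/*
 * Illustrative sketch, assuming srcs[]/dsts[] hold one mbuf per segment.
 * Only enqueue if every segment fits in the remaining ring space.
 */
#include <rte_ioat_rawdev.h>
#include <rte_mbuf.h>

static inline int
copy_all_segments_or_none(int dev_id, struct rte_mbuf *srcs[],
		struct rte_mbuf *dsts[], uint16_t nb_segs, unsigned int len)
{
	uint16_t i;

	/* simplified check; on DSA a submitted burst may also consume one
	 * extra slot for its batch descriptor, so a conservative caller
	 * could require one spare slot beyond nb_segs
	 */
	if (rte_ioat_burst_capacity(dev_id) < nb_segs)
		return 0;	/* not enough room, caller retries later */

	for (i = 0; i < nb_segs; i++)
		if (rte_ioat_enqueue_copy(dev_id,
				rte_pktmbuf_iova(srcs[i]),
				rte_pktmbuf_iova(dsts[i]), len,
				(uintptr_t)srcs[i], (uintptr_t)dsts[i]) != 1)
			return -1;	/* unexpected after the capacity check */

	rte_ioat_perform_ops(dev_id);	/* submit the segments as one burst */
	return nb_segs;
}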
Signed-off-by: Kevin Laatz Signed-off-by: Bruce Richardson --- drivers/raw/ioat/ioat_rawdev_test.c | 138 ++++++++++++++++++++++++- drivers/raw/ioat/rte_idxd_rawdev_fns.h | 22 ++++ drivers/raw/ioat/rte_ioat_rawdev_fns.h | 24 +++++ 3 files changed, 183 insertions(+), 1 deletion(-) diff --git a/drivers/raw/ioat/ioat_rawdev_test.c b/drivers/raw/ioat/ioat_rawdev_test.c index 51eebe152f..5f75c6ff69 100644 --- a/drivers/raw/ioat/ioat_rawdev_test.c +++ b/drivers/raw/ioat/ioat_rawdev_test.c @@ -277,6 +277,138 @@ test_enqueue_fill(int dev_id) return 0; } +static inline void +reset_ring_ptrs(int dev_id) +{ + enum rte_ioat_dev_type *type = + (enum rte_ioat_dev_type *)rte_rawdevs[dev_id].dev_private; + + if (*type == RTE_IDXD_DEV) { + struct rte_idxd_rawdev *idxd = + (struct rte_idxd_rawdev *)rte_rawdevs[dev_id].dev_private; + + idxd->batch_start = 0; + idxd->hdls_read = 0; + } else { + struct rte_ioat_rawdev *ioat = + (struct rte_ioat_rawdev *)rte_rawdevs[dev_id].dev_private; + + ioat->next_read = 0; + ioat->next_write = 0; + } +} + +static int +test_burst_capacity(int dev_id) +{ +#define BURST_SIZE 64 + struct rte_mbuf *src, *dst; + unsigned int bursts_enqueued = 0; + unsigned int i; + unsigned int length = 1024; + uintptr_t completions[BURST_SIZE]; + + /* Ring pointer reset needed for checking test results */ + reset_ring_ptrs(dev_id); + + const unsigned int ring_space = rte_ioat_burst_capacity(dev_id); + const unsigned int expected_bursts = (ring_space)/BURST_SIZE; + src = rte_pktmbuf_alloc(pool); + dst = rte_pktmbuf_alloc(pool); + + /* Enqueue burst until they won't fit */ + while (rte_ioat_burst_capacity(dev_id) >= BURST_SIZE) { + for (i = 0; i < BURST_SIZE; i++) { + + if (rte_ioat_enqueue_copy(dev_id, rte_pktmbuf_iova(src), + rte_pktmbuf_iova(dst), length, 0, 0) != 1) { + PRINT_ERR("Error with rte_ioat_enqueue_copy\n"); + return -1; + } + } + bursts_enqueued++; + if ((i & 1) == 1) /* hit doorbell every second burst */ + rte_ioat_perform_ops(dev_id); + } + rte_ioat_perform_ops(dev_id); + + /* check the number of bursts enqueued was as expected */ + if (bursts_enqueued != expected_bursts) { + PRINT_ERR("Capacity test failed, enqueued %u not %u bursts\n", + bursts_enqueued, expected_bursts); + return -1; + } + + /* check the space is now as expected */ + if (rte_ioat_burst_capacity(dev_id) != ring_space - bursts_enqueued * BURST_SIZE) { + printf("Capacity error. Expected %u free slots, got %u\n", + ring_space - bursts_enqueued * BURST_SIZE, + rte_ioat_burst_capacity(dev_id)); + return -1; + } + + /* do cleanup before next tests */ + usleep(100); + for (i = 0; i < bursts_enqueued; i++) { + if (rte_ioat_completed_ops(dev_id, BURST_SIZE, completions, + completions) != BURST_SIZE) { + PRINT_ERR("error with completions\n"); + return -1; + } + } + + /* Since we reset the ring pointers before the previous test, and we enqueued + * the max amount of bursts, enqueuing one more burst will enable us to test + * the wrap around handling in rte_ioat_burst_capacity(). 
+ */ + + /* Verify the descriptor ring is empty before we test */ + if (rte_ioat_burst_capacity(dev_id) != ring_space) { + PRINT_ERR("Error, ring should be empty\n"); + return -1; + } + + /* Enqueue one burst of mbufs & verify the expected space is taken */ + for (i = 0; i < BURST_SIZE; i++) { + if (rte_ioat_enqueue_copy(dev_id, rte_pktmbuf_iova(src), + rte_pktmbuf_iova(dst), length, 0, 0) != 1) { + PRINT_ERR("Error with rte_ioat_enqueue_copy\n"); + return -1; + } + } + + /* Perform the copy before checking the capacity again so that the write + * pointer in the descriptor ring is wrapped/masked + */ + rte_ioat_perform_ops(dev_id); + usleep(100); + + /* This check will confirm both that the correct amount of space is taken + * the ring, and that the ring wrap around handling is correct. + */ + if (rte_ioat_burst_capacity(dev_id) != ring_space - BURST_SIZE) { + PRINT_ERR("Error, space available not as expected\n"); + return -1; + } + + /* Now we gather completions to update the read pointer */ + if (rte_ioat_completed_ops(dev_id, BURST_SIZE, completions, completions) != BURST_SIZE) { + PRINT_ERR("Error with completions\n"); + return -1; + } + + /* After gathering the completions, the descriptor ring should be empty */ + if (rte_ioat_burst_capacity(dev_id) != ring_space) { + PRINT_ERR("Error, space available not as expected\n"); + return -1; + } + + rte_pktmbuf_free(src); + rte_pktmbuf_free(dst); + + return 0; +} + int ioat_rawdev_test(uint16_t dev_id) { @@ -321,7 +453,7 @@ ioat_rawdev_test(uint16_t dev_id) } pool = rte_pktmbuf_pool_create("TEST_IOAT_POOL", - 256, /* n == num elements */ + p.ring_size * 2, /* n == num elements */ 32, /* cache size */ 0, /* priv size */ 2048, /* data room size */ @@ -385,6 +517,10 @@ ioat_rawdev_test(uint16_t dev_id) } printf("\n"); + printf("Running Burst Capacity Test\n"); + if (test_burst_capacity(dev_id) != 0) + goto err; + rte_rawdev_stop(dev_id); if (rte_rawdev_xstats_reset(dev_id, NULL, 0) != 0) { PRINT_ERR("Error resetting xstat values\n"); diff --git a/drivers/raw/ioat/rte_idxd_rawdev_fns.h b/drivers/raw/ioat/rte_idxd_rawdev_fns.h index 4c49d2b84a..41f0ad6e99 100644 --- a/drivers/raw/ioat/rte_idxd_rawdev_fns.h +++ b/drivers/raw/ioat/rte_idxd_rawdev_fns.h @@ -106,6 +106,28 @@ struct rte_idxd_rawdev { struct rte_idxd_user_hdl *hdl_ring; }; +static __rte_always_inline uint16_t +__idxd_burst_capacity(int dev_id) +{ + struct rte_idxd_rawdev *idxd = + (struct rte_idxd_rawdev *)rte_rawdevs[dev_id].dev_private; + uint16_t write_idx = idxd->batch_start + idxd->batch_size; + uint16_t used_space; + + /* Check for space in the batch ring */ + if ((idxd->batch_idx_read == 0 && idxd->batch_idx_write == idxd->max_batches) || + idxd->batch_idx_write + 1 == idxd->batch_idx_read) + return 0; + + /* for descriptors, check for wrap-around on write but not read */ + if (idxd->hdls_read > write_idx) + write_idx += idxd->desc_ring_mask + 1; + used_space = write_idx - idxd->hdls_read; + + /* Return amount of free space in the descriptor ring */ + return idxd->desc_ring_mask - used_space; +} + static __rte_always_inline rte_iova_t __desc_idx_to_iova(struct rte_idxd_rawdev *idxd, uint16_t n) { diff --git a/drivers/raw/ioat/rte_ioat_rawdev_fns.h b/drivers/raw/ioat/rte_ioat_rawdev_fns.h index 598852b1fa..92ccdd03b9 100644 --- a/drivers/raw/ioat/rte_ioat_rawdev_fns.h +++ b/drivers/raw/ioat/rte_ioat_rawdev_fns.h @@ -100,6 +100,19 @@ struct rte_ioat_rawdev { #define RTE_IOAT_CHANSTS_HALTED 0x3 #define RTE_IOAT_CHANSTS_ARMED 0x4 +static __rte_always_inline uint16_t 
+__ioat_burst_capacity(int dev_id) +{ + struct rte_ioat_rawdev *ioat = + (struct rte_ioat_rawdev *)rte_rawdevs[dev_id].dev_private; + unsigned short size = ioat->ring_size - 1; + unsigned short read = ioat->next_read; + unsigned short write = ioat->next_write; + unsigned short space = size - (write - read); + + return space; +} + static __rte_always_inline int __ioat_write_desc(int dev_id, uint32_t op, uint64_t src, phys_addr_t dst, unsigned int length, uintptr_t src_hdl, uintptr_t dst_hdl) @@ -260,6 +273,17 @@ __ioat_completed_ops(int dev_id, uint8_t max_copies, return count; } +static inline uint16_t +rte_ioat_burst_capacity(int dev_id) +{ + enum rte_ioat_dev_type *type = + (enum rte_ioat_dev_type *)rte_rawdevs[dev_id].dev_private; + if (*type == RTE_IDXD_DEV) + return __idxd_burst_capacity(dev_id); + else + return __ioat_burst_capacity(dev_id); +} + static inline int rte_ioat_enqueue_fill(int dev_id, uint64_t pattern, phys_addr_t dst, unsigned int len, uintptr_t dst_hdl) From patchwork Mon Apr 26 09:52:59 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Bruce Richardson X-Patchwork-Id: 92164 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id BB53DA0548; Mon, 26 Apr 2021 11:54:27 +0200 (CEST) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id A657A411E4; Mon, 26 Apr 2021 11:53:33 +0200 (CEST) Received: from mga02.intel.com (mga02.intel.com [134.134.136.20]) by mails.dpdk.org (Postfix) with ESMTP id 7A250411BF for ; Mon, 26 Apr 2021 11:53:28 +0200 (CEST) IronPort-SDR: dQgKAjVprzK998uijJpP4zh7UVsnp/ZkxLr09KDXT9JmXM9Q6yswDDA4tteDr5qDkJ2tnfqUrq ECNeyx35asPA== X-IronPort-AV: E=McAfee;i="6200,9189,9965"; a="183442920" X-IronPort-AV: E=Sophos;i="5.82,252,1613462400"; d="scan'208";a="183442920" Received: from orsmga008.jf.intel.com ([10.7.209.65]) by orsmga101.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 26 Apr 2021 02:53:26 -0700 IronPort-SDR: PLtSCht8LZKIgyfm1bqhtZuHeQdu7U7NrlRYYFzSrKsyjKkulIVdWjh7x9yxAWBIfraKyDNAcP WkI0p+OgqlCw== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.82,252,1613462400"; d="scan'208";a="429336974" Received: from silpixa00399126.ir.intel.com ([10.237.223.81]) by orsmga008.jf.intel.com with ESMTP; 26 Apr 2021 02:53:24 -0700 From: Bruce Richardson To: dev@dpdk.org Cc: kevin.laatz@intel.com, jiayu.hu@intel.com, Bruce Richardson Date: Mon, 26 Apr 2021 10:52:59 +0100 Message-Id: <20210426095259.225354-13-bruce.richardson@intel.com> X-Mailer: git-send-email 2.30.2 In-Reply-To: <20210426095259.225354-1-bruce.richardson@intel.com> References: <20210318182042.43658-2-bruce.richardson@intel.com> <20210426095259.225354-1-bruce.richardson@intel.com> MIME-Version: 1.0 Subject: [dpdk-dev] [PATCH v2 12/12] raw/ioat: report status of completed jobs X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Add improved error handling to rte_ioat_completed_ops(). This patch adds new parameters to the function to enable the user to track the completion status of each individual operation in a batch. 
With this addition, the function can help the user to determine firstly, how many operations may have failed or been skipped and then secondly, which specific operations did not complete successfully. Signed-off-by: Kevin Laatz Signed-off-by: Bruce Richardson --- doc/guides/rel_notes/release_21_05.rst | 5 + drivers/raw/ioat/ioat_common.c | 9 + drivers/raw/ioat/ioat_rawdev_test.c | 300 +++++++++++++++++++++++-- drivers/raw/ioat/rte_idxd_rawdev_fns.h | 146 ++++++++---- drivers/raw/ioat/rte_ioat_rawdev.h | 53 ++++- drivers/raw/ioat/rte_ioat_rawdev_fns.h | 15 +- examples/ioat/ioatfwd.c | 14 +- examples/vhost/ioat.c | 2 +- 8 files changed, 464 insertions(+), 80 deletions(-) diff --git a/doc/guides/rel_notes/release_21_05.rst b/doc/guides/rel_notes/release_21_05.rst index b3224dc332..7f29f5789f 100644 --- a/doc/guides/rel_notes/release_21_05.rst +++ b/doc/guides/rel_notes/release_21_05.rst @@ -329,6 +329,11 @@ API Changes ``policer_action_recolor_supported`` and ``policer_action_drop_supported`` have been removed. +* raw/ioat: The experimental function ``rte_ioat_completed_ops()`` now + supports two additional parameters, ``status`` and ``num_unsuccessful``, + to allow the reporting of errors from hardware when performing copy + operations. + ABI Changes ----------- diff --git a/drivers/raw/ioat/ioat_common.c b/drivers/raw/ioat/ioat_common.c index fcb30572e6..d01c1ee367 100644 --- a/drivers/raw/ioat/ioat_common.c +++ b/drivers/raw/ioat/ioat_common.c @@ -162,6 +162,15 @@ idxd_dev_configure(const struct rte_rawdev *dev, rte_idxd->desc_ring = NULL; return -ENOMEM; } + rte_idxd->hdl_ring_flags = rte_zmalloc(NULL, + sizeof(*rte_idxd->hdl_ring_flags) * max_desc, 0); + if (rte_idxd->hdl_ring_flags == NULL) { + rte_free(rte_idxd->desc_ring); + rte_free(rte_idxd->hdl_ring); + rte_idxd->desc_ring = NULL; + rte_idxd->hdl_ring = NULL; + return -ENOMEM; + } rte_idxd->hdls_read = rte_idxd->batch_start = 0; rte_idxd->batch_size = 0; diff --git a/drivers/raw/ioat/ioat_rawdev_test.c b/drivers/raw/ioat/ioat_rawdev_test.c index 5f75c6ff69..d987b560d2 100644 --- a/drivers/raw/ioat/ioat_rawdev_test.c +++ b/drivers/raw/ioat/ioat_rawdev_test.c @@ -73,13 +73,15 @@ do_multi_copies(int dev_id, int split_batches, int split_completions) if (split_completions) { /* gather completions in two halves */ uint16_t half_len = RTE_DIM(srcs) / 2; - if (rte_ioat_completed_ops(dev_id, half_len, (void *)completed_src, + if (rte_ioat_completed_ops(dev_id, half_len, NULL, NULL, + (void *)completed_src, (void *)completed_dst) != half_len) { PRINT_ERR("Error with rte_ioat_completed_ops - first half request\n"); rte_rawdev_dump(dev_id, stdout); return -1; } - if (rte_ioat_completed_ops(dev_id, half_len, (void *)&completed_src[half_len], + if (rte_ioat_completed_ops(dev_id, half_len, NULL, NULL, + (void *)&completed_src[half_len], (void *)&completed_dst[half_len]) != half_len) { PRINT_ERR("Error with rte_ioat_completed_ops - second half request\n"); rte_rawdev_dump(dev_id, stdout); @@ -87,7 +89,8 @@ do_multi_copies(int dev_id, int split_batches, int split_completions) } } else { /* gather all completions in one go */ - if (rte_ioat_completed_ops(dev_id, 64, (void *)completed_src, + if (rte_ioat_completed_ops(dev_id, RTE_DIM(completed_src), NULL, NULL, + (void *)completed_src, (void *)completed_dst) != RTE_DIM(srcs)) { PRINT_ERR("Error with rte_ioat_completed_ops\n"); rte_rawdev_dump(dev_id, stdout); @@ -151,7 +154,7 @@ test_enqueue_copies(int dev_id) rte_ioat_perform_ops(dev_id); usleep(10); - if (rte_ioat_completed_ops(dev_id, 1, (void 
*)&completed[0], + if (rte_ioat_completed_ops(dev_id, 1, NULL, NULL, (void *)&completed[0], (void *)&completed[1]) != 1) { PRINT_ERR("Error with rte_ioat_completed_ops\n"); return -1; @@ -170,6 +173,13 @@ test_enqueue_copies(int dev_id) } rte_pktmbuf_free(src); rte_pktmbuf_free(dst); + + /* check ring is now empty */ + if (rte_ioat_completed_ops(dev_id, 1, NULL, NULL, (void *)&completed[0], + (void *)&completed[1]) != 0) { + PRINT_ERR("Error: got unexpected returned handles from rte_ioat_completed_ops\n"); + return -1; + } } while (0); /* test doing a multiple single copies */ @@ -203,7 +213,8 @@ test_enqueue_copies(int dev_id) } usleep(10); - if (rte_ioat_completed_ops(dev_id, max_completions, (void *)&completed[0], + if (rte_ioat_completed_ops(dev_id, max_completions, NULL, NULL, + (void *)&completed[0], (void *)&completed[max_completions]) != max_ops) { PRINT_ERR("Error with rte_ioat_completed_ops\n"); rte_rawdev_dump(dev_id, stdout); @@ -256,7 +267,7 @@ test_enqueue_fill(int dev_id) rte_ioat_perform_ops(dev_id); usleep(100); - if (rte_ioat_completed_ops(dev_id, 1, (void *)&completed[0], + if (rte_ioat_completed_ops(dev_id, 1, NULL, NULL, (void *)&completed[0], (void *)&completed[1]) != 1) { PRINT_ERR("Error with completed ops\n"); return -1; @@ -266,8 +277,7 @@ test_enqueue_fill(int dev_id) char pat_byte = ((char *)&pattern)[j % 8]; if (dst_data[j] != pat_byte) { PRINT_ERR("Error with fill operation (lengths = %u): got (%x), not (%x)\n", - lengths[i], dst_data[j], - pat_byte); + lengths[i], dst_data[j], pat_byte); return -1; } } @@ -307,12 +317,16 @@ test_burst_capacity(int dev_id) unsigned int i; unsigned int length = 1024; uintptr_t completions[BURST_SIZE]; + /* for CBDMA, no batch descriptor, for DSA there is one */ + unsigned int batch_desc = (*(enum rte_ioat_dev_type *) + rte_rawdevs[dev_id].dev_private == RTE_IDXD_DEV); + unsigned int desc_per_burst = BURST_SIZE + batch_desc; /* Ring pointer reset needed for checking test results */ reset_ring_ptrs(dev_id); const unsigned int ring_space = rte_ioat_burst_capacity(dev_id); - const unsigned int expected_bursts = (ring_space)/BURST_SIZE; + const unsigned int expected_bursts = (ring_space)/(desc_per_burst); src = rte_pktmbuf_alloc(pool); dst = rte_pktmbuf_alloc(pool); @@ -327,8 +341,7 @@ test_burst_capacity(int dev_id) } } bursts_enqueued++; - if ((i & 1) == 1) /* hit doorbell every second burst */ - rte_ioat_perform_ops(dev_id); + rte_ioat_perform_ops(dev_id); } rte_ioat_perform_ops(dev_id); @@ -340,9 +353,9 @@ test_burst_capacity(int dev_id) } /* check the space is now as expected */ - if (rte_ioat_burst_capacity(dev_id) != ring_space - bursts_enqueued * BURST_SIZE) { - printf("Capacity error. Expected %u free slots, got %u\n", - ring_space - bursts_enqueued * BURST_SIZE, + if (rte_ioat_burst_capacity(dev_id) != ring_space - bursts_enqueued * desc_per_burst) { + PRINT_ERR("Capacity error. 
Expected %u free slots, got %u\n", + ring_space - bursts_enqueued * desc_per_burst, rte_ioat_burst_capacity(dev_id)); return -1; } @@ -350,8 +363,8 @@ test_burst_capacity(int dev_id) /* do cleanup before next tests */ usleep(100); for (i = 0; i < bursts_enqueued; i++) { - if (rte_ioat_completed_ops(dev_id, BURST_SIZE, completions, - completions) != BURST_SIZE) { + if (rte_ioat_completed_ops(dev_id, BURST_SIZE, NULL, NULL, + completions, completions) != BURST_SIZE) { PRINT_ERR("error with completions\n"); return -1; } @@ -364,7 +377,8 @@ test_burst_capacity(int dev_id) /* Verify the descriptor ring is empty before we test */ if (rte_ioat_burst_capacity(dev_id) != ring_space) { - PRINT_ERR("Error, ring should be empty\n"); + PRINT_ERR("Error, ring should be empty. Expected %u, got %u\n", + ring_space, rte_ioat_burst_capacity(dev_id)); return -1; } @@ -386,20 +400,23 @@ test_burst_capacity(int dev_id) /* This check will confirm both that the correct amount of space is taken * the ring, and that the ring wrap around handling is correct. */ - if (rte_ioat_burst_capacity(dev_id) != ring_space - BURST_SIZE) { - PRINT_ERR("Error, space available not as expected\n"); + if (rte_ioat_burst_capacity(dev_id) != ring_space - desc_per_burst) { + PRINT_ERR("Error, space available not as expected. Expected %u, got %u\n", + ring_space - desc_per_burst, rte_ioat_burst_capacity(dev_id)); return -1; } /* Now we gather completions to update the read pointer */ - if (rte_ioat_completed_ops(dev_id, BURST_SIZE, completions, completions) != BURST_SIZE) { + if (rte_ioat_completed_ops(dev_id, BURST_SIZE, NULL, NULL, + completions, completions) != BURST_SIZE) { PRINT_ERR("Error with completions\n"); return -1; } /* After gathering the completions, the descriptor ring should be empty */ if (rte_ioat_burst_capacity(dev_id) != ring_space) { - PRINT_ERR("Error, space available not as expected\n"); + PRINT_ERR("Error, space available not as expected, Expected %u, got %u\n", + ring_space, rte_ioat_burst_capacity(dev_id)); return -1; } @@ -409,6 +426,241 @@ test_burst_capacity(int dev_id) return 0; } +static int +test_completion_status(int dev_id) +{ +#define COMP_BURST_SZ 16 + const unsigned int fail_copy[] = {0, 7, 15}; + struct rte_mbuf *srcs[COMP_BURST_SZ], *dsts[COMP_BURST_SZ]; + struct rte_mbuf *completed_src[COMP_BURST_SZ * 2]; + struct rte_mbuf *completed_dst[COMP_BURST_SZ * 2]; + unsigned int length = 1024; + unsigned int i; + uint8_t not_ok = 0; + + /* Test single full batch statuses */ + for (i = 0; i < RTE_DIM(fail_copy); i++) { + uint32_t status[COMP_BURST_SZ] = {0}; + unsigned int j; + + for (j = 0; j < COMP_BURST_SZ; j++) { + srcs[j] = rte_pktmbuf_alloc(pool); + dsts[j] = rte_pktmbuf_alloc(pool); + + if (rte_ioat_enqueue_copy(dev_id, + (j == fail_copy[i] ? 
(phys_addr_t)NULL : + (srcs[j]->buf_iova + srcs[j]->data_off)), + dsts[j]->buf_iova + dsts[j]->data_off, + length, + (uintptr_t)srcs[j], + (uintptr_t)dsts[j]) != 1) { + PRINT_ERR("Error with rte_ioat_enqueue_copy for buffer %u\n", j); + return -1; + } + } + rte_ioat_perform_ops(dev_id); + usleep(100); + + if (rte_ioat_completed_ops(dev_id, COMP_BURST_SZ, status, &not_ok, + (void *)completed_src, (void *)completed_dst) != COMP_BURST_SZ) { + PRINT_ERR("Error with rte_ioat_completed_ops\n"); + rte_rawdev_dump(dev_id, stdout); + return -1; + } + if (not_ok != 1 || status[fail_copy[i]] == RTE_IOAT_OP_SUCCESS) { + unsigned int j; + PRINT_ERR("Error, missing expected failed copy, %u\n", fail_copy[i]); + for (j = 0; j < COMP_BURST_SZ; j++) + printf("%u ", status[j]); + printf("<-- Statuses\n"); + return -1; + } + for (j = 0; j < COMP_BURST_SZ; j++) { + rte_pktmbuf_free(completed_src[j]); + rte_pktmbuf_free(completed_dst[j]); + } + } + + /* Test gathering status for two batches at once */ + for (i = 0; i < RTE_DIM(fail_copy); i++) { + uint32_t status[COMP_BURST_SZ] = {0}; + unsigned int batch, j; + unsigned int expected_failures = 0; + + for (batch = 0; batch < 2; batch++) { + for (j = 0; j < COMP_BURST_SZ/2; j++) { + srcs[j] = rte_pktmbuf_alloc(pool); + dsts[j] = rte_pktmbuf_alloc(pool); + + if (j == fail_copy[i]) + expected_failures++; + if (rte_ioat_enqueue_copy(dev_id, + (j == fail_copy[i] ? (phys_addr_t)NULL : + (srcs[j]->buf_iova + srcs[j]->data_off)), + dsts[j]->buf_iova + dsts[j]->data_off, + length, + (uintptr_t)srcs[j], + (uintptr_t)dsts[j]) != 1) { + PRINT_ERR("Error with rte_ioat_enqueue_copy for buffer %u\n", + j); + return -1; + } + } + rte_ioat_perform_ops(dev_id); + } + usleep(100); + + if (rte_ioat_completed_ops(dev_id, COMP_BURST_SZ, status, &not_ok, + (void *)completed_src, (void *)completed_dst) != COMP_BURST_SZ) { + PRINT_ERR("Error with rte_ioat_completed_ops\n"); + rte_rawdev_dump(dev_id, stdout); + return -1; + } + if (not_ok != expected_failures) { + unsigned int j; + PRINT_ERR("Error, missing expected failed copy, got %u, not %u\n", + not_ok, expected_failures); + for (j = 0; j < COMP_BURST_SZ; j++) + printf("%u ", status[j]); + printf("<-- Statuses\n"); + return -1; + } + for (j = 0; j < COMP_BURST_SZ; j++) { + rte_pktmbuf_free(completed_src[j]); + rte_pktmbuf_free(completed_dst[j]); + } + } + + /* Test gathering status for half batch at a time */ + for (i = 0; i < RTE_DIM(fail_copy); i++) { + uint32_t status[COMP_BURST_SZ] = {0}; + unsigned int j; + + for (j = 0; j < COMP_BURST_SZ; j++) { + srcs[j] = rte_pktmbuf_alloc(pool); + dsts[j] = rte_pktmbuf_alloc(pool); + + if (rte_ioat_enqueue_copy(dev_id, + (j == fail_copy[i] ? 
(phys_addr_t)NULL : + (srcs[j]->buf_iova + srcs[j]->data_off)), + dsts[j]->buf_iova + dsts[j]->data_off, + length, + (uintptr_t)srcs[j], + (uintptr_t)dsts[j]) != 1) { + PRINT_ERR("Error with rte_ioat_enqueue_copy for buffer %u\n", j); + return -1; + } + } + rte_ioat_perform_ops(dev_id); + usleep(100); + + if (rte_ioat_completed_ops(dev_id, COMP_BURST_SZ / 2, status, &not_ok, + (void *)completed_src, + (void *)completed_dst) != (COMP_BURST_SZ / 2)) { + PRINT_ERR("Error with rte_ioat_completed_ops\n"); + rte_rawdev_dump(dev_id, stdout); + return -1; + } + if (fail_copy[i] < COMP_BURST_SZ / 2 && + (not_ok != 1 || status[fail_copy[i]] == RTE_IOAT_OP_SUCCESS)) { + PRINT_ERR("Missing expected failure in first half-batch\n"); + rte_rawdev_dump(dev_id, stdout); + return -1; + } + if (rte_ioat_completed_ops(dev_id, COMP_BURST_SZ / 2, status, &not_ok, + (void *)&completed_src[COMP_BURST_SZ / 2], + (void *)&completed_dst[COMP_BURST_SZ / 2]) != (COMP_BURST_SZ / 2)) { + PRINT_ERR("Error with rte_ioat_completed_ops\n"); + rte_rawdev_dump(dev_id, stdout); + return -1; + } + if (fail_copy[i] >= COMP_BURST_SZ / 2 && (not_ok != 1 || + status[fail_copy[i] - (COMP_BURST_SZ / 2)] + == RTE_IOAT_OP_SUCCESS)) { + PRINT_ERR("Missing expected failure in second half-batch\n"); + rte_rawdev_dump(dev_id, stdout); + return -1; + } + + for (j = 0; j < COMP_BURST_SZ; j++) { + rte_pktmbuf_free(completed_src[j]); + rte_pktmbuf_free(completed_dst[j]); + } + } + + /* Test gathering statuses with fence */ + for (i = 1; i < RTE_DIM(fail_copy); i++) { + uint32_t status[COMP_BURST_SZ * 2] = {0}; + unsigned int j; + uint16_t count; + + for (j = 0; j < COMP_BURST_SZ; j++) { + srcs[j] = rte_pktmbuf_alloc(pool); + dsts[j] = rte_pktmbuf_alloc(pool); + + /* always fail the first copy */ + if (rte_ioat_enqueue_copy(dev_id, + (j == 0 ? (phys_addr_t)NULL : + (srcs[j]->buf_iova + srcs[j]->data_off)), + dsts[j]->buf_iova + dsts[j]->data_off, + length, + (uintptr_t)srcs[j], + (uintptr_t)dsts[j]) != 1) { + PRINT_ERR("Error with rte_ioat_enqueue_copy for buffer %u\n", j); + return -1; + } + /* put in a fence which will stop any further transactions + * because we had a previous failure. 
+ */ + if (j == fail_copy[i]) + rte_ioat_fence(dev_id); + } + rte_ioat_perform_ops(dev_id); + usleep(100); + + count = rte_ioat_completed_ops(dev_id, COMP_BURST_SZ * 2, status, &not_ok, + (void *)completed_src, (void *)completed_dst); + if (count != COMP_BURST_SZ) { + PRINT_ERR("Error with rte_ioat_completed_ops, got %u not %u\n", + count, COMP_BURST_SZ); + for (j = 0; j < count; j++) + printf("%u ", status[j]); + printf("<-- Statuses\n"); + return -1; + } + if (not_ok != COMP_BURST_SZ - fail_copy[i]) { + PRINT_ERR("Unexpected failed copy count, got %u, expected %u\n", + not_ok, COMP_BURST_SZ - fail_copy[i]); + for (j = 0; j < COMP_BURST_SZ; j++) + printf("%u ", status[j]); + printf("<-- Statuses\n"); + return -1; + } + if (status[0] == RTE_IOAT_OP_SUCCESS || status[0] == RTE_IOAT_OP_SKIPPED) { + PRINT_ERR("Error, op 0 unexpectedly did not fail.\n"); + return -1; + } + for (j = 1; j <= fail_copy[i]; j++) { + if (status[j] != RTE_IOAT_OP_SUCCESS) { + PRINT_ERR("Error, op %u unexpectedly failed\n", j); + return -1; + } + } + for (j = fail_copy[i] + 1; j < COMP_BURST_SZ; j++) { + if (status[j] != RTE_IOAT_OP_SKIPPED) { + PRINT_ERR("Error, all descriptors after fence should be invalid\n"); + return -1; + } + } + for (j = 0; j < COMP_BURST_SZ; j++) { + rte_pktmbuf_free(completed_src[j]); + rte_pktmbuf_free(completed_dst[j]); + } + } + + return 0; +} + int ioat_rawdev_test(uint16_t dev_id) { @@ -521,6 +773,12 @@ ioat_rawdev_test(uint16_t dev_id) if (test_burst_capacity(dev_id) != 0) goto err; + if (rte_eal_iova_mode() == RTE_IOVA_VA) { + printf("Running Completions Status Test\n"); + if (test_completion_status(dev_id) != 0) + goto err; + } + rte_rawdev_stop(dev_id); if (rte_rawdev_xstats_reset(dev_id, NULL, 0) != 0) { PRINT_ERR("Error resetting xstat values\n"); diff --git a/drivers/raw/ioat/rte_idxd_rawdev_fns.h b/drivers/raw/ioat/rte_idxd_rawdev_fns.h index 41f0ad6e99..dc16917b63 100644 --- a/drivers/raw/ioat/rte_idxd_rawdev_fns.h +++ b/drivers/raw/ioat/rte_idxd_rawdev_fns.h @@ -104,8 +104,17 @@ struct rte_idxd_rawdev { struct rte_idxd_hw_desc *desc_ring; struct rte_idxd_user_hdl *hdl_ring; + /* flags to indicate handle validity. Kept separate from ring, to avoid + * using 8 bytes per flag. Upper 8 bits holds error code if any. + */ + uint16_t *hdl_ring_flags; }; +#define RTE_IDXD_HDL_NORMAL 0 +#define RTE_IDXD_HDL_INVALID (1 << 0) /* no handle stored for this element */ +#define RTE_IDXD_HDL_OP_FAILED (1 << 1) /* return failure for this one */ +#define RTE_IDXD_HDL_OP_SKIPPED (1 << 2) /* this op was skipped */ + static __rte_always_inline uint16_t __idxd_burst_capacity(int dev_id) { @@ -124,8 +133,10 @@ __idxd_burst_capacity(int dev_id) write_idx += idxd->desc_ring_mask + 1; used_space = write_idx - idxd->hdls_read; - /* Return amount of free space in the descriptor ring */ - return idxd->desc_ring_mask - used_space; + /* Return amount of free space in the descriptor ring + * subtract 1 for space for batch descriptor and 1 for possible null desc + */ + return idxd->desc_ring_mask - used_space - 2; } static __rte_always_inline rte_iova_t @@ -150,7 +161,8 @@ __idxd_write_desc(int dev_id, if ((idxd->batch_idx_read == 0 && idxd->batch_idx_write == idxd->max_batches) || idxd->batch_idx_write + 1 == idxd->batch_idx_read) goto failed; - if (((write_idx + 1) & idxd->desc_ring_mask) == idxd->hdls_read) + /* for descriptor ring, we always need a slot for batch completion */ + if (((write_idx + 2) & idxd->desc_ring_mask) == idxd->hdls_read) goto failed; /* write desc and handle. 
Note, descriptors don't wrap */ @@ -161,7 +173,10 @@ __idxd_write_desc(int dev_id, idxd->desc_ring[write_idx].dst = dst; idxd->desc_ring[write_idx].size = size; - idxd->hdl_ring[write_idx & idxd->desc_ring_mask] = *hdl; + if (hdl == NULL) + idxd->hdl_ring_flags[write_idx & idxd->desc_ring_mask] = RTE_IDXD_HDL_INVALID; + else + idxd->hdl_ring[write_idx & idxd->desc_ring_mask] = *hdl; idxd->batch_size++; idxd->xstats.enqueued++; @@ -203,9 +218,8 @@ __idxd_enqueue_copy(int dev_id, rte_iova_t src, rte_iova_t dst, static __rte_always_inline int __idxd_fence(int dev_id) { - static const struct rte_idxd_user_hdl null_hdl; /* only op field needs filling - zero src, dst and length */ - return __idxd_write_desc(dev_id, IDXD_FLAG_FENCE, 0, 0, 0, &null_hdl); + return __idxd_write_desc(dev_id, IDXD_FLAG_FENCE, 0, 0, 0, NULL); } static __rte_always_inline void @@ -222,42 +236,37 @@ __idxd_perform_ops(int dev_id) { struct rte_idxd_rawdev *idxd = (struct rte_idxd_rawdev *)rte_rawdevs[dev_id].dev_private; - /* write completion to last desc in the batch */ - uint16_t comp_idx = idxd->batch_start + idxd->batch_size - 1; - if (comp_idx > idxd->desc_ring_mask) { - comp_idx &= idxd->desc_ring_mask; - *((uint64_t *)&idxd->desc_ring[comp_idx]) = 0; /* zero start of desc */ - } + + if (!idxd->cfg.no_prefetch_completions) + rte_prefetch1(&idxd->desc_ring[idxd->batch_idx_ring[idxd->batch_idx_read]]); if (idxd->batch_size == 0) return 0; - _mm_sfence(); /* fence before writing desc to device */ - if (idxd->batch_size > 1) { - struct rte_idxd_hw_desc batch_desc = { - .op_flags = (idxd_op_batch << IDXD_CMD_OP_SHIFT) | - IDXD_FLAG_COMPLETION_ADDR_VALID | - IDXD_FLAG_REQUEST_COMPLETION, - .desc_addr = __desc_idx_to_iova(idxd, idxd->batch_start), - .completion = __desc_idx_to_iova(idxd, comp_idx), - .size = idxd->batch_size, - }; - - __idxd_movdir64b(idxd->portal, &batch_desc); - } else { - /* special case batch size of 1, as not allowed by HW */ - /* comp_idx == batch_start */ - struct rte_idxd_hw_desc *desc = &idxd->desc_ring[comp_idx]; - desc->op_flags |= IDXD_FLAG_COMPLETION_ADDR_VALID | - IDXD_FLAG_REQUEST_COMPLETION; - desc->completion = __desc_idx_to_iova(idxd, comp_idx); - - __idxd_movdir64b(idxd->portal, desc); - } + if (idxd->batch_size == 1) + /* use a fence as a null descriptor, so batch_size >= 2 */ + if (__idxd_fence(dev_id) != 1) + return -1; + + /* write completion beyond last desc in the batch */ + uint16_t comp_idx = (idxd->batch_start + idxd->batch_size) & idxd->desc_ring_mask; + *((uint64_t *)&idxd->desc_ring[comp_idx]) = 0; /* zero start of desc */ + idxd->hdl_ring_flags[comp_idx] = RTE_IDXD_HDL_INVALID; + + const struct rte_idxd_hw_desc batch_desc = { + .op_flags = (idxd_op_batch << IDXD_CMD_OP_SHIFT) | + IDXD_FLAG_COMPLETION_ADDR_VALID | + IDXD_FLAG_REQUEST_COMPLETION, + .desc_addr = __desc_idx_to_iova(idxd, idxd->batch_start), + .completion = __desc_idx_to_iova(idxd, comp_idx), + .size = idxd->batch_size, + }; + _mm_sfence(); /* fence before writing desc to device */ + __idxd_movdir64b(idxd->portal, &batch_desc); idxd->xstats.started += idxd->batch_size; - idxd->batch_start += idxd->batch_size; + idxd->batch_start += idxd->batch_size + 1; idxd->batch_start &= idxd->desc_ring_mask; idxd->batch_size = 0; @@ -269,7 +278,7 @@ __idxd_perform_ops(int dev_id) } static __rte_always_inline int -__idxd_completed_ops(int dev_id, uint8_t max_ops, +__idxd_completed_ops(int dev_id, uint8_t max_ops, uint32_t *status, uint8_t *num_unsuccessful, uintptr_t *src_hdls, uintptr_t *dst_hdls) { struct rte_idxd_rawdev 
*idxd = @@ -280,8 +289,35 @@ __idxd_completed_ops(int dev_id, uint8_t max_ops, uint16_t idx_to_chk = idxd->batch_idx_ring[idxd->batch_idx_read]; volatile struct rte_idxd_completion *comp_to_chk = (struct rte_idxd_completion *)&idxd->desc_ring[idx_to_chk]; - if (comp_to_chk->status == 0) + uint8_t status = comp_to_chk->status; + if (status == 0) break; + if (unlikely(status > 1)) { + /* error occurred somewhere in batch, start where last checked */ + uint16_t desc_count = comp_to_chk->completed_size; + uint16_t batch_start = idxd->hdls_avail; + uint16_t batch_end = idx_to_chk; + + if (batch_start > batch_end) + batch_end += idxd->desc_ring_mask + 1; + /* go through each batch entry and see status */ + for (n = 0; n < desc_count; n++) { + uint16_t idx = (batch_start + n) & idxd->desc_ring_mask; + volatile struct rte_idxd_completion *comp = + (struct rte_idxd_completion *)&idxd->desc_ring[idx]; + if (comp->status != 0 && + idxd->hdl_ring_flags[idx] == RTE_IDXD_HDL_NORMAL) { + idxd->hdl_ring_flags[idx] = RTE_IDXD_HDL_OP_FAILED; + idxd->hdl_ring_flags[idx] |= (comp->status << 8); + } + } + /* if batch is incomplete, mark rest as skipped */ + for ( ; n < batch_end - batch_start; n++) { + uint16_t idx = (batch_start + n) & idxd->desc_ring_mask; + if (idxd->hdl_ring_flags[idx] == RTE_IDXD_HDL_NORMAL) + idxd->hdl_ring_flags[idx] = RTE_IDXD_HDL_OP_SKIPPED; + } + } /* avail points to one after the last one written */ idxd->hdls_avail = (idx_to_chk + 1) & idxd->desc_ring_mask; idxd->batch_idx_read++; @@ -289,7 +325,7 @@ __idxd_completed_ops(int dev_id, uint8_t max_ops, idxd->batch_idx_read = 0; } - if (idxd->cfg.hdls_disable) { + if (idxd->cfg.hdls_disable && status == NULL) { n = (idxd->hdls_avail < idxd->hdls_read) ? (idxd->hdls_avail + idxd->desc_ring_mask + 1 - idxd->hdls_read) : (idxd->hdls_avail - idxd->hdls_read); @@ -297,10 +333,36 @@ __idxd_completed_ops(int dev_id, uint8_t max_ops, goto out; } - for (n = 0, h_idx = idxd->hdls_read; - n < max_ops && h_idx != idxd->hdls_avail; n++) { - src_hdls[n] = idxd->hdl_ring[h_idx].src; - dst_hdls[n] = idxd->hdl_ring[h_idx].dst; + n = 0; + h_idx = idxd->hdls_read; + while (h_idx != idxd->hdls_avail) { + uint16_t flag = idxd->hdl_ring_flags[h_idx]; + if (flag != RTE_IDXD_HDL_INVALID) { + if (!idxd->cfg.hdls_disable) { + src_hdls[n] = idxd->hdl_ring[h_idx].src; + dst_hdls[n] = idxd->hdl_ring[h_idx].dst; + } + if (unlikely(flag != RTE_IDXD_HDL_NORMAL)) { + if (status != NULL) + status[n] = flag == RTE_IDXD_HDL_OP_SKIPPED ? + RTE_IOAT_OP_SKIPPED : + /* failure case, return err code */ + idxd->hdl_ring_flags[h_idx] >> 8; + if (num_unsuccessful != NULL) + *num_unsuccessful += 1; + } + n++; + } + idxd->hdl_ring_flags[h_idx] = RTE_IDXD_HDL_NORMAL; + if (++h_idx > idxd->desc_ring_mask) + h_idx = 0; + if (n >= max_ops) + break; + } + + /* skip over any remaining blank elements, e.g. 
batch completion */ + while (idxd->hdl_ring_flags[h_idx] == RTE_IDXD_HDL_INVALID && h_idx != idxd->hdls_avail) { + idxd->hdl_ring_flags[h_idx] = RTE_IDXD_HDL_NORMAL; if (++h_idx > idxd->desc_ring_mask) h_idx = 0; } diff --git a/drivers/raw/ioat/rte_ioat_rawdev.h b/drivers/raw/ioat/rte_ioat_rawdev.h index e5a22a0799..6cc1560a64 100644 --- a/drivers/raw/ioat/rte_ioat_rawdev.h +++ b/drivers/raw/ioat/rte_ioat_rawdev.h @@ -35,6 +35,10 @@ extern "C" { struct rte_ioat_rawdev_config { unsigned short ring_size; /**< size of job submission descriptor ring */ bool hdls_disable; /**< if set, ignore user-supplied handle params */ + /** set "no_prefetch_completions", if polling completions on separate core + * from the core submitting the jobs + */ + bool no_prefetch_completions; }; /** @@ -131,40 +135,73 @@ static inline int __rte_experimental rte_ioat_perform_ops(int dev_id); +/* + * Status codes for operations. + */ +#define RTE_IOAT_OP_SUCCESS 0 /**< Operation completed successfully */ +#define RTE_IOAT_OP_SKIPPED 1 /**< Operation was not attempted (Earlier fenced op failed) */ +/* Values >1 indicate a failure condition */ +/* Error codes taken from Intel(R) Data Streaming Accelerator Architecture + * Specification, section 5.7 + */ +#define RTE_IOAT_OP_ADDRESS_ERR 0x03 /**< Page fault or invalid address */ +#define RTE_IOAT_OP_INVALID_LEN 0x13 /**< Invalid/too big length field passed */ +#define RTE_IOAT_OP_OVERLAPPING_BUFS 0x16 /**< Overlapping buffers error */ + + /** * Returns details of operations that have been completed * + * The status of each operation is returned in the status array parameter. * If the hdls_disable option was not set when the device was configured, * the function will return to the caller the user-provided "handles" for * the copy operations which have been completed by the hardware, and not * already returned by a previous call to this API. * If the hdls_disable option for the device was set on configure, the - * max_copies, src_hdls and dst_hdls parameters will be ignored, and the + * src_hdls and dst_hdls parameters will be ignored, and the * function returns the number of newly-completed operations. + * If status is also NULL, then max_copies parameter is also ignored and the + * function returns a count of the number of newly-completed operations. * * @param dev_id * The rawdev device id of the ioat instance * @param max_copies - * The number of entries which can fit in the src_hdls and dst_hdls + * The number of entries which can fit in the status, src_hdls and dst_hdls * arrays, i.e. max number of completed operations to report. * NOTE: If hdls_disable configuration option for the device is set, this - * parameter is ignored. + * parameter applies only to the "status" array if specified + * @param status + * Array to hold the status of each completed operation. Array should be + * set to zeros on input, as the driver will only write error status values. + * A value of 1 implies an operation was not attempted, and any other non-zero + * value indicates operation failure. + * Parameter may be NULL if no status value checking is required. + * @param num_unsuccessful + * Returns the number of elements in status where the value is non-zero, + * i.e. the operation either failed or was not attempted due to an earlier + * failure. If this value is returned as zero (the expected case), the + * status array will not have been modified by the function and need not be + * checked by software * @param src_hdls * Array to hold the source handle parameters of the completed ops. 
* NOTE: If hdls_disable configuration option for the device is set, this - * parameter is ignored. + * parameter is ignored, and may be NULL * @param dst_hdls * Array to hold the destination handle parameters of the completed ops. * NOTE: If hdls_disable configuration option for the device is set, this - * parameter is ignored. + * parameter is ignored, and may be NULL * @return - * -1 on error, with rte_errno set appropriately. - * Otherwise number of completed operations i.e. number of entries written - * to the src_hdls and dst_hdls array parameters. + * -1 on device error, with rte_errno set appropriately and parameters + * unmodified. + * Otherwise number of returned operations i.e. number of valid entries + * in the status, src_hdls and dst_hdls array parameters. If status is NULL, + * and the hdls_disable config option is set, this value may be greater than + * max_copies parameter. */ static inline int __rte_experimental rte_ioat_completed_ops(int dev_id, uint8_t max_copies, + uint32_t *status, uint8_t *num_unsuccessful, uintptr_t *src_hdls, uintptr_t *dst_hdls); /* include the implementation details from a separate file */ diff --git a/drivers/raw/ioat/rte_ioat_rawdev_fns.h b/drivers/raw/ioat/rte_ioat_rawdev_fns.h index 92ccdd03b9..9b8a9fa88e 100644 --- a/drivers/raw/ioat/rte_ioat_rawdev_fns.h +++ b/drivers/raw/ioat/rte_ioat_rawdev_fns.h @@ -334,16 +334,22 @@ rte_ioat_perform_ops(int dev_id) static inline int rte_ioat_completed_ops(int dev_id, uint8_t max_copies, + uint32_t *status, uint8_t *num_unsuccessful, uintptr_t *src_hdls, uintptr_t *dst_hdls) { enum rte_ioat_dev_type *type = (enum rte_ioat_dev_type *)rte_rawdevs[dev_id].dev_private; + uint8_t tmp; /* used so functions don't need to check for null parameter */ + + if (num_unsuccessful == NULL) + num_unsuccessful = &tmp; + + *num_unsuccessful = 0; if (*type == RTE_IDXD_DEV) - return __idxd_completed_ops(dev_id, max_copies, + return __idxd_completed_ops(dev_id, max_copies, status, num_unsuccessful, src_hdls, dst_hdls); else - return __ioat_completed_ops(dev_id, max_copies, - src_hdls, dst_hdls); + return __ioat_completed_ops(dev_id, max_copies, src_hdls, dst_hdls); } static inline void @@ -355,7 +361,8 @@ __rte_deprecated_msg("use rte_ioat_completed_ops() instead") rte_ioat_completed_copies(int dev_id, uint8_t max_copies, uintptr_t *src_hdls, uintptr_t *dst_hdls) { - return rte_ioat_completed_ops(dev_id, max_copies, src_hdls, dst_hdls); + return rte_ioat_completed_ops(dev_id, max_copies, NULL, NULL, + src_hdls, dst_hdls); } #endif /* _RTE_IOAT_RAWDEV_FNS_H_ */ diff --git a/examples/ioat/ioatfwd.c b/examples/ioat/ioatfwd.c index 845301a6db..2e377e2d4b 100644 --- a/examples/ioat/ioatfwd.c +++ b/examples/ioat/ioatfwd.c @@ -447,12 +447,15 @@ ioat_tx_port(struct rxtx_port_config *tx_config) for (i = 0; i < tx_config->nb_queues; i++) { if (copy_mode == COPY_MODE_IOAT_NUM) { - /* Deque the mbufs from IOAT device. */ + /* Dequeue the mbufs from IOAT device. Since all memory + * is DPDK pinned memory and therefore all addresses should + * be valid, we don't check for copy errors + */ nb_dq = rte_ioat_completed_ops( - tx_config->ioat_ids[i], MAX_PKT_BURST, + tx_config->ioat_ids[i], MAX_PKT_BURST, NULL, NULL, (void *)mbufs_src, (void *)mbufs_dst); } else { - /* Deque the mbufs from rx_to_tx_ring. */ + /* Dequeue the mbufs from rx_to_tx_ring. 
*/ nb_dq = rte_ring_dequeue_burst( tx_config->rx_to_tx_ring, (void *)mbufs_dst, MAX_PKT_BURST, NULL); @@ -725,7 +728,10 @@ check_link_status(uint32_t port_mask) static void configure_rawdev_queue(uint32_t dev_id) { - struct rte_ioat_rawdev_config dev_config = { .ring_size = ring_size }; + struct rte_ioat_rawdev_config dev_config = { + .ring_size = ring_size, + .no_prefetch_completions = (cfg.nb_lcores > 1), + }; struct rte_rawdev_info info = { .dev_private = &dev_config }; if (rte_rawdev_configure(dev_id, &info, sizeof(dev_config)) != 0) { diff --git a/examples/vhost/ioat.c b/examples/vhost/ioat.c index 60b73be936..efdd3f6f76 100644 --- a/examples/vhost/ioat.c +++ b/examples/vhost/ioat.c @@ -183,7 +183,7 @@ ioat_check_completed_copies_cb(int vid, uint16_t queue_id, uint16_t dev_id = dma_bind[vid].dmas[queue_id * 2 + VIRTIO_RXQ].dev_id; - n_seg = rte_ioat_completed_ops(dev_id, 255, dump, dump); + n_seg = rte_ioat_completed_ops(dev_id, 255, NULL, NULL, dump, dump); if (n_seg < 0) { RTE_LOG(ERR, VHOST_DATA,