From patchwork Mon Sep 28 16:42:39 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Bruce Richardson X-Patchwork-Id: 79041 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id BB3E7A04DB; Mon, 28 Sep 2020 18:49:28 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id BA0F81D9B6; Mon, 28 Sep 2020 18:43:42 +0200 (CEST) Received: from mga06.intel.com (mga06.intel.com [134.134.136.31]) by dpdk.org (Postfix) with ESMTP id 93D901D938 for ; Mon, 28 Sep 2020 18:43:36 +0200 (CEST) IronPort-SDR: X1S9iH42/X9G/p+q4Qul1Jyy3k3hGxRipFu6WLkoj+qII0Xr33oEMBdx2bEUZG6wb8ew3yRqRD j3HpRu126kEw== X-IronPort-AV: E=McAfee;i="6000,8403,9758"; a="223619899" X-IronPort-AV: E=Sophos;i="5.77,313,1596524400"; d="scan'208";a="223619899" X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga004.fm.intel.com ([10.253.24.48]) by orsmga104.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 28 Sep 2020 09:43:36 -0700 IronPort-SDR: qDDWQsnbCR+R033hdA8vUouanKQjPrCTujx/GaZSA/pDnpXRbeDXtOMdHrN31Z4rXFJ0AQzd31 Iqt5Hy9qjlZg== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.77,313,1596524400"; d="scan'208";a="338250624" Received: from silpixa00399126.ir.intel.com ([10.237.222.4]) by fmsmga004.fm.intel.com with ESMTP; 28 Sep 2020 09:43:34 -0700 From: Bruce Richardson To: dev@dpdk.org Cc: patrick.fu@intel.com, Bruce Richardson , Kevin Laatz Date: Mon, 28 Sep 2020 17:42:39 +0100 Message-Id: <20200928164245.84997-20-bruce.richardson@intel.com> X-Mailer: git-send-email 2.25.1 In-Reply-To: <20200928164245.84997-1-bruce.richardson@intel.com> References: <20200721095140.719297-1-bruce.richardson@intel.com> <20200928164245.84997-1-bruce.richardson@intel.com> MIME-Version: 1.0 Subject: [dpdk-dev] [PATCH v4 19/25] raw/ioat: add data path for idxd devices X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Add support for doing copies using DSA hardware. This is implemented by just switching on the device type field at the start of the inline functions. Since there is no hardware which will have both device types present this branch will always be predictable after the first call, meaning it has little to no perf penalty. Signed-off-by: Bruce Richardson Reviewed-by: Kevin Laatz --- drivers/raw/ioat/ioat_common.c | 1 + drivers/raw/ioat/ioat_rawdev.c | 1 + drivers/raw/ioat/rte_ioat_rawdev_fns.h | 201 +++++++++++++++++++++++-- 3 files changed, 192 insertions(+), 11 deletions(-) diff --git a/drivers/raw/ioat/ioat_common.c b/drivers/raw/ioat/ioat_common.c index 5173c331c9..6a4e2979f5 100644 --- a/drivers/raw/ioat/ioat_common.c +++ b/drivers/raw/ioat/ioat_common.c @@ -153,6 +153,7 @@ idxd_rawdev_create(const char *name, struct rte_device *dev, idxd = rawdev->dev_private; *idxd = *base_idxd; /* copy over the main fields already passed in */ + idxd->public.type = RTE_IDXD_DEV; idxd->rawdev = rawdev; idxd->mz = mz; diff --git a/drivers/raw/ioat/ioat_rawdev.c b/drivers/raw/ioat/ioat_rawdev.c index 1fe32278d2..0097be87ee 100644 --- a/drivers/raw/ioat/ioat_rawdev.c +++ b/drivers/raw/ioat/ioat_rawdev.c @@ -260,6 +260,7 @@ ioat_rawdev_create(const char *name, struct rte_pci_device *dev) rawdev->driver_name = dev->device.driver->name; ioat = rawdev->dev_private; + ioat->type = RTE_IOAT_DEV; ioat->rawdev = rawdev; ioat->mz = mz; ioat->regs = dev->mem_resource[0].addr; diff --git a/drivers/raw/ioat/rte_ioat_rawdev_fns.h b/drivers/raw/ioat/rte_ioat_rawdev_fns.h index e9cdce0162..36ba876eab 100644 --- a/drivers/raw/ioat/rte_ioat_rawdev_fns.h +++ b/drivers/raw/ioat/rte_ioat_rawdev_fns.h @@ -196,8 +196,8 @@ struct rte_idxd_rawdev { /* * Enqueue a copy operation onto the ioat device */ -static inline int -rte_ioat_enqueue_copy(int dev_id, phys_addr_t src, phys_addr_t dst, +static __rte_always_inline int +__ioat_enqueue_copy(int dev_id, phys_addr_t src, phys_addr_t dst, unsigned int length, uintptr_t src_hdl, uintptr_t dst_hdl) { struct rte_ioat_rawdev *ioat = @@ -233,8 +233,8 @@ rte_ioat_enqueue_copy(int dev_id, phys_addr_t src, phys_addr_t dst, } /* add fence to last written descriptor */ -static inline int -rte_ioat_fence(int dev_id) +static __rte_always_inline int +__ioat_fence(int dev_id) { struct rte_ioat_rawdev *ioat = (struct rte_ioat_rawdev *)rte_rawdevs[dev_id].dev_private; @@ -252,8 +252,8 @@ rte_ioat_fence(int dev_id) /* * Trigger hardware to begin performing enqueued operations */ -static inline void -rte_ioat_perform_ops(int dev_id) +static __rte_always_inline void +__ioat_perform_ops(int dev_id) { struct rte_ioat_rawdev *ioat = (struct rte_ioat_rawdev *)rte_rawdevs[dev_id].dev_private; @@ -268,8 +268,8 @@ rte_ioat_perform_ops(int dev_id) * @internal * Returns the index of the last completed operation. */ -static inline int -rte_ioat_get_last_completed(struct rte_ioat_rawdev *ioat, int *error) +static __rte_always_inline int +__ioat_get_last_completed(struct rte_ioat_rawdev *ioat, int *error) { uint64_t status = ioat->status; @@ -283,8 +283,8 @@ rte_ioat_get_last_completed(struct rte_ioat_rawdev *ioat, int *error) /* * Returns details of operations that have been completed */ -static inline int -rte_ioat_completed_ops(int dev_id, uint8_t max_copies, +static __rte_always_inline int +__ioat_completed_ops(int dev_id, uint8_t max_copies, uintptr_t *src_hdls, uintptr_t *dst_hdls) { struct rte_ioat_rawdev *ioat = @@ -295,7 +295,7 @@ rte_ioat_completed_ops(int dev_id, uint8_t max_copies, int error; int i = 0; - end_read = (rte_ioat_get_last_completed(ioat, &error) + 1) & mask; + end_read = (__ioat_get_last_completed(ioat, &error) + 1) & mask; count = (end_read - (read & mask)) & mask; if (error) { @@ -332,6 +332,185 @@ rte_ioat_completed_ops(int dev_id, uint8_t max_copies, return count; } +static __rte_always_inline int +__idxd_write_desc(int dev_id, const struct rte_idxd_hw_desc *desc, + const struct rte_idxd_user_hdl *hdl) +{ + struct rte_idxd_rawdev *idxd = + (struct rte_idxd_rawdev *)rte_rawdevs[dev_id].dev_private; + struct rte_idxd_desc_batch *b = &idxd->batch_ring[idxd->next_batch]; + + /* check for room in the handle ring */ + if (((idxd->next_free_hdl + 1) & (idxd->hdl_ring_sz - 1)) == idxd->next_ret_hdl) + goto failed; + + /* check for space in current batch */ + if (b->op_count >= BATCH_SIZE) + goto failed; + + /* check that we can actually use the current batch */ + if (b->submitted) + goto failed; + + /* write the descriptor */ + b->ops[b->op_count++] = *desc; + + /* store the completion details */ + if (!idxd->hdls_disable) + idxd->hdl_ring[idxd->next_free_hdl] = *hdl; + if (++idxd->next_free_hdl == idxd->hdl_ring_sz) + idxd->next_free_hdl = 0; + + return 1; + +failed: + rte_errno = ENOSPC; + return 0; +} + +static __rte_always_inline int +__idxd_enqueue_copy(int dev_id, rte_iova_t src, rte_iova_t dst, + unsigned int length, uintptr_t src_hdl, uintptr_t dst_hdl) +{ + const struct rte_idxd_hw_desc desc = { + .op_flags = (idxd_op_memmove << IDXD_CMD_OP_SHIFT) | + IDXD_FLAG_CACHE_CONTROL, + .src = src, + .dst = dst, + .size = length + }; + const struct rte_idxd_user_hdl hdl = { + .src = src_hdl, + .dst = dst_hdl + }; + return __idxd_write_desc(dev_id, &desc, &hdl); +} + +static __rte_always_inline int +__idxd_fence(int dev_id) +{ + static const struct rte_idxd_hw_desc fence = { + .op_flags = IDXD_FLAG_FENCE + }; + static const struct rte_idxd_user_hdl null_hdl; + return __idxd_write_desc(dev_id, &fence, &null_hdl); +} + +static __rte_always_inline void +__idxd_movdir64b(volatile void *dst, const void *src) +{ + asm volatile (".byte 0x66, 0x0f, 0x38, 0xf8, 0x02" + : + : "a" (dst), "d" (src)); +} + +static __rte_always_inline void +__idxd_perform_ops(int dev_id) +{ + struct rte_idxd_rawdev *idxd = + (struct rte_idxd_rawdev *)rte_rawdevs[dev_id].dev_private; + struct rte_idxd_desc_batch *b = &idxd->batch_ring[idxd->next_batch]; + + if (b->submitted || b->op_count == 0) + return; + b->hdl_end = idxd->next_free_hdl; + b->comp.status = 0; + b->submitted = 1; + b->batch_desc.size = b->op_count + 1; + __idxd_movdir64b(idxd->portal, &b->batch_desc); + + if (++idxd->next_batch == idxd->batch_ring_sz) + idxd->next_batch = 0; +} + +static __rte_always_inline int +__idxd_completed_ops(int dev_id, uint8_t max_ops, + uintptr_t *src_hdls, uintptr_t *dst_hdls) +{ + struct rte_idxd_rawdev *idxd = + (struct rte_idxd_rawdev *)rte_rawdevs[dev_id].dev_private; + struct rte_idxd_desc_batch *b = &idxd->batch_ring[idxd->next_completed]; + uint16_t h_idx = idxd->next_ret_hdl; + int n = 0; + + while (b->submitted && b->comp.status != 0) { + idxd->last_completed_hdl = b->hdl_end; + b->submitted = 0; + b->op_count = 0; + if (++idxd->next_completed == idxd->batch_ring_sz) + idxd->next_completed = 0; + b = &idxd->batch_ring[idxd->next_completed]; + } + + if (!idxd->hdls_disable) + for (n = 0; n < max_ops && h_idx != idxd->last_completed_hdl; n++) { + src_hdls[n] = idxd->hdl_ring[h_idx].src; + dst_hdls[n] = idxd->hdl_ring[h_idx].dst; + if (++h_idx == idxd->hdl_ring_sz) + h_idx = 0; + } + else + while (h_idx != idxd->last_completed_hdl) { + n++; + if (++h_idx == idxd->hdl_ring_sz) + h_idx = 0; + } + + idxd->next_ret_hdl = h_idx; + + return n; +} + +static inline int +rte_ioat_enqueue_copy(int dev_id, phys_addr_t src, phys_addr_t dst, + unsigned int length, uintptr_t src_hdl, uintptr_t dst_hdl) +{ + enum rte_ioat_dev_type *type = + (enum rte_ioat_dev_type *)rte_rawdevs[dev_id].dev_private; + if (*type == RTE_IDXD_DEV) + return __idxd_enqueue_copy(dev_id, src, dst, length, + src_hdl, dst_hdl); + else + return __ioat_enqueue_copy(dev_id, src, dst, length, + src_hdl, dst_hdl); +} + +static inline int +rte_ioat_fence(int dev_id) +{ + enum rte_ioat_dev_type *type = + (enum rte_ioat_dev_type *)rte_rawdevs[dev_id].dev_private; + if (*type == RTE_IDXD_DEV) + return __idxd_fence(dev_id); + else + return __ioat_fence(dev_id); +} + +static inline void +rte_ioat_perform_ops(int dev_id) +{ + enum rte_ioat_dev_type *type = + (enum rte_ioat_dev_type *)rte_rawdevs[dev_id].dev_private; + if (*type == RTE_IDXD_DEV) + return __idxd_perform_ops(dev_id); + else + return __ioat_perform_ops(dev_id); +} + +static inline int +rte_ioat_completed_ops(int dev_id, uint8_t max_copies, + uintptr_t *src_hdls, uintptr_t *dst_hdls) +{ + enum rte_ioat_dev_type *type = + (enum rte_ioat_dev_type *)rte_rawdevs[dev_id].dev_private; + if (*type == RTE_IDXD_DEV) + return __idxd_completed_ops(dev_id, max_copies, + src_hdls, dst_hdls); + else + return __ioat_completed_ops(dev_id, max_copies, + src_hdls, dst_hdls); +} + static inline void __rte_deprecated_msg("use rte_ioat_perform_ops() instead") rte_ioat_do_copies(int dev_id) { rte_ioat_perform_ops(dev_id); }