[v3,25/25] raw/ioat: add fill operation
diff mbox series

Message ID 20200925110910.284098-26-bruce.richardson@intel.com
State Superseded
Delegated to: Thomas Monjalon
Headers show
Series
  • raw/ioat: enhancements and new hardware support
Related show

Checks

Context Check Description
ci/Intel-compilation fail Compilation issues
ci/iol-mellanox-Performance success Performance Testing PASS
ci/travis-robot warning Travis build: failed
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-testing fail Testing issues
ci/iol-broadcom-Functional success Functional Testing PASS
ci/iol-broadcom-Performance success Performance Testing PASS
ci/checkpatch success coding style OK

Commit Message

Bruce Richardson Sept. 25, 2020, 11:09 a.m. UTC
From: Kevin Laatz <kevin.laatz@intel.com>

Add fill operation enqueue support for IOAT and IDXD. The fill enqueue is
similar to the copy enqueue, but takes a 'pattern' rather than a source
address to transfer to the destination address. This patch also includes an
additional test case for the new operation type.

Signed-off-by: Kevin Laatz <kevin.laatz@intel.com>
Reviewed-by: Bruce Richardson <bruce.richardson@intel.com>
---
 doc/guides/rawdevs/ioat.rst            | 10 ++++
 doc/guides/rel_notes/release_20_11.rst |  2 +
 drivers/raw/ioat/ioat_rawdev_test.c    | 44 +++++++++++++++++
 drivers/raw/ioat/rte_ioat_rawdev.h     | 26 +++++++++++
 drivers/raw/ioat/rte_ioat_rawdev_fns.h | 65 ++++++++++++++++++++++++--
 5 files changed, 142 insertions(+), 5 deletions(-)

Patch
diff mbox series

diff --git a/doc/guides/rawdevs/ioat.rst b/doc/guides/rawdevs/ioat.rst
index 7c2a2d457..250cfc48a 100644
--- a/doc/guides/rawdevs/ioat.rst
+++ b/doc/guides/rawdevs/ioat.rst
@@ -285,6 +285,16 @@  is correct before freeing the data buffers using the returned handles:
         }
 
 
+Filling an Area of Memory
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The IOAT driver also has support for the ``fill`` operation, where an area
+of memory is overwritten, or filled, with a short pattern of data.
+Fill operations can be performed in much the same way as copy operations
+described above, just using the ``rte_ioat_enqueue_fill()`` function rather
+than the ``rte_ioat_enqueue_copy()`` function.
+
+
 Querying Device Statistics
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
diff --git a/doc/guides/rel_notes/release_20_11.rst b/doc/guides/rel_notes/release_20_11.rst
index 4d8b78154..dd65b779d 100644
--- a/doc/guides/rel_notes/release_20_11.rst
+++ b/doc/guides/rel_notes/release_20_11.rst
@@ -84,6 +84,8 @@  New Features
 
   * Added support for Intel\ |reg| Data Streaming Accelerator hardware.
     For more information, see https://01.org/blogs/2019/introducing-intel-data-streaming-accelerator
+  * Added support for the fill operation via the API ``rte_ioat_enqueue_fill()``,
+    where the hardware fills an area of memory with a repeating pattern.
   * Added a per-device configuration flag to disable management of user-provided completion handles
   * Renamed the ``rte_ioat_do_copies()`` API to ``rte_ioat_perform_ops()``,
     and renamed the ``rte_ioat_completed_copies()`` API to ``rte_ioat_completed_ops()``
diff --git a/drivers/raw/ioat/ioat_rawdev_test.c b/drivers/raw/ioat/ioat_rawdev_test.c
index 7be6f2a2d..64269af55 100644
--- a/drivers/raw/ioat/ioat_rawdev_test.c
+++ b/drivers/raw/ioat/ioat_rawdev_test.c
@@ -152,6 +152,46 @@  test_enqueue_copies(int dev_id)
 	return 0;
 }
 
+/*
+ * Exercise the fill operation: for each test length, zero the mbuf data
+ * area, fill it with a 64-bit pattern via the device, and verify every byte.
+ *
+ * Returns 0 on success and -1 on any failure. The mbuf allocated for the
+ * test is released on every exit path.
+ */
+static int
+test_enqueue_fill(int dev_id)
+{
+	const unsigned int length[] = {8, 64, 1024, 50, 100, 89};
+	const uint64_t pattern = 0xfedcba9876543210;
+	/* view the pattern as raw bytes; unsigned so values >= 0x80 print sanely */
+	const unsigned char *pat_bytes = (const unsigned char *)&pattern;
+	struct rte_mbuf *dst = rte_pktmbuf_alloc(pool);
+	unsigned char *dst_data;
+	unsigned int i, j;
+	int ret = -1;
+
+	if (dst == NULL) {
+		PRINT_ERR("Error allocating mbuf for fill test\n");
+		return -1;
+	}
+	dst_data = rte_pktmbuf_mtod(dst, unsigned char *);
+
+	for (i = 0; i < RTE_DIM(length); i++) {
+		/* reset dst_data */
+		memset(dst_data, 0, length[i]);
+
+		/* perform the fill operation */
+		if (rte_ioat_enqueue_fill(dev_id, pattern,
+				dst->buf_iova + dst->data_off, length[i],
+				(uintptr_t)dst) != 1) {
+			PRINT_ERR("Error with rte_ioat_enqueue_fill\n");
+			goto out;
+		}
+
+		rte_ioat_perform_ops(dev_id);
+		usleep(100);
+
+		/* check the result: the 8-byte pattern repeats across the buffer */
+		for (j = 0; j < length[i]; j++) {
+			const unsigned char pat_byte = pat_bytes[j % 8];
+
+			if (dst_data[j] != pat_byte) {
+				PRINT_ERR("Error with fill operation (length = %u): got (%x), not (%x)\n",
+						length[i], dst_data[j],
+						pat_byte);
+				goto out;
+			}
+		}
+	}
+	ret = 0;
+
+out:
+	/* single exit point so the mbuf is never leaked on failure */
+	rte_pktmbuf_free(dst);
+	return ret;
+}
+
 int
 ioat_rawdev_test(uint16_t dev_id)
 {
@@ -238,6 +278,10 @@  ioat_rawdev_test(uint16_t dev_id)
 	}
 	printf("\n");
 
+	/* test enqueue fill operation */
+	if (test_enqueue_fill(dev_id) != 0)
+		goto err;
+
 	rte_rawdev_stop(dev_id);
 	if (rte_rawdev_xstats_reset(dev_id, NULL, 0) != 0) {
 		PRINT_ERR("Error resetting xstat values\n");
diff --git a/drivers/raw/ioat/rte_ioat_rawdev.h b/drivers/raw/ioat/rte_ioat_rawdev.h
index 6b891cd44..b7632ebf3 100644
--- a/drivers/raw/ioat/rte_ioat_rawdev.h
+++ b/drivers/raw/ioat/rte_ioat_rawdev.h
@@ -37,6 +37,32 @@  struct rte_ioat_rawdev_config {
 	bool hdls_disable;    /**< if set, ignore user-supplied handle params */
 };
 
+/**
+ * Enqueue a fill operation onto the ioat device
+ *
+ * This queues up a fill operation to be performed by hardware, but does not
+ * trigger hardware to begin that operation; call rte_ioat_perform_ops() to
+ * start the queued operations.
+ *
+ * @param dev_id
+ *   The rawdev device id of the ioat instance
+ * @param pattern
+ *   The 64-bit pattern to populate the destination buffer with; it is
+ *   repeated as needed to cover the buffer
+ * @param dst
+ *   The physical address of the destination buffer
+ * @param length
+ *   The length of the destination buffer, in bytes
+ * @param dst_hdl
+ *   An opaque handle for the destination data, to be returned when this
+ *   operation has been completed and the user polls for the completion details.
+ *   NOTE: If hdls_disable configuration option for the device is set, this
+ *   parameter is ignored.
+ * @return
+ *   Number of operations enqueued, either 0 or 1
+ */
+static inline int
+rte_ioat_enqueue_fill(int dev_id, uint64_t pattern, phys_addr_t dst,
+		unsigned int length, uintptr_t dst_hdl);
+
 /**
  * Enqueue a copy operation onto the ioat device
  *
diff --git a/drivers/raw/ioat/rte_ioat_rawdev_fns.h b/drivers/raw/ioat/rte_ioat_rawdev_fns.h
index d0045d8a4..c2c4601ca 100644
--- a/drivers/raw/ioat/rte_ioat_rawdev_fns.h
+++ b/drivers/raw/ioat/rte_ioat_rawdev_fns.h
@@ -115,6 +115,13 @@  enum rte_idxd_ops {
 #define IDXD_FLAG_REQUEST_COMPLETION    (1 << 3)
 #define IDXD_FLAG_CACHE_CONTROL         (1 << 8)
 
+/* bit offsets within the ioat descriptor's raw control word */
+#define IOAT_COMP_UPDATE_SHIFT	3	/* descriptor write-back request flag */
+#define IOAT_CMD_OP_SHIFT	24	/* operation type, see enum rte_ioat_ops */
+/* operation types, placed in the control word at IOAT_CMD_OP_SHIFT */
+enum rte_ioat_ops {
+	ioat_op_copy = 0,	/* Standard DMA Operation */
+	ioat_op_fill		/* Block Fill */
+};
+
 /**
  * Hardware descriptor used by DSA hardware, for both bursts and
  * for individual operations.
@@ -203,11 +210,8 @@  struct rte_idxd_rawdev {
 	struct rte_idxd_desc_batch *batch_ring;
 };
 
-/*
- * Enqueue a copy operation onto the ioat device
- */
 static __rte_always_inline int
-__ioat_enqueue_copy(int dev_id, phys_addr_t src, phys_addr_t dst,
+__ioat_write_desc(int dev_id, uint32_t op, uint64_t src, phys_addr_t dst,
 		unsigned int length, uintptr_t src_hdl, uintptr_t dst_hdl)
 {
 	struct rte_ioat_rawdev *ioat =
@@ -229,7 +233,8 @@  __ioat_enqueue_copy(int dev_id, phys_addr_t src, phys_addr_t dst,
 	desc = &ioat->desc_ring[write];
 	desc->size = length;
 	/* set descriptor write-back every 16th descriptor */
-	desc->u.control_raw = (uint32_t)((!(write & 0xF)) << 3);
+	desc->u.control_raw = (uint32_t)((op << IOAT_CMD_OP_SHIFT) |
+			(!(write & 0xF) << IOAT_COMP_UPDATE_SHIFT));
 	desc->src_addr = src;
 	desc->dest_addr = dst;
 
@@ -242,6 +247,27 @@  __ioat_enqueue_copy(int dev_id, phys_addr_t src, phys_addr_t dst,
 	return 1;
 }
 
+/*
+ * Enqueue a fill operation onto the ioat device
+ */
+static __rte_always_inline int
+__ioat_enqueue_fill(int dev_id, uint64_t pattern, phys_addr_t dst,
+		unsigned int length, uintptr_t dst_hdl)
+{
+	/* a fill has no source buffer, so no source handle is recorded */
+	const uintptr_t no_src_hdl = 0;
+
+	/* the pattern travels in the descriptor's source-address slot */
+	return __ioat_write_desc(dev_id, ioat_op_fill, pattern, dst, length,
+			no_src_hdl, dst_hdl);
+}
+
+/*
+ * Enqueue a copy operation onto the ioat device
+ *
+ * Thin wrapper around __ioat_write_desc() that selects the ioat_op_copy
+ * operation type and passes the addresses, length and both user handles
+ * through unchanged. Returns whatever __ioat_write_desc() returns.
+ */
+static __rte_always_inline int
+__ioat_enqueue_copy(int dev_id, phys_addr_t src, phys_addr_t dst,
+		unsigned int length, uintptr_t src_hdl, uintptr_t dst_hdl)
+{
+	return __ioat_write_desc(dev_id, ioat_op_copy, src, dst, length,
+			src_hdl, dst_hdl);
+}
+
 /* add fence to last written descriptor */
 static __rte_always_inline int
 __ioat_fence(int dev_id)
@@ -380,6 +406,23 @@  __idxd_write_desc(int dev_id, const struct rte_idxd_hw_desc *desc,
 	return 0;
 }
 
+/*
+ * Enqueue a fill operation on the idxd path
+ *
+ * Builds a hardware descriptor with the idxd_op_fill opcode and the
+ * cache-control flag set, carrying the pattern in the source field, and
+ * hands it to __idxd_write_desc() together with a user-handle record in
+ * which only the destination handle is populated.
+ * Returns whatever __idxd_write_desc() returns.
+ */
+static __rte_always_inline int
+__idxd_enqueue_fill(int dev_id, uint64_t pattern, rte_iova_t dst,
+		unsigned int length, uintptr_t dst_hdl)
+{
+	const struct rte_idxd_hw_desc desc = {
+			.op_flags =  (idxd_op_fill << IDXD_CMD_OP_SHIFT) |
+				IDXD_FLAG_CACHE_CONTROL,
+			.src = pattern,	/* fill has no source address; pattern goes here */
+			.dst = dst,
+			.size = length
+	};
+	/* no source handle for a fill; the unnamed member is zero-initialised */
+	const struct rte_idxd_user_hdl hdl = {
+			.dst = dst_hdl
+	};
+	return __idxd_write_desc(dev_id, &desc, &hdl);
+}
+
 static __rte_always_inline int
 __idxd_enqueue_copy(int dev_id, rte_iova_t src, rte_iova_t dst,
 		unsigned int length, uintptr_t src_hdl, uintptr_t dst_hdl)
@@ -475,6 +518,18 @@  __idxd_completed_ops(int dev_id, uint8_t max_ops,
 	return n;
 }
 
+/*
+ * Enqueue a fill operation, dispatching on the device type read from the
+ * start of the rawdev's private data: idxd devices take the idxd enqueue
+ * path, all others take the ioat path.
+ * Returns the value of the selected enqueue function.
+ */
+static inline int
+rte_ioat_enqueue_fill(int dev_id, uint64_t pattern, phys_addr_t dst,
+		unsigned int length, uintptr_t dst_hdl)
+{
+	enum rte_ioat_dev_type *type =
+			(enum rte_ioat_dev_type *)rte_rawdevs[dev_id].dev_private;
+
+	if (*type == RTE_IDXD_DEV)
+		return __idxd_enqueue_fill(dev_id, pattern, dst, length,
+				dst_hdl);
+	return __ioat_enqueue_fill(dev_id, pattern, dst, length, dst_hdl);
+}
+
 static inline int
 rte_ioat_enqueue_copy(int dev_id, phys_addr_t src, phys_addr_t dst,
 		unsigned int length, uintptr_t src_hdl, uintptr_t dst_hdl)