diff mbox series

[v3,06/11] dma/ioat: add data path job submission functions

Message ID 20210908104004.3218016-7-conor.walsh@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers show
Series dma: add dmadev driver for ioat devices | expand

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Walsh, Conor Sept. 8, 2021, 10:39 a.m. UTC
Add data path functions for enqueuing and submitting operations to
IOAT devices.

Signed-off-by: Conor Walsh <conor.walsh@intel.com>
Reviewed-by: Kevin Laatz <kevin.laatz@intel.com>
---
 doc/guides/dmadevs/ioat.rst    | 54 ++++++++++++++++++++
 drivers/dma/ioat/ioat_dmadev.c | 92 ++++++++++++++++++++++++++++++++++
 2 files changed, 146 insertions(+)
diff mbox series

Patch

diff --git a/doc/guides/dmadevs/ioat.rst b/doc/guides/dmadevs/ioat.rst
index f7742642b5..16acfda3e3 100644
--- a/doc/guides/dmadevs/ioat.rst
+++ b/doc/guides/dmadevs/ioat.rst
@@ -89,3 +89,57 @@  The following code shows how the device is configured in ``test_dmadev.c``:
 
 Once configured, the device can then be made ready for use by calling the
 ``rte_dmadev_start()`` API.
+
+Performing Data Copies
+~~~~~~~~~~~~~~~~~~~~~~~
+
+To perform data copies using IOAT dmadev devices, the functions
+``rte_dmadev_copy()`` and ``rte_dmadev_submit()`` should be used. Alternatively
+``rte_dmadev_copy()`` can be called with the ``RTE_DMA_OP_FLAG_SUBMIT`` flag
+set.
+
+The ``rte_dmadev_copy()`` function enqueues a single copy to the
+device ring for copying at a later point. The parameters to the function
+include the device ID of the desired device, the virtual DMA channel required
+(always 0 for IOAT), the IOVA addresses of both the source and destination
+buffers, the length of the data to be copied and any operation flags. The
+function will return the index of the enqueued job which can be used to
+track that operation.
+
+While the ``rte_dmadev_copy()`` function enqueues a copy operation on the device
+ring, the copy will not actually be performed until after the application calls
+the ``rte_dmadev_submit()`` function. This function informs the device hardware
+of the elements enqueued on the ring, and the device will begin to process them.
+It is expected that, for efficiency reasons, a burst of operations will be
+enqueued to the device via multiple enqueue calls between calls to the
+``rte_dmadev_submit()`` function. If desired you can pass the
+``RTE_DMA_OP_FLAG_SUBMIT`` flag when calling ``rte_dmadev_copy()`` and this will
+tell the device to perform the enqueued operation and any unperformed operations
+before it. The ``RTE_DMA_OP_FLAG_SUBMIT`` flag can be passed instead of calling
+the ``rte_dmadev_submit()`` function for example on the last enqueue of the burst.
+
+The following code demonstrates how to enqueue a burst of copies to the
+device and start the hardware processing of them:
+
+.. code-block:: C
+
+   for (i = 0; i < BURST_SIZE; i++) {
+      if (rte_dmadev_copy(dev_id, vchan, rte_mbuf_data_iova(srcs[i]),
+            rte_mbuf_data_iova(dsts[i]), COPY_LEN, 0) < 0) {
+         PRINT_ERR("Error with rte_dmadev_copy for buffer %u\n", i);
+         return -1;
+      }
+   }
+   if (rte_dmadev_submit(dev_id, vchan) < 0) {
      PRINT_ERR("Error with performing operations\n");
+      return -1;
+   }
+
+Filling an Area of Memory
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The driver also has support for the ``fill`` operation, where an area
+of memory is overwritten, or filled, with a short pattern of data.
+Fill operations can be performed in much the same way as copy operations
+described above, just using the ``rte_dmadev_fill()`` function rather
+than the ``rte_dmadev_copy()`` function.
diff --git a/drivers/dma/ioat/ioat_dmadev.c b/drivers/dma/ioat/ioat_dmadev.c
index 76ef977e93..41edfcfe86 100644
--- a/drivers/dma/ioat/ioat_dmadev.c
+++ b/drivers/dma/ioat/ioat_dmadev.c
@@ -5,6 +5,7 @@ 
 #include <rte_bus_pci.h>
 #include <rte_dmadev_pmd.h>
 #include <rte_malloc.h>
+#include <rte_prefetch.h>
 
 #include "ioat_internal.h"
 
@@ -17,6 +18,12 @@  RTE_LOG_REGISTER_DEFAULT(ioat_pmd_logtype, INFO);
 #define IOAT_PMD_NAME dmadev_ioat
 #define IOAT_PMD_NAME_STR RTE_STR(IOAT_PMD_NAME)
 
+/* IOAT operations: opcodes placed into the descriptor control field. */
+enum rte_ioat_ops {
+	ioat_op_copy = 0,	/* Standard DMA copy operation. */
+	ioat_op_fill		/* Block fill with a 64-bit pattern. */
+};
+
 /* Configure a device. */
 static int
 ioat_dev_configure(struct rte_dmadev *dev __rte_unused, const struct rte_dmadev_conf *dev_conf,
@@ -197,6 +204,87 @@  ioat_dev_close(struct rte_dmadev *dev)
 	return 0;
 }
 
+/* Trigger hardware to begin performing enqueued operations. */
+static inline void
+__submit(struct ioat_dmadev *ioat)
+{
+	*ioat->doorbell = ioat->next_write - ioat->offset;	/* Publish the ring tail to HW via the doorbell register. */
+
+	ioat->last_write = ioat->next_write;	/* Record how far we have handed off to HW. */
+}
+
+/* External submit function wrapper: the dmadev 'submit' callback (qid unused, always 0 for IOAT). */
+static int
+ioat_submit(struct rte_dmadev *dev, uint16_t qid __rte_unused)
+{
+	struct ioat_dmadev *ioat = (struct ioat_dmadev *)dev->dev_private;
+
+	__submit(ioat);
+
+	return 0;	/* Doorbell write cannot fail; always report success. */
+}
+
+/* Write one descriptor for enqueue; returns the job index or -ENOSPC if the ring is full. */
+static inline int
+__write_desc(struct rte_dmadev *dev, uint32_t op, uint64_t src, phys_addr_t dst,
+		unsigned int length, uint64_t flags)
+{
+	struct ioat_dmadev *ioat = dev->dev_private;
+	uint16_t ret;
+	const unsigned short mask = ioat->qcfg.nb_desc - 1;	/* Index mask; assumes nb_desc is a power of two. */
+	const unsigned short read = ioat->next_read;
+	unsigned short write = ioat->next_write;
+	const unsigned short space = mask + read - write;	/* Free slots; one slot is always left unused. */
+	struct ioat_dma_hw_desc *desc;
+
+	if (space == 0)
+		return -ENOSPC;
+
+	ioat->next_write = write + 1;	/* next_write is an unmasked running counter. */
+	write &= mask;	/* Mask down to an actual ring slot. */
+
+	desc = &ioat->desc_ring[write];
+	desc->size = length;
+	desc->u.control_raw = (uint32_t)((op << IOAT_CMD_OP_SHIFT) |
+			(1 << IOAT_COMP_UPDATE_SHIFT));	/* Opcode plus request for a completion update. */
+
+	/* In IOAT the fence ensures that all operations including the current one
+	 * are completed before moving on, DMAdev assumes that the fence ensures
+	 * all operations before the current one are completed before starting
+	 * the current one, so in IOAT we set the fence for the previous descriptor.
+	 */
+	if (flags & RTE_DMA_OP_FLAG_FENCE)
+		ioat->desc_ring[(write - 1) & mask].u.control.fence = 1;
+
+	desc->src_addr = src;	/* For fill ops, 'src' carries the 64-bit pattern. */
+	desc->dest_addr = dst;
+
+	rte_prefetch0(&ioat->desc_ring[ioat->next_write & mask]);	/* Warm the cache line of the next slot. */
+
+	ret = (uint16_t)(ioat->next_write - 1);	/* Job index: the unmasked counter of this descriptor. */
+
+	if (flags & RTE_DMA_OP_FLAG_SUBMIT)
+		__submit(ioat);	/* Caller asked for an immediate doorbell ring. */
+
+	return ret;
+}
+
+/* Enqueue a fill of 'pattern' over 'length' bytes at 'dst'; returns the job index or -ENOSPC. */
+static int
+ioat_enqueue_fill(struct rte_dmadev *dev, uint16_t qid __rte_unused, uint64_t pattern,
+		rte_iova_t dst, unsigned int length, uint64_t flags)
+{
+	return __write_desc(dev, ioat_op_fill, pattern, dst, length, flags);
+}
+
+/* Enqueue a copy of 'length' bytes from 'src' to 'dst'; returns the job index or -ENOSPC. */
+static int
+ioat_enqueue_copy(struct rte_dmadev *dev, uint16_t qid __rte_unused, rte_iova_t src,
+		rte_iova_t dst, unsigned int length, uint64_t flags)
+{
+	return __write_desc(dev, ioat_op_copy, src, dst, length, flags);
+}
+
 /* Dump DMA device info. */
 static int
 ioat_dev_dump(const struct rte_dmadev *dev, FILE *f)
@@ -292,6 +380,10 @@  ioat_dmadev_create(const char *name, struct rte_pci_device *dev)
 
 	dmadev->dev_ops = &ioat_dmadev_ops;
 
+	dmadev->copy = ioat_enqueue_copy;
+	dmadev->fill = ioat_enqueue_fill;
+	dmadev->submit = ioat_submit;
+
 	ioat = dmadev->data->dev_private;
 	ioat->dmadev = dmadev;
 	ioat->regs = dev->mem_resource[0].addr;