[v3,08/25] raw/ioat: add separate API for fence call
diff mbox series

Message ID 20200925110910.284098-9-bruce.richardson@intel.com
State Superseded
Delegated to: Thomas Monjalon
Headers show
Series
  • raw/ioat: enhancements and new hardware support
Related show

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Bruce Richardson Sept. 25, 2020, 11:08 a.m. UTC
Rather than having the fence signalled via a flag on a descriptor - which
requires reading the docs to find out whether the flag needs to go on the
last descriptor before, or the first descriptor after the fence - we can
instead add a separate fence API call. This becomes unambiguous to use,
since the fence call explicitly comes between two other enqueue calls. It
also allows more freedom of implementation in the driver code.

Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
Reviewed-by: Kevin Laatz <kevin.laatz@intel.com>
---
 doc/guides/rawdevs/ioat.rst            |  3 +--
 doc/guides/rel_notes/release_20_11.rst |  4 ++++
 drivers/raw/ioat/ioat_rawdev_test.c    |  6 ++----
 drivers/raw/ioat/rte_ioat_rawdev.h     | 26 ++++++++++++++++++++------
 drivers/raw/ioat/rte_ioat_rawdev_fns.h | 22 +++++++++++++++++++---
 examples/ioat/ioatfwd.c                | 12 ++++--------
 6 files changed, 50 insertions(+), 23 deletions(-)

Patch
diff mbox series

diff --git a/doc/guides/rawdevs/ioat.rst b/doc/guides/rawdevs/ioat.rst
index 3db5f5d09..71bca0b28 100644
--- a/doc/guides/rawdevs/ioat.rst
+++ b/doc/guides/rawdevs/ioat.rst
@@ -203,8 +203,7 @@  a burst of copies to the device and start the hardware processing of them:
                                 dsts[i]->buf_iova + dsts[i]->data_off,
                                 length,
                                 (uintptr_t)srcs[i],
-                                (uintptr_t)dsts[i],
-                                0 /* nofence */) != 1) {
+                                (uintptr_t)dsts[i]) != 1) {
                         printf("Error with rte_ioat_enqueue_copy for buffer %u\n",
                                         i);
                         return -1;
diff --git a/doc/guides/rel_notes/release_20_11.rst b/doc/guides/rel_notes/release_20_11.rst
index c99c0b33f..3868529ac 100644
--- a/doc/guides/rel_notes/release_20_11.rst
+++ b/doc/guides/rel_notes/release_20_11.rst
@@ -88,6 +88,10 @@  New Features
     to better reflect the APIs' purposes, and remove the implication that
     they are limited to copy operations only.
     [Note: The old API is still provided but marked as deprecated in the code]
+  * Added a new API ``rte_ioat_fence()`` to add a fence between operations.
+    This API replaces the ``fence`` flag parameter in the ``rte_ioat_enqueue_copy()`` function,
+    and is clearer as there is no ambiguity as to whether the flag should be
+    set on the last operation before the fence or the first operation after it.
 
 
 Removed Items
diff --git a/drivers/raw/ioat/ioat_rawdev_test.c b/drivers/raw/ioat/ioat_rawdev_test.c
index bb40eab6b..8ff546803 100644
--- a/drivers/raw/ioat/ioat_rawdev_test.c
+++ b/drivers/raw/ioat/ioat_rawdev_test.c
@@ -57,8 +57,7 @@  test_enqueue_copies(int dev_id)
 				dst->buf_iova + dst->data_off,
 				length,
 				(uintptr_t)src,
-				(uintptr_t)dst,
-				0 /* no fence */) != 1) {
+				(uintptr_t)dst) != 1) {
 			PRINT_ERR("Error with rte_ioat_enqueue_copy\n");
 			return -1;
 		}
@@ -109,8 +108,7 @@  test_enqueue_copies(int dev_id)
 					dsts[i]->buf_iova + dsts[i]->data_off,
 					length,
 					(uintptr_t)srcs[i],
-					(uintptr_t)dsts[i],
-					0 /* nofence */) != 1) {
+					(uintptr_t)dsts[i]) != 1) {
 				PRINT_ERR("Error with rte_ioat_enqueue_copy for buffer %u\n",
 						i);
 				return -1;
diff --git a/drivers/raw/ioat/rte_ioat_rawdev.h b/drivers/raw/ioat/rte_ioat_rawdev.h
index 5b2c47e8c..6b891cd44 100644
--- a/drivers/raw/ioat/rte_ioat_rawdev.h
+++ b/drivers/raw/ioat/rte_ioat_rawdev.h
@@ -61,17 +61,31 @@  struct rte_ioat_rawdev_config {
  *   operation has been completed and the user polls for the completion details.
  *   NOTE: If hdls_disable configuration option for the device is set, this
  *   parameter is ignored.
- * @param fence
- *   A flag parameter indicating that hardware should not begin to perform any
- *   subsequently enqueued copy operations until after this operation has
- *   completed
  * @return
  *   Number of operations enqueued, either 0 or 1
  */
 static inline int
 rte_ioat_enqueue_copy(int dev_id, phys_addr_t src, phys_addr_t dst,
-		unsigned int length, uintptr_t src_hdl, uintptr_t dst_hdl,
-		int fence);
+		unsigned int length, uintptr_t src_hdl, uintptr_t dst_hdl);
+
+/**
+ * Add a fence to force ordering between operations
+ *
+ * This adds a fence to a sequence of operations to enforce ordering, such that
+ * all operations enqueued before the fence must be completed before operations
+ * after the fence.
+ * NOTE: Since this fence may be added as a flag to the last operation enqueued,
+ * this API may not function correctly when called immediately after an
+ * "rte_ioat_perform_ops" call i.e. before any new operations are enqueued.
+ *
+ * @param dev_id
+ *   The rawdev device id of the ioat instance
+ * @return
+ *   0 on success
+ */
+static inline int
+rte_ioat_fence(int dev_id);
+
 
 /**
  * Trigger hardware to begin performing enqueued operations
diff --git a/drivers/raw/ioat/rte_ioat_rawdev_fns.h b/drivers/raw/ioat/rte_ioat_rawdev_fns.h
index b155d79c4..466721a23 100644
--- a/drivers/raw/ioat/rte_ioat_rawdev_fns.h
+++ b/drivers/raw/ioat/rte_ioat_rawdev_fns.h
@@ -47,8 +47,7 @@  struct rte_ioat_rawdev {
  */
 static inline int
 rte_ioat_enqueue_copy(int dev_id, phys_addr_t src, phys_addr_t dst,
-		unsigned int length, uintptr_t src_hdl, uintptr_t dst_hdl,
-		int fence)
+		unsigned int length, uintptr_t src_hdl, uintptr_t dst_hdl)
 {
 	struct rte_ioat_rawdev *ioat =
 			(struct rte_ioat_rawdev *)rte_rawdevs[dev_id].dev_private;
@@ -69,7 +68,7 @@  rte_ioat_enqueue_copy(int dev_id, phys_addr_t src, phys_addr_t dst,
 	desc = &ioat->desc_ring[write];
 	desc->size = length;
 	/* set descriptor write-back every 16th descriptor */
-	desc->u.control_raw = (uint32_t)((!!fence << 4) | (!(write & 0xF)) << 3);
+	desc->u.control_raw = (uint32_t)((!(write & 0xF)) << 3);
 	desc->src_addr = src;
 	desc->dest_addr = dst;
 
@@ -82,6 +81,23 @@  rte_ioat_enqueue_copy(int dev_id, phys_addr_t src, phys_addr_t dst,
 	return 1;
 }
 
+/* add fence to last written descriptor */
+static inline int
+rte_ioat_fence(int dev_id)
+{
+	struct rte_ioat_rawdev *ioat =
+			(struct rte_ioat_rawdev *)rte_rawdevs[dev_id].dev_private;
+	unsigned short write = ioat->next_write;
+	unsigned short mask = ioat->ring_size - 1;
+	struct rte_ioat_generic_hw_desc *desc;
+
+	write = (write - 1) & mask;
+	desc = &ioat->desc_ring[write];
+
+	desc->u.control.fence = 1;
+	return 0;
+}
+
 /*
  * Trigger hardware to begin performing enqueued operations
  */
diff --git a/examples/ioat/ioatfwd.c b/examples/ioat/ioatfwd.c
index 67f75737b..e6d1d1236 100644
--- a/examples/ioat/ioatfwd.c
+++ b/examples/ioat/ioatfwd.c
@@ -361,15 +361,11 @@  ioat_enqueue_packets(struct rte_mbuf **pkts,
 	for (i = 0; i < nb_rx; i++) {
 		/* Perform data copy */
 		ret = rte_ioat_enqueue_copy(dev_id,
-			pkts[i]->buf_iova
-			- addr_offset,
-			pkts_copy[i]->buf_iova
-			- addr_offset,
-			rte_pktmbuf_data_len(pkts[i])
-			+ addr_offset,
+			pkts[i]->buf_iova - addr_offset,
+			pkts_copy[i]->buf_iova - addr_offset,
+			rte_pktmbuf_data_len(pkts[i]) + addr_offset,
 			(uintptr_t)pkts[i],
-			(uintptr_t)pkts_copy[i],
-			0 /* nofence */);
+			(uintptr_t)pkts_copy[i]);
 
 		if (ret != 1)
 			break;