[v3,7/9] gpudev: add communication flag

Message ID 20211009015349.9694-8-eagostini@nvidia.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series GPU library |

Checks

Context Check Description
ci/checkpatch warning coding style issues

Commit Message

Elena Agostini Oct. 9, 2021, 1:53 a.m. UTC
  From: Elena Agostini <eagostini@nvidia.com>

In a heterogeneous computing system, processing is not done only on the CPU.
Some tasks can be delegated to devices working in parallel.
When mixing network activity with task processing, the CPU may need
to communicate with the device in order to synchronize
operations.

The purpose of this flag is to allow the CPU and the GPU to
exchange ACKs. A possible use-case is described below.

CPU:
- Trigger some task on the GPU
- Prepare some data
- Signal to the GPU that the data is ready by updating the communication flag

GPU:
- Do some pre-processing
- Wait for more data from the CPU by polling on the communication flag
- Consume the data prepared by the CPU

Signed-off-by: Elena Agostini <eagostini@nvidia.com>
---
 app/test-gpudev/main.c                 |  66 +++++++++++++++
 doc/guides/prog_guide/gpudev.rst       |  13 +++
 doc/guides/rel_notes/release_21_11.rst |   1 +
 lib/gpudev/gpudev.c                    |  94 +++++++++++++++++++++
 lib/gpudev/rte_gpudev.h                | 108 +++++++++++++++++++++++++
 lib/gpudev/version.map                 |   4 +
 6 files changed, 286 insertions(+)
  

Patch

diff --git a/app/test-gpudev/main.c b/app/test-gpudev/main.c
index 98c02a3ee0..22f5c950b2 100644
--- a/app/test-gpudev/main.c
+++ b/app/test-gpudev/main.c
@@ -166,6 +166,67 @@  register_cpu_memory(uint16_t gpu_id)
 	return 0;
 }
 
+/*
+ * Exercise the communication flag API on one GPU: create a CPU-memory flag,
+ * write two test values, read each back and print it, then destroy the flag.
+ *
+ * Returns 0 on success, -1 as soon as any gpudev call reports an error.
+ */
+static int
+create_update_comm_flag(uint16_t gpu_id)
+{
+	static const uint32_t test_vals[] = { 25, 38 };
+	struct rte_gpu_comm_flag devflag;
+	uint32_t get_val;
+	size_t i;
+	int ret;
+
+	printf("\n=======> TEST: Communication flag\n");
+
+	ret = rte_gpu_comm_create_flag(gpu_id, &devflag, RTE_GPU_COMM_FLAG_CPU);
+	if (ret < 0) {
+		fprintf(stderr, "rte_gpu_comm_create_flag returned error %d\n", ret);
+		return -1;
+	}
+
+	/* Set each test value, then read it back through the same flag. */
+	for (i = 0; i < sizeof(test_vals) / sizeof(test_vals[0]); i++) {
+		ret = rte_gpu_comm_set_flag(&devflag, test_vals[i]);
+		if (ret < 0) {
+			fprintf(stderr, "rte_gpu_comm_set_flag returned error %d\n",
+				ret);
+			goto error;
+		}
+
+		ret = rte_gpu_comm_get_flag_value(&devflag, &get_val);
+		if (ret < 0) {
+			fprintf(stderr,
+				"rte_gpu_comm_get_flag_value returned error %d\n",
+				ret);
+			goto error;
+		}
+
+		/* %p takes a void * and glibc already emits the 0x prefix;
+		 * the flag values are uint32_t, hence %u rather than %d. */
+		printf("Communication flag value at %p was set to %u"
+			" and current value is %u\n",
+			(void *)devflag.ptr, test_vals[i], get_val);
+	}
+
+	ret = rte_gpu_comm_destroy_flag(&devflag);
+	if (ret < 0) {
+		fprintf(stderr, "rte_gpu_comm_destroy_flag returned error %d\n", ret);
+		return -1;
+	}
+
+	return 0;
+
+error:
+	/* Best effort: release the flag so a failed test does not leak it. */
+	rte_gpu_comm_destroy_flag(&devflag);
+	return -1;
+}
+
 int
 main(int argc, char **argv)
 {
@@ -217,6 +278,11 @@  main(int argc, char **argv)
 	alloc_gpu_memory(gpu_id);
 	register_cpu_memory(gpu_id);
 
+	/**
+	 * Communication items test
+	 */
+	create_update_comm_flag(gpu_id);
+
 	/* clean up the EAL */
 	rte_eal_cleanup();
 	printf("Bye...\n");
diff --git a/doc/guides/prog_guide/gpudev.rst b/doc/guides/prog_guide/gpudev.rst
index eb5f0af817..e0db627aed 100644
--- a/doc/guides/prog_guide/gpudev.rst
+++ b/doc/guides/prog_guide/gpudev.rst
@@ -32,6 +32,10 @@  This library provides a number of features:
 - Interoperability with device-specific library through generic handlers.
 - Allocate and free memory on the device.
 - Register CPU memory to make it visible from the device.
+- Communication between the CPU and the device.
+
+The whole CPU - GPU communication is implemented
+using CPU memory visible from the GPU.
 
 
 API Overview
@@ -73,3 +77,12 @@  Some GPU drivers may need, under certain conditions,
 to enforce the coherency of external devices writes (e.g. NIC receiving packets)
 into the GPU memory.
 gpudev abstracts and exposes this capability.
+
+Communication Flag
+~~~~~~~~~~~~~~~~~~
+
+Consider an application with some GPU task
+that is waiting to receive a signal from the CPU
+to move forward with the execution.
+The communication flag is a GPU-visible ``uint32_t`` allocated in CPU memory
+that the CPU can use to communicate with a GPU task.
diff --git a/doc/guides/rel_notes/release_21_11.rst b/doc/guides/rel_notes/release_21_11.rst
index c4ac5e3053..59ab1a1920 100644
--- a/doc/guides/rel_notes/release_21_11.rst
+++ b/doc/guides/rel_notes/release_21_11.rst
@@ -66,6 +66,7 @@  New Features
 
   * Device information
   * Memory management
+  * Communication flag
 
 * **Added new RSS offload types for IPv4/L4 checksum in RSS flow.**
 
diff --git a/lib/gpudev/gpudev.c b/lib/gpudev/gpudev.c
index cefefd737a..827e29d8f6 100644
--- a/lib/gpudev/gpudev.c
+++ b/lib/gpudev/gpudev.c
@@ -643,3 +643,97 @@  rte_gpu_mbw(int16_t dev_id)
 	}
 	return GPU_DRV_RET(dev->ops.mbw(dev));
 }
+
+/*
+ * Allocate a zeroed uint32_t flag in CPU memory and register it with the
+ * device. Only RTE_GPU_COMM_FLAG_CPU is accepted. Returns 0 or -rte_errno.
+ */
+int
+rte_gpu_comm_create_flag(uint16_t dev_id, struct rte_gpu_comm_flag *devflag,
+		enum rte_gpu_comm_flag_type mtype)
+{
+	const size_t flag_size = sizeof(uint32_t);
+	int ret;
+
+	if (devflag == NULL || mtype != RTE_GPU_COMM_FLAG_CPU) {
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+
+	devflag->ptr = rte_zmalloc(NULL, flag_size, 0);
+	if (devflag->ptr == NULL) {
+		rte_errno = ENOMEM;
+		return -rte_errno;
+	}
+
+	ret = rte_gpu_register(dev_id, flag_size, devflag->ptr);
+	if (ret < 0) {
+		/* Registration failed: release the flag instead of leaking it. */
+		rte_free(devflag->ptr);
+		devflag->ptr = NULL;
+		rte_errno = ENOMEM;
+		return -rte_errno;
+	}
+
+	devflag->mtype = mtype;
+	devflag->dev_id = dev_id;
+
+	return 0;
+}
+
+/*
+ * Unregister the flag memory from its device, then free it.
+ * Returns 0 on success, -rte_errno otherwise.
+ */
+int
+rte_gpu_comm_destroy_flag(struct rte_gpu_comm_flag *devflag)
+{
+	if (devflag == NULL) {
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+
+	if (rte_gpu_unregister(devflag->dev_id, devflag->ptr) < 0) {
+		rte_errno = EINVAL;
+		return -rte_errno; /* was -1, unlike every other error path */
+	}
+
+	rte_free(devflag->ptr);
+	devflag->ptr = NULL; /* do not leave a dangling pointer behind */
+	return 0;
+}
+
+/*
+ * Store val into the flag using a volatile access.
+ * Returns 0, or -EINVAL when devflag is NULL or its type is not CPU.
+ */
+int
+rte_gpu_comm_set_flag(struct rte_gpu_comm_flag *devflag, uint32_t val)
+{
+	if (devflag == NULL || devflag->mtype != RTE_GPU_COMM_FLAG_CPU) {
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+
+	/* The flag memory is shared with the device: write it as volatile. */
+	RTE_GPU_VOLATILE(*devflag->ptr) = val;
+
+	return 0;
+}
+
+/*
+ * Read the flag value into *val using a volatile access.
+ * Returns 0, or -EINVAL (NULL devflag/val, or flag type is not CPU).
+ */
+int
+rte_gpu_comm_get_flag_value(struct rte_gpu_comm_flag *devflag, uint32_t *val)
+{
+	if (devflag == NULL || val == NULL ||
+			devflag->mtype != RTE_GPU_COMM_FLAG_CPU) {
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+
+	*val = RTE_GPU_VOLATILE(*devflag->ptr);
+	return 0;
+}
diff --git a/lib/gpudev/rte_gpudev.h b/lib/gpudev/rte_gpudev.h
index e790b3e2b7..4a10a8bcf5 100644
--- a/lib/gpudev/rte_gpudev.h
+++ b/lib/gpudev/rte_gpudev.h
@@ -38,6 +38,9 @@  extern "C" {
 /** Catch-all callback data. */
 #define RTE_GPU_CALLBACK_ANY_DATA ((void *)-1)
 
+/** Access lvalue x through a volatile-qualified pointer to its own type. */
+#define RTE_GPU_VOLATILE(x) (*(volatile typeof(x)*)&(x))
+
 /** Store device info. */
 struct rte_gpu_info {
 	/** Unique identifier name. */
@@ -68,6 +71,22 @@  enum rte_gpu_event {
 typedef void (rte_gpu_callback_t)(int16_t dev_id,
 		enum rte_gpu_event event, void *user_data);
 
+/** Kind of memory in which a communication flag is allocated. */
+enum rte_gpu_comm_flag_type {
+	/** Flag is allocated in CPU memory visible from the device. */
+	RTE_GPU_COMM_FLAG_CPU = 0,
+};
+
+/** Communication flag to coordinate CPU with the device. */
+struct rte_gpu_comm_flag {
+	/** ID of the device the flag memory is registered with. */
+	uint16_t dev_id;
+	/** Pointer to the 32-bit flag value. */
+	uint32_t *ptr;
+	/** Type of memory used to allocate the flag. */
+	enum rte_gpu_comm_flag_type mtype;
+};
+
 /**
  * @warning
  * @b EXPERIMENTAL: this API may change without prior notice.
@@ -405,6 +424,95 @@  int rte_gpu_unregister(int16_t dev_id, void *ptr);
 __rte_experimental
 int rte_gpu_mbw(int16_t dev_id);
 
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Create a communication flag that can be shared
+ * between CPU threads and device workload to exchange some status info
+ * (e.g. work is done, processing can start, etc.).
+ *
+ * @param dev_id
+ *   Reference device ID.
+ * @param devflag
+ *   Flag structure to initialize (ptr, mtype and dev_id are set on success).
+ * @param mtype
+ *   Type of memory to allocate the communication flag.
+ *
+ * @return
+ *   0 on success, -rte_errno otherwise:
+ *   - EINVAL if devflag is NULL
+ *   - EINVAL if mtype is not RTE_GPU_COMM_FLAG_CPU
+ *   - ENOMEM if the flag memory cannot be allocated
+ *   - ENOMEM if registering the flag memory with the device fails
+ *     (the underlying error code is not reported)
+ */
+__rte_experimental
+int rte_gpu_comm_create_flag(uint16_t dev_id,
+		struct rte_gpu_comm_flag *devflag,
+		enum rte_gpu_comm_flag_type mtype);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Unregister the flag memory from its device and free it.
+ *
+ * @param devflag
+ *   Flag previously set up by rte_gpu_comm_create_flag();
+ *   the structure itself is not freed.
+ *
+ * @return
+ *   0 on success, a negative value otherwise, with rte_errno set to:
+ *   - EINVAL if devflag is NULL
+ *   - EINVAL if unregistering the flag memory from the device fails
+ *     (the underlying error code is not reported)
+ */
+__rte_experimental
+int rte_gpu_comm_destroy_flag(struct rte_gpu_comm_flag *devflag);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Set the value of a communication flag to the input value.
+ * Flag memory area is treated as volatile.
+ * The flag must have been allocated with RTE_GPU_COMM_FLAG_CPU.
+ *
+ * @param devflag
+ *   Flag to update, as set up by rte_gpu_comm_create_flag().
+ * @param val
+ *   Value to set in the flag.
+ *
+ * @return
+ *   0 on success, -rte_errno otherwise:
+ *   - EINVAL if devflag is NULL or not of type RTE_GPU_COMM_FLAG_CPU
+ */
+__rte_experimental
+int rte_gpu_comm_set_flag(struct rte_gpu_comm_flag *devflag,
+		uint32_t val);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Get the value of the communication flag.
+ * Flag memory area is treated as volatile.
+ * The flag must have been allocated with RTE_GPU_COMM_FLAG_CPU.
+ *
+ * @param devflag
+ *   Flag to read, as set up by rte_gpu_comm_create_flag().
+ * @param val
+ *   Where the flag value is stored; must not be NULL.
+ *
+ * @return
+ *   0 on success, -rte_errno otherwise:
+ *   - EINVAL if invalid input params
+ */
+__rte_experimental
+int rte_gpu_comm_get_flag_value(struct rte_gpu_comm_flag *devflag,
+		uint32_t *val);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/lib/gpudev/version.map b/lib/gpudev/version.map
index d72d470d8e..2fc039373a 100644
--- a/lib/gpudev/version.map
+++ b/lib/gpudev/version.map
@@ -6,6 +6,10 @@  EXPERIMENTAL {
 	rte_gpu_callback_register;
 	rte_gpu_callback_unregister;
 	rte_gpu_close;
+	rte_gpu_comm_create_flag;
+	rte_gpu_comm_destroy_flag;
+	rte_gpu_comm_get_flag_value;
+	rte_gpu_comm_set_flag;
 	rte_gpu_count_avail;
 	rte_gpu_find_next;
 	rte_gpu_free;