[v7,03/16] dma/idxd: add bus device probing

Message ID 20211013163053.1033998-4-kevin.laatz@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series add dmadev driver for idxd devices |

Checks

Context Check Description
ci/checkpatch warning coding style issues

Commit Message

Kevin Laatz Oct. 13, 2021, 4:30 p.m. UTC
Add the basic device probing for DSA devices bound to the IDXD kernel
driver. These devices can be configured via sysfs and made available to
DPDK if they are found during bus scan. Relevant documentation is included.

Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
Signed-off-by: Kevin Laatz <kevin.laatz@intel.com>
Reviewed-by: Conor Walsh <conor.walsh@intel.com>
---
 doc/guides/dmadevs/idxd.rst  |  64 +++++++
 drivers/dma/idxd/idxd_bus.c  | 351 +++++++++++++++++++++++++++++++++++
 drivers/dma/idxd/meson.build |   1 +
 3 files changed, 416 insertions(+)
 create mode 100644 drivers/dma/idxd/idxd_bus.c
  

Patch

diff --git a/doc/guides/dmadevs/idxd.rst b/doc/guides/dmadevs/idxd.rst
index 924700d17e..ce33e2857a 100644
--- a/doc/guides/dmadevs/idxd.rst
+++ b/doc/guides/dmadevs/idxd.rst
@@ -32,6 +32,56 @@  target platform is x86-based. No additional compilation steps are necessary.
 Device Setup
 -------------
 
+Intel\ |reg| DSA devices can use the IDXD kernel driver or DPDK-supported drivers,
+such as ``vfio-pci``. Both are supported by the IDXD PMD.
+
+Intel\ |reg| DSA devices using IDXD kernel driver
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+To use an Intel\ |reg| DSA device bound to the IDXD kernel driver, the device must first be configured.
+The `accel-config <https://github.com/intel/idxd-config>`_ utility library can be used for configuration.
+
+.. note::
+        The device configuration can also be done by directly interacting with the sysfs nodes.
+        An example of how this may be done can be seen in the script ``dpdk_idxd_cfg.py``
+        included in the driver source directory.
+
+There are some mandatory configuration steps before being able to use a device with an application.
+The internal engines, which do the copies or other operations,
+and the work-queues, which are used by applications to assign work to the device,
+need to be assigned to groups, and the various other configuration options,
+such as priority or queue depth, need to be set for each queue.
+
+To assign an engine to a group::
+
+        $ accel-config config-engine dsa0/engine0.0 --group-id=0
+        $ accel-config config-engine dsa0/engine0.1 --group-id=1
+
+To assign work queues to groups for passing descriptors to the engines, a similar accel-config command can be used.
+However, the work queues also need to be configured depending on the use case.
+Some configuration options include:
+
+* mode (Dedicated/Shared): Indicates whether a WQ may accept jobs from multiple queues simultaneously.
+* priority: WQ priority between 1 and 15. Larger value means higher priority.
+* wq-size: the size of the WQ. Sum of all WQ sizes must be less than the total-size defined by the device.
+* type: WQ type (kernel/mdev/user). Determines how the device is presented.
+* name: identifier given to the WQ.
+
+Example configuration for a work queue::
+
+        $ accel-config config-wq dsa0/wq0.0 --group-id=0 \
+           --mode=dedicated --priority=10 --wq-size=8 \
+           --type=user --name=dpdk_app1
+
+Once the devices have been configured, they need to be enabled::
+
+        $ accel-config enable-device dsa0
+        $ accel-config enable-wq dsa0/wq0.0
+
+Check the device configuration::
+
+        $ accel-config list
+
 Devices using VFIO/UIO drivers
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -56,3 +106,17 @@  If fewer workqueues are required, then the ``max_queues`` parameter may be passe
 the device driver on the EAL commandline, via the ``allowlist`` or ``-a`` flag e.g.::
 
 	$ dpdk-test -a <b:d:f>,max_queues=4
+
+For devices bound to the IDXD kernel driver,
+the DPDK IDXD driver will automatically perform a scan for available workqueues
+to use. Any workqueues found listed in ``/dev/dsa`` on the system will be checked
+in ``/sys``, and any which have ``dpdk_`` prefix in their name will be automatically
+probed by the driver to make them available to the application.
+Alternatively, to support use by multiple DPDK processes simultaneously,
+the value used as the DPDK ``--file-prefix`` parameter may be used as a workqueue
+name prefix, instead of ``dpdk_``, allowing each DPDK application instance to only
+use a subset of configured queues.
+
+Once probed successfully, irrespective of kernel driver, the device will appear as a ``dmadev``,
+that is a "DMA device type" inside DPDK, and can be accessed using APIs from the
+``rte_dmadev`` library.
diff --git a/drivers/dma/idxd/idxd_bus.c b/drivers/dma/idxd/idxd_bus.c
new file mode 100644
index 0000000000..ef589af30e
--- /dev/null
+++ b/drivers/dma/idxd/idxd_bus.c
@@ -0,0 +1,351 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2021 Intel Corporation
+ */
+
+#include <dirent.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <libgen.h>
+
+#include <rte_bus.h>
+#include <rte_log.h>
+#include <rte_dmadev_pmd.h>
+#include <rte_string_fns.h>
+
+#include "idxd_internal.h"
+
+/*
+ * Default values for the DSA paths. Each can be overridden through an
+ * environment variable of the same name (see dsa_get_dev_path() and
+ * dsa_get_sysfs_path()), which is intended for testing.
+ */
+#define DSA_DEV_PATH "/dev/dsa"
+#define DSA_SYSFS_PATH "/sys/bus/dsa/devices"
+
+/* Number of WQ devices dsa_scan() has added to the bus device list;
+ * consulted by dsa_get_iommu_class().
+ */
+static unsigned int devcount;
+
+/**
+ * Unique identifier for a DSA device/WQ instance.
+ *
+ * Filled in by dsa_addr_parse() from a work-queue name of the form
+ * "wq<device_id>.<wq_id>".
+ */
+struct dsa_wq_addr {
+	uint16_t device_id; /**< first number in the WQ name; selects the "dsa<N>" sysfs dir */
+	uint16_t wq_id;     /**< second number in the WQ name */
+};
+
+/**
+ * A DSA device instance: one entry per work queue found by dsa_scan().
+ *
+ * The rte_device member must stay first; dsa_find_device() casts a
+ * struct rte_device pointer back to this type and enforces the layout
+ * with a build-time check.
+ */
+struct rte_dsa_device {
+	struct rte_device device;           /**< Inherit core device */
+	TAILQ_ENTRY(rte_dsa_device) next;   /**< next dev in list */
+
+	char wq_name[32];                   /**< the workqueue name/number e.g. wq0.1 */
+	struct dsa_wq_addr addr;            /**< Identifies the specific WQ */
+};
+
+/* forward prototypes: the bus callbacks are referenced by the dsa_bus
+ * initializer before their definitions appear later in this file
+ */
+struct dsa_bus;
+static int dsa_scan(void);
+static int dsa_probe(void);
+static struct rte_device *dsa_find_device(const struct rte_device *start,
+		rte_dev_cmp_t cmp,  const void *data);
+static enum rte_iova_mode dsa_get_iommu_class(void);
+static int dsa_addr_parse(const char *name, void *addr);
+
+/** List of DSA devices (one entry per scanned work queue) */
+TAILQ_HEAD(dsa_device_list, rte_dsa_device);
+
+/**
+ * Structure describing the DSA bus
+ */
+struct dsa_bus {
+	struct rte_bus bus;               /**< Inherit the generic class */
+	struct rte_driver driver;         /**< Driver struct for devices to point to */
+	struct dsa_device_list device_list;  /**< List of DSA devices */
+};
+
+/*
+ * The single DSA bus instance: wires the bus callbacks defined in this file
+ * into the generic bus structure, names the driver that probed devices are
+ * pointed at, and starts with an empty device list.
+ */
+struct dsa_bus dsa_bus = {
+	.bus = {
+		.scan = dsa_scan,
+		.probe = dsa_probe,
+		.find_device = dsa_find_device,
+		.get_iommu_class = dsa_get_iommu_class,
+		.parse = dsa_addr_parse,
+	},
+	.driver = {
+		.name = "dmadev_idxd"
+	},
+	.device_list = TAILQ_HEAD_INITIALIZER(dsa_bus.device_list),
+};
+
+/*
+ * Return the directory holding the WQ device nodes: the value of the
+ * DSA_DEV_PATH environment variable when it is set, otherwise the
+ * compiled-in default.
+ */
+static inline const char *
+dsa_get_dev_path(void)
+{
+	const char *env_override = getenv("DSA_DEV_PATH");
+
+	if (env_override != NULL)
+		return env_override;
+	return DSA_DEV_PATH;
+}
+
+/*
+ * Return the sysfs directory listing DSA devices: the value of the
+ * DSA_SYSFS_PATH environment variable when it is set, otherwise the
+ * compiled-in default.
+ */
+static inline const char *
+dsa_get_sysfs_path(void)
+{
+	const char *env_override = getenv("DSA_SYSFS_PATH");
+
+	if (env_override != NULL)
+		return env_override;
+	return DSA_SYSFS_PATH;
+}
+
+/*
+ * Map the portal of the work queue named by dev->wq_name.
+ *
+ * Opens "<dev path>/<wq_name>" read-write and maps 0x1000 bytes of it as a
+ * shared, write-only mapping. The descriptor is closed as soon as mmap() has
+ * been attempted, whether or not the mapping succeeded.
+ *
+ * Returns the mapped address, or NULL if either the open or the mmap fails.
+ */
+static void *
+idxd_bus_mmap_wq(struct rte_dsa_device *dev)
+{
+	char node[PATH_MAX];
+	void *portal;
+	int wq_fd;
+
+	snprintf(node, sizeof(node), "%s/%s", dsa_get_dev_path(), dev->wq_name);
+
+	wq_fd = open(node, O_RDWR);
+	if (wq_fd < 0) {
+		IDXD_PMD_ERR("Failed to open device path: %s", node);
+		return NULL;
+	}
+
+	portal = mmap(NULL, 0x1000, PROT_WRITE, MAP_SHARED, wq_fd, 0);
+	close(wq_fd);
+
+	if (portal != MAP_FAILED)
+		return portal;
+
+	IDXD_PMD_ERR("Failed to mmap device %s", node);
+	return NULL;
+}
+
+/*
+ * Read the sysfs attribute "<sysfs path>/<wq_name>/<filename>" as a string.
+ *
+ * At most valuelen - 1 bytes are read with a single read() call and the
+ * result is always NUL-terminated. Whatever the file contains is kept as-is
+ * (no trailing whitespace is stripped).
+ *
+ * dev       device whose wq_name selects the sysfs directory
+ * filename  attribute file name inside that directory
+ * value     output buffer
+ * valuelen  size of the output buffer in bytes; must be non-zero
+ *
+ * Returns 0 on success, -1 if the buffer is empty or the file cannot be
+ * opened or read.
+ */
+static int
+read_wq_string(struct rte_dsa_device *dev, const char *filename,
+		char *value, size_t valuelen)
+{
+	char sysfs_node[PATH_MAX];
+	ssize_t len;
+	int saved_errno;
+	int fd;
+
+	/* no room even for the terminator; also avoids valuelen - 1 wrapping */
+	if (valuelen == 0)
+		return -1;
+
+	snprintf(sysfs_node, sizeof(sysfs_node), "%s/%s/%s",
+			dsa_get_sysfs_path(), dev->wq_name, filename);
+	fd = open(sysfs_node, O_RDONLY);
+	if (fd < 0) {
+		IDXD_PMD_ERR("%s(): opening file '%s' failed: %s",
+				__func__, sysfs_node, strerror(errno));
+		return -1;
+	}
+
+	len = read(fd, value, valuelen - 1);
+	/* close() may overwrite errno, so capture the read() error first */
+	saved_errno = errno;
+	close(fd);
+	if (len < 0) {
+		IDXD_PMD_ERR("%s(): error reading file '%s': %s",
+				__func__, sysfs_node, strerror(saved_errno));
+		return -1;
+	}
+	value[len] = '\0';
+	return 0;
+}
+
+/*
+ * Read the sysfs attribute "<sysfs path>/<wq_name>/<filename>" as a decimal
+ * integer.
+ *
+ * dev       device whose wq_name selects the sysfs directory
+ * filename  attribute file name inside that directory
+ * value     where the parsed integer is stored
+ *
+ * Returns 0 on success, -1 if the file cannot be opened or does not start
+ * with an integer.
+ */
+static int
+read_wq_int(struct rte_dsa_device *dev, const char *filename,
+		int *value)
+{
+	char attr_path[PATH_MAX];
+	FILE *attr;
+
+	snprintf(attr_path, sizeof(attr_path), "%s/%s/%s",
+			dsa_get_sysfs_path(), dev->wq_name, filename);
+
+	attr = fopen(attr_path, "r");
+	if (attr == NULL) {
+		IDXD_PMD_ERR("%s(): opening file '%s' failed: %s",
+				__func__, attr_path, strerror(errno));
+		return -1;
+	}
+
+	if (fscanf(attr, "%d", value) == 1) {
+		fclose(attr);
+		return 0;
+	}
+
+	IDXD_PMD_ERR("%s(): error reading file '%s': %s",
+			__func__, attr_path, strerror(errno));
+	fclose(attr);
+	return -1;
+}
+
+/*
+ * Read the sysfs attribute "<sysfs path>/dsa<device_id>/<filename>" as a
+ * decimal integer. Unlike read_wq_int(), the directory is that of the parent
+ * DSA device (selected by dev->addr.device_id), not of the work queue.
+ *
+ * dev       device whose addr.device_id selects the sysfs directory
+ * filename  attribute file name inside that directory
+ * value     where the parsed integer is stored
+ *
+ * Returns 0 on success, -1 if the file cannot be opened or does not start
+ * with an integer.
+ */
+static int
+read_device_int(struct rte_dsa_device *dev, const char *filename,
+		int *value)
+{
+	char attr_path[PATH_MAX];
+	FILE *attr;
+
+	snprintf(attr_path, sizeof(attr_path), "%s/dsa%d/%s",
+			dsa_get_sysfs_path(), dev->addr.device_id, filename);
+
+	attr = fopen(attr_path, "r");
+	if (attr == NULL) {
+		IDXD_PMD_ERR("%s(): opening file '%s' failed: %s",
+				__func__, attr_path, strerror(errno));
+		return -1;
+	}
+
+	if (fscanf(attr, "%d", value) == 1) {
+		fclose(attr);
+		return 0;
+	}
+
+	IDXD_PMD_ERR("%s(): error reading file '%s': %s",
+			__func__, attr_path, strerror(errno));
+	fclose(attr);
+	return -1;
+}
+
+/*
+ * Probe a single work queue: read its "size" and "max_batch_size" sysfs
+ * attributes into a local idxd_dmadev description, record the WQ id, flag
+ * SVA support, and map the WQ portal.
+ *
+ * Returns 0 on success, -1 if a sysfs attribute cannot be read, or -ENOENT
+ * if the portal cannot be mapped.
+ */
+static int
+idxd_probe_dsa(struct rte_dsa_device *dev)
+{
+	struct idxd_dmadev idxd = {0};
+	int attr_val = 0;
+
+	IDXD_PMD_INFO("Probing device %s on numa node %d",
+			dev->wq_name, dev->device.numa_node);
+
+	if (read_wq_int(dev, "size", &attr_val) < 0)
+		return -1;
+	idxd.max_batches = attr_val;
+
+	if (read_wq_int(dev, "max_batch_size", &attr_val) < 0)
+		return -1;
+	idxd.max_batch_size = attr_val;
+
+	idxd.qid = dev->addr.wq_id;
+	idxd.sva_support = 1;
+	idxd.portal = idxd_bus_mmap_wq(dev);
+	if (idxd.portal != NULL)
+		return 0;
+
+	IDXD_PMD_ERR("WQ mmap failed");
+	return -ENOENT;
+}
+
+/*
+ * Decide whether a work queue with the given configured name is meant for
+ * this DPDK process.
+ *
+ * A WQ is accepted when its name starts with "dpdk_", or with the last path
+ * component of the EAL runtime directory followed by '_'.
+ *
+ * name  the WQ name as read from sysfs
+ *
+ * Returns 1 if the WQ should be used by this process, 0 otherwise (including
+ * when the runtime directory string cannot be duplicated).
+ */
+static int
+is_for_this_process_use(const char *name)
+{
+	char *runtime_dir;
+	const char *prefix;
+	size_t prefixlen;
+	int retval;
+
+	if (strncmp(name, "dpdk_", 5) == 0)
+		return 1;
+
+	/* basename() may modify its argument, so work on a private copy */
+	runtime_dir = strdup(rte_eal_get_runtime_dir());
+	if (runtime_dir == NULL) {
+		IDXD_PMD_ERR("%s(): failed to duplicate runtime dir path", __func__);
+		return 0;
+	}
+
+	prefix = basename(runtime_dir);
+	prefixlen = strlen(prefix);
+	/* a full prefix match guarantees name[prefixlen] is within the string */
+	retval = (strncmp(name, prefix, prefixlen) == 0 && name[prefixlen] == '_');
+
+	free(runtime_dir);
+	return retval;
+}
+
+/*
+ * Bus probe callback: walk every scanned work queue and probe those that are
+ * of type "user" and whose name marks them as belonging to this process.
+ *
+ * WQs whose "type" or "name" attribute cannot be read are skipped silently;
+ * WQs that do not qualify are reported at debug level. The result of the
+ * per-device probe is not checked.
+ *
+ * Always returns 0.
+ */
+static int
+dsa_probe(void)
+{
+	struct rte_dsa_device *dev;
+
+	TAILQ_FOREACH(dev, &dsa_bus.device_list, next) {
+		char type[64], name[64];
+
+		if (read_wq_string(dev, "type", type, sizeof(type)) < 0 ||
+				read_wq_string(dev, "name", name, sizeof(name)) < 0)
+			continue;
+
+		if (strncmp(type, "user", 4) != 0 || !is_for_this_process_use(name)) {
+			IDXD_PMD_DEBUG("WQ '%s', not allocated to DPDK", dev->wq_name);
+			continue;
+		}
+
+		dev->device.driver = &dsa_bus.driver;
+		idxd_probe_dsa(dev);
+	}
+
+	return 0;
+}
+
+/*
+ * Bus scan callback: enumerate the work queues present in the DSA device
+ * directory and add one rte_dsa_device per "wq*" entry to the bus list.
+ *
+ * Entries whose name is too long for wq_name, or does not parse as
+ * "wq<dev>.<wq>", are logged and skipped. The NUMA node is read from the
+ * parent device's sysfs "numa_node" attribute and stays -1 when that read
+ * fails.
+ *
+ * Returns 0 on success or when the device directory does not exist, -1 if
+ * the directory cannot be opened for another reason, or -ENOMEM if a device
+ * structure cannot be allocated.
+ */
+static int
+dsa_scan(void)
+{
+	const char *path = dsa_get_dev_path();
+	struct dirent *wq;
+	DIR *dev_dir;
+
+	dev_dir = opendir(path);
+	if (dev_dir == NULL) {
+		if (errno == ENOENT)
+			return 0; /* no bus, return without error */
+		IDXD_PMD_ERR("%s(): opendir '%s' failed: %s",
+				__func__, path, strerror(errno));
+		return -1;
+	}
+
+	while ((wq = readdir(dev_dir)) != NULL) {
+		struct rte_dsa_device *dev;
+		int numa_node = -1;
+
+		if (strncmp(wq->d_name, "wq", 2) != 0)
+			continue;
+		if (strnlen(wq->d_name, sizeof(dev->wq_name)) == sizeof(dev->wq_name)) {
+			IDXD_PMD_ERR("%s(): wq name too long: '%s', skipping",
+					__func__, wq->d_name);
+			continue;
+		}
+		IDXD_PMD_DEBUG("%s(): found %s/%s", __func__, path, wq->d_name);
+
+		/* zeroed so that rte_device fields not set below start out as NULL/0 */
+		dev = calloc(1, sizeof(*dev));
+		if (dev == NULL) {
+			IDXD_PMD_ERR("%s(): failed to allocate device for '%s'",
+					__func__, wq->d_name);
+			closedir(dev_dir);
+			return -ENOMEM;
+		}
+		if (dsa_addr_parse(wq->d_name, &dev->addr) < 0) {
+			IDXD_PMD_ERR("Error parsing WQ name: %s", wq->d_name);
+			free(dev);
+			continue;
+		}
+		dev->device.bus = &dsa_bus.bus;
+		strlcpy(dev->wq_name, wq->d_name, sizeof(dev->wq_name));
+		dev->device.name = dev->wq_name;
+
+		/* best effort: numa_node keeps its -1 default if the read fails */
+		read_device_int(dev, "numa_node", &numa_node);
+		dev->device.numa_node = numa_node;
+
+		/* publish the device only once it is fully populated */
+		TAILQ_INSERT_TAIL(&dsa_bus.device_list, dev, next);
+		devcount++;
+	}
+
+	closedir(dev_dir);
+	return 0;
+}
+
+/*
+ * Bus find_device callback: return the first device for which
+ * cmp(device, data) yields 0.
+ *
+ * start  if non-NULL, a device previously returned from this bus; the search
+ *        resumes with the entry after it. If NULL, the search begins at the
+ *        head of the list.
+ * cmp    comparison function; a return of 0 means "match"
+ * data   opaque argument handed to cmp
+ *
+ * Returns the matching device, or NULL when no remaining entry matches.
+ */
+static struct rte_device *
+dsa_find_device(const struct rte_device *start, rte_dev_cmp_t cmp,
+			 const void *data)
+{
+	struct rte_dsa_device *dev;
+
+	/* the rte_device struct must be at start of dsa structure */
+	RTE_BUILD_BUG_ON(offsetof(struct rte_dsa_device, device) != 0);
+
+	if (start == NULL)
+		dev = TAILQ_FIRST(&dsa_bus.device_list);
+	else /* jump to start point if given */
+		dev = TAILQ_NEXT((const struct rte_dsa_device *)start, next);
+
+	for (; dev != NULL; dev = TAILQ_NEXT(dev, next)) {
+		if (cmp(&dev->device, data) == 0)
+			return &dev->device;
+	}
+
+	return NULL;
+}
+
+/*
+ * Bus get_iommu_class callback: request VA mode once at least one work queue
+ * has been scanned, and express no preference otherwise.
+ */
+static enum rte_iova_mode
+dsa_get_iommu_class(void)
+{
+	/* if there are no devices, report don't care, otherwise VA mode */
+	if (devcount == 0)
+		return RTE_IOVA_DC;
+
+	return RTE_IOVA_VA;
+}
+
+/*
+ * Bus parse callback: parse a work-queue name of the form "wq<dev>.<wq>".
+ *
+ * name  the string to parse
+ * addr  where to store the result, as a struct dsa_wq_addr; may be NULL, in
+ *       which case the name is only validated and nothing is written
+ *
+ * Returns 0 if the name parses and both ids fit in 16 bits, -1 otherwise.
+ */
+static int
+dsa_addr_parse(const char *name, void *addr)
+{
+	struct dsa_wq_addr *wq = addr;
+	unsigned int device_id, wq_id;
+
+	if (sscanf(name, "wq%u.%u", &device_id, &wq_id) != 2) {
+		IDXD_PMD_DEBUG("Parsing WQ name failed: %s", name);
+		return -1;
+	}
+
+	/* reject ids that would be silently truncated by the uint16_t fields */
+	if (device_id > UINT16_MAX || wq_id > UINT16_MAX) {
+		IDXD_PMD_DEBUG("WQ name out of range: %s", name);
+		return -1;
+	}
+
+	/* callers may pass NULL just to test whether the name is parseable */
+	if (wq != NULL) {
+		wq->device_id = device_id;
+		wq->wq_id = wq_id;
+	}
+	return 0;
+}
+
+/* register the bus embedded in dsa_bus under the name "dsa" */
+RTE_REGISTER_BUS(dsa, dsa_bus.bus);
diff --git a/drivers/dma/idxd/meson.build b/drivers/dma/idxd/meson.build
index 4426a9f65c..45418077f4 100644
--- a/drivers/dma/idxd/meson.build
+++ b/drivers/dma/idxd/meson.build
@@ -7,5 +7,6 @@  endif
 
 deps += ['bus_pci']
 sources = files(
+        'idxd_bus.c',
         'idxd_pci.c'
 )