Message ID | 20210917152437.3270330-6-kevin.laatz@intel.com (mailing list archive)
---|---
State | Superseded, archived
Delegated to | Thomas Monjalon
Series | add dmadev driver for idxd devices

Context | Check | Description
---|---|---
ci/Intel-compilation | warning | apply issues
ci/checkpatch | warning | coding style issues
On 2021/9/17 23:24, Kevin Laatz wrote:
> When a suitable device is found during the PCI probe, create a dmadev
> instance for each HW queue. HW definitions required are also included.
>
> Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
> Signed-off-by: Kevin Laatz <kevin.laatz@intel.com>
> Reviewed-by: Conor Walsh <conor.walsh@intel.com>
>
> ---
> v4: rebase changes
> ---
>  drivers/dma/idxd/idxd_hw_defs.h  |  71 ++++++++
>  drivers/dma/idxd/idxd_internal.h |  16 ++
>  drivers/dma/idxd/idxd_pci.c      | 278 ++++++++++++++++++++++++++++++-
>  3 files changed, 362 insertions(+), 3 deletions(-)
>  create mode 100644 drivers/dma/idxd/idxd_hw_defs.h
>

[snip]

> +static int
> +idxd_dmadev_destroy(const char *name)
> +{
> +	int ret;
> +	uint8_t err_code;
> +	struct rte_dma_dev *dmadev;
> +	struct idxd_dmadev *idxd;
> +	int dev_id = rte_dma_get_dev_id(name);
> +
> +	if (!name) {
> +		IDXD_PMD_ERR("Invalid device name");
> +		return -EINVAL;
> +	}
> +
> +	if (dev_id < 0) {
> +		IDXD_PMD_ERR("Invalid device ID");
> +		return -EINVAL;
> +	}
> +
> +	dmadev = &rte_dma_devices[dev_id];
> +	if (!dmadev) {
> +		IDXD_PMD_ERR("Invalid device name (%s)", name);
> +		return -EINVAL;
> +	}
> +
> +	idxd = dmadev->dev_private;
> +	if (!idxd) {
> +		IDXD_PMD_ERR("Error getting dev_private");
> +		return -EINVAL;
> +	}
> +
> +	/* disable the device */
> +	err_code = idxd_pci_dev_command(idxd, idxd_disable_dev);
> +	if (err_code) {
> +		IDXD_PMD_ERR("Error disabling device: code %#x", err_code);
> +		return err_code;
> +	}
> +	IDXD_PMD_DEBUG("IDXD Device disabled OK");

Recommended: move the above disable idxd device to rte_dma_close() ops.
when create: mark state as READY
when destroy: direct call rte_dma_pmd_release(name), the lib will call rte_dma_close().

> +
> +	/* free device memory */
> +	IDXD_PMD_DEBUG("Freeing device driver memory");
> +	dmadev->dev_private = NULL;

The dmalib managed dev_private, so that driver could not do free again.

> +	rte_free(idxd->batch_idx_ring);
> +	rte_free(idxd->desc_ring);

Please move above free ops to rte_dma_close() ops.

> +
> +	/* rte_dma_close is called by pmd_release */
> +	ret = rte_dma_pmd_release(name);
> +	if (ret)
> +		IDXD_PMD_DEBUG("Device cleanup failed");
> +
> +	return 0;
>  }

[snip]
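For illustration, a minimal sketch of the rework being recommended here: the disable and free steps move into a dev_close callback, so the remove path only has to release the device. This is not code from the patch series; the op signature and the .dev_close member name are assumed from the dmadev PMD header under review, dev->dev_private follows the field name used in the quoted code, and the body reuses functions already defined in idxd_pci.c.

/*
 * Sketch only: device teardown done from a dev_close callback instead of
 * from the PCI remove path.  Names marked above as assumptions.
 */
static int
idxd_pci_dev_close(struct rte_dma_dev *dev)
{
	struct idxd_dmadev *idxd = dev->dev_private;
	uint8_t err_code;

	/* disable the device before releasing any driver resources */
	err_code = idxd_pci_dev_command(idxd, idxd_disable_dev);
	if (err_code) {
		IDXD_PMD_ERR("Error disabling device: code %#x", err_code);
		return err_code;
	}
	IDXD_PMD_DEBUG("IDXD Device disabled OK");

	/* free only the driver-allocated rings; dev_private itself is owned
	 * and freed by the dmadev library, per the comment above */
	rte_free(idxd->batch_idx_ring);
	rte_free(idxd->desc_ring);

	return 0;
}

static const struct rte_dma_dev_ops idxd_pci_ops = {
	.dev_close = idxd_pci_dev_close,
};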
On 22/09/2021 03:12, fengchengwen wrote:
> On 2021/9/17 23:24, Kevin Laatz wrote:
>> When a suitable device is found during the PCI probe, create a dmadev
>> instance for each HW queue. HW definitions required are also included.
>>
>> Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
>> Signed-off-by: Kevin Laatz <kevin.laatz@intel.com>
>> Reviewed-by: Conor Walsh <conor.walsh@intel.com>
>>
>> ---
>> v4: rebase changes
>> ---
>>  drivers/dma/idxd/idxd_hw_defs.h  |  71 ++++++++
>>  drivers/dma/idxd/idxd_internal.h |  16 ++
>>  drivers/dma/idxd/idxd_pci.c      | 278 ++++++++++++++++++++++++++++++-
>>  3 files changed, 362 insertions(+), 3 deletions(-)
>>  create mode 100644 drivers/dma/idxd/idxd_hw_defs.h
>>
[snip]
>> +
>> +static int
>> +idxd_dmadev_destroy(const char *name)
>> +{
>> +	int ret;
>> +	uint8_t err_code;
>> +	struct rte_dma_dev *dmadev;
>> +	struct idxd_dmadev *idxd;
>> +	int dev_id = rte_dma_get_dev_id(name);
>> +
>> +	if (!name) {
>> +		IDXD_PMD_ERR("Invalid device name");
>> +		return -EINVAL;
>> +	}
>> +
>> +	if (dev_id < 0) {
>> +		IDXD_PMD_ERR("Invalid device ID");
>> +		return -EINVAL;
>> +	}
>> +
>> +	dmadev = &rte_dma_devices[dev_id];
>> +	if (!dmadev) {
>> +		IDXD_PMD_ERR("Invalid device name (%s)", name);
>> +		return -EINVAL;
>> +	}
>> +
>> +	idxd = dmadev->dev_private;
>> +	if (!idxd) {
>> +		IDXD_PMD_ERR("Error getting dev_private");
>> +		return -EINVAL;
>> +	}
>> +
>> +	/* disable the device */
>> +	err_code = idxd_pci_dev_command(idxd, idxd_disable_dev);
>> +	if (err_code) {
>> +		IDXD_PMD_ERR("Error disabling device: code %#x", err_code);
>> +		return err_code;
>> +	}
>> +	IDXD_PMD_DEBUG("IDXD Device disabled OK");
> Recommended: move the above disable idxd device to rte_dma_close() ops.
> when create: mark state as READY
> when destroy: direct call rte_dma_pmd_release(name), the lib will call rte_dma_close().
>
>> +
>> +	/* free device memory */
>> +	IDXD_PMD_DEBUG("Freeing device driver memory");
>> +	dmadev->dev_private = NULL;
> The dmalib managed dev_private, so that driver could not do free again.
>
>> +	rte_free(idxd->batch_idx_ring);
>> +	rte_free(idxd->desc_ring);
> Please move above free ops to rte_dma_close() ops.
>

Will fix, thanks
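With the close logic in the dev_close op (see the sketch above), the teardown path Kevin agreed to rework would shrink to little more than a release call: the device is marked READY at create time as the reviewer suggests, and the library invokes the close callback from rte_dma_pmd_release(). A hedged sketch of that simplified destroy path, reusing only names already present in the patch:

/*
 * Sketch of the slimmed-down teardown implied by the review; not the final
 * driver code.  rte_dma_pmd_release() is expected to invoke the dev_close
 * callback before freeing the device, per the in-code comment in the patch.
 */
static int
idxd_dmadev_destroy(const char *name)
{
	int ret;

	if (name == NULL) {
		IDXD_PMD_ERR("Invalid device name");
		return -EINVAL;
	}

	/* the dmadev library calls the dev_close op from here */
	ret = rte_dma_pmd_release(name);
	if (ret)
		IDXD_PMD_DEBUG("Device cleanup failed");

	return ret;
}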
diff --git a/drivers/dma/idxd/idxd_hw_defs.h b/drivers/dma/idxd/idxd_hw_defs.h
new file mode 100644
index 0000000000..ea627cba6d
--- /dev/null
+++ b/drivers/dma/idxd/idxd_hw_defs.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright 2021 Intel Corporation
+ */
+
+#ifndef _IDXD_HW_DEFS_H_
+#define _IDXD_HW_DEFS_H_
+
+/*** Definitions for Intel(R) Data Streaming Accelerator ***/
+
+#define IDXD_CMD_SHIFT 20
+enum rte_idxd_cmds {
+	idxd_enable_dev = 1,
+	idxd_disable_dev,
+	idxd_drain_all,
+	idxd_abort_all,
+	idxd_reset_device,
+	idxd_enable_wq,
+	idxd_disable_wq,
+	idxd_drain_wq,
+	idxd_abort_wq,
+	idxd_reset_wq,
+};
+
+/* General bar0 registers */
+struct rte_idxd_bar0 {
+	uint32_t __rte_cache_aligned version;    /* offset 0x00 */
+	uint64_t __rte_aligned(0x10) gencap;     /* offset 0x10 */
+	uint64_t __rte_aligned(0x10) wqcap;      /* offset 0x20 */
+	uint64_t __rte_aligned(0x10) grpcap;     /* offset 0x30 */
+	uint64_t __rte_aligned(0x08) engcap;     /* offset 0x38 */
+	uint64_t __rte_aligned(0x10) opcap;      /* offset 0x40 */
+	uint64_t __rte_aligned(0x20) offsets[2]; /* offset 0x60 */
+	uint32_t __rte_aligned(0x20) gencfg;     /* offset 0x80 */
+	uint32_t __rte_aligned(0x08) genctrl;    /* offset 0x88 */
+	uint32_t __rte_aligned(0x10) gensts;     /* offset 0x90 */
+	uint32_t __rte_aligned(0x08) intcause;   /* offset 0x98 */
+	uint32_t __rte_aligned(0x10) cmd;        /* offset 0xA0 */
+	uint32_t __rte_aligned(0x08) cmdstatus;  /* offset 0xA8 */
+	uint64_t __rte_aligned(0x20) swerror[4]; /* offset 0xC0 */
+};
+
+/* workqueue config is provided by array of uint32_t. */
+enum rte_idxd_wqcfg {
+	wq_size_idx,       /* size is in first 32-bit value */
+	wq_threshold_idx,  /* WQ threshold second 32-bits */
+	wq_mode_idx,       /* WQ mode and other flags */
+	wq_sizes_idx,      /* WQ transfer and batch sizes */
+	wq_occ_int_idx,    /* WQ occupancy interrupt handle */
+	wq_occ_limit_idx,  /* WQ occupancy limit */
+	wq_state_idx,      /* WQ state and occupancy state */
+};
+
+#define WQ_MODE_SHARED    0
+#define WQ_MODE_DEDICATED 1
+#define WQ_PRIORITY_SHIFT 4
+#define WQ_BATCH_SZ_SHIFT 5
+#define WQ_STATE_SHIFT 30
+#define WQ_STATE_MASK 0x3
+
+struct rte_idxd_grpcfg {
+	uint64_t grpwqcfg[4] __rte_cache_aligned; /* 64-byte register set */
+	uint64_t grpengcfg;  /* offset 32 */
+	uint32_t grpflags;   /* offset 40 */
+};
+
+#define GENSTS_DEV_STATE_MASK 0x03
+#define CMDSTATUS_ACTIVE_SHIFT 31
+#define CMDSTATUS_ACTIVE_MASK (1 << 31)
+#define CMDSTATUS_ERR_MASK 0xFF
+
+#endif
diff --git a/drivers/dma/idxd/idxd_internal.h b/drivers/dma/idxd/idxd_internal.h
index fa6f053f72..cb3a68c69b 100644
--- a/drivers/dma/idxd/idxd_internal.h
+++ b/drivers/dma/idxd/idxd_internal.h
@@ -5,6 +5,10 @@
 #ifndef _IDXD_INTERNAL_H_
 #define _IDXD_INTERNAL_H_
 
+#include <rte_spinlock.h>
+
+#include "idxd_hw_defs.h"
+
 /**
  * @file idxd_internal.h
  *
@@ -24,6 +28,16 @@ extern int idxd_pmd_logtype;
 #define IDXD_PMD_ERR(fmt, args...)    IDXD_PMD_LOG(ERR, fmt, ## args)
 #define IDXD_PMD_WARN(fmt, args...)   IDXD_PMD_LOG(WARNING, fmt, ## args)
 
+struct idxd_pci_common {
+	rte_spinlock_t lk;
+
+	uint8_t wq_cfg_sz;
+	volatile struct rte_idxd_bar0 *regs;
+	volatile uint32_t *wq_regs_base;
+	volatile struct rte_idxd_grpcfg *grp_regs;
+	volatile void *portals;
+};
+
 struct idxd_dmadev {
 	/* counters to track the batches */
 	unsigned short max_batches;
@@ -58,6 +72,8 @@ struct idxd_dmadev {
 		struct {
 			unsigned int dsa_id;
 		} bus;
+
+		struct idxd_pci_common *pci;
 	} u;
 };
 
diff --git a/drivers/dma/idxd/idxd_pci.c b/drivers/dma/idxd/idxd_pci.c
index 79e4aadcab..171e5ffc07 100644
--- a/drivers/dma/idxd/idxd_pci.c
+++ b/drivers/dma/idxd/idxd_pci.c
@@ -3,6 +3,9 @@
  */
 
 #include <rte_bus_pci.h>
+#include <rte_devargs.h>
+#include <rte_dmadev_pmd.h>
+#include <rte_malloc.h>
 
 #include "idxd_internal.h"
 
@@ -16,17 +19,286 @@ const struct rte_pci_id pci_id_idxd_map[] = {
 	{ .vendor_id = 0, /* sentinel */ },
 };
 
+static inline int
+idxd_pci_dev_command(struct idxd_dmadev *idxd, enum rte_idxd_cmds command)
+{
+	uint8_t err_code;
+	uint16_t qid = idxd->qid;
+	int i = 0;
+
+	if (command >= idxd_disable_wq && command <= idxd_reset_wq)
+		qid = (1 << qid);
+	rte_spinlock_lock(&idxd->u.pci->lk);
+	idxd->u.pci->regs->cmd = (command << IDXD_CMD_SHIFT) | qid;
+
+	do {
+		rte_pause();
+		err_code = idxd->u.pci->regs->cmdstatus;
+		if (++i >= 1000) {
+			IDXD_PMD_ERR("Timeout waiting for command response from HW");
+			rte_spinlock_unlock(&idxd->u.pci->lk);
+			return err_code;
+		}
+	} while (idxd->u.pci->regs->cmdstatus & CMDSTATUS_ACTIVE_MASK);
+	rte_spinlock_unlock(&idxd->u.pci->lk);
+
+	return err_code & CMDSTATUS_ERR_MASK;
+}
+
+static uint32_t *
+idxd_get_wq_cfg(struct idxd_pci_common *pci, uint8_t wq_idx)
+{
+	return RTE_PTR_ADD(pci->wq_regs_base,
+			(uintptr_t)wq_idx << (5 + pci->wq_cfg_sz));
+}
+
+static int
+idxd_is_wq_enabled(struct idxd_dmadev *idxd)
+{
+	uint32_t state = idxd_get_wq_cfg(idxd->u.pci, idxd->qid)[wq_state_idx];
+	return ((state >> WQ_STATE_SHIFT) & WQ_STATE_MASK) == 0x1;
+}
+
+static const struct rte_dma_dev_ops idxd_pci_ops = {
+
+};
+
+/* each portal uses 4 x 4k pages */
+#define IDXD_PORTAL_SIZE (4096 * 4)
+
+static int
+init_pci_device(struct rte_pci_device *dev, struct idxd_dmadev *idxd,
+		unsigned int max_queues)
+{
+	struct idxd_pci_common *pci;
+	uint8_t nb_groups, nb_engines, nb_wqs;
+	uint16_t grp_offset, wq_offset; /* how far into bar0 the regs are */
+	uint16_t wq_size, total_wq_size;
+	uint8_t lg2_max_batch, lg2_max_copy_size;
+	unsigned int i, err_code;
+
+	pci = malloc(sizeof(*pci));
+	if (pci == NULL) {
+		IDXD_PMD_ERR("%s: Can't allocate memory", __func__);
+		goto err;
+	}
+	rte_spinlock_init(&pci->lk);
+
+	/* assign the bar registers, and then configure device */
+	pci->regs = dev->mem_resource[0].addr;
+	grp_offset = (uint16_t)pci->regs->offsets[0];
+	pci->grp_regs = RTE_PTR_ADD(pci->regs, grp_offset * 0x100);
+	wq_offset = (uint16_t)(pci->regs->offsets[0] >> 16);
+	pci->wq_regs_base = RTE_PTR_ADD(pci->regs, wq_offset * 0x100);
+	pci->portals = dev->mem_resource[2].addr;
+	pci->wq_cfg_sz = (pci->regs->wqcap >> 24) & 0x0F;
+
+	/* sanity check device status */
+	if (pci->regs->gensts & GENSTS_DEV_STATE_MASK) {
+		/* need function-level-reset (FLR) or is enabled */
+		IDXD_PMD_ERR("Device status is not disabled, cannot init");
+		goto err;
+	}
+	if (pci->regs->cmdstatus & CMDSTATUS_ACTIVE_MASK) {
+		/* command in progress */
+		IDXD_PMD_ERR("Device has a command in progress, cannot init");
+		goto err;
+	}
+
+	/* read basic info about the hardware for use when configuring */
+	nb_groups = (uint8_t)pci->regs->grpcap;
+	nb_engines = (uint8_t)pci->regs->engcap;
+	nb_wqs = (uint8_t)(pci->regs->wqcap >> 16);
+	total_wq_size = (uint16_t)pci->regs->wqcap;
+	lg2_max_copy_size = (uint8_t)(pci->regs->gencap >> 16) & 0x1F;
+	lg2_max_batch = (uint8_t)(pci->regs->gencap >> 21) & 0x0F;
+
+	IDXD_PMD_DEBUG("nb_groups = %u, nb_engines = %u, nb_wqs = %u",
+			nb_groups, nb_engines, nb_wqs);
+
+	/* zero out any old config */
+	for (i = 0; i < nb_groups; i++) {
+		pci->grp_regs[i].grpengcfg = 0;
+		pci->grp_regs[i].grpwqcfg[0] = 0;
+	}
+	for (i = 0; i < nb_wqs; i++)
+		idxd_get_wq_cfg(pci, i)[0] = 0;
+
+	/* limit queues if necessary */
+	if (max_queues != 0 && nb_wqs > max_queues) {
+		nb_wqs = max_queues;
+		if (nb_engines > max_queues)
+			nb_engines = max_queues;
+		if (nb_groups > max_queues)
+			nb_engines = max_queues;
+		IDXD_PMD_DEBUG("Limiting queues to %u", nb_wqs);
+	}
+
+	/* put each engine into a separate group to avoid reordering */
+	if (nb_groups > nb_engines)
+		nb_groups = nb_engines;
+	if (nb_groups < nb_engines)
+		nb_engines = nb_groups;
+
+	/* assign engines to groups, round-robin style */
+	for (i = 0; i < nb_engines; i++) {
+		IDXD_PMD_DEBUG("Assigning engine %u to group %u",
+				i, i % nb_groups);
+		pci->grp_regs[i % nb_groups].grpengcfg |= (1ULL << i);
+	}
+
+	/* now do the same for queues and give work slots to each queue */
+	wq_size = total_wq_size / nb_wqs;
+	IDXD_PMD_DEBUG("Work queue size = %u, max batch = 2^%u, max copy = 2^%u",
+			wq_size, lg2_max_batch, lg2_max_copy_size);
+	for (i = 0; i < nb_wqs; i++) {
+		/* add engine "i" to a group */
+		IDXD_PMD_DEBUG("Assigning work queue %u to group %u",
+				i, i % nb_groups);
+		pci->grp_regs[i % nb_groups].grpwqcfg[0] |= (1ULL << i);
+		/* now configure it, in terms of size, max batch, mode */
+		idxd_get_wq_cfg(pci, i)[wq_size_idx] = wq_size;
+		idxd_get_wq_cfg(pci, i)[wq_mode_idx] = (1 << WQ_PRIORITY_SHIFT) |
+				WQ_MODE_DEDICATED;
+		idxd_get_wq_cfg(pci, i)[wq_sizes_idx] = lg2_max_copy_size |
+				(lg2_max_batch << WQ_BATCH_SZ_SHIFT);
+	}
+
+	/* dump the group configuration to output */
+	for (i = 0; i < nb_groups; i++) {
+		IDXD_PMD_DEBUG("## Group %d", i);
+		IDXD_PMD_DEBUG("    GRPWQCFG: %"PRIx64, pci->grp_regs[i].grpwqcfg[0]);
+		IDXD_PMD_DEBUG("    GRPENGCFG: %"PRIx64, pci->grp_regs[i].grpengcfg);
+		IDXD_PMD_DEBUG("    GRPFLAGS: %"PRIx32, pci->grp_regs[i].grpflags);
+	}
+
+	idxd->u.pci = pci;
+	idxd->max_batches = wq_size;
+
+	/* enable the device itself */
+	err_code = idxd_pci_dev_command(idxd, idxd_enable_dev);
+	if (err_code) {
+		IDXD_PMD_ERR("Error enabling device: code %#x", err_code);
+		return err_code;
+	}
+	IDXD_PMD_DEBUG("IDXD Device enabled OK");
+
+	return nb_wqs;
+
+err:
+	free(pci);
+	return -1;
+}
+
 static int
 idxd_dmadev_probe_pci(struct rte_pci_driver *drv, struct rte_pci_device *dev)
 {
-	int ret = 0;
+	struct idxd_dmadev idxd = {0};
+	uint8_t nb_wqs;
+	int qid, ret = 0;
 	char name[PCI_PRI_STR_SIZE];
+	unsigned int max_queues = 0;
 
 	rte_pci_device_name(&dev->addr, name, sizeof(name));
 	IDXD_PMD_INFO("Init %s on NUMA node %d", name, dev->device.numa_node);
 	dev->device.driver = &drv->driver;
 
-	return ret;
+	if (dev->device.devargs && dev->device.devargs->args[0] != '\0') {
+		/* if the number of devargs grows beyond just 1, use rte_kvargs */
+		if (sscanf(dev->device.devargs->args,
+				"max_queues=%u", &max_queues) != 1) {
+			IDXD_PMD_ERR("Invalid device parameter: '%s'",
+					dev->device.devargs->args);
+			return -1;
+		}
+	}
+
+	ret = init_pci_device(dev, &idxd, max_queues);
+	if (ret < 0) {
+		IDXD_PMD_ERR("Error initializing PCI hardware");
+		return ret;
+	}
+	if (idxd.u.pci->portals == NULL) {
+		IDXD_PMD_ERR("Error, invalid portal assigned during initialization\n");
+		return -EINVAL;
+	}
+	nb_wqs = (uint8_t)ret;
+
+	/* set up one device for each queue */
+	for (qid = 0; qid < nb_wqs; qid++) {
+		char qname[32];
+
+		/* add the queue number to each device name */
+		snprintf(qname, sizeof(qname), "%s-q%d", name, qid);
+		idxd.qid = qid;
+		idxd.portal = RTE_PTR_ADD(idxd.u.pci->portals,
+				qid * IDXD_PORTAL_SIZE);
+		if (idxd_is_wq_enabled(&idxd))
+			IDXD_PMD_ERR("Error, WQ %u seems enabled", qid);
+		ret = idxd_dmadev_create(qname, &dev->device,
+				&idxd, &idxd_pci_ops);
+		if (ret != 0) {
+			IDXD_PMD_ERR("Failed to create dmadev %s", name);
+			if (qid == 0) /* if no devices using this, free pci */
+				free(idxd.u.pci);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int
+idxd_dmadev_destroy(const char *name)
+{
+	int ret;
+	uint8_t err_code;
+	struct rte_dma_dev *dmadev;
+	struct idxd_dmadev *idxd;
+	int dev_id = rte_dma_get_dev_id(name);
+
+	if (!name) {
+		IDXD_PMD_ERR("Invalid device name");
+		return -EINVAL;
+	}
+
+	if (dev_id < 0) {
+		IDXD_PMD_ERR("Invalid device ID");
+		return -EINVAL;
+	}
+
+	dmadev = &rte_dma_devices[dev_id];
+	if (!dmadev) {
+		IDXD_PMD_ERR("Invalid device name (%s)", name);
+		return -EINVAL;
+	}
+
+	idxd = dmadev->dev_private;
+	if (!idxd) {
+		IDXD_PMD_ERR("Error getting dev_private");
+		return -EINVAL;
+	}
+
+	/* disable the device */
+	err_code = idxd_pci_dev_command(idxd, idxd_disable_dev);
+	if (err_code) {
+		IDXD_PMD_ERR("Error disabling device: code %#x", err_code);
+		return err_code;
+	}
+	IDXD_PMD_DEBUG("IDXD Device disabled OK");
+
+	/* free device memory */
+	IDXD_PMD_DEBUG("Freeing device driver memory");
+	dmadev->dev_private = NULL;
+	rte_free(idxd->batch_idx_ring);
+	rte_free(idxd->desc_ring);
+
+	/* rte_dma_close is called by pmd_release */
+	ret = rte_dma_pmd_release(name);
+	if (ret)
+		IDXD_PMD_DEBUG("Device cleanup failed");
+
+	return 0;
 }
 
 static int
@@ -39,7 +311,7 @@ idxd_dmadev_remove_pci(struct rte_pci_device *dev)
 	IDXD_PMD_INFO("Closing %s on NUMA node %d",
 			name, dev->device.numa_node);
 
-	return 0;
+	return idxd_dmadev_destroy(name);
 }
 
 struct rte_pci_driver idxd_pmd_drv_pci = {
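The probe path above parses a single "max_queues=N" parameter with sscanf, and the in-code comment notes that rte_kvargs should be used once more devargs are added. Below is a rough sketch of what that could look like; the helper names idxd_parse_devargs and idxd_max_queues_handler are hypothetical and not part of this patch. The parameter itself is passed on the EAL command line as part of the PCI allow option, e.g. "-a 0000:6a:01.0,max_queues=4".

#include <stdlib.h>

#include <rte_common.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>

/* per-key handler invoked by rte_kvargs_process() for "max_queues" */
static int
idxd_max_queues_handler(const char *key __rte_unused, const char *value,
		void *opaque)
{
	unsigned int *max_queues = opaque;

	/* accept decimal or hex; no range checking in this sketch */
	*max_queues = strtoul(value, NULL, 0);
	return 0;
}

/* hypothetical replacement for the sscanf() check in the probe function */
static int
idxd_parse_devargs(struct rte_devargs *devargs, unsigned int *max_queues)
{
	static const char * const valid_keys[] = { "max_queues", NULL };
	struct rte_kvargs *kvlist;
	int ret;

	if (devargs == NULL || devargs->args[0] == '\0')
		return 0;

	kvlist = rte_kvargs_parse(devargs->args, valid_keys);
	if (kvlist == NULL)
		return -EINVAL;

	ret = rte_kvargs_process(kvlist, "max_queues",
			idxd_max_queues_handler, max_queues);
	rte_kvargs_free(kvlist);
	return ret;
}

Because rte_kvargs_parse() validates keys against valid_keys, unknown or misspelled parameters are rejected up front, which scales better than extending the sscanf format string as new devargs are introduced.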