[v4,07/14] baseband/acc: add queue configuration for ACC200
Checks
Commit Message
Adding function to create and configure queues for the
device.
Signed-off-by: Nic Chautru <nicolas.chautru@intel.com>
---
drivers/baseband/acc/rte_acc200_pmd.c | 373 +++++++++++++++++++++++++++++++++-
1 file changed, 372 insertions(+), 1 deletion(-)
Comments
On 9/22/22 02:27, Nic Chautru wrote:
> Adding function to create and configure queues for the
> device.
>
> Signed-off-by: Nic Chautru <nicolas.chautru@intel.com>
> ---
> drivers/baseband/acc/rte_acc200_pmd.c | 373 +++++++++++++++++++++++++++++++++-
> 1 file changed, 372 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/baseband/acc/rte_acc200_pmd.c b/drivers/baseband/acc/rte_acc200_pmd.c
> index 43415eb..355cf8e 100644
> --- a/drivers/baseband/acc/rte_acc200_pmd.c
> +++ b/drivers/baseband/acc/rte_acc200_pmd.c
> @@ -220,16 +220,383 @@
> acc_conf->q_fft.aq_depth_log2);
> }
>
> +/* Allocate 64MB memory used for all software rings */
> +static int
> +acc200_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int socket_id)
> +{
> + uint32_t phys_low, phys_high, value;
> + struct acc_device *d = dev->data->dev_private;
> + const struct acc200_registry_addr *reg_addr;
> +
> + if (d->pf_device && !d->acc_conf.pf_mode_en) {
> + rte_bbdev_log(NOTICE,
> + "%s has PF mode disabled. This PF can't be used.",
> + dev->data->name);
> + return -ENODEV;
> + }
> + if (!d->pf_device && d->acc_conf.pf_mode_en) {
> + rte_bbdev_log(NOTICE,
> + "%s has PF mode enabled. This VF can't be used.",
> + dev->data->name);
> + return -ENODEV;
> + }
> +
> + alloc_sw_rings_min_mem(dev, d, num_queues, socket_id);
> +
> + /* If minimal memory space approach failed, then allocate
> + * the 2 * 64MB block for the sw rings
> + */
> + if (d->sw_rings == NULL)
> + alloc_2x64mb_sw_rings_mem(dev, d, socket_id);
> +
> + if (d->sw_rings == NULL) {
> + rte_bbdev_log(NOTICE,
> + "Failure allocating sw_rings memory");
> + return -ENODEV;
-ENOMEM;
> + }
> +
> + /* Configure ACC200 with the base address for DMA descriptor rings
> + * Same descriptor rings used for UL and DL DMA Engines
> + * Note : Assuming only VF0 bundle is used for PF mode
> + */
> + phys_high = (uint32_t)(d->sw_rings_iova >> 32);
> + phys_low = (uint32_t)(d->sw_rings_iova & ~(ACC_SIZE_64MBYTE-1));
> +
> + /* Choose correct registry addresses for the device type */
> + if (d->pf_device)
> + reg_addr = &pf_reg_addr;
> + else
> + reg_addr = &vf_reg_addr;
> +
> + /* Read the populated cfg from ACC200 registers */
> + fetch_acc200_config(dev);
> +
> + /* Start Pmon */
> + for (value = 0; value <= 2; value++) {
> + acc_reg_write(d, reg_addr->pmon_ctrl_a, value);
> + acc_reg_write(d, reg_addr->pmon_ctrl_b, value);
> + acc_reg_write(d, reg_addr->pmon_ctrl_c, value);
> + }
> +
> + /* Release AXI from PF */
> + if (d->pf_device)
> + acc_reg_write(d, HWPfDmaAxiControl, 1);
> +
> + acc_reg_write(d, reg_addr->dma_ring_ul5g_hi, phys_high);
> + acc_reg_write(d, reg_addr->dma_ring_ul5g_lo, phys_low);
> + acc_reg_write(d, reg_addr->dma_ring_dl5g_hi, phys_high);
> + acc_reg_write(d, reg_addr->dma_ring_dl5g_lo, phys_low);
> + acc_reg_write(d, reg_addr->dma_ring_ul4g_hi, phys_high);
> + acc_reg_write(d, reg_addr->dma_ring_ul4g_lo, phys_low);
> + acc_reg_write(d, reg_addr->dma_ring_dl4g_hi, phys_high);
> + acc_reg_write(d, reg_addr->dma_ring_dl4g_lo, phys_low);
> + acc_reg_write(d, reg_addr->dma_ring_fft_hi, phys_high);
> + acc_reg_write(d, reg_addr->dma_ring_fft_lo, phys_low);
> + /*
> + * Configure Ring Size to the max queue ring size
> + * (used for wrapping purpose)
> + */
> + value = log2_basic(d->sw_ring_size / 64);
What does the value 64 mean? Is it the size of a descriptor?
If so, you should use either sizeof() or a defined constant.
> + acc_reg_write(d, reg_addr->ring_size, value);
> +
> + /* Configure tail pointer for use when SDONE enabled */
> + if (d->tail_ptrs == NULL)
> + d->tail_ptrs = rte_zmalloc_socket(
> + dev->device->driver->name,
> + ACC200_NUM_QGRPS * ACC200_NUM_AQS * sizeof(uint32_t),
> + RTE_CACHE_LINE_SIZE, socket_id);
> + if (d->tail_ptrs == NULL) {
> + rte_bbdev_log(ERR, "Failed to allocate tail ptr for %s:%u",
> + dev->device->driver->name,
> + dev->data->dev_id);
> + rte_free(d->sw_rings);
You need to set it to NULL, or you'll have a use-after-free, looking at how
it is allocated.
ret = -ENOMEM;
goto free_sw_rings;
> + return -ENOMEM;
> + }
> + d->tail_ptr_iova = rte_malloc_virt2iova(d->tail_ptrs);
> +
> + phys_high = (uint32_t)(d->tail_ptr_iova >> 32);
> + phys_low = (uint32_t)(d->tail_ptr_iova);
> + acc_reg_write(d, reg_addr->tail_ptrs_ul5g_hi, phys_high);
> + acc_reg_write(d, reg_addr->tail_ptrs_ul5g_lo, phys_low);
> + acc_reg_write(d, reg_addr->tail_ptrs_dl5g_hi, phys_high);
> + acc_reg_write(d, reg_addr->tail_ptrs_dl5g_lo, phys_low);
> + acc_reg_write(d, reg_addr->tail_ptrs_ul4g_hi, phys_high);
> + acc_reg_write(d, reg_addr->tail_ptrs_ul4g_lo, phys_low);
> + acc_reg_write(d, reg_addr->tail_ptrs_dl4g_hi, phys_high);
> + acc_reg_write(d, reg_addr->tail_ptrs_dl4g_lo, phys_low);
> + acc_reg_write(d, reg_addr->tail_ptrs_fft_hi, phys_high);
> + acc_reg_write(d, reg_addr->tail_ptrs_fft_lo, phys_low);
> +
> + if (d->harq_layout == NULL)
> + d->harq_layout = rte_zmalloc_socket("HARQ Layout",
> + ACC_HARQ_LAYOUT * sizeof(*d->harq_layout),
> + RTE_CACHE_LINE_SIZE, dev->data->socket_id);
> + if (d->harq_layout == NULL) {
> + rte_bbdev_log(ERR, "Failed to allocate harq_layout for %s:%u",
> + dev->device->driver->name,
> + dev->data->dev_id);
> + rte_free(d->sw_rings);
Same comment as above, and you also forgot to free tail_ptrs.
Maybe it would be better to have an error path, see below:
> + return -ENOMEM;
> + }
> +
> + /* Mark as configured properly */
> + d->configured = true;
> +
> + rte_bbdev_log_debug(
> + "ACC200 (%s) configured sw_rings = %p, sw_rings_iova = %#"
> + PRIx64, dev->data->name, d->sw_rings, d->sw_rings_iova);
> +
> + return 0;
free_tail_ptrs:
rte_free(d->tail_ptrs);
d->tail_ptrs = NULL;
free_sw_rings:
rte_free(d->sw_rings);
d->sw_rings = NULL;
return ret;
> +}
> +
> /* Free memory used for software rings */
> static int
> acc200_dev_close(struct rte_bbdev *dev)
> {
> - RTE_SET_USED(dev);
> + struct acc_device *d = dev->data->dev_private;
> + if (d->sw_rings_base != NULL) {
Isn't it d->sw_rings that is allocated in this patch?
Also, the NULL check is not necessary; rte_free() takes care of it.
> + rte_free(d->tail_ptrs);
> + rte_free(d->sw_rings_base);
> + rte_free(d->harq_layout);
> + d->sw_rings_base = NULL;
> + d->tail_ptrs = NULL;
> + d->harq_layout = NULL;
> + }
> /* Ensure all in flight HW transactions are completed */
> usleep(ACC_LONG_WAIT);
> return 0;
> }
>
> +/**
> + * Report a ACC200 queue index which is free
> + * Return 0 to 16k for a valid queue_idx or -1 when no queue is available
> + * Note : Only supporting VF0 Bundle for PF mode
> + */
> +static int
> +acc200_find_free_queue_idx(struct rte_bbdev *dev,
> + const struct rte_bbdev_queue_conf *conf)
> +{
> + struct acc_device *d = dev->data->dev_private;
> + int op_2_acc[6] = {0, UL_4G, DL_4G, UL_5G, DL_5G, FFT};
> + int acc = op_2_acc[conf->op_type];
> + struct rte_acc_queue_topology *qtop = NULL;
> +
> + qtopFromAcc(&qtop, acc, &(d->acc_conf));
> + if (qtop == NULL)
> + return -1;
> + /* Identify matching QGroup Index which are sorted in priority order */
> + uint16_t group_idx = qtop->first_qgroup_index;
Don't mix declarations & code.
> + group_idx += conf->priority;
> + if (group_idx >= ACC200_NUM_QGRPS ||
> + conf->priority >= qtop->num_qgroups) {
> + rte_bbdev_log(INFO, "Invalid Priority on %s, priority %u",
> + dev->data->name, conf->priority);
> + return -1;
> + }
> + /* Find a free AQ_idx */
> + uint64_t aq_idx;
Don't mix declarations & code.
> + for (aq_idx = 0; aq_idx < qtop->num_aqs_per_groups; aq_idx++) {
> + if (((d->q_assigned_bit_map[group_idx] >> aq_idx) & 0x1) == 0) {
> + /* Mark the Queue as assigned */
> + d->q_assigned_bit_map[group_idx] |= (1 << aq_idx);
> + /* Report the AQ Index */
> + return (group_idx << ACC200_GRP_ID_SHIFT) + aq_idx;
> + }
> + }
> + rte_bbdev_log(INFO, "Failed to find free queue on %s, priority %u",
> + dev->data->name, conf->priority);
> + return -1;
> +}
> +
> +/* Setup ACC200 queue */
> +static int
> +acc200_queue_setup(struct rte_bbdev *dev, uint16_t queue_id,
> + const struct rte_bbdev_queue_conf *conf)
> +{
> + struct acc_device *d = dev->data->dev_private;
> + struct acc_queue *q;
> + int16_t q_idx;
> +
> + if (d == NULL) {
> + rte_bbdev_log(ERR, "Undefined device");
> + return -ENODEV;
> + }
> + /* Allocate the queue data structure. */
> + q = rte_zmalloc_socket(dev->device->driver->name, sizeof(*q),
> + RTE_CACHE_LINE_SIZE, conf->socket);
> + if (q == NULL) {
> + rte_bbdev_log(ERR, "Failed to allocate queue memory");
> + return -ENOMEM;
> + }
> +
> + q->d = d;
> + q->ring_addr = RTE_PTR_ADD(d->sw_rings, (d->sw_ring_size * queue_id));
> + q->ring_addr_iova = d->sw_rings_iova + (d->sw_ring_size * queue_id);
> +
> + /* Prepare the Ring with default descriptor format */
> + union acc_dma_desc *desc = NULL;
> + unsigned int desc_idx, b_idx;
> + int fcw_len = (conf->op_type == RTE_BBDEV_OP_LDPC_ENC ?
> + ACC_FCW_LE_BLEN : (conf->op_type == RTE_BBDEV_OP_TURBO_DEC ?
> + ACC_FCW_TD_BLEN : (conf->op_type == RTE_BBDEV_OP_LDPC_DEC ?
> + ACC_FCW_LD_BLEN : ACC_FCW_FFT_BLEN)));
> +
> + for (desc_idx = 0; desc_idx < d->sw_ring_max_depth; desc_idx++) {
> + desc = q->ring_addr + desc_idx;
> + desc->req.word0 = ACC_DMA_DESC_TYPE;
> + desc->req.word1 = 0; /**< Timestamp */
> + desc->req.word2 = 0;
> + desc->req.word3 = 0;
> + uint64_t fcw_offset = (desc_idx << 8) + ACC_DESC_FCW_OFFSET;
> + desc->req.data_ptrs[0].address = q->ring_addr_iova + fcw_offset;
> + desc->req.data_ptrs[0].blen = fcw_len;
> + desc->req.data_ptrs[0].blkid = ACC_DMA_BLKID_FCW;
> + desc->req.data_ptrs[0].last = 0;
> + desc->req.data_ptrs[0].dma_ext = 0;
> + for (b_idx = 1; b_idx < ACC_DMA_MAX_NUM_POINTERS - 1;
> + b_idx++) {
> + desc->req.data_ptrs[b_idx].blkid = ACC_DMA_BLKID_IN;
> + desc->req.data_ptrs[b_idx].last = 1;
> + desc->req.data_ptrs[b_idx].dma_ext = 0;
> + b_idx++;
> + desc->req.data_ptrs[b_idx].blkid =
> + ACC_DMA_BLKID_OUT_ENC;
> + desc->req.data_ptrs[b_idx].last = 1;
> + desc->req.data_ptrs[b_idx].dma_ext = 0;
> + }
> + /* Preset some fields of LDPC FCW */
> + desc->req.fcw_ld.FCWversion = ACC_FCW_VER;
> + desc->req.fcw_ld.gain_i = 1;
> + desc->req.fcw_ld.gain_h = 1;
> + }
> +
> + q->lb_in = rte_zmalloc_socket(dev->device->driver->name,
> + RTE_CACHE_LINE_SIZE,
> + RTE_CACHE_LINE_SIZE, conf->socket);
> + if (q->lb_in == NULL) {
> + rte_bbdev_log(ERR, "Failed to allocate lb_in memory");
> + rte_free(q);
Please introduce a proper error path, as in the example I gave for
acc200_setup_queues().
> + return -ENOMEM;
> + }
> + q->lb_in_addr_iova = rte_malloc_virt2iova(q->lb_in);
> + q->lb_out = rte_zmalloc_socket(dev->device->driver->name,
> + RTE_CACHE_LINE_SIZE,
> + RTE_CACHE_LINE_SIZE, conf->socket);
> + if (q->lb_out == NULL) {
> + rte_bbdev_log(ERR, "Failed to allocate lb_out memory");
> + rte_free(q->lb_in);
> + rte_free(q);
> + return -ENOMEM;
> + }
> + q->lb_out_addr_iova = rte_malloc_virt2iova(q->lb_out);
> + q->companion_ring_addr = rte_zmalloc_socket(dev->device->driver->name,
> + d->sw_ring_max_depth * sizeof(*q->companion_ring_addr),
> + RTE_CACHE_LINE_SIZE, conf->socket);
> + if (q->companion_ring_addr == NULL) {
> + rte_bbdev_log(ERR, "Failed to allocate companion_ring memory");
> + rte_free(q->lb_in);
> + rte_free(q->lb_out);
> + rte_free(q);
> + return -ENOMEM;
> + }
> +
> + /*
> + * Software queue ring wraps synchronously with the HW when it reaches
> + * the boundary of the maximum allocated queue size, no matter what the
> + * sw queue size is. This wrapping is guarded by setting the wrap_mask
> + * to represent the maximum queue size as allocated at the time when
> + * the device has been setup (in configure()).
> + *
> + * The queue depth is set to the queue size value (conf->queue_size).
> + * This limits the occupancy of the queue at any point of time, so that
> + * the queue does not get swamped with enqueue requests.
> + */
> + q->sw_ring_depth = conf->queue_size;
> + q->sw_ring_wrap_mask = d->sw_ring_max_depth - 1;
> +
> + q->op_type = conf->op_type;
> +
> + q_idx = acc200_find_free_queue_idx(dev, conf);
> + if (q_idx == -1) {
> + rte_free(q->companion_ring_addr);
> + rte_free(q->lb_in);
> + rte_free(q->lb_out);
> + rte_free(q);
> + return -1;
> + }
> +
> + q->qgrp_id = (q_idx >> ACC200_GRP_ID_SHIFT) & 0xF;
> + q->vf_id = (q_idx >> ACC200_VF_ID_SHIFT) & 0x3F;
> + q->aq_id = q_idx & 0xF;
> + q->aq_depth = 0;
> + if (conf->op_type == RTE_BBDEV_OP_TURBO_DEC)
> + q->aq_depth = (1 << d->acc_conf.q_ul_4g.aq_depth_log2);
> + else if (conf->op_type == RTE_BBDEV_OP_TURBO_ENC)
> + q->aq_depth = (1 << d->acc_conf.q_dl_4g.aq_depth_log2);
> + else if (conf->op_type == RTE_BBDEV_OP_LDPC_DEC)
> + q->aq_depth = (1 << d->acc_conf.q_ul_5g.aq_depth_log2);
> + else if (conf->op_type == RTE_BBDEV_OP_LDPC_ENC)
> + q->aq_depth = (1 << d->acc_conf.q_dl_5g.aq_depth_log2);
> + else if (conf->op_type == RTE_BBDEV_OP_FFT)
> + q->aq_depth = (1 << d->acc_conf.q_fft.aq_depth_log2);
> +
> + q->mmio_reg_enqueue = RTE_PTR_ADD(d->mmio_base,
> + queue_offset(d->pf_device,
> + q->vf_id, q->qgrp_id, q->aq_id));
> +
> + rte_bbdev_log_debug(
> + "Setup dev%u q%u: qgrp_id=%u, vf_id=%u, aq_id=%u, aq_depth=%u, mmio_reg_enqueue=%p base %p\n",
> + dev->data->dev_id, queue_id, q->qgrp_id, q->vf_id,
> + q->aq_id, q->aq_depth, q->mmio_reg_enqueue,
> + d->mmio_base);
> +
> + dev->data->queues[queue_id].queue_private = q;
> + return 0;
> +}
> +
> +
> +static int
> +acc_queue_stop(struct rte_bbdev *dev, uint16_t queue_id)
> +{
> + struct acc_queue *q;
> + q = dev->data->queues[queue_id].queue_private;
> + rte_bbdev_log(INFO, "Queue Stop %d H/T/D %d %d %x OpType %d",
> + queue_id, q->sw_ring_head, q->sw_ring_tail,
> + q->sw_ring_depth, q->op_type);
> + /* ignore all operations in flight and clear counters */
> + q->sw_ring_tail = q->sw_ring_head;
> + q->aq_enqueued = 0;
> + q->aq_dequeued = 0;
> + dev->data->queues[queue_id].queue_stats.enqueued_count = 0;
> + dev->data->queues[queue_id].queue_stats.dequeued_count = 0;
> + dev->data->queues[queue_id].queue_stats.enqueue_err_count = 0;
> + dev->data->queues[queue_id].queue_stats.dequeue_err_count = 0;
> + dev->data->queues[queue_id].queue_stats.enqueue_warn_count = 0;
> + dev->data->queues[queue_id].queue_stats.dequeue_warn_count = 0;
> + return 0;
> +}
> +
> +/* Release ACC200 queue */
> +static int
> +acc200_queue_release(struct rte_bbdev *dev, uint16_t q_id)
> +{
> + struct acc_device *d = dev->data->dev_private;
> + struct acc_queue *q = dev->data->queues[q_id].queue_private;
> +
> + if (q != NULL) {
> + /* Mark the Queue as un-assigned */
> + d->q_assigned_bit_map[q->qgrp_id] &= (0xFFFFFFFFFFFFFFFF -
s/0xFFFFFFFFFFFFFFFF/~0ULL/
> + (uint64_t) (1 << q->aq_id));
Or better:
d->q_assigned_bit_map[q->qgrp_id] &= ~(1 << q->aq_id);
> + rte_free(q->companion_ring_addr);
> + rte_free(q->lb_in);
> + rte_free(q->lb_out);
> + rte_free(q);
> + dev->data->queues[q_id].queue_private = NULL;
> + }
> +
> + return 0;
> +}
> +
> /* Get ACC200 device info */
> static void
> acc200_dev_info_get(struct rte_bbdev *dev,
> @@ -279,8 +646,12 @@
> }
>
> static const struct rte_bbdev_ops acc200_bbdev_ops = {
> + .setup_queues = acc200_setup_queues,
> .close = acc200_dev_close,
> .info_get = acc200_dev_info_get,
> + .queue_setup = acc200_queue_setup,
> + .queue_release = acc200_queue_release,
> + .queue_stop = acc_queue_stop,
> };
>
> /* ACC200 PCI PF address map */
Hi Maxime,
Will update this in the new version. Thanks.
> -----Original Message-----
> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> Sent: Thursday, September 22, 2022 7:30 AM
> To: Chautru, Nicolas <nicolas.chautru@intel.com>; dev@dpdk.org;
> thomas@monjalon.net
> Cc: trix@redhat.com; mdr@ashroe.eu; Richardson, Bruce
> <bruce.richardson@intel.com>; hemant.agrawal@nxp.com;
> david.marchand@redhat.com; stephen@networkplumber.org; Vargas,
> Hernan <hernan.vargas@intel.com>
> Subject: Re: [PATCH v4 07/14] baseband/acc: add queue configuration for
> ACC200
>
>
>
> On 9/22/22 02:27, Nic Chautru wrote:
> > Adding function to create and configure queues for the device.
> >
> > Signed-off-by: Nic Chautru <nicolas.chautru@intel.com>
> > ---
> > drivers/baseband/acc/rte_acc200_pmd.c | 373
> +++++++++++++++++++++++++++++++++-
> > 1 file changed, 372 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/baseband/acc/rte_acc200_pmd.c
> > b/drivers/baseband/acc/rte_acc200_pmd.c
> > index 43415eb..355cf8e 100644
> > --- a/drivers/baseband/acc/rte_acc200_pmd.c
> > +++ b/drivers/baseband/acc/rte_acc200_pmd.c
> > @@ -220,16 +220,383 @@
> > acc_conf->q_fft.aq_depth_log2);
> > }
> >
> > +/* Allocate 64MB memory used for all software rings */ static int
> > +acc200_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int
> > +socket_id) {
> > + uint32_t phys_low, phys_high, value;
> > + struct acc_device *d = dev->data->dev_private;
> > + const struct acc200_registry_addr *reg_addr;
> > +
> > + if (d->pf_device && !d->acc_conf.pf_mode_en) {
> > + rte_bbdev_log(NOTICE,
> > + "%s has PF mode disabled. This PF can't be
> used.",
> > + dev->data->name);
> > + return -ENODEV;
> > + }
> > + if (!d->pf_device && d->acc_conf.pf_mode_en) {
> > + rte_bbdev_log(NOTICE,
> > + "%s has PF mode enabled. This VF can't be
> used.",
> > + dev->data->name);
> > + return -ENODEV;
> > + }
> > +
> > + alloc_sw_rings_min_mem(dev, d, num_queues, socket_id);
> > +
> > + /* If minimal memory space approach failed, then allocate
> > + * the 2 * 64MB block for the sw rings
> > + */
> > + if (d->sw_rings == NULL)
> > + alloc_2x64mb_sw_rings_mem(dev, d, socket_id);
> > +
> > + if (d->sw_rings == NULL) {
> > + rte_bbdev_log(NOTICE,
> > + "Failure allocating sw_rings memory");
> > + return -ENODEV;
>
> -ENOMEM;
OK
>
> > + }
> > +
> > + /* Configure ACC200 with the base address for DMA descriptor rings
> > + * Same descriptor rings used for UL and DL DMA Engines
> > + * Note : Assuming only VF0 bundle is used for PF mode
> > + */
> > + phys_high = (uint32_t)(d->sw_rings_iova >> 32);
> > + phys_low = (uint32_t)(d->sw_rings_iova & ~(ACC_SIZE_64MBYTE-
> 1));
> > +
> > + /* Choose correct registry addresses for the device type */
> > + if (d->pf_device)
> > + reg_addr = &pf_reg_addr;
> > + else
> > + reg_addr = &vf_reg_addr;
> > +
> > + /* Read the populated cfg from ACC200 registers */
> > + fetch_acc200_config(dev);
> > +
> > + /* Start Pmon */
> > + for (value = 0; value <= 2; value++) {
> > + acc_reg_write(d, reg_addr->pmon_ctrl_a, value);
> > + acc_reg_write(d, reg_addr->pmon_ctrl_b, value);
> > + acc_reg_write(d, reg_addr->pmon_ctrl_c, value);
> > + }
> > +
> > + /* Release AXI from PF */
> > + if (d->pf_device)
> > + acc_reg_write(d, HWPfDmaAxiControl, 1);
> > +
> > + acc_reg_write(d, reg_addr->dma_ring_ul5g_hi, phys_high);
> > + acc_reg_write(d, reg_addr->dma_ring_ul5g_lo, phys_low);
> > + acc_reg_write(d, reg_addr->dma_ring_dl5g_hi, phys_high);
> > + acc_reg_write(d, reg_addr->dma_ring_dl5g_lo, phys_low);
> > + acc_reg_write(d, reg_addr->dma_ring_ul4g_hi, phys_high);
> > + acc_reg_write(d, reg_addr->dma_ring_ul4g_lo, phys_low);
> > + acc_reg_write(d, reg_addr->dma_ring_dl4g_hi, phys_high);
> > + acc_reg_write(d, reg_addr->dma_ring_dl4g_lo, phys_low);
> > + acc_reg_write(d, reg_addr->dma_ring_fft_hi, phys_high);
> > + acc_reg_write(d, reg_addr->dma_ring_fft_lo, phys_low);
> > + /*
> > + * Configure Ring Size to the max queue ring size
> > + * (used for wrapping purpose)
> > + */
> > + value = log2_basic(d->sw_ring_size / 64);
>
> What is the 64 value meaning? The size of a descriptor?
> If so, you should either use sizeof() or a defined value.
OK, adding defined value for the magic number.
>
> > + acc_reg_write(d, reg_addr->ring_size, value);
> > +
> > + /* Configure tail pointer for use when SDONE enabled */
> > + if (d->tail_ptrs == NULL)
> > + d->tail_ptrs = rte_zmalloc_socket(
> > + dev->device->driver->name,
> > + ACC200_NUM_QGRPS * ACC200_NUM_AQS
> * sizeof(uint32_t),
> > + RTE_CACHE_LINE_SIZE, socket_id);
> > + if (d->tail_ptrs == NULL) {
> > + rte_bbdev_log(ERR, "Failed to allocate tail ptr for %s:%u",
> > + dev->device->driver->name,
> > + dev->data->dev_id);
> > + rte_free(d->sw_rings);
> You need to se to NULL, or you'll have a use after free looking at how it is
> allocated.
>
> ret = -ENOMEM;
> goto free_sw_rings;
OK for the overall error path change. Thanks
> > + return -ENOMEM;
> > + }
> > + d->tail_ptr_iova = rte_malloc_virt2iova(d->tail_ptrs);
> > +
> > + phys_high = (uint32_t)(d->tail_ptr_iova >> 32);
> > + phys_low = (uint32_t)(d->tail_ptr_iova);
> > + acc_reg_write(d, reg_addr->tail_ptrs_ul5g_hi, phys_high);
> > + acc_reg_write(d, reg_addr->tail_ptrs_ul5g_lo, phys_low);
> > + acc_reg_write(d, reg_addr->tail_ptrs_dl5g_hi, phys_high);
> > + acc_reg_write(d, reg_addr->tail_ptrs_dl5g_lo, phys_low);
> > + acc_reg_write(d, reg_addr->tail_ptrs_ul4g_hi, phys_high);
> > + acc_reg_write(d, reg_addr->tail_ptrs_ul4g_lo, phys_low);
> > + acc_reg_write(d, reg_addr->tail_ptrs_dl4g_hi, phys_high);
> > + acc_reg_write(d, reg_addr->tail_ptrs_dl4g_lo, phys_low);
> > + acc_reg_write(d, reg_addr->tail_ptrs_fft_hi, phys_high);
> > + acc_reg_write(d, reg_addr->tail_ptrs_fft_lo, phys_low);
> > +
> > + if (d->harq_layout == NULL)
> > + d->harq_layout = rte_zmalloc_socket("HARQ Layout",
> > + ACC_HARQ_LAYOUT * sizeof(*d-
> >harq_layout),
> > + RTE_CACHE_LINE_SIZE, dev->data-
> >socket_id);
> > + if (d->harq_layout == NULL) {
> > + rte_bbdev_log(ERR, "Failed to allocate harq_layout for
> %s:%u",
> > + dev->device->driver->name,
> > + dev->data->dev_id);
> > + rte_free(d->sw_rings);
> Same comment as above, and you also miss to free tail_ptrs.
> Maybe it would be better to have an error path, see below:
>
> > + return -ENOMEM;
> > + }
> > +
> > + /* Mark as configured properly */
> > + d->configured = true;
> > +
> > + rte_bbdev_log_debug(
> > + "ACC200 (%s) configured sw_rings = %p,
> sw_rings_iova = %#"
> > + PRIx64, dev->data->name, d->sw_rings, d-
> >sw_rings_iova);
> > +
> > + return 0;
>
> free_tail_ptrs:
> rte_free(d->tail_ptrs);
> d->tail_ptrs = NULL;
> free_sw_rings:
> rte_free(d->sw_rings);
> d->sw_rings = NULL;
>
> return ret;
> > +}
> > +
> > /* Free memory used for software rings */
> > static int
> > acc200_dev_close(struct rte_bbdev *dev)
> > {
> > - RTE_SET_USED(dev);
> > + struct acc_device *d = dev->data->dev_private;
> > + if (d->sw_rings_base != NULL) {
>
> Isn't d->sw_rings that is allocated in this patch?
No, this is sw_rings_base on purpose. The sw_rings pointer may have been realigned, whereas sw_rings_base keeps track of the original allocation.
>
> Also, the NULL check is not necessary, rte_free() takes care of it.
>
> > + rte_free(d->tail_ptrs);
> > + rte_free(d->sw_rings_base);
> > + rte_free(d->harq_layout);
> > + d->sw_rings_base = NULL;
> > + d->tail_ptrs = NULL;
> > + d->harq_layout = NULL;
> > + }
> > /* Ensure all in flight HW transactions are completed */
> > usleep(ACC_LONG_WAIT);
> > return 0;
> > }
> >
> > +/**
> > + * Report a ACC200 queue index which is free
> > + * Return 0 to 16k for a valid queue_idx or -1 when no queue is
> > +available
> > + * Note : Only supporting VF0 Bundle for PF mode */ static int
> > +acc200_find_free_queue_idx(struct rte_bbdev *dev,
> > + const struct rte_bbdev_queue_conf *conf) {
> > + struct acc_device *d = dev->data->dev_private;
> > + int op_2_acc[6] = {0, UL_4G, DL_4G, UL_5G, DL_5G, FFT};
> > + int acc = op_2_acc[conf->op_type];
> > + struct rte_acc_queue_topology *qtop = NULL;
> > +
> > + qtopFromAcc(&qtop, acc, &(d->acc_conf));
> > + if (qtop == NULL)
> > + return -1;
> > + /* Identify matching QGroup Index which are sorted in priority order
> */
> > + uint16_t group_idx = qtop->first_qgroup_index;
>
> Don't mix declarations & code.
OK
>
> > + group_idx += conf->priority;
> > + if (group_idx >= ACC200_NUM_QGRPS ||
> > + conf->priority >= qtop->num_qgroups) {
> > + rte_bbdev_log(INFO, "Invalid Priority on %s, priority %u",
> > + dev->data->name, conf->priority);
> > + return -1;
> > + }
> > + /* Find a free AQ_idx */
> > + uint64_t aq_idx;
>
> Don't mix declarations & code.
OK
>
> > + for (aq_idx = 0; aq_idx < qtop->num_aqs_per_groups; aq_idx++) {
> > + if (((d->q_assigned_bit_map[group_idx] >> aq_idx) & 0x1) ==
> 0) {
> > + /* Mark the Queue as assigned */
> > + d->q_assigned_bit_map[group_idx] |= (1 << aq_idx);
> > + /* Report the AQ Index */
> > + return (group_idx << ACC200_GRP_ID_SHIFT) +
> aq_idx;
> > + }
> > + }
> > + rte_bbdev_log(INFO, "Failed to find free queue on %s, priority %u",
> > + dev->data->name, conf->priority);
> > + return -1;
> > +}
> > +
> > +/* Setup ACC200 queue */
> > +static int
> > +acc200_queue_setup(struct rte_bbdev *dev, uint16_t queue_id,
> > + const struct rte_bbdev_queue_conf *conf) {
> > + struct acc_device *d = dev->data->dev_private;
> > + struct acc_queue *q;
> > + int16_t q_idx;
> > +
> > + if (d == NULL) {
> > + rte_bbdev_log(ERR, "Undefined device");
> > + return -ENODEV;
> > + }
> > + /* Allocate the queue data structure. */
> > + q = rte_zmalloc_socket(dev->device->driver->name, sizeof(*q),
> > + RTE_CACHE_LINE_SIZE, conf->socket);
> > + if (q == NULL) {
> > + rte_bbdev_log(ERR, "Failed to allocate queue memory");
> > + return -ENOMEM;
> > + }
> > +
> > + q->d = d;
> > + q->ring_addr = RTE_PTR_ADD(d->sw_rings, (d->sw_ring_size *
> queue_id));
> > + q->ring_addr_iova = d->sw_rings_iova + (d->sw_ring_size *
> queue_id);
> > +
> > + /* Prepare the Ring with default descriptor format */
> > + union acc_dma_desc *desc = NULL;
> > + unsigned int desc_idx, b_idx;
> > + int fcw_len = (conf->op_type == RTE_BBDEV_OP_LDPC_ENC ?
> > + ACC_FCW_LE_BLEN : (conf->op_type ==
> RTE_BBDEV_OP_TURBO_DEC ?
> > + ACC_FCW_TD_BLEN : (conf->op_type ==
> RTE_BBDEV_OP_LDPC_DEC ?
> > + ACC_FCW_LD_BLEN : ACC_FCW_FFT_BLEN)));
> > +
> > + for (desc_idx = 0; desc_idx < d->sw_ring_max_depth; desc_idx++) {
> > + desc = q->ring_addr + desc_idx;
> > + desc->req.word0 = ACC_DMA_DESC_TYPE;
> > + desc->req.word1 = 0; /**< Timestamp */
> > + desc->req.word2 = 0;
> > + desc->req.word3 = 0;
> > + uint64_t fcw_offset = (desc_idx << 8) +
> ACC_DESC_FCW_OFFSET;
> > + desc->req.data_ptrs[0].address = q->ring_addr_iova +
> fcw_offset;
> > + desc->req.data_ptrs[0].blen = fcw_len;
> > + desc->req.data_ptrs[0].blkid = ACC_DMA_BLKID_FCW;
> > + desc->req.data_ptrs[0].last = 0;
> > + desc->req.data_ptrs[0].dma_ext = 0;
> > + for (b_idx = 1; b_idx < ACC_DMA_MAX_NUM_POINTERS - 1;
> > + b_idx++) {
> > + desc->req.data_ptrs[b_idx].blkid =
> ACC_DMA_BLKID_IN;
> > + desc->req.data_ptrs[b_idx].last = 1;
> > + desc->req.data_ptrs[b_idx].dma_ext = 0;
> > + b_idx++;
> > + desc->req.data_ptrs[b_idx].blkid =
> > + ACC_DMA_BLKID_OUT_ENC;
> > + desc->req.data_ptrs[b_idx].last = 1;
> > + desc->req.data_ptrs[b_idx].dma_ext = 0;
> > + }
> > + /* Preset some fields of LDPC FCW */
> > + desc->req.fcw_ld.FCWversion = ACC_FCW_VER;
> > + desc->req.fcw_ld.gain_i = 1;
> > + desc->req.fcw_ld.gain_h = 1;
> > + }
> > +
> > + q->lb_in = rte_zmalloc_socket(dev->device->driver->name,
> > + RTE_CACHE_LINE_SIZE,
> > + RTE_CACHE_LINE_SIZE, conf->socket);
> > + if (q->lb_in == NULL) {
> > + rte_bbdev_log(ERR, "Failed to allocate lb_in memory");
> > + rte_free(q);
>
> Please introduce a proper error path as I gave example for
> acc200_setup_queues().
Fair enough, thanks.
>
> > + return -ENOMEM;
> > + }
> > + q->lb_in_addr_iova = rte_malloc_virt2iova(q->lb_in);
> > + q->lb_out = rte_zmalloc_socket(dev->device->driver->name,
> > + RTE_CACHE_LINE_SIZE,
> > + RTE_CACHE_LINE_SIZE, conf->socket);
> > + if (q->lb_out == NULL) {
> > + rte_bbdev_log(ERR, "Failed to allocate lb_out memory");
> > + rte_free(q->lb_in);
> > + rte_free(q);
> > + return -ENOMEM;
> > + }
> > + q->lb_out_addr_iova = rte_malloc_virt2iova(q->lb_out);
> > + q->companion_ring_addr = rte_zmalloc_socket(dev->device->driver-
> >name,
> > + d->sw_ring_max_depth * sizeof(*q-
> >companion_ring_addr),
> > + RTE_CACHE_LINE_SIZE, conf->socket);
> > + if (q->companion_ring_addr == NULL) {
> > + rte_bbdev_log(ERR, "Failed to allocate companion_ring
> memory");
> > + rte_free(q->lb_in);
> > + rte_free(q->lb_out);
> > + rte_free(q);
> > + return -ENOMEM;
> > + }
> > +
> > + /*
> > + * Software queue ring wraps synchronously with the HW when it
> reaches
> > + * the boundary of the maximum allocated queue size, no matter
> what the
> > + * sw queue size is. This wrapping is guarded by setting the
> wrap_mask
> > + * to represent the maximum queue size as allocated at the time
> when
> > + * the device has been setup (in configure()).
> > + *
> > + * The queue depth is set to the queue size value (conf->queue_size).
> > + * This limits the occupancy of the queue at any point of time, so that
> > + * the queue does not get swamped with enqueue requests.
> > + */
> > + q->sw_ring_depth = conf->queue_size;
> > + q->sw_ring_wrap_mask = d->sw_ring_max_depth - 1;
> > +
> > + q->op_type = conf->op_type;
> > +
> > + q_idx = acc200_find_free_queue_idx(dev, conf);
> > + if (q_idx == -1) {
> > + rte_free(q->companion_ring_addr);
> > + rte_free(q->lb_in);
> > + rte_free(q->lb_out);
> > + rte_free(q);
> > + return -1;
> > + }
> > +
> > + q->qgrp_id = (q_idx >> ACC200_GRP_ID_SHIFT) & 0xF;
> > + q->vf_id = (q_idx >> ACC200_VF_ID_SHIFT) & 0x3F;
> > + q->aq_id = q_idx & 0xF;
> > + q->aq_depth = 0;
> > + if (conf->op_type == RTE_BBDEV_OP_TURBO_DEC)
> > + q->aq_depth = (1 << d->acc_conf.q_ul_4g.aq_depth_log2);
> > + else if (conf->op_type == RTE_BBDEV_OP_TURBO_ENC)
> > + q->aq_depth = (1 << d->acc_conf.q_dl_4g.aq_depth_log2);
> > + else if (conf->op_type == RTE_BBDEV_OP_LDPC_DEC)
> > + q->aq_depth = (1 << d->acc_conf.q_ul_5g.aq_depth_log2);
> > + else if (conf->op_type == RTE_BBDEV_OP_LDPC_ENC)
> > + q->aq_depth = (1 << d->acc_conf.q_dl_5g.aq_depth_log2);
> > + else if (conf->op_type == RTE_BBDEV_OP_FFT)
> > + q->aq_depth = (1 << d->acc_conf.q_fft.aq_depth_log2);
> > +
> > + q->mmio_reg_enqueue = RTE_PTR_ADD(d->mmio_base,
> > + queue_offset(d->pf_device,
> > + q->vf_id, q->qgrp_id, q->aq_id));
> > +
> > + rte_bbdev_log_debug(
> > + "Setup dev%u q%u: qgrp_id=%u, vf_id=%u,
> aq_id=%u, aq_depth=%u, mmio_reg_enqueue=%p base %p\n",
> > + dev->data->dev_id, queue_id, q->qgrp_id, q->vf_id,
> > + q->aq_id, q->aq_depth, q->mmio_reg_enqueue,
> > + d->mmio_base);
> > +
> > + dev->data->queues[queue_id].queue_private = q;
> > + return 0;
> > +}
> > +
> > +
> > +static int
> > +acc_queue_stop(struct rte_bbdev *dev, uint16_t queue_id) {
> > + struct acc_queue *q;
> > + q = dev->data->queues[queue_id].queue_private;
> > + rte_bbdev_log(INFO, "Queue Stop %d H/T/D %d %d %x OpType %d",
> > + queue_id, q->sw_ring_head, q->sw_ring_tail,
> > + q->sw_ring_depth, q->op_type);
> > + /* ignore all operations in flight and clear counters */
> > + q->sw_ring_tail = q->sw_ring_head;
> > + q->aq_enqueued = 0;
> > + q->aq_dequeued = 0;
> > + dev->data->queues[queue_id].queue_stats.enqueued_count = 0;
> > + dev->data->queues[queue_id].queue_stats.dequeued_count = 0;
> > + dev->data->queues[queue_id].queue_stats.enqueue_err_count = 0;
> > + dev->data->queues[queue_id].queue_stats.dequeue_err_count = 0;
> > + dev->data->queues[queue_id].queue_stats.enqueue_warn_count =
> 0;
> > + dev->data->queues[queue_id].queue_stats.dequeue_warn_count =
> 0;
> > + return 0;
> > +}
> > +
> > +/* Release ACC200 queue */
> > +static int
> > +acc200_queue_release(struct rte_bbdev *dev, uint16_t q_id) {
> > + struct acc_device *d = dev->data->dev_private;
> > + struct acc_queue *q = dev->data->queues[q_id].queue_private;
> > +
> > + if (q != NULL) {
> > + /* Mark the Queue as un-assigned */
> > + d->q_assigned_bit_map[q->qgrp_id] &= (0xFFFFFFFFFFFFFFFF
> -
>
> s/0xFFFFFFFFFFFFFFFF/~0ULL/
>
OK
> > + (uint64_t) (1 << q->aq_id));
>
> Or better:
>
> d->q_assigned_bit_map[q->qgrp_id] &= ~(1 << q->aq_id);
>
> > + rte_free(q->companion_ring_addr);
> > + rte_free(q->lb_in);
> > + rte_free(q->lb_out);
> > + rte_free(q);
> > + dev->data->queues[q_id].queue_private = NULL;
> > + }
> > +
> > + return 0;
> > +}
> > +
> > /* Get ACC200 device info */
> > static void
> > acc200_dev_info_get(struct rte_bbdev *dev, @@ -279,8 +646,12 @@
> > }
> >
> > static const struct rte_bbdev_ops acc200_bbdev_ops = {
> > + .setup_queues = acc200_setup_queues,
> > .close = acc200_dev_close,
> > .info_get = acc200_dev_info_get,
> > + .queue_setup = acc200_queue_setup,
> > + .queue_release = acc200_queue_release,
> > + .queue_stop = acc_queue_stop,
> > };
> >
> > /* ACC200 PCI PF address map */
@@ -220,16 +220,383 @@
acc_conf->q_fft.aq_depth_log2);
}
+/**
+ * Allocate the software ring memory and program the ACC200 ring registers.
+ *
+ * The device is rejected when its PF/VF role disagrees with
+ * acc_conf.pf_mode_en. The sw rings are obtained with the minimal
+ * allocation first and the 2 * 64MB block as fallback, then the ring base,
+ * ring size and tail pointer base are written to the PF or VF register set.
+ * The tail pointer and HARQ layout arrays are allocated only when they are
+ * not already present on the device.
+ *
+ * @param dev         bbdev device being configured.
+ * @param num_queues  Number of queues, passed to the minimal ring allocator.
+ * @param socket_id   Socket used for the ring and tail pointer allocations.
+ *
+ * @return 0 on success, -ENODEV on a PF/VF mode mismatch, -ENOMEM when an
+ *         allocation fails.
+ */
+static int
+acc200_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int socket_id)
+{
+	uint32_t phys_low, phys_high, value;
+	struct acc_device *d = dev->data->dev_private;
+	const struct acc200_registry_addr *reg_addr;
+	int ret;
+
+	if (d->pf_device && !d->acc_conf.pf_mode_en) {
+		rte_bbdev_log(NOTICE,
+				"%s has PF mode disabled. This PF can't be used.",
+				dev->data->name);
+		return -ENODEV;
+	}
+	if (!d->pf_device && d->acc_conf.pf_mode_en) {
+		rte_bbdev_log(NOTICE,
+				"%s has PF mode enabled. This VF can't be used.",
+				dev->data->name);
+		return -ENODEV;
+	}
+
+	alloc_sw_rings_min_mem(dev, d, num_queues, socket_id);
+
+	/* If minimal memory space approach failed, then allocate
+	 * the 2 * 64MB block for the sw rings
+	 */
+	if (d->sw_rings == NULL)
+		alloc_2x64mb_sw_rings_mem(dev, d, socket_id);
+
+	if (d->sw_rings == NULL) {
+		rte_bbdev_log(NOTICE,
+				"Failure allocating sw_rings memory");
+		/* An allocation failure, not a missing device */
+		return -ENOMEM;
+	}
+
+	/* Configure ACC200 with the base address for DMA descriptor rings
+	 * Same descriptor rings used for UL and DL DMA Engines
+	 * Note : Assuming only VF0 bundle is used for PF mode
+	 */
+	phys_high = (uint32_t)(d->sw_rings_iova >> 32);
+	phys_low = (uint32_t)(d->sw_rings_iova & ~(ACC_SIZE_64MBYTE-1));
+
+	/* Choose correct registry addresses for the device type */
+	if (d->pf_device)
+		reg_addr = &pf_reg_addr;
+	else
+		reg_addr = &vf_reg_addr;
+
+	/* Read the populated cfg from ACC200 registers */
+	fetch_acc200_config(dev);
+
+	/* Start Pmon: the values 0, 1 and 2 are written in sequence */
+	for (value = 0; value <= 2; value++) {
+		acc_reg_write(d, reg_addr->pmon_ctrl_a, value);
+		acc_reg_write(d, reg_addr->pmon_ctrl_b, value);
+		acc_reg_write(d, reg_addr->pmon_ctrl_c, value);
+	}
+
+	/* Release AXI from PF */
+	if (d->pf_device)
+		acc_reg_write(d, HWPfDmaAxiControl, 1);
+
+	acc_reg_write(d, reg_addr->dma_ring_ul5g_hi, phys_high);
+	acc_reg_write(d, reg_addr->dma_ring_ul5g_lo, phys_low);
+	acc_reg_write(d, reg_addr->dma_ring_dl5g_hi, phys_high);
+	acc_reg_write(d, reg_addr->dma_ring_dl5g_lo, phys_low);
+	acc_reg_write(d, reg_addr->dma_ring_ul4g_hi, phys_high);
+	acc_reg_write(d, reg_addr->dma_ring_ul4g_lo, phys_low);
+	acc_reg_write(d, reg_addr->dma_ring_dl4g_hi, phys_high);
+	acc_reg_write(d, reg_addr->dma_ring_dl4g_lo, phys_low);
+	acc_reg_write(d, reg_addr->dma_ring_fft_hi, phys_high);
+	acc_reg_write(d, reg_addr->dma_ring_fft_lo, phys_low);
+	/*
+	 * Configure Ring Size to the max queue ring size
+	 * (used for wrapping purpose)
+	 */
+	value = log2_basic(d->sw_ring_size / 64);
+	acc_reg_write(d, reg_addr->ring_size, value);
+
+	/* Configure tail pointer for use when SDONE enabled */
+	if (d->tail_ptrs == NULL)
+		d->tail_ptrs = rte_zmalloc_socket(
+				dev->device->driver->name,
+				ACC200_NUM_QGRPS * ACC200_NUM_AQS * sizeof(uint32_t),
+				RTE_CACHE_LINE_SIZE, socket_id);
+	if (d->tail_ptrs == NULL) {
+		rte_bbdev_log(ERR, "Failed to allocate tail ptr for %s:%u",
+				dev->device->driver->name,
+				dev->data->dev_id);
+		ret = -ENOMEM;
+		goto free_sw_rings;
+	}
+	d->tail_ptr_iova = rte_malloc_virt2iova(d->tail_ptrs);
+
+	phys_high = (uint32_t)(d->tail_ptr_iova >> 32);
+	phys_low = (uint32_t)(d->tail_ptr_iova);
+	acc_reg_write(d, reg_addr->tail_ptrs_ul5g_hi, phys_high);
+	acc_reg_write(d, reg_addr->tail_ptrs_ul5g_lo, phys_low);
+	acc_reg_write(d, reg_addr->tail_ptrs_dl5g_hi, phys_high);
+	acc_reg_write(d, reg_addr->tail_ptrs_dl5g_lo, phys_low);
+	acc_reg_write(d, reg_addr->tail_ptrs_ul4g_hi, phys_high);
+	acc_reg_write(d, reg_addr->tail_ptrs_ul4g_lo, phys_low);
+	acc_reg_write(d, reg_addr->tail_ptrs_dl4g_hi, phys_high);
+	acc_reg_write(d, reg_addr->tail_ptrs_dl4g_lo, phys_low);
+	acc_reg_write(d, reg_addr->tail_ptrs_fft_hi, phys_high);
+	acc_reg_write(d, reg_addr->tail_ptrs_fft_lo, phys_low);
+
+	if (d->harq_layout == NULL)
+		d->harq_layout = rte_zmalloc_socket("HARQ Layout",
+				ACC_HARQ_LAYOUT * sizeof(*d->harq_layout),
+				RTE_CACHE_LINE_SIZE, dev->data->socket_id);
+	if (d->harq_layout == NULL) {
+		rte_bbdev_log(ERR, "Failed to allocate harq_layout for %s:%u",
+				dev->device->driver->name,
+				dev->data->dev_id);
+		ret = -ENOMEM;
+		goto free_tail_ptrs;
+	}
+
+	/* Mark as configured properly */
+	d->configured = true;
+
+	rte_bbdev_log_debug(
+			"ACC200 (%s) configured sw_rings = %p, sw_rings_iova = %#"
+			PRIx64, dev->data->name, d->sw_rings, d->sw_rings_iova);
+
+	return 0;
+
+free_tail_ptrs:
+	rte_free(d->tail_ptrs);
+	d->tail_ptrs = NULL;
+free_sw_rings:
+	/* Release through sw_rings_base, the pointer acc200_dev_close() frees,
+	 * and clear both ring pointers so that neither a later close nor a
+	 * reconfiguration can free or reuse this memory again.
+	 */
+	rte_free(d->sw_rings_base);
+	d->sw_rings_base = NULL;
+	d->sw_rings = NULL;
+
+	return ret;
+}
+
/* Free memory used for software rings */
static int
acc200_dev_close(struct rte_bbdev *dev)
{
- RTE_SET_USED(dev);
+ struct acc_device *d = dev->data->dev_private;
+ /* Rings, tail pointers and HARQ layout are released together, keyed on
+  * the ring base pointer being set.
+  */
+ if (d->sw_rings_base != NULL) {
+ rte_free(d->tail_ptrs);
+ rte_free(d->sw_rings_base);
+ rte_free(d->harq_layout);
+ d->sw_rings_base = NULL;
+ /* acc200_setup_queues() tests sw_rings against NULL to decide
+  * whether rings exist; presumably it refers to the memory just
+  * freed, so clear it as well.
+  */
+ d->sw_rings = NULL;
+ d->tail_ptrs = NULL;
+ d->harq_layout = NULL;
+ }
/* Ensure all in flight HW transactions are completed */
usleep(ACC_LONG_WAIT);
return 0;
}
+/**
+ * Report an ACC200 queue index which is free
+ * Return 0 to 16k for a valid queue_idx or -1 when no queue is available
+ * Note : Only supporting VF0 Bundle for PF mode
+ *
+ * The queue group is selected from the topology of the accelerator matching
+ * conf->op_type, offset by conf->priority. The first atomic queue of that
+ * group whose bit is clear in q_assigned_bit_map is marked assigned and
+ * returned as (group index << ACC200_GRP_ID_SHIFT) + queue index.
+ */
+static int
+acc200_find_free_queue_idx(struct rte_bbdev *dev,
+		const struct rte_bbdev_queue_conf *conf)
+{
+	struct acc_device *d = dev->data->dev_private;
+	/* Accelerator matching each operation type, indexed by op_type */
+	const int op_2_acc[6] = {0, UL_4G, DL_4G, UL_5G, DL_5G, FFT};
+	const unsigned int num_op_types =
+			sizeof(op_2_acc) / sizeof(op_2_acc[0]);
+	struct rte_acc_queue_topology *qtop = NULL;
+	uint16_t group_idx;
+	uint64_t aq_idx;
+
+	/* Never index the lookup table with an operation type it lacks */
+	if ((unsigned int)conf->op_type >= num_op_types) {
+		rte_bbdev_log(INFO, "Invalid operation type on %s, op_type %u",
+				dev->data->name, (unsigned int)conf->op_type);
+		return -1;
+	}
+
+	qtopFromAcc(&qtop, op_2_acc[conf->op_type], &(d->acc_conf));
+	if (qtop == NULL)
+		return -1;
+
+	/* Identify matching QGroup Index which are sorted in priority order */
+	group_idx = qtop->first_qgroup_index;
+	group_idx += conf->priority;
+	if (group_idx >= ACC200_NUM_QGRPS ||
+			conf->priority >= qtop->num_qgroups) {
+		rte_bbdev_log(INFO, "Invalid Priority on %s, priority %u",
+				dev->data->name, conf->priority);
+		return -1;
+	}
+
+	/* Find a free AQ_idx */
+	for (aq_idx = 0; aq_idx < qtop->num_aqs_per_groups; aq_idx++) {
+		if (((d->q_assigned_bit_map[group_idx] >> aq_idx) & 0x1) == 0) {
+			/* Mark the Queue as assigned; the shift is done on a
+			 * 64-bit one to match the width used by the test above.
+			 */
+			d->q_assigned_bit_map[group_idx] |= (1ULL << aq_idx);
+			/* Report the AQ Index */
+			return (group_idx << ACC200_GRP_ID_SHIFT) + aq_idx;
+		}
+	}
+	rte_bbdev_log(INFO, "Failed to find free queue on %s, priority %u",
+			dev->data->name, conf->priority);
+	return -1;
+}
+
+/**
+ * Setup ACC200 queue
+ *
+ * Allocates the queue structure and its lb_in, lb_out and companion ring
+ * buffers, presets every descriptor of the queue's slice of the software
+ * rings, claims a free atomic queue and stores the result in
+ * dev->data->queues[queue_id].queue_private.
+ *
+ * @param dev       bbdev device owning the queue.
+ * @param queue_id  Index of the queue, also selecting its sw ring slice.
+ * @param conf      Queue configuration (socket, op_type, queue_size,
+ *                  priority).
+ *
+ * @return 0 on success, -ENODEV when the device has no private data,
+ *         -ENOMEM when an allocation fails, -EINVAL when no atomic queue
+ *         is available for the requested operation type and priority.
+ */
+static int
+acc200_queue_setup(struct rte_bbdev *dev, uint16_t queue_id,
+		const struct rte_bbdev_queue_conf *conf)
+{
+	struct acc_device *d = dev->data->dev_private;
+	struct acc_queue *q;
+	union acc_dma_desc *desc = NULL;
+	unsigned int desc_idx, b_idx;
+	uint64_t fcw_offset;
+	int16_t q_idx;
+	int fcw_len;
+	int ret;
+
+	if (d == NULL) {
+		rte_bbdev_log(ERR, "Undefined device");
+		return -ENODEV;
+	}
+	/* Allocate the queue data structure. */
+	q = rte_zmalloc_socket(dev->device->driver->name, sizeof(*q),
+			RTE_CACHE_LINE_SIZE, conf->socket);
+	if (q == NULL) {
+		rte_bbdev_log(ERR, "Failed to allocate queue memory");
+		return -ENOMEM;
+	}
+
+	q->d = d;
+	q->ring_addr = RTE_PTR_ADD(d->sw_rings, (d->sw_ring_size * queue_id));
+	q->ring_addr_iova = d->sw_rings_iova + (d->sw_ring_size * queue_id);
+
+	/* Frame control word length; every type not listed uses the FFT one */
+	switch (conf->op_type) {
+	case RTE_BBDEV_OP_LDPC_ENC:
+		fcw_len = ACC_FCW_LE_BLEN;
+		break;
+	case RTE_BBDEV_OP_TURBO_DEC:
+		fcw_len = ACC_FCW_TD_BLEN;
+		break;
+	case RTE_BBDEV_OP_LDPC_DEC:
+		fcw_len = ACC_FCW_LD_BLEN;
+		break;
+	default:
+		fcw_len = ACC_FCW_FFT_BLEN;
+		break;
+	}
+
+	/* Prepare the Ring with default descriptor format */
+	for (desc_idx = 0; desc_idx < d->sw_ring_max_depth; desc_idx++) {
+		desc = q->ring_addr + desc_idx;
+		desc->req.word0 = ACC_DMA_DESC_TYPE;
+		desc->req.word1 = 0; /**< Timestamp */
+		desc->req.word2 = 0;
+		desc->req.word3 = 0;
+		fcw_offset = (desc_idx << 8) + ACC_DESC_FCW_OFFSET;
+		desc->req.data_ptrs[0].address = q->ring_addr_iova + fcw_offset;
+		desc->req.data_ptrs[0].blen = fcw_len;
+		desc->req.data_ptrs[0].blkid = ACC_DMA_BLKID_FCW;
+		desc->req.data_ptrs[0].last = 0;
+		desc->req.data_ptrs[0].dma_ext = 0;
+		/* Remaining pointers alternate input / output, two per step */
+		for (b_idx = 1; b_idx < ACC_DMA_MAX_NUM_POINTERS - 1;
+				b_idx += 2) {
+			desc->req.data_ptrs[b_idx].blkid = ACC_DMA_BLKID_IN;
+			desc->req.data_ptrs[b_idx].last = 1;
+			desc->req.data_ptrs[b_idx].dma_ext = 0;
+			desc->req.data_ptrs[b_idx + 1].blkid =
+					ACC_DMA_BLKID_OUT_ENC;
+			desc->req.data_ptrs[b_idx + 1].last = 1;
+			desc->req.data_ptrs[b_idx + 1].dma_ext = 0;
+		}
+		/* Preset some fields of LDPC FCW */
+		desc->req.fcw_ld.FCWversion = ACC_FCW_VER;
+		desc->req.fcw_ld.gain_i = 1;
+		desc->req.fcw_ld.gain_h = 1;
+	}
+
+	q->lb_in = rte_zmalloc_socket(dev->device->driver->name,
+			RTE_CACHE_LINE_SIZE,
+			RTE_CACHE_LINE_SIZE, conf->socket);
+	if (q->lb_in == NULL) {
+		rte_bbdev_log(ERR, "Failed to allocate lb_in memory");
+		ret = -ENOMEM;
+		goto free_q;
+	}
+	q->lb_in_addr_iova = rte_malloc_virt2iova(q->lb_in);
+	q->lb_out = rte_zmalloc_socket(dev->device->driver->name,
+			RTE_CACHE_LINE_SIZE,
+			RTE_CACHE_LINE_SIZE, conf->socket);
+	if (q->lb_out == NULL) {
+		rte_bbdev_log(ERR, "Failed to allocate lb_out memory");
+		ret = -ENOMEM;
+		goto free_lb_in;
+	}
+	q->lb_out_addr_iova = rte_malloc_virt2iova(q->lb_out);
+	q->companion_ring_addr = rte_zmalloc_socket(dev->device->driver->name,
+			d->sw_ring_max_depth * sizeof(*q->companion_ring_addr),
+			RTE_CACHE_LINE_SIZE, conf->socket);
+	if (q->companion_ring_addr == NULL) {
+		rte_bbdev_log(ERR, "Failed to allocate companion_ring memory");
+		ret = -ENOMEM;
+		goto free_lb_out;
+	}
+
+	/*
+	 * Software queue ring wraps synchronously with the HW when it reaches
+	 * the boundary of the maximum allocated queue size, no matter what the
+	 * sw queue size is. This wrapping is guarded by setting the wrap_mask
+	 * to represent the maximum queue size as allocated at the time when
+	 * the device has been setup (in configure()).
+	 *
+	 * The queue depth is set to the queue size value (conf->queue_size).
+	 * This limits the occupancy of the queue at any point of time, so that
+	 * the queue does not get swamped with enqueue requests.
+	 */
+	q->sw_ring_depth = conf->queue_size;
+	q->sw_ring_wrap_mask = d->sw_ring_max_depth - 1;
+
+	q->op_type = conf->op_type;
+
+	q_idx = acc200_find_free_queue_idx(dev, conf);
+	if (q_idx == -1) {
+		/* Report an errno like every other failure of this function */
+		ret = -EINVAL;
+		goto free_companion_ring;
+	}
+
+	q->qgrp_id = (q_idx >> ACC200_GRP_ID_SHIFT) & 0xF;
+	q->vf_id = (q_idx >> ACC200_VF_ID_SHIFT) & 0x3F;
+	q->aq_id = q_idx & 0xF;
+
+	/* Atomic queue depth comes from the topology of the operation type */
+	switch (conf->op_type) {
+	case RTE_BBDEV_OP_TURBO_DEC:
+		q->aq_depth = (1 << d->acc_conf.q_ul_4g.aq_depth_log2);
+		break;
+	case RTE_BBDEV_OP_TURBO_ENC:
+		q->aq_depth = (1 << d->acc_conf.q_dl_4g.aq_depth_log2);
+		break;
+	case RTE_BBDEV_OP_LDPC_DEC:
+		q->aq_depth = (1 << d->acc_conf.q_ul_5g.aq_depth_log2);
+		break;
+	case RTE_BBDEV_OP_LDPC_ENC:
+		q->aq_depth = (1 << d->acc_conf.q_dl_5g.aq_depth_log2);
+		break;
+	case RTE_BBDEV_OP_FFT:
+		q->aq_depth = (1 << d->acc_conf.q_fft.aq_depth_log2);
+		break;
+	default:
+		q->aq_depth = 0;
+		break;
+	}
+
+	q->mmio_reg_enqueue = RTE_PTR_ADD(d->mmio_base,
+			queue_offset(d->pf_device,
+					q->vf_id, q->qgrp_id, q->aq_id));
+
+	rte_bbdev_log_debug(
+			"Setup dev%u q%u: qgrp_id=%u, vf_id=%u, aq_id=%u, aq_depth=%u, mmio_reg_enqueue=%p base %p\n",
+			dev->data->dev_id, queue_id, q->qgrp_id, q->vf_id,
+			q->aq_id, q->aq_depth, q->mmio_reg_enqueue,
+			d->mmio_base);
+
+	dev->data->queues[queue_id].queue_private = q;
+	return 0;
+
+	/* Unwind in reverse order of allocation */
+free_companion_ring:
+	rte_free(q->companion_ring_addr);
+free_lb_out:
+	rte_free(q->lb_out);
+free_lb_in:
+	rte_free(q->lb_in);
+free_q:
+	rte_free(q);
+	return ret;
+}
+
+
+/**
+ * Stop a queue: drop every operation still in flight and reset counters.
+ *
+ * The software ring tail is moved onto the head, the atomic queue
+ * enqueue/dequeue counters are zeroed and the queue statistics counters
+ * listed below are cleared.
+ *
+ * @param dev       bbdev device owning the queue.
+ * @param queue_id  Index of the queue to stop.
+ *
+ * @return 0 on success, -ENODEV when the queue has no private data.
+ */
+static int
+acc_queue_stop(struct rte_bbdev *dev, uint16_t queue_id)
+{
+	struct acc_queue *q;
+
+	q = dev->data->queues[queue_id].queue_private;
+	/* queue_private is reset to NULL when the queue is released */
+	if (q == NULL) {
+		rte_bbdev_log(ERR, "Queue %d is not set up", queue_id);
+		return -ENODEV;
+	}
+	rte_bbdev_log(INFO, "Queue Stop %d H/T/D %d %d %x OpType %d",
+			queue_id, q->sw_ring_head, q->sw_ring_tail,
+			q->sw_ring_depth, q->op_type);
+	/* ignore all operations in flight and clear counters */
+	q->sw_ring_tail = q->sw_ring_head;
+	q->aq_enqueued = 0;
+	q->aq_dequeued = 0;
+	dev->data->queues[queue_id].queue_stats.enqueued_count = 0;
+	dev->data->queues[queue_id].queue_stats.dequeued_count = 0;
+	dev->data->queues[queue_id].queue_stats.enqueue_err_count = 0;
+	dev->data->queues[queue_id].queue_stats.dequeue_err_count = 0;
+	dev->data->queues[queue_id].queue_stats.enqueue_warn_count = 0;
+	dev->data->queues[queue_id].queue_stats.dequeue_warn_count = 0;
+	return 0;
+}
+
+/**
+ * Release ACC200 queue
+ *
+ * Clears the queue's bit in the assignment bitmap of its queue group, frees
+ * the companion ring, lb_in, lb_out and the queue structure itself, then
+ * resets queue_private to NULL. A queue with no private data is left as is.
+ *
+ * @param dev   bbdev device owning the queue.
+ * @param q_id  Index of the queue to release.
+ *
+ * @return Always 0.
+ */
+static int
+acc200_queue_release(struct rte_bbdev *dev, uint16_t q_id)
+{
+	struct acc_device *d = dev->data->dev_private;
+	struct acc_queue *q = dev->data->queues[q_id].queue_private;
+
+	if (q != NULL) {
+		/* Mark the Queue as un-assigned; the mask is built from a
+		 * 64-bit one so it spans the whole bitmap word.
+		 */
+		d->q_assigned_bit_map[q->qgrp_id] &= ~(1ULL << q->aq_id);
+		rte_free(q->companion_ring_addr);
+		rte_free(q->lb_in);
+		rte_free(q->lb_out);
+		rte_free(q);
+		dev->data->queues[q_id].queue_private = NULL;
+	}
+
+	return 0;
+}
+
/* Get ACC200 device info */
static void
acc200_dev_info_get(struct rte_bbdev *dev,
@@ -279,8 +646,12 @@
}
static const struct rte_bbdev_ops acc200_bbdev_ops = {
+ .setup_queues = acc200_setup_queues, /* allocates the sw rings and programs the ring registers */
.close = acc200_dev_close,
.info_get = acc200_dev_info_get,
+ .queue_setup = acc200_queue_setup, /* allocates one queue and claims a free atomic queue */
+ .queue_release = acc200_queue_release, /* frees the queue and clears its assignment bit */
+ .queue_stop = acc_queue_stop, /* drops in-flight operations and resets counters */
};
/* ACC200 PCI PF address map */