[v2,16/37] baseband/acc100: add ring companion address
Checks
Commit Message
Store the virtual address of companion ring as part of queue
information. Use this address to calculate the op address.
Signed-off-by: Hernan Vargas <hernan.vargas@intel.com>
---
drivers/baseband/acc100/acc100_pmd.h | 12 ++
drivers/baseband/acc100/rte_acc100_pmd.c | 143 ++++++++++++++---------
2 files changed, 100 insertions(+), 55 deletions(-)
Comments
On 8/20/22 04:31, Hernan Vargas wrote:
> Store the virtual address of companion ring as part of queue
> information. Use this address to calculate the op address.
>
> Signed-off-by: Hernan Vargas <hernan.vargas@intel.com>
> ---
> drivers/baseband/acc100/acc100_pmd.h | 12 ++
> drivers/baseband/acc100/rte_acc100_pmd.c | 143 ++++++++++++++---------
> 2 files changed, 100 insertions(+), 55 deletions(-)
>
> diff --git a/drivers/baseband/acc100/acc100_pmd.h b/drivers/baseband/acc100/acc100_pmd.h
> index c98a182be6..20157e5886 100644
> --- a/drivers/baseband/acc100/acc100_pmd.h
> +++ b/drivers/baseband/acc100/acc100_pmd.h
> @@ -126,6 +126,7 @@
> #define ACC100_5GUL_SIZE_0 16
> #define ACC100_5GUL_SIZE_1 40
> #define ACC100_5GUL_OFFSET_0 36
> +#define ACC100_COMPANION_PTRS 8
>
> #define ACC100_FCW_VER 2
> #define ACC100_MUX_5GDL_DESC 6
> @@ -375,6 +376,15 @@ struct __rte_packed acc100_fcw_le {
> uint32_t res8;
> };
>
> +struct __rte_packed acc100_pad_ptr {
> + void *op_addr;
> + uint64_t pad1; /* pad to 64 bits */
A comment would help to understand why padding is necessary.
> +};
> +
> +struct __rte_packed acc100_ptrs {
> + struct acc100_pad_ptr ptr[ACC100_COMPANION_PTRS];
> +};
> +
> /* ACC100 DMA Request Descriptor */
> struct __rte_packed acc100_dma_req_desc {
> union {
> @@ -568,6 +578,8 @@ struct __rte_cache_aligned acc100_queue {
> uint32_t sw_ring_depth;
> /* mask used to wrap enqueued descriptors on the sw ring */
> uint32_t sw_ring_wrap_mask;
> + /* Virtual address of companion ring */
> + struct acc100_ptrs *companion_ring_addr;
> /* MMIO register used to enqueue descriptors */
> void *mmio_reg_enqueue;
> uint8_t vf_id; /* VF ID (max = 63) */
> diff --git a/drivers/baseband/acc100/rte_acc100_pmd.c b/drivers/baseband/acc100/rte_acc100_pmd.c
> index 69c0714a37..ea54152856 100644
> --- a/drivers/baseband/acc100/rte_acc100_pmd.c
> +++ b/drivers/baseband/acc100/rte_acc100_pmd.c
> @@ -913,6 +913,17 @@ acc100_queue_setup(struct rte_bbdev *dev, uint16_t queue_id,
> return -ENOMEM;
> }
> q->lb_out_addr_iova = rte_malloc_virt2iova(q->lb_out);
> + q->companion_ring_addr = rte_zmalloc_socket(dev->device->driver->name,
> + d->sw_ring_max_depth * sizeof(*q->companion_ring_addr),
> + RTE_CACHE_LINE_SIZE, conf->socket);
> + if (q->companion_ring_addr == NULL) {
> + rte_bbdev_log(ERR, "Failed to allocate companion_ring memory");
> + rte_free(q->derm_buffer);
> + rte_free(q->lb_in);
> + rte_free(q->lb_out);
> + rte_free(q);
> + return -ENOMEM;
> + }
Same comment as on the previous patch: it is better to have a proper error path
than to duplicate the free operations.
> /*
> * Software queue ring wraps synchronously with the HW when it reaches
> @@ -932,6 +943,7 @@ acc100_queue_setup(struct rte_bbdev *dev, uint16_t queue_id,
>
> q_idx = acc100_find_free_queue_idx(dev, conf);
> if (q_idx == -1) {
> + rte_free(q->companion_ring_addr);
> rte_free(q->derm_buffer);
> rte_free(q->lb_in);
> rte_free(q->lb_out);
> @@ -970,6 +982,7 @@ acc100_queue_release(struct rte_bbdev *dev, uint16_t q_id)
> /* Mark the Queue as un-assigned */
> d->q_assigned_bit_map[q->qgrp_id] &= (0xFFFFFFFF -
> (1 << q->aq_id));
> + rte_free(q->companion_ring_addr);
> rte_free(q->derm_buffer);
> rte_free(q->lb_in);
> rte_free(q->lb_out);
> @@ -2889,6 +2902,10 @@ enqueue_ldpc_enc_n_op_cb(struct acc100_queue *q, struct rte_bbdev_enc_op **ops,
> }
>
> desc->req.op_addr = ops[0];
> + /* Keep track of pointers even when multiplexed in single descriptor */
> + struct acc100_ptrs *context_ptrs = q->companion_ring_addr + desc_idx;
Don't mix declarations & code.
> + for (i = 0; i < num; i++)
> + context_ptrs->ptr[i].op_addr = ops[i];
>
> #ifdef RTE_LIBRTE_BBDEV_DEBUG
> rte_memdump(stderr, "FCW", &desc->req.fcw_le,
> @@ -4517,15 +4534,16 @@ acc100_enqueue_ldpc_dec(struct rte_bbdev_queue_data *q_data,
> /* Dequeue one encode operations from ACC100 device in CB mode */
> static inline int
> dequeue_enc_one_op_cb(struct acc100_queue *q, struct rte_bbdev_enc_op **ref_op,
> - uint16_t total_dequeued_cbs, uint32_t *aq_dequeued)
> + uint16_t *dequeued_ops, uint32_t *aq_dequeued,
> + uint16_t *dequeued_descs)
> {
> union acc100_dma_desc *desc, atom_desc;
> union acc100_dma_rsp_desc rsp;
> struct rte_bbdev_enc_op *op;
> int i;
> -
> - desc = q->ring_addr + ((q->sw_ring_tail + total_dequeued_cbs)
> + int desc_idx = ((q->sw_ring_tail + *dequeued_descs)
> & q->sw_ring_wrap_mask);
Please add a new line.
> + desc = q->ring_addr + desc_idx;
> atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc,
> __ATOMIC_RELAXED);
>
> @@ -4534,7 +4552,8 @@ dequeue_enc_one_op_cb(struct acc100_queue *q, struct rte_bbdev_enc_op **ref_op,
> return -1;
>
> rsp.val = atom_desc.rsp.val;
> - rte_bbdev_log_debug("Resp. desc %p: %x", desc, rsp.val);
> + rte_bbdev_log_debug("Resp. desc %p: %x num %d\n",
> + desc, rsp.val, desc->req.numCBs);
>
> /* Dequeue */
> op = desc->req.op_addr;
> @@ -4552,27 +4571,32 @@ dequeue_enc_one_op_cb(struct acc100_queue *q, struct rte_bbdev_enc_op **ref_op,
> desc->rsp.add_info_0 = 0; /*Reserved bits */
> desc->rsp.add_info_1 = 0; /*Reserved bits */
>
> - /* Flag that the muxing cause loss of opaque data */
> - op->opaque_data = (void *)-1;
> - for (i = 0 ; i < desc->req.numCBs; i++)
> - ref_op[i] = op;
> + ref_op[0] = op;
> + struct acc100_ptrs *context_ptrs = q->companion_ring_addr + desc_idx;
Don't mix declarations & code.
> + for (i = 1 ; i < desc->req.numCBs; i++)
> + ref_op[i] = context_ptrs->ptr[i].op_addr;
>
> - /* One CB (op) was successfully dequeued */
> + /* One op was successfully dequeued */
> + (*dequeued_descs)++;
> + *dequeued_ops += desc->req.numCBs;
> return desc->req.numCBs;
> }
>
> -/* Dequeue one encode operations from ACC100 device in TB mode */
> +/* Dequeue one LDPC encode operations from ACC100 device in TB mode
> + * That operation may cover multiple descriptors
> + */
> static inline int
> dequeue_enc_one_op_tb(struct acc100_queue *q, struct rte_bbdev_enc_op **ref_op,
> - uint16_t total_dequeued_cbs, uint32_t *aq_dequeued)
> + uint16_t *dequeued_ops, uint32_t *aq_dequeued,
> + uint16_t *dequeued_descs)
> {
> union acc100_dma_desc *desc, *last_desc, atom_desc;
> union acc100_dma_rsp_desc rsp;
> struct rte_bbdev_enc_op *op;
> uint8_t i = 0;
> - uint16_t current_dequeued_cbs = 0, cbs_in_tb;
> + uint16_t current_dequeued_descs = 0, descs_in_tb;
>
> - desc = q->ring_addr + ((q->sw_ring_tail + total_dequeued_cbs)
> + desc = q->ring_addr + ((q->sw_ring_tail + *dequeued_descs)
> & q->sw_ring_wrap_mask);
> atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc,
> __ATOMIC_RELAXED);
> @@ -4582,10 +4606,10 @@ dequeue_enc_one_op_tb(struct acc100_queue *q, struct rte_bbdev_enc_op **ref_op,
> return -1;
>
> /* Get number of CBs in dequeued TB */
> - cbs_in_tb = desc->req.cbs_in_tb;
> + descs_in_tb = desc->req.cbs_in_tb;
> /* Get last CB */
> last_desc = q->ring_addr + ((q->sw_ring_tail
> - + total_dequeued_cbs + cbs_in_tb - 1)
> + + *dequeued_descs + descs_in_tb - 1)
> & q->sw_ring_wrap_mask);
> /* Check if last CB in TB is ready to dequeue (and thus
> * the whole TB) - checking sdone bit. If not return.
> @@ -4601,15 +4625,17 @@ dequeue_enc_one_op_tb(struct acc100_queue *q, struct rte_bbdev_enc_op **ref_op,
> /* Clearing status, it will be set based on response */
> op->status = 0;
>
> - while (i < cbs_in_tb) {
> + while (i < descs_in_tb) {
> desc = q->ring_addr + ((q->sw_ring_tail
> - + total_dequeued_cbs)
> + + *dequeued_descs)
> & q->sw_ring_wrap_mask);
> atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc,
> __ATOMIC_RELAXED);
> rsp.val = atom_desc.rsp.val;
> - rte_bbdev_log_debug("Resp. desc %p: %x", desc,
> - rsp.val);
> + rte_bbdev_log_debug("Resp. desc %p: %x descs %d cbs %d\n",
> + desc,
> + rsp.val, descs_in_tb,
> + desc->req.numCBs);
>
> op->status |= ((rsp.dma_err) ? (1 << RTE_BBDEV_DRV_ERROR) : 0);
> op->status |= ((rsp.fcw_err) ? (1 << RTE_BBDEV_DRV_ERROR) : 0);
> @@ -4621,14 +4647,14 @@ dequeue_enc_one_op_tb(struct acc100_queue *q, struct rte_bbdev_enc_op **ref_op,
> desc->rsp.val = ACC100_DMA_DESC_TYPE;
> desc->rsp.add_info_0 = 0;
> desc->rsp.add_info_1 = 0;
> - total_dequeued_cbs++;
> - current_dequeued_cbs++;
> + (*dequeued_descs)++;
> + current_dequeued_descs++;
> i++;
> }
>
> *ref_op = op;
> -
> - return current_dequeued_cbs;
> + (*dequeued_ops)++;
Please keep the new line.
> + return current_dequeued_descs;
> }
>
> /* Dequeue one decode operation from ACC100 device in CB mode */
> @@ -4824,12 +4850,11 @@ acc100_dequeue_enc(struct rte_bbdev_queue_data *q_data,
> struct rte_bbdev_enc_op **ops, uint16_t num)
> {
> struct acc100_queue *q = q_data->queue_private;
> - uint16_t dequeue_num;
> uint32_t avail = acc100_ring_avail_deq(q);
> uint32_t aq_dequeued = 0;
> - uint16_t i, dequeued_cbs = 0;
> - struct rte_bbdev_enc_op *op;
> + uint16_t i, dequeued_ops = 0, dequeued_descs = 0;
> int ret;
> + struct rte_bbdev_enc_op *op;
> if (avail == 0)
> return 0;
> #ifdef RTE_LIBRTE_BBDEV_DEBUG
> @@ -4838,31 +4863,34 @@ acc100_dequeue_enc(struct rte_bbdev_queue_data *q_data,
> return 0;
> }
> #endif
> + op = (q->ring_addr + (q->sw_ring_tail &
> + q->sw_ring_wrap_mask))->req.op_addr;
> + if (unlikely(ops == NULL || op == NULL))
> + return 0;
>
> - dequeue_num = (avail < num) ? avail : num;
> + int cbm = op->turbo_enc.code_block_mode;
>
> - for (i = 0; i < dequeue_num; ++i) {
> - op = (q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
> - & q->sw_ring_wrap_mask))->req.op_addr;
> - if (op->turbo_enc.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK)
> - ret = dequeue_enc_one_op_tb(q, &ops[i], dequeued_cbs,
> - &aq_dequeued);
> + for (i = 0; i < num; i++) {
> + if (cbm == RTE_BBDEV_TRANSPORT_BLOCK)
> + ret = dequeue_enc_one_op_tb(q, &ops[dequeued_ops],
> + &dequeued_ops, &aq_dequeued,
> + &dequeued_descs);
> else
> - ret = dequeue_enc_one_op_cb(q, &ops[i], dequeued_cbs,
> - &aq_dequeued);
> -
> + ret = dequeue_enc_one_op_cb(q, &ops[dequeued_ops],
> + &dequeued_ops, &aq_dequeued,
> + &dequeued_descs);
> if (ret < 0)
> break;
> - dequeued_cbs += ret;
> + if (dequeued_ops >= num)
> + break;
> }
>
> q->aq_dequeued += aq_dequeued;
> - q->sw_ring_tail += dequeued_cbs;
> + q->sw_ring_tail += dequeued_descs;
>
> /* Update enqueue stats */
> - q_data->queue_stats.dequeued_count += i;
> -
> - return i;
> + q_data->queue_stats.dequeued_count += dequeued_ops;
Please keep the new line.
> + return dequeued_ops;
> }
>
> /* Dequeue LDPC encode operations from ACC100 device. */
> @@ -4873,24 +4901,31 @@ acc100_dequeue_ldpc_enc(struct rte_bbdev_queue_data *q_data,
> struct acc100_queue *q = q_data->queue_private;
> uint32_t avail = acc100_ring_avail_deq(q);
> uint32_t aq_dequeued = 0;
> - uint16_t dequeue_num, i, dequeued_cbs = 0, dequeued_descs = 0;
> + uint16_t i, dequeued_ops = 0, dequeued_descs = 0;
> int ret;
> -
> + struct rte_bbdev_enc_op *op;
> #ifdef RTE_LIBRTE_BBDEV_DEBUG
> if (unlikely(ops == 0 && q == NULL))
The check for q being non-NULL should be moved out of the #ifdef debug,
as q is dereferenced afterwards.
More generally, sanity checks like this one that prevent crashes should
not be limited to debug builds.
> return 0;
> #endif
> + op = (q->ring_addr + (q->sw_ring_tail &
> + q->sw_ring_wrap_mask))->req.op_addr;
Splitting this in two would provide clarity, and give the possibility
to check that the descriptor address is valid before dereferencing it.
> + if (unlikely(ops == NULL || op == NULL))
> + return 0;
> + int cbm = op->ldpc_enc.code_block_mode;
>
> - dequeue_num = RTE_MIN(avail, num);
> -
> - for (i = 0; i < dequeue_num; i++) {
> - ret = dequeue_enc_one_op_cb(q, &ops[dequeued_cbs],
> - dequeued_descs, &aq_dequeued);
> + for (i = 0; i < avail; i++) {
> + if (cbm == RTE_BBDEV_TRANSPORT_BLOCK)
> + ret = dequeue_enc_one_op_tb(q, &ops[dequeued_ops],
> + &dequeued_ops, &aq_dequeued,
> + &dequeued_descs);
> + else
> + ret = dequeue_enc_one_op_cb(q, &ops[dequeued_ops],
> + &dequeued_ops, &aq_dequeued,
> + &dequeued_descs);
> if (ret < 0)
> break;
> - dequeued_cbs += ret;
> - dequeued_descs++;
> - if (dequeued_cbs >= num)
> + if (dequeued_ops >= num)
> break;
> }
>
> @@ -4898,12 +4933,10 @@ acc100_dequeue_ldpc_enc(struct rte_bbdev_queue_data *q_data,
> q->sw_ring_tail += dequeued_descs;
>
> /* Update enqueue stats */
> - q_data->queue_stats.dequeued_count += dequeued_cbs;
> -
> - return dequeued_cbs;
> + q_data->queue_stats.dequeued_count += dequeued_ops;
Please keep the new line.
> + return dequeued_ops;
> }
>
> -
> /* Dequeue decode operations from ACC100 device. */
> static uint16_t
> acc100_dequeue_dec(struct rte_bbdev_queue_data *q_data,
@@ -126,6 +126,7 @@
#define ACC100_5GUL_SIZE_0 16
#define ACC100_5GUL_SIZE_1 40
#define ACC100_5GUL_OFFSET_0 36
+#define ACC100_COMPANION_PTRS 8
#define ACC100_FCW_VER 2
#define ACC100_MUX_5GDL_DESC 6
@@ -375,6 +376,15 @@ struct __rte_packed acc100_fcw_le {
uint32_t res8;
};
+struct __rte_packed acc100_pad_ptr {
+ void *op_addr;
+ uint64_t pad1; /* pad to 64 bits */
+};
+
+struct __rte_packed acc100_ptrs {
+ struct acc100_pad_ptr ptr[ACC100_COMPANION_PTRS];
+};
+
/* ACC100 DMA Request Descriptor */
struct __rte_packed acc100_dma_req_desc {
union {
@@ -568,6 +578,8 @@ struct __rte_cache_aligned acc100_queue {
uint32_t sw_ring_depth;
/* mask used to wrap enqueued descriptors on the sw ring */
uint32_t sw_ring_wrap_mask;
+ /* Virtual address of companion ring */
+ struct acc100_ptrs *companion_ring_addr;
/* MMIO register used to enqueue descriptors */
void *mmio_reg_enqueue;
uint8_t vf_id; /* VF ID (max = 63) */
@@ -913,6 +913,17 @@ acc100_queue_setup(struct rte_bbdev *dev, uint16_t queue_id,
return -ENOMEM;
}
q->lb_out_addr_iova = rte_malloc_virt2iova(q->lb_out);
+ q->companion_ring_addr = rte_zmalloc_socket(dev->device->driver->name,
+ d->sw_ring_max_depth * sizeof(*q->companion_ring_addr),
+ RTE_CACHE_LINE_SIZE, conf->socket);
+ if (q->companion_ring_addr == NULL) {
+ rte_bbdev_log(ERR, "Failed to allocate companion_ring memory");
+ rte_free(q->derm_buffer);
+ rte_free(q->lb_in);
+ rte_free(q->lb_out);
+ rte_free(q);
+ return -ENOMEM;
+ }
/*
* Software queue ring wraps synchronously with the HW when it reaches
@@ -932,6 +943,7 @@ acc100_queue_setup(struct rte_bbdev *dev, uint16_t queue_id,
q_idx = acc100_find_free_queue_idx(dev, conf);
if (q_idx == -1) {
+ rte_free(q->companion_ring_addr);
rte_free(q->derm_buffer);
rte_free(q->lb_in);
rte_free(q->lb_out);
@@ -970,6 +982,7 @@ acc100_queue_release(struct rte_bbdev *dev, uint16_t q_id)
/* Mark the Queue as un-assigned */
d->q_assigned_bit_map[q->qgrp_id] &= (0xFFFFFFFF -
(1 << q->aq_id));
+ rte_free(q->companion_ring_addr);
rte_free(q->derm_buffer);
rte_free(q->lb_in);
rte_free(q->lb_out);
@@ -2889,6 +2902,10 @@ enqueue_ldpc_enc_n_op_cb(struct acc100_queue *q, struct rte_bbdev_enc_op **ops,
}
desc->req.op_addr = ops[0];
+ /* Keep track of pointers even when multiplexed in single descriptor */
+ struct acc100_ptrs *context_ptrs = q->companion_ring_addr + desc_idx;
+ for (i = 0; i < num; i++)
+ context_ptrs->ptr[i].op_addr = ops[i];
#ifdef RTE_LIBRTE_BBDEV_DEBUG
rte_memdump(stderr, "FCW", &desc->req.fcw_le,
@@ -4517,15 +4534,16 @@ acc100_enqueue_ldpc_dec(struct rte_bbdev_queue_data *q_data,
/* Dequeue one encode operations from ACC100 device in CB mode */
static inline int
dequeue_enc_one_op_cb(struct acc100_queue *q, struct rte_bbdev_enc_op **ref_op,
- uint16_t total_dequeued_cbs, uint32_t *aq_dequeued)
+ uint16_t *dequeued_ops, uint32_t *aq_dequeued,
+ uint16_t *dequeued_descs)
{
union acc100_dma_desc *desc, atom_desc;
union acc100_dma_rsp_desc rsp;
struct rte_bbdev_enc_op *op;
int i;
-
- desc = q->ring_addr + ((q->sw_ring_tail + total_dequeued_cbs)
+ int desc_idx = ((q->sw_ring_tail + *dequeued_descs)
& q->sw_ring_wrap_mask);
+ desc = q->ring_addr + desc_idx;
atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc,
__ATOMIC_RELAXED);
@@ -4534,7 +4552,8 @@ dequeue_enc_one_op_cb(struct acc100_queue *q, struct rte_bbdev_enc_op **ref_op,
return -1;
rsp.val = atom_desc.rsp.val;
- rte_bbdev_log_debug("Resp. desc %p: %x", desc, rsp.val);
+ rte_bbdev_log_debug("Resp. desc %p: %x num %d\n",
+ desc, rsp.val, desc->req.numCBs);
/* Dequeue */
op = desc->req.op_addr;
@@ -4552,27 +4571,32 @@ dequeue_enc_one_op_cb(struct acc100_queue *q, struct rte_bbdev_enc_op **ref_op,
desc->rsp.add_info_0 = 0; /*Reserved bits */
desc->rsp.add_info_1 = 0; /*Reserved bits */
- /* Flag that the muxing cause loss of opaque data */
- op->opaque_data = (void *)-1;
- for (i = 0 ; i < desc->req.numCBs; i++)
- ref_op[i] = op;
+ ref_op[0] = op;
+ struct acc100_ptrs *context_ptrs = q->companion_ring_addr + desc_idx;
+ for (i = 1 ; i < desc->req.numCBs; i++)
+ ref_op[i] = context_ptrs->ptr[i].op_addr;
- /* One CB (op) was successfully dequeued */
+ /* One op was successfully dequeued */
+ (*dequeued_descs)++;
+ *dequeued_ops += desc->req.numCBs;
return desc->req.numCBs;
}
-/* Dequeue one encode operations from ACC100 device in TB mode */
+/* Dequeue one LDPC encode operations from ACC100 device in TB mode
+ * That operation may cover multiple descriptors
+ */
static inline int
dequeue_enc_one_op_tb(struct acc100_queue *q, struct rte_bbdev_enc_op **ref_op,
- uint16_t total_dequeued_cbs, uint32_t *aq_dequeued)
+ uint16_t *dequeued_ops, uint32_t *aq_dequeued,
+ uint16_t *dequeued_descs)
{
union acc100_dma_desc *desc, *last_desc, atom_desc;
union acc100_dma_rsp_desc rsp;
struct rte_bbdev_enc_op *op;
uint8_t i = 0;
- uint16_t current_dequeued_cbs = 0, cbs_in_tb;
+ uint16_t current_dequeued_descs = 0, descs_in_tb;
- desc = q->ring_addr + ((q->sw_ring_tail + total_dequeued_cbs)
+ desc = q->ring_addr + ((q->sw_ring_tail + *dequeued_descs)
& q->sw_ring_wrap_mask);
atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc,
__ATOMIC_RELAXED);
@@ -4582,10 +4606,10 @@ dequeue_enc_one_op_tb(struct acc100_queue *q, struct rte_bbdev_enc_op **ref_op,
return -1;
/* Get number of CBs in dequeued TB */
- cbs_in_tb = desc->req.cbs_in_tb;
+ descs_in_tb = desc->req.cbs_in_tb;
/* Get last CB */
last_desc = q->ring_addr + ((q->sw_ring_tail
- + total_dequeued_cbs + cbs_in_tb - 1)
+ + *dequeued_descs + descs_in_tb - 1)
& q->sw_ring_wrap_mask);
/* Check if last CB in TB is ready to dequeue (and thus
* the whole TB) - checking sdone bit. If not return.
@@ -4601,15 +4625,17 @@ dequeue_enc_one_op_tb(struct acc100_queue *q, struct rte_bbdev_enc_op **ref_op,
/* Clearing status, it will be set based on response */
op->status = 0;
- while (i < cbs_in_tb) {
+ while (i < descs_in_tb) {
desc = q->ring_addr + ((q->sw_ring_tail
- + total_dequeued_cbs)
+ + *dequeued_descs)
& q->sw_ring_wrap_mask);
atom_desc.atom_hdr = __atomic_load_n((uint64_t *)desc,
__ATOMIC_RELAXED);
rsp.val = atom_desc.rsp.val;
- rte_bbdev_log_debug("Resp. desc %p: %x", desc,
- rsp.val);
+ rte_bbdev_log_debug("Resp. desc %p: %x descs %d cbs %d\n",
+ desc,
+ rsp.val, descs_in_tb,
+ desc->req.numCBs);
op->status |= ((rsp.dma_err) ? (1 << RTE_BBDEV_DRV_ERROR) : 0);
op->status |= ((rsp.fcw_err) ? (1 << RTE_BBDEV_DRV_ERROR) : 0);
@@ -4621,14 +4647,14 @@ dequeue_enc_one_op_tb(struct acc100_queue *q, struct rte_bbdev_enc_op **ref_op,
desc->rsp.val = ACC100_DMA_DESC_TYPE;
desc->rsp.add_info_0 = 0;
desc->rsp.add_info_1 = 0;
- total_dequeued_cbs++;
- current_dequeued_cbs++;
+ (*dequeued_descs)++;
+ current_dequeued_descs++;
i++;
}
*ref_op = op;
-
- return current_dequeued_cbs;
+ (*dequeued_ops)++;
+ return current_dequeued_descs;
}
/* Dequeue one decode operation from ACC100 device in CB mode */
@@ -4824,12 +4850,11 @@ acc100_dequeue_enc(struct rte_bbdev_queue_data *q_data,
struct rte_bbdev_enc_op **ops, uint16_t num)
{
struct acc100_queue *q = q_data->queue_private;
- uint16_t dequeue_num;
uint32_t avail = acc100_ring_avail_deq(q);
uint32_t aq_dequeued = 0;
- uint16_t i, dequeued_cbs = 0;
- struct rte_bbdev_enc_op *op;
+ uint16_t i, dequeued_ops = 0, dequeued_descs = 0;
int ret;
+ struct rte_bbdev_enc_op *op;
if (avail == 0)
return 0;
#ifdef RTE_LIBRTE_BBDEV_DEBUG
@@ -4838,31 +4863,34 @@ acc100_dequeue_enc(struct rte_bbdev_queue_data *q_data,
return 0;
}
#endif
+ op = (q->ring_addr + (q->sw_ring_tail &
+ q->sw_ring_wrap_mask))->req.op_addr;
+ if (unlikely(ops == NULL || op == NULL))
+ return 0;
- dequeue_num = (avail < num) ? avail : num;
+ int cbm = op->turbo_enc.code_block_mode;
- for (i = 0; i < dequeue_num; ++i) {
- op = (q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
- & q->sw_ring_wrap_mask))->req.op_addr;
- if (op->turbo_enc.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK)
- ret = dequeue_enc_one_op_tb(q, &ops[i], dequeued_cbs,
- &aq_dequeued);
+ for (i = 0; i < num; i++) {
+ if (cbm == RTE_BBDEV_TRANSPORT_BLOCK)
+ ret = dequeue_enc_one_op_tb(q, &ops[dequeued_ops],
+ &dequeued_ops, &aq_dequeued,
+ &dequeued_descs);
else
- ret = dequeue_enc_one_op_cb(q, &ops[i], dequeued_cbs,
- &aq_dequeued);
-
+ ret = dequeue_enc_one_op_cb(q, &ops[dequeued_ops],
+ &dequeued_ops, &aq_dequeued,
+ &dequeued_descs);
if (ret < 0)
break;
- dequeued_cbs += ret;
+ if (dequeued_ops >= num)
+ break;
}
q->aq_dequeued += aq_dequeued;
- q->sw_ring_tail += dequeued_cbs;
+ q->sw_ring_tail += dequeued_descs;
/* Update enqueue stats */
- q_data->queue_stats.dequeued_count += i;
-
- return i;
+ q_data->queue_stats.dequeued_count += dequeued_ops;
+ return dequeued_ops;
}
/* Dequeue LDPC encode operations from ACC100 device. */
@@ -4873,24 +4901,31 @@ acc100_dequeue_ldpc_enc(struct rte_bbdev_queue_data *q_data,
struct acc100_queue *q = q_data->queue_private;
uint32_t avail = acc100_ring_avail_deq(q);
uint32_t aq_dequeued = 0;
- uint16_t dequeue_num, i, dequeued_cbs = 0, dequeued_descs = 0;
+ uint16_t i, dequeued_ops = 0, dequeued_descs = 0;
int ret;
-
+ struct rte_bbdev_enc_op *op;
#ifdef RTE_LIBRTE_BBDEV_DEBUG
if (unlikely(ops == 0 && q == NULL))
return 0;
#endif
+ op = (q->ring_addr + (q->sw_ring_tail &
+ q->sw_ring_wrap_mask))->req.op_addr;
+ if (unlikely(ops == NULL || op == NULL))
+ return 0;
+ int cbm = op->ldpc_enc.code_block_mode;
- dequeue_num = RTE_MIN(avail, num);
-
- for (i = 0; i < dequeue_num; i++) {
- ret = dequeue_enc_one_op_cb(q, &ops[dequeued_cbs],
- dequeued_descs, &aq_dequeued);
+ for (i = 0; i < avail; i++) {
+ if (cbm == RTE_BBDEV_TRANSPORT_BLOCK)
+ ret = dequeue_enc_one_op_tb(q, &ops[dequeued_ops],
+ &dequeued_ops, &aq_dequeued,
+ &dequeued_descs);
+ else
+ ret = dequeue_enc_one_op_cb(q, &ops[dequeued_ops],
+ &dequeued_ops, &aq_dequeued,
+ &dequeued_descs);
if (ret < 0)
break;
- dequeued_cbs += ret;
- dequeued_descs++;
- if (dequeued_cbs >= num)
+ if (dequeued_ops >= num)
break;
}
@@ -4898,12 +4933,10 @@ acc100_dequeue_ldpc_enc(struct rte_bbdev_queue_data *q_data,
q->sw_ring_tail += dequeued_descs;
/* Update enqueue stats */
- q_data->queue_stats.dequeued_count += dequeued_cbs;
-
- return dequeued_cbs;
+ q_data->queue_stats.dequeued_count += dequeued_ops;
+ return dequeued_ops;
}
-
/* Dequeue decode operations from ACC100 device. */
static uint16_t
acc100_dequeue_dec(struct rte_bbdev_queue_data *q_data,