[v4,09/14] baseband/acc: add LTE processing functions

Message ID 1663806460-45162-10-git-send-email-nicolas.chautru@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Akhil Goyal
Series [v4,01/14] baseband/acc100: remove unused registers

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Chautru, Nicolas Sept. 22, 2022, 12:27 a.m. UTC
  Add functions and capability for 4G FEC

Signed-off-by: Nic Chautru <nicolas.chautru@intel.com>
---
 drivers/baseband/acc/rte_acc200_pmd.c | 894 +++++++++++++++++++++++++++++++++-
 1 file changed, 874 insertions(+), 20 deletions(-)
  

Comments

Maxime Coquelin Sept. 23, 2022, 8:59 a.m. UTC | #1
On 9/22/22 02:27, Nic Chautru wrote:
> Add functions and capability for 4G FEC
> 
> Signed-off-by: Nic Chautru <nicolas.chautru@intel.com>
> ---
>   drivers/baseband/acc/rte_acc200_pmd.c | 894 +++++++++++++++++++++++++++++++++-
>   1 file changed, 874 insertions(+), 20 deletions(-)
> 
> diff --git a/drivers/baseband/acc/rte_acc200_pmd.c b/drivers/baseband/acc/rte_acc200_pmd.c
> index 1c59559..35ea0fe 100644
> --- a/drivers/baseband/acc/rte_acc200_pmd.c
> +++ b/drivers/baseband/acc/rte_acc200_pmd.c
> @@ -641,6 +641,46 @@
>   	int i;
>   	static const struct rte_bbdev_op_cap bbdev_capabilities[] = {
>   		{
> +			.type = RTE_BBDEV_OP_TURBO_DEC,
> +			.cap.turbo_dec = {
> +				.capability_flags =
> +					RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE |
> +					RTE_BBDEV_TURBO_CRC_TYPE_24B |
> +					RTE_BBDEV_TURBO_EQUALIZER |
> +					RTE_BBDEV_TURBO_SOFT_OUT_SATURATE |
> +					RTE_BBDEV_TURBO_HALF_ITERATION_EVEN |
> +					RTE_BBDEV_TURBO_CONTINUE_CRC_MATCH |
> +					RTE_BBDEV_TURBO_SOFT_OUTPUT |
> +					RTE_BBDEV_TURBO_EARLY_TERMINATION |
> +					RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN |
> +					RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT |
> +					RTE_BBDEV_TURBO_MAP_DEC |
> +					RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP |
> +					RTE_BBDEV_TURBO_DEC_SCATTER_GATHER,
> +				.max_llr_modulus = INT8_MAX,
> +				.num_buffers_src =
> +						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> +				.num_buffers_hard_out =
> +						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> +				.num_buffers_soft_out =
> +						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> +			}
> +		},
> +		{
> +			.type = RTE_BBDEV_OP_TURBO_ENC,
> +			.cap.turbo_enc = {
> +				.capability_flags =
> +					RTE_BBDEV_TURBO_CRC_24B_ATTACH |
> +					RTE_BBDEV_TURBO_RV_INDEX_BYPASS |
> +					RTE_BBDEV_TURBO_RATE_MATCH |
> +					RTE_BBDEV_TURBO_ENC_SCATTER_GATHER,
> +				.num_buffers_src =
> +						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> +				.num_buffers_dst =
> +						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> +			}
> +		},
> +		{
>   			.type   = RTE_BBDEV_OP_LDPC_ENC,
>   			.cap.ldpc_enc = {
>   				.capability_flags =
> @@ -691,15 +731,17 @@
>   
>   	/* Exposed number of queues */
>   	dev_info->num_queues[RTE_BBDEV_OP_NONE] = 0;
> -	dev_info->num_queues[RTE_BBDEV_OP_TURBO_DEC] = 0;
> -	dev_info->num_queues[RTE_BBDEV_OP_TURBO_ENC] = 0;
> +	dev_info->num_queues[RTE_BBDEV_OP_TURBO_DEC] = d->acc_conf.q_ul_4g.num_aqs_per_groups *
> +			d->acc_conf.q_ul_4g.num_qgroups;
> +	dev_info->num_queues[RTE_BBDEV_OP_TURBO_ENC] = d->acc_conf.q_dl_4g.num_aqs_per_groups *
> +			d->acc_conf.q_dl_4g.num_qgroups;
>   	dev_info->num_queues[RTE_BBDEV_OP_LDPC_DEC] = d->acc_conf.q_ul_5g.num_aqs_per_groups *
>   			d->acc_conf.q_ul_5g.num_qgroups;
>   	dev_info->num_queues[RTE_BBDEV_OP_LDPC_ENC] = d->acc_conf.q_dl_5g.num_aqs_per_groups *
>   			d->acc_conf.q_dl_5g.num_qgroups;
>   	dev_info->num_queues[RTE_BBDEV_OP_FFT] = 0;
> -	dev_info->queue_priority[RTE_BBDEV_OP_TURBO_DEC] = 0;
> -	dev_info->queue_priority[RTE_BBDEV_OP_TURBO_ENC] = 0;
> +	dev_info->queue_priority[RTE_BBDEV_OP_TURBO_DEC] = d->acc_conf.q_ul_4g.num_qgroups;
> +	dev_info->queue_priority[RTE_BBDEV_OP_TURBO_ENC] = d->acc_conf.q_dl_4g.num_qgroups;
>   	dev_info->queue_priority[RTE_BBDEV_OP_LDPC_DEC] = d->acc_conf.q_ul_5g.num_qgroups;
>   	dev_info->queue_priority[RTE_BBDEV_OP_LDPC_ENC] = d->acc_conf.q_dl_5g.num_qgroups;
>   	dev_info->queue_priority[RTE_BBDEV_OP_FFT] = 0;
> @@ -744,6 +786,70 @@
>   	{.device_id = 0},
>   };
>   
> +/* Fill in a frame control word for turbo decoding. */
> +static inline void
> +acc200_fcw_td_fill(const struct rte_bbdev_dec_op *op, struct acc_fcw_td *fcw)
> +{
> +	fcw->fcw_ver = 1;
> +	fcw->num_maps = ACC_FCW_TD_AUTOMAP;
> +	fcw->bypass_sb_deint = !check_bit(op->turbo_dec.op_flags,
> +			RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE);
> +	if (op->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
> +		/* FIXME for TB block */
> +		fcw->k_pos = op->turbo_dec.tb_params.k_pos;
> +		fcw->k_neg = op->turbo_dec.tb_params.k_neg;
> +	} else {
> +		fcw->k_pos = op->turbo_dec.cb_params.k;
> +		fcw->k_neg = op->turbo_dec.cb_params.k;
> +	}
> +	fcw->c = 1;
> +	fcw->c_neg = 1;
> +	if (check_bit(op->turbo_dec.op_flags, RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
> +		fcw->soft_output_en = 1;
> +		fcw->sw_soft_out_dis = 0;
> +		fcw->sw_et_cont = check_bit(op->turbo_dec.op_flags,
> +				RTE_BBDEV_TURBO_CONTINUE_CRC_MATCH);
> +		fcw->sw_soft_out_saturation = check_bit(op->turbo_dec.op_flags,
> +				RTE_BBDEV_TURBO_SOFT_OUT_SATURATE);
> +		if (check_bit(op->turbo_dec.op_flags,
> +				RTE_BBDEV_TURBO_EQUALIZER)) {
> +			fcw->bypass_teq = 0;
> +			fcw->ea = op->turbo_dec.cb_params.e;
> +			fcw->eb = op->turbo_dec.cb_params.e;
> +			if (op->turbo_dec.rv_index == 0)
> +				fcw->k0_start_col = ACC_FCW_TD_RVIDX_0;
> +			else if (op->turbo_dec.rv_index == 1)
> +				fcw->k0_start_col = ACC_FCW_TD_RVIDX_1;
> +			else if (op->turbo_dec.rv_index == 2)
> +				fcw->k0_start_col = ACC_FCW_TD_RVIDX_2;
> +			else
> +				fcw->k0_start_col = ACC_FCW_TD_RVIDX_3;
> +		} else {
> +			fcw->bypass_teq = 1;
> +			fcw->eb = 64; /* avoid undefined value */
> +		}
> +	} else {
> +		fcw->soft_output_en = 0;
> +		fcw->sw_soft_out_dis = 1;
> +		fcw->bypass_teq = 0;
> +	}
> +
> +	fcw->code_block_mode = 1; /* FIXME */
> +	fcw->turbo_crc_type = check_bit(op->turbo_dec.op_flags,
> +			RTE_BBDEV_TURBO_CRC_TYPE_24B);
> +
> +	fcw->ext_td_cold_reg_en = 1;
> +	fcw->raw_decoder_input_on = 0;
> +	fcw->max_iter = RTE_MAX((uint8_t) op->turbo_dec.iter_max, 2);
> +	fcw->min_iter = 2;
> +	fcw->half_iter_on = !check_bit(op->turbo_dec.op_flags,
> +			RTE_BBDEV_TURBO_HALF_ITERATION_EVEN);
> +
> +	fcw->early_stop_en = check_bit(op->turbo_dec.op_flags,
> +			RTE_BBDEV_TURBO_EARLY_TERMINATION) & !fcw->soft_output_en;
> +	fcw->ext_scale = 0xF;
> +}
> +
>   /* Fill in a frame control word for LDPC decoding. */
>   static inline void
>   acc200_fcw_ld_fill(struct rte_bbdev_dec_op *op, struct acc_fcw_ld *fcw,
> @@ -870,6 +976,89 @@
>   }
>   
>   static inline int
> +acc200_dma_desc_te_fill(struct rte_bbdev_enc_op *op,
> +		struct acc_dma_req_desc *desc, struct rte_mbuf **input,
> +		struct rte_mbuf *output, uint32_t *in_offset,
> +		uint32_t *out_offset, uint32_t *out_length,
> +		uint32_t *mbuf_total_left, uint32_t *seg_total_left, uint8_t r)
> +{
> +	int next_triplet = 1; /* FCW already done */
> +	uint32_t e, ea, eb, length;
> +	uint16_t k, k_neg, k_pos;
> +	uint8_t cab, c_neg;
> +
> +	desc->word0 = ACC_DMA_DESC_TYPE;
> +	desc->word1 = 0; /**< Timestamp could be disabled */
> +	desc->word2 = 0;
> +	desc->word3 = 0;
> +	desc->numCBs = 1;
> +
> +	if (op->turbo_enc.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
> +		ea = op->turbo_enc.tb_params.ea;
> +		eb = op->turbo_enc.tb_params.eb;
> +		cab = op->turbo_enc.tb_params.cab;
> +		k_neg = op->turbo_enc.tb_params.k_neg;
> +		k_pos = op->turbo_enc.tb_params.k_pos;
> +		c_neg = op->turbo_enc.tb_params.c_neg;
> +		e = (r < cab) ? ea : eb;
> +		k = (r < c_neg) ? k_neg : k_pos;
> +	} else {
> +		e = op->turbo_enc.cb_params.e;
> +		k = op->turbo_enc.cb_params.k;
> +	}
> +
> +	if (check_bit(op->turbo_enc.op_flags, RTE_BBDEV_TURBO_CRC_24B_ATTACH))
> +		length = (k - 24) >> 3;
> +	else
> +		length = k >> 3;
> +
> +	if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left < length))) {
> +		rte_bbdev_log(ERR,
> +				"Mismatch between mbuf length and included CB sizes: mbuf len %u, cb len %u",
> +				*mbuf_total_left, length);
> +		return -1;
> +	}
> +
> +	next_triplet = acc_dma_fill_blk_type_in(desc, input, in_offset,
> +			length, seg_total_left, next_triplet,
> +			check_bit(op->turbo_enc.op_flags,
> +			RTE_BBDEV_TURBO_ENC_SCATTER_GATHER));
> +	if (unlikely(next_triplet < 0)) {
> +		rte_bbdev_log(ERR,
> +				"Mismatch between data to process and mbuf data length in bbdev_op: %p",
> +				op);
> +		return -1;
> +	}
> +	desc->data_ptrs[next_triplet - 1].last = 1;
> +	desc->m2dlen = next_triplet;
> +	*mbuf_total_left -= length;
> +
> +	/* Set output length */
> +	if (check_bit(op->turbo_enc.op_flags, RTE_BBDEV_TURBO_RATE_MATCH))
> +		/* Integer round up division by 8 */
> +		*out_length = (e + 7) >> 3;
> +	else
> +		*out_length = (k >> 3) * 3 + 2;
> +
> +	next_triplet = acc_dma_fill_blk_type(desc, output, *out_offset,
> +			*out_length, next_triplet, ACC_DMA_BLKID_OUT_ENC);
> +	if (unlikely(next_triplet < 0)) {
> +		rte_bbdev_log(ERR,
> +				"Mismatch between data to process and mbuf data length in bbdev_op: %p",
> +				op);
> +		return -1;
> +	}
> +	op->turbo_enc.output.length += *out_length;
> +	*out_offset += *out_length;
> +	desc->data_ptrs[next_triplet - 1].last = 1;
> +	desc->d2mlen = next_triplet - desc->m2dlen;
> +
> +	desc->op_addr = op;
> +
> +	return 0;
> +}
> +
> +static inline int
>   acc200_dma_desc_le_fill(struct rte_bbdev_enc_op *op,
>   		struct acc_dma_req_desc *desc, struct rte_mbuf **input,
>   		struct rte_mbuf *output, uint32_t *in_offset,
> @@ -929,6 +1118,122 @@
>   }
>   
>   static inline int
> +acc200_dma_desc_td_fill(struct rte_bbdev_dec_op *op,
> +		struct acc_dma_req_desc *desc, struct rte_mbuf **input,
> +		struct rte_mbuf *h_output, struct rte_mbuf *s_output,
> +		uint32_t *in_offset, uint32_t *h_out_offset,
> +		uint32_t *s_out_offset, uint32_t *h_out_length,
> +		uint32_t *s_out_length, uint32_t *mbuf_total_left,
> +		uint32_t *seg_total_left, uint8_t r)
> +{
> +	int next_triplet = 1; /* FCW already done */
> +	uint16_t k;
> +	uint16_t crc24_overlap = 0;
> +	uint32_t e, kw;
> +
> +	desc->word0 = ACC_DMA_DESC_TYPE;
> +	desc->word1 = 0; /**< Timestamp could be disabled */
> +	desc->word2 = 0;
> +	desc->word3 = 0;
> +	desc->numCBs = 1;
> +
> +	if (op->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
> +		k = (r < op->turbo_dec.tb_params.c_neg)
> +			? op->turbo_dec.tb_params.k_neg
> +			: op->turbo_dec.tb_params.k_pos;
> +		e = (r < op->turbo_dec.tb_params.cab)
> +			? op->turbo_dec.tb_params.ea
> +			: op->turbo_dec.tb_params.eb;
> +	} else {
> +		k = op->turbo_dec.cb_params.k;
> +		e = op->turbo_dec.cb_params.e;
> +	}
> +
> +	if ((op->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK)
> +		&& !check_bit(op->turbo_dec.op_flags,
> +		RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP))
> +		crc24_overlap = 24;
> +
> +	/* Calculates circular buffer size.
> +	 * According to 3gpp 36.212 section 5.1.4.2
> +	 *   Kw = 3 * Kpi,
> +	 * where:
> +	 *   Kpi = nCol * nRow
> +	 * where nCol is 32 and nRow can be calculated from:
> +	 *   D =< nCol * nRow
> +	 * where D is the size of each output from turbo encoder block (k + 4).
> +	 */
> +	kw = RTE_ALIGN_CEIL(k + 4, 32) * 3;
> +
> +	if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left < kw))) {
> +		rte_bbdev_log(ERR,
> +				"Mismatch between mbuf length and included CB sizes: mbuf len %u, cb len %u",
> +				*mbuf_total_left, kw);
> +		return -1;
> +	}
> +
> +	next_triplet = acc_dma_fill_blk_type_in(desc, input, in_offset, kw,
> +			seg_total_left, next_triplet,
> +			check_bit(op->turbo_dec.op_flags,
> +			RTE_BBDEV_TURBO_DEC_SCATTER_GATHER));
> +	if (unlikely(next_triplet < 0)) {
> +		rte_bbdev_log(ERR,
> +				"Mismatch between data to process and mbuf data length in bbdev_op: %p",
> +				op);
> +		return -1;
> +	}
> +	desc->data_ptrs[next_triplet - 1].last = 1;
> +	desc->m2dlen = next_triplet;
> +	*mbuf_total_left -= kw;
> +	*h_out_length = ((k - crc24_overlap) >> 3);
> +	next_triplet = acc_dma_fill_blk_type(
> +			desc, h_output, *h_out_offset,
> +			*h_out_length, next_triplet, ACC_DMA_BLKID_OUT_HARD);
> +	if (unlikely(next_triplet < 0)) {
> +		rte_bbdev_log(ERR,
> +				"Mismatch between data to process and mbuf data length in bbdev_op: %p",
> +				op);
> +		return -1;
> +	}
> +
> +	op->turbo_dec.hard_output.length += *h_out_length;
> +	*h_out_offset += *h_out_length;
> +
> +	/* Soft output */
> +	if (check_bit(op->turbo_dec.op_flags, RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
> +		if (op->turbo_dec.soft_output.data == 0) {
> +			rte_bbdev_log(ERR, "Soft output is not defined");
> +			return -1;
> +		}
> +		if (check_bit(op->turbo_dec.op_flags,
> +				RTE_BBDEV_TURBO_EQUALIZER))
> +			*s_out_length = e;
> +		else
> +			*s_out_length = (k * 3) + 12;
> +
> +		next_triplet = acc_dma_fill_blk_type(desc, s_output,
> +				*s_out_offset, *s_out_length, next_triplet,
> +				ACC_DMA_BLKID_OUT_SOFT);
> +		if (unlikely(next_triplet < 0)) {
> +			rte_bbdev_log(ERR,
> +					"Mismatch between data to process and mbuf data length in bbdev_op: %p",
> +					op);
> +			return -1;
> +		}
> +
> +		op->turbo_dec.soft_output.length += *s_out_length;
> +		*s_out_offset += *s_out_length;
> +	}
> +
> +	desc->data_ptrs[next_triplet - 1].last = 1;
> +	desc->d2mlen = next_triplet - desc->m2dlen;
> +
> +	desc->op_addr = op;
> +
> +	return 0;
> +}
> +
> +static inline int
>   acc200_dma_desc_ld_fill(struct rte_bbdev_dec_op *op,
>   		struct acc_dma_req_desc *desc,
>   		struct rte_mbuf **input, struct rte_mbuf *h_output,
> @@ -1100,6 +1405,51 @@
>   	desc->op_addr = op;
>   }
>   
> +/* Enqueue one encode operations for ACC200 device in CB mode */
> +static inline int
> +enqueue_enc_one_op_cb(struct acc_queue *q, struct rte_bbdev_enc_op *op,
> +		uint16_t total_enqueued_cbs)
> +{
> +	union acc_dma_desc *desc = NULL;
> +	int ret;
> +	uint32_t in_offset, out_offset, out_length, mbuf_total_left,
> +		seg_total_left;
> +	struct rte_mbuf *input, *output_head, *output;
> +
> +	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
> +			& q->sw_ring_wrap_mask);
> +	desc = q->ring_addr + desc_idx;
> +	acc_fcw_te_fill(op, &desc->req.fcw_te);
> +
> +	input = op->turbo_enc.input.data;
> +	output_head = output = op->turbo_enc.output.data;
> +	in_offset = op->turbo_enc.input.offset;
> +	out_offset = op->turbo_enc.output.offset;
> +	out_length = 0;
> +	mbuf_total_left = op->turbo_enc.input.length;
> +	seg_total_left = rte_pktmbuf_data_len(op->turbo_enc.input.data)
> +			- in_offset;
> +
> +	ret = acc200_dma_desc_te_fill(op, &desc->req, &input, output,
> +			&in_offset, &out_offset, &out_length, &mbuf_total_left,
> +			&seg_total_left, 0);
> +
> +	if (unlikely(ret < 0))
> +		return ret;
> +
> +	mbuf_append(output_head, output, out_length);
> +
> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> +	rte_memdump(stderr, "FCW", &desc->req.fcw_te,
> +			sizeof(desc->req.fcw_te) - 8);
> +	rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
> +	if (check_mbuf_total_left(mbuf_total_left) != 0)
> +		return -EINVAL;
> +#endif
> +	/* One CB (one op) was successfully prepared to enqueue */
> +	return 1;
> +}
> +
>   /* Enqueue one encode operations for ACC200 device in CB mode
>    * multiplexed on the same descriptor
>    */
> @@ -1262,6 +1612,84 @@
>   	return 1;
>   }
>   
> +
> +/* Enqueue one encode operations for ACC200 device in TB mode. */
> +static inline int
> +enqueue_enc_one_op_tb(struct acc_queue *q, struct rte_bbdev_enc_op *op,
> +		uint16_t total_enqueued_cbs, uint8_t cbs_in_tb)
> +{
> +	union acc_dma_desc *desc = NULL;
> +	int ret;
> +	uint8_t r, c;
> +	uint32_t in_offset, out_offset, out_length, mbuf_total_left,
> +		seg_total_left;
> +	struct rte_mbuf *input, *output_head, *output;
> +	uint16_t current_enqueued_cbs = 0;
> +
> +	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
> +			& q->sw_ring_wrap_mask);
> +	desc = q->ring_addr + desc_idx;
> +	uint64_t fcw_offset = (desc_idx << 8) + ACC_DESC_FCW_OFFSET;
> +	acc_fcw_te_fill(op, &desc->req.fcw_te);
> +
> +	input = op->turbo_enc.input.data;
> +	output_head = output = op->turbo_enc.output.data;
> +	in_offset = op->turbo_enc.input.offset;
> +	out_offset = op->turbo_enc.output.offset;
> +	out_length = 0;
> +	mbuf_total_left = op->turbo_enc.input.length;
> +
> +	c = op->turbo_enc.tb_params.c;
> +	r = op->turbo_enc.tb_params.r;
> +
> +	while (mbuf_total_left > 0 && r < c) {
> +		seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
> +		/* Set up DMA descriptor */
> +		desc = q->ring_addr + ((q->sw_ring_head + total_enqueued_cbs)
> +				& q->sw_ring_wrap_mask);
> +		desc->req.data_ptrs[0].address = q->ring_addr_iova + fcw_offset;
> +		desc->req.data_ptrs[0].blen = ACC_FCW_TE_BLEN;
> +
> +		ret = acc200_dma_desc_te_fill(op, &desc->req, &input, output,
> +				&in_offset, &out_offset, &out_length,
> +				&mbuf_total_left, &seg_total_left, r);
> +		if (unlikely(ret < 0))
> +			return ret;
> +		mbuf_append(output_head, output, out_length);
> +
> +		/* Set total number of CBs in TB */
> +		desc->req.cbs_in_tb = cbs_in_tb;
> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> +		rte_memdump(stderr, "FCW", &desc->req.fcw_te,
> +				sizeof(desc->req.fcw_te) - 8);
> +		rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
> +#endif
> +
> +		if (seg_total_left == 0) {
> +			/* Go to the next mbuf */
> +			input = input->next;
> +			in_offset = 0;
> +			output = output->next;
> +			out_offset = 0;
> +		}
> +
> +		total_enqueued_cbs++;
> +		current_enqueued_cbs++;
> +		r++;
> +	}
> +
> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> +	if (check_mbuf_total_left(mbuf_total_left) != 0)
> +		return -EINVAL;
> +#endif
> +
> +	/* Set SDone on last CB descriptor for TB mode. */
> +	desc->req.sdone_enable = 1;
> +	desc->req.irq_enable = q->irq_enable;
> +
> +	return current_enqueued_cbs;
> +}
> +
>   /* Enqueue one encode operations for ACC200 device in TB mode.
>    * returns the number of descs used
>    */
> @@ -1328,6 +1756,69 @@
>   
>   /** Enqueue one decode operations for ACC200 device in CB mode */
>   static inline int
> +enqueue_dec_one_op_cb(struct acc_queue *q, struct rte_bbdev_dec_op *op,
> +		uint16_t total_enqueued_cbs)
> +{
> +	union acc_dma_desc *desc = NULL;
> +	int ret;
> +	uint32_t in_offset, h_out_offset, s_out_offset, s_out_length,
> +		h_out_length, mbuf_total_left, seg_total_left;
> +	struct rte_mbuf *input, *h_output_head, *h_output,
> +		*s_output_head, *s_output;
> +
> +	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
> +			& q->sw_ring_wrap_mask);
> +	desc = q->ring_addr + desc_idx;
> +	acc200_fcw_td_fill(op, &desc->req.fcw_td);
> +
> +	input = op->turbo_dec.input.data;
> +	h_output_head = h_output = op->turbo_dec.hard_output.data;
> +	s_output_head = s_output = op->turbo_dec.soft_output.data;
> +	in_offset = op->turbo_dec.input.offset;
> +	h_out_offset = op->turbo_dec.hard_output.offset;
> +	s_out_offset = op->turbo_dec.soft_output.offset;
> +	h_out_length = s_out_length = 0;
> +	mbuf_total_left = op->turbo_dec.input.length;
> +	seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
> +
> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> +	if (unlikely(input == NULL)) {
> +		rte_bbdev_log(ERR, "Invalid mbuf pointer");
> +		return -EFAULT;
> +	}
> +#endif

It makes sense to have this check present unconditionally.
Also, putting it here is useless since rte_pktmbuf_data_len()
dereferences it just before. So if NULL, it will crash before being
checked.
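
For illustration, a minimal sketch of what is meant here -- just the lines
already in the patch reordered, with the check made unconditional; not a
tested change:

	input = op->turbo_dec.input.data;
	if (unlikely(input == NULL)) {
		rte_bbdev_log(ERR, "Invalid mbuf pointer");
		return -EFAULT;
	}
	/* Only dereference the mbuf once the pointer is known to be valid. */
	seg_total_left = rte_pktmbuf_data_len(input) - in_offset;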

> +
> +	/* Set up DMA descriptor */
> +	desc = q->ring_addr + ((q->sw_ring_head + total_enqueued_cbs)
> +			& q->sw_ring_wrap_mask);
> +
> +	ret = acc200_dma_desc_td_fill(op, &desc->req, &input, h_output,
> +			s_output, &in_offset, &h_out_offset, &s_out_offset,
> +			&h_out_length, &s_out_length, &mbuf_total_left,
> +			&seg_total_left, 0);
> +
> +	if (unlikely(ret < 0))
> +		return ret;
> +
> +	/* Hard output */
> +	mbuf_append(h_output_head, h_output, h_out_length);
> +
> +	/* Soft output */
> +	if (check_bit(op->turbo_dec.op_flags, RTE_BBDEV_TURBO_SOFT_OUTPUT))
> +		mbuf_append(s_output_head, s_output, s_out_length);
> +
> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> +	rte_memdump(stderr, "FCW", &desc->req.fcw_td,
> +			sizeof(desc->req.fcw_td));
> +	rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
> +#endif
> +
> +	/* One CB (one op) was successfully prepared to enqueue */
> +	return 1;
> +}
> +
> +/** Enqueue one decode operations for ACC200 device in CB mode */
> +static inline int
>   enqueue_ldpc_dec_one_op_cb(struct acc_queue *q, struct rte_bbdev_dec_op *op,
>   		uint16_t total_enqueued_cbs, bool same_op)
>   {
> @@ -1525,10 +2016,147 @@
>   	return current_enqueued_cbs;
>   }
>   
> -/** Enqueue encode operations for ACC200 device in CB mode. */
> -static inline uint16_t
> -acc200_enqueue_ldpc_enc_cb(struct rte_bbdev_queue_data *q_data,
> -		struct rte_bbdev_enc_op **ops, uint16_t num)
> +/* Enqueue one decode operations for ACC200 device in TB mode */
> +static inline int
> +enqueue_dec_one_op_tb(struct acc_queue *q, struct rte_bbdev_dec_op *op,
> +		uint16_t total_enqueued_cbs, uint8_t cbs_in_tb)
> +{
> +	union acc_dma_desc *desc = NULL;
> +	int ret;
> +	uint8_t r, c;
> +	uint32_t in_offset, h_out_offset, s_out_offset, s_out_length,
> +		h_out_length, mbuf_total_left, seg_total_left;
> +	struct rte_mbuf *input, *h_output_head, *h_output,
> +		*s_output_head, *s_output;
> +	uint16_t current_enqueued_cbs = 0;
> +
> +	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
> +			& q->sw_ring_wrap_mask);
> +	desc = q->ring_addr + desc_idx;
> +	uint64_t fcw_offset = (desc_idx << 8) + ACC_DESC_FCW_OFFSET;
> +	acc200_fcw_td_fill(op, &desc->req.fcw_td);
> +
> +	input = op->turbo_dec.input.data;

Here you may want to check that it is not NULL, at least for consistency
with enqueue_dec_one_op_cb().

> +	h_output_head = h_output = op->turbo_dec.hard_output.data;
> +	s_output_head = s_output = op->turbo_dec.soft_output.data;
> +	in_offset = op->turbo_dec.input.offset;
> +	h_out_offset = op->turbo_dec.hard_output.offset;
> +	s_out_offset = op->turbo_dec.soft_output.offset;
> +	h_out_length = s_out_length = 0;
> +	mbuf_total_left = op->turbo_dec.input.length;
> +	c = op->turbo_dec.tb_params.c;
> +	r = op->turbo_dec.tb_params.r;
> +
> +	while (mbuf_total_left > 0 && r < c) {
> +
> +		seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
> +
> +		/* Set up DMA descriptor */
> +		desc = q->ring_addr + ((q->sw_ring_head + total_enqueued_cbs)
> +				& q->sw_ring_wrap_mask);
> +		desc->req.data_ptrs[0].address = q->ring_addr_iova + fcw_offset;
> +		desc->req.data_ptrs[0].blen = ACC_FCW_TD_BLEN;
> +		ret = acc200_dma_desc_td_fill(op, &desc->req, &input,
> +				h_output, s_output, &in_offset, &h_out_offset,
> +				&s_out_offset, &h_out_length, &s_out_length,
> +				&mbuf_total_left, &seg_total_left, r);
> +
> +		if (unlikely(ret < 0))
> +			return ret;
> +
> +		/* Hard output */
> +		mbuf_append(h_output_head, h_output, h_out_length);
> +
> +		/* Soft output */
> +		if (check_bit(op->turbo_dec.op_flags,
> +				RTE_BBDEV_TURBO_SOFT_OUTPUT))
> +			mbuf_append(s_output_head, s_output, s_out_length);
> +
> +		/* Set total number of CBs in TB */
> +		desc->req.cbs_in_tb = cbs_in_tb;
> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> +		rte_memdump(stderr, "FCW", &desc->req.fcw_td,
> +				sizeof(desc->req.fcw_td) - 8);
> +		rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
> +#endif
> +
> +		if (seg_total_left == 0) {
> +			/* Go to the next mbuf */
> +			input = input->next;
> +			in_offset = 0;
> +			h_output = h_output->next;
> +			h_out_offset = 0;
> +
> +			if (check_bit(op->turbo_dec.op_flags,
> +					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
> +				s_output = s_output->next;
> +				s_out_offset = 0;
> +			}
> +		}
> +
> +		total_enqueued_cbs++;
> +		current_enqueued_cbs++;
> +		r++;
> +	}
> +
> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> +	if (check_mbuf_total_left(mbuf_total_left) != 0)
> +		return -EINVAL;
> +#endif

If it can happen, then the check should always be present.
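
For illustration, a sketch of that suggestion -- only the #ifdef guard
around the consistency check is dropped, the debug memdumps above can stay
debug-only:

	/* Consistency check kept regardless of the debug build option. */
	if (check_mbuf_total_left(mbuf_total_left) != 0)
		return -EINVAL;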

> +	/* Set SDone on last CB descriptor for TB mode */
> +	desc->req.sdone_enable = 1;
> +	desc->req.irq_enable = q->irq_enable;
> +
> +	return current_enqueued_cbs;
> +}
> +
> +/* Enqueue encode operations for ACC200 device in CB mode. */
> +static uint16_t
> +acc200_enqueue_enc_cb(struct rte_bbdev_queue_data *q_data,
> +		struct rte_bbdev_enc_op **ops, uint16_t num)
> +{
> +	struct acc_queue *q = q_data->queue_private;
> +	int32_t avail = acc_ring_avail_enq(q);
> +	uint16_t i;
> +	union acc_dma_desc *desc;
> +	int ret;
> +
> +	for (i = 0; i < num; ++i) {
> +		/* Check if there are available space for further processing */
> +		if (unlikely(avail - 1 < 0)) {
> +			acc_enqueue_ring_full(q_data);
> +			break;
> +		}
> +		avail -= 1;
> +
> +		ret = enqueue_enc_one_op_cb(q, ops[i], i);
> +		if (ret < 0) {
> +			acc_enqueue_invalid(q_data);
> +			break;
> +		}
> +	}
> +
> +	if (unlikely(i == 0))
> +		return 0; /* Nothing to enqueue */
> +
> +	/* Set SDone in last CB in enqueued ops for CB mode*/
> +	desc = q->ring_addr + ((q->sw_ring_head + i - 1)
> +			& q->sw_ring_wrap_mask);
> +	desc->req.sdone_enable = 1;
> +	desc->req.irq_enable = q->irq_enable;
> +
> +	acc_dma_enqueue(q, i, &q_data->queue_stats);
> +
> +	/* Update stats */
> +	q_data->queue_stats.enqueued_count += i;
> +	q_data->queue_stats.enqueue_err_count += num - i;
> +	return i;
> +}
> +
> +/** Enqueue encode operations for ACC200 device in CB mode. */
> +static inline uint16_t
> +acc200_enqueue_ldpc_enc_cb(struct rte_bbdev_queue_data *q_data,
> +		struct rte_bbdev_enc_op **ops, uint16_t num)
>   {
>   	struct acc_queue *q = q_data->queue_private;
>   	int32_t avail = acc_ring_avail_enq(q);
> @@ -1583,6 +2211,45 @@
>   	return i;
>   }
>   
> +/* Enqueue encode operations for ACC200 device in TB mode. */
> +static uint16_t
> +acc200_enqueue_enc_tb(struct rte_bbdev_queue_data *q_data,
> +		struct rte_bbdev_enc_op **ops, uint16_t num)
> +{
> +	struct acc_queue *q = q_data->queue_private;
> +	int32_t avail = acc_ring_avail_enq(q);
> +	uint16_t i, enqueued_cbs = 0;
> +	uint8_t cbs_in_tb;
> +	int ret;
> +
> +	for (i = 0; i < num; ++i) {
> +		cbs_in_tb = get_num_cbs_in_tb_enc(&ops[i]->turbo_enc);
> +		/* Check if there are available space for further processing */
> +		if (unlikely((avail - cbs_in_tb < 0) || (cbs_in_tb == 0))) {
> +			acc_enqueue_ring_full(q_data);
> +			break;
> +		}
> +		avail -= cbs_in_tb;
> +
> +		ret = enqueue_enc_one_op_tb(q, ops[i], enqueued_cbs, cbs_in_tb);
> +		if (ret <= 0) {
> +			acc_enqueue_invalid(q_data);
> +			break;
> +		}
> +		enqueued_cbs += ret;
> +	}
> +	if (unlikely(enqueued_cbs == 0))
> +		return 0; /* Nothing to enqueue */
> +
> +	acc_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats);
> +
> +	/* Update stats */
> +	q_data->queue_stats.enqueued_count += i;
> +	q_data->queue_stats.enqueue_err_count += num - i;
> +
> +	return i;
> +}


This function is strictly identical to acc100_enqueue_enc_tb(); it
should be shared.
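
For illustration, a sketch of the kind of factoring meant, assuming the body
(and the per-op helper it calls) moves into the common acc code; the
acc_enqueue_enc_tb() name and its placement in acc_common.h are assumptions,
not something already in the series:

	/* acc_common.h: single definition reused by both ACC100 and ACC200. */
	static inline uint16_t
	acc_enqueue_enc_tb(struct rte_bbdev_queue_data *q_data,
			struct rte_bbdev_enc_op **ops, uint16_t num)
	{
		struct acc_queue *q = q_data->queue_private;
		int32_t avail = acc_ring_avail_enq(q);
		uint16_t i, enqueued_cbs = 0;
		uint8_t cbs_in_tb;
		int ret;

		for (i = 0; i < num; ++i) {
			cbs_in_tb = get_num_cbs_in_tb_enc(&ops[i]->turbo_enc);
			/* Check if there is room for the whole TB. */
			if (unlikely((avail - cbs_in_tb < 0) || (cbs_in_tb == 0))) {
				acc_enqueue_ring_full(q_data);
				break;
			}
			avail -= cbs_in_tb;

			ret = enqueue_enc_one_op_tb(q, ops[i], enqueued_cbs, cbs_in_tb);
			if (ret <= 0) {
				acc_enqueue_invalid(q_data);
				break;
			}
			enqueued_cbs += ret;
		}
		if (unlikely(enqueued_cbs == 0))
			return 0; /* Nothing to enqueue. */

		acc_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats);

		/* Update stats. */
		q_data->queue_stats.enqueued_count += i;
		q_data->queue_stats.enqueue_err_count += num - i;

		return i;
	}

Both PMDs' dispatchers (acc100_enqueue_enc() / acc200_enqueue_enc()) would
then call the shared helper instead of carrying a private copy.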

> +
>   /* Enqueue LDPC encode operations for ACC200 device in TB mode. */
>   static uint16_t
>   acc200_enqueue_ldpc_enc_tb(struct rte_bbdev_queue_data *q_data,
> @@ -1623,18 +2290,18 @@
>   	return i;
>   }
>   
> -/* Check room in AQ for the enqueues batches into Qmgr */
> -static int32_t
> -acc200_aq_avail(struct rte_bbdev_queue_data *q_data, uint16_t num_ops)
> +/* Enqueue encode operations for ACC200 device. */
> +static uint16_t
> +acc200_enqueue_enc(struct rte_bbdev_queue_data *q_data,
> +		struct rte_bbdev_enc_op **ops, uint16_t num)
>   {
> -	struct acc_queue *q = q_data->queue_private;
> -	int32_t aq_avail = q->aq_depth -
> -			((q->aq_enqueued - q->aq_dequeued +
> -			ACC_MAX_QUEUE_DEPTH) % ACC_MAX_QUEUE_DEPTH)
> -			- (num_ops >> 7);
> -	if (aq_avail <= 0)
> -		acc_enqueue_queue_full(q_data);
> -	return aq_avail;
> +	int32_t aq_avail = acc_aq_avail(q_data, num);
> +	if (unlikely((aq_avail <= 0) || (num == 0)))
> +		return 0;
> +	if (ops[0]->turbo_enc.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK)
> +		return acc200_enqueue_enc_tb(q_data, ops, num);
> +	else
> +		return acc200_enqueue_enc_cb(q_data, ops, num);
>   }
>   
>   /* Enqueue encode operations for ACC200 device. */
> @@ -1652,6 +2319,47 @@
>   		return acc200_enqueue_ldpc_enc_cb(q_data, ops, num);
>   }
>   
> +
> +/* Enqueue decode operations for ACC200 device in CB mode */
> +static uint16_t
> +acc200_enqueue_dec_cb(struct rte_bbdev_queue_data *q_data,
> +		struct rte_bbdev_dec_op **ops, uint16_t num)
> +{
> +	struct acc_queue *q = q_data->queue_private;
> +	int32_t avail = acc_ring_avail_enq(q);
> +	uint16_t i;
> +	union acc_dma_desc *desc;
> +	int ret;
> +
> +	for (i = 0; i < num; ++i) {
> +		/* Check if there are available space for further processing */
> +		if (unlikely(avail - 1 < 0))
> +			break;
> +		avail -= 1;
> +
> +		ret = enqueue_dec_one_op_cb(q, ops[i], i);
> +		if (ret < 0)
> +			break;
> +	}
> +
> +	if (unlikely(i == 0))
> +		return 0; /* Nothing to enqueue */
> +
> +	/* Set SDone in last CB in enqueued ops for CB mode*/
> +	desc = q->ring_addr + ((q->sw_ring_head + i - 1)
> +			& q->sw_ring_wrap_mask);
> +	desc->req.sdone_enable = 1;
> +	desc->req.irq_enable = q->irq_enable;
> +
> +	acc_dma_enqueue(q, i, &q_data->queue_stats);
> +
> +	/* Update stats */
> +	q_data->queue_stats.enqueued_count += i;
> +	q_data->queue_stats.enqueue_err_count += num - i;
> +
> +	return i;
> +}

This function is strictly identical to acc100_enqueue_dec_cb(); it
should be shared.

> +
>   /* Enqueue decode operations for ACC200 device in TB mode */
>   static uint16_t
>   acc200_enqueue_ldpc_dec_tb(struct rte_bbdev_queue_data *q_data,
> @@ -1740,12 +2448,64 @@
>   	return i;
>   }
>   
> +
> +/* Enqueue decode operations for ACC200 device in TB mode */
> +static uint16_t
> +acc200_enqueue_dec_tb(struct rte_bbdev_queue_data *q_data,
> +		struct rte_bbdev_dec_op **ops, uint16_t num)
> +{
> +	struct acc_queue *q = q_data->queue_private;
> +	int32_t avail = acc_ring_avail_enq(q);
> +	uint16_t i, enqueued_cbs = 0;
> +	uint8_t cbs_in_tb;
> +	int ret;
> +
> +	for (i = 0; i < num; ++i) {
> +		cbs_in_tb = get_num_cbs_in_tb_dec(&ops[i]->turbo_dec);
> +		/* Check if there are available space for further processing */
> +		if (unlikely((avail - cbs_in_tb < 0) || (cbs_in_tb == 0))) {
> +			acc_enqueue_ring_full(q_data);
> +			break;
> +		}
> +		avail -= cbs_in_tb;
> +
> +		ret = enqueue_dec_one_op_tb(q, ops[i], enqueued_cbs, cbs_in_tb);
> +		if (ret <= 0) {
> +			acc_enqueue_invalid(q_data);
> +			break;
> +		}
> +		enqueued_cbs += ret;
> +	}
> +
> +	acc_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats);
> +
> +	/* Update stats */
> +	q_data->queue_stats.enqueued_count += i;
> +	q_data->queue_stats.enqueue_err_count += num - i;
> +
> +	return i;
> +}

This function is strictly identical to acc100_enqueue_dec_tb(); it
should be shared.

> +/* Enqueue decode operations for ACC200 device. */
> +static uint16_t
> +acc200_enqueue_dec(struct rte_bbdev_queue_data *q_data,
> +		struct rte_bbdev_dec_op **ops, uint16_t num)
> +{
> +	int32_t aq_avail = acc_aq_avail(q_data, num);
> +	if (unlikely((aq_avail <= 0) || (num == 0)))
> +		return 0;
> +	if (ops[0]->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK)
> +		return acc200_enqueue_dec_tb(q_data, ops, num);
> +	else
> +		return acc200_enqueue_dec_cb(q_data, ops, num);
> +}

Ditto

>   /* Enqueue decode operations for ACC200 device. */
>   static uint16_t
>   acc200_enqueue_ldpc_dec(struct rte_bbdev_queue_data *q_data,
>   		struct rte_bbdev_dec_op **ops, uint16_t num)
>   {
> -	int32_t aq_avail = acc200_aq_avail(q_data, num);
> +	int32_t aq_avail = acc_aq_avail(q_data, num);
>   	if (unlikely((aq_avail <= 0) || (num == 0)))
>   		return 0;
>   	if (ops[0]->ldpc_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK)
> @@ -2093,6 +2853,51 @@
>   	return cb_idx;
>   }
>   
> +/* Dequeue encode operations from ACC200 device. */
> +static uint16_t
> +acc200_dequeue_enc(struct rte_bbdev_queue_data *q_data,
> +		struct rte_bbdev_enc_op **ops, uint16_t num)
> +{
> +	struct acc_queue *q = q_data->queue_private;
> +	uint32_t avail = acc_ring_avail_deq(q);
> +	uint32_t aq_dequeued = 0;
> +	uint16_t i, dequeued_ops = 0, dequeued_descs = 0;
> +	int ret;
> +	struct rte_bbdev_enc_op *op;
> +	if (avail == 0)
> +		return 0;
> +	op = (q->ring_addr + (q->sw_ring_tail &
> +			q->sw_ring_wrap_mask))->req.op_addr;
> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> +	if (unlikely(ops == NULL || q == NULL || op == NULL))
> +		return 0;
> +#endif

q is dereferenced before it is checked.

> +	int cbm = op->turbo_enc.code_block_mode;

Don't mix declarations & code.
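
For illustration, a minimal sketch of the reordering meant for
acc200_dequeue_enc() -- the same lines as in the patch, with declarations
grouped at the top and the pointers checked before they are dereferenced
(whether those checks stay under RTE_LIBRTE_BBDEV_DEBUG is a separate
question):

	struct acc_queue *q = q_data->queue_private;
	uint32_t avail;
	uint32_t aq_dequeued = 0;
	uint16_t i, dequeued_ops = 0, dequeued_descs = 0;
	int ret, cbm;
	struct rte_bbdev_enc_op *op;

	if (unlikely(ops == NULL || q == NULL))
		return 0;
	avail = acc_ring_avail_deq(q);
	if (avail == 0)
		return 0;
	op = (q->ring_addr + (q->sw_ring_tail &
			q->sw_ring_wrap_mask))->req.op_addr;
	if (unlikely(op == NULL))
		return 0;
	cbm = op->turbo_enc.code_block_mode;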

> +	for (i = 0; i < num; i++) {
> +		if (cbm == RTE_BBDEV_TRANSPORT_BLOCK)
> +			ret = dequeue_enc_one_op_tb(q, &ops[dequeued_ops],
> +					&dequeued_ops, &aq_dequeued,
> +					&dequeued_descs);
> +		else
> +			ret = dequeue_enc_one_op_cb(q, &ops[dequeued_ops],
> +					&dequeued_ops, &aq_dequeued,
> +					&dequeued_descs);
> +		if (ret < 0)
> +			break;
> +		if (dequeued_ops >= num)
> +			break;
> +	}
> +
> +	q->aq_dequeued += aq_dequeued;
> +	q->sw_ring_tail += dequeued_descs;
> +
> +	/* Update enqueue stats */
> +	q_data->queue_stats.dequeued_count += dequeued_ops;
> +
> +	return dequeued_ops;
> +}
> +
>   /* Dequeue LDPC encode operations from ACC200 device. */
>   static uint16_t
>   acc200_dequeue_ldpc_enc(struct rte_bbdev_queue_data *q_data,
> @@ -2140,6 +2945,51 @@
>   
>   /* Dequeue decode operations from ACC200 device. */
>   static uint16_t
> +acc200_dequeue_dec(struct rte_bbdev_queue_data *q_data,
> +		struct rte_bbdev_dec_op **ops, uint16_t num)
> +{
> +	struct acc_queue *q = q_data->queue_private;
> +	uint16_t dequeue_num;
> +	uint32_t avail = acc_ring_avail_deq(q);
> +	uint32_t aq_dequeued = 0;
> +	uint16_t i;
> +	uint16_t dequeued_cbs = 0;
> +	struct rte_bbdev_dec_op *op;
> +	int ret;
> +
> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> +	if (unlikely(ops == 0 && q == NULL))
> +		return 0;
> +#endif
> +
> +	dequeue_num = (avail < num) ? avail : num;
> +
> +	for (i = 0; i < dequeue_num; ++i) {
> +		op = (q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
> +			& q->sw_ring_wrap_mask))->req.op_addr;
> +		if (op->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK)
> +			ret = dequeue_dec_one_op_tb(q, &ops[i], dequeued_cbs,
> +					&aq_dequeued);
> +		else
> +			ret = dequeue_dec_one_op_cb(q_data, q, &ops[i],
> +					dequeued_cbs, &aq_dequeued);
> +
> +		if (ret <= 0)
> +			break;
> +		dequeued_cbs += ret;
> +	}
> +
> +	q->aq_dequeued += aq_dequeued;
> +	q->sw_ring_tail += dequeued_cbs;
> +
> +	/* Update enqueue stats */
> +	q_data->queue_stats.dequeued_count += i;
> +
> +	return i;
> +}
> +
> +/* Dequeue decode operations from ACC200 device. */
> +static uint16_t
>   acc200_dequeue_ldpc_dec(struct rte_bbdev_queue_data *q_data,
>   		struct rte_bbdev_dec_op **ops, uint16_t num)
>   {
> @@ -2191,6 +3041,10 @@
>   	struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev->device);
>   
>   	dev->dev_ops = &acc200_bbdev_ops;
> +	dev->enqueue_enc_ops = acc200_enqueue_enc;
> +	dev->enqueue_dec_ops = acc200_enqueue_dec;
> +	dev->dequeue_enc_ops = acc200_dequeue_enc;
> +	dev->dequeue_dec_ops = acc200_dequeue_dec;
>   	dev->enqueue_ldpc_enc_ops = acc200_enqueue_ldpc_enc;
>   	dev->enqueue_ldpc_dec_ops = acc200_enqueue_ldpc_dec;
>   	dev->dequeue_ldpc_enc_ops = acc200_dequeue_ldpc_enc;

Overall, there are functions that are strictly identical to ACC100; they
should be shared and not duplicated.

If at some point they need to diverge, let's have device specific ones,
but only when it becomes needed.

Maxime
  
Chautru, Nicolas Sept. 23, 2022, 10:21 p.m. UTC | #2
Hi Maxime, 

> -----Original Message-----
> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> Sent: Friday, September 23, 2022 2:00 AM
> To: Chautru, Nicolas <nicolas.chautru@intel.com>; dev@dpdk.org;
> thomas@monjalon.net
> Cc: trix@redhat.com; mdr@ashroe.eu; Richardson, Bruce
> <bruce.richardson@intel.com>; hemant.agrawal@nxp.com;
> david.marchand@redhat.com; stephen@networkplumber.org; Vargas, Hernan
> <hernan.vargas@intel.com>
> Subject: Re: [PATCH v4 09/14] baseband/acc: add LTE processing functions
> 
> 
> 
> On 9/22/22 02:27, Nic Chautru wrote:
> > Add functions and capability for 4G FEC
> >
> > Signed-off-by: Nic Chautru <nicolas.chautru@intel.com>
> > [...]
> > +	mbuf_total_left = op->turbo_dec.input.length;
> > +	seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
> > +
> > +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> > +	if (unlikely(input == NULL)) {
> > +		rte_bbdev_log(ERR, "Invalid mbuf pointer");
> > +		return -EFAULT;
> > +	}
> > +#endif
> 
> It makes sense to have this check present unconditionally.
> Also, putting it here is useless since rte_pktmbuf_data_len() dereferences it
> just before. So if NULL, it will crash before being checked.

We would not want to do comprehensive checks on input parameters in production.
We had been expecting this kind of check to only happen in debug mode, for users to troubleshoot their code more easily.
I can take it out for now, but it would be good to review further the use case of the debug build.

> 
> > [...]
> > +	input = op->turbo_dec.input.data;
> 
> Here you may want to check that it is not NULL, at least for consistency with
> enqueue_dec_one_op_cb()

As noted above, this is taken out.

> 
> > +	h_output_head = h_output = op->turbo_dec.hard_output.data;
> > +	s_output_head = s_output = op->turbo_dec.soft_output.data;
> > +	in_offset = op->turbo_dec.input.offset;
> > +	h_out_offset = op->turbo_dec.hard_output.offset;
> > +	s_out_offset = op->turbo_dec.soft_output.offset;
> > +	h_out_length = s_out_length = 0;
> > +	mbuf_total_left = op->turbo_dec.input.length;
> > +	c = op->turbo_dec.tb_params.c;
> > +	r = op->turbo_dec.tb_params.r;
> > +
> > +	while (mbuf_total_left > 0 && r < c) {
> > +
> > +		seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
> > +
> > +		/* Set up DMA descriptor */
> > +		desc = q->ring_addr + ((q->sw_ring_head +
> total_enqueued_cbs)
> > +				& q->sw_ring_wrap_mask);
> > +		desc->req.data_ptrs[0].address = q->ring_addr_iova +
> fcw_offset;
> > +		desc->req.data_ptrs[0].blen = ACC_FCW_TD_BLEN;
> > +		ret = acc200_dma_desc_td_fill(op, &desc->req, &input,
> > +				h_output, s_output, &in_offset,
> &h_out_offset,
> > +				&s_out_offset, &h_out_length,
> &s_out_length,
> > +				&mbuf_total_left, &seg_total_left, r);
> > +
> > +		if (unlikely(ret < 0))
> > +			return ret;
> > +
> > +		/* Hard output */
> > +		mbuf_append(h_output_head, h_output, h_out_length);
> > +
> > +		/* Soft output */
> > +		if (check_bit(op->turbo_dec.op_flags,
> > +				RTE_BBDEV_TURBO_SOFT_OUTPUT))
> > +			mbuf_append(s_output_head, s_output,
> s_out_length);
> > +
> > +		/* Set total number of CBs in TB */
> > +		desc->req.cbs_in_tb = cbs_in_tb;
> > +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> > +		rte_memdump(stderr, "FCW", &desc->req.fcw_td,
> > +				sizeof(desc->req.fcw_td) - 8);
> > +		rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc)); #endif
> > +
> > +		if (seg_total_left == 0) {
> > +			/* Go to the next mbuf */
> > +			input = input->next;
> > +			in_offset = 0;
> > +			h_output = h_output->next;
> > +			h_out_offset = 0;
> > +
> > +			if (check_bit(op->turbo_dec.op_flags,
> > +					RTE_BBDEV_TURBO_SOFT_OUTPUT))
> {
> > +				s_output = s_output->next;
> > +				s_out_offset = 0;
> > +			}
> > +		}
> > +
> > +		total_enqueued_cbs++;
> > +		current_enqueued_cbs++;
> > +		r++;
> > +	}
> > +
> > +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> > +	if (check_mbuf_total_left(mbuf_total_left) != 0)
> > +		return -EINVAL;
> > +#endif
> 
> If it can happen, then it should always be present.

Same comment as above, not meant for production code.

> 
> > +	/* Set SDone on last CB descriptor for TB mode */
> > +	desc->req.sdone_enable = 1;
> > +	desc->req.irq_enable = q->irq_enable;
> > +
> > +	return current_enqueued_cbs;
> > +}
> > +
> > +/* Enqueue encode operations for ACC200 device in CB mode. */ static
> > +uint16_t acc200_enqueue_enc_cb(struct rte_bbdev_queue_data *q_data,
> > +		struct rte_bbdev_enc_op **ops, uint16_t num) {
> > +	struct acc_queue *q = q_data->queue_private;
> > +	int32_t avail = acc_ring_avail_enq(q);
> > +	uint16_t i;
> > +	union acc_dma_desc *desc;
> > +	int ret;
> > +
> > +	for (i = 0; i < num; ++i) {
> > +		/* Check if there are available space for further processing */
> > +		if (unlikely(avail - 1 < 0)) {
> > +			acc_enqueue_ring_full(q_data);
> > +			break;
> > +		}
> > +		avail -= 1;
> > +
> > +		ret = enqueue_enc_one_op_cb(q, ops[i], i);
> > +		if (ret < 0) {
> > +			acc_enqueue_invalid(q_data);
> > +			break;
> > +		}
> > +	}
> > +
> > +	if (unlikely(i == 0))
> > +		return 0; /* Nothing to enqueue */
> > +
> > +	/* Set SDone in last CB in enqueued ops for CB mode*/
> > +	desc = q->ring_addr + ((q->sw_ring_head + i - 1)
> > +			& q->sw_ring_wrap_mask);
> > +	desc->req.sdone_enable = 1;
> > +	desc->req.irq_enable = q->irq_enable;
> > +
> > +	acc_dma_enqueue(q, i, &q_data->queue_stats);
> > +
> > +	/* Update stats */
> > +	q_data->queue_stats.enqueued_count += i;
> > +	q_data->queue_stats.enqueue_err_count += num - i;
> > +	return i;
> > +}
> > +
> > +/** Enqueue encode operations for ACC200 device in CB mode. */ static
> > +inline uint16_t acc200_enqueue_ldpc_enc_cb(struct
> > +rte_bbdev_queue_data *q_data,
> > +		struct rte_bbdev_enc_op **ops, uint16_t num)
> >   {
> >   	struct acc_queue *q = q_data->queue_private;
> >   	int32_t avail = acc_ring_avail_enq(q); @@ -1583,6 +2211,45 @@
> >   	return i;
> >   }
> >
> > +/* Enqueue encode operations for ACC200 device in TB mode. */ static
> > +uint16_t acc200_enqueue_enc_tb(struct rte_bbdev_queue_data *q_data,
> > +		struct rte_bbdev_enc_op **ops, uint16_t num) {
> > +	struct acc_queue *q = q_data->queue_private;
> > +	int32_t avail = acc_ring_avail_enq(q);
> > +	uint16_t i, enqueued_cbs = 0;
> > +	uint8_t cbs_in_tb;
> > +	int ret;
> > +
> > +	for (i = 0; i < num; ++i) {
> > +		cbs_in_tb = get_num_cbs_in_tb_enc(&ops[i]->turbo_enc);
> > +		/* Check if there are available space for further processing */
> > +		if (unlikely((avail - cbs_in_tb < 0) || (cbs_in_tb == 0))) {
> > +			acc_enqueue_ring_full(q_data);
> > +			break;
> > +		}
> > +		avail -= cbs_in_tb;
> > +
> > +		ret = enqueue_enc_one_op_tb(q, ops[i], enqueued_cbs,
> cbs_in_tb);
> > +		if (ret <= 0) {
> > +			acc_enqueue_invalid(q_data);
> > +			break;
> > +		}
> > +		enqueued_cbs += ret;
> > +	}
> > +	if (unlikely(enqueued_cbs == 0))
> > +		return 0; /* Nothing to enqueue */
> > +
> > +	acc_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats);
> > +
> > +	/* Update stats */
> > +	q_data->queue_stats.enqueued_count += i;
> > +	q_data->queue_stats.enqueue_err_count += num - i;
> > +
> > +	return i;
> > +}
> 
> 
> This function is strictly identical to acc100_enqueue_enc_tb(), it should be
> shared.

Actually these and/or the underlying functions have differences (or will, once the ACC100 PMD changes are applied).
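
Purely to illustrate what sharing could look like if the loops really did stay
identical (hypothetical sketch only — a helper like acc_enqueue_enc_tb_common()
does not exist in acc_common.h today, and the device-specific differences
mentioned above may make it impractical):

	/* Hypothetical shared helper; the name, the callback type and its
	 * placement in acc_common.h are illustrative assumptions only. */
	typedef int (*acc_enq_one_tb_fn)(struct acc_queue *q,
			struct rte_bbdev_enc_op *op,
			uint16_t enqueued_cbs, uint8_t cbs_in_tb);

	static inline uint16_t
	acc_enqueue_enc_tb_common(struct rte_bbdev_queue_data *q_data,
			struct rte_bbdev_enc_op **ops, uint16_t num,
			acc_enq_one_tb_fn enq_one)
	{
		struct acc_queue *q = q_data->queue_private;
		int32_t avail = acc_ring_avail_enq(q);
		uint16_t i, enqueued_cbs = 0;
		uint8_t cbs_in_tb;
		int ret;

		for (i = 0; i < num; ++i) {
			cbs_in_tb = get_num_cbs_in_tb_enc(&ops[i]->turbo_enc);
			if (unlikely((avail - cbs_in_tb < 0) || (cbs_in_tb == 0))) {
				acc_enqueue_ring_full(q_data);
				break;
			}
			avail -= cbs_in_tb;
			/* Device-specific work is pushed behind the callback. */
			ret = enq_one(q, ops[i], enqueued_cbs, cbs_in_tb);
			if (ret <= 0) {
				acc_enqueue_invalid(q_data);
				break;
			}
			enqueued_cbs += ret;
		}
		if (unlikely(enqueued_cbs == 0))
			return 0; /* Nothing to enqueue */

		acc_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats);
		q_data->queue_stats.enqueued_count += i;
		q_data->queue_stats.enqueue_err_count += num - i;
		return i;
	}

	/* The per-device wrapper would then reduce to something like:
	 *     return acc_enqueue_enc_tb_common(q_data, ops, num,
	 *             enqueue_enc_one_op_tb);
	 */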

> 
> > +
> >   /* Enqueue LDPC encode operations for ACC200 device in TB mode. */
> >   static uint16_t
> >   acc200_enqueue_ldpc_enc_tb(struct rte_bbdev_queue_data *q_data, @@
> > -1623,18 +2290,18 @@
> >   	return i;
> >   }
> >
> > -/* Check room in AQ for the enqueues batches into Qmgr */ -static
> > int32_t -acc200_aq_avail(struct rte_bbdev_queue_data *q_data, uint16_t
> > num_ops)
> > +/* Enqueue encode operations for ACC200 device. */ static uint16_t
> > +acc200_enqueue_enc(struct rte_bbdev_queue_data *q_data,
> > +		struct rte_bbdev_enc_op **ops, uint16_t num)
> >   {
> > -	struct acc_queue *q = q_data->queue_private;
> > -	int32_t aq_avail = q->aq_depth -
> > -			((q->aq_enqueued - q->aq_dequeued +
> > -			ACC_MAX_QUEUE_DEPTH) %
> ACC_MAX_QUEUE_DEPTH)
> > -			- (num_ops >> 7);
> > -	if (aq_avail <= 0)
> > -		acc_enqueue_queue_full(q_data);
> > -	return aq_avail;
> > +	int32_t aq_avail = acc_aq_avail(q_data, num);
> > +	if (unlikely((aq_avail <= 0) || (num == 0)))
> > +		return 0;
> > +	if (ops[0]->turbo_enc.code_block_mode ==
> RTE_BBDEV_TRANSPORT_BLOCK)
> > +		return acc200_enqueue_enc_tb(q_data, ops, num);
> > +	else
> > +		return acc200_enqueue_enc_cb(q_data, ops, num);
> >   }
> >
> >   /* Enqueue encode operations for ACC200 device. */ @@ -1652,6
> > +2319,47 @@
> >   		return acc200_enqueue_ldpc_enc_cb(q_data, ops, num);
> >   }
> >
> > +
> > +/* Enqueue decode operations for ACC200 device in CB mode */ static
> > +uint16_t acc200_enqueue_dec_cb(struct rte_bbdev_queue_data *q_data,
> > +		struct rte_bbdev_dec_op **ops, uint16_t num) {
> > +	struct acc_queue *q = q_data->queue_private;
> > +	int32_t avail = acc_ring_avail_enq(q);
> > +	uint16_t i;
> > +	union acc_dma_desc *desc;
> > +	int ret;
> > +
> > +	for (i = 0; i < num; ++i) {
> > +		/* Check if there are available space for further processing */
> > +		if (unlikely(avail - 1 < 0))
> > +			break;
> > +		avail -= 1;
> > +
> > +		ret = enqueue_dec_one_op_cb(q, ops[i], i);
> > +		if (ret < 0)
> > +			break;
> > +	}
> > +
> > +	if (unlikely(i == 0))
> > +		return 0; /* Nothing to enqueue */
> > +
> > +	/* Set SDone in last CB in enqueued ops for CB mode*/
> > +	desc = q->ring_addr + ((q->sw_ring_head + i - 1)
> > +			& q->sw_ring_wrap_mask);
> > +	desc->req.sdone_enable = 1;
> > +	desc->req.irq_enable = q->irq_enable;
> > +
> > +	acc_dma_enqueue(q, i, &q_data->queue_stats);
> > +
> > +	/* Update stats */
> > +	q_data->queue_stats.enqueued_count += i;
> > +	q_data->queue_stats.enqueue_err_count += num - i;
> > +
> > +	return i;
> > +}
> 
> This function is strictly identical to acc100_enqueue_dec_cb(), it should be
> shared.

Same comment as above

> 
> > +
> >   /* Enqueue decode operations for ACC200 device in TB mode */
> >   static uint16_t
> >   acc200_enqueue_ldpc_dec_tb(struct rte_bbdev_queue_data *q_data, @@
> > -1740,12 +2448,64 @@
> >   	return i;
> >   }
> >
> > +
> > +/* Enqueue decode operations for ACC200 device in TB mode */ static
> > +uint16_t acc200_enqueue_dec_tb(struct rte_bbdev_queue_data *q_data,
> > +		struct rte_bbdev_dec_op **ops, uint16_t num) {
> > +	struct acc_queue *q = q_data->queue_private;
> > +	int32_t avail = acc_ring_avail_enq(q);
> > +	uint16_t i, enqueued_cbs = 0;
> > +	uint8_t cbs_in_tb;
> > +	int ret;
> > +
> > +	for (i = 0; i < num; ++i) {
> > +		cbs_in_tb = get_num_cbs_in_tb_dec(&ops[i]->turbo_dec);
> > +		/* Check if there are available space for further processing */
> > +		if (unlikely((avail - cbs_in_tb < 0) || (cbs_in_tb == 0))) {
> > +			acc_enqueue_ring_full(q_data);
> > +			break;
> > +		}
> > +		avail -= cbs_in_tb;
> > +
> > +		ret = enqueue_dec_one_op_tb(q, ops[i], enqueued_cbs,
> cbs_in_tb);
> > +		if (ret <= 0) {
> > +			acc_enqueue_invalid(q_data);
> > +			break;
> > +		}
> > +		enqueued_cbs += ret;
> > +	}
> > +
> > +	acc_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats);
> > +
> > +	/* Update stats */
> > +	q_data->queue_stats.enqueued_count += i;
> > +	q_data->queue_stats.enqueue_err_count += num - i;
> > +
> > +	return i;
> > +}
> 
> This function is strictly identical to acc100_enqueue_dec_tb(), it should be
> shared.
> 
> > +/* Enqueue decode operations for ACC200 device. */ static uint16_t
> > +acc200_enqueue_dec(struct rte_bbdev_queue_data *q_data,
> > +		struct rte_bbdev_dec_op **ops, uint16_t num) {
> > +	int32_t aq_avail = acc_aq_avail(q_data, num);
> > +	if (unlikely((aq_avail <= 0) || (num == 0)))
> > +		return 0;
> > +	if (ops[0]->turbo_dec.code_block_mode ==
> RTE_BBDEV_TRANSPORT_BLOCK)
> > +		return acc200_enqueue_dec_tb(q_data, ops, num);
> > +	else
> > +		return acc200_enqueue_dec_cb(q_data, ops, num); }
> 
> Ditto
> 
> >   /* Enqueue decode operations for ACC200 device. */
> >   static uint16_t
> >   acc200_enqueue_ldpc_dec(struct rte_bbdev_queue_data *q_data,
> >   		struct rte_bbdev_dec_op **ops, uint16_t num)
> >   {
> > -	int32_t aq_avail = acc200_aq_avail(q_data, num);
> > +	int32_t aq_avail = acc_aq_avail(q_data, num);
> >   	if (unlikely((aq_avail <= 0) || (num == 0)))
> >   		return 0;
> >   	if (ops[0]->ldpc_dec.code_block_mode ==
> RTE_BBDEV_TRANSPORT_BLOCK)
> > @@ -2093,6 +2853,51 @@
> >   	return cb_idx;
> >   }
> >
> > +/* Dequeue encode operations from ACC200 device. */ static uint16_t
> > +acc200_dequeue_enc(struct rte_bbdev_queue_data *q_data,
> > +		struct rte_bbdev_enc_op **ops, uint16_t num) {
> > +	struct acc_queue *q = q_data->queue_private;
> > +	uint32_t avail = acc_ring_avail_deq(q);
> > +	uint32_t aq_dequeued = 0;
> > +	uint16_t i, dequeued_ops = 0, dequeued_descs = 0;
> > +	int ret;
> > +	struct rte_bbdev_enc_op *op;
> > +	if (avail == 0)
> > +		return 0;
> > +	op = (q->ring_addr + (q->sw_ring_tail &
> > +			q->sw_ring_wrap_mask))->req.op_addr;
> > +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> > +	if (unlikely(ops == NULL || q == NULL || op == NULL))
> > +		return 0;
> > +#endif
> 
> q is dereferenced before it is checked.

Thanks. Taking it out.

> 
> > +	int cbm = op->turbo_enc.code_block_mode;
> 
> Don't mix declarations & code.

OK
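
For reference, a rough sketch (illustrative only, not the final code) of how the
start of acc200_dequeue_enc() could look with both comments addressed — the
debug check dropped and cbm declared together with the other locals:

	static uint16_t
	acc200_dequeue_enc(struct rte_bbdev_queue_data *q_data,
			struct rte_bbdev_enc_op **ops, uint16_t num)
	{
		struct acc_queue *q = q_data->queue_private;
		uint32_t avail = acc_ring_avail_deq(q);
		uint32_t aq_dequeued = 0;
		uint16_t i, dequeued_ops = 0, dequeued_descs = 0;
		struct rte_bbdev_enc_op *op;
		int cbm;	/* declared with the other locals, not mid-function */
		int ret;

		if (avail == 0)
			return 0;
		op = (q->ring_addr + (q->sw_ring_tail &
				q->sw_ring_wrap_mask))->req.op_addr;
		cbm = op->turbo_enc.code_block_mode;
		/* ... loop over dequeue_enc_one_op_tb()/_cb() as in the patch ... */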

> 
> > +	for (i = 0; i < num; i++) {
> > +		if (cbm == RTE_BBDEV_TRANSPORT_BLOCK)
> > +			ret = dequeue_enc_one_op_tb(q,
> &ops[dequeued_ops],
> > +					&dequeued_ops, &aq_dequeued,
> > +					&dequeued_descs);
> > +		else
> > +			ret = dequeue_enc_one_op_cb(q,
> &ops[dequeued_ops],
> > +					&dequeued_ops, &aq_dequeued,
> > +					&dequeued_descs);
> > +		if (ret < 0)
> > +			break;
> > +		if (dequeued_ops >= num)
> > +			break;
> > +	}
> > +
> > +	q->aq_dequeued += aq_dequeued;
> > +	q->sw_ring_tail += dequeued_descs;
> > +
> > +	/* Update enqueue stats */
> > +	q_data->queue_stats.dequeued_count += dequeued_ops;
> > +
> > +	return dequeued_ops;
> > +}
> > +
> >   /* Dequeue LDPC encode operations from ACC200 device. */
> >   static uint16_t
> >   acc200_dequeue_ldpc_enc(struct rte_bbdev_queue_data *q_data, @@
> > -2140,6 +2945,51 @@
> >
> >   /* Dequeue decode operations from ACC200 device. */
> >   static uint16_t
> > +acc200_dequeue_dec(struct rte_bbdev_queue_data *q_data,
> > +		struct rte_bbdev_dec_op **ops, uint16_t num) {
> > +	struct acc_queue *q = q_data->queue_private;
> > +	uint16_t dequeue_num;
> > +	uint32_t avail = acc_ring_avail_deq(q);
> > +	uint32_t aq_dequeued = 0;
> > +	uint16_t i;
> > +	uint16_t dequeued_cbs = 0;
> > +	struct rte_bbdev_dec_op *op;
> > +	int ret;
> > +
> > +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> > +	if (unlikely(ops == 0 && q == NULL))
> > +		return 0;
> > +#endif
> > +
> > +	dequeue_num = (avail < num) ? avail : num;
> > +
> > +	for (i = 0; i < dequeue_num; ++i) {
> > +		op = (q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
> > +			& q->sw_ring_wrap_mask))->req.op_addr;
> > +		if (op->turbo_dec.code_block_mode ==
> RTE_BBDEV_TRANSPORT_BLOCK)
> > +			ret = dequeue_dec_one_op_tb(q, &ops[i],
> dequeued_cbs,
> > +					&aq_dequeued);
> > +		else
> > +			ret = dequeue_dec_one_op_cb(q_data, q, &ops[i],
> > +					dequeued_cbs, &aq_dequeued);
> > +
> > +		if (ret <= 0)
> > +			break;
> > +		dequeued_cbs += ret;
> > +	}
> > +
> > +	q->aq_dequeued += aq_dequeued;
> > +	q->sw_ring_tail += dequeued_cbs;
> > +
> > +	/* Update enqueue stats */
> > +	q_data->queue_stats.dequeued_count += i;
> > +
> > +	return i;
> > +}
> > +
> > +/* Dequeue decode operations from ACC200 device. */ static uint16_t
> >   acc200_dequeue_ldpc_dec(struct rte_bbdev_queue_data *q_data,
> >   		struct rte_bbdev_dec_op **ops, uint16_t num)
> >   {
> > @@ -2191,6 +3041,10 @@
> >   	struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev->device);
> >
> >   	dev->dev_ops = &acc200_bbdev_ops;
> > +	dev->enqueue_enc_ops = acc200_enqueue_enc;
> > +	dev->enqueue_dec_ops = acc200_enqueue_dec;
> > +	dev->dequeue_enc_ops = acc200_dequeue_enc;
> > +	dev->dequeue_dec_ops = acc200_dequeue_dec;
> >   	dev->enqueue_ldpc_enc_ops = acc200_enqueue_ldpc_enc;
> >   	dev->enqueue_ldpc_dec_ops = acc200_enqueue_ldpc_dec;
> >   	dev->dequeue_ldpc_enc_ops = acc200_dequeue_ldpc_enc;
> 
> Overall, there are functions that are strictly identical to ACC100; they should be
> shared and not duplicated.
> 
> If at some point they need to diverge, let's have device-specific ones, but only
> when it becomes needed.

Actually the ones that are not in acc_common.h end up having differences, notably due to SW mitigations/workarounds specific to ACC100.

> 
> Maxime
  
Maxime Coquelin Sept. 27, 2022, 1:33 p.m. UTC | #3
On 9/24/22 00:21, Chautru, Nicolas wrote:
> Hi Maxime,
> 
>> -----Original Message-----
>> From: Maxime Coquelin <maxime.coquelin@redhat.com>
>> Sent: Friday, September 23, 2022 2:00 AM
>> To: Chautru, Nicolas <nicolas.chautru@intel.com>; dev@dpdk.org;
>> thomas@monjalon.net
>> Cc: trix@redhat.com; mdr@ashroe.eu; Richardson, Bruce
>> <bruce.richardson@intel.com>; hemant.agrawal@nxp.com;
>> david.marchand@redhat.com; stephen@networkplumber.org; Vargas, Hernan
>> <hernan.vargas@intel.com>
>> Subject: Re: [PATCH v4 09/14] baseband/acc: add LTE processing functions
>>
>>
>>
>> On 9/22/22 02:27, Nic Chautru wrote:
>>> Add functions and capability for 4G FEC
>>>
>>> Signed-off-by: Nic Chautru <nicolas.chautru@intel.com>
>>> ---
>>>    drivers/baseband/acc/rte_acc200_pmd.c | 894
>> +++++++++++++++++++++++++++++++++-
>>>    1 file changed, 874 insertions(+), 20 deletions(-)
>>>
>>> diff --git a/drivers/baseband/acc/rte_acc200_pmd.c
>>> b/drivers/baseband/acc/rte_acc200_pmd.c
>>> index 1c59559..35ea0fe 100644
>>> --- a/drivers/baseband/acc/rte_acc200_pmd.c
>>> +++ b/drivers/baseband/acc/rte_acc200_pmd.c
>>> @@ -641,6 +641,46 @@
>>>    	int i;
>>>    	static const struct rte_bbdev_op_cap bbdev_capabilities[] = {
>>>    		{
>>> +			.type = RTE_BBDEV_OP_TURBO_DEC,
>>> +			.cap.turbo_dec = {
>>> +				.capability_flags =
>>> +
>> 	RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE |
>>> +					RTE_BBDEV_TURBO_CRC_TYPE_24B |
>>> +					RTE_BBDEV_TURBO_EQUALIZER |
>>> +
>> 	RTE_BBDEV_TURBO_SOFT_OUT_SATURATE |
>>> +
>> 	RTE_BBDEV_TURBO_HALF_ITERATION_EVEN |
>>> +
>> 	RTE_BBDEV_TURBO_CONTINUE_CRC_MATCH |
>>> +					RTE_BBDEV_TURBO_SOFT_OUTPUT |
>>> +
>> 	RTE_BBDEV_TURBO_EARLY_TERMINATION |
>>> +
>> 	RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN |
>>> +
>> 	RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT |
>>> +					RTE_BBDEV_TURBO_MAP_DEC |
>>> +
>> 	RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP |
>>> +
>> 	RTE_BBDEV_TURBO_DEC_SCATTER_GATHER,
>>> +				.max_llr_modulus = INT8_MAX,
>>> +				.num_buffers_src =
>>> +
>> 	RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
>>> +				.num_buffers_hard_out =
>>> +
>> 	RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
>>> +				.num_buffers_soft_out =
>>> +
>> 	RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
>>> +			}
>>> +		},
>>> +		{
>>> +			.type = RTE_BBDEV_OP_TURBO_ENC,
>>> +			.cap.turbo_enc = {
>>> +				.capability_flags =
>>> +
>> 	RTE_BBDEV_TURBO_CRC_24B_ATTACH |
>>> +
>> 	RTE_BBDEV_TURBO_RV_INDEX_BYPASS |
>>> +					RTE_BBDEV_TURBO_RATE_MATCH |
>>> +
>> 	RTE_BBDEV_TURBO_ENC_SCATTER_GATHER,
>>> +				.num_buffers_src =
>>> +
>> 	RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
>>> +				.num_buffers_dst =
>>> +
>> 	RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
>>> +			}
>>> +		},
>>> +		{
>>>    			.type   = RTE_BBDEV_OP_LDPC_ENC,
>>>    			.cap.ldpc_enc = {
>>>    				.capability_flags =
>>> @@ -691,15 +731,17 @@
>>>
>>>    	/* Exposed number of queues */
>>>    	dev_info->num_queues[RTE_BBDEV_OP_NONE] = 0;
>>> -	dev_info->num_queues[RTE_BBDEV_OP_TURBO_DEC] = 0;
>>> -	dev_info->num_queues[RTE_BBDEV_OP_TURBO_ENC] = 0;
>>> +	dev_info->num_queues[RTE_BBDEV_OP_TURBO_DEC] = d-
>>> acc_conf.q_ul_4g.num_aqs_per_groups *
>>> +			d->acc_conf.q_ul_4g.num_qgroups;
>>> +	dev_info->num_queues[RTE_BBDEV_OP_TURBO_ENC] = d-
>>> acc_conf.q_dl_4g.num_aqs_per_groups *
>>> +			d->acc_conf.q_dl_4g.num_qgroups;
>>>    	dev_info->num_queues[RTE_BBDEV_OP_LDPC_DEC] = d-
>>> acc_conf.q_ul_5g.num_aqs_per_groups *
>>>    			d->acc_conf.q_ul_5g.num_qgroups;
>>>    	dev_info->num_queues[RTE_BBDEV_OP_LDPC_ENC] = d-
>>> acc_conf.q_dl_5g.num_aqs_per_groups *
>>>    			d->acc_conf.q_dl_5g.num_qgroups;
>>>    	dev_info->num_queues[RTE_BBDEV_OP_FFT] = 0;
>>> -	dev_info->queue_priority[RTE_BBDEV_OP_TURBO_DEC] = 0;
>>> -	dev_info->queue_priority[RTE_BBDEV_OP_TURBO_ENC] = 0;
>>> +	dev_info->queue_priority[RTE_BBDEV_OP_TURBO_DEC] = d-
>>> acc_conf.q_ul_4g.num_qgroups;
>>> +	dev_info->queue_priority[RTE_BBDEV_OP_TURBO_ENC] =
>>> +d->acc_conf.q_dl_4g.num_qgroups;
>>>    	dev_info->queue_priority[RTE_BBDEV_OP_LDPC_DEC] = d-
>>> acc_conf.q_ul_5g.num_qgroups;
>>>    	dev_info->queue_priority[RTE_BBDEV_OP_LDPC_ENC] = d-
>>> acc_conf.q_dl_5g.num_qgroups;
>>>    	dev_info->queue_priority[RTE_BBDEV_OP_FFT] = 0; @@ -744,6
>> +786,70
>>> @@
>>>    	{.device_id = 0},
>>>    };
>>>
>>> +/* Fill in a frame control word for turbo decoding. */ static inline
>>> +void acc200_fcw_td_fill(const struct rte_bbdev_dec_op *op, struct
>>> +acc_fcw_td *fcw) {
>>> +	fcw->fcw_ver = 1;
>>> +	fcw->num_maps = ACC_FCW_TD_AUTOMAP;
>>> +	fcw->bypass_sb_deint = !check_bit(op->turbo_dec.op_flags,
>>> +			RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE);
>>> +	if (op->turbo_dec.code_block_mode ==
>> RTE_BBDEV_TRANSPORT_BLOCK) {
>>> +		/* FIXME for TB block */
>>> +		fcw->k_pos = op->turbo_dec.tb_params.k_pos;
>>> +		fcw->k_neg = op->turbo_dec.tb_params.k_neg;
>>> +	} else {
>>> +		fcw->k_pos = op->turbo_dec.cb_params.k;
>>> +		fcw->k_neg = op->turbo_dec.cb_params.k;
>>> +	}
>>> +	fcw->c = 1;
>>> +	fcw->c_neg = 1;
>>> +	if (check_bit(op->turbo_dec.op_flags,
>> RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
>>> +		fcw->soft_output_en = 1;
>>> +		fcw->sw_soft_out_dis = 0;
>>> +		fcw->sw_et_cont = check_bit(op->turbo_dec.op_flags,
>>> +
>> 	RTE_BBDEV_TURBO_CONTINUE_CRC_MATCH);
>>> +		fcw->sw_soft_out_saturation = check_bit(op-
>>> turbo_dec.op_flags,
>>> +				RTE_BBDEV_TURBO_SOFT_OUT_SATURATE);
>>> +		if (check_bit(op->turbo_dec.op_flags,
>>> +				RTE_BBDEV_TURBO_EQUALIZER)) {
>>> +			fcw->bypass_teq = 0;
>>> +			fcw->ea = op->turbo_dec.cb_params.e;
>>> +			fcw->eb = op->turbo_dec.cb_params.e;
>>> +			if (op->turbo_dec.rv_index == 0)
>>> +				fcw->k0_start_col = ACC_FCW_TD_RVIDX_0;
>>> +			else if (op->turbo_dec.rv_index == 1)
>>> +				fcw->k0_start_col = ACC_FCW_TD_RVIDX_1;
>>> +			else if (op->turbo_dec.rv_index == 2)
>>> +				fcw->k0_start_col = ACC_FCW_TD_RVIDX_2;
>>> +			else
>>> +				fcw->k0_start_col = ACC_FCW_TD_RVIDX_3;
>>> +		} else {
>>> +			fcw->bypass_teq = 1;
>>> +			fcw->eb = 64; /* avoid undefined value */
>>> +		}
>>> +	} else {
>>> +		fcw->soft_output_en = 0;
>>> +		fcw->sw_soft_out_dis = 1;
>>> +		fcw->bypass_teq = 0;
>>> +	}
>>> +
>>> +	fcw->code_block_mode = 1; /* FIXME */
>>> +	fcw->turbo_crc_type = check_bit(op->turbo_dec.op_flags,
>>> +			RTE_BBDEV_TURBO_CRC_TYPE_24B);
>>> +
>>> +	fcw->ext_td_cold_reg_en = 1;
>>> +	fcw->raw_decoder_input_on = 0;
>>> +	fcw->max_iter = RTE_MAX((uint8_t) op->turbo_dec.iter_max, 2);
>>> +	fcw->min_iter = 2;
>>> +	fcw->half_iter_on = !check_bit(op->turbo_dec.op_flags,
>>> +			RTE_BBDEV_TURBO_HALF_ITERATION_EVEN);
>>> +
>>> +	fcw->early_stop_en = check_bit(op->turbo_dec.op_flags,
>>> +			RTE_BBDEV_TURBO_EARLY_TERMINATION) & !fcw-
>>> soft_output_en;
>>> +	fcw->ext_scale = 0xF;
>>> +}
>>> +
>>>    /* Fill in a frame control word for LDPC decoding. */
>>>    static inline void
>>>    acc200_fcw_ld_fill(struct rte_bbdev_dec_op *op, struct acc_fcw_ld
>>> *fcw, @@ -870,6 +976,89 @@
>>>    }
>>>
>>>    static inline int
>>> +acc200_dma_desc_te_fill(struct rte_bbdev_enc_op *op,
>>> +		struct acc_dma_req_desc *desc, struct rte_mbuf **input,
>>> +		struct rte_mbuf *output, uint32_t *in_offset,
>>> +		uint32_t *out_offset, uint32_t *out_length,
>>> +		uint32_t *mbuf_total_left, uint32_t *seg_total_left, uint8_t r)
>> {
>>> +	int next_triplet = 1; /* FCW already done */
>>> +	uint32_t e, ea, eb, length;
>>> +	uint16_t k, k_neg, k_pos;
>>> +	uint8_t cab, c_neg;
>>> +
>>> +	desc->word0 = ACC_DMA_DESC_TYPE;
>>> +	desc->word1 = 0; /**< Timestamp could be disabled */
>>> +	desc->word2 = 0;
>>> +	desc->word3 = 0;
>>> +	desc->numCBs = 1;
>>> +
>>> +	if (op->turbo_enc.code_block_mode ==
>> RTE_BBDEV_TRANSPORT_BLOCK) {
>>> +		ea = op->turbo_enc.tb_params.ea;
>>> +		eb = op->turbo_enc.tb_params.eb;
>>> +		cab = op->turbo_enc.tb_params.cab;
>>> +		k_neg = op->turbo_enc.tb_params.k_neg;
>>> +		k_pos = op->turbo_enc.tb_params.k_pos;
>>> +		c_neg = op->turbo_enc.tb_params.c_neg;
>>> +		e = (r < cab) ? ea : eb;
>>> +		k = (r < c_neg) ? k_neg : k_pos;
>>> +	} else {
>>> +		e = op->turbo_enc.cb_params.e;
>>> +		k = op->turbo_enc.cb_params.k;
>>> +	}
>>> +
>>> +	if (check_bit(op->turbo_enc.op_flags,
>> RTE_BBDEV_TURBO_CRC_24B_ATTACH))
>>> +		length = (k - 24) >> 3;
>>> +	else
>>> +		length = k >> 3;
>>> +
>>> +	if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left < length))) {
>>> +		rte_bbdev_log(ERR,
>>> +				"Mismatch between mbuf length and included
>> CB sizes: mbuf len %u, cb len %u",
>>> +				*mbuf_total_left, length);
>>> +		return -1;
>>> +	}
>>> +
>>> +	next_triplet = acc_dma_fill_blk_type_in(desc, input, in_offset,
>>> +			length, seg_total_left, next_triplet,
>>> +			check_bit(op->turbo_enc.op_flags,
>>> +			RTE_BBDEV_TURBO_ENC_SCATTER_GATHER));
>>> +	if (unlikely(next_triplet < 0)) {
>>> +		rte_bbdev_log(ERR,
>>> +				"Mismatch between data to process and mbuf
>> data length in bbdev_op: %p",
>>> +				op);
>>> +		return -1;
>>> +	}
>>> +	desc->data_ptrs[next_triplet - 1].last = 1;
>>> +	desc->m2dlen = next_triplet;
>>> +	*mbuf_total_left -= length;
>>> +
>>> +	/* Set output length */
>>> +	if (check_bit(op->turbo_enc.op_flags,
>> RTE_BBDEV_TURBO_RATE_MATCH))
>>> +		/* Integer round up division by 8 */
>>> +		*out_length = (e + 7) >> 3;
>>> +	else
>>> +		*out_length = (k >> 3) * 3 + 2;
>>> +
>>> +	next_triplet = acc_dma_fill_blk_type(desc, output, *out_offset,
>>> +			*out_length, next_triplet,
>> ACC_DMA_BLKID_OUT_ENC);
>>> +	if (unlikely(next_triplet < 0)) {
>>> +		rte_bbdev_log(ERR,
>>> +				"Mismatch between data to process and mbuf
>> data length in bbdev_op: %p",
>>> +				op);
>>> +		return -1;
>>> +	}
>>> +	op->turbo_enc.output.length += *out_length;
>>> +	*out_offset += *out_length;
>>> +	desc->data_ptrs[next_triplet - 1].last = 1;
>>> +	desc->d2mlen = next_triplet - desc->m2dlen;
>>> +
>>> +	desc->op_addr = op;
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static inline int
>>>    acc200_dma_desc_le_fill(struct rte_bbdev_enc_op *op,
>>>    		struct acc_dma_req_desc *desc, struct rte_mbuf **input,
>>>    		struct rte_mbuf *output, uint32_t *in_offset, @@ -929,6
>> +1118,122
>>> @@
>>>    }
>>>
>>>    static inline int
>>> +acc200_dma_desc_td_fill(struct rte_bbdev_dec_op *op,
>>> +		struct acc_dma_req_desc *desc, struct rte_mbuf **input,
>>> +		struct rte_mbuf *h_output, struct rte_mbuf *s_output,
>>> +		uint32_t *in_offset, uint32_t *h_out_offset,
>>> +		uint32_t *s_out_offset, uint32_t *h_out_length,
>>> +		uint32_t *s_out_length, uint32_t *mbuf_total_left,
>>> +		uint32_t *seg_total_left, uint8_t r) {
>>> +	int next_triplet = 1; /* FCW already done */
>>> +	uint16_t k;
>>> +	uint16_t crc24_overlap = 0;
>>> +	uint32_t e, kw;
>>> +
>>> +	desc->word0 = ACC_DMA_DESC_TYPE;
>>> +	desc->word1 = 0; /**< Timestamp could be disabled */
>>> +	desc->word2 = 0;
>>> +	desc->word3 = 0;
>>> +	desc->numCBs = 1;
>>> +
>>> +	if (op->turbo_dec.code_block_mode ==
>> RTE_BBDEV_TRANSPORT_BLOCK) {
>>> +		k = (r < op->turbo_dec.tb_params.c_neg)
>>> +			? op->turbo_dec.tb_params.k_neg
>>> +			: op->turbo_dec.tb_params.k_pos;
>>> +		e = (r < op->turbo_dec.tb_params.cab)
>>> +			? op->turbo_dec.tb_params.ea
>>> +			: op->turbo_dec.tb_params.eb;
>>> +	} else {
>>> +		k = op->turbo_dec.cb_params.k;
>>> +		e = op->turbo_dec.cb_params.e;
>>> +	}
>>> +
>>> +	if ((op->turbo_dec.code_block_mode ==
>> RTE_BBDEV_TRANSPORT_BLOCK)
>>> +		&& !check_bit(op->turbo_dec.op_flags,
>>> +		RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP))
>>> +		crc24_overlap = 24;
>>> +
>>> +	/* Calculates circular buffer size.
>>> +	 * According to 3gpp 36.212 section 5.1.4.2
>>> +	 *   Kw = 3 * Kpi,
>>> +	 * where:
>>> +	 *   Kpi = nCol * nRow
>>> +	 * where nCol is 32 and nRow can be calculated from:
>>> +	 *   D =< nCol * nRow
>>> +	 * where D is the size of each output from turbo encoder block (k + 4).
>>> +	 */
>>> +	kw = RTE_ALIGN_CEIL(k + 4, 32) * 3;
>>> +
>>> +	if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left < kw))) {
>>> +		rte_bbdev_log(ERR,
>>> +				"Mismatch between mbuf length and included
>> CB sizes: mbuf len %u, cb len %u",
>>> +				*mbuf_total_left, kw);
>>> +		return -1;
>>> +	}
>>> +
>>> +	next_triplet = acc_dma_fill_blk_type_in(desc, input, in_offset, kw,
>>> +			seg_total_left, next_triplet,
>>> +			check_bit(op->turbo_dec.op_flags,
>>> +			RTE_BBDEV_TURBO_DEC_SCATTER_GATHER));
>>> +	if (unlikely(next_triplet < 0)) {
>>> +		rte_bbdev_log(ERR,
>>> +				"Mismatch between data to process and mbuf
>> data length in bbdev_op: %p",
>>> +				op);
>>> +		return -1;
>>> +	}
>>> +	desc->data_ptrs[next_triplet - 1].last = 1;
>>> +	desc->m2dlen = next_triplet;
>>> +	*mbuf_total_left -= kw;
>>> +	*h_out_length = ((k - crc24_overlap) >> 3);
>>> +	next_triplet = acc_dma_fill_blk_type(
>>> +			desc, h_output, *h_out_offset,
>>> +			*h_out_length, next_triplet,
>> ACC_DMA_BLKID_OUT_HARD);
>>> +	if (unlikely(next_triplet < 0)) {
>>> +		rte_bbdev_log(ERR,
>>> +				"Mismatch between data to process and mbuf
>> data length in bbdev_op: %p",
>>> +				op);
>>> +		return -1;
>>> +	}
>>> +
>>> +	op->turbo_dec.hard_output.length += *h_out_length;
>>> +	*h_out_offset += *h_out_length;
>>> +
>>> +	/* Soft output */
>>> +	if (check_bit(op->turbo_dec.op_flags,
>> RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
>>> +		if (op->turbo_dec.soft_output.data == 0) {
>>> +			rte_bbdev_log(ERR, "Soft output is not defined");
>>> +			return -1;
>>> +		}
>>> +		if (check_bit(op->turbo_dec.op_flags,
>>> +				RTE_BBDEV_TURBO_EQUALIZER))
>>> +			*s_out_length = e;
>>> +		else
>>> +			*s_out_length = (k * 3) + 12;
>>> +
>>> +		next_triplet = acc_dma_fill_blk_type(desc, s_output,
>>> +				*s_out_offset, *s_out_length, next_triplet,
>>> +				ACC_DMA_BLKID_OUT_SOFT);
>>> +		if (unlikely(next_triplet < 0)) {
>>> +			rte_bbdev_log(ERR,
>>> +					"Mismatch between data to process
>> and mbuf data length in bbdev_op: %p",
>>> +					op);
>>> +			return -1;
>>> +		}
>>> +
>>> +		op->turbo_dec.soft_output.length += *s_out_length;
>>> +		*s_out_offset += *s_out_length;
>>> +	}
>>> +
>>> +	desc->data_ptrs[next_triplet - 1].last = 1;
>>> +	desc->d2mlen = next_triplet - desc->m2dlen;
>>> +
>>> +	desc->op_addr = op;
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static inline int
>>>    acc200_dma_desc_ld_fill(struct rte_bbdev_dec_op *op,
>>>    		struct acc_dma_req_desc *desc,
>>>    		struct rte_mbuf **input, struct rte_mbuf *h_output, @@ -
>> 1100,6
>>> +1405,51 @@
>>>    	desc->op_addr = op;
>>>    }
>>>
>>> +/* Enqueue one encode operations for ACC200 device in CB mode */
>>> +static inline int enqueue_enc_one_op_cb(struct acc_queue *q, struct
>>> +rte_bbdev_enc_op *op,
>>> +		uint16_t total_enqueued_cbs)
>>> +{
>>> +	union acc_dma_desc *desc = NULL;
>>> +	int ret;
>>> +	uint32_t in_offset, out_offset, out_length, mbuf_total_left,
>>> +		seg_total_left;
>>> +	struct rte_mbuf *input, *output_head, *output;
>>> +
>>> +	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
>>> +			& q->sw_ring_wrap_mask);
>>> +	desc = q->ring_addr + desc_idx;
>>> +	acc_fcw_te_fill(op, &desc->req.fcw_te);
>>> +
>>> +	input = op->turbo_enc.input.data;
>>> +	output_head = output = op->turbo_enc.output.data;
>>> +	in_offset = op->turbo_enc.input.offset;
>>> +	out_offset = op->turbo_enc.output.offset;
>>> +	out_length = 0;
>>> +	mbuf_total_left = op->turbo_enc.input.length;
>>> +	seg_total_left = rte_pktmbuf_data_len(op->turbo_enc.input.data)
>>> +			- in_offset;
>>> +
>>> +	ret = acc200_dma_desc_te_fill(op, &desc->req, &input, output,
>>> +			&in_offset, &out_offset, &out_length,
>> &mbuf_total_left,
>>> +			&seg_total_left, 0);
>>> +
>>> +	if (unlikely(ret < 0))
>>> +		return ret;
>>> +
>>> +	mbuf_append(output_head, output, out_length);
>>> +
>>> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
>>> +	rte_memdump(stderr, "FCW", &desc->req.fcw_te,
>>> +			sizeof(desc->req.fcw_te) - 8);
>>> +	rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
>>> +	if (check_mbuf_total_left(mbuf_total_left) != 0)
>>> +		return -EINVAL;
>>> +#endif
>>> +	/* One CB (one op) was successfully prepared to enqueue */
>>> +	return 1;
>>> +}
>>> +
>>>    /* Enqueue one encode operations for ACC200 device in CB mode
>>>     * multiplexed on the same descriptor
>>>     */
>>> @@ -1262,6 +1612,84 @@
>>>    	return 1;
>>>    }
>>>
>>> +
>>> +/* Enqueue one encode operations for ACC200 device in TB mode. */
>>> +static inline int enqueue_enc_one_op_tb(struct acc_queue *q, struct
>>> +rte_bbdev_enc_op *op,
>>> +		uint16_t total_enqueued_cbs, uint8_t cbs_in_tb) {
>>> +	union acc_dma_desc *desc = NULL;
>>> +	int ret;
>>> +	uint8_t r, c;
>>> +	uint32_t in_offset, out_offset, out_length, mbuf_total_left,
>>> +		seg_total_left;
>>> +	struct rte_mbuf *input, *output_head, *output;
>>> +	uint16_t current_enqueued_cbs = 0;
>>> +
>>> +	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
>>> +			& q->sw_ring_wrap_mask);
>>> +	desc = q->ring_addr + desc_idx;
>>> +	uint64_t fcw_offset = (desc_idx << 8) + ACC_DESC_FCW_OFFSET;
>>> +	acc_fcw_te_fill(op, &desc->req.fcw_te);
>>> +
>>> +	input = op->turbo_enc.input.data;
>>> +	output_head = output = op->turbo_enc.output.data;
>>> +	in_offset = op->turbo_enc.input.offset;
>>> +	out_offset = op->turbo_enc.output.offset;
>>> +	out_length = 0;
>>> +	mbuf_total_left = op->turbo_enc.input.length;
>>> +
>>> +	c = op->turbo_enc.tb_params.c;
>>> +	r = op->turbo_enc.tb_params.r;
>>> +
>>> +	while (mbuf_total_left > 0 && r < c) {
>>> +		seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
>>> +		/* Set up DMA descriptor */
>>> +		desc = q->ring_addr + ((q->sw_ring_head +
>> total_enqueued_cbs)
>>> +				& q->sw_ring_wrap_mask);
>>> +		desc->req.data_ptrs[0].address = q->ring_addr_iova +
>> fcw_offset;
>>> +		desc->req.data_ptrs[0].blen = ACC_FCW_TE_BLEN;
>>> +
>>> +		ret = acc200_dma_desc_te_fill(op, &desc->req, &input,
>> output,
>>> +				&in_offset, &out_offset, &out_length,
>>> +				&mbuf_total_left, &seg_total_left, r);
>>> +		if (unlikely(ret < 0))
>>> +			return ret;
>>> +		mbuf_append(output_head, output, out_length);
>>> +
>>> +		/* Set total number of CBs in TB */
>>> +		desc->req.cbs_in_tb = cbs_in_tb;
>>> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
>>> +		rte_memdump(stderr, "FCW", &desc->req.fcw_te,
>>> +				sizeof(desc->req.fcw_te) - 8);
>>> +		rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc)); #endif
>>> +
>>> +		if (seg_total_left == 0) {
>>> +			/* Go to the next mbuf */
>>> +			input = input->next;
>>> +			in_offset = 0;
>>> +			output = output->next;
>>> +			out_offset = 0;
>>> +		}
>>> +
>>> +		total_enqueued_cbs++;
>>> +		current_enqueued_cbs++;
>>> +		r++;
>>> +	}
>>> +
>>> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
>>> +	if (check_mbuf_total_left(mbuf_total_left) != 0)
>>> +		return -EINVAL;
>>> +#endif
>>> +
>>> +	/* Set SDone on last CB descriptor for TB mode. */
>>> +	desc->req.sdone_enable = 1;
>>> +	desc->req.irq_enable = q->irq_enable;
>>> +
>>> +	return current_enqueued_cbs;
>>> +}
>>> +
>>>    /* Enqueue one encode operations for ACC200 device in TB mode.
>>>     * returns the number of descs used
>>>     */
>>> @@ -1328,6 +1756,69 @@
>>>
>>>    /** Enqueue one decode operations for ACC200 device in CB mode */
>>>    static inline int
>>> +enqueue_dec_one_op_cb(struct acc_queue *q, struct rte_bbdev_dec_op
>> *op,
>>> +		uint16_t total_enqueued_cbs)
>>> +{
>>> +	union acc_dma_desc *desc = NULL;
>>> +	int ret;
>>> +	uint32_t in_offset, h_out_offset, s_out_offset, s_out_length,
>>> +		h_out_length, mbuf_total_left, seg_total_left;
>>> +	struct rte_mbuf *input, *h_output_head, *h_output,
>>> +		*s_output_head, *s_output;
>>> +
>>> +	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
>>> +			& q->sw_ring_wrap_mask);
>>> +	desc = q->ring_addr + desc_idx;
>>> +	acc200_fcw_td_fill(op, &desc->req.fcw_td);
>>> +
>>> +	input = op->turbo_dec.input.data;
>>> +	h_output_head = h_output = op->turbo_dec.hard_output.data;
>>> +	s_output_head = s_output = op->turbo_dec.soft_output.data;
>>> +	in_offset = op->turbo_dec.input.offset;
>>> +	h_out_offset = op->turbo_dec.hard_output.offset;
>>> +	s_out_offset = op->turbo_dec.soft_output.offset;
>>> +	h_out_length = s_out_length = 0;
>>> +	mbuf_total_left = op->turbo_dec.input.length;
>>> +	seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
>>> +
>>> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
>>> +	if (unlikely(input == NULL)) {
>>> +		rte_bbdev_log(ERR, "Invalid mbuf pointer");
>>> +		return -EFAULT;
>>> +	}
>>> +#endif
>>
>> It makes sense to have this check present unconditionally.
>> Also, putting it here is useless since rte_pktmbuf_data_len() dereferences it just
>> before. So if NULL, it will crash before being checked.
> 
> We would not want to do comprehensive checks on input parameters in production.

Why? If it is for performance reasons, I highly doubt it would be noticeable.

> We had been expecting this kind of check to only happen in debug mode, so that users can troubleshoot their code more easily.
> I can take it out for now, but it would be good to review further the use case of the debug build.

Well, where it currently is, it is just useless: if input == NULL,
it will crash before the check.

But overall yes, we need to review all the code under debug build to see
if it makes sense. On patch 8, for example, the production build will
crash while the debug build will detect the issue but return silently
(no logs are emitted). So it makes the production build more fragile but
easier to debug in the end (a coredump would be generated).
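
Purely as an illustration of that trade-off, an always-compiled variant that
logs and fails the op (instead of a silent debug-only return) would look
roughly like this sketch:

	/* Always compiled: cheap, logs the problem, and fails the op
	 * instead of either crashing or returning silently. */
	if (unlikely(input == NULL)) {
		rte_bbdev_log(ERR, "Invalid input mbuf in op %p", op);
		return -EFAULT;
	}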

>>
>>> +
>>> +	/* Set up DMA descriptor */
>>> +	desc = q->ring_addr + ((q->sw_ring_head + total_enqueued_cbs)
>>> +			& q->sw_ring_wrap_mask);
>>> +
>>> +	ret = acc200_dma_desc_td_fill(op, &desc->req, &input, h_output,
>>> +			s_output, &in_offset, &h_out_offset, &s_out_offset,
>>> +			&h_out_length, &s_out_length, &mbuf_total_left,
>>> +			&seg_total_left, 0);
>>> +
>>> +	if (unlikely(ret < 0))
>>> +		return ret;
>>> +
>>> +	/* Hard output */
>>> +	mbuf_append(h_output_head, h_output, h_out_length);
>>> +
>>> +	/* Soft output */
>>> +	if (check_bit(op->turbo_dec.op_flags,
>> RTE_BBDEV_TURBO_SOFT_OUTPUT))
>>> +		mbuf_append(s_output_head, s_output, s_out_length);
>>> +
>>> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
>>> +	rte_memdump(stderr, "FCW", &desc->req.fcw_td,
>>> +			sizeof(desc->req.fcw_td));
>>> +	rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc)); #endif
>>> +
>>> +	/* One CB (one op) was successfully prepared to enqueue */
>>> +	return 1;
>>> +}
>>> +
>>> +/** Enqueue one decode operations for ACC200 device in CB mode */
>>> +static inline int
>>>    enqueue_ldpc_dec_one_op_cb(struct acc_queue *q, struct
>> rte_bbdev_dec_op *op,
>>>    		uint16_t total_enqueued_cbs, bool same_op)
>>>    {
>>> @@ -1525,10 +2016,147 @@
>>>    	return current_enqueued_cbs;
>>>    }
>>>
>>> -/** Enqueue encode operations for ACC200 device in CB mode. */
>>> -static inline uint16_t -acc200_enqueue_ldpc_enc_cb(struct
>>> rte_bbdev_queue_data *q_data,
>>> -		struct rte_bbdev_enc_op **ops, uint16_t num)
>>> +/* Enqueue one decode operations for ACC200 device in TB mode */
>>> +static inline int enqueue_dec_one_op_tb(struct acc_queue *q, struct
>>> +rte_bbdev_dec_op *op,
>>> +		uint16_t total_enqueued_cbs, uint8_t cbs_in_tb) {
>>> +	union acc_dma_desc *desc = NULL;
>>> +	int ret;
>>> +	uint8_t r, c;
>>> +	uint32_t in_offset, h_out_offset, s_out_offset, s_out_length,
>>> +		h_out_length, mbuf_total_left, seg_total_left;
>>> +	struct rte_mbuf *input, *h_output_head, *h_output,
>>> +		*s_output_head, *s_output;
>>> +	uint16_t current_enqueued_cbs = 0;
>>> +
>>> +	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
>>> +			& q->sw_ring_wrap_mask);
>>> +	desc = q->ring_addr + desc_idx;
>>> +	uint64_t fcw_offset = (desc_idx << 8) + ACC_DESC_FCW_OFFSET;
>>> +	acc200_fcw_td_fill(op, &desc->req.fcw_td);
>>> +
>>> +	input = op->turbo_dec.input.data;
>>
>> Here you may want to check that it is not NULL, at least for consistency with
>> enqueue_dec_one_op_cb()
> 
> As noted above, this is taken out.
> 
>>
>>> +	h_output_head = h_output = op->turbo_dec.hard_output.data;
>>> +	s_output_head = s_output = op->turbo_dec.soft_output.data;
>>> +	in_offset = op->turbo_dec.input.offset;
>>> +	h_out_offset = op->turbo_dec.hard_output.offset;
>>> +	s_out_offset = op->turbo_dec.soft_output.offset;
>>> +	h_out_length = s_out_length = 0;
>>> +	mbuf_total_left = op->turbo_dec.input.length;
>>> +	c = op->turbo_dec.tb_params.c;
>>> +	r = op->turbo_dec.tb_params.r;
>>> +
>>> +	while (mbuf_total_left > 0 && r < c) {
>>> +
>>> +		seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
>>> +
>>> +		/* Set up DMA descriptor */
>>> +		desc = q->ring_addr + ((q->sw_ring_head +
>> total_enqueued_cbs)
>>> +				& q->sw_ring_wrap_mask);
>>> +		desc->req.data_ptrs[0].address = q->ring_addr_iova +
>> fcw_offset;
>>> +		desc->req.data_ptrs[0].blen = ACC_FCW_TD_BLEN;
>>> +		ret = acc200_dma_desc_td_fill(op, &desc->req, &input,
>>> +				h_output, s_output, &in_offset,
>> &h_out_offset,
>>> +				&s_out_offset, &h_out_length,
>> &s_out_length,
>>> +				&mbuf_total_left, &seg_total_left, r);
>>> +
>>> +		if (unlikely(ret < 0))
>>> +			return ret;
>>> +
>>> +		/* Hard output */
>>> +		mbuf_append(h_output_head, h_output, h_out_length);
>>> +
>>> +		/* Soft output */
>>> +		if (check_bit(op->turbo_dec.op_flags,
>>> +				RTE_BBDEV_TURBO_SOFT_OUTPUT))
>>> +			mbuf_append(s_output_head, s_output,
>> s_out_length);
>>> +
>>> +		/* Set total number of CBs in TB */
>>> +		desc->req.cbs_in_tb = cbs_in_tb;
>>> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
>>> +		rte_memdump(stderr, "FCW", &desc->req.fcw_td,
>>> +				sizeof(desc->req.fcw_td) - 8);
>>> +		rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc)); #endif
>>> +
>>> +		if (seg_total_left == 0) {
>>> +			/* Go to the next mbuf */
>>> +			input = input->next;
>>> +			in_offset = 0;
>>> +			h_output = h_output->next;
>>> +			h_out_offset = 0;
>>> +
>>> +			if (check_bit(op->turbo_dec.op_flags,
>>> +					RTE_BBDEV_TURBO_SOFT_OUTPUT))
>> {
>>> +				s_output = s_output->next;
>>> +				s_out_offset = 0;
>>> +			}
>>> +		}
>>> +
>>> +		total_enqueued_cbs++;
>>> +		current_enqueued_cbs++;
>>> +		r++;
>>> +	}
>>> +
>>> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
>>> +	if (check_mbuf_total_left(mbuf_total_left) != 0)
>>> +		return -EINVAL;
>>> +#endif
>>
>> If it can happen, then it should always be present.
> 
> Same comment as above, not meant for production code.

Ok, then just remove it?

> 
>>
>>> +	/* Set SDone on last CB descriptor for TB mode */
>>> +	desc->req.sdone_enable = 1;
>>> +	desc->req.irq_enable = q->irq_enable;
>>> +
>>> +	return current_enqueued_cbs;
>>> +}
>>> +
>>> +/* Enqueue encode operations for ACC200 device in CB mode. */ static
>>> +uint16_t acc200_enqueue_enc_cb(struct rte_bbdev_queue_data *q_data,
>>> +		struct rte_bbdev_enc_op **ops, uint16_t num) {
>>> +	struct acc_queue *q = q_data->queue_private;
>>> +	int32_t avail = acc_ring_avail_enq(q);
>>> +	uint16_t i;
>>> +	union acc_dma_desc *desc;
>>> +	int ret;
>>> +
>>> +	for (i = 0; i < num; ++i) {
>>> +		/* Check if there are available space for further processing */
>>> +		if (unlikely(avail - 1 < 0)) {
>>> +			acc_enqueue_ring_full(q_data);
>>> +			break;
>>> +		}
>>> +		avail -= 1;
>>> +
>>> +		ret = enqueue_enc_one_op_cb(q, ops[i], i);
>>> +		if (ret < 0) {
>>> +			acc_enqueue_invalid(q_data);
>>> +			break;
>>> +		}
>>> +	}
>>> +
>>> +	if (unlikely(i == 0))
>>> +		return 0; /* Nothing to enqueue */
>>> +
>>> +	/* Set SDone in last CB in enqueued ops for CB mode*/
>>> +	desc = q->ring_addr + ((q->sw_ring_head + i - 1)
>>> +			& q->sw_ring_wrap_mask);
>>> +	desc->req.sdone_enable = 1;
>>> +	desc->req.irq_enable = q->irq_enable;
>>> +
>>> +	acc_dma_enqueue(q, i, &q_data->queue_stats);
>>> +
>>> +	/* Update stats */
>>> +	q_data->queue_stats.enqueued_count += i;
>>> +	q_data->queue_stats.enqueue_err_count += num - i;
>>> +	return i;
>>> +}
>>> +
>>> +/** Enqueue encode operations for ACC200 device in CB mode. */ static
>>> +inline uint16_t acc200_enqueue_ldpc_enc_cb(struct
>>> +rte_bbdev_queue_data *q_data,
>>> +		struct rte_bbdev_enc_op **ops, uint16_t num)
>>>    {
>>>    	struct acc_queue *q = q_data->queue_private;
>>>    	int32_t avail = acc_ring_avail_enq(q); @@ -1583,6 +2211,45 @@
>>>    	return i;
>>>    }
>>>
>>> +/* Enqueue encode operations for ACC200 device in TB mode. */ static
>>> +uint16_t acc200_enqueue_enc_tb(struct rte_bbdev_queue_data *q_data,
>>> +		struct rte_bbdev_enc_op **ops, uint16_t num) {
>>> +	struct acc_queue *q = q_data->queue_private;
>>> +	int32_t avail = acc_ring_avail_enq(q);
>>> +	uint16_t i, enqueued_cbs = 0;
>>> +	uint8_t cbs_in_tb;
>>> +	int ret;
>>> +
>>> +	for (i = 0; i < num; ++i) {
>>> +		cbs_in_tb = get_num_cbs_in_tb_enc(&ops[i]->turbo_enc);
>>> +		/* Check if there are available space for further processing */
>>> +		if (unlikely((avail - cbs_in_tb < 0) || (cbs_in_tb == 0))) {
>>> +			acc_enqueue_ring_full(q_data);
>>> +			break;
>>> +		}
>>> +		avail -= cbs_in_tb;
>>> +
>>> +		ret = enqueue_enc_one_op_tb(q, ops[i], enqueued_cbs,
>> cbs_in_tb);
>>> +		if (ret <= 0) {
>>> +			acc_enqueue_invalid(q_data);
>>> +			break;
>>> +		}
>>> +		enqueued_cbs += ret;
>>> +	}
>>> +	if (unlikely(enqueued_cbs == 0))
>>> +		return 0; /* Nothing to enqueue */
>>> +
>>> +	acc_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats);
>>> +
>>> +	/* Update stats */
>>> +	q_data->queue_stats.enqueued_count += i;
>>> +	q_data->queue_stats.enqueue_err_count += num - i;
>>> +
>>> +	return i;
>>> +}
>>
>>
>> This function is strictly identical to acc100_enqueue_enc_tb(), it should be
>> shared.
> 
> Actually these and/or the underlying functions have differences (or will, once the ACC100 PMD changes are applied).

Ok.
Once the ACC100 series is applied, we can revisit the two drivers and see if
we can increase the amount of shared code.

>>
>>> +
>>>    /* Enqueue LDPC encode operations for ACC200 device in TB mode. */
>>>    static uint16_t
>>>    acc200_enqueue_ldpc_enc_tb(struct rte_bbdev_queue_data *q_data, @@
>>> -1623,18 +2290,18 @@
>>>    	return i;
>>>    }
>>>
>>> -/* Check room in AQ for the enqueues batches into Qmgr */ -static
>>> int32_t -acc200_aq_avail(struct rte_bbdev_queue_data *q_data, uint16_t
>>> num_ops)
>>> +/* Enqueue encode operations for ACC200 device. */ static uint16_t
>>> +acc200_enqueue_enc(struct rte_bbdev_queue_data *q_data,
>>> +		struct rte_bbdev_enc_op **ops, uint16_t num)
>>>    {
>>> -	struct acc_queue *q = q_data->queue_private;
>>> -	int32_t aq_avail = q->aq_depth -
>>> -			((q->aq_enqueued - q->aq_dequeued +
>>> -			ACC_MAX_QUEUE_DEPTH) %
>> ACC_MAX_QUEUE_DEPTH)
>>> -			- (num_ops >> 7);
>>> -	if (aq_avail <= 0)
>>> -		acc_enqueue_queue_full(q_data);
>>> -	return aq_avail;
>>> +	int32_t aq_avail = acc_aq_avail(q_data, num);
>>> +	if (unlikely((aq_avail <= 0) || (num == 0)))
>>> +		return 0;
>>> +	if (ops[0]->turbo_enc.code_block_mode ==
>> RTE_BBDEV_TRANSPORT_BLOCK)
>>> +		return acc200_enqueue_enc_tb(q_data, ops, num);
>>> +	else
>>> +		return acc200_enqueue_enc_cb(q_data, ops, num);
>>>    }
>>>
>>>    /* Enqueue encode operations for ACC200 device. */ @@ -1652,6
>>> +2319,47 @@
>>>    		return acc200_enqueue_ldpc_enc_cb(q_data, ops, num);
>>>    }
>>>
>>> +
>>> +/* Enqueue decode operations for ACC200 device in CB mode */ static
>>> +uint16_t acc200_enqueue_dec_cb(struct rte_bbdev_queue_data *q_data,
>>> +		struct rte_bbdev_dec_op **ops, uint16_t num) {
>>> +	struct acc_queue *q = q_data->queue_private;
>>> +	int32_t avail = acc_ring_avail_enq(q);
>>> +	uint16_t i;
>>> +	union acc_dma_desc *desc;
>>> +	int ret;
>>> +
>>> +	for (i = 0; i < num; ++i) {
>>> +		/* Check if there are available space for further processing */
>>> +		if (unlikely(avail - 1 < 0))
>>> +			break;
>>> +		avail -= 1;
>>> +
>>> +		ret = enqueue_dec_one_op_cb(q, ops[i], i);
>>> +		if (ret < 0)
>>> +			break;
>>> +	}
>>> +
>>> +	if (unlikely(i == 0))
>>> +		return 0; /* Nothing to enqueue */
>>> +
>>> +	/* Set SDone in last CB in enqueued ops for CB mode*/
>>> +	desc = q->ring_addr + ((q->sw_ring_head + i - 1)
>>> +			& q->sw_ring_wrap_mask);
>>> +	desc->req.sdone_enable = 1;
>>> +	desc->req.irq_enable = q->irq_enable;
>>> +
>>> +	acc_dma_enqueue(q, i, &q_data->queue_stats);
>>> +
>>> +	/* Update stats */
>>> +	q_data->queue_stats.enqueued_count += i;
>>> +	q_data->queue_stats.enqueue_err_count += num - i;
>>> +
>>> +	return i;
>>> +}
>>
>> This function is strictly identical to acc100_enqueue_dec_cb(), it should be
>> shared.
> 
> Same comment as above
> 
>>
>>> +
>>>    /* Enqueue decode operations for ACC200 device in TB mode */
>>>    static uint16_t
>>>    acc200_enqueue_ldpc_dec_tb(struct rte_bbdev_queue_data *q_data, @@
>>> -1740,12 +2448,64 @@
>>>    	return i;
>>>    }
>>>
>>> +
>>> +/* Enqueue decode operations for ACC200 device in TB mode */ static
>>> +uint16_t acc200_enqueue_dec_tb(struct rte_bbdev_queue_data *q_data,
>>> +		struct rte_bbdev_dec_op **ops, uint16_t num) {
>>> +	struct acc_queue *q = q_data->queue_private;
>>> +	int32_t avail = acc_ring_avail_enq(q);
>>> +	uint16_t i, enqueued_cbs = 0;
>>> +	uint8_t cbs_in_tb;
>>> +	int ret;
>>> +
>>> +	for (i = 0; i < num; ++i) {
>>> +		cbs_in_tb = get_num_cbs_in_tb_dec(&ops[i]->turbo_dec);
>>> +		/* Check if there are available space for further processing */
>>> +		if (unlikely((avail - cbs_in_tb < 0) || (cbs_in_tb == 0))) {
>>> +			acc_enqueue_ring_full(q_data);
>>> +			break;
>>> +		}
>>> +		avail -= cbs_in_tb;
>>> +
>>> +		ret = enqueue_dec_one_op_tb(q, ops[i], enqueued_cbs,
>> cbs_in_tb);
>>> +		if (ret <= 0) {
>>> +			acc_enqueue_invalid(q_data);
>>> +			break;
>>> +		}
>>> +		enqueued_cbs += ret;
>>> +	}
>>> +
>>> +	acc_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats);
>>> +
>>> +	/* Update stats */
>>> +	q_data->queue_stats.enqueued_count += i;
>>> +	q_data->queue_stats.enqueue_err_count += num - i;
>>> +
>>> +	return i;
>>> +}
>>
>> This function is strictly identical to acc100_enqueue_dec_tb(), it should be
>> shared.
>>
>>> +/* Enqueue decode operations for ACC200 device. */ static uint16_t
>>> +acc200_enqueue_dec(struct rte_bbdev_queue_data *q_data,
>>> +		struct rte_bbdev_dec_op **ops, uint16_t num) {
>>> +	int32_t aq_avail = acc_aq_avail(q_data, num);
>>> +	if (unlikely((aq_avail <= 0) || (num == 0)))
>>> +		return 0;
>>> +	if (ops[0]->turbo_dec.code_block_mode ==
>> RTE_BBDEV_TRANSPORT_BLOCK)
>>> +		return acc200_enqueue_dec_tb(q_data, ops, num);
>>> +	else
>>> +		return acc200_enqueue_dec_cb(q_data, ops, num); }
>>
>> Ditto
>>
>>>    /* Enqueue decode operations for ACC200 device. */
>>>    static uint16_t
>>>    acc200_enqueue_ldpc_dec(struct rte_bbdev_queue_data *q_data,
>>>    		struct rte_bbdev_dec_op **ops, uint16_t num)
>>>    {
>>> -	int32_t aq_avail = acc200_aq_avail(q_data, num);
>>> +	int32_t aq_avail = acc_aq_avail(q_data, num);
>>>    	if (unlikely((aq_avail <= 0) || (num == 0)))
>>>    		return 0;
>>>    	if (ops[0]->ldpc_dec.code_block_mode ==
>> RTE_BBDEV_TRANSPORT_BLOCK)
>>> @@ -2093,6 +2853,51 @@
>>>    	return cb_idx;
>>>    }
>>>
>>> +/* Dequeue encode operations from ACC200 device. */ static uint16_t
>>> +acc200_dequeue_enc(struct rte_bbdev_queue_data *q_data,
>>> +		struct rte_bbdev_enc_op **ops, uint16_t num) {
>>> +	struct acc_queue *q = q_data->queue_private;
>>> +	uint32_t avail = acc_ring_avail_deq(q);
>>> +	uint32_t aq_dequeued = 0;
>>> +	uint16_t i, dequeued_ops = 0, dequeued_descs = 0;
>>> +	int ret;
>>> +	struct rte_bbdev_enc_op *op;
>>> +	if (avail == 0)
>>> +		return 0;
>>> +	op = (q->ring_addr + (q->sw_ring_tail &
>>> +			q->sw_ring_wrap_mask))->req.op_addr;
>>> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
>>> +	if (unlikely(ops == NULL || q == NULL || op == NULL))
>>> +		return 0;
>>> +#endif
>>
>> q is dereferenced before it is checked.
> 
> Thanks. Taking it out.
> 
>>
>>> +	int cbm = op->turbo_enc.code_block_mode;
>>
>> Don't mix declarations & code.
> 
> OK
> 
>>
>>> +	for (i = 0; i < num; i++) {
>>> +		if (cbm == RTE_BBDEV_TRANSPORT_BLOCK)
>>> +			ret = dequeue_enc_one_op_tb(q,
>> &ops[dequeued_ops],
>>> +					&dequeued_ops, &aq_dequeued,
>>> +					&dequeued_descs);
>>> +		else
>>> +			ret = dequeue_enc_one_op_cb(q,
>> &ops[dequeued_ops],
>>> +					&dequeued_ops, &aq_dequeued,
>>> +					&dequeued_descs);
>>> +		if (ret < 0)
>>> +			break;
>>> +		if (dequeued_ops >= num)
>>> +			break;
>>> +	}
>>> +
>>> +	q->aq_dequeued += aq_dequeued;
>>> +	q->sw_ring_tail += dequeued_descs;
>>> +
>>> +	/* Update enqueue stats */
>>> +	q_data->queue_stats.dequeued_count += dequeued_ops;
>>> +
>>> +	return dequeued_ops;
>>> +}
>>> +
>>>    /* Dequeue LDPC encode operations from ACC200 device. */
>>>    static uint16_t
>>>    acc200_dequeue_ldpc_enc(struct rte_bbdev_queue_data *q_data, @@
>>> -2140,6 +2945,51 @@
>>>
>>>    /* Dequeue decode operations from ACC200 device. */
>>>    static uint16_t
>>> +acc200_dequeue_dec(struct rte_bbdev_queue_data *q_data,
>>> +		struct rte_bbdev_dec_op **ops, uint16_t num) {
>>> +	struct acc_queue *q = q_data->queue_private;
>>> +	uint16_t dequeue_num;
>>> +	uint32_t avail = acc_ring_avail_deq(q);
>>> +	uint32_t aq_dequeued = 0;
>>> +	uint16_t i;
>>> +	uint16_t dequeued_cbs = 0;
>>> +	struct rte_bbdev_dec_op *op;
>>> +	int ret;
>>> +
>>> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
>>> +	if (unlikely(ops == 0 && q == NULL))
>>> +		return 0;
>>> +#endif
>>> +
>>> +	dequeue_num = (avail < num) ? avail : num;
>>> +
>>> +	for (i = 0; i < dequeue_num; ++i) {
>>> +		op = (q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
>>> +			& q->sw_ring_wrap_mask))->req.op_addr;
>>> +		if (op->turbo_dec.code_block_mode ==
>> RTE_BBDEV_TRANSPORT_BLOCK)
>>> +			ret = dequeue_dec_one_op_tb(q, &ops[i],
>> dequeued_cbs,
>>> +					&aq_dequeued);
>>> +		else
>>> +			ret = dequeue_dec_one_op_cb(q_data, q, &ops[i],
>>> +					dequeued_cbs, &aq_dequeued);
>>> +
>>> +		if (ret <= 0)
>>> +			break;
>>> +		dequeued_cbs += ret;
>>> +	}
>>> +
>>> +	q->aq_dequeued += aq_dequeued;
>>> +	q->sw_ring_tail += dequeued_cbs;
>>> +
>>> +	/* Update enqueue stats */
>>> +	q_data->queue_stats.dequeued_count += i;
>>> +
>>> +	return i;
>>> +}
>>> +
>>> +/* Dequeue decode operations from ACC200 device. */ static uint16_t
>>>    acc200_dequeue_ldpc_dec(struct rte_bbdev_queue_data *q_data,
>>>    		struct rte_bbdev_dec_op **ops, uint16_t num)
>>>    {
>>> @@ -2191,6 +3041,10 @@
>>>    	struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev->device);
>>>
>>>    	dev->dev_ops = &acc200_bbdev_ops;
>>> +	dev->enqueue_enc_ops = acc200_enqueue_enc;
>>> +	dev->enqueue_dec_ops = acc200_enqueue_dec;
>>> +	dev->dequeue_enc_ops = acc200_dequeue_enc;
>>> +	dev->dequeue_dec_ops = acc200_dequeue_dec;
>>>    	dev->enqueue_ldpc_enc_ops = acc200_enqueue_ldpc_enc;
>>>    	dev->enqueue_ldpc_dec_ops = acc200_enqueue_ldpc_dec;
>>>    	dev->dequeue_ldpc_enc_ops = acc200_dequeue_ldpc_enc;
>>
>> Overall, there are functions that are strictly identical to ACC100; they should be
>> shared and not duplicated.
>>
>> If at some point they need to diverge, let's have device-specific ones, but only
>> when it becomes needed.
> 
> Actually the ones that are not in acc_common.h end up having differences, notably due to SW mitigations/workarounds specific to ACC100.
> 
>>
>> Maxime
>
  

Patch

diff --git a/drivers/baseband/acc/rte_acc200_pmd.c b/drivers/baseband/acc/rte_acc200_pmd.c
index 1c59559..35ea0fe 100644
--- a/drivers/baseband/acc/rte_acc200_pmd.c
+++ b/drivers/baseband/acc/rte_acc200_pmd.c
@@ -641,6 +641,46 @@ 
 	int i;
 	static const struct rte_bbdev_op_cap bbdev_capabilities[] = {
 		{
+			.type = RTE_BBDEV_OP_TURBO_DEC,
+			.cap.turbo_dec = {
+				.capability_flags =
+					RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE |
+					RTE_BBDEV_TURBO_CRC_TYPE_24B |
+					RTE_BBDEV_TURBO_EQUALIZER |
+					RTE_BBDEV_TURBO_SOFT_OUT_SATURATE |
+					RTE_BBDEV_TURBO_HALF_ITERATION_EVEN |
+					RTE_BBDEV_TURBO_CONTINUE_CRC_MATCH |
+					RTE_BBDEV_TURBO_SOFT_OUTPUT |
+					RTE_BBDEV_TURBO_EARLY_TERMINATION |
+					RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN |
+					RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT |
+					RTE_BBDEV_TURBO_MAP_DEC |
+					RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP |
+					RTE_BBDEV_TURBO_DEC_SCATTER_GATHER,
+				.max_llr_modulus = INT8_MAX,
+				.num_buffers_src =
+						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
+				.num_buffers_hard_out =
+						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
+				.num_buffers_soft_out =
+						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
+			}
+		},
+		{
+			.type = RTE_BBDEV_OP_TURBO_ENC,
+			.cap.turbo_enc = {
+				.capability_flags =
+					RTE_BBDEV_TURBO_CRC_24B_ATTACH |
+					RTE_BBDEV_TURBO_RV_INDEX_BYPASS |
+					RTE_BBDEV_TURBO_RATE_MATCH |
+					RTE_BBDEV_TURBO_ENC_SCATTER_GATHER,
+				.num_buffers_src =
+						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
+				.num_buffers_dst =
+						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
+			}
+		},
+		{
 			.type   = RTE_BBDEV_OP_LDPC_ENC,
 			.cap.ldpc_enc = {
 				.capability_flags =
@@ -691,15 +731,17 @@ 
 
 	/* Exposed number of queues */
 	dev_info->num_queues[RTE_BBDEV_OP_NONE] = 0;
-	dev_info->num_queues[RTE_BBDEV_OP_TURBO_DEC] = 0;
-	dev_info->num_queues[RTE_BBDEV_OP_TURBO_ENC] = 0;
+	dev_info->num_queues[RTE_BBDEV_OP_TURBO_DEC] = d->acc_conf.q_ul_4g.num_aqs_per_groups *
+			d->acc_conf.q_ul_4g.num_qgroups;
+	dev_info->num_queues[RTE_BBDEV_OP_TURBO_ENC] = d->acc_conf.q_dl_4g.num_aqs_per_groups *
+			d->acc_conf.q_dl_4g.num_qgroups;
 	dev_info->num_queues[RTE_BBDEV_OP_LDPC_DEC] = d->acc_conf.q_ul_5g.num_aqs_per_groups *
 			d->acc_conf.q_ul_5g.num_qgroups;
 	dev_info->num_queues[RTE_BBDEV_OP_LDPC_ENC] = d->acc_conf.q_dl_5g.num_aqs_per_groups *
 			d->acc_conf.q_dl_5g.num_qgroups;
 	dev_info->num_queues[RTE_BBDEV_OP_FFT] = 0;
-	dev_info->queue_priority[RTE_BBDEV_OP_TURBO_DEC] = 0;
-	dev_info->queue_priority[RTE_BBDEV_OP_TURBO_ENC] = 0;
+	dev_info->queue_priority[RTE_BBDEV_OP_TURBO_DEC] = d->acc_conf.q_ul_4g.num_qgroups;
+	dev_info->queue_priority[RTE_BBDEV_OP_TURBO_ENC] = d->acc_conf.q_dl_4g.num_qgroups;
 	dev_info->queue_priority[RTE_BBDEV_OP_LDPC_DEC] = d->acc_conf.q_ul_5g.num_qgroups;
 	dev_info->queue_priority[RTE_BBDEV_OP_LDPC_ENC] = d->acc_conf.q_dl_5g.num_qgroups;
 	dev_info->queue_priority[RTE_BBDEV_OP_FFT] = 0;
@@ -744,6 +786,70 @@ 
 	{.device_id = 0},
 };
 
+/* Fill in a frame control word for turbo decoding. */
+static inline void
+acc200_fcw_td_fill(const struct rte_bbdev_dec_op *op, struct acc_fcw_td *fcw)
+{
+	fcw->fcw_ver = 1;
+	fcw->num_maps = ACC_FCW_TD_AUTOMAP;
+	fcw->bypass_sb_deint = !check_bit(op->turbo_dec.op_flags,
+			RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE);
+	if (op->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
+		/* FIXME for TB block */
+		fcw->k_pos = op->turbo_dec.tb_params.k_pos;
+		fcw->k_neg = op->turbo_dec.tb_params.k_neg;
+	} else {
+		fcw->k_pos = op->turbo_dec.cb_params.k;
+		fcw->k_neg = op->turbo_dec.cb_params.k;
+	}
+	fcw->c = 1;
+	fcw->c_neg = 1;
+	if (check_bit(op->turbo_dec.op_flags, RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
+		fcw->soft_output_en = 1;
+		fcw->sw_soft_out_dis = 0;
+		fcw->sw_et_cont = check_bit(op->turbo_dec.op_flags,
+				RTE_BBDEV_TURBO_CONTINUE_CRC_MATCH);
+		fcw->sw_soft_out_saturation = check_bit(op->turbo_dec.op_flags,
+				RTE_BBDEV_TURBO_SOFT_OUT_SATURATE);
+		if (check_bit(op->turbo_dec.op_flags,
+				RTE_BBDEV_TURBO_EQUALIZER)) {
+			fcw->bypass_teq = 0;
+			fcw->ea = op->turbo_dec.cb_params.e;
+			fcw->eb = op->turbo_dec.cb_params.e;
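+			/* The redundancy version selects the k0 start column
+			 * of the circular buffer used by the equalizer.
+			 */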
+			if (op->turbo_dec.rv_index == 0)
+				fcw->k0_start_col = ACC_FCW_TD_RVIDX_0;
+			else if (op->turbo_dec.rv_index == 1)
+				fcw->k0_start_col = ACC_FCW_TD_RVIDX_1;
+			else if (op->turbo_dec.rv_index == 2)
+				fcw->k0_start_col = ACC_FCW_TD_RVIDX_2;
+			else
+				fcw->k0_start_col = ACC_FCW_TD_RVIDX_3;
+		} else {
+			fcw->bypass_teq = 1;
+			fcw->eb = 64; /* avoid undefined value */
+		}
+	} else {
+		fcw->soft_output_en = 0;
+		fcw->sw_soft_out_dis = 1;
+		fcw->bypass_teq = 0;
+	}
+
+	fcw->code_block_mode = 1; /* FIXME */
+	fcw->turbo_crc_type = check_bit(op->turbo_dec.op_flags,
+			RTE_BBDEV_TURBO_CRC_TYPE_24B);
+
+	fcw->ext_td_cold_reg_en = 1;
+	fcw->raw_decoder_input_on = 0;
+	fcw->max_iter = RTE_MAX((uint8_t) op->turbo_dec.iter_max, 2);
+	fcw->min_iter = 2;
+	fcw->half_iter_on = !check_bit(op->turbo_dec.op_flags,
+			RTE_BBDEV_TURBO_HALF_ITERATION_EVEN);
+
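+	/* Early termination is only enabled when no soft output is requested. */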
+	fcw->early_stop_en = check_bit(op->turbo_dec.op_flags,
+			RTE_BBDEV_TURBO_EARLY_TERMINATION) & !fcw->soft_output_en;
+	fcw->ext_scale = 0xF;
+}
+
 /* Fill in a frame control word for LDPC decoding. */
 static inline void
 acc200_fcw_ld_fill(struct rte_bbdev_dec_op *op, struct acc_fcw_ld *fcw,
@@ -870,6 +976,89 @@ 
 }
 
 static inline int
+acc200_dma_desc_te_fill(struct rte_bbdev_enc_op *op,
+		struct acc_dma_req_desc *desc, struct rte_mbuf **input,
+		struct rte_mbuf *output, uint32_t *in_offset,
+		uint32_t *out_offset, uint32_t *out_length,
+		uint32_t *mbuf_total_left, uint32_t *seg_total_left, uint8_t r)
+{
+	int next_triplet = 1; /* FCW already done */
+	uint32_t e, ea, eb, length;
+	uint16_t k, k_neg, k_pos;
+	uint8_t cab, c_neg;
+
+	desc->word0 = ACC_DMA_DESC_TYPE;
+	desc->word1 = 0; /**< Timestamp could be disabled */
+	desc->word2 = 0;
+	desc->word3 = 0;
+	desc->numCBs = 1;
+
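+	/* In TB mode the CB index r selects between the K-/K+ CB sizes and the
+	 * Ea/Eb rate matched lengths; in CB mode a single k/e pair applies.
+	 */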
+	if (op->turbo_enc.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
+		ea = op->turbo_enc.tb_params.ea;
+		eb = op->turbo_enc.tb_params.eb;
+		cab = op->turbo_enc.tb_params.cab;
+		k_neg = op->turbo_enc.tb_params.k_neg;
+		k_pos = op->turbo_enc.tb_params.k_pos;
+		c_neg = op->turbo_enc.tb_params.c_neg;
+		e = (r < cab) ? ea : eb;
+		k = (r < c_neg) ? k_neg : k_pos;
+	} else {
+		e = op->turbo_enc.cb_params.e;
+		k = op->turbo_enc.cb_params.k;
+	}
+
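+	/* When the device attaches the CRC24B itself, only the k - 24 payload
+	 * bits are read from the input mbuf.
+	 */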
+	if (check_bit(op->turbo_enc.op_flags, RTE_BBDEV_TURBO_CRC_24B_ATTACH))
+		length = (k - 24) >> 3;
+	else
+		length = k >> 3;
+
+	if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left < length))) {
+		rte_bbdev_log(ERR,
+				"Mismatch between mbuf length and included CB sizes: mbuf len %u, cb len %u",
+				*mbuf_total_left, length);
+		return -1;
+	}
+
+	next_triplet = acc_dma_fill_blk_type_in(desc, input, in_offset,
+			length, seg_total_left, next_triplet,
+			check_bit(op->turbo_enc.op_flags,
+			RTE_BBDEV_TURBO_ENC_SCATTER_GATHER));
+	if (unlikely(next_triplet < 0)) {
+		rte_bbdev_log(ERR,
+				"Mismatch between data to process and mbuf data length in bbdev_op: %p",
+				op);
+		return -1;
+	}
+	desc->data_ptrs[next_triplet - 1].last = 1;
+	desc->m2dlen = next_triplet;
+	*mbuf_total_left -= length;
+
+	/* Set output length */
+	if (check_bit(op->turbo_enc.op_flags, RTE_BBDEV_TURBO_RATE_MATCH))
+		/* Integer round up division by 8 */
+		*out_length = (e + 7) >> 3;
+	else
+		*out_length = (k >> 3) * 3 + 2;
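+	/* i.e. the three encoder output streams of k + 4 bits each
+	 * (3k + 12 bits in total), rounded up to bytes.
+	 */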
+
+	next_triplet = acc_dma_fill_blk_type(desc, output, *out_offset,
+			*out_length, next_triplet, ACC_DMA_BLKID_OUT_ENC);
+	if (unlikely(next_triplet < 0)) {
+		rte_bbdev_log(ERR,
+				"Mismatch between data to process and mbuf data length in bbdev_op: %p",
+				op);
+		return -1;
+	}
+	op->turbo_enc.output.length += *out_length;
+	*out_offset += *out_length;
+	desc->data_ptrs[next_triplet - 1].last = 1;
+	desc->d2mlen = next_triplet - desc->m2dlen;
+
+	desc->op_addr = op;
+
+	return 0;
+}
+
+static inline int
 acc200_dma_desc_le_fill(struct rte_bbdev_enc_op *op,
 		struct acc_dma_req_desc *desc, struct rte_mbuf **input,
 		struct rte_mbuf *output, uint32_t *in_offset,
@@ -929,6 +1118,122 @@ 
 }
 
 static inline int
+acc200_dma_desc_td_fill(struct rte_bbdev_dec_op *op,
+		struct acc_dma_req_desc *desc, struct rte_mbuf **input,
+		struct rte_mbuf *h_output, struct rte_mbuf *s_output,
+		uint32_t *in_offset, uint32_t *h_out_offset,
+		uint32_t *s_out_offset, uint32_t *h_out_length,
+		uint32_t *s_out_length, uint32_t *mbuf_total_left,
+		uint32_t *seg_total_left, uint8_t r)
+{
+	int next_triplet = 1; /* FCW already done */
+	uint16_t k;
+	uint16_t crc24_overlap = 0;
+	uint32_t e, kw;
+
+	desc->word0 = ACC_DMA_DESC_TYPE;
+	desc->word1 = 0; /**< Timestamp could be disabled */
+	desc->word2 = 0;
+	desc->word3 = 0;
+	desc->numCBs = 1;
+
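+	/* In TB mode the CB index r selects the K-/K+ CB size and the Ea/Eb
+	 * rate matched length of this code block.
+	 */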
+	if (op->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
+		k = (r < op->turbo_dec.tb_params.c_neg)
+			? op->turbo_dec.tb_params.k_neg
+			: op->turbo_dec.tb_params.k_pos;
+		e = (r < op->turbo_dec.tb_params.cab)
+			? op->turbo_dec.tb_params.ea
+			: op->turbo_dec.tb_params.eb;
+	} else {
+		k = op->turbo_dec.cb_params.k;
+		e = op->turbo_dec.cb_params.e;
+	}
+
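+	/* Unless the application requested to keep it, the per-CB CRC24B is
+	 * stripped from the hard output in TB mode.
+	 */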
+	if ((op->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK)
+		&& !check_bit(op->turbo_dec.op_flags,
+		RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP))
+		crc24_overlap = 24;
+
+	/* Calculate the circular buffer size.
+	 * According to 3GPP TS 36.212 section 5.1.4.2
+	 *   Kw = 3 * Kpi,
+	 * where:
+	 *   Kpi = nCol * nRow
+	 * where nCol is 32 and nRow can be calculated from:
+	 *   D <= nCol * nRow
+	 * where D is the size of each output from the turbo encoder block (k + 4).
+	 */
+	kw = RTE_ALIGN_CEIL(k + 4, 32) * 3;
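+	/* e.g. for k = 6144: D = 6148, nRow = 193, Kpi = 6176, Kw = 18528. */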
+
+	if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left < kw))) {
+		rte_bbdev_log(ERR,
+				"Mismatch between mbuf length and included CB sizes: mbuf len %u, cb len %u",
+				*mbuf_total_left, kw);
+		return -1;
+	}
+
+	next_triplet = acc_dma_fill_blk_type_in(desc, input, in_offset, kw,
+			seg_total_left, next_triplet,
+			check_bit(op->turbo_dec.op_flags,
+			RTE_BBDEV_TURBO_DEC_SCATTER_GATHER));
+	if (unlikely(next_triplet < 0)) {
+		rte_bbdev_log(ERR,
+				"Mismatch between data to process and mbuf data length in bbdev_op: %p",
+				op);
+		return -1;
+	}
+	desc->data_ptrs[next_triplet - 1].last = 1;
+	desc->m2dlen = next_triplet;
+	*mbuf_total_left -= kw;
+	*h_out_length = ((k - crc24_overlap) >> 3);
+	next_triplet = acc_dma_fill_blk_type(
+			desc, h_output, *h_out_offset,
+			*h_out_length, next_triplet, ACC_DMA_BLKID_OUT_HARD);
+	if (unlikely(next_triplet < 0)) {
+		rte_bbdev_log(ERR,
+				"Mismatch between data to process and mbuf data length in bbdev_op: %p",
+				op);
+		return -1;
+	}
+
+	op->turbo_dec.hard_output.length += *h_out_length;
+	*h_out_offset += *h_out_length;
+
+	/* Soft output */
+	if (check_bit(op->turbo_dec.op_flags, RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
+		if (op->turbo_dec.soft_output.data == 0) {
+			rte_bbdev_log(ERR, "Soft output is not defined");
+			return -1;
+		}
+		if (check_bit(op->turbo_dec.op_flags,
+				RTE_BBDEV_TURBO_EQUALIZER))
+			*s_out_length = e;
+		else
+			*s_out_length = (k * 3) + 12;
+
+		next_triplet = acc_dma_fill_blk_type(desc, s_output,
+				*s_out_offset, *s_out_length, next_triplet,
+				ACC_DMA_BLKID_OUT_SOFT);
+		if (unlikely(next_triplet < 0)) {
+			rte_bbdev_log(ERR,
+					"Mismatch between data to process and mbuf data length in bbdev_op: %p",
+					op);
+			return -1;
+		}
+
+		op->turbo_dec.soft_output.length += *s_out_length;
+		*s_out_offset += *s_out_length;
+	}
+
+	desc->data_ptrs[next_triplet - 1].last = 1;
+	desc->d2mlen = next_triplet - desc->m2dlen;
+
+	desc->op_addr = op;
+
+	return 0;
+}
+
+static inline int
 acc200_dma_desc_ld_fill(struct rte_bbdev_dec_op *op,
 		struct acc_dma_req_desc *desc,
 		struct rte_mbuf **input, struct rte_mbuf *h_output,
@@ -1100,6 +1405,51 @@ 
 	desc->op_addr = op;
 }
 
+/* Enqueue one encode operations for ACC200 device in CB mode */
+static inline int
+enqueue_enc_one_op_cb(struct acc_queue *q, struct rte_bbdev_enc_op *op,
+		uint16_t total_enqueued_cbs)
+{
+	union acc_dma_desc *desc = NULL;
+	int ret;
+	uint32_t in_offset, out_offset, out_length, mbuf_total_left,
+		seg_total_left;
+	struct rte_mbuf *input, *output_head, *output;
+
+	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
+			& q->sw_ring_wrap_mask);
+	desc = q->ring_addr + desc_idx;
+	acc_fcw_te_fill(op, &desc->req.fcw_te);
+
+	input = op->turbo_enc.input.data;
+	output_head = output = op->turbo_enc.output.data;
+	in_offset = op->turbo_enc.input.offset;
+	out_offset = op->turbo_enc.output.offset;
+	out_length = 0;
+	mbuf_total_left = op->turbo_enc.input.length;
+	seg_total_left = rte_pktmbuf_data_len(op->turbo_enc.input.data)
+			- in_offset;
+
+	ret = acc200_dma_desc_te_fill(op, &desc->req, &input, output,
+			&in_offset, &out_offset, &out_length, &mbuf_total_left,
+			&seg_total_left, 0);
+
+	if (unlikely(ret < 0))
+		return ret;
+
+	mbuf_append(output_head, output, out_length);
+
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
+	rte_memdump(stderr, "FCW", &desc->req.fcw_te,
+			sizeof(desc->req.fcw_te) - 8);
+	rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
+	if (check_mbuf_total_left(mbuf_total_left) != 0)
+		return -EINVAL;
+#endif
+	/* One CB (one op) was successfully prepared to enqueue */
+	return 1;
+}
+
 /* Enqueue one encode operations for ACC200 device in CB mode
  * multiplexed on the same descriptor
  */
@@ -1262,6 +1612,84 @@ 
 	return 1;
 }
 
+
+/* Enqueue one encode operations for ACC200 device in TB mode. */
+static inline int
+enqueue_enc_one_op_tb(struct acc_queue *q, struct rte_bbdev_enc_op *op,
+		uint16_t total_enqueued_cbs, uint8_t cbs_in_tb)
+{
+	union acc_dma_desc *desc = NULL;
+	int ret;
+	uint8_t r, c;
+	uint32_t in_offset, out_offset, out_length, mbuf_total_left,
+		seg_total_left;
+	struct rte_mbuf *input, *output_head, *output;
+	uint16_t current_enqueued_cbs = 0;
+
+	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
+			& q->sw_ring_wrap_mask);
+	desc = q->ring_addr + desc_idx;
+	uint64_t fcw_offset = (desc_idx << 8) + ACC_DESC_FCW_OFFSET;
+	acc_fcw_te_fill(op, &desc->req.fcw_te);
+
+	input = op->turbo_enc.input.data;
+	output_head = output = op->turbo_enc.output.data;
+	in_offset = op->turbo_enc.input.offset;
+	out_offset = op->turbo_enc.output.offset;
+	out_length = 0;
+	mbuf_total_left = op->turbo_enc.input.length;
+
+	c = op->turbo_enc.tb_params.c;
+	r = op->turbo_enc.tb_params.r;
+
+	while (mbuf_total_left > 0 && r < c) {
+		seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
+		/* Set up DMA descriptor */
+		desc = q->ring_addr + ((q->sw_ring_head + total_enqueued_cbs)
+				& q->sw_ring_wrap_mask);
+		desc->req.data_ptrs[0].address = q->ring_addr_iova + fcw_offset;
+		desc->req.data_ptrs[0].blen = ACC_FCW_TE_BLEN;
+
+		ret = acc200_dma_desc_te_fill(op, &desc->req, &input, output,
+				&in_offset, &out_offset, &out_length,
+				&mbuf_total_left, &seg_total_left, r);
+		if (unlikely(ret < 0))
+			return ret;
+		mbuf_append(output_head, output, out_length);
+
+		/* Set total number of CBs in TB */
+		desc->req.cbs_in_tb = cbs_in_tb;
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
+		rte_memdump(stderr, "FCW", &desc->req.fcw_te,
+				sizeof(desc->req.fcw_te) - 8);
+		rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
+#endif
+
+		if (seg_total_left == 0) {
+			/* Go to the next mbuf */
+			input = input->next;
+			in_offset = 0;
+			output = output->next;
+			out_offset = 0;
+		}
+
+		total_enqueued_cbs++;
+		current_enqueued_cbs++;
+		r++;
+	}
+
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
+	if (check_mbuf_total_left(mbuf_total_left) != 0)
+		return -EINVAL;
+#endif
+
+	/* Set SDone on last CB descriptor for TB mode. */
+	desc->req.sdone_enable = 1;
+	desc->req.irq_enable = q->irq_enable;
+
+	return current_enqueued_cbs;
+}
+
 /* Enqueue one encode operations for ACC200 device in TB mode.
  * returns the number of descs used
  */
@@ -1328,6 +1756,69 @@ 
 
 /** Enqueue one decode operations for ACC200 device in CB mode */
 static inline int
+enqueue_dec_one_op_cb(struct acc_queue *q, struct rte_bbdev_dec_op *op,
+		uint16_t total_enqueued_cbs)
+{
+	union acc_dma_desc *desc = NULL;
+	int ret;
+	uint32_t in_offset, h_out_offset, s_out_offset, s_out_length,
+		h_out_length, mbuf_total_left, seg_total_left;
+	struct rte_mbuf *input, *h_output_head, *h_output,
+		*s_output_head, *s_output;
+
+	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
+			& q->sw_ring_wrap_mask);
+	desc = q->ring_addr + desc_idx;
+	acc200_fcw_td_fill(op, &desc->req.fcw_td);
+
+	input = op->turbo_dec.input.data;
+	h_output_head = h_output = op->turbo_dec.hard_output.data;
+	s_output_head = s_output = op->turbo_dec.soft_output.data;
+	in_offset = op->turbo_dec.input.offset;
+	h_out_offset = op->turbo_dec.hard_output.offset;
+	s_out_offset = op->turbo_dec.soft_output.offset;
+	h_out_length = s_out_length = 0;
+	mbuf_total_left = op->turbo_dec.input.length;
+	seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
+
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
+	if (unlikely(input == NULL)) {
+		rte_bbdev_log(ERR, "Invalid mbuf pointer");
+		return -EFAULT;
+	}
+#endif
+
+	/* Set up DMA descriptor */
+	desc = q->ring_addr + ((q->sw_ring_head + total_enqueued_cbs)
+			& q->sw_ring_wrap_mask);
+
+	ret = acc200_dma_desc_td_fill(op, &desc->req, &input, h_output,
+			s_output, &in_offset, &h_out_offset, &s_out_offset,
+			&h_out_length, &s_out_length, &mbuf_total_left,
+			&seg_total_left, 0);
+
+	if (unlikely(ret < 0))
+		return ret;
+
+	/* Hard output */
+	mbuf_append(h_output_head, h_output, h_out_length);
+
+	/* Soft output */
+	if (check_bit(op->turbo_dec.op_flags, RTE_BBDEV_TURBO_SOFT_OUTPUT))
+		mbuf_append(s_output_head, s_output, s_out_length);
+
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
+	rte_memdump(stderr, "FCW", &desc->req.fcw_td,
+			sizeof(desc->req.fcw_td));
+	rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
+#endif
+
+	/* One CB (one op) was successfully prepared to enqueue */
+	return 1;
+}
+
+/** Enqueue one decode operations for ACC200 device in CB mode */
+static inline int
 enqueue_ldpc_dec_one_op_cb(struct acc_queue *q, struct rte_bbdev_dec_op *op,
 		uint16_t total_enqueued_cbs, bool same_op)
 {
@@ -1525,10 +2016,147 @@ 
 	return current_enqueued_cbs;
 }
 
-/** Enqueue encode operations for ACC200 device in CB mode. */
-static inline uint16_t
-acc200_enqueue_ldpc_enc_cb(struct rte_bbdev_queue_data *q_data,
-		struct rte_bbdev_enc_op **ops, uint16_t num)
+/* Enqueue one decode operations for ACC200 device in TB mode */
+static inline int
+enqueue_dec_one_op_tb(struct acc_queue *q, struct rte_bbdev_dec_op *op,
+		uint16_t total_enqueued_cbs, uint8_t cbs_in_tb)
+{
+	union acc_dma_desc *desc = NULL;
+	int ret;
+	uint8_t r, c;
+	uint32_t in_offset, h_out_offset, s_out_offset, s_out_length,
+		h_out_length, mbuf_total_left, seg_total_left;
+	struct rte_mbuf *input, *h_output_head, *h_output,
+		*s_output_head, *s_output;
+	uint16_t current_enqueued_cbs = 0;
+
+	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
+			& q->sw_ring_wrap_mask);
+	desc = q->ring_addr + desc_idx;
+	uint64_t fcw_offset = (desc_idx << 8) + ACC_DESC_FCW_OFFSET;
+	acc200_fcw_td_fill(op, &desc->req.fcw_td);
+
+	input = op->turbo_dec.input.data;
+	h_output_head = h_output = op->turbo_dec.hard_output.data;
+	s_output_head = s_output = op->turbo_dec.soft_output.data;
+	in_offset = op->turbo_dec.input.offset;
+	h_out_offset = op->turbo_dec.hard_output.offset;
+	s_out_offset = op->turbo_dec.soft_output.offset;
+	h_out_length = s_out_length = 0;
+	mbuf_total_left = op->turbo_dec.input.length;
+	c = op->turbo_dec.tb_params.c;
+	r = op->turbo_dec.tb_params.r;
+
+	while (mbuf_total_left > 0 && r < c) {
+
+		seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
+
+		/* Set up DMA descriptor */
+		desc = q->ring_addr + ((q->sw_ring_head + total_enqueued_cbs)
+				& q->sw_ring_wrap_mask);
+		desc->req.data_ptrs[0].address = q->ring_addr_iova + fcw_offset;
+		desc->req.data_ptrs[0].blen = ACC_FCW_TD_BLEN;
+		ret = acc200_dma_desc_td_fill(op, &desc->req, &input,
+				h_output, s_output, &in_offset, &h_out_offset,
+				&s_out_offset, &h_out_length, &s_out_length,
+				&mbuf_total_left, &seg_total_left, r);
+
+		if (unlikely(ret < 0))
+			return ret;
+
+		/* Hard output */
+		mbuf_append(h_output_head, h_output, h_out_length);
+
+		/* Soft output */
+		if (check_bit(op->turbo_dec.op_flags,
+				RTE_BBDEV_TURBO_SOFT_OUTPUT))
+			mbuf_append(s_output_head, s_output, s_out_length);
+
+		/* Set total number of CBs in TB */
+		desc->req.cbs_in_tb = cbs_in_tb;
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
+		rte_memdump(stderr, "FCW", &desc->req.fcw_td,
+				sizeof(desc->req.fcw_td) - 8);
+		rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
+#endif
+
+		if (seg_total_left == 0) {
+			/* Go to the next mbuf */
+			input = input->next;
+			in_offset = 0;
+			h_output = h_output->next;
+			h_out_offset = 0;
+
+			if (check_bit(op->turbo_dec.op_flags,
+					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
+				s_output = s_output->next;
+				s_out_offset = 0;
+			}
+		}
+
+		total_enqueued_cbs++;
+		current_enqueued_cbs++;
+		r++;
+	}
+
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
+	if (check_mbuf_total_left(mbuf_total_left) != 0)
+		return -EINVAL;
+#endif
+	/* Set SDone on last CB descriptor for TB mode */
+	desc->req.sdone_enable = 1;
+	desc->req.irq_enable = q->irq_enable;
+
+	return current_enqueued_cbs;
+}
+
+/* Enqueue encode operations for ACC200 device in CB mode. */
+static uint16_t
+acc200_enqueue_enc_cb(struct rte_bbdev_queue_data *q_data,
+		struct rte_bbdev_enc_op **ops, uint16_t num)
+{
+	struct acc_queue *q = q_data->queue_private;
+	int32_t avail = acc_ring_avail_enq(q);
+	uint16_t i;
+	union acc_dma_desc *desc;
+	int ret;
+
+	for (i = 0; i < num; ++i) {
+		/* Check if there is space available for further processing */
+		if (unlikely(avail - 1 < 0)) {
+			acc_enqueue_ring_full(q_data);
+			break;
+		}
+		avail -= 1;
+
+		ret = enqueue_enc_one_op_cb(q, ops[i], i);
+		if (ret < 0) {
+			acc_enqueue_invalid(q_data);
+			break;
+		}
+	}
+
+	if (unlikely(i == 0))
+		return 0; /* Nothing to enqueue */
+
+	/* Set SDone in last CB in enqueued ops for CB mode. */
+	desc = q->ring_addr + ((q->sw_ring_head + i - 1)
+			& q->sw_ring_wrap_mask);
+	desc->req.sdone_enable = 1;
+	desc->req.irq_enable = q->irq_enable;
+
+	acc_dma_enqueue(q, i, &q_data->queue_stats);
+
+	/* Update stats */
+	q_data->queue_stats.enqueued_count += i;
+	q_data->queue_stats.enqueue_err_count += num - i;
+	return i;
+}
+
+/** Enqueue encode operations for ACC200 device in CB mode. */
+static inline uint16_t
+acc200_enqueue_ldpc_enc_cb(struct rte_bbdev_queue_data *q_data,
+		struct rte_bbdev_enc_op **ops, uint16_t num)
 {
 	struct acc_queue *q = q_data->queue_private;
 	int32_t avail = acc_ring_avail_enq(q);
@@ -1583,6 +2211,45 @@ 
 	return i;
 }
 
+/* Enqueue encode operations for ACC200 device in TB mode. */
+static uint16_t
+acc200_enqueue_enc_tb(struct rte_bbdev_queue_data *q_data,
+		struct rte_bbdev_enc_op **ops, uint16_t num)
+{
+	struct acc_queue *q = q_data->queue_private;
+	int32_t avail = acc_ring_avail_enq(q);
+	uint16_t i, enqueued_cbs = 0;
+	uint8_t cbs_in_tb;
+	int ret;
+
+	for (i = 0; i < num; ++i) {
+		cbs_in_tb = get_num_cbs_in_tb_enc(&ops[i]->turbo_enc);
+		/* Check if there is space available for further processing */
+		if (unlikely((avail - cbs_in_tb < 0) || (cbs_in_tb == 0))) {
+			acc_enqueue_ring_full(q_data);
+			break;
+		}
+		avail -= cbs_in_tb;
+
+		ret = enqueue_enc_one_op_tb(q, ops[i], enqueued_cbs, cbs_in_tb);
+		if (ret <= 0) {
+			acc_enqueue_invalid(q_data);
+			break;
+		}
+		enqueued_cbs += ret;
+	}
+	if (unlikely(enqueued_cbs == 0))
+		return 0; /* Nothing to enqueue */
+
+	acc_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats);
+
+	/* Update stats */
+	q_data->queue_stats.enqueued_count += i;
+	q_data->queue_stats.enqueue_err_count += num - i;
+
+	return i;
+}
+
 /* Enqueue LDPC encode operations for ACC200 device in TB mode. */
 static uint16_t
 acc200_enqueue_ldpc_enc_tb(struct rte_bbdev_queue_data *q_data,
@@ -1623,18 +2290,18 @@ 
 	return i;
 }
 
-/* Check room in AQ for the enqueues batches into Qmgr */
-static int32_t
-acc200_aq_avail(struct rte_bbdev_queue_data *q_data, uint16_t num_ops)
+/* Enqueue encode operations for ACC200 device. */
+static uint16_t
+acc200_enqueue_enc(struct rte_bbdev_queue_data *q_data,
+		struct rte_bbdev_enc_op **ops, uint16_t num)
 {
-	struct acc_queue *q = q_data->queue_private;
-	int32_t aq_avail = q->aq_depth -
-			((q->aq_enqueued - q->aq_dequeued +
-			ACC_MAX_QUEUE_DEPTH) % ACC_MAX_QUEUE_DEPTH)
-			- (num_ops >> 7);
-	if (aq_avail <= 0)
-		acc_enqueue_queue_full(q_data);
-	return aq_avail;
+	int32_t aq_avail = acc_aq_avail(q_data, num);
+	if (unlikely((aq_avail <= 0) || (num == 0)))
+		return 0;
+	if (ops[0]->turbo_enc.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK)
+		return acc200_enqueue_enc_tb(q_data, ops, num);
+	else
+		return acc200_enqueue_enc_cb(q_data, ops, num);
 }
 
 /* Enqueue encode operations for ACC200 device. */
@@ -1652,6 +2319,47 @@ 
 		return acc200_enqueue_ldpc_enc_cb(q_data, ops, num);
 }
 
+
+/* Enqueue decode operations for ACC200 device in CB mode */
+static uint16_t
+acc200_enqueue_dec_cb(struct rte_bbdev_queue_data *q_data,
+		struct rte_bbdev_dec_op **ops, uint16_t num)
+{
+	struct acc_queue *q = q_data->queue_private;
+	int32_t avail = acc_ring_avail_enq(q);
+	uint16_t i;
+	union acc_dma_desc *desc;
+	int ret;
+
+	for (i = 0; i < num; ++i) {
+		/* Check if there is space available for further processing */
+		if (unlikely(avail - 1 < 0))
+			break;
+		avail -= 1;
+
+		ret = enqueue_dec_one_op_cb(q, ops[i], i);
+		if (ret < 0)
+			break;
+	}
+
+	if (unlikely(i == 0))
+		return 0; /* Nothing to enqueue */
+
+	/* Set SDone in last CB in enqueued ops for CB mode. */
+	desc = q->ring_addr + ((q->sw_ring_head + i - 1)
+			& q->sw_ring_wrap_mask);
+	desc->req.sdone_enable = 1;
+	desc->req.irq_enable = q->irq_enable;
+
+	acc_dma_enqueue(q, i, &q_data->queue_stats);
+
+	/* Update stats */
+	q_data->queue_stats.enqueued_count += i;
+	q_data->queue_stats.enqueue_err_count += num - i;
+
+	return i;
+}
+
 /* Enqueue decode operations for ACC200 device in TB mode */
 static uint16_t
 acc200_enqueue_ldpc_dec_tb(struct rte_bbdev_queue_data *q_data,
@@ -1740,12 +2448,64 @@ 
 	return i;
 }
 
+
+/* Enqueue decode operations for ACC200 device in TB mode */
+static uint16_t
+acc200_enqueue_dec_tb(struct rte_bbdev_queue_data *q_data,
+		struct rte_bbdev_dec_op **ops, uint16_t num)
+{
+	struct acc_queue *q = q_data->queue_private;
+	int32_t avail = acc_ring_avail_enq(q);
+	uint16_t i, enqueued_cbs = 0;
+	uint8_t cbs_in_tb;
+	int ret;
+
+	for (i = 0; i < num; ++i) {
+		cbs_in_tb = get_num_cbs_in_tb_dec(&ops[i]->turbo_dec);
+		/* Check if there is space available for further processing */
+		if (unlikely((avail - cbs_in_tb < 0) || (cbs_in_tb == 0))) {
+			acc_enqueue_ring_full(q_data);
+			break;
+		}
+		avail -= cbs_in_tb;
+
+		ret = enqueue_dec_one_op_tb(q, ops[i], enqueued_cbs, cbs_in_tb);
+		if (ret <= 0) {
+			acc_enqueue_invalid(q_data);
+			break;
+		}
+		enqueued_cbs += ret;
+	}
+
+	acc_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats);
+
+	/* Update stats */
+	q_data->queue_stats.enqueued_count += i;
+	q_data->queue_stats.enqueue_err_count += num - i;
+
+	return i;
+}
+
+/* Enqueue decode operations for ACC200 device. */
+static uint16_t
+acc200_enqueue_dec(struct rte_bbdev_queue_data *q_data,
+		struct rte_bbdev_dec_op **ops, uint16_t num)
+{
+	int32_t aq_avail = acc_aq_avail(q_data, num);
+	if (unlikely((aq_avail <= 0) || (num == 0)))
+		return 0;
+	if (ops[0]->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK)
+		return acc200_enqueue_dec_tb(q_data, ops, num);
+	else
+		return acc200_enqueue_dec_cb(q_data, ops, num);
+}
+
 /* Enqueue decode operations for ACC200 device. */
 static uint16_t
 acc200_enqueue_ldpc_dec(struct rte_bbdev_queue_data *q_data,
 		struct rte_bbdev_dec_op **ops, uint16_t num)
 {
-	int32_t aq_avail = acc200_aq_avail(q_data, num);
+	int32_t aq_avail = acc_aq_avail(q_data, num);
 	if (unlikely((aq_avail <= 0) || (num == 0)))
 		return 0;
 	if (ops[0]->ldpc_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK)
@@ -2093,6 +2853,51 @@ 
 	return cb_idx;
 }
 
+/* Dequeue encode operations from ACC200 device. */
+static uint16_t
+acc200_dequeue_enc(struct rte_bbdev_queue_data *q_data,
+		struct rte_bbdev_enc_op **ops, uint16_t num)
+{
+	struct acc_queue *q = q_data->queue_private;
+	uint32_t avail = acc_ring_avail_deq(q);
+	uint32_t aq_dequeued = 0;
+	uint16_t i, dequeued_ops = 0, dequeued_descs = 0;
+	int ret;
+	struct rte_bbdev_enc_op *op;
+	if (avail == 0)
+		return 0;
+	op = (q->ring_addr + (q->sw_ring_tail &
+			q->sw_ring_wrap_mask))->req.op_addr;
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
+	if (unlikely(ops == NULL || q == NULL || op == NULL))
+		return 0;
+#endif
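+	/* Use the code block mode of the first pending op to select the CB or
+	 * TB dequeue path for the whole burst.
+	 */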
+	int cbm = op->turbo_enc.code_block_mode;
+
+	for (i = 0; i < num; i++) {
+		if (cbm == RTE_BBDEV_TRANSPORT_BLOCK)
+			ret = dequeue_enc_one_op_tb(q, &ops[dequeued_ops],
+					&dequeued_ops, &aq_dequeued,
+					&dequeued_descs);
+		else
+			ret = dequeue_enc_one_op_cb(q, &ops[dequeued_ops],
+					&dequeued_ops, &aq_dequeued,
+					&dequeued_descs);
+		if (ret < 0)
+			break;
+		if (dequeued_ops >= num)
+			break;
+	}
+
+	q->aq_dequeued += aq_dequeued;
+	q->sw_ring_tail += dequeued_descs;
+
+	/* Update dequeue stats */
+	q_data->queue_stats.dequeued_count += dequeued_ops;
+
+	return dequeued_ops;
+}
+
 /* Dequeue LDPC encode operations from ACC200 device. */
 static uint16_t
 acc200_dequeue_ldpc_enc(struct rte_bbdev_queue_data *q_data,
@@ -2140,6 +2945,51 @@ 
 
 /* Dequeue decode operations from ACC200 device. */
 static uint16_t
+acc200_dequeue_dec(struct rte_bbdev_queue_data *q_data,
+		struct rte_bbdev_dec_op **ops, uint16_t num)
+{
+	struct acc_queue *q = q_data->queue_private;
+	uint16_t dequeue_num;
+	uint32_t avail = acc_ring_avail_deq(q);
+	uint32_t aq_dequeued = 0;
+	uint16_t i;
+	uint16_t dequeued_cbs = 0;
+	struct rte_bbdev_dec_op *op;
+	int ret;
+
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
+	if (unlikely(ops == NULL || q == NULL))
+		return 0;
+#endif
+
+	dequeue_num = (avail < num) ? avail : num;
+
+	for (i = 0; i < dequeue_num; ++i) {
+		op = (q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
+			& q->sw_ring_wrap_mask))->req.op_addr;
+		if (op->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK)
+			ret = dequeue_dec_one_op_tb(q, &ops[i], dequeued_cbs,
+					&aq_dequeued);
+		else
+			ret = dequeue_dec_one_op_cb(q_data, q, &ops[i],
+					dequeued_cbs, &aq_dequeued);
+
+		if (ret <= 0)
+			break;
+		dequeued_cbs += ret;
+	}
+
+	q->aq_dequeued += aq_dequeued;
+	q->sw_ring_tail += dequeued_cbs;
+
+	/* Update dequeue stats */
+	q_data->queue_stats.dequeued_count += i;
+
+	return i;
+}
+
+/* Dequeue decode operations from ACC200 device. */
+static uint16_t
 acc200_dequeue_ldpc_dec(struct rte_bbdev_queue_data *q_data,
 		struct rte_bbdev_dec_op **ops, uint16_t num)
 {
@@ -2191,6 +3041,10 @@ 
 	struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev->device);
 
 	dev->dev_ops = &acc200_bbdev_ops;
+	dev->enqueue_enc_ops = acc200_enqueue_enc;
+	dev->enqueue_dec_ops = acc200_enqueue_dec;
+	dev->dequeue_enc_ops = acc200_dequeue_enc;
+	dev->dequeue_dec_ops = acc200_dequeue_dec;
 	dev->enqueue_ldpc_enc_ops = acc200_enqueue_ldpc_enc;
 	dev->enqueue_ldpc_dec_ops = acc200_enqueue_ldpc_dec;
 	dev->dequeue_ldpc_enc_ops = acc200_dequeue_ldpc_enc;