[v9,09/14] baseband/acc: add LTE processing functions

Message ID 20221007213851.31524-10-nicolas.chautru@intel.com (mailing list archive)
State Superseded, archived
Delegated to: akhil goyal
Headers
Series bbdev ACC200 PMD |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Chautru, Nicolas Oct. 7, 2022, 9:38 p.m. UTC
  From: Nic Chautru <nicolas.chautru@intel.com>

Added functions and capability for 4G FEC

Signed-off-by: Nic Chautru <nicolas.chautru@intel.com>
---
 drivers/baseband/acc/rte_acc200_pmd.c | 851 +++++++++++++++++++++++++-
 1 file changed, 846 insertions(+), 5 deletions(-)
  

Comments

Maxime Coquelin Oct. 10, 2022, 9:47 a.m. UTC | #1
On 10/7/22 23:38, Nicolas Chautru wrote:
> From: Nic Chautru <nicolas.chautru@intel.com>
> 
> Added functions and capability for 4G FEC
> 
> Signed-off-by: Nic Chautru <nicolas.chautru@intel.com>
> ---
>   drivers/baseband/acc/rte_acc200_pmd.c | 851 +++++++++++++++++++++++++-
>   1 file changed, 846 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/baseband/acc/rte_acc200_pmd.c b/drivers/baseband/acc/rte_acc200_pmd.c
> index 0f018b19ac..058e38c3ec 100644
> --- a/drivers/baseband/acc/rte_acc200_pmd.c
> +++ b/drivers/baseband/acc/rte_acc200_pmd.c
> @@ -649,6 +649,46 @@ acc200_dev_info_get(struct rte_bbdev *dev,
>   	struct acc_device *d = dev->data->dev_private;
>   	int i;
>   	static const struct rte_bbdev_op_cap bbdev_capabilities[] = {
> +		{
> +			.type = RTE_BBDEV_OP_TURBO_DEC,
> +			.cap.turbo_dec = {
> +				.capability_flags =
> +					RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE |
> +					RTE_BBDEV_TURBO_CRC_TYPE_24B |
> +					RTE_BBDEV_TURBO_EQUALIZER |
> +					RTE_BBDEV_TURBO_SOFT_OUT_SATURATE |
> +					RTE_BBDEV_TURBO_HALF_ITERATION_EVEN |
> +					RTE_BBDEV_TURBO_CONTINUE_CRC_MATCH |
> +					RTE_BBDEV_TURBO_SOFT_OUTPUT |
> +					RTE_BBDEV_TURBO_EARLY_TERMINATION |
> +					RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN |
> +					RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT |
> +					RTE_BBDEV_TURBO_MAP_DEC |
> +					RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP |
> +					RTE_BBDEV_TURBO_DEC_SCATTER_GATHER,
> +				.max_llr_modulus = INT8_MAX,
> +				.num_buffers_src =
> +						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> +				.num_buffers_hard_out =
> +						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> +				.num_buffers_soft_out =
> +						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> +			}
> +		},
> +		{
> +			.type = RTE_BBDEV_OP_TURBO_ENC,
> +			.cap.turbo_enc = {
> +				.capability_flags =
> +					RTE_BBDEV_TURBO_CRC_24B_ATTACH |
> +					RTE_BBDEV_TURBO_RV_INDEX_BYPASS |
> +					RTE_BBDEV_TURBO_RATE_MATCH |
> +					RTE_BBDEV_TURBO_ENC_SCATTER_GATHER,
> +				.num_buffers_src =
> +						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> +				.num_buffers_dst =
> +						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> +			}
> +		},
>   		{
>   			.type   = RTE_BBDEV_OP_LDPC_ENC,
>   			.cap.ldpc_enc = {
> @@ -700,15 +740,17 @@ acc200_dev_info_get(struct rte_bbdev *dev,
>   
>   	/* Exposed number of queues. */
>   	dev_info->num_queues[RTE_BBDEV_OP_NONE] = 0;
> -	dev_info->num_queues[RTE_BBDEV_OP_TURBO_DEC] = 0;
> -	dev_info->num_queues[RTE_BBDEV_OP_TURBO_ENC] = 0;
> +	dev_info->num_queues[RTE_BBDEV_OP_TURBO_DEC] = d->acc_conf.q_ul_4g.num_aqs_per_groups *
> +			d->acc_conf.q_ul_4g.num_qgroups;
> +	dev_info->num_queues[RTE_BBDEV_OP_TURBO_ENC] = d->acc_conf.q_dl_4g.num_aqs_per_groups *
> +			d->acc_conf.q_dl_4g.num_qgroups;
>   	dev_info->num_queues[RTE_BBDEV_OP_LDPC_DEC] = d->acc_conf.q_ul_5g.num_aqs_per_groups *
>   			d->acc_conf.q_ul_5g.num_qgroups;
>   	dev_info->num_queues[RTE_BBDEV_OP_LDPC_ENC] = d->acc_conf.q_dl_5g.num_aqs_per_groups *
>   			d->acc_conf.q_dl_5g.num_qgroups;
>   	dev_info->num_queues[RTE_BBDEV_OP_FFT] = 0;
> -	dev_info->queue_priority[RTE_BBDEV_OP_TURBO_DEC] = 0;
> -	dev_info->queue_priority[RTE_BBDEV_OP_TURBO_ENC] = 0;
> +	dev_info->queue_priority[RTE_BBDEV_OP_TURBO_DEC] = d->acc_conf.q_ul_4g.num_qgroups;
> +	dev_info->queue_priority[RTE_BBDEV_OP_TURBO_ENC] = d->acc_conf.q_dl_4g.num_qgroups;
>   	dev_info->queue_priority[RTE_BBDEV_OP_LDPC_DEC] = d->acc_conf.q_ul_5g.num_qgroups;
>   	dev_info->queue_priority[RTE_BBDEV_OP_LDPC_ENC] = d->acc_conf.q_dl_5g.num_qgroups;
>   	dev_info->queue_priority[RTE_BBDEV_OP_FFT] = 0;
> @@ -753,6 +795,70 @@ static struct rte_pci_id pci_id_acc200_vf_map[] = {
>   	{.device_id = 0},
>   };
>   
> +/* Fill in a frame control word for turbo decoding. */
> +static inline void
> +acc200_fcw_td_fill(const struct rte_bbdev_dec_op *op, struct acc_fcw_td *fcw)
> +{
> +	fcw->fcw_ver = 1;
> +	fcw->num_maps = ACC_FCW_TD_AUTOMAP;
> +	fcw->bypass_sb_deint = !check_bit(op->turbo_dec.op_flags,
> +			RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE);
> +	if (op->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
> +		/* FIXME for TB block */
> +		fcw->k_pos = op->turbo_dec.tb_params.k_pos;
> +		fcw->k_neg = op->turbo_dec.tb_params.k_neg;
> +	} else {
> +		fcw->k_pos = op->turbo_dec.cb_params.k;
> +		fcw->k_neg = op->turbo_dec.cb_params.k;
> +	}
> +	fcw->c = 1;
> +	fcw->c_neg = 1;
> +	if (check_bit(op->turbo_dec.op_flags, RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
> +		fcw->soft_output_en = 1;
> +		fcw->sw_soft_out_dis = 0;
> +		fcw->sw_et_cont = check_bit(op->turbo_dec.op_flags,
> +				RTE_BBDEV_TURBO_CONTINUE_CRC_MATCH);
> +		fcw->sw_soft_out_saturation = check_bit(op->turbo_dec.op_flags,
> +				RTE_BBDEV_TURBO_SOFT_OUT_SATURATE);
> +		if (check_bit(op->turbo_dec.op_flags,
> +				RTE_BBDEV_TURBO_EQUALIZER)) {
> +			fcw->bypass_teq = 0;
> +			fcw->ea = op->turbo_dec.cb_params.e;
> +			fcw->eb = op->turbo_dec.cb_params.e;
> +			if (op->turbo_dec.rv_index == 0)
> +				fcw->k0_start_col = ACC_FCW_TD_RVIDX_0;
> +			else if (op->turbo_dec.rv_index == 1)
> +				fcw->k0_start_col = ACC_FCW_TD_RVIDX_1;
> +			else if (op->turbo_dec.rv_index == 2)
> +				fcw->k0_start_col = ACC_FCW_TD_RVIDX_2;
> +			else
> +				fcw->k0_start_col = ACC_FCW_TD_RVIDX_3;
> +		} else {
> +			fcw->bypass_teq = 1;
> +			fcw->eb = 64; /* avoid undefined value */
> +		}
> +	} else {
> +		fcw->soft_output_en = 0;
> +		fcw->sw_soft_out_dis = 1;
> +		fcw->bypass_teq = 0;
> +	}
> +
> +	fcw->code_block_mode = 1; /* FIXME */
> +	fcw->turbo_crc_type = check_bit(op->turbo_dec.op_flags,
> +			RTE_BBDEV_TURBO_CRC_TYPE_24B);
> +
> +	fcw->ext_td_cold_reg_en = 1;
> +	fcw->raw_decoder_input_on = 0;
> +	fcw->max_iter = RTE_MAX((uint8_t) op->turbo_dec.iter_max, 2);
> +	fcw->min_iter = 2;
> +	fcw->half_iter_on = !check_bit(op->turbo_dec.op_flags,
> +			RTE_BBDEV_TURBO_HALF_ITERATION_EVEN);
> +
> +	fcw->early_stop_en = check_bit(op->turbo_dec.op_flags,
> +			RTE_BBDEV_TURBO_EARLY_TERMINATION) & !fcw->soft_output_en;
> +	fcw->ext_scale = 0xF;
> +}
> +
>   /* Fill in a frame control word for LDPC decoding. */
>   static inline void
>   acc200_fcw_ld_fill(struct rte_bbdev_dec_op *op, struct acc_fcw_ld *fcw,
> @@ -876,7 +982,206 @@ acc200_fcw_ld_fill(struct rte_bbdev_dec_op *op, struct acc_fcw_ld *fcw,
>   }
>   
>   static inline int
> -acc200_dma_desc_ld_fill(struct rte_bbdev_dec_op *op, struct acc_dma_req_desc *desc,
> +acc200_dma_desc_te_fill(struct rte_bbdev_enc_op *op,
> +		struct acc_dma_req_desc *desc, struct rte_mbuf **input,
> +		struct rte_mbuf *output, uint32_t *in_offset,
> +		uint32_t *out_offset, uint32_t *out_length,
> +		uint32_t *mbuf_total_left, uint32_t *seg_total_left, uint8_t r)
> +{
> +	int next_triplet = 1; /* FCW already done. */
> +	uint32_t e, ea, eb, length;
> +	uint16_t k, k_neg, k_pos;
> +	uint8_t cab, c_neg;
> +
> +	desc->word0 = ACC_DMA_DESC_TYPE;
> +	desc->word1 = 0; /**< Timestamp could be disabled. */
> +	desc->word2 = 0;
> +	desc->word3 = 0;
> +	desc->numCBs = 1;
> +
> +	if (op->turbo_enc.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
> +		ea = op->turbo_enc.tb_params.ea;
> +		eb = op->turbo_enc.tb_params.eb;
> +		cab = op->turbo_enc.tb_params.cab;
> +		k_neg = op->turbo_enc.tb_params.k_neg;
> +		k_pos = op->turbo_enc.tb_params.k_pos;
> +		c_neg = op->turbo_enc.tb_params.c_neg;
> +		e = (r < cab) ? ea : eb;
> +		k = (r < c_neg) ? k_neg : k_pos;
> +	} else {
> +		e = op->turbo_enc.cb_params.e;
> +		k = op->turbo_enc.cb_params.k;
> +	}
> +
> +	if (check_bit(op->turbo_enc.op_flags, RTE_BBDEV_TURBO_CRC_24B_ATTACH))
> +		length = (k - 24) >> 3;
> +	else
> +		length = k >> 3;
> +
> +	if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left < length))) {
> +		rte_bbdev_log(ERR,
> +				"Mismatch between mbuf length and included CB sizes: mbuf len %u, cb len %u",
> +				*mbuf_total_left, length);
> +		return -1;
> +	}
> +
> +	next_triplet = acc_dma_fill_blk_type_in(desc, input, in_offset,
> +			length, seg_total_left, next_triplet,
> +			check_bit(op->turbo_enc.op_flags, RTE_BBDEV_TURBO_ENC_SCATTER_GATHER));
> +	if (unlikely(next_triplet < 0)) {
> +		rte_bbdev_log(ERR,
> +				"Mismatch between data to process and mbuf data length in bbdev_op: %p",
> +				op);
> +		return -1;
> +	}
> +	desc->data_ptrs[next_triplet - 1].last = 1;
> +	desc->m2dlen = next_triplet;
> +	*mbuf_total_left -= length;
> +
> +	/* Set output length. */
> +	if (check_bit(op->turbo_enc.op_flags, RTE_BBDEV_TURBO_RATE_MATCH))
> +		/* Integer round up division by 8. */
> +		*out_length = (e + 7) >> 3;
> +	else
> +		*out_length = (k >> 3) * 3 + 2;
> +
> +	next_triplet = acc_dma_fill_blk_type(desc, output, *out_offset,
> +			*out_length, next_triplet, ACC_DMA_BLKID_OUT_ENC);
> +	if (unlikely(next_triplet < 0)) {
> +		rte_bbdev_log(ERR,
> +				"Mismatch between data to process and mbuf data length in bbdev_op: %p",
> +				op);
> +		return -1;
> +	}
> +	op->turbo_enc.output.length += *out_length;
> +	*out_offset += *out_length;
> +	desc->data_ptrs[next_triplet - 1].last = 1;
> +	desc->d2mlen = next_triplet - desc->m2dlen;
> +
> +	desc->op_addr = op;
> +
> +	return 0;
> +}
> +
> +static inline int
> +acc200_dma_desc_td_fill(struct rte_bbdev_dec_op *op,
> +		struct acc_dma_req_desc *desc, struct rte_mbuf **input,
> +		struct rte_mbuf *h_output, struct rte_mbuf *s_output,
> +		uint32_t *in_offset, uint32_t *h_out_offset,
> +		uint32_t *s_out_offset, uint32_t *h_out_length,
> +		uint32_t *s_out_length, uint32_t *mbuf_total_left,
> +		uint32_t *seg_total_left, uint8_t r)
> +{
> +	int next_triplet = 1; /* FCW already done. */
> +	uint16_t k;
> +	uint16_t crc24_overlap = 0;
> +	uint32_t e, kw;
> +
> +	desc->word0 = ACC_DMA_DESC_TYPE;
> +	desc->word1 = 0; /**< Timestamp could be disabled. */
> +	desc->word2 = 0;
> +	desc->word3 = 0;
> +	desc->numCBs = 1;
> +
> +	if (op->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
> +		k = (r < op->turbo_dec.tb_params.c_neg)
> +			? op->turbo_dec.tb_params.k_neg
> +			: op->turbo_dec.tb_params.k_pos;
> +		e = (r < op->turbo_dec.tb_params.cab)
> +			? op->turbo_dec.tb_params.ea
> +			: op->turbo_dec.tb_params.eb;
> +	} else {
> +		k = op->turbo_dec.cb_params.k;
> +		e = op->turbo_dec.cb_params.e;
> +	}
> +
> +	if ((op->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK)
> +		&& !check_bit(op->turbo_dec.op_flags,
> +		RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP))
> +		crc24_overlap = 24;
> +
> +	/* Calculates circular buffer size.
> +	 * According to 3gpp 36.212 section 5.1.4.2
> +	 *   Kw = 3 * Kpi,
> +	 * where:
> +	 *   Kpi = nCol * nRow
> +	 * where nCol is 32 and nRow can be calculated from:
> +	 *   D =< nCol * nRow
> +	 * where D is the size of each output from turbo encoder block (k + 4).
> +	 */
> +	kw = RTE_ALIGN_CEIL(k + 4, 32) * 3;
> +
> +	if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left < kw))) {
> +		rte_bbdev_log(ERR,
> +				"Mismatch between mbuf length and included CB sizes: mbuf len %u, cb len %u",
> +				*mbuf_total_left, kw);
> +		return -1;
> +	}
> +
> +	next_triplet = acc_dma_fill_blk_type_in(desc, input, in_offset, kw,
> +			seg_total_left, next_triplet,
> +			check_bit(op->turbo_dec.op_flags,
> +			RTE_BBDEV_TURBO_DEC_SCATTER_GATHER));
> +	if (unlikely(next_triplet < 0)) {
> +		rte_bbdev_log(ERR,
> +				"Mismatch between data to process and mbuf data length in bbdev_op: %p",
> +				op);
> +		return -1;
> +	}
> +	desc->data_ptrs[next_triplet - 1].last = 1;
> +	desc->m2dlen = next_triplet;
> +	*mbuf_total_left -= kw;
> +	*h_out_length = ((k - crc24_overlap) >> 3);
> +	next_triplet = acc_dma_fill_blk_type(
> +			desc, h_output, *h_out_offset,
> +			*h_out_length, next_triplet, ACC_DMA_BLKID_OUT_HARD);
> +	if (unlikely(next_triplet < 0)) {
> +		rte_bbdev_log(ERR,
> +				"Mismatch between data to process and mbuf data length in bbdev_op: %p",
> +				op);
> +		return -1;
> +	}
> +
> +	op->turbo_dec.hard_output.length += *h_out_length;
> +	*h_out_offset += *h_out_length;
> +
> +	/* Soft output. */
> +	if (check_bit(op->turbo_dec.op_flags, RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
> +		if (op->turbo_dec.soft_output.data == 0) {
> +			rte_bbdev_log(ERR, "Soft output is not defined");
> +			return -1;
> +		}
> +		if (check_bit(op->turbo_dec.op_flags,
> +				RTE_BBDEV_TURBO_EQUALIZER))
> +			*s_out_length = e;
> +		else
> +			*s_out_length = (k * 3) + 12;
> +
> +		next_triplet = acc_dma_fill_blk_type(desc, s_output,
> +				*s_out_offset, *s_out_length, next_triplet,
> +				ACC_DMA_BLKID_OUT_SOFT);
> +		if (unlikely(next_triplet < 0)) {
> +			rte_bbdev_log(ERR,
> +					"Mismatch between data to process and mbuf data length in bbdev_op: %p",
> +					op);
> +			return -1;
> +		}
> +
> +		op->turbo_dec.soft_output.length += *s_out_length;
> +		*s_out_offset += *s_out_length;
> +	}
> +
> +	desc->data_ptrs[next_triplet - 1].last = 1;
> +	desc->d2mlen = next_triplet - desc->m2dlen;
> +
> +	desc->op_addr = op;
> +
> +	return 0;
> +}
> +
> +static inline int
> +acc200_dma_desc_ld_fill(struct rte_bbdev_dec_op *op,
> +		struct acc_dma_req_desc *desc,
>   		struct rte_mbuf **input, struct rte_mbuf *h_output,
>   		uint32_t *in_offset, uint32_t *h_out_offset,
>   		uint32_t *h_out_length, uint32_t *mbuf_total_left,
> @@ -1034,6 +1339,49 @@ acc200_dma_desc_ld_update(struct rte_bbdev_dec_op *op,
>   	desc->op_addr = op;
>   }
>   
> +/* Enqueue one encode operations for ACC200 device in CB mode */
> +static inline int
> +enqueue_enc_one_op_cb(struct acc_queue *q, struct rte_bbdev_enc_op *op,
> +		uint16_t total_enqueued_cbs)
> +{
> +	union acc_dma_desc *desc = NULL;
> +	int ret;
> +	uint32_t in_offset, out_offset, out_length, mbuf_total_left,
> +		seg_total_left;
> +	struct rte_mbuf *input, *output_head, *output;
> +
> +	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
> +			& q->sw_ring_wrap_mask);
> +	desc = q->ring_addr + desc_idx;
> +	acc_fcw_te_fill(op, &desc->req.fcw_te);
> +
> +	input = op->turbo_enc.input.data;
> +	output_head = output = op->turbo_enc.output.data;
> +	in_offset = op->turbo_enc.input.offset;
> +	out_offset = op->turbo_enc.output.offset;
> +	out_length = 0;
> +	mbuf_total_left = op->turbo_enc.input.length;
> +	seg_total_left = rte_pktmbuf_data_len(op->turbo_enc.input.data)
> +			- in_offset;
> +
> +	ret = acc200_dma_desc_te_fill(op, &desc->req, &input, output,
> +			&in_offset, &out_offset, &out_length, &mbuf_total_left,
> +			&seg_total_left, 0);
> +
> +	if (unlikely(ret < 0))
> +		return ret;
> +
> +	mbuf_append(output_head, output, out_length);
> +
> +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> +	rte_memdump(stderr, "FCW", &desc->req.fcw_te,
> +			sizeof(desc->req.fcw_te) - 8);
> +	rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
> +#endif
> +	/* One CB (one op) was successfully prepared to enqueue */
> +	return 1;
> +}
> +
>   /* Enqueue one encode operations for ACC200 device in CB mode
>    * multiplexed on the same descriptor.
>    */
> @@ -1146,6 +1494,78 @@ enqueue_ldpc_enc_part_tb(struct acc_queue *q, struct rte_bbdev_enc_op *op,
>   
>   }
>   
> +/* Enqueue one encode operations for ACC200 device in TB mode. */
> +static inline int
> +enqueue_enc_one_op_tb(struct acc_queue *q, struct rte_bbdev_enc_op *op,
> +		uint16_t total_enqueued_cbs, uint8_t cbs_in_tb)
> +{
> +	union acc_dma_desc *desc = NULL;
> +	int ret;
> +	uint8_t r, c;
> +	uint32_t in_offset, out_offset, out_length, mbuf_total_left,
> +		seg_total_left;
> +	struct rte_mbuf *input, *output_head, *output;
> +	uint16_t current_enqueued_cbs = 0;
> +
> +	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
> +			& q->sw_ring_wrap_mask);

Maybe I did not make the comment on this patch specifically, but having
a helper to get the descriptor index would make sense, given it is
duplicated in several places.


With this fixed, you can add:

Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>

Thanks,
Maxime
  
Chautru, Nicolas Oct. 10, 2022, 5:33 p.m. UTC | #2
Hi Maxime, 

> -----Original Message-----
> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> Sent: Monday, October 10, 2022 2:48 AM
> To: Chautru, Nicolas <nicolas.chautru@intel.com>; dev@dpdk.org;
> gakhil@marvell.com
> Cc: trix@redhat.com; mdr@ashroe.eu; Richardson, Bruce
> <bruce.richardson@intel.com>; hemant.agrawal@nxp.com;
> david.marchand@redhat.com; stephen@networkplumber.org; Vargas,
> Hernan <hernan.vargas@intel.com>
> Subject: Re: [PATCH v9 09/14] baseband/acc: add LTE processing functions
> 
> 
> 
> On 10/7/22 23:38, Nicolas Chautru wrote:
> > From: Nic Chautru <nicolas.chautru@intel.com>
> >
> > Added functions and capability for 4G FEC
> >
> > Signed-off-by: Nic Chautru <nicolas.chautru@intel.com>
> > ---
> >   drivers/baseband/acc/rte_acc200_pmd.c | 851
> +++++++++++++++++++++++++-
> >   1 file changed, 846 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/baseband/acc/rte_acc200_pmd.c
> > b/drivers/baseband/acc/rte_acc200_pmd.c
> > index 0f018b19ac..058e38c3ec 100644
> > --- a/drivers/baseband/acc/rte_acc200_pmd.c
> > +++ b/drivers/baseband/acc/rte_acc200_pmd.c
> > @@ -649,6 +649,46 @@ acc200_dev_info_get(struct rte_bbdev *dev,
> >   	struct acc_device *d = dev->data->dev_private;
> >   	int i;
> >   	static const struct rte_bbdev_op_cap bbdev_capabilities[] = {
> > +		{
> > +			.type = RTE_BBDEV_OP_TURBO_DEC,
> > +			.cap.turbo_dec = {
> > +				.capability_flags =
> > +
> 	RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE |
> > +					RTE_BBDEV_TURBO_CRC_TYPE_24B
> |
> > +					RTE_BBDEV_TURBO_EQUALIZER |
> > +
> 	RTE_BBDEV_TURBO_SOFT_OUT_SATURATE |
> > +
> 	RTE_BBDEV_TURBO_HALF_ITERATION_EVEN |
> > +
> 	RTE_BBDEV_TURBO_CONTINUE_CRC_MATCH |
> > +					RTE_BBDEV_TURBO_SOFT_OUTPUT
> |
> > +
> 	RTE_BBDEV_TURBO_EARLY_TERMINATION |
> > +
> 	RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN |
> > +
> 	RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT |
> > +					RTE_BBDEV_TURBO_MAP_DEC |
> > +
> 	RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP |
> > +
> 	RTE_BBDEV_TURBO_DEC_SCATTER_GATHER,
> > +				.max_llr_modulus = INT8_MAX,
> > +				.num_buffers_src =
> > +
> 	RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> > +				.num_buffers_hard_out =
> > +
> 	RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> > +				.num_buffers_soft_out =
> > +
> 	RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> > +			}
> > +		},
> > +		{
> > +			.type = RTE_BBDEV_OP_TURBO_ENC,
> > +			.cap.turbo_enc = {
> > +				.capability_flags =
> > +
> 	RTE_BBDEV_TURBO_CRC_24B_ATTACH |
> > +
> 	RTE_BBDEV_TURBO_RV_INDEX_BYPASS |
> > +					RTE_BBDEV_TURBO_RATE_MATCH |
> > +
> 	RTE_BBDEV_TURBO_ENC_SCATTER_GATHER,
> > +				.num_buffers_src =
> > +
> 	RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> > +				.num_buffers_dst =
> > +
> 	RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
> > +			}
> > +		},
> >   		{
> >   			.type   = RTE_BBDEV_OP_LDPC_ENC,
> >   			.cap.ldpc_enc = {
> > @@ -700,15 +740,17 @@ acc200_dev_info_get(struct rte_bbdev *dev,
> >
> >   	/* Exposed number of queues. */
> >   	dev_info->num_queues[RTE_BBDEV_OP_NONE] = 0;
> > -	dev_info->num_queues[RTE_BBDEV_OP_TURBO_DEC] = 0;
> > -	dev_info->num_queues[RTE_BBDEV_OP_TURBO_ENC] = 0;
> > +	dev_info->num_queues[RTE_BBDEV_OP_TURBO_DEC] = d-
> >acc_conf.q_ul_4g.num_aqs_per_groups *
> > +			d->acc_conf.q_ul_4g.num_qgroups;
> > +	dev_info->num_queues[RTE_BBDEV_OP_TURBO_ENC] = d-
> >acc_conf.q_dl_4g.num_aqs_per_groups *
> > +			d->acc_conf.q_dl_4g.num_qgroups;
> >   	dev_info->num_queues[RTE_BBDEV_OP_LDPC_DEC] = d-
> >acc_conf.q_ul_5g.num_aqs_per_groups *
> >   			d->acc_conf.q_ul_5g.num_qgroups;
> >   	dev_info->num_queues[RTE_BBDEV_OP_LDPC_ENC] = d-
> >acc_conf.q_dl_5g.num_aqs_per_groups *
> >   			d->acc_conf.q_dl_5g.num_qgroups;
> >   	dev_info->num_queues[RTE_BBDEV_OP_FFT] = 0;
> > -	dev_info->queue_priority[RTE_BBDEV_OP_TURBO_DEC] = 0;
> > -	dev_info->queue_priority[RTE_BBDEV_OP_TURBO_ENC] = 0;
> > +	dev_info->queue_priority[RTE_BBDEV_OP_TURBO_DEC] = d-
> >acc_conf.q_ul_4g.num_qgroups;
> > +	dev_info->queue_priority[RTE_BBDEV_OP_TURBO_ENC] =
> > +d->acc_conf.q_dl_4g.num_qgroups;
> >   	dev_info->queue_priority[RTE_BBDEV_OP_LDPC_DEC] = d-
> >acc_conf.q_ul_5g.num_qgroups;
> >   	dev_info->queue_priority[RTE_BBDEV_OP_LDPC_ENC] = d-
> >acc_conf.q_dl_5g.num_qgroups;
> >   	dev_info->queue_priority[RTE_BBDEV_OP_FFT] = 0; @@ -753,6
> +795,70
> > @@ static struct rte_pci_id pci_id_acc200_vf_map[] = {
> >   	{.device_id = 0},
> >   };
> >
> > +/* Fill in a frame control word for turbo decoding. */ static inline
> > +void acc200_fcw_td_fill(const struct rte_bbdev_dec_op *op, struct
> > +acc_fcw_td *fcw) {
> > +	fcw->fcw_ver = 1;
> > +	fcw->num_maps = ACC_FCW_TD_AUTOMAP;
> > +	fcw->bypass_sb_deint = !check_bit(op->turbo_dec.op_flags,
> > +			RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE);
> > +	if (op->turbo_dec.code_block_mode ==
> RTE_BBDEV_TRANSPORT_BLOCK) {
> > +		/* FIXME for TB block */
> > +		fcw->k_pos = op->turbo_dec.tb_params.k_pos;
> > +		fcw->k_neg = op->turbo_dec.tb_params.k_neg;
> > +	} else {
> > +		fcw->k_pos = op->turbo_dec.cb_params.k;
> > +		fcw->k_neg = op->turbo_dec.cb_params.k;
> > +	}
> > +	fcw->c = 1;
> > +	fcw->c_neg = 1;
> > +	if (check_bit(op->turbo_dec.op_flags,
> RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
> > +		fcw->soft_output_en = 1;
> > +		fcw->sw_soft_out_dis = 0;
> > +		fcw->sw_et_cont = check_bit(op->turbo_dec.op_flags,
> > +
> 	RTE_BBDEV_TURBO_CONTINUE_CRC_MATCH);
> > +		fcw->sw_soft_out_saturation = check_bit(op-
> >turbo_dec.op_flags,
> > +
> 	RTE_BBDEV_TURBO_SOFT_OUT_SATURATE);
> > +		if (check_bit(op->turbo_dec.op_flags,
> > +				RTE_BBDEV_TURBO_EQUALIZER)) {
> > +			fcw->bypass_teq = 0;
> > +			fcw->ea = op->turbo_dec.cb_params.e;
> > +			fcw->eb = op->turbo_dec.cb_params.e;
> > +			if (op->turbo_dec.rv_index == 0)
> > +				fcw->k0_start_col = ACC_FCW_TD_RVIDX_0;
> > +			else if (op->turbo_dec.rv_index == 1)
> > +				fcw->k0_start_col = ACC_FCW_TD_RVIDX_1;
> > +			else if (op->turbo_dec.rv_index == 2)
> > +				fcw->k0_start_col = ACC_FCW_TD_RVIDX_2;
> > +			else
> > +				fcw->k0_start_col = ACC_FCW_TD_RVIDX_3;
> > +		} else {
> > +			fcw->bypass_teq = 1;
> > +			fcw->eb = 64; /* avoid undefined value */
> > +		}
> > +	} else {
> > +		fcw->soft_output_en = 0;
> > +		fcw->sw_soft_out_dis = 1;
> > +		fcw->bypass_teq = 0;
> > +	}
> > +
> > +	fcw->code_block_mode = 1; /* FIXME */
> > +	fcw->turbo_crc_type = check_bit(op->turbo_dec.op_flags,
> > +			RTE_BBDEV_TURBO_CRC_TYPE_24B);
> > +
> > +	fcw->ext_td_cold_reg_en = 1;
> > +	fcw->raw_decoder_input_on = 0;
> > +	fcw->max_iter = RTE_MAX((uint8_t) op->turbo_dec.iter_max, 2);
> > +	fcw->min_iter = 2;
> > +	fcw->half_iter_on = !check_bit(op->turbo_dec.op_flags,
> > +			RTE_BBDEV_TURBO_HALF_ITERATION_EVEN);
> > +
> > +	fcw->early_stop_en = check_bit(op->turbo_dec.op_flags,
> > +			RTE_BBDEV_TURBO_EARLY_TERMINATION) & !fcw-
> >soft_output_en;
> > +	fcw->ext_scale = 0xF;
> > +}
> > +
> >   /* Fill in a frame control word for LDPC decoding. */
> >   static inline void
> >   acc200_fcw_ld_fill(struct rte_bbdev_dec_op *op, struct acc_fcw_ld
> > *fcw, @@ -876,7 +982,206 @@ acc200_fcw_ld_fill(struct
> rte_bbdev_dec_op *op, struct acc_fcw_ld *fcw,
> >   }
> >
> >   static inline int
> > -acc200_dma_desc_ld_fill(struct rte_bbdev_dec_op *op, struct
> > acc_dma_req_desc *desc,
> > +acc200_dma_desc_te_fill(struct rte_bbdev_enc_op *op,
> > +		struct acc_dma_req_desc *desc, struct rte_mbuf **input,
> > +		struct rte_mbuf *output, uint32_t *in_offset,
> > +		uint32_t *out_offset, uint32_t *out_length,
> > +		uint32_t *mbuf_total_left, uint32_t *seg_total_left, uint8_t
> r) {
> > +	int next_triplet = 1; /* FCW already done. */
> > +	uint32_t e, ea, eb, length;
> > +	uint16_t k, k_neg, k_pos;
> > +	uint8_t cab, c_neg;
> > +
> > +	desc->word0 = ACC_DMA_DESC_TYPE;
> > +	desc->word1 = 0; /**< Timestamp could be disabled. */
> > +	desc->word2 = 0;
> > +	desc->word3 = 0;
> > +	desc->numCBs = 1;
> > +
> > +	if (op->turbo_enc.code_block_mode ==
> RTE_BBDEV_TRANSPORT_BLOCK) {
> > +		ea = op->turbo_enc.tb_params.ea;
> > +		eb = op->turbo_enc.tb_params.eb;
> > +		cab = op->turbo_enc.tb_params.cab;
> > +		k_neg = op->turbo_enc.tb_params.k_neg;
> > +		k_pos = op->turbo_enc.tb_params.k_pos;
> > +		c_neg = op->turbo_enc.tb_params.c_neg;
> > +		e = (r < cab) ? ea : eb;
> > +		k = (r < c_neg) ? k_neg : k_pos;
> > +	} else {
> > +		e = op->turbo_enc.cb_params.e;
> > +		k = op->turbo_enc.cb_params.k;
> > +	}
> > +
> > +	if (check_bit(op->turbo_enc.op_flags,
> RTE_BBDEV_TURBO_CRC_24B_ATTACH))
> > +		length = (k - 24) >> 3;
> > +	else
> > +		length = k >> 3;
> > +
> > +	if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left < length))) {
> > +		rte_bbdev_log(ERR,
> > +				"Mismatch between mbuf length and
> included CB sizes: mbuf len %u, cb len %u",
> > +				*mbuf_total_left, length);
> > +		return -1;
> > +	}
> > +
> > +	next_triplet = acc_dma_fill_blk_type_in(desc, input, in_offset,
> > +			length, seg_total_left, next_triplet,
> > +			check_bit(op->turbo_enc.op_flags,
> RTE_BBDEV_TURBO_ENC_SCATTER_GATHER));
> > +	if (unlikely(next_triplet < 0)) {
> > +		rte_bbdev_log(ERR,
> > +				"Mismatch between data to process and
> mbuf data length in bbdev_op: %p",
> > +				op);
> > +		return -1;
> > +	}
> > +	desc->data_ptrs[next_triplet - 1].last = 1;
> > +	desc->m2dlen = next_triplet;
> > +	*mbuf_total_left -= length;
> > +
> > +	/* Set output length. */
> > +	if (check_bit(op->turbo_enc.op_flags,
> RTE_BBDEV_TURBO_RATE_MATCH))
> > +		/* Integer round up division by 8. */
> > +		*out_length = (e + 7) >> 3;
> > +	else
> > +		*out_length = (k >> 3) * 3 + 2;
> > +
> > +	next_triplet = acc_dma_fill_blk_type(desc, output, *out_offset,
> > +			*out_length, next_triplet,
> ACC_DMA_BLKID_OUT_ENC);
> > +	if (unlikely(next_triplet < 0)) {
> > +		rte_bbdev_log(ERR,
> > +				"Mismatch between data to process and
> mbuf data length in bbdev_op: %p",
> > +				op);
> > +		return -1;
> > +	}
> > +	op->turbo_enc.output.length += *out_length;
> > +	*out_offset += *out_length;
> > +	desc->data_ptrs[next_triplet - 1].last = 1;
> > +	desc->d2mlen = next_triplet - desc->m2dlen;
> > +
> > +	desc->op_addr = op;
> > +
> > +	return 0;
> > +}
> > +
> > +static inline int
> > +acc200_dma_desc_td_fill(struct rte_bbdev_dec_op *op,
> > +		struct acc_dma_req_desc *desc, struct rte_mbuf **input,
> > +		struct rte_mbuf *h_output, struct rte_mbuf *s_output,
> > +		uint32_t *in_offset, uint32_t *h_out_offset,
> > +		uint32_t *s_out_offset, uint32_t *h_out_length,
> > +		uint32_t *s_out_length, uint32_t *mbuf_total_left,
> > +		uint32_t *seg_total_left, uint8_t r) {
> > +	int next_triplet = 1; /* FCW already done. */
> > +	uint16_t k;
> > +	uint16_t crc24_overlap = 0;
> > +	uint32_t e, kw;
> > +
> > +	desc->word0 = ACC_DMA_DESC_TYPE;
> > +	desc->word1 = 0; /**< Timestamp could be disabled. */
> > +	desc->word2 = 0;
> > +	desc->word3 = 0;
> > +	desc->numCBs = 1;
> > +
> > +	if (op->turbo_dec.code_block_mode ==
> RTE_BBDEV_TRANSPORT_BLOCK) {
> > +		k = (r < op->turbo_dec.tb_params.c_neg)
> > +			? op->turbo_dec.tb_params.k_neg
> > +			: op->turbo_dec.tb_params.k_pos;
> > +		e = (r < op->turbo_dec.tb_params.cab)
> > +			? op->turbo_dec.tb_params.ea
> > +			: op->turbo_dec.tb_params.eb;
> > +	} else {
> > +		k = op->turbo_dec.cb_params.k;
> > +		e = op->turbo_dec.cb_params.e;
> > +	}
> > +
> > +	if ((op->turbo_dec.code_block_mode ==
> RTE_BBDEV_TRANSPORT_BLOCK)
> > +		&& !check_bit(op->turbo_dec.op_flags,
> > +		RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP))
> > +		crc24_overlap = 24;
> > +
> > +	/* Calculates circular buffer size.
> > +	 * According to 3gpp 36.212 section 5.1.4.2
> > +	 *   Kw = 3 * Kpi,
> > +	 * where:
> > +	 *   Kpi = nCol * nRow
> > +	 * where nCol is 32 and nRow can be calculated from:
> > +	 *   D =< nCol * nRow
> > +	 * where D is the size of each output from turbo encoder block (k +
> 4).
> > +	 */
> > +	kw = RTE_ALIGN_CEIL(k + 4, 32) * 3;
> > +
> > +	if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left < kw))) {
> > +		rte_bbdev_log(ERR,
> > +				"Mismatch between mbuf length and
> included CB sizes: mbuf len %u, cb len %u",
> > +				*mbuf_total_left, kw);
> > +		return -1;
> > +	}
> > +
> > +	next_triplet = acc_dma_fill_blk_type_in(desc, input, in_offset, kw,
> > +			seg_total_left, next_triplet,
> > +			check_bit(op->turbo_dec.op_flags,
> > +			RTE_BBDEV_TURBO_DEC_SCATTER_GATHER));
> > +	if (unlikely(next_triplet < 0)) {
> > +		rte_bbdev_log(ERR,
> > +				"Mismatch between data to process and
> mbuf data length in bbdev_op: %p",
> > +				op);
> > +		return -1;
> > +	}
> > +	desc->data_ptrs[next_triplet - 1].last = 1;
> > +	desc->m2dlen = next_triplet;
> > +	*mbuf_total_left -= kw;
> > +	*h_out_length = ((k - crc24_overlap) >> 3);
> > +	next_triplet = acc_dma_fill_blk_type(
> > +			desc, h_output, *h_out_offset,
> > +			*h_out_length, next_triplet,
> ACC_DMA_BLKID_OUT_HARD);
> > +	if (unlikely(next_triplet < 0)) {
> > +		rte_bbdev_log(ERR,
> > +				"Mismatch between data to process and
> mbuf data length in bbdev_op: %p",
> > +				op);
> > +		return -1;
> > +	}
> > +
> > +	op->turbo_dec.hard_output.length += *h_out_length;
> > +	*h_out_offset += *h_out_length;
> > +
> > +	/* Soft output. */
> > +	if (check_bit(op->turbo_dec.op_flags,
> RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
> > +		if (op->turbo_dec.soft_output.data == 0) {
> > +			rte_bbdev_log(ERR, "Soft output is not defined");
> > +			return -1;
> > +		}
> > +		if (check_bit(op->turbo_dec.op_flags,
> > +				RTE_BBDEV_TURBO_EQUALIZER))
> > +			*s_out_length = e;
> > +		else
> > +			*s_out_length = (k * 3) + 12;
> > +
> > +		next_triplet = acc_dma_fill_blk_type(desc, s_output,
> > +				*s_out_offset, *s_out_length, next_triplet,
> > +				ACC_DMA_BLKID_OUT_SOFT);
> > +		if (unlikely(next_triplet < 0)) {
> > +			rte_bbdev_log(ERR,
> > +					"Mismatch between data to process
> and mbuf data length in bbdev_op: %p",
> > +					op);
> > +			return -1;
> > +		}
> > +
> > +		op->turbo_dec.soft_output.length += *s_out_length;
> > +		*s_out_offset += *s_out_length;
> > +	}
> > +
> > +	desc->data_ptrs[next_triplet - 1].last = 1;
> > +	desc->d2mlen = next_triplet - desc->m2dlen;
> > +
> > +	desc->op_addr = op;
> > +
> > +	return 0;
> > +}
> > +
> > +static inline int
> > +acc200_dma_desc_ld_fill(struct rte_bbdev_dec_op *op,
> > +		struct acc_dma_req_desc *desc,
> >   		struct rte_mbuf **input, struct rte_mbuf *h_output,
> >   		uint32_t *in_offset, uint32_t *h_out_offset,
> >   		uint32_t *h_out_length, uint32_t *mbuf_total_left, @@ -
> 1034,6
> > +1339,49 @@ acc200_dma_desc_ld_update(struct rte_bbdev_dec_op *op,
> >   	desc->op_addr = op;
> >   }
> >
> > +/* Enqueue one encode operations for ACC200 device in CB mode */
> > +static inline int enqueue_enc_one_op_cb(struct acc_queue *q, struct
> > +rte_bbdev_enc_op *op,
> > +		uint16_t total_enqueued_cbs)
> > +{
> > +	union acc_dma_desc *desc = NULL;
> > +	int ret;
> > +	uint32_t in_offset, out_offset, out_length, mbuf_total_left,
> > +		seg_total_left;
> > +	struct rte_mbuf *input, *output_head, *output;
> > +
> > +	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
> > +			& q->sw_ring_wrap_mask);
> > +	desc = q->ring_addr + desc_idx;
> > +	acc_fcw_te_fill(op, &desc->req.fcw_te);
> > +
> > +	input = op->turbo_enc.input.data;
> > +	output_head = output = op->turbo_enc.output.data;
> > +	in_offset = op->turbo_enc.input.offset;
> > +	out_offset = op->turbo_enc.output.offset;
> > +	out_length = 0;
> > +	mbuf_total_left = op->turbo_enc.input.length;
> > +	seg_total_left = rte_pktmbuf_data_len(op->turbo_enc.input.data)
> > +			- in_offset;
> > +
> > +	ret = acc200_dma_desc_te_fill(op, &desc->req, &input, output,
> > +			&in_offset, &out_offset, &out_length,
> &mbuf_total_left,
> > +			&seg_total_left, 0);
> > +
> > +	if (unlikely(ret < 0))
> > +		return ret;
> > +
> > +	mbuf_append(output_head, output, out_length);
> > +
> > +#ifdef RTE_LIBRTE_BBDEV_DEBUG
> > +	rte_memdump(stderr, "FCW", &desc->req.fcw_te,
> > +			sizeof(desc->req.fcw_te) - 8);
> > +	rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc)); #endif
> > +	/* One CB (one op) was successfully prepared to enqueue */
> > +	return 1;
> > +}
> > +
> >   /* Enqueue one encode operations for ACC200 device in CB mode
> >    * multiplexed on the same descriptor.
> >    */
> > @@ -1146,6 +1494,78 @@ enqueue_ldpc_enc_part_tb(struct acc_queue
> *q,
> > struct rte_bbdev_enc_op *op,
> >
> >   }
> >
> > +/* Enqueue one encode operations for ACC200 device in TB mode. */
> > +static inline int enqueue_enc_one_op_tb(struct acc_queue *q, struct
> > +rte_bbdev_enc_op *op,
> > +		uint16_t total_enqueued_cbs, uint8_t cbs_in_tb) {
> > +	union acc_dma_desc *desc = NULL;
> > +	int ret;
> > +	uint8_t r, c;
> > +	uint32_t in_offset, out_offset, out_length, mbuf_total_left,
> > +		seg_total_left;
> > +	struct rte_mbuf *input, *output_head, *output;
> > +	uint16_t current_enqueued_cbs = 0;
> > +
> > +	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
> > +			& q->sw_ring_wrap_mask);
> 
> Maybe I did not make the comment on this patch specifically, but having a
> helper to get the descriptor index would make sense given it is duplicated in
> several places.
> 
> 
> With this fixed, you can add:

It is a good idea, notably for readability. But I am unsure we need it now for 22.11, with a lot of acc100 and acc200 commits still in flight.
Are you okay if we defer this small refactor to 23.03?
There are a few similar routines which may benefit from similar wrapper functions.
Let me know what you think.

> 
> Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> 
> Thanks,
> Maxime
  
Akhil Goyal Oct. 10, 2022, 6:08 p.m. UTC | #3
> > > +/* Enqueue one encode operations for ACC200 device in TB mode. */
> > > +static inline int enqueue_enc_one_op_tb(struct acc_queue *q, struct
> > > +rte_bbdev_enc_op *op,
> > > +		uint16_t total_enqueued_cbs, uint8_t cbs_in_tb) {
> > > +	union acc_dma_desc *desc = NULL;
> > > +	int ret;
> > > +	uint8_t r, c;
> > > +	uint32_t in_offset, out_offset, out_length, mbuf_total_left,
> > > +		seg_total_left;
> > > +	struct rte_mbuf *input, *output_head, *output;
> > > +	uint16_t current_enqueued_cbs = 0;
> > > +
> > > +	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
> > > +			& q->sw_ring_wrap_mask);
> >
> > Maybe I did not make the comment on this patch specifically, but having a
> > helper to get the descriptor index would make sense givent it is duplicated at
> > several places.
> >
> >
> > With this fixed, you can add:
> 
> It is a good idea, notably for readability. But unsure we need it now for 22.11
> with still a lot of acc100 and acc200 commits in flight.
> Are you okay if we to defer this small refactor to 23.03?
> There are few similar routines which may benefit from similar wrapper
> functions.
> Let me know what you think.
> 
We have at least 2 weeks from now to close RC2 and all these acc patches.
Do you think you need more time than that? I believe this is a simple code movement.
  
Chautru, Nicolas Oct. 11, 2022, 9:24 p.m. UTC | #4
Hi Akhil, Maxime, 

> -----Original Message-----
> From: Akhil Goyal <gakhil@marvell.com>
> Subject: RE: [PATCH v9 09/14] baseband/acc: add LTE processing functions
> 
> > > > +/* Enqueue one encode operations for ACC200 device in TB mode. */
> > > > +static inline int enqueue_enc_one_op_tb(struct acc_queue *q,
> > > > +struct rte_bbdev_enc_op *op,
> > > > +		uint16_t total_enqueued_cbs, uint8_t cbs_in_tb) {
> > > > +	union acc_dma_desc *desc = NULL;
> > > > +	int ret;
> > > > +	uint8_t r, c;
> > > > +	uint32_t in_offset, out_offset, out_length, mbuf_total_left,
> > > > +		seg_total_left;
> > > > +	struct rte_mbuf *input, *output_head, *output;
> > > > +	uint16_t current_enqueued_cbs = 0;
> > > > +
> > > > +	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
> > > > +			& q->sw_ring_wrap_mask);
> > >
> > > Maybe I did not make the comment on this patch specifically, but
> > > having a helper to get the descriptor index would make sense givent
> > > it is duplicated at several places.
> > >
> > >
> > > With this fixed, you can add:
> >
> > It is a good idea, notably for readability. But unsure we need it now
> > for 22.11 with still a lot of acc100 and acc200 commits in flight.
> > Are you okay if we to defer this small refactor to 23.03?
> > There are few similar routines which may benefit from similar wrapper
> > functions.
> > Let me know what you think.
> >
> We have time of atleast 2 weeks from now to close RC2 and all these acc
> patches.
> Do you think you need time more than that? I believe this is a simple code
> movement.

From my humble point of view, I am not sure it is good practice to let new changes creep in during this window.
This is not a super trivial change: there is always a risk of breaking things for limited value (readability), and it impacts the 2nd series; hence ideally I would have preferred to refactor in the next release cycle.
Also, we would much appreciate this acc200 series being applied soon if possible, as the 2nd series is likely to require a non-straightforward rebase and is considered at risk at the moment because of this.
Still, no problem, I am just sharing our perspective => I did add that change in the new v10.
Thanks again, much appreciated.
Nic
  

Patch

diff --git a/drivers/baseband/acc/rte_acc200_pmd.c b/drivers/baseband/acc/rte_acc200_pmd.c
index 0f018b19ac..058e38c3ec 100644
--- a/drivers/baseband/acc/rte_acc200_pmd.c
+++ b/drivers/baseband/acc/rte_acc200_pmd.c
@@ -649,6 +649,46 @@  acc200_dev_info_get(struct rte_bbdev *dev,
 	struct acc_device *d = dev->data->dev_private;
 	int i;
 	static const struct rte_bbdev_op_cap bbdev_capabilities[] = {
+		{
+			.type = RTE_BBDEV_OP_TURBO_DEC,
+			.cap.turbo_dec = {
+				.capability_flags =
+					RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE |
+					RTE_BBDEV_TURBO_CRC_TYPE_24B |
+					RTE_BBDEV_TURBO_EQUALIZER |
+					RTE_BBDEV_TURBO_SOFT_OUT_SATURATE |
+					RTE_BBDEV_TURBO_HALF_ITERATION_EVEN |
+					RTE_BBDEV_TURBO_CONTINUE_CRC_MATCH |
+					RTE_BBDEV_TURBO_SOFT_OUTPUT |
+					RTE_BBDEV_TURBO_EARLY_TERMINATION |
+					RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN |
+					RTE_BBDEV_TURBO_NEG_LLR_1_BIT_SOFT_OUT |
+					RTE_BBDEV_TURBO_MAP_DEC |
+					RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP |
+					RTE_BBDEV_TURBO_DEC_SCATTER_GATHER,
+				.max_llr_modulus = INT8_MAX,
+				.num_buffers_src =
+						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
+				.num_buffers_hard_out =
+						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
+				.num_buffers_soft_out =
+						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
+			}
+		},
+		{
+			.type = RTE_BBDEV_OP_TURBO_ENC,
+			.cap.turbo_enc = {
+				.capability_flags =
+					RTE_BBDEV_TURBO_CRC_24B_ATTACH |
+					RTE_BBDEV_TURBO_RV_INDEX_BYPASS |
+					RTE_BBDEV_TURBO_RATE_MATCH |
+					RTE_BBDEV_TURBO_ENC_SCATTER_GATHER,
+				.num_buffers_src =
+						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
+				.num_buffers_dst =
+						RTE_BBDEV_TURBO_MAX_CODE_BLOCKS,
+			}
+		},
 		{
 			.type   = RTE_BBDEV_OP_LDPC_ENC,
 			.cap.ldpc_enc = {
@@ -700,15 +740,17 @@  acc200_dev_info_get(struct rte_bbdev *dev,
 
 	/* Exposed number of queues. */
 	dev_info->num_queues[RTE_BBDEV_OP_NONE] = 0;
-	dev_info->num_queues[RTE_BBDEV_OP_TURBO_DEC] = 0;
-	dev_info->num_queues[RTE_BBDEV_OP_TURBO_ENC] = 0;
+	dev_info->num_queues[RTE_BBDEV_OP_TURBO_DEC] = d->acc_conf.q_ul_4g.num_aqs_per_groups *
+			d->acc_conf.q_ul_4g.num_qgroups;
+	dev_info->num_queues[RTE_BBDEV_OP_TURBO_ENC] = d->acc_conf.q_dl_4g.num_aqs_per_groups *
+			d->acc_conf.q_dl_4g.num_qgroups;
 	dev_info->num_queues[RTE_BBDEV_OP_LDPC_DEC] = d->acc_conf.q_ul_5g.num_aqs_per_groups *
 			d->acc_conf.q_ul_5g.num_qgroups;
 	dev_info->num_queues[RTE_BBDEV_OP_LDPC_ENC] = d->acc_conf.q_dl_5g.num_aqs_per_groups *
 			d->acc_conf.q_dl_5g.num_qgroups;
 	dev_info->num_queues[RTE_BBDEV_OP_FFT] = 0;
-	dev_info->queue_priority[RTE_BBDEV_OP_TURBO_DEC] = 0;
-	dev_info->queue_priority[RTE_BBDEV_OP_TURBO_ENC] = 0;
+	dev_info->queue_priority[RTE_BBDEV_OP_TURBO_DEC] = d->acc_conf.q_ul_4g.num_qgroups;
+	dev_info->queue_priority[RTE_BBDEV_OP_TURBO_ENC] = d->acc_conf.q_dl_4g.num_qgroups;
 	dev_info->queue_priority[RTE_BBDEV_OP_LDPC_DEC] = d->acc_conf.q_ul_5g.num_qgroups;
 	dev_info->queue_priority[RTE_BBDEV_OP_LDPC_ENC] = d->acc_conf.q_dl_5g.num_qgroups;
 	dev_info->queue_priority[RTE_BBDEV_OP_FFT] = 0;
@@ -753,6 +795,70 @@  static struct rte_pci_id pci_id_acc200_vf_map[] = {
 	{.device_id = 0},
 };
 
+/* Fill in a frame control word (FCW) for turbo decoding.
+ *
+ * op:  decode operation; only its turbo_dec parameters and op_flags are read.
+ * fcw: frame control word to populate. Only the fields assigned below are
+ *      written; soft_output_en is read back when deriving early_stop_en.
+ *
+ * Several fields are set to fixed values regardless of the operation
+ * (fcw_ver, c, c_neg, code_block_mode, min_iter, ext_scale). The FIXME
+ * markers flag the transport block handling as not finalized.
+ */
+static inline void
+acc200_fcw_td_fill(const struct rte_bbdev_dec_op *op, struct acc_fcw_td *fcw)
+{
+	fcw->fcw_ver = 1;
+	fcw->num_maps = ACC_FCW_TD_AUTOMAP;
+	fcw->bypass_sb_deint = !check_bit(op->turbo_dec.op_flags,
+			RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE);
+	if (op->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
+		/* FIXME for TB block */
+		fcw->k_pos = op->turbo_dec.tb_params.k_pos;
+		fcw->k_neg = op->turbo_dec.tb_params.k_neg;
+	} else {
+		/* A single code block uses the same K for both fields. */
+		fcw->k_pos = op->turbo_dec.cb_params.k;
+		fcw->k_neg = op->turbo_dec.cb_params.k;
+	}
+	fcw->c = 1;
+	fcw->c_neg = 1;
+	if (check_bit(op->turbo_dec.op_flags, RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
+		fcw->soft_output_en = 1;
+		fcw->sw_soft_out_dis = 0;
+		fcw->sw_et_cont = check_bit(op->turbo_dec.op_flags,
+				RTE_BBDEV_TURBO_CONTINUE_CRC_MATCH);
+		fcw->sw_soft_out_saturation = check_bit(op->turbo_dec.op_flags,
+				RTE_BBDEV_TURBO_SOFT_OUT_SATURATE);
+		if (check_bit(op->turbo_dec.op_flags,
+				RTE_BBDEV_TURBO_EQUALIZER)) {
+			fcw->bypass_teq = 0;
+			fcw->ea = op->turbo_dec.cb_params.e;
+			fcw->eb = op->turbo_dec.cb_params.e;
+			/* Any rv_index other than 0, 1 or 2 maps to RVIDX_3. */
+			if (op->turbo_dec.rv_index == 0)
+				fcw->k0_start_col = ACC_FCW_TD_RVIDX_0;
+			else if (op->turbo_dec.rv_index == 1)
+				fcw->k0_start_col = ACC_FCW_TD_RVIDX_1;
+			else if (op->turbo_dec.rv_index == 2)
+				fcw->k0_start_col = ACC_FCW_TD_RVIDX_2;
+			else
+				fcw->k0_start_col = ACC_FCW_TD_RVIDX_3;
+		} else {
+			fcw->bypass_teq = 1;
+			fcw->eb = 64; /* avoid undefined value */
+		}
+	} else {
+		fcw->soft_output_en = 0;
+		fcw->sw_soft_out_dis = 1;
+		fcw->bypass_teq = 0;
+	}
+
+	fcw->code_block_mode = 1; /* FIXME */
+	fcw->turbo_crc_type = check_bit(op->turbo_dec.op_flags,
+			RTE_BBDEV_TURBO_CRC_TYPE_24B);
+
+	fcw->ext_td_cold_reg_en = 1;
+	fcw->raw_decoder_input_on = 0;
+	/* Never program fewer than 2 iterations. */
+	fcw->max_iter = RTE_MAX((uint8_t) op->turbo_dec.iter_max, 2);
+	fcw->min_iter = 2;
+	fcw->half_iter_on = !check_bit(op->turbo_dec.op_flags,
+			RTE_BBDEV_TURBO_HALF_ITERATION_EVEN);
+
+	/* Early termination is only enabled when soft output is disabled.
+	 * A logical AND is used so the result does not depend on how
+	 * check_bit() encodes a set flag.
+	 */
+	fcw->early_stop_en = check_bit(op->turbo_dec.op_flags,
+			RTE_BBDEV_TURBO_EARLY_TERMINATION) &&
+			!fcw->soft_output_en;
+	fcw->ext_scale = 0xF;
+}
+
 /* Fill in a frame control word for LDPC decoding. */
 static inline void
 acc200_fcw_ld_fill(struct rte_bbdev_dec_op *op, struct acc_fcw_ld *fcw,
@@ -876,7 +982,206 @@  acc200_fcw_ld_fill(struct rte_bbdev_dec_op *op, struct acc_fcw_ld *fcw,
 }
 
 static inline int
-acc200_dma_desc_ld_fill(struct rte_bbdev_dec_op *op, struct acc_dma_req_desc *desc,
+acc200_dma_desc_te_fill(struct rte_bbdev_enc_op *op,
+		struct acc_dma_req_desc *desc, struct rte_mbuf **input,
+		struct rte_mbuf *output, uint32_t *in_offset,
+		uint32_t *out_offset, uint32_t *out_length,
+		uint32_t *mbuf_total_left, uint32_t *seg_total_left, uint8_t r)
+{
+	int next_triplet = 1; /* FCW already done. */
+	uint32_t e, ea, eb, length;
+	uint16_t k, k_neg, k_pos;
+	uint8_t cab, c_neg;
+
+	desc->word0 = ACC_DMA_DESC_TYPE;
+	desc->word1 = 0; /**< Timestamp could be disabled. */
+	desc->word2 = 0;
+	desc->word3 = 0;
+	desc->numCBs = 1;
+
+	if (op->turbo_enc.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
+		ea = op->turbo_enc.tb_params.ea;
+		eb = op->turbo_enc.tb_params.eb;
+		cab = op->turbo_enc.tb_params.cab;
+		k_neg = op->turbo_enc.tb_params.k_neg;
+		k_pos = op->turbo_enc.tb_params.k_pos;
+		c_neg = op->turbo_enc.tb_params.c_neg;
+		e = (r < cab) ? ea : eb;
+		k = (r < c_neg) ? k_neg : k_pos;
+	} else {
+		e = op->turbo_enc.cb_params.e;
+		k = op->turbo_enc.cb_params.k;
+	}
+
+	if (check_bit(op->turbo_enc.op_flags, RTE_BBDEV_TURBO_CRC_24B_ATTACH))
+		length = (k - 24) >> 3;
+	else
+		length = k >> 3;
+
+	if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left < length))) {
+		rte_bbdev_log(ERR,
+				"Mismatch between mbuf length and included CB sizes: mbuf len %u, cb len %u",
+				*mbuf_total_left, length);
+		return -1;
+	}
+
+	next_triplet = acc_dma_fill_blk_type_in(desc, input, in_offset,
+			length, seg_total_left, next_triplet,
+			check_bit(op->turbo_enc.op_flags, RTE_BBDEV_TURBO_ENC_SCATTER_GATHER));
+	if (unlikely(next_triplet < 0)) {
+		rte_bbdev_log(ERR,
+				"Mismatch between data to process and mbuf data length in bbdev_op: %p",
+				op);
+		return -1;
+	}
+	desc->data_ptrs[next_triplet - 1].last = 1;
+	desc->m2dlen = next_triplet;
+	*mbuf_total_left -= length;
+
+	/* Set output length. */
+	if (check_bit(op->turbo_enc.op_flags, RTE_BBDEV_TURBO_RATE_MATCH))
+		/* Integer round up division by 8. */
+		*out_length = (e + 7) >> 3;
+	else
+		*out_length = (k >> 3) * 3 + 2;
+
+	next_triplet = acc_dma_fill_blk_type(desc, output, *out_offset,
+			*out_length, next_triplet, ACC_DMA_BLKID_OUT_ENC);
+	if (unlikely(next_triplet < 0)) {
+		rte_bbdev_log(ERR,
+				"Mismatch between data to process and mbuf data length in bbdev_op: %p",
+				op);
+		return -1;
+	}
+	op->turbo_enc.output.length += *out_length;
+	*out_offset += *out_length;
+	desc->data_ptrs[next_triplet - 1].last = 1;
+	desc->d2mlen = next_triplet - desc->m2dlen;
+
+	desc->op_addr = op;
+
+	return 0;
+}
+
+/* Fill the DMA request descriptor for one turbo decode code block.
+ *
+ * The descriptor header words are reset, then triplets are appended in
+ * order: input data (kw bytes), hard output and, when
+ * RTE_BBDEV_TURBO_SOFT_OUTPUT is set, soft output. Triplet 0 is assumed to
+ * already hold the FCW, hence next_triplet starts at 1.
+ *
+ * On success the caller's offsets and lengths are advanced:
+ * *mbuf_total_left is reduced by kw, *h_out_offset / *s_out_offset are moved
+ * past the data just described, and the output lengths stored in the op are
+ * increased accordingly.
+ *
+ * r is the code block index, used in transport block mode to pick
+ * k_neg/k_pos and ea/eb.
+ *
+ * Returns 0 on success, -1 when the input length is insufficient, when a
+ * triplet cannot be filled, or when soft output is requested without a
+ * soft output buffer.
+ */
+static inline int
+acc200_dma_desc_td_fill(struct rte_bbdev_dec_op *op,
+		struct acc_dma_req_desc *desc, struct rte_mbuf **input,
+		struct rte_mbuf *h_output, struct rte_mbuf *s_output,
+		uint32_t *in_offset, uint32_t *h_out_offset,
+		uint32_t *s_out_offset, uint32_t *h_out_length,
+		uint32_t *s_out_length, uint32_t *mbuf_total_left,
+		uint32_t *seg_total_left, uint8_t r)
+{
+	int next_triplet = 1; /* FCW already done. */
+	uint16_t k;
+	uint16_t crc24_overlap = 0;
+	uint32_t e, kw;
+
+	desc->word0 = ACC_DMA_DESC_TYPE;
+	desc->word1 = 0; /**< Timestamp could be disabled. */
+	desc->word2 = 0;
+	desc->word3 = 0;
+	desc->numCBs = 1;
+
+	if (op->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) {
+		k = (r < op->turbo_dec.tb_params.c_neg)
+			? op->turbo_dec.tb_params.k_neg
+			: op->turbo_dec.tb_params.k_pos;
+		e = (r < op->turbo_dec.tb_params.cab)
+			? op->turbo_dec.tb_params.ea
+			: op->turbo_dec.tb_params.eb;
+	} else {
+		k = op->turbo_dec.cb_params.k;
+		e = op->turbo_dec.cb_params.e;
+	}
+
+	/* In TB mode the 24 CRC bits are dropped from the hard output unless
+	 * the application asked to keep them.
+	 */
+	if ((op->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK)
+		&& !check_bit(op->turbo_dec.op_flags,
+		RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP))
+		crc24_overlap = 24;
+
+	/* Calculates circular buffer size.
+	 * According to 3gpp 36.212 section 5.1.4.2
+	 *   Kw = 3 * Kpi,
+	 * where:
+	 *   Kpi = nCol * nRow
+	 * where nCol is 32 and nRow can be calculated from:
+	 *   D =< nCol * nRow
+	 * where D is the size of each output from turbo encoder block (k + 4).
+	 */
+	kw = RTE_ALIGN_CEIL(k + 4, 32) * 3;
+
+	if (unlikely((*mbuf_total_left == 0) || (*mbuf_total_left < kw))) {
+		rte_bbdev_log(ERR,
+				"Mismatch between mbuf length and included CB sizes: mbuf len %u, cb len %u",
+				*mbuf_total_left, kw);
+		return -1;
+	}
+
+	next_triplet = acc_dma_fill_blk_type_in(desc, input, in_offset, kw,
+			seg_total_left, next_triplet,
+			check_bit(op->turbo_dec.op_flags,
+			RTE_BBDEV_TURBO_DEC_SCATTER_GATHER));
+	if (unlikely(next_triplet < 0)) {
+		rte_bbdev_log(ERR,
+				"Mismatch between data to process and mbuf data length in bbdev_op: %p",
+				op);
+		return -1;
+	}
+	/* Close the input (memory to device) part of the descriptor. */
+	desc->data_ptrs[next_triplet - 1].last = 1;
+	desc->m2dlen = next_triplet;
+	*mbuf_total_left -= kw;
+	/* Hard output length in bytes. */
+	*h_out_length = ((k - crc24_overlap) >> 3);
+	next_triplet = acc_dma_fill_blk_type(
+			desc, h_output, *h_out_offset,
+			*h_out_length, next_triplet, ACC_DMA_BLKID_OUT_HARD);
+	if (unlikely(next_triplet < 0)) {
+		rte_bbdev_log(ERR,
+				"Mismatch between data to process and mbuf data length in bbdev_op: %p",
+				op);
+		return -1;
+	}
+
+	op->turbo_dec.hard_output.length += *h_out_length;
+	*h_out_offset += *h_out_length;
+
+	/* Soft output. */
+	if (check_bit(op->turbo_dec.op_flags, RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
+		if (op->turbo_dec.soft_output.data == NULL) {
+			rte_bbdev_log(ERR, "Soft output is not defined");
+			return -1;
+		}
+		if (check_bit(op->turbo_dec.op_flags,
+				RTE_BBDEV_TURBO_EQUALIZER))
+			*s_out_length = e;
+		else
+			*s_out_length = (k * 3) + 12;
+
+		next_triplet = acc_dma_fill_blk_type(desc, s_output,
+				*s_out_offset, *s_out_length, next_triplet,
+				ACC_DMA_BLKID_OUT_SOFT);
+		if (unlikely(next_triplet < 0)) {
+			rte_bbdev_log(ERR,
+					"Mismatch between data to process and mbuf data length in bbdev_op: %p",
+					op);
+			return -1;
+		}
+
+		op->turbo_dec.soft_output.length += *s_out_length;
+		*s_out_offset += *s_out_length;
+	}
+
+	/* Close the output (device to memory) part of the descriptor. */
+	desc->data_ptrs[next_triplet - 1].last = 1;
+	desc->d2mlen = next_triplet - desc->m2dlen;
+
+	desc->op_addr = op;
+
+	return 0;
+}
+
+static inline int
+acc200_dma_desc_ld_fill(struct rte_bbdev_dec_op *op,
+		struct acc_dma_req_desc *desc,
 		struct rte_mbuf **input, struct rte_mbuf *h_output,
 		uint32_t *in_offset, uint32_t *h_out_offset,
 		uint32_t *h_out_length, uint32_t *mbuf_total_left,
@@ -1034,6 +1339,49 @@  acc200_dma_desc_ld_update(struct rte_bbdev_dec_op *op,
 	desc->op_addr = op;
 }
 
+/* Prepare the ring descriptor for a single turbo encode operation in CB mode.
+ *
+ * The descriptor used is the one at sw_ring_head + total_enqueued_cbs
+ * (wrapped with sw_ring_wrap_mask). Returns 1 once it is ready, or the
+ * negative value reported by acc200_dma_desc_te_fill().
+ */
+static inline int
+enqueue_enc_one_op_cb(struct acc_queue *q, struct rte_bbdev_enc_op *op,
+		uint16_t total_enqueued_cbs)
+{
+	uint16_t ring_idx = (q->sw_ring_head + total_enqueued_cbs) &
+			q->sw_ring_wrap_mask;
+	union acc_dma_desc *slot = q->ring_addr + ring_idx;
+
+	acc_fcw_te_fill(op, &slot->req.fcw_te);
+
+	struct rte_mbuf *in_seg = op->turbo_enc.input.data;
+	struct rte_mbuf *out_first = op->turbo_enc.output.data;
+	struct rte_mbuf *out_seg = out_first;
+	uint32_t src_off = op->turbo_enc.input.offset;
+	uint32_t dst_off = op->turbo_enc.output.offset;
+	uint32_t dst_len = 0;
+	uint32_t bytes_left = op->turbo_enc.input.length;
+	uint32_t seg_left = rte_pktmbuf_data_len(op->turbo_enc.input.data) -
+			src_off;
+
+	/* CB mode always describes code block index 0. */
+	int rc = acc200_dma_desc_te_fill(op, &slot->req, &in_seg, out_seg,
+			&src_off, &dst_off, &dst_len, &bytes_left,
+			&seg_left, 0);
+	if (unlikely(rc < 0))
+		return rc;
+
+	mbuf_append(out_first, out_seg, dst_len);
+
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
+	rte_memdump(stderr, "FCW", &slot->req.fcw_te,
+			sizeof(slot->req.fcw_te) - 8);
+	rte_memdump(stderr, "Req Desc.", slot, sizeof(*slot));
+#endif
+	/* Exactly one descriptor (one op) is now ready to enqueue. */
+	return 1;
+}
+
 /* Enqueue one encode operations for ACC200 device in CB mode
  * multiplexed on the same descriptor.
  */
@@ -1146,6 +1494,78 @@  enqueue_ldpc_enc_part_tb(struct acc_queue *q, struct rte_bbdev_enc_op *op,
 
 }
 
+/* Enqueue one encode operations for ACC200 device in TB mode.
+ *
+ * Prepares one descriptor per code block, starting at ring position
+ * sw_ring_head + total_enqueued_cbs and at code block index tb_params.r.
+ * The loop stops when the input length is consumed or r reaches tb_params.c.
+ * The FCW is filled once, in the first descriptor; data_ptrs[0] of every
+ * descriptor used is set to the same address, derived from that first
+ * descriptor's index.
+ *
+ * Returns the number of descriptors prepared, or the negative value from
+ * acc200_dma_desc_te_fill() on failure.
+ */
+static inline int
+enqueue_enc_one_op_tb(struct acc_queue *q, struct rte_bbdev_enc_op *op,
+		uint16_t total_enqueued_cbs, uint8_t cbs_in_tb)
+{
+	union acc_dma_desc *desc = NULL;
+	int ret;
+	uint8_t r, c;
+	uint32_t in_offset, out_offset, out_length, mbuf_total_left,
+		seg_total_left;
+	struct rte_mbuf *input, *output_head, *output;
+	uint16_t current_enqueued_cbs = 0;
+
+	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
+			& q->sw_ring_wrap_mask);
+	desc = q->ring_addr + desc_idx;
+	uint64_t fcw_offset = (desc_idx << 8) + ACC_DESC_FCW_OFFSET; /* computed once, reused for every CB */
+	acc_fcw_te_fill(op, &desc->req.fcw_te);
+
+	input = op->turbo_enc.input.data;
+	output_head = output = op->turbo_enc.output.data;
+	in_offset = op->turbo_enc.input.offset;
+	out_offset = op->turbo_enc.output.offset;
+	out_length = 0;
+	mbuf_total_left = op->turbo_enc.input.length;
+
+	c = op->turbo_enc.tb_params.c;
+	r = op->turbo_enc.tb_params.r;
+
+	while (mbuf_total_left > 0 && r < c) {
+		seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
+		/* Set up DMA descriptor: the ring slot is recomputed for each
+		 * code block since total_enqueued_cbs advances every iteration.
+		 */
+		desc = q->ring_addr + ((q->sw_ring_head + total_enqueued_cbs)
+				& q->sw_ring_wrap_mask);
+		desc->req.data_ptrs[0].address = q->ring_addr_iova + fcw_offset;
+		desc->req.data_ptrs[0].blen = ACC_FCW_TE_BLEN;
+
+		ret = acc200_dma_desc_te_fill(op, &desc->req, &input, output,
+				&in_offset, &out_offset, &out_length,
+				&mbuf_total_left, &seg_total_left, r);
+		if (unlikely(ret < 0))
+			return ret;
+		mbuf_append(output_head, output, out_length);
+
+		/* Set total number of CBs in TB */
+		desc->req.cbs_in_tb = cbs_in_tb;
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
+		rte_memdump(stderr, "FCW", &desc->req.fcw_te,
+				sizeof(desc->req.fcw_te) - 8);
+		rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
+#endif
+
+		if (seg_total_left == 0) {
+			/* Go to the next mbuf
+			 * NOTE(review): neither ->next pointer is checked for
+			 * NULL here; confirm callers guarantee enough segments
+			 * whenever more code blocks remain.
+			 */
+			input = input->next;
+			in_offset = 0;
+			output = output->next;
+			out_offset = 0;
+		}
+
+		total_enqueued_cbs++;
+		current_enqueued_cbs++;
+		r++;
+	}
+
+	/* Set SDone on last CB descriptor for TB mode.
+	 * If the loop body never ran, desc still refers to the first
+	 * descriptor selected above and 0 is returned.
+	 */
+	desc->req.sdone_enable = 1;
+	desc->req.irq_enable = q->irq_enable;
+
+	return current_enqueued_cbs;
+}
+
 /* Enqueue one encode operations for ACC200 device in TB mode.
  * returns the number of descs used.
  */
@@ -1213,6 +1633,62 @@  enqueue_ldpc_enc_one_op_tb(struct acc_queue *q, struct rte_bbdev_enc_op *op,
 
 /** Enqueue one decode operations for ACC200 device in CB mode. */
 static inline int
+enqueue_dec_one_op_cb(struct acc_queue *q, struct rte_bbdev_dec_op *op,
+		uint16_t total_enqueued_cbs)
+{
+	/* Descriptor at sw_ring_head + total_enqueued_cbs, wrapped. */
+	uint16_t ring_idx = (q->sw_ring_head + total_enqueued_cbs) &
+			q->sw_ring_wrap_mask;
+	union acc_dma_desc *slot = q->ring_addr + ring_idx;
+
+	acc200_fcw_td_fill(op, &slot->req.fcw_td);
+
+	struct rte_mbuf *in_seg = op->turbo_dec.input.data;
+	struct rte_mbuf *hard_out = op->turbo_dec.hard_output.data;
+	struct rte_mbuf *soft_out = op->turbo_dec.soft_output.data;
+	uint32_t src_off = op->turbo_dec.input.offset;
+	uint32_t hard_off = op->turbo_dec.hard_output.offset;
+	uint32_t soft_off = op->turbo_dec.soft_output.offset;
+	uint32_t hard_len = 0;
+	uint32_t soft_len = 0;
+	uint32_t bytes_left = op->turbo_dec.input.length;
+	uint32_t seg_left = rte_pktmbuf_data_len(in_seg) - src_off;
+
+	/* CB mode always describes code block index 0. */
+	int rc = acc200_dma_desc_td_fill(op, &slot->req, &in_seg, hard_out,
+			soft_out, &src_off, &hard_off, &soft_off,
+			&hard_len, &soft_len, &bytes_left,
+			&seg_left, 0);
+	if (unlikely(rc < 0))
+		return rc;
+
+	/* Account for the hard output bytes in the destination mbuf. */
+	mbuf_append(hard_out, hard_out, hard_len);
+
+	/* Same for the soft output, only when it was requested. */
+	if (check_bit(op->turbo_dec.op_flags, RTE_BBDEV_TURBO_SOFT_OUTPUT))
+		mbuf_append(soft_out, soft_out, soft_len);
+
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
+	rte_memdump(stderr, "FCW", &slot->req.fcw_td,
+			sizeof(slot->req.fcw_td));
+	rte_memdump(stderr, "Req Desc.", slot, sizeof(*slot));
+#endif
+
+	/* Exactly one descriptor (one op) is now ready to enqueue. */
+	return 1;
+}
+
+/** Enqueue one decode operations for ACC200 device in CB mode */
+static inline int
 enqueue_ldpc_dec_one_op_cb(struct acc_queue *q, struct rte_bbdev_dec_op *op,
 		uint16_t total_enqueued_cbs, bool same_op)
 {
@@ -1395,6 +1871,139 @@  enqueue_ldpc_dec_one_op_tb(struct acc_queue *q, struct rte_bbdev_dec_op *op,
 	return current_enqueued_cbs;
 }
 
+/* Enqueue one decode operations for ACC200 device in TB mode
+ *
+ * Prepares one descriptor per code block, starting at ring position
+ * sw_ring_head + total_enqueued_cbs and at code block index tb_params.r.
+ * The loop stops when the input length is consumed or r reaches tb_params.c.
+ * The FCW is filled once, in the first descriptor; data_ptrs[0] of every
+ * descriptor used is set to the same address, derived from that first
+ * descriptor's index.
+ *
+ * Returns the number of descriptors prepared, or the negative value from
+ * acc200_dma_desc_td_fill() on failure.
+ */
+static inline int
+enqueue_dec_one_op_tb(struct acc_queue *q, struct rte_bbdev_dec_op *op,
+		uint16_t total_enqueued_cbs, uint8_t cbs_in_tb)
+{
+	union acc_dma_desc *desc = NULL;
+	int ret;
+	uint8_t r, c;
+	uint32_t in_offset, h_out_offset, s_out_offset, s_out_length,
+		h_out_length, mbuf_total_left, seg_total_left;
+	struct rte_mbuf *input, *h_output_head, *h_output,
+		*s_output_head, *s_output;
+	uint16_t current_enqueued_cbs = 0;
+
+	uint16_t desc_idx = ((q->sw_ring_head + total_enqueued_cbs)
+			& q->sw_ring_wrap_mask);
+	desc = q->ring_addr + desc_idx;
+	uint64_t fcw_offset = (desc_idx << 8) + ACC_DESC_FCW_OFFSET; /* computed once, reused for every CB */
+	acc200_fcw_td_fill(op, &desc->req.fcw_td);
+
+	input = op->turbo_dec.input.data;
+	h_output_head = h_output = op->turbo_dec.hard_output.data;
+	s_output_head = s_output = op->turbo_dec.soft_output.data;
+	in_offset = op->turbo_dec.input.offset;
+	h_out_offset = op->turbo_dec.hard_output.offset;
+	s_out_offset = op->turbo_dec.soft_output.offset;
+	h_out_length = s_out_length = 0;
+	mbuf_total_left = op->turbo_dec.input.length;
+	c = op->turbo_dec.tb_params.c;
+	r = op->turbo_dec.tb_params.r;
+
+	while (mbuf_total_left > 0 && r < c) {
+
+		seg_total_left = rte_pktmbuf_data_len(input) - in_offset;
+
+		/* Set up DMA descriptor: the ring slot is recomputed for each
+		 * code block since total_enqueued_cbs advances every iteration.
+		 */
+		desc = q->ring_addr + ((q->sw_ring_head + total_enqueued_cbs)
+				& q->sw_ring_wrap_mask);
+		desc->req.data_ptrs[0].address = q->ring_addr_iova + fcw_offset;
+		desc->req.data_ptrs[0].blen = ACC_FCW_TD_BLEN;
+		ret = acc200_dma_desc_td_fill(op, &desc->req, &input,
+				h_output, s_output, &in_offset, &h_out_offset,
+				&s_out_offset, &h_out_length, &s_out_length,
+				&mbuf_total_left, &seg_total_left, r);
+
+		if (unlikely(ret < 0))
+			return ret;
+
+		/* Hard output */
+		mbuf_append(h_output_head, h_output, h_out_length);
+
+		/* Soft output, only when requested by the operation flags */
+		if (check_bit(op->turbo_dec.op_flags,
+				RTE_BBDEV_TURBO_SOFT_OUTPUT))
+			mbuf_append(s_output_head, s_output, s_out_length);
+
+		/* Set total number of CBs in TB */
+		desc->req.cbs_in_tb = cbs_in_tb;
+#ifdef RTE_LIBRTE_BBDEV_DEBUG
+		rte_memdump(stderr, "FCW", &desc->req.fcw_td,
+				sizeof(desc->req.fcw_td) - 8);
+		rte_memdump(stderr, "Req Desc.", desc, sizeof(*desc));
+#endif
+
+		if (seg_total_left == 0) {
+			/* Go to the next mbuf
+			 * NOTE(review): none of the ->next pointers is checked
+			 * for NULL here; confirm callers guarantee enough
+			 * segments whenever more code blocks remain.
+			 */
+			input = input->next;
+			in_offset = 0;
+			h_output = h_output->next;
+			h_out_offset = 0;
+
+			if (check_bit(op->turbo_dec.op_flags,
+					RTE_BBDEV_TURBO_SOFT_OUTPUT)) {
+				s_output = s_output->next;
+				s_out_offset = 0;
+			}
+		}
+
+		total_enqueued_cbs++;
+		current_enqueued_cbs++;
+		r++;
+	}
+
+	/* Set SDone on last CB descriptor for TB mode
+	 * If the loop body never ran, desc still refers to the first
+	 * descriptor selected above and 0 is returned.
+	 */
+	desc->req.sdone_enable = 1;
+	desc->req.irq_enable = q->irq_enable;
+
+	return current_enqueued_cbs;
+}
+
+/* Enqueue a burst of turbo encode operations in CB mode.
+ *
+ * One ring descriptor is consumed per operation. Processing stops at the
+ * first operation that does not fit in the ring or fails preparation.
+ * Returns the number of operations enqueued.
+ */
+static uint16_t
+acc200_enqueue_enc_cb(struct rte_bbdev_queue_data *q_data,
+		struct rte_bbdev_enc_op **ops, uint16_t num)
+{
+	struct acc_queue *q = q_data->queue_private;
+	int32_t room = acc_ring_avail_enq(q);
+	union acc_dma_desc *last;
+	uint16_t done = 0;
+
+	while (done < num) {
+		/* Each op needs exactly one free descriptor. */
+		if (unlikely(room < 1)) {
+			acc_enqueue_ring_full(q_data);
+			break;
+		}
+		room--;
+
+		if (enqueue_enc_one_op_cb(q, ops[done], done) < 0) {
+			acc_enqueue_invalid(q_data);
+			break;
+		}
+		++done;
+	}
+
+	/* Nothing was prepared: leave ring and stats untouched. */
+	if (unlikely(done == 0))
+		return 0;
+
+	/* SDone and interrupt settings go on the last descriptor only. */
+	last = q->ring_addr + ((q->sw_ring_head + done - 1)
+			& q->sw_ring_wrap_mask);
+	last->req.sdone_enable = 1;
+	last->req.irq_enable = q->irq_enable;
+
+	acc_dma_enqueue(q, done, &q_data->queue_stats);
+
+	/* Account for accepted and rejected operations. */
+	q_data->queue_stats.enqueued_count += done;
+	q_data->queue_stats.enqueue_err_count += num - done;
+	return done;
+}
+
 /** Enqueue encode operations for ACC200 device in CB mode. */
 static inline uint16_t
 acc200_enqueue_ldpc_enc_cb(struct rte_bbdev_queue_data *q_data,
@@ -1442,6 +2051,45 @@  acc200_enqueue_ldpc_enc_cb(struct rte_bbdev_queue_data *q_data,
 	return i;
 }
 
+/* Enqueue a burst of turbo encode operations in TB mode.
+ *
+ * Each operation consumes as many descriptors as it has code blocks.
+ * Processing stops at the first operation that reports zero code blocks,
+ * does not fit in the ring, or fails preparation. Returns the number of
+ * operations (not descriptors) enqueued.
+ */
+static uint16_t
+acc200_enqueue_enc_tb(struct rte_bbdev_queue_data *q_data,
+		struct rte_bbdev_enc_op **ops, uint16_t num)
+{
+	struct acc_queue *q = q_data->queue_private;
+	int32_t room = acc_ring_avail_enq(q);
+	uint16_t descs_used = 0;
+	uint16_t op_idx;
+
+	for (op_idx = 0; op_idx < num; ++op_idx) {
+		uint8_t cb_count = get_num_cbs_in_tb_enc(&ops[op_idx]->turbo_enc);
+		int prepared;
+
+		/* The whole transport block must fit in the ring. */
+		if (unlikely((cb_count == 0) || (room - cb_count < 0))) {
+			acc_enqueue_ring_full(q_data);
+			break;
+		}
+		room -= cb_count;
+
+		prepared = enqueue_enc_one_op_tb(q, ops[op_idx], descs_used,
+				cb_count);
+		if (prepared <= 0) {
+			acc_enqueue_invalid(q_data);
+			break;
+		}
+		descs_used += prepared;
+	}
+
+	/* Nothing was prepared: leave ring and stats untouched. */
+	if (unlikely(descs_used == 0))
+		return 0;
+
+	acc_dma_enqueue(q, descs_used, &q_data->queue_stats);
+
+	/* Account for accepted and rejected operations. */
+	q_data->queue_stats.enqueued_count += op_idx;
+	q_data->queue_stats.enqueue_err_count += num - op_idx;
+
+	return op_idx;
+}
+
 /* Enqueue LDPC encode operations for ACC200 device in TB mode. */
 static uint16_t
 acc200_enqueue_ldpc_enc_tb(struct rte_bbdev_queue_data *q_data,
@@ -1481,6 +2129,20 @@  acc200_enqueue_ldpc_enc_tb(struct rte_bbdev_queue_data *q_data,
 	return i;
 }
 
+/* Entry point for turbo encode enqueue: dispatches the burst to the TB or
+ * CB path based on the code block mode of the first operation.
+ */
+static uint16_t
+acc200_enqueue_enc(struct rte_bbdev_queue_data *q_data,
+		struct rte_bbdev_enc_op **ops, uint16_t num)
+{
+	int32_t aq_avail = acc_aq_avail(q_data, num);
+
+	/* Bail out on an empty burst or when no room is reported. */
+	if (unlikely((num == 0) || (aq_avail <= 0)))
+		return 0;
+
+	return (ops[0]->turbo_enc.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK) ?
+			acc200_enqueue_enc_tb(q_data, ops, num) :
+			acc200_enqueue_enc_cb(q_data, ops, num);
+}
+
 /* Enqueue encode operations for ACC200 device. */
 static uint16_t
 acc200_enqueue_ldpc_enc(struct rte_bbdev_queue_data *q_data,
@@ -1495,6 +2157,47 @@  acc200_enqueue_ldpc_enc(struct rte_bbdev_queue_data *q_data,
 		return acc200_enqueue_ldpc_enc_cb(q_data, ops, num);
 }
 
+
+/* Enqueue decode operations for ACC200 device in CB mode.
+ *
+ * One ring descriptor is consumed per operation. Processing stops at the
+ * first operation that does not fit in the ring or fails preparation; the
+ * reason is reported through acc_enqueue_ring_full() /
+ * acc_enqueue_invalid(), as the encode CB path does.
+ * Returns the number of operations enqueued.
+ */
+static uint16_t
+acc200_enqueue_dec_cb(struct rte_bbdev_queue_data *q_data,
+		struct rte_bbdev_dec_op **ops, uint16_t num)
+{
+	struct acc_queue *q = q_data->queue_private;
+	int32_t avail = acc_ring_avail_enq(q);
+	uint16_t i;
+	union acc_dma_desc *desc;
+	int ret;
+
+	for (i = 0; i < num; ++i) {
+		/* Check if there are available space for further processing. */
+		if (unlikely(avail - 1 < 0)) {
+			acc_enqueue_ring_full(q_data);
+			break;
+		}
+		avail -= 1;
+
+		ret = enqueue_dec_one_op_cb(q, ops[i], i);
+		if (ret < 0) {
+			acc_enqueue_invalid(q_data);
+			break;
+		}
+	}
+
+	if (unlikely(i == 0))
+		return 0; /* Nothing to enqueue. */
+
+	/* Set SDone in last CB in enqueued ops for CB mode. */
+	desc = q->ring_addr + ((q->sw_ring_head + i - 1)
+			& q->sw_ring_wrap_mask);
+	desc->req.sdone_enable = 1;
+	desc->req.irq_enable = q->irq_enable;
+
+	acc_dma_enqueue(q, i, &q_data->queue_stats);
+
+	/* Update stats. */
+	q_data->queue_stats.enqueued_count += i;
+	q_data->queue_stats.enqueue_err_count += num - i;
+
+	return i;
+}
+
 /* Enqueue decode operations for ACC200 device in TB mode. */
 static uint16_t
 acc200_enqueue_ldpc_dec_tb(struct rte_bbdev_queue_data *q_data,
@@ -1579,6 +2282,58 @@  acc200_enqueue_ldpc_dec_cb(struct rte_bbdev_queue_data *q_data,
 	return i;
 }
 
+
+/* Enqueue Turbo decode ops in TB mode, reserving one ring slot per CB; returns ops enqueued. */
+static uint16_t
+acc200_enqueue_dec_tb(struct rte_bbdev_queue_data *q_data,
+		struct rte_bbdev_dec_op **ops, uint16_t num)
+{
+	struct acc_queue *q = q_data->queue_private;
+	int32_t avail = acc_ring_avail_enq(q);
+	uint16_t i, enqueued_cbs = 0;
+	uint8_t cbs_in_tb;
+	int ret;
+
+	for (i = 0; i < num; ++i) {
+		cbs_in_tb = get_num_cbs_in_tb_dec(&ops[i]->turbo_dec);
+		/* Check there is space for the whole TB; a TB with zero CBs is rejected too. */
+		if (unlikely((avail - cbs_in_tb < 0) || (cbs_in_tb == 0))) {
+			acc_enqueue_ring_full(q_data);
+			break;
+		}
+		avail -= cbs_in_tb;
+
+		ret = enqueue_dec_one_op_tb(q, ops[i], enqueued_cbs, cbs_in_tb);
+		if (ret <= 0) { /* Stop at the first op that fails. */
+			acc_enqueue_invalid(q_data);
+			break;
+		}
+		enqueued_cbs += ret;
+	}
+
+	acc_dma_enqueue(q, enqueued_cbs, &q_data->queue_stats); /* Called even when 0. */
+
+	/* Update stats; the num - i ops left out are counted as enqueue errors. */
+	q_data->queue_stats.enqueued_count += i;
+	q_data->queue_stats.enqueue_err_count += num - i;
+
+	return i;
+}
+
+/* Enqueue Turbo decode ops; the code block mode of ops[0] selects the TB or CB path for all. */
+static uint16_t
+acc200_enqueue_dec(struct rte_bbdev_queue_data *q_data,
+		struct rte_bbdev_dec_op **ops, uint16_t num)
+{
+	int32_t aq_avail = acc_aq_avail(q_data, num);
+	if (unlikely((aq_avail <= 0) || (num == 0))) /* Nothing available, or empty burst. */
+		return 0;
+	if (ops[0]->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK)
+		return acc200_enqueue_dec_tb(q_data, ops, num);
+	else
+		return acc200_enqueue_dec_cb(q_data, ops, num);
+}
+
 /* Enqueue decode operations for ACC200 device. */
 static uint16_t
 acc200_enqueue_ldpc_dec(struct rte_bbdev_queue_data *q_data,
@@ -1905,6 +2660,48 @@  dequeue_dec_one_op_tb(struct acc_queue *q, struct rte_bbdev_dec_op **ref_op,
 	return cb_idx;
 }
 
+/* Dequeue Turbo encode ops; TB or CB mode is read once from the op at the ring tail. */
+static uint16_t
+acc200_dequeue_enc(struct rte_bbdev_queue_data *q_data,
+		struct rte_bbdev_enc_op **ops, uint16_t num)
+{
+	struct acc_queue *q = q_data->queue_private;
+	uint32_t avail = acc_ring_avail_deq(q);
+	uint32_t aq_dequeued = 0;
+	uint16_t i, dequeued_ops = 0, dequeued_descs = 0;
+	int ret, cbm;
+	struct rte_bbdev_enc_op *op;
+	if (avail == 0) /* Nothing to dequeue. */
+		return 0;
+	op = (q->ring_addr + (q->sw_ring_tail &
+			q->sw_ring_wrap_mask))->req.op_addr;
+
+	cbm = op->turbo_enc.code_block_mode; /* Reused for every iteration below. */
+
+	for (i = 0; i < num; i++) {
+		if (cbm == RTE_BBDEV_TRANSPORT_BLOCK)
+			ret = dequeue_enc_one_op_tb(q, &ops[dequeued_ops],
+					&dequeued_ops, &aq_dequeued,
+					&dequeued_descs);
+		else
+			ret = dequeue_enc_one_op_cb(q, &ops[dequeued_ops],
+					&dequeued_ops, &aq_dequeued,
+					&dequeued_descs);
+		if (ret < 0)
+			break;
+		if (dequeued_ops >= num) /* Requested number of ops reached. */
+			break;
+	}
+
+	q->aq_dequeued += aq_dequeued;
+	q->sw_ring_tail += dequeued_descs;
+
+	/* Update dequeue stats */
+	q_data->queue_stats.dequeued_count += dequeued_ops;
+
+	return dequeued_ops;
+}
+
 /* Dequeue LDPC encode operations from ACC200 device. */
 static uint16_t
 acc200_dequeue_ldpc_enc(struct rte_bbdev_queue_data *q_data,
@@ -1945,6 +2742,46 @@  acc200_dequeue_ldpc_enc(struct rte_bbdev_queue_data *q_data,
 	return dequeued_ops;
 }
 
+/* Dequeue up to num Turbo decode ops; TB or CB mode is checked per op. Returns ops dequeued. */
+static uint16_t
+acc200_dequeue_dec(struct rte_bbdev_queue_data *q_data,
+		struct rte_bbdev_dec_op **ops, uint16_t num)
+{
+	struct acc_queue *q = q_data->queue_private;
+	uint16_t dequeue_num;
+	uint32_t avail = acc_ring_avail_deq(q);
+	uint32_t aq_dequeued = 0;
+	uint16_t i;
+	uint16_t dequeued_cbs = 0;
+	struct rte_bbdev_dec_op *op;
+	int ret;
+
+	dequeue_num = (avail < num) ? avail : num; /* min(avail, num) */
+
+	for (i = 0; i < dequeue_num; ++i) {
+		op = (q->ring_addr + ((q->sw_ring_tail + dequeued_cbs)
+			& q->sw_ring_wrap_mask))->req.op_addr;
+		if (op->turbo_dec.code_block_mode == RTE_BBDEV_TRANSPORT_BLOCK)
+			ret = dequeue_dec_one_op_tb(q, &ops[i], dequeued_cbs,
+					&aq_dequeued);
+		else
+			ret = dequeue_dec_one_op_cb(q_data, q, &ops[i],
+					dequeued_cbs, &aq_dequeued);
+
+		if (ret <= 0) /* No descriptors consumed: stop. */
+			break;
+		dequeued_cbs += ret;
+	}
+
+	q->aq_dequeued += aq_dequeued;
+	q->sw_ring_tail += dequeued_cbs;
+
+	/* Update dequeue stats */
+	q_data->queue_stats.dequeued_count += i;
+
+	return i;
+}
+
 /* Dequeue decode operations from ACC200 device. */
 static uint16_t
 acc200_dequeue_ldpc_dec(struct rte_bbdev_queue_data *q_data,
@@ -1993,6 +2830,10 @@  acc200_bbdev_init(struct rte_bbdev *dev, struct rte_pci_driver *drv)
 	struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev->device);
 
 	dev->dev_ops = &acc200_bbdev_ops;
+	dev->enqueue_enc_ops = acc200_enqueue_enc;
+	dev->enqueue_dec_ops = acc200_enqueue_dec;
+	dev->dequeue_enc_ops = acc200_dequeue_enc;
+	dev->dequeue_dec_ops = acc200_dequeue_dec;
 	dev->enqueue_ldpc_enc_ops = acc200_enqueue_ldpc_enc;
 	dev->enqueue_ldpc_dec_ops = acc200_enqueue_ldpc_dec;
 	dev->dequeue_ldpc_enc_ops = acc200_dequeue_ldpc_enc;