[v2,15/37] baseband/acc100: add workaround for deRM corner cases

Message ID 20220820023157.189047-16-hernan.vargas@intel.com (mailing list archive)
State Superseded, archived
Delegated to: akhil goyal
Headers
Series baseband/acc100: changes for 22.11

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Vargas, Hernan Aug. 20, 2022, 2:31 a.m. UTC
  Add function to assess whether de-ratematch pre-processing should be run
in SW for corner cases.

Signed-off-by: Hernan Vargas <hernan.vargas@intel.com>
---
 drivers/baseband/acc100/acc100_pmd.h     |  13 +++
 drivers/baseband/acc100/rte_acc100_pmd.c | 103 ++++++++++++++++++++++-
 2 files changed, 114 insertions(+), 2 deletions(-)
  

Comments

Maxime Coquelin Sept. 15, 2022, 8:15 a.m. UTC | #1
On 8/20/22 04:31, Hernan Vargas wrote:
> Add function to assess whether de-ratematch pre-processing should be run
> in SW for corner cases.
> 
> Signed-off-by: Hernan Vargas <hernan.vargas@intel.com>
> ---
>   drivers/baseband/acc100/acc100_pmd.h     |  13 +++
>   drivers/baseband/acc100/rte_acc100_pmd.c | 103 ++++++++++++++++++++++-
>   2 files changed, 114 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/baseband/acc100/acc100_pmd.h b/drivers/baseband/acc100/acc100_pmd.h
> index 19a1f434bc..c98a182be6 100644
> --- a/drivers/baseband/acc100/acc100_pmd.h
> +++ b/drivers/baseband/acc100/acc100_pmd.h
> @@ -140,6 +140,8 @@
>   /* Constants from K0 computation from 3GPP 38.212 Table 5.4.2.1-2 */
>   #define ACC100_N_ZC_1 66 /* N = 66 Zc for BG 1 */
>   #define ACC100_N_ZC_2 50 /* N = 50 Zc for BG 2 */
> +#define ACC100_K_ZC_1 22 /* K = 22 Zc for BG 1 */
> +#define ACC100_K_ZC_2 10 /* K = 10 Zc for BG 2 */
>   #define ACC100_K0_1_1 17 /* K0 fraction numerator for rv 1 and BG 1 */
>   #define ACC100_K0_1_2 13 /* K0 fraction numerator for rv 1 and BG 2 */
>   #define ACC100_K0_2_1 33 /* K0 fraction numerator for rv 2 and BG 1 */
> @@ -177,6 +179,16 @@
>   #define ACC100_MS_IN_US         (1000)
>   #define ACC100_DDR_TRAINING_MAX (5000)
>   
> +/* Code rate limitation when padding is required */
> +#define ACC100_LIM_03 2  /* 0.03 */
> +#define ACC100_LIM_09 6  /* 0.09 */
> +#define ACC100_LIM_14 9  /* 0.14 */
> +#define ACC100_LIM_21 14 /* 0.21 */
> +#define ACC100_LIM_31 20 /* 0.31 */
> +#define ACC100_MAX_E (128 * 1024 - 2)
> +
> +
> +
>   /* ACC100 DMA Descriptor triplet */
>   struct acc100_dma_triplet {
>   	uint64_t address;
> @@ -572,6 +584,7 @@ struct __rte_cache_aligned acc100_queue {
>   	uint8_t *lb_out;
>   	rte_iova_t lb_in_addr_iova;
>   	rte_iova_t lb_out_addr_iova;
> +	int8_t *derm_buffer; /* interim buffer for de-rm in SDK */
>   	struct acc100_device *d;
>   };
>   
> diff --git a/drivers/baseband/acc100/rte_acc100_pmd.c b/drivers/baseband/acc100/rte_acc100_pmd.c
> index 1504acfadd..69c0714a37 100644
> --- a/drivers/baseband/acc100/rte_acc100_pmd.c
> +++ b/drivers/baseband/acc100/rte_acc100_pmd.c
> @@ -24,6 +24,10 @@
>   #include "acc100_pmd.h"
>   #include "acc101_pmd.h"
>   
> +#ifdef RTE_BBDEV_SDK_AVX512
> +#include <phy_rate_dematching_5gnr.h>
> +#endif
> +
>   #ifdef RTE_LIBRTE_BBDEV_DEBUG
>   RTE_LOG_REGISTER_DEFAULT(acc100_logtype, DEBUG);
>   #else
> @@ -898,6 +902,16 @@ acc100_queue_setup(struct rte_bbdev *dev, uint16_t queue_id,
>   		rte_free(q);
>   		return -ENOMEM;
>   	}
> +	q->derm_buffer = rte_zmalloc_socket(dev->device->driver->name,
> +			RTE_BBDEV_TURBO_MAX_CB_SIZE * 10,
> +			RTE_CACHE_LINE_SIZE, conf->socket);
> +	if (q->derm_buffer == NULL) {
> +		rte_bbdev_log(ERR, "Failed to allocate derm_buffer memory");
> +		rte_free(q->lb_in);
> +		rte_free(q->lb_out);
> +		rte_free(q);
> +		return -ENOMEM;
> +	}

It may make sense to have a common error path to avoid duplication, and so
reduce the risk of introducing memory leaks when changes are made later.
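
For illustration, a minimal sketch of such a common error path (the helper
name, the lb buffer size parameter and the log message are placeholders, not
taken from the driver); rte_free() accepts NULL, so one cleanup label covers
every allocation:

/* Hypothetical sketch only, not the submitted patch: every queue buffer
 * allocation funnels into a single cleanup label. */
static int
acc100_queue_buffers_alloc(struct acc100_queue *q, const char *name,
		size_t lb_size, int socket)
{
	q->lb_in = rte_zmalloc_socket(name, lb_size,
			RTE_CACHE_LINE_SIZE, socket);
	if (q->lb_in == NULL)
		goto fail;
	q->lb_out = rte_zmalloc_socket(name, lb_size,
			RTE_CACHE_LINE_SIZE, socket);
	if (q->lb_out == NULL)
		goto fail;
	q->derm_buffer = rte_zmalloc_socket(name,
			RTE_BBDEV_TURBO_MAX_CB_SIZE * 10,
			RTE_CACHE_LINE_SIZE, socket);
	if (q->derm_buffer == NULL)
		goto fail;
	return 0;

fail:
	rte_bbdev_log(ERR, "Failed to allocate queue buffers");
	/* rte_free(NULL) is a no-op, so this is safe for any failure point */
	rte_free(q->derm_buffer);
	rte_free(q->lb_out);
	rte_free(q->lb_in);
	return -ENOMEM;
}

The caller would still free q itself and return -ENOMEM when this helper
fails, as the current code does.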

>   	q->lb_out_addr_iova = rte_malloc_virt2iova(q->lb_out);
>   
>   	/*
> @@ -918,6 +932,7 @@ acc100_queue_setup(struct rte_bbdev *dev, uint16_t queue_id,
>   
>   	q_idx = acc100_find_free_queue_idx(dev, conf);
>   	if (q_idx == -1) {
> +		rte_free(q->derm_buffer);
>   		rte_free(q->lb_in);
>   		rte_free(q->lb_out);
>   		rte_free(q);
> @@ -955,6 +970,7 @@ acc100_queue_release(struct rte_bbdev *dev, uint16_t q_id)
>   		/* Mark the Queue as un-assigned */
>   		d->q_assigned_bit_map[q->qgrp_id] &= (0xFFFFFFFF -
>   				(1 << q->aq_id));
> +		rte_free(q->derm_buffer);
>   		rte_free(q->lb_in);
>   		rte_free(q->lb_out);
>   		rte_free(q);
> @@ -3512,10 +3528,42 @@ harq_loopback(struct acc100_queue *q, struct rte_bbdev_dec_op *op,
>   	return 1;
>   }
>   
> +/** Assess whether a work around is required for the deRM corner cases */
> +static inline bool
> +derm_workaround_required(struct rte_bbdev_op_ldpc_dec *ldpc_dec, struct acc100_queue *q)
> +{
> +	if (!is_acc100(q))
> +		return false;
> +	int32_t e = ldpc_dec->cb_params.e;
> +	int q_m = ldpc_dec->q_m;
> +	int z_c = ldpc_dec->z_c;
> +	int K = (ldpc_dec->basegraph == 1 ? ACC100_K_ZC_1 : ACC100_K_ZC_2)
> +			* z_c;
> +	bool required = false;

Add new line.

> +	if (ldpc_dec->basegraph == 1) {
> +		if ((q_m == 4) && (z_c >= 320) && (e * ACC100_LIM_31 > K * 64))
> +			required = true;
> +		else if ((e * ACC100_LIM_21 > K * 64))
> +			required = true;
> +	} else {
> +		if (q_m <= 2) {
> +			if ((z_c >= 208) && (e * ACC100_LIM_09 > K * 64))
> +				required = true;
> +			else if ((z_c < 208) && (e * ACC100_LIM_03 > K * 64))
> +				required = true;
> +		} else if (e * ACC100_LIM_14 > K * 64)
> +			required = true;
> +	}
> +	if (required)
> +		rte_bbdev_log(INFO, "Running deRM pre-processing in SW");

Add new line.

> +	return required;
> +}
> +
>   /** Enqueue one decode operations for ACC100 device in CB mode */
>   static inline int
>   enqueue_ldpc_dec_one_op_cb(struct acc100_queue *q, struct rte_bbdev_dec_op *op,
> -		uint16_t total_enqueued_cbs, bool same_op)
> +		uint16_t total_enqueued_cbs, bool same_op,
> +		struct rte_bbdev_queue_data *q_data)
>   {
>   	int ret;
>   	if (unlikely(check_bit(op->ldpc_dec.op_flags,
> @@ -3571,6 +3619,57 @@ enqueue_ldpc_dec_one_op_cb(struct acc100_queue *q, struct rte_bbdev_dec_op *op,
>   				&in_offset, &h_out_offset,
>   				&h_out_length, harq_layout);
>   	} else {
> +		if (derm_workaround_required(&op->ldpc_dec, q)) {
> +			#ifdef RTE_BBDEV_SDK_AVX512

First, the indentation is not good here.

Also, my understanding is that this code will get built only if Flexran
SDK is available. Flexran SDK is proprietary, and so it is not possible
to have this code exercised by the upstream CI.

Code under RTE_BBDEV_SDK_AVX512 should be dropped IMO.

> +			struct rte_bbdev_op_ldpc_dec *dec = &op->ldpc_dec;
> +			/* Checking input size is matching with E */
> +			if (dec->input.data->data_len < dec->cb_params.e) {
> +				rte_bbdev_log(ERR,
> +						"deRM: Input size mismatch");
> +				return -EFAULT;
> +			}
> +			/* Run first deRM processing in SW */
> +			struct bblib_rate_dematching_5gnr_request derm_req;
> +			struct bblib_rate_dematching_5gnr_response derm_resp;
> +			uint8_t *in = rte_pktmbuf_mtod_offset(dec->input.data,
> +					uint8_t *, in_offset);

Don't mix declarations & code.
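
If the SDK-gated path is kept, one way to address the indentation remark above
and the declaration remarks here is to move the pre-processing into a dedicated
helper, so the #ifdef and all locals stay out of enqueue_ldpc_dec_one_op_cb().
The sketch below is illustrative only (helper name and signature are
hypothetical); the field assignments mirror the hunk quoted here:

/* Hypothetical sketch, not the submitted code: isolates the SDK-gated
 * deRM pre-processing with all declarations at the top of the helper. */
static inline int
derm_preprocess_sw(struct acc100_queue *q, struct rte_bbdev_op_ldpc_dec *dec,
		uint32_t in_offset, struct rte_bbdev_queue_data *q_data)
{
#ifdef RTE_BBDEV_SDK_AVX512
	struct bblib_rate_dematching_5gnr_request derm_req;
	struct bblib_rate_dematching_5gnr_response derm_resp;
	uint8_t *in;

	/* Check that the input size matches E */
	if (dec->input.data->data_len < dec->cb_params.e) {
		rte_bbdev_log(ERR, "deRM: Input size mismatch");
		return -EFAULT;
	}
	in = rte_pktmbuf_mtod_offset(dec->input.data, uint8_t *, in_offset);

	/* Run the deRM pre-processing in SW first */
	derm_req.p_in = (int8_t *) in;
	derm_req.p_harq = q->derm_buffer;
	derm_req.base_graph = dec->basegraph;
	derm_req.zc = dec->z_c;
	derm_req.ncb = dec->n_cb;
	derm_req.e = dec->cb_params.e;
	if (derm_req.e > ACC100_MAX_E) {
		rte_bbdev_log(WARNING, "deRM: E %d > %d max",
				derm_req.e, ACC100_MAX_E);
		derm_req.e = ACC100_MAX_E;
	}
	derm_req.k0 = 0; /* Actual k0 is computed by the SDK */
	derm_req.isretx = false;
	derm_req.rvid = dec->rv_index;
	derm_req.modulation_order = dec->q_m;
	derm_req.start_null_index = (dec->basegraph == 1 ? 22 : 10) * dec->z_c
			- 2 * dec->z_c - dec->n_filler;
	derm_req.num_of_null = dec->n_filler;
	bblib_rate_dematching_5gnr(&derm_req, &derm_resp);

	/* Force the HW deRM into a pass-through configuration */
	dec->q_m = 1;
	dec->cb_params.e = dec->n_cb - dec->n_filler;
	dec->rv_index = 0;
	rte_memcpy(in, q->derm_buffer, dec->cb_params.e);

	/* Count the operations that needed SW pre-processing */
	q_data->queue_stats.enqueue_warn_count++;
	return 0;
#else
	RTE_SET_USED(q);
	RTE_SET_USED(dec);
	RTE_SET_USED(in_offset);
	RTE_SET_USED(q_data);
	rte_bbdev_log(WARNING,
			"Corner case may require deRM pre-processing in SDK");
	return 0;
#endif
}

The call site would then reduce to:

	if (derm_workaround_required(&op->ldpc_dec, q)) {
		ret = derm_preprocess_sw(q, &op->ldpc_dec, in_offset, q_data);
		if (ret < 0)
			return ret;
	}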

> +			derm_req.p_in = (int8_t *) in;
> +			derm_req.p_harq = (int8_t *) q->derm_buffer;
> +			derm_req.base_graph = dec->basegraph;
> +			derm_req.zc = dec->z_c;
> +			derm_req.ncb = dec->n_cb;
> +			derm_req.e = dec->cb_params.e;
> +			if (derm_req.e > ACC100_MAX_E) {
> +				rte_bbdev_log(WARNING,
> +						"deRM: E %d > %d max",
> +						derm_req.e, ACC100_MAX_E);
> +				derm_req.e = ACC100_MAX_E;
> +			}
> +			derm_req.k0 = 0; /* Actual output from SDK */
> +			derm_req.isretx = false;
> +			derm_req.rvid = dec->rv_index;
> +			derm_req.modulation_order = dec->q_m;
> +			derm_req.start_null_index =
> +					(dec->basegraph == 1 ? 22 : 10)
> +					* dec->z_c - 2 * dec->z_c
> +					- dec->n_filler;
> +			derm_req.num_of_null = dec->n_filler;
> +			bblib_rate_dematching_5gnr(&derm_req, &derm_resp);
> +			/* Force back the HW DeRM */
> +			dec->q_m = 1;
> +			dec->cb_params.e = dec->n_cb - dec->n_filler;
> +			dec->rv_index = 0;
> +			rte_memcpy(in, q->derm_buffer, dec->cb_params.e);
> +			/* Capture counter when pre-processing is used */
> +			q_data->queue_stats.enqueue_warn_count++;
> +			#else
> +			RTE_SET_USED(q_data);
> +			rte_bbdev_log(WARNING,
> +				"Corner case may require deRM pre-processing in SDK"
> +				);
> +			#endif
> +		}
> +
>   		struct acc100_fcw_ld *fcw;
>   		uint32_t seg_total_left;

Don't mix declarations & code.

>   		fcw = &desc->req.fcw_ld;
> @@ -4322,7 +4421,7 @@ acc100_enqueue_ldpc_dec_cb(struct rte_bbdev_queue_data *q_data,
>   			ops[i]->ldpc_dec.n_cb, ops[i]->ldpc_dec.q_m,
>   			ops[i]->ldpc_dec.n_filler, ops[i]->ldpc_dec.cb_params.e,
>   			same_op);
> -		ret = enqueue_ldpc_dec_one_op_cb(q, ops[i], i, same_op);
> +		ret = enqueue_ldpc_dec_one_op_cb(q, ops[i], i, same_op, q_data);
>   		if (ret < 0) {
>   			acc100_enqueue_invalid(q_data);
>   			break;
  
Chautru, Nicolas Sept. 16, 2022, 1:20 a.m. UTC | #2
Hi Maxime, 

> -----Original Message-----
> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> Sent: Thursday, September 15, 2022 1:15 AM
> To: Vargas, Hernan <hernan.vargas@intel.com>; dev@dpdk.org;
> gakhil@marvell.com; trix@redhat.com
> Cc: Chautru, Nicolas <nicolas.chautru@intel.com>; Zhang, Qi Z
> <qi.z.zhang@intel.com>
> Subject: Re: [PATCH v2 15/37] baseband/acc100: add workaround for deRM
> corner cases
> 
> 
> 
> On 8/20/22 04:31, Hernan Vargas wrote:
> > Add function to assess whether de-ratematch pre-processing should be run
> > in SW for corner cases.
> >
> > Signed-off-by: Hernan Vargas <hernan.vargas@intel.com>
> > ---
> >   drivers/baseband/acc100/acc100_pmd.h     |  13 +++
> >   drivers/baseband/acc100/rte_acc100_pmd.c | 103
> ++++++++++++++++++++++-
> >   2 files changed, 114 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/baseband/acc100/acc100_pmd.h
> > b/drivers/baseband/acc100/acc100_pmd.h
> > index 19a1f434bc..c98a182be6 100644
> > --- a/drivers/baseband/acc100/acc100_pmd.h
> > +++ b/drivers/baseband/acc100/acc100_pmd.h
> > @@ -140,6 +140,8 @@
> >   /* Constants from K0 computation from 3GPP 38.212 Table 5.4.2.1-2 */
> >   #define ACC100_N_ZC_1 66 /* N = 66 Zc for BG 1 */
> >   #define ACC100_N_ZC_2 50 /* N = 50 Zc for BG 2 */
> > +#define ACC100_K_ZC_1 22 /* K = 22 Zc for BG 1 */ #define
> > +ACC100_K_ZC_2 10 /* K = 10 Zc for BG 2 */
> >   #define ACC100_K0_1_1 17 /* K0 fraction numerator for rv 1 and BG 1 */
> >   #define ACC100_K0_1_2 13 /* K0 fraction numerator for rv 1 and BG 2 */
> >   #define ACC100_K0_2_1 33 /* K0 fraction numerator for rv 2 and BG 1
> > */ @@ -177,6 +179,16 @@
> >   #define ACC100_MS_IN_US         (1000)
> >   #define ACC100_DDR_TRAINING_MAX (5000)
> >
> > +/* Code rate limitation when padding is required */ #define
> > +ACC100_LIM_03 2  /* 0.03 */ #define ACC100_LIM_09 6  /* 0.09 */
> > +#define ACC100_LIM_14 9  /* 0.14 */ #define ACC100_LIM_21 14 /* 0.21
> > +*/ #define ACC100_LIM_31 20 /* 0.31 */ #define ACC100_MAX_E (128 *
> > +1024 - 2)
> > +
> > +
> > +
> >   /* ACC100 DMA Descriptor triplet */
> >   struct acc100_dma_triplet {
> >   	uint64_t address;
> > @@ -572,6 +584,7 @@ struct __rte_cache_aligned acc100_queue {
> >   	uint8_t *lb_out;
> >   	rte_iova_t lb_in_addr_iova;
> >   	rte_iova_t lb_out_addr_iova;
> > +	int8_t *derm_buffer; /* interim buffer for de-rm in SDK */
> >   	struct acc100_device *d;
> >   };
> >
> > diff --git a/drivers/baseband/acc100/rte_acc100_pmd.c
> > b/drivers/baseband/acc100/rte_acc100_pmd.c
> > index 1504acfadd..69c0714a37 100644
> > --- a/drivers/baseband/acc100/rte_acc100_pmd.c
> > +++ b/drivers/baseband/acc100/rte_acc100_pmd.c
> > @@ -24,6 +24,10 @@
> >   #include "acc100_pmd.h"
> >   #include "acc101_pmd.h"
> >
> > +#ifdef RTE_BBDEV_SDK_AVX512
> > +#include <phy_rate_dematching_5gnr.h> #endif
> > +
> >   #ifdef RTE_LIBRTE_BBDEV_DEBUG
> >   RTE_LOG_REGISTER_DEFAULT(acc100_logtype, DEBUG);
> >   #else
> > @@ -898,6 +902,16 @@ acc100_queue_setup(struct rte_bbdev *dev,
> uint16_t queue_id,
> >   		rte_free(q);
> >   		return -ENOMEM;
> >   	}
> > +	q->derm_buffer = rte_zmalloc_socket(dev->device->driver->name,
> > +			RTE_BBDEV_TURBO_MAX_CB_SIZE * 10,
> > +			RTE_CACHE_LINE_SIZE, conf->socket);
> > +	if (q->derm_buffer == NULL) {
> > +		rte_bbdev_log(ERR, "Failed to allocate derm_buffer
> memory");
> > +		rte_free(q->lb_in);
> > +		rte_free(q->lb_out);
> > +		rte_free(q);
> > +		return -ENOMEM;
> > +	}
> 
> It may make sense to have a common error path to avoid duplication, and so
> reduce the risk of introducing memory leaks when changes are made later.
> 
> >   	q->lb_out_addr_iova = rte_malloc_virt2iova(q->lb_out);
> >
> >   	/*
> > @@ -918,6 +932,7 @@ acc100_queue_setup(struct rte_bbdev *dev,
> uint16_t
> > queue_id,
> >
> >   	q_idx = acc100_find_free_queue_idx(dev, conf);
> >   	if (q_idx == -1) {
> > +		rte_free(q->derm_buffer);
> >   		rte_free(q->lb_in);
> >   		rte_free(q->lb_out);
> >   		rte_free(q);
> > @@ -955,6 +970,7 @@ acc100_queue_release(struct rte_bbdev *dev,
> uint16_t q_id)
> >   		/* Mark the Queue as un-assigned */
> >   		d->q_assigned_bit_map[q->qgrp_id] &= (0xFFFFFFFF -
> >   				(1 << q->aq_id));
> > +		rte_free(q->derm_buffer);
> >   		rte_free(q->lb_in);
> >   		rte_free(q->lb_out);
> >   		rte_free(q);
> > @@ -3512,10 +3528,42 @@ harq_loopback(struct acc100_queue *q,
> struct rte_bbdev_dec_op *op,
> >   	return 1;
> >   }
> >
> > +/** Assess whether a work around is required for the deRM corner
> > +cases */ static inline bool derm_workaround_required(struct
> > +rte_bbdev_op_ldpc_dec *ldpc_dec, struct acc100_queue *q) {
> > +	if (!is_acc100(q))
> > +		return false;
> > +	int32_t e = ldpc_dec->cb_params.e;
> > +	int q_m = ldpc_dec->q_m;
> > +	int z_c = ldpc_dec->z_c;
> > +	int K = (ldpc_dec->basegraph == 1 ? ACC100_K_ZC_1 :
> ACC100_K_ZC_2)
> > +			* z_c;
> > +	bool required = false;
> 
> Add new line.
> 
> > +	if (ldpc_dec->basegraph == 1) {
> > +		if ((q_m == 4) && (z_c >= 320) && (e * ACC100_LIM_31 > K *
> 64))
> > +			required = true;
> > +		else if ((e * ACC100_LIM_21 > K * 64))
> > +			required = true;
> > +	} else {
> > +		if (q_m <= 2) {
> > +			if ((z_c >= 208) && (e * ACC100_LIM_09 > K * 64))
> > +				required = true;
> > +			else if ((z_c < 208) && (e * ACC100_LIM_03 > K * 64))
> > +				required = true;
> > +		} else if (e * ACC100_LIM_14 > K * 64)
> > +			required = true;
> > +	}
> > +	if (required)
> > +		rte_bbdev_log(INFO, "Running deRM pre-processing in SW");
> 
> Add new line.
> 
> > +	return required;
> > +}
> > +
> >   /** Enqueue one decode operations for ACC100 device in CB mode */
> >   static inline int
> >   enqueue_ldpc_dec_one_op_cb(struct acc100_queue *q, struct
> rte_bbdev_dec_op *op,
> > -		uint16_t total_enqueued_cbs, bool same_op)
> > +		uint16_t total_enqueued_cbs, bool same_op,
> > +		struct rte_bbdev_queue_data *q_data)
> >   {
> >   	int ret;
> >   	if (unlikely(check_bit(op->ldpc_dec.op_flags,
> > @@ -3571,6 +3619,57 @@ enqueue_ldpc_dec_one_op_cb(struct
> acc100_queue *q, struct rte_bbdev_dec_op *op,
> >   				&in_offset, &h_out_offset,
> >   				&h_out_length, harq_layout);
> >   	} else {
> > +		if (derm_workaround_required(&op->ldpc_dec, q)) {
> > +			#ifdef RTE_BBDEV_SDK_AVX512
> 
> First, the indentation is not good here.
> 
> Also, my understanding is that this code will get built only if Flexran SDK is
> available. Flexran SDK is proprietary, and so it is not possible to have this
> code exercised by the upstream CI.
> 
> Code under RTE_BBDEV_SDK_AVX512 should be dropped IMO.

We provide a subset of the Intel SDKs to the community. More generally, these functions could be replaced with other implementations (including ones from other companies).

> 
> > +			struct rte_bbdev_op_ldpc_dec *dec = &op-
> >ldpc_dec;
> > +			/* Checking input size is matching with E */
> > +			if (dec->input.data->data_len < dec->cb_params.e) {
> > +				rte_bbdev_log(ERR,
> > +						"deRM: Input size
> mismatch");
> > +				return -EFAULT;
> > +			}
> > +			/* Run first deRM processing in SW */
> > +			struct bblib_rate_dematching_5gnr_request
> derm_req;
> > +			struct bblib_rate_dematching_5gnr_response
> derm_resp;
> > +			uint8_t *in = rte_pktmbuf_mtod_offset(dec-
> >input.data,
> > +					uint8_t *, in_offset);
> 
> Don't mix declarations & code.
> 
> > +			derm_req.p_in = (int8_t *) in;
> > +			derm_req.p_harq = (int8_t *) q->derm_buffer;
> > +			derm_req.base_graph = dec->basegraph;
> > +			derm_req.zc = dec->z_c;
> > +			derm_req.ncb = dec->n_cb;
> > +			derm_req.e = dec->cb_params.e;
> > +			if (derm_req.e > ACC100_MAX_E) {
> > +				rte_bbdev_log(WARNING,
> > +						"deRM: E %d > %d max",
> > +						derm_req.e,
> ACC100_MAX_E);
> > +				derm_req.e = ACC100_MAX_E;
> > +			}
> > +			derm_req.k0 = 0; /* Actual output from SDK */
> > +			derm_req.isretx = false;
> > +			derm_req.rvid = dec->rv_index;
> > +			derm_req.modulation_order = dec->q_m;
> > +			derm_req.start_null_index =
> > +					(dec->basegraph == 1 ? 22 : 10)
> > +					* dec->z_c - 2 * dec->z_c
> > +					- dec->n_filler;
> > +			derm_req.num_of_null = dec->n_filler;
> > +			bblib_rate_dematching_5gnr(&derm_req,
> &derm_resp);
> > +			/* Force back the HW DeRM */
> > +			dec->q_m = 1;
> > +			dec->cb_params.e = dec->n_cb - dec->n_filler;
> > +			dec->rv_index = 0;
> > +			rte_memcpy(in, q->derm_buffer, dec->cb_params.e);
> > +			/* Capture counter when pre-processing is used */
> > +			q_data->queue_stats.enqueue_warn_count++;
> > +			#else
> > +			RTE_SET_USED(q_data);
> > +			rte_bbdev_log(WARNING,
> > +				"Corner case may require deRM pre-
> processing in SDK"
> > +				);
> > +			#endif
> > +		}
> > +
> >   		struct acc100_fcw_ld *fcw;
> >   		uint32_t seg_total_left;
> 
> Don't mix declarations & code.
> 
> >   		fcw = &desc->req.fcw_ld;
> > @@ -4322,7 +4421,7 @@ acc100_enqueue_ldpc_dec_cb(struct
> rte_bbdev_queue_data *q_data,
> >   			ops[i]->ldpc_dec.n_cb, ops[i]->ldpc_dec.q_m,
> >   			ops[i]->ldpc_dec.n_filler, ops[i]-
> >ldpc_dec.cb_params.e,
> >   			same_op);
> > -		ret = enqueue_ldpc_dec_one_op_cb(q, ops[i], i, same_op);
> > +		ret = enqueue_ldpc_dec_one_op_cb(q, ops[i], i, same_op,
> q_data);
> >   		if (ret < 0) {
> >   			acc100_enqueue_invalid(q_data);
> >   			break;
  

Patch

diff --git a/drivers/baseband/acc100/acc100_pmd.h b/drivers/baseband/acc100/acc100_pmd.h
index 19a1f434bc..c98a182be6 100644
--- a/drivers/baseband/acc100/acc100_pmd.h
+++ b/drivers/baseband/acc100/acc100_pmd.h
@@ -140,6 +140,8 @@ 
 /* Constants from K0 computation from 3GPP 38.212 Table 5.4.2.1-2 */
 #define ACC100_N_ZC_1 66 /* N = 66 Zc for BG 1 */
 #define ACC100_N_ZC_2 50 /* N = 50 Zc for BG 2 */
+#define ACC100_K_ZC_1 22 /* K = 22 Zc for BG 1 */
+#define ACC100_K_ZC_2 10 /* K = 10 Zc for BG 2 */
 #define ACC100_K0_1_1 17 /* K0 fraction numerator for rv 1 and BG 1 */
 #define ACC100_K0_1_2 13 /* K0 fraction numerator for rv 1 and BG 2 */
 #define ACC100_K0_2_1 33 /* K0 fraction numerator for rv 2 and BG 1 */
@@ -177,6 +179,16 @@ 
 #define ACC100_MS_IN_US         (1000)
 #define ACC100_DDR_TRAINING_MAX (5000)
 
+/* Code rate limitation when padding is required */
+#define ACC100_LIM_03 2  /* 0.03 */
+#define ACC100_LIM_09 6  /* 0.09 */
+#define ACC100_LIM_14 9  /* 0.14 */
+#define ACC100_LIM_21 14 /* 0.21 */
+#define ACC100_LIM_31 20 /* 0.31 */
+#define ACC100_MAX_E (128 * 1024 - 2)
+
+
+
 /* ACC100 DMA Descriptor triplet */
 struct acc100_dma_triplet {
 	uint64_t address;
@@ -572,6 +584,7 @@  struct __rte_cache_aligned acc100_queue {
 	uint8_t *lb_out;
 	rte_iova_t lb_in_addr_iova;
 	rte_iova_t lb_out_addr_iova;
+	int8_t *derm_buffer; /* interim buffer for de-rm in SDK */
 	struct acc100_device *d;
 };
 
diff --git a/drivers/baseband/acc100/rte_acc100_pmd.c b/drivers/baseband/acc100/rte_acc100_pmd.c
index 1504acfadd..69c0714a37 100644
--- a/drivers/baseband/acc100/rte_acc100_pmd.c
+++ b/drivers/baseband/acc100/rte_acc100_pmd.c
@@ -24,6 +24,10 @@ 
 #include "acc100_pmd.h"
 #include "acc101_pmd.h"
 
+#ifdef RTE_BBDEV_SDK_AVX512
+#include <phy_rate_dematching_5gnr.h>
+#endif
+
 #ifdef RTE_LIBRTE_BBDEV_DEBUG
 RTE_LOG_REGISTER_DEFAULT(acc100_logtype, DEBUG);
 #else
@@ -898,6 +902,16 @@  acc100_queue_setup(struct rte_bbdev *dev, uint16_t queue_id,
 		rte_free(q);
 		return -ENOMEM;
 	}
+	q->derm_buffer = rte_zmalloc_socket(dev->device->driver->name,
+			RTE_BBDEV_TURBO_MAX_CB_SIZE * 10,
+			RTE_CACHE_LINE_SIZE, conf->socket);
+	if (q->derm_buffer == NULL) {
+		rte_bbdev_log(ERR, "Failed to allocate derm_buffer memory");
+		rte_free(q->lb_in);
+		rte_free(q->lb_out);
+		rte_free(q);
+		return -ENOMEM;
+	}
 	q->lb_out_addr_iova = rte_malloc_virt2iova(q->lb_out);
 
 	/*
@@ -918,6 +932,7 @@  acc100_queue_setup(struct rte_bbdev *dev, uint16_t queue_id,
 
 	q_idx = acc100_find_free_queue_idx(dev, conf);
 	if (q_idx == -1) {
+		rte_free(q->derm_buffer);
 		rte_free(q->lb_in);
 		rte_free(q->lb_out);
 		rte_free(q);
@@ -955,6 +970,7 @@  acc100_queue_release(struct rte_bbdev *dev, uint16_t q_id)
 		/* Mark the Queue as un-assigned */
 		d->q_assigned_bit_map[q->qgrp_id] &= (0xFFFFFFFF -
 				(1 << q->aq_id));
+		rte_free(q->derm_buffer);
 		rte_free(q->lb_in);
 		rte_free(q->lb_out);
 		rte_free(q);
@@ -3512,10 +3528,42 @@  harq_loopback(struct acc100_queue *q, struct rte_bbdev_dec_op *op,
 	return 1;
 }
 
+/** Assess whether a work around is required for the deRM corner cases */
+static inline bool
+derm_workaround_required(struct rte_bbdev_op_ldpc_dec *ldpc_dec, struct acc100_queue *q)
+{
+	if (!is_acc100(q))
+		return false;
+	int32_t e = ldpc_dec->cb_params.e;
+	int q_m = ldpc_dec->q_m;
+	int z_c = ldpc_dec->z_c;
+	int K = (ldpc_dec->basegraph == 1 ? ACC100_K_ZC_1 : ACC100_K_ZC_2)
+			* z_c;
+	bool required = false;
+	if (ldpc_dec->basegraph == 1) {
+		if ((q_m == 4) && (z_c >= 320) && (e * ACC100_LIM_31 > K * 64))
+			required = true;
+		else if ((e * ACC100_LIM_21 > K * 64))
+			required = true;
+	} else {
+		if (q_m <= 2) {
+			if ((z_c >= 208) && (e * ACC100_LIM_09 > K * 64))
+				required = true;
+			else if ((z_c < 208) && (e * ACC100_LIM_03 > K * 64))
+				required = true;
+		} else if (e * ACC100_LIM_14 > K * 64)
+			required = true;
+	}
+	if (required)
+		rte_bbdev_log(INFO, "Running deRM pre-processing in SW");
+	return required;
+}
+
 /** Enqueue one decode operations for ACC100 device in CB mode */
 static inline int
 enqueue_ldpc_dec_one_op_cb(struct acc100_queue *q, struct rte_bbdev_dec_op *op,
-		uint16_t total_enqueued_cbs, bool same_op)
+		uint16_t total_enqueued_cbs, bool same_op,
+		struct rte_bbdev_queue_data *q_data)
 {
 	int ret;
 	if (unlikely(check_bit(op->ldpc_dec.op_flags,
@@ -3571,6 +3619,57 @@  enqueue_ldpc_dec_one_op_cb(struct acc100_queue *q, struct rte_bbdev_dec_op *op,
 				&in_offset, &h_out_offset,
 				&h_out_length, harq_layout);
 	} else {
+		if (derm_workaround_required(&op->ldpc_dec, q)) {
+			#ifdef RTE_BBDEV_SDK_AVX512
+			struct rte_bbdev_op_ldpc_dec *dec = &op->ldpc_dec;
+			/* Checking input size is matching with E */
+			if (dec->input.data->data_len < dec->cb_params.e) {
+				rte_bbdev_log(ERR,
+						"deRM: Input size mismatch");
+				return -EFAULT;
+			}
+			/* Run first deRM processing in SW */
+			struct bblib_rate_dematching_5gnr_request derm_req;
+			struct bblib_rate_dematching_5gnr_response derm_resp;
+			uint8_t *in = rte_pktmbuf_mtod_offset(dec->input.data,
+					uint8_t *, in_offset);
+			derm_req.p_in = (int8_t *) in;
+			derm_req.p_harq = (int8_t *) q->derm_buffer;
+			derm_req.base_graph = dec->basegraph;
+			derm_req.zc = dec->z_c;
+			derm_req.ncb = dec->n_cb;
+			derm_req.e = dec->cb_params.e;
+			if (derm_req.e > ACC100_MAX_E) {
+				rte_bbdev_log(WARNING,
+						"deRM: E %d > %d max",
+						derm_req.e, ACC100_MAX_E);
+				derm_req.e = ACC100_MAX_E;
+			}
+			derm_req.k0 = 0; /* Actual output from SDK */
+			derm_req.isretx = false;
+			derm_req.rvid = dec->rv_index;
+			derm_req.modulation_order = dec->q_m;
+			derm_req.start_null_index =
+					(dec->basegraph == 1 ? 22 : 10)
+					* dec->z_c - 2 * dec->z_c
+					- dec->n_filler;
+			derm_req.num_of_null = dec->n_filler;
+			bblib_rate_dematching_5gnr(&derm_req, &derm_resp);
+			/* Force back the HW DeRM */
+			dec->q_m = 1;
+			dec->cb_params.e = dec->n_cb - dec->n_filler;
+			dec->rv_index = 0;
+			rte_memcpy(in, q->derm_buffer, dec->cb_params.e);
+			/* Capture counter when pre-processing is used */
+			q_data->queue_stats.enqueue_warn_count++;
+			#else
+			RTE_SET_USED(q_data);
+			rte_bbdev_log(WARNING,
+				"Corner case may require deRM pre-processing in SDK"
+				);
+			#endif
+		}
+
 		struct acc100_fcw_ld *fcw;
 		uint32_t seg_total_left;
 		fcw = &desc->req.fcw_ld;
@@ -4322,7 +4421,7 @@  acc100_enqueue_ldpc_dec_cb(struct rte_bbdev_queue_data *q_data,
 			ops[i]->ldpc_dec.n_cb, ops[i]->ldpc_dec.q_m,
 			ops[i]->ldpc_dec.n_filler, ops[i]->ldpc_dec.cb_params.e,
 			same_op);
-		ret = enqueue_ldpc_dec_one_op_cb(q, ops[i], i, same_op);
+		ret = enqueue_ldpc_dec_one_op_cb(q, ops[i], i, same_op, q_data);
 		if (ret < 0) {
 			acc100_enqueue_invalid(q_data);
 			break;