[v6,1/1] baseband/acc100: add workaround for deRM corner cases

Message ID 20221025023824.127049-2-hernan.vargas@intel.com (mailing list archive)
State Superseded, archived
Delegated to: akhil goyal
Headers
Series baseband/acc100: changes for 22.11 |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation warning apply issues
ci/iol-testing warning apply patch failure

Commit Message

Hernan Vargas Oct. 25, 2022, 2:38 a.m. UTC
  Add function to support de-ratematch pre-processing for SW corner cases.
Some specific 5GUL FEC corner cases may cause unintended back pressure
and in some cases a potential stability issue on the ACC100.
To avoid this potential issue completely, the PMD can preempt
such code block configurations and process the first level deRM
in SW using the SDK libraries prior to running the rest of the FEC
decoding in HW using an amended code block configuration.
In case the meson build system doesn't find such SDK libraries, the
fallback method is to run in HW with a warning.

Signed-off-by: Hernan Vargas <hernan.vargas@intel.com>
---
 drivers/baseband/acc/acc_common.h     |   8 ++
 drivers/baseband/acc/meson.build      |  21 +++++
 drivers/baseband/acc/rte_acc100_pmd.c | 108 +++++++++++++++++++++++++-
 3 files changed, 134 insertions(+), 3 deletions(-)
  

Comments

Maxime Coquelin Oct. 25, 2022, 8:26 a.m. UTC | #1
Hi Hernan,

On 10/25/22 04:38, Hernan Vargas wrote:
> Add function to support de-ratematch pre-processing for SW corner cases.
> Some specific 5GUL FEC corner cases may cause unintended back pressure
> and in some cases a potential stability issue on the ACC100.
> To be able to avoid completely such potential issue, the PMD can preempt
> such code block configuration so that to process the first level deRM
> in SW using the SDK libraries prior to running the rest of the FEC
> decoding in HW using an amended code block configuration.
> In case meson build system doesn't find such SDK libraries, the fall
> method is to run in HW with a warning.
> 
> Signed-off-by: Hernan Vargas <hernan.vargas@intel.com>
> ---
>   drivers/baseband/acc/acc_common.h     |   8 ++
>   drivers/baseband/acc/meson.build      |  21 +++++
>   drivers/baseband/acc/rte_acc100_pmd.c | 108 +++++++++++++++++++++++++-
>   3 files changed, 134 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/baseband/acc/acc_common.h b/drivers/baseband/acc/acc_common.h
> index eae7eab4e9..5e8972b40a 100644
> --- a/drivers/baseband/acc/acc_common.h
> +++ b/drivers/baseband/acc/acc_common.h
> @@ -123,6 +123,14 @@
>   #define ACC_HARQ_ALIGN_64B          64
>   #define ACC_MAX_ZC                  384
>   
> +/* De-ratematch code rate limitation when padding is required */
> +#define ACC_LIM_03 2  /* 0.03 */
> +#define ACC_LIM_09 6  /* 0.09 */
> +#define ACC_LIM_14 9  /* 0.14 */
> +#define ACC_LIM_21 14 /* 0.21 */
> +#define ACC_LIM_31 20 /* 0.31 */
> +#define ACC_MAX_E (128 * 1024 - 2)
> +
>   /* Helper macro for logging */
>   #define rte_acc_log(level, fmt, ...) \
>   	rte_log(RTE_LOG_ ## level, RTE_LOG_NOTICE, fmt "\n", \
> diff --git a/drivers/baseband/acc/meson.build b/drivers/baseband/acc/meson.build
> index 77c393b533..a5fc4fed01 100644
> --- a/drivers/baseband/acc/meson.build
> +++ b/drivers/baseband/acc/meson.build
> @@ -1,6 +1,27 @@
>   # SPDX-License-Identifier: BSD-3-Clause
>   # Copyright(c) 2020 Intel Corporation
>   
> +# Check for FlexRAN SDK libraries
> +dep_dec5g = dependency('flexran_sdk_ldpc_decoder_5gnr', required: false)
> +
> +if dep_dec5g.found()
> +    ext_deps += cc.find_library('libstdc++', required: true)
> +    ext_deps += cc.find_library('libirc', required: true)
> +    ext_deps += cc.find_library('libimf', required: true)
> +    ext_deps += cc.find_library('libipps', required: true)
> +    ext_deps += cc.find_library('libsvml', required: true)
> +    ext_deps += dep_dec5g
> +    ext_deps += dependency('flexran_sdk_ldpc_encoder_5gnr', required: true)
> +    ext_deps += dependency('flexran_sdk_LDPC_ratematch_5gnr', required: true)
> +    ext_deps += dependency('flexran_sdk_rate_dematching_5gnr', required: true)
> +    ext_deps += dependency('flexran_sdk_turbo', required: true)
> +    ext_deps += dependency('flexran_sdk_crc', required: true)
> +    ext_deps += dependency('flexran_sdk_rate_matching', required: true)
> +    ext_deps += dependency('flexran_sdk_common', required: true)
> +    cflags += ['-DRTE_BBDEV_SDK_AVX2']
> +    cflags += ['-DRTE_BBDEV_SDK_AVX512']
> +endif
> +
>   deps += ['bbdev', 'bus_pci']
>   
>   sources = files('rte_acc100_pmd.c', 'rte_acc200_pmd.c')
> diff --git a/drivers/baseband/acc/rte_acc100_pmd.c b/drivers/baseband/acc/rte_acc100_pmd.c
> index 23bc5d25bb..e8b230e563 100644
> --- a/drivers/baseband/acc/rte_acc100_pmd.c
> +++ b/drivers/baseband/acc/rte_acc100_pmd.c
> @@ -25,6 +25,10 @@
>   #include "acc101_pmd.h"
>   #include "acc200_cfg.h"
>   
> +#ifdef RTE_BBDEV_SDK_AVX512
> +#include <phy_rate_dematching_5gnr.h>
> +#endif
> +
>   #ifdef RTE_LIBRTE_BBDEV_DEBUG
>   RTE_LOG_REGISTER_DEFAULT(acc100_logtype, DEBUG);
>   #else
> @@ -756,6 +760,14 @@ acc100_queue_setup(struct rte_bbdev *dev, uint16_t queue_id,
>   		ret = -ENOMEM;
>   		goto free_lb_out;
>   	}
> +	q->derm_buffer = rte_zmalloc_socket(dev->device->driver->name,
> +			RTE_BBDEV_TURBO_MAX_CB_SIZE * 10,
> +			RTE_CACHE_LINE_SIZE, conf->socket);
> +	if (q->derm_buffer == NULL) {
> +		rte_bbdev_log(ERR, "Failed to allocate derm_buffer memory");
> +		ret = -ENOMEM;
> +		goto free_companion_ring_addr;
> +	}
>   
>   	/*
>   	 * Software queue ring wraps synchronously with the HW when it reaches
> @@ -776,7 +788,7 @@ acc100_queue_setup(struct rte_bbdev *dev, uint16_t queue_id,
>   	q_idx = acc100_find_free_queue_idx(dev, conf);
>   	if (q_idx == -1) {
>   		ret = -EINVAL;
> -		goto free_companion_ring_addr;
> +		goto free_derm_buffer;
>   	}
>   
>   	q->qgrp_id = (q_idx >> ACC100_GRP_ID_SHIFT) & 0xF;
> @@ -804,6 +816,9 @@ acc100_queue_setup(struct rte_bbdev *dev, uint16_t queue_id,
>   	dev->data->queues[queue_id].queue_private = q;
>   	return 0;
>   
> +free_derm_buffer:
> +	rte_free(q->derm_buffer);
> +	q->derm_buffer = NULL;
>   free_companion_ring_addr:
>   	rte_free(q->companion_ring_addr);
>   	q->companion_ring_addr = NULL;
> @@ -890,6 +905,7 @@ acc100_queue_release(struct rte_bbdev *dev, uint16_t q_id)
>   		/* Mark the Queue as un-assigned */
>   		d->q_assigned_bit_map[q->qgrp_id] &= (0xFFFFFFFFFFFFFFFF -
>   				(uint64_t) (1 << q->aq_id));
> +		rte_free(q->derm_buffer);
>   		rte_free(q->companion_ring_addr);
>   		rte_free(q->lb_in);
>   		rte_free(q->lb_out);
> @@ -3111,10 +3127,44 @@ harq_loopback(struct acc_queue *q, struct rte_bbdev_dec_op *op,
>   	return 1;
>   }
>   
> +/** Assess whether a work around is required for the deRM corner cases */
> +static inline bool
> +derm_workaround_required(struct rte_bbdev_op_ldpc_dec *ldpc_dec, struct acc_queue *q)
> +{
> +	if (!is_acc100(q))
> +		return false;
> +	int32_t e = ldpc_dec->cb_params.e;
> +	int q_m = ldpc_dec->q_m;
> +	int z_c = ldpc_dec->z_c;
> +	int K = (ldpc_dec->basegraph == 1 ? ACC_K_ZC_1 : ACC_K_ZC_2)
> +			* z_c;
> +
> +	bool required = false;
> +	if (ldpc_dec->basegraph == 1) {
> +		if ((q_m == 4) && (z_c >= 320) && (e * ACC_LIM_31 > K * 64))
> +			required = true;
> +		else if ((e * ACC_LIM_21 > K * 64))
> +			required = true;
> +	} else {
> +		if (q_m <= 2) {
> +			if ((z_c >= 208) && (e * ACC_LIM_09 > K * 64))
> +				required = true;
> +			else if ((z_c < 208) && (e * ACC_LIM_03 > K * 64))
> +				required = true;
> +		} else if (e * ACC_LIM_14 > K * 64)
> +			required = true;
> +	}
> +	if (required)
> +		rte_bbdev_log(INFO, "Running deRM pre-processing in SW");
> +
> +	return required;
> +}
> +
>   /** Enqueue one decode operations for ACC100 device in CB mode */
>   static inline int
>   enqueue_ldpc_dec_one_op_cb(struct acc_queue *q, struct rte_bbdev_dec_op *op,
> -		uint16_t total_enqueued_cbs, bool same_op)
> +		uint16_t total_enqueued_cbs, bool same_op,
> +		struct rte_bbdev_queue_data *q_data)
>   {
>   	int ret;
>   	if (unlikely(check_bit(op->ldpc_dec.op_flags,
> @@ -3168,6 +3218,58 @@ enqueue_ldpc_dec_one_op_cb(struct acc_queue *q, struct rte_bbdev_dec_op *op,
>   	} else {
>   		struct acc_fcw_ld *fcw;
>   		uint32_t seg_total_left;
> +
> +		if (derm_workaround_required(&op->ldpc_dec, q)) {
> +			#ifdef RTE_BBDEV_SDK_AVX512
> +			struct rte_bbdev_op_ldpc_dec *dec = &op->ldpc_dec;
> +			struct bblib_rate_dematching_5gnr_request derm_req;
> +			struct bblib_rate_dematching_5gnr_response derm_resp;
> +			uint8_t *in;
> +
> +			/* Checking input size is matching with E */
> +			if (dec->input.data->data_len < dec->cb_params.e) {
> +				rte_bbdev_log(ERR, "deRM: Input size mismatch");
> +				return -EFAULT;
> +			}
> +			/* Run first deRM processing in SW */
> +			in = rte_pktmbuf_mtod_offset(dec->input.data, uint8_t *, in_offset);
> +			derm_req.p_in = (int8_t *) in;
> +			derm_req.p_harq = (int8_t *) q->derm_buffer;
> +			derm_req.base_graph = dec->basegraph;
> +			derm_req.zc = dec->z_c;
> +			derm_req.ncb = dec->n_cb;
> +			derm_req.e = dec->cb_params.e;
> +			if (derm_req.e > ACC_MAX_E) {
> +				rte_bbdev_log(WARNING,
> +						"deRM: E %d > %d max",
> +						derm_req.e, ACC_MAX_E);
> +				derm_req.e = ACC_MAX_E;
> +			}
> +			derm_req.k0 = 0; /* Actual output from SDK */
> +			derm_req.isretx = false;
> +			derm_req.rvid = dec->rv_index;
> +			derm_req.modulation_order = dec->q_m;
> +			derm_req.start_null_index =
> +					(dec->basegraph == 1 ? 22 : 10)
> +					* dec->z_c - 2 * dec->z_c
> +					- dec->n_filler;
> +			derm_req.num_of_null = dec->n_filler;
> +			bblib_rate_dematching_5gnr(&derm_req, &derm_resp);
> +			/* Force back the HW DeRM */
> +			dec->q_m = 1;
> +			dec->cb_params.e = dec->n_cb - dec->n_filler;
> +			dec->rv_index = 0;
> +			rte_memcpy(in, q->derm_buffer, dec->cb_params.e);
> +			/* Capture counter when pre-processing is used */
> +			q_data->queue_stats.enqueue_warn_count++;
> +			#else
> +			RTE_SET_USED(q_data);
> +			rte_bbdev_log(WARNING,
> +				"Corner case may require deRM pre-processing in SDK"
> +				);
> +			#endif
> +		}
> +
>   		fcw = &desc->req.fcw_ld;
>   		q->d->fcw_ld_fill(op, fcw, harq_layout);
>   
> @@ -3721,7 +3823,7 @@ acc100_enqueue_ldpc_dec_cb(struct rte_bbdev_queue_data *q_data,
>   			ops[i]->ldpc_dec.n_cb, ops[i]->ldpc_dec.q_m,
>   			ops[i]->ldpc_dec.n_filler, ops[i]->ldpc_dec.cb_params.e,
>   			same_op);
> -		ret = enqueue_ldpc_dec_one_op_cb(q, ops[i], i, same_op);
> +		ret = enqueue_ldpc_dec_one_op_cb(q, ops[i], i, same_op, q_data);
>   		if (ret < 0) {
>   			acc_enqueue_invalid(q_data);
>   			break;

As I already replied on a previous revision, I think it is not
acceptable to make the fix conditional on a dependency on a proprietary
SDK. The ACC100 driver did not have this Flexran SDK requirement
initially, unlike the SW-only BBDEV PMDs.

In my opinion, you can keep the call to the Flexran SDK API but you should
provide an open-source alternative, even if not optimized, so that it is
at least functional. Later contributors could provide an optimized
open-source version if they feel the need.

I had a quick look at the bblib_rate_dematching_5gnr C implementation,
and it is reasonably small and there is really nothing special in it.

Not having an open-source alternative is problematic, because it could
cause CI to fail when using the test-bbdev application if the HW bug is
triggered.

Regards,
Maxime
  
Chautru, Nicolas Oct. 25, 2022, 3:01 p.m. UTC | #2
Hi Maxime,

> -----Original Message-----
> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> Sent: Tuesday, October 25, 2022 1:26 AM
> To: Vargas, Hernan <hernan.vargas@intel.com>; dev@dpdk.org;
> gakhil@marvell.com; trix@redhat.com
> Cc: Chautru, Nicolas <nicolas.chautru@intel.com>; Zhang, Qi Z
> <qi.z.zhang@intel.com>; David Marchand <david.marchand@redhat.com>;
> Thomas Monjalon <thomas@monjalon.net>
> Subject: Re: [PATCH v6 1/1] baseband/acc100: add workaround for deRM
> corner cases
> 
> Hi Hernan,
> 
> On 10/25/22 04:38, Hernan Vargas wrote:
> > Add function to support de-ratematch pre-processing for SW corner cases.
> > Some specific 5GUL FEC corner cases may cause unintended back pressure
> > and in some cases a potential stability issue on the ACC100.
> > To be able to avoid completely such potential issue, the PMD can
> > preempt such code block configuration so that to process the first
> > level deRM in SW using the SDK libraries prior to running the rest of
> > the FEC decoding in HW using an amended code block configuration.
> > In case meson build system doesn't find such SDK libraries, the fall
> > method is to run in HW with a warning.
> >
> > Signed-off-by: Hernan Vargas <hernan.vargas@intel.com>
> > ---
> >   drivers/baseband/acc/acc_common.h     |   8 ++
> >   drivers/baseband/acc/meson.build      |  21 +++++
> >   drivers/baseband/acc/rte_acc100_pmd.c | 108
> +++++++++++++++++++++++++-
> >   3 files changed, 134 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/baseband/acc/acc_common.h
> > b/drivers/baseband/acc/acc_common.h
> > index eae7eab4e9..5e8972b40a 100644
> > --- a/drivers/baseband/acc/acc_common.h
> > +++ b/drivers/baseband/acc/acc_common.h
> > @@ -123,6 +123,14 @@
> >   #define ACC_HARQ_ALIGN_64B          64
> >   #define ACC_MAX_ZC                  384
> >
> > +/* De-ratematch code rate limitation when padding is required */
> > +#define ACC_LIM_03 2  /* 0.03 */
> > +#define ACC_LIM_09 6  /* 0.09 */
> > +#define ACC_LIM_14 9  /* 0.14 */
> > +#define ACC_LIM_21 14 /* 0.21 */
> > +#define ACC_LIM_31 20 /* 0.31 */
> > +#define ACC_MAX_E (128 * 1024 - 2)
> > +
> >   /* Helper macro for logging */
> >   #define rte_acc_log(level, fmt, ...) \
> >   	rte_log(RTE_LOG_ ## level, RTE_LOG_NOTICE, fmt "\n", \ diff --git
> > a/drivers/baseband/acc/meson.build b/drivers/baseband/acc/meson.build
> > index 77c393b533..a5fc4fed01 100644
> > --- a/drivers/baseband/acc/meson.build
> > +++ b/drivers/baseband/acc/meson.build
> > @@ -1,6 +1,27 @@
> >   # SPDX-License-Identifier: BSD-3-Clause
> >   # Copyright(c) 2020 Intel Corporation
> >
> > +# Check for FlexRAN SDK libraries
> > +dep_dec5g = dependency('flexran_sdk_ldpc_decoder_5gnr', required:
> > +false)
> > +
> > +if dep_dec5g.found()
> > +    ext_deps += cc.find_library('libstdc++', required: true)
> > +    ext_deps += cc.find_library('libirc', required: true)
> > +    ext_deps += cc.find_library('libimf', required: true)
> > +    ext_deps += cc.find_library('libipps', required: true)
> > +    ext_deps += cc.find_library('libsvml', required: true)
> > +    ext_deps += dep_dec5g
> > +    ext_deps += dependency('flexran_sdk_ldpc_encoder_5gnr', required:
> true)
> > +    ext_deps += dependency('flexran_sdk_LDPC_ratematch_5gnr', required:
> true)
> > +    ext_deps += dependency('flexran_sdk_rate_dematching_5gnr', required:
> true)
> > +    ext_deps += dependency('flexran_sdk_turbo', required: true)
> > +    ext_deps += dependency('flexran_sdk_crc', required: true)
> > +    ext_deps += dependency('flexran_sdk_rate_matching', required: true)
> > +    ext_deps += dependency('flexran_sdk_common', required: true)
> > +    cflags += ['-DRTE_BBDEV_SDK_AVX2']
> > +    cflags += ['-DRTE_BBDEV_SDK_AVX512'] endif
> > +
> >   deps += ['bbdev', 'bus_pci']
> >
> >   sources = files('rte_acc100_pmd.c', 'rte_acc200_pmd.c') diff --git
> > a/drivers/baseband/acc/rte_acc100_pmd.c
> > b/drivers/baseband/acc/rte_acc100_pmd.c
> > index 23bc5d25bb..e8b230e563 100644
> > --- a/drivers/baseband/acc/rte_acc100_pmd.c
> > +++ b/drivers/baseband/acc/rte_acc100_pmd.c
> > @@ -25,6 +25,10 @@
> >   #include "acc101_pmd.h"
> >   #include "acc200_cfg.h"
> >
> > +#ifdef RTE_BBDEV_SDK_AVX512
> > +#include <phy_rate_dematching_5gnr.h> #endif
> > +
> >   #ifdef RTE_LIBRTE_BBDEV_DEBUG
> >   RTE_LOG_REGISTER_DEFAULT(acc100_logtype, DEBUG);
> >   #else
> > @@ -756,6 +760,14 @@ acc100_queue_setup(struct rte_bbdev *dev,
> uint16_t queue_id,
> >   		ret = -ENOMEM;
> >   		goto free_lb_out;
> >   	}
> > +	q->derm_buffer = rte_zmalloc_socket(dev->device->driver->name,
> > +			RTE_BBDEV_TURBO_MAX_CB_SIZE * 10,
> > +			RTE_CACHE_LINE_SIZE, conf->socket);
> > +	if (q->derm_buffer == NULL) {
> > +		rte_bbdev_log(ERR, "Failed to allocate derm_buffer memory");
> > +		ret = -ENOMEM;
> > +		goto free_companion_ring_addr;
> > +	}
> >
> >   	/*
> >   	 * Software queue ring wraps synchronously with the HW when it
> > reaches @@ -776,7 +788,7 @@ acc100_queue_setup(struct rte_bbdev *dev,
> uint16_t queue_id,
> >   	q_idx = acc100_find_free_queue_idx(dev, conf);
> >   	if (q_idx == -1) {
> >   		ret = -EINVAL;
> > -		goto free_companion_ring_addr;
> > +		goto free_derm_buffer;
> >   	}
> >
> >   	q->qgrp_id = (q_idx >> ACC100_GRP_ID_SHIFT) & 0xF; @@ -804,6
> +816,9
> > @@ acc100_queue_setup(struct rte_bbdev *dev, uint16_t queue_id,
> >   	dev->data->queues[queue_id].queue_private = q;
> >   	return 0;
> >
> > +free_derm_buffer:
> > +	rte_free(q->derm_buffer);
> > +	q->derm_buffer = NULL;
> >   free_companion_ring_addr:
> >   	rte_free(q->companion_ring_addr);
> >   	q->companion_ring_addr = NULL;
> > @@ -890,6 +905,7 @@ acc100_queue_release(struct rte_bbdev *dev,
> uint16_t q_id)
> >   		/* Mark the Queue as un-assigned */
> >   		d->q_assigned_bit_map[q->qgrp_id] &= (0xFFFFFFFFFFFFFFFF -
> >   				(uint64_t) (1 << q->aq_id));
> > +		rte_free(q->derm_buffer);
> >   		rte_free(q->companion_ring_addr);
> >   		rte_free(q->lb_in);
> >   		rte_free(q->lb_out);
> > @@ -3111,10 +3127,44 @@ harq_loopback(struct acc_queue *q, struct
> rte_bbdev_dec_op *op,
> >   	return 1;
> >   }
> >
> > +/** Assess whether a work around is required for the deRM corner cases */
> > +static inline bool
> > +derm_workaround_required(struct rte_bbdev_op_ldpc_dec *ldpc_dec, struct acc_queue *q)
> > +{
> > +	if (!is_acc100(q))
> > +		return false;
> > +	int32_t e = ldpc_dec->cb_params.e;
> > +	int q_m = ldpc_dec->q_m;
> > +	int z_c = ldpc_dec->z_c;
> > +	int K = (ldpc_dec->basegraph == 1 ? ACC_K_ZC_1 : ACC_K_ZC_2)
> > +			* z_c;
> > +
> > +	bool required = false;
> > +	if (ldpc_dec->basegraph == 1) {
> > +		if ((q_m == 4) && (z_c >= 320) && (e * ACC_LIM_31 > K * 64))
> > +			required = true;
> > +		else if ((e * ACC_LIM_21 > K * 64))
> > +			required = true;
> > +	} else {
> > +		if (q_m <= 2) {
> > +			if ((z_c >= 208) && (e * ACC_LIM_09 > K * 64))
> > +				required = true;
> > +			else if ((z_c < 208) && (e * ACC_LIM_03 > K * 64))
> > +				required = true;
> > +		} else if (e * ACC_LIM_14 > K * 64)
> > +			required = true;
> > +	}
> > +	if (required)
> > +		rte_bbdev_log(INFO, "Running deRM pre-processing in SW");
> > +
> > +	return required;
> > +}
> > +
> >   /** Enqueue one decode operations for ACC100 device in CB mode */
> >   static inline int
> >   enqueue_ldpc_dec_one_op_cb(struct acc_queue *q, struct
> rte_bbdev_dec_op *op,
> > -		uint16_t total_enqueued_cbs, bool same_op)
> > +		uint16_t total_enqueued_cbs, bool same_op,
> > +		struct rte_bbdev_queue_data *q_data)
> >   {
> >   	int ret;
> >   	if (unlikely(check_bit(op->ldpc_dec.op_flags,
> > @@ -3168,6 +3218,58 @@ enqueue_ldpc_dec_one_op_cb(struct acc_queue
> *q, struct rte_bbdev_dec_op *op,
> >   	} else {
> >   		struct acc_fcw_ld *fcw;
> >   		uint32_t seg_total_left;
> > +
> > +		if (derm_workaround_required(&op->ldpc_dec, q)) {
> > +			#ifdef RTE_BBDEV_SDK_AVX512
> > +			struct rte_bbdev_op_ldpc_dec *dec = &op->ldpc_dec;
> > +			struct bblib_rate_dematching_5gnr_request
> derm_req;
> > +			struct bblib_rate_dematching_5gnr_response
> derm_resp;
> > +			uint8_t *in;
> > +
> > +			/* Checking input size is matching with E */
> > +			if (dec->input.data->data_len < dec->cb_params.e) {
> > +				rte_bbdev_log(ERR, "deRM: Input size
> mismatch");
> > +				return -EFAULT;
> > +			}
> > +			/* Run first deRM processing in SW */
> > +			in = rte_pktmbuf_mtod_offset(dec->input.data, uint8_t
> *, in_offset);
> > +			derm_req.p_in = (int8_t *) in;
> > +			derm_req.p_harq = (int8_t *) q->derm_buffer;
> > +			derm_req.base_graph = dec->basegraph;
> > +			derm_req.zc = dec->z_c;
> > +			derm_req.ncb = dec->n_cb;
> > +			derm_req.e = dec->cb_params.e;
> > +			if (derm_req.e > ACC_MAX_E) {
> > +				rte_bbdev_log(WARNING,
> > +						"deRM: E %d > %d max",
> > +						derm_req.e, ACC_MAX_E);
> > +				derm_req.e = ACC_MAX_E;
> > +			}
> > +			derm_req.k0 = 0; /* Actual output from SDK */
> > +			derm_req.isretx = false;
> > +			derm_req.rvid = dec->rv_index;
> > +			derm_req.modulation_order = dec->q_m;
> > +			derm_req.start_null_index =
> > +					(dec->basegraph == 1 ? 22 : 10)
> > +					* dec->z_c - 2 * dec->z_c
> > +					- dec->n_filler;
> > +			derm_req.num_of_null = dec->n_filler;
> > +			bblib_rate_dematching_5gnr(&derm_req,
> &derm_resp);
> > +			/* Force back the HW DeRM */
> > +			dec->q_m = 1;
> > +			dec->cb_params.e = dec->n_cb - dec->n_filler;
> > +			dec->rv_index = 0;
> > +			rte_memcpy(in, q->derm_buffer, dec->cb_params.e);
> > +			/* Capture counter when pre-processing is used */
> > +			q_data->queue_stats.enqueue_warn_count++;
> > +			#else
> > +			RTE_SET_USED(q_data);
> > +			rte_bbdev_log(WARNING,
> > +				"Corner case may require deRM pre-
> processing in SDK"
> > +				);
> > +			#endif
> > +		}
> > +
> >   		fcw = &desc->req.fcw_ld;
> >   		q->d->fcw_ld_fill(op, fcw, harq_layout);
> >
> > @@ -3721,7 +3823,7 @@ acc100_enqueue_ldpc_dec_cb(struct
> rte_bbdev_queue_data *q_data,
> >   			ops[i]->ldpc_dec.n_cb, ops[i]->ldpc_dec.q_m,
> >   			ops[i]->ldpc_dec.n_filler, ops[i]-
> >ldpc_dec.cb_params.e,
> >   			same_op);
> > -		ret = enqueue_ldpc_dec_one_op_cb(q, ops[i], i, same_op);
> > +		ret = enqueue_ldpc_dec_one_op_cb(q, ops[i], i, same_op,
> q_data);
> >   		if (ret < 0) {
> >   			acc_enqueue_invalid(q_data);
> >   			break;
> 
> As I already replied on a previous revision, I think it is not acceptable to
> condition the fix on forcing to depend on a proprietary SDK. The ACC100 driver
> did not have this Flexran SDK requirement initially, not like the SW-only BBDEV
> PMDs.

The SDK is not proprietary. It is available on IDZ with a permissive license and includes a plain C code implementation in case there is no AVX512 ISA on the CPU.

> In my opinion, you can keep the call to Flexran SDK API but you should provide
> an open-source alternative, even if not optimized so that it is at least
> functionnal. Later contributors could provide an optimized open-source version
> if they feel the need.

Again, the SDK is available on the IDZ with a permissive license and includes a plain C implementation.

> I had a quick look at the bblib_rate_dematching_5gnr C implementation, and it
> is reasonably small and there is really nothing special in it.
> 
> Not having an open-source alternative is problematic, because it could make CI
> to fail when using test-bbdev application if the HW bug is triggered.

CI would not fail. 
We can discuss further offline later this week as there seems to be some confusion still. 

Thanks
Nic

> 
> Regards,
> Maxime
  
Maxime Coquelin Oct. 25, 2022, 4 p.m. UTC | #3
Hi Nicolas,

On 10/25/22 17:01, Chautru, Nicolas wrote:
> Hi Maxime,
> 
>> -----Original Message-----
>> From: Maxime Coquelin <maxime.coquelin@redhat.com>
>> Sent: Tuesday, October 25, 2022 1:26 AM
>> To: Vargas, Hernan <hernan.vargas@intel.com>; dev@dpdk.org;
>> gakhil@marvell.com; trix@redhat.com
>> Cc: Chautru, Nicolas <nicolas.chautru@intel.com>; Zhang, Qi Z
>> <qi.z.zhang@intel.com>; David Marchand <david.marchand@redhat.com>;
>> Thomas Monjalon <thomas@monjalon.net>
>> Subject: Re: [PATCH v6 1/1] baseband/acc100: add workaround for deRM
>> corner cases
>>
>> Hi Hernan,
>>
>> On 10/25/22 04:38, Hernan Vargas wrote:
>>> Add function to support de-ratematch pre-processing for SW corner cases.
>>> Some specific 5GUL FEC corner cases may cause unintended back pressure
>>> and in some cases a potential stability issue on the ACC100.
>>> To be able to avoid completely such potential issue, the PMD can
>>> preempt such code block configuration so that to process the first
>>> level deRM in SW using the SDK libraries prior to running the rest of
>>> the FEC decoding in HW using an amended code block configuration.
>>> In case meson build system doesn't find such SDK libraries, the fall
>>> method is to run in HW with a warning.
>>>
>>> Signed-off-by: Hernan Vargas <hernan.vargas@intel.com>
>>> ---
>>>    drivers/baseband/acc/acc_common.h     |   8 ++
>>>    drivers/baseband/acc/meson.build      |  21 +++++
>>>    drivers/baseband/acc/rte_acc100_pmd.c | 108
>> +++++++++++++++++++++++++-
>>>    3 files changed, 134 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/drivers/baseband/acc/acc_common.h
>>> b/drivers/baseband/acc/acc_common.h
>>> index eae7eab4e9..5e8972b40a 100644
>>> --- a/drivers/baseband/acc/acc_common.h
>>> +++ b/drivers/baseband/acc/acc_common.h
>>> @@ -123,6 +123,14 @@
>>>    #define ACC_HARQ_ALIGN_64B          64
>>>    #define ACC_MAX_ZC                  384
>>>
>>> +/* De-ratematch code rate limitation when padding is required */
>>> +#define ACC_LIM_03 2  /* 0.03 */
>>> +#define ACC_LIM_09 6  /* 0.09 */
>>> +#define ACC_LIM_14 9  /* 0.14 */
>>> +#define ACC_LIM_21 14 /* 0.21 */
>>> +#define ACC_LIM_31 20 /* 0.31 */
>>> +#define ACC_MAX_E (128 * 1024 - 2)
>>> +
>>>    /* Helper macro for logging */
>>>    #define rte_acc_log(level, fmt, ...) \
>>>    	rte_log(RTE_LOG_ ## level, RTE_LOG_NOTICE, fmt "\n", \ diff --git
>>> a/drivers/baseband/acc/meson.build b/drivers/baseband/acc/meson.build
>>> index 77c393b533..a5fc4fed01 100644
>>> --- a/drivers/baseband/acc/meson.build
>>> +++ b/drivers/baseband/acc/meson.build
>>> @@ -1,6 +1,27 @@
>>>    # SPDX-License-Identifier: BSD-3-Clause
>>>    # Copyright(c) 2020 Intel Corporation
>>>
>>> +# Check for FlexRAN SDK libraries
>>> +dep_dec5g = dependency('flexran_sdk_ldpc_decoder_5gnr', required:
>>> +false)
>>> +
>>> +if dep_dec5g.found()
>>> +    ext_deps += cc.find_library('libstdc++', required: true)
>>> +    ext_deps += cc.find_library('libirc', required: true)
>>> +    ext_deps += cc.find_library('libimf', required: true)
>>> +    ext_deps += cc.find_library('libipps', required: true)
>>> +    ext_deps += cc.find_library('libsvml', required: true)
>>> +    ext_deps += dep_dec5g
>>> +    ext_deps += dependency('flexran_sdk_ldpc_encoder_5gnr', required:
>> true)
>>> +    ext_deps += dependency('flexran_sdk_LDPC_ratematch_5gnr', required:
>> true)
>>> +    ext_deps += dependency('flexran_sdk_rate_dematching_5gnr', required:
>> true)
>>> +    ext_deps += dependency('flexran_sdk_turbo', required: true)
>>> +    ext_deps += dependency('flexran_sdk_crc', required: true)
>>> +    ext_deps += dependency('flexran_sdk_rate_matching', required: true)
>>> +    ext_deps += dependency('flexran_sdk_common', required: true)
>>> +    cflags += ['-DRTE_BBDEV_SDK_AVX2']
>>> +    cflags += ['-DRTE_BBDEV_SDK_AVX512'] endif
>>> +
>>>    deps += ['bbdev', 'bus_pci']
>>>
>>>    sources = files('rte_acc100_pmd.c', 'rte_acc200_pmd.c') diff --git
>>> a/drivers/baseband/acc/rte_acc100_pmd.c
>>> b/drivers/baseband/acc/rte_acc100_pmd.c
>>> index 23bc5d25bb..e8b230e563 100644
>>> --- a/drivers/baseband/acc/rte_acc100_pmd.c
>>> +++ b/drivers/baseband/acc/rte_acc100_pmd.c
>>> @@ -25,6 +25,10 @@
>>>    #include "acc101_pmd.h"
>>>    #include "acc200_cfg.h"
>>>
>>> +#ifdef RTE_BBDEV_SDK_AVX512
>>> +#include <phy_rate_dematching_5gnr.h> #endif
>>> +
>>>    #ifdef RTE_LIBRTE_BBDEV_DEBUG
>>>    RTE_LOG_REGISTER_DEFAULT(acc100_logtype, DEBUG);
>>>    #else
>>> @@ -756,6 +760,14 @@ acc100_queue_setup(struct rte_bbdev *dev,
>> uint16_t queue_id,
>>>    		ret = -ENOMEM;
>>>    		goto free_lb_out;
>>>    	}
>>> +	q->derm_buffer = rte_zmalloc_socket(dev->device->driver->name,
>>> +			RTE_BBDEV_TURBO_MAX_CB_SIZE * 10,
>>> +			RTE_CACHE_LINE_SIZE, conf->socket);
>>> +	if (q->derm_buffer == NULL) {
>>> +		rte_bbdev_log(ERR, "Failed to allocate derm_buffer memory");
>>> +		ret = -ENOMEM;
>>> +		goto free_companion_ring_addr;
>>> +	}
>>>
>>>    	/*
>>>    	 * Software queue ring wraps synchronously with the HW when it
>>> reaches @@ -776,7 +788,7 @@ acc100_queue_setup(struct rte_bbdev *dev,
>> uint16_t queue_id,
>>>    	q_idx = acc100_find_free_queue_idx(dev, conf);
>>>    	if (q_idx == -1) {
>>>    		ret = -EINVAL;
>>> -		goto free_companion_ring_addr;
>>> +		goto free_derm_buffer;
>>>    	}
>>>
>>>    	q->qgrp_id = (q_idx >> ACC100_GRP_ID_SHIFT) & 0xF; @@ -804,6
>> +816,9
>>> @@ acc100_queue_setup(struct rte_bbdev *dev, uint16_t queue_id,
>>>    	dev->data->queues[queue_id].queue_private = q;
>>>    	return 0;
>>>
>>> +free_derm_buffer:
>>> +	rte_free(q->derm_buffer);
>>> +	q->derm_buffer = NULL;
>>>    free_companion_ring_addr:
>>>    	rte_free(q->companion_ring_addr);
>>>    	q->companion_ring_addr = NULL;
>>> @@ -890,6 +905,7 @@ acc100_queue_release(struct rte_bbdev *dev,
>> uint16_t q_id)
>>>    		/* Mark the Queue as un-assigned */
>>>    		d->q_assigned_bit_map[q->qgrp_id] &= (0xFFFFFFFFFFFFFFFF -
>>>    				(uint64_t) (1 << q->aq_id));
>>> +		rte_free(q->derm_buffer);
>>>    		rte_free(q->companion_ring_addr);
>>>    		rte_free(q->lb_in);
>>>    		rte_free(q->lb_out);
>>> @@ -3111,10 +3127,44 @@ harq_loopback(struct acc_queue *q, struct
>> rte_bbdev_dec_op *op,
>>>    	return 1;
>>>    }
>>>
>>> +/** Assess whether a work around is required for the deRM corner cases */
>>> +static inline bool
>>> +derm_workaround_required(struct rte_bbdev_op_ldpc_dec *ldpc_dec, struct acc_queue *q)
>>> +{
>>> +	if (!is_acc100(q))
>>> +		return false;
>>> +	int32_t e = ldpc_dec->cb_params.e;
>>> +	int q_m = ldpc_dec->q_m;
>>> +	int z_c = ldpc_dec->z_c;
>>> +	int K = (ldpc_dec->basegraph == 1 ? ACC_K_ZC_1 : ACC_K_ZC_2)
>>> +			* z_c;
>>> +
>>> +	bool required = false;
>>> +	if (ldpc_dec->basegraph == 1) {
>>> +		if ((q_m == 4) && (z_c >= 320) && (e * ACC_LIM_31 > K * 64))
>>> +			required = true;
>>> +		else if ((e * ACC_LIM_21 > K * 64))
>>> +			required = true;
>>> +	} else {
>>> +		if (q_m <= 2) {
>>> +			if ((z_c >= 208) && (e * ACC_LIM_09 > K * 64))
>>> +				required = true;
>>> +			else if ((z_c < 208) && (e * ACC_LIM_03 > K * 64))
>>> +				required = true;
>>> +		} else if (e * ACC_LIM_14 > K * 64)
>>> +			required = true;
>>> +	}
>>> +	if (required)
>>> +		rte_bbdev_log(INFO, "Running deRM pre-processing in SW");
>>> +
>>> +	return required;
>>> +}
>>> +
>>>    /** Enqueue one decode operations for ACC100 device in CB mode */
>>>    static inline int
>>>    enqueue_ldpc_dec_one_op_cb(struct acc_queue *q, struct
>> rte_bbdev_dec_op *op,
>>> -		uint16_t total_enqueued_cbs, bool same_op)
>>> +		uint16_t total_enqueued_cbs, bool same_op,
>>> +		struct rte_bbdev_queue_data *q_data)
>>>    {
>>>    	int ret;
>>>    	if (unlikely(check_bit(op->ldpc_dec.op_flags,
>>> @@ -3168,6 +3218,58 @@ enqueue_ldpc_dec_one_op_cb(struct acc_queue
>> *q, struct rte_bbdev_dec_op *op,
>>>    	} else {
>>>    		struct acc_fcw_ld *fcw;
>>>    		uint32_t seg_total_left;
>>> +
>>> +		if (derm_workaround_required(&op->ldpc_dec, q)) {
>>> +			#ifdef RTE_BBDEV_SDK_AVX512
>>> +			struct rte_bbdev_op_ldpc_dec *dec = &op->ldpc_dec;
>>> +			struct bblib_rate_dematching_5gnr_request
>> derm_req;
>>> +			struct bblib_rate_dematching_5gnr_response
>> derm_resp;
>>> +			uint8_t *in;
>>> +
>>> +			/* Checking input size is matching with E */
>>> +			if (dec->input.data->data_len < dec->cb_params.e) {
>>> +				rte_bbdev_log(ERR, "deRM: Input size
>> mismatch");
>>> +				return -EFAULT;
>>> +			}
>>> +			/* Run first deRM processing in SW */
>>> +			in = rte_pktmbuf_mtod_offset(dec->input.data, uint8_t
>> *, in_offset);
>>> +			derm_req.p_in = (int8_t *) in;
>>> +			derm_req.p_harq = (int8_t *) q->derm_buffer;
>>> +			derm_req.base_graph = dec->basegraph;
>>> +			derm_req.zc = dec->z_c;
>>> +			derm_req.ncb = dec->n_cb;
>>> +			derm_req.e = dec->cb_params.e;
>>> +			if (derm_req.e > ACC_MAX_E) {
>>> +				rte_bbdev_log(WARNING,
>>> +						"deRM: E %d > %d max",
>>> +						derm_req.e, ACC_MAX_E);
>>> +				derm_req.e = ACC_MAX_E;
>>> +			}
>>> +			derm_req.k0 = 0; /* Actual output from SDK */
>>> +			derm_req.isretx = false;
>>> +			derm_req.rvid = dec->rv_index;
>>> +			derm_req.modulation_order = dec->q_m;
>>> +			derm_req.start_null_index =
>>> +					(dec->basegraph == 1 ? 22 : 10)
>>> +					* dec->z_c - 2 * dec->z_c
>>> +					- dec->n_filler;
>>> +			derm_req.num_of_null = dec->n_filler;
>>> +			bblib_rate_dematching_5gnr(&derm_req,
>> &derm_resp);
>>> +			/* Force back the HW DeRM */
>>> +			dec->q_m = 1;
>>> +			dec->cb_params.e = dec->n_cb - dec->n_filler;
>>> +			dec->rv_index = 0;
>>> +			rte_memcpy(in, q->derm_buffer, dec->cb_params.e);
>>> +			/* Capture counter when pre-processing is used */
>>> +			q_data->queue_stats.enqueue_warn_count++;
>>> +			#else
>>> +			RTE_SET_USED(q_data);
>>> +			rte_bbdev_log(WARNING,
>>> +				"Corner case may require deRM pre-
>> processing in SDK"
>>> +				);
>>> +			#endif
>>> +		}
>>> +
>>>    		fcw = &desc->req.fcw_ld;
>>>    		q->d->fcw_ld_fill(op, fcw, harq_layout);
>>>
>>> @@ -3721,7 +3823,7 @@ acc100_enqueue_ldpc_dec_cb(struct
>> rte_bbdev_queue_data *q_data,
>>>    			ops[i]->ldpc_dec.n_cb, ops[i]->ldpc_dec.q_m,
>>>    			ops[i]->ldpc_dec.n_filler, ops[i]-
>>> ldpc_dec.cb_params.e,
>>>    			same_op);
>>> -		ret = enqueue_ldpc_dec_one_op_cb(q, ops[i], i, same_op);
>>> +		ret = enqueue_ldpc_dec_one_op_cb(q, ops[i], i, same_op,
>> q_data);
>>>    		if (ret < 0) {
>>>    			acc_enqueue_invalid(q_data);
>>>    			break;
>>
>> As I already replied on a previous revision, I think it is not acceptable to
>> condition the fix on forcing to depend on a proprietary SDK. The ACC100 driver
>> did not have this Flexran SDK requirement initially, not like the SW-only BBDEV
>> PMDs.
> 
> The SDK is not proprietary. It is available on IDZ with a permissive license and includes a plain C code implementation in case there is no AVX512 ISA on the CPU.

Maybe I did not look at the right place then, feel free to correct me.
I got the link from the turbo SW PMD documentation [0][1].

In the downloaded archive, there is a license file:
  - Intel_OBL_Commercial_Use_License.txt

This license states:
"
LICENSE. Licensee has a license under Intel’s copyrights to reproduce
Intel’s Software only in its unmodified and binary form, (with the
accompanying documentation, the “Software”) for Licensee’s personal use
only, and not commercial use, in connection with Intel-based products
for which the Software has been provided, subject to the following
conditions
"

Also, the file containing the needed function has a licence header stating:

"
INTEL CONFIDENTIAL

...

No part of the Material may be used, copied, reproduced, modified,
published, uploaded, posted, transmitted, distributed, or disclosed
in any way without Intel's prior express written permission.
"

IANAL, but I don't consider this to be a permissive license.

> 
>> In my opinion, you can keep the call to Flexran SDK API but you should provide
>> an open-source alternative, even if not optimized so that it is at least
>> functional. Later contributors could provide an optimized open-source version
>> if they feel the need.
> 
> Again, the SDK is available on IDZ with a permissive license and includes a plain C implementation.
> 
>> I had a quick look at the bblib_rate_dematching_5gnr C implementation, and it
>> is reasonably small and there is really nothing special in it.
>>
>> Not having an open-source alternative is problematic, because it could make CI
>> fail when using the test-bbdev application if the HW bug is triggered.
> 
> CI would not fail.
> We can discuss further offline later this week as there seems to be some confusion still.

Let's continue the discussion here.

Thanks,
Maxime

> Thanks
> Nic
> 
>>
>> Regards,
>> Maxime
> 

[0]: http://doc.dpdk.org/guides/bbdevs/turbo_sw.html
[1]: 
https://www.intel.com/content/www/us/en/developer/articles/technical/flexran-lte-and-5g-nr-fec-software-development-kit-modules.html
  
Tom Rix Oct. 29, 2022, 12:11 p.m. UTC | #4
On 10/25/22 8:01 AM, Chautru, Nicolas wrote:
> Hi Maxime,
>
>> -----Original Message-----
>> From: Maxime Coquelin <maxime.coquelin@redhat.com>
>> Sent: Tuesday, October 25, 2022 1:26 AM
>> To: Vargas, Hernan <hernan.vargas@intel.com>; dev@dpdk.org;
>> gakhil@marvell.com; trix@redhat.com
>> Cc: Chautru, Nicolas <nicolas.chautru@intel.com>; Zhang, Qi Z
>> <qi.z.zhang@intel.com>; David Marchand <david.marchand@redhat.com>;
>> Thomas Monjalon <thomas@monjalon.net>
>> Subject: Re: [PATCH v6 1/1] baseband/acc100: add workaround for deRM
>> corner cases
>>
>> Hi Hernan,
>>
>> On 10/25/22 04:38, Hernan Vargas wrote:
>>> Add function to support de-ratematch pre-processing for SW corner cases.
>>> Some specific 5GUL FEC corner cases may cause unintended back pressure
>>> and in some cases a potential stability issue on the ACC100.
>>> To be able to avoid completely such potential issue, the PMD can
>>> preempt such code block configuration so that to process the first
>>> level deRM in SW using the SDK libraries prior to running the rest of
>>> the FEC decoding in HW using an amended code block configuration.
>>> In case meson build system doesn't find such SDK libraries, the fall
>>> method is to run in HW with a warning.
>>>
>>> Signed-off-by: Hernan Vargas <hernan.vargas@intel.com>
>>> ---
>>>    drivers/baseband/acc/acc_common.h     |   8 ++
>>>    drivers/baseband/acc/meson.build      |  21 +++++
>>>    drivers/baseband/acc/rte_acc100_pmd.c | 108
>> +++++++++++++++++++++++++-
>>>    3 files changed, 134 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/drivers/baseband/acc/acc_common.h
>>> b/drivers/baseband/acc/acc_common.h
>>> index eae7eab4e9..5e8972b40a 100644
>>> --- a/drivers/baseband/acc/acc_common.h
>>> +++ b/drivers/baseband/acc/acc_common.h
>>> @@ -123,6 +123,14 @@
>>>    #define ACC_HARQ_ALIGN_64B          64
>>>    #define ACC_MAX_ZC                  384
>>>
>>> +/* De-ratematch code rate limitation when padding is required */
>>> +#define ACC_LIM_03 2  /* 0.03 */ #define ACC_LIM_09 6  /* 0.09 */
>>> +#define ACC_LIM_14 9  /* 0.14 */ #define ACC_LIM_21 14 /* 0.21 */
>>> +#define ACC_LIM_31 20 /* 0.31 */ #define ACC_MAX_E (128 * 1024 - 2)
>>> +
>>>    /* Helper macro for logging */
>>>    #define rte_acc_log(level, fmt, ...) \
>>>    	rte_log(RTE_LOG_ ## level, RTE_LOG_NOTICE, fmt "\n", \ diff --git
>>> a/drivers/baseband/acc/meson.build b/drivers/baseband/acc/meson.build
>>> index 77c393b533..a5fc4fed01 100644
>>> --- a/drivers/baseband/acc/meson.build
>>> +++ b/drivers/baseband/acc/meson.build
>>> @@ -1,6 +1,27 @@
>>>    # SPDX-License-Identifier: BSD-3-Clause
>>>    # Copyright(c) 2020 Intel Corporation
>>>
>>> +# Check for FlexRAN SDK libraries
>>> +dep_dec5g = dependency('flexran_sdk_ldpc_decoder_5gnr', required:
>>> +false)
>>> +
>>> +if dep_dec5g.found()
>>> +    ext_deps += cc.find_library('libstdc++', required: true)
>>> +    ext_deps += cc.find_library('libirc', required: true)
>>> +    ext_deps += cc.find_library('libimf', required: true)
>>> +    ext_deps += cc.find_library('libipps', required: true)
>>> +    ext_deps += cc.find_library('libsvml', required: true)
>>> +    ext_deps += dep_dec5g
>>> +    ext_deps += dependency('flexran_sdk_ldpc_encoder_5gnr', required:
>> true)
>>> +    ext_deps += dependency('flexran_sdk_LDPC_ratematch_5gnr', required:
>> true)
>>> +    ext_deps += dependency('flexran_sdk_rate_dematching_5gnr', required:
>> true)
>>> +    ext_deps += dependency('flexran_sdk_turbo', required: true)
>>> +    ext_deps += dependency('flexran_sdk_crc', required: true)
>>> +    ext_deps += dependency('flexran_sdk_rate_matching', required: true)
>>> +    ext_deps += dependency('flexran_sdk_common', required: true)
>>> +    cflags += ['-DRTE_BBDEV_SDK_AVX2']
>>> +    cflags += ['-DRTE_BBDEV_SDK_AVX512'] endif
>>> +
>>>    deps += ['bbdev', 'bus_pci']
>>>
>>>    sources = files('rte_acc100_pmd.c', 'rte_acc200_pmd.c') diff --git
>>> a/drivers/baseband/acc/rte_acc100_pmd.c
>>> b/drivers/baseband/acc/rte_acc100_pmd.c
>>> index 23bc5d25bb..e8b230e563 100644
>>> --- a/drivers/baseband/acc/rte_acc100_pmd.c
>>> +++ b/drivers/baseband/acc/rte_acc100_pmd.c
>>> @@ -25,6 +25,10 @@
>>>    #include "acc101_pmd.h"
>>>    #include "acc200_cfg.h"
>>>
>>> +#ifdef RTE_BBDEV_SDK_AVX512
>>> +#include <phy_rate_dematching_5gnr.h> #endif
>>> +
>>>    #ifdef RTE_LIBRTE_BBDEV_DEBUG
>>>    RTE_LOG_REGISTER_DEFAULT(acc100_logtype, DEBUG);
>>>    #else
>>> @@ -756,6 +760,14 @@ acc100_queue_setup(struct rte_bbdev *dev,
>> uint16_t queue_id,
>>>    		ret = -ENOMEM;
>>>    		goto free_lb_out;
>>>    	}
>>> +	q->derm_buffer = rte_zmalloc_socket(dev->device->driver->name,
>>> +			RTE_BBDEV_TURBO_MAX_CB_SIZE * 10,
>>> +			RTE_CACHE_LINE_SIZE, conf->socket);
>>> +	if (q->derm_buffer == NULL) {
>>> +		rte_bbdev_log(ERR, "Failed to allocate derm_buffer memory");
>>> +		ret = -ENOMEM;
>>> +		goto free_companion_ring_addr;
>>> +	}
>>>
>>>    	/*
>>>    	 * Software queue ring wraps synchronously with the HW when it
>>> reaches @@ -776,7 +788,7 @@ acc100_queue_setup(struct rte_bbdev *dev,
>> uint16_t queue_id,
>>>    	q_idx = acc100_find_free_queue_idx(dev, conf);
>>>    	if (q_idx == -1) {
>>>    		ret = -EINVAL;
>>> -		goto free_companion_ring_addr;
>>> +		goto free_derm_buffer;
>>>    	}
>>>
>>>    	q->qgrp_id = (q_idx >> ACC100_GRP_ID_SHIFT) & 0xF; @@ -804,6
>> +816,9
>>> @@ acc100_queue_setup(struct rte_bbdev *dev, uint16_t queue_id,
>>>    	dev->data->queues[queue_id].queue_private = q;
>>>    	return 0;
>>>
>>> +free_derm_buffer:
>>> +	rte_free(q->derm_buffer);
>>> +	q->derm_buffer = NULL;
>>>    free_companion_ring_addr:
>>>    	rte_free(q->companion_ring_addr);
>>>    	q->companion_ring_addr = NULL;
>>> @@ -890,6 +905,7 @@ acc100_queue_release(struct rte_bbdev *dev,
>> uint16_t q_id)
>>>    		/* Mark the Queue as un-assigned */
>>>    		d->q_assigned_bit_map[q->qgrp_id] &= (0xFFFFFFFFFFFFFFFF -
>>>    				(uint64_t) (1 << q->aq_id));
>>> +		rte_free(q->derm_buffer);
>>>    		rte_free(q->companion_ring_addr);
>>>    		rte_free(q->lb_in);
>>>    		rte_free(q->lb_out);
>>> @@ -3111,10 +3127,44 @@ harq_loopback(struct acc_queue *q, struct
>> rte_bbdev_dec_op *op,
>>>    	return 1;
>>>    }
>>>
>>> +/** Assess whether a work around is required for the deRM corner
>>> +cases */ static inline bool derm_workaround_required(struct
>>> +rte_bbdev_op_ldpc_dec *ldpc_dec, struct acc_queue *q) {
>>> +	if (!is_acc100(q))
>>> +		return false;
>>> +	int32_t e = ldpc_dec->cb_params.e;
>>> +	int q_m = ldpc_dec->q_m;
>>> +	int z_c = ldpc_dec->z_c;
>>> +	int K = (ldpc_dec->basegraph == 1 ? ACC_K_ZC_1 : ACC_K_ZC_2)
>>> +			* z_c;
>>> +
>>> +	bool required = false;
>>> +	if (ldpc_dec->basegraph == 1) {
>>> +		if ((q_m == 4) && (z_c >= 320) && (e * ACC_LIM_31 > K * 64))
>>> +			required = true;
>>> +		else if ((e * ACC_LIM_21 > K * 64))
>>> +			required = true;
>>> +	} else {
>>> +		if (q_m <= 2) {
>>> +			if ((z_c >= 208) && (e * ACC_LIM_09 > K * 64))
>>> +				required = true;
>>> +			else if ((z_c < 208) && (e * ACC_LIM_03 > K * 64))
>>> +				required = true;
>>> +		} else if (e * ACC_LIM_14 > K * 64)
>>> +			required = true;
>>> +	}
>>> +	if (required)
>>> +		rte_bbdev_log(INFO, "Running deRM pre-processing in SW");
>>> +
>>> +	return required;
>>> +}
>>> +
>>>    /** Enqueue one decode operations for ACC100 device in CB mode */
>>>    static inline int
>>>    enqueue_ldpc_dec_one_op_cb(struct acc_queue *q, struct
>> rte_bbdev_dec_op *op,
>>> -		uint16_t total_enqueued_cbs, bool same_op)
>>> +		uint16_t total_enqueued_cbs, bool same_op,
>>> +		struct rte_bbdev_queue_data *q_data)
>>>    {
>>>    	int ret;
>>>    	if (unlikely(check_bit(op->ldpc_dec.op_flags,
>>> @@ -3168,6 +3218,58 @@ enqueue_ldpc_dec_one_op_cb(struct acc_queue
>> *q, struct rte_bbdev_dec_op *op,
>>>    	} else {
>>>    		struct acc_fcw_ld *fcw;
>>>    		uint32_t seg_total_left;
>>> +
>>> +		if (derm_workaround_required(&op->ldpc_dec, q)) {
>>> +			#ifdef RTE_BBDEV_SDK_AVX512
>>> +			struct rte_bbdev_op_ldpc_dec *dec = &op->ldpc_dec;
>>> +			struct bblib_rate_dematching_5gnr_request
>> derm_req;
>>> +			struct bblib_rate_dematching_5gnr_response
>> derm_resp;
>>> +			uint8_t *in;
>>> +
>>> +			/* Checking input size is matching with E */
>>> +			if (dec->input.data->data_len < dec->cb_params.e) {
>>> +				rte_bbdev_log(ERR, "deRM: Input size
>> mismatch");
>>> +				return -EFAULT;
>>> +			}
>>> +			/* Run first deRM processing in SW */
>>> +			in = rte_pktmbuf_mtod_offset(dec->input.data, uint8_t
>> *, in_offset);
>>> +			derm_req.p_in = (int8_t *) in;
>>> +			derm_req.p_harq = (int8_t *) q->derm_buffer;
>>> +			derm_req.base_graph = dec->basegraph;
>>> +			derm_req.zc = dec->z_c;
>>> +			derm_req.ncb = dec->n_cb;
>>> +			derm_req.e = dec->cb_params.e;
>>> +			if (derm_req.e > ACC_MAX_E) {
>>> +				rte_bbdev_log(WARNING,
>>> +						"deRM: E %d > %d max",
>>> +						derm_req.e, ACC_MAX_E);
>>> +				derm_req.e = ACC_MAX_E;
>>> +			}
>>> +			derm_req.k0 = 0; /* Actual output from SDK */
>>> +			derm_req.isretx = false;
>>> +			derm_req.rvid = dec->rv_index;
>>> +			derm_req.modulation_order = dec->q_m;
>>> +			derm_req.start_null_index =
>>> +					(dec->basegraph == 1 ? 22 : 10)
>>> +					* dec->z_c - 2 * dec->z_c
>>> +					- dec->n_filler;
>>> +			derm_req.num_of_null = dec->n_filler;
>>> +			bblib_rate_dematching_5gnr(&derm_req,
>> &derm_resp);
>>> +			/* Force back the HW DeRM */
>>> +			dec->q_m = 1;
>>> +			dec->cb_params.e = dec->n_cb - dec->n_filler;
>>> +			dec->rv_index = 0;
>>> +			rte_memcpy(in, q->derm_buffer, dec->cb_params.e);
>>> +			/* Capture counter when pre-processing is used */
>>> +			q_data->queue_stats.enqueue_warn_count++;
>>> +			#else
>>> +			RTE_SET_USED(q_data);
>>> +			rte_bbdev_log(WARNING,
>>> +				"Corner case may require deRM pre-
>> processing in SDK"
>>> +				);
>>> +			#endif
>>> +		}
>>> +
>>>    		fcw = &desc->req.fcw_ld;
>>>    		q->d->fcw_ld_fill(op, fcw, harq_layout);
>>>
>>> @@ -3721,7 +3823,7 @@ acc100_enqueue_ldpc_dec_cb(struct
>> rte_bbdev_queue_data *q_data,
>>>    			ops[i]->ldpc_dec.n_cb, ops[i]->ldpc_dec.q_m,
>>>    			ops[i]->ldpc_dec.n_filler, ops[i]-
>>> ldpc_dec.cb_params.e,
>>>    			same_op);
>>> -		ret = enqueue_ldpc_dec_one_op_cb(q, ops[i], i, same_op);
>>> +		ret = enqueue_ldpc_dec_one_op_cb(q, ops[i], i, same_op,
>> q_data);
>>>    		if (ret < 0) {
>>>    			acc_enqueue_invalid(q_data);
>>>    			break;
>> As I already replied on a previous revision, I think it is not acceptable to
>> condition the fix on forcing to depend on a proprietary SDK. The ACC100 driver
>> did not have this Flexran SDK requirement initially, not like the SW-only BBDEV
>> PMDs.
> The SDK is not proprietary. It is available on IDZ with a permissive license and includes a plain C code implementation in case there is no AVX512 ISA on the CPU.
>
>> In my opinion, you can keep the call to Flexran SDK API but you should provide
>> an open-source alternative, even if not optimized so that it is at least
>> functional. Later contributors could provide an optimized open-source version
>> if they feel the need.
> Again, the SDK is available on IDZ with a permissive license and includes a plain C implementation.

I agree that this is a license issue.

Additionally, this approach does not scale.

If everyone solved corner cases by referencing an external library to
pull in a function, we would quickly have a configuration mess. Even just
using the FlexRAN SDK, it's a mess: which version of the external library
works with which version of DPDK? Leave it to the user to try, and when
it fails, who fixes what?

Distro-wise, we cannot use IDZ, so this is dead code for DPDK in Fedora
and RHEL.

Can the C implementation be imported into DPDK, or can someone clean-room it?

Tom

>
>> I had a quick look at the bblib_rate_dematching_5gnr C implementation, and it
>> is reasonably small and there is really nothing special in it.
>>
>> Not having an open-source alternative is problematic, because it could make CI
>> fail when using the test-bbdev application if the HW bug is triggered.
> CI would not fail.
> We can discuss further offline later this week as there seems to be some confusion still.
>
> Thanks
> Nic
>
>> Regards,
>> Maxime
  

Patch

diff --git a/drivers/baseband/acc/acc_common.h b/drivers/baseband/acc/acc_common.h
index eae7eab4e9..5e8972b40a 100644
--- a/drivers/baseband/acc/acc_common.h
+++ b/drivers/baseband/acc/acc_common.h
@@ -123,6 +123,14 @@ 
 #define ACC_HARQ_ALIGN_64B          64
 #define ACC_MAX_ZC                  384
 
+/* De-ratematch code rate limitation when padding is required */
+#define ACC_LIM_03 2  /* 0.03 */
+#define ACC_LIM_09 6  /* 0.09 */
+#define ACC_LIM_14 9  /* 0.14 */
+#define ACC_LIM_21 14 /* 0.21 */
+#define ACC_LIM_31 20 /* 0.31 */
+#define ACC_MAX_E (128 * 1024 - 2)
+
 /* Helper macro for logging */
 #define rte_acc_log(level, fmt, ...) \
 	rte_log(RTE_LOG_ ## level, RTE_LOG_NOTICE, fmt "\n", \
diff --git a/drivers/baseband/acc/meson.build b/drivers/baseband/acc/meson.build
index 77c393b533..a5fc4fed01 100644
--- a/drivers/baseband/acc/meson.build
+++ b/drivers/baseband/acc/meson.build
@@ -1,6 +1,27 @@ 
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2020 Intel Corporation
 
+# Check for FlexRAN SDK libraries
+dep_dec5g = dependency('flexran_sdk_ldpc_decoder_5gnr', required: false)
+
+if dep_dec5g.found()
+    ext_deps += cc.find_library('libstdc++', required: true)
+    ext_deps += cc.find_library('libirc', required: true)
+    ext_deps += cc.find_library('libimf', required: true)
+    ext_deps += cc.find_library('libipps', required: true)
+    ext_deps += cc.find_library('libsvml', required: true)
+    ext_deps += dep_dec5g
+    ext_deps += dependency('flexran_sdk_ldpc_encoder_5gnr', required: true)
+    ext_deps += dependency('flexran_sdk_LDPC_ratematch_5gnr', required: true)
+    ext_deps += dependency('flexran_sdk_rate_dematching_5gnr', required: true)
+    ext_deps += dependency('flexran_sdk_turbo', required: true)
+    ext_deps += dependency('flexran_sdk_crc', required: true)
+    ext_deps += dependency('flexran_sdk_rate_matching', required: true)
+    ext_deps += dependency('flexran_sdk_common', required: true)
+    cflags += ['-DRTE_BBDEV_SDK_AVX2']
+    cflags += ['-DRTE_BBDEV_SDK_AVX512']
+endif
+
 deps += ['bbdev', 'bus_pci']
 
 sources = files('rte_acc100_pmd.c', 'rte_acc200_pmd.c')
diff --git a/drivers/baseband/acc/rte_acc100_pmd.c b/drivers/baseband/acc/rte_acc100_pmd.c
index 23bc5d25bb..e8b230e563 100644
--- a/drivers/baseband/acc/rte_acc100_pmd.c
+++ b/drivers/baseband/acc/rte_acc100_pmd.c
@@ -25,6 +25,10 @@ 
 #include "acc101_pmd.h"
 #include "acc200_cfg.h"
 
+#ifdef RTE_BBDEV_SDK_AVX512
+#include <phy_rate_dematching_5gnr.h>
+#endif
+
 #ifdef RTE_LIBRTE_BBDEV_DEBUG
 RTE_LOG_REGISTER_DEFAULT(acc100_logtype, DEBUG);
 #else
@@ -756,6 +760,14 @@  acc100_queue_setup(struct rte_bbdev *dev, uint16_t queue_id,
 		ret = -ENOMEM;
 		goto free_lb_out;
 	}
+	q->derm_buffer = rte_zmalloc_socket(dev->device->driver->name,
+			RTE_BBDEV_TURBO_MAX_CB_SIZE * 10,
+			RTE_CACHE_LINE_SIZE, conf->socket);
+	if (q->derm_buffer == NULL) {
+		rte_bbdev_log(ERR, "Failed to allocate derm_buffer memory");
+		ret = -ENOMEM;
+		goto free_companion_ring_addr;
+	}
 
 	/*
 	 * Software queue ring wraps synchronously with the HW when it reaches
@@ -776,7 +788,7 @@  acc100_queue_setup(struct rte_bbdev *dev, uint16_t queue_id,
 	q_idx = acc100_find_free_queue_idx(dev, conf);
 	if (q_idx == -1) {
 		ret = -EINVAL;
-		goto free_companion_ring_addr;
+		goto free_derm_buffer;
 	}
 
 	q->qgrp_id = (q_idx >> ACC100_GRP_ID_SHIFT) & 0xF;
@@ -804,6 +816,9 @@  acc100_queue_setup(struct rte_bbdev *dev, uint16_t queue_id,
 	dev->data->queues[queue_id].queue_private = q;
 	return 0;
 
+free_derm_buffer:
+	rte_free(q->derm_buffer);
+	q->derm_buffer = NULL;
 free_companion_ring_addr:
 	rte_free(q->companion_ring_addr);
 	q->companion_ring_addr = NULL;
@@ -890,6 +905,7 @@  acc100_queue_release(struct rte_bbdev *dev, uint16_t q_id)
 		/* Mark the Queue as un-assigned */
 		d->q_assigned_bit_map[q->qgrp_id] &= (0xFFFFFFFFFFFFFFFF -
 				(uint64_t) (1 << q->aq_id));
+		rte_free(q->derm_buffer);
 		rte_free(q->companion_ring_addr);
 		rte_free(q->lb_in);
 		rte_free(q->lb_out);
@@ -3111,10 +3127,44 @@  harq_loopback(struct acc_queue *q, struct rte_bbdev_dec_op *op,
 	return 1;
 }
 
+/** Assess whether a work around is required for the deRM corner cases */
+static inline bool
+derm_workaround_required(struct rte_bbdev_op_ldpc_dec *ldpc_dec, struct acc_queue *q)
+{
+	if (!is_acc100(q))
+		return false;
+	int32_t e = ldpc_dec->cb_params.e;
+	int q_m = ldpc_dec->q_m;
+	int z_c = ldpc_dec->z_c;
+	int K = (ldpc_dec->basegraph == 1 ? ACC_K_ZC_1 : ACC_K_ZC_2)
+			* z_c;
+
+	bool required = false;
+	if (ldpc_dec->basegraph == 1) {
+		if ((q_m == 4) && (z_c >= 320) && (e * ACC_LIM_31 > K * 64))
+			required = true;
+		else if ((e * ACC_LIM_21 > K * 64))
+			required = true;
+	} else {
+		if (q_m <= 2) {
+			if ((z_c >= 208) && (e * ACC_LIM_09 > K * 64))
+				required = true;
+			else if ((z_c < 208) && (e * ACC_LIM_03 > K * 64))
+				required = true;
+		} else if (e * ACC_LIM_14 > K * 64)
+			required = true;
+	}
+	if (required)
+		rte_bbdev_log(INFO, "Running deRM pre-processing in SW");
+
+	return required;
+}
+
 /** Enqueue one decode operations for ACC100 device in CB mode */
 static inline int
 enqueue_ldpc_dec_one_op_cb(struct acc_queue *q, struct rte_bbdev_dec_op *op,
-		uint16_t total_enqueued_cbs, bool same_op)
+		uint16_t total_enqueued_cbs, bool same_op,
+		struct rte_bbdev_queue_data *q_data)
 {
 	int ret;
 	if (unlikely(check_bit(op->ldpc_dec.op_flags,
@@ -3168,6 +3218,58 @@  enqueue_ldpc_dec_one_op_cb(struct acc_queue *q, struct rte_bbdev_dec_op *op,
 	} else {
 		struct acc_fcw_ld *fcw;
 		uint32_t seg_total_left;
+
+		if (derm_workaround_required(&op->ldpc_dec, q)) {
+			#ifdef RTE_BBDEV_SDK_AVX512
+			struct rte_bbdev_op_ldpc_dec *dec = &op->ldpc_dec;
+			struct bblib_rate_dematching_5gnr_request derm_req;
+			struct bblib_rate_dematching_5gnr_response derm_resp;
+			uint8_t *in;
+
+			/* Checking input size is matching with E */
+			if (dec->input.data->data_len < dec->cb_params.e) {
+				rte_bbdev_log(ERR, "deRM: Input size mismatch");
+				return -EFAULT;
+			}
+			/* Run first deRM processing in SW */
+			in = rte_pktmbuf_mtod_offset(dec->input.data, uint8_t *, in_offset);
+			derm_req.p_in = (int8_t *) in;
+			derm_req.p_harq = (int8_t *) q->derm_buffer;
+			derm_req.base_graph = dec->basegraph;
+			derm_req.zc = dec->z_c;
+			derm_req.ncb = dec->n_cb;
+			derm_req.e = dec->cb_params.e;
+			if (derm_req.e > ACC_MAX_E) {
+				rte_bbdev_log(WARNING,
+						"deRM: E %d > %d max",
+						derm_req.e, ACC_MAX_E);
+				derm_req.e = ACC_MAX_E;
+			}
+			derm_req.k0 = 0; /* Actual output from SDK */
+			derm_req.isretx = false;
+			derm_req.rvid = dec->rv_index;
+			derm_req.modulation_order = dec->q_m;
+			derm_req.start_null_index =
+					(dec->basegraph == 1 ? 22 : 10)
+					* dec->z_c - 2 * dec->z_c
+					- dec->n_filler;
+			derm_req.num_of_null = dec->n_filler;
+			bblib_rate_dematching_5gnr(&derm_req, &derm_resp);
+			/* Force back the HW DeRM */
+			dec->q_m = 1;
+			dec->cb_params.e = dec->n_cb - dec->n_filler;
+			dec->rv_index = 0;
+			rte_memcpy(in, q->derm_buffer, dec->cb_params.e);
+			/* Capture counter when pre-processing is used */
+			q_data->queue_stats.enqueue_warn_count++;
+			#else
+			RTE_SET_USED(q_data);
+			rte_bbdev_log(WARNING,
+				"Corner case may require deRM pre-processing in SDK"
+				);
+			#endif
+		}
+
 		fcw = &desc->req.fcw_ld;
 		q->d->fcw_ld_fill(op, fcw, harq_layout);
 
@@ -3721,7 +3823,7 @@  acc100_enqueue_ldpc_dec_cb(struct rte_bbdev_queue_data *q_data,
 			ops[i]->ldpc_dec.n_cb, ops[i]->ldpc_dec.q_m,
 			ops[i]->ldpc_dec.n_filler, ops[i]->ldpc_dec.cb_params.e,
 			same_op);
-		ret = enqueue_ldpc_dec_one_op_cb(q, ops[i], i, same_op);
+		ret = enqueue_ldpc_dec_one_op_cb(q, ops[i], i, same_op, q_data);
 		if (ret < 0) {
 			acc_enqueue_invalid(q_data);
 			break;