[v2,13/37] baseband/acc10x: limit cases for HARQ pruning

Message ID 20220820023157.189047-14-hernan.vargas@intel.com (mailing list archive)
State Superseded, archived
Delegated to: akhil goyal
Headers
Series baseband/acc100: changes for 22.11 |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Vargas, Hernan Aug. 20, 2022, 2:31 a.m. UTC
  Add flag ACC101_HARQ_PRUNING_OPTIMIZATION to limit cases when HARQ
pruning is valid.

Signed-off-by: Hernan Vargas <hernan.vargas@intel.com>
---
 drivers/baseband/acc100/rte_acc100_pmd.c | 52 +++++++++++++++++++-----
 1 file changed, 41 insertions(+), 11 deletions(-)
  

Comments

Maxime Coquelin Sept. 15, 2022, 7:37 a.m. UTC | #1
On 8/20/22 04:31, Hernan Vargas wrote:
> Add flag ACC101_HARQ_PRUNING_OPTIMIZATION to limit cases when HARQ
> pruning is valid.
> 
> Signed-off-by: Hernan Vargas <hernan.vargas@intel.com>
> ---
>   drivers/baseband/acc100/rte_acc100_pmd.c | 52 +++++++++++++++++++-----
>   1 file changed, 41 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/baseband/acc100/rte_acc100_pmd.c b/drivers/baseband/acc100/rte_acc100_pmd.c
> index 81bae4d695..e47f7d68c2 100644
> --- a/drivers/baseband/acc100/rte_acc100_pmd.c
> +++ b/drivers/baseband/acc100/rte_acc100_pmd.c
> @@ -1370,17 +1370,23 @@ acc100_fcw_ld_fill(struct rte_bbdev_dec_op *op, struct acc100_fcw_ld *fcw,
>   	harq_index = hq_index(op->ldpc_dec.harq_combined_output.offset);
>   #ifdef ACC100_EXT_MEM
>   	/* Limit cases when HARQ pruning is valid */
> +#ifdef ACC100_HARQ_PRUNING_OPTIMIZATION
>   	harq_prun = ((op->ldpc_dec.harq_combined_output.offset %
> -			ACC100_HARQ_OFFSET) == 0) &&
> -			(op->ldpc_dec.harq_combined_output.offset <= UINT16_MAX
> -			* ACC100_HARQ_OFFSET);
> +			ACC100_HARQ_OFFSET) == 0);
> +#endif

Optimizations should not be put under #ifdefs; otherwise it becomes a
testing hell. CI would have to run as many builds as there are possible
combinations, which is not sustainable.

Even if not part of this patch, the "#ifdef ACC100_EXT_MEM" should also
be removed.

>   #endif
>   	if (fcw->hcin_en > 0) {
>   		harq_in_length = op->ldpc_dec.harq_combined_input.length;
>   		if (fcw->hcin_decomp_mode > 0)
>   			harq_in_length = harq_in_length * 8 / 6;
> -		harq_in_length = RTE_ALIGN(harq_in_length, 64);
> -		if ((harq_layout[harq_index].offset > 0) & harq_prun) {
> +		harq_in_length = RTE_MIN(harq_in_length, op->ldpc_dec.n_cb
> +				- op->ldpc_dec.n_filler);
> +		/* Alignment on next 64B - Already enforced from HC output */
> +		harq_in_length = RTE_ALIGN_FLOOR(harq_in_length, 64);
> +		/* Stronger alignment requirement when in decompression mode */
> +		if (fcw->hcin_decomp_mode > 0)
> +			harq_in_length = RTE_ALIGN_FLOOR(harq_in_length, 256);
> +		if ((harq_layout[harq_index].offset > 0) && harq_prun) {
>   			rte_bbdev_log_debug("HARQ IN offset unexpected for now\n");
>   			fcw->hcin_size0 = harq_layout[harq_index].size0;
>   			fcw->hcin_offset = harq_layout[harq_index].offset;
> @@ -1455,6 +1461,7 @@ acc101_fcw_ld_fill(struct rte_bbdev_dec_op *op, struct acc100_fcw_ld *fcw,
>   	uint16_t harq_out_length, harq_in_length, ncb_p, k0_p, parity_offset;
>   	uint32_t harq_index;
>   	uint32_t l;
> +	bool harq_prun = false;
>   
>   	fcw->qm = op->ldpc_dec.q_m;
>   	fcw->nfiller = op->ldpc_dec.n_filler;
> @@ -1500,6 +1507,13 @@ acc101_fcw_ld_fill(struct rte_bbdev_dec_op *op, struct acc100_fcw_ld *fcw,
>   	fcw->llr_pack_mode = check_bit(op->ldpc_dec.op_flags,
>   			RTE_BBDEV_LDPC_LLR_COMPRESSION);
>   	harq_index = hq_index(op->ldpc_dec.harq_combined_output.offset);
> +	#ifdef ACC100_EXT_MEM
> +	/* Limit cases when HARQ pruning is valid */
> +#ifdef ACC101_HARQ_PRUNING_OPTIMIZATION
> +	harq_prun = ((op->ldpc_dec.harq_combined_output.offset %
> +			ACC101_HARQ_OFFSET) == 0);
> +#endif
> +#endif
>   	if (fcw->hcin_en > 0) {
>   		harq_in_length = op->ldpc_dec.harq_combined_input.length;
>   		if (fcw->hcin_decomp_mode > 0)
> @@ -1508,9 +1522,17 @@ acc101_fcw_ld_fill(struct rte_bbdev_dec_op *op, struct acc100_fcw_ld *fcw,
>   				- op->ldpc_dec.n_filler);
>   		/* Alignment on next 64B - Already enforced from HC output */
>   		harq_in_length = RTE_ALIGN_FLOOR(harq_in_length, 64);
> -		fcw->hcin_size0 = harq_in_length;
> -		fcw->hcin_offset = 0;
> -		fcw->hcin_size1 = 0;
> +		if ((harq_layout[harq_index].offset > 0) && harq_prun) {
> +			rte_bbdev_log_debug("HARQ IN offset unexpected for now\n");
> +			fcw->hcin_size0 = harq_layout[harq_index].size0;
> +			fcw->hcin_offset = harq_layout[harq_index].offset;
> +			fcw->hcin_size1 = harq_in_length -
> +					harq_layout[harq_index].offset;
> +		} else {
> +			fcw->hcin_size0 = harq_in_length;
> +			fcw->hcin_offset = 0;
> +			fcw->hcin_size1 = 0;
> +		}
>   	} else {
>   		fcw->hcin_size0 = 0;
>   		fcw->hcin_offset = 0;
> @@ -1551,9 +1573,17 @@ acc101_fcw_ld_fill(struct rte_bbdev_dec_op *op, struct acc100_fcw_ld *fcw,
>   		harq_out_length = RTE_MIN(harq_out_length, ncb_p);
>   		/* Alignment on next 64B */
>   		harq_out_length = RTE_ALIGN_CEIL(harq_out_length, 64);
> -		fcw->hcout_size0 = harq_out_length;
> -		fcw->hcout_size1 = 0;
> -		fcw->hcout_offset = 0;
> +		if ((k0_p > fcw->hcin_size0 + ACC100_HARQ_OFFSET_THRESHOLD) &&
> +				harq_prun) {
> +			fcw->hcout_size0 = (uint16_t) fcw->hcin_size0;
> +			fcw->hcout_offset = k0_p & 0xFFC0;
> +			fcw->hcout_size1 = harq_out_length - fcw->hcout_offset;
> +		} else {
> +			fcw->hcout_size0 = harq_out_length;
> +			fcw->hcout_size1 = 0;
> +			fcw->hcout_offset = 0;
> +		}
> +
>   		harq_layout[harq_index].offset = fcw->hcout_offset;
>   		harq_layout[harq_index].size0 = fcw->hcout_size0;
>   	} else {
  
Chautru, Nicolas Sept. 16, 2022, 12:31 a.m. UTC | #2
Hi Maxime, 

> -----Original Message-----
> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> Sent: Thursday, September 15, 2022 12:37 AM
> To: Vargas, Hernan <hernan.vargas@intel.com>; dev@dpdk.org;
> gakhil@marvell.com; trix@redhat.com
> Cc: Chautru, Nicolas <nicolas.chautru@intel.com>; Zhang, Qi Z
> <qi.z.zhang@intel.com>
> Subject: Re: [PATCH v2 13/37] baseband/acc10x: limit cases for HARQ
> pruning
> 
> 
> 
> On 8/20/22 04:31, Hernan Vargas wrote:
> > Add flag ACC101_HARQ_PRUNING_OPTIMIZATION to limit cases when
> HARQ
> > pruning is valid.
> >
> > Signed-off-by: Hernan Vargas <hernan.vargas@intel.com>
> > ---
> >   drivers/baseband/acc100/rte_acc100_pmd.c | 52
> +++++++++++++++++++-----
> >   1 file changed, 41 insertions(+), 11 deletions(-)
> >
> > diff --git a/drivers/baseband/acc100/rte_acc100_pmd.c
> > b/drivers/baseband/acc100/rte_acc100_pmd.c
> > index 81bae4d695..e47f7d68c2 100644
> > --- a/drivers/baseband/acc100/rte_acc100_pmd.c
> > +++ b/drivers/baseband/acc100/rte_acc100_pmd.c
> > @@ -1370,17 +1370,23 @@ acc100_fcw_ld_fill(struct rte_bbdev_dec_op
> *op, struct acc100_fcw_ld *fcw,
> >   	harq_index = hq_index(op->ldpc_dec.harq_combined_output.offset);
> >   #ifdef ACC100_EXT_MEM
> >   	/* Limit cases when HARQ pruning is valid */
> > +#ifdef ACC100_HARQ_PRUNING_OPTIMIZATION
> >   	harq_prun = ((op->ldpc_dec.harq_combined_output.offset %
> > -			ACC100_HARQ_OFFSET) == 0) &&
> > -			(op->ldpc_dec.harq_combined_output.offset <=
> UINT16_MAX
> > -			* ACC100_HARQ_OFFSET);
> > +			ACC100_HARQ_OFFSET) == 0);
> > +#endif
> 
> Optimizations should not be put under #ifdefs, it will become a testing hell
> otherwise. CI will have to run as many builds as there are possible
> combinations, which is not sustainable.
> 
> Even if not part of this patch, the "#ifdef ACC100_EXT_MEM" should also be
> removed.

Regarding ACC100_EXT_MEM: this compilation switch makes it possible to use the device with standard memory (not the dedicated memory on the card).
I believe there is value, notably for debugging, in letting users rebuild with a different capability (it is more of a DEBUG option). I understand that only the default configuration is built by default.
As you pointed out, this is not related to this patchset.


> 
> >   #endif
> >   	if (fcw->hcin_en > 0) {
> >   		harq_in_length = op->ldpc_dec.harq_combined_input.length;
> >   		if (fcw->hcin_decomp_mode > 0)
> >   			harq_in_length = harq_in_length * 8 / 6;
> > -		harq_in_length = RTE_ALIGN(harq_in_length, 64);
> > -		if ((harq_layout[harq_index].offset > 0) & harq_prun) {
> > +		harq_in_length = RTE_MIN(harq_in_length, op-
> >ldpc_dec.n_cb
> > +				- op->ldpc_dec.n_filler);
> > +		/* Alignment on next 64B - Already enforced from HC output
> */
> > +		harq_in_length = RTE_ALIGN_FLOOR(harq_in_length, 64);
> > +		/* Stronger alignment requirement when in decompression
> mode */
> > +		if (fcw->hcin_decomp_mode > 0)
> > +			harq_in_length = RTE_ALIGN_FLOOR(harq_in_length,
> 256);
> > +		if ((harq_layout[harq_index].offset > 0) && harq_prun) {
> >   			rte_bbdev_log_debug("HARQ IN offset unexpected
> for now\n");
> >   			fcw->hcin_size0 = harq_layout[harq_index].size0;
> >   			fcw->hcin_offset = harq_layout[harq_index].offset;
> @@ -1455,6
> > +1461,7 @@ acc101_fcw_ld_fill(struct rte_bbdev_dec_op *op, struct
> acc100_fcw_ld *fcw,
> >   	uint16_t harq_out_length, harq_in_length, ncb_p, k0_p,
> parity_offset;
> >   	uint32_t harq_index;
> >   	uint32_t l;
> > +	bool harq_prun = false;
> >
> >   	fcw->qm = op->ldpc_dec.q_m;
> >   	fcw->nfiller = op->ldpc_dec.n_filler; @@ -1500,6 +1507,13 @@
> > acc101_fcw_ld_fill(struct rte_bbdev_dec_op *op, struct acc100_fcw_ld
> *fcw,
> >   	fcw->llr_pack_mode = check_bit(op->ldpc_dec.op_flags,
> >   			RTE_BBDEV_LDPC_LLR_COMPRESSION);
> >   	harq_index = hq_index(op->ldpc_dec.harq_combined_output.offset);
> > +	#ifdef ACC100_EXT_MEM
> > +	/* Limit cases when HARQ pruning is valid */ #ifdef
> > +ACC101_HARQ_PRUNING_OPTIMIZATION
> > +	harq_prun = ((op->ldpc_dec.harq_combined_output.offset %
> > +			ACC101_HARQ_OFFSET) == 0);
> > +#endif
> > +#endif
> >   	if (fcw->hcin_en > 0) {
> >   		harq_in_length = op->ldpc_dec.harq_combined_input.length;
> >   		if (fcw->hcin_decomp_mode > 0)
> > @@ -1508,9 +1522,17 @@ acc101_fcw_ld_fill(struct rte_bbdev_dec_op
> *op, struct acc100_fcw_ld *fcw,
> >   				- op->ldpc_dec.n_filler);
> >   		/* Alignment on next 64B - Already enforced from HC output
> */
> >   		harq_in_length = RTE_ALIGN_FLOOR(harq_in_length, 64);
> > -		fcw->hcin_size0 = harq_in_length;
> > -		fcw->hcin_offset = 0;
> > -		fcw->hcin_size1 = 0;
> > +		if ((harq_layout[harq_index].offset > 0) && harq_prun) {
> > +			rte_bbdev_log_debug("HARQ IN offset unexpected
> for now\n");
> > +			fcw->hcin_size0 = harq_layout[harq_index].size0;
> > +			fcw->hcin_offset = harq_layout[harq_index].offset;
> > +			fcw->hcin_size1 = harq_in_length -
> > +					harq_layout[harq_index].offset;
> > +		} else {
> > +			fcw->hcin_size0 = harq_in_length;
> > +			fcw->hcin_offset = 0;
> > +			fcw->hcin_size1 = 0;
> > +		}
> >   	} else {
> >   		fcw->hcin_size0 = 0;
> >   		fcw->hcin_offset = 0;
> > @@ -1551,9 +1573,17 @@ acc101_fcw_ld_fill(struct rte_bbdev_dec_op
> *op, struct acc100_fcw_ld *fcw,
> >   		harq_out_length = RTE_MIN(harq_out_length, ncb_p);
> >   		/* Alignment on next 64B */
> >   		harq_out_length = RTE_ALIGN_CEIL(harq_out_length, 64);
> > -		fcw->hcout_size0 = harq_out_length;
> > -		fcw->hcout_size1 = 0;
> > -		fcw->hcout_offset = 0;
> > +		if ((k0_p > fcw->hcin_size0 +
> ACC100_HARQ_OFFSET_THRESHOLD) &&
> > +				harq_prun) {
> > +			fcw->hcout_size0 = (uint16_t) fcw->hcin_size0;
> > +			fcw->hcout_offset = k0_p & 0xFFC0;
> > +			fcw->hcout_size1 = harq_out_length - fcw-
> >hcout_offset;
> > +		} else {
> > +			fcw->hcout_size0 = harq_out_length;
> > +			fcw->hcout_size1 = 0;
> > +			fcw->hcout_offset = 0;
> > +		}
> > +
> >   		harq_layout[harq_index].offset = fcw->hcout_offset;
> >   		harq_layout[harq_index].size0 = fcw->hcout_size0;
> >   	} else {
  

Patch

diff --git a/drivers/baseband/acc100/rte_acc100_pmd.c b/drivers/baseband/acc100/rte_acc100_pmd.c
index 81bae4d695..e47f7d68c2 100644
--- a/drivers/baseband/acc100/rte_acc100_pmd.c
+++ b/drivers/baseband/acc100/rte_acc100_pmd.c
@@ -1370,17 +1370,23 @@  acc100_fcw_ld_fill(struct rte_bbdev_dec_op *op, struct acc100_fcw_ld *fcw,
 	harq_index = hq_index(op->ldpc_dec.harq_combined_output.offset);
 #ifdef ACC100_EXT_MEM
 	/* Limit cases when HARQ pruning is valid */
+#ifdef ACC100_HARQ_PRUNING_OPTIMIZATION
 	harq_prun = ((op->ldpc_dec.harq_combined_output.offset %
-			ACC100_HARQ_OFFSET) == 0) &&
-			(op->ldpc_dec.harq_combined_output.offset <= UINT16_MAX
-			* ACC100_HARQ_OFFSET);
+			ACC100_HARQ_OFFSET) == 0);
+#endif
 #endif
 	if (fcw->hcin_en > 0) {
 		harq_in_length = op->ldpc_dec.harq_combined_input.length;
 		if (fcw->hcin_decomp_mode > 0)
 			harq_in_length = harq_in_length * 8 / 6;
-		harq_in_length = RTE_ALIGN(harq_in_length, 64);
-		if ((harq_layout[harq_index].offset > 0) & harq_prun) {
+		harq_in_length = RTE_MIN(harq_in_length, op->ldpc_dec.n_cb
+				- op->ldpc_dec.n_filler);
+		/* Alignment on next 64B - Already enforced from HC output */
+		harq_in_length = RTE_ALIGN_FLOOR(harq_in_length, 64);
+		/* Stronger alignment requirement when in decompression mode */
+		if (fcw->hcin_decomp_mode > 0)
+			harq_in_length = RTE_ALIGN_FLOOR(harq_in_length, 256);
+		if ((harq_layout[harq_index].offset > 0) && harq_prun) {
 			rte_bbdev_log_debug("HARQ IN offset unexpected for now\n");
 			fcw->hcin_size0 = harq_layout[harq_index].size0;
 			fcw->hcin_offset = harq_layout[harq_index].offset;
@@ -1455,6 +1461,7 @@  acc101_fcw_ld_fill(struct rte_bbdev_dec_op *op, struct acc100_fcw_ld *fcw,
 	uint16_t harq_out_length, harq_in_length, ncb_p, k0_p, parity_offset;
 	uint32_t harq_index;
 	uint32_t l;
+	bool harq_prun = false;
 
 	fcw->qm = op->ldpc_dec.q_m;
 	fcw->nfiller = op->ldpc_dec.n_filler;
@@ -1500,6 +1507,13 @@  acc101_fcw_ld_fill(struct rte_bbdev_dec_op *op, struct acc100_fcw_ld *fcw,
 	fcw->llr_pack_mode = check_bit(op->ldpc_dec.op_flags,
 			RTE_BBDEV_LDPC_LLR_COMPRESSION);
 	harq_index = hq_index(op->ldpc_dec.harq_combined_output.offset);
+	#ifdef ACC100_EXT_MEM
+	/* Limit cases when HARQ pruning is valid */
+#ifdef ACC101_HARQ_PRUNING_OPTIMIZATION
+	harq_prun = ((op->ldpc_dec.harq_combined_output.offset %
+			ACC101_HARQ_OFFSET) == 0);
+#endif
+#endif
 	if (fcw->hcin_en > 0) {
 		harq_in_length = op->ldpc_dec.harq_combined_input.length;
 		if (fcw->hcin_decomp_mode > 0)
@@ -1508,9 +1522,17 @@  acc101_fcw_ld_fill(struct rte_bbdev_dec_op *op, struct acc100_fcw_ld *fcw,
 				- op->ldpc_dec.n_filler);
 		/* Alignment on next 64B - Already enforced from HC output */
 		harq_in_length = RTE_ALIGN_FLOOR(harq_in_length, 64);
-		fcw->hcin_size0 = harq_in_length;
-		fcw->hcin_offset = 0;
-		fcw->hcin_size1 = 0;
+		if ((harq_layout[harq_index].offset > 0) && harq_prun) {
+			rte_bbdev_log_debug("HARQ IN offset unexpected for now\n");
+			fcw->hcin_size0 = harq_layout[harq_index].size0;
+			fcw->hcin_offset = harq_layout[harq_index].offset;
+			fcw->hcin_size1 = harq_in_length -
+					harq_layout[harq_index].offset;
+		} else {
+			fcw->hcin_size0 = harq_in_length;
+			fcw->hcin_offset = 0;
+			fcw->hcin_size1 = 0;
+		}
 	} else {
 		fcw->hcin_size0 = 0;
 		fcw->hcin_offset = 0;
@@ -1551,9 +1573,17 @@  acc101_fcw_ld_fill(struct rte_bbdev_dec_op *op, struct acc100_fcw_ld *fcw,
 		harq_out_length = RTE_MIN(harq_out_length, ncb_p);
 		/* Alignment on next 64B */
 		harq_out_length = RTE_ALIGN_CEIL(harq_out_length, 64);
-		fcw->hcout_size0 = harq_out_length;
-		fcw->hcout_size1 = 0;
-		fcw->hcout_offset = 0;
+		if ((k0_p > fcw->hcin_size0 + ACC100_HARQ_OFFSET_THRESHOLD) &&
+				harq_prun) {
+			fcw->hcout_size0 = (uint16_t) fcw->hcin_size0;
+			fcw->hcout_offset = k0_p & 0xFFC0;
+			fcw->hcout_size1 = harq_out_length - fcw->hcout_offset;
+		} else {
+			fcw->hcout_size0 = harq_out_length;
+			fcw->hcout_size1 = 0;
+			fcw->hcout_offset = 0;
+		}
+
 		harq_layout[harq_index].offset = fcw->hcout_offset;
 		harq_layout[harq_index].size0 = fcw->hcout_size0;
 	} else {