[v5,3/7] app/bbdev: include explicit HARQ preloading

Message ID 1603496581-35966-4-git-send-email-nicolas.chautru@intel.com (mailing list archive)
State Superseded, archived
Delegated to: akhil goyal
Headers
Series BBDEV test updates |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Chautru, Nicolas Oct. 23, 2020, 11:42 p.m. UTC
  Run preloading explicitly for unit tests. Load each code block
by reusing existing input op then restore for the actual test.

Signed-off-by: Nicolas Chautru <nicolas.chautru@intel.com>
Acked-by: Liu Tianjiao <tianjiao.liu@intel.com>
---
 app/test-bbdev/main.h            |  1 +
 app/test-bbdev/test_bbdev_perf.c | 51 +++++++++++++++++++++-------------------
 2 files changed, 28 insertions(+), 24 deletions(-)
  

Comments

Tom Rix Oct. 26, 2020, 1:31 p.m. UTC | #1
On 10/23/20 4:42 PM, Nicolas Chautru wrote:
> Run preloading explicitly for unit tests. Load each code block
> by reusing existing input op then restore for the actual test.
>
> Signed-off-by: Nicolas Chautru <nicolas.chautru@intel.com>
> Acked-by: Liu Tianjiao <tianjiao.liu@intel.com>
> ---
>  app/test-bbdev/main.h            |  1 +
>  app/test-bbdev/test_bbdev_perf.c | 51 +++++++++++++++++++++-------------------
>  2 files changed, 28 insertions(+), 24 deletions(-)
>
> diff --git a/app/test-bbdev/main.h b/app/test-bbdev/main.h
> index fb3dec8..dc10a50 100644
> --- a/app/test-bbdev/main.h
> +++ b/app/test-bbdev/main.h
> @@ -17,6 +17,7 @@
>  #define TEST_SKIPPED    1
>  
>  #define MAX_BURST 512U
> +#define MAX_OPS 1024U

This #define is not consistently used.

ex/ see retrieve_harq_ddr, the old 1024 is still being used.

>  #define DEFAULT_BURST 32U
>  #define DEFAULT_OPS 64U
>  #define DEFAULT_ITER 6U
> diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c
> index b62848e..f30cbdb 100644
> --- a/app/test-bbdev/test_bbdev_perf.c
> +++ b/app/test-bbdev/test_bbdev_perf.c
> @@ -2513,20 +2513,20 @@ typedef int (test_case_function)(struct active_device *ad,
>  		bool preload)
>  {
>  	uint16_t j;
> -	int ret;
> -	uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * 1024;
> -	struct rte_bbdev_op_data save_hc_in, save_hc_out;
> -	struct rte_bbdev_dec_op *ops_deq[MAX_BURST];
> +	int deq;
> +	uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
> +	struct rte_bbdev_op_data save_hc_in[MAX_OPS], save_hc_out[MAX_OPS];
> +	struct rte_bbdev_dec_op *ops_deq[MAX_OPS];
>  	uint32_t flags = ops[0]->ldpc_dec.op_flags;
>  	bool mem_in = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE;
>  	bool hc_in = flags & RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE;
>  	bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
>  	bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE;
>  	bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
> -	for (j = 0; j < n; ++j) {
> -		if ((mem_in || hc_in) && preload) {
> -			save_hc_in = ops[j]->ldpc_dec.harq_combined_input;
> -			save_hc_out = ops[j]->ldpc_dec.harq_combined_output;
> +	if ((mem_in || hc_in) && preload) {
> +		for (j = 0; j < n; ++j) {
> +			save_hc_in[j] = ops[j]->ldpc_dec.harq_combined_input;
> +			save_hc_out[j] = ops[j]->ldpc_dec.harq_combined_output;
>  			ops[j]->ldpc_dec.op_flags =
>  				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK +
>  				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;

flags are usually handled with bit operators, not arithmetic.

this seems to be a general issue.

> @@ -2536,16 +2536,23 @@ typedef int (test_case_function)(struct active_device *ad,
>  			ops[j]->ldpc_dec.harq_combined_output.offset =
>  					harq_offset;
>  			ops[j]->ldpc_dec.harq_combined_input.offset = 0;
> -			rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
> -					&ops[j], 1);
> -			ret = 0;
> -			while (ret == 0)
> -				ret = rte_bbdev_dequeue_ldpc_dec_ops(
> -					dev_id, queue_id, &ops_deq[j], 1);
> +			harq_offset += HARQ_INCR;
> +		}
> +		rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, &ops[0], n);
Add check the return is 'n'
> +		deq = 0;
> +		while (deq != n)
> +			deq += rte_bbdev_dequeue_ldpc_dec_ops(
> +					dev_id, queue_id, &ops_deq[deq],
> +					n - deq);

Add check the return >= 0

Tom

> +		/* Restore the operations */
> +		for (j = 0; j < n; ++j) {
>  			ops[j]->ldpc_dec.op_flags = flags;
> -			ops[j]->ldpc_dec.harq_combined_input = save_hc_in;
> -			ops[j]->ldpc_dec.harq_combined_output = save_hc_out;
> +			ops[j]->ldpc_dec.harq_combined_input = save_hc_in[j];
> +			ops[j]->ldpc_dec.harq_combined_output = save_hc_out[j];
>  		}
> +	}
> +	harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
> +	for (j = 0; j < n; ++j) {
>  		/* Adjust HARQ offset when we reach external DDR */
>  		if (mem_in || hc_in)
>  			ops[j]->ldpc_dec.harq_combined_input.offset
> @@ -3231,11 +3238,9 @@ typedef int (test_case_function)(struct active_device *ad,
>  				mbuf_reset(
>  				ops_enq[j]->ldpc_dec.harq_combined_output.data);
>  		}
> -		if (extDdr) {
> -			bool preload = i == (TEST_REPETITIONS - 1);
> +		if (extDdr)
>  			preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
> -					num_ops, preload);
> -		}
> +					num_ops, true);
>  		start_time = rte_rdtsc_precise();
>  
>  		for (enq = 0, deq = 0; enq < num_ops;) {
> @@ -3362,11 +3367,9 @@ typedef int (test_case_function)(struct active_device *ad,
>  				mbuf_reset(
>  				ops_enq[j]->ldpc_dec.harq_combined_output.data);
>  		}
> -		if (extDdr) {
> -			bool preload = i == (TEST_REPETITIONS - 1);
> +		if (extDdr)
>  			preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
> -					num_ops, preload);
> -		}
> +					num_ops, true);
>  		start_time = rte_rdtsc_precise();
>  
>  		for (enq = 0, deq = 0; enq < num_ops;) {
  
Chautru, Nicolas Oct. 26, 2020, 4:50 p.m. UTC | #2
> -----Original Message-----
> From: Tom Rix <trix@redhat.com>
> Sent: Monday, October 26, 2020 6:32 AM
> To: Chautru, Nicolas <nicolas.chautru@intel.com>; dev@dpdk.org;
> akhil.goyal@nxp.com
> Cc: david.marchand@redhat.com
> Subject: Re: [PATCH v5 3/7] app/bbdev: include explicit HARQ preloading
> 
> 
> On 10/23/20 4:42 PM, Nicolas Chautru wrote:
> > Run preloading explicitly for unit tests. Load each code block by
> > reusing existing input op then restore for the actual test.
> >
> > Signed-off-by: Nicolas Chautru <nicolas.chautru@intel.com>
> > Acked-by: Liu Tianjiao <tianjiao.liu@intel.com>
> > ---
> >  app/test-bbdev/main.h            |  1 +
> >  app/test-bbdev/test_bbdev_perf.c | 51
> > +++++++++++++++++++++-------------------
> >  2 files changed, 28 insertions(+), 24 deletions(-)
> >
> > diff --git a/app/test-bbdev/main.h b/app/test-bbdev/main.h index
> > fb3dec8..dc10a50 100644
> > --- a/app/test-bbdev/main.h
> > +++ b/app/test-bbdev/main.h
> > @@ -17,6 +17,7 @@
> >  #define TEST_SKIPPED    1
> >
> >  #define MAX_BURST 512U
> > +#define MAX_OPS 1024U
> 
> This #define is not consistently used.
> 
> ex/ see retrieve_harq_ddr, the old 1024 is still being used.

Thanks I missed it. I will change this now.

> 
> >  #define DEFAULT_BURST 32U
> >  #define DEFAULT_OPS 64U
> >  #define DEFAULT_ITER 6U
> > diff --git a/app/test-bbdev/test_bbdev_perf.c
> > b/app/test-bbdev/test_bbdev_perf.c
> > index b62848e..f30cbdb 100644
> > --- a/app/test-bbdev/test_bbdev_perf.c
> > +++ b/app/test-bbdev/test_bbdev_perf.c
> > @@ -2513,20 +2513,20 @@ typedef int (test_case_function)(struct
> active_device *ad,
> >  		bool preload)
> >  {
> >  	uint16_t j;
> > -	int ret;
> > -	uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * 1024;
> > -	struct rte_bbdev_op_data save_hc_in, save_hc_out;
> > -	struct rte_bbdev_dec_op *ops_deq[MAX_BURST];
> > +	int deq;
> > +	uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR *
> MAX_OPS;
> > +	struct rte_bbdev_op_data save_hc_in[MAX_OPS],
> save_hc_out[MAX_OPS];
> > +	struct rte_bbdev_dec_op *ops_deq[MAX_OPS];
> >  	uint32_t flags = ops[0]->ldpc_dec.op_flags;
> >  	bool mem_in = flags &
> RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE;
> >  	bool hc_in = flags & RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE;
> >  	bool mem_out = flags &
> RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
> >  	bool hc_out = flags &
> RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE;
> >  	bool h_comp = flags &
> RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
> > -	for (j = 0; j < n; ++j) {
> > -		if ((mem_in || hc_in) && preload) {
> > -			save_hc_in = ops[j]-
> >ldpc_dec.harq_combined_input;
> > -			save_hc_out = ops[j]-
> >ldpc_dec.harq_combined_output;
> > +	if ((mem_in || hc_in) && preload) {
> > +		for (j = 0; j < n; ++j) {
> > +			save_hc_in[j] = ops[j]-
> >ldpc_dec.harq_combined_input;
> > +			save_hc_out[j] = ops[j]-
> >ldpc_dec.harq_combined_output;
> >  			ops[j]->ldpc_dec.op_flags =
> >
> 	RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK +
> >
> 	RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
> 
> flags are usually handled with bit operators, not arithmetic.
> 
> this seems to be a general issue.

This is keeping same coding style as rest of file. So keeping as is. 

> 
> > @@ -2536,16 +2536,23 @@ typedef int (test_case_function)(struct
> active_device *ad,
> >  			ops[j]->ldpc_dec.harq_combined_output.offset =
> >  					harq_offset;
> >  			ops[j]->ldpc_dec.harq_combined_input.offset = 0;
> > -			rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
> > -					&ops[j], 1);
> > -			ret = 0;
> > -			while (ret == 0)
> > -				ret = rte_bbdev_dequeue_ldpc_dec_ops(
> > -					dev_id, queue_id, &ops_deq[j], 1);
> > +			harq_offset += HARQ_INCR;
> > +		}
> > +		rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
> &ops[0], n);
> Add check the return is 'n'
> > +		deq = 0;
> > +		while (deq != n)
> > +			deq += rte_bbdev_dequeue_ldpc_dec_ops(
> > +					dev_id, queue_id, &ops_deq[deq],
> > +					n - deq);
> 
> Add check the return >= 0

This cannot be <0.  uint16_t

> 
> Tom
> 
> > +		/* Restore the operations */
> > +		for (j = 0; j < n; ++j) {
> >  			ops[j]->ldpc_dec.op_flags = flags;
> > -			ops[j]->ldpc_dec.harq_combined_input =
> save_hc_in;
> > -			ops[j]->ldpc_dec.harq_combined_output =
> save_hc_out;
> > +			ops[j]->ldpc_dec.harq_combined_input =
> save_hc_in[j];
> > +			ops[j]->ldpc_dec.harq_combined_output =
> save_hc_out[j];
> >  		}
> > +	}
> > +	harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
> > +	for (j = 0; j < n; ++j) {
> >  		/* Adjust HARQ offset when we reach external DDR */
> >  		if (mem_in || hc_in)
> >  			ops[j]->ldpc_dec.harq_combined_input.offset
> > @@ -3231,11 +3238,9 @@ typedef int (test_case_function)(struct
> active_device *ad,
> >  				mbuf_reset(
> >  				ops_enq[j]-
> >ldpc_dec.harq_combined_output.data);
> >  		}
> > -		if (extDdr) {
> > -			bool preload = i == (TEST_REPETITIONS - 1);
> > +		if (extDdr)
> >  			preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
> > -					num_ops, preload);
> > -		}
> > +					num_ops, true);
> >  		start_time = rte_rdtsc_precise();
> >
> >  		for (enq = 0, deq = 0; enq < num_ops;) { @@ -3362,11
> +3367,9 @@
> > typedef int (test_case_function)(struct active_device *ad,
> >  				mbuf_reset(
> >  				ops_enq[j]-
> >ldpc_dec.harq_combined_output.data);
> >  		}
> > -		if (extDdr) {
> > -			bool preload = i == (TEST_REPETITIONS - 1);
> > +		if (extDdr)
> >  			preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
> > -					num_ops, preload);
> > -		}
> > +					num_ops, true);
> >  		start_time = rte_rdtsc_precise();
> >
> >  		for (enq = 0, deq = 0; enq < num_ops;) {
  
Tom Rix Oct. 28, 2020, 8:33 p.m. UTC | #3
On 10/26/20 9:50 AM, Chautru, Nicolas wrote:
>> -----Original Message-----
>> From: Tom Rix <trix@redhat.com>
>> Sent: Monday, October 26, 2020 6:32 AM
>> To: Chautru, Nicolas <nicolas.chautru@intel.com>; dev@dpdk.org;
>> akhil.goyal@nxp.com
>> Cc: david.marchand@redhat.com
>> Subject: Re: [PATCH v5 3/7] app/bbdev: include explicit HARQ preloading
>>
>>
>> On 10/23/20 4:42 PM, Nicolas Chautru wrote:
>>> Run preloading explicitly for unit tests. Load each code block by
>>> reusing existing input op then restore for the actual test.
>>>
>>> Signed-off-by: Nicolas Chautru <nicolas.chautru@intel.com>
>>> Acked-by: Liu Tianjiao <tianjiao.liu@intel.com>
>>> ---
>>>  app/test-bbdev/main.h            |  1 +
>>>  app/test-bbdev/test_bbdev_perf.c | 51
>>> +++++++++++++++++++++-------------------
>>>  2 files changed, 28 insertions(+), 24 deletions(-)
>>>
>>> diff --git a/app/test-bbdev/main.h b/app/test-bbdev/main.h index
>>> fb3dec8..dc10a50 100644
>>> --- a/app/test-bbdev/main.h
>>> +++ b/app/test-bbdev/main.h
>>> @@ -17,6 +17,7 @@
>>>  #define TEST_SKIPPED    1
>>>
>>>  #define MAX_BURST 512U
>>> +#define MAX_OPS 1024U
>> This #define is not consistently used.
>>
>> ex/ see retrieve_harq_ddr, the old 1024 is still being used.
> Thanks I missed it. I will change this now.
>
>>>  #define DEFAULT_BURST 32U
>>>  #define DEFAULT_OPS 64U
>>>  #define DEFAULT_ITER 6U
>>> diff --git a/app/test-bbdev/test_bbdev_perf.c
>>> b/app/test-bbdev/test_bbdev_perf.c
>>> index b62848e..f30cbdb 100644
>>> --- a/app/test-bbdev/test_bbdev_perf.c
>>> +++ b/app/test-bbdev/test_bbdev_perf.c
>>> @@ -2513,20 +2513,20 @@ typedef int (test_case_function)(struct
>> active_device *ad,
>>>  		bool preload)
>>>  {
>>>  	uint16_t j;
>>> -	int ret;
>>> -	uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * 1024;
>>> -	struct rte_bbdev_op_data save_hc_in, save_hc_out;
>>> -	struct rte_bbdev_dec_op *ops_deq[MAX_BURST];
>>> +	int deq;
>>> +	uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR *
>> MAX_OPS;
>>> +	struct rte_bbdev_op_data save_hc_in[MAX_OPS],
>> save_hc_out[MAX_OPS];
>>> +	struct rte_bbdev_dec_op *ops_deq[MAX_OPS];
>>>  	uint32_t flags = ops[0]->ldpc_dec.op_flags;
>>>  	bool mem_in = flags &
>> RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE;
>>>  	bool hc_in = flags & RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE;
>>>  	bool mem_out = flags &
>> RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
>>>  	bool hc_out = flags &
>> RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE;
>>>  	bool h_comp = flags &
>> RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
>>> -	for (j = 0; j < n; ++j) {
>>> -		if ((mem_in || hc_in) && preload) {
>>> -			save_hc_in = ops[j]-
>>> ldpc_dec.harq_combined_input;
>>> -			save_hc_out = ops[j]-
>>> ldpc_dec.harq_combined_output;
>>> +	if ((mem_in || hc_in) && preload) {
>>> +		for (j = 0; j < n; ++j) {
>>> +			save_hc_in[j] = ops[j]-
>>> ldpc_dec.harq_combined_input;
>>> +			save_hc_out[j] = ops[j]-
>>> ldpc_dec.harq_combined_output;
>>>  			ops[j]->ldpc_dec.op_flags =
>>>
>> 	RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK +
>> 	RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
>>
>> flags are usually handled with bit operators, not arithmetic.
>>
>> this seems to be a general issue.
> This is keeping same coding style as rest of file. So keeping as is. 
Ugh. Add to a cleanup list.
>
>>> @@ -2536,16 +2536,23 @@ typedef int (test_case_function)(struct
>> active_device *ad,
>>>  			ops[j]->ldpc_dec.harq_combined_output.offset =
>>>  					harq_offset;
>>>  			ops[j]->ldpc_dec.harq_combined_input.offset = 0;
>>> -			rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
>>> -					&ops[j], 1);
>>> -			ret = 0;
>>> -			while (ret == 0)
>>> -				ret = rte_bbdev_dequeue_ldpc_dec_ops(
>>> -					dev_id, queue_id, &ops_deq[j], 1);
>>> +			harq_offset += HARQ_INCR;
>>> +		}
>>> +		rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
>> &ops[0], n);
>> Add check the return is 'n'
>>> +		deq = 0;
>>> +		while (deq != n)
>>> +			deq += rte_bbdev_dequeue_ldpc_dec_ops(
>>> +					dev_id, queue_id, &ops_deq[deq],
>>> +					n - deq);
>> Add check the return >= 0
> This cannot be <0.  uint16_t

ok

Tom

>
>> Tom
>>
>>> +		/* Restore the operations */
>>> +		for (j = 0; j < n; ++j) {
>>>  			ops[j]->ldpc_dec.op_flags = flags;
>>> -			ops[j]->ldpc_dec.harq_combined_input =
>> save_hc_in;
>>> -			ops[j]->ldpc_dec.harq_combined_output =
>> save_hc_out;
>>> +			ops[j]->ldpc_dec.harq_combined_input =
>> save_hc_in[j];
>>> +			ops[j]->ldpc_dec.harq_combined_output =
>> save_hc_out[j];
>>>  		}
>>> +	}
>>> +	harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
>>> +	for (j = 0; j < n; ++j) {
>>>  		/* Adjust HARQ offset when we reach external DDR */
>>>  		if (mem_in || hc_in)
>>>  			ops[j]->ldpc_dec.harq_combined_input.offset
>>> @@ -3231,11 +3238,9 @@ typedef int (test_case_function)(struct
>> active_device *ad,
>>>  				mbuf_reset(
>>>  				ops_enq[j]-
>>> ldpc_dec.harq_combined_output.data);
>>>  		}
>>> -		if (extDdr) {
>>> -			bool preload = i == (TEST_REPETITIONS - 1);
>>> +		if (extDdr)
>>>  			preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
>>> -					num_ops, preload);
>>> -		}
>>> +					num_ops, true);
>>>  		start_time = rte_rdtsc_precise();
>>>
>>>  		for (enq = 0, deq = 0; enq < num_ops;) { @@ -3362,11
>> +3367,9 @@
>>> typedef int (test_case_function)(struct active_device *ad,
>>>  				mbuf_reset(
>>>  				ops_enq[j]-
>>> ldpc_dec.harq_combined_output.data);
>>>  		}
>>> -		if (extDdr) {
>>> -			bool preload = i == (TEST_REPETITIONS - 1);
>>> +		if (extDdr)
>>>  			preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
>>> -					num_ops, preload);
>>> -		}
>>> +					num_ops, true);
>>>  		start_time = rte_rdtsc_precise();
>>>
>>>  		for (enq = 0, deq = 0; enq < num_ops;) {
  

Patch

diff --git a/app/test-bbdev/main.h b/app/test-bbdev/main.h
index fb3dec8..dc10a50 100644
--- a/app/test-bbdev/main.h
+++ b/app/test-bbdev/main.h
@@ -17,6 +17,7 @@ 
 #define TEST_SKIPPED    1
 
 #define MAX_BURST 512U
+#define MAX_OPS 1024U
 #define DEFAULT_BURST 32U
 #define DEFAULT_OPS 64U
 #define DEFAULT_ITER 6U
diff --git a/app/test-bbdev/test_bbdev_perf.c b/app/test-bbdev/test_bbdev_perf.c
index b62848e..f30cbdb 100644
--- a/app/test-bbdev/test_bbdev_perf.c
+++ b/app/test-bbdev/test_bbdev_perf.c
@@ -2513,20 +2513,20 @@  typedef int (test_case_function)(struct active_device *ad,
 		bool preload)
 {
 	uint16_t j;
-	int ret;
-	uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * 1024;
-	struct rte_bbdev_op_data save_hc_in, save_hc_out;
-	struct rte_bbdev_dec_op *ops_deq[MAX_BURST];
+	int deq;
+	uint32_t harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
+	struct rte_bbdev_op_data save_hc_in[MAX_OPS], save_hc_out[MAX_OPS];
+	struct rte_bbdev_dec_op *ops_deq[MAX_OPS];
 	uint32_t flags = ops[0]->ldpc_dec.op_flags;
 	bool mem_in = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE;
 	bool hc_in = flags & RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE;
 	bool mem_out = flags & RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
 	bool hc_out = flags & RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE;
 	bool h_comp = flags & RTE_BBDEV_LDPC_HARQ_6BIT_COMPRESSION;
-	for (j = 0; j < n; ++j) {
-		if ((mem_in || hc_in) && preload) {
-			save_hc_in = ops[j]->ldpc_dec.harq_combined_input;
-			save_hc_out = ops[j]->ldpc_dec.harq_combined_output;
+	if ((mem_in || hc_in) && preload) {
+		for (j = 0; j < n; ++j) {
+			save_hc_in[j] = ops[j]->ldpc_dec.harq_combined_input;
+			save_hc_out[j] = ops[j]->ldpc_dec.harq_combined_output;
 			ops[j]->ldpc_dec.op_flags =
 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK +
 				RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE;
@@ -2536,16 +2536,23 @@  typedef int (test_case_function)(struct active_device *ad,
 			ops[j]->ldpc_dec.harq_combined_output.offset =
 					harq_offset;
 			ops[j]->ldpc_dec.harq_combined_input.offset = 0;
-			rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id,
-					&ops[j], 1);
-			ret = 0;
-			while (ret == 0)
-				ret = rte_bbdev_dequeue_ldpc_dec_ops(
-					dev_id, queue_id, &ops_deq[j], 1);
+			harq_offset += HARQ_INCR;
+		}
+		rte_bbdev_enqueue_ldpc_dec_ops(dev_id, queue_id, &ops[0], n);
+		deq = 0;
+		while (deq != n)
+			deq += rte_bbdev_dequeue_ldpc_dec_ops(
+					dev_id, queue_id, &ops_deq[deq],
+					n - deq);
+		/* Restore the operations */
+		for (j = 0; j < n; ++j) {
 			ops[j]->ldpc_dec.op_flags = flags;
-			ops[j]->ldpc_dec.harq_combined_input = save_hc_in;
-			ops[j]->ldpc_dec.harq_combined_output = save_hc_out;
+			ops[j]->ldpc_dec.harq_combined_input = save_hc_in[j];
+			ops[j]->ldpc_dec.harq_combined_output = save_hc_out[j];
 		}
+	}
+	harq_offset = (uint32_t) queue_id * HARQ_INCR * MAX_OPS;
+	for (j = 0; j < n; ++j) {
 		/* Adjust HARQ offset when we reach external DDR */
 		if (mem_in || hc_in)
 			ops[j]->ldpc_dec.harq_combined_input.offset
@@ -3231,11 +3238,9 @@  typedef int (test_case_function)(struct active_device *ad,
 				mbuf_reset(
 				ops_enq[j]->ldpc_dec.harq_combined_output.data);
 		}
-		if (extDdr) {
-			bool preload = i == (TEST_REPETITIONS - 1);
+		if (extDdr)
 			preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
-					num_ops, preload);
-		}
+					num_ops, true);
 		start_time = rte_rdtsc_precise();
 
 		for (enq = 0, deq = 0; enq < num_ops;) {
@@ -3362,11 +3367,9 @@  typedef int (test_case_function)(struct active_device *ad,
 				mbuf_reset(
 				ops_enq[j]->ldpc_dec.harq_combined_output.data);
 		}
-		if (extDdr) {
-			bool preload = i == (TEST_REPETITIONS - 1);
+		if (extDdr)
 			preload_harq_ddr(tp->dev_id, queue_id, ops_enq,
-					num_ops, preload);
-		}
+					num_ops, true);
 		start_time = rte_rdtsc_precise();
 
 		for (enq = 0, deq = 0; enq < num_ops;) {