[v2] net/ark: fix index arithmetic optimization bug

Message ID 20240716213939.2561065-1-ed.czeck@atomicrules.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Headers
Series [v2] net/ark: fix index arithmetic optimization bug |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/loongarch-unit-testing success Unit Testing PASS
ci/loongarch-compilation success Compilation OK
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS
ci/github-robot: build success github build: passed
ci/intel-Functional success Functional PASS
ci/iol-abi-testing success Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-marvell-Functional success Functional Testing PASS
ci/iol-broadcom-Functional success Functional Testing PASS
ci/iol-unit-amd64-testing pending Testing pending
ci/iol-unit-arm64-testing pending Testing pending
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-compile-amd64-testing pending Testing pending
ci/iol-compile-arm64-testing success Testing PASS
ci/iol-sample-apps-testing success Testing PASS

Commit Message

Ed Czeck July 16, 2024, 9:39 p.m. UTC
fix for compiler optimizer error using int32_t.
(a - b) > 0 can behave differently under optimization
at values near max and min bounds.
This patch replaces int32_t with uint32_t except for
necessary casts.

Fixes: 9ee9e0d3b85e ("net/ark: update to reflect FPGA updates")
Cc: stable@dpdk.org

Signed-off-by: Ed Czeck <ed.czeck@atomicrules.com>
---
v2:
* update patch to apply to dpdk-next-net
---
 drivers/net/ark/ark_ethdev_tx.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)
  

Comments

Ferruh Yigit July 17, 2024, 10:14 a.m. UTC | #1
On 7/16/2024 10:39 PM, Ed Czeck wrote:
> fix for compiler optimizer error using int32_t.
> (a - b) > 0 can behave differently under optimization
> at values near max and min bounds.
>

Hi Ed,

Is this a compiler optimization error, or could it be related to the
undefined behavior of signed integer overflow?
Although it is undefined, I guess compilers wrap the value to INT_MIN.

Just to understand the issue better, so that we can apply the learning to
other code bases, can you please share values that create the unexpected result?


> This patch replaces int32_t with uint32_t except for
> necessary casts.
> 
> Fixes: 9ee9e0d3b85e ("net/ark: update to reflect FPGA updates")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Ed Czeck <ed.czeck@atomicrules.com>
> ---
> v2:
> * update patch to apply to dpdk-next-net
> ---
>  drivers/net/ark/ark_ethdev_tx.c | 19 ++++++++++---------
>  1 file changed, 10 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/net/ark/ark_ethdev_tx.c b/drivers/net/ark/ark_ethdev_tx.c
> index 9c89c85f50..30130b08de 100644
> --- a/drivers/net/ark/ark_ethdev_tx.c
> +++ b/drivers/net/ark/ark_ethdev_tx.c
> @@ -39,8 +39,8 @@ struct __rte_cache_aligned ark_tx_queue {
>  	uint32_t queue_mask;
>  
>  	/* 3 indexes to the paired data rings. */
> -	int32_t prod_index;		/* where to put the next one */
> -	int32_t free_index;		/* mbuf has been freed */
> +	uint32_t prod_index;		/* where to put the next one */
> +	uint32_t free_index;		/* mbuf has been freed */
>  
>  	/* The queue Id is used to identify the HW Q */
>  	uint16_t phys_qid;
> @@ -49,7 +49,7 @@ struct __rte_cache_aligned ark_tx_queue {
>  
>  	/* next cache line - fields written by device */
>  	alignas(RTE_CACHE_LINE_MIN_SIZE) RTE_MARKER cacheline1;
> -	volatile int32_t cons_index;		/* hw is done, can be freed */
> +	volatile uint32_t cons_index;		/* hw is done, can be freed */
>  };
>  
>  /* Forward declarations */
> @@ -108,7 +108,7 @@ eth_ark_xmit_pkts(void *vtxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
>  	uint32_t user_meta[5];
>  
>  	int stat;
> -	int32_t prod_index_limit;
> +	uint32_t prod_index_limit;
>  	uint16_t nb;
>  	uint8_t user_len = 0;
>  	const uint32_t min_pkt_len = ARK_MIN_TX_PKTLEN;
> @@ -124,7 +124,7 @@ eth_ark_xmit_pkts(void *vtxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
>  	prod_index_limit = queue->queue_size + queue->free_index - 4;
>  
>  	for (nb = 0;
> -	     (nb < nb_pkts) && (prod_index_limit - queue->prod_index) > 0;
> +	     (nb < nb_pkts) && (int32_t)(prod_index_limit - queue->prod_index) > 0;
>

I don't know the possible ranges of the values, but in the above case, if the
result of "prod_index_limit - queue->prod_index" is bigger than INT_MAX
but smaller than UINT_MAX, then although the value is positive, casting it to
'int' will make it negative and the "> 0" check will give an unexpected result.


>  	     ++nb) {
>  		mbuf = tx_pkts[nb];
>  
> @@ -194,13 +194,13 @@ eth_ark_tx_jumbo(struct ark_tx_queue *queue, struct rte_mbuf *mbuf,
>  		 uint32_t *user_meta, uint8_t meta_cnt)
>  {
>  	struct rte_mbuf *next;
> -	int32_t free_queue_space;
> +	uint32_t free_queue_space;
>  	uint8_t flags = ARK_DDM_SOP;
>  
>  	free_queue_space = queue->queue_mask -
>  		(queue->prod_index - queue->free_index);
>  	/* We need up to 4 mbufs for first header and 2 for subsequent ones */
> -	if (unlikely(free_queue_space < (2 + (2 * mbuf->nb_segs))))
> +	if (unlikely(free_queue_space < (2U + (2U * mbuf->nb_segs))))
>  		return -1;
>  
>  	while (mbuf != NULL) {
> @@ -392,10 +392,11 @@ free_completed_tx(struct ark_tx_queue *queue)
>  {
>  	struct rte_mbuf *mbuf;
>  	union ark_tx_meta *meta;
> -	int32_t top_index;
> +	uint32_t top_index;
>  
>  	top_index = queue->cons_index;	/* read once */
> -	while ((top_index - queue->free_index) > 0) {
> +
> +	while ((int32_t)(top_index - queue->free_index) > 0) {
>  		meta = &queue->meta_q[queue->free_index & queue->queue_mask];
>  		if (likely((meta->flags & ARK_DDM_SOP) != 0)) {
>  			mbuf = queue->bufs[queue->free_index &
  

Patch

diff --git a/drivers/net/ark/ark_ethdev_tx.c b/drivers/net/ark/ark_ethdev_tx.c
index 9c89c85f50..30130b08de 100644
--- a/drivers/net/ark/ark_ethdev_tx.c
+++ b/drivers/net/ark/ark_ethdev_tx.c
@@ -39,8 +39,8 @@  struct __rte_cache_aligned ark_tx_queue {
 	uint32_t queue_mask;
 
 	/* 3 indexes to the paired data rings. */
-	int32_t prod_index;		/* where to put the next one */
-	int32_t free_index;		/* mbuf has been freed */
+	uint32_t prod_index;		/* where to put the next one */
+	uint32_t free_index;		/* mbuf has been freed */
 
 	/* The queue Id is used to identify the HW Q */
 	uint16_t phys_qid;
@@ -49,7 +49,7 @@  struct __rte_cache_aligned ark_tx_queue {
 
 	/* next cache line - fields written by device */
 	alignas(RTE_CACHE_LINE_MIN_SIZE) RTE_MARKER cacheline1;
-	volatile int32_t cons_index;		/* hw is done, can be freed */
+	volatile uint32_t cons_index;		/* hw is done, can be freed */
 };
 
 /* Forward declarations */
@@ -108,7 +108,7 @@  eth_ark_xmit_pkts(void *vtxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 	uint32_t user_meta[5];
 
 	int stat;
-	int32_t prod_index_limit;
+	uint32_t prod_index_limit;
 	uint16_t nb;
 	uint8_t user_len = 0;
 	const uint32_t min_pkt_len = ARK_MIN_TX_PKTLEN;
@@ -124,7 +124,7 @@  eth_ark_xmit_pkts(void *vtxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 	prod_index_limit = queue->queue_size + queue->free_index - 4;
 
 	for (nb = 0;
-	     (nb < nb_pkts) && (prod_index_limit - queue->prod_index) > 0;
+	     (nb < nb_pkts) && (int32_t)(prod_index_limit - queue->prod_index) > 0;
 	     ++nb) {
 		mbuf = tx_pkts[nb];
 
@@ -194,13 +194,13 @@  eth_ark_tx_jumbo(struct ark_tx_queue *queue, struct rte_mbuf *mbuf,
 		 uint32_t *user_meta, uint8_t meta_cnt)
 {
 	struct rte_mbuf *next;
-	int32_t free_queue_space;
+	uint32_t free_queue_space;
 	uint8_t flags = ARK_DDM_SOP;
 
 	free_queue_space = queue->queue_mask -
 		(queue->prod_index - queue->free_index);
 	/* We need up to 4 mbufs for first header and 2 for subsequent ones */
-	if (unlikely(free_queue_space < (2 + (2 * mbuf->nb_segs))))
+	if (unlikely(free_queue_space < (2U + (2U * mbuf->nb_segs))))
 		return -1;
 
 	while (mbuf != NULL) {
@@ -392,10 +392,11 @@  free_completed_tx(struct ark_tx_queue *queue)
 {
 	struct rte_mbuf *mbuf;
 	union ark_tx_meta *meta;
-	int32_t top_index;
+	uint32_t top_index;
 
 	top_index = queue->cons_index;	/* read once */
-	while ((top_index - queue->free_index) > 0) {
+
+	while ((int32_t)(top_index - queue->free_index) > 0) {
 		meta = &queue->meta_q[queue->free_index & queue->queue_mask];
 		if (likely((meta->flags & ARK_DDM_SOP) != 0)) {
 			mbuf = queue->bufs[queue->free_index &