[v2] net/ark: fix index arithmetic optimization bug
Checks
Commit Message
fix for compiler optimizer error using int32_t.
(a - b) > 0 can behave differently under optimization
at values near max and min bounds.
This patch replaces int32_t with uint32_t except for
necessary casts.
Fixes: 9ee9e0d3b85e ("net/ark: update to reflect FPGA updates")
Cc: stable@dpdk.org
Signed-off-by: Ed Czeck <ed.czeck@atomicrules.com>
---
v2:
* update patch to apply to dpdk-next-net
---
drivers/net/ark/ark_ethdev_tx.c | 19 ++++++++++---------
1 file changed, 10 insertions(+), 9 deletions(-)
Comments
On 7/16/2024 10:39 PM, Ed Czeck wrote:
> fix for compiler optimizer error using int32_t.
> (a - b) > 0 can behave differently under optimization
> at values near max and min bounds.
>
Hi Ed,
Is this compiler optimization error, or can it be related to the
undefined behavior of signed integer overflow?
Although it is undefined I guess compilers wrap value to INT_MIN.
Just to understand issue better, so that we can apply learning to other
code base, can you please share values that create unexpected result?
> This patch replaces int32_t with uint32_t except for
> necessary casts.
>
> Fixes: 9ee9e0d3b85e ("net/ark: update to reflect FPGA updates")
> Cc: stable@dpdk.org
>
> Signed-off-by: Ed Czeck <ed.czeck@atomicrules.com>
> ---
> v2:
> * update patch to apply to dpdk-next-net
> ---
> drivers/net/ark/ark_ethdev_tx.c | 19 ++++++++++---------
> 1 file changed, 10 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/net/ark/ark_ethdev_tx.c b/drivers/net/ark/ark_ethdev_tx.c
> index 9c89c85f50..30130b08de 100644
> --- a/drivers/net/ark/ark_ethdev_tx.c
> +++ b/drivers/net/ark/ark_ethdev_tx.c
> @@ -39,8 +39,8 @@ struct __rte_cache_aligned ark_tx_queue {
> uint32_t queue_mask;
>
> /* 3 indexes to the paired data rings. */
> - int32_t prod_index; /* where to put the next one */
> - int32_t free_index; /* mbuf has been freed */
> + uint32_t prod_index; /* where to put the next one */
> + uint32_t free_index; /* mbuf has been freed */
>
> /* The queue Id is used to identify the HW Q */
> uint16_t phys_qid;
> @@ -49,7 +49,7 @@ struct __rte_cache_aligned ark_tx_queue {
>
> /* next cache line - fields written by device */
> alignas(RTE_CACHE_LINE_MIN_SIZE) RTE_MARKER cacheline1;
> - volatile int32_t cons_index; /* hw is done, can be freed */
> + volatile uint32_t cons_index; /* hw is done, can be freed */
> };
>
> /* Forward declarations */
> @@ -108,7 +108,7 @@ eth_ark_xmit_pkts(void *vtxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
> uint32_t user_meta[5];
>
> int stat;
> - int32_t prod_index_limit;
> + uint32_t prod_index_limit;
> uint16_t nb;
> uint8_t user_len = 0;
> const uint32_t min_pkt_len = ARK_MIN_TX_PKTLEN;
> @@ -124,7 +124,7 @@ eth_ark_xmit_pkts(void *vtxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
> prod_index_limit = queue->queue_size + queue->free_index - 4;
>
> for (nb = 0;
> - (nb < nb_pkts) && (prod_index_limit - queue->prod_index) > 0;
> + (nb < nb_pkts) && (int32_t)(prod_index_limit - queue->prod_index) > 0;
>
I don't know possible ranges of the values, but in above case if the
result of "prod_index_limit - queue->prod_index", is bigger than INT_MAX
but smaller than UINT_MAX, although value is positive casting it to
'int' will make is negative and "> 0" check will be unexpected.
> ++nb) {
> mbuf = tx_pkts[nb];
>
> @@ -194,13 +194,13 @@ eth_ark_tx_jumbo(struct ark_tx_queue *queue, struct rte_mbuf *mbuf,
> uint32_t *user_meta, uint8_t meta_cnt)
> {
> struct rte_mbuf *next;
> - int32_t free_queue_space;
> + uint32_t free_queue_space;
> uint8_t flags = ARK_DDM_SOP;
>
> free_queue_space = queue->queue_mask -
> (queue->prod_index - queue->free_index);
> /* We need up to 4 mbufs for first header and 2 for subsequent ones */
> - if (unlikely(free_queue_space < (2 + (2 * mbuf->nb_segs))))
> + if (unlikely(free_queue_space < (2U + (2U * mbuf->nb_segs))))
> return -1;
>
> while (mbuf != NULL) {
> @@ -392,10 +392,11 @@ free_completed_tx(struct ark_tx_queue *queue)
> {
> struct rte_mbuf *mbuf;
> union ark_tx_meta *meta;
> - int32_t top_index;
> + uint32_t top_index;
>
> top_index = queue->cons_index; /* read once */
> - while ((top_index - queue->free_index) > 0) {
> +
> + while ((int32_t)(top_index - queue->free_index) > 0) {
> meta = &queue->meta_q[queue->free_index & queue->queue_mask];
> if (likely((meta->flags & ARK_DDM_SOP) != 0)) {
> mbuf = queue->bufs[queue->free_index &
@@ -39,8 +39,8 @@ struct __rte_cache_aligned ark_tx_queue {
uint32_t queue_mask;
/* 3 indexes to the paired data rings. */
- int32_t prod_index; /* where to put the next one */
- int32_t free_index; /* mbuf has been freed */
+ uint32_t prod_index; /* where to put the next one */
+ uint32_t free_index; /* mbuf has been freed */
/* The queue Id is used to identify the HW Q */
uint16_t phys_qid;
@@ -49,7 +49,7 @@ struct __rte_cache_aligned ark_tx_queue {
/* next cache line - fields written by device */
alignas(RTE_CACHE_LINE_MIN_SIZE) RTE_MARKER cacheline1;
- volatile int32_t cons_index; /* hw is done, can be freed */
+ volatile uint32_t cons_index; /* hw is done, can be freed */
};
/* Forward declarations */
@@ -108,7 +108,7 @@ eth_ark_xmit_pkts(void *vtxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
uint32_t user_meta[5];
int stat;
- int32_t prod_index_limit;
+ uint32_t prod_index_limit;
uint16_t nb;
uint8_t user_len = 0;
const uint32_t min_pkt_len = ARK_MIN_TX_PKTLEN;
@@ -124,7 +124,7 @@ eth_ark_xmit_pkts(void *vtxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
prod_index_limit = queue->queue_size + queue->free_index - 4;
for (nb = 0;
- (nb < nb_pkts) && (prod_index_limit - queue->prod_index) > 0;
+ (nb < nb_pkts) && (int32_t)(prod_index_limit - queue->prod_index) > 0;
++nb) {
mbuf = tx_pkts[nb];
@@ -194,13 +194,13 @@ eth_ark_tx_jumbo(struct ark_tx_queue *queue, struct rte_mbuf *mbuf,
uint32_t *user_meta, uint8_t meta_cnt)
{
struct rte_mbuf *next;
- int32_t free_queue_space;
+ uint32_t free_queue_space;
uint8_t flags = ARK_DDM_SOP;
free_queue_space = queue->queue_mask -
(queue->prod_index - queue->free_index);
/* We need up to 4 mbufs for first header and 2 for subsequent ones */
- if (unlikely(free_queue_space < (2 + (2 * mbuf->nb_segs))))
+ if (unlikely(free_queue_space < (2U + (2U * mbuf->nb_segs))))
return -1;
while (mbuf != NULL) {
@@ -392,10 +392,11 @@ free_completed_tx(struct ark_tx_queue *queue)
{
struct rte_mbuf *mbuf;
union ark_tx_meta *meta;
- int32_t top_index;
+ uint32_t top_index;
top_index = queue->cons_index; /* read once */
- while ((top_index - queue->free_index) > 0) {
+
+ while ((int32_t)(top_index - queue->free_index) > 0) {
meta = &queue->meta_q[queue->free_index & queue->queue_mask];
if (likely((meta->flags & ARK_DDM_SOP) != 0)) {
mbuf = queue->bufs[queue->free_index &