From patchwork Thu Oct 26 00:31:54 2023 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Tyler Retzlaff X-Patchwork-Id: 133339 X-Patchwork-Delegate: david.marchand@redhat.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id E97AF43200; Thu, 26 Oct 2023 02:34:08 +0200 (CEST) Received: from mails.dpdk.org (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 37D9142E22; Thu, 26 Oct 2023 02:32:25 +0200 (CEST) Received: from linux.microsoft.com (linux.microsoft.com [13.77.154.182]) by mails.dpdk.org (Postfix) with ESMTP id F326E4027F for ; Thu, 26 Oct 2023 02:32:01 +0200 (CEST) Received: by linux.microsoft.com (Postfix, from userid 1086) id 72D6B20B74D3; Wed, 25 Oct 2023 17:31:59 -0700 (PDT) DKIM-Filter: OpenDKIM Filter v2.11.0 linux.microsoft.com 72D6B20B74D3 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linux.microsoft.com; s=default; t=1698280320; bh=U/2p6USff3gRGF6E70+OvgfXRTqkZBxg2gOGL7xCN+U=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=rbAWfK/T3/7bSLLGDYnm2ZvPIgZ37JGPz5xBwc6KDDSVVSPmgQIwvxfsaI0K2ohcx eZ+b1LTXMY1VIakimLzp4j4EU0FYKPn9+X63VB00lYWzaxmB6lQP4OWKZO0oafi506 o1xOXMGGpRz1bYZqe1RiV9ygQAA/+xtj01pb5lb0= From: Tyler Retzlaff To: dev@dpdk.org Cc: Akhil Goyal , Anatoly Burakov , Andrew Rybchenko , Bruce Richardson , Chenbo Xia , Ciara Power , David Christensen , David Hunt , Dmitry Kozlyuk , Dmitry Malloy , Elena Agostini , Erik Gabriel Carrillo , Fan Zhang , Ferruh Yigit , Harman Kalra , Harry van Haaren , Honnappa Nagarahalli , Jerin Jacob , Konstantin Ananyev , Matan Azrad , Maxime Coquelin , Narcisa Ana Maria Vasile , Nicolas Chautru , Olivier Matz , Ori Kam , Pallavi Kadam , Pavan Nikhilesh , Reshma Pattan , Sameh Gobriel , Shijith Thotton , Sivaprasad Tummala , Stephen Hemminger , Suanming Mou , Sunil Kumar Kori , Thomas Monjalon , Viacheslav Ovsiienko , Vladimir Medvedkin , Yipeng Wang , Tyler Retzlaff Subject: [PATCH v3 19/19] ring: use rte optional stdatomic API Date: Wed, 25 Oct 2023 17:31:54 -0700 Message-Id: <1698280314-25861-20-git-send-email-roretzla@linux.microsoft.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1698280314-25861-1-git-send-email-roretzla@linux.microsoft.com> References: <1697497745-20664-1-git-send-email-roretzla@linux.microsoft.com> <1698280314-25861-1-git-send-email-roretzla@linux.microsoft.com> X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Replace the use of gcc builtin __atomic_xxx intrinsics with corresponding rte_atomic_xxx optional stdatomic API Signed-off-by: Tyler Retzlaff Acked-by: Konstantin Ananyev --- drivers/net/mlx5/mlx5_hws_cnt.h | 4 ++-- lib/ring/rte_ring_c11_pvt.h | 47 +++++++++++++++++++++------------------ lib/ring/rte_ring_core.h | 12 +++++----- lib/ring/rte_ring_generic_pvt.h | 16 +++++++------ lib/ring/rte_ring_hts_elem_pvt.h | 22 +++++++++--------- lib/ring/rte_ring_peek_elem_pvt.h | 6 ++--- lib/ring/rte_ring_rts_elem_pvt.h | 27 +++++++++++----------- 7 files changed, 71 insertions(+), 63 deletions(-) diff --git a/drivers/net/mlx5/mlx5_hws_cnt.h b/drivers/net/mlx5/mlx5_hws_cnt.h index f462665..dcd5cec 100644 --- a/drivers/net/mlx5/mlx5_hws_cnt.h +++ b/drivers/net/mlx5/mlx5_hws_cnt.h @@ -386,7 +386,7 @@ struct mlx5_hws_age_param { MLX5_ASSERT(r->prod.sync_type == RTE_RING_SYNC_ST); MLX5_ASSERT(r->cons.sync_type == RTE_RING_SYNC_ST); - current_head = __atomic_load_n(&r->prod.head, __ATOMIC_RELAXED); + current_head = rte_atomic_load_explicit(&r->prod.head, rte_memory_order_relaxed); MLX5_ASSERT(n <= r->capacity); MLX5_ASSERT(n <= rte_ring_count(r)); revert2head = current_head - n; @@ -394,7 +394,7 @@ struct mlx5_hws_age_param { __rte_ring_get_elem_addr(r, revert2head, sizeof(cnt_id_t), n, &zcd->ptr1, &zcd->n1, &zcd->ptr2); /* Update tail */ - __atomic_store_n(&r->prod.tail, revert2head, __ATOMIC_RELEASE); + rte_atomic_store_explicit(&r->prod.tail, revert2head, rte_memory_order_release); return n; } diff --git a/lib/ring/rte_ring_c11_pvt.h b/lib/ring/rte_ring_c11_pvt.h index f895950..5c10ad8 100644 --- a/lib/ring/rte_ring_c11_pvt.h +++ b/lib/ring/rte_ring_c11_pvt.h @@ -22,9 +22,10 @@ * we need to wait for them to complete */ if (!single) - rte_wait_until_equal_32(&ht->tail, old_val, __ATOMIC_RELAXED); + rte_wait_until_equal_32((uint32_t *)(uintptr_t)&ht->tail, old_val, + rte_memory_order_relaxed); - __atomic_store_n(&ht->tail, new_val, __ATOMIC_RELEASE); + rte_atomic_store_explicit(&ht->tail, new_val, rte_memory_order_release); } /** @@ -61,19 +62,19 @@ unsigned int max = n; int success; - *old_head = __atomic_load_n(&r->prod.head, __ATOMIC_RELAXED); + *old_head = rte_atomic_load_explicit(&r->prod.head, rte_memory_order_relaxed); do { /* Reset n to the initial burst count */ n = max; /* Ensure the head is read before tail */ - __atomic_thread_fence(__ATOMIC_ACQUIRE); + __atomic_thread_fence(rte_memory_order_acquire); /* load-acquire synchronize with store-release of ht->tail * in update_tail. */ - cons_tail = __atomic_load_n(&r->cons.tail, - __ATOMIC_ACQUIRE); + cons_tail = rte_atomic_load_explicit(&r->cons.tail, + rte_memory_order_acquire); /* The subtraction is done between two unsigned 32bits value * (the result is always modulo 32 bits even if we have @@ -91,14 +92,15 @@ return 0; *new_head = *old_head + n; - if (is_sp) - r->prod.head = *new_head, success = 1; - else + if (is_sp) { + r->prod.head = *new_head; + success = 1; + } else /* on failure, *old_head is updated */ - success = __atomic_compare_exchange_n(&r->prod.head, + success = rte_atomic_compare_exchange_strong_explicit(&r->prod.head, old_head, *new_head, - 0, __ATOMIC_RELAXED, - __ATOMIC_RELAXED); + rte_memory_order_relaxed, + rte_memory_order_relaxed); } while (unlikely(success == 0)); return n; } @@ -137,19 +139,19 @@ int success; /* move cons.head atomically */ - *old_head = __atomic_load_n(&r->cons.head, __ATOMIC_RELAXED); + *old_head = rte_atomic_load_explicit(&r->cons.head, rte_memory_order_relaxed); do { /* Restore n as it may change every loop */ n = max; /* Ensure the head is read before tail */ - __atomic_thread_fence(__ATOMIC_ACQUIRE); + __atomic_thread_fence(rte_memory_order_acquire); /* this load-acquire synchronize with store-release of ht->tail * in update_tail. */ - prod_tail = __atomic_load_n(&r->prod.tail, - __ATOMIC_ACQUIRE); + prod_tail = rte_atomic_load_explicit(&r->prod.tail, + rte_memory_order_acquire); /* The subtraction is done between two unsigned 32bits value * (the result is always modulo 32 bits even if we have @@ -166,14 +168,15 @@ return 0; *new_head = *old_head + n; - if (is_sc) - r->cons.head = *new_head, success = 1; - else + if (is_sc) { + r->cons.head = *new_head; + success = 1; + } else /* on failure, *old_head will be updated */ - success = __atomic_compare_exchange_n(&r->cons.head, + success = rte_atomic_compare_exchange_strong_explicit(&r->cons.head, old_head, *new_head, - 0, __ATOMIC_RELAXED, - __ATOMIC_RELAXED); + rte_memory_order_relaxed, + rte_memory_order_relaxed); } while (unlikely(success == 0)); return n; } diff --git a/lib/ring/rte_ring_core.h b/lib/ring/rte_ring_core.h index 327fdcf..b770873 100644 --- a/lib/ring/rte_ring_core.h +++ b/lib/ring/rte_ring_core.h @@ -66,8 +66,8 @@ enum rte_ring_sync_type { * but offset for *sync_type* and *tail* values should remain the same. */ struct rte_ring_headtail { - volatile uint32_t head; /**< prod/consumer head. */ - volatile uint32_t tail; /**< prod/consumer tail. */ + volatile RTE_ATOMIC(uint32_t) head; /**< prod/consumer head. */ + volatile RTE_ATOMIC(uint32_t) tail; /**< prod/consumer tail. */ union { /** sync type of prod/cons */ enum rte_ring_sync_type sync_type; @@ -78,7 +78,7 @@ struct rte_ring_headtail { union __rte_ring_rts_poscnt { /** raw 8B value to read/write *cnt* and *pos* as one atomic op */ - uint64_t raw __rte_aligned(8); + RTE_ATOMIC(uint64_t) raw __rte_aligned(8); struct { uint32_t cnt; /**< head/tail reference counter */ uint32_t pos; /**< head/tail position */ @@ -94,10 +94,10 @@ struct rte_ring_rts_headtail { union __rte_ring_hts_pos { /** raw 8B value to read/write *head* and *tail* as one atomic op */ - uint64_t raw __rte_aligned(8); + RTE_ATOMIC(uint64_t) raw __rte_aligned(8); struct { - uint32_t head; /**< head position */ - uint32_t tail; /**< tail position */ + RTE_ATOMIC(uint32_t) head; /**< head position */ + RTE_ATOMIC(uint32_t) tail; /**< tail position */ } pos; }; diff --git a/lib/ring/rte_ring_generic_pvt.h b/lib/ring/rte_ring_generic_pvt.h index 5acb6e5..457f41d 100644 --- a/lib/ring/rte_ring_generic_pvt.h +++ b/lib/ring/rte_ring_generic_pvt.h @@ -23,7 +23,8 @@ * we need to wait for them to complete */ if (!single) - rte_wait_until_equal_32(&ht->tail, old_val, __ATOMIC_RELAXED); + rte_wait_until_equal_32((volatile uint32_t *)(uintptr_t)&ht->tail, old_val, + rte_memory_order_relaxed); ht->tail = new_val; } @@ -89,10 +90,11 @@ return 0; *new_head = *old_head + n; - if (is_sp) - r->prod.head = *new_head, success = 1; - else - success = rte_atomic32_cmpset(&r->prod.head, + if (is_sp) { + r->prod.head = *new_head; + success = 1; + } else + success = rte_atomic32_cmpset((uint32_t *)(uintptr_t)&r->prod.head, *old_head, *new_head); } while (unlikely(success == 0)); return n; @@ -162,8 +164,8 @@ rte_smp_rmb(); success = 1; } else { - success = rte_atomic32_cmpset(&r->cons.head, *old_head, - *new_head); + success = rte_atomic32_cmpset((uint32_t *)(uintptr_t)&r->cons.head, + *old_head, *new_head); } } while (unlikely(success == 0)); return n; diff --git a/lib/ring/rte_ring_hts_elem_pvt.h b/lib/ring/rte_ring_hts_elem_pvt.h index a8678d3..91f5eec 100644 --- a/lib/ring/rte_ring_hts_elem_pvt.h +++ b/lib/ring/rte_ring_hts_elem_pvt.h @@ -10,6 +10,8 @@ #ifndef _RTE_RING_HTS_ELEM_PVT_H_ #define _RTE_RING_HTS_ELEM_PVT_H_ +#include + /** * @file rte_ring_hts_elem_pvt.h * It is not recommended to include this file directly, @@ -30,7 +32,7 @@ RTE_SET_USED(enqueue); tail = old_tail + num; - __atomic_store_n(&ht->ht.pos.tail, tail, __ATOMIC_RELEASE); + rte_atomic_store_explicit(&ht->ht.pos.tail, tail, rte_memory_order_release); } /** @@ -44,7 +46,7 @@ { while (p->pos.head != p->pos.tail) { rte_pause(); - p->raw = __atomic_load_n(&ht->ht.raw, __ATOMIC_ACQUIRE); + p->raw = rte_atomic_load_explicit(&ht->ht.raw, rte_memory_order_acquire); } } @@ -61,7 +63,7 @@ const uint32_t capacity = r->capacity; - op.raw = __atomic_load_n(&r->hts_prod.ht.raw, __ATOMIC_ACQUIRE); + op.raw = rte_atomic_load_explicit(&r->hts_prod.ht.raw, rte_memory_order_acquire); do { /* Reset n to the initial burst count */ @@ -98,9 +100,9 @@ * - OOO reads of cons tail value * - OOO copy of elems from the ring */ - } while (__atomic_compare_exchange_n(&r->hts_prod.ht.raw, - &op.raw, np.raw, - 0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE) == 0); + } while (rte_atomic_compare_exchange_strong_explicit(&r->hts_prod.ht.raw, + (uint64_t *)(uintptr_t)&op.raw, np.raw, + rte_memory_order_acquire, rte_memory_order_acquire) == 0); *old_head = op.pos.head; return n; @@ -117,7 +119,7 @@ uint32_t n; union __rte_ring_hts_pos np, op; - op.raw = __atomic_load_n(&r->hts_cons.ht.raw, __ATOMIC_ACQUIRE); + op.raw = rte_atomic_load_explicit(&r->hts_cons.ht.raw, rte_memory_order_acquire); /* move cons.head atomically */ do { @@ -153,9 +155,9 @@ * - OOO reads of prod tail value * - OOO copy of elems from the ring */ - } while (__atomic_compare_exchange_n(&r->hts_cons.ht.raw, - &op.raw, np.raw, - 0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE) == 0); + } while (rte_atomic_compare_exchange_strong_explicit(&r->hts_cons.ht.raw, + (uint64_t *)(uintptr_t)&op.raw, np.raw, + rte_memory_order_acquire, rte_memory_order_acquire) == 0); *old_head = op.pos.head; return n; diff --git a/lib/ring/rte_ring_peek_elem_pvt.h b/lib/ring/rte_ring_peek_elem_pvt.h index bb0a7d5..b5f0822 100644 --- a/lib/ring/rte_ring_peek_elem_pvt.h +++ b/lib/ring/rte_ring_peek_elem_pvt.h @@ -59,7 +59,7 @@ pos = tail + num; ht->head = pos; - __atomic_store_n(&ht->tail, pos, __ATOMIC_RELEASE); + rte_atomic_store_explicit(&ht->tail, pos, rte_memory_order_release); } /** @@ -78,7 +78,7 @@ uint32_t n; union __rte_ring_hts_pos p; - p.raw = __atomic_load_n(&ht->ht.raw, __ATOMIC_RELAXED); + p.raw = rte_atomic_load_explicit(&ht->ht.raw, rte_memory_order_relaxed); n = p.pos.head - p.pos.tail; RTE_ASSERT(n >= num); @@ -104,7 +104,7 @@ p.pos.head = tail + num; p.pos.tail = p.pos.head; - __atomic_store_n(&ht->ht.raw, p.raw, __ATOMIC_RELEASE); + rte_atomic_store_explicit(&ht->ht.raw, p.raw, rte_memory_order_release); } /** diff --git a/lib/ring/rte_ring_rts_elem_pvt.h b/lib/ring/rte_ring_rts_elem_pvt.h index 7164213..1226503 100644 --- a/lib/ring/rte_ring_rts_elem_pvt.h +++ b/lib/ring/rte_ring_rts_elem_pvt.h @@ -31,18 +31,19 @@ * might preceded us, then don't update tail with new value. */ - ot.raw = __atomic_load_n(&ht->tail.raw, __ATOMIC_ACQUIRE); + ot.raw = rte_atomic_load_explicit(&ht->tail.raw, rte_memory_order_acquire); do { /* on 32-bit systems we have to do atomic read here */ - h.raw = __atomic_load_n(&ht->head.raw, __ATOMIC_RELAXED); + h.raw = rte_atomic_load_explicit(&ht->head.raw, rte_memory_order_relaxed); nt.raw = ot.raw; if (++nt.val.cnt == h.val.cnt) nt.val.pos = h.val.pos; - } while (__atomic_compare_exchange_n(&ht->tail.raw, &ot.raw, nt.raw, - 0, __ATOMIC_RELEASE, __ATOMIC_ACQUIRE) == 0); + } while (rte_atomic_compare_exchange_strong_explicit(&ht->tail.raw, + (uint64_t *)(uintptr_t)&ot.raw, nt.raw, + rte_memory_order_release, rte_memory_order_acquire) == 0); } /** @@ -59,7 +60,7 @@ while (h->val.pos - ht->tail.val.pos > max) { rte_pause(); - h->raw = __atomic_load_n(&ht->head.raw, __ATOMIC_ACQUIRE); + h->raw = rte_atomic_load_explicit(&ht->head.raw, rte_memory_order_acquire); } } @@ -76,7 +77,7 @@ const uint32_t capacity = r->capacity; - oh.raw = __atomic_load_n(&r->rts_prod.head.raw, __ATOMIC_ACQUIRE); + oh.raw = rte_atomic_load_explicit(&r->rts_prod.head.raw, rte_memory_order_acquire); do { /* Reset n to the initial burst count */ @@ -113,9 +114,9 @@ * - OOO reads of cons tail value * - OOO copy of elems to the ring */ - } while (__atomic_compare_exchange_n(&r->rts_prod.head.raw, - &oh.raw, nh.raw, - 0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE) == 0); + } while (rte_atomic_compare_exchange_strong_explicit(&r->rts_prod.head.raw, + (uint64_t *)(uintptr_t)&oh.raw, nh.raw, + rte_memory_order_acquire, rte_memory_order_acquire) == 0); *old_head = oh.val.pos; return n; @@ -132,7 +133,7 @@ uint32_t n; union __rte_ring_rts_poscnt nh, oh; - oh.raw = __atomic_load_n(&r->rts_cons.head.raw, __ATOMIC_ACQUIRE); + oh.raw = rte_atomic_load_explicit(&r->rts_cons.head.raw, rte_memory_order_acquire); /* move cons.head atomically */ do { @@ -168,9 +169,9 @@ * - OOO reads of prod tail value * - OOO copy of elems from the ring */ - } while (__atomic_compare_exchange_n(&r->rts_cons.head.raw, - &oh.raw, nh.raw, - 0, __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE) == 0); + } while (rte_atomic_compare_exchange_strong_explicit(&r->rts_cons.head.raw, + (uint64_t *)(uintptr_t)&oh.raw, nh.raw, + rte_memory_order_acquire, rte_memory_order_acquire) == 0); *old_head = oh.val.pos; return n;