From patchwork Wed Mar 27 22:37:36 2024
X-Patchwork-Submitter: Tyler Retzlaff
X-Patchwork-Id: 138899
X-Patchwork-Delegate: thomas@monjalon.net
From: Tyler Retzlaff
To: dev@dpdk.org
Cc: Mattias Rönnblom, Morten Brørup, Abdullah Sevincer, Ajit Khaparde,
 Alok Prasad, Anatoly Burakov, Andrew Rybchenko, Anoob Joseph,
 Bruce Richardson, Byron Marohn, Chenbo Xia, Chengwen Feng, Ciara Loftus,
 Ciara Power, Dariusz Sosnowski, David Hunt, Devendra Singh Rawat,
 Erik Gabriel Carrillo, Guoyang Zhou, Harman Kalra, Harry van Haaren,
 Honnappa Nagarahalli, Jakub Grajciar, Jerin Jacob, Jeroen de Borst,
 Jian Wang, Jiawen Wu, Jie Hai, Jingjing Wu, Joshua Washington, Joyce Kong,
 Junfeng Guo, Kevin Laatz, Konstantin Ananyev, Liang Ma, Long Li,
 Maciej Czekaj, Matan Azrad, Maxime Coquelin, Nicolas Chautru, Ori Kam,
 Pavan Nikhilesh, Peter Mccarthy, Rahul Lakkireddy, Reshma Pattan, Rosen Xu,
 Ruifeng Wang, Rushil Gupta, Sameh Gobriel, Sivaprasad Tummala,
 Somnath Kotur, Stephen Hemminger, Suanming Mou, Sunil Kumar Kori,
 Sunil Uttarwar, Tetsuya Mukawa, Vamsi Attunuru, Viacheslav Ovsiienko,
 Vladimir Medvedkin, Xiaoyun Wang, Yipeng Wang, Yisen Zhuang, Yuying Zhang,
 Ziyang Xuan, Tyler Retzlaff
Subject: [PATCH v3 23/45] event/opdl: use rte stdatomic API
Date: Wed, 27 Mar 2024 15:37:36 -0700
Message-Id: <1711579078-10624-24-git-send-email-roretzla@linux.microsoft.com>
In-Reply-To: <1711579078-10624-1-git-send-email-roretzla@linux.microsoft.com>
References: <1710967892-7046-1-git-send-email-roretzla@linux.microsoft.com>
 <1711579078-10624-1-git-send-email-roretzla@linux.microsoft.com>

Replace the use of gcc builtin __atomic_xxx intrinsics with the
corresponding rte_atomic_xxx optional rte stdatomic API.
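
Note for reviewers, not part of the change itself: the conversion is
mechanical. Atomically accessed fields gain the RTE_ATOMIC() specifier,
each __atomic_xxx_n builtin becomes the matching rte_atomic_xxx_explicit
wrapper, and the __ATOMIC_* memory orders become rte_memory_order_*.
A minimal sketch of the pattern follows, using a hypothetical demo_shared
structure and assuming the rte_stdatomic.h wrappers exercised by this
series:

/* Minimal sketch of the conversion pattern on a hypothetical demo_shared
 * structure (illustration only, not code from this patch). It assumes the
 * rte_stdatomic.h wrappers used throughout the series: RTE_ATOMIC(), the
 * rte_atomic_*_explicit() calls and the rte_memory_order_* constants.
 */
#include <stdbool.h>
#include <stdint.h>

#include <rte_stdatomic.h>

struct demo_shared {
	RTE_ATOMIC(uint32_t) tail;	/* was: uint32_t tail; */
};

/* was: __atomic_load_n(&s->tail, __ATOMIC_ACQUIRE) */
static inline uint32_t
demo_load_tail(struct demo_shared *s)
{
	return rte_atomic_load_explicit(&s->tail, rte_memory_order_acquire);
}

/* was: __atomic_store_n(&s->tail, v, __ATOMIC_RELEASE) */
static inline void
demo_store_tail(struct demo_shared *s, uint32_t v)
{
	rte_atomic_store_explicit(&s->tail, v, rte_memory_order_release);
}

/* was: __atomic_compare_exchange_n(&s->tail, &old, val, true,
 *			__ATOMIC_RELEASE, __ATOMIC_ACQUIRE)
 */
static inline bool
demo_cas_tail(struct demo_shared *s, uint32_t *old, uint32_t val)
{
	return rte_atomic_compare_exchange_weak_explicit(&s->tail, old, val,
			rte_memory_order_release,	/* memory order on success */
			rte_memory_order_acquire);	/* memory order on fail */
}

The only non-mechanical decision is the compare-and-swap: the weak variant
is chosen because the builtin was invoked with weak == true ("may fail
spuriously") and the callers already retry in a loop around rte_pause().
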
Signed-off-by: Tyler Retzlaff
Acked-by: Stephen Hemminger
---
 drivers/event/opdl/opdl_ring.c | 80 +++++++++++++++++++++---------------------
 1 file changed, 40 insertions(+), 40 deletions(-)

diff --git a/drivers/event/opdl/opdl_ring.c b/drivers/event/opdl/opdl_ring.c
index da5ea02..a86bfb8 100644
--- a/drivers/event/opdl/opdl_ring.c
+++ b/drivers/event/opdl/opdl_ring.c
@@ -47,12 +47,12 @@ struct shared_state {
 	/* Last known minimum sequence number of dependencies, used for multi
 	 * thread operation
 	 */
-	uint32_t available_seq;
+	RTE_ATOMIC(uint32_t) available_seq;
 	char _pad1[RTE_CACHE_LINE_SIZE * 3];
-	uint32_t head; /* Head sequence number (for multi thread operation) */
+	RTE_ATOMIC(uint32_t) head; /* Head sequence number (for multi thread operation) */
 	char _pad2[RTE_CACHE_LINE_SIZE * 3];
 	struct opdl_stage *stage; /* back pointer */
-	uint32_t tail; /* Tail sequence number */
+	RTE_ATOMIC(uint32_t) tail; /* Tail sequence number */
 	char _pad3[RTE_CACHE_LINE_SIZE * 2];
 } __rte_cache_aligned;
@@ -150,10 +150,10 @@ struct opdl_ring {
 available(const struct opdl_stage *s)
 {
 	if (s->threadsafe == true) {
-		uint32_t n = __atomic_load_n(&s->shared.available_seq,
-				__ATOMIC_ACQUIRE) -
-				__atomic_load_n(&s->shared.head,
-				__ATOMIC_ACQUIRE);
+		uint32_t n = rte_atomic_load_explicit(&s->shared.available_seq,
+				rte_memory_order_acquire) -
+				rte_atomic_load_explicit(&s->shared.head,
+				rte_memory_order_acquire);

 		/* Return 0 if available_seq needs to be updated */
 		return (n <= s->num_slots) ? n : 0;
@@ -169,7 +169,7 @@ struct opdl_ring {
 {
 	uint32_t i;
 	uint32_t this_tail = s->shared.tail;
-	uint32_t min_seq = __atomic_load_n(&s->deps[0]->tail, __ATOMIC_ACQUIRE);
+	uint32_t min_seq = rte_atomic_load_explicit(&s->deps[0]->tail, rte_memory_order_acquire);
 	/* Input stage sequence numbers are greater than the sequence numbers of
 	 * its dependencies so an offset of t->num_slots is needed when
 	 * calculating available slots and also the condition which is used to
@@ -180,16 +180,16 @@ struct opdl_ring {
 	if (is_input_stage(s)) {
 		wrap = s->num_slots;
 		for (i = 1; i < s->num_deps; i++) {
-			uint32_t seq = __atomic_load_n(&s->deps[i]->tail,
-					__ATOMIC_ACQUIRE);
+			uint32_t seq = rte_atomic_load_explicit(&s->deps[i]->tail,
+					rte_memory_order_acquire);
 			if ((this_tail - seq) > (this_tail - min_seq))
 				min_seq = seq;
 		}
 	} else {
 		wrap = 0;
 		for (i = 1; i < s->num_deps; i++) {
-			uint32_t seq = __atomic_load_n(&s->deps[i]->tail,
-					__ATOMIC_ACQUIRE);
+			uint32_t seq = rte_atomic_load_explicit(&s->deps[i]->tail,
+					rte_memory_order_acquire);
 			if ((seq - this_tail) < (min_seq - this_tail))
 				min_seq = seq;
 		}
@@ -198,8 +198,8 @@ struct opdl_ring {
 	if (s->threadsafe == false)
 		s->available_seq = min_seq + wrap;
 	else
-		__atomic_store_n(&s->shared.available_seq, min_seq + wrap,
-				__ATOMIC_RELEASE);
+		rte_atomic_store_explicit(&s->shared.available_seq, min_seq + wrap,
+				rte_memory_order_release);
 }

 /* Wait until the number of available slots reaches number requested */
@@ -299,7 +299,7 @@ struct opdl_ring {
 	copy_entries_in(t, head, entries, num_entries);

 	s->head += num_entries;
-	__atomic_store_n(&s->shared.tail, s->head, __ATOMIC_RELEASE);
+	rte_atomic_store_explicit(&s->shared.tail, s->head, rte_memory_order_release);

 	return num_entries;
 }
@@ -382,18 +382,18 @@ struct opdl_ring {
 		/* There should be no race condition here. If shared.tail
 		 * matches, no other core can update it until this one does.
 		 */
-		if (__atomic_load_n(&s->shared.tail, __ATOMIC_ACQUIRE) ==
+		if (rte_atomic_load_explicit(&s->shared.tail, rte_memory_order_acquire) ==
 				tail) {
 			if (num_entries >= (head - tail)) {
 				claim_mgr_remove(disclaims);
-				__atomic_store_n(&s->shared.tail, head,
-						__ATOMIC_RELEASE);
+				rte_atomic_store_explicit(&s->shared.tail, head,
+						rte_memory_order_release);
 				num_entries -= (head - tail);
 			} else {
 				claim_mgr_move_tail(disclaims, num_entries);
-				__atomic_store_n(&s->shared.tail,
+				rte_atomic_store_explicit(&s->shared.tail,
 						num_entries + tail,
-						__ATOMIC_RELEASE);
+						rte_memory_order_release);
 				num_entries = 0;
 			}
 		} else if (block == false)
@@ -421,7 +421,7 @@ struct opdl_ring {
 		opdl_stage_disclaim_multithread_n(s, disclaims->num_to_disclaim,
 				false);

-	*old_head = __atomic_load_n(&s->shared.head, __ATOMIC_ACQUIRE);
+	*old_head = rte_atomic_load_explicit(&s->shared.head, rte_memory_order_acquire);
 	while (true) {
 		bool success;
 		/* If called by opdl_ring_input(), claim does not need to be
@@ -441,11 +441,10 @@ struct opdl_ring {
 		if (*num_entries == 0)
 			return;

-		success = __atomic_compare_exchange_n(&s->shared.head, old_head,
+		success = rte_atomic_compare_exchange_weak_explicit(&s->shared.head, old_head,
 				*old_head + *num_entries,
-				true, /* may fail spuriously */
-				__ATOMIC_RELEASE, /* memory order on success */
-				__ATOMIC_ACQUIRE); /* memory order on fail */
+				rte_memory_order_release, /* memory order on success */
+				rte_memory_order_acquire); /* memory order on fail */
 		if (likely(success))
 			break;
 		rte_pause();
@@ -473,10 +472,11 @@ struct opdl_ring {
 	/* If another thread started inputting before this one, but hasn't
 	 * finished, we need to wait for it to complete to update the tail.
 	 */
-	rte_wait_until_equal_32(&s->shared.tail, old_head, __ATOMIC_ACQUIRE);
+	rte_wait_until_equal_32((uint32_t *)(uintptr_t)&s->shared.tail, old_head,
+		rte_memory_order_acquire);

-	__atomic_store_n(&s->shared.tail, old_head + num_entries,
-			__ATOMIC_RELEASE);
+	rte_atomic_store_explicit(&s->shared.tail, old_head + num_entries,
+			rte_memory_order_release);

 	return num_entries;
 }
@@ -526,8 +526,8 @@ struct opdl_ring {
 	for (j = 0; j < num_entries; j++) {
 		ev = (struct rte_event *)get_slot(t, s->head+j);

-		event = __atomic_load_n(&(ev->event),
-				__ATOMIC_ACQUIRE);
+		event = rte_atomic_load_explicit((uint64_t __rte_atomic *)&ev->event,
+				rte_memory_order_acquire);

 		opa_id = OPDL_OPA_MASK & (event >> OPDL_OPA_OFFSET);
 		flow_id = OPDL_FLOWID_MASK & event;
@@ -628,8 +628,8 @@ struct opdl_ring {
 				num_entries, s->head - old_tail);
 		num_entries = s->head - old_tail;
 	}
-	__atomic_store_n(&s->shared.tail, num_entries + old_tail,
-			__ATOMIC_RELEASE);
+	rte_atomic_store_explicit(&s->shared.tail, num_entries + old_tail,
+			rte_memory_order_release);
 }

 uint32_t
@@ -658,7 +658,7 @@ struct opdl_ring {
 	copy_entries_in(t, head, entries, num_entries);

 	s->head += num_entries;
-	__atomic_store_n(&s->shared.tail, s->head, __ATOMIC_RELEASE);
+	rte_atomic_store_explicit(&s->shared.tail, s->head, rte_memory_order_release);

 	return num_entries;

@@ -677,7 +677,7 @@ struct opdl_ring {
 	copy_entries_out(t, head, entries, num_entries);

 	s->head += num_entries;
-	__atomic_store_n(&s->shared.tail, s->head, __ATOMIC_RELEASE);
+	rte_atomic_store_explicit(&s->shared.tail, s->head, rte_memory_order_release);

 	return num_entries;
 }
@@ -756,7 +756,7 @@ struct opdl_ring {
 		return 0;
 	}
 	if (s->threadsafe == false) {
-		__atomic_store_n(&s->shared.tail, s->head, __ATOMIC_RELEASE);
+		rte_atomic_store_explicit(&s->shared.tail, s->head, rte_memory_order_release);
 		s->seq += s->num_claimed;
 		s->shadow_head = s->head;
 		s->num_claimed = 0;
@@ -1009,8 +1009,8 @@ struct opdl_ring *
 		ev_orig = (struct rte_event *)
			get_slot(t, s->shadow_head+i);

-		event = __atomic_load_n(&(ev_orig->event),
-				__ATOMIC_ACQUIRE);
+		event = rte_atomic_load_explicit((uint64_t __rte_atomic *)&ev_orig->event,
+				rte_memory_order_acquire);

 		opa_id = OPDL_OPA_MASK & (event >> OPDL_OPA_OFFSET);
 		flow_id = OPDL_FLOWID_MASK & event;
@@ -1027,9 +1027,9 @@ struct opdl_ring *
 		if ((event & OPDL_EVENT_MASK) != ev_temp) {
-			__atomic_store_n(&(ev_orig->event),
-					ev_update,
-					__ATOMIC_RELEASE);
+			rte_atomic_store_explicit(
+				(uint64_t __rte_atomic *)&ev_orig->event,
+				ev_update, rte_memory_order_release);
 			ev_updated = true;
 		}
 		if (ev_orig->u64 != ev->u64) {