From patchwork Thu Sep 24 08:18:30 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Radu Nicolau X-Patchwork-Id: 78654 X-Patchwork-Delegate: david.marchand@redhat.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id 55C35A04B1; Thu, 24 Sep 2020 10:19:34 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id AD6D11DD8C; Thu, 24 Sep 2020 10:19:27 +0200 (CEST) Received: from mga02.intel.com (mga02.intel.com [134.134.136.20]) by dpdk.org (Postfix) with ESMTP id 27CBE1DD60 for ; Thu, 24 Sep 2020 10:19:22 +0200 (CEST) IronPort-SDR: r6/NbJ0N64XFfxznUU6et8vuYRWiY75DuXr1cAx5fiXNqPIUZskaapWyFJJeUeOZkZxAjoNZQK wX9CDgSsJsFA== X-IronPort-AV: E=McAfee;i="6000,8403,9753"; a="148790621" X-IronPort-AV: E=Sophos;i="5.77,296,1596524400"; d="scan'208";a="148790621" X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga004.jf.intel.com ([10.7.209.38]) by orsmga101.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 24 Sep 2020 01:19:21 -0700 IronPort-SDR: XKEt8c5JLZKjbpCAcs8yPF0oB/3az8jdF+TmN8KPfimA7D6duWCJtnPoHXtpE2u73kWHYE1yQa t4iVZRMB/NIw== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.77,296,1596524400"; d="scan'208";a="455254993" Received: from silpixa00399477.ir.intel.com ([10.237.214.232]) by orsmga004.jf.intel.com with ESMTP; 24 Sep 2020 01:19:19 -0700 From: Radu Nicolau To: dev@dpdk.org Cc: thomas@monjalon.net, david.marchand@redhat.com, viktorin@rehivetech.com, ruifeng.wang@arm.com, jerinj@marvell.com, drc@linux.vnet.ibm.com, bruce.richardson@intel.com, konstantin.ananyev@intel.com, Radu Nicolau , Sean Morrissey Date: Thu, 24 Sep 2020 08:18:30 +0000 Message-Id: <20200924081832.21581-3-radu.nicolau@intel.com> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20200924081832.21581-1-radu.nicolau@intel.com> References: <20200902104343.31774-2-radu.nicolau@intel.com> <20200924081832.21581-1-radu.nicolau@intel.com> Subject: [dpdk-dev] [PATCH v3 2/4] arm: change cpuflag macros to compiler macros X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Replace use of RTE_MACHINE_CPUFLAG macros with regular compiler macros, which are more complete than those provided by DPDK, and as such it allows new instruction sets to be leveraged without having to do extra work to set them up in DPDK. Signed-off-by: Sean Morrissey Signed-off-by: Radu Nicolau --- app/test-pmd/macswap.c | 2 +- config/arm/meson.build | 6 ------ drivers/net/ixgbe/ixgbe_ethdev.c | 2 +- examples/l3fwd/l3fwd.h | 2 +- examples/l3fwd/l3fwd_em.c | 12 ++++++------ examples/l3fwd/l3fwd_em_hlm.h | 2 +- examples/l3fwd/l3fwd_em_sequential.h | 2 +- examples/l3fwd/l3fwd_lpm.c | 6 +++--- lib/librte_eal/arm/include/rte_memcpy_32.h | 2 +- lib/librte_hash/rte_cuckoo_hash.c | 2 +- lib/librte_hash/rte_hash_crc.h | 2 +- lib/librte_hash/rte_thash.h | 4 ++-- lib/librte_member/rte_member.h | 2 +- lib/librte_net/rte_net_crc.c | 2 +- lib/librte_node/ip4_lookup.c | 2 +- lib/librte_sched/rte_sched.c | 2 +- lib/librte_table/rte_lru_arm64.h | 2 +- lib/librte_table/rte_table_hash_func.h | 2 +- 18 files changed, 25 insertions(+), 31 deletions(-) diff --git a/app/test-pmd/macswap.c b/app/test-pmd/macswap.c index 74e2dd838..310bca06a 100644 --- a/app/test-pmd/macswap.c +++ b/app/test-pmd/macswap.c @@ -39,7 +39,7 @@ #include "testpmd.h" #if defined(RTE_ARCH_X86) #include "macswap_sse.h" -#elif defined(RTE_MACHINE_CPUFLAG_NEON) +#elif defined(__ARM_NEON) #include "macswap_neon.h" #else #include "macswap.h" diff --git a/config/arm/meson.build b/config/arm/meson.build index 8728051d5..42c0c34a5 100644 --- a/config/arm/meson.build +++ b/config/arm/meson.build @@ -208,20 +208,14 @@ message(machine_args) if (cc.get_define('__ARM_NEON', args: machine_args) != '' or cc.get_define('__aarch64__', args: machine_args) != '') - dpdk_conf.set('RTE_MACHINE_CPUFLAG_NEON', 1) compile_time_cpuflags += ['RTE_CPUFLAG_NEON'] endif if cc.get_define('__ARM_FEATURE_CRC32', args: machine_args) != '' - dpdk_conf.set('RTE_MACHINE_CPUFLAG_CRC32', 1) compile_time_cpuflags += ['RTE_CPUFLAG_CRC32'] endif if cc.get_define('__ARM_FEATURE_CRYPTO', args: machine_args) != '' - dpdk_conf.set('RTE_MACHINE_CPUFLAG_AES', 1) - dpdk_conf.set('RTE_MACHINE_CPUFLAG_PMULL', 1) - dpdk_conf.set('RTE_MACHINE_CPUFLAG_SHA1', 1) - dpdk_conf.set('RTE_MACHINE_CPUFLAG_SHA2', 1) compile_time_cpuflags += ['RTE_CPUFLAG_AES', 'RTE_CPUFLAG_PMULL', 'RTE_CPUFLAG_SHA1', 'RTE_CPUFLAG_SHA2'] endif diff --git a/drivers/net/ixgbe/ixgbe_ethdev.c b/drivers/net/ixgbe/ixgbe_ethdev.c index 0f065bbc0..c74467e06 100644 --- a/drivers/net/ixgbe/ixgbe_ethdev.c +++ b/drivers/net/ixgbe/ixgbe_ethdev.c @@ -3960,7 +3960,7 @@ ixgbe_dev_supported_ptypes_get(struct rte_eth_dev *dev) dev->rx_pkt_burst == ixgbe_recv_pkts_bulk_alloc) return ptypes; -#if defined(RTE_ARCH_X86) || defined(RTE_MACHINE_CPUFLAG_NEON) +#if defined(RTE_ARCH_X86) || defined(__ARM_NEON) if (dev->rx_pkt_burst == ixgbe_recv_pkts_vec || dev->rx_pkt_burst == ixgbe_recv_scattered_pkts_vec) return ptypes; diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h index 67055431f..2cf06099e 100644 --- a/examples/l3fwd/l3fwd.h +++ b/examples/l3fwd/l3fwd.h @@ -12,7 +12,7 @@ #define RTE_LOGTYPE_L3FWD RTE_LOGTYPE_USER1 -#if !defined(NO_HASH_MULTI_LOOKUP) && defined(RTE_MACHINE_CPUFLAG_NEON) +#if !defined(NO_HASH_MULTI_LOOKUP) && defined(__ARM_NEON) #define NO_HASH_MULTI_LOOKUP 1 #endif diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c index df0c8dd16..3b35fa3e5 100644 --- a/examples/l3fwd/l3fwd_em.c +++ b/examples/l3fwd/l3fwd_em.c @@ -28,7 +28,7 @@ #include "l3fwd.h" #include "l3fwd_event.h" -#if defined(RTE_ARCH_X86) || defined(RTE_MACHINE_CPUFLAG_CRC32) +#if defined(RTE_ARCH_X86) || defined(__ARM_FEATURE_CRC32) #define EM_HASH_CRC 1 #endif @@ -223,7 +223,7 @@ em_mask_key(void *key, xmm_t mask) return _mm_and_si128(data, mask); } -#elif defined(RTE_MACHINE_CPUFLAG_NEON) +#elif defined(__ARM_NEON) static inline xmm_t em_mask_key(void *key, xmm_t mask) { @@ -303,7 +303,7 @@ em_get_ipv6_dst_port(void *ipv6_hdr, uint16_t portid, void *lookup_struct) return (ret < 0) ? portid : ipv6_l3fwd_out_if[ret]; } -#if defined RTE_ARCH_X86 || defined RTE_MACHINE_CPUFLAG_NEON +#if defined RTE_ARCH_X86 || defined __ARM_NEON #if defined(NO_HASH_MULTI_LOOKUP) #include "l3fwd_em_sequential.h" #else @@ -685,7 +685,7 @@ em_main_loop(__rte_unused void *dummy) if (nb_rx == 0) continue; -#if defined RTE_ARCH_X86 || defined RTE_MACHINE_CPUFLAG_NEON +#if defined RTE_ARCH_X86 || defined __ARM_NEON l3fwd_em_send_packets(nb_rx, pkts_burst, portid, qconf); #else @@ -723,7 +723,7 @@ em_event_loop_single(struct l3fwd_event_resources *evt_rsrc, struct rte_mbuf *mbuf = ev.mbuf; -#if defined RTE_ARCH_X86 || defined RTE_MACHINE_CPUFLAG_NEON +#if defined RTE_ARCH_X86 || defined __ARM_NEON mbuf->port = em_get_dst_port(lconf, mbuf, mbuf->port); process_packet(mbuf, &mbuf->port); #else @@ -784,7 +784,7 @@ em_event_loop_burst(struct l3fwd_event_resources *evt_rsrc, continue; } -#if defined RTE_ARCH_X86 || defined RTE_MACHINE_CPUFLAG_NEON +#if defined RTE_ARCH_X86 || defined __ARM_NEON l3fwd_em_process_events(nb_deq, (struct rte_event **)&events, lconf); #else diff --git a/examples/l3fwd/l3fwd_em_hlm.h b/examples/l3fwd/l3fwd_em_hlm.h index 79812716c..278707c18 100644 --- a/examples/l3fwd/l3fwd_em_hlm.h +++ b/examples/l3fwd/l3fwd_em_hlm.h @@ -9,7 +9,7 @@ #if defined RTE_ARCH_X86 #include "l3fwd_sse.h" #include "l3fwd_em_hlm_sse.h" -#elif defined RTE_MACHINE_CPUFLAG_NEON +#elif defined __ARM_NEON #include "l3fwd_neon.h" #include "l3fwd_em_hlm_neon.h" #endif diff --git a/examples/l3fwd/l3fwd_em_sequential.h b/examples/l3fwd/l3fwd_em_sequential.h index b231b9994..6170052cf 100644 --- a/examples/l3fwd/l3fwd_em_sequential.h +++ b/examples/l3fwd/l3fwd_em_sequential.h @@ -16,7 +16,7 @@ #if defined RTE_ARCH_X86 #include "l3fwd_sse.h" -#elif defined RTE_MACHINE_CPUFLAG_NEON +#elif defined __ARM_NEON #include "l3fwd_neon.h" #endif diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c index 91eb74272..3dcf1fef1 100644 --- a/examples/l3fwd/l3fwd_lpm.c +++ b/examples/l3fwd/l3fwd_lpm.c @@ -163,7 +163,7 @@ lpm_get_dst_port_with_ipv4(const struct lcore_conf *qconf, struct rte_mbuf *pkt, #if defined(RTE_ARCH_X86) #include "l3fwd_lpm_sse.h" -#elif defined RTE_MACHINE_CPUFLAG_NEON +#elif defined __ARM_NEON #include "l3fwd_lpm_neon.h" #elif defined(RTE_ARCH_PPC_64) #include "l3fwd_lpm_altivec.h" @@ -240,7 +240,7 @@ lpm_main_loop(__rte_unused void *dummy) if (nb_rx == 0) continue; -#if defined RTE_ARCH_X86 || defined RTE_MACHINE_CPUFLAG_NEON \ +#if defined RTE_ARCH_X86 || defined __ARM_NEON \ || defined RTE_ARCH_PPC_64 l3fwd_lpm_send_packets(nb_rx, pkts_burst, portid, qconf); @@ -259,7 +259,7 @@ lpm_process_event_pkt(const struct lcore_conf *lconf, struct rte_mbuf *mbuf) { mbuf->port = lpm_get_dst_port(lconf, mbuf, mbuf->port); -#if defined RTE_ARCH_X86 || defined RTE_MACHINE_CPUFLAG_NEON \ +#if defined RTE_ARCH_X86 || defined __ARM_NEON \ || defined RTE_ARCH_PPC_64 process_packet(mbuf, &mbuf->port); #else diff --git a/lib/librte_eal/arm/include/rte_memcpy_32.h b/lib/librte_eal/arm/include/rte_memcpy_32.h index eb02c3b41..fb3245b59 100644 --- a/lib/librte_eal/arm/include/rte_memcpy_32.h +++ b/lib/librte_eal/arm/include/rte_memcpy_32.h @@ -16,7 +16,7 @@ extern "C" { #ifdef RTE_ARCH_ARM_NEON_MEMCPY -#ifndef RTE_MACHINE_CPUFLAG_NEON +#ifndef __ARM_NEON #error "Cannot optimize memcpy by NEON as the CPU seems to not support this" #endif diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c index 7c7ab84af..aad0c965b 100644 --- a/lib/librte_hash/rte_cuckoo_hash.c +++ b/lib/librte_hash/rte_cuckoo_hash.c @@ -1704,7 +1704,7 @@ compare_signatures(uint32_t *prim_hash_matches, uint32_t *sec_hash_matches, (__m128i const *)sec_bkt->sig_current), _mm_set1_epi16(sig))); break; -#elif defined(RTE_MACHINE_CPUFLAG_NEON) +#elif defined(__ARM_NEON) case RTE_HASH_COMPARE_NEON: { uint16x8_t vmat, vsig, x; int16x8_t shift = {-15, -13, -11, -9, -7, -5, -3, -1}; diff --git a/lib/librte_hash/rte_hash_crc.h b/lib/librte_hash/rte_hash_crc.h index cf28031b3..3e131aa6b 100644 --- a/lib/librte_hash/rte_hash_crc.h +++ b/lib/librte_hash/rte_hash_crc.h @@ -424,7 +424,7 @@ crc32c_sse42_u64(uint64_t data, uint64_t init_val) static uint8_t crc32_alg = CRC32_SW; -#if defined(RTE_ARCH_ARM64) && defined(RTE_MACHINE_CPUFLAG_CRC32) +#if defined(RTE_ARCH_ARM64) && defined(__ARM_FEATURE_CRC32) #include "rte_crc_arm64.h" #else diff --git a/lib/librte_hash/rte_thash.h b/lib/librte_hash/rte_thash.h index 51b512946..061efa2ae 100644 --- a/lib/librte_hash/rte_thash.h +++ b/lib/librte_hash/rte_thash.h @@ -28,7 +28,7 @@ extern "C" { #include #include -#if defined(RTE_ARCH_X86) || defined(RTE_MACHINE_CPUFLAG_NEON) +#if defined(RTE_ARCH_X86) || defined(__ARM_NEON) #include #endif @@ -149,7 +149,7 @@ rte_thash_load_v6_addrs(const struct rte_ipv6_hdr *orig, ipv6 = _mm_loadu_si128((const __m128i *)orig->dst_addr); *(__m128i *)targ->v6.dst_addr = _mm_shuffle_epi8(ipv6, rte_thash_ipv6_bswap_mask); -#elif defined(RTE_MACHINE_CPUFLAG_NEON) +#elif defined(__ARM_NEON) uint8x16_t ipv6 = vld1q_u8((uint8_t const *)orig->src_addr); vst1q_u8((uint8_t *)targ->v6.src_addr, vrev32q_u8(ipv6)); ipv6 = vld1q_u8((uint8_t const *)orig->dst_addr); diff --git a/lib/librte_member/rte_member.h b/lib/librte_member/rte_member.h index ab2b23217..c0689e233 100644 --- a/lib/librte_member/rte_member.h +++ b/lib/librte_member/rte_member.h @@ -68,7 +68,7 @@ typedef uint16_t member_set_t; #define RTE_MEMBER_NAMESIZE 32 /** @internal Hash function used by membership library. */ -#if defined(RTE_ARCH_X86) || defined(RTE_MACHINE_CPUFLAG_CRC32) +#if defined(RTE_ARCH_X86) || defined(__ARM_FEATURE_CRC32) #include #define MEMBER_HASH_FUNC rte_hash_crc #else diff --git a/lib/librte_net/rte_net_crc.c b/lib/librte_net/rte_net_crc.c index 56a0ed129..4f5b9e828 100644 --- a/lib/librte_net/rte_net_crc.c +++ b/lib/librte_net/rte_net_crc.c @@ -12,7 +12,7 @@ #if defined(RTE_ARCH_X86_64) && defined(__PCLMUL__) #define X86_64_SSE42_PCLMULQDQ 1 -#elif defined(RTE_ARCH_ARM64) && defined(RTE_MACHINE_CPUFLAG_PMULL) +#elif defined(RTE_ARCH_ARM64) && defined(__ARM_FEATURE_CRYPTO) #define ARM64_NEON_PMULL 1 #endif diff --git a/lib/librte_node/ip4_lookup.c b/lib/librte_node/ip4_lookup.c index 8e6379457..293c77f39 100644 --- a/lib/librte_node/ip4_lookup.c +++ b/lib/librte_node/ip4_lookup.c @@ -30,7 +30,7 @@ struct ip4_lookup_node_main { static struct ip4_lookup_node_main ip4_lookup_nm; -#if defined(RTE_MACHINE_CPUFLAG_NEON) +#if defined(__ARM_NEON) #include "ip4_lookup_neon.h" #elif defined(RTE_ARCH_X86) #include "ip4_lookup_sse.h" diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c index 0fa074166..75be8b6bd 100644 --- a/lib/librte_sched/rte_sched.c +++ b/lib/librte_sched/rte_sched.c @@ -29,7 +29,7 @@ #ifdef RTE_ARCH_X86 #define SCHED_VECTOR_SSE4 -#elif defined(RTE_MACHINE_CPUFLAG_NEON) +#elif defined(__ARM_NEON) #define SCHED_VECTOR_NEON #endif diff --git a/lib/librte_table/rte_lru_arm64.h b/lib/librte_table/rte_lru_arm64.h index b45e9d03c..add889a57 100644 --- a/lib/librte_table/rte_lru_arm64.h +++ b/lib/librte_table/rte_lru_arm64.h @@ -13,7 +13,7 @@ extern "C" { #include #ifndef RTE_TABLE_HASH_LRU_STRATEGY -#ifdef RTE_MACHINE_CPUFLAG_NEON +#ifdef __ARM_NEON #define RTE_TABLE_HASH_LRU_STRATEGY 3 #else /* if no NEON, use simple scalar version */ #define RTE_TABLE_HASH_LRU_STRATEGY 1 diff --git a/lib/librte_table/rte_table_hash_func.h b/lib/librte_table/rte_table_hash_func.h index 350c79564..c4c35cc06 100644 --- a/lib/librte_table/rte_table_hash_func.h +++ b/lib/librte_table/rte_table_hash_func.h @@ -41,7 +41,7 @@ rte_crc32_u64(uint64_t crc, uint64_t v) return _mm_crc32_u64(crc, v); } -#elif defined(RTE_ARCH_ARM64) && defined(RTE_MACHINE_CPUFLAG_CRC32) +#elif defined(RTE_ARCH_ARM64) && defined(__ARM_FEATURE_CRC32) #include "rte_table_hash_func_arm64.h" #else