[dpdk-dev] use generated flags for SSE and AVX checks

Message ID 1480952058-13591-1-git-send-email-thomas.monjalon@6wind.com (mailing list archive)
State Rejected, archived
Delegated to: Thomas Monjalon
Headers

Checks

Context Check Description
checkpatch/checkpatch success coding style OK

Commit Message

Thomas Monjalon Dec. 5, 2016, 3:34 p.m. UTC
  Clean up the code to always use the flags RTE_MACHINE_CPUFLAG_*
generated by the DPDK makefile rte.cpuflags.mk.

Signed-off-by: Thomas Monjalon <thomas.monjalon@6wind.com>
---
 examples/l3fwd/l3fwd_em.c                         |  8 ++++----
 examples/l3fwd/l3fwd_lpm.c                        |  6 +++---
 examples/performance-thread/l3fwd-thread/main.c   |  2 +-
 lib/librte_eal/common/include/arch/x86/rte_vect.h | 14 +++++++-------
 lib/librte_eal/common/include/rte_common.h        |  2 +-
 lib/librte_hash/rte_thash.h                       |  8 +++-----
 lib/librte_sched/rte_sched.c                      |  2 +-
 lib/librte_table/rte_lru.h                        |  2 +-
 8 files changed, 21 insertions(+), 23 deletions(-)
  

Comments

Thomas Monjalon Dec. 5, 2016, 3:59 p.m. UTC | #1
2016-12-05 16:34, Thomas Monjalon:
> Clean up the code to always use the flags RTE_MACHINE_CPUFLAG_*
> generated by the DPDK makefile rte.cpuflags.mk.

This patch does not work because RTE_MACHINE_CPUFLAG_* are generated
for the whole library when including rte.vars.mk.
So the flags are not accurate when overriding the flags per file like
it is done in rte_acl.

So the questions are:
	- should we use RTE_MACHINE_CPUFLAG_?
	- should we override the flags per file?
	- will we be able to use the function attribute __target__?
  
Ferruh Yigit Jan. 15, 2019, 6:23 p.m. UTC | #2
On 12/5/2016 3:59 PM, thomas.monjalon at 6wind.com (Thomas Monjalon) wrote:
> 2016-12-05 16:34, Thomas Monjalon:
>> Clean up the code to always use the flags RTE_MACHINE_CPUFLAG_*
>> generated by the DPDK makefile rte.cpuflags.mk.
> 
> This patch does not work because RTE_MACHINE_CPUFLAG_* are generated
> for the whole library when including rte.vars.mk.
> So the flags are not accurate when overriding the flags per file like
> it is done in rte_acl.
> 
> So the questions are:
> 	- should we use RTE_MACHINE_CPUFLAG_?
> 	- should we override the flags per file?
> 	- will we be able to use the function attribute __target__?
> 

Hi Thomas,

This patch is waiting for a comment for two years, is it still valid, if not can
we mark it as rejected?

For record, it is: https://patches.dpdk.org/patch/17684/

Thanks,
ferruh
  

Patch

diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c
index 9cc4460..6714430 100644
--- a/examples/l3fwd/l3fwd_em.c
+++ b/examples/l3fwd/l3fwd_em.c
@@ -246,7 +246,7 @@  static rte_xmm_t mask0;
 static rte_xmm_t mask1;
 static rte_xmm_t mask2;
 
-#if defined(__SSE2__)
+#if defined(RTE_MACHINE_CPUFLAG_SSE2)
 static inline xmm_t
 em_mask_key(void *key, xmm_t mask)
 {
@@ -328,7 +328,7 @@  em_get_ipv6_dst_port(void *ipv6_hdr,  uint8_t portid, void *lookup_struct)
 	return (uint8_t)((ret < 0) ? portid : ipv6_l3fwd_out_if[ret]);
 }
 
-#if defined(__SSE4_1__)
+#if defined(RTE_MACHINE_CPUFLAG_SSE4_1)
 #if defined(NO_HASH_MULTI_LOOKUP)
 #include "l3fwd_em_sse.h"
 #else
@@ -709,13 +709,13 @@  em_main_loop(__attribute__((unused)) void *dummy)
 			if (nb_rx == 0)
 				continue;
 
-#if defined(__SSE4_1__)
+#if defined(RTE_MACHINE_CPUFLAG_SSE4_1)
 			l3fwd_em_send_packets(nb_rx, pkts_burst,
 							portid, qconf);
 #else
 			l3fwd_em_no_opt_send_packets(nb_rx, pkts_burst,
 							portid, qconf);
-#endif /* __SSE_4_1__ */
+#endif /* SSE_4_1 */
 		}
 	}
 
diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c
index f621269..005534d 100644
--- a/examples/l3fwd/l3fwd_lpm.c
+++ b/examples/l3fwd/l3fwd_lpm.c
@@ -104,7 +104,7 @@  static struct ipv6_l3fwd_lpm_route ipv6_l3fwd_lpm_route_array[] = {
 struct rte_lpm *ipv4_l3fwd_lpm_lookup_struct[NB_SOCKETS];
 struct rte_lpm6 *ipv6_l3fwd_lpm_lookup_struct[NB_SOCKETS];
 
-#if defined(__SSE4_1__)
+#if defined(RTE_MACHINE_CPUFLAG_SSE4_1)
 #include "l3fwd_lpm_sse.h"
 #else
 #include "l3fwd_lpm.h"
@@ -178,13 +178,13 @@  lpm_main_loop(__attribute__((unused)) void *dummy)
 			if (nb_rx == 0)
 				continue;
 
-#if defined(__SSE4_1__)
+#if defined(RTE_MACHINE_CPUFLAG_SSE4_1)
 			l3fwd_lpm_send_packets(nb_rx, pkts_burst,
 						portid, qconf);
 #else
 			l3fwd_lpm_no_opt_send_packets(nb_rx, pkts_burst,
 							portid, qconf);
-#endif /* __SSE_4_1__ */
+#endif /* SSE_4_1 */
 		}
 	}
 
diff --git a/examples/performance-thread/l3fwd-thread/main.c b/examples/performance-thread/l3fwd-thread/main.c
index fdc90b2..0917aa1 100644
--- a/examples/performance-thread/l3fwd-thread/main.c
+++ b/examples/performance-thread/l3fwd-thread/main.c
@@ -95,7 +95,7 @@ 
  *  When set to one, optimized forwarding path is enabled.
  *  Note that LPM optimisation path uses SSE4.1 instructions.
  */
-#if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && !defined(__SSE4_1__))
+#if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && !defined(RTE_MACHINE_CPUFLAG_SSE4_1))
 #define ENABLE_MULTI_BUFFER_OPTIMIZE	0
 #else
 #define ENABLE_MULTI_BUFFER_OPTIMIZE	1
diff --git a/lib/librte_eal/common/include/arch/x86/rte_vect.h b/lib/librte_eal/common/include/arch/x86/rte_vect.h
index 77f2e25..56b53b7 100644
--- a/lib/librte_eal/common/include/arch/x86/rte_vect.h
+++ b/lib/librte_eal/common/include/arch/x86/rte_vect.h
@@ -44,23 +44,23 @@ 
 
 #if (defined(__ICC) || (__GNUC__ == 4 &&  __GNUC_MINOR__ < 4))
 
-#ifdef __SSE__
+#ifdef RTE_MACHINE_CPUFLAG_SSE
 #include <xmmintrin.h>
 #endif
 
-#ifdef __SSE2__
+#ifdef RTE_MACHINE_CPUFLAG_SSE2
 #include <emmintrin.h>
 #endif
 
-#ifdef __SSE3__
+#ifdef RTE_MACHINE_CPUFLAG_SSE3
 #include <tmmintrin.h>
 #endif
 
-#if defined(__SSE4_2__) || defined(__SSE4_1__)
+#if defined(RTE_MACHINE_CPUFLAG_SSE4_2) || defined(RTE_MACHINE_CPUFLAG_SSE4_1)
 #include <smmintrin.h>
 #endif
 
-#if defined(__AVX__)
+#if defined(RTE_MACHINE_CPUFLAG_AVX)
 #include <immintrin.h>
 #endif
 
@@ -88,7 +88,7 @@  typedef union rte_xmm {
 	double   pd[XMM_SIZE / sizeof(double)];
 } rte_xmm_t;
 
-#ifdef __AVX__
+#ifdef RTE_MACHINE_CPUFLAG_AVX
 
 typedef __m256i ymm_t;
 
@@ -105,7 +105,7 @@  typedef union rte_ymm {
 	double   pd[YMM_SIZE / sizeof(double)];
 } rte_ymm_t;
 
-#endif /* __AVX__ */
+#endif /* AVX */
 
 #ifdef RTE_ARCH_I686
 #define _mm_cvtsi128_si64(a)    \
diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h
index db5ac91..bc0f4cd 100644
--- a/lib/librte_eal/common/include/rte_common.h
+++ b/lib/librte_eal/common/include/rte_common.h
@@ -294,7 +294,7 @@  rte_align64pow2(uint64_t v)
 
 /*********** Other general functions / macros ********/
 
-#ifdef __SSE2__
+#ifdef RTE_MACHINE_CPUFLAG_SSE2
 #include <emmintrin.h>
 /**
  * PAUSE instruction for tight loops (avoid busy waiting)
diff --git a/lib/librte_hash/rte_thash.h b/lib/librte_hash/rte_thash.h
index a4886a8..9a352bd 100644
--- a/lib/librte_hash/rte_thash.h
+++ b/lib/librte_hash/rte_thash.h
@@ -56,11 +56,9 @@  extern "C" {
 #include <rte_ip.h>
 #include <rte_common.h>
 
-#ifdef __SSE3__
+#ifdef RTE_MACHINE_CPUFLAG_SSE3
 #include <rte_vect.h>
-#endif
 
-#ifdef __SSE3__
 /* Byte swap mask used for converting IPv6 address
  * 4-byte chunks to CPU byte order
  */
@@ -134,7 +132,7 @@  struct rte_ipv6_tuple {
 union rte_thash_tuple {
 	struct rte_ipv4_tuple	v4;
 	struct rte_ipv6_tuple	v6;
-#ifdef __SSE3__
+#ifdef RTE_MACHINE_CPUFLAG_SSE3
 } __attribute__((aligned(XMM_SIZE)));
 #else
 };
@@ -169,7 +167,7 @@  rte_convert_rss_key(const uint32_t *orig, uint32_t *targ, int len)
 static inline void
 rte_thash_load_v6_addrs(const struct ipv6_hdr *orig, union rte_thash_tuple *targ)
 {
-#ifdef __SSE3__
+#ifdef RTE_MACHINE_CPUFLAG_SSE3
 	__m128i ipv6 = _mm_loadu_si128((const __m128i *)orig->src_addr);
 	*(__m128i *)targ->v6.src_addr =
 			_mm_shuffle_epi8(ipv6, rte_thash_ipv6_bswap_mask);
diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c
index e6dace2..c593363 100644
--- a/lib/librte_sched/rte_sched.c
+++ b/lib/librte_sched/rte_sched.c
@@ -56,7 +56,7 @@ 
 #ifdef RTE_SCHED_VECTOR
 #include <rte_vect.h>
 
-#if defined(__SSE4__)
+#ifdef RTE_MACHINE_CPUFLAG_SSE4
 #define SCHED_VECTOR_SSE4
 #endif
 
diff --git a/lib/librte_table/rte_lru.h b/lib/librte_table/rte_lru.h
index e87e062..3d677c8 100644
--- a/lib/librte_table/rte_lru.h
+++ b/lib/librte_table/rte_lru.h
@@ -47,7 +47,7 @@  extern "C" {
 #endif
 
 #ifndef RTE_TABLE_HASH_LRU_STRATEGY
-#ifdef __SSE4_2__
+#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
 #define RTE_TABLE_HASH_LRU_STRATEGY                        2
 #else /* if no SSE, use simple scalar version */
 #define RTE_TABLE_HASH_LRU_STRATEGY                        1