[09/10] test: add workaround for __builtin_constant_p in test_memcpy_perf

Message ID 1739311325-14425-10-git-send-email-andremue@linux.microsoft.com (mailing list archive)
State Not Applicable
Headers
Series enable "app" to be compiled with MSVC |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS
ci/intel-Functional success Functional PASS

Commit Message

Andre Muezerie Feb. 11, 2025, 10:02 p.m. UTC
There's no MSVC equivalent for compiler extension __builtin_constant_p,
so a workaround is needed.

Signed-off-by: Andre Muezerie <andremue@linux.microsoft.com>
---
 app/test/test_memcpy_perf.c | 106 ++++++++++++++++++------------------
 1 file changed, 53 insertions(+), 53 deletions(-)
  

Comments

Stephen Hemminger Feb. 11, 2025, 10:13 p.m. UTC | #1
On Tue, 11 Feb 2025 14:02:05 -0800
Andre Muezerie <andremue@linux.microsoft.com> wrote:

> There's no MSVC equivalent for compiler extension __builtin_constant_p,
> so a workaround is needed.
> 
> Signed-off-by: Andre Muezerie <andremue@linux.microsoft.com>

Prefer that __rte_constant worked on all platforms,
but template code is hard to maintain.
  
Andre Muezerie Feb. 12, 2025, 2:07 a.m. UTC | #2
On Tue, Feb 11, 2025 at 02:13:05PM -0800, Stephen Hemminger wrote:
> On Tue, 11 Feb 2025 14:02:05 -0800
> Andre Muezerie <andremue@linux.microsoft.com> wrote:
> 
> > There's no MSVC equivalent for compiler extension __builtin_constant_p,
> > so a workaround is needed.
> > 
> > Signed-off-by: Andre Muezerie <andremue@linux.microsoft.com>
> 
> Prefer that __rte_constant worked on all platforms,
> but template code is hard to maintain.

I'm not a huge fan of __rte_constant_with_default either. Here are some thoughts about it:

In test_memcpy_perf we could get rid of __builtin_constant_p (and similar macros) entirely and just use the second argument (def) that I'm passing to __rte_constant_with_default as the condition in the if() statement. I only used __rte_constant_with_default to keep the source code practically the same when using non-MSVC compilers. I don't see any advantage beyond that, other than not having to pass that second parameter (def), which we now have to pass anyway.

However, __rte_constant_with_default is still needed in the first patch of this series, unless we decide to remove the extra check where it is being used.
  

Patch

diff --git a/app/test/test_memcpy_perf.c b/app/test/test_memcpy_perf.c
index 5c05a84619..6091b6f9dd 100644
--- a/app/test/test_memcpy_perf.c
+++ b/app/test/test_memcpy_perf.c
@@ -167,66 +167,66 @@  do_uncached_write(uint8_t *dst, int is_dst_cached,
  * Run a single memcpy performance test. This is a macro to ensure that if
  * the "size" parameter is a constant it won't be converted to a variable.
  */
-#define SINGLE_PERF_TEST(dst, is_dst_cached, dst_uoffset,                   \
-                         src, is_src_cached, src_uoffset, size)             \
-do {                                                                        \
-    unsigned int iter, t;                                                   \
-    size_t dst_addrs[TEST_BATCH_SIZE], src_addrs[TEST_BATCH_SIZE];          \
-    uint64_t start_time, total_time = 0;                                    \
-    uint64_t total_time2 = 0;                                               \
-    for (iter = 0; iter < (TEST_ITERATIONS / TEST_BATCH_SIZE); iter++) {    \
-        fill_addr_arrays(dst_addrs, is_dst_cached, dst_uoffset,             \
-                         src_addrs, is_src_cached, src_uoffset);            \
-        start_time = rte_rdtsc();                                           \
-        for (t = 0; t < TEST_BATCH_SIZE; t++)                               \
-            rte_memcpy(dst+dst_addrs[t], src+src_addrs[t], size);           \
-        total_time += rte_rdtsc() - start_time;                             \
-    }                                                                       \
-    for (iter = 0; iter < (TEST_ITERATIONS / TEST_BATCH_SIZE); iter++) {    \
-        fill_addr_arrays(dst_addrs, is_dst_cached, dst_uoffset,             \
-                         src_addrs, is_src_cached, src_uoffset);            \
-        start_time = rte_rdtsc();                                           \
-        for (t = 0; t < TEST_BATCH_SIZE; t++)                               \
-            memcpy(dst+dst_addrs[t], src+src_addrs[t], size);               \
-        total_time2 += rte_rdtsc() - start_time;                            \
-    }                                                                       \
-    printf("%3.0f -", (double)total_time  / TEST_ITERATIONS);                 \
-    printf("%3.0f",   (double)total_time2 / TEST_ITERATIONS);                 \
-    printf("(%6.2f%%) ", ((double)total_time - total_time2)*100/total_time2); \
+#define SINGLE_PERF_TEST(dst, is_dst_cached, dst_uoffset,                         \
+			 src, is_src_cached, src_uoffset, size)                   \
+do {                                                                              \
+	unsigned int iter, t;                                                     \
+	size_t dst_addrs[TEST_BATCH_SIZE], src_addrs[TEST_BATCH_SIZE];            \
+	uint64_t start_time, total_time = 0;                                      \
+	uint64_t total_time2 = 0;                                                 \
+	for (iter = 0; iter < (TEST_ITERATIONS / TEST_BATCH_SIZE); iter++) {      \
+		fill_addr_arrays(dst_addrs, is_dst_cached, dst_uoffset,           \
+				 src_addrs, is_src_cached, src_uoffset);          \
+		start_time = rte_rdtsc();                                         \
+		for (t = 0; t < TEST_BATCH_SIZE; t++)                             \
+			rte_memcpy(dst+dst_addrs[t], src+src_addrs[t], size);     \
+		total_time += rte_rdtsc() - start_time;                           \
+	}                                                                         \
+	for (iter = 0; iter < (TEST_ITERATIONS / TEST_BATCH_SIZE); iter++) {      \
+		fill_addr_arrays(dst_addrs, is_dst_cached, dst_uoffset,           \
+				 src_addrs, is_src_cached, src_uoffset);          \
+		start_time = rte_rdtsc();                                         \
+		for (t = 0; t < TEST_BATCH_SIZE; t++)                             \
+			memcpy(dst+dst_addrs[t], src+src_addrs[t], size);         \
+		total_time2 += rte_rdtsc() - start_time;                          \
+	}                                                                         \
+	printf("%3.0f -", (double)total_time  / TEST_ITERATIONS);                 \
+	printf("%3.0f",   (double)total_time2 / TEST_ITERATIONS);                 \
+	printf("(%6.2f%%) ", ((double)total_time - total_time2)*100/total_time2); \
 } while (0)
 
 /* Run aligned memcpy tests for each cached/uncached permutation */
-#define ALL_PERF_TESTS_FOR_SIZE(n)                                       \
-do {                                                                     \
-    if (__builtin_constant_p(n))                                         \
-        printf("\nC%6u", (unsigned)n);                                   \
-    else                                                                 \
-        printf("\n%7u", (unsigned)n);                                    \
-    SINGLE_PERF_TEST(small_buf_write, 1, 0, small_buf_read, 1, 0, n);    \
-    SINGLE_PERF_TEST(large_buf_write, 0, 0, small_buf_read, 1, 0, n);    \
-    SINGLE_PERF_TEST(small_buf_write, 1, 0, large_buf_read, 0, 0, n);    \
-    SINGLE_PERF_TEST(large_buf_write, 0, 0, large_buf_read, 0, 0, n);    \
+#define ALL_PERF_TESTS_FOR_SIZE(n, def)                                      \
+do {                                                                         \
+	if (__rte_constant_with_default(n, def))                             \
+		printf("\nC%6u", (unsigned int)n);                           \
+	else                                                                 \
+		printf("\n%7u", (unsigned int)n);                            \
+	SINGLE_PERF_TEST(small_buf_write, 1, 0, small_buf_read, 1, 0, n);    \
+	SINGLE_PERF_TEST(large_buf_write, 0, 0, small_buf_read, 1, 0, n);    \
+	SINGLE_PERF_TEST(small_buf_write, 1, 0, large_buf_read, 0, 0, n);    \
+	SINGLE_PERF_TEST(large_buf_write, 0, 0, large_buf_read, 0, 0, n);    \
 } while (0)
 
 /* Run unaligned memcpy tests for each cached/uncached permutation */
-#define ALL_PERF_TESTS_FOR_SIZE_UNALIGNED(n)                             \
-do {                                                                     \
-    if (__builtin_constant_p(n))                                         \
-        printf("\nC%6u", (unsigned)n);                                   \
-    else                                                                 \
-        printf("\n%7u", (unsigned)n);                                    \
-    SINGLE_PERF_TEST(small_buf_write, 1, 1, small_buf_read, 1, 5, n);    \
-    SINGLE_PERF_TEST(large_buf_write, 0, 1, small_buf_read, 1, 5, n);    \
-    SINGLE_PERF_TEST(small_buf_write, 1, 1, large_buf_read, 0, 5, n);    \
-    SINGLE_PERF_TEST(large_buf_write, 0, 1, large_buf_read, 0, 5, n);    \
+#define ALL_PERF_TESTS_FOR_SIZE_UNALIGNED(n, def)                            \
+do {                                                                         \
+	if (__rte_constant_with_default(n, def))                             \
+		printf("\nC%6u", (unsigned int)n);                           \
+	else                                                                 \
+		printf("\n%7u", (unsigned int)n);                            \
+	SINGLE_PERF_TEST(small_buf_write, 1, 1, small_buf_read, 1, 5, n);    \
+	SINGLE_PERF_TEST(large_buf_write, 0, 1, small_buf_read, 1, 5, n);    \
+	SINGLE_PERF_TEST(small_buf_write, 1, 1, large_buf_read, 0, 5, n);    \
+	SINGLE_PERF_TEST(large_buf_write, 0, 1, large_buf_read, 0, 5, n);    \
 } while (0)
 
 /* Run memcpy tests for constant length */
-#define ALL_PERF_TEST_FOR_CONSTANT                                      \
-do {                                                                    \
-    TEST_CONSTANT(6U); TEST_CONSTANT(64U); TEST_CONSTANT(128U);         \
-    TEST_CONSTANT(192U); TEST_CONSTANT(256U); TEST_CONSTANT(512U);      \
-    TEST_CONSTANT(768U); TEST_CONSTANT(1024U); TEST_CONSTANT(1536U);    \
+#define ALL_PERF_TEST_FOR_CONSTANT                                                \
+do {                                                                              \
+	TEST_CONSTANT(6U, 1); TEST_CONSTANT(64U, 1); TEST_CONSTANT(128U, 1);      \
+	TEST_CONSTANT(192U, 1); TEST_CONSTANT(256U, 1); TEST_CONSTANT(512U, 1);   \
+	TEST_CONSTANT(768U, 1); TEST_CONSTANT(1024U, 1); TEST_CONSTANT(1536U, 1); \
 } while (0)
 
 /* Run all memcpy tests for aligned constant cases */
@@ -253,7 +253,7 @@  perf_test_variable_aligned(void)
 {
 	unsigned i;
 	for (i = 0; i < RTE_DIM(buf_sizes); i++) {
-		ALL_PERF_TESTS_FOR_SIZE((size_t)buf_sizes[i]);
+		ALL_PERF_TESTS_FOR_SIZE(buf_sizes[i], 0);
 	}
 }
 
@@ -263,7 +263,7 @@  perf_test_variable_unaligned(void)
 {
 	unsigned i;
 	for (i = 0; i < RTE_DIM(buf_sizes); i++) {
-		ALL_PERF_TESTS_FOR_SIZE_UNALIGNED((size_t)buf_sizes[i]);
+		ALL_PERF_TESTS_FOR_SIZE_UNALIGNED(buf_sizes[i], 0);
 	}
 }