[v3,1/2] app/test: add cksum performance test

Message ID 20220711121132.34546-1-mattias.ronnblom@ericsson.com (mailing list archive)
State Accepted, archived
Delegated to: Thomas Monjalon
Headers
Series [v3,1/2] app/test: add cksum performance test |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Mattias Rönnblom July 11, 2022, 12:11 p.m. UTC
  Add performance test for the rte_raw_cksum() function, which delegates
the actual work to __rte_raw_cksum(), which in turn is used by other
functions in need of Internet checksum calculation.

Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>

---

v3:
  * Changed init function buffer parameter type, to avoid cast.
  * Code formatting improved.
v2:
  * Added __rte_unused to unused volatile variable, to keep the Intel
    compiler happy.
---
 MAINTAINERS                |   1 +
 app/test/meson.build       |   1 +
 app/test/test_cksum_perf.c | 117 +++++++++++++++++++++++++++++++++++++
 3 files changed, 119 insertions(+)
 create mode 100644 app/test/test_cksum_perf.c
  

Comments

Olivier Matz July 11, 2022, 1:20 p.m. UTC | #1
On Mon, Jul 11, 2022 at 02:11:31PM +0200, Mattias Rönnblom wrote:
> Add performance test for the rte_raw_cksum() function, which delegates
> the actual work to __rte_raw_cksum(), which in turn is used by other
> functions in need of Internet checksum calculation.
> 
> Signed-off-by: Mattias Rönnblom <mattias.ronnblom@ericsson.com>

Acked-by: Olivier Matz <olivier.matz@6wind.com>

Thank you!
  

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index c923712946..2a4c99e05a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1414,6 +1414,7 @@  Network headers
 M: Olivier Matz <olivier.matz@6wind.com>
 F: lib/net/
 F: app/test/test_cksum.c
+F: app/test/test_cksum_perf.c
 
 Packet CRC
 M: Jasvinder Singh <jasvinder.singh@intel.com>
diff --git a/app/test/meson.build b/app/test/meson.build
index 431c5bd318..191db03d1d 100644
--- a/app/test/meson.build
+++ b/app/test/meson.build
@@ -18,6 +18,7 @@  test_sources = files(
         'test_bpf.c',
         'test_byteorder.c',
         'test_cksum.c',
+        'test_cksum_perf.c',
         'test_cmdline.c',
         'test_cmdline_cirbuf.c',
         'test_cmdline_etheraddr.c',
diff --git a/app/test/test_cksum_perf.c b/app/test/test_cksum_perf.c
new file mode 100644
index 0000000000..1f296cae34
--- /dev/null
+++ b/app/test/test_cksum_perf.c
@@ -0,0 +1,117 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Ericsson AB
+ */
+
+#include <stdio.h>
+
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_ip.h>
+#include <rte_malloc.h>
+#include <rte_random.h>
+
+#include "test.h"
+
+#define NUM_BLOCKS 10
+#define ITERATIONS 1000000
+
+static const size_t data_sizes[] = { 20, 21, 100, 101, 1500, 1501 };
+
+static __rte_noinline uint16_t
+do_rte_raw_cksum(const void *buf, size_t len)
+{
+	return rte_raw_cksum(buf, len);
+}
+
+static void
+init_block(char *buf, size_t len)
+{
+	size_t i;
+
+	for (i = 0; i < len; i++)
+		buf[i] = (char)rte_rand();
+}
+
+static int
+test_cksum_perf_size_alignment(size_t block_size, bool aligned)
+{
+	char *data[NUM_BLOCKS];
+	char *blocks[NUM_BLOCKS];
+	unsigned int i;
+	uint64_t start;
+	uint64_t end;
+	/* Floating point to handle low (pseudo-)TSC frequencies */
+	double block_latency;
+	double byte_latency;
+	volatile __rte_unused uint64_t sum = 0;
+
+	for (i = 0; i < NUM_BLOCKS; i++) {
+		data[i] = rte_malloc(NULL, block_size + 1, 0);
+
+		if (data[i] == NULL) {
+			printf("Failed to allocate memory for block\n");
+			return TEST_FAILED;
+		}
+
+		init_block(data[i], block_size + 1);
+
+		blocks[i] = aligned ? data[i] : data[i] + 1;
+	}
+
+	start = rte_rdtsc();
+
+	for (i = 0; i < ITERATIONS; i++) {
+		unsigned int j;
+		for (j = 0; j < NUM_BLOCKS; j++)
+			sum += do_rte_raw_cksum(blocks[j], block_size);
+	}
+
+	end = rte_rdtsc();
+
+	block_latency = (end - start) / (double)(ITERATIONS * NUM_BLOCKS);
+	byte_latency = block_latency / block_size;
+
+	printf("%-9s %10zd %19.1f %16.2f\n", aligned ? "Aligned" : "Unaligned",
+	       block_size, block_latency, byte_latency);
+
+	for (i = 0; i < NUM_BLOCKS; i++)
+		rte_free(data[i]);
+
+	return TEST_SUCCESS;
+}
+
+static int
+test_cksum_perf_size(size_t block_size)
+{
+	int rc;
+
+	rc = test_cksum_perf_size_alignment(block_size, true);
+	if (rc != TEST_SUCCESS)
+		return rc;
+
+	rc = test_cksum_perf_size_alignment(block_size, false);
+
+	return rc;
+}
+
+static int
+test_cksum_perf(void)
+{
+	uint16_t i;
+
+	printf("### rte_raw_cksum() performance ###\n");
+	printf("Alignment  Block size    TSC cycles/block  TSC cycles/byte\n");
+
+	for (i = 0; i < RTE_DIM(data_sizes); i++) {
+		int rc;
+
+		rc = test_cksum_perf_size(data_sizes[i]);
+		if (rc != TEST_SUCCESS)
+			return rc;
+	}
+
+	return TEST_SUCCESS;
+}
+
+
+REGISTER_TEST_COMMAND(cksum_perf_autotest, test_cksum_perf);