[dpdk-dev,v4,4/6] test: add distributor_perf autotest

Message ID 1483948248-91364-5-git-send-email-david.hunt@intel.com
State Superseded, archived
Headers show

Checks

Context Check Description
ci/Intel compilation success Compilation OK
ci/checkpatch success coding style OK

Commit Message

David Hunt Jan. 9, 2017, 7:50 a.m.
Signed-off-by: David Hunt <david.hunt@intel.com>
---
 app/test/test_distributor_perf.c | 148 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 137 insertions(+), 11 deletions(-)

Patch

diff --git a/app/test/test_distributor_perf.c b/app/test/test_distributor_perf.c
index 7947fe9..b273bf9 100644
--- a/app/test/test_distributor_perf.c
+++ b/app/test/test_distributor_perf.c
@@ -40,9 +40,11 @@ 
 #include <rte_common.h>
 #include <rte_mbuf.h>
 #include <rte_distributor.h>
+#include <rte_distributor_burst.h>
 
-#define ITER_POWER 20 /* log 2 of how many iterations we do when timing. */
-#define BURST 32
+#define ITER_POWER_CL 25 /* log 2 of how many iterations for Cache Line test */
+#define ITER_POWER 21 /* log 2 of how many iterations we do when timing. */
+#define BURST 64
 #define BIG_BATCH 1024
 
 /* static vars - zero initialized by default */
@@ -54,7 +56,8 @@  struct worker_stats {
 } __rte_cache_aligned;
 struct worker_stats worker_stats[RTE_MAX_LCORE];
 
-/* worker thread used for testing the time to do a round-trip of a cache
+/*
+ * worker thread used for testing the time to do a round-trip of a cache
  * line between two cores and back again
  */
 static void
@@ -69,7 +72,8 @@  flip_bit(volatile uint64_t *arg)
 	}
 }
 
-/* test case to time the number of cycles to round-trip a cache line between
+/*
+ * test case to time the number of cycles to round-trip a cache line between
  * two cores and back again.
  */
 static void
@@ -86,7 +90,7 @@  time_cache_line_switch(void)
 		rte_pause();
 
 	const uint64_t start_time = rte_rdtsc();
-	for (i = 0; i < (1 << ITER_POWER); i++) {
+	for (i = 0; i < (1 << ITER_POWER_CL); i++) {
 		while (*pdata)
 			rte_pause();
 		*pdata = 1;
@@ -98,13 +102,14 @@  time_cache_line_switch(void)
 	*pdata = 2;
 	rte_eal_wait_lcore(slaveid);
 	printf("==== Cache line switch test ===\n");
-	printf("Time for %u iterations = %"PRIu64" ticks\n", (1<<ITER_POWER),
+	printf("Time for %u iterations = %"PRIu64" ticks\n", (1<<ITER_POWER_CL),
 			end_time-start_time);
 	printf("Ticks per iteration = %"PRIu64"\n\n",
-			(end_time-start_time) >> ITER_POWER);
+			(end_time-start_time) >> ITER_POWER_CL);
 }
 
-/* returns the total count of the number of packets handled by the worker
+/*
+ * returns the total count of the number of packets handled by the worker
  * functions given below.
  */
 static unsigned
@@ -123,7 +128,8 @@  clear_packet_count(void)
 	memset(&worker_stats, 0, sizeof(worker_stats));
 }
 
-/* this is the basic worker function for performance tests.
+/*
+ * this is the basic worker function for performance tests.
  * it does nothing but return packets and count them.
  */
 static int
@@ -144,7 +150,37 @@  handle_work(void *arg)
 	return 0;
 }
 
-/* this basic performance test just repeatedly sends in 32 packets at a time
+/*
+ * burst worker for the perf tests: takes its id from worker_idx, then adds
+ * every rte_distributor_get_pkt_burst() count to worker_stats while !quit.
+ */
+static int
+handle_work_burst(void *arg)
+{
+	struct rte_distributor_burst *d = arg;
+	unsigned int count = 0;	/* accumulated below but never read */
+	unsigned int num = 0;
+	int i;
+	unsigned int id = __sync_fetch_and_add(&worker_idx, 1);
+	struct rte_mbuf *buf[8] __rte_cache_aligned;
+
+	for (i = 0; i < 8; i++)
+		buf[i] = NULL;
+
+	num = rte_distributor_get_pkt_burst(d, id, buf, buf, num);
+	while (!quit) {
+		worker_stats[id].handled_packets += num;
+		count += num;
+		num = rte_distributor_get_pkt_burst(d, id, buf, buf, num);
+	}
+	worker_stats[id].handled_packets += num;
+	count += num;
+	rte_distributor_return_pkt_burst(d, id, buf, num);
+	return 0;
+}
+
+/*
+ * this basic performance test just repeatedly sends in BURST packets at a time
  * to the distributor and verifies at the end that we got them all in the worker
  * threads and finally how long per packet the processing took.
  */
@@ -174,6 +210,8 @@  perf_test(struct rte_distributor *d, struct rte_mempool *p)
 		rte_distributor_process(d, NULL, 0);
 	} while (total_packet_count() < (BURST << ITER_POWER));
 
+	rte_distributor_clear_returns(d);
+
 	printf("=== Performance test of distributor ===\n");
 	printf("Time per burst:  %"PRIu64"\n", (end - start) >> ITER_POWER);
 	printf("Time per packet: %"PRIu64"\n\n",
@@ -190,6 +228,55 @@  perf_test(struct rte_distributor *d, struct rte_mempool *p)
 	return 0;
 }
 
+/*
+ * burst-API performance test: sends the same BURST packets to the distributor
+ * (1 << ITER_POWER) times, waits until the workers have counted them all, then
+ * prints the time per burst and per packet. Returns 0, or -1 if no mbufs.
+ */
+static inline int
+perf_test_burst(struct rte_distributor_burst *d, struct rte_mempool *p)
+{
+	unsigned int i;
+	uint64_t start, end;
+	struct rte_mbuf *bufs[BURST];
+
+	clear_packet_count();
+	if (rte_mempool_get_bulk(p, (void *)bufs, BURST) != 0) {
+		printf("Error getting mbufs from pool\n");
+		return -1;
+	}
+	/* ensure we have different hash value for each pkt */
+	for (i = 0; i < BURST; i++)
+		bufs[i]->hash.usr = i;
+
+	start = rte_rdtsc();
+	for (i = 0; i < (1<<ITER_POWER); i++)
+		rte_distributor_process_burst(d, bufs, BURST);
+	end = rte_rdtsc();
+
+	do {
+		usleep(100);
+		rte_distributor_process_burst(d, NULL, 0);
+	} while (total_packet_count() < (BURST << ITER_POWER));
+
+	rte_distributor_clear_returns_burst(d);
+
+	printf("=== Performance test of burst distributor ===\n");
+	printf("Time per burst:  %"PRIu64"\n", (end - start) >> ITER_POWER);
+	printf("Time per packet: %"PRIu64"\n\n",
+			((end - start) >> ITER_POWER)/BURST);
+	rte_mempool_put_bulk(p, (void *)bufs, BURST);
+
+	for (i = 0; i < rte_lcore_count() - 1; i++)
+		printf("Worker %u handled %u packets\n", i,
+				worker_stats[i].handled_packets);
+	printf("Total packets: %u (%x)\n", total_packet_count(),
+			total_packet_count());
+	printf("=== Perf test done ===\n\n");
+
+	return 0;
+}
+
 /* Useful function which ensures that all worker functions terminate */
 static void
 quit_workers(struct rte_distributor *d, struct rte_mempool *p)
@@ -212,10 +299,34 @@  quit_workers(struct rte_distributor *d, struct rte_mempool *p)
 	worker_idx = 0;
 }
 
+/* sets quit, sends one mbuf per worker, then waits for every lcore to finish */
+static void
+quit_workers_burst(struct rte_distributor_burst *d, struct rte_mempool *p)
+{
+	const unsigned int num_workers = rte_lcore_count() - 1;
+	unsigned int i;
+	struct rte_mbuf *bufs[RTE_MAX_LCORE];
+
+	rte_mempool_get_bulk(p, (void *)bufs, num_workers); /* return value ignored */
+
+	quit = 1;
+	for (i = 0; i < num_workers; i++)
+		bufs[i]->hash.usr = i << 1;
+	rte_distributor_process_burst(d, bufs, num_workers);
+
+	rte_mempool_put_bulk(p, (void *)bufs, num_workers);
+
+	rte_distributor_process_burst(d, NULL, 0);
+	rte_eal_mp_wait_lcore();
+	quit = 0;
+	worker_idx = 0;
+}
+
 static int
 test_distributor_perf(void)
 {
 	static struct rte_distributor *d;
+	static struct rte_distributor_burst *db;
 	static struct rte_mempool *p;
 
 	if (rte_lcore_count() < 2) {
@@ -234,10 +345,20 @@  test_distributor_perf(void)
 			return -1;
 		}
 	} else {
-		rte_distributor_flush(d);
 		rte_distributor_clear_returns(d);
 	}
 
+	if (db == NULL) {
+		db = rte_distributor_create_burst("Test_burst", rte_socket_id(),
+				rte_lcore_count() - 1);
+		if (db == NULL) {
+			printf("Error creating burst distributor\n");
+			return -1;
+		}
+	} else {
+		rte_distributor_clear_returns_burst(db);
+	}
+
 	const unsigned nb_bufs = (511 * rte_lcore_count()) < BIG_BATCH ?
 			(BIG_BATCH * 2) - 1 : (511 * rte_lcore_count());
 	if (p == NULL) {
@@ -254,6 +375,11 @@  test_distributor_perf(void)
 		return -1;
 	quit_workers(d, p);
 
+	rte_eal_mp_remote_launch(handle_work_burst, db, SKIP_MASTER);
+	if (perf_test_burst(db, p) < 0)
+		return -1;
+	quit_workers_burst(db, p);
+
 	return 0;
 }