@@ -1,7 +1,7 @@
/*-
* BSD LICENSE
*
- * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2010-2017 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -40,9 +40,11 @@
#include <rte_common.h>
#include <rte_mbuf.h>
#include <rte_distributor.h>
+#include <rte_distributor_burst.h>
-#define ITER_POWER 20 /* log 2 of how many iterations we do when timing. */
-#define BURST 32
+#define ITER_POWER_CL 25 /* log 2 of how many iterations for Cache Line test */
+#define ITER_POWER 21 /* log 2 of how many iterations we do when timing. */
+#define BURST 64
#define BIG_BATCH 1024
/* static vars - zero initialized by default */
@@ -54,7 +56,8 @@ struct worker_stats {
} __rte_cache_aligned;
struct worker_stats worker_stats[RTE_MAX_LCORE];
-/* worker thread used for testing the time to do a round-trip of a cache
+/*
+ * worker thread used for testing the time to do a round-trip of a cache
* line between two cores and back again
*/
static void
@@ -69,7 +72,8 @@ flip_bit(volatile uint64_t *arg)
}
}
-/* test case to time the number of cycles to round-trip a cache line between
+/*
+ * test case to time the number of cycles to round-trip a cache line between
* two cores and back again.
*/
static void
@@ -86,7 +90,7 @@ time_cache_line_switch(void)
rte_pause();
const uint64_t start_time = rte_rdtsc();
- for (i = 0; i < (1 << ITER_POWER); i++) {
+ for (i = 0; i < (1 << ITER_POWER_CL); i++) {
while (*pdata)
rte_pause();
*pdata = 1;
@@ -98,13 +102,14 @@ time_cache_line_switch(void)
*pdata = 2;
rte_eal_wait_lcore(slaveid);
printf("==== Cache line switch test ===\n");
- printf("Time for %u iterations = %"PRIu64" ticks\n", (1<<ITER_POWER),
+ printf("Time for %u iterations = %"PRIu64" ticks\n", (1<<ITER_POWER_CL),
end_time-start_time);
printf("Ticks per iteration = %"PRIu64"\n\n",
- (end_time-start_time) >> ITER_POWER);
+ (end_time-start_time) >> ITER_POWER_CL);
}
-/* returns the total count of the number of packets handled by the worker
+/*
+ * returns the total count of the number of packets handled by the worker
* functions given below.
*/
static unsigned
@@ -123,7 +128,8 @@ clear_packet_count(void)
memset(&worker_stats, 0, sizeof(worker_stats));
}
-/* this is the basic worker function for performance tests.
+/*
+ * this is the basic worker function for performance tests.
* it does nothing but return packets and count them.
*/
static int
@@ -144,7 +150,37 @@ handle_work(void *arg)
return 0;
}
-/* this basic performance test just repeatedly sends in 32 packets at a time
+/*
+ * this is the basic worker function for the burst API performance tests.
+ * it does nothing but return packets and count them.
+ */
+static int
+handle_work_burst(void *arg)
+{
+ struct rte_distributor_burst *d = arg;
+ unsigned int count = 0;
+ unsigned int num = 0;
+ int i;
+ unsigned int id = __sync_fetch_and_add(&worker_idx, 1);
+ struct rte_mbuf *buf[8] __rte_cache_aligned;
+
+ for (i = 0; i < 8; i++)
+ buf[i] = NULL;
+
+ num = rte_distributor_get_pkt_burst(d, id, buf, buf, num);
+ while (!quit) {
+ worker_stats[id].handled_packets += num;
+ count += num;
+ num = rte_distributor_get_pkt_burst(d, id, buf, buf, num);
+ }
+ worker_stats[id].handled_packets += num;
+ count += num;
+ rte_distributor_return_pkt_burst(d, id, buf, num);
+ return 0;
+}
+
+/*
+ * this basic performance test just repeatedly sends in BURST (64) packets at a
* to the distributor and verifies at the end that we got them all in the worker
* threads and finally how long per packet the processing took.
*/
@@ -174,6 +210,8 @@ perf_test(struct rte_distributor *d, struct rte_mempool *p)
rte_distributor_process(d, NULL, 0);
} while (total_packet_count() < (BURST << ITER_POWER));
+ rte_distributor_clear_returns(d);
+
printf("=== Performance test of distributor ===\n");
printf("Time per burst: %"PRIu64"\n", (end - start) >> ITER_POWER);
printf("Time per packet: %"PRIu64"\n\n",
@@ -190,6 +228,55 @@ perf_test(struct rte_distributor *d, struct rte_mempool *p)
return 0;
}
+/*
+ * this basic performance test just repeatedly sends in BURST (64) packets at a
+ * time to the burst distributor and verifies at the end that we got them all in
+ * threads and finally how long per packet the processing took.
+ */
+static inline int
+perf_test_burst(struct rte_distributor_burst *d, struct rte_mempool *p)
+{
+ unsigned int i;
+ uint64_t start, end;
+ struct rte_mbuf *bufs[BURST];
+
+ clear_packet_count();
+ if (rte_mempool_get_bulk(p, (void *)bufs, BURST) != 0) {
+ printf("Error getting mbufs from pool\n");
+ return -1;
+ }
+ /* ensure we have different hash value for each pkt */
+ for (i = 0; i < BURST; i++)
+ bufs[i]->hash.usr = i;
+
+ start = rte_rdtsc();
+ for (i = 0; i < (1<<ITER_POWER); i++)
+ rte_distributor_process_burst(d, bufs, BURST);
+ end = rte_rdtsc();
+
+ do {
+ usleep(100);
+ rte_distributor_process_burst(d, NULL, 0);
+ } while (total_packet_count() < (BURST << ITER_POWER));
+
+ rte_distributor_clear_returns_burst(d);
+
+ printf("=== Performance test of burst distributor ===\n");
+ printf("Time per burst: %"PRIu64"\n", (end - start) >> ITER_POWER);
+ printf("Time per packet: %"PRIu64"\n\n",
+ ((end - start) >> ITER_POWER)/BURST);
+ rte_mempool_put_bulk(p, (void *)bufs, BURST);
+
+ for (i = 0; i < rte_lcore_count() - 1; i++)
+ printf("Worker %u handled %u packets\n", i,
+ worker_stats[i].handled_packets);
+ printf("Total packets: %u (%x)\n", total_packet_count(),
+ total_packet_count());
+ printf("=== Perf test done ===\n\n");
+
+ return 0;
+}
+
/* Useful function which ensures that all worker functions terminate */
static void
quit_workers(struct rte_distributor *d, struct rte_mempool *p)
@@ -212,10 +299,34 @@ quit_workers(struct rte_distributor *d, struct rte_mempool *p)
worker_idx = 0;
}
+/* Useful function which ensures that all worker functions terminate */
+static void
+quit_workers_burst(struct rte_distributor_burst *d, struct rte_mempool *p)
+{
+ const unsigned int num_workers = rte_lcore_count() - 1;
+ unsigned int i;
+ struct rte_mbuf *bufs[RTE_MAX_LCORE];
+
+ rte_mempool_get_bulk(p, (void *)bufs, num_workers);
+
+ quit = 1;
+ for (i = 0; i < num_workers; i++)
+ bufs[i]->hash.usr = i << 1;
+ rte_distributor_process_burst(d, bufs, num_workers);
+
+ rte_mempool_put_bulk(p, (void *)bufs, num_workers);
+
+ rte_distributor_process_burst(d, NULL, 0);
+ rte_eal_mp_wait_lcore();
+ quit = 0;
+ worker_idx = 0;
+}
+
static int
test_distributor_perf(void)
{
static struct rte_distributor *d;
+ static struct rte_distributor_burst *db;
static struct rte_mempool *p;
if (rte_lcore_count() < 2) {
@@ -234,10 +345,20 @@ test_distributor_perf(void)
return -1;
}
} else {
- rte_distributor_flush(d);
rte_distributor_clear_returns(d);
}
+ if (db == NULL) {
+ db = rte_distributor_create_burst("Test_burst", rte_socket_id(),
+ rte_lcore_count() - 1);
+ if (db == NULL) {
+ printf("Error creating burst distributor\n");
+ return -1;
+ }
+ } else {
+ rte_distributor_clear_returns_burst(db);
+ }
+
const unsigned nb_bufs = (511 * rte_lcore_count()) < BIG_BATCH ?
(BIG_BATCH * 2) - 1 : (511 * rte_lcore_count());
if (p == NULL) {
@@ -254,6 +375,11 @@ test_distributor_perf(void)
return -1;
quit_workers(d, p);
+ rte_eal_mp_remote_launch(handle_work_burst, db, SKIP_MASTER);
+ if (perf_test_burst(db, p) < 0)
+ return -1;
+ quit_workers_burst(db, p);
+
return 0;
}