[dpdk-dev,v2,3/5] test: add distributor_perf autotest

Message ID 1482381428-148094-4-git-send-email-david.hunt@intel.com (mailing list archive)
State Superseded, archived
Headers

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel compilation fail Compilation issues

Commit Message

Hunt, David Dec. 22, 2016, 4:37 a.m. UTC
  Signed-off-by: David Hunt <david.hunt@intel.com>
---
 app/test/test_distributor_perf.c | 133 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 127 insertions(+), 6 deletions(-)
  

Comments

Jerin Jacob Dec. 22, 2016, 12:19 p.m. UTC | #1
On Thu, Dec 22, 2016 at 04:37:06AM +0000, David Hunt wrote:
> Signed-off-by: David Hunt <david.hunt@intel.com>
> ---
> + * it does nothing but return packets and count them.
> + */
> +static int
> +handle_work_burst(void *arg)
> +{
> +	//struct rte_mbuf *pkt = NULL;

Seems like there is a lot of test code with // in this file. Please remove it.

> +	struct rte_distributor_burst *d = arg;
> +	unsigned int count = 0;
> +	unsigned int num = 0;
> +	unsigned int id = __sync_fetch_and_add(&worker_idx, 1);

Use rte_atomic equivalent
  
Hunt, David Jan. 2, 2017, 4:24 p.m. UTC | #2
On 22/12/2016 12:19 PM, Jerin Jacob wrote:
> On Thu, Dec 22, 2016 at 04:37:06AM +0000, David Hunt wrote:
>> +	struct rte_distributor_burst *d = arg;
>> +	unsigned int count = 0;
>> +	unsigned int num = 0;
>> +	unsigned int id = __sync_fetch_and_add(&worker_idx, 1);
> Use rte_atomic equivalent

Jerin,
     I'm looking for an equivalent, but I can't seem to find one. Here 
I'm assigning 'id' with the incremented value of worker_idx in one 
statement.
However, rte_atomic32_add just increments the variable and returns void, 
so I'd have to use two statements, losing the atomicity.

   static inline void
   rte_atomic32_add(rte_atomic32_t *v, int32_t inc)

There's a second reason why I can't use the rte_atomics, and that's 
because worker_idx is a volatile.

Maybe we could add new atomic functions in the future to address this?

Thanks,
Dave.
  
Jerin Jacob Jan. 4, 2017, 1:09 p.m. UTC | #3
On Mon, Jan 02, 2017 at 04:24:01PM +0000, Hunt, David wrote:
> 
> 
> On 22/12/2016 12:19 PM, Jerin Jacob wrote:
> > On Thu, Dec 22, 2016 at 04:37:06AM +0000, David Hunt wrote:
> > > +	struct rte_distributor_burst *d = arg;
> > > +	unsigned int count = 0;
> > > +	unsigned int num = 0;
> > > +	unsigned int id = __sync_fetch_and_add(&worker_idx, 1);
> > Use rte_atomic equivalent
> 
> Jerin,
>     I'm looking for an equivalent, but I can't seem to find one. Here I'm
> assigning 'id' with the incremented value of worker_idx in one statement.
> However, rte_atomic32_add just increments the variable and returns void, so
> I'd have to use two statements, losing the atomicity.
> 
>   static inline void
>   rte_atomic32_add(rte_atomic32_t *v, int32_t inc)

It would have been better if rte_atomic32_add returned the old value.

> 
> There's a second reason why I can't use the rte_atomics, and that's because
> worker_idx is a volatile.

Maybe you could change worker_idx to rte_atomic32_t

> 
> Maybe we could add new atomic functions in the future to address this?

Yes. I guess fixing the return value of rte_atomic*_[add/sub] may
be enough

> 
> Thanks,
> Dave.
  

Patch

diff --git a/app/test/test_distributor_perf.c b/app/test/test_distributor_perf.c
index 7947fe9..86285fd 100644
--- a/app/test/test_distributor_perf.c
+++ b/app/test/test_distributor_perf.c
@@ -40,9 +40,11 @@ 
 #include <rte_common.h>
 #include <rte_mbuf.h>
 #include <rte_distributor.h>
+#include <rte_distributor_burst.h>
 
-#define ITER_POWER 20 /* log 2 of how many iterations we do when timing. */
-#define BURST 32
+#define ITER_POWER_CL 25 /* log 2 of how many iterations  for Cache Line test */
+#define ITER_POWER 21 /* log 2 of how many iterations we do when timing. */
+#define BURST 64
 #define BIG_BATCH 1024
 
 /* static vars - zero initialized by default */
@@ -86,7 +88,7 @@  time_cache_line_switch(void)
 		rte_pause();
 
 	const uint64_t start_time = rte_rdtsc();
-	for (i = 0; i < (1 << ITER_POWER); i++) {
+	for (i = 0; i < (1 << ITER_POWER_CL); i++) {
 		while (*pdata)
 			rte_pause();
 		*pdata = 1;
@@ -98,10 +100,10 @@  time_cache_line_switch(void)
 	*pdata = 2;
 	rte_eal_wait_lcore(slaveid);
 	printf("==== Cache line switch test ===\n");
-	printf("Time for %u iterations = %"PRIu64" ticks\n", (1<<ITER_POWER),
+	printf("Time for %u iterations = %"PRIu64" ticks\n", (1<<ITER_POWER_CL),
 			end_time-start_time);
 	printf("Ticks per iteration = %"PRIu64"\n\n",
-			(end_time-start_time) >> ITER_POWER);
+			(end_time-start_time) >> ITER_POWER_CL);
 }
 
 /* returns the total count of the number of packets handled by the worker
@@ -144,6 +146,34 @@  handle_work(void *arg)
 	return 0;
 }
 
+/* this is the basic worker function for performance tests.
+ * it does nothing but return packets and count them.
+ */
+static int
+handle_work_burst(void *arg)
+{
+	//struct rte_mbuf *pkt = NULL;
+	struct rte_distributor_burst *d = arg;
+	unsigned int count = 0;
+	unsigned int num = 0;
+	unsigned int id = __sync_fetch_and_add(&worker_idx, 1);
+	struct rte_mbuf *buf[8] __rte_cache_aligned;
+
+	for (int i = 0; i < 8; i++)
+		buf[i] = NULL;
+
+	num = rte_distributor_get_pkt_burst(d, id, buf, buf, num);
+	while (!quit) {
+		worker_stats[id].handled_packets += num;
+		count += num;
+		num = rte_distributor_get_pkt_burst(d, id, buf, buf, num);
+	}
+	worker_stats[id].handled_packets += num;
+	count += num;
+	rte_distributor_return_pkt_burst(d, id, buf, num);
+	return 0;
+}
+
 /* this basic performance test just repeatedly sends in 32 packets at a time
  * to the distributor and verifies at the end that we got them all in the worker
  * threads and finally how long per packet the processing took.
@@ -174,6 +204,8 @@  perf_test(struct rte_distributor *d, struct rte_mempool *p)
 		rte_distributor_process(d, NULL, 0);
 	} while (total_packet_count() < (BURST << ITER_POWER));
 
+	rte_distributor_clear_returns(d);
+
 	printf("=== Performance test of distributor ===\n");
 	printf("Time per burst:  %"PRIu64"\n", (end - start) >> ITER_POWER);
 	printf("Time per packet: %"PRIu64"\n\n",
@@ -190,6 +222,54 @@  perf_test(struct rte_distributor *d, struct rte_mempool *p)
 	return 0;
 }
 
+/* this basic performance test just repeatedly sends in 32 packets at a time
+ * to the distributor and verifies at the end that we got them all in the worker
+ * threads and finally how long per packet the processing took.
+ */
+static inline int
+perf_test_burst(struct rte_distributor_burst *d, struct rte_mempool *p)
+{
+	unsigned int i;
+	uint64_t start, end;
+	struct rte_mbuf *bufs[BURST];
+
+	clear_packet_count();
+	if (rte_mempool_get_bulk(p, (void *)bufs, BURST) != 0) {
+		printf("Error getting mbufs from pool\n");
+		return -1;
+	}
+	/* ensure we have different hash value for each pkt */
+	for (i = 0; i < BURST; i++)
+		bufs[i]->hash.usr = i;
+
+	start = rte_rdtsc();
+	for (i = 0; i < (1<<ITER_POWER); i++)
+		rte_distributor_process_burst(d, bufs, BURST);
+	end = rte_rdtsc();
+
+	do {
+		usleep(100);
+		rte_distributor_process_burst(d, NULL, 0);
+	} while (total_packet_count() < (BURST << ITER_POWER));
+
+	rte_distributor_clear_returns_burst(d);
+
+	printf("=== Performance test of burst distributor ===\n");
+	printf("Time per burst:  %"PRIu64"\n", (end - start) >> ITER_POWER);
+	printf("Time per packet: %"PRIu64"\n\n",
+			((end - start) >> ITER_POWER)/BURST);
+	rte_mempool_put_bulk(p, (void *)bufs, BURST);
+
+	for (i = 0; i < rte_lcore_count() - 1; i++)
+		printf("Worker %u handled %u packets\n", i,
+				worker_stats[i].handled_packets);
+	printf("Total packets: %u (%x)\n", total_packet_count(),
+			total_packet_count());
+	printf("=== Perf test done ===\n\n");
+
+	return 0;
+}
+
 /* Useful function which ensures that all worker functions terminate */
 static void
 quit_workers(struct rte_distributor *d, struct rte_mempool *p)
@@ -212,10 +292,34 @@  quit_workers(struct rte_distributor *d, struct rte_mempool *p)
 	worker_idx = 0;
 }
 
+/* Useful function which ensures that all worker functions terminate */
+static void
+quit_workers_burst(struct rte_distributor_burst *d, struct rte_mempool *p)
+{
+	const unsigned int num_workers = rte_lcore_count() - 1;
+	unsigned int i;
+	struct rte_mbuf *bufs[RTE_MAX_LCORE];
+
+	rte_mempool_get_bulk(p, (void *)bufs, num_workers);
+
+	quit = 1;
+	for (i = 0; i < num_workers; i++)
+		bufs[i]->hash.usr = i << 1;
+	rte_distributor_process_burst(d, bufs, num_workers);
+
+	rte_mempool_put_bulk(p, (void *)bufs, num_workers);
+
+	rte_distributor_process_burst(d, NULL, 0);
+	rte_eal_mp_wait_lcore();
+	quit = 0;
+	worker_idx = 0;
+}
+
 static int
 test_distributor_perf(void)
 {
 	static struct rte_distributor *d;
+	static struct rte_distributor_burst *db;
 	static struct rte_mempool *p;
 
 	if (rte_lcore_count() < 2) {
@@ -234,10 +338,22 @@  test_distributor_perf(void)
 			return -1;
 		}
 	} else {
-		rte_distributor_flush(d);
+		//rte_distributor_flush_burst(d);
 		rte_distributor_clear_returns(d);
 	}
 
+	if (db == NULL) {
+		db = rte_distributor_create_burst("Test_burst", rte_socket_id(),
+				rte_lcore_count() - 1);
+		if (db == NULL) {
+			printf("Error creating burst distributor\n");
+			return -1;
+		}
+	} else {
+		//rte_distributor_flush_burst(d);
+		rte_distributor_clear_returns_burst(db);
+	}
+
 	const unsigned nb_bufs = (511 * rte_lcore_count()) < BIG_BATCH ?
 			(BIG_BATCH * 2) - 1 : (511 * rte_lcore_count());
 	if (p == NULL) {
@@ -254,6 +370,11 @@  test_distributor_perf(void)
 		return -1;
 	quit_workers(d, p);
 
+	rte_eal_mp_remote_launch(handle_work_burst, db, SKIP_MASTER);
+	if (perf_test_burst(db, p) < 0)
+		return -1;
+	quit_workers_burst(db, p);
+
 	return 0;
 }