[v4,4/4] test/lpm: avoid code duplication in rcu qsbr perf

Message ID 20201103222352.11566-5-dharmik.thakkar@arm.com (mailing list archive)
State Superseded, archived
Delegated to: David Marchand
Headers
Series test/lpm: fix rcu qsbr perf test |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/travis-robot success Travis build: passed
ci/Intel-compilation success Compilation OK

Commit Message

Dharmik Thakkar Nov. 3, 2020, 10:23 p.m. UTC
  Avoid code duplication by combining single and multi threaded tests

Also, enable support for more than 2 writers

Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
---
 app/test/test_lpm_perf.c | 359 +++++++++------------------------------
 1 file changed, 84 insertions(+), 275 deletions(-)
  

Comments

Honnappa Nagarahalli Nov. 3, 2020, 10:35 p.m. UTC | #1
> -----Original Message-----
> From: dev <dev-bounces@dpdk.org> On Behalf Of Dharmik Thakkar
> Sent: Tuesday, November 3, 2020 4:24 PM
> To: Bruce Richardson <bruce.richardson@intel.com>; Vladimir Medvedkin
> <vladimir.medvedkin@intel.com>
> Cc: dev@dpdk.org; nd <nd@arm.com>; Dharmik Thakkar
> <Dharmik.Thakkar@arm.com>
> Subject: [dpdk-dev] [PATCH v4 4/4] test/lpm: avoid code duplication in rcu
> qsbr perf
> 
> Avoid code duplication by combining single and multi threaded tests
> 
> Also, enable support for more than 2 writers
> 
> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Looks good
Reviewed-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>

> ---
>  app/test/test_lpm_perf.c | 359 +++++++++------------------------------
>  1 file changed, 84 insertions(+), 275 deletions(-)
> 
> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c index
> c8e70ec89ff5..a1485e74e77f 100644
> --- a/app/test/test_lpm_perf.c
> +++ b/app/test/test_lpm_perf.c
> @@ -23,6 +23,7 @@ static struct rte_rcu_qsbr *rv;  static volatile uint8_t
> writer_done;  static volatile uint32_t thr_id;  static uint64_t gwrite_cycles;
> +static uint32_t single_insert;
>  /* LPM APIs are not thread safe, use mutex to provide thread safety */
> static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER;
> 
> @@ -430,24 +431,21 @@ test_lpm_rcu_qsbr_writer(void *arg)  {
>  	unsigned int i, j, si, ei;
>  	uint64_t begin, total_cycles;
> -	uint8_t core_id = (uint8_t)((uintptr_t)arg);
>  	uint32_t next_hop_add = 0xAA;
> +	bool single_writer = (single_insert ==
> NUM_LDEPTH_ROUTE_ENTRIES) ?
> +				true : false;
> +	uint8_t pos_core = (uint8_t)((uintptr_t)arg);
> 
> -	/* 2 writer threads are used */
> -	if (core_id % 2 == 0) {
> -		si = 0;
> -		ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
> -	} else {
> -		si = NUM_LDEPTH_ROUTE_ENTRIES / 2;
> -		ei = NUM_LDEPTH_ROUTE_ENTRIES;
> -	}
> +	si = pos_core * single_insert;
> +	ei = si + single_insert;
> 
>  	/* Measure add/delete. */
>  	begin = rte_rdtsc_precise();
>  	for (i = 0; i < RCU_ITERATIONS; i++) {
>  		/* Add all the entries */
>  		for (j = si; j < ei; j++) {
> -			pthread_mutex_lock(&lpm_mutex);
> +			if (!single_writer)
> +				pthread_mutex_lock(&lpm_mutex);
>  			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>  					large_ldepth_route_table[j].depth,
>  					next_hop_add) != 0) {
> @@ -455,19 +453,22 @@ test_lpm_rcu_qsbr_writer(void *arg)
>  					i, j);
>  				goto error;
>  			}
> -			pthread_mutex_unlock(&lpm_mutex);
> +			if (!single_writer)
> +				pthread_mutex_unlock(&lpm_mutex);
>  		}
> 
>  		/* Delete all the entries */
>  		for (j = si; j < ei; j++) {
> -			pthread_mutex_lock(&lpm_mutex);
> +			if (!single_writer)
> +				pthread_mutex_lock(&lpm_mutex);
>  			if (rte_lpm_delete(lpm,
> large_ldepth_route_table[j].ip,
>  				large_ldepth_route_table[j].depth) != 0) {
>  				printf("Failed to delete iteration %d, route#
> %d\n",
>  					i, j);
>  				goto error;
>  			}
> -			pthread_mutex_unlock(&lpm_mutex);
> +			if (!single_writer)
> +				pthread_mutex_unlock(&lpm_mutex);
>  		}
>  	}
> 
> @@ -478,22 +479,24 @@ test_lpm_rcu_qsbr_writer(void *arg)
>  	return 0;
> 
>  error:
> -	pthread_mutex_unlock(&lpm_mutex);
> +	if (!single_writer)
> +		pthread_mutex_unlock(&lpm_mutex);
>  	return -1;
>  }
> 
>  /*
>   * Functional test:
> - * 2 writers, rest are readers
> + * 1/2 writers, rest are readers
>   */
>  static int
> -test_lpm_rcu_perf_multi_writer(void)
> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
>  {
>  	struct rte_lpm_config config;
>  	size_t sz;
> -	unsigned int i;
> +	unsigned int i, j;
>  	uint16_t core_id;
>  	struct rte_lpm_rcu_config rcu_cfg = {0};
> +	int (*reader_f)(void *arg) = NULL;
> 
>  	if (rte_lcore_count() < 3) {
>  		printf("Not enough cores for lpm_rcu_perf_autotest,
> expecting at least 3\n"); @@ -506,273 +509,79 @@
> test_lpm_rcu_perf_multi_writer(void)
>  		num_cores++;
>  	}
> 
> -	printf("\nPerf test: 2 writers, %d readers, RCU integration
> enabled\n",
> -		num_cores - 2);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> -
> -	/* Init RCU variable */
> -	sz = rte_rcu_qsbr_get_memsize(num_cores);
> -	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> -						RTE_CACHE_LINE_SIZE);
> -	rte_rcu_qsbr_init(rv, num_cores);
> -
> -	rcu_cfg.v = rv;
> -	/* Assign the RCU variable to LPM */
> -	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
> -		printf("RCU variable assignment failed\n");
> -		goto error;
> -	}
> -
> -	writer_done = 0;
> -	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> -
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> -
> -	/* Launch reader threads */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
> -					enabled_core_ids[i]);
> -
> -	/* Launch writer threads */
> -	for (i = 0; i < 2; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> -					(void *)(uintptr_t)i,
> -					enabled_core_ids[i]);
> -
> -	/* Wait for writer threads */
> -	for (i = 0; i < 2; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> -
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
> -		/ TOTAL_WRITES);
> -
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
> -	rte_free(rv);
> -	lpm = NULL;
> -	rv = NULL;
> -
> -	/* Test without RCU integration */
> -	printf("\nPerf test: 2 writers, %d readers, RCU integration
> disabled\n",
> -		num_cores - 2);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> -
> -	writer_done = 0;
> -	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> -
> -	/* Launch reader threads */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_reader, NULL,
> -					enabled_core_ids[i]);
> -
> -	/* Launch writer threads */
> -	for (i = 0; i < 2; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> -					(void *)(uintptr_t)i,
> -					enabled_core_ids[i]);
> -
> -	/* Wait for writer threads */
> -	for (i = 0; i < 2; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> -
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
> -		/ TOTAL_WRITES);
> -
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
> -
> -	return 0;
> -
> -error:
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	rte_eal_mp_wait_lcore();
> -
> -	rte_lpm_free(lpm);
> -	rte_free(rv);
> -
> -	return -1;
> -}
> -
> -/*
> - * Functional test:
> - * Single writer, rest are readers
> - */
> -static int
> -test_lpm_rcu_perf(void)
> -{
> -	struct rte_lpm_config config;
> -	uint64_t begin, total_cycles;
> -	size_t sz;
> -	unsigned int i, j;
> -	uint16_t core_id;
> -	uint32_t next_hop_add = 0xAA;
> -	struct rte_lpm_rcu_config rcu_cfg = {0};
> -
> -	if (rte_lcore_count() < 2) {
> -		printf("Not enough cores for lpm_rcu_perf_autotest,
> expecting at least 2\n");
> -		return TEST_SKIPPED;
> -	}
> -
> -	num_cores = 0;
> -	RTE_LCORE_FOREACH_WORKER(core_id) {
> -		enabled_core_ids[num_cores] = core_id;
> -		num_cores++;
> -	}
> -
> -	printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n",
> -		num_cores);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> -
> -	/* Init RCU variable */
> -	sz = rte_rcu_qsbr_get_memsize(num_cores);
> -	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> -						RTE_CACHE_LINE_SIZE);
> -	rte_rcu_qsbr_init(rv, num_cores);
> -
> -	rcu_cfg.v = rv;
> -	/* Assign the RCU variable to LPM */
> -	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
> -		printf("RCU variable assignment failed\n");
> -		goto error;
> -	}
> -
> -	writer_done = 0;
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> -
> -	/* Launch reader threads */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
> -					enabled_core_ids[i]);
> -
> -	/* Measure add/delete. */
> -	begin = rte_rdtsc_precise();
> -	for (i = 0; i < RCU_ITERATIONS; i++) {
> -		/* Add all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> -					large_ldepth_route_table[j].depth,
> -					next_hop_add) != 0) {
> -				printf("Failed to add iteration %d, route#
> %d\n",
> -					i, j);
> -				goto error;
> -			}
> -
> -		/* Delete all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_delete(lpm,
> large_ldepth_route_table[j].ip,
> -				large_ldepth_route_table[j].depth) != 0) {
> -				printf("Failed to delete iteration %d, route#
> %d\n",
> -					i, j);
> +	for (j = 1; j < 3; j++) {
> +		if (use_rcu)
> +			printf("\nPerf test: %d writer(s), %d reader(s),"
> +			       " RCU integration enabled\n", j, num_cores - j);
> +		else
> +			printf("\nPerf test: %d writer(s), %d reader(s),"
> +			       " RCU integration disabled\n", j, num_cores - j);
> +
> +		/* Calculate writes by each writer */
> +		single_insert = NUM_LDEPTH_ROUTE_ENTRIES / j;
> +
> +		/* Create LPM table */
> +		config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> +		config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> +		config.flags = 0;
> +		lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> +		TEST_LPM_ASSERT(lpm != NULL);
> +
> +		/* Init RCU variable */
> +		if (use_rcu) {
> +			sz = rte_rcu_qsbr_get_memsize(num_cores);
> +			rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> +
> 	RTE_CACHE_LINE_SIZE);
> +			rte_rcu_qsbr_init(rv, num_cores);
> +
> +			rcu_cfg.v = rv;
> +			/* Assign the RCU variable to LPM */
> +			if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
> +				printf("RCU variable assignment failed\n");
>  				goto error;
>  			}
> -	}
> -	total_cycles = rte_rdtsc_precise() - begin;
> 
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %g cycles\n",
> -		(double)total_cycles / TOTAL_WRITES);
> +			reader_f = test_lpm_rcu_qsbr_reader;
> +		} else
> +			reader_f = test_lpm_reader;
> 
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
> -	rte_free(rv);
> -	lpm = NULL;
> -	rv = NULL;
> +		writer_done = 0;
> +		__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> 
> -	/* Test without RCU integration */
> -	printf("\nPerf test: 1 writer, %d readers, RCU integration
> disabled\n",
> -		num_cores);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> +		__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> 
> -	writer_done = 0;
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> +		/* Launch reader threads */
> +		for (i = j; i < num_cores; i++)
> +			rte_eal_remote_launch(reader_f, NULL,
> +						enabled_core_ids[i]);
> 
> -	/* Launch reader threads */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_reader, NULL,
> -					enabled_core_ids[i]);
> +		/* Launch writer threads */
> +		for (i = 0; i < j; i++)
> +			rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> +						(void *)(uintptr_t)i,
> +						enabled_core_ids[i]);
> 
> -	/* Measure add/delete. */
> -	begin = rte_rdtsc_precise();
> -	for (i = 0; i < RCU_ITERATIONS; i++) {
> -		/* Add all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> -					large_ldepth_route_table[j].depth,
> -					next_hop_add) != 0) {
> -				printf("Failed to add iteration %d, route#
> %d\n",
> -					i, j);
> +		/* Wait for writer threads */
> +		for (i = 0; i < j; i++)
> +			if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>  				goto error;
> -			}
> 
> -		/* Delete all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_delete(lpm,
> large_ldepth_route_table[j].ip,
> -				large_ldepth_route_table[j].depth) != 0) {
> -				printf("Failed to delete iteration %d, route#
> %d\n",
> -					i, j);
> -				goto error;
> -			}
> +		printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> +		printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> +		printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> +			__atomic_load_n(&gwrite_cycles,
> __ATOMIC_RELAXED)
> +			/ TOTAL_WRITES);
> +
> +		writer_done = 1;
> +		/* Wait until all readers have exited */
> +		for (i = j; i < num_cores; i++)
> +			rte_eal_wait_lcore(enabled_core_ids[i]);
> +
> +		rte_lpm_free(lpm);
> +		rte_free(rv);
> +		lpm = NULL;
> +		rv = NULL;
>  	}
> -	total_cycles = rte_rdtsc_precise() - begin;
> -
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %g cycles\n",
> -		(double)total_cycles / TOTAL_WRITES);
> -
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
> 
>  	return 0;
> 
> @@ -948,10 +757,10 @@ test_lpm_perf(void)
>  	rte_lpm_delete_all(lpm);
>  	rte_lpm_free(lpm);
> 
> -	if (test_lpm_rcu_perf() < 0)
> +	if (test_lpm_rcu_perf_multi_writer(0) < 0)
>  		return -1;
> 
> -	if (test_lpm_rcu_perf_multi_writer() < 0)
> +	if (test_lpm_rcu_perf_multi_writer(1) < 0)
>  		return -1;
> 
>  	return 0;
> --
> 2.17.1
  
Vladimir Medvedkin Nov. 4, 2020, 3:46 p.m. UTC | #2
Hi Thakkar,

On 03/11/2020 22:23, Dharmik Thakkar wrote:
> Avoid code duplication by combining single and multi threaded tests
> 
> Also, enable support for more than 2 writers
> 
> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> ---
>   app/test/test_lpm_perf.c | 359 +++++++++------------------------------
>   1 file changed, 84 insertions(+), 275 deletions(-)
> 
> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
> index c8e70ec89ff5..a1485e74e77f 100644
> --- a/app/test/test_lpm_perf.c
> +++ b/app/test/test_lpm_perf.c
> @@ -23,6 +23,7 @@ static struct rte_rcu_qsbr *rv;
>   static volatile uint8_t writer_done;
>   static volatile uint32_t thr_id;
>   static uint64_t gwrite_cycles;
> +static uint32_t single_insert;
>   /* LPM APIs are not thread safe, use mutex to provide thread safety */
>   static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER;
>   
> @@ -430,24 +431,21 @@ test_lpm_rcu_qsbr_writer(void *arg)
>   {
>   	unsigned int i, j, si, ei;
>   	uint64_t begin, total_cycles;
> -	uint8_t core_id = (uint8_t)((uintptr_t)arg);
>   	uint32_t next_hop_add = 0xAA;
> +	bool single_writer = (single_insert == NUM_LDEPTH_ROUTE_ENTRIES) ?
> +				true : false;
> +	uint8_t pos_core = (uint8_t)((uintptr_t)arg);
>   
> -	/* 2 writer threads are used */
> -	if (core_id % 2 == 0) {
> -		si = 0;
> -		ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
> -	} else {
> -		si = NUM_LDEPTH_ROUTE_ENTRIES / 2;
> -		ei = NUM_LDEPTH_ROUTE_ENTRIES;
> -	}
> +	si = pos_core * single_insert;
> +	ei = si + single_insert;
> 

In this case, given that you are doing
           "single_insert = NUM_LDEPTH_ROUTE_ENTRIES / j;"
below, the number of ldepth_routes must be a multiple of the number of 
writers, so some number of routes can be skipped in the opposite case. 
Consider something like:

number_of_writers = j;
...
si = (pos_core * NUM_LDEPTH_ROUTE_ENTRIES)/number_of_writers;
ei = ((pos_core + 1) * NUM_LDEPTH_ROUTE_ENTRIES)/number_of_writers;


>   	/* Measure add/delete. */
>   	begin = rte_rdtsc_precise();
>   	for (i = 0; i < RCU_ITERATIONS; i++) {
>   		/* Add all the entries */
>   		for (j = si; j < ei; j++) {
> -			pthread_mutex_lock(&lpm_mutex);
> +			if (!single_writer)
> +				pthread_mutex_lock(&lpm_mutex);
>   			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>   					large_ldepth_route_table[j].depth,
>   					next_hop_add) != 0) {
> @@ -455,19 +453,22 @@ test_lpm_rcu_qsbr_writer(void *arg)
>   					i, j);
>   				goto error;
>   			}
> -			pthread_mutex_unlock(&lpm_mutex);
> +			if (!single_writer)
> +				pthread_mutex_unlock(&lpm_mutex);
>   		}
>   
>   		/* Delete all the entries */
>   		for (j = si; j < ei; j++) {
> -			pthread_mutex_lock(&lpm_mutex);
> +			if (!single_writer)
> +				pthread_mutex_lock(&lpm_mutex);
>   			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
>   				large_ldepth_route_table[j].depth) != 0) {
>   				printf("Failed to delete iteration %d, route# %d\n",
>   					i, j);
>   				goto error;
>   			}
> -			pthread_mutex_unlock(&lpm_mutex);
> +			if (!single_writer)
> +				pthread_mutex_unlock(&lpm_mutex);
>   		}
>   	}
>   
> @@ -478,22 +479,24 @@ test_lpm_rcu_qsbr_writer(void *arg)
>   	return 0;
>   
>   error:
> -	pthread_mutex_unlock(&lpm_mutex);
> +	if (!single_writer)
> +		pthread_mutex_unlock(&lpm_mutex);
>   	return -1;
>   }
>   
>   /*
>    * Functional test:
> - * 2 writers, rest are readers
> + * 1/2 writers, rest are readers
>    */
>   static int
> -test_lpm_rcu_perf_multi_writer(void)
> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
>   {
>   	struct rte_lpm_config config;
>   	size_t sz;
> -	unsigned int i;
> +	unsigned int i, j;
>   	uint16_t core_id;
>   	struct rte_lpm_rcu_config rcu_cfg = {0};
> +	int (*reader_f)(void *arg) = NULL;
>   
>   	if (rte_lcore_count() < 3) {
>   		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n");
> @@ -506,273 +509,79 @@ test_lpm_rcu_perf_multi_writer(void)
>   		num_cores++;
>   	}
>   
> -	printf("\nPerf test: 2 writers, %d readers, RCU integration enabled\n",
> -		num_cores - 2);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> -
> -	/* Init RCU variable */
> -	sz = rte_rcu_qsbr_get_memsize(num_cores);
> -	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> -						RTE_CACHE_LINE_SIZE);
> -	rte_rcu_qsbr_init(rv, num_cores);
> -
> -	rcu_cfg.v = rv;
> -	/* Assign the RCU variable to LPM */
> -	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
> -		printf("RCU variable assignment failed\n");
> -		goto error;
> -	}
> -
> -	writer_done = 0;
> -	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> -
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> -
> -	/* Launch reader threads */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
> -					enabled_core_ids[i]);
> -
> -	/* Launch writer threads */
> -	for (i = 0; i < 2; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> -					(void *)(uintptr_t)i,
> -					enabled_core_ids[i]);
> -
> -	/* Wait for writer threads */
> -	for (i = 0; i < 2; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> -
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
> -		/ TOTAL_WRITES);
> -
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
> -	rte_free(rv);
> -	lpm = NULL;
> -	rv = NULL;
> -
> -	/* Test without RCU integration */
> -	printf("\nPerf test: 2 writers, %d readers, RCU integration disabled\n",
> -		num_cores - 2);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> -
> -	writer_done = 0;
> -	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> -
> -	/* Launch reader threads */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_reader, NULL,
> -					enabled_core_ids[i]);
> -
> -	/* Launch writer threads */
> -	for (i = 0; i < 2; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> -					(void *)(uintptr_t)i,
> -					enabled_core_ids[i]);
> -
> -	/* Wait for writer threads */
> -	for (i = 0; i < 2; i++)
> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
> -			goto error;
> -
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
> -		/ TOTAL_WRITES);
> -
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 2; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
> -
> -	return 0;
> -
> -error:
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	rte_eal_mp_wait_lcore();
> -
> -	rte_lpm_free(lpm);
> -	rte_free(rv);
> -
> -	return -1;
> -}
> -
> -/*
> - * Functional test:
> - * Single writer, rest are readers
> - */
> -static int
> -test_lpm_rcu_perf(void)
> -{
> -	struct rte_lpm_config config;
> -	uint64_t begin, total_cycles;
> -	size_t sz;
> -	unsigned int i, j;
> -	uint16_t core_id;
> -	uint32_t next_hop_add = 0xAA;
> -	struct rte_lpm_rcu_config rcu_cfg = {0};
> -
> -	if (rte_lcore_count() < 2) {
> -		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 2\n");
> -		return TEST_SKIPPED;
> -	}
> -
> -	num_cores = 0;
> -	RTE_LCORE_FOREACH_WORKER(core_id) {
> -		enabled_core_ids[num_cores] = core_id;
> -		num_cores++;
> -	}
> -
> -	printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n",
> -		num_cores);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> -
> -	/* Init RCU variable */
> -	sz = rte_rcu_qsbr_get_memsize(num_cores);
> -	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> -						RTE_CACHE_LINE_SIZE);
> -	rte_rcu_qsbr_init(rv, num_cores);
> -
> -	rcu_cfg.v = rv;
> -	/* Assign the RCU variable to LPM */
> -	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
> -		printf("RCU variable assignment failed\n");
> -		goto error;
> -	}
> -
> -	writer_done = 0;
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> -
> -	/* Launch reader threads */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
> -					enabled_core_ids[i]);
> -
> -	/* Measure add/delete. */
> -	begin = rte_rdtsc_precise();
> -	for (i = 0; i < RCU_ITERATIONS; i++) {
> -		/* Add all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> -					large_ldepth_route_table[j].depth,
> -					next_hop_add) != 0) {
> -				printf("Failed to add iteration %d, route# %d\n",
> -					i, j);
> -				goto error;
> -			}
> -
> -		/* Delete all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
> -				large_ldepth_route_table[j].depth) != 0) {
> -				printf("Failed to delete iteration %d, route# %d\n",
> -					i, j);
> +	for (j = 1; j < 3; j++) {
> +		if (use_rcu)
> +			printf("\nPerf test: %d writer(s), %d reader(s),"
> +			       " RCU integration enabled\n", j, num_cores - j);
> +		else
> +			printf("\nPerf test: %d writer(s), %d reader(s),"
> +			       " RCU integration disabled\n", j, num_cores - j);
> +
> +		/* Calculate writes by each writer */
> +		single_insert = NUM_LDEPTH_ROUTE_ENTRIES / j;
> +
> +		/* Create LPM table */
> +		config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> +		config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> +		config.flags = 0;
> +		lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> +		TEST_LPM_ASSERT(lpm != NULL);
> +
> +		/* Init RCU variable */
> +		if (use_rcu) {
> +			sz = rte_rcu_qsbr_get_memsize(num_cores);
> +			rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
> +							RTE_CACHE_LINE_SIZE);
> +			rte_rcu_qsbr_init(rv, num_cores);
> +
> +			rcu_cfg.v = rv;
> +			/* Assign the RCU variable to LPM */
> +			if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
> +				printf("RCU variable assignment failed\n");
>   				goto error;
>   			}
> -	}
> -	total_cycles = rte_rdtsc_precise() - begin;
>   
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %g cycles\n",
> -		(double)total_cycles / TOTAL_WRITES);
> +			reader_f = test_lpm_rcu_qsbr_reader;
> +		} else
> +			reader_f = test_lpm_reader;
>   
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
> -	rte_free(rv);
> -	lpm = NULL;
> -	rv = NULL;
> +		writer_done = 0;
> +		__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>   
> -	/* Test without RCU integration */
> -	printf("\nPerf test: 1 writer, %d readers, RCU integration disabled\n",
> -		num_cores);
> -
> -	/* Create LPM table */
> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
> -	config.flags = 0;
> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
> -	TEST_LPM_ASSERT(lpm != NULL);
> +		__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>   
> -	writer_done = 0;
> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
> +		/* Launch reader threads */
> +		for (i = j; i < num_cores; i++)
> +			rte_eal_remote_launch(reader_f, NULL,
> +						enabled_core_ids[i]);
>   
> -	/* Launch reader threads */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_remote_launch(test_lpm_reader, NULL,
> -					enabled_core_ids[i]);
> +		/* Launch writer threads */
> +		for (i = 0; i < j; i++)
> +			rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
> +						(void *)(uintptr_t)i,
> +						enabled_core_ids[i]);
>   
> -	/* Measure add/delete. */
> -	begin = rte_rdtsc_precise();
> -	for (i = 0; i < RCU_ITERATIONS; i++) {
> -		/* Add all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
> -					large_ldepth_route_table[j].depth,
> -					next_hop_add) != 0) {
> -				printf("Failed to add iteration %d, route# %d\n",
> -					i, j);
> +		/* Wait for writer threads */
> +		for (i = 0; i < j; i++)
> +			if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>   				goto error;
> -			}
>   
> -		/* Delete all the entries */
> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
> -			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
> -				large_ldepth_route_table[j].depth) != 0) {
> -				printf("Failed to delete iteration %d, route# %d\n",
> -					i, j);
> -				goto error;
> -			}
> +		printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> +		printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> +		printf("Average LPM Add/Del: %"PRIu64" cycles\n",
> +			__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
> +			/ TOTAL_WRITES);
> +
> +		writer_done = 1;
> +		/* Wait until all readers have exited */
> +		for (i = j; i < num_cores; i++)
> +			rte_eal_wait_lcore(enabled_core_ids[i]);
> +
> +		rte_lpm_free(lpm);
> +		rte_free(rv);
> +		lpm = NULL;
> +		rv = NULL;
>   	}
> -	total_cycles = rte_rdtsc_precise() - begin;
> -
> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
> -	printf("Average LPM Add/Del: %g cycles\n",
> -		(double)total_cycles / TOTAL_WRITES);
> -
> -	writer_done = 1;
> -	/* Wait until all readers have exited */
> -	for (i = 0; i < num_cores; i++)
> -		rte_eal_wait_lcore(enabled_core_ids[i]);
> -
> -	rte_lpm_free(lpm);
>   
>   	return 0;
>   
> @@ -948,10 +757,10 @@ test_lpm_perf(void)
>   	rte_lpm_delete_all(lpm);
>   	rte_lpm_free(lpm);
>   
> -	if (test_lpm_rcu_perf() < 0)
> +	if (test_lpm_rcu_perf_multi_writer(0) < 0)
>   		return -1;
>   
> -	if (test_lpm_rcu_perf_multi_writer() < 0)
> +	if (test_lpm_rcu_perf_multi_writer(1) < 0)
>   		return -1;
>   
>   	return 0;
>
  
Dharmik Thakkar Nov. 4, 2020, 4:49 p.m. UTC | #3
> On Nov 4, 2020, at 9:46 AM, Medvedkin, Vladimir <vladimir.medvedkin@intel.com> wrote:
> 
> Hi Thakkar,
> 
> On 03/11/2020 22:23, Dharmik Thakkar wrote:
>> Avoid code duplication by combining single and multi threaded tests
>> Also, enable support for more than 2 writers
>> Signed-off-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
>> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
>> ---
>>  app/test/test_lpm_perf.c | 359 +++++++++------------------------------
>>  1 file changed, 84 insertions(+), 275 deletions(-)
>> diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
>> index c8e70ec89ff5..a1485e74e77f 100644
>> --- a/app/test/test_lpm_perf.c
>> +++ b/app/test/test_lpm_perf.c
>> @@ -23,6 +23,7 @@ static struct rte_rcu_qsbr *rv;
>>  static volatile uint8_t writer_done;
>>  static volatile uint32_t thr_id;
>>  static uint64_t gwrite_cycles;
>> +static uint32_t single_insert;
>>  /* LPM APIs are not thread safe, use mutex to provide thread safety */
>>  static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER;
>>  @@ -430,24 +431,21 @@ test_lpm_rcu_qsbr_writer(void *arg)
>>  {
>>  	unsigned int i, j, si, ei;
>>  	uint64_t begin, total_cycles;
>> -	uint8_t core_id = (uint8_t)((uintptr_t)arg);
>>  	uint32_t next_hop_add = 0xAA;
>> +	bool single_writer = (single_insert == NUM_LDEPTH_ROUTE_ENTRIES) ?
>> +				true : false;
>> +	uint8_t pos_core = (uint8_t)((uintptr_t)arg);
>>  -	/* 2 writer threads are used */
>> -	if (core_id % 2 == 0) {
>> -		si = 0;
>> -		ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
>> -	} else {
>> -		si = NUM_LDEPTH_ROUTE_ENTRIES / 2;
>> -		ei = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	}
>> +	si = pos_core * single_insert;
>> +	ei = si + single_insert;
> 
> In this case, given that you are doing
>          "single_insert = NUM_LDEPTH_ROUTE_ENTRIES / j;"
> below, the number of ldepth_routes must be a multiple of the number of writers, so some number of routes can be skipped in the opposite case. Consider something like:
> 
> number_of_writers = j;
> ...
> si = (pos_core * NUM_LDEPTH_ROUTE_ENTRIES)/number_of_writers;
> ei = ((pos_core + 1 ) * NUM_LDEPTH_ROUTE_ENTRIES)/number_of_writers;
> 

Yes, agreed some routes can be skipped. I will update the patch with the above changes. Thanks!

> 
>>  	/* Measure add/delete. */
>>  	begin = rte_rdtsc_precise();
>>  	for (i = 0; i < RCU_ITERATIONS; i++) {
>>  		/* Add all the entries */
>>  		for (j = si; j < ei; j++) {
>> -			pthread_mutex_lock(&lpm_mutex);
>> +			if (!single_writer)
>> +				pthread_mutex_lock(&lpm_mutex);
>>  			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>>  					large_ldepth_route_table[j].depth,
>>  					next_hop_add) != 0) {
>> @@ -455,19 +453,22 @@ test_lpm_rcu_qsbr_writer(void *arg)
>>  					i, j);
>>  				goto error;
>>  			}
>> -			pthread_mutex_unlock(&lpm_mutex);
>> +			if (!single_writer)
>> +				pthread_mutex_unlock(&lpm_mutex);
>>  		}
>>    		/* Delete all the entries */
>>  		for (j = si; j < ei; j++) {
>> -			pthread_mutex_lock(&lpm_mutex);
>> +			if (!single_writer)
>> +				pthread_mutex_lock(&lpm_mutex);
>>  			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
>>  				large_ldepth_route_table[j].depth) != 0) {
>>  				printf("Failed to delete iteration %d, route# %d\n",
>>  					i, j);
>>  				goto error;
>>  			}
>> -			pthread_mutex_unlock(&lpm_mutex);
>> +			if (!single_writer)
>> +				pthread_mutex_unlock(&lpm_mutex);
>>  		}
>>  	}
>>
>> @@ -478,22 +479,24 @@ test_lpm_rcu_qsbr_writer(void *arg)
>>  	return 0;
>>    error:
>> -	pthread_mutex_unlock(&lpm_mutex);
>> +	if (!single_writer)
>> +		pthread_mutex_unlock(&lpm_mutex);
>>  	return -1;
>>  }
>>    /*
>>   * Functional test:
>> - * 2 writers, rest are readers
>> + * 1/2 writers, rest are readers
>>   */
>>  static int
>> -test_lpm_rcu_perf_multi_writer(void)
>> +test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
>>  {
>>  	struct rte_lpm_config config;
>>  	size_t sz;
>> -	unsigned int i;
>> +	unsigned int i, j;
>>  	uint16_t core_id;
>>  	struct rte_lpm_rcu_config rcu_cfg = {0};
>> +	int (*reader_f)(void *arg) = NULL;
>>    	if (rte_lcore_count() < 3) {
>>  		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n");
>> @@ -506,273 +509,79 @@ test_lpm_rcu_perf_multi_writer(void)
>>  		num_cores++;
>>  	}
>>  -	printf("\nPerf test: 2 writers, %d readers, RCU integration enabled\n",
>> -		num_cores - 2);
>> -
>> -	/* Create LPM table */
>> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.flags = 0;
>> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> -	TEST_LPM_ASSERT(lpm != NULL);
>> -
>> -	/* Init RCU variable */
>> -	sz = rte_rcu_qsbr_get_memsize(num_cores);
>> -	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>> -						RTE_CACHE_LINE_SIZE);
>> -	rte_rcu_qsbr_init(rv, num_cores);
>> -
>> -	rcu_cfg.v = rv;
>> -	/* Assign the RCU variable to LPM */
>> -	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
>> -		printf("RCU variable assignment failed\n");
>> -		goto error;
>> -	}
>> -
>> -	writer_done = 0;
>> -	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>> -
>> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>> -
>> -	/* Launch reader threads */
>> -	for (i = 2; i < num_cores; i++)
>> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
>> -					enabled_core_ids[i]);
>> -
>> -	/* Launch writer threads */
>> -	for (i = 0; i < 2; i++)
>> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>> -					(void *)(uintptr_t)i,
>> -					enabled_core_ids[i]);
>> -
>> -	/* Wait for writer threads */
>> -	for (i = 0; i < 2; i++)
>> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>> -			goto error;
>> -
>> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> -	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
>> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
>> -		/ TOTAL_WRITES);
>> -
>> -	writer_done = 1;
>> -	/* Wait until all readers have exited */
>> -	for (i = 2; i < num_cores; i++)
>> -		rte_eal_wait_lcore(enabled_core_ids[i]);
>> -
>> -	rte_lpm_free(lpm);
>> -	rte_free(rv);
>> -	lpm = NULL;
>> -	rv = NULL;
>> -
>> -	/* Test without RCU integration */
>> -	printf("\nPerf test: 2 writers, %d readers, RCU integration disabled\n",
>> -		num_cores - 2);
>> -
>> -	/* Create LPM table */
>> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.flags = 0;
>> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> -	TEST_LPM_ASSERT(lpm != NULL);
>> -
>> -	writer_done = 0;
>> -	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>> -
>> -	/* Launch reader threads */
>> -	for (i = 2; i < num_cores; i++)
>> -		rte_eal_remote_launch(test_lpm_reader, NULL,
>> -					enabled_core_ids[i]);
>> -
>> -	/* Launch writer threads */
>> -	for (i = 0; i < 2; i++)
>> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>> -					(void *)(uintptr_t)i,
>> -					enabled_core_ids[i]);
>> -
>> -	/* Wait for writer threads */
>> -	for (i = 0; i < 2; i++)
>> -		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>> -			goto error;
>> -
>> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> -	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
>> -		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
>> -		/ TOTAL_WRITES);
>> -
>> -	writer_done = 1;
>> -	/* Wait until all readers have exited */
>> -	for (i = 2; i < num_cores; i++)
>> -		rte_eal_wait_lcore(enabled_core_ids[i]);
>> -
>> -	rte_lpm_free(lpm);
>> -
>> -	return 0;
>> -
>> -error:
>> -	writer_done = 1;
>> -	/* Wait until all readers have exited */
>> -	rte_eal_mp_wait_lcore();
>> -
>> -	rte_lpm_free(lpm);
>> -	rte_free(rv);
>> -
>> -	return -1;
>> -}
>> -
>> -/*
>> - * Functional test:
>> - * Single writer, rest are readers
>> - */
>> -static int
>> -test_lpm_rcu_perf(void)
>> -{
>> -	struct rte_lpm_config config;
>> -	uint64_t begin, total_cycles;
>> -	size_t sz;
>> -	unsigned int i, j;
>> -	uint16_t core_id;
>> -	uint32_t next_hop_add = 0xAA;
>> -	struct rte_lpm_rcu_config rcu_cfg = {0};
>> -
>> -	if (rte_lcore_count() < 2) {
>> -		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 2\n");
>> -		return TEST_SKIPPED;
>> -	}
>> -
>> -	num_cores = 0;
>> -	RTE_LCORE_FOREACH_WORKER(core_id) {
>> -		enabled_core_ids[num_cores] = core_id;
>> -		num_cores++;
>> -	}
>> -
>> -	printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n",
>> -		num_cores);
>> -
>> -	/* Create LPM table */
>> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.flags = 0;
>> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> -	TEST_LPM_ASSERT(lpm != NULL);
>> -
>> -	/* Init RCU variable */
>> -	sz = rte_rcu_qsbr_get_memsize(num_cores);
>> -	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>> -						RTE_CACHE_LINE_SIZE);
>> -	rte_rcu_qsbr_init(rv, num_cores);
>> -
>> -	rcu_cfg.v = rv;
>> -	/* Assign the RCU variable to LPM */
>> -	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
>> -		printf("RCU variable assignment failed\n");
>> -		goto error;
>> -	}
>> -
>> -	writer_done = 0;
>> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>> -
>> -	/* Launch reader threads */
>> -	for (i = 0; i < num_cores; i++)
>> -		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
>> -					enabled_core_ids[i]);
>> -
>> -	/* Measure add/delete. */
>> -	begin = rte_rdtsc_precise();
>> -	for (i = 0; i < RCU_ITERATIONS; i++) {
>> -		/* Add all the entries */
>> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>> -			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>> -					large_ldepth_route_table[j].depth,
>> -					next_hop_add) != 0) {
>> -				printf("Failed to add iteration %d, route# %d\n",
>> -					i, j);
>> -				goto error;
>> -			}
>> -
>> -		/* Delete all the entries */
>> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>> -			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
>> -				large_ldepth_route_table[j].depth) != 0) {
>> -				printf("Failed to delete iteration %d, route# %d\n",
>> -					i, j);
>> +	for (j = 1; j < 3; j++) {
>> +		if (use_rcu)
>> +			printf("\nPerf test: %d writer(s), %d reader(s),"
>> +			       " RCU integration enabled\n", j, num_cores - j);
>> +		else
>> +			printf("\nPerf test: %d writer(s), %d reader(s),"
>> +			       " RCU integration disabled\n", j, num_cores - j);
>> +
>> +		/* Calculate writes by each writer */
>> +		single_insert = NUM_LDEPTH_ROUTE_ENTRIES / j;
>> +
>> +		/* Create LPM table */
>> +		config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> +		config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> +		config.flags = 0;
>> +		lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> +		TEST_LPM_ASSERT(lpm != NULL);
>> +
>> +		/* Init RCU variable */
>> +		if (use_rcu) {
>> +			sz = rte_rcu_qsbr_get_memsize(num_cores);
>> +			rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
>> +							RTE_CACHE_LINE_SIZE);
>> +			rte_rcu_qsbr_init(rv, num_cores);
>> +
>> +			rcu_cfg.v = rv;
>> +			/* Assign the RCU variable to LPM */
>> +			if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
>> +				printf("RCU variable assignment failed\n");
>>  				goto error;
>>  			}
>> -	}
>> -	total_cycles = rte_rdtsc_precise() - begin;
>>
>> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> -	printf("Average LPM Add/Del: %g cycles\n",
>> -		(double)total_cycles / TOTAL_WRITES);
>> +			reader_f = test_lpm_rcu_qsbr_reader;
>> +		} else
>> +			reader_f = test_lpm_reader;
>>  -	writer_done = 1;
>> -	/* Wait until all readers have exited */
>> -	for (i = 0; i < num_cores; i++)
>> -		rte_eal_wait_lcore(enabled_core_ids[i]);
>> -
>> -	rte_lpm_free(lpm);
>> -	rte_free(rv);
>> -	lpm = NULL;
>> -	rv = NULL;
>> +		writer_done = 0;
>> +		__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
>>  -	/* Test without RCU integration */
>> -	printf("\nPerf test: 1 writer, %d readers, RCU integration disabled\n",
>> -		num_cores);
>> -
>> -	/* Create LPM table */
>> -	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
>> -	config.flags = 0;
>> -	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
>> -	TEST_LPM_ASSERT(lpm != NULL);
>> +		__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>>  -	writer_done = 0;
>> -	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
>> +		/* Launch reader threads */
>> +		for (i = j; i < num_cores; i++)
>> +			rte_eal_remote_launch(reader_f, NULL,
>> +						enabled_core_ids[i]);
>>  -	/* Launch reader threads */
>> -	for (i = 0; i < num_cores; i++)
>> -		rte_eal_remote_launch(test_lpm_reader, NULL,
>> -					enabled_core_ids[i]);
>> +		/* Launch writer threads */
>> +		for (i = 0; i < j; i++)
>> +			rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
>> +						(void *)(uintptr_t)i,
>> +						enabled_core_ids[i]);
>>  -	/* Measure add/delete. */
>> -	begin = rte_rdtsc_precise();
>> -	for (i = 0; i < RCU_ITERATIONS; i++) {
>> -		/* Add all the entries */
>> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>> -			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
>> -					large_ldepth_route_table[j].depth,
>> -					next_hop_add) != 0) {
>> -				printf("Failed to add iteration %d, route# %d\n",
>> -					i, j);
>> +		/* Wait for writer threads */
>> +		for (i = 0; i < j; i++)
>> +			if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
>>  				goto error;
>> -			}
>>  -		/* Delete all the entries */
>> -		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
>> -			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
>> -				large_ldepth_route_table[j].depth) != 0) {
>> -				printf("Failed to delete iteration %d, route# %d\n",
>> -					i, j);
>> -				goto error;
>> -			}
>> +		printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> +		printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> +		printf("Average LPM Add/Del: %"PRIu64" cycles\n",
>> +			__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
>> +			/ TOTAL_WRITES);
>> +
>> +		writer_done = 1;
>> +		/* Wait until all readers have exited */
>> +		for (i = j; i < num_cores; i++)
>> +			rte_eal_wait_lcore(enabled_core_ids[i]);
>> +
>> +		rte_lpm_free(lpm);
>> +		rte_free(rv);
>> +		lpm = NULL;
>> +		rv = NULL;
>>  	}
>> -	total_cycles = rte_rdtsc_precise() - begin;
>> -
>> -	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
>> -	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
>> -	printf("Average LPM Add/Del: %g cycles\n",
>> -		(double)total_cycles / TOTAL_WRITES);
>> -
>> -	writer_done = 1;
>> -	/* Wait until all readers have exited */
>> -	for (i = 0; i < num_cores; i++)
>> -		rte_eal_wait_lcore(enabled_core_ids[i]);
>> -
>> -	rte_lpm_free(lpm);
>>    	return 0;
>>  @@ -948,10 +757,10 @@ test_lpm_perf(void)
>>  	rte_lpm_delete_all(lpm);
>>  	rte_lpm_free(lpm);
>>  -	if (test_lpm_rcu_perf() < 0)
>> +	if (test_lpm_rcu_perf_multi_writer(0) < 0)
>>  		return -1;
>>  -	if (test_lpm_rcu_perf_multi_writer() < 0)
>> +	if (test_lpm_rcu_perf_multi_writer(1) < 0)
>>  		return -1;
>>    	return 0;
> 
> -- 
> Regards,
> Vladimir
  

Patch

diff --git a/app/test/test_lpm_perf.c b/app/test/test_lpm_perf.c
index c8e70ec89ff5..a1485e74e77f 100644
--- a/app/test/test_lpm_perf.c
+++ b/app/test/test_lpm_perf.c
@@ -23,6 +23,7 @@  static struct rte_rcu_qsbr *rv;
 static volatile uint8_t writer_done;
 static volatile uint32_t thr_id;
 static uint64_t gwrite_cycles;
+static uint32_t single_insert;
 /* LPM APIs are not thread safe, use mutex to provide thread safety */
 static pthread_mutex_t lpm_mutex = PTHREAD_MUTEX_INITIALIZER;
 
@@ -430,24 +431,21 @@  test_lpm_rcu_qsbr_writer(void *arg)
 {
 	unsigned int i, j, si, ei;
 	uint64_t begin, total_cycles;
-	uint8_t core_id = (uint8_t)((uintptr_t)arg);
 	uint32_t next_hop_add = 0xAA;
+	bool single_writer = (single_insert == NUM_LDEPTH_ROUTE_ENTRIES) ?
+				true : false;
+	uint8_t pos_core = (uint8_t)((uintptr_t)arg);
 
-	/* 2 writer threads are used */
-	if (core_id % 2 == 0) {
-		si = 0;
-		ei = NUM_LDEPTH_ROUTE_ENTRIES / 2;
-	} else {
-		si = NUM_LDEPTH_ROUTE_ENTRIES / 2;
-		ei = NUM_LDEPTH_ROUTE_ENTRIES;
-	}
+	si = pos_core * single_insert;
+	ei = si + single_insert;
 
 	/* Measure add/delete. */
 	begin = rte_rdtsc_precise();
 	for (i = 0; i < RCU_ITERATIONS; i++) {
 		/* Add all the entries */
 		for (j = si; j < ei; j++) {
-			pthread_mutex_lock(&lpm_mutex);
+			if (!single_writer)
+				pthread_mutex_lock(&lpm_mutex);
 			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
 					large_ldepth_route_table[j].depth,
 					next_hop_add) != 0) {
@@ -455,19 +453,22 @@  test_lpm_rcu_qsbr_writer(void *arg)
 					i, j);
 				goto error;
 			}
-			pthread_mutex_unlock(&lpm_mutex);
+			if (!single_writer)
+				pthread_mutex_unlock(&lpm_mutex);
 		}
 
 		/* Delete all the entries */
 		for (j = si; j < ei; j++) {
-			pthread_mutex_lock(&lpm_mutex);
+			if (!single_writer)
+				pthread_mutex_lock(&lpm_mutex);
 			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
 				large_ldepth_route_table[j].depth) != 0) {
 				printf("Failed to delete iteration %d, route# %d\n",
 					i, j);
 				goto error;
 			}
-			pthread_mutex_unlock(&lpm_mutex);
+			if (!single_writer)
+				pthread_mutex_unlock(&lpm_mutex);
 		}
 	}
 
@@ -478,22 +479,24 @@  test_lpm_rcu_qsbr_writer(void *arg)
 	return 0;
 
 error:
-	pthread_mutex_unlock(&lpm_mutex);
+	if (!single_writer)
+		pthread_mutex_unlock(&lpm_mutex);
 	return -1;
 }
 
 /*
  * Functional test:
- * 2 writers, rest are readers
+ * 1/2 writers, rest are readers
  */
 static int
-test_lpm_rcu_perf_multi_writer(void)
+test_lpm_rcu_perf_multi_writer(uint8_t use_rcu)
 {
 	struct rte_lpm_config config;
 	size_t sz;
-	unsigned int i;
+	unsigned int i, j;
 	uint16_t core_id;
 	struct rte_lpm_rcu_config rcu_cfg = {0};
+	int (*reader_f)(void *arg) = NULL;
 
 	if (rte_lcore_count() < 3) {
 		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 3\n");
@@ -506,273 +509,79 @@  test_lpm_rcu_perf_multi_writer(void)
 		num_cores++;
 	}
 
-	printf("\nPerf test: 2 writers, %d readers, RCU integration enabled\n",
-		num_cores - 2);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
-
-	/* Init RCU variable */
-	sz = rte_rcu_qsbr_get_memsize(num_cores);
-	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
-						RTE_CACHE_LINE_SIZE);
-	rte_rcu_qsbr_init(rv, num_cores);
-
-	rcu_cfg.v = rv;
-	/* Assign the RCU variable to LPM */
-	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
-		printf("RCU variable assignment failed\n");
-		goto error;
-	}
-
-	writer_done = 0;
-	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-
-	/* Launch reader threads */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
-					enabled_core_ids[i]);
-
-	/* Launch writer threads */
-	for (i = 0; i < 2; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
-					(void *)(uintptr_t)i,
-					enabled_core_ids[i]);
-
-	/* Wait for writer threads */
-	for (i = 0; i < 2; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
-
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
-		/ TOTAL_WRITES);
-
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
-	rte_free(rv);
-	lpm = NULL;
-	rv = NULL;
-
-	/* Test without RCU integration */
-	printf("\nPerf test: 2 writers, %d readers, RCU integration disabled\n",
-		num_cores - 2);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
-
-	writer_done = 0;
-	__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-
-	/* Launch reader threads */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_reader, NULL,
-					enabled_core_ids[i]);
-
-	/* Launch writer threads */
-	for (i = 0; i < 2; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
-					(void *)(uintptr_t)i,
-					enabled_core_ids[i]);
-
-	/* Wait for writer threads */
-	for (i = 0; i < 2; i++)
-		if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
-			goto error;
-
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %"PRIu64" cycles\n",
-		__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
-		/ TOTAL_WRITES);
-
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 2; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
-
-	return 0;
-
-error:
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	rte_eal_mp_wait_lcore();
-
-	rte_lpm_free(lpm);
-	rte_free(rv);
-
-	return -1;
-}
-
-/*
- * Functional test:
- * Single writer, rest are readers
- */
-static int
-test_lpm_rcu_perf(void)
-{
-	struct rte_lpm_config config;
-	uint64_t begin, total_cycles;
-	size_t sz;
-	unsigned int i, j;
-	uint16_t core_id;
-	uint32_t next_hop_add = 0xAA;
-	struct rte_lpm_rcu_config rcu_cfg = {0};
-
-	if (rte_lcore_count() < 2) {
-		printf("Not enough cores for lpm_rcu_perf_autotest, expecting at least 2\n");
-		return TEST_SKIPPED;
-	}
-
-	num_cores = 0;
-	RTE_LCORE_FOREACH_WORKER(core_id) {
-		enabled_core_ids[num_cores] = core_id;
-		num_cores++;
-	}
-
-	printf("\nPerf test: 1 writer, %d readers, RCU integration enabled\n",
-		num_cores);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
-
-	/* Init RCU variable */
-	sz = rte_rcu_qsbr_get_memsize(num_cores);
-	rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
-						RTE_CACHE_LINE_SIZE);
-	rte_rcu_qsbr_init(rv, num_cores);
-
-	rcu_cfg.v = rv;
-	/* Assign the RCU variable to LPM */
-	if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
-		printf("RCU variable assignment failed\n");
-		goto error;
-	}
-
-	writer_done = 0;
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
-
-	/* Launch reader threads */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_rcu_qsbr_reader, NULL,
-					enabled_core_ids[i]);
-
-	/* Measure add/delete. */
-	begin = rte_rdtsc_precise();
-	for (i = 0; i < RCU_ITERATIONS; i++) {
-		/* Add all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
-					large_ldepth_route_table[j].depth,
-					next_hop_add) != 0) {
-				printf("Failed to add iteration %d, route# %d\n",
-					i, j);
-				goto error;
-			}
-
-		/* Delete all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
-				large_ldepth_route_table[j].depth) != 0) {
-				printf("Failed to delete iteration %d, route# %d\n",
-					i, j);
+	for (j = 1; j < 3; j++) {
+		if (use_rcu)
+			printf("\nPerf test: %d writer(s), %d reader(s),"
+			       " RCU integration enabled\n", j, num_cores - j);
+		else
+			printf("\nPerf test: %d writer(s), %d reader(s),"
+			       " RCU integration disabled\n", j, num_cores - j);
+
+		/* Calculate writes by each writer */
+		single_insert = NUM_LDEPTH_ROUTE_ENTRIES / j;
+
+		/* Create LPM table */
+		config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
+		config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
+		config.flags = 0;
+		lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
+		TEST_LPM_ASSERT(lpm != NULL);
+
+		/* Init RCU variable */
+		if (use_rcu) {
+			sz = rte_rcu_qsbr_get_memsize(num_cores);
+			rv = (struct rte_rcu_qsbr *)rte_zmalloc("rcu0", sz,
+							RTE_CACHE_LINE_SIZE);
+			rte_rcu_qsbr_init(rv, num_cores);
+
+			rcu_cfg.v = rv;
+			/* Assign the RCU variable to LPM */
+			if (rte_lpm_rcu_qsbr_add(lpm, &rcu_cfg) != 0) {
+				printf("RCU variable assignment failed\n");
 				goto error;
 			}
-	}
-	total_cycles = rte_rdtsc_precise() - begin;
 
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / TOTAL_WRITES);
+			reader_f = test_lpm_rcu_qsbr_reader;
+		} else
+			reader_f = test_lpm_reader;
 
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
-	rte_free(rv);
-	lpm = NULL;
-	rv = NULL;
+		writer_done = 0;
+		__atomic_store_n(&gwrite_cycles, 0, __ATOMIC_RELAXED);
 
-	/* Test without RCU integration */
-	printf("\nPerf test: 1 writer, %d readers, RCU integration disabled\n",
-		num_cores);
-
-	/* Create LPM table */
-	config.max_rules = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.number_tbl8s = NUM_LDEPTH_ROUTE_ENTRIES;
-	config.flags = 0;
-	lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config);
-	TEST_LPM_ASSERT(lpm != NULL);
+		__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
 
-	writer_done = 0;
-	__atomic_store_n(&thr_id, 0, __ATOMIC_SEQ_CST);
+		/* Launch reader threads */
+		for (i = j; i < num_cores; i++)
+			rte_eal_remote_launch(reader_f, NULL,
+						enabled_core_ids[i]);
 
-	/* Launch reader threads */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_remote_launch(test_lpm_reader, NULL,
-					enabled_core_ids[i]);
+		/* Launch writer threads */
+		for (i = 0; i < j; i++)
+			rte_eal_remote_launch(test_lpm_rcu_qsbr_writer,
+						(void *)(uintptr_t)i,
+						enabled_core_ids[i]);
 
-	/* Measure add/delete. */
-	begin = rte_rdtsc_precise();
-	for (i = 0; i < RCU_ITERATIONS; i++) {
-		/* Add all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_add(lpm, large_ldepth_route_table[j].ip,
-					large_ldepth_route_table[j].depth,
-					next_hop_add) != 0) {
-				printf("Failed to add iteration %d, route# %d\n",
-					i, j);
+		/* Wait for writer threads */
+		for (i = 0; i < j; i++)
+			if (rte_eal_wait_lcore(enabled_core_ids[i]) < 0)
 				goto error;
-			}
 
-		/* Delete all the entries */
-		for (j = 0; j < NUM_LDEPTH_ROUTE_ENTRIES; j++)
-			if (rte_lpm_delete(lpm, large_ldepth_route_table[j].ip,
-				large_ldepth_route_table[j].depth) != 0) {
-				printf("Failed to delete iteration %d, route# %d\n",
-					i, j);
-				goto error;
-			}
+		printf("Total LPM Adds: %d\n", TOTAL_WRITES);
+		printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
+		printf("Average LPM Add/Del: %"PRIu64" cycles\n",
+			__atomic_load_n(&gwrite_cycles, __ATOMIC_RELAXED)
+			/ TOTAL_WRITES);
+
+		writer_done = 1;
+		/* Wait until all readers have exited */
+		for (i = j; i < num_cores; i++)
+			rte_eal_wait_lcore(enabled_core_ids[i]);
+
+		rte_lpm_free(lpm);
+		rte_free(rv);
+		lpm = NULL;
+		rv = NULL;
 	}
-	total_cycles = rte_rdtsc_precise() - begin;
-
-	printf("Total LPM Adds: %d\n", TOTAL_WRITES);
-	printf("Total LPM Deletes: %d\n", TOTAL_WRITES);
-	printf("Average LPM Add/Del: %g cycles\n",
-		(double)total_cycles / TOTAL_WRITES);
-
-	writer_done = 1;
-	/* Wait until all readers have exited */
-	for (i = 0; i < num_cores; i++)
-		rte_eal_wait_lcore(enabled_core_ids[i]);
-
-	rte_lpm_free(lpm);
 
 	return 0;
 
@@ -948,10 +757,10 @@  test_lpm_perf(void)
 	rte_lpm_delete_all(lpm);
 	rte_lpm_free(lpm);
 
-	if (test_lpm_rcu_perf() < 0)
+	if (test_lpm_rcu_perf_multi_writer(0) < 0)
 		return -1;
 
-	if (test_lpm_rcu_perf_multi_writer() < 0)
+	if (test_lpm_rcu_perf_multi_writer(1) < 0)
 		return -1;
 
 	return 0;