[v3,6/6] app/regex: replace Linux clock() API with rdtsc

Message ID 20210110111023.9525-7-ophirmu@nvidia.com (mailing list archive)
State Accepted, archived
Delegated to: Thomas Monjalon
Headers
Series regex multi Q with multi cores support |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/iol-abi-testing success Testing PASS
ci/iol-broadcom-Functional success Functional Testing PASS
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/Intel-compilation success Compilation OK
ci/iol-testing success Testing PASS

Commit Message

Ophir Munk Jan. 10, 2021, 11:10 a.m. UTC
  Performance measurements (elapsed time and Gbps) are based on the Linux
clock() API. The resolution is improved by replacing the clock() API
with the rte_rdtsc_precise() API.

Signed-off-by: Ophir Munk <ophirmu@nvidia.com>
Acked-by: Ori Kam <orika@nvidia.com>
---
 app/test-regex/main.c | 31 +++++++++++++------------------
 1 file changed, 13 insertions(+), 18 deletions(-)
  

Patch

diff --git a/app/test-regex/main.c b/app/test-regex/main.c
index 2fce55d..aea4fa6 100644
--- a/app/test-regex/main.c
+++ b/app/test-regex/main.c
@@ -48,8 +48,8 @@  struct qp_params {
 	struct rte_regex_ops **ops;
 	struct job_ctx *jobs_ctx;
 	char *buf;
-	time_t start;
-	time_t end;
+	uint64_t start;
+	uint64_t cycles;
 };
 
 struct qps_per_lcore {
@@ -326,7 +326,7 @@  run_regex(void *args)
 	unsigned long d_ind = 0;
 	struct rte_mbuf_ext_shared_info shinfo;
 	int res = 0;
-	double time;
+	long double time;
 	struct rte_mempool *mbuf_mp;
 	struct qp_params *qp;
 	struct qp_params *qps = NULL;
@@ -419,7 +419,7 @@  run_regex(void *args)
 		qp->buf = buf;
 		qp->total_matches = 0;
 		qp->start = 0;
-		qp->end = 0;
+		qp->cycles = 0;
 	}
 
 	for (i = 0; i < nb_iterations; i++) {
@@ -432,9 +432,8 @@  run_regex(void *args)
 			update = false;
 			for (qp_id = 0; qp_id < nb_qps; qp_id++) {
 				qp = &qps[qp_id];
-				if (!qp->start)
-					qp->start = clock();
 				if (qp->total_dequeue < actual_jobs) {
+					qp->start = rte_rdtsc_precise();
 					struct rte_regex_ops **
 						cur_ops_to_enqueue = qp->ops +
 						qp->total_enqueue;
@@ -463,25 +462,21 @@  run_regex(void *args)
 							cur_ops_to_dequeue,
 							qp->total_enqueue -
 							qp->total_dequeue);
+					qp->cycles +=
+					     (rte_rdtsc_precise() - qp->start);
 					update = true;
-				} else {
-					if (!qp->end)
-						qp->end = clock();
 				}
-
 			}
 		} while (update);
 	}
 	for (qp_id = 0; qp_id < nb_qps; qp_id++) {
 		qp = &qps[qp_id];
-		time = ((double)qp->end - qp->start) / CLOCKS_PER_SEC;
-		printf("Core=%u QP=%u\n", rte_lcore_id(), qp_id + qp_id_base);
-		printf("Job len = %ld Bytes\n",  job_len);
-		printf("Time = %lf sec\n",  time);
-		printf("Perf = %lf Gbps\n\n",
-				(((double)actual_jobs * job_len *
-				nb_iterations * 8) / time) /
-				1000000000.0);
+		time = (long double)qp->cycles / rte_get_timer_hz();
+		printf("Core=%u QP=%u Job=%ld Bytes Time=%Lf sec Perf=%Lf "
+		       "Gbps\n", rte_lcore_id(), qp_id + qp_id_base,
+		       job_len, time,
+		       (((double)actual_jobs * job_len * nb_iterations * 8)
+		       / time) / 1000000000.0);
 	}
 
 	if (rgxc->perf_mode)