test/compress: add cycle-count mode to the perf tool
diff mbox series

Message ID 20191211155000.26610-1-arturx.trybula@intel.com
State Accepted, archived
Delegated to: akhil goyal
Headers show
Series
  • test/compress: add cycle-count mode to the perf tool
Related show

Checks

Context Check Description
ci/Intel-compilation success Compilation OK
ci/travis-robot warning Travis build: failed
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-testing success Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/checkpatch success coding style OK

Commit Message

Artur Trybula Dec. 11, 2019, 3:50 p.m. UTC
This commit adds cycle-count mode to the compression perf tool.
The new mode enhances the compression performance tool to allow
cycle-count measurement of both hardware and software PMDs.

Signed-off-by: Artur Trybula <arturx.trybula@intel.com>
---
 app/test-compress-perf/Makefile               |   3 +-
 app/test-compress-perf/comp_perf.h            |   6 +-
 app/test-compress-perf/comp_perf_options.h    |   7 +-
 .../comp_perf_options_parse.c                 |  35 +-
 .../comp_perf_test_common.c                   |  23 +-
 .../comp_perf_test_common.h                   |   2 +-
 .../comp_perf_test_cyclecount.c               | 614 ++++++++++++++++++
 .../comp_perf_test_cyclecount.h               |  24 +
 ...enchmark.c => comp_perf_test_throughput.c} |  10 +-
 ...enchmark.h => comp_perf_test_throughput.h} |   6 +-
 .../comp_perf_test_verify.c                   |   4 +-
 app/test-compress-perf/main.c                 |  65 +-
 app/test-compress-perf/meson.build            |   3 +-
 13 files changed, 755 insertions(+), 47 deletions(-)
 create mode 100644 app/test-compress-perf/comp_perf_test_cyclecount.c
 create mode 100644 app/test-compress-perf/comp_perf_test_cyclecount.h
 rename app/test-compress-perf/{comp_perf_test_benchmark.c => comp_perf_test_throughput.c} (97%)
 rename app/test-compress-perf/{comp_perf_test_benchmark.h => comp_perf_test_throughput.h} (80%)

Comments

Trahe, Fiona Jan. 13, 2020, 3:18 p.m. UTC | #1
> -----Original Message-----
> From: Trybula, ArturX <arturx.trybula@intel.com>
> Sent: Wednesday, December 11, 2019 3:50 PM
> To: dev@dpdk.org; Trahe, Fiona <fiona.trahe@intel.com>; shallyv@marvell.com; Dybkowski, AdamX
> <adamx.dybkowski@intel.com>; Danilewicz, MarcinX <marcinx.danilewicz@intel.com>; Trybula,
> ArturX <arturx.trybula@intel.com>; akhil.goyal@nxp.com
> Subject: [PATCH] test/compress: add cycle-count mode to the perf tool
> 
> This commit adds cycle-count mode to the compression perf tool.
> The new mode enhances the compression performance tool to allow
> cycle-count measurement of both hardware and software PMDs.
> 
> Signed-off-by: Artur Trybula <arturx.trybula@intel.com>
Acked-by: Fiona Trahe <fiona.trahe@intel.com>
Akhil Goyal Jan. 28, 2020, 6:10 a.m. UTC | #2
> >
> > This commit adds cycle-count mode to the compression perf tool.
> > The new mode enhances the compression performance tool to allow
> > cycle-count measurement of both hardware and software PMDs.
> >
> > Signed-off-by: Artur Trybula <arturx.trybula@intel.com>
> Acked-by: Fiona Trahe <fiona.trahe@intel.com>

Applied to dpdk-next-crypto

Thanks.

Patch
diff mbox series

diff --git a/app/test-compress-perf/Makefile b/app/test-compress-perf/Makefile
index d1a6820e6..ad3b91d0a 100644
--- a/app/test-compress-perf/Makefile
+++ b/app/test-compress-perf/Makefile
@@ -13,7 +13,8 @@  CFLAGS += -O3
 SRCS-y := main.c
 SRCS-y += comp_perf_options_parse.c
 SRCS-y += comp_perf_test_verify.c
-SRCS-y += comp_perf_test_benchmark.c
+SRCS-y += comp_perf_test_throughput.c
+SRCS-y += comp_perf_test_cyclecount.c
 SRCS-y += comp_perf_test_common.c
 
 include $(RTE_SDK)/mk/rte.app.mk
diff --git a/app/test-compress-perf/comp_perf.h b/app/test-compress-perf/comp_perf.h
index 57289b07a..997d46b59 100644
--- a/app/test-compress-perf/comp_perf.h
+++ b/app/test-compress-perf/comp_perf.h
@@ -26,15 +26,15 @@  struct cperf_test {
 /* Needed for weak functions*/
 
 void *
-cperf_benchmark_test_constructor(uint8_t dev_id __rte_unused,
+cperf_throughput_test_constructor(uint8_t dev_id __rte_unused,
 				 uint16_t qp_id __rte_unused,
 				 struct comp_test_data *options __rte_unused);
 
 void
-cperf_benchmark_test_destructor(void *arg __rte_unused);
+cperf_throughput_test_destructor(void *arg __rte_unused);
 
 int
-cperf_benchmark_test_runner(void *test_ctx __rte_unused);
+cperf_throughput_test_runner(void *test_ctx __rte_unused);
 
 void *
 cperf_verify_test_constructor(uint8_t dev_id __rte_unused,
diff --git a/app/test-compress-perf/comp_perf_options.h b/app/test-compress-perf/comp_perf_options.h
index 2c26511ef..0b777521c 100644
--- a/app/test-compress-perf/comp_perf_options.h
+++ b/app/test-compress-perf/comp_perf_options.h
@@ -24,8 +24,9 @@  enum cleanup_st {
 };
 
 enum cperf_test_type {
-	CPERF_TEST_TYPE_BENCHMARK,
-	CPERF_TEST_TYPE_VERIFY
+	CPERF_TEST_TYPE_THROUGHPUT,
+	CPERF_TEST_TYPE_VERIFY,
+	CPERF_TEST_TYPE_PMDCC
 };
 
 enum comp_operation {
@@ -68,6 +69,8 @@  struct comp_test_data {
 	double ratio;
 	enum cleanup_st cleanup;
 	int perf_comp_force_stop;
+
+	uint32_t cyclecount_delay;
 };
 
 int
diff --git a/app/test-compress-perf/comp_perf_options_parse.c b/app/test-compress-perf/comp_perf_options_parse.c
index 12d0a6caf..04a8d2fbe 100644
--- a/app/test-compress-perf/comp_perf_options_parse.c
+++ b/app/test-compress-perf/comp_perf_options_parse.c
@@ -30,6 +30,9 @@ 
 #define CPERF_WINDOW_SIZE	("window-sz")
 #define CPERF_EXTERNAL_MBUFS	("external-mbufs")
 
+/* cyclecount-specific options */
+#define CPERF_CYCLECOUNT_DELAY_US ("cc-delay-us")
+
 struct name_id_map {
 	const char *name;
 	uint32_t id;
@@ -39,7 +42,7 @@  static void
 usage(char *progname)
 {
 	printf("%s [EAL options] --\n"
-		" --ptest benchmark / verify :"
+		" --ptest throughput / verify / pmd-cyclecount\n"
 		" --driver-name NAME: compress driver to use\n"
 		" --input-file NAME: file to compress and decompress\n"
 		" --extended-input-sz N: extend file data up to this size (default: no extension)\n"
@@ -61,6 +64,8 @@  usage(char *progname)
 		"		(e.g.: 15 => 32k, default: max supported by PMD)\n"
 		" --external-mbufs: use memzones as external buffers instead of\n"
 		"		keeping the data directly in mbuf area\n"
+		" --cc-delay-us N: delay between enqueue and dequeue operations in microseconds\n"
+		"		valid only for cyclecount perf test (default: 500 us)\n"
 		" -h: prints this help\n",
 		progname);
 }
@@ -85,12 +90,16 @@  parse_cperf_test_type(struct comp_test_data *test_data, const char *arg)
 {
 	struct name_id_map cperftest_namemap[] = {
 		{
-			comp_perf_test_type_strs[CPERF_TEST_TYPE_BENCHMARK],
-			CPERF_TEST_TYPE_BENCHMARK
+			comp_perf_test_type_strs[CPERF_TEST_TYPE_THROUGHPUT],
+			CPERF_TEST_TYPE_THROUGHPUT
 		},
 		{
 			comp_perf_test_type_strs[CPERF_TEST_TYPE_VERIFY],
 			CPERF_TEST_TYPE_VERIFY
+		},
+		{
+			comp_perf_test_type_strs[CPERF_TEST_TYPE_PMDCC],
+			CPERF_TEST_TYPE_PMDCC
 		}
 	};
 
@@ -531,17 +540,28 @@  parse_external_mbufs(struct comp_test_data *test_data,
 	return 0;
 }
 
+static int
+parse_cyclecount_delay_us(struct comp_test_data *test_data,
+			const char *arg)
+{
+	int ret = parse_uint32_t(&(test_data->cyclecount_delay), arg);
+
+	if (ret) {
+		RTE_LOG(ERR, USER1, "Failed to parse cyclecount delay\n");
+		return -1;
+	}
+	return 0;
+}
+
 typedef int (*option_parser_t)(struct comp_test_data *test_data,
 		const char *arg);
 
 struct long_opt_parser {
 	const char *lgopt_name;
 	option_parser_t parser_fn;
-
 };
 
 static struct option lgopts[] = {
-
 	{ CPERF_PTEST_TYPE, required_argument, 0, 0 },
 	{ CPERF_DRIVER_NAME, required_argument, 0, 0 },
 	{ CPERF_TEST_FILE, required_argument, 0, 0 },
@@ -556,6 +576,7 @@  static struct option lgopts[] = {
 	{ CPERF_LEVEL, required_argument, 0, 0 },
 	{ CPERF_WINDOW_SIZE, required_argument, 0, 0 },
 	{ CPERF_EXTERNAL_MBUFS, 0, 0, 0 },
+	{ CPERF_CYCLECOUNT_DELAY_US, required_argument, 0, 0 },
 	{ NULL, 0, 0, 0 }
 };
 
@@ -577,6 +598,7 @@  comp_perf_opts_parse_long(int opt_idx, struct comp_test_data *test_data)
 		{ CPERF_LEVEL,		parse_level },
 		{ CPERF_WINDOW_SIZE,	parse_window_sz },
 		{ CPERF_EXTERNAL_MBUFS,	parse_external_mbufs },
+		{ CPERF_CYCLECOUNT_DELAY_US,	parse_cyclecount_delay_us },
 	};
 	unsigned int i;
 
@@ -631,8 +653,9 @@  comp_perf_options_default(struct comp_test_data *test_data)
 	test_data->level_lst.min = RTE_COMP_LEVEL_MIN;
 	test_data->level_lst.max = RTE_COMP_LEVEL_MAX;
 	test_data->level_lst.inc = 1;
-	test_data->test = CPERF_TEST_TYPE_BENCHMARK;
+	test_data->test = CPERF_TEST_TYPE_THROUGHPUT;
 	test_data->use_external_mbufs = 0;
+	test_data->cyclecount_delay = 500;
 }
 
 int
diff --git a/app/test-compress-perf/comp_perf_test_common.c b/app/test-compress-perf/comp_perf_test_common.c
index 1b8985b43..b402a0d83 100644
--- a/app/test-compress-perf/comp_perf_test_common.c
+++ b/app/test-compress-perf/comp_perf_test_common.c
@@ -9,7 +9,8 @@ 
 
 #include "comp_perf.h"
 #include "comp_perf_options.h"
-#include "comp_perf_test_benchmark.h"
+#include "comp_perf_test_throughput.h"
+#include "comp_perf_test_cyclecount.h"
 #include "comp_perf_test_common.h"
 #include "comp_perf_test_verify.h"
 
@@ -276,9 +277,11 @@  comp_perf_allocate_memory(struct comp_test_data *test_data,
 
 	snprintf(pool_name, sizeof(pool_name), "op_pool_%u_qp_%u",
 			mem->dev_id, mem->qp_id);
+
+	/* one mempool for both src and dst mbufs */
 	mem->op_pool = rte_comp_op_pool_create(pool_name,
-				  mem->total_bufs,
-				  0, 0, rte_socket_id());
+				mem->total_bufs * 2,
+				0, 0, rte_socket_id());
 	if (mem->op_pool == NULL) {
 		RTE_LOG(ERR, USER1, "Comp op mempool could not be created\n");
 		return -1;
@@ -495,20 +498,24 @@  prepare_bufs(struct comp_test_data *test_data, struct cperf_mem_resources *mem)
 }
 
 void
-print_test_dynamics(void)
+print_test_dynamics(const struct comp_test_data *test_data)
 {
 	uint32_t opt_total_segs = DIV_CEIL(buffer_info.input_data_sz,
 			MAX_SEG_SIZE);
 
 	if (buffer_info.total_buffs > 1) {
-		printf("\nWarning: for the current input parameters, number"
+		if (test_data->test == CPERF_TEST_TYPE_THROUGHPUT) {
+			printf("\nWarning: for the current input parameters, number"
 				" of ops is higher than one, which may result"
 				" in sub-optimal performance.\n");
-		printf("To improve the performance (for the current"
+			printf("To improve the performance (for the current"
 				" input data) following parameters are"
 				" suggested:\n");
-		printf("	* Segment size: %d\n", MAX_SEG_SIZE);
-		printf("	* Number of segments: %u\n", opt_total_segs);
+			printf("	* Segment size: %d\n",
+			       MAX_SEG_SIZE);
+			printf("	* Number of segments: %u\n",
+			       opt_total_segs);
+		}
 	} else if (buffer_info.total_buffs == 1) {
 		printf("\nInfo: there is only one op with %u segments -"
 				" the compression ratio is the best.\n",
diff --git a/app/test-compress-perf/comp_perf_test_common.h b/app/test-compress-perf/comp_perf_test_common.h
index 920642888..72705c6a2 100644
--- a/app/test-compress-perf/comp_perf_test_common.h
+++ b/app/test-compress-perf/comp_perf_test_common.h
@@ -49,6 +49,6 @@  int
 prepare_bufs(struct comp_test_data *test_data, struct cperf_mem_resources *mem);
 
 void
-print_test_dynamics(void);
+print_test_dynamics(const struct comp_test_data *test_data);
 
 #endif /* _COMP_PERF_TEST_COMMON_H_ */
diff --git a/app/test-compress-perf/comp_perf_test_cyclecount.c b/app/test-compress-perf/comp_perf_test_cyclecount.c
new file mode 100644
index 000000000..55559a7d5
--- /dev/null
+++ b/app/test-compress-perf/comp_perf_test_cyclecount.c
@@ -0,0 +1,614 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Intel Corporation
+ */
+
+#include <rte_malloc.h>
+#include <rte_eal.h>
+#include <rte_log.h>
+#include <rte_cycles.h>
+#include "rte_spinlock.h"
+#include <rte_compressdev.h>
+
+#include "comp_perf_test_cyclecount.h"
+
+struct cperf_cyclecount_ctx {
+	struct cperf_verify_ctx ver;
+
+	uint32_t ops_enq_retries;
+	uint32_t ops_deq_retries;
+
+	uint64_t duration_op;
+	uint64_t duration_enq;
+	uint64_t duration_deq;
+};
+
+void
+cperf_cyclecount_test_destructor(void *arg)
+{
+	struct cperf_cyclecount_ctx *ctx = arg;
+
+	if (arg) {
+		comp_perf_free_memory(ctx->ver.options, &ctx->ver.mem);
+		rte_free(arg);
+	}
+}
+
+void *
+cperf_cyclecount_test_constructor(uint8_t dev_id, uint16_t qp_id,
+		struct comp_test_data *options)
+{
+	struct cperf_cyclecount_ctx *ctx = NULL;
+
+	ctx = rte_malloc(NULL, sizeof(struct cperf_cyclecount_ctx), 0);
+
+	if (ctx == NULL)
+		return NULL;
+
+	ctx->ver.mem.dev_id = dev_id;
+	ctx->ver.mem.qp_id = qp_id;
+	ctx->ver.options = options;
+	ctx->ver.silent = 1; /* ver. part will be silent */
+
+	if (!comp_perf_allocate_memory(ctx->ver.options, &ctx->ver.mem)
+			&& !prepare_bufs(ctx->ver.options, &ctx->ver.mem))
+		return ctx;
+
+	cperf_cyclecount_test_destructor(ctx);
+	return NULL;
+}
+
+static int
+cperf_cyclecount_op_setup(struct rte_comp_op **ops,
+				 struct cperf_cyclecount_ctx *ctx,
+				 struct rte_mbuf **input_bufs,
+				 struct rte_mbuf **output_bufs,
+				 void *priv_xform,
+				 uint32_t out_seg_sz)
+{
+	struct comp_test_data *test_data = ctx->ver.options;
+	struct cperf_mem_resources *mem = &ctx->ver.mem;
+
+	uint32_t i, iter, num_iter;
+	int res = 0;
+	uint16_t ops_needed;
+
+	num_iter = test_data->num_iter;
+
+	for (iter = 0; iter < num_iter; iter++) {
+		uint32_t remaining_ops = mem->total_bufs;
+		uint32_t total_deq_ops = 0;
+		uint32_t total_enq_ops = 0;
+		uint16_t num_enq = 0;
+		uint16_t num_deq = 0;
+
+		while (remaining_ops > 0) {
+			uint16_t num_ops = RTE_MIN(remaining_ops,
+						   test_data->burst_sz);
+			ops_needed = num_ops;
+
+			/* Allocate compression operations */
+			if (ops_needed && rte_mempool_get_bulk(
+						mem->op_pool,
+						(void **)ops,
+						ops_needed) != 0) {
+				RTE_LOG(ERR, USER1,
+				      "Cyclecount: could not allocate enough operations\n");
+				res = -1;
+				goto end;
+			}
+
+			for (i = 0; i < ops_needed; i++) {
+
+				/* Calculate next buffer to attach */
+				/* to operation */
+				uint32_t buf_id = total_enq_ops + i;
+				uint16_t op_id = i;
+
+				/* Reset all data in output buffers */
+				struct rte_mbuf *m = output_bufs[buf_id];
+
+				m->pkt_len = out_seg_sz * m->nb_segs;
+				while (m) {
+					m->data_len = m->buf_len - m->data_off;
+					m = m->next;
+				}
+				ops[op_id]->m_src = input_bufs[buf_id];
+				ops[op_id]->m_dst = output_bufs[buf_id];
+				ops[op_id]->src.offset = 0;
+				ops[op_id]->src.length =
+					rte_pktmbuf_pkt_len(input_bufs[buf_id]);
+				ops[op_id]->dst.offset = 0;
+				ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
+				ops[op_id]->input_chksum = buf_id;
+				ops[op_id]->private_xform = priv_xform;
+			}
+
+			/* E N Q U E U I N G */
+			/* assuming that all ops are enqueued */
+			/* instead of the real enqueue operation */
+			num_enq = num_ops;
+
+			remaining_ops -= num_enq;
+			total_enq_ops += num_enq;
+
+			/* D E Q U E U I N G */
+			/* assuming that all ops dequeued */
+			/* instead of the real dequeue operation */
+			num_deq = num_ops;
+
+			total_deq_ops += num_deq;
+			rte_mempool_put_bulk(mem->op_pool,
+					     (void **)ops, num_deq);
+		}
+	}
+	return res;
+end:
+	rte_mempool_put_bulk(mem->op_pool, (void **)ops, ops_needed);
+	rte_free(ops);
+
+	return res;
+}
+
+static int
+main_loop(struct cperf_cyclecount_ctx *ctx, enum rte_comp_xform_type type)
+{
+	struct comp_test_data *test_data = ctx->ver.options;
+	struct cperf_mem_resources *mem = &ctx->ver.mem;
+	uint8_t dev_id = mem->dev_id;
+	uint32_t i, iter, num_iter;
+	struct rte_comp_op **ops, **deq_ops;
+	void *priv_xform = NULL;
+	struct rte_comp_xform xform;
+	struct rte_mbuf **input_bufs, **output_bufs;
+	int ret, res = 0;
+	int allocated = 0;
+	uint32_t out_seg_sz;
+
+	uint64_t tsc_start, tsc_end, tsc_duration;
+
+	if (test_data == NULL || !test_data->burst_sz) {
+		RTE_LOG(ERR, USER1, "Unknown burst size\n");
+		return -1;
+	}
+	ctx->duration_enq = 0;
+	ctx->duration_deq = 0;
+	ctx->ops_enq_retries = 0;
+	ctx->ops_deq_retries = 0;
+
+	/* one array for both enqueue and dequeue */
+	ops = rte_zmalloc_socket(NULL,
+		2 * mem->total_bufs * sizeof(struct rte_comp_op *),
+		0, rte_socket_id());
+
+	if (ops == NULL) {
+		RTE_LOG(ERR, USER1,
+			"Can't allocate memory for ops strucures\n");
+		return -1;
+	}
+
+	deq_ops = &ops[mem->total_bufs];
+
+	if (type == RTE_COMP_COMPRESS) {
+		xform = (struct rte_comp_xform) {
+			.type = RTE_COMP_COMPRESS,
+			.compress = {
+				.algo = RTE_COMP_ALGO_DEFLATE,
+				.deflate.huffman = test_data->huffman_enc,
+				.level = test_data->level,
+				.window_size = test_data->window_sz,
+				.chksum = RTE_COMP_CHECKSUM_NONE,
+				.hash_algo = RTE_COMP_HASH_ALGO_NONE
+			}
+		};
+		input_bufs = mem->decomp_bufs;
+		output_bufs = mem->comp_bufs;
+		out_seg_sz = test_data->out_seg_sz;
+	} else {
+		xform = (struct rte_comp_xform) {
+			.type = RTE_COMP_DECOMPRESS,
+			.decompress = {
+				.algo = RTE_COMP_ALGO_DEFLATE,
+				.chksum = RTE_COMP_CHECKSUM_NONE,
+				.window_size = test_data->window_sz,
+				.hash_algo = RTE_COMP_HASH_ALGO_NONE
+			}
+		};
+		input_bufs = mem->comp_bufs;
+		output_bufs = mem->decomp_bufs;
+		out_seg_sz = test_data->seg_sz;
+	}
+
+	/* Create private xform */
+	if (rte_compressdev_private_xform_create(dev_id, &xform,
+						&priv_xform) < 0) {
+		RTE_LOG(ERR, USER1, "Private xform could not be created\n");
+		res = -1;
+		goto end;
+	}
+
+	tsc_start = rte_rdtsc_precise();
+	ret = cperf_cyclecount_op_setup(ops,
+				ctx,
+				input_bufs,
+				output_bufs,
+				priv_xform,
+				out_seg_sz);
+
+	tsc_end = rte_rdtsc_precise();
+
+	/* ret value check postponed a bit to cancel extra 'if' bias */
+	if (ret < 0) {
+		RTE_LOG(ERR, USER1, "Setup function failed\n");
+		res = -1;
+		goto end;
+	}
+
+	tsc_duration = tsc_end - tsc_start;
+	ctx->duration_op = tsc_duration;
+
+	num_iter = test_data->num_iter;
+	for (iter = 0; iter < num_iter; iter++) {
+		uint32_t total_ops = mem->total_bufs;
+		uint32_t remaining_ops = mem->total_bufs;
+		uint32_t total_deq_ops = 0;
+		uint32_t total_enq_ops = 0;
+		uint16_t ops_unused = 0;
+		uint16_t num_enq = 0;
+		uint16_t num_deq = 0;
+
+		while (remaining_ops > 0) {
+			uint16_t num_ops = RTE_MIN(remaining_ops,
+						   test_data->burst_sz);
+			uint16_t ops_needed = num_ops - ops_unused;
+
+			/*
+			 * Move the unused operations from the previous
+			 * enqueue_burst call to the front, to maintain order
+			 */
+			if ((ops_unused > 0) && (num_enq > 0)) {
+				size_t nb_b_to_mov =
+				      ops_unused * sizeof(struct rte_comp_op *);
+
+				memmove(ops, &ops[num_enq], nb_b_to_mov);
+			}
+
+			/* Allocate compression operations */
+			if (ops_needed && rte_mempool_get_bulk(
+						mem->op_pool,
+						(void **)ops,
+						ops_needed) != 0) {
+				RTE_LOG(ERR, USER1,
+				      "Could not allocate enough operations\n");
+				res = -1;
+				goto end;
+			}
+			allocated += ops_needed;
+
+			for (i = 0; i < ops_needed; i++) {
+				/*
+				 * Calculate next buffer to attach to operation
+				 */
+				uint32_t buf_id = total_enq_ops + i +
+						ops_unused;
+				uint16_t op_id = ops_unused + i;
+				/* Reset all data in output buffers */
+				struct rte_mbuf *m = output_bufs[buf_id];
+
+				m->pkt_len = out_seg_sz * m->nb_segs;
+				while (m) {
+					m->data_len = m->buf_len - m->data_off;
+					m = m->next;
+				}
+				ops[op_id]->m_src = input_bufs[buf_id];
+				ops[op_id]->m_dst = output_bufs[buf_id];
+				ops[op_id]->src.offset = 0;
+				ops[op_id]->src.length =
+					rte_pktmbuf_pkt_len(input_bufs[buf_id]);
+				ops[op_id]->dst.offset = 0;
+				ops[op_id]->flush_flag = RTE_COMP_FLUSH_FINAL;
+				ops[op_id]->input_chksum = buf_id;
+				ops[op_id]->private_xform = priv_xform;
+			}
+
+			if (unlikely(test_data->perf_comp_force_stop))
+				goto end;
+
+			tsc_start = rte_rdtsc_precise();
+			num_enq = rte_compressdev_enqueue_burst(dev_id,
+								mem->qp_id, ops,
+								num_ops);
+			tsc_end = rte_rdtsc_precise();
+			tsc_duration = tsc_end - tsc_start;
+			ctx->duration_enq += tsc_duration;
+
+			if (num_enq < num_ops)
+				ctx->ops_enq_retries++;
+
+			if (test_data->cyclecount_delay)
+				rte_delay_us_block(test_data->cyclecount_delay);
+
+			if (num_enq == 0) {
+				struct rte_compressdev_stats stats;
+
+				rte_compressdev_stats_get(dev_id, &stats);
+				if (stats.enqueue_err_count) {
+					res = -1;
+					goto end;
+				}
+			}
+
+			ops_unused = num_ops - num_enq;
+			remaining_ops -= num_enq;
+			total_enq_ops += num_enq;
+
+			tsc_start = rte_rdtsc_precise();
+			num_deq = rte_compressdev_dequeue_burst(dev_id,
+							   mem->qp_id,
+							   deq_ops,
+							   allocated);
+			tsc_end = rte_rdtsc_precise();
+			tsc_duration = tsc_end - tsc_start;
+			ctx->duration_deq += tsc_duration;
+
+			if (num_deq < allocated)
+				ctx->ops_deq_retries++;
+
+			total_deq_ops += num_deq;
+
+			if (iter == num_iter - 1) {
+				for (i = 0; i < num_deq; i++) {
+					struct rte_comp_op *op = deq_ops[i];
+
+					if (op->status !=
+						RTE_COMP_OP_STATUS_SUCCESS) {
+						RTE_LOG(ERR, USER1, "Some operations were not successful\n");
+						goto end;
+					}
+
+					struct rte_mbuf *m = op->m_dst;
+
+					m->pkt_len = op->produced;
+					uint32_t remaining_data = op->produced;
+					uint16_t data_to_append;
+
+					while (remaining_data > 0) {
+						data_to_append =
+							RTE_MIN(remaining_data,
+							     out_seg_sz);
+						m->data_len = data_to_append;
+						remaining_data -=
+								data_to_append;
+						m = m->next;
+					}
+				}
+			}
+			rte_mempool_put_bulk(mem->op_pool,
+					     (void **)deq_ops, num_deq);
+			allocated -= num_deq;
+		}
+
+		/* Dequeue the last operations */
+		while (total_deq_ops < total_ops) {
+			if (unlikely(test_data->perf_comp_force_stop))
+				goto end;
+
+			tsc_start = rte_rdtsc_precise();
+			num_deq = rte_compressdev_dequeue_burst(dev_id,
+						mem->qp_id,
+						deq_ops,
+						test_data->burst_sz);
+			tsc_end = rte_rdtsc_precise();
+			tsc_duration = tsc_end - tsc_start;
+			ctx->duration_deq += tsc_duration;
+			ctx->ops_deq_retries++;
+
+			if (num_deq == 0) {
+				struct rte_compressdev_stats stats;
+
+				rte_compressdev_stats_get(dev_id, &stats);
+				if (stats.dequeue_err_count) {
+					res = -1;
+					goto end;
+				}
+			}
+			total_deq_ops += num_deq;
+
+			if (iter == num_iter - 1) {
+				for (i = 0; i < num_deq; i++) {
+					struct rte_comp_op *op = deq_ops[i];
+
+					if (op->status !=
+						RTE_COMP_OP_STATUS_SUCCESS) {
+						RTE_LOG(ERR, USER1, "Some operations were not successful\n");
+						goto end;
+					}
+
+					struct rte_mbuf *m = op->m_dst;
+
+					m->pkt_len = op->produced;
+					uint32_t remaining_data = op->produced;
+					uint16_t data_to_append;
+
+					while (remaining_data > 0) {
+						data_to_append =
+						RTE_MIN(remaining_data,
+							out_seg_sz);
+						m->data_len = data_to_append;
+						remaining_data -=
+								data_to_append;
+						m = m->next;
+					}
+				}
+			}
+			rte_mempool_put_bulk(mem->op_pool,
+					     (void **)deq_ops, num_deq);
+			allocated -= num_deq;
+		}
+	}
+	allocated = 0;
+
+end:
+	if (allocated)
+		rte_mempool_put_bulk(mem->op_pool, (void **)ops, allocated);
+	rte_compressdev_private_xform_free(dev_id, priv_xform);
+	rte_free(ops);
+
+	if (test_data->perf_comp_force_stop) {
+		RTE_LOG(ERR, USER1,
+		      "lcore: %d Perf. test has been aborted by user\n",
+			mem->lcore_id);
+		res = -1;
+	}
+	return res;
+}
+
+int
+cperf_cyclecount_test_runner(void *test_ctx)
+{
+	struct cperf_cyclecount_ctx *ctx = test_ctx;
+	struct comp_test_data *test_data = ctx->ver.options;
+	uint32_t lcore = rte_lcore_id();
+	static rte_atomic16_t display_once = RTE_ATOMIC16_INIT(0);
+	static rte_spinlock_t print_spinlock;
+	int i;
+
+	uint32_t ops_enq_retries_comp;
+	uint32_t ops_deq_retries_comp;
+
+	uint32_t ops_enq_retries_decomp;
+	uint32_t ops_deq_retries_decomp;
+
+	uint32_t duration_setup_per_op;
+
+	uint32_t duration_enq_per_op_comp;
+	uint32_t duration_deq_per_op_comp;
+
+	uint32_t duration_enq_per_op_decomp;
+	uint32_t duration_deq_per_op_decomp;
+
+	ctx->ver.mem.lcore_id = lcore;
+
+	/*
+	 * printing information about current compression thread
+	 */
+	if (rte_atomic16_test_and_set(&ctx->ver.mem.print_info_once))
+		printf("    lcore: %u,"
+				" driver name: %s,"
+				" device name: %s,"
+				" device id: %u,"
+				" socket id: %u,"
+				" queue pair id: %u\n",
+			lcore,
+			ctx->ver.options->driver_name,
+			rte_compressdev_name_get(ctx->ver.mem.dev_id),
+			ctx->ver.mem.dev_id,
+			rte_compressdev_socket_id(ctx->ver.mem.dev_id),
+			ctx->ver.mem.qp_id);
+
+	/*
+	 * First the verification part is needed
+	 */
+	if (cperf_verify_test_runner(&ctx->ver))
+		return EXIT_FAILURE;
+
+	/*
+	 * Run the tests twice, discarding the first performance
+	 * results, before the cache is warmed up
+	 */
+
+	/* C O M P R E S S */
+	for (i = 0; i < 2; i++) {
+		if (main_loop(ctx, RTE_COMP_COMPRESS) < 0)
+			return EXIT_FAILURE;
+	}
+
+	ops_enq_retries_comp = ctx->ops_enq_retries;
+	ops_deq_retries_comp = ctx->ops_deq_retries;
+
+	duration_enq_per_op_comp = ctx->duration_enq /
+			(ctx->ver.mem.total_bufs * test_data->num_iter);
+	duration_deq_per_op_comp = ctx->duration_deq /
+			(ctx->ver.mem.total_bufs * test_data->num_iter);
+
+	/* D E C O M P R E S S */
+	for (i = 0; i < 2; i++) {
+		if (main_loop(ctx, RTE_COMP_DECOMPRESS) < 0)
+			return EXIT_FAILURE;
+	}
+
+	ops_enq_retries_decomp = ctx->ops_enq_retries;
+	ops_deq_retries_decomp = ctx->ops_deq_retries;
+
+	duration_enq_per_op_decomp = ctx->duration_enq /
+			(ctx->ver.mem.total_bufs * test_data->num_iter);
+	duration_deq_per_op_decomp = ctx->duration_deq /
+			(ctx->ver.mem.total_bufs * test_data->num_iter);
+
+	duration_setup_per_op = ctx->duration_op /
+			(ctx->ver.mem.total_bufs * test_data->num_iter);
+
+	/* R E P O R T processing */
+	if (rte_atomic16_test_and_set(&display_once)) {
+
+		rte_spinlock_lock(&print_spinlock);
+
+		printf("\nLegend for the table\n"
+		"  - Retries section: number of retries for the following operations:\n"
+		"    [C-e] - compression enqueue\n"
+		"    [C-d] - compression dequeue\n"
+		"    [D-e] - decompression enqueue\n"
+		"    [D-d] - decompression dequeue\n"
+		"  - Cycles section: number of cycles per 'op' for the following operations:\n"
+		"    setup/op - memory allocation, op configuration and memory dealocation\n"
+		"    [C-e] - compression enqueue\n"
+		"    [C-d] - compression dequeue\n"
+		"    [D-e] - decompression enqueue\n"
+		"    [D-d] - decompression dequeue\n\n");
+
+		printf("\n%12s%6s%12s%17s",
+			"lcore id", "Level", "Comp size", "Comp ratio [%]");
+
+		printf("  |%10s %6s %8s %6s %8s",
+			" Retries:",
+			"[C-e]", "[C-d]",
+			"[D-e]", "[D-d]");
+
+		printf("  |%9s %9s %9s %9s %9s %9s\n",
+			" Cycles:",
+			"setup/op",
+			"[C-e]", "[C-d]",
+			"[D-e]", "[D-d]");
+
+		rte_spinlock_unlock(&print_spinlock);
+	}
+
+	rte_spinlock_lock(&print_spinlock);
+
+	printf("%12u"
+	       "%6u"
+	       "%12zu"
+	       "%17.2f",
+		ctx->ver.mem.lcore_id,
+		test_data->level,
+		ctx->ver.comp_data_sz,
+		ctx->ver.ratio);
+
+	printf("  |%10s %6u %8u %6u %8u",
+	       " ",
+		ops_enq_retries_comp,
+		ops_deq_retries_comp,
+		ops_enq_retries_decomp,
+		ops_deq_retries_decomp);
+
+	printf("  |%9s %9u %9u %9u %9u %9u\n",
+	       " ",
+		duration_setup_per_op,
+		duration_enq_per_op_comp,
+		duration_deq_per_op_comp,
+		duration_enq_per_op_decomp,
+		duration_deq_per_op_decomp);
+
+	rte_spinlock_unlock(&print_spinlock);
+
+	return EXIT_SUCCESS;
+}
diff --git a/app/test-compress-perf/comp_perf_test_cyclecount.h b/app/test-compress-perf/comp_perf_test_cyclecount.h
new file mode 100644
index 000000000..8e1b4d9e9
--- /dev/null
+++ b/app/test-compress-perf/comp_perf_test_cyclecount.h
@@ -0,0 +1,24 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Intel Corporation
+ */
+
+#ifndef _COMP_PERF_TEST_CYCLECOUNT_
+#define _COMP_PERF_TEST_CYCLECOUNT_
+
+#include <stdint.h>
+
+#include "comp_perf_options.h"
+#include "comp_perf_test_common.h"
+#include "comp_perf_test_verify.h"
+
+void
+cperf_cyclecount_test_destructor(void *arg);
+
+int
+cperf_cyclecount_test_runner(void *test_ctx);
+
+void *
+cperf_cyclecount_test_constructor(uint8_t dev_id, uint16_t qp_id,
+		struct comp_test_data *options);
+
+#endif
diff --git a/app/test-compress-perf/comp_perf_test_benchmark.c b/app/test-compress-perf/comp_perf_test_throughput.c
similarity index 97%
rename from app/test-compress-perf/comp_perf_test_benchmark.c
rename to app/test-compress-perf/comp_perf_test_throughput.c
index 0c6bb9b45..13922b658 100644
--- a/app/test-compress-perf/comp_perf_test_benchmark.c
+++ b/app/test-compress-perf/comp_perf_test_throughput.c
@@ -8,10 +8,10 @@ 
 #include <rte_cycles.h>
 #include <rte_compressdev.h>
 
-#include "comp_perf_test_benchmark.h"
+#include "comp_perf_test_throughput.h"
 
 void
-cperf_benchmark_test_destructor(void *arg)
+cperf_throughput_test_destructor(void *arg)
 {
 	if (arg) {
 		comp_perf_free_memory(
@@ -22,7 +22,7 @@  cperf_benchmark_test_destructor(void *arg)
 }
 
 void *
-cperf_benchmark_test_constructor(uint8_t dev_id, uint16_t qp_id,
+cperf_throughput_test_constructor(uint8_t dev_id, uint16_t qp_id,
 		struct comp_test_data *options)
 {
 	struct cperf_benchmark_ctx *ctx = NULL;
@@ -41,7 +41,7 @@  cperf_benchmark_test_constructor(uint8_t dev_id, uint16_t qp_id,
 			&& !prepare_bufs(ctx->ver.options, &ctx->ver.mem))
 		return ctx;
 
-	cperf_benchmark_test_destructor(ctx);
+	cperf_throughput_test_destructor(ctx);
 	return NULL;
 }
 
@@ -324,7 +324,7 @@  main_loop(struct cperf_benchmark_ctx *ctx, enum rte_comp_xform_type type)
 }
 
 int
-cperf_benchmark_test_runner(void *test_ctx)
+cperf_throughput_test_runner(void *test_ctx)
 {
 	struct cperf_benchmark_ctx *ctx = test_ctx;
 	struct comp_test_data *test_data = ctx->ver.options;
diff --git a/app/test-compress-perf/comp_perf_test_benchmark.h b/app/test-compress-perf/comp_perf_test_throughput.h
similarity index 80%
rename from app/test-compress-perf/comp_perf_test_benchmark.h
rename to app/test-compress-perf/comp_perf_test_throughput.h
index d9b2694b8..467e3aa78 100644
--- a/app/test-compress-perf/comp_perf_test_benchmark.h
+++ b/app/test-compress-perf/comp_perf_test_throughput.h
@@ -24,13 +24,13 @@  struct cperf_benchmark_ctx {
 };
 
 void
-cperf_benchmark_test_destructor(void *arg);
+cperf_throughput_test_destructor(void *arg);
 
 int
-cperf_benchmark_test_runner(void *test_ctx);
+cperf_throughput_test_runner(void *test_ctx);
 
 void *
-cperf_benchmark_test_constructor(uint8_t dev_id, uint16_t qp_id,
+cperf_throughput_test_constructor(uint8_t dev_id, uint16_t qp_id,
 		struct comp_test_data *options);
 
 #endif
diff --git a/app/test-compress-perf/comp_perf_test_verify.c b/app/test-compress-perf/comp_perf_test_verify.c
index 758a22ff5..5e13257b7 100644
--- a/app/test-compress-perf/comp_perf_test_verify.c
+++ b/app/test-compress-perf/comp_perf_test_verify.c
@@ -48,8 +48,8 @@  static int
 main_loop(struct cperf_verify_ctx *ctx, enum rte_comp_xform_type type)
 {
 	struct comp_test_data *test_data = ctx->options;
-	uint8_t *output_data_ptr;
-	size_t *output_data_sz;
+	uint8_t *output_data_ptr = NULL;
+	size_t *output_data_sz = NULL;
 	struct cperf_mem_resources *mem = &ctx->mem;
 
 	uint8_t dev_id = mem->dev_id;
diff --git a/app/test-compress-perf/main.c b/app/test-compress-perf/main.c
index 6b56dd680..ed21605d8 100644
--- a/app/test-compress-perf/main.c
+++ b/app/test-compress-perf/main.c
@@ -11,32 +11,41 @@ 
 #include <rte_log.h>
 #include <rte_compressdev.h>
 
-#include "comp_perf_options.h"
-#include "comp_perf_test_verify.h"
-#include "comp_perf_test_benchmark.h"
 #include "comp_perf.h"
+#include "comp_perf_options.h"
 #include "comp_perf_test_common.h"
+#include "comp_perf_test_cyclecount.h"
+#include "comp_perf_test_throughput.h"
+#include "comp_perf_test_verify.h"
 
 #define NUM_MAX_XFORMS 16
 #define NUM_MAX_INFLIGHT_OPS 512
 
 __extension__
 const char *comp_perf_test_type_strs[] = {
-	[CPERF_TEST_TYPE_BENCHMARK] = "benchmark",
-	[CPERF_TEST_TYPE_VERIFY] = "verify"
+	[CPERF_TEST_TYPE_THROUGHPUT] = "throughput", /* test type names */
+	[CPERF_TEST_TYPE_VERIFY] = "verify",
+	[CPERF_TEST_TYPE_PMDCC] = "pmd-cyclecount"
 };
 
 __extension__
 static const struct cperf_test cperf_testmap[] = {
-	[CPERF_TEST_TYPE_BENCHMARK] = {
-			cperf_benchmark_test_constructor,
-			cperf_benchmark_test_runner,
-			cperf_benchmark_test_destructor
+	[CPERF_TEST_TYPE_THROUGHPUT] = { /* constructor, runner, destructor */
+			cperf_throughput_test_constructor,
+			cperf_throughput_test_runner,
+			cperf_throughput_test_destructor
+
 	},
 	[CPERF_TEST_TYPE_VERIFY] = {
 			cperf_verify_test_constructor,
 			cperf_verify_test_runner,
 			cperf_verify_test_destructor
+	},
+
+	[CPERF_TEST_TYPE_PMDCC] = { /* constructor, runner, destructor */
+			cperf_cyclecount_test_constructor,
+			cperf_cyclecount_test_runner,
+			cperf_cyclecount_test_destructor
 	}
 };
 
@@ -116,7 +125,8 @@  comp_perf_initialize_compressdev(struct comp_test_data *test_data,
 	enabled_cdev_count = rte_compressdev_devices_get(test_data->driver_name,
 			enabled_cdevs, RTE_COMPRESS_MAX_DEVS);
 	if (enabled_cdev_count == 0) {
-		RTE_LOG(ERR, USER1, "No compress devices type %s available\n",
+		RTE_LOG(ERR, USER1, "No compress devices type %s available,"
+				    " please check the list of specified devices in EAL section\n",
 				test_data->driver_name);
 		return -EINVAL;
 	}
@@ -270,6 +280,7 @@  comp_perf_dump_input_data(struct comp_test_data *test_data)
 		data += data_to_read;
 	}
 
+	printf("\n");
 	if (test_data->input_data_sz > actual_file_sz)
 		RTE_LOG(INFO, USER1,
 		  "%zu bytes read from file %s, extending the file %.2f times\n",
@@ -365,9 +376,12 @@  main(int argc, char **argv)
 	else
 		test_data->level = test_data->level_lst.list[0];
 
-	printf("App uses socket: %u\n", rte_socket_id());
+	printf("\nApp uses socket: %u\n", rte_socket_id());
 	printf("Burst size = %u\n", test_data->burst_sz);
 	printf("Input data size = %zu\n", test_data->input_data_sz);
+	if (test_data->test == CPERF_TEST_TYPE_PMDCC)
+		printf("Cycle-count delay = %u [us]\n",
+		       test_data->cyclecount_delay);
 
 	test_data->cleanup = ST_DURING_TEST;
 	total_nb_qps = nb_compressdevs * test_data->nb_qps;
@@ -394,7 +408,7 @@  main(int argc, char **argv)
 		i++;
 	}
 
-	print_test_dynamics(); /* constructors must be executed first */
+	print_test_dynamics(test_data);
 
 	while (test_data->level <= test_data->level_lst.max) {
 
@@ -472,7 +486,28 @@  main(int argc, char **argv)
 }
 
 __rte_weak void *
-cperf_benchmark_test_constructor(uint8_t dev_id __rte_unused,
+cperf_cyclecount_test_constructor(uint8_t dev_id __rte_unused,
+				 uint16_t qp_id __rte_unused,
+				 struct comp_test_data *options __rte_unused)
+{ /* weak fallback definition: every argument is ignored */
+	RTE_LOG(INFO, USER1, "Cycle count test is not supported yet\n");
+	return NULL; /* no test context is created */
+}
+
+__rte_weak void
+cperf_cyclecount_test_destructor(void *arg __rte_unused)
+{ /* weak fallback definition: arg is ignored, only a message is logged */
+	RTE_LOG(INFO, USER1, "Something wrong happened!!!\n");
+} /* NOTE(review): message implies this stub should never run - confirm */
+
+__rte_weak int
+cperf_cyclecount_test_runner(void *test_ctx __rte_unused)
+{ /* weak fallback definition: test_ctx is ignored, no work is done */
+	return 0; /* unconditional */
+}
+
+__rte_weak void *
+cperf_throughput_test_constructor(uint8_t dev_id __rte_unused,
 				 uint16_t qp_id __rte_unused,
 				 struct comp_test_data *options __rte_unused)
 {
@@ -481,13 +516,13 @@  cperf_benchmark_test_constructor(uint8_t dev_id __rte_unused,
 }
 
 __rte_weak void
-cperf_benchmark_test_destructor(void *arg __rte_unused)
+cperf_throughput_test_destructor(void *arg __rte_unused) /* weak, empty */
 {
 
 }
 
 __rte_weak int
-cperf_benchmark_test_runner(void *test_ctx __rte_unused)
+cperf_throughput_test_runner(void *test_ctx __rte_unused) /* weak stub */
 {
 	return 0;
 }
diff --git a/app/test-compress-perf/meson.build b/app/test-compress-perf/meson.build
index 1136f04bc..1fe26cc14 100644
--- a/app/test-compress-perf/meson.build
+++ b/app/test-compress-perf/meson.build
@@ -5,6 +5,7 @@  allow_experimental_apis = true
 sources = files('comp_perf_options_parse.c',
 		'main.c',
 		'comp_perf_test_verify.c',
-		'comp_perf_test_benchmark.c',
+		'comp_perf_test_throughput.c',
+		'comp_perf_test_cyclecount.c',
 		'comp_perf_test_common.c')
 deps = ['compressdev']