app/flow-perf: support meter action

Message ID 1610542256-285053-1-git-send-email-dongzhou@nvidia.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Series: app/flow-perf: support meter action

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-broadcom-Functional success Functional Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/intel-Testing success Testing PASS
ci/iol-testing success Testing PASS

Commit Message

Dong Zhou Jan. 13, 2021, 12:50 p.m. UTC
  Currently, the test-flow-perf app cannot generate flows with the meter
action. This patch introduces a new parameter, "--meter", to generate
flows with the meter action.

Signed-off-by: Dong Zhou <dongzhou@nvidia.com>
Reviewed-by: Wisam Jaddo <wisamm@nvidia.com>
---
 app/test-flow-perf/actions_gen.c |  19 ++
 app/test-flow-perf/config.h      |   2 +
 app/test-flow-perf/main.c        | 415 +++++++++++++++++++++++++++++++--------
 doc/guides/tools/flow-perf.rst   |   4 +
 4 files changed, 353 insertions(+), 87 deletions(-)
  
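For readers less familiar with the rte_flow side, the new action boils down
to the sketch below. This is a hedged illustration, not part of the patch:
the helper name fill_meter_action is made up here; the patch's own version
is add_meter() in actions_gen.c.

#include <rte_flow.h>

/* Illustrative sketch: terminate a flow rule's action list with a meter
 * action. mtr_id must refer to a meter object previously created with
 * rte_mtr_create() on the same port.
 */
static void
fill_meter_action(struct rte_flow_action *actions, uint32_t mtr_id,
		  struct rte_flow_action_meter *conf)
{
	conf->mtr_id = mtr_id;                         /* meter object to apply */
	actions[0].type = RTE_FLOW_ACTION_TYPE_METER;  /* meter this traffic */
	actions[0].conf = conf;
	actions[1].type = RTE_FLOW_ACTION_TYPE_END;    /* end of action list */
}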

Comments

Alexander Kozyrev Jan. 14, 2021, 5:21 a.m. UTC | #1
> From: Dong Zhou <dongzhou@nvidia.com>
> Sent: Wednesday, January 13, 2021 7:51
> To: Wisam Monther <wisamm@nvidia.com>; Suanming Mou
> <suanmingm@nvidia.com>; Alexander Kozyrev <akozyrev@nvidia.com>;
> NBU-Contact-Thomas Monjalon <thomas@monjalon.net>
> Cc: dev@dpdk.org
> Subject: [PATCH] app/flow-perf: support meter action
> 
> [...]
> 
> -static int
> -run_rte_flow_handler_cores(void *data __rte_unused)
> +static void
> +dump_used_cpu_time(const char *item,
> +		uint16_t port, struct used_cpu_time *used_time)
>  {
> -	uint16_t port;
> +	uint32_t i;
> 
> [...]
> 
> +static void
> +dump_used_mem(uint16_t port)
> +{
> 
> [...]
> 
> +static int
> +run_rte_flow_handler_cores(void *data __rte_unused)
> +{
> +	uint16_t port;
I don't see how this part of the patch is related to the meter action.
Probably this change deserves a separate commit with a proper message.

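For context, the part in question is the refactoring that moves the per-port
statistics into the new dump_used_cpu_time() and dump_used_mem() helpers. The
aggregation they factor out amounts to roughly the sketch below; the helper
name aggregate_rates is illustrative, and the per-core times correspond to
struct used_cpu_time in the patch:

#include <stdint.h>

/* Simplified sketch of the aggregation in dump_used_cpu_time(): latency is
 * derived from the slowest core, throughput from the average time across
 * cores; rates are reported in K rules/sec as the app prints them.
 */
static void
aggregate_rates(const double *times, uint32_t cores, uint32_t rules,
		double *latency_krps, double *throughput_krps)
{
	double max_time = times[0];
	double sum_time = 0.0;
	uint32_t i;

	for (i = 0; i < cores; i++) {
		sum_time += times[i];
		if (times[i] > max_time)
			max_time = times[i];   /* slowest core dominates latency */
	}
	*latency_krps = rules / max_time / 1000.0;
	*throughput_krps = rules / (sum_time / cores) / 1000.0;
}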
  

Patch

diff --git a/app/test-flow-perf/actions_gen.c b/app/test-flow-perf/actions_gen.c
index c3545ba..1f5c64f 100644
--- a/app/test-flow-perf/actions_gen.c
+++ b/app/test-flow-perf/actions_gen.c
@@ -891,6 +891,19 @@  struct action_rss_data {
 	actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_VXLAN_DECAP;
 }
 
+static void
+add_meter(struct rte_flow_action *actions,
+	uint8_t actions_counter,
+	__rte_unused struct additional_para para)
+{
+	static struct rte_flow_action_meter
+		meters[RTE_MAX_LCORE] __rte_cache_aligned;
+
+	meters[para.core_idx].mtr_id = para.counter;
+	actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_METER;
+	actions[actions_counter].conf = &meters[para.core_idx];
+}
+
 void
 fill_actions(struct rte_flow_action *actions, uint64_t *flow_actions,
 	uint32_t counter, uint16_t next_table, uint16_t hairpinq,
@@ -1103,6 +1116,12 @@  struct action_rss_data {
 			),
 			.funct = add_vxlan_decap,
 		},
+		{
+			.mask = FLOW_ACTION_MASK(
+				RTE_FLOW_ACTION_TYPE_METER
+			),
+			.funct = add_meter,
+		},
 	};
 
 	for (j = 0; j < MAX_ACTIONS_NUM; j++) {
diff --git a/app/test-flow-perf/config.h b/app/test-flow-perf/config.h
index 94e83c9..3d4696d 100644
--- a/app/test-flow-perf/config.h
+++ b/app/test-flow-perf/config.h
@@ -16,6 +16,8 @@ 
 #define NR_RXD  256
 #define NR_TXD  256
 #define MAX_PORTS 64
+#define METER_CIR 1250000
+#define DEFAULT_METER_PROF_ID 100
 
 /* This is used for encap/decap & header modify actions.
  * When it's 1: it means all actions have fixed values.
diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
index 3a0e4c1..4d881ec 100644
--- a/app/test-flow-perf/main.c
+++ b/app/test-flow-perf/main.c
@@ -34,6 +34,7 @@ 
 #include <rte_mbuf.h>
 #include <rte_ethdev.h>
 #include <rte_flow.h>
+#include <rte_mtr.h>
 
 #include "config.h"
 #include "flow_gen.h"
@@ -72,6 +73,8 @@ 
 #define LCORE_MODE_PKT    1
 #define LCORE_MODE_STATS  2
 #define MAX_STREAMS      64
+#define METER_CREATE	  1
+#define METER_DELETE	  2
 
 struct stream {
 	int tx_port;
@@ -93,11 +96,16 @@  struct lcore_info {
 
 static struct lcore_info lcore_infos[RTE_MAX_LCORE];
 
+struct used_cpu_time {
+	double insertion[MAX_PORTS][RTE_MAX_LCORE];
+	double deletion[MAX_PORTS][RTE_MAX_LCORE];
+};
+
 struct multi_cores_pool {
 	uint32_t cores_count;
 	uint32_t rules_count;
-	double cpu_time_used_insertion[MAX_PORTS][RTE_MAX_LCORE];
-	double cpu_time_used_deletion[MAX_PORTS][RTE_MAX_LCORE];
+	struct used_cpu_time create_meter;
+	struct used_cpu_time create_flow;
 	int64_t last_alloc[RTE_MAX_LCORE];
 	int64_t current_alloc[RTE_MAX_LCORE];
 } __rte_cache_aligned;
@@ -195,6 +203,7 @@  struct multi_cores_pool {
 	printf("  --set-ipv6-dscp: add set ipv6 dscp action to flow actions\n"
 		"ipv6 dscp value to be set is random each flow\n");
 	printf("  --flag: add flag action to flow actions\n");
+	printf("  --meter: add meter action to flow actions\n");
 	printf("  --raw-encap=<data>: add raw encap action to flow actions\n"
 		"Data is the data needed to be encaped\n"
 		"Example: raw-encap=ether,ipv4,udp,vxlan\n");
@@ -524,6 +533,14 @@  struct multi_cores_pool {
 			.map_idx = &actions_idx
 		},
 		{
+			.str = "meter",
+			.mask = FLOW_ACTION_MASK(
+				RTE_FLOW_ACTION_TYPE_METER
+			),
+			.map = &flow_actions[0],
+			.map_idx = &actions_idx
+		},
+		{
 			.str = "vxlan-encap",
 			.mask = FLOW_ACTION_MASK(
 				RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP
@@ -602,6 +619,7 @@  struct multi_cores_pool {
 		{ "set-ipv4-dscp",              0, 0, 0 },
 		{ "set-ipv6-dscp",              0, 0, 0 },
 		{ "flag",                       0, 0, 0 },
+		{ "meter",		        0, 0, 0 },
 		{ "raw-encap",                  1, 0, 0 },
 		{ "raw-decap",                  1, 0, 0 },
 		{ "vxlan-encap",                0, 0, 0 },
@@ -874,6 +892,185 @@  struct multi_cores_pool {
 	}
 }
 
+
+static inline int
+has_meter(void)
+{
+	int i;
+
+	for (i = 0; i < MAX_ACTIONS_NUM; i++) {
+		if (flow_actions[i] == 0)
+			break;
+		if (flow_actions[i]
+				& FLOW_ACTION_MASK(RTE_FLOW_ACTION_TYPE_METER))
+			return 1;
+	}
+	return 0;
+}
+
+static void
+create_meter_rule(int port_id, uint32_t counter)
+{
+	int ret;
+	struct rte_mtr_params params;
+	uint32_t default_prof_id = 100;
+	struct rte_mtr_error error;
+
+	memset(&params, 0, sizeof(struct rte_mtr_params));
+	params.meter_enable = 1;
+	params.stats_mask = 0xffff;
+	params.use_prev_mtr_color = 0;
+	params.dscp_table = NULL;
+
+	/* Create meter */
+	params.meter_profile_id = default_prof_id;
+	params.action[RTE_COLOR_GREEN] =
+		MTR_POLICER_ACTION_COLOR_GREEN;
+	params.action[RTE_COLOR_YELLOW] =
+		MTR_POLICER_ACTION_COLOR_YELLOW;
+	params.action[RTE_COLOR_RED] =
+		MTR_POLICER_ACTION_DROP;
+
+	ret = rte_mtr_create(port_id, counter, &params, 1, &error);
+	if (ret != 0) {
+		printf("Port %u create meter idx(%d) error(%d) message: %s\n",
+			port_id, counter, error.type,
+			error.message ? error.message : "(no stated reason)");
+		rte_exit(EXIT_FAILURE, "error in creating meter");
+	}
+}
+
+static void
+destroy_meter_rule(int port_id, uint32_t counter)
+{
+	struct rte_mtr_error error;
+
+	if (rte_mtr_destroy(port_id, counter, &error)) {
+		printf("Port %u destroy meter(%d) error(%d) message: %s\n",
+			port_id, counter, error.type,
+			error.message ? error.message : "(no stated reason)");
+		rte_exit(EXIT_FAILURE, "Error in deleting meter rule");
+	}
+}
+
+static void
+meters_handler(int port_id, uint8_t core_id, uint8_t ops)
+{
+	uint64_t start_batch;
+	double cpu_time_used, insertion_rate;
+	int rules_count_per_core, rules_batch_idx;
+	uint32_t counter, start_counter = 0, end_counter;
+	double cpu_time_per_batch[MAX_BATCHES_COUNT] = { 0 };
+
+	rules_count_per_core = rules_count / mc_pool.cores_count;
+
+	if (core_id)
+		start_counter = core_id * rules_count_per_core;
+	end_counter = (core_id + 1) * rules_count_per_core;
+
+	cpu_time_used = 0;
+	start_batch = rte_rdtsc();
+	for (counter = start_counter; counter < end_counter; counter++) {
+		if (ops == METER_CREATE)
+			create_meter_rule(port_id, counter);
+		else
+			destroy_meter_rule(port_id, counter);
+		/*
+		 * Save the insertion rate for each rules batch.
+		 * Check if the insertion reached the rules
+		 * batch counter, then save the insertion rate
+		 * for this batch.
+		 */
+		if (!((counter + 1) % rules_batch)) {
+			rules_batch_idx = ((counter + 1) / rules_batch) - 1;
+			cpu_time_per_batch[rules_batch_idx] =
+				((double)(rte_rdtsc() - start_batch))
+				/ rte_get_tsc_hz();
+			cpu_time_used += cpu_time_per_batch[rules_batch_idx];
+			start_batch = rte_rdtsc();
+		}
+	}
+
+	/* Print insertion rates for all batches */
+	if (dump_iterations)
+		print_rules_batches(cpu_time_per_batch);
+
+	insertion_rate =
+		((double) (rules_count_per_core / cpu_time_used) / 1000);
+
+	/* Insertion rate for all rules in one core */
+	printf(":: Port %d :: Core %d Meter %s :: start @[%d] - end @[%d],"
+		" use:%.02fs, rate:%.02fk Rule/Sec\n",
+		port_id, core_id, ops == METER_CREATE ? "create" : "delete",
+		start_counter, end_counter - 1,
+		cpu_time_used, insertion_rate);
+
+	if (ops == METER_CREATE)
+		mc_pool.create_meter.insertion[port_id][core_id]
+			= cpu_time_used;
+	else
+		mc_pool.create_meter.deletion[port_id][core_id]
+			= cpu_time_used;
+}
+
+static void
+destroy_meter_profile(void)
+{
+	struct rte_mtr_error error;
+	uint16_t nr_ports;
+	int port_id;
+
+	nr_ports = rte_eth_dev_count_avail();
+	for (port_id = 0; port_id < nr_ports; port_id++) {
+		/* If port outside portmask */
+		if (!((ports_mask >> port_id) & 0x1))
+			continue;
+
+		if (rte_mtr_meter_profile_delete
+			(port_id, DEFAULT_METER_PROF_ID, &error)) {
+			printf("Port %u del profile error(%d) message: %s\n",
+				port_id, error.type,
+				error.message ? error.message : "(no stated reason)");
+			rte_exit(EXIT_FAILURE, "Error: Destroy meter profile Failed!\n");
+		}
+	}
+}
+
+static void
+create_meter_profile(void)
+{
+	uint16_t nr_ports;
+	int ret, port_id;
+	struct rte_mtr_meter_profile mp;
+	struct rte_mtr_error error;
+
+	/*
+	 * Currently, only create one meter profile per port:
+	 * 1 meter profile -> N meter rules -> N rte flows.
+	 */
+	memset(&mp, 0, sizeof(struct rte_mtr_meter_profile));
+	nr_ports = rte_eth_dev_count_avail();
+	for (port_id = 0; port_id < nr_ports; port_id++) {
+		/* If port outside portmask */
+		if (!((ports_mask >> port_id) & 0x1))
+			continue;
+
+		mp.alg = RTE_MTR_SRTCM_RFC2697;
+		mp.srtcm_rfc2697.cir = METER_CIR;
+		mp.srtcm_rfc2697.cbs = METER_CIR / 8;
+		mp.srtcm_rfc2697.ebs = 0;
+
+		ret = rte_mtr_meter_profile_add
+			(port_id, DEFAULT_METER_PROF_ID, &mp, &error);
+		if (ret != 0) {
+			printf("Port %u create Profile error(%d) message: %s\n",
+				port_id, error.type,
+				error.message ? error.message : "(no stated reason)");
+			rte_exit(EXIT_FAILURE, "Error: Creation meter profile Failed!\n");
+		}
+	}
+}
+
 static inline void
 destroy_flows(int port_id, uint8_t core_id, struct rte_flow **flows_list)
 {
@@ -888,6 +1085,8 @@  struct multi_cores_pool {
 	int rules_count_per_core;
 
 	rules_count_per_core = rules_count / mc_pool.cores_count;
+	if (flow_group > 0 && core_id == 0)
+		rules_count_per_core++;
 
 	start_batch = rte_rdtsc();
 	for (i = 0; i < (uint32_t) rules_count_per_core; i++) {
@@ -927,7 +1126,7 @@  struct multi_cores_pool {
 	printf(":: Port %d :: Core %d :: The time for deleting %d rules is %f seconds\n",
 		port_id, core_id, rules_count_per_core, cpu_time_used);
 
-	mc_pool.cpu_time_used_deletion[port_id][core_id] = cpu_time_used;
+	mc_pool.create_flow.deletion[port_id][core_id] = cpu_time_used;
 }
 
 static struct rte_flow **
@@ -1034,7 +1233,7 @@  struct multi_cores_pool {
 	printf(":: Port %d :: Core %d :: The time for creating %d in rules %f seconds\n",
 		port_id, core_id, rules_count_per_core, cpu_time_used);
 
-	mc_pool.cpu_time_used_insertion[port_id][core_id] = cpu_time_used;
+	mc_pool.create_flow.insertion[port_id][core_id] = cpu_time_used;
 	return flows_list;
 }
 
@@ -1047,9 +1246,6 @@  struct multi_cores_pool {
 
 	nr_ports = rte_eth_dev_count_avail();
 
-	if (rules_batch > rules_count)
-		rules_batch = rules_count;
-
 	printf(":: Rules Count per port: %d\n\n", rules_count);
 
 	for (port_id = 0; port_id < nr_ports; port_id++) {
@@ -1059,21 +1255,27 @@  struct multi_cores_pool {
 
 		/* Insertion part. */
 		mc_pool.last_alloc[core_id] = (int64_t)dump_socket_mem(stdout);
+		if (has_meter())
+			meters_handler(port_id, core_id, METER_CREATE);
 		flows_list = insert_flows(port_id, core_id);
 		if (flows_list == NULL)
 			rte_exit(EXIT_FAILURE, "Error: Insertion Failed!\n");
 		mc_pool.current_alloc[core_id] = (int64_t)dump_socket_mem(stdout);
 
 		/* Deletion part. */
-		if (delete_flag)
+		if (delete_flag) {
 			destroy_flows(port_id, core_id, flows_list);
+			if (has_meter())
+				meters_handler(port_id, core_id, METER_DELETE);
+		}
 	}
 }
 
-static int
-run_rte_flow_handler_cores(void *data __rte_unused)
+static void
+dump_used_cpu_time(const char *item,
+				uint16_t port, struct used_cpu_time *used_time)
 {
-	uint16_t port;
+	uint32_t i;
 	/* Latency: total count of rte rules divided
 	 * over max time used by thread between all
 	 * threads time.
@@ -1088,8 +1290,111 @@  struct multi_cores_pool {
 	double deletion_throughput_time;
 	double insertion_latency, insertion_throughput;
 	double deletion_latency, deletion_throughput;
+
+	/* Save first insertion/deletion rates from first thread.
+	 * Start comparing with all threads, if any thread used
+	 * time more than current saved, replace it.
+	 *
+	 * Thus in the end we will have the max time used for
+	 * insertion/deletion by one thread.
+	 *
+	 * As for memory consumption, save the min of all threads
+	 * of last alloc, and save the max for all threads for
+	 * current alloc.
+	 */
+
+	insertion_latency_time = used_time->insertion[port][0];
+	deletion_latency_time = used_time->deletion[port][0];
+	insertion_throughput_time = used_time->insertion[port][0];
+	deletion_throughput_time = used_time->deletion[port][0];
+
+	i = mc_pool.cores_count;
+	while (i-- > 1) {
+		insertion_throughput_time += used_time->insertion[port][i];
+		deletion_throughput_time += used_time->deletion[port][i];
+		if (insertion_latency_time < used_time->insertion[port][i])
+			insertion_latency_time = used_time->insertion[port][i];
+		if (deletion_latency_time < used_time->deletion[port][i])
+			deletion_latency_time = used_time->deletion[port][i];
+	}
+
+	insertion_latency = ((double) (mc_pool.rules_count
+				/ insertion_latency_time) / 1000);
+	deletion_latency = ((double) (mc_pool.rules_count
+				/ deletion_latency_time) / 1000);
+
+	insertion_throughput_time /= mc_pool.cores_count;
+	deletion_throughput_time /= mc_pool.cores_count;
+	insertion_throughput = ((double) (mc_pool.rules_count
+				/ insertion_throughput_time) / 1000);
+	deletion_throughput = ((double) (mc_pool.rules_count
+				/ deletion_throughput_time) / 1000);
+
+	/* Latency stats */
+	printf("\n%s\n:: [Latency | Insertion] All Cores :: Port %d :: ",
+		item, port);
+	printf("Total flows insertion rate -> %f K Rules/Sec\n",
+		insertion_latency);
+	printf(":: [Latency | Insertion] All Cores :: Port %d :: ", port);
+	printf("The time for creating %d rules is %f seconds\n",
+		mc_pool.rules_count, insertion_latency_time);
+
+	/* Throughput stats */
+	printf(":: [Throughput | Insertion] All Cores :: Port %d :: ", port);
+	printf("Total flows insertion rate -> %f K Rules/Sec\n",
+		insertion_throughput);
+	printf(":: [Throughput | Insertion] All Cores :: Port %d :: ", port);
+	printf("The average time for creating %d rules is %f seconds\n",
+		mc_pool.rules_count, insertion_throughput_time);
+
+	if (delete_flag) {
+	/* Latency stats */
+		printf(":: [Latency | Deletion] All Cores :: Port %d :: Total "
+			"deletion rate -> %f K Rules/Sec\n",
+			port, deletion_latency);
+		printf(":: [Latency | Deletion] All Cores :: Port %d :: ",
+			port);
+		printf("The time for deleting %d rules is %f seconds\n",
+			mc_pool.rules_count, deletion_latency_time);
+
+		/* Throughput stats */
+		printf(":: [Throughput | Deletion] All Cores :: Port %d :: Total "
+			"deletion rate -> %f K Rules/Sec\n",
+			port, deletion_throughput);
+		printf(":: [Throughput | Deletion] All Cores :: Port %d :: ",
+			port);
+		printf("The average time for deleting %d rules is %f seconds\n",
+			mc_pool.rules_count, deletion_throughput_time);
+	}
+}
+
+static void
+dump_used_mem(uint16_t port)
+{
+	uint32_t i;
 	int64_t last_alloc, current_alloc;
 	int flow_size_in_bytes;
+
+	last_alloc = mc_pool.last_alloc[0];
+	current_alloc = mc_pool.current_alloc[0];
+
+	i = mc_pool.cores_count;
+	while (i-- > 1) {
+		if (last_alloc > mc_pool.last_alloc[i])
+			last_alloc = mc_pool.last_alloc[i];
+		if (current_alloc < mc_pool.current_alloc[i])
+			current_alloc = mc_pool.current_alloc[i];
+	}
+
+	flow_size_in_bytes = (current_alloc - last_alloc) / mc_pool.rules_count;
+	printf("\n:: Port %d :: rte_flow size in DPDK layer: %d Bytes\n",
+		port, flow_size_in_bytes);
+}
+
+static int
+run_rte_flow_handler_cores(void *data __rte_unused)
+{
+	uint16_t port;
 	int lcore_counter = 0;
 	int lcore_id = rte_lcore_id();
 	int i;
@@ -1120,83 +1425,12 @@  struct multi_cores_pool {
 	/* Make sure all cores finished insertion/deletion process. */
 	rte_eal_mp_wait_lcore();
 
-	/* Save first insertion/deletion rates from first thread.
-	 * Start comparing with all threads, if any thread used
-	 * time more than current saved, replace it.
-	 *
-	 * Thus in the end we will have the max time used for
-	 * insertion/deletion by one thread.
-	 *
-	 * As for memory consumption, save the min of all threads
-	 * of last alloc, and save the max for all threads for
-	 * current alloc.
-	 */
 	RTE_ETH_FOREACH_DEV(port) {
-		last_alloc = mc_pool.last_alloc[0];
-		current_alloc = mc_pool.current_alloc[0];
-
-		insertion_latency_time = mc_pool.cpu_time_used_insertion[port][0];
-		deletion_latency_time = mc_pool.cpu_time_used_deletion[port][0];
-		insertion_throughput_time = mc_pool.cpu_time_used_insertion[port][0];
-		deletion_throughput_time = mc_pool.cpu_time_used_deletion[port][0];
-		i = mc_pool.cores_count;
-		while (i-- > 1) {
-			insertion_throughput_time += mc_pool.cpu_time_used_insertion[port][i];
-			deletion_throughput_time += mc_pool.cpu_time_used_deletion[port][i];
-			if (insertion_latency_time < mc_pool.cpu_time_used_insertion[port][i])
-				insertion_latency_time = mc_pool.cpu_time_used_insertion[port][i];
-			if (deletion_latency_time < mc_pool.cpu_time_used_deletion[port][i])
-				deletion_latency_time = mc_pool.cpu_time_used_deletion[port][i];
-			if (last_alloc > mc_pool.last_alloc[i])
-				last_alloc = mc_pool.last_alloc[i];
-			if (current_alloc < mc_pool.current_alloc[i])
-				current_alloc = mc_pool.current_alloc[i];
-		}
-
-		flow_size_in_bytes = (current_alloc - last_alloc) / mc_pool.rules_count;
-
-		insertion_latency = ((double) (mc_pool.rules_count / insertion_latency_time) / 1000);
-		deletion_latency = ((double) (mc_pool.rules_count / deletion_latency_time) / 1000);
-
-		insertion_throughput_time /= mc_pool.cores_count;
-		deletion_throughput_time /= mc_pool.cores_count;
-		insertion_throughput = ((double) (mc_pool.rules_count / insertion_throughput_time) / 1000);
-		deletion_throughput = ((double) (mc_pool.rules_count / deletion_throughput_time) / 1000);
-
-		/* Latency stats */
-		printf("\n:: [Latency | Insertion] All Cores :: Port %d :: ", port);
-		printf("Total flows insertion rate -> %f K Rules/Sec\n",
-			insertion_latency);
-		printf(":: [Latency | Insertion] All Cores :: Port %d :: ", port);
-		printf("The time for creating %d rules is %f seconds\n",
-			mc_pool.rules_count, insertion_latency_time);
-
-		/* Throughput stats */
-		printf(":: [Throughput | Insertion] All Cores :: Port %d :: ", port);
-		printf("Total flows insertion rate -> %f K Rules/Sec\n",
-			insertion_throughput);
-		printf(":: [Throughput | Insertion] All Cores :: Port %d :: ", port);
-		printf("The average time for creating %d rules is %f seconds\n",
-			mc_pool.rules_count, insertion_throughput_time);
-
-		if (delete_flag) {
-			/* Latency stats */
-			printf(":: [Latency | Deletion] All Cores :: Port %d :: Total flows "
-				"deletion rate -> %f K Rules/Sec\n",
-				port, deletion_latency);
-			printf(":: [Latency | Deletion] All Cores :: Port %d :: ", port);
-			printf("The time for deleting %d rules is %f seconds\n",
-			mc_pool.rules_count, deletion_latency_time);
-
-			/* Throughput stats */
-			printf(":: [Throughput | Deletion] All Cores :: Port %d :: Total flows "
-				"deletion rate -> %f K Rules/Sec\n", port, deletion_throughput);
-			printf(":: [Throughput | Deletion] All Cores :: Port %d :: ", port);
-			printf("The average time for deleting %d rules is %f seconds\n",
-			mc_pool.rules_count, deletion_throughput_time);
-		}
-		printf("\n:: Port %d :: rte_flow size in DPDK layer: %d Bytes\n",
-			port, flow_size_in_bytes);
+		dump_used_cpu_time("Meters:",
+			port, &mc_pool.create_meter);
+		dump_used_cpu_time("Flows:",
+			port, &mc_pool.create_flow);
+		dump_used_mem(port);
 	}
 
 	return 0;
@@ -1633,6 +1867,9 @@  struct multi_cores_pool {
 	if (argc > 1)
 		args_parse(argc, argv);
 
+	if (rules_batch > rules_count)
+		rules_batch = rules_count;
+
 	init_port();
 
 	nb_lcores = rte_lcore_count();
@@ -1642,12 +1879,16 @@  struct multi_cores_pool {
 
 	printf(":: Flows Count per port: %d\n\n", rules_count);
 
+	if (has_meter())
+		create_meter_profile();
 	rte_eal_mp_remote_launch(run_rte_flow_handler_cores, NULL, CALL_MAIN);
 
 	if (enable_fwd) {
 		init_lcore_info();
 		rte_eal_mp_remote_launch(start_forwarding, NULL, CALL_MAIN);
 	}
+	if (has_meter() && delete_flag)
+		destroy_meter_profile();
 
 	RTE_ETH_FOREACH_DEV(port) {
 		rte_flow_flush(port, &error);
diff --git a/doc/guides/tools/flow-perf.rst b/doc/guides/tools/flow-perf.rst
index 40d157e..017e200 100644
--- a/doc/guides/tools/flow-perf.rst
+++ b/doc/guides/tools/flow-perf.rst
@@ -345,3 +345,7 @@  Actions:
 
 *	``--vxlan-decap``
 	Add vxlan decap action to all flows actions.
+
+*       ``--meter``
+        Add meter action to all flows actions.
+        Currently, 1 meter profile -> N meter rules -> N rte flows.
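To make the "1 meter profile -> N meter rules -> N rte flows" note concrete,
the per-port control path in this patch reduces to roughly the sketch below.
Error handling is trimmed, METER_CIR and DEFAULT_METER_PROF_ID mirror the
constants added to config.h above, and the wrapper name setup_meters is
illustrative only:

#include <rte_mtr.h>

#define METER_CIR 1250000		/* mirrors config.h in this patch */
#define DEFAULT_METER_PROF_ID 100

/* Illustrative wrapper: one profile per port, then one meter object per
 * future flow rule; each flow later references its meter by mtr_id.
 */
static void
setup_meters(uint16_t port_id, uint32_t n_rules)
{
	struct rte_mtr_meter_profile mp = {
		.alg = RTE_MTR_SRTCM_RFC2697,
		.srtcm_rfc2697 = {
			.cir = METER_CIR,
			.cbs = METER_CIR / 8,
			.ebs = 0,
		},
	};
	struct rte_mtr_params params = {
		.meter_profile_id = DEFAULT_METER_PROF_ID,
		.meter_enable = 1,
		.stats_mask = 0xffff,
		.action = {
			[RTE_COLOR_GREEN] = MTR_POLICER_ACTION_COLOR_GREEN,
			[RTE_COLOR_YELLOW] = MTR_POLICER_ACTION_COLOR_YELLOW,
			[RTE_COLOR_RED] = MTR_POLICER_ACTION_DROP,
		},
	};
	struct rte_mtr_error error;
	uint32_t i;

	/* Error handling omitted; the patch calls rte_exit() on failure. */
	rte_mtr_meter_profile_add(port_id, DEFAULT_METER_PROF_ID, &mp, &error);
	for (i = 0; i < n_rules; i++)
		rte_mtr_create(port_id, i, &params, 1, &error);
}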