Message ID | 1610542256-285053-1-git-send-email-dongzhou@nvidia.com (mailing list archive) |
---|---|
State | Superseded, archived |
Delegated to: | Thomas Monjalon |
Headers | show |
Series | app/flow-perf: support meter action | expand |
Context | Check | Description |
---|---|---|
ci/iol-testing | success | Testing PASS |
ci/intel-Testing | success | Testing PASS |
ci/iol-intel-Performance | success | Performance Testing PASS |
ci/iol-broadcom-Functional | success | Functional Testing PASS |
ci/iol-intel-Functional | success | Functional Testing PASS |
ci/iol-broadcom-Performance | success | Performance Testing PASS |
ci/Intel-compilation | success | Compilation OK |
ci/checkpatch | success | coding style OK |
> From: Dong Zhou <dongzhou@nvidia.com> > Sent: Wednesday, January 13, 2021 7:51 > To: Wisam Monther <wisamm@nvidia.com>; Suanming Mou > <suanmingm@nvidia.com>; Alexander Kozyrev <akozyrev@nvidia.com>; > NBU-Contact-Thomas Monjalon <thomas@monjalon.net> > Cc: dev@dpdk.org > Subject: [PATCH] app/flow-perf: support meter action > > Currently, test-flow-perf app cannot generate flows with meter > action. This patch introduces new parameter "--meter" to generate > flows with meter action. > > Signed-off-by: Dong Zhou <dongzhou@nvidia.com> > Reviewed-by: Wisam Jaddo <wisamm@nvidia.com> > --- > app/test-flow-perf/actions_gen.c | 19 ++ > app/test-flow-perf/config.h | 2 + > app/test-flow-perf/main.c | 415 > +++++++++++++++++++++++++++++++-------- > doc/guides/tools/flow-perf.rst | 4 + > 4 files changed, 353 insertions(+), 87 deletions(-) > > diff --git a/app/test-flow-perf/actions_gen.c b/app/test-flow- > perf/actions_gen.c > index c3545ba..1f5c64f 100644 > --- a/app/test-flow-perf/actions_gen.c > +++ b/app/test-flow-perf/actions_gen.c > @@ -891,6 +891,19 @@ struct action_rss_data { > actions[actions_counter].type = > RTE_FLOW_ACTION_TYPE_VXLAN_DECAP; > } > > +static void > +add_meter(struct rte_flow_action *actions, > + uint8_t actions_counter, > + __rte_unused struct additional_para para) > +{ > + static struct rte_flow_action_meter > + meters[RTE_MAX_LCORE] __rte_cache_aligned; > + > + meters[para.core_idx].mtr_id = para.counter; > + actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_METER; > + actions[actions_counter].conf = &meters[para.core_idx]; > +} > + > void > fill_actions(struct rte_flow_action *actions, uint64_t *flow_actions, > uint32_t counter, uint16_t next_table, uint16_t hairpinq, > @@ -1103,6 +1116,12 @@ struct action_rss_data { > ), > .funct = add_vxlan_decap, > }, > + { > + .mask = FLOW_ACTION_MASK( > + RTE_FLOW_ACTION_TYPE_METER > + ), > + .funct = add_meter, > + }, > }; > > for (j = 0; j < MAX_ACTIONS_NUM; j++) { > diff --git 
a/app/test-flow-perf/config.h b/app/test-flow-perf/config.h > index 94e83c9..3d4696d 100644 > --- a/app/test-flow-perf/config.h > +++ b/app/test-flow-perf/config.h > @@ -16,6 +16,8 @@ > #define NR_RXD 256 > #define NR_TXD 256 > #define MAX_PORTS 64 > +#define METER_CIR 1250000 > +#define DEFAULT_METER_PROF_ID 100 > > /* This is used for encap/decap & header modify actions. > * When it's 1: it means all actions have fixed values. > diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c > index 3a0e4c1..4d881ec 100644 > --- a/app/test-flow-perf/main.c > +++ b/app/test-flow-perf/main.c > @@ -34,6 +34,7 @@ > #include <rte_mbuf.h> > #include <rte_ethdev.h> > #include <rte_flow.h> > +#include <rte_mtr.h> > > #include "config.h" > #include "flow_gen.h" > @@ -72,6 +73,8 @@ > #define LCORE_MODE_PKT 1 > #define LCORE_MODE_STATS 2 > #define MAX_STREAMS 64 > +#define METER_CREATE 1 > +#define METER_DELETE 2 > > struct stream { > int tx_port; > @@ -93,11 +96,16 @@ struct lcore_info { > > static struct lcore_info lcore_infos[RTE_MAX_LCORE]; > > +struct used_cpu_time { > + double insertion[MAX_PORTS][RTE_MAX_LCORE]; > + double deletion[MAX_PORTS][RTE_MAX_LCORE]; > +}; > + > struct multi_cores_pool { > uint32_t cores_count; > uint32_t rules_count; > - double cpu_time_used_insertion[MAX_PORTS][RTE_MAX_LCORE]; > - double cpu_time_used_deletion[MAX_PORTS][RTE_MAX_LCORE]; > + struct used_cpu_time create_meter; > + struct used_cpu_time create_flow; > int64_t last_alloc[RTE_MAX_LCORE]; > int64_t current_alloc[RTE_MAX_LCORE]; > } __rte_cache_aligned; > @@ -195,6 +203,7 @@ struct multi_cores_pool { > printf(" --set-ipv6-dscp: add set ipv6 dscp action to flow actions\n" > "ipv6 dscp value to be set is random each flow\n"); > printf(" --flag: add flag action to flow actions\n"); > + printf(" --meter: add meter action to flow actions\n"); > printf(" --raw-encap=<data>: add raw encap action to flow > actions\n" > "Data is the data needed to be encaped\n" > "Example: 
raw-encap=ether,ipv4,udp,vxlan\n"); > @@ -524,6 +533,14 @@ struct multi_cores_pool { > .map_idx = &actions_idx > }, > { > + .str = "meter", > + .mask = FLOW_ACTION_MASK( > + RTE_FLOW_ACTION_TYPE_METER > + ), > + .map = &flow_actions[0], > + .map_idx = &actions_idx > + }, > + { > .str = "vxlan-encap", > .mask = FLOW_ACTION_MASK( > RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP > @@ -602,6 +619,7 @@ struct multi_cores_pool { > { "set-ipv4-dscp", 0, 0, 0 }, > { "set-ipv6-dscp", 0, 0, 0 }, > { "flag", 0, 0, 0 }, > + { "meter", 0, 0, 0 }, > { "raw-encap", 1, 0, 0 }, > { "raw-decap", 1, 0, 0 }, > { "vxlan-encap", 0, 0, 0 }, > @@ -874,6 +892,185 @@ struct multi_cores_pool { > } > } > > + > +static inline int > +has_meter(void) > +{ > + int i; > + > + for (i = 0; i < MAX_ACTIONS_NUM; i++) { > + if (flow_actions[i] == 0) > + break; > + if (flow_actions[i] > + & > FLOW_ACTION_MASK(RTE_FLOW_ACTION_TYPE_METER)) > + return 1; > + } > + return 0; > +} > + > +static void > +create_meter_rule(int port_id, uint32_t counter) > +{ > + int ret; > + struct rte_mtr_params params; > + uint32_t default_prof_id = 100; > + struct rte_mtr_error error; > + > + memset(¶ms, 0, sizeof(struct rte_mtr_params)); > + params.meter_enable = 1; > + params.stats_mask = 0xffff; > + params.use_prev_mtr_color = 0; > + params.dscp_table = NULL; > + > + /*create meter*/ > + params.meter_profile_id = default_prof_id; > + params.action[RTE_COLOR_GREEN] = > + MTR_POLICER_ACTION_COLOR_GREEN; > + params.action[RTE_COLOR_YELLOW] = > + MTR_POLICER_ACTION_COLOR_YELLOW; > + params.action[RTE_COLOR_RED] = > + MTR_POLICER_ACTION_DROP; > + > + ret = rte_mtr_create(port_id, counter, ¶ms, 1, &error); > + if (ret != 0) { > + printf("Port %u create meter idx(%d) error(%d) message: > %s\n", > + port_id, counter, error.type, > + error.message ? 
error.message : "(no stated > reason)"); > + rte_exit(EXIT_FAILURE, "error in creating meter"); > + } > +} > + > +static void > +destroy_meter_rule(int port_id, uint32_t counter) > +{ > + struct rte_mtr_error error; > + > + if (rte_mtr_destroy(port_id, counter, &error)) { > + printf("Port %u destroy meter(%d) error(%d) message: > %s\n", > + port_id, counter, error.type, > + error.message ? error.message : "(no stated > reason)"); > + rte_exit(EXIT_FAILURE, "Error in deleting meter rule"); > + } > +} > + > +static void > +meters_handler(int port_id, uint8_t core_id, uint8_t ops) > +{ > + uint64_t start_batch; > + double cpu_time_used, insertion_rate; > + int rules_count_per_core, rules_batch_idx; > + uint32_t counter, start_counter = 0, end_counter; > + double cpu_time_per_batch[MAX_BATCHES_COUNT] = { 0 }; > + > + rules_count_per_core = rules_count / mc_pool.cores_count; > + > + if (core_id) > + start_counter = core_id * rules_count_per_core; > + end_counter = (core_id + 1) * rules_count_per_core; > + > + cpu_time_used = 0; > + start_batch = rte_rdtsc(); > + for (counter = start_counter; counter < end_counter; counter++) { > + if (ops == METER_CREATE) > + create_meter_rule(port_id, counter); > + else > + destroy_meter_rule(port_id, counter); > + /* > + * Save the insertion rate for rules batch. > + * Check if the insertion reached the rules > + * batch counter, then save the insertion rate > + * for this batch. 
> + */ > + if (!((counter + 1) % rules_batch)) { > + rules_batch_idx = ((counter + 1) / rules_batch) - 1; > + cpu_time_per_batch[rules_batch_idx] = > + ((double)(rte_rdtsc() - start_batch)) > + / rte_get_tsc_hz(); > + cpu_time_used += > cpu_time_per_batch[rules_batch_idx]; > + start_batch = rte_rdtsc(); > + } > + } > + > + /* Print insertion rates for all batches */ > + if (dump_iterations) > + print_rules_batches(cpu_time_per_batch); > + > + insertion_rate = > + ((double) (rules_count_per_core / cpu_time_used) / 1000); > + > + /* Insertion rate for all rules in one core */ > + printf(":: Port %d :: Core %d Meter %s :: start @[%d] - end @[%d]," > + " use:%.02fs, rate:%.02fk Rule/Sec\n", > + port_id, core_id, ops == METER_CREATE ? "create" : > "delete", > + start_counter, end_counter - 1, > + cpu_time_used, insertion_rate); > + > + if (ops == METER_CREATE) > + mc_pool.create_meter.insertion[port_id][core_id] > + = cpu_time_used; > + else > + mc_pool.create_meter.deletion[port_id][core_id] > + = cpu_time_used; > +} > + > +static void > +destroy_meter_profile(void) > +{ > + struct rte_mtr_error error; > + uint16_t nr_ports; > + int port_id; > + > + nr_ports = rte_eth_dev_count_avail(); > + for (port_id = 0; port_id < nr_ports; port_id++) { > + /* If port outside portmask */ > + if (!((ports_mask >> port_id) & 0x1)) > + continue; > + > + if (rte_mtr_meter_profile_delete > + (port_id, DEFAULT_METER_PROF_ID, &error)) { > + printf("Port %u del profile error(%d) message: %s\n", > + port_id, error.type, > + error.message ? 
error.message : "(no stated > reason)"); > + rte_exit(EXIT_FAILURE, "Error: Destroy meter profile > Failed!\n"); > + } > + } > +} > + > +static void > +create_meter_profile(void) > +{ > + uint16_t nr_ports; > + int ret, port_id; > + struct rte_mtr_meter_profile mp; > + struct rte_mtr_error error; > + > + /* > + *currently, only create one meter profile for one port > + *1 meter profile -> N meter rules -> N rte flows > + */ > + memset(&mp, 0, sizeof(struct rte_mtr_meter_profile)); > + nr_ports = rte_eth_dev_count_avail(); > + for (port_id = 0; port_id < nr_ports; port_id++) { > + /* If port outside portmask */ > + if (!((ports_mask >> port_id) & 0x1)) > + continue; > + > + mp.alg = RTE_MTR_SRTCM_RFC2697; > + mp.srtcm_rfc2697.cir = METER_CIR; > + mp.srtcm_rfc2697.cbs = METER_CIR / 8; > + mp.srtcm_rfc2697.ebs = 0; > + > + ret = rte_mtr_meter_profile_add > + (port_id, DEFAULT_METER_PROF_ID, &mp, &error); > + if (ret != 0) { > + printf("Port %u create Profile error(%d) message: > %s\n", > + port_id, error.type, > + error.message ? 
error.message : "(no stated > reason)"); > + rte_exit(EXIT_FAILURE, "Error: Creation meter profile > Failed!\n"); > + } > + } > +} > + > static inline void > destroy_flows(int port_id, uint8_t core_id, struct rte_flow **flows_list) > { > @@ -888,6 +1085,8 @@ struct multi_cores_pool { > int rules_count_per_core; > > rules_count_per_core = rules_count / mc_pool.cores_count; > + if (flow_group > 0 && core_id == 0) > + rules_count_per_core++; > > start_batch = rte_rdtsc(); > for (i = 0; i < (uint32_t) rules_count_per_core; i++) { > @@ -927,7 +1126,7 @@ struct multi_cores_pool { > printf(":: Port %d :: Core %d :: The time for deleting %d rules is %f > seconds\n", > port_id, core_id, rules_count_per_core, cpu_time_used); > > - mc_pool.cpu_time_used_deletion[port_id][core_id] = > cpu_time_used; > + mc_pool.create_flow.deletion[port_id][core_id] = cpu_time_used; > } > > static struct rte_flow ** > @@ -1034,7 +1233,7 @@ struct multi_cores_pool { > printf(":: Port %d :: Core %d :: The time for creating %d in rules %f > seconds\n", > port_id, core_id, rules_count_per_core, cpu_time_used); > > - mc_pool.cpu_time_used_insertion[port_id][core_id] = > cpu_time_used; > + mc_pool.create_flow.insertion[port_id][core_id] = cpu_time_used; > return flows_list; > } > > @@ -1047,9 +1246,6 @@ struct multi_cores_pool { > > nr_ports = rte_eth_dev_count_avail(); > > - if (rules_batch > rules_count) > - rules_batch = rules_count; > - > printf(":: Rules Count per port: %d\n\n", rules_count); > > for (port_id = 0; port_id < nr_ports; port_id++) { > @@ -1059,21 +1255,27 @@ struct multi_cores_pool { > > /* Insertion part. */ > mc_pool.last_alloc[core_id] = > (int64_t)dump_socket_mem(stdout); > + if (has_meter()) > + meters_handler(port_id, core_id, METER_CREATE); > flows_list = insert_flows(port_id, core_id); > if (flows_list == NULL) > rte_exit(EXIT_FAILURE, "Error: Insertion Failed!\n"); > mc_pool.current_alloc[core_id] = > (int64_t)dump_socket_mem(stdout); > > /* Deletion part. 
*/ > - if (delete_flag) > + if (delete_flag) { > destroy_flows(port_id, core_id, flows_list); > + if (has_meter()) > + meters_handler(port_id, core_id, > METER_DELETE); > + } > } > } > > -static int > -run_rte_flow_handler_cores(void *data __rte_unused) > +static void > +dump_used_cpu_time(const char *item, > + uint16_t port, struct used_cpu_time > *used_time) > { > - uint16_t port; > + uint32_t i; > /* Latency: total count of rte rules divided > * over max time used by thread between all > * threads time. > @@ -1088,8 +1290,111 @@ struct multi_cores_pool { > double deletion_throughput_time; > double insertion_latency, insertion_throughput; > double deletion_latency, deletion_throughput; > + > + /* Save first insertion/deletion rates from first thread. > + * Start comparing with all threads, if any thread used > + * time more than current saved, replace it. > + * > + * Thus in the end we will have the max time used for > + * insertion/deletion by one thread. > + * > + * As for memory consumption, save the min of all threads > + * of last alloc, and save the max for all threads for > + * current alloc. 
> + */ > + > + insertion_latency_time = used_time->insertion[port][0]; > + deletion_latency_time = used_time->deletion[port][0]; > + insertion_throughput_time = used_time->insertion[port][0]; > + deletion_throughput_time = used_time->deletion[port][0]; > + > + i = mc_pool.cores_count; > + while (i-- > 1) { > + insertion_throughput_time += used_time- > >insertion[port][i]; > + deletion_throughput_time += used_time->deletion[port][i]; > + if (insertion_latency_time < used_time->insertion[port][i]) > + insertion_latency_time = used_time- > >insertion[port][i]; > + if (deletion_latency_time < used_time->deletion[port][i]) > + deletion_latency_time = used_time- > >deletion[port][i]; > + } > + > + insertion_latency = ((double) (mc_pool.rules_count > + / insertion_latency_time) / 1000); > + deletion_latency = ((double) (mc_pool.rules_count > + / deletion_latency_time) / 1000); > + > + insertion_throughput_time /= mc_pool.cores_count; > + deletion_throughput_time /= mc_pool.cores_count; > + insertion_throughput = ((double) (mc_pool.rules_count > + / insertion_throughput_time) / 1000); > + deletion_throughput = ((double) (mc_pool.rules_count > + / deletion_throughput_time) / 1000); > + > + /* Latency stats */ > + printf("\n%s\n:: [Latency | Insertion] All Cores :: Port %d :: ", > + item, port); > + printf("Total flows insertion rate -> %f K Rules/Sec\n", > + insertion_latency); > + printf(":: [Latency | Insertion] All Cores :: Port %d :: ", port); > + printf("The time for creating %d rules is %f seconds\n", > + mc_pool.rules_count, insertion_latency_time); > + > + /* Throughput stats */ > + printf(":: [Throughput | Insertion] All Cores :: Port %d :: ", port); > + printf("Total flows insertion rate -> %f K Rules/Sec\n", > + insertion_throughput); > + printf(":: [Throughput | Insertion] All Cores :: Port %d :: ", port); > + printf("The average time for creating %d rules is %f seconds\n", > + mc_pool.rules_count, insertion_throughput_time); > + > + if (delete_flag) { > + /* 
Latency stats */ > + printf(":: [Latency | Deletion] All Cores :: Port %d :: Total " > + "deletion rate -> %f K Rules/Sec\n", > + port, deletion_latency); > + printf(":: [Latency | Deletion] All Cores :: Port %d :: ", > + port); > + printf("The time for deleting %d rules is %f seconds\n", > + mc_pool.rules_count, deletion_latency_time); > + > + /* Throughput stats */ > + printf(":: [Throughput | Deletion] All Cores :: Port %d :: Total " > + "deletion rate -> %f K Rules/Sec\n", > + port, deletion_throughput); > + printf(":: [Throughput | Deletion] All Cores :: Port %d :: ", > + port); > + printf("The average time for deleting %d rules is %f > seconds\n", > + mc_pool.rules_count, deletion_throughput_time); > + } > +} > + > +static void > +dump_used_mem(uint16_t port) > +{ > + uint32_t i; > int64_t last_alloc, current_alloc; > int flow_size_in_bytes; > + > + last_alloc = mc_pool.last_alloc[0]; > + current_alloc = mc_pool.current_alloc[0]; > + > + i = mc_pool.cores_count; > + while (i-- > 1) { > + if (last_alloc > mc_pool.last_alloc[i]) > + last_alloc = mc_pool.last_alloc[i]; > + if (current_alloc < mc_pool.current_alloc[i]) > + current_alloc = mc_pool.current_alloc[i]; > + } > + > + flow_size_in_bytes = (current_alloc - last_alloc) / > mc_pool.rules_count; > + printf("\n:: Port %d :: rte_flow size in DPDK layer: %d Bytes\n", > + port, flow_size_in_bytes); > +} > + > +static int > +run_rte_flow_handler_cores(void *data __rte_unused) > +{ > + uint16_t port; I don't see how this part of the patch is related to the meter action. Probably this change deserves a separate commit with a proper message. > int lcore_counter = 0; > int lcore_id = rte_lcore_id(); > int i; > @@ -1120,83 +1425,12 @@ struct multi_cores_pool { > /* Make sure all cores finished insertion/deletion process. */ > rte_eal_mp_wait_lcore(); > > - /* Save first insertion/deletion rates from first thread. 
> - * Start comparing with all threads, if any thread used > - * time more than current saved, replace it. > - * > - * Thus in the end we will have the max time used for > - * insertion/deletion by one thread. > - * > - * As for memory consumption, save the min of all threads > - * of last alloc, and save the max for all threads for > - * current alloc. > - */ > RTE_ETH_FOREACH_DEV(port) { > - last_alloc = mc_pool.last_alloc[0]; > - current_alloc = mc_pool.current_alloc[0]; > - > - insertion_latency_time = > mc_pool.cpu_time_used_insertion[port][0]; > - deletion_latency_time = > mc_pool.cpu_time_used_deletion[port][0]; > - insertion_throughput_time = > mc_pool.cpu_time_used_insertion[port][0]; > - deletion_throughput_time = > mc_pool.cpu_time_used_deletion[port][0]; > - i = mc_pool.cores_count; > - while (i-- > 1) { > - insertion_throughput_time += > mc_pool.cpu_time_used_insertion[port][i]; > - deletion_throughput_time += > mc_pool.cpu_time_used_deletion[port][i]; > - if (insertion_latency_time < > mc_pool.cpu_time_used_insertion[port][i]) > - insertion_latency_time = > mc_pool.cpu_time_used_insertion[port][i]; > - if (deletion_latency_time < > mc_pool.cpu_time_used_deletion[port][i]) > - deletion_latency_time = > mc_pool.cpu_time_used_deletion[port][i]; > - if (last_alloc > mc_pool.last_alloc[i]) > - last_alloc = mc_pool.last_alloc[i]; > - if (current_alloc < mc_pool.current_alloc[i]) > - current_alloc = mc_pool.current_alloc[i]; > - } > - > - flow_size_in_bytes = (current_alloc - last_alloc) / > mc_pool.rules_count; > - > - insertion_latency = ((double) (mc_pool.rules_count / > insertion_latency_time) / 1000); > - deletion_latency = ((double) (mc_pool.rules_count / > deletion_latency_time) / 1000); > - > - insertion_throughput_time /= mc_pool.cores_count; > - deletion_throughput_time /= mc_pool.cores_count; > - insertion_throughput = ((double) (mc_pool.rules_count / > insertion_throughput_time) / 1000); > - deletion_throughput = ((double) (mc_pool.rules_count / 
> deletion_throughput_time) / 1000); > - > - /* Latency stats */ > - printf("\n:: [Latency | Insertion] All Cores :: Port %d :: ", > port); > - printf("Total flows insertion rate -> %f K Rules/Sec\n", > - insertion_latency); > - printf(":: [Latency | Insertion] All Cores :: Port %d :: ", port); > - printf("The time for creating %d rules is %f seconds\n", > - mc_pool.rules_count, insertion_latency_time); > - > - /* Throughput stats */ > - printf(":: [Throughput | Insertion] All Cores :: Port %d :: ", > port); > - printf("Total flows insertion rate -> %f K Rules/Sec\n", > - insertion_throughput); > - printf(":: [Throughput | Insertion] All Cores :: Port %d :: ", > port); > - printf("The average time for creating %d rules is %f > seconds\n", > - mc_pool.rules_count, insertion_throughput_time); > - > - if (delete_flag) { > - /* Latency stats */ > - printf(":: [Latency | Deletion] All Cores :: Port %d :: > Total flows " > - "deletion rate -> %f K Rules/Sec\n", > - port, deletion_latency); > - printf(":: [Latency | Deletion] All Cores :: Port %d :: ", > port); > - printf("The time for deleting %d rules is %f > seconds\n", > - mc_pool.rules_count, deletion_latency_time); > - > - /* Throughput stats */ > - printf(":: [Throughput | Deletion] All Cores :: Port %d > :: Total flows " > - "deletion rate -> %f K Rules/Sec\n", port, > deletion_throughput); > - printf(":: [Throughput | Deletion] All Cores :: Port %d > :: ", port); > - printf("The average time for deleting %d rules is %f > seconds\n", > - mc_pool.rules_count, deletion_throughput_time); > - } > - printf("\n:: Port %d :: rte_flow size in DPDK layer: %d > Bytes\n", > - port, flow_size_in_bytes); > + dump_used_cpu_time("Meters:", > + port, &mc_pool.create_meter); > + dump_used_cpu_time("Flows:", > + port, &mc_pool.create_flow); > + dump_used_mem(port); > } > > return 0; > @@ -1633,6 +1867,9 @@ struct multi_cores_pool { > if (argc > 1) > args_parse(argc, argv); > > + if (rules_batch > rules_count) > + rules_batch = 
rules_count; > + > init_port(); > > nb_lcores = rte_lcore_count(); > @@ -1642,12 +1879,16 @@ struct multi_cores_pool { > > printf(":: Flows Count per port: %d\n\n", rules_count); > > + if (has_meter()) > + create_meter_profile(); > rte_eal_mp_remote_launch(run_rte_flow_handler_cores, NULL, > CALL_MAIN); > > if (enable_fwd) { > init_lcore_info(); > rte_eal_mp_remote_launch(start_forwarding, NULL, > CALL_MAIN); > } > + if (has_meter() && delete_flag) > + destroy_meter_profile(); > > RTE_ETH_FOREACH_DEV(port) { > rte_flow_flush(port, &error); > diff --git a/doc/guides/tools/flow-perf.rst b/doc/guides/tools/flow-perf.rst > index 40d157e..017e200 100644 > --- a/doc/guides/tools/flow-perf.rst > +++ b/doc/guides/tools/flow-perf.rst > @@ -345,3 +345,7 @@ Actions: > > * ``--vxlan-decap`` > Add vxlan decap action to all flows actions. > + > +* ``--meter`` > + Add meter action to all flows actions. > + Currently, 1 meter profile -> N meter rules -> N rte flows. > -- > 1.8.3.1
diff --git a/app/test-flow-perf/actions_gen.c b/app/test-flow-perf/actions_gen.c index c3545ba..1f5c64f 100644 --- a/app/test-flow-perf/actions_gen.c +++ b/app/test-flow-perf/actions_gen.c @@ -891,6 +891,19 @@ struct action_rss_data { actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_VXLAN_DECAP; } +static void +add_meter(struct rte_flow_action *actions, + uint8_t actions_counter, + __rte_unused struct additional_para para) +{ + static struct rte_flow_action_meter + meters[RTE_MAX_LCORE] __rte_cache_aligned; + + meters[para.core_idx].mtr_id = para.counter; + actions[actions_counter].type = RTE_FLOW_ACTION_TYPE_METER; + actions[actions_counter].conf = &meters[para.core_idx]; +} + void fill_actions(struct rte_flow_action *actions, uint64_t *flow_actions, uint32_t counter, uint16_t next_table, uint16_t hairpinq, @@ -1103,6 +1116,12 @@ struct action_rss_data { ), .funct = add_vxlan_decap, }, + { + .mask = FLOW_ACTION_MASK( + RTE_FLOW_ACTION_TYPE_METER + ), + .funct = add_meter, + }, }; for (j = 0; j < MAX_ACTIONS_NUM; j++) { diff --git a/app/test-flow-perf/config.h b/app/test-flow-perf/config.h index 94e83c9..3d4696d 100644 --- a/app/test-flow-perf/config.h +++ b/app/test-flow-perf/config.h @@ -16,6 +16,8 @@ #define NR_RXD 256 #define NR_TXD 256 #define MAX_PORTS 64 +#define METER_CIR 1250000 +#define DEFAULT_METER_PROF_ID 100 /* This is used for encap/decap & header modify actions. * When it's 1: it means all actions have fixed values. 
diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c index 3a0e4c1..4d881ec 100644 --- a/app/test-flow-perf/main.c +++ b/app/test-flow-perf/main.c @@ -34,6 +34,7 @@ #include <rte_mbuf.h> #include <rte_ethdev.h> #include <rte_flow.h> +#include <rte_mtr.h> #include "config.h" #include "flow_gen.h" @@ -72,6 +73,8 @@ #define LCORE_MODE_PKT 1 #define LCORE_MODE_STATS 2 #define MAX_STREAMS 64 +#define METER_CREATE 1 +#define METER_DELETE 2 struct stream { int tx_port; @@ -93,11 +96,16 @@ struct lcore_info { static struct lcore_info lcore_infos[RTE_MAX_LCORE]; +struct used_cpu_time { + double insertion[MAX_PORTS][RTE_MAX_LCORE]; + double deletion[MAX_PORTS][RTE_MAX_LCORE]; +}; + struct multi_cores_pool { uint32_t cores_count; uint32_t rules_count; - double cpu_time_used_insertion[MAX_PORTS][RTE_MAX_LCORE]; - double cpu_time_used_deletion[MAX_PORTS][RTE_MAX_LCORE]; + struct used_cpu_time create_meter; + struct used_cpu_time create_flow; int64_t last_alloc[RTE_MAX_LCORE]; int64_t current_alloc[RTE_MAX_LCORE]; } __rte_cache_aligned; @@ -195,6 +203,7 @@ struct multi_cores_pool { printf(" --set-ipv6-dscp: add set ipv6 dscp action to flow actions\n" "ipv6 dscp value to be set is random each flow\n"); printf(" --flag: add flag action to flow actions\n"); + printf(" --meter: add meter action to flow actions\n"); printf(" --raw-encap=<data>: add raw encap action to flow actions\n" "Data is the data needed to be encaped\n" "Example: raw-encap=ether,ipv4,udp,vxlan\n"); @@ -524,6 +533,14 @@ struct multi_cores_pool { .map_idx = &actions_idx }, { + .str = "meter", + .mask = FLOW_ACTION_MASK( + RTE_FLOW_ACTION_TYPE_METER + ), + .map = &flow_actions[0], + .map_idx = &actions_idx + }, + { .str = "vxlan-encap", .mask = FLOW_ACTION_MASK( RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP @@ -602,6 +619,7 @@ struct multi_cores_pool { { "set-ipv4-dscp", 0, 0, 0 }, { "set-ipv6-dscp", 0, 0, 0 }, { "flag", 0, 0, 0 }, + { "meter", 0, 0, 0 }, { "raw-encap", 1, 0, 0 }, { "raw-decap", 1, 0, 0 }, { 
"vxlan-encap", 0, 0, 0 }, @@ -874,6 +892,185 @@ struct multi_cores_pool { } } + +static inline int +has_meter(void) +{ + int i; + + for (i = 0; i < MAX_ACTIONS_NUM; i++) { + if (flow_actions[i] == 0) + break; + if (flow_actions[i] + & FLOW_ACTION_MASK(RTE_FLOW_ACTION_TYPE_METER)) + return 1; + } + return 0; +} + +static void +create_meter_rule(int port_id, uint32_t counter) +{ + int ret; + struct rte_mtr_params params; + uint32_t default_prof_id = 100; + struct rte_mtr_error error; + + memset(¶ms, 0, sizeof(struct rte_mtr_params)); + params.meter_enable = 1; + params.stats_mask = 0xffff; + params.use_prev_mtr_color = 0; + params.dscp_table = NULL; + + /*create meter*/ + params.meter_profile_id = default_prof_id; + params.action[RTE_COLOR_GREEN] = + MTR_POLICER_ACTION_COLOR_GREEN; + params.action[RTE_COLOR_YELLOW] = + MTR_POLICER_ACTION_COLOR_YELLOW; + params.action[RTE_COLOR_RED] = + MTR_POLICER_ACTION_DROP; + + ret = rte_mtr_create(port_id, counter, ¶ms, 1, &error); + if (ret != 0) { + printf("Port %u create meter idx(%d) error(%d) message: %s\n", + port_id, counter, error.type, + error.message ? error.message : "(no stated reason)"); + rte_exit(EXIT_FAILURE, "error in creating meter"); + } +} + +static void +destroy_meter_rule(int port_id, uint32_t counter) +{ + struct rte_mtr_error error; + + if (rte_mtr_destroy(port_id, counter, &error)) { + printf("Port %u destroy meter(%d) error(%d) message: %s\n", + port_id, counter, error.type, + error.message ? 
error.message : "(no stated reason)"); + rte_exit(EXIT_FAILURE, "Error in deleting meter rule"); + } +} + +static void +meters_handler(int port_id, uint8_t core_id, uint8_t ops) +{ + uint64_t start_batch; + double cpu_time_used, insertion_rate; + int rules_count_per_core, rules_batch_idx; + uint32_t counter, start_counter = 0, end_counter; + double cpu_time_per_batch[MAX_BATCHES_COUNT] = { 0 }; + + rules_count_per_core = rules_count / mc_pool.cores_count; + + if (core_id) + start_counter = core_id * rules_count_per_core; + end_counter = (core_id + 1) * rules_count_per_core; + + cpu_time_used = 0; + start_batch = rte_rdtsc(); + for (counter = start_counter; counter < end_counter; counter++) { + if (ops == METER_CREATE) + create_meter_rule(port_id, counter); + else + destroy_meter_rule(port_id, counter); + /* + * Save the insertion rate for rules batch. + * Check if the insertion reached the rules + * batch counter, then save the insertion rate + * for this batch. + */ + if (!((counter + 1) % rules_batch)) { + rules_batch_idx = ((counter + 1) / rules_batch) - 1; + cpu_time_per_batch[rules_batch_idx] = + ((double)(rte_rdtsc() - start_batch)) + / rte_get_tsc_hz(); + cpu_time_used += cpu_time_per_batch[rules_batch_idx]; + start_batch = rte_rdtsc(); + } + } + + /* Print insertion rates for all batches */ + if (dump_iterations) + print_rules_batches(cpu_time_per_batch); + + insertion_rate = + ((double) (rules_count_per_core / cpu_time_used) / 1000); + + /* Insertion rate for all rules in one core */ + printf(":: Port %d :: Core %d Meter %s :: start @[%d] - end @[%d]," + " use:%.02fs, rate:%.02fk Rule/Sec\n", + port_id, core_id, ops == METER_CREATE ? 
"create" : "delete", + start_counter, end_counter - 1, + cpu_time_used, insertion_rate); + + if (ops == METER_CREATE) + mc_pool.create_meter.insertion[port_id][core_id] + = cpu_time_used; + else + mc_pool.create_meter.deletion[port_id][core_id] + = cpu_time_used; +} + +static void +destroy_meter_profile(void) +{ + struct rte_mtr_error error; + uint16_t nr_ports; + int port_id; + + nr_ports = rte_eth_dev_count_avail(); + for (port_id = 0; port_id < nr_ports; port_id++) { + /* If port outside portmask */ + if (!((ports_mask >> port_id) & 0x1)) + continue; + + if (rte_mtr_meter_profile_delete + (port_id, DEFAULT_METER_PROF_ID, &error)) { + printf("Port %u del profile error(%d) message: %s\n", + port_id, error.type, + error.message ? error.message : "(no stated reason)"); + rte_exit(EXIT_FAILURE, "Error: Destroy meter profile Failed!\n"); + } + } +} + +static void +create_meter_profile(void) +{ + uint16_t nr_ports; + int ret, port_id; + struct rte_mtr_meter_profile mp; + struct rte_mtr_error error; + + /* + *currently, only create one meter profile for one port + *1 meter profile -> N meter rules -> N rte flows + */ + memset(&mp, 0, sizeof(struct rte_mtr_meter_profile)); + nr_ports = rte_eth_dev_count_avail(); + for (port_id = 0; port_id < nr_ports; port_id++) { + /* If port outside portmask */ + if (!((ports_mask >> port_id) & 0x1)) + continue; + + mp.alg = RTE_MTR_SRTCM_RFC2697; + mp.srtcm_rfc2697.cir = METER_CIR; + mp.srtcm_rfc2697.cbs = METER_CIR / 8; + mp.srtcm_rfc2697.ebs = 0; + + ret = rte_mtr_meter_profile_add + (port_id, DEFAULT_METER_PROF_ID, &mp, &error); + if (ret != 0) { + printf("Port %u create Profile error(%d) message: %s\n", + port_id, error.type, + error.message ? 
error.message : "(no stated reason)"); + rte_exit(EXIT_FAILURE, "Error: Creation meter profile Failed!\n"); + } + } +} + static inline void destroy_flows(int port_id, uint8_t core_id, struct rte_flow **flows_list) { @@ -888,6 +1085,8 @@ struct multi_cores_pool { int rules_count_per_core; rules_count_per_core = rules_count / mc_pool.cores_count; + if (flow_group > 0 && core_id == 0) + rules_count_per_core++; start_batch = rte_rdtsc(); for (i = 0; i < (uint32_t) rules_count_per_core; i++) { @@ -927,7 +1126,7 @@ struct multi_cores_pool { printf(":: Port %d :: Core %d :: The time for deleting %d rules is %f seconds\n", port_id, core_id, rules_count_per_core, cpu_time_used); - mc_pool.cpu_time_used_deletion[port_id][core_id] = cpu_time_used; + mc_pool.create_flow.deletion[port_id][core_id] = cpu_time_used; } static struct rte_flow ** @@ -1034,7 +1233,7 @@ struct multi_cores_pool { printf(":: Port %d :: Core %d :: The time for creating %d in rules %f seconds\n", port_id, core_id, rules_count_per_core, cpu_time_used); - mc_pool.cpu_time_used_insertion[port_id][core_id] = cpu_time_used; + mc_pool.create_flow.insertion[port_id][core_id] = cpu_time_used; return flows_list; } @@ -1047,9 +1246,6 @@ struct multi_cores_pool { nr_ports = rte_eth_dev_count_avail(); - if (rules_batch > rules_count) - rules_batch = rules_count; - printf(":: Rules Count per port: %d\n\n", rules_count); for (port_id = 0; port_id < nr_ports; port_id++) { @@ -1059,21 +1255,27 @@ struct multi_cores_pool { /* Insertion part. */ mc_pool.last_alloc[core_id] = (int64_t)dump_socket_mem(stdout); + if (has_meter()) + meters_handler(port_id, core_id, METER_CREATE); flows_list = insert_flows(port_id, core_id); if (flows_list == NULL) rte_exit(EXIT_FAILURE, "Error: Insertion Failed!\n"); mc_pool.current_alloc[core_id] = (int64_t)dump_socket_mem(stdout); /* Deletion part. 
*/ - if (delete_flag) + if (delete_flag) { destroy_flows(port_id, core_id, flows_list); + if (has_meter()) + meters_handler(port_id, core_id, METER_DELETE); + } } } -static int -run_rte_flow_handler_cores(void *data __rte_unused) +static void +dump_used_cpu_time(const char *item, + uint16_t port, struct used_cpu_time *used_time) { - uint16_t port; + uint32_t i; /* Latency: total count of rte rules divided * over max time used by thread between all * threads time. @@ -1088,8 +1290,111 @@ struct multi_cores_pool { double deletion_throughput_time; double insertion_latency, insertion_throughput; double deletion_latency, deletion_throughput; + + /* Save first insertion/deletion rates from first thread. + * Start comparing with all threads, if any thread used + * time more than current saved, replace it. + * + * Thus in the end we will have the max time used for + * insertion/deletion by one thread. + * + * As for memory consumption, save the min of all threads + * of last alloc, and save the max for all threads for + * current alloc. 
+ */ + + insertion_latency_time = used_time->insertion[port][0]; + deletion_latency_time = used_time->deletion[port][0]; + insertion_throughput_time = used_time->insertion[port][0]; + deletion_throughput_time = used_time->deletion[port][0]; + + i = mc_pool.cores_count; + while (i-- > 1) { + insertion_throughput_time += used_time->insertion[port][i]; + deletion_throughput_time += used_time->deletion[port][i]; + if (insertion_latency_time < used_time->insertion[port][i]) + insertion_latency_time = used_time->insertion[port][i]; + if (deletion_latency_time < used_time->deletion[port][i]) + deletion_latency_time = used_time->deletion[port][i]; + } + + insertion_latency = ((double) (mc_pool.rules_count + / insertion_latency_time) / 1000); + deletion_latency = ((double) (mc_pool.rules_count + / deletion_latency_time) / 1000); + + insertion_throughput_time /= mc_pool.cores_count; + deletion_throughput_time /= mc_pool.cores_count; + insertion_throughput = ((double) (mc_pool.rules_count + / insertion_throughput_time) / 1000); + deletion_throughput = ((double) (mc_pool.rules_count + / deletion_throughput_time) / 1000); + + /* Latency stats */ + printf("\n%s\n:: [Latency | Insertion] All Cores :: Port %d :: ", + item, port); + printf("Total flows insertion rate -> %f K Rules/Sec\n", + insertion_latency); + printf(":: [Latency | Insertion] All Cores :: Port %d :: ", port); + printf("The time for creating %d rules is %f seconds\n", + mc_pool.rules_count, insertion_latency_time); + + /* Throughput stats */ + printf(":: [Throughput | Insertion] All Cores :: Port %d :: ", port); + printf("Total flows insertion rate -> %f K Rules/Sec\n", + insertion_throughput); + printf(":: [Throughput | Insertion] All Cores :: Port %d :: ", port); + printf("The average time for creating %d rules is %f seconds\n", + mc_pool.rules_count, insertion_throughput_time); + + if (delete_flag) { + /* Latency stats */ + printf(":: [Latency | Deletion] All Cores :: Port %d :: Total " + "deletion rate -> %f K 
Rules/Sec\n", + port, deletion_latency); + printf(":: [Latency | Deletion] All Cores :: Port %d :: ", + port); + printf("The time for deleting %d rules is %f seconds\n", + mc_pool.rules_count, deletion_latency_time); + + /* Throughput stats */ + printf(":: [Throughput | Deletion] All Cores :: Port %d :: Total " + "deletion rate -> %f K Rules/Sec\n", + port, deletion_throughput); + printf(":: [Throughput | Deletion] All Cores :: Port %d :: ", + port); + printf("The average time for deleting %d rules is %f seconds\n", + mc_pool.rules_count, deletion_throughput_time); + } +} + +static void +dump_used_mem(uint16_t port) +{ + uint32_t i; int64_t last_alloc, current_alloc; int flow_size_in_bytes; + + last_alloc = mc_pool.last_alloc[0]; + current_alloc = mc_pool.current_alloc[0]; + + i = mc_pool.cores_count; + while (i-- > 1) { + if (last_alloc > mc_pool.last_alloc[i]) + last_alloc = mc_pool.last_alloc[i]; + if (current_alloc < mc_pool.current_alloc[i]) + current_alloc = mc_pool.current_alloc[i]; + } + + flow_size_in_bytes = (current_alloc - last_alloc) / mc_pool.rules_count; + printf("\n:: Port %d :: rte_flow size in DPDK layer: %d Bytes\n", + port, flow_size_in_bytes); +} + +static int +run_rte_flow_handler_cores(void *data __rte_unused) +{ + uint16_t port; int lcore_counter = 0; int lcore_id = rte_lcore_id(); int i; @@ -1120,83 +1425,12 @@ struct multi_cores_pool { /* Make sure all cores finished insertion/deletion process. */ rte_eal_mp_wait_lcore(); - /* Save first insertion/deletion rates from first thread. - * Start comparing with all threads, if any thread used - * time more than current saved, replace it. - * - * Thus in the end we will have the max time used for - * insertion/deletion by one thread. - * - * As for memory consumption, save the min of all threads - * of last alloc, and save the max for all threads for - * current alloc. 
- */ RTE_ETH_FOREACH_DEV(port) { - last_alloc = mc_pool.last_alloc[0]; - current_alloc = mc_pool.current_alloc[0]; - - insertion_latency_time = mc_pool.cpu_time_used_insertion[port][0]; - deletion_latency_time = mc_pool.cpu_time_used_deletion[port][0]; - insertion_throughput_time = mc_pool.cpu_time_used_insertion[port][0]; - deletion_throughput_time = mc_pool.cpu_time_used_deletion[port][0]; - i = mc_pool.cores_count; - while (i-- > 1) { - insertion_throughput_time += mc_pool.cpu_time_used_insertion[port][i]; - deletion_throughput_time += mc_pool.cpu_time_used_deletion[port][i]; - if (insertion_latency_time < mc_pool.cpu_time_used_insertion[port][i]) - insertion_latency_time = mc_pool.cpu_time_used_insertion[port][i]; - if (deletion_latency_time < mc_pool.cpu_time_used_deletion[port][i]) - deletion_latency_time = mc_pool.cpu_time_used_deletion[port][i]; - if (last_alloc > mc_pool.last_alloc[i]) - last_alloc = mc_pool.last_alloc[i]; - if (current_alloc < mc_pool.current_alloc[i]) - current_alloc = mc_pool.current_alloc[i]; - } - - flow_size_in_bytes = (current_alloc - last_alloc) / mc_pool.rules_count; - - insertion_latency = ((double) (mc_pool.rules_count / insertion_latency_time) / 1000); - deletion_latency = ((double) (mc_pool.rules_count / deletion_latency_time) / 1000); - - insertion_throughput_time /= mc_pool.cores_count; - deletion_throughput_time /= mc_pool.cores_count; - insertion_throughput = ((double) (mc_pool.rules_count / insertion_throughput_time) / 1000); - deletion_throughput = ((double) (mc_pool.rules_count / deletion_throughput_time) / 1000); - - /* Latency stats */ - printf("\n:: [Latency | Insertion] All Cores :: Port %d :: ", port); - printf("Total flows insertion rate -> %f K Rules/Sec\n", - insertion_latency); - printf(":: [Latency | Insertion] All Cores :: Port %d :: ", port); - printf("The time for creating %d rules is %f seconds\n", - mc_pool.rules_count, insertion_latency_time); - - /* Throughput stats */ - printf(":: [Throughput | 
Insertion] All Cores :: Port %d :: ", port); - printf("Total flows insertion rate -> %f K Rules/Sec\n", - insertion_throughput); - printf(":: [Throughput | Insertion] All Cores :: Port %d :: ", port); - printf("The average time for creating %d rules is %f seconds\n", - mc_pool.rules_count, insertion_throughput_time); - - if (delete_flag) { - /* Latency stats */ - printf(":: [Latency | Deletion] All Cores :: Port %d :: Total flows " - "deletion rate -> %f K Rules/Sec\n", - port, deletion_latency); - printf(":: [Latency | Deletion] All Cores :: Port %d :: ", port); - printf("The time for deleting %d rules is %f seconds\n", - mc_pool.rules_count, deletion_latency_time); - - /* Throughput stats */ - printf(":: [Throughput | Deletion] All Cores :: Port %d :: Total flows " - "deletion rate -> %f K Rules/Sec\n", port, deletion_throughput); - printf(":: [Throughput | Deletion] All Cores :: Port %d :: ", port); - printf("The average time for deleting %d rules is %f seconds\n", - mc_pool.rules_count, deletion_throughput_time); - } - printf("\n:: Port %d :: rte_flow size in DPDK layer: %d Bytes\n", - port, flow_size_in_bytes); + dump_used_cpu_time("Meters:", + port, &mc_pool.create_meter); + dump_used_cpu_time("Flows:", + port, &mc_pool.create_flow); + dump_used_mem(port); } return 0; @@ -1633,6 +1867,9 @@ struct multi_cores_pool { if (argc > 1) args_parse(argc, argv); + if (rules_batch > rules_count) + rules_batch = rules_count; + init_port(); nb_lcores = rte_lcore_count(); @@ -1642,12 +1879,16 @@ struct multi_cores_pool { printf(":: Flows Count per port: %d\n\n", rules_count); + if (has_meter()) + create_meter_profile(); rte_eal_mp_remote_launch(run_rte_flow_handler_cores, NULL, CALL_MAIN); if (enable_fwd) { init_lcore_info(); rte_eal_mp_remote_launch(start_forwarding, NULL, CALL_MAIN); } + if (has_meter() && delete_flag) + destroy_meter_profile(); RTE_ETH_FOREACH_DEV(port) { rte_flow_flush(port, &error); diff --git a/doc/guides/tools/flow-perf.rst 
b/doc/guides/tools/flow-perf.rst index 40d157e..017e200 100644 --- a/doc/guides/tools/flow-perf.rst +++ b/doc/guides/tools/flow-perf.rst @@ -345,3 +345,7 @@ Actions: * ``--vxlan-decap`` Add vxlan decap action to all flows actions. + +* ``--meter`` + Add meter action to all flows actions. + Currently, 1 meter profile -> N meter rules -> N rte flows.