From patchwork Thu Nov 26 11:15:40 2020
X-Patchwork-Submitter: Wisam Monther
X-Patchwork-Id: 84571
X-Patchwork-Delegate: thomas@monjalon.net
From: Wisam Jaddo
Date: Thu, 26 Nov 2020 13:15:40 +0200
Message-ID: <20201126111543.16928-2-wisamm@nvidia.com>
X-Mailer: git-send-email 2.17.1
In-Reply-To: <20201126111543.16928-1-wisamm@nvidia.com>
References: <20201126111543.16928-1-wisamm@nvidia.com>
Subject: [dpdk-dev] [PATCH 1/4] app/flow-perf: refactor flows handler

Give the flows_handler() function control over the flow performance
processes. This is made possible by introducing the insert_flows()
function.

Also let flows_handler() print the DPDK-layer memory consumption of an
rte_flow rule regardless of whether the deletion feature is enabled.
In the previous solution, the memory change was only printed after
flows_handler() returned, so when deletion was enabled the reported
value no longer represented the rte_flow rule size.

The new design is also easier to read and understand.
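The per-rule memory accounting described above reduces to sampling the
DPDK-layer allocation counter immediately before and immediately after
insert_flows(), then dividing the delta by the number of rules. The
standalone sketch below illustrates only that arithmetic, not the real
driver path: snapshot_alloc_bytes() and insert_fake_rule() are
hypothetical stand-ins for the app's dump_socket_mem() helper and for
rte_flow_create(), and the allocation sizes are simulated.

/* Minimal sketch of the two-snapshot, per-rule size estimate. */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define RULES_COUNT 1000
#define FAKE_RULE_SIZE 256

/* Stand-in for the DPDK socket memory counter read by dump_socket_mem(). */
static int64_t total_allocated = 1 << 20; /* pretend EAL already holds 1 MiB */

static int64_t
snapshot_alloc_bytes(void)
{
	return total_allocated;
}

/* Stand-in for rte_flow_create(): pretend the PMD allocates a fixed chunk. */
static void *
insert_fake_rule(void)
{
	total_allocated += FAKE_RULE_SIZE;
	return malloc(FAKE_RULE_SIZE);
}

int
main(void)
{
	void *rules[RULES_COUNT];
	int64_t last_alloc, alloc;
	int i;

	last_alloc = snapshot_alloc_bytes();	/* before insertion */
	for (i = 0; i < RULES_COUNT; i++)
		rules[i] = insert_fake_rule();
	alloc = snapshot_alloc_bytes();		/* after insertion */

	if (last_alloc)
		printf("estimated rule size in DPDK layer: %" PRId64 " Bytes\n",
		       (alloc - last_alloc) / RULES_COUNT);

	for (i = 0; i < RULES_COUNT; i++)
		free(rules[i]);
	return 0;
}

Taking the snapshots around insert_flows() only, rather than around the
whole flows_handler(), is what keeps the estimate meaningful when the
deletion feature is enabled: the rules are still resident between the
two snapshots.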
Signed-off-by: Wisam Jaddo
Reviewed-by: Alexander Kozyrev
Reviewed-by: Suanming Mou
---
 app/test-flow-perf/main.c | 300 ++++++++++++++++++++------------------
 1 file changed, 158 insertions(+), 142 deletions(-)

diff --git a/app/test-flow-perf/main.c b/app/test-flow-perf/main.c
index e2fc5b7f65..5ec9a15c61 100644
--- a/app/test-flow-perf/main.c
+++ b/app/test-flow-perf/main.c
@@ -38,7 +38,7 @@
 #include "config.h"
 #include "flow_gen.h"
 
-#define MAX_ITERATIONS 100
+#define MAX_BATCHES_COUNT 100
 #define DEFAULT_RULES_COUNT 4000000
 #define DEFAULT_RULES_BATCH 100000
 #define DEFAULT_GROUP 0
@@ -826,188 +826,210 @@ print_flow_error(struct rte_flow_error error)
 }
 
 static inline void
-destroy_flows(int port_id, struct rte_flow **flow_list)
+print_rules_batches(double *cpu_time_per_batch)
+{
+	uint8_t idx;
+	double delta;
+	double rate;
+
+	for (idx = 0; idx < MAX_BATCHES_COUNT; idx++) {
+		if (!cpu_time_per_batch[idx])
+			break;
+		delta = (double)(rules_batch / cpu_time_per_batch[idx]);
+		rate = delta / 1000; /* Save rate in K unit. */
+		printf(":: Rules batch #%d: %d rules "
+			"in %f sec[ Rate = %f K Rule/Sec ]\n",
+			idx, rules_batch,
+			cpu_time_per_batch[idx], rate);
+	}
+}
+
+static inline void
+destroy_flows(int port_id, struct rte_flow **flows_list)
 {
 	struct rte_flow_error error;
-	clock_t start_iter, end_iter;
+	clock_t start_batch, end_batch;
 	double cpu_time_used = 0;
-	double flows_rate;
-	double cpu_time_per_iter[MAX_ITERATIONS];
+	double deletion_rate;
+	double cpu_time_per_batch[MAX_BATCHES_COUNT] = { 0 };
 	double delta;
 	uint32_t i;
-	int iter_id;
-
-	for (i = 0; i < MAX_ITERATIONS; i++)
-		cpu_time_per_iter[i] = -1;
-
-	if (rules_batch > rules_count)
-		rules_batch = rules_count;
+	int rules_batch_idx;
 
 	/* Deletion Rate */
-	printf("Flows Deletion on port = %d\n", port_id);
-	start_iter = clock();
+	printf("\nRules Deletion on port = %d\n", port_id);
+
+	start_batch = clock();
 	for (i = 0; i < rules_count; i++) {
-		if (flow_list[i] == 0)
+		if (flows_list[i] == 0)
 			break;
 
 		memset(&error, 0x33, sizeof(error));
-		if (rte_flow_destroy(port_id, flow_list[i], &error)) {
+		if (rte_flow_destroy(port_id, flows_list[i], &error)) {
 			print_flow_error(error);
 			rte_exit(EXIT_FAILURE, "Error in deleting flow");
 		}
 
-		if (i && !((i + 1) % rules_batch)) {
-			/* Save the deletion rate of each iter */
-			end_iter = clock();
-			delta = (double) (end_iter - start_iter);
-			iter_id = ((i + 1) / rules_batch) - 1;
-			cpu_time_per_iter[iter_id] =
-				delta / CLOCKS_PER_SEC;
-			cpu_time_used += cpu_time_per_iter[iter_id];
-			start_iter = clock();
+		/*
+		 * Save the deletion rate for rules batch.
+		 * Check if the deletion reached the rules
+		 * batch counter, then save the deletion rate
+		 * for this batch.
+		 */
+		if (!((i + 1) % rules_batch)) {
+			end_batch = clock();
+			delta = (double) (end_batch - start_batch);
+			rules_batch_idx = ((i + 1) / rules_batch) - 1;
+			cpu_time_per_batch[rules_batch_idx] = delta / CLOCKS_PER_SEC;
+			cpu_time_used += cpu_time_per_batch[rules_batch_idx];
+			start_batch = clock();
 		}
 	}
 
-	/* Deletion rate per iteration */
+	/* Print deletion rates for all batches */
 	if (dump_iterations)
-		for (i = 0; i < MAX_ITERATIONS; i++) {
-			if (cpu_time_per_iter[i] == -1)
-				continue;
-			delta = (double)(rules_batch /
-				cpu_time_per_iter[i]);
-			flows_rate = delta / 1000;
-			printf(":: Iteration #%d: %d flows "
-				"in %f sec[ Rate = %f K/Sec ]\n",
-				i, rules_batch,
-				cpu_time_per_iter[i], flows_rate);
-		}
+		print_rules_batches(cpu_time_per_batch);
 
-	/* Deletion rate for all flows */
-	flows_rate = ((double) (rules_count / cpu_time_used) / 1000);
-	printf("\n:: Total flow deletion rate -> %f K/Sec\n",
-		flows_rate);
-	printf(":: The time for deleting %d in flows %f seconds\n",
+	/* Deletion rate for all rules */
+	deletion_rate = ((double) (rules_count / cpu_time_used) / 1000);
+	printf(":: Total rules deletion rate -> %f K Rule/Sec\n",
+		deletion_rate);
+	printf(":: The time for deleting %d rules in %f seconds\n",
 		rules_count, cpu_time_used);
 }
 
-static inline void
-flows_handler(void)
+static struct rte_flow **
+insert_flows(int port_id)
 {
-	struct rte_flow **flow_list;
+	struct rte_flow **flows_list;
 	struct rte_flow_error error;
-	clock_t start_iter, end_iter;
+	clock_t start_batch, end_batch;
 	double cpu_time_used;
-	double flows_rate;
-	double cpu_time_per_iter[MAX_ITERATIONS];
+	double insertion_rate;
+	double cpu_time_per_batch[MAX_BATCHES_COUNT] = { 0 };
 	double delta;
-	uint16_t nr_ports;
-	uint32_t i;
-	int port_id;
-	int iter_id;
 	uint32_t flow_index;
+	uint32_t counter;
 	uint64_t global_items[MAX_ITEMS_NUM] = { 0 };
 	uint64_t global_actions[MAX_ACTIONS_NUM] = { 0 };
+	int rules_batch_idx;
 
 	global_items[0] = FLOW_ITEM_MASK(RTE_FLOW_ITEM_TYPE_ETH);
 	global_actions[0] = FLOW_ITEM_MASK(RTE_FLOW_ACTION_TYPE_JUMP);
 
-	nr_ports = rte_eth_dev_count_avail();
+	flows_list = rte_zmalloc("flows_list",
+		(sizeof(struct rte_flow *) * rules_count) + 1, 0);
+	if (flows_list == NULL)
+		rte_exit(EXIT_FAILURE, "No Memory available!");
+
+	cpu_time_used = 0;
+	flow_index = 0;
+	if (flow_group > 0) {
+		/*
+		 * Create global rule to jump into flow_group,
+		 * this way the app will avoid the default rules.
+		 *
+		 * Global rule:
+		 * group 0 eth / end actions jump group
+		 */
+		flow = generate_flow(port_id, 0, flow_attrs,
+			global_items, global_actions,
+			flow_group, 0, 0, 0, 0, &error);
+
+		if (flow == NULL) {
+			print_flow_error(error);
+			rte_exit(EXIT_FAILURE, "error in creating flow");
+		}
+		flows_list[flow_index++] = flow;
+	}
+
+	/* Insertion Rate */
+	printf("Rules insertion on port = %d\n", port_id);
+	start_batch = clock();
+	for (counter = 0; counter < rules_count; counter++) {
+		flow = generate_flow(port_id, flow_group,
+			flow_attrs, flow_items, flow_actions,
+			JUMP_ACTION_TABLE, counter,
+			hairpin_queues_num,
+			encap_data, decap_data,
+			&error);
+
+		if (force_quit)
+			counter = rules_count;
+
+		if (!flow) {
+			print_flow_error(error);
+			rte_exit(EXIT_FAILURE, "error in creating flow");
+		}
 
-	for (i = 0; i < MAX_ITERATIONS; i++)
-		cpu_time_per_iter[i] = -1;
+		flows_list[flow_index++] = flow;
+
+		/*
+		 * Save the insertion rate for rules batch.
+		 * Check if the insertion reached the rules
+		 * batch counter, then save the insertion rate
+		 * for this batch.
+		 */
+		if (!((counter + 1) % rules_batch)) {
+			end_batch = clock();
+			delta = (double) (end_batch - start_batch);
+			rules_batch_idx = ((counter + 1) / rules_batch) - 1;
+			cpu_time_per_batch[rules_batch_idx] = delta / CLOCKS_PER_SEC;
+			cpu_time_used += cpu_time_per_batch[rules_batch_idx];
+			start_batch = clock();
+		}
+	}
+
+	/* Print insertion rates for all batches */
+	if (dump_iterations)
+		print_rules_batches(cpu_time_per_batch);
+
+	/* Insertion rate for all rules */
+	insertion_rate = ((double) (rules_count / cpu_time_used) / 1000);
+	printf(":: Total flow insertion rate -> %f K Rule/Sec\n",
+		insertion_rate);
+	printf(":: The time for creating %d flows in %f seconds\n",
+		rules_count, cpu_time_used);
+
+	return flows_list;
+}
+
+static inline void
+flows_handler(void)
+{
+	struct rte_flow **flows_list;
+	uint16_t nr_ports;
+	int64_t alloc, last_alloc;
+	int flow_size_in_bytes;
+	int port_id;
+
+	nr_ports = rte_eth_dev_count_avail();
 
 	if (rules_batch > rules_count)
 		rules_batch = rules_count;
 
-	printf(":: Flows Count per port: %d\n", rules_count);
-
-	flow_list = rte_zmalloc("flow_list",
-		(sizeof(struct rte_flow *) * rules_count) + 1, 0);
-	if (flow_list == NULL)
-		rte_exit(EXIT_FAILURE, "No Memory available!");
+	printf(":: Rules Count per port: %d\n\n", rules_count);
 
 	for (port_id = 0; port_id < nr_ports; port_id++) {
 		/* If port outside portmask */
 		if (!((ports_mask >> port_id) & 0x1))
 			continue;
-		cpu_time_used = 0;
-		flow_index = 0;
-		if (flow_group > 0) {
-			/*
-			 * Create global rule to jump into flow_group,
-			 * this way the app will avoid the default rules.
-			 *
-			 * Global rule:
-			 * group 0 eth / end actions jump group
-			 *
-			 */
-			flow = generate_flow(port_id, 0, flow_attrs,
-				global_items, global_actions,
-				flow_group, 0, 0, 0, 0, &error);
-			if (flow == NULL) {
-				print_flow_error(error);
-				rte_exit(EXIT_FAILURE, "error in creating flow");
-			}
-			flow_list[flow_index++] = flow;
-		}
+
+		/* Insertion part. */
+		last_alloc = (int64_t)dump_socket_mem(stdout);
+		flows_list = insert_flows(port_id);
+		alloc = (int64_t)dump_socket_mem(stdout);
 
-		/* Insertion Rate */
-		printf("Flows insertion on port = %d\n", port_id);
-		start_iter = clock();
-		for (i = 0; i < rules_count; i++) {
-			flow = generate_flow(port_id, flow_group,
-				flow_attrs, flow_items, flow_actions,
-				JUMP_ACTION_TABLE, i,
-				hairpin_queues_num,
-				encap_data, decap_data,
-				&error);
-
-			if (force_quit)
-				i = rules_count;
-
-			if (!flow) {
-				print_flow_error(error);
-				rte_exit(EXIT_FAILURE, "error in creating flow");
-			}
+		/* Deletion part. */
+		if (delete_flag)
+			destroy_flows(port_id, flows_list);
 
-			flow_list[flow_index++] = flow;
-
-			if (i && !((i + 1) % rules_batch)) {
-				/* Save the insertion rate of each iter */
-				end_iter = clock();
-				delta = (double) (end_iter - start_iter);
-				iter_id = ((i + 1) / rules_batch) - 1;
-				cpu_time_per_iter[iter_id] =
-					delta / CLOCKS_PER_SEC;
-				cpu_time_used += cpu_time_per_iter[iter_id];
-				start_iter = clock();
-			}
+		/* Report rte_flow size in huge pages. */
+		if (last_alloc) {
+			flow_size_in_bytes = (alloc - last_alloc) / rules_count;
+			printf("\n:: rte_flow size in DPDK layer: %d Bytes",
+				flow_size_in_bytes);
 		}
-
-		/* Iteration rate per iteration */
-		if (dump_iterations)
-			for (i = 0; i < MAX_ITERATIONS; i++) {
-				if (cpu_time_per_iter[i] == -1)
-					continue;
-				delta = (double)(rules_batch /
-					cpu_time_per_iter[i]);
-				flows_rate = delta / 1000;
-				printf(":: Iteration #%d: %d flows "
-					"in %f sec[ Rate = %f K/Sec ]\n",
-					i, rules_batch,
-					cpu_time_per_iter[i], flows_rate);
-			}
-
-		/* Insertion rate for all flows */
-		flows_rate = ((double) (rules_count / cpu_time_used) / 1000);
-		printf("\n:: Total flow insertion rate -> %f K/Sec\n",
-			flows_rate);
-		printf(":: The time for creating %d in flows %f seconds\n",
-			rules_count, cpu_time_used);
-
-		if (delete_flag)
-			destroy_flows(port_id, flow_list);
 	}
 }
 
@@ -1421,7 +1443,6 @@ main(int argc, char **argv)
 	int ret;
 	uint16_t port;
 	struct rte_flow_error error;
-	int64_t alloc, last_alloc;
 
 	ret = rte_eal_init(argc, argv);
 	if (ret < 0)
@@ -1449,13 +1470,7 @@ main(int argc, char **argv)
 	if (nb_lcores <= 1)
 		rte_exit(EXIT_FAILURE, "This app needs at least two cores\n");
 
-	last_alloc = (int64_t)dump_socket_mem(stdout);
 	flows_handler();
-	alloc = (int64_t)dump_socket_mem(stdout);
-
-	if (last_alloc)
-		fprintf(stdout, ":: Memory allocation change(M): %.6lf\n",
-			(alloc - last_alloc) / 1.0e6);
 
 	if (enable_fwd) {
 		init_lcore_info();
@@ -1468,5 +1483,6 @@ main(int argc, char **argv)
 			printf("Failed to stop device on port %u\n", port);
 		rte_eth_dev_close(port);
 	}
+	printf("\nBye ...\n");
 	return 0;
 }