From patchwork Sun Dec 14 18:10:45 2014
X-Patchwork-Submitter: "Ananyev, Konstantin"
X-Patchwork-Id: 1975
From: Konstantin Ananyev
To: dev@dpdk.org
Date: Sun, 14 Dec 2014 18:10:45 +0000
Message-Id: <1418580659-12595-4-git-send-email-konstantin.ananyev@intel.com>
X-Mailer: git-send-email 1.7.4.1
In-Reply-To: <1418580659-12595-1-git-send-email-konstantin.ananyev@intel.com>
References: <1418580659-12595-1-git-send-email-konstantin.ananyev@intel.com>
Subject: [dpdk-dev] [PATCH 03/17] librte_acl: remove build phase heuristic
	with negative performance effect

The current rule-wildness based heuristic can cause unnecessary splits of
the ruleset, which might have a negative performance effect: more tries to
traverse and bigger runtime (RT) tables. After removing it, a ~50% speedup
was observed on some test cases with big rulesets (~10K rules); no
difference for smaller rulesets.
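To illustrate the ordering that replaces the removed wildness-limit
heuristic: the reworked sort_rules() below is a plain insertion sort over
the linked rule list, most-wild rules first. Here is a minimal,
self-contained sketch of the same scheme (not DPDK code: "struct rule",
cmp_wildness() and the sample main() are simplified stand-ins; a single
wildness value per rule replaces the per-field wildness array of
struct rte_acl_build_rule):

	#include <stdio.h>

	struct rule {
		unsigned int wildness;
		struct rule *next;
	};

	/* Mirrors rule_cmp_wildness(): signed difference, so a higher
	 * wildness sorts first. */
	static int
	cmp_wildness(const struct rule *r1, const struct rule *r2)
	{
		return (int)r1->wildness - (int)r2->wildness;
	}

	static struct rule *
	sort_rules(struct rule *head)
	{
		struct rule *new_head = NULL;
		struct rule *l, *r, **p;

		while (head != NULL) {
			/* detach the first remaining rule */
			r = head;
			head = r->next;
			r->next = NULL;

			/* walk the sorted list until r belongs at *p */
			for (p = &new_head;
					(l = *p) != NULL &&
					cmp_wildness(l, r) >= 0;
					p = &l->next)
				;

			r->next = *p;
			*p = r;
		}
		return new_head;
	}

	int
	main(void)
	{
		struct rule rules[4] = {
			{25, &rules[1]}, {100, &rules[2]},
			{0, &rules[3]}, {50, NULL},
		};
		struct rule *r;

		for (r = sort_rules(&rules[0]); r != NULL; r = r->next)
			printf("%u\n", r->wildness);	/* 100 50 25 0 */
		return 0;
	}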
Signed-off-by: Konstantin Ananyev
---
 lib/librte_acl/acl_bld.c | 277 +++++++++++++++++------------------------------
 1 file changed, 97 insertions(+), 180 deletions(-)

diff --git a/lib/librte_acl/acl_bld.c b/lib/librte_acl/acl_bld.c
index c5a674a..8bf4a54 100644
--- a/lib/librte_acl/acl_bld.c
+++ b/lib/librte_acl/acl_bld.c
@@ -1539,11 +1539,9 @@ acl_calc_wildness(struct rte_acl_build_rule *head,
 	return 0;
 }
 
-static int
-acl_rule_stats(struct rte_acl_build_rule *head, struct rte_acl_config *config,
-	uint32_t *wild_limit)
+static void
+acl_rule_stats(struct rte_acl_build_rule *head, struct rte_acl_config *config)
 {
-	int min;
 	struct rte_acl_build_rule *rule;
 	uint32_t n, m, fields_deactivated = 0;
 	uint32_t start = 0, deactivate = 0;
@@ -1604,129 +1602,58 @@ acl_rule_stats(struct rte_acl_build_rule *head, struct rte_acl_config *config,
 
 		for (k = 0; k < config->num_fields; k++) {
 			if (tally[k][TALLY_DEACTIVATED] == 0) {
-				memcpy(&tally[l][0], &tally[k][0],
+				memmove(&tally[l][0], &tally[k][0],
 					TALLY_NUM * sizeof(tally[0][0]));
-				memcpy(&config->defs[l++],
+				memmove(&config->defs[l++],
 					&config->defs[k],
 					sizeof(struct rte_acl_field_def));
 			}
 		}
 		config->num_fields = l;
 	}
-
-	min = RTE_ACL_SINGLE_TRIE_SIZE;
-	if (config->num_fields == 2)
-		min *= 4;
-	else if (config->num_fields == 3)
-		min *= 3;
-	else if (config->num_fields == 4)
-		min *= 2;
-
-	if (tally[0][TALLY_0] < min)
-		return 0;
-	for (n = 0; n < config->num_fields; n++)
-		wild_limit[n] = 0;
-
-	/*
-	 * If trailing fields are 100% wild, group those together.
-	 * This allows the search length of the trie to be shortened.
-	 */
-	for (n = 1; n < config->num_fields; n++) {
-
-		double rule_percentage = (double)tally[n][TALLY_DEPTH] /
-			tally[n][0];
-
-		if (rule_percentage > RULE_PERCENTAGE) {
-			/* if it crosses an input boundary then round up */
-			while (config->defs[n - 1].input_index ==
-					config->defs[n].input_index)
-				n++;
-
-			/* set the limit for selecting rules */
-			while (n < config->num_fields)
-				wild_limit[n++] = 100;
-
-			if (wild_limit[n - 1] == 100)
-				return 1;
-		}
-	}
-
-	/* look for the most wild that's 40% or more of the rules */
-	for (n = 1; n < config->num_fields; n++) {
-		for (m = TALLY_100; m > 0; m--) {
-
-			double rule_percentage = (double)tally[n][m] /
-				tally[n][0];
-
-			if (tally[n][TALLY_DEACTIVATED] == 0 &&
-					tally[n][TALLY_0] >
-					RTE_ACL_SINGLE_TRIE_SIZE &&
-					rule_percentage > NODE_PERCENTAGE &&
-					rule_percentage < 0.80) {
-				wild_limit[n] = wild_limits[m];
-				return 1;
-			}
-		}
-	}
-	return 0;
 }
 
 static int
-order(struct rte_acl_build_rule **insert, struct rte_acl_build_rule *rule)
+rule_cmp_wildness(struct rte_acl_build_rule *r1, struct rte_acl_build_rule *r2)
 {
 	uint32_t n;
-	struct rte_acl_build_rule *left = *insert;
-
-	if (left == NULL)
-		return 0;
 
-	for (n = 1; n < left->config->num_fields; n++) {
-		int field_index = left->config->defs[n].field_index;
+	for (n = 1; n < r1->config->num_fields; n++) {
+		int field_index = r1->config->defs[n].field_index;
 
-		if (left->wildness[field_index] != rule->wildness[field_index])
-			return (left->wildness[field_index] >=
-				rule->wildness[field_index]);
+		if (r1->wildness[field_index] != r2->wildness[field_index])
+			return (r1->wildness[field_index] -
+				r2->wildness[field_index]);
 	}
 	return 0;
 }
 
 static struct rte_acl_build_rule *
-ordered_insert_rule(struct rte_acl_build_rule *head,
-	struct rte_acl_build_rule *rule)
-{
-	struct rte_acl_build_rule **insert;
-
-	if (rule == NULL)
-		return head;
-
-	rule->next = head;
-	if (head == NULL)
-		return rule;
-
-	insert = &head;
-	while (order(insert, rule))
-		insert = &(*insert)->next;
-
-	rule->next = *insert;
-	*insert = rule;
-	return head;
-}
-
-static struct rte_acl_build_rule *
 sort_rules(struct rte_acl_build_rule *head)
 {
-	struct rte_acl_build_rule *rule, *reordered_head = NULL;
-	struct rte_acl_build_rule *last_rule = NULL;
-
-	for (rule = head; rule != NULL; rule = rule->next) {
-		reordered_head = ordered_insert_rule(reordered_head, last_rule);
-		last_rule = rule;
+	struct rte_acl_build_rule *new_head;
+	struct rte_acl_build_rule *l, *r, **p;
+
+	new_head = NULL;
+	while (head != NULL) {
+		r = head;
+		head = r->next;
+		r->next = NULL;
+		if (new_head == NULL) {
+			new_head = r;
+		} else {
+			for (p = &new_head;
+					(l = *p) != NULL &&
+					rule_cmp_wildness(l, r) >= 0;
+					p = &l->next)
+				;
+
+			r->next = *p;
+			*p = r;
+		}
 	}
 
-	if (last_rule != reordered_head)
-		reordered_head = ordered_insert_rule(reordered_head, last_rule);
-
-	return reordered_head;
+	return new_head;
 }
 
 static uint32_t
@@ -1748,21 +1675,44 @@ acl_build_index(const struct rte_acl_config *config, uint32_t *data_index)
 	return m;
 }
 
+static struct rte_acl_build_rule *
+build_one_trie(struct acl_build_context *context,
+	struct rte_acl_build_rule *rule_sets[RTE_ACL_MAX_TRIES],
+	uint32_t n)
+{
+	struct rte_acl_build_rule *last;
+	struct rte_acl_config *config;
+
+	config = rule_sets[n]->config;
+
+	acl_rule_stats(rule_sets[n], config);
+	rule_sets[n] = sort_rules(rule_sets[n]);
+
+	context->tries[n].type = RTE_ACL_FULL_TRIE;
+	context->tries[n].count = 0;
+
+	context->tries[n].num_data_indexes = acl_build_index(config,
+		context->data_indexes[n]);
+	context->tries[n].data_index = context->data_indexes[n];
+
+	context->bld_tries[n].trie = build_trie(context, rule_sets[n],
+		&last, &context->tries[n].count);
+
+	return last;
+}
+
 static int
 acl_build_tries(struct acl_build_context *context,
 	struct rte_acl_build_rule *head)
 {
 	int32_t rc;
-	uint32_t n, m, num_tries;
+	uint32_t n, num_tries;
 	struct rte_acl_config *config;
-	struct rte_acl_build_rule *last, *rule;
-	uint32_t wild_limit[RTE_ACL_MAX_LEVELS];
+	struct rte_acl_build_rule *last;
 	struct rte_acl_build_rule *rule_sets[RTE_ACL_MAX_TRIES];
 
 	config = head->config;
-	rule = head;
 	rule_sets[0] = head;
-	num_tries = 1;
 
 	/* initialize tries */
 	for (n = 0; n < RTE_DIM(context->tries); n++) {
@@ -1779,91 +1729,55 @@ acl_build_tries(struct acl_build_context *context,
 	if (rc != 0)
 		return rc;
 
-	n = acl_rule_stats(head, config, &wild_limit[0]);
-
-	/* put all rules that fit the wildness criteria into a seperate trie */
-	while (n > 0 && num_tries < RTE_ACL_MAX_TRIES) {
+	for (n = 0;; n = num_tries) {
 
-		struct rte_acl_config *new_config;
-		struct rte_acl_build_rule **prev = &rule_sets[num_tries - 1];
-		struct rte_acl_build_rule *next = head->next;
+		num_tries = n + 1;
 
-		new_config = acl_build_alloc(context, 1, sizeof(*new_config));
-		if (new_config == NULL) {
-			RTE_LOG(ERR, ACL,
-				"Failed to get space for new config\n");
+		last = build_one_trie(context, rule_sets, n);
+		if (context->bld_tries[n].trie == NULL) {
+			RTE_LOG(ERR, ACL, "Build of %u-th trie failed\n", n);
 			return -ENOMEM;
 		}
 
-		memcpy(new_config, config, sizeof(*new_config));
-		config = new_config;
-		rule_sets[num_tries] = NULL;
-
-		for (rule = head; rule != NULL; rule = next) {
+		/* Build of the last trie completed. */
+		if (last == NULL)
+			break;
 
-			int move = 1;
+		if (num_tries == RTE_DIM(context->tries)) {
+			RTE_LOG(ERR, ACL,
+				"Exceeded max number of tries: %u\n",
+				num_tries);
+			return -ENOMEM;
+		}
 
-			next = rule->next;
-			for (m = 0; m < config->num_fields; m++) {
-				int x = config->defs[m].field_index;
-				if (rule->wildness[x] < wild_limit[m]) {
-					move = 0;
-					break;
-				}
-			}
+		/* Trie is getting too big, split remaining rule set. */
+		rule_sets[num_tries] = last->next;
+		last->next = NULL;
+		acl_free_node(context, context->bld_tries[n].trie);
 
-			if (move) {
-				rule->config = new_config;
-				rule->next = rule_sets[num_tries];
-				rule_sets[num_tries] = rule;
-				*prev = next;
-			} else
-				prev = &rule->next;
+		/* Create a new copy of config for remaining rules. */
+		config = acl_build_alloc(context, 1, sizeof(*config));
+		if (config == NULL) {
+			RTE_LOG(ERR, ACL,
+				"New config allocation for %u-th "
+				"trie failed\n", num_tries);
+			return -ENOMEM;
 		}
 
-		head = rule_sets[num_tries];
-		n = acl_rule_stats(rule_sets[num_tries], config,
-			&wild_limit[0]);
-		num_tries++;
-	}
-
-	if (n > 0)
-		RTE_LOG(DEBUG, ACL,
-			"Number of tries(%d) exceeded.\n", RTE_ACL_MAX_TRIES);
+		memcpy(config, rule_sets[n]->config, sizeof(*config));
 
-	for (n = 0; n < num_tries; n++) {
+		/* Make remaining rules use new config. */
+		for (head = rule_sets[num_tries]; head != NULL;
+				head = head->next)
+			head->config = config;
 
-		rule_sets[n] = sort_rules(rule_sets[n]);
-		context->tries[n].type = RTE_ACL_FULL_TRIE;
-		context->tries[n].count = 0;
-		context->tries[n].num_data_indexes =
-			acl_build_index(rule_sets[n]->config,
-			context->data_indexes[n]);
-		context->tries[n].data_index = context->data_indexes[n];
-
-		context->bld_tries[n].trie =
-			build_trie(context, rule_sets[n],
-			&last, &context->tries[n].count);
-		if (context->bld_tries[n].trie == NULL) {
+		/* Rebuild the trie for the reduced rule-set. */
+		last = build_one_trie(context, rule_sets, n);
+		if (context->bld_tries[n].trie == NULL || last != NULL) {
 			RTE_LOG(ERR, ACL, "Build of %u-th trie failed\n", n);
 			return -ENOMEM;
 		}
-
-		if (last != NULL) {
-			rule_sets[num_tries++] = last->next;
-			last->next = NULL;
-			acl_free_node(context, context->bld_tries[n].trie);
-			context->tries[n].count = 0;
-
-			context->bld_tries[n].trie =
-				build_trie(context, rule_sets[n],
-				&last, &context->tries[n].count);
-			if (context->bld_tries[n].trie == NULL) {
-				RTE_LOG(ERR, ACL,
-					"Build of %u-th trie failed\n", n);
-				return -ENOMEM;
-			}
-		}
 	}
 
 	context->num_tries = num_tries;
@@ -1876,15 +1790,18 @@ acl_build_log(const struct acl_build_context *ctx)
 	uint32_t n;
 
 	RTE_LOG(DEBUG, ACL, "Build phase for ACL \"%s\":\n"
+		"nodes created: %u\n"
 		"memory consumed: %zu\n",
 		ctx->acx->name,
+		ctx->num_nodes,
 		ctx->pool.alloc);
 
 	for (n = 0; n < RTE_DIM(ctx->tries); n++) {
 		if (ctx->tries[n].count != 0)
 			RTE_LOG(DEBUG, ACL,
-				"trie %u: number of rules: %u\n",
-				n, ctx->tries[n].count);
+				"trie %u: number of rules: %u, indexes: %u\n",
+				n, ctx->tries[n].count,
+				ctx->tries[n].num_data_indexes);
 	}
 }
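
For reference, a self-contained control-flow sketch of the reworked
acl_build_tries() loop: rules go into one trie until it gets too big, the
set is split at the returned rule and the reduced trie is rebuilt. This is
not DPDK code; the ACL context, config handling and trie construction are
stubbed out, and MAX_TRIES, CAP and build_one() are invented stand-ins
(build_one() caps each "trie" at CAP rules and returns the split point, or
NULL when all remaining rules fit, mimicking build_one_trie()):

	#include <stdio.h>

	#define MAX_TRIES 8
	#define CAP 3	/* pretend a trie overflows after this many rules */

	struct rule {
		int id;
		struct rule *next;
	};

	static struct rule *
	build_one(struct rule *set, unsigned int n)
	{
		unsigned int cnt = 1;
		struct rule *r;

		for (r = set; r->next != NULL && cnt < CAP; r = r->next)
			cnt++;
		printf("trie %u: rules %d..%d\n", n, set->id, r->id);
		return r->next != NULL ? r : NULL;	/* split point or done */
	}

	static int
	build_tries(struct rule *head)
	{
		struct rule *rule_sets[MAX_TRIES];
		struct rule *last;
		unsigned int n, num_tries;

		rule_sets[0] = head;
		for (n = 0;; n = num_tries) {
			num_tries = n + 1;

			last = build_one(rule_sets[n], n);
			if (last == NULL)
				break;	/* last trie completed */

			if (num_tries == MAX_TRIES)
				return -1;	/* exceeded max number of tries */

			/* split remaining rules into the next set, rebuild */
			rule_sets[num_tries] = last->next;
			last->next = NULL;
			last = build_one(rule_sets[n], n);
			if (last != NULL)
				return -1;
		}
		return (int)num_tries;
	}

	int
	main(void)
	{
		struct rule rules[7];
		int i;

		for (i = 0; i < 7; i++) {
			rules[i].id = i;
			rules[i].next = (i + 1 < 7) ? &rules[i + 1] : NULL;
		}
		/* 7 rules at CAP 3 end up in 3 tries */
		printf("num_tries = %d\n", build_tries(&rules[0]));
		return 0;
	}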