From patchwork Mon Apr 1 16:14:40 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Hunt, David" X-Patchwork-Id: 51992 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 8011D4C9D; Mon, 1 Apr 2019 18:14:48 +0200 (CEST) Received: from mga11.intel.com (mga11.intel.com [192.55.52.93]) by dpdk.org (Postfix) with ESMTP id 88DC04C99 for ; Mon, 1 Apr 2019 18:14:46 +0200 (CEST) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga008.jf.intel.com ([10.7.209.65]) by fmsmga102.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 01 Apr 2019 09:14:45 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.60,297,1549958400"; d="scan'208";a="130499046" Received: from silpixa00399952.ir.intel.com (HELO silpixa00399952.ger.corp.intel.com) ([10.237.223.64]) by orsmga008.jf.intel.com with ESMTP; 01 Apr 2019 09:14:43 -0700 From: David Hunt To: dev@dpdk.org Cc: david.hunt@intel.com, anatoly.burakov@intel.com, liang.j.ma@intel.com Date: Mon, 1 Apr 2019 17:14:40 +0100 Message-Id: <20190401161441.29328-1-david.hunt@intel.com> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20190401153044.39273-1-david.hunt@intel.com> References: <20190401153044.39273-1-david.hunt@intel.com> Subject: [dpdk-dev] [PATCH v5 1/2] lib/power: add bit for high frequency cores X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" This patch adds a new bit in the capabilities mask that's returned by rte_power_get_capabilities(), allowing applications to query which cores have higher frequencies so that they can pin workloads accordingly. 
Returned Bits: 0 - Turbo Boost enabled 1 - Higher core base_frequency Signed-off-by: Liang Ma Signed-off-by: David Hunt Reviewed-by: Anatoly Burakov --- lib/librte_power/power_pstate_cpufreq.c | 59 ++++++++++++++++++++++--- lib/librte_power/rte_power.h | 1 + 2 files changed, 55 insertions(+), 5 deletions(-) diff --git a/lib/librte_power/power_pstate_cpufreq.c b/lib/librte_power/power_pstate_cpufreq.c index 9c1a1625f..beac00740 100644 --- a/lib/librte_power/power_pstate_cpufreq.c +++ b/lib/librte_power/power_pstate_cpufreq.c @@ -67,6 +67,8 @@ "/sys/devices/system/cpu/cpu%u/cpufreq/cpuinfo_max_freq" #define POWER_SYSFILE_BASE_MIN_FREQ \ "/sys/devices/system/cpu/cpu%u/cpufreq/cpuinfo_min_freq" +#define POWER_SYSFILE_BASE_FREQ \ + "/sys/devices/system/cpu/cpu%u/cpufreq/base_frequency" #define POWER_MSR_PATH "/dev/cpu/%u/msr" /* @@ -94,9 +96,11 @@ struct pstate_power_info { uint32_t curr_idx; /**< Freq index in freqs array */ uint32_t non_turbo_max_ratio; /**< Non Turbo Max ratio */ uint32_t sys_max_freq; /**< system wide max freq */ + uint32_t core_base_freq; /**< core base freq */ volatile uint32_t state; /**< Power in use state */ uint16_t turbo_available; /**< Turbo Boost available */ uint16_t turbo_enable; /**< Turbo Boost enable/disable */ + uint16_t priority_core; /**< High Performance core */ } __rte_cache_aligned; @@ -145,9 +149,13 @@ out: close(fd); static int power_init_for_setting_freq(struct pstate_power_info *pi) { - FILE *f_min, *f_max; + FILE *f_min, *f_max, *f_base; char fullpath_min[PATH_MAX]; char fullpath_max[PATH_MAX]; + char fullpath_base[PATH_MAX]; + char buf_base[BUFSIZ]; + char *s_base; + uint32_t base_ratio = 0; uint64_t max_non_turbo = 0; snprintf(fullpath_min, sizeof(fullpath_min), POWER_SYSFILE_MIN_FREQ, @@ -168,6 +176,26 @@ power_init_for_setting_freq(struct pstate_power_info *pi) pi->f_cur_min = f_min; pi->f_cur_max = f_max; + snprintf(fullpath_base, sizeof(fullpath_base), POWER_SYSFILE_BASE_FREQ, + pi->lcore_id); + + f_base = 
fopen(fullpath_base, "r"); + if (f_base == NULL) { + /* No sysfs base_frequency, that's OK, continue without */ + base_ratio = 0; + } else { + s_base = fgets(buf_base, sizeof(buf_base), f_base); + FOPS_OR_NULL_GOTO(s_base, out); + + buf_base[BUFSIZ-1] = '\0'; + if (strlen(buf_base)) + /* Strip off terminating '\n' */ + strtok(buf_base, "\n"); + + base_ratio = strtoul(buf_base, NULL, POWER_CONVERT_TO_DECIMAL) + / BUS_FREQ; + } + /* Add MSR read to detect turbo status */ if (power_rdmsr(PLATFORM_INFO, &max_non_turbo, pi->lcore_id) < 0) @@ -179,6 +207,17 @@ power_init_for_setting_freq(struct pstate_power_info *pi) pi->non_turbo_max_ratio = max_non_turbo; + /* + * If base_frequency is reported as greater than the maximum + * non-turbo frequency, then mark it as a high priority core. + */ + if (base_ratio > max_non_turbo) + pi->priority_core = 1; + else + pi->priority_core = 0; + pi->core_base_freq = base_ratio * BUS_FREQ; + +out: return 0; } @@ -215,9 +254,15 @@ set_freq_internal(struct pstate_power_info *pi, uint32_t idx) } /* Turbo is available and enabled, first freq bucket is sys max freq */ - if (pi->turbo_available && pi->turbo_enable && (idx == 0)) - target_freq = pi->sys_max_freq; - else + if (pi->turbo_available && idx == 0) { + if (pi->turbo_enable) + target_freq = pi->sys_max_freq; + else { + RTE_LOG(ERR, POWER, "Turbo is off, frequency can't be scaled up more %u\n", + pi->lcore_id); + return -1; + } + } else target_freq = pi->freqs[idx]; /* Decrease freq, the min freq should be updated first */ @@ -430,7 +475,10 @@ power_get_available_freqs(struct pstate_power_info *pi) pi->sys_max_freq = sys_max_freq; - base_max_freq = pi->non_turbo_max_ratio * BUS_FREQ; + if (pi->priority_core == 1) + base_max_freq = pi->core_base_freq; + else + base_max_freq = pi->non_turbo_max_ratio * BUS_FREQ; POWER_DEBUG_TRACE("sys min %u, sys max %u, base_max %u\n", sys_min_freq, @@ -781,6 +829,7 @@ int power_pstate_get_capabilities(unsigned int lcore_id, pi = 
&lcore_power_info[lcore_id]; caps->capabilities = 0; caps->turbo = !!(pi->turbo_available); + caps->priority = pi->priority_core; return 0; } diff --git a/lib/librte_power/rte_power.h b/lib/librte_power/rte_power.h index c5e8f6b5b..dee7af345 100644 --- a/lib/librte_power/rte_power.h +++ b/lib/librte_power/rte_power.h @@ -258,6 +258,7 @@ struct rte_power_core_capabilities { RTE_STD_C11 struct { uint64_t turbo:1; /**< Turbo can be enabled. */ + uint64_t priority:1; /**< Priority core */ }; }; }; From patchwork Mon Apr 1 16:14:41 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Hunt, David" X-Patchwork-Id: 51993 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 77AC54F9A; Mon, 1 Apr 2019 18:14:50 +0200 (CEST) Received: from mga11.intel.com (mga11.intel.com [192.55.52.93]) by dpdk.org (Postfix) with ESMTP id 1FE2E4D27 for ; Mon, 1 Apr 2019 18:14:48 +0200 (CEST) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga008.jf.intel.com ([10.7.209.65]) by fmsmga102.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 01 Apr 2019 09:14:48 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.60,297,1549958400"; d="scan'208";a="130499065" Received: from silpixa00399952.ir.intel.com (HELO silpixa00399952.ger.corp.intel.com) ([10.237.223.64]) by orsmga008.jf.intel.com with ESMTP; 01 Apr 2019 09:14:47 -0700 From: David Hunt To: dev@dpdk.org Cc: david.hunt@intel.com, anatoly.burakov@intel.com, liang.j.ma@intel.com Date: Mon, 1 Apr 2019 17:14:41 +0100 Message-Id: <20190401161441.29328-2-david.hunt@intel.com> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20190401161441.29328-1-david.hunt@intel.com> References: <20190401153044.39273-1-david.hunt@intel.com> <20190401161441.29328-1-david.hunt@intel.com> Subject: 
[dpdk-dev] [PATCH v5 2/2] examples/distributor: detect high frequency cores X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" The distributor application is bottlenecked by the distributor core, so if we can give more frequency to this core, then the overall performance of the application may increase. This patch uses the rte_power_get_capabilities() API to query the cores provided in the core mask, and if any high frequency cores are found (e.g. Turbo Boost is enabled), we will pin the distributor workload to that core. Signed-off-by: Liang Ma Signed-off-by: David Hunt Reviewed-by: Anatoly Burakov --- examples/distributor/main.c | 201 ++++++++++++++++++++++++------- examples/distributor/meson.build | 2 +- 2 files changed, 156 insertions(+), 47 deletions(-) diff --git a/examples/distributor/main.c b/examples/distributor/main.c index 03a05e3d9..b5499bb12 100644 --- a/examples/distributor/main.c +++ b/examples/distributor/main.c @@ -16,6 +16,7 @@ #include #include #include +#include #define RX_RING_SIZE 1024 #define TX_RING_SIZE 1024 @@ -37,6 +38,7 @@ volatile uint8_t quit_signal; volatile uint8_t quit_signal_rx; volatile uint8_t quit_signal_dist; volatile uint8_t quit_signal_work; +unsigned int power_lib_initialised; static volatile struct app_stats { struct { @@ -281,6 +283,8 @@ lcore_rx(struct lcore_params *p) if (++port == nb_ports) port = 0; } + if (power_lib_initialised) + rte_power_exit(rte_lcore_id()); /* set worker & tx threads quit flag */ printf("\nCore %u exiting rx task.\n", rte_lcore_id()); quit_signal = 1; @@ -363,7 +367,8 @@ lcore_distributor(struct lcore_params *p) } printf("\nCore %u exiting distributor task.\n", rte_lcore_id()); quit_signal_work = 1; - + if (power_lib_initialised) + rte_power_exit(rte_lcore_id()); rte_distributor_flush(d); /* Unblock any returns so workers 
can exit */ rte_distributor_clear_returns(d); @@ -435,6 +440,8 @@ lcore_tx(struct rte_ring *in_r) } } } + if (power_lib_initialised) + rte_power_exit(rte_lcore_id()); printf("\nCore %u exiting tx task.\n", rte_lcore_id()); return 0; } @@ -575,9 +582,33 @@ lcore_worker(struct lcore_params *p) if (num > 0) app_stats.worker_bursts[p->worker_id][num-1]++; } + if (power_lib_initialised) + rte_power_exit(rte_lcore_id()); + rte_free(p); return 0; } +static int +init_power_library(void) +{ + int ret = 0, lcore_id; + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + /* init power management library */ + ret = rte_power_init(lcore_id); + if (ret) { + RTE_LOG(ERR, POWER, + "Library initialization failed on core %u\n", + lcore_id); + /* + * Return on first failure, we'll fall back + * to non-power operation + */ + return ret; + } + } + return ret; +} + /* display usage */ static void print_usage(const char *prgname) @@ -657,7 +688,9 @@ main(int argc, char *argv[]) struct rte_distributor *d; struct rte_ring *dist_tx_ring; struct rte_ring *rx_dist_ring; - unsigned lcore_id, worker_id = 0; + struct rte_power_core_capabilities lcore_cap; + unsigned int lcore_id, worker_id = 0; + int distr_core_id = -1, rx_core_id = -1, tx_core_id = -1; unsigned nb_ports; uint16_t portid; uint16_t nb_ports_available; @@ -687,6 +720,9 @@ main(int argc, char *argv[]) "1 lcore for packet TX\n" "and at least 1 lcore for worker threads\n"); + if (init_power_library() == 0) + power_lib_initialised = 1; + nb_ports = rte_eth_dev_count_avail(); if (nb_ports == 0) rte_exit(EXIT_FAILURE, "Error: no ethernet ports detected\n"); @@ -742,54 +778,123 @@ main(int argc, char *argv[]) if (rx_dist_ring == NULL) rte_exit(EXIT_FAILURE, "Cannot create output ring\n"); - RTE_LCORE_FOREACH_SLAVE(lcore_id) { - if (worker_id == rte_lcore_count() - 3) { - printf("Starting distributor on lcore_id %d\n", + if (power_lib_initialised) { + /* + * Here we'll pre-assign lcore ids to the rx, tx and + * distributor workloads if there's higher 
frequency + * on those cores e.g. if Turbo Boost is enabled. + * It's also worth mentioning that it will assign cores in a + * specific order, so that if there's less than three + * available, the higher frequency cores will go to the + * distributor first, then rx, then tx. + */ + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + + rte_power_get_capabilities(lcore_id, &lcore_cap); + + if (lcore_cap.priority != 1) + continue; + + if (distr_core_id < 0) { + distr_core_id = lcore_id; + printf("Distributor on priority core %d\n", lcore_id); - /* distributor core */ - struct lcore_params *p = - rte_malloc(NULL, sizeof(*p), 0); - if (!p) - rte_panic("malloc failure\n"); - *p = (struct lcore_params){worker_id, d, - rx_dist_ring, dist_tx_ring, mbuf_pool}; - rte_eal_remote_launch( - (lcore_function_t *)lcore_distributor, - p, lcore_id); - } else if (worker_id == rte_lcore_count() - 4) { - printf("Starting tx on worker_id %d, lcore_id %d\n", - worker_id, lcore_id); - /* tx core */ - rte_eal_remote_launch((lcore_function_t *)lcore_tx, - dist_tx_ring, lcore_id); - } else if (worker_id == rte_lcore_count() - 2) { - printf("Starting rx on worker_id %d, lcore_id %d\n", - worker_id, lcore_id); - /* rx core */ - struct lcore_params *p = - rte_malloc(NULL, sizeof(*p), 0); - if (!p) - rte_panic("malloc failure\n"); - *p = (struct lcore_params){worker_id, d, rx_dist_ring, - dist_tx_ring, mbuf_pool}; - rte_eal_remote_launch((lcore_function_t *)lcore_rx, - p, lcore_id); - } else { - printf("Starting worker on worker_id %d, lcore_id %d\n", - worker_id, lcore_id); - struct lcore_params *p = - rte_malloc(NULL, sizeof(*p), 0); - if (!p) - rte_panic("malloc failure\n"); - *p = (struct lcore_params){worker_id, d, rx_dist_ring, - dist_tx_ring, mbuf_pool}; - - rte_eal_remote_launch((lcore_function_t *)lcore_worker, - p, lcore_id); + continue; + } + if (rx_core_id < 0) { + rx_core_id = lcore_id; + printf("Rx on priority core %d\n", + lcore_id); + continue; + } + if (tx_core_id < 0) { + tx_core_id = 
lcore_id; + printf("Tx on priority core %d\n", + lcore_id); + continue; + } + } + } + + /* + * If there's any of the key workloads left without an lcore_id + * after the high performing core assignment above, pre-assign + * them here. + */ + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + if (lcore_id == (unsigned int)distr_core_id || + lcore_id == (unsigned int)rx_core_id || + lcore_id == (unsigned int)tx_core_id) + continue; + if (distr_core_id < 0) { + distr_core_id = lcore_id; + printf("Distributor on core %d\n", lcore_id); + continue; + } + if (rx_core_id < 0) { + rx_core_id = lcore_id; + printf("Rx on core %d\n", lcore_id); + continue; + } + if (tx_core_id < 0) { + tx_core_id = lcore_id; + printf("Tx on core %d\n", lcore_id); + continue; } - worker_id++; } + printf(" tx id %d, dist id %d, rx id %d\n", + tx_core_id, + distr_core_id, + rx_core_id); + + /* + * Kick off all the worker threads first, avoiding the pre-assigned + * lcore_ids for tx, rx and distributor workloads. + */ + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + if (lcore_id == (unsigned int)distr_core_id || + lcore_id == (unsigned int)rx_core_id || + lcore_id == (unsigned int)tx_core_id) + continue; + printf("Starting thread %d as worker, lcore_id %d\n", + worker_id, lcore_id); + struct lcore_params *p = + rte_malloc(NULL, sizeof(*p), 0); + if (!p) + rte_panic("malloc failure\n"); + *p = (struct lcore_params){worker_id++, d, rx_dist_ring, + dist_tx_ring, mbuf_pool}; + + rte_eal_remote_launch((lcore_function_t *)lcore_worker, + p, lcore_id); + } + + /* Start tx core */ + rte_eal_remote_launch((lcore_function_t *)lcore_tx, + dist_tx_ring, tx_core_id); + + /* Start distributor core */ + struct lcore_params *pd = + rte_malloc(NULL, sizeof(*pd), 0); + if (!pd) + rte_panic("malloc failure\n"); + *pd = (struct lcore_params){worker_id++, d, + rx_dist_ring, dist_tx_ring, mbuf_pool}; + rte_eal_remote_launch( + (lcore_function_t *)lcore_distributor, + pd, distr_core_id); + + /* Start rx core */ + struct lcore_params *pr = + 
rte_malloc(NULL, sizeof(*pr), 0); + if (!pr) + rte_panic("malloc failure\n"); + *pr = (struct lcore_params){worker_id++, d, rx_dist_ring, + dist_tx_ring, mbuf_pool}; + rte_eal_remote_launch((lcore_function_t *)lcore_rx, + pr, rx_core_id); + freq = rte_get_timer_hz(); t = rte_rdtsc() + freq; while (!quit_signal_dist) { @@ -806,5 +911,9 @@ main(int argc, char *argv[]) } print_stats(); + + rte_free(pd); + rte_free(pr); + return 0; } diff --git a/examples/distributor/meson.build b/examples/distributor/meson.build index 88c001f56..8cf2ca1da 100644 --- a/examples/distributor/meson.build +++ b/examples/distributor/meson.build @@ -6,7 +6,7 @@ # To build this example as a standalone application with an already-installed # DPDK instance, use 'make' -deps += 'distributor' +deps += ['distributor', 'power'] sources = files( 'main.c' )