From patchwork Wed Jan 20 11:50:26 2021
X-Patchwork-Submitter: "Burakov, Anatoly"
X-Patchwork-Id: 86971
X-Patchwork-Delegate: thomas@monjalon.net
From: Anatoly Burakov
To: dev@dpdk.org
Cc: Timothy McDaniel, Beilei Xing, Jeff Guo, Qiming Yang, Qi Zhang, Haiyue Wang, Bruce Richardson, Konstantin Ananyev, thomas@monjalon.net
Date: Wed, 20 Jan 2021 11:50:26 +0000
Subject: [dpdk-dev] [PATCH v19 1/4] eal: rename power monitor condition member

The `data_sz` name is fine, but it looks out of place because nothing else in that structure has a "data" prefix. Rename it to "size", and add more clarity to the comments around each struct member.
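For illustration, a minimal sketch of how a driver-side callback might fill in the renamed member; the function name, the status-word argument and the EXAMPLE_DD_BIT position are placeholders, loosely modeled on the i40e/ice/ixgbe changes in this patch:

#include <stdint.h>

#include <rte_power_intrinsics.h>

/* hypothetical "descriptor done" bit position -- placeholder only */
#define EXAMPLE_DD_BIT 0

/* Fill a monitor condition for a 64-bit descriptor status word. */
static int
example_get_monitor_addr(volatile uint64_t *status_word,
		struct rte_power_monitor_cond *pmc)
{
	pmc->addr = status_word;            /* location the CPU will monitor */
	pmc->val  = 1ULL << EXAMPLE_DD_BIT; /* value meaning "already written" */
	pmc->mask = 1ULL << EXAMPLE_DD_BIT; /* compare only the DD bit */
	pmc->size = sizeof(uint64_t);       /* renamed from 'data_sz' */
	return 0;
}
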
Fixes: 6a17919b0e2a ("eal: change power intrinsics API") Signed-off-by: Anatoly Burakov --- drivers/event/dlb/dlb.c | 2 +- drivers/event/dlb2/dlb2.c | 2 +- drivers/net/i40e/i40e_rxtx.c | 2 +- drivers/net/ice/ice_rxtx.c | 2 +- drivers/net/ixgbe/ixgbe_rxtx.c | 2 +- .../include/generic/rte_power_intrinsics.h | 19 +++++++++++-------- lib/librte_eal/x86/rte_power_intrinsics.c | 4 ++-- 7 files changed, 18 insertions(+), 15 deletions(-) diff --git a/drivers/event/dlb/dlb.c b/drivers/event/dlb/dlb.c index d2f2026291..a65f70882f 100644 --- a/drivers/event/dlb/dlb.c +++ b/drivers/event/dlb/dlb.c @@ -3185,7 +3185,7 @@ dlb_dequeue_wait(struct dlb_eventdev *dlb, pmc.addr = monitor_addr; pmc.val = expected_value; pmc.mask = qe_mask.raw_qe[1]; - pmc.data_sz = sizeof(uint64_t); + pmc.size = sizeof(uint64_t); rte_power_monitor(&pmc, timeout + start_ticks); diff --git a/drivers/event/dlb2/dlb2.c b/drivers/event/dlb2/dlb2.c index c9a8a02278..5782960158 100644 --- a/drivers/event/dlb2/dlb2.c +++ b/drivers/event/dlb2/dlb2.c @@ -2894,7 +2894,7 @@ dlb2_dequeue_wait(struct dlb2_eventdev *dlb2, pmc.addr = monitor_addr; pmc.val = expected_value; pmc.mask = qe_mask.raw_qe[1]; - pmc.data_sz = sizeof(uint64_t); + pmc.size = sizeof(uint64_t); rte_power_monitor(&pmc, timeout + start_ticks); diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c index 0b4220fc9c..d8e9db55d8 100644 --- a/drivers/net/i40e/i40e_rxtx.c +++ b/drivers/net/i40e/i40e_rxtx.c @@ -92,7 +92,7 @@ i40e_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc) pmc->mask = rte_cpu_to_le_64(1 << I40E_RX_DESC_STATUS_DD_SHIFT); /* registers are 64-bit */ - pmc->data_sz = sizeof(uint64_t); + pmc->size = sizeof(uint64_t); return 0; } diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c index 066651dc48..5909e3707b 100644 --- a/drivers/net/ice/ice_rxtx.c +++ b/drivers/net/ice/ice_rxtx.c @@ -46,7 +46,7 @@ ice_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc) pmc->mask = rte_cpu_to_le_16(1 << ICE_RX_FLEX_DESC_STATUS0_DD_S); /* register is 16-bit */ - pmc->data_sz = sizeof(uint16_t); + pmc->size = sizeof(uint16_t); return 0; } diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c index cc8f70e6dd..c0305a8238 100644 --- a/drivers/net/ixgbe/ixgbe_rxtx.c +++ b/drivers/net/ixgbe/ixgbe_rxtx.c @@ -1389,7 +1389,7 @@ ixgbe_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc) pmc->mask = rte_cpu_to_le_32(IXGBE_RXDADV_STAT_DD); /* the registers are 32-bit */ - pmc->data_sz = sizeof(uint32_t); + pmc->size = sizeof(uint32_t); return 0; } diff --git a/lib/librte_eal/include/generic/rte_power_intrinsics.h b/lib/librte_eal/include/generic/rte_power_intrinsics.h index 6109d28faa..5960c48c80 100644 --- a/lib/librte_eal/include/generic/rte_power_intrinsics.h +++ b/lib/librte_eal/include/generic/rte_power_intrinsics.h @@ -20,14 +20,17 @@ struct rte_power_monitor_cond { volatile void *addr; /**< Address to monitor for changes */ - uint64_t val; /**< Before attempting the monitoring, the address - * may be read and compared against this value. - **/ - uint64_t mask; /**< 64-bit mask to extract current value from addr */ - uint8_t data_sz; /**< Data size (in bytes) that will be used to compare - * expected value with the memory address. Can be 1, - * 2, 4, or 8. Supplying any other value will lead to - * undefined result. */ + uint64_t val; /**< If the `mask` is non-zero, location pointed + * to by `addr` will be read and compared + * against this value. 
+ */ + uint64_t mask; /**< 64-bit mask to extract value read from `addr` */ + uint8_t size; /**< Data size (in bytes) that will be used to compare + * expected value (`val`) with data read from the + * monitored memory location (`addr`). Can be 1, 2, + * 4, or 8. Supplying any other value will result in + * an error. + */ }; /** diff --git a/lib/librte_eal/x86/rte_power_intrinsics.c b/lib/librte_eal/x86/rte_power_intrinsics.c index af3ae3237c..39ea9fdecd 100644 --- a/lib/librte_eal/x86/rte_power_intrinsics.c +++ b/lib/librte_eal/x86/rte_power_intrinsics.c @@ -88,7 +88,7 @@ rte_power_monitor(const struct rte_power_monitor_cond *pmc, if (pmc == NULL) return -EINVAL; - if (__check_val_size(pmc->data_sz) < 0) + if (__check_val_size(pmc->size) < 0) return -EINVAL; s = &wait_status[lcore_id]; @@ -113,7 +113,7 @@ rte_power_monitor(const struct rte_power_monitor_cond *pmc, /* if we have a comparison mask, we might not need to sleep at all */ if (pmc->mask) { const uint64_t cur_value = __get_umwait_val( - pmc->addr, pmc->data_sz); + pmc->addr, pmc->size); const uint64_t masked = cur_value & pmc->mask; /* if the masked value is already matching, abort */ From patchwork Wed Jan 20 11:50:27 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Burakov, Anatoly" X-Patchwork-Id: 86972 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id DA1B8A0A05; Wed, 20 Jan 2021 12:50:46 +0100 (CET) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 92D0C140D24; Wed, 20 Jan 2021 12:50:38 +0100 (CET) Received: from mga14.intel.com (mga14.intel.com [192.55.52.115]) by mails.dpdk.org (Postfix) with ESMTP id 964A6140D1B for ; Wed, 20 Jan 2021 12:50:35 +0100 (CET) IronPort-SDR: Vg8ye3VjjQKirF71Aka2C3VcW2wSVOYNihWlHj6uG0q3XtUdfGo0d2PYYCYBBSHoQ18LiDZT6U ygRxduwqtntA== X-IronPort-AV: E=McAfee;i="6000,8403,9869"; a="178313181" X-IronPort-AV: E=Sophos;i="5.79,361,1602572400"; d="scan'208";a="178313181" Received: from fmsmga006.fm.intel.com ([10.253.24.20]) by fmsmga103.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 20 Jan 2021 03:50:35 -0800 IronPort-SDR: CZGXp0K7pyrLPGRZSGjchERbwoMXjNAn6unSW8qHcbnV+A+nF2wLrF4Fhq3bFXDRt0eHWyT+5u R/0xhp3ocrEw== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.79,361,1602572400"; d="scan'208";a="571319262" Received: from silpixa00399498.ir.intel.com (HELO silpixa00399498.ger.corp.intel.com) ([10.237.222.179]) by fmsmga006.fm.intel.com with ESMTP; 20 Jan 2021 03:50:34 -0800 From: Anatoly Burakov To: dev@dpdk.org Cc: thomas@monjalon.net Date: Wed, 20 Jan 2021 11:50:27 +0000 Message-Id: X-Mailer: git-send-email 2.25.1 In-Reply-To: References: MIME-Version: 1.0 Subject: [dpdk-dev] [PATCH v19 2/4] eal: improve comments around power monitoring API X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Currently, the API documentation is ambiguous as to what happens when certain conditions are met. Document the behavior explicitly, as well as fix some typos and outdated comments. 
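To make the behaviour documented below concrete, here is a minimal application-side sketch; it assumes a PMD that implements the separately added rte_eth_get_monitor_addr(), and the ~1 ms deadline is an arbitrary placeholder:

#include <rte_cpuflags.h>
#include <rte_cycles.h>
#include <rte_ethdev.h>
#include <rte_power_intrinsics.h>

/* Sleep until the Rx queue's next descriptor is written, a ~1 ms deadline
 * passes, or the descriptor turns out to have been written already (the
 * masked value at pmc.addr already matches pmc.val).
 */
static void
example_wait_for_traffic(uint16_t port_id, uint16_t queue_id)
{
	struct rte_cpu_intrinsics intr;
	struct rte_power_monitor_cond pmc;

	rte_cpu_get_intrinsics_support(&intr);
	if (!intr.power_monitor)
		return; /* monitor intrinsic not supported, keep busy polling */

	if (rte_eth_get_monitor_addr(port_id, queue_id, &pmc) != 0)
		return; /* PMD does not provide a monitor condition */

	rte_power_monitor(&pmc, rte_rdtsc() + rte_get_timer_hz() / 1000);
}

/* From another thread: safe to call even if the lcore is not sleeping. */
static void
example_cancel_wait(unsigned int lcore_id)
{
	rte_power_monitor_wakeup(lcore_id);
}
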
Fixes: 6a17919b0e2a ("eal: change power intrinsics API") Signed-off-by: Anatoly Burakov --- .../include/generic/rte_power_intrinsics.h | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/lib/librte_eal/include/generic/rte_power_intrinsics.h b/lib/librte_eal/include/generic/rte_power_intrinsics.h index 5960c48c80..dddca3d41c 100644 --- a/lib/librte_eal/include/generic/rte_power_intrinsics.h +++ b/lib/librte_eal/include/generic/rte_power_intrinsics.h @@ -35,17 +35,20 @@ struct rte_power_monitor_cond { /** * @warning - * @b EXPERIMENTAL: this API may change without prior notice + * @b EXPERIMENTAL: this API may change without prior notice. * * Monitor specific address for changes. This will cause the CPU to enter an * architecture-defined optimized power state until either the specified * memory address is written to, a certain TSC timestamp is reached, or other * reasons cause the CPU to wake up. * - * Additionally, an `expected` 64-bit value and 64-bit mask are provided. If - * mask is non-zero, the current value pointed to by the `p` pointer will be - * checked against the expected value, and if they match, the entering of - * optimized power state may be aborted. + * Additionally, an expected value (`pmc->val`), mask (`pmc->mask`), and data + * size (`pmc->size`) are provided in the `pmc` power monitoring condition. If + * the mask is non-zero, the current value pointed to by the `pmc->addr` pointer + * will be read and compared against the expected value, and if they match, the + * entering of optimized power state will be aborted. This is intended to + * prevent the CPU from entering optimized power state and waiting on a write + * that has already happened by the time this API is called. * * @warning It is responsibility of the user to check if this function is * supported at runtime using `rte_cpu_get_intrinsics_support()` API call. @@ -67,11 +70,14 @@ int rte_power_monitor(const struct rte_power_monitor_cond *pmc, /** * @warning - * @b EXPERIMENTAL: this API may change without prior notice + * @b EXPERIMENTAL: this API may change without prior notice. * * Wake up a specific lcore that is in a power optimized state and is monitoring * an address. * + * @note It is safe to call this function if the lcore in question is not + * sleeping. The function will have no effect. + * * @note This function will *not* wake up a core that is in a power optimized * state due to calling `rte_power_pause`. * @@ -83,7 +89,7 @@ int rte_power_monitor_wakeup(const unsigned int lcore_id); /** * @warning - * @b EXPERIMENTAL: this API may change without prior notice + * @b EXPERIMENTAL: this API may change without prior notice. * * Enter an architecture-defined optimized power state until a certain TSC * timestamp is reached. 
From patchwork Wed Jan 20 11:50:28 2021
X-Patchwork-Submitter: "Burakov, Anatoly"
X-Patchwork-Id: 86973
X-Patchwork-Delegate: thomas@monjalon.net
From: Anatoly Burakov
To: dev@dpdk.org
Cc: Liang Ma, David Hunt, Ray Kinsella, Neil Horman, thomas@monjalon.net
Date: Wed, 20 Jan 2021 11:50:28 +0000
Message-Id: <3ec9e024d047db8b2113af2af06e5c70ecdf7b86.1611143368.git.anatoly.burakov@intel.com>
Subject: [dpdk-dev] [PATCH v19 3/4] power: add PMD power management API and callback

From: Liang Ma

Add a simple on/off switch that enables saving power when no packets are arriving. It is based on counting the number of empty polls and, when the number reaches a certain threshold, entering an architecture-defined optimized power state that lasts until either a TSC timestamp expires or packets arrive.

This API mandates a core-to-single-queue mapping (that is, multiple queues per device are supported, but they have to be polled on different cores). The design is based on PMD RX callbacks.

1. UMWAIT/UMONITOR: When a certain threshold of empty polls is reached, the core will go into a power-optimized sleep while waiting for the address of the next RX descriptor to be written to.

2. TPAUSE/Pause instruction: This method uses the pause (or TPAUSE, if available) instruction to avoid busy polling.

3. Frequency scaling: This method reuses the existing DPDK power library to scale the core frequency up/down depending on traffic volume.
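As a minimal sketch of how these three schemes map onto the new API (the lcore/port/queue values and the capability flags are placeholders chosen by the caller):

#include <stdbool.h>
#include <stdint.h>

#include <rte_power_pmd_mgmt.h>

/* Pick one of the three schemes for an Rx queue and enable it. */
static int
example_enable_queue_pmgmt(unsigned int lcore_id, uint16_t port_id,
		uint16_t queue_id, bool have_monitor, bool can_scale_freq)
{
	enum rte_power_pmd_mgmt_type mode;

	if (have_monitor)
		mode = RTE_POWER_MGMT_TYPE_MONITOR; /* 1. UMWAIT/UMONITOR */
	else if (can_scale_freq)
		mode = RTE_POWER_MGMT_TYPE_SCALE;   /* 3. frequency scaling */
	else
		mode = RTE_POWER_MGMT_TYPE_PAUSE;   /* 2. TPAUSE/pause */

	return rte_power_ethdev_pmgmt_queue_enable(lcore_id, port_id,
			queue_id, mode);
}
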
Signed-off-by: Liang Ma Signed-off-by: Anatoly Burakov Acked-by: David Hunt --- Notes: v17: - Added memory barriers suggested by Konstantin - Removed the BUSY state doc/guides/prog_guide/power_man.rst | 41 +++ doc/guides/rel_notes/release_21_02.rst | 10 + lib/librte_power/meson.build | 5 +- lib/librte_power/rte_power_pmd_mgmt.c | 365 +++++++++++++++++++++++++ lib/librte_power/rte_power_pmd_mgmt.h | 91 ++++++ lib/librte_power/version.map | 5 + 6 files changed, 515 insertions(+), 2 deletions(-) create mode 100644 lib/librte_power/rte_power_pmd_mgmt.c create mode 100644 lib/librte_power/rte_power_pmd_mgmt.h diff --git a/doc/guides/prog_guide/power_man.rst b/doc/guides/prog_guide/power_man.rst index 0a3755a901..f36ba0027c 100644 --- a/doc/guides/prog_guide/power_man.rst +++ b/doc/guides/prog_guide/power_man.rst @@ -192,6 +192,47 @@ User Cases ---------- The mechanism can applied to any device which is based on polling. e.g. NIC, FPGA. +Ethernet PMD Power Management API +--------------------------------- + +Abstract +~~~~~~~~ +Existing power management mechanisms require developers to change application +design or change code to make use of it. The PMD power management API provides a +convenient alternative by utilizing Ethernet PMD RX callbacks, and triggering +power saving whenever empty poll count reaches a certain number. + + * Monitor + + This power saving scheme will put the CPU into optimized power state and use + the ``rte_power_monitor()`` function to monitor the Ethernet PMD RX + descriptor address, and wake the CPU up whenever there's new traffic. + + * Pause + + This power saving scheme will avoid busy polling by either entering + power-optimized sleep state with ``rte_power_pause()`` function, or, if it's + not available, use ``rte_pause()``. + + * Frequency scaling + + This power saving scheme will use existing ``librte_power`` library + functionality to scale the core frequency up/down depending on traffic + volume. + + +.. note:: + + Currently, this power management API is limited to mandatory mapping of 1 + queue to 1 core (multiple queues are supported, but they must be polled from + different cores). + +API Overview for Ethernet PMD Power Management +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +* **Queue Enable**: Enable specific power scheme for certain queue/port/core. + +* **Queue Disable**: Disable power scheme for certain queue/port/core. + References ---------- diff --git a/doc/guides/rel_notes/release_21_02.rst b/doc/guides/rel_notes/release_21_02.rst index be5ea4370c..1988960b76 100644 --- a/doc/guides/rel_notes/release_21_02.rst +++ b/doc/guides/rel_notes/release_21_02.rst @@ -76,6 +76,16 @@ New Features * Added inner UDP/IPv4 support for VXLAN IPv4 GSO. +* **Added Ethernet PMD power management helper API.** + + A new helper API has been added to make using Ethernet PMD power management + easier for the user: ``rte_power_ethdev_pmgmt_queue_enable()``. 
Three power + management schemes are supported initially: + + * Power saving based on UMWAIT instruction (x86 only) + * Power saving based on ``rte_pause()`` (generic) or TPAUSE instruction (x86 only) + * Power saving based on frequency scaling through the ``librte_power`` library + Removed Items ------------- diff --git a/lib/librte_power/meson.build b/lib/librte_power/meson.build index 4b4cf1b90b..e5a11cb834 100644 --- a/lib/librte_power/meson.build +++ b/lib/librte_power/meson.build @@ -9,6 +9,7 @@ sources = files('rte_power.c', 'power_acpi_cpufreq.c', 'power_kvm_vm.c', 'guest_channel.c', 'rte_power_empty_poll.c', 'power_pstate_cpufreq.c', + 'rte_power_pmd_mgmt.c', 'power_common.c') -headers = files('rte_power.h','rte_power_empty_poll.h') -deps += ['timer'] +headers = files('rte_power.h','rte_power_empty_poll.h','rte_power_pmd_mgmt.h') +deps += ['timer', 'ethdev'] diff --git a/lib/librte_power/rte_power_pmd_mgmt.c b/lib/librte_power/rte_power_pmd_mgmt.c new file mode 100644 index 0000000000..454ef7091e --- /dev/null +++ b/lib/librte_power/rte_power_pmd_mgmt.c @@ -0,0 +1,365 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2020 Intel Corporation + */ + +#include +#include +#include +#include +#include +#include + +#include "rte_power_pmd_mgmt.h" + +#define EMPTYPOLL_MAX 512 + +/* store some internal state */ +static struct pmd_conf_data { + /** what do we support? */ + struct rte_cpu_intrinsics intrinsics_support; + /** pre-calculated tsc diff for 1us */ + uint64_t tsc_per_us; + /** how many rte_pause can we fit in a microsecond? */ + uint64_t pause_per_us; +} global_data; + +/** + * Possible power management states of an ethdev port. + */ +enum pmd_mgmt_state { + /** Device power management is disabled. */ + PMD_MGMT_DISABLED = 0, + /** Device power management is enabled. */ + PMD_MGMT_ENABLED +}; + +struct pmd_queue_cfg { + volatile enum pmd_mgmt_state pwr_mgmt_state; + /**< State of power management for this queue */ + enum rte_power_pmd_mgmt_type cb_mode; + /**< Callback mode for this queue */ + const struct rte_eth_rxtx_callback *cur_cb; + /**< Callback instance */ + volatile bool umwait_in_progress; + /**< are we currently sleeping? 
*/ + uint64_t empty_poll_stats; + /**< Number of empty polls */ +} __rte_cache_aligned; + +static struct pmd_queue_cfg port_cfg[RTE_MAX_ETHPORTS][RTE_MAX_QUEUES_PER_PORT]; + +static void +calc_tsc(void) +{ + const uint64_t hz = rte_get_timer_hz(); + const uint64_t tsc_per_us = hz / US_PER_S; /* 1us */ + + global_data.tsc_per_us = tsc_per_us; + + /* only do this if we don't have tpause */ + if (!global_data.intrinsics_support.power_pause) { + const uint64_t start = rte_rdtsc_precise(); + const uint32_t n_pauses = 10000; + double us, us_per_pause; + uint64_t end; + unsigned int i; + + /* estimate number of rte_pause() calls per us*/ + for (i = 0; i < n_pauses; i++) + rte_pause(); + + end = rte_rdtsc_precise(); + us = (end - start) / (double)tsc_per_us; + us_per_pause = us / n_pauses; + + global_data.pause_per_us = (uint64_t)(1.0 / us_per_pause); + } +} + +static uint16_t +clb_umwait(uint16_t port_id, uint16_t qidx, struct rte_mbuf **pkts __rte_unused, + uint16_t nb_rx, uint16_t max_pkts __rte_unused, + void *addr __rte_unused) +{ + + struct pmd_queue_cfg *q_conf; + + q_conf = &port_cfg[port_id][qidx]; + + if (unlikely(nb_rx == 0)) { + q_conf->empty_poll_stats++; + if (unlikely(q_conf->empty_poll_stats > EMPTYPOLL_MAX)) { + struct rte_power_monitor_cond pmc; + uint16_t ret; + + /* + * we might get a cancellation request while being + * inside the callback, in which case the wakeup + * wouldn't work because it would've arrived too early. + * + * to get around this, we notify the other thread that + * we're sleeping, so that it can spin until we're done. + * unsolicited wakeups are perfectly safe. + */ + q_conf->umwait_in_progress = true; + + rte_atomic_thread_fence(__ATOMIC_SEQ_CST); + + /* check if we need to cancel sleep */ + if (q_conf->pwr_mgmt_state == PMD_MGMT_ENABLED) { + /* use monitoring condition to sleep */ + ret = rte_eth_get_monitor_addr(port_id, qidx, + &pmc); + if (ret == 0) + rte_power_monitor(&pmc, -1ULL); + } + q_conf->umwait_in_progress = false; + + rte_atomic_thread_fence(__ATOMIC_SEQ_CST); + } + } else + q_conf->empty_poll_stats = 0; + + return nb_rx; +} + +static uint16_t +clb_pause(uint16_t port_id, uint16_t qidx, struct rte_mbuf **pkts __rte_unused, + uint16_t nb_rx, uint16_t max_pkts __rte_unused, + void *addr __rte_unused) +{ + struct pmd_queue_cfg *q_conf; + + q_conf = &port_cfg[port_id][qidx]; + + if (unlikely(nb_rx == 0)) { + q_conf->empty_poll_stats++; + /* sleep for 1 microsecond */ + if (unlikely(q_conf->empty_poll_stats > EMPTYPOLL_MAX)) { + /* use tpause if we have it */ + if (global_data.intrinsics_support.power_pause) { + const uint64_t cur = rte_rdtsc(); + const uint64_t wait_tsc = + cur + global_data.tsc_per_us; + rte_power_pause(wait_tsc); + } else { + uint64_t i; + for (i = 0; i < global_data.pause_per_us; i++) + rte_pause(); + } + } + } else + q_conf->empty_poll_stats = 0; + + return nb_rx; +} + +static uint16_t +clb_scale_freq(uint16_t port_id, uint16_t qidx, + struct rte_mbuf **pkts __rte_unused, uint16_t nb_rx, + uint16_t max_pkts __rte_unused, void *_ __rte_unused) +{ + struct pmd_queue_cfg *q_conf; + + q_conf = &port_cfg[port_id][qidx]; + + if (unlikely(nb_rx == 0)) { + q_conf->empty_poll_stats++; + if (unlikely(q_conf->empty_poll_stats > EMPTYPOLL_MAX)) + /* scale down freq */ + rte_power_freq_min(rte_lcore_id()); + } else { + q_conf->empty_poll_stats = 0; + /* scale up freq */ + rte_power_freq_max(rte_lcore_id()); + } + + return nb_rx; +} + +int +rte_power_ethdev_pmgmt_queue_enable(unsigned int lcore_id, uint16_t port_id, + uint16_t queue_id, 
enum rte_power_pmd_mgmt_type mode) +{ + struct pmd_queue_cfg *queue_cfg; + struct rte_eth_dev_info info; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + if (queue_id >= RTE_MAX_QUEUES_PER_PORT || lcore_id >= RTE_MAX_LCORE) { + ret = -EINVAL; + goto end; + } + + if (rte_eth_dev_info_get(port_id, &info) < 0) { + ret = -EINVAL; + goto end; + } + + /* check if queue id is valid */ + if (queue_id >= info.nb_rx_queues) { + ret = -EINVAL; + goto end; + } + + queue_cfg = &port_cfg[port_id][queue_id]; + + if (queue_cfg->pwr_mgmt_state != PMD_MGMT_DISABLED) { + ret = -EINVAL; + goto end; + } + + /* we need this in various places */ + rte_cpu_get_intrinsics_support(&global_data.intrinsics_support); + + switch (mode) { + case RTE_POWER_MGMT_TYPE_MONITOR: + { + struct rte_power_monitor_cond dummy; + + /* check if rte_power_monitor is supported */ + if (!global_data.intrinsics_support.power_monitor) { + RTE_LOG(DEBUG, POWER, "Monitoring intrinsics are not supported\n"); + ret = -ENOTSUP; + goto end; + } + + /* check if the device supports the necessary PMD API */ + if (rte_eth_get_monitor_addr(port_id, queue_id, + &dummy) == -ENOTSUP) { + RTE_LOG(DEBUG, POWER, "The device does not support rte_eth_get_monitor_addr\n"); + ret = -ENOTSUP; + goto end; + } + /* initialize data before enabling the callback */ + queue_cfg->empty_poll_stats = 0; + queue_cfg->cb_mode = mode; + queue_cfg->umwait_in_progress = false; + queue_cfg->pwr_mgmt_state = PMD_MGMT_ENABLED; + + /* ensure we update our state before callback starts */ + rte_atomic_thread_fence(__ATOMIC_SEQ_CST); + + queue_cfg->cur_cb = rte_eth_add_rx_callback(port_id, queue_id, + clb_umwait, NULL); + break; + } + case RTE_POWER_MGMT_TYPE_SCALE: + { + enum power_management_env env; + /* only PSTATE and ACPI modes are supported */ + if (!rte_power_check_env_supported(PM_ENV_ACPI_CPUFREQ) && + !rte_power_check_env_supported( + PM_ENV_PSTATE_CPUFREQ)) { + RTE_LOG(DEBUG, POWER, "Neither ACPI nor PSTATE modes are supported\n"); + ret = -ENOTSUP; + goto end; + } + /* ensure we could initialize the power library */ + if (rte_power_init(lcore_id)) { + ret = -EINVAL; + goto end; + } + /* ensure we initialized the correct env */ + env = rte_power_get_env(); + if (env != PM_ENV_ACPI_CPUFREQ && + env != PM_ENV_PSTATE_CPUFREQ) { + RTE_LOG(DEBUG, POWER, "Neither ACPI nor PSTATE modes were initialized\n"); + ret = -ENOTSUP; + goto end; + } + /* initialize data before enabling the callback */ + queue_cfg->empty_poll_stats = 0; + queue_cfg->cb_mode = mode; + queue_cfg->pwr_mgmt_state = PMD_MGMT_ENABLED; + + /* this is not necessary here, but do it anyway */ + rte_atomic_thread_fence(__ATOMIC_SEQ_CST); + + queue_cfg->cur_cb = rte_eth_add_rx_callback(port_id, + queue_id, clb_scale_freq, NULL); + break; + } + case RTE_POWER_MGMT_TYPE_PAUSE: + /* figure out various time-to-tsc conversions */ + if (global_data.tsc_per_us == 0) + calc_tsc(); + + /* initialize data before enabling the callback */ + queue_cfg->empty_poll_stats = 0; + queue_cfg->cb_mode = mode; + queue_cfg->pwr_mgmt_state = PMD_MGMT_ENABLED; + + /* this is not necessary here, but do it anyway */ + rte_atomic_thread_fence(__ATOMIC_SEQ_CST); + + queue_cfg->cur_cb = rte_eth_add_rx_callback(port_id, queue_id, + clb_pause, NULL); + break; + } + ret = 0; +end: + return ret; +} + +int +rte_power_ethdev_pmgmt_queue_disable(unsigned int lcore_id, + uint16_t port_id, uint16_t queue_id) +{ + struct pmd_queue_cfg *queue_cfg; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + if (lcore_id >= 
RTE_MAX_LCORE || queue_id >= RTE_MAX_QUEUES_PER_PORT) + return -EINVAL; + + /* no need to check queue id as wrong queue id would not be enabled */ + queue_cfg = &port_cfg[port_id][queue_id]; + + if (queue_cfg->pwr_mgmt_state != PMD_MGMT_ENABLED) + return -EINVAL; + + /* stop any callbacks from progressing */ + queue_cfg->pwr_mgmt_state = PMD_MGMT_DISABLED; + + /* ensure we update our state before continuing */ + rte_atomic_thread_fence(__ATOMIC_SEQ_CST); + + switch (queue_cfg->cb_mode) { + case RTE_POWER_MGMT_TYPE_MONITOR: + { + bool exit = false; + do { + /* + * we may request cancellation while the other thread + * has just entered the callback but hasn't started + * sleeping yet, so keep waking it up until we know it's + * done sleeping. + */ + if (queue_cfg->umwait_in_progress) + rte_power_monitor_wakeup(lcore_id); + else + exit = true; + } while (!exit); + } + /* fall-through */ + case RTE_POWER_MGMT_TYPE_PAUSE: + rte_eth_remove_rx_callback(port_id, queue_id, + queue_cfg->cur_cb); + break; + case RTE_POWER_MGMT_TYPE_SCALE: + rte_power_freq_max(lcore_id); + rte_eth_remove_rx_callback(port_id, queue_id, + queue_cfg->cur_cb); + rte_power_exit(lcore_id); + break; + } + /* + * we don't free the RX callback here because it is unsafe to do so + * unless we know for a fact that all data plane threads have stopped. + */ + queue_cfg->cur_cb = NULL; + + return 0; +} diff --git a/lib/librte_power/rte_power_pmd_mgmt.h b/lib/librte_power/rte_power_pmd_mgmt.h new file mode 100644 index 0000000000..7a0ac24625 --- /dev/null +++ b/lib/librte_power/rte_power_pmd_mgmt.h @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2020 Intel Corporation + */ + +#ifndef _RTE_POWER_PMD_MGMT_H +#define _RTE_POWER_PMD_MGMT_H + +/** + * @file + * RTE PMD Power Management + */ + +#include +#include + +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * PMD Power Management Type + */ +enum rte_power_pmd_mgmt_type { + /** Use power-optimized monitoring to wait for incoming traffic */ + RTE_POWER_MGMT_TYPE_MONITOR = 1, + /** Use power-optimized sleep to avoid busy polling */ + RTE_POWER_MGMT_TYPE_PAUSE, + /** Use frequency scaling when traffic is low */ + RTE_POWER_MGMT_TYPE_SCALE, +}; + +/** + * @warning + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice. + * + * Enable power management on a specified Ethernet device Rx queue and lcore. + * + * @note This function is not thread-safe. + * + * @param lcore_id + * The lcore the Rx queue will be polled from. + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The queue identifier of the Ethernet device. + * @param mode + * The power management scheme to use for specified Rx queue. + * @return + * 0 on success + * <0 on error + */ +__rte_experimental +int +rte_power_ethdev_pmgmt_queue_enable(unsigned int lcore_id, + uint16_t port_id, uint16_t queue_id, + enum rte_power_pmd_mgmt_type mode); + +/** + * @warning + * @b EXPERIMENTAL: this API may change, or be removed, without prior notice. + * + * Disable power management on a specified Ethernet device Rx queue and lcore. + * + * @note This function is not thread-safe. + * + * @param lcore_id + * The lcore the Rx queue is polled from. + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The queue identifier of the Ethernet device. 
+ * @return + * 0 on success + * <0 on error + */ +__rte_experimental +int +rte_power_ethdev_pmgmt_queue_disable(unsigned int lcore_id, + uint16_t port_id, uint16_t queue_id); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_power/version.map b/lib/librte_power/version.map index 69ca9af616..f38a380212 100644 --- a/lib/librte_power/version.map +++ b/lib/librte_power/version.map @@ -34,4 +34,9 @@ EXPERIMENTAL { rte_power_guest_channel_receive_msg; rte_power_poll_stat_fetch; rte_power_poll_stat_update; + + # added in 21.02 + rte_power_ethdev_pmgmt_queue_disable; + rte_power_ethdev_pmgmt_queue_enable; + }; From patchwork Wed Jan 20 11:50:29 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Burakov, Anatoly" X-Patchwork-Id: 86974 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id C1C60A0A05; Wed, 20 Jan 2021 12:51:01 +0100 (CET) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 25E1B140D6F; Wed, 20 Jan 2021 12:50:43 +0100 (CET) Received: from mga14.intel.com (mga14.intel.com [192.55.52.115]) by mails.dpdk.org (Postfix) with ESMTP id 6CE81140D29 for ; Wed, 20 Jan 2021 12:50:39 +0100 (CET) IronPort-SDR: BX8aMErEvbrYm7G45Sxlg5IzzvbX8yVrU3TGacLtVq3frQtWtGxaW5iZ9oRMzMu9DZDMqT/wff H5Z4KzQJIMhg== X-IronPort-AV: E=McAfee;i="6000,8403,9869"; a="178313190" X-IronPort-AV: E=Sophos;i="5.79,361,1602572400"; d="scan'208";a="178313190" Received: from fmsmga006.fm.intel.com ([10.253.24.20]) by fmsmga103.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 20 Jan 2021 03:50:38 -0800 IronPort-SDR: ih44+fBtgc7KicBVQfuVJ8+n47tVLIqCXEAUgP4C3oSSR1yY+m6SCRvHdlxwQpi8DmMfpV6GJC mxqjTSuG0z6Q== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.79,361,1602572400"; d="scan'208";a="571319273" Received: from silpixa00399498.ir.intel.com (HELO silpixa00399498.ger.corp.intel.com) ([10.237.222.179]) by fmsmga006.fm.intel.com with ESMTP; 20 Jan 2021 03:50:37 -0800 From: Anatoly Burakov To: dev@dpdk.org Cc: Liang Ma , David Hunt , thomas@monjalon.net Date: Wed, 20 Jan 2021 11:50:29 +0000 Message-Id: X-Mailer: git-send-email 2.25.1 In-Reply-To: References: MIME-Version: 1.0 Subject: [dpdk-dev] [PATCH v19 4/4] examples/l3fwd-power: enable PMD power mgmt X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" From: Liang Ma Add PMD power management feature support to l3fwd-power sample app. Signed-off-by: Liang Ma Signed-off-by: Anatoly Burakov Acked-by: David Hunt --- Notes: v12: - Allow selecting PMD power management scheme from command-line - Enforce 1 core 1 queue rule .../sample_app_ug/l3_forward_power_man.rst | 35 ++++++++ examples/l3fwd-power/main.c | 90 ++++++++++++++++++- 2 files changed, 123 insertions(+), 2 deletions(-) diff --git a/doc/guides/sample_app_ug/l3_forward_power_man.rst b/doc/guides/sample_app_ug/l3_forward_power_man.rst index 85a78a5c1e..aaa9367fae 100644 --- a/doc/guides/sample_app_ug/l3_forward_power_man.rst +++ b/doc/guides/sample_app_ug/l3_forward_power_man.rst @@ -109,6 +109,8 @@ where, * --telemetry: Telemetry mode. +* --pmd-mgmt: PMD power management mode. + See :doc:`l3_forward` for details. 
The L3fwd-power example reuses the L3fwd command line options. @@ -456,3 +458,36 @@ reference cycles and accordingly busy rate is set to either 0% or The new stats ``empty_poll`` , ``full_poll`` and ``busy_percent`` can be viewed by running the script ``/usertools/dpdk-telemetry-client.py`` and selecting the menu option ``Send for global Metrics``. + +PMD power management Mode +------------------------- + +The PMD power management mode support for ``l3fwd-power`` is a standalone mode, in this mode +``l3fwd-power`` does simple l3fwding along with enable the power saving scheme on specific +port/queue/lcore. Main purpose for this mode is to demonstrate how to use the PMD power management API. + +.. code-block:: console + + ./build/examples/dpdk-l3fwd-power -l 1-3 -- --pmd-mgmt -p 0x0f --config="(0,0,2),(0,1,3)" + +PMD Power Management Mode +------------------------- +There is also a traffic-aware operating mode that, instead of using explicit +power management, will use automatic PMD power management. This mode is limited +to one queue per core, and has three available power management schemes: + +* ``monitor`` - this will use ``rte_power_monitor()`` function to enter a + power-optimized state (subject to platform support). + +* ``pause`` - this will use ``rte_power_pause()`` or ``rte_pause()`` to avoid + busy looping when there is no traffic. + +* ``scale`` - this will use frequency scaling routines available in the + ``librte_power`` library. + +See :doc:`Power Management<../prog_guide/power_man>` chapter in the DPDK +Programmer's Guide for more details on PMD power management. + +.. code-block:: console + + .//examples/dpdk-l3fwd-power -l 1-3 -- -p 0x0f --config="(0,0,2),(0,1,3)" --pmd-mgmt=scale diff --git a/examples/l3fwd-power/main.c b/examples/l3fwd-power/main.c index 995a3b6ad7..61fbae6c4f 100644 --- a/examples/l3fwd-power/main.c +++ b/examples/l3fwd-power/main.c @@ -47,6 +47,7 @@ #include #include #include +#include #include "perf_core.h" #include "main.h" @@ -199,11 +200,14 @@ enum appmode { APP_MODE_LEGACY, APP_MODE_EMPTY_POLL, APP_MODE_TELEMETRY, - APP_MODE_INTERRUPT + APP_MODE_INTERRUPT, + APP_MODE_PMD_MGMT }; enum appmode app_mode; +static enum rte_power_pmd_mgmt_type pmgmt_type; + enum freq_scale_hint_t { FREQ_LOWER = -1, @@ -1611,7 +1615,9 @@ print_usage(const char *prgname) " follow (training_flag, high_threshold, med_threshold)\n" " --telemetry: enable telemetry mode, to update" " empty polls, full polls, and core busyness to telemetry\n" - " --interrupt-only: enable interrupt-only mode\n", + " --interrupt-only: enable interrupt-only mode\n" + " --pmd-mgmt MODE: enable PMD power management mode. 
" + "Currently supported modes: monitor, pause, scale\n", prgname); } @@ -1701,6 +1707,32 @@ parse_config(const char *q_arg) return 0; } + +static int +parse_pmd_mgmt_config(const char *name) +{ +#define PMD_MGMT_MONITOR "monitor" +#define PMD_MGMT_PAUSE "pause" +#define PMD_MGMT_SCALE "scale" + + if (strncmp(PMD_MGMT_MONITOR, name, sizeof(PMD_MGMT_MONITOR)) == 0) { + pmgmt_type = RTE_POWER_MGMT_TYPE_MONITOR; + return 0; + } + + if (strncmp(PMD_MGMT_PAUSE, name, sizeof(PMD_MGMT_PAUSE)) == 0) { + pmgmt_type = RTE_POWER_MGMT_TYPE_PAUSE; + return 0; + } + + if (strncmp(PMD_MGMT_SCALE, name, sizeof(PMD_MGMT_SCALE)) == 0) { + pmgmt_type = RTE_POWER_MGMT_TYPE_SCALE; + return 0; + } + /* unknown PMD power management mode */ + return -1; +} + static int parse_ep_config(const char *q_arg) { @@ -1755,6 +1787,7 @@ parse_ep_config(const char *q_arg) #define CMD_LINE_OPT_EMPTY_POLL "empty-poll" #define CMD_LINE_OPT_INTERRUPT_ONLY "interrupt-only" #define CMD_LINE_OPT_TELEMETRY "telemetry" +#define CMD_LINE_OPT_PMD_MGMT "pmd-mgmt" /* Parse the argument given in the command line of the application */ static int @@ -1776,6 +1809,7 @@ parse_args(int argc, char **argv) {CMD_LINE_OPT_LEGACY, 0, 0, 0}, {CMD_LINE_OPT_TELEMETRY, 0, 0, 0}, {CMD_LINE_OPT_INTERRUPT_ONLY, 0, 0, 0}, + {CMD_LINE_OPT_PMD_MGMT, 1, 0, 0}, {NULL, 0, 0, 0} }; @@ -1886,6 +1920,21 @@ parse_args(int argc, char **argv) printf("telemetry mode is enabled\n"); } + if (!strncmp(lgopts[option_index].name, + CMD_LINE_OPT_PMD_MGMT, + sizeof(CMD_LINE_OPT_PMD_MGMT))) { + if (app_mode != APP_MODE_DEFAULT) { + printf(" power mgmt mode is mutually exclusive with other modes\n"); + return -1; + } + if (parse_pmd_mgmt_config(optarg) < 0) { + printf(" Invalid PMD power management mode: %s\n", + optarg); + return -1; + } + app_mode = APP_MODE_PMD_MGMT; + printf("PMD power mgmt mode is enabled\n"); + } if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_INTERRUPT_ONLY, sizeof(CMD_LINE_OPT_INTERRUPT_ONLY))) { @@ -2442,6 +2491,8 @@ mode_to_str(enum appmode mode) return "telemetry"; case APP_MODE_INTERRUPT: return "interrupt-only"; + case APP_MODE_PMD_MGMT: + return "pmd mgmt"; default: return "invalid"; } @@ -2671,6 +2722,13 @@ main(int argc, char **argv) qconf = &lcore_conf[lcore_id]; printf("\nInitializing rx queues on lcore %u ... 
", lcore_id ); fflush(stdout); + + /* PMD power management mode can only do 1 queue per core */ + if (app_mode == APP_MODE_PMD_MGMT && qconf->n_rx_queue > 1) { + rte_exit(EXIT_FAILURE, + "In PMD power management mode, only one queue per lcore is allowed\n"); + } + /* init RX queues */ for(queue = 0; queue < qconf->n_rx_queue; ++queue) { struct rte_eth_rxconf rxq_conf; @@ -2708,6 +2766,16 @@ main(int argc, char **argv) rte_exit(EXIT_FAILURE, "Fail to add ptype cb\n"); } + + if (app_mode == APP_MODE_PMD_MGMT) { + ret = rte_power_ethdev_pmgmt_queue_enable( + lcore_id, portid, queueid, + pmgmt_type); + if (ret < 0) + rte_exit(EXIT_FAILURE, + "rte_power_ethdev_pmgmt_queue_enable: err=%d, port=%d\n", + ret, portid); + } } } @@ -2798,6 +2866,9 @@ main(int argc, char **argv) SKIP_MAIN); } else if (app_mode == APP_MODE_INTERRUPT) { rte_eal_mp_remote_launch(main_intr_loop, NULL, CALL_MAIN); + } else if (app_mode == APP_MODE_PMD_MGMT) { + /* reuse telemetry loop for PMD power management mode */ + rte_eal_mp_remote_launch(main_telemetry_loop, NULL, CALL_MAIN); } if (app_mode == APP_MODE_EMPTY_POLL || app_mode == APP_MODE_TELEMETRY) @@ -2824,6 +2895,21 @@ main(int argc, char **argv) if (app_mode == APP_MODE_EMPTY_POLL) rte_power_empty_poll_stat_free(); + if (app_mode == APP_MODE_PMD_MGMT) { + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + if (rte_lcore_is_enabled(lcore_id) == 0) + continue; + qconf = &lcore_conf[lcore_id]; + for (queue = 0; queue < qconf->n_rx_queue; ++queue) { + portid = qconf->rx_queue_list[queue].port_id; + queueid = qconf->rx_queue_list[queue].queue_id; + + rte_power_ethdev_pmgmt_queue_disable(lcore_id, + portid, queueid); + } + } + } + if ((app_mode == APP_MODE_LEGACY || app_mode == APP_MODE_EMPTY_POLL) && deinit_power_library()) rte_exit(EXIT_FAILURE, "deinit_power_library failed\n");