[v3,3/9] examples/vm_power: add oob monitoring functions

Message ID 20180626092317.11031-4-david.hunt@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series examples/vm_power: 100% Busy Polling |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Hunt, David June 26, 2018, 9:23 a.m. UTC
  This patch introduces the out-of-band (oob) core monitoring
functions.

The functions are similar to the channel manager functions.
There are function to add and remove cores from the
list of cores being monitored. There is a function to initialise
the monitor setup, run the monitor thread, and exit the monitor.

The monitor thread runs in it's own lcore, and is separate
functionality to the channel monitor which is epoll based.
THis thread is timer based. It loops through all monitored cores,
calculates the branch ratio, scales up or down the core, then
sleeps for an interval (~250 uS).

The method it uses to read the branch counters is a pread on the
/dev/cpu/x/msr file, so the 'msr' kernel module needs to be loaded.
Also, since the msr.h file has been made unavailable in recent
kernels, we have #defines for the relevant MSRs included in the
code.

The makefile has a switch for x86 and non-x86 platforms,
and compiles stub function for non-x86 platforms.

Signed-off-by: David Hunt <david.hunt@intel.com>
Acked-by: Radu Nicolau <radu.nicolau@intel.com>
---
 examples/vm_power_manager/Makefile          |   5 +
 examples/vm_power_manager/oob_monitor.h     |  68 +++++
 examples/vm_power_manager/oob_monitor_nop.c |  38 +++
 examples/vm_power_manager/oob_monitor_x86.c | 282 ++++++++++++++++++++
 4 files changed, 393 insertions(+)
 create mode 100644 examples/vm_power_manager/oob_monitor.h
 create mode 100644 examples/vm_power_manager/oob_monitor_nop.c
 create mode 100644 examples/vm_power_manager/oob_monitor_x86.c
  

Comments

Thomas Monjalon July 12, 2018, 7:13 p.m. UTC | #1
26/06/2018 11:23, David Hunt:
> +#include <unistd.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <stdint.h>
> +#include <signal.h>
> +#include <errno.h>
> +#include <string.h>
> +#include <sys/types.h>
> +#include <sys/epoll.h>
> +#include <sys/queue.h>
> +#include <sys/time.h>
> +#include <fcntl.h>
> +
> +#include <rte_log.h>
> +#include <rte_memory.h>
> +#include <rte_malloc.h>
> +#include <rte_atomic.h>
> +#include <rte_cycles.h>
> +#include <rte_ethdev.h>
> +#include <rte_pmd_i40e.h>
> +
> +#include <libvirt/libvirt.h>
> +#include "oob_monitor.h"
> +#include "power_manager.h"
> +#include "channel_manager.h"
> +
> +#include <rte_log.h>
> +#define RTE_LOGTYPE_CHANNEL_MONITOR RTE_LOGTYPE_USER1

I'm sure you don't need all these headers.
rte_log.h is included twice.
rte_pmd_i40e is more than suspicious...

This is a hint that the whole file was probably written too fast :)
  
Stephen Hemminger July 12, 2018, 10:18 p.m. UTC | #2
On Thu, 12 Jul 2018 21:13:26 +0200
Thomas Monjalon <thomas@monjalon.net> wrote:

> 26/06/2018 11:23, David Hunt:
> > +#include <unistd.h>
> > +#include <stdio.h>
> > +#include <stdlib.h>
> > +#include <stdint.h>
> > +#include <signal.h>
> > +#include <errno.h>
> > +#include <string.h>
> > +#include <sys/types.h>
> > +#include <sys/epoll.h>
> > +#include <sys/queue.h>
> > +#include <sys/time.h>
> > +#include <fcntl.h>
> > +
> > +#include <rte_log.h>
> > +#include <rte_memory.h>
> > +#include <rte_malloc.h>
> > +#include <rte_atomic.h>
> > +#include <rte_cycles.h>
> > +#include <rte_ethdev.h>
> > +#include <rte_pmd_i40e.h>
> > +
> > +#include <libvirt/libvirt.h>
> > +#include "oob_monitor.h"
> > +#include "power_manager.h"
> > +#include "channel_manager.h"
> > +
> > +#include <rte_log.h>
> > +#define RTE_LOGTYPE_CHANNEL_MONITOR RTE_LOGTYPE_USER1  
> 
> I'm sure you don't need all these headers.
> rte_log.h is included twice.
> rte_pmd_i40e is more than suspicious...
> 
> This is a hint that the whole file was probably written too fast :)
> 

This tool can help
  https://include-what-you-use.org/
  
Hunt, David July 13, 2018, 8:24 a.m. UTC | #3
Hi Thomas,


On 12/7/2018 8:13 PM, Thomas Monjalon wrote:
> 26/06/2018 11:23, David Hunt:
>> +#include <unistd.h>
>> +#include <stdio.h>
>> +#include <stdlib.h>
>> +#include <stdint.h>
>> +#include <signal.h>
>> +#include <errno.h>
>> +#include <string.h>
>> +#include <sys/types.h>
>> +#include <sys/epoll.h>
>> +#include <sys/queue.h>
>> +#include <sys/time.h>
>> +#include <fcntl.h>
>> +
>> +#include <rte_log.h>
>> +#include <rte_memory.h>
>> +#include <rte_malloc.h>
>> +#include <rte_atomic.h>
>> +#include <rte_cycles.h>
>> +#include <rte_ethdev.h>
>> +#include <rte_pmd_i40e.h>
>> +
>> +#include <libvirt/libvirt.h>
>> +#include "oob_monitor.h"
>> +#include "power_manager.h"
>> +#include "channel_manager.h"
>> +
>> +#include <rte_log.h>
>> +#define RTE_LOGTYPE_CHANNEL_MONITOR RTE_LOGTYPE_USER1
> I'm sure you don't need all these headers.
> rte_log.h is included twice.
> rte_pmd_i40e is more than suspicious...
>
> This is a hint that the whole file was probably written too fast :)

Apologies, it was a cut-and-paste from another file in that same 
directory. I can clean it up and re-spin.

Regards,
Dave.
  

Patch

diff --git a/examples/vm_power_manager/Makefile b/examples/vm_power_manager/Makefile
index 0c925967c..13a5205ba 100644
--- a/examples/vm_power_manager/Makefile
+++ b/examples/vm_power_manager/Makefile
@@ -20,6 +20,11 @@  APP = vm_power_mgr
 # all source are stored in SRCS-y
 SRCS-y := main.c vm_power_cli.c power_manager.c channel_manager.c
 SRCS-y += channel_monitor.c parse.c
+ifeq ($(CONFIG_RTE_ARCH_X86_64),y)
+SRCS-y += oob_monitor_x86.c
+else
+SRCS-y += oob_monitor_nop.c
+endif
 
 CFLAGS += -O3 -I$(RTE_SDK)/lib/librte_power/
 CFLAGS += $(WERROR_FLAGS)
diff --git a/examples/vm_power_manager/oob_monitor.h b/examples/vm_power_manager/oob_monitor.h
new file mode 100644
index 000000000..b96e08df7
--- /dev/null
+++ b/examples/vm_power_manager/oob_monitor.h
@@ -0,0 +1,68 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#ifndef OOB_MONITOR_H_
+#define OOB_MONITOR_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Setup the Branch Monitor resources required to initialize epoll.
+ * Must be called first before calling other functions.
+ *
+ * @return
+ *  - 0 on success.
+ *  - Negative on error.
+ */
+int branch_monitor_init(void);
+
+/**
+ * Run the OOB branch monitor, loops forever on on epoll_wait.
+ *
+ *
+ * @return
+ *  None
+ */
+void run_branch_monitor(void);
+
+/**
+ * Exit the OOB Branch Monitor.
+ *
+ * @return
+ *  None
+ */
+void branch_monitor_exit(void);
+
+/**
+ * Add a core to the list of cores to monitor.
+ *
+ * @param core
+ *  Core Number
+ *
+ * @return
+ *  - 0 on success.
+ *  - Negative on error.
+ */
+int add_core_to_monitor(int core);
+
+/**
+ * Remove a previously added core from core list.
+ *
+ * @param core
+ *  Core Number
+ *
+ * @return
+ *  - 0 on success.
+ *  - Negative on error.
+ */
+int remove_core_from_monitor(int core);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* OOB_MONITOR_H_ */
diff --git a/examples/vm_power_manager/oob_monitor_nop.c b/examples/vm_power_manager/oob_monitor_nop.c
new file mode 100644
index 000000000..7e7b8bc14
--- /dev/null
+++ b/examples/vm_power_manager/oob_monitor_nop.c
@@ -0,0 +1,38 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2014 Intel Corporation
+ */
+
+#include "oob_monitor.h"
+
+void branch_monitor_exit(void)
+{
+}
+
+__attribute__((unused)) static float
+apply_policy(__attribute__((unused)) int core)
+{
+	return 0.0;
+}
+
+int
+add_core_to_monitor(__attribute__((unused)) int core)
+{
+	return 0;
+}
+
+int
+remove_core_from_monitor(__attribute__((unused)) int core)
+{
+	return 0;
+}
+
+int
+branch_monitor_init(void)
+{
+	return 0;
+}
+
+void
+run_branch_monitor(void)
+{
+}
diff --git a/examples/vm_power_manager/oob_monitor_x86.c b/examples/vm_power_manager/oob_monitor_x86.c
new file mode 100644
index 000000000..485ec5e3f
--- /dev/null
+++ b/examples/vm_power_manager/oob_monitor_x86.c
@@ -0,0 +1,282 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2018 Intel Corporation
+ */
+
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <signal.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/epoll.h>
+#include <sys/queue.h>
+#include <sys/time.h>
+#include <fcntl.h>
+
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_malloc.h>
+#include <rte_atomic.h>
+#include <rte_cycles.h>
+#include <rte_ethdev.h>
+#include <rte_pmd_i40e.h>
+
+#include <libvirt/libvirt.h>
+#include "oob_monitor.h"
+#include "power_manager.h"
+#include "channel_manager.h"
+
+#include <rte_log.h>
+#define RTE_LOGTYPE_CHANNEL_MONITOR RTE_LOGTYPE_USER1
+
+#define MAX_EVENTS 256
+
+static volatile unsigned run_loop = 1;
+static uint64_t g_branches, g_branch_misses;
+static int g_active;
+
+void branch_monitor_exit(void)
+{
+	run_loop = 0;
+}
+
+/* Number of microseconds between each poll */
+#define INTERVAL 100
+#define PRINT_LOOP_COUNT (1000000/INTERVAL)
+#define RATIO_THRESHOLD 0.03
+#define IA32_PERFEVTSEL0 0x186
+#define IA32_PERFEVTSEL1 0x187
+#define IA32_PERFCTR0 0xc1
+#define IA32_PERFCTR1 0xc2
+#define IA32_PERFEVT_BRANCH_HITS 0x05300c4
+#define IA32_PERFEVT_BRANCH_MISS 0x05300c5
+
+static float
+apply_policy(int core)
+{
+	struct core_info *ci;
+	uint64_t counter;
+	uint64_t branches, branch_misses;
+	uint32_t last_branches, last_branch_misses;
+	int hits_diff, miss_diff;
+	float ratio;
+	int ret;
+
+	g_active = 0;
+	ci = get_core_info();
+
+	last_branches = ci->cd[core].last_branches;
+	last_branch_misses = ci->cd[core].last_branch_misses;
+
+	ret = pread(ci->cd[core].msr_fd, &counter,
+			sizeof(counter), IA32_PERFCTR0);
+	if (ret < 0)
+		RTE_LOG(ERR, POWER_MANAGER,
+				"unable to read counter for core %u\n",
+				core);
+	branches = counter;
+
+	ret = pread(ci->cd[core].msr_fd, &counter,
+			sizeof(counter), IA32_PERFCTR1);
+	if (ret < 0)
+		RTE_LOG(ERR, POWER_MANAGER,
+				"unable to read counter for core %u\n",
+				core);
+	branch_misses = counter;
+
+
+	ci->cd[core].last_branches = branches;
+	ci->cd[core].last_branch_misses = branch_misses;
+
+	hits_diff = (int)branches - (int)last_branches;
+	if (hits_diff <= 0) {
+		/* Likely a counter overflow condition, skip this round */
+		return -1.0;
+	}
+
+	miss_diff = (int)branch_misses - (int)last_branch_misses;
+	if (miss_diff <= 0) {
+		/* Likely a counter overflow condition, skip this round */
+		return -1.0;
+	}
+
+	g_branches = hits_diff;
+	g_branch_misses = miss_diff;
+
+	if (hits_diff < (INTERVAL*100)) {
+		/* Likely no workload running on this core. Skip. */
+		return -1.0;
+	}
+
+	ratio = (float)miss_diff * (float)100 / (float)hits_diff;
+
+	if (ratio < RATIO_THRESHOLD)
+		power_manager_scale_core_min(core);
+	else
+		power_manager_scale_core_max(core);
+
+	g_active = 1;
+	return ratio;
+}
+
+int
+add_core_to_monitor(int core)
+{
+	struct core_info *ci;
+	char proc_file[UNIX_PATH_MAX];
+	int ret;
+
+	ci = get_core_info();
+
+	if (core < ci->core_count) {
+		long setup;
+
+		snprintf(proc_file, UNIX_PATH_MAX, "/dev/cpu/%d/msr", core);
+		ci->cd[core].msr_fd = open(proc_file, O_RDWR | O_SYNC);
+		if (ci->cd[core].msr_fd < 0) {
+			RTE_LOG(ERR, POWER_MANAGER,
+					"Error opening MSR file for core %d "
+					"(is msr kernel module loaded?)\n",
+					core);
+			return -1;
+		}
+		/*
+		 * Set up branch counters
+		 */
+		setup = IA32_PERFEVT_BRANCH_HITS;
+		ret = pwrite(ci->cd[core].msr_fd, &setup,
+				sizeof(setup), IA32_PERFEVTSEL0);
+		if (ret < 0) {
+			RTE_LOG(ERR, POWER_MANAGER,
+					"unable to set counter for core %u\n",
+					core);
+			return ret;
+		}
+		setup = IA32_PERFEVT_BRANCH_MISS;
+		ret = pwrite(ci->cd[core].msr_fd, &setup,
+				sizeof(setup), IA32_PERFEVTSEL1);
+		if (ret < 0) {
+			RTE_LOG(ERR, POWER_MANAGER,
+					"unable to set counter for core %u\n",
+					core);
+			return ret;
+		}
+		/*
+		 * Close the file and re-open as read only so
+		 * as not to hog the resource
+		 */
+		close(ci->cd[core].msr_fd);
+		ci->cd[core].msr_fd = open(proc_file, O_RDONLY);
+		if (ci->cd[core].msr_fd < 0) {
+			RTE_LOG(ERR, POWER_MANAGER,
+					"Error opening MSR file for core %d "
+					"(is msr kernel module loaded?)\n",
+					core);
+			return -1;
+		}
+		ci->cd[core].oob_enabled = 1;
+	}
+	return 0;
+}
+
+int
+remove_core_from_monitor(int core)
+{
+	struct core_info *ci;
+	char proc_file[UNIX_PATH_MAX];
+	int ret;
+
+	ci = get_core_info();
+
+	if (ci->cd[core].oob_enabled) {
+		long setup;
+
+		/*
+		 * close the msr file, then reopen rw so we can
+		 * disable the counters
+		 */
+		if (ci->cd[core].msr_fd != 0)
+			close(ci->cd[core].msr_fd);
+		snprintf(proc_file, UNIX_PATH_MAX, "/dev/cpu/%d/msr", core);
+		ci->cd[core].msr_fd = open(proc_file, O_RDWR | O_SYNC);
+		if (ci->cd[core].msr_fd < 0) {
+			RTE_LOG(ERR, POWER_MANAGER,
+					"Error opening MSR file for core %d "
+					"(is msr kernel module loaded?)\n",
+					core);
+			return -1;
+		}
+		setup = 0x0; /* clear event */
+		ret = pwrite(ci->cd[core].msr_fd, &setup,
+				sizeof(setup), IA32_PERFEVTSEL0);
+		if (ret < 0) {
+			RTE_LOG(ERR, POWER_MANAGER,
+					"unable to set counter for core %u\n",
+					core);
+			return ret;
+		}
+		setup = 0x0; /* clear event */
+		ret = pwrite(ci->cd[core].msr_fd, &setup,
+				sizeof(setup), IA32_PERFEVTSEL1);
+		if (ret < 0) {
+			RTE_LOG(ERR, POWER_MANAGER,
+					"unable to set counter for core %u\n",
+					core);
+			return ret;
+		}
+
+		close(ci->cd[core].msr_fd);
+		ci->cd[core].msr_fd = 0;
+		ci->cd[core].oob_enabled = 0;
+	}
+	return 0;
+}
+
+int
+branch_monitor_init(void)
+{
+	return 0;
+}
+
+void
+run_branch_monitor(void)
+{
+	struct core_info *ci;
+	int print = 0;
+	float ratio;
+	int printed;
+	int reads = 0;
+
+	ci = get_core_info();
+
+	while (run_loop) {
+
+		if (!run_loop)
+			break;
+		usleep(INTERVAL);
+		int j;
+		print++;
+		printed = 0;
+		for (j = 0; j < ci->core_count; j++) {
+			if (ci->cd[j].oob_enabled) {
+				ratio = apply_policy(j);
+				if ((print > PRINT_LOOP_COUNT) && (g_active)) {
+					printf("  %d: %.4f {%lu} {%d}", j,
+							ratio, g_branches,
+							reads);
+					printed = 1;
+					reads = 0;
+				} else {
+					reads++;
+				}
+			}
+		}
+		if (print > PRINT_LOOP_COUNT) {
+			if (printed)
+				printf("\n");
+			print = 0;
+		}
+	}
+}