[v2,1/2] eal/arm: generic counter based loop for CPU freq calculation

Message ID 20200626203502.20658-1-honnappa.nagarahalli@arm.com (mailing list archive)
State Accepted, archived
Delegated to: David Marchand
Headers
Series [v2,1/2] eal/arm: generic counter based loop for CPU freq calculation |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-nxp-Performance success Performance Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/Intel-compilation success Compilation OK
ci/iol-testing success Testing PASS

Commit Message

Honnappa Nagarahalli June 26, 2020, 8:35 p.m. UTC
  get_tsc_freq uses 'nanosleep' system call to calculate the CPU
frequency. However, 'nanosleep' results in the process getting
un-scheduled. The kernel saves and restores the PMU state. This
ensures that the PMU cycles are not counted towards a sleeping
process. When RTE_ARM_EAL_RDTSC_USE_PMU is defined, this results
in incorrect CPU frequency calculation. This logic is replaced
with generic counter based loop.

Bugzilla ID: 450
Fixes: f91bcbb2d9a6 ("eal/arm: use high-resolution cycle counter")
Cc: stable@dpdk.org

Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Reviewed-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
Reviewed-by: Phil Yang <phil.yang@arm.com>
Acked-by: Jerin Jacob <jerinj@marvell.com>
---
v2:
1) renamed functions (Jerin)
2) Aligned the frequency to 1MHz ceiling (Pavan)
3) Made all the inlines to always inline for consistency

 lib/librte_eal/arm/include/rte_cycles_64.h | 45 +++++++++++++++++++---
 lib/librte_eal/arm/rte_cycles.c            | 27 +++++++++++--
 2 files changed, 63 insertions(+), 9 deletions(-)
  

Comments

David Marchand July 7, 2020, 11:16 a.m. UTC | #1
On Fri, Jun 26, 2020 at 10:35 PM Honnappa Nagarahalli
<honnappa.nagarahalli@arm.com> wrote:
>
> get_tsc_freq uses 'nanosleep' system call to calculate the CPU
> frequency. However, 'nanosleep' results in the process getting
> un-scheduled. The kernel saves and restores the PMU state. This
> ensures that the PMU cycles are not counted towards a sleeping
> process. When RTE_ARM_EAL_RDTSC_USE_PMU is defined, this results
> in incorrect CPU frequency calculation. This logic is replaced
> with generic counter based loop.
>
> Bugzilla ID: 450
> Fixes: f91bcbb2d9a6 ("eal/arm: use high-resolution cycle counter")
> Cc: stable@dpdk.org
>
> Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
> Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
> Reviewed-by: Dharmik Thakkar <dharmik.thakkar@arm.com>
> Reviewed-by: Phil Yang <phil.yang@arm.com>
> Acked-by: Jerin Jacob <jerinj@marvell.com>

Series applied, thanks.
  

Patch

diff --git a/lib/librte_eal/arm/include/rte_cycles_64.h b/lib/librte_eal/arm/include/rte_cycles_64.h
index da557b6a1..e41f9dbd6 100644
--- a/lib/librte_eal/arm/include/rte_cycles_64.h
+++ b/lib/librte_eal/arm/include/rte_cycles_64.h
@@ -1,5 +1,6 @@ 
 /* SPDX-License-Identifier: BSD-3-Clause
  * Copyright(c) 2015 Cavium, Inc
+ * Copyright(c) 2020 Arm Limited
  */
 
 #ifndef _RTE_CYCLES_ARM64_H_
@@ -11,6 +12,33 @@  extern "C" {
 
 #include "generic/rte_cycles.h"
 
+/** Read generic counter frequency */
+static __rte_always_inline uint64_t
+__rte_arm64_cntfrq(void)
+{
+	uint64_t freq;
+
+	asm volatile("mrs %0, cntfrq_el0" : "=r" (freq));
+	return freq;
+}
+
+/** Read generic counter */
+static __rte_always_inline uint64_t
+__rte_arm64_cntvct(void)
+{
+	uint64_t tsc;
+
+	asm volatile("mrs %0, cntvct_el0" : "=r" (tsc));
+	return tsc;
+}
+
+static __rte_always_inline uint64_t
+__rte_arm64_cntvct_precise(void)
+{
+	asm volatile("isb" : : : "memory");
+	return __rte_arm64_cntvct();
+}
+
 /**
  * Read the time base register.
  *
@@ -25,10 +53,7 @@  extern "C" {
 static inline uint64_t
 rte_rdtsc(void)
 {
-	uint64_t tsc;
-
-	asm volatile("mrs %0, cntvct_el0" : "=r" (tsc));
-	return tsc;
+	return __rte_arm64_cntvct();
 }
 #else
 /**
@@ -49,14 +74,22 @@  rte_rdtsc(void)
  * asm volatile("msr pmcr_el0, %0" : : "r" (val));
  *
  */
-static inline uint64_t
-rte_rdtsc(void)
+
+/** Read PMU cycle counter */
+static __rte_always_inline uint64_t
+__rte_arm64_pmccntr(void)
 {
 	uint64_t tsc;
 
 	asm volatile("mrs %0, pmccntr_el0" : "=r"(tsc));
 	return tsc;
 }
+
+static inline uint64_t
+rte_rdtsc(void)
+{
+	return __rte_arm64_pmccntr();
+}
 #endif
 
 static inline uint64_t
diff --git a/lib/librte_eal/arm/rte_cycles.c b/lib/librte_eal/arm/rte_cycles.c
index 3500d523e..5bd29b24b 100644
--- a/lib/librte_eal/arm/rte_cycles.c
+++ b/lib/librte_eal/arm/rte_cycles.c
@@ -3,14 +3,35 @@ 
  */
 
 #include "eal_private.h"
+#include "rte_cycles.h"
 
 uint64_t
 get_tsc_freq_arch(void)
 {
 #if defined RTE_ARCH_ARM64 && !defined RTE_ARM_EAL_RDTSC_USE_PMU
-	uint64_t freq;
-	asm volatile("mrs %0, cntfrq_el0" : "=r" (freq));
-	return freq;
+	return __rte_arm64_cntfrq();
+#elif defined RTE_ARCH_ARM64 && defined RTE_ARM_EAL_RDTSC_USE_PMU
+#define CYC_PER_1MHZ 1E6
+	/* Use the generic counter ticks to calculate the PMU
+	 * cycle frequency.
+	 */
+	uint64_t ticks;
+	uint64_t start_ticks, cur_ticks;
+	uint64_t start_pmu_cycles, end_pmu_cycles;
+
+	/* Number of ticks for 1/10 second */
+	ticks = __rte_arm64_cntfrq() / 10;
+
+	start_ticks = __rte_arm64_cntvct_precise();
+	start_pmu_cycles = rte_rdtsc_precise();
+	do {
+		cur_ticks = __rte_arm64_cntvct();
+	} while ((cur_ticks - start_ticks) < ticks);
+	end_pmu_cycles = rte_rdtsc_precise();
+
+	/* Adjust the cycles to next 1Mhz */
+	return RTE_ALIGN_MUL_CEIL(end_pmu_cycles - start_pmu_cycles,
+			CYC_PER_1MHZ) * 10;
 #else
 	return 0;
 #endif