[v3,2/2] timer/linux/x86: override TSC freq if no tsc_known_freq
Checks
Commit Message
If the tsc_known_freq cpu flag is missing, it means the kernel doesn't
trust it and calculates its own. We should do the same to avoid drift.
Signed-off-by: Isaac Boukris <iboukris@gmail.com>
---
lib/eal/common/eal_common_timer.c | 3 +-
lib/eal/common/eal_private.h | 2 +-
lib/eal/freebsd/eal_timer.c | 5 ++-
lib/eal/linux/eal_timer.c | 53 +++++++++++++++++++++++++++++--
lib/eal/windows/eal_timer.c | 5 ++-
5 files changed, 60 insertions(+), 8 deletions(-)
Comments
On Tue, 1 Oct 2024 03:22:51 +0300
Isaac Boukris <iboukris@gmail.com> wrote:
> diff --git a/lib/eal/windows/eal_timer.c b/lib/eal/windows/eal_timer.c
> index b070cb7751..cfd6c267ac 100644
> --- a/lib/eal/windows/eal_timer.c
> +++ b/lib/eal/windows/eal_timer.c
> @@ -49,13 +49,16 @@ rte_delay_us_sleep(unsigned int us)
> }
>
> uint64_t
> -get_tsc_freq(void)
> +get_tsc_freq(uint64_t arch_hz)
> {
> LARGE_INTEGER t_start, t_end, elapsed_us;
> LARGE_INTEGER frequency;
> uint64_t tsc_hz;
> uint64_t end, start;
>
> + if (arch_hz)
> + return arch_hz;
> +
> QueryPerformanceFrequency(&frequency);
>
> QueryPerformanceCounter(&t_start);
> --
On Windows, I would not use arch_hz at all, since it is opaque how
the Windows kernel determines the frequency, and best not to get
skew.
On Tue, Oct 01, 2024 at 03:22:51AM +0300, Isaac Boukris wrote:
> If the tsc_known_freq cpu flag is missing, it means the kernel doesn't
> trust it and calculates its own. We should do the same to avoid drift.
>
> Signed-off-by: Isaac Boukris <iboukris@gmail.com>
> ---
> lib/eal/common/eal_common_timer.c | 3 +-
> lib/eal/common/eal_private.h | 2 +-
> lib/eal/freebsd/eal_timer.c | 5 ++-
> lib/eal/linux/eal_timer.c | 53 +++++++++++++++++++++++++++++--
> lib/eal/windows/eal_timer.c | 5 ++-
> 5 files changed, 60 insertions(+), 8 deletions(-)
>
> diff --git a/lib/eal/common/eal_common_timer.c b/lib/eal/common/eal_common_timer.c
> index c5c4703f15..e00be0a5c8 100644
> --- a/lib/eal/common/eal_common_timer.c
> +++ b/lib/eal/common/eal_common_timer.c
> @@ -66,8 +66,7 @@ set_tsc_freq(void)
> }
>
> freq = get_tsc_freq_arch();
> - if (!freq)
> - freq = get_tsc_freq();
> + freq = get_tsc_freq(freq);
> if (!freq)
> freq = estimate_tsc_freq();
>
> diff --git a/lib/eal/common/eal_private.h b/lib/eal/common/eal_private.h
> index af09620426..bb315dab04 100644
> --- a/lib/eal/common/eal_private.h
> +++ b/lib/eal/common/eal_private.h
> @@ -374,7 +374,7 @@ void set_tsc_freq(void);
> *
> * This function is private to the EAL.
> */
> -uint64_t get_tsc_freq(void);
> +uint64_t get_tsc_freq(uint64_t arch_hz);
>
> /**
> * Get TSC frequency if the architecture supports.
> diff --git a/lib/eal/freebsd/eal_timer.c b/lib/eal/freebsd/eal_timer.c
> index 3dd70e24ba..5a8aea03e1 100644
> --- a/lib/eal/freebsd/eal_timer.c
> +++ b/lib/eal/freebsd/eal_timer.c
> @@ -26,12 +26,15 @@
> enum timer_source eal_timer_source = EAL_TIMER_TSC;
>
> uint64_t
> -get_tsc_freq(void)
> +get_tsc_freq(uint64_t arch_hz)
> {
> size_t sz;
> int tmp;
> uint64_t tsc_hz;
>
> + if (arch_hz)
> + return arch_hz;
> +
> sz = sizeof(tmp);
> tmp = 0;
>
On FreeBSD I'm not sure this is the best behaviour. On BSD we read the TSC
frequency from the kernel, which, one assumes, has measured it accurately.
Therefore I'd tend toward just using the kernel value in all cases, and maybe
checking the arch value (if non-zero) against it and warning if the two
diverge significantly. WDYT?
/Bruce
On Tue, Oct 1, 2024 at 6:22 PM Stephen Hemminger
<stephen@networkplumber.org> wrote:
>
> On Tue, 1 Oct 2024 03:22:51 +0300
> Isaac Boukris <iboukris@gmail.com> wrote:
>
> > diff --git a/lib/eal/windows/eal_timer.c b/lib/eal/windows/eal_timer.c
> > index b070cb7751..cfd6c267ac 100644
> > --- a/lib/eal/windows/eal_timer.c
> > +++ b/lib/eal/windows/eal_timer.c
> > @@ -49,13 +49,16 @@ rte_delay_us_sleep(unsigned int us)
> > }
> >
> > uint64_t
> > -get_tsc_freq(void)
> > +get_tsc_freq(uint64_t arch_hz)
> > {
> > LARGE_INTEGER t_start, t_end, elapsed_us;
> > LARGE_INTEGER frequency;
> > uint64_t tsc_hz;
> > uint64_t end, start;
> >
> > + if (arch_hz)
> > + return arch_hz;
> > +
> > QueryPerformanceFrequency(&frequency);
> >
> > QueryPerformanceCounter(&t_start);
> > --
>
> On Windows, I would not use arch_hz at all, since it is opaque how
> the Windows kernel determines the frequency, and best not to get
> skew.
Not sure I follow: currently the patch doesn't change the behavior for
Windows, and the Windows code is quite similar to the Linux one.
Should we always prefer the measured value on Windows?
On the other hand, I think I should update the rounding commit to also
change the Windows code; there is no sense in 10MHz rounding there
either.
On Tue, Oct 1, 2024 at 11:01 PM Bruce Richardson
<bruce.richardson@intel.com> wrote:
>
> On Tue, Oct 01, 2024 at 03:22:51AM +0300, Isaac Boukris wrote:
> > If the tsc_known_freq cpu flag is missing, it means the kernel doesn't
> > trust it and calculates its own. We should do the same to avoid drift.
> >
> > Signed-off-by: Isaac Boukris <iboukris@gmail.com>
> > ---
> > lib/eal/common/eal_common_timer.c | 3 +-
> > lib/eal/common/eal_private.h | 2 +-
> > lib/eal/freebsd/eal_timer.c | 5 ++-
> > lib/eal/linux/eal_timer.c | 53 +++++++++++++++++++++++++++++--
> > lib/eal/windows/eal_timer.c | 5 ++-
> > 5 files changed, 60 insertions(+), 8 deletions(-)
> >
> > diff --git a/lib/eal/common/eal_common_timer.c b/lib/eal/common/eal_common_timer.c
> > index c5c4703f15..e00be0a5c8 100644
> > --- a/lib/eal/common/eal_common_timer.c
> > +++ b/lib/eal/common/eal_common_timer.c
> > @@ -66,8 +66,7 @@ set_tsc_freq(void)
> > }
> >
> > freq = get_tsc_freq_arch();
> > - if (!freq)
> > - freq = get_tsc_freq();
> > + freq = get_tsc_freq(freq);
> > if (!freq)
> > freq = estimate_tsc_freq();
> >
> > diff --git a/lib/eal/common/eal_private.h b/lib/eal/common/eal_private.h
> > index af09620426..bb315dab04 100644
> > --- a/lib/eal/common/eal_private.h
> > +++ b/lib/eal/common/eal_private.h
> > @@ -374,7 +374,7 @@ void set_tsc_freq(void);
> > *
> > * This function is private to the EAL.
> > */
> > -uint64_t get_tsc_freq(void);
> > +uint64_t get_tsc_freq(uint64_t arch_hz);
> >
> > /**
> > * Get TSC frequency if the architecture supports.
> > diff --git a/lib/eal/freebsd/eal_timer.c b/lib/eal/freebsd/eal_timer.c
> > index 3dd70e24ba..5a8aea03e1 100644
> > --- a/lib/eal/freebsd/eal_timer.c
> > +++ b/lib/eal/freebsd/eal_timer.c
> > @@ -26,12 +26,15 @@
> > enum timer_source eal_timer_source = EAL_TIMER_TSC;
> >
> > uint64_t
> > -get_tsc_freq(void)
> > +get_tsc_freq(uint64_t arch_hz)
> > {
> > size_t sz;
> > int tmp;
> > uint64_t tsc_hz;
> >
> > + if (arch_hz)
> > + return arch_hz;
> > +
> > sz = sizeof(tmp);
> > tmp = 0;
> >
>
> On FreeBSD I'm not sure this is the best behaviour. On BSD we read the TSC
> value from the kernel, which, one assumes, has measured it accurately.
> Therefore I'd tend toward just using the kernel value in all cases, maybe
> check the arch value (if non-zero) against that and warning if they have
> significant divergence. WDYT?
Makes sense, I'll add a patch for that. We could also fall back to the arch
value if sysctlbyname() fails for some reason.
On Wed, Oct 02, 2024 at 12:59:58AM +0300, Isaac Boukris wrote:
> On Tue, Oct 1, 2024 at 11:01 PM Bruce Richardson
> <bruce.richardson@intel.com> wrote:
> >
> > On Tue, Oct 01, 2024 at 03:22:51AM +0300, Isaac Boukris wrote:
> > > If the tsc_known_freq cpu flag is missing, it means the kernel doesn't
> > > trust it and calculates its own. We should do the same to avoid drift.
> > >
> > > Signed-off-by: Isaac Boukris <iboukris@gmail.com>
> > > ---
> > > lib/eal/common/eal_common_timer.c | 3 +-
> > > lib/eal/common/eal_private.h | 2 +-
> > > lib/eal/freebsd/eal_timer.c | 5 ++-
> > > lib/eal/linux/eal_timer.c | 53 +++++++++++++++++++++++++++++--
> > > lib/eal/windows/eal_timer.c | 5 ++-
> > > 5 files changed, 60 insertions(+), 8 deletions(-)
> > >
> > > diff --git a/lib/eal/common/eal_common_timer.c b/lib/eal/common/eal_common_timer.c
> > > index c5c4703f15..e00be0a5c8 100644
> > > --- a/lib/eal/common/eal_common_timer.c
> > > +++ b/lib/eal/common/eal_common_timer.c
> > > @@ -66,8 +66,7 @@ set_tsc_freq(void)
> > > }
> > >
> > > freq = get_tsc_freq_arch();
> > > - if (!freq)
> > > - freq = get_tsc_freq();
> > > + freq = get_tsc_freq(freq);
> > > if (!freq)
> > > freq = estimate_tsc_freq();
> > >
> > > diff --git a/lib/eal/common/eal_private.h b/lib/eal/common/eal_private.h
> > > index af09620426..bb315dab04 100644
> > > --- a/lib/eal/common/eal_private.h
> > > +++ b/lib/eal/common/eal_private.h
> > > @@ -374,7 +374,7 @@ void set_tsc_freq(void);
> > > *
> > > * This function is private to the EAL.
> > > */
> > > -uint64_t get_tsc_freq(void);
> > > +uint64_t get_tsc_freq(uint64_t arch_hz);
> > >
> > > /**
> > > * Get TSC frequency if the architecture supports.
> > > diff --git a/lib/eal/freebsd/eal_timer.c b/lib/eal/freebsd/eal_timer.c
> > > index 3dd70e24ba..5a8aea03e1 100644
> > > --- a/lib/eal/freebsd/eal_timer.c
> > > +++ b/lib/eal/freebsd/eal_timer.c
> > > @@ -26,12 +26,15 @@
> > > enum timer_source eal_timer_source = EAL_TIMER_TSC;
> > >
> > > uint64_t
> > > -get_tsc_freq(void)
> > > +get_tsc_freq(uint64_t arch_hz)
> > > {
> > > size_t sz;
> > > int tmp;
> > > uint64_t tsc_hz;
> > >
> > > + if (arch_hz)
> > > + return arch_hz;
> > > +
> > > sz = sizeof(tmp);
> > > tmp = 0;
> > >
> >
> > On FreeBSD I'm not sure this is the best behaviour. On BSD we read the TSC
> > value from the kernel, which, one assumes, has measured it accurately.
> > Therefore I'd tend toward just using the kernel value in all cases, maybe
> > check the arch value (if non-zero) against that and warning if they have
> > significant divergence. WDYT?
>
> Makes sense, I'll add a patch for that. We could also use the arch
> value if for some reason the sysctlbyname() failed.
Yep, +1 to that.
/Bruce
@@ -66,8 +66,7 @@ set_tsc_freq(void)
}
freq = get_tsc_freq_arch();
- if (!freq)
- freq = get_tsc_freq();
+ freq = get_tsc_freq(freq);
if (!freq)
freq = estimate_tsc_freq();
@@ -374,7 +374,7 @@ void set_tsc_freq(void);
*
* This function is private to the EAL.
*/
-uint64_t get_tsc_freq(void);
+uint64_t get_tsc_freq(uint64_t arch_hz);
/**
* Get TSC frequency if the architecture supports.
@@ -26,12 +26,15 @@
enum timer_source eal_timer_source = EAL_TIMER_TSC;
uint64_t
-get_tsc_freq(void)
+get_tsc_freq(uint64_t arch_hz)
{
size_t sz;
int tmp;
uint64_t tsc_hz;
+ if (arch_hz)
+ return arch_hz;
+
sz = sizeof(tmp);
tmp = 0;
@@ -5,9 +5,9 @@
#include <stdio.h>
#include <stdint.h>
+#include <inttypes.h>
#ifdef RTE_LIBEAL_USE_HPET
#include <fcntl.h>
-#include <inttypes.h>
#include <sys/mman.h>
#include <unistd.h>
#endif
@@ -187,8 +187,41 @@ rte_eal_hpet_init(int make_default)
}
#endif
+/* Check if the kernel deems the arch provided TSC frequency trustworthy. */
+
+static bool
+is_tsc_known_freq(void)
+{
+ bool ret = true; /* Assume tsc_known_freq */
+
+#if defined(RTE_ARCH_X86)
+ char line[2048];
+ FILE *stream;
+
+ stream = fopen("/proc/cpuinfo", "r");
+ if (!stream) {
+ EAL_LOG(WARNING, "Unable to open /proc/cpuinfo");
+ return ret;
+ }
+
+ while (fgets(line, sizeof(line), stream)) {
+ if (strncmp(line, "flags", 5) != 0)
+ continue;
+
+ if (!strstr(line, "tsc_known_freq"))
+ ret = false;
+
+ break;
+ }
+
+ fclose(stream);
+#endif
+
+ return ret;
+}
+
uint64_t
-get_tsc_freq(void)
+get_tsc_freq(uint64_t arch_hz)
{
#ifdef CLOCK_MONOTONIC_RAW
#define NS_PER_SEC 1E9
@@ -199,6 +232,9 @@ get_tsc_freq(void)
struct timespec t_start, t_end;
uint64_t tsc_hz;
+ if (arch_hz && is_tsc_known_freq())
+ return arch_hz;
+
if (clock_gettime(CLOCK_MONOTONIC_RAW, &t_start) == 0) {
uint64_t ns, end, start = rte_rdtsc();
nanosleep(&sleeptime,NULL);
@@ -209,11 +245,22 @@ get_tsc_freq(void)
double secs = (double)ns/NS_PER_SEC;
tsc_hz = (uint64_t)((end - start)/secs);
+
+ if (arch_hz) {
+ /* Make sure we're within 1% for sanity check */
+ if (arch_hz - tsc_hz > arch_hz / 100)
+ return arch_hz;
+
+ EAL_LOG(DEBUG,
+ "Refined arch frequency %"PRIu64" to measured frequency %"PRIu64,
+ arch_hz, tsc_hz);
+ }
+
/* Round up to 100Khz. 1E5 ~ 100Khz */
return RTE_ALIGN_MUL_NEAR(tsc_hz, CYC_PER_100KHZ);
}
#endif
- return 0;
+ return arch_hz;
}
int
@@ -49,13 +49,16 @@ rte_delay_us_sleep(unsigned int us)
}
uint64_t
-get_tsc_freq(void)
+get_tsc_freq(uint64_t arch_hz)
{
LARGE_INTEGER t_start, t_end, elapsed_us;
LARGE_INTEGER frequency;
uint64_t tsc_hz;
uint64_t end, start;
+ if (arch_hz)
+ return arch_hz;
+
QueryPerformanceFrequency(&frequency);
QueryPerformanceCounter(&t_start);