[v5,08/11] build: optional NUMA and cpu counts detection

Message ID 1603893845-5736-9-git-send-email-juraj.linkes@pantheon.tech (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series Arm build options rework |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Juraj Linkeš Oct. 28, 2020, 2:04 p.m. UTC
  Add an option to automatically discover the host's numa and cpu counts
and use those values for a non cross-build.
Give users the option to override the per-arch default values or values
from cross files by specifying them on the command line with -Dmax_lcores
and -Dmax_numa_nodes.

Signed-off-by: Juraj Linkeš <juraj.linkes@pantheon.tech>
---
 buildtools/get_cpu_count.py  |  7 ++++++
 buildtools/get_numa_count.py | 22 +++++++++++++++++
 buildtools/meson.build       |  2 ++
 config/meson.build           | 48 ++++++++++++++++++++++++++++++++++--
 config/x86/meson.build       |  2 ++
 meson_options.txt            |  8 +++---
 6 files changed, 83 insertions(+), 6 deletions(-)
 create mode 100644 buildtools/get_cpu_count.py
 create mode 100644 buildtools/get_numa_count.py
  

Comments

Bruce Richardson Oct. 28, 2020, 3:04 p.m. UTC | #1
On Wed, Oct 28, 2020 at 03:04:02PM +0100, Juraj Linkeš wrote:
> Add an option to automatically discover the host's numa and cpu counts
> and use those values for a non cross-build.
> Give users the option to override the per-arch default values or values
> from cross files by specifying them on the command line with -Dmax_lcores
> and -Dmax_numa_nodes.
> 
> Signed-off-by: Juraj Linkeš <juraj.linkes@pantheon.tech>
> ---
>  buildtools/get_cpu_count.py  |  7 ++++++
>  buildtools/get_numa_count.py | 22 +++++++++++++++++
>  buildtools/meson.build       |  2 ++
>  config/meson.build           | 48 ++++++++++++++++++++++++++++++++++--
>  config/x86/meson.build       |  2 ++
>  meson_options.txt            |  8 +++---
>  6 files changed, 83 insertions(+), 6 deletions(-)
>  create mode 100644 buildtools/get_cpu_count.py
>  create mode 100644 buildtools/get_numa_count.py
> 
> diff --git a/buildtools/get_cpu_count.py b/buildtools/get_cpu_count.py
> new file mode 100644
> index 000000000..386f85f8b
> --- /dev/null
> +++ b/buildtools/get_cpu_count.py
> @@ -0,0 +1,7 @@
> +#!/usr/bin/python3

I think for DPDK python scripts we standardized on "/usr/bin/env python3"
[Someone please correct me if I'm wrong here!]

> +# SPDX-License-Identifier: BSD-3-Clause
> +# Copyright (c) 2020 PANTHEON.tech s.r.o.
> +
> +import os
> +
> +print(os.cpu_count())
> diff --git a/buildtools/get_numa_count.py b/buildtools/get_numa_count.py
> new file mode 100644
> index 000000000..e7f3de6b0
> --- /dev/null
> +++ b/buildtools/get_numa_count.py
> @@ -0,0 +1,22 @@
> +#!/usr/bin/python3
> +# SPDX-License-Identifier: BSD-3-Clause
> +# Copyright (c) 2020 PANTHEON.tech s.r.o.
> +
> +import ctypes
> +import glob
> +import os
> +import subprocess
> +
> +if os.name == 'posix':
> +    if os.path.isdir('/sys/devices/system/node'):
> +        print(len(glob.glob('/sys/devices/system/node/node*')))
> +    else:
> +        subprocess.run(['sysctl', 'vm.ndomains'])
> +

I just realised that by default sysctl outputs the name of the element as
well as the value, which is not what we want. Using "-n" flag suppresses
the name, so please add that to the command. [Sorry for not spotting this
in previous versions].

> +elif os.name == 'nt':
> +    libkernel32 = ctypes.windll.kernel32
> +
> +    count = ctypes.c_ulong()
> +
> +    libkernel32.GetNumaHighestNodeNumber(ctypes.pointer(count))
> +    print(count.value + 1)
> diff --git a/buildtools/meson.build b/buildtools/meson.build
> index 04808dabc..925e733b1 100644
> --- a/buildtools/meson.build
> +++ b/buildtools/meson.build
> @@ -17,3 +17,5 @@ else
>  endif
>  map_to_win_cmd = py3 + files('map_to_win.py')
>  sphinx_wrapper = py3 + files('call-sphinx-build.py')
> +get_cpu_count_cmd = py3 + files('get_cpu_count.py')
> +get_numa_count_cmd = py3 + files('get_numa_count.py')
> diff --git a/config/meson.build b/config/meson.build
> index c7f7aa6e2..a51000ab2 100644
> --- a/config/meson.build
> +++ b/config/meson.build
> @@ -231,8 +231,6 @@ foreach arg: warning_flags
>  endforeach
>  
>  # set other values pulled from the build options
> -dpdk_conf.set('RTE_MAX_LCORE', get_option('max_lcores'))
> -dpdk_conf.set('RTE_MAX_NUMA_NODES', get_option('max_numa_nodes'))
>  dpdk_conf.set('RTE_MAX_ETHPORTS', get_option('max_ethports'))
>  dpdk_conf.set('RTE_LIBEAL_USE_HPET', get_option('use_hpet'))
>  dpdk_conf.set('RTE_ENABLE_TRACE_FP', get_option('enable_trace_fp'))
> @@ -251,6 +249,52 @@ compile_time_cpuflags = []
>  subdir(arch_subdir)
>  dpdk_conf.set('RTE_COMPILE_TIME_CPUFLAGS', ','.join(compile_time_cpuflags))
>  
> +max_lcores = get_option('max_lcores')
> +max_numa_nodes = get_option('max_numa_nodes')
> +if max_lcores > 0
> +	# Overwrite the default value from arch_subdir with user input
> +	dpdk_conf.set('RTE_MAX_LCORE', max_lcores)
> +elif max_lcores == -1
> +	# Overwrite the default value with discovered values
> +	if not meson.is_cross_build()

I'd suggest changing this to the inverse and erroring out, since I assume
that having -1 for cross-compilation should be an error?

	if meson.is_cross_build()
		error('....')
	endif

Thereafter you save a level of indentation for the rest of the block.

> +		# Discovery makes sense only for non-cross builds
> +		max_lcores = run_command(get_cpu_count_cmd).stdout().to_int()
> +		min_lcores = 2
> +		# DPDK must be build for at least 2 cores

Even 2 seems rather low. I'd personally look to set the minimum at 4.

> +		if max_lcores < min_lcores
> +			message('Found less than @0@ cores, building for @0@ cores'.format(min_lcores))
> +			max_lcores = min_lcores
> +		else
> +			message('Found @0@ cores'.format(max_lcores))
> +		endif
> +		dpdk_conf.set('RTE_MAX_LCORE', max_lcores)
> +	endif
> +endif
> +
> +if max_numa_nodes > 0
> +	# Overwrite the default value from arch_subdir with user input
> +	dpdk_conf.set('RTE_MAX_NUMA_NODES', max_numa_nodes)
> +elif max_numa_nodes == -1
> +	# Overwrite the default value with discovered values
> +	if not meson.is_cross_build()
> +		# Discovery makes sense only for non-cross builds
> +		max_numa_nodes = run_command(get_numa_count_cmd).stdout().to_int()
> +		message('Found @0@ numa nodes'.format(max_numa_nodes))
> +		dpdk_conf.set('RTE_MAX_NUMA_NODES', max_numa_nodes)
> +	endif
> +endif
> +
> +# check that cpu and numa count is set in cross builds
> +# and error out if it's not set
> +if meson.is_cross_build()
> +	if not dpdk_conf.has('RTE_MAX_LCORE')
> +		error('Number of cores for cross build not specified in @0@ subdir (e.g. in a cross-file) nor on the cmdline'.format(arch_subdir))
> +	endif
> +	if not dpdk_conf.has('RTE_MAX_NUMA_NODES')
> +		error('Number of numa nodes for cross build not specified in @0@ subdir (e.g. in a cross-file) nor on the cmdline'.format(arch_subdir))
> +	endif
> +endif
> +

Not sure you need the is_cross_build bit, since if we don't have
RTE_MAX_LCORES set at all, then it's an error too. It just so happens that
the code paths to this point prevent that from ever happening.

Is the intention here that the subdir meson.build files will just literally
take the value from the cross-file, or are you doing other processing in
the ARM case, e.g. to compute it based on other info. If it's just the
former where it's always read from the cross-files, I'd have the reading
done in this file (above the other assignment work), since it would be
architecture agnostic.

>  # set the install path for the drivers
>  dpdk_conf.set_quoted('RTE_EAL_PMD_PATH', eal_pmd_path)
>  
<snip>
  
Juraj Linkeš Oct. 30, 2020, 11:04 a.m. UTC | #2
> -----Original Message-----
> From: Bruce Richardson <bruce.richardson@intel.com>
> Sent: Wednesday, October 28, 2020 4:04 PM
> To: Juraj Linkeš <juraj.linkes@pantheon.tech>
> Cc: Ruifeng.Wang@arm.com; Honnappa.Nagarahalli@arm.com;
> Phil.Yang@arm.com; vcchunga@amazon.com; Dharmik.Thakkar@arm.com;
> jerinjacobk@gmail.com; hemant.agrawal@nxp.com; dev@dpdk.org
> Subject: Re: [PATCH v5 08/11] build: optional NUMA and cpu counts detection
> 
> On Wed, Oct 28, 2020 at 03:04:02PM +0100, Juraj Linkeš wrote:
> > Add an option to automatically discover the host's numa and cpu counts
> > and use those values for a non cross-build.
> > Give users the option to override the per-arch default values or
> > values from cross files by specifying them on the command line with
> > -Dmax_lcores and -Dmax_numa_nodes.
> >
> > Signed-off-by: Juraj Linkeš <juraj.linkes@pantheon.tech>
> > ---
> >  buildtools/get_cpu_count.py  |  7 ++++++
> > buildtools/get_numa_count.py | 22 +++++++++++++++++
> >  buildtools/meson.build       |  2 ++
> >  config/meson.build           | 48 ++++++++++++++++++++++++++++++++++--
> >  config/x86/meson.build       |  2 ++
> >  meson_options.txt            |  8 +++---
> >  6 files changed, 83 insertions(+), 6 deletions(-)  create mode 100644
> > buildtools/get_cpu_count.py  create mode 100644
> > buildtools/get_numa_count.py
> >
> > diff --git a/buildtools/get_cpu_count.py b/buildtools/get_cpu_count.py
> > new file mode 100644 index 000000000..386f85f8b
> > --- /dev/null
> > +++ b/buildtools/get_cpu_count.py
> > @@ -0,0 +1,7 @@
> > +#!/usr/bin/python3
> 
> I think for DPDK python scripts we standardized on "/usr/bin/env python3"
> [Someone please correct me if I'm wrong here!]
> 

The other scripts are using it, so I'll change it.

> > +# SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2020
> > +PANTHEON.tech s.r.o.
> > +
> > +import os
> > +
> > +print(os.cpu_count())
> > diff --git a/buildtools/get_numa_count.py
> > b/buildtools/get_numa_count.py new file mode 100644 index
> > 000000000..e7f3de6b0
> > --- /dev/null
> > +++ b/buildtools/get_numa_count.py
> > @@ -0,0 +1,22 @@
> > +#!/usr/bin/python3
> > +# SPDX-License-Identifier: BSD-3-Clause # Copyright (c) 2020
> > +PANTHEON.tech s.r.o.
> > +
> > +import ctypes
> > +import glob
> > +import os
> > +import subprocess
> > +
> > +if os.name == 'posix':
> > +    if os.path.isdir('/sys/devices/system/node'):
> > +        print(len(glob.glob('/sys/devices/system/node/node*')))
> > +    else:
> > +        subprocess.run(['sysctl', 'vm.ndomains'])
> > +
> 
> I just realised that by default sysctl outputs the name of the element as well as
> the value, which is not what we want. Using "-n" flag suppresses the name, so
> please add that to the command. [Sorry for not spotting this in previous
> versions].
> 

Ok, I'll add this. I verified how the output looks and we shouldn't need to change anything after this.

> > +elif os.name == 'nt':
> > +    libkernel32 = ctypes.windll.kernel32
> > +
> > +    count = ctypes.c_ulong()
> > +
> > +    libkernel32.GetNumaHighestNodeNumber(ctypes.pointer(count))
> > +    print(count.value + 1)
> > diff --git a/buildtools/meson.build b/buildtools/meson.build index
> > 04808dabc..925e733b1 100644
> > --- a/buildtools/meson.build
> > +++ b/buildtools/meson.build
> > @@ -17,3 +17,5 @@ else
> >  endif
> >  map_to_win_cmd = py3 + files('map_to_win.py')  sphinx_wrapper = py3 +
> > files('call-sphinx-build.py')
> > +get_cpu_count_cmd = py3 + files('get_cpu_count.py')
> > +get_numa_count_cmd = py3 + files('get_numa_count.py')
> > diff --git a/config/meson.build b/config/meson.build index
> > c7f7aa6e2..a51000ab2 100644
> > --- a/config/meson.build
> > +++ b/config/meson.build
> > @@ -231,8 +231,6 @@ foreach arg: warning_flags  endforeach
> >
> >  # set other values pulled from the build options
> > -dpdk_conf.set('RTE_MAX_LCORE', get_option('max_lcores'))
> > -dpdk_conf.set('RTE_MAX_NUMA_NODES', get_option('max_numa_nodes'))
> > dpdk_conf.set('RTE_MAX_ETHPORTS', get_option('max_ethports'))
> > dpdk_conf.set('RTE_LIBEAL_USE_HPET', get_option('use_hpet'))
> > dpdk_conf.set('RTE_ENABLE_TRACE_FP', get_option('enable_trace_fp')) @@
> > -251,6 +249,52 @@ compile_time_cpuflags = []
> >  subdir(arch_subdir)
> >  dpdk_conf.set('RTE_COMPILE_TIME_CPUFLAGS',
> > ','.join(compile_time_cpuflags))
> >
> > +max_lcores = get_option('max_lcores') max_numa_nodes =
> > +get_option('max_numa_nodes') if max_lcores > 0
> > +	# Overwrite the default value from arch_subdir with user input
> > +	dpdk_conf.set('RTE_MAX_LCORE', max_lcores) elif max_lcores == -1
> > +	# Overwrite the default value with discovered values
> > +	if not meson.is_cross_build()
> 
> I'd suggest changing this to the inverse and erroring out, since I assume that
> having -1 for cross-compilation should be an error?
> 
> 	if meson.is_cross_build()
> 		error('....')
> 	endif
> 
> Thereafter you save a level of indentation for the rest of the block.
> 

I thought of this in terms of cross-compilation taking precedence over discovered values, but erroring out makes sense.

> > +		# Discovery makes sense only for non-cross builds
> > +		max_lcores =
> run_command(get_cpu_count_cmd).stdout().to_int()
> > +		min_lcores = 2
> > +		# DPDK must be build for at least 2 cores
> 
> Even 2 seems rather low. I'd personally look to set the minimum at 4.
> 

This was informed by Honnappa's series: http://patches.dpdk.org/project/dpdk/list/?series=13041.
I believed he fixed the build so it works even with 2 cores. Setting it to lowest working number seems like the way to go to me.

> > +		if max_lcores < min_lcores
> > +			message('Found less than @0@ cores, building for
> @0@ cores'.format(min_lcores))
> > +			max_lcores = min_lcores
> > +		else
> > +			message('Found @0@ cores'.format(max_lcores))
> > +		endif
> > +		dpdk_conf.set('RTE_MAX_LCORE', max_lcores)
> > +	endif
> > +endif
> > +
> > +if max_numa_nodes > 0
> > +	# Overwrite the default value from arch_subdir with user input
> > +	dpdk_conf.set('RTE_MAX_NUMA_NODES', max_numa_nodes) elif
> > +max_numa_nodes == -1
> > +	# Overwrite the default value with discovered values
> > +	if not meson.is_cross_build()
> > +		# Discovery makes sense only for non-cross builds
> > +		max_numa_nodes =
> run_command(get_numa_count_cmd).stdout().to_int()
> > +		message('Found @0@ numa nodes'.format(max_numa_nodes))
> > +		dpdk_conf.set('RTE_MAX_NUMA_NODES', max_numa_nodes)
> > +	endif
> > +endif
> > +
> > +# check that cpu and numa count is set in cross builds # and error
> > +out if it's not set if meson.is_cross_build()
> > +	if not dpdk_conf.has('RTE_MAX_LCORE')
> > +		error('Number of cores for cross build not specified in @0@
> subdir (e.g. in a cross-file) nor on the cmdline'.format(arch_subdir))
> > +	endif
> > +	if not dpdk_conf.has('RTE_MAX_NUMA_NODES')
> > +		error('Number of numa nodes for cross build not specified in
> @0@ subdir (e.g. in a cross-file) nor on the cmdline'.format(arch_subdir))
> > +	endif
> > +endif
> > +
> 
> Not sure you need the is_cross_build bit, since if we don't have
> RTE_MAX_LCORES set at all, then it's an error too. It just so happens that the
> code paths to this point prevent that from ever happening.
> 

Ok, makes sense to do a generic check here.

> Is the intention here that the subdir meson.build files will just literally take the
> value from the cross-file, or are you doing other processing in the ARM case,
> e.g. to compute it based on other info. If it's just the former where it's always
> read from the cross-files, I'd have the reading done in this file (above the other
> assignment work), since it would be architecture agnostic.
> 

Good point, we're not doing anything special, so all configuration from cross files should be processed this way.

> >  # set the install path for the drivers
> > dpdk_conf.set_quoted('RTE_EAL_PMD_PATH', eal_pmd_path)
> >
> <snip>
  

Patch

diff --git a/buildtools/get_cpu_count.py b/buildtools/get_cpu_count.py
new file mode 100644
index 000000000..386f85f8b
--- /dev/null
+++ b/buildtools/get_cpu_count.py
@@ -0,0 +1,7 @@ 
+#!/usr/bin/python3
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2020 PANTHEON.tech s.r.o.
+
+import os
+
+print(os.cpu_count())
diff --git a/buildtools/get_numa_count.py b/buildtools/get_numa_count.py
new file mode 100644
index 000000000..e7f3de6b0
--- /dev/null
+++ b/buildtools/get_numa_count.py
@@ -0,0 +1,22 @@ 
+#!/usr/bin/python3
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2020 PANTHEON.tech s.r.o.
+
+import ctypes
+import glob
+import os
+import subprocess
+
+if os.name == 'posix':
+    if os.path.isdir('/sys/devices/system/node'):
+        print(len(glob.glob('/sys/devices/system/node/node*')))
+    else:
+        subprocess.run(['sysctl', 'vm.ndomains'])
+
+elif os.name == 'nt':
+    libkernel32 = ctypes.windll.kernel32
+
+    count = ctypes.c_ulong()
+
+    libkernel32.GetNumaHighestNodeNumber(ctypes.pointer(count))
+    print(count.value + 1)
diff --git a/buildtools/meson.build b/buildtools/meson.build
index 04808dabc..925e733b1 100644
--- a/buildtools/meson.build
+++ b/buildtools/meson.build
@@ -17,3 +17,5 @@  else
 endif
 map_to_win_cmd = py3 + files('map_to_win.py')
 sphinx_wrapper = py3 + files('call-sphinx-build.py')
+get_cpu_count_cmd = py3 + files('get_cpu_count.py')
+get_numa_count_cmd = py3 + files('get_numa_count.py')
diff --git a/config/meson.build b/config/meson.build
index c7f7aa6e2..a51000ab2 100644
--- a/config/meson.build
+++ b/config/meson.build
@@ -231,8 +231,6 @@  foreach arg: warning_flags
 endforeach
 
 # set other values pulled from the build options
-dpdk_conf.set('RTE_MAX_LCORE', get_option('max_lcores'))
-dpdk_conf.set('RTE_MAX_NUMA_NODES', get_option('max_numa_nodes'))
 dpdk_conf.set('RTE_MAX_ETHPORTS', get_option('max_ethports'))
 dpdk_conf.set('RTE_LIBEAL_USE_HPET', get_option('use_hpet'))
 dpdk_conf.set('RTE_ENABLE_TRACE_FP', get_option('enable_trace_fp'))
@@ -251,6 +249,52 @@  compile_time_cpuflags = []
 subdir(arch_subdir)
 dpdk_conf.set('RTE_COMPILE_TIME_CPUFLAGS', ','.join(compile_time_cpuflags))
 
+max_lcores = get_option('max_lcores')
+max_numa_nodes = get_option('max_numa_nodes')
+if max_lcores > 0
+	# Overwrite the default value from arch_subdir with user input
+	dpdk_conf.set('RTE_MAX_LCORE', max_lcores)
+elif max_lcores == -1
+	# Overwrite the default value with discovered values
+	if not meson.is_cross_build()
+		# Discovery makes sense only for non-cross builds
+		max_lcores = run_command(get_cpu_count_cmd).stdout().to_int()
+		min_lcores = 2
+		# DPDK must be build for at least 2 cores
+		if max_lcores < min_lcores
+			message('Found less than @0@ cores, building for @0@ cores'.format(min_lcores))
+			max_lcores = min_lcores
+		else
+			message('Found @0@ cores'.format(max_lcores))
+		endif
+		dpdk_conf.set('RTE_MAX_LCORE', max_lcores)
+	endif
+endif
+
+if max_numa_nodes > 0
+	# Overwrite the default value from arch_subdir with user input
+	dpdk_conf.set('RTE_MAX_NUMA_NODES', max_numa_nodes)
+elif max_numa_nodes == -1
+	# Overwrite the default value with discovered values
+	if not meson.is_cross_build()
+		# Discovery makes sense only for non-cross builds
+		max_numa_nodes = run_command(get_numa_count_cmd).stdout().to_int()
+		message('Found @0@ numa nodes'.format(max_numa_nodes))
+		dpdk_conf.set('RTE_MAX_NUMA_NODES', max_numa_nodes)
+	endif
+endif
+
+# check that cpu and numa count is set in cross builds
+# and error out if it's not set
+if meson.is_cross_build()
+	if not dpdk_conf.has('RTE_MAX_LCORE')
+		error('Number of cores for cross build not specified in @0@ subdir (e.g. in a cross-file) nor on the cmdline'.format(arch_subdir))
+	endif
+	if not dpdk_conf.has('RTE_MAX_NUMA_NODES')
+		error('Number of numa nodes for cross build not specified in @0@ subdir (e.g. in a cross-file) nor on the cmdline'.format(arch_subdir))
+	endif
+endif
+
 # set the install path for the drivers
 dpdk_conf.set_quoted('RTE_EAL_PMD_PATH', eal_pmd_path)
 
diff --git a/config/x86/meson.build b/config/x86/meson.build
index 31bfa63b1..4989d47f3 100644
--- a/config/x86/meson.build
+++ b/config/x86/meson.build
@@ -57,3 +57,5 @@  else
 endif
 
 dpdk_conf.set('RTE_CACHE_LINE_SIZE', 64)
+dpdk_conf.set('RTE_MAX_LCORE', 128)
+dpdk_conf.set('RTE_MAX_NUMA_NODES', 4)
diff --git a/meson_options.txt b/meson_options.txt
index 9bf18ab6b..a78c21b0d 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -26,10 +26,10 @@  option('machine', type: 'string', value: 'native',
 	description: 'set the target machine type')
 option('max_ethports', type: 'integer', value: 32,
 	description: 'maximum number of Ethernet devices')
-option('max_lcores', type: 'integer', value: 128,
-	description: 'maximum number of cores/threads supported by EAL')
-option('max_numa_nodes', type: 'integer', value: 4,
-	description: 'maximum number of NUMA nodes supported by EAL')
+option('max_lcores', type: 'integer', value: 0,
+	description: 'maximum number of cores/threads supported by EAL. Set to positive integer to overwrite per-arch or cross-compilation defaults. Set to -1 to use number of cores on the build machine.')
+option('max_numa_nodes', type: 'integer', value: 0,
+	description: 'maximum number of NUMA nodes supported by EAL. Set to positive integer to overwrite per-arch or cross-compilation defaults. Set to -1 to use number of numa nodes on the build machine.')
 option('enable_trace_fp', type: 'boolean', value: false,
 	description: 'enable fast path trace points.')
 option('tests', type: 'boolean', value: true,