[v13,09/10] eal: add EAL argument for setting thread priority

Message ID 1628017291-3756-10-git-send-email-navasile@linux.microsoft.com (mailing list archive)
State Superseded, archived
Delegated to: David Marchand
Headers
Series eal: Add EAL API for threading |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Narcisa Ana Maria Vasile Aug. 3, 2021, 7:01 p.m. UTC
From: Narcisa Vasile <navasile@microsoft.com>

Allow the user to choose the thread priority through an EAL
command line argument.

The user can choose thread priority through an EAL parameter,
when starting an application.  If EAL parameter is not used,
the per-platform default value for thread priority is used.
Otherwise administrator has an option to set one of available options:
 --thread-prio normal
 --thread-prio realtime

 Example:
./dpdk-l2fwd -l 0-3 -n 4 --thread-prio normal -- -q 8 -p ffff

Signed-off-by: Narcisa Vasile <navasile@microsoft.com>
---
 lib/eal/common/eal_common_options.c | 28 +++++++++++++++++++++++++++-
 lib/eal/common/eal_internal_cfg.h   |  2 ++
 lib/eal/common/eal_options.h        |  2 ++
 3 files changed, 31 insertions(+), 1 deletion(-)
  

Comments

Dmitry Kozlyuk Aug. 15, 2021, 7:56 p.m. UTC | #1
2021-08-03 12:01 (UTC-0700), Narcisa Ana Maria Vasile:
> From: Narcisa Vasile <navasile@microsoft.com>
> 
> Allow the user to choose the thread priority through an EAL
> command line argument.

EAL options documentation update is needed.
With current wording the user may ask: the priority of which thread?

> The user can choose thread priority through an EAL parameter,
> when starting an application.  If EAL parameter is not used,
> the per-platform default value for thread priority is used.
> Otherwise administrator has an option to set one of available options:
>  --thread-prio normal
>  --thread-prio realtime
> 
>  Example:
> ./dpdk-l2fwd -l 0-3 -n 4 --thread-prio normal -- -q 8 -p ffff

IIUC, with this patch --thread-prio affects nothing,
because the value it fills is not used anywhere.
Why is it needed in this patchset then?

> 
> Signed-off-by: Narcisa Vasile <navasile@microsoft.com>
> ---
>  lib/eal/common/eal_common_options.c | 28 +++++++++++++++++++++++++++-
>  lib/eal/common/eal_internal_cfg.h   |  2 ++
>  lib/eal/common/eal_options.h        |  2 ++
>  3 files changed, 31 insertions(+), 1 deletion(-)
> 
> diff --git a/lib/eal/common/eal_common_options.c b/lib/eal/common/eal_common_options.c
> index ff5861b5f3..9d29696b84 100644
> --- a/lib/eal/common/eal_common_options.c
> +++ b/lib/eal/common/eal_common_options.c
> @@ -107,6 +107,7 @@ eal_long_options[] = {
>  	{OPT_TELEMETRY,         0, NULL, OPT_TELEMETRY_NUM        },
>  	{OPT_NO_TELEMETRY,      0, NULL, OPT_NO_TELEMETRY_NUM     },
>  	{OPT_FORCE_MAX_SIMD_BITWIDTH, 1, NULL, OPT_FORCE_MAX_SIMD_BITWIDTH_NUM},
> +	{OPT_THREAD_PRIORITY,   1, NULL, OPT_THREAD_PRIORITY_NUM},
>  
>  	/* legacy options that will be removed in future */
>  	{OPT_PCI_BLACKLIST,     1, NULL, OPT_PCI_BLACKLIST_NUM    },
> @@ -1412,6 +1413,24 @@ eal_parse_simd_bitwidth(const char *arg)
>  	return 0;
>  }
>  
> +static int
> +eal_parse_thread_priority(const char *arg)
> +{
> +	struct internal_config *internal_conf =
> +		eal_get_internal_configuration();
> +	enum rte_thread_priority priority;
> +
> +	if (!strncmp("normal", arg, sizeof("normal")))
> +		priority = RTE_THREAD_PRIORITY_NORMAL;
> +	else if (!strncmp("realtime", arg, sizeof("realtime")))
> +		priority = RTE_THREAD_PRIORITY_REALTIME_CRITICAL;
> +	else
> +		return -1;

Why not just `strcmp()`?

> +
> +	internal_conf->thread_priority = priority;
> +	return 0;
> +}
> +
>  static int
>  eal_parse_base_virtaddr(const char *arg)
>  {
> @@ -1825,7 +1844,13 @@ eal_parse_common_option(int opt, const char *optarg,
>  			return -1;
>  		}
>  		break;
> -
> +	case OPT_THREAD_PRIORITY_NUM:
> +		if (eal_parse_thread_priority(optarg) < 0) {
> +			RTE_LOG(ERR, EAL, "invalid parameter for --"
> +					OPT_THREAD_PRIORITY "\n");
> +			return -1;
> +		}
> +		break;
>  	/* don't know what to do, leave this to caller */
>  	default:
>  		return 1;
> @@ -2088,6 +2113,7 @@ eal_common_usage(void)
>  	       "                      (can be used multiple times)\n"
>  	       "  --"OPT_VMWARE_TSC_MAP"    Use VMware TSC map instead of native RDTSC\n"
>  	       "  --"OPT_PROC_TYPE"         Type of this process (primary|secondary|auto)\n"
> +	       "  --"OPT_THREAD_PRIORITY"   Set threads priority (normal|realtime)\n"
>  #ifndef RTE_EXEC_ENV_WINDOWS
>  	       "  --"OPT_SYSLOG"            Set syslog facility\n"
>  #endif
> diff --git a/lib/eal/common/eal_internal_cfg.h b/lib/eal/common/eal_internal_cfg.h
> index d6c0470eb8..b2996cd65b 100644
> --- a/lib/eal/common/eal_internal_cfg.h
> +++ b/lib/eal/common/eal_internal_cfg.h
> @@ -94,6 +94,8 @@ struct internal_config {
>  	unsigned int no_telemetry; /**< true to disable Telemetry */
>  	struct simd_bitwidth max_simd_bitwidth;
>  	/**< max simd bitwidth path to use */
> +	enum rte_thread_priority thread_priority;
> +	/**< thread priority to configure */

Where is the default set?
If you remove RTE_THREAD_PRIORITY_UNDEFINED in patch 2/10,
it will be RTE_THREAD_PRIORITY_NORMAL in zeroed-out structure, which is OK.

>  };
>  
>  void eal_reset_internal_config(struct internal_config *internal_cfg);
> diff --git a/lib/eal/common/eal_options.h b/lib/eal/common/eal_options.h
> index 7b348e707f..9f5b209f64 100644
> --- a/lib/eal/common/eal_options.h
> +++ b/lib/eal/common/eal_options.h
> @@ -93,6 +93,8 @@ enum {
>  	OPT_NO_TELEMETRY_NUM,
>  #define OPT_FORCE_MAX_SIMD_BITWIDTH  "force-max-simd-bitwidth"
>  	OPT_FORCE_MAX_SIMD_BITWIDTH_NUM,
> +#define OPT_THREAD_PRIORITY          "thread-prio"
> +	OPT_THREAD_PRIORITY_NUM,
>  
>  	/* legacy option that will be removed in future */
>  #define OPT_PCI_BLACKLIST     "pci-blacklist"
  
Stephen Hemminger Aug. 18, 2021, 9:28 p.m. UTC | #2
On Tue,  3 Aug 2021 12:01:30 -0700
Narcisa Ana Maria Vasile <navasile@linux.microsoft.com> wrote:

> +static int
> +eal_parse_thread_priority(const char *arg)
> +{
> +	struct internal_config *internal_conf =
> +		eal_get_internal_configuration();
> +	enum rte_thread_priority priority;
> +
> +	if (!strncmp("normal", arg, sizeof("normal")))
> +		priority = RTE_THREAD_PRIORITY_NORMAL;
> +	else if (!strncmp("realtime", arg, sizeof("realtime")))
> +		priority = RTE_THREAD_PRIORITY_REALTIME_CRITICAL;
> +	else
> +		return -1;
> +
> +	internal_conf->thread_priority = priority;
> +	return 0;
> +}
> +

In my experience using real time priority is dangerous and risks starvation
and deadlock. The problem is that DPDK applications are typically 100% CPU
poll mode with no system calls; but the kernel has a number of worker threads
that can be required on those CPUs.

The typical failure is a disk completion interrupt happens on a CPU that
is being used by DPDK lcore thread. With RT priority, the kernel thread to
process that I/O completion never runs because the RT user thread has
higher priority than the kernel I/O completion thread.

It maybe possible to workaround this with lots of hand crafting through
sysfs to reassign threads and irq's. Also, later kernels with full RT
might also help.

Before putting this in as part of DPDK in EAL, a full set of testing
and documentation of how to configure these kind of applications and
systems is needed.

I can't recommend this going in at this time.
  
Bruce Richardson Aug. 19, 2021, 9:06 a.m. UTC | #3
On Wed, Aug 18, 2021 at 02:28:33PM -0700, Stephen Hemminger wrote:
> On Tue,  3 Aug 2021 12:01:30 -0700 Narcisa Ana Maria Vasile
> <navasile@linux.microsoft.com> wrote:
> 
> > +static int +eal_parse_thread_priority(const char *arg) +{ +
> > struct internal_config *internal_conf = +
> > eal_get_internal_configuration(); +	enum rte_thread_priority priority;
> > + +	if (!strncmp("normal", arg, sizeof("normal"))) +
> > priority = RTE_THREAD_PRIORITY_NORMAL; +	else if
> > (!strncmp("realtime", arg, sizeof("realtime"))) +		priority =
> > RTE_THREAD_PRIORITY_REALTIME_CRITICAL; +	else +		return -1;
> > + +	internal_conf->thread_priority = priority; +	return 0; +} +
> 
> In my experience using real time priority is dangerous and risks
> starvation and deadlock. The problem is that DPDK applications are
> typically 100% CPU poll mode with no system calls; but the kernel has a
> number of worker threads that can be required on those CPUs.
> 
> The typical failure is a disk completion interrupt happens on a CPU that
> is being used by DPDK lcore thread. With RT priority, the kernel thread
> to process that I/O completion never runs because the RT user thread has
> higher priority than the kernel I/O completion thread.
> 
> It maybe possible to workaround this with lots of hand crafting through
> sysfs to reassign threads and irq's. Also, later kernels with full RT
> might also help.
> 
> Before putting this in as part of DPDK in EAL, a full set of testing and
> documentation of how to configure these kind of applications and systems
> is needed.
>
I would tend to agree caution here, based on my experience of having locked
up a number of systems in the past when testing running DPDK apps with RT
priority!
  
Narcisa Ana Maria Vasile Aug. 19, 2021, 9:30 p.m. UTC | #4
On Thu, Aug 19, 2021 at 10:06:06AM +0100, Bruce Richardson wrote:
> On Wed, Aug 18, 2021 at 02:28:33PM -0700, Stephen Hemminger wrote:
> > On Tue,  3 Aug 2021 12:01:30 -0700 Narcisa Ana Maria Vasile
> > <navasile@linux.microsoft.com> wrote:
> > 
> > > +static int +eal_parse_thread_priority(const char *arg) +{ +
> > > struct internal_config *internal_conf = +
> > > eal_get_internal_configuration(); +	enum rte_thread_priority priority;
> > > + +	if (!strncmp("normal", arg, sizeof("normal"))) +
> > > priority = RTE_THREAD_PRIORITY_NORMAL; +	else if
> > > (!strncmp("realtime", arg, sizeof("realtime"))) +		priority =
> > > RTE_THREAD_PRIORITY_REALTIME_CRITICAL; +	else +		return -1;
> > > + +	internal_conf->thread_priority = priority; +	return 0; +} +
> > 
> > In my experience using real time priority is dangerous and risks
> > starvation and deadlock. The problem is that DPDK applications are
> > typically 100% CPU poll mode with no system calls; but the kernel has a
> > number of worker threads that can be required on those CPUs.
> > 
> > The typical failure is a disk completion interrupt happens on a CPU that
> > is being used by DPDK lcore thread. With RT priority, the kernel thread
> > to process that I/O completion never runs because the RT user thread has
> > higher priority than the kernel I/O completion thread.
> > 
> > It maybe possible to workaround this with lots of hand crafting through
> > sysfs to reassign threads and irq's. Also, later kernels with full RT
> > might also help.
> > 
> > Before putting this in as part of DPDK in EAL, a full set of testing and
> > documentation of how to configure these kind of applications and systems
> > is needed.
> >
> I would tend to agree caution here, based on my experience of having locked
> up a number of systems in the past when testing running DPDK apps with RT
> priority!

Thank you for the comments! I've added this option since it was requested by
multiple users. I understand RT priority causes issues on Linux platforms.
On Windows we want to be able to use REALTIME priority in certain scenarios.

Would it be acceptable to replace this option with a "HIGH_PRIORITY" one
and keep it realtime on Windows and choose a higher (but non-realtime) option on Linux?
However, there are 2 issues here:
 * We will have different behaviors between the 2 platforms.
 * Not sure if I can set a normal but higher priority on Linux. SCHED_OTHER only allows
   one value and Linux "nice" values don't help. If anyone knows of a way to accomplish
   this on Linux, please do advise.
Alternatively, we can have this option for Windows only.

In the meantime, I've removed this patch from this patchset in v14 as the cmdline option is not
being enabled yet, as DmitryK noted.
  
Stephen Hemminger Aug. 19, 2021, 9:33 p.m. UTC | #5
On Thu, 19 Aug 2021 14:30:19 -0700
Narcisa Ana Maria Vasile <navasile@linux.microsoft.com> wrote:

> On Thu, Aug 19, 2021 at 10:06:06AM +0100, Bruce Richardson wrote:
> > On Wed, Aug 18, 2021 at 02:28:33PM -0700, Stephen Hemminger wrote:  
> > > On Tue,  3 Aug 2021 12:01:30 -0700 Narcisa Ana Maria Vasile
> > > <navasile@linux.microsoft.com> wrote:
> > >   
> > > > +static int +eal_parse_thread_priority(const char *arg) +{ +
> > > > struct internal_config *internal_conf = +
> > > > eal_get_internal_configuration(); +	enum rte_thread_priority priority;
> > > > + +	if (!strncmp("normal", arg, sizeof("normal"))) +
> > > > priority = RTE_THREAD_PRIORITY_NORMAL; +	else if
> > > > (!strncmp("realtime", arg, sizeof("realtime"))) +		priority =
> > > > RTE_THREAD_PRIORITY_REALTIME_CRITICAL; +	else +		return -1;
> > > > + +	internal_conf->thread_priority = priority; +	return 0; +} +  
> > > 
> > > In my experience using real time priority is dangerous and risks
> > > starvation and deadlock. The problem is that DPDK applications are
> > > typically 100% CPU poll mode with no system calls; but the kernel has a
> > > number of worker threads that can be required on those CPUs.
> > > 
> > > The typical failure is a disk completion interrupt happens on a CPU that
> > > is being used by DPDK lcore thread. With RT priority, the kernel thread
> > > to process that I/O completion never runs because the RT user thread has
> > > higher priority than the kernel I/O completion thread.
> > > 
> > > It maybe possible to workaround this with lots of hand crafting through
> > > sysfs to reassign threads and irq's. Also, later kernels with full RT
> > > might also help.
> > > 
> > > Before putting this in as part of DPDK in EAL, a full set of testing and
> > > documentation of how to configure these kind of applications and systems
> > > is needed.
> > >  
> > I would tend to agree caution here, based on my experience of having locked
> > up a number of systems in the past when testing running DPDK apps with RT
> > priority!  
> 
> Thank you for the comments! I've added this option since it was requested by
> multiple users. I understand RT priority causes issues on Linux platforms.
> On Windows we want to be able to use REALTIME priority in certain scenarios.
> 
> Would it be acceptable to replace this option with a "HIGH_PRIORITY" one
> and keep it realtime on Windows and choose a higher (but non-realtime) option on Linux?
> However, there are 2 issues here:
>  * We will have different behaviors between the 2 platforms.
>  * Not sure if I can set a normal but higher priority on Linux. SCHED_OTHER only allows
>    one value and Linux "nice" values don't help. If anyone knows of a way to accomplish
>    this on Linux, please do advise.
> Alternatively, we can have this option for Windows only.
> 
> In the meantime, I've removed this patch from this patchset in v14 as the cmdline option is not
> being enabled yet, as DmitryK noted.
> 
> 

I think on Linux it should produce a big a*** warning message.
  

Patch

diff --git a/lib/eal/common/eal_common_options.c b/lib/eal/common/eal_common_options.c
index ff5861b5f3..9d29696b84 100644
--- a/lib/eal/common/eal_common_options.c
+++ b/lib/eal/common/eal_common_options.c
@@ -107,6 +107,7 @@  eal_long_options[] = {
 	{OPT_TELEMETRY,         0, NULL, OPT_TELEMETRY_NUM        },
 	{OPT_NO_TELEMETRY,      0, NULL, OPT_NO_TELEMETRY_NUM     },
 	{OPT_FORCE_MAX_SIMD_BITWIDTH, 1, NULL, OPT_FORCE_MAX_SIMD_BITWIDTH_NUM},
+	{OPT_THREAD_PRIORITY,   1, NULL, OPT_THREAD_PRIORITY_NUM},
 
 	/* legacy options that will be removed in future */
 	{OPT_PCI_BLACKLIST,     1, NULL, OPT_PCI_BLACKLIST_NUM    },
@@ -1412,6 +1413,24 @@  eal_parse_simd_bitwidth(const char *arg)
 	return 0;
 }
 
+static int
+eal_parse_thread_priority(const char *arg)
+{
+	struct internal_config *internal_conf =
+		eal_get_internal_configuration();
+	enum rte_thread_priority priority;
+
+	if (!strncmp("normal", arg, sizeof("normal")))
+		priority = RTE_THREAD_PRIORITY_NORMAL;
+	else if (!strncmp("realtime", arg, sizeof("realtime")))
+		priority = RTE_THREAD_PRIORITY_REALTIME_CRITICAL;
+	else
+		return -1;
+
+	internal_conf->thread_priority = priority;
+	return 0;
+}
+
 static int
 eal_parse_base_virtaddr(const char *arg)
 {
@@ -1825,7 +1844,13 @@  eal_parse_common_option(int opt, const char *optarg,
 			return -1;
 		}
 		break;
-
+	case OPT_THREAD_PRIORITY_NUM:
+		if (eal_parse_thread_priority(optarg) < 0) {
+			RTE_LOG(ERR, EAL, "invalid parameter for --"
+					OPT_THREAD_PRIORITY "\n");
+			return -1;
+		}
+		break;
 	/* don't know what to do, leave this to caller */
 	default:
 		return 1;
@@ -2088,6 +2113,7 @@  eal_common_usage(void)
 	       "                      (can be used multiple times)\n"
 	       "  --"OPT_VMWARE_TSC_MAP"    Use VMware TSC map instead of native RDTSC\n"
 	       "  --"OPT_PROC_TYPE"         Type of this process (primary|secondary|auto)\n"
+	       "  --"OPT_THREAD_PRIORITY"   Set threads priority (normal|realtime)\n"
 #ifndef RTE_EXEC_ENV_WINDOWS
 	       "  --"OPT_SYSLOG"            Set syslog facility\n"
 #endif
diff --git a/lib/eal/common/eal_internal_cfg.h b/lib/eal/common/eal_internal_cfg.h
index d6c0470eb8..b2996cd65b 100644
--- a/lib/eal/common/eal_internal_cfg.h
+++ b/lib/eal/common/eal_internal_cfg.h
@@ -94,6 +94,8 @@  struct internal_config {
 	unsigned int no_telemetry; /**< true to disable Telemetry */
 	struct simd_bitwidth max_simd_bitwidth;
 	/**< max simd bitwidth path to use */
+	enum rte_thread_priority thread_priority;
+	/**< thread priority to configure */
 };
 
 void eal_reset_internal_config(struct internal_config *internal_cfg);
diff --git a/lib/eal/common/eal_options.h b/lib/eal/common/eal_options.h
index 7b348e707f..9f5b209f64 100644
--- a/lib/eal/common/eal_options.h
+++ b/lib/eal/common/eal_options.h
@@ -93,6 +93,8 @@  enum {
 	OPT_NO_TELEMETRY_NUM,
 #define OPT_FORCE_MAX_SIMD_BITWIDTH  "force-max-simd-bitwidth"
 	OPT_FORCE_MAX_SIMD_BITWIDTH_NUM,
+#define OPT_THREAD_PRIORITY          "thread-prio"
+	OPT_THREAD_PRIORITY_NUM,
 
 	/* legacy option that will be removed in future */
 #define OPT_PCI_BLACKLIST     "pci-blacklist"