[dpdk-dev] eal: avoid unnecessary conflicts over rte_config file

Message ID 20161012212917.10760-1-ouster@cs.stanford.edu (mailing list archive)
State Rejected, archived
Delegated to: Thomas Monjalon
Headers

Commit Message

John Ousterhout Oct. 12, 2016, 9:29 p.m. UTC
  Before this patch, DPDK used the file ~/.rte_config as a lock to detect
potential interference between multiple DPDK applications running on the
same machine. However, if a single user ran DPDK applications concurrently
on several different machines, and if the user's home directory was shared
between the machines via NFS, DPDK would incorrectly detect conflicts
for all but the first application and abort them. This patch fixes the
problem by incorporating the machine name into the config file name (e.g.,
~/.rte_hostname_config).

Signed-off-by: John Ousterhout <ouster@cs.stanford.edu>
---
 doc/guides/prog_guide/multi_proc_support.rst | 11 +++++++----
 lib/librte_eal/common/eal_common_proc.c      |  8 ++------
 lib/librte_eal/common/eal_filesystem.h       | 15 +++++++++++++--
 3 files changed, 22 insertions(+), 12 deletions(-)
  

Comments

Ananyev, Konstantin Oct. 13, 2016, 10:53 a.m. UTC | #1
Hi John,

> Before this patch, DPDK used the file ~/.rte_config as a lock to detect
> potential interference between multiple DPDK applications running on the
> same machine. However, if a single user ran DPDK applications concurrently
> on several different machines, and if the user's home directory was shared
> between the machines via NFS, DPDK would incorrectly detect conflicts
> for all but the first application and abort them. This patch fixes the
> problem by incorporating the machine name into the config file name (e.g.,
> ~/.rte_hostname_config).
> 
> Signed-off-by: John Ousterhout <ouster@cs.stanford.edu>
> ---
>  doc/guides/prog_guide/multi_proc_support.rst | 11 +++++++----
>  lib/librte_eal/common/eal_common_proc.c      |  8 ++------
>  lib/librte_eal/common/eal_filesystem.h       | 15 +++++++++++++--
>  3 files changed, 22 insertions(+), 12 deletions(-)
> 
> diff --git a/doc/guides/prog_guide/multi_proc_support.rst b/doc/guides/prog_guide/multi_proc_support.rst
> index badd102..a54fa1c 100644
> --- a/doc/guides/prog_guide/multi_proc_support.rst
> +++ b/doc/guides/prog_guide/multi_proc_support.rst
> @@ -129,10 +129,13 @@ Support for this usage scenario is provided using the ``--file-prefix`` paramete
> 
>  By default, the EAL creates hugepage files on each hugetlbfs filesystem using the rtemap_X filename,
>  where X is in the range 0 to the maximum number of hugepages -1.
> -Similarly, it creates shared configuration files, memory mapped in each process, using the /var/run/.rte_config filename,
> -when run as root (or $HOME/.rte_config when run as a non-root user;
> -if filesystem and device permissions are set up to allow this).
> -The rte part of the filenames of each of the above is configurable using the file-prefix parameter.
> +Similarly, it creates shared configuration files, memory mapped in each process.
> +When run as root, the name of the configuration file will be
> +/var/run/.rte_*host*_config, where *host* is the name of the machine.
> +When run as a non-root user, the the name of the configuration file will be
> +$HOME/.rte_*host*_config (if filesystem and device permissions are set up to allow this).
> +If the ``--file-prefix`` parameter has been specified, its value will be used
> +in place of "rte" in the file names.

I am not sure that we need to handle all such cases inside EAL.
User can easily overcome that problem by just adding something like:
--file-prefix=`uname -n`
to his command-line.
Konstantin

> 
>  In addition to specifying the file-prefix parameter,
>  any DPDK applications that are to be run side-by-side must explicitly limit their memory use.
> diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
> index 12e0fca..517aa0c 100644
> --- a/lib/librte_eal/common/eal_common_proc.c
> +++ b/lib/librte_eal/common/eal_common_proc.c
> @@ -45,12 +45,8 @@ rte_eal_primary_proc_alive(const char *config_file_path)
> 
>  	if (config_file_path)
>  		config_fd = open(config_file_path, O_RDONLY);
> -	else {
> -		char default_path[PATH_MAX+1];
> -		snprintf(default_path, PATH_MAX, RUNTIME_CONFIG_FMT,
> -			 default_config_dir, "rte");
> -		config_fd = open(default_path, O_RDONLY);
> -	}
> +	else
> +		config_fd = open(eal_runtime_config_path(), O_RDONLY);
>  	if (config_fd < 0)
>  		return 0;
> 
> diff --git a/lib/librte_eal/common/eal_filesystem.h b/lib/librte_eal/common/eal_filesystem.h
> index fdb4a70..4929aa3 100644
> --- a/lib/librte_eal/common/eal_filesystem.h
> +++ b/lib/librte_eal/common/eal_filesystem.h
> @@ -41,7 +41,7 @@
>  #define EAL_FILESYSTEM_H
> 
>  /** Path of rte config file. */
> -#define RUNTIME_CONFIG_FMT "%s/.%s_config"
> +#define RUNTIME_CONFIG_FMT "%s/.%s_%s_config"
> 
>  #include <stdint.h>
>  #include <limits.h>
> @@ -59,11 +59,22 @@ eal_runtime_config_path(void)
>  	static char buffer[PATH_MAX]; /* static so auto-zeroed */
>  	const char *directory = default_config_dir;
>  	const char *home_dir = getenv("HOME");
> +	static char nameBuffer[1000];
> +	int result;
> 
>  	if (getuid() != 0 && home_dir != NULL)
>  		directory = home_dir;
> +
> +	/*
> +	 * Include the name of the host in the config file path. Otherwise,
> +	 * if DPDK applications run on different hosts but share a home
> +	 * directory (e.g. via NFS), they will choose the same config
> +	 * file and conflict unnecessarily.
> +	 */
> +	result = gethostname(nameBuffer, sizeof(nameBuffer)-1);
>  	snprintf(buffer, sizeof(buffer) - 1, RUNTIME_CONFIG_FMT, directory,
> -			internal_config.hugefile_prefix);
> +			internal_config.hugefile_prefix,
> +		(result == 0) ? nameBuffer : "unknown-host");
>  	return buffer;
>  }
> 
> --
> 2.8.3
  
John Ousterhout Oct. 13, 2016, 3:42 p.m. UTC | #2
It's true that users can patch around this problem (and I started off doing
just that), but why impose this inconvenience on users when DPDK can just
"do the right thing" to begin with? For example, it took me several hours
to figure out why the problem was occurring and then to hunt down the
--file-prefix solution. Is there some reason why it would be a bad idea to
fix this in DPDK?

-John-

On Thu, Oct 13, 2016 at 3:53 AM, Ananyev, Konstantin <
konstantin.ananyev@intel.com> wrote:

>
> Hi John,
>
> > Before this patch, DPDK used the file ~/.rte_config as a lock to detect
> > potential interference between multiple DPDK applications running on the
> > same machine. However, if a single user ran DPDK applications
> concurrently
> > on several different machines, and if the user's home directory was
> shared
> > between the machines via NFS, DPDK would incorrectly detect conflicts
> > for all but the first application and abort them. This patch fixes the
> > problem by incorporating the machine name into the config file name
> (e.g.,
> > ~/.rte_hostname_config).
> >
> > Signed-off-by: John Ousterhout <ouster@cs.stanford.edu>
> > ---
> >  doc/guides/prog_guide/multi_proc_support.rst | 11 +++++++----
> >  lib/librte_eal/common/eal_common_proc.c      |  8 ++------
> >  lib/librte_eal/common/eal_filesystem.h       | 15 +++++++++++++--
> >  3 files changed, 22 insertions(+), 12 deletions(-)
> >
> > diff --git a/doc/guides/prog_guide/multi_proc_support.rst
> b/doc/guides/prog_guide/multi_proc_support.rst
> > index badd102..a54fa1c 100644
> > --- a/doc/guides/prog_guide/multi_proc_support.rst
> > +++ b/doc/guides/prog_guide/multi_proc_support.rst
> > @@ -129,10 +129,13 @@ Support for this usage scenario is provided using
> the ``--file-prefix`` paramete
> >
> >  By default, the EAL creates hugepage files on each hugetlbfs filesystem
> using the rtemap_X filename,
> >  where X is in the range 0 to the maximum number of hugepages -1.
> > -Similarly, it creates shared configuration files, memory mapped in each
> process, using the /var/run/.rte_config filename,
> > -when run as root (or $HOME/.rte_config when run as a non-root user;
> > -if filesystem and device permissions are set up to allow this).
> > -The rte part of the filenames of each of the above is configurable
> using the file-prefix parameter.
> > +Similarly, it creates shared configuration files, memory mapped in each
> process.
> > +When run as root, the name of the configuration file will be
> > +/var/run/.rte_*host*_config, where *host* is the name of the machine.
> > +When run as a non-root user, the the name of the configuration file
> will be
> > +$HOME/.rte_*host*_config (if filesystem and device permissions are set
> up to allow this).
> > +If the ``--file-prefix`` parameter has been specified, its value will
> be used
> > +in place of "rte" in the file names.
>
> I am not sure that we need to handle all such cases inside EAL.
> User can easily overcome that problem by just adding something like:
> --file-prefix=`uname -n`
> to his command-line.
> Konstantin
>
> >
> >  In addition to specifying the file-prefix parameter,
> >  any DPDK applications that are to be run side-by-side must explicitly
> limit their memory use.
> > diff --git a/lib/librte_eal/common/eal_common_proc.c
> b/lib/librte_eal/common/eal_common_proc.c
> > index 12e0fca..517aa0c 100644
> > --- a/lib/librte_eal/common/eal_common_proc.c
> > +++ b/lib/librte_eal/common/eal_common_proc.c
> > @@ -45,12 +45,8 @@ rte_eal_primary_proc_alive(const char
> *config_file_path)
> >
> >       if (config_file_path)
> >               config_fd = open(config_file_path, O_RDONLY);
> > -     else {
> > -             char default_path[PATH_MAX+1];
> > -             snprintf(default_path, PATH_MAX, RUNTIME_CONFIG_FMT,
> > -                      default_config_dir, "rte");
> > -             config_fd = open(default_path, O_RDONLY);
> > -     }
> > +     else
> > +             config_fd = open(eal_runtime_config_path(), O_RDONLY);
> >       if (config_fd < 0)
> >               return 0;
> >
> > diff --git a/lib/librte_eal/common/eal_filesystem.h
> b/lib/librte_eal/common/eal_filesystem.h
> > index fdb4a70..4929aa3 100644
> > --- a/lib/librte_eal/common/eal_filesystem.h
> > +++ b/lib/librte_eal/common/eal_filesystem.h
> > @@ -41,7 +41,7 @@
> >  #define EAL_FILESYSTEM_H
> >
> >  /** Path of rte config file. */
> > -#define RUNTIME_CONFIG_FMT "%s/.%s_config"
> > +#define RUNTIME_CONFIG_FMT "%s/.%s_%s_config"
> >
> >  #include <stdint.h>
> >  #include <limits.h>
> > @@ -59,11 +59,22 @@ eal_runtime_config_path(void)
> >       static char buffer[PATH_MAX]; /* static so auto-zeroed */
> >       const char *directory = default_config_dir;
> >       const char *home_dir = getenv("HOME");
> > +     static char nameBuffer[1000];
> > +     int result;
> >
> >       if (getuid() != 0 && home_dir != NULL)
> >               directory = home_dir;
> > +
> > +     /*
> > +      * Include the name of the host in the config file path. Otherwise,
> > +      * if DPDK applications run on different hosts but share a home
> > +      * directory (e.g. via NFS), they will choose the same config
> > +      * file and conflict unnecessarily.
> > +      */
> > +     result = gethostname(nameBuffer, sizeof(nameBuffer)-1);
> >       snprintf(buffer, sizeof(buffer) - 1, RUNTIME_CONFIG_FMT, directory,
> > -                     internal_config.hugefile_prefix);
> > +                     internal_config.hugefile_prefix,
> > +             (result == 0) ? nameBuffer : "unknown-host");
> >       return buffer;
> >  }
> >
> > --
> > 2.8.3
>
>
  
Van Haaren, Harry Oct. 13, 2016, 4:07 p.m. UTC | #3
Hi John,

> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of John Ousterhout

> Subject: Re: [dpdk-dev] [PATCH] eal: avoid unnecessary conflicts over rte_config file


<snip>

> For example, it took me several hours

> to figure out why the problem was occurring and then to hunt down the

> --file-prefix solution. Is there some reason why it would be a bad idea to

> fix this in DPDK?


Right now, DPDK will by default always use a consistent .rte_config location, which allows other applications to monitor that. For example, Collectd is able to monitor a DPDK primary process by checking if the .rte_config file exists at its default location[1].

If we change the default behavior of DPDK, other projects can no longer rely on that file being created, and these monitoring projects must be updated in sync with DPDK to avoid breakage if versions mismatch. 

Although I see your use-case, I think using the --file-prefix as Konstantin suggests is a better solution in this case.

Regards, -Harry

[1] https://github.com/collectd/collectd/blob/master/src/dpdkstat.c#L60


> On Thu, Oct 13, 2016 at 3:53 AM, Ananyev, Konstantin <

> konstantin.ananyev@intel.com> wrote:

> 

> >

> > Hi John,

> >

> > > Before this patch, DPDK used the file ~/.rte_config as a lock to detect

> > > potential interference between multiple DPDK applications running on the

> > > same machine. However, if a single user ran DPDK applications

> > concurrently

> > > on several different machines, and if the user's home directory was

> > shared

> > > between the machines via NFS, DPDK would incorrectly detect conflicts

> > > for all but the first application and abort them. This patch fixes the

> > > problem by incorporating the machine name into the config file name

> > (e.g.,

> > > ~/.rte_hostname_config).

> > >

> > > Signed-off-by: John Ousterhout <ouster@cs.stanford.edu>

> > > ---

> > >  doc/guides/prog_guide/multi_proc_support.rst | 11 +++++++----

> > >  lib/librte_eal/common/eal_common_proc.c      |  8 ++------

> > >  lib/librte_eal/common/eal_filesystem.h       | 15 +++++++++++++--

> > >  3 files changed, 22 insertions(+), 12 deletions(-)

> > >

> > > diff --git a/doc/guides/prog_guide/multi_proc_support.rst

> > b/doc/guides/prog_guide/multi_proc_support.rst

> > > index badd102..a54fa1c 100644

> > > --- a/doc/guides/prog_guide/multi_proc_support.rst

> > > +++ b/doc/guides/prog_guide/multi_proc_support.rst

> > > @@ -129,10 +129,13 @@ Support for this usage scenario is provided using

> > the ``--file-prefix`` paramete

> > >

> > >  By default, the EAL creates hugepage files on each hugetlbfs filesystem

> > using the rtemap_X filename,

> > >  where X is in the range 0 to the maximum number of hugepages -1.

> > > -Similarly, it creates shared configuration files, memory mapped in each

> > process, using the /var/run/.rte_config filename,

> > > -when run as root (or $HOME/.rte_config when run as a non-root user;

> > > -if filesystem and device permissions are set up to allow this).

> > > -The rte part of the filenames of each of the above is configurable

> > using the file-prefix parameter.

> > > +Similarly, it creates shared configuration files, memory mapped in each

> > process.

> > > +When run as root, the name of the configuration file will be

> > > +/var/run/.rte_*host*_config, where *host* is the name of the machine.

> > > +When run as a non-root user, the the name of the configuration file

> > will be

> > > +$HOME/.rte_*host*_config (if filesystem and device permissions are set

> > up to allow this).

> > > +If the ``--file-prefix`` parameter has been specified, its value will

> > be used

> > > +in place of "rte" in the file names.

> >

> > I am not sure that we need to handle all such cases inside EAL.

> > User can easily overcome that problem by just adding something like:

> > --file-prefix=`uname -n`

> > to his command-line.

> > Konstantin

> >

> > >

> > >  In addition to specifying the file-prefix parameter,

> > >  any DPDK applications that are to be run side-by-side must explicitly

> > limit their memory use.

> > > diff --git a/lib/librte_eal/common/eal_common_proc.c

> > b/lib/librte_eal/common/eal_common_proc.c

> > > index 12e0fca..517aa0c 100644

> > > --- a/lib/librte_eal/common/eal_common_proc.c

> > > +++ b/lib/librte_eal/common/eal_common_proc.c

> > > @@ -45,12 +45,8 @@ rte_eal_primary_proc_alive(const char

> > *config_file_path)

> > >

> > >       if (config_file_path)

> > >               config_fd = open(config_file_path, O_RDONLY);

> > > -     else {

> > > -             char default_path[PATH_MAX+1];

> > > -             snprintf(default_path, PATH_MAX, RUNTIME_CONFIG_FMT,

> > > -                      default_config_dir, "rte");

> > > -             config_fd = open(default_path, O_RDONLY);

> > > -     }

> > > +     else

> > > +             config_fd = open(eal_runtime_config_path(), O_RDONLY);

> > >       if (config_fd < 0)

> > >               return 0;

> > >

> > > diff --git a/lib/librte_eal/common/eal_filesystem.h

> > b/lib/librte_eal/common/eal_filesystem.h

> > > index fdb4a70..4929aa3 100644

> > > --- a/lib/librte_eal/common/eal_filesystem.h

> > > +++ b/lib/librte_eal/common/eal_filesystem.h

> > > @@ -41,7 +41,7 @@

> > >  #define EAL_FILESYSTEM_H

> > >

> > >  /** Path of rte config file. */

> > > -#define RUNTIME_CONFIG_FMT "%s/.%s_config"

> > > +#define RUNTIME_CONFIG_FMT "%s/.%s_%s_config"

> > >

> > >  #include <stdint.h>

> > >  #include <limits.h>

> > > @@ -59,11 +59,22 @@ eal_runtime_config_path(void)

> > >       static char buffer[PATH_MAX]; /* static so auto-zeroed */

> > >       const char *directory = default_config_dir;

> > >       const char *home_dir = getenv("HOME");

> > > +     static char nameBuffer[1000];

> > > +     int result;

> > >

> > >       if (getuid() != 0 && home_dir != NULL)

> > >               directory = home_dir;

> > > +

> > > +     /*

> > > +      * Include the name of the host in the config file path. Otherwise,

> > > +      * if DPDK applications run on different hosts but share a home

> > > +      * directory (e.g. via NFS), they will choose the same config

> > > +      * file and conflict unnecessarily.

> > > +      */

> > > +     result = gethostname(nameBuffer, sizeof(nameBuffer)-1);

> > >       snprintf(buffer, sizeof(buffer) - 1, RUNTIME_CONFIG_FMT, directory,

> > > -                     internal_config.hugefile_prefix);

> > > +                     internal_config.hugefile_prefix,

> > > +             (result == 0) ? nameBuffer : "unknown-host");

> > >       return buffer;

> > >  }

> > >

> > > --

> > > 2.8.3

> >

> >
  
John Ousterhout Oct. 13, 2016, 4:20 p.m. UTC | #4
Hi Harry,

But, given the existence of the --file-prefix option, isn't it already
unsafe for Collectd to check only for .rte_config? If it's important for
other programs to be able to find the config files, it seems to me that a
more robust mechanism is needed than the current one.

-John-

On Thu, Oct 13, 2016 at 9:07 AM, Van Haaren, Harry <
harry.van.haaren@intel.com> wrote:

> Hi John,
>
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of John Ousterhout
> > Subject: Re: [dpdk-dev] [PATCH] eal: avoid unnecessary conflicts over
> rte_config file
>
> <snip>
>
> > For example, it took me several hours
> > to figure out why the problem was occurring and then to hunt down the
> > --file-prefix solution. Is there some reason why it would be a bad idea
> to
> > fix this in DPDK?
>
> Right now, DPDK will by default always use a consistent .rte_config
> location, which allows other applications to monitor that. For example,
> Collectd is able to monitor a DPDK primary process by checking if the
> .rte_config file exists at its default location[1].
>
> If we change the default behavior of DPDK, other projects can no longer
> rely on that file being created, and these monitoring projects must be
> updated in sync with DPDK to avoid breakage if versions mismatch.
>
> Although I see your use-case, I think using the --file-prefix as
> Konstantin suggests is a better solution in this case.
>
> Regards, -Harry
>
> [1] https://github.com/collectd/collectd/blob/master/src/dpdkstat.c#L60
>
>
> > On Thu, Oct 13, 2016 at 3:53 AM, Ananyev, Konstantin <
> > konstantin.ananyev@intel.com> wrote:
> >
> > >
> > > Hi John,
> > >
> > > > Before this patch, DPDK used the file ~/.rte_config as a lock to
> detect
> > > > potential interference between multiple DPDK applications running on
> the
> > > > same machine. However, if a single user ran DPDK applications
> > > concurrently
> > > > on several different machines, and if the user's home directory was
> > > shared
> > > > between the machines via NFS, DPDK would incorrectly detect conflicts
> > > > for all but the first application and abort them. This patch fixes
> the
> > > > problem by incorporating the machine name into the config file name
> > > (e.g.,
> > > > ~/.rte_hostname_config).
> > > >
> > > > Signed-off-by: John Ousterhout <ouster@cs.stanford.edu>
> > > > ---
> > > >  doc/guides/prog_guide/multi_proc_support.rst | 11 +++++++----
> > > >  lib/librte_eal/common/eal_common_proc.c      |  8 ++------
> > > >  lib/librte_eal/common/eal_filesystem.h       | 15 +++++++++++++--
> > > >  3 files changed, 22 insertions(+), 12 deletions(-)
> > > >
> > > > diff --git a/doc/guides/prog_guide/multi_proc_support.rst
> > > b/doc/guides/prog_guide/multi_proc_support.rst
> > > > index badd102..a54fa1c 100644
> > > > --- a/doc/guides/prog_guide/multi_proc_support.rst
> > > > +++ b/doc/guides/prog_guide/multi_proc_support.rst
> > > > @@ -129,10 +129,13 @@ Support for this usage scenario is provided
> using
> > > the ``--file-prefix`` paramete
> > > >
> > > >  By default, the EAL creates hugepage files on each hugetlbfs
> filesystem
> > > using the rtemap_X filename,
> > > >  where X is in the range 0 to the maximum number of hugepages -1.
> > > > -Similarly, it creates shared configuration files, memory mapped in
> each
> > > process, using the /var/run/.rte_config filename,
> > > > -when run as root (or $HOME/.rte_config when run as a non-root user;
> > > > -if filesystem and device permissions are set up to allow this).
> > > > -The rte part of the filenames of each of the above is configurable
> > > using the file-prefix parameter.
> > > > +Similarly, it creates shared configuration files, memory mapped in
> each
> > > process.
> > > > +When run as root, the name of the configuration file will be
> > > > +/var/run/.rte_*host*_config, where *host* is the name of the
> machine.
> > > > +When run as a non-root user, the the name of the configuration file
> > > will be
> > > > +$HOME/.rte_*host*_config (if filesystem and device permissions are
> set
> > > up to allow this).
> > > > +If the ``--file-prefix`` parameter has been specified, its value
> will
> > > be used
> > > > +in place of "rte" in the file names.
> > >
> > > I am not sure that we need to handle all such cases inside EAL.
> > > User can easily overcome that problem by just adding something like:
> > > --file-prefix=`uname -n`
> > > to his command-line.
> > > Konstantin
> > >
> > > >
> > > >  In addition to specifying the file-prefix parameter,
> > > >  any DPDK applications that are to be run side-by-side must
> explicitly
> > > limit their memory use.
> > > > diff --git a/lib/librte_eal/common/eal_common_proc.c
> > > b/lib/librte_eal/common/eal_common_proc.c
> > > > index 12e0fca..517aa0c 100644
> > > > --- a/lib/librte_eal/common/eal_common_proc.c
> > > > +++ b/lib/librte_eal/common/eal_common_proc.c
> > > > @@ -45,12 +45,8 @@ rte_eal_primary_proc_alive(const char
> > > *config_file_path)
> > > >
> > > >       if (config_file_path)
> > > >               config_fd = open(config_file_path, O_RDONLY);
> > > > -     else {
> > > > -             char default_path[PATH_MAX+1];
> > > > -             snprintf(default_path, PATH_MAX, RUNTIME_CONFIG_FMT,
> > > > -                      default_config_dir, "rte");
> > > > -             config_fd = open(default_path, O_RDONLY);
> > > > -     }
> > > > +     else
> > > > +             config_fd = open(eal_runtime_config_path(), O_RDONLY);
> > > >       if (config_fd < 0)
> > > >               return 0;
> > > >
> > > > diff --git a/lib/librte_eal/common/eal_filesystem.h
> > > b/lib/librte_eal/common/eal_filesystem.h
> > > > index fdb4a70..4929aa3 100644
> > > > --- a/lib/librte_eal/common/eal_filesystem.h
> > > > +++ b/lib/librte_eal/common/eal_filesystem.h
> > > > @@ -41,7 +41,7 @@
> > > >  #define EAL_FILESYSTEM_H
> > > >
> > > >  /** Path of rte config file. */
> > > > -#define RUNTIME_CONFIG_FMT "%s/.%s_config"
> > > > +#define RUNTIME_CONFIG_FMT "%s/.%s_%s_config"
> > > >
> > > >  #include <stdint.h>
> > > >  #include <limits.h>
> > > > @@ -59,11 +59,22 @@ eal_runtime_config_path(void)
> > > >       static char buffer[PATH_MAX]; /* static so auto-zeroed */
> > > >       const char *directory = default_config_dir;
> > > >       const char *home_dir = getenv("HOME");
> > > > +     static char nameBuffer[1000];
> > > > +     int result;
> > > >
> > > >       if (getuid() != 0 && home_dir != NULL)
> > > >               directory = home_dir;
> > > > +
> > > > +     /*
> > > > +      * Include the name of the host in the config file path.
> Otherwise,
> > > > +      * if DPDK applications run on different hosts but share a home
> > > > +      * directory (e.g. via NFS), they will choose the same config
> > > > +      * file and conflict unnecessarily.
> > > > +      */
> > > > +     result = gethostname(nameBuffer, sizeof(nameBuffer)-1);
> > > >       snprintf(buffer, sizeof(buffer) - 1, RUNTIME_CONFIG_FMT,
> directory,
> > > > -                     internal_config.hugefile_prefix);
> > > > +                     internal_config.hugefile_prefix,
> > > > +             (result == 0) ? nameBuffer : "unknown-host");
> > > >       return buffer;
> > > >  }
> > > >
> > > > --
> > > > 2.8.3
> > >
> > >
>
  
Van Haaren, Harry Oct. 13, 2016, 4:30 p.m. UTC | #5
Hi,

> From: John Ousterhout [mailto:ouster@cs.stanford.edu] 


> But, given the existence of the --file-prefix option, isn't it already unsafe for Collectd to check only for .rte_config? If it's important for other programs to be able to find the config files, it seems to me that a more robust mechanism is needed than the current one.


If the user is using the DPDK --file-prefix, we expect the user to provide that same --file-prefix to inform Collectd of the changed config path. Details on how to do so are available here: https://github.com/collectd/collectd/blob/master/src/collectd.conf.pod#plugin-dpdkstat

Keep in mind a for a simple setup the current defaults will just work, so changing the default seems a bad idea to me.

Regards, -Harry


> On Thu, Oct 13, 2016 at 9:07 AM, Van Haaren, Harry <harry.van.haaren@intel.com> wrote:

>Hi John,


>> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of John Ousterhout

>> Subject: Re: [dpdk-dev] [PATCH] eal: avoid unnecessary conflicts over rte_config file


> <snip>


> For example, it took me several hours

> to figure out why the problem was occurring and then to hunt down the

> --file-prefix solution. Is there some reason why it would be a bad idea to

> fix this in DPDK?


Right now, DPDK will by default always use a consistent .rte_config location, which allows other applications to monitor that. For example, Collectd is able to monitor a DPDK primary process by checking if the .rte_config file exists at its default location[1].

If we change the default behavior of DPDK, other projects can no longer rely on that file being created, and these monitoring projects must be updated in sync with DPDK to avoid breakage if versions mismatch.

Although I see your use-case, I think using the --file-prefix as Konstantin suggests is a better solution in this case.

Regards, -Harry

[1] https://github.com/collectd/collectd/blob/master/src/dpdkstat.c#L60


> On Thu, Oct 13, 2016 at 3:53 AM, Ananyev, Konstantin <

> konstantin.ananyev@intel.com> wrote:

>

> >

> > Hi John,

> >

> > > Before this patch, DPDK used the file ~/.rte_config as a lock to detect

> > > potential interference between multiple DPDK applications running on the

> > > same machine. However, if a single user ran DPDK applications

> > concurrently

> > > on several different machines, and if the user's home directory was

> > shared

> > > between the machines via NFS, DPDK would incorrectly detect conflicts

> > > for all but the first application and abort them. This patch fixes the

> > > problem by incorporating the machine name into the config file name

> > (e.g.,

> > > ~/.rte_hostname_config).

> > >

> > > Signed-off-by: John Ousterhout <ouster@cs.stanford.edu>

> > > ---

> > >  doc/guides/prog_guide/multi_proc_support.rst | 11 +++++++----

> > >  lib/librte_eal/common/eal_common_proc.c      |  8 ++------

> > >  lib/librte_eal/common/eal_filesystem.h       | 15 +++++++++++++--

> > >  3 files changed, 22 insertions(+), 12 deletions(-)

> > >

> > > diff --git a/doc/guides/prog_guide/multi_proc_support.rst

> > b/doc/guides/prog_guide/multi_proc_support.rst

> > > index badd102..a54fa1c 100644

> > > --- a/doc/guides/prog_guide/multi_proc_support.rst

> > > +++ b/doc/guides/prog_guide/multi_proc_support.rst

> > > @@ -129,10 +129,13 @@ Support for this usage scenario is provided using

> > the ``--file-prefix`` paramete

> > >

> > >  By default, the EAL creates hugepage files on each hugetlbfs filesystem

> > using the rtemap_X filename,

> > >  where X is in the range 0 to the maximum number of hugepages -1.

> > > -Similarly, it creates shared configuration files, memory mapped in each

> > process, using the /var/run/.rte_config filename,

> > > -when run as root (or $HOME/.rte_config when run as a non-root user;

> > > -if filesystem and device permissions are set up to allow this).

> > > -The rte part of the filenames of each of the above is configurable

> > using the file-prefix parameter.

> > > +Similarly, it creates shared configuration files, memory mapped in each

> > process.

> > > +When run as root, the name of the configuration file will be

> > > +/var/run/.rte_*host*_config, where *host* is the name of the machine.

> > > +When run as a non-root user, the the name of the configuration file

> > will be

> > > +$HOME/.rte_*host*_config (if filesystem and device permissions are set

> > up to allow this).

> > > +If the ``--file-prefix`` parameter has been specified, its value will

> > be used

> > > +in place of "rte" in the file names.

> >

> > I am not sure that we need to handle all such cases inside EAL.

> > User can easily overcome that problem by just adding something like:

> > --file-prefix=`uname -n`

> > to his command-line.

> > Konstantin

> >

> > >

> > >  In addition to specifying the file-prefix parameter,

> > >  any DPDK applications that are to be run side-by-side must explicitly

> > limit their memory use.

> > > diff --git a/lib/librte_eal/common/eal_common_proc.c

> > b/lib/librte_eal/common/eal_common_proc.c

> > > index 12e0fca..517aa0c 100644

> > > --- a/lib/librte_eal/common/eal_common_proc.c

> > > +++ b/lib/librte_eal/common/eal_common_proc.c

> > > @@ -45,12 +45,8 @@ rte_eal_primary_proc_alive(const char

> > *config_file_path)

> > >

> > >       if (config_file_path)

> > >               config_fd = open(config_file_path, O_RDONLY);

> > > -     else {

> > > -             char default_path[PATH_MAX+1];

> > > -             snprintf(default_path, PATH_MAX, RUNTIME_CONFIG_FMT,

> > > -                      default_config_dir, "rte");

> > > -             config_fd = open(default_path, O_RDONLY);

> > > -     }

> > > +     else

> > > +             config_fd = open(eal_runtime_config_path(), O_RDONLY);

> > >       if (config_fd < 0)

> > >               return 0;

> > >

> > > diff --git a/lib/librte_eal/common/eal_filesystem.h

> > b/lib/librte_eal/common/eal_filesystem.h

> > > index fdb4a70..4929aa3 100644

> > > --- a/lib/librte_eal/common/eal_filesystem.h

> > > +++ b/lib/librte_eal/common/eal_filesystem.h

> > > @@ -41,7 +41,7 @@

> > >  #define EAL_FILESYSTEM_H

> > >

> > >  /** Path of rte config file. */

> > > -#define RUNTIME_CONFIG_FMT "%s/.%s_config"

> > > +#define RUNTIME_CONFIG_FMT "%s/.%s_%s_config"

> > >

> > >  #include <stdint.h>

> > >  #include <limits.h>

> > > @@ -59,11 +59,22 @@ eal_runtime_config_path(void)

> > >       static char buffer[PATH_MAX]; /* static so auto-zeroed */

> > >       const char *directory = default_config_dir;

> > >       const char *home_dir = getenv("HOME");

> > > +     static char nameBuffer[1000];

> > > +     int result;

> > >

> > >       if (getuid() != 0 && home_dir != NULL)

> > >               directory = home_dir;

> > > +

> > > +     /*

> > > +      * Include the name of the host in the config file path. Otherwise,

> > > +      * if DPDK applications run on different hosts but share a home

> > > +      * directory (e.g. via NFS), they will choose the same config

> > > +      * file and conflict unnecessarily.

> > > +      */

> > > +     result = gethostname(nameBuffer, sizeof(nameBuffer)-1);

> > >       snprintf(buffer, sizeof(buffer) - 1, RUNTIME_CONFIG_FMT, directory,

> > > -                     internal_config.hugefile_prefix);

> > > +                     internal_config.hugefile_prefix,

> > > +             (result == 0) ? nameBuffer : "unknown-host");

> > >       return buffer;

> > >  }

> > >

> > > --

> > > 2.8.3

> >

> >
  
Thomas Monjalon Oct. 13, 2016, 4:31 p.m. UTC | #6
2016-10-13 09:20, John Ousterhout:
> Hi Harry,
> 
> But, given the existence of the --file-prefix option, isn't it already
> unsafe for Collectd to check only for .rte_config? If it's important for
> other programs to be able to find the config files, it seems to me that a
> more robust mechanism is needed than the current one.

+1

And the default location of the file should respect the Linux standards,
e.g. .config/dpdk/...
  
Ananyev, Konstantin Oct. 13, 2016, 5:01 p.m. UTC | #7
> 

> It's true that users can patch around this problem (and I started off doing just that), but why impose this inconvenience on users when DPDK

> can just "do the right thing" to begin with? For example, it took me several hours to figure out why the problem was occurring and then to

> hunt down the --file-prefix solution. Is there some reason why it would be a bad idea to fix this in DPDK?


From my point it is really hard (if not impossible) to handle properly each and every possible
configuration case at program level.
As your program has no clue what is on users mind and how he configured his system.
Let say what to do next, if we'll have 2 boxes with the same hostname?
Should we then add domain-name into the config file-naming scheme too?
I suppose if the user runs dpdk apps in environment it is his responsibility to make sure that each
process gets an unique prefix.

Konstantin

> 

> -John-

> 

> On Thu, Oct 13, 2016 at 3:53 AM, Ananyev, Konstantin <konstantin.ananyev@intel.com> wrote:

> 

> Hi John,

> 

> > Before this patch, DPDK used the file ~/.rte_config as a lock to detect

> > potential interference between multiple DPDK applications running on the

> > same machine. However, if a single user ran DPDK applications concurrently

> > on several different machines, and if the user's home directory was shared

> > between the machines via NFS, DPDK would incorrectly detect conflicts

> > for all but the first application and abort them. This patch fixes the

> > problem by incorporating the machine name into the config file name (e.g.,

> > ~/.rte_hostname_config).

> >

> > Signed-off-by: John Ousterhout <ouster@cs.stanford.edu>

> > ---

> >  doc/guides/prog_guide/multi_proc_support.rst | 11 +++++++----

> >  lib/librte_eal/common/eal_common_proc.c      |  8 ++------

> >  lib/librte_eal/common/eal_filesystem.h       | 15 +++++++++++++--

> >  3 files changed, 22 insertions(+), 12 deletions(-)

> >

> > diff --git a/doc/guides/prog_guide/multi_proc_support.rst b/doc/guides/prog_guide/multi_proc_support.rst

> > index badd102..a54fa1c 100644

> > --- a/doc/guides/prog_guide/multi_proc_support.rst

> > +++ b/doc/guides/prog_guide/multi_proc_support.rst

> > @@ -129,10 +129,13 @@ Support for this usage scenario is provided using the ``--file-prefix`` paramete

> >

> >  By default, the EAL creates hugepage files on each hugetlbfs filesystem using the rtemap_X filename,

> >  where X is in the range 0 to the maximum number of hugepages -1.

> > -Similarly, it creates shared configuration files, memory mapped in each process, using the /var/run/.rte_config filename,

> > -when run as root (or $HOME/.rte_config when run as a non-root user;

> > -if filesystem and device permissions are set up to allow this).

> > -The rte part of the filenames of each of the above is configurable using the file-prefix parameter.

> > +Similarly, it creates shared configuration files, memory mapped in each process.

> > +When run as root, the name of the configuration file will be

> > +/var/run/.rte_*host*_config, where *host* is the name of the machine.

> > +When run as a non-root user, the the name of the configuration file will be

> > +$HOME/.rte_*host*_config (if filesystem and device permissions are set up to allow this).

> > +If the ``--file-prefix`` parameter has been specified, its value will be used

> > +in place of "rte" in the file names.

> I am not sure that we need to handle all such cases inside EAL.

> User can easily overcome that problem by just adding something like:

> --file-prefix=`uname -n`

> to his command-line.

> Konstantin

> 

> >

> >  In addition to specifying the file-prefix parameter,

> >  any DPDK applications that are to be run side-by-side must explicitly limit their memory use.

> > diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c

> > index 12e0fca..517aa0c 100644

> > --- a/lib/librte_eal/common/eal_common_proc.c

> > +++ b/lib/librte_eal/common/eal_common_proc.c

> > @@ -45,12 +45,8 @@ rte_eal_primary_proc_alive(const char *config_file_path)

> >

> >       if (config_file_path)

> >               config_fd = open(config_file_path, O_RDONLY);

> > -     else {

> > -             char default_path[PATH_MAX+1];

> > -             snprintf(default_path, PATH_MAX, RUNTIME_CONFIG_FMT,

> > -                      default_config_dir, "rte");

> > -             config_fd = open(default_path, O_RDONLY);

> > -     }

> > +     else

> > +             config_fd = open(eal_runtime_config_path(), O_RDONLY);

> >       if (config_fd < 0)

> >               return 0;

> >

> > diff --git a/lib/librte_eal/common/eal_filesystem.h b/lib/librte_eal/common/eal_filesystem.h

> > index fdb4a70..4929aa3 100644

> > --- a/lib/librte_eal/common/eal_filesystem.h

> > +++ b/lib/librte_eal/common/eal_filesystem.h

> > @@ -41,7 +41,7 @@

> >  #define EAL_FILESYSTEM_H

> >

> >  /** Path of rte config file. */

> > -#define RUNTIME_CONFIG_FMT "%s/.%s_config"

> > +#define RUNTIME_CONFIG_FMT "%s/.%s_%s_config"

> >

> >  #include <stdint.h>

> >  #include <limits.h>

> > @@ -59,11 +59,22 @@ eal_runtime_config_path(void)

> >       static char buffer[PATH_MAX]; /* static so auto-zeroed */

> >       const char *directory = default_config_dir;

> >       const char *home_dir = getenv("HOME");

> > +     static char nameBuffer[1000];

> > +     int result;

> >

> >       if (getuid() != 0 && home_dir != NULL)

> >               directory = home_dir;

> > +

> > +     /*

> > +      * Include the name of the host in the config file path. Otherwise,

> > +      * if DPDK applications run on different hosts but share a home

> > +      * directory (e.g. via NFS), they will choose the same config

> > +      * file and conflict unnecessarily.

> > +      */

> > +     result = gethostname(nameBuffer, sizeof(nameBuffer)-1);

> >       snprintf(buffer, sizeof(buffer) - 1, RUNTIME_CONFIG_FMT, directory,

> > -                     internal_config.hugefile_prefix);

> > +                     internal_config.hugefile_prefix,

> > +             (result == 0) ? nameBuffer : "unknown-host");

> >       return buffer;

> >  }

> >

> > --

> > 2.8.3
  
Stephen Hemminger Oct. 13, 2016, 6:28 p.m. UTC | #8
On Thu, 13 Oct 2016 16:30:20 +0000
"Van Haaren, Harry" <harry.van.haaren@intel.com> wrote:

> Hi,
> 
> > From: John Ousterhout [mailto:ouster@cs.stanford.edu]   
> 
> > But, given the existence of the --file-prefix option, isn't it already unsafe for Collectd to check only for .rte_config? If it's important for other programs to be able to find the config files, it seems to me that a more robust mechanism is needed than the current one.  
> 
> If the user is using the DPDK --file-prefix, we expect the user to provide that same --file-prefix to inform Collectd of the changed config path. Details on how to do so are available here: https://github.com/collectd/collectd/blob/master/src/collectd.conf.pod#plugin-dpdkstat
> 
> Keep in mind a for a simple setup the current defaults will just work, so changing the default seems a bad idea to me.
> 
> Regards, -Harry
> 
> 
> > On Thu, Oct 13, 2016 at 9:07 AM, Van Haaren, Harry <harry.van.haaren@intel.com> wrote:
> >Hi John,  
> 
> >> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of John Ousterhout
> >> Subject: Re: [dpdk-dev] [PATCH] eal: avoid unnecessary conflicts over rte_config file  
> 
> > <snip>  
> 
> > For example, it took me several hours
> > to figure out why the problem was occurring and then to hunt down the
> > --file-prefix solution. Is there some reason why it would be a bad idea to
> > fix this in DPDK?  
> 
> Right now, DPDK will by default always use a consistent .rte_config location, which allows other applications to monitor that. For example, Collectd is able to monitor a DPDK primary process by checking if the .rte_config file exists at its default location[1].
> 
> If we change the default behavior of DPDK, other projects can no longer rely on that file being created, and these monitoring projects must be updated in sync with DPDK to avoid breakage if versions mismatch.
> 
> Although I see your use-case, I think using the --file-prefix as Konstantin suggests is a better solution in this case.
> 
> Regards, -Harry
> 
> [1] https://github.com/collectd/collectd/blob/master/src/dpdkstat.c#L60

I think DPDK model of lock file is overly simplistic. On Linux it should be putting any lockfiles in /run
not users home directory, since it is a system not user lock.
  
Tahhan, Maryam Oct. 13, 2016, 9:39 p.m. UTC | #9
> Hi John,
> 
> > Before this patch, DPDK used the file ~/.rte_config as a lock to
> > detect potential interference between multiple DPDK applications
> > running on the same machine. However, if a single user ran DPDK
> > applications concurrently on several different machines, and if the
> > user's home directory was shared between the machines via NFS, DPDK
> > would incorrectly detect conflicts for all but the first application
> > and abort them. This patch fixes the problem by incorporating the
> > machine name into the config file name (e.g., ~/.rte_hostname_config).
> >
> > Signed-off-by: John Ousterhout <ouster@cs.stanford.edu>
> > ---
> >  doc/guides/prog_guide/multi_proc_support.rst | 11 +++++++----
> >  lib/librte_eal/common/eal_common_proc.c      |  8 ++------
> >  lib/librte_eal/common/eal_filesystem.h       | 15 +++++++++++++--
> >  3 files changed, 22 insertions(+), 12 deletions(-)
> >
> > diff --git a/doc/guides/prog_guide/multi_proc_support.rst
> > b/doc/guides/prog_guide/multi_proc_support.rst
> > index badd102..a54fa1c 100644
> > --- a/doc/guides/prog_guide/multi_proc_support.rst
> > +++ b/doc/guides/prog_guide/multi_proc_support.rst
> > @@ -129,10 +129,13 @@ Support for this usage scenario is provided
> > using the ``--file-prefix`` paramete
> >
> >  By default, the EAL creates hugepage files on each hugetlbfs
> > filesystem using the rtemap_X filename,  where X is in the range 0 to the
> maximum number of hugepages -1.
> > -Similarly, it creates shared configuration files, memory mapped in
> > each process, using the /var/run/.rte_config filename, -when run as
> > root (or $HOME/.rte_config when run as a non-root user; -if filesystem and
> device permissions are set up to allow this).
> > -The rte part of the filenames of each of the above is configurable using the
> file-prefix parameter.
> > +Similarly, it creates shared configuration files, memory mapped in each
> process.
> > +When run as root, the name of the configuration file will be
> > +/var/run/.rte_*host*_config, where *host* is the name of the machine.
> > +When run as a non-root user, the the name of the configuration file
> > +will be $HOME/.rte_*host*_config (if filesystem and device permissions
> are set up to allow this).
> > +If the ``--file-prefix`` parameter has been specified, its value will
> > +be used in place of "rte" in the file names.
> 
> I am not sure that we need to handle all such cases inside EAL.
> User can easily overcome that problem by just adding something like:
> --file-prefix=`uname -n`
> to his command-line.
> Konstantin
> 

I agree with Konstantin, there's no need to include the hostname in the rte config file + I'm not sure this will be backward compatible with existing DPDK applications that use a secondary processes that use the config file (as in, multiprocess DPDK applications in use would all need to be updated). What Konstantin suggests fixes the issue you were encountering without breaking backward compatibility.
Is addition, the hostname is not unique... you could in theory have 2 hosts with the same hostname and encounter the issue you were seeing again.

> >
> >  In addition to specifying the file-prefix parameter,  any DPDK
> > applications that are to be run side-by-side must explicitly limit their
> memory use.
> > diff --git a/lib/librte_eal/common/eal_common_proc.c
> > b/lib/librte_eal/common/eal_common_proc.c
> > index 12e0fca..517aa0c 100644
> > --- a/lib/librte_eal/common/eal_common_proc.c
> > +++ b/lib/librte_eal/common/eal_common_proc.c
> > @@ -45,12 +45,8 @@ rte_eal_primary_proc_alive(const char
> > *config_file_path)
> >
> >  	if (config_file_path)
> >  		config_fd = open(config_file_path, O_RDONLY);
> > -	else {
> > -		char default_path[PATH_MAX+1];
> > -		snprintf(default_path, PATH_MAX, RUNTIME_CONFIG_FMT,
> > -			 default_config_dir, "rte");
> > -		config_fd = open(default_path, O_RDONLY);
> > -	}
> > +	else
> > +		config_fd = open(eal_runtime_config_path(), O_RDONLY);
> >  	if (config_fd < 0)
> >  		return 0;
> >
> > diff --git a/lib/librte_eal/common/eal_filesystem.h
> > b/lib/librte_eal/common/eal_filesystem.h
> > index fdb4a70..4929aa3 100644
> > --- a/lib/librte_eal/common/eal_filesystem.h
> > +++ b/lib/librte_eal/common/eal_filesystem.h
> > @@ -41,7 +41,7 @@
> >  #define EAL_FILESYSTEM_H
> >
> >  /** Path of rte config file. */
> > -#define RUNTIME_CONFIG_FMT "%s/.%s_config"
> > +#define RUNTIME_CONFIG_FMT "%s/.%s_%s_config"
> >
> >  #include <stdint.h>
> >  #include <limits.h>
> > @@ -59,11 +59,22 @@ eal_runtime_config_path(void)
> >  	static char buffer[PATH_MAX]; /* static so auto-zeroed */
> >  	const char *directory = default_config_dir;
> >  	const char *home_dir = getenv("HOME");
> > +	static char nameBuffer[1000];
> > +	int result;
> >
> >  	if (getuid() != 0 && home_dir != NULL)
> >  		directory = home_dir;
> > +
> > +	/*
> > +	 * Include the name of the host in the config file path. Otherwise,
> > +	 * if DPDK applications run on different hosts but share a home
> > +	 * directory (e.g. via NFS), they will choose the same config
> > +	 * file and conflict unnecessarily.
> > +	 */
> > +	result = gethostname(nameBuffer, sizeof(nameBuffer)-1);
> >  	snprintf(buffer, sizeof(buffer) - 1, RUNTIME_CONFIG_FMT,
> directory,
> > -			internal_config.hugefile_prefix);
> > +			internal_config.hugefile_prefix,
> > +		(result == 0) ? nameBuffer : "unknown-host");
> >  	return buffer;
> >  }
> >
> > --
> > 2.8.3
  
John Ousterhout Oct. 14, 2016, 4:27 p.m. UTC | #10
It sounds like my patch would break some existing software, so it probably
doesn't make sense right now.

I'd still argue that the current mechanism has a number of problems, and it
should probably undergo a comprehensive overhaul at some point in the
future.

-John-

On Thu, Oct 13, 2016 at 2:39 PM, Tahhan, Maryam <maryam.tahhan@intel.com>
wrote:

> > Hi John,
> >
> > > Before this patch, DPDK used the file ~/.rte_config as a lock to
> > > detect potential interference between multiple DPDK applications
> > > running on the same machine. However, if a single user ran DPDK
> > > applications concurrently on several different machines, and if the
> > > user's home directory was shared between the machines via NFS, DPDK
> > > would incorrectly detect conflicts for all but the first application
> > > and abort them. This patch fixes the problem by incorporating the
> > > machine name into the config file name (e.g., ~/.rte_hostname_config).
> > >
> > > Signed-off-by: John Ousterhout <ouster@cs.stanford.edu>
> > > ---
> > >  doc/guides/prog_guide/multi_proc_support.rst | 11 +++++++----
> > >  lib/librte_eal/common/eal_common_proc.c      |  8 ++------
> > >  lib/librte_eal/common/eal_filesystem.h       | 15 +++++++++++++--
> > >  3 files changed, 22 insertions(+), 12 deletions(-)
> > >
> > > diff --git a/doc/guides/prog_guide/multi_proc_support.rst
> > > b/doc/guides/prog_guide/multi_proc_support.rst
> > > index badd102..a54fa1c 100644
> > > --- a/doc/guides/prog_guide/multi_proc_support.rst
> > > +++ b/doc/guides/prog_guide/multi_proc_support.rst
> > > @@ -129,10 +129,13 @@ Support for this usage scenario is provided
> > > using the ``--file-prefix`` paramete
> > >
> > >  By default, the EAL creates hugepage files on each hugetlbfs
> > > filesystem using the rtemap_X filename,  where X is in the range 0 to
> the
> > maximum number of hugepages -1.
> > > -Similarly, it creates shared configuration files, memory mapped in
> > > each process, using the /var/run/.rte_config filename, -when run as
> > > root (or $HOME/.rte_config when run as a non-root user; -if filesystem
> and
> > device permissions are set up to allow this).
> > > -The rte part of the filenames of each of the above is configurable
> using the
> > file-prefix parameter.
> > > +Similarly, it creates shared configuration files, memory mapped in
> each
> > process.
> > > +When run as root, the name of the configuration file will be
> > > +/var/run/.rte_*host*_config, where *host* is the name of the machine.
> > > +When run as a non-root user, the the name of the configuration file
> > > +will be $HOME/.rte_*host*_config (if filesystem and device permissions
> > are set up to allow this).
> > > +If the ``--file-prefix`` parameter has been specified, its value will
> > > +be used in place of "rte" in the file names.
> >
> > I am not sure that we need to handle all such cases inside EAL.
> > User can easily overcome that problem by just adding something like:
> > --file-prefix=`uname -n`
> > to his command-line.
> > Konstantin
> >
>
> I agree with Konstantin, there's no need to include the hostname in the
> rte config file + I'm not sure this will be backward compatible with
> existing DPDK applications that use a secondary processes that use the
> config file (as in, multiprocess DPDK applications in use would all need to
> be updated). What Konstantin suggests fixes the issue you were encountering
> without breaking backward compatibility.
> Is addition, the hostname is not unique... you could in theory have 2
> hosts with the same hostname and encounter the issue you were seeing again.
>
> > >
> > >  In addition to specifying the file-prefix parameter,  any DPDK
> > > applications that are to be run side-by-side must explicitly limit
> their
> > memory use.
> > > diff --git a/lib/librte_eal/common/eal_common_proc.c
> > > b/lib/librte_eal/common/eal_common_proc.c
> > > index 12e0fca..517aa0c 100644
> > > --- a/lib/librte_eal/common/eal_common_proc.c
> > > +++ b/lib/librte_eal/common/eal_common_proc.c
> > > @@ -45,12 +45,8 @@ rte_eal_primary_proc_alive(const char
> > > *config_file_path)
> > >
> > >     if (config_file_path)
> > >             config_fd = open(config_file_path, O_RDONLY);
> > > -   else {
> > > -           char default_path[PATH_MAX+1];
> > > -           snprintf(default_path, PATH_MAX, RUNTIME_CONFIG_FMT,
> > > -                    default_config_dir, "rte");
> > > -           config_fd = open(default_path, O_RDONLY);
> > > -   }
> > > +   else
> > > +           config_fd = open(eal_runtime_config_path(), O_RDONLY);
> > >     if (config_fd < 0)
> > >             return 0;
> > >
> > > diff --git a/lib/librte_eal/common/eal_filesystem.h
> > > b/lib/librte_eal/common/eal_filesystem.h
> > > index fdb4a70..4929aa3 100644
> > > --- a/lib/librte_eal/common/eal_filesystem.h
> > > +++ b/lib/librte_eal/common/eal_filesystem.h
> > > @@ -41,7 +41,7 @@
> > >  #define EAL_FILESYSTEM_H
> > >
> > >  /** Path of rte config file. */
> > > -#define RUNTIME_CONFIG_FMT "%s/.%s_config"
> > > +#define RUNTIME_CONFIG_FMT "%s/.%s_%s_config"
> > >
> > >  #include <stdint.h>
> > >  #include <limits.h>
> > > @@ -59,11 +59,22 @@ eal_runtime_config_path(void)
> > >     static char buffer[PATH_MAX]; /* static so auto-zeroed */
> > >     const char *directory = default_config_dir;
> > >     const char *home_dir = getenv("HOME");
> > > +   static char nameBuffer[1000];
> > > +   int result;
> > >
> > >     if (getuid() != 0 && home_dir != NULL)
> > >             directory = home_dir;
> > > +
> > > +   /*
> > > +    * Include the name of the host in the config file path. Otherwise,
> > > +    * if DPDK applications run on different hosts but share a home
> > > +    * directory (e.g. via NFS), they will choose the same config
> > > +    * file and conflict unnecessarily.
> > > +    */
> > > +   result = gethostname(nameBuffer, sizeof(nameBuffer)-1);
> > >     snprintf(buffer, sizeof(buffer) - 1, RUNTIME_CONFIG_FMT,
> > directory,
> > > -                   internal_config.hugefile_prefix);
> > > +                   internal_config.hugefile_prefix,
> > > +           (result == 0) ? nameBuffer : "unknown-host");
> > >     return buffer;
> > >  }
> > >
> > > --
> > > 2.8.3
>
>
  
Ferruh Yigit Jan. 11, 2019, 9:52 p.m. UTC | #11
On 10/14/2016 5:27 PM, ouster at cs.stanford.edu (John Ousterhout) wrote:
> It sounds like my patch would break some existing software, so it probably
> doesn't make sense right now.
> 
> I'd still argue that the current mechanism has a number of problems, and it
> should probably undergo a comprehensive overhaul at some point in the
> future.

Hi John,

It seems there weren't any conclusion on the patch and it is sitting on
patchwork more than two years, I am updating it as rejected, if
it is still relevant please let us know.

Sorry for any inconvenience caused.

For record, patch: https://patches.dpdk.org/patch/16527/

Regards,
ferruh

> 
> -John-
> 
> On Thu, Oct 13, 2016 at 2:39 PM, Tahhan, Maryam <maryam.tahhan at intel.com>
> wrote:
> 
>>> Hi John,
>>>
>>>> Before this patch, DPDK used the file ~/.rte_config as a lock to
>>>> detect potential interference between multiple DPDK applications
>>>> running on the same machine. However, if a single user ran DPDK
>>>> applications concurrently on several different machines, and if the
>>>> user's home directory was shared between the machines via NFS, DPDK
>>>> would incorrectly detect conflicts for all but the first application
>>>> and abort them. This patch fixes the problem by incorporating the
>>>> machine name into the config file name (e.g., ~/.rte_hostname_config).
>>>>
>>>> Signed-off-by: John Ousterhout <ouster at cs.stanford.edu>
>>>> ---
>>>>  doc/guides/prog_guide/multi_proc_support.rst | 11 +++++++----
>>>>  lib/librte_eal/common/eal_common_proc.c      |  8 ++------
>>>>  lib/librte_eal/common/eal_filesystem.h       | 15 +++++++++++++--
>>>>  3 files changed, 22 insertions(+), 12 deletions(-)
>>>>
>>>> diff --git a/doc/guides/prog_guide/multi_proc_support.rst
>>>> b/doc/guides/prog_guide/multi_proc_support.rst
>>>> index badd102..a54fa1c 100644
>>>> --- a/doc/guides/prog_guide/multi_proc_support.rst
>>>> +++ b/doc/guides/prog_guide/multi_proc_support.rst
>>>> @@ -129,10 +129,13 @@ Support for this usage scenario is provided
>>>> using the ``--file-prefix`` paramete
>>>>
>>>>  By default, the EAL creates hugepage files on each hugetlbfs
>>>> filesystem using the rtemap_X filename,  where X is in the range 0 to
>> the
>>> maximum number of hugepages -1.
>>>> -Similarly, it creates shared configuration files, memory mapped in
>>>> each process, using the /var/run/.rte_config filename, -when run as
>>>> root (or $HOME/.rte_config when run as a non-root user; -if filesystem
>> and
>>> device permissions are set up to allow this).
>>>> -The rte part of the filenames of each of the above is configurable
>> using the
>>> file-prefix parameter.
>>>> +Similarly, it creates shared configuration files, memory mapped in
>> each
>>> process.
>>>> +When run as root, the name of the configuration file will be
>>>> +/var/run/.rte_*host*_config, where *host* is the name of the machine.
>>>> +When run as a non-root user, the the name of the configuration file
>>>> +will be $HOME/.rte_*host*_config (if filesystem and device permissions
>>> are set up to allow this).
>>>> +If the ``--file-prefix`` parameter has been specified, its value will
>>>> +be used in place of "rte" in the file names.
>>>
>>> I am not sure that we need to handle all such cases inside EAL.
>>> User can easily overcome that problem by just adding something like:
>>> --file-prefix=`uname -n`
>>> to his command-line.
>>> Konstantin
>>>
>>
>> I agree with Konstantin, there's no need to include the hostname in the
>> rte config file + I'm not sure this will be backward compatible with
>> existing DPDK applications that use a secondary processes that use the
>> config file (as in, multiprocess DPDK applications in use would all need to
>> be updated). What Konstantin suggests fixes the issue you were encountering
>> without breaking backward compatibility.
>> Is addition, the hostname is not unique... you could in theory have 2
>> hosts with the same hostname and encounter the issue you were seeing again.
>>
>>>>
>>>>  In addition to specifying the file-prefix parameter,  any DPDK
>>>> applications that are to be run side-by-side must explicitly limit
>> their
>>> memory use.
>>>> diff --git a/lib/librte_eal/common/eal_common_proc.c
>>>> b/lib/librte_eal/common/eal_common_proc.c
>>>> index 12e0fca..517aa0c 100644
>>>> --- a/lib/librte_eal/common/eal_common_proc.c
>>>> +++ b/lib/librte_eal/common/eal_common_proc.c
>>>> @@ -45,12 +45,8 @@ rte_eal_primary_proc_alive(const char
>>>> *config_file_path)
>>>>
>>>>     if (config_file_path)
>>>>             config_fd = open(config_file_path, O_RDONLY);
>>>> -   else {
>>>> -           char default_path[PATH_MAX+1];
>>>> -           snprintf(default_path, PATH_MAX, RUNTIME_CONFIG_FMT,
>>>> -                    default_config_dir, "rte");
>>>> -           config_fd = open(default_path, O_RDONLY);
>>>> -   }
>>>> +   else
>>>> +           config_fd = open(eal_runtime_config_path(), O_RDONLY);
>>>>     if (config_fd < 0)
>>>>             return 0;
>>>>
>>>> diff --git a/lib/librte_eal/common/eal_filesystem.h
>>>> b/lib/librte_eal/common/eal_filesystem.h
>>>> index fdb4a70..4929aa3 100644
>>>> --- a/lib/librte_eal/common/eal_filesystem.h
>>>> +++ b/lib/librte_eal/common/eal_filesystem.h
>>>> @@ -41,7 +41,7 @@
>>>>  #define EAL_FILESYSTEM_H
>>>>
>>>>  /** Path of rte config file. */
>>>> -#define RUNTIME_CONFIG_FMT "%s/.%s_config"
>>>> +#define RUNTIME_CONFIG_FMT "%s/.%s_%s_config"
>>>>
>>>>  #include <stdint.h>
>>>>  #include <limits.h>
>>>> @@ -59,11 +59,22 @@ eal_runtime_config_path(void)
>>>>     static char buffer[PATH_MAX]; /* static so auto-zeroed */
>>>>     const char *directory = default_config_dir;
>>>>     const char *home_dir = getenv("HOME");
>>>> +   static char nameBuffer[1000];
>>>> +   int result;
>>>>
>>>>     if (getuid() != 0 && home_dir != NULL)
>>>>             directory = home_dir;
>>>> +
>>>> +   /*
>>>> +    * Include the name of the host in the config file path. Otherwise,
>>>> +    * if DPDK applications run on different hosts but share a home
>>>> +    * directory (e.g. via NFS), they will choose the same config
>>>> +    * file and conflict unnecessarily.
>>>> +    */
>>>> +   result = gethostname(nameBuffer, sizeof(nameBuffer)-1);
>>>>     snprintf(buffer, sizeof(buffer) - 1, RUNTIME_CONFIG_FMT,
>>> directory,
>>>> -                   internal_config.hugefile_prefix);
>>>> +                   internal_config.hugefile_prefix,
>>>> +           (result == 0) ? nameBuffer : "unknown-host");
>>>>     return buffer;
>>>>  }
>>>>
>>>> --
>>>> 2.8.3
>>
>>
>
  

Patch

diff --git a/doc/guides/prog_guide/multi_proc_support.rst b/doc/guides/prog_guide/multi_proc_support.rst
index badd102..a54fa1c 100644
--- a/doc/guides/prog_guide/multi_proc_support.rst
+++ b/doc/guides/prog_guide/multi_proc_support.rst
@@ -129,10 +129,13 @@  Support for this usage scenario is provided using the ``--file-prefix`` paramete
 
 By default, the EAL creates hugepage files on each hugetlbfs filesystem using the rtemap_X filename,
 where X is in the range 0 to the maximum number of hugepages -1.
-Similarly, it creates shared configuration files, memory mapped in each process, using the /var/run/.rte_config filename,
-when run as root (or $HOME/.rte_config when run as a non-root user;
-if filesystem and device permissions are set up to allow this).
-The rte part of the filenames of each of the above is configurable using the file-prefix parameter.
+Similarly, it creates shared configuration files, memory mapped in each process.
+When run as root, the name of the configuration file will be
+/var/run/.rte_*host*_config, where *host* is the name of the machine.
+When run as a non-root user, the the name of the configuration file will be
+$HOME/.rte_*host*_config (if filesystem and device permissions are set up to allow this).
+If the ``--file-prefix`` parameter has been specified, its value will be used
+in place of "rte" in the file names.
 
 In addition to specifying the file-prefix parameter,
 any DPDK applications that are to be run side-by-side must explicitly limit their memory use.
diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c
index 12e0fca..517aa0c 100644
--- a/lib/librte_eal/common/eal_common_proc.c
+++ b/lib/librte_eal/common/eal_common_proc.c
@@ -45,12 +45,8 @@  rte_eal_primary_proc_alive(const char *config_file_path)
 
 	if (config_file_path)
 		config_fd = open(config_file_path, O_RDONLY);
-	else {
-		char default_path[PATH_MAX+1];
-		snprintf(default_path, PATH_MAX, RUNTIME_CONFIG_FMT,
-			 default_config_dir, "rte");
-		config_fd = open(default_path, O_RDONLY);
-	}
+	else
+		config_fd = open(eal_runtime_config_path(), O_RDONLY);
 	if (config_fd < 0)
 		return 0;
 
diff --git a/lib/librte_eal/common/eal_filesystem.h b/lib/librte_eal/common/eal_filesystem.h
index fdb4a70..4929aa3 100644
--- a/lib/librte_eal/common/eal_filesystem.h
+++ b/lib/librte_eal/common/eal_filesystem.h
@@ -41,7 +41,7 @@ 
 #define EAL_FILESYSTEM_H
 
 /** Path of rte config file. */
-#define RUNTIME_CONFIG_FMT "%s/.%s_config"
+#define RUNTIME_CONFIG_FMT "%s/.%s_%s_config"
 
 #include <stdint.h>
 #include <limits.h>
@@ -59,11 +59,22 @@  eal_runtime_config_path(void)
 	static char buffer[PATH_MAX]; /* static so auto-zeroed */
 	const char *directory = default_config_dir;
 	const char *home_dir = getenv("HOME");
+	static char nameBuffer[1000];
+	int result;
 
 	if (getuid() != 0 && home_dir != NULL)
 		directory = home_dir;
+
+	/*
+	 * Include the name of the host in the config file path. Otherwise,
+	 * if DPDK applications run on different hosts but share a home
+	 * directory (e.g. via NFS), they will choose the same config
+	 * file and conflict unnecessarily.
+	 */
+	result = gethostname(nameBuffer, sizeof(nameBuffer)-1);
 	snprintf(buffer, sizeof(buffer) - 1, RUNTIME_CONFIG_FMT, directory,
-			internal_config.hugefile_prefix);
+			internal_config.hugefile_prefix,
+		(result == 0) ? nameBuffer : "unknown-host");
 	return buffer;
 }