diff mbox series

[v6,1/1] fbarray: fix duplicated fbarray file in secondary

Message ID 20191101090413.17997-2-yasufum.o@gmail.com (mailing list archive)
State Superseded, archived
Delegated to: David Marchand
Headers show
Series fbarray: fix duplicated fbarray file in secondary | expand

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/travis-robot success Travis build: passed
ci/iol-mellanox-Performance success Performance Testing PASS
ci/Intel-compilation success Compilation OK
ci/iol-compilation success Compile Testing PASS
ci/iol-intel-Performance success Performance Testing PASS

Commit Message

Yasufumi Ogawa Nov. 1, 2019, 9:04 a.m. UTC
From: Yasufumi Ogawa <ogawa.yasufumi@lab.ntt.co.jp>

In secondary_msl_create_walk(), it creates a file for fbarrays with its
PID for reserving unique name among secondary processes. However, it
does not work if several secondaries run as app containers because each
of containerized secondary has PID 1, and failed to reserve unique name
other than first one. To reserve unique name in each of containers, use
hostname in addition to PID.

Cc: stable@dpdk.org

Signed-off-by: Yasufumi Ogawa <yasufum.o@gmail.com>
---
 lib/librte_eal/common/include/rte_fbarray.h |  2 +-
 lib/librte_eal/linux/eal/eal_memalloc.c     | 11 ++++++++---
 2 files changed, 9 insertions(+), 4 deletions(-)

Comments

Ananyev, Konstantin Nov. 1, 2019, 12:01 p.m. UTC | #1
> 
> From: Yasufumi Ogawa <ogawa.yasufumi@lab.ntt.co.jp>
> 
> In secondary_msl_create_walk(), it creates a file for fbarrays with its
> PID for reserving unique name among secondary processes. However, it
> does not work if several secondaries run as app containers because each
> of containerized secondary has PID 1, and failed to reserve unique name
> other than first one. To reserve unique name in each of containers, use
> hostname in addition to PID.
> 
> Cc: stable@dpdk.org
> 
> Signed-off-by: Yasufumi Ogawa <yasufum.o@gmail.com>
> ---
>  lib/librte_eal/common/include/rte_fbarray.h |  2 +-
>  lib/librte_eal/linux/eal/eal_memalloc.c     | 11 ++++++++---
>  2 files changed, 9 insertions(+), 4 deletions(-)
> 
> diff --git a/lib/librte_eal/common/include/rte_fbarray.h b/lib/librte_eal/common/include/rte_fbarray.h
> index 6dccdbec9..5c2815093 100644
> --- a/lib/librte_eal/common/include/rte_fbarray.h
> +++ b/lib/librte_eal/common/include/rte_fbarray.h
> @@ -39,7 +39,7 @@ extern "C" {
>  #include <rte_compat.h>
>  #include <rte_rwlock.h>
> 
> -#define RTE_FBARRAY_NAME_LEN 64
> +#define RTE_FBARRAY_NAME_LEN NAME_MAX
> 
>  struct rte_fbarray {
>  	char name[RTE_FBARRAY_NAME_LEN]; /**< name associated with an array */
> diff --git a/lib/librte_eal/linux/eal/eal_memalloc.c b/lib/librte_eal/linux/eal/eal_memalloc.c
> index af6d0d023..24f0275c9 100644
> --- a/lib/librte_eal/linux/eal/eal_memalloc.c
> +++ b/lib/librte_eal/linux/eal/eal_memalloc.c
> @@ -1365,6 +1365,7 @@ secondary_msl_create_walk(const struct rte_memseg_list *msl,
>  	struct rte_memseg_list *primary_msl, *local_msl;
>  	char name[PATH_MAX];
>  	int msl_idx, ret;
> +	char hostname[HOST_NAME_MAX] = { 0 };
> 
>  	if (msl->external)
>  		return 0;
> @@ -1373,9 +1374,13 @@ secondary_msl_create_walk(const struct rte_memseg_list *msl,
>  	primary_msl = &mcfg->memsegs[msl_idx];
>  	local_msl = &local_memsegs[msl_idx];
> 
> -	/* create distinct fbarrays for each secondary */
> -	snprintf(name, RTE_FBARRAY_NAME_LEN, "%s_%i",
> -		primary_msl->memseg_arr.name, getpid());
> +	/* Create distinct fbarrays for each secondary by using PID and
> +	 * hostname. The reason why using hostname is because PID could be
> +	 * duplicated among secondaries if it is launched in a container.
> +	 */
> +	gethostname(hostname, HOST_NAME_MAX);
> +	snprintf(name, RTE_FBARRAY_NAME_LEN, "%s_%s_%d",
> +			primary_msl->memseg_arr.name, hostname, (int)getpid());
> 
>  	ret = rte_fbarray_init(&local_msl->memseg_arr, name,
>  		primary_msl->memseg_arr.len,
> --

Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>

> 2.17.1
Anatoly Burakov Nov. 4, 2019, 10:20 a.m. UTC | #2
On 01-Nov-19 9:04 AM, yasufum.o@gmail.com wrote:
> From: Yasufumi Ogawa <ogawa.yasufumi@lab.ntt.co.jp>
> 
> In secondary_msl_create_walk(), it creates a file for fbarrays with its
> PID for reserving unique name among secondary processes. However, it
> does not work if several secondaries run as app containers because each
> of containerized secondary has PID 1, and failed to reserve unique name
> other than first one. To reserve unique name in each of containers, use
> hostname in addition to PID.
> 
> Cc: stable@dpdk.org
> 
> Signed-off-by: Yasufumi Ogawa <yasufum.o@gmail.com>
> ---
>   lib/librte_eal/common/include/rte_fbarray.h |  2 +-
>   lib/librte_eal/linux/eal/eal_memalloc.c     | 11 ++++++++---
>   2 files changed, 9 insertions(+), 4 deletions(-)
> 
> diff --git a/lib/librte_eal/common/include/rte_fbarray.h b/lib/librte_eal/common/include/rte_fbarray.h
> index 6dccdbec9..5c2815093 100644
> --- a/lib/librte_eal/common/include/rte_fbarray.h
> +++ b/lib/librte_eal/common/include/rte_fbarray.h
> @@ -39,7 +39,7 @@ extern "C" {
>   #include <rte_compat.h>
>   #include <rte_rwlock.h>
>   
> -#define RTE_FBARRAY_NAME_LEN 64
> +#define RTE_FBARRAY_NAME_LEN NAME_MAX
>   
>   struct rte_fbarray {
>   	char name[RTE_FBARRAY_NAME_LEN]; /**< name associated with an array */
> diff --git a/lib/librte_eal/linux/eal/eal_memalloc.c b/lib/librte_eal/linux/eal/eal_memalloc.c
> index af6d0d023..24f0275c9 100644
> --- a/lib/librte_eal/linux/eal/eal_memalloc.c
> +++ b/lib/librte_eal/linux/eal/eal_memalloc.c
> @@ -1365,6 +1365,7 @@ secondary_msl_create_walk(const struct rte_memseg_list *msl,
>   	struct rte_memseg_list *primary_msl, *local_msl;
>   	char name[PATH_MAX];
>   	int msl_idx, ret;
> +	char hostname[HOST_NAME_MAX] = { 0 };
>   
>   	if (msl->external)
>   		return 0;
> @@ -1373,9 +1374,13 @@ secondary_msl_create_walk(const struct rte_memseg_list *msl,
>   	primary_msl = &mcfg->memsegs[msl_idx];
>   	local_msl = &local_memsegs[msl_idx];
>   
> -	/* create distinct fbarrays for each secondary */
> -	snprintf(name, RTE_FBARRAY_NAME_LEN, "%s_%i",
> -		primary_msl->memseg_arr.name, getpid());
> +	/* Create distinct fbarrays for each secondary by using PID and
> +	 * hostname. The reason why using hostname is because PID could be
> +	 * duplicated among secondaries if it is launched in a container.
> +	 */
> +	gethostname(hostname, HOST_NAME_MAX);
> +	snprintf(name, RTE_FBARRAY_NAME_LEN, "%s_%s_%d",
> +			primary_msl->memseg_arr.name, hostname, (int)getpid());
>   
>   	ret = rte_fbarray_init(&local_msl->memseg_arr, name,
>   		primary_msl->memseg_arr.len,
> 

I think the order should be reversed. Both containers and non-containers 
can have their hostname set, and RTE_FBARRAY_NAME_LEN is of fairly 
limited length, so if the hostname is long enough, the PID never gets 
into the name string, resulting in duplicates. It is better have pid first.

Once that's fixed,

Acked-by: Anatoly Burakov <anatoly.burakov@intel.com>
David Marchand Nov. 5, 2019, 10:13 a.m. UTC | #3
Hello Anatoly, Yasufumi,

On Mon, Nov 4, 2019 at 11:20 AM Burakov, Anatoly
<anatoly.burakov@intel.com> wrote:
>
> On 01-Nov-19 9:04 AM, yasufum.o@gmail.com wrote:
> > From: Yasufumi Ogawa <ogawa.yasufumi@lab.ntt.co.jp>
> >
> > In secondary_msl_create_walk(), it creates a file for fbarrays with its
> > PID for reserving unique name among secondary processes. However, it
> > does not work if several secondaries run as app containers because each
> > of containerized secondary has PID 1, and failed to reserve unique name
> > other than first one. To reserve unique name in each of containers, use
> > hostname in addition to PID.
> >
> > Cc: stable@dpdk.org

We can't backport this as is, see below.


> >
> > Signed-off-by: Yasufumi Ogawa <yasufum.o@gmail.com>
> > ---
> >   lib/librte_eal/common/include/rte_fbarray.h |  2 +-
> >   lib/librte_eal/linux/eal/eal_memalloc.c     | 11 ++++++++---
> >   2 files changed, 9 insertions(+), 4 deletions(-)
> >
> > diff --git a/lib/librte_eal/common/include/rte_fbarray.h b/lib/librte_eal/common/include/rte_fbarray.h
> > index 6dccdbec9..5c2815093 100644
> > --- a/lib/librte_eal/common/include/rte_fbarray.h
> > +++ b/lib/librte_eal/common/include/rte_fbarray.h
> > @@ -39,7 +39,7 @@ extern "C" {
> >   #include <rte_compat.h>
> >   #include <rte_rwlock.h>
> >
> > -#define RTE_FBARRAY_NAME_LEN 64
> > +#define RTE_FBARRAY_NAME_LEN NAME_MAX

The change on RTE_FBARRAY_NAME_LEN breaks the ABI, so we cannot
backport this as is.
For 19.11, we can allow this breakage, but we need an update of the
release notes.

Besides, what is the impact in terms of memory consumption?


> >
> >   struct rte_fbarray {
> >       char name[RTE_FBARRAY_NAME_LEN]; /**< name associated with an array */
> > diff --git a/lib/librte_eal/linux/eal/eal_memalloc.c b/lib/librte_eal/linux/eal/eal_memalloc.c
> > index af6d0d023..24f0275c9 100644
> > --- a/lib/librte_eal/linux/eal/eal_memalloc.c
> > +++ b/lib/librte_eal/linux/eal/eal_memalloc.c
> > @@ -1365,6 +1365,7 @@ secondary_msl_create_walk(const struct rte_memseg_list *msl,
> >       struct rte_memseg_list *primary_msl, *local_msl;
> >       char name[PATH_MAX];
> >       int msl_idx, ret;
> > +     char hostname[HOST_NAME_MAX] = { 0 };
> >
> >       if (msl->external)
> >               return 0;
> > @@ -1373,9 +1374,13 @@ secondary_msl_create_walk(const struct rte_memseg_list *msl,
> >       primary_msl = &mcfg->memsegs[msl_idx];
> >       local_msl = &local_memsegs[msl_idx];
> >
> > -     /* create distinct fbarrays for each secondary */
> > -     snprintf(name, RTE_FBARRAY_NAME_LEN, "%s_%i",
> > -             primary_msl->memseg_arr.name, getpid());
> > +     /* Create distinct fbarrays for each secondary by using PID and
> > +      * hostname. The reason why using hostname is because PID could be
> > +      * duplicated among secondaries if it is launched in a container.
> > +      */
> > +     gethostname(hostname, HOST_NAME_MAX);

Personal preference, s/HOST_NAME_MAX/sizeof(hostname)/.


hostname[] is HOST_NAME_MAX bytes long.
In the worst case, we can get a non NULL terminated hostname string.
"
       gethostname() returns the null-terminated hostname in the
character array name, which has a length of len bytes.  If the
null-terminated hostname is too large to fit, then the name is
truncated, and
       no error is returned (but see NOTES below).  POSIX.1-2001 says
that if such truncation occurs, then it is unspecified whether the
returned buffer includes a terminating null byte.
...
NOTES
       SUSv2 guarantees that "Host names are limited to 255 bytes".
POSIX.1-2001 guarantees that "Host names (not including the
terminating null byte) are  limited  to  HOST_NAME_MAX  bytes".   On
Linux,
       HOST_NAME_MAX is defined with the value 64, which has been the
limit since Linux 1.0 (earlier kernels imposed a limit of 8 bytes).
"

How about making hostname[] HOST_NAME_MAX+1 bytes long?

> > +     snprintf(name, RTE_FBARRAY_NAME_LEN, "%s_%s_%d",
> > +                     primary_msl->memseg_arr.name, hostname, (int)getpid());
> >
> >       ret = rte_fbarray_init(&local_msl->memseg_arr, name,
> >               primary_msl->memseg_arr.len,
> >
>
> I think the order should be reversed. Both containers and non-containers
> can have their hostname set, and RTE_FBARRAY_NAME_LEN is of fairly
> limited length, so if the hostname is long enough, the PID never gets
> into the name string, resulting in duplicates. It is better have pid first.

Anatoly,

On the principle, it seems better, yes.
Just the comment on RTE_FBARRAY_NAME_LEN indicates that you missed the
change at the top of the patch.
What do you think of this change?
Anatoly Burakov Nov. 5, 2019, 11:31 a.m. UTC | #4
On 05-Nov-19 10:13 AM, David Marchand wrote:
> Hello Anatoly, Yasufumi,
> 
> On Mon, Nov 4, 2019 at 11:20 AM Burakov, Anatoly
> <anatoly.burakov@intel.com> wrote:
>>
>> On 01-Nov-19 9:04 AM, yasufum.o@gmail.com wrote:
>>> From: Yasufumi Ogawa <ogawa.yasufumi@lab.ntt.co.jp>
>>>
>>> In secondary_msl_create_walk(), it creates a file for fbarrays with its
>>> PID for reserving unique name among secondary processes. However, it
>>> does not work if several secondaries run as app containers because each
>>> of containerized secondary has PID 1, and failed to reserve unique name
>>> other than first one. To reserve unique name in each of containers, use
>>> hostname in addition to PID.
>>>
>>> Cc: stable@dpdk.org
> 
> We can't backport this as is, see below.
> 
> 
>>>
>>> Signed-off-by: Yasufumi Ogawa <yasufum.o@gmail.com>
>>> ---
>>>    lib/librte_eal/common/include/rte_fbarray.h |  2 +-
>>>    lib/librte_eal/linux/eal/eal_memalloc.c     | 11 ++++++++---
>>>    2 files changed, 9 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/lib/librte_eal/common/include/rte_fbarray.h b/lib/librte_eal/common/include/rte_fbarray.h
>>> index 6dccdbec9..5c2815093 100644
>>> --- a/lib/librte_eal/common/include/rte_fbarray.h
>>> +++ b/lib/librte_eal/common/include/rte_fbarray.h
>>> @@ -39,7 +39,7 @@ extern "C" {
>>>    #include <rte_compat.h>
>>>    #include <rte_rwlock.h>
>>>
>>> -#define RTE_FBARRAY_NAME_LEN 64
>>> +#define RTE_FBARRAY_NAME_LEN NAME_MAX
> 
> The change on RTE_FBARRAY_NAME_LEN breaks the ABI, so we cannot
> backport this as is.
> For 19.11, we can allow this breakage, but we need an update of the
> release notes.
> 
> Besides, what is the impact in terms of memory consumption?
> 
> 
>>>
>>>    struct rte_fbarray {
>>>        char name[RTE_FBARRAY_NAME_LEN]; /**< name associated with an array */
>>> diff --git a/lib/librte_eal/linux/eal/eal_memalloc.c b/lib/librte_eal/linux/eal/eal_memalloc.c
>>> index af6d0d023..24f0275c9 100644
>>> --- a/lib/librte_eal/linux/eal/eal_memalloc.c
>>> +++ b/lib/librte_eal/linux/eal/eal_memalloc.c
>>> @@ -1365,6 +1365,7 @@ secondary_msl_create_walk(const struct rte_memseg_list *msl,
>>>        struct rte_memseg_list *primary_msl, *local_msl;
>>>        char name[PATH_MAX];
>>>        int msl_idx, ret;
>>> +     char hostname[HOST_NAME_MAX] = { 0 };
>>>
>>>        if (msl->external)
>>>                return 0;
>>> @@ -1373,9 +1374,13 @@ secondary_msl_create_walk(const struct rte_memseg_list *msl,
>>>        primary_msl = &mcfg->memsegs[msl_idx];
>>>        local_msl = &local_memsegs[msl_idx];
>>>
>>> -     /* create distinct fbarrays for each secondary */
>>> -     snprintf(name, RTE_FBARRAY_NAME_LEN, "%s_%i",
>>> -             primary_msl->memseg_arr.name, getpid());
>>> +     /* Create distinct fbarrays for each secondary by using PID and
>>> +      * hostname. The reason why using hostname is because PID could be
>>> +      * duplicated among secondaries if it is launched in a container.
>>> +      */
>>> +     gethostname(hostname, HOST_NAME_MAX);
> 
> Personal preference, s/HOST_NAME_MAX/sizeof(hostname)/.
> 
> 
> hostname[] is HOST_NAME_MAX bytes long.
> In the worst case, we can get a non NULL terminated hostname string.
> "
>         gethostname() returns the null-terminated hostname in the
> character array name, which has a length of len bytes.  If the
> null-terminated hostname is too large to fit, then the name is
> truncated, and
>         no error is returned (but see NOTES below).  POSIX.1-2001 says
> that if such truncation occurs, then it is unspecified whether the
> returned buffer includes a terminating null byte.
> ...
> NOTES
>         SUSv2 guarantees that "Host names are limited to 255 bytes".
> POSIX.1-2001 guarantees that "Host names (not including the
> terminating null byte) are  limited  to  HOST_NAME_MAX  bytes".   On
> Linux,
>         HOST_NAME_MAX is defined with the value 64, which has been the
> limit since Linux 1.0 (earlier kernels imposed a limit of 8 bytes).
> "
> 
> How about making hostname[] HOST_NAME_MAX+1 bytes long?
> 
>>> +     snprintf(name, RTE_FBARRAY_NAME_LEN, "%s_%s_%d",
>>> +                     primary_msl->memseg_arr.name, hostname, (int)getpid());
>>>
>>>        ret = rte_fbarray_init(&local_msl->memseg_arr, name,
>>>                primary_msl->memseg_arr.len,
>>>
>>
>> I think the order should be reversed. Both containers and non-containers
>> can have their hostname set, and RTE_FBARRAY_NAME_LEN is of fairly
>> limited length, so if the hostname is long enough, the PID never gets
>> into the name string, resulting in duplicates. It is better have pid first.
> 
> Anatoly,
> 
> On the principle, it seems better, yes.
> Just the comment on RTE_FBARRAY_NAME_LEN indicates that you missed the
> change at the top of the patch.
> What do you think of this change?
> 

Yes, i did miss that, apologies.

I don't have a strong opinion on this change, however the above comment 
would still be true if we make fbarray size to be hostname_max + 1 - we 
still potentially get no space for a pid. So if we're going to have pid 
in there as well, it should be hostname_max + pid_max (5 digits?) + 
whatever underscores we have + null terminator, to ensure it fits under 
any and all circumstances.

Wrt memory usage, honestly, we don't live in a "640K should be enough 
for everyone" era any more. I don't see this being a major issue. This 
is not a hotpath, and we reserve half a terabyte of virtual memory at 
startup as it is. A few kilo/megabytes more isn't going to make much of 
a difference here.
Ananyev, Konstantin Nov. 5, 2019, 11:41 a.m. UTC | #5
> -----Original Message-----
> From: Burakov, Anatoly <anatoly.burakov@intel.com>
> Sent: Tuesday, November 5, 2019 11:31 AM
> To: David Marchand <david.marchand@redhat.com>; Yasufumi Ogawa <yasufum.o@gmail.com>
> Cc: Ananyev, Konstantin <konstantin.ananyev@intel.com>; dev <dev@dpdk.org>; dpdk stable <stable@dpdk.org>; Yasufumi Ogawa
> <ogawa.yasufumi@lab.ntt.co.jp>
> Subject: Re: [PATCH v6 1/1] fbarray: fix duplicated fbarray file in secondary
> 
> On 05-Nov-19 10:13 AM, David Marchand wrote:
> > Hello Anatoly, Yasufumi,
> >
> > On Mon, Nov 4, 2019 at 11:20 AM Burakov, Anatoly
> > <anatoly.burakov@intel.com> wrote:
> >>
> >> On 01-Nov-19 9:04 AM, yasufum.o@gmail.com wrote:
> >>> From: Yasufumi Ogawa <ogawa.yasufumi@lab.ntt.co.jp>
> >>>
> >>> In secondary_msl_create_walk(), it creates a file for fbarrays with its
> >>> PID for reserving unique name among secondary processes. However, it
> >>> does not work if several secondaries run as app containers because each
> >>> of containerized secondary has PID 1, and failed to reserve unique name
> >>> other than first one. To reserve unique name in each of containers, use
> >>> hostname in addition to PID.
> >>>
> >>> Cc: stable@dpdk.org
> >
> > We can't backport this as is, see below.
> >
> >
> >>>
> >>> Signed-off-by: Yasufumi Ogawa <yasufum.o@gmail.com>
> >>> ---
> >>>    lib/librte_eal/common/include/rte_fbarray.h |  2 +-
> >>>    lib/librte_eal/linux/eal/eal_memalloc.c     | 11 ++++++++---
> >>>    2 files changed, 9 insertions(+), 4 deletions(-)
> >>>
> >>> diff --git a/lib/librte_eal/common/include/rte_fbarray.h b/lib/librte_eal/common/include/rte_fbarray.h
> >>> index 6dccdbec9..5c2815093 100644
> >>> --- a/lib/librte_eal/common/include/rte_fbarray.h
> >>> +++ b/lib/librte_eal/common/include/rte_fbarray.h
> >>> @@ -39,7 +39,7 @@ extern "C" {
> >>>    #include <rte_compat.h>
> >>>    #include <rte_rwlock.h>
> >>>
> >>> -#define RTE_FBARRAY_NAME_LEN 64
> >>> +#define RTE_FBARRAY_NAME_LEN NAME_MAX
> >
> > The change on RTE_FBARRAY_NAME_LEN breaks the ABI, so we cannot
> > backport this as is.
> > For 19.11, we can allow this breakage, but we need an update of the
> > release notes.
> >
> > Besides, what is the impact in terms of memory consumption?
> >
> >
> >>>
> >>>    struct rte_fbarray {
> >>>        char name[RTE_FBARRAY_NAME_LEN]; /**< name associated with an array */
> >>> diff --git a/lib/librte_eal/linux/eal/eal_memalloc.c b/lib/librte_eal/linux/eal/eal_memalloc.c
> >>> index af6d0d023..24f0275c9 100644
> >>> --- a/lib/librte_eal/linux/eal/eal_memalloc.c
> >>> +++ b/lib/librte_eal/linux/eal/eal_memalloc.c
> >>> @@ -1365,6 +1365,7 @@ secondary_msl_create_walk(const struct rte_memseg_list *msl,
> >>>        struct rte_memseg_list *primary_msl, *local_msl;
> >>>        char name[PATH_MAX];
> >>>        int msl_idx, ret;
> >>> +     char hostname[HOST_NAME_MAX] = { 0 };
> >>>
> >>>        if (msl->external)
> >>>                return 0;
> >>> @@ -1373,9 +1374,13 @@ secondary_msl_create_walk(const struct rte_memseg_list *msl,
> >>>        primary_msl = &mcfg->memsegs[msl_idx];
> >>>        local_msl = &local_memsegs[msl_idx];
> >>>
> >>> -     /* create distinct fbarrays for each secondary */
> >>> -     snprintf(name, RTE_FBARRAY_NAME_LEN, "%s_%i",
> >>> -             primary_msl->memseg_arr.name, getpid());
> >>> +     /* Create distinct fbarrays for each secondary by using PID and
> >>> +      * hostname. The reason why using hostname is because PID could be
> >>> +      * duplicated among secondaries if it is launched in a container.
> >>> +      */
> >>> +     gethostname(hostname, HOST_NAME_MAX);
> >
> > Personal preference, s/HOST_NAME_MAX/sizeof(hostname)/.
> >
> >
> > hostname[] is HOST_NAME_MAX bytes long.
> > In the worst case, we can get a non NULL terminated hostname string.
> > "
> >         gethostname() returns the null-terminated hostname in the
> > character array name, which has a length of len bytes.  If the
> > null-terminated hostname is too large to fit, then the name is
> > truncated, and
> >         no error is returned (but see NOTES below).  POSIX.1-2001 says
> > that if such truncation occurs, then it is unspecified whether the
> > returned buffer includes a terminating null byte.
> > ...
> > NOTES
> >         SUSv2 guarantees that "Host names are limited to 255 bytes".
> > POSIX.1-2001 guarantees that "Host names (not including the
> > terminating null byte) are  limited  to  HOST_NAME_MAX  bytes".   On
> > Linux,
> >         HOST_NAME_MAX is defined with the value 64, which has been the
> > limit since Linux 1.0 (earlier kernels imposed a limit of 8 bytes).
> > "
> >
> > How about making hostname[] HOST_NAME_MAX+1 bytes long?
> >
> >>> +     snprintf(name, RTE_FBARRAY_NAME_LEN, "%s_%s_%d",
> >>> +                     primary_msl->memseg_arr.name, hostname, (int)getpid());
> >>>
> >>>        ret = rte_fbarray_init(&local_msl->memseg_arr, name,
> >>>                primary_msl->memseg_arr.len,
> >>>
> >>
> >> I think the order should be reversed. Both containers and non-containers
> >> can have their hostname set, and RTE_FBARRAY_NAME_LEN is of fairly
> >> limited length, so if the hostname is long enough, the PID never gets
> >> into the name string, resulting in duplicates. It is better have pid first.
> >
> > Anatoly,
> >
> > On the principle, it seems better, yes.
> > Just the comment on RTE_FBARRAY_NAME_LEN indicates that you missed the
> > change at the top of the patch.
> > What do you think of this change?
> >
> 
> Yes, i did miss that, apologies.
> 
> I don't have a strong opinion on this change, however the above comment
> would still be true if we make fbarray size to be hostname_max + 1 - we
> still potentially get no space for a pid. So if we're going to have pid
> in there as well, it should be hostname_max + pid_max (5 digits?) +
> whatever underscores we have + null terminator, to ensure it fits under
> any and all circumstances.#

I think that at least on linux we have more than enough space here:

$ find /usr/include -type f | xargs grep ' NAME_MAX' | grep define
/usr/include/linux/limits.h:#define NAME_MAX         255        /* # chars in a file name */

$ find /usr/include -type f | xargs grep ' HOST_NAME_MAX' | grep define
/usr/include/i386-linux-gnu/bits/local_lim.h:#define HOST_NAME_MAX             64
/usr/include/x86_64-linux-gnu/bits/local_lim.h:#define HOST_NAME_MAX           64

> 
> Wrt memory usage, honestly, we don't live in a "640K should be enough
> for everyone" era any more. I don't see this being a major issue. This
> is not a hotpath, and we reserve half a terabyte of virtual memory at
> startup as it is. A few kilo/megabytes more isn't going to make much of
> a difference here.
> 
> --
> Thanks,
> Anatoly
Anatoly Burakov Nov. 6, 2019, 10:37 a.m. UTC | #6
On 05-Nov-19 11:41 AM, Ananyev, Konstantin wrote:
> 
> 
>> -----Original Message-----
>> From: Burakov, Anatoly <anatoly.burakov@intel.com>
>> Sent: Tuesday, November 5, 2019 11:31 AM
>> To: David Marchand <david.marchand@redhat.com>; Yasufumi Ogawa <yasufum.o@gmail.com>
>> Cc: Ananyev, Konstantin <konstantin.ananyev@intel.com>; dev <dev@dpdk.org>; dpdk stable <stable@dpdk.org>; Yasufumi Ogawa
>> <ogawa.yasufumi@lab.ntt.co.jp>
>> Subject: Re: [PATCH v6 1/1] fbarray: fix duplicated fbarray file in secondary
>>
>> On 05-Nov-19 10:13 AM, David Marchand wrote:
>>> Hello Anatoly, Yasufumi,
>>>
>>> On Mon, Nov 4, 2019 at 11:20 AM Burakov, Anatoly
>>> <anatoly.burakov@intel.com> wrote:
>>>>
>>>> On 01-Nov-19 9:04 AM, yasufum.o@gmail.com wrote:
>>>>> From: Yasufumi Ogawa <ogawa.yasufumi@lab.ntt.co.jp>
>>>>>
>>>>> In secondary_msl_create_walk(), it creates a file for fbarrays with its
>>>>> PID for reserving unique name among secondary processes. However, it
>>>>> does not work if several secondaries run as app containers because each
>>>>> of containerized secondary has PID 1, and failed to reserve unique name
>>>>> other than first one. To reserve unique name in each of containers, use
>>>>> hostname in addition to PID.
>>>>>
>>>>> Cc: stable@dpdk.org
>>>
>>> We can't backport this as is, see below.
>>>
>>>
>>>>>
>>>>> Signed-off-by: Yasufumi Ogawa <yasufum.o@gmail.com>
>>>>> ---
>>>>>     lib/librte_eal/common/include/rte_fbarray.h |  2 +-
>>>>>     lib/librte_eal/linux/eal/eal_memalloc.c     | 11 ++++++++---
>>>>>     2 files changed, 9 insertions(+), 4 deletions(-)
>>>>>
>>>>> diff --git a/lib/librte_eal/common/include/rte_fbarray.h b/lib/librte_eal/common/include/rte_fbarray.h
>>>>> index 6dccdbec9..5c2815093 100644
>>>>> --- a/lib/librte_eal/common/include/rte_fbarray.h
>>>>> +++ b/lib/librte_eal/common/include/rte_fbarray.h
>>>>> @@ -39,7 +39,7 @@ extern "C" {
>>>>>     #include <rte_compat.h>
>>>>>     #include <rte_rwlock.h>
>>>>>
>>>>> -#define RTE_FBARRAY_NAME_LEN 64
>>>>> +#define RTE_FBARRAY_NAME_LEN NAME_MAX
>>>
>>> The change on RTE_FBARRAY_NAME_LEN breaks the ABI, so we cannot
>>> backport this as is.
>>> For 19.11, we can allow this breakage, but we need an update of the
>>> release notes.
>>>
>>> Besides, what is the impact in terms of memory consumption?
>>>
>>>
>>>>>
>>>>>     struct rte_fbarray {
>>>>>         char name[RTE_FBARRAY_NAME_LEN]; /**< name associated with an array */
>>>>> diff --git a/lib/librte_eal/linux/eal/eal_memalloc.c b/lib/librte_eal/linux/eal/eal_memalloc.c
>>>>> index af6d0d023..24f0275c9 100644
>>>>> --- a/lib/librte_eal/linux/eal/eal_memalloc.c
>>>>> +++ b/lib/librte_eal/linux/eal/eal_memalloc.c
>>>>> @@ -1365,6 +1365,7 @@ secondary_msl_create_walk(const struct rte_memseg_list *msl,
>>>>>         struct rte_memseg_list *primary_msl, *local_msl;
>>>>>         char name[PATH_MAX];
>>>>>         int msl_idx, ret;
>>>>> +     char hostname[HOST_NAME_MAX] = { 0 };
>>>>>
>>>>>         if (msl->external)
>>>>>                 return 0;
>>>>> @@ -1373,9 +1374,13 @@ secondary_msl_create_walk(const struct rte_memseg_list *msl,
>>>>>         primary_msl = &mcfg->memsegs[msl_idx];
>>>>>         local_msl = &local_memsegs[msl_idx];
>>>>>
>>>>> -     /* create distinct fbarrays for each secondary */
>>>>> -     snprintf(name, RTE_FBARRAY_NAME_LEN, "%s_%i",
>>>>> -             primary_msl->memseg_arr.name, getpid());
>>>>> +     /* Create distinct fbarrays for each secondary by using PID and
>>>>> +      * hostname. The reason why using hostname is because PID could be
>>>>> +      * duplicated among secondaries if it is launched in a container.
>>>>> +      */
>>>>> +     gethostname(hostname, HOST_NAME_MAX);
>>>
>>> Personal preference, s/HOST_NAME_MAX/sizeof(hostname)/.
>>>
>>>
>>> hostname[] is HOST_NAME_MAX bytes long.
>>> In the worst case, we can get a non NULL terminated hostname string.
>>> "
>>>          gethostname() returns the null-terminated hostname in the
>>> character array name, which has a length of len bytes.  If the
>>> null-terminated hostname is too large to fit, then the name is
>>> truncated, and
>>>          no error is returned (but see NOTES below).  POSIX.1-2001 says
>>> that if such truncation occurs, then it is unspecified whether the
>>> returned buffer includes a terminating null byte.
>>> ...
>>> NOTES
>>>          SUSv2 guarantees that "Host names are limited to 255 bytes".
>>> POSIX.1-2001 guarantees that "Host names (not including the
>>> terminating null byte) are  limited  to  HOST_NAME_MAX  bytes".   On
>>> Linux,
>>>          HOST_NAME_MAX is defined with the value 64, which has been the
>>> limit since Linux 1.0 (earlier kernels imposed a limit of 8 bytes).
>>> "
>>>
>>> How about making hostname[] HOST_NAME_MAX+1 bytes long?
>>>
>>>>> +     snprintf(name, RTE_FBARRAY_NAME_LEN, "%s_%s_%d",
>>>>> +                     primary_msl->memseg_arr.name, hostname, (int)getpid());
>>>>>
>>>>>         ret = rte_fbarray_init(&local_msl->memseg_arr, name,
>>>>>                 primary_msl->memseg_arr.len,
>>>>>
>>>>
>>>> I think the order should be reversed. Both containers and non-containers
>>>> can have their hostname set, and RTE_FBARRAY_NAME_LEN is of fairly
>>>> limited length, so if the hostname is long enough, the PID never gets
>>>> into the name string, resulting in duplicates. It is better have pid first.
>>>
>>> Anatoly,
>>>
>>> On the principle, it seems better, yes.
>>> Just the comment on RTE_FBARRAY_NAME_LEN indicates that you missed the
>>> change at the top of the patch.
>>> What do you think of this change?
>>>
>>
>> Yes, i did miss that, apologies.
>>
>> I don't have a strong opinion on this change, however the above comment
>> would still be true if we make fbarray size to be hostname_max + 1 - we
>> still potentially get no space for a pid. So if we're going to have pid
>> in there as well, it should be hostname_max + pid_max (5 digits?) +
>> whatever underscores we have + null terminator, to ensure it fits under
>> any and all circumstances.#
> 
> I think that at least on linux we have more than enough space here:
> 
> $ find /usr/include -type f | xargs grep ' NAME_MAX' | grep define
> /usr/include/linux/limits.h:#define NAME_MAX         255        /* # chars in a file name */
> 
> $ find /usr/include -type f | xargs grep ' HOST_NAME_MAX' | grep define
> /usr/include/i386-linux-gnu/bits/local_lim.h:#define HOST_NAME_MAX             64
> /usr/include/x86_64-linux-gnu/bits/local_lim.h:#define HOST_NAME_MAX           64
> 

Okay, works for me :)
Yasufumi Ogawa Nov. 8, 2019, 3:19 a.m. UTC | #7
>>> -----Original Message-----
>>> From: Burakov, Anatoly <anatoly.burakov@intel.com>
>>> Sent: Tuesday, November 5, 2019 11:31 AM
>>> To: David Marchand <david.marchand@redhat.com>; Yasufumi Ogawa 
>>> <yasufum.o@gmail.com>
>>> Cc: Ananyev, Konstantin <konstantin.ananyev@intel.com>; dev 
>>> <dev@dpdk.org>; dpdk stable <stable@dpdk.org>; Yasufumi Ogawa
>>> <ogawa.yasufumi@lab.ntt.co.jp>
>>> Subject: Re: [PATCH v6 1/1] fbarray: fix duplicated fbarray file in 
>>> secondary
>>>
>>> On 05-Nov-19 10:13 AM, David Marchand wrote:
>>>> Hello Anatoly, Yasufumi,
>>>>
>>>> On Mon, Nov 4, 2019 at 11:20 AM Burakov, Anatoly
>>>> <anatoly.burakov@intel.com> wrote:
>>>>>
>>>>> On 01-Nov-19 9:04 AM, yasufum.o@gmail.com wrote:
>>>>>> From: Yasufumi Ogawa <ogawa.yasufumi@lab.ntt.co.jp>
>>>>>>
>>>>>> In secondary_msl_create_walk(), it creates a file for fbarrays 
>>>>>> with its
>>>>>> PID for reserving unique name among secondary processes. However, it
>>>>>> does not work if several secondaries run as app containers because 
>>>>>> each
>>>>>> of containerized secondary has PID 1, and failed to reserve unique 
>>>>>> name
>>>>>> other than first one. To reserve unique name in each of 
>>>>>> containers, use
>>>>>> hostname in addition to PID.
>>>>>>
>>>>>> Cc: stable@dpdk.org
>>>>
>>>> We can't backport this as is, see below.
>>>>
>>>>
>>>>>>
>>>>>> Signed-off-by: Yasufumi Ogawa <yasufum.o@gmail.com>
>>>>>> ---
>>>>>>     lib/librte_eal/common/include/rte_fbarray.h |  2 +-
>>>>>>     lib/librte_eal/linux/eal/eal_memalloc.c     | 11 ++++++++---
>>>>>>     2 files changed, 9 insertions(+), 4 deletions(-)
>>>>>>
>>>>>> diff --git a/lib/librte_eal/common/include/rte_fbarray.h 
>>>>>> b/lib/librte_eal/common/include/rte_fbarray.h
>>>>>> index 6dccdbec9..5c2815093 100644
>>>>>> --- a/lib/librte_eal/common/include/rte_fbarray.h
>>>>>> +++ b/lib/librte_eal/common/include/rte_fbarray.h
>>>>>> @@ -39,7 +39,7 @@ extern "C" {
>>>>>>     #include <rte_compat.h>
>>>>>>     #include <rte_rwlock.h>
>>>>>>
>>>>>> -#define RTE_FBARRAY_NAME_LEN 64
>>>>>> +#define RTE_FBARRAY_NAME_LEN NAME_MAX
>>>>
>>>> The change on RTE_FBARRAY_NAME_LEN breaks the ABI, so we cannot
>>>> backport this as is.
>>>> For 19.11, we can allow this breakage, but we need an update of the
>>>> release notes.
OK. I wasn't careful for the ABI change. I think RTE_FBARRAY_NAME_LEN 64 
is enough without using hostname as a part of fbarray. So I would like 
to discard this change.

>>>>
>>>> Besides, what is the impact in terms of memory consumption?
>>>>
>>>>
>>>>>>
>>>>>>     struct rte_fbarray {
>>>>>>         char name[RTE_FBARRAY_NAME_LEN]; /**< name associated with 
>>>>>> an array */
>>>>>> diff --git a/lib/librte_eal/linux/eal/eal_memalloc.c 
>>>>>> b/lib/librte_eal/linux/eal/eal_memalloc.c
>>>>>> index af6d0d023..24f0275c9 100644
>>>>>> --- a/lib/librte_eal/linux/eal/eal_memalloc.c
>>>>>> +++ b/lib/librte_eal/linux/eal/eal_memalloc.c
>>>>>> @@ -1365,6 +1365,7 @@ secondary_msl_create_walk(const struct 
>>>>>> rte_memseg_list *msl,
>>>>>>         struct rte_memseg_list *primary_msl, *local_msl;
>>>>>>         char name[PATH_MAX];
>>>>>>         int msl_idx, ret;
>>>>>> +     char hostname[HOST_NAME_MAX] = { 0 };
>>>>>>
>>>>>>         if (msl->external)
>>>>>>                 return 0;
>>>>>> @@ -1373,9 +1374,13 @@ secondary_msl_create_walk(const struct 
>>>>>> rte_memseg_list *msl,
>>>>>>         primary_msl = &mcfg->memsegs[msl_idx];
>>>>>>         local_msl = &local_memsegs[msl_idx];
>>>>>>
>>>>>> -     /* create distinct fbarrays for each secondary */
>>>>>> -     snprintf(name, RTE_FBARRAY_NAME_LEN, "%s_%i",
>>>>>> -             primary_msl->memseg_arr.name, getpid());
>>>>>> +     /* Create distinct fbarrays for each secondary by using PID and
>>>>>> +      * hostname. The reason why using hostname is because PID 
>>>>>> could be
>>>>>> +      * duplicated among secondaries if it is launched in a 
>>>>>> container.
>>>>>> +      */
>>>>>> +     gethostname(hostname, HOST_NAME_MAX);
>>>>
>>>> Personal preference, s/HOST_NAME_MAX/sizeof(hostname)/.
>>>>
>>>>
>>>> hostname[] is HOST_NAME_MAX bytes long.
>>>> In the worst case, we can get a non NULL terminated hostname string.
>>>> "
>>>>          gethostname() returns the null-terminated hostname in the
>>>> character array name, which has a length of len bytes.  If the
>>>> null-terminated hostname is too large to fit, then the name is
>>>> truncated, and
>>>>          no error is returned (but see NOTES below).  POSIX.1-2001 says
>>>> that if such truncation occurs, then it is unspecified whether the
>>>> returned buffer includes a terminating null byte.
>>>> ...
>>>> NOTES
>>>>          SUSv2 guarantees that "Host names are limited to 255 bytes".
>>>> POSIX.1-2001 guarantees that "Host names (not including the
>>>> terminating null byte) are  limited  to  HOST_NAME_MAX  bytes".   On
>>>> Linux,
>>>>          HOST_NAME_MAX is defined with the value 64, which has been the
>>>> limit since Linux 1.0 (earlier kernels imposed a limit of 8 bytes).
>>>> "
>>>>
>>>> How about making hostname[] HOST_NAME_MAX+1 bytes long?
>>>>
>>>>>> +     snprintf(name, RTE_FBARRAY_NAME_LEN, "%s_%s_%d",
>>>>>> +                     primary_msl->memseg_arr.name, hostname, 
>>>>>> (int)getpid());
>>>>>>
>>>>>>         ret = rte_fbarray_init(&local_msl->memseg_arr, name,
>>>>>>                 primary_msl->memseg_arr.len,
>>>>>>
>>>>>
>>>>> I think the order should be reversed. Both containers and 
>>>>> non-containers
>>>>> can have their hostname set, and RTE_FBARRAY_NAME_LEN is of fairly
>>>>> limited length, so if the hostname is long enough, the PID never gets
>>>>> into the name string, resulting in duplicates. It is better have 
>>>>> pid first.
>>>>
>>>> Anatoly,
>>>>
>>>> On the principle, it seems better, yes.
>>>> Just the comment on RTE_FBARRAY_NAME_LEN indicates that you missed the
>>>> change at the top of the patch.
>>>> What do you think of this change?
>>>>
>>>
>>> Yes, i did miss that, apologies.
>>>
>>> I don't have a strong opinion on this change, however the above comment
>>> would still be true if we make fbarray size to be hostname_max + 1 - we
>>> still potentially get no space for a pid. So if we're going to have pid
>>> in there as well, it should be hostname_max + pid_max (5 digits?) +
>>> whatever underscores we have + null terminator, to ensure it fits under
>>> any and all circumstances.#
In "man 5 proc", it says the default value of pid_max is 32768, but can 
be set up to 2^22 (=4194304) on 64-bit systems. So, I think it is safer 
to consider pid_max is 7 digits.

I can find secondary's fbarray file named as
$ sudo ls /var/run/dpdk/rte/
...
fbarray_memseg-1048576k-0-0_24118
fbarray_memseg-1048576k-0-0_24191
fbarray_memseg-1048576k-0-0_24199
...

It consists of [prefix]-[hugepage size]-[numa node?]-[memchan?]_[PID]", 
and size of the name before PID is totally 28 digits. If another 
underscore and hostname are included in the name, the max size of 
fbarray file name is
   28 + 7(PID) + 1(underscore) + HOST_NAME_MAX+1(null terminator)

Considering 28 can be larger, how about using 32 as following?
+     int fbarray_sec_name_len = 32 + 7 + 1 + HOST_NAME_MAX + 1;

+     snprintf(name, fbarray_sec_name_len, "%s_%d_%s",
+                     primary_msl->memseg_arr.name, getpid(), hostname);

>>
>> I think that at least on linux we have more than enough space here:
>>
>> $ find /usr/include -type f | xargs grep ' NAME_MAX' | grep define
>> /usr/include/linux/limits.h:#define NAME_MAX         255        /* # 
>> chars in a file name */
>>
>> $ find /usr/include -type f | xargs grep ' HOST_NAME_MAX' | grep define
>> /usr/include/i386-linux-gnu/bits/local_lim.h:#define 
>> HOST_NAME_MAX             64
>> /usr/include/x86_64-linux-gnu/bits/local_lim.h:#define 
>> HOST_NAME_MAX           64
>>
> 
> Okay, works for me :)

Thanks,
Yasufumi
diff mbox series

Patch

diff --git a/lib/librte_eal/common/include/rte_fbarray.h b/lib/librte_eal/common/include/rte_fbarray.h
index 6dccdbec9..5c2815093 100644
--- a/lib/librte_eal/common/include/rte_fbarray.h
+++ b/lib/librte_eal/common/include/rte_fbarray.h
@@ -39,7 +39,7 @@  extern "C" {
 #include <rte_compat.h>
 #include <rte_rwlock.h>
 
-#define RTE_FBARRAY_NAME_LEN 64
+#define RTE_FBARRAY_NAME_LEN NAME_MAX
 
 struct rte_fbarray {
 	char name[RTE_FBARRAY_NAME_LEN]; /**< name associated with an array */
diff --git a/lib/librte_eal/linux/eal/eal_memalloc.c b/lib/librte_eal/linux/eal/eal_memalloc.c
index af6d0d023..24f0275c9 100644
--- a/lib/librte_eal/linux/eal/eal_memalloc.c
+++ b/lib/librte_eal/linux/eal/eal_memalloc.c
@@ -1365,6 +1365,7 @@  secondary_msl_create_walk(const struct rte_memseg_list *msl,
 	struct rte_memseg_list *primary_msl, *local_msl;
 	char name[PATH_MAX];
 	int msl_idx, ret;
+	char hostname[HOST_NAME_MAX] = { 0 };
 
 	if (msl->external)
 		return 0;
@@ -1373,9 +1374,13 @@  secondary_msl_create_walk(const struct rte_memseg_list *msl,
 	primary_msl = &mcfg->memsegs[msl_idx];
 	local_msl = &local_memsegs[msl_idx];
 
-	/* create distinct fbarrays for each secondary */
-	snprintf(name, RTE_FBARRAY_NAME_LEN, "%s_%i",
-		primary_msl->memseg_arr.name, getpid());
+	/* Create distinct fbarrays for each secondary by using PID and
+	 * hostname. The reason why using hostname is because PID could be
+	 * duplicated among secondaries if it is launched in a container.
+	 */
+	gethostname(hostname, HOST_NAME_MAX);
+	snprintf(name, RTE_FBARRAY_NAME_LEN, "%s_%s_%d",
+			primary_msl->memseg_arr.name, hostname, (int)getpid());
 
 	ret = rte_fbarray_init(&local_msl->memseg_arr, name,
 		primary_msl->memseg_arr.len,