[v2] eal: use madvise to exclude unmapped memory from being dumped
Checks
Commit Message
Currently, even though memory is mapped with PROT_NONE, this does not
cause it to be excluded from core dumps. This is counter-productive,
because in a lot of cases, this memory will go unused (e.g. when the
memory subsystem preallocates VA space but hasn't yet mapped physical
pages into it).
Use `madvise()` call with MADV_DONTDUMP parameter to exclude the
unmapped memory from being dumped.
Signed-off-by: Li Feng <fengli@smartx.com>
---
V2:
- add support for freebsd.
- when free_seg is called, mark the memory MADV_DONTDUMP.
- when alloc_seg is called, mark the memory MADV_DODUMP.
lib/librte_eal/common/eal_common_memory.c | 26 ++++++++++++++++++++++++++
lib/librte_eal/linux/eal_memalloc.c | 6 ++++++
2 files changed, 32 insertions(+)
Comments
On 24-Apr-20 11:50 AM, Li Feng wrote:
> Currently, even though memory is mapped with PROT_NONE, this does not
> cause it to be excluded from core dumps. This is counter-productive,
> because in a lot of cases, this memory will go unused (e.g. when the
> memory subsystem preallocates VA space but hasn't yet mapped physical
> pages into it).
>
> Use `madvise()` call with MADV_DONTDUMP parameter to exclude the
> unmapped memory from being dumped.
>
> Signed-off-by: Li Feng <fengli@smartx.com>
> ---
> V2:
> - add support for freebsd.
> - when free_seg is called, mark the memory MADV_DONTDUMP.
> - when alloc_seg is called, mark the memory MADV_DODUMP.
Isn't this v3 now?
>
> lib/librte_eal/common/eal_common_memory.c | 26 ++++++++++++++++++++++++++
> lib/librte_eal/linux/eal_memalloc.c | 6 ++++++
> 2 files changed, 32 insertions(+)
>
> diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c
> index cc7d54e0c..83be94a20 100644
> --- a/lib/librte_eal/common/eal_common_memory.c
> +++ b/lib/librte_eal/common/eal_common_memory.c
> @@ -177,6 +177,32 @@ eal_get_virtual_area(void *requested_addr, size_t *size,
> after_len = RTE_PTR_DIFF(map_end, aligned_end);
> if (after_len > 0)
> munmap(aligned_end, after_len);
> +
> + /*
> + * Exclude this pages from a core dump.
> + */
> +#ifdef RTE_EXEC_ENV_LINUX
> + if (madvise(aligned_addr, *size, MADV_DONTDUMP) != 0)
> + RTE_LOG(DEBUG, EAL, "madvise failed: %s\n",
> + strerror(errno));
> +#elif RTE_EXEC_ENV_FREEBSD
> + if (madvise(aligned_addr, *size, MADV_NOCORE) != 0)
> + RTE_LOG(DEBUG, EAL, "madvise failed: %s\n",
> + strerror(errno));
> +#endif
> + } else {
> + /*
> + * Exclude this pages from a core dump.
> + */
> +#ifdef RTE_EXEC_ENV_LINUX
> + if (madvise(mapped_addr, map_sz, MADV_DONTDUMP) != 0)
> + RTE_LOG(DEBUG, EAL, "madvise failed: %s\n",
> + strerror(errno));
> +#elif RTE_EXEC_ENV_FREEBSD
> + if (madvise(mapped_addr, map_sz, MADV_NOCORE) != 0)
> + RTE_LOG(DEBUG, EAL, "madvise failed: %s\n",
> + strerror(errno));
> +#endif
> }
DRY (Don't Repeat Yourself) :) Probably easier to do "if (!unmap) ..."
than putting this into two places.
>
> return aligned_addr;
> diff --git a/lib/librte_eal/linux/eal_memalloc.c b/lib/librte_eal/linux/eal_memalloc.c
> index af6d0d023..9d2a6fc6f 100644
> --- a/lib/librte_eal/linux/eal_memalloc.c
> +++ b/lib/librte_eal/linux/eal_memalloc.c
> @@ -571,6 +571,9 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id,
> goto resized;
> }
>
> + if (madvise(addr, alloc_sz, MADV_DODUMP) != 0)
> + RTE_LOG(DEBUG, EAL, "madvise failed: %s\n", strerror(errno));
> +
See the discussion for v2; I believe this is unnecessary.
> /* In linux, hugetlb limitations, like cgroup, are
> * enforced at fault time instead of mmap(), even
> * with the option of MAP_POPULATE. Kernel will send
> @@ -687,6 +690,9 @@ free_seg(struct rte_memseg *ms, struct hugepage_info *hi,
> return -1;
> }
>
> + if (madvise(ms->addr, ms->len, MADV_DONTDUMP) != 0)
> + RTE_LOG(DEBUG, EAL, "madvise failed: %s\n", strerror(errno));
> +
> exit_early = false;
>
> /* if we're using anonymous hugepages, nothing to be done */
>
On Fri, Apr 24, 2020 at 12:23:44PM +0100, Burakov, Anatoly wrote:
> On 24-Apr-20 11:50 AM, Li Feng wrote:
> > Currently, even though memory is mapped with PROT_NONE, this does not
> > cause it to be excluded from core dumps. This is counter-productive,
> > because in a lot of cases, this memory will go unused (e.g. when the
> > memory subsystem preallocates VA space but hasn't yet mapped physical
> > pages into it).
> >
> > Use `madvise()` call with MADV_DONTDUMP parameter to exclude the
> > unmapped memory from being dumped.
> >
> > Signed-off-by: Li Feng <fengli@smartx.com>
> > ---
> > V2:
> > - add support for freebsd.
> > - when free_seg is called, mark the memory MADV_DONTDUMP.
> > - when alloc_seg is called, mark the memory MADV_DODUMP.
>
> Isn't this v3 now?
>
> >
> > lib/librte_eal/common/eal_common_memory.c | 26 ++++++++++++++++++++++++++
> > lib/librte_eal/linux/eal_memalloc.c | 6 ++++++
> > 2 files changed, 32 insertions(+)
> >
> > diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c
> > index cc7d54e0c..83be94a20 100644
> > --- a/lib/librte_eal/common/eal_common_memory.c
> > +++ b/lib/librte_eal/common/eal_common_memory.c
> > @@ -177,6 +177,32 @@ eal_get_virtual_area(void *requested_addr, size_t *size,
> > after_len = RTE_PTR_DIFF(map_end, aligned_end);
> > if (after_len > 0)
> > munmap(aligned_end, after_len);
> > +
> > + /*
> > + * Exclude this pages from a core dump.
> > + */
> > +#ifdef RTE_EXEC_ENV_LINUX
> > + if (madvise(aligned_addr, *size, MADV_DONTDUMP) != 0)
> > + RTE_LOG(DEBUG, EAL, "madvise failed: %s\n",
> > + strerror(errno));
> > +#elif RTE_EXEC_ENV_FREEBSD
> > + if (madvise(aligned_addr, *size, MADV_NOCORE) != 0)
> > + RTE_LOG(DEBUG, EAL, "madvise failed: %s\n",
> > + strerror(errno));
> > +#endif
> > + } else {
> > + /*
> > + * Exclude this pages from a core dump.
> > + */
> > +#ifdef RTE_EXEC_ENV_LINUX
> > + if (madvise(mapped_addr, map_sz, MADV_DONTDUMP) != 0)
> > + RTE_LOG(DEBUG, EAL, "madvise failed: %s\n",
> > + strerror(errno));
> > +#elif RTE_EXEC_ENV_FREEBSD
> > + if (madvise(mapped_addr, map_sz, MADV_NOCORE) != 0)
> > + RTE_LOG(DEBUG, EAL, "madvise failed: %s\n",
> > + strerror(errno));
> > +#endif
> > }
>
> DRY (Don't Repeat Yourself) :) Probably easier to do "if (!unmap) ..." than
> putting this into two places.
>
Can I also suggest putting this at the top of the file:
#ifdef RTE_EXEC_ENV_LINUX
#define RTE_DONTDUMP MADV_DONTDUMP
#elif RTE_EXEC_ENV_FREEBSD
#define RTE_DONTDUMP MADV_NOCORE
#else
#error ....
#endif
and thereafter using RTE_DONTDUMP flag and avoiding any #ifdefs inline in
the code.
On 24-Apr-20 12:36 PM, Bruce Richardson wrote:
> On Fri, Apr 24, 2020 at 12:23:44PM +0100, Burakov, Anatoly wrote:
>> On 24-Apr-20 11:50 AM, Li Feng wrote:
>>> Currently, even though memory is mapped with PROT_NONE, this does not
>>> cause it to be excluded from core dumps. This is counter-productive,
>>> because in a lot of cases, this memory will go unused (e.g. when the
>>> memory subsystem preallocates VA space but hasn't yet mapped physical
>>> pages into it).
>>>
>>> Use `madvise()` call with MADV_DONTDUMP parameter to exclude the
>>> unmapped memory from being dumped.
>>>
>>> Signed-off-by: Li Feng <fengli@smartx.com>
>>> ---
>>> V2:
>>> - add support for freebsd.
>>> - when free_seg is called, mark the memory MADV_DONTDUMP.
>>> - when alloc_seg is called, mark the memory MADV_DODUMP.
>>
>> Isn't this v3 now?
>>
>>>
>>> lib/librte_eal/common/eal_common_memory.c | 26 ++++++++++++++++++++++++++
>>> lib/librte_eal/linux/eal_memalloc.c | 6 ++++++
>>> 2 files changed, 32 insertions(+)
>>>
>>> diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c
>>> index cc7d54e0c..83be94a20 100644
>>> --- a/lib/librte_eal/common/eal_common_memory.c
>>> +++ b/lib/librte_eal/common/eal_common_memory.c
>>> @@ -177,6 +177,32 @@ eal_get_virtual_area(void *requested_addr, size_t *size,
>>> after_len = RTE_PTR_DIFF(map_end, aligned_end);
>>> if (after_len > 0)
>>> munmap(aligned_end, after_len);
>>> +
>>> + /*
>>> + * Exclude this pages from a core dump.
>>> + */
>>> +#ifdef RTE_EXEC_ENV_LINUX
>>> + if (madvise(aligned_addr, *size, MADV_DONTDUMP) != 0)
>>> + RTE_LOG(DEBUG, EAL, "madvise failed: %s\n",
>>> + strerror(errno));
>>> +#elif RTE_EXEC_ENV_FREEBSD
>>> + if (madvise(aligned_addr, *size, MADV_NOCORE) != 0)
>>> + RTE_LOG(DEBUG, EAL, "madvise failed: %s\n",
>>> + strerror(errno));
>>> +#endif
>>> + } else {
>>> + /*
>>> + * Exclude this pages from a core dump.
>>> + */
>>> +#ifdef RTE_EXEC_ENV_LINUX
>>> + if (madvise(mapped_addr, map_sz, MADV_DONTDUMP) != 0)
>>> + RTE_LOG(DEBUG, EAL, "madvise failed: %s\n",
>>> + strerror(errno));
>>> +#elif RTE_EXEC_ENV_FREEBSD
>>> + if (madvise(mapped_addr, map_sz, MADV_NOCORE) != 0)
>>> + RTE_LOG(DEBUG, EAL, "madvise failed: %s\n",
>>> + strerror(errno));
>>> +#endif
>>> }
>>
>> DRY (Don't Repeat Yourself) :) Probably easier to do "if (!unmap) ..." than
>> putting this into two places.
>>
> Can I also suggest putting this at the top of the file:
> #ifdef RTE_EXEC_ENV_LINUX
> #define RTE_DONTDUMP MADV_DONTDUMP
> #elif RTE_EXEC_ENV_FREEBSD
> #define RTE_DONTDUMP MADV_NOCORE
> #else
> #error ....
> #endif
>
> and thereafter using RTE_DONTDUMP flag and avoiding any #ifdefs inline in
> the code.
>
+1
Thanks,
Feng Li
Burakov, Anatoly <anatoly.burakov@intel.com> 于2020年4月24日周五 下午7:23写道:
>
> On 24-Apr-20 11:50 AM, Li Feng wrote:
> > Currently, even though memory is mapped with PROT_NONE, this does not
> > cause it to be excluded from core dumps. This is counter-productive,
> > because in a lot of cases, this memory will go unused (e.g. when the
> > memory subsystem preallocates VA space but hasn't yet mapped physical
> > pages into it).
> >
> > Use `madvise()` call with MADV_DONTDUMP parameter to exclude the
> > unmapped memory from being dumped.
> >
> > Signed-off-by: Li Feng <fengli@smartx.com>
> > ---
> > V2:
> > - add support for freebsd.
> > - when free_seg is called, mark the memory MADV_DONTDUMP.
> > - when alloc_seg is called, mark the memory MADV_DODUMP.
>
> Isn't this v3 now?
My mistake. I will use v4 for the next version.
>
> >
> > lib/librte_eal/common/eal_common_memory.c | 26 ++++++++++++++++++++++++++
> > lib/librte_eal/linux/eal_memalloc.c | 6 ++++++
> > 2 files changed, 32 insertions(+)
> >
> > diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c
> > index cc7d54e0c..83be94a20 100644
> > --- a/lib/librte_eal/common/eal_common_memory.c
> > +++ b/lib/librte_eal/common/eal_common_memory.c
> > @@ -177,6 +177,32 @@ eal_get_virtual_area(void *requested_addr, size_t *size,
> > after_len = RTE_PTR_DIFF(map_end, aligned_end);
> > if (after_len > 0)
> > munmap(aligned_end, after_len);
> > +
> > + /*
> > + * Exclude this pages from a core dump.
> > + */
> > +#ifdef RTE_EXEC_ENV_LINUX
> > + if (madvise(aligned_addr, *size, MADV_DONTDUMP) != 0)
> > + RTE_LOG(DEBUG, EAL, "madvise failed: %s\n",
> > + strerror(errno));
> > +#elif RTE_EXEC_ENV_FREEBSD
> > + if (madvise(aligned_addr, *size, MADV_NOCORE) != 0)
> > + RTE_LOG(DEBUG, EAL, "madvise failed: %s\n",
> > + strerror(errno));
> > +#endif
> > + } else {
> > + /*
> > + * Exclude this pages from a core dump.
> > + */
> > +#ifdef RTE_EXEC_ENV_LINUX
> > + if (madvise(mapped_addr, map_sz, MADV_DONTDUMP) != 0)
> > + RTE_LOG(DEBUG, EAL, "madvise failed: %s\n",
> > + strerror(errno));
> > +#elif RTE_EXEC_ENV_FREEBSD
> > + if (madvise(mapped_addr, map_sz, MADV_NOCORE) != 0)
> > + RTE_LOG(DEBUG, EAL, "madvise failed: %s\n",
> > + strerror(errno));
> > +#endif
> > }
>
> DRY (Don't Repeat Yourself) :) Probably easier to do "if (!unmap) ..."
> than putting this into two places.
OK. I put this into two places, because the size is more accurate in this place.
>
> >
> > return aligned_addr;
> > diff --git a/lib/librte_eal/linux/eal_memalloc.c b/lib/librte_eal/linux/eal_memalloc.c
> > index af6d0d023..9d2a6fc6f 100644
> > --- a/lib/librte_eal/linux/eal_memalloc.c
> > +++ b/lib/librte_eal/linux/eal_memalloc.c
> > @@ -571,6 +571,9 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id,
> > goto resized;
> > }
> >
> > + if (madvise(addr, alloc_sz, MADV_DODUMP) != 0)
> > + RTE_LOG(DEBUG, EAL, "madvise failed: %s\n", strerror(errno));
> > +
>
> See discussion for v2, i believe this is unnecessary.
>
> > /* In linux, hugetlb limitations, like cgroup, are
> > * enforced at fault time instead of mmap(), even
> > * with the option of MAP_POPULATE. Kernel will send
> > @@ -687,6 +690,9 @@ free_seg(struct rte_memseg *ms, struct hugepage_info *hi,
> > return -1;
> > }
> >
> > + if (madvise(ms->addr, ms->len, MADV_DONTDUMP) != 0)
> > + RTE_LOG(DEBUG, EAL, "madvise failed: %s\n", strerror(errno));
> > +
> > exit_early = false;
> >
> > /* if we're using anonymous hugepages, nothing to be done */
> >
>
>
> --
> Thanks,
> Anatoly
@@ -177,6 +177,32 @@ eal_get_virtual_area(void *requested_addr, size_t *size,
after_len = RTE_PTR_DIFF(map_end, aligned_end);
if (after_len > 0)
munmap(aligned_end, after_len);
+
+ /*
+ * Exclude this pages from a core dump.
+ */
+#ifdef RTE_EXEC_ENV_LINUX
+ if (madvise(aligned_addr, *size, MADV_DONTDUMP) != 0)
+ RTE_LOG(DEBUG, EAL, "madvise failed: %s\n",
+ strerror(errno));
+#elif RTE_EXEC_ENV_FREEBSD
+ if (madvise(aligned_addr, *size, MADV_NOCORE) != 0)
+ RTE_LOG(DEBUG, EAL, "madvise failed: %s\n",
+ strerror(errno));
+#endif
+ } else {
+ /*
+ * Exclude this pages from a core dump.
+ */
+#ifdef RTE_EXEC_ENV_LINUX
+ if (madvise(mapped_addr, map_sz, MADV_DONTDUMP) != 0)
+ RTE_LOG(DEBUG, EAL, "madvise failed: %s\n",
+ strerror(errno));
+#elif RTE_EXEC_ENV_FREEBSD
+ if (madvise(mapped_addr, map_sz, MADV_NOCORE) != 0)
+ RTE_LOG(DEBUG, EAL, "madvise failed: %s\n",
+ strerror(errno));
+#endif
}
return aligned_addr;
@@ -571,6 +571,9 @@ alloc_seg(struct rte_memseg *ms, void *addr, int socket_id,
goto resized;
}
+ if (madvise(addr, alloc_sz, MADV_DODUMP) != 0)
+ RTE_LOG(DEBUG, EAL, "madvise failed: %s\n", strerror(errno));
+
/* In linux, hugetlb limitations, like cgroup, are
* enforced at fault time instead of mmap(), even
* with the option of MAP_POPULATE. Kernel will send
@@ -687,6 +690,9 @@ free_seg(struct rte_memseg *ms, struct hugepage_info *hi,
return -1;
}
+ if (madvise(ms->addr, ms->len, MADV_DONTDUMP) != 0)
+ RTE_LOG(DEBUG, EAL, "madvise failed: %s\n", strerror(errno));
+
exit_early = false;
/* if we're using anonymous hugepages, nothing to be done */