dmadev: standardize alignment and allocation
Checks
Commit Message
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Align fp_objects based on cacheline size, allocate
devices and fp_objects memory on hugepages.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
lib/dmadev/rte_dmadev.c | 6 ++----
lib/dmadev/rte_dmadev_core.h | 2 +-
2 files changed, 3 insertions(+), 5 deletions(-)
Comments
Hi Pavan,
Allocating fp_objects from rte_memory is a good idea, but it may cause
an rte_memory leak, especially in multi-process scenarios.
Currently, there is no mechanism for releasing rte_memory that
doesn't belong to any driver.
So I suggest: maybe we could add an rte_mem_align API which allocates from libc
and use it in these cases.
BTW: rte_dma_devices is only used in the control path, so it doesn't need to
use the rte_memory API, but I think it could use the new rte_mem_align API.
Thanks
On 2024/2/2 17:06, pbhagavatula@marvell.com wrote:
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>
> Align fp_objects based on cacheline size, allocate
> devices and fp_objects memory on hugepages.
>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> ---
> lib/dmadev/rte_dmadev.c | 6 ++----
> lib/dmadev/rte_dmadev_core.h | 2 +-
> 2 files changed, 3 insertions(+), 5 deletions(-)
>
> diff --git a/lib/dmadev/rte_dmadev.c b/lib/dmadev/rte_dmadev.c
> index 67434c805f43..1fe1434019f0 100644
> --- a/lib/dmadev/rte_dmadev.c
> +++ b/lib/dmadev/rte_dmadev.c
> @@ -143,10 +143,9 @@ dma_fp_data_prepare(void)
> */
> size = dma_devices_max * sizeof(struct rte_dma_fp_object) +
> RTE_CACHE_LINE_SIZE;
> - ptr = malloc(size);
> + ptr = rte_zmalloc("", size, RTE_CACHE_LINE_SIZE);
> if (ptr == NULL)
> return -ENOMEM;
> - memset(ptr, 0, size);
>
> rte_dma_fp_objs = RTE_PTR_ALIGN(ptr, RTE_CACHE_LINE_SIZE);
> for (i = 0; i < dma_devices_max; i++)
> @@ -164,10 +163,9 @@ dma_dev_data_prepare(void)
> return 0;
>
> size = dma_devices_max * sizeof(struct rte_dma_dev);
> - rte_dma_devices = malloc(size);
> + rte_dma_devices = rte_zmalloc("", size, RTE_CACHE_LINE_SIZE);
> if (rte_dma_devices == NULL)
> return -ENOMEM;
> - memset(rte_dma_devices, 0, size);
>
> return 0;
> }
> diff --git a/lib/dmadev/rte_dmadev_core.h b/lib/dmadev/rte_dmadev_core.h
> index 064785686f7f..e8239c2d22b6 100644
> --- a/lib/dmadev/rte_dmadev_core.h
> +++ b/lib/dmadev/rte_dmadev_core.h
> @@ -73,7 +73,7 @@ struct rte_dma_fp_object {
> rte_dma_completed_t completed;
> rte_dma_completed_status_t completed_status;
> rte_dma_burst_capacity_t burst_capacity;
> -} __rte_aligned(128);
> +} __rte_cache_aligned;
>
> extern struct rte_dma_fp_object *rte_dma_fp_objs;
>
>
> Hi Pavan,
>
> Alloc fp_objects from rte_memory is a good idea, but this may cause
> the rte_memory memory leak, especially in multi-process scenario.
>
> Currently, there is no mechanism for releasing such a rte_memory which
> don't belong to any driver.
>
Yeah, a secondary process will leak rte_zmalloc allocations if they are not freed.
The only option currently is to use mmap and allocate non-shared memory
in the secondary process, which is not ideal.
> So I suggest: maybe we could add rte_mem_align API which alloc from libc
> and use in this cases.
>
Yeah, maybe in the future we could add something like rte_zmalloc_private, which
would create new mappings in the secondary process. But that is out of scope for
this patch.
I will send a v2 dropping the malloc changes and keeping the cache alignment changes.
> BTW: the rte_dma_devices is only used in control-path, so it don't need
> use rte_memory API, but I think it could use the new rte_mem_align API.
>
> Thanks
>
Thanks,
Pavan.
> On 2024/2/2 17:06, pbhagavatula@marvell.com wrote:
> > From: Pavan Nikhilesh <pbhagavatula@marvell.com>
> >
> > Align fp_objects based on cacheline size, allocate
> > devices and fp_objects memory on hugepages.
> >
> > Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> > ---
> > lib/dmadev/rte_dmadev.c | 6 ++----
> > lib/dmadev/rte_dmadev_core.h | 2 +-
> > 2 files changed, 3 insertions(+), 5 deletions(-)
> >
> > diff --git a/lib/dmadev/rte_dmadev.c b/lib/dmadev/rte_dmadev.c
> > index 67434c805f43..1fe1434019f0 100644
> > --- a/lib/dmadev/rte_dmadev.c
> > +++ b/lib/dmadev/rte_dmadev.c
> > @@ -143,10 +143,9 @@ dma_fp_data_prepare(void)
> > */
> > size = dma_devices_max * sizeof(struct rte_dma_fp_object) +
> > RTE_CACHE_LINE_SIZE;
> > - ptr = malloc(size);
> > + ptr = rte_zmalloc("", size, RTE_CACHE_LINE_SIZE);
> > if (ptr == NULL)
> > return -ENOMEM;
> > - memset(ptr, 0, size);
> >
> > rte_dma_fp_objs = RTE_PTR_ALIGN(ptr, RTE_CACHE_LINE_SIZE);
> > for (i = 0; i < dma_devices_max; i++)
> > @@ -164,10 +163,9 @@ dma_dev_data_prepare(void)
> > return 0;
> >
> > size = dma_devices_max * sizeof(struct rte_dma_dev);
> > - rte_dma_devices = malloc(size);
> > + rte_dma_devices = rte_zmalloc("", size, RTE_CACHE_LINE_SIZE);
> > if (rte_dma_devices == NULL)
> > return -ENOMEM;
> > - memset(rte_dma_devices, 0, size);
> >
> > return 0;
> > }
> > diff --git a/lib/dmadev/rte_dmadev_core.h
> b/lib/dmadev/rte_dmadev_core.h
> > index 064785686f7f..e8239c2d22b6 100644
> > --- a/lib/dmadev/rte_dmadev_core.h
> > +++ b/lib/dmadev/rte_dmadev_core.h
> > @@ -73,7 +73,7 @@ struct rte_dma_fp_object {
> > rte_dma_completed_t completed;
> > rte_dma_completed_status_t completed_status;
> > rte_dma_burst_capacity_t burst_capacity;
> > -} __rte_aligned(128);
> > +} __rte_cache_aligned;
> >
> > extern struct rte_dma_fp_object *rte_dma_fp_objs;
> >
> >
@@ -143,10 +143,9 @@ dma_fp_data_prepare(void)
*/
size = dma_devices_max * sizeof(struct rte_dma_fp_object) +
RTE_CACHE_LINE_SIZE;
- ptr = malloc(size);
+ ptr = rte_zmalloc("", size, RTE_CACHE_LINE_SIZE);
if (ptr == NULL)
return -ENOMEM;
- memset(ptr, 0, size);
rte_dma_fp_objs = RTE_PTR_ALIGN(ptr, RTE_CACHE_LINE_SIZE);
for (i = 0; i < dma_devices_max; i++)
@@ -164,10 +163,9 @@ dma_dev_data_prepare(void)
return 0;
size = dma_devices_max * sizeof(struct rte_dma_dev);
- rte_dma_devices = malloc(size);
+ rte_dma_devices = rte_zmalloc("", size, RTE_CACHE_LINE_SIZE);
if (rte_dma_devices == NULL)
return -ENOMEM;
- memset(rte_dma_devices, 0, size);
return 0;
}
@@ -73,7 +73,7 @@ struct rte_dma_fp_object {
rte_dma_completed_t completed;
rte_dma_completed_status_t completed_status;
rte_dma_burst_capacity_t burst_capacity;
-} __rte_aligned(128);
+} __rte_cache_aligned;
extern struct rte_dma_fp_object *rte_dma_fp_objs;