[02/16] mem: allow memseg lists to be marked as external

Message ID e044616faa33794c4f59844112137ef28b0ce57f.1536064999.git.anatoly.burakov@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series Support externally allocated memory in DPDK |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Burakov, Anatoly Sept. 4, 2018, 1:11 p.m. UTC
When we allocate and use DPDK memory, we need to be able to
differentiate between DPDK hugepage segments and segments that
were made part of DPDK but are externally allocated. Add such
a property to memseg lists.

All current calls for memseg walk functions were adjusted to
ignore external segments where it made sense. Mempools are a
special case, because we may be asked to allocate a mempool on
a specific socket, and we need to ignore all page sizes on
other heaps or other sockets. Previously, this assumption of
knowing all page sizes was not a problem, but it will be now,
so we have to match socket ID with page size when calculating
the minimum page size for a mempool.

Signed-off-by: Anatoly Burakov <anatoly.burakov@intel.com>
---

Notes:
    v1:
    - Adjust all calls to memseg walk functions to ignore external
      segments where it made sense to do so

 drivers/bus/fslmc/fslmc_vfio.c                |  7 +++--
 drivers/net/mlx4/mlx4_mr.c                    |  3 ++
 drivers/net/mlx5/mlx5.c                       |  5 ++-
 drivers/net/mlx5/mlx5_mr.c                    |  3 ++
 drivers/net/virtio/virtio_user/vhost_kernel.c |  5 ++-
 lib/librte_eal/bsdapp/eal/eal.c               |  3 ++
 lib/librte_eal/bsdapp/eal/eal_memory.c        |  7 +++--
 lib/librte_eal/common/eal_common_memory.c     |  4 +++
 .../common/include/rte_eal_memconfig.h        |  1 +
 lib/librte_eal/common/include/rte_memory.h    |  9 ++++++
 lib/librte_eal/common/malloc_heap.c           |  9 ++++--
 lib/librte_eal/linuxapp/eal/eal.c             |  3 ++
 lib/librte_eal/linuxapp/eal/eal_memalloc.c    |  9 ++++++
 lib/librte_eal/linuxapp/eal/eal_vfio.c        | 17 +++++++---
 lib/librte_mempool/rte_mempool.c              | 31 ++++++++++++++-----
 test/test/test_malloc.c                       |  3 ++
 test/test/test_memzone.c                      |  3 ++
 17 files changed, 102 insertions(+), 20 deletions(-)
  

Patch

diff --git a/drivers/bus/fslmc/fslmc_vfio.c b/drivers/bus/fslmc/fslmc_vfio.c
index 4c2cd2a87..2e9244fb7 100644
--- a/drivers/bus/fslmc/fslmc_vfio.c
+++ b/drivers/bus/fslmc/fslmc_vfio.c
@@ -317,12 +317,15 @@  fslmc_unmap_dma(uint64_t vaddr, uint64_t iovaddr __rte_unused, size_t len)
 }
 
 static int
-fslmc_dmamap_seg(const struct rte_memseg_list *msl __rte_unused,
-		 const struct rte_memseg *ms, void *arg)
+fslmc_dmamap_seg(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
+		void *arg)
 {
 	int *n_segs = arg;
 	int ret;
 
+	if (msl->external)
+		return 0;
+
 	ret = fslmc_map_dma(ms->addr_64, ms->iova, ms->len);
 	if (ret)
 		DPAA2_BUS_ERR("Unable to VFIO map (addr=%p, len=%zu)",
diff --git a/drivers/net/mlx4/mlx4_mr.c b/drivers/net/mlx4/mlx4_mr.c
index d23d3c613..9f5d790b6 100644
--- a/drivers/net/mlx4/mlx4_mr.c
+++ b/drivers/net/mlx4/mlx4_mr.c
@@ -496,6 +496,9 @@  mr_find_contig_memsegs_cb(const struct rte_memseg_list *msl,
 {
 	struct mr_find_contig_memsegs_data *data = arg;
 
+	if (msl->external)
+		return 0;
+
 	if (data->addr < ms->addr_64 || data->addr >= ms->addr_64 + len)
 		return 0;
 	/* Found, save it and stop walking. */
diff --git a/drivers/net/mlx5/mlx5.c b/drivers/net/mlx5/mlx5.c
index ec63bc6e2..d9ed15880 100644
--- a/drivers/net/mlx5/mlx5.c
+++ b/drivers/net/mlx5/mlx5.c
@@ -568,11 +568,14 @@  static struct rte_pci_driver mlx5_driver;
 static void *uar_base;
 
 static int
-find_lower_va_bound(const struct rte_memseg_list *msl __rte_unused,
+find_lower_va_bound(const struct rte_memseg_list *msl,
 		const struct rte_memseg *ms, void *arg)
 {
 	void **addr = arg;
 
+	if (msl->external)
+		return 0;
+
 	if (*addr == NULL)
 		*addr = ms->addr;
 	else
diff --git a/drivers/net/mlx5/mlx5_mr.c b/drivers/net/mlx5/mlx5_mr.c
index 1d1bcb5fe..fd4345f9c 100644
--- a/drivers/net/mlx5/mlx5_mr.c
+++ b/drivers/net/mlx5/mlx5_mr.c
@@ -486,6 +486,9 @@  mr_find_contig_memsegs_cb(const struct rte_memseg_list *msl,
 {
 	struct mr_find_contig_memsegs_data *data = arg;
 
+	if (msl->external)
+		return 0;
+
 	if (data->addr < ms->addr_64 || data->addr >= ms->addr_64 + len)
 		return 0;
 	/* Found, save it and stop walking. */
diff --git a/drivers/net/virtio/virtio_user/vhost_kernel.c b/drivers/net/virtio/virtio_user/vhost_kernel.c
index b2444096c..885c59c8a 100644
--- a/drivers/net/virtio/virtio_user/vhost_kernel.c
+++ b/drivers/net/virtio/virtio_user/vhost_kernel.c
@@ -75,13 +75,16 @@  struct walk_arg {
 	uint32_t region_nr;
 };
 static int
-add_memory_region(const struct rte_memseg_list *msl __rte_unused,
+add_memory_region(const struct rte_memseg_list *msl,
 		const struct rte_memseg *ms, size_t len, void *arg)
 {
 	struct walk_arg *wa = arg;
 	struct vhost_memory_region *mr;
 	void *start_addr;
 
+	if (msl->external)
+		return 0;
+
 	if (wa->region_nr >= max_regions)
 		return -1;
 
diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c
index d7ae9d686..7735194a3 100644
--- a/lib/librte_eal/bsdapp/eal/eal.c
+++ b/lib/librte_eal/bsdapp/eal/eal.c
@@ -502,6 +502,9 @@  check_socket(const struct rte_memseg_list *msl, void *arg)
 {
 	int *socket_id = arg;
 
+	if (msl->external)
+		return 0;
+
 	if (msl->socket_id == *socket_id && msl->memseg_arr.count != 0)
 		return 1;
 
diff --git a/lib/librte_eal/bsdapp/eal/eal_memory.c b/lib/librte_eal/bsdapp/eal/eal_memory.c
index 65ea670f9..4b092e1f2 100644
--- a/lib/librte_eal/bsdapp/eal/eal_memory.c
+++ b/lib/librte_eal/bsdapp/eal/eal_memory.c
@@ -236,12 +236,15 @@  struct attach_walk_args {
 	int seg_idx;
 };
 static int
-attach_segment(const struct rte_memseg_list *msl __rte_unused,
-		const struct rte_memseg *ms, void *arg)
+attach_segment(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
+		void *arg)
 {
 	struct attach_walk_args *wa = arg;
 	void *addr;
 
+	if (msl->external)
+		return 0;
+
 	addr = mmap(ms->addr, ms->len, PROT_READ | PROT_WRITE,
 			MAP_SHARED | MAP_FIXED, wa->fd_hugepage,
 			wa->seg_idx * EAL_PAGE_SIZE);
diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c
index 0868bf681..55a11bf4d 100644
--- a/lib/librte_eal/common/eal_common_memory.c
+++ b/lib/librte_eal/common/eal_common_memory.c
@@ -272,6 +272,9 @@  physmem_size(const struct rte_memseg_list *msl, void *arg)
 {
 	uint64_t *total_len = arg;
 
+	if (msl->external)
+		return 0;
+
 	*total_len += msl->memseg_arr.count * msl->page_sz;
 
 	return 0;
@@ -547,6 +550,7 @@  rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg)
 	return ret;
 }
 
+
 /* init memory subsystem */
 int
 rte_eal_memory_init(void)
diff --git a/lib/librte_eal/common/include/rte_eal_memconfig.h b/lib/librte_eal/common/include/rte_eal_memconfig.h
index 1d8b0a6fe..76faf9a4a 100644
--- a/lib/librte_eal/common/include/rte_eal_memconfig.h
+++ b/lib/librte_eal/common/include/rte_eal_memconfig.h
@@ -33,6 +33,7 @@  struct rte_memseg_list {
 	size_t len; /**< Length of memory area covered by this memseg list. */
 	int socket_id; /**< Socket ID for all memsegs in this list. */
 	uint64_t page_sz; /**< Page size for all memsegs in this list. */
+	bool external; /**< true if this list points to external memory */
 	volatile uint32_t version; /**< version number for multiprocess sync. */
 	struct rte_fbarray memseg_arr;
 };
diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h
index c4b7f4cff..b381d1cb6 100644
--- a/lib/librte_eal/common/include/rte_memory.h
+++ b/lib/librte_eal/common/include/rte_memory.h
@@ -215,6 +215,9 @@  typedef int (*rte_memseg_list_walk_t)(const struct rte_memseg_list *msl,
  * @note This function read-locks the memory hotplug subsystem, and thus cannot
  *       be used within memory-related callback functions.
  *
+ * @note This function will also walk through externally allocated segments. It
+ *       is up to the user to decide whether to skip these segments.
+ *
  * @param func
  *   Iterator function
  * @param arg
@@ -233,6 +236,9 @@  rte_memseg_walk(rte_memseg_walk_t func, void *arg);
  * @note This function read-locks the memory hotplug subsystem, and thus cannot
  *       be used within memory-related callback functions.
  *
+ * @note This function will also walk through externally allocated segments. It
+ *       is up to the user to decide whether to skip these segments.
+ *
  * @param func
  *   Iterator function
  * @param arg
@@ -251,6 +257,9 @@  rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg);
  * @note This function read-locks the memory hotplug subsystem, and thus cannot
  *       be used within memory-related callback functions.
  *
+ * @note This function will also walk through externally allocated segments. It
+ *       is up to the user to decide whether to skip these segments.
+ *
  * @param func
  *   Iterator function
  * @param arg
diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c
index 12aaf2d72..8c37b9d7c 100644
--- a/lib/librte_eal/common/malloc_heap.c
+++ b/lib/librte_eal/common/malloc_heap.c
@@ -95,6 +95,9 @@  malloc_add_seg(const struct rte_memseg_list *msl,
 	struct malloc_heap *heap;
 	int msl_idx;
 
+	if (msl->external)
+		return 0;
+
 	heap = &mcfg->malloc_heaps[msl->socket_id];
 
 	/* msl is const, so find it */
@@ -756,8 +759,10 @@  malloc_heap_free(struct malloc_elem *elem)
 	/* anything after this is a bonus */
 	ret = 0;
 
-	/* ...of which we can't avail if we are in legacy mode */
-	if (internal_config.legacy_mem)
+	/* ...of which we can't avail if we are in legacy mode, or if this is an
+	 * externally allocated segment.
+	 */
+	if (internal_config.legacy_mem || msl->external)
 		goto free_unlock;
 
 	/* check if we can free any memory back to the system */
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index e59ac6577..729ae2060 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -725,6 +725,9 @@  check_socket(const struct rte_memseg_list *msl, void *arg)
 {
 	int *socket_id = arg;
 
+	if (msl->external)
+		return 0;
+
 	return *socket_id == msl->socket_id;
 }
 
diff --git a/lib/librte_eal/linuxapp/eal/eal_memalloc.c b/lib/librte_eal/linuxapp/eal/eal_memalloc.c
index d040a2f71..8b0bbe43f 100644
--- a/lib/librte_eal/linuxapp/eal/eal_memalloc.c
+++ b/lib/librte_eal/linuxapp/eal/eal_memalloc.c
@@ -1250,6 +1250,9 @@  sync_walk(const struct rte_memseg_list *msl, void *arg __rte_unused)
 	unsigned int i;
 	int msl_idx;
 
+	if (msl->external)
+		return 0;
+
 	msl_idx = msl - mcfg->memsegs;
 	primary_msl = &mcfg->memsegs[msl_idx];
 	local_msl = &local_memsegs[msl_idx];
@@ -1298,6 +1301,9 @@  secondary_msl_create_walk(const struct rte_memseg_list *msl,
 	char name[PATH_MAX];
 	int msl_idx, ret;
 
+	if (msl->external)
+		return 0;
+
 	msl_idx = msl - mcfg->memsegs;
 	primary_msl = &mcfg->memsegs[msl_idx];
 	local_msl = &local_memsegs[msl_idx];
@@ -1328,6 +1334,9 @@  secondary_lock_list_create_walk(const struct rte_memseg_list *msl,
 	int msl_idx;
 	int *data;
 
+	if (msl->external)
+		return 0;
+
 	msl_idx = msl - mcfg->memsegs;
 	len = msl->memseg_arr.len;
 
diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c
index c68dc38e0..fddbc3b54 100644
--- a/lib/librte_eal/linuxapp/eal/eal_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c
@@ -1082,11 +1082,14 @@  rte_vfio_get_group_num(const char *sysfs_base,
 }
 
 static int
-type1_map(const struct rte_memseg_list *msl __rte_unused,
-		const struct rte_memseg *ms, void *arg)
+type1_map(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
+		void *arg)
 {
 	int *vfio_container_fd = arg;
 
+	if (msl->external)
+		return 0;
+
 	return vfio_type1_dma_mem_map(*vfio_container_fd, ms->addr_64, ms->iova,
 			ms->len, 1);
 }
@@ -1196,11 +1199,14 @@  vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
 }
 
 static int
-vfio_spapr_map_walk(const struct rte_memseg_list *msl __rte_unused,
+vfio_spapr_map_walk(const struct rte_memseg_list *msl,
 		const struct rte_memseg *ms, void *arg)
 {
 	int *vfio_container_fd = arg;
 
+	if (msl->external)
+		return 0;
+
 	return vfio_spapr_dma_mem_map(*vfio_container_fd, ms->addr_64, ms->iova,
 			ms->len, 1);
 }
@@ -1210,12 +1216,15 @@  struct spapr_walk_param {
 	uint64_t hugepage_sz;
 };
 static int
-vfio_spapr_window_size_walk(const struct rte_memseg_list *msl __rte_unused,
+vfio_spapr_window_size_walk(const struct rte_memseg_list *msl,
 		const struct rte_memseg *ms, void *arg)
 {
 	struct spapr_walk_param *param = arg;
 	uint64_t max = ms->iova + ms->len;
 
+	if (msl->external)
+		return 0;
+
 	if (max > param->window_size) {
 		param->hugepage_sz = ms->hugepage_sz;
 		param->window_size = max;
diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c
index 03e6b5f73..4eae7bec6 100644
--- a/lib/librte_mempool/rte_mempool.c
+++ b/lib/librte_mempool/rte_mempool.c
@@ -99,25 +99,40 @@  static unsigned optimize_object_size(unsigned obj_size)
 	return new_obj_size * RTE_MEMPOOL_ALIGN;
 }
 
+struct pagesz_walk_arg {
+	int socket_id;
+	size_t min;
+};
+
 static int
 find_min_pagesz(const struct rte_memseg_list *msl, void *arg)
 {
-	size_t *min = arg;
+	struct pagesz_walk_arg *wa = arg;
+	bool valid;
 
-	if (msl->page_sz < *min)
-		*min = msl->page_sz;
+	valid = msl->socket_id == wa->socket_id;
+	valid |= wa->socket_id == SOCKET_ID_ANY && !msl->external;
+
+	if (!valid)
+		return 0;
+
+	if (msl->page_sz < wa->min)
+		wa->min = msl->page_sz;
 
 	return 0;
 }
 
 static size_t
-get_min_page_size(void)
+get_min_page_size(int socket_id)
 {
-	size_t min_pagesz = SIZE_MAX;
+	struct pagesz_walk_arg wa;
 
-	rte_memseg_list_walk(find_min_pagesz, &min_pagesz);
+	wa.min = SIZE_MAX;
+	wa.socket_id = socket_id;
 
-	return min_pagesz == SIZE_MAX ? (size_t) getpagesize() : min_pagesz;
+	rte_memseg_list_walk(find_min_pagesz, &wa);
+
+	return wa.min == SIZE_MAX ? (size_t) getpagesize() : wa.min;
 }
 
 
@@ -470,7 +485,7 @@  rte_mempool_populate_default(struct rte_mempool *mp)
 		pg_sz = 0;
 		pg_shift = 0;
 	} else if (try_contig) {
-		pg_sz = get_min_page_size();
+		pg_sz = get_min_page_size(mp->socket_id);
 		pg_shift = rte_bsf32(pg_sz);
 	} else {
 		pg_sz = getpagesize();
diff --git a/test/test/test_malloc.c b/test/test/test_malloc.c
index 4b5abb4e0..5e5272419 100644
--- a/test/test/test_malloc.c
+++ b/test/test/test_malloc.c
@@ -711,6 +711,9 @@  check_socket_mem(const struct rte_memseg_list *msl, void *arg)
 {
 	int32_t *socket = arg;
 
+	if (msl->external)
+		return 0;
+
 	return *socket == msl->socket_id;
 }
 
diff --git a/test/test/test_memzone.c b/test/test/test_memzone.c
index 452d7cc5e..9fe465e62 100644
--- a/test/test/test_memzone.c
+++ b/test/test/test_memzone.c
@@ -115,6 +115,9 @@  find_available_pagesz(const struct rte_memseg_list *msl, void *arg)
 {
 	struct walk_arg *wa = arg;
 
+	if (msl->external)
+		return 0;
+
 	if (msl->page_sz == RTE_PGSIZE_2M)
 		wa->hugepage_2MB_avail = 1;
 	if (msl->page_sz == RTE_PGSIZE_1G)