@@ -11,21 +11,6 @@ API and ABI deprecation notices are to be posted here.
Deprecation Notices
-------------------
-* eal: certain structures will change in EAL on account of upcoming external
- memory support. Aside from internal changes leading to an ABI break, the
- following externally visible changes will also be implemented:
-
- - ``rte_memseg_list`` will change to include a boolean flag indicating
- whether a particular memseg list is externally allocated. This will have
- implications for any users of memseg-walk-related functions, as they will
- now have to skip externally allocated segments in most cases if the intent
- is to only iterate over internal DPDK memory.
- - ``socket_id`` parameter across the entire DPDK will gain additional meaning,
- as some socket ID's will now be representing externally allocated memory. No
- changes will be required for existing code as backwards compatibility will
- be kept, and those who do not use this feature will not see these extra
- socket ID's.
-
* eal: both declaring and identifying devices will be streamlined in v18.11.
New functions will appear to query a specific port from buses, classes of
device and device drivers. Device declaration will be made coherent with the
@@ -91,6 +91,13 @@ API Changes
flag the MAC can be properly configured in any case. This is particularly
important for bonding.
+* eal: The following API changes were made in 18.11:
+
+ - The ``rte_memseg_list`` structure now has an additional flag indicating
+ whether the memseg list is externally allocated. Users of
+ memseg-walk-related functions will now have to check this flag and, in most
+ cases, skip externally allocated segments if the intent is to iterate only
+ over internal DPDK memory.
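+
+ For example, a walk callback that only processes internal memory could be
+ written as follows (the callback name is illustrative)::
+
+     static int
+     walk_cb(const struct rte_memseg_list *msl, void *arg __rte_unused)
+     {
+             if (msl->external)
+                     return 0; /* skip externally allocated memory */
+             /* ... handle internal memseg list ... */
+             return 0;
+     }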
ABI Changes
-----------
@@ -107,6 +114,9 @@ ABI Changes
=========================================================
+* eal: The EAL library ABI version was bumped due to the previously announced
+ work on supporting external memory in DPDK.
+
Removed Items
-------------
@@ -152,7 +162,7 @@ The libraries prepended with a plus sign were incremented in this version.
librte_compressdev.so.1
librte_cryptodev.so.5
librte_distributor.so.1
- librte_eal.so.8
+ + librte_eal.so.9
librte_ethdev.so.10
librte_eventdev.so.4
librte_flow_classify.so.1
@@ -317,12 +317,15 @@ fslmc_unmap_dma(uint64_t vaddr, uint64_t iovaddr __rte_unused, size_t len)
}
static int
-fslmc_dmamap_seg(const struct rte_memseg_list *msl __rte_unused,
- const struct rte_memseg *ms, void *arg)
+fslmc_dmamap_seg(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
+ void *arg)
{
int *n_segs = arg;
int ret;
+ if (msl->external)
+ return 0;
+
ret = fslmc_map_dma(ms->addr_64, ms->iova, ms->len);
if (ret)
DPAA2_BUS_ERR("Unable to VFIO map (addr=%p, len=%zu)",
@@ -496,6 +496,9 @@ mr_find_contig_memsegs_cb(const struct rte_memseg_list *msl,
{
struct mr_find_contig_memsegs_data *data = arg;
+ if (msl->external)
+ return 0;
+
if (data->addr < ms->addr_64 || data->addr >= ms->addr_64 + len)
return 0;
/* Found, save it and stop walking. */
@@ -568,11 +568,14 @@ static struct rte_pci_driver mlx5_driver;
static void *uar_base;
static int
-find_lower_va_bound(const struct rte_memseg_list *msl __rte_unused,
+find_lower_va_bound(const struct rte_memseg_list *msl,
const struct rte_memseg *ms, void *arg)
{
void **addr = arg;
+ if (msl->external)
+ return 0;
+
if (*addr == NULL)
*addr = ms->addr;
else
@@ -486,6 +486,9 @@ mr_find_contig_memsegs_cb(const struct rte_memseg_list *msl,
{
struct mr_find_contig_memsegs_data *data = arg;
+ if (msl->external)
+ return 0;
+
if (data->addr < ms->addr_64 || data->addr >= ms->addr_64 + len)
return 0;
/* Found, save it and stop walking. */
@@ -75,13 +75,16 @@ struct walk_arg {
uint32_t region_nr;
};
static int
-add_memory_region(const struct rte_memseg_list *msl __rte_unused,
+add_memory_region(const struct rte_memseg_list *msl,
const struct rte_memseg *ms, size_t len, void *arg)
{
struct walk_arg *wa = arg;
struct vhost_memory_region *mr;
void *start_addr;
+ if (msl->external)
+ return 0;
+
if (wa->region_nr >= max_regions)
return -1;
@@ -22,7 +22,7 @@ LDLIBS += -lrte_kvargs
EXPORT_MAP := ../../rte_eal_version.map
-LIBABIVER := 8
+LIBABIVER := 9
# specific to bsdapp exec-env
SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) := eal.c
@@ -502,6 +502,9 @@ check_socket(const struct rte_memseg_list *msl, void *arg)
{
int *socket_id = arg;
+ if (msl->external)
+ return 0;
+
if (msl->socket_id == *socket_id && msl->memseg_arr.count != 0)
return 1;
@@ -236,12 +236,15 @@ struct attach_walk_args {
int seg_idx;
};
static int
-attach_segment(const struct rte_memseg_list *msl __rte_unused,
- const struct rte_memseg *ms, void *arg)
+attach_segment(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
+ void *arg)
{
struct attach_walk_args *wa = arg;
void *addr;
+ if (msl->external)
+ return 0;
+
addr = mmap(ms->addr, ms->len, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_FIXED, wa->fd_hugepage,
wa->seg_idx * EAL_PAGE_SIZE);
@@ -272,6 +272,9 @@ physmem_size(const struct rte_memseg_list *msl, void *arg)
{
uint64_t *total_len = arg;
+ if (msl->external)
+ return 0;
+
*total_len += msl->memseg_arr.count * msl->page_sz;
return 0;
@@ -33,6 +33,7 @@ struct rte_memseg_list {
size_t len; /**< Length of memory area covered by this memseg list. */
int socket_id; /**< Socket ID for all memsegs in this list. */
uint64_t page_sz; /**< Page size for all memsegs in this list. */
+ unsigned int external; /**< 1 if this list points to external memory. */
volatile uint32_t version; /**< version number for multiprocess sync. */
struct rte_fbarray memseg_arr;
};
@@ -215,6 +215,9 @@ typedef int (*rte_memseg_list_walk_t)(const struct rte_memseg_list *msl,
* @note This function read-locks the memory hotplug subsystem, and thus cannot
* be used within memory-related callback functions.
*
+ * @note This function will also walk through externally allocated segments. It
+ * is up to the user to decide whether to skip these segments.
+ *
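+ * For example, a callback that only processes internal memory could be
+ * written as follows (the callback name is illustrative):
+ *
+ * @code{.c}
+ * static int
+ * my_walk_cb(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
+ *         void *arg)
+ * {
+ *     if (msl->external)
+ *         return 0;
+ *     // ... process the internal segment 'ms' here ...
+ *     return 0;
+ * }
+ * @endcode
+ *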
* @param func
* Iterator function
* @param arg
@@ -233,6 +236,9 @@ rte_memseg_walk(rte_memseg_walk_t func, void *arg);
* @note This function read-locks the memory hotplug subsystem, and thus cannot
* be used within memory-related callback functions.
*
+ * @note This function will also walk through externally allocated segments. It
+ * is up to the user to decide whether to skip these segments.
+ *
* @param func
* Iterator function
* @param arg
@@ -251,6 +257,9 @@ rte_memseg_contig_walk(rte_memseg_contig_walk_t func, void *arg);
* @note This function read-locks the memory hotplug subsystem, and thus cannot
* be used within memory-related callback functions.
*
+ * @note This function will also walk through externally allocated segments. It
+ * is up to the user to decide whether to skip these segments.
+ *
* @param func
* Iterator function
* @param arg
@@ -39,10 +39,14 @@ malloc_elem_find_max_iova_contig(struct malloc_elem *elem, size_t align)
contig_seg_start = RTE_PTR_ALIGN_CEIL(data_start, align);
/* if we're in IOVA as VA mode, or if we're in legacy mode with
- * hugepages, all elements are IOVA-contiguous.
+ * hugepages, all elements are IOVA-contiguous. However, we can only
+ * make these assumptions about internal memory; externally allocated
+ * segments have to be checked.
*/
- if (rte_eal_iova_mode() == RTE_IOVA_VA ||
- (internal_config.legacy_mem && rte_eal_has_hugepages()))
+ if (!elem->msl->external &&
+ (rte_eal_iova_mode() == RTE_IOVA_VA ||
+ (internal_config.legacy_mem &&
+ rte_eal_has_hugepages())))
return RTE_PTR_DIFF(data_end, contig_seg_start);
cur_page = RTE_PTR_ALIGN_FLOOR(contig_seg_start, page_sz);
@@ -95,6 +95,9 @@ malloc_add_seg(const struct rte_memseg_list *msl,
struct malloc_heap *heap;
int msl_idx;
+ if (msl->external)
+ return 0;
+
heap = &mcfg->malloc_heaps[msl->socket_id];
/* msl is const, so find it */
@@ -754,8 +757,10 @@ malloc_heap_free(struct malloc_elem *elem)
/* anything after this is a bonus */
ret = 0;
- /* ...of which we can't avail if we are in legacy mode */
- if (internal_config.legacy_mem)
+ /* ...of which we can't avail if we are in legacy mode, or if this is an
+ * externally allocated segment.
+ */
+ if (internal_config.legacy_mem || msl->external)
goto free_unlock;
/* check if we can free any memory back to the system */
@@ -223,7 +223,7 @@ rte_malloc_virt2iova(const void *addr)
if (elem == NULL)
return RTE_BAD_IOVA;
- if (rte_eal_iova_mode() == RTE_IOVA_VA)
+ if (!elem->msl->external && rte_eal_iova_mode() == RTE_IOVA_VA)
return (uintptr_t) addr;
ms = rte_mem_virt2memseg(addr, elem->msl);
@@ -10,7 +10,7 @@ ARCH_DIR ?= $(RTE_ARCH)
EXPORT_MAP := ../../rte_eal_version.map
VPATH += $(RTE_SDK)/lib/librte_eal/common/arch/$(ARCH_DIR)
-LIBABIVER := 8
+LIBABIVER := 9
VPATH += $(RTE_SDK)/lib/librte_eal/common
@@ -725,6 +725,9 @@ check_socket(const struct rte_memseg_list *msl, void *arg)
{
int *socket_id = arg;
+ if (msl->external)
+ return 0;
+
return *socket_id == msl->socket_id;
}
@@ -1059,7 +1062,12 @@ mark_freeable(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
void *arg __rte_unused)
{
/* ms is const, so find this memseg */
- struct rte_memseg *found = rte_mem_virt2memseg(ms->addr, msl);
+ struct rte_memseg *found;
+
+ if (msl->external)
+ return 0;
+
+ found = rte_mem_virt2memseg(ms->addr, msl);
found->flags &= ~RTE_MEMSEG_FLAG_DO_NOT_FREE;
@@ -1408,6 +1408,9 @@ sync_walk(const struct rte_memseg_list *msl, void *arg __rte_unused)
unsigned int i;
int msl_idx;
+ if (msl->external)
+ return 0;
+
msl_idx = msl - mcfg->memsegs;
primary_msl = &mcfg->memsegs[msl_idx];
local_msl = &local_memsegs[msl_idx];
@@ -1456,6 +1459,9 @@ secondary_msl_create_walk(const struct rte_memseg_list *msl,
char name[PATH_MAX];
int msl_idx, ret;
+ if (msl->external)
+ return 0;
+
msl_idx = msl - mcfg->memsegs;
primary_msl = &mcfg->memsegs[msl_idx];
local_msl = &local_memsegs[msl_idx];
@@ -1509,6 +1515,9 @@ fd_list_create_walk(const struct rte_memseg_list *msl,
unsigned int len;
int msl_idx;
+ if (msl->external)
+ return 0;
+
msl_idx = msl - mcfg->memsegs;
len = msl->memseg_arr.len;
@@ -1082,11 +1082,14 @@ rte_vfio_get_group_num(const char *sysfs_base,
}
static int
-type1_map(const struct rte_memseg_list *msl __rte_unused,
- const struct rte_memseg *ms, void *arg)
+type1_map(const struct rte_memseg_list *msl, const struct rte_memseg *ms,
+ void *arg)
{
int *vfio_container_fd = arg;
+ if (msl->external)
+ return 0;
+
return vfio_type1_dma_mem_map(*vfio_container_fd, ms->addr_64, ms->iova,
ms->len, 1);
}
@@ -1196,11 +1199,14 @@ vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
}
static int
-vfio_spapr_map_walk(const struct rte_memseg_list *msl __rte_unused,
+vfio_spapr_map_walk(const struct rte_memseg_list *msl,
const struct rte_memseg *ms, void *arg)
{
int *vfio_container_fd = arg;
+ if (msl->external)
+ return 0;
+
return vfio_spapr_dma_mem_map(*vfio_container_fd, ms->addr_64, ms->iova,
ms->len, 1);
}
@@ -1210,12 +1216,15 @@ struct spapr_walk_param {
uint64_t hugepage_sz;
};
static int
-vfio_spapr_window_size_walk(const struct rte_memseg_list *msl __rte_unused,
+vfio_spapr_window_size_walk(const struct rte_memseg_list *msl,
const struct rte_memseg *ms, void *arg)
{
struct spapr_walk_param *param = arg;
uint64_t max = ms->iova + ms->len;
+ if (msl->external)
+ return 0;
+
if (max > param->window_size) {
param->hugepage_sz = ms->hugepage_sz;
param->window_size = max;
@@ -21,7 +21,7 @@ else
error('unsupported system type "@0@"'.format(host_machine.system()))
endif
-version = 8 # the version of the EAL API
+version = 9 # the version of the EAL API
allow_experimental_apis = true
deps += 'compat'
deps += 'kvargs'
@@ -99,25 +99,44 @@ static unsigned optimize_object_size(unsigned obj_size)
return new_obj_size * RTE_MEMPOOL_ALIGN;
}
+struct pagesz_walk_arg {
+ int socket_id;
+ size_t min;
+};
+
static int
find_min_pagesz(const struct rte_memseg_list *msl, void *arg)
{
- size_t *min = arg;
+ struct pagesz_walk_arg *wa = arg;
+ bool valid;
- if (msl->page_sz < *min)
- *min = msl->page_sz;
+ /*
+  * We only need to look at page sizes available for a particular socket
+  * ID. So, we either need an exact match on the socket ID (which can
+  * match both native and external memory), or, if SOCKET_ID_ANY was
+  * specified as the socket ID argument, we must only look at native
+  * memory and ignore any page sizes associated with external memory.
+  */
+ valid = msl->socket_id == wa->socket_id;
+ valid |= wa->socket_id == SOCKET_ID_ANY && msl->external == 0;
+
+ if (valid && msl->page_sz < wa->min)
+ wa->min = msl->page_sz;
return 0;
}
static size_t
-get_min_page_size(void)
+get_min_page_size(int socket_id)
{
- size_t min_pagesz = SIZE_MAX;
+ struct pagesz_walk_arg wa;
- rte_memseg_list_walk(find_min_pagesz, &min_pagesz);
+ wa.min = SIZE_MAX;
+ wa.socket_id = socket_id;
- return min_pagesz == SIZE_MAX ? (size_t) getpagesize() : min_pagesz;
+ rte_memseg_list_walk(find_min_pagesz, &wa);
+
+ return wa.min == SIZE_MAX ? (size_t) getpagesize() : wa.min;
}
@@ -470,7 +489,7 @@ rte_mempool_populate_default(struct rte_mempool *mp)
pg_sz = 0;
pg_shift = 0;
} else if (try_contig) {
- pg_sz = get_min_page_size();
+ pg_sz = get_min_page_size(mp->socket_id);
pg_shift = rte_bsf32(pg_sz);
} else {
pg_sz = getpagesize();
@@ -711,6 +711,9 @@ check_socket_mem(const struct rte_memseg_list *msl, void *arg)
{
int32_t *socket = arg;
+ if (msl->external)
+ return 0;
+
return *socket == msl->socket_id;
}
@@ -115,6 +115,9 @@ find_available_pagesz(const struct rte_memseg_list *msl, void *arg)
{
struct walk_arg *wa = arg;
+ if (msl->external)
+ return 0;
+
if (msl->page_sz == RTE_PGSIZE_2M)
wa->hugepage_2MB_avail = 1;
if (msl->page_sz == RTE_PGSIZE_1G)