@@ -33,6 +33,7 @@
*/
#define MEMSEG_LIST_FMT "memseg-%" PRIu64 "k-%i-%i"
+#define DEVICE_MEMORY_NAME "device_memory"
static uint64_t baseaddr_offset;
static uint64_t system_page_sz;
@@ -904,6 +905,227 @@ rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg)
return ret;
}
+void * __rte_experimental
+rte_mem_dev_memory_alloc(size_t size, size_t align)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct rte_fbarray *arr = &mcfg->device_memory.mem_map_arr;
+ unsigned int n_pages, page_align;
+ int start_idx, cur_idx;
+ void *addr = NULL;
+
+ /* check parameters first */
+ if (size == 0 || (size & (system_page_sz - 1)) != 0) {
+ RTE_LOG(ERR, EAL, "%s(): size is zero or not page-aligned\n",
+ __func__);
+ rte_errno = EINVAL;
+ return NULL;
+ }
+ if ((align & (system_page_sz - 1)) != 0) {
+ RTE_LOG(ERR, EAL, "%s(): alignment is not page-aligned\n",
+ __func__);
+ rte_errno = EINVAL;
+ return NULL;
+ }
+ /* PCI BAR sizes can only be powers of two, but this memory may be used
+ * for more than just PCI BAR mappings, so only check if alignment is
+ * power of two.
+ */
+ if (align != 0 && !rte_is_power_of_2(align)) {
+ RTE_LOG(ERR, EAL, "%s(): alignment is not a power of two\n",
+ __func__);
+ rte_errno = EINVAL;
+ return NULL;
+ }
+ /* check if device memory map is uninitialized. */
+ if (mcfg->device_memory.base_va == NULL || arr->len == 0) {
+ RTE_LOG(ERR, EAL, "%s(): device memory map is not initialized\n",
+ __func__);
+ rte_errno = ENODEV;
+ return NULL;
+ }
+
+ n_pages = size / system_page_sz;
+ page_align = align / system_page_sz;
+
+ /* lock the device memory map */
+ rte_spinlock_lock(&mcfg->device_memory.lock);
+
+ start_idx = 0;
+ while (1) {
+ size_t offset;
+ int end;
+
+ cur_idx = rte_fbarray_find_next_n_free(arr, start_idx, n_pages);
+ if (cur_idx < 0) {
+ /* no suitable contiguous free space left in the map */
+ rte_errno = ENOMEM;
+ break;
+ }
+
+ /* if there are alignment requirements, check if the offset we
+ * found is aligned, and if not, align it and check if we still
+ * have enough space.
+ */
+ if (page_align != 0 && (cur_idx & (page_align - 1)) != 0) {
+ unsigned int aligned, len;
+
+ aligned = RTE_ALIGN_CEIL(cur_idx, page_align);
+ len = rte_fbarray_find_contig_free(arr, aligned);
+
+ /* if there's not enough space, keep looking */
+ if (len < n_pages) {
+ start_idx = aligned + len;
+ continue;
+ }
+
+ /* we've found space */
+ cur_idx = aligned;
+ }
+ end = cur_idx + n_pages;
+ offset = cur_idx * system_page_sz;
+ addr = RTE_PTR_ADD(mcfg->device_memory.base_va,
+ offset);
+
+ /* now, mark all space as occupied */
+ for (; cur_idx < end; cur_idx++)
+ rte_fbarray_set_used(arr, cur_idx);
+ break;
+ }
+ rte_spinlock_unlock(&mcfg->device_memory.lock);
+
+ if (addr != NULL)
+ RTE_LOG(DEBUG, EAL, "%s(): allocated %p-%p (%zu bytes) for hardware device usage\n",
+ __func__, addr, RTE_PTR_ADD(addr, size), size);
+
+ return addr;
+}
+
+int __rte_experimental
+rte_mem_dev_memory_free(void *addr, size_t size)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ struct rte_fbarray *arr = &mcfg->device_memory.mem_map_arr;
+ int cur_idx, end, ret, n_pages, len;
+ void *map_end, *mem_end;
+
+ /* check parameters first */
+ if (size == 0 || (size & (system_page_sz - 1)) != 0) {
+ RTE_LOG(ERR, EAL, "%s(): size is zero or not page-aligned\n",
+ __func__);
+ rte_errno = EINVAL;
+ return -1;
+ }
+ /* check if device memory map is uninitialized. */
+ if (mcfg->device_memory.base_va == NULL || arr->len == 0) {
+ RTE_LOG(ERR, EAL, "%s(): device memory map is not initialized\n",
+ __func__);
+ rte_errno = ENODEV;
+ return -1;
+ }
+ map_end = RTE_PTR_ADD(mcfg->device_memory.base_va,
+ arr->len * system_page_sz);
+ mem_end = RTE_PTR_ADD(addr, size);
+
+ /* check if address is within the memory map */
+ if (addr < mcfg->device_memory.base_va || addr >= map_end ||
+ mem_end > map_end) {
+ RTE_LOG(ERR, EAL, "%s(): address is beyond device memory map range\n",
+ __func__);
+ rte_errno = EINVAL;
+ return -1;
+ }
+
+ rte_spinlock_lock(&mcfg->device_memory.lock);
+
+ n_pages = size / system_page_sz;
+ cur_idx = RTE_PTR_DIFF(addr, mcfg->device_memory.base_va) /
+ system_page_sz;
+ end = cur_idx + n_pages;
+
+ /* check all space we will be marking as free is currently occupied */
+ len = rte_fbarray_find_contig_used(arr, cur_idx);
+ if (len < n_pages) {
+ RTE_LOG(ERR, EAL, "%s(): attempting to free unoccupied space\n",
+ __func__);
+ rte_errno = EINVAL;
+ ret = -1;
+ goto unlock;
+ }
+ /* now, mark all space as free */
+ for (; cur_idx < end; cur_idx++)
+ rte_fbarray_set_free(arr, cur_idx);
+
+ /* success */
+ ret = 0;
+
+ RTE_LOG(DEBUG, EAL, "%s(): deallocated %p-%p (%zu bytes) for hardware device usage\n",
+ __func__, addr, RTE_PTR_ADD(addr, size), size);
+unlock:
+ rte_spinlock_unlock(&mcfg->device_memory.lock);
+ return ret;
+}
+
+static int
+dev_memory_init(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ size_t size;
+ void *addr;
+ int retval;
+
+ if (system_page_sz == 0)
+ system_page_sz = sysconf(_SC_PAGESIZE);
+
+ /* reserve 2 GiB of virtual address space for device mappings */
+ size = (size_t) 2 << 30;
+
+ addr = eal_get_virtual_area(NULL, &size, system_page_sz, 0, 0);
+ if (addr == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot reserve device memory\n");
+ return -1;
+ }
+
+ retval = rte_fbarray_init(&mcfg->device_memory.mem_map_arr,
+ DEVICE_MEMORY_NAME, size / system_page_sz, 0);
+ if (retval < 0) {
+ RTE_LOG(ERR, EAL, "Cannot initialize device memory map\n");
+ munmap(addr, size);
+ return -1;
+ }
+ mcfg->device_memory.base_va = addr;
+ rte_spinlock_init(&mcfg->device_memory.lock);
+ return 0;
+}
+
+static int
+dev_memory_attach(void)
+{
+ struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
+ size_t size;
+ void *addr;
+ int retval;
+
+ rte_spinlock_lock(&mcfg->device_memory.lock);
+
+ if (system_page_sz == 0)
+ system_page_sz = sysconf(_SC_PAGESIZE);
+
+ size = mcfg->device_memory.mem_map_arr.len * system_page_sz;
+
+ addr = eal_get_virtual_area(mcfg->device_memory.base_va, &size,
+ system_page_sz, 0, 0);
+ if (addr == NULL) {
+ RTE_LOG(ERR, EAL, "Cannot reserve device memory\n");
+ rte_spinlock_unlock(&mcfg->device_memory.lock);
+ return -1;
+ }
+
+ retval = rte_fbarray_attach(&mcfg->device_memory.mem_map_arr);
+ if (retval < 0) {
+ RTE_LOG(ERR, EAL, "Cannot attach to device memory map\n");
+ rte_spinlock_unlock(&mcfg->device_memory.lock);
+ return -1;
+ }
+
+ rte_spinlock_unlock(&mcfg->device_memory.lock);
+
+ return 0;
+}
+
/* init memory subsystem */
int
rte_eal_memory_init(void)
@@ -918,25 +1140,41 @@ rte_eal_memory_init(void)
/* lock mem hotplug here, to prevent races while we init */
rte_rwlock_read_lock(&mcfg->memory_hotplug_lock);
- retval = rte_eal_process_type() == RTE_PROC_PRIMARY ?
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+ retval = dev_memory_init();
+ if (retval < 0)
+ goto fail;
+
#ifndef RTE_ARCH_64
- memseg_primary_init_32() :
+ retval = memseg_primary_init_32();
#else
- memseg_primary_init() :
+ retval = memseg_primary_init();
#endif
- memseg_secondary_init();
-
- if (retval < 0)
- goto fail;
-
- if (eal_memalloc_init() < 0)
- goto fail;
-
- retval = rte_eal_process_type() == RTE_PROC_PRIMARY ?
- rte_eal_hugepage_init() :
- rte_eal_hugepage_attach();
- if (retval < 0)
- goto fail;
+ if (retval < 0)
+ goto fail;
+
+ if (eal_memalloc_init() < 0)
+ goto fail;
+
+ retval = rte_eal_hugepage_init();
+ if (retval < 0)
+ goto fail;
+ } else {
+ retval = dev_memory_attach();
+ if (retval < 0)
+ goto fail;
+
+ retval = memseg_secondary_init();
+ if (retval < 0)
+ goto fail;
+
+ if (eal_memalloc_init() < 0)
+ goto fail;
+
+ retval = rte_eal_hugepage_attach();
+ if (retval < 0)
+ goto fail;
+ }
if (internal_config.no_shconf == 0 && rte_eal_memdevice_init() < 0)
goto fail;
@@ -36,6 +36,22 @@ struct rte_memseg_list {
struct rte_fbarray memseg_arr;
};
+/**
+ * mem map is a special case because we need to store the base virtual address
+ * and a lock together with the array itself.
+ */
+struct rte_mem_map {
+ RTE_STD_C11
+ union {
+ void *base_va;
+ /**< Base virtual address for this mem map. */
+ uint64_t addr_64;
+ /**< Makes sure addr is always 64-bits */
+ };
+ rte_spinlock_t lock;
+ struct rte_fbarray mem_map_arr;
+};
+
/**
* the structure for the memory configuration for the RTE.
* Used by the rte_config structure. It is separated out, as for multi-process
@@ -68,6 +84,8 @@ struct rte_mem_config {
struct rte_memseg_list memsegs[RTE_MAX_MEMSEG_LISTS];
/**< list of dynamic arrays holding memsegs */
+ struct rte_mem_map device_memory;
+ /**< Occupancy map of the virtual area reserved for device mappings */
+
struct rte_tailq_head tailq_head[RTE_MAX_TAILQ]; /**< Tailqs for objects */
/* Heaps of Malloc per socket */
@@ -445,6 +445,46 @@ rte_mem_alloc_validator_register(const char *name,
int __rte_experimental
rte_mem_alloc_validator_unregister(const char *name, int socket_id);
+/**
+ * @brief Request memory for device mapping.
+ *
+ * @note After this call, the reserved memory will be marked as unavailable in
+ * all processes until it is released, even if it goes unused.
+ *
+ * @param size
+ * Size of memory to request, in bytes. Must be a non-zero multiple of the
+ * system page size.
+ *
+ * @param align
+ * Alignment of the returned memory. Must be zero, or a power of two that is a
+ * multiple of the system page size.
+ *
+ * @return
+ * Valid pointer on successful fulfillment of request.
+ * NULL on unsuccessful fulfillment of request, with rte_errno indicating the
+ * cause of error.
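+ *
+ * Usage sketch (illustrative only; "bar_size" stands for a hypothetical
+ * resource length obtained from the bus and is not part of this API):
+ *
+ * @code
+ * size_t page_sz = sysconf(_SC_PAGESIZE);
+ * size_t len = RTE_ALIGN_CEIL(bar_size, page_sz);
+ * void *va = rte_mem_dev_memory_alloc(len, page_sz);
+ *
+ * if (va == NULL)
+ *     return -rte_errno;
+ * @endcode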
+ */
+void * __rte_experimental
+rte_mem_dev_memory_alloc(size_t size, size_t align);
+
+/**
+ * @brief Release memory for device mapping.
+ *
+ * @note By the time this call is made, the memory region being freed must no
+ * longer be in use.
+ *
+ * @param addr
+ * Address of previously requested block of memory.
+ *
+ * @param size
+ * Size of the previously requested block of memory, in bytes.
+ *
+ * @return
+ * 0 on successful memory release.
+ * -1 on unsuccessful memory release, with rte_errno indicating the cause of
+ * error.
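+ *
+ * Usage sketch (illustrative only; "va" and "len" are the pointer returned by
+ * and the size passed to rte_mem_dev_memory_alloc() in the example above):
+ *
+ * @code
+ * if (rte_mem_dev_memory_free(va, len) < 0)
+ *     RTE_LOG(ERR, EAL, "cannot free device memory: %s\n",
+ *         rte_strerror(rte_errno));
+ * @endcode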
+ */
+int __rte_experimental
+rte_mem_dev_memory_free(void *addr, size_t size);
+
#ifdef __cplusplus
}
#endif