@@ -2413,6 +2413,23 @@ fwd_config_setup(void)
simple_fwd_config_setup();
}
+/* Map an MP_ALLOC_* mode to the name printed in the forwarding summary. */
+static const char *
+mp_alloc_to_str(uint8_t mode)
+{
+	static const char * const names[] = {
+		[MP_ALLOC_NATIVE] = "native",
+		[MP_ALLOC_ANON] = "anon",
+		[MP_ALLOC_XMEM] = "xmem",
+		[MP_ALLOC_XMEM_HUGE] = "xmemhuge",
+	};
+
+	/* anything outside the table is reported as "invalid" */
+	if (mode >= sizeof(names) / sizeof(names[0]) || names[mode] == NULL)
+		return "invalid";
+	return names[mode];
+}
+
void
pkt_fwd_config_display(struct fwd_config *cfg)
{
@@ -2421,12 +2438,12 @@ pkt_fwd_config_display(struct fwd_config *cfg)
streamid_t sm_id;
printf("%s packet forwarding%s - ports=%d - cores=%d - streams=%d - "
- "NUMA support %s, MP over anonymous pages %s\n",
+ "NUMA support %s, MP allocation mode: %s\n",
cfg->fwd_eng->fwd_mode_name,
retry_enabled == 0 ? "" : " with retry",
cfg->nb_fwd_ports, cfg->nb_fwd_lcores, cfg->nb_fwd_streams,
numa_support == 1 ? "enabled" : "disabled",
- mp_anon != 0 ? "enabled" : "disabled");
+ mp_alloc_to_str(mp_alloc_type));
if (retry_enabled)
printf("TX retry num: %u, delay between TX retries: %uus\n",
@@ -190,6 +190,11 @@ usage(char* progname)
printf(" --vxlan-gpe-port=N: UPD port of tunnel VXLAN-GPE\n");
printf(" --mlockall: lock all memory\n");
printf(" --no-mlockall: do not lock all memory\n");
+ printf(" --mp-alloc <native|anon|xmem|xmemhuge>: mempool allocation method.\n"
+ " native: use regular DPDK memory to create and populate mempool\n"
+ " anon: use regular DPDK memory to create and anonymous memory to populate mempool\n"
+ " xmem: use anonymous memory to create and populate mempool\n"
+ " xmemhuge: use anonymous hugepage memory to create and populate mempool\n");
}
#ifdef RTE_LIBRTE_CMDLINE
@@ -625,6 +630,7 @@ launch_args_parse(int argc, char** argv)
{ "vxlan-gpe-port", 1, 0, 0 },
{ "mlockall", 0, 0, 0 },
{ "no-mlockall", 0, 0, 0 },
+ { "mp-alloc", 1, 0, 0 },
{ 0, 0, 0, 0 },
};
@@ -743,7 +749,22 @@ launch_args_parse(int argc, char** argv)
if (!strcmp(lgopts[opt_idx].name, "numa"))
numa_support = 1;
if (!strcmp(lgopts[opt_idx].name, "mp-anon")) {
- mp_anon = 1;
+ mp_alloc_type = MP_ALLOC_ANON;
+ }
+			if (!strcmp(lgopts[opt_idx].name, "mp-alloc")) {
+				if (!strcmp(optarg, "native"))
+					mp_alloc_type = MP_ALLOC_NATIVE;
+				else if (!strcmp(optarg, "anon"))
+					mp_alloc_type = MP_ALLOC_ANON;
+				else if (!strcmp(optarg, "xmem"))
+					mp_alloc_type = MP_ALLOC_XMEM;
+				else if (!strcmp(optarg, "xmemhuge"))
+					mp_alloc_type = MP_ALLOC_XMEM_HUGE;
+				else /* list every accepted value in the error */
+					rte_exit(EXIT_FAILURE,
+						"mp-alloc %s invalid - must be: "
+						"native, anon, xmem or xmemhuge\n",
+						optarg);
}
if (!strcmp(lgopts[opt_idx].name, "port-numa-config")) {
if (parse_portnuma_config(optarg))
@@ -27,6 +27,7 @@
#include <rte_log.h>
#include <rte_debug.h>
#include <rte_cycles.h>
+#include <rte_malloc_heap.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_launch.h>
@@ -63,6 +64,22 @@
#include "testpmd.h"
+#ifndef MAP_HUGETLB
+/* MAP_HUGETLB may be missing (FreeBSD probably lacks it); use a fixed fallback value */
+#define HUGE_FLAG (0x40000)
+#else
+#define HUGE_FLAG MAP_HUGETLB
+#endif
+
+#ifndef MAP_HUGE_SHIFT
+/* older kernels (or FreeBSD) will not have MAP_HUGE_SHIFT; use a fixed fallback value */
+#define HUGE_SHIFT (26)
+#else
+#define HUGE_SHIFT MAP_HUGE_SHIFT
+#endif
+
+#define EXTMEM_HEAP_NAME "extmem" /* name of the malloc heap that receives the external memory */
+
uint16_t verbose_level = 0; /**< Silent by default. */
int testpmd_logtype; /**< Log type for testpmd logs */
@@ -88,9 +105,13 @@ uint8_t numa_support = 1; /**< numa enabled by default */
uint8_t socket_num = UMA_NO_CONFIG;
/*
- * Use ANONYMOUS mapped memory (might be not physically continuous) for mbufs.
+ * Select mempool allocation type:
+ * - native: use regular DPDK memory
+ * - anon: use regular DPDK memory to create mempool, but populate using
+ * anonymous memory (may not be IOVA-contiguous)
+ * - xmem/xmemhuge: use externally allocated anonymous memory (hugepages for xmemhuge)
*/
-uint8_t mp_anon = 0;
+uint8_t mp_alloc_type = MP_ALLOC_NATIVE;
/*
* Store specified sockets on which memory pool to be used by ports
@@ -527,6 +548,216 @@ set_def_fwd_config(void)
set_default_fwd_ports_config();
}
+/* extremely pessimistic estimation of memory required to create a mempool */
+static int
+calc_mem_size(uint32_t nb_mbufs, uint32_t mbuf_sz, size_t pgsz, size_t *out)
+{
+ unsigned int n_pages, mbuf_per_pg, leftover;
+ uint64_t total_mem, mbuf_mem, obj_sz;
+
+ /* there is no good way to predict how much space the mempool will
+ * occupy because it will allocate chunks on the fly, and some of those
+ * will come from default DPDK memory while some will come from our
+ * external memory, so just assume 32MB will be enough for everyone.
+ */
+ uint64_t hdr_mem = 32 << 20;
+
+ /* account for possible non-contiguousness */
+ obj_sz = rte_mempool_calc_obj_size(mbuf_sz, 0, NULL);
+ if (obj_sz > pgsz) {
+ TESTPMD_LOG(ERR, "Object size is bigger than page size\n");
+ return -1;
+ }
+
+ mbuf_per_pg = pgsz / obj_sz;
+ leftover = (nb_mbufs % mbuf_per_pg) > 0;
+ n_pages = (nb_mbufs / mbuf_per_pg) + leftover;
+
+ mbuf_mem = n_pages * pgsz;
+
+ total_mem = RTE_ALIGN(hdr_mem + mbuf_mem, pgsz);
+
+ if (total_mem > SIZE_MAX) {
+ TESTPMD_LOG(ERR, "Memory size too big\n");
+ return -1;
+ }
+ *out = (size_t)total_mem;
+
+ return 0;
+}
+
+/* Index of the lowest set bit of v; callers must not pass 0. */
+static inline uint32_t
+bsf64(uint64_t v)
+{
+	return (uint32_t)__builtin_ctzll(v);
+}
+
+/* Bit index of v after rte_align64pow2() rounding; 0 is mapped to 0. */
+static inline uint32_t
+log2_u64(uint64_t v)
+{
+	/* presumably the aligned value is a power of two, so its lowest set bit is the exponent */
+	return v == 0 ? 0 : bsf64(rte_align64pow2(v));
+}
+
+/* Encode a hugepage size as the page-size bits of the mmap() flags. */
+static int
+pagesz_flags(uint64_t page_sz)
+{
+	/* per mmap() manpage, the flag is log2(page size) << MAP_HUGE_SHIFT;
+	 * shift as unsigned: 34 << 26 (16G pages) overflows a signed int
+	 */
+	return (int)((unsigned int)log2_u64(page_sz) << HUGE_SHIFT);
+}
+
+/* mmap() a private anonymous region of memsz bytes, hugepage-backed when
+ * huge is set; returns NULL instead of MAP_FAILED on error.
+ */
+static void *
+alloc_mem(size_t memsz, size_t pgsz, bool huge)
+{
+	int mmap_flags = MAP_ANONYMOUS | MAP_PRIVATE;
+	void *region;
+
+	/* request hugepages of the given size only when asked to */
+	if (huge)
+		mmap_flags |= HUGE_FLAG | pagesz_flags(pgsz);
+
+	region = mmap(NULL, memsz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
+
+	return region == MAP_FAILED ? NULL : region;
+}
+
+struct extmem_param {
+ void *addr; /* start of the mapped memory area */
+ size_t len; /* length of the area in bytes */
+ size_t pgsz; /* page size the area was sized and mapped with */
+ rte_iova_t *iova_table; /* malloc'ed by create_extmem(), freed by setup_extmem() */
+ unsigned int iova_table_len; /* number of pages in the area (len / pgsz) */
+};
+
+/* Map an anonymous area sized for the mempool and record the IOVA of each of
+ * its pages; fills *param and returns 0, or returns -1 on failure. */
+static int
+create_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, struct extmem_param *param,
+		bool huge)
+{
+	uint64_t pgsizes[] = {RTE_PGSIZE_2M, RTE_PGSIZE_1G, /* x86_64, ARM */
+			RTE_PGSIZE_16M, RTE_PGSIZE_16G}; /* POWER */
+	unsigned int n_pages, pgsz_idx, cur_page;
+	size_t mem_sz, cur_pgsz;
+	rte_iova_t *iovas = NULL;
+	void *addr;
+	int ret;
+
+	for (pgsz_idx = 0; pgsz_idx < RTE_DIM(pgsizes); pgsz_idx++) {
+		/* skip anything that is too big */
+		if (pgsizes[pgsz_idx] > SIZE_MAX)
+			continue;
+		cur_pgsz = pgsizes[pgsz_idx];
+
+		/* if we were told not to allocate hugepages, override */
+		if (!huge)
+			cur_pgsz = sysconf(_SC_PAGESIZE);
+
+		ret = calc_mem_size(nb_mbufs, mbuf_sz, cur_pgsz, &mem_sz);
+		if (ret < 0) {
+			TESTPMD_LOG(ERR, "Cannot calculate memory size\n");
+			return -1;
+		}
+
+		/* a failure with this page size does not rule out the other
+		 * page sizes, so just try the next one
+		 */
+		addr = alloc_mem(mem_sz, cur_pgsz, huge);
+		if (addr == NULL)
+			continue;
+
+		/* store IOVA addresses for every page in this memory area */
+		n_pages = mem_sz / cur_pgsz;
+		iovas = malloc(sizeof(*iovas) * n_pages);
+		if (iovas == NULL) {
+			TESTPMD_LOG(ERR, "Cannot allocate memory for iova addresses\n");
+			goto fail;
+		}
+		/* lock memory if it's not huge pages */
+		if (!huge)
+			mlock(addr, mem_sz);
+
+		/* populate the table; it is passed to rte_malloc_heap_memory_add() */
+		for (cur_page = 0; cur_page < n_pages; cur_page++) {
+			char *cur = (char *)addr + cur_pgsz * cur_page;
+			/* touch the page before getting its IOVA */
+			*(volatile char *)cur = 0;
+			iovas[cur_page] = rte_mem_virt2iova(cur);
+		}
+		break;
+	}
+	/* if we couldn't allocate anything */
+	if (iovas == NULL)
+		return -1;
+
+	param->addr = addr;
+	param->len = mem_sz;
+	param->pgsz = cur_pgsz;
+	param->iova_table = iovas;
+	param->iova_table_len = n_pages;
+
+	return 0;
+fail:
+	free(iovas);
+	munmap(addr, mem_sz);
+	return -1;
+}
+
+/* Create the external heap if it does not exist yet and add a newly mapped
+ * memory area sized for the mempool to it; returns 0 on success, -1 on error.
+ */
+static int
+setup_extmem(uint32_t nb_mbufs, uint32_t mbuf_sz, bool huge)
+{
+	struct extmem_param param = {0};
+	int socket_id, ret;
+
+	/* check if our heap exists */
+	socket_id = rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
+	if (socket_id < 0) {
+		/* create our heap */
+		ret = rte_malloc_heap_create(EXTMEM_HEAP_NAME);
+		if (ret < 0) {
+			TESTPMD_LOG(ERR, "Cannot create heap\n");
+			return -1;
+		}
+	}
+
+	ret = create_extmem(nb_mbufs, mbuf_sz, &param, huge);
+	if (ret < 0) {
+		TESTPMD_LOG(ERR, "Cannot create memory area\n");
+		return -1;
+	}
+
+	/* we now have a valid memory area, so add it to heap */
+	ret = rte_malloc_heap_memory_add(EXTMEM_HEAP_NAME,
+			param.addr, param.len, param.iova_table,
+			param.iova_table_len, param.pgsz);
+
+	/* when using VFIO, memory is automatically mapped for DMA by EAL */
+	/* not needed any more */
+	free(param.iova_table);
+
+	if (ret < 0) {
+		TESTPMD_LOG(ERR, "Cannot add memory to heap\n");
+		munmap(param.addr, param.len);
+		return -1;
+	}
+
+	TESTPMD_LOG(DEBUG, "Allocated %zuMB of external memory\n",
+			param.len >> 20);
+
+	return 0;
+}
+
/*
* Configuration initialisation done once at init time.
*/
@@ -545,27 +776,59 @@ mbuf_pool_create(uint16_t mbuf_seg_size, unsigned nb_mbuf,
"create a new mbuf pool <%s>: n=%u, size=%u, socket=%u\n",
pool_name, nb_mbuf, mbuf_seg_size, socket_id);
- if (mp_anon != 0) {
- rte_mp = rte_mempool_create_empty(pool_name, nb_mbuf,
- mb_size, (unsigned) mb_mempool_cache,
- sizeof(struct rte_pktmbuf_pool_private),
- socket_id, 0);
- if (rte_mp == NULL)
- goto err;
+ switch (mp_alloc_type) {
+ case MP_ALLOC_NATIVE:
+ {
+ /* wrapper to rte_mempool_create() */
+ TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
+ rte_mbuf_best_mempool_ops());
+ rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
+ mb_mempool_cache, 0, mbuf_seg_size, socket_id);
+ break;
+ }
+ case MP_ALLOC_ANON:
+ {
+ rte_mp = rte_mempool_create_empty(pool_name, nb_mbuf,
+ mb_size, (unsigned int) mb_mempool_cache,
+ sizeof(struct rte_pktmbuf_pool_private),
+ socket_id, 0);
+ if (rte_mp == NULL)
+ goto err;
+
+ if (rte_mempool_populate_anon(rte_mp) == 0) {
+ rte_mempool_free(rte_mp);
+ rte_mp = NULL;
+ goto err;
+ }
+ rte_pktmbuf_pool_init(rte_mp, NULL);
+ rte_mempool_obj_iter(rte_mp, rte_pktmbuf_init, NULL);
+ break;
+ }
+ case MP_ALLOC_XMEM:
+ case MP_ALLOC_XMEM_HUGE:
+ {
+ int heap_socket;
+ bool huge = mp_alloc_type == MP_ALLOC_XMEM_HUGE;
- if (rte_mempool_populate_anon(rte_mp) == 0) {
- rte_mempool_free(rte_mp);
- rte_mp = NULL;
- goto err;
+ if (setup_extmem(nb_mbuf, mbuf_seg_size, huge) < 0)
+ rte_exit(EXIT_FAILURE, "Could not create external memory\n");
+
+ heap_socket =
+ rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
+ if (heap_socket < 0)
+ rte_exit(EXIT_FAILURE, "Could not get external memory socket ID\n");
+
+ TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
+ rte_mbuf_best_mempool_ops());
+ rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
+ mb_mempool_cache, 0, mbuf_seg_size,
+ heap_socket);
+ break;
+ }
+ default:
+ {
+ rte_exit(EXIT_FAILURE, "Invalid mempool creation mode\n");
}
- rte_pktmbuf_pool_init(rte_mp, NULL);
- rte_mempool_obj_iter(rte_mp, rte_pktmbuf_init, NULL);
- } else {
- /* wrapper to rte_mempool_create() */
- TESTPMD_LOG(INFO, "preferred mempool ops selected: %s\n",
- rte_mbuf_best_mempool_ops());
- rte_mp = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
- mb_mempool_cache, 0, mbuf_seg_size, socket_id);
}
err:
@@ -69,6 +69,16 @@ enum {
PORT_TOPOLOGY_LOOP,
};
+enum {
+ MP_ALLOC_NATIVE, /**< "native" (default): allocate and populate mempool natively */
+ MP_ALLOC_ANON,
+ /**< "anon": allocate mempool natively, but populate using anonymous memory */
+ MP_ALLOC_XMEM,
+ /**< "xmem": allocate and populate mempool using external anonymous memory */
+ MP_ALLOC_XMEM_HUGE
+ /**< "xmemhuge": allocate and populate mempool using external anonymous hugepage memory */
+};
+
#ifdef RTE_TEST_PMD_RECORD_BURST_STATS
/**
* The data structure associated with RX and TX packet burst statistics
@@ -304,7 +314,8 @@ extern uint8_t numa_support; /**< set by "--numa" parameter */
extern uint16_t port_topology; /**< set by "--port-topology" parameter */
extern uint8_t no_flush_rx; /**<set by "--no-flush-rx" parameter */
extern uint8_t flow_isolate_all; /**< set by "--flow-isolate-all */
-extern uint8_t mp_anon; /**< set by "--mp-anon" parameter */
+extern uint8_t mp_alloc_type;
+/**< set by "--mp-anon" or "--mp-alloc" parameter */
extern uint8_t no_link_check; /**<set by "--disable-link-check" parameter */
extern volatile int test_done; /* stop packet forwarding when set to 1. */
extern uint8_t lsc_interrupt; /**< disabled by "--no-lsc-interrupt" parameter */
@@ -498,3 +498,15 @@ The commandline options are:
* ``--no-mlockall``
Disable locking all memory.
+
+* ``--mp-alloc <native|anon|xmem|xmemhuge>``
+
+ Select mempool allocation mode:
+
+ * native: create and populate mempool using native DPDK memory
+ * anon: create mempool using native DPDK memory, but populate using
+ anonymous memory
+ * xmem: create and populate mempool using externally and anonymously
+ allocated area
+ * xmemhuge: create and populate mempool using externally and anonymously
+ allocated hugepage area