new file mode 100644
@@ -0,0 +1,62 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2010-2018 Intel Corporation
+
+# binary name
+APP = extmem
+
+# all source are stored in SRCS-y
+SRCS-y := extmem.c
+
+# Build using pkg-config variables if possible
+$(shell pkg-config --exists libdpdk)
+ifeq ($(.SHELLSTATUS),0)
+
+all: shared
+.PHONY: shared static
+shared: build/$(APP)-shared
+	ln -sf $(APP)-shared build/$(APP)
+static: build/$(APP)-static
+	ln -sf $(APP)-static build/$(APP)
+
+PC_FILE := $(shell pkg-config --path libdpdk)
+CFLAGS += -O3 $(shell pkg-config --cflags libdpdk)
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+LDFLAGS_SHARED = $(shell pkg-config --libs libdpdk)
+LDFLAGS_STATIC = -Wl,-Bstatic $(shell pkg-config --static --libs libdpdk)
+
+build/$(APP)-shared: $(SRCS-y) Makefile $(PC_FILE) | build
+	$(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_SHARED)
+
+build/$(APP)-static: $(SRCS-y) Makefile $(PC_FILE) | build
+	$(CC) $(CFLAGS) $(SRCS-y) -o $@ $(LDFLAGS) $(LDFLAGS_STATIC)
+
+build:
+	@mkdir -p $@
+
+.PHONY: clean
+clean:
+	rm -f build/$(APP) build/$(APP)-static build/$(APP)-shared
+	rmdir --ignore-fail-on-non-empty build
+
+else # Build using legacy build system
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -DALLOW_EXPERIMENTAL_API
+
+# workaround for a gcc bug with noreturn attribute
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
+# NOTE: the only source file is extmem.c, so the per-object flag must target
+# extmem.o (CFLAGS_main.o would never match any object built by this app).
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+CFLAGS_extmem.o += -Wno-return-type
+endif
+
+include $(RTE_SDK)/mk/rte.extapp.mk
+endif
new file mode 100644
@@ -0,0 +1,461 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2010-2018 Intel Corporation
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+#include <rte_eal.h>
+#include <rte_errno.h>
+#include <rte_ethdev.h>
+#include <rte_cycles.h>
+#include <rte_lcore.h>
+#include <rte_mbuf.h>
+#include <rte_malloc.h>
+#include <rte_memory.h>
+#include <rte_vfio.h>
+
+#define RX_RING_SIZE 1024
+#define TX_RING_SIZE 1024
+
+#define NUM_MBUFS 8191
+#define MBUF_CACHE_SIZE 250
+#define BURST_SIZE 32
+#define EXTMEM_HEAP_NAME "extmem"
+
+/* default port config: accept frames up to the standard Ethernet max size */
+static const struct rte_eth_conf port_conf_default = {
+	.rxmode = {
+		.max_rx_pkt_len = ETHER_MAX_LEN,
+	},
+};
+
+/* extmem.c: Basic DPDK skeleton forwarding example using external memory. */
+
+/*
+ * Initializes a given port using global settings and with the RX buffers
+ * coming from the mbuf_pool passed as a parameter.
+ *
+ * Configures one RX and one TX queue, starts the port, prints its MAC
+ * address and enables promiscuous RX.  Returns 0 on success, -1 for an
+ * invalid port, or the negative error code of the failing ethdev call.
+ */
+static inline int
+port_init(uint16_t port, struct rte_mempool *mbuf_pool)
+{
+	struct rte_eth_conf port_conf = port_conf_default;
+	const uint16_t rx_rings = 1, tx_rings = 1;
+	uint16_t nb_rxd = RX_RING_SIZE;
+	uint16_t nb_txd = TX_RING_SIZE;
+	int retval;
+	uint16_t q;
+	struct rte_eth_dev_info dev_info;
+	struct rte_eth_txconf txconf;
+
+	if (!rte_eth_dev_is_valid_port(port))
+		return -1;
+
+	/* enable fast release of mbufs on TX if the device supports it */
+	rte_eth_dev_info_get(port, &dev_info);
+	if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
+		port_conf.txmode.offloads |=
+			DEV_TX_OFFLOAD_MBUF_FAST_FREE;
+
+	/* Configure the Ethernet device. */
+	retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
+	if (retval != 0)
+		return retval;
+
+	/* let the driver clamp descriptor counts to its supported range */
+	retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &nb_rxd, &nb_txd);
+	if (retval != 0)
+		return retval;
+
+	/* Allocate and set up 1 RX queue per Ethernet port. */
+	for (q = 0; q < rx_rings; q++) {
+		retval = rte_eth_rx_queue_setup(port, q, nb_rxd,
+				rte_eth_dev_socket_id(port), NULL, mbuf_pool);
+		if (retval < 0)
+			return retval;
+	}
+
+	/* start from the device's default TX config, add our offload flags */
+	txconf = dev_info.default_txconf;
+	txconf.offloads = port_conf.txmode.offloads;
+	/* Allocate and set up 1 TX queue per Ethernet port. */
+	for (q = 0; q < tx_rings; q++) {
+		retval = rte_eth_tx_queue_setup(port, q, nb_txd,
+				rte_eth_dev_socket_id(port), &txconf);
+		if (retval < 0)
+			return retval;
+	}
+
+	/* Start the Ethernet port. */
+	retval = rte_eth_dev_start(port);
+	if (retval < 0)
+		return retval;
+
+	/* Display the port MAC address. */
+	struct ether_addr addr;
+	rte_eth_macaddr_get(port, &addr);
+	printf("Port %u MAC: %02" PRIx8 " %02" PRIx8 " %02" PRIx8
+			" %02" PRIx8 " %02" PRIx8 " %02" PRIx8 "\n",
+			port,
+			addr.addr_bytes[0], addr.addr_bytes[1],
+			addr.addr_bytes[2], addr.addr_bytes[3],
+			addr.addr_bytes[4], addr.addr_bytes[5]);
+
+	/* Enable RX in promiscuous mode for the Ethernet device. */
+	rte_eth_promiscuous_enable(port);
+
+	return 0;
+}
+
+/*
+ * Main worker loop: poll every available port and forward each received
+ * burst to its paired port (0<->1, 2<->3, ...).  Never returns.
+ */
+static __attribute__((noreturn)) void
+lcore_main(void)
+{
+	uint16_t portid;
+
+	/*
+	 * Warn when a port sits on a different NUMA node than this lcore;
+	 * cross-socket polling hurts performance.
+	 */
+	RTE_ETH_FOREACH_DEV(portid) {
+		int port_socket = rte_eth_dev_socket_id(portid);
+
+		if (port_socket > 0 && port_socket != (int)rte_socket_id())
+			printf("WARNING, port %u is on remote NUMA node to "
+					"polling thread.\n\tPerformance will "
+					"not be optimal.\n", portid);
+	}
+
+	printf("\nCore %u forwarding packets. [Ctrl+C to quit]\n",
+			rte_lcore_id());
+
+	/* Run until the application is quit or killed. */
+	for (;;) {
+		/* RX a burst from each port, TX it on the pair (port ^ 1). */
+		RTE_ETH_FOREACH_DEV(portid) {
+			struct rte_mbuf *pkts[BURST_SIZE];
+			uint16_t nb_rx, nb_tx, i;
+
+			nb_rx = rte_eth_rx_burst(portid, 0, pkts, BURST_SIZE);
+			if (unlikely(nb_rx == 0))
+				continue;
+
+			nb_tx = rte_eth_tx_burst(portid ^ 1, 0, pkts, nb_rx);
+
+			/* drop whatever the TX queue did not accept */
+			for (i = nb_tx; i < nb_rx; i++)
+				rte_pktmbuf_free(pkts[i]);
+		}
+	}
+}
+
+/* extremely pessimistic estimation of memory required to create a mempool,
+ * rounded up to a whole number of pages of size pgsz.  Result is written to
+ * *out; returns 0 on success, -1 on error.
+ */
+static int
+calc_mem_size(uint32_t nb_ports, uint32_t nb_mbufs_per_port,
+		uint32_t mbuf_sz, size_t pgsz, size_t *out)
+{
+	uint32_t nb_mbufs = nb_ports * nb_mbufs_per_port;
+	uint64_t total_mem, mbuf_mem, obj_sz;
+
+	/* there is no good way to predict how much space the mempool will
+	 * occupy because it will allocate chunks on the fly, and some of those
+	 * will come from default DPDK memory while some will come from our
+	 * external memory, so just assume 16MB will be enough for everyone.
+	 */
+	uint64_t hdr_mem = 16 << 20;
+
+	/* full object size includes mempool header/trailer overhead */
+	obj_sz = rte_mempool_calc_obj_size(mbuf_sz, 0, NULL);
+	if (rte_eal_iova_mode() == RTE_IOVA_VA) {
+		/* contiguous - no need to account for page boundaries */
+		mbuf_mem = nb_mbufs * obj_sz;
+	} else {
+		/* account for possible non-contiguousness */
+		unsigned int n_pages, mbuf_per_pg, leftover;
+
+		mbuf_per_pg = pgsz / obj_sz;
+		if (mbuf_per_pg == 0) {
+			/* object bigger than a page - should not happen with
+			 * sane mbuf/page sizes, but guard against dividing by
+			 * zero below.
+			 */
+			printf("Object size exceeds page size\n");
+			return -1;
+		}
+		leftover = (nb_mbufs % mbuf_per_pg) > 0;
+		n_pages = (nb_mbufs / mbuf_per_pg) + leftover;
+
+		mbuf_mem = n_pages * pgsz;
+	}
+
+	total_mem = RTE_ALIGN(hdr_mem + mbuf_mem, pgsz);
+
+	/* only relevant on 32-bit builds, where size_t is narrower */
+	if (total_mem > SIZE_MAX) {
+		printf("Memory size too big\n");
+		return -1;
+	}
+	*out = (size_t)total_mem;
+
+	return 0;
+}
+
+/* Return the index of the least significant set bit of v.
+ * NOTE(review): __builtin_ctzll() is undefined for v == 0; the sole caller
+ * visible here (log2_u64) guarantees a non-zero argument.
+ */
+static inline uint32_t
+bsf64(uint64_t v)
+{
+	return (uint32_t)__builtin_ctzll(v);
+}
+
+/* Return ceil(log2(v)): rte_align64pow2() rounds v up to the next power of
+ * two, so the bit-scan yields its exponent.  Exact log2 for powers of two;
+ * returns 0 for v == 0.
+ */
+static inline uint32_t
+log2_u64(uint64_t v)
+{
+	if (v == 0)
+		return 0;
+	v = rte_align64pow2(v);
+	return bsf64(v);
+}
+
+/* mmap() encodes a requested hugepage size as log2(size) << MAP_HUGE_SHIFT;
+ * fall back to the known kernel value (26) on libcs that don't define it.
+ */
+#ifndef MAP_HUGE_SHIFT
+#define HUGE_SHIFT 26
+#else
+#define HUGE_SHIFT MAP_HUGE_SHIFT
+#endif
+
+/* Build the mmap() flag bits that select a specific hugepage size. */
+static int
+pagesz_flags(uint64_t page_sz)
+{
+	/* as per mmap() manpage, all page sizes are log2 of page size
+	 * shifted by MAP_HUGE_SHIFT
+	 */
+	int log2 = log2_u64(page_sz);
+	return log2 << HUGE_SHIFT;
+}
+
+/*
+ * Reserve memsz bytes of anonymous hugepage memory with pages of size pgsz.
+ * Returns the mapped address, or NULL on failure.
+ */
+static void *
+alloc_mem(size_t memsz, size_t pgsz)
+{
+	void *area;
+	const int prot = PROT_READ | PROT_WRITE;
+	const int flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_HUGETLB |
+			pagesz_flags(pgsz);
+
+	area = mmap(NULL, memsz, prot, flags, -1, 0);
+
+	return area == MAP_FAILED ? NULL : area;
+}
+
+/* Description of an externally-allocated memory area. */
+struct extmem_param {
+	void *addr;		/* start of the mmap()ed area */
+	size_t len;		/* total length of the area, in bytes */
+	size_t pgsz;		/* page size the area was allocated with */
+	rte_iova_t *iova_table;	/* malloc()ed IOVA address of each page */
+	unsigned int iova_table_len;	/* number of entries in iova_table */
+};
+
+/*
+ * Allocate an anonymous hugepage area big enough for the mempool (trying
+ * each supported page size in turn), record the IOVA address of every page,
+ * and - when VFIO is active - map each page for DMA.  On success, fills in
+ * *param (caller owns and must free param->iova_table) and returns 0;
+ * returns -1 on failure.
+ */
+static int
+create_extmem(uint32_t nb_ports, uint32_t nb_mbufs_per_port, uint32_t mbuf_sz,
+		struct extmem_param *param)
+{
+	uint64_t pgsizes[] = {RTE_PGSIZE_2M, RTE_PGSIZE_1G, /* x86_64, ARM */
+			RTE_PGSIZE_16M, RTE_PGSIZE_16G}; /* POWER */
+	unsigned int n_pages, cur_page, pgsz_idx;
+	size_t mem_sz, offset, cur_pgsz;
+	bool vfio_supported = true;
+	rte_iova_t *iovas = NULL;
+	void *addr;
+	int ret;
+
+	for (pgsz_idx = 0; pgsz_idx < RTE_DIM(pgsizes); pgsz_idx++) {
+		/* skip anything that is too big */
+		if (pgsizes[pgsz_idx] > SIZE_MAX)
+			continue;
+
+		cur_pgsz = pgsizes[pgsz_idx];
+
+		ret = calc_mem_size(nb_ports, nb_mbufs_per_port,
+				mbuf_sz, cur_pgsz, &mem_sz);
+		if (ret < 0) {
+			printf("Cannot calculate memory size\n");
+			return -1;
+		}
+
+		/* allocate our memory */
+		addr = alloc_mem(mem_sz, cur_pgsz);
+
+		/* if we couldn't allocate memory with a specified page size,
+		 * that doesn't mean we can't do it with other page sizes, so
+		 * try another one.
+		 */
+		if (addr == NULL)
+			continue;
+
+		/* store IOVA addresses for every page in this memory area */
+		n_pages = mem_sz / cur_pgsz;
+
+		iovas = malloc(sizeof(*iovas) * n_pages);
+
+		if (iovas == NULL) {
+			printf("Cannot allocate memory for iova addresses\n");
+			goto fail;
+		}
+
+		/* populate IOVA table */
+		for (cur_page = 0; cur_page < n_pages; cur_page++) {
+			rte_iova_t iova;
+			void *cur;
+
+			offset = cur_pgsz * cur_page;
+			cur = RTE_PTR_ADD(addr, offset);
+
+			iova = (uintptr_t)rte_mem_virt2iova(cur);
+
+			iovas[cur_page] = iova;
+
+			if (vfio_supported) {
+				/* map the current page for DMA: the IOVA
+				 * above belongs to this page, so its VA must
+				 * be "cur", not the start of the whole area.
+				 */
+				ret = rte_vfio_dma_map((uintptr_t)cur,
+						iova, cur_pgsz);
+				if (ret < 0) {
+					/*
+					 * ENODEV means VFIO is not initialized
+					 * ENOTSUP means current IOMMU mode
+					 * doesn't support mapping
+					 * both cases are not an error
+					 */
+					if (rte_errno == ENOTSUP ||
+							rte_errno == ENODEV)
+						/* VFIO is unsupported, don't
+						 * try again.
+						 */
+						vfio_supported = false;
+					else
+						/* this is an actual error */
+						goto fail;
+				}
+			}
+		}
+
+		break;
+	}
+	/* if we couldn't allocate anything */
+	if (iovas == NULL)
+		return -1;
+
+	param->addr = addr;
+	param->len = mem_sz;
+	param->pgsz = cur_pgsz;
+	param->iova_table = iovas;
+	param->iova_table_len = n_pages;
+
+	return 0;
+fail:
+	free(iovas);
+	if (addr)
+		munmap(addr, mem_sz);
+
+	return -1;
+}
+
+/*
+ * Create the external memory heap: make a named malloc heap, back it with
+ * anonymous hugepage memory (with per-page IOVAs), and add that memory to
+ * the heap so mempool allocations can be served from it.
+ *
+ * Returns 0 on success, -1 on failure.  NOTE(review): on the failure paths
+ * the heap created below is left in place; all callers exit on error, so no
+ * teardown is attempted here.
+ */
+static int
+setup_extmem(uint32_t nb_ports, uint32_t nb_mbufs_per_port, uint32_t mbuf_sz)
+{
+	struct extmem_param param;
+	int ret;
+
+	/* create our heap */
+	ret = rte_malloc_heap_create(EXTMEM_HEAP_NAME);
+	if (ret < 0) {
+		printf("Cannot create heap\n");
+		return -1;
+	}
+
+	ret = create_extmem(nb_ports, nb_mbufs_per_port, mbuf_sz, &param);
+	if (ret < 0) {
+		printf("Cannot create memory area\n");
+		return -1;
+	}
+
+	/* we now have a valid memory area, so add it to heap */
+	ret = rte_malloc_heap_memory_add(EXTMEM_HEAP_NAME,
+			param.addr, param.len, param.iova_table,
+			param.iova_table_len, param.pgsz);
+
+	/* not needed any more */
+	free(param.iova_table);
+
+	if (ret < 0) {
+		printf("Cannot add memory to heap\n");
+		munmap(param.addr, param.len);
+		return -1;
+	}
+
+	printf("Allocated %zuMB of memory\n", param.len >> 20);
+
+	/* success */
+	return 0;
+}
+
+
+/*
+ * The main function, which does initialization and calls the per-lcore
+ * functions.
+ *
+ * Sets up EAL, creates an external-memory heap, allocates the mbuf pool
+ * from that heap, initializes all available ports and enters the forwarding
+ * loop on the master core (lcore_main never returns).
+ */
+int
+main(int argc, char *argv[])
+{
+	struct rte_mempool *mbuf_pool;
+	unsigned int nb_ports;
+	int socket_id;
+	uint16_t portid;
+	uint32_t nb_mbufs_per_port, mbuf_sz;
+
+	/* Initialize the Environment Abstraction Layer (EAL). */
+	int ret = rte_eal_init(argc, argv);
+	if (ret < 0)
+		rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
+
+	/* skip the arguments consumed by EAL */
+	argc -= ret;
+	argv += ret;
+
+	/* Check that there is an even number of ports to send/receive on. */
+	nb_ports = rte_eth_dev_count_avail();
+	if (nb_ports < 2 || (nb_ports & 1))
+		rte_exit(EXIT_FAILURE, "Error: number of ports must be even\n");
+
+	nb_mbufs_per_port = NUM_MBUFS;
+	mbuf_sz = RTE_MBUF_DEFAULT_BUF_SIZE;
+
+	/* reserve and register external memory before creating the pool */
+	if (setup_extmem(nb_ports, nb_mbufs_per_port, mbuf_sz) < 0)
+		rte_exit(EXIT_FAILURE, "Error: cannot set up external memory\n");
+
+	/* retrieve socket ID for our heap */
+	socket_id = rte_malloc_heap_get_socket(EXTMEM_HEAP_NAME);
+	if (socket_id < 0)
+		rte_exit(EXIT_FAILURE, "Invalid socket for external heap\n");
+
+	/* Creates a new mempool in memory to hold the mbufs. */
+	mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL",
+			nb_mbufs_per_port * nb_ports, MBUF_CACHE_SIZE, 0,
+			mbuf_sz, socket_id);
+
+	if (mbuf_pool == NULL)
+		rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
+
+	/* Initialize all ports. */
+	RTE_ETH_FOREACH_DEV(portid)
+		if (port_init(portid, mbuf_pool) != 0)
+			rte_exit(EXIT_FAILURE, "Cannot init port %"PRIu16 "\n",
+					portid);
+
+	if (rte_lcore_count() > 1)
+		printf("\nWARNING: Too many lcores enabled. Only 1 used.\n");
+
+	/* Call lcore_main on the master core only. */
+	lcore_main();
+
+	return 0;
+}
new file mode 100644
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2017 Intel Corporation
+
+# meson file, for building this example as part of a main DPDK build.
+#
+# To build this example as a standalone application with an already-installed
+# DPDK instance, use 'make'
+
+# the external-memory heap APIs used by extmem.c are marked experimental
+allow_experimental_apis = true
+sources = files(
+	'extmem.c'
+)