get:
Show a patch.

patch:
Partially update a patch (only the fields supplied are changed).

put:
Fully update a patch (replaces the writable fields of the resource).

GET /api/patches/40537/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 40537,
    "url": "https://patches.dpdk.org/api/patches/40537/?format=api",
    "web_url": "https://patches.dpdk.org/project/dpdk/patch/5281a08a4d08db8692b38905ae1c6035343bca99.1527764061.git.anatoly.burakov@intel.com/",
    "project": {
        "id": 1,
        "url": "https://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<5281a08a4d08db8692b38905ae1c6035343bca99.1527764061.git.anatoly.burakov@intel.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/5281a08a4d08db8692b38905ae1c6035343bca99.1527764061.git.anatoly.burakov@intel.com",
    "date": "2018-05-31T10:57:49",
    "name": "[dpdk-dev,RFC,2/3] mem: add device memory reserve/free API",
    "commit_ref": null,
    "pull_url": null,
    "state": "rejected",
    "archived": true,
    "hash": "4d2e5ddc81ff48b8de62c56f82be9adc278bc266",
    "submitter": {
        "id": 4,
        "url": "https://patches.dpdk.org/api/people/4/?format=api",
        "name": "Anatoly Burakov",
        "email": "anatoly.burakov@intel.com"
    },
    "delegate": {
        "id": 1,
        "url": "https://patches.dpdk.org/api/users/1/?format=api",
        "username": "tmonjalo",
        "first_name": "Thomas",
        "last_name": "Monjalon",
        "email": "thomas@monjalon.net"
    },
    "mbox": "https://patches.dpdk.org/project/dpdk/patch/5281a08a4d08db8692b38905ae1c6035343bca99.1527764061.git.anatoly.burakov@intel.com/mbox/",
    "series": [],
    "comments": "https://patches.dpdk.org/api/patches/40537/comments/",
    "check": "fail",
    "checks": "https://patches.dpdk.org/api/patches/40537/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@dpdk.org",
        "Delivered-To": "patchwork@dpdk.org",
        "Received": [
            "from [92.243.14.124] (localhost [127.0.0.1])\n\tby dpdk.org (Postfix) with ESMTP id 884A94F92;\n\tThu, 31 May 2018 12:58:11 +0200 (CEST)",
            "from mga02.intel.com (mga02.intel.com [134.134.136.20])\n\tby dpdk.org (Postfix) with ESMTP id 1DACC4C74\n\tfor <dev@dpdk.org>; Thu, 31 May 2018 12:58:05 +0200 (CEST)",
            "from fmsmga004.fm.intel.com ([10.253.24.48])\n\tby orsmga101.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384;\n\t31 May 2018 03:58:03 -0700",
            "from irvmail001.ir.intel.com ([163.33.26.43])\n\tby fmsmga004.fm.intel.com with ESMTP; 31 May 2018 03:58:00 -0700",
            "from sivswdev01.ir.intel.com (sivswdev01.ir.intel.com\n\t[10.237.217.45])\n\tby irvmail001.ir.intel.com (8.14.3/8.13.6/MailSET/Hub) with ESMTP id\n\tw4VAw0on004132; Thu, 31 May 2018 11:58:00 +0100",
            "from sivswdev01.ir.intel.com (localhost [127.0.0.1])\n\tby sivswdev01.ir.intel.com with ESMTP id w4VAw0SJ002509;\n\tThu, 31 May 2018 11:58:00 +0100",
            "(from aburakov@localhost)\n\tby sivswdev01.ir.intel.com with LOCAL id w4VAw0j2002505;\n\tThu, 31 May 2018 11:58:00 +0100"
        ],
        "X-Amp-Result": "SKIPPED(no attachment in message)",
        "X-Amp-File-Uploaded": "False",
        "X-ExtLoop1": "1",
        "X-IronPort-AV": "E=Sophos;i=\"5.49,463,1520924400\"; d=\"scan'208\";a=\"59202811\"",
        "From": "Anatoly Burakov <anatoly.burakov@intel.com>",
        "To": "dev@dpdk.org",
        "Cc": "thomas@monjalon.net, hemant.agrawal@nxp.com, bruce.richardson@intel.com, \n\tferruh.yigit@intel.com, konstantin.ananyev@intel.com,\n\tjerin.jacob@caviumnetworks.com, olivier.matz@6wind.com,\n\tstephen@networkplumber.org, nhorman@tuxdriver.com,\n\tdavid.marchand@6wind.com, gowrishankar.m@linux.vnet.ibm.com",
        "Date": "Thu, 31 May 2018 11:57:49 +0100",
        "Message-Id": "<5281a08a4d08db8692b38905ae1c6035343bca99.1527764061.git.anatoly.burakov@intel.com>",
        "X-Mailer": "git-send-email 1.7.0.7",
        "In-Reply-To": [
            "<cover.1527764061.git.anatoly.burakov@intel.com>",
            "<cover.1527764061.git.anatoly.burakov@intel.com>"
        ],
        "References": [
            "<cover.1527764061.git.anatoly.burakov@intel.com>",
            "<cover.1527764061.git.anatoly.burakov@intel.com>"
        ],
        "Subject": "[dpdk-dev] [RFC 2/3] mem: add device memory reserve/free API",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.15",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://dpdk.org/ml/options/dev>,\n\t<mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://dpdk.org/ml/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://dpdk.org/ml/listinfo/dev>,\n\t<mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>"
    },
    "content": "In order for hotplug in multiprocess to work reliably, we will need\na common shared memory area that is guaranteed to be accessible to all\nprocesses at all times. This is accomplished by pre-reserving memory\nthat will be used for device mappings at startup, and managing it\nat runtime.\n\nTwo new API calls are added: alloc and free of device memory. Once\nallocation is requested, memory is considered to be reserved until it\nis freed back using the same API. Usage of which blocks are occupied is\ntracked using shared fbarray. This allows us to give out device memory\npiecemeal and lessen fragmentation.\n\nNaturally, this adds a limitation of how much device memory DPDK can\nuse. This is currently set to 2 gigabytes, but will be adjustable in\nlater revisions.\n\nSigned-off-by: Anatoly Burakov <anatoly.burakov@intel.com>\n---\n lib/librte_eal/common/eal_common_memory.c     | 270 ++++++++++++++++--\n .../common/include/rte_eal_memconfig.h        |  18 ++\n lib/librte_eal/common/include/rte_memory.h    |  40 +++\n 3 files changed, 312 insertions(+), 16 deletions(-)",
    "diff": "diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c\nindex 4f0688f9d..8cae9b354 100644\n--- a/lib/librte_eal/common/eal_common_memory.c\n+++ b/lib/librte_eal/common/eal_common_memory.c\n@@ -33,6 +33,7 @@\n  */\n \n #define MEMSEG_LIST_FMT \"memseg-%\" PRIu64 \"k-%i-%i\"\n+#define DEVICE_MEMORY_NAME \"device_memory\"\n \n static uint64_t baseaddr_offset;\n static uint64_t system_page_sz;\n@@ -904,6 +905,227 @@ rte_memseg_list_walk(rte_memseg_list_walk_t func, void *arg)\n \treturn ret;\n }\n \n+void * __rte_experimental\n+rte_mem_dev_memory_alloc(size_t size, size_t align)\n+{\n+\tstruct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;\n+\tstruct rte_fbarray *arr = &mcfg->device_memory.mem_map_arr;\n+\tunsigned int n_pages, page_align;\n+\tint start_idx, cur_idx;\n+\tvoid *addr = NULL;\n+\n+\t/* check parameters first */\n+\tif (size == 0 || (size & (system_page_sz - 1)) != 0) {\n+\t\tRTE_LOG(ERR, EAL, \"%s(): size is not page-aligned\\n\",\n+\t\t\t\t__func__);\n+\t\trte_errno = EINVAL;\n+\t\treturn NULL;\n+\t}\n+\tif ((align & (system_page_sz - 1)) != 0) {\n+\t\tRTE_LOG(ERR, EAL, \"%s(): alignment is not page-aligned\\n\",\n+\t\t\t__func__);\n+\t\trte_errno = EINVAL;\n+\t\treturn NULL;\n+\t}\n+\t/* PCI BAR sizes can only be powers of two, but this memory may be used\n+\t * for more than just PCI BAR mappings, so only check if alignment is\n+\t * power of two.\n+\t */\n+\tif (align != 0 && !rte_is_power_of_2(align)) {\n+\t\tRTE_LOG(ERR, EAL, \"%s(): alignment is not a power of two\\n\",\n+\t\t\t__func__);\n+\t\trte_errno = EINVAL;\n+\t\treturn NULL;\n+\t}\n+\t/* check if device memory map is uninitialized. 
*/\n+\tif (mcfg->device_memory.base_va == NULL || arr->len == 0) {\n+\t\tRTE_LOG(ERR, EAL, \"%s(): device memory map is not initialized\\n\",\n+\t\t\t__func__);\n+\t\trte_errno = ENODEV;\n+\t\treturn NULL;\n+\t}\n+\n+\tn_pages = size / system_page_sz;\n+\tpage_align = align / system_page_sz;\n+\n+\t/* lock the device memory map */\n+\trte_spinlock_lock(&mcfg->device_memory.lock);\n+\n+\tstart_idx = 0;\n+\twhile (1) {\n+\t\tsize_t offset;\n+\t\tint end;\n+\n+\t\tcur_idx = rte_fbarray_find_next_n_free(arr, start_idx, n_pages);\n+\t\tif (cur_idx < 0)\n+\t\t\tbreak;\n+\n+\t\t/* if there are alignment requirements, check if the offset we\n+\t\t * found is aligned, and if not, align it and check if we still\n+\t\t * have enough space.\n+\t\t */\n+\t\tif (page_align != 0 && (cur_idx & (page_align - 1)) != 0) {\n+\t\t\tunsigned int aligned, len;\n+\n+\t\t\taligned = RTE_ALIGN_CEIL(cur_idx, page_align);\n+\t\t\tlen = rte_fbarray_find_contig_free(arr, aligned);\n+\n+\t\t\t/* if there's not enough space, keep looking */\n+\t\t\tif (len < n_pages) {\n+\t\t\t\tstart_idx = aligned + len;\n+\t\t\t\tcontinue;\n+\t\t\t}\n+\n+\t\t\t/* we've found space */\n+\t\t\tcur_idx = aligned;\n+\t\t}\n+\t\tend = cur_idx + n_pages;\n+\t\toffset = cur_idx * system_page_sz;\n+\t\taddr = RTE_PTR_ADD(mcfg->device_memory.base_va,\n+\t\t\t\toffset);\n+\n+\t\t/* now, mark all space as occupied */\n+\t\tfor (; cur_idx < end; cur_idx++)\n+\t\t\trte_fbarray_set_used(arr, cur_idx);\n+\t\tbreak;\n+\t}\n+\trte_spinlock_unlock(&mcfg->device_memory.lock);\n+\n+\tif (addr != NULL)\n+\t\tRTE_LOG(DEBUG, EAL, \"%s(): allocated %p-%p (%lu bytes) for hardware device usage\\n\",\n+\t\t\t__func__, addr, RTE_PTR_ADD(addr, size), size);\n+\n+\treturn addr;\n+}\n+\n+int __rte_experimental\n+rte_mem_dev_memory_free(void *addr, size_t size)\n+{\n+\tstruct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;\n+\tstruct rte_fbarray *arr = &mcfg->device_memory.mem_map_arr;\n+\tint cur_idx, end, ret, n_pages, 
len;\n+\tvoid *map_end, *mem_end;\n+\n+\t/* check parameters first */\n+\tif (size == 0 || (size & (system_page_sz - 1)) != 0) {\n+\t\tRTE_LOG(ERR, EAL, \"%s(): size is not page-aligned\\n\",\n+\t\t\t\t__func__);\n+\t\trte_errno = EINVAL;\n+\t\treturn -1;\n+\t}\n+\t/* check if device memory map is uninitialized. */\n+\tif (mcfg->device_memory.base_va == NULL || arr->len == 0) {\n+\t\tRTE_LOG(ERR, EAL, \"%s(): device memory map is not initialized\\n\",\n+\t\t\t__func__);\n+\t\trte_errno = ENODEV;\n+\t\treturn -1;\n+\t}\n+\tmap_end = RTE_PTR_ADD(mcfg->device_memory.base_va,\n+\t\t\tarr->len * system_page_sz);\n+\tmem_end = RTE_PTR_ADD(addr, size);\n+\n+\t/* check if address is within the memory map */\n+\tif (addr < mcfg->device_memory.base_va || addr >= map_end ||\n+\t\t\tmem_end > map_end) {\n+\t\tRTE_LOG(ERR, EAL, \"%s(): address is beyond device memory map range\\n\",\n+\t\t\t\t__func__);\n+\t\trte_errno = EINVAL;\n+\t\treturn -1;\n+\t}\n+\n+\trte_spinlock_lock(&mcfg->device_memory.lock);\n+\n+\tn_pages = size / system_page_sz;\n+\tcur_idx = RTE_PTR_DIFF(addr, mcfg->device_memory.base_va) /\n+\t\t\tsystem_page_sz;\n+\tend = cur_idx + n_pages;\n+\n+\t/* check all space we will be marking as free is currently occupied */\n+\tlen = rte_fbarray_find_contig_used(arr, cur_idx);\n+\tif (len < n_pages) {\n+\t\tRTE_LOG(ERR, EAL, \"%s(): attempting to free unoccupied space\\n\",\n+\t\t\t__func__);\n+\t\trte_errno = EINVAL;\n+\t\tret = -1;\n+\t\tgoto unlock;\n+\t}\n+\t/* now, mark all space as free */\n+\tfor (; cur_idx < end; cur_idx++)\n+\t\trte_fbarray_set_free(arr, cur_idx);\n+\n+\t/* success */\n+\tret = 0;\n+\n+\tRTE_LOG(DEBUG, EAL, \"%s(): deallocated %p-%p (%lu bytes) for hardware device usage\\n\",\n+\t\t__func__, addr, RTE_PTR_ADD(addr, size), size);\n+unlock:\n+\trte_spinlock_unlock(&mcfg->device_memory.lock);\n+\treturn ret;\n+}\n+\n+static int\n+dev_memory_init(void)\n+{\n+\tstruct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;\n+\tsize_t 
size;\n+\tvoid *addr;\n+\tint retval;\n+\n+\tif (system_page_sz == 0)\n+\t\tsystem_page_sz = sysconf(_SC_PAGESIZE);\n+\n+\tsize = (size_t) 2 << 30;\n+\n+\taddr = eal_get_virtual_area(NULL, &size, system_page_sz, 0, 0);\n+\tif (addr == NULL) {\n+\t\tRTE_LOG(ERR, EAL, \"Cannot reserve device memory\\n\");\n+\t\treturn -1;\n+\t}\n+\n+\tretval = rte_fbarray_init(&mcfg->device_memory.mem_map_arr,\n+\t\t\tDEVICE_MEMORY_NAME, size / system_page_sz, 0);\n+\tif (retval < 0) {\n+\t\tRTE_LOG(ERR, EAL, \"Cannot initialize device memory map\\n\");\n+\t\treturn -1;\n+\t}\n+\tmcfg->device_memory.base_va = addr;\n+\trte_spinlock_init(&mcfg->device_memory.lock);\n+\treturn 0;\n+}\n+\n+static int\n+dev_memory_attach(void)\n+{\n+\tstruct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;\n+\tsize_t size;\n+\tvoid *addr;\n+\tint retval;\n+\n+\trte_spinlock_lock(&mcfg->device_memory.lock);\n+\n+\tif (system_page_sz == 0)\n+\t\tsystem_page_sz = sysconf(_SC_PAGESIZE);\n+\n+\tsize = mcfg->device_memory.mem_map_arr.len * system_page_sz;\n+\n+\taddr = eal_get_virtual_area(mcfg->device_memory.base_va, &size,\n+\t\t\tsystem_page_sz, 0, 0);\n+\tif (addr == NULL) {\n+\t\tRTE_LOG(ERR, EAL, \"Cannot reserve device memory\\n\");\n+\t\treturn -1;\n+\t}\n+\n+\tretval = rte_fbarray_attach(&mcfg->device_memory.mem_map_arr);\n+\tif (retval < 0) {\n+\t\tRTE_LOG(ERR, EAL, \"Cannot attach to device memory map\\n\");\n+\t\treturn -1;\n+\t}\n+\n+\trte_spinlock_unlock(&mcfg->device_memory.lock);\n+\n+\treturn 0;\n+}\n+\n /* init memory subsystem */\n int\n rte_eal_memory_init(void)\n@@ -918,25 +1140,41 @@ rte_eal_memory_init(void)\n \t/* lock mem hotplug here, to prevent races while we init */\n \trte_rwlock_read_lock(&mcfg->memory_hotplug_lock);\n \n-\tretval = rte_eal_process_type() == RTE_PROC_PRIMARY ?\n+\tif (rte_eal_process_type() == RTE_PROC_PRIMARY) {\n+\t\tretval = dev_memory_init();\n+\t\tif (retval < 0)\n+\t\t\tgoto fail;\n+\n #ifndef RTE_ARCH_64\n-\t\t\tmemseg_primary_init_32() 
:\n+\t\tretval = memseg_primary_init_32();\n #else\n-\t\t\tmemseg_primary_init() :\n+\t\tretval = memseg_primary_init();\n #endif\n-\t\t\tmemseg_secondary_init();\n-\n-\tif (retval < 0)\n-\t\tgoto fail;\n-\n-\tif (eal_memalloc_init() < 0)\n-\t\tgoto fail;\n-\n-\tretval = rte_eal_process_type() == RTE_PROC_PRIMARY ?\n-\t\t\trte_eal_hugepage_init() :\n-\t\t\trte_eal_hugepage_attach();\n-\tif (retval < 0)\n-\t\tgoto fail;\n+\t\tif (retval < 0)\n+\t\t\tgoto fail;\n+\n+\t\tif (eal_memalloc_init() < 0)\n+\t\t\tgoto fail;\n+\n+\t\tretval = rte_eal_hugepage_init();\n+\t\tif (retval < 0)\n+\t\t\tgoto fail;\n+\t} else {\n+\t\tretval = dev_memory_attach();\n+\t\tif (retval < 0)\n+\t\t\tgoto fail;\n+\n+\t\tretval = memseg_secondary_init();\n+\t\tif (retval < 0)\n+\t\t\tgoto fail;\n+\n+\t\tif (eal_memalloc_init() < 0)\n+\t\t\tgoto fail;\n+\n+\t\tretval = rte_eal_hugepage_attach();\n+\t\tif (retval < 0)\n+\t\t\tgoto fail;\n+\t}\n \n \tif (internal_config.no_shconf == 0 && rte_eal_memdevice_init() < 0)\n \t\tgoto fail;\ndiff --git a/lib/librte_eal/common/include/rte_eal_memconfig.h b/lib/librte_eal/common/include/rte_eal_memconfig.h\nindex aff0688dd..a8e7c39ff 100644\n--- a/lib/librte_eal/common/include/rte_eal_memconfig.h\n+++ b/lib/librte_eal/common/include/rte_eal_memconfig.h\n@@ -36,6 +36,22 @@ struct rte_memseg_list {\n \tstruct rte_fbarray memseg_arr;\n };\n \n+/**\n+ * mem map is a special case because we need to store a bunch of other data\n+ * together with the array itself.\n+ */\n+struct rte_mem_map {\n+\tRTE_STD_C11\n+\tunion {\n+\t\tvoid *base_va;\n+\t\t/**< Base virtual address for this mem map. */\n+\t\tuint64_t addr_64;\n+\t\t/**< Makes sure addr is always 64-bits */\n+\t};\n+\trte_spinlock_t lock;\n+\tstruct rte_fbarray mem_map_arr;\n+};\n+\n /**\n  * the structure for the memory configuration for the RTE.\n  * Used by the rte_config structure. 
It is separated out, as for multi-process\n@@ -68,6 +84,8 @@ struct rte_mem_config {\n \tstruct rte_memseg_list memsegs[RTE_MAX_MEMSEG_LISTS];\n \t/**< list of dynamic arrays holding memsegs */\n \n+\tstruct rte_mem_map device_memory; /**< Occupancy map of preallocated device memory */\n+\n \tstruct rte_tailq_head tailq_head[RTE_MAX_TAILQ]; /**< Tailqs for objects */\n \n \t/* Heaps of Malloc per socket */\ndiff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h\nindex aab9f6fe5..4cf58bd2a 100644\n--- a/lib/librte_eal/common/include/rte_memory.h\n+++ b/lib/librte_eal/common/include/rte_memory.h\n@@ -445,6 +445,46 @@ rte_mem_alloc_validator_register(const char *name,\n int __rte_experimental\n rte_mem_alloc_validator_unregister(const char *name, int socket_id);\n \n+/**\n+ * @brief Request memory for device mapping.\n+ *\n+ * @note after this call, reserved memory will be marked as unavailable in all\n+ *       processes until it is released, even if it goes unused.\n+ *\n+ * @param size\n+ *   Size of memory to request.\n+ *\n+ * @param align\n+ *   Alignment of memory to be returned.\n+ *\n+ * @return\n+ *   Valid pointer on successful fulfillment of request.\n+ *   NULL on unsuccessful fulfillment of request, with rte_errno indicating the\n+ *   case of error.\n+ */\n+void * __rte_experimental\n+rte_mem_dev_memory_alloc(size_t size, size_t align);\n+\n+/**\n+ * @brief Release memory for device mapping.\n+ *\n+ * @note by the time this call is made, memory region being freed must not be in\n+ *       use.\n+ *\n+ * @param addr\n+ *   Address of previously requested block of memory.\n+ *\n+ * @param size\n+ *   Size of memory to request.\n+ *\n+ * @return\n+ *   0 on successful memory release.\n+ *   -1 on unsuccessful memory release, with rte_errno indicating the cause of\n+ *   error.\n+ */\n+int __rte_experimental\n+rte_mem_dev_memory_free(void *addr, size_t size);\n+\n #ifdef __cplusplus\n }\n #endif\n",
    "prefixes": [
        "dpdk-dev",
        "RFC",
        "2/3"
    ]
}