Patch Detail
get:
Show a patch.
patch:
Update a patch.
put:
Update a patch.
GET /api/patches/45034/?format=api
http://patches.dpdk.org/api/patches/45034/?format=api", "web_url": "http://patches.dpdk.org/project/dpdk/patch/ae23cc3f53af9edc3e1fd74b1745c5bcd167e127.1537448866.git.anatoly.burakov@intel.com/", "project": { "id": 1, "url": "http://patches.dpdk.org/api/projects/1/?format=api", "name": "DPDK", "link_name": "dpdk", "list_id": "dev.dpdk.org", "list_email": "dev@dpdk.org", "web_url": "http://core.dpdk.org", "scm_url": "git://dpdk.org/dpdk", "webscm_url": "http://git.dpdk.org/dpdk", "list_archive_url": "https://inbox.dpdk.org/dev", "list_archive_url_format": "https://inbox.dpdk.org/dev/{}", "commit_url_format": "" }, "msgid": "<ae23cc3f53af9edc3e1fd74b1745c5bcd167e127.1537448866.git.anatoly.burakov@intel.com>", "list_archive_url": "https://inbox.dpdk.org/dev/ae23cc3f53af9edc3e1fd74b1745c5bcd167e127.1537448866.git.anatoly.burakov@intel.com", "date": "2018-09-20T13:11:13", "name": "[v3] pci/vfio: allow mapping MSI-X BARs if kernel allows it", "commit_ref": null, "pull_url": null, "state": "accepted", "archived": true, "hash": "d288f327fe64cab96bc0383667c7f3fdbda8f0c5", "submitter": { "id": 4, "url": "http://patches.dpdk.org/api/people/4/?format=api", "name": "Anatoly Burakov", "email": "anatoly.burakov@intel.com" }, "delegate": { "id": 1, "url": "http://patches.dpdk.org/api/users/1/?format=api", "username": "tmonjalo", "first_name": "Thomas", "last_name": "Monjalon", "email": "thomas@monjalon.net" }, "mbox": "http://patches.dpdk.org/project/dpdk/patch/ae23cc3f53af9edc3e1fd74b1745c5bcd167e127.1537448866.git.anatoly.burakov@intel.com/mbox/", "series": [ { "id": 1419, "url": "http://patches.dpdk.org/api/series/1419/?format=api", "web_url": "http://patches.dpdk.org/project/dpdk/list/?series=1419", "date": "2018-09-20T13:11:13", "name": "[v3] pci/vfio: allow mapping MSI-X BARs if kernel allows it", "version": 3, "mbox": "http://patches.dpdk.org/series/1419/mbox/" } ], "comments": "http://patches.dpdk.org/api/patches/45034/comments/", "check": "success", "checks": 
"http://patches.dpdk.org/api/patches/45034/checks/", "tags": {}, "related": [], "headers": { "Return-Path": "<dev-bounces@dpdk.org>", "X-Original-To": "patchwork@dpdk.org", "Delivered-To": "patchwork@dpdk.org", "Received": [ "from [92.243.14.124] (localhost [127.0.0.1])\n\tby dpdk.org (Postfix) with ESMTP id 4993B4CA6;\n\tThu, 20 Sep 2018 15:11:24 +0200 (CEST)", "from mga17.intel.com (mga17.intel.com [192.55.52.151])\n\tby dpdk.org (Postfix) with ESMTP id 590E0326D\n\tfor <dev@dpdk.org>; Thu, 20 Sep 2018 15:11:22 +0200 (CEST)", "from orsmga004.jf.intel.com ([10.7.209.38])\n\tby fmsmga107.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384;\n\t20 Sep 2018 06:11:21 -0700", "from irvmail001.ir.intel.com ([163.33.26.43])\n\tby orsmga004.jf.intel.com with ESMTP; 20 Sep 2018 06:11:14 -0700", "from sivswdev01.ir.intel.com (sivswdev01.ir.intel.com\n\t[10.237.217.45])\n\tby irvmail001.ir.intel.com (8.14.3/8.13.6/MailSET/Hub) with ESMTP id\n\tw8KDBEGe021632; Thu, 20 Sep 2018 14:11:14 +0100", "from sivswdev01.ir.intel.com (localhost [127.0.0.1])\n\tby sivswdev01.ir.intel.com with ESMTP id w8KDBE0r008131;\n\tThu, 20 Sep 2018 14:11:14 +0100", "(from aburakov@localhost)\n\tby sivswdev01.ir.intel.com with LOCAL id w8KDBDg7008123;\n\tThu, 20 Sep 2018 14:11:13 +0100" ], "X-Amp-Result": "SKIPPED(no attachment in message)", "X-Amp-File-Uploaded": "False", "X-ExtLoop1": "1", "X-IronPort-AV": "E=Sophos;i=\"5.53,398,1531810800\"; d=\"scan'208\";a=\"234521330\"", "From": "Anatoly Burakov <anatoly.burakov@intel.com>", "To": "dev@dpdk.org", "Cc": "t.yoshimura8869@gmail.com, thomas@monjalon.net,\n\tjerin.jacob@caviumnetworks.com", "Date": "Thu, 20 Sep 2018 14:11:13 +0100", "Message-Id": "<ae23cc3f53af9edc3e1fd74b1745c5bcd167e127.1537448866.git.anatoly.burakov@intel.com>", "X-Mailer": "git-send-email 1.7.0.7", "In-Reply-To": "<35b0d3fcf990a064a94dc93d834d86352603de98.1533036456.git.anatoly.burakov@intel.com>", "References": 
"<35b0d3fcf990a064a94dc93d834d86352603de98.1533036456.git.anatoly.burakov@intel.com>", "Subject": "[dpdk-dev] [PATCH v3] pci/vfio: allow mapping MSI-X BARs if kernel\n\tallows it", "X-BeenThere": "dev@dpdk.org", "X-Mailman-Version": "2.1.15", "Precedence": "list", "List-Id": "DPDK patches and discussions <dev.dpdk.org>", "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n\t<mailto:dev-request@dpdk.org?subject=unsubscribe>", "List-Archive": "<http://mails.dpdk.org/archives/dev/>", "List-Post": "<mailto:dev@dpdk.org>", "List-Help": "<mailto:dev-request@dpdk.org?subject=help>", "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n\t<mailto:dev-request@dpdk.org?subject=subscribe>", "Errors-To": "dev-bounces@dpdk.org", "Sender": "\"dev\" <dev-bounces@dpdk.org>" }, "content": "Currently, DPDK will skip mapping some areas (or even an entire BAR)\nif MSI-X table happens to be in them but is smaller than page size.\n\nKernels 4.16+ will allow mapping MSI-X BARs [1], and will report this\nas a capability flag. 
Capability flags themselves are also only\nsupported since kernel 4.6 [2].\n\nThis commit will introduce support for checking VFIO capabilities,\nand will use it to check if we are allowed to map BARs with MSI-X\ntables in them, along with backwards compatibility for older\nkernels, including a workaround for a variable rename in VFIO\nregion info structure [3].\n\n[1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/\nlinux.git/commit/?id=a32295c612c57990d17fb0f41e7134394b2f35f6\n\n[2] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/\nlinux.git/commit/?id=c84982adb23bcf3b99b79ca33527cd2625fbe279\n\n[3] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/\nlinux.git/commit/?id=ff63eb638d63b95e489f976428f1df01391e15e4\n\nSigned-off-by: Anatoly Burakov <anatoly.burakov@intel.com>\n---\n\nNotes:\n v3->v2:\n - Fix potential uninitialized value access as per Takeshi's\n comments\n - Fix potential memory leak on failed memory reallocation\n \n v2->v1:\n - Fix pointer in pci_vfio_get_region_info\n - Fix commit message\n\n drivers/bus/pci/linux/pci_vfio.c | 132 ++++++++++++++++++++---\n lib/librte_eal/common/include/rte_vfio.h | 26 +++++\n 2 files changed, 145 insertions(+), 13 deletions(-)", "diff": "diff --git a/drivers/bus/pci/linux/pci_vfio.c b/drivers/bus/pci/linux/pci_vfio.c\nindex 686386d6a..d112b4b54 100644\n--- a/drivers/bus/pci/linux/pci_vfio.c\n+++ b/drivers/bus/pci/linux/pci_vfio.c\n@@ -415,6 +415,93 @@ pci_vfio_mmap_bar(int vfio_dev_fd, struct mapped_pci_resource *vfio_res,\n \treturn 0;\n }\n \n+/*\n+ * region info may contain capability headers, so we need to keep reallocating\n+ * the memory until we match allocated memory size with argsz.\n+ */\n+static int\n+pci_vfio_get_region_info(int vfio_dev_fd, struct vfio_region_info **info,\n+\t\tint region)\n+{\n+\tstruct vfio_region_info *ri;\n+\tsize_t argsz = sizeof(*ri);\n+\tint ret;\n+\n+\tri = malloc(sizeof(*ri));\n+\tif (ri == NULL) {\n+\t\tRTE_LOG(ERR, EAL, \"Cannot allocate memory 
for region info\\n\");\n+\t\treturn -1;\n+\t}\n+again:\n+\tmemset(ri, 0, argsz);\n+\tri->argsz = argsz;\n+\tri->index = region;\n+\n+\tret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, ri);\n+\tif (ret < 0) {\n+\t\tfree(ri);\n+\t\treturn ret;\n+\t}\n+\tif (ri->argsz != argsz) {\n+\t\tstruct vfio_region_info *tmp;\n+\n+\t\targsz = ri->argsz;\n+\t\ttmp = realloc(ri, argsz);\n+\n+\t\tif (tmp == NULL) {\n+\t\t\t/* realloc failed but the ri is still there */\n+\t\t\tfree(ri);\n+\t\t\tRTE_LOG(ERR, EAL, \"Cannot reallocate memory for region info\\n\");\n+\t\t\treturn -1;\n+\t\t}\n+\t\tri = tmp;\n+\t\tgoto again;\n+\t}\n+\t*info = ri;\n+\n+\treturn 0;\n+}\n+\n+static struct vfio_info_cap_header *\n+pci_vfio_info_cap(struct vfio_region_info *info, int cap)\n+{\n+\tstruct vfio_info_cap_header *h;\n+\tsize_t offset;\n+\n+\tif ((info->flags & RTE_VFIO_INFO_FLAG_CAPS) == 0) {\n+\t\t/* VFIO info does not advertise capabilities */\n+\t\treturn NULL;\n+\t}\n+\n+\toffset = VFIO_CAP_OFFSET(info);\n+\twhile (offset != 0) {\n+\t\th = RTE_PTR_ADD(info, offset);\n+\t\tif (h->id == cap)\n+\t\t\treturn h;\n+\t\toffset = h->next;\n+\t}\n+\treturn NULL;\n+}\n+\n+static int\n+pci_vfio_msix_is_mappable(int vfio_dev_fd, int msix_region)\n+{\n+\tstruct vfio_region_info *info;\n+\tint ret;\n+\n+\tret = pci_vfio_get_region_info(vfio_dev_fd, &info, msix_region);\n+\tif (ret < 0)\n+\t\treturn -1;\n+\n+\tret = pci_vfio_info_cap(info, RTE_VFIO_CAP_MSIX_MAPPABLE) != NULL;\n+\n+\t/* cleanup */\n+\tfree(info);\n+\n+\treturn ret;\n+}\n+\n+\n static int\n pci_vfio_map_resource_primary(struct rte_pci_device *dev)\n {\n@@ -464,56 +551,75 @@ pci_vfio_map_resource_primary(struct rte_pci_device *dev)\n \tif (ret < 0) {\n \t\tRTE_LOG(ERR, EAL, \" %s cannot get MSI-X BAR number!\\n\",\n \t\t\t\tpci_addr);\n-\t\tgoto err_vfio_dev_fd;\n+\t\tgoto err_vfio_res;\n+\t}\n+\t/* if we found our MSI-X BAR region, check if we can mmap it */\n+\tif (vfio_res->msix_table.bar_index != -1) {\n+\t\tint ret = 
pci_vfio_msix_is_mappable(vfio_dev_fd,\n+\t\t\t\tvfio_res->msix_table.bar_index);\n+\t\tif (ret < 0) {\n+\t\t\tRTE_LOG(ERR, EAL, \"Couldn't check if MSI-X BAR is mappable\\n\");\n+\t\t\tgoto err_vfio_res;\n+\t\t} else if (ret != 0) {\n+\t\t\t/* we can map it, so we don't care where it is */\n+\t\t\tRTE_LOG(DEBUG, EAL, \"VFIO reports MSI-X BAR as mappable\\n\");\n+\t\t\tvfio_res->msix_table.bar_index = -1;\n+\t\t}\n \t}\n \n \tfor (i = 0; i < (int) vfio_res->nb_maps; i++) {\n-\t\tstruct vfio_region_info reg = { .argsz = sizeof(reg) };\n+\t\tstruct vfio_region_info *reg = NULL;\n \t\tvoid *bar_addr;\n \n-\t\treg.index = i;\n-\n-\t\tret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, &reg);\n-\t\tif (ret) {\n+\t\tret = pci_vfio_get_region_info(vfio_dev_fd, &reg, i);\n+\t\tif (ret < 0) {\n \t\t\tRTE_LOG(ERR, EAL, \" %s cannot get device region info \"\n-\t\t\t\t\t\"error %i (%s)\\n\", pci_addr, errno, strerror(errno));\n+\t\t\t\t\"error %i (%s)\\n\", pci_addr, errno,\n+\t\t\t\tstrerror(errno));\n \t\t\tgoto err_vfio_res;\n \t\t}\n \n \t\t/* chk for io port region */\n \t\tret = pci_vfio_is_ioport_bar(vfio_dev_fd, i);\n-\t\tif (ret < 0)\n+\t\tif (ret < 0) {\n+\t\t\tfree(reg);\n \t\t\tgoto err_vfio_res;\n-\t\telse if (ret) {\n+\t\t} else if (ret) {\n \t\t\tRTE_LOG(INFO, EAL, \"Ignore mapping IO port bar(%d)\\n\",\n \t\t\t\t\ti);\n+\t\t\tfree(reg);\n \t\t\tcontinue;\n \t\t}\n \n \t\t/* skip non-mmapable BARs */\n-\t\tif ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0)\n+\t\tif ((reg->flags & VFIO_REGION_INFO_FLAG_MMAP) == 0) {\n+\t\t\tfree(reg);\n \t\t\tcontinue;\n+\t\t}\n \n \t\t/* try mapping somewhere close to the end of hugepages */\n \t\tif (pci_map_addr == NULL)\n \t\t\tpci_map_addr = pci_find_max_end_va();\n \n \t\tbar_addr = pci_map_addr;\n-\t\tpci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size);\n+\t\tpci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg->size);\n \n \t\tmaps[i].addr = bar_addr;\n-\t\tmaps[i].offset = reg.offset;\n-\t\tmaps[i].size = 
reg.size;\n+\t\tmaps[i].offset = reg->offset;\n+\t\tmaps[i].size = reg->size;\n \t\tmaps[i].path = NULL; /* vfio doesn't have per-resource paths */\n \n \t\tret = pci_vfio_mmap_bar(vfio_dev_fd, vfio_res, i, 0);\n \t\tif (ret < 0) {\n \t\t\tRTE_LOG(ERR, EAL, \" %s mapping BAR%i failed: %s\\n\",\n \t\t\t\t\tpci_addr, i, strerror(errno));\n+\t\t\tfree(reg);\n \t\t\tgoto err_vfio_res;\n \t\t}\n \n \t\tdev->mem_resource[i].addr = maps[i].addr;\n+\n+\t\tfree(reg);\n \t}\n \n \tif (pci_rte_vfio_setup_device(dev, vfio_dev_fd) < 0) {\ndiff --git a/lib/librte_eal/common/include/rte_vfio.h b/lib/librte_eal/common/include/rte_vfio.h\nindex 5ca13fcce..f6617e004 100644\n--- a/lib/librte_eal/common/include/rte_vfio.h\n+++ b/lib/librte_eal/common/include/rte_vfio.h\n@@ -14,6 +14,8 @@\n extern \"C\" {\n #endif\n \n+#include <stdint.h>\n+\n /*\n * determine if VFIO is present on the system\n */\n@@ -44,6 +46,30 @@ extern \"C\" {\n #define RTE_VFIO_NOIOMMU 8\n #endif\n \n+/*\n+ * capabilities are only supported on kernel 4.6+. there were also some API\n+ * changes as well, so add a macro to get cap offset.\n+ */\n+#ifdef VFIO_REGION_INFO_FLAG_CAPS\n+#define RTE_VFIO_INFO_FLAG_CAPS VFIO_REGION_INFO_FLAG_CAPS\n+#define VFIO_CAP_OFFSET(x) (x->cap_offset)\n+#else\n+#define RTE_VFIO_INFO_FLAG_CAPS (1 << 3)\n+#define VFIO_CAP_OFFSET(x) (x->resv)\n+struct vfio_info_cap_header {\n+\tuint16_t id;\n+\tuint16_t version;\n+\tuint32_t next;\n+};\n+#endif\n+\n+/* kernels 4.16+ can map BAR containing MSI-X table */\n+#ifdef VFIO_REGION_INFO_CAP_MSIX_MAPPABLE\n+#define RTE_VFIO_CAP_MSIX_MAPPABLE VFIO_REGION_INFO_CAP_MSIX_MAPPABLE\n+#else\n+#define RTE_VFIO_CAP_MSIX_MAPPABLE 3\n+#endif\n+\n #else /* not VFIO_PRESENT */\n \n /* we don't need an actual definition, only pointer is used */\n", "prefixes": [ "v3" ] }{ "id": 45034, "url": "