get:
Show a patch.

patch:
Update a patch (partial update; only the fields supplied in the request body are changed).

put:
Update a patch (full update).

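The sample request below shows the read path. Here is a minimal sketch of driving the two update methods with the Python "requests" library; the API token, the chosen field ("state"), and its value are illustrative assumptions, and write access normally requires an authenticated account with maintainer rights on the project.

    import requests

    URL = "http://patches.dpdk.org/api/patches/37258/"
    HEADERS = {"Authorization": "Token YOUR_API_TOKEN"}  # hypothetical token

    # PATCH: partial update, only the supplied fields are changed.
    resp = requests.patch(URL, headers=HEADERS, json={"state": "accepted"})
    resp.raise_for_status()

    # PUT: full update, the request body replaces the writable fields.
    # Fetch the current resource first, modify it, then send it back.
    patch = requests.get(URL, headers={"Accept": "application/json"}).json()
    patch["archived"] = True
    resp = requests.put(URL, headers=HEADERS, json=patch)
    resp.raise_for_status()
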
GET /api/patches/37258/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 37258,
    "url": "http://patches.dpdk.org/api/patches/37258/?format=api",
    "web_url": "http://patches.dpdk.org/project/dpdk/patch/20180405180701.16853-2-xiao.w.wang@intel.com/",
    "project": {
        "id": 1,
        "url": "http://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20180405180701.16853-2-xiao.w.wang@intel.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20180405180701.16853-2-xiao.w.wang@intel.com",
    "date": "2018-04-05T18:06:58",
    "name": "[dpdk-dev,v5,1/4] eal/vfio: add multiple container support",
    "commit_ref": null,
    "pull_url": null,
    "state": "changes-requested",
    "archived": true,
    "hash": "aabb4a94ced62579756280810af592e2f4ef45d9",
    "submitter": {
        "id": 281,
        "url": "http://patches.dpdk.org/api/people/281/?format=api",
        "name": "Xiao Wang",
        "email": "xiao.w.wang@intel.com"
    },
    "delegate": {
        "id": 319,
        "url": "http://patches.dpdk.org/api/users/319/?format=api",
        "username": "fyigit",
        "first_name": "Ferruh",
        "last_name": "Yigit",
        "email": "ferruh.yigit@amd.com"
    },
    "mbox": "http://patches.dpdk.org/project/dpdk/patch/20180405180701.16853-2-xiao.w.wang@intel.com/mbox/",
    "series": [],
    "comments": "http://patches.dpdk.org/api/patches/37258/comments/",
    "check": "fail",
    "checks": "http://patches.dpdk.org/api/patches/37258/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@dpdk.org",
        "Delivered-To": "patchwork@dpdk.org",
        "Received": [
            "from [92.243.14.124] (localhost [127.0.0.1])\n\tby dpdk.org (Postfix) with ESMTP id 1A4291CA59;\n\tThu,  5 Apr 2018 11:33:39 +0200 (CEST)",
            "from mga09.intel.com (mga09.intel.com [134.134.136.24])\n\tby dpdk.org (Postfix) with ESMTP id 5A93F1CA59\n\tfor <dev@dpdk.org>; Thu,  5 Apr 2018 11:33:36 +0200 (CEST)",
            "from orsmga004.jf.intel.com ([10.7.209.38])\n\tby orsmga102.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384;\n\t05 Apr 2018 02:33:35 -0700",
            "from dpdk-xiao-1.sh.intel.com ([10.67.110.153])\n\tby orsmga004.jf.intel.com with ESMTP; 05 Apr 2018 02:33:33 -0700"
        ],
        "X-Amp-Result": "SKIPPED(no attachment in message)",
        "X-Amp-File-Uploaded": "False",
        "X-ExtLoop1": "1",
        "X-IronPort-AV": "E=Sophos;i=\"5.48,410,1517904000\"; d=\"scan'208\";a=\"188854856\"",
        "From": "Xiao Wang <xiao.w.wang@intel.com>",
        "To": "ferruh.yigit@intel.com",
        "Cc": "maxime.coquelin@redhat.com, dev@dpdk.org, zhihong.wang@intel.com,\n\tjianfeng.tan@intel.com, tiwei.bie@intel.com, cunming.liang@intel.com, \n\tdan.daly@intel.com, thomas@monjalon.net, gaetan.rivet@6wind.com,\n\tanatoly.burakov@intel.com, hemant.agrawal@nxp.com,\n\tXiao Wang <xiao.w.wang@intel.com>, Junjie Chen <junjie.j.chen@intel.com>",
        "Date": "Fri,  6 Apr 2018 02:06:58 +0800",
        "Message-Id": "<20180405180701.16853-2-xiao.w.wang@intel.com>",
        "X-Mailer": "git-send-email 2.15.1",
        "In-Reply-To": "<20180405180701.16853-1-xiao.w.wang@intel.com>",
        "References": "<20180404144042.29901-2-xiao.w.wang@intel.com>\n\t<20180405180701.16853-1-xiao.w.wang@intel.com>",
        "Subject": "[dpdk-dev] [PATCH v5 1/4] eal/vfio: add multiple container support",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.15",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://dpdk.org/ml/options/dev>,\n\t<mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://dpdk.org/ml/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://dpdk.org/ml/listinfo/dev>,\n\t<mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>"
    },
    "content": "Currently eal vfio framework binds vfio group fd to the default\ncontainer fd during rte_vfio_setup_device, while in some cases,\ne.g. vDPA (vhost data path acceleration), we want to put vfio group\nto a separate container and program IOMMU via this container.\n\nThis patch adds some APIs to support container creating and device\nbinding with a container.\n\nA driver could use \"rte_vfio_create_container\" helper to create a\nnew container from eal, use \"rte_vfio_bind_group\" to bind a device\nto the newly created container.\n\nDuring rte_vfio_setup_device, the container bound with the device\nwill be used for IOMMU setup.\n\nSigned-off-by: Junjie Chen <junjie.j.chen@intel.com>\nSigned-off-by: Xiao Wang <xiao.w.wang@intel.com>\nReviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>\n---\n config/common_base                       |   1 +\n lib/librte_eal/bsdapp/eal/eal.c          |  50 +++\n lib/librte_eal/common/include/rte_vfio.h | 113 +++++++\n lib/librte_eal/linuxapp/eal/eal_vfio.c   | 522 +++++++++++++++++++++++++------\n lib/librte_eal/linuxapp/eal/eal_vfio.h   |   1 +\n lib/librte_eal/rte_eal_version.map       |   6 +\n 6 files changed, 601 insertions(+), 92 deletions(-)",
    "diff": "diff --git a/config/common_base b/config/common_base\nindex 7abf7c6fc..2c40b2603 100644\n--- a/config/common_base\n+++ b/config/common_base\n@@ -74,6 +74,7 @@ CONFIG_RTE_EAL_ALWAYS_PANIC_ON_ERROR=n\n CONFIG_RTE_EAL_IGB_UIO=n\n CONFIG_RTE_EAL_VFIO=n\n CONFIG_RTE_MAX_VFIO_GROUPS=64\n+CONFIG_RTE_MAX_VFIO_CONTAINERS=64\n CONFIG_RTE_MALLOC_DEBUG=n\n CONFIG_RTE_EAL_NUMA_AWARE_HUGEPAGES=n\n \ndiff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c\nindex 4eafcb5ad..0a3d8783d 100644\n--- a/lib/librte_eal/bsdapp/eal/eal.c\n+++ b/lib/librte_eal/bsdapp/eal/eal.c\n@@ -746,6 +746,14 @@ int rte_vfio_enable(const char *modname);\n int rte_vfio_is_enabled(const char *modname);\n int rte_vfio_noiommu_is_enabled(void);\n int rte_vfio_clear_group(int vfio_group_fd);\n+int rte_vfio_create_container(void);\n+int rte_vfio_destroy_container(int container_fd);\n+int rte_vfio_bind_group(int container_fd, int iommu_group_no);\n+int rte_vfio_unbind_group(int container_fd, int iommu_group_no);\n+int rte_vfio_dma_map(int container_fd, int dma_type,\n+\t\tconst struct rte_memseg *ms);\n+int rte_vfio_dma_unmap(int container_fd, int dma_type,\n+\t\tconst struct rte_memseg *ms);\n \n int rte_vfio_setup_device(__rte_unused const char *sysfs_base,\n \t\t      __rte_unused const char *dev_addr,\n@@ -781,3 +789,45 @@ int rte_vfio_clear_group(__rte_unused int vfio_group_fd)\n {\n \treturn 0;\n }\n+\n+int __rte_experimental\n+rte_vfio_create_container(void)\n+{\n+\treturn -1;\n+}\n+\n+int __rte_experimental\n+rte_vfio_destroy_container(__rte_unused int container_fd)\n+{\n+\treturn -1;\n+}\n+\n+int __rte_experimental\n+rte_vfio_bind_group(__rte_unused int container_fd,\n+\t__rte_unused int iommu_group_no)\n+{\n+\treturn -1;\n+}\n+\n+int __rte_experimental\n+rte_vfio_unbind_group(__rte_unused int container_fd,\n+\t__rte_unused int iommu_group_no)\n+{\n+\treturn -1;\n+}\n+\n+int __rte_experimental\n+rte_vfio_dma_map(__rte_unused int container_fd,\n+\t__rte_unused int dma_type,\n+\t__rte_unused const struct rte_memseg *ms)\n+{\n+\treturn -1;\n+}\n+\n+int __rte_experimental\n+rte_vfio_dma_unmap(__rte_unused int container_fd,\n+\t__rte_unused int dma_type,\n+\t__rte_unused const struct rte_memseg *ms)\n+{\n+\treturn -1;\n+}\ndiff --git a/lib/librte_eal/common/include/rte_vfio.h b/lib/librte_eal/common/include/rte_vfio.h\nindex 249095e46..9bb026703 100644\n--- a/lib/librte_eal/common/include/rte_vfio.h\n+++ b/lib/librte_eal/common/include/rte_vfio.h\n@@ -32,6 +32,8 @@\n extern \"C\" {\n #endif\n \n+struct rte_memseg;\n+\n /**\n  * Setup vfio_cfg for the device identified by its address.\n  * It discovers the configured I/O MMU groups or sets a new one for the device.\n@@ -131,6 +133,117 @@ rte_vfio_clear_group(int vfio_group_fd);\n }\n #endif\n \n+/**\n+ * @warning\n+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice\n+ *\n+ * Create a new container for device binding.\n+ *\n+ * @return\n+ *   the container fd if successful\n+ *   <0 if failed\n+ */\n+int __rte_experimental\n+rte_vfio_create_container(void);\n+\n+/**\n+ * @warning\n+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice\n+ *\n+ * Destroy the container, unbind all vfio groups within it.\n+ *\n+ * @param container_fd\n+ *   the container fd to destroy\n+ *\n+ * @return\n+ *    0 if successful\n+ *   <0 if failed\n+ */\n+int __rte_experimental\n+rte_vfio_destroy_container(int container_fd);\n+\n+/**\n+ * @warning\n+ * @b EXPERIMENTAL: this API may change, or be removed, without prior 
notice\n+ *\n+ * Bind a IOMMU group to a container.\n+ *\n+ * @param container_fd\n+ *   the container's fd\n+ *\n+ * @param iommu_group_no\n+ *   the iommu_group_no to bind to container\n+ *\n+ * @return\n+ *   group fd if successful\n+ *   <0 if failed\n+ */\n+int __rte_experimental\n+rte_vfio_bind_group(int container_fd, int iommu_group_no);\n+\n+/**\n+ * @warning\n+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice\n+ *\n+ * Unbind a IOMMU group from a container.\n+ *\n+ * @param container_fd\n+ *   the container fd of container\n+ *\n+ * @param iommu_group_no\n+ *   the iommu_group_no to delete from container\n+ *\n+ * @return\n+ *    0 if successful\n+ *   <0 if failed\n+ */\n+int __rte_experimental\n+rte_vfio_unbind_group(int container_fd, int iommu_group_no);\n+\n+/**\n+ * @warning\n+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice\n+ *\n+ * Perform dma mapping for devices in a conainer.\n+ *\n+ * @param container_fd\n+ *   the specified container fd\n+ *\n+ * @param dma_type\n+ *   the dma map type\n+ *\n+ * @param ms\n+ *   the dma address region to map\n+ *\n+ * @return\n+ *    0 if successful\n+ *   <0 if failed\n+ */\n+int __rte_experimental\n+rte_vfio_dma_map(int container_fd, int dma_type, const struct rte_memseg *ms);\n+\n+/**\n+ * @warning\n+ * @b EXPERIMENTAL: this API may change, or be removed, without prior notice\n+ *\n+ * Perform dma unmapping for devices in a conainer.\n+ *\n+ * @param container_fd\n+ *   the specified container fd\n+ *\n+ * @param dma_type\n+ *    the dma map type\n+ *\n+ * @param ms\n+ *   the dma address region to unmap\n+ *\n+ * @return\n+ *    0 if successful\n+ *   <0 if failed\n+ */\n+int __rte_experimental\n+rte_vfio_dma_unmap(int container_fd, int dma_type, const struct rte_memseg *ms);\n+\n #endif /* VFIO_PRESENT */\n \n #endif /* _RTE_VFIO_H_ */\ndiff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c\nindex e44ae4d04..e474f6e9f 100644\n--- a/lib/librte_eal/linuxapp/eal/eal_vfio.c\n+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c\n@@ -9,6 +9,7 @@\n \n #include <rte_log.h>\n #include <rte_memory.h>\n+#include <rte_malloc.h>\n #include <rte_eal_memconfig.h>\n #include <rte_vfio.h>\n \n@@ -19,7 +20,9 @@\n #ifdef VFIO_PRESENT\n \n /* per-process VFIO config */\n-static struct vfio_config vfio_cfg;\n+static struct vfio_config default_vfio_cfg;\n+\n+static struct vfio_config *vfio_cfgs[VFIO_MAX_CONTAINERS] = {&default_vfio_cfg};\n \n static int vfio_type1_dma_map(int);\n static int vfio_spapr_dma_map(int);\n@@ -35,38 +38,13 @@ static const struct vfio_iommu_type iommu_types[] = {\n \t{ RTE_VFIO_NOIOMMU, \"No-IOMMU\", &vfio_noiommu_dma_map},\n };\n \n-int\n-vfio_get_group_fd(int iommu_group_no)\n+static int\n+vfio_open_group_fd(int iommu_group_no)\n {\n-\tint i;\n \tint vfio_group_fd;\n \tchar filename[PATH_MAX];\n-\tstruct vfio_group *cur_grp;\n-\n-\t/* check if we already have the group descriptor open */\n-\tfor (i = 0; i < VFIO_MAX_GROUPS; i++)\n-\t\tif (vfio_cfg.vfio_groups[i].group_no == iommu_group_no)\n-\t\t\treturn vfio_cfg.vfio_groups[i].fd;\n-\n-\t/* Lets see first if there is room for a new group */\n-\tif (vfio_cfg.vfio_active_groups == VFIO_MAX_GROUPS) {\n-\t\tRTE_LOG(ERR, EAL, \"Maximum number of VFIO groups reached!\\n\");\n-\t\treturn -1;\n-\t}\n-\n-\t/* Now lets get an index for the new group */\n-\tfor (i = 0; i < VFIO_MAX_GROUPS; i++)\n-\t\tif (vfio_cfg.vfio_groups[i].group_no == -1) {\n-\t\t\tcur_grp = 
&vfio_cfg.vfio_groups[i];\n-\t\t\tbreak;\n-\t\t}\n \n-\t/* This should not happen */\n-\tif (i == VFIO_MAX_GROUPS) {\n-\t\tRTE_LOG(ERR, EAL, \"No VFIO group free slot found\\n\");\n-\t\treturn -1;\n-\t}\n-\t/* if primary, try to open the group */\n+\t/* if in primary process, try to open the group */\n \tif (internal_config.process_type == RTE_PROC_PRIMARY) {\n \t\t/* try regular group format */\n \t\tsnprintf(filename, sizeof(filename),\n@@ -75,8 +53,8 @@ vfio_get_group_fd(int iommu_group_no)\n \t\tif (vfio_group_fd < 0) {\n \t\t\t/* if file not found, it's not an error */\n \t\t\tif (errno != ENOENT) {\n-\t\t\t\tRTE_LOG(ERR, EAL, \"Cannot open %s: %s\\n\", filename,\n-\t\t\t\t\t\tstrerror(errno));\n+\t\t\t\tRTE_LOG(ERR, EAL, \"Cannot open %s: %s\\n\",\n+\t\t\t\t\tfilename, strerror(errno));\n \t\t\t\treturn -1;\n \t\t\t}\n \n@@ -86,8 +64,10 @@ vfio_get_group_fd(int iommu_group_no)\n \t\t\tvfio_group_fd = open(filename, O_RDWR);\n \t\t\tif (vfio_group_fd < 0) {\n \t\t\t\tif (errno != ENOENT) {\n-\t\t\t\t\tRTE_LOG(ERR, EAL, \"Cannot open %s: %s\\n\", filename,\n-\t\t\t\t\t\t\tstrerror(errno));\n+\t\t\t\t\tRTE_LOG(ERR, EAL,\n+\t\t\t\t\t\t\"Cannot open %s: %s\\n\",\n+\t\t\t\t\t\tfilename,\n+\t\t\t\t\t\tstrerror(errno));\n \t\t\t\t\treturn -1;\n \t\t\t\t}\n \t\t\t\treturn 0;\n@@ -95,21 +75,19 @@ vfio_get_group_fd(int iommu_group_no)\n \t\t\t/* noiommu group found */\n \t\t}\n \n-\t\tcur_grp->group_no = iommu_group_no;\n-\t\tcur_grp->fd = vfio_group_fd;\n-\t\tvfio_cfg.vfio_active_groups++;\n \t\treturn vfio_group_fd;\n \t}\n-\t/* if we're in a secondary process, request group fd from the primary\n+\t/*\n+\t * if we're in a secondary process, request group fd from the primary\n \t * process via our socket\n \t */\n \telse {\n-\t\tint socket_fd, ret;\n-\n-\t\tsocket_fd = vfio_mp_sync_connect_to_primary();\n+\t\tint ret;\n+\t\tint socket_fd = vfio_mp_sync_connect_to_primary();\n \n \t\tif (socket_fd < 0) {\n-\t\t\tRTE_LOG(ERR, EAL, \"  cannot connect to primary process!\\n\");\n+\t\t\tRTE_LOG(ERR, EAL,\n+\t\t\t\t\"  cannot connect to primary process!\\n\");\n \t\t\treturn -1;\n \t\t}\n \t\tif (vfio_mp_sync_send_request(socket_fd, SOCKET_REQ_GROUP) < 0) {\n@@ -122,6 +100,7 @@ vfio_get_group_fd(int iommu_group_no)\n \t\t\tclose(socket_fd);\n \t\t\treturn -1;\n \t\t}\n+\n \t\tret = vfio_mp_sync_receive_request(socket_fd);\n \t\tswitch (ret) {\n \t\tcase SOCKET_NO_FD:\n@@ -132,9 +111,6 @@ vfio_get_group_fd(int iommu_group_no)\n \t\t\t/* if we got the fd, store it and return it */\n \t\t\tif (vfio_group_fd > 0) {\n \t\t\t\tclose(socket_fd);\n-\t\t\t\tcur_grp->group_no = iommu_group_no;\n-\t\t\t\tcur_grp->fd = vfio_group_fd;\n-\t\t\t\tvfio_cfg.vfio_active_groups++;\n \t\t\t\treturn vfio_group_fd;\n \t\t\t}\n \t\t\t/* fall-through on error */\n@@ -147,70 +123,349 @@ vfio_get_group_fd(int iommu_group_no)\n \treturn -1;\n }\n \n+static struct vfio_config *\n+get_vfio_cfg_by_group_fd(int vfio_group_fd)\n+{\n+\tstruct vfio_config *vfio_cfg;\n+\tint i, j;\n+\n+\tfor (i = 0; i < VFIO_MAX_CONTAINERS; i++) {\n+\t\tif (!vfio_cfgs[i])\n+\t\t\tcontinue;\n+\n+\t\tvfio_cfg = vfio_cfgs[i];\n+\t\tfor (j = 0; j < VFIO_MAX_GROUPS; j++)\n+\t\t\tif (vfio_cfg->vfio_groups[j].fd == vfio_group_fd)\n+\t\t\t\treturn vfio_cfg;\n+\t}\n+\n+\treturn &default_vfio_cfg;\n+}\n+\n+static struct vfio_config *\n+get_vfio_cfg_by_group_no(int iommu_group_no)\n+{\n+\tstruct vfio_config *vfio_cfg;\n+\tint i, j;\n+\n+\tfor (i = 0; i < VFIO_MAX_CONTAINERS; i++) {\n+\t\tif (!vfio_cfgs[i])\n+\t\t\tcontinue;\n+\n+\t\tvfio_cfg = 
vfio_cfgs[i];\n+\t\tfor (j = 0; j < VFIO_MAX_GROUPS; j++) {\n+\t\t\tif (vfio_cfg->vfio_groups[j].group_no ==\n+\t\t\t\t\tiommu_group_no)\n+\t\t\t\treturn vfio_cfg;\n+\t\t}\n+\t}\n+\n+\treturn &default_vfio_cfg;\n+}\n \n static int\n-get_vfio_group_idx(int vfio_group_fd)\n+get_container_idx(int container_fd)\n {\n \tint i;\n-\tfor (i = 0; i < VFIO_MAX_GROUPS; i++)\n-\t\tif (vfio_cfg.vfio_groups[i].fd == vfio_group_fd)\n+\n+\tfor (i = 0; i < VFIO_MAX_CONTAINERS; i++) {\n+\t\tif (!vfio_cfgs[i])\n+\t\t\tcontinue;\n+\n+\t\tif (vfio_cfgs[i]->vfio_container_fd == container_fd)\n \t\t\treturn i;\n+\t}\n+\n+\treturn -1;\n+}\n+\n+int __rte_experimental\n+rte_vfio_create_container(void)\n+{\n+\tstruct vfio_config *vfio_cfg;\n+\tint i;\n+\n+\t/* Find an empty slot to store new vfio config */\n+\tfor (i = 1; i < VFIO_MAX_CONTAINERS; i++) {\n+\t\tif (vfio_cfgs[i] == NULL)\n+\t\t\tbreak;\n+\t}\n+\n+\tif (i == VFIO_MAX_CONTAINERS) {\n+\t\tRTE_LOG(ERR, EAL, \"exceed max vfio container limit\\n\");\n+\t\treturn -1;\n+\t}\n+\n+\tvfio_cfgs[i] = rte_zmalloc(\"vfio_container\", sizeof(struct vfio_config),\n+\t\tRTE_CACHE_LINE_SIZE);\n+\tif (vfio_cfgs[i] == NULL)\n+\t\treturn -ENOMEM;\n+\n+\tRTE_LOG(INFO, EAL, \"alloc container at slot %d\\n\", i);\n+\tvfio_cfg = vfio_cfgs[i];\n+\tvfio_cfg->vfio_active_groups = 0;\n+\tvfio_cfg->vfio_container_fd = vfio_get_container_fd();\n+\n+\tif (vfio_cfg->vfio_container_fd < 0) {\n+\t\trte_free(vfio_cfgs[i]);\n+\t\tvfio_cfgs[i] = NULL;\n+\t\treturn -1;\n+\t}\n+\n+\tfor (i = 0; i < VFIO_MAX_GROUPS; i++) {\n+\t\tvfio_cfg->vfio_groups[i].group_no = -1;\n+\t\tvfio_cfg->vfio_groups[i].fd = -1;\n+\t\tvfio_cfg->vfio_groups[i].devices = 0;\n+\t}\n+\n+\treturn vfio_cfg->vfio_container_fd;\n+}\n+\n+int __rte_experimental\n+rte_vfio_destroy_container(int container_fd)\n+{\n+\tstruct vfio_config *vfio_cfg;\n+\tint i, idx;\n+\n+\tidx = get_container_idx(container_fd);\n+\tif (idx < 0) {\n+\t\tRTE_LOG(ERR, EAL, \"Invalid container fd\\n\");\n+\t\treturn -1;\n+\t}\n+\n+\tvfio_cfg = vfio_cfgs[idx];\n+\tfor (i = 0; i < VFIO_MAX_GROUPS; i++)\n+\t\tif (vfio_cfg->vfio_groups[i].group_no != -1)\n+\t\t\trte_vfio_unbind_group(container_fd,\n+\t\t\t\tvfio_cfg->vfio_groups[i].group_no);\n+\n+\trte_free(vfio_cfgs[idx]);\n+\tvfio_cfgs[idx] = NULL;\n+\tclose(container_fd);\n+\n+\treturn 0;\n+}\n+\n+int __rte_experimental\n+rte_vfio_bind_group(int container_fd, int iommu_group_no)\n+{\n+\tstruct vfio_config *vfio_cfg;\n+\tstruct vfio_group *cur_grp;\n+\tint vfio_group_fd;\n+\tint i;\n+\n+\ti = get_container_idx(container_fd);\n+\tif (i < 0) {\n+\t\tRTE_LOG(ERR, EAL, \"Invalid container fd\\n\");\n+\t\treturn -1;\n+\t}\n+\n+\tvfio_cfg = vfio_cfgs[i];\n+\t/* Check room for new group */\n+\tif (vfio_cfg->vfio_active_groups == VFIO_MAX_GROUPS) {\n+\t\tRTE_LOG(ERR, EAL, \"Maximum number of VFIO groups reached!\\n\");\n+\t\treturn -1;\n+\t}\n+\n+\t/* Get an index for the new group */\n+\tfor (i = 0; i < VFIO_MAX_GROUPS; i++)\n+\t\tif (vfio_cfg->vfio_groups[i].group_no == -1) {\n+\t\t\tcur_grp = &vfio_cfg->vfio_groups[i];\n+\t\t\tbreak;\n+\t\t}\n+\n+\t/* This should not happen */\n+\tif (i == VFIO_MAX_GROUPS) {\n+\t\tRTE_LOG(ERR, EAL, \"No VFIO group free slot found\\n\");\n+\t\treturn -1;\n+\t}\n+\n+\tvfio_group_fd = vfio_open_group_fd(iommu_group_no);\n+\tif (vfio_group_fd < 0) {\n+\t\tRTE_LOG(ERR, EAL, \"Failed to open group %d\\n\", iommu_group_no);\n+\t\treturn -1;\n+\t}\n+\tcur_grp->group_no = iommu_group_no;\n+\tcur_grp->fd = vfio_group_fd;\n+\tvfio_cfg->vfio_active_groups++;\n+\n+\treturn 
vfio_group_fd;\n+}\n+\n+int __rte_experimental\n+rte_vfio_unbind_group(int container_fd, int iommu_group_no)\n+{\n+\tstruct vfio_config *vfio_cfg;\n+\tstruct vfio_group *cur_grp;\n+\tint i;\n+\n+\ti = get_container_idx(container_fd);\n+\tif (i < 0) {\n+\t\tRTE_LOG(ERR, EAL, \"Invalid container fd\\n\");\n+\t\treturn -1;\n+\t}\n+\n+\tvfio_cfg = vfio_cfgs[i];\n+\tfor (i = 0; i < VFIO_MAX_GROUPS; i++) {\n+\t\tif (vfio_cfg->vfio_groups[i].group_no == iommu_group_no) {\n+\t\t\tcur_grp = &vfio_cfg->vfio_groups[i];\n+\t\t\tbreak;\n+\t\t}\n+\t}\n+\n+\t/* This should not happen */\n+\tif (i == VFIO_MAX_GROUPS) {\n+\t\tRTE_LOG(ERR, EAL, \"Specified group number not found\\n\");\n+\t\treturn -1;\n+\t}\n+\n+\tif (cur_grp->fd >= 0 && close(cur_grp->fd) < 0) {\n+\t\tRTE_LOG(ERR, EAL, \"Error when closing vfio_group_fd for\"\n+\t\t\t\t\" iommu_group_no %d\\n\",\n+\t\t\tiommu_group_no);\n+\t\treturn -1;\n+\t}\n+\tcur_grp->group_no = -1;\n+\tcur_grp->fd = -1;\n+\tvfio_cfg->vfio_active_groups--;\n+\n+\treturn 0;\n+}\n+\n+int\n+vfio_get_group_fd(int iommu_group_no)\n+{\n+\tstruct vfio_group *cur_grp;\n+\tstruct vfio_config *vfio_cfg;\n+\tint vfio_group_fd;\n+\tint i;\n+\n+\tvfio_cfg = get_vfio_cfg_by_group_no(iommu_group_no);\n+\n+\t/* check if we already have the group descriptor open */\n+\tfor (i = 0; i < VFIO_MAX_GROUPS; i++)\n+\t\tif (vfio_cfg->vfio_groups[i].group_no == iommu_group_no)\n+\t\t\treturn vfio_cfg->vfio_groups[i].fd;\n+\n+\t/* Lets see first if there is room for a new group */\n+\tif (vfio_cfg->vfio_active_groups == VFIO_MAX_GROUPS) {\n+\t\tRTE_LOG(ERR, EAL, \"Maximum number of VFIO groups reached!\\n\");\n+\t\treturn -1;\n+\t}\n+\n+\t/* Now lets get an index for the new group */\n+\tfor (i = 0; i < VFIO_MAX_GROUPS; i++)\n+\t\tif (vfio_cfg->vfio_groups[i].group_no == -1) {\n+\t\t\tcur_grp = &vfio_cfg->vfio_groups[i];\n+\t\t\tbreak;\n+\t\t}\n+\n+\t/* This should not happen */\n+\tif (i == VFIO_MAX_GROUPS) {\n+\t\tRTE_LOG(ERR, EAL, \"No VFIO group free slot found\\n\");\n+\t\treturn -1;\n+\t}\n+\n+\tvfio_group_fd = vfio_open_group_fd(iommu_group_no);\n+\tif (vfio_group_fd < 0) {\n+\t\tRTE_LOG(ERR, EAL, \"Failed to open group %d\\n\", iommu_group_no);\n+\t\treturn -1;\n+\t}\n+\n+\tcur_grp->group_no = iommu_group_no;\n+\tcur_grp->fd = vfio_group_fd;\n+\tvfio_cfg->vfio_active_groups++;\n+\n+\treturn vfio_group_fd;\n+}\n+\n+static int\n+get_vfio_group_idx(int vfio_group_fd)\n+{\n+\tstruct vfio_config *vfio_cfg;\n+\tint i, j;\n+\n+\tfor (i = 0; i < VFIO_MAX_CONTAINERS; i++) {\n+\t\tif (!vfio_cfgs[i])\n+\t\t\tcontinue;\n+\n+\t\tvfio_cfg = vfio_cfgs[i];\n+\t\tfor (j = 0; j < VFIO_MAX_GROUPS; j++) {\n+\t\t\tif (vfio_cfg->vfio_groups[j].fd == vfio_group_fd)\n+\t\t\t\treturn j;\n+\t\t}\n+\t}\n+\n \treturn -1;\n }\n \n static void\n vfio_group_device_get(int vfio_group_fd)\n {\n+\tstruct vfio_config *vfio_cfg;\n \tint i;\n \n+\tvfio_cfg = get_vfio_cfg_by_group_fd(vfio_group_fd);\n+\n \ti = get_vfio_group_idx(vfio_group_fd);\n \tif (i < 0 || i > (VFIO_MAX_GROUPS - 1))\n \t\tRTE_LOG(ERR, EAL, \"  wrong vfio_group index (%d)\\n\", i);\n \telse\n-\t\tvfio_cfg.vfio_groups[i].devices++;\n+\t\tvfio_cfg->vfio_groups[i].devices++;\n }\n \n static void\n vfio_group_device_put(int vfio_group_fd)\n {\n+\tstruct vfio_config *vfio_cfg;\n \tint i;\n \n+\tvfio_cfg = get_vfio_cfg_by_group_fd(vfio_group_fd);\n+\n \ti = get_vfio_group_idx(vfio_group_fd);\n \tif (i < 0 || i > (VFIO_MAX_GROUPS - 1))\n \t\tRTE_LOG(ERR, EAL, \"  wrong vfio_group index (%d)\\n\", i);\n 
\telse\n-\t\tvfio_cfg.vfio_groups[i].devices--;\n+\t\tvfio_cfg->vfio_groups[i].devices--;\n }\n \n static int\n vfio_group_device_count(int vfio_group_fd)\n {\n+\tstruct vfio_config *vfio_cfg;\n \tint i;\n \n+\tvfio_cfg = get_vfio_cfg_by_group_fd(vfio_group_fd);\n+\n \ti = get_vfio_group_idx(vfio_group_fd);\n \tif (i < 0 || i > (VFIO_MAX_GROUPS - 1)) {\n \t\tRTE_LOG(ERR, EAL, \"  wrong vfio_group index (%d)\\n\", i);\n \t\treturn -1;\n \t}\n \n-\treturn vfio_cfg.vfio_groups[i].devices;\n+\treturn vfio_cfg->vfio_groups[i].devices;\n }\n \n int\n rte_vfio_clear_group(int vfio_group_fd)\n {\n+\tstruct vfio_config *vfio_cfg;\n \tint i;\n \tint socket_fd, ret;\n \n+\tvfio_cfg = get_vfio_cfg_by_group_fd(vfio_group_fd);\n+\n \tif (internal_config.process_type == RTE_PROC_PRIMARY) {\n \n \t\ti = get_vfio_group_idx(vfio_group_fd);\n-\t\tif (i < 0)\n+\t\tif (i < 0 || i > (VFIO_MAX_GROUPS - 1)) {\n+\t\t\tRTE_LOG(ERR, EAL, \"  wrong vfio_group index (%d)\\n\", i);\n \t\t\treturn -1;\n-\t\tvfio_cfg.vfio_groups[i].group_no = -1;\n-\t\tvfio_cfg.vfio_groups[i].fd = -1;\n-\t\tvfio_cfg.vfio_groups[i].devices = 0;\n-\t\tvfio_cfg.vfio_active_groups--;\n+\t\t}\n+\t\tvfio_cfg->vfio_groups[i].group_no = -1;\n+\t\tvfio_cfg->vfio_groups[i].fd = -1;\n+\t\tvfio_cfg->vfio_groups[i].devices = 0;\n+\t\tvfio_cfg->vfio_active_groups--;\n \t\treturn 0;\n \t}\n \n@@ -261,6 +516,8 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,\n \tstruct vfio_group_status group_status = {\n \t\t\t.argsz = sizeof(group_status)\n \t};\n+\tstruct vfio_config *vfio_cfg;\n+\tint vfio_container_fd;\n \tint vfio_group_fd;\n \tint iommu_group_no;\n \tint ret;\n@@ -309,12 +566,14 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,\n \t\treturn -1;\n \t}\n \n+\tvfio_cfg = get_vfio_cfg_by_group_no(iommu_group_no);\n+\tvfio_container_fd = vfio_cfg->vfio_container_fd;\n+\n \t/* check if group does not have a container yet */\n \tif (!(group_status.flags & VFIO_GROUP_FLAGS_CONTAINER_SET)) {\n-\n \t\t/* add group to a container */\n \t\tret = ioctl(vfio_group_fd, VFIO_GROUP_SET_CONTAINER,\n-\t\t\t\t&vfio_cfg.vfio_container_fd);\n+\t\t\t\t&vfio_container_fd);\n \t\tif (ret) {\n \t\t\tRTE_LOG(ERR, EAL, \"  %s cannot add VFIO group to container, \"\n \t\t\t\t\t\"error %i (%s)\\n\", dev_addr, errno, strerror(errno));\n@@ -331,11 +590,12 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,\n \t\t * Note this can happen several times with the hotplug\n \t\t * functionality.\n \t\t */\n+\n \t\tif (internal_config.process_type == RTE_PROC_PRIMARY &&\n-\t\t\t\tvfio_cfg.vfio_active_groups == 1) {\n+\t\t\t\tvfio_cfg->vfio_active_groups == 1) {\n \t\t\t/* select an IOMMU type which we will be using */\n \t\t\tconst struct vfio_iommu_type *t =\n-\t\t\t\tvfio_set_iommu_type(vfio_cfg.vfio_container_fd);\n+\t\t\t\tvfio_set_iommu_type(vfio_container_fd);\n \t\t\tif (!t) {\n \t\t\t\tRTE_LOG(ERR, EAL,\n \t\t\t\t\t\"  %s failed to select IOMMU type\\n\",\n@@ -344,7 +604,13 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,\n \t\t\t\trte_vfio_clear_group(vfio_group_fd);\n \t\t\t\treturn -1;\n \t\t\t}\n-\t\t\tret = t->dma_map_func(vfio_cfg.vfio_container_fd);\n+\t\t\t/* DMA map for the default container only. 
*/\n+\t\t\tif (default_vfio_cfg.vfio_container_fd ==\n+\t\t\t\tvfio_container_fd)\n+\t\t\t\tret = t->dma_map_func(vfio_container_fd);\n+\t\t\telse\n+\t\t\t\tret = 0;\n+\n \t\t\tif (ret) {\n \t\t\t\tRTE_LOG(ERR, EAL,\n \t\t\t\t\t\"  %s DMA remapping failed, error %i (%s)\\n\",\n@@ -388,7 +654,7 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,\n \n int\n rte_vfio_release_device(const char *sysfs_base, const char *dev_addr,\n-\t\t    int vfio_dev_fd)\n+\t\t\tint vfio_dev_fd)\n {\n \tstruct vfio_group_status group_status = {\n \t\t\t.argsz = sizeof(group_status)\n@@ -456,9 +722,9 @@ rte_vfio_enable(const char *modname)\n \tint vfio_available;\n \n \tfor (i = 0; i < VFIO_MAX_GROUPS; i++) {\n-\t\tvfio_cfg.vfio_groups[i].fd = -1;\n-\t\tvfio_cfg.vfio_groups[i].group_no = -1;\n-\t\tvfio_cfg.vfio_groups[i].devices = 0;\n+\t\tdefault_vfio_cfg.vfio_groups[i].fd = -1;\n+\t\tdefault_vfio_cfg.vfio_groups[i].group_no = -1;\n+\t\tdefault_vfio_cfg.vfio_groups[i].devices = 0;\n \t}\n \n \t/* inform the user that we are probing for VFIO */\n@@ -480,12 +746,12 @@ rte_vfio_enable(const char *modname)\n \t\treturn 0;\n \t}\n \n-\tvfio_cfg.vfio_container_fd = vfio_get_container_fd();\n+\tdefault_vfio_cfg.vfio_container_fd = vfio_get_container_fd();\n \n \t/* check if we have VFIO driver enabled */\n-\tif (vfio_cfg.vfio_container_fd != -1) {\n+\tif (default_vfio_cfg.vfio_container_fd != -1) {\n \t\tRTE_LOG(NOTICE, EAL, \"VFIO support initialized\\n\");\n-\t\tvfio_cfg.vfio_enabled = 1;\n+\t\tdefault_vfio_cfg.vfio_enabled = 1;\n \t} else {\n \t\tRTE_LOG(NOTICE, EAL, \"VFIO support could not be initialized\\n\");\n \t}\n@@ -497,7 +763,7 @@ int\n rte_vfio_is_enabled(const char *modname)\n {\n \tconst int mod_available = rte_eal_check_module(modname) > 0;\n-\treturn vfio_cfg.vfio_enabled && mod_available;\n+\treturn default_vfio_cfg.vfio_enabled && mod_available;\n }\n \n const struct vfio_iommu_type *\n@@ -665,41 +931,80 @@ vfio_get_group_no(const char *sysfs_base,\n }\n \n static int\n-vfio_type1_dma_map(int vfio_container_fd)\n+do_vfio_type1_dma_map(int vfio_container_fd, const struct rte_memseg *ms)\n {\n-\tconst struct rte_memseg *ms = rte_eal_get_physmem_layout();\n-\tint i, ret;\n+\tint ret;\n+\tstruct vfio_iommu_type1_dma_map dma_map;\n \n-\t/* map all DPDK segments for DMA. 
use 1:1 PA to IOVA mapping */\n-\tfor (i = 0; i < RTE_MAX_MEMSEG; i++) {\n-\t\tstruct vfio_iommu_type1_dma_map dma_map;\n+\tmemset(&dma_map, 0, sizeof(dma_map));\n+\tdma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);\n+\tdma_map.vaddr = ms->addr_64;\n+\tdma_map.size = ms->len;\n \n-\t\tif (ms[i].addr == NULL)\n-\t\t\tbreak;\n+\tif (rte_eal_iova_mode() == RTE_IOVA_VA)\n+\t\tdma_map.iova = dma_map.vaddr;\n+\telse\n+\t\tdma_map.iova = ms->iova;\n+\tdma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;\n \n-\t\tmemset(&dma_map, 0, sizeof(dma_map));\n-\t\tdma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);\n-\t\tdma_map.vaddr = ms[i].addr_64;\n-\t\tdma_map.size = ms[i].len;\n-\t\tif (rte_eal_iova_mode() == RTE_IOVA_VA)\n-\t\t\tdma_map.iova = dma_map.vaddr;\n-\t\telse\n-\t\t\tdma_map.iova = ms[i].iova;\n-\t\tdma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;\n+\tret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);\n+\tif (ret) {\n+\t\tRTE_LOG(ERR, EAL,\n+\t\t\t\"  cannot set up DMA remapping, error %i (%s)\\n\",\n+\t\t\terrno,\n+\t\t\tstrerror(errno));\n+\t\treturn -1;\n+\t}\n \n-\t\tret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);\n+\treturn 0;\n+}\n \n-\t\tif (ret) {\n-\t\t\tRTE_LOG(ERR, EAL, \"  cannot set up DMA remapping, \"\n-\t\t\t\t\t  \"error %i (%s)\\n\", errno,\n-\t\t\t\t\t  strerror(errno));\n-\t\t\treturn -1;\n-\t\t}\n+static int\n+do_vfio_type1_dma_unmap(int vfio_container_fd, const struct rte_memseg *ms)\n+{\n+\tint ret;\n+\tstruct vfio_iommu_type1_dma_unmap dma_unmap;\n+\n+\tmemset(&dma_unmap, 0, sizeof(dma_unmap));\n+\tdma_unmap.argsz = sizeof(struct vfio_iommu_type1_dma_unmap);\n+\tdma_unmap.size = ms->len;\n+\n+\tif (rte_eal_iova_mode() == RTE_IOVA_VA)\n+\t\tdma_unmap.iova = ms->addr_64;\n+\telse\n+\t\tdma_unmap.iova = ms->iova;\n+\tdma_unmap.flags = 0;\n+\n+\tret = ioctl(vfio_container_fd, VFIO_IOMMU_UNMAP_DMA, &dma_unmap);\n+\tif (ret) {\n+\t\tRTE_LOG(ERR, EAL,\n+\t\t\t\"  cannot unmap DMA, error %i (%s)\\n\",\n+\t\t\terrno,\n+\t\t\tstrerror(errno));\n+\t\treturn -1;\n \t}\n \n \treturn 0;\n }\n \n+static int\n+vfio_type1_dma_map(int vfio_container_fd)\n+{\n+\tconst struct rte_memseg *ms = rte_eal_get_physmem_layout();\n+\tint i;\n+\tint ret = 0;\n+\n+\tfor (i = 0; i < RTE_MAX_MEMSEG; i++) {\n+\t\tif (ms[i].addr == NULL)\n+\t\t\tbreak;\n+\t\tret = do_vfio_type1_dma_map(vfio_container_fd, &ms[i]);\n+\t\tif (ret < 0)\n+\t\t\treturn ret;\n+\t}\n+\n+\treturn ret;\n+}\n+\n static int\n vfio_spapr_dma_map(int vfio_container_fd)\n {\n@@ -843,4 +1148,37 @@ rte_vfio_noiommu_is_enabled(void)\n \treturn c == 'Y';\n }\n \n+int __rte_experimental\n+rte_vfio_dma_map(int container_fd, int dma_type, const struct rte_memseg *ms)\n+{\n+\n+\tif (dma_type == RTE_VFIO_TYPE1) {\n+\t\treturn do_vfio_type1_dma_map(container_fd, ms);\n+\t} else if (dma_type == RTE_VFIO_SPAPR) {\n+\t\tRTE_LOG(ERR, EAL,\n+\t\t\t\"Additional dma map for SPAPR type not support yet.\");\n+\t\t\treturn -1;\n+\t} else if (dma_type == RTE_VFIO_NOIOMMU) {\n+\t\treturn 0;\n+\t}\n+\n+\treturn -1;\n+}\n+\n+int __rte_experimental\n+rte_vfio_dma_unmap(int container_fd, int dma_type, const struct rte_memseg *ms)\n+{\n+\tif (dma_type == RTE_VFIO_TYPE1) {\n+\t\treturn do_vfio_type1_dma_unmap(container_fd, ms);\n+\t} else if (dma_type == RTE_VFIO_SPAPR) {\n+\t\tRTE_LOG(ERR, EAL,\n+\t\t\t\"Additional dma unmap for SPAPR type not support yet.\");\n+\t\t\treturn -1;\n+\t} else if (dma_type == RTE_VFIO_NOIOMMU) {\n+\t\treturn 0;\n+\t}\n+\n+\treturn -1;\n+}\n+\n #endif\ndiff 
--git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h\nindex 80595773e..23a1e3608 100644\n--- a/lib/librte_eal/linuxapp/eal/eal_vfio.h\n+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h\n@@ -86,6 +86,7 @@ struct vfio_iommu_spapr_tce_info {\n #endif\n \n #define VFIO_MAX_GROUPS RTE_MAX_VFIO_GROUPS\n+#define VFIO_MAX_CONTAINERS RTE_MAX_VFIO_CONTAINERS\n \n /*\n  * Function prototypes for VFIO multiprocess sync functions\ndiff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map\nindex f331f54c9..fcf9494d1 100644\n--- a/lib/librte_eal/rte_eal_version.map\n+++ b/lib/librte_eal/rte_eal_version.map\n@@ -255,5 +255,11 @@ EXPERIMENTAL {\n \trte_service_set_runstate_mapped_check;\n \trte_service_set_stats_enable;\n \trte_service_start_with_defaults;\n+\trte_vfio_bind_group;\n+\trte_vfio_create_container;\n+\trte_vfio_destroy_container;\n+\trte_vfio_dma_map;\n+\trte_vfio_dma_unmap;\n+\trte_vfio_unbind_group;\n \n } DPDK_18.02;\n",
    "prefixes": [
        "dpdk-dev",
        "v5",
        "1/4"
    ]
}
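
A short sketch of consuming this response programmatically, again with the Python "requests" library. It relies only on fields visible above (state, check, mbox, comments, checks); the Accept header and the local mbox file name are assumptions for illustration.

    import requests

    patch = requests.get("http://patches.dpdk.org/api/patches/37258/",
                         headers={"Accept": "application/json"}).json()

    # Follow the hyperlinked sub-resources exposed in the response.
    comments = requests.get(patch["comments"]).json()  # list of comment objects
    checks = requests.get(patch["checks"]).json()      # per-context CI results

    # Download the raw mbox, e.g. for applying with `git am`.
    with open("37258.mbox", "wb") as f:
        f.write(requests.get(patch["mbox"]).content)

    print(patch["state"], patch["check"], len(comments), len(checks))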