get:
Show a patch.

patch:
Partially update a patch.

put:
Fully update (replace) a patch.
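
Any HTTP client can exercise these methods against the endpoint. The snippet below is a minimal, illustrative sketch only (it assumes libcurl is available and the program is linked with -lcurl); it issues the same GET request shown in the example that follows and prints the raw JSON body to stdout.

/* Fetch a single patch from the Patchwork REST API and print the JSON body.
 * Illustrative sketch assuming libcurl; build with: cc fetch_patch.c -lcurl
 */
#include <stdio.h>
#include <curl/curl.h>

/* libcurl write callback: forward the received bytes to stdout. */
static size_t
print_body(char *ptr, size_t size, size_t nmemb, void *userdata)
{
	(void)userdata;
	return fwrite(ptr, 1, size * nmemb, stdout);
}

int
main(void)
{
	CURL *curl;
	CURLcode res;

	curl_global_init(CURL_GLOBAL_DEFAULT);
	curl = curl_easy_init();
	if (curl == NULL)
		return 1;

	curl_easy_setopt(curl, CURLOPT_URL,
		"http://patches.dpdk.org/api/patches/37036/?format=api");
	curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, print_body);
	curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);

	res = curl_easy_perform(curl);
	if (res != CURLE_OK)
		fprintf(stderr, "request failed: %s\n", curl_easy_strerror(res));

	curl_easy_cleanup(curl);
	curl_global_cleanup();
	return res == CURLE_OK ? 0 : 1;
}

An example request and its JSON response follow.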

GET /api/patches/37036/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 37036,
    "url": "http://patches.dpdk.org/api/patches/37036/?format=api",
    "web_url": "http://patches.dpdk.org/project/dpdk/patch/0de49c396f7e2c0651acb4cc4a575919234f4c8d.1522797505.git.anatoly.burakov@intel.com/",
    "project": {
        "id": 1,
        "url": "http://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<0de49c396f7e2c0651acb4cc4a575919234f4c8d.1522797505.git.anatoly.burakov@intel.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/0de49c396f7e2c0651acb4cc4a575919234f4c8d.1522797505.git.anatoly.burakov@intel.com",
    "date": "2018-04-03T23:21:58",
    "name": "[dpdk-dev,v3,46/68] vfio: allow to map other memory regions",
    "commit_ref": null,
    "pull_url": null,
    "state": "superseded",
    "archived": true,
    "hash": "865c84ce438593d4b322fcc8465fc72b422b3966",
    "submitter": {
        "id": 4,
        "url": "http://patches.dpdk.org/api/people/4/?format=api",
        "name": "Anatoly Burakov",
        "email": "anatoly.burakov@intel.com"
    },
    "delegate": null,
    "mbox": "http://patches.dpdk.org/project/dpdk/patch/0de49c396f7e2c0651acb4cc4a575919234f4c8d.1522797505.git.anatoly.burakov@intel.com/mbox/",
    "series": [],
    "comments": "http://patches.dpdk.org/api/patches/37036/comments/",
    "check": "fail",
    "checks": "http://patches.dpdk.org/api/patches/37036/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@dpdk.org",
        "Delivered-To": "patchwork@dpdk.org",
        "Received": [
            "from [92.243.14.124] (localhost [127.0.0.1])\n\tby dpdk.org (Postfix) with ESMTP id 29D731BA86;\n\tWed,  4 Apr 2018 01:23:46 +0200 (CEST)",
            "from mga01.intel.com (mga01.intel.com [192.55.52.88])\n\tby dpdk.org (Postfix) with ESMTP id DD0B21B897\n\tfor <dev@dpdk.org>; Wed,  4 Apr 2018 01:22:33 +0200 (CEST)",
            "from orsmga004.jf.intel.com ([10.7.209.38])\n\tby fmsmga101.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384;\n\t03 Apr 2018 16:22:33 -0700",
            "from irvmail001.ir.intel.com ([163.33.26.43])\n\tby orsmga004.jf.intel.com with ESMTP; 03 Apr 2018 16:22:29 -0700",
            "from sivswdev01.ir.intel.com (sivswdev01.ir.intel.com\n\t[10.237.217.45])\n\tby irvmail001.ir.intel.com (8.14.3/8.13.6/MailSET/Hub) with ESMTP id\n\tw33NMSkX013176; Wed, 4 Apr 2018 00:22:28 +0100",
            "from sivswdev01.ir.intel.com (localhost [127.0.0.1])\n\tby sivswdev01.ir.intel.com with ESMTP id w33NMSRF014913;\n\tWed, 4 Apr 2018 00:22:28 +0100",
            "(from aburakov@localhost)\n\tby sivswdev01.ir.intel.com with LOCAL id w33NMS71014908;\n\tWed, 4 Apr 2018 00:22:28 +0100"
        ],
        "X-Amp-Result": "SKIPPED(no attachment in message)",
        "X-Amp-File-Uploaded": "False",
        "X-ExtLoop1": "1",
        "X-IronPort-AV": "E=Sophos;i=\"5.48,403,1517904000\"; d=\"scan'208\";a=\"188428160\"",
        "From": "Anatoly Burakov <anatoly.burakov@intel.com>",
        "To": "dev@dpdk.org",
        "Cc": "Bruce Richardson <bruce.richardson@intel.com>, keith.wiles@intel.com,\n\tjianfeng.tan@intel.com, andras.kovacs@ericsson.com,\n\tlaszlo.vadkeri@ericsson.com, benjamin.walker@intel.com,\n\tthomas@monjalon.net, konstantin.ananyev@intel.com,\n\tkuralamudhan.ramakrishnan@intel.com, louise.m.daly@intel.com,\n\tnelio.laranjeiro@6wind.com, yskoh@mellanox.com, pepperjo@japf.ch,\n\tjerin.jacob@caviumnetworks.com, hemant.agrawal@nxp.com,\n\tolivier.matz@6wind.com, shreyansh.jain@nxp.com,\n\tgowrishankar.m@linux.vnet.ibm.com,\n\tPawel Wodkowski <pawelx.wodkowski@intel.com>",
        "Date": "Wed,  4 Apr 2018 00:21:58 +0100",
        "Message-Id": "<0de49c396f7e2c0651acb4cc4a575919234f4c8d.1522797505.git.anatoly.burakov@intel.com>",
        "X-Mailer": "git-send-email 1.7.0.7",
        "In-Reply-To": [
            "<cover.1522797505.git.anatoly.burakov@intel.com>",
            "<cover.1522797505.git.anatoly.burakov@intel.com>"
        ],
        "References": [
            "<cover.1522797505.git.anatoly.burakov@intel.com>",
            "<cover.1520428025.git.anatoly.burakov@intel.com>\n\t<cover.1522797505.git.anatoly.burakov@intel.com>"
        ],
        "Subject": "[dpdk-dev] [PATCH v3 46/68] vfio: allow to map other memory regions",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.15",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://dpdk.org/ml/options/dev>,\n\t<mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://dpdk.org/ml/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://dpdk.org/ml/listinfo/dev>,\n\t<mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>"
    },
    "content": "Currently it is not possible to use memory that is not owned by DPDK to\nperform DMA. This scenarion might be used in vhost applications (like\nSPDK) where guest send its own memory table. To fill this gap provide\nAPI to allow registering arbitrary address in VFIO container.\n\nSigned-off-by: Pawel Wodkowski <pawelx.wodkowski@intel.com>\nSigned-off-by: Anatoly Burakov <anatoly.burakov@intel.com>\nSigned-off-by: Gowrishankar Muthukrishnan <gowrishankar.m@linux.vnet.ibm.com>\n---\n\nNotes:\n    v3:\n    - Added PPC64, courtesy of Gowrishankar\n    \n    v3:\n    - Moved to earlier in the patchset\n    - Made API experimental\n    - Do not print out error message if init isn't finished\n    - SPAPR code provided by Gowrishankar\n\n lib/librte_eal/bsdapp/eal/eal.c          |  16 ++\n lib/librte_eal/common/include/rte_vfio.h |  39 ++++\n lib/librte_eal/linuxapp/eal/eal_vfio.c   | 347 ++++++++++++++++++++++++-------\n lib/librte_eal/linuxapp/eal/eal_vfio.h   |  12 ++\n lib/librte_eal/rte_eal_version.map       |   2 +\n 5 files changed, 341 insertions(+), 75 deletions(-)",
    "diff": "diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c\nindex 8e25d78..032a5ea 100644\n--- a/lib/librte_eal/bsdapp/eal/eal.c\n+++ b/lib/librte_eal/bsdapp/eal/eal.c\n@@ -749,6 +749,8 @@ int rte_vfio_enable(const char *modname);\n int rte_vfio_is_enabled(const char *modname);\n int rte_vfio_noiommu_is_enabled(void);\n int rte_vfio_clear_group(int vfio_group_fd);\n+int rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len);\n+int rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len);\n \n int rte_vfio_setup_device(__rte_unused const char *sysfs_base,\n \t\t      __rte_unused const char *dev_addr,\n@@ -784,3 +786,17 @@ int rte_vfio_clear_group(__rte_unused int vfio_group_fd)\n {\n \treturn 0;\n }\n+\n+int __rte_experimental\n+rte_vfio_dma_map(uint64_t __rte_unused vaddr, __rte_unused uint64_t iova,\n+\t\t  __rte_unused uint64_t len)\n+{\n+\treturn -1;\n+}\n+\n+int __rte_experimental\n+rte_vfio_dma_unmap(uint64_t __rte_unused vaddr, uint64_t __rte_unused iova,\n+\t\t    __rte_unused uint64_t len)\n+{\n+\treturn -1;\n+}\ndiff --git a/lib/librte_eal/common/include/rte_vfio.h b/lib/librte_eal/common/include/rte_vfio.h\nindex 249095e..bd4663c 100644\n--- a/lib/librte_eal/common/include/rte_vfio.h\n+++ b/lib/librte_eal/common/include/rte_vfio.h\n@@ -127,6 +127,45 @@ int rte_vfio_noiommu_is_enabled(void);\n int\n rte_vfio_clear_group(int vfio_group_fd);\n \n+/**\n+ * Map memory region for use with VFIO.\n+ *\n+ * @param vaddr\n+ *   Starting virtual address of memory to be mapped.\n+ *\n+ * @param iova\n+ *   Starting IOVA address of memory to be mapped.\n+ *\n+ * @param len\n+ *   Length of memory segment being mapped.\n+ *\n+ * @return\n+ *   0 if success.\n+ *   -1 on error.\n+ */\n+int  __rte_experimental\n+rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len);\n+\n+\n+/**\n+ * Unmap memory region from VFIO.\n+ *\n+ * @param vaddr\n+ *   Starting virtual address of memory to be unmapped.\n+ *\n+ * @param iova\n+ *   Starting IOVA address of memory to be unmapped.\n+ *\n+ * @param len\n+ *   Length of memory segment being unmapped.\n+ *\n+ * @return\n+ *   0 if success.\n+ *   -1 on error.\n+ */\n+int __rte_experimental\n+rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len);\n+\n #ifdef __cplusplus\n }\n #endif\ndiff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.c b/lib/librte_eal/linuxapp/eal/eal_vfio.c\nindex fb41e82..f6fe93e 100644\n--- a/lib/librte_eal/linuxapp/eal/eal_vfio.c\n+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.c\n@@ -22,17 +22,35 @@\n static struct vfio_config vfio_cfg;\n \n static int vfio_type1_dma_map(int);\n+static int vfio_type1_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int);\n static int vfio_spapr_dma_map(int);\n+static int vfio_spapr_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int);\n static int vfio_noiommu_dma_map(int);\n+static int vfio_noiommu_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int);\n \n /* IOMMU types we support */\n static const struct vfio_iommu_type iommu_types[] = {\n \t/* x86 IOMMU, otherwise known as type 1 */\n-\t{ RTE_VFIO_TYPE1, \"Type 1\", &vfio_type1_dma_map},\n+\t{\n+\t\t.type_id = RTE_VFIO_TYPE1,\n+\t\t.name = \"Type 1\",\n+\t\t.dma_map_func = &vfio_type1_dma_map,\n+\t\t.dma_user_map_func = &vfio_type1_dma_mem_map\n+\t},\n \t/* ppc64 IOMMU, otherwise known as spapr */\n-\t{ RTE_VFIO_SPAPR, \"sPAPR\", &vfio_spapr_dma_map},\n+\t{\n+\t\t.type_id = RTE_VFIO_SPAPR,\n+\t\t.name = \"sPAPR\",\n+\t\t.dma_map_func = &vfio_spapr_dma_map,\n+\t\t.dma_user_map_func = 
&vfio_spapr_dma_mem_map\n+\t},\n \t/* IOMMU-less mode */\n-\t{ RTE_VFIO_NOIOMMU, \"No-IOMMU\", &vfio_noiommu_dma_map},\n+\t{\n+\t\t.type_id = RTE_VFIO_NOIOMMU,\n+\t\t.name = \"No-IOMMU\",\n+\t\t.dma_map_func = &vfio_noiommu_dma_map,\n+\t\t.dma_user_map_func = &vfio_noiommu_dma_mem_map\n+\t},\n };\n \n int\n@@ -333,9 +351,10 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,\n \t\t */\n \t\tif (internal_config.process_type == RTE_PROC_PRIMARY &&\n \t\t\t\tvfio_cfg.vfio_active_groups == 1) {\n+\t\t\tconst struct vfio_iommu_type *t;\n+\n \t\t\t/* select an IOMMU type which we will be using */\n-\t\t\tconst struct vfio_iommu_type *t =\n-\t\t\t\tvfio_set_iommu_type(vfio_cfg.vfio_container_fd);\n+\t\t\tt = vfio_set_iommu_type(vfio_cfg.vfio_container_fd);\n \t\t\tif (!t) {\n \t\t\t\tRTE_LOG(ERR, EAL,\n \t\t\t\t\t\"  %s failed to select IOMMU type\\n\",\n@@ -353,6 +372,8 @@ rte_vfio_setup_device(const char *sysfs_base, const char *dev_addr,\n \t\t\t\trte_vfio_clear_group(vfio_group_fd);\n \t\t\t\treturn -1;\n \t\t\t}\n+\n+\t\t\tvfio_cfg.vfio_iommu_type = t;\n \t\t}\n \t}\n \n@@ -668,23 +689,49 @@ static int\n type1_map(const struct rte_memseg *ms, void *arg)\n {\n \tint *vfio_container_fd = arg;\n+\n+\treturn vfio_type1_dma_mem_map(*vfio_container_fd, ms->addr_64, ms->iova,\n+\t\t\tms->len, 1);\n+}\n+\n+static int\n+vfio_type1_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,\n+\t\tuint64_t len, int do_map)\n+{\n \tstruct vfio_iommu_type1_dma_map dma_map;\n+\tstruct vfio_iommu_type1_dma_unmap dma_unmap;\n \tint ret;\n \n-\tmemset(&dma_map, 0, sizeof(dma_map));\n-\tdma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);\n-\tdma_map.vaddr = ms->addr_64;\n-\tdma_map.size = ms->len;\n-\tdma_map.iova = ms->iova;\n-\tdma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;\n-\n-\tret = ioctl(*vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);\n+\tif (do_map != 0) {\n+\t\tmemset(&dma_map, 0, sizeof(dma_map));\n+\t\tdma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);\n+\t\tdma_map.vaddr = vaddr;\n+\t\tdma_map.size = len;\n+\t\tdma_map.iova = iova;\n+\t\tdma_map.flags = VFIO_DMA_MAP_FLAG_READ |\n+\t\t\t\tVFIO_DMA_MAP_FLAG_WRITE;\n \n-\tif (ret) {\n-\t\tRTE_LOG(ERR, EAL, \"  cannot set up DMA remapping, error %i (%s)\\n\",\n+\t\tret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);\n+\t\tif (ret) {\n+\t\t\tRTE_LOG(ERR, EAL, \"  cannot set up DMA remapping, error %i (%s)\\n\",\n \t\t\t\terrno, strerror(errno));\n-\t\treturn -1;\n+\t\t\t\treturn -1;\n+\t\t}\n+\t} else {\n+\t\tmemset(&dma_unmap, 0, sizeof(dma_unmap));\n+\t\tdma_unmap.argsz = sizeof(struct vfio_iommu_type1_dma_unmap);\n+\t\tdma_unmap.size = len;\n+\t\tdma_unmap.iova = iova;\n+\n+\t\tret = ioctl(vfio_container_fd, VFIO_IOMMU_UNMAP_DMA,\n+\t\t\t\t&dma_unmap);\n+\t\tif (ret) {\n+\t\t\tRTE_LOG(ERR, EAL, \"  cannot clear DMA remapping, error %i (%s)\\n\",\n+\t\t\t\t\terrno, strerror(errno));\n+\t\t\treturn -1;\n+\t\t}\n \t}\n+\n \treturn 0;\n }\n \n@@ -694,12 +741,78 @@ vfio_type1_dma_map(int vfio_container_fd)\n \treturn rte_memseg_walk(type1_map, &vfio_container_fd);\n }\n \n+static int\n+vfio_spapr_dma_do_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,\n+\t\tuint64_t len, int do_map)\n+{\n+\tstruct vfio_iommu_type1_dma_map dma_map;\n+\tstruct vfio_iommu_type1_dma_unmap dma_unmap;\n+\tint ret;\n+\n+\tif (do_map != 0) {\n+\t\tmemset(&dma_map, 0, sizeof(dma_map));\n+\t\tdma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);\n+\t\tdma_map.vaddr = vaddr;\n+\t\tdma_map.size = 
len;\n+\t\tdma_map.iova = iova;\n+\t\tdma_map.flags = VFIO_DMA_MAP_FLAG_READ |\n+\t\t\t\tVFIO_DMA_MAP_FLAG_WRITE;\n+\n+\t\tret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);\n+\t\tif (ret) {\n+\t\t\tRTE_LOG(ERR, EAL, \"  cannot set up DMA remapping, error %i (%s)\\n\",\n+\t\t\t\terrno, strerror(errno));\n+\t\t\t\treturn -1;\n+\t\t}\n+\n+\t} else {\n+\t\tstruct vfio_iommu_spapr_register_memory reg = {\n+\t\t\t.argsz = sizeof(reg),\n+\t\t\t.flags = 0\n+\t\t};\n+\t\treg.vaddr = (uintptr_t) vaddr;\n+\t\treg.size = len;\n+\n+\t\tret = ioctl(vfio_container_fd,\n+\t\t\t\tVFIO_IOMMU_SPAPR_UNREGISTER_MEMORY, &reg);\n+\t\tif (ret) {\n+\t\t\tRTE_LOG(ERR, EAL, \"  cannot unregister vaddr for IOMMU, error %i (%s)\\n\",\n+\t\t\t\t\terrno, strerror(errno));\n+\t\t\treturn -1;\n+\t\t}\n+\n+\t\tmemset(&dma_unmap, 0, sizeof(dma_unmap));\n+\t\tdma_unmap.argsz = sizeof(struct vfio_iommu_type1_dma_unmap);\n+\t\tdma_unmap.size = len;\n+\t\tdma_unmap.iova = iova;\n+\n+\t\tret = ioctl(vfio_container_fd, VFIO_IOMMU_UNMAP_DMA,\n+\t\t\t\t&dma_unmap);\n+\t\tif (ret) {\n+\t\t\tRTE_LOG(ERR, EAL, \"  cannot clear DMA remapping, error %i (%s)\\n\",\n+\t\t\t\t\terrno, strerror(errno));\n+\t\t\treturn -1;\n+\t\t}\n+\t}\n+\n+\treturn 0;\n+}\n+\n+static int\n+vfio_spapr_map_walk(const struct rte_memseg *ms, void *arg)\n+{\n+\tint *vfio_container_fd = arg;\n+\n+\treturn vfio_spapr_dma_mem_map(*vfio_container_fd, ms->addr_64, ms->iova,\n+\t\t\tms->len, 1);\n+}\n+\n struct spapr_walk_param {\n \tuint64_t window_size;\n \tuint64_t hugepage_sz;\n };\n static int\n-spapr_window_size(const struct rte_memseg *ms, void *arg)\n+vfio_spapr_window_size_walk(const struct rte_memseg *ms, void *arg)\n {\n \tstruct spapr_walk_param *param = arg;\n \tuint64_t max = ms->iova + ms->len;\n@@ -713,39 +826,43 @@ spapr_window_size(const struct rte_memseg *ms, void *arg)\n }\n \n static int\n-spapr_map(const struct rte_memseg *ms, void *arg)\n-{\n-\tstruct vfio_iommu_type1_dma_map dma_map;\n-\tstruct vfio_iommu_spapr_register_memory reg = {\n-\t\t.argsz = sizeof(reg),\n-\t\t.flags = 0\n+vfio_spapr_create_new_dma_window(int vfio_container_fd,\n+\t\tstruct vfio_iommu_spapr_tce_create *create) {\n+\tstruct vfio_iommu_spapr_tce_remove remove = {\n+\t\t.argsz = sizeof(remove),\n+\t};\n+\tstruct vfio_iommu_spapr_tce_info info = {\n+\t\t.argsz = sizeof(info),\n \t};\n-\tint *vfio_container_fd = arg;\n \tint ret;\n \n-\treg.vaddr = (uintptr_t) ms->addr;\n-\treg.size = ms->len;\n-\tret = ioctl(*vfio_container_fd,\n-\t\tVFIO_IOMMU_SPAPR_REGISTER_MEMORY, &reg);\n+\t/* query spapr iommu info */\n+\tret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info);\n \tif (ret) {\n-\t\tRTE_LOG(ERR, EAL, \"  cannot register vaddr for IOMMU, error %i (%s)\\n\",\n-\t\t\t\terrno, strerror(errno));\n+\t\tRTE_LOG(ERR, EAL, \"  cannot get iommu info, \"\n+\t\t\t\t\"error %i (%s)\\n\", errno, strerror(errno));\n \t\treturn -1;\n \t}\n \n-\tmemset(&dma_map, 0, sizeof(dma_map));\n-\tdma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);\n-\tdma_map.vaddr = ms->addr_64;\n-\tdma_map.size = ms->len;\n-\tdma_map.iova = ms->iova;\n-\tdma_map.flags = VFIO_DMA_MAP_FLAG_READ |\n-\t\t\t VFIO_DMA_MAP_FLAG_WRITE;\n-\n-\tret = ioctl(*vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);\n+\t/* remove default DMA of 32 bit window */\n+\tremove.start_addr = info.dma32_window_start;\n+\tret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_REMOVE, &remove);\n+\tif (ret) {\n+\t\tRTE_LOG(ERR, EAL, \"  cannot remove default DMA window, \"\n+\t\t\t\t\"error %i (%s)\\n\", errno, 
strerror(errno));\n+\t\treturn -1;\n+\t}\n \n+\t/* create new DMA window */\n+\tret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_CREATE, create);\n \tif (ret) {\n-\t\tRTE_LOG(ERR, EAL, \"  cannot set up DMA remapping, error %i (%s)\\n\",\n-\t\t\t\terrno, strerror(errno));\n+\t\tRTE_LOG(ERR, EAL, \"  cannot create new DMA window, \"\n+\t\t\t\t\"error %i (%s)\\n\", errno, strerror(errno));\n+\t\treturn -1;\n+\t}\n+\n+\tif (create->start_addr != 0) {\n+\t\tRTE_LOG(ERR, EAL, \"  DMA window start address != 0\\n\");\n \t\treturn -1;\n \t}\n \n@@ -753,61 +870,82 @@ spapr_map(const struct rte_memseg *ms, void *arg)\n }\n \n static int\n-vfio_spapr_dma_map(int vfio_container_fd)\n+vfio_spapr_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,\n+\t\tuint64_t len, int do_map)\n {\n \tstruct spapr_walk_param param;\n-\tint ret;\n-\tstruct vfio_iommu_spapr_tce_info info = {\n-\t\t.argsz = sizeof(info),\n-\t};\n \tstruct vfio_iommu_spapr_tce_create create = {\n \t\t.argsz = sizeof(create),\n \t};\n-\tstruct vfio_iommu_spapr_tce_remove remove = {\n-\t\t.argsz = sizeof(remove),\n-\t};\n \n+\t/* check if window size needs to be adjusted */\n \tmemset(&param, 0, sizeof(param));\n \n-\t/* query spapr iommu info */\n-\tret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &info);\n-\tif (ret) {\n-\t\tRTE_LOG(ERR, EAL, \"  cannot get iommu info, \"\n-\t\t\t\t\"error %i (%s)\\n\", errno, strerror(errno));\n+\tif (rte_memseg_walk(vfio_spapr_window_size_walk, &param) < 0) {\n+\t\tRTE_LOG(ERR, EAL, \"Could not get window size\\n\");\n \t\treturn -1;\n \t}\n \n-\t/* remove default DMA of 32 bit window */\n-\tremove.start_addr = info.dma32_window_start;\n-\tret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_REMOVE, &remove);\n-\tif (ret) {\n-\t\tRTE_LOG(ERR, EAL, \"  cannot remove default DMA window, \"\n-\t\t\t\t\"error %i (%s)\\n\", errno, strerror(errno));\n-\t\treturn -1;\n+\t/* sPAPR requires window size to be a power of 2 */\n+\tcreate.window_size = rte_align64pow2(param.window_size);\n+\tcreate.page_shift = __builtin_ctzll(param.hugepage_sz);\n+\tcreate.levels = 1;\n+\n+\tif (do_map) {\n+\t\t/* re-create window and remap the entire memory */\n+\t\tif (iova > create.window_size) {\n+\t\t\tif (vfio_spapr_create_new_dma_window(vfio_container_fd,\n+\t\t\t\t\t&create) < 0) {\n+\t\t\t\tRTE_LOG(ERR, EAL, \"Could not create new DMA window\\n\");\n+\t\t\t\treturn -1;\n+\t\t\t}\n+\t\t\tif (rte_memseg_walk(vfio_spapr_map_walk,\n+\t\t\t\t\t&vfio_container_fd) < 0) {\n+\t\t\t\tRTE_LOG(ERR, EAL, \"Could not recreate DMA maps\\n\");\n+\t\t\t\treturn -1;\n+\t\t\t}\n+\t\t}\n+\t\t/* now that we've remapped all of the memory that was present\n+\t\t * before, map the segment that we were requested to map.\n+\t\t */\n+\t\tif (vfio_spapr_dma_do_map(vfio_container_fd,\n+\t\t\t\tvaddr, iova, len, 1) < 0) {\n+\t\t\tRTE_LOG(ERR, EAL, \"Could not map segment\\n\");\n+\t\t\treturn -1;\n+\t\t}\n+\t} else {\n+\n+\t\t/* for unmap, check if iova within DMA window */\n+\t\tif (iova > create.window_size) {\n+\t\t\tRTE_LOG(ERR, EAL, \"iova beyond DMA window for unmap\");\n+\t\t\treturn -1;\n+\t\t}\n+\n+\t\tvfio_spapr_dma_do_map(vfio_container_fd, vaddr, iova, len, 0);\n \t}\n+\treturn 0;\n+}\n+\n+static int\n+vfio_spapr_dma_map(int vfio_container_fd)\n+{\n+\tstruct vfio_iommu_spapr_tce_create create = {\n+\t\t.argsz = sizeof(create),\n+\t};\n+\tstruct spapr_walk_param param;\n+\n+\tmemset(&param, 0, sizeof(param));\n \n \t/* create DMA window from 0 to max(phys_addr + len) */\n-\trte_memseg_walk(spapr_window_size, 
&param);\n+\trte_memseg_walk(vfio_spapr_window_size_walk, &param);\n \n \t/* sPAPR requires window size to be a power of 2 */\n \tcreate.window_size = rte_align64pow2(param.window_size);\n \tcreate.page_shift = __builtin_ctzll(param.hugepage_sz);\n \tcreate.levels = 1;\n \n-\tret = ioctl(vfio_container_fd, VFIO_IOMMU_SPAPR_TCE_CREATE, &create);\n-\tif (ret) {\n-\t\tRTE_LOG(ERR, EAL, \"  cannot create new DMA window, \"\n-\t\t\t\t\"error %i (%s)\\n\", errno, strerror(errno));\n-\t\treturn -1;\n-\t}\n-\n-\tif (create.start_addr != 0) {\n-\t\tRTE_LOG(ERR, EAL, \"  DMA window start address != 0\\n\");\n-\t\treturn -1;\n-\t}\n-\n \t/* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */\n-\tif (rte_memseg_walk(spapr_map, &vfio_container_fd) < 0)\n+\tif (rte_memseg_walk(vfio_spapr_map_walk, &vfio_container_fd) < 0)\n \t\treturn -1;\n \n \treturn 0;\n@@ -820,6 +958,49 @@ vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)\n \treturn 0;\n }\n \n+static int\n+vfio_noiommu_dma_mem_map(int __rte_unused vfio_container_fd,\n+\t\t\t uint64_t __rte_unused vaddr,\n+\t\t\t uint64_t __rte_unused iova, uint64_t __rte_unused len,\n+\t\t\t int __rte_unused do_map)\n+{\n+\t/* No-IOMMU mode does not need DMA mapping */\n+\treturn 0;\n+}\n+\n+static int\n+vfio_dma_mem_map(uint64_t vaddr, uint64_t iova, uint64_t len, int do_map)\n+{\n+\tconst struct vfio_iommu_type *t = vfio_cfg.vfio_iommu_type;\n+\n+\tif (!t) {\n+\t\tRTE_LOG(ERR, EAL, \"  VFIO support not initialized\\n\");\n+\t\treturn -1;\n+\t}\n+\n+\tif (!t->dma_user_map_func) {\n+\t\tRTE_LOG(ERR, EAL,\n+\t\t\t\"  VFIO custom DMA region maping not supported by IOMMU %s\\n\",\n+\t\t\tt->name);\n+\t\treturn -1;\n+\t}\n+\n+\treturn t->dma_user_map_func(vfio_cfg.vfio_container_fd, vaddr, iova,\n+\t\t\tlen, do_map);\n+}\n+\n+int __rte_experimental\n+rte_vfio_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len)\n+{\n+\treturn vfio_dma_mem_map(vaddr, iova, len, 1);\n+}\n+\n+int __rte_experimental\n+rte_vfio_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len)\n+{\n+\treturn vfio_dma_mem_map(vaddr, iova, len, 0);\n+}\n+\n int\n rte_vfio_noiommu_is_enabled(void)\n {\n@@ -852,4 +1033,20 @@ rte_vfio_noiommu_is_enabled(void)\n \treturn c == 'Y';\n }\n \n+#else\n+\n+int __rte_experimental\n+rte_vfio_dma_map(uint64_t __rte_unused vaddr, __rte_unused uint64_t iova,\n+\t\t  __rte_unused uint64_t len)\n+{\n+\treturn -1;\n+}\n+\n+int __rte_experimental\n+rte_vfio_dma_unmap(uint64_t __rte_unused vaddr, uint64_t __rte_unused iova,\n+\t\t    __rte_unused uint64_t len)\n+{\n+\treturn -1;\n+}\n+\n #endif\ndiff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h\nindex 8059577..549f442 100644\n--- a/lib/librte_eal/linuxapp/eal/eal_vfio.h\n+++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h\n@@ -19,6 +19,7 @@\n \n #ifdef VFIO_PRESENT\n \n+#include <stdint.h>\n #include <linux/vfio.h>\n \n #define RTE_VFIO_TYPE1 VFIO_TYPE1_IOMMU\n@@ -26,6 +27,7 @@\n #ifndef VFIO_SPAPR_TCE_v2_IOMMU\n #define RTE_VFIO_SPAPR 7\n #define VFIO_IOMMU_SPAPR_REGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 17)\n+#define VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY _IO(VFIO_TYPE, VFIO_BASE + 18)\n #define VFIO_IOMMU_SPAPR_TCE_CREATE _IO(VFIO_TYPE, VFIO_BASE + 19)\n #define VFIO_IOMMU_SPAPR_TCE_REMOVE _IO(VFIO_TYPE, VFIO_BASE + 20)\n \n@@ -110,6 +112,7 @@ struct vfio_config {\n \tint vfio_enabled;\n \tint vfio_container_fd;\n \tint vfio_active_groups;\n+\tconst struct vfio_iommu_type *vfio_iommu_type;\n \tstruct vfio_group vfio_groups[VFIO_MAX_GROUPS];\n };\n \n@@ -119,9 +122,18 @@ 
struct vfio_config {\n  * */\n typedef int (*vfio_dma_func_t)(int);\n \n+/* Custom memory region DMA mapping function prototype.\n+ * Takes VFIO container fd, virtual address, phisical address, length and\n+ * operation type (0 to unmap 1 for map) as a parameters.\n+ * Returns 0 on success, -1 on error.\n+ **/\n+typedef int (*vfio_dma_user_func_t)(int fd, uint64_t vaddr, uint64_t iova,\n+\t\tuint64_t len, int do_map);\n+\n struct vfio_iommu_type {\n \tint type_id;\n \tconst char *name;\n+\tvfio_dma_user_func_t dma_user_map_func;\n \tvfio_dma_func_t dma_map_func;\n };\n \ndiff --git a/lib/librte_eal/rte_eal_version.map b/lib/librte_eal/rte_eal_version.map\nindex 70ec778..fe4a9c9 100644\n--- a/lib/librte_eal/rte_eal_version.map\n+++ b/lib/librte_eal/rte_eal_version.map\n@@ -266,5 +266,7 @@ EXPERIMENTAL {\n \trte_service_start_with_defaults;\n \trte_socket_count;\n \trte_socket_id_by_idx;\n+\trte_vfio_dma_map;\n+\trte_vfio_dma_unmap;\n \n } DPDK_18.02;\n",
    "prefixes": [
        "dpdk-dev",
        "v3",
        "46/68"
    ]
}
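
The patch shown above adds the experimental rte_vfio_dma_map()/rte_vfio_dma_unmap() calls for registering memory that DPDK does not own with the VFIO container. The sketch below is a hypothetical usage illustration, not part of the patch itself: it assumes EAL and VFIO have already been initialised with a VFIO-bound device, takes the external region from an anonymous mmap(), and reuses the virtual address as the IOVA purely for illustration.

/* Illustrative sketch: register an externally owned buffer for DMA via the
 * experimental API added by this patch. Assumes EAL/VFIO are initialised.
 */
#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>

#include <rte_vfio.h>

int
register_external_memory(void)
{
	uint64_t len = 2 * 1024 * 1024;	/* one 2 MB region, for illustration */
	void *addr;
	uint64_t vaddr, iova;

	/* Memory not owned by DPDK, e.g. a region shared by a vhost guest. */
	addr = mmap(NULL, len, PROT_READ | PROT_WRITE,
			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (addr == MAP_FAILED)
		return -1;

	vaddr = (uint64_t)(uintptr_t)addr;
	iova = vaddr;	/* VA-as-IOVA, an assumption made only for this sketch */

	if (rte_vfio_dma_map(vaddr, iova, len) < 0) {
		fprintf(stderr, "rte_vfio_dma_map failed\n");
		munmap(addr, len);
		return -1;
	}

	/* ... the device may now DMA to/from this region ... */

	if (rte_vfio_dma_unmap(vaddr, iova, len) < 0)
		fprintf(stderr, "rte_vfio_dma_unmap failed\n");

	munmap(addr, len);
	return 0;
}

Per the header comments in the diff, both calls return 0 on success and -1 on error, and the BSD build provides stubs that always return -1.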