@@ -126,6 +126,9 @@ ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
CFLAGS_eal_thread.o += -Wno-return-type
endif
+SYMLINK-$(CONFIG_RTE_EXEC_ENV_LINUXAPP)-include = \
+ include/rte_iommu.h
+
INC := rte_interrupts.h rte_kni_common.h rte_dom0_common.h
SYMLINK-$(CONFIG_RTE_EXEC_ENV_LINUXAPP)-include/exec-env := \
@@ -39,6 +39,7 @@
#include <rte_log.h>
#include <rte_memory.h>
#include <rte_eal_memconfig.h>
+#include <rte_iommu.h>
#include "eal_filesystem.h"
#include "eal_vfio.h"
@@ -50,17 +51,34 @@
static struct vfio_config vfio_cfg;
static int vfio_type1_dma_map(int);
+static int vfio_type1_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int);
static int vfio_spapr_dma_map(int);
static int vfio_noiommu_dma_map(int);
+static int vfio_noiommu_dma_mem_map(int, uint64_t, uint64_t, uint64_t, int);
/* IOMMU types we support */
static const struct vfio_iommu_type iommu_types[] = {
/* x86 IOMMU, otherwise known as type 1 */
- { RTE_VFIO_TYPE1, "Type 1", &vfio_type1_dma_map},
+ {
+ .type_id = RTE_VFIO_TYPE1,
+ .name = "Type 1",
+ .dma_map_func = &vfio_type1_dma_map,
+ .dma_user_map_func = &vfio_type1_dma_mem_map
+ },
/* ppc64 IOMMU, otherwise known as spapr */
- { RTE_VFIO_SPAPR, "sPAPR", &vfio_spapr_dma_map},
+ {
+ .type_id = RTE_VFIO_SPAPR,
+ .name = "sPAPR",
+ .dma_map_func = &vfio_spapr_dma_map,
+ .dma_user_map_func = NULL
+ },
/* IOMMU-less mode */
- { RTE_VFIO_NOIOMMU, "No-IOMMU", &vfio_noiommu_dma_map},
+ {
+ .type_id = RTE_VFIO_NOIOMMU,
+ .name = "No-IOMMU",
+ .dma_map_func = &vfio_noiommu_dma_map,
+ .dma_user_map_func = &vfio_noiommu_dma_mem_map
+ },
};
int
@@ -378,6 +396,8 @@ vfio_setup_device(const char *sysfs_base, const char *dev_addr,
clear_group(vfio_group_fd);
return -1;
}
+
+ vfio_cfg.vfio_iommu_type = t;
}
}
@@ -690,33 +710,61 @@ vfio_get_group_no(const char *sysfs_base,
}
static int
-vfio_type1_dma_map(int vfio_container_fd)
+vfio_type1_dma_mem_map(int vfio_container_fd, uint64_t vaddr, uint64_t iova,
+ uint64_t len, int do_map)
{
- const struct rte_memseg *ms = rte_eal_get_physmem_layout();
- int i, ret;
-
- /* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
- for (i = 0; i < RTE_MAX_MEMSEG; i++) {
- struct vfio_iommu_type1_dma_map dma_map;
-
- if (ms[i].addr == NULL)
- break;
+ struct vfio_iommu_type1_dma_map dma_map;
+ struct vfio_iommu_type1_dma_unmap dma_unmap;
+ int ret;
+ if (do_map != 0) {
memset(&dma_map, 0, sizeof(dma_map));
dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map);
- dma_map.vaddr = ms[i].addr_64;
- dma_map.size = ms[i].len;
- dma_map.iova = ms[i].phys_addr;
+ dma_map.vaddr = vaddr;
+ dma_map.size = len;
+ dma_map.iova = iova;
dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
-
if (ret) {
RTE_LOG(ERR, EAL, " cannot set up DMA remapping, "
"error %i (%s)\n", errno,
strerror(errno));
return -1;
}
+
+ } else {
+ memset(&dma_unmap, 0, sizeof(dma_unmap));
+ dma_unmap.argsz = sizeof(struct vfio_iommu_type1_dma_unmap);
+ dma_unmap.size = len;
+ dma_unmap.iova = iova;
+
+ ret = ioctl(vfio_container_fd, VFIO_IOMMU_UNMAP_DMA, &dma_unmap);
+ if (ret) {
+ RTE_LOG(ERR, EAL, " cannot clear DMA remapping, "
+ "error %i (%s)\n", errno,
+ strerror(errno));
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int
+vfio_type1_dma_map(int vfio_container_fd)
+{
+ const struct rte_memseg *ms = rte_eal_get_physmem_layout();
+ int i;
+
+ /* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
+ for (i = 0; i < RTE_MAX_MEMSEG; i++) {
+ if (ms[i].addr == NULL)
+ break;
+
+ if (vfio_type1_dma_mem_map(vfio_container_fd, ms[i].addr_64,
+ ms[i].phys_addr, ms[i].len, 1))
+ return 1;
}
return 0;
@@ -816,4 +864,64 @@ vfio_noiommu_dma_map(int __rte_unused vfio_container_fd)
return 0;
}
+static int
+vfio_noiommu_dma_mem_map(int __rte_unused vfio_container_fd,
+ uint64_t __rte_unused vaddr,
+ uint64_t __rte_unused iova, uint64_t __rte_unused len,
+ int __rte_unused do_map)
+{
+ /* No-IOMMU mode does not need DMA mapping */
+ return 0;
+}
+
+static int
+vfio_dma_mem_map(uint64_t vaddr, uint64_t iova,
+ uint64_t len, int do_map)
+{
+ const struct vfio_iommu_type *t = vfio_cfg.vfio_iommu_type;
+
+ if (!t) {
+ RTE_LOG(ERR, EAL, " VFIO support not initialized\n");
+ return -1;
+ }
+
+ if (!t->dma_user_map_func) {
+ RTE_LOG(ERR, EAL,
+ " VFIO custom DMA region maping not supported by IOMMU %s\n",
+ t->name);
+ return -1;
+ }
+
+ return t->dma_user_map_func(vfio_cfg.vfio_container_fd, vaddr, iova,
+ len, do_map);
+}
+
+int
+rte_iommu_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len)
+{
+ return vfio_dma_mem_map(vaddr, iova, len, 1);
+}
+
+int
+rte_iommu_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len)
+{
+ return vfio_dma_mem_map(vaddr, iova, len, 0);
+}
+
+#else
+
+int
+rte_iommu_dma_map(uint64_t __rte_unused vaddr, __rte_unused uint64_t iova,
+ __rte_unused uint64_t len)
+{
+ return 0;
+}
+
+int
+rte_iommu_dma_unmap(uint64_t __rte_unused vaddr, uint64_t __rte_unused iova,
+ __rte_unused uint64_t len)
+{
+ return 0;
+}
+
#endif
@@ -141,6 +141,7 @@ struct vfio_config {
int vfio_enabled;
int vfio_container_fd;
int vfio_active_groups;
+ const struct vfio_iommu_type *vfio_iommu_type;
struct vfio_group vfio_groups[VFIO_MAX_GROUPS];
};
@@ -157,10 +158,19 @@ struct vfio_config {
* */
typedef int (*vfio_dma_func_t)(int);
+/* Custom memory region DMA mapping function prototype.
+ * Takes VFIO container fd, virtual address, phisical address, length and
+ * operation type (0 to unmap 1 for map) as a parameters.
+ * Returns 0 on success, -1 on error.
+ **/
+typedef int (*vfio_dma_user_func_t)(int fd, uint64_t vaddr, uint64_t iova,
+ uint64_t len, int do_map);
+
struct vfio_iommu_type {
int type_id;
const char *name;
vfio_dma_func_t dma_map_func;
+ vfio_dma_user_func_t dma_user_map_func;
};
/* pick IOMMU type. returns a pointer to vfio_iommu_type or NULL for error */
new file mode 100644
@@ -0,0 +1,78 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_IOMMU_H_
+#define _RTE_IOMMU_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Register memory region in current IOMMU to enable DMA.
+ *
+ * @note
+ * Registered memory is not shared in multiprocess environment.
+ * @param vaddr
+ * Start of process virtual address.
+ * @param iova
+ * Start of IO virtual address.
+ * @param len
+ * Length of memory region.
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+int
+rte_iommu_dma_map(uint64_t vaddr, uint64_t iova, uint64_t len);
+
+/**
+ * Unregister previously registered memory region.
+ *
+ * @param vaddr
+ * Start of process virtual address.
+ * @param iova
+ * Start of IO virtual address.
+ * @param len
+ * Length of memory region.
+ * @return
+ * - On success, zero.
+ * - On failure, a negative value.
+ */
+int
+rte_iommu_dma_unmap(uint64_t vaddr, uint64_t iova, uint64_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_IOMMU_H_ */
@@ -198,3 +198,11 @@ DPDK_17.05 {
vfio_get_group_no;
} DPDK_17.02;
+
+DPDK_17.08 {
+ global:
+
+ rte_iommu_dma_map;
+ rte_iommu_dma_unmap;
+
+} DPDK_17.05;