[dpdk-dev] vfio: add no-iommu support

Message ID 1450700305-26453-1-git-send-email-ferruh.yigit@intel.com (mailing list archive)
State Rejected, archived
Headers

Commit Message

Ferruh Yigit Dec. 21, 2015, 12:18 p.m. UTC
  This is based on the patch from Alex Williamson:
https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=033291eccbdb
plus
http://dpdk.org/dev/patchwork/patch/9598/

This patch is intended to test the above patches on DPDK rather than
to serve as an official patch for DPDK.

Test result: DPDK runs successfully in a no-iommu environment.

Signed-off-by: Ferruh Yigit <ferruh.yigit@intel.com>
---
 lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 28 +++++++++++++++++++++++++---
 1 file changed, 25 insertions(+), 3 deletions(-)
  

Comments

Burakov, Anatoly Dec. 21, 2015, 3:15 p.m. UTC | #1
Hi Ferruh,

> This is based on patch from Alex Williamson:
> https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=033291eccbdb
> plus
> http://dpdk.org/dev/patchwork/patch/9598/
> 
> This patch is intended to test above patches on DPDK rather than official
> patch to DPDK.
> 
> Test result is DPDK successfully run on no-iommu environment.
> 
> Signed-off-by: Ferruh Yigit <ferruh.yigit@intel.com>
> ---
>  lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 28 +++++++++++++++++++++++++---
>  1 file changed, 25 insertions(+), 3 deletions(-)
> 
> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> index 74f91ba..90bba4a 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> @@ -61,6 +61,18 @@
> 
>  #ifdef VFIO_PRESENT
> 
> +/*#define VFIO_NOIOMMU*/
> +
> +#ifndef VFIO_NOIOMMU_IOMMU
> +#define VFIO_NOIOMMU_IOMMU 8
> +#endif
> +
> +#ifdef VFIO_NOIOMMU
> +#define VFIO_IOMMU_TYPE VFIO_NOIOMMU_IOMMU
> +#else
> +#define VFIO_IOMMU_TYPE VFIO_TYPE1_IOMMU
> +#endif
> +
>  #define PAGE_SIZE   (sysconf(_SC_PAGESIZE))
>  #define PAGE_MASK   (~(PAGE_SIZE - 1))
> 
> @@ -71,7 +83,11 @@ EAL_REGISTER_TAILQ(rte_vfio_tailq)
> 
>  #define VFIO_DIR "/dev/vfio"
>  #define VFIO_CONTAINER_PATH "/dev/vfio/vfio"
> +#ifdef VFIO_NOIOMMU
> +#define VFIO_GROUP_FMT "/dev/vfio/noiommu-%u"
> +#else
>  #define VFIO_GROUP_FMT "/dev/vfio/%u"
> +#endif
>  #define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL)
> 
>  /* per-process VFIO config */
> @@ -212,17 +228,21 @@ pci_vfio_set_bus_master(int dev_fd)
>  static int
>  pci_vfio_setup_dma_maps(int vfio_container_fd)
>  {
> +#ifndef VFIO_NOIOMMU
>  	const struct rte_memseg *ms = rte_eal_get_physmem_layout();
> -	int i, ret;
> +	int i;
> +#endif
> +	int ret;
> 
>  	ret = ioctl(vfio_container_fd, VFIO_SET_IOMMU,
> -			VFIO_TYPE1_IOMMU);
> +			VFIO_IOMMU_TYPE);
>  	if (ret) {
>  		RTE_LOG(ERR, EAL, "  cannot set IOMMU type, "
>  				"error %i (%s)\n", errno, strerror(errno));
>  		return -1;
>  	}
> 
> +#ifndef VFIO_NOIOMMU
>  	/* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
>  	for (i = 0; i < RTE_MAX_MEMSEG; i++) {
>  		struct vfio_iommu_type1_dma_map dma_map;
> @@ -245,6 +265,7 @@ pci_vfio_setup_dma_maps(int vfio_container_fd)
>  			return -1;
>  		}
>  	}
> +#endif
> 
>  	return 0;
>  }
> @@ -373,7 +394,8 @@ pci_vfio_get_container_fd(void)
>  		}
> 
>  		/* check if we support IOMMU type 1 */
> -		ret = ioctl(vfio_container_fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU);
> +		ret = ioctl(vfio_container_fd, VFIO_CHECK_EXTENSION,
> +				VFIO_IOMMU_TYPE);
>  		if (ret != 1) {
>  			if (ret < 0)
>  				RTE_LOG(ERR, EAL, "  could not get IOMMU type, "
> --
> 2.5.0

This is one approach :) I was thinking of another, building some kind of more generic support for multiple VFIO drivers. It's a bit more code and probably overkill as a solution to this particular problem, but hopefully it'll make it easier to add new VFIO drivers down the line (with each driver having their own DMA mapping function), should we choose to do so. I'm still working on the patch, but if everyone is OK with this approach instead of a more general one, that's fine with me.

Thanks,
Anatoly
  
Burakov, Anatoly Dec. 21, 2015, 3:28 p.m. UTC | #2
Hi Ferruh,

> On Mon, Dec 21, 2015 at 03:15:46PM +0000, Burakov, Anatoly wrote:
> > > This is based on patch from Alex Williamson:
> > > https://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=033291eccbdb
> > > plus
> > > http://dpdk.org/dev/patchwork/patch/9598/
> > >
> > > This patch is intended to test above patches on DPDK rather than
> > > official patch to DPDK.
> > >
> > > Test result is DPDK successfully run on no-iommu environment.
> > >
> >
> > This is one approach :) I was thinking of another, building some kind of
> > more generic support for multiple VFIO drivers. It's a bit more code and
> > probably overkill as a solution to this particular problem, but hopefully it'll
> > make it easier to add new VFIO drivers down the line (with each driver
> > having their own DMA mapping function), should we choose to do so. I'm still
> > working on the patch, but if everyone is OK with this approach instead of a
> > more general one, that's fine with me.
> >
> Hi Anatoly,
> 
> This patch was sent just to show what changes were made to test the VFIO
> no-iommu mode I mentioned, and to provide a justification for the kernel
> patch; it was not sent as a final solution for DPDK. Sorry for interrupting
> your work.
> 
> Thanks,
> Ferruh

Ah OK, I misread the part where it said that it is not to be applied as-is. Thanks!

Thanks,
Anatoly
  

Patch

diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
index 74f91ba..90bba4a 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
@@ -61,6 +61,18 @@ 
 
 #ifdef VFIO_PRESENT
 
+/*#define VFIO_NOIOMMU*/
+
+#ifndef VFIO_NOIOMMU_IOMMU
+#define VFIO_NOIOMMU_IOMMU 8
+#endif
+
+#ifdef VFIO_NOIOMMU
+#define VFIO_IOMMU_TYPE VFIO_NOIOMMU_IOMMU
+#else
+#define VFIO_IOMMU_TYPE VFIO_TYPE1_IOMMU
+#endif
+
 #define PAGE_SIZE   (sysconf(_SC_PAGESIZE))
 #define PAGE_MASK   (~(PAGE_SIZE - 1))
 
@@ -71,7 +83,11 @@  EAL_REGISTER_TAILQ(rte_vfio_tailq)
 
 #define VFIO_DIR "/dev/vfio"
 #define VFIO_CONTAINER_PATH "/dev/vfio/vfio"
+#ifdef VFIO_NOIOMMU
+#define VFIO_GROUP_FMT "/dev/vfio/noiommu-%u"
+#else
 #define VFIO_GROUP_FMT "/dev/vfio/%u"
+#endif
 #define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL)
 
 /* per-process VFIO config */
@@ -212,17 +228,21 @@  pci_vfio_set_bus_master(int dev_fd)
 static int
 pci_vfio_setup_dma_maps(int vfio_container_fd)
 {
+#ifndef VFIO_NOIOMMU
 	const struct rte_memseg *ms = rte_eal_get_physmem_layout();
-	int i, ret;
+	int i;
+#endif
+	int ret;
 
 	ret = ioctl(vfio_container_fd, VFIO_SET_IOMMU,
-			VFIO_TYPE1_IOMMU);
+			VFIO_IOMMU_TYPE);
 	if (ret) {
 		RTE_LOG(ERR, EAL, "  cannot set IOMMU type, "
 				"error %i (%s)\n", errno, strerror(errno));
 		return -1;
 	}
 
+#ifndef VFIO_NOIOMMU
 	/* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */
 	for (i = 0; i < RTE_MAX_MEMSEG; i++) {
 		struct vfio_iommu_type1_dma_map dma_map;
@@ -245,6 +265,7 @@  pci_vfio_setup_dma_maps(int vfio_container_fd)
 			return -1;
 		}
 	}
+#endif
 
 	return 0;
 }
@@ -373,7 +394,8 @@  pci_vfio_get_container_fd(void)
 		}
 
 		/* check if we support IOMMU type 1 */
-		ret = ioctl(vfio_container_fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU);
+		ret = ioctl(vfio_container_fd, VFIO_CHECK_EXTENSION,
+				VFIO_IOMMU_TYPE);
 		if (ret != 1) {
 			if (ret < 0)
 				RTE_LOG(ERR, EAL, "  could not get IOMMU type, "