[dpdk-dev,v5,08/11] eal: pci: introduce RTE_KDRV_VFIO_NOIOMMUi driver mode

Message ID 1453203972-24855-9-git-send-email-sshukla@mvista.com
State Superseded, archived
Delegated to: Thomas Monjalon
Headers show

Commit Message

Santosh Shukla Jan. 19, 2016, 11:46 a.m.
Adding RTE_KDRV_VFIO_NOIOMMU mode in kernel driver. Also including
rte_vfio_is_noiommu() helper function. This function will parse
/sys/bus/pci/device/<bus_addr>/ and make sure that
- vfio noiommu mode set in kernel driver
- pci device attached to vfio-noiommu driver only

If both condition satisfies then set drv->kdrv = RTE_KDRV_VFIO_NOIOMMU

Also did similar changes in virtio_rd/wr, Changes applicable for virtio spec
0.95 only.

Signed-off-by: Santosh Shukla <sshukla@mvista.com>
---
v4--> v5:
- Removed virtio_xx_init_by_vfio and added new driver mode.
- Now no need to parse vfio interface in virtio. As pci_eal module will take of
  vfio-noiommu driver parsing for virtio or any other future device willing to
  use vfio-noiommu driver.

 drivers/net/virtio/virtio_pci.c            |   12 ++---
 lib/librte_eal/common/include/rte_pci.h    |    1 +
 lib/librte_eal/linuxapp/eal/eal_pci.c      |   13 ++++--
 lib/librte_eal/linuxapp/eal/eal_pci_init.h |    1 +
 lib/librte_eal/linuxapp/eal/eal_pci_vfio.c |   69 ++++++++++++++++++++++++++++
 5 files changed, 87 insertions(+), 9 deletions(-)

Comments

Burakov, Anatoly Jan. 19, 2016, 2:18 p.m. | #1
Hi Santosh,

> +int
> +pci_vfio_is_noiommu(struct rte_pci_device *pci_dev) {
> +	FILE *fp;
> +	struct rte_pci_addr *loc;
> +	const char *path =
> "/sys/module/vfio/parameters/enable_unsafe_noiommu_mode";
> +	char filename[PATH_MAX] = {0};
> +	char buf[PATH_MAX] = {0};
> +
> +	/*
> +	 * 1. chk vfio-noiommu mode set in kernel driver
> +	 * 2. verify pci device attached to vfio-noiommu driver
> +	 * example:
> +	 * cd /sys/bus/pci/drivers/vfio-pci/<virtio_dev_addr>/iommu_group
> +	 * > cat name
> +	 * > vfio-noiommu --> means virtio_dev attached to vfio-noiommu
> driver
> +	 */
> +
> +	fp = fopen(path, "r");
> +	if (fp == NULL) {
> +		RTE_LOG(ERR, EAL, "can't open %s\n", path);
> +		return -1;
> +	}
> +
> +	if (fread(buf, sizeof(char), 1, fp) != 1) {
> +		RTE_LOG(ERR, EAL, "can't read from file %s\n", path);
> +		fclose(fp);
> +		return -1;
> +	}
> +
> +	if (strncmp(buf, "Y", 1) != 0) {
> +		RTE_LOG(ERR, EAL, "[%s]: vfio: noiommu mode not set\n",
> path);
> +		fclose(fp);
> +		return -1;
> +	}
> +
> +	fclose(fp);
> +
> +	/* 2. chk whether attached driver is vfio-noiommu or not */
> +	loc = &pci_dev->addr;
> +	snprintf(filename, sizeof(filename),
> +		     SYSFS_PCI_DEVICES "/" PCI_PRI_FMT
> "/iommu_group/name",
> +		     loc->domain, loc->bus, loc->devid, loc->function);
> +
> +	/* check for vfio-noiommu */
> +	fp = fopen(filename, "r");
> +	if (fp == NULL) {
> +		RTE_LOG(ERR, EAL, "can't open %s\n", filename);
> +		return -1;
> +	}
> +
> +	if (fread(buf, sizeof(char), sizeof("vfio-noiommu"), fp) !=
> +		  sizeof("vfio-noiommu")) {
> +		RTE_LOG(ERR, EAL, "can't read from file %s\n", filename);
> +		fclose(fp);
> +		return -1;
> +	}
> +
> +	if (strncmp(buf, "vfio-noiommu", strlen("vfio-noiommu")) != 0) {
> +		RTE_LOG(ERR, EAL, "not a vfio-noiommu driver\n");
> +		fclose(fp);
> +		return -1;
> +	}
> +
> +	fclose(fp);
> +
> +	return 0;
> +}

Since this is a public non-performance critical API, shouldn't we check if pci_dev is NULL? Otherwise the patch-set seems fine to me as far as VFIO parts are concerned.

Thanks,
Anatoly
Santosh Shukla Jan. 19, 2016, 6:36 p.m. | #2
On Tue, Jan 19, 2016 at 7:48 PM, Burakov, Anatoly
<anatoly.burakov@intel.com> wrote:
> Hi Santosh,
>
>> +int
>> +pci_vfio_is_noiommu(struct rte_pci_device *pci_dev) {
>> +     FILE *fp;
>> +     struct rte_pci_addr *loc;
>> +     const char *path =
>> "/sys/module/vfio/parameters/enable_unsafe_noiommu_mode";
>> +     char filename[PATH_MAX] = {0};
>> +     char buf[PATH_MAX] = {0};
>> +
>> +     /*
>> +      * 1. chk vfio-noiommu mode set in kernel driver
>> +      * 2. verify pci device attached to vfio-noiommu driver
>> +      * example:
>> +      * cd /sys/bus/pci/drivers/vfio-pci/<virtio_dev_addr>/iommu_group
>> +      * > cat name
>> +      * > vfio-noiommu --> means virtio_dev attached to vfio-noiommu
>> driver
>> +      */
>> +
>> +     fp = fopen(path, "r");
>> +     if (fp == NULL) {
>> +             RTE_LOG(ERR, EAL, "can't open %s\n", path);
>> +             return -1;
>> +     }
>> +
>> +     if (fread(buf, sizeof(char), 1, fp) != 1) {
>> +             RTE_LOG(ERR, EAL, "can't read from file %s\n", path);
>> +             fclose(fp);
>> +             return -1;
>> +     }
>> +
>> +     if (strncmp(buf, "Y", 1) != 0) {
>> +             RTE_LOG(ERR, EAL, "[%s]: vfio: noiommu mode not set\n",
>> path);
>> +             fclose(fp);
>> +             return -1;
>> +     }
>> +
>> +     fclose(fp);
>> +
>> +     /* 2. chk whether attached driver is vfio-noiommu or not */
>> +     loc = &pci_dev->addr;
>> +     snprintf(filename, sizeof(filename),
>> +                  SYSFS_PCI_DEVICES "/" PCI_PRI_FMT
>> "/iommu_group/name",
>> +                  loc->domain, loc->bus, loc->devid, loc->function);
>> +
>> +     /* check for vfio-noiommu */
>> +     fp = fopen(filename, "r");
>> +     if (fp == NULL) {
>> +             RTE_LOG(ERR, EAL, "can't open %s\n", filename);
>> +             return -1;
>> +     }
>> +
>> +     if (fread(buf, sizeof(char), sizeof("vfio-noiommu"), fp) !=
>> +               sizeof("vfio-noiommu")) {
>> +             RTE_LOG(ERR, EAL, "can't read from file %s\n", filename);
>> +             fclose(fp);
>> +             return -1;
>> +     }
>> +
>> +     if (strncmp(buf, "vfio-noiommu", strlen("vfio-noiommu")) != 0) {
>> +             RTE_LOG(ERR, EAL, "not a vfio-noiommu driver\n");
>> +             fclose(fp);
>> +             return -1;
>> +     }
>> +
>> +     fclose(fp);
>> +
>> +     return 0;
>> +}
>
> Since this is a public non-performance critical API, shouldn't we check if pci_dev is NULL? Otherwise the patch-set seems fine to me as far as VFIO parts are concerned.
>
pci_scan_one() uses this api for now and it populate pci_dev before
pci_vfio_is_noiommu() could use. So didn't though to add a check, But
you are right in case any other module want to use this api. Sending
patch now. Thanks.

> Thanks,
> Anatoly

Patch

diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c
index 0c29f1d..537c552 100644
--- a/drivers/net/virtio/virtio_pci.c
+++ b/drivers/net/virtio/virtio_pci.c
@@ -60,7 +60,7 @@  virtio_read_reg_1(struct virtio_hw *hw, uint64_t reg_offset)
 	struct rte_pci_device *dev;
 
 	dev = hw->dev;
-	if (dev->kdrv == RTE_KDRV_VFIO)
+	if (dev->kdrv == RTE_KDRV_VFIO_NOIOMMU)
 		ioport_inb(dev, reg_offset, &ret);
 	else
 		ret = inb(VIRTIO_PCI_REG_ADDR(hw, reg_offset));
@@ -75,7 +75,7 @@  virtio_read_reg_2(struct virtio_hw *hw, uint64_t reg_offset)
 	struct rte_pci_device *dev;
 
 	dev = hw->dev;
-	if (dev->kdrv == RTE_KDRV_VFIO)
+	if (dev->kdrv == RTE_KDRV_VFIO_NOIOMMU)
 		ioport_inw(dev, reg_offset, &ret);
 	else
 		ret = inw(VIRTIO_PCI_REG_ADDR(hw, reg_offset));
@@ -90,7 +90,7 @@  virtio_read_reg_4(struct virtio_hw *hw, uint64_t reg_offset)
 	struct rte_pci_device *dev;
 
 	dev = hw->dev;
-	if (dev->kdrv == RTE_KDRV_VFIO)
+	if (dev->kdrv == RTE_KDRV_VFIO_NOIOMMU)
 		ioport_inl(dev, reg_offset, &ret);
 	else
 		ret = inl(VIRTIO_PCI_REG_ADDR(hw, reg_offset));
@@ -104,7 +104,7 @@  virtio_write_reg_1(struct virtio_hw *hw, uint64_t reg_offset, uint8_t value)
 	struct rte_pci_device *dev;
 
 	dev = hw->dev;
-	if (dev->kdrv == RTE_KDRV_VFIO)
+	if (dev->kdrv == RTE_KDRV_VFIO_NOIOMMU)
 		ioport_outb_p(dev, reg_offset, value);
 	else
 		outb_p((unsigned char)value,
@@ -117,7 +117,7 @@  virtio_write_reg_2(struct virtio_hw *hw, uint64_t reg_offset, uint16_t value)
 	struct rte_pci_device *dev;
 
 	dev = hw->dev;
-	if (dev->kdrv == RTE_KDRV_VFIO)
+	if (dev->kdrv == RTE_KDRV_VFIO_NOIOMMU)
 		ioport_outw_p(dev, reg_offset, value);
 	else
 		outw_p((unsigned short)value,
@@ -130,7 +130,7 @@  virtio_write_reg_4(struct virtio_hw *hw, uint64_t reg_offset, uint32_t value)
 	struct rte_pci_device *dev;
 
 	dev = hw->dev;
-	if (dev->kdrv == RTE_KDRV_VFIO)
+	if (dev->kdrv == RTE_KDRV_VFIO_NOIOMMU)
 		ioport_outl_p(dev, reg_offset, value);
 	else
 		outl_p((unsigned int)value,
diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h
index 0c667ff..2dbc658 100644
--- a/lib/librte_eal/common/include/rte_pci.h
+++ b/lib/librte_eal/common/include/rte_pci.h
@@ -149,6 +149,7 @@  enum rte_kernel_driver {
 	RTE_KDRV_VFIO,
 	RTE_KDRV_UIO_GENERIC,
 	RTE_KDRV_NIC_UIO,
+	RTE_KDRV_VFIO_NOIOMMU,
 	RTE_KDRV_NONE,
 };
 
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
index eb503f0..2936497 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -131,6 +131,7 @@  rte_eal_pci_map_device(struct rte_pci_device *dev)
 	/* try mapping the NIC resources using VFIO if it exists */
 	switch (dev->kdrv) {
 	case RTE_KDRV_VFIO:
+	case RTE_KDRV_VFIO_NOIOMMU:
 #ifdef VFIO_PRESENT
 		if (pci_vfio_is_enabled())
 			ret = pci_vfio_map_resource(dev);
@@ -158,6 +159,7 @@  rte_eal_pci_unmap_device(struct rte_pci_device *dev)
 	/* try unmapping the NIC resources using VFIO if it exists */
 	switch (dev->kdrv) {
 	case RTE_KDRV_VFIO:
+	case RTE_KDRV_VFIO_NOIOMMU:
 		RTE_LOG(ERR, EAL, "Hotplug doesn't support vfio yet\n");
 		break;
 	case RTE_KDRV_IGB_UIO:
@@ -353,9 +355,12 @@  pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
 	}
 
 	if (!ret) {
-		if (!strcmp(driver, "vfio-pci"))
-			dev->kdrv = RTE_KDRV_VFIO;
-		else if (!strcmp(driver, "igb_uio"))
+		if (!strcmp(driver, "vfio-pci")) {
+			if (pci_vfio_is_noiommu(dev) == 0)
+				dev->kdrv = RTE_KDRV_VFIO_NOIOMMU;
+			else
+				dev->kdrv = RTE_KDRV_VFIO;
+		} else if (!strcmp(driver, "igb_uio"))
 			dev->kdrv = RTE_KDRV_IGB_UIO;
 		else if (!strcmp(driver, "uio_pci_generic"))
 			dev->kdrv = RTE_KDRV_UIO_GENERIC;
@@ -630,6 +635,7 @@  int rte_eal_pci_read_bar(const struct rte_pci_device *device,
 
 	switch (device->kdrv) {
 	case RTE_KDRV_VFIO:
+	case RTE_KDRV_VFIO_NOIOMMU:
 		return pci_vfio_read_bar(intr_handle, buf, len,
 					 offset, bar_idx);
 	default:
@@ -647,6 +653,7 @@  int rte_eal_pci_write_bar(const struct rte_pci_device *device,
 
 	switch (device->kdrv) {
 	case RTE_KDRV_VFIO:
+	case RTE_KDRV_VFIO_NOIOMMU:
 		return pci_vfio_write_bar(intr_handle, buf, len,
 					  offset, bar_idx);
 	default:
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_init.h b/lib/librte_eal/linuxapp/eal/eal_pci_init.h
index 3bc592b..60b95d7 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_init.h
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_init.h
@@ -60,6 +60,7 @@  int pci_uio_write_config(const struct rte_intr_handle *intr_handle,
 
 int pci_vfio_enable(void);
 int pci_vfio_is_enabled(void);
+int pci_vfio_is_noiommu(struct rte_pci_device *pci_dev);
 int pci_vfio_mp_sync_setup(void);
 
 /* access config space */
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
index df407ef..31d688b 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
@@ -973,4 +973,73 @@  pci_vfio_is_enabled(void)
 {
 	return vfio_cfg.vfio_enabled;
 }
+
+int
+pci_vfio_is_noiommu(struct rte_pci_device *pci_dev)
+{
+	FILE *fp;
+	struct rte_pci_addr *loc;
+	const char *path = "/sys/module/vfio/parameters/enable_unsafe_noiommu_mode";
+	char filename[PATH_MAX] = {0};
+	char buf[PATH_MAX] = {0};
+
+	/*
+	 * 1. chk vfio-noiommu mode set in kernel driver
+	 * 2. verify pci device attached to vfio-noiommu driver
+	 * example:
+	 * cd /sys/bus/pci/drivers/vfio-pci/<virtio_dev_addr>/iommu_group
+	 * > cat name
+	 * > vfio-noiommu --> means virtio_dev attached to vfio-noiommu driver
+	 */
+
+	fp = fopen(path, "r");
+	if (fp == NULL) {
+		RTE_LOG(ERR, EAL, "can't open %s\n", path);
+		return -1;
+	}
+
+	if (fread(buf, sizeof(char), 1, fp) != 1) {
+		RTE_LOG(ERR, EAL, "can't read from file %s\n", path);
+		fclose(fp);
+		return -1;
+	}
+
+	if (strncmp(buf, "Y", 1) != 0) {
+		RTE_LOG(ERR, EAL, "[%s]: vfio: noiommu mode not set\n", path);
+		fclose(fp);
+		return -1;
+	}
+
+	fclose(fp);
+
+	/* 2. chk whether attached driver is vfio-noiommu or not */
+	loc = &pci_dev->addr;
+	snprintf(filename, sizeof(filename),
+		     SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/iommu_group/name",
+		     loc->domain, loc->bus, loc->devid, loc->function);
+
+	/* check for vfio-noiommu */
+	fp = fopen(filename, "r");
+	if (fp == NULL) {
+		RTE_LOG(ERR, EAL, "can't open %s\n", filename);
+		return -1;
+	}
+
+	if (fread(buf, sizeof(char), sizeof("vfio-noiommu"), fp) !=
+		  sizeof("vfio-noiommu")) {
+		RTE_LOG(ERR, EAL, "can't read from file %s\n", filename);
+		fclose(fp);
+		return -1;
+	}
+
+	if (strncmp(buf, "vfio-noiommu", strlen("vfio-noiommu")) != 0) {
+		RTE_LOG(ERR, EAL, "not a vfio-noiommu driver\n");
+		fclose(fp);
+		return -1;
+	}
+
+	fclose(fp);
+
+	return 0;
+}
 #endif