[v3,3/6] bus/pci: use IOVAs check when setting IOVA mode

Message ID 1530708838-2682-4-git-send-email-alejandro.lucero@netronome.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series use IOVAs check based on DMA mask |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation fail Compilation issues

Commit Message

Alejandro Lucero July 4, 2018, 12:53 p.m. UTC
  Although VT-d emulation currently only supports 39 bits, the IOVAs
in use may all fall within that supported range. This patch allows
IOVA mode in such a case.

Indeed, memory initialization code can be modified to use lower
virtual addresses than those used by the kernel for 64-bit processes
by default, and therefore memseg IOVAs can fit in 39 bits or less on
most systems. This is very likely always true for VMs.

Signed-off-by: Alejandro Lucero <alejandro.lucero@netronome.com>
---
 drivers/bus/pci/linux/pci.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)
  

Comments

Eelco Chaudron July 10, 2018, 10:14 a.m. UTC | #1
On 4 Jul 2018, at 14:53, Alejandro Lucero wrote:

> Although VT-d emulation currently only supports 39 bits, it could
> be iovas being within that supported range. This patch allows
> IOVA mode in such a case.
>
> Indeed, memory initialization code can be modified for using lower
> virtual addresses than those used by the kernel for 64 bits processes
> by default, and therefore memsegs iovas can use 39 bits or less for
> most system. And this is likely 100% true for VMs.
>
> Signed-off-by: Alejandro Lucero <alejandro.lucero@netronome.com>
> ---
>  drivers/bus/pci/linux/pci.c | 15 +++++++++++----
>  1 file changed, 11 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
> index 74deef3..792c819 100644
> --- a/drivers/bus/pci/linux/pci.c
> +++ b/drivers/bus/pci/linux/pci.c
> @@ -43,6 +43,7 @@
>  #include <rte_devargs.h>
>  #include <rte_memcpy.h>
>  #include <rte_vfio.h>
> +#include <rte_memory.h>
>
>  #include "eal_private.h"
>  #include "eal_filesystem.h"
> @@ -613,10 +614,12 @@
>  	fclose(fp);
>
>  	mgaw = ((vtd_cap_reg & VTD_CAP_MGAW_MASK) >> VTD_CAP_MGAW_SHIFT) + 
> 1;
> -	if (mgaw < X86_VA_WIDTH)
> +
> +	if (!rte_eal_check_dma_mask(mgaw))

I think in this case we still need to check the X86_VA_WIDTH, i.e.
if (mgaw < X86_VA_WIDTH && !rte_eal_check_dma_mask(mgaw))


> +		return true;
> +	else
>  		return false;
>
> -	return true;
>  }
>  #elif defined(RTE_ARCH_PPC_64)
>  static bool
> @@ -640,13 +643,17 @@
>  {
>  	struct rte_pci_device *dev = NULL;
>  	struct rte_pci_driver *drv = NULL;
> +	int iommu_dma_mask_check_done = 0;
>
>  	FOREACH_DRIVER_ON_PCIBUS(drv) {
>  		FOREACH_DEVICE_ON_PCIBUS(dev) {
>  			if (!rte_pci_match(drv, dev))
>  				continue;
> -			if (!pci_one_device_iommu_support_va(dev))
> -				return false;
> +			if (!iommu_dma_mask_check_done) {
> +				if (pci_one_device_iommu_support_va(dev) < 0)
> +					return false;
> +				iommu_dma_mask_check_done  = 1;

Not sure why this change? Why do we only need to check one device on the 
bus?

In addition, if this is what was intended, rather than using a variable
you can simply return true in this case, or did you intend to clear
iommu_dma_mask_check_done on every PCI bus iteration?

> +			}
>  		}
>  	}
>  	return true;
> -- 
> 1.9.1
  
Alejandro Lucero July 10, 2018, 3:37 p.m. UTC | #2
On Tue, Jul 10, 2018 at 11:14 AM, Eelco Chaudron <echaudro@redhat.com>
wrote:

>
>
> On 4 Jul 2018, at 14:53, Alejandro Lucero wrote:
>
> Although VT-d emulation currently only supports 39 bits, it could
>> be iovas being within that supported range. This patch allows
>> IOVA mode in such a case.
>>
>> Indeed, memory initialization code can be modified for using lower
>> virtual addresses than those used by the kernel for 64 bits processes
>> by default, and therefore memsegs iovas can use 39 bits or less for
>> most system. And this is likely 100% true for VMs.
>>
>> Signed-off-by: Alejandro Lucero <alejandro.lucero@netronome.com>
>> ---
>>  drivers/bus/pci/linux/pci.c | 15 +++++++++++----
>>  1 file changed, 11 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
>> index 74deef3..792c819 100644
>> --- a/drivers/bus/pci/linux/pci.c
>> +++ b/drivers/bus/pci/linux/pci.c
>> @@ -43,6 +43,7 @@
>>  #include <rte_devargs.h>
>>  #include <rte_memcpy.h>
>>  #include <rte_vfio.h>
>> +#include <rte_memory.h>
>>
>>  #include "eal_private.h"
>>  #include "eal_filesystem.h"
>> @@ -613,10 +614,12 @@
>>         fclose(fp);
>>
>>         mgaw = ((vtd_cap_reg & VTD_CAP_MGAW_MASK) >> VTD_CAP_MGAW_SHIFT)
>> + 1;
>> -       if (mgaw < X86_VA_WIDTH)
>> +
>> +       if (!rte_eal_check_dma_mask(mgaw))
>>
>
> If think in this case we still need to check the X86_VA_WIDTH, i.e.
> if (mgaw < X86_VA_WIDTH && !rte_eal_check_dma_mask(mgaw))
>
>
> +               return true;
>> +       else
>>                 return false;
>>
>> -       return true;
>>  }
>>  #elif defined(RTE_ARCH_PPC_64)
>>  static bool
>> @@ -640,13 +643,17 @@
>>  {
>>         struct rte_pci_device *dev = NULL;
>>         struct rte_pci_driver *drv = NULL;
>> +       int iommu_dma_mask_check_done = 0;
>>
>>         FOREACH_DRIVER_ON_PCIBUS(drv) {
>>                 FOREACH_DEVICE_ON_PCIBUS(dev) {
>>                         if (!rte_pci_match(drv, dev))
>>                                 continue;
>> -                       if (!pci_one_device_iommu_support_va(dev))
>> -                               return false;
>> +                       if (!iommu_dma_mask_check_done) {
>> +                               if (pci_one_device_iommu_support_va(dev)
>> < 0)
>> +                                       return false;
>> +                               iommu_dma_mask_check_done  = 1;
>>
>
> Not sure why this change? Why do we only need to check one device on the
> bus?
>
>
Because there is just one emulated IOMMU. The limitation in this
case is not in a specific PCI device. And I do not think it is possible to
have two different types of IOMMU hardware (emulated or not). Yes, you can
have more than one controller, but they will all be of the same IOMMU type.


> In addition, if this is what was intended, rather than a variable you can
> return true in this case, or did you intended to clear the
> iommu_dma_mask_check_done on every PCI BUS iteration?
>
>
If pci_one_device_iommu_support_va finds out, through the DMA mask check,
that the IOVAs are out of range, then the IOVA mode is PA and no further
checks are required. But there could still be a PCI device precluding IOVA
VA mode, so all the PCI devices need to be processed.


> +                       }
>>                 }
>>         }
>>         return true;
>> --
>> 1.9.1
>>
>
  

Patch

diff --git a/drivers/bus/pci/linux/pci.c b/drivers/bus/pci/linux/pci.c
index 74deef3..792c819 100644
--- a/drivers/bus/pci/linux/pci.c
+++ b/drivers/bus/pci/linux/pci.c
@@ -43,6 +43,7 @@ 
 #include <rte_devargs.h>
 #include <rte_memcpy.h>
 #include <rte_vfio.h>
+#include <rte_memory.h>
 
 #include "eal_private.h"
 #include "eal_filesystem.h"
@@ -613,10 +614,12 @@ 
 	fclose(fp);
 
 	mgaw = ((vtd_cap_reg & VTD_CAP_MGAW_MASK) >> VTD_CAP_MGAW_SHIFT) + 1;
-	if (mgaw < X86_VA_WIDTH)
+
+	if (!rte_eal_check_dma_mask(mgaw))
+		return true;
+	else
 		return false;
 
-	return true;
 }
 #elif defined(RTE_ARCH_PPC_64)
 static bool
@@ -640,13 +643,17 @@ 
 {
 	struct rte_pci_device *dev = NULL;
 	struct rte_pci_driver *drv = NULL;
+	int iommu_dma_mask_check_done = 0;
 
 	FOREACH_DRIVER_ON_PCIBUS(drv) {
 		FOREACH_DEVICE_ON_PCIBUS(dev) {
 			if (!rte_pci_match(drv, dev))
 				continue;
-			if (!pci_one_device_iommu_support_va(dev))
-				return false;
+			if (!iommu_dma_mask_check_done) {
+				if (pci_one_device_iommu_support_va(dev) < 0)
+					return false;
+				iommu_dma_mask_check_done  = 1;
+			}
 		}
 	}
 	return true;