[v2,2/2] vhost: enable IOMMU for async vhost

Message ID 20210917052546.23883-3-xuan.ding@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series support IOMMU for DMA device |

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/github-robot: build success github build: passed
ci/Intel-compilation success Compilation OK
ci/intel-Testing fail Testing issues
ci/iol-aarch64-compile-testing success Testing PASS
ci/iol-x86_64-compile-testing success Testing PASS

Commit Message

Ding, Xuan Sept. 17, 2021, 5:25 a.m. UTC
  The use of IOMMU has many advantages, such as isolation and address
translation. This patch extends the capability of the DMA engine to use
the IOMMU if the DMA engine is bound to vfio.

When the memory table is set, the guest memory is mapped
into the default container of DPDK.

Signed-off-by: Xuan Ding <xuan.ding@intel.com>
---
 lib/vhost/rte_vhost.h  |  1 +
 lib/vhost/vhost_user.c | 57 +++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 57 insertions(+), 1 deletion(-)
  

Comments

Hu, Jiayu Sept. 23, 2021, 2:39 p.m. UTC | #1
Hi Xuan,

> -----Original Message-----
> From: Ding, Xuan <xuan.ding@intel.com>
> Sent: Friday, September 17, 2021 1:26 PM
> To: dev@dpdk.org; Burakov, Anatoly <anatoly.burakov@intel.com>;
> maxime.coquelin@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>
> Cc: Hu, Jiayu <jiayu.hu@intel.com>; Jiang, Cheng1 <cheng1.jiang@intel.com>;
> Richardson, Bruce <bruce.richardson@intel.com>; Pai G, Sunil
> <sunil.pai.g@intel.com>; Wang, Yinan <yinan.wang@intel.com>; Yang,
> YvonneX <yvonnex.yang@intel.com>; Ding, Xuan <xuan.ding@intel.com>
> Subject: [PATCH v2 2/2] vhost: enable IOMMU for async vhost
> 
> The use of IOMMU has many advantages, such as isolation and address
> translation. This patch extends the capbility of DMA engine to use IOMMU if
> the DMA engine is bound to vfio.
> 
> When set memory table, the guest memory will be mapped into the default
> container of DPDK.
> 
> Signed-off-by: Xuan Ding <xuan.ding@intel.com>
> ---
>  lib/vhost/rte_vhost.h  |  1 +
>  lib/vhost/vhost_user.c | 57
> +++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 57 insertions(+), 1 deletion(-)
> 
> diff --git a/lib/vhost/rte_vhost.h b/lib/vhost/rte_vhost.h index
> 8d875e9322..e0537249f3 100644
> --- a/lib/vhost/rte_vhost.h
> +++ b/lib/vhost/rte_vhost.h
> @@ -127,6 +127,7 @@ struct rte_vhost_mem_region {
>  	void	 *mmap_addr;
>  	uint64_t mmap_size;
>  	int fd;
> +	uint64_t dma_map_success;

How about using bool for dma_map_success?

>  };
> 
>  /**
> diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index
> 29a4c9af60..7d1d592b86 100644
> --- a/lib/vhost/vhost_user.c
> +++ b/lib/vhost/vhost_user.c
> @@ -45,6 +45,8 @@
>  #include <rte_common.h>
>  #include <rte_malloc.h>
>  #include <rte_log.h>
> +#include <rte_vfio.h>
> +#include <rte_errno.h>
> 
>  #include "iotlb.h"
>  #include "vhost.h"
> @@ -141,6 +143,46 @@ get_blk_size(int fd)
>  	return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;  }
> 
> +static int
> +async_dma_map(struct rte_vhost_mem_region *region, bool do_map) {
> +	int ret = 0;
> +	uint64_t host_iova;
> +	host_iova = rte_mem_virt2iova((void *)(uintptr_t)region-
> >host_user_addr);
> +	if (do_map) {
> +		/* Add mapped region into the default container of DPDK. */
> +		ret =
> rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
> +						 region->host_user_addr,
> +						 host_iova,
> +						 region->size);
> +		region->dma_map_success = ret == 0;
> +		if (ret) {
> +			if (rte_errno != ENODEV && rte_errno != ENOTSUP) {
> +				VHOST_LOG_CONFIG(ERR, "DMA engine map
> failed\n");
> +				return ret;
> +			}
> +			return 0;

Why return 0, if ret is -1 here?

Thanks,
Jiayu

> +		}
> +		return ret;
> +	} else {
> +		/* No need to do vfio unmap if the map failed. */
> +		if (!region->dma_map_success)
> +			return 0;
> +
> +		/* Remove mapped region from the default container of
> DPDK. */
> +		ret =
> rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
> +						   region->host_user_addr,
> +						   host_iova,
> +						   region->size);
> +		if (ret) {
> +			VHOST_LOG_CONFIG(ERR, "DMA engine unmap
> failed\n");
> +			return ret;
> +		}
> +		region->dma_map_success = 0;
> +	}
> +	return ret;
> +}
> +
>  static void
>  free_mem_region(struct virtio_net *dev)  { @@ -153,6 +195,9 @@
> free_mem_region(struct virtio_net *dev)
>  	for (i = 0; i < dev->mem->nregions; i++) {
>  		reg = &dev->mem->regions[i];
>  		if (reg->host_user_addr) {
> +			if (dev->async_copy && rte_vfio_is_enabled("vfio"))
> +				async_dma_map(reg, false);
> +
>  			munmap(reg->mmap_addr, reg->mmap_size);
>  			close(reg->fd);
>  		}
> @@ -1157,6 +1202,7 @@ vhost_user_mmap_region(struct virtio_net *dev,
>  	uint64_t mmap_size;
>  	uint64_t alignment;
>  	int populate;
> +	int ret;
> 
>  	/* Check for memory_size + mmap_offset overflow */
>  	if (mmap_offset >= -region->size) {
> @@ -1210,13 +1256,22 @@ vhost_user_mmap_region(struct virtio_net *dev,
>  	region->mmap_size = mmap_size;
>  	region->host_user_addr = (uint64_t)(uintptr_t)mmap_addr +
> mmap_offset;
> 
> -	if (dev->async_copy)
> +	if (dev->async_copy) {
>  		if (add_guest_pages(dev, region, alignment) < 0) {
>  			VHOST_LOG_CONFIG(ERR,
>  					"adding guest pages to region
> failed.\n");
>  			return -1;
>  		}
> 
> +		if (rte_vfio_is_enabled("vfio")) {
> +			ret = async_dma_map(region, true);
> +			if (ret < 0) {
> +				VHOST_LOG_CONFIG(ERR, "Configure
> IOMMU for DMA engine failed\n");
> +				return -1;
> +			}
> +		}
> +	}
> +
>  	VHOST_LOG_CONFIG(INFO,
>  			"guest memory region size: 0x%" PRIx64 "\n"
>  			"\t guest physical addr: 0x%" PRIx64 "\n"
> --
> 2.17.1
  
Maxime Coquelin Sept. 23, 2021, 2:56 p.m. UTC | #2
On 9/23/21 16:39, Hu, Jiayu wrote:
> Hi Xuan,
> 
>> -----Original Message-----
>> From: Ding, Xuan <xuan.ding@intel.com>
>> Sent: Friday, September 17, 2021 1:26 PM
>> To: dev@dpdk.org; Burakov, Anatoly <anatoly.burakov@intel.com>;
>> maxime.coquelin@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>
>> Cc: Hu, Jiayu <jiayu.hu@intel.com>; Jiang, Cheng1 <cheng1.jiang@intel.com>;
>> Richardson, Bruce <bruce.richardson@intel.com>; Pai G, Sunil
>> <sunil.pai.g@intel.com>; Wang, Yinan <yinan.wang@intel.com>; Yang,
>> YvonneX <yvonnex.yang@intel.com>; Ding, Xuan <xuan.ding@intel.com>
>> Subject: [PATCH v2 2/2] vhost: enable IOMMU for async vhost
>>
>> The use of IOMMU has many advantages, such as isolation and address
>> translation. This patch extends the capbility of DMA engine to use IOMMU if
>> the DMA engine is bound to vfio.
>>
>> When set memory table, the guest memory will be mapped into the default
>> container of DPDK.
>>
>> Signed-off-by: Xuan Ding <xuan.ding@intel.com>
>> ---
>>   lib/vhost/rte_vhost.h  |  1 +
>>   lib/vhost/vhost_user.c | 57
>> +++++++++++++++++++++++++++++++++++++++++-
>>   2 files changed, 57 insertions(+), 1 deletion(-)
>>
>> diff --git a/lib/vhost/rte_vhost.h b/lib/vhost/rte_vhost.h index
>> 8d875e9322..e0537249f3 100644
>> --- a/lib/vhost/rte_vhost.h
>> +++ b/lib/vhost/rte_vhost.h
>> @@ -127,6 +127,7 @@ struct rte_vhost_mem_region {
>>   	void	 *mmap_addr;
>>   	uint64_t mmap_size;
>>   	int fd;
>> +	uint64_t dma_map_success;
> 
> How about using bool for dma_map_success?

The bigger problem here is that you are breaking the ABI.

>>   };
>>
>>   /**
>> diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c index
>> 29a4c9af60..7d1d592b86 100644
>> --- a/lib/vhost/vhost_user.c
>> +++ b/lib/vhost/vhost_user.c
>> @@ -45,6 +45,8 @@
>>   #include <rte_common.h>
>>   #include <rte_malloc.h>
>>   #include <rte_log.h>
>> +#include <rte_vfio.h>
>> +#include <rte_errno.h>
>>
>>   #include "iotlb.h"
>>   #include "vhost.h"
>> @@ -141,6 +143,46 @@ get_blk_size(int fd)
>>   	return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;  }
>>
>> +static int
>> +async_dma_map(struct rte_vhost_mem_region *region, bool do_map) {
>> +	int ret = 0;
>> +	uint64_t host_iova;
>> +	host_iova = rte_mem_virt2iova((void *)(uintptr_t)region-
>>> host_user_addr);
>> +	if (do_map) {
>> +		/* Add mapped region into the default container of DPDK. */
>> +		ret =
>> rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
>> +						 region->host_user_addr,
>> +						 host_iova,
>> +						 region->size);
>> +		region->dma_map_success = ret == 0;
>> +		if (ret) {
>> +			if (rte_errno != ENODEV && rte_errno != ENOTSUP) {
>> +				VHOST_LOG_CONFIG(ERR, "DMA engine map
>> failed\n");
>> +				return ret;
>> +			}
>> +			return 0;
> 
> Why return 0, if ret is -1 here?
> 
> Thanks,
> Jiayu
> 
>> +		}
>> +		return ret;
>> +	} else {
>> +		/* No need to do vfio unmap if the map failed. */
>> +		if (!region->dma_map_success)
>> +			return 0;
>> +
>> +		/* Remove mapped region from the default container of
>> DPDK. */
>> +		ret =
>> rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
>> +						   region->host_user_addr,
>> +						   host_iova,
>> +						   region->size);
>> +		if (ret) {
>> +			VHOST_LOG_CONFIG(ERR, "DMA engine unmap
>> failed\n");
>> +			return ret;
>> +		}
>> +		region->dma_map_success = 0;
>> +	}
>> +	return ret;
>> +}
>> +
>>   static void
>>   free_mem_region(struct virtio_net *dev)  { @@ -153,6 +195,9 @@
>> free_mem_region(struct virtio_net *dev)
>>   	for (i = 0; i < dev->mem->nregions; i++) {
>>   		reg = &dev->mem->regions[i];
>>   		if (reg->host_user_addr) {
>> +			if (dev->async_copy && rte_vfio_is_enabled("vfio"))
>> +				async_dma_map(reg, false);
>> +
>>   			munmap(reg->mmap_addr, reg->mmap_size);
>>   			close(reg->fd);
>>   		}
>> @@ -1157,6 +1202,7 @@ vhost_user_mmap_region(struct virtio_net *dev,
>>   	uint64_t mmap_size;
>>   	uint64_t alignment;
>>   	int populate;
>> +	int ret;
>>
>>   	/* Check for memory_size + mmap_offset overflow */
>>   	if (mmap_offset >= -region->size) {
>> @@ -1210,13 +1256,22 @@ vhost_user_mmap_region(struct virtio_net *dev,
>>   	region->mmap_size = mmap_size;
>>   	region->host_user_addr = (uint64_t)(uintptr_t)mmap_addr +
>> mmap_offset;
>>
>> -	if (dev->async_copy)
>> +	if (dev->async_copy) {
>>   		if (add_guest_pages(dev, region, alignment) < 0) {
>>   			VHOST_LOG_CONFIG(ERR,
>>   					"adding guest pages to region
>> failed.\n");
>>   			return -1;
>>   		}
>>
>> +		if (rte_vfio_is_enabled("vfio")) {
>> +			ret = async_dma_map(region, true);
>> +			if (ret < 0) {
>> +				VHOST_LOG_CONFIG(ERR, "Configure
>> IOMMU for DMA engine failed\n");
>> +				return -1;
>> +			}
>> +		}
>> +	}
>> +
>>   	VHOST_LOG_CONFIG(INFO,
>>   			"guest memory region size: 0x%" PRIx64 "\n"
>>   			"\t guest physical addr: 0x%" PRIx64 "\n"
>> --
>> 2.17.1
>
  
Chenbo Xia Sept. 24, 2021, 1:53 a.m. UTC | #3
> -----Original Message-----
> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> Sent: Thursday, September 23, 2021 10:56 PM
> To: Hu, Jiayu <jiayu.hu@intel.com>; Ding, Xuan <xuan.ding@intel.com>;
> dev@dpdk.org; Burakov, Anatoly <anatoly.burakov@intel.com>; Xia, Chenbo
> <chenbo.xia@intel.com>
> Cc: Jiang, Cheng1 <cheng1.jiang@intel.com>; Richardson, Bruce
> <bruce.richardson@intel.com>; Pai G, Sunil <sunil.pai.g@intel.com>; Wang,
> Yinan <yinan.wang@intel.com>; Yang, YvonneX <yvonnex.yang@intel.com>
> Subject: Re: [PATCH v2 2/2] vhost: enable IOMMU for async vhost
> 
> 
> 
> On 9/23/21 16:39, Hu, Jiayu wrote:
> > Hi Xuan,
> >
> >> -----Original Message-----
> >> From: Ding, Xuan <xuan.ding@intel.com>
> >> Sent: Friday, September 17, 2021 1:26 PM
> >> To: dev@dpdk.org; Burakov, Anatoly <anatoly.burakov@intel.com>;
> >> maxime.coquelin@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>
> >> Cc: Hu, Jiayu <jiayu.hu@intel.com>; Jiang, Cheng1 <cheng1.jiang@intel.com>;
> >> Richardson, Bruce <bruce.richardson@intel.com>; Pai G, Sunil
> >> <sunil.pai.g@intel.com>; Wang, Yinan <yinan.wang@intel.com>; Yang,
> >> YvonneX <yvonnex.yang@intel.com>; Ding, Xuan <xuan.ding@intel.com>
> >> Subject: [PATCH v2 2/2] vhost: enable IOMMU for async vhost
> >>
> >> The use of IOMMU has many advantages, such as isolation and address
> >> translation. This patch extends the capbility of DMA engine to use IOMMU if
> >> the DMA engine is bound to vfio.
> >>
> >> When set memory table, the guest memory will be mapped into the default
> >> container of DPDK.
> >>
> >> Signed-off-by: Xuan Ding <xuan.ding@intel.com>
> >> ---
> >>   lib/vhost/rte_vhost.h  |  1 +
> >>   lib/vhost/vhost_user.c | 57
> >> +++++++++++++++++++++++++++++++++++++++++-
> >>   2 files changed, 57 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/lib/vhost/rte_vhost.h b/lib/vhost/rte_vhost.h index
> >> 8d875e9322..e0537249f3 100644
> >> --- a/lib/vhost/rte_vhost.h
> >> +++ b/lib/vhost/rte_vhost.h
> >> @@ -127,6 +127,7 @@ struct rte_vhost_mem_region {
> >>   	void	 *mmap_addr;
> >>   	uint64_t mmap_size;
> >>   	int fd;
> >> +	uint64_t dma_map_success;
> >
> > How about using bool for dma_map_success?
> 
> The bigger problem here is that you are breaking the ABI.

Maybe this kind of driver-facing structs/functions should be removed
from ABI, since we are refactoring DPDK ABI recently.

/Chenbo

> 
> >>   };
> >>
> >>   /**
  
Maxime Coquelin Sept. 24, 2021, 7:13 a.m. UTC | #4
On 9/24/21 03:53, Xia, Chenbo wrote:
>> -----Original Message-----
>> From: Maxime Coquelin <maxime.coquelin@redhat.com>
>> Sent: Thursday, September 23, 2021 10:56 PM
>> To: Hu, Jiayu <jiayu.hu@intel.com>; Ding, Xuan <xuan.ding@intel.com>;
>> dev@dpdk.org; Burakov, Anatoly <anatoly.burakov@intel.com>; Xia, Chenbo
>> <chenbo.xia@intel.com>
>> Cc: Jiang, Cheng1 <cheng1.jiang@intel.com>; Richardson, Bruce
>> <bruce.richardson@intel.com>; Pai G, Sunil <sunil.pai.g@intel.com>; Wang,
>> Yinan <yinan.wang@intel.com>; Yang, YvonneX <yvonnex.yang@intel.com>
>> Subject: Re: [PATCH v2 2/2] vhost: enable IOMMU for async vhost
>>
>>
>>
>> On 9/23/21 16:39, Hu, Jiayu wrote:
>>> Hi Xuan,
>>>
>>>> -----Original Message-----
>>>> From: Ding, Xuan <xuan.ding@intel.com>
>>>> Sent: Friday, September 17, 2021 1:26 PM
>>>> To: dev@dpdk.org; Burakov, Anatoly <anatoly.burakov@intel.com>;
>>>> maxime.coquelin@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>
>>>> Cc: Hu, Jiayu <jiayu.hu@intel.com>; Jiang, Cheng1 <cheng1.jiang@intel.com>;
>>>> Richardson, Bruce <bruce.richardson@intel.com>; Pai G, Sunil
>>>> <sunil.pai.g@intel.com>; Wang, Yinan <yinan.wang@intel.com>; Yang,
>>>> YvonneX <yvonnex.yang@intel.com>; Ding, Xuan <xuan.ding@intel.com>
>>>> Subject: [PATCH v2 2/2] vhost: enable IOMMU for async vhost
>>>>
>>>> The use of IOMMU has many advantages, such as isolation and address
>>>> translation. This patch extends the capbility of DMA engine to use IOMMU if
>>>> the DMA engine is bound to vfio.
>>>>
>>>> When set memory table, the guest memory will be mapped into the default
>>>> container of DPDK.
>>>>
>>>> Signed-off-by: Xuan Ding <xuan.ding@intel.com>
>>>> ---
>>>>    lib/vhost/rte_vhost.h  |  1 +
>>>>    lib/vhost/vhost_user.c | 57
>>>> +++++++++++++++++++++++++++++++++++++++++-
>>>>    2 files changed, 57 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/lib/vhost/rte_vhost.h b/lib/vhost/rte_vhost.h index
>>>> 8d875e9322..e0537249f3 100644
>>>> --- a/lib/vhost/rte_vhost.h
>>>> +++ b/lib/vhost/rte_vhost.h
>>>> @@ -127,6 +127,7 @@ struct rte_vhost_mem_region {
>>>>    	void	 *mmap_addr;
>>>>    	uint64_t mmap_size;
>>>>    	int fd;
>>>> +	uint64_t dma_map_success;
>>>
>>> How about using bool for dma_map_success?
>>
>> The bigger problem here is that you are breaking the ABI.
> 
> Maybe this kind of driver-facing structs/functions should be removed
> from ABI, since we are refactoring DPDK ABI recently.

It has actually been exposed for SPDK, we cannot just remove it from
API.

Maxime

> /Chenbo
> 
>>
>>>>    };
>>>>
>>>>    /**
>
  
Chenbo Xia Sept. 24, 2021, 7:35 a.m. UTC | #5
> -----Original Message-----
> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> Sent: Friday, September 24, 2021 3:14 PM
> To: Xia, Chenbo <chenbo.xia@intel.com>; Hu, Jiayu <jiayu.hu@intel.com>; Ding,
> Xuan <xuan.ding@intel.com>; dev@dpdk.org; Burakov, Anatoly
> <anatoly.burakov@intel.com>
> Cc: Jiang, Cheng1 <cheng1.jiang@intel.com>; Richardson, Bruce
> <bruce.richardson@intel.com>; Pai G, Sunil <sunil.pai.g@intel.com>; Wang,
> Yinan <yinan.wang@intel.com>; Yang, YvonneX <yvonnex.yang@intel.com>
> Subject: Re: [PATCH v2 2/2] vhost: enable IOMMU for async vhost
> 
> 
> 
> On 9/24/21 03:53, Xia, Chenbo wrote:
> >> -----Original Message-----
> >> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> >> Sent: Thursday, September 23, 2021 10:56 PM
> >> To: Hu, Jiayu <jiayu.hu@intel.com>; Ding, Xuan <xuan.ding@intel.com>;
> >> dev@dpdk.org; Burakov, Anatoly <anatoly.burakov@intel.com>; Xia, Chenbo
> >> <chenbo.xia@intel.com>
> >> Cc: Jiang, Cheng1 <cheng1.jiang@intel.com>; Richardson, Bruce
> >> <bruce.richardson@intel.com>; Pai G, Sunil <sunil.pai.g@intel.com>; Wang,
> >> Yinan <yinan.wang@intel.com>; Yang, YvonneX <yvonnex.yang@intel.com>
> >> Subject: Re: [PATCH v2 2/2] vhost: enable IOMMU for async vhost
> >>
> >>
> >>
> >> On 9/23/21 16:39, Hu, Jiayu wrote:
> >>> Hi Xuan,
> >>>
> >>>> -----Original Message-----
> >>>> From: Ding, Xuan <xuan.ding@intel.com>
> >>>> Sent: Friday, September 17, 2021 1:26 PM
> >>>> To: dev@dpdk.org; Burakov, Anatoly <anatoly.burakov@intel.com>;
> >>>> maxime.coquelin@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>
> >>>> Cc: Hu, Jiayu <jiayu.hu@intel.com>; Jiang, Cheng1
> <cheng1.jiang@intel.com>;
> >>>> Richardson, Bruce <bruce.richardson@intel.com>; Pai G, Sunil
> >>>> <sunil.pai.g@intel.com>; Wang, Yinan <yinan.wang@intel.com>; Yang,
> >>>> YvonneX <yvonnex.yang@intel.com>; Ding, Xuan <xuan.ding@intel.com>
> >>>> Subject: [PATCH v2 2/2] vhost: enable IOMMU for async vhost
> >>>>
> >>>> The use of IOMMU has many advantages, such as isolation and address
> >>>> translation. This patch extends the capbility of DMA engine to use IOMMU
> if
> >>>> the DMA engine is bound to vfio.
> >>>>
> >>>> When set memory table, the guest memory will be mapped into the default
> >>>> container of DPDK.
> >>>>
> >>>> Signed-off-by: Xuan Ding <xuan.ding@intel.com>
> >>>> ---
> >>>>    lib/vhost/rte_vhost.h  |  1 +
> >>>>    lib/vhost/vhost_user.c | 57
> >>>> +++++++++++++++++++++++++++++++++++++++++-
> >>>>    2 files changed, 57 insertions(+), 1 deletion(-)
> >>>>
> >>>> diff --git a/lib/vhost/rte_vhost.h b/lib/vhost/rte_vhost.h index
> >>>> 8d875e9322..e0537249f3 100644
> >>>> --- a/lib/vhost/rte_vhost.h
> >>>> +++ b/lib/vhost/rte_vhost.h
> >>>> @@ -127,6 +127,7 @@ struct rte_vhost_mem_region {
> >>>>    	void	 *mmap_addr;
> >>>>    	uint64_t mmap_size;
> >>>>    	int fd;
> >>>> +	uint64_t dma_map_success;
> >>>
> >>> How about using bool for dma_map_success?
> >>
> >> The bigger problem here is that you are breaking the ABI.
> >
> > Maybe this kind of driver-facing structs/functions should be removed
> > from ABI, since we are refactoring DPDK ABI recently.
> 
> It has actually been exposed for SPDK, we cannot just remove it from
> API.

'exposed' does not mean it has to be ABI. Like 'driver_sdk_headers' in
ethdev lib, those headers can be exposed but do not include ABI. I see
SPDK is using that for building its lib. Not sure in this case, the SPDK
Vhost lib should be considered as application.

Thanks,
Chenbo 

> 
> Maxime
> 
> > /Chenbo
> >
> >>
> >>>>    };
> >>>>
> >>>>    /**
> >
  
Ding, Xuan Sept. 24, 2021, 8:18 a.m. UTC | #6
> -----Original Message-----
> From: Xia, Chenbo <chenbo.xia@intel.com>
> Sent: Friday, September 24, 2021 3:36 PM
> To: Maxime Coquelin <maxime.coquelin@redhat.com>; Hu, Jiayu
> <jiayu.hu@intel.com>; Ding, Xuan <xuan.ding@intel.com>; dev@dpdk.org;
> Burakov, Anatoly <anatoly.burakov@intel.com>
> Cc: Jiang, Cheng1 <cheng1.jiang@intel.com>; Richardson, Bruce
> <bruce.richardson@intel.com>; Pai G, Sunil <sunil.pai.g@intel.com>; Wang,
> Yinan <yinan.wang@intel.com>; Yang, YvonneX <yvonnex.yang@intel.com>
> Subject: RE: [PATCH v2 2/2] vhost: enable IOMMU for async vhost
> 
> > -----Original Message-----
> > From: Maxime Coquelin <maxime.coquelin@redhat.com>
> > Sent: Friday, September 24, 2021 3:14 PM
> > To: Xia, Chenbo <chenbo.xia@intel.com>; Hu, Jiayu <jiayu.hu@intel.com>;
> Ding,
> > Xuan <xuan.ding@intel.com>; dev@dpdk.org; Burakov, Anatoly
> > <anatoly.burakov@intel.com>
> > Cc: Jiang, Cheng1 <cheng1.jiang@intel.com>; Richardson, Bruce
> > <bruce.richardson@intel.com>; Pai G, Sunil <sunil.pai.g@intel.com>; Wang,
> > Yinan <yinan.wang@intel.com>; Yang, YvonneX <yvonnex.yang@intel.com>
> > Subject: Re: [PATCH v2 2/2] vhost: enable IOMMU for async vhost
> >
> >
> >
> > On 9/24/21 03:53, Xia, Chenbo wrote:
> > >> -----Original Message-----
> > >> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> > >> Sent: Thursday, September 23, 2021 10:56 PM
> > >> To: Hu, Jiayu <jiayu.hu@intel.com>; Ding, Xuan <xuan.ding@intel.com>;
> > >> dev@dpdk.org; Burakov, Anatoly <anatoly.burakov@intel.com>; Xia,
> Chenbo
> > >> <chenbo.xia@intel.com>
> > >> Cc: Jiang, Cheng1 <cheng1.jiang@intel.com>; Richardson, Bruce
> > >> <bruce.richardson@intel.com>; Pai G, Sunil <sunil.pai.g@intel.com>; Wang,
> > >> Yinan <yinan.wang@intel.com>; Yang, YvonneX <yvonnex.yang@intel.com>
> > >> Subject: Re: [PATCH v2 2/2] vhost: enable IOMMU for async vhost
> > >>
> > >>
> > >>
> > >> On 9/23/21 16:39, Hu, Jiayu wrote:
> > >>> Hi Xuan,
> > >>>
> > >>>> -----Original Message-----
> > >>>> From: Ding, Xuan <xuan.ding@intel.com>
> > >>>> Sent: Friday, September 17, 2021 1:26 PM
> > >>>> To: dev@dpdk.org; Burakov, Anatoly <anatoly.burakov@intel.com>;
> > >>>> maxime.coquelin@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>
> > >>>> Cc: Hu, Jiayu <jiayu.hu@intel.com>; Jiang, Cheng1
> > <cheng1.jiang@intel.com>;
> > >>>> Richardson, Bruce <bruce.richardson@intel.com>; Pai G, Sunil
> > >>>> <sunil.pai.g@intel.com>; Wang, Yinan <yinan.wang@intel.com>; Yang,
> > >>>> YvonneX <yvonnex.yang@intel.com>; Ding, Xuan <xuan.ding@intel.com>
> > >>>> Subject: [PATCH v2 2/2] vhost: enable IOMMU for async vhost
> > >>>>
> > >>>> The use of IOMMU has many advantages, such as isolation and address
> > >>>> translation. This patch extends the capbility of DMA engine to use
> IOMMU
> > if
> > >>>> the DMA engine is bound to vfio.
> > >>>>
> > >>>> When set memory table, the guest memory will be mapped into the
> default
> > >>>> container of DPDK.
> > >>>>
> > >>>> Signed-off-by: Xuan Ding <xuan.ding@intel.com>
> > >>>> ---
> > >>>>    lib/vhost/rte_vhost.h  |  1 +
> > >>>>    lib/vhost/vhost_user.c | 57
> > >>>> +++++++++++++++++++++++++++++++++++++++++-
> > >>>>    2 files changed, 57 insertions(+), 1 deletion(-)
> > >>>>
> > >>>> diff --git a/lib/vhost/rte_vhost.h b/lib/vhost/rte_vhost.h index
> > >>>> 8d875e9322..e0537249f3 100644
> > >>>> --- a/lib/vhost/rte_vhost.h
> > >>>> +++ b/lib/vhost/rte_vhost.h
> > >>>> @@ -127,6 +127,7 @@ struct rte_vhost_mem_region {
> > >>>>    	void	 *mmap_addr;
> > >>>>    	uint64_t mmap_size;
> > >>>>    	int fd;
> > >>>> +	uint64_t dma_map_success;
> > >>>
> > >>> How about using bool for dma_map_success?
> > >>
> > >> The bigger problem here is that you are breaking the ABI.
> > >
> > > Maybe this kind of driver-facing structs/functions should be removed
> > > from ABI, since we are refactoring DPDK ABI recently.
> >
> > It has actually been exposed for SPDK, we cannot just remove it from
> > API.
> 
> 'exposed' does not mean it has to be ABI. Like 'driver_sdk_headers' in
> ethdev lib, those headers can be exposed but do not include ABI. I see
> SPDK is using that for building its lib. Not sure in this case, the SPDK
> Vhost lib should be considered as application.
> 
> Thanks,
> Chenbo

Thanks for the discussion. Since any ABI change would only happen in the future,
I am considering adding dma_map_success to the virtio_net structure instead, to indicate
the map status of each region. This flag could even be removed if we do not need to take
the restrictions on the user side (kernel driver support) into account. Details will be provided in the next version of the patch.

Hope to get your insights. :)

Thanks,
Xuan

> 
> >
> > Maxime
> >
> > > /Chenbo
> > >
> > >>
> > >>>>    };
> > >>>>
> > >>>>    /**
> > >
  

Patch

diff --git a/lib/vhost/rte_vhost.h b/lib/vhost/rte_vhost.h
index 8d875e9322..e0537249f3 100644
--- a/lib/vhost/rte_vhost.h
+++ b/lib/vhost/rte_vhost.h
@@ -127,6 +127,7 @@  struct rte_vhost_mem_region {
 	void	 *mmap_addr;
 	uint64_t mmap_size;
 	int fd;
+	uint64_t dma_map_success;
 };
 
 /**
diff --git a/lib/vhost/vhost_user.c b/lib/vhost/vhost_user.c
index 29a4c9af60..7d1d592b86 100644
--- a/lib/vhost/vhost_user.c
+++ b/lib/vhost/vhost_user.c
@@ -45,6 +45,8 @@ 
 #include <rte_common.h>
 #include <rte_malloc.h>
 #include <rte_log.h>
+#include <rte_vfio.h>
+#include <rte_errno.h>
 
 #include "iotlb.h"
 #include "vhost.h"
@@ -141,6 +143,46 @@  get_blk_size(int fd)
 	return ret == -1 ? (uint64_t)-1 : (uint64_t)stat.st_blksize;
 }
 
+static int
+async_dma_map(struct rte_vhost_mem_region *region, bool do_map)
+{
+	int ret = 0;
+	uint64_t host_iova;
+	host_iova = rte_mem_virt2iova((void *)(uintptr_t)region->host_user_addr);
+	if (do_map) {
+		/* Add mapped region into the default container of DPDK. */
+		ret = rte_vfio_container_dma_map(RTE_VFIO_DEFAULT_CONTAINER_FD,
+						 region->host_user_addr,
+						 host_iova,
+						 region->size);
+		region->dma_map_success = ret == 0;
+		if (ret) {
+			if (rte_errno != ENODEV && rte_errno != ENOTSUP) {
+				VHOST_LOG_CONFIG(ERR, "DMA engine map failed\n");
+				return ret;
+			}
+			return 0;
+		}
+		return ret;
+	} else {
+		/* No need to do vfio unmap if the map failed. */
+		if (!region->dma_map_success)
+			return 0;
+
+		/* Remove mapped region from the default container of DPDK. */
+		ret = rte_vfio_container_dma_unmap(RTE_VFIO_DEFAULT_CONTAINER_FD,
+						   region->host_user_addr,
+						   host_iova,
+						   region->size);
+		if (ret) {
+			VHOST_LOG_CONFIG(ERR, "DMA engine unmap failed\n");
+			return ret;
+		}
+		region->dma_map_success = 0;
+	}
+	return ret;
+}
+
 static void
 free_mem_region(struct virtio_net *dev)
 {
@@ -153,6 +195,9 @@  free_mem_region(struct virtio_net *dev)
 	for (i = 0; i < dev->mem->nregions; i++) {
 		reg = &dev->mem->regions[i];
 		if (reg->host_user_addr) {
+			if (dev->async_copy && rte_vfio_is_enabled("vfio"))
+				async_dma_map(reg, false);
+
 			munmap(reg->mmap_addr, reg->mmap_size);
 			close(reg->fd);
 		}
@@ -1157,6 +1202,7 @@  vhost_user_mmap_region(struct virtio_net *dev,
 	uint64_t mmap_size;
 	uint64_t alignment;
 	int populate;
+	int ret;
 
 	/* Check for memory_size + mmap_offset overflow */
 	if (mmap_offset >= -region->size) {
@@ -1210,13 +1256,22 @@  vhost_user_mmap_region(struct virtio_net *dev,
 	region->mmap_size = mmap_size;
 	region->host_user_addr = (uint64_t)(uintptr_t)mmap_addr + mmap_offset;
 
-	if (dev->async_copy)
+	if (dev->async_copy) {
 		if (add_guest_pages(dev, region, alignment) < 0) {
 			VHOST_LOG_CONFIG(ERR,
 					"adding guest pages to region failed.\n");
 			return -1;
 		}
 
+		if (rte_vfio_is_enabled("vfio")) {
+			ret = async_dma_map(region, true);
+			if (ret < 0) {
+				VHOST_LOG_CONFIG(ERR, "Configure IOMMU for DMA engine failed\n");
+				return -1;
+			}
+		}
+	}
+
 	VHOST_LOG_CONFIG(INFO,
 			"guest memory region size: 0x%" PRIx64 "\n"
 			"\t guest physical addr: 0x%" PRIx64 "\n"