[dpdk-dev,v3,6/6] DPDK changes for accommodating ENIC PMD

Message ID 1416758899-1351-7-git-send-email-ssujith@cisco.com (mailing list archive)
State Superseded, archived
Headers

Commit Message

Sujith Sankar Nov. 23, 2014, 4:08 p.m. UTC
  Signed-off-by: Sujith Sankar <ssujith@cisco.com>
---
 config/common_linuxapp                             | 5 +++++
 lib/Makefile                                       | 1 +
 lib/librte_eal/linuxapp/eal/eal_pci_vfio.c         | 7 +++++++
 lib/librte_eal/linuxapp/eal/include/eal_pci_init.h | 1 +
 mk/rte.app.mk                                      | 4 ++++
 5 files changed, 18 insertions(+)
  

Comments

Neil Horman Nov. 24, 2014, 12:17 a.m. UTC | #1
On Sun, Nov 23, 2014 at 09:38:19PM +0530, Sujith Sankar wrote:
> Signed-off-by: Sujith Sankar <ssujith@cisco.com>
> ---
>  config/common_linuxapp                             | 5 +++++
>  lib/Makefile                                       | 1 +
>  lib/librte_eal/linuxapp/eal/eal_pci_vfio.c         | 7 +++++++
>  lib/librte_eal/linuxapp/eal/include/eal_pci_init.h | 1 +
>  mk/rte.app.mk                                      | 4 ++++
>  5 files changed, 18 insertions(+)
> 
> diff --git a/config/common_linuxapp b/config/common_linuxapp
> index 57b61c9..3c091e7 100644
> --- a/config/common_linuxapp
> +++ b/config/common_linuxapp
> @@ -210,6 +210,11 @@ CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_VM=4
>  CONFIG_RTE_LIBRTE_I40E_ITR_INTERVAL=-1
>  
>  #
> +# Compile burst-oriented Cisco ENIC PMD driver
> +#
> +CONFIG_RTE_LIBRTE_ENIC_PMD=y
> +
> +#
>  # Compile burst-oriented VIRTIO PMD driver
>  #
>  CONFIG_RTE_LIBRTE_VIRTIO_PMD=y
> diff --git a/lib/Makefile b/lib/Makefile
> index e3237ff..1911790 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -43,6 +43,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_CMDLINE) += librte_cmdline
>  DIRS-$(CONFIG_RTE_LIBRTE_ETHER) += librte_ether
>  DIRS-$(CONFIG_RTE_LIBRTE_E1000_PMD) += librte_pmd_e1000
>  DIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += librte_pmd_ixgbe
> +DIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += librte_pmd_enic
>  DIRS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += librte_pmd_i40e
>  DIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += librte_pmd_bond
>  DIRS-$(CONFIG_RTE_LIBRTE_PMD_RING) += librte_pmd_ring
> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> index c776ddc..6bf8f2e 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> @@ -736,6 +736,7 @@ pci_vfio_map_resource(struct rte_pci_device *dev)
>  		maps[i].offset = reg.offset;
>  		maps[i].size = reg.size;
>  		dev->mem_resource[i].addr = bar_addr;
> +		dev->mem_resource[i].len = reg.size;
>  	}
>  
>  	/* if secondary process, do not set up interrupts */
> @@ -791,4 +792,10 @@ pci_vfio_is_enabled(void)
>  {
>  	return vfio_cfg.vfio_enabled;
>  }
> +
> +int
> +pci_vfio_container_fd(void)
> +{
> +	return vfio_cfg.vfio_container_fd;
> +}
You should move this function definition to a separate patch and put it earlier
in the series, as you call this function two patches back.

Also, this gives me pause, as it seems like you're working around the VFIO api.
From what I see, you just use this to get an fd that you can use in an ioctl to
set some DMA settings.  First off, theres already a function called
pci_vfio_get_container_fd, which does exactly what you are doing here, with
additional safety checking.  

However, even though there is an existing function to do what you want, I would
recommend that you not use it for your purposes.  Whenever you expose something
like a file descriptor, you run the risk of multiple accessors racing trying to
set/unset features and preform operations.  It would be better if you could add
apropriate api calls to vfio interface to set what you want.  That way the
library can add appropriate locking if/when needed

Neil

>  #endif
> diff --git a/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h b/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h
> index d758bee..c9e9e40 100644
> --- a/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h
> +++ b/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h
> @@ -71,6 +71,7 @@ int pci_uio_map_resource(struct rte_pci_device *dev);
>  
>  int pci_vfio_enable(void);
>  int pci_vfio_is_enabled(void);
> +int pci_vfio_container_fd(void);
>  int pci_vfio_mp_sync_setup(void);
>  
>  /* map VFIO resource prototype */
> diff --git a/mk/rte.app.mk b/mk/rte.app.mk
> index 285b65c..95c652f 100644
> --- a/mk/rte.app.mk
> +++ b/mk/rte.app.mk
> @@ -186,6 +186,10 @@ ifeq ($(CONFIG_RTE_LIBRTE_VMXNET3_PMD),y)
>  LDLIBS += -lrte_pmd_vmxnet3_uio
>  endif
>  
> +ifeq ($(CONFIG_RTE_LIBRTE_ENIC_PMD),y)
> +LDLIBS += -lrte_pmd_enic
> +endif
> +
>  ifeq ($(CONFIG_RTE_LIBRTE_VIRTIO_PMD),y)
>  LDLIBS += -lrte_pmd_virtio_uio
>  endif
> -- 
> 1.9.1
> 
>
  
Sujith Sankar Nov. 24, 2014, 5:45 a.m. UTC | #2
On 24/11/14 5:47 am, "Neil Horman" <nhorman@tuxdriver.com> wrote:

>On Sun, Nov 23, 2014 at 09:38:19PM +0530, Sujith Sankar wrote:
>> Signed-off-by: Sujith Sankar <ssujith@cisco.com>
>> ---
>>  config/common_linuxapp                             | 5 +++++
>>  lib/Makefile                                       | 1 +
>>  lib/librte_eal/linuxapp/eal/eal_pci_vfio.c         | 7 +++++++
>>  lib/librte_eal/linuxapp/eal/include/eal_pci_init.h | 1 +
>>  mk/rte.app.mk                                      | 4 ++++
>>  5 files changed, 18 insertions(+)
>> 
>> diff --git a/config/common_linuxapp b/config/common_linuxapp
>> index 57b61c9..3c091e7 100644
>> --- a/config/common_linuxapp
>> +++ b/config/common_linuxapp
>> @@ -210,6 +210,11 @@ CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_VM=4
>>  CONFIG_RTE_LIBRTE_I40E_ITR_INTERVAL=-1
>>  
>>  #
>> +# Compile burst-oriented Cisco ENIC PMD driver
>> +#
>> +CONFIG_RTE_LIBRTE_ENIC_PMD=y
>> +
>> +#
>>  # Compile burst-oriented VIRTIO PMD driver
>>  #
>>  CONFIG_RTE_LIBRTE_VIRTIO_PMD=y
>> diff --git a/lib/Makefile b/lib/Makefile
>> index e3237ff..1911790 100644
>> --- a/lib/Makefile
>> +++ b/lib/Makefile
>> @@ -43,6 +43,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_CMDLINE) += librte_cmdline
>>  DIRS-$(CONFIG_RTE_LIBRTE_ETHER) += librte_ether
>>  DIRS-$(CONFIG_RTE_LIBRTE_E1000_PMD) += librte_pmd_e1000
>>  DIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += librte_pmd_ixgbe
>> +DIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += librte_pmd_enic
>>  DIRS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += librte_pmd_i40e
>>  DIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += librte_pmd_bond
>>  DIRS-$(CONFIG_RTE_LIBRTE_PMD_RING) += librte_pmd_ring
>> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
>>b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
>> index c776ddc..6bf8f2e 100644
>> --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
>> @@ -736,6 +736,7 @@ pci_vfio_map_resource(struct rte_pci_device *dev)
>>  		maps[i].offset = reg.offset;
>>  		maps[i].size = reg.size;
>>  		dev->mem_resource[i].addr = bar_addr;
>> +		dev->mem_resource[i].len = reg.size;
>>  	}
>>  
>>  	/* if secondary process, do not set up interrupts */
>> @@ -791,4 +792,10 @@ pci_vfio_is_enabled(void)
>>  {
>>  	return vfio_cfg.vfio_enabled;
>>  }
>> +
>> +int
>> +pci_vfio_container_fd(void)
>> +{
>> +	return vfio_cfg.vfio_container_fd;
>> +}
>You should move this function definition to a separate patch and put it
>earlier
>in the series, as you call this function two patches back.

Thanks for the comment, Neil.  I shall move this to a separate patch and
put it earlier in the series.

>
>Also, this gives me pause, as it seems like you're working around the
>VFIO api.
>From what I see, you just use this to get an fd that you can use in an
>ioctl to
>set some DMA settings.  First off, theres already a function called
>pci_vfio_get_container_fd, which does exactly what you are doing here,
>with
>additional safety checking.
>
>However, even though there is an existing function to do what you want, I
>would
>recommend that you not use it for your purposes.  Whenever you expose
>something
>like a file descriptor, you run the risk of multiple accessors racing
>trying to
>set/unset features and preform operations.  It would be better if you
>could add
>apropriate api calls to vfio interface to set what you want.  That way the
>library can add appropriate locking if/when needed


I see that vfio_cfg.vfio_container_fd is obtained and stored in
pci_vfio_enable(), and this is not modified later.
ENIC PMD needs it to add the IOMMU mapping for buffers used for
communicating with adapter firmware.  That¹s just adding an entry, and
container fd is just passed as an argument.  So the following addition in
eal_pci_vfio.c should be sufficient.  Since vfio_cfg is per process, I do
not think that any other checking is required.

int
pci_vfio_map_dma(struct vfio_iommu_type1_dma_map *dma_map)
{
	return ioctl(vfio_cfg.vfio_container_fd, VFIO_IOMMU_MAP_DMA, dma_map);
}



Does this look alright?  Do you think that I¹ve missed out anything here?

Thanks,
-Sujith


>
>Neil
>
>>  #endif
>> diff --git a/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h
>>b/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h
>> index d758bee..c9e9e40 100644
>> --- a/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h
>> +++ b/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h
>> @@ -71,6 +71,7 @@ int pci_uio_map_resource(struct rte_pci_device *dev);
>>  
>>  int pci_vfio_enable(void);
>>  int pci_vfio_is_enabled(void);
>> +int pci_vfio_container_fd(void);
>>  int pci_vfio_mp_sync_setup(void);
>>  
>>  /* map VFIO resource prototype */
>> diff --git a/mk/rte.app.mk b/mk/rte.app.mk
>> index 285b65c..95c652f 100644
>> --- a/mk/rte.app.mk
>> +++ b/mk/rte.app.mk
>> @@ -186,6 +186,10 @@ ifeq ($(CONFIG_RTE_LIBRTE_VMXNET3_PMD),y)
>>  LDLIBS += -lrte_pmd_vmxnet3_uio
>>  endif
>>  
>> +ifeq ($(CONFIG_RTE_LIBRTE_ENIC_PMD),y)
>> +LDLIBS += -lrte_pmd_enic
>> +endif
>> +
>>  ifeq ($(CONFIG_RTE_LIBRTE_VIRTIO_PMD),y)
>>  LDLIBS += -lrte_pmd_virtio_uio
>>  endif
>> -- 
>> 1.9.1
>> 
>>
  
David Marchand Nov. 24, 2014, 11:03 a.m. UTC | #3
Hello Sujith,

On Sun, Nov 23, 2014 at 5:08 PM, Sujith Sankar <ssujith@cisco.com> wrote:

> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> index c776ddc..6bf8f2e 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> @@ -736,6 +736,7 @@ pci_vfio_map_resource(struct rte_pci_device *dev)
>                 maps[i].offset = reg.offset;
>                 maps[i].size = reg.size;
>                 dev->mem_resource[i].addr = bar_addr;
> +               dev->mem_resource[i].len = reg.size;
>         }
>
>         /* if secondary process, do not set up interrupts */
>

Not sure I understand why you need to overwrite the length value.
This is supposed to be initialised before by "generic" code.
This looks like a hack or a workaround.

Can you elaborate on this change ?
Thanks.
  
Neil Horman Nov. 24, 2014, 11:33 a.m. UTC | #4
On Mon, Nov 24, 2014 at 05:45:54AM +0000, Sujith Sankar (ssujith) wrote:
> 
> 
> On 24/11/14 5:47 am, "Neil Horman" <nhorman@tuxdriver.com> wrote:
> 
> >On Sun, Nov 23, 2014 at 09:38:19PM +0530, Sujith Sankar wrote:
> >> Signed-off-by: Sujith Sankar <ssujith@cisco.com>
> >> ---
> >>  config/common_linuxapp                             | 5 +++++
> >>  lib/Makefile                                       | 1 +
> >>  lib/librte_eal/linuxapp/eal/eal_pci_vfio.c         | 7 +++++++
> >>  lib/librte_eal/linuxapp/eal/include/eal_pci_init.h | 1 +
> >>  mk/rte.app.mk                                      | 4 ++++
> >>  5 files changed, 18 insertions(+)
> >> 
> >> diff --git a/config/common_linuxapp b/config/common_linuxapp
> >> index 57b61c9..3c091e7 100644
> >> --- a/config/common_linuxapp
> >> +++ b/config/common_linuxapp
> >> @@ -210,6 +210,11 @@ CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_VM=4
> >>  CONFIG_RTE_LIBRTE_I40E_ITR_INTERVAL=-1
> >>  
> >>  #
> >> +# Compile burst-oriented Cisco ENIC PMD driver
> >> +#
> >> +CONFIG_RTE_LIBRTE_ENIC_PMD=y
> >> +
> >> +#
> >>  # Compile burst-oriented VIRTIO PMD driver
> >>  #
> >>  CONFIG_RTE_LIBRTE_VIRTIO_PMD=y
> >> diff --git a/lib/Makefile b/lib/Makefile
> >> index e3237ff..1911790 100644
> >> --- a/lib/Makefile
> >> +++ b/lib/Makefile
> >> @@ -43,6 +43,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_CMDLINE) += librte_cmdline
> >>  DIRS-$(CONFIG_RTE_LIBRTE_ETHER) += librte_ether
> >>  DIRS-$(CONFIG_RTE_LIBRTE_E1000_PMD) += librte_pmd_e1000
> >>  DIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += librte_pmd_ixgbe
> >> +DIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += librte_pmd_enic
> >>  DIRS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += librte_pmd_i40e
> >>  DIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += librte_pmd_bond
> >>  DIRS-$(CONFIG_RTE_LIBRTE_PMD_RING) += librte_pmd_ring
> >> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> >>b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> >> index c776ddc..6bf8f2e 100644
> >> --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> >> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> >> @@ -736,6 +736,7 @@ pci_vfio_map_resource(struct rte_pci_device *dev)
> >>  		maps[i].offset = reg.offset;
> >>  		maps[i].size = reg.size;
> >>  		dev->mem_resource[i].addr = bar_addr;
> >> +		dev->mem_resource[i].len = reg.size;
> >>  	}
> >>  
> >>  	/* if secondary process, do not set up interrupts */
> >> @@ -791,4 +792,10 @@ pci_vfio_is_enabled(void)
> >>  {
> >>  	return vfio_cfg.vfio_enabled;
> >>  }
> >> +
> >> +int
> >> +pci_vfio_container_fd(void)
> >> +{
> >> +	return vfio_cfg.vfio_container_fd;
> >> +}
> >You should move this function definition to a separate patch and put it
> >earlier
> >in the series, as you call this function two patches back.
> 
> Thanks for the comment, Neil.  I shall move this to a separate patch and
> put it earlier in the series.
> 
> >
> >Also, this gives me pause, as it seems like you're working around the
> >VFIO api.
> >From what I see, you just use this to get an fd that you can use in an
> >ioctl to
> >set some DMA settings.  First off, theres already a function called
> >pci_vfio_get_container_fd, which does exactly what you are doing here,
> >with
> >additional safety checking.
> >
> >However, even though there is an existing function to do what you want, I
> >would
> >recommend that you not use it for your purposes.  Whenever you expose
> >something
> >like a file descriptor, you run the risk of multiple accessors racing
> >trying to
> >set/unset features and preform operations.  It would be better if you
> >could add
> >apropriate api calls to vfio interface to set what you want.  That way the
> >library can add appropriate locking if/when needed
> 
> 
> I see that vfio_cfg.vfio_container_fd is obtained and stored in
> pci_vfio_enable(), and this is not modified later.
> ENIC PMD needs it to add the IOMMU mapping for buffers used for
> communicating with adapter firmware.  That¹s just adding an entry, and
> container fd is just passed as an argument.  So the following addition in
> eal_pci_vfio.c should be sufficient.  Since vfio_cfg is per process, I do
> not think that any other checking is required.
> 
> int
> pci_vfio_map_dma(struct vfio_iommu_type1_dma_map *dma_map)
> {
> 	return ioctl(vfio_cfg.vfio_container_fd, VFIO_IOMMU_MAP_DMA, dma_map);
> }
> 
> 
> 
> Does this look alright?  Do you think that I¹ve missed out anything here?
> 
It looks better yes, but I'm still confused as to why its necessecary.  Looking
back at your use of the fd, you're adding an iov entry for a buffer allocated
via rte_memzone_reserve, which should come out of the dpdk's configured memory.
In pci_vfio_setup_dma_maps, which is part of the pci probe path in eal library,
all of the DPDK's physically available memory is already mapped into the iommu
in a 1:1 fashion.  So why do you need to do this again?

Neil
  
Sujith Sankar Nov. 24, 2014, 4:12 p.m. UTC | #5
On 24/11/14 5:03 pm, "Neil Horman" <nhorman@tuxdriver.com> wrote:

>On Mon, Nov 24, 2014 at 05:45:54AM +0000, Sujith Sankar (ssujith) wrote:

>> 

>> 

>> On 24/11/14 5:47 am, "Neil Horman" <nhorman@tuxdriver.com> wrote:

>> 

>> >On Sun, Nov 23, 2014 at 09:38:19PM +0530, Sujith Sankar wrote:

>> >> Signed-off-by: Sujith Sankar <ssujith@cisco.com>

>> >> ---

>> >>  config/common_linuxapp                             | 5 +++++

>> >>  lib/Makefile                                       | 1 +

>> >>  lib/librte_eal/linuxapp/eal/eal_pci_vfio.c         | 7 +++++++

>> >>  lib/librte_eal/linuxapp/eal/include/eal_pci_init.h | 1 +

>> >>  mk/rte.app.mk                                      | 4 ++++

>> >>  5 files changed, 18 insertions(+)

>> >> 

>> >> diff --git a/config/common_linuxapp b/config/common_linuxapp

>> >> index 57b61c9..3c091e7 100644

>> >> --- a/config/common_linuxapp

>> >> +++ b/config/common_linuxapp

>> >> @@ -210,6 +210,11 @@ CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_VM=4

>> >>  CONFIG_RTE_LIBRTE_I40E_ITR_INTERVAL=-1

>> >>  

>> >>  #

>> >> +# Compile burst-oriented Cisco ENIC PMD driver

>> >> +#

>> >> +CONFIG_RTE_LIBRTE_ENIC_PMD=y

>> >> +

>> >> +#

>> >>  # Compile burst-oriented VIRTIO PMD driver

>> >>  #

>> >>  CONFIG_RTE_LIBRTE_VIRTIO_PMD=y

>> >> diff --git a/lib/Makefile b/lib/Makefile

>> >> index e3237ff..1911790 100644

>> >> --- a/lib/Makefile

>> >> +++ b/lib/Makefile

>> >> @@ -43,6 +43,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_CMDLINE) += librte_cmdline

>> >>  DIRS-$(CONFIG_RTE_LIBRTE_ETHER) += librte_ether

>> >>  DIRS-$(CONFIG_RTE_LIBRTE_E1000_PMD) += librte_pmd_e1000

>> >>  DIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += librte_pmd_ixgbe

>> >> +DIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += librte_pmd_enic

>> >>  DIRS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += librte_pmd_i40e

>> >>  DIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += librte_pmd_bond

>> >>  DIRS-$(CONFIG_RTE_LIBRTE_PMD_RING) += librte_pmd_ring

>> >> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c

>> >>b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c

>> >> index c776ddc..6bf8f2e 100644

>> >> --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c

>> >> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c

>> >> @@ -736,6 +736,7 @@ pci_vfio_map_resource(struct rte_pci_device *dev)

>> >>  		maps[i].offset = reg.offset;

>> >>  		maps[i].size = reg.size;

>> >>  		dev->mem_resource[i].addr = bar_addr;

>> >> +		dev->mem_resource[i].len = reg.size;

>> >>  	}

>> >>  

>> >>  	/* if secondary process, do not set up interrupts */

>> >> @@ -791,4 +792,10 @@ pci_vfio_is_enabled(void)

>> >>  {

>> >>  	return vfio_cfg.vfio_enabled;

>> >>  }

>> >> +

>> >> +int

>> >> +pci_vfio_container_fd(void)

>> >> +{

>> >> +	return vfio_cfg.vfio_container_fd;

>> >> +}

>> >You should move this function definition to a separate patch and put it

>> >earlier

>> >in the series, as you call this function two patches back.

>> 

>> Thanks for the comment, Neil.  I shall move this to a separate patch and

>> put it earlier in the series.

>> 

>> >

>> >Also, this gives me pause, as it seems like you're working around the

>> >VFIO api.

>> >From what I see, you just use this to get an fd that you can use in an

>> >ioctl to

>> >set some DMA settings.  First off, theres already a function called

>> >pci_vfio_get_container_fd, which does exactly what you are doing here,

>> >with

>> >additional safety checking.

>> >

>> >However, even though there is an existing function to do what you

>>want, I

>> >would

>> >recommend that you not use it for your purposes.  Whenever you expose

>> >something

>> >like a file descriptor, you run the risk of multiple accessors racing

>> >trying to

>> >set/unset features and preform operations.  It would be better if you

>> >could add

>> >apropriate api calls to vfio interface to set what you want.  That way

>>the

>> >library can add appropriate locking if/when needed

>> 

>> 

>> I see that vfio_cfg.vfio_container_fd is obtained and stored in

>> pci_vfio_enable(), and this is not modified later.

>> ENIC PMD needs it to add the IOMMU mapping for buffers used for

>> communicating with adapter firmware.  That¹s just adding an entry, and

>> container fd is just passed as an argument.  So the following addition

>>in

>> eal_pci_vfio.c should be sufficient.  Since vfio_cfg is per process, I

>>do

>> not think that any other checking is required.

>> 

>> int

>> pci_vfio_map_dma(struct vfio_iommu_type1_dma_map *dma_map)

>> {

>> 	return ioctl(vfio_cfg.vfio_container_fd, VFIO_IOMMU_MAP_DMA, dma_map);

>> }

>> 

>> 

>> 

>> Does this look alright?  Do you think that I¹ve missed out anything

>>here?

>> 

>It looks better yes, but I'm still confused as to why its necessecary.

>Looking

>back at your use of the fd, you're adding an iov entry for a buffer

>allocated

>via rte_memzone_reserve, which should come out of the dpdk's configured

>memory.

>In pci_vfio_setup_dma_maps, which is part of the pci probe path in eal

>library,

>all of the DPDK's physically available memory is already mapped into the

>iommu

>in a 1:1 fashion.  So why do you need to do this again?


Yes, ideally all the memory allocated using rte_memzone_reserve_aligned()
should have its mapping already there in IOMMU.  The code that adds the
mapping was not there initially, but while testing, I saw that it was
giving DMAR errors (like what’s shown below).  Putting it back solved the
problem.


[295042.852620] DMAR:[fault reason 05] PTE Write access is not set
[295044.406282] dmar: DRHD: handling fault status reg 602
[295044.467981] dmar: DMAR:[DMA Write] Request device [86:00.0] fault addr
7fbd73b32000


Regards,
-Sujith

>

>Neil

>
  
Neil Horman Nov. 24, 2014, 5:19 p.m. UTC | #6
On Mon, Nov 24, 2014 at 04:12:48PM +0000, Sujith Sankar (ssujith) wrote:
> 
> 
> On 24/11/14 5:03 pm, "Neil Horman" <nhorman@tuxdriver.com> wrote:
> 
> >On Mon, Nov 24, 2014 at 05:45:54AM +0000, Sujith Sankar (ssujith) wrote:
> >> 
> >> 
> >> On 24/11/14 5:47 am, "Neil Horman" <nhorman@tuxdriver.com> wrote:
> >> 
> >> >On Sun, Nov 23, 2014 at 09:38:19PM +0530, Sujith Sankar wrote:
> >> >> Signed-off-by: Sujith Sankar <ssujith@cisco.com>
> >> >> ---
> >> >>  config/common_linuxapp                             | 5 +++++
> >> >>  lib/Makefile                                       | 1 +
> >> >>  lib/librte_eal/linuxapp/eal/eal_pci_vfio.c         | 7 +++++++
> >> >>  lib/librte_eal/linuxapp/eal/include/eal_pci_init.h | 1 +
> >> >>  mk/rte.app.mk                                      | 4 ++++
> >> >>  5 files changed, 18 insertions(+)
> >> >> 
> >> >> diff --git a/config/common_linuxapp b/config/common_linuxapp
> >> >> index 57b61c9..3c091e7 100644
> >> >> --- a/config/common_linuxapp
> >> >> +++ b/config/common_linuxapp
> >> >> @@ -210,6 +210,11 @@ CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_VM=4
> >> >>  CONFIG_RTE_LIBRTE_I40E_ITR_INTERVAL=-1
> >> >>  
> >> >>  #
> >> >> +# Compile burst-oriented Cisco ENIC PMD driver
> >> >> +#
> >> >> +CONFIG_RTE_LIBRTE_ENIC_PMD=y
> >> >> +
> >> >> +#
> >> >>  # Compile burst-oriented VIRTIO PMD driver
> >> >>  #
> >> >>  CONFIG_RTE_LIBRTE_VIRTIO_PMD=y
> >> >> diff --git a/lib/Makefile b/lib/Makefile
> >> >> index e3237ff..1911790 100644
> >> >> --- a/lib/Makefile
> >> >> +++ b/lib/Makefile
> >> >> @@ -43,6 +43,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_CMDLINE) += librte_cmdline
> >> >>  DIRS-$(CONFIG_RTE_LIBRTE_ETHER) += librte_ether
> >> >>  DIRS-$(CONFIG_RTE_LIBRTE_E1000_PMD) += librte_pmd_e1000
> >> >>  DIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += librte_pmd_ixgbe
> >> >> +DIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += librte_pmd_enic
> >> >>  DIRS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += librte_pmd_i40e
> >> >>  DIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += librte_pmd_bond
> >> >>  DIRS-$(CONFIG_RTE_LIBRTE_PMD_RING) += librte_pmd_ring
> >> >> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> >> >>b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> >> >> index c776ddc..6bf8f2e 100644
> >> >> --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> >> >> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> >> >> @@ -736,6 +736,7 @@ pci_vfio_map_resource(struct rte_pci_device *dev)
> >> >>  		maps[i].offset = reg.offset;
> >> >>  		maps[i].size = reg.size;
> >> >>  		dev->mem_resource[i].addr = bar_addr;
> >> >> +		dev->mem_resource[i].len = reg.size;
> >> >>  	}
> >> >>  
> >> >>  	/* if secondary process, do not set up interrupts */
> >> >> @@ -791,4 +792,10 @@ pci_vfio_is_enabled(void)
> >> >>  {
> >> >>  	return vfio_cfg.vfio_enabled;
> >> >>  }
> >> >> +
> >> >> +int
> >> >> +pci_vfio_container_fd(void)
> >> >> +{
> >> >> +	return vfio_cfg.vfio_container_fd;
> >> >> +}
> >> >You should move this function definition to a separate patch and put it
> >> >earlier
> >> >in the series, as you call this function two patches back.
> >> 
> >> Thanks for the comment, Neil.  I shall move this to a separate patch and
> >> put it earlier in the series.
> >> 
> >> >
> >> >Also, this gives me pause, as it seems like you're working around the
> >> >VFIO api.
> >> >From what I see, you just use this to get an fd that you can use in an
> >> >ioctl to
> >> >set some DMA settings.  First off, theres already a function called
> >> >pci_vfio_get_container_fd, which does exactly what you are doing here,
> >> >with
> >> >additional safety checking.
> >> >
> >> >However, even though there is an existing function to do what you
> >>want, I
> >> >would
> >> >recommend that you not use it for your purposes.  Whenever you expose
> >> >something
> >> >like a file descriptor, you run the risk of multiple accessors racing
> >> >trying to
> >> >set/unset features and preform operations.  It would be better if you
> >> >could add
> >> >apropriate api calls to vfio interface to set what you want.  That way
> >>the
> >> >library can add appropriate locking if/when needed
> >> 
> >> 
> >> I see that vfio_cfg.vfio_container_fd is obtained and stored in
> >> pci_vfio_enable(), and this is not modified later.
> >> ENIC PMD needs it to add the IOMMU mapping for buffers used for
> >> communicating with adapter firmware.  That¹s just adding an entry, and
> >> container fd is just passed as an argument.  So the following addition
> >>in
> >> eal_pci_vfio.c should be sufficient.  Since vfio_cfg is per process, I
> >>do
> >> not think that any other checking is required.
> >> 
> >> int
> >> pci_vfio_map_dma(struct vfio_iommu_type1_dma_map *dma_map)
> >> {
> >> 	return ioctl(vfio_cfg.vfio_container_fd, VFIO_IOMMU_MAP_DMA, dma_map);
> >> }
> >> 
> >> 
> >> 
> >> Does this look alright?  Do you think that I¹ve missed out anything
> >>here?
> >> 
> >It looks better yes, but I'm still confused as to why its necessecary.
> >Looking
> >back at your use of the fd, you're adding an iov entry for a buffer
> >allocated
> >via rte_memzone_reserve, which should come out of the dpdk's configured
> >memory.
> >In pci_vfio_setup_dma_maps, which is part of the pci probe path in eal
> >library,
> >all of the DPDK's physically available memory is already mapped into the
> >iommu
> >in a 1:1 fashion.  So why do you need to do this again?
> 
> Yes, ideally all the memory allocated using rte_memzone_reserve_aligned()
> should have its mapping already there in IOMMU.  The code that adds the
> mapping was not there initially, but while testing, I saw that it was
> giving DMAR errors (like what’s shown below).  Putting it back solved the
> problem.
> 
> 
> [295042.852620] DMAR:[fault reason 05] PTE Write access is not set
> [295044.406282] dmar: DRHD: handling fault status reg 602
> [295044.467981] dmar: DMAR:[DMA Write] Request device [86:00.0] fault addr
> 7fbd73b32000
> 
Ok, so what I hear you saying is that, now that that mapping code is available
in the pci probe path, you can remove your workaround correct?

Neil

> 
> Regards,
> -Sujith
> 
> >
> >Neil
> >
>
  
Sujith Sankar Nov. 25, 2014, 6:13 a.m. UTC | #7
On 24/11/14 10:49 pm, "Neil Horman" <nhorman@tuxdriver.com> wrote:

>On Mon, Nov 24, 2014 at 04:12:48PM +0000, Sujith Sankar (ssujith) wrote:

>> 

>> 

>> On 24/11/14 5:03 pm, "Neil Horman" <nhorman@tuxdriver.com> wrote:

>> 

>> >On Mon, Nov 24, 2014 at 05:45:54AM +0000, Sujith Sankar (ssujith)

>>wrote:

>> >> 

>> >> 

>> >> On 24/11/14 5:47 am, "Neil Horman" <nhorman@tuxdriver.com> wrote:

>> >> 

>> >> >On Sun, Nov 23, 2014 at 09:38:19PM +0530, Sujith Sankar wrote:

>> >> >> Signed-off-by: Sujith Sankar <ssujith@cisco.com>

>> >> >> ---

>> >> >>  config/common_linuxapp                             | 5 +++++

>> >> >>  lib/Makefile                                       | 1 +

>> >> >>  lib/librte_eal/linuxapp/eal/eal_pci_vfio.c         | 7 +++++++

>> >> >>  lib/librte_eal/linuxapp/eal/include/eal_pci_init.h | 1 +

>> >> >>  mk/rte.app.mk                                      | 4 ++++

>> >> >>  5 files changed, 18 insertions(+)

>> >> >> 

>> >> >> diff --git a/config/common_linuxapp b/config/common_linuxapp

>> >> >> index 57b61c9..3c091e7 100644

>> >> >> --- a/config/common_linuxapp

>> >> >> +++ b/config/common_linuxapp

>> >> >> @@ -210,6 +210,11 @@ CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_VM=4

>> >> >>  CONFIG_RTE_LIBRTE_I40E_ITR_INTERVAL=-1

>> >> >>  

>> >> >>  #

>> >> >> +# Compile burst-oriented Cisco ENIC PMD driver

>> >> >> +#

>> >> >> +CONFIG_RTE_LIBRTE_ENIC_PMD=y

>> >> >> +

>> >> >> +#

>> >> >>  # Compile burst-oriented VIRTIO PMD driver

>> >> >>  #

>> >> >>  CONFIG_RTE_LIBRTE_VIRTIO_PMD=y

>> >> >> diff --git a/lib/Makefile b/lib/Makefile

>> >> >> index e3237ff..1911790 100644

>> >> >> --- a/lib/Makefile

>> >> >> +++ b/lib/Makefile

>> >> >> @@ -43,6 +43,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_CMDLINE) +=

>>librte_cmdline

>> >> >>  DIRS-$(CONFIG_RTE_LIBRTE_ETHER) += librte_ether

>> >> >>  DIRS-$(CONFIG_RTE_LIBRTE_E1000_PMD) += librte_pmd_e1000

>> >> >>  DIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += librte_pmd_ixgbe

>> >> >> +DIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += librte_pmd_enic

>> >> >>  DIRS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += librte_pmd_i40e

>> >> >>  DIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += librte_pmd_bond

>> >> >>  DIRS-$(CONFIG_RTE_LIBRTE_PMD_RING) += librte_pmd_ring

>> >> >> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c

>> >> >>b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c

>> >> >> index c776ddc..6bf8f2e 100644

>> >> >> --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c

>> >> >> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c

>> >> >> @@ -736,6 +736,7 @@ pci_vfio_map_resource(struct rte_pci_device

>>*dev)

>> >> >>  		maps[i].offset = reg.offset;

>> >> >>  		maps[i].size = reg.size;

>> >> >>  		dev->mem_resource[i].addr = bar_addr;

>> >> >> +		dev->mem_resource[i].len = reg.size;

>> >> >>  	}

>> >> >>  

>> >> >>  	/* if secondary process, do not set up interrupts */

>> >> >> @@ -791,4 +792,10 @@ pci_vfio_is_enabled(void)

>> >> >>  {

>> >> >>  	return vfio_cfg.vfio_enabled;

>> >> >>  }

>> >> >> +

>> >> >> +int

>> >> >> +pci_vfio_container_fd(void)

>> >> >> +{

>> >> >> +	return vfio_cfg.vfio_container_fd;

>> >> >> +}

>> >> >You should move this function definition to a separate patch and

>>put it

>> >> >earlier

>> >> >in the series, as you call this function two patches back.

>> >> 

>> >> Thanks for the comment, Neil.  I shall move this to a separate patch

>>and

>> >> put it earlier in the series.

>> >> 

>> >> >

>> >> >Also, this gives me pause, as it seems like you're working around

>>the

>> >> >VFIO api.

>> >> >From what I see, you just use this to get an fd that you can use in

>>an

>> >> >ioctl to

>> >> >set some DMA settings.  First off, theres already a function called

>> >> >pci_vfio_get_container_fd, which does exactly what you are doing

>>here,

>> >> >with

>> >> >additional safety checking.

>> >> >

>> >> >However, even though there is an existing function to do what you

>> >>want, I

>> >> >would

>> >> >recommend that you not use it for your purposes.  Whenever you

>>expose

>> >> >something

>> >> >like a file descriptor, you run the risk of multiple accessors

>>racing

>> >> >trying to

>> >> >set/unset features and preform operations.  It would be better if

>>you

>> >> >could add

>> >> >apropriate api calls to vfio interface to set what you want.  That

>>way

>> >>the

>> >> >library can add appropriate locking if/when needed

>> >> 

>> >> 

>> >> I see that vfio_cfg.vfio_container_fd is obtained and stored in

>> >> pci_vfio_enable(), and this is not modified later.

>> >> ENIC PMD needs it to add the IOMMU mapping for buffers used for

>> >> communicating with adapter firmware.  That¹s just adding an entry,

>>and

>> >> container fd is just passed as an argument.  So the following

>>addition

>> >>in

>> >> eal_pci_vfio.c should be sufficient.  Since vfio_cfg is per process,

>>I

>> >>do

>> >> not think that any other checking is required.

>> >> 

>> >> int

>> >> pci_vfio_map_dma(struct vfio_iommu_type1_dma_map *dma_map)

>> >> {

>> >> 	return ioctl(vfio_cfg.vfio_container_fd, VFIO_IOMMU_MAP_DMA,

>>dma_map);

>> >> }

>> >> 

>> >> 

>> >> 

>> >> Does this look alright?  Do you think that I¹ve missed out anything

>> >>here?

>> >> 

>> >It looks better yes, but I'm still confused as to why its necessecary.

>> >Looking

>> >back at your use of the fd, you're adding an iov entry for a buffer

>> >allocated

>> >via rte_memzone_reserve, which should come out of the dpdk's configured

>> >memory.

>> >In pci_vfio_setup_dma_maps, which is part of the pci probe path in eal

>> >library,

>> >all of the DPDK's physically available memory is already mapped into

>>the

>> >iommu

>> >in a 1:1 fashion.  So why do you need to do this again?

>> 

>> Yes, ideally all the memory allocated using

>>rte_memzone_reserve_aligned()

>> should have its mapping already there in IOMMU.  The code that adds the

>> mapping was not there initially, but while testing, I saw that it was

>> giving DMAR errors (like what’s shown below).  Putting it back solved

>>the

>> problem.

>> 

>> 

>> [295042.852620] DMAR:[fault reason 05] PTE Write access is not set

>> [295044.406282] dmar: DRHD: handling fault status reg 602

>> [295044.467981] dmar: DMAR:[DMA Write] Request device [86:00.0] fault

>>addr

>> 7fbd73b32000

>> 

>Ok, so what I hear you saying is that, now that that mapping code is

>available

>in the pci probe path, you can remove your workaround correct?


Comparing the two mapping functions, I could find out a difference.  ENIC
PMD is using virtual address of the buffers to fill dma_map->iova, while
the lib is filling in the phys_addr.  Both the cases work.
I’ve modified ENIC PMD to use phys_addr and the initial tests went through
fine.  Let me spend some more time testing it.  After that I shall get
back with V4.

Thank you Neil !

-Sujith

>

>Neil

>

>> 

>> Regards,

>> -Sujith

>> 

>> >

>> >Neil

>> >

>>
  
Neil Horman Nov. 25, 2014, 11:30 a.m. UTC | #8
On Tue, Nov 25, 2014 at 06:13:26AM +0000, Sujith Sankar (ssujith) wrote:
> 
> 
> On 24/11/14 10:49 pm, "Neil Horman" <nhorman@tuxdriver.com> wrote:
> 
> >On Mon, Nov 24, 2014 at 04:12:48PM +0000, Sujith Sankar (ssujith) wrote:
> >> 
> >> 
> >> On 24/11/14 5:03 pm, "Neil Horman" <nhorman@tuxdriver.com> wrote:
> >> 
> >> >On Mon, Nov 24, 2014 at 05:45:54AM +0000, Sujith Sankar (ssujith)
> >>wrote:
> >> >> 
> >> >> 
> >> >> On 24/11/14 5:47 am, "Neil Horman" <nhorman@tuxdriver.com> wrote:
> >> >> 
> >> >> >On Sun, Nov 23, 2014 at 09:38:19PM +0530, Sujith Sankar wrote:
> >> >> >> Signed-off-by: Sujith Sankar <ssujith@cisco.com>
> >> >> >> ---
> >> >> >>  config/common_linuxapp                             | 5 +++++
> >> >> >>  lib/Makefile                                       | 1 +
> >> >> >>  lib/librte_eal/linuxapp/eal/eal_pci_vfio.c         | 7 +++++++
> >> >> >>  lib/librte_eal/linuxapp/eal/include/eal_pci_init.h | 1 +
> >> >> >>  mk/rte.app.mk                                      | 4 ++++
> >> >> >>  5 files changed, 18 insertions(+)
> >> >> >> 
> >> >> >> diff --git a/config/common_linuxapp b/config/common_linuxapp
> >> >> >> index 57b61c9..3c091e7 100644
> >> >> >> --- a/config/common_linuxapp
> >> >> >> +++ b/config/common_linuxapp
> >> >> >> @@ -210,6 +210,11 @@ CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_VM=4
> >> >> >>  CONFIG_RTE_LIBRTE_I40E_ITR_INTERVAL=-1
> >> >> >>  
> >> >> >>  #
> >> >> >> +# Compile burst-oriented Cisco ENIC PMD driver
> >> >> >> +#
> >> >> >> +CONFIG_RTE_LIBRTE_ENIC_PMD=y
> >> >> >> +
> >> >> >> +#
> >> >> >>  # Compile burst-oriented VIRTIO PMD driver
> >> >> >>  #
> >> >> >>  CONFIG_RTE_LIBRTE_VIRTIO_PMD=y
> >> >> >> diff --git a/lib/Makefile b/lib/Makefile
> >> >> >> index e3237ff..1911790 100644
> >> >> >> --- a/lib/Makefile
> >> >> >> +++ b/lib/Makefile
> >> >> >> @@ -43,6 +43,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_CMDLINE) +=
> >>librte_cmdline
> >> >> >>  DIRS-$(CONFIG_RTE_LIBRTE_ETHER) += librte_ether
> >> >> >>  DIRS-$(CONFIG_RTE_LIBRTE_E1000_PMD) += librte_pmd_e1000
> >> >> >>  DIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += librte_pmd_ixgbe
> >> >> >> +DIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += librte_pmd_enic
> >> >> >>  DIRS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += librte_pmd_i40e
> >> >> >>  DIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += librte_pmd_bond
> >> >> >>  DIRS-$(CONFIG_RTE_LIBRTE_PMD_RING) += librte_pmd_ring
> >> >> >> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> >> >> >>b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> >> >> >> index c776ddc..6bf8f2e 100644
> >> >> >> --- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> >> >> >> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
> >> >> >> @@ -736,6 +736,7 @@ pci_vfio_map_resource(struct rte_pci_device
> >>*dev)
> >> >> >>  		maps[i].offset = reg.offset;
> >> >> >>  		maps[i].size = reg.size;
> >> >> >>  		dev->mem_resource[i].addr = bar_addr;
> >> >> >> +		dev->mem_resource[i].len = reg.size;
> >> >> >>  	}
> >> >> >>  
> >> >> >>  	/* if secondary process, do not set up interrupts */
> >> >> >> @@ -791,4 +792,10 @@ pci_vfio_is_enabled(void)
> >> >> >>  {
> >> >> >>  	return vfio_cfg.vfio_enabled;
> >> >> >>  }
> >> >> >> +
> >> >> >> +int
> >> >> >> +pci_vfio_container_fd(void)
> >> >> >> +{
> >> >> >> +	return vfio_cfg.vfio_container_fd;
> >> >> >> +}
> >> >> >You should move this function definition to a separate patch and
> >>put it
> >> >> >earlier
> >> >> >in the series, as you call this function two patches back.
> >> >> 
> >> >> Thanks for the comment, Neil.  I shall move this to a separate patch
> >>and
> >> >> put it earlier in the series.
> >> >> 
> >> >> >
> >> >> >Also, this gives me pause, as it seems like you're working around
> >>the
> >> >> >VFIO api.
> >> >> >From what I see, you just use this to get an fd that you can use in
> >>an
> >> >> >ioctl to
> >> >> >set some DMA settings.  First off, theres already a function called
> >> >> >pci_vfio_get_container_fd, which does exactly what you are doing
> >>here,
> >> >> >with
> >> >> >additional safety checking.
> >> >> >
> >> >> >However, even though there is an existing function to do what you
> >> >>want, I
> >> >> >would
> >> >> >recommend that you not use it for your purposes.  Whenever you
> >>expose
> >> >> >something
> >> >> >like a file descriptor, you run the risk of multiple accessors
> >>racing
> >> >> >trying to
> >> >> >set/unset features and preform operations.  It would be better if
> >>you
> >> >> >could add
> >> >> >apropriate api calls to vfio interface to set what you want.  That
> >>way
> >> >>the
> >> >> >library can add appropriate locking if/when needed
> >> >> 
> >> >> 
> >> >> I see that vfio_cfg.vfio_container_fd is obtained and stored in
> >> >> pci_vfio_enable(), and this is not modified later.
> >> >> ENIC PMD needs it to add the IOMMU mapping for buffers used for
> >> >> communicating with adapter firmware.  That¹s just adding an entry,
> >>and
> >> >> container fd is just passed as an argument.  So the following
> >>addition
> >> >>in
> >> >> eal_pci_vfio.c should be sufficient.  Since vfio_cfg is per process,
> >>I
> >> >>do
> >> >> not think that any other checking is required.
> >> >> 
> >> >> int
> >> >> pci_vfio_map_dma(struct vfio_iommu_type1_dma_map *dma_map)
> >> >> {
> >> >> 	return ioctl(vfio_cfg.vfio_container_fd, VFIO_IOMMU_MAP_DMA,
> >>dma_map);
> >> >> }
> >> >> 
> >> >> 
> >> >> 
> >> >> Does this look alright?  Do you think that I¹ve missed out anything
> >> >>here?
> >> >> 
> >> >It looks better yes, but I'm still confused as to why its necessecary.
> >> >Looking
> >> >back at your use of the fd, you're adding an iov entry for a buffer
> >> >allocated
> >> >via rte_memzone_reserve, which should come out of the dpdk's configured
> >> >memory.
> >> >In pci_vfio_setup_dma_maps, which is part of the pci probe path in eal
> >> >library,
> >> >all of the DPDK's physically available memory is already mapped into
> >>the
> >> >iommu
> >> >in a 1:1 fashion.  So why do you need to do this again?
> >> 
> >> Yes, ideally all the memory allocated using
> >>rte_memzone_reserve_aligned()
> >> should have its mapping already there in IOMMU.  The code that adds the
> >> mapping was not there initially, but while testing, I saw that it was
> >> giving DMAR errors (like what’s shown below).  Putting it back solved
> >>the
> >> problem.
> >> 
> >> 
> >> [295042.852620] DMAR:[fault reason 05] PTE Write access is not set
> >> [295044.406282] dmar: DRHD: handling fault status reg 602
> >> [295044.467981] dmar: DMAR:[DMA Write] Request device [86:00.0] fault
> >>addr
> >> 7fbd73b32000
> >> 
> >Ok, so what I hear you saying is that, now that that mapping code is
> >available
> >in the pci probe path, you can remove your workaround correct?
> 
> Comparing the two mapping functions, I could find out a difference.  ENIC
> PMD is using virtual address of the buffers to fill dma_map->iova, while
> the lib is filling in the phys_addr.  Both the cases work.
> I’ve modified ENIC PMD to use phys_addr and the initial tests went through
> fine.  Let me spend some more time testing it.  After that I shall get
> back with V4.
> 
> Thank you Neil !
> 
Thank you!
neil

>
  

Patch

diff --git a/config/common_linuxapp b/config/common_linuxapp
index 57b61c9..3c091e7 100644
--- a/config/common_linuxapp
+++ b/config/common_linuxapp
@@ -210,6 +210,11 @@  CONFIG_RTE_LIBRTE_I40E_QUEUE_NUM_PER_VM=4
 CONFIG_RTE_LIBRTE_I40E_ITR_INTERVAL=-1
 
 #
+# Compile burst-oriented Cisco ENIC PMD driver
+#
+CONFIG_RTE_LIBRTE_ENIC_PMD=y
+
+#
 # Compile burst-oriented VIRTIO PMD driver
 #
 CONFIG_RTE_LIBRTE_VIRTIO_PMD=y
diff --git a/lib/Makefile b/lib/Makefile
index e3237ff..1911790 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -43,6 +43,7 @@  DIRS-$(CONFIG_RTE_LIBRTE_CMDLINE) += librte_cmdline
 DIRS-$(CONFIG_RTE_LIBRTE_ETHER) += librte_ether
 DIRS-$(CONFIG_RTE_LIBRTE_E1000_PMD) += librte_pmd_e1000
 DIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += librte_pmd_ixgbe
+DIRS-$(CONFIG_RTE_LIBRTE_IXGBE_PMD) += librte_pmd_enic
 DIRS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += librte_pmd_i40e
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += librte_pmd_bond
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_RING) += librte_pmd_ring
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
index c776ddc..6bf8f2e 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c
@@ -736,6 +736,7 @@  pci_vfio_map_resource(struct rte_pci_device *dev)
 		maps[i].offset = reg.offset;
 		maps[i].size = reg.size;
 		dev->mem_resource[i].addr = bar_addr;
+		dev->mem_resource[i].len = reg.size;
 	}
 
 	/* if secondary process, do not set up interrupts */
@@ -791,4 +792,10 @@  pci_vfio_is_enabled(void)
 {
 	return vfio_cfg.vfio_enabled;
 }
+
+int
+pci_vfio_container_fd(void)
+{
+	return vfio_cfg.vfio_container_fd;
+}
 #endif
diff --git a/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h b/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h
index d758bee..c9e9e40 100644
--- a/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h
+++ b/lib/librte_eal/linuxapp/eal/include/eal_pci_init.h
@@ -71,6 +71,7 @@  int pci_uio_map_resource(struct rte_pci_device *dev);
 
 int pci_vfio_enable(void);
 int pci_vfio_is_enabled(void);
+int pci_vfio_container_fd(void);
 int pci_vfio_mp_sync_setup(void);
 
 /* map VFIO resource prototype */
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index 285b65c..95c652f 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -186,6 +186,10 @@  ifeq ($(CONFIG_RTE_LIBRTE_VMXNET3_PMD),y)
 LDLIBS += -lrte_pmd_vmxnet3_uio
 endif
 
+ifeq ($(CONFIG_RTE_LIBRTE_ENIC_PMD),y)
+LDLIBS += -lrte_pmd_enic
+endif
+
 ifeq ($(CONFIG_RTE_LIBRTE_VIRTIO_PMD),y)
 LDLIBS += -lrte_pmd_virtio_uio
 endif