[dpdk-dev,v7,04/14] eal/pci: Consolidate pci address comparison APIs

Message ID 1423470639-15744-5-git-send-email-mukawa@igel.co.jp (mailing list archive)
State Superseded, archived
Headers

Commit Message

Tetsuya Mukawa Feb. 9, 2015, 8:30 a.m. UTC
  This patch replaces pci_addr_comparison() and memcmp() of pci addresses by
eal_compare_pci_addr().

v5:
- Fix pci_scan_one to handle pt_driver correctly.
v4:
- Fix calculation method of eal_compare_pci_addr().
- Add parameter checking.

Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
---
 lib/librte_eal/bsdapp/eal/eal_pci.c       | 25 ++++++++---------------
 lib/librte_eal/common/eal_common_pci.c    |  2 +-
 lib/librte_eal/common/include/rte_pci.h   | 34 +++++++++++++++++++++++++++++++
 lib/librte_eal/linuxapp/eal/eal_pci.c     | 25 ++++++++---------------
 lib/librte_eal/linuxapp/eal/eal_pci_uio.c |  2 +-
 5 files changed, 54 insertions(+), 34 deletions(-)
  

Comments

Michael Qiu Feb. 9, 2015, 1:10 p.m. UTC | #1
On 2/9/2015 4:31 PM, Tetsuya Mukawa wrote:
> This patch replaces pci_addr_comparison() and memcmp() of pci addresses by
> eal_compare_pci_addr().
>
> v5:
> - Fix pci_scan_one to handle pt_driver correctly.
> v4:
> - Fix calculation method of eal_compare_pci_addr().
> - Add parameter checking.
>
> Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
> ---
>  lib/librte_eal/bsdapp/eal/eal_pci.c       | 25 ++++++++---------------
>  lib/librte_eal/common/eal_common_pci.c    |  2 +-
>  lib/librte_eal/common/include/rte_pci.h   | 34 +++++++++++++++++++++++++++++++
>  lib/librte_eal/linuxapp/eal/eal_pci.c     | 25 ++++++++---------------
>  lib/librte_eal/linuxapp/eal/eal_pci_uio.c |  2 +-
>  5 files changed, 54 insertions(+), 34 deletions(-)
>
> diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c b/lib/librte_eal/bsdapp/eal/eal_pci.c
> index 74ecce7..c844d58 100644
> --- a/lib/librte_eal/bsdapp/eal/eal_pci.c
> +++ b/lib/librte_eal/bsdapp/eal/eal_pci.c
> @@ -270,20 +270,6 @@ pci_uio_map_resource(struct rte_pci_device *dev)
>  	return (0);
>  }
>  
> -/* Compare two PCI device addresses. */
> -static int
> -pci_addr_comparison(struct rte_pci_addr *addr, struct rte_pci_addr *addr2)
> -{
> -	uint64_t dev_addr = (addr->domain << 24) + (addr->bus << 16) + (addr->devid << 8) + addr->function;
> -	uint64_t dev_addr2 = (addr2->domain << 24) + (addr2->bus << 16) + (addr2->devid << 8) + addr2->function;
> -
> -	if (dev_addr > dev_addr2)
> -		return 1;
> -	else
> -		return 0;
> -}
> -
> -
>  /* Scan one pci sysfs entry, and fill the devices list from it. */
>  static int
>  pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
> @@ -356,13 +342,20 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
>  	}
>  	else {
>  		struct rte_pci_device *dev2 = NULL;
> +		int ret;
>  
>  		TAILQ_FOREACH(dev2, &pci_device_list, next) {
> -			if (pci_addr_comparison(&dev->addr, &dev2->addr))
> +			ret = eal_compare_pci_addr(&dev->addr, &dev2->addr);
> +			if (ret > 0)
>  				continue;
> -			else {
> +			else if (ret < 0) {
>  				TAILQ_INSERT_BEFORE(dev2, dev, next);
>  				return 0;
> +			} else { /* already registered */
> +				/* update pt_driver */
> +				dev2->pt_driver = dev->pt_driver;
> +				free(dev);
> +				return 0;
>  			}
>  		}
>  		TAILQ_INSERT_TAIL(&pci_device_list, dev, next);
> diff --git a/lib/librte_eal/common/eal_common_pci.c b/lib/librte_eal/common/eal_common_pci.c
> index f3c7f71..a89f5c3 100644
> --- a/lib/librte_eal/common/eal_common_pci.c
> +++ b/lib/librte_eal/common/eal_common_pci.c
> @@ -93,7 +93,7 @@ static struct rte_devargs *pci_devargs_lookup(struct rte_pci_device *dev)
>  		if (devargs->type != RTE_DEVTYPE_BLACKLISTED_PCI &&
>  			devargs->type != RTE_DEVTYPE_WHITELISTED_PCI)
>  			continue;
> -		if (!memcmp(&dev->addr, &devargs->pci.addr, sizeof(dev->addr)))
> +		if (!eal_compare_pci_addr(&dev->addr, &devargs->pci.addr))
>  			return devargs;
>  	}
>  	return NULL;
> diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h
> index 7f2d699..4814cd7 100644
> --- a/lib/librte_eal/common/include/rte_pci.h
> +++ b/lib/librte_eal/common/include/rte_pci.h
> @@ -269,6 +269,40 @@ eal_parse_pci_DomBDF(const char *input, struct rte_pci_addr *dev_addr)
>  }
>  #undef GET_PCIADDR_FIELD
>  
> +/* Compare two PCI device addresses. */
> +/**
> + * Utility function to compare two PCI device addresses.
> + *
> + * @param addr
> + *	The PCI Bus-Device-Function address to compare
> + * @param addr2
> + *	The PCI Bus-Device-Function address to compare
> + * @return
> + *	0 on equal PCI address.
> + *	Positive on addr is greater than addr2.
> + *	Negative on addr is less than addr2, or error.
> + */
> +static inline int
> +eal_compare_pci_addr(struct rte_pci_addr *addr, struct rte_pci_addr *addr2)
> +{
> +	uint64_t dev_addr, dev_addr2;
> +
> +	if ((addr == NULL) || (addr2 == NULL))
> +		return -1;
> +
> +	dev_addr = (addr->domain << 24) | (addr->bus << 16) |
> +				(addr->devid << 8) | addr->function;
> +	dev_addr2 = (addr2->domain << 24) | (addr2->bus << 16) |
> +				(addr2->devid << 8) | addr2->function;
> +
> +	if (dev_addr > dev_addr2)
> +		return 1;
> +	else if (dev_addr < dev_addr2)
> +		return -1;
> +	else
> +		return 0;
> +}
> +
>  /**
>   * Probe the PCI bus for registered drivers.
>   *
> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
> index c0ca5a5..d847102 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_pci.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
> @@ -229,20 +229,6 @@ error:
>  	return -1;
>  }
>  
> -/* Compare two PCI device addresses. */
> -static int
> -pci_addr_comparison(struct rte_pci_addr *addr, struct rte_pci_addr *addr2)
> -{
> -	uint64_t dev_addr = (addr->domain << 24) + (addr->bus << 16) + (addr->devid << 8) + addr->function;
> -	uint64_t dev_addr2 = (addr2->domain << 24) + (addr2->bus << 16) + (addr2->devid << 8) + addr2->function;
> -
> -	if (dev_addr > dev_addr2)
> -		return 1;
> -	else
> -		return 0;
> -}
> -
> -
>  /* Scan one pci sysfs entry, and fill the devices list from it. */
>  static int
>  pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
> @@ -353,13 +339,20 @@ pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
>  	}
>  	else {
>  		struct rte_pci_device *dev2 = NULL;
> +		int ret;
>  
>  		TAILQ_FOREACH(dev2, &pci_device_list, next) {
> -			if (pci_addr_comparison(&dev->addr, &dev2->addr))
> +			ret = eal_compare_pci_addr(&dev->addr, &dev2->addr);
> +			if (ret > 0)
>  				continue;
> -			else {
> +			else if (ret < 0) {
>  				TAILQ_INSERT_BEFORE(dev2, dev, next);
>  				return 0;
> +			} else { /* already registered */
> +				/* update pt_driver */
> +				dev2->pt_driver = dev->pt_driver;
> +				free(dev);
> +				return 0;
>  			}
>  		}
>  		TAILQ_INSERT_TAIL(&pci_device_list, dev, next);
> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
> index e53f06b..1da3507 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
> @@ -123,7 +123,7 @@ pci_uio_map_secondary(struct rte_pci_device *dev)
>  	TAILQ_FOREACH(uio_res, pci_res_list, next) {
>  
>  		/* skip this element if it doesn't match our PCI address */
> -		if (memcmp(&uio_res->pci_addr, &dev->addr, sizeof(dev->addr)))
> +		if (eal_compare_pci_addr(&uio_res->pci_addr, &dev->addr))
>  			continue;
>  
>  		for (i = 0; i != uio_res->nb_maps; i++) {
Acked-by: Michael Qiu <michael.qiu@intel.com>
  
Iremonger, Bernard Feb. 10, 2015, 3:08 p.m. UTC | #2
> -----Original Message-----
> From: Qiu, Michael
> Sent: Monday, February 9, 2015 1:10 PM
> To: Tetsuya Mukawa; dev@dpdk.org
> Cc: Iremonger, Bernard
> Subject: Re: [PATCH v7 04/14] eal/pci: Consolidate pci address comparison APIs
> 
> On 2/9/2015 4:31 PM, Tetsuya Mukawa wrote:
> > This patch replaces pci_addr_comparison() and memcmp() of pci
> > addresses by eal_compare_pci_addr().
> >
> > v5:
> > - Fix pci_scan_one to handle pt_driver correctly.
> > v4:
> > - Fix calculation method of eal_compare_pci_addr().
> > - Add parameter checking.
> >
> > Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
> > ---
> >  lib/librte_eal/bsdapp/eal/eal_pci.c       | 25 ++++++++---------------
> >  lib/librte_eal/common/eal_common_pci.c    |  2 +-
> >  lib/librte_eal/common/include/rte_pci.h   | 34 +++++++++++++++++++++++++++++++
> >  lib/librte_eal/linuxapp/eal/eal_pci.c     | 25 ++++++++---------------
> >  lib/librte_eal/linuxapp/eal/eal_pci_uio.c |  2 +-
> >  5 files changed, 54 insertions(+), 34 deletions(-)
> >
> > diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c
> > b/lib/librte_eal/bsdapp/eal/eal_pci.c
> > index 74ecce7..c844d58 100644
> > --- a/lib/librte_eal/bsdapp/eal/eal_pci.c
> > +++ b/lib/librte_eal/bsdapp/eal/eal_pci.c
> > @@ -270,20 +270,6 @@ pci_uio_map_resource(struct rte_pci_device *dev)
> >  	return (0);
> >  }
> >
> > -/* Compare two PCI device addresses. */ -static int
> > -pci_addr_comparison(struct rte_pci_addr *addr, struct rte_pci_addr
> > *addr2) -{
> > -	uint64_t dev_addr = (addr->domain << 24) + (addr->bus << 16) + (addr->devid << 8) + addr-
> >function;
> > -	uint64_t dev_addr2 = (addr2->domain << 24) + (addr2->bus << 16) + (addr2->devid << 8) +
> addr2->function;
> > -
> > -	if (dev_addr > dev_addr2)
> > -		return 1;
> > -	else
> > -		return 0;
> > -}
> > -
> > -
> >  /* Scan one pci sysfs entry, and fill the devices list from it. */
> > static int  pci_scan_one(int dev_pci_fd, struct pci_conf *conf) @@
> > -356,13 +342,20 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
> >  	}
> >  	else {
> >  		struct rte_pci_device *dev2 = NULL;
> > +		int ret;
> >
> >  		TAILQ_FOREACH(dev2, &pci_device_list, next) {
> > -			if (pci_addr_comparison(&dev->addr, &dev2->addr))
> > +			ret = eal_compare_pci_addr(&dev->addr, &dev2->addr);
> > +			if (ret > 0)
> >  				continue;
> > -			else {
> > +			else if (ret < 0) {
> >  				TAILQ_INSERT_BEFORE(dev2, dev, next);
> >  				return 0;
> > +			} else { /* already registered */
> > +				/* update pt_driver */
> > +				dev2->pt_driver = dev->pt_driver;
> > +				free(dev);
> > +				return 0;
> >  			}
> >  		}
> >  		TAILQ_INSERT_TAIL(&pci_device_list, dev, next); diff --git
> > a/lib/librte_eal/common/eal_common_pci.c
> > b/lib/librte_eal/common/eal_common_pci.c
> > index f3c7f71..a89f5c3 100644
> > --- a/lib/librte_eal/common/eal_common_pci.c
> > +++ b/lib/librte_eal/common/eal_common_pci.c
> > @@ -93,7 +93,7 @@ static struct rte_devargs *pci_devargs_lookup(struct rte_pci_device *dev)
> >  		if (devargs->type != RTE_DEVTYPE_BLACKLISTED_PCI &&
> >  			devargs->type != RTE_DEVTYPE_WHITELISTED_PCI)
> >  			continue;
> > -		if (!memcmp(&dev->addr, &devargs->pci.addr, sizeof(dev->addr)))
> > +		if (!eal_compare_pci_addr(&dev->addr, &devargs->pci.addr))
> >  			return devargs;
> >  	}
> >  	return NULL;
> > diff --git a/lib/librte_eal/common/include/rte_pci.h
> > b/lib/librte_eal/common/include/rte_pci.h
> > index 7f2d699..4814cd7 100644
> > --- a/lib/librte_eal/common/include/rte_pci.h
> > +++ b/lib/librte_eal/common/include/rte_pci.h
> > @@ -269,6 +269,40 @@ eal_parse_pci_DomBDF(const char *input, struct
> > rte_pci_addr *dev_addr)  }  #undef GET_PCIADDR_FIELD
> >
> > +/* Compare two PCI device addresses. */
> > +/**
> > + * Utility function to compare two PCI device addresses.
> > + *
> > + * @param addr
> > + *	The PCI Bus-Device-Function address to compare
> > + * @param addr2
> > + *	The PCI Bus-Device-Function address to compare
> > + * @return
> > + *	0 on equal PCI address.
> > + *	Positive on addr is greater than addr2.
> > + *	Negative on addr is less than addr2, or error.
> > + */
> > +static inline int
> > +eal_compare_pci_addr(struct rte_pci_addr *addr, struct rte_pci_addr
> > +*addr2) {
> > +	uint64_t dev_addr, dev_addr2;
> > +
> > +	if ((addr == NULL) || (addr2 == NULL))
> > +		return -1;
> > +
> > +	dev_addr = (addr->domain << 24) | (addr->bus << 16) |
> > +				(addr->devid << 8) | addr->function;
> > +	dev_addr2 = (addr2->domain << 24) | (addr2->bus << 16) |
> > +				(addr2->devid << 8) | addr2->function;
> > +
> > +	if (dev_addr > dev_addr2)
> > +		return 1;
> > +	else if (dev_addr < dev_addr2)
> > +		return -1;
> > +	else
> > +		return 0;
> > +}
> > +
> >  /**
> >   * Probe the PCI bus for registered drivers.
> >   *
> > diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c
> > b/lib/librte_eal/linuxapp/eal/eal_pci.c
> > index c0ca5a5..d847102 100644
> > --- a/lib/librte_eal/linuxapp/eal/eal_pci.c
> > +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
> > @@ -229,20 +229,6 @@ error:
> >  	return -1;
> >  }
> >
> > -/* Compare two PCI device addresses. */ -static int
> > -pci_addr_comparison(struct rte_pci_addr *addr, struct rte_pci_addr
> > *addr2) -{
> > -	uint64_t dev_addr = (addr->domain << 24) + (addr->bus << 16) + (addr->devid << 8) + addr-
> >function;
> > -	uint64_t dev_addr2 = (addr2->domain << 24) + (addr2->bus << 16) + (addr2->devid << 8) +
> addr2->function;
> > -
> > -	if (dev_addr > dev_addr2)
> > -		return 1;
> > -	else
> > -		return 0;
> > -}
> > -
> > -
> >  /* Scan one pci sysfs entry, and fill the devices list from it. */
> > static int  pci_scan_one(const char *dirname, uint16_t domain, uint8_t
> > bus, @@ -353,13 +339,20 @@ pci_scan_one(const char *dirname, uint16_t
> > domain, uint8_t bus,
> >  	}
> >  	else {
> >  		struct rte_pci_device *dev2 = NULL;
> > +		int ret;
> >
> >  		TAILQ_FOREACH(dev2, &pci_device_list, next) {
> > -			if (pci_addr_comparison(&dev->addr, &dev2->addr))
> > +			ret = eal_compare_pci_addr(&dev->addr, &dev2->addr);
> > +			if (ret > 0)
> >  				continue;
> > -			else {
> > +			else if (ret < 0) {
> >  				TAILQ_INSERT_BEFORE(dev2, dev, next);
> >  				return 0;
> > +			} else { /* already registered */
> > +				/* update pt_driver */
> > +				dev2->pt_driver = dev->pt_driver;

Hi Tetsuya,

I am seeing a problem with the librte_pmd_ixgbe code where dev->max_vfs is being lost in some scenarios.
The following line should be added here:
      dev2->max_vfs = dev->max_vfs;

numa_mode should probably be updated too (although it is not causing a problem at present).
      dev2->numa_mode = dev->numa_mode;

Regards,

Bernard.




> > +				free(dev);
> > +				return 0;
> >  			}
> >  		}
> >  		TAILQ_INSERT_TAIL(&pci_device_list, dev, next); diff --git
> > a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
> > b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
> > index e53f06b..1da3507 100644
> > --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
> > +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
> > @@ -123,7 +123,7 @@ pci_uio_map_secondary(struct rte_pci_device *dev)
> >  	TAILQ_FOREACH(uio_res, pci_res_list, next) {
> >
> >  		/* skip this element if it doesn't match our PCI address */
> > -		if (memcmp(&uio_res->pci_addr, &dev->addr, sizeof(dev->addr)))
> > +		if (eal_compare_pci_addr(&uio_res->pci_addr, &dev->addr))
> >  			continue;
> >
> >  		for (i = 0; i != uio_res->nb_maps; i++) {
> Acked-by: Michael Qiu <michael.qiu@intel.com>
  
Tetsuya Mukawa Feb. 11, 2015, 2:51 a.m. UTC | #3
On 2015/02/11 0:08, Iremonger, Bernard wrote:
>> -----Original Message-----
>> From: Qiu, Michael
>> Sent: Monday, February 9, 2015 1:10 PM
>> To: Tetsuya Mukawa; dev@dpdk.org
>> Cc: Iremonger, Bernard
>> Subject: Re: [PATCH v7 04/14] eal/pci: Consolidate pci address comparison APIs
>>
>> On 2/9/2015 4:31 PM, Tetsuya Mukawa wrote:
>>> This patch replaces pci_addr_comparison() and memcmp() of pci
>>> addresses by eal_compare_pci_addr().
>>>
>>> v5:
>>> - Fix pci_scan_one to handle pt_driver correctly.
>>> v4:
>>> - Fix calculation method of eal_compare_pci_addr().
>>> - Add parameter checking.
>>>
>>> Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
>>> ---
>>>  lib/librte_eal/bsdapp/eal/eal_pci.c       | 25 ++++++++---------------
>>>  lib/librte_eal/common/eal_common_pci.c    |  2 +-
>>>  lib/librte_eal/common/include/rte_pci.h   | 34 +++++++++++++++++++++++++++++++
>>>  lib/librte_eal/linuxapp/eal/eal_pci.c     | 25 ++++++++---------------
>>>  lib/librte_eal/linuxapp/eal/eal_pci_uio.c |  2 +-
>>>  5 files changed, 54 insertions(+), 34 deletions(-)
>>>
>>> diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c
>>> b/lib/librte_eal/bsdapp/eal/eal_pci.c
>>> index 74ecce7..c844d58 100644
>>> --- a/lib/librte_eal/bsdapp/eal/eal_pci.c
>>> +++ b/lib/librte_eal/bsdapp/eal/eal_pci.c
>>> @@ -270,20 +270,6 @@ pci_uio_map_resource(struct rte_pci_device *dev)
>>>  	return (0);
>>>  }
>>>
>>> -/* Compare two PCI device addresses. */ -static int
>>> -pci_addr_comparison(struct rte_pci_addr *addr, struct rte_pci_addr
>>> *addr2) -{
>>> -	uint64_t dev_addr = (addr->domain << 24) + (addr->bus << 16) + (addr->devid << 8) + addr-
>>> function;
>>> -	uint64_t dev_addr2 = (addr2->domain << 24) + (addr2->bus << 16) + (addr2->devid << 8) +
>> addr2->function;
>>> -
>>> -	if (dev_addr > dev_addr2)
>>> -		return 1;
>>> -	else
>>> -		return 0;
>>> -}
>>> -
>>> -
>>>  /* Scan one pci sysfs entry, and fill the devices list from it. */
>>> static int  pci_scan_one(int dev_pci_fd, struct pci_conf *conf) @@
>>> -356,13 +342,20 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
>>>  	}
>>>  	else {
>>>  		struct rte_pci_device *dev2 = NULL;
>>> +		int ret;
>>>
>>>  		TAILQ_FOREACH(dev2, &pci_device_list, next) {
>>> -			if (pci_addr_comparison(&dev->addr, &dev2->addr))
>>> +			ret = eal_compare_pci_addr(&dev->addr, &dev2->addr);
>>> +			if (ret > 0)
>>>  				continue;
>>> -			else {
>>> +			else if (ret < 0) {
>>>  				TAILQ_INSERT_BEFORE(dev2, dev, next);
>>>  				return 0;
>>> +			} else { /* already registered */
>>> +				/* update pt_driver */
>>> +				dev2->pt_driver = dev->pt_driver;
>>> +				free(dev);
>>> +				return 0;
>>>  			}
>>>  		}
>>>  		TAILQ_INSERT_TAIL(&pci_device_list, dev, next); diff --git
>>> a/lib/librte_eal/common/eal_common_pci.c
>>> b/lib/librte_eal/common/eal_common_pci.c
>>> index f3c7f71..a89f5c3 100644
>>> --- a/lib/librte_eal/common/eal_common_pci.c
>>> +++ b/lib/librte_eal/common/eal_common_pci.c
>>> @@ -93,7 +93,7 @@ static struct rte_devargs *pci_devargs_lookup(struct rte_pci_device *dev)
>>>  		if (devargs->type != RTE_DEVTYPE_BLACKLISTED_PCI &&
>>>  			devargs->type != RTE_DEVTYPE_WHITELISTED_PCI)
>>>  			continue;
>>> -		if (!memcmp(&dev->addr, &devargs->pci.addr, sizeof(dev->addr)))
>>> +		if (!eal_compare_pci_addr(&dev->addr, &devargs->pci.addr))
>>>  			return devargs;
>>>  	}
>>>  	return NULL;
>>> diff --git a/lib/librte_eal/common/include/rte_pci.h
>>> b/lib/librte_eal/common/include/rte_pci.h
>>> index 7f2d699..4814cd7 100644
>>> --- a/lib/librte_eal/common/include/rte_pci.h
>>> +++ b/lib/librte_eal/common/include/rte_pci.h
>>> @@ -269,6 +269,40 @@ eal_parse_pci_DomBDF(const char *input, struct
>>> rte_pci_addr *dev_addr)  }  #undef GET_PCIADDR_FIELD
>>>
>>> +/* Compare two PCI device addresses. */
>>> +/**
>>> + * Utility function to compare two PCI device addresses.
>>> + *
>>> + * @param addr
>>> + *	The PCI Bus-Device-Function address to compare
>>> + * @param addr2
>>> + *	The PCI Bus-Device-Function address to compare
>>> + * @return
>>> + *	0 on equal PCI address.
>>> + *	Positive on addr is greater than addr2.
>>> + *	Negative on addr is less than addr2, or error.
>>> + */
>>> +static inline int
>>> +eal_compare_pci_addr(struct rte_pci_addr *addr, struct rte_pci_addr
>>> +*addr2) {
>>> +	uint64_t dev_addr, dev_addr2;
>>> +
>>> +	if ((addr == NULL) || (addr2 == NULL))
>>> +		return -1;
>>> +
>>> +	dev_addr = (addr->domain << 24) | (addr->bus << 16) |
>>> +				(addr->devid << 8) | addr->function;
>>> +	dev_addr2 = (addr2->domain << 24) | (addr2->bus << 16) |
>>> +				(addr2->devid << 8) | addr2->function;
>>> +
>>> +	if (dev_addr > dev_addr2)
>>> +		return 1;
>>> +	else if (dev_addr < dev_addr2)
>>> +		return -1;
>>> +	else
>>> +		return 0;
>>> +}
>>> +
>>>  /**
>>>   * Probe the PCI bus for registered drivers.
>>>   *
>>> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c
>>> b/lib/librte_eal/linuxapp/eal/eal_pci.c
>>> index c0ca5a5..d847102 100644
>>> --- a/lib/librte_eal/linuxapp/eal/eal_pci.c
>>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
>>> @@ -229,20 +229,6 @@ error:
>>>  	return -1;
>>>  }
>>>
>>> -/* Compare two PCI device addresses. */ -static int
>>> -pci_addr_comparison(struct rte_pci_addr *addr, struct rte_pci_addr
>>> *addr2) -{
>>> -	uint64_t dev_addr = (addr->domain << 24) + (addr->bus << 16) + (addr->devid << 8) + addr-
>>> function;
>>> -	uint64_t dev_addr2 = (addr2->domain << 24) + (addr2->bus << 16) + (addr2->devid << 8) +
>> addr2->function;
>>> -
>>> -	if (dev_addr > dev_addr2)
>>> -		return 1;
>>> -	else
>>> -		return 0;
>>> -}
>>> -
>>> -
>>>  /* Scan one pci sysfs entry, and fill the devices list from it. */
>>> static int  pci_scan_one(const char *dirname, uint16_t domain, uint8_t
>>> bus, @@ -353,13 +339,20 @@ pci_scan_one(const char *dirname, uint16_t
>>> domain, uint8_t bus,
>>>  	}
>>>  	else {
>>>  		struct rte_pci_device *dev2 = NULL;
>>> +		int ret;
>>>
>>>  		TAILQ_FOREACH(dev2, &pci_device_list, next) {
>>> -			if (pci_addr_comparison(&dev->addr, &dev2->addr))
>>> +			ret = eal_compare_pci_addr(&dev->addr, &dev2->addr);
>>> +			if (ret > 0)
>>>  				continue;
>>> -			else {
>>> +			else if (ret < 0) {
>>>  				TAILQ_INSERT_BEFORE(dev2, dev, next);
>>>  				return 0;
>>> +			} else { /* already registered */
>>> +				/* update pt_driver */
>>> +				dev2->pt_driver = dev->pt_driver;
> Hi Tetsuya,
>
> I am seeing a problem with the librte_pmd_ixgbe code where dev->max_vfs is being lost in some scenarios.
> The following line should be added here:
>       dev2->max_vfs = dev->max_vfs;
>
> numa_mode should probably be updated too (although it is not causing a problem at present).
>       dev2->numa_mode = dev->numa_mode;

Hi Bernard,

Thanks for reporting. I will add above 2 lines.

Regards,
Tetsuya

> Regards,
>
> Bernard.
>
>
>
>
>>> +				free(dev);
>>> +				return 0;
>>>  			}
>>>  		}
>>>  		TAILQ_INSERT_TAIL(&pci_device_list, dev, next); diff --git
>>> a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>> b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>> index e53f06b..1da3507 100644
>>> --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>> @@ -123,7 +123,7 @@ pci_uio_map_secondary(struct rte_pci_device *dev)
>>>  	TAILQ_FOREACH(uio_res, pci_res_list, next) {
>>>
>>>  		/* skip this element if it doesn't match our PCI address */
>>> -		if (memcmp(&uio_res->pci_addr, &dev->addr, sizeof(dev->addr)))
>>> +		if (eal_compare_pci_addr(&uio_res->pci_addr, &dev->addr))
>>>  			continue;
>>>
>>>  		for (i = 0; i != uio_res->nb_maps; i++) {
>> Acked-by: Michael Qiu <michael.qiu@intel.com>
  
Michael Qiu Feb. 11, 2015, 3:27 a.m. UTC | #4
On 2/10/2015 11:11 PM, Iremonger, Bernard wrote:
>> -----Original Message-----
>> From: Qiu, Michael
>> Sent: Monday, February 9, 2015 1:10 PM
>> To: Tetsuya Mukawa; dev@dpdk.org
>> Cc: Iremonger, Bernard
>> Subject: Re: [PATCH v7 04/14] eal/pci: Consolidate pci address comparison APIs
>>
>> On 2/9/2015 4:31 PM, Tetsuya Mukawa wrote:
>>> This patch replaces pci_addr_comparison() and memcmp() of pci
>>> addresses by eal_compare_pci_addr().
>>>
>>> v5:
>>> - Fix pci_scan_one to handle pt_driver correctly.
>>> v4:
>>> - Fix calculation method of eal_compare_pci_addr().
>>> - Add parameter checking.
>>>
>>> Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
>>> ---
>>>  lib/librte_eal/bsdapp/eal/eal_pci.c       | 25 ++++++++---------------
>>>  lib/librte_eal/common/eal_common_pci.c    |  2 +-
>>>  lib/librte_eal/common/include/rte_pci.h   | 34 +++++++++++++++++++++++++++++++
>>>  lib/librte_eal/linuxapp/eal/eal_pci.c     | 25 ++++++++---------------
>>>  lib/librte_eal/linuxapp/eal/eal_pci_uio.c |  2 +-
>>>  5 files changed, 54 insertions(+), 34 deletions(-)
>>>
>>> diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c
>>> b/lib/librte_eal/bsdapp/eal/eal_pci.c
>>> index 74ecce7..c844d58 100644
>>> --- a/lib/librte_eal/bsdapp/eal/eal_pci.c
>>> +++ b/lib/librte_eal/bsdapp/eal/eal_pci.c
>>> @@ -270,20 +270,6 @@ pci_uio_map_resource(struct rte_pci_device *dev)
>>>  	return (0);
>>>  }
>>>
>>> -/* Compare two PCI device addresses. */ -static int
>>> -pci_addr_comparison(struct rte_pci_addr *addr, struct rte_pci_addr
>>> *addr2) -{
>>> -	uint64_t dev_addr = (addr->domain << 24) + (addr->bus << 16) + (addr->devid << 8) + addr-
>>> function;
>>> -	uint64_t dev_addr2 = (addr2->domain << 24) + (addr2->bus << 16) + (addr2->devid << 8) +
>> addr2->function;
>>> -
>>> -	if (dev_addr > dev_addr2)
>>> -		return 1;
>>> -	else
>>> -		return 0;
>>> -}
>>> -
>>> -
>>>  /* Scan one pci sysfs entry, and fill the devices list from it. */
>>> static int  pci_scan_one(int dev_pci_fd, struct pci_conf *conf) @@
>>> -356,13 +342,20 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
>>>  	}
>>>  	else {
>>>  		struct rte_pci_device *dev2 = NULL;
>>> +		int ret;
>>>
>>>  		TAILQ_FOREACH(dev2, &pci_device_list, next) {
>>> -			if (pci_addr_comparison(&dev->addr, &dev2->addr))
>>> +			ret = eal_compare_pci_addr(&dev->addr, &dev2->addr);
>>> +			if (ret > 0)
>>>  				continue;
>>> -			else {
>>> +			else if (ret < 0) {
>>>  				TAILQ_INSERT_BEFORE(dev2, dev, next);
>>>  				return 0;
>>> +			} else { /* already registered */
>>> +				/* update pt_driver */
>>> +				dev2->pt_driver = dev->pt_driver;
>>> +				free(dev);
>>> +				return 0;
>>>  			}
>>>  		}
>>>  		TAILQ_INSERT_TAIL(&pci_device_list, dev, next); diff --git
>>> a/lib/librte_eal/common/eal_common_pci.c
>>> b/lib/librte_eal/common/eal_common_pci.c
>>> index f3c7f71..a89f5c3 100644
>>> --- a/lib/librte_eal/common/eal_common_pci.c
>>> +++ b/lib/librte_eal/common/eal_common_pci.c
>>> @@ -93,7 +93,7 @@ static struct rte_devargs *pci_devargs_lookup(struct rte_pci_device *dev)
>>>  		if (devargs->type != RTE_DEVTYPE_BLACKLISTED_PCI &&
>>>  			devargs->type != RTE_DEVTYPE_WHITELISTED_PCI)
>>>  			continue;
>>> -		if (!memcmp(&dev->addr, &devargs->pci.addr, sizeof(dev->addr)))
>>> +		if (!eal_compare_pci_addr(&dev->addr, &devargs->pci.addr))
>>>  			return devargs;
>>>  	}
>>>  	return NULL;
>>> diff --git a/lib/librte_eal/common/include/rte_pci.h
>>> b/lib/librte_eal/common/include/rte_pci.h
>>> index 7f2d699..4814cd7 100644
>>> --- a/lib/librte_eal/common/include/rte_pci.h
>>> +++ b/lib/librte_eal/common/include/rte_pci.h
>>> @@ -269,6 +269,40 @@ eal_parse_pci_DomBDF(const char *input, struct
>>> rte_pci_addr *dev_addr)  }  #undef GET_PCIADDR_FIELD
>>>
>>> +/* Compare two PCI device addresses. */
>>> +/**
>>> + * Utility function to compare two PCI device addresses.
>>> + *
>>> + * @param addr
>>> + *	The PCI Bus-Device-Function address to compare
>>> + * @param addr2
>>> + *	The PCI Bus-Device-Function address to compare
>>> + * @return
>>> + *	0 on equal PCI address.
>>> + *	Positive on addr is greater than addr2.
>>> + *	Negative on addr is less than addr2, or error.
>>> + */
>>> +static inline int
>>> +eal_compare_pci_addr(struct rte_pci_addr *addr, struct rte_pci_addr
>>> +*addr2) {
>>> +	uint64_t dev_addr, dev_addr2;
>>> +
>>> +	if ((addr == NULL) || (addr2 == NULL))
>>> +		return -1;
>>> +
>>> +	dev_addr = (addr->domain << 24) | (addr->bus << 16) |
>>> +				(addr->devid << 8) | addr->function;
>>> +	dev_addr2 = (addr2->domain << 24) | (addr2->bus << 16) |
>>> +				(addr2->devid << 8) | addr2->function;
>>> +
>>> +	if (dev_addr > dev_addr2)
>>> +		return 1;
>>> +	else if (dev_addr < dev_addr2)
>>> +		return -1;
>>> +	else
>>> +		return 0;
>>> +}
>>> +
>>>  /**
>>>   * Probe the PCI bus for registered drivers.
>>>   *
>>> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c
>>> b/lib/librte_eal/linuxapp/eal/eal_pci.c
>>> index c0ca5a5..d847102 100644
>>> --- a/lib/librte_eal/linuxapp/eal/eal_pci.c
>>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
>>> @@ -229,20 +229,6 @@ error:
>>>  	return -1;
>>>  }
>>>
>>> -/* Compare two PCI device addresses. */ -static int
>>> -pci_addr_comparison(struct rte_pci_addr *addr, struct rte_pci_addr
>>> *addr2) -{
>>> -	uint64_t dev_addr = (addr->domain << 24) + (addr->bus << 16) + (addr->devid << 8) + addr-
>>> function;
>>> -	uint64_t dev_addr2 = (addr2->domain << 24) + (addr2->bus << 16) + (addr2->devid << 8) +
>> addr2->function;
>>> -
>>> -	if (dev_addr > dev_addr2)
>>> -		return 1;
>>> -	else
>>> -		return 0;
>>> -}
>>> -
>>> -
>>>  /* Scan one pci sysfs entry, and fill the devices list from it. */
>>> static int  pci_scan_one(const char *dirname, uint16_t domain, uint8_t
>>> bus, @@ -353,13 +339,20 @@ pci_scan_one(const char *dirname, uint16_t
>>> domain, uint8_t bus,
>>>  	}
>>>  	else {
>>>  		struct rte_pci_device *dev2 = NULL;
>>> +		int ret;
>>>
>>>  		TAILQ_FOREACH(dev2, &pci_device_list, next) {
>>> -			if (pci_addr_comparison(&dev->addr, &dev2->addr))
>>> +			ret = eal_compare_pci_addr(&dev->addr, &dev2->addr);
>>> +			if (ret > 0)
>>>  				continue;
>>> -			else {
>>> +			else if (ret < 0) {
>>>  				TAILQ_INSERT_BEFORE(dev2, dev, next);
>>>  				return 0;
>>> +			} else { /* already registered */
>>> +				/* update pt_driver */
>>> +				dev2->pt_driver = dev->pt_driver;
> Hi Tetsuya,
>
> I am seeing a problem with the librte_pmd_ixgbe code where dev->max_vfs is being lost in some scenarios.
> The following line should be added here:
>       dev2->max_vfs = dev->max_vfs;
>
> numa_mode should probably be updated too (although it is not causing a problem at present).
>       dev2->numa_mode = dev->numa_mode;

I'm very curious, why those field miss? I haven't see any places clear
this field.

What is the root cause?

Thanks,
Michael

> Regards,
>
> Bernard.
>
>
>
>
>>> +				free(dev);
>>> +				return 0;
>>>  			}
>>>  		}
>>>  		TAILQ_INSERT_TAIL(&pci_device_list, dev, next); diff --git
>>> a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>> b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>> index e53f06b..1da3507 100644
>>> --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>> @@ -123,7 +123,7 @@ pci_uio_map_secondary(struct rte_pci_device *dev)
>>>  	TAILQ_FOREACH(uio_res, pci_res_list, next) {
>>>
>>>  		/* skip this element if it doesn't match our PCI address */
>>> -		if (memcmp(&uio_res->pci_addr, &dev->addr, sizeof(dev->addr)))
>>> +		if (eal_compare_pci_addr(&uio_res->pci_addr, &dev->addr))
>>>  			continue;
>>>
>>>  		for (i = 0; i != uio_res->nb_maps; i++) {
>> Acked-by: Michael Qiu <michael.qiu@intel.com>
  
Tetsuya Mukawa Feb. 11, 2015, 4:53 a.m. UTC | #5
On 2015/02/11 12:27, Qiu, Michael wrote:
> On 2/10/2015 11:11 PM, Iremonger, Bernard wrote:
>>> -----Original Message-----
>>> From: Qiu, Michael
>>> Sent: Monday, February 9, 2015 1:10 PM
>>> To: Tetsuya Mukawa; dev@dpdk.org
>>> Cc: Iremonger, Bernard
>>> Subject: Re: [PATCH v7 04/14] eal/pci: Consolidate pci address comparison APIs
>>>
>>> On 2/9/2015 4:31 PM, Tetsuya Mukawa wrote:
>>>> This patch replaces pci_addr_comparison() and memcmp() of pci
>>>> addresses by eal_compare_pci_addr().
>>>>
>>>> v5:
>>>> - Fix pci_scan_one to handle pt_driver correctly.
>>>> v4:
>>>> - Fix calculation method of eal_compare_pci_addr().
>>>> - Add parameter checking.
>>>>
>>>> Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
>>>> ---
>>>>  lib/librte_eal/bsdapp/eal/eal_pci.c       | 25 ++++++++---------------
>>>>  lib/librte_eal/common/eal_common_pci.c    |  2 +-
>>>>  lib/librte_eal/common/include/rte_pci.h   | 34 +++++++++++++++++++++++++++++++
>>>>  lib/librte_eal/linuxapp/eal/eal_pci.c     | 25 ++++++++---------------
>>>>  lib/librte_eal/linuxapp/eal/eal_pci_uio.c |  2 +-
>>>>  5 files changed, 54 insertions(+), 34 deletions(-)
>>>>
>>>> diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>> b/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>> index 74ecce7..c844d58 100644
>>>> --- a/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>> +++ b/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>> @@ -270,20 +270,6 @@ pci_uio_map_resource(struct rte_pci_device *dev)
>>>>  	return (0);
>>>>  }
>>>>
>>>> -/* Compare two PCI device addresses. */ -static int
>>>> -pci_addr_comparison(struct rte_pci_addr *addr, struct rte_pci_addr
>>>> *addr2) -{
>>>> -	uint64_t dev_addr = (addr->domain << 24) + (addr->bus << 16) + (addr->devid << 8) + addr-
>>>> function;
>>>> -	uint64_t dev_addr2 = (addr2->domain << 24) + (addr2->bus << 16) + (addr2->devid << 8) +
>>> addr2->function;
>>>> -
>>>> -	if (dev_addr > dev_addr2)
>>>> -		return 1;
>>>> -	else
>>>> -		return 0;
>>>> -}
>>>> -
>>>> -
>>>>  /* Scan one pci sysfs entry, and fill the devices list from it. */
>>>> static int  pci_scan_one(int dev_pci_fd, struct pci_conf *conf) @@
>>>> -356,13 +342,20 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
>>>>  	}
>>>>  	else {
>>>>  		struct rte_pci_device *dev2 = NULL;
>>>> +		int ret;
>>>>
>>>>  		TAILQ_FOREACH(dev2, &pci_device_list, next) {
>>>> -			if (pci_addr_comparison(&dev->addr, &dev2->addr))
>>>> +			ret = eal_compare_pci_addr(&dev->addr, &dev2->addr);
>>>> +			if (ret > 0)
>>>>  				continue;
>>>> -			else {
>>>> +			else if (ret < 0) {
>>>>  				TAILQ_INSERT_BEFORE(dev2, dev, next);
>>>>  				return 0;
>>>> +			} else { /* already registered */
>>>> +				/* update pt_driver */
>>>> +				dev2->pt_driver = dev->pt_driver;
>>>> +				free(dev);
>>>> +				return 0;
>>>>  			}
>>>>  		}
>>>>  		TAILQ_INSERT_TAIL(&pci_device_list, dev, next); diff --git
>>>> a/lib/librte_eal/common/eal_common_pci.c
>>>> b/lib/librte_eal/common/eal_common_pci.c
>>>> index f3c7f71..a89f5c3 100644
>>>> --- a/lib/librte_eal/common/eal_common_pci.c
>>>> +++ b/lib/librte_eal/common/eal_common_pci.c
>>>> @@ -93,7 +93,7 @@ static struct rte_devargs *pci_devargs_lookup(struct rte_pci_device *dev)
>>>>  		if (devargs->type != RTE_DEVTYPE_BLACKLISTED_PCI &&
>>>>  			devargs->type != RTE_DEVTYPE_WHITELISTED_PCI)
>>>>  			continue;
>>>> -		if (!memcmp(&dev->addr, &devargs->pci.addr, sizeof(dev->addr)))
>>>> +		if (!eal_compare_pci_addr(&dev->addr, &devargs->pci.addr))
>>>>  			return devargs;
>>>>  	}
>>>>  	return NULL;
>>>> diff --git a/lib/librte_eal/common/include/rte_pci.h
>>>> b/lib/librte_eal/common/include/rte_pci.h
>>>> index 7f2d699..4814cd7 100644
>>>> --- a/lib/librte_eal/common/include/rte_pci.h
>>>> +++ b/lib/librte_eal/common/include/rte_pci.h
>>>> @@ -269,6 +269,40 @@ eal_parse_pci_DomBDF(const char *input, struct
>>>> rte_pci_addr *dev_addr)  }  #undef GET_PCIADDR_FIELD
>>>>
>>>> +/* Compare two PCI device addresses. */
>>>> +/**
>>>> + * Utility function to compare two PCI device addresses.
>>>> + *
>>>> + * @param addr
>>>> + *	The PCI Bus-Device-Function address to compare
>>>> + * @param addr2
>>>> + *	The PCI Bus-Device-Function address to compare
>>>> + * @return
>>>> + *	0 on equal PCI address.
>>>> + *	Positive on addr is greater than addr2.
>>>> + *	Negative on addr is less than addr2, or error.
>>>> + */
>>>> +static inline int
>>>> +eal_compare_pci_addr(struct rte_pci_addr *addr, struct rte_pci_addr
>>>> +*addr2) {
>>>> +	uint64_t dev_addr, dev_addr2;
>>>> +
>>>> +	if ((addr == NULL) || (addr2 == NULL))
>>>> +		return -1;
>>>> +
>>>> +	dev_addr = (addr->domain << 24) | (addr->bus << 16) |
>>>> +				(addr->devid << 8) | addr->function;
>>>> +	dev_addr2 = (addr2->domain << 24) | (addr2->bus << 16) |
>>>> +				(addr2->devid << 8) | addr2->function;
>>>> +
>>>> +	if (dev_addr > dev_addr2)
>>>> +		return 1;
>>>> +	else if (dev_addr < dev_addr2)
>>>> +		return -1;
>>>> +	else
>>>> +		return 0;
>>>> +}
>>>> +
>>>>  /**
>>>>   * Probe the PCI bus for registered drivers.
>>>>   *
>>>> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>> b/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>> index c0ca5a5..d847102 100644
>>>> --- a/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>> @@ -229,20 +229,6 @@ error:
>>>>  	return -1;
>>>>  }
>>>>
>>>> -/* Compare two PCI device addresses. */ -static int
>>>> -pci_addr_comparison(struct rte_pci_addr *addr, struct rte_pci_addr
>>>> *addr2) -{
>>>> -	uint64_t dev_addr = (addr->domain << 24) + (addr->bus << 16) + (addr->devid << 8) + addr-
>>>> function;
>>>> -	uint64_t dev_addr2 = (addr2->domain << 24) + (addr2->bus << 16) + (addr2->devid << 8) +
>>> addr2->function;
>>>> -
>>>> -	if (dev_addr > dev_addr2)
>>>> -		return 1;
>>>> -	else
>>>> -		return 0;
>>>> -}
>>>> -
>>>> -
>>>>  /* Scan one pci sysfs entry, and fill the devices list from it. */
>>>> static int  pci_scan_one(const char *dirname, uint16_t domain, uint8_t
>>>> bus, @@ -353,13 +339,20 @@ pci_scan_one(const char *dirname, uint16_t
>>>> domain, uint8_t bus,
>>>>  	}
>>>>  	else {
>>>>  		struct rte_pci_device *dev2 = NULL;
>>>> +		int ret;
>>>>
>>>>  		TAILQ_FOREACH(dev2, &pci_device_list, next) {
>>>> -			if (pci_addr_comparison(&dev->addr, &dev2->addr))
>>>> +			ret = eal_compare_pci_addr(&dev->addr, &dev2->addr);
>>>> +			if (ret > 0)
>>>>  				continue;
>>>> -			else {
>>>> +			else if (ret < 0) {
>>>>  				TAILQ_INSERT_BEFORE(dev2, dev, next);
>>>>  				return 0;
>>>> +			} else { /* already registered */
>>>> +				/* update pt_driver */
>>>> +				dev2->pt_driver = dev->pt_driver;
>> Hi Tetsuya,
>>
>> I am seeing a problem with the librte_pmd_ixgbe code where dev->max_vfs is being lost in some scenarios.
>> The following line should be added here:
>>       dev2->max_vfs = dev->max_vfs;
>>
>> numa_mode should probably be updated too (although it is not causing a problem at present).
>>       dev2->numa_mode = dev->numa_mode;
> I'm very curious, why those field miss? I haven't see any places clear
> this field.
>
> What is the root cause?

Hi Michael,

Here is my guess.
The above function creates pci device list.
And current DPDK implementation assumes all devices needed to be managed
are under igb_uio or vfio when above code is processed.
To add hotplug function, we also need to think some devices will start
to be managed under igb_uio or vfio after initializing pci device list.
Anyway, I guess "max_vfs" value will be changed when igb_uio or vfio
manages the device.

Hi Bernard,

Could you please check "max_vfs" and "num_node" values, then check the
values again after the device is managed by igb_uio or vfio?
In my environment, it seems max_vfs is created by igb_uio.
But my NIC doesn't have VF, so behavior might be different in your
environment.
I guess "numa_node" should not be changed theoretically.

If my guess is correct, how about replacing following values?
- driver
- max_vfs
- resource
- (numa_node)
Except for above value, I guess other value shouldn't be changed even
after the device is managed by igb_uio or vfio.

Thanks,
Tetsuya

> Thanks,
> Michael
>
>> Regards,
>>
>> Bernard.
>>
>>
>>
>>
>>>> +				free(dev);
>>>> +				return 0;
>>>>  			}
>>>>  		}
>>>>  		TAILQ_INSERT_TAIL(&pci_device_list, dev, next); diff --git
>>>> a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>> b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>> index e53f06b..1da3507 100644
>>>> --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>> @@ -123,7 +123,7 @@ pci_uio_map_secondary(struct rte_pci_device *dev)
>>>>  	TAILQ_FOREACH(uio_res, pci_res_list, next) {
>>>>
>>>>  		/* skip this element if it doesn't match our PCI address */
>>>> -		if (memcmp(&uio_res->pci_addr, &dev->addr, sizeof(dev->addr)))
>>>> +		if (eal_compare_pci_addr(&uio_res->pci_addr, &dev->addr))
>>>>  			continue;
>>>>
>>>>  		for (i = 0; i != uio_res->nb_maps; i++) {
>>> Acked-by: Michael Qiu <michael.qiu@intel.com>
  
Tetsuya Mukawa Feb. 11, 2015, 4:57 a.m. UTC | #6
On 2015/02/11 13:53, Tetsuya Mukawa wrote:
> On 2015/02/11 12:27, Qiu, Michael wrote:
>> On 2/10/2015 11:11 PM, Iremonger, Bernard wrote:
>>>> -----Original Message-----
>>>> From: Qiu, Michael
>>>> Sent: Monday, February 9, 2015 1:10 PM
>>>> To: Tetsuya Mukawa; dev@dpdk.org
>>>> Cc: Iremonger, Bernard
>>>> Subject: Re: [PATCH v7 04/14] eal/pci: Consolidate pci address comparison APIs
>>>>
>>>> On 2/9/2015 4:31 PM, Tetsuya Mukawa wrote:
>>>>> This patch replaces pci_addr_comparison() and memcmp() of pci
>>>>> addresses by eal_compare_pci_addr().
>>>>>
>>>>> v5:
>>>>> - Fix pci_scan_one to handle pt_driver correctly.
>>>>> v4:
>>>>> - Fix calculation method of eal_compare_pci_addr().
>>>>> - Add parameter checking.
>>>>>
>>>>> Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
>>>>> ---
>>>>>  lib/librte_eal/bsdapp/eal/eal_pci.c       | 25 ++++++++---------------
>>>>>  lib/librte_eal/common/eal_common_pci.c    |  2 +-
>>>>>  lib/librte_eal/common/include/rte_pci.h   | 34 +++++++++++++++++++++++++++++++
>>>>>  lib/librte_eal/linuxapp/eal/eal_pci.c     | 25 ++++++++---------------
>>>>>  lib/librte_eal/linuxapp/eal/eal_pci_uio.c |  2 +-
>>>>>  5 files changed, 54 insertions(+), 34 deletions(-)
>>>>>
>>>>> diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>>> b/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>>> index 74ecce7..c844d58 100644
>>>>> --- a/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>>> +++ b/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>>> @@ -270,20 +270,6 @@ pci_uio_map_resource(struct rte_pci_device *dev)
>>>>>  	return (0);
>>>>>  }
>>>>>
>>>>> -/* Compare two PCI device addresses. */ -static int
>>>>> -pci_addr_comparison(struct rte_pci_addr *addr, struct rte_pci_addr
>>>>> *addr2) -{
>>>>> -	uint64_t dev_addr = (addr->domain << 24) + (addr->bus << 16) + (addr->devid << 8) + addr-
>>>>> function;
>>>>> -	uint64_t dev_addr2 = (addr2->domain << 24) + (addr2->bus << 16) + (addr2->devid << 8) +
>>>> addr2->function;
>>>>> -
>>>>> -	if (dev_addr > dev_addr2)
>>>>> -		return 1;
>>>>> -	else
>>>>> -		return 0;
>>>>> -}
>>>>> -
>>>>> -
>>>>>  /* Scan one pci sysfs entry, and fill the devices list from it. */
>>>>> static int  pci_scan_one(int dev_pci_fd, struct pci_conf *conf) @@
>>>>> -356,13 +342,20 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
>>>>>  	}
>>>>>  	else {
>>>>>  		struct rte_pci_device *dev2 = NULL;
>>>>> +		int ret;
>>>>>
>>>>>  		TAILQ_FOREACH(dev2, &pci_device_list, next) {
>>>>> -			if (pci_addr_comparison(&dev->addr, &dev2->addr))
>>>>> +			ret = eal_compare_pci_addr(&dev->addr, &dev2->addr);
>>>>> +			if (ret > 0)
>>>>>  				continue;
>>>>> -			else {
>>>>> +			else if (ret < 0) {
>>>>>  				TAILQ_INSERT_BEFORE(dev2, dev, next);
>>>>>  				return 0;
>>>>> +			} else { /* already registered */
>>>>> +				/* update pt_driver */
>>>>> +				dev2->pt_driver = dev->pt_driver;
>>>>> +				free(dev);
>>>>> +				return 0;
>>>>>  			}
>>>>>  		}
>>>>>  		TAILQ_INSERT_TAIL(&pci_device_list, dev, next); diff --git
>>>>> a/lib/librte_eal/common/eal_common_pci.c
>>>>> b/lib/librte_eal/common/eal_common_pci.c
>>>>> index f3c7f71..a89f5c3 100644
>>>>> --- a/lib/librte_eal/common/eal_common_pci.c
>>>>> +++ b/lib/librte_eal/common/eal_common_pci.c
>>>>> @@ -93,7 +93,7 @@ static struct rte_devargs *pci_devargs_lookup(struct rte_pci_device *dev)
>>>>>  		if (devargs->type != RTE_DEVTYPE_BLACKLISTED_PCI &&
>>>>>  			devargs->type != RTE_DEVTYPE_WHITELISTED_PCI)
>>>>>  			continue;
>>>>> -		if (!memcmp(&dev->addr, &devargs->pci.addr, sizeof(dev->addr)))
>>>>> +		if (!eal_compare_pci_addr(&dev->addr, &devargs->pci.addr))
>>>>>  			return devargs;
>>>>>  	}
>>>>>  	return NULL;
>>>>> diff --git a/lib/librte_eal/common/include/rte_pci.h
>>>>> b/lib/librte_eal/common/include/rte_pci.h
>>>>> index 7f2d699..4814cd7 100644
>>>>> --- a/lib/librte_eal/common/include/rte_pci.h
>>>>> +++ b/lib/librte_eal/common/include/rte_pci.h
>>>>> @@ -269,6 +269,40 @@ eal_parse_pci_DomBDF(const char *input, struct
>>>>> rte_pci_addr *dev_addr)  }  #undef GET_PCIADDR_FIELD
>>>>>
>>>>> +/* Compare two PCI device addresses. */
>>>>> +/**
>>>>> + * Utility function to compare two PCI device addresses.
>>>>> + *
>>>>> + * @param addr
>>>>> + *	The PCI Bus-Device-Function address to compare
>>>>> + * @param addr2
>>>>> + *	The PCI Bus-Device-Function address to compare
>>>>> + * @return
>>>>> + *	0 on equal PCI address.
>>>>> + *	Positive on addr is greater than addr2.
>>>>> + *	Negative on addr is less than addr2, or error.
>>>>> + */
>>>>> +static inline int
>>>>> +eal_compare_pci_addr(struct rte_pci_addr *addr, struct rte_pci_addr
>>>>> +*addr2) {
>>>>> +	uint64_t dev_addr, dev_addr2;
>>>>> +
>>>>> +	if ((addr == NULL) || (addr2 == NULL))
>>>>> +		return -1;
>>>>> +
>>>>> +	dev_addr = (addr->domain << 24) | (addr->bus << 16) |
>>>>> +				(addr->devid << 8) | addr->function;
>>>>> +	dev_addr2 = (addr2->domain << 24) | (addr2->bus << 16) |
>>>>> +				(addr2->devid << 8) | addr2->function;
>>>>> +
>>>>> +	if (dev_addr > dev_addr2)
>>>>> +		return 1;
>>>>> +	else if (dev_addr < dev_addr2)
>>>>> +		return -1;
>>>>> +	else
>>>>> +		return 0;
>>>>> +}
>>>>> +
>>>>>  /**
>>>>>   * Probe the PCI bus for registered drivers.
>>>>>   *
>>>>> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>> b/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>> index c0ca5a5..d847102 100644
>>>>> --- a/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>> @@ -229,20 +229,6 @@ error:
>>>>>  	return -1;
>>>>>  }
>>>>>
>>>>> -/* Compare two PCI device addresses. */ -static int
>>>>> -pci_addr_comparison(struct rte_pci_addr *addr, struct rte_pci_addr
>>>>> *addr2) -{
>>>>> -	uint64_t dev_addr = (addr->domain << 24) + (addr->bus << 16) + (addr->devid << 8) + addr-
>>>>> function;
>>>>> -	uint64_t dev_addr2 = (addr2->domain << 24) + (addr2->bus << 16) + (addr2->devid << 8) +
>>>> addr2->function;
>>>>> -
>>>>> -	if (dev_addr > dev_addr2)
>>>>> -		return 1;
>>>>> -	else
>>>>> -		return 0;
>>>>> -}
>>>>> -
>>>>> -
>>>>>  /* Scan one pci sysfs entry, and fill the devices list from it. */
>>>>> static int  pci_scan_one(const char *dirname, uint16_t domain, uint8_t
>>>>> bus, @@ -353,13 +339,20 @@ pci_scan_one(const char *dirname, uint16_t
>>>>> domain, uint8_t bus,
>>>>>  	}
>>>>>  	else {
>>>>>  		struct rte_pci_device *dev2 = NULL;
>>>>> +		int ret;
>>>>>
>>>>>  		TAILQ_FOREACH(dev2, &pci_device_list, next) {
>>>>> -			if (pci_addr_comparison(&dev->addr, &dev2->addr))
>>>>> +			ret = eal_compare_pci_addr(&dev->addr, &dev2->addr);
>>>>> +			if (ret > 0)
>>>>>  				continue;
>>>>> -			else {
>>>>> +			else if (ret < 0) {
>>>>>  				TAILQ_INSERT_BEFORE(dev2, dev, next);
>>>>>  				return 0;
>>>>> +			} else { /* already registered */
>>>>> +				/* update pt_driver */
>>>>> +				dev2->pt_driver = dev->pt_driver;
>>> Hi Tetsuya,
>>>
>>> I am seeing a problem with the librte_pmd_ixgbe code where dev->max_vfs is being lost in some scenarios.
>>> The following line should be added here:
>>>       dev2->max_vfs = dev->max_vfs;
>>>
>>> numa_mode should probably be updated too (although it is not causing a problem at present).
>>>       dev2->numa_mode = dev->numa_mode;
>> I'm very curious, why those field miss? I haven't see any places clear
>> this field.
>>
>> What is the root cause?
> Hi Michael,
>
> Here is my guess.
> The above function creates pci device list.

I am sorry. I forgot to add below information.

"max_vfs" or "numa_node" value is came from sysfs when the above
function is processed.

> And current DPDK implementation assumes all devices needed to be managed
> are under igb_uio or vfio when above code is processed.
> To add hotplug function, we also need to think some devices will start
> to be managed under igb_uio or vfio after initializing pci device list.
> Anyway, I guess "max_vfs" value will be changed when igb_uio or vfio
> manages the device.
>
> Hi Bernard,
>
> Could you please check "max_vfs" and "num_node" values, then check the
> values again after the device is managed by igb_uio or vfio?
> In my environment, it seems max_vfs is created by igb_uio.
> But my NIC doesn't have VF, so behavior might be different in your
> environment.
> I guess "numa_node" should not be changed theoretically.
>
> If my guess is correct, how about replacing following values?
> - driver
> - max_vfs
> - resource
> - (numa_node)
> Except for above value, I guess other value shouldn't be changed even
> after the device is managed by igb_uio or vfio.
>
> Thanks,
> Tetsuya
>
>> Thanks,
>> Michael
>>
>>> Regards,
>>>
>>> Bernard.
>>>
>>>
>>>
>>>
>>>>> +				free(dev);
>>>>> +				return 0;
>>>>>  			}
>>>>>  		}
>>>>>  		TAILQ_INSERT_TAIL(&pci_device_list, dev, next); diff --git
>>>>> a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>>> b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>>> index e53f06b..1da3507 100644
>>>>> --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>>> @@ -123,7 +123,7 @@ pci_uio_map_secondary(struct rte_pci_device *dev)
>>>>>  	TAILQ_FOREACH(uio_res, pci_res_list, next) {
>>>>>
>>>>>  		/* skip this element if it doesn't match our PCI address */
>>>>> -		if (memcmp(&uio_res->pci_addr, &dev->addr, sizeof(dev->addr)))
>>>>> +		if (eal_compare_pci_addr(&uio_res->pci_addr, &dev->addr))
>>>>>  			continue;
>>>>>
>>>>>  		for (i = 0; i != uio_res->nb_maps; i++) {
>>>> Acked-by: Michael Qiu <michael.qiu@intel.com>
>
  
Michael Qiu Feb. 11, 2015, 6:29 a.m. UTC | #7
On 2/11/2015 12:57 PM, Tetsuya Mukawa wrote:
> On 2015/02/11 13:53, Tetsuya Mukawa wrote:
>> On 2015/02/11 12:27, Qiu, Michael wrote:
>>> On 2/10/2015 11:11 PM, Iremonger, Bernard wrote:
>>>>> -----Original Message-----
>>>>> From: Qiu, Michael
>>>>> Sent: Monday, February 9, 2015 1:10 PM
>>>>> To: Tetsuya Mukawa; dev@dpdk.org
>>>>> Cc: Iremonger, Bernard
>>>>> Subject: Re: [PATCH v7 04/14] eal/pci: Consolidate pci address comparison APIs
>>>>>
>>>>> On 2/9/2015 4:31 PM, Tetsuya Mukawa wrote:
>>>>>> This patch replaces pci_addr_comparison() and memcmp() of pci
>>>>>> addresses by eal_compare_pci_addr().
>>>>>>
>>>>>> v5:
>>>>>> - Fix pci_scan_one to handle pt_driver correctly.
>>>>>> v4:
>>>>>> - Fix calculation method of eal_compare_pci_addr().
>>>>>> - Add parameter checking.
>>>>>>
>>>>>> Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
>>>>>> ---
>>>>>>  lib/librte_eal/bsdapp/eal/eal_pci.c       | 25 ++++++++---------------
>>>>>>  lib/librte_eal/common/eal_common_pci.c    |  2 +-
>>>>>>  lib/librte_eal/common/include/rte_pci.h   | 34 +++++++++++++++++++++++++++++++
>>>>>>  lib/librte_eal/linuxapp/eal/eal_pci.c     | 25 ++++++++---------------
>>>>>>  lib/librte_eal/linuxapp/eal/eal_pci_uio.c |  2 +-
>>>>>>  5 files changed, 54 insertions(+), 34 deletions(-)
>>>>>>
>>>>>> diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>>>> b/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>>>> index 74ecce7..c844d58 100644
>>>>>> --- a/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>>>> +++ b/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>>>> @@ -270,20 +270,6 @@ pci_uio_map_resource(struct rte_pci_device *dev)
>>>>>>  	return (0);
>>>>>>  }
>>>>>>
>>>>>> -/* Compare two PCI device addresses. */ -static int
>>>>>> -pci_addr_comparison(struct rte_pci_addr *addr, struct rte_pci_addr
>>>>>> *addr2) -{
>>>>>> -	uint64_t dev_addr = (addr->domain << 24) + (addr->bus << 16) + (addr->devid << 8) + addr-
>>>>>> function;
>>>>>> -	uint64_t dev_addr2 = (addr2->domain << 24) + (addr2->bus << 16) + (addr2->devid << 8) +
>>>>> addr2->function;
>>>>>> -
>>>>>> -	if (dev_addr > dev_addr2)
>>>>>> -		return 1;
>>>>>> -	else
>>>>>> -		return 0;
>>>>>> -}
>>>>>> -
>>>>>> -
>>>>>>  /* Scan one pci sysfs entry, and fill the devices list from it. */
>>>>>> static int  pci_scan_one(int dev_pci_fd, struct pci_conf *conf) @@
>>>>>> -356,13 +342,20 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
>>>>>>  	}
>>>>>>  	else {
>>>>>>  		struct rte_pci_device *dev2 = NULL;
>>>>>> +		int ret;
>>>>>>
>>>>>>  		TAILQ_FOREACH(dev2, &pci_device_list, next) {
>>>>>> -			if (pci_addr_comparison(&dev->addr, &dev2->addr))
>>>>>> +			ret = eal_compare_pci_addr(&dev->addr, &dev2->addr);
>>>>>> +			if (ret > 0)
>>>>>>  				continue;
>>>>>> -			else {
>>>>>> +			else if (ret < 0) {
>>>>>>  				TAILQ_INSERT_BEFORE(dev2, dev, next);
>>>>>>  				return 0;
>>>>>> +			} else { /* already registered */
>>>>>> +				/* update pt_driver */
>>>>>> +				dev2->pt_driver = dev->pt_driver;
>>>>>> +				free(dev);
>>>>>> +				return 0;
>>>>>>  			}
>>>>>>  		}
>>>>>>  		TAILQ_INSERT_TAIL(&pci_device_list, dev, next); diff --git
>>>>>> a/lib/librte_eal/common/eal_common_pci.c
>>>>>> b/lib/librte_eal/common/eal_common_pci.c
>>>>>> index f3c7f71..a89f5c3 100644
>>>>>> --- a/lib/librte_eal/common/eal_common_pci.c
>>>>>> +++ b/lib/librte_eal/common/eal_common_pci.c
>>>>>> @@ -93,7 +93,7 @@ static struct rte_devargs *pci_devargs_lookup(struct rte_pci_device *dev)
>>>>>>  		if (devargs->type != RTE_DEVTYPE_BLACKLISTED_PCI &&
>>>>>>  			devargs->type != RTE_DEVTYPE_WHITELISTED_PCI)
>>>>>>  			continue;
>>>>>> -		if (!memcmp(&dev->addr, &devargs->pci.addr, sizeof(dev->addr)))
>>>>>> +		if (!eal_compare_pci_addr(&dev->addr, &devargs->pci.addr))
>>>>>>  			return devargs;
>>>>>>  	}
>>>>>>  	return NULL;
>>>>>> diff --git a/lib/librte_eal/common/include/rte_pci.h
>>>>>> b/lib/librte_eal/common/include/rte_pci.h
>>>>>> index 7f2d699..4814cd7 100644
>>>>>> --- a/lib/librte_eal/common/include/rte_pci.h
>>>>>> +++ b/lib/librte_eal/common/include/rte_pci.h
>>>>>> @@ -269,6 +269,40 @@ eal_parse_pci_DomBDF(const char *input, struct
>>>>>> rte_pci_addr *dev_addr)  }  #undef GET_PCIADDR_FIELD
>>>>>>
>>>>>> +/* Compare two PCI device addresses. */
>>>>>> +/**
>>>>>> + * Utility function to compare two PCI device addresses.
>>>>>> + *
>>>>>> + * @param addr
>>>>>> + *	The PCI Bus-Device-Function address to compare
>>>>>> + * @param addr2
>>>>>> + *	The PCI Bus-Device-Function address to compare
>>>>>> + * @return
>>>>>> + *	0 on equal PCI address.
>>>>>> + *	Positive on addr is greater than addr2.
>>>>>> + *	Negative on addr is less than addr2, or error.
>>>>>> + */
>>>>>> +static inline int
>>>>>> +eal_compare_pci_addr(struct rte_pci_addr *addr, struct rte_pci_addr
>>>>>> +*addr2) {
>>>>>> +	uint64_t dev_addr, dev_addr2;
>>>>>> +
>>>>>> +	if ((addr == NULL) || (addr2 == NULL))
>>>>>> +		return -1;
>>>>>> +
>>>>>> +	dev_addr = (addr->domain << 24) | (addr->bus << 16) |
>>>>>> +				(addr->devid << 8) | addr->function;
>>>>>> +	dev_addr2 = (addr2->domain << 24) | (addr2->bus << 16) |
>>>>>> +				(addr2->devid << 8) | addr2->function;
>>>>>> +
>>>>>> +	if (dev_addr > dev_addr2)
>>>>>> +		return 1;
>>>>>> +	else if (dev_addr < dev_addr2)
>>>>>> +		return -1;
>>>>>> +	else
>>>>>> +		return 0;
>>>>>> +}
>>>>>> +
>>>>>>  /**
>>>>>>   * Probe the PCI bus for registered drivers.
>>>>>>   *
>>>>>> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>>> b/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>>> index c0ca5a5..d847102 100644
>>>>>> --- a/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>>> @@ -229,20 +229,6 @@ error:
>>>>>>  	return -1;
>>>>>>  }
>>>>>>
>>>>>> -/* Compare two PCI device addresses. */ -static int
>>>>>> -pci_addr_comparison(struct rte_pci_addr *addr, struct rte_pci_addr
>>>>>> *addr2) -{
>>>>>> -	uint64_t dev_addr = (addr->domain << 24) + (addr->bus << 16) + (addr->devid << 8) + addr-
>>>>>> function;
>>>>>> -	uint64_t dev_addr2 = (addr2->domain << 24) + (addr2->bus << 16) + (addr2->devid << 8) +
>>>>> addr2->function;
>>>>>> -
>>>>>> -	if (dev_addr > dev_addr2)
>>>>>> -		return 1;
>>>>>> -	else
>>>>>> -		return 0;
>>>>>> -}
>>>>>> -
>>>>>> -
>>>>>>  /* Scan one pci sysfs entry, and fill the devices list from it. */
>>>>>> static int  pci_scan_one(const char *dirname, uint16_t domain, uint8_t
>>>>>> bus, @@ -353,13 +339,20 @@ pci_scan_one(const char *dirname, uint16_t
>>>>>> domain, uint8_t bus,
>>>>>>  	}
>>>>>>  	else {
>>>>>>  		struct rte_pci_device *dev2 = NULL;
>>>>>> +		int ret;
>>>>>>
>>>>>>  		TAILQ_FOREACH(dev2, &pci_device_list, next) {
>>>>>> -			if (pci_addr_comparison(&dev->addr, &dev2->addr))
>>>>>> +			ret = eal_compare_pci_addr(&dev->addr, &dev2->addr);
>>>>>> +			if (ret > 0)
>>>>>>  				continue;
>>>>>> -			else {
>>>>>> +			else if (ret < 0) {
>>>>>>  				TAILQ_INSERT_BEFORE(dev2, dev, next);
>>>>>>  				return 0;
>>>>>> +			} else { /* already registered */
>>>>>> +				/* update pt_driver */
>>>>>> +				dev2->pt_driver = dev->pt_driver;
>>>> Hi Tetsuya,
>>>>
>>>> I am seeing a problem with the librte_pmd_ixgbe code where dev->max_vfs is being lost in some scenarios.
>>>> The following line should be added here:
>>>>       dev2->max_vfs = dev->max_vfs;
>>>>
>>>> numa_mode should probably be updated too (although it is not causing a problem at present).
>>>>       dev2->numa_mode = dev->numa_mode;
>>> I'm very curious, why those field miss? I haven't see any places clear
>>> this field.
>>>
>>> What is the root cause?
>> Hi Michael,
>>
>> Here is my guess.
>> The above function creates pci device list.
> I am sorry. I forgot to add below information.
>
> "max_vfs" or "numa_node" value is came from sysfs when the above
> function is processed.

Yes, but it has already been registered, why it missed?

Thanks,
Michael
>> And current DPDK implementation assumes all devices needed to be managed
>> are under igb_uio or vfio when above code is processed.
>> To add hotplug function, we also need to think some devices will start
>> to be managed under igb_uio or vfio after initializing pci device list.
>> Anyway, I guess "max_vfs" value will be changed when igb_uio or vfio
>> manages the device.
>>
>> Hi Bernard,
>>
>> Could you please check "max_vfs" and "num_node" values, then check the
>> values again after the device is managed by igb_uio or vfio?
>> In my environment, it seems max_vfs is created by igb_uio.
>> But my NIC doesn't have VF, so behavior might be different in your
>> environment.
>> I guess "numa_node" should not be changed theoretically.
>>
>> If my guess is correct, how about replacing following values?
>> - driver
>> - max_vfs
>> - resource
>> - (numa_node)
>> Except for above value, I guess other value shouldn't be changed even
>> after the device is managed by igb_uio or vfio.
>>
>> Thanks,
>> Tetsuya
>>
>>> Thanks,
>>> Michael
>>>
>>>> Regards,
>>>>
>>>> Bernard.
>>>>
>>>>
>>>>
>>>>
>>>>>> +				free(dev);
>>>>>> +				return 0;
>>>>>>  			}
>>>>>>  		}
>>>>>>  		TAILQ_INSERT_TAIL(&pci_device_list, dev, next); diff --git
>>>>>> a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>>>> b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>>>> index e53f06b..1da3507 100644
>>>>>> --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>>>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>>>> @@ -123,7 +123,7 @@ pci_uio_map_secondary(struct rte_pci_device *dev)
>>>>>>  	TAILQ_FOREACH(uio_res, pci_res_list, next) {
>>>>>>
>>>>>>  		/* skip this element if it doesn't match our PCI address */
>>>>>> -		if (memcmp(&uio_res->pci_addr, &dev->addr, sizeof(dev->addr)))
>>>>>> +		if (eal_compare_pci_addr(&uio_res->pci_addr, &dev->addr))
>>>>>>  			continue;
>>>>>>
>>>>>>  		for (i = 0; i != uio_res->nb_maps; i++) {
>>>>> Acked-by: Michael Qiu <michael.qiu@intel.com>
>
  
Tetsuya Mukawa Feb. 11, 2015, 8:14 a.m. UTC | #8
On 2015/02/11 15:29, Qiu, Michael wrote:
> On 2/11/2015 12:57 PM, Tetsuya Mukawa wrote:
>> On 2015/02/11 13:53, Tetsuya Mukawa wrote:
>>> On 2015/02/11 12:27, Qiu, Michael wrote:
>>>> On 2/10/2015 11:11 PM, Iremonger, Bernard wrote:
>>>>>> -----Original Message-----
>>>>>> From: Qiu, Michael
>>>>>> Sent: Monday, February 9, 2015 1:10 PM
>>>>>> To: Tetsuya Mukawa; dev@dpdk.org
>>>>>> Cc: Iremonger, Bernard
>>>>>> Subject: Re: [PATCH v7 04/14] eal/pci: Consolidate pci address comparison APIs
>>>>>>
>>>>>> On 2/9/2015 4:31 PM, Tetsuya Mukawa wrote:
>>>>>>> This patch replaces pci_addr_comparison() and memcmp() of pci
>>>>>>> addresses by eal_compare_pci_addr().
>>>>>>>
>>>>>>> v5:
>>>>>>> - Fix pci_scan_one to handle pt_driver correctly.
>>>>>>> v4:
>>>>>>> - Fix calculation method of eal_compare_pci_addr().
>>>>>>> - Add parameter checking.
>>>>>>>
>>>>>>> Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
>>>>>>> ---
>>>>>>>  lib/librte_eal/bsdapp/eal/eal_pci.c       | 25 ++++++++---------------
>>>>>>>  lib/librte_eal/common/eal_common_pci.c    |  2 +-
>>>>>>>  lib/librte_eal/common/include/rte_pci.h   | 34 +++++++++++++++++++++++++++++++
>>>>>>>  lib/librte_eal/linuxapp/eal/eal_pci.c     | 25 ++++++++---------------
>>>>>>>  lib/librte_eal/linuxapp/eal/eal_pci_uio.c |  2 +-
>>>>>>>  5 files changed, 54 insertions(+), 34 deletions(-)
>>>>>>>
>>>>>>> diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>>>>> b/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>>>>> index 74ecce7..c844d58 100644
>>>>>>> --- a/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>>>>> +++ b/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>>>>> @@ -270,20 +270,6 @@ pci_uio_map_resource(struct rte_pci_device *dev)
>>>>>>>  	return (0);
>>>>>>>  }
>>>>>>>
>>>>>>> -/* Compare two PCI device addresses. */ -static int
>>>>>>> -pci_addr_comparison(struct rte_pci_addr *addr, struct rte_pci_addr
>>>>>>> *addr2) -{
>>>>>>> -	uint64_t dev_addr = (addr->domain << 24) + (addr->bus << 16) + (addr->devid << 8) + addr-
>>>>>>> function;
>>>>>>> -	uint64_t dev_addr2 = (addr2->domain << 24) + (addr2->bus << 16) + (addr2->devid << 8) +
>>>>>> addr2->function;
>>>>>>> -
>>>>>>> -	if (dev_addr > dev_addr2)
>>>>>>> -		return 1;
>>>>>>> -	else
>>>>>>> -		return 0;
>>>>>>> -}
>>>>>>> -
>>>>>>> -
>>>>>>>  /* Scan one pci sysfs entry, and fill the devices list from it. */
>>>>>>> static int  pci_scan_one(int dev_pci_fd, struct pci_conf *conf) @@
>>>>>>> -356,13 +342,20 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
>>>>>>>  	}
>>>>>>>  	else {
>>>>>>>  		struct rte_pci_device *dev2 = NULL;
>>>>>>> +		int ret;
>>>>>>>
>>>>>>>  		TAILQ_FOREACH(dev2, &pci_device_list, next) {
>>>>>>> -			if (pci_addr_comparison(&dev->addr, &dev2->addr))
>>>>>>> +			ret = eal_compare_pci_addr(&dev->addr, &dev2->addr);
>>>>>>> +			if (ret > 0)
>>>>>>>  				continue;
>>>>>>> -			else {
>>>>>>> +			else if (ret < 0) {
>>>>>>>  				TAILQ_INSERT_BEFORE(dev2, dev, next);
>>>>>>>  				return 0;
>>>>>>> +			} else { /* already registered */
>>>>>>> +				/* update pt_driver */
>>>>>>> +				dev2->pt_driver = dev->pt_driver;
>>>>>>> +				free(dev);
>>>>>>> +				return 0;
>>>>>>>  			}
>>>>>>>  		}
>>>>>>>  		TAILQ_INSERT_TAIL(&pci_device_list, dev, next); diff --git
>>>>>>> a/lib/librte_eal/common/eal_common_pci.c
>>>>>>> b/lib/librte_eal/common/eal_common_pci.c
>>>>>>> index f3c7f71..a89f5c3 100644
>>>>>>> --- a/lib/librte_eal/common/eal_common_pci.c
>>>>>>> +++ b/lib/librte_eal/common/eal_common_pci.c
>>>>>>> @@ -93,7 +93,7 @@ static struct rte_devargs *pci_devargs_lookup(struct rte_pci_device *dev)
>>>>>>>  		if (devargs->type != RTE_DEVTYPE_BLACKLISTED_PCI &&
>>>>>>>  			devargs->type != RTE_DEVTYPE_WHITELISTED_PCI)
>>>>>>>  			continue;
>>>>>>> -		if (!memcmp(&dev->addr, &devargs->pci.addr, sizeof(dev->addr)))
>>>>>>> +		if (!eal_compare_pci_addr(&dev->addr, &devargs->pci.addr))
>>>>>>>  			return devargs;
>>>>>>>  	}
>>>>>>>  	return NULL;
>>>>>>> diff --git a/lib/librte_eal/common/include/rte_pci.h
>>>>>>> b/lib/librte_eal/common/include/rte_pci.h
>>>>>>> index 7f2d699..4814cd7 100644
>>>>>>> --- a/lib/librte_eal/common/include/rte_pci.h
>>>>>>> +++ b/lib/librte_eal/common/include/rte_pci.h
>>>>>>> @@ -269,6 +269,40 @@ eal_parse_pci_DomBDF(const char *input, struct
>>>>>>> rte_pci_addr *dev_addr)  }  #undef GET_PCIADDR_FIELD
>>>>>>>
>>>>>>> +/* Compare two PCI device addresses. */
>>>>>>> +/**
>>>>>>> + * Utility function to compare two PCI device addresses.
>>>>>>> + *
>>>>>>> + * @param addr
>>>>>>> + *	The PCI Bus-Device-Function address to compare
>>>>>>> + * @param addr2
>>>>>>> + *	The PCI Bus-Device-Function address to compare
>>>>>>> + * @return
>>>>>>> + *	0 on equal PCI address.
>>>>>>> + *	Positive on addr is greater than addr2.
>>>>>>> + *	Negative on addr is less than addr2, or error.
>>>>>>> + */
>>>>>>> +static inline int
>>>>>>> +eal_compare_pci_addr(struct rte_pci_addr *addr, struct rte_pci_addr
>>>>>>> +*addr2) {
>>>>>>> +	uint64_t dev_addr, dev_addr2;
>>>>>>> +
>>>>>>> +	if ((addr == NULL) || (addr2 == NULL))
>>>>>>> +		return -1;
>>>>>>> +
>>>>>>> +	dev_addr = (addr->domain << 24) | (addr->bus << 16) |
>>>>>>> +				(addr->devid << 8) | addr->function;
>>>>>>> +	dev_addr2 = (addr2->domain << 24) | (addr2->bus << 16) |
>>>>>>> +				(addr2->devid << 8) | addr2->function;
>>>>>>> +
>>>>>>> +	if (dev_addr > dev_addr2)
>>>>>>> +		return 1;
>>>>>>> +	else if (dev_addr < dev_addr2)
>>>>>>> +		return -1;
>>>>>>> +	else
>>>>>>> +		return 0;
>>>>>>> +}
>>>>>>> +
>>>>>>>  /**
>>>>>>>   * Probe the PCI bus for registered drivers.
>>>>>>>   *
>>>>>>> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>>>> b/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>>>> index c0ca5a5..d847102 100644
>>>>>>> --- a/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>>>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>>>> @@ -229,20 +229,6 @@ error:
>>>>>>>  	return -1;
>>>>>>>  }
>>>>>>>
>>>>>>> -/* Compare two PCI device addresses. */ -static int
>>>>>>> -pci_addr_comparison(struct rte_pci_addr *addr, struct rte_pci_addr
>>>>>>> *addr2) -{
>>>>>>> -	uint64_t dev_addr = (addr->domain << 24) + (addr->bus << 16) + (addr->devid << 8) + addr-
>>>>>>> function;
>>>>>>> -	uint64_t dev_addr2 = (addr2->domain << 24) + (addr2->bus << 16) + (addr2->devid << 8) +
>>>>>> addr2->function;
>>>>>>> -
>>>>>>> -	if (dev_addr > dev_addr2)
>>>>>>> -		return 1;
>>>>>>> -	else
>>>>>>> -		return 0;
>>>>>>> -}
>>>>>>> -
>>>>>>> -
>>>>>>>  /* Scan one pci sysfs entry, and fill the devices list from it. */
>>>>>>> static int  pci_scan_one(const char *dirname, uint16_t domain, uint8_t
>>>>>>> bus, @@ -353,13 +339,20 @@ pci_scan_one(const char *dirname, uint16_t
>>>>>>> domain, uint8_t bus,
>>>>>>>  	}
>>>>>>>  	else {
>>>>>>>  		struct rte_pci_device *dev2 = NULL;
>>>>>>> +		int ret;
>>>>>>>
>>>>>>>  		TAILQ_FOREACH(dev2, &pci_device_list, next) {
>>>>>>> -			if (pci_addr_comparison(&dev->addr, &dev2->addr))
>>>>>>> +			ret = eal_compare_pci_addr(&dev->addr, &dev2->addr);
>>>>>>> +			if (ret > 0)
>>>>>>>  				continue;
>>>>>>> -			else {
>>>>>>> +			else if (ret < 0) {
>>>>>>>  				TAILQ_INSERT_BEFORE(dev2, dev, next);
>>>>>>>  				return 0;
>>>>>>> +			} else { /* already registered */
>>>>>>> +				/* update pt_driver */
>>>>>>> +				dev2->pt_driver = dev->pt_driver;
>>>>> Hi Tetsuya,
>>>>>
>>>>> I am seeing a problem with the librte_pmd_ixgbe code where dev->max_vfs is being lost in some scenarios.
>>>>> The following line should be added here:
>>>>>       dev2->max_vfs = dev->max_vfs;
>>>>>
>>>>> numa_mode should probably be updated too (although it is not causing a problem at present).
>>>>>       dev2->numa_mode = dev->numa_mode;
>>>> I'm very curious, why those field miss? I haven't see any places clear
>>>> this field.
>>>>
>>>> What is the root cause?
>>> Hi Michael,
>>>
>>> Here is my guess.
>>> The above function creates pci device list.
>> I am sorry. I forgot to add below information.
>>
>> "max_vfs" or "numa_node" value is came from sysfs when the above
>> function is processed.
> Yes, but it has already been registered, why it missed?

Yes, it has been registered already, but probably should be updated.
I guess sysfs value will be changed when igb_uio starts managing the device.

ex)
1. Boot linux
2. start a dpdk application with no port.
3. pci device list is registered.
 - Here, "max_vfs" is came from sysfs. Or there is no such a entry.
4. igb_uio binds the device.
5.  I guess max_vfs value of sysfs is changed. Or max_vfs entry is created.
6. The dpdk application calls hotplug function.
 - Here, I guess we need to update "max_vfs" value.

Above is a just my assumption.
It may be good to wait for Bernard's reply.

Thanks,
Tetsuya

> Thanks,
> Michael
>>> And current DPDK implementation assumes all devices needed to be managed
>>> are under igb_uio or vfio when above code is processed.
>>> To add hotplug function, we also need to think some devices will start
>>> to be managed under igb_uio or vfio after initializing pci device list.
>>> Anyway, I guess "max_vfs" value will be changed when igb_uio or vfio
>>> manages the device.
>>>
>>> Hi Bernard,
>>>
>>> Could you please check "max_vfs" and "num_node" values, then check the
>>> values again after the device is managed by igb_uio or vfio?
>>> In my environment, it seems max_vfs is created by igb_uio.
>>> But my NIC doesn't have VF, so behavior might be different in your
>>> environment.
>>> I guess "numa_node" should not be changed theoretically.
>>>
>>> If my guess is correct, how about replacing following values?
>>> - driver
>>> - max_vfs
>>> - resource
>>> - (numa_node)
>>> Except for above value, I guess other value shouldn't be changed even
>>> after the device is managed by igb_uio or vfio.
>>>
>>> Thanks,
>>> Tetsuya
>>>
>>>> Thanks,
>>>> Michael
>>>>
>>>>> Regards,
>>>>>
>>>>> Bernard.
>>>>>
>>>>>
>>>>>
>>>>>
>>>>>>> +				free(dev);
>>>>>>> +				return 0;
>>>>>>>  			}
>>>>>>>  		}
>>>>>>>  		TAILQ_INSERT_TAIL(&pci_device_list, dev, next); diff --git
>>>>>>> a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>>>>> b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>>>>> index e53f06b..1da3507 100644
>>>>>>> --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>>>>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>>>>> @@ -123,7 +123,7 @@ pci_uio_map_secondary(struct rte_pci_device *dev)
>>>>>>>  	TAILQ_FOREACH(uio_res, pci_res_list, next) {
>>>>>>>
>>>>>>>  		/* skip this element if it doesn't match our PCI address */
>>>>>>> -		if (memcmp(&uio_res->pci_addr, &dev->addr, sizeof(dev->addr)))
>>>>>>> +		if (eal_compare_pci_addr(&uio_res->pci_addr, &dev->addr))
>>>>>>>  			continue;
>>>>>>>
>>>>>>>  		for (i = 0; i != uio_res->nb_maps; i++) {
>>>>>> Acked-by: Michael Qiu <michael.qiu@intel.com>
  
Michael Qiu Feb. 11, 2015, 12:13 p.m. UTC | #9
On 2/11/2015 4:14 PM, Tetsuya Mukawa wrote:
> On 2015/02/11 15:29, Qiu, Michael wrote:
>> On 2/11/2015 12:57 PM, Tetsuya Mukawa wrote:
>>> On 2015/02/11 13:53, Tetsuya Mukawa wrote:
>>>> On 2015/02/11 12:27, Qiu, Michael wrote:
>>>>> On 2/10/2015 11:11 PM, Iremonger, Bernard wrote:
>>>>>>> -----Original Message-----
>>>>>>> From: Qiu, Michael
>>>>>>> Sent: Monday, February 9, 2015 1:10 PM
>>>>>>> To: Tetsuya Mukawa; dev@dpdk.org
>>>>>>> Cc: Iremonger, Bernard
>>>>>>> Subject: Re: [PATCH v7 04/14] eal/pci: Consolidate pci address comparison APIs
>>>>>>>
>>>>>>> On 2/9/2015 4:31 PM, Tetsuya Mukawa wrote:
>>>>>>>> This patch replaces pci_addr_comparison() and memcmp() of pci
>>>>>>>> addresses by eal_compare_pci_addr().
>>>>>>>>
>>>>>>>> v5:
>>>>>>>> - Fix pci_scan_one to handle pt_driver correctly.
>>>>>>>> v4:
>>>>>>>> - Fix calculation method of eal_compare_pci_addr().
>>>>>>>> - Add parameter checking.
>>>>>>>>
>>>>>>>> Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
>>>>>>>> ---
>>>>>>>>  lib/librte_eal/bsdapp/eal/eal_pci.c       | 25 ++++++++---------------
>>>>>>>>  lib/librte_eal/common/eal_common_pci.c    |  2 +-
>>>>>>>>  lib/librte_eal/common/include/rte_pci.h   | 34 +++++++++++++++++++++++++++++++
>>>>>>>>  lib/librte_eal/linuxapp/eal/eal_pci.c     | 25 ++++++++---------------
>>>>>>>>  lib/librte_eal/linuxapp/eal/eal_pci_uio.c |  2 +-
>>>>>>>>  5 files changed, 54 insertions(+), 34 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>>>>>> b/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>>>>>> index 74ecce7..c844d58 100644
>>>>>>>> --- a/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>>>>>> +++ b/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>>>>>> @@ -270,20 +270,6 @@ pci_uio_map_resource(struct rte_pci_device *dev)
>>>>>>>>  	return (0);
>>>>>>>>  }
>>>>>>>>
>>>>>>>> -/* Compare two PCI device addresses. */ -static int
>>>>>>>> -pci_addr_comparison(struct rte_pci_addr *addr, struct rte_pci_addr
>>>>>>>> *addr2) -{
>>>>>>>> -	uint64_t dev_addr = (addr->domain << 24) + (addr->bus << 16) + (addr->devid << 8) + addr-
>>>>>>>> function;
>>>>>>>> -	uint64_t dev_addr2 = (addr2->domain << 24) + (addr2->bus << 16) + (addr2->devid << 8) +
>>>>>>> addr2->function;
>>>>>>>> -
>>>>>>>> -	if (dev_addr > dev_addr2)
>>>>>>>> -		return 1;
>>>>>>>> -	else
>>>>>>>> -		return 0;
>>>>>>>> -}
>>>>>>>> -
>>>>>>>> -
>>>>>>>>  /* Scan one pci sysfs entry, and fill the devices list from it. */
>>>>>>>> static int  pci_scan_one(int dev_pci_fd, struct pci_conf *conf) @@
>>>>>>>> -356,13 +342,20 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
>>>>>>>>  	}
>>>>>>>>  	else {
>>>>>>>>  		struct rte_pci_device *dev2 = NULL;
>>>>>>>> +		int ret;
>>>>>>>>
>>>>>>>>  		TAILQ_FOREACH(dev2, &pci_device_list, next) {
>>>>>>>> -			if (pci_addr_comparison(&dev->addr, &dev2->addr))
>>>>>>>> +			ret = eal_compare_pci_addr(&dev->addr, &dev2->addr);
>>>>>>>> +			if (ret > 0)
>>>>>>>>  				continue;
>>>>>>>> -			else {
>>>>>>>> +			else if (ret < 0) {
>>>>>>>>  				TAILQ_INSERT_BEFORE(dev2, dev, next);
>>>>>>>>  				return 0;
>>>>>>>> +			} else { /* already registered */
>>>>>>>> +				/* update pt_driver */
>>>>>>>> +				dev2->pt_driver = dev->pt_driver;
>>>>>>>> +				free(dev);
>>>>>>>> +				return 0;
>>>>>>>>  			}
>>>>>>>>  		}
>>>>>>>>  		TAILQ_INSERT_TAIL(&pci_device_list, dev, next); diff --git
>>>>>>>> a/lib/librte_eal/common/eal_common_pci.c
>>>>>>>> b/lib/librte_eal/common/eal_common_pci.c
>>>>>>>> index f3c7f71..a89f5c3 100644
>>>>>>>> --- a/lib/librte_eal/common/eal_common_pci.c
>>>>>>>> +++ b/lib/librte_eal/common/eal_common_pci.c
>>>>>>>> @@ -93,7 +93,7 @@ static struct rte_devargs *pci_devargs_lookup(struct rte_pci_device *dev)
>>>>>>>>  		if (devargs->type != RTE_DEVTYPE_BLACKLISTED_PCI &&
>>>>>>>>  			devargs->type != RTE_DEVTYPE_WHITELISTED_PCI)
>>>>>>>>  			continue;
>>>>>>>> -		if (!memcmp(&dev->addr, &devargs->pci.addr, sizeof(dev->addr)))
>>>>>>>> +		if (!eal_compare_pci_addr(&dev->addr, &devargs->pci.addr))
>>>>>>>>  			return devargs;
>>>>>>>>  	}
>>>>>>>>  	return NULL;
>>>>>>>> diff --git a/lib/librte_eal/common/include/rte_pci.h
>>>>>>>> b/lib/librte_eal/common/include/rte_pci.h
>>>>>>>> index 7f2d699..4814cd7 100644
>>>>>>>> --- a/lib/librte_eal/common/include/rte_pci.h
>>>>>>>> +++ b/lib/librte_eal/common/include/rte_pci.h
>>>>>>>> @@ -269,6 +269,40 @@ eal_parse_pci_DomBDF(const char *input, struct
>>>>>>>> rte_pci_addr *dev_addr)  }  #undef GET_PCIADDR_FIELD
>>>>>>>>
>>>>>>>> +/* Compare two PCI device addresses. */
>>>>>>>> +/**
>>>>>>>> + * Utility function to compare two PCI device addresses.
>>>>>>>> + *
>>>>>>>> + * @param addr
>>>>>>>> + *	The PCI Bus-Device-Function address to compare
>>>>>>>> + * @param addr2
>>>>>>>> + *	The PCI Bus-Device-Function address to compare
>>>>>>>> + * @return
>>>>>>>> + *	0 on equal PCI address.
>>>>>>>> + *	Positive on addr is greater than addr2.
>>>>>>>> + *	Negative on addr is less than addr2, or error.
>>>>>>>> + */
>>>>>>>> +static inline int
>>>>>>>> +eal_compare_pci_addr(struct rte_pci_addr *addr, struct rte_pci_addr
>>>>>>>> +*addr2) {
>>>>>>>> +	uint64_t dev_addr, dev_addr2;
>>>>>>>> +
>>>>>>>> +	if ((addr == NULL) || (addr2 == NULL))
>>>>>>>> +		return -1;
>>>>>>>> +
>>>>>>>> +	dev_addr = (addr->domain << 24) | (addr->bus << 16) |
>>>>>>>> +				(addr->devid << 8) | addr->function;
>>>>>>>> +	dev_addr2 = (addr2->domain << 24) | (addr2->bus << 16) |
>>>>>>>> +				(addr2->devid << 8) | addr2->function;
>>>>>>>> +
>>>>>>>> +	if (dev_addr > dev_addr2)
>>>>>>>> +		return 1;
>>>>>>>> +	else if (dev_addr < dev_addr2)
>>>>>>>> +		return -1;
>>>>>>>> +	else
>>>>>>>> +		return 0;
>>>>>>>> +}
>>>>>>>> +
>>>>>>>>  /**
>>>>>>>>   * Probe the PCI bus for registered drivers.
>>>>>>>>   *
>>>>>>>> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>>>>> b/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>>>>> index c0ca5a5..d847102 100644
>>>>>>>> --- a/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>>>>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>>>>> @@ -229,20 +229,6 @@ error:
>>>>>>>>  	return -1;
>>>>>>>>  }
>>>>>>>>
>>>>>>>> -/* Compare two PCI device addresses. */ -static int
>>>>>>>> -pci_addr_comparison(struct rte_pci_addr *addr, struct rte_pci_addr
>>>>>>>> *addr2) -{
>>>>>>>> -	uint64_t dev_addr = (addr->domain << 24) + (addr->bus << 16) + (addr->devid << 8) + addr-
>>>>>>>> function;
>>>>>>>> -	uint64_t dev_addr2 = (addr2->domain << 24) + (addr2->bus << 16) + (addr2->devid << 8) +
>>>>>>> addr2->function;
>>>>>>>> -
>>>>>>>> -	if (dev_addr > dev_addr2)
>>>>>>>> -		return 1;
>>>>>>>> -	else
>>>>>>>> -		return 0;
>>>>>>>> -}
>>>>>>>> -
>>>>>>>> -
>>>>>>>>  /* Scan one pci sysfs entry, and fill the devices list from it. */
>>>>>>>> static int  pci_scan_one(const char *dirname, uint16_t domain, uint8_t
>>>>>>>> bus, @@ -353,13 +339,20 @@ pci_scan_one(const char *dirname, uint16_t
>>>>>>>> domain, uint8_t bus,
>>>>>>>>  	}
>>>>>>>>  	else {
>>>>>>>>  		struct rte_pci_device *dev2 = NULL;
>>>>>>>> +		int ret;
>>>>>>>>
>>>>>>>>  		TAILQ_FOREACH(dev2, &pci_device_list, next) {
>>>>>>>> -			if (pci_addr_comparison(&dev->addr, &dev2->addr))
>>>>>>>> +			ret = eal_compare_pci_addr(&dev->addr, &dev2->addr);
>>>>>>>> +			if (ret > 0)
>>>>>>>>  				continue;
>>>>>>>> -			else {
>>>>>>>> +			else if (ret < 0) {
>>>>>>>>  				TAILQ_INSERT_BEFORE(dev2, dev, next);
>>>>>>>>  				return 0;
>>>>>>>> +			} else { /* already registered */
>>>>>>>> +				/* update pt_driver */
>>>>>>>> +				dev2->pt_driver = dev->pt_driver;
>>>>>> Hi Tetsuya,
>>>>>>
>>>>>> I am seeing a problem with the librte_pmd_ixgbe code where dev->max_vfs is being lost in some scenarios.
>>>>>> The following line should be added here:
>>>>>>       dev2->max_vfs = dev->max_vfs;
>>>>>>
>>>>>> numa_mode should probably be updated too (although it is not causing a problem at present).
>>>>>>       dev2->numa_mode = dev->numa_mode;
>>>>> I'm very curious, why those field miss? I haven't see any places clear
>>>>> this field.
>>>>>
>>>>> What is the root cause?
>>>> Hi Michael,
>>>>
>>>> Here is my guess.
>>>> The above function creates pci device list.
>>> I am sorry. I forgot to add below information.
>>>
>>> "max_vfs" or "numa_node" value is came from sysfs when the above
>>> function is processed.
>> Yes, but it has already been registered, why it missed?
> Yes, it has been registered already, but probably should be updated.
> I guess sysfs value will be changed when igb_uio starts managing the device.
>
> ex)
> 1. Boot linux
> 2. start a dpdk application with no port.
> 3. pci device list is registered.
>  - Here, "max_vfs" is came from sysfs. Or there is no such a entry.
> 4. igb_uio binds the device.
> 5.  I guess max_vfs value of sysfs is changed. Or max_vfs entry is created.
> 6. The dpdk application calls hotplug function.

Yes, agree.

But numa node can be changed?

Bernard, does your issue occur after max_vfs changed in igb_uio?

If not, I think must be figure out the reason.

Thanks,
Michael
>  - Here, I guess we need to update "max_vfs" value.
>
> Above is a just my assumption.
> It may be good to wait for Bernard's reply.
>
> Thanks,
> Tetsuya
>
>> Thanks,
>> Michael
>>>> And current DPDK implementation assumes all devices needed to be managed
>>>> are under igb_uio or vfio when above code is processed.
>>>> To add hotplug function, we also need to think some devices will start
>>>> to be managed under igb_uio or vfio after initializing pci device list.
>>>> Anyway, I guess "max_vfs" value will be changed when igb_uio or vfio
>>>> manages the device.
>>>>
>>>> Hi Bernard,
>>>>
>>>> Could you please check "max_vfs" and "num_node" values, then check the
>>>> values again after the device is managed by igb_uio or vfio?
>>>> In my environment, it seems max_vfs is created by igb_uio.
>>>> But my NIC doesn't have VF, so behavior might be different in your
>>>> environment.
>>>> I guess "numa_node" should not be changed theoretically.
>>>>
>>>> If my guess is correct, how about replacing following values?
>>>> - driver
>>>> - max_vfs
>>>> - resource
>>>> - (numa_node)
>>>> Except for above value, I guess other value shouldn't be changed even
>>>> after the device is managed by igb_uio or vfio.
>>>>
>>>> Thanks,
>>>> Tetsuya
>>>>
>>>>> Thanks,
>>>>> Michael
>>>>>
>>>>>> Regards,
>>>>>>
>>>>>> Bernard.
>>>>>>
>>>>>>
>>>>>>
>>>>>>
>>>>>>>> +				free(dev);
>>>>>>>> +				return 0;
>>>>>>>>  			}
>>>>>>>>  		}
>>>>>>>>  		TAILQ_INSERT_TAIL(&pci_device_list, dev, next); diff --git
>>>>>>>> a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>>>>>> b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>>>>>> index e53f06b..1da3507 100644
>>>>>>>> --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>>>>>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>>>>>> @@ -123,7 +123,7 @@ pci_uio_map_secondary(struct rte_pci_device *dev)
>>>>>>>>  	TAILQ_FOREACH(uio_res, pci_res_list, next) {
>>>>>>>>
>>>>>>>>  		/* skip this element if it doesn't match our PCI address */
>>>>>>>> -		if (memcmp(&uio_res->pci_addr, &dev->addr, sizeof(dev->addr)))
>>>>>>>> +		if (eal_compare_pci_addr(&uio_res->pci_addr, &dev->addr))
>>>>>>>>  			continue;
>>>>>>>>
>>>>>>>>  		for (i = 0; i != uio_res->nb_maps; i++) {
>>>>>>> Acked-by: Michael Qiu <michael.qiu@intel.com>
>
>
  
Tetsuya Mukawa Feb. 12, 2015, 1:44 a.m. UTC | #10
On 2015/02/11 21:13, Qiu, Michael wrote:
> On 2/11/2015 4:14 PM, Tetsuya Mukawa wrote:
>> On 2015/02/11 15:29, Qiu, Michael wrote:
>>> On 2/11/2015 12:57 PM, Tetsuya Mukawa wrote:
>>>> On 2015/02/11 13:53, Tetsuya Mukawa wrote:
>>>>> On 2015/02/11 12:27, Qiu, Michael wrote:
>>>>>> On 2/10/2015 11:11 PM, Iremonger, Bernard wrote:
>>>>>>>> -----Original Message-----
>>>>>>>> From: Qiu, Michael
>>>>>>>> Sent: Monday, February 9, 2015 1:10 PM
>>>>>>>> To: Tetsuya Mukawa; dev@dpdk.org
>>>>>>>> Cc: Iremonger, Bernard
>>>>>>>> Subject: Re: [PATCH v7 04/14] eal/pci: Consolidate pci address comparison APIs
>>>>>>>>
>>>>>>>> On 2/9/2015 4:31 PM, Tetsuya Mukawa wrote:
>>>>>>>>> This patch replaces pci_addr_comparison() and memcmp() of pci
>>>>>>>>> addresses by eal_compare_pci_addr().
>>>>>>>>>
>>>>>>>>> v5:
>>>>>>>>> - Fix pci_scan_one to handle pt_driver correctly.
>>>>>>>>> v4:
>>>>>>>>> - Fix calculation method of eal_compare_pci_addr().
>>>>>>>>> - Add parameter checking.
>>>>>>>>>
>>>>>>>>> Signed-off-by: Tetsuya Mukawa <mukawa@igel.co.jp>
>>>>>>>>> ---
>>>>>>>>>  lib/librte_eal/bsdapp/eal/eal_pci.c       | 25 ++++++++---------------
>>>>>>>>>  lib/librte_eal/common/eal_common_pci.c    |  2 +-
>>>>>>>>>  lib/librte_eal/common/include/rte_pci.h   | 34 +++++++++++++++++++++++++++++++
>>>>>>>>>  lib/librte_eal/linuxapp/eal/eal_pci.c     | 25 ++++++++---------------
>>>>>>>>>  lib/librte_eal/linuxapp/eal/eal_pci_uio.c |  2 +-
>>>>>>>>>  5 files changed, 54 insertions(+), 34 deletions(-)
>>>>>>>>>
>>>>>>>>> diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>>>>>>> b/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>>>>>>> index 74ecce7..c844d58 100644
>>>>>>>>> --- a/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>>>>>>> +++ b/lib/librte_eal/bsdapp/eal/eal_pci.c
>>>>>>>>> @@ -270,20 +270,6 @@ pci_uio_map_resource(struct rte_pci_device *dev)
>>>>>>>>>  	return (0);
>>>>>>>>>  }
>>>>>>>>>
>>>>>>>>> -/* Compare two PCI device addresses. */ -static int
>>>>>>>>> -pci_addr_comparison(struct rte_pci_addr *addr, struct rte_pci_addr
>>>>>>>>> *addr2) -{
>>>>>>>>> -	uint64_t dev_addr = (addr->domain << 24) + (addr->bus << 16) + (addr->devid << 8) + addr-
>>>>>>>>> function;
>>>>>>>>> -	uint64_t dev_addr2 = (addr2->domain << 24) + (addr2->bus << 16) + (addr2->devid << 8) +
>>>>>>>> addr2->function;
>>>>>>>>> -
>>>>>>>>> -	if (dev_addr > dev_addr2)
>>>>>>>>> -		return 1;
>>>>>>>>> -	else
>>>>>>>>> -		return 0;
>>>>>>>>> -}
>>>>>>>>> -
>>>>>>>>> -
>>>>>>>>>  /* Scan one pci sysfs entry, and fill the devices list from it. */
>>>>>>>>> static int  pci_scan_one(int dev_pci_fd, struct pci_conf *conf) @@
>>>>>>>>> -356,13 +342,20 @@ pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
>>>>>>>>>  	}
>>>>>>>>>  	else {
>>>>>>>>>  		struct rte_pci_device *dev2 = NULL;
>>>>>>>>> +		int ret;
>>>>>>>>>
>>>>>>>>>  		TAILQ_FOREACH(dev2, &pci_device_list, next) {
>>>>>>>>> -			if (pci_addr_comparison(&dev->addr, &dev2->addr))
>>>>>>>>> +			ret = eal_compare_pci_addr(&dev->addr, &dev2->addr);
>>>>>>>>> +			if (ret > 0)
>>>>>>>>>  				continue;
>>>>>>>>> -			else {
>>>>>>>>> +			else if (ret < 0) {
>>>>>>>>>  				TAILQ_INSERT_BEFORE(dev2, dev, next);
>>>>>>>>>  				return 0;
>>>>>>>>> +			} else { /* already registered */
>>>>>>>>> +				/* update pt_driver */
>>>>>>>>> +				dev2->pt_driver = dev->pt_driver;
>>>>>>>>> +				free(dev);
>>>>>>>>> +				return 0;
>>>>>>>>>  			}
>>>>>>>>>  		}
>>>>>>>>>  		TAILQ_INSERT_TAIL(&pci_device_list, dev, next); diff --git
>>>>>>>>> a/lib/librte_eal/common/eal_common_pci.c
>>>>>>>>> b/lib/librte_eal/common/eal_common_pci.c
>>>>>>>>> index f3c7f71..a89f5c3 100644
>>>>>>>>> --- a/lib/librte_eal/common/eal_common_pci.c
>>>>>>>>> +++ b/lib/librte_eal/common/eal_common_pci.c
>>>>>>>>> @@ -93,7 +93,7 @@ static struct rte_devargs *pci_devargs_lookup(struct rte_pci_device *dev)
>>>>>>>>>  		if (devargs->type != RTE_DEVTYPE_BLACKLISTED_PCI &&
>>>>>>>>>  			devargs->type != RTE_DEVTYPE_WHITELISTED_PCI)
>>>>>>>>>  			continue;
>>>>>>>>> -		if (!memcmp(&dev->addr, &devargs->pci.addr, sizeof(dev->addr)))
>>>>>>>>> +		if (!eal_compare_pci_addr(&dev->addr, &devargs->pci.addr))
>>>>>>>>>  			return devargs;
>>>>>>>>>  	}
>>>>>>>>>  	return NULL;
>>>>>>>>> diff --git a/lib/librte_eal/common/include/rte_pci.h
>>>>>>>>> b/lib/librte_eal/common/include/rte_pci.h
>>>>>>>>> index 7f2d699..4814cd7 100644
>>>>>>>>> --- a/lib/librte_eal/common/include/rte_pci.h
>>>>>>>>> +++ b/lib/librte_eal/common/include/rte_pci.h
>>>>>>>>> @@ -269,6 +269,40 @@ eal_parse_pci_DomBDF(const char *input, struct
>>>>>>>>> rte_pci_addr *dev_addr)  }  #undef GET_PCIADDR_FIELD
>>>>>>>>>
>>>>>>>>> +/* Compare two PCI device addresses. */
>>>>>>>>> +/**
>>>>>>>>> + * Utility function to compare two PCI device addresses.
>>>>>>>>> + *
>>>>>>>>> + * @param addr
>>>>>>>>> + *	The PCI Bus-Device-Function address to compare
>>>>>>>>> + * @param addr2
>>>>>>>>> + *	The PCI Bus-Device-Function address to compare
>>>>>>>>> + * @return
>>>>>>>>> + *	0 on equal PCI address.
>>>>>>>>> + *	Positive on addr is greater than addr2.
>>>>>>>>> + *	Negative on addr is less than addr2, or error.
>>>>>>>>> + */
>>>>>>>>> +static inline int
>>>>>>>>> +eal_compare_pci_addr(struct rte_pci_addr *addr, struct rte_pci_addr
>>>>>>>>> +*addr2) {
>>>>>>>>> +	uint64_t dev_addr, dev_addr2;
>>>>>>>>> +
>>>>>>>>> +	if ((addr == NULL) || (addr2 == NULL))
>>>>>>>>> +		return -1;
>>>>>>>>> +
>>>>>>>>> +	dev_addr = (addr->domain << 24) | (addr->bus << 16) |
>>>>>>>>> +				(addr->devid << 8) | addr->function;
>>>>>>>>> +	dev_addr2 = (addr2->domain << 24) | (addr2->bus << 16) |
>>>>>>>>> +				(addr2->devid << 8) | addr2->function;
>>>>>>>>> +
>>>>>>>>> +	if (dev_addr > dev_addr2)
>>>>>>>>> +		return 1;
>>>>>>>>> +	else if (dev_addr < dev_addr2)
>>>>>>>>> +		return -1;
>>>>>>>>> +	else
>>>>>>>>> +		return 0;
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>>  /**
>>>>>>>>>   * Probe the PCI bus for registered drivers.
>>>>>>>>>   *
>>>>>>>>> diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>>>>>> b/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>>>>>> index c0ca5a5..d847102 100644
>>>>>>>>> --- a/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>>>>>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
>>>>>>>>> @@ -229,20 +229,6 @@ error:
>>>>>>>>>  	return -1;
>>>>>>>>>  }
>>>>>>>>>
>>>>>>>>> -/* Compare two PCI device addresses. */ -static int
>>>>>>>>> -pci_addr_comparison(struct rte_pci_addr *addr, struct rte_pci_addr
>>>>>>>>> *addr2) -{
>>>>>>>>> -	uint64_t dev_addr = (addr->domain << 24) + (addr->bus << 16) + (addr->devid << 8) + addr-
>>>>>>>>> function;
>>>>>>>>> -	uint64_t dev_addr2 = (addr2->domain << 24) + (addr2->bus << 16) + (addr2->devid << 8) +
>>>>>>>> addr2->function;
>>>>>>>>> -
>>>>>>>>> -	if (dev_addr > dev_addr2)
>>>>>>>>> -		return 1;
>>>>>>>>> -	else
>>>>>>>>> -		return 0;
>>>>>>>>> -}
>>>>>>>>> -
>>>>>>>>> -
>>>>>>>>>  /* Scan one pci sysfs entry, and fill the devices list from it. */
>>>>>>>>> static int  pci_scan_one(const char *dirname, uint16_t domain, uint8_t
>>>>>>>>> bus, @@ -353,13 +339,20 @@ pci_scan_one(const char *dirname, uint16_t
>>>>>>>>> domain, uint8_t bus,
>>>>>>>>>  	}
>>>>>>>>>  	else {
>>>>>>>>>  		struct rte_pci_device *dev2 = NULL;
>>>>>>>>> +		int ret;
>>>>>>>>>
>>>>>>>>>  		TAILQ_FOREACH(dev2, &pci_device_list, next) {
>>>>>>>>> -			if (pci_addr_comparison(&dev->addr, &dev2->addr))
>>>>>>>>> +			ret = eal_compare_pci_addr(&dev->addr, &dev2->addr);
>>>>>>>>> +			if (ret > 0)
>>>>>>>>>  				continue;
>>>>>>>>> -			else {
>>>>>>>>> +			else if (ret < 0) {
>>>>>>>>>  				TAILQ_INSERT_BEFORE(dev2, dev, next);
>>>>>>>>>  				return 0;
>>>>>>>>> +			} else { /* already registered */
>>>>>>>>> +				/* update pt_driver */
>>>>>>>>> +				dev2->pt_driver = dev->pt_driver;
>>>>>>> Hi Tetsuya,
>>>>>>>
>>>>>>> I am seeing a problem with the librte_pmd_ixgbe code where dev->max_vfs is being lost in some scenarios.
>>>>>>> The following line should be added here:
>>>>>>>       dev2->max_vfs = dev->max_vfs;
>>>>>>>
>>>>>>> numa_mode should probably be updated too (although it is not causing a problem at present).
>>>>>>>       dev2->numa_mode = dev->numa_mode;
>>>>>> I'm very curious, why those field miss? I haven't see any places clear
>>>>>> this field.
>>>>>>
>>>>>> What is the root cause?
>>>>> Hi Michael,
>>>>>
>>>>> Here is my guess.
>>>>> The above function creates pci device list.
>>>> I am sorry. I forgot to add below information.
>>>>
>>>> "max_vfs" or "numa_node" value is came from sysfs when the above
>>>> function is processed.
>>> Yes, but it has already been registered, why it missed?
>> Yes, it has been registered already, but probably should be updated.
>> I guess sysfs value will be changed when igb_uio starts managing the device.
>>
>> ex)
>> 1. Boot linux
>> 2. start a dpdk application with no port.
>> 3. pci device list is registered.
>>  - Here, "max_vfs" is came from sysfs. Or there is no such a entry.
>> 4. igb_uio binds the device.
>> 5.  I guess max_vfs value of sysfs is changed. Or max_vfs entry is created.
>> 6. The dpdk application calls hotplug function.
> Yes, agree.
>
> But numa node can be changed?

Hi Michael,

I may misunderstand meaning of numa_node.
I thought it indicated which numa node was nearest from the pci device,
so it could not be configurable.
BTW, I will be out of office tomorrow. So, I will submit v8 patches next
Monday.

Thanks,
Tetsuya

>
> Bernard, does your issue occur after max_vfs changed in igb_uio?
>
> If not, I think must be figure out the reason.
>
> Thanks,
> Michael
>>  - Here, I guess we need to update "max_vfs" value.
>>
>> Above is a just my assumption.
>> It may be good to wait for Bernard's reply.
>>
>> Thanks,
>> Tetsuya
>>
>>> Thanks,
>>> Michael
>>>>> And current DPDK implementation assumes all devices needed to be managed
>>>>> are under igb_uio or vfio when above code is processed.
>>>>> To add hotplug function, we also need to think some devices will start
>>>>> to be managed under igb_uio or vfio after initializing pci device list.
>>>>> Anyway, I guess "max_vfs" value will be changed when igb_uio or vfio
>>>>> manages the device.
>>>>>
>>>>> Hi Bernard,
>>>>>
>>>>> Could you please check "max_vfs" and "num_node" values, then check the
>>>>> values again after the device is managed by igb_uio or vfio?
>>>>> In my environment, it seems max_vfs is created by igb_uio.
>>>>> But my NIC doesn't have VF, so behavior might be different in your
>>>>> environment.
>>>>> I guess "numa_node" should not be changed theoretically.
>>>>>
>>>>> If my guess is correct, how about replacing following values?
>>>>> - driver
>>>>> - max_vfs
>>>>> - resource
>>>>> - (numa_node)
>>>>> Except for above value, I guess other value shouldn't be changed even
>>>>> after the device is managed by igb_uio or vfio.
>>>>>
>>>>> Thanks,
>>>>> Tetsuya
>>>>>
>>>>>> Thanks,
>>>>>> Michael
>>>>>>
>>>>>>> Regards,
>>>>>>>
>>>>>>> Bernard.
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>>>> +				free(dev);
>>>>>>>>> +				return 0;
>>>>>>>>>  			}
>>>>>>>>>  		}
>>>>>>>>>  		TAILQ_INSERT_TAIL(&pci_device_list, dev, next); diff --git
>>>>>>>>> a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>>>>>>> b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>>>>>>> index e53f06b..1da3507 100644
>>>>>>>>> --- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>>>>>>> +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
>>>>>>>>> @@ -123,7 +123,7 @@ pci_uio_map_secondary(struct rte_pci_device *dev)
>>>>>>>>>  	TAILQ_FOREACH(uio_res, pci_res_list, next) {
>>>>>>>>>
>>>>>>>>>  		/* skip this element if it doesn't match our PCI address */
>>>>>>>>> -		if (memcmp(&uio_res->pci_addr, &dev->addr, sizeof(dev->addr)))
>>>>>>>>> +		if (eal_compare_pci_addr(&uio_res->pci_addr, &dev->addr))
>>>>>>>>>  			continue;
>>>>>>>>>
>>>>>>>>>  		for (i = 0; i != uio_res->nb_maps; i++) {
>>>>>>>> Acked-by: Michael Qiu <michael.qiu@intel.com>
>>
  
Iremonger, Bernard Feb. 12, 2015, 1:55 p.m. UTC | #11
> >>>>>>>>>  /* Scan one pci sysfs entry, and fill the devices list from
> >>>>>>>>> it. */ static int  pci_scan_one(const char *dirname, uint16_t
> >>>>>>>>> domain, uint8_t bus, @@ -353,13 +339,20 @@ pci_scan_one(const
> >>>>>>>>> char *dirname, uint16_t domain, uint8_t bus,
> >>>>>>>>>  	}
> >>>>>>>>>  	else {
> >>>>>>>>>  		struct rte_pci_device *dev2 = NULL;
> >>>>>>>>> +		int ret;
> >>>>>>>>>
> >>>>>>>>>  		TAILQ_FOREACH(dev2, &pci_device_list, next) {
> >>>>>>>>> -			if (pci_addr_comparison(&dev->addr, &dev2->addr))
> >>>>>>>>> +			ret = eal_compare_pci_addr(&dev->addr, &dev2->addr);
> >>>>>>>>> +			if (ret > 0)
> >>>>>>>>>  				continue;
> >>>>>>>>> -			else {
> >>>>>>>>> +			else if (ret < 0) {
> >>>>>>>>>  				TAILQ_INSERT_BEFORE(dev2, dev, next);
> >>>>>>>>>  				return 0;
> >>>>>>>>> +			} else { /* already registered */
> >>>>>>>>> +				/* update pt_driver */
> >>>>>>>>> +				dev2->pt_driver = dev->pt_driver;
> >>>>>>> Hi Tetsuya,
> >>>>>>>
> >>>>>>> I am seeing a problem with the librte_pmd_ixgbe code where dev->max_vfs is being lost in
> some scenarios.
> >>>>>>> The following line should be added here:
> >>>>>>>       dev2->max_vfs = dev->max_vfs;
> >>>>>>>
> >>>>>>> numa_mode should probably be updated too (although it is not causing a problem at
> present).
> >>>>>>>       dev2->numa_mode = dev->numa_mode;

Hi Tetsuya, Michael,

The "already registered path" in the code is being executed.

dev->numa_mode does not change so the above line is not needed.

dev2->max_vfs has a value of 0  and dev->max_vfs has a value  of 2 ( 2 VF's have been configured previously).

dev2->mem_resource is different from dev->mem_resource so the following line needs to be added:

memmove(dev2->mem_resource, dev->mem_resource, sizeof(dev->mem_resource));

More replies below.
 
> >>>>>> I'm very curious, why those field miss? I haven't see any places
> >>>>>> clear this field.
> >>>>>>
> >>>>>> What is the root cause?

The max_vfs entry is created when igb_uio binds the device.
dev2  is the device already in the pci_device_list  so dev2->max_vfs  is 0.
dev is the new device being parsed, in this case dev->max_vfs is 2.
So the dev2->max_vfs  value needs to be updated.
Similarly  dev2-> pt_driver needs to be updated as its value is 0 (this is already done).
Similarly dev2->mem_resource needs to be updated as it is different from dev->mem_resource. 


> >>>>> Hi Michael,
> >>>>>
> >>>>> Here is my guess.
> >>>>> The above function creates pci device list.
> >>>> I am sorry. I forgot to add below information.
> >>>>
> >>>> "max_vfs" or "numa_node" value is came from sysfs when the above
> >>>> function is processed.
> >>> Yes, but it has already been registered, why it missed?
> >> Yes, it has been registered already, but probably should be updated.
> >> I guess sysfs value will be changed when igb_uio starts managing the device.
> >>
> >> ex)
> >> 1. Boot linux
> >> 2. start a dpdk application with no port.
> >> 3. pci device list is registered.
> >>  - Here, "max_vfs" is came from sysfs. Or there is no such a entry.
> >> 4. igb_uio binds the device.
> >> 5.  I guess max_vfs value of sysfs is changed. Or max_vfs entry is created.
> >> 6. The dpdk application calls hotplug function.
> > Yes, agree.
> >
> > But numa node can be changed?
> 
> Hi Michael,
> 
> I may misunderstand meaning of numa_node.
> I thought it indicated which numa node was nearest from the pci device, so it could not be
> configurable.
> BTW, I will be out of office tomorrow. So, I will submit v8 patches next Monday.
> 
> Thanks,
> Tetsuya
> 
> >
> > Bernard, does your issue occur after max_vfs changed in igb_uio?
> >
> > If not, I think must be figure out the reason.
> >
> > Thanks,
> > Michael
> >>  - Here, I guess we need to update "max_vfs" value.
> >>
> >> Above is a just my assumption.
> >> It may be good to wait for Bernard's reply.
> >>
> >> Thanks,
> >> Tetsuya
> >>
> >>> Thanks,
> >>> Michael
> >>>>> And current DPDK implementation assumes all devices needed to be
> >>>>> managed are under igb_uio or vfio when above code is processed.
> >>>>> To add hotplug function, we also need to think some devices will
> >>>>> start to be managed under igb_uio or vfio after initializing pci device list.
> >>>>> Anyway, I guess "max_vfs" value will be changed when igb_uio or
> >>>>> vfio manages the device.
> >>>>>
> >>>>> Hi Bernard,
> >>>>>
> >>>>> Could you please check "max_vfs" and "num_node" values, then check
> >>>>> the values again after the device is managed by igb_uio or vfio?
> >>>>> In my environment, it seems max_vfs is created by igb_uio.

Yes, max_vfs is created by igb_uio

> >>>>> But my NIC doesn't have VF, so behavior might be different in your
> >>>>> environment.
> >>>>> I guess "numa_node" should not be changed theoretically.
> >>>>>
> >>>>> If my guess is correct, how about replacing following values?
> >>>>> - driver

Agreed 

> >>>>> - max_vfs

Agreed

> >>>>> - resource

Agreed

> >>>>> - (numa_node)

Not necessary, as it does not change.


> >>>>> Except for above value, I guess other value shouldn't be changed
> >>>>> even after the device is managed by igb_uio or vfio.
> >>>>>
> >>>>> Thanks,
> >>>>> Tetsuya
> >>>>>
> >>>>>> Thanks,
> >>>>>> Michael
> >>>>>>

The scenario I am using is as follows:

Terminal1: rmmod ixgbe
Terminal1: rmmod ixgbevf
Terminal1:  rmmod igb_uio 

Terminal2: ./testpmd -c f -n 1 -d ../../librte_pmd_null.so -- -i --no-flush-rx

Terminal1:  insmod ./igb_uio.ko

Terminal3: ./dpdk_nic_bind.py -b igb_uio 0000:05:00.0

Terminal2: testpmd> port attach 0000:05:00.0

Regards,

Bernard.
  

Patch

diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c b/lib/librte_eal/bsdapp/eal/eal_pci.c
index 74ecce7..c844d58 100644
--- a/lib/librte_eal/bsdapp/eal/eal_pci.c
+++ b/lib/librte_eal/bsdapp/eal/eal_pci.c
@@ -270,20 +270,6 @@  pci_uio_map_resource(struct rte_pci_device *dev)
 	return (0);
 }
 
-/* Compare two PCI device addresses. */
-static int
-pci_addr_comparison(struct rte_pci_addr *addr, struct rte_pci_addr *addr2)
-{
-	uint64_t dev_addr = (addr->domain << 24) + (addr->bus << 16) + (addr->devid << 8) + addr->function;
-	uint64_t dev_addr2 = (addr2->domain << 24) + (addr2->bus << 16) + (addr2->devid << 8) + addr2->function;
-
-	if (dev_addr > dev_addr2)
-		return 1;
-	else
-		return 0;
-}
-
-
 /* Scan one pci sysfs entry, and fill the devices list from it. */
 static int
 pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
@@ -356,13 +342,20 @@  pci_scan_one(int dev_pci_fd, struct pci_conf *conf)
 	}
 	else {
 		struct rte_pci_device *dev2 = NULL;
+		int ret;
 
 		TAILQ_FOREACH(dev2, &pci_device_list, next) {
-			if (pci_addr_comparison(&dev->addr, &dev2->addr))
+			ret = eal_compare_pci_addr(&dev->addr, &dev2->addr);
+			if (ret > 0)
 				continue;
-			else {
+			else if (ret < 0) {
 				TAILQ_INSERT_BEFORE(dev2, dev, next);
 				return 0;
+			} else { /* already registered */
+				/* update pt_driver */
+				dev2->pt_driver = dev->pt_driver;
+				free(dev);
+				return 0;
 			}
 		}
 		TAILQ_INSERT_TAIL(&pci_device_list, dev, next);
diff --git a/lib/librte_eal/common/eal_common_pci.c b/lib/librte_eal/common/eal_common_pci.c
index f3c7f71..a89f5c3 100644
--- a/lib/librte_eal/common/eal_common_pci.c
+++ b/lib/librte_eal/common/eal_common_pci.c
@@ -93,7 +93,7 @@  static struct rte_devargs *pci_devargs_lookup(struct rte_pci_device *dev)
 		if (devargs->type != RTE_DEVTYPE_BLACKLISTED_PCI &&
 			devargs->type != RTE_DEVTYPE_WHITELISTED_PCI)
 			continue;
-		if (!memcmp(&dev->addr, &devargs->pci.addr, sizeof(dev->addr)))
+		if (!eal_compare_pci_addr(&dev->addr, &devargs->pci.addr))
 			return devargs;
 	}
 	return NULL;
diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h
index 7f2d699..4814cd7 100644
--- a/lib/librte_eal/common/include/rte_pci.h
+++ b/lib/librte_eal/common/include/rte_pci.h
@@ -269,6 +269,40 @@  eal_parse_pci_DomBDF(const char *input, struct rte_pci_addr *dev_addr)
 }
 #undef GET_PCIADDR_FIELD
 
+/* Compare two PCI device addresses. */
+/**
+ * Utility function to compare two PCI device addresses.
+ *
+ * @param addr
+ *	The PCI Bus-Device-Function address to compare
+ * @param addr2
+ *	The PCI Bus-Device-Function address to compare
+ * @return
+ *	0 on equal PCI address.
+ *	Positive on addr is greater than addr2.
+ *	Negative on addr is less than addr2, or error.
+ */
+static inline int
+eal_compare_pci_addr(struct rte_pci_addr *addr, struct rte_pci_addr *addr2)
+{
+	uint64_t dev_addr, dev_addr2;
+
+	if ((addr == NULL) || (addr2 == NULL))
+		return -1;
+
+	dev_addr = (addr->domain << 24) | (addr->bus << 16) |
+				(addr->devid << 8) | addr->function;
+	dev_addr2 = (addr2->domain << 24) | (addr2->bus << 16) |
+				(addr2->devid << 8) | addr2->function;
+
+	if (dev_addr > dev_addr2)
+		return 1;
+	else if (dev_addr < dev_addr2)
+		return -1;
+	else
+		return 0;
+}
+
 /**
  * Probe the PCI bus for registered drivers.
  *
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c
index c0ca5a5..d847102 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci.c
@@ -229,20 +229,6 @@  error:
 	return -1;
 }
 
-/* Compare two PCI device addresses. */
-static int
-pci_addr_comparison(struct rte_pci_addr *addr, struct rte_pci_addr *addr2)
-{
-	uint64_t dev_addr = (addr->domain << 24) + (addr->bus << 16) + (addr->devid << 8) + addr->function;
-	uint64_t dev_addr2 = (addr2->domain << 24) + (addr2->bus << 16) + (addr2->devid << 8) + addr2->function;
-
-	if (dev_addr > dev_addr2)
-		return 1;
-	else
-		return 0;
-}
-
-
 /* Scan one pci sysfs entry, and fill the devices list from it. */
 static int
 pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
@@ -353,13 +339,20 @@  pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus,
 	}
 	else {
 		struct rte_pci_device *dev2 = NULL;
+		int ret;
 
 		TAILQ_FOREACH(dev2, &pci_device_list, next) {
-			if (pci_addr_comparison(&dev->addr, &dev2->addr))
+			ret = eal_compare_pci_addr(&dev->addr, &dev2->addr);
+			if (ret > 0)
 				continue;
-			else {
+			else if (ret < 0) {
 				TAILQ_INSERT_BEFORE(dev2, dev, next);
 				return 0;
+			} else { /* already registered */
+				/* update pt_driver */
+				dev2->pt_driver = dev->pt_driver;
+				free(dev);
+				return 0;
 			}
 		}
 		TAILQ_INSERT_TAIL(&pci_device_list, dev, next);
diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
index e53f06b..1da3507 100644
--- a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
+++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c
@@ -123,7 +123,7 @@  pci_uio_map_secondary(struct rte_pci_device *dev)
 	TAILQ_FOREACH(uio_res, pci_res_list, next) {
 
 		/* skip this element if it doesn't match our PCI address */
-		if (memcmp(&uio_res->pci_addr, &dev->addr, sizeof(dev->addr)))
+		if (eal_compare_pci_addr(&uio_res->pci_addr, &dev->addr))
 			continue;
 
 		for (i = 0; i != uio_res->nb_maps; i++) {