[dpdk-dev,v3,06/15] eal/soc: implement probing of drivers

Message ID 1473410639-10367-7-git-send-email-shreyansh.jain@nxp.com (mailing list archive)
State Superseded, archived
Headers

Commit Message

Shreyansh Jain Sept. 9, 2016, 8:43 a.m. UTC
  Each SoC PMD registers a set of callback for scanning its own bus/infra and
matching devices to drivers when probe is called.
This patch introduces the infra for calls to SoC scan on rte_eal_soc_init()
and match on rte_eal_soc_probe().

Patch also adds test case for scan and probe.

Signed-off-by: Jan Viktorin <viktorin@rehivetech.com>
Signed-off-by: Shreyansh Jain <shreyansh.jain@nxp.com>
Signed-off-by: Hemant Agrawal <hemant.agrawal@nxp.com>
---
 app/test/test_soc.c                             | 138 ++++++++++++++-
 lib/librte_eal/bsdapp/eal/rte_eal_version.map   |   4 +
 lib/librte_eal/common/eal_common_soc.c          | 215 ++++++++++++++++++++++++
 lib/librte_eal/common/include/rte_soc.h         |  51 ++++++
 lib/librte_eal/linuxapp/eal/eal.c               |   5 +
 lib/librte_eal/linuxapp/eal/eal_soc.c           |  16 ++
 lib/librte_eal/linuxapp/eal/rte_eal_version.map |   4 +
 7 files changed, 432 insertions(+), 1 deletion(-)
  

Comments

Jan Viktorin Sept. 16, 2016, 12:27 p.m. UTC | #1
On Fri, 9 Sep 2016 14:13:50 +0530
Shreyansh Jain <shreyansh.jain@nxp.com> wrote:

> Each SoC PMD registers a set of callback for scanning its own bus/infra and
> matching devices to drivers when probe is called.
> This patch introduces the infra for calls to SoC scan on rte_eal_soc_init()
> and match on rte_eal_soc_probe().
> 
> Patch also adds test case for scan and probe.
> 
> Signed-off-by: Jan Viktorin <viktorin@rehivetech.com>
> Signed-off-by: Shreyansh Jain <shreyansh.jain@nxp.com>
> Signed-off-by: Hemant Agrawal <hemant.agrawal@nxp.com>
> ---
>  app/test/test_soc.c                             | 138 ++++++++++++++-
>  lib/librte_eal/bsdapp/eal/rte_eal_version.map   |   4 +
>  lib/librte_eal/common/eal_common_soc.c          | 215 ++++++++++++++++++++++++
>  lib/librte_eal/common/include/rte_soc.h         |  51 ++++++
>  lib/librte_eal/linuxapp/eal/eal.c               |   5 +
>  lib/librte_eal/linuxapp/eal/eal_soc.c           |  16 ++
>  lib/librte_eal/linuxapp/eal/rte_eal_version.map |   4 +
>  7 files changed, 432 insertions(+), 1 deletion(-)
> 
> diff --git a/app/test/test_soc.c b/app/test/test_soc.c
> index ac03e64..d2b9462 100644
> --- a/app/test/test_soc.c
> +++ b/app/test/test_soc.c
> @@ -87,14 +87,45 @@ static int test_compare_addr(void)
>   */
>  struct test_wrapper {
>  	struct rte_soc_driver soc_drv;
> +	struct rte_soc_device soc_dev;
>  };
>  
> +static int empty_pmd0_devinit(struct rte_soc_driver *drv,
> +			      struct rte_soc_device *dev);
> +static int empty_pmd0_devuninit(struct rte_soc_device *dev);

I prefer an empty line here.


What is the prupose of the scan here? What device does it provide
to the test? I'd prefer to call it e.g. "allways_find_device0" or
something describing the purpose and explaining what is the goal
of the related test.

Probably a comment explaining "provide a device named 'empty_pmd0_dev'
would be helpful.

> +static void test_soc_scan_dev0_cb(void);

Similar here, something like "match_by_name".

> +static int test_soc_match_dev0_cb(struct rte_soc_driver *drv,
> +				  struct rte_soc_device *dev);

I prefer an empty line here.


ditto...

> +static void test_soc_scan_dev1_cb(void);

ditto...

> +static int test_soc_match_dev1_cb(struct rte_soc_driver *drv,
> +				  struct rte_soc_device *dev);
> +
> +static int
> +empty_pmd0_devinit(struct rte_soc_driver *drv __rte_unused,
> +		   struct rte_soc_device *dev __rte_unused)
> +{
> +	return 0;
> +}
> +
> +static int
> +empty_pmd0_devuninit(struct rte_soc_device *dev)
> +{
> +	/* Release the memory associated with dev->addr.name */
> +	free(dev->addr.name);
> +
> +	return 0;
> +}
> +
>  struct test_wrapper empty_pmd0 = {
>  	.soc_drv = {
>  		.driver = {
>  			.name = "empty_pmd0"
>  		},
> -	},
> +		.devinit = empty_pmd0_devinit,
> +		.devuninit = empty_pmd0_devuninit,
> +		.scan_fn = test_soc_scan_dev0_cb,
> +		.match_fn = test_soc_match_dev0_cb,
> +	}
>  };
>  
>  struct test_wrapper empty_pmd1 = {
> @@ -102,9 +133,54 @@ struct test_wrapper empty_pmd1 = {
>  		.driver = {
>  			.name = "empty_pmd1"
>  		},
> +		.scan_fn = test_soc_scan_dev1_cb,
> +		.match_fn = test_soc_match_dev1_cb,
>  	},
>  };
>  
> +static void
> +test_soc_scan_dev0_cb(void)
> +{
> +	/* SoC's scan would scan devices on its bus and add to
> +	 * soc_device_list
> +	 */
> +	empty_pmd0.soc_dev.addr.name = strdup("empty_pmd0_dev");
> +
> +	TAILQ_INSERT_TAIL(&soc_device_list, &empty_pmd0.soc_dev, next);
> +}
> +
> +static int
> +test_soc_match_dev0_cb(struct rte_soc_driver *drv __rte_unused,
> +		       struct rte_soc_device *dev)
> +{
> +	if (!dev->addr.name || strcmp(dev->addr.name, "empty_pmd0_dev"))
> +		return 0;
> +
> +	return 1;
> +}
> +
> +
> +static void
> +test_soc_scan_dev1_cb(void)
> +{
> +	/* SoC's scan would scan devices on its bus and add to
> +	 * soc_device_list
> +	 */
> +	empty_pmd0.soc_dev.addr.name = strdup("empty_pmd1_dev");
> +
> +	TAILQ_INSERT_TAIL(&soc_device_list, &empty_pmd1.soc_dev, next);
> +}
> +
> +static int
> +test_soc_match_dev1_cb(struct rte_soc_driver *drv __rte_unused,
> +		       struct rte_soc_device *dev)
> +{
> +	if (!dev->addr.name || strcmp(dev->addr.name, "empty_pmd1_dev"))
> +		return 0;
> +
> +	return 1;
> +}
> +
>  static int
>  count_registered_socdrvs(void)
>  {
> @@ -148,13 +224,54 @@ test_register_unregister(void)
>  	return 0;
>  }
>  
> +/* Test Probe (scan and match) functionality */
> +static int
> +test_soc_init_and_probe(void)

You say to test scan and match. I'd prefer to reflect this in the name
of the test. Otherwise, it seems you are testing init and probe which
is not true, I think.

Do you test that "match principle works" or that "match functions are OK"
or "match functions are called as expected", ...?

> +{
> +	struct rte_soc_driver *drv;
> +
> +	/* Registering dummy drivers */
> +	rte_eal_soc_register(&empty_pmd0.soc_drv);
> +	rte_eal_soc_register(&empty_pmd1.soc_drv);
> +	/* Assuming that test_register_unregister is working, not verifying
> +	 * that drivers are indeed registered
> +	*/
> +
> +	/* rte_eal_soc_init is called by rte_eal_init, which in turn calls the
> +	 * scan_fn of each driver.
> +	 */
> +	TAILQ_FOREACH(drv, &soc_driver_list, next) {
> +		if (drv && drv->scan_fn)
> +			drv->scan_fn();
> +	}

Here, I suppose you mimic the rte_eal_soc_init?

> +
> +	/* rte_eal_init() would perform other inits here */
> +
> +	/* Probe would link the SoC devices<=>drivers */
> +	rte_eal_soc_probe();
> +
> +	/* Unregistering dummy drivers */
> +	rte_eal_soc_unregister(&empty_pmd0.soc_drv);
> +	rte_eal_soc_unregister(&empty_pmd1.soc_drv);
> +
> +	free(empty_pmd0.soc_dev.addr.name);
> +
> +	printf("%s has been successful\n", __func__);

How you detect it is unsuccessful? Is it possible to fail in this test?
A test that can never fail is in fact not a test :).

> +	return 0;
> +}
> +
>  /* save real devices and drivers until the tests finishes */
>  struct soc_driver_list real_soc_driver_list =
>  	TAILQ_HEAD_INITIALIZER(real_soc_driver_list);
>  
> +/* save real devices and drivers until the tests finishes */
> +struct soc_device_list real_soc_device_list =
> +	TAILQ_HEAD_INITIALIZER(real_soc_device_list);
> +
>  static int test_soc_setup(void)
>  {
>  	struct rte_soc_driver *drv;
> +	struct rte_soc_device *dev;
>  
>  	/* no real drivers for the test */
>  	while (!TAILQ_EMPTY(&soc_driver_list)) {
> @@ -163,12 +280,20 @@ static int test_soc_setup(void)
>  		TAILQ_INSERT_TAIL(&real_soc_driver_list, drv, next);
>  	}
>  
> +	/* And, no real devices for the test */
> +	while (!TAILQ_EMPTY(&soc_device_list)) {
> +		dev = TAILQ_FIRST(&soc_device_list);
> +		TAILQ_REMOVE(&soc_device_list, dev, next);
> +		TAILQ_INSERT_TAIL(&real_soc_device_list, dev, next);
> +	}
> +
>  	return 0;
>  }
>  
>  static int test_soc_cleanup(void)
>  {
>  	struct rte_soc_driver *drv;
> +	struct rte_soc_device *dev;
>  
>  	/* bring back real drivers after the test */
>  	while (!TAILQ_EMPTY(&real_soc_driver_list)) {
> @@ -177,6 +302,13 @@ static int test_soc_cleanup(void)
>  		rte_eal_soc_register(drv);
>  	}
>  
> +	/* And, bring back real devices after the test */
> +	while (!TAILQ_EMPTY(&real_soc_device_list)) {
> +		dev = TAILQ_FIRST(&real_soc_device_list);
> +		TAILQ_REMOVE(&real_soc_device_list, dev, next);
> +		TAILQ_INSERT_TAIL(&soc_device_list, dev, next);
> +	}
> +
>  	return 0;
>  }
>  
> @@ -192,6 +324,10 @@ test_soc(void)
>  	if (test_register_unregister())
>  		return -1;
>  
> +	/* Assuming test_register_unregister has succeeded */
> +	if (test_soc_init_and_probe())
> +		return -1;
> +
>  	if (test_soc_cleanup())
>  		return -1;
>  
> diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> index de38848..3c407be 100644
> --- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> +++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
> @@ -173,5 +173,9 @@ DPDK_16.11 {
>  	rte_eal_soc_register;
>  	rte_eal_soc_unregister;
>  	rte_eal_soc_dump;
> +	rte_eal_soc_match;
> +	rte_eal_soc_detach;
> +	rte_eal_soc_probe;
> +	rte_eal_soc_probe_one;
>  
>  } DPDK_16.07;
> diff --git a/lib/librte_eal/common/eal_common_soc.c b/lib/librte_eal/common/eal_common_soc.c
> index 5dcddc5..bb87a67 100644
> --- a/lib/librte_eal/common/eal_common_soc.c
> +++ b/lib/librte_eal/common/eal_common_soc.c
> @@ -36,6 +36,8 @@
>  #include <sys/queue.h>
>  
>  #include <rte_log.h>
> +#include <rte_common.h>
> +#include <rte_soc.h>
>  
>  #include "eal_private.h"
>  
> @@ -45,6 +47,213 @@ struct soc_driver_list soc_driver_list =
>  struct soc_device_list soc_device_list =
>  	TAILQ_HEAD_INITIALIZER(soc_device_list);
>  
> +/* Default SoC device<->Driver match handler function */

I think this comment is redundant. All this is already said in the rte_soc.h.

> +int
> +rte_eal_soc_match(struct rte_soc_driver *drv, struct rte_soc_device *dev)
> +{
> +	int i, j;
> +
> +	RTE_VERIFY(drv != NULL && drv->id_table != NULL);
> +	RTE_VERIFY(dev != NULL && dev->id != NULL);
> +
> +	for (i = 0; drv->id_table[i].compatible; ++i) {
> +		const char *drv_compat = drv->id_table[i].compatible;
> +
> +		for (j = 0; dev->id[j].compatible; ++j) {
> +			const char *dev_compat = dev->id[j].compatible;
> +
> +			if (!strcmp(drv_compat, dev_compat))
> +				return 0;
> +		}
> +	}
> +
> +	return 1;
> +}
> +
> +
> +static int
> +rte_eal_soc_probe_one_driver(struct rte_soc_driver *drv,
> +			     struct rte_soc_device *dev)
> +{
> +	int ret = 1;
> +

I think, the RTE_VERIFY(dev->match_fn) might be good here.
It avoids any doubts about the validity of the pointer.

> +	ret = drv->match_fn(drv, dev);
> +	if (ret) {
> +		RTE_LOG(DEBUG, EAL,
> +			" match function failed, skipping\n");

Is this a failure? I think it is not. Failure would be if the match
function cannot execute correctly. This is more like "no-match".

When debugging, I'd like to see more a message like "driver <name> does not match".

> +		return ret;
> +	}
> +
> +	dev->driver = drv;
> +	RTE_VERIFY(drv->devinit != NULL);
> +	return drv->devinit(drv, dev);
> +}
> +
> +static int
> +soc_probe_all_drivers(struct rte_soc_device *dev)
> +{
> +	struct rte_soc_driver *drv = NULL;
> +	int rc = 0;
> +
> +	if (dev == NULL)
> +		return -1;
> +
> +	TAILQ_FOREACH(drv, &soc_driver_list, next) {
> +		rc = rte_eal_soc_probe_one_driver(drv, dev);
> +		if (rc < 0)
> +			/* negative value is an error */
> +			return -1;
> +		if (rc > 0)
> +			/* positive value means driver doesn't support it */
> +			continue;
> +		return 0;
> +	}
> +	return 1;
> +}
> +
> +/* If the IDs match, call the devuninit() function of the driver. */

Again, I think this comment is redudant. I'd leave it if it explains some
implementation-specific detail but it does not seem to...

> +static int
> +rte_eal_soc_detach_dev(struct rte_soc_driver *drv,
> +		       struct rte_soc_device *dev)
> +{
> +	int ret;
> +
> +	if ((drv == NULL) || (dev == NULL))
> +		return -EINVAL;
> +
> +	ret = drv->match_fn(drv, dev);
> +	if (ret) {
> +		RTE_LOG(DEBUG, EAL,
> +			" match function failed, skipping\n");

When debugging, I'd like to see more "driver <name> does not match".

> +		return ret;
> +	}
> +
> +	RTE_LOG(DEBUG, EAL, "SoC device %s\n",
> +		dev->addr.name);
> +
> +	RTE_LOG(DEBUG, EAL, "  remove driver: %s\n", drv->driver.name);
> +
> +	if (drv->devuninit && (drv->devuninit(dev) < 0))
> +		return -1;	/* negative value is an error */
> +
> +	/* clear driver structure */
> +	dev->driver = NULL;
> +
> +	return 0;
> +}
> +
> +/*
> + * Call the devuninit() function of all registered drivers for the given
> + * device if their IDs match.

I think, the "IDs match" is obsolete becase the match_fn may work in a different way now.

> + *
> + * @return
> + *       0 when successful
> + *      -1 if deinitialization fails
> + *       1 if no driver is found for this device.
> + */
> +static int
> +soc_detach_all_drivers(struct rte_soc_device *dev)
> +{
> +	struct rte_soc_driver *dr = NULL;
> +	int rc = 0;
> +
> +	if (dev == NULL)
> +		return -1;
> +
> +	TAILQ_FOREACH(dr, &soc_driver_list, next) {
> +		rc = rte_eal_soc_detach_dev(dr, dev);
> +		if (rc < 0)
> +			/* negative value is an error */
> +			return -1;
> +		if (rc > 0)
> +			/* positive value means driver doesn't support it */
> +			continue;
> +		return 0;
> +	}
> +	return 1;
> +}
> +
> +/*
> + * Detach device specified by its SoC address.
> + */
> +int
> +rte_eal_soc_detach(const struct rte_soc_addr *addr)
> +{
> +	struct rte_soc_device *dev = NULL;
> +	int ret = 0;
> +
> +	if (addr == NULL)
> +		return -1;
> +
> +	TAILQ_FOREACH(dev, &soc_device_list, next) {
> +		if (rte_eal_compare_soc_addr(&dev->addr, addr))
> +			continue;
> +
> +		ret = soc_detach_all_drivers(dev);
> +		if (ret < 0)
> +			goto err_return;
> +
> +		TAILQ_REMOVE(&soc_device_list, dev, next);
> +		return 0;
> +	}
> +	return -1;
> +
> +err_return:
> +	RTE_LOG(WARNING, EAL, "Requested device %s cannot be used\n",
> +		dev->addr.name);
> +	return -1;
> +}
> +
> +int
> +rte_eal_soc_probe_one(const struct rte_soc_addr *addr)
> +{
> +	struct rte_soc_device *dev = NULL;
> +	int ret = 0;
> +
> +	if (addr == NULL)
> +		return -1;
> +
> +	/* unlike pci, in case of soc, it the responsibility of the soc driver
> +	 * to check during init whether device has been updated since last add.

Why? Can you give a more detailed explanation?

> +	 */
> +
> +	TAILQ_FOREACH(dev, &soc_device_list, next) {
> +		if (rte_eal_compare_soc_addr(&dev->addr, addr))
> +			continue;
> +
> +		ret = soc_probe_all_drivers(dev);
> +		if (ret < 0)
> +			goto err_return;
> +		return 0;
> +	}
> +	return -1;
> +
> +err_return:
> +	RTE_LOG(WARNING, EAL,
> +		"Requested device %s cannot be used\n", addr->name);
> +	return -1;
> +}
> +
> +/*
> + * Scan the SoC devices and call the devinit() function for all registered
> + * drivers that have a matching entry in its id_table for discovered devices.
> + */

Should be in header. Here it is redundant.

> +int
> +rte_eal_soc_probe(void)
> +{
> +	struct rte_soc_device *dev = NULL;
> +	int ret = 0;
> +
> +	TAILQ_FOREACH(dev, &soc_device_list, next) {
> +		ret = soc_probe_all_drivers(dev);
> +		if (ret < 0)
> +			rte_exit(EXIT_FAILURE, "Requested device %s"
> +				 " cannot be used\n", dev->addr.name);
> +	}
> +
> +	return 0;
> +}
> +
>  /* dump one device */
>  static int
>  soc_dump_one_device(FILE *f, struct rte_soc_device *dev)
> @@ -79,6 +288,12 @@ rte_eal_soc_dump(FILE *f)
>  void
>  rte_eal_soc_register(struct rte_soc_driver *driver)
>  {
> +	/* For a valid soc driver, match and scan function
> +	 * should be provided.
> +	 */

This comment should be in the header file.

> +	RTE_VERIFY(driver != NULL);
> +	RTE_VERIFY(driver->match_fn != NULL);
> +	RTE_VERIFY(driver->scan_fn != NULL);
>  	TAILQ_INSERT_TAIL(&soc_driver_list, driver, next);
>  }
>  
> diff --git a/lib/librte_eal/common/include/rte_soc.h b/lib/librte_eal/common/include/rte_soc.h
> index c6f98eb..bfb49a2 100644
> --- a/lib/librte_eal/common/include/rte_soc.h
> +++ b/lib/librte_eal/common/include/rte_soc.h
> @@ -97,6 +97,16 @@ typedef int (soc_devinit_t)(struct rte_soc_driver *, struct rte_soc_device *);
>  typedef int (soc_devuninit_t)(struct rte_soc_device *);
>  
>  /**
> + * SoC device scan callback, called from rte_eal_soc_init.

Can you explain what is the goal of the callback?
What is the expected behaviour.

It returns void so it seems it can never fail. Is this correct?
I can image that to scan for devices, I need to check some file-system
structure which can be unavailable...

> + */
> +typedef void (soc_scan_t)(void);

You are missing the '*' in (*soc_scan_t).

> +
> +/**
> + * Custom device<=>driver match callback for SoC

Can you explain the semantics (return values), please?

> + */
> +typedef int (soc_match_t)(struct rte_soc_driver *, struct rte_soc_device *);

You are missing the '*' in (*soc_match_t).

> +
> +/**
>   * A structure describing a SoC driver.
>   */
>  struct rte_soc_driver {
> @@ -104,6 +114,8 @@ struct rte_soc_driver {
>  	struct rte_driver driver;          /**< Inherit core driver. */
>  	soc_devinit_t *devinit;            /**< Device initialization */
>  	soc_devuninit_t *devuninit;        /**< Device uninitialization */

Those should be renamed to probe/remove.

> +	soc_scan_t *scan_fn;               /**< Callback for scanning SoC bus*/
> +	soc_match_t *match_fn;             /**< Callback to match dev<->drv */

Here the '*' would be redundant if you add them to the typedefs.

I think, we should tell the users that scan_fn and match_fn must be always set
to something.

>  	const struct rte_soc_id *id_table; /**< ID table, NULL terminated */
>  };
>  
> @@ -146,6 +158,45 @@ rte_eal_compare_soc_addr(const struct rte_soc_addr *a0,
>  }
>  
>  /**
> + * Default function for matching the Soc driver with device. Each driver can
> + * either use this function or define their own soc matching function.
> + * This function relies on the compatible string extracted from sysfs. But,
> + * a SoC might have different way of identifying its devices. Such SoC can
> + * override match_fn.
> + *
> + * @return
> + * 	 0 on success
> + *	-1 when no match found
> +  */
> +int
> +rte_eal_soc_match(struct rte_soc_driver *drv, struct rte_soc_device *dev);

What about naming it

	rte_eal_soc_match_default

or maybe better

	rte_eal_soc_match_compatible

what do you think?

> +
> +/**
> + * Probe SoC devices for registered drivers.
> + */
> +int rte_eal_soc_probe(void);
> +
> +/**
> + * Probe the single SoC device.
> + */
> +int rte_eal_soc_probe_one(const struct rte_soc_addr *addr);
> +
> +/**
> + * Close the single SoC device.
> + *
> + * Scan the SoC devices and find the SoC device specified by the SoC
> + * address, then call the devuninit() function for registered driver
> + * that has a matching entry in its id_table for discovered device.
> + *
> + * @param addr
> + *	The SoC address to close.
> + * @return
> + *   - 0 on success.
> + *   - Negative on error.
> + */
> +int rte_eal_soc_detach(const struct rte_soc_addr *addr);
> +
> +/**
>   * Dump discovered SoC devices.
>   */
>  void rte_eal_soc_dump(FILE *f);
> diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
> index 15c8c3d..147b601 100644
> --- a/lib/librte_eal/linuxapp/eal/eal.c
> +++ b/lib/librte_eal/linuxapp/eal/eal.c
> @@ -70,6 +70,7 @@
>  #include <rte_cpuflags.h>
>  #include <rte_interrupts.h>
>  #include <rte_pci.h>
> +#include <rte_soc.h>
>  #include <rte_dev.h>
>  #include <rte_devargs.h>
>  #include <rte_common.h>
> @@ -881,6 +882,10 @@ rte_eal_init(int argc, char **argv)
>  	if (rte_eal_pci_probe())
>  		rte_panic("Cannot probe PCI\n");
>  
> +	/* Probe & Initialize SoC devices */
> +	if (rte_eal_soc_probe())
> +		rte_panic("Cannot probe SoC\n");
> +
>  	rte_eal_mcfg_complete();
>  
>  	return fctret;
> diff --git a/lib/librte_eal/linuxapp/eal/eal_soc.c b/lib/librte_eal/linuxapp/eal/eal_soc.c
> index 04848b9..5f961c4 100644
> --- a/lib/librte_eal/linuxapp/eal/eal_soc.c
> +++ b/lib/librte_eal/linuxapp/eal/eal_soc.c
> @@ -52,5 +52,21 @@
>  int
>  rte_eal_soc_init(void)
>  {
> +	struct rte_soc_driver *drv;
> +
> +	/* for debug purposes, SoC can be disabled */
> +	if (internal_config.no_soc)
> +		return 0;
> +
> +	/* For each registered driver, call their scan routine to perform any
> +	 * custom scan for devices (for example, custom buses)
> +	 */
> +	TAILQ_FOREACH(drv, &soc_driver_list, next) {

Is it possible to have drv->scan_fn == NULL? I suppose, this is invalid.
I'd prefer to have RTE_VERIFY for this check.

> +		if (drv && drv->scan_fn) {
> +			drv->scan_fn();
> +			/* Ignore all errors from this */
> +		}

> +	}
> +
>  	return 0;
>  }
> diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> index b9d1932..adcfe7d 100644
> --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> @@ -179,5 +179,9 @@ DPDK_16.11 {
>  	rte_eal_soc_register;
>  	rte_eal_soc_unregister;
>  	rte_eal_soc_dump;
> +	rte_eal_soc_match;
> +	rte_eal_soc_detach;
> +	rte_eal_soc_probe;
> +	rte_eal_soc_probe_one;
>  
>  } DPDK_16.07;

Regards
Jan
  
Shreyansh Jain Sept. 19, 2016, 6:47 a.m. UTC | #2
Hi Jan,

On Friday 16 September 2016 05:57 PM, Jan Viktorin wrote:
> On Fri, 9 Sep 2016 14:13:50 +0530
> Shreyansh Jain <shreyansh.jain@nxp.com> wrote:
>
>> Each SoC PMD registers a set of callback for scanning its own bus/infra and
>> matching devices to drivers when probe is called.
>> This patch introduces the infra for calls to SoC scan on rte_eal_soc_init()
>> and match on rte_eal_soc_probe().
>>
>> Patch also adds test case for scan and probe.
>>
>> Signed-off-by: Jan Viktorin <viktorin@rehivetech.com>
>> Signed-off-by: Shreyansh Jain <shreyansh.jain@nxp.com>
>> Signed-off-by: Hemant Agrawal <hemant.agrawal@nxp.com>
>> ---
>>  app/test/test_soc.c                             | 138 ++++++++++++++-
>>  lib/librte_eal/bsdapp/eal/rte_eal_version.map   |   4 +
>>  lib/librte_eal/common/eal_common_soc.c          | 215 ++++++++++++++++++++++++
>>  lib/librte_eal/common/include/rte_soc.h         |  51 ++++++
>>  lib/librte_eal/linuxapp/eal/eal.c               |   5 +
>>  lib/librte_eal/linuxapp/eal/eal_soc.c           |  16 ++
>>  lib/librte_eal/linuxapp/eal/rte_eal_version.map |   4 +
>>  7 files changed, 432 insertions(+), 1 deletion(-)
>>
>> diff --git a/app/test/test_soc.c b/app/test/test_soc.c
>> index ac03e64..d2b9462 100644
>> --- a/app/test/test_soc.c
>> +++ b/app/test/test_soc.c
>> @@ -87,14 +87,45 @@ static int test_compare_addr(void)
>>   */
>>  struct test_wrapper {
>>  	struct rte_soc_driver soc_drv;
>> +	struct rte_soc_device soc_dev;
>>  };
>>
>> +static int empty_pmd0_devinit(struct rte_soc_driver *drv,
>> +			      struct rte_soc_device *dev);
>> +static int empty_pmd0_devuninit(struct rte_soc_device *dev);
>
> I prefer an empty line here.

Ok. I will add that.

>
>
> What is the prupose of the scan here? What device does it provide
> to the test? I'd prefer to call it e.g. "allways_find_device0" or
> something describing the purpose and explaining what is the goal
> of the related test.

I understand what you are hinting at. Purpose of scan is obviously to 
'always add a device0'. I will update the code.

>
> Probably a comment explaining "provide a device named 'empty_pmd0_dev'
> would be helpful.

Ok.

>
>> +static void test_soc_scan_dev0_cb(void);
>
> Similar here, something like "match_by_name".
>
>> +static int test_soc_match_dev0_cb(struct rte_soc_driver *drv,
>> +				  struct rte_soc_device *dev);
>
> I prefer an empty line here.

Do we really place newlines in function declarations? That doesn't 
really help anything, until and unless some comments are added to those. 
Anyways, rather than added blank lines, I will add some comments - those 
are indeed misssing.

>
>
> ditto...

Will add comments.

>
>> +static void test_soc_scan_dev1_cb(void);
>
> ditto...

Same here, I prefer comment rather than blank line.

>
>> +static int test_soc_match_dev1_cb(struct rte_soc_driver *drv,
>> +				  struct rte_soc_device *dev);
>> +
>> +static int
>> +empty_pmd0_devinit(struct rte_soc_driver *drv __rte_unused,
>> +		   struct rte_soc_device *dev __rte_unused)
>> +{
>> +	return 0;
>> +}
>> +
>> +static int
>> +empty_pmd0_devuninit(struct rte_soc_device *dev)
>> +{
>> +	/* Release the memory associated with dev->addr.name */
>> +	free(dev->addr.name);
>> +
>> +	return 0;
>> +}
>> +
>>  struct test_wrapper empty_pmd0 = {
>>  	.soc_drv = {
>>  		.driver = {
>>  			.name = "empty_pmd0"
>>  		},
>> -	},
>> +		.devinit = empty_pmd0_devinit,
>> +		.devuninit = empty_pmd0_devuninit,
>> +		.scan_fn = test_soc_scan_dev0_cb,
>> +		.match_fn = test_soc_match_dev0_cb,
>> +	}
>>  };
>>
>>  struct test_wrapper empty_pmd1 = {
>> @@ -102,9 +133,54 @@ struct test_wrapper empty_pmd1 = {
>>  		.driver = {
>>  			.name = "empty_pmd1"
>>  		},
>> +		.scan_fn = test_soc_scan_dev1_cb,
>> +		.match_fn = test_soc_match_dev1_cb,
>>  	},
>>  };
>>
>> +static void
>> +test_soc_scan_dev0_cb(void)
>> +{
>> +	/* SoC's scan would scan devices on its bus and add to
>> +	 * soc_device_list
>> +	 */
>> +	empty_pmd0.soc_dev.addr.name = strdup("empty_pmd0_dev");
>> +
>> +	TAILQ_INSERT_TAIL(&soc_device_list, &empty_pmd0.soc_dev, next);
>> +}
>> +
>> +static int
>> +test_soc_match_dev0_cb(struct rte_soc_driver *drv __rte_unused,
>> +		       struct rte_soc_device *dev)
>> +{
>> +	if (!dev->addr.name || strcmp(dev->addr.name, "empty_pmd0_dev"))
>> +		return 0;
>> +
>> +	return 1;
>> +}
>> +
>> +
>> +static void
>> +test_soc_scan_dev1_cb(void)
>> +{
>> +	/* SoC's scan would scan devices on its bus and add to
>> +	 * soc_device_list
>> +	 */
>> +	empty_pmd0.soc_dev.addr.name = strdup("empty_pmd1_dev");
>> +
>> +	TAILQ_INSERT_TAIL(&soc_device_list, &empty_pmd1.soc_dev, next);
>> +}
>> +
>> +static int
>> +test_soc_match_dev1_cb(struct rte_soc_driver *drv __rte_unused,
>> +		       struct rte_soc_device *dev)
>> +{
>> +	if (!dev->addr.name || strcmp(dev->addr.name, "empty_pmd1_dev"))
>> +		return 0;
>> +
>> +	return 1;
>> +}
>> +
>>  static int
>>  count_registered_socdrvs(void)
>>  {
>> @@ -148,13 +224,54 @@ test_register_unregister(void)
>>  	return 0;
>>  }
>>
>> +/* Test Probe (scan and match) functionality */
>> +static int
>> +test_soc_init_and_probe(void)
>
> You say to test scan and match. I'd prefer to reflect this in the name
> of the test. Otherwise, it seems you are testing init and probe which
> is not true, I think.

I agree. I will update the name of the function.

>
> Do you test that "match principle works" or that "match functions are OK"
> or "match functions are called as expected", ...?

"match functions are called as expected"
The model for the patchset was to allow PMDs to write their own match 
and hence, verifying a particular match is not definitive. Rather, the 
test case simply confirms that a SoC based PMD would be able to 
implement its own match/scan and these would be called from EAL as expected.

>
>> +{
>> +	struct rte_soc_driver *drv;
>> +
>> +	/* Registering dummy drivers */
>> +	rte_eal_soc_register(&empty_pmd0.soc_drv);
>> +	rte_eal_soc_register(&empty_pmd1.soc_drv);
>> +	/* Assuming that test_register_unregister is working, not verifying
>> +	 * that drivers are indeed registered
>> +	*/
>> +
>> +	/* rte_eal_soc_init is called by rte_eal_init, which in turn calls the
>> +	 * scan_fn of each driver.
>> +	 */
>> +	TAILQ_FOREACH(drv, &soc_driver_list, next) {
>> +		if (drv && drv->scan_fn)
>> +			drv->scan_fn();
>> +	}
>
> Here, I suppose you mimic the rte_eal_soc_init?

Yes.

>
>> +
>> +	/* rte_eal_init() would perform other inits here */
>> +
>> +	/* Probe would link the SoC devices<=>drivers */
>> +	rte_eal_soc_probe();
>> +
>> +	/* Unregistering dummy drivers */
>> +	rte_eal_soc_unregister(&empty_pmd0.soc_drv);
>> +	rte_eal_soc_unregister(&empty_pmd1.soc_drv);
>> +
>> +	free(empty_pmd0.soc_dev.addr.name);
>> +
>> +	printf("%s has been successful\n", __func__);
>
> How you detect it is unsuccessful? Is it possible to fail in this test?
> A test that can never fail is in fact not a test :).

The design assumption for SoC patcheset was: A PMDs scan is called to 
find devices on its bus (PMD ~ bus). Whether devices are found or not, 
is irrelevant to EAL - whether that is because of error or actually no 
devices were available.
With the above logic, no 'success/failure' is checked in the test. It is 
simply a verification of EAL's ability to link the PMD with it 
(scan/match function pointers).

>
>> +	return 0;
>> +}
>> +
>>  /* save real devices and drivers until the tests finishes */
>>  struct soc_driver_list real_soc_driver_list =
>>  	TAILQ_HEAD_INITIALIZER(real_soc_driver_list);
>>
>> +/* save real devices and drivers until the tests finishes */
>> +struct soc_device_list real_soc_device_list =
>> +	TAILQ_HEAD_INITIALIZER(real_soc_device_list);
>> +
>>  static int test_soc_setup(void)
>>  {
>>  	struct rte_soc_driver *drv;
>> +	struct rte_soc_device *dev;
>>
>>  	/* no real drivers for the test */
>>  	while (!TAILQ_EMPTY(&soc_driver_list)) {
>> @@ -163,12 +280,20 @@ static int test_soc_setup(void)
>>  		TAILQ_INSERT_TAIL(&real_soc_driver_list, drv, next);
>>  	}
>>
>> +	/* And, no real devices for the test */
>> +	while (!TAILQ_EMPTY(&soc_device_list)) {
>> +		dev = TAILQ_FIRST(&soc_device_list);
>> +		TAILQ_REMOVE(&soc_device_list, dev, next);
>> +		TAILQ_INSERT_TAIL(&real_soc_device_list, dev, next);
>> +	}
>> +
>>  	return 0;
>>  }
>>
>>  static int test_soc_cleanup(void)
>>  {
>>  	struct rte_soc_driver *drv;
>> +	struct rte_soc_device *dev;
>>
>>  	/* bring back real drivers after the test */
>>  	while (!TAILQ_EMPTY(&real_soc_driver_list)) {
>> @@ -177,6 +302,13 @@ static int test_soc_cleanup(void)
>>  		rte_eal_soc_register(drv);
>>  	}
>>
>> +	/* And, bring back real devices after the test */
>> +	while (!TAILQ_EMPTY(&real_soc_device_list)) {
>> +		dev = TAILQ_FIRST(&real_soc_device_list);
>> +		TAILQ_REMOVE(&real_soc_device_list, dev, next);
>> +		TAILQ_INSERT_TAIL(&soc_device_list, dev, next);
>> +	}
>> +
>>  	return 0;
>>  }
>>
>> @@ -192,6 +324,10 @@ test_soc(void)
>>  	if (test_register_unregister())
>>  		return -1;
>>
>> +	/* Assuming test_register_unregister has succeeded */
>> +	if (test_soc_init_and_probe())
>> +		return -1;
>> +
>>  	if (test_soc_cleanup())
>>  		return -1;
>>
>> diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
>> index de38848..3c407be 100644
>> --- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
>> +++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
>> @@ -173,5 +173,9 @@ DPDK_16.11 {
>>  	rte_eal_soc_register;
>>  	rte_eal_soc_unregister;
>>  	rte_eal_soc_dump;
>> +	rte_eal_soc_match;
>> +	rte_eal_soc_detach;
>> +	rte_eal_soc_probe;
>> +	rte_eal_soc_probe_one;
>>
>>  } DPDK_16.07;
>> diff --git a/lib/librte_eal/common/eal_common_soc.c b/lib/librte_eal/common/eal_common_soc.c
>> index 5dcddc5..bb87a67 100644
>> --- a/lib/librte_eal/common/eal_common_soc.c
>> +++ b/lib/librte_eal/common/eal_common_soc.c
>> @@ -36,6 +36,8 @@
>>  #include <sys/queue.h>
>>
>>  #include <rte_log.h>
>> +#include <rte_common.h>
>> +#include <rte_soc.h>
>>
>>  #include "eal_private.h"
>>
>> @@ -45,6 +47,213 @@ struct soc_driver_list soc_driver_list =
>>  struct soc_device_list soc_device_list =
>>  	TAILQ_HEAD_INITIALIZER(soc_device_list);
>>
>> +/* Default SoC device<->Driver match handler function */
>
> I think this comment is redundant. All this is already said in the rte_soc.h.

Ok. I will remove it from here and if need be, update the rte_soc.h to 
have elaborate comments.

>
>> +int
>> +rte_eal_soc_match(struct rte_soc_driver *drv, struct rte_soc_device *dev)
>> +{
>> +	int i, j;
>> +
>> +	RTE_VERIFY(drv != NULL && drv->id_table != NULL);
>> +	RTE_VERIFY(dev != NULL && dev->id != NULL);
>> +
>> +	for (i = 0; drv->id_table[i].compatible; ++i) {
>> +		const char *drv_compat = drv->id_table[i].compatible;
>> +
>> +		for (j = 0; dev->id[j].compatible; ++j) {
>> +			const char *dev_compat = dev->id[j].compatible;
>> +
>> +			if (!strcmp(drv_compat, dev_compat))
>> +				return 0;
>> +		}
>> +	}
>> +
>> +	return 1;
>> +}
>> +
>> +
>> +static int
>> +rte_eal_soc_probe_one_driver(struct rte_soc_driver *drv,
>> +			     struct rte_soc_device *dev)
>> +{
>> +	int ret = 1;
>> +
>
> I think, the RTE_VERIFY(dev->match_fn) might be good here.
> It avoids any doubts about the validity of the pointer.

That has already been done in rte_eal_soc_register which is called when 
PMDs are registering themselves through DRIVER_REGISTER_SOC. That would 
prevent any PMD leaking through to this stage without a proper 
match_fn/scan_fn.

>
>> +	ret = drv->match_fn(drv, dev);
>> +	if (ret) {
>> +		RTE_LOG(DEBUG, EAL,
>> +			" match function failed, skipping\n");
>
> Is this a failure? I think it is not. Failure would be if the match
> function cannot execute correctly. This is more like "no-match".

The log message is misleading. This is _not_ a failure but simply a 
'no-match'. I will update this.

>
> When debugging, I'd like to see more a message like "driver <name> does not match".

Problem would be about '<name>' of a driver. There is already another 
discussion about SoC capability/platform bus definitions - probably I 
will wait for that so as to define what a '<name>' for a driver and 
device is.
In this case, the key reason for not adding such a message was because 
it was assumed PMDs are black boxes with EAL not even assuming what 
'<name>' means. Anyways, it is better to discuss these things in that 
other email.

>
>> +		return ret;
>> +	}
>> +
>> +	dev->driver = drv;
>> +	RTE_VERIFY(drv->devinit != NULL);
>> +	return drv->devinit(drv, dev);
>> +}
>> +
>> +static int
>> +soc_probe_all_drivers(struct rte_soc_device *dev)
>> +{
>> +	struct rte_soc_driver *drv = NULL;
>> +	int rc = 0;
>> +
>> +	if (dev == NULL)
>> +		return -1;
>> +
>> +	TAILQ_FOREACH(drv, &soc_driver_list, next) {
>> +		rc = rte_eal_soc_probe_one_driver(drv, dev);
>> +		if (rc < 0)
>> +			/* negative value is an error */
>> +			return -1;
>> +		if (rc > 0)
>> +			/* positive value means driver doesn't support it */
>> +			continue;
>> +		return 0;
>> +	}
>> +	return 1;
>> +}
>> +
>> +/* If the IDs match, call the devuninit() function of the driver. */
>
> Again, I think this comment is redudant. I'd leave it if it explains some
> implementation-specific detail but it does not seem to...

Ok.

>
>> +static int
>> +rte_eal_soc_detach_dev(struct rte_soc_driver *drv,
>> +		       struct rte_soc_device *dev)
>> +{
>> +	int ret;
>> +
>> +	if ((drv == NULL) || (dev == NULL))
>> +		return -EINVAL;
>> +
>> +	ret = drv->match_fn(drv, dev);
>> +	if (ret) {
>> +		RTE_LOG(DEBUG, EAL,
>> +			" match function failed, skipping\n");
>
> When debugging, I'd like to see more "driver <name> does not match".

My reply is same as above - I will like to wait and see what we conclude 
from the other discussion on SoC scan/match.

>
>> +		return ret;
>> +	}
>> +
>> +	RTE_LOG(DEBUG, EAL, "SoC device %s\n",
>> +		dev->addr.name);
>> +
>> +	RTE_LOG(DEBUG, EAL, "  remove driver: %s\n", drv->driver.name);
>> +
>> +	if (drv->devuninit && (drv->devuninit(dev) < 0))
>> +		return -1;	/* negative value is an error */
>> +
>> +	/* clear driver structure */
>> +	dev->driver = NULL;
>> +
>> +	return 0;
>> +}
>> +
>> +/*
>> + * Call the devuninit() function of all registered drivers for the given
>> + * device if their IDs match.
>
> I think, the "IDs match" is obsolete becase the match_fn may work in a different way now.

Yes, I will remove this comment.

>
>> + *
>> + * @return
>> + *       0 when successful
>> + *      -1 if deinitialization fails
>> + *       1 if no driver is found for this device.
>> + */
>> +static int
>> +soc_detach_all_drivers(struct rte_soc_device *dev)
>> +{
>> +	struct rte_soc_driver *dr = NULL;
>> +	int rc = 0;
>> +
>> +	if (dev == NULL)
>> +		return -1;
>> +
>> +	TAILQ_FOREACH(dr, &soc_driver_list, next) {
>> +		rc = rte_eal_soc_detach_dev(dr, dev);
>> +		if (rc < 0)
>> +			/* negative value is an error */
>> +			return -1;
>> +		if (rc > 0)
>> +			/* positive value means driver doesn't support it */
>> +			continue;
>> +		return 0;
>> +	}
>> +	return 1;
>> +}
>> +
>> +/*
>> + * Detach device specified by its SoC address.
>> + */
>> +int
>> +rte_eal_soc_detach(const struct rte_soc_addr *addr)
>> +{
>> +	struct rte_soc_device *dev = NULL;
>> +	int ret = 0;
>> +
>> +	if (addr == NULL)
>> +		return -1;
>> +
>> +	TAILQ_FOREACH(dev, &soc_device_list, next) {
>> +		if (rte_eal_compare_soc_addr(&dev->addr, addr))
>> +			continue;
>> +
>> +		ret = soc_detach_all_drivers(dev);
>> +		if (ret < 0)
>> +			goto err_return;
>> +
>> +		TAILQ_REMOVE(&soc_device_list, dev, next);
>> +		return 0;
>> +	}
>> +	return -1;
>> +
>> +err_return:
>> +	RTE_LOG(WARNING, EAL, "Requested device %s cannot be used\n",
>> +		dev->addr.name);
>> +	return -1;
>> +}
>> +
>> +int
>> +rte_eal_soc_probe_one(const struct rte_soc_addr *addr)
>> +{
>> +	struct rte_soc_device *dev = NULL;
>> +	int ret = 0;
>> +
>> +	if (addr == NULL)
>> +		return -1;
>> +
>> +	/* unlike pci, in case of soc, it the responsibility of the soc driver
>> +	 * to check during init whether device has been updated since last add.
>
> Why? Can you give a more detailed explanation?

For this patch, I have _not_ assumed anything for a SoC's 
bus/driver/device model. In absence of a proper standard, each SoC is 
unique - categorizing all SoC under a platform bus, for example, would 
only mean assuming platform bus is a standard.
Best judge for the layout of SoC devices is the SoC PMD (which is also 
like a bus driver, other than being a device driver).

Once again, if the discussion in other thread comes to a logical 
conclusion, this would get updated.

>
>> +	 */
>> +
>> +	TAILQ_FOREACH(dev, &soc_device_list, next) {
>> +		if (rte_eal_compare_soc_addr(&dev->addr, addr))
>> +			continue;
>> +
>> +		ret = soc_probe_all_drivers(dev);
>> +		if (ret < 0)
>> +			goto err_return;
>> +		return 0;
>> +	}
>> +	return -1;
>> +
>> +err_return:
>> +	RTE_LOG(WARNING, EAL,
>> +		"Requested device %s cannot be used\n", addr->name);
>> +	return -1;
>> +}
>> +
>> +/*
>> + * Scan the SoC devices and call the devinit() function for all registered
>> + * drivers that have a matching entry in its id_table for discovered devices.
>> + */
>
> Should be in header. Here it is redundant.

Ok. I will move to rte_soc.h.

>
>> +int
>> +rte_eal_soc_probe(void)
>> +{
>> +	struct rte_soc_device *dev = NULL;
>> +	int ret = 0;
>> +
>> +	TAILQ_FOREACH(dev, &soc_device_list, next) {
>> +		ret = soc_probe_all_drivers(dev);
>> +		if (ret < 0)
>> +			rte_exit(EXIT_FAILURE, "Requested device %s"
>> +				 " cannot be used\n", dev->addr.name);
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>>  /* dump one device */
>>  static int
>>  soc_dump_one_device(FILE *f, struct rte_soc_device *dev)
>> @@ -79,6 +288,12 @@ rte_eal_soc_dump(FILE *f)
>>  void
>>  rte_eal_soc_register(struct rte_soc_driver *driver)
>>  {
>> +	/* For a valid soc driver, match and scan function
>> +	 * should be provided.
>> +	 */
>
> This comment should be in the header file.

Actually there is no valueable addition made by this comment. RTE_VERIFY 
is self explanatory. I will remove the comment all together.

>
>> +	RTE_VERIFY(driver != NULL);
>> +	RTE_VERIFY(driver->match_fn != NULL);
>> +	RTE_VERIFY(driver->scan_fn != NULL);
>>  	TAILQ_INSERT_TAIL(&soc_driver_list, driver, next);
>>  }
>>
>> diff --git a/lib/librte_eal/common/include/rte_soc.h b/lib/librte_eal/common/include/rte_soc.h
>> index c6f98eb..bfb49a2 100644
>> --- a/lib/librte_eal/common/include/rte_soc.h
>> +++ b/lib/librte_eal/common/include/rte_soc.h
>> @@ -97,6 +97,16 @@ typedef int (soc_devinit_t)(struct rte_soc_driver *, struct rte_soc_device *);
>>  typedef int (soc_devuninit_t)(struct rte_soc_device *);
>>
>>  /**
>> + * SoC device scan callback, called from rte_eal_soc_init.
>
> Can you explain what is the goal of the callback?
> What is the expected behaviour.

EAL would call the scan of each registered SoC PMD 
(DRIVER_REGISTER_SOC). This scan is responsible for finding devices on 
SoC's specific bus and add them to SoC device_list. This is a callback 
because SoC don't have a generalization like PCI. A SoC is not 
necessarily a platform bus either (what original patch series assumed).

>
> It returns void so it seems it can never fail. Is this correct?
> I can image that to scan for devices, I need to check some file-system
> structure which can be unavailable...

This is what I had in mind:
That is true, it never fails. It is expected that scan function simply 
ignores (logs error) and moves ahead. A local error for a particular SoC 
(I agree, there might not be more than one SoC) doesn't necessarily mean 
that complete DPDK Application should quit. It only means that 
application user should get some error/warning/message about failure.

>
>> + */
>> +typedef void (soc_scan_t)(void);
>
> You are missing the '*' in (*soc_scan_t).

That was put in the definition in the rte_soc_driver - but, I see you 
have already commented there. I will add the '*' here and remove from there.

>
>> +
>> +/**
>> + * Custom device<=>driver match callback for SoC
>
> Can you explain the semantics (return values), please?

rte_soc.h already has explanation on the expected semantics over 
rte_eal_soc_match - the default implementation. But, I agree, it should 
be above this declaration.

>
>> + */
>> +typedef int (soc_match_t)(struct rte_soc_driver *, struct rte_soc_device *);
>
> You are missing the '*' in (*soc_match_t).

Same as above - I will add '*' and remove from rte_soc_driver.

>
>> +
>> +/**
>>   * A structure describing a SoC driver.
>>   */
>>  struct rte_soc_driver {
>> @@ -104,6 +114,8 @@ struct rte_soc_driver {
>>  	struct rte_driver driver;          /**< Inherit core driver. */
>>  	soc_devinit_t *devinit;            /**< Device initialization */
>>  	soc_devuninit_t *devuninit;        /**< Device uninitialization */
>
> Those should be renamed to probe/remove.

Yes, agree with that.

>
>> +	soc_scan_t *scan_fn;               /**< Callback for scanning SoC bus*/
>> +	soc_match_t *match_fn;             /**< Callback to match dev<->drv */
>
> Here the '*' would be redundant if you add them to the typedefs.

As stated above, I will remove from there and add to typedefs.

>
> I think, we should tell the users that scan_fn and match_fn must be always set
> to something.

How? I think it would be part of documentation, isn't it?
Also, rte_eal_soc_init() already enforces this check with RTE_VERIFY.

>
>>  	const struct rte_soc_id *id_table; /**< ID table, NULL terminated */
>>  };
>>
>> @@ -146,6 +158,45 @@ rte_eal_compare_soc_addr(const struct rte_soc_addr *a0,
>>  }
>>
>>  /**
>> + * Default function for matching the Soc driver with device. Each driver can
>> + * either use this function or define their own soc matching function.
>> + * This function relies on the compatible string extracted from sysfs. But,
>> + * a SoC might have different way of identifying its devices. Such SoC can
>> + * override match_fn.
>> + *
>> + * @return
>> + * 	 0 on success
>> + *	-1 when no match found
>> +  */
>> +int
>> +rte_eal_soc_match(struct rte_soc_driver *drv, struct rte_soc_device *dev);
>
> What about naming it
>
> 	rte_eal_soc_match_default

Ok.

>
> or maybe better
>
> 	rte_eal_soc_match_compatible
>
> what do you think?

 From what I had in mind - the discussion about SoC not necessarily 
being a Platform bus - 'compatible' doesn't look fine to me. But again, 
it is still open debate so - I will wait until that is conlcuded.

>
>> +
>> +/**
>> + * Probe SoC devices for registered drivers.
>> + */
>> +int rte_eal_soc_probe(void);
>> +
>> +/**
>> + * Probe the single SoC device.
>> + */
>> +int rte_eal_soc_probe_one(const struct rte_soc_addr *addr);
>> +
>> +/**
>> + * Close the single SoC device.
>> + *
>> + * Scan the SoC devices and find the SoC device specified by the SoC
>> + * address, then call the devuninit() function for registered driver
>> + * that has a matching entry in its id_table for discovered device.
>> + *
>> + * @param addr
>> + *	The SoC address to close.
>> + * @return
>> + *   - 0 on success.
>> + *   - Negative on error.
>> + */
>> +int rte_eal_soc_detach(const struct rte_soc_addr *addr);
>> +
>> +/**
>>   * Dump discovered SoC devices.
>>   */
>>  void rte_eal_soc_dump(FILE *f);
>> diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
>> index 15c8c3d..147b601 100644
>> --- a/lib/librte_eal/linuxapp/eal/eal.c
>> +++ b/lib/librte_eal/linuxapp/eal/eal.c
>> @@ -70,6 +70,7 @@
>>  #include <rte_cpuflags.h>
>>  #include <rte_interrupts.h>
>>  #include <rte_pci.h>
>> +#include <rte_soc.h>
>>  #include <rte_dev.h>
>>  #include <rte_devargs.h>
>>  #include <rte_common.h>
>> @@ -881,6 +882,10 @@ rte_eal_init(int argc, char **argv)
>>  	if (rte_eal_pci_probe())
>>  		rte_panic("Cannot probe PCI\n");
>>
>> +	/* Probe & Initialize SoC devices */
>> +	if (rte_eal_soc_probe())
>> +		rte_panic("Cannot probe SoC\n");
>> +
>>  	rte_eal_mcfg_complete();
>>
>>  	return fctret;
>> diff --git a/lib/librte_eal/linuxapp/eal/eal_soc.c b/lib/librte_eal/linuxapp/eal/eal_soc.c
>> index 04848b9..5f961c4 100644
>> --- a/lib/librte_eal/linuxapp/eal/eal_soc.c
>> +++ b/lib/librte_eal/linuxapp/eal/eal_soc.c
>> @@ -52,5 +52,21 @@
>>  int
>>  rte_eal_soc_init(void)
>>  {
>> +	struct rte_soc_driver *drv;
>> +
>> +	/* for debug purposes, SoC can be disabled */
>> +	if (internal_config.no_soc)
>> +		return 0;
>> +
>> +	/* For each registered driver, call their scan routine to perform any
>> +	 * custom scan for devices (for example, custom buses)
>> +	 */
>> +	TAILQ_FOREACH(drv, &soc_driver_list, next) {
>
> Is it possible to have drv->scan_fn == NULL? I suppose, this is invalid.
> I'd prefer to have RTE_VERIFY for this check.

rte_eal_soc_init() has this check already. Driver wouldn't even be 
registered in case scan/match are not implemented.

>
>> +		if (drv && drv->scan_fn) {
>> +			drv->scan_fn();
>> +			/* Ignore all errors from this */
>> +		}
>
>> +	}
>> +
>>  	return 0;
>>  }
>> diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
>> index b9d1932..adcfe7d 100644
>> --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
>> +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
>> @@ -179,5 +179,9 @@ DPDK_16.11 {
>>  	rte_eal_soc_register;
>>  	rte_eal_soc_unregister;
>>  	rte_eal_soc_dump;
>> +	rte_eal_soc_match;
>> +	rte_eal_soc_detach;
>> +	rte_eal_soc_probe;
>> +	rte_eal_soc_probe_one;
>>
>>  } DPDK_16.07;
>
> Regards
> Jan
>

I hope I have covered all your comments. That was an exhaustive review. 
Thanks a lot for your time.

Lets work to resolve the architectural issues revolving around SoC 
scan/match.

-
Shreyansh
  
Jan Viktorin Sept. 19, 2016, 11:34 a.m. UTC | #3
On Mon, 19 Sep 2016 12:17:53 +0530
Shreyansh Jain <shreyansh.jain@nxp.com> wrote:

> Hi Jan,
> 
> On Friday 16 September 2016 05:57 PM, Jan Viktorin wrote:
> > On Fri, 9 Sep 2016 14:13:50 +0530
> > Shreyansh Jain <shreyansh.jain@nxp.com> wrote:
> >  
> >> Each SoC PMD registers a set of callback for scanning its own bus/infra and
> >> matching devices to drivers when probe is called.
> >> This patch introduces the infra for calls to SoC scan on rte_eal_soc_init()
> >> and match on rte_eal_soc_probe().
> >>
> >> Patch also adds test case for scan and probe.
> >>
> >> Signed-off-by: Jan Viktorin <viktorin@rehivetech.com>
> >> Signed-off-by: Shreyansh Jain <shreyansh.jain@nxp.com>
> >> Signed-off-by: Hemant Agrawal <hemant.agrawal@nxp.com>
> >> ---
> >>  app/test/test_soc.c                             | 138 ++++++++++++++-
> >>  lib/librte_eal/bsdapp/eal/rte_eal_version.map   |   4 +
> >>  lib/librte_eal/common/eal_common_soc.c          | 215 ++++++++++++++++++++++++
> >>  lib/librte_eal/common/include/rte_soc.h         |  51 ++++++
> >>  lib/librte_eal/linuxapp/eal/eal.c               |   5 +
> >>  lib/librte_eal/linuxapp/eal/eal_soc.c           |  16 ++
> >>  lib/librte_eal/linuxapp/eal/rte_eal_version.map |   4 +
> >>  7 files changed, 432 insertions(+), 1 deletion(-)
> >>

[...]

> 
> >  
> >> +static void test_soc_scan_dev0_cb(void);  
> >
> > Similar here, something like "match_by_name".
> >  
> >> +static int test_soc_match_dev0_cb(struct rte_soc_driver *drv,
> >> +				  struct rte_soc_device *dev);  
> >
> > I prefer an empty line here.  
> 
> Do we really place newlines in function declarations? That doesn't 
> really help anything, until and unless some comments are added to those. 
> Anyways, rather than added blank lines, I will add some comments - those 
> are indeed misssing.

It took me a while to parse those lines... If they are logically grouped,
it'd be ok. Comments might be helpful. However, here these are forward
declarations so it's a question whether to put comments here or to the
implementations below.

> 
> >
> >
> > ditto...  
> 
> Will add comments.
> 
> >  
> >> +static void test_soc_scan_dev1_cb(void);  
> >
> > ditto...  
> 
> Same here, I prefer comment rather than blank line.
> 
> >  

[...]

> >>
> >> +/* Test Probe (scan and match) functionality */
> >> +static int
> >> +test_soc_init_and_probe(void)  
> >
> > You say to test scan and match. I'd prefer to reflect this in the name
> > of the test. Otherwise, it seems you are testing init and probe which
> > is not true, I think.  
> 
> I agree. I will update the name of the function.
> 
> >
> > Do you test that "match principle works" or that "match functions are OK"
> > or "match functions are called as expected", ...?  
> 
> "match functions are called as expected"

OK, but there is no assert that says "yes, the match function has been called".
In other words, it is not an automatic test and it does not help to verify
that the code is working.

I think that you should test that a particular match function succeeds or not.
So again, I don't consider this to be a test. It does not verify anything.

> The model for the patchset was to allow PMDs to write their own match 
> and hence, verifying a particular match is not definitive. Rather, the 

If you want to verify a particular match implementation then there should
be a particular test verifying that implementation, e.g. test_match_compatible(),
test_match_proprietary, test_match_by_name.

However, this is testing the rte_eal_soc_probe (at least, I understand it that way).
The probe iterates over devices and drivers and matches them. Thus, the argument
"a particular match is not definitive" seems to be irrelevant here. You should build
a testing match function like "match_always" that verifies the probe is working. Not
that the "match" is working.

> test case simply confirms that a SoC based PMD would be able to 

It does not confirm anything from my point of view. You *always* print "successful"
at the end of this test (see below).

> implement its own match/scan and these would be called from EAL as expected.
> 
> >  
> >> +{
> >> +	struct rte_soc_driver *drv;
> >> +
> >> +	/* Registering dummy drivers */
> >> +	rte_eal_soc_register(&empty_pmd0.soc_drv);
> >> +	rte_eal_soc_register(&empty_pmd1.soc_drv);
> >> +	/* Assuming that test_register_unregister is working, not verifying
> >> +	 * that drivers are indeed registered
> >> +	*/
> >> +
> >> +	/* rte_eal_soc_init is called by rte_eal_init, which in turn calls the
> >> +	 * scan_fn of each driver.

So, I'd comment this as something like:

"mimic rte_eal_soc_init to prepare for the rte_eal_soc_probe"

> >> +	 */
> >> +	TAILQ_FOREACH(drv, &soc_driver_list, next) {
> >> +		if (drv && drv->scan_fn)
> >> +			drv->scan_fn();
> >> +	}  
> >
> > Here, I suppose you mimic the rte_eal_soc_init?  
> 
> Yes.
> 
> >  
> >> +
> >> +	/* rte_eal_init() would perform other inits here */
> >> +
> >> +	/* Probe would link the SoC devices<=>drivers */
> >> +	rte_eal_soc_probe();
> >> +
> >> +	/* Unregistering dummy drivers */
> >> +	rte_eal_soc_unregister(&empty_pmd0.soc_drv);
> >> +	rte_eal_soc_unregister(&empty_pmd1.soc_drv);
> >> +
> >> +	free(empty_pmd0.soc_dev.addr.name);
> >> +
> >> +	printf("%s has been successful\n", __func__);  
> >
> > How you detect it is unsuccessful? Is it possible to fail in this test?
> > A test that can never fail is in fact not a test :).  
> 
> The design assumption for SoC patcheset was: A PMDs scan is called to 
> find devices on its bus (PMD ~ bus). Whether devices are found or not, 
> is irrelevant to EAL - whether that is because of error or actually no 
> devices were available.
> With the above logic, no 'success/failure' is checked in the test. It is 
> simply a verification of EAL's ability to link the PMD with it 
> (scan/match function pointers).

I am sorry, I disagree. You always print "successful". The only way to fail
here is a SIGSEGV or other very serious system failure. But we test rte_eal_soc_probe
and not system failures.

> 
> >  
> >> +	return 0;
> >> +}
> >> +
> >>  /* save real devices and drivers until the tests finishes */

[...]

> >> diff --git a/lib/librte_eal/common/eal_common_soc.c b/lib/librte_eal/common/eal_common_soc.c
> >> index 5dcddc5..bb87a67 100644
> >> --- a/lib/librte_eal/common/eal_common_soc.c
> >> +++ b/lib/librte_eal/common/eal_common_soc.c
> >> @@ -36,6 +36,8 @@
> >>  #include <sys/queue.h>
> >>
> >>  #include <rte_log.h>
> >> +#include <rte_common.h>
> >> +#include <rte_soc.h>
> >>
> >>  #include "eal_private.h"
> >>
> >> @@ -45,6 +47,213 @@ struct soc_driver_list soc_driver_list =
> >>  struct soc_device_list soc_device_list =
> >>  	TAILQ_HEAD_INITIALIZER(soc_device_list);
> >>
> >> +/* Default SoC device<->Driver match handler function */  
> >
> > I think this comment is redundant. All this is already said in the rte_soc.h.  
> 
> Ok. I will remove it from here and if need be, update the rte_soc.h to 
> have elaborate comments.
> 
> >  
> >> +int
> >> +rte_eal_soc_match(struct rte_soc_driver *drv, struct rte_soc_device *dev)
> >> +{
> >> +	int i, j;
> >> +
> >> +	RTE_VERIFY(drv != NULL && drv->id_table != NULL);
> >> +	RTE_VERIFY(dev != NULL && dev->id != NULL);
> >> +
> >> +	for (i = 0; drv->id_table[i].compatible; ++i) {
> >> +		const char *drv_compat = drv->id_table[i].compatible;
> >> +
> >> +		for (j = 0; dev->id[j].compatible; ++j) {
> >> +			const char *dev_compat = dev->id[j].compatible;
> >> +
> >> +			if (!strcmp(drv_compat, dev_compat))
> >> +				return 0;
> >> +		}
> >> +	}
> >> +
> >> +	return 1;
> >> +}
> >> +

A redundant empty line here...

> >> +
> >> +static int
> >> +rte_eal_soc_probe_one_driver(struct rte_soc_driver *drv,
> >> +			     struct rte_soc_device *dev)
> >> +{
> >> +	int ret = 1;
> >> +  
> >
> > I think, the RTE_VERIFY(dev->match_fn) might be good here.
> > It avoids any doubts about the validity of the pointer.  
> 
> That has already been done in rte_eal_soc_register which is called when 
> PMDs are registering themselves through DRIVER_REGISTER_SOC. That would 
> prevent any PMD leaking through to this stage without a proper 
> match_fn/scan_fn.

Well, yes. It seems to be redundant. However, it would emphesize the fact
that this function expects that match_fn is set.

In the rte_eal_soc_register, the RTE_VERIFY says "The API requires those".

But when I review I do not always see all the context. It is not safe for
me to assume that there was probably some RTE_VERIFY in the path... It is
not a fast path so it does not hurt the performance in anyway.

> 
> >  
> >> +	ret = drv->match_fn(drv, dev);
> >> +	if (ret) {
> >> +		RTE_LOG(DEBUG, EAL,
> >> +			" match function failed, skipping\n");  
> >
> > Is this a failure? I think it is not. Failure would be if the match
> > function cannot execute correctly. This is more like "no-match".  
> 
> The log message is misleading. This is _not_ a failure but simply a 
> 'no-match'. I will update this.
> 
> >
> > When debugging, I'd like to see more a message like "driver <name> does not match".  
> 
> Problem would be about '<name>' of a driver. There is already another 
> discussion about SoC capability/platform bus definitions - probably I 
> will wait for that so as to define what a '<name>' for a driver and 
> device is.
> In this case, the key reason for not adding such a message was because 
> it was assumed PMDs are black boxes with EAL not even assuming what 
> '<name>' means. Anyways, it is better to discuss these things in that 
> other email.

I am not sure which thread do you mean... Can you point me there, please?

> 
> >  
> >> +		return ret;

[...]

> >> +
> >> +int
> >> +rte_eal_soc_probe_one(const struct rte_soc_addr *addr)
> >> +{
> >> +	struct rte_soc_device *dev = NULL;
> >> +	int ret = 0;
> >> +
> >> +	if (addr == NULL)
> >> +		return -1;
> >> +
> >> +	/* unlike pci, in case of soc, it the responsibility of the soc driver
> >> +	 * to check during init whether device has been updated since last add.  
> >
> > Why? Can you give a more detailed explanation?  
> 
> For this patch, I have _not_ assumed anything for a SoC's 
> bus/driver/device model. In absence of a proper standard, each SoC is 
> unique - categorizing all SoC under a platform bus, for example, would 
> only mean assuming platform bus is a standard.
> Best judge for the layout of SoC devices is the SoC PMD (which is also 
> like a bus driver, other than being a device driver).
> 
> Once again, if the discussion in other thread comes to a logical 
> conclusion, this would get updated.

Again, I am not sure which thread discusses this topic.

I just don't like the idea to leave update responsibility on PMDs.
Maybe, there can be a callback update in rte_soc_device (set or not-set
by the custom scan function) that is to be called here.

> 
> >  
> >> +	 */
> >> +
> >> +	TAILQ_FOREACH(dev, &soc_device_list, next) {
> >> +		if (rte_eal_compare_soc_addr(&dev->addr, addr))
> >> +			continue;
> >> +
> >> +		ret = soc_probe_all_drivers(dev);
> >> +		if (ret < 0)
> >> +			goto err_return;
> >> +		return 0;
> >> +	}
> >> +	return -1;
> >> +
> >> +err_return:
> >> +	RTE_LOG(WARNING, EAL,
> >> +		"Requested device %s cannot be used\n", addr->name);
> >> +	return -1;
> >> +}
> >> +
> >> +/*
> >> + * Scan the SoC devices and call the devinit() function for all registered
> >> + * drivers that have a matching entry in its id_table for discovered devices.
> >> + */  
> >
> > Should be in header. Here it is redundant.  
> 
> Ok. I will move to rte_soc.h.
> 
> >  
> >> +int
> >> +rte_eal_soc_probe(void)
> >> +{
> >> +	struct rte_soc_device *dev = NULL;
> >> +	int ret = 0;
> >> +
> >> +	TAILQ_FOREACH(dev, &soc_device_list, next) {
> >> +		ret = soc_probe_all_drivers(dev);
> >> +		if (ret < 0)
> >> +			rte_exit(EXIT_FAILURE, "Requested device %s"
> >> +				 " cannot be used\n", dev->addr.name);
> >> +	}
> >> +
> >> +	return 0;
> >> +}
> >> +
> >>  /* dump one device */
> >>  static int
> >>  soc_dump_one_device(FILE *f, struct rte_soc_device *dev)
> >> @@ -79,6 +288,12 @@ rte_eal_soc_dump(FILE *f)
> >>  void
> >>  rte_eal_soc_register(struct rte_soc_driver *driver)
> >>  {
> >> +	/* For a valid soc driver, match and scan function
> >> +	 * should be provided.
> >> +	 */  
> >
> > This comment should be in the header file.  
> 
> Actually there is no valueable addition made by this comment. RTE_VERIFY 
> is self explanatory. I will remove the comment all together.

No, the comment must be present for the rte_eal_soc_register function as
its documentation. The RTE_VERIFY is not an excuse, it just verifies the
fact that the caller understands the documentation and that she didn't make
a mistake.

> 
> >  
> >> +	RTE_VERIFY(driver != NULL);
> >> +	RTE_VERIFY(driver->match_fn != NULL);
> >> +	RTE_VERIFY(driver->scan_fn != NULL);
> >>  	TAILQ_INSERT_TAIL(&soc_driver_list, driver, next);
> >>  }
> >>
> >> diff --git a/lib/librte_eal/common/include/rte_soc.h b/lib/librte_eal/common/include/rte_soc.h
> >> index c6f98eb..bfb49a2 100644
> >> --- a/lib/librte_eal/common/include/rte_soc.h
> >> +++ b/lib/librte_eal/common/include/rte_soc.h
> >> @@ -97,6 +97,16 @@ typedef int (soc_devinit_t)(struct rte_soc_driver *, struct rte_soc_device *);
> >>  typedef int (soc_devuninit_t)(struct rte_soc_device *);
> >>
> >>  /**
> >> + * SoC device scan callback, called from rte_eal_soc_init.  
> >
> > Can you explain what is the goal of the callback?
> > What is the expected behaviour.  
> 
> EAL would call the scan of each registered SoC PMD 
> (DRIVER_REGISTER_SOC). This scan is responsible for finding devices on 
> SoC's specific bus and add them to SoC device_list. This is a callback 
> because SoC don't have a generalization like PCI. A SoC is not 
> necessarily a platform bus either (what original patch series assumed).

In doc comment...

> 
> >
> > It returns void so it seems it can never fail. Is this correct?
> > I can image that to scan for devices, I need to check some file-system
> > structure which can be unavailable...  
> 
> This is what I had in mind:
> That is true, it never fails. It is expected that scan function simply 
> ignores (logs error) and moves ahead. A local error for a particular SoC 
> (I agree, there might not be more than one SoC) doesn't necessarily mean 
> that complete DPDK Application should quit. It only means that 
> application user should get some error/warning/message about failure.

I understand, this is OK then.

> 
> >  
> >> + */
> >> +typedef void (soc_scan_t)(void);  
> >
> > You are missing the '*' in (*soc_scan_t).  
> 
> That was put in the definition in the rte_soc_driver - but, I see you 
> have already commented there. I will add the '*' here and remove from there.
> 
> >  
> >> +
> >> +/**
> >> + * Custom device<=>driver match callback for SoC  
> >
> > Can you explain the semantics (return values), please?  
> 
> rte_soc.h already has explanation on the expected semantics over 
> rte_eal_soc_match - the default implementation. But, I agree, it should 
> be above this declaration.

True ;).

> 
> >  
> >> + */
> >> +typedef int (soc_match_t)(struct rte_soc_driver *, struct rte_soc_device *);  
> >

[...]

> 
> >
> > I think, we should tell the users that scan_fn and match_fn must be always set
> > to something.  
> 
> How? I think it would be part of documentation, isn't it?

Yes. It should be documented in the comment for rte_eal_soc_init.
My comment was misplaced a bit...

> Also, rte_eal_soc_init() already enforces this check with RTE_VERIFY.
> 
> >  
> >>  	const struct rte_soc_id *id_table; /**< ID table, NULL terminated */
> >>  };
> >>
> >> @@ -146,6 +158,45 @@ rte_eal_compare_soc_addr(const struct rte_soc_addr *a0,
> >>  }
> >>
> >>  /**
> >> + * Default function for matching the Soc driver with device. Each driver can
> >> + * either use this function or define their own soc matching function.
> >> + * This function relies on the compatible string extracted from sysfs. But,
> >> + * a SoC might have different way of identifying its devices. Such SoC can
> >> + * override match_fn.
> >> + *
> >> + * @return
> >> + * 	 0 on success
> >> + *	-1 when no match found
> >> +  */
> >> +int
> >> +rte_eal_soc_match(struct rte_soc_driver *drv, struct rte_soc_device *dev);  
> >
> > What about naming it
> >
> > 	rte_eal_soc_match_default  
> 
> Ok.
> 
> >
> > or maybe better
> >
> > 	rte_eal_soc_match_compatible
> >
> > what do you think?  
> 
>  From what I had in mind - the discussion about SoC not necessarily 
> being a Platform bus - 'compatible' doesn't look fine to me. But again, 
> it is still open debate so - I will wait until that is conlcuded.

Why? The current implementation works this way:

int
rte_eal_soc_match(struct rte_soc_driver *drv, struct rte_soc_device *dev)
{
	int i, j;

	RTE_VERIFY(drv != NULL && drv->id_table != NULL);
	RTE_VERIFY(dev != NULL && dev->id != NULL);

	for (i = 0; drv->id_table[i].compatible; ++i) {
		const char *drv_compat = drv->id_table[i].compatible;

		for (j = 0; dev->id[j].compatible; ++j) {
			const char *dev_compat = dev->id[j].compatible;

			if (!strcmp(drv_compat, dev_compat))
				return 0;
		}
	}

	return 1;
}

It checks for compatible. So why not to name it that way? If you provide
a match testing the name of devices then it can be named *_match_name.

> 
> >  
> >> +
> >> +/**
> >> + * Probe SoC devices for registered drivers.
> >> + */
> >> +int rte_eal_soc_probe(void);
> >> +
> >> +/**
> >> + * Probe the single SoC device.
> >> + */
> >> +int rte_eal_soc_probe_one(const struct rte_soc_addr *addr);
> >> +
> >> +/**
> >> + * Close the single SoC device.
> >> + *
> >> + * Scan the SoC devices and find the SoC device specified by the SoC
> >> + * address, then call the devuninit() function for registered driver
> >> + * that has a matching entry in its id_table for discovered device.
> >> + *
> >> + * @param addr
> >> + *	The SoC address to close.
> >> + * @return
> >> + *   - 0 on success.
> >> + *   - Negative on error.
> >> + */
> >> +int rte_eal_soc_detach(const struct rte_soc_addr *addr);
> >> +
> >> +/**
> >>   * Dump discovered SoC devices.
> >>   */
> >>  void rte_eal_soc_dump(FILE *f);
> >> diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
> >> index 15c8c3d..147b601 100644
> >> --- a/lib/librte_eal/linuxapp/eal/eal.c
> >> +++ b/lib/librte_eal/linuxapp/eal/eal.c
> >> @@ -70,6 +70,7 @@
> >>  #include <rte_cpuflags.h>
> >>  #include <rte_interrupts.h>
> >>  #include <rte_pci.h>
> >> +#include <rte_soc.h>
> >>  #include <rte_dev.h>
> >>  #include <rte_devargs.h>
> >>  #include <rte_common.h>
> >> @@ -881,6 +882,10 @@ rte_eal_init(int argc, char **argv)
> >>  	if (rte_eal_pci_probe())
> >>  		rte_panic("Cannot probe PCI\n");
> >>
> >> +	/* Probe & Initialize SoC devices */
> >> +	if (rte_eal_soc_probe())
> >> +		rte_panic("Cannot probe SoC\n");
> >> +
> >>  	rte_eal_mcfg_complete();
> >>
> >>  	return fctret;
> >> diff --git a/lib/librte_eal/linuxapp/eal/eal_soc.c b/lib/librte_eal/linuxapp/eal/eal_soc.c
> >> index 04848b9..5f961c4 100644
> >> --- a/lib/librte_eal/linuxapp/eal/eal_soc.c
> >> +++ b/lib/librte_eal/linuxapp/eal/eal_soc.c
> >> @@ -52,5 +52,21 @@
> >>  int
> >>  rte_eal_soc_init(void)
> >>  {
> >> +	struct rte_soc_driver *drv;
> >> +
> >> +	/* for debug purposes, SoC can be disabled */
> >> +	if (internal_config.no_soc)
> >> +		return 0;
> >> +
> >> +	/* For each registered driver, call their scan routine to perform any
> >> +	 * custom scan for devices (for example, custom buses)
> >> +	 */
> >> +	TAILQ_FOREACH(drv, &soc_driver_list, next) {  
> >
> > Is it possible to have drv->scan_fn == NULL? I suppose, this is invalid.
> > I'd prefer to have RTE_VERIFY for this check.  
> 
> rte_eal_soc_init() has this check already. Driver wouldn't even be 
> registered in case scan/match are not implemented.

True, but when reviewing or refactoring, you cannot see always all this context.
It is more defensive to explain here "don't worry, the scan_fn is always set here".

> 
> >  
> >> +		if (drv && drv->scan_fn) {
> >> +			drv->scan_fn();
> >> +			/* Ignore all errors from this */
> >> +		}  
> >  
> >> +	}
> >> +
> >>  	return 0;
> >>  }
> >> diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> >> index b9d1932..adcfe7d 100644
> >> --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> >> +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> >> @@ -179,5 +179,9 @@ DPDK_16.11 {
> >>  	rte_eal_soc_register;
> >>  	rte_eal_soc_unregister;
> >>  	rte_eal_soc_dump;
> >> +	rte_eal_soc_match;
> >> +	rte_eal_soc_detach;
> >> +	rte_eal_soc_probe;
> >> +	rte_eal_soc_probe_one;
> >>
> >>  } DPDK_16.07;  
> >
> > Regards
> > Jan
> >  
> 
> I hope I have covered all your comments. That was an exhaustive review. 
> Thanks a lot for your time.
> 
> Lets work to resolve the architectural issues revolving around SoC 
> scan/match.

;)

> 
> -
> Shreyansh
  
Shreyansh Jain Sept. 20, 2016, 6:46 a.m. UTC | #4
Hi Jan,

> -----Original Message-----
> From: Jan Viktorin [mailto:viktorin@rehivetech.com]
> Sent: Monday, September 19, 2016 5:04 PM
> To: Shreyansh Jain <shreyansh.jain@nxp.com>
> Cc: dev@dpdk.org; Hemant Agrawal <hemant.agrawal@nxp.com>
> Subject: Re: [PATCH v3 06/15] eal/soc: implement probing of drivers
> 
> On Mon, 19 Sep 2016 12:17:53 +0530
> Shreyansh Jain <shreyansh.jain@nxp.com> wrote:
> 
> > Hi Jan,
> >
> > On Friday 16 September 2016 05:57 PM, Jan Viktorin wrote:
> > > On Fri, 9 Sep 2016 14:13:50 +0530
> > > Shreyansh Jain <shreyansh.jain@nxp.com> wrote:
> > >
> > >> Each SoC PMD registers a set of callback for scanning its own bus/infra
> and
> > >> matching devices to drivers when probe is called.
> > >> This patch introduces the infra for calls to SoC scan on
> rte_eal_soc_init()
> > >> and match on rte_eal_soc_probe().
> > >>
> > >> Patch also adds test case for scan and probe.
> > >>
> > >> Signed-off-by: Jan Viktorin <viktorin@rehivetech.com>
> > >> Signed-off-by: Shreyansh Jain <shreyansh.jain@nxp.com>
> > >> Signed-off-by: Hemant Agrawal <hemant.agrawal@nxp.com>
> > >> ---
> > >>  app/test/test_soc.c                             | 138 ++++++++++++++-
> > >>  lib/librte_eal/bsdapp/eal/rte_eal_version.map   |   4 +
> > >>  lib/librte_eal/common/eal_common_soc.c          | 215
> ++++++++++++++++++++++++
> > >>  lib/librte_eal/common/include/rte_soc.h         |  51 ++++++
> > >>  lib/librte_eal/linuxapp/eal/eal.c               |   5 +
> > >>  lib/librte_eal/linuxapp/eal/eal_soc.c           |  16 ++
> > >>  lib/librte_eal/linuxapp/eal/rte_eal_version.map |   4 +
> > >>  7 files changed, 432 insertions(+), 1 deletion(-)
> > >>
> 
> [...]
> 
> >
> > >
> > >> +static void test_soc_scan_dev0_cb(void);
> > >
> > > Similar here, something like "match_by_name".
> > >
> > >> +static int test_soc_match_dev0_cb(struct rte_soc_driver *drv,
> > >> +				  struct rte_soc_device *dev);
> > >
> > > I prefer an empty line here.
> >
> > Do we really place newlines in function declarations? That doesn't
> > really help anything, until and unless some comments are added to those.
> > Anyways, rather than added blank lines, I will add some comments - those
> > are indeed misssing.
> 
> It took me a while to parse those lines... If they are logically grouped,
> it'd be ok. Comments might be helpful. However, here these are forward
> declarations so it's a question whether to put comments here or to the
> implementations below.

Ok. I will keep this mind in v4.

> 
> >
> > >
> > >
> > > ditto...
> >
> > Will add comments.
> >
> > >
> > >> +static void test_soc_scan_dev1_cb(void);
> > >
> > > ditto...
> >
> > Same here, I prefer comment rather than blank line.
> >
> > >
> 
> [...]
> 
> > >>
> > >> +/* Test Probe (scan and match) functionality */
> > >> +static int
> > >> +test_soc_init_and_probe(void)
> > >
> > > You say to test scan and match. I'd prefer to reflect this in the name
> > > of the test. Otherwise, it seems you are testing init and probe which
> > > is not true, I think.
> >
> > I agree. I will update the name of the function.
> >
> > >
> > > Do you test that "match principle works" or that "match functions are OK"
> > > or "match functions are called as expected", ...?
> >
> > "match functions are called as expected"
> 
> OK, but there is no assert that says "yes, the match function has been
> called".
> In other words, it is not an automatic test and it does not help to verify
> that the code is working.
> 
> I think that you should test that a particular match function succeeds or
> not.
> So again, I don't consider this to be a test. It does not verify anything.
 
Agree with you. This is not a 'test' in real sense. I will revisit and see what can be done about this. Probably your method of 'match' succeeds with 'always_find_device0' style function.

> 
> > The model for the patchset was to allow PMDs to write their own match
> > and hence, verifying a particular match is not definitive. Rather, the
> 
> If you want to verify a particular match implementation then there should
> be a particular test verifying that implementation, e.g.
> test_match_compatible(),
> test_match_proprietary, test_match_by_name.
> 
> However, this is testing the rte_eal_soc_probe (at least, I understand it
> that way).
> The probe iterates over devices and drivers and matches them. Thus, the
> argument
> "a particular match is not definitive" seems to be irrelevant here. You
> should build
> a testing match function like "match_always" that verifies the probe is
> working. Not
> that the "match" is working.
 
Ok. 'match_always' called after 'always_find_device0' like scan function. So, essentially rather than a functional match, the testcase only checks if these handlers can be called or not. The naming of such handlers in test case would explain the user what the intention of the test is, rather than its outcome. Is this what you are suggesting? 

> 
> > test case simply confirms that a SoC based PMD would be able to
> 
> It does not confirm anything from my point of view. You *always* print
> "successful"
> at the end of this test (see below).
> 
> > implement its own match/scan and these would be called from EAL as
> expected.
> >
> > >
> > >> +{
> > >> +	struct rte_soc_driver *drv;
> > >> +
> > >> +	/* Registering dummy drivers */
> > >> +	rte_eal_soc_register(&empty_pmd0.soc_drv);
> > >> +	rte_eal_soc_register(&empty_pmd1.soc_drv);
> > >> +	/* Assuming that test_register_unregister is working, not
> verifying
> > >> +	 * that drivers are indeed registered
> > >> +	*/
> > >> +
> > >> +	/* rte_eal_soc_init is called by rte_eal_init, which in turn
> calls the
> > >> +	 * scan_fn of each driver.
> 
> So, I'd comment this as something like:
> 
> "mimic rte_eal_soc_init to prepare for the rte_eal_soc_probe"
 
Agree.

> 
> > >> +	 */
> > >> +	TAILQ_FOREACH(drv, &soc_driver_list, next) {
> > >> +		if (drv && drv->scan_fn)
> > >> +			drv->scan_fn();
> > >> +	}
> > >
> > > Here, I suppose you mimic the rte_eal_soc_init?
> >
> > Yes.
> >
> > >
> > >> +
> > >> +	/* rte_eal_init() would perform other inits here */
> > >> +
> > >> +	/* Probe would link the SoC devices<=>drivers */
> > >> +	rte_eal_soc_probe();
> > >> +
> > >> +	/* Unregistering dummy drivers */
> > >> +	rte_eal_soc_unregister(&empty_pmd0.soc_drv);
> > >> +	rte_eal_soc_unregister(&empty_pmd1.soc_drv);
> > >> +
> > >> +	free(empty_pmd0.soc_dev.addr.name);
> > >> +
> > >> +	printf("%s has been successful\n", __func__);
> > >
> > > How you detect it is unsuccessful? Is it possible to fail in this test?
> > > A test that can never fail is in fact not a test :).
> >
> > The design assumption for SoC patcheset was: A PMDs scan is called to
> > find devices on its bus (PMD ~ bus). Whether devices are found or not,
> > is irrelevant to EAL - whether that is because of error or actually no
> > devices were available.
> > With the above logic, no 'success/failure' is checked in the test. It is
> > simply a verification of EAL's ability to link the PMD with it
> > (scan/match function pointers).
> 
> I am sorry, I disagree. You always print "successful". The only way to fail
> here is a SIGSEGV or other very serious system failure. But we test
> rte_eal_soc_probe
> and not system failures.
 
Ok. I am not yet clear how the test case would be created, except what I have mentioned above that rather than being functional, testcase only explains the case through function naming.
The premise on which match/scan are based is that they would be implemented by the respective PMD. Which makes testing of such function either irrelevant or simply a help to user to understand how SoC PMD should be created.
If this persists, probably I would rather remove this test case all together. It doesn't serve any purpose, which you have rightly pointed out.

> 
> >
> > >
> > >> +	return 0;
> > >> +}
> > >> +
> > >>  /* save real devices and drivers until the tests finishes */
> 
> [...]
> 
> > >> diff --git a/lib/librte_eal/common/eal_common_soc.c
> b/lib/librte_eal/common/eal_common_soc.c
> > >> index 5dcddc5..bb87a67 100644
> > >> --- a/lib/librte_eal/common/eal_common_soc.c
> > >> +++ b/lib/librte_eal/common/eal_common_soc.c
> > >> @@ -36,6 +36,8 @@
> > >>  #include <sys/queue.h>
> > >>
> > >>  #include <rte_log.h>
> > >> +#include <rte_common.h>
> > >> +#include <rte_soc.h>
> > >>
> > >>  #include "eal_private.h"
> > >>
> > >> @@ -45,6 +47,213 @@ struct soc_driver_list soc_driver_list =
> > >>  struct soc_device_list soc_device_list =
> > >>  	TAILQ_HEAD_INITIALIZER(soc_device_list);
> > >>
> > >> +/* Default SoC device<->Driver match handler function */
> > >
> > > I think this comment is redundant. All this is already said in the
> rte_soc.h.
> >
> > Ok. I will remove it from here and if need be, update the rte_soc.h to
> > have elaborate comments.
> >
> > >
> > >> +int
> > >> +rte_eal_soc_match(struct rte_soc_driver *drv, struct rte_soc_device
> *dev)
> > >> +{
> > >> +	int i, j;
> > >> +
> > >> +	RTE_VERIFY(drv != NULL && drv->id_table != NULL);
> > >> +	RTE_VERIFY(dev != NULL && dev->id != NULL);
> > >> +
> > >> +	for (i = 0; drv->id_table[i].compatible; ++i) {
> > >> +		const char *drv_compat = drv->id_table[i].compatible;
> > >> +
> > >> +		for (j = 0; dev->id[j].compatible; ++j) {
> > >> +			const char *dev_compat = dev->id[j].compatible;
> > >> +
> > >> +			if (!strcmp(drv_compat, dev_compat))
> > >> +				return 0;
> > >> +		}
> > >> +	}
> > >> +
> > >> +	return 1;
> > >> +}
> > >> +
> 
> A redundant empty line here...

Yes, I will remove this. 

> 
> > >> +
> > >> +static int
> > >> +rte_eal_soc_probe_one_driver(struct rte_soc_driver *drv,
> > >> +			     struct rte_soc_device *dev)
> > >> +{
> > >> +	int ret = 1;
> > >> +
> > >
> > > I think, the RTE_VERIFY(dev->match_fn) might be good here.
> > > It avoids any doubts about the validity of the pointer.
> >
> > That has already been done in rte_eal_soc_register which is called when
> > PMDs are registering themselves through DRIVER_REGISTER_SOC. That would
> > prevent any PMD leaking through to this stage without a proper
> > match_fn/scan_fn.
> 
> Well, yes. It seems to be redundant. However, it would emphesize the fact
> that this function expects that match_fn is set.
> 
> In the rte_eal_soc_register, the RTE_VERIFY says "The API requires those".
> 
> But when I review I do not always see all the context. It is not safe for
> me to assume that there was probably some RTE_VERIFY in the path... It is
> not a fast path so it does not hurt the performance in anyway.
> 

You mean RTE_VERIFY should be duplicated so that code readability increases?
I am not really convinced on that.
My opinion: rte_eal_soc_*probe* series shouldn't actually worry about whether a valid PMD has been plugged in or. It should simply assume that having reached here, all is well on the PMDs definition side.

On the contrary, placing a RTE_VERIFY here would only state the reader that it is possible to reach this code path without a valid match function.

> >
> > >
> > >> +	ret = drv->match_fn(drv, dev);
> > >> +	if (ret) {
> > >> +		RTE_LOG(DEBUG, EAL,
> > >> +			" match function failed, skipping\n");
> > >
> > > Is this a failure? I think it is not. Failure would be if the match
> > > function cannot execute correctly. This is more like "no-match".
> >
> > The log message is misleading. This is _not_ a failure but simply a
> > 'no-match'. I will update this.
> >
> > >
> > > When debugging, I'd like to see more a message like "driver <name> does
> not match".
> >
> > Problem would be about '<name>' of a driver. There is already another
> > discussion about SoC capability/platform bus definitions - probably I
> > will wait for that so as to define what a '<name>' for a driver and
> > device is.
> > In this case, the key reason for not adding such a message was because
> > it was assumed PMDs are black boxes with EAL not even assuming what
> > '<name>' means. Anyways, it is better to discuss these things in that
> > other email.
> 
> I am not sure which thread do you mean... Can you point me there, please?
 
Sorry, somehow I forgot to add that:
http://dpdk.org/ml/archives/dev/2016-September/046933.html

One of the discussion point in above thread is whether platform bus is a good assumption for a SoC driver or not. Based on that '<name>' (or any other identifier, like 'compatible'), the log can be printed as you suggested above.

> 
> >
> > >
> > >> +		return ret;
> 
> [...]
> 
> > >> +
> > >> +int
> > >> +rte_eal_soc_probe_one(const struct rte_soc_addr *addr)
> > >> +{
> > >> +	struct rte_soc_device *dev = NULL;
> > >> +	int ret = 0;
> > >> +
> > >> +	if (addr == NULL)
> > >> +		return -1;
> > >> +
> > >> +	/* unlike pci, in case of soc, it the responsibility of the soc
> driver
> > >> +	 * to check during init whether device has been updated since
> last add.
> > >
> > > Why? Can you give a more detailed explanation?
> >
> > For this patch, I have _not_ assumed anything for a SoC's
> > bus/driver/device model. In absence of a proper standard, each SoC is
> > unique - categorizing all SoC under a platform bus, for example, would
> > only mean assuming platform bus is a standard.
> > Best judge for the layout of SoC devices is the SoC PMD (which is also
> > like a bus driver, other than being a device driver).
> >
> > Once again, if the discussion in other thread comes to a logical
> > conclusion, this would get updated.
> 
> Again, I am not sure which thread discusses this topic.

Copying the link again: http://dpdk.org/ml/archives/dev/2016-September/046933.html

> 
> I just don't like the idea to leave update responsibility on PMDs.
> Maybe, there can be a callback update in rte_soc_device (set or not-set
> by the custom scan function) that is to be called here.
 
And for me, the very idea of 'leave update responsibility to PMD' is appealing in case of SoC.
Though, I think your suggestion of 'callback' is pretty good. Just to clarify, is my understanding correct: PMD registers a callback for 'update' and EAL would call that when it reaches this code path (of update device information during probe). The callback can be passed the device found so that PMD can update that. Is that what you too had in mind?

> 
> >
> > >
> > >> +	 */
> > >> +
> > >> +	TAILQ_FOREACH(dev, &soc_device_list, next) {
> > >> +		if (rte_eal_compare_soc_addr(&dev->addr, addr))
> > >> +			continue;
> > >> +
> > >> +		ret = soc_probe_all_drivers(dev);
> > >> +		if (ret < 0)
> > >> +			goto err_return;
> > >> +		return 0;
> > >> +	}
> > >> +	return -1;
> > >> +
> > >> +err_return:
> > >> +	RTE_LOG(WARNING, EAL,
> > >> +		"Requested device %s cannot be used\n", addr->name);
> > >> +	return -1;
> > >> +}
> > >> +
> > >> +/*
> > >> + * Scan the SoC devices and call the devinit() function for all
> registered
> > >> + * drivers that have a matching entry in its id_table for discovered
> devices.
> > >> + */
> > >
> > > Should be in header. Here it is redundant.
> >
> > Ok. I will move to rte_soc.h.
> >
> > >
> > >> +int
> > >> +rte_eal_soc_probe(void)
> > >> +{
> > >> +	struct rte_soc_device *dev = NULL;
> > >> +	int ret = 0;
> > >> +
> > >> +	TAILQ_FOREACH(dev, &soc_device_list, next) {
> > >> +		ret = soc_probe_all_drivers(dev);
> > >> +		if (ret < 0)
> > >> +			rte_exit(EXIT_FAILURE, "Requested device %s"
> > >> +				 " cannot be used\n", dev->addr.name);
> > >> +	}
> > >> +
> > >> +	return 0;
> > >> +}
> > >> +
> > >>  /* dump one device */
> > >>  static int
> > >>  soc_dump_one_device(FILE *f, struct rte_soc_device *dev)
> > >> @@ -79,6 +288,12 @@ rte_eal_soc_dump(FILE *f)
> > >>  void
> > >>  rte_eal_soc_register(struct rte_soc_driver *driver)
> > >>  {
> > >> +	/* For a valid soc driver, match and scan function
> > >> +	 * should be provided.
> > >> +	 */
> > >
> > > This comment should be in the header file.
> >
> > Actually there is no valueable addition made by this comment. RTE_VERIFY
> > is self explanatory. I will remove the comment all together.
> 
> No, the comment must be present for the rte_eal_soc_register function as
> its documentation. The RTE_VERIFY is not an excuse, it just verifies the
> fact that the caller understands the documentation and that she didn't make
> a mistake.

Ok. It doesn't hurt to have a comment. I will add that. 

> 
> >
> > >
> > >> +	RTE_VERIFY(driver != NULL);
> > >> +	RTE_VERIFY(driver->match_fn != NULL);
> > >> +	RTE_VERIFY(driver->scan_fn != NULL);
> > >>  	TAILQ_INSERT_TAIL(&soc_driver_list, driver, next);
> > >>  }
> > >>
> > >> diff --git a/lib/librte_eal/common/include/rte_soc.h
> b/lib/librte_eal/common/include/rte_soc.h
> > >> index c6f98eb..bfb49a2 100644
> > >> --- a/lib/librte_eal/common/include/rte_soc.h
> > >> +++ b/lib/librte_eal/common/include/rte_soc.h
> > >> @@ -97,6 +97,16 @@ typedef int (soc_devinit_t)(struct rte_soc_driver *,
> struct rte_soc_device *);
> > >>  typedef int (soc_devuninit_t)(struct rte_soc_device *);
> > >>
> > >>  /**
> > >> + * SoC device scan callback, called from rte_eal_soc_init.
> > >
> > > Can you explain what is the goal of the callback?
> > > What is the expected behaviour.
> >
> > EAL would call the scan of each registered SoC PMD
> > (DRIVER_REGISTER_SOC). This scan is responsible for finding devices on
> > SoC's specific bus and add them to SoC device_list. This is a callback
> > because SoC don't have a generalization like PCI. A SoC is not
> > necessarily a platform bus either (what original patch series assumed).
> 
> In doc comment...
> 
> >
> > >
> > > It returns void so it seems it can never fail. Is this correct?
> > > I can image that to scan for devices, I need to check some file-system
> > > structure which can be unavailable...
> >
> > This is what I had in mind:
> > That is true, it never fails. It is expected that scan function simply
> > ignores (logs error) and moves ahead. A local error for a particular SoC
> > (I agree, there might not be more than one SoC) doesn't necessarily mean
> > that complete DPDK Application should quit. It only means that
> > application user should get some error/warning/message about failure.
> 
> I understand, this is OK then.
> 
> >
> > >
> > >> + */
> > >> +typedef void (soc_scan_t)(void);
> > >
> > > You are missing the '*' in (*soc_scan_t).
> >
> > That was put in the definition in the rte_soc_driver - but, I see you
> > have already commented there. I will add the '*' here and remove from
> there.
> >
> > >
> > >> +
> > >> +/**
> > >> + * Custom device<=>driver match callback for SoC
> > >
> > > Can you explain the semantics (return values), please?
> >
> > rte_soc.h already has explanation on the expected semantics over
> > rte_eal_soc_match - the default implementation. But, I agree, it should
> > be above this declaration.
> 
> True ;).
> 
> >
> > >
> > >> + */
> > >> +typedef int (soc_match_t)(struct rte_soc_driver *, struct
> rte_soc_device *);
> > >
> 
> [...]
> 
> >
> > >
> > > I think, we should tell the users that scan_fn and match_fn must be
> always set
> > > to something.
> >
> > How? I think it would be part of documentation, isn't it?
> 
> Yes. It should be documented in the comment for rte_eal_soc_init.
> My comment was misplaced a bit...
 
Ok.

> 
> > Also, rte_eal_soc_init() already enforces this check with RTE_VERIFY.
> >
> > >
> > >>  	const struct rte_soc_id *id_table; /**< ID table, NULL terminated
> */
> > >>  };
> > >>
> > >> @@ -146,6 +158,45 @@ rte_eal_compare_soc_addr(const struct rte_soc_addr
> *a0,
> > >>  }
> > >>
> > >>  /**
> > >> + * Default function for matching the Soc driver with device. Each
> driver can
> > >> + * either use this function or define their own soc matching function.
> > >> + * This function relies on the compatible string extracted from sysfs.
> But,
> > >> + * a SoC might have different way of identifying its devices. Such SoC
> can
> > >> + * override match_fn.
> > >> + *
> > >> + * @return
> > >> + * 	 0 on success
> > >> + *	-1 when no match found
> > >> +  */
> > >> +int
> > >> +rte_eal_soc_match(struct rte_soc_driver *drv, struct rte_soc_device
> *dev);
> > >
> > > What about naming it
> > >
> > > 	rte_eal_soc_match_default
> >
> > Ok.
> >
> > >
> > > or maybe better
> > >
> > > 	rte_eal_soc_match_compatible
> > >
> > > what do you think?
> >
> >  From what I had in mind - the discussion about SoC not necessarily
> > being a Platform bus - 'compatible' doesn't look fine to me. But again,
> > it is still open debate so - I will wait until that is conlcuded.
> 
> Why? The current implementation works this way:
> 
> int
> rte_eal_soc_match(struct rte_soc_driver *drv, struct rte_soc_device *dev)
> {
> 	int i, j;
> 
> 	RTE_VERIFY(drv != NULL && drv->id_table != NULL);
> 	RTE_VERIFY(dev != NULL && dev->id != NULL);
> 
> 	for (i = 0; drv->id_table[i].compatible; ++i) {
> 		const char *drv_compat = drv->id_table[i].compatible;
> 
> 		for (j = 0; dev->id[j].compatible; ++j) {
> 			const char *dev_compat = dev->id[j].compatible;
> 
> 			if (!strcmp(drv_compat, dev_compat))
> 				return 0;
> 		}
> 	}
> 
> 	return 1;
> }
> 
> It checks for compatible. So why not to name it that way? If you provide
> a match testing the name of devices then it can be named *_match_name.
 
I think all the confusion is about whether match on compatible string should be kept as default or not.
My opinion: No.
Because I have a usecase where the SoC PMD is not a platform bus. [1]
By keeping a 'compatible' string based match, it would be obvious to have a scan for /sys/bus/platform. That in itself is based on assumption that a SoC is platform bus and would adhere to generally known semantics of a platform bus.

Other the other hand, as Hemant has suggested in [1], plan is to have a skeletal code for SoC framework - one which allows any SoC PMD to be plugged in irrespective of the bus/device design. Thereafter, when more PMDs become part of the DPDK framework, generalize functions (which is where your implementations of scan/match over /sys/bus/platform fit pretty well).

[1] http://dpdk.org/ml/archives/dev/2016-September/046967.html

> 
> >
> > >
> > >> +
> > >> +/**
> > >> + * Probe SoC devices for registered drivers.
> > >> + */
> > >> +int rte_eal_soc_probe(void);
> > >> +
> > >> +/**
> > >> + * Probe the single SoC device.
> > >> + */
> > >> +int rte_eal_soc_probe_one(const struct rte_soc_addr *addr);
> > >> +
> > >> +/**
> > >> + * Close the single SoC device.
> > >> + *
> > >> + * Scan the SoC devices and find the SoC device specified by the SoC
> > >> + * address, then call the devuninit() function for registered driver
> > >> + * that has a matching entry in its id_table for discovered device.
> > >> + *
> > >> + * @param addr
> > >> + *	The SoC address to close.
> > >> + * @return
> > >> + *   - 0 on success.
> > >> + *   - Negative on error.
> > >> + */
> > >> +int rte_eal_soc_detach(const struct rte_soc_addr *addr);
> > >> +
> > >> +/**
> > >>   * Dump discovered SoC devices.
> > >>   */
> > >>  void rte_eal_soc_dump(FILE *f);
> > >> diff --git a/lib/librte_eal/linuxapp/eal/eal.c
> b/lib/librte_eal/linuxapp/eal/eal.c
> > >> index 15c8c3d..147b601 100644
> > >> --- a/lib/librte_eal/linuxapp/eal/eal.c
> > >> +++ b/lib/librte_eal/linuxapp/eal/eal.c
> > >> @@ -70,6 +70,7 @@
> > >>  #include <rte_cpuflags.h>
> > >>  #include <rte_interrupts.h>
> > >>  #include <rte_pci.h>
> > >> +#include <rte_soc.h>
> > >>  #include <rte_dev.h>
> > >>  #include <rte_devargs.h>
> > >>  #include <rte_common.h>
> > >> @@ -881,6 +882,10 @@ rte_eal_init(int argc, char **argv)
> > >>  	if (rte_eal_pci_probe())
> > >>  		rte_panic("Cannot probe PCI\n");
> > >>
> > >> +	/* Probe & Initialize SoC devices */
> > >> +	if (rte_eal_soc_probe())
> > >> +		rte_panic("Cannot probe SoC\n");
> > >> +
> > >>  	rte_eal_mcfg_complete();
> > >>
> > >>  	return fctret;
> > >> diff --git a/lib/librte_eal/linuxapp/eal/eal_soc.c
> b/lib/librte_eal/linuxapp/eal/eal_soc.c
> > >> index 04848b9..5f961c4 100644
> > >> --- a/lib/librte_eal/linuxapp/eal/eal_soc.c
> > >> +++ b/lib/librte_eal/linuxapp/eal/eal_soc.c
> > >> @@ -52,5 +52,21 @@
> > >>  int
> > >>  rte_eal_soc_init(void)
> > >>  {
> > >> +	struct rte_soc_driver *drv;
> > >> +
> > >> +	/* for debug purposes, SoC can be disabled */
> > >> +	if (internal_config.no_soc)
> > >> +		return 0;
> > >> +
> > >> +	/* For each registered driver, call their scan routine to perform
> any
> > >> +	 * custom scan for devices (for example, custom buses)
> > >> +	 */
> > >> +	TAILQ_FOREACH(drv, &soc_driver_list, next) {
> > >
> > > Is it possible to have drv->scan_fn == NULL? I suppose, this is invalid.
> > > I'd prefer to have RTE_VERIFY for this check.
> >
> > rte_eal_soc_init() has this check already. Driver wouldn't even be
> > registered in case scan/match are not implemented.
> 
> True, but when reviewing or refactoring, you cannot see always all this
> context.
> It is more defensive to explain here "don't worry, the scan_fn is always set
> here".
 
Ok. I will add this.

> 
> >
> > >
> > >> +		if (drv && drv->scan_fn) {
> > >> +			drv->scan_fn();
> > >> +			/* Ignore all errors from this */
> > >> +		}
> > >
> > >> +	}
> > >> +
> > >>  	return 0;
> > >>  }
> > >> diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> > >> index b9d1932..adcfe7d 100644
> > >> --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> > >> +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> > >> @@ -179,5 +179,9 @@ DPDK_16.11 {
> > >>  	rte_eal_soc_register;
> > >>  	rte_eal_soc_unregister;
> > >>  	rte_eal_soc_dump;
> > >> +	rte_eal_soc_match;
> > >> +	rte_eal_soc_detach;
> > >> +	rte_eal_soc_probe;
> > >> +	rte_eal_soc_probe_one;
> > >>
> > >>  } DPDK_16.07;
> > >
> > > Regards
> > > Jan
> > >
> >
> > I hope I have covered all your comments. That was an exhaustive review.
> > Thanks a lot for your time.
> >
> > Lets work to resolve the architectural issues revolving around SoC
> > scan/match.
> 
> ;)
 
Phew!
I am already loosing track of things you have suggested.
What if we can chat on IRC and resolve the issue surrounding the 'default match/scan', 'platform or not platform dependent SoC' etc? I think it would save a lot of ping-pong effort. 
(For today (20/Sep), I might not be available, but I am OK any other time).

> 
> >
> > -
> > Shreyansh
> 
> 
> 
> --
>    Jan Viktorin                  E-mail: Viktorin@RehiveTech.com
>    System Architect              Web:    www.RehiveTech.com
>    RehiveTech
>    Brno, Czech Republic

Thanks a lot.

-
Shreyansh
  
Jan Viktorin Sept. 20, 2016, 10:49 a.m. UTC | #5
On Tue, 20 Sep 2016 06:46:31 +0000
Shreyansh Jain <shreyansh.jain@nxp.com> wrote:

> Hi Jan,
> 
> > -----Original Message-----
> > From: Jan Viktorin [mailto:viktorin@rehivetech.com]
> > Sent: Monday, September 19, 2016 5:04 PM
> > To: Shreyansh Jain <shreyansh.jain@nxp.com>
> > Cc: dev@dpdk.org; Hemant Agrawal <hemant.agrawal@nxp.com>
> > Subject: Re: [PATCH v3 06/15] eal/soc: implement probing of drivers
> > 
> > On Mon, 19 Sep 2016 12:17:53 +0530
> > Shreyansh Jain <shreyansh.jain@nxp.com> wrote:
> >   
> > > Hi Jan,

[...]

> 
> >   
> > > The model for the patchset was to allow PMDs to write their own match
> > > and hence, verifying a particular match is not definitive. Rather, the  
> > 
> > If you want to verify a particular match implementation then there should
> > be a particular test verifying that implementation, e.g.
> > test_match_compatible(),
> > test_match_proprietary, test_match_by_name.
> > 
> > However, this is testing the rte_eal_soc_probe (at least, I understand it
> > that way).
> > The probe iterates over devices and drivers and matches them. Thus, the
> > argument
> > "a particular match is not definitive" seems to be irrelevant here. You
> > should build
> > a testing match function like "match_always" that verifies the probe is
> > working. Not
> > that the "match" is working.  
>  
> Ok. 'match_always' called after 'always_find_device0' like scan function. So, essentially rather than a functional match, the testcase only checks if these handlers can be called or not. The naming of such handlers in test case would explain the user what the intention of the test is, rather than its outcome. Is this what you are suggesting? 

Yes, it seems to be ;).

> 
> >   
> > > test case simply confirms that a SoC based PMD would be able to  
> > 
> > It does not confirm anything from my point of view. You *always* print
> > "successful"
> > at the end of this test (see below).
> >   
> > > implement its own match/scan and these would be called from EAL as  
> > expected.  
> > >  
> > > >  
> > > >> +{
> > > >> +	struct rte_soc_driver *drv;
> > > >> +
> > > >> +	/* Registering dummy drivers */
> > > >> +	rte_eal_soc_register(&empty_pmd0.soc_drv);
> > > >> +	rte_eal_soc_register(&empty_pmd1.soc_drv);
> > > >> +	/* Assuming that test_register_unregister is working, not  
> > verifying  
> > > >> +	 * that drivers are indeed registered
> > > >> +	*/
> > > >> +
> > > >> +	/* rte_eal_soc_init is called by rte_eal_init, which in turn  
> > calls the  
> > > >> +	 * scan_fn of each driver.  
> > 
> > So, I'd comment this as something like:
> > 
> > "mimic rte_eal_soc_init to prepare for the rte_eal_soc_probe"  
>  
> Agree.
> 
> >   
> > > >> +	 */
> > > >> +	TAILQ_FOREACH(drv, &soc_driver_list, next) {
> > > >> +		if (drv && drv->scan_fn)
> > > >> +			drv->scan_fn();
> > > >> +	}  
> > > >
> > > > Here, I suppose you mimic the rte_eal_soc_init?  
> > >
> > > Yes.
> > >  
> > > >  
> > > >> +
> > > >> +	/* rte_eal_init() would perform other inits here */
> > > >> +
> > > >> +	/* Probe would link the SoC devices<=>drivers */
> > > >> +	rte_eal_soc_probe();
> > > >> +
> > > >> +	/* Unregistering dummy drivers */
> > > >> +	rte_eal_soc_unregister(&empty_pmd0.soc_drv);
> > > >> +	rte_eal_soc_unregister(&empty_pmd1.soc_drv);
> > > >> +
> > > >> +	free(empty_pmd0.soc_dev.addr.name);
> > > >> +
> > > >> +	printf("%s has been successful\n", __func__);  
> > > >
> > > > How you detect it is unsuccessful? Is it possible to fail in this test?
> > > > A test that can never fail is in fact not a test :).  
> > >
> > > The design assumption for SoC patcheset was: A PMDs scan is called to
> > > find devices on its bus (PMD ~ bus). Whether devices are found or not,
> > > is irrelevant to EAL - whether that is because of error or actually no
> > > devices were available.
> > > With the above logic, no 'success/failure' is checked in the test. It is
> > > simply a verification of EAL's ability to link the PMD with it
> > > (scan/match function pointers).  
> > 
> > I am sorry, I disagree. You always print "successful". The only way to fail
> > here is a SIGSEGV or other very serious system failure. But we test
> > rte_eal_soc_probe
> > and not system failures.  
>  
> Ok. I am not yet clear how the test case would be created, except what I have mentioned above that rather than being functional, testcase only explains the case through function naming.
> The premise on which match/scan are based is that they would be implemented by the respective PMD. Which makes testing of such function either irrelevant or simply a help to user to understand how SoC PMD should be created.
> If this persists, probably I would rather remove this test case all together. It doesn't serve any purpose, which you have rightly pointed out.

I'd vote to have this test to check for regressions. See the test_pci.c, e.g. test_pci_blacklist(). It is not
ideal but it works. You can check how many devices have been matched or seen.

Unfortunately, I am not familiar with DPAA2 API. I assume that it works via /sys as well as platform_device
but with different properties. You can create a fake /sys sub-tree (see app/test/test_pci_sysfs/) and test
whether your scan function reads data successfully and fills all the structures as expected. I think, there
is a lot of space in this area.

Next, is it possible to write a generic DPAA2 backend for the SoC Framework? I mean to not introduce the DPAA2
into PMDs (this looks like mixing of responsibilities) but have it in EAL as an alternative to UIO, VFIO, etc.
I suppose that as DPAA2 is a kind of API, it should be possible to go this way.

> 
> >   
> > >  
> > > >  
> > > >> +	return 0;
> > > >> +}
> > > >> +
> > > >>  /* save real devices and drivers until the tests finishes */  
> > 

[...]

> > >  
> > > >  
> > > >> +int
> > > >> +rte_eal_soc_match(struct rte_soc_driver *drv, struct rte_soc_device  
> > *dev)  
> > > >> +{
> > > >> +	int i, j;
> > > >> +
> > > >> +	RTE_VERIFY(drv != NULL && drv->id_table != NULL);
> > > >> +	RTE_VERIFY(dev != NULL && dev->id != NULL);
> > > >> +
> > > >> +	for (i = 0; drv->id_table[i].compatible; ++i) {
> > > >> +		const char *drv_compat = drv->id_table[i].compatible;
> > > >> +
> > > >> +		for (j = 0; dev->id[j].compatible; ++j) {
> > > >> +			const char *dev_compat = dev->id[j].compatible;
> > > >> +
> > > >> +			if (!strcmp(drv_compat, dev_compat))
> > > >> +				return 0;
> > > >> +		}
> > > >> +	}
> > > >> +
> > > >> +	return 1;
> > > >> +}
> > > >> +  
> > 
> > A redundant empty line here...  
> 
> Yes, I will remove this. 
> 
> >   
> > > >> +
> > > >> +static int
> > > >> +rte_eal_soc_probe_one_driver(struct rte_soc_driver *drv,
> > > >> +			     struct rte_soc_device *dev)
> > > >> +{
> > > >> +	int ret = 1;
> > > >> +  
> > > >
> > > > I think, the RTE_VERIFY(dev->match_fn) might be good here.
> > > > It avoids any doubts about the validity of the pointer.  
> > >
> > > That has already been done in rte_eal_soc_register which is called when
> > > PMDs are registering themselves through DRIVER_REGISTER_SOC. That would
> > > prevent any PMD leaking through to this stage without a proper
> > > match_fn/scan_fn.  
> > 
> > Well, yes. It seems to be redundant. However, it would emphesize the fact
> > that this function expects that match_fn is set.
> > 
> > In the rte_eal_soc_register, the RTE_VERIFY says "The API requires those".
> > 
> > But when I review I do not always see all the context. It is not safe for
> > me to assume that there was probably some RTE_VERIFY in the path... It is
> > not a fast path so it does not hurt the performance in anyway.
> >   
> 
> You mean RTE_VERIFY should be duplicated so that code readability increases?
> I am not really convinced on that.
> My opinion: rte_eal_soc_*probe* series shouldn't actually worry about whether a valid PMD has been plugged in or. It should simply assume that having reached here, all is well on the PMDs definition side.
> 
> On the contrary, placing a RTE_VERIFY here would only state the reader that it is possible to reach this code path without a valid match function.

Yes, it would state that the match function must always be valid here. That's the point.

> 
> > >  
> > > >  
> > > >> +	ret = drv->match_fn(drv, dev);
> > > >> +	if (ret) {
> > > >> +		RTE_LOG(DEBUG, EAL,
> > > >> +			" match function failed, skipping\n");  
> > > >
> > > > Is this a failure? I think it is not. Failure would be if the match
> > > > function cannot execute correctly. This is more like "no-match".  
> > >
> > > The log message is misleading. This is _not_ a failure but simply a
> > > 'no-match'. I will update this.
> > >  
> > > >
> > > > When debugging, I'd like to see more a message like "driver <name> does  
> > not match".  
> > >
> > > Problem would be about '<name>' of a driver. There is already another
> > > discussion about SoC capability/platform bus definitions - probably I
> > > will wait for that so as to define what a '<name>' for a driver and
> > > device is.
> > > In this case, the key reason for not adding such a message was because
> > > it was assumed PMDs are black boxes with EAL not even assuming what
> > > '<name>' means. Anyways, it is better to discuss these things in that
> > > other email.  
> > 
> > I am not sure which thread do you mean... Can you point me there, please?  
>  
> Sorry, somehow I forgot to add that:
> http://dpdk.org/ml/archives/dev/2016-September/046933.html
> 
> One of the discussion point in above thread is whether platform bus is a good assumption for a SoC driver or not. Based on that '<name>' (or any other identifier, like 'compatible'), the log can be printed as you suggested above.

Quite frankly, I don't know. The platform bus seemed the obvious starting point to me when I started working
on this topic. There was no DPAA2 (as far as I know), nothing more suitable. I write custom kernel drivers
as platform devices.

However, if you look into PCI infrastructure you can see that the UIO, VFIO and custom approaches are available.
And there is no PMD-specific scan function. I'd rather go the way it is already done to not diverge a lot from
the base code. Otherwise, we can easily come to a point like "drop DPDK and write again" :).

> 
> >   
> > >  
> > > >  

[...]

> 
> >   
> > >  
> > > >  
> > > >> +		if (drv && drv->scan_fn) {
> > > >> +			drv->scan_fn();
> > > >> +			/* Ignore all errors from this */
> > > >> +		}  
> > > >  
> > > >> +	}
> > > >> +
> > > >>  	return 0;
> > > >>  }
> > > >> diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map  
> > b/lib/librte_eal/linuxapp/eal/rte_eal_version.map  
> > > >> index b9d1932..adcfe7d 100644
> > > >> --- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> > > >> +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
> > > >> @@ -179,5 +179,9 @@ DPDK_16.11 {
> > > >>  	rte_eal_soc_register;
> > > >>  	rte_eal_soc_unregister;
> > > >>  	rte_eal_soc_dump;
> > > >> +	rte_eal_soc_match;
> > > >> +	rte_eal_soc_detach;
> > > >> +	rte_eal_soc_probe;
> > > >> +	rte_eal_soc_probe_one;
> > > >>
> > > >>  } DPDK_16.07;  
> > > >
> > > > Regards
> > > > Jan
> > > >  
> > >
> > > I hope I have covered all your comments. That was an exhaustive review.
> > > Thanks a lot for your time.
> > >
> > > Lets work to resolve the architectural issues revolving around SoC
> > > scan/match.  
> > 
> > ;)  
>  
> Phew!
> I am already loosing track of things you have suggested.
> What if we can chat on IRC and resolve the issue surrounding the 'default match/scan', 'platform or not platform dependent SoC' etc? I think it would save a lot of ping-pong effort. 
> (For today (20/Sep), I might not be available, but I am OK any other time).

Sure... write me when you are available. I maybe temporarily away.

> 
> >   
> > >
> > > -
> > > Shreyansh  
> > 
> > 
> > 
> > --
> >    Jan Viktorin                  E-mail: Viktorin@RehiveTech.com
> >    System Architect              Web:    www.RehiveTech.com
> >    RehiveTech
> >    Brno, Czech Republic  
> 
> Thanks a lot.
> 
> -
> Shreyansh
  

Patch

diff --git a/app/test/test_soc.c b/app/test/test_soc.c
index ac03e64..d2b9462 100644
--- a/app/test/test_soc.c
+++ b/app/test/test_soc.c
@@ -87,14 +87,45 @@  static int test_compare_addr(void)
  */
 struct test_wrapper {
 	struct rte_soc_driver soc_drv;
+	struct rte_soc_device soc_dev;
 };
 
+static int empty_pmd0_devinit(struct rte_soc_driver *drv,
+			      struct rte_soc_device *dev);
+static int empty_pmd0_devuninit(struct rte_soc_device *dev);
+static void test_soc_scan_dev0_cb(void);
+static int test_soc_match_dev0_cb(struct rte_soc_driver *drv,
+				  struct rte_soc_device *dev);
+static void test_soc_scan_dev1_cb(void);
+static int test_soc_match_dev1_cb(struct rte_soc_driver *drv,
+				  struct rte_soc_device *dev);
+
+static int
+empty_pmd0_devinit(struct rte_soc_driver *drv __rte_unused,
+		   struct rte_soc_device *dev __rte_unused)
+{
+	return 0;
+}
+
+static int
+empty_pmd0_devuninit(struct rte_soc_device *dev)
+{
+	/* Release the memory associated with dev->addr.name */
+	free(dev->addr.name);
+
+	return 0;
+}
+
 struct test_wrapper empty_pmd0 = {
 	.soc_drv = {
 		.driver = {
 			.name = "empty_pmd0"
 		},
-	},
+		.devinit = empty_pmd0_devinit,
+		.devuninit = empty_pmd0_devuninit,
+		.scan_fn = test_soc_scan_dev0_cb,
+		.match_fn = test_soc_match_dev0_cb,
+	}
 };
 
 struct test_wrapper empty_pmd1 = {
@@ -102,9 +133,54 @@  struct test_wrapper empty_pmd1 = {
 		.driver = {
 			.name = "empty_pmd1"
 		},
+		.scan_fn = test_soc_scan_dev1_cb,
+		.match_fn = test_soc_match_dev1_cb,
 	},
 };
 
+static void
+test_soc_scan_dev0_cb(void)
+{
+	/* SoC's scan would scan devices on its bus and add to
+	 * soc_device_list
+	 */
+	empty_pmd0.soc_dev.addr.name = strdup("empty_pmd0_dev");
+
+	TAILQ_INSERT_TAIL(&soc_device_list, &empty_pmd0.soc_dev, next);
+}
+
+static int
+test_soc_match_dev0_cb(struct rte_soc_driver *drv __rte_unused,
+		       struct rte_soc_device *dev)
+{
+	if (!dev->addr.name || strcmp(dev->addr.name, "empty_pmd0_dev"))
+		return 0;
+
+	return 1;
+}
+
+
+static void
+test_soc_scan_dev1_cb(void)
+{
+	/* SoC's scan would scan devices on its bus and add to
+	 * soc_device_list
+	 */
+	empty_pmd0.soc_dev.addr.name = strdup("empty_pmd1_dev");
+
+	TAILQ_INSERT_TAIL(&soc_device_list, &empty_pmd1.soc_dev, next);
+}
+
+static int
+test_soc_match_dev1_cb(struct rte_soc_driver *drv __rte_unused,
+		       struct rte_soc_device *dev)
+{
+	if (!dev->addr.name || strcmp(dev->addr.name, "empty_pmd1_dev"))
+		return 0;
+
+	return 1;
+}
+
 static int
 count_registered_socdrvs(void)
 {
@@ -148,13 +224,54 @@  test_register_unregister(void)
 	return 0;
 }
 
+/* Test Probe (scan and match) functionality */
+static int
+test_soc_init_and_probe(void)
+{
+	struct rte_soc_driver *drv;
+
+	/* Registering dummy drivers */
+	rte_eal_soc_register(&empty_pmd0.soc_drv);
+	rte_eal_soc_register(&empty_pmd1.soc_drv);
+	/* Assuming that test_register_unregister is working, not verifying
+	 * that drivers are indeed registered
+	*/
+
+	/* rte_eal_soc_init is called by rte_eal_init, which in turn calls the
+	 * scan_fn of each driver.
+	 */
+	TAILQ_FOREACH(drv, &soc_driver_list, next) {
+		if (drv && drv->scan_fn)
+			drv->scan_fn();
+	}
+
+	/* rte_eal_init() would perform other inits here */
+
+	/* Probe would link the SoC devices<=>drivers */
+	rte_eal_soc_probe();
+
+	/* Unregistering dummy drivers */
+	rte_eal_soc_unregister(&empty_pmd0.soc_drv);
+	rte_eal_soc_unregister(&empty_pmd1.soc_drv);
+
+	free(empty_pmd0.soc_dev.addr.name);
+
+	printf("%s has been successful\n", __func__);
+	return 0;
+}
+
 /* save real devices and drivers until the tests finishes */
 struct soc_driver_list real_soc_driver_list =
 	TAILQ_HEAD_INITIALIZER(real_soc_driver_list);
 
+/* save real devices and drivers until the tests finishes */
+struct soc_device_list real_soc_device_list =
+	TAILQ_HEAD_INITIALIZER(real_soc_device_list);
+
 static int test_soc_setup(void)
 {
 	struct rte_soc_driver *drv;
+	struct rte_soc_device *dev;
 
 	/* no real drivers for the test */
 	while (!TAILQ_EMPTY(&soc_driver_list)) {
@@ -163,12 +280,20 @@  static int test_soc_setup(void)
 		TAILQ_INSERT_TAIL(&real_soc_driver_list, drv, next);
 	}
 
+	/* And, no real devices for the test */
+	while (!TAILQ_EMPTY(&soc_device_list)) {
+		dev = TAILQ_FIRST(&soc_device_list);
+		TAILQ_REMOVE(&soc_device_list, dev, next);
+		TAILQ_INSERT_TAIL(&real_soc_device_list, dev, next);
+	}
+
 	return 0;
 }
 
 static int test_soc_cleanup(void)
 {
 	struct rte_soc_driver *drv;
+	struct rte_soc_device *dev;
 
 	/* bring back real drivers after the test */
 	while (!TAILQ_EMPTY(&real_soc_driver_list)) {
@@ -177,6 +302,13 @@  static int test_soc_cleanup(void)
 		rte_eal_soc_register(drv);
 	}
 
+	/* And, bring back real devices after the test */
+	while (!TAILQ_EMPTY(&real_soc_device_list)) {
+		dev = TAILQ_FIRST(&real_soc_device_list);
+		TAILQ_REMOVE(&real_soc_device_list, dev, next);
+		TAILQ_INSERT_TAIL(&soc_device_list, dev, next);
+	}
+
 	return 0;
 }
 
@@ -192,6 +324,10 @@  test_soc(void)
 	if (test_register_unregister())
 		return -1;
 
+	/* Assuming test_register_unregister has succeeded */
+	if (test_soc_init_and_probe())
+		return -1;
+
 	if (test_soc_cleanup())
 		return -1;
 
diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
index de38848..3c407be 100644
--- a/lib/librte_eal/bsdapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map
@@ -173,5 +173,9 @@  DPDK_16.11 {
 	rte_eal_soc_register;
 	rte_eal_soc_unregister;
 	rte_eal_soc_dump;
+	rte_eal_soc_match;
+	rte_eal_soc_detach;
+	rte_eal_soc_probe;
+	rte_eal_soc_probe_one;
 
 } DPDK_16.07;
diff --git a/lib/librte_eal/common/eal_common_soc.c b/lib/librte_eal/common/eal_common_soc.c
index 5dcddc5..bb87a67 100644
--- a/lib/librte_eal/common/eal_common_soc.c
+++ b/lib/librte_eal/common/eal_common_soc.c
@@ -36,6 +36,8 @@ 
 #include <sys/queue.h>
 
 #include <rte_log.h>
+#include <rte_common.h>
+#include <rte_soc.h>
 
 #include "eal_private.h"
 
@@ -45,6 +47,213 @@  struct soc_driver_list soc_driver_list =
 struct soc_device_list soc_device_list =
 	TAILQ_HEAD_INITIALIZER(soc_device_list);
 
+/* Default SoC device<->Driver match handler function */
+int
+rte_eal_soc_match(struct rte_soc_driver *drv, struct rte_soc_device *dev)
+{
+	int i, j;
+
+	RTE_VERIFY(drv != NULL && drv->id_table != NULL);
+	RTE_VERIFY(dev != NULL && dev->id != NULL);
+
+	for (i = 0; drv->id_table[i].compatible; ++i) {
+		const char *drv_compat = drv->id_table[i].compatible;
+
+		for (j = 0; dev->id[j].compatible; ++j) {
+			const char *dev_compat = dev->id[j].compatible;
+
+			if (!strcmp(drv_compat, dev_compat))
+				return 0;
+		}
+	}
+
+	return 1;
+}
+
+
+static int
+rte_eal_soc_probe_one_driver(struct rte_soc_driver *drv,
+			     struct rte_soc_device *dev)
+{
+	int ret = 1;
+
+	ret = drv->match_fn(drv, dev);
+	if (ret) {
+		RTE_LOG(DEBUG, EAL,
+			" match function failed, skipping\n");
+		return ret;
+	}
+
+	dev->driver = drv;
+	RTE_VERIFY(drv->devinit != NULL);
+	return drv->devinit(drv, dev);
+}
+
+static int
+soc_probe_all_drivers(struct rte_soc_device *dev)
+{
+	struct rte_soc_driver *drv = NULL;
+	int rc = 0;
+
+	if (dev == NULL)
+		return -1;
+
+	TAILQ_FOREACH(drv, &soc_driver_list, next) {
+		rc = rte_eal_soc_probe_one_driver(drv, dev);
+		if (rc < 0)
+			/* negative value is an error */
+			return -1;
+		if (rc > 0)
+			/* positive value means driver doesn't support it */
+			continue;
+		return 0;
+	}
+	return 1;
+}
+
+/* If the IDs match, call the devuninit() function of the driver. */
+static int
+rte_eal_soc_detach_dev(struct rte_soc_driver *drv,
+		       struct rte_soc_device *dev)
+{
+	int ret;
+
+	if ((drv == NULL) || (dev == NULL))
+		return -EINVAL;
+
+	ret = drv->match_fn(drv, dev);
+	if (ret) {
+		RTE_LOG(DEBUG, EAL,
+			" match function failed, skipping\n");
+		return ret;
+	}
+
+	RTE_LOG(DEBUG, EAL, "SoC device %s\n",
+		dev->addr.name);
+
+	RTE_LOG(DEBUG, EAL, "  remove driver: %s\n", drv->driver.name);
+
+	if (drv->devuninit && (drv->devuninit(dev) < 0))
+		return -1;	/* negative value is an error */
+
+	/* clear driver structure */
+	dev->driver = NULL;
+
+	return 0;
+}
+
+/*
+ * Call the devuninit() function of all registered drivers for the given
+ * device if their IDs match.
+ *
+ * @return
+ *       0 when successful
+ *      -1 if deinitialization fails
+ *       1 if no driver is found for this device.
+ */
+static int
+soc_detach_all_drivers(struct rte_soc_device *dev)
+{
+	struct rte_soc_driver *dr = NULL;
+	int rc = 0;
+
+	if (dev == NULL)
+		return -1;
+
+	TAILQ_FOREACH(dr, &soc_driver_list, next) {
+		rc = rte_eal_soc_detach_dev(dr, dev);
+		if (rc < 0)
+			/* negative value is an error */
+			return -1;
+		if (rc > 0)
+			/* positive value means driver doesn't support it */
+			continue;
+		return 0;
+	}
+	return 1;
+}
+
+/*
+ * Detach device specified by its SoC address.
+ */
+int
+rte_eal_soc_detach(const struct rte_soc_addr *addr)
+{
+	struct rte_soc_device *dev = NULL;
+	int ret = 0;
+
+	if (addr == NULL)
+		return -1;
+
+	TAILQ_FOREACH(dev, &soc_device_list, next) {
+		if (rte_eal_compare_soc_addr(&dev->addr, addr))
+			continue;
+
+		ret = soc_detach_all_drivers(dev);
+		if (ret < 0)
+			goto err_return;
+
+		TAILQ_REMOVE(&soc_device_list, dev, next);
+		return 0;
+	}
+	return -1;
+
+err_return:
+	RTE_LOG(WARNING, EAL, "Requested device %s cannot be used\n",
+		dev->addr.name);
+	return -1;
+}
+
+int
+rte_eal_soc_probe_one(const struct rte_soc_addr *addr)
+{
+	struct rte_soc_device *dev = NULL;
+	int ret = 0;
+
+	if (addr == NULL)
+		return -1;
+
+	/* unlike pci, in case of soc, it the responsibility of the soc driver
+	 * to check during init whether device has been updated since last add.
+	 */
+
+	TAILQ_FOREACH(dev, &soc_device_list, next) {
+		if (rte_eal_compare_soc_addr(&dev->addr, addr))
+			continue;
+
+		ret = soc_probe_all_drivers(dev);
+		if (ret < 0)
+			goto err_return;
+		return 0;
+	}
+	return -1;
+
+err_return:
+	RTE_LOG(WARNING, EAL,
+		"Requested device %s cannot be used\n", addr->name);
+	return -1;
+}
+
+/*
+ * Scan the SoC devices and call the devinit() function for all registered
+ * drivers that have a matching entry in its id_table for discovered devices.
+ */
+int
+rte_eal_soc_probe(void)
+{
+	struct rte_soc_device *dev = NULL;
+	int ret = 0;
+
+	TAILQ_FOREACH(dev, &soc_device_list, next) {
+		ret = soc_probe_all_drivers(dev);
+		if (ret < 0)
+			rte_exit(EXIT_FAILURE, "Requested device %s"
+				 " cannot be used\n", dev->addr.name);
+	}
+
+	return 0;
+}
+
 /* dump one device */
 static int
 soc_dump_one_device(FILE *f, struct rte_soc_device *dev)
@@ -79,6 +288,12 @@  rte_eal_soc_dump(FILE *f)
 void
 rte_eal_soc_register(struct rte_soc_driver *driver)
 {
+	/* For a valid soc driver, match and scan function
+	 * should be provided.
+	 */
+	RTE_VERIFY(driver != NULL);
+	RTE_VERIFY(driver->match_fn != NULL);
+	RTE_VERIFY(driver->scan_fn != NULL);
 	TAILQ_INSERT_TAIL(&soc_driver_list, driver, next);
 }
 
diff --git a/lib/librte_eal/common/include/rte_soc.h b/lib/librte_eal/common/include/rte_soc.h
index c6f98eb..bfb49a2 100644
--- a/lib/librte_eal/common/include/rte_soc.h
+++ b/lib/librte_eal/common/include/rte_soc.h
@@ -97,6 +97,16 @@  typedef int (soc_devinit_t)(struct rte_soc_driver *, struct rte_soc_device *);
 typedef int (soc_devuninit_t)(struct rte_soc_device *);
 
 /**
+ * SoC device scan callback, called from rte_eal_soc_init.
+ */
+typedef void (soc_scan_t)(void);
+
+/**
+ * Custom device<=>driver match callback for SoC
+ */
+typedef int (soc_match_t)(struct rte_soc_driver *, struct rte_soc_device *);
+
+/**
  * A structure describing a SoC driver.
  */
 struct rte_soc_driver {
@@ -104,6 +114,8 @@  struct rte_soc_driver {
 	struct rte_driver driver;          /**< Inherit core driver. */
 	soc_devinit_t *devinit;            /**< Device initialization */
 	soc_devuninit_t *devuninit;        /**< Device uninitialization */
+	soc_scan_t *scan_fn;               /**< Callback for scanning SoC bus*/
+	soc_match_t *match_fn;             /**< Callback to match dev<->drv */
 	const struct rte_soc_id *id_table; /**< ID table, NULL terminated */
 };
 
@@ -146,6 +158,45 @@  rte_eal_compare_soc_addr(const struct rte_soc_addr *a0,
 }
 
 /**
+ * Default function for matching the Soc driver with device. Each driver can
+ * either use this function or define their own soc matching function.
+ * This function relies on the compatible string extracted from sysfs. But,
+ * a SoC might have different way of identifying its devices. Such SoC can
+ * override match_fn.
+ *
+ * @return
+ * 	 0 on success
+ *	-1 when no match found
+  */
+int
+rte_eal_soc_match(struct rte_soc_driver *drv, struct rte_soc_device *dev);
+
+/**
+ * Probe SoC devices for registered drivers.
+ */
+int rte_eal_soc_probe(void);
+
+/**
+ * Probe the single SoC device.
+ */
+int rte_eal_soc_probe_one(const struct rte_soc_addr *addr);
+
+/**
+ * Close the single SoC device.
+ *
+ * Scan the SoC devices and find the SoC device specified by the SoC
+ * address, then call the devuninit() function for registered driver
+ * that has a matching entry in its id_table for discovered device.
+ *
+ * @param addr
+ *	The SoC address to close.
+ * @return
+ *   - 0 on success.
+ *   - Negative on error.
+ */
+int rte_eal_soc_detach(const struct rte_soc_addr *addr);
+
+/**
  * Dump discovered SoC devices.
  */
 void rte_eal_soc_dump(FILE *f);
diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c
index 15c8c3d..147b601 100644
--- a/lib/librte_eal/linuxapp/eal/eal.c
+++ b/lib/librte_eal/linuxapp/eal/eal.c
@@ -70,6 +70,7 @@ 
 #include <rte_cpuflags.h>
 #include <rte_interrupts.h>
 #include <rte_pci.h>
+#include <rte_soc.h>
 #include <rte_dev.h>
 #include <rte_devargs.h>
 #include <rte_common.h>
@@ -881,6 +882,10 @@  rte_eal_init(int argc, char **argv)
 	if (rte_eal_pci_probe())
 		rte_panic("Cannot probe PCI\n");
 
+	/* Probe & Initialize SoC devices */
+	if (rte_eal_soc_probe())
+		rte_panic("Cannot probe SoC\n");
+
 	rte_eal_mcfg_complete();
 
 	return fctret;
diff --git a/lib/librte_eal/linuxapp/eal/eal_soc.c b/lib/librte_eal/linuxapp/eal/eal_soc.c
index 04848b9..5f961c4 100644
--- a/lib/librte_eal/linuxapp/eal/eal_soc.c
+++ b/lib/librte_eal/linuxapp/eal/eal_soc.c
@@ -52,5 +52,21 @@ 
 int
 rte_eal_soc_init(void)
 {
+	struct rte_soc_driver *drv;
+
+	/* for debug purposes, SoC can be disabled */
+	if (internal_config.no_soc)
+		return 0;
+
+	/* For each registered driver, call their scan routine to perform any
+	 * custom scan for devices (for example, custom buses)
+	 */
+	TAILQ_FOREACH(drv, &soc_driver_list, next) {
+		if (drv && drv->scan_fn) {
+			drv->scan_fn();
+			/* Ignore all errors from this */
+		}
+	}
+
 	return 0;
 }
diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
index b9d1932..adcfe7d 100644
--- a/lib/librte_eal/linuxapp/eal/rte_eal_version.map
+++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map
@@ -179,5 +179,9 @@  DPDK_16.11 {
 	rte_eal_soc_register;
 	rte_eal_soc_unregister;
 	rte_eal_soc_dump;
+	rte_eal_soc_match;
+	rte_eal_soc_detach;
+	rte_eal_soc_probe;
+	rte_eal_soc_probe_one;
 
 } DPDK_16.07;