Message ID | 20190302024253.15594-4-thomas@monjalon.net (mailing list archive) |
---|---|
State | New |
Delegated to: | Thomas Monjalon |
Headers | show |
Series | fix error path of multi-process probe | expand |
Context | Check | Description |
---|---|---|
ci/checkpatch | success | coding style OK |
ci/Intel-compilation | success | Compilation OK |
> -----Original Message----- > From: Thomas Monjalon [mailto:thomas@monjalon.net] > Sent: Saturday, March 2, 2019 10:43 AM > To: dev@dpdk.org > Cc: Zhang, Qi Z <qi.z.zhang@intel.com>; stable@dpdk.org > Subject: [PATCH 3/3] eal: fix multi-process probe failure handling > > If probe fails in multi-process context, the device must removed in other > processes for consistency. This is a rollback mechanism. > However the rollback should not happen for devices which were already probed > before the current probe transaction. > > When probing an already probed device, the driver may reject with -EEXIST or > update and succeed with code 0. > In order to distinguish successful new probe from re-probe, in the function > local_dev_probe(), the positive EEXIST code is returned for the latter case. > > The functions rte_dev_probe() and __handle_secondary_request() can test for > -EEXIST and +EEXIST, and skip rollback in such case. > > Fixes: 244d5130719c ("eal: enable hotplug on multi-process") > Fixes: ac9e4a17370f ("eal: support attach/detach shared device from secondary") > Cc: qi.z.zhang@intel.com > Cc: stable@dpdk.org > > Signed-off-by: Thomas Monjalon <thomas@monjalon.net> > --- > lib/librte_eal/common/eal_common_dev.c | 12 ++++++++++-- > lib/librte_eal/common/eal_private.h | 2 +- > lib/librte_eal/common/hotplug_mp.c | 8 ++++++-- > 3 files changed, 17 insertions(+), 5 deletions(-) > > diff --git a/lib/librte_eal/common/eal_common_dev.c > b/lib/librte_eal/common/eal_common_dev.c > index deaaea9345..2c7b1ab071 100644 > --- a/lib/librte_eal/common/eal_common_dev.c > +++ b/lib/librte_eal/common/eal_common_dev.c > @@ -132,6 +132,7 @@ local_dev_probe(const char *devargs, struct rte_device > **new_dev) { > struct rte_device *dev; > struct rte_devargs *da; > + bool already_probed; > int ret; > > *new_dev = NULL; > @@ -171,12 +172,15 @@ local_dev_probe(const char *devargs, struct rte_device > **new_dev) > * those devargs shouldn't be removed manually anymore. 
> */ > > + already_probed = rte_dev_is_probed(dev); > ret = dev->bus->plug(dev); > if (ret && !rte_dev_is_probed(dev)) { /* if hasn't ever succeeded */ > RTE_LOG(ERR, EAL, "Driver cannot attach the device (%s)\n", > dev->name); > return ret; > } > + if (ret == 0 && already_probed) > + ret = EEXIST; /* hint to avoid any rollback */ What if bus->plug return -EEXIST and rte_dev_is_probed return true? (See rte_pci_probe_one_driver) You will not give hint here, but is this expected? > > *new_dev = dev; > return ret; > @@ -194,6 +198,7 @@ rte_dev_probe(const char *devargs) { > struct eal_dev_mp_req req; > struct rte_device *dev; > + bool already_probed; > int ret; > > memset(&req, 0, sizeof(req)); > @@ -221,8 +226,8 @@ rte_dev_probe(const char *devargs) > > /* primary attach the new device itself. */ > ret = local_dev_probe(devargs, &dev); > - > - if (ret != 0 && ret != -EEXIST) { > + already_probed = (ret == -EEXIST || ret == EEXIST); > + if (ret < 0 && !already_probed) { > RTE_LOG(ERR, EAL, > "Failed to attach device on primary process\n"); > return ret; > @@ -250,6 +255,9 @@ rte_dev_probe(const char *devargs) > return 0; > > rollback: > + if (already_probed) > + return ret; /* skip rollback */ > + > req.t = EAL_DEV_REQ_TYPE_ATTACH_ROLLBACK; > > /* primary send rollback request to secondary. */ diff --git > a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h > index 798ede553b..a01d252930 100644 > --- a/lib/librte_eal/common/eal_private.h > +++ b/lib/librte_eal/common/eal_private.h > @@ -304,7 +304,7 @@ rte_devargs_layers_parse(struct rte_devargs *devargs, > * @param new_dev > * new device be probed as output. > * @return > - * 0 on success, negative on error. > + * >=0 on success (+EEXIST if already probed), negative on error. 
> */ > int local_dev_probe(const char *devargs, struct rte_device **new_dev); > > diff --git a/lib/librte_eal/common/hotplug_mp.c > b/lib/librte_eal/common/hotplug_mp.c > index 69e9a16d6a..9f8ef28a3b 100644 > --- a/lib/librte_eal/common/hotplug_mp.c > +++ b/lib/librte_eal/common/hotplug_mp.c > @@ -90,13 +90,15 @@ __handle_secondary_request(void *param) > struct rte_devargs da; > struct rte_device *dev; > struct rte_bus *bus; > + bool already_probed = false; > int ret = 0; > > tmp_req = *req; > > if (req->t == EAL_DEV_REQ_TYPE_ATTACH) { > ret = local_dev_probe(req->devargs, &dev); > - if (ret != 0 && ret != -EEXIST) { > + already_probed = (ret == -EEXIST || ret == EEXIST); > + if (ret < 0 && !already_probed) { > RTE_LOG(ERR, EAL, "Failed to hotplug add device on primary\n"); > goto finish; > } > @@ -159,7 +161,7 @@ __handle_secondary_request(void *param) > goto finish; > > rollback: > - if (req->t == EAL_DEV_REQ_TYPE_ATTACH) { > + if (req->t == EAL_DEV_REQ_TYPE_ATTACH && !already_probed) { > tmp_req.t = EAL_DEV_REQ_TYPE_ATTACH_ROLLBACK; > eal_dev_hotplug_request_to_secondary(&tmp_req); > local_dev_remove(dev); > @@ -238,6 +240,8 @@ static void __handle_primary_request(void *param) > case EAL_DEV_REQ_TYPE_ATTACH: > case EAL_DEV_REQ_TYPE_DETACH_ROLLBACK: > ret = local_dev_probe(req->devargs, &dev); > + if (ret > 0) > + ret = 0; /* return only errors */ > break; > case EAL_DEV_REQ_TYPE_DETACH: > case EAL_DEV_REQ_TYPE_ATTACH_ROLLBACK: > -- > 2.20.1
diff --git a/lib/librte_eal/common/eal_common_dev.c b/lib/librte_eal/common/eal_common_dev.c index deaaea9345..2c7b1ab071 100644 --- a/lib/librte_eal/common/eal_common_dev.c +++ b/lib/librte_eal/common/eal_common_dev.c @@ -132,6 +132,7 @@ local_dev_probe(const char *devargs, struct rte_device **new_dev) { struct rte_device *dev; struct rte_devargs *da; + bool already_probed; int ret; *new_dev = NULL; @@ -171,12 +172,15 @@ local_dev_probe(const char *devargs, struct rte_device **new_dev) * those devargs shouldn't be removed manually anymore. */ + already_probed = rte_dev_is_probed(dev); ret = dev->bus->plug(dev); if (ret && !rte_dev_is_probed(dev)) { /* if hasn't ever succeeded */ RTE_LOG(ERR, EAL, "Driver cannot attach the device (%s)\n", dev->name); return ret; } + if (ret == 0 && already_probed) + ret = EEXIST; /* hint to avoid any rollback */ *new_dev = dev; return ret; @@ -194,6 +198,7 @@ rte_dev_probe(const char *devargs) { struct eal_dev_mp_req req; struct rte_device *dev; + bool already_probed; int ret; memset(&req, 0, sizeof(req)); @@ -221,8 +226,8 @@ rte_dev_probe(const char *devargs) /* primary attach the new device itself. */ ret = local_dev_probe(devargs, &dev); - - if (ret != 0 && ret != -EEXIST) { + already_probed = (ret == -EEXIST || ret == EEXIST); + if (ret < 0 && !already_probed) { RTE_LOG(ERR, EAL, "Failed to attach device on primary process\n"); return ret; @@ -250,6 +255,9 @@ rte_dev_probe(const char *devargs) return 0; rollback: + if (already_probed) + return ret; /* skip rollback */ + req.t = EAL_DEV_REQ_TYPE_ATTACH_ROLLBACK; /* primary send rollback request to secondary. */ diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h index 798ede553b..a01d252930 100644 --- a/lib/librte_eal/common/eal_private.h +++ b/lib/librte_eal/common/eal_private.h @@ -304,7 +304,7 @@ rte_devargs_layers_parse(struct rte_devargs *devargs, * @param new_dev * new device be probed as output. 
* @return - * 0 on success, negative on error. + * >=0 on success (+EEXIST if already probed), negative on error. */ int local_dev_probe(const char *devargs, struct rte_device **new_dev); diff --git a/lib/librte_eal/common/hotplug_mp.c b/lib/librte_eal/common/hotplug_mp.c index 69e9a16d6a..9f8ef28a3b 100644 --- a/lib/librte_eal/common/hotplug_mp.c +++ b/lib/librte_eal/common/hotplug_mp.c @@ -90,13 +90,15 @@ __handle_secondary_request(void *param) struct rte_devargs da; struct rte_device *dev; struct rte_bus *bus; + bool already_probed = false; int ret = 0; tmp_req = *req; if (req->t == EAL_DEV_REQ_TYPE_ATTACH) { ret = local_dev_probe(req->devargs, &dev); - if (ret != 0 && ret != -EEXIST) { + already_probed = (ret == -EEXIST || ret == EEXIST); + if (ret < 0 && !already_probed) { RTE_LOG(ERR, EAL, "Failed to hotplug add device on primary\n"); goto finish; } @@ -159,7 +161,7 @@ __handle_secondary_request(void *param) goto finish; rollback: - if (req->t == EAL_DEV_REQ_TYPE_ATTACH) { + if (req->t == EAL_DEV_REQ_TYPE_ATTACH && !already_probed) { tmp_req.t = EAL_DEV_REQ_TYPE_ATTACH_ROLLBACK; eal_dev_hotplug_request_to_secondary(&tmp_req); local_dev_remove(dev); @@ -238,6 +240,8 @@ static void __handle_primary_request(void *param) case EAL_DEV_REQ_TYPE_ATTACH: case EAL_DEV_REQ_TYPE_DETACH_ROLLBACK: ret = local_dev_probe(req->devargs, &dev); + if (ret > 0) + ret = 0; /* return only errors */ break; case EAL_DEV_REQ_TYPE_DETACH: case EAL_DEV_REQ_TYPE_ATTACH_ROLLBACK:
If probe fails in multi-process context, the device must be removed in other processes for consistency. This is a rollback mechanism. However, the rollback should not happen for devices which were already probed before the current probe transaction. When probing an already probed device, the driver may reject with -EEXIST or update and succeed with code 0. In order to distinguish a successful new probe from a re-probe, in the function local_dev_probe(), the positive EEXIST code is returned for the latter case. The functions rte_dev_probe() and __handle_secondary_request() can test for -EEXIST and +EEXIST, and skip rollback in such a case. Fixes: 244d5130719c ("eal: enable hotplug on multi-process") Fixes: ac9e4a17370f ("eal: support attach/detach shared device from secondary") Cc: qi.z.zhang@intel.com Cc: stable@dpdk.org Signed-off-by: Thomas Monjalon <thomas@monjalon.net> --- lib/librte_eal/common/eal_common_dev.c | 12 ++++++++++-- lib/librte_eal/common/eal_private.h | 2 +- lib/librte_eal/common/hotplug_mp.c | 8 ++++++-- 3 files changed, 17 insertions(+), 5 deletions(-)