[v2,3/4] net/failsafe: replace local sub-device with shared data

Message ID 1551779507-10857-3-git-send-email-rasland@mellanox.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Headers
Series [v2,1/4] net/failsafe: replace local device with shared data |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Raslan Darawsheh March 5, 2019, 9:52 a.m. UTC
  In multiprocess context, the pointer to sub-device is shared between
processes. Previously, it was a pointer to per process eth_dev so
it's needed to replace this dependency.

Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
Signed-off-by: Raslan Darawsheh <rasland@mellanox.com>
---
v2: - moved comment in fs_sdev about subs to this commit
    - added parenthesis around macro arguments.
---
 drivers/net/failsafe/failsafe_eal.c     |  2 +-
 drivers/net/failsafe/failsafe_ether.c   |  7 ++++---
 drivers/net/failsafe/failsafe_private.h | 13 ++++++++-----
 3 files changed, 13 insertions(+), 9 deletions(-)
  

Comments

Thomas Monjalon March 5, 2019, 9:59 a.m. UTC | #1
05/03/2019 10:52, Raslan Darawsheh:
> +/*
> + * Allocated in shared memory.
> + */
>  struct sub_device {
>  	/* Exhaustive DPDK device description */
>  	struct sub_device *next;
>  	struct rte_devargs devargs;
> -	struct rte_bus *bus;
> -	struct rte_device *dev;
> -	struct rte_eth_dev *edev;
> +	struct rte_bus *bus; /* per process. */
> +	struct rte_device *dev; /* per process. */

Thinking again about these comments.
Given it is in a shared struct, it would be more precise to say
"only for primary process".

> +	struct rte_eth_dev_data *data; /* shared between processes */
  
Gaëtan Rivet March 5, 2019, 5:38 p.m. UTC | #2
On Tue, Mar 05, 2019 at 09:52:05AM +0000, Raslan Darawsheh wrote:
> In multiprocess context, the pointer to sub-device is shared between
> processes. Previously, it was a pointer to per process eth_dev so
> it's needed to replace this dependency.
> 
> Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
> Signed-off-by: Raslan Darawsheh <rasland@mellanox.com>
> ---
> v2: - moved comment in fs_sdev about subs to this commit
>     - added parenthesis around macro arguments.
> ---
>  drivers/net/failsafe/failsafe_eal.c     |  2 +-
>  drivers/net/failsafe/failsafe_ether.c   |  7 ++++---
>  drivers/net/failsafe/failsafe_private.h | 13 ++++++++-----
>  3 files changed, 13 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c
> index 56d1669..6fac4b6 100644
> --- a/drivers/net/failsafe/failsafe_eal.c
> +++ b/drivers/net/failsafe/failsafe_eal.c
> @@ -112,7 +112,7 @@ fs_bus_init(struct rte_eth_dev *dev)
>  				continue;
>  			}
>  		}
> -		ETH(sdev) = &rte_eth_devices[pid];
> +		sdev->data = rte_eth_devices[pid].data;
>  		SUB_ID(sdev) = i;
>  		sdev->fs_port_id = dev->data->port_id;
>  		sdev->dev = ETH(sdev)->device;
> diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
> index d5b1488..e1fff59 100644
> --- a/drivers/net/failsafe/failsafe_ether.c
> +++ b/drivers/net/failsafe/failsafe_ether.c
> @@ -267,18 +267,19 @@ static void
>  fs_dev_remove(struct sub_device *sdev)
>  {
>  	int ret;
> +	struct rte_eth_dev *edev = ETH(sdev);

I'd have added that above the "int ret;".
(inverse christmas tree and all that.)

>  
>  	if (sdev == NULL)
>  		return;
>  	switch (sdev->state) {
>  	case DEV_STARTED:
>  		failsafe_rx_intr_uninstall_subdevice(sdev);
> -		rte_eth_dev_stop(PORT_ID(sdev));
> +		rte_eth_dev_stop(edev->data->port_id);
>  		sdev->state = DEV_ACTIVE;
>  		/* fallthrough */
>  	case DEV_ACTIVE:
>  		failsafe_eth_dev_unregister_callbacks(sdev);
> -		rte_eth_dev_close(PORT_ID(sdev));
> +		rte_eth_dev_close(edev->data->port_id);

Ok I see. I missed that during the first reading, the private_data is
zeroed on dev_close(), so ETH(sdev) becomes invalid here.

What happens when a primary process closes a device before a secondary?
Is the secondary unable to stop / close its own then? Isn't there some
missing uninit?

This seems dangerous to me. Why not instead allocating a per-process
slab of memory that would hold the relevant references and outlive the
shared data (a per-process rte_eth_dev private data...).

>  		sdev->state = DEV_PROBED;
>  		/* fallthrough */
>  	case DEV_PROBED:
> @@ -287,7 +288,7 @@ fs_dev_remove(struct sub_device *sdev)
>  			ERROR("Bus detach failed for sub_device %u",
>  			      SUB_ID(sdev));
>  		} else {
> -			rte_eth_dev_release_port(ETH(sdev));
> +			rte_eth_dev_release_port(edev);
>  		}
>  		sdev->state = DEV_PARSED;
>  		/* fallthrough */
> diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
> index 84e847f..1e2ad2d 100644
> --- a/drivers/net/failsafe/failsafe_private.h
> +++ b/drivers/net/failsafe/failsafe_private.h
> @@ -100,13 +100,16 @@ struct fs_stats {
>  	uint64_t timestamp;
>  };
>  
> +/*
> + * Allocated in shared memory.
> + */
>  struct sub_device {
>  	/* Exhaustive DPDK device description */
>  	struct sub_device *next;
>  	struct rte_devargs devargs;
> -	struct rte_bus *bus;
> -	struct rte_device *dev;
> -	struct rte_eth_dev *edev;
> +	struct rte_bus *bus; /* per process. */
> +	struct rte_device *dev; /* per process. */
> +	struct rte_eth_dev_data *data; /* shared between processes */
>  	uint8_t sid;
>  	/* Device state machine */
>  	enum dev_state state;
> @@ -139,7 +142,7 @@ struct fs_priv {
>  	 * subs[0] is the preferred device
>  	 * any other is just another slave
>  	 */
> -	struct sub_device *subs;
> +	struct sub_device *subs;  /* shared between processes */
>  	uint8_t subs_head; /* if head == tail, no subs */
>  	uint8_t subs_tail; /* first invalid */
>  	uint8_t subs_tx; /* current emitting device */
> @@ -254,7 +257,7 @@ extern int failsafe_mac_from_arg;
>  
>  /* sdev: (struct sub_device *) */
>  #define ETH(sdev) \
> -	((sdev)->edev)
> +	((sdev)->data == NULL ? NULL : &rte_eth_devices[(sdev)->data->port_id])
>  
>  /* sdev: (struct sub_device *) */
>  #define PORT_ID(sdev) \
> -- 
> 2.7.4
>
  
Thomas Monjalon March 5, 2019, 5:58 p.m. UTC | #3
Hi,

05/03/2019 18:38, Gaëtan Rivet:
> >  fs_dev_remove(struct sub_device *sdev)
[...]
> > -		rte_eth_dev_close(PORT_ID(sdev));
> > +		rte_eth_dev_close(edev->data->port_id);
> 
> Ok I see. I missed that during the first reading, the private_data is
> zeroed on dev_close(), so ETH(sdev) becomes invalid here.

I don't follow you. What do you mean with this comment?

> What happens when a primary process closes a device before a secondary?
> Is the secondary unable to stop / close its own then? Isn't there some
> missing uninit?

Is the secondary process supposed to do any closing?
The device management should be done only by the primary process.

Note: anyway all this hotplug related code should be dropped
from failsafe to be replaced by EAL hotplug management.

> This seems dangerous to me. Why not instead allocating a per-process
> slab of memory that would hold the relevant references and outlive the
> shared data (a per-process rte_eth_dev private data...).

Which data do you think should be allocated per process?
  
Gaëtan Rivet March 6, 2019, 10:46 a.m. UTC | #4
On Tue, Mar 05, 2019 at 06:58:04PM +0100, Thomas Monjalon wrote:
> Hi,
> 
> 05/03/2019 18:38, Gaëtan Rivet:
> > >  fs_dev_remove(struct sub_device *sdev)
> [...]
> > > -		rte_eth_dev_close(PORT_ID(sdev));
> > > +		rte_eth_dev_close(edev->data->port_id);
> > 
> > Ok I see. I missed that during the first reading, the private_data is
> > zeroed on dev_close(), so ETH(sdev) becomes invalid here.
> 
> I don't follow you. What do you mean with this comment?
> 

PORT_ID(sdev) uses ETH(sdev).

ETH(sdev) will now point to `&rte_eth_devices[(sdev)->data->port_id]`,
so data->port_id will be zeroed on sdev close.

So once the sub-device has been closed, calling
rte_eth_dev_release_port(ETH(sdev)) is not possible anymore, justifying
the change (taking the reference to ETH(sdev) first, then using it after
shared data has been overwritten).

> > What happens when a primary process closes a device before a secondary?
> > Is the secondary unable to stop / close its own then? Isn't there some
> > missing uninit?
> 
> Is the secondary process supposed to do any closing?
> The device management should be done only by the primary process.
> 
> Note: anyway all this hotplug related code should be dropped
> from failsafe to be replaced by EAL hotplug management.
> 

I don't know, I've never used secondary process.
However, cursory reading the code of rte_eth_dev_close(), I don't see
a guard against calling it from a secondary process?

Reading code like

   rte_free(dev->data->rx_queues);
   dev->data->rx_queues = NULL;

within makes me think the issue has been seen at least once, where
shared data is freed multiple times, so I guessed some secondary
processes were calling it. Maybe they are not meant to, but what
prevents them from being badly written?

Also, given rte_dev_remove IPC call to transfer the order to the
primary, it seems that at least secondary processes are expected to call
rte_dev_remove() at some point? So are they only authorized to call
rte_dev_remove() (to manage hotplug), but not rte_eth_dev_close()? Is
there a specific documentation detailing the design of secondary process
and the related responsibilities in the lifetime of a device? How are
they synching their rte_eth_devices list if they are not calling
rte_eth_dev_close(), ever?

> > This seems dangerous to me. Why not instead allocating a per-process
> > slab of memory that would hold the relevant references and outlive the
> > shared data (a per-process rte_eth_dev private data...).
> 
> Which data do you think should be allocated per process?
> 
> 

[-------- SHARED SPACE --------------] [-- PER-PROCESS --------]
+--------------------------------------------------------------+
| +------------------+                +- rte_eth_devices[n] -+ |
| |rte_eth_dev_data  |<---------------+ data                 | | PRIMARY
| |                  |   +dev_priv-+  |                      | |
| |      dev_private +-->|         |  |                      | |
| |              ... |   |         |  |                      | |
| |          port_id |   |         |  |                      | |
| |                  |   |         |  |                      | |
| |                  |   |         |  |                      | |
| |                  |   |         |  +----------------------+ |
| |                  |   |         |  +- rte_eth_devices[n] -+ |
| |                  |   |         |  |                      | | SECONDARY
| |                  |   |         |  |                      | |
| |                  |   |         |  |                      | |
| |                  |   |         |  |                      | |
| |                  |   +---------+  |                      | |
| |                  |<---------------+ data                 | |
| +------------------+                +----------------------+ |
+--------------------------------------------------------------+

Here port_id is used within fail-safe to get back to rte_eth_devices[n].
This disappears once a device is closed, as all shared space is zeroed.

This means that sometimes ETH(sdev) and PORT_ID(sdev) is correct,
and at some point it is not anymore, once a sub-device has been
closed. This seems dangerous.

I was thinking initially that allocating a place where each sdev would
store their rte_eth_devices / port_id back-reference could alleviate the
issue, meaning that the fail-safe would not zero it on sdev_close(), and
it would remain valid for the lifetime of a sub-device, so even when a
sub-device is in DEV_PROBED state.

But now that I think about it, it could probably be simpler: instead of
using (ETH(sdev)->data->port_id) for the port_id of an sdev (meaning
that it is dependent on the lifetime of the sdev, instead of the
lifetime of the failsafe), the port-id itself should be stored in the
sub_device structure. This structure will be available for the lifetime
of the failsafe, and the port_id is correct accross all processes.

So PORT_ID(sdev) would be defined to something like (sdev->port_id), and
ETH(sdev) would be (&rte_eth_devices[PORT_ID(sdev)]). It would remain
correct even once the primary has closed the sub-device.

What do you think? Do you agree that the current state is dangerous, and
do you think the solution would alleviate the issue? Maybe the concern
is unfounded and the only issue is within fs_dev_remove().
  
Thomas Monjalon March 6, 2019, 6:02 p.m. UTC | #5
06/03/2019 11:46, Gaëtan Rivet:
> On Tue, Mar 05, 2019 at 06:58:04PM +0100, Thomas Monjalon wrote:
> > 05/03/2019 18:38, Gaëtan Rivet:
> > > What happens when a primary process closes a device before a secondary?
> > > Is the secondary unable to stop / close its own then? Isn't there some
> > > missing uninit?
> > 
> > Is the secondary process supposed to do any closing?
> > The device management should be done only by the primary process.
> > 
> > Note: anyway all this hotplug related code should be dropped
> > from failsafe to be replaced by EAL hotplug management.
> > 
> 
> I don't know, I've never used secondary process.
> However, cursory reading the code of rte_eth_dev_close(), I don't see
> a guard against calling it from a secondary process?

Yes indeed, there is no guard.
That's something not clear in DPDK, previously we were
attaching some vdevs in secondary only.

> Reading code like
> 
>    rte_free(dev->data->rx_queues);
>    dev->data->rx_queues = NULL;
> 
> within makes me think the issue has been seen at least once, where
> shared data is freed multiple times, so I guessed some secondary
> processes were calling it. Maybe they are not meant to, but what
> prevents them from being badly written?
> 
> Also, given rte_dev_remove IPC call to transfer the order to the
> primary, it seems that at least secondary processes are expected to call
> rte_dev_remove() at some point? So are they only authorized to call
> rte_dev_remove() (to manage hotplug), but not rte_eth_dev_close()? Is
> there a specific documentation detailing the design of secondary process
> and the related responsibilities in the lifetime of a device? How are
> they synching their rte_eth_devices list if they are not calling
> rte_eth_dev_close(), ever?

All these calls should be done in primary.
The IPC mechanism calls the attach/detach in secondary at EAL level.
The PMDs does the bridge between EAL device and ethdev port status.
But you are right, there can be a sync issue if closing an ethdev port
and not removing the EAL device.
This is a generic question about deciding whether we want all ethdev ports
to be synced in multi-process or not.

In failsafe context, we are closing the EAL device and change
the state of the sub-device accordingly. So I think there is no issue.

> > > This seems dangerous to me. Why not instead allocating a per-process
> > > slab of memory that would hold the relevant references and outlive the
> > > shared data (a per-process rte_eth_dev private data...).
> > 
> > Which data do you think should be allocated per process?
> > 
> > 
> 
> [-------- SHARED SPACE --------------] [-- PER-PROCESS --------]
> +--------------------------------------------------------------+
> | +------------------+                +- rte_eth_devices[n] -+ |
> | |rte_eth_dev_data  |<---------------+ data                 | | PRIMARY
> | |                  |   +dev_priv-+  |                      | |
> | |      dev_private +-->|         |  |                      | |
> | |              ... |   |         |  |                      | |
> | |          port_id |   |         |  |                      | |
> | |                  |   |         |  |                      | |
> | |                  |   |         |  |                      | |
> | |                  |   |         |  +----------------------+ |
> | |                  |   |         |  +- rte_eth_devices[n] -+ |
> | |                  |   |         |  |                      | | SECONDARY
> | |                  |   |         |  |                      | |
> | |                  |   |         |  |                      | |
> | |                  |   |         |  |                      | |
> | |                  |   +---------+  |                      | |
> | |                  |<---------------+ data                 | |
> | +------------------+                +----------------------+ |
> +--------------------------------------------------------------+
> 
> Here port_id is used within fail-safe to get back to rte_eth_devices[n].
> This disappears once a device is closed, as all shared space is zeroed.
> 
> This means that sometimes ETH(sdev) and PORT_ID(sdev) is correct,
> and at some point it is not anymore, once a sub-device has been
> closed. This seems dangerous.

The state of the sub-device is changed.
I don't see any issue.

> I was thinking initially that allocating a place where each sdev would
> store their rte_eth_devices / port_id back-reference could alleviate the
> issue, meaning that the fail-safe would not zero it on sdev_close(), and
> it would remain valid for the lifetime of a sub-device, so even when a
> sub-device is in DEV_PROBED state.
> 
> But now that I think about it, it could probably be simpler: instead of
> using (ETH(sdev)->data->port_id) for the port_id of an sdev (meaning
> that it is dependent on the lifetime of the sdev, instead of the
> lifetime of the failsafe), the port-id itself should be stored in the
> sub_device structure. This structure will be available for the lifetime
> of the failsafe, and the port_id is correct accross all processes.
> 
> So PORT_ID(sdev) would be defined to something like (sdev->port_id), and
> ETH(sdev) would be (&rte_eth_devices[PORT_ID(sdev)]). It would remain
> correct even once the primary has closed the sub-device.
> 
> What do you think? Do you agree that the current state is dangerous, and
> do you think the solution would alleviate the issue? Maybe the concern
> is unfounded and the only issue is within fs_dev_remove().

Yes it is only seen in fs_dev_remove().
I discussed about your proposal with Raslan, and we agree we
could change from sub_device.data to sub_device.port_id,
it may be more future-proof.

I have only one doubt: look at the macro in this patch:

#define ETH(sdev) \
	((sdev)->data == NULL ? NULL : &rte_eth_devices[(sdev)->data->port_id])

The NULL check cannot be done with a port id.
I think it was needed to manage one case. Raslan?
  
Raslan Darawsheh March 7, 2019, 8:43 a.m. UTC | #6
Hi,

> -----Original Message-----
> From: Thomas Monjalon <thomas@monjalon.net>
> Sent: Wednesday, March 6, 2019 8:02 PM
> To: Gaëtan Rivet <gaetan.rivet@6wind.com>; Raslan Darawsheh
> <rasland@mellanox.com>
> Cc: dev@dpdk.org; stephen@networkplumber.org
> Subject: Re: [dpdk-dev] [PATCH v2 3/4] net/failsafe: replace local sub-device
> with shared data
> 
> 06/03/2019 11:46, Gaëtan Rivet:
> > On Tue, Mar 05, 2019 at 06:58:04PM +0100, Thomas Monjalon wrote:
> > > 05/03/2019 18:38, Gaëtan Rivet:
> > > > What happens when a primary process closes a device before a
> secondary?
> > > > Is the secondary unable to stop / close its own then? Isn't there
> > > > some missing uninit?
> > >
> > > Is the secondary process supposed to do any closing?
> > > The device management should be done only by the primary process.
> > >
> > > Note: anyway all this hotplug related code should be dropped from
> > > failsafe to be replaced by EAL hotplug management.
> > >
> >
> > I don't know, I've never used secondary process.
> > However, cursory reading the code of rte_eth_dev_close(), I don't see
> > a guard against calling it from a secondary process?
> 
> Yes indeed, there is no guard.
> That's something not clear in DPDK, previously we were attaching some
> vdevs in secondary only.
> 
> > Reading code like
> >
> >    rte_free(dev->data->rx_queues);
> >    dev->data->rx_queues = NULL;
> >
> > within makes me think the issue has been seen at least once, where
> > shared data is freed multiple times, so I guessed some secondary
> > processes were calling it. Maybe they are not meant to, but what
> > prevents them from being badly written?
> >
> > Also, given rte_dev_remove IPC call to transfer the order to the
> > primary, it seems that at least secondary processes are expected to
> > call
> > rte_dev_remove() at some point? So are they only authorized to call
> > rte_dev_remove() (to manage hotplug), but not rte_eth_dev_close()? Is
> > there a specific documentation detailing the design of secondary
> > process and the related responsibilities in the lifetime of a device?
> > How are they synching their rte_eth_devices list if they are not
> > calling rte_eth_dev_close(), ever?
> 
> All these calls should be done in primary.
> The IPC mechanism calls the attach/detach in secondary at EAL level.
> The PMDs does the bridge between EAL device and ethdev port status.
> But you are right, there can be a sync issue if closing an ethdev port and not
> removing the EAL device.
> This is a generic question about deciding whether we want all ethdev ports to
> be synced in multi-process or not.
> 
> In failsafe context, we are closing the EAL device and change the state of the
> sub-device accordingly. So I think there is no issue.
> 
> > > > This seems dangerous to me. Why not instead allocating a
> > > > per-process slab of memory that would hold the relevant references
> > > > and outlive the shared data (a per-process rte_eth_dev private data...).
> > >
> > > Which data do you think should be allocated per process?
> > >
> > >
> >
> > [-------- SHARED SPACE --------------] [-- PER-PROCESS --------]
> > +--------------------------------------------------------------+
> > | +------------------+                +- rte_eth_devices[n] -+ |
> > | |rte_eth_dev_data  |<---------------+ data                 | | PRIMARY
> > | |                  |   +dev_priv-+  |                      | |
> > | |      dev_private +-->|         |  |                      | |
> > | |              ... |   |         |  |                      | |
> > | |          port_id |   |         |  |                      | |
> > | |                  |   |         |  |                      | |
> > | |                  |   |         |  |                      | |
> > | |                  |   |         |  +----------------------+ |
> > | |                  |   |         |  +- rte_eth_devices[n] -+ |
> > | |                  |   |         |  |                      | | SECONDARY
> > | |                  |   |         |  |                      | |
> > | |                  |   |         |  |                      | |
> > | |                  |   |         |  |                      | |
> > | |                  |   +---------+  |                      | |
> > | |                  |<---------------+ data                 | |
> > | +------------------+                +----------------------+ |
> > +--------------------------------------------------------------+
> >
> > Here port_id is used within fail-safe to get back to rte_eth_devices[n].
> > This disappears once a device is closed, as all shared space is zeroed.
> >
> > This means that sometimes ETH(sdev) and PORT_ID(sdev) is correct, and
> > at some point it is not anymore, once a sub-device has been closed.
> > This seems dangerous.
> 
> The state of the sub-device is changed.
> I don't see any issue.
> 
> > I was thinking initially that allocating a place where each sdev would
> > store their rte_eth_devices / port_id back-reference could alleviate
> > the issue, meaning that the fail-safe would not zero it on
> > sdev_close(), and it would remain valid for the lifetime of a
> > sub-device, so even when a sub-device is in DEV_PROBED state.
> >
> > But now that I think about it, it could probably be simpler: instead
> > of using (ETH(sdev)->data->port_id) for the port_id of an sdev
> > (meaning that it is dependent on the lifetime of the sdev, instead of
> > the lifetime of the failsafe), the port-id itself should be stored in
> > the sub_device structure. This structure will be available for the
> > lifetime of the failsafe, and the port_id is correct accross all processes.
> >
> > So PORT_ID(sdev) would be defined to something like (sdev->port_id),
> > and
> > ETH(sdev) would be (&rte_eth_devices[PORT_ID(sdev)]). It would remain
> > correct even once the primary has closed the sub-device.
> >
> > What do you think? Do you agree that the current state is dangerous,
> > and do you think the solution would alleviate the issue? Maybe the
> > concern is unfounded and the only issue is within fs_dev_remove().
> 
> Yes it is only seen in fs_dev_remove().
> I discussed about your proposal with Raslan, and we agree we could change
> from sub_device.data to sub_device.port_id, it may be more future-proof.
> 
> I have only one doubt: look at the macro in this patch:
> 
> #define ETH(sdev) \
> 	((sdev)->data == NULL ? NULL : &rte_eth_devices[(sdev)->data-
> >port_id])
> 
> The NULL check cannot be done with a port id.
> I think it was needed to manage one case. Raslan?

That's right since we need it for fs_tx_unsafe, to add a protection for plugged out devices during TX.
> 


> 
Kindest regards
Raslan Darawsheh
  
Gaëtan Rivet March 7, 2019, 9:47 a.m. UTC | #7
On Thu, Mar 07, 2019 at 08:43:01AM +0000, Raslan Darawsheh wrote:
> Hi,
> 
> > -----Original Message-----
> > From: Thomas Monjalon <thomas@monjalon.net>
> > Sent: Wednesday, March 6, 2019 8:02 PM
> > To: Gaëtan Rivet <gaetan.rivet@6wind.com>; Raslan Darawsheh
> > <rasland@mellanox.com>
> > Cc: dev@dpdk.org; stephen@networkplumber.org
> > Subject: Re: [dpdk-dev] [PATCH v2 3/4] net/failsafe: replace local sub-device
> > with shared data
> > 
> > 06/03/2019 11:46, Gaëtan Rivet:
> > > On Tue, Mar 05, 2019 at 06:58:04PM +0100, Thomas Monjalon wrote:
> > > > 05/03/2019 18:38, Gaëtan Rivet:
> > > > > What happens when a primary process closes a device before a
> > secondary?
> > > > > Is the secondary unable to stop / close its own then? Isn't there
> > > > > some missing uninit?
> > > >
> > > > Is the secondary process supposed to do any closing?
> > > > The device management should be done only by the primary process.
> > > >
> > > > Note: anyway all this hotplug related code should be dropped from
> > > > failsafe to be replaced by EAL hotplug management.
> > > >
> > >
> > > I don't know, I've never used secondary process.
> > > However, cursory reading the code of rte_eth_dev_close(), I don't see
> > > a guard against calling it from a secondary process?
> > 
> > Yes indeed, there is no guard.
> > That's something not clear in DPDK, previously we were attaching some
> > vdevs in secondary only.
> > 
> > > Reading code like
> > >
> > >    rte_free(dev->data->rx_queues);
> > >    dev->data->rx_queues = NULL;
> > >
> > > within makes me think the issue has been seen at least once, where
> > > shared data is freed multiple times, so I guessed some secondary
> > > processes were calling it. Maybe they are not meant to, but what
> > > prevents them from being badly written?
> > >
> > > Also, given rte_dev_remove IPC call to transfer the order to the
> > > primary, it seems that at least secondary processes are expected to
> > > call
> > > rte_dev_remove() at some point? So are they only authorized to call
> > > rte_dev_remove() (to manage hotplug), but not rte_eth_dev_close()? Is
> > > there a specific documentation detailing the design of secondary
> > > process and the related responsibilities in the lifetime of a device?
> > > How are they synching their rte_eth_devices list if they are not
> > > calling rte_eth_dev_close(), ever?
> > 
> > All these calls should be done in primary.
> > The IPC mechanism calls the attach/detach in secondary at EAL level.
> > The PMDs does the bridge between EAL device and ethdev port status.
> > But you are right, there can be a sync issue if closing an ethdev port and not
> > removing the EAL device.
> > This is a generic question about deciding whether we want all ethdev ports to
> > be synced in multi-process or not.
> > 
> > In failsafe context, we are closing the EAL device and change the state of the
> > sub-device accordingly. So I think there is no issue.
> > 
> > > > > This seems dangerous to me. Why not instead allocating a
> > > > > per-process slab of memory that would hold the relevant references
> > > > > and outlive the shared data (a per-process rte_eth_dev private data...).
> > > >
> > > > Which data do you think should be allocated per process?
> > > >
> > > >
> > >
> > > [-------- SHARED SPACE --------------] [-- PER-PROCESS --------]
> > > +--------------------------------------------------------------+
> > > | +------------------+                +- rte_eth_devices[n] -+ |
> > > | |rte_eth_dev_data  |<---------------+ data                 | | PRIMARY
> > > | |                  |   +dev_priv-+  |                      | |
> > > | |      dev_private +-->|         |  |                      | |
> > > | |              ... |   |         |  |                      | |
> > > | |          port_id |   |         |  |                      | |
> > > | |                  |   |         |  |                      | |
> > > | |                  |   |         |  |                      | |
> > > | |                  |   |         |  +----------------------+ |
> > > | |                  |   |         |  +- rte_eth_devices[n] -+ |
> > > | |                  |   |         |  |                      | | SECONDARY
> > > | |                  |   |         |  |                      | |
> > > | |                  |   |         |  |                      | |
> > > | |                  |   |         |  |                      | |
> > > | |                  |   +---------+  |                      | |
> > > | |                  |<---------------+ data                 | |
> > > | +------------------+                +----------------------+ |
> > > +--------------------------------------------------------------+
> > >
> > > Here port_id is used within fail-safe to get back to rte_eth_devices[n].
> > > This disappears once a device is closed, as all shared space is zeroed.
> > >
> > > This means that sometimes ETH(sdev) and PORT_ID(sdev) is correct, and
> > > at some point it is not anymore, once a sub-device has been closed.
> > > This seems dangerous.
> > 
> > The state of the sub-device is changed.
> > I don't see any issue.
> > 
> > > I was thinking initially that allocating a place where each sdev would
> > > store their rte_eth_devices / port_id back-reference could alleviate
> > > the issue, meaning that the fail-safe would not zero it on
> > > sdev_close(), and it would remain valid for the lifetime of a
> > > sub-device, so even when a sub-device is in DEV_PROBED state.
> > >
> > > But now that I think about it, it could probably be simpler: instead
> > > of using (ETH(sdev)->data->port_id) for the port_id of an sdev
> > > (meaning that it is dependent on the lifetime of the sdev, instead of
> > > the lifetime of the failsafe), the port-id itself should be stored in
> > > the sub_device structure. This structure will be available for the
> > > lifetime of the failsafe, and the port_id is correct accross all processes.
> > >
> > > So PORT_ID(sdev) would be defined to something like (sdev->port_id),
> > > and
> > > ETH(sdev) would be (&rte_eth_devices[PORT_ID(sdev)]). It would remain
> > > correct even once the primary has closed the sub-device.
> > >
> > > What do you think? Do you agree that the current state is dangerous,
> > > and do you think the solution would alleviate the issue? Maybe the
> > > concern is unfounded and the only issue is within fs_dev_remove().
> > 
> > Yes it is only seen in fs_dev_remove().
> > I discussed about your proposal with Raslan, and we agree we could change
> > from sub_device.data to sub_device.port_id, it may be more future-proof.
> > 
> > I have only one doubt: look at the macro in this patch:
> > 
> > #define ETH(sdev) \
> > 	((sdev)->data == NULL ? NULL : &rte_eth_devices[(sdev)->data-
> > >port_id])
> > 
> > The NULL check cannot be done with a port id.
> > I think it was needed to manage one case. Raslan?
> 
> That's right since we need it for fs_tx_unsafe, to add a protection for plugged out devices during TX.

Ok, thanks for your insights Thomas and Raslan. Sorry about the rambling
above I needed to write down the stuff to think about it.

You can use RTE_MAX_ETHPORTS as a sentinel value for port_id, this way
the value is kept unsigned and there are several checks against this
specific value otherwise.

so ETH(sdev) could be

        (PORT_ID(sdev) >= RTE_MAX_ETHPORTS ? NULL : &rte_eth_devices[PORT_ID(sdev)])
  
Raslan Darawsheh March 7, 2019, 11:34 a.m. UTC | #8
Hi,

> -----Original Message-----
> From: Gaëtan Rivet <gaetan.rivet@6wind.com>
> Sent: Thursday, March 7, 2019 11:48 AM
> To: Raslan Darawsheh <rasland@mellanox.com>
> Cc: Thomas Monjalon <thomas@monjalon.net>; dev@dpdk.org;
> stephen@networkplumber.org
> Subject: Re: [dpdk-dev] [PATCH v2 3/4] net/failsafe: replace local sub-device
> with shared data
> 
> On Thu, Mar 07, 2019 at 08:43:01AM +0000, Raslan Darawsheh wrote:
> > Hi,
> >
> > > -----Original Message-----
> > > From: Thomas Monjalon <thomas@monjalon.net>
> > > Sent: Wednesday, March 6, 2019 8:02 PM
> > > To: Gaëtan Rivet <gaetan.rivet@6wind.com>; Raslan Darawsheh
> > > <rasland@mellanox.com>
> > > Cc: dev@dpdk.org; stephen@networkplumber.org
> > > Subject: Re: [dpdk-dev] [PATCH v2 3/4] net/failsafe: replace local
> > > sub-device with shared data
> > >
> > > 06/03/2019 11:46, Gaëtan Rivet:
> > > > On Tue, Mar 05, 2019 at 06:58:04PM +0100, Thomas Monjalon wrote:
> > > > > 05/03/2019 18:38, Gaëtan Rivet:
> > > > > > What happens when a primary process closes a device before a
> > > secondary?
> > > > > > Is the secondary unable to stop / close its own then? Isn't
> > > > > > there some missing uninit?
> > > > >
> > > > > Is the secondary process supposed to do any closing?
> > > > > The device management should be done only by the primary process.
> > > > >
> > > > > Note: anyway all this hotplug related code should be dropped
> > > > > from failsafe to be replaced by EAL hotplug management.
> > > > >
> > > >
> > > > I don't know, I've never used secondary process.
> > > > However, cursory reading the code of rte_eth_dev_close(), I don't
> > > > see a guard against calling it from a secondary process?
> > >
> > > Yes indeed, there is no guard.
> > > That's something not clear in DPDK, previously we were attaching
> > > some vdevs in secondary only.
> > >
> > > > Reading code like
> > > >
> > > >    rte_free(dev->data->rx_queues);
> > > >    dev->data->rx_queues = NULL;
> > > >
> > > > within makes me think the issue has been seen at least once, where
> > > > shared data is freed multiple times, so I guessed some secondary
> > > > processes were calling it. Maybe they are not meant to, but what
> > > > prevents them from being badly written?
> > > >
> > > > Also, given rte_dev_remove IPC call to transfer the order to the
> > > > primary, it seems that at least secondary processes are expected
> > > > to call
> > > > rte_dev_remove() at some point? So are they only authorized to
> > > > call
> > > > rte_dev_remove() (to manage hotplug), but not rte_eth_dev_close()?
> > > > Is there a specific documentation detailing the design of
> > > > secondary process and the related responsibilities in the lifetime of a
> device?
> > > > How are they synching their rte_eth_devices list if they are not
> > > > calling rte_eth_dev_close(), ever?
> > >
> > > All these calls should be done in primary.
> > > The IPC mechanism calls the attach/detach in secondary at EAL level.
> > > The PMDs does the bridge between EAL device and ethdev port status.
> > > But you are right, there can be a sync issue if closing an ethdev
> > > port and not removing the EAL device.
> > > This is a generic question about deciding whether we want all ethdev
> > > ports to be synced in multi-process or not.
> > >
> > > In failsafe context, we are closing the EAL device and change the
> > > state of the sub-device accordingly. So I think there is no issue.
> > >
> > > > > > This seems dangerous to me. Why not instead allocating a
> > > > > > per-process slab of memory that would hold the relevant
> > > > > > references and outlive the shared data (a per-process rte_eth_dev
> private data...).
> > > > >
> > > > > Which data do you think should be allocated per process?
> > > > >
> > > > >
> > > >
> > > > [-------- SHARED SPACE --------------] [-- PER-PROCESS --------]
> > > > +--------------------------------------------------------------+
> > > > | +------------------+                +- rte_eth_devices[n] -+ |
> > > > | |rte_eth_dev_data  |<---------------+ data                 | | PRIMARY
> > > > | |                  |   +dev_priv-+  |                      | |
> > > > | |      dev_private +-->|         |  |                      | |
> > > > | |              ... |   |         |  |                      | |
> > > > | |          port_id |   |         |  |                      | |
> > > > | |                  |   |         |  |                      | |
> > > > | |                  |   |         |  |                      | |
> > > > | |                  |   |         |  +----------------------+ |
> > > > | |                  |   |         |  +- rte_eth_devices[n] -+ |
> > > > | |                  |   |         |  |                      | | SECONDARY
> > > > | |                  |   |         |  |                      | |
> > > > | |                  |   |         |  |                      | |
> > > > | |                  |   |         |  |                      | |
> > > > | |                  |   +---------+  |                      | |
> > > > | |                  |<---------------+ data                 | |
> > > > | +------------------+                +----------------------+ |
> > > > +--------------------------------------------------------------+
> > > >
> > > > Here port_id is used within fail-safe to get back to rte_eth_devices[n].
> > > > This disappears once a device is closed, as all shared space is zeroed.
> > > >
> > > > This means that sometimes ETH(sdev) and PORT_ID(sdev) is correct,
> > > > and at some point it is not anymore, once a sub-device has been closed.
> > > > This seems dangerous.
> > >
> > > The state of the sub-device is changed.
> > > I don't see any issue.
> > >
> > > > I was thinking initially that allocating a place where each sdev
> > > > would store their rte_eth_devices / port_id back-reference could
> > > > alleviate the issue, meaning that the fail-safe would not zero it
> > > > on sdev_close(), and it would remain valid for the lifetime of a
> > > > sub-device, so even when a sub-device is in DEV_PROBED state.
> > > >
> > > > But now that I think about it, it could probably be simpler:
> > > > instead of using (ETH(sdev)->data->port_id) for the port_id of an
> > > > sdev (meaning that it is dependent on the lifetime of the sdev,
> > > > instead of the lifetime of the failsafe), the port-id itself
> > > > should be stored in the sub_device structure. This structure will
> > > > be available for the lifetime of the failsafe, and the port_id is correct
> accross all processes.
> > > >
> > > > So PORT_ID(sdev) would be defined to something like
> > > > (sdev->port_id), and
> > > > ETH(sdev) would be (&rte_eth_devices[PORT_ID(sdev)]). It would
> > > > remain correct even once the primary has closed the sub-device.
> > > >
> > > > What do you think? Do you agree that the current state is
> > > > dangerous, and do you think the solution would alleviate the
> > > > issue? Maybe the concern is unfounded and the only issue is within
> fs_dev_remove().
> > >
> > > Yes it is only seen in fs_dev_remove().
> > > I discussed about your proposal with Raslan, and we agree we could
> > > change from sub_device.data to sub_device.port_id, it may be more
> future-proof.
> > >
> > > I have only one doubt: look at the macro in this patch:
> > >
> > > #define ETH(sdev) \
> > > 	((sdev)->data == NULL ? NULL : &rte_eth_devices[(sdev)->data-
> > > >port_id])
> > >
> > > The NULL check cannot be done with a port id.
> > > I think it was needed to manage one case. Raslan?
> >
> > That's right since we need it for fs_tx_unsafe, to add a protection for
> plugged out devices during TX.
> 
> Ok, thanks for your insights Thomas and Raslan. Sorry about the rambling
> above I needed to write down the stuff to think about it.
> 
> You can use RTE_MAX_ETHPORTS as a sentinel value for port_id, this way the
> value is kept unsigned and there are several checks against this specific value
> otherwise.
> 
> so ETH(sdev) could be
> 
>         (PORT_ID(sdev) >= RTE_MAX_ETHPORTS ? NULL :
> &rte_eth_devices[PORT_ID(sdev)])
> 
But, this mean that you have to explicitly set the port id  to RTE_MAX_ETH_PORTS in the sdev after fs_dev_remove and you shouldn't rely on the port ID anymore.

> --

> Gaëtan Rivet
> 6WIND

Kindest regards
Raslan Darawsheh
  
Gaëtan Rivet March 7, 2019, 11:50 a.m. UTC | #9
On Thu, Mar 07, 2019 at 11:34:42AM +0000, Raslan Darawsheh wrote:
> Hi,
> 
> > -----Original Message-----
> > From: Gaëtan Rivet <gaetan.rivet@6wind.com>
> > Sent: Thursday, March 7, 2019 11:48 AM
> > To: Raslan Darawsheh <rasland@mellanox.com>
> > Cc: Thomas Monjalon <thomas@monjalon.net>; dev@dpdk.org;
> > stephen@networkplumber.org
> > Subject: Re: [dpdk-dev] [PATCH v2 3/4] net/failsafe: replace local sub-device
> > with shared data
> > 
> > On Thu, Mar 07, 2019 at 08:43:01AM +0000, Raslan Darawsheh wrote:
> > > Hi,
> > >
> > > > -----Original Message-----
> > > > From: Thomas Monjalon <thomas@monjalon.net>
> > > > Sent: Wednesday, March 6, 2019 8:02 PM
> > > > To: Gaëtan Rivet <gaetan.rivet@6wind.com>; Raslan Darawsheh
> > > > <rasland@mellanox.com>
> > > > Cc: dev@dpdk.org; stephen@networkplumber.org
> > > > Subject: Re: [dpdk-dev] [PATCH v2 3/4] net/failsafe: replace local
> > > > sub-device with shared data
> > > >
> > > > 06/03/2019 11:46, Gaëtan Rivet:
> > > > > On Tue, Mar 05, 2019 at 06:58:04PM +0100, Thomas Monjalon wrote:
> > > > > > 05/03/2019 18:38, Gaëtan Rivet:
> > > > > > > What happens when a primary process closes a device before a
> > > > secondary?
> > > > > > > Is the secondary unable to stop / close its own then? Isn't
> > > > > > > there some missing uninit?
> > > > > >
> > > > > > Is the secondary process supposed to do any closing?
> > > > > > The device management should be done only by the primary process.
> > > > > >
> > > > > > Note: anyway all this hotplug related code should be dropped
> > > > > > from failsafe to be replaced by EAL hotplug management.
> > > > > >
> > > > >
> > > > > I don't know, I've never used secondary process.
> > > > > However, cursory reading the code of rte_eth_dev_close(), I don't
> > > > > see a guard against calling it from a secondary process?
> > > >
> > > > Yes indeed, there is no guard.
> > > > That's something not clear in DPDK, previously we were attaching
> > > > some vdevs in secondary only.
> > > >
> > > > > Reading code like
> > > > >
> > > > >    rte_free(dev->data->rx_queues);
> > > > >    dev->data->rx_queues = NULL;
> > > > >
> > > > > within makes me think the issue has been seen at least once, where
> > > > > shared data is freed multiple times, so I guessed some secondary
> > > > > processes were calling it. Maybe they are not meant to, but what
> > > > > prevents them from being badly written?
> > > > >
> > > > > Also, given rte_dev_remove IPC call to transfer the order to the
> > > > > primary, it seems that at least secondary processes are expected
> > > > > to call
> > > > > rte_dev_remove() at some point? So are they only authorized to
> > > > > call
> > > > > rte_dev_remove() (to manage hotplug), but not rte_eth_dev_close()?
> > > > > Is there a specific documentation detailing the design of
> > > > > secondary process and the related responsibilities in the lifetime of a
> > device?
> > > > > How are they synching their rte_eth_devices list if they are not
> > > > > calling rte_eth_dev_close(), ever?
> > > >
> > > > All these calls should be done in primary.
> > > > The IPC mechanism calls the attach/detach in secondary at EAL level.
> > > > The PMDs does the bridge between EAL device and ethdev port status.
> > > > But you are right, there can be a sync issue if closing an ethdev
> > > > port and not removing the EAL device.
> > > > This is a generic question about deciding whether we want all ethdev
> > > > ports to be synced in multi-process or not.
> > > >
> > > > In failsafe context, we are closing the EAL device and change the
> > > > state of the sub-device accordingly. So I think there is no issue.
> > > >
> > > > > > > This seems dangerous to me. Why not instead allocating a
> > > > > > > per-process slab of memory that would hold the relevant
> > > > > > > references and outlive the shared data (a per-process rte_eth_dev
> > private data...).
> > > > > >
> > > > > > Which data do you think should be allocated per process?
> > > > > >
> > > > > >
> > > > >
> > > > > [-------- SHARED SPACE --------------] [-- PER-PROCESS --------]
> > > > > +--------------------------------------------------------------+
> > > > > | +------------------+                +- rte_eth_devices[n] -+ |
> > > > > | |rte_eth_dev_data  |<---------------+ data                 | | PRIMARY
> > > > > | |                  |   +dev_priv-+  |                      | |
> > > > > | |      dev_private +-->|         |  |                      | |
> > > > > | |              ... |   |         |  |                      | |
> > > > > | |          port_id |   |         |  |                      | |
> > > > > | |                  |   |         |  |                      | |
> > > > > | |                  |   |         |  |                      | |
> > > > > | |                  |   |         |  +----------------------+ |
> > > > > | |                  |   |         |  +- rte_eth_devices[n] -+ |
> > > > > | |                  |   |         |  |                      | | SECONDARY
> > > > > | |                  |   |         |  |                      | |
> > > > > | |                  |   |         |  |                      | |
> > > > > | |                  |   |         |  |                      | |
> > > > > | |                  |   +---------+  |                      | |
> > > > > | |                  |<---------------+ data                 | |
> > > > > | +------------------+                +----------------------+ |
> > > > > +--------------------------------------------------------------+
> > > > >
> > > > > Here port_id is used within fail-safe to get back to rte_eth_devices[n].
> > > > > This disappears once a device is closed, as all shared space is zeroed.
> > > > >
> > > > > This means that sometimes ETH(sdev) and PORT_ID(sdev) is correct,
> > > > > and at some point it is not anymore, once a sub-device has been closed.
> > > > > This seems dangerous.
> > > >
> > > > The state of the sub-device is changed.
> > > > I don't see any issue.
> > > >
> > > > > I was thinking initially that allocating a place where each sdev
> > > > > would store their rte_eth_devices / port_id back-reference could
> > > > > alleviate the issue, meaning that the fail-safe would not zero it
> > > > > on sdev_close(), and it would remain valid for the lifetime of a
> > > > > sub-device, so even when a sub-device is in DEV_PROBED state.
> > > > >
> > > > > But now that I think about it, it could probably be simpler:
> > > > > instead of using (ETH(sdev)->data->port_id) for the port_id of an
> > > > > sdev (meaning that it is dependent on the lifetime of the sdev,
> > > > > instead of the lifetime of the failsafe), the port-id itself
> > > > > should be stored in the sub_device structure. This structure will
> > > > > be available for the lifetime of the failsafe, and the port_id is correct
> > accross all processes.
> > > > >
> > > > > So PORT_ID(sdev) would be defined to something like
> > > > > (sdev->port_id), and
> > > > > ETH(sdev) would be (&rte_eth_devices[PORT_ID(sdev)]). It would
> > > > > remain correct even once the primary has closed the sub-device.
> > > > >
> > > > > What do you think? Do you agree that the current state is
> > > > > dangerous, and do you think the solution would alleviate the
> > > > > issue? Maybe the concern is unfounded and the only issue is within
> > fs_dev_remove().
> > > >
> > > > Yes it is only seen in fs_dev_remove().
> > > > I discussed about your proposal with Raslan, and we agree we could
> > > > change from sub_device.data to sub_device.port_id, it may be more
> > future-proof.
> > > >
> > > > I have only one doubt: look at the macro in this patch:
> > > >
> > > > #define ETH(sdev) \
> > > > 	((sdev)->data == NULL ? NULL : &rte_eth_devices[(sdev)->data-
> > > > >port_id])
> > > >
> > > > The NULL check cannot be done with a port id.
> > > > I think it was needed to manage one case. Raslan?
> > >
> > > That's right since we need it for fs_tx_unsafe, to add a protection for
> > plugged out devices during TX.
> > 
> > Ok, thanks for your insights Thomas and Raslan. Sorry about the rambling
> > above I needed to write down the stuff to think about it.
> > 
> > You can use RTE_MAX_ETHPORTS as a sentinel value for port_id, this way the
> > value is kept unsigned and there are several checks against this specific value
> > otherwise.
> > 
> > so ETH(sdev) could be
> > 
> >         (PORT_ID(sdev) >= RTE_MAX_ETHPORTS ? NULL :
> > &rte_eth_devices[PORT_ID(sdev)])
> > 
> But, this mean that you have to explicitly set the port id  to RTE_MAX_ETH_PORTS in the sdev after fs_dev_remove and you shouldn't rely on the port ID anymore.
> 

Yes, once a sub-device has completely finished its removal (from the
fail-safe PoV), the fail-safe marks the sub-device as not used anymore.
This seems correct.

If the fail-safe used the sdev->data->port_id instead, it would return
0, which is a valid port_id that is probably still used by another port.

> > --
> 
> > Gaëtan Rivet
> > 6WIND
> 
> Kindest regards
> Raslan Darawsheh
  

Patch

diff --git a/drivers/net/failsafe/failsafe_eal.c b/drivers/net/failsafe/failsafe_eal.c
index 56d1669..6fac4b6 100644
--- a/drivers/net/failsafe/failsafe_eal.c
+++ b/drivers/net/failsafe/failsafe_eal.c
@@ -112,7 +112,7 @@  fs_bus_init(struct rte_eth_dev *dev)
 				continue;
 			}
 		}
-		ETH(sdev) = &rte_eth_devices[pid];
+		sdev->data = rte_eth_devices[pid].data;
 		SUB_ID(sdev) = i;
 		sdev->fs_port_id = dev->data->port_id;
 		sdev->dev = ETH(sdev)->device;
diff --git a/drivers/net/failsafe/failsafe_ether.c b/drivers/net/failsafe/failsafe_ether.c
index d5b1488..e1fff59 100644
--- a/drivers/net/failsafe/failsafe_ether.c
+++ b/drivers/net/failsafe/failsafe_ether.c
@@ -267,18 +267,19 @@  static void
 fs_dev_remove(struct sub_device *sdev)
 {
 	int ret;
+	struct rte_eth_dev *edev = ETH(sdev);
 
 	if (sdev == NULL)
 		return;
 	switch (sdev->state) {
 	case DEV_STARTED:
 		failsafe_rx_intr_uninstall_subdevice(sdev);
-		rte_eth_dev_stop(PORT_ID(sdev));
+		rte_eth_dev_stop(edev->data->port_id);
 		sdev->state = DEV_ACTIVE;
 		/* fallthrough */
 	case DEV_ACTIVE:
 		failsafe_eth_dev_unregister_callbacks(sdev);
-		rte_eth_dev_close(PORT_ID(sdev));
+		rte_eth_dev_close(edev->data->port_id);
 		sdev->state = DEV_PROBED;
 		/* fallthrough */
 	case DEV_PROBED:
@@ -287,7 +288,7 @@  fs_dev_remove(struct sub_device *sdev)
 			ERROR("Bus detach failed for sub_device %u",
 			      SUB_ID(sdev));
 		} else {
-			rte_eth_dev_release_port(ETH(sdev));
+			rte_eth_dev_release_port(edev);
 		}
 		sdev->state = DEV_PARSED;
 		/* fallthrough */
diff --git a/drivers/net/failsafe/failsafe_private.h b/drivers/net/failsafe/failsafe_private.h
index 84e847f..1e2ad2d 100644
--- a/drivers/net/failsafe/failsafe_private.h
+++ b/drivers/net/failsafe/failsafe_private.h
@@ -100,13 +100,16 @@  struct fs_stats {
 	uint64_t timestamp;
 };
 
+/*
+ * Allocated in shared memory.
+ */
 struct sub_device {
 	/* Exhaustive DPDK device description */
 	struct sub_device *next;
 	struct rte_devargs devargs;
-	struct rte_bus *bus;
-	struct rte_device *dev;
-	struct rte_eth_dev *edev;
+	struct rte_bus *bus; /* per process. */
+	struct rte_device *dev; /* per process. */
+	struct rte_eth_dev_data *data; /* shared between processes */
 	uint8_t sid;
 	/* Device state machine */
 	enum dev_state state;
@@ -139,7 +142,7 @@  struct fs_priv {
 	 * subs[0] is the preferred device
 	 * any other is just another slave
 	 */
-	struct sub_device *subs;
+	struct sub_device *subs;  /* shared between processes */
 	uint8_t subs_head; /* if head == tail, no subs */
 	uint8_t subs_tail; /* first invalid */
 	uint8_t subs_tx; /* current emitting device */
@@ -254,7 +257,7 @@  extern int failsafe_mac_from_arg;
 
 /* sdev: (struct sub_device *) */
 #define ETH(sdev) \
-	((sdev)->edev)
+	((sdev)->data == NULL ? NULL : &rte_eth_devices[(sdev)->data->port_id])
 
 /* sdev: (struct sub_device *) */
 #define PORT_ID(sdev) \