[v2,2/3] net/vmxnet3: fix vmxnet3 dev_uninit() hot-unplug

Message ID 20180919125757.17938-2-bluca@debian.org (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series [v2,1/3] net/virtio: register/unregister intr handler on start/stop |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Luca Boccassi Sept. 19, 2018, 12:57 p.m. UTC
  The vmxnet3 driver can't call back into dev_close(), and possibly
dev_stop(), in dev_uninit().  When dev_uninit() is called, anything
that those routines would want to clean up has already been released.
Further, for complete cleanup, it is necessary to release any of the
queue resources during dev_close().
This allows a vmxnet3 device to be hot-unplugged without leaking
queues.

Fixes: dfaff37fc46d ("vmxnet3: import new vmxnet3 poll mode driver implementation")
Cc: stable@dpdk.org

Signed-off-by: Brian Russell <brussell@brocade.com>
Signed-off-by: Luca Boccassi <bluca@debian.org>
---
v2: add back extra close() call in uninit() for buggy applications as
    requested by the reviewers, and add debug log noting the issue

 drivers/net/vmxnet3/vmxnet3_ethdev.c | 35 +++++++++++++++++++++++-----
 1 file changed, 29 insertions(+), 6 deletions(-)
  

Comments

Chas Williams Sept. 19, 2018, 3:47 p.m. UTC | #1
On Wed, Sep 19, 2018 at 8:58 AM Luca Boccassi <bluca@debian.org> wrote:
>
> The vmxnet3 driver can't call back into dev_close(), and possibly
> dev_stop(), in dev_uninit().  When dev_uninit() is called, anything
> that those routines would want to clean up has already been released.
> Further, for complete cleanup, it is necessary to release any of the
> queue resources during dev_close().
> This allows a vmxnet3 device to be hot-unplugged without leaking
> queues.
>
> Fixes: dfaff37fc46d ("vmxnet3: import new vmxnet3 poll mode driver implementation")
> Cc: stable@dpdk.org
>
> Signed-off-by: Brian Russell <brussell@brocade.com>
> Signed-off-by: Luca Boccassi <bluca@debian.org>
> ---
> v2: add back extra close() call in uninit() for buggy applications as
>     requested by the reviewers, and add debug log noting the issue
>
>  drivers/net/vmxnet3/vmxnet3_ethdev.c | 35 +++++++++++++++++++++++-----
>  1 file changed, 29 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/net/vmxnet3/vmxnet3_ethdev.c b/drivers/net/vmxnet3/vmxnet3_ethdev.c
> index f1596ab19d..98e5d01890 100644
> --- a/drivers/net/vmxnet3/vmxnet3_ethdev.c
> +++ b/drivers/net/vmxnet3/vmxnet3_ethdev.c
> @@ -354,8 +354,10 @@ eth_vmxnet3_dev_uninit(struct rte_eth_dev *eth_dev)
>         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
>                 return 0;

This should probably be EPERM as well.  Out of scope though.

>
> -       if (hw->adapter_stopped == 0)
> +       if (hw->adapter_stopped == 0) {
> +               PMD_INIT_LOG(DEBUG, "Device has not been closed.");
>                 vmxnet3_dev_close(eth_dev);

This just seems wrong.  You have called uninit() while the driver is
still busy.  Instead of "fixing" the state of the driver, return EBUSY
here.

> +       }
>
>         eth_dev->dev_ops = NULL;
>         eth_dev->rx_pkt_burst = NULL;
> @@ -802,7 +804,7 @@ vmxnet3_dev_stop(struct rte_eth_dev *dev)
>         PMD_INIT_FUNC_TRACE();
>
>         if (hw->adapter_stopped == 1) {
> -               PMD_INIT_LOG(DEBUG, "Device already closed.");
> +               PMD_INIT_LOG(DEBUG, "Device already stopped.");
>                 return;
>         }
>
> @@ -826,7 +828,6 @@ vmxnet3_dev_stop(struct rte_eth_dev *dev)
>         /* reset the device */
>         VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, VMXNET3_CMD_RESET_DEV);
>         PMD_INIT_LOG(DEBUG, "Device reset.");
> -       hw->adapter_stopped = 0;
>
>         vmxnet3_dev_clear_queues(dev);
>
> @@ -836,6 +837,30 @@ vmxnet3_dev_stop(struct rte_eth_dev *dev)
>         link.link_speed = ETH_SPEED_NUM_10G;
>         link.link_autoneg = ETH_LINK_FIXED;
>         rte_eth_linkstatus_set(dev, &link);
> +
> +       hw->adapter_stopped = 1;
> +}
> +
> +static void
> +vmxnet3_free_queues(struct rte_eth_dev *dev)
> +{
> +       int i;
> +
> +       PMD_INIT_FUNC_TRACE();
> +
> +       for (i = 0; i < dev->data->nb_rx_queues; i++) {
> +               void *rxq = dev->data->rx_queues[i];
> +
> +               vmxnet3_dev_rx_queue_release(rxq);
> +       }
> +       dev->data->nb_rx_queues = 0;
> +
> +       for (i = 0; i < dev->data->nb_tx_queues; i++) {
> +               void *txq = dev->data->tx_queues[i];
> +
> +               vmxnet3_dev_tx_queue_release(txq);
> +       }
> +       dev->data->nb_tx_queues = 0;
>  }
>
>  /*
> @@ -844,12 +869,10 @@ vmxnet3_dev_stop(struct rte_eth_dev *dev)
>  static void
>  vmxnet3_dev_close(struct rte_eth_dev *dev)
>  {
> -       struct vmxnet3_hw *hw = dev->data->dev_private;
> -
>         PMD_INIT_FUNC_TRACE();
>
>         vmxnet3_dev_stop(dev);
> -       hw->adapter_stopped = 1;
> +       vmxnet3_free_queues(dev);
>  }
>
>  static void
> --
> 2.18.0
>
  
Luca Boccassi Sept. 19, 2018, 4:08 p.m. UTC | #2
On Wed, 2018-09-19 at 11:47 -0400, Chas Williams wrote:
> On Wed, Sep 19, 2018 at 8:58 AM Luca Boccassi <bluca@debian.org>
> wrote:
> > 
> > The vmxnet3 driver can't call back into dev_close(), and possibly
> > dev_stop(), in dev_uninit().  When dev_uninit() is called, anything
> > that those routines would want to clean up has already been
> > released.
> > Further, for complete cleanup, it is necessary to release any of
> > the
> > queue resources during dev_close().
> > This allows a vmxnet3 device to be hot-unplugged without leaking
> > queues.
> > 
> > Fixes: dfaff37fc46d ("vmxnet3: import new vmxnet3 poll mode driver
> > implementation")
> > Cc: stable@dpdk.org
> > 
> > Signed-off-by: Brian Russell <brussell@brocade.com>
> > Signed-off-by: Luca Boccassi <bluca@debian.org>
> > ---
> > v2: add back extra close() call in uninit() for buggy applications
> > as
> >     requested by the reviewers, and add debug log noting the issue
> > 
> >  drivers/net/vmxnet3/vmxnet3_ethdev.c | 35 +++++++++++++++++++++++-
> > ----
> >  1 file changed, 29 insertions(+), 6 deletions(-)
> > 
> > diff --git a/drivers/net/vmxnet3/vmxnet3_ethdev.c
> > b/drivers/net/vmxnet3/vmxnet3_ethdev.c
> > index f1596ab19d..98e5d01890 100644
> > --- a/drivers/net/vmxnet3/vmxnet3_ethdev.c
> > +++ b/drivers/net/vmxnet3/vmxnet3_ethdev.c
> > @@ -354,8 +354,10 @@ eth_vmxnet3_dev_uninit(struct rte_eth_dev
> > *eth_dev)
> >         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
> >                 return 0;
> 
> This should probably be EPERM as well.  Out of scope though.
> 
> > 
> > -       if (hw->adapter_stopped == 0)
> > +       if (hw->adapter_stopped == 0) {
> > +               PMD_INIT_LOG(DEBUG, "Device has not been closed.");
> >                 vmxnet3_dev_close(eth_dev);
> 
> This just seems wrong.  You have called uninit() will the driver is
> still busy.  Instead of "fixing" the state of the driver, return
> EBUSY
> here.

At this point that's out of scope too - it doesn't affect the ability
to hotplug or not.
So please send another patch and discuss it further with Louis, who
requested to drop that change.

> > +       }
> > 
> >         eth_dev->dev_ops = NULL;
> >         eth_dev->rx_pkt_burst = NULL;
> > @@ -802,7 +804,7 @@ vmxnet3_dev_stop(struct rte_eth_dev *dev)
> >         PMD_INIT_FUNC_TRACE();
> > 
> >         if (hw->adapter_stopped == 1) {
> > -               PMD_INIT_LOG(DEBUG, "Device already closed.");
> > +               PMD_INIT_LOG(DEBUG, "Device already stopped.");
> >                 return;
> >         }
> > 
> > @@ -826,7 +828,6 @@ vmxnet3_dev_stop(struct rte_eth_dev *dev)
> >         /* reset the device */
> >         VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD,
> > VMXNET3_CMD_RESET_DEV);
> >         PMD_INIT_LOG(DEBUG, "Device reset.");
> > -       hw->adapter_stopped = 0;
> > 
> >         vmxnet3_dev_clear_queues(dev);
> > 
> > @@ -836,6 +837,30 @@ vmxnet3_dev_stop(struct rte_eth_dev *dev)
> >         link.link_speed = ETH_SPEED_NUM_10G;
> >         link.link_autoneg = ETH_LINK_FIXED;
> >         rte_eth_linkstatus_set(dev, &link);
> > +
> > +       hw->adapter_stopped = 1;
> > +}
> > +
> > +static void
> > +vmxnet3_free_queues(struct rte_eth_dev *dev)
> > +{
> > +       int i;
> > +
> > +       PMD_INIT_FUNC_TRACE();
> > +
> > +       for (i = 0; i < dev->data->nb_rx_queues; i++) {
> > +               void *rxq = dev->data->rx_queues[i];
> > +
> > +               vmxnet3_dev_rx_queue_release(rxq);
> > +       }
> > +       dev->data->nb_rx_queues = 0;
> > +
> > +       for (i = 0; i < dev->data->nb_tx_queues; i++) {
> > +               void *txq = dev->data->tx_queues[i];
> > +
> > +               vmxnet3_dev_tx_queue_release(txq);
> > +       }
> > +       dev->data->nb_tx_queues = 0;
> >  }
> > 
> >  /*
> > @@ -844,12 +869,10 @@ vmxnet3_dev_stop(struct rte_eth_dev *dev)
> >  static void
> >  vmxnet3_dev_close(struct rte_eth_dev *dev)
> >  {
> > -       struct vmxnet3_hw *hw = dev->data->dev_private;
> > -
> >         PMD_INIT_FUNC_TRACE();
> > 
> >         vmxnet3_dev_stop(dev);
> > -       hw->adapter_stopped = 1;
> > +       vmxnet3_free_queues(dev);
> >  }
> > 
> >  static void
> > --
> > 2.18.0
> >
  
Luca Boccassi Sept. 27, 2018, 8:39 a.m. UTC | #3
On Wed, 2018-09-19 at 13:57 +0100, Luca Boccassi wrote:
> The vmxnet3 driver can't call back into dev_close(), and possibly
> dev_stop(), in dev_uninit().  When dev_uninit() is called, anything
> that those routines would want to clean up has already been released.
> Further, for complete cleanup, it is necessary to release any of the
> queue resources during dev_close().
> This allows a vmxnet3 device to be hot-unplugged without leaking
> queues.
> 
> Fixes: dfaff37fc46d ("vmxnet3: import new vmxnet3 poll mode driver
> implementation")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Brian Russell <brussell@brocade.com>
> Signed-off-by: Luca Boccassi <bluca@debian.org>
> ---
> v2: add back extra close() call in uninit() for buggy applications as
>     requested by the reviewers, and add debug log noting the issue
> 
>  drivers/net/vmxnet3/vmxnet3_ethdev.c | 35 +++++++++++++++++++++++---
> --
>  1 file changed, 29 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/net/vmxnet3/vmxnet3_ethdev.c
> b/drivers/net/vmxnet3/vmxnet3_ethdev.c
> index f1596ab19d..98e5d01890 100644
> --- a/drivers/net/vmxnet3/vmxnet3_ethdev.c
> +++ b/drivers/net/vmxnet3/vmxnet3_ethdev.c
> @@ -354,8 +354,10 @@ eth_vmxnet3_dev_uninit(struct rte_eth_dev
> *eth_dev)
>  	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
>  		return 0;
>  
> -	if (hw->adapter_stopped == 0)
> +	if (hw->adapter_stopped == 0) {
> +		PMD_INIT_LOG(DEBUG, "Device has not been closed.");
>  		vmxnet3_dev_close(eth_dev);
> +	}
>  
>  	eth_dev->dev_ops = NULL;
>  	eth_dev->rx_pkt_burst = NULL;
> @@ -802,7 +804,7 @@ vmxnet3_dev_stop(struct rte_eth_dev *dev)
>  	PMD_INIT_FUNC_TRACE();
>  
>  	if (hw->adapter_stopped == 1) {
> -		PMD_INIT_LOG(DEBUG, "Device already closed.");
> +		PMD_INIT_LOG(DEBUG, "Device already stopped.");
>  		return;
>  	}
>  
> @@ -826,7 +828,6 @@ vmxnet3_dev_stop(struct rte_eth_dev *dev)
>  	/* reset the device */
>  	VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD,
> VMXNET3_CMD_RESET_DEV);
>  	PMD_INIT_LOG(DEBUG, "Device reset.");
> -	hw->adapter_stopped = 0;
>  
>  	vmxnet3_dev_clear_queues(dev);
>  
> @@ -836,6 +837,30 @@ vmxnet3_dev_stop(struct rte_eth_dev *dev)
>  	link.link_speed = ETH_SPEED_NUM_10G;
>  	link.link_autoneg = ETH_LINK_FIXED;
>  	rte_eth_linkstatus_set(dev, &link);
> +
> +	hw->adapter_stopped = 1;
> +}
> +
> +static void
> +vmxnet3_free_queues(struct rte_eth_dev *dev)
> +{
> +	int i;
> +
> +	PMD_INIT_FUNC_TRACE();
> +
> +	for (i = 0; i < dev->data->nb_rx_queues; i++) {
> +		void *rxq = dev->data->rx_queues[i];
> +
> +		vmxnet3_dev_rx_queue_release(rxq);
> +	}
> +	dev->data->nb_rx_queues = 0;
> +
> +	for (i = 0; i < dev->data->nb_tx_queues; i++) {
> +		void *txq = dev->data->tx_queues[i];
> +
> +		vmxnet3_dev_tx_queue_release(txq);
> +	}
> +	dev->data->nb_tx_queues = 0;
>  }
>  
>  /*
> @@ -844,12 +869,10 @@ vmxnet3_dev_stop(struct rte_eth_dev *dev)
>  static void
>  vmxnet3_dev_close(struct rte_eth_dev *dev)
>  {
> -	struct vmxnet3_hw *hw = dev->data->dev_private;
> -
>  	PMD_INIT_FUNC_TRACE();
>  
>  	vmxnet3_dev_stop(dev);
> -	hw->adapter_stopped = 1;
> +	vmxnet3_free_queues(dev);
>  }
>  
>  static void

Hi Louis,

Are you happy with the diff as in v2 now for vmxnet3? Thanks
  
Thomas Monjalon Oct. 27, 2018, 3:09 p.m. UTC | #4
19/09/2018 17:47, Chas Williams:
> On Wed, Sep 19, 2018 at 8:58 AM Luca Boccassi <bluca@debian.org> wrote:
> >
> > The vmxnet3 driver can't call back into dev_close(), and possibly
> > dev_stop(), in dev_uninit().  When dev_uninit() is called, anything
> > that those routines would want to clean up has already been released.
> > Further, for complete cleanup, it is necessary to release any of the
> > queue resources during dev_close().
> > This allows a vmxnet3 device to be hot-unplugged without leaking
> > queues.
> >
> > Fixes: dfaff37fc46d ("vmxnet3: import new vmxnet3 poll mode driver implementation")
> > Cc: stable@dpdk.org
> >
> > Signed-off-by: Brian Russell <brussell@brocade.com>
> > Signed-off-by: Luca Boccassi <bluca@debian.org>
> > ---
> > v2: add back extra close() call in uninit() for buggy applications as
> >     requested by the reviewers, and add debug log noting the issue
> >
> >  drivers/net/vmxnet3/vmxnet3_ethdev.c | 35 +++++++++++++++++++++++-----
> >  1 file changed, 29 insertions(+), 6 deletions(-)
> >
> > diff --git a/drivers/net/vmxnet3/vmxnet3_ethdev.c b/drivers/net/vmxnet3/vmxnet3_ethdev.c
> > index f1596ab19d..98e5d01890 100644
> > --- a/drivers/net/vmxnet3/vmxnet3_ethdev.c
> > +++ b/drivers/net/vmxnet3/vmxnet3_ethdev.c
> > @@ -354,8 +354,10 @@ eth_vmxnet3_dev_uninit(struct rte_eth_dev *eth_dev)
> >         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
> >                 return 0;
> 
> This should probably be EPERM as well.  Out of scope though.
> 
> >
> > -       if (hw->adapter_stopped == 0)
> > +       if (hw->adapter_stopped == 0) {
> > +               PMD_INIT_LOG(DEBUG, "Device has not been closed.");
> >                 vmxnet3_dev_close(eth_dev);
> 
> This just seems wrong.  You have called uninit() will the driver is
> still busy.  Instead of "fixing" the state of the driver, return EBUSY
> here.

I agree.
If the port is not stopped, either you stop it or you return EBUSY.

Closing the device should be done outside of this check.
It is OK to close from uninit if the app did not close it.

[...]
> > +static void
> > +vmxnet3_free_queues(struct rte_eth_dev *dev)
> > +{
> > +       int i;
> > +
> > +       PMD_INIT_FUNC_TRACE();
> > +
> > +       for (i = 0; i < dev->data->nb_rx_queues; i++) {
> > +               void *rxq = dev->data->rx_queues[i];
> > +
> > +               vmxnet3_dev_rx_queue_release(rxq);
> > +       }
> > +       dev->data->nb_rx_queues = 0;
> > +
> > +       for (i = 0; i < dev->data->nb_tx_queues; i++) {
> > +               void *txq = dev->data->tx_queues[i];
> > +
> > +               vmxnet3_dev_tx_queue_release(txq);
> > +       }
> > +       dev->data->nb_tx_queues = 0;
> >  }
> >
> >  /*
> > @@ -844,12 +869,10 @@ vmxnet3_dev_stop(struct rte_eth_dev *dev)
> >  static void
> >  vmxnet3_dev_close(struct rte_eth_dev *dev)
> >  {
> > -       struct vmxnet3_hw *hw = dev->data->dev_private;
> > -
> >         PMD_INIT_FUNC_TRACE();
> >
> >         vmxnet3_dev_stop(dev);
> > -       hw->adapter_stopped = 1;
> > +       vmxnet3_free_queues(dev);
> >  }

Good clean-up on dev_close.
You probably want to go further and set RTE_ETH_DEV_CLOSE_REMOVE for allowing
a real release of the port on close.
Note: every PMD should migrate towards this behaviour.

To make things clear (I will write a doc for -rc2):
	- "stop" should be called by the app but the PMD is allowed to force it.
	- "close" may be called by the app, and the PMD should enforce it in uninit.
		With the RTE_ETH_DEV_CLOSE_REMOVE flag, it must completely release the port.
	- "remove" (implemented in PMD as uninit) is responsible for closing
		ethdev ports if not already done, and for releasing the shared resources
		which are not specific to a port. It removes the whole EAL rte_device.

PS: for any hotplug patch or questions, feel free to Cc me.
  
Thomas Monjalon Oct. 31, 2018, 5:27 p.m. UTC | #5
Any update or question for this patch?
If no update, it will miss 18.11.


27/10/2018 17:09, Thomas Monjalon:
> 19/09/2018 17:47, Chas Williams:
> > On Wed, Sep 19, 2018 at 8:58 AM Luca Boccassi <bluca@debian.org> wrote:
> > >
> > > The vmxnet3 driver can't call back into dev_close(), and possibly
> > > dev_stop(), in dev_uninit().  When dev_uninit() is called, anything
> > > that those routines would want to clean up has already been released.
> > > Further, for complete cleanup, it is necessary to release any of the
> > > queue resources during dev_close().
> > > This allows a vmxnet3 device to be hot-unplugged without leaking
> > > queues.
> > >
> > > Fixes: dfaff37fc46d ("vmxnet3: import new vmxnet3 poll mode driver implementation")
> > > Cc: stable@dpdk.org
> > >
> > > Signed-off-by: Brian Russell <brussell@brocade.com>
> > > Signed-off-by: Luca Boccassi <bluca@debian.org>
> > > ---
> > > v2: add back extra close() call in uninit() for buggy applications as
> > >     requested by the reviewers, and add debug log noting the issue
> > >
> > >  drivers/net/vmxnet3/vmxnet3_ethdev.c | 35 +++++++++++++++++++++++-----
> > >  1 file changed, 29 insertions(+), 6 deletions(-)
> > >
> > > diff --git a/drivers/net/vmxnet3/vmxnet3_ethdev.c b/drivers/net/vmxnet3/vmxnet3_ethdev.c
> > > index f1596ab19d..98e5d01890 100644
> > > --- a/drivers/net/vmxnet3/vmxnet3_ethdev.c
> > > +++ b/drivers/net/vmxnet3/vmxnet3_ethdev.c
> > > @@ -354,8 +354,10 @@ eth_vmxnet3_dev_uninit(struct rte_eth_dev *eth_dev)
> > >         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
> > >                 return 0;
> > 
> > This should probably be EPERM as well.  Out of scope though.
> > 
> > >
> > > -       if (hw->adapter_stopped == 0)
> > > +       if (hw->adapter_stopped == 0) {
> > > +               PMD_INIT_LOG(DEBUG, "Device has not been closed.");
> > >                 vmxnet3_dev_close(eth_dev);
> > 
> > This just seems wrong.  You have called uninit() will the driver is
> > still busy.  Instead of "fixing" the state of the driver, return EBUSY
> > here.
> 
> I agree.
> If the port is not stopped, either you stop it or you return EBUSY.
> 
> Closing the device should be done outside of this check.
> It is OK to close from uninit if the app did not close it.
> 
> [...]
> > > +static void
> > > +vmxnet3_free_queues(struct rte_eth_dev *dev)
> > > +{
> > > +       int i;
> > > +
> > > +       PMD_INIT_FUNC_TRACE();
> > > +
> > > +       for (i = 0; i < dev->data->nb_rx_queues; i++) {
> > > +               void *rxq = dev->data->rx_queues[i];
> > > +
> > > +               vmxnet3_dev_rx_queue_release(rxq);
> > > +       }
> > > +       dev->data->nb_rx_queues = 0;
> > > +
> > > +       for (i = 0; i < dev->data->nb_tx_queues; i++) {
> > > +               void *txq = dev->data->tx_queues[i];
> > > +
> > > +               vmxnet3_dev_tx_queue_release(txq);
> > > +       }
> > > +       dev->data->nb_tx_queues = 0;
> > >  }
> > >
> > >  /*
> > > @@ -844,12 +869,10 @@ vmxnet3_dev_stop(struct rte_eth_dev *dev)
> > >  static void
> > >  vmxnet3_dev_close(struct rte_eth_dev *dev)
> > >  {
> > > -       struct vmxnet3_hw *hw = dev->data->dev_private;
> > > -
> > >         PMD_INIT_FUNC_TRACE();
> > >
> > >         vmxnet3_dev_stop(dev);
> > > -       hw->adapter_stopped = 1;
> > > +       vmxnet3_free_queues(dev);
> > >  }
> 
> Good clean-up on dev_close.
> You probably want to go further and set RTE_ETH_DEV_CLOSE_REMOVE for allowing
> a real release of the port on close.
> Note: every PMDs should migrate towards this behaviour.
> 
> To make things clear (I will write a doc for -rc2):
> 	- "stop" should be called by the app but the PMD is allowed to force it.
> 	- "close" may be called by the app, and the PMD should enforce it in uninit.
> 		With RTE_ETH_DEV_CLOSE_REMOVE flag, it must completely release the port.
> 	- "remove" (implemented in PMD as uninit) is responsible of closing
> 		ethdev ports if not already done, and release the shared resources
> 		which are not specific to a port. It removes the whole EAL rte_device.
> 
> PS: for any hotplug patch or questions, feel free to Cc me.
  
Luca Boccassi Oct. 31, 2018, 5:46 p.m. UTC | #6
Sorry, been otherwise busy - I can do what you and Chas have asked, but
the problem is that v1 already did that and the VMWare maintainers
asked to change it back. So can I assume that the v1 way is the way to
go?

On Wed, 2018-10-31 at 18:27 +0100, Thomas Monjalon wrote:
> Any update or question for this patch?
> If no update, it will miss 18.11.
> 
> 
> 27/10/2018 17:09, Thomas Monjalon:
> > 19/09/2018 17:47, Chas Williams:
> > > On Wed, Sep 19, 2018 at 8:58 AM Luca Boccassi <bluca@debian.org>
> > > wrote:
> > > > 
> > > > The vmxnet3 driver can't call back into dev_close(), and
> > > > possibly
> > > > dev_stop(), in dev_uninit().  When dev_uninit() is called,
> > > > anything
> > > > that those routines would want to clean up has already been
> > > > released.
> > > > Further, for complete cleanup, it is necessary to release any
> > > > of the
> > > > queue resources during dev_close().
> > > > This allows a vmxnet3 device to be hot-unplugged without
> > > > leaking
> > > > queues.
> > > > 
> > > > Fixes: dfaff37fc46d ("vmxnet3: import new vmxnet3 poll mode
> > > > driver implementation")
> > > > Cc: stable@dpdk.org
> > > > 
> > > > Signed-off-by: Brian Russell <brussell@brocade.com>
> > > > Signed-off-by: Luca Boccassi <bluca@debian.org>
> > > > ---
> > > > v2: add back extra close() call in uninit() for buggy
> > > > applications as
> > > >     requested by the reviewers, and add debug log noting the
> > > > issue
> > > > 
> > > >  drivers/net/vmxnet3/vmxnet3_ethdev.c | 35
> > > > +++++++++++++++++++++++-----
> > > >  1 file changed, 29 insertions(+), 6 deletions(-)
> > > > 
> > > > diff --git a/drivers/net/vmxnet3/vmxnet3_ethdev.c
> > > > b/drivers/net/vmxnet3/vmxnet3_ethdev.c
> > > > index f1596ab19d..98e5d01890 100644
> > > > --- a/drivers/net/vmxnet3/vmxnet3_ethdev.c
> > > > +++ b/drivers/net/vmxnet3/vmxnet3_ethdev.c
> > > > @@ -354,8 +354,10 @@ eth_vmxnet3_dev_uninit(struct rte_eth_dev
> > > > *eth_dev)
> > > >         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
> > > >                 return 0;
> > > 
> > > This should probably be EPERM as well.  Out of scope though.
> > > 
> > > > 
> > > > -       if (hw->adapter_stopped == 0)
> > > > +       if (hw->adapter_stopped == 0) {
> > > > +               PMD_INIT_LOG(DEBUG, "Device has not been
> > > > closed.");
> > > >                 vmxnet3_dev_close(eth_dev);
> > > 
> > > This just seems wrong.  You have called uninit() will the driver
> > > is
> > > still busy.  Instead of "fixing" the state of the driver, return
> > > EBUSY
> > > here.
> > 
> > I agree.
> > If the port is not stopped, either you stop it or you return EBUSY.
> > 
> > Closing the device should be done outside of this check.
> > It is OK to close from uninit if the app did not close it.
> > 
> > [...]
> > > > +static void
> > > > +vmxnet3_free_queues(struct rte_eth_dev *dev)
> > > > +{
> > > > +       int i;
> > > > +
> > > > +       PMD_INIT_FUNC_TRACE();
> > > > +
> > > > +       for (i = 0; i < dev->data->nb_rx_queues; i++) {
> > > > +               void *rxq = dev->data->rx_queues[i];
> > > > +
> > > > +               vmxnet3_dev_rx_queue_release(rxq);
> > > > +       }
> > > > +       dev->data->nb_rx_queues = 0;
> > > > +
> > > > +       for (i = 0; i < dev->data->nb_tx_queues; i++) {
> > > > +               void *txq = dev->data->tx_queues[i];
> > > > +
> > > > +               vmxnet3_dev_tx_queue_release(txq);
> > > > +       }
> > > > +       dev->data->nb_tx_queues = 0;
> > > >  }
> > > > 
> > > >  /*
> > > > @@ -844,12 +869,10 @@ vmxnet3_dev_stop(struct rte_eth_dev *dev)
> > > >  static void
> > > >  vmxnet3_dev_close(struct rte_eth_dev *dev)
> > > >  {
> > > > -       struct vmxnet3_hw *hw = dev->data->dev_private;
> > > > -
> > > >         PMD_INIT_FUNC_TRACE();
> > > > 
> > > >         vmxnet3_dev_stop(dev);
> > > > -       hw->adapter_stopped = 1;
> > > > +       vmxnet3_free_queues(dev);
> > > >  }
> > 
> > Good clean-up on dev_close.
> > You probably want to go further and set RTE_ETH_DEV_CLOSE_REMOVE
> > for allowing
> > a real release of the port on close.
> > Note: every PMDs should migrate towards this behaviour.
> > 
> > To make things clear (I will write a doc for -rc2):
> > 	- "stop" should be called by the app but the PMD is allowed to
> > force it.
> > 	- "close" may be called by the app, and the PMD should enforce
> > it in uninit.
> > 		With RTE_ETH_DEV_CLOSE_REMOVE flag, it must completely
> > release the port.
> > 	- "remove" (implemented in PMD as uninit) is responsible of
> > closing
> > 		ethdev ports if not already done, and release the
> > shared resources
> > 		which are not specific to a port. It removes the whole
> > EAL rte_device.
> > 
> > PS: for any hotplug patch or questions, feel free to Cc me.
> 
> 
> 
> 
>
  
Thomas Monjalon Oct. 31, 2018, 6:02 p.m. UTC | #7
31/10/2018 18:46, Luca Boccassi:
> Sorry, been otherwise busy - I can do what you and Chas have asked, but
> the problem is that v1 already did that and the VMWare maintainers
> asked to change it back. So can I assume that the v1 way is the way to
> go?

I am expecting an answer from the vmxnet3 maintainer,
or other VMware developers.
If they don't reply, we should work without them,
and ask for a new maintainer in the community.

I assume you can now work on a v3 reproducing what was done in v1.


> On Wed, 2018-10-31 at 18:27 +0100, Thomas Monjalon wrote:
> > Any update or question for this patch?
> > If no update, it will miss 18.11.
> > 
> > 
> > 27/10/2018 17:09, Thomas Monjalon:
> > > 19/09/2018 17:47, Chas Williams:
> > > > On Wed, Sep 19, 2018 at 8:58 AM Luca Boccassi <bluca@debian.org>
> > > > wrote:
> > > > > 
> > > > > The vmxnet3 driver can't call back into dev_close(), and
> > > > > possibly
> > > > > dev_stop(), in dev_uninit().  When dev_uninit() is called,
> > > > > anything
> > > > > that those routines would want to clean up has already been
> > > > > released.
> > > > > Further, for complete cleanup, it is necessary to release any
> > > > > of the
> > > > > queue resources during dev_close().
> > > > > This allows a vmxnet3 device to be hot-unplugged without
> > > > > leaking
> > > > > queues.
> > > > > 
> > > > > Fixes: dfaff37fc46d ("vmxnet3: import new vmxnet3 poll mode
> > > > > driver implementation")
> > > > > Cc: stable@dpdk.org
> > > > > 
> > > > > Signed-off-by: Brian Russell <brussell@brocade.com>
> > > > > Signed-off-by: Luca Boccassi <bluca@debian.org>
> > > > > ---
> > > > > v2: add back extra close() call in uninit() for buggy
> > > > > applications as
> > > > >     requested by the reviewers, and add debug log noting the
> > > > > issue
> > > > > 
> > > > >  drivers/net/vmxnet3/vmxnet3_ethdev.c | 35
> > > > > +++++++++++++++++++++++-----
> > > > >  1 file changed, 29 insertions(+), 6 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/net/vmxnet3/vmxnet3_ethdev.c
> > > > > b/drivers/net/vmxnet3/vmxnet3_ethdev.c
> > > > > index f1596ab19d..98e5d01890 100644
> > > > > --- a/drivers/net/vmxnet3/vmxnet3_ethdev.c
> > > > > +++ b/drivers/net/vmxnet3/vmxnet3_ethdev.c
> > > > > @@ -354,8 +354,10 @@ eth_vmxnet3_dev_uninit(struct rte_eth_dev
> > > > > *eth_dev)
> > > > >         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
> > > > >                 return 0;
> > > > 
> > > > This should probably be EPERM as well.  Out of scope though.
> > > > 
> > > > > 
> > > > > -       if (hw->adapter_stopped == 0)
> > > > > +       if (hw->adapter_stopped == 0) {
> > > > > +               PMD_INIT_LOG(DEBUG, "Device has not been
> > > > > closed.");
> > > > >                 vmxnet3_dev_close(eth_dev);
> > > > 
> > > > This just seems wrong.  You have called uninit() will the driver
> > > > is
> > > > still busy.  Instead of "fixing" the state of the driver, return
> > > > EBUSY
> > > > here.
> > > 
> > > I agree.
> > > If the port is not stopped, either you stop it or you return EBUSY.
> > > 
> > > Closing the device should be done outside of this check.
> > > It is OK to close from uninit if the app did not close it.
> > > 
> > > [...]
> > > > > +static void
> > > > > +vmxnet3_free_queues(struct rte_eth_dev *dev)
> > > > > +{
> > > > > +       int i;
> > > > > +
> > > > > +       PMD_INIT_FUNC_TRACE();
> > > > > +
> > > > > +       for (i = 0; i < dev->data->nb_rx_queues; i++) {
> > > > > +               void *rxq = dev->data->rx_queues[i];
> > > > > +
> > > > > +               vmxnet3_dev_rx_queue_release(rxq);
> > > > > +       }
> > > > > +       dev->data->nb_rx_queues = 0;
> > > > > +
> > > > > +       for (i = 0; i < dev->data->nb_tx_queues; i++) {
> > > > > +               void *txq = dev->data->tx_queues[i];
> > > > > +
> > > > > +               vmxnet3_dev_tx_queue_release(txq);
> > > > > +       }
> > > > > +       dev->data->nb_tx_queues = 0;
> > > > >  }
> > > > > 
> > > > >  /*
> > > > > @@ -844,12 +869,10 @@ vmxnet3_dev_stop(struct rte_eth_dev *dev)
> > > > >  static void
> > > > >  vmxnet3_dev_close(struct rte_eth_dev *dev)
> > > > >  {
> > > > > -       struct vmxnet3_hw *hw = dev->data->dev_private;
> > > > > -
> > > > >         PMD_INIT_FUNC_TRACE();
> > > > > 
> > > > >         vmxnet3_dev_stop(dev);
> > > > > -       hw->adapter_stopped = 1;
> > > > > +       vmxnet3_free_queues(dev);
> > > > >  }
> > > 
> > > Good clean-up on dev_close.
> > > You probably want to go further and set RTE_ETH_DEV_CLOSE_REMOVE
> > > for allowing
> > > a real release of the port on close.
> > > Note: every PMDs should migrate towards this behaviour.
> > > 
> > > To make things clear (I will write a doc for -rc2):
> > > 	- "stop" should be called by the app but the PMD is allowed to
> > > force it.
> > > 	- "close" may be called by the app, and the PMD should enforce
> > > it in uninit.
> > > 		With RTE_ETH_DEV_CLOSE_REMOVE flag, it must completely
> > > release the port.
> > > 	- "remove" (implemented in PMD as uninit) is responsible of
> > > closing
> > > 		ethdev ports if not already done, and release the
> > > shared resources
> > > 		which are not specific to a port. It removes the whole
> > > EAL rte_device.
> > > 
> > > PS: for any hotplug patch or questions, feel free to Cc me.
> > 
> > 
> > 
> > 
> > 
> 
>
  
Louis Luo Oct. 31, 2018, 6:54 p.m. UTC | #8
Hey, I'm on paternity leave right now, so apologies for the late response.

The v1 was different from what Thomas asked for, and the (hw->adapter_stopped == 0) check was ignored (see the diff cited below). So we felt uncomfortable about that, as there is no guarantee that the device has been closed before calling uninit. Now that you fail the uninit call for a non-stopped device and return EBUSY, I'm fine with it.

Regards,
Louis

static int
eth_vmxnet3_dev_uninit(struct rte_eth_dev *eth_dev)
{
-	struct vmxnet3_hw *hw = eth_dev->data->dev_private;
-
	PMD_INIT_FUNC_TRACE();
	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
		return 0;
-	if (hw->adapter_stopped == 0)
-		vmxnet3_dev_close(eth_dev);
-
	eth_dev->dev_ops = NULL;
	eth_dev->rx_pkt_burst = NULL;
	eth_dev->tx_pkt_burst = NULL;

On 10/31/18, 10:46 AM, "Luca Boccassi" <bluca@debian.org> wrote:

    Sorry, been otherwise busy - I can do what you and Chas have asked, but
    the problem is that v1 already did that and the VMware maintainers
    asked to change it back. So can I assume that the v1 way is the way to
    go?
    
    On Wed, 2018-10-31 at 18:27 +0100, Thomas Monjalon wrote:
    > Any update or question for this patch?
    > If no update, it will miss 18.11.
    > 
    > 
    > 27/10/2018 17:09, Thomas Monjalon:
    > > 19/09/2018 17:47, Chas Williams:
    > > > On Wed, Sep 19, 2018 at 8:58 AM Luca Boccassi <bluca@debian.org>
    > > > wrote:
    > > > > 
    > > > > The vmxnet3 driver can't call back into dev_close(), and
    > > > > possibly
    > > > > dev_stop(), in dev_uninit().  When dev_uninit() is called,
    > > > > anything
    > > > > that those routines would want to clean up has already been
    > > > > released.
    > > > > Further, for complete cleanup, it is necessary to release any
    > > > > of the
    > > > > queue resources during dev_close().
    > > > > This allows a vmxnet3 device to be hot-unplugged without
    > > > > leaking
    > > > > queues.
    > > > > 
    > > > > Fixes: dfaff37fc46d ("vmxnet3: import new vmxnet3 poll mode
    > > > > driver implementation")
    > > > > Cc: stable@dpdk.org
    > > > > 
    > > > > Signed-off-by: Brian Russell <brussell@brocade.com>
    > > > > Signed-off-by: Luca Boccassi <bluca@debian.org>
    > > > > ---
    > > > > v2: add back extra close() call in uninit() for buggy
    > > > > applications as
    > > > >     requested by the reviewers, and add debug log noting the
    > > > > issue
    > > > > 
    > > > >  drivers/net/vmxnet3/vmxnet3_ethdev.c | 35
    > > > > +++++++++++++++++++++++-----
    > > > >  1 file changed, 29 insertions(+), 6 deletions(-)
    > > > > 
    > > > > diff --git a/drivers/net/vmxnet3/vmxnet3_ethdev.c
    > > > > b/drivers/net/vmxnet3/vmxnet3_ethdev.c
    > > > > index f1596ab19d..98e5d01890 100644
    > > > > --- a/drivers/net/vmxnet3/vmxnet3_ethdev.c
    > > > > +++ b/drivers/net/vmxnet3/vmxnet3_ethdev.c
    > > > > @@ -354,8 +354,10 @@ eth_vmxnet3_dev_uninit(struct rte_eth_dev
    > > > > *eth_dev)
    > > > >         if (rte_eal_process_type() != RTE_PROC_PRIMARY)
    > > > >                 return 0;
    > > > 
    > > > This should probably be EPERM as well.  Out of scope though.
    > > > 
    > > > > 
    > > > > -       if (hw->adapter_stopped == 0)
    > > > > +       if (hw->adapter_stopped == 0) {
    > > > > +               PMD_INIT_LOG(DEBUG, "Device has not been
    > > > > closed.");
    > > > >                 vmxnet3_dev_close(eth_dev);
    > > > 
    > > > This just seems wrong.  You have called uninit() while the driver
    > > > is
    > > > still busy.  Instead of "fixing" the state of the driver, return
    > > > EBUSY
    > > > here.
    > > 
    > > I agree.
    > > If the port is not stopped, either you stop it or you return EBUSY.
    > > 
    > > Closing the device should be done outside of this check.
    > > It is OK to close from uninit if the app did not close it.
    > > 
    > > [...]
    > > > > +static void
    > > > > +vmxnet3_free_queues(struct rte_eth_dev *dev)
    > > > > +{
    > > > > +       int i;
    > > > > +
    > > > > +       PMD_INIT_FUNC_TRACE();
    > > > > +
    > > > > +       for (i = 0; i < dev->data->nb_rx_queues; i++) {
    > > > > +               void *rxq = dev->data->rx_queues[i];
    > > > > +
    > > > > +               vmxnet3_dev_rx_queue_release(rxq);
    > > > > +       }
    > > > > +       dev->data->nb_rx_queues = 0;
    > > > > +
    > > > > +       for (i = 0; i < dev->data->nb_tx_queues; i++) {
    > > > > +               void *txq = dev->data->tx_queues[i];
    > > > > +
    > > > > +               vmxnet3_dev_tx_queue_release(txq);
    > > > > +       }
    > > > > +       dev->data->nb_tx_queues = 0;
    > > > >  }
    > > > > 
    > > > >  /*
    > > > > @@ -844,12 +869,10 @@ vmxnet3_dev_stop(struct rte_eth_dev *dev)
    > > > >  static void
    > > > >  vmxnet3_dev_close(struct rte_eth_dev *dev)
    > > > >  {
    > > > > -       struct vmxnet3_hw *hw = dev->data->dev_private;
    > > > > -
    > > > >         PMD_INIT_FUNC_TRACE();
    > > > > 
    > > > >         vmxnet3_dev_stop(dev);
    > > > > -       hw->adapter_stopped = 1;
    > > > > +       vmxnet3_free_queues(dev);
    > > > >  }
    > > 
    > > Good clean-up on dev_close.
    > > You probably want to go further and set RTE_ETH_DEV_CLOSE_REMOVE
    > > for allowing
    > > a real release of the port on close.
    > > Note: every PMD should migrate towards this behaviour.
    > > 
    > > To make things clear (I will write a doc for -rc2):
    > > 	- "stop" should be called by the app but the PMD is allowed to
    > > force it.
    > > 	- "close" may be called by the app, and the PMD should enforce
    > > it in uninit.
    > > 		With RTE_ETH_DEV_CLOSE_REMOVE flag, it must completely
    > > release the port.
    > > 	- "remove" (implemented in PMD as uninit) is responsible for
    > > closing
    > > 		ethdev ports if not already done, and release the
    > > shared resources
    > > 		which are not specific to a port. It removes the whole
    > > EAL rte_device.
    > > 
    > > PS: for any hotplug patch or questions, feel free to Cc me.
    > 
    > 
    > 
    > 
    > 
    
    -- 
    Kind regards,
    Luca Boccassi
  

Patch

diff --git a/drivers/net/vmxnet3/vmxnet3_ethdev.c b/drivers/net/vmxnet3/vmxnet3_ethdev.c
index f1596ab19d..98e5d01890 100644
--- a/drivers/net/vmxnet3/vmxnet3_ethdev.c
+++ b/drivers/net/vmxnet3/vmxnet3_ethdev.c
@@ -354,8 +354,10 @@  eth_vmxnet3_dev_uninit(struct rte_eth_dev *eth_dev)
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY)
 		return 0;
 
-	if (hw->adapter_stopped == 0)
+	if (hw->adapter_stopped == 0) {
+		PMD_INIT_LOG(DEBUG, "Device has not been closed.");
 		vmxnet3_dev_close(eth_dev);
+	}
 
 	eth_dev->dev_ops = NULL;
 	eth_dev->rx_pkt_burst = NULL;
@@ -802,7 +804,7 @@  vmxnet3_dev_stop(struct rte_eth_dev *dev)
 	PMD_INIT_FUNC_TRACE();
 
 	if (hw->adapter_stopped == 1) {
-		PMD_INIT_LOG(DEBUG, "Device already closed.");
+		PMD_INIT_LOG(DEBUG, "Device already stopped.");
 		return;
 	}
 
@@ -826,7 +828,6 @@  vmxnet3_dev_stop(struct rte_eth_dev *dev)
 	/* reset the device */
 	VMXNET3_WRITE_BAR1_REG(hw, VMXNET3_REG_CMD, VMXNET3_CMD_RESET_DEV);
 	PMD_INIT_LOG(DEBUG, "Device reset.");
-	hw->adapter_stopped = 0;
 
 	vmxnet3_dev_clear_queues(dev);
 
@@ -836,6 +837,30 @@  vmxnet3_dev_stop(struct rte_eth_dev *dev)
 	link.link_speed = ETH_SPEED_NUM_10G;
 	link.link_autoneg = ETH_LINK_FIXED;
 	rte_eth_linkstatus_set(dev, &link);
+
+	hw->adapter_stopped = 1;
+}
+
+static void
+vmxnet3_free_queues(struct rte_eth_dev *dev)
+{
+	int i;
+
+	PMD_INIT_FUNC_TRACE();
+
+	for (i = 0; i < dev->data->nb_rx_queues; i++) {
+		void *rxq = dev->data->rx_queues[i];
+
+		vmxnet3_dev_rx_queue_release(rxq);
+	}
+	dev->data->nb_rx_queues = 0;
+
+	for (i = 0; i < dev->data->nb_tx_queues; i++) {
+		void *txq = dev->data->tx_queues[i];
+
+		vmxnet3_dev_tx_queue_release(txq);
+	}
+	dev->data->nb_tx_queues = 0;
 }
 
 /*
@@ -844,12 +869,10 @@  vmxnet3_dev_stop(struct rte_eth_dev *dev)
 static void
 vmxnet3_dev_close(struct rte_eth_dev *dev)
 {
-	struct vmxnet3_hw *hw = dev->data->dev_private;
-
 	PMD_INIT_FUNC_TRACE();
 
 	vmxnet3_dev_stop(dev);
-	hw->adapter_stopped = 1;
+	vmxnet3_free_queues(dev);
 }
 
 static void