[dpdk-dev,2/2] net/mlx5: fix link status to use wait to complete
Checks
Commit Message
Wait to complete is present to let the application get a correct status
when it requires it, it should not be ignored.
Fixes: cb8faed7dde8 ("mlx5: support link status update")
Cc: adrien.mazarguil@6wind.com
Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
---
drivers/net/mlx5/mlx5_ethdev.c | 35 +++++++++++++++++++++--------------
1 file changed, 21 insertions(+), 14 deletions(-)
Comments
On Thu, Feb 15, 2018 at 09:47:28AM +0100, Nelio Laranjeiro wrote:
> Wait to complete is present to let the application get a correct status
> when it requires it, it should not be ignored.
>
> Fixes: cb8faed7dde8 ("mlx5: support link status update")
> Cc: adrien.mazarguil@6wind.com
>
> Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Several comments, please see below.
> ---
> drivers/net/mlx5/mlx5_ethdev.c | 35 +++++++++++++++++++++--------------
> 1 file changed, 21 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
> index 0ce9f438a..112cc2a40 100644
> --- a/drivers/net/mlx5/mlx5_ethdev.c
> +++ b/drivers/net/mlx5/mlx5_ethdev.c
> @@ -502,11 +502,12 @@ mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev)
> *
> * @param dev
> * Pointer to Ethernet device structure.
> - * @param wait_to_complete
> - * Wait for request completion (ignored).
> + *
> + * @return
> + * 0 on success, -1 otherwise.
See below regarding the return value.
> */
> static int
> -mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete)
> +mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev)
> {
> struct priv *priv = dev->data->dev_private;
> struct ethtool_cmd edata = {
> @@ -518,7 +519,6 @@ mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete)
>
> /* priv_lock() is not taken to allow concurrent calls. */
>
> - (void)wait_to_complete;
> if (priv_ifreq(priv, SIOCGIFFLAGS, &ifr)) {
> WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno));
> return -1;
> @@ -568,11 +568,9 @@ mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete)
> *
> * @param dev
> * Pointer to Ethernet device structure.
> - * @param wait_to_complete
> - * Wait for request completion (ignored).
You should use this opportunity to document its return value as well.
> */
> static int
> -mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete)
> +mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev)
> {
> struct priv *priv = dev->data->dev_private;
> struct ethtool_link_settings gcmd = { .cmd = ETHTOOL_GLINKSETTINGS };
> @@ -580,7 +578,6 @@ mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete)
> struct rte_eth_link dev_link;
> uint64_t sc;
>
> - (void)wait_to_complete;
> if (priv_ifreq(priv, SIOCGIFFLAGS, &ifr)) {
> WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno));
> return -1;
> @@ -708,6 +705,9 @@ priv_link_stop(struct priv *priv)
> * Pointer to private structure.
> * @param wait_to_complete
> * Wait for request completion (ignored).
> + *
> + * @return
> + * 0 on success, negative value otherwise.
How about "0 on success, negative errno value otherwise" according to
subsequent comments.
> */
> int
> priv_link_update(struct priv *priv, int wait_to_complete)
> @@ -720,10 +720,16 @@ priv_link_update(struct priv *priv, int wait_to_complete)
> current_version >= KERNEL_VERSION(4, 9, 0) :
> 0;
>
> - if (use_new_api)
> - ret = mlx5_link_update_unlocked_gset(dev, wait_to_complete);
> - else
> - ret = mlx5_link_update_unlocked_gs(dev, wait_to_complete);
> + do {
> + if (use_new_api)
> + ret = mlx5_link_update_unlocked_gset(dev);
> + else
> + ret = mlx5_link_update_unlocked_gs(dev);
> + if (ret == -1)
> + return -EAGAIN;
Like for system calls, EAGAIN isn't an acceptable error in case
wait_to_complete (blocking mode) is requested. It must be some other reason.
This check should be replaced with my next suggestion.
> + } while(wait_to_complete && !ret);
Missing space after "while".
One issue is when wait_to_complete is enabled and link status never settles
down due to bad cabling or buggy SFP. I think this function should give up
and return an error after a while (not -EAGAIN in this case but -EIO, -EBUSY
or even -EINTR to reflect the call had to be interrupted due to HW trouble).
You could use MLX5_MAX_LINK_QUERY_ATTEMPTS for that, e.g.:
int attempt = MLX5_MAX_LINK_QUERY_ATTEMPTS;
[...]
while (1) {
[...]
if (!wait_to_complete || ret != -EAGAIN || !attempt--)
break;
sleep(1);
}
> + if (ret == -EAGAIN)
> + return ret;
Since neither function may return anything other than -1 in case of error at
the moment and since their wait_to_complete argument is being removed, I
suggest to make them properly non-blocking by default (i.e. O_NONBLOCK on
their ioctl() FD), then return -errno in case of error on intermediate
system calls, then the above check will make sense.
> /* If lsc interrupt is disabled, should always be ready for traffic. */
> if (!dev->data->dev_conf.intr_conf.lsc) {
> priv_link_start(priv);
> @@ -755,10 +761,11 @@ int
> priv_force_link_status_change(struct priv *priv, int status)
> {
> int try = 0;
> + int ret = 0;
>
> while (try < MLX5_MAX_LINK_QUERY_ATTEMPTS) {
> - priv_link_update(priv, 0);
> - if (priv->dev->data->dev_link.link_status == status)
> + ret = priv_link_update(priv, 0);
> + if (!ret && priv->dev->data->dev_link.link_status == status)
> return 0;
> try++;
> sleep(1);
I think this function could be removed in the same patch (with e313ef4c2fe8
"net/mlx5: fix link state on device start" partially reverted) since a call
to priv_link_update(priv, 1) would now result in the same behavior.
On Fri, Feb 16, 2018 at 11:49:00AM +0100, Adrien Mazarguil wrote:
> On Thu, Feb 15, 2018 at 09:47:28AM +0100, Nelio Laranjeiro wrote:
> > Wait to complete is present to let the application get a correct status
> > when it requires it, it should not be ignored.
> >
> > Fixes: cb8faed7dde8 ("mlx5: support link status update")
> > Cc: adrien.mazarguil@6wind.com
> >
> > Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
>
> Several comments, please see below.
>
> > ---
> > drivers/net/mlx5/mlx5_ethdev.c | 35 +++++++++++++++++++++--------------
> > 1 file changed, 21 insertions(+), 14 deletions(-)
> >
> > diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
> > index 0ce9f438a..112cc2a40 100644
> > --- a/drivers/net/mlx5/mlx5_ethdev.c
> > +++ b/drivers/net/mlx5/mlx5_ethdev.c
> > @@ -502,11 +502,12 @@ mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev)
> > *
> > * @param dev
> > * Pointer to Ethernet device structure.
> > - * @param wait_to_complete
> > - * Wait for request completion (ignored).
> > + *
> > + * @return
> > + * 0 on success, -1 otherwise.
>
> See below regarding the return value.
>
> > */
> > static int
> > -mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete)
> > +mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev)
> > {
> > struct priv *priv = dev->data->dev_private;
> > struct ethtool_cmd edata = {
> > @@ -518,7 +519,6 @@ mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete)
> >
> > /* priv_lock() is not taken to allow concurrent calls. */
> >
> > - (void)wait_to_complete;
> > if (priv_ifreq(priv, SIOCGIFFLAGS, &ifr)) {
> > WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno));
> > return -1;
> > @@ -568,11 +568,9 @@ mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete)
> > *
> > * @param dev
> > * Pointer to Ethernet device structure.
> > - * @param wait_to_complete
> > - * Wait for request completion (ignored).
>
> You should use this opportunity to document its return value as well.
>
> > */
> > static int
> > -mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete)
> > +mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev)
> > {
> > struct priv *priv = dev->data->dev_private;
> > struct ethtool_link_settings gcmd = { .cmd = ETHTOOL_GLINKSETTINGS };
> > @@ -580,7 +578,6 @@ mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete)
> > struct rte_eth_link dev_link;
> > uint64_t sc;
> >
> > - (void)wait_to_complete;
> > if (priv_ifreq(priv, SIOCGIFFLAGS, &ifr)) {
> > WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno));
> > return -1;
> > @@ -708,6 +705,9 @@ priv_link_stop(struct priv *priv)
> > * Pointer to private structure.
> > * @param wait_to_complete
> > * Wait for request completion (ignored).
> > + *
> > + * @return
> > + * 0 on success, negative value otherwise.
>
> How about "0 on success, negative errno value otherwise" according to
> subsequent comments.
>
> > */
> > int
> > priv_link_update(struct priv *priv, int wait_to_complete)
> > @@ -720,10 +720,16 @@ priv_link_update(struct priv *priv, int wait_to_complete)
> > current_version >= KERNEL_VERSION(4, 9, 0) :
> > 0;
> >
> > - if (use_new_api)
> > - ret = mlx5_link_update_unlocked_gset(dev, wait_to_complete);
> > - else
> > - ret = mlx5_link_update_unlocked_gs(dev, wait_to_complete);
> > + do {
> > + if (use_new_api)
> > + ret = mlx5_link_update_unlocked_gset(dev);
> > + else
> > + ret = mlx5_link_update_unlocked_gs(dev);
> > + if (ret == -1)
> > + return -EAGAIN;
>
> Like for system calls, EAGAIN isn't an acceptable error in case
> wait_to_complete (blocking mode) is requested. It must be some other reason.
>
> This check should be replaced with my next suggestion.
>
> > + } while(wait_to_complete && !ret);
>
> Missing space after "while".
>
> One issue is when wait_to_complete is enabled and link status never settles
> down due to bad cabling or buggy SFP. I think this function should give up
> and return an error after a while (not -EAGAIN in this case but -EIO, -EBUSY
> or even -EINTR to reflect the call had to be interrupted due to HW trouble).
>
> You could use MLX5_MAX_LINK_QUERY_ATTEMPTS for that, e.g.:
>
> int attempt = MLX5_MAX_LINK_QUERY_ATTEMPTS;
> [...]
> while (1) {
> [...]
> if (!wait_to_complete || ret != -EAGAIN || !attempt--)
> break;
> sleep(1);
> }
>
> > + if (ret == -EAGAIN)
> > + return ret;
>
> Since neither function may return anything other than -1 in case of error at
> the moment and since their wait_to_complete argument is being removed, I
> suggest to make them properly non-blocking by default (i.e. O_NONBLOCK on
> their ioctl() FD), then return -errno in case of error on intermediate
> system calls, then the above check will make sense.
>
> > /* If lsc interrupt is disabled, should always be ready for traffic. */
> > if (!dev->data->dev_conf.intr_conf.lsc) {
> > priv_link_start(priv);
> > @@ -755,10 +761,11 @@ int
> > priv_force_link_status_change(struct priv *priv, int status)
> > {
> > int try = 0;
> > + int ret = 0;
> >
> > while (try < MLX5_MAX_LINK_QUERY_ATTEMPTS) {
> > - priv_link_update(priv, 0);
> > - if (priv->dev->data->dev_link.link_status == status)
> > + ret = priv_link_update(priv, 0);
> > + if (!ret && priv->dev->data->dev_link.link_status == status)
> > return 0;
> > try++;
> > sleep(1);
>
> I think this function could be removed in the same patch (with e313ef4c2fe8
> "net/mlx5: fix link state on device start" partially reverted) since a call
> to priv_link_update(priv, 1) would now result in the same behavior.
Agreed, I'll re-work it in a v2.
Thanks,
@@ -502,11 +502,12 @@ mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev)
*
* @param dev
* Pointer to Ethernet device structure.
- * @param wait_to_complete
- * Wait for request completion (ignored).
+ *
+ * @return
+ * 0 on success, -1 otherwise.
*/
static int
-mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete)
+mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev)
{
struct priv *priv = dev->data->dev_private;
struct ethtool_cmd edata = {
@@ -518,7 +519,6 @@ mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete)
/* priv_lock() is not taken to allow concurrent calls. */
- (void)wait_to_complete;
if (priv_ifreq(priv, SIOCGIFFLAGS, &ifr)) {
WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno));
return -1;
@@ -568,11 +568,9 @@ mlx5_link_update_unlocked_gset(struct rte_eth_dev *dev, int wait_to_complete)
*
* @param dev
* Pointer to Ethernet device structure.
- * @param wait_to_complete
- * Wait for request completion (ignored).
*/
static int
-mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete)
+mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev)
{
struct priv *priv = dev->data->dev_private;
struct ethtool_link_settings gcmd = { .cmd = ETHTOOL_GLINKSETTINGS };
@@ -580,7 +578,6 @@ mlx5_link_update_unlocked_gs(struct rte_eth_dev *dev, int wait_to_complete)
struct rte_eth_link dev_link;
uint64_t sc;
- (void)wait_to_complete;
if (priv_ifreq(priv, SIOCGIFFLAGS, &ifr)) {
WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(errno));
return -1;
@@ -708,6 +705,9 @@ priv_link_stop(struct priv *priv)
* Pointer to private structure.
* @param wait_to_complete
* Wait for request completion (ignored).
+ *
+ * @return
+ * 0 on success, negative value otherwise.
*/
int
priv_link_update(struct priv *priv, int wait_to_complete)
@@ -720,10 +720,16 @@ priv_link_update(struct priv *priv, int wait_to_complete)
current_version >= KERNEL_VERSION(4, 9, 0) :
0;
- if (use_new_api)
- ret = mlx5_link_update_unlocked_gset(dev, wait_to_complete);
- else
- ret = mlx5_link_update_unlocked_gs(dev, wait_to_complete);
+ do {
+ if (use_new_api)
+ ret = mlx5_link_update_unlocked_gset(dev);
+ else
+ ret = mlx5_link_update_unlocked_gs(dev);
+ if (ret == -1)
+ return -EAGAIN;
+ } while(wait_to_complete && !ret);
+ if (ret == -EAGAIN)
+ return ret;
/* If lsc interrupt is disabled, should always be ready for traffic. */
if (!dev->data->dev_conf.intr_conf.lsc) {
priv_link_start(priv);
@@ -755,10 +761,11 @@ int
priv_force_link_status_change(struct priv *priv, int status)
{
int try = 0;
+ int ret = 0;
while (try < MLX5_MAX_LINK_QUERY_ATTEMPTS) {
- priv_link_update(priv, 0);
- if (priv->dev->data->dev_link.link_status == status)
+ ret = priv_link_update(priv, 0);
+ if (!ret && priv->dev->data->dev_link.link_status == status)
return 0;
try++;
sleep(1);