[dpdk-dev,v3] net/mlx5: fix link status is always inconsist
Checks
Commit Message
Query the link status can end up in an inconsist state where the port is
down but it is reporting speed. For that another query is scheduled for a
later time.
A race condition is possible between the scheduled call and other link
status interrupt handlers. When the scheduled query by-pass those handlers,
the link status will be stuck in an in-consist state.
This patch addresses the race condition by not blocking link status
queries in case delayed query is on the flight.
Fixes: 198a3c339a8f ("mlx5: handle link status interrupts")
CC: stable@dpdk.org
Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
---
on v3:
* allow only single alarm on the flight.
on v2:
* change the commit log to better explain the race
* instead of blocking other interrupt handlers allow everyone to query the link status
---
drivers/net/mlx5/mlx5_ethdev.c | 26 +++++++++++---------------
1 file changed, 11 insertions(+), 15 deletions(-)
Comments
On Tue, Jan 31, 2017 at 03:13:38PM +0200, Shahaf Shuler wrote:
> Query the link status can end up in an inconsist state where the port is
> down but it is reporting speed. For that another query is scheduled for a
> later time.
> A race condition is possible between the scheduled call and other link
> status interrupt handlers. When the scheduled query by-pass those handlers,
> the link status will be stuck in an in-consist state.
> This patch addresses the race condition by not blocking link status
> queries in case delayed query is on the flight.
>
> Fixes: 198a3c339a8f ("mlx5: handle link status interrupts")
> CC: stable@dpdk.org
>
> Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
> ---
> on v3:
> * allow only single alarm on the flight.
>
> on v2:
> * change the commit log to better explain the race
> * instead of blocking other interrupt handlers allow everyone to query the link status
> ---
> drivers/net/mlx5/mlx5_ethdev.c | 26 +++++++++++---------------
> 1 file changed, 11 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/net/mlx5/mlx5_ethdev.c b/drivers/net/mlx5/mlx5_ethdev.c
> index e77238f..fcbe273 100644
> --- a/drivers/net/mlx5/mlx5_ethdev.c
> +++ b/drivers/net/mlx5/mlx5_ethdev.c
> @@ -1128,7 +1128,7 @@ struct priv *
> priv_dev_link_status_handler(struct priv *priv, struct rte_eth_dev *dev)
> {
> struct ibv_async_event event;
> - int port_change = 0;
> + struct rte_eth_link *link = &dev->data->dev_link;
> int ret = 0;
>
> /* Read all message and acknowledge them. */
> @@ -1136,29 +1136,24 @@ struct priv *
> if (ibv_get_async_event(priv->ctx, &event))
> break;
>
> - if (event.event_type == IBV_EVENT_PORT_ACTIVE ||
> - event.event_type == IBV_EVENT_PORT_ERR)
> - port_change = 1;
> - else
> + if (event.event_type != IBV_EVENT_PORT_ACTIVE &&
> + event.event_type != IBV_EVENT_PORT_ERR)
> DEBUG("event type %d on port %d not handled",
> event.event_type, event.element.port_num);
> ibv_ack_async_event(&event);
> }
> -
> - if (port_change ^ priv->pending_alarm) {
> - struct rte_eth_link *link = &dev->data->dev_link;
> -
> - priv->pending_alarm = 0;
> - mlx5_link_update(dev, 0);
> - if (((link->link_speed == 0) && link->link_status) ||
> - ((link->link_speed != 0) && !link->link_status)) {
> + mlx5_link_update(dev, 0);
> + if (((link->link_speed == 0) && link->link_status) ||
> + ((link->link_speed != 0) && !link->link_status)) {
> + if (!priv->pending_alarm) {
> /* Inconsistent status, check again later. */
> priv->pending_alarm = 1;
> rte_eal_alarm_set(MLX5_ALARM_TIMEOUT_US,
> mlx5_dev_link_status_handler,
> dev);
> - } else
> - ret = 1;
> + }
> + } else {
> + ret = 1;
> }
> return ret;
> }
> @@ -1178,6 +1173,7 @@ struct priv *
>
> priv_lock(priv);
> assert(priv->pending_alarm == 1);
> + priv->pending_alarm = 0;
> ret = priv_dev_link_status_handler(priv, dev);
> priv_unlock(priv);
> if (ret)
> --
> 1.8.3.1
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
On 1/31/2017 3:40 PM, Nélio Laranjeiro wrote:
> On Tue, Jan 31, 2017 at 03:13:38PM +0200, Shahaf Shuler wrote:
>> Query the link status can end up in an inconsist state where the port is
>> down but it is reporting speed. For that another query is scheduled for a
>> later time.
>> A race condition is possible between the scheduled call and other link
>> status interrupt handlers. When the scheduled query by-pass those handlers,
>> the link status will be stuck in an in-consist state.
>> This patch addresses the race condition by not blocking link status
>> queries in case delayed query is on the flight.
>>
>> Fixes: 198a3c339a8f ("mlx5: handle link status interrupts")
>> CC: stable@dpdk.org
>>
>> Signed-off-by: Shahaf Shuler <shahafs@mellanox.com>
> Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Applied to dpdk-next-net/master, thanks.
@@ -1128,7 +1128,7 @@ struct priv *
priv_dev_link_status_handler(struct priv *priv, struct rte_eth_dev *dev)
{
struct ibv_async_event event;
- int port_change = 0;
+ struct rte_eth_link *link = &dev->data->dev_link;
int ret = 0;
/* Read all message and acknowledge them. */
@@ -1136,29 +1136,24 @@ struct priv *
if (ibv_get_async_event(priv->ctx, &event))
break;
- if (event.event_type == IBV_EVENT_PORT_ACTIVE ||
- event.event_type == IBV_EVENT_PORT_ERR)
- port_change = 1;
- else
+ if (event.event_type != IBV_EVENT_PORT_ACTIVE &&
+ event.event_type != IBV_EVENT_PORT_ERR)
DEBUG("event type %d on port %d not handled",
event.event_type, event.element.port_num);
ibv_ack_async_event(&event);
}
-
- if (port_change ^ priv->pending_alarm) {
- struct rte_eth_link *link = &dev->data->dev_link;
-
- priv->pending_alarm = 0;
- mlx5_link_update(dev, 0);
- if (((link->link_speed == 0) && link->link_status) ||
- ((link->link_speed != 0) && !link->link_status)) {
+ mlx5_link_update(dev, 0);
+ if (((link->link_speed == 0) && link->link_status) ||
+ ((link->link_speed != 0) && !link->link_status)) {
+ if (!priv->pending_alarm) {
/* Inconsistent status, check again later. */
priv->pending_alarm = 1;
rte_eal_alarm_set(MLX5_ALARM_TIMEOUT_US,
mlx5_dev_link_status_handler,
dev);
- } else
- ret = 1;
+ }
+ } else {
+ ret = 1;
}
return ret;
}
@@ -1178,6 +1173,7 @@ struct priv *
priv_lock(priv);
assert(priv->pending_alarm == 1);
+ priv->pending_alarm = 0;
ret = priv_dev_link_status_handler(priv, dev);
priv_unlock(priv);
if (ret)