[v2] net/vhost: fix deadlock on vring state change

Message ID 20220627055125.1541652-1-yuanx.wang@intel.com (mailing list archive)
State Accepted, archived
Delegated to: Maxime Coquelin
Headers
Series [v2] net/vhost: fix deadlock on vring state change |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/github-robot: build success github build: passed
ci/iol-aarch64-unit-testing success Testing PASS
ci/iol-x86_64-unit-testing success Testing PASS
ci/iol-x86_64-compile-testing success Testing PASS
ci/iol-aarch64-compile-testing success Testing PASS
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS
ci/iol-abi-testing success Testing PASS

Commit Message

Wang, YuanX June 27, 2022, 5:51 a.m. UTC
  If vring state changes after pmd starts working, the locked vring
notifies pmd, thus calling update_queuing_status(), the latter
will wait for pmd to finish accessing vring, while pmd is also
waiting for vring to be unlocked, thus causing deadlock.

Actually, update_queuing_status() only needs to wait while
destroy/stopping the device, but not in other cases.

This patch adds a flag for whether or not to wait to fix this issue.

Fixes: 1ce3c7fe149f ("net/vhost: emulate device start/stop behavior")
Cc: stable@dpdk.org

Signed-off-by: Yuan Wang <yuanx.wang@intel.com>
---
V2: rewrite the commit log.
---
 drivers/net/vhost/rte_eth_vhost.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)
  

Comments

Chenbo Xia July 1, 2022, 12:31 p.m. UTC | #1
> -----Original Message-----
> From: Wang, YuanX <yuanx.wang@intel.com>
> Sent: Monday, June 27, 2022 1:51 PM
> To: maxime.coquelin@redhat.com; Xia, Chenbo <chenbo.xia@intel.com>;
> dev@dpdk.org
> Cc: Hu, Jiayu <jiayu.hu@intel.com>; He, Xingguang <xingguang.he@intel.com>;
> Jiang, Cheng1 <cheng1.jiang@intel.com>; Ling, WeiX <weix.ling@intel.com>;
> Wang, YuanX <yuanx.wang@intel.com>; stable@dpdk.org
> Subject: [PATCH v2] net/vhost: fix deadlock on vring state change
> 
> If vring state changes after pmd starts working, the locked vring
> notifies pmd, thus calling update_queuing_status(), the latter
> will wait for pmd to finish accessing vring, while pmd is also
> waiting for vring to be unlocked, thus causing deadlock.
> 
> Actually, update_queuing_status() only needs to wait while
> destroy/stopping the device, but not in other cases.
> 
> This patch adds a flag for whether or not to wait to fix this issue.
> 
> Fixes: 1ce3c7fe149f ("net/vhost: emulate device start/stop behavior")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Yuan Wang <yuanx.wang@intel.com>
> ---
> V2: rewrite the commit log.
> ---
>  drivers/net/vhost/rte_eth_vhost.c | 16 ++++++++--------
>  1 file changed, 8 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/net/vhost/rte_eth_vhost.c
> b/drivers/net/vhost/rte_eth_vhost.c
> index d75d256040..7e512d94bf 100644
> --- a/drivers/net/vhost/rte_eth_vhost.c
> +++ b/drivers/net/vhost/rte_eth_vhost.c
> @@ -741,7 +741,7 @@ eth_vhost_install_intr(struct rte_eth_dev *dev)
>  }
> 
>  static void
> -update_queuing_status(struct rte_eth_dev *dev)
> +update_queuing_status(struct rte_eth_dev *dev, bool wait_queuing)
>  {
>  	struct pmd_internal *internal = dev->data->dev_private;
>  	struct vhost_queue *vq;
> @@ -767,7 +767,7 @@ update_queuing_status(struct rte_eth_dev *dev)
>  			rte_atomic32_set(&vq->allow_queuing, 1);
>  		else
>  			rte_atomic32_set(&vq->allow_queuing, 0);
> -		while (rte_atomic32_read(&vq->while_queuing))
> +		while (wait_queuing && rte_atomic32_read(&vq->while_queuing))
>  			rte_pause();
>  	}
> 
> @@ -779,7 +779,7 @@ update_queuing_status(struct rte_eth_dev *dev)
>  			rte_atomic32_set(&vq->allow_queuing, 1);
>  		else
>  			rte_atomic32_set(&vq->allow_queuing, 0);
> -		while (rte_atomic32_read(&vq->while_queuing))
> +		while (wait_queuing && rte_atomic32_read(&vq->while_queuing))
>  			rte_pause();
>  	}
>  }
> @@ -868,7 +868,7 @@ new_device(int vid)
>  	vhost_dev_csum_configure(eth_dev);
> 
>  	rte_atomic32_set(&internal->dev_attached, 1);
> -	update_queuing_status(eth_dev);
> +	update_queuing_status(eth_dev, false);
> 
>  	VHOST_LOG(INFO, "Vhost device %d created\n", vid);
> 
> @@ -898,7 +898,7 @@ destroy_device(int vid)
>  	internal = eth_dev->data->dev_private;
> 
>  	rte_atomic32_set(&internal->dev_attached, 0);
> -	update_queuing_status(eth_dev);
> +	update_queuing_status(eth_dev, true);
> 
>  	eth_dev->data->dev_link.link_status = RTE_ETH_LINK_DOWN;
> 
> @@ -1008,7 +1008,7 @@ vring_state_changed(int vid, uint16_t vring, int
> enable)
>  	state->max_vring = RTE_MAX(vring, state->max_vring);
>  	rte_spinlock_unlock(&state->lock);
> 
> -	update_queuing_status(eth_dev);
> +	update_queuing_status(eth_dev, false);
> 
>  	VHOST_LOG(INFO, "vring%u is %s\n",
>  			vring, enable ? "enabled" : "disabled");
> @@ -1197,7 +1197,7 @@ eth_dev_start(struct rte_eth_dev *eth_dev)
>  	}
> 
>  	rte_atomic32_set(&internal->started, 1);
> -	update_queuing_status(eth_dev);
> +	update_queuing_status(eth_dev, false);
> 
>  	return 0;
>  }
> @@ -1209,7 +1209,7 @@ eth_dev_stop(struct rte_eth_dev *dev)
> 
>  	dev->data->dev_started = 0;
>  	rte_atomic32_set(&internal->started, 0);
> -	update_queuing_status(dev);
> +	update_queuing_status(dev, true);
> 
>  	return 0;
>  }
> --
> 2.25.1

Reviewed-by: Chenbo Xia <chenbo.xia@intel.com>
  
Maxime Coquelin July 1, 2022, 1:58 p.m. UTC | #2
On 6/27/22 07:51, Yuan Wang wrote:
> If vring state changes after pmd starts working, the locked vring
> notifies pmd, thus calling update_queuing_status(), the latter
> will wait for pmd to finish accessing vring, while pmd is also
> waiting for vring to be unlocked, thus causing deadlock.
> 
> Actually, update_queuing_status() only needs to wait while
> destroy/stopping the device, but not in other cases.
> 
> This patch adds a flag for whether or not to wait to fix this issue.
> 
> Fixes: 1ce3c7fe149f ("net/vhost: emulate device start/stop behavior")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Yuan Wang <yuanx.wang@intel.com>
> ---
> V2: rewrite the commit log.
> ---
>   drivers/net/vhost/rte_eth_vhost.c | 16 ++++++++--------
>   1 file changed, 8 insertions(+), 8 deletions(-)
> 


Applied to dpdk-next-virtio/main.

Thanks,
Maxime
  

Patch

diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
index d75d256040..7e512d94bf 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -741,7 +741,7 @@  eth_vhost_install_intr(struct rte_eth_dev *dev)
 }
 
 static void
-update_queuing_status(struct rte_eth_dev *dev)
+update_queuing_status(struct rte_eth_dev *dev, bool wait_queuing)
 {
 	struct pmd_internal *internal = dev->data->dev_private;
 	struct vhost_queue *vq;
@@ -767,7 +767,7 @@  update_queuing_status(struct rte_eth_dev *dev)
 			rte_atomic32_set(&vq->allow_queuing, 1);
 		else
 			rte_atomic32_set(&vq->allow_queuing, 0);
-		while (rte_atomic32_read(&vq->while_queuing))
+		while (wait_queuing && rte_atomic32_read(&vq->while_queuing))
 			rte_pause();
 	}
 
@@ -779,7 +779,7 @@  update_queuing_status(struct rte_eth_dev *dev)
 			rte_atomic32_set(&vq->allow_queuing, 1);
 		else
 			rte_atomic32_set(&vq->allow_queuing, 0);
-		while (rte_atomic32_read(&vq->while_queuing))
+		while (wait_queuing && rte_atomic32_read(&vq->while_queuing))
 			rte_pause();
 	}
 }
@@ -868,7 +868,7 @@  new_device(int vid)
 	vhost_dev_csum_configure(eth_dev);
 
 	rte_atomic32_set(&internal->dev_attached, 1);
-	update_queuing_status(eth_dev);
+	update_queuing_status(eth_dev, false);
 
 	VHOST_LOG(INFO, "Vhost device %d created\n", vid);
 
@@ -898,7 +898,7 @@  destroy_device(int vid)
 	internal = eth_dev->data->dev_private;
 
 	rte_atomic32_set(&internal->dev_attached, 0);
-	update_queuing_status(eth_dev);
+	update_queuing_status(eth_dev, true);
 
 	eth_dev->data->dev_link.link_status = RTE_ETH_LINK_DOWN;
 
@@ -1008,7 +1008,7 @@  vring_state_changed(int vid, uint16_t vring, int enable)
 	state->max_vring = RTE_MAX(vring, state->max_vring);
 	rte_spinlock_unlock(&state->lock);
 
-	update_queuing_status(eth_dev);
+	update_queuing_status(eth_dev, false);
 
 	VHOST_LOG(INFO, "vring%u is %s\n",
 			vring, enable ? "enabled" : "disabled");
@@ -1197,7 +1197,7 @@  eth_dev_start(struct rte_eth_dev *eth_dev)
 	}
 
 	rte_atomic32_set(&internal->started, 1);
-	update_queuing_status(eth_dev);
+	update_queuing_status(eth_dev, false);
 
 	return 0;
 }
@@ -1209,7 +1209,7 @@  eth_dev_stop(struct rte_eth_dev *dev)
 
 	dev->data->dev_started = 0;
 	rte_atomic32_set(&internal->started, 0);
-	update_queuing_status(dev);
+	update_queuing_status(dev, true);
 
 	return 0;
 }