[v3,4/4] vhost: fix async register/unregister deadlock
Checks
Commit Message
When async register/unregister function is invoked in certain vhost
event callbacks (e.g. vring state change), deadlock may occur due to
recursive spinlock acquire. This patch removes unnecessary spinlock
from register API and use trylock() primitive in the unregister API
to avoid deadlock.
Fixes: 78639d54563a ("vhost: introduce async enqueue registration API")
Signed-off-by: Patrick Fu <patrick.fu@intel.com>
---
lib/librte_vhost/vhost.c | 13 +++++++------
lib/librte_vhost/vhost_user.c | 4 ++--
2 files changed, 9 insertions(+), 8 deletions(-)
Comments
On 9/29/20 11:29 AM, Patrick Fu wrote:
> When async register/unregister function is invoked in certain vhost
> event callbacks (e.g. vring state change), deadlock may occur due to
> recursive spinlock acquire. This patch removes unnecessary spinlock
> from register API and use trylock() primitive in the unregister API
> to avoid deadlock.
I am not convinced it is unnecessary in every cases.
> Fixes: 78639d54563a ("vhost: introduce async enqueue registration API")
>
> Signed-off-by: Patrick Fu <patrick.fu@intel.com>
> ---
> lib/librte_vhost/vhost.c | 13 +++++++------
> lib/librte_vhost/vhost_user.c | 4 ++--
> 2 files changed, 9 insertions(+), 8 deletions(-)
>
> diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
> index 05b578c2f..ba504a00a 100644
> --- a/lib/librte_vhost/vhost.c
> +++ b/lib/librte_vhost/vhost.c
> @@ -1577,8 +1577,6 @@ int rte_vhost_async_channel_register(int vid, uint16_t queue_id,
> ops->transfer_data == NULL))
> return -1;
>
> - rte_spinlock_lock(&vq->access_lock);
> -
What if a virtqueue is being destroyed while this function is called?
> if (unlikely(vq->async_registered)) {
> VHOST_LOG_CONFIG(ERR,
> "async register failed: channel already registered "
> @@ -1627,8 +1625,6 @@ int rte_vhost_async_channel_register(int vid, uint16_t queue_id,
> vq->async_registered = true;
>
> reg_out:
> - rte_spinlock_unlock(&vq->access_lock);
> -
> return 0;
> }
>
> @@ -1647,10 +1643,15 @@ int rte_vhost_async_channel_unregister(int vid, uint16_t queue_id)
> return ret;
>
> ret = 0;
> - rte_spinlock_lock(&vq->access_lock);
>
> if (!vq->async_registered)
> - goto out;
> + return ret;
> +
> + if (!rte_spinlock_trylock(&vq->access_lock)) {
> + VHOST_LOG_CONFIG(ERR, "Failed to unregister async channel. "
> + "virt queue busy.\n");
> + return -1;
> + }
>
> if (vq->async_pkts_inflight_n) {
> VHOST_LOG_CONFIG(ERR, "Failed to unregister async channel. "
> diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
> index 67d2a2d43..c8d74bde6 100644
> --- a/lib/librte_vhost/vhost_user.c
> +++ b/lib/librte_vhost/vhost_user.c
> @@ -2043,9 +2043,9 @@ vhost_user_set_vring_enable(struct virtio_net **pdev,
> "set queue enable: %d to qp idx: %d\n",
> enable, index);
>
> - if (!enable && dev->virtqueue[index]->async_registered) {
> + if (enable && dev->virtqueue[index]->async_registered) {
> if (dev->virtqueue[index]->async_pkts_inflight_n) {
> - VHOST_LOG_CONFIG(ERR, "failed to disable vring. "
> + VHOST_LOG_CONFIG(ERR, "failed to enable vring. "
> "async inflight packets must be completed first\n");
> return RTE_VHOST_MSG_RESULT_ERR;
> }
>
> -----Original Message-----
> From: Maxime Coquelin <maxime.coquelin@redhat.com>
> Sent: Monday, October 5, 2020 10:26 PM
> To: Fu, Patrick <patrick.fu@intel.com>; dev@dpdk.org; Xia, Chenbo
> <chenbo.xia@intel.com>
> Cc: Wang, Zhihong <zhihong.wang@intel.com>; Jiang, Cheng1
> <cheng1.jiang@intel.com>
> Subject: Re: [PATCH v3 4/4] vhost: fix async register/unregister deadlock
>
>
>
> On 9/29/20 11:29 AM, Patrick Fu wrote:
> > When async register/unregister function is invoked in certain vhost
> > event callbacks (e.g. vring state change), deadlock may occur due to
> > recursive spinlock acquire. This patch removes unnecessary spinlock
> > from register API and use trylock() primitive in the unregister API
> > to avoid deadlock.
>
> I am not convinced it is unnecessary in every cases.
>
>
> > Fixes: 78639d54563a ("vhost: introduce async enqueue registration API")
> >
> > Signed-off-by: Patrick Fu <patrick.fu@intel.com>
> > ---
> > lib/librte_vhost/vhost.c | 13 +++++++------
> > lib/librte_vhost/vhost_user.c | 4 ++--
> > 2 files changed, 9 insertions(+), 8 deletions(-)
> >
> > diff --git a/lib/librte_vhost/vhost.c b/lib/librte_vhost/vhost.c
> > index 05b578c2f..ba504a00a 100644
> > --- a/lib/librte_vhost/vhost.c
> > +++ b/lib/librte_vhost/vhost.c
> > @@ -1577,8 +1577,6 @@ int rte_vhost_async_channel_register(int vid,
> uint16_t queue_id,
> > ops->transfer_data == NULL))
> > return -1;
> >
> > - rte_spinlock_lock(&vq->access_lock);
> > -
>
> What if a virtqueue is being destroyed while this function is called?
>
Yes, this is a scenario that access_lock may help. I will add spinlock back in v4.
Thanks,
Patrick
@@ -1577,8 +1577,6 @@ int rte_vhost_async_channel_register(int vid, uint16_t queue_id,
ops->transfer_data == NULL))
return -1;
- rte_spinlock_lock(&vq->access_lock);
-
if (unlikely(vq->async_registered)) {
VHOST_LOG_CONFIG(ERR,
"async register failed: channel already registered "
@@ -1627,8 +1625,6 @@ int rte_vhost_async_channel_register(int vid, uint16_t queue_id,
vq->async_registered = true;
reg_out:
- rte_spinlock_unlock(&vq->access_lock);
-
return 0;
}
@@ -1647,10 +1643,15 @@ int rte_vhost_async_channel_unregister(int vid, uint16_t queue_id)
return ret;
ret = 0;
- rte_spinlock_lock(&vq->access_lock);
if (!vq->async_registered)
- goto out;
+ return ret;
+
+ if (!rte_spinlock_trylock(&vq->access_lock)) {
+ VHOST_LOG_CONFIG(ERR, "Failed to unregister async channel. "
+ "virt queue busy.\n");
+ return -1;
+ }
if (vq->async_pkts_inflight_n) {
VHOST_LOG_CONFIG(ERR, "Failed to unregister async channel. "
@@ -2043,9 +2043,9 @@ vhost_user_set_vring_enable(struct virtio_net **pdev,
"set queue enable: %d to qp idx: %d\n",
enable, index);
- if (!enable && dev->virtqueue[index]->async_registered) {
+ if (enable && dev->virtqueue[index]->async_registered) {
if (dev->virtqueue[index]->async_pkts_inflight_n) {
- VHOST_LOG_CONFIG(ERR, "failed to disable vring. "
+ VHOST_LOG_CONFIG(ERR, "failed to enable vring. "
"async inflight packets must be completed first\n");
return RTE_VHOST_MSG_RESULT_ERR;
}