[dpdk-dev] net/vhost: fix segfault when creating vdev dynamically

Message ID 1522166726-42025-1-git-send-email-junjie.j.chen@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Maxime Coquelin
Headers

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation fail Compilation issues

Commit Message

junjie.j.chen@intel.com March 27, 2018, 4:05 p.m. UTC
  when creating vdev dynamically, vhost pmd driver start directly without
checking TX/RX queues ready or not, and thus cause segmentation fault when
vhost library accessing queues. This patch add flag to check whether queues
setup or not, and add driver start call into dev_start to allow user start
it after setting up queue.

Fixes: aed0b12930b33("net/vhost: fix socket file deleted on stop")
Signed-off-by: Junjie Chen <junjie.j.chen@intel.com>
---
 drivers/net/vhost/rte_eth_vhost.c | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)
  

Comments

Jianfeng Tan March 27, 2018, 8:56 a.m. UTC | #1
On 3/28/2018 12:05 AM, Junjie Chen wrote:
> when creating vdev dynamically, vhost pmd driver start directly without
> checking TX/RX queues ready or not, and thus cause segmentation fault when
> vhost library accessing queues. This patch add flag to check whether queues
> setup or not, and add driver start call into dev_start to allow user start
> it after setting up queue.

The issue is clear now. But this patch just puts the situation before 
below fix: "it doesn't create the actual datagram socket until you call 
.dev_start()."

To fix this issue, we might delay the queue setup until we really have 
queues?

> Fixes: aed0b12930b33("net/vhost: fix socket file deleted on stop")

Missed the space between commit number and title. And we keep 12 
characters for the commit id.

Thanks,
Jianfeng

> Signed-off-by: Junjie Chen <junjie.j.chen@intel.com>
> ---
>   drivers/net/vhost/rte_eth_vhost.c | 21 +++++++++++++++++++--
>   1 file changed, 19 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
> index 3aae01c..719a150 100644
> --- a/drivers/net/vhost/rte_eth_vhost.c
> +++ b/drivers/net/vhost/rte_eth_vhost.c
> @@ -118,6 +118,7 @@ struct pmd_internal {
>   	char *iface_name;
>   	uint16_t max_queues;
>   	rte_atomic32_t started;
> +	rte_atomic32_t once;
>   };
>   
>   struct internal_list {
> @@ -772,12 +773,24 @@ rte_eth_vhost_get_vid_from_port_id(uint16_t port_id)
>   static int
>   eth_dev_start(struct rte_eth_dev *dev)
>   {
> +	int ret = 0;
>   	struct pmd_internal *internal = dev->data->dev_private;
>   
> +	if (unlikely(rte_atomic32_read(&internal->once) == 0)) {
> +		ret = rte_vhost_driver_start(internal->iface_name);
> +		if (ret < 0) {
> +			RTE_LOG(ERR, PMD, "Failed to start driver for %s\n",
> +				internal->iface_name);
> +			return ret;
> +		}
> +
> +		rte_atomic32_set(&internal->once, 1);
> +	}
> +
>   	rte_atomic32_set(&internal->started, 1);
>   	update_queuing_status(dev);
>   
> -	return 0;
> +	return ret;
>   }
>   
>   static void
> @@ -1101,7 +1114,11 @@ eth_dev_vhost_create(struct rte_vdev_device *dev, char *iface_name,
>   		goto error;
>   	}
>   
> -	if (rte_vhost_driver_start(iface_name) < 0) {
> +	if (!data->rx_queues || !data->tx_queues) {
> +		RTE_LOG(INFO, PMD,
> +			"TX/RX queue is not ready, driver will not start\n");
> +		rte_atomic32_set(&internal->once, 0);
> +	} else if (rte_vhost_driver_start(iface_name) < 0) {
>   		RTE_LOG(ERR, PMD, "Failed to start driver for %s\n",
>   			iface_name);
>   		goto error;
  
junjie.j.chen@intel.com March 27, 2018, 9:02 a.m. UTC | #2
Hi Jianfeng

> On 3/28/2018 12:05 AM, Junjie Chen wrote:
> > when creating vdev dynamically, vhost pmd driver start directly
> > without checking TX/RX queues ready or not, and thus cause
> > segmentation fault when vhost library accessing queues. This patch add
> > flag to check whether queues setup or not, and add driver start call
> > into dev_start to allow user start it after setting up queue.
> 
> The issue is clear now. But this patch just puts the situation before below fix:
> "it doesn't create the actual datagram socket until you call .dev_start()."

No, if the queue exist, the datagram socket still get created in vhost_create API, since the vhost_driver_register still exist in vhost_create.

> 
> To fix this issue, we might delay the queue setup until we really have queues?
> 
> > Fixes: aed0b12930b33("net/vhost: fix socket file deleted on stop")
> 
> Missed the space between commit number and title. And we keep 12
> characters for the commit id.

Thanks for reminder, will update in v2.
> 
> Thanks,
> Jianfeng
> 
> > Signed-off-by: Junjie Chen <junjie.j.chen@intel.com>
> > ---
> >   drivers/net/vhost/rte_eth_vhost.c | 21 +++++++++++++++++++--
> >   1 file changed, 19 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/net/vhost/rte_eth_vhost.c
> > b/drivers/net/vhost/rte_eth_vhost.c
> > index 3aae01c..719a150 100644
> > --- a/drivers/net/vhost/rte_eth_vhost.c
> > +++ b/drivers/net/vhost/rte_eth_vhost.c
> > @@ -118,6 +118,7 @@ struct pmd_internal {
> >   	char *iface_name;
> >   	uint16_t max_queues;
> >   	rte_atomic32_t started;
> > +	rte_atomic32_t once;
> >   };
> >
> >   struct internal_list {
> > @@ -772,12 +773,24 @@ rte_eth_vhost_get_vid_from_port_id(uint16_t
> port_id)
> >   static int
> >   eth_dev_start(struct rte_eth_dev *dev)
> >   {
> > +	int ret = 0;
> >   	struct pmd_internal *internal = dev->data->dev_private;
> >
> > +	if (unlikely(rte_atomic32_read(&internal->once) == 0)) {
> > +		ret = rte_vhost_driver_start(internal->iface_name);
> > +		if (ret < 0) {
> > +			RTE_LOG(ERR, PMD, "Failed to start driver for %s\n",
> > +				internal->iface_name);
> > +			return ret;
> > +		}
> > +
> > +		rte_atomic32_set(&internal->once, 1);
> > +	}
> > +
> >   	rte_atomic32_set(&internal->started, 1);
> >   	update_queuing_status(dev);
> >
> > -	return 0;
> > +	return ret;
> >   }
> >
> >   static void
> > @@ -1101,7 +1114,11 @@ eth_dev_vhost_create(struct rte_vdev_device
> *dev, char *iface_name,
> >   		goto error;
> >   	}
> >
> > -	if (rte_vhost_driver_start(iface_name) < 0) {
> > +	if (!data->rx_queues || !data->tx_queues) {
> > +		RTE_LOG(INFO, PMD,
> > +			"TX/RX queue is not ready, driver will not start\n");
> > +		rte_atomic32_set(&internal->once, 0);
> > +	} else if (rte_vhost_driver_start(iface_name) < 0) {
> >   		RTE_LOG(ERR, PMD, "Failed to start driver for %s\n",
> >   			iface_name);
> >   		goto error;
  
Jianfeng Tan March 27, 2018, 9:10 a.m. UTC | #3
On 3/27/2018 5:02 PM, Chen, Junjie J wrote:
> Hi Jianfeng
>
>> On 3/28/2018 12:05 AM, Junjie Chen wrote:
>>> when creating vdev dynamically, vhost pmd driver start directly
>>> without checking TX/RX queues ready or not, and thus cause
>>> segmentation fault when vhost library accessing queues. This patch add
>>> flag to check whether queues setup or not, and add driver start call
>>> into dev_start to allow user start it after setting up queue.
>> The issue is clear now. But this patch just puts the situation before below fix:
>> "it doesn't create the actual datagram socket until you call .dev_start()."
> No, if the queue exist, the datagram socket still get created in vhost_create API, since the vhost_driver_register still exist in vhost_create.

The queue can never be created, as it's still not probed.
  
junjie.j.chen@intel.com March 27, 2018, 9:24 a.m. UTC | #4
> >
> >> On 3/28/2018 12:05 AM, Junjie Chen wrote:
> >>> when creating vdev dynamically, vhost pmd driver start directly
> >>> without checking TX/RX queues ready or not, and thus cause
> >>> segmentation fault when vhost library accessing queues. This patch
> >>> add flag to check whether queues setup or not, and add driver start
> >>> call into dev_start to allow user start it after setting up queue.
> >> The issue is clear now. But this patch just puts the situation before below
> fix:
> >> "it doesn't create the actual datagram socket until you call .dev_start()."
> > No, if the queue exist, the datagram socket still get created in vhost_create
> API, since the vhost_driver_register still exist in vhost_create.
> 
> The queue can never be created, as it's still not probed.

I think we need to separate this into two cases:
	Statically create vdev, the datagram recreate logical is still there since queues are exist already, this patch doesn't change anything.
	Dynamic create vdev, as you pointed out, queue can never be created, while this should be not valid since In normal process of creating vdev dynamically, we always need to config queues. Correct me if I'm wrong.

In summary, I think the previously commit fixes the static code path and this patch fixes the dynamic code path (we need to at least setup queue once).
  
Jianfeng Tan March 27, 2018, 9:42 a.m. UTC | #5
On 3/27/2018 5:24 PM, Chen, Junjie J wrote:
>>>> On 3/28/2018 12:05 AM, Junjie Chen wrote:
>>>>> when creating vdev dynamically, vhost pmd driver start directly
>>>>> without checking TX/RX queues ready or not, and thus cause
>>>>> segmentation fault when vhost library accessing queues. This patch
>>>>> add flag to check whether queues setup or not, and add driver start
>>>>> call into dev_start to allow user start it after setting up queue.
>>>> The issue is clear now. But this patch just puts the situation before below
>> fix:
>>>> "it doesn't create the actual datagram socket until you call .dev_start()."
>>> No, if the queue exist, the datagram socket still get created in vhost_create
>> API, since the vhost_driver_register still exist in vhost_create.
>>
>> The queue can never be created, as it's still not probed.
> I think we need to separate this into two cases:
> 	Statically create vdev, the datagram recreate logical is still there since queues are exist already, this patch doesn't change anything.
> 	Dynamic create vdev, as you pointed out, queue can never be created, while this should be not valid since In normal process of creating vdev dynamically, we always need to config queues. Correct me if I'm wrong.

My point is, either vdev is created statically or dynamically, when 
probe(), queues are not setup yet definitely, then *the unix socket will 
not be created* until we set up the queues and do dev_start(). If the 
unix socket is not created, then VM cannot connect to it.

> In summary, I think the previously commit fixes the static code path and this patch fixes the dynamic code path (we need to at least setup queue once).
  
junjie.j.chen@intel.com March 27, 2018, 10:18 a.m. UTC | #6
> 
> On 3/27/2018 5:24 PM, Chen, Junjie J wrote:
> >>>> On 3/28/2018 12:05 AM, Junjie Chen wrote:
> >>>>> when creating vdev dynamically, vhost pmd driver start directly
> >>>>> without checking TX/RX queues ready or not, and thus cause
> >>>>> segmentation fault when vhost library accessing queues. This patch
> >>>>> add flag to check whether queues setup or not, and add driver
> >>>>> start call into dev_start to allow user start it after setting up queue.
> >>>> The issue is clear now. But this patch just puts the situation
> >>>> before below
> >> fix:
> >>>> "it doesn't create the actual datagram socket until you call .dev_start()."
> >>> No, if the queue exist, the datagram socket still get created in
> >>> vhost_create
> >> API, since the vhost_driver_register still exist in vhost_create.
> >>
> >> The queue can never be created, as it's still not probed.
> > I think we need to separate this into two cases:
> > 	Statically create vdev, the datagram recreate logical is still there since
> queues are exist already, this patch doesn't change anything.
> > 	Dynamic create vdev, as you pointed out, queue can never be created,
> while this should be not valid since In normal process of creating vdev
> dynamically, we always need to config queues. Correct me if I'm wrong.
> 
> My point is, either vdev is created statically or dynamically, when probe(),
> queues are not setup yet definitely, then *the unix socket will not be created*
> until we set up the queues and do dev_start(). If the unix socket is not created,
> then VM cannot connect to it.

Yes, I agree this. 
In this patch, it just check whether queue is setup or not and give user a chance to setup queue with dev_start, it doesn't revert the logical from previously commit.

So the logical is change to stop creating unix socket before queue setup, what do you think about this? 
> 
> > In summary, I think the previously commit fixes the static code path and this
> patch fixes the dynamic code path (we need to at least setup queue once).
  
Maxime Coquelin March 27, 2018, 11:28 a.m. UTC | #7
On 03/27/2018 11:42 AM, Tan, Jianfeng wrote:
> 
> 
> On 3/27/2018 5:24 PM, Chen, Junjie J wrote:
>>>>> On 3/28/2018 12:05 AM, Junjie Chen wrote:
>>>>>> when creating vdev dynamically, vhost pmd driver start directly
>>>>>> without checking TX/RX queues ready or not, and thus cause
>>>>>> segmentation fault when vhost library accessing queues. This patch
>>>>>> add flag to check whether queues setup or not, and add driver start
>>>>>> call into dev_start to allow user start it after setting up queue.
>>>>> The issue is clear now. But this patch just puts the situation 
>>>>> before below
>>> fix:
>>>>> "it doesn't create the actual datagram socket until you call 
>>>>> .dev_start()."
>>>> No, if the queue exist, the datagram socket still get created in 
>>>> vhost_create
>>> API, since the vhost_driver_register still exist in vhost_create.
>>>
>>> The queue can never be created, as it's still not probed.
>> I think we need to separate this into two cases:
>>     Statically create vdev, the datagram recreate logical is still 
>> there since queues are exist already, this patch doesn't change anything.
>>     Dynamic create vdev, as you pointed out, queue can never be 
>> created, while this should be not valid since In normal process of 
>> creating vdev dynamically, we always need to config queues. Correct me 
>> if I'm wrong.
> 
> My point is, either vdev is created statically or dynamically, when 
> probe(), queues are not setup yet definitely, then *the unix socket will 
> not be created* until we set up the queues and do dev_start(). If the 
> unix socket is not created, then VM cannot connect to it.

FYI, I think I reproduced such an issue with the vdev statically created
in the past, while doing some experiments. I didn't went further into 
the analysis at that time, but it looks like the issue Junjie is trying
to address with this patch for dynamically created vdev.

Cheers,
Maxime
>> In summary, I think the previously commit fixes the static code path 
>> and this patch fixes the dynamic code path (we need to at least setup 
>> queue once).
>
  
Jianfeng Tan March 27, 2018, 1:54 p.m. UTC | #8
On 3/27/2018 6:18 PM, Chen, Junjie J wrote:
>> On 3/27/2018 5:24 PM, Chen, Junjie J wrote:
>>>>>> On 3/28/2018 12:05 AM, Junjie Chen wrote:
>>>>>>> when creating vdev dynamically, vhost pmd driver start directly
>>>>>>> without checking TX/RX queues ready or not, and thus cause
>>>>>>> segmentation fault when vhost library accessing queues. This patch
>>>>>>> add flag to check whether queues setup or not, and add driver
>>>>>>> start call into dev_start to allow user start it after setting up queue.
>>>>>> The issue is clear now. But this patch just puts the situation
>>>>>> before below
>>>> fix:
>>>>>> "it doesn't create the actual datagram socket until you call .dev_start()."
>>>>> No, if the queue exist, the datagram socket still get created in
>>>>> vhost_create
>>>> API, since the vhost_driver_register still exist in vhost_create.
>>>>
>>>> The queue can never be created, as it's still not probed.
>>> I think we need to separate this into two cases:
>>> 	Statically create vdev, the datagram recreate logical is still there since
>> queues are exist already, this patch doesn't change anything.
>>> 	Dynamic create vdev, as you pointed out, queue can never be created,
>> while this should be not valid since In normal process of creating vdev
>> dynamically, we always need to config queues. Correct me if I'm wrong.
>>
>> My point is, either vdev is created statically or dynamically, when probe(),
>> queues are not setup yet definitely, then *the unix socket will not be created*
>> until we set up the queues and do dev_start(). If the unix socket is not created,
>> then VM cannot connect to it.
> Yes, I agree this.
> In this patch, it just check whether queue is setup or not and give user a chance to setup queue with dev_start, it doesn't revert the logical from previously commit.
>
> So the logical is change to stop creating unix socket before queue setup, what do you think about this?

As you said, we partially revert this back, of delaying the unix socket 
creation to queue setup, which can be observed by users.

So what I'm suggesting is: we still keep unix socket creation at probe. 
But in the new_device(), check if queues are setup or not: if yes, we 
just do the queue setting (vid, internal, port); if not, we will delay 
the queue setting until dev_start().

Thanks,
Jianfeng
  
Jianfeng Tan March 27, 2018, 2:01 p.m. UTC | #9
On 3/27/2018 7:28 PM, Maxime Coquelin wrote:
>
>
> On 03/27/2018 11:42 AM, Tan, Jianfeng wrote:
>>
>>
>> On 3/27/2018 5:24 PM, Chen, Junjie J wrote:
>>>>>> On 3/28/2018 12:05 AM, Junjie Chen wrote:
>>>>>>> when creating vdev dynamically, vhost pmd driver start directly
>>>>>>> without checking TX/RX queues ready or not, and thus cause
>>>>>>> segmentation fault when vhost library accessing queues. This patch
>>>>>>> add flag to check whether queues setup or not, and add driver start
>>>>>>> call into dev_start to allow user start it after setting up queue.
>>>>>> The issue is clear now. But this patch just puts the situation 
>>>>>> before below
>>>> fix:
>>>>>> "it doesn't create the actual datagram socket until you call 
>>>>>> .dev_start()."
>>>>> No, if the queue exist, the datagram socket still get created in 
>>>>> vhost_create
>>>> API, since the vhost_driver_register still exist in vhost_create.
>>>>
>>>> The queue can never be created, as it's still not probed.
>>> I think we need to separate this into two cases:
>>>     Statically create vdev, the datagram recreate logical is still 
>>> there since queues are exist already, this patch doesn't change 
>>> anything.
>>>     Dynamic create vdev, as you pointed out, queue can never be 
>>> created, while this should be not valid since In normal process of 
>>> creating vdev dynamically, we always need to config queues. Correct 
>>> me if I'm wrong.
>>
>> My point is, either vdev is created statically or dynamically, when 
>> probe(), queues are not setup yet definitely, then *the unix socket 
>> will not be created* until we set up the queues and do dev_start(). 
>> If the unix socket is not created, then VM cannot connect to it.
>
> FYI, I think I reproduced such an issue with the vdev statically created
> in the past, while doing some experiments. I didn't went further into 
> the analysis at that time, but it looks like the issue Junjie is trying
> to address with this patch for dynamically created vdev.

Yes, I have noticed that this issue mostly happens at dynamic case. Just 
try to suggest a proper way to fix. Please check if my suggestion in 
another email makes sense.

Thanks,
Jianfeng

>
> Cheers,
> Maxime
  
Maxime Coquelin March 29, 2018, 12:35 p.m. UTC | #10
On 03/27/2018 04:01 PM, Tan, Jianfeng wrote:
> 
> 
> On 3/27/2018 7:28 PM, Maxime Coquelin wrote:
>>
>>
>> On 03/27/2018 11:42 AM, Tan, Jianfeng wrote:
>>>
>>>
>>> On 3/27/2018 5:24 PM, Chen, Junjie J wrote:
>>>>>>> On 3/28/2018 12:05 AM, Junjie Chen wrote:
>>>>>>>> when creating vdev dynamically, vhost pmd driver start directly
>>>>>>>> without checking TX/RX queues ready or not, and thus cause
>>>>>>>> segmentation fault when vhost library accessing queues. This patch
>>>>>>>> add flag to check whether queues setup or not, and add driver start
>>>>>>>> call into dev_start to allow user start it after setting up queue.
>>>>>>> The issue is clear now. But this patch just puts the situation 
>>>>>>> before below
>>>>> fix:
>>>>>>> "it doesn't create the actual datagram socket until you call 
>>>>>>> .dev_start()."
>>>>>> No, if the queue exist, the datagram socket still get created in 
>>>>>> vhost_create
>>>>> API, since the vhost_driver_register still exist in vhost_create.
>>>>>
>>>>> The queue can never be created, as it's still not probed.
>>>> I think we need to separate this into two cases:
>>>>     Statically create vdev, the datagram recreate logical is still 
>>>> there since queues are exist already, this patch doesn't change 
>>>> anything.
>>>>     Dynamic create vdev, as you pointed out, queue can never be 
>>>> created, while this should be not valid since In normal process of 
>>>> creating vdev dynamically, we always need to config queues. Correct 
>>>> me if I'm wrong.
>>>
>>> My point is, either vdev is created statically or dynamically, when 
>>> probe(), queues are not setup yet definitely, then *the unix socket 
>>> will not be created* until we set up the queues and do dev_start(). 
>>> If the unix socket is not created, then VM cannot connect to it.
>>
>> FYI, I think I reproduced such an issue with the vdev statically created
>> in the past, while doing some experiments. I didn't went further into 
>> the analysis at that time, but it looks like the issue Junjie is trying
>> to address with this patch for dynamically created vdev.
> 
> Yes, I have noticed that this issue mostly happens at dynamic case. Just 
> try to suggest a proper way to fix. Please check if my suggestion in 
> another email makes sense.

Yes, it makes sense, that's the right thing to do I think.

Thanks,
Maxime
> 
> Thanks,
> Jianfeng
> 
>>
>> Cheers,
>> Maxime
>
  

Patch

diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c
index 3aae01c..719a150 100644
--- a/drivers/net/vhost/rte_eth_vhost.c
+++ b/drivers/net/vhost/rte_eth_vhost.c
@@ -118,6 +118,7 @@  struct pmd_internal {
 	char *iface_name;
 	uint16_t max_queues;
 	rte_atomic32_t started;
+	rte_atomic32_t once;
 };
 
 struct internal_list {
@@ -772,12 +773,24 @@  rte_eth_vhost_get_vid_from_port_id(uint16_t port_id)
 static int
 eth_dev_start(struct rte_eth_dev *dev)
 {
+	int ret = 0;
 	struct pmd_internal *internal = dev->data->dev_private;
 
+	if (unlikely(rte_atomic32_read(&internal->once) == 0)) {
+		ret = rte_vhost_driver_start(internal->iface_name);
+		if (ret < 0) {
+			RTE_LOG(ERR, PMD, "Failed to start driver for %s\n",
+				internal->iface_name);
+			return ret;
+		}
+
+		rte_atomic32_set(&internal->once, 1);
+	}
+
 	rte_atomic32_set(&internal->started, 1);
 	update_queuing_status(dev);
 
-	return 0;
+	return ret;
 }
 
 static void
@@ -1101,7 +1114,11 @@  eth_dev_vhost_create(struct rte_vdev_device *dev, char *iface_name,
 		goto error;
 	}
 
-	if (rte_vhost_driver_start(iface_name) < 0) {
+	if (!data->rx_queues || !data->tx_queues) {
+		RTE_LOG(INFO, PMD,
+			"TX/RX queue is not ready, driver will not start\n");
+		rte_atomic32_set(&internal->once, 0);
+	} else if (rte_vhost_driver_start(iface_name) < 0) {
 		RTE_LOG(ERR, PMD, "Failed to start driver for %s\n",
 			iface_name);
 		goto error;