[dpdk-dev,2/2] vhost: add pipe event for optimizing negotiating

Message ID 1522216165-19666-3-git-send-email-xiangxia.m.yue@gmail.com (mailing list archive)
State Accepted, archived
Delegated to: Maxime Coquelin
Headers

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation fail apply issues

Commit Message

Tonghao Zhang March 28, 2018, 5:49 a.m. UTC
  From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

When vhost-user connects to qemu successfully, dpdk will call
vhost_user_add_connection to add the unix socket fd to poll.
But fdset_add only sets the socket fd in a fdentry while poll
may be sleeping. In the general case, this is no problem. But if
we use hot update for vhost-user, most downtime of VMs network
is 750+ms. This patch adds a pipe event, so after connections are
ok, dpdk rebuilds the poll immediately. With this patch, the
most downtime is 20~30ms.

Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
---
 lib/librte_vhost/fd_man.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++
 lib/librte_vhost/fd_man.h | 16 ++++++++++++++++
 lib/librte_vhost/socket.c | 14 ++++++++++++++
 3 files changed, 79 insertions(+)
  

Comments

Jianfeng Tan March 29, 2018, 7:32 a.m. UTC | #1
Hi Xiangxia,

> -----Original Message-----

> From: xiangxia.m.yue@gmail.com [mailto:xiangxia.m.yue@gmail.com]

> Sent: Wednesday, March 28, 2018 1:49 PM

> To: Tan, Jianfeng

> Cc: dev@dpdk.org; Tonghao Zhang

> Subject: [PATCH 2/2] vhost: add pipe event for optimizing negotiating

> 

> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

> 

> When vhost—user connects qemu successfully, dpdk will call


Typo: "vhost—user" uses an em dash; it should be a hyphen ("vhost-user").

> the vhost_user_add_connection to add unix socket fd to poll.

> And fdset_add only set the socket fd to a fdentry while poll

> may sleep now. In a general case, this is no problem. But if

> we use hot update for vhost-user, most downtime of VMs network

> is 750+ms. This patch adds pipe event, so after connections are

> ok, dpdk rebuild the poll immediately. With this patch, the

> most downtime is 20~30ms.

> 

> Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>

> ---

>  lib/librte_vhost/fd_man.c | 49

> +++++++++++++++++++++++++++++++++++++++++++++++

>  lib/librte_vhost/fd_man.h | 16 ++++++++++++++++

>  lib/librte_vhost/socket.c | 14 ++++++++++++++

>  3 files changed, 79 insertions(+)

> 

> diff --git a/lib/librte_vhost/fd_man.c b/lib/librte_vhost/fd_man.c

> index 181711c..7716757 100644

> --- a/lib/librte_vhost/fd_man.c

> +++ b/lib/librte_vhost/fd_man.c

> @@ -15,6 +15,7 @@

>  #include <rte_log.h>

> 

>  #include "fd_man.h"

> +#include "vhost.h"


This is a nice finding and solution; however, I don't think we should include the vhost header file in fd-related files. Actually, I could not figure out why you need to include this header file.

Thanks,
Jianfeng

> 

>  #define FDPOLLERR (POLLERR | POLLHUP | POLLNVAL)

> 

> @@ -272,3 +273,51 @@

> 

>  	return NULL;

>  }

> +

> +static void

> +fdset_pipe_read_cb(int readfd, void *dat __rte_unused,

> +		   int *remove __rte_unused)

> +{

> +	char charbuf[16];

> +	read(readfd, charbuf, sizeof(charbuf));

> +}

> +

> +void

> +fdset_pipe_uninit(struct fdset *fdset)

> +{

> +	fdset_del(fdset, fdset->u.readfd);

> +	close(fdset->u.readfd);

> +	close(fdset->u.writefd);

> +}

> +

> +int

> +fdset_pipe_init(struct fdset *fdset)

> +{

> +	int ret;

> +

> +	if (pipe(fdset->u.pipefd) < 0) {

> +		RTE_LOG(ERR, VHOST_CONFIG,

> +			"failed to create pipe for vhost fdset\n");

> +		return -1;

> +	}

> +

> +	ret = fdset_add(fdset, fdset->u.readfd,

> +			fdset_pipe_read_cb, NULL, NULL);

> +

> +	if (ret < 0) {

> +		RTE_LOG(ERR, VHOST_CONFIG,

> +			"failed to add pipe readfd %d into vhost server

> fdset\n",

> +			fdset->u.readfd);

> +

> +		fdset_pipe_uninit(fdset);

> +		return -1;

> +	}

> +

> +	return 0;

> +}

> +

> +void

> +fdset_pipe_notify(struct fdset *fdset)

> +{

> +	write(fdset->u.writefd, "1", 1);

> +}

> diff --git a/lib/librte_vhost/fd_man.h b/lib/librte_vhost/fd_man.h

> index 3a9276c..76a42fb 100644

> --- a/lib/librte_vhost/fd_man.h

> +++ b/lib/librte_vhost/fd_man.h

> @@ -25,6 +25,16 @@ struct fdset {

>  	struct fdentry fd[MAX_FDS];

>  	pthread_mutex_t fd_mutex;

>  	int num;	/* current fd number of this fdset */

> +

> +	union pipefds {

> +		struct {

> +			int pipefd[2];

> +		};

> +		struct {

> +			int readfd;

> +			int writefd;

> +		};

> +	} u;

>  };

> 

> 

> @@ -37,4 +47,10 @@ int fdset_add(struct fdset *pfdset, int fd,

> 

>  void *fdset_event_dispatch(void *arg);

> 

> +int fdset_pipe_init(struct fdset *fdset);

> +

> +void fdset_pipe_uninit(struct fdset *fdset);

> +

> +void fdset_pipe_notify(struct fdset *fdset);

> +

>  #endif

> diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c

> index 95bed78..795239c 100644

> --- a/lib/librte_vhost/socket.c

> +++ b/lib/librte_vhost/socket.c

> @@ -231,6 +231,8 @@ struct vhost_user {

>  	pthread_mutex_lock(&vsocket->conn_mutex);

>  	TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next);

>  	pthread_mutex_unlock(&vsocket->conn_mutex);

> +

> +	fdset_pipe_notify(&vhost_user.fdset);

>  	return;

> 

>  err:

> @@ -829,11 +831,23 @@ struct vhost_device_ops const *

>  		return -1;

> 

>  	if (fdset_tid == 0) {

> +		/**

> +		 * create a pipe which will be waited by poll and notified to

> +		 * rebuild the wait list of poll.

> +		 */

> +		if (fdset_pipe_init(&vhost_user.fdset) < 0) {

> +			RTE_LOG(ERR, VHOST_CONFIG,

> +				"failed to create pipe for vhost fdset\n");

> +			return -1;

> +		}

> +

>  		int ret = pthread_create(&fdset_tid, NULL, fdset_event_dispatch,

>  				     &vhost_user.fdset);

>  		if (ret != 0) {

>  			RTE_LOG(ERR, VHOST_CONFIG,

>  				"failed to create fdset handling thread");

> +

> +			fdset_pipe_uninit(&vhost_user.fdset);

>  			return -1;

>  		} else {

>  			snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN,

> --

> 1.8.3.1
  
Tonghao Zhang March 29, 2018, 11:24 a.m. UTC | #2
On Thu, Mar 29, 2018 at 3:32 PM, Tan, Jianfeng <jianfeng.tan@intel.com> wrote:
> Hi Xiangxia,
>
>> -----Original Message-----
>> From: xiangxia.m.yue@gmail.com [mailto:xiangxia.m.yue@gmail.com]
>> Sent: Wednesday, March 28, 2018 1:49 PM
>> To: Tan, Jianfeng
>> Cc: dev@dpdk.org; Tonghao Zhang
>> Subject: [PATCH 2/2] vhost: add pipe event for optimizing negotiating
>>
>> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>>
>> When vhost—user connects qemu successfully, dpdk will call
>
> Typo: "-"
v2 will update it. :)

>> the vhost_user_add_connection to add unix socket fd to poll.
>> And fdset_add only set the socket fd to a fdentry while poll
>> may sleep now. In a general case, this is no problem. But if
>> we use hot update for vhost-user, most downtime of VMs network
>> is 750+ms. This patch adds pipe event, so after connections are
>> ok, dpdk rebuild the poll immediately. With this patch, the
>> most downtime is 20~30ms.
>>
>> Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>> ---
>>  lib/librte_vhost/fd_man.c | 49
>> +++++++++++++++++++++++++++++++++++++++++++++++
>>  lib/librte_vhost/fd_man.h | 16 ++++++++++++++++
>>  lib/librte_vhost/socket.c | 14 ++++++++++++++
>>  3 files changed, 79 insertions(+)
>>
>> diff --git a/lib/librte_vhost/fd_man.c b/lib/librte_vhost/fd_man.c
>> index 181711c..7716757 100644
>> --- a/lib/librte_vhost/fd_man.c
>> +++ b/lib/librte_vhost/fd_man.c
>> @@ -15,6 +15,7 @@
>>  #include <rte_log.h>
>>
>>  #include "fd_man.h"
>> +#include "vhost.h"
>
> This is a nice finding and solution, however, I don't think we shall include vhost header file in fd related files. Actually, I did not find out why you need to include this header file.

Hi Jianfeng, thanks for your review. In the fdset_pipe_init function,
I call RTE_LOG with VHOST_CONFIG when the pipe initialization fails.
So I included the vhost header file.


> Thanks,
> Jianfeng
>
>>
>>  #define FDPOLLERR (POLLERR | POLLHUP | POLLNVAL)
>>
>> @@ -272,3 +273,51 @@
>>
>>       return NULL;
>>  }
>> +
>> +static void
>> +fdset_pipe_read_cb(int readfd, void *dat __rte_unused,
>> +                int *remove __rte_unused)
>> +{
>> +     char charbuf[16];
>> +     read(readfd, charbuf, sizeof(charbuf));
>> +}
>> +
>> +void
>> +fdset_pipe_uninit(struct fdset *fdset)
>> +{
>> +     fdset_del(fdset, fdset->u.readfd);
>> +     close(fdset->u.readfd);
>> +     close(fdset->u.writefd);
>> +}
>> +
>> +int
>> +fdset_pipe_init(struct fdset *fdset)
>> +{
>> +     int ret;
>> +
>> +     if (pipe(fdset->u.pipefd) < 0) {
>> +             RTE_LOG(ERR, VHOST_CONFIG,
>> +                     "failed to create pipe for vhost fdset\n");
>> +             return -1;
>> +     }
>> +
>> +     ret = fdset_add(fdset, fdset->u.readfd,
>> +                     fdset_pipe_read_cb, NULL, NULL);
>> +
>> +     if (ret < 0) {
>> +             RTE_LOG(ERR, VHOST_CONFIG,
>> +                     "failed to add pipe readfd %d into vhost server
>> fdset\n",
>> +                     fdset->u.readfd);
>> +
>> +             fdset_pipe_uninit(fdset);
>> +             return -1;
>> +     }
>> +
>> +     return 0;
>> +}
>> +
>> +void
>> +fdset_pipe_notify(struct fdset *fdset)
>> +{
>> +     write(fdset->u.writefd, "1", 1);
>> +}
>> diff --git a/lib/librte_vhost/fd_man.h b/lib/librte_vhost/fd_man.h
>> index 3a9276c..76a42fb 100644
>> --- a/lib/librte_vhost/fd_man.h
>> +++ b/lib/librte_vhost/fd_man.h
>> @@ -25,6 +25,16 @@ struct fdset {
>>       struct fdentry fd[MAX_FDS];
>>       pthread_mutex_t fd_mutex;
>>       int num;        /* current fd number of this fdset */
>> +
>> +     union pipefds {
>> +             struct {
>> +                     int pipefd[2];
>> +             };
>> +             struct {
>> +                     int readfd;
>> +                     int writefd;
>> +             };
>> +     } u;
>>  };
>>
>>
>> @@ -37,4 +47,10 @@ int fdset_add(struct fdset *pfdset, int fd,
>>
>>  void *fdset_event_dispatch(void *arg);
>>
>> +int fdset_pipe_init(struct fdset *fdset);
>> +
>> +void fdset_pipe_uninit(struct fdset *fdset);
>> +
>> +void fdset_pipe_notify(struct fdset *fdset);
>> +
>>  #endif
>> diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c
>> index 95bed78..795239c 100644
>> --- a/lib/librte_vhost/socket.c
>> +++ b/lib/librte_vhost/socket.c
>> @@ -231,6 +231,8 @@ struct vhost_user {
>>       pthread_mutex_lock(&vsocket->conn_mutex);
>>       TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next);
>>       pthread_mutex_unlock(&vsocket->conn_mutex);
>> +
>> +     fdset_pipe_notify(&vhost_user.fdset);
>>       return;
>>
>>  err:
>> @@ -829,11 +831,23 @@ struct vhost_device_ops const *
>>               return -1;
>>
>>       if (fdset_tid == 0) {
>> +             /**
>> +              * create a pipe which will be waited by poll and notified to
>> +              * rebuild the wait list of poll.
>> +              */
>> +             if (fdset_pipe_init(&vhost_user.fdset) < 0) {
>> +                     RTE_LOG(ERR, VHOST_CONFIG,
>> +                             "failed to create pipe for vhost fdset\n");
>> +                     return -1;
>> +             }
>> +
>>               int ret = pthread_create(&fdset_tid, NULL, fdset_event_dispatch,
>>                                    &vhost_user.fdset);
>>               if (ret != 0) {
>>                       RTE_LOG(ERR, VHOST_CONFIG,
>>                               "failed to create fdset handling thread");
>> +
>> +                     fdset_pipe_uninit(&vhost_user.fdset);
>>                       return -1;
>>               } else {
>>                       snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN,
>> --
>> 1.8.3.1
>
  
Maxime Coquelin March 30, 2018, 7:57 a.m. UTC | #3
Hi Xiangxia,

On 03/29/2018 01:24 PM, Tonghao Zhang wrote:
> On Thu, Mar 29, 2018 at 3:32 PM, Tan, Jianfeng <jianfeng.tan@intel.com> wrote:
>> Hi Xiangxia,
>>
>>> -----Original Message-----
>>> From: xiangxia.m.yue@gmail.com [mailto:xiangxia.m.yue@gmail.com]
>>> Sent: Wednesday, March 28, 2018 1:49 PM
>>> To: Tan, Jianfeng
>>> Cc: dev@dpdk.org; Tonghao Zhang
>>> Subject: [PATCH 2/2] vhost: add pipe event for optimizing negotiating
>>>
>>> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>>>
>>> When vhost—user connects qemu successfully, dpdk will call
>>
>> Typo: "-"
> v2 will update it. :)
> 
>>> the vhost_user_add_connection to add unix socket fd to poll.
>>> And fdset_add only set the socket fd to a fdentry while poll
>>> may sleep now. In a general case, this is no problem. But if
>>> we use hot update for vhost-user, most downtime of VMs network
>>> is 750+ms. This patch adds pipe event, so after connections are
>>> ok, dpdk rebuild the poll immediately. With this patch, the
>>> most downtime is 20~30ms.
>>>
>>> Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>>> ---
>>>   lib/librte_vhost/fd_man.c | 49
>>> +++++++++++++++++++++++++++++++++++++++++++++++
>>>   lib/librte_vhost/fd_man.h | 16 ++++++++++++++++
>>>   lib/librte_vhost/socket.c | 14 ++++++++++++++
>>>   3 files changed, 79 insertions(+)
>>>
>>> diff --git a/lib/librte_vhost/fd_man.c b/lib/librte_vhost/fd_man.c
>>> index 181711c..7716757 100644
>>> --- a/lib/librte_vhost/fd_man.c
>>> +++ b/lib/librte_vhost/fd_man.c
>>> @@ -15,6 +15,7 @@
>>>   #include <rte_log.h>
>>>
>>>   #include "fd_man.h"
>>> +#include "vhost.h"
>>
>> This is a nice finding and solution, however, I don't think we shall include vhost header file in fd related files. Actually, I did not find out why you need to include this header file.
> 
> Hi Jianfeng, thanks for your review. In the fdset_pipe_init function,
> I call the RTE_LOG with VHOST_CONFIG, when init pipe not successfully.
> So I included the vhost header file.

Maybe it is better to create VHOST_SOCKET in socket.c rather than including
vhost.h, which creates a layer violation:

#define RTE_LOGTYPE_VHOST_SOCKET RTE_LOGTYPE_USER1

Is it OK for you? If so, no need to resend, I can handle the change when
applying.

Thanks,
Maxime
> 
> 
>> Thanks,
>> Jianfeng
>>
>>>
>>>   #define FDPOLLERR (POLLERR | POLLHUP | POLLNVAL)
>>>
>>> @@ -272,3 +273,51 @@
>>>
>>>        return NULL;
>>>   }
>>> +
>>> +static void
>>> +fdset_pipe_read_cb(int readfd, void *dat __rte_unused,
>>> +                int *remove __rte_unused)
>>> +{
>>> +     char charbuf[16];
>>> +     read(readfd, charbuf, sizeof(charbuf));
>>> +}
>>> +
>>> +void
>>> +fdset_pipe_uninit(struct fdset *fdset)
>>> +{
>>> +     fdset_del(fdset, fdset->u.readfd);
>>> +     close(fdset->u.readfd);
>>> +     close(fdset->u.writefd);
>>> +}
>>> +
>>> +int
>>> +fdset_pipe_init(struct fdset *fdset)
>>> +{
>>> +     int ret;
>>> +
>>> +     if (pipe(fdset->u.pipefd) < 0) {
>>> +             RTE_LOG(ERR, VHOST_CONFIG,
>>> +                     "failed to create pipe for vhost fdset\n");
>>> +             return -1;
>>> +     }
>>> +
>>> +     ret = fdset_add(fdset, fdset->u.readfd,
>>> +                     fdset_pipe_read_cb, NULL, NULL);
>>> +
>>> +     if (ret < 0) {
>>> +             RTE_LOG(ERR, VHOST_CONFIG,
>>> +                     "failed to add pipe readfd %d into vhost server
>>> fdset\n",
>>> +                     fdset->u.readfd);
>>> +
>>> +             fdset_pipe_uninit(fdset);
>>> +             return -1;
>>> +     }
>>> +
>>> +     return 0;
>>> +}
>>> +
>>> +void
>>> +fdset_pipe_notify(struct fdset *fdset)
>>> +{
>>> +     write(fdset->u.writefd, "1", 1);
>>> +}
>>> diff --git a/lib/librte_vhost/fd_man.h b/lib/librte_vhost/fd_man.h
>>> index 3a9276c..76a42fb 100644
>>> --- a/lib/librte_vhost/fd_man.h
>>> +++ b/lib/librte_vhost/fd_man.h
>>> @@ -25,6 +25,16 @@ struct fdset {
>>>        struct fdentry fd[MAX_FDS];
>>>        pthread_mutex_t fd_mutex;
>>>        int num;        /* current fd number of this fdset */
>>> +
>>> +     union pipefds {
>>> +             struct {
>>> +                     int pipefd[2];
>>> +             };
>>> +             struct {
>>> +                     int readfd;
>>> +                     int writefd;
>>> +             };
>>> +     } u;
>>>   };
>>>
>>>
>>> @@ -37,4 +47,10 @@ int fdset_add(struct fdset *pfdset, int fd,
>>>
>>>   void *fdset_event_dispatch(void *arg);
>>>
>>> +int fdset_pipe_init(struct fdset *fdset);
>>> +
>>> +void fdset_pipe_uninit(struct fdset *fdset);
>>> +
>>> +void fdset_pipe_notify(struct fdset *fdset);
>>> +
>>>   #endif
>>> diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c
>>> index 95bed78..795239c 100644
>>> --- a/lib/librte_vhost/socket.c
>>> +++ b/lib/librte_vhost/socket.c
>>> @@ -231,6 +231,8 @@ struct vhost_user {
>>>        pthread_mutex_lock(&vsocket->conn_mutex);
>>>        TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next);
>>>        pthread_mutex_unlock(&vsocket->conn_mutex);
>>> +
>>> +     fdset_pipe_notify(&vhost_user.fdset);
>>>        return;
>>>
>>>   err:
>>> @@ -829,11 +831,23 @@ struct vhost_device_ops const *
>>>                return -1;
>>>
>>>        if (fdset_tid == 0) {
>>> +             /**
>>> +              * create a pipe which will be waited by poll and notified to
>>> +              * rebuild the wait list of poll.
>>> +              */
>>> +             if (fdset_pipe_init(&vhost_user.fdset) < 0) {
>>> +                     RTE_LOG(ERR, VHOST_CONFIG,
>>> +                             "failed to create pipe for vhost fdset\n");
>>> +                     return -1;
>>> +             }
>>> +
>>>                int ret = pthread_create(&fdset_tid, NULL, fdset_event_dispatch,
>>>                                     &vhost_user.fdset);
>>>                if (ret != 0) {
>>>                        RTE_LOG(ERR, VHOST_CONFIG,
>>>                                "failed to create fdset handling thread");
>>> +
>>> +                     fdset_pipe_uninit(&vhost_user.fdset);
>>>                        return -1;
>>>                } else {
>>>                        snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN,
>>> --
>>> 1.8.3.1
>>
  
Tonghao Zhang March 30, 2018, 8:07 a.m. UTC | #4
On Fri, Mar 30, 2018 at 3:57 PM, Maxime Coquelin
<maxime.coquelin@redhat.com> wrote:
> Hi Xiangxia,
>
>
> On 03/29/2018 01:24 PM, Tonghao Zhang wrote:
>>
>> On Thu, Mar 29, 2018 at 3:32 PM, Tan, Jianfeng <jianfeng.tan@intel.com>
>> wrote:
>>>
>>> Hi Xiangxia,
>>>
>>>> -----Original Message-----
>>>> From: xiangxia.m.yue@gmail.com [mailto:xiangxia.m.yue@gmail.com]
>>>> Sent: Wednesday, March 28, 2018 1:49 PM
>>>> To: Tan, Jianfeng
>>>> Cc: dev@dpdk.org; Tonghao Zhang
>>>> Subject: [PATCH 2/2] vhost: add pipe event for optimizing negotiating
>>>>
>>>> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>>>>
>>>> When vhost—user connects qemu successfully, dpdk will call
>>>
>>>
>>> Typo: "-"
>>
>> v2 will update it. :)
>>
>>>> the vhost_user_add_connection to add unix socket fd to poll.
>>>> And fdset_add only set the socket fd to a fdentry while poll
>>>> may sleep now. In a general case, this is no problem. But if
>>>> we use hot update for vhost-user, most downtime of VMs network
>>>> is 750+ms. This patch adds pipe event, so after connections are
>>>> ok, dpdk rebuild the poll immediately. With this patch, the
>>>> most downtime is 20~30ms.
>>>>
>>>> Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>>>> ---
>>>>   lib/librte_vhost/fd_man.c | 49
>>>> +++++++++++++++++++++++++++++++++++++++++++++++
>>>>   lib/librte_vhost/fd_man.h | 16 ++++++++++++++++
>>>>   lib/librte_vhost/socket.c | 14 ++++++++++++++
>>>>   3 files changed, 79 insertions(+)
>>>>
>>>> diff --git a/lib/librte_vhost/fd_man.c b/lib/librte_vhost/fd_man.c
>>>> index 181711c..7716757 100644
>>>> --- a/lib/librte_vhost/fd_man.c
>>>> +++ b/lib/librte_vhost/fd_man.c
>>>> @@ -15,6 +15,7 @@
>>>>   #include <rte_log.h>
>>>>
>>>>   #include "fd_man.h"
>>>> +#include "vhost.h"
>>>
>>>
>>> This is a nice finding and solution, however, I don't think we shall
>>> include vhost header file in fd related files. Actually, I did not find out
>>> why you need to include this header file.
>>
>>
>> Hi Jianfeng, thanks for your review. In the fdset_pipe_init function,
>> I call the RTE_LOG with VHOST_CONFIG, when init pipe not successfully.
>> So I included the vhost header file.
>
>
> Maybe better to create VHOST_SOCKET in socket.c, than including vhost.h that
> creates a layer violation:
>
> #define RTE_LOGTYPE_VHOST_SOCKET RTE_LOGTYPE_USER1
>
> Is it Ok for you? I so no need to resend, I can handle the change when
> applying.
yes, thanks.

>
> Thanks,
> Maxime
>
>>
>>
>>> Thanks,
>>> Jianfeng
>>>
>>>>
>>>>   #define FDPOLLERR (POLLERR | POLLHUP | POLLNVAL)
>>>>
>>>> @@ -272,3 +273,51 @@
>>>>
>>>>        return NULL;
>>>>   }
>>>> +
>>>> +static void
>>>> +fdset_pipe_read_cb(int readfd, void *dat __rte_unused,
>>>> +                int *remove __rte_unused)
>>>> +{
>>>> +     char charbuf[16];
>>>> +     read(readfd, charbuf, sizeof(charbuf));
>>>> +}
>>>> +
>>>> +void
>>>> +fdset_pipe_uninit(struct fdset *fdset)
>>>> +{
>>>> +     fdset_del(fdset, fdset->u.readfd);
>>>> +     close(fdset->u.readfd);
>>>> +     close(fdset->u.writefd);
>>>> +}
>>>> +
>>>> +int
>>>> +fdset_pipe_init(struct fdset *fdset)
>>>> +{
>>>> +     int ret;
>>>> +
>>>> +     if (pipe(fdset->u.pipefd) < 0) {
>>>> +             RTE_LOG(ERR, VHOST_CONFIG,
>>>> +                     "failed to create pipe for vhost fdset\n");
>>>> +             return -1;
>>>> +     }
>>>> +
>>>> +     ret = fdset_add(fdset, fdset->u.readfd,
>>>> +                     fdset_pipe_read_cb, NULL, NULL);
>>>> +
>>>> +     if (ret < 0) {
>>>> +             RTE_LOG(ERR, VHOST_CONFIG,
>>>> +                     "failed to add pipe readfd %d into vhost server
>>>> fdset\n",
>>>> +                     fdset->u.readfd);
>>>> +
>>>> +             fdset_pipe_uninit(fdset);
>>>> +             return -1;
>>>> +     }
>>>> +
>>>> +     return 0;
>>>> +}
>>>> +
>>>> +void
>>>> +fdset_pipe_notify(struct fdset *fdset)
>>>> +{
>>>> +     write(fdset->u.writefd, "1", 1);
>>>> +}
>>>> diff --git a/lib/librte_vhost/fd_man.h b/lib/librte_vhost/fd_man.h
>>>> index 3a9276c..76a42fb 100644
>>>> --- a/lib/librte_vhost/fd_man.h
>>>> +++ b/lib/librte_vhost/fd_man.h
>>>> @@ -25,6 +25,16 @@ struct fdset {
>>>>        struct fdentry fd[MAX_FDS];
>>>>        pthread_mutex_t fd_mutex;
>>>>        int num;        /* current fd number of this fdset */
>>>> +
>>>> +     union pipefds {
>>>> +             struct {
>>>> +                     int pipefd[2];
>>>> +             };
>>>> +             struct {
>>>> +                     int readfd;
>>>> +                     int writefd;
>>>> +             };
>>>> +     } u;
>>>>   };
>>>>
>>>>
>>>> @@ -37,4 +47,10 @@ int fdset_add(struct fdset *pfdset, int fd,
>>>>
>>>>   void *fdset_event_dispatch(void *arg);
>>>>
>>>> +int fdset_pipe_init(struct fdset *fdset);
>>>> +
>>>> +void fdset_pipe_uninit(struct fdset *fdset);
>>>> +
>>>> +void fdset_pipe_notify(struct fdset *fdset);
>>>> +
>>>>   #endif
>>>> diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c
>>>> index 95bed78..795239c 100644
>>>> --- a/lib/librte_vhost/socket.c
>>>> +++ b/lib/librte_vhost/socket.c
>>>> @@ -231,6 +231,8 @@ struct vhost_user {
>>>>        pthread_mutex_lock(&vsocket->conn_mutex);
>>>>        TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next);
>>>>        pthread_mutex_unlock(&vsocket->conn_mutex);
>>>> +
>>>> +     fdset_pipe_notify(&vhost_user.fdset);
>>>>        return;
>>>>
>>>>   err:
>>>> @@ -829,11 +831,23 @@ struct vhost_device_ops const *
>>>>                return -1;
>>>>
>>>>        if (fdset_tid == 0) {
>>>> +             /**
>>>> +              * create a pipe which will be waited by poll and notified
>>>> to
>>>> +              * rebuild the wait list of poll.
>>>> +              */
>>>> +             if (fdset_pipe_init(&vhost_user.fdset) < 0) {
>>>> +                     RTE_LOG(ERR, VHOST_CONFIG,
>>>> +                             "failed to create pipe for vhost
>>>> fdset\n");
>>>> +                     return -1;
>>>> +             }
>>>> +
>>>>                int ret = pthread_create(&fdset_tid, NULL,
>>>> fdset_event_dispatch,
>>>>                                     &vhost_user.fdset);
>>>>                if (ret != 0) {
>>>>                        RTE_LOG(ERR, VHOST_CONFIG,
>>>>                                "failed to create fdset handling
>>>> thread");
>>>> +
>>>> +                     fdset_pipe_uninit(&vhost_user.fdset);
>>>>                        return -1;
>>>>                } else {
>>>>                        snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN,
>>>> --
>>>> 1.8.3.1
>>>
>>>
>
  
Maxime Coquelin March 30, 2018, 8:09 a.m. UTC | #5
On 03/30/2018 10:07 AM, Tonghao Zhang wrote:
> On Fri, Mar 30, 2018 at 3:57 PM, Maxime Coquelin
> <maxime.coquelin@redhat.com> wrote:
>> Hi Xiangxia,
>>
>>
>> On 03/29/2018 01:24 PM, Tonghao Zhang wrote:
>>>
>>> On Thu, Mar 29, 2018 at 3:32 PM, Tan, Jianfeng <jianfeng.tan@intel.com>
>>> wrote:
>>>>
>>>> Hi Xiangxia,
>>>>
>>>>> -----Original Message-----
>>>>> From: xiangxia.m.yue@gmail.com [mailto:xiangxia.m.yue@gmail.com]
>>>>> Sent: Wednesday, March 28, 2018 1:49 PM
>>>>> To: Tan, Jianfeng
>>>>> Cc: dev@dpdk.org; Tonghao Zhang
>>>>> Subject: [PATCH 2/2] vhost: add pipe event for optimizing negotiating
>>>>>
>>>>> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>>>>>
>>>>> When vhost—user connects qemu successfully, dpdk will call
>>>>
>>>>
>>>> Typo: "-"
>>>
>>> v2 will update it. :)
>>>
>>>>> the vhost_user_add_connection to add unix socket fd to poll.
>>>>> And fdset_add only set the socket fd to a fdentry while poll
>>>>> may sleep now. In a general case, this is no problem. But if
>>>>> we use hot update for vhost-user, most downtime of VMs network
>>>>> is 750+ms. This patch adds pipe event, so after connections are
>>>>> ok, dpdk rebuild the poll immediately. With this patch, the
>>>>> most downtime is 20~30ms.
>>>>>
>>>>> Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>>>>> ---
>>>>>    lib/librte_vhost/fd_man.c | 49
>>>>> +++++++++++++++++++++++++++++++++++++++++++++++
>>>>>    lib/librte_vhost/fd_man.h | 16 ++++++++++++++++
>>>>>    lib/librte_vhost/socket.c | 14 ++++++++++++++
>>>>>    3 files changed, 79 insertions(+)
>>>>>
>>>>> diff --git a/lib/librte_vhost/fd_man.c b/lib/librte_vhost/fd_man.c
>>>>> index 181711c..7716757 100644
>>>>> --- a/lib/librte_vhost/fd_man.c
>>>>> +++ b/lib/librte_vhost/fd_man.c
>>>>> @@ -15,6 +15,7 @@
>>>>>    #include <rte_log.h>
>>>>>
>>>>>    #include "fd_man.h"
>>>>> +#include "vhost.h"
>>>>
>>>>
>>>> This is a nice finding and solution, however, I don't think we shall
>>>> include vhost header file in fd related files. Actually, I did not find out
>>>> why you need to include this header file.
>>>
>>>
>>> Hi Jianfeng, thanks for your review. In the fdset_pipe_init function,
>>> I call the RTE_LOG with VHOST_CONFIG, when init pipe not successfully.
>>> So I included the vhost header file.
>>
>>
>> Maybe better to create VHOST_SOCKET in socket.c, than including vhost.h that
>> creates a layer violation:
>>
>> #define RTE_LOGTYPE_VHOST_SOCKET RTE_LOGTYPE_USER1
>>
>> Is it Ok for you? I so no need to resend, I can handle the change when
>> applying.
> yes, thanks.

Great, so with above change:

Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>

Thanks,
Maxime
>>
>> Thanks,
>> Maxime
>>
>>>
>>>
>>>> Thanks,
>>>> Jianfeng
>>>>
>>>>>
>>>>>    #define FDPOLLERR (POLLERR | POLLHUP | POLLNVAL)
>>>>>
>>>>> @@ -272,3 +273,51 @@
>>>>>
>>>>>         return NULL;
>>>>>    }
>>>>> +
>>>>> +static void
>>>>> +fdset_pipe_read_cb(int readfd, void *dat __rte_unused,
>>>>> +                int *remove __rte_unused)
>>>>> +{
>>>>> +     char charbuf[16];
>>>>> +     read(readfd, charbuf, sizeof(charbuf));
>>>>> +}
>>>>> +
>>>>> +void
>>>>> +fdset_pipe_uninit(struct fdset *fdset)
>>>>> +{
>>>>> +     fdset_del(fdset, fdset->u.readfd);
>>>>> +     close(fdset->u.readfd);
>>>>> +     close(fdset->u.writefd);
>>>>> +}
>>>>> +
>>>>> +int
>>>>> +fdset_pipe_init(struct fdset *fdset)
>>>>> +{
>>>>> +     int ret;
>>>>> +
>>>>> +     if (pipe(fdset->u.pipefd) < 0) {
>>>>> +             RTE_LOG(ERR, VHOST_CONFIG,
>>>>> +                     "failed to create pipe for vhost fdset\n");
>>>>> +             return -1;
>>>>> +     }
>>>>> +
>>>>> +     ret = fdset_add(fdset, fdset->u.readfd,
>>>>> +                     fdset_pipe_read_cb, NULL, NULL);
>>>>> +
>>>>> +     if (ret < 0) {
>>>>> +             RTE_LOG(ERR, VHOST_CONFIG,
>>>>> +                     "failed to add pipe readfd %d into vhost server
>>>>> fdset\n",
>>>>> +                     fdset->u.readfd);
>>>>> +
>>>>> +             fdset_pipe_uninit(fdset);
>>>>> +             return -1;
>>>>> +     }
>>>>> +
>>>>> +     return 0;
>>>>> +}
>>>>> +
>>>>> +void
>>>>> +fdset_pipe_notify(struct fdset *fdset)
>>>>> +{
>>>>> +     write(fdset->u.writefd, "1", 1);
>>>>> +}
>>>>> diff --git a/lib/librte_vhost/fd_man.h b/lib/librte_vhost/fd_man.h
>>>>> index 3a9276c..76a42fb 100644
>>>>> --- a/lib/librte_vhost/fd_man.h
>>>>> +++ b/lib/librte_vhost/fd_man.h
>>>>> @@ -25,6 +25,16 @@ struct fdset {
>>>>>         struct fdentry fd[MAX_FDS];
>>>>>         pthread_mutex_t fd_mutex;
>>>>>         int num;        /* current fd number of this fdset */
>>>>> +
>>>>> +     union pipefds {
>>>>> +             struct {
>>>>> +                     int pipefd[2];
>>>>> +             };
>>>>> +             struct {
>>>>> +                     int readfd;
>>>>> +                     int writefd;
>>>>> +             };
>>>>> +     } u;
>>>>>    };
>>>>>
>>>>>
>>>>> @@ -37,4 +47,10 @@ int fdset_add(struct fdset *pfdset, int fd,
>>>>>
>>>>>    void *fdset_event_dispatch(void *arg);
>>>>>
>>>>> +int fdset_pipe_init(struct fdset *fdset);
>>>>> +
>>>>> +void fdset_pipe_uninit(struct fdset *fdset);
>>>>> +
>>>>> +void fdset_pipe_notify(struct fdset *fdset);
>>>>> +
>>>>>    #endif
>>>>> diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c
>>>>> index 95bed78..795239c 100644
>>>>> --- a/lib/librte_vhost/socket.c
>>>>> +++ b/lib/librte_vhost/socket.c
>>>>> @@ -231,6 +231,8 @@ struct vhost_user {
>>>>>         pthread_mutex_lock(&vsocket->conn_mutex);
>>>>>         TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next);
>>>>>         pthread_mutex_unlock(&vsocket->conn_mutex);
>>>>> +
>>>>> +     fdset_pipe_notify(&vhost_user.fdset);
>>>>>         return;
>>>>>
>>>>>    err:
>>>>> @@ -829,11 +831,23 @@ struct vhost_device_ops const *
>>>>>                 return -1;
>>>>>
>>>>>         if (fdset_tid == 0) {
>>>>> +             /**
>>>>> +              * create a pipe which will be waited by poll and notified
>>>>> to
>>>>> +              * rebuild the wait list of poll.
>>>>> +              */
>>>>> +             if (fdset_pipe_init(&vhost_user.fdset) < 0) {
>>>>> +                     RTE_LOG(ERR, VHOST_CONFIG,
>>>>> +                             "failed to create pipe for vhost
>>>>> fdset\n");
>>>>> +                     return -1;
>>>>> +             }
>>>>> +
>>>>>                 int ret = pthread_create(&fdset_tid, NULL,
>>>>> fdset_event_dispatch,
>>>>>                                      &vhost_user.fdset);
>>>>>                 if (ret != 0) {
>>>>>                         RTE_LOG(ERR, VHOST_CONFIG,
>>>>>                                 "failed to create fdset handling
>>>>> thread");
>>>>> +
>>>>> +                     fdset_pipe_uninit(&vhost_user.fdset);
>>>>>                         return -1;
>>>>>                 } else {
>>>>>                         snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN,
>>>>> --
>>>>> 1.8.3.1
>>>>
>>>>
>>
  

Patch

diff --git a/lib/librte_vhost/fd_man.c b/lib/librte_vhost/fd_man.c
index 181711c..7716757 100644
--- a/lib/librte_vhost/fd_man.c
+++ b/lib/librte_vhost/fd_man.c
@@ -15,6 +15,7 @@ 
 #include <rte_log.h>
 
 #include "fd_man.h"
+#include "vhost.h"
 
 #define FDPOLLERR (POLLERR | POLLHUP | POLLNVAL)
 
@@ -272,3 +273,51 @@ 
 
 	return NULL;
 }
+
+static void
+fdset_pipe_read_cb(int readfd, void *dat __rte_unused,
+		   int *remove __rte_unused)
+{
+	char charbuf[16];
+	read(readfd, charbuf, sizeof(charbuf));
+}
+
+void
+fdset_pipe_uninit(struct fdset *fdset)
+{
+	fdset_del(fdset, fdset->u.readfd);
+	close(fdset->u.readfd);
+	close(fdset->u.writefd);
+}
+
+int
+fdset_pipe_init(struct fdset *fdset)
+{
+	int ret;
+
+	if (pipe(fdset->u.pipefd) < 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"failed to create pipe for vhost fdset\n");
+		return -1;
+	}
+
+	ret = fdset_add(fdset, fdset->u.readfd,
+			fdset_pipe_read_cb, NULL, NULL);
+
+	if (ret < 0) {
+		RTE_LOG(ERR, VHOST_CONFIG,
+			"failed to add pipe readfd %d into vhost server fdset\n",
+			fdset->u.readfd);
+
+		fdset_pipe_uninit(fdset);
+		return -1;
+	}
+
+	return 0;
+}
+
+void
+fdset_pipe_notify(struct fdset *fdset)
+{
+	write(fdset->u.writefd, "1", 1);
+}
diff --git a/lib/librte_vhost/fd_man.h b/lib/librte_vhost/fd_man.h
index 3a9276c..76a42fb 100644
--- a/lib/librte_vhost/fd_man.h
+++ b/lib/librte_vhost/fd_man.h
@@ -25,6 +25,16 @@  struct fdset {
 	struct fdentry fd[MAX_FDS];
 	pthread_mutex_t fd_mutex;
 	int num;	/* current fd number of this fdset */
+
+	union pipefds {
+		struct {
+			int pipefd[2];
+		};
+		struct {
+			int readfd;
+			int writefd;
+		};
+	} u;
 };
 
 
@@ -37,4 +47,10 @@  int fdset_add(struct fdset *pfdset, int fd,
 
 void *fdset_event_dispatch(void *arg);
 
+int fdset_pipe_init(struct fdset *fdset);
+
+void fdset_pipe_uninit(struct fdset *fdset);
+
+void fdset_pipe_notify(struct fdset *fdset);
+
 #endif
diff --git a/lib/librte_vhost/socket.c b/lib/librte_vhost/socket.c
index 95bed78..795239c 100644
--- a/lib/librte_vhost/socket.c
+++ b/lib/librte_vhost/socket.c
@@ -231,6 +231,8 @@  struct vhost_user {
 	pthread_mutex_lock(&vsocket->conn_mutex);
 	TAILQ_INSERT_TAIL(&vsocket->conn_list, conn, next);
 	pthread_mutex_unlock(&vsocket->conn_mutex);
+
+	fdset_pipe_notify(&vhost_user.fdset);
 	return;
 
 err:
@@ -829,11 +831,23 @@  struct vhost_device_ops const *
 		return -1;
 
 	if (fdset_tid == 0) {
+		/**
+		 * create a pipe which will be waited by poll and notified to
+		 * rebuild the wait list of poll.
+		 */
+		if (fdset_pipe_init(&vhost_user.fdset) < 0) {
+			RTE_LOG(ERR, VHOST_CONFIG,
+				"failed to create pipe for vhost fdset\n");
+			return -1;
+		}
+
 		int ret = pthread_create(&fdset_tid, NULL, fdset_event_dispatch,
 				     &vhost_user.fdset);
 		if (ret != 0) {
 			RTE_LOG(ERR, VHOST_CONFIG,
 				"failed to create fdset handling thread");
+
+			fdset_pipe_uninit(&vhost_user.fdset);
 			return -1;
 		} else {
 			snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN,