[04/12] vhost: introduce postcopy's advise message

Message ID 20180926072705.22641-5-maxime.coquelin@redhat.com (mailing list archive)
State Superseded, archived
Delegated to: Maxime Coquelin
Headers
Series vhost: add postcopy live-migration support |

Checks

Context Check Description
ci/Intel-compilation success Compilation OK

Commit Message

Maxime Coquelin Sept. 26, 2018, 7:26 a.m. UTC
  This patch opens a userfaultfd and sends it back to Qemu's
VHOST_USER_POSTCOPY_ADVISE request.

Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
---
 lib/librte_vhost/vhost.h      |  2 ++
 lib/librte_vhost/vhost_user.c | 37 +++++++++++++++++++++++++++++++++++
 lib/librte_vhost/vhost_user.h |  3 ++-
 3 files changed, 41 insertions(+), 1 deletion(-)
  

Comments

Alejandro Lucero Sept. 26, 2018, 3:22 p.m. UTC | #1
On Wed, Sep 26, 2018 at 8:27 AM Maxime Coquelin <maxime.coquelin@redhat.com>
wrote:

> This patch opens a userfaultfd and sends it back to Qemu's
> VHOST_USER_POSTCOPY_ADVISE request.
>
> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
>  lib/librte_vhost/vhost.h      |  2 ++
>  lib/librte_vhost/vhost_user.c | 37 +++++++++++++++++++++++++++++++++++
>  lib/librte_vhost/vhost_user.h |  3 ++-
>  3 files changed, 41 insertions(+), 1 deletion(-)
>
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index 25ffd7614..21722d8a8 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -363,6 +363,8 @@ struct virtio_net {
>         int                     slave_req_fd;
>         rte_spinlock_t          slave_req_lock;
>
> +       int                     postcopy_ufd;
> +
>         /*
>          * Device id to identify a specific backend device.
>          * It's set to -1 for the default software implementation.
> diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
> index a9b429598..bdfe2cac0 100644
> --- a/lib/librte_vhost/vhost_user.c
> +++ b/lib/librte_vhost/vhost_user.c
> @@ -24,9 +24,13 @@
>  #include <stdlib.h>
>  #include <string.h>
>  #include <unistd.h>
> +#include <fcntl.h>
> +#include <linux/userfaultfd.h>
> +#include <sys/ioctl.h>
>  #include <sys/mman.h>
>  #include <sys/types.h>
>  #include <sys/stat.h>
> +#include <sys/syscall.h>
>  #include <assert.h>
>  #ifdef RTE_LIBRTE_VHOST_NUMA
>  #include <numaif.h>
> @@ -69,6 +73,7 @@ static const char *vhost_message_str[VHOST_USER_MAX] = {
>         [VHOST_USER_IOTLB_MSG]  = "VHOST_USER_IOTLB_MSG",
>         [VHOST_USER_CRYPTO_CREATE_SESS] = "VHOST_USER_CRYPTO_CREATE_SESS",
>         [VHOST_USER_CRYPTO_CLOSE_SESS] = "VHOST_USER_CRYPTO_CLOSE_SESS",
> +       [VHOST_USER_POSTCOPY_ADVISE]  = "VHOST_USER_POSTCOPY_ADVISE",
>  };
>
>  static uint64_t
> @@ -1412,6 +1417,33 @@ vhost_user_iotlb_msg(struct virtio_net **pdev,
> struct VhostUserMsg *msg)
>         return 0;
>  }
>
> +static int
> +vhost_user_set_postcopy_advise(struct virtio_net *dev, struct
> VhostUserMsg *msg)
> +{
> +       struct uffdio_api api_struct;
> +
> +
> +       dev->postcopy_ufd = syscall(__NR_userfaultfd, O_CLOEXEC |
> O_NONBLOCK);
> +
> +       if (dev->postcopy_ufd == -1) {
> +               RTE_LOG(ERR, VHOST_CONFIG, "Userfaultfd not available:
> %s\n",
> +                               strerror(errno));
> +               return -1;
> +       }
> +       api_struct.api = UFFD_API;
> +       api_struct.features = 0;
> +       if (ioctl(dev->postcopy_ufd, UFFDIO_API, &api_struct)) {
> +               RTE_LOG(ERR, VHOST_CONFIG, "UFFDIO_API ioctl failure:
> %s\n",
> +                               strerror(errno));
> +               close(dev->postcopy_ufd);
> +               return -1;
> +       }
> +       msg->fds[0] = dev->postcopy_ufd;
> +       msg->fd_num = 1;
> +
> +       return 0;
> +}
> +
>  /* return bytes# of read on success or negative val on failure. */
>  static int
>  read_vhost_message(int sockfd, struct VhostUserMsg *msg)
> @@ -1756,6 +1788,11 @@ vhost_user_msg_handler(int vid, int fd)
>                 ret = vhost_user_iotlb_msg(&dev, &msg);
>                 break;
>
> +       case VHOST_USER_POSTCOPY_ADVISE:
> +               vhost_user_set_postcopy_advise(dev, &msg);
> +               send_vhost_reply(fd, &msg);
> +               break;
> +
>

This should handle the case of vhost_user_set_postcopy_advise returning an
error. Otherwise the msg wrong contents are sent back.


>         default:
>                 ret = -1;
>                 break;
> diff --git a/lib/librte_vhost/vhost_user.h b/lib/librte_vhost/vhost_user.h
> index dd0262f8f..2030b40a5 100644
> --- a/lib/librte_vhost/vhost_user.h
> +++ b/lib/librte_vhost/vhost_user.h
> @@ -50,7 +50,8 @@ typedef enum VhostUserRequest {
>         VHOST_USER_IOTLB_MSG = 22,
>         VHOST_USER_CRYPTO_CREATE_SESS = 26,
>         VHOST_USER_CRYPTO_CLOSE_SESS = 27,
> -       VHOST_USER_MAX = 28
> +       VHOST_USER_POSTCOPY_ADVISE = 28,
> +       VHOST_USER_MAX = 29
>  } VhostUserRequest;
>
>  typedef enum VhostUserSlaveRequest {
> --
> 2.17.1
>
>
  
Ilya Maximets Sept. 27, 2018, 8:28 a.m. UTC | #2
On 26.09.2018 10:26, Maxime Coquelin wrote:
> This patch opens a userfaultfd and sends it back to Qemu's
> VHOST_USER_POSTCOPY_ADVISE request.
> 
> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> ---
>  lib/librte_vhost/vhost.h      |  2 ++
>  lib/librte_vhost/vhost_user.c | 37 +++++++++++++++++++++++++++++++++++
>  lib/librte_vhost/vhost_user.h |  3 ++-
>  3 files changed, 41 insertions(+), 1 deletion(-)
> 
> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> index 25ffd7614..21722d8a8 100644
> --- a/lib/librte_vhost/vhost.h
> +++ b/lib/librte_vhost/vhost.h
> @@ -363,6 +363,8 @@ struct virtio_net {
>  	int			slave_req_fd;
>  	rte_spinlock_t		slave_req_lock;
>  
> +	int			postcopy_ufd;
> +
>  	/*
>  	 * Device id to identify a specific backend device.
>  	 * It's set to -1 for the default software implementation.
> diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
> index a9b429598..bdfe2cac0 100644
> --- a/lib/librte_vhost/vhost_user.c
> +++ b/lib/librte_vhost/vhost_user.c
> @@ -24,9 +24,13 @@
>  #include <stdlib.h>
>  #include <string.h>
>  #include <unistd.h>
> +#include <fcntl.h>
> +#include <linux/userfaultfd.h>

Maybe we need compile time check for this header existence?
Otherwise, this will bump minimal kernel version for default linux build
to something like 4.3.

> +#include <sys/ioctl.h>
>  #include <sys/mman.h>
>  #include <sys/types.h>
>  #include <sys/stat.h>
> +#include <sys/syscall.h>
>  #include <assert.h>
>  #ifdef RTE_LIBRTE_VHOST_NUMA
>  #include <numaif.h>
> @@ -69,6 +73,7 @@ static const char *vhost_message_str[VHOST_USER_MAX] = {
>  	[VHOST_USER_IOTLB_MSG]  = "VHOST_USER_IOTLB_MSG",
>  	[VHOST_USER_CRYPTO_CREATE_SESS] = "VHOST_USER_CRYPTO_CREATE_SESS",
>  	[VHOST_USER_CRYPTO_CLOSE_SESS] = "VHOST_USER_CRYPTO_CLOSE_SESS",
> +	[VHOST_USER_POSTCOPY_ADVISE]  = "VHOST_USER_POSTCOPY_ADVISE",
>  };
>  
>  static uint64_t
> @@ -1412,6 +1417,33 @@ vhost_user_iotlb_msg(struct virtio_net **pdev, struct VhostUserMsg *msg)
>  	return 0;
>  }
>  
> +static int
> +vhost_user_set_postcopy_advise(struct virtio_net *dev, struct VhostUserMsg *msg)
> +{
> +	struct uffdio_api api_struct;
> +
> +
> +	dev->postcopy_ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
> +
> +	if (dev->postcopy_ufd == -1) {
> +		RTE_LOG(ERR, VHOST_CONFIG, "Userfaultfd not available: %s\n",
> +				strerror(errno));
> +		return -1;
> +	}
> +	api_struct.api = UFFD_API;
> +	api_struct.features = 0;
> +	if (ioctl(dev->postcopy_ufd, UFFDIO_API, &api_struct)) {
> +		RTE_LOG(ERR, VHOST_CONFIG, "UFFDIO_API ioctl failure: %s\n",
> +				strerror(errno));
> +		close(dev->postcopy_ufd);
> +		return -1;
> +	}
> +	msg->fds[0] = dev->postcopy_ufd;
> +	msg->fd_num = 1;
> +
> +	return 0;
> +}
> +
>  /* return bytes# of read on success or negative val on failure. */
>  static int
>  read_vhost_message(int sockfd, struct VhostUserMsg *msg)
> @@ -1756,6 +1788,11 @@ vhost_user_msg_handler(int vid, int fd)
>  		ret = vhost_user_iotlb_msg(&dev, &msg);
>  		break;
>  
> +	case VHOST_USER_POSTCOPY_ADVISE:
> +		vhost_user_set_postcopy_advise(dev, &msg);
> +		send_vhost_reply(fd, &msg);
> +		break;
> +
>  	default:
>  		ret = -1;
>  		break;
> diff --git a/lib/librte_vhost/vhost_user.h b/lib/librte_vhost/vhost_user.h
> index dd0262f8f..2030b40a5 100644
> --- a/lib/librte_vhost/vhost_user.h
> +++ b/lib/librte_vhost/vhost_user.h
> @@ -50,7 +50,8 @@ typedef enum VhostUserRequest {
>  	VHOST_USER_IOTLB_MSG = 22,
>  	VHOST_USER_CRYPTO_CREATE_SESS = 26,
>  	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
> -	VHOST_USER_MAX = 28
> +	VHOST_USER_POSTCOPY_ADVISE = 28,
> +	VHOST_USER_MAX = 29
>  } VhostUserRequest;
>  
>  typedef enum VhostUserSlaveRequest {
>
  
Maxime Coquelin Sept. 27, 2018, 9:35 a.m. UTC | #3
On 09/26/2018 05:22 PM, Alejandro Lucero wrote:
> 
> 
> On Wed, Sep 26, 2018 at 8:27 AM Maxime Coquelin 
> <maxime.coquelin@redhat.com <mailto:maxime.coquelin@redhat.com>> wrote:
> 
>     This patch opens a userfaultfd and sends it back to Qemu's
>     VHOST_USER_POSTCOPY_ADVISE request.
> 
>     Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com
>     <mailto:dgilbert@redhat.com>>
>     Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com
>     <mailto:maxime.coquelin@redhat.com>>
>     ---
>       lib/librte_vhost/vhost.h      |  2 ++
>       lib/librte_vhost/vhost_user.c | 37 +++++++++++++++++++++++++++++++++++
>       lib/librte_vhost/vhost_user.h |  3 ++-
>       3 files changed, 41 insertions(+), 1 deletion(-)
> 
>     diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
>     index 25ffd7614..21722d8a8 100644
>     --- a/lib/librte_vhost/vhost.h
>     +++ b/lib/librte_vhost/vhost.h
>     @@ -363,6 +363,8 @@ struct virtio_net {
>              int                     slave_req_fd;
>              rte_spinlock_t          slave_req_lock;
> 
>     +       int                     postcopy_ufd;
>     +
>              /*
>               * Device id to identify a specific backend device.
>               * It's set to -1 for the default software implementation.
>     diff --git a/lib/librte_vhost/vhost_user.c
>     b/lib/librte_vhost/vhost_user.c
>     index a9b429598..bdfe2cac0 100644
>     --- a/lib/librte_vhost/vhost_user.c
>     +++ b/lib/librte_vhost/vhost_user.c
>     @@ -24,9 +24,13 @@
>       #include <stdlib.h>
>       #include <string.h>
>       #include <unistd.h>
>     +#include <fcntl.h>
>     +#include <linux/userfaultfd.h>
>     +#include <sys/ioctl.h>
>       #include <sys/mman.h>
>       #include <sys/types.h>
>       #include <sys/stat.h>
>     +#include <sys/syscall.h>
>       #include <assert.h>
>       #ifdef RTE_LIBRTE_VHOST_NUMA
>       #include <numaif.h>
>     @@ -69,6 +73,7 @@ static const char
>     *vhost_message_str[VHOST_USER_MAX] = {
>              [VHOST_USER_IOTLB_MSG]  = "VHOST_USER_IOTLB_MSG",
>              [VHOST_USER_CRYPTO_CREATE_SESS] =
>     "VHOST_USER_CRYPTO_CREATE_SESS",
>              [VHOST_USER_CRYPTO_CLOSE_SESS] =
>     "VHOST_USER_CRYPTO_CLOSE_SESS",
>     +       [VHOST_USER_POSTCOPY_ADVISE]  = "VHOST_USER_POSTCOPY_ADVISE",
>       };
> 
>       static uint64_t
>     @@ -1412,6 +1417,33 @@ vhost_user_iotlb_msg(struct virtio_net
>     **pdev, struct VhostUserMsg *msg)
>              return 0;
>       }
> 
>     +static int
>     +vhost_user_set_postcopy_advise(struct virtio_net *dev, struct
>     VhostUserMsg *msg)
>     +{
>     +       struct uffdio_api api_struct;
>     +
>     +
>     +       dev->postcopy_ufd = syscall(__NR_userfaultfd, O_CLOEXEC |
>     O_NONBLOCK);
>     +
>     +       if (dev->postcopy_ufd == -1) {
>     +               RTE_LOG(ERR, VHOST_CONFIG, "Userfaultfd not
>     available: %s\n",
>     +                               strerror(errno));
>     +               return -1;
>     +       }
>     +       api_struct.api = UFFD_API;
>     +       api_struct.features = 0;
>     +       if (ioctl(dev->postcopy_ufd, UFFDIO_API, &api_struct)) {
>     +               RTE_LOG(ERR, VHOST_CONFIG, "UFFDIO_API ioctl
>     failure: %s\n",
>     +                               strerror(errno));
>     +               close(dev->postcopy_ufd);
>     +               return -1;
>     +       }
>     +       msg->fds[0] = dev->postcopy_ufd;
>     +       msg->fd_num = 1;
>     +
>     +       return 0;
>     +}
>     +
>       /* return bytes# of read on success or negative val on failure. */
>       static int
>       read_vhost_message(int sockfd, struct VhostUserMsg *msg)
>     @@ -1756,6 +1788,11 @@ vhost_user_msg_handler(int vid, int fd)
>                      ret = vhost_user_iotlb_msg(&dev, &msg);
>                      break;
> 
>     +       case VHOST_USER_POSTCOPY_ADVISE:
>     +               vhost_user_set_postcopy_advise(dev, &msg);
>     +               send_vhost_reply(fd, &msg);
>     +               break;
>     +
> 
> 
> This should handle the case of vhost_user_set_postcopy_advise returning 
> an error. Otherwise the msg wrong contents are sent back.

Right, I'll fix it when rebasing.

> 
>              default:
>                      ret = -1;
>                      break;
>     diff --git a/lib/librte_vhost/vhost_user.h
>     b/lib/librte_vhost/vhost_user.h
>     index dd0262f8f..2030b40a5 100644
>     --- a/lib/librte_vhost/vhost_user.h
>     +++ b/lib/librte_vhost/vhost_user.h
>     @@ -50,7 +50,8 @@ typedef enum VhostUserRequest {
>              VHOST_USER_IOTLB_MSG = 22,
>              VHOST_USER_CRYPTO_CREATE_SESS = 26,
>              VHOST_USER_CRYPTO_CLOSE_SESS = 27,
>     -       VHOST_USER_MAX = 28
>     +       VHOST_USER_POSTCOPY_ADVISE = 28,
>     +       VHOST_USER_MAX = 29
>       } VhostUserRequest;
> 
>       typedef enum VhostUserSlaveRequest {
>     -- 
>     2.17.1
> 

Thanks,
Maxime
  
Ilya Maximets Sept. 28, 2018, 10:40 a.m. UTC | #4
On 27.09.2018 11:28, Ilya Maximets wrote:
> On 26.09.2018 10:26, Maxime Coquelin wrote:
>> This patch opens a userfaultfd and sends it back to Qemu's
>> VHOST_USER_POSTCOPY_ADVISE request.
>>
>> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
>> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
>> ---
>>  lib/librte_vhost/vhost.h      |  2 ++
>>  lib/librte_vhost/vhost_user.c | 37 +++++++++++++++++++++++++++++++++++
>>  lib/librte_vhost/vhost_user.h |  3 ++-
>>  3 files changed, 41 insertions(+), 1 deletion(-)
>>
>> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
>> index 25ffd7614..21722d8a8 100644
>> --- a/lib/librte_vhost/vhost.h
>> +++ b/lib/librte_vhost/vhost.h
>> @@ -363,6 +363,8 @@ struct virtio_net {
>>  	int			slave_req_fd;
>>  	rte_spinlock_t		slave_req_lock;
>>  
>> +	int			postcopy_ufd;
>> +
>>  	/*
>>  	 * Device id to identify a specific backend device.
>>  	 * It's set to -1 for the default software implementation.
>> diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
>> index a9b429598..bdfe2cac0 100644
>> --- a/lib/librte_vhost/vhost_user.c
>> +++ b/lib/librte_vhost/vhost_user.c
>> @@ -24,9 +24,13 @@
>>  #include <stdlib.h>
>>  #include <string.h>
>>  #include <unistd.h>
>> +#include <fcntl.h>
>> +#include <linux/userfaultfd.h>
> 
> Maybe we need compile time check for this header existence?
> Otherwise, this will bump minimal kernel version for default linux build
> to something like 4.3.

We'll need a config option here (disabled by default) and guard all
the postcopy related code.
Meson build will be able to detect the header file and enable
the config if possible. Like this:

lib/librte_vhost/meson.build:
if cc.has_header('linux/userfaultfd.h')
       dpdk_conf.set10('RTE_LIBRTE_VHOST_POSTCOPY', true)
endif

Best regards, Ilya Maximets.
  
Bruce Richardson Sept. 28, 2018, 12:13 p.m. UTC | #5
On Fri, Sep 28, 2018 at 01:40:25PM +0300, Ilya Maximets wrote:
> On 27.09.2018 11:28, Ilya Maximets wrote:
> > On 26.09.2018 10:26, Maxime Coquelin wrote:
> >> This patch opens a userfaultfd and sends it back to Qemu's
> >> VHOST_USER_POSTCOPY_ADVISE request.
> >>
> >> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> >> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> >> ---
> >>  lib/librte_vhost/vhost.h      |  2 ++
> >>  lib/librte_vhost/vhost_user.c | 37 +++++++++++++++++++++++++++++++++++
> >>  lib/librte_vhost/vhost_user.h |  3 ++-
> >>  3 files changed, 41 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> >> index 25ffd7614..21722d8a8 100644
> >> --- a/lib/librte_vhost/vhost.h
> >> +++ b/lib/librte_vhost/vhost.h
> >> @@ -363,6 +363,8 @@ struct virtio_net {
> >>  	int			slave_req_fd;
> >>  	rte_spinlock_t		slave_req_lock;
> >>  
> >> +	int			postcopy_ufd;
> >> +
> >>  	/*
> >>  	 * Device id to identify a specific backend device.
> >>  	 * It's set to -1 for the default software implementation.
> >> diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
> >> index a9b429598..bdfe2cac0 100644
> >> --- a/lib/librte_vhost/vhost_user.c
> >> +++ b/lib/librte_vhost/vhost_user.c
> >> @@ -24,9 +24,13 @@
> >>  #include <stdlib.h>
> >>  #include <string.h>
> >>  #include <unistd.h>
> >> +#include <fcntl.h>
> >> +#include <linux/userfaultfd.h>
> > 
> > Maybe we need compile time check for this header existence?
> > Otherwise, this will bump minimal kernel version for default linux build
> > to something like 4.3.
> 
> We'll need a config option here (disabled by default) and guard all
> the postcopy related code.
> Meson build will be able to detect the header file and enable
> the config if possible. Like this:
> 
> lib/librte_vhost/meson.build:
> if cc.has_header('linux/userfaultfd.h')
>        dpdk_conf.set10('RTE_LIBRTE_VHOST_POSTCOPY', true)

Are you sure you want 'set10' rather than 'set'. Set is probably easier
because it ensures no define on false, while set10 has a define of 0. This
has caught me out before.

FYI, you can also avoid the if by putting the condition into the define:

	dpdk_conf.set('RTE_LIBRTE_VHOST_POSTCOPY', cc.has_header('...'))

/Bruce
  
Ilya Maximets Sept. 28, 2018, 1:17 p.m. UTC | #6
On 28.09.2018 15:13, Bruce Richardson wrote:
> On Fri, Sep 28, 2018 at 01:40:25PM +0300, Ilya Maximets wrote:
>> On 27.09.2018 11:28, Ilya Maximets wrote:
>>> On 26.09.2018 10:26, Maxime Coquelin wrote:
>>>> This patch opens a userfaultfd and sends it back to Qemu's
>>>> VHOST_USER_POSTCOPY_ADVISE request.
>>>>
>>>> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
>>>> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
>>>> ---
>>>>  lib/librte_vhost/vhost.h      |  2 ++
>>>>  lib/librte_vhost/vhost_user.c | 37 +++++++++++++++++++++++++++++++++++
>>>>  lib/librte_vhost/vhost_user.h |  3 ++-
>>>>  3 files changed, 41 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
>>>> index 25ffd7614..21722d8a8 100644
>>>> --- a/lib/librte_vhost/vhost.h
>>>> +++ b/lib/librte_vhost/vhost.h
>>>> @@ -363,6 +363,8 @@ struct virtio_net {
>>>>  	int			slave_req_fd;
>>>>  	rte_spinlock_t		slave_req_lock;
>>>>  
>>>> +	int			postcopy_ufd;
>>>> +
>>>>  	/*
>>>>  	 * Device id to identify a specific backend device.
>>>>  	 * It's set to -1 for the default software implementation.
>>>> diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
>>>> index a9b429598..bdfe2cac0 100644
>>>> --- a/lib/librte_vhost/vhost_user.c
>>>> +++ b/lib/librte_vhost/vhost_user.c
>>>> @@ -24,9 +24,13 @@
>>>>  #include <stdlib.h>
>>>>  #include <string.h>
>>>>  #include <unistd.h>
>>>> +#include <fcntl.h>
>>>> +#include <linux/userfaultfd.h>
>>>
>>> Maybe we need compile time check for this header existence?
>>> Otherwise, this will bump minimal kernel version for default linux build
>>> to something like 4.3.
>>
>> We'll need a config option here (disabled by default) and guard all
>> the postcopy related code.
>> Meson build will be able to detect the header file and enable
>> the config if possible. Like this:
>>
>> lib/librte_vhost/meson.build:
>> if cc.has_header('linux/userfaultfd.h')
>>        dpdk_conf.set10('RTE_LIBRTE_VHOST_POSTCOPY', true)
> 
> Are you sure you want 'set10' rather than 'set'. Set is probably easier
> because it ensures no define on false, while set10 has a define of 0. This
> has caught me out before.
> 
> FYI, you can also avoid the if by putting the condition into the define:
> 
> 	dpdk_conf.set('RTE_LIBRTE_VHOST_POSTCOPY', cc.has_header('...'))

Sure, this variant looks better. Thanks for suggestions.
I just copied my version from the similar code for 'RTE_HAS_LIBNUMA'.

Best regards, Ilya Maximets.
  
Bruce Richardson Sept. 28, 2018, 1:24 p.m. UTC | #7
On Fri, Sep 28, 2018 at 04:17:34PM +0300, Ilya Maximets wrote:
> On 28.09.2018 15:13, Bruce Richardson wrote:
> > On Fri, Sep 28, 2018 at 01:40:25PM +0300, Ilya Maximets wrote:
> >> On 27.09.2018 11:28, Ilya Maximets wrote:
> >>> On 26.09.2018 10:26, Maxime Coquelin wrote:
> >>>> This patch opens a userfaultfd and sends it back to Qemu's
> >>>> VHOST_USER_POSTCOPY_ADVISE request.
> >>>>
> >>>> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
> >>>> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
> >>>> ---
> >>>>  lib/librte_vhost/vhost.h      |  2 ++
> >>>>  lib/librte_vhost/vhost_user.c | 37 +++++++++++++++++++++++++++++++++++
> >>>>  lib/librte_vhost/vhost_user.h |  3 ++-
> >>>>  3 files changed, 41 insertions(+), 1 deletion(-)
> >>>>
> >>>> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
> >>>> index 25ffd7614..21722d8a8 100644
> >>>> --- a/lib/librte_vhost/vhost.h
> >>>> +++ b/lib/librte_vhost/vhost.h
> >>>> @@ -363,6 +363,8 @@ struct virtio_net {
> >>>>  	int			slave_req_fd;
> >>>>  	rte_spinlock_t		slave_req_lock;
> >>>>  
> >>>> +	int			postcopy_ufd;
> >>>> +
> >>>>  	/*
> >>>>  	 * Device id to identify a specific backend device.
> >>>>  	 * It's set to -1 for the default software implementation.
> >>>> diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
> >>>> index a9b429598..bdfe2cac0 100644
> >>>> --- a/lib/librte_vhost/vhost_user.c
> >>>> +++ b/lib/librte_vhost/vhost_user.c
> >>>> @@ -24,9 +24,13 @@
> >>>>  #include <stdlib.h>
> >>>>  #include <string.h>
> >>>>  #include <unistd.h>
> >>>> +#include <fcntl.h>
> >>>> +#include <linux/userfaultfd.h>
> >>>
> >>> Maybe we need compile time check for this header existence?
> >>> Otherwise, this will bump minimal kernel version for default linux build
> >>> to something like 4.3.
> >>
> >> We'll need a config option here (disabled by default) and guard all
> >> the postcopy related code.
> >> Meson build will be able to detect the header file and enable
> >> the config if possible. Like this:
> >>
> >> lib/librte_vhost/meson.build:
> >> if cc.has_header('linux/userfaultfd.h')
> >>        dpdk_conf.set10('RTE_LIBRTE_VHOST_POSTCOPY', true)
> > 
> > Are you sure you want 'set10' rather than 'set'. Set is probably easier
> > because it ensures no define on false, while set10 has a define of 0. This
> > has caught me out before.
> > 
> > FYI, you can also avoid the if by putting the condition into the define:
> > 
> > 	dpdk_conf.set('RTE_LIBRTE_VHOST_POSTCOPY', cc.has_header('...'))
> 
> Sure, this variant looks better. Thanks for suggestions.
> I just copied my version from the similar code for 'RTE_HAS_LIBNUMA'.
> 

Yes, looking at that code, it could do with a clean-up to shorten it too.
[It's true that nothing embarasses a programmer more than their own code 6
months layer :-)]

/Bruce
  
Maxime Coquelin Sept. 28, 2018, 1:49 p.m. UTC | #8
On 09/28/2018 03:24 PM, Bruce Richardson wrote:
> On Fri, Sep 28, 2018 at 04:17:34PM +0300, Ilya Maximets wrote:
>> On 28.09.2018 15:13, Bruce Richardson wrote:
>>> On Fri, Sep 28, 2018 at 01:40:25PM +0300, Ilya Maximets wrote:
>>>> On 27.09.2018 11:28, Ilya Maximets wrote:
>>>>> On 26.09.2018 10:26, Maxime Coquelin wrote:
>>>>>> This patch opens a userfaultfd and sends it back to Qemu's
>>>>>> VHOST_USER_POSTCOPY_ADVISE request.
>>>>>>
>>>>>> Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
>>>>>> Signed-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>
>>>>>> ---
>>>>>>   lib/librte_vhost/vhost.h      |  2 ++
>>>>>>   lib/librte_vhost/vhost_user.c | 37 +++++++++++++++++++++++++++++++++++
>>>>>>   lib/librte_vhost/vhost_user.h |  3 ++-
>>>>>>   3 files changed, 41 insertions(+), 1 deletion(-)
>>>>>>
>>>>>> diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
>>>>>> index 25ffd7614..21722d8a8 100644
>>>>>> --- a/lib/librte_vhost/vhost.h
>>>>>> +++ b/lib/librte_vhost/vhost.h
>>>>>> @@ -363,6 +363,8 @@ struct virtio_net {
>>>>>>   	int			slave_req_fd;
>>>>>>   	rte_spinlock_t		slave_req_lock;
>>>>>>   
>>>>>> +	int			postcopy_ufd;
>>>>>> +
>>>>>>   	/*
>>>>>>   	 * Device id to identify a specific backend device.
>>>>>>   	 * It's set to -1 for the default software implementation.
>>>>>> diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
>>>>>> index a9b429598..bdfe2cac0 100644
>>>>>> --- a/lib/librte_vhost/vhost_user.c
>>>>>> +++ b/lib/librte_vhost/vhost_user.c
>>>>>> @@ -24,9 +24,13 @@
>>>>>>   #include <stdlib.h>
>>>>>>   #include <string.h>
>>>>>>   #include <unistd.h>
>>>>>> +#include <fcntl.h>
>>>>>> +#include <linux/userfaultfd.h>
>>>>>
>>>>> Maybe we need compile time check for this header existence?
>>>>> Otherwise, this will bump minimal kernel version for default linux build
>>>>> to something like 4.3.
>>>>
>>>> We'll need a config option here (disabled by default) and guard all
>>>> the postcopy related code.
>>>> Meson build will be able to detect the header file and enable
>>>> the config if possible. Like this:
>>>>
>>>> lib/librte_vhost/meson.build:
>>>> if cc.has_header('linux/userfaultfd.h')
>>>>         dpdk_conf.set10('RTE_LIBRTE_VHOST_POSTCOPY', true)
>>>
>>> Are you sure you want 'set10' rather than 'set'. Set is probably easier
>>> because it ensures no define on false, while set10 has a define of 0. This
>>> has caught me out before.
>>>
>>> FYI, you can also avoid the if by putting the condition into the define:
>>>
>>> 	dpdk_conf.set('RTE_LIBRTE_VHOST_POSTCOPY', cc.has_header('...'))
>>
>> Sure, this variant looks better. Thanks for suggestions.
>> I just copied my version from the similar code for 'RTE_HAS_LIBNUMA'.
>>

Thanks Ilya & Bruce for the hint!
I'll do this in next version.

> Yes, looking at that code, it could do with a clean-up to shorten it too.
> [It's true that nothing embarasses a programmer more than their own code 6
> months layer :-)]

:)

Maxime
> /Bruce
>
  

Patch

diff --git a/lib/librte_vhost/vhost.h b/lib/librte_vhost/vhost.h
index 25ffd7614..21722d8a8 100644
--- a/lib/librte_vhost/vhost.h
+++ b/lib/librte_vhost/vhost.h
@@ -363,6 +363,8 @@  struct virtio_net {
 	int			slave_req_fd;
 	rte_spinlock_t		slave_req_lock;
 
+	int			postcopy_ufd;
+
 	/*
 	 * Device id to identify a specific backend device.
 	 * It's set to -1 for the default software implementation.
diff --git a/lib/librte_vhost/vhost_user.c b/lib/librte_vhost/vhost_user.c
index a9b429598..bdfe2cac0 100644
--- a/lib/librte_vhost/vhost_user.c
+++ b/lib/librte_vhost/vhost_user.c
@@ -24,9 +24,13 @@ 
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <fcntl.h>
+#include <linux/userfaultfd.h>
+#include <sys/ioctl.h>
 #include <sys/mman.h>
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <sys/syscall.h>
 #include <assert.h>
 #ifdef RTE_LIBRTE_VHOST_NUMA
 #include <numaif.h>
@@ -69,6 +73,7 @@  static const char *vhost_message_str[VHOST_USER_MAX] = {
 	[VHOST_USER_IOTLB_MSG]  = "VHOST_USER_IOTLB_MSG",
 	[VHOST_USER_CRYPTO_CREATE_SESS] = "VHOST_USER_CRYPTO_CREATE_SESS",
 	[VHOST_USER_CRYPTO_CLOSE_SESS] = "VHOST_USER_CRYPTO_CLOSE_SESS",
+	[VHOST_USER_POSTCOPY_ADVISE]  = "VHOST_USER_POSTCOPY_ADVISE",
 };
 
 static uint64_t
@@ -1412,6 +1417,33 @@  vhost_user_iotlb_msg(struct virtio_net **pdev, struct VhostUserMsg *msg)
 	return 0;
 }
 
+static int
+vhost_user_set_postcopy_advise(struct virtio_net *dev, struct VhostUserMsg *msg)
+{
+	struct uffdio_api api_struct;
+
+
+	dev->postcopy_ufd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+
+	if (dev->postcopy_ufd == -1) {
+		RTE_LOG(ERR, VHOST_CONFIG, "Userfaultfd not available: %s\n",
+				strerror(errno));
+		return -1;
+	}
+	api_struct.api = UFFD_API;
+	api_struct.features = 0;
+	if (ioctl(dev->postcopy_ufd, UFFDIO_API, &api_struct)) {
+		RTE_LOG(ERR, VHOST_CONFIG, "UFFDIO_API ioctl failure: %s\n",
+				strerror(errno));
+		close(dev->postcopy_ufd);
+		return -1;
+	}
+	msg->fds[0] = dev->postcopy_ufd;
+	msg->fd_num = 1;
+
+	return 0;
+}
+
 /* return bytes# of read on success or negative val on failure. */
 static int
 read_vhost_message(int sockfd, struct VhostUserMsg *msg)
@@ -1756,6 +1788,11 @@  vhost_user_msg_handler(int vid, int fd)
 		ret = vhost_user_iotlb_msg(&dev, &msg);
 		break;
 
+	case VHOST_USER_POSTCOPY_ADVISE:
+		vhost_user_set_postcopy_advise(dev, &msg);
+		send_vhost_reply(fd, &msg);
+		break;
+
 	default:
 		ret = -1;
 		break;
diff --git a/lib/librte_vhost/vhost_user.h b/lib/librte_vhost/vhost_user.h
index dd0262f8f..2030b40a5 100644
--- a/lib/librte_vhost/vhost_user.h
+++ b/lib/librte_vhost/vhost_user.h
@@ -50,7 +50,8 @@  typedef enum VhostUserRequest {
 	VHOST_USER_IOTLB_MSG = 22,
 	VHOST_USER_CRYPTO_CREATE_SESS = 26,
 	VHOST_USER_CRYPTO_CLOSE_SESS = 27,
-	VHOST_USER_MAX = 28
+	VHOST_USER_POSTCOPY_ADVISE = 28,
+	VHOST_USER_MAX = 29
 } VhostUserRequest;
 
 typedef enum VhostUserSlaveRequest {