[dpdk-dev,v2,1/6] vhost: handle VHOST_USER_SET_LOG_BASE request
Commit Message
VHOST_USER_SET_LOG_BASE request is used to tell the backend (dpdk
vhost-user) where we should log dirty pages, and how big the log
buffer is.
This request introduces a new payload:
typedef struct VhostUserLog {
uint64_t mmap_size;
uint64_t mmap_offset;
} VhostUserLog;
Also, a fd is delivered from QEMU by ancillary data.
With those info given, an area of memory is mmaped, assigned
to dev->log_base, for logging dirty pages.
Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
Signed-off-by: Victor Kaplansky <victork@redhat.com>
---
v2: workaround mmap issue when offset is not zero
---
lib/librte_vhost/rte_virtio_net.h | 4 ++-
lib/librte_vhost/vhost_user/vhost-net-user.c | 7 ++--
lib/librte_vhost/vhost_user/vhost-net-user.h | 6 ++++
lib/librte_vhost/vhost_user/virtio-net-user.c | 48 +++++++++++++++++++++++++++
lib/librte_vhost/vhost_user/virtio-net-user.h | 1 +
5 files changed, 63 insertions(+), 3 deletions(-)
Comments
On 12/17/2015 11:11 AM, Yuanhan Liu wrote:
> VHOST_USER_SET_LOG_BASE request is used to tell the backend (dpdk
> vhost-user) where we should log dirty pages, and how big the log
> buffer is.
>
> This request introduces a new payload:
>
> typedef struct VhostUserLog {
> uint64_t mmap_size;
> uint64_t mmap_offset;
> } VhostUserLog;
>
> Also, a fd is delivered from QEMU by ancillary data.
>
> With those info given, an area of memory is mmaped, assigned
> to dev->log_base, for logging dirty pages.
>
> Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
> Signed-off-by: Victor Kaplansky <victork@redhat.com
> ---
>
> v2: workaround mmap issue when offset is not zero
> ---
> lib/librte_vhost/rte_virtio_net.h | 4 ++-
> lib/librte_vhost/vhost_user/vhost-net-user.c | 7 ++--
> lib/librte_vhost/vhost_user/vhost-net-user.h | 6 ++++
> lib/librte_vhost/vhost_user/virtio-net-user.c | 48 +++++++++++++++++++++++++++
> lib/librte_vhost/vhost_user/virtio-net-user.h | 1 +
> 5 files changed, 63 insertions(+), 3 deletions(-)
>
> diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h
> index 10dcb90..8acee02 100644
> --- a/lib/librte_vhost/rte_virtio_net.h
> +++ b/lib/librte_vhost/rte_virtio_net.h
> @@ -129,7 +129,9 @@ struct virtio_net {
> char ifname[IF_NAME_SZ]; /**< Name of the tap device or socket path. */
> uint32_t virt_qp_nb; /**< number of queue pair we have allocated */
> void *priv; /**< private context */
> - uint64_t reserved[64]; /**< Reserve some spaces for future extension. */
> + uint64_t log_size; /**< Size of log area */
> + uint64_t log_base; /**< Where dirty pages are logged */
> + uint64_t reserved[62]; /**< Reserve some spaces for future extension. */
> struct vhost_virtqueue *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2]; /**< Contains all virtqueue information. */
> } __rte_cache_aligned;
>
> diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c
> index 8b7a448..32ad6f6 100644
> --- a/lib/librte_vhost/vhost_user/vhost-net-user.c
> +++ b/lib/librte_vhost/vhost_user/vhost-net-user.c
> @@ -388,9 +388,12 @@ vserver_message_handler(int connfd, void *dat, int *remove)
> break;
>
> case VHOST_USER_SET_LOG_BASE:
> - RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
> - break;
> + user_set_log_base(ctx, &msg);
>
> + /* it needs a reply */
> + msg.size = sizeof(msg.payload.u64);
> + send_vhost_message(connfd, &msg);
> + break;
> case VHOST_USER_SET_LOG_FD:
> close(msg.fds[0]);
> RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
> diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.h b/lib/librte_vhost/vhost_user/vhost-net-user.h
> index 38637cc..6d252a3 100644
> --- a/lib/librte_vhost/vhost_user/vhost-net-user.h
> +++ b/lib/librte_vhost/vhost_user/vhost-net-user.h
> @@ -83,6 +83,11 @@ typedef struct VhostUserMemory {
> VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
> } VhostUserMemory;
>
> +typedef struct VhostUserLog {
> + uint64_t mmap_size;
> + uint64_t mmap_offset;
> +} VhostUserLog;
> +
> typedef struct VhostUserMsg {
> VhostUserRequest request;
>
> @@ -97,6 +102,7 @@ typedef struct VhostUserMsg {
> struct vhost_vring_state state;
> struct vhost_vring_addr addr;
> VhostUserMemory memory;
> + VhostUserLog log;
> } payload;
> int fds[VHOST_MEMORY_MAX_NREGIONS];
> } __attribute((packed)) VhostUserMsg;
> diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c b/lib/librte_vhost/vhost_user/virtio-net-user.c
> index 2934d1c..b77c9b3 100644
> --- a/lib/librte_vhost/vhost_user/virtio-net-user.c
> +++ b/lib/librte_vhost/vhost_user/virtio-net-user.c
> @@ -365,3 +365,51 @@ user_set_protocol_features(struct vhost_device_ctx ctx,
>
> dev->protocol_features = protocol_features;
> }
> +
> +int
> +user_set_log_base(struct vhost_device_ctx ctx,
> + struct VhostUserMsg *msg)
> +{
> + struct virtio_net *dev;
> + int fd = msg->fds[0];
> + uint64_t size, off;
> + void *addr;
> +
> + dev = get_device(ctx);
> + if (!dev)
> + return -1;
> +
> + if (fd < 0) {
> + RTE_LOG(ERR, VHOST_CONFIG, "invalid log fd: %d\n", fd);
> + return -1;
> + }
> +
> + if (msg->size != sizeof(VhostUserLog)) {
> + RTE_LOG(ERR, VHOST_CONFIG,
> + "invalid log base msg size: %"PRId32" != %d\n",
> + msg->size, (int)sizeof(VhostUserLog));
> + return -1;
> + }
> +
> + size = msg->payload.log.mmap_size;
> + off = msg->payload.log.mmap_offset;
> + RTE_LOG(INFO, VHOST_CONFIG,
> + "log mmap size: %"PRId64", offset: %"PRId64"\n",
> + size, off);
> +
> + /*
> + * mmap from 0 to workaround a hugepage mmap bug: mmap will be
> + * failed when offset is not page size aligned.
> + */
s /will be failed/will fail/
mmap will fail when offset is not zero.
Also we only know this workaround is for hugetlbfs. Not sure of other
tmpfs, so mention hugetlbfs here.
> + addr = mmap(0, size + off, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
> + if (addr == MAP_FAILED) {
> + RTE_LOG(ERR, VHOST_CONFIG, "mmap log base failed!\n");
> + return -1;
> + }
> +
> + /* TODO: unmap on stop */
> + dev->log_base = (uint64_t)(uintptr_t)addr + off;
(uint64_t)(uintptr_t)RTE_PTR_ADD(addr, off)?
> + dev->log_size = size;
> +
> + return 0;
> +}
> diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.h b/lib/librte_vhost/vhost_user/virtio-net-user.h
> index b82108d..013cf38 100644
> --- a/lib/librte_vhost/vhost_user/virtio-net-user.h
> +++ b/lib/librte_vhost/vhost_user/virtio-net-user.h
> @@ -49,6 +49,7 @@ void user_set_vring_kick(struct vhost_device_ctx, struct VhostUserMsg *);
>
> void user_set_protocol_features(struct vhost_device_ctx ctx,
> uint64_t protocol_features);
> +int user_set_log_base(struct vhost_device_ctx ctx, struct VhostUserMsg *);
>
> int user_get_vring_base(struct vhost_device_ctx, struct vhost_vring_state *);
>
On Mon, Dec 21, 2015 at 03:32:53PM +0000, Xie, Huawei wrote:
> > +
> > + /*
> > + * mmap from 0 to workaround a hugepage mmap bug: mmap will be
> > + * failed when offset is not page size aligned.
> > + */
> s /will be failed/will fail/
> mmap will fail when offset is not zero.
> Also we only know this workaround is for hugetlbfs. Not sure of other
> tmpfs, so mention hugetlbfs here.
I have already mentioned "to workaround a __hugepage__ mmap bug"; it's
not enough?
> > + addr = mmap(0, size + off, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
> > + if (addr == MAP_FAILED) {
> > + RTE_LOG(ERR, VHOST_CONFIG, "mmap log base failed!\n");
> > + return -1;
> > + }
> > +
> > + /* TODO: unmap on stop */
> > + dev->log_base = (uint64_t)(uintptr_t)addr + off;
> (uint64_t)(uintptr_t)RTE_PTR_ADD(addr, off)?
No, addr is of (void *) type; we should cast it to uint64_t first,
before adding "off" to it.
--yliu
> > + dev->log_size = size;
> > +
> > + return 0;
> > +}
> > diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.h b/lib/librte_vhost/vhost_user/virtio-net-user.h
> > index b82108d..013cf38 100644
> > --- a/lib/librte_vhost/vhost_user/virtio-net-user.h
> > +++ b/lib/librte_vhost/vhost_user/virtio-net-user.h
> > @@ -49,6 +49,7 @@ void user_set_vring_kick(struct vhost_device_ctx, struct VhostUserMsg *);
> >
> > void user_set_protocol_features(struct vhost_device_ctx ctx,
> > uint64_t protocol_features);
> > +int user_set_log_base(struct vhost_device_ctx ctx, struct VhostUserMsg *);
> >
> > int user_get_vring_base(struct vhost_device_ctx, struct vhost_vring_state *);
> >
>
> -----Original Message-----
> From: Yuanhan Liu [mailto:yuanhan.liu@linux.intel.com]
> Sent: Tuesday, December 22, 2015 10:26 AM
> To: Xie, Huawei
> Cc: dev@dpdk.org; Michael S. Tsirkin; Victor Kaplansky; Iremonger,
> Bernard; Pavel Fedin; Peter Xu
> Subject: Re: [PATCH v2 1/6] vhost: handle VHOST_USER_SET_LOG_BASE
> request
>
> On Mon, Dec 21, 2015 at 03:32:53PM +0000, Xie, Huawei wrote:
> > > +
> > > + /*
> > > + * mmap from 0 to workaround a hugepage mmap bug: mmap will be
> > > + * failed when offset is not page size aligned.
> > > + */
> > s /will be failed/will fail/
> > mmap will fail when offset is not zero.
I mistook it for the 4KB page size. Please check whether aligning to the huge page size is enough.
> > Also we only know this workaround is for hugetlbfs. Not sure of
> other
> > tmpfs, so mention hugetlbfs here.
>
> I have already mentioned "to workaround a __hugepage__ mmap bug"; it's
> not enough?
Yes.
>
> > > + addr = mmap(0, size + off, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
> 0);
> > > + if (addr == MAP_FAILED) {
> > > + RTE_LOG(ERR, VHOST_CONFIG, "mmap log base failed!\n");
> > > + return -1;
> > > + }
> > > +
> > > + /* TODO: unmap on stop */
> > > + dev->log_base = (uint64_t)(uintptr_t)addr + off;
> > (uint64_t)(uintptr_t)RTE_PTR_ADD(addr, off)?
>
> No, addr is of (void *) type, we should cast it to uint64_t type first,
> before adding it with "off".
>
> --yliu
RTE_PTR_ADD is the DPDK interface for pointer arithmetic operation.
>
> > > + dev->log_size = size;
> > > +
> > > + return 0;
> > > +}
> > > diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.h
> b/lib/librte_vhost/vhost_user/virtio-net-user.h
> > > index b82108d..013cf38 100644
> > > --- a/lib/librte_vhost/vhost_user/virtio-net-user.h
> > > +++ b/lib/librte_vhost/vhost_user/virtio-net-user.h
> > > @@ -49,6 +49,7 @@ void user_set_vring_kick(struct vhost_device_ctx,
> struct VhostUserMsg *);
> > >
> > > void user_set_protocol_features(struct vhost_device_ctx ctx,
> > > uint64_t protocol_features);
> > > +int user_set_log_base(struct vhost_device_ctx ctx, struct
> VhostUserMsg *);
> > >
> > > int user_get_vring_base(struct vhost_device_ctx, struct
> vhost_vring_state *);
> > >
> >
On Tue, Dec 22, 2015 at 02:41:43AM +0000, Xie, Huawei wrote:
>
>
> > -----Original Message-----
> > From: Yuanhan Liu [mailto:yuanhan.liu@linux.intel.com]
> > Sent: Tuesday, December 22, 2015 10:26 AM
> > To: Xie, Huawei
> > Cc: dev@dpdk.org; Michael S. Tsirkin; Victor Kaplansky; Iremonger,
> > Bernard; Pavel Fedin; Peter Xu
> > Subject: Re: [PATCH v2 1/6] vhost: handle VHOST_USER_SET_LOG_BASE
> > request
> >
> > On Mon, Dec 21, 2015 at 03:32:53PM +0000, Xie, Huawei wrote:
> > > > +
> > > > + /*
> > > > + * mmap from 0 to workaround a hugepage mmap bug: mmap will be
> > > > + * failed when offset is not page size aligned.
> > > > + */
> > > s /will be failed/will fail/
> > > mmap will fail when offset is not zero.
> I mistake for 4KB page size.
Didn't follow you.
> Please check if huge page size align is enough.
It should be. However, I don't think we need to bother doing that:
first of all, it happens only on a few specific old kernels. Also, "off"
here is more or less guaranteed to be 0. Lastly, even if it's not,
mmapping from 0 will resolve that.
> > > Also we only know this workaround is for hugetlbfs. Not sure of
> > other
> > > tmpfs, so mention hugetlbfs here.
> >
> > I have already mentioned "to workaround a __hugepage__ mmap bug"; it's
> > not enough?
> Yes.
> >
> > > > + addr = mmap(0, size + off, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
> > 0);
> > > > + if (addr == MAP_FAILED) {
> > > > + RTE_LOG(ERR, VHOST_CONFIG, "mmap log base failed!\n");
> > > > + return -1;
> > > > + }
> > > > +
> > > > + /* TODO: unmap on stop */
> > > > + dev->log_base = (uint64_t)(uintptr_t)addr + off;
> > > (uint64_t)(uintptr_t)RTE_PTR_ADD(addr, off)?
> >
> > No, addr is of (void *) type, we should cast it to uint64_t type first,
> > before adding it with "off".
> >
> > --yliu
> RTE_PTR_ADD is the DPDK interface for pointer arithmetic operation.
log_base is of type "uint64_t", while RTE_PTR_ADD() returns (void *), so
it won't work here.
--yliu
@@ -129,7 +129,9 @@ struct virtio_net {
char ifname[IF_NAME_SZ]; /**< Name of the tap device or socket path. */
uint32_t virt_qp_nb; /**< number of queue pair we have allocated */
void *priv; /**< private context */
- uint64_t reserved[64]; /**< Reserve some spaces for future extension. */
+ uint64_t log_size; /**< Size of log area */
+ uint64_t log_base; /**< Where dirty pages are logged */
+ uint64_t reserved[62]; /**< Reserve some spaces for future extension. */
struct vhost_virtqueue *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2]; /**< Contains all virtqueue information. */
} __rte_cache_aligned;
@@ -388,9 +388,12 @@ vserver_message_handler(int connfd, void *dat, int *remove)
break;
case VHOST_USER_SET_LOG_BASE:
- RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
- break;
+ user_set_log_base(ctx, &msg);
+ /* it needs a reply */
+ msg.size = sizeof(msg.payload.u64);
+ send_vhost_message(connfd, &msg);
+ break;
case VHOST_USER_SET_LOG_FD:
close(msg.fds[0]);
RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n");
@@ -83,6 +83,11 @@ typedef struct VhostUserMemory {
VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
} VhostUserMemory;
+typedef struct VhostUserLog {
+ uint64_t mmap_size;
+ uint64_t mmap_offset;
+} VhostUserLog;
+
typedef struct VhostUserMsg {
VhostUserRequest request;
@@ -97,6 +102,7 @@ typedef struct VhostUserMsg {
struct vhost_vring_state state;
struct vhost_vring_addr addr;
VhostUserMemory memory;
+ VhostUserLog log;
} payload;
int fds[VHOST_MEMORY_MAX_NREGIONS];
} __attribute((packed)) VhostUserMsg;
@@ -365,3 +365,51 @@ user_set_protocol_features(struct vhost_device_ctx ctx,
dev->protocol_features = protocol_features;
}
+
+int
+user_set_log_base(struct vhost_device_ctx ctx,
+ struct VhostUserMsg *msg)
+{
+ struct virtio_net *dev;
+ int fd = msg->fds[0];
+ uint64_t size, off;
+ void *addr;
+
+ dev = get_device(ctx);
+ if (!dev)
+ return -1;
+
+ if (fd < 0) {
+ RTE_LOG(ERR, VHOST_CONFIG, "invalid log fd: %d\n", fd);
+ return -1;
+ }
+
+ if (msg->size != sizeof(VhostUserLog)) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "invalid log base msg size: %"PRId32" != %d\n",
+ msg->size, (int)sizeof(VhostUserLog));
+ return -1;
+ }
+
+ size = msg->payload.log.mmap_size;
+ off = msg->payload.log.mmap_offset;
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "log mmap size: %"PRId64", offset: %"PRId64"\n",
+ size, off);
+
+ /*
+ * mmap from 0 to workaround a hugepage mmap bug: mmap will be
+ * failed when offset is not page size aligned.
+ */
+ addr = mmap(0, size + off, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if (addr == MAP_FAILED) {
+ RTE_LOG(ERR, VHOST_CONFIG, "mmap log base failed!\n");
+ return -1;
+ }
+
+ /* TODO: unmap on stop */
+ dev->log_base = (uint64_t)(uintptr_t)addr + off;
+ dev->log_size = size;
+
+ return 0;
+}
@@ -49,6 +49,7 @@ void user_set_vring_kick(struct vhost_device_ctx, struct VhostUserMsg *);
void user_set_protocol_features(struct vhost_device_ctx ctx,
uint64_t protocol_features);
+int user_set_log_base(struct vhost_device_ctx ctx, struct VhostUserMsg *);
int user_get_vring_base(struct vhost_device_ctx, struct vhost_vring_state *);