[dpdk-dev,v4,1/1] net/virtio-user: add support for server mode
Checks
Commit Message
This patch adds support for server mode to virtio-user.
The client mode vhost-user starts up first; the server mode virtio-user then
starts up and creates the socket file used to exchange vhost messages.
If the connection is broken, the client mode vhost-user can reconnect to
virtio-user.
The server mode virtio-user supports repeated vhost-user reconnections with
the same parameter configuration.
The release notes are updated in this patch.
Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
---
doc/guides/rel_notes/release_18_05.rst | 6 ++
drivers/net/virtio/virtio_user/vhost_user.c | 64 ++++++++++++--
drivers/net/virtio/virtio_user/virtio_user_dev.c | 45 +++++++---
drivers/net/virtio/virtio_user/virtio_user_dev.h | 4 +
drivers/net/virtio/virtio_user_ethdev.c | 103 +++++++++++++++++++++--
5 files changed, 194 insertions(+), 28 deletions(-)
Comments
On 4/3/2018 8:20 PM, zhiyong.yang@intel.com wrote:
> virtio-user adds support for server mode in this patch.
>
> Client mode vhost-user startup firstly, server mode virtio-user startups
> and creates the socket file to exchange vhost messages.
>
> If the connection is broken, client mode vhost-user can support to
> reconnect virtio-user.
>
> Server mode virtio-user supports many times' vhost-user reconnections with
> the same parameter configurations.
>
> Release note is updated in the patch.
With the current implementation, LSC has to be enabled; otherwise there is
no chance to accept the incoming connection. We should point this out.
And if possible, please split this patch into multiple patches.
>
> Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
> ---
> doc/guides/rel_notes/release_18_05.rst | 6 ++
> drivers/net/virtio/virtio_user/vhost_user.c | 64 ++++++++++++--
> drivers/net/virtio/virtio_user/virtio_user_dev.c | 45 +++++++---
> drivers/net/virtio/virtio_user/virtio_user_dev.h | 4 +
> drivers/net/virtio/virtio_user_ethdev.c | 103 +++++++++++++++++++++--
> 5 files changed, 194 insertions(+), 28 deletions(-)
>
> diff --git a/doc/guides/rel_notes/release_18_05.rst b/doc/guides/rel_notes/release_18_05.rst
> index 9cc77f893..f8897b2e9 100644
> --- a/doc/guides/rel_notes/release_18_05.rst
> +++ b/doc/guides/rel_notes/release_18_05.rst
> @@ -58,6 +58,12 @@ New Features
> * Added support for NVGRE, VXLAN and GENEVE filters in flow API.
> * Added support for DROP action in flow API.
>
> +* **Added support for virtio-user server mode.**
> + In a container environment if the vhost-user backend restarts, there's no way
> + for it to reconnect to virtio-user. To address this, support for server mode
> + is added. In this mode the socket file is created by virtio-user, which the
> + backend connects to. This means that if the backend restarts, it can reconnect
> + to virtio-user and continue communications.
>
> API Changes
> -----------
> diff --git a/drivers/net/virtio/virtio_user/vhost_user.c b/drivers/net/virtio/virtio_user/vhost_user.c
> index 91c6449bb..1b3401d4f 100644
> --- a/drivers/net/virtio/virtio_user/vhost_user.c
> +++ b/drivers/net/virtio/virtio_user/vhost_user.c
> @@ -378,6 +378,50 @@ vhost_user_sock(struct virtio_user_dev *dev,
> return 0;
> }
>
> +static void
> +virtio_user_set_block(int fd, bool enabled)
This is only used once, no need to abstract it into a function?
> +{
> + int f;
> +
> + f = fcntl(fd, F_GETFL);
> + if (enabled)
> + fcntl(fd, F_SETFL, f & ~O_NONBLOCK);
> + else
> + fcntl(fd, F_SETFL, f | O_NONBLOCK);
> +}
> +
> +#define MAX_VIRTIO_USER_BACKLOG 128
We only allow one connection from vhost-user, so how about just setting the
backlog queue length to 1?
> +static int
> +virtio_user_start_server(struct virtio_user_dev *dev, struct sockaddr_un *un)
> +{
> + int ret;
> + int fd = dev->listenfd;
> + int connectfd;
> +
> + ret = bind(fd, (struct sockaddr *)un, sizeof(*un));
> + if (ret < 0) {
> + PMD_DRV_LOG(ERR, "failed to bind to %s: %s; remove it and try again\n",
> + dev->path, strerror(errno));
> + goto err;
> + }
> + ret = listen(fd, MAX_VIRTIO_USER_BACKLOG);
> + if (ret < 0)
> + goto err;
> +
> + connectfd = accept(fd, NULL, NULL);
> + if (connectfd >= 0)
> + dev->connected = true;
> + else
> + goto err;
if (connectfd < 0)
goto err;
dev->connected = true;
dev->vhostfd = connectfd;
...
> +
> + dev->vhostfd = connectfd;
> + virtio_user_set_block(connectfd, true);
> + return 0;
> +err:
> + close(dev->listenfd);
> + return -1;
> +}
> +
> /**
> * Set up environment to talk with a vhost user backend.
> *
> @@ -390,6 +434,7 @@ vhost_user_setup(struct virtio_user_dev *dev)
> {
> int fd;
> int flag;
> + int ret = 0;
> struct sockaddr_un un;
>
> fd = socket(AF_UNIX, SOCK_STREAM, 0);
> @@ -405,14 +450,21 @@ vhost_user_setup(struct virtio_user_dev *dev)
> memset(&un, 0, sizeof(un));
> un.sun_family = AF_UNIX;
> snprintf(un.sun_path, sizeof(un.sun_path), "%s", dev->path);
> - if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
> - PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno));
> - close(fd);
> - return -1;
> +
> + if (dev->is_server) {
> + dev->listenfd = fd;
> + ret = virtio_user_start_server(dev, &un);
> + } else {
> + dev->vhostfd = fd;
> + if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
> + PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno));
> + close(fd);
> + return -1;
> + }
> + dev->connected = true;
> }
>
> - dev->vhostfd = fd;
> - return 0;
> + return ret;
> }
>
> static int
> diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c
> index f90fee9e5..dd9fa9bdf 100644
> --- a/drivers/net/virtio/virtio_user/virtio_user_dev.c
> +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c
> @@ -142,6 +142,9 @@ int virtio_user_stop_device(struct virtio_user_dev *dev)
> {
> uint32_t i;
>
> + if (!dev->connected)
> + return -1;
> +
> for (i = 0; i < dev->max_queue_pairs; ++i)
> dev->ops->enable_qp(dev, i, 0);
>
> @@ -267,21 +270,27 @@ virtio_user_dev_setup(struct virtio_user_dev *dev)
> dev->vhostfds = NULL;
> dev->tapfds = NULL;
Add a check here:
if (dev->is_server && !is_vhost_user_by_type(dev->path))
return error;
>
> - if (is_vhost_user_by_type(dev->path)) {
> - dev->ops = &ops_user;
> + if (dev->is_server) {
> + dev->ops = &ops_user;/* server mode only supports vhost user*/
> } else {
> - dev->ops = &ops_kernel;
> -
> - dev->vhostfds = malloc(dev->max_queue_pairs * sizeof(int));
> - dev->tapfds = malloc(dev->max_queue_pairs * sizeof(int));
> - if (!dev->vhostfds || !dev->tapfds) {
> - PMD_INIT_LOG(ERR, "Failed to malloc");
> - return -1;
> - }
> -
> - for (q = 0; q < dev->max_queue_pairs; ++q) {
> - dev->vhostfds[q] = -1;
> - dev->tapfds[q] = -1;
> + if (is_vhost_user_by_type(dev->path)) {
> + dev->ops = &ops_user;
> + } else {
> + dev->ops = &ops_kernel;
> +
> + dev->vhostfds = malloc(dev->max_queue_pairs *
> + sizeof(int));
> + dev->tapfds = malloc(dev->max_queue_pairs *
> + sizeof(int));
> + if (!dev->vhostfds || !dev->tapfds) {
> + PMD_INIT_LOG(ERR, "Failed to malloc");
> + return -1;
> + }
> +
> + for (q = 0; q < dev->max_queue_pairs; ++q) {
> + dev->vhostfds[q] = -1;
> + dev->tapfds[q] = -1;
> + }
> }
> }
>
> @@ -388,6 +397,11 @@ virtio_user_dev_uninit(struct virtio_user_dev *dev)
>
> close(dev->vhostfd);
>
> + if (dev->is_server && dev->listenfd >= 0) {
> + close(dev->listenfd);
> + dev->listenfd = -1;
> + }
> + dev->connected = false;
> if (dev->vhostfds) {
> for (i = 0; i < dev->max_queue_pairs; ++i)
> close(dev->vhostfds[i]);
> @@ -396,6 +410,9 @@ virtio_user_dev_uninit(struct virtio_user_dev *dev)
> }
>
> free(dev->ifname);
> +
> + if (dev->is_server)
> + unlink(dev->path);
> }
>
> static uint8_t
> diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.h b/drivers/net/virtio/virtio_user/virtio_user_dev.h
> index 64467b4f9..68056720d 100644
> --- a/drivers/net/virtio/virtio_user/virtio_user_dev.h
> +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.h
> @@ -6,6 +6,7 @@
> #define _VIRTIO_USER_DEV_H
>
> #include <limits.h>
> +#include <stdbool.h>
> #include "../virtio_pci.h"
> #include "../virtio_ring.h"
> #include "vhost.h"
> @@ -13,6 +14,9 @@
> struct virtio_user_dev {
> /* for vhost_user backend */
> int vhostfd;
> + int listenfd; /* listening fd */
> + bool connected; /* connection status */
It does not seem necessary to add this field, as the connection status can be
deduced from (vhostfd >= 0).
> + bool is_server; /* server or client mode */
>
> /* for vhost_kernel backend */
> char *ifname;
> diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c
> index 263649006..5b8c8e291 100644
> --- a/drivers/net/virtio/virtio_user_ethdev.c
> +++ b/drivers/net/virtio/virtio_user_ethdev.c
> @@ -24,15 +24,76 @@
> #define virtio_user_get_dev(hw) \
> ((struct virtio_user_dev *)(hw)->virtio_user_dev)
>
> +static void
> +virtio_user_server_reconnection(struct virtio_user_dev *dev)
s/reconnection/reconnect?
> +{
> + int ret;
> + int flag;
> + int connectfd;
> + struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->port_id];
> +
> + if (dev->connected)
> + return;
> +
> + connectfd = accept(dev->listenfd, NULL, NULL);
> + if (connectfd < 0)
> + return;
> +
> + dev->vhostfd = connectfd;
> + flag = fcntl(connectfd, F_GETFD);
> + fcntl(connectfd, F_SETFL, flag & ~O_NONBLOCK);
> +
> + ret = virtio_user_start_device(dev);
> + if (ret < 0)
> + return;
> +
> + if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
> + if (rte_intr_disable(eth_dev->intr_handle) < 0) {
> + PMD_DRV_LOG(ERR, "interrupt disable failed");
> + return;
> + }
> + rte_intr_callback_unregister(eth_dev->intr_handle,
> + virtio_interrupt_handler,
> + eth_dev);
> + eth_dev->intr_handle->fd = connectfd;
> + rte_intr_callback_register(eth_dev->intr_handle,
> + virtio_interrupt_handler, eth_dev);
> +
> + if (rte_intr_enable(eth_dev->intr_handle) < 0) {
> + PMD_DRV_LOG(ERR, "interrupt enable failed");
> + return;
> + }
> + }
> + dev->connected = true;
> + PMD_INIT_LOG(NOTICE, "server mode virtio-user reconnection succeeds!");
> +}
> +
> static void
> virtio_user_delayed_handler(void *param)
> {
> struct virtio_hw *hw = (struct virtio_hw *)param;
> - struct rte_eth_dev *dev = &rte_eth_devices[hw->port_id];
> + struct rte_eth_dev *eth_dev = &rte_eth_devices[hw->port_id];
> + struct virtio_user_dev *dev = virtio_user_get_dev(hw);
>
> - rte_intr_callback_unregister(dev->intr_handle,
> - virtio_interrupt_handler,
> - dev);
> + if (rte_intr_disable(eth_dev->intr_handle) < 0) {
> + PMD_DRV_LOG(ERR, "interrupt disable failed");
> + return;
> + }
> + rte_intr_callback_unregister(eth_dev->intr_handle,
> + virtio_interrupt_handler, eth_dev);
> + if (dev->is_server) {
> + if (dev->vhostfd >= 0) {
> + close(dev->vhostfd);
> + dev->vhostfd = -1;
> + }
> + eth_dev->intr_handle->fd = dev->listenfd;
> + rte_intr_callback_register(eth_dev->intr_handle,
> + virtio_interrupt_handler, eth_dev);
> + if (rte_intr_enable(eth_dev->intr_handle) < 0) {
> + PMD_DRV_LOG(ERR, "interrupt enable failed");
> + return;
> + }
> + }
> }
>
> static void
> @@ -65,8 +126,7 @@ virtio_user_read_dev_config(struct virtio_hw *hw, size_t offset,
> r = recv(dev->vhostfd, buf, 128, MSG_PEEK);
In server mode, when the connection is not established, vhostfd is -1, so
r < 0 and errno is EBADF. How could execution then reach the server handling
in the "else if" block?
> if (r == 0 || (r < 0 && errno != EAGAIN)) {
> dev->status &= (~VIRTIO_NET_S_LINK_UP);
> - PMD_DRV_LOG(ERR, "virtio-user port %u is down",
> - hw->port_id);
> +
> /* Only client mode is available now. Once the
Can you also correct this note as we support server mode now?
> * connection is broken, it can never be up
> * again. Besides, this function could be called
> @@ -74,9 +134,14 @@ virtio_user_read_dev_config(struct virtio_hw *hw, size_t offset,
> * callback cannot be unregistered here, set an
> * alarm to do it.
> */
> - rte_eal_alarm_set(1,
> + if (dev->connected) {
> + dev->connected = false;
> + PMD_DRV_LOG(ERR, "virtio-user port %u is down",
> + hw->port_id);
> + rte_eal_alarm_set(1,
> virtio_user_delayed_handler,
> (void *)hw);
> + }
> } else {
> dev->status |= VIRTIO_NET_S_LINK_UP;
> }
> @@ -85,7 +150,10 @@ virtio_user_read_dev_config(struct virtio_hw *hw, size_t offset,
> PMD_DRV_LOG(ERR, "error clearing O_NONBLOCK flag");
> return;
> }
> - }
> +
> + } else if (dev->is_server)
> + virtio_user_server_reconnection(dev);
> +
> *(uint16_t *)dst = dev->status;
> }
>
> @@ -278,12 +346,15 @@ static const char *valid_args[] = {
> VIRTIO_USER_ARG_QUEUE_SIZE,
> #define VIRTIO_USER_ARG_INTERFACE_NAME "iface"
> VIRTIO_USER_ARG_INTERFACE_NAME,
> +#define VIRTIO_USER_ARG_SERVER_MODE "server"
> + VIRTIO_USER_ARG_SERVER_MODE,
> NULL
> };
>
> #define VIRTIO_USER_DEF_CQ_EN 0
> #define VIRTIO_USER_DEF_Q_NUM 1
> #define VIRTIO_USER_DEF_Q_SZ 256
> +#define VIRTIO_USER_DEF_SERVER_MODE 0
>
> static int
> get_string_arg(const char *key __rte_unused,
> @@ -378,10 +449,12 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
> uint64_t queues = VIRTIO_USER_DEF_Q_NUM;
> uint64_t cq = VIRTIO_USER_DEF_CQ_EN;
> uint64_t queue_size = VIRTIO_USER_DEF_Q_SZ;
> + uint64_t server_mode = VIRTIO_USER_DEF_SERVER_MODE;
> char *path = NULL;
> char *ifname = NULL;
> char *mac_addr = NULL;
> int ret = -1;
> + struct virtio_user_dev *vu_dev = NULL;
>
> kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_args);
> if (!kvlist) {
> @@ -445,6 +518,15 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
> }
> }
>
> + if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_SERVER_MODE) == 1) {
> + if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_SERVER_MODE,
> + &get_integer_arg, &server_mode) < 0) {
> + PMD_INIT_LOG(ERR, "error to parse %s",
> + VIRTIO_USER_ARG_SERVER_MODE);
> + goto end;
> + }
> + }
> +
> if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_CQ_NUM) == 1) {
> if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_CQ_NUM,
> &get_integer_arg, &cq) < 0) {
> @@ -476,6 +558,11 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
> }
>
> hw = eth_dev->data->dev_private;
> + vu_dev = virtio_user_get_dev(hw);
> + if (server_mode == 1)
> + vu_dev->is_server = true;
> + else
> + vu_dev->is_server = false;
> if (virtio_user_dev_init(hw->virtio_user_dev, path, queues, cq,
> queue_size, mac_addr, &ifname) < 0) {
> PMD_INIT_LOG(ERR, "virtio_user_dev_init fails");
Hi Jianfeng,
First of all, thanks for your review and comments.
Reply inline.
> -----Original Message-----
> From: Tan, Jianfeng
> Sent: Tuesday, April 3, 2018 11:16 PM
> To: Yang, Zhiyong <zhiyong.yang@intel.com>; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; thomas@monjalon.net; Wang, Zhihong
> <zhihong.wang@intel.com>; Bie, Tiwei <tiwei.bie@intel.com>
> Subject: Re: [PATCH v4 1/1] net/virtio-user: add support for server mode
>
>
>
> On 4/3/2018 8:20 PM, zhiyong.yang@intel.com wrote:
> > virtio-user adds support for server mode in this patch.
> >
> > Client mode vhost-user startup firstly, server mode virtio-user
> > startups and creates the socket file to exchange vhost messages.
> >
> > If the connection is broken, client mode vhost-user can support to
> > reconnect virtio-user.
> >
> > Server mode virtio-user supports many times' vhost-user reconnections
> > with the same parameter configurations.
> >
> > Release note is updated in the patch.
>
> With current implementation, we have to enable LSC; or no chance to accept
> the coming connection. We shall point this out.
>
Ok.
> And if possible, split this patch into multiple patches.
>
How to split?
> >
> > Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
> > ---
> > doc/guides/rel_notes/release_18_05.rst | 6 ++
> > drivers/net/virtio/virtio_user/vhost_user.c | 64 ++++++++++++--
> > drivers/net/virtio/virtio_user/virtio_user_dev.c | 45 +++++++---
> > drivers/net/virtio/virtio_user/virtio_user_dev.h | 4 +
> > drivers/net/virtio/virtio_user_ethdev.c | 103
> +++++++++++++++++++++--
> > 5 files changed, 194 insertions(+), 28 deletions(-)
> >
> > diff --git a/doc/guides/rel_notes/release_18_05.rst
> > b/doc/guides/rel_notes/release_18_05.rst
> > index 9cc77f893..f8897b2e9 100644
> > --- a/doc/guides/rel_notes/release_18_05.rst
> > +++ b/doc/guides/rel_notes/release_18_05.rst
> > @@ -58,6 +58,12 @@ New Features
> > * Added support for NVGRE, VXLAN and GENEVE filters in flow API.
> > * Added support for DROP action in flow API.
> >
> > +* **Added support for virtio-user server mode.**
> > + In a container environment if the vhost-user backend restarts,
> > +there's no way
> > + for it to reconnect to virtio-user. To address this, support for
> > +server mode
> > + is added. In this mode the socket file is created by virtio-user,
> > +which the
> > + backend connects to. This means that if the backend restarts, it
> > +can reconnect
> > + to virtio-user and continue communications.
> >
> > API Changes
> > -----------
> > diff --git a/drivers/net/virtio/virtio_user/vhost_user.c
> > b/drivers/net/virtio/virtio_user/vhost_user.c
> > index 91c6449bb..1b3401d4f 100644
> > --- a/drivers/net/virtio/virtio_user/vhost_user.c
> > +++ b/drivers/net/virtio/virtio_user/vhost_user.c
> > @@ -378,6 +378,50 @@ vhost_user_sock(struct virtio_user_dev *dev,
> > return 0;
> > }
> >
> > +static void
> > +virtio_user_set_block(int fd, bool enabled)
>
> This is only used once, no need to abstract it into a function?
Ok.
>
> > +{
> > + int f;
> > +
> > + f = fcntl(fd, F_GETFL);
> > + if (enabled)
> > + fcntl(fd, F_SETFL, f & ~O_NONBLOCK);
> > + else
> > + fcntl(fd, F_SETFL, f | O_NONBLOCK); }
> > +
> > +#define MAX_VIRTIO_USER_BACKLOG 128
>
> We only allow one connection from vhost-user, so how about just make the
> backlog queue length as 1?
>
> > +static int
> > +virtio_user_start_server(struct virtio_user_dev *dev, struct
> > +sockaddr_un *un) {
> > + int ret;
> > + int fd = dev->listenfd;
> > + int connectfd;
> > +
> > + ret = bind(fd, (struct sockaddr *)un, sizeof(*un));
> > + if (ret < 0) {
> > + PMD_DRV_LOG(ERR, "failed to bind to %s: %s; remove it and
> try again\n",
> > + dev->path, strerror(errno));
> > + goto err;
> > + }
> > + ret = listen(fd, MAX_VIRTIO_USER_BACKLOG);
> > + if (ret < 0)
> > + goto err;
> > +
> > + connectfd = accept(fd, NULL, NULL);
> > + if (connectfd >= 0)
> > + dev->connected = true;
> > + else
> > + goto err;
>
> if (connectfd < 0)
> goto err;
>
> dev->connected = true;
> dev->vhostfd = connectfd;
> ...
Ok.
>
> > +
> > + dev->vhostfd = connectfd;
> > + virtio_user_set_block(connectfd, true);
> > + return 0;
> > +err:
> > + close(dev->listenfd);
> > + return -1;
> > +}
> > +
> > /**
> > * Set up environment to talk with a vhost user backend.
> > *
> > @@ -390,6 +434,7 @@ vhost_user_setup(struct virtio_user_dev *dev)
> > {
> > int fd;
> > int flag;
> > + int ret = 0;
> > struct sockaddr_un un;
> >
> > fd = socket(AF_UNIX, SOCK_STREAM, 0); @@ -405,14 +450,21 @@
> > vhost_user_setup(struct virtio_user_dev *dev)
> > memset(&un, 0, sizeof(un));
> > un.sun_family = AF_UNIX;
> > snprintf(un.sun_path, sizeof(un.sun_path), "%s", dev->path);
> > - if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
> > - PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno));
> > - close(fd);
> > - return -1;
> > +
> > + if (dev->is_server) {
> > + dev->listenfd = fd;
> > + ret = virtio_user_start_server(dev, &un);
> > + } else {
> > + dev->vhostfd = fd;
> > + if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
> > + PMD_DRV_LOG(ERR, "connect error, %s",
> strerror(errno));
> > + close(fd);
> > + return -1;
> > + }
> > + dev->connected = true;
> > }
> >
> > - dev->vhostfd = fd;
> > - return 0;
> > + return ret;
> > }
> >
> > static int
> > diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c
> > b/drivers/net/virtio/virtio_user/virtio_user_dev.c
> > index f90fee9e5..dd9fa9bdf 100644
> > --- a/drivers/net/virtio/virtio_user/virtio_user_dev.c
> > +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c
> > @@ -142,6 +142,9 @@ int virtio_user_stop_device(struct virtio_user_dev
> *dev)
> > {
> > uint32_t i;
> >
> > + if (!dev->connected)
> > + return -1;
> > +
> > for (i = 0; i < dev->max_queue_pairs; ++i)
> > dev->ops->enable_qp(dev, i, 0);
> >
> > @@ -267,21 +270,27 @@ virtio_user_dev_setup(struct virtio_user_dev
> *dev)
> > dev->vhostfds = NULL;
> > dev->tapfds = NULL;
>
> Add a check here:
> if (dev->is_server && !is_vhost_user_by_type(dev->path))
> return error;
Ok.
>
> >
> > - if (is_vhost_user_by_type(dev->path)) {
> > - dev->ops = &ops_user;
> > + if (dev->is_server) {
> > + dev->ops = &ops_user;/* server mode only supports vhost
> user*/
> > } else {
> > - dev->ops = &ops_kernel;
> > -
> > - dev->vhostfds = malloc(dev->max_queue_pairs *
> sizeof(int));
> > - dev->tapfds = malloc(dev->max_queue_pairs * sizeof(int));
> > - if (!dev->vhostfds || !dev->tapfds) {
> > - PMD_INIT_LOG(ERR, "Failed to malloc");
> > - return -1;
> > - }
> > -
> > - for (q = 0; q < dev->max_queue_pairs; ++q) {
> > - dev->vhostfds[q] = -1;
> > - dev->tapfds[q] = -1;
> > + if (is_vhost_user_by_type(dev->path)) {
> > + dev->ops = &ops_user;
> > + } else {
> > + dev->ops = &ops_kernel;
> > +
> > + dev->vhostfds = malloc(dev->max_queue_pairs *
> > + sizeof(int));
> > + dev->tapfds = malloc(dev->max_queue_pairs *
> > + sizeof(int));
> > + if (!dev->vhostfds || !dev->tapfds) {
> > + PMD_INIT_LOG(ERR, "Failed to malloc");
> > + return -1;
> > + }
> > +
> > + for (q = 0; q < dev->max_queue_pairs; ++q) {
> > + dev->vhostfds[q] = -1;
> > + dev->tapfds[q] = -1;
> > + }
> > }
> > }
> >
> > @@ -388,6 +397,11 @@ virtio_user_dev_uninit(struct virtio_user_dev
> > *dev)
> >
> > close(dev->vhostfd);
> >
> > + if (dev->is_server && dev->listenfd >= 0) {
> > + close(dev->listenfd);
> > + dev->listenfd = -1;
> > + }
> > + dev->connected = false;
> > if (dev->vhostfds) {
> > for (i = 0; i < dev->max_queue_pairs; ++i)
> > close(dev->vhostfds[i]);
> > @@ -396,6 +410,9 @@ virtio_user_dev_uninit(struct virtio_user_dev *dev)
> > }
> >
> > free(dev->ifname);
> > +
> > + if (dev->is_server)
> > + unlink(dev->path);
> > }
> >
> > static uint8_t
> > diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.h
> > b/drivers/net/virtio/virtio_user/virtio_user_dev.h
> > index 64467b4f9..68056720d 100644
> > --- a/drivers/net/virtio/virtio_user/virtio_user_dev.h
> > +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.h
> > @@ -6,6 +6,7 @@
> > #define _VIRTIO_USER_DEV_H
> >
> > #include <limits.h>
> > +#include <stdbool.h>
> > #include "../virtio_pci.h"
> > #include "../virtio_ring.h"
> > #include "vhost.h"
> > @@ -13,6 +14,9 @@
> > struct virtio_user_dev {
> > /* for vhost_user backend */
> > int vhostfd;
> > + int listenfd; /* listening fd */
> > + bool connected; /* connection status */
>
> Seems not necessary to add this field; as the connection status can be
> deduced from (vhostfd>=0)
Ok, remove it.
>
> > + bool is_server; /* server or client mode */
> >
> > /* for vhost_kernel backend */
> > char *ifname;
> > diff --git a/drivers/net/virtio/virtio_user_ethdev.c
> > b/drivers/net/virtio/virtio_user_ethdev.c
> > index 263649006..5b8c8e291 100644
> > --- a/drivers/net/virtio/virtio_user_ethdev.c
> > +++ b/drivers/net/virtio/virtio_user_ethdev.c
> > @@ -24,15 +24,76 @@
> > #define virtio_user_get_dev(hw) \
> > ((struct virtio_user_dev *)(hw)->virtio_user_dev)
> >
> > +static void
> > +virtio_user_server_reconnection(struct virtio_user_dev *dev)
>
> s/reconnection/reconnect?
>
Ok, Good Suggestion.
> > +{
> > + int ret;
> > + int flag;
> > + int connectfd;
> > + struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->port_id];
> > +
> > + if (dev->connected)
> > + return;
> > +
> > + connectfd = accept(dev->listenfd, NULL, NULL);
> > + if (connectfd < 0)
> > + return;
> > +
> > + dev->vhostfd = connectfd;
> > + flag = fcntl(connectfd, F_GETFD);
> > + fcntl(connectfd, F_SETFL, flag & ~O_NONBLOCK);
> > +
> > + ret = virtio_user_start_device(dev);
> > + if (ret < 0)
> > + return;
> > +
> > + if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
> > + if (rte_intr_disable(eth_dev->intr_handle) < 0) {
> > + PMD_DRV_LOG(ERR, "interrupt disable failed");
> > + return;
> > + }
> > + rte_intr_callback_unregister(eth_dev->intr_handle,
> > + virtio_interrupt_handler,
> > + eth_dev);
> > + eth_dev->intr_handle->fd = connectfd;
> > + rte_intr_callback_register(eth_dev->intr_handle,
> > + virtio_interrupt_handler, eth_dev);
> > +
> > + if (rte_intr_enable(eth_dev->intr_handle) < 0) {
> > + PMD_DRV_LOG(ERR, "interrupt enable failed");
> > + return;
> > + }
> > + }
> > + dev->connected = true;
> > + PMD_INIT_LOG(NOTICE, "server mode virtio-user reconnection
> > +succeeds!"); }
> > +
> > static void
> > virtio_user_delayed_handler(void *param)
> > {
> > struct virtio_hw *hw = (struct virtio_hw *)param;
> > - struct rte_eth_dev *dev = &rte_eth_devices[hw->port_id];
> > + struct rte_eth_dev *eth_dev = &rte_eth_devices[hw->port_id];
> > + struct virtio_user_dev *dev = virtio_user_get_dev(hw);
> >
> > - rte_intr_callback_unregister(dev->intr_handle,
> > - virtio_interrupt_handler,
> > - dev);
> > + if (rte_intr_disable(eth_dev->intr_handle) < 0) {
> > + PMD_DRV_LOG(ERR, "interrupt disable failed");
> > + return;
> > + }
> > + rte_intr_callback_unregister(eth_dev->intr_handle,
> > + virtio_interrupt_handler, eth_dev);
> > + if (dev->is_server) {
> > + if (dev->vhostfd >= 0) {
> > + close(dev->vhostfd);
> > + dev->vhostfd = -1;
> > + }
> > + eth_dev->intr_handle->fd = dev->listenfd;
> > + rte_intr_callback_register(eth_dev->intr_handle,
> > + virtio_interrupt_handler, eth_dev);
> > + if (rte_intr_enable(eth_dev->intr_handle) < 0) {
> > + PMD_DRV_LOG(ERR, "interrupt enable failed");
> > + return;
> > + }
> > + }
> > }
> >
> > static void
> > @@ -65,8 +126,7 @@ virtio_user_read_dev_config(struct virtio_hw *hw,
> size_t offset,
> > r = recv(dev->vhostfd, buf, 128, MSG_PEEK);
>
> As server mode and the connection is not connected, vhostfd is -1 now, then
> r < 0 and errno is EBADF, how could it go into server handling in the "else if"
> block?
>
I don't think I follow you. In server mode, if vhostfd is -1, the check
if (dev->vhostfd >= 0) is false, so the code goes on to the else if (dev->is_server)
branch to handle server mode.
> > if (r == 0 || (r < 0 && errno != EAGAIN)) {
> > dev->status &= (~VIRTIO_NET_S_LINK_UP);
> > - PMD_DRV_LOG(ERR, "virtio-user port %u is
> down",
> > - hw->port_id);
> > +
> > /* Only client mode is available now. Once
> the
>
> Can you also correct this note as we support server mode now?
Ok.
>
> > * connection is broken, it can never be up
> > * again. Besides, this function could be called
> @@ -74,9
> > +134,14 @@ virtio_user_read_dev_config(struct virtio_hw *hw, size_t
> offset,
> > * callback cannot be unregistered here, set
> an
> > * alarm to do it.
> > */
> > - rte_eal_alarm_set(1,
> > + if (dev->connected) {
> > + dev->connected = false;
> > + PMD_DRV_LOG(ERR, "virtio-user
> port %u is down",
> > + hw->port_id);
> > + rte_eal_alarm_set(1,
> >
> virtio_user_delayed_handler,
> > (void *)hw);
> > + }
> > } else {
> > dev->status |= VIRTIO_NET_S_LINK_UP;
> > }
> > @@ -85,7 +150,10 @@ virtio_user_read_dev_config(struct virtio_hw *hw,
> size_t offset,
> > PMD_DRV_LOG(ERR, "error clearing
> O_NONBLOCK flag");
> > return;
> > }
> > - }
> > +
> > + } else if (dev->is_server)
> > + virtio_user_server_reconnection(dev);
> > +
> > *(uint16_t *)dst = dev->status;
> > }
> >
> > @@ -278,12 +346,15 @@ static const char *valid_args[] = {
> > VIRTIO_USER_ARG_QUEUE_SIZE,
> > #define VIRTIO_USER_ARG_INTERFACE_NAME "iface"
> > VIRTIO_USER_ARG_INTERFACE_NAME,
> > +#define VIRTIO_USER_ARG_SERVER_MODE "server"
> > + VIRTIO_USER_ARG_SERVER_MODE,
> > NULL
> > };
> >
> > #define VIRTIO_USER_DEF_CQ_EN 0
> > #define VIRTIO_USER_DEF_Q_NUM 1
> > #define VIRTIO_USER_DEF_Q_SZ 256
> > +#define VIRTIO_USER_DEF_SERVER_MODE 0
> >
> > static int
> > get_string_arg(const char *key __rte_unused, @@ -378,10 +449,12 @@
> > virtio_user_pmd_probe(struct rte_vdev_device *dev)
> > uint64_t queues = VIRTIO_USER_DEF_Q_NUM;
> > uint64_t cq = VIRTIO_USER_DEF_CQ_EN;
> > uint64_t queue_size = VIRTIO_USER_DEF_Q_SZ;
> > + uint64_t server_mode = VIRTIO_USER_DEF_SERVER_MODE;
> > char *path = NULL;
> > char *ifname = NULL;
> > char *mac_addr = NULL;
> > int ret = -1;
> > + struct virtio_user_dev *vu_dev = NULL;
> >
> > kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_args);
> > if (!kvlist) {
> > @@ -445,6 +518,15 @@ virtio_user_pmd_probe(struct rte_vdev_device
> *dev)
> > }
> > }
> >
> > + if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_SERVER_MODE) == 1)
> {
> > + if (rte_kvargs_process(kvlist,
> VIRTIO_USER_ARG_SERVER_MODE,
> > + &get_integer_arg, &server_mode) < 0) {
> > + PMD_INIT_LOG(ERR, "error to parse %s",
> > + VIRTIO_USER_ARG_SERVER_MODE);
> > + goto end;
> > + }
> > + }
> > +
> > if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_CQ_NUM) == 1) {
> > if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_CQ_NUM,
> > &get_integer_arg, &cq) < 0) { @@ -476,6
> +558,11 @@
> > virtio_user_pmd_probe(struct rte_vdev_device *dev)
> > }
> >
> > hw = eth_dev->data->dev_private;
> > + vu_dev = virtio_user_get_dev(hw);
> > + if (server_mode == 1)
> > + vu_dev->is_server = true;
> > + else
> > + vu_dev->is_server = false;
> > if (virtio_user_dev_init(hw->virtio_user_dev, path, queues,
> cq,
> > queue_size, mac_addr, &ifname) < 0) {
> > PMD_INIT_LOG(ERR, "virtio_user_dev_init fails");
> -----Original Message-----
> From: Yang, Zhiyong
> Sent: Wednesday, April 4, 2018 11:32 AM
> To: Tan, Jianfeng; dev@dpdk.org
> Cc: maxime.coquelin@redhat.com; thomas@monjalon.net; Wang, Zhihong;
> Bie, Tiwei
> Subject: RE: [PATCH v4 1/1] net/virtio-user: add support for server mode
>
> Hi Jiafeng,
>
> Thanks for your review and comments firstly.
> Reply inline.
>
> > -----Original Message-----
> > From: Tan, Jianfeng
> > Sent: Tuesday, April 3, 2018 11:16 PM
> > To: Yang, Zhiyong <zhiyong.yang@intel.com>; dev@dpdk.org
> > Cc: maxime.coquelin@redhat.com; thomas@monjalon.net; Wang, Zhihong
> > <zhihong.wang@intel.com>; Bie, Tiwei <tiwei.bie@intel.com>
> > Subject: Re: [PATCH v4 1/1] net/virtio-user: add support for server mode
> >
> >
> >
> > On 4/3/2018 8:20 PM, zhiyong.yang@intel.com wrote:
> > > virtio-user adds support for server mode in this patch.
> > >
> > > Client mode vhost-user startup firstly, server mode virtio-user
> > > startups and creates the socket file to exchange vhost messages.
> > >
> > > If the connection is broken, client mode vhost-user can support to
> > > reconnect virtio-user.
> > >
> > > Server mode virtio-user supports many times' vhost-user reconnections
> > > with the same parameter configurations.
> > >
> > > Release note is updated in the patch.
> >
> > With current implementation, we have to enable LSC; or no chance to
> accept
> > the coming connection. We shall point this out.
> >
> Ok.
>
> > And if possible, split this patch into multiple patches.
> >
> How to split?
I think it can be split into three:
- One for new dev parameter.
- One for server socket setup.
- One for LSC handling.
But it's not a big patch anyway; I'm OK with it if you think it's better to keep it as one patch.
>
> > >
> > > Signed-off-by: Zhiyong Yang <zhiyong.yang@intel.com>
> > > ---
> > > doc/guides/rel_notes/release_18_05.rst | 6 ++
> > > drivers/net/virtio/virtio_user/vhost_user.c | 64 ++++++++++++--
> > > drivers/net/virtio/virtio_user/virtio_user_dev.c | 45 +++++++---
> > > drivers/net/virtio/virtio_user/virtio_user_dev.h | 4 +
> > > drivers/net/virtio/virtio_user_ethdev.c | 103
> > +++++++++++++++++++++--
> > > 5 files changed, 194 insertions(+), 28 deletions(-)
> > >
> > > diff --git a/doc/guides/rel_notes/release_18_05.rst
> > > b/doc/guides/rel_notes/release_18_05.rst
> > > index 9cc77f893..f8897b2e9 100644
> > > --- a/doc/guides/rel_notes/release_18_05.rst
> > > +++ b/doc/guides/rel_notes/release_18_05.rst
> > > @@ -58,6 +58,12 @@ New Features
> > > * Added support for NVGRE, VXLAN and GENEVE filters in flow API.
> > > * Added support for DROP action in flow API.
> > >
> > > +* **Added support for virtio-user server mode.**
> > > + In a container environment if the vhost-user backend restarts,
> > > +there's no way
> > > + for it to reconnect to virtio-user. To address this, support for
> > > +server mode
> > > + is added. In this mode the socket file is created by virtio-user,
> > > +which the
> > > + backend connects to. This means that if the backend restarts, it
> > > +can reconnect
> > > + to virtio-user and continue communications.
> > >
> > > API Changes
> > > -----------
> > > diff --git a/drivers/net/virtio/virtio_user/vhost_user.c
> > > b/drivers/net/virtio/virtio_user/vhost_user.c
> > > index 91c6449bb..1b3401d4f 100644
> > > --- a/drivers/net/virtio/virtio_user/vhost_user.c
> > > +++ b/drivers/net/virtio/virtio_user/vhost_user.c
> > > @@ -378,6 +378,50 @@ vhost_user_sock(struct virtio_user_dev *dev,
> > > return 0;
> > > }
> > >
> > > +static void
> > > +virtio_user_set_block(int fd, bool enabled)
> >
> > This is only used once, no need to abstract it into a function?
>
> Ok.
>
> >
> > > +{
> > > + int f;
> > > +
> > > + f = fcntl(fd, F_GETFL);
> > > + if (enabled)
> > > + fcntl(fd, F_SETFL, f & ~O_NONBLOCK);
> > > + else
> > > + fcntl(fd, F_SETFL, f | O_NONBLOCK);
> > > +}
> > > +
> > > +#define MAX_VIRTIO_USER_BACKLOG 128
> >
> > We only allow one connection from vhost-user, so how about just making the
> > backlog queue length 1?
> >
> > > +static int
> > > +virtio_user_start_server(struct virtio_user_dev *dev, struct sockaddr_un *un)
> > > +{
> > > + int ret;
> > > + int fd = dev->listenfd;
> > > + int connectfd;
> > > +
> > > + ret = bind(fd, (struct sockaddr *)un, sizeof(*un));
> > > + if (ret < 0) {
> > > + PMD_DRV_LOG(ERR, "failed to bind to %s: %s; remove it and
> > try again\n",
> > > + dev->path, strerror(errno));
> > > + goto err;
> > > + }
> > > + ret = listen(fd, MAX_VIRTIO_USER_BACKLOG);
> > > + if (ret < 0)
> > > + goto err;
> > > +
> > > + connectfd = accept(fd, NULL, NULL);
> > > + if (connectfd >= 0)
> > > + dev->connected = true;
> > > + else
> > > + goto err;
> >
> > if (connectfd < 0)
> > goto err;
> >
> > dev->connected = true;
> > dev->vhostfd = connectfd;
> > ...
>
> Ok.
>
> >
> > > +
> > > + dev->vhostfd = connectfd;
> > > + virtio_user_set_block(connectfd, true);
> > > + return 0;
> > > +err:
> > > + close(dev->listenfd);
> > > + return -1;
> > > +}
> > > +
> > > /**
> > > * Set up environment to talk with a vhost user backend.
> > > *
> > > @@ -390,6 +434,7 @@ vhost_user_setup(struct virtio_user_dev *dev)
> > > {
> > > int fd;
> > > int flag;
> > > + int ret = 0;
> > > struct sockaddr_un un;
> > >
> > > fd = socket(AF_UNIX, SOCK_STREAM, 0); @@ -405,14 +450,21 @@
> > > vhost_user_setup(struct virtio_user_dev *dev)
> > > memset(&un, 0, sizeof(un));
> > > un.sun_family = AF_UNIX;
> > > snprintf(un.sun_path, sizeof(un.sun_path), "%s", dev->path);
> > > - if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
> > > - PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno));
> > > - close(fd);
> > > - return -1;
> > > +
> > > + if (dev->is_server) {
> > > + dev->listenfd = fd;
> > > + ret = virtio_user_start_server(dev, &un);
> > > + } else {
> > > + dev->vhostfd = fd;
> > > + if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
> > > + PMD_DRV_LOG(ERR, "connect error, %s",
> > strerror(errno));
> > > + close(fd);
> > > + return -1;
> > > + }
> > > + dev->connected = true;
> > > }
> > >
> > > - dev->vhostfd = fd;
> > > - return 0;
> > > + return ret;
> > > }
> > >
> > > static int
> > > diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c
> > > b/drivers/net/virtio/virtio_user/virtio_user_dev.c
> > > index f90fee9e5..dd9fa9bdf 100644
> > > --- a/drivers/net/virtio/virtio_user/virtio_user_dev.c
> > > +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c
> > > @@ -142,6 +142,9 @@ int virtio_user_stop_device(struct
> virtio_user_dev
> > *dev)
> > > {
> > > uint32_t i;
> > >
> > > + if (!dev->connected)
> > > + return -1;
> > > +
> > > for (i = 0; i < dev->max_queue_pairs; ++i)
> > > dev->ops->enable_qp(dev, i, 0);
> > >
> > > @@ -267,21 +270,27 @@ virtio_user_dev_setup(struct virtio_user_dev
> > *dev)
> > > dev->vhostfds = NULL;
> > > dev->tapfds = NULL;
> >
> > Add a check here:
> > if (dev->is_server && !is_vhost_user_by_type(dev->path))
> > return error;
>
> Ok.
>
> >
> > >
> > > - if (is_vhost_user_by_type(dev->path)) {
> > > - dev->ops = &ops_user;
> > > + if (dev->is_server) {
> > > + dev->ops = &ops_user;/* server mode only supports vhost
> > user*/
> > > } else {
> > > - dev->ops = &ops_kernel;
> > > -
> > > - dev->vhostfds = malloc(dev->max_queue_pairs *
> > sizeof(int));
> > > - dev->tapfds = malloc(dev->max_queue_pairs * sizeof(int));
> > > - if (!dev->vhostfds || !dev->tapfds) {
> > > - PMD_INIT_LOG(ERR, "Failed to malloc");
> > > - return -1;
> > > - }
> > > -
> > > - for (q = 0; q < dev->max_queue_pairs; ++q) {
> > > - dev->vhostfds[q] = -1;
> > > - dev->tapfds[q] = -1;
> > > + if (is_vhost_user_by_type(dev->path)) {
> > > + dev->ops = &ops_user;
> > > + } else {
> > > + dev->ops = &ops_kernel;
> > > +
> > > + dev->vhostfds = malloc(dev->max_queue_pairs *
> > > + sizeof(int));
> > > + dev->tapfds = malloc(dev->max_queue_pairs *
> > > + sizeof(int));
> > > + if (!dev->vhostfds || !dev->tapfds) {
> > > + PMD_INIT_LOG(ERR, "Failed to malloc");
> > > + return -1;
> > > + }
> > > +
> > > + for (q = 0; q < dev->max_queue_pairs; ++q) {
> > > + dev->vhostfds[q] = -1;
> > > + dev->tapfds[q] = -1;
> > > + }
> > > }
> > > }
> > >
> > > @@ -388,6 +397,11 @@ virtio_user_dev_uninit(struct virtio_user_dev
> > > *dev)
> > >
> > > close(dev->vhostfd);
> > >
> > > + if (dev->is_server && dev->listenfd >= 0) {
> > > + close(dev->listenfd);
> > > + dev->listenfd = -1;
> > > + }
> > > + dev->connected = false;
> > > if (dev->vhostfds) {
> > > for (i = 0; i < dev->max_queue_pairs; ++i)
> > > close(dev->vhostfds[i]);
> > > @@ -396,6 +410,9 @@ virtio_user_dev_uninit(struct virtio_user_dev
> *dev)
> > > }
> > >
> > > free(dev->ifname);
> > > +
> > > + if (dev->is_server)
> > > + unlink(dev->path);
> > > }
> > >
> > > static uint8_t
> > > diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.h
> > > b/drivers/net/virtio/virtio_user/virtio_user_dev.h
> > > index 64467b4f9..68056720d 100644
> > > --- a/drivers/net/virtio/virtio_user/virtio_user_dev.h
> > > +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.h
> > > @@ -6,6 +6,7 @@
> > > #define _VIRTIO_USER_DEV_H
> > >
> > > #include <limits.h>
> > > +#include <stdbool.h>
> > > #include "../virtio_pci.h"
> > > #include "../virtio_ring.h"
> > > #include "vhost.h"
> > > @@ -13,6 +14,9 @@
> > > struct virtio_user_dev {
> > > /* for vhost_user backend */
> > > int vhostfd;
> > > + int listenfd; /* listening fd */
> > > + bool connected; /* connection status */
> >
> > It seems unnecessary to add this field, as the connection status can be
> > deduced from (vhostfd >= 0).
>
> Ok, remove it.
>
> >
> > > + bool is_server; /* server or client mode */
> > >
> > > /* for vhost_kernel backend */
> > > char *ifname;
> > > diff --git a/drivers/net/virtio/virtio_user_ethdev.c
> > > b/drivers/net/virtio/virtio_user_ethdev.c
> > > index 263649006..5b8c8e291 100644
> > > --- a/drivers/net/virtio/virtio_user_ethdev.c
> > > +++ b/drivers/net/virtio/virtio_user_ethdev.c
> > > @@ -24,15 +24,76 @@
> > > #define virtio_user_get_dev(hw) \
> > > ((struct virtio_user_dev *)(hw)->virtio_user_dev)
> > >
> > > +static void
> > > +virtio_user_server_reconnection(struct virtio_user_dev *dev)
> >
> > s/reconnection/reconnect?
> >
>
> Ok, Good Suggestion.
>
> > > +{
> > > + int ret;
> > > + int flag;
> > > + int connectfd;
> > > + struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->port_id];
> > > +
> > > + if (dev->connected)
> > > + return;
> > > +
> > > + connectfd = accept(dev->listenfd, NULL, NULL);
> > > + if (connectfd < 0)
> > > + return;
> > > +
> > > + dev->vhostfd = connectfd;
> > > + flag = fcntl(connectfd, F_GETFD);
> > > + fcntl(connectfd, F_SETFL, flag & ~O_NONBLOCK);
> > > +
> > > + ret = virtio_user_start_device(dev);
> > > + if (ret < 0)
> > > + return;
> > > +
> > > + if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
> > > + if (rte_intr_disable(eth_dev->intr_handle) < 0) {
> > > + PMD_DRV_LOG(ERR, "interrupt disable failed");
> > > + return;
> > > + }
> > > + rte_intr_callback_unregister(eth_dev->intr_handle,
> > > + virtio_interrupt_handler,
> > > + eth_dev);
> > > + eth_dev->intr_handle->fd = connectfd;
> > > + rte_intr_callback_register(eth_dev->intr_handle,
> > > + virtio_interrupt_handler, eth_dev);
> > > +
> > > + if (rte_intr_enable(eth_dev->intr_handle) < 0) {
> > > + PMD_DRV_LOG(ERR, "interrupt enable failed");
> > > + return;
> > > + }
> > > + }
> > > + dev->connected = true;
> > > + PMD_INIT_LOG(NOTICE, "server mode virtio-user reconnection succeeds!");
> > > +}
> > > +
> > > static void
> > > virtio_user_delayed_handler(void *param)
> > > {
> > > struct virtio_hw *hw = (struct virtio_hw *)param;
> > > - struct rte_eth_dev *dev = &rte_eth_devices[hw->port_id];
> > > + struct rte_eth_dev *eth_dev = &rte_eth_devices[hw->port_id];
> > > + struct virtio_user_dev *dev = virtio_user_get_dev(hw);
> > >
> > > - rte_intr_callback_unregister(dev->intr_handle,
> > > - virtio_interrupt_handler,
> > > - dev);
> > > + if (rte_intr_disable(eth_dev->intr_handle) < 0) {
> > > + PMD_DRV_LOG(ERR, "interrupt disable failed");
> > > + return;
> > > + }
> > > + rte_intr_callback_unregister(eth_dev->intr_handle,
> > > + virtio_interrupt_handler, eth_dev);
> > > + if (dev->is_server) {
> > > + if (dev->vhostfd >= 0) {
> > > + close(dev->vhostfd);
> > > + dev->vhostfd = -1;
> > > + }
> > > + eth_dev->intr_handle->fd = dev->listenfd;
> > > + rte_intr_callback_register(eth_dev->intr_handle,
> > > + virtio_interrupt_handler, eth_dev);
> > > + if (rte_intr_enable(eth_dev->intr_handle) < 0) {
> > > + PMD_DRV_LOG(ERR, "interrupt enable failed");
> > > + return;
> > > + }
> > > + }
> > > }
> > >
> > > static void
> > > @@ -65,8 +126,7 @@ virtio_user_read_dev_config(struct virtio_hw *hw,
> > size_t offset,
> > > r = recv(dev->vhostfd, buf, 128, MSG_PEEK);
> >
> > In server mode, when the connection is not established, vhostfd is -1, so
> > r < 0 and errno is EBADF; how could it reach the server handling in the
> > "else if" block?
> >
>
> I don't think I follow you. In server mode, if vhostfd = -1, then
> if (dev->vhostfd >= 0) is false, so the code will go on to check else if (dev->is_server)
> to handle server mode.
I'm sorry, I overlooked the "if (dev->vhostfd >= 0)" check; you are right!
Thanks,
Jianfeng
>
> > > if (r == 0 || (r < 0 && errno != EAGAIN)) {
> > > dev->status &= (~VIRTIO_NET_S_LINK_UP);
> > > - PMD_DRV_LOG(ERR, "virtio-user port %u is
> > down",
> > > - hw->port_id);
> > > +
> > > /* Only client mode is available now. Once
> > the
> >
> > Can you also correct this note as we support server mode now?
>
> Ok.
> >
> > > * connection is broken, it can never be up
> > > * again. Besides, this function could be called
> > @@ -74,9
> > > +134,14 @@ virtio_user_read_dev_config(struct virtio_hw *hw, size_t
> > offset,
> > > * callback cannot be unregistered here, set
> > an
> > > * alarm to do it.
> > > */
> > > - rte_eal_alarm_set(1,
> > > + if (dev->connected) {
> > > + dev->connected = false;
> > > + PMD_DRV_LOG(ERR, "virtio-user
> > port %u is down",
> > > + hw->port_id);
> > > + rte_eal_alarm_set(1,
> > >
> > virtio_user_delayed_handler,
> > > (void *)hw);
> > > + }
> > > } else {
> > > dev->status |= VIRTIO_NET_S_LINK_UP;
> > > }
> > > @@ -85,7 +150,10 @@ virtio_user_read_dev_config(struct virtio_hw
> *hw,
> > size_t offset,
> > > PMD_DRV_LOG(ERR, "error clearing
> > O_NONBLOCK flag");
> > > return;
> > > }
> > > - }
> > > +
> > > + } else if (dev->is_server)
> > > + virtio_user_server_reconnection(dev);
> > > +
> > > *(uint16_t *)dst = dev->status;
> > > }
> > >
> > > @@ -278,12 +346,15 @@ static const char *valid_args[] = {
> > > VIRTIO_USER_ARG_QUEUE_SIZE,
> > > #define VIRTIO_USER_ARG_INTERFACE_NAME "iface"
> > > VIRTIO_USER_ARG_INTERFACE_NAME,
> > > +#define VIRTIO_USER_ARG_SERVER_MODE "server"
> > > + VIRTIO_USER_ARG_SERVER_MODE,
> > > NULL
> > > };
> > >
> > > #define VIRTIO_USER_DEF_CQ_EN 0
> > > #define VIRTIO_USER_DEF_Q_NUM 1
> > > #define VIRTIO_USER_DEF_Q_SZ 256
> > > +#define VIRTIO_USER_DEF_SERVER_MODE 0
> > >
> > > static int
> > > get_string_arg(const char *key __rte_unused, @@ -378,10 +449,12 @@
> > > virtio_user_pmd_probe(struct rte_vdev_device *dev)
> > > uint64_t queues = VIRTIO_USER_DEF_Q_NUM;
> > > uint64_t cq = VIRTIO_USER_DEF_CQ_EN;
> > > uint64_t queue_size = VIRTIO_USER_DEF_Q_SZ;
> > > + uint64_t server_mode = VIRTIO_USER_DEF_SERVER_MODE;
> > > char *path = NULL;
> > > char *ifname = NULL;
> > > char *mac_addr = NULL;
> > > int ret = -1;
> > > + struct virtio_user_dev *vu_dev = NULL;
> > >
> > > kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_args);
> > > if (!kvlist) {
> > > @@ -445,6 +518,15 @@ virtio_user_pmd_probe(struct rte_vdev_device
> > *dev)
> > > }
> > > }
> > >
> > > + if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_SERVER_MODE) == 1)
> > {
> > > + if (rte_kvargs_process(kvlist,
> > VIRTIO_USER_ARG_SERVER_MODE,
> > > + &get_integer_arg, &server_mode) < 0) {
> > > + PMD_INIT_LOG(ERR, "error to parse %s",
> > > + VIRTIO_USER_ARG_SERVER_MODE);
> > > + goto end;
> > > + }
> > > + }
> > > +
> > > if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_CQ_NUM) == 1) {
> > > if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_CQ_NUM,
> > > &get_integer_arg, &cq) < 0) { @@ -476,6
> > +558,11 @@
> > > virtio_user_pmd_probe(struct rte_vdev_device *dev)
> > > }
> > >
> > > hw = eth_dev->data->dev_private;
> > > + vu_dev = virtio_user_get_dev(hw);
> > > + if (server_mode == 1)
> > > + vu_dev->is_server = true;
> > > + else
> > > + vu_dev->is_server = false;
> > > if (virtio_user_dev_init(hw->virtio_user_dev, path, queues,
> > cq,
> > > queue_size, mac_addr, &ifname) < 0) {
> > > PMD_INIT_LOG(ERR, "virtio_user_dev_init fails");
@@ -58,6 +58,12 @@ New Features
* Added support for NVGRE, VXLAN and GENEVE filters in flow API.
* Added support for DROP action in flow API.
+* **Added support for virtio-user server mode.**
+ In a container environment if the vhost-user backend restarts, there's no way
+ for it to reconnect to virtio-user. To address this, support for server mode
+ is added. In this mode the socket file is created by virtio-user, which the
+ backend connects to. This means that if the backend restarts, it can reconnect
+ to virtio-user and continue communications.
API Changes
-----------
@@ -378,6 +378,50 @@ vhost_user_sock(struct virtio_user_dev *dev,
return 0;
}
+static void
+virtio_user_set_block(int fd, bool enabled)
+{
+ int f;
+
+ f = fcntl(fd, F_GETFL);
+ if (enabled)
+ fcntl(fd, F_SETFL, f & ~O_NONBLOCK);
+ else
+ fcntl(fd, F_SETFL, f | O_NONBLOCK);
+}
+
+#define MAX_VIRTIO_USER_BACKLOG 128
+static int
+virtio_user_start_server(struct virtio_user_dev *dev, struct sockaddr_un *un)
+{
+ int ret;
+ int fd = dev->listenfd;
+ int connectfd;
+
+ ret = bind(fd, (struct sockaddr *)un, sizeof(*un));
+ if (ret < 0) {
+ PMD_DRV_LOG(ERR, "failed to bind to %s: %s; remove it and try again\n",
+ dev->path, strerror(errno));
+ goto err;
+ }
+ ret = listen(fd, MAX_VIRTIO_USER_BACKLOG);
+ if (ret < 0)
+ goto err;
+
+ connectfd = accept(fd, NULL, NULL);
+ if (connectfd >= 0)
+ dev->connected = true;
+ else
+ goto err;
+
+ dev->vhostfd = connectfd;
+ virtio_user_set_block(connectfd, true);
+ return 0;
+err:
+ close(dev->listenfd);
+ return -1;
+}
+
/**
* Set up environment to talk with a vhost user backend.
*
@@ -390,6 +434,7 @@ vhost_user_setup(struct virtio_user_dev *dev)
{
int fd;
int flag;
+ int ret = 0;
struct sockaddr_un un;
fd = socket(AF_UNIX, SOCK_STREAM, 0);
@@ -405,14 +450,21 @@ vhost_user_setup(struct virtio_user_dev *dev)
memset(&un, 0, sizeof(un));
un.sun_family = AF_UNIX;
snprintf(un.sun_path, sizeof(un.sun_path), "%s", dev->path);
- if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
- PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno));
- close(fd);
- return -1;
+
+ if (dev->is_server) {
+ dev->listenfd = fd;
+ ret = virtio_user_start_server(dev, &un);
+ } else {
+ dev->vhostfd = fd;
+ if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) {
+ PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno));
+ close(fd);
+ return -1;
+ }
+ dev->connected = true;
}
- dev->vhostfd = fd;
- return 0;
+ return ret;
}
static int
@@ -142,6 +142,9 @@ int virtio_user_stop_device(struct virtio_user_dev *dev)
{
uint32_t i;
+ if (!dev->connected)
+ return -1;
+
for (i = 0; i < dev->max_queue_pairs; ++i)
dev->ops->enable_qp(dev, i, 0);
@@ -267,21 +270,27 @@ virtio_user_dev_setup(struct virtio_user_dev *dev)
dev->vhostfds = NULL;
dev->tapfds = NULL;
- if (is_vhost_user_by_type(dev->path)) {
- dev->ops = &ops_user;
+ if (dev->is_server) {
+ dev->ops = &ops_user;/* server mode only supports vhost user*/
} else {
- dev->ops = &ops_kernel;
-
- dev->vhostfds = malloc(dev->max_queue_pairs * sizeof(int));
- dev->tapfds = malloc(dev->max_queue_pairs * sizeof(int));
- if (!dev->vhostfds || !dev->tapfds) {
- PMD_INIT_LOG(ERR, "Failed to malloc");
- return -1;
- }
-
- for (q = 0; q < dev->max_queue_pairs; ++q) {
- dev->vhostfds[q] = -1;
- dev->tapfds[q] = -1;
+ if (is_vhost_user_by_type(dev->path)) {
+ dev->ops = &ops_user;
+ } else {
+ dev->ops = &ops_kernel;
+
+ dev->vhostfds = malloc(dev->max_queue_pairs *
+ sizeof(int));
+ dev->tapfds = malloc(dev->max_queue_pairs *
+ sizeof(int));
+ if (!dev->vhostfds || !dev->tapfds) {
+ PMD_INIT_LOG(ERR, "Failed to malloc");
+ return -1;
+ }
+
+ for (q = 0; q < dev->max_queue_pairs; ++q) {
+ dev->vhostfds[q] = -1;
+ dev->tapfds[q] = -1;
+ }
}
}
@@ -388,6 +397,11 @@ virtio_user_dev_uninit(struct virtio_user_dev *dev)
close(dev->vhostfd);
+ if (dev->is_server && dev->listenfd >= 0) {
+ close(dev->listenfd);
+ dev->listenfd = -1;
+ }
+ dev->connected = false;
if (dev->vhostfds) {
for (i = 0; i < dev->max_queue_pairs; ++i)
close(dev->vhostfds[i]);
@@ -396,6 +410,9 @@ virtio_user_dev_uninit(struct virtio_user_dev *dev)
}
free(dev->ifname);
+
+ if (dev->is_server)
+ unlink(dev->path);
}
static uint8_t
@@ -6,6 +6,7 @@
#define _VIRTIO_USER_DEV_H
#include <limits.h>
+#include <stdbool.h>
#include "../virtio_pci.h"
#include "../virtio_ring.h"
#include "vhost.h"
@@ -13,6 +14,9 @@
struct virtio_user_dev {
/* for vhost_user backend */
int vhostfd;
+ int listenfd; /* listening fd */
+ bool connected; /* connection status */
+ bool is_server; /* server or client mode */
/* for vhost_kernel backend */
char *ifname;
@@ -24,15 +24,76 @@
#define virtio_user_get_dev(hw) \
((struct virtio_user_dev *)(hw)->virtio_user_dev)
+static void
+virtio_user_server_reconnection(struct virtio_user_dev *dev)
+{
+ int ret;
+ int flag;
+ int connectfd;
+ struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->port_id];
+
+ if (dev->connected)
+ return;
+
+ connectfd = accept(dev->listenfd, NULL, NULL);
+ if (connectfd < 0)
+ return;
+
+ dev->vhostfd = connectfd;
+ flag = fcntl(connectfd, F_GETFD);
+ fcntl(connectfd, F_SETFL, flag & ~O_NONBLOCK);
+
+ ret = virtio_user_start_device(dev);
+ if (ret < 0)
+ return;
+
+ if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
+ if (rte_intr_disable(eth_dev->intr_handle) < 0) {
+ PMD_DRV_LOG(ERR, "interrupt disable failed");
+ return;
+ }
+ rte_intr_callback_unregister(eth_dev->intr_handle,
+ virtio_interrupt_handler,
+ eth_dev);
+ eth_dev->intr_handle->fd = connectfd;
+ rte_intr_callback_register(eth_dev->intr_handle,
+ virtio_interrupt_handler, eth_dev);
+
+ if (rte_intr_enable(eth_dev->intr_handle) < 0) {
+ PMD_DRV_LOG(ERR, "interrupt enable failed");
+ return;
+ }
+ }
+ dev->connected = true;
+ PMD_INIT_LOG(NOTICE, "server mode virtio-user reconnection succeeds!");
+}
+
static void
virtio_user_delayed_handler(void *param)
{
struct virtio_hw *hw = (struct virtio_hw *)param;
- struct rte_eth_dev *dev = &rte_eth_devices[hw->port_id];
+ struct rte_eth_dev *eth_dev = &rte_eth_devices[hw->port_id];
+ struct virtio_user_dev *dev = virtio_user_get_dev(hw);
- rte_intr_callback_unregister(dev->intr_handle,
- virtio_interrupt_handler,
- dev);
+ if (rte_intr_disable(eth_dev->intr_handle) < 0) {
+ PMD_DRV_LOG(ERR, "interrupt disable failed");
+ return;
+ }
+ rte_intr_callback_unregister(eth_dev->intr_handle,
+ virtio_interrupt_handler, eth_dev);
+ if (dev->is_server) {
+ if (dev->vhostfd >= 0) {
+ close(dev->vhostfd);
+ dev->vhostfd = -1;
+ }
+ eth_dev->intr_handle->fd = dev->listenfd;
+ rte_intr_callback_register(eth_dev->intr_handle,
+ virtio_interrupt_handler, eth_dev);
+ if (rte_intr_enable(eth_dev->intr_handle) < 0) {
+ PMD_DRV_LOG(ERR, "interrupt enable failed");
+ return;
+ }
+ }
}
static void
@@ -65,8 +126,7 @@ virtio_user_read_dev_config(struct virtio_hw *hw, size_t offset,
r = recv(dev->vhostfd, buf, 128, MSG_PEEK);
if (r == 0 || (r < 0 && errno != EAGAIN)) {
dev->status &= (~VIRTIO_NET_S_LINK_UP);
- PMD_DRV_LOG(ERR, "virtio-user port %u is down",
- hw->port_id);
+
/* Only client mode is available now. Once the
* connection is broken, it can never be up
* again. Besides, this function could be called
@@ -74,9 +134,14 @@ virtio_user_read_dev_config(struct virtio_hw *hw, size_t offset,
* callback cannot be unregistered here, set an
* alarm to do it.
*/
- rte_eal_alarm_set(1,
+ if (dev->connected) {
+ dev->connected = false;
+ PMD_DRV_LOG(ERR, "virtio-user port %u is down",
+ hw->port_id);
+ rte_eal_alarm_set(1,
virtio_user_delayed_handler,
(void *)hw);
+ }
} else {
dev->status |= VIRTIO_NET_S_LINK_UP;
}
@@ -85,7 +150,10 @@ virtio_user_read_dev_config(struct virtio_hw *hw, size_t offset,
PMD_DRV_LOG(ERR, "error clearing O_NONBLOCK flag");
return;
}
- }
+
+ } else if (dev->is_server)
+ virtio_user_server_reconnection(dev);
+
*(uint16_t *)dst = dev->status;
}
@@ -278,12 +346,15 @@ static const char *valid_args[] = {
VIRTIO_USER_ARG_QUEUE_SIZE,
#define VIRTIO_USER_ARG_INTERFACE_NAME "iface"
VIRTIO_USER_ARG_INTERFACE_NAME,
+#define VIRTIO_USER_ARG_SERVER_MODE "server"
+ VIRTIO_USER_ARG_SERVER_MODE,
NULL
};
#define VIRTIO_USER_DEF_CQ_EN 0
#define VIRTIO_USER_DEF_Q_NUM 1
#define VIRTIO_USER_DEF_Q_SZ 256
+#define VIRTIO_USER_DEF_SERVER_MODE 0
static int
get_string_arg(const char *key __rte_unused,
@@ -378,10 +449,12 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
uint64_t queues = VIRTIO_USER_DEF_Q_NUM;
uint64_t cq = VIRTIO_USER_DEF_CQ_EN;
uint64_t queue_size = VIRTIO_USER_DEF_Q_SZ;
+ uint64_t server_mode = VIRTIO_USER_DEF_SERVER_MODE;
char *path = NULL;
char *ifname = NULL;
char *mac_addr = NULL;
int ret = -1;
+ struct virtio_user_dev *vu_dev = NULL;
kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_args);
if (!kvlist) {
@@ -445,6 +518,15 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
}
}
+ if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_SERVER_MODE) == 1) {
+ if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_SERVER_MODE,
+ &get_integer_arg, &server_mode) < 0) {
+ PMD_INIT_LOG(ERR, "error to parse %s",
+ VIRTIO_USER_ARG_SERVER_MODE);
+ goto end;
+ }
+ }
+
if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_CQ_NUM) == 1) {
if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_CQ_NUM,
&get_integer_arg, &cq) < 0) {
@@ -476,6 +558,11 @@ virtio_user_pmd_probe(struct rte_vdev_device *dev)
}
hw = eth_dev->data->dev_private;
+ vu_dev = virtio_user_get_dev(hw);
+ if (server_mode == 1)
+ vu_dev->is_server = true;
+ else
+ vu_dev->is_server = false;
if (virtio_user_dev_init(hw->virtio_user_dev, path, queues, cq,
queue_size, mac_addr, &ifname) < 0) {
PMD_INIT_LOG(ERR, "virtio_user_dev_init fails");