From patchwork Fri Aug 25 09:40:51 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Jianfeng Tan X-Patchwork-Id: 27948 X-Patchwork-Delegate: yuanhan.liu@linux.intel.com Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [IPv6:::1]) by dpdk.org (Postfix) with ESMTP id A7E9B9113; Fri, 25 Aug 2017 11:39:40 +0200 (CEST) Received: from mga14.intel.com (mga14.intel.com [192.55.52.115]) by dpdk.org (Postfix) with ESMTP id AD6637D8F for ; Fri, 25 Aug 2017 11:39:36 +0200 (CEST) Received: from fmsmga006.fm.intel.com ([10.253.24.20]) by fmsmga103.fm.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 25 Aug 2017 02:39:36 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.41,424,1498546800"; d="scan'208";a="144456076" Received: from dpdk06.sh.intel.com ([10.67.111.82]) by fmsmga006.fm.intel.com with ESMTP; 25 Aug 2017 02:39:34 -0700 From: Jianfeng Tan To: dev@dpdk.org Cc: bruce.richardson@intel.com, konstantin.ananyev@intel.com, pablo.de.lara.guarch@intel.com, thomas@monjalon.net, yliu@fridaylinux.org, maxime.coquelin@redhat.com, mtetsuyah@gmail.com, ferruh.yigit@intel.com, Jianfeng Tan Date: Fri, 25 Aug 2017 09:40:51 +0000 Message-Id: <1503654052-84730-12-git-send-email-jianfeng.tan@intel.com> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1503654052-84730-1-git-send-email-jianfeng.tan@intel.com> References: <1503654052-84730-1-git-send-email-jianfeng.tan@intel.com> Subject: [dpdk-dev] [PATCH 11/12] net/vhost: support to run in the secondary process X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Support to run vhost-pmd vdev in the secondary process. We obtain information, like memory regions, kickfd, callfd, through primary/secondary communication channel. And by invoking rte_vhost_set_vring_effective_fd, we can set the kickfd which can be recognized by the secondary process. Signed-off-by: Jianfeng Tan --- drivers/net/vhost/rte_eth_vhost.c | 158 +++++++++++++++++++++++++++++++++++--- 1 file changed, 146 insertions(+), 12 deletions(-) diff --git a/drivers/net/vhost/rte_eth_vhost.c b/drivers/net/vhost/rte_eth_vhost.c index 0dac5e6..7120105 100644 --- a/drivers/net/vhost/rte_eth_vhost.c +++ b/drivers/net/vhost/rte_eth_vhost.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -46,6 +47,16 @@ #include "rte_eth_vhost.h" +#define VHOST_MSG_TYPE_REGIONS 1 +#define VHOST_MSG_TYPE_SET_FDS 2 + +struct vhost_params { + int type; + int vid; + int vring_idx; + struct rte_vhost_mem_region regions[0]; +}; + enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM}; #define ETH_VHOST_IFACE_ARG "iface" @@ -550,6 +561,67 @@ update_queuing_status(struct rte_eth_dev *dev) } static int +share_device(int vid) +{ + uint32_t i, vring_num; + int len; + int fds[8]; + struct rte_vhost_memory *mem; + struct vhost_params *params; + struct rte_vhost_vring vring; + + /* share mem table */ + if (rte_vhost_get_mem_table(vid, &mem) < 0) { + printf("Failed to get mem table\n"); + return 0; + } + for (i = 0; i < mem->nregions; ++i) + fds[i] = mem->regions[i].fd; + + len = sizeof(struct rte_vhost_mem_region) * mem->nregions; + params = malloc(sizeof(*params) + len); + if (params == NULL) { + printf("Failed to allocate memory\n"); + return -1; + } + + params->type = VHOST_MSG_TYPE_REGIONS; + params->vid = vid; + memcpy(params->regions, mem->regions, len); + + if (rte_eal_primary_secondary_sendmsg("vhost pmd", params, + sizeof(*params) + len, + fds, mem->nregions) < 0) { + printf("Failed to share mem table\n"); + free(params); + return -1; + } + + /* share callfd and kickfd */ + params->type = VHOST_MSG_TYPE_SET_FDS; + vring_num = rte_vhost_get_vring_num(vid); + for (i = 0; i < vring_num; i++) { + if (rte_vhost_get_vhost_vring(vid, i, &vring) < 0) { + printf("Failed to get vring, idx = %d\n", i); + free(params); + return -1; + } + + params->vring_idx = i; + fds[0] = vring.callfd; + fds[1] = vring.kickfd; + if (rte_eal_primary_secondary_sendmsg("vhost pmd", params, + sizeof(*params), + fds, 2) < 0) { + printf("Failed to set fds\n"); + return -1; + } + } + + return 0; +} + +static int new_device(int vid) { struct rte_eth_dev *eth_dev; @@ -610,6 +682,8 @@ new_device(int vid) _rte_eth_dev_callback_process(eth_dev, RTE_ETH_EVENT_INTR_LSC, NULL, NULL); + share_device(vid); + return 0; } @@ -1025,13 +1099,6 @@ eth_dev_vhost_create(struct rte_vdev_device *dev, char *iface_name, RTE_LOG(INFO, PMD, "Creating VHOST-USER backend on numa socket %u\n", numa_node); - /* now do all data allocation - for eth_dev structure and internal - * (private) data - */ - data = rte_zmalloc_socket(name, sizeof(*data), 0, numa_node); - if (data == NULL) - goto error; - list = rte_zmalloc_socket(name, sizeof(*list), 0, numa_node); if (list == NULL) goto error; @@ -1073,11 +1140,7 @@ eth_dev_vhost_create(struct rte_vdev_device *dev, char *iface_name, rte_spinlock_init(&vring_state->lock); vring_states[eth_dev->data->port_id] = vring_state; - /* We'll replace the 'data' originally allocated by eth_dev. So the - * vhost PMD resources won't be shared between multi processes. - */ - rte_memcpy(data, eth_dev->data, sizeof(*data)); - eth_dev->data = data; + data = eth_dev->data; data->nb_rx_queues = queues; data->nb_tx_queues = queues; @@ -1125,6 +1188,33 @@ eth_dev_vhost_create(struct rte_vdev_device *dev, char *iface_name, return -1; } +static int +eth_dev_vhost_attach(struct rte_vdev_device *dev) +{ + struct rte_eth_dev *eth_dev = NULL; + struct rte_eth_dev_data *data = NULL; + + RTE_LOG(INFO, PMD, "Attach vhost user port\n"); + + /* reserve an ethdev entry */ + eth_dev = rte_eth_vdev_allocate(dev, sizeof(struct pmd_internal)); + if (eth_dev == NULL) + goto error; + + eth_dev->dev_ops = &ops; + + /* finally assign rx and tx ops */ + eth_dev->rx_pkt_burst = eth_vhost_rx; + eth_dev->tx_pkt_burst = eth_vhost_tx; + + data = eth_dev->data; + + return data->port_id; + +error: + return -1; +} + static inline int open_iface(const char *key __rte_unused, const char *value, void *extra_args) { @@ -1154,6 +1244,39 @@ open_int(const char *key __rte_unused, const char *value, void *extra_args) } static int +vhost_pmd_action(const char *params, int len __rte_unused, + int fds[], int fds_num) +{ + int i; + void *base_addr; + const struct vhost_params *p = (const struct vhost_params *)params; + const struct rte_vhost_mem_region *regions; + + switch (p->type) { + case VHOST_MSG_TYPE_REGIONS: + regions = (const void *)p->regions; + for (i = 0; i < fds_num; ++i) { + base_addr = mmap(regions[i].mmap_addr, + regions[i].mmap_size, + PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_SHARED, fds[i], 0); + if (base_addr != regions[i].mmap_addr) { + RTE_LOG(INFO, PMD, "mmap error"); + break; + } + } + break; + case VHOST_MSG_TYPE_SET_FDS: + rte_vhost_set_vring_effective_fd(p->vid, + p->vring_idx, + fds[0], fds[1]); + break; + } + + return 0; +} + +static int rte_pmd_vhost_probe(struct rte_vdev_device *dev) { struct rte_kvargs *kvlist = NULL; @@ -1167,6 +1290,17 @@ rte_pmd_vhost_probe(struct rte_vdev_device *dev) RTE_LOG(INFO, PMD, "Initializing pmd_vhost for %s\n", rte_vdev_device_name(dev)); + if (rte_eal_primary_secondary_add_action("vhost pmd", + vhost_pmd_action) < 0) { + RTE_LOG(ERR, PMD, "vhost fails to add action\n"); + return -1; + } + + if (rte_eal_process_type() == RTE_PROC_SECONDARY) { + ret = eth_dev_vhost_attach(dev); + return (ret >= 0) ? 0 : -1; + } + kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_arguments); if (kvlist == NULL) return -1;