From patchwork Tue Mar 28 11:54:08 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Allain Legacy X-Patchwork-Id: 22594 X-Patchwork-Delegate: ferruh.yigit@amd.com Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [IPv6:::1]) by dpdk.org (Postfix) with ESMTP id 4771CD208; Tue, 28 Mar 2017 13:56:06 +0200 (CEST) Received: from mail5.wrs.com (mail5.windriver.com [192.103.53.11]) by dpdk.org (Postfix) with ESMTP id 0FC8BD072 for ; Tue, 28 Mar 2017 13:55:01 +0200 (CEST) Received: from ALA-HCA.corp.ad.wrs.com (ala-hca.corp.ad.wrs.com [147.11.189.40]) by mail5.wrs.com (8.15.2/8.15.2) with ESMTPS id v2SBswwx010925 (version=TLSv1 cipher=AES128-SHA bits=128 verify=OK); Tue, 28 Mar 2017 04:54:58 -0700 Received: from yow-cgts4-lx.wrs.com (128.224.145.137) by ALA-HCA.corp.ad.wrs.com (147.11.189.50) with Microsoft SMTP Server (TLS) id 14.3.294.0; Tue, 28 Mar 2017 04:54:56 -0700 From: Allain Legacy To: CC: , , , , , , , , , , <3chas3@gmail.com> Date: Tue, 28 Mar 2017 07:54:08 -0400 Message-ID: <20170328115409.23487-14-allain.legacy@windriver.com> X-Mailer: git-send-email 2.12.1 In-Reply-To: <20170328115409.23487-1-allain.legacy@windriver.com> References: <20170323112413.175202-1-allain.legacy@windriver.com> <20170328115409.23487-1-allain.legacy@windriver.com> MIME-Version: 1.0 X-Originating-IP: [128.224.145.137] Subject: [dpdk-dev] [PATCH v6 13/14] net/avp: migration interrupt handling X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" This commit introduces changes required to support VM live-migration. 
This is done by registering and responding to interrupts coming from the host to signal that the memory is about to be made invalid and replaced with a new memory zone on the destination compute node. Enabling and disabling of the interrupts are maintained outside of the start/stop functions because they must be enabled for the lifetime of the device. This is so that host interrupts are serviced and acked even in cases where the app may have stopped the device. Signed-off-by: Allain Legacy Signed-off-by: Matt Peters --- drivers/net/avp/avp_ethdev.c | 372 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 372 insertions(+) diff --git a/drivers/net/avp/avp_ethdev.c b/drivers/net/avp/avp_ethdev.c index 9824190a0..e166867aa 100644 --- a/drivers/net/avp/avp_ethdev.c +++ b/drivers/net/avp/avp_ethdev.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include #include @@ -60,6 +61,8 @@ #include "avp_logs.h" +static int avp_dev_create(struct rte_pci_device *pci_dev, + struct rte_eth_dev *eth_dev); static int avp_dev_configure(struct rte_eth_dev *dev); static int avp_dev_start(struct rte_eth_dev *dev); @@ -174,6 +177,7 @@ static const struct eth_dev_ops avp_eth_dev_ops = { #define AVP_F_PROMISC (1 << 1) #define AVP_F_CONFIGURED (1 << 2) #define AVP_F_LINKUP (1 << 3) +#define AVP_F_DETACHED (1 << 4) /**@} */ /* Ethernet device validation marker */ @@ -209,6 +213,9 @@ struct avp_dev { struct rte_avp_fifo *free_q[RTE_AVP_MAX_QUEUES]; /**< To be freed mbufs queue */ + /* mutual exclusion over the 'flags' and 'resp_q/req_q' fields */ + rte_spinlock_t lock; + /* For request & response */ struct rte_avp_fifo *req_q; /**< Request queue */ struct rte_avp_fifo *resp_q; /**< Response queue */ @@ -496,6 +503,46 @@ avp_dev_check_regions(struct rte_eth_dev *eth_dev) return 0; } +static int +avp_dev_detach(struct rte_eth_dev *eth_dev) +{ + struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + int ret; + + PMD_DRV_LOG(NOTICE, "Detaching port %u 
from AVP device 0x%" PRIx64 "\n", + eth_dev->data->port_id, avp->device_id); + + rte_spinlock_lock(&avp->lock); + + if (avp->flags & AVP_F_DETACHED) { + PMD_DRV_LOG(NOTICE, "port %u already detached\n", + eth_dev->data->port_id); + ret = 0; + goto unlock; + } + + /* shutdown the device first so the host stops sending us packets. */ + ret = avp_dev_ctrl_shutdown(eth_dev); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Failed to send/recv shutdown to host, ret=%d\n", + ret); + avp->flags &= ~AVP_F_DETACHED; + goto unlock; + } + + avp->flags |= AVP_F_DETACHED; + rte_wmb(); + + /* fixed 1 ms grace period for queues to notice the detached flag */ + rte_delay_ms(1); + + ret = 0; + +unlock: + rte_spinlock_unlock(&avp->lock); + return ret; +} + static void _avp_set_rx_queue_mappings(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id) { @@ -564,6 +611,240 @@ _avp_set_queue_counts(struct rte_eth_dev *eth_dev) avp->num_tx_queues, avp->num_rx_queues); } +static int +avp_dev_attach(struct rte_eth_dev *eth_dev) +{ + struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + struct rte_avp_device_config config; + unsigned int i; + int ret; + + PMD_DRV_LOG(NOTICE, "Attaching port %u to AVP device 0x%" PRIx64 "\n", + eth_dev->data->port_id, avp->device_id); + + rte_spinlock_lock(&avp->lock); + + if (!(avp->flags & AVP_F_DETACHED)) { + PMD_DRV_LOG(NOTICE, "port %u already attached\n", + eth_dev->data->port_id); + ret = 0; + goto unlock; + } + + /* + * make sure that the detached flag is set prior to reconfiguring the + * queues. + */ + avp->flags |= AVP_F_DETACHED; + rte_wmb(); + + /* + * re-run the device create utility which will parse the new host info + * and setup the AVP device queue pointers. 
+ */ + ret = avp_dev_create(AVP_DEV_TO_PCI(eth_dev), eth_dev); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Failed to re-create AVP device, ret=%d\n", + ret); + goto unlock; + } + + if (avp->flags & AVP_F_CONFIGURED) { + /* + * Update the receive queue mapping to handle cases where the + * source and destination hosts have different queue + * requirements. As long as the DETACHED flag is asserted the + * queue table should not be referenced so it should be safe to + * update it. + */ + _avp_set_queue_counts(eth_dev); + for (i = 0; i < eth_dev->data->nb_rx_queues; i++) + _avp_set_rx_queue_mappings(eth_dev, i); + + /* + * Update the host with our config details so that it knows the + * device is active. + */ + memset(&config, 0, sizeof(config)); + config.device_id = avp->device_id; + config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK; + config.driver_version = AVP_DPDK_DRIVER_VERSION; + config.features = avp->features; + config.num_tx_queues = avp->num_tx_queues; + config.num_rx_queues = avp->num_rx_queues; + config.if_up = !!(avp->flags & AVP_F_LINKUP); + + ret = avp_dev_ctrl_set_config(eth_dev, &config); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n", + ret); + goto unlock; + } + } + + rte_wmb(); + avp->flags &= ~AVP_F_DETACHED; + + ret = 0; + +unlock: + rte_spinlock_unlock(&avp->lock); + return ret; +} + +static void +avp_dev_interrupt_handler(struct rte_intr_handle *intr_handle, + void *data) +{ + struct rte_eth_dev *eth_dev = data; + struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev); + void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr; + uint32_t status, value; + int ret; + + if (registers == NULL) + rte_panic("no mapped MMIO register space\n"); + + /* read the interrupt status register + * note: this register clears on read so all raised interrupts must be + * handled or remembered for later processing + */ + status = AVP_READ32( + RTE_PTR_ADD(registers, + RTE_AVP_INTERRUPT_STATUS_OFFSET)); + + if (status & 
RTE_AVP_MIGRATION_INTERRUPT_MASK) { + /* migration interrupt: act on the current migration status */ + value = AVP_READ32( + RTE_PTR_ADD(registers, + RTE_AVP_MIGRATION_STATUS_OFFSET)); + switch (value) { + case RTE_AVP_MIGRATION_DETACHED: + ret = avp_dev_detach(eth_dev); + break; + case RTE_AVP_MIGRATION_ATTACHED: + ret = avp_dev_attach(eth_dev); + break; + default: + PMD_DRV_LOG(ERR, "unexpected migration status, status=%u\n", + value); + ret = -EINVAL; + } + + /* acknowledge the request by writing out our current status */ + value = (ret == 0 ? value : RTE_AVP_MIGRATION_ERROR); + AVP_WRITE32(value, + RTE_PTR_ADD(registers, + RTE_AVP_MIGRATION_ACK_OFFSET)); + + PMD_DRV_LOG(NOTICE, "AVP migration interrupt handled\n"); + } + + if (status & ~RTE_AVP_MIGRATION_INTERRUPT_MASK) + PMD_DRV_LOG(WARNING, "AVP unexpected interrupt, status=0x%08x\n", + status); + + /* re-enable UIO interrupt handling */ + ret = rte_intr_enable(intr_handle); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Failed to re-enable UIO interrupts, ret=%d\n", + ret); + /* continue */ + } +} + +static int +avp_dev_enable_interrupts(struct rte_eth_dev *eth_dev) +{ + struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev); + void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr; + int ret; + + if (registers == NULL) + return -EINVAL; + + /* enable UIO interrupt handling */ + ret = rte_intr_enable(&pci_dev->intr_handle); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Failed to enable UIO interrupts, ret=%d\n", + ret); + return ret; + } + + /* inform the device that all interrupts are enabled */ + AVP_WRITE32(RTE_AVP_APP_INTERRUPTS_MASK, + RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET)); + + return 0; +} + +static int +avp_dev_disable_interrupts(struct rte_eth_dev *eth_dev) +{ + struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev); + void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr; + int ret; + + if (registers == NULL) + return 0; + + /* inform the device that all interrupts are disabled */ + 
AVP_WRITE32(RTE_AVP_NO_INTERRUPTS_MASK, + RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET)); + + /* disable UIO interrupt handling */ + ret = rte_intr_disable(&pci_dev->intr_handle); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Failed to disable UIO interrupts, ret=%d\n", + ret); + return ret; + } + + return 0; +} + +static int +avp_dev_setup_interrupts(struct rte_eth_dev *eth_dev) +{ + struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev); + int ret; + + /* register a callback handler with UIO for interrupt notifications */ + ret = rte_intr_callback_register(&pci_dev->intr_handle, + avp_dev_interrupt_handler, + (void *)eth_dev); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Failed to register UIO interrupt callback, ret=%d\n", + ret); + return ret; + } + + /* enable interrupt processing */ + return avp_dev_enable_interrupts(eth_dev); +} + +static int +avp_dev_migration_pending(struct rte_eth_dev *eth_dev) +{ + struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev); + void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr; + uint32_t value; + + if (registers == NULL) + return 0; + + value = AVP_READ32(RTE_PTR_ADD(registers, + RTE_AVP_MIGRATION_STATUS_OFFSET)); + if (value == RTE_AVP_MIGRATION_DETACHED) { + /* migration is in progress; always (re)write the ack register */ + AVP_WRITE32(value, + RTE_PTR_ADD(registers, + RTE_AVP_MIGRATION_ACK_OFFSET)); + return 1; + } + return 0; +} + /* * create a AVP device using the supplied device info by first translating it * to guest address space(s). 
@@ -616,6 +897,7 @@ avp_dev_create(struct rte_pci_device *pci_dev, avp->port_id = eth_dev->data->port_id; avp->host_mbuf_size = host_info->mbuf_size; avp->host_features = host_info->features; + rte_spinlock_init(&avp->lock); memcpy(&avp->ethaddr.addr_bytes[0], host_info->ethaddr, ETHER_ADDR_LEN); /* adjust max values to not exceed our max */ @@ -729,6 +1011,12 @@ eth_avp_dev_init(struct rte_eth_dev *eth_dev) eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE; + /* Check current migration status */ + if (avp_dev_migration_pending(eth_dev)) { + PMD_DRV_LOG(ERR, "VM live migration operation in progress\n"); + return -EBUSY; + } + /* Check BAR resources */ ret = avp_dev_check_regions(eth_dev); if (ret < 0) { @@ -737,6 +1025,13 @@ eth_avp_dev_init(struct rte_eth_dev *eth_dev) return ret; } + /* Enable interrupts */ + ret = avp_dev_setup_interrupts(eth_dev); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Failed to enable interrupts, ret=%d\n", ret); + return ret; + } + /* Handle each subtype */ ret = avp_dev_create(pci_dev, eth_dev); if (ret < 0) { @@ -761,12 +1056,20 @@ eth_avp_dev_init(struct rte_eth_dev *eth_dev) static int eth_avp_dev_uninit(struct rte_eth_dev *eth_dev) { + int ret; + if (rte_eal_process_type() != RTE_PROC_PRIMARY) return -EPERM; if (eth_dev->data == NULL) return 0; + ret = avp_dev_disable_interrupts(eth_dev); + if (ret != 0) { + PMD_DRV_LOG(ERR, "Failed to disable interrupts, ret=%d\n", ret); + return ret; + } + if (eth_dev->data->mac_addrs != NULL) { rte_free(eth_dev->data->mac_addrs); eth_dev->data->mac_addrs = NULL; @@ -1129,6 +1432,11 @@ avp_recv_scattered_pkts(void *rx_queue, unsigned int port_id; unsigned int i; + if (unlikely(avp->flags & AVP_F_DETACHED)) { + /* VM live migration in progress */ + return 0; + } + guest_mbuf_size = avp->guest_mbuf_size; port_id = avp->port_id; rx_q = avp->rx_q[rxq->queue_id]; @@ -1223,6 +1531,11 @@ avp_recv_pkts(void *rx_queue, char *pkt_data; unsigned int i; + if (unlikely(avp->flags & AVP_F_DETACHED)) { + /* VM live 
migration in progress */ + return 0; + } + rx_q = avp->rx_q[rxq->queue_id]; free_q = avp->free_q[rxq->queue_id]; @@ -1430,6 +1743,13 @@ avp_xmit_scattered_pkts(void *tx_queue, unsigned int i; orig_nb_pkts = nb_pkts; + if (unlikely(avp->flags & AVP_F_DETACHED)) { + /* VM live migration in progress */ + /* TODO ... buffer for X packets then drop? */ + txq->errors += nb_pkts; + return 0; + } + tx_q = avp->tx_q[txq->queue_id]; alloc_q = avp->alloc_q[txq->queue_id]; @@ -1542,6 +1862,13 @@ avp_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) char *pkt_data; unsigned int i; + if (unlikely(avp->flags & AVP_F_DETACHED)) { + /* VM live migration in progress */ + /* TODO ... buffer for X packets then drop?! */ + txq->errors += nb_pkts; + return 0; + } + tx_q = avp->tx_q[txq->queue_id]; alloc_q = avp->alloc_q[txq->queue_id]; @@ -1674,6 +2001,13 @@ avp_dev_configure(struct rte_eth_dev *eth_dev) void *addr; int ret; + rte_spinlock_lock(&avp->lock); + if (avp->flags & AVP_F_DETACHED) { + PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n"); + ret = -ENOTSUP; + goto unlock; + } + addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr; host_info = (struct rte_avp_device_info *)addr; @@ -1705,6 +2039,7 @@ avp_dev_configure(struct rte_eth_dev *eth_dev) ret = 0; unlock: + rte_spinlock_unlock(&avp->lock); return ret; } @@ -1714,6 +2049,13 @@ avp_dev_start(struct rte_eth_dev *eth_dev) struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); int ret; + rte_spinlock_lock(&avp->lock); + if (avp->flags & AVP_F_DETACHED) { + PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n"); + ret = -ENOTSUP; + goto unlock; + } + /* disable features that we do not support */ eth_dev->data->dev_conf.rxmode.hw_ip_checksum = 0; eth_dev->data->dev_conf.rxmode.hw_vlan_filter = 0; @@ -1734,6 +2076,7 @@ avp_dev_start(struct rte_eth_dev *eth_dev) ret = 0; unlock: + rte_spinlock_unlock(&avp->lock); return ret; } @@ -1743,6 +2086,13 @@ 
avp_dev_stop(struct rte_eth_dev *eth_dev) struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); int ret; + rte_spinlock_lock(&avp->lock); + if (avp->flags & AVP_F_DETACHED) { + PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n"); + goto unlock; + } + + /* remember current link state */ avp->flags &= ~AVP_F_LINKUP; /* update link state */ @@ -1751,6 +2101,9 @@ avp_dev_stop(struct rte_eth_dev *eth_dev) PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n", ret); } + +unlock: + rte_spinlock_unlock(&avp->lock); } static void @@ -1759,10 +2112,22 @@ avp_dev_close(struct rte_eth_dev *eth_dev) struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); int ret; + rte_spinlock_lock(&avp->lock); + if (avp->flags & AVP_F_DETACHED) { + PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n"); + goto unlock; + } + /* remember current link state */ avp->flags &= ~AVP_F_LINKUP; avp->flags &= ~AVP_F_CONFIGURED; + ret = avp_dev_disable_interrupts(eth_dev); + if (ret < 0) { + PMD_DRV_LOG(ERR, "Failed to disable interrupts\n"); + /* continue */ + } + /* update device state */ ret = avp_dev_ctrl_shutdown(eth_dev); if (ret < 0) { @@ -1770,6 +2135,9 @@ avp_dev_close(struct rte_eth_dev *eth_dev) ret); /* continue */ } + +unlock: + rte_spinlock_unlock(&avp->lock); } static int @@ -1791,11 +2159,13 @@ avp_dev_promiscuous_enable(struct rte_eth_dev *eth_dev) { struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + rte_spinlock_lock(&avp->lock); if ((avp->flags & AVP_F_PROMISC) == 0) { avp->flags |= AVP_F_PROMISC; PMD_DRV_LOG(DEBUG, "Promiscuous mode enabled on %u\n", eth_dev->data->port_id); } + rte_spinlock_unlock(&avp->lock); } static void @@ -1803,11 +2173,13 @@ avp_dev_promiscuous_disable(struct rte_eth_dev *eth_dev) { struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private); + rte_spinlock_lock(&avp->lock); if ((avp->flags & AVP_F_PROMISC) != 0) { avp->flags &= ~AVP_F_PROMISC; 
PMD_DRV_LOG(DEBUG, "Promiscuous mode disabled on %u\n", eth_dev->data->port_id); } + rte_spinlock_unlock(&avp->lock); } static void