@@ -238,6 +238,7 @@ struct nfp_net_fw_ver {
#define NFP_NET_CFG_CTRL_IPSEC (0x1 << 1) /**< IPsec offload */
#define NFP_NET_CFG_CTRL_IPSEC_SM_LOOKUP (0x1 << 3) /**< SA short match lookup */
#define NFP_NET_CFG_CTRL_IPSEC_LM_LOOKUP (0x1 << 4) /**< SA long match lookup */
+#define NFP_NET_CFG_CTRL_IN_ORDER (0x1 << 11) /**< Virtio in-order flag */
#define NFP_NET_CFG_CAP_WORD1 0x00a4
@@ -4,6 +4,7 @@
*/
#include <pthread.h>
+#include <sys/ioctl.h>
#include <nfp_common_pci.h>
#include <nfp_dev.h>
@@ -15,6 +16,9 @@
#define NFP_VDPA_DRIVER_NAME nfp_vdpa
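+/* Room for one eventfd per queue vector plus one for the misc/config vector. */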
+#define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \
+ sizeof(int) * (NFP_VDPA_MAX_QUEUES * 2 + 1))
+
struct nfp_vdpa_dev {
struct rte_pci_device *pci_dev;
struct rte_vdpa_device *vdev;
@@ -25,7 +29,15 @@ struct nfp_vdpa_dev {
int vfio_dev_fd;
int iommu_group;
+ int vid;
uint16_t max_queues;
+ uint32_t started;
+ uint32_t dev_attached;
+ uint32_t running;
+ rte_spinlock_t lock;
+
+ /** Eventfd for used ring interrupt */
+ int intr_fd[NFP_VDPA_MAX_QUEUES * 2];
};
struct nfp_vdpa_dev_node {
@@ -112,6 +124,305 @@ nfp_vdpa_vfio_teardown(struct nfp_vdpa_dev *device)
rte_vfio_container_destroy(device->vfio_container_fd);
}
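+
+/*
+ * Unmap the first 'times' guest memory regions from the VFIO container.
+ * Also used to roll back a partially completed DMA map.
+ */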
+static int
+nfp_vdpa_dma_do_unmap(struct rte_vhost_memory *mem,
+ uint32_t times,
+ int vfio_container_fd)
+{
+ uint32_t i;
+ int ret = 0;
+ struct rte_vhost_mem_region *region;
+
+ for (i = 0; i < times; i++) {
+ region = &mem->regions[i];
+
+ ret = rte_vfio_container_dma_unmap(vfio_container_fd,
+ region->host_user_addr, region->guest_phys_addr,
+ region->size);
+ if (ret < 0) {
+			/* Do not return here; keep unmapping the remaining regions. */
+			DRV_VDPA_LOG(ERR, "DMA unmap failed for region %u.", i);
+ }
+ }
+
+ return ret;
+}
+
+static int
+nfp_vdpa_dma_do_map(struct rte_vhost_memory *mem,
+ uint32_t times,
+ int vfio_container_fd)
+{
+ int ret;
+ uint32_t i;
+ struct rte_vhost_mem_region *region;
+
+ for (i = 0; i < times; i++) {
+ region = &mem->regions[i];
+
+ ret = rte_vfio_container_dma_map(vfio_container_fd,
+ region->host_user_addr, region->guest_phys_addr,
+ region->size);
+ if (ret < 0) {
+ DRV_VDPA_LOG(ERR, "DMA map failed.");
+ nfp_vdpa_dma_do_unmap(mem, i, vfio_container_fd);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
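+/*
+ * Map or unmap the whole vhost guest memory table into/from the VFIO
+ * container, depending on 'do_map'.
+ */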
+static int
+nfp_vdpa_dma_map(struct nfp_vdpa_dev *device,
+ bool do_map)
+{
+ int ret;
+ int vfio_container_fd;
+ struct rte_vhost_memory *mem = NULL;
+
+ ret = rte_vhost_get_mem_table(device->vid, &mem);
+ if (ret < 0) {
+ DRV_VDPA_LOG(ERR, "Failed to get memory layout.");
+ return ret;
+ }
+
+ vfio_container_fd = device->vfio_container_fd;
+ DRV_VDPA_LOG(DEBUG, "vfio_container_fd %d", vfio_container_fd);
+
+ if (do_map)
+ ret = nfp_vdpa_dma_do_map(mem, mem->nregions, vfio_container_fd);
+ else
+ ret = nfp_vdpa_dma_do_unmap(mem, mem->nregions, vfio_container_fd);
+
+ free(mem);
+
+ return ret;
+}
+
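+/*
+ * Translate a queue virtual address (QVA) into a guest physical address
+ * (GPA) by walking the vhost memory regions; returns 0 when no region
+ * contains the address.
+ */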
+static uint64_t
+nfp_vdpa_qva_to_gpa(int vid,
+ uint64_t qva)
+{
+ int ret;
+ uint32_t i;
+ uint64_t gpa = 0;
+ struct rte_vhost_memory *mem = NULL;
+ struct rte_vhost_mem_region *region;
+
+ ret = rte_vhost_get_mem_table(vid, &mem);
+ if (ret < 0) {
+ DRV_VDPA_LOG(ERR, "Failed to get memory layout.");
+ return gpa;
+ }
+
+ for (i = 0; i < mem->nregions; i++) {
+ region = &mem->regions[i];
+
+ if (qva >= region->host_user_addr &&
+ qva < region->host_user_addr + region->size) {
+ gpa = qva - region->host_user_addr + region->guest_phys_addr;
+ break;
+ }
+ }
+
+ free(mem);
+
+ return gpa;
+}
+
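+/*
+ * Fetch the negotiated features, the GPA of each ring and the last ring
+ * indexes from vhost, then hand the result to the hardware layer.
+ */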
+static int
+nfp_vdpa_start(struct nfp_vdpa_dev *device)
+{
+ int ret;
+ int vid;
+ uint16_t i;
+ uint64_t gpa;
+ struct rte_vhost_vring vring;
+ struct nfp_vdpa_hw *vdpa_hw = &device->hw;
+
+ vid = device->vid;
+ vdpa_hw->nr_vring = rte_vhost_get_vring_num(vid);
+
+ ret = rte_vhost_get_negotiated_features(vid, &vdpa_hw->req_features);
+ if (ret != 0)
+ return ret;
+
+ for (i = 0; i < vdpa_hw->nr_vring; i++) {
+ ret = rte_vhost_get_vhost_vring(vid, i, &vring);
+ if (ret != 0)
+ return ret;
+
+ gpa = nfp_vdpa_qva_to_gpa(vid, (uint64_t)(uintptr_t)vring.desc);
+ if (gpa == 0) {
+			DRV_VDPA_LOG(ERR, "Failed to get GPA for descriptor ring.");
+ return -1;
+ }
+
+ vdpa_hw->vring[i].desc = gpa;
+
+ gpa = nfp_vdpa_qva_to_gpa(vid, (uint64_t)(uintptr_t)vring.avail);
+ if (gpa == 0) {
+			DRV_VDPA_LOG(ERR, "Failed to get GPA for available ring.");
+ return -1;
+ }
+
+ vdpa_hw->vring[i].avail = gpa;
+
+ gpa = nfp_vdpa_qva_to_gpa(vid, (uint64_t)(uintptr_t)vring.used);
+ if (gpa == 0) {
+			DRV_VDPA_LOG(ERR, "Failed to get GPA for used ring.");
+ return -1;
+ }
+
+ vdpa_hw->vring[i].used = gpa;
+
+ vdpa_hw->vring[i].size = vring.size;
+
+ ret = rte_vhost_get_vring_base(vid, i,
+ &vdpa_hw->vring[i].last_avail_idx,
+ &vdpa_hw->vring[i].last_used_idx);
+ if (ret != 0)
+ return ret;
+ }
+
+ return nfp_vdpa_hw_start(&device->hw, vid);
+}
+
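+/*
+ * Stop the hardware queues and push the final ring indexes back to vhost
+ * so the frontend sees an up-to-date ring state.
+ */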
+static void
+nfp_vdpa_stop(struct nfp_vdpa_dev *device)
+{
+ int vid;
+ uint32_t i;
+ struct nfp_vdpa_hw *vdpa_hw = &device->hw;
+
+ nfp_vdpa_hw_stop(vdpa_hw);
+
+ vid = device->vid;
+ for (i = 0; i < vdpa_hw->nr_vring; i++)
+ rte_vhost_set_vring_base(vid, i,
+ vdpa_hw->vring[i].last_avail_idx,
+ vdpa_hw->vring[i].last_used_idx);
+}
+
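+/*
+ * Route the device MSI-X vectors to eventfds: vector 0 triggers the PCI
+ * misc/config interrupt, the following vectors trigger each vring's callfd.
+ */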
+static int
+nfp_vdpa_enable_vfio_intr(struct nfp_vdpa_dev *device)
+{
+ int ret;
+ uint16_t i;
+ int *fd_ptr;
+ uint16_t nr_vring;
+ struct vfio_irq_set *irq_set;
+ struct rte_vhost_vring vring;
+ char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
+
+ nr_vring = rte_vhost_get_vring_num(device->vid);
+
+ irq_set = (struct vfio_irq_set *)irq_set_buf;
+ irq_set->argsz = sizeof(irq_set_buf);
+ irq_set->count = nr_vring + 1;
+ irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
+ irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
+ irq_set->start = 0;
+
+ fd_ptr = (int *)&irq_set->data;
+ fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = rte_intr_fd_get(device->pci_dev->intr_handle);
+
+ for (i = 0; i < nr_vring; i++)
+ device->intr_fd[i] = -1;
+
+ for (i = 0; i < nr_vring; i++) {
+ rte_vhost_get_vhost_vring(device->vid, i, &vring);
+ fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
+ }
+
+ ret = ioctl(device->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
+ if (ret != 0) {
+ DRV_VDPA_LOG(ERR, "Error enabling MSI-X interrupts.");
+ return -EIO;
+ }
+
+ return 0;
+}
+
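+/* Detach all eventfds from the device MSI-X vectors. */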
+static int
+nfp_vdpa_disable_vfio_intr(struct nfp_vdpa_dev *device)
+{
+ int ret;
+ struct vfio_irq_set *irq_set;
+ char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
+
+ irq_set = (struct vfio_irq_set *)irq_set_buf;
+ irq_set->argsz = sizeof(irq_set_buf);
+ irq_set->count = 0;
+ irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
+ irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
+ irq_set->start = 0;
+
+ ret = ioctl(device->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
+ if (ret != 0) {
+ DRV_VDPA_LOG(ERR, "Error disabling MSI-X interrupts.");
+ return -EIO;
+ }
+
+ return 0;
+}
+
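+/*
+ * Bring the datapath up once the device is both started and attached but
+ * not yet running, and tear it down again as soon as either flag drops
+ * while it is running.
+ */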
+static int
+update_datapath(struct nfp_vdpa_dev *device)
+{
+ int ret;
+
+ rte_spinlock_lock(&device->lock);
+
+ if ((rte_atomic_load_explicit(&device->running, rte_memory_order_relaxed) == 0) &&
+ (rte_atomic_load_explicit(&device->started,
+ rte_memory_order_relaxed) != 0) &&
+ (rte_atomic_load_explicit(&device->dev_attached,
+ rte_memory_order_relaxed) != 0)) {
+ ret = nfp_vdpa_dma_map(device, true);
+ if (ret != 0)
+ goto unlock_exit;
+
+ ret = nfp_vdpa_enable_vfio_intr(device);
+ if (ret != 0)
+ goto dma_map_rollback;
+
+ ret = nfp_vdpa_start(device);
+ if (ret != 0)
+ goto disable_vfio_intr;
+
+ rte_atomic_store_explicit(&device->running, 1, rte_memory_order_relaxed);
+	} else if ((rte_atomic_load_explicit(&device->running, rte_memory_order_relaxed) != 0) &&
+			((rte_atomic_load_explicit(&device->started,
+					rte_memory_order_relaxed) == 0) ||
+			(rte_atomic_load_explicit(&device->dev_attached,
+					rte_memory_order_relaxed) == 0))) {
+ nfp_vdpa_stop(device);
+
+ ret = nfp_vdpa_disable_vfio_intr(device);
+ if (ret != 0)
+ goto unlock_exit;
+
+ ret = nfp_vdpa_dma_map(device, false);
+ if (ret != 0)
+ goto unlock_exit;
+
+ rte_atomic_store_explicit(&device->running, 0, rte_memory_order_relaxed);
+ }
+
+ rte_spinlock_unlock(&device->lock);
+ return 0;
+
+disable_vfio_intr:
+ nfp_vdpa_disable_vfio_intr(device);
+dma_map_rollback:
+ nfp_vdpa_dma_map(device, false);
+unlock_exit:
+ rte_spinlock_unlock(&device->lock);
+ return ret;
+}
+
struct rte_vdpa_dev_ops nfp_vdpa_ops = {
};
@@ -156,6 +467,10 @@ nfp_vdpa_pci_probe(struct rte_pci_device *pci_dev)
TAILQ_INSERT_TAIL(&vdpa_dev_list, node, next);
pthread_mutex_unlock(&vdpa_list_lock);
+ rte_spinlock_init(&device->lock);
+ rte_atomic_store_explicit(&device->started, 1, rte_memory_order_relaxed);
+ update_datapath(device);
+
return 0;
vfio_teardown:
@@ -185,6 +500,9 @@ nfp_vdpa_pci_remove(struct rte_pci_device *pci_dev)
device = node->device;
+ rte_atomic_store_explicit(&device->started, 0, rte_memory_order_relaxed);
+ update_datapath(device);
+
pthread_mutex_lock(&vdpa_list_lock);
TAILQ_REMOVE(&vdpa_dev_list, node, next);
pthread_mutex_unlock(&vdpa_list_lock);
@@ -5,6 +5,7 @@
#include "nfp_vdpa_core.h"
+#include <nfp_common.h>
#include <rte_vhost.h>
#include "nfp_vdpa_log.h"
@@ -52,3 +53,80 @@ nfp_vdpa_hw_init(struct nfp_vdpa_hw *vdpa_hw,
return 0;
}
+
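+/* Control bits for the offloads this datapath always enables. */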
+static uint32_t
+nfp_vdpa_check_offloads(void)
+{
+ return NFP_NET_CFG_CTRL_SCATTER |
+ NFP_NET_CFG_CTRL_IN_ORDER;
+}
+
+int
+nfp_vdpa_hw_start(struct nfp_vdpa_hw *vdpa_hw,
+ int vid)
+{
+ int ret;
+ uint32_t update;
+ uint32_t new_ctrl;
+ struct timespec wait_tst;
+ struct nfp_hw *hw = &vdpa_hw->super;
+ uint8_t mac_addr[RTE_ETHER_ADDR_LEN];
+
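+	/*
+	 * Hand the split rings over through the queue address registers:
+	 * slot 0 carries the desc area, slot 1 avail and slot 2 used.
+	 * vring[1] maps to the Tx queue, vring[0] to the Rx queue.
+	 */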
+ nn_cfg_writeq(hw, NFP_NET_CFG_TXR_ADDR(0), vdpa_hw->vring[1].desc);
+ nn_cfg_writeb(hw, NFP_NET_CFG_TXR_SZ(0), rte_log2_u32(vdpa_hw->vring[1].size));
+ nn_cfg_writeq(hw, NFP_NET_CFG_TXR_ADDR(1), vdpa_hw->vring[1].avail);
+ nn_cfg_writeq(hw, NFP_NET_CFG_TXR_ADDR(2), vdpa_hw->vring[1].used);
+
+ nn_cfg_writeq(hw, NFP_NET_CFG_RXR_ADDR(0), vdpa_hw->vring[0].desc);
+ nn_cfg_writeb(hw, NFP_NET_CFG_RXR_SZ(0), rte_log2_u32(vdpa_hw->vring[0].size));
+ nn_cfg_writeq(hw, NFP_NET_CFG_RXR_ADDR(1), vdpa_hw->vring[0].avail);
+ nn_cfg_writeq(hw, NFP_NET_CFG_RXR_ADDR(2), vdpa_hw->vring[0].used);
+
+ rte_wmb();
+
+ nfp_disable_queues(hw);
+ nfp_enable_queues(hw, NFP_VDPA_MAX_QUEUES, NFP_VDPA_MAX_QUEUES);
+
+ new_ctrl = nfp_vdpa_check_offloads();
+
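+	/* Jumbo MTU with a free list buffer size large enough to hold it. */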
+ nn_cfg_writel(hw, NFP_NET_CFG_MTU, 9216);
+ nn_cfg_writel(hw, NFP_NET_CFG_FLBUFSZ, 10240);
+
+	/* TODO: Temporarily set MAC to fe:1b:ac:05:a5:22, with the last byte offset by vid */
+ mac_addr[0] = 0xfe;
+ mac_addr[1] = 0x1b;
+ mac_addr[2] = 0xac;
+ mac_addr[3] = 0x05;
+ mac_addr[4] = 0xa5;
+ mac_addr[5] = (0x22 + vid);
+
+ /* Writing new MAC to the specific port BAR address */
+ nfp_write_mac(hw, (uint8_t *)mac_addr);
+
+ /* Enable device */
+ new_ctrl |= NFP_NET_CFG_CTRL_ENABLE;
+
+ /* Signal the NIC about the change */
+ update = NFP_NET_CFG_UPDATE_MACADDR |
+ NFP_NET_CFG_UPDATE_GEN |
+ NFP_NET_CFG_UPDATE_RING;
+
+ ret = nfp_reconfig(hw, new_ctrl, update);
+ if (ret < 0)
+ return -EIO;
+
+ hw->ctrl = new_ctrl;
+
+	DRV_CORE_LOG(DEBUG, "Enabling the device, sleeping 1 second...");
+	wait_tst.tv_sec = 1;
+	wait_tst.tv_nsec = 0;
+	nanosleep(&wait_tst, NULL);
+
+ return 0;
+}
+
+void
+nfp_vdpa_hw_stop(struct nfp_vdpa_hw *vdpa_hw)
+{
+ nfp_disable_queues(&vdpa_hw->super);
+}
@@ -15,6 +15,15 @@
#define NFP_VDPA_NOTIFY_ADDR_BASE 0x4000
#define NFP_VDPA_NOTIFY_ADDR_INTERVAL 0x1000
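+/** Addresses (GPA) and saved indexes of one virtio split ring. */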
+struct nfp_vdpa_vring {
+ uint64_t desc;
+ uint64_t avail;
+ uint64_t used;
+ uint16_t size;
+ uint16_t last_avail_idx;
+ uint16_t last_used_idx;
+};
+
struct nfp_vdpa_hw {
struct nfp_hw super;
@@ -22,11 +31,17 @@ struct nfp_vdpa_hw {
uint64_t req_features;
uint8_t *notify_addr[NFP_VDPA_MAX_QUEUES * 2];
+ struct nfp_vdpa_vring vring[NFP_VDPA_MAX_QUEUES * 2];
uint8_t mac_addr[RTE_ETHER_ADDR_LEN];
uint8_t notify_region;
+ uint8_t nr_vring;
};
int nfp_vdpa_hw_init(struct nfp_vdpa_hw *vdpa_hw, struct rte_pci_device *dev);
+int nfp_vdpa_hw_start(struct nfp_vdpa_hw *vdpa_hw, int vid);
+
+void nfp_vdpa_hw_stop(struct nfp_vdpa_hw *vdpa_hw);
+
#endif /* __NFP_VDPA_CORE_H__ */