[v3,22/25] drivers: add the datapath update logic

Message ID 20231026064324.177531-23-chaoyong.he@corigine.com (mailing list archive)
State Changes Requested, archived
Delegated to: Ferruh Yigit
Series: add the NFP vDPA PMD

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Chaoyong He Oct. 26, 2023, 6:43 a.m. UTC
  Add the vDPA datapath update logic: map and unmap the guest memory
  regions through the VFIO container, translate the vring descriptor,
  avail and used ring addresses from QEMU virtual addresses (QVA) to
  guest physical addresses (GPA), relay the vring interrupts through
  VFIO MSI-X eventfds, and start/stop the queues on the NFP. Bring-up
  and tear-down are driven by update_datapath(), which tracks the
  'started', 'dev_attached' and 'running' states under a spinlock.
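
  The expected call pattern is sketched below. It is illustrative only:
  this patch wires update_datapath() up to probe/remove, while the
  vhost-driven attach path is expected from the vDPA ops filled in
  later in the series; nfp_vdpa_find_dev_by_vdev() and the callback
  name are assumed, not taken from this patch.

      /*
       * Illustrative sketch, not part of this patch: how a vhost
       * "new device" notification would drive update_datapath().
       */
      static int
      nfp_vdpa_dev_config(int vid)
      {
              struct rte_vdpa_device *vdev = rte_vhost_get_vdpa_device(vid);
              /* Assumed helper: look up the driver's private device struct. */
              struct nfp_vdpa_dev *device = nfp_vdpa_find_dev_by_vdev(vdev);

              device->vid = vid;
              rte_atomic_store_explicit(&device->dev_attached, 1,
                              rte_memory_order_relaxed);

              /*
               * With 'started' (set at probe) and 'dev_attached' both
               * non-zero and 'running' still zero, update_datapath()
               * maps guest memory, enables the MSI-X relay, starts the
               * queues and then sets 'running' to 1.
               */
              return update_datapath(device);
      }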

Signed-off-by: Chaoyong He <chaoyong.he@corigine.com>
Signed-off-by: Shujing Dong <shujing.dong@corigine.com>
Reviewed-by: Long Wu <long.wu@corigine.com>
Reviewed-by: Peng Zhang <peng.zhang@corigine.com>
---
 drivers/common/nfp/nfp_common_ctrl.h |   1 +
 drivers/vdpa/nfp/nfp_vdpa.c          | 318 +++++++++++++++++++++++++++
 drivers/vdpa/nfp/nfp_vdpa_core.c     |  78 +++++++
 drivers/vdpa/nfp/nfp_vdpa_core.h     |  15 ++
 4 files changed, 412 insertions(+)
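
  For reference, nfp_vdpa_qva_to_gpa() in this patch walks the vhost
  memory table and turns a vring address from the host (QEMU) virtual
  address space into the guest physical address programmed into the
  NFP BAR. The arithmetic is a simple per-region offset; a standalone
  illustration with made-up region values:

      #include <inttypes.h>
      #include <stdint.h>
      #include <stdio.h>

      int
      main(void)
      {
              /* One vhost memory region, as reported by rte_vhost_get_mem_table() */
              uint64_t host_user_addr  = 0x7f0000000000ULL; /* QVA base of the region */
              uint64_t guest_phys_addr = 0x40000000ULL;     /* GPA base of the region */
              uint64_t size            = 0x10000000ULL;     /* 256 MB region */

              uint64_t qva = 0x7f0000123000ULL;             /* e.g. a descriptor ring QVA */
              uint64_t gpa = 0;

              if (qva >= host_user_addr && qva < host_user_addr + size)
                      gpa = qva - host_user_addr + guest_phys_addr;

              printf("gpa = 0x%" PRIx64 "\n", gpa);         /* prints 0x40123000 */
              return 0;
      }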
  

Patch

diff --git a/drivers/common/nfp/nfp_common_ctrl.h b/drivers/common/nfp/nfp_common_ctrl.h
index 3c8cd916cf..f92ce50fc0 100644
--- a/drivers/common/nfp/nfp_common_ctrl.h
+++ b/drivers/common/nfp/nfp_common_ctrl.h
@@ -238,6 +238,7 @@  struct nfp_net_fw_ver {
 #define NFP_NET_CFG_CTRL_IPSEC            (0x1 << 1) /**< IPsec offload */
 #define NFP_NET_CFG_CTRL_IPSEC_SM_LOOKUP  (0x1 << 3) /**< SA short match lookup */
 #define NFP_NET_CFG_CTRL_IPSEC_LM_LOOKUP  (0x1 << 4) /**< SA long match lookup */
+#define NFP_NET_CFG_CTRL_IN_ORDER         (0x1 << 11) /**< Virtio in-order flag */
 
 #define NFP_NET_CFG_CAP_WORD1           0x00a4
 
diff --git a/drivers/vdpa/nfp/nfp_vdpa.c b/drivers/vdpa/nfp/nfp_vdpa.c
index 00d8f7e007..d0d8050887 100644
--- a/drivers/vdpa/nfp/nfp_vdpa.c
+++ b/drivers/vdpa/nfp/nfp_vdpa.c
@@ -4,6 +4,7 @@ 
  */
 
 #include <pthread.h>
+#include <sys/ioctl.h>
 
 #include <nfp_common_pci.h>
 #include <nfp_dev.h>
@@ -15,6 +16,9 @@ 
 
 #define NFP_VDPA_DRIVER_NAME nfp_vdpa
 
+#define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \
+		sizeof(int) * (NFP_VDPA_MAX_QUEUES * 2 + 1))
+
 struct nfp_vdpa_dev {
 	struct rte_pci_device *pci_dev;
 	struct rte_vdpa_device *vdev;
@@ -25,7 +29,15 @@  struct nfp_vdpa_dev {
 	int vfio_dev_fd;
 	int iommu_group;
 
+	int vid;
 	uint16_t max_queues;
+	uint32_t started;
+	uint32_t dev_attached;
+	uint32_t running;
+	rte_spinlock_t lock;
+
+	/** Eventfd for used ring interrupt */
+	int intr_fd[NFP_VDPA_MAX_QUEUES * 2];
 };
 
 struct nfp_vdpa_dev_node {
@@ -112,6 +124,305 @@  nfp_vdpa_vfio_teardown(struct nfp_vdpa_dev *device)
 	rte_vfio_container_destroy(device->vfio_container_fd);
 }
 
+static int
+nfp_vdpa_dma_do_unmap(struct rte_vhost_memory *mem,
+		uint32_t times,
+		int vfio_container_fd)
+{
+	uint32_t i;
+	int ret = 0;
+	struct rte_vhost_mem_region *region;
+
+	for (i = 0; i < times; i++) {
+		region = &mem->regions[i];
+
+		ret = rte_vfio_container_dma_unmap(vfio_container_fd,
+				region->host_user_addr, region->guest_phys_addr,
+				region->size);
+		if (ret < 0) {
+			/* Do not return here; continue unmapping the remaining regions. */
+			DRV_VDPA_LOG(ERR, "DMA unmap failed for region %u.", i);
+		}
+	}
+
+	return ret;
+}
+
+static int
+nfp_vdpa_dma_do_map(struct rte_vhost_memory *mem,
+		uint32_t times,
+		int vfio_container_fd)
+{
+	int ret;
+	uint32_t i;
+	struct rte_vhost_mem_region *region;
+
+	for (i = 0; i < times; i++) {
+		region = &mem->regions[i];
+
+		ret = rte_vfio_container_dma_map(vfio_container_fd,
+				region->host_user_addr, region->guest_phys_addr,
+				region->size);
+		if (ret < 0) {
+			DRV_VDPA_LOG(ERR, "DMA map failed.");
+			nfp_vdpa_dma_do_unmap(mem, i, vfio_container_fd);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int
+nfp_vdpa_dma_map(struct nfp_vdpa_dev *device,
+		bool do_map)
+{
+	int ret;
+	int vfio_container_fd;
+	struct rte_vhost_memory *mem = NULL;
+
+	ret = rte_vhost_get_mem_table(device->vid, &mem);
+	if (ret < 0) {
+		DRV_VDPA_LOG(ERR, "Failed to get memory layout.");
+		return ret;
+	}
+
+	vfio_container_fd = device->vfio_container_fd;
+	DRV_VDPA_LOG(DEBUG, "vfio_container_fd %d", vfio_container_fd);
+
+	if (do_map)
+		ret = nfp_vdpa_dma_do_map(mem, mem->nregions, vfio_container_fd);
+	else
+		ret = nfp_vdpa_dma_do_unmap(mem, mem->nregions, vfio_container_fd);
+
+	free(mem);
+
+	return ret;
+}
+
+static uint64_t
+nfp_vdpa_qva_to_gpa(int vid,
+		uint64_t qva)
+{
+	int ret;
+	uint32_t i;
+	uint64_t gpa = 0;
+	struct rte_vhost_memory *mem = NULL;
+	struct rte_vhost_mem_region *region;
+
+	ret = rte_vhost_get_mem_table(vid, &mem);
+	if (ret < 0) {
+		DRV_VDPA_LOG(ERR, "Failed to get memory layout.");
+		return gpa;
+	}
+
+	for (i = 0; i < mem->nregions; i++) {
+		region = &mem->regions[i];
+
+		if (qva >= region->host_user_addr &&
+				qva < region->host_user_addr + region->size) {
+			gpa = qva - region->host_user_addr + region->guest_phys_addr;
+			break;
+		}
+	}
+
+	free(mem);
+
+	return gpa;
+}
+
+static int
+nfp_vdpa_start(struct nfp_vdpa_dev *device)
+{
+	int ret;
+	int vid;
+	uint16_t i;
+	uint64_t gpa;
+	struct rte_vhost_vring vring;
+	struct nfp_vdpa_hw *vdpa_hw = &device->hw;
+
+	vid = device->vid;
+	vdpa_hw->nr_vring = rte_vhost_get_vring_num(vid);
+
+	ret = rte_vhost_get_negotiated_features(vid, &vdpa_hw->req_features);
+	if (ret != 0)
+		return ret;
+
+	for (i = 0; i < vdpa_hw->nr_vring; i++) {
+		ret = rte_vhost_get_vhost_vring(vid, i, &vring);
+		if (ret != 0)
+			return ret;
+
+		gpa = nfp_vdpa_qva_to_gpa(vid, (uint64_t)(uintptr_t)vring.desc);
+		if (gpa == 0) {
+			DRV_VDPA_LOG(ERR, "Failed to get GPA for descriptor ring.");
+			return -1;
+		}
+
+		vdpa_hw->vring[i].desc = gpa;
+
+		gpa = nfp_vdpa_qva_to_gpa(vid, (uint64_t)(uintptr_t)vring.avail);
+		if (gpa == 0) {
+			DRV_VDPA_LOG(ERR, "Failed to get GPA for available ring.");
+			return -1;
+		}
+
+		vdpa_hw->vring[i].avail = gpa;
+
+		gpa = nfp_vdpa_qva_to_gpa(vid, (uint64_t)(uintptr_t)vring.used);
+		if (gpa == 0) {
+			DRV_VDPA_LOG(ERR, "Failed to get GPA for used ring.");
+			return -1;
+		}
+
+		vdpa_hw->vring[i].used = gpa;
+
+		vdpa_hw->vring[i].size = vring.size;
+
+		ret = rte_vhost_get_vring_base(vid, i,
+				&vdpa_hw->vring[i].last_avail_idx,
+				&vdpa_hw->vring[i].last_used_idx);
+		if (ret != 0)
+			return ret;
+	}
+
+	return nfp_vdpa_hw_start(&device->hw, vid);
+}
+
+static void
+nfp_vdpa_stop(struct nfp_vdpa_dev *device)
+{
+	int vid;
+	uint32_t i;
+	struct nfp_vdpa_hw *vdpa_hw = &device->hw;
+
+	nfp_vdpa_hw_stop(vdpa_hw);
+
+	vid = device->vid;
+	for (i = 0; i < vdpa_hw->nr_vring; i++)
+		rte_vhost_set_vring_base(vid, i,
+				vdpa_hw->vring[i].last_avail_idx,
+				vdpa_hw->vring[i].last_used_idx);
+}
+
+static int
+nfp_vdpa_enable_vfio_intr(struct nfp_vdpa_dev *device)
+{
+	int ret;
+	uint16_t i;
+	int *fd_ptr;
+	uint16_t nr_vring;
+	struct vfio_irq_set *irq_set;
+	struct rte_vhost_vring vring;
+	char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
+
+	nr_vring = rte_vhost_get_vring_num(device->vid);
+
+	irq_set = (struct vfio_irq_set *)irq_set_buf;
+	irq_set->argsz = sizeof(irq_set_buf);
+	irq_set->count = nr_vring + 1;
+	irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
+	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
+	irq_set->start = 0;
+
+	fd_ptr = (int *)&irq_set->data;
+	fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = rte_intr_fd_get(device->pci_dev->intr_handle);
+
+	for (i = 0; i < nr_vring; i++)
+		device->intr_fd[i] = -1;
+
+	for (i = 0; i < nr_vring; i++) {
+		rte_vhost_get_vhost_vring(device->vid, i, &vring);
+		fd_ptr[RTE_INTR_VEC_RXTX_OFFSET + i] = vring.callfd;
+	}
+
+	ret = ioctl(device->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
+	if (ret != 0) {
+		DRV_VDPA_LOG(ERR, "Error enabling MSI-X interrupts.");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int
+nfp_vdpa_disable_vfio_intr(struct nfp_vdpa_dev *device)
+{
+	int ret;
+	struct vfio_irq_set *irq_set;
+	char irq_set_buf[MSIX_IRQ_SET_BUF_LEN];
+
+	irq_set = (struct vfio_irq_set *)irq_set_buf;
+	irq_set->argsz = sizeof(irq_set_buf);
+	irq_set->count = 0;
+	irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER;
+	irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
+	irq_set->start = 0;
+
+	ret = ioctl(device->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set);
+	if (ret != 0) {
+		DRV_VDPA_LOG(ERR, "Error disabling MSI-X interrupts.");
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int
+update_datapath(struct nfp_vdpa_dev *device)
+{
+	int ret;
+
+	rte_spinlock_lock(&device->lock);
+
+	if ((rte_atomic_load_explicit(&device->running, rte_memory_order_relaxed) == 0) &&
+			(rte_atomic_load_explicit(&device->started,
+					rte_memory_order_relaxed) != 0) &&
+			(rte_atomic_load_explicit(&device->dev_attached,
+					rte_memory_order_relaxed) != 0)) {
+		ret = nfp_vdpa_dma_map(device, true);
+		if (ret != 0)
+			goto unlock_exit;
+
+		ret = nfp_vdpa_enable_vfio_intr(device);
+		if (ret != 0)
+			goto dma_map_rollback;
+
+		ret = nfp_vdpa_start(device);
+		if (ret != 0)
+			goto disable_vfio_intr;
+
+		rte_atomic_store_explicit(&device->running, 1, rte_memory_order_relaxed);
+	} else if ((rte_atomic_load_explicit(&device->running, rte_memory_order_relaxed) != 0) &&
+			((rte_atomic_load_explicit(&device->started,
+					rte_memory_order_relaxed) == 0) ||
+			(rte_atomic_load_explicit(&device->dev_attached,
+					rte_memory_order_relaxed) == 0))) {
+		nfp_vdpa_stop(device);
+
+		ret = nfp_vdpa_disable_vfio_intr(device);
+		if (ret != 0)
+			goto unlock_exit;
+
+		ret = nfp_vdpa_dma_map(device, false);
+		if (ret != 0)
+			goto unlock_exit;
+
+		rte_atomic_store_explicit(&device->running, 0, rte_memory_order_relaxed);
+	}
+
+	rte_spinlock_unlock(&device->lock);
+	return 0;
+
+disable_vfio_intr:
+	nfp_vdpa_disable_vfio_intr(device);
+dma_map_rollback:
+	nfp_vdpa_dma_map(device, false);
+unlock_exit:
+	rte_spinlock_unlock(&device->lock);
+	return ret;
+}
+
 struct rte_vdpa_dev_ops nfp_vdpa_ops = {
 };
 
@@ -156,6 +467,10 @@  nfp_vdpa_pci_probe(struct rte_pci_device *pci_dev)
 	TAILQ_INSERT_TAIL(&vdpa_dev_list, node, next);
 	pthread_mutex_unlock(&vdpa_list_lock);
 
+	rte_spinlock_init(&device->lock);
+	rte_atomic_store_explicit(&device->started, 1, rte_memory_order_relaxed);
+	update_datapath(device);
+
 	return 0;
 
 vfio_teardown:
@@ -185,6 +500,9 @@  nfp_vdpa_pci_remove(struct rte_pci_device *pci_dev)
 
 	device = node->device;
 
+	rte_atomic_store_explicit(&device->started, 0, rte_memory_order_relaxed);
+	update_datapath(device);
+
 	pthread_mutex_lock(&vdpa_list_lock);
 	TAILQ_REMOVE(&vdpa_dev_list, node, next);
 	pthread_mutex_unlock(&vdpa_list_lock);
diff --git a/drivers/vdpa/nfp/nfp_vdpa_core.c b/drivers/vdpa/nfp/nfp_vdpa_core.c
index a7e15fa88a..db9b8462b4 100644
--- a/drivers/vdpa/nfp/nfp_vdpa_core.c
+++ b/drivers/vdpa/nfp/nfp_vdpa_core.c
@@ -5,6 +5,7 @@ 
 
 #include "nfp_vdpa_core.h"
 
+#include <nfp_common.h>
 #include <rte_vhost.h>
 
 #include "nfp_vdpa_log.h"
@@ -52,3 +53,80 @@  nfp_vdpa_hw_init(struct nfp_vdpa_hw *vdpa_hw,
 
 	return 0;
 }
+
+static uint32_t
+nfp_vdpa_check_offloads(void)
+{
+	return NFP_NET_CFG_CTRL_SCATTER |
+			NFP_NET_CFG_CTRL_IN_ORDER;
+}
+
+int
+nfp_vdpa_hw_start(struct nfp_vdpa_hw *vdpa_hw,
+		int vid)
+{
+	int ret;
+	uint32_t update;
+	uint32_t new_ctrl;
+	struct timespec wait_tst;
+	struct nfp_hw *hw = &vdpa_hw->super;
+	uint8_t mac_addr[RTE_ETHER_ADDR_LEN];
+
+	nn_cfg_writeq(hw, NFP_NET_CFG_TXR_ADDR(0), vdpa_hw->vring[1].desc);
+	nn_cfg_writeb(hw, NFP_NET_CFG_TXR_SZ(0), rte_log2_u32(vdpa_hw->vring[1].size));
+	nn_cfg_writeq(hw, NFP_NET_CFG_TXR_ADDR(1), vdpa_hw->vring[1].avail);
+	nn_cfg_writeq(hw, NFP_NET_CFG_TXR_ADDR(2), vdpa_hw->vring[1].used);
+
+	nn_cfg_writeq(hw, NFP_NET_CFG_RXR_ADDR(0), vdpa_hw->vring[0].desc);
+	nn_cfg_writeb(hw, NFP_NET_CFG_RXR_SZ(0), rte_log2_u32(vdpa_hw->vring[0].size));
+	nn_cfg_writeq(hw, NFP_NET_CFG_RXR_ADDR(1), vdpa_hw->vring[0].avail);
+	nn_cfg_writeq(hw, NFP_NET_CFG_RXR_ADDR(2), vdpa_hw->vring[0].used);
+
+	rte_wmb();
+
+	nfp_disable_queues(hw);
+	nfp_enable_queues(hw, NFP_VDPA_MAX_QUEUES, NFP_VDPA_MAX_QUEUES);
+
+	new_ctrl = nfp_vdpa_check_offloads();
+
+	nn_cfg_writel(hw, NFP_NET_CFG_MTU, 9216);
+	nn_cfg_writel(hw, NFP_NET_CFG_FLBUFSZ, 10240);
+
+	/* TODO: Temporarily set the MAC to the fixed value fe:1b:ac:05:a5:22 */
+	mac_addr[0] = 0xfe;
+	mac_addr[1] = 0x1b;
+	mac_addr[2] = 0xac;
+	mac_addr[3] = 0x05;
+	mac_addr[4] = 0xa5;
+	mac_addr[5] = (0x22 + vid);
+
+	/* Writing new MAC to the specific port BAR address */
+	nfp_write_mac(hw, (uint8_t *)mac_addr);
+
+	/* Enable device */
+	new_ctrl |= NFP_NET_CFG_CTRL_ENABLE;
+
+	/* Signal the NIC about the change */
+	update = NFP_NET_CFG_UPDATE_MACADDR |
+			NFP_NET_CFG_UPDATE_GEN |
+			NFP_NET_CFG_UPDATE_RING;
+
+	ret = nfp_reconfig(hw, new_ctrl, update);
+	if (ret < 0)
+		return -EIO;
+
+	hw->ctrl = new_ctrl;
+
+	DRV_CORE_LOG(DEBUG, "Enabling the device, sleeping for 1 second...");
+	wait_tst.tv_sec = 1;
+	wait_tst.tv_nsec = 0;
+	nanosleep(&wait_tst, 0);
+
+	return 0;
+}
+
+void
+nfp_vdpa_hw_stop(struct nfp_vdpa_hw *vdpa_hw)
+{
+	nfp_disable_queues(&vdpa_hw->super);
+}
diff --git a/drivers/vdpa/nfp/nfp_vdpa_core.h b/drivers/vdpa/nfp/nfp_vdpa_core.h
index c9403e0ea4..a88de768dd 100644
--- a/drivers/vdpa/nfp/nfp_vdpa_core.h
+++ b/drivers/vdpa/nfp/nfp_vdpa_core.h
@@ -15,6 +15,15 @@ 
 #define NFP_VDPA_NOTIFY_ADDR_BASE        0x4000
 #define NFP_VDPA_NOTIFY_ADDR_INTERVAL    0x1000
 
+struct nfp_vdpa_vring {
+	uint64_t desc;
+	uint64_t avail;
+	uint64_t used;
+	uint16_t size;
+	uint16_t last_avail_idx;
+	uint16_t last_used_idx;
+};
+
 struct nfp_vdpa_hw {
 	struct nfp_hw super;
 
@@ -22,11 +31,17 @@  struct nfp_vdpa_hw {
 	uint64_t req_features;
 
 	uint8_t *notify_addr[NFP_VDPA_MAX_QUEUES * 2];
+	struct nfp_vdpa_vring vring[NFP_VDPA_MAX_QUEUES * 2];
 
 	uint8_t mac_addr[RTE_ETHER_ADDR_LEN];
 	uint8_t notify_region;
+	uint8_t nr_vring;
 };
 
 int nfp_vdpa_hw_init(struct nfp_vdpa_hw *vdpa_hw, struct rte_pci_device *dev);
 
+int nfp_vdpa_hw_start(struct nfp_vdpa_hw *vdpa_hw, int vid);
+
+void nfp_vdpa_hw_stop(struct nfp_vdpa_hw *vdpa_hw);
+
 #endif /* __NFP_VDPA_CORE_H__ */