[v3,07/11] vdpa/nfp: setup the VF configure

Message ID 20240617062708.2932037-8-chaoyong.he@corigine.com (mailing list archive)
State New
Delegated to: Maxime Coquelin
Headers
Series support software live migration |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Chaoyong He June 17, 2024, 6:27 a.m. UTC
  From: Xinying Yu <xinying.yu@corigine.com>

Create the relay vring on host and then set the address of Rx
used ring to the VF config bar. So the device can DMA the
used ring information to host rather than directly to VM.

Use 'NFP_NET_CFG_CTRL_LM_RELAY' notify the device side. And
enable the MSIX interrupt on device.

Tx ring address is not needed to change since the relay vring
only assists Rx ring to do the dirty page logging.

Signed-off-by: Xinying Yu <xinying.yu@corigine.com>
Reviewed-by: Chaoyong He <chaoyong.he@corigine.com>
Reviewed-by: Long Wu <long.wu@corigine.com>
Reviewed-by: Peng Zhang <peng.zhang@corigine.com>
---
 drivers/common/nfp/nfp_common_ctrl.h |   3 +
 drivers/vdpa/nfp/nfp_vdpa.c          | 203 ++++++++++++++++++++++++---
 drivers/vdpa/nfp/nfp_vdpa_core.c     |  55 ++++++--
 drivers/vdpa/nfp/nfp_vdpa_core.h     |   8 ++
 4 files changed, 239 insertions(+), 30 deletions(-)
  

Comments

Maxime Coquelin June 18, 2024, 8:19 a.m. UTC | #1
On 6/17/24 08:27, Chaoyong He wrote:
> From: Xinying Yu <xinying.yu@corigine.com>
> 
> Create the relay vring on host and then set the address of Rx
> used ring to the VF config bar. So the device can DMA the
> used ring information to host rather than directly to VM.
> 
> Use 'NFP_NET_CFG_CTRL_LM_RELAY' notify the device side. And
> enable the MSIX interrupt on device.
> 
> Tx ring address is not needed to change since the relay vring
> only assists Rx ring to do the dirty page logging.
> 
> Signed-off-by: Xinying Yu <xinying.yu@corigine.com>
> Reviewed-by: Chaoyong He <chaoyong.he@corigine.com>
> Reviewed-by: Long Wu <long.wu@corigine.com>
> Reviewed-by: Peng Zhang <peng.zhang@corigine.com>
> ---
>   drivers/common/nfp/nfp_common_ctrl.h |   3 +
>   drivers/vdpa/nfp/nfp_vdpa.c          | 203 ++++++++++++++++++++++++---
>   drivers/vdpa/nfp/nfp_vdpa_core.c     |  55 ++++++--
>   drivers/vdpa/nfp/nfp_vdpa_core.h     |   8 ++
>   4 files changed, 239 insertions(+), 30 deletions(-)
> 

Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>

Thanks,
Maxime
  

Patch

diff --git a/drivers/common/nfp/nfp_common_ctrl.h b/drivers/common/nfp/nfp_common_ctrl.h
index a0e62b063d..9311d01590 100644
--- a/drivers/common/nfp/nfp_common_ctrl.h
+++ b/drivers/common/nfp/nfp_common_ctrl.h
@@ -186,6 +186,9 @@  struct nfp_net_fw_ver {
 #define NFP_NET_CFG_CTRL_FLOW_STEER       (0x1 << 8) /**< Flow Steering */
 #define NFP_NET_CFG_CTRL_VIRTIO           (0x1 << 10) /**< Virtio offload */
 #define NFP_NET_CFG_CTRL_IN_ORDER         (0x1 << 11) /**< Virtio in-order flag */
+#define NFP_NET_CFG_CTRL_LM_RELAY         (0x1 << 12) /**< Virtio live migration relay start */
+#define NFP_NET_CFG_CTRL_NOTIFY_DATA      (0x1 << 13) /**< Virtio notification data flag */
+#define NFP_NET_CFG_CTRL_SWLM             (0x1 << 14) /**< Virtio SW live migration enable */
 #define NFP_NET_CFG_CTRL_USO              (0x1 << 16) /**< UDP segmentation offload */
 
 #define NFP_NET_CFG_CAP_WORD1           0x00a4
diff --git a/drivers/vdpa/nfp/nfp_vdpa.c b/drivers/vdpa/nfp/nfp_vdpa.c
index 1643ebbb8c..983123ba08 100644
--- a/drivers/vdpa/nfp/nfp_vdpa.c
+++ b/drivers/vdpa/nfp/nfp_vdpa.c
@@ -11,6 +11,8 @@ 
 #include <nfp_common_pci.h>
 #include <nfp_dev.h>
 #include <rte_vfio.h>
+#include <rte_eal_paging.h>
+#include <rte_malloc.h>
 #include <vdpa_driver.h>
 
 #include "nfp_vdpa_core.h"
@@ -21,6 +23,9 @@ 
 #define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \
 		sizeof(int) * (NFP_VDPA_MAX_QUEUES * 2 + 1))
 
+#define NFP_VDPA_USED_RING_LEN(size) \
+		((size) * sizeof(struct vring_used_elem) + sizeof(struct vring_used))
+
 struct nfp_vdpa_dev {
 	struct rte_pci_device *pci_dev;
 	struct rte_vdpa_device *vdev;
@@ -261,15 +266,85 @@  nfp_vdpa_qva_to_gpa(int vid,
 	return gpa;
 }
 
+static void
+nfp_vdpa_relay_vring_free(struct nfp_vdpa_dev *device,
+		uint16_t vring_index)
+{
+	uint16_t i;
+	uint64_t size;
+	struct rte_vhost_vring vring;
+	uint64_t m_vring_iova = NFP_VDPA_RELAY_VRING;
+
+	for (i = 0; i < vring_index; i++) {
+		rte_vhost_get_vhost_vring(device->vid, i, &vring);
+
+		size = RTE_ALIGN_CEIL(vring_size(vring.size, rte_mem_page_size()),
+				rte_mem_page_size());
+		rte_vfio_container_dma_unmap(device->vfio_container_fd,
+				(uint64_t)(uintptr_t)device->hw.m_vring[i].desc,
+				m_vring_iova, size);
+
+		rte_free(device->hw.m_vring[i].desc);
+		m_vring_iova += size;
+	}
+}
+
 static int
-nfp_vdpa_start(struct nfp_vdpa_dev *device)
+nfp_vdpa_relay_vring_alloc(struct nfp_vdpa_dev *device)
+{
+	int ret;
+	uint16_t i;
+	uint64_t size;
+	void *vring_buf;
+	uint64_t page_size;
+	struct rte_vhost_vring vring;
+	struct nfp_vdpa_hw *vdpa_hw = &device->hw;
+	uint64_t m_vring_iova = NFP_VDPA_RELAY_VRING;
+
+	page_size = rte_mem_page_size();
+
+	for (i = 0; i < vdpa_hw->nr_vring; i++) {
+		rte_vhost_get_vhost_vring(device->vid, i, &vring);
+
+		size = RTE_ALIGN_CEIL(vring_size(vring.size, page_size), page_size);
+		vring_buf = rte_zmalloc("nfp_vdpa_relay", size, page_size);
+		if (vring_buf == NULL)
+			goto vring_free_all;
+
+		vring_init(&vdpa_hw->m_vring[i], vring.size, vring_buf, page_size);
+
+		ret = rte_vfio_container_dma_map(device->vfio_container_fd,
+				(uint64_t)(uintptr_t)vring_buf, m_vring_iova, size);
+		if (ret != 0) {
+			DRV_VDPA_LOG(ERR, "vDPA vring relay dma map failed.");
+			goto vring_free_one;
+		}
+
+		m_vring_iova += size;
+	}
+
+	return 0;
+
+vring_free_one:
+	rte_free(device->hw.m_vring[i].desc);
+vring_free_all:
+	nfp_vdpa_relay_vring_free(device, i);
+
+	return -ENOSPC;
+}
+
+static int
+nfp_vdpa_start(struct nfp_vdpa_dev *device,
+		bool relay)
 {
 	int ret;
 	int vid;
 	uint16_t i;
 	uint64_t gpa;
+	uint16_t size;
 	struct rte_vhost_vring vring;
 	struct nfp_vdpa_hw *vdpa_hw = &device->hw;
+	uint64_t m_vring_iova = NFP_VDPA_RELAY_VRING;
 
 	vid = device->vid;
 	vdpa_hw->nr_vring = rte_vhost_get_vring_num(vid);
@@ -278,15 +353,21 @@  nfp_vdpa_start(struct nfp_vdpa_dev *device)
 	if (ret != 0)
 		return ret;
 
+	if (relay) {
+		ret = nfp_vdpa_relay_vring_alloc(device);
+		if (ret != 0)
+			return ret;
+	}
+
 	for (i = 0; i < vdpa_hw->nr_vring; i++) {
 		ret = rte_vhost_get_vhost_vring(vid, i, &vring);
 		if (ret != 0)
-			return ret;
+			goto relay_vring_free;
 
 		gpa = nfp_vdpa_qva_to_gpa(vid, (uint64_t)(uintptr_t)vring.desc);
 		if (gpa == 0) {
 			DRV_VDPA_LOG(ERR, "Fail to get GPA for descriptor ring.");
-			return -1;
+			goto relay_vring_free;
 		}
 
 		vdpa_hw->vring[i].desc = gpa;
@@ -294,45 +375,123 @@  nfp_vdpa_start(struct nfp_vdpa_dev *device)
 		gpa = nfp_vdpa_qva_to_gpa(vid, (uint64_t)(uintptr_t)vring.avail);
 		if (gpa == 0) {
 			DRV_VDPA_LOG(ERR, "Fail to get GPA for available ring.");
-			return -1;
+			goto relay_vring_free;
 		}
 
 		vdpa_hw->vring[i].avail = gpa;
 
-		gpa = nfp_vdpa_qva_to_gpa(vid, (uint64_t)(uintptr_t)vring.used);
-		if (gpa == 0) {
-			DRV_VDPA_LOG(ERR, "Fail to get GPA for used ring.");
-			return -1;
-		}
+		/* Direct I/O for Tx queue, relay for Rx queue */
+		if (relay && ((i & 1) == 0)) {
+			vdpa_hw->vring[i].used = m_vring_iova +
+					(char *)vdpa_hw->m_vring[i].used -
+					(char *)vdpa_hw->m_vring[i].desc;
+
+			ret = rte_vhost_get_vring_base(vid, i,
+					&vdpa_hw->m_vring[i].avail->idx,
+					&vdpa_hw->m_vring[i].used->idx);
+			if (ret != 0)
+				goto relay_vring_free;
+		} else {
+			gpa = nfp_vdpa_qva_to_gpa(vid, (uint64_t)(uintptr_t)vring.used);
+			if (gpa == 0) {
+				DRV_VDPA_LOG(ERR, "Fail to get GPA for used ring.");
+				goto relay_vring_free;
+			}
 
-		vdpa_hw->vring[i].used = gpa;
+			vdpa_hw->vring[i].used = gpa;
+		}
 
 		vdpa_hw->vring[i].size = vring.size;
 
+		if (relay) {
+			size = RTE_ALIGN_CEIL(vring_size(vring.size,
+					rte_mem_page_size()), rte_mem_page_size());
+			m_vring_iova += size;
+		}
+
 		ret = rte_vhost_get_vring_base(vid, i,
 				&vdpa_hw->vring[i].last_avail_idx,
 				&vdpa_hw->vring[i].last_used_idx);
 		if (ret != 0)
-			return ret;
+			goto relay_vring_free;
 	}
 
-	return nfp_vdpa_hw_start(&device->hw, vid);
+	if (relay)
+		return nfp_vdpa_relay_hw_start(&device->hw, vid);
+	else
+		return nfp_vdpa_hw_start(&device->hw, vid);
+
+relay_vring_free:
+	if (relay)
+		nfp_vdpa_relay_vring_free(device, vdpa_hw->nr_vring);
+
+	return -EFAULT;
+}
+
+static void
+nfp_vdpa_update_used_ring(struct nfp_vdpa_dev *dev,
+		uint16_t qid)
+{
+	rte_vdpa_relay_vring_used(dev->vid, qid, &dev->hw.m_vring[qid]);
+	rte_vhost_vring_call(dev->vid, qid);
 }
 
 static void
-nfp_vdpa_stop(struct nfp_vdpa_dev *device)
+nfp_vdpa_relay_stop(struct nfp_vdpa_dev *device)
 {
 	int vid;
 	uint32_t i;
+	uint64_t len;
+	struct rte_vhost_vring vring;
 	struct nfp_vdpa_hw *vdpa_hw = &device->hw;
 
 	nfp_vdpa_hw_stop(vdpa_hw);
 
 	vid = device->vid;
-	for (i = 0; i < vdpa_hw->nr_vring; i++)
+	for (i = 0; i < vdpa_hw->nr_vring; i++) {
+		/* Synchronize remaining new used entries if any */
+		if ((i & 1) == 0)
+			nfp_vdpa_update_used_ring(device, i);
+
+		rte_vhost_get_vhost_vring(vid, i, &vring);
+		len = NFP_VDPA_USED_RING_LEN(vring.size);
+		vdpa_hw->vring[i].last_avail_idx = vring.avail->idx;
+		vdpa_hw->vring[i].last_used_idx = vring.used->idx;
+
 		rte_vhost_set_vring_base(vid, i,
 				vdpa_hw->vring[i].last_avail_idx,
 				vdpa_hw->vring[i].last_used_idx);
+
+		rte_vhost_log_used_vring(vid, i, 0, len);
+
+		if (vring.used->idx != vring.avail->idx)
+			rte_atomic_store_explicit(
+					(unsigned short __rte_atomic *)&vring.used->idx,
+					vring.avail->idx, rte_memory_order_release);
+	}
+
+	nfp_vdpa_relay_vring_free(device, vdpa_hw->nr_vring);
+}
+
+static void
+nfp_vdpa_stop(struct nfp_vdpa_dev *device,
+		bool relay)
+{
+	int vid;
+	uint32_t i;
+	struct nfp_vdpa_hw *vdpa_hw = &device->hw;
+
+	nfp_vdpa_hw_stop(vdpa_hw);
+
+	vid = device->vid;
+	if (relay)
+		nfp_vdpa_relay_stop(device);
+	else
+		for (i = 0; i < vdpa_hw->nr_vring; i++)
+			rte_vhost_set_vring_base(vid, i,
+					vdpa_hw->vring[i].last_avail_idx,
+					vdpa_hw->vring[i].last_used_idx);
+
 }
 
 static int
@@ -575,7 +734,7 @@  update_datapath(struct nfp_vdpa_dev *device)
 		if (ret != 0)
 			goto dma_map_rollback;
 
-		ret = nfp_vdpa_start(device);
+		ret = nfp_vdpa_start(device, false);
 		if (ret != 0)
 			goto disable_vfio_intr;
 
@@ -591,7 +750,7 @@  update_datapath(struct nfp_vdpa_dev *device)
 					rte_memory_order_relaxed) != 0))) {
 		nfp_vdpa_unset_notify_relay(device);
 
-		nfp_vdpa_stop(device);
+		nfp_vdpa_stop(device, false);
 
 		ret = nfp_vdpa_disable_vfio_intr(device);
 		if (ret != 0)
@@ -608,7 +767,7 @@  update_datapath(struct nfp_vdpa_dev *device)
 	return 0;
 
 vdpa_stop:
-	nfp_vdpa_stop(device);
+	nfp_vdpa_stop(device, false);
 disable_vfio_intr:
 	nfp_vdpa_disable_vfio_intr(device);
 dma_map_rollback:
@@ -639,10 +798,17 @@  nfp_vdpa_sw_fallback(struct nfp_vdpa_dev *device)
 	if (ret != 0)
 		goto error;
 
+	/* Config the VF */
+	ret = nfp_vdpa_start(device, true);
+	if (ret != 0)
+		goto unset_intr;
+
 	device->hw.sw_fallback_running = true;
 
 	return 0;
 
+unset_intr:
+	nfp_vdpa_disable_vfio_intr(device);
 error:
 	return ret;
 }
@@ -691,6 +857,9 @@  nfp_vdpa_dev_close(int vid)
 
 	device = node->device;
 	if (device->hw.sw_fallback_running) {
+		/* Reset VF */
+		nfp_vdpa_stop(device, true);
+
 		device->hw.sw_fallback_running = false;
 
 		rte_atomic_store_explicit(&device->dev_attached, 0,
diff --git a/drivers/vdpa/nfp/nfp_vdpa_core.c b/drivers/vdpa/nfp/nfp_vdpa_core.c
index 50eda4cb2c..2b609dddc2 100644
--- a/drivers/vdpa/nfp/nfp_vdpa_core.c
+++ b/drivers/vdpa/nfp/nfp_vdpa_core.c
@@ -109,7 +109,8 @@  nfp_vdpa_check_offloads(void)
 
 static int
 nfp_vdpa_vf_config(struct nfp_hw *hw,
-		int vid)
+		int vid,
+		bool relay)
 {
 	int ret;
 	uint32_t update;
@@ -133,6 +134,10 @@  nfp_vdpa_vf_config(struct nfp_hw *hw,
 	nfp_write_mac(hw, (uint8_t *)mac_addr);
 
 	new_ext_ctrl = nfp_vdpa_check_offloads();
+	if (relay)
+		new_ext_ctrl |= NFP_NET_CFG_CTRL_LM_RELAY;
+	else
+		new_ext_ctrl |= NFP_NET_CFG_CTRL_SWLM;
 
 	update = NFP_NET_CFG_UPDATE_GEN;
 	ret = nfp_ext_reconfig(hw, new_ext_ctrl, update);
@@ -149,6 +154,15 @@  nfp_vdpa_vf_config(struct nfp_hw *hw,
 			NFP_NET_CFG_UPDATE_GEN |
 			NFP_NET_CFG_UPDATE_RING;
 
+	if (relay) {
+		update |= NFP_NET_CFG_UPDATE_MSIX;
+
+		/* Enable misx interrupt for vdpa relay */
+		new_ctrl |= NFP_NET_CFG_CTRL_MSIX_TX_OFF;
+
+		nn_cfg_writeb(hw, NFP_NET_CFG_RXR_VEC(0), 1);
+	}
+
 	ret = nfp_reconfig(hw, new_ctrl, update);
 	if (ret < 0)
 		return -EIO;
@@ -164,20 +178,24 @@  nfp_vdpa_vf_config(struct nfp_hw *hw,
 }
 
 static void
-nfp_vdpa_queue_config(struct nfp_vdpa_hw *vdpa_hw)
+nfp_vdpa_queue_config(struct nfp_vdpa_hw *vdpa_hw,
+		bool relay)
 {
 	struct nfp_hw *hw = &vdpa_hw->super;
 
-	nn_cfg_writeq(hw, NFP_NET_CFG_TXR_ADDR(0), vdpa_hw->vring[1].desc);
-	nn_cfg_writeb(hw, NFP_NET_CFG_TXR_SZ(0),
-			rte_log2_u32(vdpa_hw->vring[1].size));
-	nn_cfg_writeq(hw, NFP_NET_CFG_TXR_ADDR(1), vdpa_hw->vring[1].avail);
-	nn_cfg_writeq(hw, NFP_NET_CFG_TXR_ADDR(2), vdpa_hw->vring[1].used);
+	if (!relay) {
+		nn_cfg_writeq(hw, NFP_NET_CFG_TXR_ADDR(0), vdpa_hw->vring[1].desc);
+		nn_cfg_writeb(hw, NFP_NET_CFG_TXR_SZ(0),
+				rte_log2_u32(vdpa_hw->vring[1].size));
+		nn_cfg_writeq(hw, NFP_NET_CFG_TXR_ADDR(1), vdpa_hw->vring[1].avail);
+		nn_cfg_writeq(hw, NFP_NET_CFG_TXR_ADDR(2), vdpa_hw->vring[1].used);
+
+		nn_cfg_writeq(hw, NFP_NET_CFG_RXR_ADDR(0), vdpa_hw->vring[0].desc);
+		nn_cfg_writeb(hw, NFP_NET_CFG_RXR_SZ(0),
+				rte_log2_u32(vdpa_hw->vring[0].size));
+		nn_cfg_writeq(hw, NFP_NET_CFG_RXR_ADDR(1), vdpa_hw->vring[0].avail);
+	}
 
-	nn_cfg_writeq(hw, NFP_NET_CFG_RXR_ADDR(0), vdpa_hw->vring[0].desc);
-	nn_cfg_writeb(hw, NFP_NET_CFG_RXR_SZ(0),
-			rte_log2_u32(vdpa_hw->vring[0].size));
-	nn_cfg_writeq(hw, NFP_NET_CFG_RXR_ADDR(1), vdpa_hw->vring[0].avail);
 	nn_cfg_writeq(hw, NFP_NET_CFG_RXR_ADDR(2), vdpa_hw->vring[0].used);
 
 	rte_wmb();
@@ -189,12 +207,23 @@  nfp_vdpa_hw_start(struct nfp_vdpa_hw *vdpa_hw,
 {
 	struct nfp_hw *hw = &vdpa_hw->super;
 
-	nfp_vdpa_queue_config(vdpa_hw);
+	nfp_vdpa_queue_config(vdpa_hw, false);
 
 	nfp_disable_queues(hw);
 	nfp_enable_queues(hw, NFP_VDPA_MAX_QUEUES, NFP_VDPA_MAX_QUEUES);
 
-	return nfp_vdpa_vf_config(hw, vid);
+	return nfp_vdpa_vf_config(hw, vid, false);
+}
+
+int
+nfp_vdpa_relay_hw_start(struct nfp_vdpa_hw *vdpa_hw,
+		int vid)
+{
+	struct nfp_hw *hw = &vdpa_hw->super;
+
+	nfp_vdpa_queue_config(vdpa_hw, true);
+
+	return nfp_vdpa_vf_config(hw, vid, true);
 }
 
 void
diff --git a/drivers/vdpa/nfp/nfp_vdpa_core.h b/drivers/vdpa/nfp/nfp_vdpa_core.h
index 0f880fc0c6..a339ace601 100644
--- a/drivers/vdpa/nfp/nfp_vdpa_core.h
+++ b/drivers/vdpa/nfp/nfp_vdpa_core.h
@@ -9,12 +9,15 @@ 
 #include <bus_pci_driver.h>
 #include <nfp_common.h>
 #include <rte_ether.h>
+#include <rte_vhost.h>
 
 #define NFP_VDPA_MAX_QUEUES         1
 
 #define NFP_VDPA_NOTIFY_ADDR_BASE        0x4000
 #define NFP_VDPA_NOTIFY_ADDR_INTERVAL    0x1000
 
+#define NFP_VDPA_RELAY_VRING             0xd0000000
+
 struct nfp_vdpa_vring {
 	uint64_t desc;
 	uint64_t avail;
@@ -40,12 +43,17 @@  struct nfp_vdpa_hw {
 	/** Software Live Migration */
 	bool sw_lm;
 	bool sw_fallback_running;
+
+	/** Mediated vring for SW fallback */
+	struct vring m_vring[NFP_VDPA_MAX_QUEUES * 2];
 };
 
 int nfp_vdpa_hw_init(struct nfp_vdpa_hw *vdpa_hw, struct rte_pci_device *dev);
 
 int nfp_vdpa_hw_start(struct nfp_vdpa_hw *vdpa_hw, int vid);
 
+int nfp_vdpa_relay_hw_start(struct nfp_vdpa_hw *vdpa_hw, int vid);
+
 void nfp_vdpa_hw_stop(struct nfp_vdpa_hw *vdpa_hw);
 
 void nfp_vdpa_notify_queue(struct nfp_vdpa_hw *vdpa_hw, uint16_t qid);