[v7,3/4] kni: add IOVA=VA support in KNI module

Message ID 20190717090408.13717-4-vattunuru@marvell.com
State Superseded, archived
Headers show
Series
  • kni: add IOVA=VA support
Related show

Checks

Context Check Description
ci/Intel-compilation fail Compilation issues
ci/checkpatch success coding style OK

Commit Message

Vamsi Krishna Attunuru July 17, 2019, 9:04 a.m.
From: Kiran Kumar K <kirankumark@marvell.com>

Patch adds support for kernel module to work in IOVA = VA mode,
the idea is to get physical address from IOVA address using
iommu_iova_to_phys API and later use phys_to_virt API to
convert the physical address to kernel virtual address.

When compared with IOVA = PA mode, there is no performance
drop with this approach.

This approach does not work with the kernel versions less
than 4.4.0 because of API compatibility issues.

Patch also updates these support details in KNI documentation.

Signed-off-by: Kiran Kumar K <kirankumark@marvell.com>
Signed-off-by: Vamsi Attunuru <vattunuru@marvell.com>
---
 doc/guides/prog_guide/kernel_nic_interface.rst |  8 +++
 kernel/linux/kni/compat.h                      |  4 ++
 kernel/linux/kni/kni_dev.h                     |  4 ++
 kernel/linux/kni/kni_misc.c                    | 71 ++++++++++++++++++++++----
 kernel/linux/kni/kni_net.c                     | 59 ++++++++++++++++-----
 5 files changed, 124 insertions(+), 22 deletions(-)

Patch

diff --git a/doc/guides/prog_guide/kernel_nic_interface.rst b/doc/guides/prog_guide/kernel_nic_interface.rst
index 38369b3..fd2ce63 100644
--- a/doc/guides/prog_guide/kernel_nic_interface.rst
+++ b/doc/guides/prog_guide/kernel_nic_interface.rst
@@ -291,6 +291,14 @@  The sk_buff is then freed and the mbuf sent in the tx_q FIFO.
 The DPDK TX thread dequeues the mbuf and sends it to the PMD via ``rte_eth_tx_burst()``.
 It then puts the mbuf back in the cache.
 
+IOVA = VA: Support
+------------------
+
+KNI can be operated in IOVA as VA scheme when following criteria are fullfilled
+
+- LINUX_VERSION_CODE >= KERNEL_VERSION(4, 4, 0)
+- eal param `--iova-mode va` is passed or bus IOVA scheme is set to RTE_IOVA_VA
+
 Ethtool
 -------
 
diff --git a/kernel/linux/kni/compat.h b/kernel/linux/kni/compat.h
index 562d8bf..ee997a6 100644
--- a/kernel/linux/kni/compat.h
+++ b/kernel/linux/kni/compat.h
@@ -121,3 +121,7 @@ 
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 11, 0)
 #define HAVE_SIGNAL_FUNCTIONS_OWN_HEADER
 #endif
+
+#if KERNEL_VERSION(4, 4, 0) <= LINUX_VERSION_CODE
+#define HAVE_IOVA_AS_VA_SUPPORT
+#endif
diff --git a/kernel/linux/kni/kni_dev.h b/kernel/linux/kni/kni_dev.h
index c1ca678..d5898f3 100644
--- a/kernel/linux/kni/kni_dev.h
+++ b/kernel/linux/kni/kni_dev.h
@@ -25,6 +25,7 @@ 
 #include <linux/netdevice.h>
 #include <linux/spinlock.h>
 #include <linux/list.h>
+#include <linux/iommu.h>
 
 #include <rte_kni_common.h>
 #define KNI_KTHREAD_RESCHEDULE_INTERVAL 5 /* us */
@@ -41,6 +42,9 @@  struct kni_dev {
 	/* kni list */
 	struct list_head list;
 
+	uint8_t iova_mode;
+	struct iommu_domain *domain;
+
 	uint32_t core_id;            /* Core ID to bind */
 	char name[RTE_KNI_NAMESIZE]; /* Network device name */
 	struct task_struct *pthread;
diff --git a/kernel/linux/kni/kni_misc.c b/kernel/linux/kni/kni_misc.c
index 2b75502..8660205 100644
--- a/kernel/linux/kni/kni_misc.c
+++ b/kernel/linux/kni/kni_misc.c
@@ -295,6 +295,9 @@  kni_ioctl_create(struct net *net, uint32_t ioctl_num,
 	struct rte_kni_device_info dev_info;
 	struct net_device *net_dev = NULL;
 	struct kni_dev *kni, *dev, *n;
+	struct pci_dev *pci = NULL;
+	struct iommu_domain *domain = NULL;
+	phys_addr_t phys_addr;
 
 	pr_info("Creating kni...\n");
 	/* Check the buffer size, to avoid warning */
@@ -348,15 +351,65 @@  kni_ioctl_create(struct net *net, uint32_t ioctl_num,
 	strncpy(kni->name, dev_info.name, RTE_KNI_NAMESIZE);
 
 	/* Translate user space info into kernel space info */
-	kni->tx_q = phys_to_virt(dev_info.tx_phys);
-	kni->rx_q = phys_to_virt(dev_info.rx_phys);
-	kni->alloc_q = phys_to_virt(dev_info.alloc_phys);
-	kni->free_q = phys_to_virt(dev_info.free_phys);
-
-	kni->req_q = phys_to_virt(dev_info.req_phys);
-	kni->resp_q = phys_to_virt(dev_info.resp_phys);
-	kni->sync_va = dev_info.sync_va;
-	kni->sync_kva = phys_to_virt(dev_info.sync_phys);
+	if (dev_info.iova_mode) {
+#ifdef HAVE_IOVA_AS_VA_SUPPORT
+		pci = pci_get_device(dev_info.vendor_id,
+				     dev_info.device_id, NULL);
+		if (pci == NULL) {
+			pr_err("pci dev does not exist\n");
+			return -ENODEV;
+		}
+
+		while (pci) {
+			if ((pci->bus->number == dev_info.bus) &&
+			    (PCI_SLOT(pci->devfn) == dev_info.devid) &&
+			    (PCI_FUNC(pci->devfn) == dev_info.function)) {
+				domain = iommu_get_domain_for_dev(&pci->dev);
+				break;
+			}
+			pci = pci_get_device(dev_info.vendor_id,
+					     dev_info.device_id, pci);
+		}
+
+		if (domain == NULL) {
+			pr_err("Failed to get pci dev domain info\n");
+			return -ENODEV;
+		}
+#else
+		pr_err("Kernel version does not support IOVA as VA\n");
+		return -EINVAL;
+#endif
+		kni->domain = domain;
+		phys_addr = iommu_iova_to_phys(domain, dev_info.tx_phys);
+		kni->tx_q = phys_to_virt(phys_addr);
+		phys_addr = iommu_iova_to_phys(domain, dev_info.rx_phys);
+		kni->rx_q = phys_to_virt(phys_addr);
+		phys_addr = iommu_iova_to_phys(domain, dev_info.alloc_phys);
+		kni->alloc_q = phys_to_virt(phys_addr);
+		phys_addr = iommu_iova_to_phys(domain, dev_info.free_phys);
+		kni->free_q = phys_to_virt(phys_addr);
+		phys_addr = iommu_iova_to_phys(domain, dev_info.req_phys);
+		kni->req_q = phys_to_virt(phys_addr);
+		phys_addr = iommu_iova_to_phys(domain, dev_info.resp_phys);
+		kni->resp_q = phys_to_virt(phys_addr);
+		kni->sync_va = dev_info.sync_va;
+		phys_addr = iommu_iova_to_phys(domain, dev_info.sync_phys);
+		kni->sync_kva = phys_to_virt(phys_addr);
+		kni->iova_mode = 1;
+
+	} else {
+
+		kni->tx_q = phys_to_virt(dev_info.tx_phys);
+		kni->rx_q = phys_to_virt(dev_info.rx_phys);
+		kni->alloc_q = phys_to_virt(dev_info.alloc_phys);
+		kni->free_q = phys_to_virt(dev_info.free_phys);
+
+		kni->req_q = phys_to_virt(dev_info.req_phys);
+		kni->resp_q = phys_to_virt(dev_info.resp_phys);
+		kni->sync_va = dev_info.sync_va;
+		kni->sync_kva = phys_to_virt(dev_info.sync_phys);
+		kni->iova_mode = 0;
+	}
 
 	kni->mbuf_size = dev_info.mbuf_size;
 
diff --git a/kernel/linux/kni/kni_net.c b/kernel/linux/kni/kni_net.c
index a736407..9387e4c 100644
--- a/kernel/linux/kni/kni_net.c
+++ b/kernel/linux/kni/kni_net.c
@@ -36,6 +36,21 @@  static void kni_net_rx_normal(struct kni_dev *kni);
 /* kni rx function pointer, with default to normal rx */
 static kni_net_rx_t kni_net_rx_func = kni_net_rx_normal;
 
+/* iova to kernel virtual address */
+static inline void *
+iova2kva(struct kni_dev *kni, void *pa)
+{
+	return phys_to_virt(iommu_iova_to_phys(kni->domain,
+				(uintptr_t)pa));
+}
+
+static inline void *
+iova2data_kva(struct kni_dev *kni, struct rte_kni_mbuf *m)
+{
+	return phys_to_virt(iommu_iova_to_phys(kni->domain,
+			   (uintptr_t)m->buf_physaddr) + m->data_off);
+}
+
 /* physical address to kernel virtual address */
 static void *
 pa2kva(void *pa)
@@ -62,6 +77,24 @@  kva2data_kva(struct rte_kni_mbuf *m)
 	return phys_to_virt(m->buf_physaddr + m->data_off);
 }
 
+static inline void *
+get_kva(struct kni_dev *kni, void *pa)
+{
+	if (kni->iova_mode == 1)
+		return iova2kva(kni, pa);
+
+	return pa2kva(pa);
+}
+
+static inline void *
+get_data_kva(struct kni_dev *kni, void *pkt_kva)
+{
+	if (kni->iova_mode == 1)
+		return iova2data_kva(kni, pkt_kva);
+
+	return kva2data_kva(pkt_kva);
+}
+
 /*
  * It can be called to process the request.
  */
@@ -178,7 +211,7 @@  kni_fifo_trans_pa2va(struct kni_dev *kni,
 			return;
 
 		for (i = 0; i < num_rx; i++) {
-			kva = pa2kva(kni->pa[i]);
+			kva = get_kva(kni, kni->pa[i]);
 			kni->va[i] = pa2va(kni->pa[i], kva);
 
 			kva_nb_segs = kva->nb_segs;
@@ -266,8 +299,8 @@  kni_net_tx(struct sk_buff *skb, struct net_device *dev)
 	if (likely(ret == 1)) {
 		void *data_kva;
 
-		pkt_kva = pa2kva(pkt_pa);
-		data_kva = kva2data_kva(pkt_kva);
+		pkt_kva = get_kva(kni, pkt_pa);
+		data_kva = get_data_kva(kni, pkt_kva);
 		pkt_va = pa2va(pkt_pa, pkt_kva);
 
 		len = skb->len;
@@ -338,9 +371,9 @@  kni_net_rx_normal(struct kni_dev *kni)
 
 	/* Transfer received packets to netif */
 	for (i = 0; i < num_rx; i++) {
-		kva = pa2kva(kni->pa[i]);
+		kva = get_kva(kni, kni->pa[i]);
 		len = kva->pkt_len;
-		data_kva = kva2data_kva(kva);
+		data_kva = get_data_kva(kni, kva);
 		kni->va[i] = pa2va(kni->pa[i], kva);
 
 		skb = netdev_alloc_skb(dev, len);
@@ -437,9 +470,9 @@  kni_net_rx_lo_fifo(struct kni_dev *kni)
 		num = ret;
 		/* Copy mbufs */
 		for (i = 0; i < num; i++) {
-			kva = pa2kva(kni->pa[i]);
+			kva = get_kva(kni, kni->pa[i]);
 			len = kva->pkt_len;
-			data_kva = kva2data_kva(kva);
+			data_kva = get_data_kva(kni, kva);
 			kni->va[i] = pa2va(kni->pa[i], kva);
 
 			while (kva->next) {
@@ -449,8 +482,8 @@  kni_net_rx_lo_fifo(struct kni_dev *kni)
 				kva = next_kva;
 			}
 
-			alloc_kva = pa2kva(kni->alloc_pa[i]);
-			alloc_data_kva = kva2data_kva(alloc_kva);
+			alloc_kva = get_kva(kni, kni->alloc_pa[i]);
+			alloc_data_kva = get_data_kva(kni, alloc_kva);
 			kni->alloc_va[i] = pa2va(kni->alloc_pa[i], alloc_kva);
 
 			memcpy(alloc_data_kva, data_kva, len);
@@ -517,9 +550,9 @@  kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
 
 	/* Copy mbufs to sk buffer and then call tx interface */
 	for (i = 0; i < num; i++) {
-		kva = pa2kva(kni->pa[i]);
+		kva = get_kva(kni, kni->pa[i]);
 		len = kva->pkt_len;
-		data_kva = kva2data_kva(kva);
+		data_kva = get_data_kva(kni, kva);
 		kni->va[i] = pa2va(kni->pa[i], kva);
 
 		skb = netdev_alloc_skb(dev, len);
@@ -550,8 +583,8 @@  kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
 					break;
 
 				prev_kva = kva;
-				kva = pa2kva(kva->next);
-				data_kva = kva2data_kva(kva);
+				kva = get_kva(kni, kva->next);
+				data_kva = get_data_kva(kni, kva);
 				/* Convert physical address to virtual address */
 				prev_kva->next = pa2va(prev_kva->next, kva);
 			}