On 22-Apr-19 5:39 AM, kirankumark@marvell.com wrote:
> From: Kiran Kumar K <kirankumark@marvell.com>
>
> With the current KNI implementation, the kernel module works only in
> IOVA=PA mode. This patch adds support for the kernel module to work
> in IOVA=VA mode as well.
>
> The idea is to obtain the physical address from the IOVA using the
> iommu_iova_to_phys() API, and then use the phys_to_virt() API to
> convert that physical address to a kernel virtual address.
>
> With this approach we compared the performance against IOVA=PA mode
> and observed no difference. It seems the kernel itself is the
> dominant overhead.
>
> This approach does not work with kernel versions older than 4.4.0
> because of API compatibility issues.
>
> Signed-off-by: Kiran Kumar K <kirankumark@marvell.com>
> ---
<snip>
> +/* iova to kernel virtual address */
> +static void *
> +iova2kva(struct kni_dev *kni, void *pa)
> +{
> + return phys_to_virt(iommu_iova_to_phys(kni->domain,
> + (dma_addr_t)pa));
> +}
> +
> +static void *
> +iova2data_kva(struct kni_dev *kni, struct rte_kni_mbuf *m)
> +{
> + return phys_to_virt((iommu_iova_to_phys(kni->domain,
> + (dma_addr_t)m->buf_physaddr) +
> + m->data_off));
Does this account for mbufs crossing a page boundary? In IOVA as VA mode,
the mempool is likely allocated in one go, so the mempool allocator will
not take care to prevent mbufs from crossing a page boundary. The data may
very well start at the very end of a page and continue into the
beginning of the next page, which will have a different physical address.
@@ -23,6 +23,7 @@
#include <linux/netdevice.h>
#include <linux/spinlock.h>
#include <linux/list.h>
+#include <linux/iommu.h>
#include <rte_kni_common.h>
#define KNI_KTHREAD_RESCHEDULE_INTERVAL 5 /* us */
@@ -39,6 +40,9 @@ struct kni_dev {
/* kni list */
struct list_head list;
+ uint8_t iova_mode;
+ struct iommu_domain *domain;
+
struct net_device_stats stats;
int status;
uint16_t group_id; /* Group ID of a group of KNI devices */
@@ -306,10 +306,12 @@ kni_ioctl_create(struct net *net, uint32_t ioctl_num,
struct rte_kni_device_info dev_info;
struct net_device *net_dev = NULL;
struct kni_dev *kni, *dev, *n;
+ struct pci_dev *pci = NULL;
+ struct iommu_domain *domain = NULL;
+ phys_addr_t phys_addr;
#ifdef RTE_KNI_KMOD_ETHTOOL
struct pci_dev *found_pci = NULL;
struct net_device *lad_dev = NULL;
- struct pci_dev *pci = NULL;
#endif
pr_info("Creating kni...\n");
@@ -368,15 +370,56 @@ kni_ioctl_create(struct net *net, uint32_t ioctl_num,
strncpy(kni->name, dev_info.name, RTE_KNI_NAMESIZE);
/* Translate user space info into kernel space info */
- kni->tx_q = phys_to_virt(dev_info.tx_phys);
- kni->rx_q = phys_to_virt(dev_info.rx_phys);
- kni->alloc_q = phys_to_virt(dev_info.alloc_phys);
- kni->free_q = phys_to_virt(dev_info.free_phys);
-
- kni->req_q = phys_to_virt(dev_info.req_phys);
- kni->resp_q = phys_to_virt(dev_info.resp_phys);
- kni->sync_va = dev_info.sync_va;
- kni->sync_kva = phys_to_virt(dev_info.sync_phys);
+
+ if (dev_info.iova_mode) {
+#if KERNEL_VERSION(4, 4, 0) > LINUX_VERSION_CODE
+ (void)pci;
+ pr_err("Kernel version is not supported\n");
+ return -EINVAL;
+#else
+ pci = pci_get_device(dev_info.vendor_id,
+ dev_info.device_id, NULL);
+ while (pci) {
+ if ((pci->bus->number == dev_info.bus) &&
+ (PCI_SLOT(pci->devfn) == dev_info.devid) &&
+ (PCI_FUNC(pci->devfn) == dev_info.function)) {
+ domain = iommu_get_domain_for_dev(&pci->dev);
+ break;
+ }
+ pci = pci_get_device(dev_info.vendor_id,
+ dev_info.device_id, pci);
+ }
+#endif
+ kni->domain = domain;
+ phys_addr = iommu_iova_to_phys(domain, dev_info.tx_phys);
+ kni->tx_q = phys_to_virt(phys_addr);
+ phys_addr = iommu_iova_to_phys(domain, dev_info.rx_phys);
+ kni->rx_q = phys_to_virt(phys_addr);
+ phys_addr = iommu_iova_to_phys(domain, dev_info.alloc_phys);
+ kni->alloc_q = phys_to_virt(phys_addr);
+ phys_addr = iommu_iova_to_phys(domain, dev_info.free_phys);
+ kni->free_q = phys_to_virt(phys_addr);
+ phys_addr = iommu_iova_to_phys(domain, dev_info.req_phys);
+ kni->req_q = phys_to_virt(phys_addr);
+ phys_addr = iommu_iova_to_phys(domain, dev_info.resp_phys);
+ kni->resp_q = phys_to_virt(phys_addr);
+ kni->sync_va = dev_info.sync_va;
+ phys_addr = iommu_iova_to_phys(domain, dev_info.sync_phys);
+ kni->sync_kva = phys_to_virt(phys_addr);
+ kni->iova_mode = 1;
+
+ } else {
+ kni->tx_q = phys_to_virt(dev_info.tx_phys);
+ kni->rx_q = phys_to_virt(dev_info.rx_phys);
+ kni->alloc_q = phys_to_virt(dev_info.alloc_phys);
+ kni->free_q = phys_to_virt(dev_info.free_phys);
+
+ kni->req_q = phys_to_virt(dev_info.req_phys);
+ kni->resp_q = phys_to_virt(dev_info.resp_phys);
+ kni->sync_va = dev_info.sync_va;
+ kni->sync_kva = phys_to_virt(dev_info.sync_phys);
+ kni->iova_mode = 0;
+ }
kni->mbuf_size = dev_info.mbuf_size;
@@ -35,6 +35,22 @@ static void kni_net_rx_normal(struct kni_dev *kni);
/* kni rx function pointer, with default to normal rx */
static kni_net_rx_t kni_net_rx_func = kni_net_rx_normal;
+/*
+ * Translate an IOVA into a kernel virtual address: the address is looked up
+ * in kni->domain with iommu_iova_to_phys() and the resulting physical
+ * address is converted with phys_to_virt().
+ */
+static void *
+iova2kva(struct kni_dev *kni, void *pa)
+{
+ phys_addr_t phys = iommu_iova_to_phys(kni->domain, (dma_addr_t)pa);
+
+ return phys_to_virt(phys);
+}
+
+/*
+ * Return the kernel virtual address of the first data byte of mbuf m.
+ *
+ * m must already be a kernel virtual address of the mbuf (callers pass the
+ * result of iova2kva()), because m->buf_physaddr and m->data_off are read
+ * through it.
+ *
+ * The IOVA of the data start (buffer IOVA plus data_off) is translated as a
+ * whole. Translating only the buffer start and then adding data_off to the
+ * physical address gives a wrong result when the buffer start and the data
+ * start lie in different IOMMU pages, since pages that are contiguous in
+ * IOVA space need not be physically contiguous.
+ *
+ * NOTE: only the first data byte is translated here; a payload that itself
+ * runs across a page boundary is still not handled by this helper.
+ */
+static void *
+iova2data_kva(struct kni_dev *kni, struct rte_kni_mbuf *m)
+{
+ dma_addr_t data_iova = (dma_addr_t)m->buf_physaddr + m->data_off;
+
+ return phys_to_virt(iommu_iova_to_phys(kni->domain, data_iova));
+}
+
/* physical address to kernel virtual address */
static void *
pa2kva(void *pa)
@@ -186,7 +202,10 @@ kni_fifo_trans_pa2va(struct kni_dev *kni,
return;
for (i = 0; i < num_rx; i++) {
- kva = pa2kva(kni->pa[i]);
+ if (likely(kni->iova_mode == 1))
+ kva = iova2kva(kni, kni->pa[i]);
+ else
+ kva = pa2kva(kni->pa[i]);
kni->va[i] = pa2va(kni->pa[i], kva);
}
@@ -263,8 +282,13 @@ kni_net_tx(struct sk_buff *skb, struct net_device *dev)
if (likely(ret == 1)) {
void *data_kva;
- pkt_kva = pa2kva(pkt_pa);
- data_kva = kva2data_kva(pkt_kva);
+ if (likely(kni->iova_mode == 1)) {
+ pkt_kva = iova2kva(kni, pkt_pa);
+ data_kva = iova2data_kva(kni, pkt_kva);
+ } else {
+ pkt_kva = pa2kva(pkt_pa);
+ data_kva = kva2data_kva(pkt_kva);
+ }
pkt_va = pa2va(pkt_pa, pkt_kva);
len = skb->len;
@@ -335,9 +359,14 @@ kni_net_rx_normal(struct kni_dev *kni)
/* Transfer received packets to netif */
for (i = 0; i < num_rx; i++) {
- kva = pa2kva(kni->pa[i]);
+ if (likely(kni->iova_mode == 1)) {
+ kva = iova2kva(kni, kni->pa[i]);
+ data_kva = iova2data_kva(kni, kva);
+ } else {
+ kva = pa2kva(kni->pa[i]);
+ data_kva = kva2data_kva(kva);
+ }
len = kva->pkt_len;
- data_kva = kva2data_kva(kva);
kni->va[i] = pa2va(kni->pa[i], kva);
skb = dev_alloc_skb(len + 2);
@@ -434,13 +463,20 @@ kni_net_rx_lo_fifo(struct kni_dev *kni)
num = ret;
/* Copy mbufs */
for (i = 0; i < num; i++) {
- kva = pa2kva(kni->pa[i]);
+
+ if (likely(kni->iova_mode == 1)) {
+ kva = iova2kva(kni, kni->pa[i]);
+ data_kva = iova2data_kva(kni, kva);
+ alloc_kva = iova2kva(kni, kni->alloc_pa[i]);
+ alloc_data_kva = iova2data_kva(kni, alloc_kva);
+ } else {
+ kva = pa2kva(kni->pa[i]);
+ data_kva = kva2data_kva(kva);
+ alloc_kva = pa2kva(kni->alloc_pa[i]);
+ alloc_data_kva = kva2data_kva(alloc_kva);
+ }
len = kva->pkt_len;
- data_kva = kva2data_kva(kva);
kni->va[i] = pa2va(kni->pa[i], kva);
-
- alloc_kva = pa2kva(kni->alloc_pa[i]);
- alloc_data_kva = kva2data_kva(alloc_kva);
kni->alloc_va[i] = pa2va(kni->alloc_pa[i], alloc_kva);
memcpy(alloc_data_kva, data_kva, len);
@@ -507,9 +543,15 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
/* Copy mbufs to sk buffer and then call tx interface */
for (i = 0; i < num; i++) {
- kva = pa2kva(kni->pa[i]);
+
+ if (likely(kni->iova_mode == 1)) {
+ kva = iova2kva(kni, kni->pa[i]);
+ data_kva = iova2data_kva(kni, kva);
+ } else {
+ kva = pa2kva(kni->pa[i]);
+ data_kva = kva2data_kva(kva);
+ }
len = kva->pkt_len;
- data_kva = kva2data_kva(kva);
kni->va[i] = pa2va(kni->pa[i], kva);
skb = dev_alloc_skb(len + 2);
@@ -545,8 +587,14 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
if (!kva->next)
break;
- kva = pa2kva(va2pa(kva->next, kva));
- data_kva = kva2data_kva(kva);
+ if (likely(kni->iova_mode == 1)) {
+ kva = iova2kva(kni,
+ va2pa(kva->next, kva));
+ data_kva = iova2data_kva(kni, kva);
+ } else {
+ kva = pa2kva(va2pa(kva->next, kva));
+ data_kva = kva2data_kva(kva);
+ }
}
}
@@ -1040,15 +1040,6 @@ rte_eal_init(int argc, char **argv)
/* autodetect the IOVA mapping mode (default is RTE_IOVA_PA) */
rte_eal_get_configuration()->iova_mode =
rte_bus_get_iommu_class();
-
- /* Workaround for KNI which requires physical address to work */
- if (rte_eal_get_configuration()->iova_mode == RTE_IOVA_VA &&
- rte_eal_check_module("rte_kni") == 1) {
- rte_eal_get_configuration()->iova_mode = RTE_IOVA_PA;
- RTE_LOG(WARNING, EAL,
- "Some devices want IOVA as VA but PA will be used because.. "
- "KNI module inserted\n");
- }
} else {
rte_eal_get_configuration()->iova_mode =
internal_config.iova_mode;
@@ -128,6 +128,7 @@ struct rte_kni_device_info {
unsigned mbuf_size;
unsigned int mtu;
char mac_addr[6];
+ uint8_t iova_mode;
};
#define KNI_DEVICE "kni"
@@ -304,6 +304,8 @@ rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
kni->group_id = conf->group_id;
kni->mbuf_size = conf->mbuf_size;
+ dev_info.iova_mode = (rte_eal_iova_mode() == RTE_IOVA_VA) ? 1 : 0;
+
ret = ioctl(kni_fd, RTE_KNI_IOCTL_CREATE, &dev_info);
if (ret < 0)
goto ioctl_fail;