@@ -23,6 +23,7 @@
#include <linux/netdevice.h>
#include <linux/spinlock.h>
#include <linux/list.h>
+#include <linux/pci.h>
+#include <linux/iommu.h>
#include <rte_kni_common.h>
#define KNI_KTHREAD_RESCHEDULE_INTERVAL 5 /* us */
@@ -39,6 +40,9 @@ struct kni_dev {
/* kni list */
struct list_head list;
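+	/* nonzero when addresses exchanged with user space are IOVAs */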
+ uint8_t iova_mode;
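+	/* IOMMU domain of the backing device, for IOVA-to-PA lookups */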
+ struct iommu_domain *domain;
+
struct net_device_stats stats;
int status;
uint16_t group_id; /* Group ID of a group of KNI devices */
@@ -306,10 +306,12 @@ kni_ioctl_create(struct net *net, uint32_t ioctl_num,
struct rte_kni_device_info dev_info;
struct net_device *net_dev = NULL;
struct kni_dev *kni, *dev, *n;
+ struct pci_dev *pci = NULL;
+ struct iommu_domain *domain = NULL;
+ phys_addr_t phys_addr;
#ifdef RTE_KNI_KMOD_ETHTOOL
struct pci_dev *found_pci = NULL;
struct net_device *lad_dev = NULL;
- struct pci_dev *pci = NULL;
#endif
pr_info("Creating kni...\n");
@@ -368,15 +370,50 @@ kni_ioctl_create(struct net *net, uint32_t ioctl_num,
strncpy(kni->name, dev_info.name, RTE_KNI_NAMESIZE);
/* Translate user space info into kernel space info */
- kni->tx_q = phys_to_virt(dev_info.tx_phys);
- kni->rx_q = phys_to_virt(dev_info.rx_phys);
- kni->alloc_q = phys_to_virt(dev_info.alloc_phys);
- kni->free_q = phys_to_virt(dev_info.free_phys);
-
- kni->req_q = phys_to_virt(dev_info.req_phys);
- kni->resp_q = phys_to_virt(dev_info.resp_phys);
- kni->sync_va = dev_info.sync_va;
- kni->sync_kva = phys_to_virt(dev_info.sync_phys);
+ if (dev_info.iova_mode) {
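+		/* Find the PCI device backing this interface by vendor,
+		 * device ID and BDF so that its IOMMU domain can be used
+		 * to translate the IOVAs supplied by user space.
+		 */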
+		pci = pci_get_device(dev_info.vendor_id,
+				     dev_info.device_id, NULL);
+		while (pci) {
+			if ((pci->bus->number == dev_info.bus) &&
+			    (PCI_SLOT(pci->devfn) == dev_info.devid) &&
+			    (PCI_FUNC(pci->devfn) == dev_info.function)) {
+				domain = iommu_get_domain_for_dev(&pci->dev);
+				break;
+			}
+			pci = pci_get_device(dev_info.vendor_id,
+					     dev_info.device_id, pci);
+		}
+		/* Drop the reference taken by pci_get_device() and fail
+		 * cleanly if no matching device or attached IOMMU domain
+		 * was found, rather than passing a NULL domain to
+		 * iommu_iova_to_phys() below.
+		 */
+		if (pci)
+			pci_dev_put(pci);
+		if (domain == NULL) {
+			pr_err("Failed to get IOMMU domain for %s\n",
+			       dev_info.name);
+			free_netdev(net_dev);
+			return -ENODEV;
+		}
+		kni->domain = domain;
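+		/* Each queue address from user space is an IOVA here:
+		 * resolve it to a physical address through the IOMMU
+		 * domain, then map that to a kernel virtual address.
+		 */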
+ phys_addr = iommu_iova_to_phys(domain, dev_info.tx_phys);
+ kni->tx_q = phys_to_virt(phys_addr);
+ phys_addr = iommu_iova_to_phys(domain, dev_info.rx_phys);
+ kni->rx_q = phys_to_virt(phys_addr);
+ phys_addr = iommu_iova_to_phys(domain, dev_info.alloc_phys);
+ kni->alloc_q = phys_to_virt(phys_addr);
+ phys_addr = iommu_iova_to_phys(domain, dev_info.free_phys);
+ kni->free_q = phys_to_virt(phys_addr);
+ phys_addr = iommu_iova_to_phys(domain, dev_info.req_phys);
+ kni->req_q = phys_to_virt(phys_addr);
+ phys_addr = iommu_iova_to_phys(domain, dev_info.resp_phys);
+ kni->resp_q = phys_to_virt(phys_addr);
+ kni->sync_va = dev_info.sync_va;
+ phys_addr = iommu_iova_to_phys(domain, dev_info.sync_phys);
+ kni->sync_kva = phys_to_virt(phys_addr);
+ kni->iova_mode = 1;
+ } else {
+ kni->tx_q = phys_to_virt(dev_info.tx_phys);
+ kni->rx_q = phys_to_virt(dev_info.rx_phys);
+ kni->alloc_q = phys_to_virt(dev_info.alloc_phys);
+ kni->free_q = phys_to_virt(dev_info.free_phys);
+
+ kni->req_q = phys_to_virt(dev_info.req_phys);
+ kni->resp_q = phys_to_virt(dev_info.resp_phys);
+ kni->sync_va = dev_info.sync_va;
+ kni->sync_kva = phys_to_virt(dev_info.sync_phys);
+ kni->iova_mode = 0;
+ }
kni->mbuf_size = dev_info.mbuf_size;
@@ -35,6 +35,22 @@ static void kni_net_rx_normal(struct kni_dev *kni);
/* kni rx function pointer, with default to normal rx */
static kni_net_rx_t kni_net_rx_func = kni_net_rx_normal;
+/* iova to kernel virtual address */
+static void *
+iova2kva(struct kni_dev *kni, void *iova)
+{
+	return phys_to_virt(iommu_iova_to_phys(kni->domain,
+					       (dma_addr_t)iova));
+}
+
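+/* iova to kernel virtual address of the mbuf data area */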
+static void *
+iova2data_kva(struct kni_dev *kni, struct rte_kni_mbuf *m)
+{
+ return phys_to_virt((iommu_iova_to_phys(kni->domain,
+ (dma_addr_t)m->buf_physaddr) +
+ m->data_off));
+}
+
/* physical address to kernel virtual address */
static void *
pa2kva(void *pa)
@@ -186,7 +202,10 @@ kni_fifo_trans_pa2va(struct kni_dev *kni,
return;
for (i = 0; i < num_rx; i++) {
- kva = pa2kva(kni->pa[i]);
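+		/* kni->pa[] holds IOVAs in IOVA=VA mode, PAs otherwise */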
+ if (likely(kni->iova_mode == 1))
+ kva = iova2kva(kni, kni->pa[i]);
+ else
+ kva = pa2kva(kni->pa[i]);
kni->va[i] = pa2va(kni->pa[i], kva);
}
@@ -263,8 +282,13 @@ kni_net_tx(struct sk_buff *skb, struct net_device *dev)
if (likely(ret == 1)) {
void *data_kva;
- pkt_kva = pa2kva(pkt_pa);
- data_kva = kva2data_kva(pkt_kva);
+ if (likely(kni->iova_mode == 1)) {
+ pkt_kva = iova2kva(kni, pkt_pa);
+ data_kva = iova2data_kva(kni, pkt_kva);
+ } else {
+ pkt_kva = pa2kva(pkt_pa);
+ data_kva = kva2data_kva(pkt_kva);
+ }
pkt_va = pa2va(pkt_pa, pkt_kva);
len = skb->len;
@@ -335,9 +359,14 @@ kni_net_rx_normal(struct kni_dev *kni)
/* Transfer received packets to netif */
for (i = 0; i < num_rx; i++) {
- kva = pa2kva(kni->pa[i]);
+ if (likely(kni->iova_mode == 1)) {
+ kva = iova2kva(kni, kni->pa[i]);
+ data_kva = iova2data_kva(kni, kva);
+ } else {
+ kva = pa2kva(kni->pa[i]);
+ data_kva = kva2data_kva(kva);
+ }
len = kva->pkt_len;
- data_kva = kva2data_kva(kva);
kni->va[i] = pa2va(kni->pa[i], kva);
skb = dev_alloc_skb(len + 2);
@@ -434,13 +463,20 @@ kni_net_rx_lo_fifo(struct kni_dev *kni)
num = ret;
/* Copy mbufs */
for (i = 0; i < num; i++) {
- kva = pa2kva(kni->pa[i]);
+ if (likely(kni->iova_mode == 1)) {
+ kva = iova2kva(kni, kni->pa[i]);
+ data_kva = iova2data_kva(kni, kva);
+ alloc_kva = iova2kva(kni, kni->alloc_pa[i]);
+ alloc_data_kva = iova2data_kva(kni, alloc_kva);
+ } else {
+ kva = pa2kva(kni->pa[i]);
+ data_kva = kva2data_kva(kva);
+ alloc_kva = pa2kva(kni->alloc_pa[i]);
+ alloc_data_kva = kva2data_kva(alloc_kva);
+ }
len = kva->pkt_len;
- data_kva = kva2data_kva(kva);
kni->va[i] = pa2va(kni->pa[i], kva);
-
- alloc_kva = pa2kva(kni->alloc_pa[i]);
- alloc_data_kva = kva2data_kva(alloc_kva);
kni->alloc_va[i] = pa2va(kni->alloc_pa[i], alloc_kva);
memcpy(alloc_data_kva, data_kva, len);
@@ -507,9 +543,15 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
/* Copy mbufs to sk buffer and then call tx interface */
for (i = 0; i < num; i++) {
- kva = pa2kva(kni->pa[i]);
+ if (likely(kni->iova_mode == 1)) {
+ kva = iova2kva(kni, kni->pa[i]);
+ data_kva = iova2data_kva(kni, kva);
+ } else {
+ kva = pa2kva(kni->pa[i]);
+ data_kva = kva2data_kva(kva);
+ }
len = kva->pkt_len;
- data_kva = kva2data_kva(kva);
kni->va[i] = pa2va(kni->pa[i], kva);
skb = dev_alloc_skb(len + 2);
@@ -545,8 +587,14 @@ kni_net_rx_lo_fifo_skb(struct kni_dev *kni)
if (!kva->next)
break;
- kva = pa2kva(va2pa(kva->next, kva));
- data_kva = kva2data_kva(kva);
+ if (likely(kni->iova_mode == 1)) {
+ kva = iova2kva(kni,
+ va2pa(kva->next, kva));
+ data_kva = iova2data_kva(kni, kva);
+ } else {
+ kva = pa2kva(va2pa(kva->next, kva));
+ data_kva = kva2data_kva(kva);
+ }
}
}
@@ -1040,15 +1040,6 @@ rte_eal_init(int argc, char **argv)
/* autodetect the IOVA mapping mode (default is RTE_IOVA_PA) */
rte_eal_get_configuration()->iova_mode =
rte_bus_get_iommu_class();
-
- /* Workaround for KNI which requires physical address to work */
- if (rte_eal_get_configuration()->iova_mode == RTE_IOVA_VA &&
- rte_eal_check_module("rte_kni") == 1) {
- rte_eal_get_configuration()->iova_mode = RTE_IOVA_PA;
- RTE_LOG(WARNING, EAL,
- "Some devices want IOVA as VA but PA will be used because.. "
- "KNI module inserted\n");
- }
} else {
rte_eal_get_configuration()->iova_mode =
internal_config.iova_mode;
@@ -128,6 +128,7 @@ struct rte_kni_device_info {
unsigned mbuf_size;
unsigned int mtu;
char mac_addr[6];
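+	/* 1 when the EAL runs in IOVA=VA mode, 0 for IOVA=PA */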
+ uint8_t iova_mode;
};
#define KNI_DEVICE "kni"
@@ -304,6 +304,8 @@ rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
kni->group_id = conf->group_id;
kni->mbuf_size = conf->mbuf_size;
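+	/* Let the kernel module know whether the addresses in dev_info
+	 * are IOVAs (EAL in IOVA=VA mode) or physical addresses.
+	 */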
+ dev_info.iova_mode = (rte_eal_iova_mode() == RTE_IOVA_VA) ? 1 : 0;
+
ret = ioctl(kni_fd, RTE_KNI_IOCTL_CREATE, &dev_info);
if (ret < 0)
goto ioctl_fail;