[dpdk-dev,6/6] virtio: arm/arm64: memory mapped IO support in pmd driver

Message ID 1449250519-28372-7-git-send-email-sshukla@mvista.com (mailing list archive)
State Superseded, archived
Headers

Commit Message

Santosh Shukla Dec. 4, 2015, 5:35 p.m. UTC
  This patch set add memory-mapped-IO support for arm/arm64 archs in virtio-pmd
driver. Patch creates ioport-mem device name /dev/igb_ioport. virtio_ethdev_init
function to open that device file for once, mappes 4K page_size memory such that
all entry in cat /proc/ioport {all PCI_IOBAR entry} mapped for once. For that
two ancessor api;s
- virtio_map_ioport : Maps all cat /proc/ioport pci iobars.
- virtio_set_ioport_addr : manages the each pci_iobar slot as an offset.

Tested for thunderX/arm64 platform, by creating maximum guest kernel supported
virtio-net-pci device i.e.. 31 max, then attaching all 32 interface to uio,
Verified with tespmd io_fwd application.

Signed-off-by: Santosh Shukla <sshukla@mvista.com>
Signed-off-by: Rizwan Ansari <ransari@mvista.com>
---
 drivers/net/virtio/virtio_ethdev.c        |  138 ++++++++++++++++++++++++++++-
 lib/librte_eal/linuxapp/igb_uio/igb_uio.c |   80 ++++++++++++++++-
 2 files changed, 214 insertions(+), 4 deletions(-)
  

Comments

Stephen Hemminger Dec. 7, 2015, 5:08 p.m. UTC | #1
On Fri,  4 Dec 2015 23:05:19 +0530
Santosh Shukla <sshukla@mvista.com> wrote:

>  
> +#ifdef RTE_EXEC_ENV_LINUXAPP
> +/* start address of first pci_iobar slot (user-space virtual-addres) */
> +void *ioport_map;
> +#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
> +
> +#include <sys/mman.h>
> +#define DEV_NAME		"/dev/igb_ioport"
> +
> +/* Keeping pci_ioport_size = 4k.
> + * So maximum mmaped pci_iobar supported =
> + * (ioport_size/pci_dev->mem_resource[0].len)
> + *
> + * note: kernel could allow maximum 32 virtio-net-pci interface, that mean
> + * maximum 32 PCI_IOBAR(s) where each PCI_IOBAR_LEN=0x20, so virtio_map_ioport()
> + * func by theory gonna support 4k/0x20 ==> 128 PCI_IOBAR(s), more than
> + * max-virtio-net-pci interface.
> + */
> +#define PAGE_SIZE		4096
> +#define PCI_IOPORT_SIZE		PAGE_SIZE
> +#define PCI_IOPORT_MAX		128 /* 4k / 0x20 */
> +
> +int ioport_map_cnt;
> +#endif /* ARM, ARM64 */
> +#endif /* RTE_EXEC_ENV_LINUXAPP */

These variables should be static.

Also, it is should be possible to extract the I/O bar stuff in a generic way through sysfs
and not depend on a character device. The long term goal for DPDK acceptance is to
eliminate (or at least reduce to a minumum) any requirement for special kernel drivers.
  
Ananyev, Konstantin Dec. 8, 2015, 9:47 a.m. UTC | #2
Hi,

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Santosh Shukla
> Sent: Friday, December 04, 2015 5:35 PM
> To: dev@dpdk.org
> Cc: Rizwan Ansari
> Subject: [dpdk-dev] [PATCH 6/6] virtio: arm/arm64: memory mapped IO support in pmd driver
> 
> This patch set add memory-mapped-IO support for arm/arm64 archs in virtio-pmd
> driver. Patch creates ioport-mem device name /dev/igb_ioport. virtio_ethdev_init
> function to open that device file for once, mappes 4K page_size memory such that
> all entry in cat /proc/ioport {all PCI_IOBAR entry} mapped for once. For that
> two ancessor api;s
> - virtio_map_ioport : Maps all cat /proc/ioport pci iobars.
> - virtio_set_ioport_addr : manages the each pci_iobar slot as an offset.
> 
> Tested for thunderX/arm64 platform, by creating maximum guest kernel supported
> virtio-net-pci device i.e.. 31 max, then attaching all 32 interface to uio,
> Verified with tespmd io_fwd application.
> 
> Signed-off-by: Santosh Shukla <sshukla@mvista.com>
> Signed-off-by: Rizwan Ansari <ransari@mvista.com>
> ---

Is it possible to rework patch a bit to minimise number of #ifdef ARM ... 
spread across the code? 
Group arch related code into separate file(s), use union for fields with sizes, etc?
Konstantin  

>  drivers/net/virtio/virtio_ethdev.c        |  138 ++++++++++++++++++++++++++++-
>  lib/librte_eal/linuxapp/igb_uio/igb_uio.c |   80 ++++++++++++++++-
>  2 files changed, 214 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
> index 74c00ee..93b8784 100644
> --- a/drivers/net/virtio/virtio_ethdev.c
> +++ b/drivers/net/virtio/virtio_ethdev.c
> @@ -63,6 +63,30 @@
>  #include "virtqueue.h"
>  #include "virtio_rxtx.h"
> 
> +#ifdef RTE_EXEC_ENV_LINUXAPP
> +/* start address of first pci_iobar slot (user-space virtual-addres) */
> +void *ioport_map;
> +#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
> +
> +#include <sys/mman.h>
> +#define DEV_NAME		"/dev/igb_ioport"
> +
> +/* Keeping pci_ioport_size = 4k.
> + * So maximum mmaped pci_iobar supported =
> + * (ioport_size/pci_dev->mem_resource[0].len)
> + *
> + * note: kernel could allow maximum 32 virtio-net-pci interface, that mean
> + * maximum 32 PCI_IOBAR(s) where each PCI_IOBAR_LEN=0x20, so virtio_map_ioport()
> + * func by theory gonna support 4k/0x20 ==> 128 PCI_IOBAR(s), more than
> + * max-virtio-net-pci interface.
> + */
> +#define PAGE_SIZE		4096
> +#define PCI_IOPORT_SIZE		PAGE_SIZE
> +#define PCI_IOPORT_MAX		128 /* 4k / 0x20 */
> +
> +int ioport_map_cnt;
> +#endif /* ARM, ARM64 */
> +#endif /* RTE_EXEC_ENV_LINUXAPP */
> 
>  static int eth_virtio_dev_init(struct rte_eth_dev *eth_dev);
>  static int eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev);
> @@ -497,6 +521,18 @@ virtio_dev_close(struct rte_eth_dev *dev)
>  	hw->started = 0;
>  	virtio_dev_free_mbufs(dev);
>  	virtio_free_queues(dev);
> +
> +#ifdef RTE_EXEC_ENV_LINUXAPP
> +#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
> +
> +	/* unmap ioport memory */
> +	ioport_map_cnt--;
> +	if (!ioport_map_cnt)
> +		munmap(ioport_map, PCI_IOPORT_SIZE);
> +
> +	PMD_INIT_LOG(DEBUG, "unmapping ioport_mem %d\n", ioport_map_cnt);
> +#endif
> +#endif
>  }
> 
>  static void
> @@ -1172,7 +1208,6 @@ static int virtio_resource_init_by_ioports(struct rte_pci_device *pci_dev)
> 
>  			sscanf(ptr, "%04hx-%04hx", &start, &end);
>  			size = end - start + 1;
> -
>  			break;
>  		}
>  	}
> @@ -1256,6 +1291,81 @@ rx_func_get(struct rte_eth_dev *eth_dev)
>  		eth_dev->rx_pkt_burst = &virtio_recv_pkts;
>  }
> 
> +#ifdef RTE_EXEC_ENV_LINUXAPP
> +#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
> +
> +static int
> +virtio_map_ioport(void **resource_addr)
> +{
> +	int fd;
> +	int ret = 0;
> +
> +	fd = open(DEV_NAME, O_RDWR);
> +	if (fd < 0) {
> +		PMD_INIT_LOG(ERR, "device file %s open error: %d\n",
> +			     DEV_NAME, fd);
> +		ret = -1;
> +		goto out;
> +	}
> +
> +	ioport_map = mmap(NULL, PCI_IOPORT_SIZE,
> +			PROT_EXEC | PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0);
> +
> +	if (ioport_map == MAP_FAILED) {
> +		PMD_INIT_LOG(ERR, "mmap: failed to map bar Address=%p\n",
> +				*resource_addr);
> +		ret = -ENOMEM;
> +		goto out1;
> +	}
> +
> +	PMD_INIT_LOG(INFO, "First pci_iobar mapped at %p\n", ioport_map);
> +
> +out1:
> +	close(fd);
> +out:
> +	return ret;
> +}
> +
> +static int
> +virtio_set_ioport_addr(void **resource_addr, unsigned long offset)
> +{
> +	int ret = 0;
> +
> +	if (ioport_map_cnt >= PCI_IOPORT_MAX) {
> +		ret = -1;
> +		PMD_INIT_LOG(ERR,
> +			     "ioport_map_cnt(%d) greater than"
> +			     "PCI_IOPORT_MAX(%d)\n",
> +			     ioport_map_cnt, PCI_IOPORT_MAX);
> +		goto out;
> +	}
> +	*resource_addr = (void *)((char *)ioport_map + (ioport_map_cnt)*offset);
> +	ioport_map_cnt++;
> +
> +	PMD_INIT_LOG(DEBUG, "pci.resource_addr %p ioport_map_cnt %d\n",
> +			*resource_addr, ioport_map_cnt);
> +out:
> +	return ret;
> +}
> +#else /* !ARM, !ARM64 */
> +static int
> +virtio_map_ioport(void *resource_addr)
> +{
> +	(void)resource_addr;
> +	return 0;
> +}
> +
> +static int
> +virtio_set_ioport_addr(void *resource_addr, unsigned long offset)
> +{
> +	(void)resource_addr;
> +	(void)offset;
> +	return 0;
> +}
> +
> +#endif /* ARM, ARM64 */
> +#endif /* RTE_EXEC_ENV_LINUXAPP */
> +
>  /*
>   * This function is based on probe() function in virtio_pci.c
>   * It returns 0 on success.
> @@ -1294,8 +1404,31 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
>  	if (virtio_resource_init(pci_dev) < 0)
>  		return -1;
> 
> +#ifdef	RTE_EXEC_ENV_LINUXAPP
> +	int ret = 0;
> +
> +	/* Map the all IOBAR entry from /proc/ioport to 4k page_size only once.
> +	 * Later virtio_set_ioport_addr() func will update correct bar_addr
> +	 * eachk ioport (i.e..pci_dev->mem_resource[0].addr)
> +	 */
> +	if (!ioport_map) {
> +		ret = virtio_map_ioport(&pci_dev->mem_resource[0].addr);
> +		if (ret < 0)
> +			return -1;
> +	}
> +
> +	ret = virtio_set_ioport_addr(&pci_dev->mem_resource[0].addr,
> +					pci_dev->mem_resource[0].len);
> +	if (ret < 0)
> +		return -1;
> +
> +	PMD_INIT_LOG(INFO, "ioport_map %p resource_addr %p resource_len :%ld\n",
> +			ioport_map, pci_dev->mem_resource[0].addr,
> +			(unsigned long)pci_dev->mem_resource[0].len);
> +
> +#endif
>  	hw->use_msix = virtio_has_msix(&pci_dev->addr);
> -	hw->io_base = (uint32_t)(uintptr_t)pci_dev->mem_resource[0].addr;
> +	hw->io_base = (unsigned long)(uintptr_t)pci_dev->mem_resource[0].addr;
> 
>  	/* Reset the device although not necessary at startup */
>  	vtpci_reset(hw);
> @@ -1430,7 +1563,6 @@ eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev)
>  		rte_intr_callback_unregister(&pci_dev->intr_handle,
>  						virtio_interrupt_handler,
>  						eth_dev);
> -
>  	PMD_INIT_LOG(DEBUG, "dev_uninit completed");
> 
>  	return 0;
> diff --git a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
> index f5617d2..b154a44 100644
> --- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
> +++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
> @@ -324,6 +324,61 @@ igbuio_dom0_pci_mmap(struct uio_info *info, struct vm_area_struct *vma)
>  }
>  #endif
> 
> +#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
> +#ifdef CONFIG_HAS_IOPORT_MAP
> +/*
> + * mmap driver to map x86-style PCI_IOBAR (i.e..cat /proc/ioport pci-bar-memory)
> + * from kernel-space virtual address to user-space virtual address. This module
> + * required for non-x86 archs example arm/arm64, as those archs donot do
> + * IO_MAP_IO types access, Infact supports memory-mapped-IO. That is because
> + * arm/arm64 doesn't support direct IO instruction, so the design approach is to
> + * map `cat /proc/ioport` PCI_IOBAR's kernel-space virtual-addr to user-space
> + * virtual-addr. Therefore the need for mmap-driver.
> + */
> +#include <linux/fs.h>		/* file_operations */
> +#include <linux/miscdevice.h>
> +#include <linux/mm.h>		/* VM_IO */
> +#include <linux/uaccess.h>
> +#include <asm/page.h>
> +#include <linux/sched.h>
> +#include <linux/pid.h>
> +
> +void *__iomem mapped_io; /* ioport addr of `cat /proc/ioport` */
> +
> +static int igb_ioport_mmap(struct file *file, struct vm_area_struct *vma)
> +{
> +	struct page *npage;
> +	int ret = 0;
> +
> +	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
> +	npage = vmalloc_to_page(mapped_io);
> +	ret = remap_pfn_range(vma, vma->vm_start,
> +				page_to_pfn(npage),
> +				vma->vm_end - vma->vm_start,
> +				vma->vm_page_prot);
> +	if (ret) {
> +		pr_info("Error: Failed to remap pfn=%lu error=%d\n",
> +			page_to_pfn(npage), ret);
> +	}
> +	return 0;
> +}
> +
> +static const struct file_operations igb_ioport_fops = {
> +	.mmap		= igb_ioport_mmap,
> +};
> +
> +static struct miscdevice igb_ioport_dev = {
> +	.minor	= MISC_DYNAMIC_MINOR,
> +	.name	= "igb_ioport",
> +	.fops	= &igb_ioport_fops
> +};
> +#else /* !CONFIG_HAS_IOPORT_MAP */
> +
> +#error "CONFIG_HAS_IOPORT_MAP not supported for $RTE_ARCH"
> +
> +#endif /* CONFIG_HAS_IOPORT_MAP */
> +#endif /* RTE_ARCH_ARM, RTE_ARCH_ARM64 */
> +
>  /* Remap pci resources described by bar #pci_bar in uio resource n. */
>  static int
>  igbuio_pci_setup_iomem(struct pci_dev *dev, struct uio_info *info,
> @@ -365,11 +420,22 @@ igbuio_pci_setup_ioport(struct pci_dev *dev, struct uio_info *info,
>  	if (addr == 0 || len == 0)
>  		return -EINVAL;
> 
> +#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
> +	/*
> +	 * TODO: KNI/procfs:: porttype entry need to be added for ARM/ARM64,
> +	 * for below mmap driver;
> +	 * */
> +	mapped_io = ioport_map(addr, 0);
> +	info->port[n].name = name;
> +	info->port[n].start = (unsigned long)(uintptr_t)mapped_io;
> +	info->port[n].size = len;
> +	info->port[n].porttype = UIO_PORT_X86;
> +#else
>  	info->port[n].name = name;
>  	info->port[n].start = addr;
>  	info->port[n].size = len;
>  	info->port[n].porttype = UIO_PORT_X86;
> -
> +#endif
>  	return 0;
>  }
> 
> @@ -615,6 +681,15 @@ igbuio_pci_init_module(void)
>  {
>  	int ret;
> 
> +#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
> +	ret = misc_register(&igb_ioport_dev);
> +	if (ret < 0) {
> +		pr_info("Error: failed to register ioport map driver (%d)\n",
> +				ret);
> +		return ret;
> +	}
> +#endif
> +
>  	ret = igbuio_config_intr_mode(intr_mode);
>  	if (ret < 0)
>  		return ret;
> @@ -625,6 +700,9 @@ igbuio_pci_init_module(void)
>  static void __exit
>  igbuio_pci_exit_module(void)
>  {
> +#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
> +	misc_deregister(&igb_ioport_dev);
> +#endif
>  	pci_unregister_driver(&igbuio_pci_driver);
>  }
> 
> --
> 1.7.9.5
  
Santosh Shukla Dec. 8, 2015, 12:53 p.m. UTC | #3
On Mon, Dec 7, 2015 at 10:38 PM, Stephen Hemminger <
stephen@networkplumber.org> wrote:

> On Fri,  4 Dec 2015 23:05:19 +0530
> Santosh Shukla <sshukla@mvista.com> wrote:
>
> >
> > +#ifdef RTE_EXEC_ENV_LINUXAPP
> > +/* start address of first pci_iobar slot (user-space virtual-addres) */
> > +void *ioport_map;
> > +#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
> > +
> > +#include <sys/mman.h>
> > +#define DEV_NAME             "/dev/igb_ioport"
> > +
> > +/* Keeping pci_ioport_size = 4k.
> > + * So maximum mmaped pci_iobar supported =
> > + * (ioport_size/pci_dev->mem_resource[0].len)
> > + *
> > + * note: kernel could allow maximum 32 virtio-net-pci interface, that
> mean
> > + * maximum 32 PCI_IOBAR(s) where each PCI_IOBAR_LEN=0x20, so
> virtio_map_ioport()
> > + * func by theory gonna support 4k/0x20 ==> 128 PCI_IOBAR(s), more than
> > + * max-virtio-net-pci interface.
> > + */
> > +#define PAGE_SIZE            4096
> > +#define PCI_IOPORT_SIZE              PAGE_SIZE
> > +#define PCI_IOPORT_MAX               128 /* 4k / 0x20 */
> > +
> > +int ioport_map_cnt;
> > +#endif /* ARM, ARM64 */
> > +#endif /* RTE_EXEC_ENV_LINUXAPP */
>
> These variables should be static.
>
>
(Sorry for delayed follow, Was travelling..)
Right,


> Also, it is should be possible to extract the I/O bar stuff in a generic
> way through sysfs
> and not depend on a character device. The long term goal for DPDK
> acceptance is to
> eliminate (or at least reduce to a minumum) any requirement for special
> kernel drivers.
>

I agree. Existing implementation does read pci_iobar for start address and
size, But for non-x86 arch, we need someway to map pci_iobar and thats-why
thought of adding device file for a purpose, as archs like arm lack iopl()
privileged io syscall support, However iopl() too quite native driver
design assumption.

I have few idea in my mind such that - Right now I am updating
ioport_mapped addr {kernel-virtual-addr-io-memory} to
/sys/bus/pci/pci_bus_xxxx/xxx/map field, instead of mapping their, I'll try
to map to uio's pci interface and then use existing pci_map_resource() api
to mmap kernel-virtual-io-address to user-space-virtual-ioaddr. We'll come
back on this.
  
Santosh Shukla Dec. 8, 2015, 12:55 p.m. UTC | #4
On Tue, Dec 8, 2015 at 3:17 PM, Ananyev, Konstantin <
konstantin.ananyev@intel.com> wrote:

> Hi,
>
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Santosh Shukla
> > Sent: Friday, December 04, 2015 5:35 PM
> > To: dev@dpdk.org
> > Cc: Rizwan Ansari
> > Subject: [dpdk-dev] [PATCH 6/6] virtio: arm/arm64: memory mapped IO
> support in pmd driver
> >
> > This patch set add memory-mapped-IO support for arm/arm64 archs in
> virtio-pmd
> > driver. Patch creates ioport-mem device name /dev/igb_ioport.
> virtio_ethdev_init
> > function to open that device file for once, mappes 4K page_size memory
> such that
> > all entry in cat /proc/ioport {all PCI_IOBAR entry} mapped for once. For
> that
> > two ancessor api;s
> > - virtio_map_ioport : Maps all cat /proc/ioport pci iobars.
> > - virtio_set_ioport_addr : manages the each pci_iobar slot as an offset.
> >
> > Tested for thunderX/arm64 platform, by creating maximum guest kernel
> supported
> > virtio-net-pci device i.e.. 31 max, then attaching all 32 interface to
> uio,
> > Verified with tespmd io_fwd application.
> >
> > Signed-off-by: Santosh Shukla <sshukla@mvista.com>
> > Signed-off-by: Rizwan Ansari <ransari@mvista.com>
> > ---
>
> Is it possible to rework patch a bit to minimise number of #ifdef ARM ...
> spread across the code?
> Group arch related code into separate file(s), use union for fields with
> sizes, etc?
> Konstantin
>
>
Sure. Thanks.


> >  drivers/net/virtio/virtio_ethdev.c        |  138
> ++++++++++++++++++++++++++++-
> >  lib/librte_eal/linuxapp/igb_uio/igb_uio.c |   80 ++++++++++++++++-
> >  2 files changed, 214 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/net/virtio/virtio_ethdev.c
> b/drivers/net/virtio/virtio_ethdev.c
> > index 74c00ee..93b8784 100644
> > --- a/drivers/net/virtio/virtio_ethdev.c
> > +++ b/drivers/net/virtio/virtio_ethdev.c
> > @@ -63,6 +63,30 @@
> >  #include "virtqueue.h"
> >  #include "virtio_rxtx.h"
> >
> > +#ifdef RTE_EXEC_ENV_LINUXAPP
> > +/* start address of first pci_iobar slot (user-space virtual-addres) */
> > +void *ioport_map;
> > +#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
> > +
> > +#include <sys/mman.h>
> > +#define DEV_NAME             "/dev/igb_ioport"
> > +
> > +/* Keeping pci_ioport_size = 4k.
> > + * So maximum mmaped pci_iobar supported =
> > + * (ioport_size/pci_dev->mem_resource[0].len)
> > + *
> > + * note: kernel could allow maximum 32 virtio-net-pci interface, that
> mean
> > + * maximum 32 PCI_IOBAR(s) where each PCI_IOBAR_LEN=0x20, so
> virtio_map_ioport()
> > + * func by theory gonna support 4k/0x20 ==> 128 PCI_IOBAR(s), more than
> > + * max-virtio-net-pci interface.
> > + */
> > +#define PAGE_SIZE            4096
> > +#define PCI_IOPORT_SIZE              PAGE_SIZE
> > +#define PCI_IOPORT_MAX               128 /* 4k / 0x20 */
> > +
> > +int ioport_map_cnt;
> > +#endif /* ARM, ARM64 */
> > +#endif /* RTE_EXEC_ENV_LINUXAPP */
> >
> >  static int eth_virtio_dev_init(struct rte_eth_dev *eth_dev);
> >  static int eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev);
> > @@ -497,6 +521,18 @@ virtio_dev_close(struct rte_eth_dev *dev)
> >       hw->started = 0;
> >       virtio_dev_free_mbufs(dev);
> >       virtio_free_queues(dev);
> > +
> > +#ifdef RTE_EXEC_ENV_LINUXAPP
> > +#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
> > +
> > +     /* unmap ioport memory */
> > +     ioport_map_cnt--;
> > +     if (!ioport_map_cnt)
> > +             munmap(ioport_map, PCI_IOPORT_SIZE);
> > +
> > +     PMD_INIT_LOG(DEBUG, "unmapping ioport_mem %d\n", ioport_map_cnt);
> > +#endif
> > +#endif
> >  }
> >
> >  static void
> > @@ -1172,7 +1208,6 @@ static int virtio_resource_init_by_ioports(struct
> rte_pci_device *pci_dev)
> >
> >                       sscanf(ptr, "%04hx-%04hx", &start, &end);
> >                       size = end - start + 1;
> > -
> >                       break;
> >               }
> >       }
> > @@ -1256,6 +1291,81 @@ rx_func_get(struct rte_eth_dev *eth_dev)
> >               eth_dev->rx_pkt_burst = &virtio_recv_pkts;
> >  }
> >
> > +#ifdef RTE_EXEC_ENV_LINUXAPP
> > +#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
> > +
> > +static int
> > +virtio_map_ioport(void **resource_addr)
> > +{
> > +     int fd;
> > +     int ret = 0;
> > +
> > +     fd = open(DEV_NAME, O_RDWR);
> > +     if (fd < 0) {
> > +             PMD_INIT_LOG(ERR, "device file %s open error: %d\n",
> > +                          DEV_NAME, fd);
> > +             ret = -1;
> > +             goto out;
> > +     }
> > +
> > +     ioport_map = mmap(NULL, PCI_IOPORT_SIZE,
> > +                     PROT_EXEC | PROT_WRITE | PROT_READ, MAP_SHARED,
> fd, 0);
> > +
> > +     if (ioport_map == MAP_FAILED) {
> > +             PMD_INIT_LOG(ERR, "mmap: failed to map bar Address=%p\n",
> > +                             *resource_addr);
> > +             ret = -ENOMEM;
> > +             goto out1;
> > +     }
> > +
> > +     PMD_INIT_LOG(INFO, "First pci_iobar mapped at %p\n", ioport_map);
> > +
> > +out1:
> > +     close(fd);
> > +out:
> > +     return ret;
> > +}
> > +
> > +static int
> > +virtio_set_ioport_addr(void **resource_addr, unsigned long offset)
> > +{
> > +     int ret = 0;
> > +
> > +     if (ioport_map_cnt >= PCI_IOPORT_MAX) {
> > +             ret = -1;
> > +             PMD_INIT_LOG(ERR,
> > +                          "ioport_map_cnt(%d) greater than"
> > +                          "PCI_IOPORT_MAX(%d)\n",
> > +                          ioport_map_cnt, PCI_IOPORT_MAX);
> > +             goto out;
> > +     }
> > +     *resource_addr = (void *)((char *)ioport_map +
> (ioport_map_cnt)*offset);
> > +     ioport_map_cnt++;
> > +
> > +     PMD_INIT_LOG(DEBUG, "pci.resource_addr %p ioport_map_cnt %d\n",
> > +                     *resource_addr, ioport_map_cnt);
> > +out:
> > +     return ret;
> > +}
> > +#else /* !ARM, !ARM64 */
> > +static int
> > +virtio_map_ioport(void *resource_addr)
> > +{
> > +     (void)resource_addr;
> > +     return 0;
> > +}
> > +
> > +static int
> > +virtio_set_ioport_addr(void *resource_addr, unsigned long offset)
> > +{
> > +     (void)resource_addr;
> > +     (void)offset;
> > +     return 0;
> > +}
> > +
> > +#endif /* ARM, ARM64 */
> > +#endif /* RTE_EXEC_ENV_LINUXAPP */
> > +
> >  /*
> >   * This function is based on probe() function in virtio_pci.c
> >   * It returns 0 on success.
> > @@ -1294,8 +1404,31 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
> >       if (virtio_resource_init(pci_dev) < 0)
> >               return -1;
> >
> > +#ifdef       RTE_EXEC_ENV_LINUXAPP
> > +     int ret = 0;
> > +
> > +     /* Map the all IOBAR entry from /proc/ioport to 4k page_size only
> once.
> > +      * Later virtio_set_ioport_addr() func will update correct bar_addr
> > +      * eachk ioport (i.e..pci_dev->mem_resource[0].addr)
> > +      */
> > +     if (!ioport_map) {
> > +             ret = virtio_map_ioport(&pci_dev->mem_resource[0].addr);
> > +             if (ret < 0)
> > +                     return -1;
> > +     }
> > +
> > +     ret = virtio_set_ioport_addr(&pci_dev->mem_resource[0].addr,
> > +                                     pci_dev->mem_resource[0].len);
> > +     if (ret < 0)
> > +             return -1;
> > +
> > +     PMD_INIT_LOG(INFO, "ioport_map %p resource_addr %p resource_len
> :%ld\n",
> > +                     ioport_map, pci_dev->mem_resource[0].addr,
> > +                     (unsigned long)pci_dev->mem_resource[0].len);
> > +
> > +#endif
> >       hw->use_msix = virtio_has_msix(&pci_dev->addr);
> > -     hw->io_base = (uint32_t)(uintptr_t)pci_dev->mem_resource[0].addr;
> > +     hw->io_base = (unsigned
> long)(uintptr_t)pci_dev->mem_resource[0].addr;
> >
> >       /* Reset the device although not necessary at startup */
> >       vtpci_reset(hw);
> > @@ -1430,7 +1563,6 @@ eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev)
> >               rte_intr_callback_unregister(&pci_dev->intr_handle,
> >                                               virtio_interrupt_handler,
> >                                               eth_dev);
> > -
> >       PMD_INIT_LOG(DEBUG, "dev_uninit completed");
> >
> >       return 0;
> > diff --git a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
> b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
> > index f5617d2..b154a44 100644
> > --- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
> > +++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
> > @@ -324,6 +324,61 @@ igbuio_dom0_pci_mmap(struct uio_info *info, struct
> vm_area_struct *vma)
> >  }
> >  #endif
> >
> > +#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
> > +#ifdef CONFIG_HAS_IOPORT_MAP
> > +/*
> > + * mmap driver to map x86-style PCI_IOBAR (i.e..cat /proc/ioport
> pci-bar-memory)
> > + * from kernel-space virtual address to user-space virtual address.
> This module
> > + * required for non-x86 archs example arm/arm64, as those archs donot do
> > + * IO_MAP_IO types access, Infact supports memory-mapped-IO. That is
> because
> > + * arm/arm64 doesn't support direct IO instruction, so the design
> approach is to
> > + * map `cat /proc/ioport` PCI_IOBAR's kernel-space virtual-addr to
> user-space
> > + * virtual-addr. Therefore the need for mmap-driver.
> > + */
> > +#include <linux/fs.h>                /* file_operations */
> > +#include <linux/miscdevice.h>
> > +#include <linux/mm.h>                /* VM_IO */
> > +#include <linux/uaccess.h>
> > +#include <asm/page.h>
> > +#include <linux/sched.h>
> > +#include <linux/pid.h>
> > +
> > +void *__iomem mapped_io; /* ioport addr of `cat /proc/ioport` */
> > +
> > +static int igb_ioport_mmap(struct file *file, struct vm_area_struct
> *vma)
> > +{
> > +     struct page *npage;
> > +     int ret = 0;
> > +
> > +     vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
> > +     npage = vmalloc_to_page(mapped_io);
> > +     ret = remap_pfn_range(vma, vma->vm_start,
> > +                             page_to_pfn(npage),
> > +                             vma->vm_end - vma->vm_start,
> > +                             vma->vm_page_prot);
> > +     if (ret) {
> > +             pr_info("Error: Failed to remap pfn=%lu error=%d\n",
> > +                     page_to_pfn(npage), ret);
> > +     }
> > +     return 0;
> > +}
> > +
> > +static const struct file_operations igb_ioport_fops = {
> > +     .mmap           = igb_ioport_mmap,
> > +};
> > +
> > +static struct miscdevice igb_ioport_dev = {
> > +     .minor  = MISC_DYNAMIC_MINOR,
> > +     .name   = "igb_ioport",
> > +     .fops   = &igb_ioport_fops
> > +};
> > +#else /* !CONFIG_HAS_IOPORT_MAP */
> > +
> > +#error "CONFIG_HAS_IOPORT_MAP not supported for $RTE_ARCH"
> > +
> > +#endif /* CONFIG_HAS_IOPORT_MAP */
> > +#endif /* RTE_ARCH_ARM, RTE_ARCH_ARM64 */
> > +
> >  /* Remap pci resources described by bar #pci_bar in uio resource n. */
> >  static int
> >  igbuio_pci_setup_iomem(struct pci_dev *dev, struct uio_info *info,
> > @@ -365,11 +420,22 @@ igbuio_pci_setup_ioport(struct pci_dev *dev,
> struct uio_info *info,
> >       if (addr == 0 || len == 0)
> >               return -EINVAL;
> >
> > +#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
> > +     /*
> > +      * TODO: KNI/procfs:: porttype entry need to be added for
> ARM/ARM64,
> > +      * for below mmap driver;
> > +      * */
> > +     mapped_io = ioport_map(addr, 0);
> > +     info->port[n].name = name;
> > +     info->port[n].start = (unsigned long)(uintptr_t)mapped_io;
> > +     info->port[n].size = len;
> > +     info->port[n].porttype = UIO_PORT_X86;
> > +#else
> >       info->port[n].name = name;
> >       info->port[n].start = addr;
> >       info->port[n].size = len;
> >       info->port[n].porttype = UIO_PORT_X86;
> > -
> > +#endif
> >       return 0;
> >  }
> >
> > @@ -615,6 +681,15 @@ igbuio_pci_init_module(void)
> >  {
> >       int ret;
> >
> > +#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
> > +     ret = misc_register(&igb_ioport_dev);
> > +     if (ret < 0) {
> > +             pr_info("Error: failed to register ioport map driver
> (%d)\n",
> > +                             ret);
> > +             return ret;
> > +     }
> > +#endif
> > +
> >       ret = igbuio_config_intr_mode(intr_mode);
> >       if (ret < 0)
> >               return ret;
> > @@ -625,6 +700,9 @@ igbuio_pci_init_module(void)
> >  static void __exit
> >  igbuio_pci_exit_module(void)
> >  {
> > +#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
> > +     misc_deregister(&igb_ioport_dev);
> > +#endif
> >       pci_unregister_driver(&igbuio_pci_driver);
> >  }
> >
> > --
> > 1.7.9.5
>
>
  
Santosh Shukla Dec. 9, 2015, 6:59 p.m. UTC | #5
On Tue, Dec 8, 2015 at 6:23 PM, Santosh Shukla <sshukla@mvista.com> wrote:
>
>
> On Mon, Dec 7, 2015 at 10:38 PM, Stephen Hemminger
> <stephen@networkplumber.org> wrote:
>>
>> On Fri,  4 Dec 2015 23:05:19 +0530
>> Santosh Shukla <sshukla@mvista.com> wrote:
>>
>> >
>> > +#ifdef RTE_EXEC_ENV_LINUXAPP
>> > +/* start address of first pci_iobar slot (user-space virtual-addres) */
>> > +void *ioport_map;
>> > +#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
>> > +
>> > +#include <sys/mman.h>
>> > +#define DEV_NAME             "/dev/igb_ioport"
>> > +
>> > +/* Keeping pci_ioport_size = 4k.
>> > + * So maximum mmaped pci_iobar supported =
>> > + * (ioport_size/pci_dev->mem_resource[0].len)
>> > + *
>> > + * note: kernel could allow maximum 32 virtio-net-pci interface, that
>> > mean
>> > + * maximum 32 PCI_IOBAR(s) where each PCI_IOBAR_LEN=0x20, so
>> > virtio_map_ioport()
>> > + * func by theory gonna support 4k/0x20 ==> 128 PCI_IOBAR(s), more than
>> > + * max-virtio-net-pci interface.
>> > + */
>> > +#define PAGE_SIZE            4096
>> > +#define PCI_IOPORT_SIZE              PAGE_SIZE
>> > +#define PCI_IOPORT_MAX               128 /* 4k / 0x20 */
>> > +
>> > +int ioport_map_cnt;
>> > +#endif /* ARM, ARM64 */
>> > +#endif /* RTE_EXEC_ENV_LINUXAPP */
>>
>> These variables should be static.
>>
>
> (Sorry for delayed follow, Was travelling..)
> Right,
>
>>
>> Also, it is should be possible to extract the I/O bar stuff in a generic
>> way through sysfs
>> and not depend on a character device. The long term goal for DPDK
>> acceptance is to
>> eliminate (or at least reduce to a minumum) any requirement for special
>> kernel drivers.
>
>
> I agree. Existing implementation does read pci_iobar for start address and
> size, But for non-x86 arch, we need someway to map pci_iobar and thats-why
> thought of adding device file for a purpose, as archs like arm lack iopl()
> privileged io syscall support, However iopl() too quite native driver design
> assumption.
>
> I have few idea in my mind such that - Right now I am updating ioport_mapped
> addr {kernel-virtual-addr-io-memory} to /sys/bus/pci/pci_bus_xxxx/xxx/map
> field, instead of mapping their, I'll try to map to uio's pci interface and
> then use existing pci_map_resource() api to mmap kernel-virtual-io-address
> to user-space-virtual-ioaddr. We'll come back on this.
>


Spent sometime digging dpdk's uio/pci source code, Intent was to map
pci ioport region via uio-way. In order to achieve that I tried to
hack the virtio-net-pci pmd driver. Right now in virtio-net-pci case,
It creates two sysfs entry for pci bars: resource0 /1.

Resource0; is ioport region
Resource1; is iomem region.

By appending a RTE_PCI_DRV_NEED_MAPPING flag to drv_flag and passing
hw->io_base = resource1 type pci.mem_resource[slot].addr; where slot
=1. Resource1 is IORESOURCE_MEM type so uio/pci driver able to mmap.
That way I could get the valid user-space virtual address. However
this hack did not worked for me because at qemu side: virtio-pxe.rom
has virtio_headers located at ioport pci region and guest driver
writing at iomem region, that's why driver init failed. Note that
default driver doesn't use resource1 memory.

This made me think that either I had to add dependent code in kernel
or something similar proposed in this patch.
It is because:
- uio driver and dependent user-space pci api's in dpdk mmaps
IORESOURCE_MEM types address only {refer igbuio_setup_bars() and in
particular function pci_parse_sysfs_resource()}.
- That mmap in userspace backed by arch specific api
pci_mmap_page_range() in kernel.
- pci_mmap_page_range() does not support mmaping to IO_RESOURCE_IO type memory.
- Having said that, we need routine or a way to to map pci_iobar
region from kernel virtual-address to user-space virtual address.

In x86 case; iopl() exports the ioport address to userspace. But for
non-x86 case, we need special device file to mmap ioport kernel
address space to user space.  Also I grepped dpdk source code little
more and noticed that xen dom0 implementation is quite similar too.
They are too using /dev/dom0_mem special device file.

IMO, there is approach-2 to achieve desired which I mentioned in
cover-letter too. By adding /dev/ioport device file support in kernel,
so that user could do byte/word/halfword rd/wr, rightnow kernel
support only byte long rd/wr (i.e.. /dev/port file driver/char/mem.c).
In that way; We care to do file open/rd/wr/close for /dev/ioport. No
low level extra arch level api introduced in this patch series. Pretty
clean code. But then we do have kernel device file dependency which is
a cons. I personally like this approach as we are not dependant on
kernel side driver, all desired dependency is in dpdk source base. I
am going to send out approach-2 patch series, perhaps in this week for
comment.

Pl. let me know if you have better approach in mind, Otherwise I am
planning to work on V2 patch revision, incorporating other review
comment.
  
Stephen Hemminger Dec. 9, 2015, 7:04 p.m. UTC | #6
On Thu, 10 Dec 2015 00:29:30 +0530
Santosh Shukla <sshukla@mvista.com> wrote:

> On Tue, Dec 8, 2015 at 6:23 PM, Santosh Shukla <sshukla@mvista.com> wrote:
> >
> >
> > On Mon, Dec 7, 2015 at 10:38 PM, Stephen Hemminger
> > <stephen@networkplumber.org> wrote:
> >>
> >> On Fri,  4 Dec 2015 23:05:19 +0530
> >> Santosh Shukla <sshukla@mvista.com> wrote:
> >>
> >> >
> >> > +#ifdef RTE_EXEC_ENV_LINUXAPP
> >> > +/* start address of first pci_iobar slot (user-space virtual-addres) */
> >> > +void *ioport_map;
> >> > +#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
> >> > +
> >> > +#include <sys/mman.h>
> >> > +#define DEV_NAME             "/dev/igb_ioport"
> >> > +
> >> > +/* Keeping pci_ioport_size = 4k.
> >> > + * So maximum mmaped pci_iobar supported =
> >> > + * (ioport_size/pci_dev->mem_resource[0].len)
> >> > + *
> >> > + * note: kernel could allow maximum 32 virtio-net-pci interface, that
> >> > mean
> >> > + * maximum 32 PCI_IOBAR(s) where each PCI_IOBAR_LEN=0x20, so
> >> > virtio_map_ioport()
> >> > + * func by theory gonna support 4k/0x20 ==> 128 PCI_IOBAR(s), more than
> >> > + * max-virtio-net-pci interface.
> >> > + */
> >> > +#define PAGE_SIZE            4096
> >> > +#define PCI_IOPORT_SIZE              PAGE_SIZE
> >> > +#define PCI_IOPORT_MAX               128 /* 4k / 0x20 */
> >> > +
> >> > +int ioport_map_cnt;
> >> > +#endif /* ARM, ARM64 */
> >> > +#endif /* RTE_EXEC_ENV_LINUXAPP */
> >>
> >> These variables should be static.
> >>
> >
> > (Sorry for delayed follow, Was travelling..)
> > Right,
> >
> >>
> >> Also, it is should be possible to extract the I/O bar stuff in a generic
> >> way through sysfs
> >> and not depend on a character device. The long term goal for DPDK
> >> acceptance is to
> >> eliminate (or at least reduce to a minumum) any requirement for special
> >> kernel drivers.
> >
> >
> > I agree. Existing implementation does read pci_iobar for start address and
> > size, But for non-x86 arch, we need someway to map pci_iobar and thats-why
> > thought of adding device file for a purpose, as archs like arm lack iopl()
> > privileged io syscall support, However iopl() too quite native driver design
> > assumption.
> >
> > I have few idea in my mind such that - Right now I am updating ioport_mapped
> > addr {kernel-virtual-addr-io-memory} to /sys/bus/pci/pci_bus_xxxx/xxx/map
> > field, instead of mapping their, I'll try to map to uio's pci interface and
> > then use existing pci_map_resource() api to mmap kernel-virtual-io-address
> > to user-space-virtual-ioaddr. We'll come back on this.
> >
> 
> 
> Spent sometime digging dpdk's uio/pci source code, Intent was to map
> pci ioport region via uio-way. In order to achieve that I tried to
> hack the virtio-net-pci pmd driver. Right now in virtio-net-pci case,
> It creates two sysfs entry for pci bars: resource0 /1.
> 
> Resource0; is ioport region
> Resource1; is iomem region.
> 
> By appending a RTE_PCI_DRV_NEED_MAPPING flag to drv_flag and passing
> hw->io_base = resource1 type pci.mem_resource[slot].addr; where slot
> =1. Resource1 is IORESOURCE_MEM type so uio/pci driver able to mmap.
> That way I could get the valid user-space virtual address. However
> this hack did not worked for me because at qemu side: virtio-pxe.rom
> has virtio_headers located at ioport pci region and guest driver
> writing at iomem region, that's why driver init failed. Note that
> default driver doesn't use resource1 memory.
> 
> This made me think that either I had to add dependent code in kernel
> or something similar proposed in this patch.
> It is because:
> - uio driver and dependent user-space pci api's in dpdk mmaps
> IORESOURCE_MEM types address only {refer igbuio_setup_bars() and in
> particular function pci_parse_sysfs_resource()}.
> - That mmap in userspace backed by arch specific api
> pci_mmap_page_range() in kernel.
> - pci_mmap_page_range() does not support mmaping to IO_RESOURCE_IO type memory.
> - Having said that, we need routine or a way to to map pci_iobar
> region from kernel virtual-address to user-space virtual address.

There a couple of gotcha's with this. It turns out the iomem region
is not mappable on some platforms. I think GCE was one of them.
  
Santosh Shukla Dec. 9, 2015, 7:19 p.m. UTC | #7
On Thu, Dec 10, 2015 at 12:34 AM, Stephen Hemminger
<stephen@networkplumber.org> wrote:
> On Thu, 10 Dec 2015 00:29:30 +0530
> Santosh Shukla <sshukla@mvista.com> wrote:
>
>> On Tue, Dec 8, 2015 at 6:23 PM, Santosh Shukla <sshukla@mvista.com> wrote:
>> >
>> >
>> > On Mon, Dec 7, 2015 at 10:38 PM, Stephen Hemminger
>> > <stephen@networkplumber.org> wrote:
>> >>
>> >> On Fri,  4 Dec 2015 23:05:19 +0530
>> >> Santosh Shukla <sshukla@mvista.com> wrote:
>> >>
>> >> >
>> >> > +#ifdef RTE_EXEC_ENV_LINUXAPP
>> >> > +/* start address of first pci_iobar slot (user-space virtual-addres) */
>> >> > +void *ioport_map;
>> >> > +#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
>> >> > +
>> >> > +#include <sys/mman.h>
>> >> > +#define DEV_NAME             "/dev/igb_ioport"
>> >> > +
>> >> > +/* Keeping pci_ioport_size = 4k.
>> >> > + * So maximum mmaped pci_iobar supported =
>> >> > + * (ioport_size/pci_dev->mem_resource[0].len)
>> >> > + *
>> >> > + * note: kernel could allow maximum 32 virtio-net-pci interface, that
>> >> > mean
>> >> > + * maximum 32 PCI_IOBAR(s) where each PCI_IOBAR_LEN=0x20, so
>> >> > virtio_map_ioport()
>> >> > + * func by theory gonna support 4k/0x20 ==> 128 PCI_IOBAR(s), more than
>> >> > + * max-virtio-net-pci interface.
>> >> > + */
>> >> > +#define PAGE_SIZE            4096
>> >> > +#define PCI_IOPORT_SIZE              PAGE_SIZE
>> >> > +#define PCI_IOPORT_MAX               128 /* 4k / 0x20 */
>> >> > +
>> >> > +int ioport_map_cnt;
>> >> > +#endif /* ARM, ARM64 */
>> >> > +#endif /* RTE_EXEC_ENV_LINUXAPP */
>> >>
>> >> These variables should be static.
>> >>
>> >
>> > (Sorry for delayed follow, Was travelling..)
>> > Right,
>> >
>> >>
>> >> Also, it is should be possible to extract the I/O bar stuff in a generic
>> >> way through sysfs
>> >> and not depend on a character device. The long term goal for DPDK
>> >> acceptance is to
>> >> eliminate (or at least reduce to a minumum) any requirement for special
>> >> kernel drivers.
>> >
>> >
>> > I agree. Existing implementation does read pci_iobar for start address and
>> > size, But for non-x86 arch, we need someway to map pci_iobar and thats-why
>> > thought of adding device file for a purpose, as archs like arm lack iopl()
>> > privileged io syscall support, However iopl() too quite native driver design
>> > assumption.
>> >
>> > I have few idea in my mind such that - Right now I am updating ioport_mapped
>> > addr {kernel-virtual-addr-io-memory} to /sys/bus/pci/pci_bus_xxxx/xxx/map
>> > field, instead of mapping their, I'll try to map to uio's pci interface and
>> > then use existing pci_map_resource() api to mmap kernel-virtual-io-address
>> > to user-space-virtual-ioaddr. We'll come back on this.
>> >
>>
>>
>> Spent sometime digging dpdk's uio/pci source code, Intent was to map
>> pci ioport region via uio-way. In order to achieve that I tried to
>> hack the virtio-net-pci pmd driver. Right now in virtio-net-pci case,
>> It creates two sysfs entry for pci bars: resource0 /1.
>>
>> Resource0; is ioport region
>> Resource1; is iomem region.
>>
>> By appending a RTE_PCI_DRV_NEED_MAPPING flag to drv_flag and passing
>> hw->io_base = resource1 type pci.mem_resource[slot].addr; where slot
>> =1. Resource1 is IORESOURCE_MEM type so uio/pci driver able to mmap.
>> That way I could get the valid user-space virtual address. However
>> this hack did not worked for me because at qemu side: virtio-pxe.rom
>> has virtio_headers located at ioport pci region and guest driver
>> writing at iomem region, that's why driver init failed. Note that
>> default driver doesn't use resource1 memory.
>>
>> This made me think that either I had to add dependent code in kernel
>> or something similar proposed in this patch.
>> It is because:
>> - uio driver and dependent user-space pci api's in dpdk mmaps
>> IORESOURCE_MEM types address only {refer igbuio_setup_bars() and in
>> particular function pci_parse_sysfs_resource()}.
>> - That mmap in userspace backed by arch specific api
>> pci_mmap_page_range() in kernel.
>> - pci_mmap_page_range() does not support mmaping to IO_RESOURCE_IO type memory.
>> - Having said that, we need routine or a way to to map pci_iobar
>> region from kernel virtual-address to user-space virtual address.
>
> There a couple of gotcha's with this. It turns out the iomem region
> is not mappable on some platforms. I think GCE was one of them.

afaik, In linux kernel if arch supports pci_mmap_page_range then iomem
region should map, right? I am confused by reading your reply, also I
am not aware of GCE? which platform is GCE, please suggest.
  
Stephen Hemminger Dec. 9, 2015, 7:57 p.m. UTC | #8
On Thu, 10 Dec 2015 00:49:08 +0530
Santosh Shukla <sshukla@mvista.com> wrote:

> On Thu, Dec 10, 2015 at 12:34 AM, Stephen Hemminger
> <stephen@networkplumber.org> wrote:
> > On Thu, 10 Dec 2015 00:29:30 +0530
> > Santosh Shukla <sshukla@mvista.com> wrote:
> >
> >> On Tue, Dec 8, 2015 at 6:23 PM, Santosh Shukla <sshukla@mvista.com> wrote:
> >> >
> >> >
> >> > On Mon, Dec 7, 2015 at 10:38 PM, Stephen Hemminger
> >> > <stephen@networkplumber.org> wrote:
> >> >>
> >> >> On Fri,  4 Dec 2015 23:05:19 +0530
> >> >> Santosh Shukla <sshukla@mvista.com> wrote:
> >> >>
> >> >> >
> >> >> > +#ifdef RTE_EXEC_ENV_LINUXAPP
> >> >> > +/* start address of first pci_iobar slot (user-space virtual-addres) */
> >> >> > +void *ioport_map;
> >> >> > +#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
> >> >> > +
> >> >> > +#include <sys/mman.h>
> >> >> > +#define DEV_NAME             "/dev/igb_ioport"
> >> >> > +
> >> >> > +/* Keeping pci_ioport_size = 4k.
> >> >> > + * So maximum mmaped pci_iobar supported =
> >> >> > + * (ioport_size/pci_dev->mem_resource[0].len)
> >> >> > + *
> >> >> > + * note: kernel could allow maximum 32 virtio-net-pci interface, that
> >> >> > mean
> >> >> > + * maximum 32 PCI_IOBAR(s) where each PCI_IOBAR_LEN=0x20, so
> >> >> > virtio_map_ioport()
> >> >> > + * func by theory gonna support 4k/0x20 ==> 128 PCI_IOBAR(s), more than
> >> >> > + * max-virtio-net-pci interface.
> >> >> > + */
> >> >> > +#define PAGE_SIZE            4096
> >> >> > +#define PCI_IOPORT_SIZE              PAGE_SIZE
> >> >> > +#define PCI_IOPORT_MAX               128 /* 4k / 0x20 */
> >> >> > +
> >> >> > +int ioport_map_cnt;
> >> >> > +#endif /* ARM, ARM64 */
> >> >> > +#endif /* RTE_EXEC_ENV_LINUXAPP */
> >> >>
> >> >> These variables should be static.
> >> >>
> >> >
> >> > (Sorry for delayed follow, Was travelling..)
> >> > Right,
> >> >
> >> >>
> >> >> Also, it is should be possible to extract the I/O bar stuff in a generic
> >> >> way through sysfs
> >> >> and not depend on a character device. The long term goal for DPDK
> >> >> acceptance is to
> >> >> eliminate (or at least reduce to a minumum) any requirement for special
> >> >> kernel drivers.
> >> >
> >> >
> >> > I agree. Existing implementation does read pci_iobar for start address and
> >> > size, But for non-x86 arch, we need someway to map pci_iobar and thats-why
> >> > thought of adding device file for a purpose, as archs like arm lack iopl()
> >> > privileged io syscall support, However iopl() too quite native driver design
> >> > assumption.
> >> >
> >> > I have few idea in my mind such that - Right now I am updating ioport_mapped
> >> > addr {kernel-virtual-addr-io-memory} to /sys/bus/pci/pci_bus_xxxx/xxx/map
> >> > field, instead of mapping their, I'll try to map to uio's pci interface and
> >> > then use existing pci_map_resource() api to mmap kernel-virtual-io-address
> >> > to user-space-virtual-ioaddr. We'll come back on this.
> >> >
> >>
> >>
> >> Spent sometime digging dpdk's uio/pci source code, Intent was to map
> >> pci ioport region via uio-way. In order to achieve that I tried to
> >> hack the virtio-net-pci pmd driver. Right now in virtio-net-pci case,
> >> It creates two sysfs entry for pci bars: resource0 /1.
> >>
> >> Resource0; is ioport region
> >> Resource1; is iomem region.
> >>
> >> By appending a RTE_PCI_DRV_NEED_MAPPING flag to drv_flag and passing
> >> hw->io_base = resource1 type pci.mem_resource[slot].addr; where slot
> >> =1. Resource1 is IORESOURCE_MEM type so uio/pci driver able to mmap.
> >> That way I could get the valid user-space virtual address. However
> >> this hack did not worked for me because at qemu side: virtio-pxe.rom
> >> has virtio_headers located at ioport pci region and guest driver
> >> writing at iomem region, that's why driver init failed. Note that
> >> default driver doesn't use resource1 memory.
> >>
> >> This made me think that either I had to add dependent code in kernel
> >> or something similar proposed in this patch.
> >> It is because:
> >> - uio driver and dependent user-space pci api's in dpdk mmaps
> >> IORESOURCE_MEM types address only {refer igbuio_setup_bars() and in
> >> particular function pci_parse_sysfs_resource()}.
> >> - That mmap in userspace backed by arch specific api
> >> pci_mmap_page_range() in kernel.
> >> - pci_mmap_page_range() does not support mmaping to IO_RESOURCE_IO type memory.
> >> - Having said that, we need routine or a way to to map pci_iobar
> >> region from kernel virtual-address to user-space virtual address.
> >
> > There a couple of gotcha's with this. It turns out the iomem region
> > is not mappable on some platforms. I think GCE was one of them.
> 
> afaik, In linux kernel if arch supports pci_mmap_page_range then iomem
> region should map, right? I am confused by reading your reply, also I
> am not aware of GCE? which platform is GCE, please suggest.

I think it was Google Compute Environment that reported an memory region
which was huge and not accessible, they have there own vhost.
  

Patch

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index 74c00ee..93b8784 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -63,6 +63,30 @@ 
 #include "virtqueue.h"
 #include "virtio_rxtx.h"
 
+#ifdef RTE_EXEC_ENV_LINUXAPP
+/* start address of first pci_iobar slot (user-space virtual-addres) */
+void *ioport_map;
+#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
+
+#include <sys/mman.h>
+#define DEV_NAME		"/dev/igb_ioport"
+
+/* Keeping pci_ioport_size = 4k.
+ * So maximum mmaped pci_iobar supported =
+ * (ioport_size/pci_dev->mem_resource[0].len)
+ *
+ * note: kernel could allow maximum 32 virtio-net-pci interface, that mean
+ * maximum 32 PCI_IOBAR(s) where each PCI_IOBAR_LEN=0x20, so virtio_map_ioport()
+ * func by theory gonna support 4k/0x20 ==> 128 PCI_IOBAR(s), more than
+ * max-virtio-net-pci interface.
+ */
+#define PAGE_SIZE		4096
+#define PCI_IOPORT_SIZE		PAGE_SIZE
+#define PCI_IOPORT_MAX		128 /* 4k / 0x20 */
+
+int ioport_map_cnt;
+#endif /* ARM, ARM64 */
+#endif /* RTE_EXEC_ENV_LINUXAPP */
 
 static int eth_virtio_dev_init(struct rte_eth_dev *eth_dev);
 static int eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev);
@@ -497,6 +521,18 @@  virtio_dev_close(struct rte_eth_dev *dev)
 	hw->started = 0;
 	virtio_dev_free_mbufs(dev);
 	virtio_free_queues(dev);
+
+#ifdef RTE_EXEC_ENV_LINUXAPP
+#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
+
+	/* unmap ioport memory */
+	ioport_map_cnt--;
+	if (!ioport_map_cnt)
+		munmap(ioport_map, PCI_IOPORT_SIZE);
+
+	PMD_INIT_LOG(DEBUG, "unmapping ioport_mem %d\n", ioport_map_cnt);
+#endif
+#endif
 }
 
 static void
@@ -1172,7 +1208,6 @@  static int virtio_resource_init_by_ioports(struct rte_pci_device *pci_dev)
 
 			sscanf(ptr, "%04hx-%04hx", &start, &end);
 			size = end - start + 1;
-
 			break;
 		}
 	}
@@ -1256,6 +1291,81 @@  rx_func_get(struct rte_eth_dev *eth_dev)
 		eth_dev->rx_pkt_burst = &virtio_recv_pkts;
 }
 
+#ifdef RTE_EXEC_ENV_LINUXAPP
+#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
+
+static int
+virtio_map_ioport(void **resource_addr)
+{
+	int fd;
+	int ret = 0;
+
+	fd = open(DEV_NAME, O_RDWR);
+	if (fd < 0) {
+		PMD_INIT_LOG(ERR, "device file %s open error: %d\n",
+			     DEV_NAME, fd);
+		ret = -1;
+		goto out;
+	}
+
+	ioport_map = mmap(NULL, PCI_IOPORT_SIZE,
+			PROT_EXEC | PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0);
+
+	if (ioport_map == MAP_FAILED) {
+		PMD_INIT_LOG(ERR, "mmap: failed to map bar Address=%p\n",
+				*resource_addr);
+		ret = -ENOMEM;
+		goto out1;
+	}
+
+	PMD_INIT_LOG(INFO, "First pci_iobar mapped at %p\n", ioport_map);
+
+out1:
+	close(fd);
+out:
+	return ret;
+}
+
+static int
+virtio_set_ioport_addr(void **resource_addr, unsigned long offset)
+{
+	int ret = 0;
+
+	if (ioport_map_cnt >= PCI_IOPORT_MAX) {
+		ret = -1;
+		PMD_INIT_LOG(ERR,
+			     "ioport_map_cnt(%d) greater than"
+			     "PCI_IOPORT_MAX(%d)\n",
+			     ioport_map_cnt, PCI_IOPORT_MAX);
+		goto out;
+	}
+	*resource_addr = (void *)((char *)ioport_map + (ioport_map_cnt)*offset);
+	ioport_map_cnt++;
+
+	PMD_INIT_LOG(DEBUG, "pci.resource_addr %p ioport_map_cnt %d\n",
+			*resource_addr, ioport_map_cnt);
+out:
+	return ret;
+}
+#else /* !ARM, !ARM64 */
+static int
+virtio_map_ioport(void *resource_addr)
+{
+	(void)resource_addr;
+	return 0;
+}
+
+static int
+virtio_set_ioport_addr(void *resource_addr, unsigned long offset)
+{
+	(void)resource_addr;
+	(void)offset;
+	return 0;
+}
+
+#endif /* ARM, ARM64 */
+#endif /* RTE_EXEC_ENV_LINUXAPP */
+
 /*
  * This function is based on probe() function in virtio_pci.c
  * It returns 0 on success.
@@ -1294,8 +1404,31 @@  eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 	if (virtio_resource_init(pci_dev) < 0)
 		return -1;
 
+#ifdef	RTE_EXEC_ENV_LINUXAPP
+	int ret = 0;
+
+	/* Map the all IOBAR entry from /proc/ioport to 4k page_size only once.
+	 * Later virtio_set_ioport_addr() func will update correct bar_addr
+	 * eachk ioport (i.e..pci_dev->mem_resource[0].addr)
+	 */
+	if (!ioport_map) {
+		ret = virtio_map_ioport(&pci_dev->mem_resource[0].addr);
+		if (ret < 0)
+			return -1;
+	}
+
+	ret = virtio_set_ioport_addr(&pci_dev->mem_resource[0].addr,
+					pci_dev->mem_resource[0].len);
+	if (ret < 0)
+		return -1;
+
+	PMD_INIT_LOG(INFO, "ioport_map %p resource_addr %p resource_len :%ld\n",
+			ioport_map, pci_dev->mem_resource[0].addr,
+			(unsigned long)pci_dev->mem_resource[0].len);
+
+#endif
 	hw->use_msix = virtio_has_msix(&pci_dev->addr);
-	hw->io_base = (uint32_t)(uintptr_t)pci_dev->mem_resource[0].addr;
+	hw->io_base = (unsigned long)(uintptr_t)pci_dev->mem_resource[0].addr;
 
 	/* Reset the device although not necessary at startup */
 	vtpci_reset(hw);
@@ -1430,7 +1563,6 @@  eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev)
 		rte_intr_callback_unregister(&pci_dev->intr_handle,
 						virtio_interrupt_handler,
 						eth_dev);
-
 	PMD_INIT_LOG(DEBUG, "dev_uninit completed");
 
 	return 0;
diff --git a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
index f5617d2..b154a44 100644
--- a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
+++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c
@@ -324,6 +324,61 @@  igbuio_dom0_pci_mmap(struct uio_info *info, struct vm_area_struct *vma)
 }
 #endif
 
+#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
+#ifdef CONFIG_HAS_IOPORT_MAP
+/*
+ * mmap driver to map x86-style PCI_IOBAR (i.e..cat /proc/ioport pci-bar-memory)
+ * from kernel-space virtual address to user-space virtual address. This module
+ * required for non-x86 archs example arm/arm64, as those archs donot do
+ * IO_MAP_IO types access, Infact supports memory-mapped-IO. That is because
+ * arm/arm64 doesn't support direct IO instruction, so the design approach is to
+ * map `cat /proc/ioport` PCI_IOBAR's kernel-space virtual-addr to user-space
+ * virtual-addr. Therefore the need for mmap-driver.
+ */
+#include <linux/fs.h>		/* file_operations */
+#include <linux/miscdevice.h>
+#include <linux/mm.h>		/* VM_IO */
+#include <linux/uaccess.h>
+#include <asm/page.h>
+#include <linux/sched.h>
+#include <linux/pid.h>
+
+void *__iomem mapped_io; /* ioport addr of `cat /proc/ioport` */
+
+static int igb_ioport_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	struct page *npage;
+	int ret = 0;
+
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	npage = vmalloc_to_page(mapped_io);
+	ret = remap_pfn_range(vma, vma->vm_start,
+				page_to_pfn(npage),
+				vma->vm_end - vma->vm_start,
+				vma->vm_page_prot);
+	if (ret) {
+		pr_info("Error: Failed to remap pfn=%lu error=%d\n",
+			page_to_pfn(npage), ret);
+	}
+	return 0;
+}
+
+static const struct file_operations igb_ioport_fops = {
+	.mmap		= igb_ioport_mmap,
+};
+
+static struct miscdevice igb_ioport_dev = {
+	.minor	= MISC_DYNAMIC_MINOR,
+	.name	= "igb_ioport",
+	.fops	= &igb_ioport_fops
+};
+#else /* !CONFIG_HAS_IOPORT_MAP */
+
+#error "CONFIG_HAS_IOPORT_MAP not supported for $RTE_ARCH"
+
+#endif /* CONFIG_HAS_IOPORT_MAP */
+#endif /* RTE_ARCH_ARM, RTE_ARCH_ARM64 */
+
 /* Remap pci resources described by bar #pci_bar in uio resource n. */
 static int
 igbuio_pci_setup_iomem(struct pci_dev *dev, struct uio_info *info,
@@ -365,11 +420,22 @@  igbuio_pci_setup_ioport(struct pci_dev *dev, struct uio_info *info,
 	if (addr == 0 || len == 0)
 		return -EINVAL;
 
+#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
+	/*
+	 * TODO: KNI/procfs:: porttype entry need to be added for ARM/ARM64,
+	 * for below mmap driver;
+	 * */
+	mapped_io = ioport_map(addr, 0);
+	info->port[n].name = name;
+	info->port[n].start = (unsigned long)(uintptr_t)mapped_io;
+	info->port[n].size = len;
+	info->port[n].porttype = UIO_PORT_X86;
+#else
 	info->port[n].name = name;
 	info->port[n].start = addr;
 	info->port[n].size = len;
 	info->port[n].porttype = UIO_PORT_X86;
-
+#endif
 	return 0;
 }
 
@@ -615,6 +681,15 @@  igbuio_pci_init_module(void)
 {
 	int ret;
 
+#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
+	ret = misc_register(&igb_ioport_dev);
+	if (ret < 0) {
+		pr_info("Error: failed to register ioport map driver (%d)\n",
+				ret);
+		return ret;
+	}
+#endif
+
 	ret = igbuio_config_intr_mode(intr_mode);
 	if (ret < 0)
 		return ret;
@@ -625,6 +700,9 @@  igbuio_pci_init_module(void)
 static void __exit
 igbuio_pci_exit_module(void)
 {
+#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64)
+	misc_deregister(&igb_ioport_dev);
+#endif
 	pci_unregister_driver(&igbuio_pci_driver);
 }