[v9,2/5] kni: add IOVA=VA support in KNI lib

Message ID 20190729121313.30639-3-vattunuru@marvell.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series kni: add IOVA=VA support |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation fail Compilation issues

Commit Message

Vamsi Krishna Attunuru July 29, 2019, 12:13 p.m. UTC
  From: Vamsi Attunuru <vattunuru@marvell.com>

The current KNI implementation only operates in IOVA=PA mode. This patch
adds the required functionality in the KNI lib to support IOVA=VA mode.

The KNI kernel module requires device info to obtain the iommu domain
information needed for IOVA address translations. This patch defines the
device-related fields in the rte_kni_device_info structure and passes the
device info to the kernel KNI module when IOVA=VA mode is enabled.

Signed-off-by: Vamsi Attunuru <vattunuru@marvell.com>
Signed-off-by: Kiran Kumar K <kirankumark@marvell.com>
---
 lib/librte_eal/linux/eal/include/rte_kni_common.h |  8 ++++++
 lib/librte_kni/Makefile                           |  1 +
 lib/librte_kni/meson.build                        |  1 +
 lib/librte_kni/rte_kni.c                          | 30 +++++++++++++++++++++++
 4 files changed, 40 insertions(+)
  

Comments

Igor Ryzhov July 29, 2019, 12:24 p.m. UTC | #1
Hi,

I believe iova_mode check should not be automatic and should be a part
of rte_kni_conf.
What if I want to use KNI just as a pure virtual device without any
connection to a real PCI device in an application that works in VA
mode?

Best regards,
Igor

On Mon, Jul 29, 2019 at 3:14 PM <vattunuru@marvell.com> wrote:
>
> From: Vamsi Attunuru <vattunuru@marvell.com>
>
> Current KNI implementation only operates in IOVA=PA mode, patch adds
> required functionality in KNI lib to support IOVA=VA mode.
>
> KNI kernel module requires device info to get iommu domain related
> information for IOVA addr related translations. Patch defines device
> related info in rte_kni_device_info structure and passes device info
> to the kernel KNI module when IOVA=VA mode is enabled.
>
> Signed-off-by: Vamsi Attunuru <vattunuru@marvell.com>
> Signed-off-by: Kiran Kumar K <kirankumark@marvell.com>
> ---
>  lib/librte_eal/linux/eal/include/rte_kni_common.h |  8 ++++++
>  lib/librte_kni/Makefile                           |  1 +
>  lib/librte_kni/meson.build                        |  1 +
>  lib/librte_kni/rte_kni.c                          | 30 +++++++++++++++++++++++
>  4 files changed, 40 insertions(+)
>
> diff --git a/lib/librte_eal/linux/eal/include/rte_kni_common.h b/lib/librte_eal/linux/eal/include/rte_kni_common.h
> index 37d9ee8..4fd8a90 100644
> --- a/lib/librte_eal/linux/eal/include/rte_kni_common.h
> +++ b/lib/librte_eal/linux/eal/include/rte_kni_common.h
> @@ -111,6 +111,13 @@ struct rte_kni_device_info {
>         void * mbuf_va;
>         phys_addr_t mbuf_phys;
>
> +       /* PCI info */
> +       uint16_t vendor_id;           /**< Vendor ID or PCI_ANY_ID. */
> +       uint16_t device_id;           /**< Device ID or PCI_ANY_ID. */
> +       uint8_t bus;                  /**< Device bus */
> +       uint8_t devid;                /**< Device ID */
> +       uint8_t function;             /**< Device function. */
> +
>         uint16_t group_id;            /**< Group ID */
>         uint32_t core_id;             /**< core ID to bind for kernel thread */
>
> @@ -121,6 +128,7 @@ struct rte_kni_device_info {
>         unsigned mbuf_size;
>         unsigned int mtu;
>         uint8_t mac_addr[6];
> +       uint8_t iova_mode;
>  };
>
>  #define KNI_DEVICE "kni"
> diff --git a/lib/librte_kni/Makefile b/lib/librte_kni/Makefile
> index cbd6599..ab15d10 100644
> --- a/lib/librte_kni/Makefile
> +++ b/lib/librte_kni/Makefile
> @@ -7,6 +7,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
>  LIB = librte_kni.a
>
>  CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -fno-strict-aliasing
> +CFLAGS += -I$(RTE_SDK)/drivers/bus/pci
>  LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev
>
>  EXPORT_MAP := rte_kni_version.map
> diff --git a/lib/librte_kni/meson.build b/lib/librte_kni/meson.build
> index 41fa2e3..fd46f87 100644
> --- a/lib/librte_kni/meson.build
> +++ b/lib/librte_kni/meson.build
> @@ -9,3 +9,4 @@ version = 2
>  sources = files('rte_kni.c')
>  headers = files('rte_kni.h')
>  deps += ['ethdev', 'pci']
> +includes += include_directories('../../drivers/bus/pci')
> diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
> index 4b51fb4..2aaaeaa 100644
> --- a/lib/librte_kni/rte_kni.c
> +++ b/lib/librte_kni/rte_kni.c
> @@ -14,6 +14,7 @@
>  #include <rte_spinlock.h>
>  #include <rte_string_fns.h>
>  #include <rte_ethdev.h>
> +#include <rte_bus_pci.h>
>  #include <rte_malloc.h>
>  #include <rte_log.h>
>  #include <rte_kni.h>
> @@ -199,6 +200,27 @@ kni_release_mz(struct rte_kni *kni)
>         rte_memzone_free(kni->m_sync_addr);
>  }
>
> +static void
> +kni_dev_pci_addr_get(uint16_t port_id, struct rte_kni_device_info *kni_dev_info)
> +{
> +       const struct rte_pci_device *pci_dev;
> +       struct rte_eth_dev_info dev_info;
> +       const struct rte_bus *bus = NULL;
> +
> +       rte_eth_dev_info_get(port_id, &dev_info);
> +
> +       if (dev_info.device)
> +               bus = rte_bus_find_by_device(dev_info.device);
> +       if (bus && !strcmp(bus->name, "pci")) {
> +               pci_dev = RTE_DEV_TO_PCI(dev_info.device);
> +               kni_dev_info->bus = pci_dev->addr.bus;
> +               kni_dev_info->devid = pci_dev->addr.devid;
> +               kni_dev_info->function = pci_dev->addr.function;
> +               kni_dev_info->vendor_id = pci_dev->id.vendor_id;
> +               kni_dev_info->device_id = pci_dev->id.device_id;
> +       }
> +}
> +
>  struct rte_kni *
>  rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
>               const struct rte_kni_conf *conf,
> @@ -247,6 +269,12 @@ rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
>                 kni->ops.port_id = UINT16_MAX;
>
>         memset(&dev_info, 0, sizeof(dev_info));
> +
> +       if (rte_eal_iova_mode() == RTE_IOVA_VA) {
> +               uint16_t port_id = conf->group_id;
> +
> +               kni_dev_pci_addr_get(port_id, &dev_info);
> +       }
>         dev_info.core_id = conf->core_id;
>         dev_info.force_bind = conf->force_bind;
>         dev_info.group_id = conf->group_id;
> @@ -300,6 +328,8 @@ rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
>         kni->group_id = conf->group_id;
>         kni->mbuf_size = conf->mbuf_size;
>
> +       dev_info.iova_mode = (rte_eal_iova_mode() == RTE_IOVA_VA) ? 1 : 0;
> +
>         ret = ioctl(kni_fd, RTE_KNI_IOCTL_CREATE, &dev_info);
>         if (ret < 0)
>                 goto ioctl_fail;
> --
> 2.8.4
>
  
Vamsi Krishna Attunuru July 29, 2019, 1:22 p.m. UTC | #2
> -----Original Message-----
> From: Igor Ryzhov <iryzhov@nfware.com>
> Sent: Monday, July 29, 2019 5:55 PM
> To: Vamsi Krishna Attunuru <vattunuru@marvell.com>
> Cc: dev@dpdk.org; Thomas Monjalon <thomas@monjalon.net>; Jerin Jacob
> Kollanukkaran <jerinj@marvell.com>; Olivier Matz
> <olivier.matz@6wind.com>; Ferruh Yigit <ferruh.yigit@intel.com>;
> anatoly.burakov@intel.com; Andrew Rybchenko
> <arybchenko@solarflare.com>; Kiran Kumar Kokkilagadda
> <kirankumark@marvell.com>
> Subject: [EXT] Re: [dpdk-dev] [PATCH v9 2/5] kni: add IOVA=VA support in
> KNI lib
> 
> External Email
> 
> ----------------------------------------------------------------------
> Hi,
> 
> I believe iova_mode check should not be automatic and should be a part of
> rte_kni_conf.

Changing the mode through rte_kni_conf would be too late: the environment is already set to VA or PA mode during eal_init itself, and it cannot be switched back later based on the KNI config.

> What if I want to use KNI just as a pure virtual device without any connection
> to a real PCI device in an application that works in VA mode?

The above use case will fail because there is no corresponding iommu domain present; without one, the kernel KNI module cannot translate the memory that the application (working in VA mode) provides to it.

One possible way is to explicitly select iova=pa mode through the EAL command-line option when a vdev (not backed by a PCI device) is present.

Another option would be to add a mechanism that detects the presence of a vdev (not backed by a PCI device) before the environment's IOVA mode is set in eal_init(), and changes the mode to PA accordingly if both a vdev and the KNI module are present.

@Ferruh, any other thoughts?

BTW, what is the plan for these KNI patches?

> 
> Best regards,
> Igor
> 
> On Mon, Jul 29, 2019 at 3:14 PM <vattunuru@marvell.com> wrote:
> >
> > From: Vamsi Attunuru <vattunuru@marvell.com>
> >
> > Current KNI implementation only operates in IOVA=PA mode, patch adds
> > required functionality in KNI lib to support IOVA=VA mode.
> >
> > KNI kernel module requires device info to get iommu domain related
> > information for IOVA addr related translations. Patch defines device
> > related info in rte_kni_device_info structure and passes device info
> > to the kernel KNI module when IOVA=VA mode is enabled.
> >
> > Signed-off-by: Vamsi Attunuru <vattunuru@marvell.com>
> > Signed-off-by: Kiran Kumar K <kirankumark@marvell.com>
> > ---
> >  lib/librte_eal/linux/eal/include/rte_kni_common.h |  8 ++++++
> >  lib/librte_kni/Makefile                           |  1 +
> >  lib/librte_kni/meson.build                        |  1 +
> >  lib/librte_kni/rte_kni.c                          | 30 +++++++++++++++++++++++
> >  4 files changed, 40 insertions(+)
> >
> > diff --git a/lib/librte_eal/linux/eal/include/rte_kni_common.h
> > b/lib/librte_eal/linux/eal/include/rte_kni_common.h
> > index 37d9ee8..4fd8a90 100644
> > --- a/lib/librte_eal/linux/eal/include/rte_kni_common.h
> > +++ b/lib/librte_eal/linux/eal/include/rte_kni_common.h
> > @@ -111,6 +111,13 @@ struct rte_kni_device_info {
> >         void * mbuf_va;
> >         phys_addr_t mbuf_phys;
> >
> > +       /* PCI info */
> > +       uint16_t vendor_id;           /**< Vendor ID or PCI_ANY_ID. */
> > +       uint16_t device_id;           /**< Device ID or PCI_ANY_ID. */
> > +       uint8_t bus;                  /**< Device bus */
> > +       uint8_t devid;                /**< Device ID */
> > +       uint8_t function;             /**< Device function. */
> > +
> >         uint16_t group_id;            /**< Group ID */
> >         uint32_t core_id;             /**< core ID to bind for kernel thread */
> >
> > @@ -121,6 +128,7 @@ struct rte_kni_device_info {
> >         unsigned mbuf_size;
> >         unsigned int mtu;
> >         uint8_t mac_addr[6];
> > +       uint8_t iova_mode;
> >  };
> >
> >  #define KNI_DEVICE "kni"
> > diff --git a/lib/librte_kni/Makefile b/lib/librte_kni/Makefile index
> > cbd6599..ab15d10 100644
> > --- a/lib/librte_kni/Makefile
> > +++ b/lib/librte_kni/Makefile
> > @@ -7,6 +7,7 @@ include $(RTE_SDK)/mk/rte.vars.mk  LIB = librte_kni.a
> >
> >  CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -fno-strict-aliasing
> > +CFLAGS += -I$(RTE_SDK)/drivers/bus/pci
> >  LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev
> >
> >  EXPORT_MAP := rte_kni_version.map
> > diff --git a/lib/librte_kni/meson.build b/lib/librte_kni/meson.build
> > index 41fa2e3..fd46f87 100644
> > --- a/lib/librte_kni/meson.build
> > +++ b/lib/librte_kni/meson.build
> > @@ -9,3 +9,4 @@ version = 2
> >  sources = files('rte_kni.c')
> >  headers = files('rte_kni.h')
> >  deps += ['ethdev', 'pci']
> > +includes += include_directories('../../drivers/bus/pci')
> > diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c index
> > 4b51fb4..2aaaeaa 100644
> > --- a/lib/librte_kni/rte_kni.c
> > +++ b/lib/librte_kni/rte_kni.c
> > @@ -14,6 +14,7 @@
> >  #include <rte_spinlock.h>
> >  #include <rte_string_fns.h>
> >  #include <rte_ethdev.h>
> > +#include <rte_bus_pci.h>
> >  #include <rte_malloc.h>
> >  #include <rte_log.h>
> >  #include <rte_kni.h>
> > @@ -199,6 +200,27 @@ kni_release_mz(struct rte_kni *kni)
> >         rte_memzone_free(kni->m_sync_addr);
> >  }
> >
> > +static void
> > +kni_dev_pci_addr_get(uint16_t port_id, struct rte_kni_device_info
> > +*kni_dev_info) {
> > +       const struct rte_pci_device *pci_dev;
> > +       struct rte_eth_dev_info dev_info;
> > +       const struct rte_bus *bus = NULL;
> > +
> > +       rte_eth_dev_info_get(port_id, &dev_info);
> > +
> > +       if (dev_info.device)
> > +               bus = rte_bus_find_by_device(dev_info.device);
> > +       if (bus && !strcmp(bus->name, "pci")) {
> > +               pci_dev = RTE_DEV_TO_PCI(dev_info.device);
> > +               kni_dev_info->bus = pci_dev->addr.bus;
> > +               kni_dev_info->devid = pci_dev->addr.devid;
> > +               kni_dev_info->function = pci_dev->addr.function;
> > +               kni_dev_info->vendor_id = pci_dev->id.vendor_id;
> > +               kni_dev_info->device_id = pci_dev->id.device_id;
> > +       }
> > +}
> > +
> >  struct rte_kni *
> >  rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
> >               const struct rte_kni_conf *conf, @@ -247,6 +269,12 @@
> > rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
> >                 kni->ops.port_id = UINT16_MAX;
> >
> >         memset(&dev_info, 0, sizeof(dev_info));
> > +
> > +       if (rte_eal_iova_mode() == RTE_IOVA_VA) {
> > +               uint16_t port_id = conf->group_id;
> > +
> > +               kni_dev_pci_addr_get(port_id, &dev_info);
> > +       }
> >         dev_info.core_id = conf->core_id;
> >         dev_info.force_bind = conf->force_bind;
> >         dev_info.group_id = conf->group_id; @@ -300,6 +328,8 @@
> > rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
> >         kni->group_id = conf->group_id;
> >         kni->mbuf_size = conf->mbuf_size;
> >
> > +       dev_info.iova_mode = (rte_eal_iova_mode() == RTE_IOVA_VA) ? 1
> > + : 0;
> > +
> >         ret = ioctl(kni_fd, RTE_KNI_IOCTL_CREATE, &dev_info);
> >         if (ret < 0)
> >                 goto ioctl_fail;
> > --
> > 2.8.4
> >
  

Patch

diff --git a/lib/librte_eal/linux/eal/include/rte_kni_common.h b/lib/librte_eal/linux/eal/include/rte_kni_common.h
index 37d9ee8..4fd8a90 100644
--- a/lib/librte_eal/linux/eal/include/rte_kni_common.h
+++ b/lib/librte_eal/linux/eal/include/rte_kni_common.h
@@ -111,6 +111,13 @@  struct rte_kni_device_info {
 	void * mbuf_va;
 	phys_addr_t mbuf_phys;
 
+	/* PCI info */
+	uint16_t vendor_id;           /**< Vendor ID or PCI_ANY_ID. */
+	uint16_t device_id;           /**< Device ID or PCI_ANY_ID. */
+	uint8_t bus;                  /**< Device bus */
+	uint8_t devid;                /**< Device ID */
+	uint8_t function;             /**< Device function. */
+
 	uint16_t group_id;            /**< Group ID */
 	uint32_t core_id;             /**< core ID to bind for kernel thread */
 
@@ -121,6 +128,7 @@  struct rte_kni_device_info {
 	unsigned mbuf_size;
 	unsigned int mtu;
 	uint8_t mac_addr[6];
+	uint8_t iova_mode;
 };
 
 #define KNI_DEVICE "kni"
diff --git a/lib/librte_kni/Makefile b/lib/librte_kni/Makefile
index cbd6599..ab15d10 100644
--- a/lib/librte_kni/Makefile
+++ b/lib/librte_kni/Makefile
@@ -7,6 +7,7 @@  include $(RTE_SDK)/mk/rte.vars.mk
 LIB = librte_kni.a
 
 CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -fno-strict-aliasing
+CFLAGS += -I$(RTE_SDK)/drivers/bus/pci
 LDLIBS += -lrte_eal -lrte_mempool -lrte_mbuf -lrte_ethdev
 
 EXPORT_MAP := rte_kni_version.map
diff --git a/lib/librte_kni/meson.build b/lib/librte_kni/meson.build
index 41fa2e3..fd46f87 100644
--- a/lib/librte_kni/meson.build
+++ b/lib/librte_kni/meson.build
@@ -9,3 +9,4 @@  version = 2
 sources = files('rte_kni.c')
 headers = files('rte_kni.h')
 deps += ['ethdev', 'pci']
+includes += include_directories('../../drivers/bus/pci')
diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c
index 4b51fb4..2aaaeaa 100644
--- a/lib/librte_kni/rte_kni.c
+++ b/lib/librte_kni/rte_kni.c
@@ -14,6 +14,7 @@ 
 #include <rte_spinlock.h>
 #include <rte_string_fns.h>
 #include <rte_ethdev.h>
+#include <rte_bus_pci.h>
 #include <rte_malloc.h>
 #include <rte_log.h>
 #include <rte_kni.h>
@@ -199,6 +200,27 @@  kni_release_mz(struct rte_kni *kni)
 	rte_memzone_free(kni->m_sync_addr);
 }
 
+static void
+kni_dev_pci_addr_get(uint16_t port_id, struct rte_kni_device_info *kni_dev_info)
+{
+	const struct rte_pci_device *pci_dev;
+	struct rte_eth_dev_info dev_info;
+	const struct rte_bus *bus = NULL;
+
+	rte_eth_dev_info_get(port_id, &dev_info);
+
+	if (dev_info.device)
+		bus = rte_bus_find_by_device(dev_info.device);
+	if (bus && !strcmp(bus->name, "pci")) {
+		pci_dev = RTE_DEV_TO_PCI(dev_info.device);
+		kni_dev_info->bus = pci_dev->addr.bus;
+		kni_dev_info->devid = pci_dev->addr.devid;
+		kni_dev_info->function = pci_dev->addr.function;
+		kni_dev_info->vendor_id = pci_dev->id.vendor_id;
+		kni_dev_info->device_id = pci_dev->id.device_id;
+	}
+}
+
 struct rte_kni *
 rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
 	      const struct rte_kni_conf *conf,
@@ -247,6 +269,12 @@  rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
 		kni->ops.port_id = UINT16_MAX;
 
 	memset(&dev_info, 0, sizeof(dev_info));
+
+	if (rte_eal_iova_mode() == RTE_IOVA_VA) {
+		uint16_t port_id = conf->group_id;
+
+		kni_dev_pci_addr_get(port_id, &dev_info);
+	}
 	dev_info.core_id = conf->core_id;
 	dev_info.force_bind = conf->force_bind;
 	dev_info.group_id = conf->group_id;
@@ -300,6 +328,8 @@  rte_kni_alloc(struct rte_mempool *pktmbuf_pool,
 	kni->group_id = conf->group_id;
 	kni->mbuf_size = conf->mbuf_size;
 
+	dev_info.iova_mode = (rte_eal_iova_mode() == RTE_IOVA_VA) ? 1 : 0;
+
 	ret = ioctl(kni_fd, RTE_KNI_IOCTL_CREATE, &dev_info);
 	if (ret < 0)
 		goto ioctl_fail;