[dpdk-dev,6/6] virtio: add virtio v1.0 support

Message ID 1449719650-3482-7-git-send-email-yuanhan.liu@linux.intel.com (mailing list archive)
State Superseded, archived
Headers

Commit Message

Yuanhan Liu Dec. 10, 2015, 3:54 a.m. UTC
  Modern (v1.0) virtio pci device defines several pci capabilities.
Each cap has a configure structure corresponding to it, and the
cap.bar and cap.offset fields tell us where to find it.

And thanks to the RTE_PCI_DRV_NEED_MAPPING, which already maps all
the bar space for us, we could easily locate to a cfg structure by:

    cfg_addr = dev->mem_resources[cap.bar].addr + cap.offset;

Therefore, the entrance of enabling modern (v1.0) pci device support
is to iterate the pci capability lists, and to locate some configs
we care; and they are:

- common cfg

  For generic virtio and virtuqueu configuration, such as setting/getting
  features, enabling a specific queue, and so on.

- nofity cfg

  Combining with `queue_notify_off' from common cfg, we could use it to
  notify a specific virt queue.

- device cfg

  Where virtio_net_config structure locates.

If any of above cap is not found, we fallback to the legacy virtio
handling.

If succeed, hw->vtpci_ops is assigned to modern_ops, where all
operations are implemented by reading/writing a (or few) specific
configuration space from above 3 cfg structures. And that's basically
how this patch works.

Besides those changes, virtio 1.0 introduces a new status field:
FEATURES_OK, which is set after features negotiation is done.

Last, set the VIRTIO_F_VERSION_1 feature flag.

Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
---
 drivers/net/virtio/virtio_ethdev.c |  16 +-
 drivers/net/virtio/virtio_ethdev.h |   3 +-
 drivers/net/virtio/virtio_pci.c    | 313 ++++++++++++++++++++++++++++++++++++-
 drivers/net/virtio/virtio_pci.h    |  65 ++++++++
 drivers/net/virtio/virtqueue.h     |   2 +
 5 files changed, 395 insertions(+), 4 deletions(-)
  

Comments

Jianfeng Tan Dec. 29, 2015, 11:39 a.m. UTC | #1
> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Yuanhan Liu
> Sent: Thursday, December 10, 2015 11:54 AM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH 6/6] virtio: add virtio v1.0 support
> 
...
> Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
> ---
>  drivers/net/virtio/virtio_ethdev.c |  16 +-
>  drivers/net/virtio/virtio_ethdev.h |   3 +-
>  drivers/net/virtio/virtio_pci.c    | 313
> ++++++++++++++++++++++++++++++++++++-
>  drivers/net/virtio/virtio_pci.h    |  65 ++++++++
>  drivers/net/virtio/virtqueue.h     |   2 +
>  5 files changed, 395 insertions(+), 4 deletions(-)
> 

As legacy VIRTIO_WRITE_REG_XXXs are only used in virtio_pci.c, move these definitions into virtio_pci.c?

> diff --git a/drivers/net/virtio/virtio_ethdev.c
> b/drivers/net/virtio/virtio_ethdev.c
> index 9847ed8..1f9de01 100644
> --- a/drivers/net/virtio/virtio_ethdev.c
> +++ b/drivers/net/virtio/virtio_ethdev.c
> @@ -927,7 +927,7 @@ virtio_vlan_filter_set(struct rte_eth_dev *dev,
> uint16_t vlan_id, int on)
>  	return virtio_send_command(hw->cvq, &ctrl, &len, 1);
>  }
> 
> -static void
> +static int
>  virtio_negotiate_features(struct virtio_hw *hw)
>  {
>  	uint64_t host_features;
> @@ -949,6 +949,17 @@ virtio_negotiate_features(struct virtio_hw *hw)
>  	hw->guest_features = vtpci_negotiate_features(hw, host_features);
>  	PMD_INIT_LOG(DEBUG, "features after negotiate = %"PRIx64,
>  		hw->guest_features);
> +
> +	if (hw->modern) {
> +		if (!vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) {
> +			PMD_INIT_LOG(ERR,
> +				"VIRTIO_F_VERSION_1 features is not
> enabled");
> +			return -1;
> +		}
> +		vtpci_set_status(hw,
> VIRTIO_CONFIG_STATUS_FEATURES_OK);


As per Linux code drivers/virtio/virtio.c:virtio_finalize_features(), vtpci_set_status() may fail, and there's a double check.
Is it necessary here?


> +	}
> +
> +	return 0;
>  }
> 
>  /*
> @@ -1032,7 +1043,8 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
> 
>  	/* Tell the host we've known how to drive the device. */
>  	vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER);
> -	virtio_negotiate_features(hw);
> +	if (virtio_negotiate_features(hw) < 0)
> +		return -1;
> 
>  	/* If host does not support status then disable LSC */
>  	if (!vtpci_with_feature(hw, VIRTIO_NET_F_STATUS))
> diff --git a/drivers/net/virtio/virtio_ethdev.h
> b/drivers/net/virtio/virtio_ethdev.h
> index ae2d47d..fed9571 100644
> --- a/drivers/net/virtio/virtio_ethdev.h
> +++ b/drivers/net/virtio/virtio_ethdev.h
> @@ -64,7 +64,8 @@
>  	 1u << VIRTIO_NET_F_CTRL_VQ	  |	\
>  	 1u << VIRTIO_NET_F_CTRL_RX	  |	\
>  	 1u << VIRTIO_NET_F_CTRL_VLAN	  |	\
> -	 1u << VIRTIO_NET_F_MRG_RXBUF)
> +	 1u << VIRTIO_NET_F_MRG_RXBUF	  |	\
> +	 1ULL << VIRTIO_F_VERSION_1)
> 
>  /*
>   * CQ function prototype
> diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c
> index 26b0a0c..7862d7f 100644
> --- a/drivers/net/virtio/virtio_pci.c
> +++ b/drivers/net/virtio/virtio_pci.c
> @@ -31,6 +31,7 @@
>   *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> DAMAGE.
>   */
>  #include <stdint.h>
> +#include <linux/pci_regs.h>

Put this "include" into macro RTE_EXEC_ENV_LINUXAPP followed?

> 
>  #ifdef RTE_EXEC_ENV_LINUXAPP
>   #include <dirent.h>
> @@ -448,6 +449,205 @@ static const struct virtio_pci_ops legacy_ops = {
>  };
> 
> 
...
> +
> +static uint16_t
> +modern_set_irq(struct virtio_hw *hw __rte_unused, uint16_t vec
> __rte_unused)
> +{
> +	/* FIXME: xxx */
> +	return 0;
> +}

If not going to support LSC, please give clear indication at related doc. My concern is: this will affect
some users, who are using LSC in legacy virtio. (If LSC works well on legacy virtio?).

> +
> +static uint16_t
> +modern_get_queue_num(struct virtio_hw *hw, uint16_t queue_id)
> +{
> +	modern_write16(queue_id, &hw->common_cfg->queue_select);
> +	return modern_read16(&hw->common_cfg->queue_size);
> +}
> +
...
> 
> +static inline void *
> +get_cfg_addr(struct rte_pci_device *dev, struct virtio_pci_cap *cap)
> +{
> +	uint8_t  bar    = cap->bar;
> +	uint32_t length = cap->length;
> +	uint32_t offset = cap->offset;
> +	uint8_t *base;
> +

Use a macro to substitute hardcoded "5"?

> +	if (unlikely(bar > 5)) {
> +		PMD_INIT_LOG(ERR, "invalid bar: %u", bar);
> +		return NULL;
> +	}
> +
...
>  int
>  vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw)
>  {
> -	hw->vtpci_ops = &legacy_ops;
> +	hw->dev = dev;
> 
> +	/*
> +	 * Try if we can succeed reading virtio pci caps, which exists
> +	 * only on modern pci device. If failed, we fallback to legacy
> +	 * virtio hanlding.

hanlding -> handling

Thanks,
Jianfeng

> +	 */
...
> +
>  	struct vq_desc_extra {
>  		void              *cookie;
>  		uint16_t          ndescs;
> --
> 1.9.0
  
Yuanhan Liu Dec. 30, 2015, 3:40 a.m. UTC | #2
On Tue, Dec 29, 2015 at 11:39:47AM +0000, Tan, Jianfeng wrote:
> 
> 
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Yuanhan Liu
> > Sent: Thursday, December 10, 2015 11:54 AM
> > To: dev@dpdk.org
> > Subject: [dpdk-dev] [PATCH 6/6] virtio: add virtio v1.0 support
> > 
> ...
> > Signed-off-by: Yuanhan Liu <yuanhan.liu@linux.intel.com>
> > ---
> >  drivers/net/virtio/virtio_ethdev.c |  16 +-
> >  drivers/net/virtio/virtio_ethdev.h |   3 +-
> >  drivers/net/virtio/virtio_pci.c    | 313
> > ++++++++++++++++++++++++++++++++++++-
> >  drivers/net/virtio/virtio_pci.h    |  65 ++++++++
> >  drivers/net/virtio/virtqueue.h     |   2 +
> >  5 files changed, 395 insertions(+), 4 deletions(-)
> > 
> 
> As legacy VIRTIO_WRITE_REG_XXXs are only used in virtio_pci.c, move these definitions into virtio_pci.c?

Yes, and I planned to do it in another series, where I will do more virtio
cleanups. (hmm..., maybe I could stack them on top of this series).

> 
> > diff --git a/drivers/net/virtio/virtio_ethdev.c
> > b/drivers/net/virtio/virtio_ethdev.c
> > index 9847ed8..1f9de01 100644
> > --- a/drivers/net/virtio/virtio_ethdev.c
> > +++ b/drivers/net/virtio/virtio_ethdev.c
> > @@ -927,7 +927,7 @@ virtio_vlan_filter_set(struct rte_eth_dev *dev,
> > uint16_t vlan_id, int on)
> >  	return virtio_send_command(hw->cvq, &ctrl, &len, 1);
> >  }
> > 
> > -static void
> > +static int
> >  virtio_negotiate_features(struct virtio_hw *hw)
> >  {
> >  	uint64_t host_features;
> > @@ -949,6 +949,17 @@ virtio_negotiate_features(struct virtio_hw *hw)
> >  	hw->guest_features = vtpci_negotiate_features(hw, host_features);
> >  	PMD_INIT_LOG(DEBUG, "features after negotiate = %"PRIx64,
> >  		hw->guest_features);
> > +
> > +	if (hw->modern) {
> > +		if (!vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) {
> > +			PMD_INIT_LOG(ERR,
> > +				"VIRTIO_F_VERSION_1 features is not
> > enabled");
> > +			return -1;
> > +		}
> > +		vtpci_set_status(hw,
> > VIRTIO_CONFIG_STATUS_FEATURES_OK);
> 
> 
> As per Linux code drivers/virtio/virtio.c:virtio_finalize_features(), vtpci_set_status() may fail, and there's a double check.
> Is it necessary here?

Yes, it's necessary: see the spec (3.1.1 Driver Requirements: Device
Initialization):

    5. Set the FEATURES_OK status bit. The driver MUST NOT accept new
       feature bits after this step.

    6. Re-read device status to ensure the FEATURES_OK bit is still set:
       otherwise, the device does not support our subset of features and
       the device is unusable.
> 
> 
> > +	}
> > +
> > +	return 0;
> >  }
> > 
> >  /*
> > @@ -1032,7 +1043,8 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
> > 
> >  	/* Tell the host we've known how to drive the device. */
> >  	vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER);
> > -	virtio_negotiate_features(hw);
> > +	if (virtio_negotiate_features(hw) < 0)
> > +		return -1;
> > 
> >  	/* If host does not support status then disable LSC */
> >  	if (!vtpci_with_feature(hw, VIRTIO_NET_F_STATUS))
> > diff --git a/drivers/net/virtio/virtio_ethdev.h
> > b/drivers/net/virtio/virtio_ethdev.h
> > index ae2d47d..fed9571 100644
> > --- a/drivers/net/virtio/virtio_ethdev.h
> > +++ b/drivers/net/virtio/virtio_ethdev.h
> > @@ -64,7 +64,8 @@
> >  	 1u << VIRTIO_NET_F_CTRL_VQ	  |	\
> >  	 1u << VIRTIO_NET_F_CTRL_RX	  |	\
> >  	 1u << VIRTIO_NET_F_CTRL_VLAN	  |	\
> > -	 1u << VIRTIO_NET_F_MRG_RXBUF)
> > +	 1u << VIRTIO_NET_F_MRG_RXBUF	  |	\
> > +	 1ULL << VIRTIO_F_VERSION_1)
> > 
> >  /*
> >   * CQ function prototype
> > diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c
> > index 26b0a0c..7862d7f 100644
> > --- a/drivers/net/virtio/virtio_pci.c
> > +++ b/drivers/net/virtio/virtio_pci.c
> > @@ -31,6 +31,7 @@
> >   *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
> > DAMAGE.
> >   */
> >  #include <stdint.h>
> > +#include <linux/pci_regs.h>
> 
> Put this "include" into macro RTE_EXEC_ENV_LINUXAPP followed?

We can't simply do that, as we need reference some macros from it
in common code path, virtio_read_caps(). But yes, you just remind
me that there should be no such header in non-linux platform. We
can't include it blindly here. Good catch, btw!

What's lucky here is that we just need reference quite few macros,
maybe we could just define them by ourself, to get rid of the
depency.

> 
> > 
> >  #ifdef RTE_EXEC_ENV_LINUXAPP
> >   #include <dirent.h>
> > @@ -448,6 +449,205 @@ static const struct virtio_pci_ops legacy_ops = {
> >  };
> > 
> > 
> ...
> > +
> > +static uint16_t
> > +modern_set_irq(struct virtio_hw *hw __rte_unused, uint16_t vec
> > __rte_unused)
> > +{
> > +	/* FIXME: xxx */
> > +	return 0;
> > +}
> 
> If not going to support LSC, please give clear indication at related doc. My concern is: this will affect
> some users, who are using LSC in legacy virtio. (If LSC works well on legacy virtio?).

TBH, I don't spent too much time on that, therefore, I marked it as
FIXME here, with the hope that I will revisit it with more cares in
future version.

> > +
> > +static uint16_t
> > +modern_get_queue_num(struct virtio_hw *hw, uint16_t queue_id)
> > +{
> > +	modern_write16(queue_id, &hw->common_cfg->queue_select);
> > +	return modern_read16(&hw->common_cfg->queue_size);
> > +}
> > +
> ...
> > 
> > +static inline void *
> > +get_cfg_addr(struct rte_pci_device *dev, struct virtio_pci_cap *cap)
> > +{
> > +	uint8_t  bar    = cap->bar;
> > +	uint32_t length = cap->length;
> > +	uint32_t offset = cap->offset;
> > +	uint8_t *base;
> > +
> 
> Use a macro to substitute hardcoded "5"?

The fact that 5 is the max bar number is so well known, that I don't
think it's necessary to add a macro here.

> > +	if (unlikely(bar > 5)) {
> > +		PMD_INIT_LOG(ERR, "invalid bar: %u", bar);
> > +		return NULL;
> > +	}
> > +
> ...
> >  int
> >  vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw)
> >  {
> > -	hw->vtpci_ops = &legacy_ops;
> > +	hw->dev = dev;
> > 
> > +	/*
> > +	 * Try if we can succeed reading virtio pci caps, which exists
> > +	 * only on modern pci device. If failed, we fallback to legacy
> > +	 * virtio hanlding.
> 
> hanlding -> handling

Oops, and thanks.

	--yliu
  

Patch

diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c
index 9847ed8..1f9de01 100644
--- a/drivers/net/virtio/virtio_ethdev.c
+++ b/drivers/net/virtio/virtio_ethdev.c
@@ -927,7 +927,7 @@  virtio_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
 	return virtio_send_command(hw->cvq, &ctrl, &len, 1);
 }
 
-static void
+static int
 virtio_negotiate_features(struct virtio_hw *hw)
 {
 	uint64_t host_features;
@@ -949,6 +949,17 @@  virtio_negotiate_features(struct virtio_hw *hw)
 	hw->guest_features = vtpci_negotiate_features(hw, host_features);
 	PMD_INIT_LOG(DEBUG, "features after negotiate = %"PRIx64,
 		hw->guest_features);
+
+	if (hw->modern) {
+		if (!vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) {
+			PMD_INIT_LOG(ERR,
+				"VIRTIO_F_VERSION_1 features is not enabled");
+			return -1;
+		}
+		vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_FEATURES_OK);
+	}
+
+	return 0;
 }
 
 /*
@@ -1032,7 +1043,8 @@  eth_virtio_dev_init(struct rte_eth_dev *eth_dev)
 
 	/* Tell the host we've known how to drive the device. */
 	vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER);
-	virtio_negotiate_features(hw);
+	if (virtio_negotiate_features(hw) < 0)
+		return -1;
 
 	/* If host does not support status then disable LSC */
 	if (!vtpci_with_feature(hw, VIRTIO_NET_F_STATUS))
diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h
index ae2d47d..fed9571 100644
--- a/drivers/net/virtio/virtio_ethdev.h
+++ b/drivers/net/virtio/virtio_ethdev.h
@@ -64,7 +64,8 @@ 
 	 1u << VIRTIO_NET_F_CTRL_VQ	  |	\
 	 1u << VIRTIO_NET_F_CTRL_RX	  |	\
 	 1u << VIRTIO_NET_F_CTRL_VLAN	  |	\
-	 1u << VIRTIO_NET_F_MRG_RXBUF)
+	 1u << VIRTIO_NET_F_MRG_RXBUF	  |	\
+	 1ULL << VIRTIO_F_VERSION_1)
 
 /*
  * CQ function prototype
diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c
index 26b0a0c..7862d7f 100644
--- a/drivers/net/virtio/virtio_pci.c
+++ b/drivers/net/virtio/virtio_pci.c
@@ -31,6 +31,7 @@ 
  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 #include <stdint.h>
+#include <linux/pci_regs.h>
 
 #ifdef RTE_EXEC_ENV_LINUXAPP
  #include <dirent.h>
@@ -448,6 +449,205 @@  static const struct virtio_pci_ops legacy_ops = {
 };
 
 
+#define MODERN_READ_DEF(nr_bits, type)		\
+static inline type				\
+modern_read##nr_bits(type *addr)		\
+{						\
+	return *(volatile type *)addr;		\
+}
+
+#define MODERN_WRITE_DEF(nr_bits, type)		\
+static inline void				\
+modern_write##nr_bits(type val, type *addr)	\
+{						\
+	*(volatile type *)addr = val;		\
+}
+
+MODERN_READ_DEF (8, uint8_t)
+MODERN_WRITE_DEF(8, uint8_t)
+
+MODERN_READ_DEF (16, uint16_t)
+MODERN_WRITE_DEF(16, uint16_t)
+
+MODERN_READ_DEF (32, uint32_t)
+MODERN_WRITE_DEF(32, uint32_t)
+
+static inline void
+modern_write64_twopart(uint64_t val, uint32_t *lo, uint32_t *hi)
+{
+	modern_write32((uint32_t)val, lo);
+	modern_write32(val >> 32,     hi);
+}
+
+static void
+modern_read_dev_config(struct virtio_hw *hw, uint64_t offset,
+		       void *dst, int length)
+{
+	int i;
+	uint8_t *p;
+	uint8_t old_gen, new_gen;
+
+	do {
+		old_gen = modern_read8(&hw->common_cfg->config_generation);
+
+		p = dst;
+		for (i = 0;  i < length; i++)
+			*p++ = modern_read8((uint8_t *)hw->dev_cfg + offset + i);
+
+		new_gen = modern_read8(&hw->common_cfg->config_generation);
+	} while (old_gen != new_gen);
+}
+
+static void
+modern_write_dev_config(struct virtio_hw *hw, uint64_t offset,
+		        void *src, int length)
+{
+	int i;
+	uint8_t *p = src;
+
+	for (i = 0;  i < length; i++)
+		modern_write8(*p++, (uint8_t *)hw->dev_cfg + offset + i);
+}
+
+static uint64_t
+modern_get_features(struct virtio_hw *hw)
+{
+	uint32_t features_lo, features_hi;
+
+	modern_write32(0, &hw->common_cfg->device_feature_select);
+	features_lo = modern_read32(&hw->common_cfg->device_feature);
+
+	modern_write32(1, &hw->common_cfg->device_feature_select);
+	features_hi = modern_read32(&hw->common_cfg->device_feature);
+
+	return ((uint64_t)(features_hi) << 32) | features_lo;
+}
+
+static void
+modern_set_features(struct virtio_hw *hw, uint64_t features)
+{
+	modern_write32(0, &hw->common_cfg->guest_feature_select);
+	modern_write32(features & ((1ULL<<32) - 1),
+		&hw->common_cfg->guest_feature);
+
+	modern_write32(1, &hw->common_cfg->guest_feature_select);
+	modern_write32(features >> 32,
+		&hw->common_cfg->guest_feature);
+}
+
+static uint8_t
+modern_get_status(struct virtio_hw *hw)
+{
+	return modern_read8(&hw->common_cfg->device_status);
+}
+
+static void
+modern_set_status(struct virtio_hw *hw, uint8_t status)
+{
+	modern_write8(status, &hw->common_cfg->device_status);
+}
+
+static void
+modern_reset(struct virtio_hw *hw)
+{
+	modern_set_status(hw, VIRTIO_CONFIG_STATUS_RESET);
+	modern_get_status(hw);
+}
+
+static uint8_t
+modern_get_isr(struct virtio_hw *hw __rte_unused)
+{
+	/* FIXME: xxx */
+	return 0;
+}
+
+static uint16_t
+modern_set_irq(struct virtio_hw *hw __rte_unused, uint16_t vec __rte_unused)
+{
+	/* FIXME: xxx */
+	return 0;
+}
+
+static uint16_t
+modern_get_queue_num(struct virtio_hw *hw, uint16_t queue_id)
+{
+	modern_write16(queue_id, &hw->common_cfg->queue_select);
+	return modern_read16(&hw->common_cfg->queue_size);
+}
+
+static void
+modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq)
+{
+	uint64_t desc_addr, avail_addr, used_addr;
+	uint16_t notify_off;
+
+	desc_addr = vq->mz->phys_addr;
+	avail_addr = desc_addr + vq->vq_nentries * sizeof(struct vring_desc);
+	used_addr = RTE_ALIGN_CEIL(avail_addr + offsetof(struct vring_avail,
+							 ring[vq->vq_nentries]),
+				   VIRTIO_PCI_VRING_ALIGN);
+
+	modern_write16(vq->vq_queue_index, &hw->common_cfg->queue_select);
+
+	modern_write64_twopart(desc_addr, &hw->common_cfg->queue_desc_lo,
+					  &hw->common_cfg->queue_desc_hi);
+	modern_write64_twopart(avail_addr, &hw->common_cfg->queue_avail_lo,
+					   &hw->common_cfg->queue_avail_hi);
+	modern_write64_twopart(used_addr, &hw->common_cfg->queue_used_lo,
+					  &hw->common_cfg->queue_used_hi);
+
+	notify_off = modern_read16(&hw->common_cfg->queue_notify_off);
+	vq->notify_addr = (void *)((uint8_t *)hw->notify_base +
+				notify_off * hw->notify_off_multiplier);
+
+	modern_write16(1, &hw->common_cfg->queue_enable);
+
+	PMD_INIT_LOG(DEBUG, "queue %u addresses:", vq->vq_queue_index);
+	PMD_INIT_LOG(DEBUG, "\t desc_addr: %"PRIx64, desc_addr);
+	PMD_INIT_LOG(DEBUG, "\t aval_addr: %"PRIx64, avail_addr);
+	PMD_INIT_LOG(DEBUG, "\t used_addr: %"PRIx64, used_addr);
+	PMD_INIT_LOG(DEBUG, "\t notify addr: %p (notify offset: %u)",
+		vq->notify_addr, notify_off);
+}
+
+static void
+modern_del_queue(struct virtio_hw *hw, struct virtqueue *vq)
+{
+	modern_write16(vq->vq_queue_index, &hw->common_cfg->queue_select);
+
+	modern_write64_twopart(0, &hw->common_cfg->queue_desc_lo,
+				  &hw->common_cfg->queue_desc_hi);
+	modern_write64_twopart(0, &hw->common_cfg->queue_avail_lo,
+				  &hw->common_cfg->queue_avail_hi);
+	modern_write64_twopart(0, &hw->common_cfg->queue_used_lo,
+				  &hw->common_cfg->queue_used_hi);
+
+	modern_write16(0, &hw->common_cfg->queue_enable);
+}
+
+static void
+modern_notify_queue(struct virtio_hw *hw __rte_unused, struct virtqueue *vq)
+{
+	modern_write16(1, vq->notify_addr);
+}
+
+static const struct virtio_pci_ops modern_ops = {
+	.read_dev_cfg	= modern_read_dev_config,
+	.write_dev_cfg	= modern_write_dev_config,
+	.reset		= modern_reset,
+	.get_status	= modern_get_status,
+	.set_status	= modern_set_status,
+	.get_features	= modern_get_features,
+	.set_features	= modern_set_features,
+	.get_isr	= modern_get_isr,
+	.set_irq	= modern_set_irq,
+	.get_queue_num	= modern_get_queue_num,
+	.setup_queue	= modern_setup_queue,
+	.del_queue	= modern_del_queue,
+	.notify_queue	= modern_notify_queue,
+};
+
+
 void
 vtpci_read_dev_config(struct virtio_hw *hw, uint64_t offset,
 		      void *dst, int length)
@@ -514,15 +714,126 @@  vtpci_irq_config(struct virtio_hw *hw, uint16_t vec)
 	return hw->vtpci_ops->set_irq(hw, vec);
 }
 
+static inline void *
+get_cfg_addr(struct rte_pci_device *dev, struct virtio_pci_cap *cap)
+{
+	uint8_t  bar    = cap->bar;
+	uint32_t length = cap->length;
+	uint32_t offset = cap->offset;
+	uint8_t *base;
+
+	if (unlikely(bar > 5)) {
+		PMD_INIT_LOG(ERR, "invalid bar: %u", bar);
+		return NULL;
+	}
+
+	if (unlikely(offset + length > dev->mem_resource[bar].len)) {
+		PMD_INIT_LOG(ERR,
+			"invalid cap: overflows bar space: %u > %"PRIu64,
+			offset + length, dev->mem_resource[bar].len);
+		return NULL;
+	}
+
+	base = dev->mem_resource[bar].addr;
+	if (unlikely(base == NULL)) {
+		PMD_INIT_LOG(ERR, "bar %u base addr is NULL", bar);
+		return NULL;
+	}
+
+	return base + offset;
+}
+
+static int
+virtio_read_caps(struct rte_pci_device *dev, struct virtio_hw *hw)
+{
+	uint8_t pos;
+	struct virtio_pci_cap cap;
+	int ret;
+
+	ret = rte_eal_pci_read_config(dev, &pos, 1, PCI_CAPABILITY_LIST);
+	if (ret < 0) {
+		PMD_INIT_LOG(DEBUG, "failed to read pci capability list");
+		return -1;
+	}
+
+	while (pos) {
+		ret = rte_eal_pci_read_config(dev, &cap, sizeof(cap), pos);
+		if (ret < 0) {
+			PMD_INIT_LOG(ERR,
+				"failed to read pci cap at pos: %x", pos);
+			break;
+		}
+
+		if (cap.cap_vndr != PCI_CAP_ID_VNDR) {
+			PMD_INIT_LOG(DEBUG,
+				"[%2x] skipping non VNDR cap id: %02x",
+				pos, cap.cap_vndr);
+			goto next;
+		}
+
+		PMD_INIT_LOG(DEBUG,
+			"[%2x] cfg type: %u, bar: %u, offset: %04x, len: %u",
+			pos, cap.cfg_type, cap.bar, cap.offset, cap.length);
+
+		switch (cap.cfg_type) {
+		case VIRTIO_PCI_CAP_COMMON_CFG:
+			hw->common_cfg = get_cfg_addr(dev, &cap);
+			break;
+		case VIRTIO_PCI_CAP_NOTIFY_CFG:
+			rte_eal_pci_read_config(dev, &hw->notify_off_multiplier,
+						4, pos + sizeof(cap));
+			hw->notify_base = get_cfg_addr(dev, &cap);
+			break;
+		case VIRTIO_PCI_CAP_DEVICE_CFG:
+			hw->dev_cfg = get_cfg_addr(dev, &cap);
+			break;
+		}
+
+	next:
+		pos = cap.cap_next;
+	}
+
+	if (hw->common_cfg == NULL || hw->notify_base == NULL ||
+	    hw->dev_cfg == NULL) {
+		PMD_INIT_LOG(INFO, "no modern virtio pci device found.");
+		return -1;
+	}
+
+	PMD_INIT_LOG(INFO, "found modern virtio pci device.");
+
+	PMD_INIT_LOG(DEBUG, "common cfg mapped at: %p", hw->common_cfg);
+	PMD_INIT_LOG(DEBUG, "device cfg mapped at: %p", hw->dev_cfg);
+	PMD_INIT_LOG(DEBUG, "notify base: %p, notify off multiplier: %u",
+		hw->notify_base, hw->notify_off_multiplier);
+
+	return 0;
+}
+
 int
 vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw)
 {
-	hw->vtpci_ops = &legacy_ops;
+	hw->dev = dev;
 
+	/*
+	 * Try if we can succeed reading virtio pci caps, which exists
+	 * only on modern pci device. If failed, we fallback to legacy
+	 * virtio hanlding.
+	 */
+	if (virtio_read_caps(dev, hw) == 0) {
+		PMD_INIT_LOG(INFO, "moderen virtio pci detected.");
+		hw->vtpci_ops = &modern_ops;
+		hw->modern    = 1;
+		return 0;
+	}
+
+	PMD_INIT_LOG(INFO, "trying with legacy virtio pci.");
 	if (legacy_virtio_resource_init(dev) < 0)
 		return -1;
+
+	hw->vtpci_ops = &legacy_ops;
 	hw->use_msix = legacy_virtio_has_msix(&dev->addr);
 	hw->io_base  = (uint32_t)(uintptr_t)dev->mem_resource[0].addr;
+	hw->modern   = 0;
 
 	return 0;
 }
diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h
index 0900cd0..8ff51d2 100644
--- a/drivers/net/virtio/virtio_pci.h
+++ b/drivers/net/virtio/virtio_pci.h
@@ -93,6 +93,7 @@  struct virtqueue;
 #define VIRTIO_CONFIG_STATUS_ACK       0x01
 #define VIRTIO_CONFIG_STATUS_DRIVER    0x02
 #define VIRTIO_CONFIG_STATUS_DRIVER_OK 0x04
+#define VIRTIO_CONFIG_STATUS_FEATURES_OK 0x08
 #define VIRTIO_CONFIG_STATUS_FAILED    0x80
 
 /*
@@ -141,6 +142,8 @@  struct virtqueue;
 /* We support indirect buffer descriptors */
 #define VIRTIO_RING_F_INDIRECT_DESC	28
 
+#define VIRTIO_F_VERSION_1		32
+
 /*
  * Some VirtIO feature bits (currently bits 28 through 31) are
  * reserved for the transport being used (eg. virtio_ring), the
@@ -163,6 +166,60 @@  struct virtqueue;
  */
 #define VIRTIO_MAX_VIRTQUEUES 8
 
+/* Common configuration */
+#define VIRTIO_PCI_CAP_COMMON_CFG	1
+/* Notifications */
+#define VIRTIO_PCI_CAP_NOTIFY_CFG	2
+/* ISR Status */
+#define VIRTIO_PCI_CAP_ISR_CFG		3
+/* Device specific configuration */
+#define VIRTIO_PCI_CAP_DEVICE_CFG	4
+/* PCI configuration access */
+#define VIRTIO_PCI_CAP_PCI_CFG		5
+
+/* This is the PCI capability header: */
+struct virtio_pci_cap {
+	uint8_t cap_vndr;		/* Generic PCI field: PCI_CAP_ID_VNDR */
+	uint8_t cap_next;		/* Generic PCI field: next ptr. */
+	uint8_t cap_len;		/* Generic PCI field: capability length */
+	uint8_t cfg_type;		/* Identifies the structure. */
+	uint8_t bar;			/* Where to find it. */
+	uint8_t padding[3];		/* Pad to full dword. */
+	uint32_t offset;		/* Offset within bar. */
+	uint32_t length;		/* Length of the structure, in bytes. */
+};
+
+struct virtio_pci_notify_cap {
+	struct virtio_pci_cap cap;
+	uint32_t notify_off_multiplier;	/* Multiplier for queue_notify_off. */
+};
+
+/* Fields in VIRTIO_PCI_CAP_COMMON_CFG: */
+struct virtio_pci_common_cfg {
+	/* About the whole device. */
+	uint32_t device_feature_select;	/* read-write */
+	uint32_t device_feature;	/* read-only */
+	uint32_t guest_feature_select;	/* read-write */
+	uint32_t guest_feature;		/* read-write */
+	uint16_t msix_config;		/* read-write */
+	uint16_t num_queues;		/* read-only */
+	uint8_t device_status;		/* read-write */
+	uint8_t config_generation;	/* read-only */
+
+	/* About a specific virtqueue. */
+	uint16_t queue_select;		/* read-write */
+	uint16_t queue_size;		/* read-write, power of 2. */
+	uint16_t queue_msix_vector;	/* read-write */
+	uint16_t queue_enable;		/* read-write */
+	uint16_t queue_notify_off;	/* read-only */
+	uint32_t queue_desc_lo;		/* read-write */
+	uint32_t queue_desc_hi;		/* read-write */
+	uint32_t queue_avail_lo;	/* read-write */
+	uint32_t queue_avail_hi;	/* read-write */
+	uint32_t queue_used_lo;		/* read-write */
+	uint32_t queue_used_hi;		/* read-write */
+};
+
 struct virtio_hw;
 
 struct virtio_pci_ops {
@@ -188,6 +245,8 @@  struct virtio_pci_ops {
 	void (*notify_queue)(struct virtio_hw *hw, struct virtqueue *vq);
 };
 
+struct virtio_net_config;
+
 struct virtio_hw {
 	struct virtqueue *cvq;
 	uint32_t    io_base;
@@ -198,7 +257,13 @@  struct virtio_hw {
 	uint8_t	    vlan_strip;
 	uint8_t	    use_msix;
 	uint8_t     started;
+	uint8_t     modern;
 	uint8_t     mac_addr[ETHER_ADDR_LEN];
+	uint32_t    notify_off_multiplier;
+	uint16_t    *notify_base;
+	struct rte_pci_device *dev;
+	struct virtio_pci_common_cfg *common_cfg;
+	struct virtio_net_config *dev_cfg;
 	const struct virtio_pci_ops *vtpci_ops;
 };
 
diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h
index d7fb450..99d4fa9 100644
--- a/drivers/net/virtio/virtqueue.h
+++ b/drivers/net/virtio/virtqueue.h
@@ -202,6 +202,8 @@  struct virtqueue {
 	/* Size bins in array as RFC 2819, undersized [0], 64 [1], etc */
 	uint64_t	size_bins[8];
 
+	uint16_t	*notify_addr;
+
 	struct vq_desc_extra {
 		void              *cookie;
 		uint16_t          ndescs;