[dpdk-dev,v4,5/7] hv: poll mode driver

Message ID 1429637564-5656-6-git-send-email-stephen@networkplumber.org (mailing list archive)
State Changes Requested, archived

Commit Message

Stephen Hemminger April 21, 2015, 5:32 p.m. UTC
  From: Stephen Hemminger <shemming@brocade.com>

This is a new poll mode driver for the Hyper-V virtual network
interface.

Signed-off-by: Stas Egorov <segorov@mirantis.com>
Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 lib/Makefile                          |    1 +
 lib/librte_pmd_hyperv/Makefile        |   28 +
 lib/librte_pmd_hyperv/hyperv.h        |  169 ++++
 lib/librte_pmd_hyperv/hyperv_drv.c    | 1653 +++++++++++++++++++++++++++++++++
 lib/librte_pmd_hyperv/hyperv_drv.h    |  558 +++++++++++
 lib/librte_pmd_hyperv/hyperv_ethdev.c |  332 +++++++
 lib/librte_pmd_hyperv/hyperv_logs.h   |   69 ++
 lib/librte_pmd_hyperv/hyperv_rxtx.c   |  403 ++++++++
 lib/librte_pmd_hyperv/hyperv_rxtx.h   |   35 +
 mk/rte.app.mk                         |    4 +
 10 files changed, 3252 insertions(+)
 create mode 100644 lib/librte_pmd_hyperv/Makefile
 create mode 100644 lib/librte_pmd_hyperv/hyperv.h
 create mode 100644 lib/librte_pmd_hyperv/hyperv_drv.c
 create mode 100644 lib/librte_pmd_hyperv/hyperv_drv.h
 create mode 100644 lib/librte_pmd_hyperv/hyperv_ethdev.c
 create mode 100644 lib/librte_pmd_hyperv/hyperv_logs.h
 create mode 100644 lib/librte_pmd_hyperv/hyperv_rxtx.c
 create mode 100644 lib/librte_pmd_hyperv/hyperv_rxtx.h
  

Comments

Siobhan Butler April 21, 2015, 7:34 p.m. UTC | #1
Hi Stephen,
Will you have documentation to go along with these changes?
Thanks
Siobhan

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Stephen
> Hemminger
> Sent: Tuesday, April 21, 2015 6:33 PM
> To: alexmay@microsoft.com
> Cc: dev@dpdk.org; Stas Egorov; Stephen Hemminger
> Subject: [dpdk-dev] [PATCH v4 5/7] hv: poll mode driver
> 
> From: Stephen Hemminger <shemming@brocade.com>
> 
> This is a new poll mode driver for the Hyper-V virtual network
> interface.
> 
> Signed-off-by: Stas Egorov <segorov@mirantis.com>
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> ---
>  lib/Makefile                          |    1 +
>  lib/librte_pmd_hyperv/Makefile        |   28 +
>  lib/librte_pmd_hyperv/hyperv.h        |  169 ++++
>  lib/librte_pmd_hyperv/hyperv_drv.c    | 1653 +++++++++++++++++++++++++++++++++
>  lib/librte_pmd_hyperv/hyperv_drv.h    |  558 +++++++++++
>  lib/librte_pmd_hyperv/hyperv_ethdev.c |  332 +++++++
>  lib/librte_pmd_hyperv/hyperv_logs.h   |   69 ++
>  lib/librte_pmd_hyperv/hyperv_rxtx.c   |  403 ++++++++
>  lib/librte_pmd_hyperv/hyperv_rxtx.h   |   35 +
>  mk/rte.app.mk                         |    4 +
>  10 files changed, 3252 insertions(+)
>  create mode 100644 lib/librte_pmd_hyperv/Makefile
>  create mode 100644 lib/librte_pmd_hyperv/hyperv.h
>  create mode 100644 lib/librte_pmd_hyperv/hyperv_drv.c
>  create mode 100644 lib/librte_pmd_hyperv/hyperv_drv.h
>  create mode 100644 lib/librte_pmd_hyperv/hyperv_ethdev.c
>  create mode 100644 lib/librte_pmd_hyperv/hyperv_logs.h
>  create mode 100644 lib/librte_pmd_hyperv/hyperv_rxtx.c
>  create mode 100644 lib/librte_pmd_hyperv/hyperv_rxtx.h
> 
> diff --git a/lib/Makefile b/lib/Makefile
> index d94355d..6c1daf2 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -47,6 +47,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += librte_pmd_i40e
>  DIRS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += librte_pmd_fm10k
>  DIRS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += librte_pmd_mlx4
>  DIRS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += librte_pmd_enic
> +DIRS-$(CONFIG_RTE_LIBRTE_HV_PMD) += librte_pmd_hyperv
>  DIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += librte_pmd_bond
>  DIRS-$(CONFIG_RTE_LIBRTE_PMD_RING) += librte_pmd_ring
>  DIRS-$(CONFIG_RTE_LIBRTE_PMD_PCAP) += librte_pmd_pcap
> diff --git a/lib/librte_pmd_hyperv/Makefile b/lib/librte_pmd_hyperv/Makefile
> new file mode 100644
> index 0000000..4ba08c8
> --- /dev/null
> +++ b/lib/librte_pmd_hyperv/Makefile
> @@ -0,0 +1,28 @@
> +#   BSD LICENSE
> +#
> +#   Copyright(c) 2013-2015 Brocade Communications Systems, Inc.
> +#   All rights reserved.
> +
> +include $(RTE_SDK)/mk/rte.vars.mk
> +
> +#
> +# library name
> +#
> +LIB = librte_pmd_hyperv.a
> +
> +CFLAGS += -O3
> +CFLAGS += $(WERROR_FLAGS)
> +
> +#
> +# all source are stored in SRCS-y
> +#
> +SRCS-$(CONFIG_RTE_LIBRTE_HV_PMD) += hyperv_ethdev.c
> +SRCS-$(CONFIG_RTE_LIBRTE_HV_PMD) += hyperv_rxtx.c
> +SRCS-$(CONFIG_RTE_LIBRTE_HV_PMD) += hyperv_drv.c
> +
> +# this lib depends upon:
> +DEPDIRS-$(CONFIG_RTE_LIBRTE_HV_PMD) += lib/librte_eal lib/librte_ether
> +DEPDIRS-$(CONFIG_RTE_LIBRTE_HV_PMD) += lib/librte_mempool lib/librte_mbuf
> +DEPDIRS-$(CONFIG_RTE_LIBRTE_HV_PMD) += lib/librte_malloc
> +
> +include $(RTE_SDK)/mk/rte.lib.mk
> diff --git a/lib/librte_pmd_hyperv/hyperv.h b/lib/librte_pmd_hyperv/hyperv.h
> new file mode 100644
> index 0000000..5f66d8a
> --- /dev/null
> +++ b/lib/librte_pmd_hyperv/hyperv.h
> @@ -0,0 +1,169 @@
> +/*-
> + * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
> + * All rights reserved.
> + */
> +
> +#ifndef _HYPERV_H_
> +#define _HYPERV_H_
> +
> +#include <sys/param.h>
> +#include <rte_log.h>
> +#include <rte_debug.h>
> +#include <rte_ether.h>
> +#include <rte_ethdev.h>
> +#include <rte_malloc.h>
> +#include <rte_memzone.h>
> +#include <rte_cycles.h>
> +#include <rte_dev.h>
> +
> +#include "hyperv_logs.h"
> +
> +#define PAGE_SHIFT		12
> +#define PAGE_SIZE		(1 << PAGE_SHIFT)
> +
> +/*
> + * Tunable ethdev params
> + */
> +#define HV_MIN_RX_BUF_SIZE 1024
> +#define HV_MAX_RX_PKT_LEN  4096
> +#define HV_MAX_MAC_ADDRS   1
> +#define HV_MAX_RX_QUEUES   1
> +#define HV_MAX_TX_QUEUES   1
> +#define HV_MAX_PKT_BURST   32
> +#define HV_MAX_LINK_REQ    10
> +
> +/*
> + * List of resources mapped from kspace
> + * need to be the same as defined in hv_uio.c
> + */
> +enum {
> +	TXRX_RING_MAP,
> +	INT_PAGE_MAP,
> +	MON_PAGE_MAP,
> +	RECV_BUF_MAP
> +};
> +
> +/*
> + * Statistics
> + */
> +struct hv_stats {
> +	uint64_t opkts;
> +	uint64_t obytes;
> +	uint64_t oerrors;
> +
> +	uint64_t ipkts;
> +	uint64_t ibytes;
> +	uint64_t ierrors;
> +	uint64_t rx_nombuf;
> +};
> +
> +struct hv_data;
> +struct netvsc_packet;
> +struct rndis_msg;
> +typedef void (*receive_callback_t)(struct hv_data *hv, struct rndis_msg *msg,
> +		struct netvsc_packet *pkt);
> +
> +/*
> + * Main driver structure
> + */
> +struct hv_data {
> +	int vmbus_device;
> +	uint8_t monitor_bit;
> +	uint8_t monitor_group;
> +	uint8_t kernel_initialized;
> +	int uio_fd;
> +	/* Flag indicates channel state. If closed, RX/TX shouldn't work further */
> +	uint8_t closed;
> +	/* Flag indicates whether HALT rndis request was received by host */
> +	uint8_t hlt_req_sent;
> +	/* Flag indicates pending state for HALT request */
> +	uint8_t hlt_req_pending;
> +	/* Counter for RNDIS requests */
> +	uint32_t new_request_id;
> +	/* State of RNDIS device */
> +	uint8_t rndis_dev_state;
> +	/* Number of transmitted packets but not completed yet by Hyper-V */
> +	int num_outstanding_sends;
> +	/* Max pkt len to fit in rx mbufs */
> +	uint32_t max_rx_pkt_len;
> +
> +	uint8_t jumbo_frame_support;
> +
> +	struct hv_vmbus_ring_buffer *in;
> +	struct hv_vmbus_ring_buffer *out;
> +
> +	/* Size of each ring_buffer(in/out) */
> +	uint32_t rb_size;
> +	/* Size of data in each ring_buffer(in/out) */
> +	uint32_t rb_data_size;
> +
> +	void *int_page;
> +	struct hv_vmbus_monitor_page *monitor_pages;
> +	void *recv_interrupt_page;
> +	void *send_interrupt_page;
> +	void *ring_pages;
> +	void *recv_buf;
> +
> +	uint8_t link_req_cnt;
> +	uint32_t link_status;
> +	uint8_t  hw_mac_addr[ETHER_ADDR_LEN];
> +	struct rndis_request *req;
> +	struct netvsc_packet *netvsc_packet;
> +	struct nvsp_msg *rx_comp_msg;
> +	struct hv_rx_queue *rxq;
> +	struct hv_tx_queue *txq;
> +	struct hv_vm_packet_descriptor *desc;
> +	receive_callback_t receive_callback;
> +	int pkt_rxed;
> +
> +	uint32_t debug;
> +	struct hv_stats stats;
> +};
> +
> +/*
> + * Extern functions declarations
> + */
> +int hyperv_dev_tx_queue_setup(struct rte_eth_dev *dev,
> +			 uint16_t queue_idx,
> +			 uint16_t nb_desc,
> +			 unsigned int socket_id,
> +			 const struct rte_eth_txconf *tx_conf);
> +
> +void hyperv_dev_tx_queue_release(void *ptxq);
> +
> +int hyperv_dev_rx_queue_setup(struct rte_eth_dev *dev,
> +			 uint16_t queue_idx,
> +			 uint16_t nb_desc,
> +			 unsigned int socket_id,
> +			 const struct rte_eth_rxconf *rx_conf,
> +			 struct rte_mempool *mp);
> +
> +void hyperv_dev_rx_queue_release(void *prxq);
> +
> +uint16_t
> +hyperv_recv_pkts(void *prxq,
> +		 struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
> +
> +uint16_t
> +hyperv_xmit_pkts(void *ptxq,
> +		 struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
> +
> +int hv_rf_on_device_add(struct hv_data *hv);
> +int hv_rf_on_device_remove(struct hv_data *hv);
> +int hv_rf_on_send(struct hv_data *hv, struct netvsc_packet *pkt);
> +int hv_rf_on_open(struct hv_data *hv);
> +int hv_rf_on_close(struct hv_data *hv);
> +int hv_rf_set_device_mac(struct hv_data *hv, uint8_t *mac);
> +void hyperv_start_rx(struct hv_data *hv);
> +void hyperv_stop_rx(struct hv_data *hv);
> +int hyperv_get_buffer(struct hv_data *hv, void *buffer, uint32_t bufferlen);
> +void hyperv_scan_comps(struct hv_data *hv, int allow_rx_drop);
> +uint8_t hyperv_get_link_status(struct hv_data *hv);
> +int hyperv_set_rx_mode(struct hv_data *hv, uint8_t promisc, uint8_t mcast);
> +
> +inline int rte_hv_dev_atomic_write_link_status(struct rte_eth_dev *dev,
> +		struct rte_eth_link *link);
> +inline int rte_hv_dev_atomic_read_link_status(struct rte_eth_dev *dev,
> +		struct rte_eth_link *link);
> +
> +#endif /* _HYPERV_H_ */
> diff --git a/lib/librte_pmd_hyperv/hyperv_drv.c b/lib/librte_pmd_hyperv/hyperv_drv.c
> new file mode 100644
> index 0000000..4a37966
> --- /dev/null
> +++ b/lib/librte_pmd_hyperv/hyperv_drv.c
> @@ -0,0 +1,1653 @@
> +/*-
> + * Copyright (c) 2009-2012 Microsoft Corp.
> + * Copyright (c) 2010-2012 Citrix Inc.
> + * Copyright (c) 2012 NetApp Inc.
> + * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + * 1. Redistributions of source code must retain the above copyright
> + *    notice unmodified, this list of conditions, and the following
> + *    disclaimer.
> + * 2. Redistributions in binary form must reproduce the above copyright
> + *    notice, this list of conditions and the following disclaimer in the
> + *    documentation and/or other materials provided with the distribution.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
> + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
> + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
> + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
> + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
> + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
> + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + *
> + */
> +
> +#include "hyperv.h"
> +#include "hyperv_drv.h"
> +#include "hyperv_rxtx.h"
> +
> +#define LOOP_CNT 10000
> +#define MAC_STRLEN 14
> +#define MAC_PARAM_STR "NetworkAddress"
> +
> +#define hex "0123456789abcdef"
> +#define high(x) hex[(x & 0xf0) >> 4]
> +#define low(x) hex[x & 0x0f]
> +
> +static int hv_rf_on_receive(struct hv_data *hv, struct netvsc_packet *pkt);
> +
> +/*
> + * Ring buffer
> + */
> +
> +/* Amount of space to write to */
> +#define HV_BYTES_AVAIL_TO_WRITE(r, w, z) \
> +	(((w) >= (r)) ? ((z) - ((w) - (r))) : ((r) - (w)))
> +
> +/*
> + * Get number of bytes available to read and to write to
> + * for the specified ring buffer
> + */
> +static inline void
> +get_ring_buffer_avail_bytes(
> +	struct hv_data               *hv,
> +	struct hv_vmbus_ring_buffer  *ring_buffer,
> +	uint32_t                     *read,
> +	uint32_t                     *write)
> +{
> +	rte_compiler_barrier();
> +
> +	/*
> +	 * Capture the read/write indices before they changed
> +	 */
> +	uint32_t read_loc = ring_buffer->read_index;
> +	uint32_t write_loc = ring_buffer->write_index;
> +
> +	*write = HV_BYTES_AVAIL_TO_WRITE(
> +			read_loc, write_loc, hv->rb_data_size);
> +	*read = hv->rb_data_size - *write;
> +}
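
The index arithmetic above can be checked in isolation. A minimal
standalone sketch of the HV_BYTES_AVAIL_TO_WRITE formula, using an
assumed 16-byte data region (illustration only):

#include <stdio.h>
#include <stdint.h>

/* Same formula as HV_BYTES_AVAIL_TO_WRITE in the patch. */
static uint32_t avail_to_write(uint32_t r, uint32_t w, uint32_t size)
{
	return (w >= r) ? (size - (w - r)) : (r - w);
}

int main(void)
{
	const uint32_t size = 16;                    /* assumed data-region size */
	printf("%u\n", avail_to_write(0, 0, size));  /* empty ring: all 16 writable */
	printf("%u\n", avail_to_write(0, 5, size));  /* writer 5 ahead: 11 left */
	printf("%u\n", avail_to_write(10, 3, size)); /* writer wrapped: 7 left */
	return 0;
}
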
> +
> +/*
> + * Helper routine to copy from source to ring buffer.
> + *
> + * Assume there is enough room. Handles wrap-around in dest case only!
> + */
> +static uint32_t
> +copy_to_ring_buffer(
> +	struct hv_vmbus_ring_buffer  *ring_buffer,
> +	uint32_t                     ring_buffer_size,
> +	uint32_t                     start_write_offset,
> +	char                         *src,
> +	uint32_t                     src_len)
> +{
> +	char *ring_buf = (char *)ring_buffer->buffer;
> +	uint32_t fragLen;
> +
> +	if (src_len > ring_buffer_size - start_write_offset)  {
> +		/* wrap-around detected! */
> +		fragLen = ring_buffer_size - start_write_offset;
> +		rte_memcpy(ring_buf + start_write_offset, src, fragLen);
> +		rte_memcpy(ring_buf, src + fragLen, src_len - fragLen);
> +	} else {
> +		rte_memcpy(ring_buf + start_write_offset, src, src_len);
> +	}
> +
> +	start_write_offset += src_len;
> +	start_write_offset %= ring_buffer_size;
> +
> +	return start_write_offset;
> +}
> +
> +/*
> + * Helper routine to copy to dest from ring buffer.
> + *
> + * Assume there is enough room. Handles wrap-around in src case only!
> + */
> +static uint32_t
> +copy_from_ring_buffer(
> +	struct hv_data               *hv,
> +	struct hv_vmbus_ring_buffer  *ring_buffer,
> +	char                         *dest,
> +	uint32_t                     dest_len,
> +	uint32_t                     start_read_offset)
> +{
> +	uint32_t fragLen;
> +	char *ring_buf = (char *)ring_buffer->buffer;
> +
> +	if (dest_len > hv->rb_data_size - start_read_offset) {
> +		/*  wrap-around detected at the src */
> +		fragLen = hv->rb_data_size - start_read_offset;
> +		rte_memcpy(dest, ring_buf + start_read_offset, fragLen);
> +		rte_memcpy(dest + fragLen, ring_buf, dest_len - fragLen);
> +	} else {
> +		rte_memcpy(dest, ring_buf + start_read_offset, dest_len);
> +	}
> +
> +	start_read_offset += dest_len;
> +	start_read_offset %= hv->rb_data_size;
> +
> +	return start_read_offset;
> +}
> +
> +/*
> + * Write to the ring buffer.
> + */
> +static int
> +hv_ring_buffer_write(
> +	struct hv_data                 *hv,
> +	struct hv_vmbus_sg_buffer_list sg_buffers[],
> +	uint32_t                       sg_buffer_count)
> +{
> +	struct hv_vmbus_ring_buffer *ring_buffer = hv->out;
> +	uint32_t i = 0;
> +	uint32_t byte_avail_to_write;
> +	uint32_t byte_avail_to_read;
> +	uint32_t total_bytes_to_write = 0;
> +	volatile uint32_t next_write_location;
> +	uint64_t prev_indices = 0;
> +
> +	for (i = 0; i < sg_buffer_count; i++)
> +		total_bytes_to_write += sg_buffers[i].length;
> +
> +	total_bytes_to_write += sizeof(uint64_t);
> +
> +	get_ring_buffer_avail_bytes(hv, ring_buffer, &byte_avail_to_read,
> +			&byte_avail_to_write);
> +
> +	/*
> +	 * If there is only room for the packet, assume it is full.
> +	 * Otherwise, the next time around, we think the ring buffer
> +	 * is empty since the read index == write index
> +	 */
> +	if (byte_avail_to_write <= total_bytes_to_write) {
> +		PMD_PERROR_LOG(hv, DBG_RB,
> +				"byte_avail_to_write = %u, total_bytes_to_write = %u",
> +				byte_avail_to_write, total_bytes_to_write);
> +		return -EAGAIN;
> +	}
> +
> +	/*
> +	 * Write to the ring buffer
> +	 */
> +	next_write_location = ring_buffer->write_index;
> +
> +	for (i = 0; i < sg_buffer_count; i++) {
> +		next_write_location = copy_to_ring_buffer(ring_buffer,
> +				hv->rb_data_size, next_write_location,
> +				(char *) sg_buffers[i].data, sg_buffers[i].length);
> +	}
> +
> +	/*
> +	 * Set previous packet start
> +	 */
> +	prev_indices = (uint64_t)ring_buffer->write_index << 32;
> +
> +	next_write_location = copy_to_ring_buffer(
> +			ring_buffer, hv->rb_data_size, next_write_location,
> +			(char *) &prev_indices, sizeof(uint64_t));
> +
> +	/*
> +	 * Make sure we flush all writes before updating the writeIndex
> +	 */
> +	rte_compiler_barrier();
> +
> +	/*
> +	 * Now, update the write location
> +	 */
> +	ring_buffer->write_index = next_write_location;
> +
> +	return 0;
> +}
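
The `byte_avail_to_write <= total_bytes_to_write` rejection above exists
because an index-pair ring cannot tell "full" from "empty": if a write
consumed every free byte, write_index would land exactly on read_index.
A short sketch of that failure mode, with hypothetical index values:

#include <stdio.h>
#include <stdint.h>

static uint32_t avail_to_write(uint32_t r, uint32_t w, uint32_t size)
{
	return (w >= r) ? (size - (w - r)) : (r - w);
}

int main(void)
{
	uint32_t size = 16, r = 4, w = 9;            /* assumed snapshot */
	uint32_t room = avail_to_write(r, w, size);  /* 11 bytes of room */

	/* Consuming all of it wraps write_index onto read_index, so the
	 * ring would later be misread as empty -- hence the strict check. */
	w = (w + room) % size;
	printf("w=%u r=%u -> looks %s\n", w, r, (w == r) ? "empty" : "non-empty");
	return 0;
}
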
> +
> +/*
> + * Read without advancing the read index.
> + */
> +static int
> +hv_ring_buffer_peek(struct hv_data  *hv, void *buffer, uint32_t buffer_len)
> +{
> +	struct hv_vmbus_ring_buffer *ring_buffer = hv->in;
> +	uint32_t bytesAvailToWrite;
> +	uint32_t bytesAvailToRead;
> +
> +	get_ring_buffer_avail_bytes(hv, ring_buffer,
> +			&bytesAvailToRead,
> +			&bytesAvailToWrite);
> +
> +	/* Make sure there is something to read */
> +	if (bytesAvailToRead < buffer_len)
> +		return -EAGAIN;
> +
> +	copy_from_ring_buffer(hv, ring_buffer,
> +		(char *)buffer, buffer_len, ring_buffer->read_index);
> +
> +	return 0;
> +}
> +
> +/*
> + * Read and advance the read index.
> + */
> +static int
> +hv_ring_buffer_read(struct hv_data  *hv, void *buffer,
> +		    uint32_t buffer_len, uint32_t offset)
> +{
> +	struct hv_vmbus_ring_buffer *ring_buffer = hv->in;
> +	uint32_t bytes_avail_to_write;
> +	uint32_t bytes_avail_to_read;
> +	uint32_t next_read_location = 0;
> +	uint64_t prev_indices = 0;
> +
> +	if (buffer_len <= 0)
> +		return -EINVAL;
> +
> +	get_ring_buffer_avail_bytes(
> +			hv,
> +			ring_buffer,
> +			&bytes_avail_to_read,
> +			&bytes_avail_to_write);
> +
> +	/*
> +	 * Make sure there is something to read
> +	 */
> +	if (bytes_avail_to_read < buffer_len) {
> +		PMD_PERROR_LOG(hv, DBG_RB, "bytes_avail_to_read = %u, buffer_len = %u",
> +				bytes_avail_to_read, buffer_len);
> +		return -EAGAIN;
> +	}
> +
> +	next_read_location = (ring_buffer->read_index + offset) % hv->rb_data_size;
> +
> +	next_read_location = copy_from_ring_buffer(
> +			hv,
> +			ring_buffer,
> +			(char *) buffer,
> +			buffer_len,
> +			next_read_location);
> +
> +	next_read_location = copy_from_ring_buffer(
> +			hv,
> +			ring_buffer,
> +			(char *) &prev_indices,
> +			sizeof(uint64_t),
> +			next_read_location);
> +
> +	/*
> +	 * Make sure all reads are done before we update the read index since
> +	 * the writer may start writing to the read area once the read index
> +	 * is updated.
> +	 */
> +	rte_compiler_barrier();
> +
> +	/*
> +	 * Update the read index
> +	 */
> +	ring_buffer->read_index = next_read_location;
> +
> +	return 0;
> +}
> +
> +/*
> + * VMBus
> + */
> +
> +/*
> + * Retrieve the raw packet on the specified channel
> + */
> +static int
> +hv_vmbus_channel_recv_packet_raw(struct hv_data  *hv, void *buffer,
> +				 uint32_t        buffer_len,
> +				 uint32_t        *buffer_actual_len,
> +				 uint64_t        *request_id,
> +				 int             mode)
> +{
> +	int ret;
> +	uint32_t packetLen;
> +	struct hv_vm_packet_descriptor desc;
> +
> +	*buffer_actual_len = 0;
> +	*request_id = 0;
> +
> +	ret = hv_ring_buffer_peek(hv, &desc,
> +			sizeof(struct hv_vm_packet_descriptor));
> +
> +	if (ret != 0)
> +		return 0;
> +
> +	if ((desc.type == HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES
> +				&& !(mode & 1)) ||
> +			((desc.type == HV_VMBUS_PACKET_TYPE_COMPLETION) && !(mode & 2))) {
> +		return -1;
> +	}
> +
> +	packetLen = desc.length8 << 3;
> +
> +	*buffer_actual_len = packetLen;
> +
> +	if (unlikely(packetLen > buffer_len)) {
> +		PMD_PERROR_LOG(hv, DBG_RX, "The buffer desc is too big, will drop it");
> +		return -ENOMEM;
> +	}
> +
> +	*request_id = desc.transaction_id;
> +
> +	/* Copy over the entire packet to the user buffer */
> +	ret = hv_ring_buffer_read(hv, buffer, packetLen, 0);
> +
> +	return 0;
> +}
> +
> +/*
> + * Trigger an event notification on the specified channel
> + */
> +static void
> +vmbus_channel_set_event(struct hv_data *hv)
> +{
> +	/* Here we assume that channel->offer_msg.monitor_allocated == 1;
> +	 * otherwise our driver will not work */
> +	/* Each uint32_t represents 32 channels */
> +	__sync_or_and_fetch((uint32_t *)hv->send_interrupt_page
> +		+ (hv->vmbus_device >> 5), 1 << (hv->vmbus_device & 31));
> +	__sync_or_and_fetch((uint32_t *)&hv->monitor_pages->
> +			trigger_group[hv->monitor_group].u.pending,
> +			1 << hv->monitor_bit);
> +}
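
vmbus_channel_set_event() sets one bit per channel in an array of
32-bit words: the word index is vmbus_device >> 5 and the bit index is
vmbus_device & 31. A hedged sketch of the same indexing against plain
memory (assumed channel id; the real code targets the shared interrupt
and monitor pages mapped from the kernel):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t interrupt_page[8] = {0};  /* stand-in for the shared page */
	int vmbus_device = 37;             /* assumed channel id */

	/* Word index: device >> 5 (32 bits per word); bit: device & 31. */
	__sync_or_and_fetch(&interrupt_page[vmbus_device >> 5],
			    1u << (vmbus_device & 31));

	/* Prints "word 1 = 0x00000020" for channel 37. */
	printf("word %d = 0x%08x\n", vmbus_device >> 5,
	       interrupt_page[vmbus_device >> 5]);
	return 0;
}
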
> +
> +/**
> + * @brief Send the specified buffer on the given channel
> + */
> +static int
> +hv_vmbus_channel_send_packet(struct hv_data *hv, void *buffer,
> +			     uint32_t buffer_len, uint64_t request_id,
> +			     enum hv_vmbus_packet_type type,
> +			     uint32_t flags)
> +{
> +	struct hv_vmbus_sg_buffer_list buffer_list[3];
> +	struct hv_vm_packet_descriptor desc;
> +	uint32_t packet_len_aligned;
> +	uint64_t aligned_data;
> +	uint32_t packet_len;
> +	int ret = 0;
> +	uint32_t old_write = hv->out->write_index;
> +
> +	packet_len = sizeof(struct hv_vm_packet_descriptor) + buffer_len;
> +	packet_len_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t));
> +	aligned_data = 0;
> +
> +	/* Setup the descriptor */
> +	desc.type = type;   /* HV_VMBUS_PACKET_TYPE_DATA_IN_BAND */
> +	desc.flags = flags; /* HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED */
> +	/* in 8-bytes granularity */
> +	desc.data_offset8 = sizeof(struct hv_vm_packet_descriptor) >> 3;
> +	desc.length8 = (uint16_t) (packet_len_aligned >> 3);
> +	desc.transaction_id = request_id;
> +
> +	buffer_list[0].data = &desc;
> +	buffer_list[0].length = sizeof(struct hv_vm_packet_descriptor);
> +
> +	buffer_list[1].data = buffer;
> +	buffer_list[1].length = buffer_len;
> +
> +	buffer_list[2].data = &aligned_data;
> +	buffer_list[2].length = packet_len_aligned - packet_len;
> +
> +	ret = hv_ring_buffer_write(hv, buffer_list, 3);
> +
> +	rte_mb();
> +	if (!ret && !hv->out->interrupt_mask && hv->out->read_index == old_write)
> +		vmbus_channel_set_event(hv);
> +
> +	return ret;
> +}
> +
> +/*
> + * Send a range of single-page buffer packets using
> + * a GPADL Direct packet type
> + */
> +static int
> +hv_vmbus_channel_send_packet_pagebuffer(
> +	struct hv_data  *hv,
> +	struct hv_vmbus_page_buffer	page_buffers[],
> +	uint32_t		page_count,
> +	void			*buffer,
> +	uint32_t		buffer_len,
> +	uint64_t		request_id)
> +{
> +
> +	int ret = 0;
> +	uint32_t packet_len, packetLen_aligned, descSize, i = 0;
> +	struct hv_vmbus_sg_buffer_list buffer_list[3];
> +	struct hv_vmbus_channel_packet_page_buffer desc;
> +	uint64_t alignedData = 0;
> +	uint32_t old_write = hv->out->write_index;
> +
> +	if (page_count > HV_MAX_PAGE_BUFFER_COUNT) {
> +		PMD_PERROR_LOG(hv, DBG_VMBUS, "page_count %u goes out of the limit",
> +				page_count);
> +		return -EINVAL;
> +	}
> +
> +	/*
> +	 * Adjust the size down since hv_vmbus_channel_packet_page_buffer
> +	 * is the largest size we support
> +	 */
> +	descSize = sizeof(struct hv_vmbus_channel_packet_page_buffer) -
> +		((HV_MAX_PAGE_BUFFER_COUNT - page_count) *
> +		 sizeof(struct hv_vmbus_page_buffer));
> +	packet_len = descSize + buffer_len;
> +	packetLen_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t));
> +
> +	/* Setup the descriptor */
> +	desc.type = HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT;
> +	desc.flags =
> HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
> +	desc.data_offset8 = descSize >> 3; /* in 8-bytes granularity */
> +	desc.length8 = (uint16_t) (packetLen_aligned >> 3);
> +	desc.transaction_id = request_id;
> +	desc.range_count = page_count;
> +
> +	for (i = 0; i < page_count; i++) {
> +		desc.range[i].length = page_buffers[i].length;
> +		desc.range[i].offset = page_buffers[i].offset;
> +		desc.range[i].pfn = page_buffers[i].pfn;
> +	}
> +
> +	buffer_list[0].data = &desc;
> +	buffer_list[0].length = descSize;
> +
> +	buffer_list[1].data = buffer;
> +	buffer_list[1].length = buffer_len;
> +
> +	buffer_list[2].data = &alignedData;
> +	buffer_list[2].length = packetLen_aligned - packet_len;
> +
> +	ret = hv_ring_buffer_write(hv, buffer_list, 3);
> +	if (likely(ret == 0))
> +		++hv->num_outstanding_sends;
> +
> +	rte_mb();
> +	if (!ret && !hv->out->interrupt_mask &&
> +			hv->out->read_index == old_write)
> +		vmbus_channel_set_event(hv);
> +
> +	return ret;
> +}
> +
> +/*
> + * NetVSC
> + */
> +
> +/*
> + * Net VSC on send
> + * Sends a packet on the specified Hyper-V device.
> + * Returns 0 on success, non-zero on failure.
> + */
> +static int
> +hv_nv_on_send(struct hv_data *hv, struct netvsc_packet *pkt)
> +{
> +	struct nvsp_msg send_msg;
> +	int ret;
> +
> +	send_msg.msg_type = nvsp_msg_1_type_send_rndis_pkt;
> +	if (pkt->is_data_pkt) {
> +		/* 0 is RMC_DATA */
> +		send_msg.msgs.send_rndis_pkt.chan_type = 0;
> +	} else {
> +		/* 1 is RMC_CONTROL */
> +		send_msg.msgs.send_rndis_pkt.chan_type = 1;
> +	}
> +
> +	/* Not using send buffer section */
> +	send_msg.msgs.send_rndis_pkt.send_buf_section_idx =
> +	    0xFFFFFFFF;
> +	send_msg.msgs.send_rndis_pkt.send_buf_section_size = 0;
> +
> +	if (likely(pkt->page_buf_count)) {
> +		ret = hv_vmbus_channel_send_packet_pagebuffer(hv,
> +				pkt->page_buffers, pkt->page_buf_count,
> +				&send_msg, sizeof(struct nvsp_msg),
> +				(uint64_t)pkt->is_data_pkt ? (hv->txq->tx_tail + 1) : 0);
> +	} else {
> +		PMD_PERROR_LOG(hv, DBG_TX, "pkt->page_buf_count value can't be zero");
> +		ret = -1;
> +	}
> +
> +	return ret;
> +}
> +
> +/*
> + * Net VSC on receive
> + *
> + * This function deals exclusively with virtual addresses.
> + */
> +static void
> +hv_nv_on_receive(struct hv_data *hv, struct hv_vm_packet_descriptor *pkt)
> +{
> +	struct hv_vm_transfer_page_packet_header *vm_xfer_page_pkt;
> +	struct nvsp_msg *nvsp_msg_pkt;
> +	struct netvsc_packet *net_vsc_pkt = NULL;
> +	unsigned long start;
> +	int count, i;
> +
> +	nvsp_msg_pkt = (struct nvsp_msg *)((unsigned long)pkt
> +			+ (pkt->data_offset8 << 3));
> +
> +	/* Make sure this is a valid nvsp packet */
> +	if (unlikely(nvsp_msg_pkt->msg_type != nvsp_msg_1_type_send_rndis_pkt)) {
> +		PMD_PERROR_LOG(hv, DBG_RX, "NVSP packet is not valid");
> +		return;
> +	}
> +
> +	vm_xfer_page_pkt = (struct hv_vm_transfer_page_packet_header *)pkt;
> +
> +	if (unlikely(vm_xfer_page_pkt->transfer_page_set_id
> +			!= NETVSC_RECEIVE_BUFFER_ID)) {
> +		PMD_PERROR_LOG(hv, DBG_RX, "transfer_page_set_id is not valid");
> +		return;
> +	}
> +
> +	count = vm_xfer_page_pkt->range_count;
> +
> +	/*
> +	 * Initialize the netvsc packet
> +	 */
> +	for (i = 0; i < count; ++i) {
> +		net_vsc_pkt = hv->netvsc_packet;
> +
> +		net_vsc_pkt->tot_data_buf_len =
> +			vm_xfer_page_pkt->ranges[i].byte_count;
> +		net_vsc_pkt->page_buf_count = 1;
> +
> +		net_vsc_pkt->page_buffers[0].length =
> +			vm_xfer_page_pkt->ranges[i].byte_count;
> +
> +		/* The virtual address of the packet in the receive buffer */
> +		start = ((unsigned long)hv->recv_buf +
> +				vm_xfer_page_pkt->ranges[i].byte_offset);
> +
> +		/* Page number of the virtual page containing packet start */
> +		net_vsc_pkt->page_buffers[0].pfn = start >> PAGE_SHIFT;
> +
> +		/* Calculate the page relative offset */
> +		net_vsc_pkt->page_buffers[0].offset =
> +			vm_xfer_page_pkt->ranges[i].byte_offset & (PAGE_SIZE - 1);
> +
> +		/*
> +		 * In this implementation, we are dealing with virtual
> +		 * addresses exclusively.  Since we aren't using physical
> +		 * addresses at all, we don't care if a packet crosses a
> +		 * page boundary.  For this reason, the original code to
> +		 * check for and handle page crossings has been removed.
> +		 */
> +
> +		/*
> +		 * Pass it to the upper layer.  The receive completion call
> +		 * has been moved into this function.
> +		 */
> +		hv_rf_on_receive(hv, net_vsc_pkt);
> +	}
> +	/* Send a receive completion packet to RNDIS device (ie NetVsp) */
> +	hv_vmbus_channel_send_packet(hv, hv->rx_comp_msg, sizeof(struct nvsp_msg),
> +			vm_xfer_page_pkt->d.transaction_id,
> +			HV_VMBUS_PACKET_TYPE_COMPLETION, 0);
> +}
> +
> +/*
> + * Net VSC on send completion
> + */
> +static void
> +hv_nv_on_send_completion(struct hv_data *hv, struct hv_vm_packet_descriptor *pkt)
> +{
> +	struct nvsp_msg *nvsp_msg_pkt;
> +
> +	nvsp_msg_pkt =
> +	    (struct nvsp_msg *)((unsigned long)pkt + (pkt->data_offset8 << 3));
> +
> +	if (likely(nvsp_msg_pkt->msg_type ==
> +			nvsp_msg_1_type_send_rndis_pkt_complete)) {
> +
> +		if (unlikely(hv->hlt_req_pending))
> +			hv->hlt_req_sent = 1;
> +		else
> +			if (pkt->transaction_id)
> +				++hv->txq->tx_free;
> +		--hv->num_outstanding_sends;
> +		return;
> +	}
> +	PMD_PINFO_LOG(hv, DBG_TX, "unhandled completion (for kernel req or so)");
> +}
> +
> +/*
> + * Analogue of bsd hv_nv_on_channel_callback
> + */
> +static void
> +hv_nv_complete_request(struct hv_data *hv, struct rndis_request *request)
> +{
> +	uint32_t bytes_rxed, cnt = 0;
> +	uint64_t request_id;
> +	struct hv_vm_packet_descriptor *desc;
> +	uint8_t *buffer;
> +	int     bufferlen = NETVSC_PACKET_SIZE;
> +	int     ret = 0;
> +
> +	PMD_INIT_FUNC_TRACE();
> +
> +	hv->req = request;
> +
> +	buffer = rte_malloc(NULL, bufferlen, RTE_CACHE_LINE_SIZE);
> +	if (!buffer) {
> +		PMD_PERROR_LOG(hv, DBG_LOAD, "failed to allocate packet");
> +		return;
> +	}
> +
> +	do {
> +		rte_delay_us(1);
> +		ret = hv_vmbus_channel_recv_packet_raw(hv,
> +				buffer, bufferlen, &bytes_rxed, &request_id, 3);
> +		if (ret == 0) {
> +			if (bytes_rxed > 0) {
> +				desc = (struct hv_vm_packet_descriptor *)buffer;
> +
> +				switch (desc->type) {
> +				case HV_VMBUS_PACKET_TYPE_COMPLETION:
> +					hv_nv_on_send_completion(hv, desc);
> +					break;
> +				case HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES:
> +					hv_nv_on_receive(hv, desc);
> +					break;
> +				default:
> +					break;
> +				}
> +				PMD_PDEBUG_LOG(hv, DBG_LOAD,
> +					       "Made %d attempts before non-empty data was received",
> +					       cnt);
> +				cnt = 0;
> +			} else {
> +				cnt++;
> +			}
> +		} else if (ret == -ENOMEM) {
> +			/* Handle large packet */
> +			PMD_PDEBUG_LOG(hv, DBG_LOAD,
> +				       "recv_packet_raw returned -ENOMEM");
> +			rte_free(buffer);
> +			buffer = rte_malloc(NULL, bytes_rxed, RTE_CACHE_LINE_SIZE);
> +			if (buffer == NULL) {
> +				PMD_PERROR_LOG(hv, DBG_LOAD, "failed to allocate buffer");
> +				break;
> +			}
> +			bufferlen = bytes_rxed;
> +		} else {
> +			PMD_PERROR_LOG(hv, DBG_LOAD, "Unexpected return code (%d)", ret);
> +		}
> +		if (!hv->req) {
> +			PMD_PINFO_LOG(hv, DBG_LOAD, "Single request processed");
> +			break;
> +		}
> +		if (cnt >= LOOP_CNT) {
> +			PMD_PERROR_LOG(hv, DBG_LOAD, "Emergency break from the loop");
> +			break;
> +		}
> +		if (hv->hlt_req_sent) {
> +			PMD_PINFO_LOG(hv, DBG_LOAD, "Halt request processed");
> +			break;
> +		}
> +		/* The field hv->req->response_msg.ndis_msg_type
> +		 * should be set to non-zero value when response received
> +		 */
> +	} while (!hv->req->response_msg.ndis_msg_type);
> +
> +	rte_free(buffer);
> +}
> +
> +/*
> + * RNDIS
> + */
> +
> +/*
> + * Create new RNDIS request
> + */
> +static inline struct rndis_request *
> +hv_rndis_request(struct hv_data *hv, uint32_t message_type,
> +		uint32_t message_length)
> +{
> +	struct rndis_request *request;
> +	struct rndis_msg *rndis_mesg;
> +	struct rndis_set_request *set;
> +	char mz_name[RTE_MEMZONE_NAMESIZE];
> +	uint32_t size;
> +
> +	PMD_INIT_FUNC_TRACE();
> +
> +	request = rte_zmalloc("rndis_req", sizeof(struct rndis_request),
> +			      RTE_CACHE_LINE_SIZE);
> +
> +	if (!request)
> +		return NULL;
> +
> +	sprintf(mz_name, "hv_%d_%u_%d_%p", hv->vmbus_device, message_type,
> +			hv->new_request_id, request);
> +
> +	size = MAX(message_length, sizeof(struct rndis_msg));
> +
> +	request->request_msg_memzone = rte_memzone_reserve_aligned(mz_name,
> +			size, rte_lcore_to_socket_id(rte_lcore_id()), 0, PAGE_SIZE);
> +	if (!request->request_msg_memzone) {
> +		PMD_PERROR_LOG(hv, DBG_LOAD, "memzone_reserve failed");
> +		rte_free(request);
> +		return NULL;
> +	}
> +	request->request_msg = request->request_msg_memzone->addr;
> +	rndis_mesg = request->request_msg;
> +	rndis_mesg->ndis_msg_type = message_type;
> +	rndis_mesg->msg_len = message_length;
> +
> +	/*
> +	 * Set the request id. This field is always after the rndis header
> +	 * for request/response packet types so we just use the set_request
> +	 * as a template.
> +	 */
> +	set = &rndis_mesg->msg.set_request;
> +	hv->new_request_id++;
> +	set->request_id = hv->new_request_id;
> +
> +	return request;
> +}
> +
> +/*
> + * RNDIS filter
> + */
> +
> +static void
> +hv_rf_receive_response(
> +	struct hv_data       *hv,
> +	struct rndis_msg     *response)
> +{
> +	struct rndis_request *request = hv->req;
> +
> +	PMD_INIT_FUNC_TRACE();
> +
> +	if (response->msg_len <= sizeof(struct rndis_msg)) {
> +		rte_memcpy(&request->response_msg, response,
> +				response->msg_len);
> +	} else {
> +		if (response->ndis_msg_type == REMOTE_NDIS_INITIALIZE_CMPLT) {
> +			request->response_msg.msg.init_complete.status =
> +				STATUS_BUFFER_OVERFLOW;
> +		}
> +		PMD_PERROR_LOG(hv, DBG_LOAD, "response buffer overflow\n");
> +	}
> +}
> +
> +/*
> + * RNDIS filter receive indicate status
> + */
> +static void
> +hv_rf_receive_indicate_status(struct hv_data *hv, struct rndis_msg *response)
> +{
> +	struct rndis_indicate_status *indicate = &response->msg.indicate_status;
> +
> +	PMD_INIT_FUNC_TRACE();
> +
> +	if (indicate->status == RNDIS_STATUS_MEDIA_CONNECT)
> +		hv->link_status = 1;
> +	else if (indicate->status == RNDIS_STATUS_MEDIA_DISCONNECT)
> +		hv->link_status = 0;
> +	else if (indicate->status == RNDIS_STATUS_INVALID_DATA)
> +		PMD_PERROR_LOG(hv, DBG_RX, "Invalid data in RNDIS message");
> +	else
> +		PMD_PERROR_LOG(hv, DBG_RX, "Unsupported status: %u", indicate->status);
> +}
> +
> +/*
> + * RNDIS filter receive data
> + */
> +static void
> +hv_rf_receive_data(struct hv_data *hv, struct rndis_msg *msg,
> +		struct netvsc_packet *pkt)
> +{
> +	struct rte_mbuf *m_new;
> +	struct hv_rx_queue *rxq = hv->rxq;
> +	struct rndis_packet *rndis_pkt;
> +	uint32_t data_offset;
> +
> +	if (unlikely(hv->closed))
> +		return;
> +
> +	rndis_pkt = &msg->msg.packet;
> +
> +	if (unlikely(hv->max_rx_pkt_len < rndis_pkt->data_length)) {
> +		PMD_PWARN_LOG(hv, DBG_RX, "Packet is too large (%db), dropping.",
> +				rndis_pkt->data_length);
> +		++hv->stats.ierrors;
> +		return;
> +	}
> +
> +	/* Remove rndis header, then pass data packet up the stack */
> +	data_offset = RNDIS_HEADER_SIZE + rndis_pkt->data_offset;
> +
> +	/* L2 frame length, with L2 header, not including CRC */
> +	pkt->tot_data_buf_len        = rndis_pkt->data_length;
> +	pkt->page_buffers[0].offset += data_offset;
> +	/* Buffer length now L2 frame length plus trailing junk */
> +	pkt->page_buffers[0].length -= data_offset;
> +
> +	pkt->vlan_tci = 0;
> +
> +	/*
> +	 * Just put data into appropriate mbuf, all further work will be done
> +	 * by the upper layer (mbuf replacement, index adjustment, etc)
> +	 */
> +	m_new = rxq->sw_ring[rxq->rx_tail];
> +	if (++rxq->rx_tail == rxq->nb_rx_desc)
> +		rxq->rx_tail = 0;
> +
> +	/*
> +	 * Copy the received packet to mbuf.
> +	 * The copy is required since the memory pointed to by netvsc_packet
> +	 * cannot be reallocated
> +	 */
> +	uint8_t *vaddr = (uint8_t *)
> +		(pkt->page_buffers[0].pfn << PAGE_SHIFT)
> +		+ pkt->page_buffers[0].offset;
> +
> +	m_new->nb_segs = 1;
> +	m_new->pkt_len = m_new->data_len = pkt->tot_data_buf_len;
> +	rte_memcpy(rte_pktmbuf_mtod(m_new, void *), vaddr, m_new->data_len);
> +
> +	if (pkt->vlan_tci) {
> +		m_new->vlan_tci = pkt->vlan_tci;
> +		m_new->ol_flags |= PKT_RX_VLAN_PKT;
> +	}
> +
> +	hv->pkt_rxed = 1;
> +}
> +
> +/*
> + * RNDIS filter receive data, jumbo frames support
> + */
> +static void
> +hv_rf_receive_data_sg(struct hv_data *hv, struct rndis_msg *msg,
> +		struct netvsc_packet *pkt)
> +{
> +	struct rte_mbuf *m_new;
> +	struct hv_rx_queue *rxq = hv->rxq;
> +	struct rndis_packet *rndis_pkt;
> +	uint32_t data_offset;
> +
> +	if (unlikely(hv->closed))
> +		return;
> +
> +	rndis_pkt = &msg->msg.packet;
> +
> +	/* Remove rndis header, then pass data packet up the stack */
> +	data_offset = RNDIS_HEADER_SIZE + rndis_pkt->data_offset;
> +
> +	/* L2 frame length, with L2 header, not including CRC */
> +	pkt->tot_data_buf_len        = rndis_pkt->data_length;
> +	pkt->page_buffers[0].offset += data_offset;
> +	/* Buffer length now L2 frame length plus trailing junk */
> +	pkt->page_buffers[0].length -= data_offset;
> +
> +	pkt->vlan_tci = 0;
> +
> +	/*
> +	 * Just put data into appropriate mbuf, all further work will be done
> +	 * by the upper layer (mbuf replacement, index adjustment, etc)
> +	 */
> +	m_new = rxq->sw_ring[rxq->rx_tail];
> +	if (++rxq->rx_tail == rxq->nb_rx_desc)
> +		rxq->rx_tail = 0;
> +
> +	/*
> +	 * Copy the received packet to mbuf.
> +	 * The copy is required since the memory pointed to by netvsc_packet
> +	 * cannot be reallocated
> +	 */
> +	uint8_t *vaddr = (uint8_t *)
> +		(pkt->page_buffers[0].pfn << PAGE_SHIFT)
> +		+ pkt->page_buffers[0].offset;
> +
> +	/* Scatter-gather emulation */
> +	uint32_t carry_len = pkt->tot_data_buf_len;
> +	struct rte_mbuf *m_next;
> +
> +	m_new->pkt_len = carry_len;
> +	m_new->nb_segs = (carry_len - 1) / hv->max_rx_pkt_len + 1;
> +
> +	while (1) {
> +		m_new->data_len = MIN(carry_len, hv->max_rx_pkt_len);
> +		rte_memcpy(rte_pktmbuf_mtod(m_new, void *),
> +			   vaddr, m_new->data_len);
> +		vaddr += m_new->data_len;
> +
> +		if (carry_len <= hv->max_rx_pkt_len)
> +			break;
> +
> +		carry_len -= hv->max_rx_pkt_len;
> +		m_next = rxq->sw_ring[rxq->rx_tail];
> +		if (++rxq->rx_tail == rxq->nb_rx_desc)
> +			rxq->rx_tail = 0;
> +		m_new->next = m_next;
> +		m_new = m_next;
> +	}
> +
> +	if (pkt->vlan_tci) {
> +		m_new->vlan_tci = pkt->vlan_tci;
> +		m_new->ol_flags |= PKT_RX_VLAN_PKT;
> +	}
> +
> +	hv->pkt_rxed = 1;
> +}
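
The scatter-gather emulation above sizes the mbuf chain with a ceiling
division, nb_segs = (carry_len - 1) / max_rx_pkt_len + 1, then peels
max_rx_pkt_len bytes per segment. A minimal sketch of that arithmetic,
with assumed frame and mbuf sizes:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t max_rx_pkt_len = 2048;                /* assumed mbuf data room */
	uint32_t lens[] = { 1500, 2048, 2049, 9000 };  /* assumed frame sizes */
	unsigned i;

	for (i = 0; i < sizeof(lens) / sizeof(lens[0]); i++) {
		/* ceil(len / max) without floating point */
		uint32_t nb_segs = (lens[i] - 1) / max_rx_pkt_len + 1;
		printf("len %u -> %u segment(s)\n", lens[i], nb_segs);
	}
	return 0;  /* 1500->1, 2048->1, 2049->2, 9000->5 */
}
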
> +
> +static int
> +hv_rf_send_request(struct hv_data *hv, struct rndis_request *request)
> +{
> +	struct netvsc_packet *packet;
> +
> +	PMD_INIT_FUNC_TRACE();
> +	/* Set up the packet to send it */
> +	packet = &request->pkt;
> +
> +	packet->is_data_pkt = 0;
> +	packet->tot_data_buf_len = request->request_msg->msg_len;
> +	packet->page_buf_count = 1;
> +
> +	packet->page_buffers[0].pfn =
> +		(request->request_msg_memzone->phys_addr) >> PAGE_SHIFT;
> +	packet->page_buffers[0].length = request->request_msg->msg_len;
> +	packet->page_buffers[0].offset =
> +	    (unsigned long)request->request_msg & (PAGE_SIZE - 1);
> +
> +	return hv_nv_on_send(hv, packet);
> +}
> +
> +static void u8_to_u16(const char *src, int len, char *dst)
> +{
> +	int i;
> +
> +	for (i = 0; i < len; ++i) {
> +		dst[2 * i] = src[i];
> +		dst[2 * i + 1] = 0;
> +	}
> +}
> +
> +int
> +hv_rf_set_device_mac(struct hv_data *hv, uint8_t *macaddr)
> +{
> +	struct rndis_request *request;
> +	struct rndis_set_request *set_request;
> +	struct rndis_config_parameter_info *info;
> +	struct rndis_set_complete *set_complete;
> +	char mac_str[2*ETHER_ADDR_LEN+1];
> +	wchar_t *param_value, *param_name;
> +	uint32_t status;
> +	uint32_t message_len = sizeof(struct rndis_config_parameter_info) +
> +		2 * MAC_STRLEN + 4 * ETHER_ADDR_LEN;
> +	int ret, i;
> +
> +	request = hv_rndis_request(hv, REMOTE_NDIS_SET_MSG,
> +		RNDIS_MESSAGE_SIZE(struct rndis_set_request) + message_len);
> +	if (!request)
> +		return -ENOMEM;
> +
> +	set_request = &request->request_msg->msg.set_request;
> +	set_request->oid = RNDIS_OID_GEN_RNDIS_CONFIG_PARAMETER;
> +	set_request->device_vc_handle = 0;
> +	set_request->info_buffer_offset = sizeof(struct rndis_set_request);
> +	set_request->info_buffer_length = message_len;
> +
> +	info = (struct rndis_config_parameter_info *)((ulong)set_request +
> +		set_request->info_buffer_offset);
> +	info->parameter_type = RNDIS_CONFIG_PARAM_TYPE_STRING;
> +	info->parameter_name_offset =
> +		sizeof(struct rndis_config_parameter_info);
> +	info->parameter_name_length = 2 * MAC_STRLEN;
> +	info->parameter_value_offset =
> +		info->parameter_name_offset + info->parameter_name_length;
> +	/* Multiply by 2 because of string representation and by 2
> +	 * because of utf16 representation
> +	 */
> +	info->parameter_value_length = 4 * ETHER_ADDR_LEN;
> +	param_name = (wchar_t *)((ulong)info + info->parameter_name_offset);
> +	param_value = (wchar_t *)((ulong)info + info->parameter_value_offset);
> +
> +	u8_to_u16(MAC_PARAM_STR, MAC_STRLEN, (char *)param_name);
> +	for (i = 0; i < ETHER_ADDR_LEN; ++i) {
> +		mac_str[2*i] = high(macaddr[i]);
> +		mac_str[2*i+1] = low(macaddr[i]);
> +	}
> +
> +	u8_to_u16((const char *)mac_str, 2 * ETHER_ADDR_LEN, (char *)param_value);
> +
> +	ret = hv_rf_send_request(hv, request);
> +	if (ret)
> +		goto cleanup;
> +
> +	request->response_msg.msg.set_complete.status = 0xFFFF;
> +	hv_nv_complete_request(hv, request);
> +	set_complete = &request->response_msg.msg.set_complete;
> +	if (set_complete->status == 0xFFFF) {
> +		/* Host is not responding, we can't free request in this case */
> +		ret = -1;
> +		PMD_PERROR_LOG(hv, DBG_LOAD, "Host is not responding");
> +		goto exit;
> +	}
> +	/* Response received, check status */
> +	status = set_complete->status;
> +	if (status) {
> +		/* Bad response status, return error */
> +		PMD_PERROR_LOG(hv, DBG_LOAD, "set_complete->status = %u\n", status);
> +		ret = -EINVAL;
> +	}
> +
> +cleanup:
> +	rte_free(request);
> +exit:
> +	return ret;
> +}
> +
> +/*
> + * RNDIS filter on receive
> + */
> +static int
> +hv_rf_on_receive(struct hv_data *hv, struct netvsc_packet *pkt)
> +{
> +	struct rndis_msg rndis_mesg;
> +	struct rndis_msg *rndis_hdr;
> +
> +	/* Shift virtual page number to form virtual page address */
> +	rndis_hdr = (struct rndis_msg *)(pkt->page_buffers[0].pfn << PAGE_SHIFT);
> +
> +	rndis_hdr = (void *)((unsigned long)rndis_hdr
> +			+ pkt->page_buffers[0].offset);
> +
> +	/*
> +	 * Make sure we got a valid rndis message
> +	 * Fixme:  There seems to be a bug in set completion msg where
> +	 * its msg_len is 16 bytes but the byte_count field in the
> +	 * xfer page range shows 52 bytes
> +	 */
> +	if (unlikely(pkt->tot_data_buf_len != rndis_hdr->msg_len)) {
> +		++hv->stats.ierrors;
> +		PMD_PERROR_LOG(hv, DBG_RX,
> +			       "invalid rndis message? (expected %u "
> +			       "bytes got %u)... dropping this message",
> +			       rndis_hdr->msg_len, pkt->tot_data_buf_len);
> +		return -1;
> +	}
> +
> +	rte_memcpy(&rndis_mesg, rndis_hdr,
> +	    (rndis_hdr->msg_len > sizeof(struct rndis_msg)) ?
> +	    sizeof(struct rndis_msg) : rndis_hdr->msg_len);
> +
> +	switch (rndis_mesg.ndis_msg_type) {
> +
> +	/* data message */
> +	case REMOTE_NDIS_PACKET_MSG:
> +		hv->receive_callback(hv, &rndis_mesg, pkt);
> +		break;
> +	/* completion messages */
> +	case REMOTE_NDIS_INITIALIZE_CMPLT:
> +	case REMOTE_NDIS_QUERY_CMPLT:
> +	case REMOTE_NDIS_SET_CMPLT:
> +	case REMOTE_NDIS_RESET_CMPLT:
> +	case REMOTE_NDIS_KEEPALIVE_CMPLT:
> +		hv_rf_receive_response(hv, &rndis_mesg);
> +		break;
> +	/* notification message */
> +	case REMOTE_NDIS_INDICATE_STATUS_MSG:
> +		hv_rf_receive_indicate_status(hv, &rndis_mesg);
> +		break;
> +	default:
> +		PMD_PERROR_LOG(hv, DBG_RX, "hv_rf_on_receive(): Unknown msg_type 0x%x",
> +		    rndis_mesg.ndis_msg_type);
> +		break;
> +	}
> +
> +	return 0;
> +}
> +
> +/*
> + * RNDIS filter on send
> + */
> +int
> +hv_rf_on_send(struct hv_data *hv, struct netvsc_packet *pkt)
> +{
> +	struct rndis_msg *rndis_mesg;
> +	struct rndis_packet *rndis_pkt;
> +	uint32_t rndis_msg_size;
> +
> +	/* Add the rndis header */
> +	rndis_mesg = (struct rndis_msg *)pkt->extension;
> +
> +	memset(rndis_mesg, 0, sizeof(struct rndis_msg));
> +
> +	rndis_msg_size = RNDIS_MESSAGE_SIZE(struct rndis_packet);
> +
> +	rndis_mesg->ndis_msg_type = REMOTE_NDIS_PACKET_MSG;
> +	rndis_mesg->msg_len = pkt->tot_data_buf_len + rndis_msg_size;
> +
> +	rndis_pkt = &rndis_mesg->msg.packet;
> +	rndis_pkt->data_offset = sizeof(struct rndis_packet);
> +	rndis_pkt->data_length = pkt->tot_data_buf_len;
> +
> +	pkt->is_data_pkt = 1;
> +
> +	/*
> +	 * Invoke netvsc send.  If return status is bad, the caller now
> +	 * resets the context pointers before retrying.
> +	 */
> +	return hv_nv_on_send(hv, pkt);
> +}
> +
> +static int
> +hv_rf_init_device(struct hv_data *hv)
> +{
> +	struct rndis_request *request;
> +	struct rndis_initialize_request *init;
> +	struct rndis_initialize_complete *init_complete;
> +	uint32_t status;
> +	int ret;
> +
> +	PMD_INIT_FUNC_TRACE();
> +
> +	request = hv_rndis_request(hv, REMOTE_NDIS_INITIALIZE_MSG,
> +	    RNDIS_MESSAGE_SIZE(struct rndis_initialize_request));
> +	if (!request) {
> +		ret = -1;
> +		goto cleanup;
> +	}
> +
> +	/* Set up the rndis set */
> +	init = &request->request_msg->msg.init_request;
> +	init->major_version = RNDIS_MAJOR_VERSION;
> +	init->minor_version = RNDIS_MINOR_VERSION;
> +	/*
> +	 * Per the RNDIS document, this should be set to the max MTU
> +	 * plus the header size.  However, 2048 works fine, so leaving
> +	 * it as is.
> +	 */
> +	init->max_xfer_size = 2048;
> +
> +	hv->rndis_dev_state = RNDIS_DEV_INITIALIZING;
> +
> +	ret = hv_rf_send_request(hv, request);
> +	if (ret != 0) {
> +		hv->rndis_dev_state = RNDIS_DEV_UNINITIALIZED;
> +		goto cleanup;
> +	}
> +
> +	/* Putting -1 here to ensure that HyperV really answered us */
> +	request->response_msg.msg.init_complete.status = -1;
> +	hv_nv_complete_request(hv, request);
> +
> +	init_complete = &request->response_msg.msg.init_complete;
> +	status = init_complete->status;
> +	if (status == 0) {
> +		PMD_PINFO_LOG(hv, DBG_LOAD, "Remote NDIS device is initialized");
> +		hv->rndis_dev_state = RNDIS_DEV_INITIALIZED;
> +		ret = 0;
> +	} else {
> +		PMD_PINFO_LOG(hv, DBG_LOAD, "Remote NDIS device left uninitialized");
> +		hv->rndis_dev_state = RNDIS_DEV_UNINITIALIZED;
> +		ret = -1;
> +	}
> +
> +cleanup:
> +	rte_free(request);
> +
> +	return ret;
> +}
> +
> +/*
> + * RNDIS filter query device
> + */
> +static int
> +hv_rf_query_device(struct hv_data *hv, uint32_t oid, void *result,
> +		   uint32_t result_size)
> +{
> +	struct rndis_request *request;
> +	struct rndis_query_request *query;
> +	struct rndis_query_complete *query_complete;
> +	int ret = 0;
> +
> +	PMD_INIT_FUNC_TRACE();
> +
> +	request = hv_rndis_request(hv, REMOTE_NDIS_QUERY_MSG,
> +	    RNDIS_MESSAGE_SIZE(struct rndis_query_request));
> +	if (request == NULL) {
> +		ret = -1;
> +		goto cleanup;
> +	}
> +
> +	/* Set up the rndis query */
> +	query = &request->request_msg->msg.query_request;
> +	query->oid = oid;
> +	query->info_buffer_offset = sizeof(struct rndis_query_request);
> +	query->info_buffer_length = 0;
> +	query->device_vc_handle = 0;
> +
> +	ret = hv_rf_send_request(hv, request);
> +	if (ret != 0) {
> +		PMD_PERROR_LOG(hv, DBG_TX, "RNDISFILTER request failed to send!");
> +		goto cleanup;
> +	}
> +
> +	hv_nv_complete_request(hv, request);
> +
> +	/* Copy the response back */
> +	query_complete = &request->response_msg.msg.query_complete;
> +
> +	if (query_complete->info_buffer_length > result_size) {
> +		ret = -EINVAL;
> +		goto cleanup;
> +	}
> +
> +	rte_memcpy(result, (void *)((unsigned long)query_complete +
> +	    query_complete->info_buffer_offset),
> +	    query_complete->info_buffer_length);
> +
> +cleanup:
> +	rte_free(request);
> +
> +	return ret;
> +}
> +
> +/*
> + * RNDIS filter query device MAC address
> + */
> +static inline int
> +hv_rf_query_device_mac(struct hv_data *hv)
> +{
> +	uint32_t size = HW_MACADDR_LEN;
> +
> +	int ret = hv_rf_query_device(hv, RNDIS_OID_802_3_PERMANENT_ADDRESS,
> +			&hv->hw_mac_addr, size);
> +	PMD_PDEBUG_LOG(hv, DBG_TX, "MAC: %02x:%02x:%02x:%02x:%02x:%02x, ret = %d",
> +			hv->hw_mac_addr[0], hv->hw_mac_addr[1], hv->hw_mac_addr[2],
> +			hv->hw_mac_addr[3], hv->hw_mac_addr[4], hv->hw_mac_addr[5],
> +			ret);
> +	return ret;
> +}
> +
> +/*
> + * RNDIS filter query device link status
> + */
> +static inline int
> +hv_rf_query_device_link_status(struct hv_data *hv)
> +{
> +	uint32_t size = sizeof(uint32_t);
> +	/* Set all bits to 1 so that only a real response can clear them */
> +	uint32_t status = -1;
> +
> +	int ret = hv_rf_query_device(hv, RNDIS_OID_GEN_MEDIA_CONNECT_STATUS,
> +			&status, size);
> +	hv->link_status = status ? 0 : 1;
> +	PMD_PDEBUG_LOG(hv, DBG_TX, "Link Status: %s",
> +			hv->link_status ? "Up" : "Down");
> +	return ret;
> +}
> +
> +int
> +hv_rf_on_device_add(struct hv_data *hv)
> +{
> +	int ret;
> +
> +	PMD_INIT_FUNC_TRACE();
> +
> +	hv->closed = 0;
> +	hv->rb_data_size = hv->rb_size - sizeof(struct hv_vmbus_ring_buffer);
> +	PMD_PDEBUG_LOG(hv, DBG_LOAD, "hv->rb_data_size = %u", hv->rb_data_size);
> +
> +	if (unlikely(hv->in->interrupt_mask == 0)) {
> +		PMD_PINFO_LOG(hv, DBG_LOAD, "Disabling interrupts from host");
> +		hv->in->interrupt_mask = 1;
> +		rte_mb();
> +	}
> +
> +	hv->netvsc_packet = rte_zmalloc("", sizeof(struct netvsc_packet),
> +					RTE_CACHE_LINE_SIZE);
> +	if (hv->netvsc_packet == NULL)
> +		return -ENOMEM;
> +	hv->netvsc_packet->is_data_pkt = 1;
> +
> +	hv->rx_comp_msg = rte_zmalloc("", sizeof(struct nvsp_msg),
> +				      RTE_CACHE_LINE_SIZE);
> +	if (hv->rx_comp_msg == NULL)
> +		return -ENOMEM;
> +
> +	hv->rx_comp_msg->msg_type = nvsp_msg_1_type_send_rndis_pkt_complete;
> +	hv->rx_comp_msg->msgs.send_rndis_pkt_complete.status =
> +		nvsp_status_success;
> +
> +	memset(&hv->stats, 0, sizeof(struct hv_stats));
> +
> +	hv->receive_callback = hv_rf_receive_data;
> +
> +	/* For completion of requests that were sent from the kernel-space part */
> +	hv_nv_complete_request(hv, NULL);
> +	hv_nv_complete_request(hv, NULL);
> +
> +	hv->rndis_dev_state = RNDIS_DEV_UNINITIALIZED;
> +
> +	/* Send the rndis initialization message */
> +	ret = hv_rf_init_device(hv);
> +	if (ret != 0) {
> +		PMD_PERROR_LOG(hv, DBG_LOAD, "rndis init failed!");
> +		hv_rf_on_device_remove(hv);
> +		return ret;
> +	}
> +
> +	/* Get the mac address */
> +	ret = hv_rf_query_device_mac(hv);
> +	if (ret != 0) {
> +		PMD_PERROR_LOG(hv, DBG_LOAD, "rndis query mac failed!");
> +		hv_rf_on_device_remove(hv);
> +		return ret;
> +	}
> +
> +	return ret;
> +}
> +
> +#define HALT_COMPLETION_WAIT_COUNT      25
> +
> +/*
> + * RNDIS filter halt device
> + */
> +static int
> +hv_rf_halt_device(struct hv_data *hv)
> +{
> +	struct rndis_request *request;
> +	struct rndis_halt_request *halt;
> +	int i, ret;
> +
> +	PMD_INIT_FUNC_TRACE();
> +
> +	/* Attempt to do a rndis device halt */
> +	request = hv_rndis_request(hv, REMOTE_NDIS_HALT_MSG,
> +	    RNDIS_MESSAGE_SIZE(struct rndis_halt_request));
> +	if (!request) {
> +		PMD_PERROR_LOG(hv, DBG_LOAD, "Unable to create RNDIS_HALT request");
> +		return -1;
> +	}
> +
> +	/* initialize "poor man's semaphore" */
> +	hv->hlt_req_sent = 0;
> +
> +	/* Set up the rndis set */
> +	halt = &request->request_msg->msg.halt_request;
> +	hv->new_request_id++;
> +	halt->request_id = hv->new_request_id;
> +
> +	ret = hv_rf_send_request(hv, request);
> +	if (ret) {
> +		PMD_PERROR_LOG(hv, DBG_LOAD, "Failed to send RNDIS_HALT request: %d",
> +				ret);
> +		return ret;
> +	}
> +
> +	/*
> +	 * Wait for halt response from halt callback.  We must wait for
> +	 * the transaction response before freeing the request and other
> +	 * resources.
> +	 */
> +	for (i = HALT_COMPLETION_WAIT_COUNT; i > 0; i--) {
> +		hv_nv_complete_request(hv, request);
> +		if (hv->hlt_req_sent != 0) {
> +			PMD_PDEBUG_LOG(hv, DBG_LOAD, "Completed HALT request at try %d",
> +					HALT_COMPLETION_WAIT_COUNT - i + 1);
> +			break;
> +		}
> +	}
> +	hv->hlt_req_sent = 0;
> +	if (i == 0) {
> +		PMD_PERROR_LOG(hv, DBG_LOAD, "RNDIS_HALT request was not completed!");
> +		rte_free(request);
> +		return -1;
> +	}
> +
> +	hv->rndis_dev_state = RNDIS_DEV_UNINITIALIZED;
> +
> +	rte_free(request);
> +
> +	return 0;
> +}
> +
> +#define HV_TX_DRAIN_TRIES 50
> +static inline int
> +hyperv_tx_drain(struct hv_data *hv)
> +{
> +	int i = HV_TX_DRAIN_TRIES;
> +
> +	PMD_PDEBUG_LOG(hv, DBG_LOAD, "Waiting for TXs to be completed...");
> +	while (hv->num_outstanding_sends > 0 && --i) {
> +		hv_nv_complete_request(hv, NULL);
> +		rte_delay_ms(100);
> +	}
> +
> +	return hv->num_outstanding_sends;
> +}
> +
> +/*
> + * RNDIS filter on device remove
> + */
> +int
> +hv_rf_on_device_remove(struct hv_data *hv)
> +{
> +	int ret;
> +
> +	PMD_INIT_FUNC_TRACE();
> +	hv->closed = 1;
> +	if (hyperv_tx_drain(hv) > 0) {
> +		/* Hypervisor is not responding, exit with error here */
> +		PMD_PWARN_LOG(hv, DBG_LOAD, "Can't drain TX queue: no response");
> +		return -EAGAIN;
> +	}
> +	PMD_PDEBUG_LOG(hv, DBG_LOAD, "TX queue is empty, can halt the device");
> +
> +	/* Halt and release the rndis device */
> +	hv->hlt_req_pending = 1;
> +	ret = hv_rf_halt_device(hv);
> +	hv->hlt_req_pending = 0;
> +
> +	rte_free(hv->netvsc_packet);
> +
> +	return ret;
> +}
> +
> +/*
> + * RNDIS filter set packet filter
> + * Sends an rndis request with the new filter, then waits for a response
> + * from the host.
> + * Returns zero on success, non-zero on failure.
> + */
> +static int
> +hv_rf_set_packet_filter(struct hv_data *hv, uint32_t new_filter)
> +{
> +	struct rndis_request *request;
> +	struct rndis_set_request *set;
> +	struct rndis_set_complete *set_complete;
> +	uint32_t status;
> +	int ret;
> +
> +	PMD_INIT_FUNC_TRACE();
> +
> +	request = hv_rndis_request(hv, REMOTE_NDIS_SET_MSG,
> +			RNDIS_MESSAGE_SIZE(struct rndis_set_request) + sizeof(uint32_t));
> +	if (!request) {
> +		ret = -1;
> +		goto cleanup;
> +	}
> +
> +	/* Set up the rndis set */
> +	set = &request->request_msg->msg.set_request;
> +	set->oid = RNDIS_OID_GEN_CURRENT_PACKET_FILTER;
> +	set->info_buffer_length = sizeof(uint32_t);
> +	set->info_buffer_offset = sizeof(struct rndis_set_request);
> +
> +	rte_memcpy((void *)((unsigned long)set + sizeof(struct rndis_set_request)),
> +			&new_filter, sizeof(uint32_t));
> +
> +	ret = hv_rf_send_request(hv, request);
> +	if (ret)
> +		goto cleanup;
> +
> +	/*
> +	 * Wait for the response from the host.
> +	 */
> +	request->response_msg.msg.set_complete.status = 0xFFFF;
> +	hv_nv_complete_request(hv, request);
> +
> +	set_complete = &request->response_msg.msg.set_complete;
> +	if (set_complete->status == 0xFFFF) {
> +		/* Host is not responding, we can't free request in this case */
> +		ret = -1;
> +		goto exit;
> +	}
> +	/* Response received, check status */
> +	status = set_complete->status;
> +	if (status)
> +		/* Bad response status, return error */
> +		ret = -2;
> +
> +cleanup:
> +	rte_free(request);
> +exit:
> +	return ret;
> +}
> +
> +/*
> + * RNDIS filter open device
> + */
> +int
> +hv_rf_on_open(struct hv_data *hv)
> +{
> +	int ret;
> +
> +	if (hv->closed)
> +		return 0;
> +
> +	if (hv->jumbo_frame_support)
> +		hv->receive_callback = hv_rf_receive_data_sg;
> +
> +	ret = hyperv_set_rx_mode(hv, 1, 0);
> +	if (!ret) {
> +		PMD_PDEBUG_LOG(hv, DBG_LOAD, "RNDIS device opened");
> +		hv->rndis_dev_state = RNDIS_DEV_DATAINITIALIZED;
> +	} else
> +		PMD_PERROR_LOG(hv, DBG_LOAD, "RNDIS device is left unopened");
> +
> +	return ret;
> +}
> +
> +/*
> + * RNDIS filter on close
> + */
> +int
> +hv_rf_on_close(struct hv_data *hv)
> +{
> +	int ret;
> +
> +	PMD_INIT_FUNC_TRACE();
> +
> +	if (hv->closed)
> +		return 0;
> +
> +	if (hv->rndis_dev_state != RNDIS_DEV_DATAINITIALIZED) {
> +		PMD_PDEBUG_LOG(hv, DBG_LOAD, "RNDIS device state should be"
> +				" RNDIS_DEV_DATAINITIALIZED, but now it is %u",
> +				hv->rndis_dev_state);
> +		return 0;
> +	}
> +
> +	ret = hv_rf_set_packet_filter(hv, 0);
> +	if (!ret) {
> +		PMD_PDEBUG_LOG(hv, DBG_LOAD, "RNDIS device closed");
> +		hv->rndis_dev_state = RNDIS_DEV_INITIALIZED;
> +	} else
> +		PMD_PDEBUG_LOG(hv, DBG_LOAD, "RNDIS device is left unclosed");
> +
> +	return ret;
> +}
> +
> +/*
> + * RX Flow
> + */
> +int
> +hyperv_get_buffer(struct hv_data *hv, void *buffer, uint32_t bufferlen)
> +{
> +	uint32_t bytes_rxed;
> +	uint64_t request_id;
> +	struct hv_vm_packet_descriptor *desc;
> +
> +	int ret = hv_vmbus_channel_recv_packet_raw(hv, buffer, bufferlen,
> +			&bytes_rxed, &request_id, 1);
> +	if (likely(ret == 0)) {
> +		if (bytes_rxed) {
> +			desc = (struct hv_vm_packet_descriptor *)buffer;
> +
> +			if (likely(desc->type ==
> +					HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES)) {
> +				hv->pkt_rxed = 0;
> +				hv_nv_on_receive(hv, desc);
> +				return hv->pkt_rxed;
> +			}
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +/*
> + * TX completions handler
> + */
> +void
> +hyperv_scan_comps(struct hv_data *hv, int allow_rx_drop)
> +{
> +	uint32_t bytes_rxed;
> +	uint64_t request_id;
> +
> +	while (1) {
> +		int ret = hv_vmbus_channel_recv_packet_raw(hv, hv->desc, PAGE_SIZE,
> +			&bytes_rxed, &request_id, 2 | allow_rx_drop);
> +
> +		if (ret != 0 || !bytes_rxed)
> +			break;
> +
> +		if (likely(hv->desc->type == HV_VMBUS_PACKET_TYPE_COMPLETION))
> +			hv_nv_on_send_completion(hv, hv->desc);
> +	}
> +}
> +
> +/*
> + * Get link status
> + */
> +uint8_t
> +hyperv_get_link_status(struct hv_data *hv)
> +{
> +	if (hv_rf_query_device_link_status(hv))
> +		return 2;
> +	return hv->link_status;
> +}
> +
> +/*
> + * Set/Reset RX mode
> + */
> +int
> +hyperv_set_rx_mode(struct hv_data *hv, uint8_t promisc, uint8_t mcast)
> +{
> +	PMD_INIT_FUNC_TRACE();
> +
> +	if (!promisc) {
> +		return hv_rf_set_packet_filter(hv,
> +				NDIS_PACKET_TYPE_BROADCAST                   |
> +				(mcast ? NDIS_PACKET_TYPE_ALL_MULTICAST : 0) |
> +				NDIS_PACKET_TYPE_DIRECTED);
> +	}
> +
> +	return hv_rf_set_packet_filter(hv, NDIS_PACKET_TYPE_PROMISCUOUS);
> +}
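For reference, the filter word handed to the host here is a plain OR of the
NDIS_PACKET_TYPE_* bits defined in hyperv_drv.h below, so the default RX mode
with all-multicast enabled yields 0x0d and promiscuous mode yields 0x20. A
standalone sketch of the same logic (illustration only, not part of the patch):

	#include <stdint.h>
	#include <stdio.h>

	/* Values mirror the NDIS_PACKET_TYPE_* macros in hyperv_drv.h. */
	#define NDIS_PACKET_TYPE_DIRECTED      0x00000001
	#define NDIS_PACKET_TYPE_ALL_MULTICAST 0x00000004
	#define NDIS_PACKET_TYPE_BROADCAST     0x00000008
	#define NDIS_PACKET_TYPE_PROMISCUOUS   0x00000020

	static uint32_t rx_mode_filter(int promisc, int mcast)
	{
		if (promisc)
			return NDIS_PACKET_TYPE_PROMISCUOUS;
		return NDIS_PACKET_TYPE_BROADCAST |
		       (mcast ? NDIS_PACKET_TYPE_ALL_MULTICAST : 0) |
		       NDIS_PACKET_TYPE_DIRECTED;
	}

	int main(void)
	{
		printf("0x%02x\n", (unsigned)rx_mode_filter(0, 1)); /* prints 0x0d */
		return 0;
	}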
> diff --git a/lib/librte_pmd_hyperv/hyperv_drv.h b/lib/librte_pmd_hyperv/hyperv_drv.h
> new file mode 100644
> index 0000000..22acad5
> --- /dev/null
> +++ b/lib/librte_pmd_hyperv/hyperv_drv.h
> @@ -0,0 +1,558 @@
> +/*-
> + * Copyright (c) 2009-2012 Microsoft Corp.
> + * Copyright (c) 2010-2012 Citrix Inc.
> + * Copyright (c) 2012 NetApp Inc.
> + * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + * 1. Redistributions of source code must retain the above copyright
> + *    notice unmodified, this list of conditions, and the following
> + *    disclaimer.
> + * 2. Redistributions in binary form must reproduce the above copyright
> + *    notice, this list of conditions and the following disclaimer in the
> + *    documentation and/or other materials provided with the distribution.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
> + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
> + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
> + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
> + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
> + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
> + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + *
> + */
> +
> +#ifndef _HYPERV_DRV_H_
> +#define _HYPERV_DRV_H_
> +
> +/*
> + * Definitions from hyperv.h
> + */
> +#define HW_MACADDR_LEN	6
> +#define HV_MAX_PAGE_BUFFER_COUNT	19
> +
> +#define HV_ALIGN_UP(value, align) \
> +		(((value) & (align-1)) ? \
> +		    (((value) + (align-1)) & ~(align-1)) : (value))
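HV_ALIGN_UP relies on the usual power-of-two mask trick, so align must be a
power of two for it to behave as intended. A quick self-contained check
(illustrative, not from the patch):

	#include <assert.h>

	#define HV_ALIGN_UP(value, align) \
		(((value) & (align-1)) ? \
		    (((value) + (align-1)) & ~(align-1)) : (value))

	int main(void)
	{
		assert(HV_ALIGN_UP(100, 8) == 104);   /* rounded up          */
		assert(HV_ALIGN_UP(96, 8) == 96);     /* already aligned     */
		assert(HV_ALIGN_UP(1, 4096) == 4096); /* page-size rounding  */
		return 0;
	}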
> +
> +/*
> + *  Connection identifier type
> + */
> +union hv_vmbus_connection_id {
> +	uint32_t                as_uint32_t;
> +	struct {
> +		uint32_t        id:24;
> +		uint32_t        reserved:8;
> +	} u;
> +
> +} __attribute__((packed));
> +
> +union hv_vmbus_monitor_trigger_state {
> +	uint32_t as_uint32_t;
> +	struct {
> +		uint32_t group_enable:4;
> +		uint32_t rsvd_z:28;
> +	} u;
> +};
> +
> +union hv_vmbus_monitor_trigger_group {
> +	uint64_t as_uint64_t;
> +	struct {
> +		uint32_t pending;
> +		uint32_t armed;
> +	} u;
> +};
> +
> +struct hv_vmbus_monitor_parameter {
> +	union hv_vmbus_connection_id  connection_id;
> +	uint16_t                flag_number;
> +	uint16_t                rsvd_z;
> +};
> +
> +/*
> + * hv_vmbus_monitor_page Layout
> + * ------------------------------------------------------
> + * | 0   | trigger_state (4 bytes) | Rsvd1 (4 bytes)     |
> + * | 8   | trigger_group[0]                              |
> + * | 10  | trigger_group[1]                              |
> + * | 18  | trigger_group[2]                              |
> + * | 20  | trigger_group[3]                              |
> + * | 28  | Rsvd2[0]                                      |
> + * | 30  | Rsvd2[1]                                      |
> + * | 38  | Rsvd2[2]                                      |
> + * | 40  | next_check_time[0][0] | next_check_time[0][1] |
> + * | ...                                                 |
> + * | 240 | latency[0][0..3]                              |
> + * | 340 | Rsvz3[0]                                      |
> + * | 440 | parameter[0][0]                               |
> + * | 448 | parameter[0][1]                               |
> + * | ...                                                 |
> + * | 840 | Rsvd4[0]                                      |
> + * ------------------------------------------------------
> + */
> +
> +struct hv_vmbus_monitor_page {
> +	union hv_vmbus_monitor_trigger_state  trigger_state;
> +	uint32_t                        rsvd_z1;
> +
> +	union hv_vmbus_monitor_trigger_group  trigger_group[4];
> +	uint64_t                        rsvd_z2[3];
> +
> +	int32_t                         next_check_time[4][32];
> +
> +	uint16_t                        latency[4][32];
> +	uint64_t                        rsvd_z3[32];
> +
> +	struct hv_vmbus_monitor_parameter      parameter[4][32];
> +
> +	uint8_t                         rsvd_z4[1984];
> +};
> +
> +enum hv_vmbus_packet_type {
> +	HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES	= 0x7,
> +	HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT	= 0x9,
> +	HV_VMBUS_PACKET_TYPE_COMPLETION			= 0xb,
> +};
> +
> +#define HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED    1
> +
> +struct hv_vm_packet_descriptor {
> +	uint16_t type;
> +	uint16_t data_offset8;
> +	uint16_t length8;
> +	uint16_t flags;
> +	uint64_t transaction_id;
> +} __attribute__((packed));
> +
> +struct hv_vm_transfer_page {
> +	uint32_t byte_count;
> +	uint32_t byte_offset;
> +} __attribute__((packed));
> +
> +struct hv_vm_transfer_page_packet_header {
> +	struct hv_vm_packet_descriptor d;
> +	uint16_t                       transfer_page_set_id;
> +	uint8_t                        sender_owns_set;
> +	uint8_t                        reserved;
> +	uint32_t                       range_count;
> +	struct hv_vm_transfer_page     ranges[1];
> +} __attribute__((packed));
> +
> +struct hv_vmbus_ring_buffer {
> +	volatile uint32_t       write_index;
> +	volatile uint32_t       read_index;
> +	/*
> +	 * NOTE: The interrupt_mask field is used only for channels, but
> +	 * vmbus connection also uses this data structure
> +	 */
> +	volatile uint32_t       interrupt_mask;
> +	/* pad it to PAGE_SIZE so that data starts on a page */
> +	uint8_t                 reserved[4084];
> +
> +	/*
> +	 * WARNING: Ring data starts here + ring_data_start_offset
> +	 *  !!! DO NOT place any fields below this !!!
> +	 */
> +	uint8_t			buffer[0];	/* doubles as interrupt mask */
> +} __attribute__((packed));
> +
> +struct hv_vmbus_page_buffer {
> +	uint32_t	length;
> +	uint32_t	offset;
> +	uint64_t	pfn;
> +} __attribute__((packed));
> +
> +/*
> + * Definitions from hv_vmbus_priv.h
> + */
> +struct hv_vmbus_sg_buffer_list {
> +	void		*data;
> +	uint32_t	length;
> +};
> +
> +struct hv_vmbus_channel_packet_page_buffer {
> +	uint16_t		type;
> +	uint16_t		data_offset8;
> +	uint16_t		length8;
> +	uint16_t		flags;
> +	uint64_t		transaction_id;
> +	uint32_t		reserved;
> +	uint32_t		range_count;
> +	struct hv_vmbus_page_buffer	range[HV_MAX_PAGE_BUFFER_COUNT];
> +} __attribute__((packed));
> +
> +/*
> + * Definitions from hv_net_vsc.h
> + */
> +#define NETVSC_PACKET_MAXPAGE 16
> +#define NETVSC_PACKET_SIZE    256
> +
> +/*
> + * This message is used by both the VSP and the VSC to complete
> + * a RNDIS message to the opposite channel endpoint.  At this
> + * point, the initiator of this message cannot use any resources
> + * associated with the original RNDIS packet.
> + */
> +enum nvsp_status_ {
> +	nvsp_status_none = 0,
> +	nvsp_status_success,
> +	nvsp_status_failure,
> +};
> +
> +struct nvsp_1_msg_send_rndis_pkt_complete {
> +	uint32_t                                status;
> +} __attribute__((packed));
> +
> +enum nvsp_msg_type {
> +	/*
> +	 * Version 1 Messages
> +	 */
> +	nvsp_msg_1_type_send_ndis_vers          = 100,
> +
> +	nvsp_msg_1_type_send_rx_buf,
> +	nvsp_msg_1_type_send_rx_buf_complete,
> +	nvsp_msg_1_type_revoke_rx_buf,
> +
> +	nvsp_msg_1_type_send_send_buf,
> +	nvsp_msg_1_type_send_send_buf_complete,
> +	nvsp_msg_1_type_revoke_send_buf,
> +
> +	nvsp_msg_1_type_send_rndis_pkt,
> +	nvsp_msg_1_type_send_rndis_pkt_complete,
> +};
> +
> +struct nvsp_1_msg_send_rndis_pkt {
> +	/*
> +	 * This field is specified by RNDIS.  They assume there's
> +	 * two different channels of communication. However,
> +	 * the Network VSP only has one.  Therefore, the channel
> +	 * travels with the RNDIS packet.
> +	 */
> +	uint32_t                                chan_type;
> +
> +	/*
> +	 * This field is used to send part or all of the data
> +	 * through a send buffer. This value specifies an
> +	 * index into the send buffer.  If the index is
> +	 * 0xFFFFFFFF, then the send buffer is not being used
> +	 * and all of the data was sent through other VMBus
> +	 * mechanisms.
> +	 */
> +	uint32_t                                send_buf_section_idx;
> +	uint32_t                                send_buf_section_size;
> +} __attribute__((packed));
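The 0xFFFFFFFF sentinel described in the comment can be made explicit at the
sender; a hypothetical helper (the function name is invented for illustration,
only the struct comes from the patch):

	/* Hypothetical helper: mark an outgoing RNDIS packet as carrying all
	 * of its data via GPA page buffers rather than the send buffer. */
	static void nvsp_pkt_no_send_buf(struct nvsp_1_msg_send_rndis_pkt *pkt)
	{
		pkt->send_buf_section_idx = 0xFFFFFFFF; /* send buffer unused */
		pkt->send_buf_section_size = 0;
	}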
> +
> +/*
> + * ALL Messages
> + */
> +struct nvsp_msg {
> +	uint32_t                                msg_type;
> +	union {
> +		struct nvsp_1_msg_send_rndis_pkt               send_rndis_pkt;
> +		struct nvsp_1_msg_send_rndis_pkt_complete      send_rndis_pkt_complete;
> +		/* size is set like in linux kernel driver */
> +		uint8_t raw[24];
> +	} msgs;
> +} __attribute__((packed));
> +
> +#define NETVSC_RECEIVE_BUFFER_ID                0xcafe
> +
> +struct netvsc_packet {
> +	uint8_t             is_data_pkt;      /* One byte */
> +	uint8_t             ext_pages;
> +	uint16_t            vlan_tci;
> +
> +	void                *extension;
> +	uint64_t            extension_phys_addr;
> +	uint32_t            tot_data_buf_len;
> +	uint32_t            page_buf_count;
> +	struct hv_vmbus_page_buffer page_buffers[NETVSC_PACKET_MAXPAGE];
> +};
> +
> +/*
> + * Definitions from hv_rndis.h
> + */
> +#define RNDIS_MAJOR_VERSION                             0x00000001
> +#define RNDIS_MINOR_VERSION                             0x00000000
> +
> +#define STATUS_BUFFER_OVERFLOW                          (0x80000005L)
> +
> +/*
> + * Remote NDIS message types
> + */
> +#define REMOTE_NDIS_PACKET_MSG                          0x00000001
> +#define REMOTE_NDIS_INITIALIZE_MSG                      0x00000002
> +#define REMOTE_NDIS_HALT_MSG                            0x00000003
> +#define REMOTE_NDIS_QUERY_MSG                           0x00000004
> +#define REMOTE_NDIS_SET_MSG                             0x00000005
> +#define REMOTE_NDIS_RESET_MSG                           0x00000006
> +#define REMOTE_NDIS_INDICATE_STATUS_MSG                 0x00000007
> +#define REMOTE_NDIS_KEEPALIVE_MSG                       0x00000008
> +/*
> + * Remote NDIS message completion types
> + */
> +#define REMOTE_NDIS_INITIALIZE_CMPLT                    0x80000002
> +#define REMOTE_NDIS_QUERY_CMPLT                         0x80000004
> +#define REMOTE_NDIS_SET_CMPLT                           0x80000005
> +#define REMOTE_NDIS_RESET_CMPLT                         0x80000006
> +#define REMOTE_NDIS_KEEPALIVE_CMPLT                     0x80000008
> +
> +#define RNDIS_OID_GEN_MEDIA_CONNECT_STATUS              0x00010114
> +#define RNDIS_OID_GEN_CURRENT_PACKET_FILTER             0x0001010E
> +#define RNDIS_OID_802_3_PERMANENT_ADDRESS               0x01010101
> +#define RNDIS_OID_802_3_CURRENT_ADDRESS                 0x01010102
> +#define RNDIS_OID_GEN_RNDIS_CONFIG_PARAMETER            0x0001021B
> +
> +#define RNDIS_CONFIG_PARAM_TYPE_STRING      2
> +/* extended info after the RNDIS request message */
> +#define RNDIS_EXT_LEN                       100
> +/*
> + * Packet extension field contents associated with a Data message.
> + */
> +struct rndis_per_packet_info {
> +	uint32_t            size;
> +	uint32_t            type;
> +	uint32_t            per_packet_info_offset;
> +};
> +
> +#define ieee_8021q_info 6
> +
> +struct ndis_8021q_info {
> +	union {
> +		struct {
> +			uint32_t   user_pri:3;  /* User Priority */
> +			uint32_t   cfi:1;  /* Canonical Format ID */
> +			uint32_t   vlan_id:12;
> +			uint32_t   reserved:16;
> +		} s1;
> +		uint32_t    value;
> +	} u1;
> +};
> +
> +/* Format of Information buffer passed in a SetRequest for the OID */
> +/* OID_GEN_RNDIS_CONFIG_PARAMETER. */
> +struct rndis_config_parameter_info {
> +	uint32_t parameter_name_offset;
> +	uint32_t parameter_name_length;
> +	uint32_t parameter_type;
> +	uint32_t parameter_value_offset;
> +	uint32_t parameter_value_length;
> +};
> +
> +/*
> + * NdisInitialize message
> + */
> +struct rndis_initialize_request {
> +	/* RNDIS request ID */
> +	uint32_t            request_id;
> +	uint32_t            major_version;
> +	uint32_t            minor_version;
> +	uint32_t            max_xfer_size;
> +};
> +
> +/*
> + * Response to NdisInitialize
> + */
> +struct rndis_initialize_complete {
> +	/* RNDIS request ID */
> +	uint32_t            request_id;
> +	/* RNDIS status */
> +	uint32_t            status;
> +	uint32_t            major_version;
> +	uint32_t            minor_version;
> +	uint32_t            device_flags;
> +	/* RNDIS medium */
> +	uint32_t            medium;
> +	uint32_t            max_pkts_per_msg;
> +	uint32_t            max_xfer_size;
> +	uint32_t            pkt_align_factor;
> +	uint32_t            af_list_offset;
> +	uint32_t            af_list_size;
> +};
> +
> +/*
> + * NdisSetRequest message
> + */
> +struct rndis_set_request {
> +	/* RNDIS request ID */
> +	uint32_t            request_id;
> +	/* RNDIS OID */
> +	uint32_t            oid;
> +	uint32_t            info_buffer_length;
> +	uint32_t            info_buffer_offset;
> +	/* RNDIS handle */
> +	uint32_t            device_vc_handle;
> +};
> +
> +/*
> + * Response to NdisSetRequest
> + */
> +struct rndis_set_complete {
> +	/* RNDIS request ID */
> +	uint32_t            request_id;
> +	/* RNDIS status */
> +	uint32_t            status;
> +};
> +
> +/*
> + * NdisQueryRequest message
> + */
> +struct rndis_query_request {
> +	/* RNDIS request ID */
> +	uint32_t            request_id;
> +	/* RNDIS OID */
> +	uint32_t            oid;
> +	uint32_t            info_buffer_length;
> +	uint32_t            info_buffer_offset;
> +	/* RNDIS handle */
> +	uint32_t            device_vc_handle;
> +};
> +
> +/*
> + * Response to NdisQueryRequest
> + */
> +struct rndis_query_complete {
> +	/* RNDIS request ID */
> +	uint32_t            request_id;
> +	/* RNDIS status */
> +	uint32_t            status;
> +	uint32_t            info_buffer_length;
> +	uint32_t            info_buffer_offset;
> +};
> +
> +/*
> + * Data message. All offset fields contain byte offsets from the beginning
> + * of the rndis_packet structure. All length fields are in bytes.
> + * VcHandle is set to 0 for connectionless data, otherwise it
> + * contains the VC handle.
> + */
> +struct rndis_packet {
> +	uint32_t            data_offset;
> +	uint32_t            data_length;
> +	uint32_t            oob_data_offset;
> +	uint32_t            oob_data_length;
> +	uint32_t            num_oob_data_elements;
> +	uint32_t            per_pkt_info_offset;
> +	uint32_t            per_pkt_info_length;
> +	/* RNDIS handle */
> +	uint32_t            vc_handle;
> +	uint32_t            reserved;
> +};
> +
> +/*
> + * NdisHalt message
> + */
> +struct rndis_halt_request {
> +	/* RNDIS request ID */
> +	uint32_t            request_id;
> +};
> +
> +/*
> + * NdisMIndicateStatus message
> + */
> +struct rndis_indicate_status {
> +	/* RNDIS status */
> +	uint32_t                                status;
> +	uint32_t                                status_buf_length;
> +	uint32_t                                status_buf_offset;
> +};
> +
> +#define RNDIS_STATUS_MEDIA_CONNECT              (0x4001000BL)
> +#define RNDIS_STATUS_MEDIA_DISCONNECT           (0x4001000CL)
> +#define RNDIS_STATUS_INVALID_DATA               (0xC0010015L)
> +
> +/*
> + * union with all of the RNDIS messages
> + */
> +union rndis_msg_container {
> +	struct rndis_initialize_request                init_request;
> +	struct rndis_initialize_complete               init_complete;
> +	struct rndis_set_request                       set_request;
> +	struct rndis_set_complete                      set_complete;
> +	struct rndis_query_request                     query_request;
> +	struct rndis_query_complete                    query_complete;
> +	struct rndis_packet                            packet;
> +	struct rndis_halt_request                      halt_request;
> +	struct rndis_indicate_status                   indicate_status;
> +#if 0
> +	rndis_keepalive_request                 keepalive_request;
> +	rndis_reset_request                     reset_request;
> +	rndis_reset_complete                    reset_complete;
> +	rndis_keepalive_complete                keepalive_complete;
> +	rcondis_mp_create_vc                    co_miniport_create_vc;
> +	rcondis_mp_delete_vc                    co_miniport_delete_vc;
> +	rcondis_indicate_status                 co_miniport_status;
> +	rcondis_mp_activate_vc_request          co_miniport_activate_vc;
> +	rcondis_mp_deactivate_vc_request        co_miniport_deactivate_vc;
> +	rcondis_mp_create_vc_complete           co_miniport_create_vc_complete;
> +	rcondis_mp_delete_vc_complete           co_miniport_delete_vc_complete;
> +	rcondis_mp_activate_vc_complete         co_miniport_activate_vc_complete;
> +	rcondis_mp_deactivate_vc_complete       co_miniport_deactivate_vc_complete;
> +#endif
> +	uint32_t packet_ex[16]; /* to pad the union size */
> +};
> +
> +struct rndis_msg {
> +	uint32_t         ndis_msg_type;
> +
> +	/*
> +	 * Total length of this message, from the beginning
> +	 * of the rndis_msg struct, in bytes.
> +	 */
> +	uint32_t         msg_len;
> +
> +	/* Actual message */
> +	union rndis_msg_container msg;
> +};
> +
> +#define RNDIS_HEADER_SIZE (sizeof(struct rndis_msg) - sizeof(union rndis_msg_container))
> +
> +#define NDIS_PACKET_TYPE_DIRECTED       0x00000001
> +#define NDIS_PACKET_TYPE_MULTICAST      0x00000002
> +#define NDIS_PACKET_TYPE_ALL_MULTICAST  0x00000004
> +#define NDIS_PACKET_TYPE_BROADCAST      0x00000008
> +#define NDIS_PACKET_TYPE_SOURCE_ROUTING 0x00000010
> +#define NDIS_PACKET_TYPE_PROMISCUOUS    0x00000020
> +
> +/*
> + * get the size of an RNDIS message. Pass in the message type,
> + * rndis_set_request, rndis_packet for example
> + */
> +#define RNDIS_MESSAGE_SIZE(message) \
> +	(sizeof(message) + (sizeof(struct rndis_msg) - sizeof(union rndis_msg_container)))
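So RNDIS_MESSAGE_SIZE(struct rndis_set_request) is the fixed RNDIS header plus
the request body; the hv_rf_set_packet_filter() call earlier in this patch adds
sizeof(uint32_t) on top to reserve the info buffer that carries the filter
word. Roughly (illustration only):

	/* Illustration: total allocation for a SET request whose info
	 * buffer holds one 32-bit packet-filter value. */
	uint32_t msg_len = RNDIS_MESSAGE_SIZE(struct rndis_set_request)
			   + sizeof(uint32_t);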
> +
> +
> +/*
> + * Definitions from hv_rndis_filter.h
> + */
> +enum {
> +	RNDIS_DEV_UNINITIALIZED = 0,
> +	RNDIS_DEV_INITIALIZING,
> +	RNDIS_DEV_INITIALIZED,
> +	RNDIS_DEV_DATAINITIALIZED,
> +};
> +
> +struct rndis_request {
> +	/* assumed a fixed size response here. */
> +	struct rndis_msg    response_msg;
> +
> +	/* Simplify allocation by having a netvsc packet inline */
> +	struct netvsc_packet pkt;
> +	/* set additional buffer since packet can cross page boundary */
> +	struct hv_vmbus_page_buffer buffer;
> +	/* assumed a fixed size request here. */
> +	struct rndis_msg    *request_msg;
> +	const struct rte_memzone *request_msg_memzone;
> +};
> +
> +struct rndis_filter_packet {
> +	struct rndis_msg                       message;
> +};
> +
> +#endif /* _HYPERV_DRV_H_ */
> diff --git a/lib/librte_pmd_hyperv/hyperv_ethdev.c b/lib/librte_pmd_hyperv/hyperv_ethdev.c
> new file mode 100644
> index 0000000..7b909db
> --- /dev/null
> +++ b/lib/librte_pmd_hyperv/hyperv_ethdev.c
> @@ -0,0 +1,332 @@
> +/*-
> + * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
> + * All rights reserved.
> + */
> +
> +#include <assert.h>
> +#include <unistd.h>
> +#include "hyperv.h"
> +
> +static struct rte_vmbus_id vmbus_id_hyperv_map[] = {
> +	{
> +		.device_id = 0x0,
> +	},
> +};
> +
> +static void
> +hyperv_dev_info_get(__rte_unused struct rte_eth_dev *dev,
> +		struct rte_eth_dev_info *dev_info)
> +{
> +	PMD_INIT_FUNC_TRACE();
> +	dev_info->max_rx_queues  = HV_MAX_RX_QUEUES;
> +	dev_info->max_tx_queues  = HV_MAX_TX_QUEUES;
> +	dev_info->min_rx_bufsize = HV_MIN_RX_BUF_SIZE;
> +	dev_info->max_rx_pktlen  = HV_MAX_RX_PKT_LEN;
> +	dev_info->max_mac_addrs  = HV_MAX_MAC_ADDRS;
> +}
> +
> +inline int
> +rte_hv_dev_atomic_write_link_status(struct rte_eth_dev *dev,
> +		struct rte_eth_link *link)
> +{
> +	struct rte_eth_link *dst = &(dev->data->dev_link);
> +	struct rte_eth_link *src = link;
> +
> +	if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
> +				*(uint64_t *)src) == 0)
> +		return -1;
> +
> +	return 0;
> +}
> +
> +inline int
> +rte_hv_dev_atomic_read_link_status(struct rte_eth_dev *dev,
> +		struct rte_eth_link *link)
> +{
> +	struct rte_eth_link *dst = link;
> +	struct rte_eth_link *src = &(dev->data->dev_link);
> +
> +	if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
> +				*(uint64_t *)src) == 0)
> +		return -1;
> +
> +	return 0;
> +}
> +
> +/* return 0 means link status changed, -1 means not changed */
> +static int
> +hyperv_dev_link_update(struct rte_eth_dev *dev,
> +		__rte_unused int wait_to_complete)
> +{
> +	uint8_t ret;
> +	struct rte_eth_link old, link;
> +	struct hv_data *hv = dev->data->dev_private;
> +
> +	PMD_INIT_FUNC_TRACE();
> +	memset(&old, 0, sizeof(old));
> +	memset(&link, 0, sizeof(link));
> +	rte_hv_dev_atomic_read_link_status(dev, &old);
> +	if (!hv->link_status && (hv->link_req_cnt == HV_MAX_LINK_REQ)) {
> +		ret = hyperv_get_link_status(hv);
> +		if (ret > 1)
> +			return -1;
> +		hv->link_req_cnt = 0;
> +	}
> +	link.link_duplex = ETH_LINK_FULL_DUPLEX;
> +	link.link_speed = ETH_LINK_SPEED_10000;
> +	link.link_status = hv->link_status;
> +	hv->link_req_cnt++;
> +	rte_hv_dev_atomic_write_link_status(dev, &link);
> +
> +	return (old.link_status == link.link_status) ? -1 : 0;
> +}
> +
> +static int
> +hyperv_dev_configure(struct rte_eth_dev *dev)
> +{
> +	struct hv_data *hv = dev->data->dev_private;
> +	const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
> +
> +	PMD_INIT_FUNC_TRACE();
> +
> +	rte_memcpy(dev->data->mac_addrs->addr_bytes, hv->hw_mac_addr,
> +			ETHER_ADDR_LEN);
> +	hv->jumbo_frame_support = rxmode->jumbo_frame;
> +
> +	return 0;
> +}
> +
> +static int
> +hyperv_init(struct rte_eth_dev *dev)
> +{
> +	struct hv_data *hv = dev->data->dev_private;
> +	struct rte_vmbus_device *vmbus_dev;
> +
> +	vmbus_dev = dev->vmbus_dev;
> +	hv->uio_fd = vmbus_dev->uio_fd;
> +	hv->kernel_initialized = 1;
> +	hv->vmbus_device = vmbus_dev->id.device_id;
> +	hv->monitor_bit = (uint8_t)(vmbus_dev->vmbus_monitor_id % 32);
> +	hv->monitor_group = (uint8_t)(vmbus_dev->vmbus_monitor_id / 32);
> +	PMD_PDEBUG_LOG(hv, DBG_LOAD, "hyperv_init for vmbus device %d",
> +			vmbus_dev->id.device_id);
> +
> +	/* get the memory mappings */
> +	hv->ring_pages = vmbus_dev->mem_resource[TXRX_RING_MAP].addr;
> +	hv->int_page = vmbus_dev->mem_resource[INT_PAGE_MAP].addr;
> +	hv->monitor_pages =
> +		(struct hv_vmbus_monitor_page *)
> +		vmbus_dev->mem_resource[MON_PAGE_MAP].addr;
> +	hv->recv_buf = vmbus_dev->mem_resource[RECV_BUF_MAP].addr;
> +	assert(hv->ring_pages);
> +	assert(hv->int_page);
> +	assert(hv->monitor_pages);
> +	assert(hv->recv_buf);
> +
> +	/* separate send/recv int_pages */
> +	hv->recv_interrupt_page = hv->int_page;
> +
> +	hv->send_interrupt_page =
> +		((uint8_t *) hv->int_page + (PAGE_SIZE >> 1));
> +
> +	/* retrieve in/out ring_buffers */
> +	hv->out = hv->ring_pages;
> +	hv->in  = (void *)((uint64_t)hv->out +
> +			(vmbus_dev->mem_resource[TXRX_RING_MAP].len / 2));
> +	hv->rb_size = (vmbus_dev->mem_resource[TXRX_RING_MAP].len / 2);
> +
> +	dev->rx_pkt_burst = hyperv_recv_pkts;
> +	dev->tx_pkt_burst = hyperv_xmit_pkts;
> +
> +	return hv_rf_on_device_add(hv);
> +}
> +
> +#define HV_DEV_ID (hv->vmbus_device << 1)
> +#define HV_MTU (dev->data->dev_conf.rxmode.max_rx_pkt_len << 9)
> +
> +static int
> +hyperv_dev_start(struct rte_eth_dev *dev)
> +{
> +	int ret;
> +	uint32_t cmd;
> +	size_t bytes;
> +	struct hv_data *hv = dev->data->dev_private;
> +
> +	PMD_INIT_FUNC_TRACE();
> +	if (!hv->kernel_initialized) {
> +		cmd = HV_DEV_ID | HV_MTU;
> +		bytes = write(hv->uio_fd, &cmd, sizeof(uint32_t));
> +		if (bytes < sizeof(uint32_t)) {
> +			PMD_PERROR_LOG(hv, DBG_LOAD, "write on uio_fd %d failed",
> +					hv->uio_fd);
> +			return -1;
> +		}
> +		ret = vmbus_uio_map_resource(dev->vmbus_dev);
> +		if (ret < 0) {
> +			PMD_PERROR_LOG(hv, DBG_LOAD, "Failed to map resources");
> +			return ret;
> +		}
> +		ret = hyperv_init(dev);
> +		if (ret)
> +			return ret;
> +	}
> +	ret = hv_rf_on_open(hv);
> +	if (ret) {
> +		PMD_PERROR_LOG(hv, DBG_LOAD, "hv_rf_on_open failed");
> +		return ret;
> +	}
> +	hv->link_req_cnt = HV_MAX_LINK_REQ;
> +
> +	return ret;
> +}
> +
> +static void
> +hyperv_dev_stop(struct rte_eth_dev *dev)
> +{
> +	struct hv_data *hv = dev->data->dev_private;
> +	uint32_t cmd;
> +	size_t bytes;
> +
> +	PMD_INIT_FUNC_TRACE();
> +	if (!hv->closed) {
> +		hv_rf_on_close(hv);
> +		hv_rf_on_device_remove(hv);
> +		if (hv->kernel_initialized) {
> +			cmd = 1 | HV_DEV_ID;
> +			bytes = write(hv->uio_fd, &cmd, sizeof(uint32_t));
> +			if (bytes)
> +				hv->kernel_initialized = 0;
> +			else
> +				PMD_PWARN_LOG(hv, DBG_LOAD, "write to uio_fd %d failed: (%zu)b",
> +						hv->uio_fd, bytes);
> +		}
> +		hv->link_status = 0;
> +	}
> +}
> +
> +static void
> +hyperv_dev_close(struct rte_eth_dev *dev)
> +{
> +	PMD_INIT_FUNC_TRACE();
> +	hyperv_dev_stop(dev);
> +}
> +
> +static void
> +hyperv_dev_promisc_enable(struct rte_eth_dev *dev)
> +{
> +	struct hv_data *hv = dev->data->dev_private;
> +
> +	PMD_INIT_FUNC_TRACE();
> +	hyperv_set_rx_mode(hv, 1, dev->data->all_multicast);
> +}
> +
> +static void
> +hyperv_dev_promisc_disable(struct rte_eth_dev *dev)
> +{
> +	struct hv_data *hv = dev->data->dev_private;
> +
> +	PMD_INIT_FUNC_TRACE();
> +	hyperv_set_rx_mode(hv, 0, dev->data->all_multicast);
> +}
> +
> +static void
> +hyperv_dev_allmulticast_enable(struct rte_eth_dev *dev)
> +{
> +	struct hv_data *hv = dev->data->dev_private;
> +
> +	PMD_INIT_FUNC_TRACE();
> +	hyperv_set_rx_mode(hv, dev->data->promiscuous, 1);
> +}
> +
> +static void
> +hyperv_dev_allmulticast_disable(struct rte_eth_dev *dev)
> +{
> +	struct hv_data *hv = dev->data->dev_private;
> +
> +	PMD_INIT_FUNC_TRACE();
> +	hyperv_set_rx_mode(hv, dev->data->promiscuous, 0);
> +}
> +
> +static void
> +hyperv_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
> +{
> +	struct hv_data *hv = dev->data->dev_private;
> +	struct hv_stats *st = &hv->stats;
> +
> +	PMD_INIT_FUNC_TRACE();
> +
> +	memset(stats, 0, sizeof(struct rte_eth_stats));
> +
> +	stats->opackets = st->opkts;
> +	stats->obytes = st->obytes;
> +	stats->oerrors = st->oerrors;
> +	stats->ipackets = st->ipkts;
> +	stats->ibytes = st->ibytes;
> +	stats->ierrors = st->ierrors;
> +	stats->rx_nombuf = st->rx_nombuf;
> +}
> +
> +static struct eth_dev_ops hyperv_eth_dev_ops = {
> +	.dev_configure		= hyperv_dev_configure,
> +	.dev_start		= hyperv_dev_start,
> +	.dev_stop		= hyperv_dev_stop,
> +	.dev_infos_get		= hyperv_dev_info_get,
> +	.rx_queue_release	= hyperv_dev_rx_queue_release,
> +	.tx_queue_release	= hyperv_dev_tx_queue_release,
> +	.rx_queue_setup		= hyperv_dev_rx_queue_setup,
> +	.tx_queue_setup		= hyperv_dev_tx_queue_setup,
> +	.dev_close		= hyperv_dev_close,
> +	.promiscuous_enable	= hyperv_dev_promisc_enable,
> +	.promiscuous_disable	= hyperv_dev_promisc_disable,
> +	.allmulticast_enable	= hyperv_dev_allmulticast_enable,
> +	.allmulticast_disable	= hyperv_dev_allmulticast_disable,
> +	.link_update		= hyperv_dev_link_update,
> +	.stats_get		= hyperv_dev_stats_get,
> +};
> +
> +static int
> +eth_hyperv_dev_init(struct rte_eth_dev *eth_dev)
> +{
> +	int ret;
> +
> +	PMD_INIT_FUNC_TRACE();
> +
> +	eth_dev->dev_ops = &hyperv_eth_dev_ops;
> +	eth_dev->data->mac_addrs = rte_malloc("mac_addrs",
> +					      sizeof(struct ether_addr),
> +					      RTE_CACHE_LINE_SIZE);
> +	if (!eth_dev->data->mac_addrs) {
> +		PMD_PERROR_LOG(hv, DBG_LOAD, "unable to allocate memory for mac addrs");
> +		return -1;
> +	}
> +
> +	ret = hyperv_init(eth_dev);
> +
> +	return ret;
> +}
> +
> +static struct eth_driver rte_hyperv_pmd = {
> +	.vmbus_drv = {
> +		.name = "rte_hyperv_pmd",
> +		.module_name = "hv_uio",
> +		.id_table = vmbus_id_hyperv_map,
> +	},
> +	.bus_type = RTE_BUS_VMBUS,
> +	.eth_dev_init = eth_hyperv_dev_init,
> +	.dev_private_size = sizeof(struct hv_data),
> +};
> +
> +static int
> +rte_hyperv_pmd_init(const char *name __rte_unused,
> +		    const char *param __rte_unused)
> +{
> +	rte_eth_driver_register(&rte_hyperv_pmd);
> +	return 0;
> +}
> +
> +static struct rte_driver rte_hyperv_driver = {
> +	.type = PMD_PDEV,
> +	.init = rte_hyperv_pmd_init,
> +};
> +
> +PMD_REGISTER_DRIVER(rte_hyperv_driver);
> diff --git a/lib/librte_pmd_hyperv/hyperv_logs.h b/lib/librte_pmd_hyperv/hyperv_logs.h
> new file mode 100644
> index 0000000..1b96468
> --- /dev/null
> +++ b/lib/librte_pmd_hyperv/hyperv_logs.h
> @@ -0,0 +1,69 @@
> +/*-
> + *   Copyright(c) 2013-2015 Brocade Communications Systems, Inc.
> + *   All rights reserved.
> + */
> +
> +#ifndef _HYPERV_LOGS_H_
> +#define _HYPERV_LOGS_H_
> +
> +#ifdef RTE_LIBRTE_HV_DEBUG_INIT
> +#define PMD_INIT_LOG(level, fmt, args...) \
> +	RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args)
> +#define PMD_INIT_FUNC_TRACE() PMD_INIT_LOG(DEBUG, " >>")
> +#else
> +#define PMD_INIT_LOG(level, fmt, args...) do { } while (0)
> +#define PMD_INIT_FUNC_TRACE() do { } while (0)
> +#endif
> +
> +#ifdef RTE_LIBRTE_HV_DEBUG
> +
> +#define RTE_DBG_LOAD   INIT
> +#define RTE_DBG_STATS  STATS
> +#define RTE_DBG_TX     TX
> +#define RTE_DBG_RX     RX
> +#define RTE_DBG_MBUF   MBUF
> +#define RTE_DBG_ASSERT ASRT
> +#define RTE_DBG_RB     RB
> +#define RTE_DBG_VMBUS  VMBUS
> +#define RTE_DBG_ALL    ALL
> +
> +#define STR(x) #x
> +
> +#define HV_RTE_LOG(hv, codepath, level, fmt, args...) \
> +	RTE_LOG(level, PMD, "[%d]: %-6s: %s: " fmt "\n", \
> +		hv->vmbus_device, STR(codepath), __func__, ## args)
> +
> +#define PMD_PDEBUG_LOG(hv, codepath, fmt, args...) \
> +do { \
> +	if (unlikely(hv->debug & (codepath))) \
> +		HV_RTE_LOG(hv, RTE_##codepath, DEBUG, fmt, ## args) \
> +} while (0)
> +
> +#define PMD_PINFO_LOG(hv, codepath, fmt, args...) \
> +do { \
> +	if (unlikely(hv->debug & (codepath))) \
> +		HV_RTE_LOG(hv, RTE_##codepath, INFO, fmt, ## args) \
> +} while (0)
> +
> +#define PMD_PWARN_LOG(hv, codepath, fmt, args...) \
> +do { \
> +	if (unlikely(hv->debug & (codepath))) \
> +		HV_RTE_LOG(hv, RTE_##codepath, WARNING, fmt, ## args) \
> +} while (0)
> +
> +#define PMD_PERROR_LOG(hv, codepath, fmt, args...) \
> +do { \
> +	if (unlikely(hv->debug & (codepath))) \
> +		HV_RTE_LOG(hv, RTE_##codepath, ERR, fmt, ## args) \
> +} while (0)
> +#else
> +#define HV_RTE_LOG(level, fmt, args...) do { } while (0)
> +#define PMD_PDEBUG_LOG(fmt, args...) do { } while (0)
> +#define PMD_PINFO_LOG(fmt, args...) do { } while (0)
> +#define PMD_PWARN_LOG(fmt, args...) do { } while (0)
> +#define PMD_PERROR_LOG(fmt, args...) do { } while (0)
> +#undef RTE_LIBRTE_HV_DEBUG_TX
> +#undef RTE_LIBRTE_HV_DEBUG_RX
> +#endif
> +
> +#endif /* _HYPERV_LOGS_H_ */
> diff --git a/lib/librte_pmd_hyperv/hyperv_rxtx.c b/lib/librte_pmd_hyperv/hyperv_rxtx.c
> new file mode 100644
> index 0000000..9e423d0
> --- /dev/null
> +++ b/lib/librte_pmd_hyperv/hyperv_rxtx.c
> @@ -0,0 +1,403 @@
> +/*-
> + *   Copyright(c) 2013-2015 Brocade Communications Systems, Inc.
> + *   All rights reserved.
> + */
> +
> +#include "hyperv.h"
> +#include "hyperv_rxtx.h"
> +#include "hyperv_drv.h"
> +
> +#define RTE_MBUF_DATA_DMA_ADDR(mb) \
> +	((uint64_t)((mb)->buf_physaddr + (mb)->data_off))
> +
> +#define RPPI_SIZE	(sizeof(struct rndis_per_packet_info)\
> +			 + sizeof(struct ndis_8021q_info))
> +#define RNDIS_OFF	(sizeof(struct netvsc_packet) + RPPI_SIZE)
> +#define TX_PKT_SIZE	(RNDIS_OFF + sizeof(struct rndis_filter_packet) * 2)
> +
> +static inline struct rte_mbuf *
> +hv_rxmbuf_alloc(struct rte_mempool *mp)
> +{
> +	return	__rte_mbuf_raw_alloc(mp);
> +}
> +
> +static inline int
> +hyperv_has_rx_work(struct hv_data *hv)
> +{
> +	return hv->in->read_index != hv->in->write_index;
> +}
> +
> +#ifndef DEFAULT_TX_FREE_THRESHOLD
> +#define DEFAULT_TX_FREE_THRESHOLD 32
> +#endif
> +
> +int
> +hyperv_dev_tx_queue_setup(struct rte_eth_dev *dev,
> +			  uint16_t queue_idx,
> +			  uint16_t nb_desc,
> +			  unsigned int socket_id,
> +			  const struct rte_eth_txconf *tx_conf)
> +
> +{
> +	struct hv_data *hv = dev->data->dev_private;
> +	const struct rte_memzone *tz;
> +	struct hv_tx_queue *txq;
> +	char tz_name[RTE_MEMZONE_NAMESIZE];
> +	uint32_t i, delta = 0, new_delta;
> +	struct netvsc_packet *pkt;
> +
> +	PMD_INIT_FUNC_TRACE();
> +
> +	txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct hv_tx_queue),
> +				 RTE_CACHE_LINE_SIZE, socket_id);
> +	if (txq == NULL) {
> +		PMD_PERROR_LOG(hv, DBG_LOAD, "rte_zmalloc for tx_queue failed");
> +		return -ENOMEM;
> +	}
> +
> +	if (tx_conf->tx_free_thresh >= nb_desc) {
> +		PMD_PERROR_LOG(hv, DBG_LOAD,
> +			       "tx_free_thresh should be less than nb_desc");
> +		return -EINVAL;
> +	}
> +	txq->tx_free_thresh = (tx_conf->tx_free_thresh ? tx_conf->tx_free_thresh :
> +			       DEFAULT_TX_FREE_THRESHOLD);
> +	txq->pkts = rte_calloc_socket("TX pkts", sizeof(void*), nb_desc,
> +				       RTE_CACHE_LINE_SIZE, socket_id);
> +	if (txq->pkts == NULL) {
> +		PMD_PERROR_LOG(hv, DBG_LOAD, "rte_zmalloc for pkts failed");
> +		return -ENOMEM;
> +	}
> +	sprintf(tz_name, "hv_%d_%u_%u", hv->vmbus_device, queue_idx, socket_id);
> +	tz = rte_memzone_reserve_aligned(tz_name,
> +					 (uint32_t)nb_desc * TX_PKT_SIZE,
> +					 rte_lcore_to_socket_id(rte_lcore_id()),
> +					 0, PAGE_SIZE);
> +	if (tz == NULL) {
> +		PMD_PERROR_LOG(hv, DBG_LOAD, "netvsc packet ring alloc fail");
> +		return -ENOMEM;
> +	}
> +	for (i = 0; i < nb_desc; i++) {
> +		pkt = txq->pkts[i] = (struct netvsc_packet *)((uint8_t *)tz->addr +
> +							      i * TX_PKT_SIZE + delta);
> +		pkt->extension = (uint8_t *)tz->addr + i * TX_PKT_SIZE + RNDIS_OFF + delta;
> +		if (!pkt->extension) {
> +			PMD_PERROR_LOG(hv, DBG_TX,
> +				       "pkt->extension is NULL for %d-th pkt", i);
> +			return -EINVAL;
> +		}
> +		pkt->extension_phys_addr =
> +			tz->phys_addr + i * TX_PKT_SIZE + RNDIS_OFF + delta;
> +		pkt->ext_pages = 1;
> +		pkt->page_buffers[0].pfn = pkt->extension_phys_addr >> PAGE_SHIFT;
> +		pkt->page_buffers[0].offset =
> +			(unsigned long)pkt->extension & (PAGE_SIZE - 1);
> +		pkt->page_buffers[0].length = RNDIS_MESSAGE_SIZE(struct rndis_packet);
> +		if (pkt->page_buffers[0].offset + pkt->page_buffers[0].length
> +		    > PAGE_SIZE) {
> +			new_delta = PAGE_SIZE - pkt->page_buffers[0].offset;
> +			pkt->page_buffers[0].pfn++;
> +			delta += new_delta;
> +			pkt->page_buffers[0].offset = 0;
> +			pkt->extension = (uint8_t *)pkt->extension + new_delta;
> +			pkt->extension_phys_addr += new_delta;
> +		}
> +	}
> +	txq->sw_ring = rte_calloc_socket("txq_sw_ring",
> +					 sizeof(struct rte_mbuf *), nb_desc,
> +					 RTE_CACHE_LINE_SIZE, socket_id);
> +	if (txq->sw_ring == NULL) {
> +		hyperv_dev_tx_queue_release(txq);
> +		return -ENOMEM;
> +	}
> +	txq->port_id = dev->data->port_id;
> +	txq->nb_tx_desc = txq->tx_avail = nb_desc;
> +	txq->tx_free_thresh = tx_conf->tx_free_thresh;
> +	txq->hv = hv;
> +	dev->data->tx_queues[queue_idx] = txq;
> +	hv->txq = txq;
> +
> +	return 0;
> +}
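The delta bookkeeping in the loop above exists so that the per-descriptor
RNDIS header never straddles a 4 KB page boundary, since each header is posted
to the host as a single page buffer. The fix-up reduces to this standalone
sketch (the helper name is invented for illustration; PAGE_SIZE is 4096 as in
hyperv.h):

	#include <stdint.h>

	#define PAGE_SIZE 4096u

	/* If a 'len'-byte region starting at offset 'off' would cross a page
	 * boundary, slide it to the start of the next page and return the
	 * number of bytes skipped (the "new_delta" above); 0 otherwise. */
	static uint32_t page_fixup(uint32_t *off, uint32_t len)
	{
		uint32_t in_page = *off & (PAGE_SIZE - 1);

		if (in_page + len <= PAGE_SIZE)
			return 0;
		*off += PAGE_SIZE - in_page;
		return PAGE_SIZE - in_page;
	}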
> +
> +void
> +hyperv_dev_tx_queue_release(void *ptxq)
> +{
> +	struct hv_tx_queue *txq = ptxq;
> +
> +	PMD_INIT_FUNC_TRACE();
> +	if (txq == NULL)
> +		return;
> +	rte_free(txq->sw_ring);
> +	rte_free(txq->pkts);
> +	rte_free(txq);
> +}
> +
> +int
> +hyperv_dev_rx_queue_setup(struct rte_eth_dev *dev,
> +			  uint16_t queue_idx,
> +			  uint16_t nb_desc,
> +			  unsigned int socket_id,
> +			  const struct rte_eth_rxconf *rx_conf,
> +			  struct rte_mempool *mp)
> +{
> +	uint16_t i;
> +	struct hv_rx_queue *rxq;
> +	struct rte_mbuf *mbuf;
> +	struct hv_data *hv = dev->data->dev_private;
> +
> +	PMD_INIT_FUNC_TRACE();
> +
> +	rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct hv_rx_queue),
> +				 RTE_CACHE_LINE_SIZE, socket_id);
> +	if (rxq == NULL) {
> +		PMD_PERROR_LOG(hv, DBG_LOAD,
> +			       "rte_zmalloc for rx_queue failed!");
> +		return -ENOMEM;
> +	}
> +	hv->desc = rxq->desc = rte_zmalloc_socket(NULL, PAGE_SIZE,
> +						  RTE_CACHE_LINE_SIZE, socket_id);
> +	if (rxq->desc == NULL) {
> +		PMD_PERROR_LOG(hv, DBG_LOAD,
> +			       "rte_zmalloc for vmbus_desc failed!");
> +		hyperv_dev_rx_queue_release(rxq);
> +		return -ENOMEM;
> +	}
> +	rxq->sw_ring = rte_calloc_socket("rxq->sw_ring",
> +					 sizeof(struct rte_mbuf *), nb_desc,
> +					 RTE_CACHE_LINE_SIZE, socket_id);
> +	if (rxq->sw_ring == NULL) {
> +		hyperv_dev_rx_queue_release(rxq);
> +		return -ENOMEM;
> +	}
> +
> +	for (i = 0; i < nb_desc; i++) {
> +		mbuf = hv_rxmbuf_alloc(mp);
> +		if (mbuf == NULL) {
> +			PMD_PERROR_LOG(hv, DBG_LOAD, "RX mbuf alloc failed");
> +			return -ENOMEM;
> +		}
> +
> +		mbuf->nb_segs = 1;
> +		mbuf->next = NULL;
> +		mbuf->port = rxq->port_id;
> +		rxq->sw_ring[i] = mbuf;
> +	}
> +
> +	rxq->mb_pool = mp;
> +	rxq->nb_rx_desc = nb_desc;
> +	rxq->rx_head = 0;
> +	rxq->rx_tail = 0;
> +	rxq->rx_free_thresh = rx_conf->rx_free_thresh;
> +	rxq->port_id = dev->data->port_id;
> +	rxq->hv = hv;
> +	dev->data->rx_queues[queue_idx] = rxq;
> +	hv->rxq = rxq;
> +	hv->max_rx_pkt_len = mp->elt_size -
> +		(sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM);
> +
> +	return 0;
> +}
> +
> +void
> +hyperv_dev_rx_queue_release(void *prxq)
> +{
> +	struct hv_rx_queue *rxq = prxq;
> +
> +	PMD_INIT_FUNC_TRACE();
> +	if (rxq == NULL)
> +		return;
> +	rte_free(rxq->sw_ring);
> +	rte_free(rxq->desc);
> +	rte_free(rxq);
> +}
> +
> +uint16_t
> +hyperv_recv_pkts(void *prxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
> +{
> +	struct hv_rx_queue *rxq = prxq;
> +	struct hv_data *hv = rxq->hv;
> +	struct rte_mbuf *new_mb, *rx_mbuf, *first_mbuf;
> +	uint16_t nb_rx = 0;
> +	uint16_t segs, i;
> +
> +	if (unlikely(hv->closed))
> +		return 0;
> +
> +	nb_pkts = MIN(nb_pkts, HV_MAX_PKT_BURST);
> +	hyperv_scan_comps(hv, 0);
> +
> +	while (nb_rx < nb_pkts) {
> +		/*
> +		 * if there are no mbufs in sw_ring,
> +		 * we need to trigger receive procedure
> +		 */
> +		if (rxq->rx_head == rxq->rx_tail) {
> +			if (!hyperv_has_rx_work(hv))
> +				break;
> +
> +			if (unlikely(!hyperv_get_buffer(hv, rxq->desc, PAGE_SIZE))) {
> +				hyperv_scan_comps(hv, 0);
> +				continue;
> +			}
> +		}
> +
> +		/*
> +		 * Now the received data is in sw_ring of our rxq
> +		 * we need to extract it and replace it in sw_ring with a new mbuf
> +		 */
> +		rx_mbuf = first_mbuf = rxq->sw_ring[rxq->rx_head];
> +		segs = first_mbuf->nb_segs;
> +		for (i = 0; i < segs; ++i) {
> +			new_mb = hv_rxmbuf_alloc(rxq->mb_pool);
> +			if (unlikely(!new_mb)) {
> +				PMD_PERROR_LOG(hv, DBG_RX, "mbuf alloc fail");
> +				++hv->stats.rx_nombuf;
> +				return nb_rx;
> +			}
> +
> +			rx_mbuf = rxq->sw_ring[rxq->rx_head];
> +			rxq->sw_ring[rxq->rx_head] = new_mb;
> +
> +			if (++rxq->rx_head == rxq->nb_rx_desc)
> +				rxq->rx_head = 0;
> +
> +			rx_mbuf->ol_flags |= PKT_RX_IPV4_HDR;
> +			rx_mbuf->port = rxq->port_id;
> +		}
> +		rx_mbuf->next = NULL;
> +
> +		rx_pkts[nb_rx++] = first_mbuf;
> +		++hv->stats.ipkts;
> +		hv->stats.ibytes += first_mbuf->pkt_len;
> +	}
> +
> +	return nb_rx;
> +}
> +
> +static void
> +hyperv_txeof(struct hv_tx_queue *txq)
> +{
> +	struct rte_mbuf *mb, *mb_next;
> +
> +	txq->tx_avail += txq->tx_free;
> +	while (txq->tx_free) {
> +		--txq->tx_free;
> +		mb = txq->sw_ring[txq->tx_head];
> +		while (mb) {
> +			mb_next = mb->next;
> +			rte_mempool_put(mb->pool, mb);
> +			mb = mb_next;
> +		}
> +		if (++txq->tx_head == txq->nb_tx_desc)
> +			txq->tx_head = 0;
> +	}
> +}
> +
> +uint16_t
> +hyperv_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
> +{
> +	struct hv_tx_queue *txq = ptxq;
> +	struct hv_data *hv = txq->hv;
> +	struct netvsc_packet *packet;
> +	struct rte_mbuf *m;
> +	uint32_t data_pages;
> +	uint64_t first_data_page;
> +	uint32_t total_len;
> +	uint32_t len;
> +	uint16_t i, nb_tx;
> +	uint8_t rndis_pages;
> +	int ret;
> +
> +	if (unlikely(hv->closed))
> +		return 0;
> +
> +	for (nb_tx = 0; nb_tx < nb_pkts; ++nb_tx) {
> +		hyperv_scan_comps(hv, 0);
> +		/* Determine if the descriptor ring needs to be cleaned. */
> +		if (txq->tx_free > txq->tx_free_thresh)
> +			hyperv_txeof(txq);
> +
> +		if (!txq->tx_avail) {
> +			hyperv_scan_comps(hv, 1);
> +			hyperv_txeof(txq);
> +			if (!txq->tx_avail) {
> +				PMD_PWARN_LOG(hv, DBG_TX, "No TX mbuf available");
> +				break;
> +			}
> +		}
> +		m = tx_pkts[nb_tx];
> +		len = m->data_len;
> +		total_len = m->pkt_len;
> +		first_data_page = RTE_MBUF_DATA_DMA_ADDR(m) >> PAGE_SHIFT;
> +		data_pages = ((RTE_MBUF_DATA_DMA_ADDR(m) + len - 1) >> PAGE_SHIFT) -
> +			first_data_page + 1;
> +
> +		packet = txq->pkts[txq->tx_tail];
> +		rndis_pages = packet->ext_pages;
> +
> +		txq->sw_ring[txq->tx_tail] = m;
> +		packet->tot_data_buf_len = total_len;
> +		packet->page_buffers[rndis_pages].pfn =
> +			RTE_MBUF_DATA_DMA_ADDR(m) >> PAGE_SHIFT;
> +		packet->page_buffers[rndis_pages].offset =
> +			RTE_MBUF_DATA_DMA_ADDR(m) & (PAGE_SIZE - 1);
> +		if (data_pages == 1)
> +			packet->page_buffers[rndis_pages].length = len;
> +		else
> +			packet->page_buffers[rndis_pages].length = PAGE_SIZE -
> +				packet->page_buffers[rndis_pages].offset;
> +
> +		for (i = 1; i < data_pages; ++i) {
> +			packet->page_buffers[rndis_pages + i].pfn = first_data_page + i;
> +			packet->page_buffers[rndis_pages + i].offset = 0;
> +			packet->page_buffers[rndis_pages + i].length = PAGE_SIZE;
> +		}
> +		if (data_pages > 1)
> +			packet->page_buffers[rndis_pages - 1 + data_pages].length =
> +				((rte_pktmbuf_mtod(m, unsigned long) + len - 1)
> +				 & (PAGE_SIZE - 1)) + 1;
> +
> +		uint16_t index = data_pages + rndis_pages;
> +
> +		for (i = 1; i < m->nb_segs; ++i) {
> +			m = m->next;
> +			len = m->data_len;
> +			first_data_page = RTE_MBUF_DATA_DMA_ADDR(m) >> PAGE_SHIFT;
> +			data_pages = ((RTE_MBUF_DATA_DMA_ADDR(m) + len - 1) >> PAGE_SHIFT) -
> +				first_data_page + 1;
> +			packet->page_buffers[index].pfn =
> +				RTE_MBUF_DATA_DMA_ADDR(m) >> PAGE_SHIFT;
> +			packet->page_buffers[index].offset =
> +				rte_pktmbuf_mtod(m, unsigned long)
> +				& (PAGE_SIZE - 1);
> +			packet->page_buffers[index].length = m->data_len;
> +			if (data_pages > 1) {
> +				/* It can be 2 in case of usual mbuf_size=2048 */
> +				packet->page_buffers[index].length = PAGE_SIZE -
> +					packet->page_buffers[index].offset;
> +				packet->page_buffers[++index].offset = 0;
> +				packet->page_buffers[index].pfn =
> +					packet->page_buffers[index - 1].pfn + 1;
> +				packet->page_buffers[index].length =
> +					m->data_len
> +					- packet->page_buffers[index - 1].length;
> +			}
> +			++index;
> +		}
> +		packet->page_buf_count = index;
> +
> +		ret = hv_rf_on_send(hv, packet);
> +		if (likely(ret == 0)) {
> +			++hv->stats.opkts;
> +			hv->stats.obytes += total_len;
> +			if (++txq->tx_tail == txq->nb_tx_desc)
> +				txq->tx_tail = 0;
> +			--txq->tx_avail;
> +		} else {
> +			++hv->stats.oerrors;
> +			PMD_PERROR_LOG(hv, DBG_TX, "TX ring buffer is busy");
> +		}
> +	}
> +
> +	return nb_tx;
> +}
> diff --git a/lib/librte_pmd_hyperv/hyperv_rxtx.h b/lib/librte_pmd_hyperv/hyperv_rxtx.h
> new file mode 100644
> index 0000000..c45a704
> --- /dev/null
> +++ b/lib/librte_pmd_hyperv/hyperv_rxtx.h
> @@ -0,0 +1,35 @@
> +/*-
> + *   Copyright(c) 2013-2015 Brocade Communications Systems, Inc.
> + *   All rights reserved.
> + */
> +
> +/**
> + * Structure associated with each TX queue.
> + */
> +struct hv_tx_queue {
> +	struct netvsc_packet    **pkts;
> +	struct rte_mbuf         **sw_ring;
> +	uint16_t                nb_tx_desc;
> +	uint16_t                tx_avail;
> +	uint16_t                tx_head;
> +	uint16_t                tx_tail;
> +	uint16_t                tx_free_thresh;
> +	uint16_t                tx_free;
> +	uint8_t                 port_id;
> +	struct hv_data          *hv;
> +} __rte_cache_aligned;
> +
> +/**
> + * Structure associated with each RX queue.
> + */
> +struct hv_rx_queue {
> +	struct rte_mempool      *mb_pool;
> +	struct rte_mbuf         **sw_ring;
> +	uint16_t                nb_rx_desc;
> +	uint16_t                rx_head;
> +	uint16_t                rx_tail;
> +	uint16_t                rx_free_thresh;
> +	uint8_t                 port_id;
> +	struct hv_data          *hv;
> +	struct hv_vm_packet_descriptor *desc;
> +} __rte_cache_aligned;
> diff --git a/mk/rte.app.mk b/mk/rte.app.mk
> index 62a76ae..e0416d1 100644
> --- a/mk/rte.app.mk
> +++ b/mk/rte.app.mk
> @@ -133,6 +133,10 @@ LDLIBS += -lm
>  LDLIBS += -lrt
>  endif
> 
> +ifeq ($(CONFIG_RTE_LIBRTE_HV_PMD),y)
> +LDLIBS += -lrte_pmd_hyperv
> +endif
> +
>  ifeq ($(CONFIG_RTE_LIBRTE_VHOST), y)
>  LDLIBS += -lrte_vhost
>  endif
> --
> 2.1.4
  
Stephen Hemminger April 21, 2015, 9:35 p.m. UTC | #2
On Tue, 21 Apr 2015 19:34:39 +0000
"Butler, Siobhan A" <siobhan.a.butler@intel.com> wrote:

> Hi Stephen 
> Will you have documentation to go along with these changes?
> Thanks
> Siobhan

Unlikely. Microsoft or other contributors might add something
in a later version.

The documentation that exists in DPDK related drivers just
won't scale as more drivers are added. It needs to be massively
simplified and generalized.
  
Thomas Monjalon July 9, 2015, 12:01 a.m. UTC | #3
2015-04-21 14:35, Stephen Hemminger:
> On Tue, 21 Apr 2015 19:34:39 +0000
> "Butler, Siobhan A" <siobhan.a.butler@intel.com> wrote:
> 
> > Hi Stephen 
> > Will you have documentation to go along with these changes?
> > Thanks
> > Siobhan
> 
> Unlikely. Microsoft or other contributors might add something
> in a later version.
> 
> The documentation that exists in DPDK related drivers just
> won't scale as more drivers are added. It needs to be massively
> simplified and generalized.

I'm afraid you'll need to put an rst file in doc/guides/nics/.
At least, you need to describe how to use the specific bus and explain that
it is supported only on Linux with recent kernels.
  
Thomas Monjalon July 9, 2015, 12:05 a.m. UTC | #4
2015-04-21 10:32, Stephen Hemminger:
> From: Stephen Hemminger <shemming@brocade.com>
> 
> This is new Poll Mode driver for using hyper-v virtual network
> interface.
> 
> Signed-off-by: Stas Egorov <segorov@mirantis.com>
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
> ---
>  lib/Makefile                          |    1 +
>  lib/librte_pmd_hyperv/Makefile        |   28 +
>  lib/librte_pmd_hyperv/hyperv.h        |  169 ++++
>  lib/librte_pmd_hyperv/hyperv_drv.c    | 1653 +++++++++++++++++++++++++++++++++
>  lib/librte_pmd_hyperv/hyperv_drv.h    |  558 +++++++++++
>  lib/librte_pmd_hyperv/hyperv_ethdev.c |  332 +++++++
>  lib/librte_pmd_hyperv/hyperv_logs.h   |   69 ++
>  lib/librte_pmd_hyperv/hyperv_rxtx.c   |  403 ++++++++
>  lib/librte_pmd_hyperv/hyperv_rxtx.h   |   35 +
>  mk/rte.app.mk                         |    4 +
>  10 files changed, 3252 insertions(+)

Please split into separate patches:
- setup
- Rx
- Tx
- link state
- stats
- promisc
  

Patch

diff --git a/lib/Makefile b/lib/Makefile
index d94355d..6c1daf2 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -47,6 +47,7 @@  DIRS-$(CONFIG_RTE_LIBRTE_I40E_PMD) += librte_pmd_i40e
 DIRS-$(CONFIG_RTE_LIBRTE_FM10K_PMD) += librte_pmd_fm10k
 DIRS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += librte_pmd_mlx4
 DIRS-$(CONFIG_RTE_LIBRTE_ENIC_PMD) += librte_pmd_enic
+DIRS-$(CONFIG_RTE_LIBRTE_HV_PMD) += librte_pmd_hyperv
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += librte_pmd_bond
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_RING) += librte_pmd_ring
 DIRS-$(CONFIG_RTE_LIBRTE_PMD_PCAP) += librte_pmd_pcap
diff --git a/lib/librte_pmd_hyperv/Makefile b/lib/librte_pmd_hyperv/Makefile
new file mode 100644
index 0000000..4ba08c8
--- /dev/null
+++ b/lib/librte_pmd_hyperv/Makefile
@@ -0,0 +1,28 @@ 
+#   BSD LICENSE
+#
+#   Copyright(c) 2013-2015 Brocade Communications Systems, Inc.
+#   All rights reserved.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_hyperv.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_HV_PMD) += hyperv_ethdev.c
+SRCS-$(CONFIG_RTE_LIBRTE_HV_PMD) += hyperv_rxtx.c
+SRCS-$(CONFIG_RTE_LIBRTE_HV_PMD) += hyperv_drv.c
+
+# this lib depends upon:
+DEPDIRS-$(CONFIG_RTE_LIBRTE_HV_PMD) += lib/librte_eal lib/librte_ether
+DEPDIRS-$(CONFIG_RTE_LIBRTE_HV_PMD) += lib/librte_mempool lib/librte_mbuf
+DEPDIRS-$(CONFIG_RTE_LIBRTE_HV_PMD) += lib/librte_malloc
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/lib/librte_pmd_hyperv/hyperv.h b/lib/librte_pmd_hyperv/hyperv.h
new file mode 100644
index 0000000..5f66d8a
--- /dev/null
+++ b/lib/librte_pmd_hyperv/hyperv.h
@@ -0,0 +1,169 @@ 
+/*-
+ * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#ifndef _HYPERV_H_
+#define _HYPERV_H_
+
+#include <sys/param.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+#include <rte_memzone.h>
+#include <rte_cycles.h>
+#include <rte_dev.h>
+
+#include "hyperv_logs.h"
+
+#define PAGE_SHIFT		12
+#define PAGE_SIZE		(1 << PAGE_SHIFT)
+
+/*
+ * Tunable ethdev params
+ */
+#define HV_MIN_RX_BUF_SIZE 1024
+#define HV_MAX_RX_PKT_LEN  4096
+#define HV_MAX_MAC_ADDRS   1
+#define HV_MAX_RX_QUEUES   1
+#define HV_MAX_TX_QUEUES   1
+#define HV_MAX_PKT_BURST   32
+#define HV_MAX_LINK_REQ    10
+
+/*
+ * List of resources mapped from kspace
+ * need to be the same as defined in hv_uio.c
+ */
+enum {
+	TXRX_RING_MAP,
+	INT_PAGE_MAP,
+	MON_PAGE_MAP,
+	RECV_BUF_MAP
+};
+
+/*
+ * Statistics
+ */
+struct hv_stats {
+	uint64_t opkts;
+	uint64_t obytes;
+	uint64_t oerrors;
+
+	uint64_t ipkts;
+	uint64_t ibytes;
+	uint64_t ierrors;
+	uint64_t rx_nombuf;
+};
+
+struct hv_data;
+struct netvsc_packet;
+struct rndis_msg;
+typedef void (*receive_callback_t)(struct hv_data *hv, struct rndis_msg *msg,
+		struct netvsc_packet *pkt);
+
+/*
+ * Main driver structure
+ */
+struct hv_data {
+	int vmbus_device;
+	uint8_t monitor_bit;
+	uint8_t monitor_group;
+	uint8_t kernel_initialized;
+	int uio_fd;
+	/* Flag indicates channel state. If closed, RX/TX shouldn't work further */
+	uint8_t closed;
+	/* Flag indicates whether HALT rndis request was received by host */
+	uint8_t hlt_req_sent;
+	/* Flag indicates pending state for HALT request */
+	uint8_t hlt_req_pending;
+	/* Counter for RNDIS requests */
+	uint32_t new_request_id;
+	/* State of RNDIS device */
+	uint8_t rndis_dev_state;
+	/* Number of transmitted packets but not completed yet by Hyper-V */
+	int num_outstanding_sends;
+	/* Max pkt len to fit in rx mbufs */
+	uint32_t max_rx_pkt_len;
+
+	uint8_t jumbo_frame_support;
+
+	struct hv_vmbus_ring_buffer *in;
+	struct hv_vmbus_ring_buffer *out;
+
+	/* Size of each ring_buffer(in/out) */
+	uint32_t rb_size;
+	/* Size of data in each ring_buffer(in/out) */
+	uint32_t rb_data_size;
+
+	void *int_page;
+	struct hv_vmbus_monitor_page *monitor_pages;
+	void *recv_interrupt_page;
+	void *send_interrupt_page;
+	void *ring_pages;
+	void *recv_buf;
+
+	uint8_t link_req_cnt;
+	uint32_t link_status;
+	uint8_t  hw_mac_addr[ETHER_ADDR_LEN];
+	struct rndis_request *req;
+	struct netvsc_packet *netvsc_packet;
+	struct nvsp_msg *rx_comp_msg;
+	struct hv_rx_queue *rxq;
+	struct hv_tx_queue *txq;
+	struct hv_vm_packet_descriptor *desc;
+	receive_callback_t receive_callback;
+	int pkt_rxed;
+
+	uint32_t debug;
+	struct hv_stats stats;
+};
+
+/*
+ * Extern functions declarations
+ */
+int hyperv_dev_tx_queue_setup(struct rte_eth_dev *dev,
+			 uint16_t queue_idx,
+			 uint16_t nb_desc,
+			 unsigned int socket_id,
+			 const struct rte_eth_txconf *tx_conf);
+
+void hyperv_dev_tx_queue_release(void *ptxq);
+
+int hyperv_dev_rx_queue_setup(struct rte_eth_dev *dev,
+			 uint16_t queue_idx,
+			 uint16_t nb_desc,
+			 unsigned int socket_id,
+			 const struct rte_eth_rxconf *rx_conf,
+			 struct rte_mempool *mp);
+
+void hyperv_dev_rx_queue_release(void *prxq);
+
+uint16_t
+hyperv_recv_pkts(void *prxq,
+		 struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
+
+uint16_t
+hyperv_xmit_pkts(void *ptxq,
+		 struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
+
+int hv_rf_on_device_add(struct hv_data *hv);
+int hv_rf_on_device_remove(struct hv_data *hv);
+int hv_rf_on_send(struct hv_data *hv, struct netvsc_packet *pkt);
+int hv_rf_on_open(struct hv_data *hv);
+int hv_rf_on_close(struct hv_data *hv);
+int hv_rf_set_device_mac(struct hv_data *hv, uint8_t *mac);
+void hyperv_start_rx(struct hv_data *hv);
+void hyperv_stop_rx(struct hv_data *hv);
+int hyperv_get_buffer(struct hv_data *hv, void *buffer, uint32_t bufferlen);
+void hyperv_scan_comps(struct hv_data *hv, int allow_rx_drop);
+uint8_t hyperv_get_link_status(struct hv_data *hv);
+int hyperv_set_rx_mode(struct hv_data *hv, uint8_t promisc, uint8_t mcast);
+
+inline int rte_hv_dev_atomic_write_link_status(struct rte_eth_dev *dev,
+		struct rte_eth_link *link);
+inline int rte_hv_dev_atomic_read_link_status(struct rte_eth_dev *dev,
+		struct rte_eth_link *link);
+
+#endif /* _HYPERV_H_ */
diff --git a/lib/librte_pmd_hyperv/hyperv_drv.c b/lib/librte_pmd_hyperv/hyperv_drv.c
new file mode 100644
index 0000000..4a37966
--- /dev/null
+++ b/lib/librte_pmd_hyperv/hyperv_drv.c
@@ -0,0 +1,1653 @@ 
+/*-
+ * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2010-2012 Citrix Inc.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include "hyperv.h"
+#include "hyperv_drv.h"
+#include "hyperv_rxtx.h"
+
+#define LOOP_CNT 10000
+#define MAC_STRLEN 14
+#define MAC_PARAM_STR "NetworkAddress"
+
+#define hex "0123456789abcdef"
+#define high(x) hex[(x & 0xf0) >> 4]
+#define low(x) hex[x & 0x0f]
+
+static int hv_rf_on_receive(struct hv_data *hv, struct netvsc_packet *pkt);
+
+/*
+ * Ring buffer
+ */
+
+/* Amount of space to write to */
+#define HV_BYTES_AVAIL_TO_WRITE(r, w, z) \
+	(((w) >= (r)) ? ((z) - ((w) - (r))) : ((r) - (w)))
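A quick sanity check of the arithmetic (illustrative, the macro body is copied
from above): for a 4096-byte data area, read == write is treated as an empty
ring, and the writer below always keeps at least one byte free so that state
never becomes ambiguous.

	#include <assert.h>

	/* Mirrors HV_BYTES_AVAIL_TO_WRITE defined above. */
	#define HV_BYTES_AVAIL_TO_WRITE(r, w, z) \
		(((w) >= (r)) ? ((z) - ((w) - (r))) : ((r) - (w)))

	int main(void)
	{
		assert(HV_BYTES_AVAIL_TO_WRITE(100, 300, 4096) == 3896);
		assert(HV_BYTES_AVAIL_TO_WRITE(300, 100, 4096) == 200);
		assert(HV_BYTES_AVAIL_TO_WRITE(0, 0, 4096) == 4096);
		return 0;
	}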
+
+/*
+ * Get number of bytes available to read and to write to
+ * for the specified ring buffer
+ */
+static inline void
+get_ring_buffer_avail_bytes(
+	struct hv_data               *hv,
+	struct hv_vmbus_ring_buffer  *ring_buffer,
+	uint32_t                     *read,
+	uint32_t                     *write)
+{
+	rte_compiler_barrier();
+
+	/*
+	 * Capture the read/write indices before they changed
+	 */
+	uint32_t read_loc = ring_buffer->read_index;
+	uint32_t write_loc = ring_buffer->write_index;
+
+	*write = HV_BYTES_AVAIL_TO_WRITE(
+			read_loc, write_loc, hv->rb_data_size);
+	*read = hv->rb_data_size - *write;
+}
+
+/*
+ * Helper routine to copy from source to ring buffer.
+ *
+ * Assume there is enough room. Handles wrap-around in dest case only!
+ */
+static uint32_t
+copy_to_ring_buffer(
+	struct hv_vmbus_ring_buffer  *ring_buffer,
+	uint32_t                     ring_buffer_size,
+	uint32_t                     start_write_offset,
+	char                         *src,
+	uint32_t                     src_len)
+{
+	char *ring_buf = (char *)ring_buffer->buffer;
+	uint32_t frag_len;
+
+	if (src_len > ring_buffer_size - start_write_offset) {
+		/* wrap-around detected! */
+		frag_len = ring_buffer_size - start_write_offset;
+		rte_memcpy(ring_buf + start_write_offset, src, frag_len);
+		rte_memcpy(ring_buf, src + frag_len, src_len - frag_len);
+	} else {
+		rte_memcpy(ring_buf + start_write_offset, src, src_len);
+	}
+
+	start_write_offset += src_len;
+	start_write_offset %= ring_buffer_size;
+
+	return start_write_offset;
+}
+
+/*
+ * Helper routine to copy to dest from ring buffer.
+ *
+ * Assume there is enough room. Handles wrap-around in src case only!
+ */
+static uint32_t
+copy_from_ring_buffer(
+	struct hv_data               *hv,
+	struct hv_vmbus_ring_buffer  *ring_buffer,
+	char                         *dest,
+	uint32_t                     dest_len,
+	uint32_t                     start_read_offset)
+{
+	uint32_t frag_len;
+	char *ring_buf = (char *)ring_buffer->buffer;
+
+	if (dest_len > hv->rb_data_size - start_read_offset) {
+		/* wrap-around detected at the src */
+		frag_len = hv->rb_data_size - start_read_offset;
+		rte_memcpy(dest, ring_buf + start_read_offset, frag_len);
+		rte_memcpy(dest + frag_len, ring_buf, dest_len - frag_len);
+	} else {
+		rte_memcpy(dest, ring_buf + start_read_offset, dest_len);
+	}
+
+	start_read_offset += dest_len;
+	start_read_offset %= hv->rb_data_size;
+
+	return start_read_offset;
+}
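+
+/*
+ * Editor's sketch (illustrative only, kept out of the build): the
+ * wrap-around behaviour of the two copy helpers above.  A 10-byte
+ * copy into a 16-byte data area at write offset 12 splits into 4
+ * bytes at offsets 12..15 and 6 bytes at offsets 0..5, and the
+ * helper returns the new offset (12 + 10) % 16 == 6.
+ */
+#if 0
+static void ring_copy_example(struct hv_vmbus_ring_buffer *rb)
+{
+	char payload[10] = "012345678";
+	uint32_t next;
+
+	next = copy_to_ring_buffer(rb, 16, 12, payload, sizeof(payload));
+	/* next == 6; bytes 12..15 hold "0123", bytes 0..5 hold "45678\0" */
+}
+#endif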
+
+/*
+ * Write to the ring buffer.
+ */
+static int
+hv_ring_buffer_write(
+	struct hv_data                 *hv,
+	struct hv_vmbus_sg_buffer_list sg_buffers[],
+	uint32_t                       sg_buffer_count)
+{
+	struct hv_vmbus_ring_buffer *ring_buffer = hv->out;
+	uint32_t i = 0;
+	uint32_t byte_avail_to_write;
+	uint32_t byte_avail_to_read;
+	uint32_t total_bytes_to_write = 0;
+	volatile uint32_t next_write_location;
+	uint64_t prev_indices = 0;
+
+	for (i = 0; i < sg_buffer_count; i++)
+		total_bytes_to_write += sg_buffers[i].length;
+
+	total_bytes_to_write += sizeof(uint64_t);
+
+	get_ring_buffer_avail_bytes(hv, ring_buffer, &byte_avail_to_read,
+			&byte_avail_to_write);
+
+	/*
+	 * If there is only just enough room for the packet, treat the ring
+	 * as full.  Otherwise, the next time around we would consider the
+	 * ring buffer empty, since read index == write index.
+	 */
+	if (byte_avail_to_write <= total_bytes_to_write) {
+		PMD_PERROR_LOG(hv, DBG_RB,
+				"byte_avail_to_write = %u, total_bytes_to_write = %u",
+				byte_avail_to_write, total_bytes_to_write);
+		return -EAGAIN;
+	}
+
+	/*
+	 * Write to the ring buffer
+	 */
+	next_write_location = ring_buffer->write_index;
+
+	for (i = 0; i < sg_buffer_count; i++) {
+		next_write_location = copy_to_ring_buffer(ring_buffer,
+				hv->rb_data_size, next_write_location,
+				(char *) sg_buffers[i].data, sg_buffers[i].length);
+	}
+
+	/*
+	 * Set previous packet start
+	 */
+	prev_indices = (uint64_t)ring_buffer->write_index << 32;
+
+	next_write_location = copy_to_ring_buffer(
+			ring_buffer, hv->rb_data_size, next_write_location,
+			(char *) &prev_indices, sizeof(uint64_t));
+
+	/*
+	 * Make sure we flush all writes before updating the writeIndex
+	 */
+	rte_compiler_barrier();
+
+	/*
+	 * Now, update the write location
+	 */
+	ring_buffer->write_index = next_write_location;
+
+	return 0;
+}
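+
+/*
+ * Editor's note (worked example): besides the payload, every write
+ * appends an 8-byte trailer whose upper 32 bits hold the write_index
+ * at the start of the packet; e.g. a packet written while
+ * write_index == 0x100 is followed by the value 0x100ULL << 32.
+ */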
+
+/*
+ * Read without advancing the read index.
+ */
+static int
+hv_ring_buffer_peek(struct hv_data  *hv, void *buffer, uint32_t buffer_len)
+{
+	struct hv_vmbus_ring_buffer *ring_buffer = hv->in;
+	uint32_t bytes_avail_to_write;
+	uint32_t bytes_avail_to_read;
+
+	get_ring_buffer_avail_bytes(hv, ring_buffer,
+			&bytes_avail_to_read,
+			&bytes_avail_to_write);
+
+	/* Make sure there is something to read */
+	if (bytes_avail_to_read < buffer_len)
+		return -EAGAIN;
+
+	copy_from_ring_buffer(hv, ring_buffer,
+		(char *)buffer, buffer_len, ring_buffer->read_index);
+
+	return 0;
+}
+
+/*
+ * Read and advance the read index.
+ */
+static int
+hv_ring_buffer_read(struct hv_data  *hv, void *buffer,
+		    uint32_t buffer_len, uint32_t offset)
+{
+	struct hv_vmbus_ring_buffer *ring_buffer = hv->in;
+	uint32_t bytes_avail_to_write;
+	uint32_t bytes_avail_to_read;
+	uint32_t next_read_location = 0;
+	uint64_t prev_indices = 0;
+
+	if (buffer_len == 0)
+		return -EINVAL;
+
+	get_ring_buffer_avail_bytes(
+			hv,
+			ring_buffer,
+			&bytes_avail_to_read,
+			&bytes_avail_to_write);
+
+	/*
+	 * Make sure there is something to read
+	 */
+	if (bytes_avail_to_read < buffer_len) {
+		PMD_PERROR_LOG(hv, DBG_RB, "bytes_avail_to_read = %u, buffer_len = %u",
+				bytes_avail_to_read, buffer_len);
+		return -EAGAIN;
+	}
+
+	next_read_location = (ring_buffer->read_index + offset) % hv->rb_data_size;
+
+	next_read_location = copy_from_ring_buffer(
+			hv,
+			ring_buffer,
+			(char *) buffer,
+			buffer_len,
+			next_read_location);
+
+	next_read_location = copy_from_ring_buffer(
+			hv,
+			ring_buffer,
+			(char *) &prev_indices,
+			sizeof(uint64_t),
+			next_read_location);
+
+	/*
+	 * Make sure all reads are done before we update the read index since
+	 * the writer may start writing to the read area once the read index
+	 * is updated.
+	 */
+	rte_compiler_barrier();
+
+	/*
+	 * Update the read index
+	 */
+	ring_buffer->read_index = next_read_location;
+
+	return 0;
+}
+
+/*
+ * VMBus
+ */
+
+/*
+ * Retrieve the raw packet on the specified channel
+ */
+static int
+hv_vmbus_channel_recv_packet_raw(struct hv_data  *hv, void *buffer,
+				 uint32_t        buffer_len,
+				 uint32_t        *buffer_actual_len,
+				 uint64_t        *request_id,
+				 int             mode)
+{
+	int ret;
+	uint32_t packet_len;
+	struct hv_vm_packet_descriptor desc;
+
+	*buffer_actual_len = 0;
+	*request_id = 0;
+
+	ret = hv_ring_buffer_peek(hv, &desc,
+			sizeof(struct hv_vm_packet_descriptor));
+
+	if (ret != 0)
+		return 0;
+
+	if ((desc.type == HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES
+				&& !(mode & 1)) ||
+			((desc.type == HV_VMBUS_PACKET_TYPE_COMPLETION) && !(mode & 2))) {
+		return -1;
+	}
+
+	packet_len = desc.length8 << 3;
+
+	*buffer_actual_len = packet_len;
+
+	if (unlikely(packet_len > buffer_len)) {
+		PMD_PERROR_LOG(hv, DBG_RX,
+				"Packet (%u bytes) does not fit in the supplied buffer, dropping it",
+				packet_len);
+		return -ENOMEM;
+	}
+
+	*request_id = desc.transaction_id;
+
+	/* Copy over the entire packet to the user buffer */
+	ret = hv_ring_buffer_read(hv, buffer, packet_len, 0);
+
+	return 0;
+}
+
+/*
+ * Trigger an event notification on the specified channel
+ */
+static void
+vmbus_channel_set_event(struct hv_data *hv)
+{
+	/* We assume that channel->offer_msg.monitor_allocated == 1 here;
+	 * otherwise this driver would not work */
+	/* Each uint32_t represents 32 channels */
+	__sync_or_and_fetch(((uint32_t *)hv->send_interrupt_page
+		+ ((hv->vmbus_device >> 5))), 1 << (hv->vmbus_device & 31)
+	);
+	__sync_or_and_fetch((uint32_t *)&hv->monitor_pages->
+			trigger_group[hv->monitor_group].u.pending, 1 << hv->monitor_bit);
+}
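+
+/*
+ * Editor's note (worked example): for vmbus_device == 37 the code
+ * above sets bit 37 & 31 == 5 in 32-bit word 37 >> 5 == 1 of the
+ * send interrupt page, i.e. send_interrupt_page[1] |= 1u << 5, and
+ * then raises the per-channel monitor trigger bit the same way.
+ */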
+
+/**
+ * @brief Send the specified buffer on the given channel
+ */
+static int
+hv_vmbus_channel_send_packet(struct hv_data *hv, void *buffer,
+			     uint32_t buffer_len, uint64_t request_id,
+			     enum hv_vmbus_packet_type type,
+			     uint32_t flags)
+{
+	struct hv_vmbus_sg_buffer_list buffer_list[3];
+	struct hv_vm_packet_descriptor desc;
+	uint32_t packet_len_aligned;
+	uint64_t aligned_data;
+	uint32_t packet_len;
+	int ret = 0;
+	uint32_t old_write = hv->out->write_index;
+
+	packet_len = sizeof(struct hv_vm_packet_descriptor) + buffer_len;
+	packet_len_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t));
+	aligned_data = 0;
+
+	/* Setup the descriptor */
+	desc.type = type;   /* HV_VMBUS_PACKET_TYPE_DATA_IN_BAND;             */
+	desc.flags = flags; /* HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED */
+	/* in 8-bytes granularity */
+	desc.data_offset8 = sizeof(struct hv_vm_packet_descriptor) >> 3;
+	desc.length8 = (uint16_t) (packet_len_aligned >> 3);
+	desc.transaction_id = request_id;
+
+	buffer_list[0].data = &desc;
+	buffer_list[0].length = sizeof(struct hv_vm_packet_descriptor);
+
+	buffer_list[1].data = buffer;
+	buffer_list[1].length = buffer_len;
+
+	buffer_list[2].data = &aligned_data;
+	buffer_list[2].length = packet_len_aligned - packet_len;
+
+	ret = hv_ring_buffer_write(hv, buffer_list, 3);
+
+	rte_mb();
+	if (!ret && !hv->out->interrupt_mask && hv->out->read_index == old_write)
+		vmbus_channel_set_event(hv);
+
+	return ret;
+}
+
+/*
+ * Send a range of single-page buffer packets using
+ * a GPADL Direct packet type
+ */
+static int
+hv_vmbus_channel_send_packet_pagebuffer(
+	struct hv_data  *hv,
+	struct hv_vmbus_page_buffer	page_buffers[],
+	uint32_t		page_count,
+	void			*buffer,
+	uint32_t		buffer_len,
+	uint64_t		request_id)
+{
+
+	int ret = 0;
+	uint32_t packet_len, packet_len_aligned, desc_size, i = 0;
+	struct hv_vmbus_sg_buffer_list buffer_list[3];
+	struct hv_vmbus_channel_packet_page_buffer desc;
+	uint64_t aligned_data = 0;
+	uint32_t old_write = hv->out->write_index;
+
+	if (page_count > HV_MAX_PAGE_BUFFER_COUNT) {
+		PMD_PERROR_LOG(hv, DBG_VMBUS, "page_count %u goes out of the limit",
+				page_count);
+		return -EINVAL;
+	}
+
+	/*
+	 * Adjust the size down since hv_vmbus_channel_packet_page_buffer
+	 * is the largest size we support
+	 */
+	desc_size = sizeof(struct hv_vmbus_channel_packet_page_buffer) -
+		((HV_MAX_PAGE_BUFFER_COUNT - page_count) *
+		 sizeof(struct hv_vmbus_page_buffer));
+	packet_len = desc_size + buffer_len;
+	packet_len_aligned = HV_ALIGN_UP(packet_len, sizeof(uint64_t));
+
+	/* Setup the descriptor */
+	desc.type = HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT;
+	desc.flags = HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED;
+	desc.data_offset8 = desc_size >> 3; /* in 8-bytes granularity */
+	desc.length8 = (uint16_t) (packet_len_aligned >> 3);
+	desc.transaction_id = request_id;
+	desc.range_count = page_count;
+
+	for (i = 0; i < page_count; i++) {
+		desc.range[i].length = page_buffers[i].length;
+		desc.range[i].offset = page_buffers[i].offset;
+		desc.range[i].pfn = page_buffers[i].pfn;
+	}
+
+	buffer_list[0].data = &desc;
+	buffer_list[0].length = desc_size;
+
+	buffer_list[1].data = buffer;
+	buffer_list[1].length = buffer_len;
+
+	buffer_list[2].data = &aligned_data;
+	buffer_list[2].length = packet_len_aligned - packet_len;
+
+	ret = hv_ring_buffer_write(hv, buffer_list, 3);
+	if (likely(ret == 0))
+		++hv->num_outstanding_sends;
+
+	rte_mb();
+	if (!ret && !hv->out->interrupt_mask &&
+			hv->out->read_index == old_write)
+		vmbus_channel_set_event(hv);
+
+	return ret;
+}
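+
+/*
+ * Editor's note (worked example, assuming the packed layout above):
+ * sizeof(struct hv_vmbus_channel_packet_page_buffer) is 24 + 19 * 16
+ * == 328 bytes, so for page_count == 2 the descriptor shrinks to
+ * 328 - 17 * 16 == 56 bytes and desc.data_offset8 == 56 >> 3 == 7.
+ */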
+
+/*
+ * NetVSC
+ */
+
+/*
+ * Net VSC on send
+ * Sends a packet on the specified Hyper-V device.
+ * Returns 0 on success, non-zero on failure.
+ */
+static int
+hv_nv_on_send(struct hv_data *hv, struct netvsc_packet *pkt)
+{
+	struct nvsp_msg send_msg;
+	int ret;
+
+	send_msg.msg_type = nvsp_msg_1_type_send_rndis_pkt;
+	if (pkt->is_data_pkt) {
+		/* 0 is RMC_DATA */
+		send_msg.msgs.send_rndis_pkt.chan_type = 0;
+	} else {
+		/* 1 is RMC_CONTROL */
+		send_msg.msgs.send_rndis_pkt.chan_type = 1;
+	}
+
+	/* Not using send buffer section */
+	send_msg.msgs.send_rndis_pkt.send_buf_section_idx =
+	    0xFFFFFFFF;
+	send_msg.msgs.send_rndis_pkt.send_buf_section_size = 0;
+
+	if (likely(pkt->page_buf_count)) {
+		ret = hv_vmbus_channel_send_packet_pagebuffer(hv,
+				pkt->page_buffers, pkt->page_buf_count,
+				&send_msg, sizeof(struct nvsp_msg),
+				pkt->is_data_pkt ? (uint64_t)(hv->txq->tx_tail + 1) : 0);
+	} else {
+		PMD_PERROR_LOG(hv, DBG_TX, "pkt->page_buf_count value can't be zero");
+		ret = -1;
+	}
+
+	return ret;
+}
+
+/*
+ * Net VSC on receive
+ *
+ * This function deals exclusively with virtual addresses.
+ */
+static void
+hv_nv_on_receive(struct hv_data *hv, struct hv_vm_packet_descriptor *pkt)
+{
+	struct hv_vm_transfer_page_packet_header *vm_xfer_page_pkt;
+	struct nvsp_msg *nvsp_msg_pkt;
+	struct netvsc_packet *net_vsc_pkt = NULL;
+	unsigned long start;
+	int count, i;
+
+	nvsp_msg_pkt = (struct nvsp_msg *)((unsigned long)pkt
+			+ (pkt->data_offset8 << 3));
+
+	/* Make sure this is a valid nvsp packet */
+	if (unlikely(nvsp_msg_pkt->msg_type != nvsp_msg_1_type_send_rndis_pkt)) {
+		PMD_PERROR_LOG(hv, DBG_RX, "NVSP packet is not valid");
+		return;
+	}
+
+	vm_xfer_page_pkt = (struct hv_vm_transfer_page_packet_header *)pkt;
+
+	if (unlikely(vm_xfer_page_pkt->transfer_page_set_id
+			!= NETVSC_RECEIVE_BUFFER_ID)) {
+		PMD_PERROR_LOG(hv, DBG_RX, "transfer_page_set_id is not valid");
+		return;
+	}
+
+	count = vm_xfer_page_pkt->range_count;
+
+	/*
+	 * Initialize the netvsc packet
+	 */
+	for (i = 0; i < count; ++i) {
+		net_vsc_pkt = hv->netvsc_packet;
+
+		net_vsc_pkt->tot_data_buf_len =
+			vm_xfer_page_pkt->ranges[i].byte_count;
+		net_vsc_pkt->page_buf_count = 1;
+
+		net_vsc_pkt->page_buffers[0].length =
+			vm_xfer_page_pkt->ranges[i].byte_count;
+
+		/* The virtual address of the packet in the receive buffer */
+		start = ((unsigned long)hv->recv_buf +
+				vm_xfer_page_pkt->ranges[i].byte_offset);
+
+		/* Page number of the virtual page containing packet start */
+		net_vsc_pkt->page_buffers[0].pfn = start >> PAGE_SHIFT;
+
+		/* Calculate the page relative offset */
+		net_vsc_pkt->page_buffers[0].offset =
+			vm_xfer_page_pkt->ranges[i].byte_offset & (PAGE_SIZE - 1);
+
+		/*
+		 * In this implementation, we are dealing with virtual
+		 * addresses exclusively.  Since we aren't using physical
+		 * addresses at all, we don't care if a packet crosses a
+		 * page boundary.  For this reason, the original code to
+		 * check for and handle page crossings has been removed.
+		 */
+
+		/*
+		 * Pass it to the upper layer.  The receive completion call
+		 * has been moved into this function.
+		 */
+		hv_rf_on_receive(hv, net_vsc_pkt);
+	}
+	/* Send a receive completion packet to the RNDIS device (i.e. NetVsp) */
+	hv_vmbus_channel_send_packet(hv, hv->rx_comp_msg, sizeof(struct nvsp_msg),
+			vm_xfer_page_pkt->d.transaction_id,
+			HV_VMBUS_PACKET_TYPE_COMPLETION, 0);
+}
+
+/*
+ * Net VSC on send completion
+ */
+static void
+hv_nv_on_send_completion(struct hv_data *hv, struct hv_vm_packet_descriptor *pkt)
+{
+	struct nvsp_msg *nvsp_msg_pkt;
+
+	nvsp_msg_pkt =
+	    (struct nvsp_msg *)((unsigned long)pkt + (pkt->data_offset8 << 3));
+
+	if (likely(nvsp_msg_pkt->msg_type ==
+				nvsp_msg_1_type_send_rndis_pkt_complete)) {
+		if (unlikely(hv->hlt_req_pending))
+			hv->hlt_req_sent = 1;
+		else if (pkt->transaction_id)
+			++hv->txq->tx_free;
+		--hv->num_outstanding_sends;
+		return;
+	}
+	PMD_PINFO_LOG(hv, DBG_TX, "unhandled completion (for kernel req or so)");
+}
+
+/*
+ * Analogue of bsd hv_nv_on_channel_callback
+ */
+static void
+hv_nv_complete_request(struct hv_data *hv, struct rndis_request *request)
+{
+	uint32_t bytes_rxed, cnt = 0;
+	uint64_t request_id;
+	struct hv_vm_packet_descriptor *desc;
+	uint8_t *buffer;
+	int     bufferlen = NETVSC_PACKET_SIZE;
+	int     ret = 0;
+
+	PMD_INIT_FUNC_TRACE();
+
+	hv->req = request;
+
+	buffer = rte_malloc(NULL, bufferlen, RTE_CACHE_LINE_SIZE);
+	if (!buffer) {
+		PMD_PERROR_LOG(hv, DBG_LOAD, "failed to allocate packet");
+		return;
+	}
+
+	do {
+		rte_delay_us(1);
+		ret = hv_vmbus_channel_recv_packet_raw(hv,
+				buffer, bufferlen, &bytes_rxed, &request_id, 3);
+		if (ret == 0) {
+			if (bytes_rxed > 0) {
+				desc = (struct hv_vm_packet_descriptor *)buffer;
+
+				switch (desc->type) {
+				case HV_VMBUS_PACKET_TYPE_COMPLETION:
+					hv_nv_on_send_completion(hv, desc);
+					break;
+				case HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES:
+					hv_nv_on_receive(hv, desc);
+					break;
+				default:
+					break;
+				}
+				PMD_PDEBUG_LOG(hv, DBG_LOAD,
+					       "Did %d attempts until non-empty data was receieved",
+					       cnt);
+				cnt = 0;
+			} else {
+				cnt++;
+			}
+		} else if (ret == -ENOMEM) {
+			/* Handle large packet */
+			PMD_PDEBUG_LOG(hv, DBG_LOAD,
+				       "recv_packet_raw returned -ENOMEM");
+			rte_free(buffer);
+			buffer = rte_malloc(NULL, bytes_rxed, RTE_CACHE_LINE_SIZE);
+			if (buffer == NULL) {
+				PMD_PERROR_LOG(hv, DBG_LOAD, "failed to allocate buffer");
+				break;
+			}
+			bufferlen = bytes_rxed;
+		} else {
+			PMD_PERROR_LOG(hv, DBG_LOAD, "Unexpected return code (%d)", ret);
+		}
+		if (!hv->req) {
+			PMD_PINFO_LOG(hv, DBG_LOAD, "Single request processed");
+			break;
+		}
+		if (cnt >= LOOP_CNT) {
+			PMD_PERROR_LOG(hv, DBG_LOAD, "Emergency break from the loop");
+			break;
+		}
+		if (hv->hlt_req_sent) {
+			PMD_PINFO_LOG(hv, DBG_LOAD, "Halt request processed");
+			break;
+		}
+		/* The field hv->req->response_msg.ndis_msg_type
+		 * should be set to non-zero value when response received
+		 */
+	} while (!hv->req->response_msg.ndis_msg_type);
+
+	rte_free(buffer);
+}
+
+/*
+ * RNDIS
+ */
+
+/*
+ * Create new RNDIS request
+ */
+static inline struct rndis_request *
+hv_rndis_request(struct hv_data *hv, uint32_t message_type,
+		uint32_t message_length)
+{
+	struct rndis_request *request;
+	struct rndis_msg *rndis_mesg;
+	struct rndis_set_request *set;
+	char mz_name[RTE_MEMZONE_NAMESIZE];
+	uint32_t size;
+
+	PMD_INIT_FUNC_TRACE();
+
+	request = rte_zmalloc("rndis_req", sizeof(struct rndis_request),
+			      RTE_CACHE_LINE_SIZE);
+
+	if (!request)
+		return NULL;
+
+	snprintf(mz_name, sizeof(mz_name), "hv_%d_%u_%d_%p",
+			hv->vmbus_device, message_type,
+			hv->new_request_id, request);
+
+	size = MAX(message_length, sizeof(struct rndis_msg));
+
+	request->request_msg_memzone = rte_memzone_reserve_aligned(mz_name,
+			size, rte_lcore_to_socket_id(rte_lcore_id()), 0, PAGE_SIZE);
+	if (!request->request_msg_memzone) {
+		PMD_PERROR_LOG(hv, DBG_LOAD, "memzone_reserve failed");
+		rte_free(request);
+		return NULL;
+	}
+	request->request_msg = request->request_msg_memzone->addr;
+	rndis_mesg = request->request_msg;
+	rndis_mesg->ndis_msg_type = message_type;
+	rndis_mesg->msg_len = message_length;
+
+	/*
+	 * Set the request id. This field is always after the rndis header
+	 * for request/response packet types so we just use the set_request
+	 * as a template.
+	 */
+	set = &rndis_mesg->msg.set_request;
+	hv->new_request_id++;
+	set->request_id = hv->new_request_id;
+
+	return request;
+}
+
+/*
+ * RNDIS filter
+ */
+
+static void
+hv_rf_receive_response(
+	struct hv_data       *hv,
+	struct rndis_msg     *response)
+{
+	struct rndis_request *request = hv->req;
+
+	PMD_INIT_FUNC_TRACE();
+
+	if (response->msg_len <= sizeof(struct rndis_msg)) {
+		rte_memcpy(&request->response_msg, response,
+				response->msg_len);
+	} else {
+		if (response->ndis_msg_type == REMOTE_NDIS_INITIALIZE_CMPLT) {
+			request->response_msg.msg.init_complete.status =
+				STATUS_BUFFER_OVERFLOW;
+		}
+		PMD_PERROR_LOG(hv, DBG_LOAD, "response buffer overflow\n");
+	}
+}
+
+/*
+ * RNDIS filter receive indicate status
+ */
+static void
+hv_rf_receive_indicate_status(struct hv_data *hv, struct rndis_msg *response)
+{
+	struct rndis_indicate_status *indicate = &response->msg.indicate_status;
+
+	PMD_INIT_FUNC_TRACE();
+
+	if (indicate->status == RNDIS_STATUS_MEDIA_CONNECT)
+		hv->link_status = 1;
+	else if (indicate->status == RNDIS_STATUS_MEDIA_DISCONNECT)
+		hv->link_status = 0;
+	else if (indicate->status == RNDIS_STATUS_INVALID_DATA)
+		PMD_PERROR_LOG(hv, DBG_RX, "Invalid data in RNDIS message");
+	else
+		PMD_PERROR_LOG(hv, DBG_RX, "Unsupported status: %u", indicate->status);
+}
+
+/*
+ * RNDIS filter receive data
+ */
+static void
+hv_rf_receive_data(struct hv_data *hv, struct rndis_msg *msg,
+		struct netvsc_packet *pkt)
+{
+	struct rte_mbuf *m_new;
+	struct hv_rx_queue *rxq = hv->rxq;
+	struct rndis_packet *rndis_pkt;
+	uint32_t data_offset;
+
+	if (unlikely(hv->closed))
+		return;
+
+	rndis_pkt = &msg->msg.packet;
+
+	if (unlikely(hv->max_rx_pkt_len < rndis_pkt->data_length)) {
+		PMD_PWARN_LOG(hv, DBG_RX, "Packet is too large (%db), dropping.",
+				rndis_pkt->data_length);
+		++hv->stats.ierrors;
+		return;
+	}
+
+	/* Remove rndis header, then pass data packet up the stack */
+	data_offset = RNDIS_HEADER_SIZE + rndis_pkt->data_offset;
+
+	/* L2 frame length, with L2 header, not including CRC */
+	pkt->tot_data_buf_len        = rndis_pkt->data_length;
+	pkt->page_buffers[0].offset += data_offset;
+	/* Buffer length now L2 frame length plus trailing junk */
+	pkt->page_buffers[0].length -= data_offset;
+
+	pkt->vlan_tci = 0;
+
+	/*
+	 * Just put data into appropriate mbuf, all further work will be done
+	 * by the upper layer (mbuf replacement, index adjustment, etc)
+	 */
+	m_new = rxq->sw_ring[rxq->rx_tail];
+	if (++rxq->rx_tail == rxq->nb_rx_desc)
+		rxq->rx_tail = 0;
+
+	/*
+	 * Copy the received packet to mbuf.
+	 * The copy is required since the memory pointed to by netvsc_packet
+	 * cannot be reallocated
+	 */
+	uint8_t *vaddr = (uint8_t *)
+		(pkt->page_buffers[0].pfn << PAGE_SHIFT)
+		+ pkt->page_buffers[0].offset;
+
+	m_new->nb_segs = 1;
+	m_new->pkt_len = m_new->data_len = pkt->tot_data_buf_len;
+	rte_memcpy(rte_pktmbuf_mtod(m_new, void *), vaddr, m_new->data_len);
+
+	if (pkt->vlan_tci) {
+		m_new->vlan_tci = pkt->vlan_tci;
+		m_new->ol_flags |= PKT_RX_VLAN_PKT;
+	}
+
+	hv->pkt_rxed = 1;
+}
+
+/*
+ * RNDIS filter receive data, jumbo frames support
+ */
+static void
+hv_rf_receive_data_sg(struct hv_data *hv, struct rndis_msg *msg,
+		struct netvsc_packet *pkt)
+{
+	struct rte_mbuf *m_new;
+	struct hv_rx_queue *rxq = hv->rxq;
+	struct rndis_packet *rndis_pkt;
+	uint32_t data_offset;
+
+	if (unlikely(hv->closed))
+		return;
+
+	rndis_pkt = &msg->msg.packet;
+
+	/* Remove rndis header, then pass data packet up the stack */
+	data_offset = RNDIS_HEADER_SIZE + rndis_pkt->data_offset;
+
+	/* L2 frame length, with L2 header, not including CRC */
+	pkt->tot_data_buf_len        = rndis_pkt->data_length;
+	pkt->page_buffers[0].offset += data_offset;
+	/* Buffer length now L2 frame length plus trailing junk */
+	pkt->page_buffers[0].length -= data_offset;
+
+	pkt->vlan_tci = 0;
+
+	/*
+	 * Just put data into appropriate mbuf, all further work will be done
+	 * by the upper layer (mbuf replacement, index adjustment, etc)
+	 */
+	m_new = rxq->sw_ring[rxq->rx_tail];
+	if (++rxq->rx_tail == rxq->nb_rx_desc)
+		rxq->rx_tail = 0;
+
+	/*
+	 * Copy the received packet to mbuf.
+	 * The copy is required since the memory pointed to by netvsc_packet
+	 * cannot be reallocated
+	 */
+	uint8_t *vaddr = (uint8_t *)
+		(pkt->page_buffers[0].pfn << PAGE_SHIFT)
+		+ pkt->page_buffers[0].offset;
+
+	/* Scatter-gather emulation */
+	uint32_t carry_len = pkt->tot_data_buf_len;
+	struct rte_mbuf *m_next;
+
+	m_new->pkt_len = carry_len;
+	m_new->nb_segs = (carry_len - 1) / hv->max_rx_pkt_len + 1;
+
+	while (1) {
+		m_new->data_len = MIN(carry_len, hv->max_rx_pkt_len);
+		rte_memcpy(rte_pktmbuf_mtod(m_new, void *),
+			   vaddr, m_new->data_len);
+		vaddr += m_new->data_len;
+
+		if (carry_len <= hv->max_rx_pkt_len)
+			break;
+
+		carry_len -= hv->max_rx_pkt_len;
+		m_next = rxq->sw_ring[rxq->rx_tail];
+		if (++rxq->rx_tail == rxq->nb_rx_desc)
+			rxq->rx_tail = 0;
+		m_new->next = m_next;
+		m_new = m_next;
+	}
+
+	if (pkt->vlan_tci) {
+		m_new->vlan_tci = pkt->vlan_tci;
+		m_new->ol_flags |= PKT_RX_VLAN_PKT;
+	}
+
+	hv->pkt_rxed = 1;
+}
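+
+/*
+ * Editor's note (worked example): nb_segs above is a ceiling
+ * division.  Assuming max_rx_pkt_len == 1536, a 4000-byte frame
+ * yields (4000 - 1) / 1536 + 1 == 3 segments of 1536, 1536 and
+ * 928 bytes.
+ */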
+
+static int
+hv_rf_send_request(struct hv_data *hv, struct rndis_request *request)
+{
+	struct netvsc_packet *packet;
+
+	PMD_INIT_FUNC_TRACE();
+	/* Set up the packet to send it */
+	packet = &request->pkt;
+
+	packet->is_data_pkt = 0;
+	packet->tot_data_buf_len = request->request_msg->msg_len;
+	packet->page_buf_count = 1;
+
+	packet->page_buffers[0].pfn =
+		(request->request_msg_memzone->phys_addr) >> PAGE_SHIFT;
+	packet->page_buffers[0].length = request->request_msg->msg_len;
+	packet->page_buffers[0].offset =
+	    (unsigned long)request->request_msg & (PAGE_SIZE - 1);
+
+	return hv_nv_on_send(hv, packet);
+}
+
+static void u8_to_u16(const char *src, int len, char *dst)
+{
+	int i;
+
+	for (i = 0; i < len; ++i) {
+		dst[2 * i] = src[i];
+		dst[2 * i + 1] = 0;
+	}
+}
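+
+/*
+ * Editor's sketch (illustrative only, kept out of the build):
+ * u8_to_u16() widens an ASCII string to the UTF-16LE form the host
+ * expects for RNDIS string parameters, e.g. "ab" becomes
+ * { 'a', 0, 'b', 0 }.
+ */
+#if 0
+static void u8_to_u16_example(void)
+{
+	char wide[2 * MAC_STRLEN];
+
+	u8_to_u16(MAC_PARAM_STR, MAC_STRLEN, wide);
+	/* wide now holds "NetworkAddress" with a zero byte after each char */
+}
+#endif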
+
+int
+hv_rf_set_device_mac(struct hv_data *hv, uint8_t *macaddr)
+{
+	struct rndis_request *request;
+	struct rndis_set_request *set_request;
+	struct rndis_config_parameter_info *info;
+	struct rndis_set_complete *set_complete;
+	char mac_str[2*ETHER_ADDR_LEN+1];
+	wchar_t *param_value, *param_name;
+	uint32_t status;
+	uint32_t message_len = sizeof(struct rndis_config_parameter_info) +
+		2 * MAC_STRLEN + 4 * ETHER_ADDR_LEN;
+	int ret, i;
+
+	request = hv_rndis_request(hv, REMOTE_NDIS_SET_MSG,
+		RNDIS_MESSAGE_SIZE(struct rndis_set_request) + message_len);
+	if (!request)
+		return -ENOMEM;
+
+	set_request = &request->request_msg->msg.set_request;
+	set_request->oid = RNDIS_OID_GEN_RNDIS_CONFIG_PARAMETER;
+	set_request->device_vc_handle = 0;
+	set_request->info_buffer_offset = sizeof(struct rndis_set_request);
+	set_request->info_buffer_length = message_len;
+
+	info = (struct rndis_config_parameter_info *)((ulong)set_request +
+		set_request->info_buffer_offset);
+	info->parameter_type = RNDIS_CONFIG_PARAM_TYPE_STRING;
+	info->parameter_name_offset =
+		sizeof(struct rndis_config_parameter_info);
+	info->parameter_name_length = 2 * MAC_STRLEN;
+	info->parameter_value_offset =
+		info->parameter_name_offset + info->parameter_name_length;
+	/* Multiply by 2 for the hex string representation and by 2
+	 * again for the UTF-16 encoding
+	 */
+	info->parameter_value_length = 4 * ETHER_ADDR_LEN;
+	param_name = (wchar_t *)((ulong)info + info->parameter_name_offset);
+	param_value = (wchar_t *)((ulong)info + info->parameter_value_offset);
+
+	u8_to_u16(MAC_PARAM_STR, MAC_STRLEN, (char *)param_name);
+	for (i = 0; i < ETHER_ADDR_LEN; ++i) {
+		mac_str[2*i] = high(macaddr[i]);
+		mac_str[2*i+1] = low(macaddr[i]);
+	}
+
+	u8_to_u16((const char *)mac_str, 2 * ETHER_ADDR_LEN, (char *)param_value);
+
+	ret = hv_rf_send_request(hv, request);
+	if (ret)
+		goto cleanup;
+
+	request->response_msg.msg.set_complete.status = 0xFFFF;
+	hv_nv_complete_request(hv, request);
+	set_complete = &request->response_msg.msg.set_complete;
+	if (set_complete->status == 0xFFFF) {
+		/* Host is not responding, we can't free request in this case */
+		ret = -1;
+		PMD_PERROR_LOG(hv, DBG_LOAD, "Host is not responding");
+		goto exit;
+	}
+	/* Response received, check status */
+	status = set_complete->status;
+	if (status) {
+		/* Bad response status, return error */
+		PMD_PERROR_LOG(hv, DBG_LOAD, "set_complete->status = %u\n", status);
+		ret = -EINVAL;
+	}
+
+cleanup:
+	rte_free(request);
+exit:
+	return ret;
+}
+
+/*
+ * RNDIS filter on receive
+ */
+static int
+hv_rf_on_receive(struct hv_data *hv, struct netvsc_packet *pkt)
+{
+	struct rndis_msg rndis_mesg;
+	struct rndis_msg *rndis_hdr;
+
+	/* Shift virtual page number to form virtual page address */
+	rndis_hdr = (struct rndis_msg *)(pkt->page_buffers[0].pfn << PAGE_SHIFT);
+
+	rndis_hdr = (void *)((unsigned long)rndis_hdr
+			+ pkt->page_buffers[0].offset);
+
+	/*
+	 * Make sure we got a valid rndis message
+	 * Fixme:  There seems to be a bug in set completion msg where
+	 * its msg_len is 16 bytes but the byte_count field in the
+	 * xfer page range shows 52 bytes
+	 */
+	if (unlikely(pkt->tot_data_buf_len != rndis_hdr->msg_len)) {
+		++hv->stats.ierrors;
+		PMD_PERROR_LOG(hv, DBG_RX,
+			       "invalid rndis message? (expected %u "
+			       "bytes got %u)... dropping this message",
+			       rndis_hdr->msg_len, pkt->tot_data_buf_len);
+		return -1;
+	}
+
+	rte_memcpy(&rndis_mesg, rndis_hdr,
+	    (rndis_hdr->msg_len > sizeof(struct rndis_msg)) ?
+	    sizeof(struct rndis_msg) : rndis_hdr->msg_len);
+
+	switch (rndis_mesg.ndis_msg_type) {
+
+	/* data message */
+	case REMOTE_NDIS_PACKET_MSG:
+		hv->receive_callback(hv, &rndis_mesg, pkt);
+		break;
+	/* completion messages */
+	case REMOTE_NDIS_INITIALIZE_CMPLT:
+	case REMOTE_NDIS_QUERY_CMPLT:
+	case REMOTE_NDIS_SET_CMPLT:
+	case REMOTE_NDIS_RESET_CMPLT:
+	case REMOTE_NDIS_KEEPALIVE_CMPLT:
+		hv_rf_receive_response(hv, &rndis_mesg);
+		break;
+	/* notification message */
+	case REMOTE_NDIS_INDICATE_STATUS_MSG:
+		hv_rf_receive_indicate_status(hv, &rndis_mesg);
+		break;
+	default:
+		PMD_PERROR_LOG(hv, DBG_RX, "hv_rf_on_receive():  Unknown msg_type 0x%x",
+		    rndis_mesg.ndis_msg_type);
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * RNDIS filter on send
+ */
+int
+hv_rf_on_send(struct hv_data *hv, struct netvsc_packet *pkt)
+{
+	struct rndis_msg *rndis_mesg;
+	struct rndis_packet *rndis_pkt;
+	uint32_t rndis_msg_size;
+
+	/* Add the rndis header */
+	rndis_mesg = (struct rndis_msg *)pkt->extension;
+
+	memset(rndis_mesg, 0, sizeof(struct rndis_msg));
+
+	rndis_msg_size = RNDIS_MESSAGE_SIZE(struct rndis_packet);
+
+	rndis_mesg->ndis_msg_type = REMOTE_NDIS_PACKET_MSG;
+	rndis_mesg->msg_len = pkt->tot_data_buf_len + rndis_msg_size;
+
+	rndis_pkt = &rndis_mesg->msg.packet;
+	rndis_pkt->data_offset = sizeof(struct rndis_packet);
+	rndis_pkt->data_length = pkt->tot_data_buf_len;
+
+	pkt->is_data_pkt = 1;
+
+	/*
+	 * Invoke netvsc send.  If return status is bad, the caller now
+	 * resets the context pointers before retrying.
+	 */
+	return hv_nv_on_send(hv, pkt);
+}
+
+static int
+hv_rf_init_device(struct hv_data *hv)
+{
+	struct rndis_request *request;
+	struct rndis_initialize_request *init;
+	struct rndis_initialize_complete *init_complete;
+	uint32_t status;
+	int ret;
+
+	PMD_INIT_FUNC_TRACE();
+
+	request = hv_rndis_request(hv, REMOTE_NDIS_INITIALIZE_MSG,
+	    RNDIS_MESSAGE_SIZE(struct rndis_initialize_request));
+	if (!request) {
+		ret = -1;
+		goto cleanup;
+	}
+
+	/* Set up the rndis initialize request */
+	init = &request->request_msg->msg.init_request;
+	init->major_version = RNDIS_MAJOR_VERSION;
+	init->minor_version = RNDIS_MINOR_VERSION;
+	/*
+	 * Per the RNDIS document, this should be set to the max MTU
+	 * plus the header size.  However, 2048 works fine, so leaving
+	 * it as is.
+	 */
+	init->max_xfer_size = 2048;
+
+	hv->rndis_dev_state = RNDIS_DEV_INITIALIZING;
+
+	ret = hv_rf_send_request(hv, request);
+	if (ret != 0) {
+		hv->rndis_dev_state = RNDIS_DEV_UNINITIALIZED;
+		goto cleanup;
+	}
+
+	/* Set the status to -1 so we can tell whether the host actually responded */
+	request->response_msg.msg.init_complete.status = -1;
+	hv_nv_complete_request(hv, request);
+
+	init_complete = &request->response_msg.msg.init_complete;
+	status = init_complete->status;
+	if (status == 0) {
+		PMD_PINFO_LOG(hv, DBG_LOAD, "Remote NDIS device is initialized");
+		hv->rndis_dev_state = RNDIS_DEV_INITIALIZED;
+		ret = 0;
+	} else {
+		PMD_PINFO_LOG(hv, DBG_LOAD, "Remote NDIS device left uninitialized");
+		hv->rndis_dev_state = RNDIS_DEV_UNINITIALIZED;
+		ret = -1;
+	}
+
+cleanup:
+	rte_free(request);
+
+	return ret;
+}
+
+/*
+ * RNDIS filter query device
+ */
+static int
+hv_rf_query_device(struct hv_data *hv, uint32_t oid, void *result,
+		   uint32_t result_size)
+{
+	struct rndis_request *request;
+	struct rndis_query_request *query;
+	struct rndis_query_complete *query_complete;
+	int ret = 0;
+
+	PMD_INIT_FUNC_TRACE();
+
+	request = hv_rndis_request(hv, REMOTE_NDIS_QUERY_MSG,
+	    RNDIS_MESSAGE_SIZE(struct rndis_query_request));
+	if (request == NULL) {
+		ret = -1;
+		goto cleanup;
+	}
+
+	/* Set up the rndis query */
+	query = &request->request_msg->msg.query_request;
+	query->oid = oid;
+	query->info_buffer_offset = sizeof(struct rndis_query_request);
+	query->info_buffer_length = 0;
+	query->device_vc_handle = 0;
+
+	ret = hv_rf_send_request(hv, request);
+	if (ret != 0) {
+		PMD_PERROR_LOG(hv, DBG_TX, "RNDISFILTER request failed to Send!");
+		goto cleanup;
+	}
+
+	hv_nv_complete_request(hv, request);
+
+	/* Copy the response back */
+	query_complete = &request->response_msg.msg.query_complete;
+
+	if (query_complete->info_buffer_length > result_size) {
+		ret = -EINVAL;
+		goto cleanup;
+	}
+
+	rte_memcpy(result, (void *)((unsigned long)query_complete +
+	    query_complete->info_buffer_offset),
+	    query_complete->info_buffer_length);
+
+cleanup:
+	rte_free(request);
+
+	return ret;
+}
+
+/*
+ * RNDIS filter query device MAC address
+ */
+static inline int
+hv_rf_query_device_mac(struct hv_data *hv)
+{
+	uint32_t size = HW_MACADDR_LEN;
+
+	int ret = hv_rf_query_device(hv, RNDIS_OID_802_3_PERMANENT_ADDRESS,
+			&hv->hw_mac_addr, size);
+	PMD_PDEBUG_LOG(hv, DBG_TX, "MAC: %02x:%02x:%02x:%02x:%02x:%02x, ret = %d",
+			hv->hw_mac_addr[0], hv->hw_mac_addr[1], hv->hw_mac_addr[2],
+			hv->hw_mac_addr[3], hv->hw_mac_addr[4], hv->hw_mac_addr[5],
+			ret);
+	return ret;
+}
+
+/*
+ * RNDIS filter query device link status
+ */
+static inline int
+hv_rf_query_device_link_status(struct hv_data *hv)
+{
+	uint32_t size = sizeof(uint32_t);
+	/* Set all bits to 1 so we can tell whether the response actually updated it */
+	uint32_t status = -1;
+
+	int ret = hv_rf_query_device(hv, RNDIS_OID_GEN_MEDIA_CONNECT_STATUS,
+			&status, size);
+	hv->link_status = status ? 0 : 1;
+	PMD_PDEBUG_LOG(hv, DBG_TX, "Link Status: %s",
+			hv->link_status ? "Up" : "Down");
+	return ret;
+}
+
+int
+hv_rf_on_device_add(struct hv_data *hv)
+{
+	int ret;
+
+	PMD_INIT_FUNC_TRACE();
+
+	hv->closed = 0;
+	hv->rb_data_size = hv->rb_size - sizeof(struct hv_vmbus_ring_buffer);
+	PMD_PDEBUG_LOG(hv, DBG_LOAD, "hv->rb_data_size = %u", hv->rb_data_size);
+
+	if (unlikely(hv->in->interrupt_mask == 0)) {
+		PMD_PINFO_LOG(hv, DBG_LOAD, "Disabling interrupts from host");
+		hv->in->interrupt_mask = 1;
+		rte_mb();
+	}
+
+	hv->netvsc_packet = rte_zmalloc("", sizeof(struct netvsc_packet),
+					RTE_CACHE_LINE_SIZE);
+	if (hv->netvsc_packet == NULL)
+		return -ENOMEM;
+	hv->netvsc_packet->is_data_pkt = 1;
+
+	hv->rx_comp_msg = rte_zmalloc("", sizeof(struct nvsp_msg),
+				      RTE_CACHE_LINE_SIZE);
+	if (hv->rx_comp_msg == NULL)
+		return -ENOMEM;
+
+	hv->rx_comp_msg->msg_type = nvsp_msg_1_type_send_rndis_pkt_complete;
+	hv->rx_comp_msg->msgs.send_rndis_pkt_complete.status =
+		nvsp_status_success;
+
+	memset(&hv->stats, 0, sizeof(struct hv_stats));
+
+	hv->receive_callback = hv_rf_receive_data;
+
+	/* Complete requests that were sent by the kernel-space part of the driver */
+	hv_nv_complete_request(hv, NULL);
+	hv_nv_complete_request(hv, NULL);
+
+	hv->rndis_dev_state = RNDIS_DEV_UNINITIALIZED;
+
+	/* Send the rndis initialization message */
+	ret = hv_rf_init_device(hv);
+	if (ret != 0) {
+		PMD_PERROR_LOG(hv, DBG_LOAD, "rndis init failed!");
+		hv_rf_on_device_remove(hv);
+		return ret;
+	}
+
+	/* Get the mac address */
+	ret = hv_rf_query_device_mac(hv);
+	if (ret != 0) {
+		PMD_PERROR_LOG(hv, DBG_LOAD, "rndis query mac failed!");
+		hv_rf_on_device_remove(hv);
+		return ret;
+	}
+
+	return ret;
+}
+
+#define HALT_COMPLETION_WAIT_COUNT      25
+
+/*
+ * RNDIS filter halt device
+ */
+static int
+hv_rf_halt_device(struct hv_data *hv)
+{
+	struct rndis_request *request;
+	struct rndis_halt_request *halt;
+	int i, ret;
+
+	PMD_INIT_FUNC_TRACE();
+
+	/* Attempt to do a rndis device halt */
+	request = hv_rndis_request(hv, REMOTE_NDIS_HALT_MSG,
+	    RNDIS_MESSAGE_SIZE(struct rndis_halt_request));
+	if (!request) {
+		PMD_PERROR_LOG(hv, DBG_LOAD, "Unable to create RNDIS_HALT request");
+		return -1;
+	}
+
+	/* initialize "poor man's semaphore" */
+	hv->hlt_req_sent = 0;
+
+	/* Set up the rndis halt request */
+	halt = &request->request_msg->msg.halt_request;
+	hv->new_request_id++;
+	halt->request_id = hv->new_request_id;
+
+	ret = hv_rf_send_request(hv, request);
+	if (ret) {
+		PMD_PERROR_LOG(hv, DBG_LOAD, "Failed to send RNDIS_HALT request: %d",
+				ret);
+		return ret;
+	}
+
+	/*
+	 * Wait for halt response from halt callback.  We must wait for
+	 * the transaction response before freeing the request and other
+	 * resources.
+	 */
+	for (i = HALT_COMPLETION_WAIT_COUNT; i > 0; i--) {
+		hv_nv_complete_request(hv, request);
+		if (hv->hlt_req_sent != 0) {
+			PMD_PDEBUG_LOG(hv, DBG_LOAD, "Completed HALT request at %d try",
+					HALT_COMPLETION_WAIT_COUNT - i + 1);
+			break;
+		}
+	}
+	hv->hlt_req_sent = 0;
+	if (i == 0) {
+		PMD_PERROR_LOG(hv, DBG_LOAD, "RNDIS_HALT request was not completed!");
+		rte_free(request);
+		return -1;
+	}
+
+	hv->rndis_dev_state = RNDIS_DEV_UNINITIALIZED;
+
+	rte_free(request);
+
+	return 0;
+}
+
+#define HV_TX_DRAIN_TRIES 50
+static inline int
+hyperv_tx_drain(struct hv_data *hv)
+{
+	int i = HV_TX_DRAIN_TRIES;
+
+	PMD_PDEBUG_LOG(hv, DBG_LOAD, "Waiting for TXs to be completed...");
+	while (hv->num_outstanding_sends > 0 && --i) {
+		hv_nv_complete_request(hv, NULL);
+		rte_delay_ms(100);
+	}
+
+	return hv->num_outstanding_sends;
+}
+
+/*
+ * RNDIS filter on device remove
+ */
+int
+hv_rf_on_device_remove(struct hv_data *hv)
+{
+	int ret;
+
+	PMD_INIT_FUNC_TRACE();
+	hv->closed = 1;
+	if (hyperv_tx_drain(hv) > 0) {
+		/* Hypervisor is not responding, exit with error here */
+		PMD_PWARN_LOG(hv, DBG_LOAD, "Can't drain TX queue: no response");
+		return -EAGAIN;
+	}
+	PMD_PDEBUG_LOG(hv, DBG_LOAD, "TX queue is empty, can halt the device");
+
+	/* Halt and release the rndis device */
+	hv->hlt_req_pending = 1;
+	ret = hv_rf_halt_device(hv);
+	hv->hlt_req_pending = 0;
+
+	rte_free(hv->netvsc_packet);
+
+	return ret;
+}
+
+/*
+ * RNDIS filter set packet filter
+ * Sends an rndis request with the new filter, then waits for a response
+ * from the host.
+ * Returns zero on success, non-zero on failure.
+ */
+static int
+hv_rf_set_packet_filter(struct hv_data *hv, uint32_t new_filter)
+{
+	struct rndis_request *request;
+	struct rndis_set_request *set;
+	struct rndis_set_complete *set_complete;
+	uint32_t status;
+	int ret;
+
+	PMD_INIT_FUNC_TRACE();
+
+	request = hv_rndis_request(hv, REMOTE_NDIS_SET_MSG,
+			RNDIS_MESSAGE_SIZE(struct rndis_set_request) + sizeof(uint32_t));
+	if (!request) {
+		ret = -1;
+		goto cleanup;
+	}
+
+	/* Set up the rndis set */
+	set = &request->request_msg->msg.set_request;
+	set->oid = RNDIS_OID_GEN_CURRENT_PACKET_FILTER;
+	set->info_buffer_length = sizeof(uint32_t);
+	set->info_buffer_offset = sizeof(struct rndis_set_request);
+
+	rte_memcpy((void *)((unsigned long)set + sizeof(struct rndis_set_request)),
+			&new_filter, sizeof(uint32_t));
+
+	ret = hv_rf_send_request(hv, request);
+	if (ret)
+		goto cleanup;
+
+	/*
+	 * Wait for the response from the host.
+	 */
+	request->response_msg.msg.set_complete.status = 0xFFFF;
+	hv_nv_complete_request(hv, request);
+
+	set_complete = &request->response_msg.msg.set_complete;
+	if (set_complete->status == 0xFFFF) {
+		/* Host is not responding, we can't free request in this case */
+		ret = -1;
+		goto exit;
+	}
+	/* Response received, check status */
+	status = set_complete->status;
+	if (status)
+		/* Bad response status, return error */
+		ret = -2;
+
+cleanup:
+	rte_free(request);
+exit:
+	return ret;
+}
+
+/*
+ * RNDIS filter open device
+ */
+int
+hv_rf_on_open(struct hv_data *hv)
+{
+	int ret;
+
+	if (hv->closed)
+		return 0;
+
+	if (hv->jumbo_frame_support)
+		hv->receive_callback = hv_rf_receive_data_sg;
+
+	ret = hyperv_set_rx_mode(hv, 1, 0);
+	if (!ret) {
+		PMD_PDEBUG_LOG(hv, DBG_LOAD, "RNDIS device opened");
+		hv->rndis_dev_state = RNDIS_DEV_DATAINITIALIZED;
+	} else {
+		PMD_PERROR_LOG(hv, DBG_LOAD, "RNDIS device is left unopened");
+	}
+
+	return ret;
+}
+
+/*
+ * RNDIS filter on close
+ */
+int
+hv_rf_on_close(struct hv_data *hv)
+{
+	int ret;
+
+	PMD_INIT_FUNC_TRACE();
+
+	if (hv->closed)
+		return 0;
+
+	if (hv->rndis_dev_state != RNDIS_DEV_DATAINITIALIZED) {
+		PMD_PDEBUG_LOG(hv, DBG_LOAD, "RNDIS device state should be"
+				" RNDIS_DEV_DATAINITIALIZED, but now it is %u",
+				hv->rndis_dev_state);
+		return 0;
+	}
+
+	ret = hv_rf_set_packet_filter(hv, 0);
+	if (!ret) {
+		PMD_PDEBUG_LOG(hv, DBG_LOAD, "RNDIS device closed");
+		hv->rndis_dev_state = RNDIS_DEV_INITIALIZED;
+	} else {
+		PMD_PDEBUG_LOG(hv, DBG_LOAD, "RNDIS device is left unclosed");
+	}
+
+	return ret;
+}
+
+/*
+ * RX Flow
+ */
+int
+hyperv_get_buffer(struct hv_data *hv, void *buffer, uint32_t bufferlen)
+{
+	uint32_t bytes_rxed;
+	uint64_t request_id;
+	struct hv_vm_packet_descriptor *desc;
+
+	int ret = hv_vmbus_channel_recv_packet_raw(hv, buffer, bufferlen,
+			&bytes_rxed, &request_id, 1);
+	if (likely(ret == 0)) {
+		if (bytes_rxed) {
+			desc = (struct hv_vm_packet_descriptor *)buffer;
+
+			if (likely(desc->type ==
+						HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES)) {
+				hv->pkt_rxed = 0;
+				hv_nv_on_receive(hv, desc);
+				return hv->pkt_rxed;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * TX completions handler
+ */
+void
+hyperv_scan_comps(struct hv_data *hv, int allow_rx_drop)
+{
+	uint32_t bytes_rxed;
+	uint64_t request_id;
+
+	while (1) {
+		int ret = hv_vmbus_channel_recv_packet_raw(hv, hv->desc, PAGE_SIZE,
+			&bytes_rxed, &request_id, 2 | allow_rx_drop);
+
+		if (ret != 0 || !bytes_rxed)
+			break;
+
+		if (likely(hv->desc->type == HV_VMBUS_PACKET_TYPE_COMPLETION))
+			hv_nv_on_send_completion(hv, hv->desc);
+	}
+}
+
+/*
+ * Get link status
+ */
+uint8_t
+hyperv_get_link_status(struct hv_data *hv)
+{
+	if (hv_rf_query_device_link_status(hv))
+		return 2;
+	return hv->link_status;
+}
+
+/*
+ * Set/Reset RX mode
+ */
+int
+hyperv_set_rx_mode(struct hv_data *hv, uint8_t promisc, uint8_t mcast)
+{
+	PMD_INIT_FUNC_TRACE();
+
+	if (!promisc) {
+		return hv_rf_set_packet_filter(hv,
+				NDIS_PACKET_TYPE_BROADCAST                   |
+				(mcast ? NDIS_PACKET_TYPE_ALL_MULTICAST : 0) |
+				NDIS_PACKET_TYPE_DIRECTED);
+	}
+
+	return hv_rf_set_packet_filter(hv, NDIS_PACKET_TYPE_PROMISCUOUS);
+}
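+
+/*
+ * Editor's note (worked example): with the NDIS_PACKET_TYPE_* values
+ * from hyperv_drv.h, hyperv_set_rx_mode(hv, 0, 1) programs the filter
+ * DIRECTED | ALL_MULTICAST | BROADCAST == 0x0000000d, while
+ * hyperv_set_rx_mode(hv, 1, 0) programs PROMISCUOUS == 0x00000020.
+ */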
diff --git a/lib/librte_pmd_hyperv/hyperv_drv.h b/lib/librte_pmd_hyperv/hyperv_drv.h
new file mode 100644
index 0000000..22acad5
--- /dev/null
+++ b/lib/librte_pmd_hyperv/hyperv_drv.h
@@ -0,0 +1,558 @@ 
+/*-
+ * Copyright (c) 2009-2012 Microsoft Corp.
+ * Copyright (c) 2010-2012 Citrix Inc.
+ * Copyright (c) 2012 NetApp Inc.
+ * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef _HYPERV_DRV_H_
+#define _HYPERV_DRV_H_
+
+/*
+ * Definitions from hyperv.h
+ */
+#define HW_MACADDR_LEN	6
+#define HV_MAX_PAGE_BUFFER_COUNT	19
+
+#define HV_ALIGN_UP(value, align) \
+		(((value) & ((align) - 1)) ? \
+		    (((value) + ((align) - 1)) & ~((align) - 1)) : (value))
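+
+/*
+ * Editor's note (worked example): the macro rounds up to a
+ * power-of-two boundary, e.g. HV_ALIGN_UP(13, 8) == 16 and
+ * HV_ALIGN_UP(16, 8) == 16; align must be a power of two.
+ */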
+
+/*
+ *  Connection identifier type
+ */
+union hv_vmbus_connection_id {
+	uint32_t                as_uint32_t;
+	struct {
+		uint32_t        id:24;
+		uint32_t        reserved:8;
+	} u;
+
+} __attribute__((packed));
+
+union hv_vmbus_monitor_trigger_state {
+	uint32_t as_uint32_t;
+	struct {
+		uint32_t group_enable:4;
+		uint32_t rsvd_z:28;
+	} u;
+};
+
+union hv_vmbus_monitor_trigger_group {
+	uint64_t as_uint64_t;
+	struct {
+		uint32_t pending;
+		uint32_t armed;
+	} u;
+};
+
+struct hv_vmbus_monitor_parameter {
+	union hv_vmbus_connection_id  connection_id;
+	uint16_t                flag_number;
+	uint16_t                rsvd_z;
+};
+
+/*
+ * hv_vmbus_monitor_page Layout
+ * ------------------------------------------------------
+ * | 0   | trigger_state (4 bytes) | Rsvd1 (4 bytes)     |
+ * | 8   | trigger_group[0]                              |
+ * | 10  | trigger_group[1]                              |
+ * | 18  | trigger_group[2]                              |
+ * | 20  | trigger_group[3]                              |
+ * | 28  | Rsvd2[0]                                      |
+ * | 30  | Rsvd2[1]                                      |
+ * | 38  | Rsvd2[2]                                      |
+ * | 40  | next_check_time[0][0] | next_check_time[0][1] |
+ * | ...                                                 |
+ * | 240 | latency[0][0..3]                              |
+ * | 340 | Rsvz3[0]                                      |
+ * | 440 | parameter[0][0]                               |
+ * | 448 | parameter[0][1]                               |
+ * | ...                                                 |
+ * | 840 | Rsvd4[0]                                      |
+ * ------------------------------------------------------
+ */
+
+struct hv_vmbus_monitor_page {
+	union hv_vmbus_monitor_trigger_state  trigger_state;
+	uint32_t                        rsvd_z1;
+
+	union hv_vmbus_monitor_trigger_group  trigger_group[4];
+	uint64_t                        rsvd_z2[3];
+
+	int32_t                         next_check_time[4][32];
+
+	uint16_t                        latency[4][32];
+	uint64_t                        rsvd_z3[32];
+
+	struct hv_vmbus_monitor_parameter      parameter[4][32];
+
+	uint8_t                         rsvd_z4[1984];
+};
+
+enum hv_vmbus_packet_type {
+	HV_VMBUS_PACKET_TYPE_DATA_USING_TRANSFER_PAGES		= 0x7,
+	HV_VMBUS_PACKET_TYPE_DATA_USING_GPA_DIRECT		= 0x9,
+	HV_VMBUS_PACKET_TYPE_COMPLETION				= 0xb,
+};
+
+#define HV_VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED    1
+
+struct hv_vm_packet_descriptor {
+	uint16_t type;
+	uint16_t data_offset8;
+	uint16_t length8;
+	uint16_t flags;
+	uint64_t transaction_id;
+} __attribute__((packed));
+
+struct hv_vm_transfer_page {
+	uint32_t byte_count;
+	uint32_t byte_offset;
+} __attribute__((packed));
+
+struct hv_vm_transfer_page_packet_header {
+	struct hv_vm_packet_descriptor d;
+	uint16_t                       transfer_page_set_id;
+	uint8_t                        sender_owns_set;
+	uint8_t                        reserved;
+	uint32_t                       range_count;
+	struct hv_vm_transfer_page     ranges[1];
+} __attribute__((packed));
+
+struct hv_vmbus_ring_buffer {
+	volatile uint32_t       write_index;
+	volatile uint32_t       read_index;
+	/*
+	 * NOTE: The interrupt_mask field is used only for channels, but
+	 * vmbus connection also uses this data structure
+	 */
+	volatile uint32_t       interrupt_mask;
+	/* pad it to PAGE_SIZE so that data starts on a page */
+	uint8_t                 reserved[4084];
+
+	/*
+	 * WARNING: Ring data starts here + ring_data_start_offset
+	 *  !!! DO NOT place any fields below this !!!
+	 */
+	uint8_t			buffer[0];	/* doubles as interrupt mask */
+} __attribute__((packed));
+
+struct hv_vmbus_page_buffer {
+	uint32_t	length;
+	uint32_t	offset;
+	uint64_t	pfn;
+} __attribute__((packed));
+
+/*
+ * Definitions from hv_vmbus_priv.h
+ */
+struct hv_vmbus_sg_buffer_list {
+	void		*data;
+	uint32_t	length;
+};
+
+struct hv_vmbus_channel_packet_page_buffer {
+	uint16_t		type;
+	uint16_t		data_offset8;
+	uint16_t		length8;
+	uint16_t		flags;
+	uint64_t		transaction_id;
+	uint32_t		reserved;
+	uint32_t		range_count;
+	struct hv_vmbus_page_buffer	range[HV_MAX_PAGE_BUFFER_COUNT];
+} __attribute__((packed));
+
+/*
+ * Definitions from hv_net_vsc.h
+ */
+#define NETVSC_PACKET_MAXPAGE 16
+#define NETVSC_PACKET_SIZE    256
+
+/*
+ * This message is used by both the VSP and the VSC to complete
+ * a RNDIS message to the opposite channel endpoint.  At this
+ * point, the initiator of this message cannot use any resources
+ * associated with the original RNDIS packet.
+ */
+enum nvsp_status {
+	nvsp_status_none = 0,
+	nvsp_status_success,
+	nvsp_status_failure,
+};
+
+struct nvsp_1_msg_send_rndis_pkt_complete {
+	uint32_t                                status;
+} __attribute__((packed));
+
+enum nvsp_msg_type {
+	/*
+	 * Version 1 Messages
+	 */
+	nvsp_msg_1_type_send_ndis_vers          = 100,
+
+	nvsp_msg_1_type_send_rx_buf,
+	nvsp_msg_1_type_send_rx_buf_complete,
+	nvsp_msg_1_type_revoke_rx_buf,
+
+	nvsp_msg_1_type_send_send_buf,
+	nvsp_msg_1_type_send_send_buf_complete,
+	nvsp_msg_1_type_revoke_send_buf,
+
+	nvsp_msg_1_type_send_rndis_pkt,
+	nvsp_msg_1_type_send_rndis_pkt_complete,
+};
+
+struct nvsp_1_msg_send_rndis_pkt {
+	/*
+	 * This field is specified by RNDIS.  It assumes there are
+	 * two different channels of communication.  However,
+	 * the Network VSP only has one.  Therefore, the channel
+	 * travels with the RNDIS packet.
+	 */
+	uint32_t                                chan_type;
+
+	/*
+	 * This field is used to send part or all of the data
+	 * through a send buffer. This value specifies an
+	 * index into the send buffer.  If the index is
+	 * 0xFFFFFFFF, then the send buffer is not being used
+	 * and all of the data was sent through other VMBus
+	 * mechanisms.
+	 */
+	uint32_t                                send_buf_section_idx;
+	uint32_t                                send_buf_section_size;
+} __attribute__((packed));
+
+/*
+ * ALL Messages
+ */
+struct nvsp_msg {
+	uint32_t                                msg_type;
+	union {
+		struct nvsp_1_msg_send_rndis_pkt               send_rndis_pkt;
+		struct nvsp_1_msg_send_rndis_pkt_complete      send_rndis_pkt_complete;
+		/* size is set like in linux kernel driver */
+		uint8_t raw[24];
+	} msgs;
+} __attribute__((packed));
+
+#define NETVSC_RECEIVE_BUFFER_ID                0xcafe
+
+struct netvsc_packet {
+	uint8_t             is_data_pkt;      /* One byte */
+	uint8_t             ext_pages;
+	uint16_t            vlan_tci;
+
+	void                *extension;
+	uint64_t            extension_phys_addr;
+	uint32_t            tot_data_buf_len;
+	uint32_t            page_buf_count;
+	struct hv_vmbus_page_buffer page_buffers[NETVSC_PACKET_MAXPAGE];
+};
+
+/*
+ * Definitions from hv_rndis.h
+ */
+#define RNDIS_MAJOR_VERSION                             0x00000001
+#define RNDIS_MINOR_VERSION                             0x00000000
+
+#define STATUS_BUFFER_OVERFLOW                          (0x80000005L)
+
+/*
+ * Remote NDIS message types
+ */
+#define REMOTE_NDIS_PACKET_MSG                          0x00000001
+#define REMOTE_NDIS_INITIALIZE_MSG                      0x00000002
+#define REMOTE_NDIS_HALT_MSG                            0x00000003
+#define REMOTE_NDIS_QUERY_MSG                           0x00000004
+#define REMOTE_NDIS_SET_MSG                             0x00000005
+#define REMOTE_NDIS_RESET_MSG                           0x00000006
+#define REMOTE_NDIS_INDICATE_STATUS_MSG                 0x00000007
+#define REMOTE_NDIS_KEEPALIVE_MSG                       0x00000008
+/*
+ * Remote NDIS message completion types
+ */
+#define REMOTE_NDIS_INITIALIZE_CMPLT                    0x80000002
+#define REMOTE_NDIS_QUERY_CMPLT                         0x80000004
+#define REMOTE_NDIS_SET_CMPLT                           0x80000005
+#define REMOTE_NDIS_RESET_CMPLT                         0x80000006
+#define REMOTE_NDIS_KEEPALIVE_CMPLT                     0x80000008
+
+#define RNDIS_OID_GEN_MEDIA_CONNECT_STATUS              0x00010114
+#define RNDIS_OID_GEN_CURRENT_PACKET_FILTER             0x0001010E
+#define RNDIS_OID_802_3_PERMANENT_ADDRESS               0x01010101
+#define RNDIS_OID_802_3_CURRENT_ADDRESS                 0x01010102
+#define RNDIS_OID_GEN_RNDIS_CONFIG_PARAMETER            0x0001021B
+
+#define RNDIS_CONFIG_PARAM_TYPE_STRING      2
+/* extended info after the RNDIS request message */
+#define RNDIS_EXT_LEN                       100
+/*
+ * Packet extension field contents associated with a Data message.
+ */
+struct rndis_per_packet_info {
+	uint32_t            size;
+	uint32_t            type;
+	uint32_t            per_packet_info_offset;
+};
+
+#define ieee_8021q_info 6
+
+struct ndis_8021q_info {
+	union {
+		struct {
+			uint32_t   user_pri:3;  /* User Priority */
+			uint32_t   cfi:1;  /* Canonical Format ID */
+			uint32_t   vlan_id:12;
+			uint32_t   reserved:16;
+		} s1;
+		uint32_t    value;
+	} u1;
+};
+
+/* Format of Information buffer passed in a SetRequest for the OID */
+/* OID_GEN_RNDIS_CONFIG_PARAMETER. */
+struct rndis_config_parameter_info {
+	uint32_t parameter_name_offset;
+	uint32_t parameter_name_length;
+	uint32_t parameter_type;
+	uint32_t parameter_value_offset;
+	uint32_t parameter_value_length;
+};
+
+/*
+ * NdisInitialize message
+ */
+struct rndis_initialize_request {
+	/* RNDIS request ID */
+	uint32_t            request_id;
+	uint32_t            major_version;
+	uint32_t            minor_version;
+	uint32_t            max_xfer_size;
+};
+
+/*
+ * Response to NdisInitialize
+ */
+struct rndis_initialize_complete {
+	/* RNDIS request ID */
+	uint32_t            request_id;
+	/* RNDIS status */
+	uint32_t            status;
+	uint32_t            major_version;
+	uint32_t            minor_version;
+	uint32_t            device_flags;
+	/* RNDIS medium */
+	uint32_t            medium;
+	uint32_t            max_pkts_per_msg;
+	uint32_t            max_xfer_size;
+	uint32_t            pkt_align_factor;
+	uint32_t            af_list_offset;
+	uint32_t            af_list_size;
+};
+
+/*
+ * NdisSetRequest message
+ */
+struct rndis_set_request {
+	/* RNDIS request ID */
+	uint32_t            request_id;
+	/* RNDIS OID */
+	uint32_t            oid;
+	uint32_t            info_buffer_length;
+	uint32_t            info_buffer_offset;
+	/* RNDIS handle */
+	uint32_t            device_vc_handle;
+};
+
+/*
+ * Response to NdisSetRequest
+ */
+struct rndis_set_complete {
+	/* RNDIS request ID */
+	uint32_t            request_id;
+	/* RNDIS status */
+	uint32_t            status;
+};
+
+/*
+ * NdisQueryRequest message
+ */
+struct rndis_query_request {
+	/* RNDIS request ID */
+	uint32_t            request_id;
+	/* RNDIS OID */
+	uint32_t            oid;
+	uint32_t            info_buffer_length;
+	uint32_t            info_buffer_offset;
+	/* RNDIS handle */
+	uint32_t            device_vc_handle;
+};
+
+/*
+ * Response to NdisQueryRequest
+ */
+struct rndis_query_complete {
+	/* RNDIS request ID */
+	uint32_t            request_id;
+	/* RNDIS status */
+	uint32_t            status;
+	uint32_t            info_buffer_length;
+	uint32_t            info_buffer_offset;
+};
+
+/*
+ * Data message. All offset fields contain byte offsets from the beginning
+ * of the rndis_packet structure. All length fields are in bytes.
+ * VcHandle is set to 0 for connectionless data, otherwise it
+ * contains the VC handle.
+ */
+struct rndis_packet {
+	uint32_t            data_offset;
+	uint32_t            data_length;
+	uint32_t            oob_data_offset;
+	uint32_t            oob_data_length;
+	uint32_t            num_oob_data_elements;
+	uint32_t            per_pkt_info_offset;
+	uint32_t            per_pkt_info_length;
+	/* RNDIS handle */
+	uint32_t            vc_handle;
+	uint32_t            reserved;
+};
+
+/*
+ * NdisHalt message
+ */
+struct rndis_halt_request {
+	/* RNDIS request ID */
+	uint32_t            request_id;
+};
+
+/*
+ * NdisMIndicateStatus message
+ */
+struct rndis_indicate_status {
+	/* RNDIS status */
+	uint32_t                                status;
+	uint32_t                                status_buf_length;
+	uint32_t                                status_buf_offset;
+};
+
+#define RNDIS_STATUS_MEDIA_CONNECT              (0x4001000BL)
+#define RNDIS_STATUS_MEDIA_DISCONNECT           (0x4001000CL)
+#define RNDIS_STATUS_INVALID_DATA               (0xC0010015L)
+
+/*
+ * union with all of the RNDIS messages
+ */
+union rndis_msg_container {
+	struct rndis_initialize_request                init_request;
+	struct rndis_initialize_complete               init_complete;
+	struct rndis_set_request                       set_request;
+	struct rndis_set_complete                      set_complete;
+	struct rndis_query_request                     query_request;
+	struct rndis_query_complete                    query_complete;
+	struct rndis_packet                            packet;
+	struct rndis_halt_request                      halt_request;
+	struct rndis_indicate_status                   indicate_status;
+#if 0
+	rndis_keepalive_request                 keepalive_request;
+	rndis_reset_request                     reset_request;
+	rndis_reset_complete                    reset_complete;
+	rndis_keepalive_complete                keepalive_complete;
+	rcondis_mp_create_vc                    co_miniport_create_vc;
+	rcondis_mp_delete_vc                    co_miniport_delete_vc;
+	rcondis_indicate_status                 co_miniport_status;
+	rcondis_mp_activate_vc_request          co_miniport_activate_vc;
+	rcondis_mp_deactivate_vc_request        co_miniport_deactivate_vc;
+	rcondis_mp_create_vc_complete           co_miniport_create_vc_complete;
+	rcondis_mp_delete_vc_complete           co_miniport_delete_vc_complete;
+	rcondis_mp_activate_vc_complete         co_miniport_activate_vc_complete;
+	rcondis_mp_deactivate_vc_complete       co_miniport_deactivate_vc_complete;
+#endif
+	uint32_t packet_ex[16]; /* to pad the union size */
+};
+
+struct rndis_msg {
+	uint32_t         ndis_msg_type;
+
+	/*
+	 * Total length of this message, from the beginning
+	 * of the rndis_msg struct, in bytes.
+	 */
+	uint32_t         msg_len;
+
+	/* Actual message */
+	union rndis_msg_container msg;
+};
+
+#define RNDIS_HEADER_SIZE (sizeof(struct rndis_msg) - sizeof(union rndis_msg_container))
+
+#define NDIS_PACKET_TYPE_DIRECTED       0x00000001
+#define NDIS_PACKET_TYPE_MULTICAST      0x00000002
+#define NDIS_PACKET_TYPE_ALL_MULTICAST  0x00000004
+#define NDIS_PACKET_TYPE_BROADCAST      0x00000008
+#define NDIS_PACKET_TYPE_SOURCE_ROUTING 0x00000010
+#define NDIS_PACKET_TYPE_PROMISCUOUS    0x00000020
+
+/*
+ * Get the wire size of an RNDIS message: the message body plus the
+ * common header. Pass in the body type, e.g. rndis_set_request or
+ * rndis_packet.
+ */
+#define RNDIS_MESSAGE_SIZE(message) \
+	(sizeof(message) + (sizeof(struct rndis_msg) - sizeof(union rndis_msg_container)))
+
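+/*
+ * Example: RNDIS_MESSAGE_SIZE(struct rndis_set_request) is the number
+ * of bytes a set request occupies on the wire: the request body plus
+ * the common (msg_type, msg_len) header.
+ */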
+
+/*
+ * Definitions from hv_rndis_filter.h
+ */
+enum {
+	RNDIS_DEV_UNINITIALIZED = 0,
+	RNDIS_DEV_INITIALIZING,
+	RNDIS_DEV_INITIALIZED,
+	RNDIS_DEV_DATAINITIALIZED,
+};
+
+struct rndis_request {
+	/* the response is assumed to have a fixed size */
+	struct rndis_msg    response_msg;
+
+	/* simplify allocation by keeping a netvsc packet inline */
+	struct netvsc_packet pkt;
+	/* extra page buffer, since the message can cross a page boundary */
+	struct hv_vmbus_page_buffer buffer;
+	/* the request is assumed to have a fixed size */
+	struct rndis_msg    *request_msg;
+	const struct rte_memzone *request_msg_memzone;
+};
+
+struct rndis_filter_packet {
+	struct rndis_msg                       message;
+};
+
+#endif /* _HYPERV_DRV_H_ */
diff --git a/lib/librte_pmd_hyperv/hyperv_ethdev.c b/lib/librte_pmd_hyperv/hyperv_ethdev.c
new file mode 100644
index 0000000..7b909db
--- /dev/null
+++ b/lib/librte_pmd_hyperv/hyperv_ethdev.c
@@ -0,0 +1,332 @@ 
+/*-
+ * Copyright (c) 2013-2015 Brocade Communications Systems, Inc.
+ * All rights reserved.
+ */
+
+#include <assert.h>
+#include <unistd.h>
+#include "hyperv.h"
+
+static struct rte_vmbus_id vmbus_id_hyperv_map[] = {
+	{
+		.device_id = 0x0,
+	},
+};
+
+static void
+hyperv_dev_info_get(__rte_unused struct rte_eth_dev *dev,
+		struct rte_eth_dev_info *dev_info)
+{
+	PMD_INIT_FUNC_TRACE();
+	dev_info->max_rx_queues  = HV_MAX_RX_QUEUES;
+	dev_info->max_tx_queues  = HV_MAX_TX_QUEUES;
+	dev_info->min_rx_bufsize = HV_MIN_RX_BUF_SIZE;
+	dev_info->max_rx_pktlen  = HV_MAX_RX_PKT_LEN;
+	dev_info->max_mac_addrs  = HV_MAX_MAC_ADDRS;
+}
+
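+/*
+ * Atomically publish a new link state with a 64-bit compare-and-set,
+ * so concurrent readers never observe a half-written rte_eth_link.
+ */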
+inline int
+rte_hv_dev_atomic_write_link_status(struct rte_eth_dev *dev,
+		struct rte_eth_link *link)
+{
+	struct rte_eth_link *dst = &(dev->data->dev_link);
+	struct rte_eth_link *src = link;
+
+	if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
+				*(uint64_t *)src) == 0)
+		return -1;
+
+	return 0;
+}
+
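+/*
+ * Atomically snapshot the current link state; the read counterpart
+ * of the helper above.
+ */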
+inline int
+rte_hv_dev_atomic_read_link_status(struct rte_eth_dev *dev,
+		struct rte_eth_link *link)
+{
+	struct rte_eth_link *dst = link;
+	struct rte_eth_link *src = &(dev->data->dev_link);
+
+	if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst,
+				*(uint64_t *)src) == 0)
+		return -1;
+
+	return 0;
+}
+
+/* return 0 means link status changed, -1 means not changed */
+static int
+hyperv_dev_link_update(struct rte_eth_dev *dev,
+		__rte_unused int wait_to_complete)
+{
+	uint8_t ret;
+	struct rte_eth_link old, link;
+	struct hv_data *hv = dev->data->dev_private;
+
+	PMD_INIT_FUNC_TRACE();
+	memset(&old, 0, sizeof(old));
+	memset(&link, 0, sizeof(link));
+	rte_hv_dev_atomic_read_link_status(dev, &old);
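+	/*
+	 * While the link is down, re-query the host only once every
+	 * HV_MAX_LINK_REQ invocations to avoid flooding it with RNDIS
+	 * requests.
+	 */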
+	if (!hv->link_status && (hv->link_req_cnt == HV_MAX_LINK_REQ)) {
+		ret = hyperv_get_link_status(hv);
+		if (ret > 1)
+			return -1;
+		hv->link_req_cnt = 0;
+	}
+	link.link_duplex = ETH_LINK_FULL_DUPLEX;
+	link.link_speed = ETH_LINK_SPEED_10000;
+	link.link_status = hv->link_status;
+	hv->link_req_cnt++;
+	rte_hv_dev_atomic_write_link_status(dev, &link);
+
+	return (old.link_status == link.link_status) ? -1 : 0;
+}
+
+static int
+hyperv_dev_configure(struct rte_eth_dev *dev)
+{
+	struct hv_data *hv = dev->data->dev_private;
+	const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
+
+	PMD_INIT_FUNC_TRACE();
+
+	rte_memcpy(dev->data->mac_addrs->addr_bytes, hv->hw_mac_addr,
+			ETHER_ADDR_LEN);
+	hv->jumbo_frame_support = rxmode->jumbo_frame;
+
+	return 0;
+}
+
+static int
+hyperv_init(struct rte_eth_dev *dev)
+{
+	struct hv_data *hv = dev->data->dev_private;
+	struct rte_vmbus_device *vmbus_dev;
+
+	vmbus_dev = dev->vmbus_dev;
+	hv->uio_fd = vmbus_dev->uio_fd;
+	hv->kernel_initialized = 1;
+	hv->vmbus_device = vmbus_dev->id.device_id;
+	hv->monitor_bit = (uint8_t)(vmbus_dev->vmbus_monitor_id % 32);
+	hv->monitor_group = (uint8_t)(vmbus_dev->vmbus_monitor_id / 32);
+	PMD_PDEBUG_LOG(hv, DBG_LOAD, "hyperv_init for vmbus device %d",
+			vmbus_dev->id.device_id);
+
+	/* get the memory mappings */
+	hv->ring_pages = vmbus_dev->mem_resource[TXRX_RING_MAP].addr;
+	hv->int_page = vmbus_dev->mem_resource[INT_PAGE_MAP].addr;
+	hv->monitor_pages =
+		(struct hv_vmbus_monitor_page *)
+		vmbus_dev->mem_resource[MON_PAGE_MAP].addr;
+	hv->recv_buf = vmbus_dev->mem_resource[RECV_BUF_MAP].addr;
+	assert(hv->ring_pages);
+	assert(hv->int_page);
+	assert(hv->monitor_pages);
+	assert(hv->recv_buf);
+
+	/* separate send/recv int_pages */
+	hv->recv_interrupt_page = hv->int_page;
+
+	hv->send_interrupt_page =
+		((uint8_t *) hv->int_page + (PAGE_SIZE >> 1));
+
+	/* retrieve in/out ring_buffers */
+	hv->out = hv->ring_pages;
+	hv->in  = (void *)((uintptr_t)hv->out +
+			(vmbus_dev->mem_resource[TXRX_RING_MAP].len / 2));
+	hv->rb_size = (vmbus_dev->mem_resource[TXRX_RING_MAP].len / 2);
+
+	dev->rx_pkt_burst = hyperv_recv_pkts;
+	dev->tx_pkt_burst = hyperv_xmit_pkts;
+
+	return hv_rf_on_device_add(hv);
+}
+
+#define HV_DEV_ID (hv->vmbus_device << 1)
+#define HV_MTU (dev->data->dev_conf.rxmode.max_rx_pkt_len << 9)
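+/*
+ * Command word written to the hv_uio fd (layout inferred from its use
+ * here): bit 0 carries the close flag, the VMBus device id sits above
+ * it, and the MTU occupies the bits from 9 upward.
+ */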
+
+static int
+hyperv_dev_start(struct rte_eth_dev *dev)
+{
+	int ret;
+	uint32_t cmd;
+	ssize_t bytes;
+	struct hv_data *hv = dev->data->dev_private;
+
+	PMD_INIT_FUNC_TRACE();
+	if (!hv->kernel_initialized) {
+		cmd = HV_DEV_ID | HV_MTU;
+		bytes = write(hv->uio_fd, &cmd, sizeof(uint32_t));
+		if (bytes != (ssize_t)sizeof(uint32_t)) {
+			PMD_PERROR_LOG(hv, DBG_LOAD, "write on uio_fd %d failed",
+					hv->uio_fd);
+			return -1;
+		}
+		ret = vmbus_uio_map_resource(dev->vmbus_dev);
+		if (ret < 0) {
+			PMD_PERROR_LOG(hv, DBG_LOAD, "Failed to map resources");
+			return ret;
+		}
+		ret = hyperv_init(dev);
+		if (ret)
+			return ret;
+	}
+	ret = hv_rf_on_open(hv);
+	if (ret) {
+		PMD_PERROR_LOG(hv, DBG_LOAD, "hv_rf_on_open failed");
+		return ret;
+	}
+	hv->link_req_cnt = HV_MAX_LINK_REQ;
+
+	return ret;
+}
+
+static void
+hyperv_dev_stop(struct rte_eth_dev *dev)
+{
+	struct hv_data *hv = dev->data->dev_private;
+	uint32_t cmd;
+	ssize_t bytes;
+
+	PMD_INIT_FUNC_TRACE();
+	if (!hv->closed) {
+		hv_rf_on_close(hv);
+		hv_rf_on_device_remove(hv);
+		if (hv->kernel_initialized) {
+			cmd = 1 | HV_DEV_ID;
+			bytes = write(hv->uio_fd, &cmd, sizeof(uint32_t));
+			if (bytes == (ssize_t)sizeof(uint32_t))
+				hv->kernel_initialized = 0;
+			else
+				PMD_PWARN_LOG(hv, DBG_LOAD, "write to uio_fd %d failed: (%zd)b",
+						hv->uio_fd, bytes);
+		}
+		hv->link_status = 0;
+	}
+}
+
+static void
+hyperv_dev_close(struct rte_eth_dev *dev)
+{
+	PMD_INIT_FUNC_TRACE();
+	hyperv_dev_stop(dev);
+}
+
+static void
+hyperv_dev_promisc_enable(struct rte_eth_dev *dev)
+{
+	struct hv_data *hv = dev->data->dev_private;
+
+	PMD_INIT_FUNC_TRACE();
+	hyperv_set_rx_mode(hv, 1, dev->data->all_multicast);
+}
+
+static void
+hyperv_dev_promisc_disable(struct rte_eth_dev *dev)
+{
+	struct hv_data *hv = dev->data->dev_private;
+
+	PMD_INIT_FUNC_TRACE();
+	hyperv_set_rx_mode(hv, 0, dev->data->all_multicast);
+}
+
+static void
+hyperv_dev_allmulticast_enable(struct rte_eth_dev *dev)
+{
+	struct hv_data *hv = dev->data->dev_private;
+
+	PMD_INIT_FUNC_TRACE();
+	hyperv_set_rx_mode(hv, dev->data->promiscuous, 1);
+}
+
+static void
+hyperv_dev_allmulticast_disable(struct rte_eth_dev *dev)
+{
+	struct hv_data *hv = dev->data->dev_private;
+
+	PMD_INIT_FUNC_TRACE();
+	hyperv_set_rx_mode(hv, dev->data->promiscuous, 0);
+}
+
+static void
+hyperv_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
+{
+	struct hv_data *hv = dev->data->dev_private;
+	struct hv_stats *st = &hv->stats;
+
+	PMD_INIT_FUNC_TRACE();
+
+	memset(stats, 0, sizeof(struct rte_eth_stats));
+
+	stats->opackets = st->opkts;
+	stats->obytes = st->obytes;
+	stats->oerrors = st->oerrors;
+	stats->ipackets = st->ipkts;
+	stats->ibytes = st->ibytes;
+	stats->ierrors = st->ierrors;
+	stats->rx_nombuf = st->rx_nombuf;
+}
+
+static struct eth_dev_ops hyperv_eth_dev_ops = {
+	.dev_configure		= hyperv_dev_configure,
+	.dev_start		= hyperv_dev_start,
+	.dev_stop		= hyperv_dev_stop,
+	.dev_infos_get		= hyperv_dev_info_get,
+	.rx_queue_release	= hyperv_dev_rx_queue_release,
+	.tx_queue_release	= hyperv_dev_tx_queue_release,
+	.rx_queue_setup		= hyperv_dev_rx_queue_setup,
+	.tx_queue_setup		= hyperv_dev_tx_queue_setup,
+	.dev_close		= hyperv_dev_close,
+	.promiscuous_enable	= hyperv_dev_promisc_enable,
+	.promiscuous_disable	= hyperv_dev_promisc_disable,
+	.allmulticast_enable	= hyperv_dev_allmulticast_enable,
+	.allmulticast_disable	= hyperv_dev_allmulticast_disable,
+	.link_update		= hyperv_dev_link_update,
+	.stats_get		= hyperv_dev_stats_get,
+};
+
+static int
+eth_hyperv_dev_init(struct rte_eth_dev *eth_dev)
+{
+	struct hv_data *hv = eth_dev->data->dev_private;
+
+	PMD_INIT_FUNC_TRACE();
+
+	eth_dev->dev_ops = &hyperv_eth_dev_ops;
+	eth_dev->data->mac_addrs = rte_malloc("mac_addrs",
+					      sizeof(struct ether_addr),
+					      RTE_CACHE_LINE_SIZE);
+	if (!eth_dev->data->mac_addrs) {
+		PMD_PERROR_LOG(hv, DBG_LOAD, "unable to allocate memory for mac addrs");
+		return -1;
+	}
+
+	return hyperv_init(eth_dev);
+}
+
+static struct eth_driver rte_hyperv_pmd = {
+	.vmbus_drv = {
+		.name = "rte_hyperv_pmd",
+		.module_name = "hv_uio",
+		.id_table = vmbus_id_hyperv_map,
+	},
+	.bus_type = RTE_BUS_VMBUS,
+	.eth_dev_init = eth_hyperv_dev_init,
+	.dev_private_size = sizeof(struct hv_data),
+};
+
+static int
+rte_hyperv_pmd_init(const char *name __rte_unused,
+		    const char *param __rte_unused)
+{
+	rte_eth_driver_register(&rte_hyperv_pmd);
+	return 0;
+}
+
+static struct rte_driver rte_hyperv_driver = {
+	.type = PMD_PDEV,
+	.init = rte_hyperv_pmd_init,
+};
+
+PMD_REGISTER_DRIVER(rte_hyperv_driver);
diff --git a/lib/librte_pmd_hyperv/hyperv_logs.h b/lib/librte_pmd_hyperv/hyperv_logs.h
new file mode 100644
index 0000000..1b96468
--- /dev/null
+++ b/lib/librte_pmd_hyperv/hyperv_logs.h
@@ -0,0 +1,69 @@ 
+/*-
+ *   Copyright(c) 2013-2015 Brocade Communications Systems, Inc.
+ *   All rights reserved.
+ */
+
+#ifndef _HYPERV_LOGS_H_
+#define _HYPERV_LOGS_H_
+
+#ifdef RTE_LIBRTE_HV_DEBUG_INIT
+#define PMD_INIT_LOG(level, fmt, args...) \
+	RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args)
+#define PMD_INIT_FUNC_TRACE() PMD_INIT_LOG(DEBUG, " >>")
+#else
+#define PMD_INIT_LOG(level, fmt, args...) do { } while (0)
+#define PMD_INIT_FUNC_TRACE() do { } while (0)
+#endif
+
+#ifdef RTE_LIBRTE_HV_DEBUG
+
+#define RTE_DBG_LOAD   INIT
+#define RTE_DBG_STATS  STATS
+#define RTE_DBG_TX     TX
+#define RTE_DBG_RX     RX
+#define RTE_DBG_MBUF   MBUF
+#define RTE_DBG_ASSERT ASRT
+#define RTE_DBG_RB     RB
+#define RTE_DBG_VMBUS  VMBUS
+#define RTE_DBG_ALL    ALL
+
+#define STR(x) #x
+
+#define HV_RTE_LOG(hv, codepath, level, fmt, args...) \
+	RTE_LOG(level, PMD, "[%d]: %-6s: %s: " fmt "\n", \
+		hv->vmbus_device, STR(codepath), __func__, ## args)
+
+#define PMD_PDEBUG_LOG(hv, codepath, fmt, args...) \
+do { \
+	if (unlikely(hv->debug & (codepath))) \
+		HV_RTE_LOG(hv, RTE_##codepath, DEBUG, fmt, ## args); \
+} while (0)
+
+#define PMD_PINFO_LOG(hv, codepath, fmt, args...) \
+do { \
+	if (unlikely(hv->debug & (codepath))) \
+		HV_RTE_LOG(hv, RTE_##codepath, INFO, fmt, ## args); \
+} while (0)
+
+#define PMD_PWARN_LOG(hv, codepath, fmt, args...) \
+do { \
+	if (unlikely(hv->debug & (codepath))) \
+		HV_RTE_LOG(hv, RTE_##codepath, WARNING, fmt, ## args); \
+} while (0)
+
+#define PMD_PERROR_LOG(hv, codepath, fmt, args...) \
+do { \
+	if (unlikely(hv->debug & (codepath))) \
+		HV_RTE_LOG(hv, RTE_##codepath, ERR, fmt, ## args); \
+} while (0)
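+
+/*
+ * Example: PMD_PDEBUG_LOG(hv, DBG_RX, "received %u segs", segs) logs
+ * only when the DBG_RX bit is set in hv->debug.
+ */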
+#else
+#define HV_RTE_LOG(hv, codepath, level, fmt, args...) do { } while (0)
+#define PMD_PDEBUG_LOG(hv, codepath, fmt, args...) do { (void)(hv); } while (0)
+#define PMD_PINFO_LOG(hv, codepath, fmt, args...) do { (void)(hv); } while (0)
+#define PMD_PWARN_LOG(hv, codepath, fmt, args...) do { (void)(hv); } while (0)
+#define PMD_PERROR_LOG(hv, codepath, fmt, args...) do { (void)(hv); } while (0)
+#undef RTE_LIBRTE_HV_DEBUG_TX
+#undef RTE_LIBRTE_HV_DEBUG_RX
+#endif
+
+#endif /* _HYPERV_LOGS_H_ */
diff --git a/lib/librte_pmd_hyperv/hyperv_rxtx.c b/lib/librte_pmd_hyperv/hyperv_rxtx.c
new file mode 100644
index 0000000..9e423d0
--- /dev/null
+++ b/lib/librte_pmd_hyperv/hyperv_rxtx.c
@@ -0,0 +1,403 @@ 
+/*-
+ *   Copyright(c) 2013-2015 Brocade Communications Systems, Inc.
+ *   All rights reserved.
+ */
+
+#include "hyperv.h"
+#include "hyperv_rxtx.h"
+#include "hyperv_drv.h"
+
+#define RTE_MBUF_DATA_DMA_ADDR(mb) \
+	((uint64_t)((mb)->buf_physaddr + (mb)->data_off))
+
+#define RPPI_SIZE	(sizeof(struct rndis_per_packet_info)\
+			 + sizeof(struct ndis_8021q_info))
+#define RNDIS_OFF	(sizeof(struct netvsc_packet) + RPPI_SIZE)
+#define TX_PKT_SIZE	(RNDIS_OFF + sizeof(struct rndis_filter_packet) * 2)
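+/*
+ * Each TX descriptor occupies TX_PKT_SIZE bytes of the memzone: a
+ * netvsc_packet, then per-packet info (RPPI), then room for the RNDIS
+ * message itself (the "extension").
+ */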
+
+static inline struct rte_mbuf *
+hv_rxmbuf_alloc(struct rte_mempool *mp)
+{
+	return __rte_mbuf_raw_alloc(mp);
+}
+
+static inline int
+hyperv_has_rx_work(struct hv_data *hv)
+{
+	return hv->in->read_index != hv->in->write_index;
+}
+
+#ifndef DEFAULT_TX_FREE_THRESHOLD
+#define DEFAULT_TX_FREE_THRESHOLD 32
+#endif
+
+int
+hyperv_dev_tx_queue_setup(struct rte_eth_dev *dev,
+			  uint16_t queue_idx,
+			  uint16_t nb_desc,
+			  unsigned int socket_id,
+			  const struct rte_eth_txconf *tx_conf)
+
+{
+	struct hv_data *hv = dev->data->dev_private;
+	const struct rte_memzone *tz;
+	struct hv_tx_queue *txq;
+	char tz_name[RTE_MEMZONE_NAMESIZE];
+	uint32_t i, delta = 0, new_delta;
+	struct netvsc_packet *pkt;
+
+	PMD_INIT_FUNC_TRACE();
+
+	txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct hv_tx_queue),
+				 RTE_CACHE_LINE_SIZE, socket_id);
+	if (txq == NULL) {
+		PMD_PERROR_LOG(hv, DBG_LOAD, "rte_zmalloc for tx_queue failed");
+		return -ENOMEM;
+	}
+
+	if (tx_conf->tx_free_thresh >= nb_desc) {
+		PMD_PERROR_LOG(hv, DBG_LOAD,
+			       "tx_free_thresh should be less than nb_desc");
+		rte_free(txq);
+		return -EINVAL;
+	}
+	txq->tx_free_thresh = (tx_conf->tx_free_thresh ? tx_conf->tx_free_thresh :
+			       DEFAULT_TX_FREE_THRESHOLD);
+	txq->pkts = rte_calloc_socket("TX pkts", nb_desc, sizeof(void *),
+				      RTE_CACHE_LINE_SIZE, socket_id);
+	if (txq->pkts == NULL) {
+		PMD_PERROR_LOG(hv, DBG_LOAD, "rte_calloc for pkts failed");
+		rte_free(txq);
+		return -ENOMEM;
+	}
+	snprintf(tz_name, sizeof(tz_name), "hv_%d_%u_%u",
+		 hv->vmbus_device, queue_idx, socket_id);
+	tz = rte_memzone_reserve_aligned(tz_name,
+					 (uint32_t)nb_desc * TX_PKT_SIZE,
+					 socket_id, 0, PAGE_SIZE);
+	if (tz == NULL) {
+		PMD_PERROR_LOG(hv, DBG_LOAD, "netvsc packet ring alloc fail");
+		rte_free(txq->pkts);
+		rte_free(txq);
+		return -ENOMEM;
+	}
+	for (i = 0; i < nb_desc; i++) {
+		pkt = txq->pkts[i] = (struct netvsc_packet *)((uint8_t *)tz->addr +
+							      i * TX_PKT_SIZE + delta);
+		pkt->extension = (uint8_t *)tz->addr + i * TX_PKT_SIZE + RNDIS_OFF + delta;
+		if (!pkt->extension) {
+			PMD_PERROR_LOG(hv, DBG_TX,
+				       "pkt->extension is NULL for %d-th pkt", i);
+			return -EINVAL;
+		}
+		pkt->extension_phys_addr =
+			tz->phys_addr + i * TX_PKT_SIZE + RNDIS_OFF + delta;
+		pkt->ext_pages = 1;
+		pkt->page_buffers[0].pfn = pkt->extension_phys_addr >> PAGE_SHIFT;
+		pkt->page_buffers[0].offset =
+			(unsigned long)pkt->extension & (PAGE_SIZE - 1);
+		pkt->page_buffers[0].length = RNDIS_MESSAGE_SIZE(struct rndis_packet);
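+		/*
+		 * If the RNDIS message area would straddle a page
+		 * boundary, shift it to the start of the next page so
+		 * that page_buffers[0] can describe it as one
+		 * physically contiguous range.
+		 */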
+		if (pkt->page_buffers[0].offset + pkt->page_buffers[0].length
+		    > PAGE_SIZE) {
+			new_delta = PAGE_SIZE - pkt->page_buffers[0].offset;
+			pkt->page_buffers[0].pfn++;
+			delta += new_delta;
+			pkt->page_buffers[0].offset = 0;
+			pkt->extension = (uint8_t *)pkt->extension + new_delta;
+			pkt->extension_phys_addr += new_delta;
+		}
+	}
+	txq->sw_ring = rte_calloc_socket("txq_sw_ring",
+					 nb_desc, sizeof(struct rte_mbuf *),
+					 RTE_CACHE_LINE_SIZE, socket_id);
+	if (txq->sw_ring == NULL) {
+		hyperv_dev_tx_queue_release(txq);
+		return -ENOMEM;
+	}
+	txq->port_id = dev->data->port_id;
+	txq->nb_tx_desc = txq->tx_avail = nb_desc;
+	txq->hv = hv;
+	dev->data->tx_queues[queue_idx] = txq;
+	hv->txq = txq;
+
+	return 0;
+}
+
+void
+hyperv_dev_tx_queue_release(void *ptxq)
+{
+	struct hv_tx_queue *txq = ptxq;
+
+	PMD_INIT_FUNC_TRACE();
+	if (txq == NULL)
+		return;
+	rte_free(txq->sw_ring);
+	rte_free(txq->pkts);
+	rte_free(txq);
+}
+
+int
+hyperv_dev_rx_queue_setup(struct rte_eth_dev *dev,
+			  uint16_t queue_idx,
+			  uint16_t nb_desc,
+			  unsigned int socket_id,
+			  const struct rte_eth_rxconf *rx_conf,
+			  struct rte_mempool *mp)
+{
+	uint16_t i;
+	struct hv_rx_queue *rxq;
+	struct rte_mbuf *mbuf;
+	struct hv_data *hv = dev->data->dev_private;
+
+	PMD_INIT_FUNC_TRACE();
+
+	rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct hv_rx_queue),
+				 RTE_CACHE_LINE_SIZE, socket_id);
+	if (rxq == NULL) {
+		PMD_PERROR_LOG(hv, DBG_LOAD,
+			       "rte_zmalloc for rx_queue failed!");
+		return -ENOMEM;
+	}
+	hv->desc = rxq->desc = rte_zmalloc_socket(NULL, PAGE_SIZE,
+						  RTE_CACHE_LINE_SIZE, socket_id);
+	if (rxq->desc == NULL) {
+		PMD_PERROR_LOG(hv, DBG_LOAD,
+			       "rte_zmalloc for vmbus_desc failed!");
+		hyperv_dev_rx_queue_release(rxq);
+		return -ENOMEM;
+	}
+	rxq->sw_ring = rte_calloc_socket("rxq_sw_ring",
+					 nb_desc, sizeof(struct rte_mbuf *),
+					 RTE_CACHE_LINE_SIZE, socket_id);
+	if (rxq->sw_ring == NULL) {
+		hyperv_dev_rx_queue_release(rxq);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < nb_desc; i++) {
+		mbuf = hv_rxmbuf_alloc(mp);
+		if (mbuf == NULL) {
+			PMD_PERROR_LOG(hv, DBG_LOAD, "RX mbuf alloc failed");
+			while (i-- > 0)
+				rte_pktmbuf_free(rxq->sw_ring[i]);
+			hyperv_dev_rx_queue_release(rxq);
+			return -ENOMEM;
+		}
+
+		mbuf->nb_segs = 1;
+		mbuf->next = NULL;
+		mbuf->port = dev->data->port_id;
+		rxq->sw_ring[i] = mbuf;
+	}
+
+	rxq->mb_pool = mp;
+	rxq->nb_rx_desc = nb_desc;
+	rxq->rx_head = 0;
+	rxq->rx_tail = 0;
+	rxq->rx_free_thresh = rx_conf->rx_free_thresh;
+	rxq->port_id = dev->data->port_id;
+	rxq->hv = hv;
+	dev->data->rx_queues[queue_idx] = rxq;
+	hv->rxq = rxq;
+	hv->max_rx_pkt_len = mp->elt_size -
+		(sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM);
+
+	return 0;
+}
+
+void
+hyperv_dev_rx_queue_release(void *prxq)
+{
+	struct hv_rx_queue *rxq = prxq;
+
+	PMD_INIT_FUNC_TRACE();
+	if (rxq == NULL)
+		return;
+	rte_free(rxq->sw_ring);
+	rte_free(rxq->desc);
+	rte_free(rxq);
+}
+
+uint16_t
+hyperv_recv_pkts(void *prxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
+{
+	struct hv_rx_queue *rxq = prxq;
+	struct hv_data *hv = rxq->hv;
+	struct rte_mbuf *new_mb, *rx_mbuf, *first_mbuf;
+	uint16_t nb_rx = 0;
+	uint16_t segs, i;
+
+	if (unlikely(hv->closed))
+		return 0;
+
+	nb_pkts = RTE_MIN(nb_pkts, HV_MAX_PKT_BURST);
+	hyperv_scan_comps(hv, 0);
+
+	while (nb_rx < nb_pkts) {
+		/*
+		 * No mbufs pending in sw_ring: poll the VMBus ring
+		 * buffer for a new inbound transfer.
+		 */
+		if (rxq->rx_head == rxq->rx_tail) {
+			if (!hyperv_has_rx_work(hv))
+				break;
+
+			if (unlikely(!hyperv_get_buffer(hv, rxq->desc, PAGE_SIZE))) {
+				hyperv_scan_comps(hv, 0);
+				continue;
+			}
+		}
+
+		/*
+		 * The received data now sits in sw_ring; hand it to the
+		 * caller and replace each extracted mbuf with a freshly
+		 * allocated one.
+		 */
+		rx_mbuf = first_mbuf = rxq->sw_ring[rxq->rx_head];
+		segs = first_mbuf->nb_segs;
+		for (i = 0; i < segs; ++i) {
+			new_mb = hv_rxmbuf_alloc(rxq->mb_pool);
+			if (unlikely(!new_mb)) {
+				PMD_PERROR_LOG(hv, DBG_RX, "mbuf alloc fail");
+				++hv->stats.rx_nombuf;
+				return nb_rx;
+			}
+
+			rx_mbuf = rxq->sw_ring[rxq->rx_head];
+			rxq->sw_ring[rxq->rx_head] = new_mb;
+
+			if (++rxq->rx_head == rxq->nb_rx_desc)
+				rxq->rx_head = 0;
+
+			rx_mbuf->ol_flags |= PKT_RX_IPV4_HDR;
+			rx_mbuf->port = rxq->port_id;
+		}
+		rx_mbuf->next = NULL;
+
+		rx_pkts[nb_rx++] = first_mbuf;
+		++hv->stats.ipkts;
+		hv->stats.ibytes += first_mbuf->pkt_len;
+	}
+
+	return nb_rx;
+}
+
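+/*
+ * Return completed TX mbuf chains to their mempools and advance the
+ * ring head; tx_free counts descriptors already completed by the host.
+ */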
+static void
+hyperv_txeof(struct hv_tx_queue *txq)
+{
+	struct rte_mbuf *mb, *mb_next;
+
+	txq->tx_avail += txq->tx_free;
+	while (txq->tx_free) {
+		--txq->tx_free;
+		mb = txq->sw_ring[txq->tx_head];
+		while (mb) {
+			mb_next = mb->next;
+			rte_mempool_put(mb->pool, mb);
+			mb = mb_next;
+		}
+		if (++txq->tx_head == txq->nb_tx_desc)
+			txq->tx_head = 0;
+	}
+}
+
+uint16_t
+hyperv_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+	struct hv_tx_queue *txq = ptxq;
+	struct hv_data *hv = txq->hv;
+	struct netvsc_packet *packet;
+	struct rte_mbuf *m;
+	uint32_t data_pages;
+	uint64_t first_data_page;
+	uint32_t total_len;
+	uint32_t len;
+	uint16_t i, nb_tx;
+	uint8_t rndis_pages;
+	int ret;
+
+	if (unlikely(hv->closed))
+		return 0;
+
+	for (nb_tx = 0; nb_tx < nb_pkts; ++nb_tx) {
+		hyperv_scan_comps(hv, 0);
+		/* Determine if the descriptor ring needs to be cleaned. */
+		if (txq->tx_free > txq->tx_free_thresh)
+			hyperv_txeof(txq);
+
+		if (!txq->tx_avail) {
+			hyperv_scan_comps(hv, 1);
+			hyperv_txeof(txq);
+			if (!txq->tx_avail) {
+				PMD_PWARN_LOG(hv, DBG_TX, "No TX mbuf available");
+				break;
+			}
+		}
+		m = tx_pkts[nb_tx];
+		len = m->data_len;
+		total_len = m->pkt_len;
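+		/*
+		 * Describe the first data segment as pfn/offset/length
+		 * page buffers placed right after the RNDIS header
+		 * pages set up at queue init.
+		 */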
+		first_data_page = RTE_MBUF_DATA_DMA_ADDR(m) >> PAGE_SHIFT;
+		data_pages = ((RTE_MBUF_DATA_DMA_ADDR(m) + len - 1) >> PAGE_SHIFT) -
+			first_data_page + 1;
+
+		packet = txq->pkts[txq->tx_tail];
+		rndis_pages = packet->ext_pages;
+
+		txq->sw_ring[txq->tx_tail] = m;
+		packet->tot_data_buf_len = total_len;
+		packet->page_buffers[rndis_pages].pfn =
+			RTE_MBUF_DATA_DMA_ADDR(m) >> PAGE_SHIFT;
+		packet->page_buffers[rndis_pages].offset =
+			RTE_MBUF_DATA_DMA_ADDR(m) & (PAGE_SIZE - 1);
+		if (data_pages == 1)
+			packet->page_buffers[rndis_pages].length = len;
+		else
+			packet->page_buffers[rndis_pages].length = PAGE_SIZE -
+				packet->page_buffers[rndis_pages].offset;
+
+		for (i = 1; i < data_pages; ++i) {
+			packet->page_buffers[rndis_pages + i].pfn = first_data_page + i;
+			packet->page_buffers[rndis_pages + i].offset = 0;
+			packet->page_buffers[rndis_pages + i].length = PAGE_SIZE;
+		}
+		if (data_pages > 1)
+			packet->page_buffers[rndis_pages - 1 + data_pages].length =
+				((rte_pktmbuf_mtod(m, unsigned long) + len - 1)
+				 & (PAGE_SIZE - 1)) + 1;
+
+		uint16_t index = data_pages + rndis_pages;
+
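+		/*
+		 * Append page buffers for the remaining segments of a
+		 * chained mbuf; with common 2K mbufs a segment can
+		 * straddle at most one page boundary.
+		 */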
+		for (i = 1; i < m->nb_segs; ++i) {
+			m = m->next;
+			len = m->data_len;
+			first_data_page = RTE_MBUF_DATA_DMA_ADDR(m) >> PAGE_SHIFT;
+			data_pages = ((RTE_MBUF_DATA_DMA_ADDR(m) + len - 1) >> PAGE_SHIFT) -
+				first_data_page + 1;
+			packet->page_buffers[index].pfn =
+				RTE_MBUF_DATA_DMA_ADDR(m) >> PAGE_SHIFT;
+			packet->page_buffers[index].offset =
+				rte_pktmbuf_mtod(m, unsigned long)
+				& (PAGE_SIZE - 1);
+			packet->page_buffers[index].length = m->data_len;
+			if (data_pages > 1) {
+				/* data_pages can be 2 with the usual 2048-byte mbuf size */
+				packet->page_buffers[index].length = PAGE_SIZE -
+					packet->page_buffers[index].offset;
+				packet->page_buffers[++index].offset = 0;
+				packet->page_buffers[index].pfn =
+					packet->page_buffers[index - 1].pfn + 1;
+				packet->page_buffers[index].length =
+					m->data_len
+					- packet->page_buffers[index - 1].length;
+			}
+			++index;
+		}
+		packet->page_buf_count = index;
+
+		ret = hv_rf_on_send(hv, packet);
+		if (likely(ret == 0)) {
+			++hv->stats.opkts;
+			hv->stats.obytes += total_len;
+			if (++txq->tx_tail == txq->nb_tx_desc)
+				txq->tx_tail = 0;
+			--txq->tx_avail;
+		} else {
+			++hv->stats.oerrors;
+			PMD_PERROR_LOG(hv, DBG_TX, "TX ring buffer is busy");
+		}
+	}
+
+	return nb_tx;
+}
diff --git a/lib/librte_pmd_hyperv/hyperv_rxtx.h b/lib/librte_pmd_hyperv/hyperv_rxtx.h
new file mode 100644
index 0000000..c45a704
--- /dev/null
+++ b/lib/librte_pmd_hyperv/hyperv_rxtx.h
@@ -0,0 +1,35 @@ 
+/*-
+ *   Copyright(c) 2013-2015 Brocade Communications Systems, Inc.
+ *   All rights reserved.
+ */
+
+/**
+ * Structure associated with each TX queue.
+ */
+struct hv_tx_queue {
+	struct netvsc_packet    **pkts;
+	struct rte_mbuf         **sw_ring;
+	uint16_t                nb_tx_desc;
+	uint16_t                tx_avail;
+	uint16_t                tx_head;
+	uint16_t                tx_tail;
+	uint16_t                tx_free_thresh;
+	uint16_t                tx_free;
+	uint8_t                 port_id;
+	struct hv_data          *hv;
+} __rte_cache_aligned;
+
+/**
+ * Structure associated with each RX queue.
+ */
+struct hv_rx_queue {
+	struct rte_mempool      *mb_pool;
+	struct rte_mbuf         **sw_ring;
+	uint16_t                nb_rx_desc;
+	uint16_t                rx_head;
+	uint16_t                rx_tail;
+	uint16_t                rx_free_thresh;
+	uint8_t                 port_id;
+	struct hv_data          *hv;
+	struct hv_vm_packet_descriptor *desc;
+} __rte_cache_aligned;
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index 62a76ae..e0416d1 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -133,6 +133,10 @@  LDLIBS += -lm
 LDLIBS += -lrt
 endif
 
+ifeq ($(CONFIG_RTE_LIBRTE_HV_PMD),y)
+LDLIBS += -lrte_pmd_hyperv
+endif
+
 ifeq ($(CONFIG_RTE_LIBRTE_VHOST), y)
 LDLIBS += -lrte_vhost
 endif