[dpdk-dev,v3,1/4] net/softnic: add softnic PMD

Message ID 20170811124929.118564-2-jasvinder.singh@intel.com (mailing list archive)
State Superseded, archived
Checks

Context               Check     Description
ci/checkpatch         warning   coding style issues
ci/Intel-compilation  success   Compilation OK

Commit Message

Jasvinder Singh Aug. 11, 2017, 12:49 p.m. UTC
  Add SoftNIC PMD to provide SW fall-back for ethdev APIs.

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
Signed-off-by: Jasvinder Singh <jasvinder.singh@intel.com>
---
v3 changes:
- rebase to dpdk17.08 release

v2 changes:
- fix build errors
- rebased to TM APIs v6 plus dpdk master

 MAINTAINERS                                     |   5 +
 config/common_base                              |   5 +
 drivers/net/Makefile                            |   5 +
 drivers/net/softnic/Makefile                    |  56 +++
 drivers/net/softnic/rte_eth_softnic.c           | 609 ++++++++++++++++++++++++
 drivers/net/softnic/rte_eth_softnic.h           |  54 +++
 drivers/net/softnic/rte_eth_softnic_internals.h | 114 +++++
 drivers/net/softnic/rte_eth_softnic_version.map |   7 +
 mk/rte.app.mk                                   |   5 +-
 9 files changed, 859 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/softnic/Makefile
 create mode 100644 drivers/net/softnic/rte_eth_softnic.c
 create mode 100644 drivers/net/softnic/rte_eth_softnic.h
 create mode 100644 drivers/net/softnic/rte_eth_softnic_internals.h
 create mode 100644 drivers/net/softnic/rte_eth_softnic_version.map
  

Comments

Ferruh Yigit Sept. 5, 2017, 2:53 p.m. UTC | #1
On 8/11/2017 1:49 PM, Jasvinder Singh wrote:
> Add SoftNIC PMD to provide SW fall-back for ethdev APIs.
> 
> Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
> Signed-off-by: Jasvinder Singh <jasvinder.singh@intel.com>
> ---
> v3 changes:
> - rebase to dpdk17.08 release
> 
> v2 changes:
> - fix build errors
> - rebased to TM APIs v6 plus dpdk master
> 
>  MAINTAINERS                                     |   5 +
>  config/common_base                              |   5 +
>  drivers/net/Makefile                            |   5 +
>  drivers/net/softnic/Makefile                    |  56 +++
>  drivers/net/softnic/rte_eth_softnic.c           | 609 ++++++++++++++++++++++++
>  drivers/net/softnic/rte_eth_softnic.h           |  54 +++
>  drivers/net/softnic/rte_eth_softnic_internals.h | 114 +++++
>  drivers/net/softnic/rte_eth_softnic_version.map |   7 +
>  mk/rte.app.mk                                   |   5 +-

Also documentation updates are required:
- .ini file
- PMD documentation .rst file
- I believe it is good to update the release notes about the new PMD
- release notes library version info, since this has a public API

<...>

> +EXPORT_MAP := rte_eth_softnic_version.map

rte_pmd_... to be consistent.

<...>

> +#
> +# Export include files
> +#
> +SYMLINK-y-include +=rte_eth_softnic.h

space after +=

<...>

> diff --git a/drivers/net/softnic/rte_eth_softnic.c b/drivers/net/softnic/rte_eth_softnic.c
<...>
> +
> +static struct rte_vdev_driver pmd_drv;

Why is this required? It is already defined below.
And for naming: pmd = poll mode driver, drv = driver, which makes "poll mode
driver driver"


<...>

> +static int
> +pmd_rx_queue_setup(struct rte_eth_dev *dev,
> +	uint16_t rx_queue_id,
> +	uint16_t nb_rx_desc __rte_unused,
> +	unsigned int socket_id,
> +	const struct rte_eth_rxconf *rx_conf __rte_unused,
> +	struct rte_mempool *mb_pool __rte_unused)
> +{
> +	struct pmd_internals *p = dev->data->dev_private;
> +
> +	if (p->params.soft.intrusive == 0) {
> +		struct pmd_rx_queue *rxq;
> +
> +		rxq = rte_zmalloc_socket(p->params.soft.name,
> +			sizeof(struct pmd_rx_queue), 0, socket_id);
> +		if (rxq == NULL)
> +			return -1;

return -ENOMEM ?

> +
> +		rxq->hard.port_id = p->hard.port_id;
> +		rxq->hard.rx_queue_id = rx_queue_id;
> +		dev->data->rx_queues[rx_queue_id] = rxq;
> +	} else {
> +		struct rte_eth_dev *hard_dev =
> +			&rte_eth_devices[p->hard.port_id];
> +		void *rxq = hard_dev->data->rx_queues[rx_queue_id];
> +
> +		if (rxq == NULL)
> +			return -1;
> +
> +		dev->data->rx_queues[rx_queue_id] = rxq;

This assigns the underlying hw queue as this soft PMD queue. What happens if
two different cores are involved, one polling the actual hw device and the
other polling this virtual device, since both are indeed the same queue?

> +	}
> +	return 0;
> +}
> +

<...>

> +static __rte_always_inline int
> +rte_pmd_softnic_run_default(struct rte_eth_dev *dev)
> +{
> +	struct pmd_internals *p = dev->data->dev_private;
> +
> +	/* Persistent context: Read Only (update not required) */
> +	struct rte_mbuf **pkts = p->soft.def.pkts;
> +	uint16_t nb_tx_queues = dev->data->nb_tx_queues;
> +
> +	/* Persistent context: Read - Write (update required) */
> +	uint32_t txq_pos = p->soft.def.txq_pos;
> +	uint32_t pkts_len = p->soft.def.pkts_len;
> +	uint32_t flush_count = p->soft.def.flush_count;
> +
> +	/* Not part of the persistent context */
> +	uint32_t pos;
> +	uint16_t i;
> +
> +	/* Soft device TXQ read, Hard device TXQ write */
> +	for (i = 0; i < nb_tx_queues; i++) {
> +		struct rte_ring *txq = dev->data->tx_queues[txq_pos];
> +
> +		/* Read soft device TXQ burst to packet enqueue buffer */
> +		pkts_len += rte_ring_sc_dequeue_burst(txq,
> +			(void **) &pkts[pkts_len],
> +			DEFAULT_BURST_SIZE,
> +			NULL);
> +
> +		/* Increment soft device TXQ */
> +		txq_pos++;
> +		if (txq_pos >= nb_tx_queues)
> +			txq_pos = 0;
> +
> +		/* Hard device TXQ write when complete burst is available */
> +		if (pkts_len >= DEFAULT_BURST_SIZE) {

Three questions:
1- When there are multiple tx_queues on the softnic, and assuming all will be
processed by one core, this core will be reading from all of them into a
single HW queue; won't this create a bottleneck?

2- This logic reads up to BURST_SIZE from all queues and merges them; if the
queues were split with RSS or similar, that classification will be lost,
will it be a problem?

3- If there are not enough packets in the queues (< DEFAULT_BURST_SIZE),
those packets won't be transmitted unless more are coming; will this
create latency in those cases?

> +			for (pos = 0; pos < pkts_len; )
> +				pos += rte_eth_tx_burst(p->hard.port_id,
> +					p->params.hard.tx_queue_id,
> +					&pkts[pos],
> +					(uint16_t) (pkts_len - pos));
> +
> +			pkts_len = 0;
> +			flush_count = 0;
> +			break;
> +		}
> +	}
> +
> +	if (flush_count >= FLUSH_COUNT_THRESHOLD) {

FLUSH_COUNT_THRESHOLD is (1 << 17), and if no packet is sent, the flush
count is incremented by one; just want to confirm the threshold value?

And why does this flush exist?

> +		for (pos = 0; pos < pkts_len; )
> +			pos += rte_eth_tx_burst(p->hard.port_id,
> +				p->params.hard.tx_queue_id,
> +				&pkts[pos],
> +				(uint16_t) (pkts_len - pos));
> +
> +		pkts_len = 0;
> +		flush_count = 0;
> +	}
> +
> +	p->soft.def.txq_pos = txq_pos;
> +	p->soft.def.pkts_len = pkts_len;
> +	p->soft.def.flush_count = flush_count + 1;
> +
> +	return 0;
> +}
> +
> +int
> +rte_pmd_softnic_run(uint8_t port_id)
> +{
> +	struct rte_eth_dev *dev = &rte_eth_devices[port_id];

It would be possible to create a macro for this.

<...>

> +static int
> +default_init(struct pmd_internals *p,

default_mbufs_init()? default_init() on its own is not that clear.

<...>

> +static void
> +default_free(struct pmd_internals *p)

default_mbufs_free()?

<...>

> +static void *
> +pmd_init(struct pmd_params *params, int numa_node)
> +{
> +	struct pmd_internals *p;
> +	int status;
> +
> +	p = rte_zmalloc_socket(params->soft.name,
> +		sizeof(struct pmd_internals),
> +		0,
> +		numa_node);
> +	if (p == NULL)
> +		return NULL;
> +
> +	memcpy(&p->params, params, sizeof(p->params));
> +	rte_eth_dev_get_port_by_name(params->hard.name, &p->hard.port_id);

You may want to check return value of this.

> +
> +	/* Default */
> +	status = default_init(p, params, numa_node);
> +	if (status) {
> +		rte_free(p);
> +		return NULL;
> +	}
> +
> +	return p;
> +}
> +
> +static void
> +pmd_free(struct pmd_internals *p)
> +{
> +	default_free(p);

p->hard.name also needs to be freed here.

> +
> +	rte_free(p);
> +}
> +
> +static int
> +pmd_ethdev_register(struct rte_vdev_device *vdev,
> +	struct pmd_params *params,
> +	void *dev_private)
> +{
> +	struct rte_eth_dev_info hard_info;
> +	struct rte_eth_dev *soft_dev;
> +	struct rte_eth_dev_data *soft_data;
> +	uint32_t hard_speed;
> +	int numa_node;
> +	uint8_t hard_port_id;
> +
> +	rte_eth_dev_get_port_by_name(params->hard.name, &hard_port_id);
> +	rte_eth_dev_info_get(hard_port_id, &hard_info);
> +	hard_speed = eth_dev_speed_max_mbps(hard_info.speed_capa);
> +	numa_node = rte_eth_dev_socket_id(hard_port_id);
> +
> +	/* Memory allocation */
> +	soft_data = rte_zmalloc_socket(params->soft.name,
> +		sizeof(*soft_data), 0, numa_node);
> +	if (!soft_data)
> +		return -ENOMEM;
> +
> +	/* Ethdev entry allocation */
> +	soft_dev = rte_eth_dev_allocate(params->soft.name);
> +	if (!soft_dev) {
> +		rte_free(soft_data);
> +		return -ENOMEM;
> +	}
> +
> +	/* Connect dev->data */
> +	memmove(soft_data->name,
> +		soft_dev->data->name,
> +		sizeof(soft_data->name));

I guess this is redundant here; instead of allocating soft_data and the rest,
it is possible to use soft_dev->data directly.

> +	soft_data->port_id = soft_dev->data->port_id;
> +	soft_data->mtu = soft_dev->data->mtu;
> +	soft_dev->data = soft_data;
> +
> +	/* dev */
> +	soft_dev->rx_pkt_burst = (params->soft.intrusive) ?
> +		NULL : /* set up later */
> +		pmd_rx_pkt_burst;
> +	soft_dev->tx_pkt_burst = pmd_tx_pkt_burst;
> +	soft_dev->tx_pkt_prepare = NULL;
> +	soft_dev->dev_ops = &pmd_ops;
> +	soft_dev->device = &vdev->device;
> +
> +	/* dev->data */
> +	soft_dev->data->dev_private = dev_private;
> +	soft_dev->data->dev_link.link_speed = hard_speed;
> +	soft_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
> +	soft_dev->data->dev_link.link_autoneg = ETH_LINK_SPEED_FIXED;
> +	soft_dev->data->dev_link.link_status = ETH_LINK_DOWN;

For simplicity, it is possible to have a static struct rte_eth_link, and
assign it to data->dev_link, as done in the null PMD.

> +	soft_dev->data->mac_addrs = &eth_addr;
> +	soft_dev->data->promiscuous = 1;
> +	soft_dev->data->kdrv = RTE_KDRV_NONE;
> +	soft_dev->data->numa_node = numa_node;

If pmd is detachable, need following flag:
data->dev_flags = RTE_ETH_DEV_DETACHABLE;

> +
> +	return 0;
> +}
> +

<...>

> +static int
> +pmd_probe(struct rte_vdev_device *vdev)
> +{
> +	struct pmd_params p;
> +	const char *params;
> +	int status;
> +
> +	struct rte_eth_dev_info hard_info;
> +	uint8_t hard_port_id;
> +	int numa_node;
> +	void *dev_private;
> +
> +	if (!vdev)
> +		return -EINVAL;

This check is not required, eal won't call this function with NULL vdev.

<...>

> diff --git a/drivers/net/softnic/rte_eth_softnic.h b/drivers/net/softnic/rte_eth_softnic.h
<...>
> +int
> +rte_pmd_softnic_run(uint8_t port_id);

Since this is a public API, it needs to be commented properly, with a
doxygen comment.

Btw, since there is an API in this PMD, perhaps the API documentation also
needs to be updated to include this.
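
For illustration, the doxygen comment could look roughly like the sketch
below (wording and return convention are assumptions, not necessarily what a
later revision uses):

--
/**
 * Run the "soft" device.
 *
 * The "soft" port is implemented by the CPU, so this function has to be
 * called periodically by the application in order to get packets moving
 * between the "soft" port and the underlying "hard" port.
 *
 * @param port_id
 *   Port id of the "soft" device.
 * @return
 *   Zero on success, negative error code otherwise.
 */
int
rte_pmd_softnic_run(uint8_t port_id);
--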

<...>
  
Jasvinder Singh Sept. 8, 2017, 9:30 a.m. UTC | #2
Hi Ferruh,

Thank you for the review and feedback. Please see inline response;

> -----Original Message-----

> From: Yigit, Ferruh

> Sent: Tuesday, September 5, 2017 3:53 PM

> To: Singh, Jasvinder <jasvinder.singh@intel.com>; dev@dpdk.org

> Cc: Dumitrescu, Cristian <cristian.dumitrescu@intel.com>;

> thomas@monjalon.net

> Subject: Re: [PATCH v3 1/4] net/softnic: add softnic PMD

> 

> On 8/11/2017 1:49 PM, Jasvinder Singh wrote:

> > Add SoftNIC PMD to provide SW fall-back for ethdev APIs.

> >

> > Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>

> > Signed-off-by: Jasvinder Singh <jasvinder.singh@intel.com>

> > ---

> > v3 changes:

> > - rebase to dpdk17.08 release

> >

> > v2 changes:

> > - fix build errors

> > - rebased to TM APIs v6 plus dpdk master

> >

> >  MAINTAINERS                                     |   5 +

> >  config/common_base                              |   5 +

> >  drivers/net/Makefile                            |   5 +

> >  drivers/net/softnic/Makefile                    |  56 +++

> >  drivers/net/softnic/rte_eth_softnic.c           | 609

> ++++++++++++++++++++++++

> >  drivers/net/softnic/rte_eth_softnic.h           |  54 +++

> >  drivers/net/softnic/rte_eth_softnic_internals.h | 114 +++++

> >  drivers/net/softnic/rte_eth_softnic_version.map |   7 +

> >  mk/rte.app.mk                                   |   5 +-

> 

> Also documentation updates are required:

> - .ini file

> - PMD documentation .rst file

> - I believe it is good to update release note about new PMD

> - release notes library version info, since this has public API


Will send documentation patch.

> <...>

> 

> > +EXPORT_MAP := rte_eth_softnic_version.map

> 

> rte_pmd_... to be consistent.

> 

> <...>


Will do.

> > +#

> > +# Export include files

> > +#

> > +SYMLINK-y-include +=rte_eth_softnic.h

> 

> space after +=

> 


Will add space.
> 

> > diff --git a/drivers/net/softnic/rte_eth_softnic.c

> > b/drivers/net/softnic/rte_eth_softnic.c

> <...>

> > +

> > +static struct rte_vdev_driver pmd_drv;

> 

> Why this is required, already defined below.

> And for naming, pmd=poll mode driver, drv=driver, makes "poll mode driver

> driver"

> 


Ok. will correct this.

> <...>

> 

> > +static int

> > +pmd_rx_queue_setup(struct rte_eth_dev *dev,

> > +	uint16_t rx_queue_id,

> > +	uint16_t nb_rx_desc __rte_unused,

> > +	unsigned int socket_id,

> > +	const struct rte_eth_rxconf *rx_conf __rte_unused,

> > +	struct rte_mempool *mb_pool __rte_unused) {

> > +	struct pmd_internals *p = dev->data->dev_private;

> > +

> > +	if (p->params.soft.intrusive == 0) {

> > +		struct pmd_rx_queue *rxq;

> > +

> > +		rxq = rte_zmalloc_socket(p->params.soft.name,

> > +			sizeof(struct pmd_rx_queue), 0, socket_id);

> > +		if (rxq == NULL)

> > +			return -1;

> 

> return -ENOMEM ?


Ok.
 
> > +

> > +		rxq->hard.port_id = p->hard.port_id;

> > +		rxq->hard.rx_queue_id = rx_queue_id;

> > +		dev->data->rx_queues[rx_queue_id] = rxq;

> > +	} else {

> > +		struct rte_eth_dev *hard_dev =

> > +			&rte_eth_devices[p->hard.port_id];

> > +		void *rxq = hard_dev->data->rx_queues[rx_queue_id];

> > +

> > +		if (rxq == NULL)

> > +			return -1;

> > +

> > +		dev->data->rx_queues[rx_queue_id] = rxq;

> 

> This assigns underlying hw queue as this soft PMD queue, what happens if

> two different cores, one polls the actual hw device and other polls the this

> virtual device, since both are indeed same queues?


Once the soft device is created and attached to the hard device, the application has to read packets from/write packets to the "soft" port instead of the "hard" port, as the soft device is a
feature-rich version of the hard device (see cover letter notes). The RX and TX queues of the "soft" port are thread safe, as for any ethdev.
 
> > +	}

> > +	return 0;

> > +}

> > +

> 

> <...>

> 

> > +static __rte_always_inline int

> > +rte_pmd_softnic_run_default(struct rte_eth_dev *dev) {

> > +	struct pmd_internals *p = dev->data->dev_private;

> > +

> > +	/* Persistent context: Read Only (update not required) */

> > +	struct rte_mbuf **pkts = p->soft.def.pkts;

> > +	uint16_t nb_tx_queues = dev->data->nb_tx_queues;

> > +

> > +	/* Persistent context: Read - Write (update required) */

> > +	uint32_t txq_pos = p->soft.def.txq_pos;

> > +	uint32_t pkts_len = p->soft.def.pkts_len;

> > +	uint32_t flush_count = p->soft.def.flush_count;

> > +

> > +	/* Not part of the persistent context */

> > +	uint32_t pos;

> > +	uint16_t i;

> > +

> > +	/* Soft device TXQ read, Hard device TXQ write */

> > +	for (i = 0; i < nb_tx_queues; i++) {

> > +		struct rte_ring *txq = dev->data->tx_queues[txq_pos];

> > +

> > +		/* Read soft device TXQ burst to packet enqueue buffer */

> > +		pkts_len += rte_ring_sc_dequeue_burst(txq,

> > +			(void **) &pkts[pkts_len],

> > +			DEFAULT_BURST_SIZE,

> > +			NULL);

> > +

> > +		/* Increment soft device TXQ */

> > +		txq_pos++;

> > +		if (txq_pos >= nb_tx_queues)

> > +			txq_pos = 0;

> > +

> > +		/* Hard device TXQ write when complete burst is available */

> > +		if (pkts_len >= DEFAULT_BURST_SIZE) {

> 

> There questions:

> 1- When there are multiple tx_queues of softnic, and assume all will be

> processed by a core, this core will be reading from all into single HW queue,

> won' this create a bottle neck?


I am not sure if I understand correctly. As per the QoS sched library implementation, the number of tx queues of the softnic depends upon the number of users sending their traffic and is configurable via one of the input
arguments for device creation. There is no mapping between the softnic tx queues and the hard device tx queues. The softnic device receives the packets in its scheduling queues (tx queues), prioritizes their transmission
and transmits them accordingly to a specific queue of the hard device (which can be specified as an input argument). It would be redundant for the thread implementing the QoS scheduler to distribute the packets among the hard device tx queues, as that doesn't actually serve any purpose.
 
> 2- This logic reads from all queues as BURST_SIZE and merges them, if

> queues split with a RSS or similar, that clasiffication will be lost, will it be

> problem?


I don't think so. The QoS scheduler sits on the tx side just before the transmission stage and receives the packet burst destined for the specific network interface to which it is attached.
Thus, it schedules the packets egressing through the specific port instead of merging the packets going to different interfaces.
 
> 3- If there is not enough packets in the queues ( < DEFAULT_BURST_SIZE)

> those packets won't be transmitted unless more is comming, will this create

> latency for those cases?


In a low traffic rate situation, packets will be automatically flushed at a specific interval, as discussed below.

> 

> > +			for (pos = 0; pos < pkts_len; )

> > +				pos += rte_eth_tx_burst(p->hard.port_id,

> > +					p->params.hard.tx_queue_id,

> > +					&pkts[pos],

> > +					(uint16_t) (pkts_len - pos));

> > +

> > +			pkts_len = 0;

> > +			flush_count = 0;

> > +			break;

> > +		}

> > +	}

> > +

> > +	if (flush_count >= FLUSH_COUNT_THRESHOLD) {

> 

> FLUSH_COUNT_THRESHOLD is (1 << 17), and if no packet is sent, flash count

> incremented by one, just want to confirm the treshold value?

> 

> And why this flush exists?


The flush mechanism comes into play when the traffic rate is very low. In such an instance, a packet flush will be triggered once the threshold value is reached. For example, for a CPU core spinning at 2.0 GHz, as per the current
setting, the packet flush will happen at a ~65 us interval when the accumulated packet burst size is less than the set value.
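
For reference, a minimal sketch of the arithmetic behind the ~65 us figure
(the assumption of roughly one run-loop iteration per CPU cycle is only an
approximation; in practice the interval will be somewhat longer):

--
#include <stdio.h>

/* FLUSH_COUNT_THRESHOLD empty iterations divided by the core frequency. */
int main(void)
{
	const double threshold = 1 << 17;   /* 131072 */
	const double core_hz = 2.0e9;       /* 2.0 GHz */

	/* Prints "idle flush interval ~ 65.5 us" */
	printf("idle flush interval ~ %.1f us\n", 1e6 * threshold / core_hz);
	return 0;
}
--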

> > +		for (pos = 0; pos < pkts_len; )

> > +			pos += rte_eth_tx_burst(p->hard.port_id,

> > +				p->params.hard.tx_queue_id,

> > +				&pkts[pos],

> > +				(uint16_t) (pkts_len - pos));

> > +

> > +		pkts_len = 0;

> > +		flush_count = 0;

> > +	}

> > +

> > +	p->soft.def.txq_pos = txq_pos;

> > +	p->soft.def.pkts_len = pkts_len;

> > +	p->soft.def.flush_count = flush_count + 1;

> > +

> > +	return 0;

> > +}

> > +

> > +int

> > +rte_pmd_softnic_run(uint8_t port_id)

> > +{

> > +	struct rte_eth_dev *dev = &rte_eth_devices[port_id];

> 

> It can be possible to create a macro for this.


Ok. Will do.
 
> <...>

> 

> > +static int

> > +default_init(struct pmd_internals *p,

> 

> default_mbufs_init()? default_init() on its own in not that clear.

> 

> <...>

> 

> > +static void

> > +default_free(struct pmd_internals *p)

> 

> default_mbufs_free()?


The generic name is chosen in case we initialize and free more parameters than just the mbufs.
 
> <...>

> 

> > +static void *

> > +pmd_init(struct pmd_params *params, int numa_node) {

> > +	struct pmd_internals *p;

> > +	int status;

> > +

> > +	p = rte_zmalloc_socket(params->soft.name,

> > +		sizeof(struct pmd_internals),

> > +		0,

> > +		numa_node);

> > +	if (p == NULL)

> > +		return NULL;

> > +

> > +	memcpy(&p->params, params, sizeof(p->params));

> > +	rte_eth_dev_get_port_by_name(params->hard.name, &p-

> >hard.port_id);

> 

> You may want to check return value of this.


Will add check.
 
> > +

> > +	/* Default */

> > +	status = default_init(p, params, numa_node);

> > +	if (status) {

> > +		rte_free(p);

> > +		return NULL;

> > +	}

> > +

> > +	return p;

> > +}

> > +

> > +static void

> > +pmd_free(struct pmd_internals *p)

> > +{

> > +	default_free(p);

> 

> p->hard.name also needs to be freed here.


No, we don't allocate any memory to this variable, as it points to the value retrieved from rte_eth_dev_get_port_by_name();
 
> > +

> > +	rte_free(p);

> > +}

> > +

> > +static int

> > +pmd_ethdev_register(struct rte_vdev_device *vdev,

> > +	struct pmd_params *params,

> > +	void *dev_private)

> > +{

> > +	struct rte_eth_dev_info hard_info;

> > +	struct rte_eth_dev *soft_dev;

> > +	struct rte_eth_dev_data *soft_data;

> > +	uint32_t hard_speed;

> > +	int numa_node;

> > +	uint8_t hard_port_id;

> > +

> > +	rte_eth_dev_get_port_by_name(params->hard.name,

> &hard_port_id);

> > +	rte_eth_dev_info_get(hard_port_id, &hard_info);

> > +	hard_speed = eth_dev_speed_max_mbps(hard_info.speed_capa);

> > +	numa_node = rte_eth_dev_socket_id(hard_port_id);

> > +

> > +	/* Memory allocation */

> > +	soft_data = rte_zmalloc_socket(params->soft.name,

> > +		sizeof(*soft_data), 0, numa_node);

> > +	if (!soft_data)

> > +		return -ENOMEM;

> > +

> > +	/* Ethdev entry allocation */

> > +	soft_dev = rte_eth_dev_allocate(params->soft.name);

> > +	if (!soft_dev) {

> > +		rte_free(soft_data);

> > +		return -ENOMEM;

> > +	}

> > +

> > +	/* Connect dev->data */

> > +	memmove(soft_data->name,

> > +		soft_dev->data->name,

> > +		sizeof(soft_data->name));

> 

> I guess this is redundant here, allocating soft_data and rest, it is possible to

> use soft_dev->data directly.


Yes,  will correct this.
 
> > +	soft_data->port_id = soft_dev->data->port_id;

> > +	soft_data->mtu = soft_dev->data->mtu;

> > +	soft_dev->data = soft_data;

> > +

> > +	/* dev */

> > +	soft_dev->rx_pkt_burst = (params->soft.intrusive) ?

> > +		NULL : /* set up later */

> > +		pmd_rx_pkt_burst;

> > +	soft_dev->tx_pkt_burst = pmd_tx_pkt_burst;

> > +	soft_dev->tx_pkt_prepare = NULL;

> > +	soft_dev->dev_ops = &pmd_ops;

> > +	soft_dev->device = &vdev->device;

> > +

> > +	/* dev->data */

> > +	soft_dev->data->dev_private = dev_private;

> > +	soft_dev->data->dev_link.link_speed = hard_speed;

> > +	soft_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;

> > +	soft_dev->data->dev_link.link_autoneg = ETH_LINK_SPEED_FIXED;

> > +	soft_dev->data->dev_link.link_status = ETH_LINK_DOWN;

> 

> For simplity, it is possible to have a static struct rte_eth_link, and assing it to

> data->dev_link, as done in null pmd.


The device speed is determined from that of the hard device, so I thought to assign the value explicitly here.
 
> > +	soft_dev->data->mac_addrs = &eth_addr;

> > +	soft_dev->data->promiscuous = 1;

> > +	soft_dev->data->kdrv = RTE_KDRV_NONE;

> > +	soft_dev->data->numa_node = numa_node;

> 

> If pmd is detachable, need following flag:

> data->dev_flags = RTE_ETH_DEV_DETACHABLE;


Ok. Will do that.
 
> > +

> > +	return 0;

> > +}

> > +

> 

> <...>

> 

> > +static int

> > +pmd_probe(struct rte_vdev_device *vdev) {

> > +	struct pmd_params p;

> > +	const char *params;

> > +	int status;

> > +

> > +	struct rte_eth_dev_info hard_info;

> > +	uint8_t hard_port_id;

> > +	int numa_node;

> > +	void *dev_private;

> > +

> > +	if (!vdev)

> > +		return -EINVAL;

> 

> This check is not required, eal won't call this function with NULL vdev.


Ok. Will correct this.
 
> <...>

> 

> > diff --git a/drivers/net/softnic/rte_eth_softnic.h

> > b/drivers/net/softnic/rte_eth_softnic.h

> <...>

> > +int

> > +rte_pmd_softnic_run(uint8_t port_id);

> 

> Since this is public API, this needs to be commented properly, with doxygen

> comment.

>

> Btw, since there is API in this PMD perhaps api documentation also needs to

> be updated to include this.


Yes, will add documentation.
> <...>
  
Ferruh Yigit Sept. 8, 2017, 9:48 a.m. UTC | #3
On 9/8/2017 10:30 AM, Singh, Jasvinder wrote:
> Hi Ferruh,
> 
> Thank you for the review and feedback. Please see inline response;
> 
>> -----Original Message-----
>> From: Yigit, Ferruh
>> Sent: Tuesday, September 5, 2017 3:53 PM
>> To: Singh, Jasvinder <jasvinder.singh@intel.com>; dev@dpdk.org
>> Cc: Dumitrescu, Cristian <cristian.dumitrescu@intel.com>;
>> thomas@monjalon.net
>> Subject: Re: [PATCH v3 1/4] net/softnic: add softnic PMD
>>
>> On 8/11/2017 1:49 PM, Jasvinder Singh wrote:
>>> Add SoftNIC PMD to provide SW fall-back for ethdev APIs.
>>>
>>> Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
>>> Signed-off-by: Jasvinder Singh <jasvinder.singh@intel.com>

<...>

>>> +
>>> +	/* Default */
>>> +	status = default_init(p, params, numa_node);
>>> +	if (status) {
>>> +		rte_free(p);
>>> +		return NULL;
>>> +	}
>>> +
>>> +	return p;
>>> +}
>>> +
>>> +static void
>>> +pmd_free(struct pmd_internals *p)
>>> +{
>>> +	default_free(p);
>>
>> p->hard.name also needs to be freed here.
> 
> No, we don't allocate any memory to this varibale as it points to the value retrieved from the rte_eth_dev_get_port_by_name();

I guess it is the other way around: rte_eth_dev_get_port_by_name() uses
hard.name to get and store the port_id of the underlying hw.

How is hard.name set? If I don't miss anything, it is strdup'd from devargs:

--
ret = rte_kvargs_process(kvlist, PMD_PARAM_HARD_NAME, &get_string,
&p->hard.name);
--
get_string()
	*(char **)extra_args = strdup(value);
--
  
Jasvinder Singh Sept. 8, 2017, 10:42 a.m. UTC | #4
> -----Original Message-----

> From: Yigit, Ferruh

> Sent: Friday, September 8, 2017 10:49 AM

> To: Singh, Jasvinder <jasvinder.singh@intel.com>; dev@dpdk.org

> Cc: Dumitrescu, Cristian <cristian.dumitrescu@intel.com>;

> thomas@monjalon.net

> Subject: Re: [PATCH v3 1/4] net/softnic: add softnic PMD

> 

> On 9/8/2017 10:30 AM, Singh, Jasvinder wrote:

> > Hi Ferruh,

> >

> > Thank you for the review and feedback. Please see inline response;

> >

> >> -----Original Message-----

> >> From: Yigit, Ferruh

> >> Sent: Tuesday, September 5, 2017 3:53 PM

> >> To: Singh, Jasvinder <jasvinder.singh@intel.com>; dev@dpdk.org

> >> Cc: Dumitrescu, Cristian <cristian.dumitrescu@intel.com>;

> >> thomas@monjalon.net

> >> Subject: Re: [PATCH v3 1/4] net/softnic: add softnic PMD

> >>

> >> On 8/11/2017 1:49 PM, Jasvinder Singh wrote:

> >>> Add SoftNIC PMD to provide SW fall-back for ethdev APIs.

> >>>

> >>> Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>

> >>> Signed-off-by: Jasvinder Singh <jasvinder.singh@intel.com>

> 

> <...>

> 

> >>> +

> >>> +	/* Default */

> >>> +	status = default_init(p, params, numa_node);

> >>> +	if (status) {

> >>> +		rte_free(p);

> >>> +		return NULL;

> >>> +	}

> >>> +

> >>> +	return p;

> >>> +}

> >>> +

> >>> +static void

> >>> +pmd_free(struct pmd_internals *p)

> >>> +{

> >>> +	default_free(p);

> >>

> >> p->hard.name also needs to be freed here.

> >

> > No, we don't allocate any memory to this varibale as it points to the

> > value retrieved from the rte_eth_dev_get_port_by_name();

> 

> I guess it is otherway around, the rte_eth_dev_get_port_by_name() uses

> hard.name to get and store the port_id of the underlying hw.

> 

> how hard.name set, if I don't miss anything, it is strdup from devargs:

> 

> --

> ret = rte_kvargs_process(kvlist, PMD_PARAM_HARD_NAME, &get_string,

> &p->hard.name);

> --

> get_string()

> 	*(char **)extra_args = strdup(value);

> --


Yes, it is set using the above; will correct that. Thanks.
  
Jasvinder Singh Sept. 18, 2017, 9:10 a.m. UTC | #5
The SoftNIC PMD is intended to provide SW fall-back options for specific
ethdev APIs in a generic way to the NICs not supporting those features.

Currently, the only implemented ethdev API is Traffic Management (TM),
but other ethdev APIs such as rte_flow, traffic metering & policing, etc
can be easily implemented.

Overview:
* Generic: The SoftNIC PMD works with any "hard" PMD that implements the
  ethdev API. It does not change the "hard" PMD in any way.
* Creation: For any given "hard" ethdev port, the user can decide to
  create an associated "soft" ethdev port to drive the "hard" port. The
  "soft" port is a virtual device that can be created at app start-up
  through EAL vdev arg or later through the virtual device API.
* Configuration: The app explicitly decides which features are to be
  enabled on the "soft" port and which features are still to be used from
  the "hard" port. The app continues to explicitly configure both the
  "hard" and the "soft" ports after the creation of the "soft" port.
* RX/TX: The app reads packets from/writes packets to the "soft" port
  instead of the "hard" port. The RX and TX queues of the "soft" port are
  thread safe, as any ethdev.
* Execution: The "soft" port is a feature-rich NIC implemented by the CPU,
  so the run function of the "soft" port has to be executed by the CPU in
  order to get packets moving between "hard" port and the app.
* Meets the NFV vision: The app should be (almost) agnostic about the NIC
  implementation (different vendors/models, HW-SW mix), the app should not
  require changes to use different NICs, the app should use the same API
  for all NICs. If a NIC does not implement a specific feature, the HW
  should be augmented with SW to meet the functionality while still
  preserving the same API.

Traffic Management SW fall-back overview:
* Implements the ethdev traffic management API (rte_tm.h).
* Based on the existing librte_sched DPDK library.

Example: Create "soft" port for "hard" port "0000:04:00.1", enable the TM
feature with default settings:
          --vdev 'net_softnic0,hard_name=0000:04:00.1,soft_tm=on' 
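
To illustrate the "Execution" point above, here is a minimal sketch (not part
of the patch set) of an application lcore driving the "soft" port;
soft_port_id, queue id 0 and the burst size are assumptions for the example:

--
#include <rte_ethdev.h>
#include <rte_mbuf.h>
#include "rte_eth_softnic.h"

#define BURST 32

static void
soft_port_loop(uint8_t soft_port_id)
{
	struct rte_mbuf *pkts[BURST];

	for ( ; ; ) {
		/* RX and TX go through the "soft" port, not the "hard" port */
		uint16_t n = rte_eth_rx_burst(soft_port_id, 0, pkts, BURST);
		uint16_t sent = n ? rte_eth_tx_burst(soft_port_id, 0, pkts, n) : 0;

		/* Drop whatever the "soft" TX queue could not accept */
		while (sent < n)
			rte_pktmbuf_free(pkts[sent++]);

		/* Give CPU cycles to the "soft" port features (e.g. TM) */
		rte_pmd_softnic_run(soft_port_id);
	}
}
--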

Q1: Why generic name, if only TM is supported (for now)?
A1: The intention is to have SoftNIC PMD implement many other (all?)
    ethdev APIs under a single "ideal" ethdev, hence the generic name.
    The initial motivation is TM API, but the mechanism is generic and can
    be used for many other ethdev APIs. Somebody looking to provide SW
    fall-back for other ethdev API is likely to end up inventing the same,
    hence it would be good to consolidate all under a single PMD and have
    the user explicitly enable/disable the features it needs for each
    "soft" device.

Q2: Are there any performance requirements for SoftNIC?
A2: Yes, performance should be great/decent for every feature, otherwise
    the SW fall-back is unusable, thus useless.

Q3: Why not change the "hard" device (and keep a single device) instead of
    creating a new "soft" device (and thus having two devices)?
A3: This is not possible with the current librte_ether ethdev
    implementation. The ethdev->dev_ops are defined as constant structure,
    so it cannot be changed per device (nor per PMD). The new ops also
    need memory space to store their context data structures, which
    requires updating the ethdev->data->dev_private of the existing
    device; at best, maybe a resize of ethdev->data->dev_private could be
    done, assuming that librte_ether will introduce a way to find out its
    size, but this cannot be done while device is running. Other side
    effects might exist, as the changes are very intrusive, plus it likely
    needs more changes in librte_ether.

Q4: Why not call the SW fall-back dev_ops directly in librte_ether for
    devices which do not support the specific feature? If the device
    supports the capability, let's call its dev_ops, otherwise call the
    SW fall-back dev_ops.
A4: First, similar reasons to Q&A3. This fixes the need to change
    ethdev->dev_ops of the device, but it does not do anything to fix the
    other significant issue of where to store the context data structures
    needed by the SW fall-back functions (which, in this approach, are
    called implicitly by librte_ether).
    Second, the SW fall-back options should not be restricted arbitrarily
    by the librte_ether library, the decision should belong to the app.
    For example, the TM SW fall-back should not be limited to only
    librte_sched, which (like any SW fall-back) is limited to a specific
    hierarchy and feature set, it cannot do any possible hierarchy. If
    alternatives exist, the one to use should be picked by the app, not by
    the ethdev layer.

Q5: Why is the app required to continue to configure both the "hard" and
    the "soft" devices even after the "soft" device has been created? Why
    not hiding the "hard" device under the "soft" device and have the
    "soft" device configure the "hard" device under the hood?
A5: This was the approach tried in the V2 of this patch set (overlay
    "soft" device taking over the configuration of the underlay "hard"
    device) and eventually dropped due to increased complexity of having
    to keep the configuration of two distinct devices in sync with
    librte_ether implementation that is not friendly towards such
    approach. Basically, each ethdev API call for the overlay device
    needs to configure the overlay device, invoke the same configuration
    with possibly modified parameters for the underlay device, then resume
    the configuration of overlay device, turning this into a device
    emulation project.
    V2 minuses: increased complexity (deal with two devices at same time);
    need to implement every ethdev API, even those not needed for the scope
    of SW fall-back; intrusive; sometimes have to silently take decisions
    that should be left to the app.
    V3 pluses: lower complexity (only one device); only need to implement
    those APIs that are in scope of the SW fall-back; non-intrusive (deal
    with "hard" device through ethdev API); app decisions taken by the app
    in an explicit way.

Q6: Why expose the SW fall-back in a PMD and not in a SW library?
A6: The SW fall-back for an ethdev API has to implement that specific
    ethdev API, (hence expose an ethdev object through a PMD), as opposed
    to providing a different API. This approach allows the app to use the
    same API (NFV vision). For example, we already have a library for TM
    SW fall-back (librte_sched) that can be called directly by the apps
    that need to call it outside of ethdev context (use-cases exist), but
    an app that works with TM-aware NICs through the ethdev TM API would
    have to be changed significantly in order to work with different
    TM-agnostic NICs through the librte_sched API.

Q7: Why have all the SW fall-backs in a single PMD? Why not develop
    the SW fall-back for each different ethdev API in a separate PMD, then
    create a chain of "soft" devices for each "hard" device? Potentially,
    this results in smaller size PMDs that are easier to maintain.
A7: Arguments for single ethdev/PMD and against chain of ethdevs/PMDs:
    1. All the existing PMDs for HW NICs implement a lot of features under
       the same PMD, so there is no reason for single PMD approach to break
       code modularity. See the V3 code, a lot of care has been taken for
       code modularity.
    2. We should avoid the proliferation of SW PMDs.
    3. A single device should be handled by a single PMD.
    4. People are used to feature-rich PMDs, not to single-feature
       PMDs, so why change the mindset?
    5. [Configuration nightmare] A chain of "soft" devices attached to
       single "hard" device requires the app to be aware that the N "soft"
       devices in the chain plus the "hard" device refer to the same HW
       device, and which device should be invoked to configure which
       feature. Also the length of the chain and functionality of each
       link is different for each HW device. This breaks the requirement
       of preserving the same API while working with different NICs (NFV).
       This most likely results in a configuration nightmare, nobody is
       going to seriously use this.
    6. [Feature inter-dependency] Sometimes different features need to be
       configured and executed together (e.g. share the same set of
       resources, are inter-dependent, etc), so it is better and more
       performant to do them in the same ethdev/PMD.
    7. [Code duplication] There is a lot of duplication in the
       configuration code for the chain of ethdevs approach. The ethdev
       dev_configure, rx_queue_setup, tx_queue_setup API functions have to
       be implemented per device, and they become meaningless/inconsistent
       with the chain approach.
    8. [Data structure duplication] The per device data structures have to
       be duplicated and read repeatedly for each "soft" ethdev. The
       ethdev device, dev_private, data, per RX/TX queue data structures
       have to be replicated per "soft" device. They have to be re-read for
       each stage, so the same cache misses are now multiplied with the
       number of stages in the chain.
    9. [rte_ring proliferation] Thread safety requirements for ethdev
       RX/TX queues require an rte_ring to be used for every RX/TX queue
       of each "soft" ethdev. This rte_ring proliferation unnecessarily
       increases the memory footprint and lowers performance, especially
       when each "soft" ethdev ends up on a different CPU core (ping-pong
       of cache lines).
    10.[Meta-data proliferation] A chain of ethdevs is likely to result
       in proliferation of meta-data that has to be passed between the
       ethdevs (e.g. policing needs the output of flow classification),
       which results in more cache line ping-pong between cores, hence
       performance drops.

Cristian Dumitrescu (4):
Jasvinder Singh (4):
  net/softnic: add softnic PMD
  net/softnic: add traffic management support
  net/softnic: add TM capabilities ops
  net/softnic: add TM hierarchy related  ops

 MAINTAINERS                                        |    5 +
 config/common_base                                 |    5 +
 doc/api/doxy-api-index.md                          |    3 +-
 doc/api/doxy-api.conf                              |    1 +
 doc/guides/rel_notes/release_17_11.rst             |    6 +
 drivers/net/Makefile                               |    5 +
 drivers/net/softnic/Makefile                       |   57 +
 drivers/net/softnic/rte_eth_softnic.c              |  853 +++++
 drivers/net/softnic/rte_eth_softnic.h              |   83 +
 drivers/net/softnic/rte_eth_softnic_internals.h    |  291 ++
 drivers/net/softnic/rte_eth_softnic_tm.c           | 3449 ++++++++++++++++++++
 .../net/softnic/rte_pmd_eth_softnic_version.map    |    7 +
 mk/rte.app.mk                                      |    5 +-
 13 files changed, 4768 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/softnic/Makefile
 create mode 100644 drivers/net/softnic/rte_eth_softnic.c
 create mode 100644 drivers/net/softnic/rte_eth_softnic.h
 create mode 100644 drivers/net/softnic/rte_eth_softnic_internals.h
 create mode 100644 drivers/net/softnic/rte_eth_softnic_tm.c
 create mode 100644 drivers/net/softnic/rte_pmd_eth_softnic_version.map
  
Jasvinder Singh Sept. 18, 2017, 4:58 p.m. UTC | #6
Hi Thomas,

I don't see this patch in patchwork, although it is present in the email archive. Any guess why it is not showing up there?

Thank you,
Jasvinder

> -----Original Message-----

> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Jasvinder Singh

> Sent: Monday, September 18, 2017 10:10 AM

> To: dev@dpdk.org

> Cc: Dumitrescu, Cristian <cristian.dumitrescu@intel.com>; Yigit, Ferruh

> <ferruh.yigit@intel.com>; thomas@monjalon.net

> Subject: [dpdk-dev] [PATCH v4 1/4] net/softnic: add softnic PMD

> 

> Add SoftNIC PMD to provide SW fall-back for ethdev APIs.

> 

> Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>

> Signed-off-by: Jasvinder Singh <jasvinder.singh@intel.com>

> ---

> v4 changes:

> - Implemented feedback from Ferruh [1]

>  - rename map file to rte_pmd_eth_softnic_version.map

>  - add release notes library version info

>  - doxygen: fix hooks in doc/api/doxy-api-index.md

>  - add doxygen comment for rte_pmd_softnic_run()

>  - free device name memory

>  - remove soft_dev param in pmd_ethdev_register()

>  - fix checkpatch warnings

> 

> v3 changes:

> - rebase to dpdk17.08 release

> 

> v2 changes:

> - fix build errors

> - rebased to TM APIs v6 plus dpdk master

> 

> [1] Ferruh's feedback on v3: http://dpdk.org/ml/archives/dev/2017-

> September/074576.html

> 

>  MAINTAINERS                                        |   5 +

>  config/common_base                                 |   5 +

>  doc/api/doxy-api-index.md                          |   3 +-

>  doc/api/doxy-api.conf                              |   1 +

>  doc/guides/rel_notes/release_17_11.rst             |   6 +

>  drivers/net/Makefile                               |   5 +

>  drivers/net/softnic/Makefile                       |  56 ++

>  drivers/net/softnic/rte_eth_softnic.c              | 595 +++++++++++++++++++++

>  drivers/net/softnic/rte_eth_softnic.h              |  67 +++

>  drivers/net/softnic/rte_eth_softnic_internals.h    | 114 ++++

>  .../net/softnic/rte_pmd_eth_softnic_version.map    |   7 +

>  mk/rte.app.mk                                      |   5 +-

>  12 files changed, 867 insertions(+), 2 deletions(-)  create mode 100644

> drivers/net/softnic/Makefile  create mode 100644

> drivers/net/softnic/rte_eth_softnic.c

>  create mode 100644 drivers/net/softnic/rte_eth_softnic.h

>  create mode 100644 drivers/net/softnic/rte_eth_softnic_internals.h

>  create mode 100644 drivers/net/softnic/rte_pmd_eth_softnic_version.map

> 

> diff --git a/MAINTAINERS b/MAINTAINERS

> index a0cd75e..b6b738d 100644

> --- a/MAINTAINERS

> +++ b/MAINTAINERS

> @@ -511,6 +511,11 @@ M: Gaetan Rivet <gaetan.rivet@6wind.com>

>  F: drivers/net/failsafe/

>  F: doc/guides/nics/fail_safe.rst

> 

> +Softnic PMD

> +M: Jasvinder Singh <jasvinder.singh@intel.com>

> +M: Cristian Dumitrescu <cristian.dumitrescu@intel.com>

> +F: drivers/net/softnic

> +

> 

>  Crypto Drivers

>  --------------

> diff --git a/config/common_base b/config/common_base index

> 5e97a08..1a0c77d 100644

> --- a/config/common_base

> +++ b/config/common_base

> @@ -273,6 +273,11 @@ CONFIG_RTE_LIBRTE_SFC_EFX_PMD=y

> CONFIG_RTE_LIBRTE_SFC_EFX_DEBUG=n

> 

>  #

> +# Compile SOFTNIC PMD

> +#

> +CONFIG_RTE_LIBRTE_PMD_SOFTNIC=y

> +

> +#

>  # Compile software PMD backed by SZEDATA2 device  #

> CONFIG_RTE_LIBRTE_PMD_SZEDATA2=n diff --git a/doc/api/doxy-api-

> index.md b/doc/api/doxy-api-index.md index 19e0d4f..626ab51 100644

> --- a/doc/api/doxy-api-index.md

> +++ b/doc/api/doxy-api-index.md

> @@ -55,7 +55,8 @@ The public API headers are grouped by topics:

>    [KNI]                (@ref rte_kni.h),

>    [ixgbe]              (@ref rte_pmd_ixgbe.h),

>    [i40e]               (@ref rte_pmd_i40e.h),

> -  [crypto_scheduler]   (@ref rte_cryptodev_scheduler.h)

> +  [crypto_scheduler]   (@ref rte_cryptodev_scheduler.h),

> +  [softnic]            (@ref rte_eth_softnic.h)

> 

>  - **memory**:

>    [memseg]             (@ref rte_memory.h),

> diff --git a/doc/api/doxy-api.conf b/doc/api/doxy-api.conf index

> 823554f..b27755d 100644

> --- a/doc/api/doxy-api.conf

> +++ b/doc/api/doxy-api.conf

> @@ -32,6 +32,7 @@ PROJECT_NAME            = DPDK

>  INPUT                   = doc/api/doxy-api-index.md \

>                            drivers/crypto/scheduler \

>                            drivers/net/bonding \

> +                          drivers/net/softnic \

>                            drivers/net/i40e \

>                            drivers/net/ixgbe \

>                            lib/librte_eal/common/include \ diff --git

> a/doc/guides/rel_notes/release_17_11.rst

> b/doc/guides/rel_notes/release_17_11.rst

> index 170f4f9..d5a760b 100644

> --- a/doc/guides/rel_notes/release_17_11.rst

> +++ b/doc/guides/rel_notes/release_17_11.rst

> @@ -41,6 +41,11 @@ New Features

>       Also, make sure to start the actual text at the margin.

>       =========================================================

> 

> +* **Added SoftNIC PMD.**

> +

> +  Added new SoftNIC PMD. This virtual device offers applications a

> + software  fallback support for traffic management.

> +

> 

>  Resolved Issues

>  ---------------

> @@ -170,6 +175,7 @@ The libraries prepended with a plus sign were

> incremented in this version.

>       librte_pipeline.so.3

>       librte_pmd_bond.so.1

>       librte_pmd_ring.so.2

> +   + librte_pmd_softnic.so.1

>       librte_port.so.3

>       librte_power.so.1

>       librte_reorder.so.1

> diff --git a/drivers/net/Makefile b/drivers/net/Makefile index

> d33c959..b552a51 100644

> --- a/drivers/net/Makefile

> +++ b/drivers/net/Makefile

> @@ -110,4 +110,9 @@ DIRS-$(CONFIG_RTE_LIBRTE_PMD_VHOST) += vhost

> endif # $(CONFIG_RTE_LIBRTE_VHOST)  DEPDIRS-vhost = $(core-libs)

> librte_vhost

> 

> +ifeq ($(CONFIG_RTE_LIBRTE_SCHED),y)

> +DIRS-$(CONFIG_RTE_LIBRTE_PMD_SOFTNIC) += softnic endif #

> +$(CONFIG_RTE_LIBRTE_SCHED) DEPDIRS-softnic = $(core-libs) librte_sched

> +

>  include $(RTE_SDK)/mk/rte.subdir.mk

> diff --git a/drivers/net/softnic/Makefile b/drivers/net/softnic/Makefile new

> file mode 100644 index 0000000..c2f42ef

> --- /dev/null

> +++ b/drivers/net/softnic/Makefile

> @@ -0,0 +1,56 @@

> +#   BSD LICENSE

> +#

> +#   Copyright(c) 2017 Intel Corporation. All rights reserved.

> +#   All rights reserved.

> +#

> +#   Redistribution and use in source and binary forms, with or without

> +#   modification, are permitted provided that the following conditions

> +#   are met:

> +#

> +#     * Redistributions of source code must retain the above copyright

> +#       notice, this list of conditions and the following disclaimer.

> +#     * Redistributions in binary form must reproduce the above copyright

> +#       notice, this list of conditions and the following disclaimer in

> +#       the documentation and/or other materials provided with the

> +#       distribution.

> +#     * Neither the name of Intel Corporation nor the names of its

> +#       contributors may be used to endorse or promote products derived

> +#       from this software without specific prior written permission.

> +#

> +#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND

> CONTRIBUTORS

> +#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT

> NOT

> +#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND

> FITNESS FOR

> +#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE

> COPYRIGHT

> +#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,

> INCIDENTAL,

> +#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT

> NOT

> +#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS

> OF USE,

> +#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED

> AND ON ANY

> +#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR

> TORT

> +#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF

> THE USE

> +#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH

> DAMAGE.

> +

> +include $(RTE_SDK)/mk/rte.vars.mk

> +

> +#

> +# library name

> +#

> +LIB = librte_pmd_softnic.a

> +

> +CFLAGS += -O3

> +CFLAGS += $(WERROR_FLAGS)

> +

> +EXPORT_MAP := rte_pmd_eth_softnic_version.map

> +

> +LIBABIVER := 1

> +

> +#

> +# all source are stored in SRCS-y

> +#

> +SRCS-$(CONFIG_RTE_LIBRTE_PMD_SOFTNIC) += rte_eth_softnic.c

> +

> +#

> +# Export include files

> +#

> +SYMLINK-y-include += rte_eth_softnic.h

> +

> +include $(RTE_SDK)/mk/rte.lib.mk

> diff --git a/drivers/net/softnic/rte_eth_softnic.c

> b/drivers/net/softnic/rte_eth_softnic.c

> new file mode 100644

> index 0000000..792e7ea

> --- /dev/null

> +++ b/drivers/net/softnic/rte_eth_softnic.c

> @@ -0,0 +1,595 @@

> +/*-

> + *   BSD LICENSE

> + *

> + *   Copyright(c) 2017 Intel Corporation. All rights reserved.

> + *   All rights reserved.

> + *

> + *   Redistribution and use in source and binary forms, with or without

> + *   modification, are permitted provided that the following conditions

> + *   are met:

> + *

> + *     * Redistributions of source code must retain the above copyright

> + *       notice, this list of conditions and the following disclaimer.

> + *     * Redistributions in binary form must reproduce the above copyright

> + *       notice, this list of conditions and the following disclaimer in

> + *       the documentation and/or other materials provided with the

> + *       distribution.

> + *     * Neither the name of Intel Corporation nor the names of its

> + *       contributors may be used to endorse or promote products derived

> + *       from this software without specific prior written permission.

> + *

> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND

> CONTRIBUTORS

> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT

> NOT

> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND

> FITNESS FOR

> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE

> COPYRIGHT

> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,

> INCIDENTAL,

> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT

> NOT

> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS

> OF USE,

> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED

> AND ON ANY

> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR

> TORT

> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF

> THE USE

> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH

> DAMAGE.

> + */

> +

> +#include <stdint.h>

> +#include <stdlib.h>

> +#include <string.h>

> +

> +#include <rte_ethdev.h>

> +#include <rte_ethdev_vdev.h>

> +#include <rte_malloc.h>

> +#include <rte_vdev.h>

> +#include <rte_kvargs.h>

> +#include <rte_errno.h>

> +#include <rte_ring.h>

> +

> +#include "rte_eth_softnic.h"

> +#include "rte_eth_softnic_internals.h"

> +

> +#define PRIV_TO_HARD_DEV(p)					\

> +	(&rte_eth_devices[p->hard.port_id])

> +

> +#define PMD_PARAM_HARD_NAME

> 	"hard_name"

> +#define PMD_PARAM_HARD_TX_QUEUE_ID

> 	"hard_tx_queue_id"

> +

> +static const char *pmd_valid_args[] = {

> +	PMD_PARAM_HARD_NAME,

> +	PMD_PARAM_HARD_TX_QUEUE_ID,

> +	NULL

> +};

> +

> +static const struct rte_eth_dev_info pmd_dev_info = {

> +	.min_rx_bufsize = 0,

> +	.max_rx_pktlen = UINT32_MAX,

> +	.max_rx_queues = UINT16_MAX,

> +	.max_tx_queues = UINT16_MAX,

> +	.rx_desc_lim = {

> +		.nb_max = UINT16_MAX,

> +		.nb_min = 0,

> +		.nb_align = 1,

> +	},

> +	.tx_desc_lim = {

> +		.nb_max = UINT16_MAX,

> +		.nb_min = 0,

> +		.nb_align = 1,

> +	},

> +};

> +

> +static void

> +pmd_dev_infos_get(struct rte_eth_dev *dev __rte_unused,

> +	struct rte_eth_dev_info *dev_info)

> +{

> +	memcpy(dev_info, &pmd_dev_info, sizeof(*dev_info)); }

> +

> +static int

> +pmd_dev_configure(struct rte_eth_dev *dev) {

> +	struct pmd_internals *p = dev->data->dev_private;

> +	struct rte_eth_dev *hard_dev = PRIV_TO_HARD_DEV(p);

> +

> +	if (dev->data->nb_rx_queues > hard_dev->data->nb_rx_queues)

> +		return -1;

> +

> +	if (p->params.hard.tx_queue_id >= hard_dev->data->nb_tx_queues)

> +		return -1;

> +

> +	return 0;

> +}

> +

> +static int

> +pmd_rx_queue_setup(struct rte_eth_dev *dev,

> +	uint16_t rx_queue_id,

> +	uint16_t nb_rx_desc __rte_unused,

> +	unsigned int socket_id,

> +	const struct rte_eth_rxconf *rx_conf __rte_unused,

> +	struct rte_mempool *mb_pool __rte_unused) {

> +	struct pmd_internals *p = dev->data->dev_private;

> +

> +	if (p->params.soft.intrusive == 0) {

> +		struct pmd_rx_queue *rxq;

> +

> +		rxq = rte_zmalloc_socket(p->params.soft.name,

> +			sizeof(struct pmd_rx_queue), 0, socket_id);

> +		if (rxq == NULL)

> +			return -ENOMEM;

> +

> +		rxq->hard.port_id = p->hard.port_id;

> +		rxq->hard.rx_queue_id = rx_queue_id;

> +		dev->data->rx_queues[rx_queue_id] = rxq;

> +	} else {

> +		struct rte_eth_dev *hard_dev = PRIV_TO_HARD_DEV(p);

> +		void *rxq = hard_dev->data->rx_queues[rx_queue_id];

> +

> +		if (rxq == NULL)

> +			return -1;

> +

> +		dev->data->rx_queues[rx_queue_id] = rxq;

> +	}

> +	return 0;

> +}

> +

> +static int

> +pmd_tx_queue_setup(struct rte_eth_dev *dev,

> +	uint16_t tx_queue_id,

> +	uint16_t nb_tx_desc,

> +	unsigned int socket_id,

> +	const struct rte_eth_txconf *tx_conf __rte_unused) {

> +	uint32_t size = RTE_ETH_NAME_MAX_LEN + strlen("_txq") + 4;

> +	char name[size];

> +	struct rte_ring *r;

> +

> +	snprintf(name, sizeof(name), "%s_txq%04x",

> +		dev->data->name, tx_queue_id);

> +	r = rte_ring_create(name, nb_tx_desc, socket_id,

> +		RING_F_SP_ENQ | RING_F_SC_DEQ);

> +	if (r == NULL)

> +		return -1;

> +

> +	dev->data->tx_queues[tx_queue_id] = r;

> +	return 0;

> +}

> +

> +static int

> +pmd_dev_start(struct rte_eth_dev *dev)

> +{

> +	struct pmd_internals *p = dev->data->dev_private;

> +

> +	dev->data->dev_link.link_status = ETH_LINK_UP;

> +

> +	if (p->params.soft.intrusive) {

> +		struct rte_eth_dev *hard_dev = PRIV_TO_HARD_DEV(p);

> +

> +		/* The hard_dev->rx_pkt_burst should be stable by now */

> +		dev->rx_pkt_burst = hard_dev->rx_pkt_burst;

> +	}

> +

> +	return 0;

> +}

> +

> +static void

> +pmd_dev_stop(struct rte_eth_dev *dev)

> +{

> +	dev->data->dev_link.link_status = ETH_LINK_DOWN; }

> +

> +static void

> +pmd_dev_close(struct rte_eth_dev *dev)

> +{

> +	uint32_t i;

> +

> +	/* TX queues */

> +	for (i = 0; i < dev->data->nb_tx_queues; i++)

> +		rte_ring_free((struct rte_ring *)dev->data->tx_queues[i]); }

> +

> +static int

> +pmd_link_update(struct rte_eth_dev *dev __rte_unused,

> +	int wait_to_complete __rte_unused)

> +{

> +	return 0;

> +}

> +

> +static const struct eth_dev_ops pmd_ops = {

> +	.dev_configure = pmd_dev_configure,

> +	.dev_start = pmd_dev_start,

> +	.dev_stop = pmd_dev_stop,

> +	.dev_close = pmd_dev_close,

> +	.link_update = pmd_link_update,

> +	.dev_infos_get = pmd_dev_infos_get,

> +	.rx_queue_setup = pmd_rx_queue_setup,

> +	.tx_queue_setup = pmd_tx_queue_setup,

> +	.tm_ops_get = NULL,

> +};

> +

> +static uint16_t

> +pmd_rx_pkt_burst(void *rxq,

> +	struct rte_mbuf **rx_pkts,

> +	uint16_t nb_pkts)

> +{

> +	struct pmd_rx_queue *rx_queue = rxq;

> +

> +	return rte_eth_rx_burst(rx_queue->hard.port_id,

> +		rx_queue->hard.rx_queue_id,

> +		rx_pkts,

> +		nb_pkts);

> +}

> +

> +static uint16_t

> +pmd_tx_pkt_burst(void *txq,

> +	struct rte_mbuf **tx_pkts,

> +	uint16_t nb_pkts)

> +{

> +	return (uint16_t)rte_ring_enqueue_burst(txq,

> +		(void **)tx_pkts,

> +		nb_pkts,

> +		NULL);

> +}

> +

> +static __rte_always_inline int

> +rte_pmd_softnic_run_default(struct rte_eth_dev *dev) {

> +	struct pmd_internals *p = dev->data->dev_private;

> +

> +	/* Persistent context: Read Only (update not required) */

> +	struct rte_mbuf **pkts = p->soft.def.pkts;

> +	uint16_t nb_tx_queues = dev->data->nb_tx_queues;

> +

> +	/* Persistent context: Read - Write (update required) */

> +	uint32_t txq_pos = p->soft.def.txq_pos;

> +	uint32_t pkts_len = p->soft.def.pkts_len;

> +	uint32_t flush_count = p->soft.def.flush_count;

> +

> +	/* Not part of the persistent context */

> +	uint32_t pos;

> +	uint16_t i;

> +

> +	/* Soft device TXQ read, Hard device TXQ write */

> +	for (i = 0; i < nb_tx_queues; i++) {

> +		struct rte_ring *txq = dev->data->tx_queues[txq_pos];

> +

> +		/* Read soft device TXQ burst to packet enqueue buffer */

> +		pkts_len += rte_ring_sc_dequeue_burst(txq,

> +			(void **)&pkts[pkts_len],

> +			DEFAULT_BURST_SIZE,

> +			NULL);

> +

> +		/* Increment soft device TXQ */

> +		txq_pos++;

> +		if (txq_pos >= nb_tx_queues)

> +			txq_pos = 0;

> +

> +		/* Hard device TXQ write when complete burst is available */

> +		if (pkts_len >= DEFAULT_BURST_SIZE) {

> +			for (pos = 0; pos < pkts_len; )

> +				pos += rte_eth_tx_burst(p->hard.port_id,

> +					p->params.hard.tx_queue_id,

> +					&pkts[pos],

> +					(uint16_t)(pkts_len - pos));

> +

> +			pkts_len = 0;

> +			flush_count = 0;

> +			break;

> +		}

> +	}

> +

> +	if (flush_count >= FLUSH_COUNT_THRESHOLD) {

> +		for (pos = 0; pos < pkts_len; )

> +			pos += rte_eth_tx_burst(p->hard.port_id,

> +				p->params.hard.tx_queue_id,

> +				&pkts[pos],

> +				(uint16_t)(pkts_len - pos));

> +

> +		pkts_len = 0;

> +		flush_count = 0;

> +	}

> +

> +	p->soft.def.txq_pos = txq_pos;

> +	p->soft.def.pkts_len = pkts_len;

> +	p->soft.def.flush_count = flush_count + 1;

> +

> +	return 0;

> +}

> +

> +int

> +rte_pmd_softnic_run(uint8_t port_id)

> +{

> +	struct rte_eth_dev *dev = &rte_eth_devices[port_id];

> +

> +#ifdef RTE_LIBRTE_ETHDEV_DEBUG

> +	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, 0);

> +#endif

> +

> +	return rte_pmd_softnic_run_default(dev);

> +}

> +

> +static struct ether_addr eth_addr = { .addr_bytes = {0} };

> +

> +static uint32_t

> +eth_dev_speed_max_mbps(uint32_t speed_capa)

> +{

> +	uint32_t rate_mbps[32] = {

> +		ETH_SPEED_NUM_NONE,

> +		ETH_SPEED_NUM_10M,

> +		ETH_SPEED_NUM_10M,

> +		ETH_SPEED_NUM_100M,

> +		ETH_SPEED_NUM_100M,

> +		ETH_SPEED_NUM_1G,

> +		ETH_SPEED_NUM_2_5G,

> +		ETH_SPEED_NUM_5G,

> +		ETH_SPEED_NUM_10G,

> +		ETH_SPEED_NUM_20G,

> +		ETH_SPEED_NUM_25G,

> +		ETH_SPEED_NUM_40G,

> +		ETH_SPEED_NUM_50G,

> +		ETH_SPEED_NUM_56G,

> +		ETH_SPEED_NUM_100G,

> +	};

> +

> +	uint32_t pos = (speed_capa) ? (31 - __builtin_clz(speed_capa)) : 0;

> +	return rate_mbps[pos];

> +}

> +

> +static int

> +default_init(struct pmd_internals *p,

> +	struct pmd_params *params,

> +	int numa_node)

> +{

> +	p->soft.def.pkts = rte_zmalloc_socket(params->soft.name,

> +		2 * DEFAULT_BURST_SIZE * sizeof(struct rte_mbuf *),

> +		0,

> +		numa_node);

> +

> +	if (p->soft.def.pkts == NULL)

> +		return -ENOMEM;

> +

> +	return 0;

> +}

> +

> +static void

> +default_free(struct pmd_internals *p)

> +{

> +	free((void *)p->params.hard.name);

> +	rte_free(p->soft.def.pkts);

> +}

> +

> +static void *

> +pmd_init(struct pmd_params *params, int numa_node)

> +{

> +	struct pmd_internals *p;

> +	int status;

> +

> +	p = rte_zmalloc_socket(params->soft.name,

> +		sizeof(struct pmd_internals),

> +		0,

> +		numa_node);

> +	if (p == NULL)

> +		return NULL;

> +

> +	memcpy(&p->params, params, sizeof(p->params));

> +	status = rte_eth_dev_get_port_by_name(params->hard.name,

> +		&p->hard.port_id);

> +	if (status) {

> +		rte_free(p);

> +		return NULL;

> +	}

> +

> +	/* Default */

> +	status = default_init(p, params, numa_node);

> +	if (status) {

> +		rte_free(p);

> +		return NULL;

> +	}

> +

> +	return p;

> +}

> +

> +static void

> +pmd_free(struct pmd_internals *p)

> +{

> +	default_free(p);

> +

> +	rte_free(p);

> +}

> +

> +static int

> +pmd_ethdev_register(struct rte_vdev_device *vdev,

> +	struct pmd_params *params,

> +	void *dev_private)

> +{

> +	struct rte_eth_dev_info hard_info;

> +	struct rte_eth_dev *soft_dev;

> +	uint32_t hard_speed;

> +	int numa_node;

> +	uint8_t hard_port_id;

> +

> +	rte_eth_dev_get_port_by_name(params->hard.name,

> +		&hard_port_id);

> +	rte_eth_dev_info_get(hard_port_id, &hard_info);

> +	hard_speed = eth_dev_speed_max_mbps(hard_info.speed_capa);

> +	numa_node = rte_eth_dev_socket_id(hard_port_id);

> +

> +	/* Ethdev entry allocation */

> +	soft_dev = rte_eth_dev_allocate(params->soft.name);

> +	if (!soft_dev)

> +		return -ENOMEM;

> +

> +	/* dev */

> +	soft_dev->rx_pkt_burst = (params->soft.intrusive) ?

> +		NULL : /* set up later */

> +		pmd_rx_pkt_burst;

> +	soft_dev->tx_pkt_burst = pmd_tx_pkt_burst;

> +	soft_dev->tx_pkt_prepare = NULL;

> +	soft_dev->dev_ops = &pmd_ops;

> +	soft_dev->device = &vdev->device;

> +

> +	/* dev->data */

> +	soft_dev->data->dev_private = dev_private;

> +	soft_dev->data->dev_link.link_speed = hard_speed;

> +	soft_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;

> +	soft_dev->data->dev_link.link_autoneg = ETH_LINK_SPEED_FIXED;

> +	soft_dev->data->dev_link.link_status = ETH_LINK_DOWN;

> +	soft_dev->data->mac_addrs = &eth_addr;

> +	soft_dev->data->promiscuous = 1;

> +	soft_dev->data->kdrv = RTE_KDRV_NONE;

> +	soft_dev->data->numa_node = numa_node;

> +	soft_dev->data->dev_flags = RTE_ETH_DEV_DETACHABLE;

> +

> +	return 0;

> +}

> +

> +static int

> +get_string(const char *key __rte_unused, const char *value, void *extra_args)

> +{

> +	if (!value || !extra_args)

> +		return -EINVAL;

> +

> +	*(char **)extra_args = strdup(value);

> +

> +	if (!*(char **)extra_args)

> +		return -ENOMEM;

> +

> +	return 0;

> +}

> +

> +static int

> +get_uint32(const char *key __rte_unused, const char *value, void *extra_args)

> +{

> +	if (!value || !extra_args)

> +		return -EINVAL;

> +

> +	*(uint32_t *)extra_args = strtoull(value, NULL, 0);

> +

> +	return 0;

> +}

> +

> +static int

> +pmd_parse_args(struct pmd_params *p, const char *name, const char *params)

> +{

> +	struct rte_kvargs *kvlist;

> +	int ret;

> +

> +	kvlist = rte_kvargs_parse(params, pmd_valid_args);

> +	if (kvlist == NULL)

> +		return -EINVAL;

> +

> +	/* Set default values */

> +	memset(p, 0, sizeof(*p));

> +	p->soft.name = name;

> +	p->soft.intrusive = INTRUSIVE;

> +	p->hard.tx_queue_id = SOFTNIC_HARD_TX_QUEUE_ID;

> +

> +	/* HARD: name (mandatory) */

> +	if (rte_kvargs_count(kvlist, PMD_PARAM_HARD_NAME) == 1) {

> +		ret = rte_kvargs_process(kvlist, PMD_PARAM_HARD_NAME,

> +			&get_string, &p->hard.name);

> +		if (ret < 0)

> +			goto out_free;

> +	} else {

> +		ret = -EINVAL;

> +		goto out_free;

> +	}

> +

> +	/* HARD: tx_queue_id (optional) */

> +	if (rte_kvargs_count(kvlist, PMD_PARAM_HARD_TX_QUEUE_ID) == 1) {

> +		ret = rte_kvargs_process(kvlist, PMD_PARAM_HARD_TX_QUEUE_ID,

> +			&get_uint32, &p->hard.tx_queue_id);

> +		if (ret < 0)

> +			goto out_free;

> +	}

> +

> +out_free:

> +	rte_kvargs_free(kvlist);

> +	return ret;

> +}

> +

> +static int

> +pmd_probe(struct rte_vdev_device *vdev)

> +{

> +	struct pmd_params p;

> +	const char *params;

> +	int status;

> +

> +	struct rte_eth_dev_info hard_info;

> +	uint8_t hard_port_id;

> +	int numa_node;

> +	void *dev_private;

> +

> +	RTE_LOG(INFO, PMD,

> +		"Probing device \"%s\"\n",

> +		rte_vdev_device_name(vdev));

> +

> +	/* Parse input arguments */

> +	params = rte_vdev_device_args(vdev);

> +	if (!params)

> +		return -EINVAL;

> +

> +	status = pmd_parse_args(&p, rte_vdev_device_name(vdev), params);

> +	if (status)

> +		return status;

> +

> +	/* Check input arguments */

> +	if (rte_eth_dev_get_port_by_name(p.hard.name, &hard_port_id))

> +		return -EINVAL;

> +

> +	rte_eth_dev_info_get(hard_port_id, &hard_info);

> +	numa_node = rte_eth_dev_socket_id(hard_port_id);

> +

> +	if (p.hard.tx_queue_id >= hard_info.max_tx_queues)

> +		return -EINVAL;

> +

> +	/* Allocate and initialize soft ethdev private data */

> +	dev_private = pmd_init(&p, numa_node);

> +	if (dev_private == NULL)

> +		return -ENOMEM;

> +

> +	/* Register soft ethdev */

> +	RTE_LOG(INFO, PMD,

> +		"Creating soft ethdev \"%s\" for hard ethdev \"%s\"\n",

> +		p.soft.name, p.hard.name);

> +

> +	status = pmd_ethdev_register(vdev, &p, dev_private);

> +	if (status) {

> +		pmd_free(dev_private);

> +		return status;

> +	}

> +

> +	return 0;

> +}

> +

> +static int

> +pmd_remove(struct rte_vdev_device *vdev)

> +{

> +	struct rte_eth_dev *dev = NULL;

> +	struct pmd_internals *p;

> +

> +	if (!vdev)

> +		return -EINVAL;

> +

> +	RTE_LOG(INFO, PMD, "Removing device \"%s\"\n",

> +		rte_vdev_device_name(vdev));

> +

> +	/* Find the ethdev entry */

> +	dev = rte_eth_dev_allocated(rte_vdev_device_name(vdev));

> +	if (dev == NULL)

> +		return -ENODEV;

> +	p = dev->data->dev_private;

> +

> +	/* Free device data structures*/

> +	pmd_free(p);

> +	rte_free(dev->data);

> +	rte_eth_dev_release_port(dev);

> +

> +	return 0;

> +}

> +

> +static struct rte_vdev_driver pmd_softnic_drv = {

> +	.probe = pmd_probe,

> +	.remove = pmd_remove,

> +};

> +

> +RTE_PMD_REGISTER_VDEV(net_softnic, pmd_softnic_drv);

> +RTE_PMD_REGISTER_PARAM_STRING(net_softnic,

> +	PMD_PARAM_HARD_NAME "=<string> "

> +	PMD_PARAM_HARD_TX_QUEUE_ID "=<int>");

> diff --git a/drivers/net/softnic/rte_eth_softnic.h b/drivers/net/softnic/rte_eth_softnic.h

> new file mode 100644

> index 0000000..e6996f3

> --- /dev/null

> +++ b/drivers/net/softnic/rte_eth_softnic.h

> @@ -0,0 +1,67 @@

> +/*-

> + *   BSD LICENSE

> + *

> + *   Copyright(c) 2017 Intel Corporation. All rights reserved.

> + *   All rights reserved.

> + *

> + *   Redistribution and use in source and binary forms, with or without

> + *   modification, are permitted provided that the following conditions

> + *   are met:

> + *

> + *     * Redistributions of source code must retain the above copyright

> + *       notice, this list of conditions and the following disclaimer.

> + *     * Redistributions in binary form must reproduce the above copyright

> + *       notice, this list of conditions and the following disclaimer in

> + *       the documentation and/or other materials provided with the

> + *       distribution.

> + *     * Neither the name of Intel Corporation nor the names of its

> + *       contributors may be used to endorse or promote products derived

> + *       from this software without specific prior written permission.

> + *

> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

> + */

> +

> +#ifndef __INCLUDE_RTE_ETH_SOFTNIC_H__

> +#define __INCLUDE_RTE_ETH_SOFTNIC_H__

> +

> +#include <stdint.h>

> +

> +#ifdef __cplusplus

> +extern "C" {

> +#endif

> +

> +#ifndef SOFTNIC_HARD_TX_QUEUE_ID

> +#define SOFTNIC_HARD_TX_QUEUE_ID			0

> +#endif

> +

> +/**

> + * Run the traffic management function on the softnic device

> + *

> + * This function reads the packets from the softnic input queues, inserts

> + * them into the QoS scheduler queues based on the mbuf sched field value

> + * and transmits the scheduled packets out through the hard device interface.

> + *

> + * @param port_id

> + *    port id of the soft device.

> + * @return

> + *    zero.

> + */

> +

> +int

> +rte_pmd_softnic_run(uint8_t port_id);

> +

> +#ifdef __cplusplus

> +}

> +#endif

> +

> +#endif /* __INCLUDE_RTE_ETH_SOFTNIC_H__ */

> diff --git a/drivers/net/softnic/rte_eth_softnic_internals.h b/drivers/net/softnic/rte_eth_softnic_internals.h

> new file mode 100644

> index 0000000..96995b5

> --- /dev/null

> +++ b/drivers/net/softnic/rte_eth_softnic_internals.h

> @@ -0,0 +1,114 @@

> +/*-

> + *   BSD LICENSE

> + *

> + *   Copyright(c) 2017 Intel Corporation. All rights reserved.

> + *   All rights reserved.

> + *

> + *   Redistribution and use in source and binary forms, with or without

> + *   modification, are permitted provided that the following conditions

> + *   are met:

> + *

> + *     * Redistributions of source code must retain the above copyright

> + *       notice, this list of conditions and the following disclaimer.

> + *     * Redistributions in binary form must reproduce the above copyright

> + *       notice, this list of conditions and the following disclaimer in

> + *       the documentation and/or other materials provided with the

> + *       distribution.

> + *     * Neither the name of Intel Corporation nor the names of its

> + *       contributors may be used to endorse or promote products derived

> + *       from this software without specific prior written permission.

> + *

> + *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

> + *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

> + *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

> + *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

> + *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

> + *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

> + *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

> + *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

> + *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

> + *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

> + *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

> + */

> +

> +#ifndef __INCLUDE_RTE_ETH_SOFTNIC_INTERNALS_H__

> +#define __INCLUDE_RTE_ETH_SOFTNIC_INTERNALS_H__

> +

> +#include <stdint.h>

> +

> +#include <rte_mbuf.h>

> +#include <rte_ethdev.h>

> +

> +#include "rte_eth_softnic.h"

> +

> +#ifndef INTRUSIVE

> +#define INTRUSIVE					0

> +#endif

> +

> +struct pmd_params {

> +	/** Parameters for the soft device (to be created) */

> +	struct {

> +		const char *name; /**< Name */

> +		uint32_t flags; /**< Flags */

> +

> +		/** 0 = Access hard device through API only (potentially slower,

> +		 *      but safer);

> +		 *  1 = Accessing hard device private data structures is allowed

> +		 *      (potentially faster).

> +		 */

> +		int intrusive;

> +	} soft;

> +

> +	/** Parameters for the hard device (existing) */

> +	struct {

> +		char *name; /**< Name */

> +		uint16_t tx_queue_id; /**< TX queue ID */

> +	} hard;

> +};

> +

> +/**

> + * Default Internals

> + */

> +

> +#ifndef DEFAULT_BURST_SIZE

> +#define DEFAULT_BURST_SIZE				32

> +#endif

> +

> +#ifndef FLUSH_COUNT_THRESHOLD

> +#define FLUSH_COUNT_THRESHOLD			(1 << 17)

> +#endif

> +

> +struct default_internals {

> +	struct rte_mbuf **pkts;

> +	uint32_t pkts_len;

> +	uint32_t txq_pos;

> +	uint32_t flush_count;

> +};

> +

> +/**

> + * PMD Internals

> + */

> +struct pmd_internals {

> +	/** Params */

> +	struct pmd_params params;

> +

> +	/** Soft device */

> +	struct {

> +		struct default_internals def; /**< Default */

> +	} soft;

> +

> +	/** Hard device */

> +	struct {

> +		uint8_t port_id;

> +	} hard;

> +};

> +

> +struct pmd_rx_queue {

> +	/** Hard device */

> +	struct {

> +		uint8_t port_id;

> +		uint16_t rx_queue_id;

> +	} hard;

> +};

> +

> +#endif /* __INCLUDE_RTE_ETH_SOFTNIC_INTERNALS_H__ */

> diff --git a/drivers/net/softnic/rte_pmd_eth_softnic_version.map b/drivers/net/softnic/rte_pmd_eth_softnic_version.map

> new file mode 100644

> index 0000000..fb2cb68

> --- /dev/null

> +++ b/drivers/net/softnic/rte_pmd_eth_softnic_version.map

> @@ -0,0 +1,7 @@

> +DPDK_17.11 {

> +	global:

> +

> +	rte_pmd_softnic_run;

> +

> +	local: *;

> +};

> diff --git a/mk/rte.app.mk b/mk/rte.app.mk

> index c25fdd9..3dc82fb 100644

> --- a/mk/rte.app.mk

> +++ b/mk/rte.app.mk

> @@ -67,7 +67,6 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR)    += -lrte_distributor

>  _LDLIBS-$(CONFIG_RTE_LIBRTE_IP_FRAG)        += -lrte_ip_frag

>  _LDLIBS-$(CONFIG_RTE_LIBRTE_GRO)            += -lrte_gro

>  _LDLIBS-$(CONFIG_RTE_LIBRTE_METER)          += -lrte_meter

> -_LDLIBS-$(CONFIG_RTE_LIBRTE_SCHED)          += -lrte_sched

>  _LDLIBS-$(CONFIG_RTE_LIBRTE_LPM)            += -lrte_lpm

>  # librte_acl needs --whole-archive because of weak functions

>  _LDLIBS-$(CONFIG_RTE_LIBRTE_ACL)            += --whole-archive

> @@ -99,6 +98,7 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_RING)           += -lrte_ring

>  _LDLIBS-$(CONFIG_RTE_LIBRTE_EAL)            += -lrte_eal

>  _LDLIBS-$(CONFIG_RTE_LIBRTE_CMDLINE)        += -lrte_cmdline

>  _LDLIBS-$(CONFIG_RTE_LIBRTE_REORDER)        += -lrte_reorder

> +_LDLIBS-$(CONFIG_RTE_LIBRTE_SCHED)          += -lrte_sched

> 

>  ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)

>  _LDLIBS-$(CONFIG_RTE_LIBRTE_KNI)            += -lrte_kni

> @@ -135,6 +135,9 @@ _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_NULL)       += -lrte_pmd_null

>  _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_PCAP)       += -lrte_pmd_pcap -lpcap

>  _LDLIBS-$(CONFIG_RTE_LIBRTE_QEDE_PMD)       += -lrte_pmd_qede

>  _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_RING)       += -lrte_pmd_ring

> +ifeq ($(CONFIG_RTE_LIBRTE_SCHED),y)

> +_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_SOFTNIC)      += -lrte_pmd_softnic

> +endif

>  _LDLIBS-$(CONFIG_RTE_LIBRTE_SFC_EFX_PMD)    += -lrte_pmd_sfc_efx

>  _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_SZEDATA2)   += -lrte_pmd_szedata2 -lsze2

>  _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_TAP)        += -lrte_pmd_tap

> --

> 2.9.3
  
Thomas Monjalon Sept. 18, 2017, 7:09 p.m. UTC | #7
18/09/2017 18:58, Singh, Jasvinder:
> Hi Thomas,
> 
> I don't see this patch in patchwork, although it is present in email archive. Any guess why it is not showing up there?

No idea.
Not a big deal, others are there.
  
Thomas Monjalon Sept. 20, 2017, 3:35 p.m. UTC | #8
Hi,

18/09/2017 11:10, Jasvinder Singh:
> The SoftNIC PMD is intended to provide SW fall-back options for specific
> ethdev APIs in a generic way to the NICs not supporting those features.

I agree it is important to have a solution in DPDK to better manage
SW fallbacks. One question is to know whether we can implement and
maintain many solutions. We probably must choose only one solution.

I have not read the code. I am just interested in the design for now.
I think it is a smart idea but maybe less convenient than calling fallback
from ethdev API glue code. My opinion has not changed since v1.
Thanks for the detailed explanations. Let's discuss below.

[...]
> * RX/TX: The app reads packets from/writes packets to the "soft" port
>   instead of the "hard" port. The RX and TX queues of the "soft" port are
>   thread safe, as any ethdev.

"thread safe as any ethdev"?
I would say the ethdev queues are not thread safe.

[...]
> * Meets the NFV vision: The app should be (almost) agnostic about the NIC
>   implementation (different vendors/models, HW-SW mix), the app should not
>   require changes to use different NICs, the app should use the same API
>   for all NICs. If a NIC does not implement a specific feature, the HW
>   should be augmented with SW to meet the functionality while still
>   preserving the same API.

This goal could also be achieved by adding the SW capability to the API.
After getting the capabilities of the hardware, the app could set the capability
of some driver features to "SW fallback".
So the capability would become a tristate:
	- not supported
	- HW supported
	- SW supported
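
A minimal sketch of what such a tristate could look like (purely illustrative;
nothing like this exists in the ethdev API today):

	/* Hypothetical illustration of the tristate capability idea */
	enum eth_feature_cap {
		ETH_FEATURE_NOT_SUPPORTED = 0,	/* feature absent */
		ETH_FEATURE_HW_SUPPORTED,	/* implemented by the NIC */
		ETH_FEATURE_SW_SUPPORTED,	/* SW fallback enabled by the app */
	};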

The unique API goal fails if we must manage two ports,
the HW port for some features and the softnic port for other features.
You explain it in A5 below.

[...]
> Example: Create "soft" port for "hard" port "0000:04:00.1", enable the TM
> feature with default settings:
>           --vdev 'net_softnic0,hard_name=0000:04:00.1,soft_tm=on' 

So the app will use only the vdev net_softnic0 which will forward packets
to 0000:04:00.1?
Can we say in this example that net_softnic0 owns 0000:04:00.1?
Probably not, because the config of the HW must be done separately (cf. Q5).
See my "ownership proposal":
	http://dpdk.org/ml/archives/dev/2017-September/074656.html

The issue I see in this example is that we must define how to enable
every feature. It should be equivalent to defining the ethdev capabilities.
In this example, the option soft_tm=on is probably not enough fine-grain.
We could support some parts of TM API in HW and other parts in SW.
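
A sketch of how an application might take that decision per feature, assuming
the capability query of the recently added TM API (hard_port_id below is the
port id of the hard device):

	struct rte_tm_capabilities cap;
	struct rte_tm_error error;

	/* If the hard port does not expose TM capabilities, the app could
	 * decide to instantiate the softnic SW fall-back instead. */
	int tm_in_hw = (rte_tm_capabilities_get(hard_port_id, &cap,
		&error) == 0);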

[...]
> Q3: Why not change the "hard" device (and keep a single device) instead of
>     creating a new "soft" device (and thus having two devices)?
> A3: This is not possible with the current librte_ether ethdev
>     implementation. The ethdev->dev_ops are defined as constant structure,
>     so it cannot be changed per device (nor per PMD). The new ops also
>     need memory space to store their context data structures, which
>     requires updating the ethdev->data->dev_private of the existing
>     device; at best, maybe a resize of ethdev->data->dev_private could be
>     done, assuming that librte_ether will introduce a way to find out its
>     size, but this cannot be done while device is running. Other side
>     effects might exist, as the changes are very intrusive, plus it likely
>     needs more changes in librte_ether.

Q3 is about calling SW fallback from the driver code, right?

We must not implement fallbacks in drivers because it would hide
it from the application.
If a feature is not available in hardware, the application can choose
to bypass this feature or integrate the fallback in its own workflow.

> Q4: Why not call the SW fall-back dev_ops directly in librte_ether for
>     devices which do not support the specific feature? If the device
>     supports the capability, let's call its dev_ops, otherwise call the
>     SW fall-back dev_ops.
> A4: First, similar reasons to Q&A3. This fixes the need to change
>     ethdev->dev_ops of the device, but it does not do anything to fix the
>     other significant issue of where to store the context data structures
>     needed by the SW fall-back functions (which, in this approach, are
>     called implicitly by librte_ether).
>     Second, the SW fall-back options should not be restricted arbitrarily
>     by the librte_ether library, the decision should belong to the app.
>     For example, the TM SW fall-back should not be limited to only
>     librte_sched, which (like any SW fall-back) is limited to a specific
>     hierarchy and feature set, it cannot do any possible hierarchy. If
>     alternatives exist, the one to use should be picked by the app, not by
>     the ethdev layer.

Q4 is about calling SW callback from the API glue code, right?

We could summarize Q3/Q4 as "it could be done but we propose another way".
I think we must consider the pros and cons of both approaches from
a user perspective.
I agree the application must decide which fallback to use.
We could propose one fallback in ethdev which can be enabled explicitly
(see my tristate capabilities proposal above).

> Q5: Why is the app required to continue to configure both the "hard" and
>     the "soft" devices even after the "soft" device has been created? Why
>     not hiding the "hard" device under the "soft" device and have the
>     "soft" device configure the "hard" device under the hood?
> A5: This was the approach tried in the V2 of this patch set (overlay
>     "soft" device taking over the configuration of the underlay "hard"
>     device) and eventually dropped due to increased complexity of having
>     to keep the configuration of two distinct devices in sync with
>     librte_ether implementation that is not friendly towards such
>     approach. Basically, each ethdev API call for the overlay device
>     needs to configure the overlay device, invoke the same configuration
>     with possibly modified parameters for the underlay device, then resume
>     the configuration of overlay device, turning this into a device
>     emulation project.
>     V2 minuses: increased complexity (deal with two devices at same time);
>     need to implement every ethdev API, even those not needed for the scope
>     of SW fall-back; intrusive; sometimes have to silently take decisions
>     that should be left to the app.
>     V3 pluses: lower complexity (only one device); only need to implement
>     those APIs that are in scope of the SW fall-back; non-intrusive (deal
>     with "hard" device through ethdev API); app decisions taken by the app
>     in an explicit way.

I think it is breaking what you call the NFV vision in several places.

[...]
>     9. [rte_ring proliferation] Thread safety requirements for ethdev
>        RX/TXqueues require an rte_ring to be used for every RX/TX queue
>        of each "soft" ethdev. This rte_ring proliferation unnecessarily
>        increases the memory footprint and lowers performance, especially
>        when each "soft" ethdev ends up on a different CPU core (ping-pong
>        of cache lines).

I am curious to understand why you consider thread safety as a requirement
for queues. No need to reply here, the question is already asked
at the beginning of this email ;)
  
Jasvinder Singh Sept. 22, 2017, 10:07 p.m. UTC | #9
Hi Thomas,

> -----Original Message-----
> From: Thomas Monjalon [mailto:thomas@monjalon.net]
> Sent: Wednesday, September 20, 2017 4:36 PM
> To: Singh, Jasvinder <jasvinder.singh@intel.com>; Dumitrescu, Cristian
> <cristian.dumitrescu@intel.com>
> Cc: dev@dpdk.org; Yigit, Ferruh <ferruh.yigit@intel.com>
> Subject: Re: [dpdk-dev] [PATCH v4 0/4] net/softnic: sw fall-back pmd for
> traffic mgmt and others
> 
> Hi,
> 
> 18/09/2017 11:10, Jasvinder Singh:
> > The SoftNIC PMD is intended to provide SW fall-back options for
> > specific ethdev APIs in a generic way to the NICs not supporting those
> features.
> 
> I agree it is important to have a solution in DPDK to better manage SW
> fallbacks. One question is to know whether we can implement and maintain
> many solutions. We probably must choose only one solution.
> 
> I have not read the code. I am just interested in the design for now.
> I think it is a smart idea but maybe less convenient than calling fallback from
> ethdev API glue code. My opinion has not changed since v1.

IMHO, calling the fallback from the ethdev API glue code suffers from a scalability issue. Assume the scenario where another
SW fallback implementation for TM, or for one of its specific features, becomes available: what would the approach be when something is already glued into the TM API?
The softnic could be considered a placeholder for adding and enabling more features, at any granularity, in addition
to providing the complete TM feature.

> Thanks for the detailed explanations. Let's discuss below.
> 
> [...]
> > * RX/TX: The app reads packets from/writes packets to the "soft" port
> >   instead of the "hard" port. The RX and TX queues of the "soft" port are
> >   thread safe, as any ethdev.
> 
> "thread safe as any ethdev"?
> I would say the ethdev queues are not thread safe.

[Jasvinder] Agree.

> [...]
> > * Meets the NFV vision: The app should be (almost) agnostic about the NIC
> >   implementation (different vendors/models, HW-SW mix), the app should
> not
> >   require changes to use different NICs, the app should use the same API
> >   for all NICs. If a NIC does not implement a specific feature, the HW
> >   should be augmented with SW to meet the functionality while still
> >   preserving the same API.
> 
> This goal could also be achieved by adding the SW capability to the API.
> After getting capabilities of a hardware, the app could set the capability of
> some driver features to "SW fallback".
> So the capability would become a tristate:
> 	- not supported
> 	- HW supported
> 	- SW supported
> 
> The unique API goal is failed if we must manage two ports, the HW port for
> some features and the softnic port for other features.
> You explain it in A5 below.

[Jasvinder]  The TM API is agnostic to the underlying implementation and allows applications to implement
the solution in SW, in HW or as a HW/SW hybrid, at any granularity and on any number of devices, depending
on the availability of features. No restriction. Thus, managing and configuring any number of devices (physical and virtual) through the high
level API is at the disposal of the application level framework. When the softnic device is enabled, the application sends and receives packets
through the soft device instead of the hard device, as the soft device implements the features missing in the hard device. It doesn't mean that the softnic
device should hide the hard device, and it doesn't prevent the application from communicating directly with the hard device. If desired, the application
can bypass the softnic device and send TX packets straight to the hard device through the queues not used by the soft device.
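
A minimal sketch of that workflow, in case it helps; soft_port_id and queue_id
are illustrative names, and device/queue setup through the regular ethdev API
is assumed to have been done already:

	/* App thread: RX/TX on the soft port. rte_pmd_softnic_run() drains the
	 * soft TX rings into the hard device TX queue; it can be called from
	 * this thread or from a dedicated one. Drop handling on partial TX is
	 * omitted for brevity. */
	static void
	app_main_loop(uint8_t soft_port_id, uint16_t queue_id)
	{
		struct rte_mbuf *pkts[32];

		for ( ; ; ) {
			uint16_t nb_rx = rte_eth_rx_burst(soft_port_id,
				queue_id, pkts, 32);

			/* ... application processing on pkts[0 .. nb_rx) ... */

			if (nb_rx)
				rte_eth_tx_burst(soft_port_id, queue_id,
					pkts, nb_rx);

			rte_pmd_softnic_run(soft_port_id);
		}
	}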

> [...]
> > Example: Create "soft" port for "hard" port "0000:04:00.1", enable the
> > TM feature with default settings:
> >           --vdev 'net_softnic0,hard_name=0000:04:00.1,soft_tm=on'
> 
> So the app will use only the vdev net_softnic0 which will forward packets to
> 0000:04:00.1?
> Can we say in this example that net_softnic0 owns 0000:04:00.1?
> Probably not, because the config of the HW must be done separately (cf. Q5).
> See my "ownership proposal":
> 	http://dpdk.org/ml/archives/dev/2017-September/074656.html
> 
> The issue I see in this example is that we must define how to enable every
> features. It should be equivalent to defining the ethdev capabilities.
> In this example, the option soft_tm=on is probably not enough fine-grain.
> We could support some parts of TM API in HW and other parts in SW.
> 
[Jasvinder] - This is one instance where the complete hierarchical scheduler is presented as the software fallback. But the
approach doesn't prevent adding more features (at any granularity) to the softnic and enabling them
through named arguments during device creation.
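
For reference, with the arguments defined in this patch (hard_name and
hard_tx_queue_id), the soft device can be instantiated either from the EAL
command line or programmatically; a sketch, with an example PCI address and
rte_vdev_init() assumed to be available in this DPDK version:

	/* EAL command line:
	 *   --vdev 'net_softnic0,hard_name=0000:04:00.1,hard_tx_queue_id=0'
	 *
	 * or, equivalently, from application code once the hard device has
	 * been probed:
	 */
	rte_vdev_init("net_softnic0",
		"hard_name=0000:04:00.1,hard_tx_queue_id=0");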

> [...]
> > Q3: Why not change the "hard" device (and keep a single device) instead of
> >     creating a new "soft" device (and thus having two devices)?
> > A3: This is not possible with the current librte_ether ethdev
> >     implementation. The ethdev->dev_ops are defined as constant structure,
> >     so it cannot be changed per device (nor per PMD). The new ops also
> >     need memory space to store their context data structures, which
> >     requires updating the ethdev->data->dev_private of the existing
> >     device; at best, maybe a resize of ethdev->data->dev_private could be
> >     done, assuming that librte_ether will introduce a way to find out its
> >     size, but this cannot be done while device is running. Other side
> >     effects might exist, as the changes are very intrusive, plus it likely
> >     needs more changes in librte_ether.
> 
> Q3 is about calling SW fallback from the driver code, right?
> 
> We must not implement fallbacks in drivers because it would hide it to the
> application.
> If a feature is not available in hardware, the application can choose to bypass
> this feature or integrate the fallback in its own workflow.

[Jasvinder]: Naturally, if the hard device has the TM feature, then the TM-specific ops invoked by the API functions are implemented by its PMD.
A similar approach is followed in the SW fall-back solution: the softnic port complements the hard device.
 
> > Q4: Why not call the SW fall-back dev_ops directly in librte_ether for
> >     devices which do not support the specific feature? If the device
> >     supports the capability, let's call its dev_ops, otherwise call the
> >     SW fall-back dev_ops.
> > A4: First, similar reasons to Q&A3. This fixes the need to change
> >     ethdev->dev_ops of the device, but it does not do anything to fix the
> >     other significant issue of where to store the context data structures
> >     needed by the SW fall-back functions (which, in this approach, are
> >     called implicitly by librte_ether).
> >     Second, the SW fall-back options should not be restricted arbitrarily
> >     by the librte_ether library, the decision should belong to the app.
> >     For example, the TM SW fall-back should not be limited to only
> >     librte_sched, which (like any SW fall-back) is limited to a specific
> >     hierarchy and feature set, it cannot do any possible hierarchy. If
> >     alternatives exist, the one to use should be picked by the app, not by
> >     the ethdev layer.
> 
> Q4 is about calling SW callback from the API glue code, right?
> 
> We could summarize Q3/Q4 as "it could be done but we propose another
> way".
> I think we must consider the pros and cons of both approaches from a user
> perspective.
> I agree the application must decide which fallback to use.
> We could propose one fallback in ethdev which can be enabled explicitly (see
> my tristate capabilities proposal above).

[Jasvinder] As explained above as well, the approach of tying a SW solution to the API will
create a scalability issue: how would two different SW solutions coexist, or, for instance, N software solutions?
That's why the softnic (virtual device) is proposed as an alternative which could be extended
to include and enable features.
 
> > Q5: Why is the app required to continue to configure both the "hard" and
> >     the "soft" devices even after the "soft" device has been created? Why
> >     not hiding the "hard" device under the "soft" device and have the
> >     "soft" device configure the "hard" device under the hood?
> > A5: This was the approach tried in the V2 of this patch set (overlay
> >     "soft" device taking over the configuration of the underlay "hard"
> >     device) and eventually dropped due to increased complexity of having
> >     to keep the configuration of two distinct devices in sync with
> >     librte_ether implementation that is not friendly towards such
> >     approach. Basically, each ethdev API call for the overlay device
> >     needs to configure the overlay device, invoke the same configuration
> >     with possibly modified parameters for the underlay device, then resume
> >     the configuration of overlay device, turning this into a device
> >     emulation project.
> >     V2 minuses: increased complexity (deal with two devices at same time);
> >     need to implement every ethdev API, even those not needed for the
> scope
> >     of SW fall-back; intrusive; sometimes have to silently take decisions
> >     that should be left to the app.
> >     V3 pluses: lower complexity (only one device); only need to implement
> >     those APIs that are in scope of the SW fall-back; non-intrusive (deal
> >     with "hard" device through ethdev API); app decisions taken by the app
> >     in an explicit way.
> 
> I think it is breaking what you call the NFV vision in several places.

[Jasvinder] Mentioning the NFV vision is about hiding the heterogeneous implementation
(HW, SW, HW-SW hybrid) under the abstraction layer provided by the TM API,
instead of restricting the app to using the API for a specific port.
> 
> [...]
> >     9. [rte_ring proliferation] Thread safety requirements for ethdev
> >        RX/TXqueues require an rte_ring to be used for every RX/TX queue
> >        of each "soft" ethdev. This rte_ring proliferation unnecessarily
> >        increases the memory footprint and lowers performance, especially
> >        when each "soft" ethdev ends up on a different CPU core (ping-pong
> >        of cache lines).
> 
> I am curious to understand why you consider thread safety as a requirement
> for queues. No need to reply here, the question is already asked at the
> beginning of this email ;)
  
Cristian Dumitrescu Oct. 6, 2017, 10:40 a.m. UTC | #10
Hi Thomas,

Thanks for taking the time to read through our rationale and provide quality comments on a topic where usually people are shouting but not listening!

> -----Original Message-----
> From: Thomas Monjalon [mailto:thomas@monjalon.net]
> Sent: Wednesday, September 20, 2017 4:36 PM
> To: Singh, Jasvinder <jasvinder.singh@intel.com>; Dumitrescu, Cristian
> <cristian.dumitrescu@intel.com>
> Cc: dev@dpdk.org; Yigit, Ferruh <ferruh.yigit@intel.com>
> Subject: Re: [dpdk-dev] [PATCH v4 0/4] net/softnic: sw fall-back pmd for
> traffic mgmt and others
> 
> Hi,
> 
> 18/09/2017 11:10, Jasvinder Singh:
> > The SoftNIC PMD is intended to provide SW fall-back options for specific
> > ethdev APIs in a generic way to the NICs not supporting those features.
> 
> I agree it is important to have a solution in DPDK to better manage
> SW fallbacks. One question is to know whether we can implement and
> maintain many solutions. We probably must choose only one solution.
> 
> I have not read the code. I am just interested in the design for now.
> I think it is a smart idea but maybe less convenient than calling fallback
> from ethdev API glue code. My opinion has not changed since v1.
> Thanks for the detailed explanations. Let's discuss below.
> 

Don't get me wrong, I would also like to have the single-device solution (hard NIC augmented with SW-implemented features) as opposed to the current proposal, which requires two devices (a hard device and a soft device acting as the app front-end for the hard device).

The problem is that right now the single-device solution is not an option with the current librte_ether, as there are simply a lot of changes required that need more time to think through and get agreement, and likely several incremental stages are required to make it happen. As detailed in the Dublin presentation, they mostly refer to:
- the need of the SW fall-back to maintain its own data structures and functions (per device, per RX queue, per TX queue); a sketch follows this list
- coexistence of all the features together
- how to bind an ethdev to one (or several) SW threads
- thread safety requirements between ethdev SW thread and app threads
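
As a purely hypothetical illustration of the first point above (no such
structures exist in librte_ether today), the per-device and per-queue state
that the SW fall-back would need might look roughly like this, with no agreed
place to store it in the current ethdev:

	struct sw_fallback_txq {
		struct rte_ring *r;	/* staging ring for the SW feature */
	};

	struct sw_fallback_ctx {
		void *tm;			/* e.g. a librte_sched port */
		struct sw_fallback_txq *txq;	/* one entry per TX queue */
		uint16_t nb_txq;
	};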

Per our Dublin discussion, here is my proposal:
1. Get Soft NIC PMD into release 17.11.
	a) It is the imperfect 2-device solution, but it works and provides an interim solution.
	b) It allows us to make progress on the development for a few key features such as traffic management (on TX) and hopefully flow & metering (on RX) and get feedback on this code that we can later restructure into the final single-device solution.
	c) It is purely yet another PMD which we can melt away into the final solution later.
2. Start an RFC on librte_ether required changes to get the single-device solution in place.
	a) I can spend some time to summarize the objectives, requirements, current issues and potential approaches and send the first draft of this RFC in the next week or two?
	b) We can then discuss, poll for more ideas and hopefully draft an incremental path forward

What do you think?

> [...]
> > * RX/TX: The app reads packets from/writes packets to the "soft" port
> >   instead of the "hard" port. The RX and TX queues of the "soft" port are
> >   thread safe, as any ethdev.
> 
> "thread safe as any ethdev"?
> I would say the ethdev queues are not thread safe.
> 
> [...]

Yes, per the Dublin presentation, the thread safety mentioned here is between the Soft NIC thread and the application thread(s).

> > * Meets the NFV vision: The app should be (almost) agnostic about the NIC
> >   implementation (different vendors/models, HW-SW mix), the app should
> not
> >   require changes to use different NICs, the app should use the same API
> >   for all NICs. If a NIC does not implement a specific feature, the HW
> >   should be augmented with SW to meet the functionality while still
> >   preserving the same API.
> 
> This goal could also be achieved by adding the SW capability to the API.
> After getting capabilities of a hardware, the app could set the capability
> of some driver features to "SW fallback".
> So the capability would become a tristate:
> 	- not supported
> 	- HW supported
> 	- SW supported
> 
> The unique API goal is failed if we must manage two ports,
> the HW port for some features and the softnic port for other features.
> You explain it in A5 below.
> 

Yes, agree that 2-device solution is not fully meeting this goal, but IMHO this is the best we can do today; hopefully we can come up with a path forward for the single-device solution.

> [...]
> > Example: Create "soft" port for "hard" port "0000:04:00.1", enable the TM
> > feature with default settings:
> >           --vdev 'net_softnic0,hard_name=0000:04:00.1,soft_tm=on'
> 
> So the app will use only the vdev net_softnic0 which will forward packets
> to 0000:04:00.1?
> Can we say in this example that net_softnic0 owns 0000:04:00.1?
> Probably not, because the config of the HW must be done separately (cf.
> Q5).
> See my "ownership proposal":
> 	http://dpdk.org/ml/archives/dev/2017-September/074656.html
> 
> The issue I see in this example is that we must define how to enable
> every features. It should be equivalent to defining the ethdev capabilities.
> In this example, the option soft_tm=on is probably not enough fine-grain.
> We could support some parts of TM API in HW and other parts in SW.
> 

There are optional parameters for each feature (i.e. only TM at this point) that are left on their default value for this simple example; they can easily be added on the command line for fine grained tuning of each feature.

> [...]
> > Q3: Why not change the "hard" device (and keep a single device) instead of
> >     creating a new "soft" device (and thus having two devices)?
> > A3: This is not possible with the current librte_ether ethdev
> >     implementation. The ethdev->dev_ops are defined as constant
> structure,
> >     so it cannot be changed per device (nor per PMD). The new ops also
> >     need memory space to store their context data structures, which
> >     requires updating the ethdev->data->dev_private of the existing
> >     device; at best, maybe a resize of ethdev->data->dev_private could be
> >     done, assuming that librte_ether will introduce a way to find out its
> >     size, but this cannot be done while device is running. Other side
> >     effects might exist, as the changes are very intrusive, plus it likely
> >     needs more changes in librte_ether.
> 
> Q3 is about calling SW fallback from the driver code, right?
> 

Yes, correct, but the answer is applicable to the Q4 as well.

> We must not implement fallbacks in drivers because it would hide
> it to the application.
> If a feature is not available in hardware, the application can choose
> to bypass this feature or integrate the fallback in its own workflow.
> 

I agree.

> > Q4: Why not call the SW fall-back dev_ops directly in librte_ether for
> >     devices which do not support the specific feature? If the device
> >     supports the capability, let's call its dev_ops, otherwise call the
> >     SW fall-back dev_ops.
> > A4: First, similar reasons to Q&A3. This fixes the need to change
> >     ethdev->dev_ops of the device, but it does not do anything to fix the
> >     other significant issue of where to store the context data structures
> >     needed by the SW fall-back functions (which, in this approach, are
> >     called implicitly by librte_ether).
> >     Second, the SW fall-back options should not be restricted arbitrarily
> >     by the librte_ether library, the decision should belong to the app.
> >     For example, the TM SW fall-back should not be limited to only
> >     librte_sched, which (like any SW fall-back) is limited to a specific
> >     hierarchy and feature set, it cannot do any possible hierarchy. If
> >     alternatives exist, the one to use should be picked by the app, not by
> >     the ethdev layer.
> 
> Q4 is about calling SW callback from the API glue code, right?
> 

Yes.

> We could summarize Q3/Q4 as "it could be done but we propose another
> way".
> I think we must consider the pros and cons of both approaches from
> a user perspective.
> I agree the application must decide which fallback to use.
> We could propose one fallback in ethdev which can be enabled explicitly
> (see my tristate capabilities proposal above).
> 

My summary would be: it would be great to do it this way, but significant road blocks exist that need to be lifted first.

> > Q5: Why is the app required to continue to configure both the "hard" and
> >     the "soft" devices even after the "soft" device has been created? Why
> >     not hiding the "hard" device under the "soft" device and have the
> >     "soft" device configure the "hard" device under the hood?
> > A5: This was the approach tried in the V2 of this patch set (overlay
> >     "soft" device taking over the configuration of the underlay "hard"
> >     device) and eventually dropped due to increased complexity of having
> >     to keep the configuration of two distinct devices in sync with
> >     librte_ether implementation that is not friendly towards such
> >     approach. Basically, each ethdev API call for the overlay device
> >     needs to configure the overlay device, invoke the same configuration
> >     with possibly modified parameters for the underlay device, then resume
> >     the configuration of overlay device, turning this into a device
> >     emulation project.
> >     V2 minuses: increased complexity (deal with two devices at same time);
> >     need to implement every ethdev API, even those not needed for the
> scope
> >     of SW fall-back; intrusive; sometimes have to silently take decisions
> >     that should be left to the app.
> >     V3 pluses: lower complexity (only one device); only need to implement
> >     those APIs that are in scope of the SW fall-back; non-intrusive (deal
> >     with "hard" device through ethdev API); app decisions taken by the app
> >     in an explicit way.
> 
> I think it is breaking what you call the NFV vision in several places.
> 

Personally, I also agree with you here.

> [...]
> >     9. [rte_ring proliferation] Thread safety requirements for ethdev
> >        RX/TXqueues require an rte_ring to be used for every RX/TX queue
> >        of each "soft" ethdev. This rte_ring proliferation unnecessarily
> >        increases the memory footprint and lowers performance, especially
> >        when each "soft" ethdev ends up on a different CPU core (ping-pong
> >        of cache lines).
> 
> I am curious to understand why you consider thread safety as a requirement
> for queues. No need to reply here, the question is already asked
> at the beginning of this email ;)

Regards,
Cristian
  
Thomas Monjalon Oct. 6, 2017, 12:13 p.m. UTC | #11
06/10/2017 12:40, Dumitrescu, Cristian:
> From: Thomas Monjalon [mailto:thomas@monjalon.net]
> > 18/09/2017 11:10, Jasvinder Singh:
> > > The SoftNIC PMD is intended to provide SW fall-back options for specific
> > > ethdev APIs in a generic way to the NICs not supporting those features.
> > 
> > I agree it is important to have a solution in DPDK to better manage
> > SW fallbacks. One question is to know whether we can implement and
> > maintain many solutions. We probably must choose only one solution.
> > 
> > I have not read the code. I am just interested in the design for now.
> > I think it is a smart idea but maybe less convenient than calling fallback
> > from ethdev API glue code. My opinion has not changed since v1.
> > Thanks for the detailed explanations. Let's discuss below.
> > 
> 
> Don't understand me wrong, I would also like to have the single device solution (hard NIC augmented with SW-implemented features) as opposed to the current proposal, which requires two devices (hard device and soft device acting as app front-end for the hard device).
> 
> The problem is that right now the single device solution is not an option with the current librte_ether, as there simply a lot of changes required that need more time to think through and get agreement, and likely several incremental stages are required to make it happen. As detailed in the Dublin presentation, they mostly refer to:
> - the need of the SW fall-back to maintain its owns data structures and functions (per device, per RX queue, per TX queue)
> - coexistence of all the features together
> - how to bind an ethdev to one (or several) SW threads
> - thread safety requirements between ethdev SW thread and app threads
> 
> Per our Dublin discussion, here is my proposal:
> 1. Get Soft NIC PMD into release 17.11.
> 	a) It is the imperfect 2-device solution, but it works and provides an interim solution.
> 	b) It allows us to make progress on the development for a few key features such as traffic management (on TX) and hopefully flow & metering (on RX) and get feedback on this code that we can later restructure into the final single-device solution.
> 	c) It is purely yet another PMD which we can melt away into the final solution later.
> 2. Start an RFC on librte_ether required changes to get the single-device solution in place.
> 	a) I can spend some time to summarize the objectives, requirements, current issues and potential approaches and send the first draft of this RFC in the next week or two?
> 	b) We can then discuss, poll for more ideas and hopefully draft an incremental path forward
> 
> What do you think?

I think temporary solutions (which often become definitive) must be avoided,
especially when they imply a new API.
In the case of softnic, there is no new API really, just a new workflow
for applications and some new driver parameters.
So my conclusion is that we should merge and experience it.
It does not prevent us from working on another solution, as you suggest.

Acked-by: Thomas Monjalon <thomas@monjalon.net>

PS: thank you for having given your opinion on other questions
  

Patch

diff --git a/MAINTAINERS b/MAINTAINERS
index a0cd75e..b6b738d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -511,6 +511,11 @@  M: Gaetan Rivet <gaetan.rivet@6wind.com>
 F: drivers/net/failsafe/
 F: doc/guides/nics/fail_safe.rst
 
+Softnic PMD
+M: Jasvinder Singh <jasvinder.singh@intel.com>
+M: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
+F: drivers/net/softnic
+
 
 Crypto Drivers
 --------------
diff --git a/config/common_base b/config/common_base
index 5e97a08..1a0c77d 100644
--- a/config/common_base
+++ b/config/common_base
@@ -273,6 +273,11 @@  CONFIG_RTE_LIBRTE_SFC_EFX_PMD=y
 CONFIG_RTE_LIBRTE_SFC_EFX_DEBUG=n
 
 #
+# Compile SOFTNIC PMD
+#
+CONFIG_RTE_LIBRTE_PMD_SOFTNIC=y
+
+#
 # Compile software PMD backed by SZEDATA2 device
 #
 CONFIG_RTE_LIBRTE_PMD_SZEDATA2=n
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index d33c959..b552a51 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -110,4 +110,9 @@  DIRS-$(CONFIG_RTE_LIBRTE_PMD_VHOST) += vhost
 endif # $(CONFIG_RTE_LIBRTE_VHOST)
 DEPDIRS-vhost = $(core-libs) librte_vhost
 
+ifeq ($(CONFIG_RTE_LIBRTE_SCHED),y)
+DIRS-$(CONFIG_RTE_LIBRTE_PMD_SOFTNIC) += softnic
+endif # $(CONFIG_RTE_LIBRTE_SCHED)
+DEPDIRS-softnic = $(core-libs) librte_sched
+
 include $(RTE_SDK)/mk/rte.subdir.mk
diff --git a/drivers/net/softnic/Makefile b/drivers/net/softnic/Makefile
new file mode 100644
index 0000000..8d00656
--- /dev/null
+++ b/drivers/net/softnic/Makefile
@@ -0,0 +1,56 @@ 
+#   BSD LICENSE
+#
+#   Copyright(c) 2017 Intel Corporation. All rights reserved.
+#   All rights reserved.
+#
+#   Redistribution and use in source and binary forms, with or without
+#   modification, are permitted provided that the following conditions
+#   are met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above copyright
+#       notice, this list of conditions and the following disclaimer in
+#       the documentation and/or other materials provided with the
+#       distribution.
+#     * Neither the name of Intel Corporation nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+#   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_softnic.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+EXPORT_MAP := rte_eth_softnic_version.map
+
+LIBABIVER := 1
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_PMD_SOFTNIC) += rte_eth_softnic.c
+
+#
+# Export include files
+#
+SYMLINK-y-include +=rte_eth_softnic.h
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/softnic/rte_eth_softnic.c b/drivers/net/softnic/rte_eth_softnic.c
new file mode 100644
index 0000000..35cb93c
--- /dev/null
+++ b/drivers/net/softnic/rte_eth_softnic.c
@@ -0,0 +1,609 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <rte_ethdev.h>
+#include <rte_ethdev_vdev.h>
+#include <rte_malloc.h>
+#include <rte_vdev.h>
+#include <rte_kvargs.h>
+#include <rte_errno.h>
+#include <rte_ring.h>
+
+#include "rte_eth_softnic.h"
+#include "rte_eth_softnic_internals.h"
+
+#define PMD_PARAM_HARD_NAME					"hard_name"
+#define PMD_PARAM_HARD_TX_QUEUE_ID			"hard_tx_queue_id"
+
+static const char *pmd_valid_args[] = {
+	PMD_PARAM_HARD_NAME,
+	PMD_PARAM_HARD_TX_QUEUE_ID,
+	NULL
+};
+
+static struct rte_vdev_driver pmd_drv;
+
+static const struct rte_eth_dev_info pmd_dev_info = {
+	.min_rx_bufsize = 0,
+	.max_rx_pktlen = UINT32_MAX,
+	.max_rx_queues = UINT16_MAX,
+	.max_tx_queues = UINT16_MAX,
+	.rx_desc_lim = {
+		.nb_max = UINT16_MAX,
+		.nb_min = 0,
+		.nb_align = 1,
+	},
+	.tx_desc_lim = {
+		.nb_max = UINT16_MAX,
+		.nb_min = 0,
+		.nb_align = 1,
+	},
+};
+
+static void
+pmd_dev_infos_get(struct rte_eth_dev *dev __rte_unused,
+	struct rte_eth_dev_info *dev_info)
+{
+	memcpy(dev_info, &pmd_dev_info, sizeof(*dev_info));
+}
+
+static int
+pmd_dev_configure(struct rte_eth_dev *dev)
+{
+	struct pmd_internals *p = dev->data->dev_private;
+	struct rte_eth_dev *hard_dev = &rte_eth_devices[p->hard.port_id];
+
+	if (dev->data->nb_rx_queues > hard_dev->data->nb_rx_queues)
+		return -1;
+
+	if (p->params.hard.tx_queue_id >= hard_dev->data->nb_tx_queues)
+		return -1;
+
+	return 0;
+}
+
+static int
+pmd_rx_queue_setup(struct rte_eth_dev *dev,
+	uint16_t rx_queue_id,
+	uint16_t nb_rx_desc __rte_unused,
+	unsigned int socket_id,
+	const struct rte_eth_rxconf *rx_conf __rte_unused,
+	struct rte_mempool *mb_pool __rte_unused)
+{
+	struct pmd_internals *p = dev->data->dev_private;
+
+	if (p->params.soft.intrusive == 0) {
+		struct pmd_rx_queue *rxq;
+
+		rxq = rte_zmalloc_socket(p->params.soft.name,
+			sizeof(struct pmd_rx_queue), 0, socket_id);
+		if (rxq == NULL)
+			return -1;
+
+		rxq->hard.port_id = p->hard.port_id;
+		rxq->hard.rx_queue_id = rx_queue_id;
+		dev->data->rx_queues[rx_queue_id] = rxq;
+	} else {
+		struct rte_eth_dev *hard_dev =
+			&rte_eth_devices[p->hard.port_id];
+		void *rxq = hard_dev->data->rx_queues[rx_queue_id];
+
+		if (rxq == NULL)
+			return -1;
+
+		dev->data->rx_queues[rx_queue_id] = rxq;
+	}
+	return 0;
+}
+
+static int
+pmd_tx_queue_setup(struct rte_eth_dev *dev,
+	uint16_t tx_queue_id,
+	uint16_t nb_tx_desc,
+	unsigned int socket_id,
+	const struct rte_eth_txconf *tx_conf __rte_unused)
+{
+	uint32_t size = RTE_ETH_NAME_MAX_LEN + strlen("_txq") + 4;
+	char name[size];
+	struct rte_ring *r;
+
+	snprintf(name, sizeof(name), "%s_txq%04x",
+		dev->data->name, tx_queue_id);
+	r = rte_ring_create(name, nb_tx_desc, socket_id,
+		RING_F_SP_ENQ | RING_F_SC_DEQ);
+	if (r == NULL)
+		return -1;
+
+	dev->data->tx_queues[tx_queue_id] = r;
+	return 0;
+}
+
+static int
+pmd_dev_start(struct rte_eth_dev *dev)
+{
+	struct pmd_internals *p = dev->data->dev_private;
+
+	dev->data->dev_link.link_status = ETH_LINK_UP;
+
+	if (p->params.soft.intrusive) {
+		struct rte_eth_dev *hard_dev =
+			&rte_eth_devices[p->hard.port_id];
+
+		/* The hard_dev->rx_pkt_burst should be stable by now */
+		dev->rx_pkt_burst = hard_dev->rx_pkt_burst;
+	}
+
+	return 0;
+}
+
+static void
+pmd_dev_stop(struct rte_eth_dev *dev)
+{
+	dev->data->dev_link.link_status = ETH_LINK_DOWN;
+}
+
+static void
+pmd_dev_close(struct rte_eth_dev *dev)
+{
+	uint32_t i;
+
+	/* TX queues */
+	for (i = 0; i < dev->data->nb_tx_queues; i++)
+		rte_ring_free((struct rte_ring *) dev->data->tx_queues[i]);
+}
+
+static int
+pmd_link_update(struct rte_eth_dev *dev __rte_unused,
+	int wait_to_complete __rte_unused)
+{
+	return 0;
+}
+
+static const struct eth_dev_ops pmd_ops = {
+	.dev_configure = pmd_dev_configure,
+	.dev_start = pmd_dev_start,
+	.dev_stop = pmd_dev_stop,
+	.dev_close = pmd_dev_close,
+	.link_update = pmd_link_update,
+	.dev_infos_get = pmd_dev_infos_get,
+	.rx_queue_setup = pmd_rx_queue_setup,
+	.tx_queue_setup = pmd_tx_queue_setup,
+	.tm_ops_get = NULL,
+};
+
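+/* Non-intrusive RX path: forward the poll to the mapped hard device queue. */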
+static uint16_t
+pmd_rx_pkt_burst(void *rxq,
+	struct rte_mbuf **rx_pkts,
+	uint16_t nb_pkts)
+{
+	struct pmd_rx_queue *rx_queue = rxq;
+
+	return rte_eth_rx_burst(rx_queue->hard.port_id,
+		rx_queue->hard.rx_queue_id,
+		rx_pkts,
+		nb_pkts);
+}
+
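+/* TX path: only enqueue to the soft queue ring; the actual transmit to the
+ * hard device is performed later by rte_pmd_softnic_run().
+ */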
+static uint16_t
+pmd_tx_pkt_burst(void *txq,
+	struct rte_mbuf **tx_pkts,
+	uint16_t nb_pkts)
+{
+	return (uint16_t) rte_ring_enqueue_burst(txq,
+		(void **) tx_pkts,
+		nb_pkts,
+		NULL);
+}
+
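+/*
+ * Default run operation: round-robin over the soft device TX queues,
+ * accumulate packets into the enqueue buffer and, once a full burst of
+ * DEFAULT_BURST_SIZE packets is available (or FLUSH_COUNT_THRESHOLD runs
+ * have elapsed with a partial burst), send them to the hard device TX queue.
+ */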
+static __rte_always_inline int
+rte_pmd_softnic_run_default(struct rte_eth_dev *dev)
+{
+	struct pmd_internals *p = dev->data->dev_private;
+
+	/* Persistent context: Read Only (update not required) */
+	struct rte_mbuf **pkts = p->soft.def.pkts;
+	uint16_t nb_tx_queues = dev->data->nb_tx_queues;
+
+	/* Persistent context: Read - Write (update required) */
+	uint32_t txq_pos = p->soft.def.txq_pos;
+	uint32_t pkts_len = p->soft.def.pkts_len;
+	uint32_t flush_count = p->soft.def.flush_count;
+
+	/* Not part of the persistent context */
+	uint32_t pos;
+	uint16_t i;
+
+	/* Soft device TXQ read, Hard device TXQ write */
+	for (i = 0; i < nb_tx_queues; i++) {
+		struct rte_ring *txq = dev->data->tx_queues[txq_pos];
+
+		/* Read soft device TXQ burst to packet enqueue buffer */
+		pkts_len += rte_ring_sc_dequeue_burst(txq,
+			(void **) &pkts[pkts_len],
+			DEFAULT_BURST_SIZE,
+			NULL);
+
+		/* Increment soft device TXQ */
+		txq_pos++;
+		if (txq_pos >= nb_tx_queues)
+			txq_pos = 0;
+
+		/* Hard device TXQ write when complete burst is available */
+		if (pkts_len >= DEFAULT_BURST_SIZE) {
+			for (pos = 0; pos < pkts_len; )
+				pos += rte_eth_tx_burst(p->hard.port_id,
+					p->params.hard.tx_queue_id,
+					&pkts[pos],
+					(uint16_t) (pkts_len - pos));
+
+			pkts_len = 0;
+			flush_count = 0;
+			break;
+		}
+	}
+
+	if (flush_count >= FLUSH_COUNT_THRESHOLD) {
+		for (pos = 0; pos < pkts_len; )
+			pos += rte_eth_tx_burst(p->hard.port_id,
+				p->params.hard.tx_queue_id,
+				&pkts[pos],
+				(uint16_t) (pkts_len - pos));
+
+		pkts_len = 0;
+		flush_count = 0;
+	}
+
+	p->soft.def.txq_pos = txq_pos;
+	p->soft.def.pkts_len = pkts_len;
+	p->soft.def.flush_count = flush_count + 1;
+
+	return 0;
+}
+
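+/**
+ * Run the soft device: drain the soft device TX queues into the hard device
+ * TX queue. Typically called periodically by the application from the thread
+ * that owns the soft device.
+ */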
+int
+rte_pmd_softnic_run(uint8_t port_id)
+{
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, 0);
+#endif
+
+	return rte_pmd_softnic_run_default(dev);
+}
+
+static struct ether_addr eth_addr = { .addr_bytes = {0} };
+
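+/* Convert the hard device speed_capa bitmask to its highest rate in Mbps:
+ * the position of the most significant capability bit indexes the rate table.
+ */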
+static uint32_t
+eth_dev_speed_max_mbps(uint32_t speed_capa)
+{
+	uint32_t rate_mbps[32] = {
+		ETH_SPEED_NUM_NONE,
+		ETH_SPEED_NUM_10M,
+		ETH_SPEED_NUM_10M,
+		ETH_SPEED_NUM_100M,
+		ETH_SPEED_NUM_100M,
+		ETH_SPEED_NUM_1G,
+		ETH_SPEED_NUM_2_5G,
+		ETH_SPEED_NUM_5G,
+		ETH_SPEED_NUM_10G,
+		ETH_SPEED_NUM_20G,
+		ETH_SPEED_NUM_25G,
+		ETH_SPEED_NUM_40G,
+		ETH_SPEED_NUM_50G,
+		ETH_SPEED_NUM_56G,
+		ETH_SPEED_NUM_100G,
+	};
+
+	uint32_t pos = (speed_capa) ? (31 - __builtin_clz(speed_capa)) : 0;
+	return rate_mbps[pos];
+}
+
+static int
+default_init(struct pmd_internals *p,
+	struct pmd_params *params,
+	int numa_node)
+{
+	p->soft.def.pkts = rte_zmalloc_socket(params->soft.name,
+		2 * DEFAULT_BURST_SIZE * sizeof(struct rte_mbuf *),
+		0,
+		numa_node);
+
+	if (p->soft.def.pkts == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void
+default_free(struct pmd_internals *p)
+{
+	rte_free(p->soft.def.pkts);
+}
+
+static void *
+pmd_init(struct pmd_params *params, int numa_node)
+{
+	struct pmd_internals *p;
+	int status;
+
+	p = rte_zmalloc_socket(params->soft.name,
+		sizeof(struct pmd_internals),
+		0,
+		numa_node);
+	if (p == NULL)
+		return NULL;
+
+	memcpy(&p->params, params, sizeof(p->params));
+	rte_eth_dev_get_port_by_name(params->hard.name, &p->hard.port_id);
+
+	/* Default */
+	status = default_init(p, params, numa_node);
+	if (status) {
+		rte_free(p);
+		return NULL;
+	}
+
+	return p;
+}
+
+static void
+pmd_free(struct pmd_internals *p)
+{
+	default_free(p);
+
+	rte_free(p);
+}
+
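+/*
+ * Register the soft device as a new ethdev: allocate a separate dev->data
+ * copy on the NUMA node of the hard device, link it to a newly allocated
+ * ethdev entry and inherit the link speed from the hard device.
+ */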
+static int
+pmd_ethdev_register(struct rte_vdev_device *vdev,
+	struct pmd_params *params,
+	void *dev_private)
+{
+	struct rte_eth_dev_info hard_info;
+	struct rte_eth_dev *soft_dev;
+	struct rte_eth_dev_data *soft_data;
+	uint32_t hard_speed;
+	int numa_node;
+	uint8_t hard_port_id;
+
+	rte_eth_dev_get_port_by_name(params->hard.name, &hard_port_id);
+	rte_eth_dev_info_get(hard_port_id, &hard_info);
+	hard_speed = eth_dev_speed_max_mbps(hard_info.speed_capa);
+	numa_node = rte_eth_dev_socket_id(hard_port_id);
+
+	/* Memory allocation */
+	soft_data = rte_zmalloc_socket(params->soft.name,
+		sizeof(*soft_data), 0, numa_node);
+	if (!soft_data)
+		return -ENOMEM;
+
+	/* Ethdev entry allocation */
+	soft_dev = rte_eth_dev_allocate(params->soft.name);
+	if (!soft_dev) {
+		rte_free(soft_data);
+		return -ENOMEM;
+	}
+
+	/* Connect dev->data */
+	memmove(soft_data->name,
+		soft_dev->data->name,
+		sizeof(soft_data->name));
+	soft_data->port_id = soft_dev->data->port_id;
+	soft_data->mtu = soft_dev->data->mtu;
+	soft_dev->data = soft_data;
+
+	/* dev */
+	soft_dev->rx_pkt_burst = (params->soft.intrusive) ?
+		NULL : /* set up later */
+		pmd_rx_pkt_burst;
+	soft_dev->tx_pkt_burst = pmd_tx_pkt_burst;
+	soft_dev->tx_pkt_prepare = NULL;
+	soft_dev->dev_ops = &pmd_ops;
+	soft_dev->device = &vdev->device;
+
+	/* dev->data */
+	soft_dev->data->dev_private = dev_private;
+	soft_dev->data->dev_link.link_speed = hard_speed;
+	soft_dev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
+	soft_dev->data->dev_link.link_autoneg = ETH_LINK_SPEED_FIXED;
+	soft_dev->data->dev_link.link_status = ETH_LINK_DOWN;
+	soft_dev->data->mac_addrs = &eth_addr;
+	soft_dev->data->promiscuous = 1;
+	soft_dev->data->kdrv = RTE_KDRV_NONE;
+	soft_dev->data->numa_node = numa_node;
+
+	return 0;
+}
+
+static int
+get_string(const char *key __rte_unused, const char *value, void *extra_args)
+{
+	if (!value || !extra_args)
+		return -EINVAL;
+
+	*(char **)extra_args = strdup(value);
+
+	if (!*(char **)extra_args)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static int
+get_uint32(const char *key __rte_unused, const char *value, void *extra_args)
+{
+	if (!value || !extra_args)
+		return -EINVAL;
+
+	*(uint32_t *)extra_args = strtoull(value, NULL, 0);
+
+	return 0;
+}
+
+static int
+pmd_parse_args(struct pmd_params *p, const char *name, const char *params)
+{
+	struct rte_kvargs *kvlist;
+	int ret;
+
+	kvlist = rte_kvargs_parse(params, pmd_valid_args);
+	if (kvlist == NULL)
+		return -EINVAL;
+
+	/* Set default values */
+	memset(p, 0, sizeof(*p));
+	p->soft.name = name;
+	p->soft.intrusive = INTRUSIVE;
+	p->hard.tx_queue_id = SOFTNIC_HARD_TX_QUEUE_ID;
+
+	/* HARD: name (mandatory) */
+	if (rte_kvargs_count(kvlist, PMD_PARAM_HARD_NAME) == 1) {
+		ret = rte_kvargs_process(kvlist, PMD_PARAM_HARD_NAME,
+			&get_string, &p->hard.name);
+		if (ret < 0)
+			goto out_free;
+	} else {
+		ret = -EINVAL;
+		goto out_free;
+	}
+
+	/* HARD: tx_queue_id (optional) */
+	if (rte_kvargs_count(kvlist, PMD_PARAM_HARD_TX_QUEUE_ID) == 1) {
+		uint32_t tx_queue_id;
+
+		/* Parse into a 32-bit local first: get_uint32() writes 32 bits,
+		 * while the tx_queue_id field is only 16 bits wide.
+		 */
+		ret = rte_kvargs_process(kvlist, PMD_PARAM_HARD_TX_QUEUE_ID,
+			&get_uint32, &tx_queue_id);
+		if (ret < 0)
+			goto out_free;
+
+		p->hard.tx_queue_id = (uint16_t)tx_queue_id;
+	}
+
+out_free:
+	rte_kvargs_free(kvlist);
+	return ret;
+}
+
+static int
+pmd_probe(struct rte_vdev_device *vdev)
+{
+	struct pmd_params p;
+	const char *params;
+	int status;
+
+	struct rte_eth_dev_info hard_info;
+	uint8_t hard_port_id;
+	int numa_node;
+	void *dev_private;
+
+	if (!vdev)
+		return -EINVAL;
+
+	RTE_LOG(INFO, PMD,
+		"Probing device \"%s\"\n",
+		rte_vdev_device_name(vdev));
+
+	/* Parse input arguments */
+	params = rte_vdev_device_args(vdev);
+	if (!params)
+		return -EINVAL;
+
+	status = pmd_parse_args(&p, rte_vdev_device_name(vdev), params);
+	if (status)
+		return status;
+
+	/* Check input arguments */
+	if (rte_eth_dev_get_port_by_name(p.hard.name, &hard_port_id))
+		return -EINVAL;
+
+	rte_eth_dev_info_get(hard_port_id, &hard_info);
+	numa_node = rte_eth_dev_socket_id(hard_port_id);
+
+	if (p.hard.tx_queue_id >= hard_info.max_tx_queues)
+		return -EINVAL;
+
+	/* Allocate and initialize soft ethdev private data */
+	dev_private = pmd_init(&p, numa_node);
+	if (dev_private == NULL)
+		return -ENOMEM;
+
+	/* Register soft ethdev */
+	RTE_LOG(INFO, PMD,
+		"Creating soft ethdev \"%s\" for hard ethdev \"%s\"\n",
+		p.soft.name, p.hard.name);
+
+	status = pmd_ethdev_register(vdev, &p, dev_private);
+	if (status) {
+		pmd_free(dev_private);
+		return status;
+	}
+
+	return 0;
+}
+
+static int
+pmd_remove(struct rte_vdev_device *vdev)
+{
+	struct rte_eth_dev *dev = NULL;
+	struct pmd_internals *p;
+
+	if (!vdev)
+		return -EINVAL;
+
+	RTE_LOG(INFO, PMD, "Removing device \"%s\"\n",
+		rte_vdev_device_name(vdev));
+
+	/* Find the ethdev entry */
+	dev = rte_eth_dev_allocated(rte_vdev_device_name(vdev));
+	if (dev == NULL)
+		return -ENODEV;
+	p = dev->data->dev_private;
+
+	/* Free device data structures*/
+	pmd_free(p);
+	rte_free(dev->data);
+	rte_eth_dev_release_port(dev);
+
+	return 0;
+}
+
+static struct rte_vdev_driver pmd_drv = {
+	.probe = pmd_probe,
+	.remove = pmd_remove,
+};
+
+RTE_PMD_REGISTER_VDEV(net_softnic, pmd_drv);
+RTE_PMD_REGISTER_PARAM_STRING(net_softnic,
+	PMD_PARAM_HARD_NAME "=<string> "
+	PMD_PARAM_HARD_TX_QUEUE_ID "=<int>");
diff --git a/drivers/net/softnic/rte_eth_softnic.h b/drivers/net/softnic/rte_eth_softnic.h
new file mode 100644
index 0000000..f840345
--- /dev/null
+++ b/drivers/net/softnic/rte_eth_softnic.h
@@ -0,0 +1,54 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_RTE_ETH_SOFTNIC_H__
+#define __INCLUDE_RTE_ETH_SOFTNIC_H__
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef SOFTNIC_HARD_TX_QUEUE_ID
+#define SOFTNIC_HARD_TX_QUEUE_ID			0
+#endif
+
+int
+rte_pmd_softnic_run(uint8_t port_id);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __INCLUDE_RTE_ETH_SOFTNIC_H__ */
diff --git a/drivers/net/softnic/rte_eth_softnic_internals.h b/drivers/net/softnic/rte_eth_softnic_internals.h
new file mode 100644
index 0000000..dfb7fab
--- /dev/null
+++ b/drivers/net/softnic/rte_eth_softnic_internals.h
@@ -0,0 +1,114 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_RTE_ETH_SOFTNIC_INTERNALS_H__
+#define __INCLUDE_RTE_ETH_SOFTNIC_INTERNALS_H__
+
+#include <stdint.h>
+
+#include <rte_mbuf.h>
+#include <rte_ethdev.h>
+
+#include "rte_eth_softnic.h"
+
+#ifndef INTRUSIVE
+#define INTRUSIVE					0
+#endif
+
+struct pmd_params {
+	/** Parameters for the soft device (to be created) */
+	struct {
+		const char *name; /**< Name */
+		uint32_t flags; /**< Flags */
+
+		/** 0 = Access the hard device through the public API only
+		 *      (potentially slower, but safer);
+		 *  1 = Access to the hard device private data structures is
+		 *      allowed (potentially faster).
+		 */
+		int intrusive;
+	} soft;
+
+	/** Parameters for the hard device (existing) */
+	struct {
+		const char *name; /**< Name */
+		uint16_t tx_queue_id; /**< TX queue ID */
+	} hard;
+};
+
+/**
+ * Default Internals
+ */
+
+#ifndef DEFAULT_BURST_SIZE
+#define DEFAULT_BURST_SIZE				32
+#endif
+
+#ifndef FLUSH_COUNT_THRESHOLD
+#define FLUSH_COUNT_THRESHOLD			(1 << 17)
+#endif
+
+struct default_internals {
+	struct rte_mbuf **pkts;
+	uint32_t pkts_len;
+	uint32_t txq_pos;
+	uint32_t flush_count;
+};
+
+/**
+ * PMD Internals
+ */
+struct pmd_internals {
+	/** Params */
+	struct pmd_params params;
+
+	/** Soft device */
+	struct {
+		struct default_internals def; /**< Default */
+	} soft;
+
+	/** Hard device */
+	struct {
+		uint8_t port_id;
+	} hard;
+};
+
+struct pmd_rx_queue {
+	/** Hard device */
+	struct {
+		uint8_t port_id;
+		uint16_t rx_queue_id;
+	} hard;
+};
+
+#endif /* __INCLUDE_RTE_ETH_SOFTNIC_INTERNALS_H__ */
diff --git a/drivers/net/softnic/rte_eth_softnic_version.map b/drivers/net/softnic/rte_eth_softnic_version.map
new file mode 100644
index 0000000..fb2cb68
--- /dev/null
+++ b/drivers/net/softnic/rte_eth_softnic_version.map
@@ -0,0 +1,7 @@ 
+DPDK_17.11 {
+	global:
+
+	rte_pmd_softnic_run;
+
+	local: *;
+};
diff --git a/mk/rte.app.mk b/mk/rte.app.mk
index c25fdd9..3dc82fb 100644
--- a/mk/rte.app.mk
+++ b/mk/rte.app.mk
@@ -67,7 +67,6 @@  _LDLIBS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR)    += -lrte_distributor
 _LDLIBS-$(CONFIG_RTE_LIBRTE_IP_FRAG)        += -lrte_ip_frag
 _LDLIBS-$(CONFIG_RTE_LIBRTE_GRO)            += -lrte_gro
 _LDLIBS-$(CONFIG_RTE_LIBRTE_METER)          += -lrte_meter
-_LDLIBS-$(CONFIG_RTE_LIBRTE_SCHED)          += -lrte_sched
 _LDLIBS-$(CONFIG_RTE_LIBRTE_LPM)            += -lrte_lpm
 # librte_acl needs --whole-archive because of weak functions
 _LDLIBS-$(CONFIG_RTE_LIBRTE_ACL)            += --whole-archive
@@ -99,6 +98,7 @@  _LDLIBS-$(CONFIG_RTE_LIBRTE_RING)           += -lrte_ring
 _LDLIBS-$(CONFIG_RTE_LIBRTE_EAL)            += -lrte_eal
 _LDLIBS-$(CONFIG_RTE_LIBRTE_CMDLINE)        += -lrte_cmdline
 _LDLIBS-$(CONFIG_RTE_LIBRTE_REORDER)        += -lrte_reorder
+_LDLIBS-$(CONFIG_RTE_LIBRTE_SCHED)          += -lrte_sched
 
 ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y)
 _LDLIBS-$(CONFIG_RTE_LIBRTE_KNI)            += -lrte_kni
@@ -135,6 +135,9 @@  _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_NULL)       += -lrte_pmd_null
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_PCAP)       += -lrte_pmd_pcap -lpcap
 _LDLIBS-$(CONFIG_RTE_LIBRTE_QEDE_PMD)       += -lrte_pmd_qede
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_RING)       += -lrte_pmd_ring
+ifeq ($(CONFIG_RTE_LIBRTE_SCHED),y)
+_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_SOFTNIC)    += -lrte_pmd_softnic
+endif
 _LDLIBS-$(CONFIG_RTE_LIBRTE_SFC_EFX_PMD)    += -lrte_pmd_sfc_efx
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_SZEDATA2)   += -lrte_pmd_szedata2 -lsze2
 _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_TAP)        += -lrte_pmd_tap