[dpdk-dev,1/6] ethdev: add Tx preparation

Message ID 1472228578-6980-2-git-send-email-tomaszx.kulasek@intel.com (mailing list archive)
State Superseded, archived
Headers

Commit Message

Tomasz Kulasek Aug. 26, 2016, 4:22 p.m. UTC
  Added API for `rte_eth_tx_prep`

uint16_t rte_eth_tx_prep(uint8_t port_id, uint16_t queue_id,
	struct rte_mbuf **tx_pkts, uint16_t nb_pkts)

Added fields to the `struct rte_eth_desc_lim`:

	uint16_t nb_seg_max;
		/**< Max number of segments per whole packet. */

	uint16_t nb_mtu_seg_max;
		/**< Max number of segments per one MTU */

Created `rte_pkt.h` header with commonly used functions:

int rte_validate_tx_offload(struct rte_mbuf *m)
	to validate general requirements for tx offload in a packet, such as
	flag completeness. In the current implementation this function is called
	optionally when RTE_LIBRTE_ETHDEV_DEBUG is enabled.

int rte_phdr_cksum_fix(struct rte_mbuf *m)
	to fix pseudo header checksum for TSO and non-TSO tcp/udp packets
	before hardware tx checksum offload.
	 - for non-TSO tcp/udp packets full pseudo-header checksum is
	   counted and set.
	 - for TSO the IP payload length is not included.

Signed-off-by: Tomasz Kulasek <tomaszx.kulasek@intel.com>
---
 lib/librte_ether/rte_ethdev.h |   74 +++++++++++++++++++++++
 lib/librte_mbuf/rte_mbuf.h    |    8 +++
 lib/librte_net/Makefile       |    2 +-
 lib/librte_net/rte_pkt.h      |  132 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 215 insertions(+), 1 deletion(-)
 create mode 100644 lib/librte_net/rte_pkt.h
  

Comments

Jerin Jacob Sept. 8, 2016, 7:28 a.m. UTC | #1
On Fri, Aug 26, 2016 at 06:22:53PM +0200, Tomasz Kulasek wrote:
> Added API for `rte_eth_tx_prep`
> 
> uint16_t rte_eth_tx_prep(uint8_t port_id, uint16_t queue_id,
> 	struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
> 
> Added fields to the `struct rte_eth_desc_lim`:
> 
> 	uint16_t nb_seg_max;
> 		/**< Max number of segments per whole packet. */
> 
> 	uint16_t nb_mtu_seg_max;
> 		/**< Max number of segments per one MTU */
> 
> Created `rte_pkt.h` header with common used functions:
> 
> int rte_validate_tx_offload(struct rte_mbuf *m)
> 	to validate general requirements for tx offload in packet such a
> 	flag completness. In current implementation this function is called
> 	optionaly when RTE_LIBRTE_ETHDEV_DEBUG is enabled.
> 
> int rte_phdr_cksum_fix(struct rte_mbuf *m)
> 	to fix pseudo header checksum for TSO and non-TSO tcp/udp packets
> 	before hardware tx checksum offload.
> 	 - for non-TSO tcp/udp packets full pseudo-header checksum is
> 	   counted and set.
> 	 - for TSO the IP payload length is not included.
> 
> Signed-off-by: Tomasz Kulasek <tomaszx.kulasek@intel.com>
> ---
>  lib/librte_ether/rte_ethdev.h |   74 +++++++++++++++++++++++
>  lib/librte_mbuf/rte_mbuf.h    |    8 +++
>  lib/librte_net/Makefile       |    2 +-
>  lib/librte_net/rte_pkt.h      |  132 +++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 215 insertions(+), 1 deletion(-)
>  create mode 100644 lib/librte_net/rte_pkt.h
> 
> diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
> index b0fe033..02569ca 100644
> --- a/lib/librte_ether/rte_ethdev.h
> +++ b/lib/librte_ether/rte_ethdev.h
> @@ -182,6 +182,7 @@ extern "C" {
>  #include <rte_pci.h>
>  #include <rte_dev.h>
>  #include <rte_devargs.h>
> +#include <rte_errno.h>
>  #include "rte_ether.h"
>  #include "rte_eth_ctrl.h"
>  #include "rte_dev_info.h"
> @@ -696,6 +697,8 @@ struct rte_eth_desc_lim {
>  	uint16_t nb_max;   /**< Max allowed number of descriptors. */
>  	uint16_t nb_min;   /**< Min allowed number of descriptors. */
>  	uint16_t nb_align; /**< Number of descriptors should be aligned to. */
> +	uint16_t nb_seg_max;     /**< Max number of segments per whole packet. */
> +	uint16_t nb_mtu_seg_max; /**< Max number of segments per one MTU */
>  };
>  
>  /**
> @@ -1181,6 +1184,12 @@ typedef uint16_t (*eth_tx_burst_t)(void *txq,
>  				   uint16_t nb_pkts);
>  /**< @internal Send output packets on a transmit queue of an Ethernet device. */
>  
> +typedef uint16_t (*eth_tx_prep_t)(void *txq,
> +				   struct rte_mbuf **tx_pkts,
> +				   uint16_t nb_pkts);
> +/**< @internal Prepare output packets on a transmit queue of an Ethernet
> +		device. */
> +
>  typedef int (*flow_ctrl_get_t)(struct rte_eth_dev *dev,
>  			       struct rte_eth_fc_conf *fc_conf);
>  /**< @internal Get current flow control parameter on an Ethernet device */
> @@ -1626,6 +1635,7 @@ enum rte_eth_dev_type {
>  struct rte_eth_dev {
>  	eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */
>  	eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function. */
> +	eth_tx_prep_t tx_pkt_prep; /**< Pointer to PMD transmit prepare function. */
>  	struct rte_eth_dev_data *data;  /**< Pointer to device data */
>  	const struct eth_driver *driver;/**< Driver for this device */
>  	const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
> @@ -2833,6 +2843,70 @@ rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
>  	return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts);
>  }
>  
> +/**
> + * Process a burst of output packets on a transmit queue of an Ethernet device.
> + *
> + * The rte_eth_tx_prep() function is invoked to prepare output packets to be
> + * transmitted on the output queue *queue_id* of the Ethernet device designated
> + * by its *port_id*.
> + * The *nb_pkts* parameter is the number of packets to be prepared which are
> + * supplied in the *tx_pkts* array of *rte_mbuf* structures, each of them
> + * allocated from a pool created with rte_pktmbuf_pool_create().
> + * For each packet to send, the rte_eth_tx_prep() function performs
> + * the following operations:
> + *
> + * - Check if packet meets devices requirements for tx offloads.
> + *
> + * - Check limitations about number of segments.
> + *
> + * - Check additional requirements when debug is enabled.
> + *
> + * - Update and/or reset required checksums when tx offload is set for packet.
> + *
> + * The rte_eth_tx_prep() function returns the number of packets ready to be
> + * sent. A return value equal to *nb_pkts* means that all packets are valid and
> + * ready to be sent.
> + *
> + * @param port_id
> + *   The port identifier of the Ethernet device.
> + * @param queue_id
> + *   The index of the transmit queue through which output packets must be
> + *   sent.
> + *   The value must be in the range [0, nb_tx_queue - 1] previously supplied
> + *   to rte_eth_dev_configure().
> + * @param tx_pkts
> + *   The address of an array of *nb_pkts* pointers to *rte_mbuf* structures
> + *   which contain the output packets.
> + * @param nb_pkts
> + *   The maximum number of packets to process.
> + * @return
> + *   The number of packets correct and ready to be sent. The return value can be
> + *   less than the value of the *tx_pkts* parameter when some packet doesn't
> + *   meet devices requirements with rte_errno set appropriately.
> + */
> +static inline uint16_t
> +rte_eth_tx_prep(uint8_t port_id, uint16_t queue_id, struct rte_mbuf **tx_pkts,
> +		uint16_t nb_pkts)
> +{
> +	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
> +
> +	if (!dev->tx_pkt_prep) {
> +		rte_errno = -ENOTSUP;

rte_errno update may not be necessary here. see below

> +		return 0;
IMO, we should return "nb_pkts" here instead of 0 (i.e., all the packets
are valid in case the PMD does not have a tx_prep function); if "0" is
returned, the following check in the application will also fail for no reason:
if (nb_prep < nb_pkts) {
	printf("tx_prep failed\n");
}


> +	}
> +
> +#ifdef RTE_LIBRTE_ETHDEV_DEBUG
> +	if (queue_id >= dev->data->nb_tx_queues) {
> +		RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", queue_id);
> +		rte_errno = -EINVAL;
> +		return 0;
> +	}
> +#endif
> +
> +	return (*dev->tx_pkt_prep)(dev->data->tx_queues[queue_id],
> +			tx_pkts, nb_pkts);
> +}
> +

IMO, we need to provide a compile-time option to make rte_eth_tx_prep a
NOOP. The default should be non-NOOP, but in case a _target_ wants to
override it to NOOP, that should be possible. The reason is:

- Low-end ARMv7/ARMv8 targets may not have PCIE-RC support and may
have only an integrated NIC controller. On those targets, where the integrated
NIC controller does not use the tx_prep service, it can be made a NOOP to
save cycles on the following "rte_eth_tx_prep" call and the associated "if
(unlikely(nb_prep < nb_rx))" checks in the application.

/* Prepare burst of TX packets */
nb_prep = rte_eth_tx_prep(fs->rx_port, 0, pkts_burst, nb_rx);

if (unlikely(nb_prep < nb_rx)) {
        int i;
        for (i = nb_prep; i < nb_rx; i++)
                rte_pktmbuf_free(pkts_burst[i]);
}


Jerin
  
Tomasz Kulasek Sept. 8, 2016, 4:09 p.m. UTC | #2
Hi Jerin,

> -----Original Message-----
> From: Jerin Jacob [mailto:jerin.jacob@caviumnetworks.com]
> Sent: Thursday, September 8, 2016 09:29
> To: Kulasek, TomaszX <tomaszx.kulasek@intel.com>
> Cc: dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH 1/6] ethdev: add Tx preparation
> 

[...]

> > +static inline uint16_t
> > +rte_eth_tx_prep(uint8_t port_id, uint16_t queue_id, struct rte_mbuf
> **tx_pkts,
> > +		uint16_t nb_pkts)
> > +{
> > +	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
> > +
> > +	if (!dev->tx_pkt_prep) {
> > +		rte_errno = -ENOTSUP;
> 
> rte_errno update may not be necessary here. see below
> 
> > +		return 0;
> IMO, We should return "nb_pkts" here instead of 0(i.e, all the packets are
> valid in-case PMD does not have tx_prep function) and in-case of "0"
> the following check in the application also will fail for no reason if
> (nb_prep < nb_pkts) {
> 	printf("tx_prep failed\n");
> }
> 

Yes, it seems to be reasonable.

> 
> > +	}
> > +
> > +#ifdef RTE_LIBRTE_ETHDEV_DEBUG
> > +	if (queue_id >= dev->data->nb_tx_queues) {
> > +		RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", queue_id);
> > +		rte_errno = -EINVAL;
> > +		return 0;
> > +	}
> > +#endif
> > +
> > +	return (*dev->tx_pkt_prep)(dev->data->tx_queues[queue_id],
> > +			tx_pkts, nb_pkts);
> > +}
> > +
> 
> IMO, We need to provide a compile time option for rte_eth_tx_prep as NOOP.
> Default option should be non NOOP but incase a _target_ want to override
> to NOOP it should be possible, the reasons is:
> 
> - Low-end ARMv7,ARMv8 targets may not have PCIE-RC support and it may have
> only integrated NIC controller. On those targets, where integrated NIC
> controller does not use tx_prep service it can made it as NOOP to save
> cycles on following "rte_eth_tx_prep" and associated "if (unlikely(nb_prep
> < nb_rx))" checks in the application.
> 
> /* Prepare burst of TX packets */
> nb_prep = rte_eth_tx_prep(fs->rx_port, 0, pkts_burst, nb_rx);
> 
> if (unlikely(nb_prep < nb_rx)) {
>         int i;
>         for (i = nb_prep; i < nb_rx; i++)
>                 rte_pktmbuf_free(pkts_burst[i]); }
> 

You mean to have a code for NOOP like:


	/* Prepare burst of TX packets */
	nb_prep = nb_rx; /* rte_eth_tx_prep(fs->rx_port, 0, pkts_burst, nb_rx); */
 
	if (unlikely(nb_prep < nb_rx)) {
         int i;
         for (i = nb_prep; i < nb_rx; i++)
                 rte_pktmbuf_free(pkts_burst[i]); }


and let the optimizer remove the unused parts?


IMHO it should be up to the application whether to use tx_prep or not.

While part of the job is done by the driver (verification and preparation) and part by the application (error handling), such a global compile-time option can introduce inconsistency if the application does not handle both cases.

If someone wants to turn off this functionality, it should be done at the application level, e.g. with a compilation option.
 
> 
> Jerin
>
  
Jerin Jacob Sept. 9, 2016, 5:58 a.m. UTC | #3
On Thu, Sep 08, 2016 at 04:09:05PM +0000, Kulasek, TomaszX wrote:
> Hi Jerin,

Hi TomaszX,

> 
> > -----Original Message-----
> > From: Jerin Jacob [mailto:jerin.jacob@caviumnetworks.com]
> > Sent: Thursday, September 8, 2016 09:29
> > To: Kulasek, TomaszX <tomaszx.kulasek@intel.com>
> > Cc: dev@dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH 1/6] ethdev: add Tx preparation
> > 
> 
> [...]
> 
> > > +static inline uint16_t
> > > +rte_eth_tx_prep(uint8_t port_id, uint16_t queue_id, struct rte_mbuf
> > **tx_pkts,
> > > +		uint16_t nb_pkts)
> > > +{
> > > +	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
> > > +
> > > +	if (!dev->tx_pkt_prep) {
> > > +		rte_errno = -ENOTSUP;
> > 
> > rte_errno update may not be necessary here. see below
> > 
> > > +		return 0;
> > IMO, We should return "nb_pkts" here instead of 0(i.e, all the packets are
> > valid in-case PMD does not have tx_prep function) and in-case of "0"
> > the following check in the application also will fail for no reason if
> > (nb_prep < nb_pkts) {
> > 	printf("tx_prep failed\n");
> > }
> > 
> 
> Yes, it seems to be reasonable.
> 
> > 
> > > +	}
> > > +
> > > +#ifdef RTE_LIBRTE_ETHDEV_DEBUG
> > > +	if (queue_id >= dev->data->nb_tx_queues) {
> > > +		RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", queue_id);
> > > +		rte_errno = -EINVAL;
> > > +		return 0;
> > > +	}
> > > +#endif
> > > +
> > > +	return (*dev->tx_pkt_prep)(dev->data->tx_queues[queue_id],
> > > +			tx_pkts, nb_pkts);
> > > +}
> > > +
> > 
> > IMO, We need to provide a compile time option for rte_eth_tx_prep as NOOP.
> > Default option should be non NOOP but incase a _target_ want to override
> > to NOOP it should be possible, the reasons is:
> > 
> > - Low-end ARMv7,ARMv8 targets may not have PCIE-RC support and it may have
> > only integrated NIC controller. On those targets, where integrated NIC
> > controller does not use tx_prep service it can made it as NOOP to save
> > cycles on following "rte_eth_tx_prep" and associated "if (unlikely(nb_prep
> > < nb_rx))" checks in the application.
> > 
> > /* Prepare burst of TX packets */
> > nb_prep = rte_eth_tx_prep(fs->rx_port, 0, pkts_burst, nb_rx);
> > 
> > if (unlikely(nb_prep < nb_rx)) {
> >         int i;
> >         for (i = nb_prep; i < nb_rx; i++)
> >                 rte_pktmbuf_free(pkts_burst[i]); }
> > 
> 
> You mean to have a code for NOOP like:
> 
> 
> 	/* Prepare burst of TX packets */
> 	nb_prep = nb_rx; /* rte_eth_tx_prep(fs->rx_port, 0, pkts_burst, nb_rx); */
>  
> 	if (unlikely(nb_prep < nb_rx)) {
>          int i;
>          for (i = nb_prep; i < nb_rx; i++)
>                  rte_pktmbuf_free(pkts_burst[i]); }
> 
> 
> and let optimizer to remove unused parts?

I thought of creating a compile-time NOOP like this: set
CONFIG_RTE_LIBRTE_ETHDEV_TXPREP_SUPPORT=y in config/common_base
and have two flavors of definitions for rte_eth_tx_prep:

#ifdef RTE_LIBRTE_ETHDEV_TXPREP_SUPPORT
static inline uint16_t
rte_eth_tx_prep(uint8_t port_id, uint16_t queue_id, struct rte_mbuf
**tx_pkts, uint16_t nb_pkts)
{
	Proposed implementation
}
#else
static inline uint16_t
rte_eth_tx_prep(uint8_t port_id, uint16_t queue_id, struct rte_mbuf
**tx_pkts, uint16_t nb_pkts)
{
	(void)port_id;
	(void)queue_id;
	..
}
#endif

> 
> 
> IMHO it should be an application issue to use tx_prep or not.

In some cases even the _target_ (example: config/defconfig_arm64-*) can decide that.
An example of such a target is:
Low-end ARMv7/ARMv8 targets may not have PCIE-RC support and may have
only an integrated NIC controller. On those targets/configs, where the integrated NIC
controller does not use the tx_prep service, it can be made a NOOP to save
cycles on the following "rte_eth_tx_prep" call and the associated "if (unlikely(nb_prep
< nb_rx))" checks in the application.

> 
> While part of the job is done by the driver (verification and preparation), and part by application (error handling), such a global compile time option can introduce inconsistency, if application will not handle both cases.

Each DPDK application is built/compiled against the target/config, so I think
it is OK.

> 
> If someone wants to turn off this functionality, it should be done on application level, e.g. with compilation option.
>
  

Patch

diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index b0fe033..02569ca 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -182,6 +182,7 @@  extern "C" {
 #include <rte_pci.h>
 #include <rte_dev.h>
 #include <rte_devargs.h>
+#include <rte_errno.h>
 #include "rte_ether.h"
 #include "rte_eth_ctrl.h"
 #include "rte_dev_info.h"
@@ -696,6 +697,8 @@  struct rte_eth_desc_lim {
 	uint16_t nb_max;   /**< Max allowed number of descriptors. */
 	uint16_t nb_min;   /**< Min allowed number of descriptors. */
 	uint16_t nb_align; /**< Number of descriptors should be aligned to. */
+	uint16_t nb_seg_max;     /**< Max number of segments per whole packet. */
+	uint16_t nb_mtu_seg_max; /**< Max number of segments per one MTU */
 };
 
 /**
@@ -1181,6 +1184,12 @@  typedef uint16_t (*eth_tx_burst_t)(void *txq,
 				   uint16_t nb_pkts);
 /**< @internal Send output packets on a transmit queue of an Ethernet device. */
 
+typedef uint16_t (*eth_tx_prep_t)(void *txq,
+				   struct rte_mbuf **tx_pkts,
+				   uint16_t nb_pkts);
+/**< @internal Prepare output packets on a transmit queue of an Ethernet
+		device. */
+
 typedef int (*flow_ctrl_get_t)(struct rte_eth_dev *dev,
 			       struct rte_eth_fc_conf *fc_conf);
 /**< @internal Get current flow control parameter on an Ethernet device */
@@ -1626,6 +1635,7 @@  enum rte_eth_dev_type {
 struct rte_eth_dev {
 	eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */
 	eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function. */
+	eth_tx_prep_t tx_pkt_prep; /**< Pointer to PMD transmit prepare function. */
 	struct rte_eth_dev_data *data;  /**< Pointer to device data */
 	const struct eth_driver *driver;/**< Driver for this device */
 	const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */
@@ -2833,6 +2843,70 @@  rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id,
 	return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts);
 }
 
+/**
+ * Process a burst of output packets on a transmit queue of an Ethernet device.
+ *
+ * The rte_eth_tx_prep() function is invoked to prepare output packets to be
+ * transmitted on the output queue *queue_id* of the Ethernet device designated
+ * by its *port_id*.
+ * The *nb_pkts* parameter is the number of packets to be prepared which are
+ * supplied in the *tx_pkts* array of *rte_mbuf* structures, each of them
+ * allocated from a pool created with rte_pktmbuf_pool_create().
+ * For each packet to send, the rte_eth_tx_prep() function performs
+ * the following operations:
+ *
+ * - Check if packet meets devices requirements for tx offloads.
+ *
+ * - Check limitations about number of segments.
+ *
+ * - Check additional requirements when debug is enabled.
+ *
+ * - Update and/or reset required checksums when tx offload is set for packet.
+ *
+ * The rte_eth_tx_prep() function returns the number of packets ready to be
+ * sent. A return value equal to *nb_pkts* means that all packets are valid and
+ * ready to be sent.
+ *
+ * @param port_id
+ *   The port identifier of the Ethernet device.
+ * @param queue_id
+ *   The index of the transmit queue through which output packets must be
+ *   sent.
+ *   The value must be in the range [0, nb_tx_queue - 1] previously supplied
+ *   to rte_eth_dev_configure().
+ * @param tx_pkts
+ *   The address of an array of *nb_pkts* pointers to *rte_mbuf* structures
+ *   which contain the output packets.
+ * @param nb_pkts
+ *   The maximum number of packets to process.
+ * @return
+ *   The number of packets correct and ready to be sent. The return value can be
+ *   less than the value of the *nb_pkts* parameter when some packet doesn't
+ *   meet devices requirements with rte_errno set appropriately.
+ */
+static inline uint16_t
+rte_eth_tx_prep(uint8_t port_id, uint16_t queue_id, struct rte_mbuf **tx_pkts,
+		uint16_t nb_pkts)
+{
+	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
+
+	if (!dev->tx_pkt_prep) {
+		rte_errno = -ENOTSUP;
+		return 0;
+	}
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+	if (queue_id >= dev->data->nb_tx_queues) {
+		RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", queue_id);
+		rte_errno = -EINVAL;
+		return 0;
+	}
+#endif
+
+	return (*dev->tx_pkt_prep)(dev->data->tx_queues[queue_id],
+			tx_pkts, nb_pkts);
+}
+
 typedef void (*buffer_tx_error_fn)(struct rte_mbuf **unsent, uint16_t count,
 		void *userdata);
 
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index 7ea66ed..72fd352 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -211,6 +211,14 @@  extern "C" {
  */
 #define PKT_TX_OUTER_IPV4   (1ULL << 59)
 
+#define PKT_TX_OFFLOAD_MASK (    \
+		PKT_TX_IP_CKSUM |        \
+		PKT_TX_L4_MASK |         \
+		PKT_TX_OUTER_IP_CKSUM |  \
+		PKT_TX_TCP_SEG |         \
+		PKT_TX_QINQ_PKT |        \
+		PKT_TX_VLAN_PKT)
+
 /**
  * Packet outer header is IPv6. This flag must be set when using any
  * outer offload feature (L4 checksum) to tell the NIC that the outer
diff --git a/lib/librte_net/Makefile b/lib/librte_net/Makefile
index ad2e482..b5abe84 100644
--- a/lib/librte_net/Makefile
+++ b/lib/librte_net/Makefile
@@ -34,7 +34,7 @@  include $(RTE_SDK)/mk/rte.vars.mk
 CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
 
 # install includes
-SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include := rte_ip.h rte_tcp.h rte_udp.h rte_sctp.h rte_icmp.h rte_arp.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include := rte_ip.h rte_tcp.h rte_udp.h rte_sctp.h rte_icmp.h rte_arp.h rte_pkt.h
 
 
 include $(RTE_SDK)/mk/rte.install.mk
diff --git a/lib/librte_net/rte_pkt.h b/lib/librte_net/rte_pkt.h
new file mode 100644
index 0000000..a3c3e3c
--- /dev/null
+++ b/lib/librte_net/rte_pkt.h
@@ -0,0 +1,132 @@ 
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright(c) 2016 Intel Corporation. All rights reserved.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_PKT_H_
+#define _RTE_PKT_H_
+
+#include <rte_ip.h>
+#include <rte_udp.h>
+#include <rte_tcp.h>
+#include <rte_sctp.h>
+
+/**
+ * Validate general requirements for tx offload in packet.
+ */
+static inline int
+rte_validate_tx_offload(struct rte_mbuf *m)
+{
+	uint64_t ol_flags = m->ol_flags;
+
+	/* Does packet set any of available offloads? */
+	if (!(ol_flags & PKT_TX_OFFLOAD_MASK))
+		return 0;
+
+	/* IP checksum can be counted only for IPv4 packet */
+	if ((ol_flags & PKT_TX_IP_CKSUM) && (ol_flags & PKT_TX_IPV6))
+		return -EINVAL;
+
+	if (ol_flags & (PKT_TX_L4_MASK | PKT_TX_TCP_SEG))
+		/* IP type not set */
+		if (!(ol_flags & (PKT_TX_IPV4 | PKT_TX_IPV6)))
+			return -EINVAL;
+
+	if (ol_flags & PKT_TX_TCP_SEG) {
+
+		/* PKT_TX_IP_CKSUM offload not set for IPv4 TSO packet */
+		if ((ol_flags & PKT_TX_IPV4) && !(ol_flags & PKT_TX_IP_CKSUM))
+			return -EINVAL;
+
+		if (m->tso_segsz == 0)
+			return -EINVAL;
+
+	}
+
+	/* PKT_TX_OUTER_IP_CKSUM set for non outer IPv4 packet. */
+	if ((ol_flags & PKT_TX_OUTER_IP_CKSUM) && !(ol_flags & PKT_TX_OUTER_IPV4))
+		return -EINVAL;
+
+	return 0;
+}
+
+/**
+ * Fix pseudo header checksum for TSO and non-TSO tcp/udp packets before
+ * hardware tx checksum.
+ * For non-TSO tcp/udp packets full pseudo-header checksum is counted and set.
+ * For TSO the IP payload length is not included.
+ */
+static inline int
+rte_phdr_cksum_fix(struct rte_mbuf *m)
+{
+	struct ipv4_hdr *ipv4_hdr;
+	struct ipv6_hdr *ipv6_hdr;
+	struct tcp_hdr *tcp_hdr;
+	struct udp_hdr *udp_hdr;
+
+	if (m->ol_flags & PKT_TX_IPV4) {
+		ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len);
+
+		if (m->ol_flags & PKT_TX_IP_CKSUM)
+			ipv4_hdr->hdr_checksum = 0;
+
+		if ((m->ol_flags & PKT_TX_UDP_CKSUM) == PKT_TX_UDP_CKSUM) {
+			/* non-TSO udp */
+			udp_hdr = rte_pktmbuf_mtod_offset(m, struct udp_hdr *, m->l2_len +
+					m->l3_len);
+			udp_hdr->dgram_cksum = rte_ipv4_phdr_cksum(ipv4_hdr, m->ol_flags);
+		} else if ((m->ol_flags & PKT_TX_TCP_CKSUM) ||
+				(m->ol_flags & PKT_TX_TCP_SEG)) {
+			/* non-TSO tcp or TSO */
+			tcp_hdr = rte_pktmbuf_mtod_offset(m, struct tcp_hdr *, m->l2_len +
+					m->l3_len);
+			tcp_hdr->cksum = rte_ipv4_phdr_cksum(ipv4_hdr, m->ol_flags);
+		}
+	} else if (m->ol_flags & PKT_TX_IPV6) {
+		ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *, m->l2_len);
+
+		if ((m->ol_flags & PKT_TX_UDP_CKSUM) == PKT_TX_UDP_CKSUM) {
+			/* non-TSO udp */
+			udp_hdr = rte_pktmbuf_mtod_offset(m, struct udp_hdr *, m->l2_len +
+					m->l3_len);
+			udp_hdr->dgram_cksum = rte_ipv6_phdr_cksum(ipv6_hdr, m->ol_flags);
+		} else if ((m->ol_flags & PKT_TX_TCP_CKSUM) ||
+				(m->ol_flags & PKT_TX_TCP_SEG)) {
+			/* non-TSO tcp or TSO */
+			tcp_hdr = rte_pktmbuf_mtod_offset(m, struct tcp_hdr *, m->l2_len +
+					m->l3_len);
+			tcp_hdr->cksum = rte_ipv6_phdr_cksum(ipv6_hdr, m->ol_flags);
+		}
+	}
+	return 0;
+}
+
+#endif /* _RTE_PKT_H_ */