[dpdk-dev] ixgbe: fix x550 VF tx issue

Message ID 1454053693-28673-1-git-send-email-wenzhuo.lu@intel.com (mailing list archive)
State Rejected, archived
Delegated to: Bruce Richardson
Headers

Commit Message

Wenzhuo Lu Jan. 29, 2016, 7:48 a.m. UTC
  On x550, the basic tx function may not work if we use DPDK VF based on
kernel PF. The reason is kernel PF enables a new feature "malicious driver
detection". According to this feature, driver should create TX context
descriptor and set the "Check Context" bit in advanced data descriptor.
This patch add the support of "malicious driver detection" to let kernel PF
based DPDK VF work.

Although CC bit should be set in IOV mode, as tested, it's no harm to set
CC bit in non-IVO mode. So, as PF and VF share the same code, will set CC
bit anyway.

Please aware there's another way, disabling MDD in PF kernel driver.
Like this,
>insmod ixgbe.ko MDD=0,0

Signed-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>
---
 doc/guides/rel_notes/release_2_3.rst | 12 ++++++++++++
 drivers/net/ixgbe/ixgbe_rxtx.c       | 16 ++++++++++++++--
 drivers/net/ixgbe/ixgbe_rxtx.h       |  3 ++-
 lib/librte_ether/rte_ethdev.h        |  1 +
 lib/librte_mbuf/rte_mbuf.h           |  3 ++-
 5 files changed, 31 insertions(+), 4 deletions(-)
  

Comments

Xu, HuilongX Feb. 5, 2016, 5:09 a.m. UTC | #1
Test-by: huilong xu <huilongx.xu@intel.com>
Summary:
       Test four case, and all case passed. 
Test environment:
  Host: 
     os: 4.2.3-300.fc23.x86_64
     NIC: 03:00.0 Ethernet controller [0200]: Intel Corporation Ethernet Connection X552/X557-AT 10GBASE-T [8086:15ad]
     NIC kernel driver: ixgbe 4.2.5
  Guest:
     OS: 3.11.10-301.fc20.x86_64
     Gcc: 4.8.2 20131017 (Red Hat 4.8.2-1) (GCC)
     Package: dpdk.org newest commit(commit b86af7b1b57ebe02ebf5298415172d51f1f81e8b) +PATCH
     Target: x86_64-native-linuxapp-gcc
Case list:
Config VM
==============================================
1. get the pci device id of DUT, for example,

lspci -nn|grep -i eth

04:00.0 Ethernet controller [0200]: Intel Corporation Device [8086:15ad] (rev 01) 
04:00.1 Ethernet controller [0200]: Intel Corporation Device [8086:15ad] (rev 01) 

2. create 2 VFs from 2 PFs,

echo 1 > /sys/bus/pci/devices/0000\:04\:00.0/sriov_numvfs
echo 1 > /sys/bus/pci/devices/0000\:04\:00.1/sriov_numvfs
lspci -nn|grep -i eth

04:00.0 Ethernet controller [0200]: Intel Corporation Device [8086:15ad] (rev 01) 
04:00.1 Ethernet controller [0200]: Intel Corporation Device [8086:15ad] (rev 01)
04:10.0 Ethernet controller [0200]: Intel Corporation Device [8086:15a8]
04:10.1 Ethernet controller [0200]: Intel Corporation Device [8086:15a8]

3. detach VFs from the host, bind them to pci-stub driver,

/sbin/modprobe pci-stub

using `lspci -nn|grep -i ethernet` got VF device id, for example "8086 1565",

echo "8086 15a8" > /sys/bus/pci/drivers/pci-stub/new_id
echo 0000:04:10.0 > /sys/bus/pci/devices/0000:04:10.0/driver/unbind
echo 0000:04:10.1 > /sys/bus/pci/drivers/pci-stub/bind

echo "8086 15a8" > /sys/bus/pci/drivers/pci-stub/new_id
echo 0000:04:10.0 > /sys/bus/pci/devices/0000:04:10.0/driver/unbind
echo 0000:04:10.0 > /sys/bus/pci/drivers/pci-stub/bind

or using the following more easy way,

virsh nodedev-detach pci_0000_04_10_0; 
virsh nodedev-detach pci_0000_04_10_1;


4. passthrough VFs 04:10.0 & 04:10.1 to vm0, and start vm0,

/usr/bin/qemu-system-x86_64  -name vm0 -enable-kvm \
-cpu host -smp 4 -m 2048 -drive file=/home/image/sriov-fc20-1.img -vnc :1 \
-device pci-assign,host=04:10.0,id=pt_0 \
-device pci-assign,host=04:10.1,id=pt_1


Test Case 1: enable_mdd_dpdk_disable
==============================================
1. config vm
2. login vm0, got VFs pci device id in vm0, assume they are 00:06.0 & 00:07.0, bind them to igb_uio driver,
and then start testpmd, set it in mac forward mode,

./tools/dpdk_nic_bind.py --bind=igb_uio 00:06.0 00:07.0
./x86_64-native-linuxapp-gcc/app/testpmd -c 0x0f -n 4 -w 00:06.0 -w 00:07.0 -- -i --portmask=0x3 --txqflags=0x1000

testpmd> set fwd mac
testpmd> start

3. get mac address of one VF and use it as dest mac, using scapy to send 2000 random packets from tester,
verify the packets can't be received by one VF, and on host can find "ixgbe 0000:04:00.0: Malicious event on VF 0 tx:100000 rx:0"

Test Case 2: enable_mdd_dpdk_enable
==============================================
1. config vm
2. login vm0, got VFs pci device id in vm0, assume they are 00:06.0 & 00:07.0, bind them to igb_uio driver,
and then start testpmd, set it in mac forward mode,

./tools/dpdk_nic_bind.py --bind=igb_uio 00:06.0 00:07.0
./x86_64-native-linuxapp-gcc/app/testpmd -c 0x0f -n 4 -w 00:06.0 -w 00:07.0 -- -i --portmask=0x3 --txqflags=0x0

testpmd> set fwd mac
testpmd> start

3. get mac address of one VF and use it as dest mac, using scapy to send 2000 random packets from tester,
verify the packets can be received by one VF, and on host can't find "ixgbe 0000:04:00.0: Malicious event on VF 0 tx:100000 rx:0"
 
Test Case 3: disable_mdd_dpdk_disable
==============================================
1. disable mdd in kernel
rmmod ixgbe
insmod MDD=0,0
2. config vm
3. login vm0, got VFs pci device id in vm0, assume they are 00:06.0 & 00:07.0, bind them to igb_uio driver,
and then start testpmd, set it in mac forward mode,

./tools/dpdk_nic_bind.py --bind=igb_uio 00:06.0 00:07.0
./x86_64-native-linuxapp-gcc/app/testpmd -c 0x0f -n 4 -w 00:06.0 -w 00:07.0 -- -i --portmask=0x3 --txqflags=0x1000

testpmd> set fwd mac
testpmd> start

4. get mac address of one VF and use it as dest mac, using scapy to send 2000 random packets from tester,
verify the packets can be received by one VF, and on host can't find "ixgbe 0000:04:00.0: Malicious event on VF 0 tx:100000 rx:0"

Test Case 4: disable_mdd_dpdk_enable
==============================================
1. disable mdd in kernel
rmmod ixgbe
insmod MDD=0,0
2. config vm
3. login vm0, got VFs pci device id in vm0, assume they are 00:06.0 & 00:07.0, bind them to igb_uio driver,
and then start testpmd, set it in mac forward mode,

./tools/dpdk_nic_bind.py --bind=igb_uio 00:06.0 00:07.0
./x86_64-native-linuxapp-gcc/app/testpmd -c 0x0f -n 4 -w 00:06.0 -w 00:07.0 -- -i --portmask=0x3 --txqflags=0x0

testpmd> set fwd mac
testpmd> start

4. get mac address of one VF and use it as dest mac, using scapy to send 2000 random packets from tester,
verify the packets can be received by one VF, and on host can't find "ixgbe 0000:04:00.0: Malicious event on VF 0 tx:100000 rx:0"

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Wenzhuo Lu
> Sent: Friday, January 29, 2016 3:48 PM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH] ixgbe: fix x550 VF tx issue
> 
> On x550, the basic tx function may not work if we use DPDK VF based on
> kernel PF. The reason is kernel PF enables a new feature "malicious driver
> detection". According to this feature, driver should create TX context
> descriptor and set the "Check Context" bit in advanced data descriptor.
> This patch add the support of "malicious driver detection" to let kernel
> PF
> based DPDK VF work.
> 
> Although CC bit should be set in IOV mode, as tested, it's no harm to set
> CC bit in non-IVO mode. So, as PF and VF share the same code, will set CC
> bit anyway.
> 
> Please aware there's another way, disabling MDD in PF kernel driver.
> Like this,
> >insmod ixgbe.ko MDD=0,0
> 
> Signed-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>
> ---
>  doc/guides/rel_notes/release_2_3.rst | 12 ++++++++++++
>  drivers/net/ixgbe/ixgbe_rxtx.c       | 16 ++++++++++++++--
>  drivers/net/ixgbe/ixgbe_rxtx.h       |  3 ++-
>  lib/librte_ether/rte_ethdev.h        |  1 +
>  lib/librte_mbuf/rte_mbuf.h           |  3 ++-
>  5 files changed, 31 insertions(+), 4 deletions(-)
> 
> diff --git a/doc/guides/rel_notes/release_2_3.rst
> b/doc/guides/rel_notes/release_2_3.rst
> index 99de186..90f81e0 100644
> --- a/doc/guides/rel_notes/release_2_3.rst
> +++ b/doc/guides/rel_notes/release_2_3.rst
> @@ -15,6 +15,18 @@ EAL
>  Drivers
>  ~~~~~~~
> 
> +* **ixgbe: Fixed x550 VF tx issue.**
> +
> +  On x550, the basic tx function may not work if we use DPDK VF based on
> +  kernel PF. The reason is kernel PF enables a new feature "malicious
> driver
> +  detection". According to this feature, driver should create TX context
> +  descriptor and set the "Check Context" bit in advanced data descriptor.
> +  This patch add the support of "malicious driver detection" to let
> kernel PF
> +  based DPDK VF work.
> +
> +  Please aware there's another way, disabling MDD in PF kernel driver.
> +  Like this,
> +  >insmod ixgbe.ko MDD=0,0
> 
>  Libraries
>  ~~~~~~~~~
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c
> b/drivers/net/ixgbe/ixgbe_rxtx.c
> index 52a263c..c8a7740 100644
> --- a/drivers/net/ixgbe/ixgbe_rxtx.c
> +++ b/drivers/net/ixgbe/ixgbe_rxtx.c
> @@ -85,7 +85,8 @@
>  		PKT_TX_VLAN_PKT |		 \
>  		PKT_TX_IP_CKSUM |		 \
>  		PKT_TX_L4_MASK |		 \
> -		PKT_TX_TCP_SEG)
> +		PKT_TX_TCP_SEG |		 \
> +		PKT_TX_MALICIOUS_DRIVER_DETECT)
> 
>  static inline struct rte_mbuf *
>  rte_rxmbuf_alloc(struct rte_mempool *mp)
> @@ -564,6 +565,8 @@ ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
>  	return (0);
>  }
> 
> +#define DEFAULT_CTX_L2_LEN 14
> +
>  uint16_t
>  ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
>  		uint16_t nb_pkts)
> @@ -614,11 +617,19 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf
> **tx_pkts,
>  		 * are needed for offload functionality.
>  		 */
>  		ol_flags = tx_pkt->ol_flags;
> +		if (!(txq->txq_flags & ETH_TXQ_FLAGS_NOMDD))
> +			ol_flags |= PKT_TX_MALICIOUS_DRIVER_DETECT;
> 
>  		/* If hardware offload required */
>  		tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
>  		if (tx_ol_req) {
> -			tx_offload.l2_len = tx_pkt->l2_len;
> +			/* if l2 len isn't provided by the caller, give it a
> +			 * default value.
> +			 */
> +			if (tx_pkt->l2_len == 0)
> +				tx_offload.l2_len = DEFAULT_CTX_L2_LEN;
> +			else
> +				tx_offload.l2_len = tx_pkt->l2_len;
>  			tx_offload.l3_len = tx_pkt->l3_len;
>  			tx_offload.l4_len = tx_pkt->l4_len;
>  			tx_offload.vlan_tci = tx_pkt->vlan_tci;
> @@ -801,6 +812,7 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf
> **tx_pkts,
>  		}
> 
>  		olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
> +		olinfo_status |= IXGBE_ADVTXD_CC;
> 
>  		m_seg = tx_pkt;
>  		do {
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx.h
> b/drivers/net/ixgbe/ixgbe_rxtx.h
> index 475a800..bcde785 100644
> --- a/drivers/net/ixgbe/ixgbe_rxtx.h
> +++ b/drivers/net/ixgbe/ixgbe_rxtx.h
> @@ -255,7 +255,8 @@ struct ixgbe_txq_ops {
>   * RTE_PMD_IXGBE_TX_MAX_BURST.
>   */
>  #define IXGBE_SIMPLE_FLAGS ((uint32_t)ETH_TXQ_FLAGS_NOMULTSEGS | \
> -			    ETH_TXQ_FLAGS_NOOFFLOADS)
> +			    ETH_TXQ_FLAGS_NOOFFLOADS | \
> +			    ETH_TXQ_FLAGS_NOMDD)
> 
>  /*
>   * Populate descriptors with the following info:
> diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
> index bada8ad..a2e32d0 100644
> --- a/lib/librte_ether/rte_ethdev.h
> +++ b/lib/librte_ether/rte_ethdev.h
> @@ -637,6 +637,7 @@ struct rte_eth_rxconf {
>  #define ETH_TXQ_FLAGS_NOXSUMSCTP 0x0200 /**< disable SCTP checksum
> offload */
>  #define ETH_TXQ_FLAGS_NOXSUMUDP  0x0400 /**< disable UDP checksum offload
> */
>  #define ETH_TXQ_FLAGS_NOXSUMTCP  0x0800 /**< disable TCP checksum offload
> */
> +#define ETH_TXQ_FLAGS_NOMDD      0x1000 /**< disable malicious driver
> detection */
>  #define ETH_TXQ_FLAGS_NOOFFLOADS \
>  		(ETH_TXQ_FLAGS_NOVLANOFFL | ETH_TXQ_FLAGS_NOXSUMSCTP | \
>  		 ETH_TXQ_FLAGS_NOXSUMUDP  | ETH_TXQ_FLAGS_NOXSUMTCP)
> diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
> index f234ac9..b5ed25c 100644
> --- a/lib/librte_mbuf/rte_mbuf.h
> +++ b/lib/librte_mbuf/rte_mbuf.h
> @@ -102,6 +102,7 @@ extern "C" {
> 
>  /* add new TX flags here */
> 
> +#define PKT_TX_MALICIOUS_DRIVER_DETECT (1ULL << 48)
>  /**
>   * Second VLAN insertion (QinQ) flag.
>   */
> @@ -824,7 +825,7 @@ struct rte_mbuf {
>  	struct rte_mempool *pool; /**< Pool from which mbuf was allocated.
> */
>  	struct rte_mbuf *next;    /**< Next segment of scattered packet. */
> 
> -	/* fields to support TX offloads */
> +	/* fields to support malicious driver detection or TX offloads */
>  	union {
>  		uint64_t tx_offload;       /**< combined for easy fetch */
>  		struct {
> --
> 1.9.3
  
Ananyev, Konstantin Feb. 17, 2016, 4:44 p.m. UTC | #2
Hi Wenzhuo,

> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Wenzhuo Lu
> Sent: Friday, January 29, 2016 7:48 AM
> To: dev@dpdk.org
> Subject: [dpdk-dev] [PATCH] ixgbe: fix x550 VF tx issue
> 
> On x550, the basic tx function may not work if we use DPDK VF based on
> kernel PF. The reason is kernel PF enables a new feature "malicious driver
> detection". According to this feature, driver should create TX context
> descriptor and set the "Check Context" bit in advanced data descriptor.
> This patch add the support of "malicious driver detection" to let kernel PF
> based DPDK VF work.

Is there any way for VF to determine is MDD is enabled on PF or not?
Mailbox message or something?
If not, then I suppose for x550 VF we always have to set default txq_flags in
dev_info to PKT_TX_MALICIOUS_DRIVER_DETECT?
Which means that for X550 VF simple TX path by default will be disabled, correct?
As alternative we probably can change tx_queue_start() for x550 VF to always
setup a dummy TCD and then make changes in TX simple path to always setup
CC bit for x550 VF.
That would allow to keep using TX simple for x550 VF , no matter is MDD is enabled  
by PF or not.

Another thing - there could be a potential slowdown with that feature:
As now TX has to occupy one extra HW context, even if no HW offload is requested by SW,
 correct?
Let say if app sends packets of 3 types:
- TCP with TX HW csum offload
- UDP with TX HW csum offload
- packets with no HW offload requested   
Then on 82599 VF no PMD would need to setup TCD only fist 2 times.
But on X550 VF will need to setup new TCD for every packet.
But I suppose that is unavoidable.

> 
> Although CC bit should be set in IOV mode, as tested, it's no harm to set
> CC bit in non-IVO mode. So, as PF and VF share the same code, will set CC
> bit anyway.

I don't think it totally harmless.
As I understand, if CC bit is set HW would try to apply related TC to the packet.
If this TC was never setup - you don't know what values it will contain.

> 
> Please aware there's another way, disabling MDD in PF kernel driver.
> Like this,
> >insmod ixgbe.ko MDD=0,0
> 
> Signed-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>
> ---
>  doc/guides/rel_notes/release_2_3.rst | 12 ++++++++++++
>  drivers/net/ixgbe/ixgbe_rxtx.c       | 16 ++++++++++++++--
>  drivers/net/ixgbe/ixgbe_rxtx.h       |  3 ++-
>  lib/librte_ether/rte_ethdev.h        |  1 +
>  lib/librte_mbuf/rte_mbuf.h           |  3 ++-
>  5 files changed, 31 insertions(+), 4 deletions(-)
> 
> diff --git a/doc/guides/rel_notes/release_2_3.rst b/doc/guides/rel_notes/release_2_3.rst
> index 99de186..90f81e0 100644
> --- a/doc/guides/rel_notes/release_2_3.rst
> +++ b/doc/guides/rel_notes/release_2_3.rst
> @@ -15,6 +15,18 @@ EAL
>  Drivers
>  ~~~~~~~
> 
> +* **ixgbe: Fixed x550 VF tx issue.**
> +
> +  On x550, the basic tx function may not work if we use DPDK VF based on
> +  kernel PF. The reason is kernel PF enables a new feature "malicious driver
> +  detection". According to this feature, driver should create TX context
> +  descriptor and set the "Check Context" bit in advanced data descriptor.
> +  This patch add the support of "malicious driver detection" to let kernel PF
> +  based DPDK VF work.
> +
> +  Please aware there's another way, disabling MDD in PF kernel driver.
> +  Like this,
> +  >insmod ixgbe.ko MDD=0,0
> 
>  Libraries
>  ~~~~~~~~~
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
> index 52a263c..c8a7740 100644
> --- a/drivers/net/ixgbe/ixgbe_rxtx.c
> +++ b/drivers/net/ixgbe/ixgbe_rxtx.c
> @@ -85,7 +85,8 @@
>  		PKT_TX_VLAN_PKT |		 \
>  		PKT_TX_IP_CKSUM |		 \
>  		PKT_TX_L4_MASK |		 \
> -		PKT_TX_TCP_SEG)
> +		PKT_TX_TCP_SEG |		 \
> +		PKT_TX_MALICIOUS_DRIVER_DETECT)
> 
>  static inline struct rte_mbuf *
>  rte_rxmbuf_alloc(struct rte_mempool *mp)
> @@ -564,6 +565,8 @@ ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
>  	return (0);
>  }
> 
> +#define DEFAULT_CTX_L2_LEN 14

Use sizeof(strutct eth_hdr) or something.

> +
>  uint16_t
>  ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
>  		uint16_t nb_pkts)
> @@ -614,11 +617,19 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
>  		 * are needed for offload functionality.
>  		 */
>  		ol_flags = tx_pkt->ol_flags;
> +		if (!(txq->txq_flags & ETH_TXQ_FLAGS_NOMDD))
> +			ol_flags |= PKT_TX_MALICIOUS_DRIVER_DETECT;

Probably a bit better introduce new filed inside txq: deaul_ol_flags or something,
Then set it up depending on HW type, and then just:

ol_flags = tx_pkt->ol_flags | txq->default_ol_flags;


> 
>  		/* If hardware offload required */
>  		tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
>  		if (tx_ol_req) {
> -			tx_offload.l2_len = tx_pkt->l2_len;
> +			/* if l2 len isn't provided by the caller, give it a
> +			 * default value.
> +			 */
> +			if (tx_pkt->l2_len == 0)
> +				tx_offload.l2_len = DEFAULT_CTX_L2_LEN;
> +			else
> +				tx_offload.l2_len = tx_pkt->l2_len;

So with l2_len==0 what would happen?
PF would stop the queue?


>  			tx_offload.l3_len = tx_pkt->l3_len;
>  			tx_offload.l4_len = tx_pkt->l4_len;
>  			tx_offload.vlan_tci = tx_pkt->vlan_tci;
> @@ -801,6 +812,7 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
>  		}
> 
>  		olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
> +		olinfo_status |= IXGBE_ADVTXD_CC;

As I said, probably safer to set CC bit  only if (tx_ol_req != 0).  

> 
>  		m_seg = tx_pkt;
>  		do {
> diff --git a/drivers/net/ixgbe/ixgbe_rxtx.h b/drivers/net/ixgbe/ixgbe_rxtx.h
> index 475a800..bcde785 100644
> --- a/drivers/net/ixgbe/ixgbe_rxtx.h
> +++ b/drivers/net/ixgbe/ixgbe_rxtx.h
> @@ -255,7 +255,8 @@ struct ixgbe_txq_ops {
>   * RTE_PMD_IXGBE_TX_MAX_BURST.
>   */
>  #define IXGBE_SIMPLE_FLAGS ((uint32_t)ETH_TXQ_FLAGS_NOMULTSEGS | \
> -			    ETH_TXQ_FLAGS_NOOFFLOADS)
> +			    ETH_TXQ_FLAGS_NOOFFLOADS | \
> +			    ETH_TXQ_FLAGS_NOMDD)
> 
>  /*
>   * Populate descriptors with the following info:
> diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
> index bada8ad..a2e32d0 100644
> --- a/lib/librte_ether/rte_ethdev.h
> +++ b/lib/librte_ether/rte_ethdev.h
> @@ -637,6 +637,7 @@ struct rte_eth_rxconf {
>  #define ETH_TXQ_FLAGS_NOXSUMSCTP 0x0200 /**< disable SCTP checksum offload */
>  #define ETH_TXQ_FLAGS_NOXSUMUDP  0x0400 /**< disable UDP checksum offload */
>  #define ETH_TXQ_FLAGS_NOXSUMTCP  0x0800 /**< disable TCP checksum offload */
> +#define ETH_TXQ_FLAGS_NOMDD      0x1000 /**< disable malicious driver detection */
>  #define ETH_TXQ_FLAGS_NOOFFLOADS \
>  		(ETH_TXQ_FLAGS_NOVLANOFFL | ETH_TXQ_FLAGS_NOXSUMSCTP | \
>  		 ETH_TXQ_FLAGS_NOXSUMUDP  | ETH_TXQ_FLAGS_NOXSUMTCP)
> diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
> index f234ac9..b5ed25c 100644
> --- a/lib/librte_mbuf/rte_mbuf.h
> +++ b/lib/librte_mbuf/rte_mbuf.h
> @@ -102,6 +102,7 @@ extern "C" {
> 
>  /* add new TX flags here */
> 
> +#define PKT_TX_MALICIOUS_DRIVER_DETECT (1ULL << 48)
>  /**
>   * Second VLAN insertion (QinQ) flag.
>   */
> @@ -824,7 +825,7 @@ struct rte_mbuf {
>  	struct rte_mempool *pool; /**< Pool from which mbuf was allocated. */
>  	struct rte_mbuf *next;    /**< Next segment of scattered packet. */
> 
> -	/* fields to support TX offloads */
> +	/* fields to support malicious driver detection or TX offloads */

No fields are really updated, so probably no need to update comments here.

>  	union {
>  		uint64_t tx_offload;       /**< combined for easy fetch */
>  		struct {
> --
> 1.9.3
  
Wenzhuo Lu Feb. 22, 2016, 5:28 a.m. UTC | #3
Hi Konstantin,
Many thanks for your review and comments. Sorry for the late response. Please see inline :)

> -----Original Message-----
> From: Ananyev, Konstantin
> Sent: Thursday, February 18, 2016 12:45 AM
> To: Lu, Wenzhuo <wenzhuo.lu@intel.com>; dev@dpdk.org
> Subject: RE: [dpdk-dev] [PATCH] ixgbe: fix x550 VF tx issue
> 
> Hi Wenzhuo,
> 
> > -----Original Message-----
> > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Wenzhuo Lu
> > Sent: Friday, January 29, 2016 7:48 AM
> > To: dev@dpdk.org
> > Subject: [dpdk-dev] [PATCH] ixgbe: fix x550 VF tx issue
> >
> > On x550, the basic tx function may not work if we use DPDK VF based on
> > kernel PF. The reason is kernel PF enables a new feature "malicious
> > driver detection". According to this feature, driver should create TX
> > context descriptor and set the "Check Context" bit in advanced data
> descriptor.
> > This patch add the support of "malicious driver detection" to let
> > kernel PF based DPDK VF work.
> 
> Is there any way for VF to determine is MDD is enabled on PF or not?
> Mailbox message or something?
> If not, then I suppose for x550 VF we always have to set default txq_flags in
> dev_info to PKT_TX_MALICIOUS_DRIVER_DETECT?
> Which means that for X550 VF simple TX path by default will be disabled,
> correct?
The bad news is now there's no way for VF to know if the MDD is enabled. But PF will disable MDD by default.
And MDD will impact the performance. So, I think VF better disable MDD by default either.

> As alternative we probably can change tx_queue_start() for x550 VF to
> always setup a dummy TCD and then make changes in TX simple path to
> always setup CC bit for x550 VF.
> That would allow to keep using TX simple for x550 VF , no matter is MDD is
> enabled by PF or not.
Any suggestion about how to set the dummy TCD? Thanks.
I think TCD is used to describe the data, so it should match the packets. So, I'm sure how to create a dummy TCD :(

> 
> Another thing - there could be a potential slowdown with that feature:
> As now TX has to occupy one extra HW context, even if no HW offload is
> requested by SW,  correct?
> Let say if app sends packets of 3 types:
> - TCP with TX HW csum offload
> - UDP with TX HW csum offload
> - packets with no HW offload requested
> Then on 82599 VF no PMD would need to setup TCD only fist 2 times.
> But on X550 VF will need to setup new TCD for every packet.
> But I suppose that is unavoidable.
Yes, I agree. There's performance impact and seems unavoidable.

> 
> >
> > Although CC bit should be set in IOV mode, as tested, it's no harm to
> > set CC bit in non-IVO mode. So, as PF and VF share the same code, will
> > set CC bit anyway.
> 
> I don't think it totally harmless.
> As I understand, if CC bit is set HW would try to apply related TC to the
> packet.
> If this TC was never setup - you don't know what values it will contain.
I agree with you, although as tested, the NULL TC doesn't cause any problem.
In this scenario, better only set CC bit for VF. I'll try to do that.

> 
> >
> > Please aware there's another way, disabling MDD in PF kernel driver.
> > Like this,
> > >insmod ixgbe.ko MDD=0,0
> >
> > Signed-off-by: Wenzhuo Lu <wenzhuo.lu@intel.com>
> > ---
> >  doc/guides/rel_notes/release_2_3.rst | 12 ++++++++++++
> >  drivers/net/ixgbe/ixgbe_rxtx.c       | 16 ++++++++++++++--
> >  drivers/net/ixgbe/ixgbe_rxtx.h       |  3 ++-
> >  lib/librte_ether/rte_ethdev.h        |  1 +
> >  lib/librte_mbuf/rte_mbuf.h           |  3 ++-
> >  5 files changed, 31 insertions(+), 4 deletions(-)
> >
> > diff --git a/doc/guides/rel_notes/release_2_3.rst
> > b/doc/guides/rel_notes/release_2_3.rst
> > index 99de186..90f81e0 100644
> > --- a/doc/guides/rel_notes/release_2_3.rst
> > +++ b/doc/guides/rel_notes/release_2_3.rst
> > @@ -15,6 +15,18 @@ EAL
> >  Drivers
> >  ~~~~~~~
> >
> > +* **ixgbe: Fixed x550 VF tx issue.**
> > +
> > +  On x550, the basic tx function may not work if we use DPDK VF based
> > + on  kernel PF. The reason is kernel PF enables a new feature
> > + "malicious driver  detection". According to this feature, driver
> > + should create TX context  descriptor and set the "Check Context" bit in
> advanced data descriptor.
> > +  This patch add the support of "malicious driver detection" to let
> > + kernel PF  based DPDK VF work.
> > +
> > +  Please aware there's another way, disabling MDD in PF kernel driver.
> > +  Like this,
> > +  >insmod ixgbe.ko MDD=0,0
> >
> >  Libraries
> >  ~~~~~~~~~
> > diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c
> > b/drivers/net/ixgbe/ixgbe_rxtx.c index 52a263c..c8a7740 100644
> > --- a/drivers/net/ixgbe/ixgbe_rxtx.c
> > +++ b/drivers/net/ixgbe/ixgbe_rxtx.c
> > @@ -85,7 +85,8 @@
> >  		PKT_TX_VLAN_PKT |		 \
> >  		PKT_TX_IP_CKSUM |		 \
> >  		PKT_TX_L4_MASK |		 \
> > -		PKT_TX_TCP_SEG)
> > +		PKT_TX_TCP_SEG |		 \
> > +		PKT_TX_MALICIOUS_DRIVER_DETECT)
> >
> >  static inline struct rte_mbuf *
> >  rte_rxmbuf_alloc(struct rte_mempool *mp) @@ -564,6 +565,8 @@
> > ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
> >  	return (0);
> >  }
> >
> > +#define DEFAULT_CTX_L2_LEN 14
> 
> Use sizeof(strutct eth_hdr) or something.
Honestly the 14 comes from datasheet. I think your idea is better :)

> 
> > +
> >  uint16_t
> >  ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
> >  		uint16_t nb_pkts)
> > @@ -614,11 +617,19 @@ ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf
> **tx_pkts,
> >  		 * are needed for offload functionality.
> >  		 */
> >  		ol_flags = tx_pkt->ol_flags;
> > +		if (!(txq->txq_flags & ETH_TXQ_FLAGS_NOMDD))
> > +			ol_flags |= PKT_TX_MALICIOUS_DRIVER_DETECT;
> 
> Probably a bit better introduce new filed inside txq: deaul_ol_flags or
> something, Then set it up depending on HW type, and then just:
> 
> ol_flags = tx_pkt->ol_flags | txq->default_ol_flags;
I think you mean doing this way can decrease the performance impact, right? Let me figure out how to do it?

> 
> 
> >
> >  		/* If hardware offload required */
> >  		tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
> >  		if (tx_ol_req) {
> > -			tx_offload.l2_len = tx_pkt->l2_len;
> > +			/* if l2 len isn't provided by the caller, give it a
> > +			 * default value.
> > +			 */
> > +			if (tx_pkt->l2_len == 0)
> > +				tx_offload.l2_len = DEFAULT_CTX_L2_LEN;
> > +			else
> > +				tx_offload.l2_len = tx_pkt->l2_len;
> 
> So with l2_len==0 what would happen?
> PF would stop the queue?
Even worse. As I remember, PF will reset the VF.

> 
> 
> >  			tx_offload.l3_len = tx_pkt->l3_len;
> >  			tx_offload.l4_len = tx_pkt->l4_len;
> >  			tx_offload.vlan_tci = tx_pkt->vlan_tci; @@ -801,6
> +812,7 @@
> > ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
> >  		}
> >
> >  		olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
> > +		olinfo_status |= IXGBE_ADVTXD_CC;
> 
> As I said, probably safer to set CC bit  only if (tx_ol_req != 0).
Yes, it's better. I thought I need to check if it's VF. But seems maybe I only need to check this ol flag.
Thanks for the suggestion:)

> 
> >
> >  		m_seg = tx_pkt;
> >  		do {
> > diff --git a/drivers/net/ixgbe/ixgbe_rxtx.h
> > b/drivers/net/ixgbe/ixgbe_rxtx.h index 475a800..bcde785 100644
> > --- a/drivers/net/ixgbe/ixgbe_rxtx.h
> > +++ b/drivers/net/ixgbe/ixgbe_rxtx.h
> > @@ -255,7 +255,8 @@ struct ixgbe_txq_ops {
> >   * RTE_PMD_IXGBE_TX_MAX_BURST.
> >   */
> >  #define IXGBE_SIMPLE_FLAGS ((uint32_t)ETH_TXQ_FLAGS_NOMULTSEGS |
> \
> > -			    ETH_TXQ_FLAGS_NOOFFLOADS)
> > +			    ETH_TXQ_FLAGS_NOOFFLOADS | \
> > +			    ETH_TXQ_FLAGS_NOMDD)
> >
> >  /*
> >   * Populate descriptors with the following info:
> > diff --git a/lib/librte_ether/rte_ethdev.h
> > b/lib/librte_ether/rte_ethdev.h index bada8ad..a2e32d0 100644
> > --- a/lib/librte_ether/rte_ethdev.h
> > +++ b/lib/librte_ether/rte_ethdev.h
> > @@ -637,6 +637,7 @@ struct rte_eth_rxconf {  #define
> > ETH_TXQ_FLAGS_NOXSUMSCTP 0x0200 /**< disable SCTP checksum offload
> */
> > #define ETH_TXQ_FLAGS_NOXSUMUDP  0x0400 /**< disable UDP checksum
> > offload */  #define ETH_TXQ_FLAGS_NOXSUMTCP  0x0800 /**< disable TCP
> > checksum offload */
> > +#define ETH_TXQ_FLAGS_NOMDD      0x1000 /**< disable malicious driver
> detection */
> >  #define ETH_TXQ_FLAGS_NOOFFLOADS \
> >  		(ETH_TXQ_FLAGS_NOVLANOFFL |
> ETH_TXQ_FLAGS_NOXSUMSCTP | \
> >  		 ETH_TXQ_FLAGS_NOXSUMUDP  |
> ETH_TXQ_FLAGS_NOXSUMTCP) diff --git
> > a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h index
> > f234ac9..b5ed25c 100644
> > --- a/lib/librte_mbuf/rte_mbuf.h
> > +++ b/lib/librte_mbuf/rte_mbuf.h
> > @@ -102,6 +102,7 @@ extern "C" {
> >
> >  /* add new TX flags here */
> >
> > +#define PKT_TX_MALICIOUS_DRIVER_DETECT (1ULL << 48)
> >  /**
> >   * Second VLAN insertion (QinQ) flag.
> >   */
> > @@ -824,7 +825,7 @@ struct rte_mbuf {
> >  	struct rte_mempool *pool; /**< Pool from which mbuf was allocated.
> */
> >  	struct rte_mbuf *next;    /**< Next segment of scattered packet. */
> >
> > -	/* fields to support TX offloads */
> > +	/* fields to support malicious driver detection or TX offloads */
> 
> No fields are really updated, so probably no need to update comments here.
It's reminder. As the users may not know if the MDD can work as expected depends on these fields.

> 
> >  	union {
> >  		uint64_t tx_offload;       /**< combined for easy fetch */
> >  		struct {
> > --
> > 1.9.3
  

Patch

diff --git a/doc/guides/rel_notes/release_2_3.rst b/doc/guides/rel_notes/release_2_3.rst
index 99de186..90f81e0 100644
--- a/doc/guides/rel_notes/release_2_3.rst
+++ b/doc/guides/rel_notes/release_2_3.rst
@@ -15,6 +15,18 @@  EAL
 Drivers
 ~~~~~~~
 
+* **ixgbe: Fixed x550 VF tx issue.**
+
+  On x550, the basic tx function may not work if we use DPDK VF based on
+  kernel PF. The reason is kernel PF enables a new feature "malicious driver
+  detection". According to this feature, driver should create TX context
+  descriptor and set the "Check Context" bit in advanced data descriptor.
+  This patch add the support of "malicious driver detection" to let kernel PF
+  based DPDK VF work.
+
+  Please aware there's another way, disabling MDD in PF kernel driver.
+  Like this,
+  >insmod ixgbe.ko MDD=0,0
 
 Libraries
 ~~~~~~~~~
diff --git a/drivers/net/ixgbe/ixgbe_rxtx.c b/drivers/net/ixgbe/ixgbe_rxtx.c
index 52a263c..c8a7740 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.c
+++ b/drivers/net/ixgbe/ixgbe_rxtx.c
@@ -85,7 +85,8 @@ 
 		PKT_TX_VLAN_PKT |		 \
 		PKT_TX_IP_CKSUM |		 \
 		PKT_TX_L4_MASK |		 \
-		PKT_TX_TCP_SEG)
+		PKT_TX_TCP_SEG |		 \
+		PKT_TX_MALICIOUS_DRIVER_DETECT)
 
 static inline struct rte_mbuf *
 rte_rxmbuf_alloc(struct rte_mempool *mp)
@@ -564,6 +565,8 @@  ixgbe_xmit_cleanup(struct ixgbe_tx_queue *txq)
 	return (0);
 }
 
+#define DEFAULT_CTX_L2_LEN 14
+
 uint16_t
 ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		uint16_t nb_pkts)
@@ -614,11 +617,19 @@  ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		 * are needed for offload functionality.
 		 */
 		ol_flags = tx_pkt->ol_flags;
+		if (!(txq->txq_flags & ETH_TXQ_FLAGS_NOMDD))
+			ol_flags |= PKT_TX_MALICIOUS_DRIVER_DETECT;
 
 		/* If hardware offload required */
 		tx_ol_req = ol_flags & IXGBE_TX_OFFLOAD_MASK;
 		if (tx_ol_req) {
-			tx_offload.l2_len = tx_pkt->l2_len;
+			/* if l2 len isn't provided by the caller, give it a
+			 * default value.
+			 */
+			if (tx_pkt->l2_len == 0)
+				tx_offload.l2_len = DEFAULT_CTX_L2_LEN;
+			else
+				tx_offload.l2_len = tx_pkt->l2_len;
 			tx_offload.l3_len = tx_pkt->l3_len;
 			tx_offload.l4_len = tx_pkt->l4_len;
 			tx_offload.vlan_tci = tx_pkt->vlan_tci;
@@ -801,6 +812,7 @@  ixgbe_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
 		}
 
 		olinfo_status |= (pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);
+		olinfo_status |= IXGBE_ADVTXD_CC;
 
 		m_seg = tx_pkt;
 		do {
diff --git a/drivers/net/ixgbe/ixgbe_rxtx.h b/drivers/net/ixgbe/ixgbe_rxtx.h
index 475a800..bcde785 100644
--- a/drivers/net/ixgbe/ixgbe_rxtx.h
+++ b/drivers/net/ixgbe/ixgbe_rxtx.h
@@ -255,7 +255,8 @@  struct ixgbe_txq_ops {
  * RTE_PMD_IXGBE_TX_MAX_BURST.
  */
 #define IXGBE_SIMPLE_FLAGS ((uint32_t)ETH_TXQ_FLAGS_NOMULTSEGS | \
-			    ETH_TXQ_FLAGS_NOOFFLOADS)
+			    ETH_TXQ_FLAGS_NOOFFLOADS | \
+			    ETH_TXQ_FLAGS_NOMDD)
 
 /*
  * Populate descriptors with the following info:
diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h
index bada8ad..a2e32d0 100644
--- a/lib/librte_ether/rte_ethdev.h
+++ b/lib/librte_ether/rte_ethdev.h
@@ -637,6 +637,7 @@  struct rte_eth_rxconf {
 #define ETH_TXQ_FLAGS_NOXSUMSCTP 0x0200 /**< disable SCTP checksum offload */
 #define ETH_TXQ_FLAGS_NOXSUMUDP  0x0400 /**< disable UDP checksum offload */
 #define ETH_TXQ_FLAGS_NOXSUMTCP  0x0800 /**< disable TCP checksum offload */
+#define ETH_TXQ_FLAGS_NOMDD      0x1000 /**< disable malicious driver detection */
 #define ETH_TXQ_FLAGS_NOOFFLOADS \
 		(ETH_TXQ_FLAGS_NOVLANOFFL | ETH_TXQ_FLAGS_NOXSUMSCTP | \
 		 ETH_TXQ_FLAGS_NOXSUMUDP  | ETH_TXQ_FLAGS_NOXSUMTCP)
diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h
index f234ac9..b5ed25c 100644
--- a/lib/librte_mbuf/rte_mbuf.h
+++ b/lib/librte_mbuf/rte_mbuf.h
@@ -102,6 +102,7 @@  extern "C" {
 
 /* add new TX flags here */
 
+#define PKT_TX_MALICIOUS_DRIVER_DETECT (1ULL << 48)
 /**
  * Second VLAN insertion (QinQ) flag.
  */
@@ -824,7 +825,7 @@  struct rte_mbuf {
 	struct rte_mempool *pool; /**< Pool from which mbuf was allocated. */
 	struct rte_mbuf *next;    /**< Next segment of scattered packet. */
 
-	/* fields to support TX offloads */
+	/* fields to support malicious driver detection or TX offloads */
 	union {
 		uint64_t tx_offload;       /**< combined for easy fetch */
 		struct {