diff mbox

[dpdk-dev,v3,5/6] ixgbe: Config VF RSS

Message ID 1419398584-19520-6-git-send-email-changchun.ouyang@intel.com (mailing list archive)
State Superseded, archived
Headers show

Commit Message

Ouyang Changchun Dec. 24, 2014, 5:23 a.m. UTC
It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable VF RSS.

The psrtype will determine how many queues the received packets will distribute to,
and the value of psrtype should depends on both facet: max VF rxq number which
has been negotiated with PF, and the number of rxq specified in config on guest.

Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
---
 lib/librte_pmd_ixgbe/ixgbe_pf.c   | 15 +++++++
 lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 92 ++++++++++++++++++++++++++++++++++-----
 2 files changed, 97 insertions(+), 10 deletions(-)

Comments

Vladislav Zolotarov Dec. 24, 2014, 10:39 a.m. UTC | #1
On 12/24/14 07:23, Ouyang Changchun wrote:
> It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable VF RSS.
>
> The psrtype will determine how many queues the received packets will distribute to,
> and the value of psrtype should depends on both facet: max VF rxq number which
> has been negotiated with PF, and the number of rxq specified in config on guest.
>
> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> ---
>   lib/librte_pmd_ixgbe/ixgbe_pf.c   | 15 +++++++
>   lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 92 ++++++++++++++++++++++++++++++++++-----
>   2 files changed, 97 insertions(+), 10 deletions(-)
>
> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> index cbb0145..9c9dad8 100644
> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> @@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev *eth_dev)
>   	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw->mac.num_rar_entries), 0);
>   	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw->mac.num_rar_entries), 0);
>   
> +	/*
> +	 * VF RSS can support at most 4 queues for each VF, even if
> +	 * 8 queues are available for each VF, it need refine to 4
> +	 * queues here due to this limitation, otherwise no queue
> +	 * will receive any packet even RSS is enabled.

According to Table 7-3 in the 82599 spec RSS is not available when port 
is configured to have 8 queues per pool. This means that if u see this 
configuration u may immediately disable RSS flow in your code.

> +	 */
> +	if (eth_dev->data->dev_conf.rxmode.mq_mode == ETH_MQ_RX_VMDQ_RSS) {
> +		if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
> +			RTE_ETH_DEV_SRIOV(eth_dev).active = ETH_32_POOLS;
> +			RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
> +			RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
> +				dev_num_vf(eth_dev) * 4;

According to 82599 spec u can't do that since RSS is not allowed when 
port is configured to have 8 function per-VF. Have u verified that this 
works? If yes, then spec should be updated.

> +		}
> +	}
> +
>   	/* set VMDq map to default PF pool */
>   	hw->mac.ops.set_vmdq(hw, 0, RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
>   
> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> index f69abda..a7c17a4 100644
> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> @@ -3327,6 +3327,39 @@ ixgbe_alloc_rx_queue_mbufs(struct igb_rx_queue *rxq)
>   }
>   
>   static int
> +ixgbe_config_vf_rss(struct rte_eth_dev *dev)
> +{
> +	struct ixgbe_hw *hw;
> +	uint32_t mrqc;
> +
> +	ixgbe_rss_configure(dev);
> +
> +	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> +
> +	/* MRQC: enable VF RSS */
> +	mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
> +	mrqc &= ~IXGBE_MRQC_MRQE_MASK;
> +	switch (RTE_ETH_DEV_SRIOV(dev).active) {
> +	case ETH_64_POOLS:
> +		mrqc |= IXGBE_MRQC_VMDQRSS64EN;
> +		break;
> +
> +	case ETH_32_POOLS:
> +	case ETH_16_POOLS:
> +		mrqc |= IXGBE_MRQC_VMDQRSS32EN;

Again, this contradicts with the spec.

> +		break;
> +
> +	default:
> +		PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode");
> +		return -EINVAL;
> +	}
> +
> +	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
> +
> +	return 0;
> +}
> +
> +static int
>   ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
>   {
>   	struct ixgbe_hw *hw =
> @@ -3358,24 +3391,38 @@ ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
>   			default: ixgbe_rss_disable(dev);
>   		}
>   	} else {
> -		switch (RTE_ETH_DEV_SRIOV(dev).active) {
>   		/*
>   		 * SRIOV active scheme
>   		 * FIXME if support DCB/RSS together with VMDq & SRIOV
>   		 */
> -		case ETH_64_POOLS:
> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC, IXGBE_MRQC_VMDQEN);
> +		switch (dev->data->dev_conf.rxmode.mq_mode) {
> +		case ETH_MQ_RX_RSS:
> +		case ETH_MQ_RX_VMDQ_RSS:
> +			ixgbe_config_vf_rss(dev);
>   			break;
>   
> -		case ETH_32_POOLS:
> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC, IXGBE_MRQC_VMDQRT4TCEN);
> -			break;
> +		default:
> +			switch (RTE_ETH_DEV_SRIOV(dev).active) {

Sorry for nitpicking but have u considered taking this encapsulated 
"switch-case" block into a separate function? This could make the code 
look a lot nicer. ;)

> +			case ETH_64_POOLS:
> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> +					IXGBE_MRQC_VMDQEN);
> +				break;
>   
> -		case ETH_16_POOLS:
> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC, IXGBE_MRQC_VMDQRT8TCEN);
> +			case ETH_32_POOLS:
> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> +					IXGBE_MRQC_VMDQRT4TCEN);
> +				break;
> +
> +			case ETH_16_POOLS:
> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> +					IXGBE_MRQC_VMDQRT8TCEN);
> +				break;
> +			default:
> +				PMD_INIT_LOG(ERR,
> +					"invalid pool number in IOV mode");
> +				break;
> +			}
>   			break;
> -		default:
> -			PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
>   		}
>   	}
>   
> @@ -3989,10 +4036,32 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
>   	uint16_t buf_size;
>   	uint16_t i;
>   	int ret;
> +	uint16_t valid_rxq_num;
>   
>   	PMD_INIT_FUNC_TRACE();
>   	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
>   
> +	valid_rxq_num = RTE_MIN(dev->data->nb_rx_queues, hw->mac.max_rx_queues);
> +
> +	/*
> +	 * VMDq RSS can't support 3 queues, so config it into 4 queues,
> +	 * and give user a hint that some packets may loss if it doesn't
> +	 * poll the queue where those packets are distributed to.
> +	 */
> +	if (valid_rxq_num == 3)
> +		valid_rxq_num = 4;

Why to configure more queues that requested and not less (2)? Why to 
configure anything at all and not return an error?

> +
> +	if (dev->data->nb_rx_queues > valid_rxq_num) {
> +		PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
> +			"it should be equal to or less than %d",
> +			valid_rxq_num);
> +		return -1;
> +	} else if (dev->data->nb_rx_queues < valid_rxq_num)
> +		PMD_INIT_LOG(ERR, "The number of Rx queue is less "
> +			"than the number of available Rx queues:%d, "
> +			"packets in Rx queues(q_id >= %d) may loss.",
> +			valid_rxq_num, dev->data->nb_rx_queues);

Who ever looks in the "INIT_LOG" if everything "work well" and u make it 
look so by allowing this call to succeed. And then some packets will 
just silently not arrive?! And what the used should somehow guess to do? 
- Look in the "INIT_LOG"?! This is a nightmare!

> +
>   	/*
>   	 * When the VF driver issues a IXGBE_VF_RESET request, the PF driver
>   	 * disables the VF receipt of packets if the PF MTU is > 1500.
> @@ -4094,6 +4163,9 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
>   			IXGBE_PSRTYPE_IPV6HDR;
>   #endif
>   
> +	/* Set RQPL for VF RSS according to max Rx queue */
> +	psrtype |= (valid_rxq_num >> 1) <<
> +		IXGBE_PSRTYPE_RQPL_SHIFT;
>   	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
>   
>   	if (dev->data->dev_conf.rxmode.enable_scatter) {
Ouyang Changchun Dec. 25, 2014, 2:14 a.m. UTC | #2
Hi,

> -----Original Message-----
> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> Sent: Wednesday, December 24, 2014 6:40 PM
> To: Ouyang, Changchun; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
> 
> 
> On 12/24/14 07:23, Ouyang Changchun wrote:
> > It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable VF
> RSS.
> >
> > The psrtype will determine how many queues the received packets will
> > distribute to, and the value of psrtype should depends on both facet:
> > max VF rxq number which has been negotiated with PF, and the number of
> rxq specified in config on guest.
> >
> > Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> > ---
> >   lib/librte_pmd_ixgbe/ixgbe_pf.c   | 15 +++++++
> >   lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 92
> ++++++++++++++++++++++++++++++++++-----
> >   2 files changed, 97 insertions(+), 10 deletions(-)
> >
> > diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > b/lib/librte_pmd_ixgbe/ixgbe_pf.c index cbb0145..9c9dad8 100644
> > --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > @@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev
> *eth_dev)
> >   	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw-
> >mac.num_rar_entries), 0);
> >   	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw-
> >mac.num_rar_entries), 0);
> >
> > +	/*
> > +	 * VF RSS can support at most 4 queues for each VF, even if
> > +	 * 8 queues are available for each VF, it need refine to 4
> > +	 * queues here due to this limitation, otherwise no queue
> > +	 * will receive any packet even RSS is enabled.
> 
> According to Table 7-3 in the 82599 spec RSS is not available when port is
> configured to have 8 queues per pool. This means that if u see this
> configuration u may immediately disable RSS flow in your code.
> 
8 queues here means the available number queue per vf, it is calculated according to max vfs,
e.g. if max vfs is 16(or less than), then each vf 'COULD' have 8 queues evenly, pf early init stage estimate this value,
but that is not precise, so need refine this.
User don't know this estimated value, it is internal value, not come from user's input/configure.
Hope it is clear to you.
> > +	 */
> > +	if (eth_dev->data->dev_conf.rxmode.mq_mode ==
> ETH_MQ_RX_VMDQ_RSS) {
> > +		if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
> > +			RTE_ETH_DEV_SRIOV(eth_dev).active =
> ETH_32_POOLS;
> > +			RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
> > +			RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
> > +				dev_num_vf(eth_dev) * 4;
> 
> According to 82599 spec u can't do that since RSS is not allowed when port is
> configured to have 8 function per-VF. Have u verified that this works? If yes,
> then spec should be updated.
>
Response as above,
Of course I have validated this. It works well.

> > +		}
> > +	}
> > +
> >   	/* set VMDq map to default PF pool */
> >   	hw->mac.ops.set_vmdq(hw, 0,
> > RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
> >
> > diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > index f69abda..a7c17a4 100644
> > --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > @@ -3327,6 +3327,39 @@ ixgbe_alloc_rx_queue_mbufs(struct
> igb_rx_queue *rxq)
> >   }
> >
> >   static int
> > +ixgbe_config_vf_rss(struct rte_eth_dev *dev) {
> > +	struct ixgbe_hw *hw;
> > +	uint32_t mrqc;
> > +
> > +	ixgbe_rss_configure(dev);
> > +
> > +	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> > +
> > +	/* MRQC: enable VF RSS */
> > +	mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
> > +	mrqc &= ~IXGBE_MRQC_MRQE_MASK;
> > +	switch (RTE_ETH_DEV_SRIOV(dev).active) {
> > +	case ETH_64_POOLS:
> > +		mrqc |= IXGBE_MRQC_VMDQRSS64EN;
> > +		break;
> > +
> > +	case ETH_32_POOLS:
> > +	case ETH_16_POOLS:
> > +		mrqc |= IXGBE_MRQC_VMDQRSS32EN;
> 
> Again, this contradicts with the spec.
> 
> > +		break;
> > +
> > +	default:
> > +		PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode");
> > +		return -EINVAL;
> > +	}
> > +
> > +	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
> > +
> > +	return 0;
> > +}
> > +
> > +static int
> >   ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
> >   {
> >   	struct ixgbe_hw *hw =
> > @@ -3358,24 +3391,38 @@ ixgbe_dev_mq_rx_configure(struct
> rte_eth_dev *dev)
> >   			default: ixgbe_rss_disable(dev);
> >   		}
> >   	} else {
> > -		switch (RTE_ETH_DEV_SRIOV(dev).active) {
> >   		/*
> >   		 * SRIOV active scheme
> >   		 * FIXME if support DCB/RSS together with VMDq & SRIOV
> >   		 */
> > -		case ETH_64_POOLS:
> > -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> IXGBE_MRQC_VMDQEN);
> > +		switch (dev->data->dev_conf.rxmode.mq_mode) {
> > +		case ETH_MQ_RX_RSS:
> > +		case ETH_MQ_RX_VMDQ_RSS:
> > +			ixgbe_config_vf_rss(dev);
> >   			break;
> >
> > -		case ETH_32_POOLS:
> > -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> IXGBE_MRQC_VMDQRT4TCEN);
> > -			break;
> > +		default:
> > +			switch (RTE_ETH_DEV_SRIOV(dev).active) {
> 
> Sorry for nitpicking but have u considered taking this encapsulated "switch-
> case" block into a separate function? This could make the code look a lot
> nicer. ;)

Only one place use it, so don't need make it a function,
And I prefer to the current code.

> 
> > +			case ETH_64_POOLS:
> > +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > +					IXGBE_MRQC_VMDQEN);
> > +				break;
> >
> > -		case ETH_16_POOLS:
> > -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> IXGBE_MRQC_VMDQRT8TCEN);
> > +			case ETH_32_POOLS:
> > +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > +					IXGBE_MRQC_VMDQRT4TCEN);
> > +				break;
> > +
> > +			case ETH_16_POOLS:
> > +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > +					IXGBE_MRQC_VMDQRT8TCEN);
> > +				break;
> > +			default:
> > +				PMD_INIT_LOG(ERR,
> > +					"invalid pool number in IOV mode");
> > +				break;
> > +			}
> >   			break;
> > -		default:
> > -			PMD_INIT_LOG(ERR, "invalid pool number in IOV
> mode");
> >   		}
> >   	}
> >
> > @@ -3989,10 +4036,32 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
> >   	uint16_t buf_size;
> >   	uint16_t i;
> >   	int ret;
> > +	uint16_t valid_rxq_num;
> >
> >   	PMD_INIT_FUNC_TRACE();
> >   	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> >
> > +	valid_rxq_num = RTE_MIN(dev->data->nb_rx_queues,
> > +hw->mac.max_rx_queues);
> > +
> > +	/*
> > +	 * VMDq RSS can't support 3 queues, so config it into 4 queues,
> > +	 * and give user a hint that some packets may loss if it doesn't
> > +	 * poll the queue where those packets are distributed to.
> > +	 */
> > +	if (valid_rxq_num == 3)
> > +		valid_rxq_num = 4;
> 
> Why to configure more queues that requested and not less (2)? Why to
> configure anything at all and not return an error?
> 
> > +
> > +	if (dev->data->nb_rx_queues > valid_rxq_num) {
> > +		PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
> > +			"it should be equal to or less than %d",
> > +			valid_rxq_num);
> > +		return -1;
> > +	} else if (dev->data->nb_rx_queues < valid_rxq_num)
> > +		PMD_INIT_LOG(ERR, "The number of Rx queue is less "
> > +			"than the number of available Rx queues:%d, "
> > +			"packets in Rx queues(q_id >= %d) may loss.",
> > +			valid_rxq_num, dev->data->nb_rx_queues);
> 
> Who ever looks in the "INIT_LOG" if everything "work well" and u make it
> look so by allowing this call to succeed. And then some packets will just
> silently not arrive?! And what the used should somehow guess to do?
> - Look in the "INIT_LOG"?! This is a nightmare!

Sorry, I don't think so again, if user find any packets loss, he will care for log, 
Then he can find that log there, then user can refine its rxq number due the wrong rxq number,
Why is it a nightmare?

I don't agree with you about "silently not arrive", because we have hint/log there.

Return error here is also possible way, 
Again need other guys' insight here.

> > +
> >   	/*
> >   	 * When the VF driver issues a IXGBE_VF_RESET request, the PF
> driver
> >   	 * disables the VF receipt of packets if the PF MTU is > 1500.
> > @@ -4094,6 +4163,9 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
> >   			IXGBE_PSRTYPE_IPV6HDR;
> >   #endif
> >
> > +	/* Set RQPL for VF RSS according to max Rx queue */
> > +	psrtype |= (valid_rxq_num >> 1) <<
> > +		IXGBE_PSRTYPE_RQPL_SHIFT;
> >   	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
> >
> >   	if (dev->data->dev_conf.rxmode.enable_scatter) {
Ouyang Changchun Dec. 25, 2014, 2:43 a.m. UTC | #3
Hi,
Sorry miss some comments, so continue my response below,

> -----Original Message-----
> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> Sent: Wednesday, December 24, 2014 6:40 PM
> To: Ouyang, Changchun; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
> 
> 
> On 12/24/14 07:23, Ouyang Changchun wrote:
> > It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable VF
> RSS.
> >
> > The psrtype will determine how many queues the received packets will
> > distribute to, and the value of psrtype should depends on both facet:
> > max VF rxq number which has been negotiated with PF, and the number of
> rxq specified in config on guest.
> >
> > Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> > ---
> >   lib/librte_pmd_ixgbe/ixgbe_pf.c   | 15 +++++++
> >   lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 92
> ++++++++++++++++++++++++++++++++++-----
> >   2 files changed, 97 insertions(+), 10 deletions(-)
> >
> > diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > b/lib/librte_pmd_ixgbe/ixgbe_pf.c index cbb0145..9c9dad8 100644
> > --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > @@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev
> *eth_dev)
> >   	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw-
> >mac.num_rar_entries), 0);
> >   	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw-
> >mac.num_rar_entries), 0);
> >
> > +	/*
> > +	 * VF RSS can support at most 4 queues for each VF, even if
> > +	 * 8 queues are available for each VF, it need refine to 4
> > +	 * queues here due to this limitation, otherwise no queue
> > +	 * will receive any packet even RSS is enabled.
> 
> According to Table 7-3 in the 82599 spec RSS is not available when port is
> configured to have 8 queues per pool. This means that if u see this
> configuration u may immediately disable RSS flow in your code.
> 
> > +	 */
> > +	if (eth_dev->data->dev_conf.rxmode.mq_mode ==
> ETH_MQ_RX_VMDQ_RSS) {
> > +		if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
> > +			RTE_ETH_DEV_SRIOV(eth_dev).active =
> ETH_32_POOLS;
> > +			RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
> > +			RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
> > +				dev_num_vf(eth_dev) * 4;
> 
> According to 82599 spec u can't do that since RSS is not allowed when port is
> configured to have 8 function per-VF. Have u verified that this works? If yes,
> then spec should be updated.
> 
> > +		}
> > +	}
> > +
> >   	/* set VMDq map to default PF pool */
> >   	hw->mac.ops.set_vmdq(hw, 0,
> > RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
> >
> > diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > index f69abda..a7c17a4 100644
> > --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > @@ -3327,6 +3327,39 @@ ixgbe_alloc_rx_queue_mbufs(struct
> igb_rx_queue *rxq)
> >   }
> >
> >   static int
> > +ixgbe_config_vf_rss(struct rte_eth_dev *dev) {
> > +	struct ixgbe_hw *hw;
> > +	uint32_t mrqc;
> > +
> > +	ixgbe_rss_configure(dev);
> > +
> > +	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> > +
> > +	/* MRQC: enable VF RSS */
> > +	mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
> > +	mrqc &= ~IXGBE_MRQC_MRQE_MASK;
> > +	switch (RTE_ETH_DEV_SRIOV(dev).active) {
> > +	case ETH_64_POOLS:
> > +		mrqc |= IXGBE_MRQC_VMDQRSS64EN;
> > +		break;
> > +
> > +	case ETH_32_POOLS:
> > +	case ETH_16_POOLS:
> > +		mrqc |= IXGBE_MRQC_VMDQRSS32EN;
> 
> Again, this contradicts with the spec.
Yes, the spec say the hw can't support vf rss at all, but experiment find that could be done.
We can focus on discussing the implementation firstly.
 
> > +		break;
> > +
> > +	default:
> > +		PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode");
> > +		return -EINVAL;
> > +	}
> > +
> > +	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
> > +
> > +	return 0;
> > +}
> > +
> > +static int
> >   ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
> >   {
> >   	struct ixgbe_hw *hw =
> > @@ -3358,24 +3391,38 @@ ixgbe_dev_mq_rx_configure(struct
> rte_eth_dev *dev)
> >   			default: ixgbe_rss_disable(dev);
> >   		}
> >   	} else {
> > -		switch (RTE_ETH_DEV_SRIOV(dev).active) {
> >   		/*
> >   		 * SRIOV active scheme
> >   		 * FIXME if support DCB/RSS together with VMDq & SRIOV
> >   		 */
> > -		case ETH_64_POOLS:
> > -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> IXGBE_MRQC_VMDQEN);
> > +		switch (dev->data->dev_conf.rxmode.mq_mode) {
> > +		case ETH_MQ_RX_RSS:
> > +		case ETH_MQ_RX_VMDQ_RSS:
> > +			ixgbe_config_vf_rss(dev);
> >   			break;
> >
> > -		case ETH_32_POOLS:
> > -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> IXGBE_MRQC_VMDQRT4TCEN);
> > -			break;
> > +		default:
> > +			switch (RTE_ETH_DEV_SRIOV(dev).active) {
> 
> Sorry for nitpicking but have u considered taking this encapsulated "switch-
> case" block into a separate function? This could make the code look a lot
> nicer. ;)
> 
> > +			case ETH_64_POOLS:
> > +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > +					IXGBE_MRQC_VMDQEN);
> > +				break;
> >
> > -		case ETH_16_POOLS:
> > -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> IXGBE_MRQC_VMDQRT8TCEN);
> > +			case ETH_32_POOLS:
> > +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > +					IXGBE_MRQC_VMDQRT4TCEN);
> > +				break;
> > +
> > +			case ETH_16_POOLS:
> > +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > +					IXGBE_MRQC_VMDQRT8TCEN);
> > +				break;
> > +			default:
> > +				PMD_INIT_LOG(ERR,
> > +					"invalid pool number in IOV mode");
> > +				break;
> > +			}
> >   			break;
> > -		default:
> > -			PMD_INIT_LOG(ERR, "invalid pool number in IOV
> mode");
> >   		}
> >   	}
> >
> > @@ -3989,10 +4036,32 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
> >   	uint16_t buf_size;
> >   	uint16_t i;
> >   	int ret;
> > +	uint16_t valid_rxq_num;
> >
> >   	PMD_INIT_FUNC_TRACE();
> >   	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> >
> > +	valid_rxq_num = RTE_MIN(dev->data->nb_rx_queues,
> > +hw->mac.max_rx_queues);
> > +
> > +	/*
> > +	 * VMDq RSS can't support 3 queues, so config it into 4 queues,
> > +	 * and give user a hint that some packets may loss if it doesn't
> > +	 * poll the queue where those packets are distributed to.
> > +	 */
> > +	if (valid_rxq_num == 3)
> > +		valid_rxq_num = 4;
> 
> Why to configure more queues that requested and not less (2)? Why to
> configure anything at all and not return an error?

Sorry, I don't agree this is "anything" you say, because I don't use 5,6,7, 8, ..., 16, 2014, 2015,... etc.
By considering 2 or 4,
I prefer 4, the reason is if user need more than 3 queues per vf to do something, 
And pf has also the capability to setup 4 queues per vf, confining to 2 queues is also not good thing,
So here try to enable 4 queues, and give user hints here.
Btw, change it into 2 is another way, depends on other guys' more insight here.

> 
> > +
> > +	if (dev->data->nb_rx_queues > valid_rxq_num) {
> > +		PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
> > +			"it should be equal to or less than %d",
> > +			valid_rxq_num);
> > +		return -1;
> > +	} else if (dev->data->nb_rx_queues < valid_rxq_num)
> > +		PMD_INIT_LOG(ERR, "The number of Rx queue is less "
> > +			"than the number of available Rx queues:%d, "
> > +			"packets in Rx queues(q_id >= %d) may loss.",
> > +			valid_rxq_num, dev->data->nb_rx_queues);
> 
> Who ever looks in the "INIT_LOG" if everything "work well" and u make it
> look so by allowing this call to succeed. And then some packets will just
> silently not arrive?! And what the used should somehow guess to do?
> - Look in the "INIT_LOG"?! This is a nightmare!
> 
> > +
> >   	/*
> >   	 * When the VF driver issues a IXGBE_VF_RESET request, the PF
> driver
> >   	 * disables the VF receipt of packets if the PF MTU is > 1500.
> > @@ -4094,6 +4163,9 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
> >   			IXGBE_PSRTYPE_IPV6HDR;
> >   #endif
> >
> > +	/* Set RQPL for VF RSS according to max Rx queue */
> > +	psrtype |= (valid_rxq_num >> 1) <<
> > +		IXGBE_PSRTYPE_RQPL_SHIFT;
> >   	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
> >
> >   	if (dev->data->dev_conf.rxmode.enable_scatter) {
Vladislav Zolotarov Dec. 25, 2014, 1:13 p.m. UTC | #4
On 12/25/14 04:14, Ouyang, Changchun wrote:
> Hi,
>
>> -----Original Message-----
>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>> Sent: Wednesday, December 24, 2014 6:40 PM
>> To: Ouyang, Changchun; dev@dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
>>
>>
>> On 12/24/14 07:23, Ouyang Changchun wrote:
>>> It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable VF
>> RSS.
>>> The psrtype will determine how many queues the received packets will
>>> distribute to, and the value of psrtype should depends on both facet:
>>> max VF rxq number which has been negotiated with PF, and the number of
>> rxq specified in config on guest.
>>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
>>> ---
>>>    lib/librte_pmd_ixgbe/ixgbe_pf.c   | 15 +++++++
>>>    lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 92
>> ++++++++++++++++++++++++++++++++++-----
>>>    2 files changed, 97 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>> b/lib/librte_pmd_ixgbe/ixgbe_pf.c index cbb0145..9c9dad8 100644
>>> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>> @@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev
>> *eth_dev)
>>>    	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw-
>>> mac.num_rar_entries), 0);
>>>    	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw-
>>> mac.num_rar_entries), 0);
>>>
>>> +	/*
>>> +	 * VF RSS can support at most 4 queues for each VF, even if
>>> +	 * 8 queues are available for each VF, it need refine to 4
>>> +	 * queues here due to this limitation, otherwise no queue
>>> +	 * will receive any packet even RSS is enabled.
>> According to Table 7-3 in the 82599 spec RSS is not available when port is
>> configured to have 8 queues per pool. This means that if u see this
>> configuration u may immediately disable RSS flow in your code.
>>
> 8 queues here means the available number queue per vf, it is calculated according to max vfs,
> e.g. if max vfs is 16(or less than), then each vf 'COULD' have 8 queues evenly, pf early init stage estimate this value,
> but that is not precise, so need refine this.
> User don't know this estimated value, it is internal value, not come from user's input/configure.
> Hope it is clear to you.
>>> +	 */
>>> +	if (eth_dev->data->dev_conf.rxmode.mq_mode ==
>> ETH_MQ_RX_VMDQ_RSS) {
>>> +		if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
>>> +			RTE_ETH_DEV_SRIOV(eth_dev).active =
>> ETH_32_POOLS;
>>> +			RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
>>> +			RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
>>> +				dev_num_vf(eth_dev) * 4;
>> According to 82599 spec u can't do that since RSS is not allowed when port is
>> configured to have 8 function per-VF. Have u verified that this works? If yes,
>> then spec should be updated.
>>
> Response as above,
> Of course I have validated this. It works well.
>
>>> +		}
>>> +	}
>>> +
>>>    	/* set VMDq map to default PF pool */
>>>    	hw->mac.ops.set_vmdq(hw, 0,
>>> RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
>>>
>>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>>> b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>>> index f69abda..a7c17a4 100644
>>> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>>> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>>> @@ -3327,6 +3327,39 @@ ixgbe_alloc_rx_queue_mbufs(struct
>> igb_rx_queue *rxq)
>>>    }
>>>
>>>    static int
>>> +ixgbe_config_vf_rss(struct rte_eth_dev *dev) {
>>> +	struct ixgbe_hw *hw;
>>> +	uint32_t mrqc;
>>> +
>>> +	ixgbe_rss_configure(dev);
>>> +
>>> +	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
>>> +
>>> +	/* MRQC: enable VF RSS */
>>> +	mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
>>> +	mrqc &= ~IXGBE_MRQC_MRQE_MASK;
>>> +	switch (RTE_ETH_DEV_SRIOV(dev).active) {
>>> +	case ETH_64_POOLS:
>>> +		mrqc |= IXGBE_MRQC_VMDQRSS64EN;
>>> +		break;
>>> +
>>> +	case ETH_32_POOLS:
>>> +	case ETH_16_POOLS:
>>> +		mrqc |= IXGBE_MRQC_VMDQRSS32EN;
>> Again, this contradicts with the spec.
>>
>>> +		break;
>>> +
>>> +	default:
>>> +		PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode");
>>> +		return -EINVAL;
>>> +	}
>>> +
>>> +	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static int
>>>    ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
>>>    {
>>>    	struct ixgbe_hw *hw =
>>> @@ -3358,24 +3391,38 @@ ixgbe_dev_mq_rx_configure(struct
>> rte_eth_dev *dev)
>>>    			default: ixgbe_rss_disable(dev);
>>>    		}
>>>    	} else {
>>> -		switch (RTE_ETH_DEV_SRIOV(dev).active) {
>>>    		/*
>>>    		 * SRIOV active scheme
>>>    		 * FIXME if support DCB/RSS together with VMDq & SRIOV
>>>    		 */
>>> -		case ETH_64_POOLS:
>>> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
>> IXGBE_MRQC_VMDQEN);
>>> +		switch (dev->data->dev_conf.rxmode.mq_mode) {
>>> +		case ETH_MQ_RX_RSS:
>>> +		case ETH_MQ_RX_VMDQ_RSS:
>>> +			ixgbe_config_vf_rss(dev);
>>>    			break;
>>>
>>> -		case ETH_32_POOLS:
>>> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
>> IXGBE_MRQC_VMDQRT4TCEN);
>>> -			break;
>>> +		default:
>>> +			switch (RTE_ETH_DEV_SRIOV(dev).active) {
>> Sorry for nitpicking but have u considered taking this encapsulated "switch-
>> case" block into a separate function? This could make the code look a lot
>> nicer. ;)
> Only one place use it, so don't need make it a function,
> And I prefer to the current code.

Functions may be used not only to have a repeatedly called code but also 
to make a caller code more readable. Encapsulated switch-case is one of 
the examples of a *not* readable code constructs which should be avoided.

>
>>> +			case ETH_64_POOLS:
>>> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
>>> +					IXGBE_MRQC_VMDQEN);
>>> +				break;
>>>
>>> -		case ETH_16_POOLS:
>>> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
>> IXGBE_MRQC_VMDQRT8TCEN);
>>> +			case ETH_32_POOLS:
>>> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
>>> +					IXGBE_MRQC_VMDQRT4TCEN);
>>> +				break;
>>> +
>>> +			case ETH_16_POOLS:
>>> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
>>> +					IXGBE_MRQC_VMDQRT8TCEN);
>>> +				break;
>>> +			default:
>>> +				PMD_INIT_LOG(ERR,
>>> +					"invalid pool number in IOV mode");
>>> +				break;
>>> +			}
>>>    			break;
>>> -		default:
>>> -			PMD_INIT_LOG(ERR, "invalid pool number in IOV
>> mode");
>>>    		}
>>>    	}
>>>
>>> @@ -3989,10 +4036,32 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
>>>    	uint16_t buf_size;
>>>    	uint16_t i;
>>>    	int ret;
>>> +	uint16_t valid_rxq_num;
>>>
>>>    	PMD_INIT_FUNC_TRACE();
>>>    	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
>>>
>>> +	valid_rxq_num = RTE_MIN(dev->data->nb_rx_queues,
>>> +hw->mac.max_rx_queues);
>>> +
>>> +	/*
>>> +	 * VMDq RSS can't support 3 queues, so config it into 4 queues,
>>> +	 * and give user a hint that some packets may loss if it doesn't
>>> +	 * poll the queue where those packets are distributed to.
>>> +	 */
>>> +	if (valid_rxq_num == 3)
>>> +		valid_rxq_num = 4;
>> Why to configure more queues that requested and not less (2)? Why to
>> configure anything at all and not return an error?
>>
>>> +
>>> +	if (dev->data->nb_rx_queues > valid_rxq_num) {
>>> +		PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
>>> +			"it should be equal to or less than %d",
>>> +			valid_rxq_num);
>>> +		return -1;
>>> +	} else if (dev->data->nb_rx_queues < valid_rxq_num)
>>> +		PMD_INIT_LOG(ERR, "The number of Rx queue is less "
>>> +			"than the number of available Rx queues:%d, "
>>> +			"packets in Rx queues(q_id >= %d) may loss.",
>>> +			valid_rxq_num, dev->data->nb_rx_queues);
>> Who ever looks in the "INIT_LOG" if everything "work well" and u make it
>> look so by allowing this call to succeed. And then some packets will just
>> silently not arrive?! And what the used should somehow guess to do?
>> - Look in the "INIT_LOG"?! This is a nightmare!
> Sorry, I don't think so again, if user find any packets loss, he will care for log,
> Then he can find that log there, then user can refine its rxq number due the wrong rxq number,
> Why is it a nightmare?

Because usually u expect that if the function call returns with a 
success it means a success. Why a user has to learn that a device 
configuration function was provided with wrong parameters from the 
packet loss? If parameters are not allowed u expect to get an error as a 
return value. Since when errors are returned in a form of a log message? 
Why do u think there is a living person running a DPDK based 
application? How do u expect somebody build an automated environment 
when part of errors are returned in some log? Should he/she add a log 
parser?
On the other hand, why do u think 4 queues is a better option for a user 
than 2 queue when he asked for 3 queues? What kind of heuristics is that?

To summarize - it would be much better if u just returned an EINVAL 
error in that case.

>
> I don't agree with you about "silently not arrive", because we have hint/log there.
>
> Return error here is also possible way,

It's the only possible way! ;)

> Again need other guys' insight here.
>
>>> +
>>>    	/*
>>>    	 * When the VF driver issues a IXGBE_VF_RESET request, the PF
>> driver
>>>    	 * disables the VF receipt of packets if the PF MTU is > 1500.
>>> @@ -4094,6 +4163,9 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
>>>    			IXGBE_PSRTYPE_IPV6HDR;
>>>    #endif
>>>
>>> +	/* Set RQPL for VF RSS according to max Rx queue */
>>> +	psrtype |= (valid_rxq_num >> 1) <<
>>> +		IXGBE_PSRTYPE_RQPL_SHIFT;
>>>    	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
>>>
>>>    	if (dev->data->dev_conf.rxmode.enable_scatter) {
Vladislav Zolotarov Dec. 25, 2014, 1:20 p.m. UTC | #5
On 12/25/14 04:43, Ouyang, Changchun wrote:
> Hi,
> Sorry miss some comments, so continue my response below,
>
>> -----Original Message-----
>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>> Sent: Wednesday, December 24, 2014 6:40 PM
>> To: Ouyang, Changchun; dev@dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
>>
>>
>> On 12/24/14 07:23, Ouyang Changchun wrote:
>>> It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable VF
>> RSS.
>>> The psrtype will determine how many queues the received packets will
>>> distribute to, and the value of psrtype should depends on both facet:
>>> max VF rxq number which has been negotiated with PF, and the number of
>> rxq specified in config on guest.
>>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
>>> ---
>>>    lib/librte_pmd_ixgbe/ixgbe_pf.c   | 15 +++++++
>>>    lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 92
>> ++++++++++++++++++++++++++++++++++-----
>>>    2 files changed, 97 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>> b/lib/librte_pmd_ixgbe/ixgbe_pf.c index cbb0145..9c9dad8 100644
>>> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>> @@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev
>> *eth_dev)
>>>    	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw-
>>> mac.num_rar_entries), 0);
>>>    	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw-
>>> mac.num_rar_entries), 0);
>>>
>>> +	/*
>>> +	 * VF RSS can support at most 4 queues for each VF, even if
>>> +	 * 8 queues are available for each VF, it need refine to 4
>>> +	 * queues here due to this limitation, otherwise no queue
>>> +	 * will receive any packet even RSS is enabled.
>> According to Table 7-3 in the 82599 spec RSS is not available when port is
>> configured to have 8 queues per pool. This means that if u see this
>> configuration u may immediately disable RSS flow in your code.
>>
>>> +	 */
>>> +	if (eth_dev->data->dev_conf.rxmode.mq_mode ==
>> ETH_MQ_RX_VMDQ_RSS) {
>>> +		if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
>>> +			RTE_ETH_DEV_SRIOV(eth_dev).active =
>> ETH_32_POOLS;
>>> +			RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
>>> +			RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
>>> +				dev_num_vf(eth_dev) * 4;
>> According to 82599 spec u can't do that since RSS is not allowed when port is
>> configured to have 8 function per-VF. Have u verified that this works? If yes,
>> then spec should be updated.
>>
>>> +		}
>>> +	}
>>> +
>>>    	/* set VMDq map to default PF pool */
>>>    	hw->mac.ops.set_vmdq(hw, 0,
>>> RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
>>>
>>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>>> b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>>> index f69abda..a7c17a4 100644
>>> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>>> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>>> @@ -3327,6 +3327,39 @@ ixgbe_alloc_rx_queue_mbufs(struct
>> igb_rx_queue *rxq)
>>>    }
>>>
>>>    static int
>>> +ixgbe_config_vf_rss(struct rte_eth_dev *dev) {
>>> +	struct ixgbe_hw *hw;
>>> +	uint32_t mrqc;
>>> +
>>> +	ixgbe_rss_configure(dev);
>>> +
>>> +	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
>>> +
>>> +	/* MRQC: enable VF RSS */
>>> +	mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
>>> +	mrqc &= ~IXGBE_MRQC_MRQE_MASK;
>>> +	switch (RTE_ETH_DEV_SRIOV(dev).active) {
>>> +	case ETH_64_POOLS:
>>> +		mrqc |= IXGBE_MRQC_VMDQRSS64EN;
>>> +		break;
>>> +
>>> +	case ETH_32_POOLS:
>>> +	case ETH_16_POOLS:
>>> +		mrqc |= IXGBE_MRQC_VMDQRSS32EN;
>> Again, this contradicts with the spec.
> Yes, the spec say the hw can't support vf rss at all, but experiment find that could be done.

The spec explicitly say that VF RSS *is* supported in particular in the 
table mentioned above.
What your code is doing is that in case of 16 VFs u setup a 32 pools 
configuration and use only 16 out of them.

> We can focus on discussing the implementation firstly.

>   
>>> +		break;
>>> +
>>> +	default:
>>> +		PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode");
>>> +		return -EINVAL;
>>> +	}
>>> +
>>> +	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static int
>>>    ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
>>>    {
>>>    	struct ixgbe_hw *hw =
>>> @@ -3358,24 +3391,38 @@ ixgbe_dev_mq_rx_configure(struct
>> rte_eth_dev *dev)
>>>    			default: ixgbe_rss_disable(dev);
>>>    		}
>>>    	} else {
>>> -		switch (RTE_ETH_DEV_SRIOV(dev).active) {
>>>    		/*
>>>    		 * SRIOV active scheme
>>>    		 * FIXME if support DCB/RSS together with VMDq & SRIOV
>>>    		 */
>>> -		case ETH_64_POOLS:
>>> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
>> IXGBE_MRQC_VMDQEN);
>>> +		switch (dev->data->dev_conf.rxmode.mq_mode) {
>>> +		case ETH_MQ_RX_RSS:
>>> +		case ETH_MQ_RX_VMDQ_RSS:
>>> +			ixgbe_config_vf_rss(dev);
>>>    			break;
>>>
>>> -		case ETH_32_POOLS:
>>> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
>> IXGBE_MRQC_VMDQRT4TCEN);
>>> -			break;
>>> +		default:
>>> +			switch (RTE_ETH_DEV_SRIOV(dev).active) {
>> Sorry for nitpicking but have u considered taking this encapsulated "switch-
>> case" block into a separate function? This could make the code look a lot
>> nicer. ;)
>>
>>> +			case ETH_64_POOLS:
>>> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
>>> +					IXGBE_MRQC_VMDQEN);
>>> +				break;
>>>
>>> -		case ETH_16_POOLS:
>>> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
>> IXGBE_MRQC_VMDQRT8TCEN);
>>> +			case ETH_32_POOLS:
>>> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
>>> +					IXGBE_MRQC_VMDQRT4TCEN);
>>> +				break;
>>> +
>>> +			case ETH_16_POOLS:
>>> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
>>> +					IXGBE_MRQC_VMDQRT8TCEN);
>>> +				break;
>>> +			default:
>>> +				PMD_INIT_LOG(ERR,
>>> +					"invalid pool number in IOV mode");
>>> +				break;
>>> +			}
>>>    			break;
>>> -		default:
>>> -			PMD_INIT_LOG(ERR, "invalid pool number in IOV
>> mode");
>>>    		}
>>>    	}
>>>
>>> @@ -3989,10 +4036,32 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
>>>    	uint16_t buf_size;
>>>    	uint16_t i;
>>>    	int ret;
>>> +	uint16_t valid_rxq_num;
>>>
>>>    	PMD_INIT_FUNC_TRACE();
>>>    	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
>>>
>>> +	valid_rxq_num = RTE_MIN(dev->data->nb_rx_queues,
>>> +hw->mac.max_rx_queues);
>>> +
>>> +	/*
>>> +	 * VMDq RSS can't support 3 queues, so config it into 4 queues,
>>> +	 * and give user a hint that some packets may loss if it doesn't
>>> +	 * poll the queue where those packets are distributed to.
>>> +	 */
>>> +	if (valid_rxq_num == 3)
>>> +		valid_rxq_num = 4;
>> Why to configure more queues that requested and not less (2)? Why to
>> configure anything at all and not return an error?
> Sorry, I don't agree this is "anything" you say, because I don't use 5,6,7, 8, ..., 16, 2014, 2015,... etc.
> By considering 2 or 4,
> I prefer 4, the reason is if user need more than 3 queues per vf to do something,
> And pf has also the capability to setup 4 queues per vf, confining to 2 queues is also not good thing,
> So here try to enable 4 queues, and give user hints here.
> Btw, change it into 2 is another way, depends on other guys' more insight here.

Like I said before, trying to guess what user wants is a way to making a 
code that is very hard to use and to maintain. Pls., just return an 
error and let the user code deal with it the way he/she really wants and 
not the way u *think* he/she wants.

>
>>> +
>>> +	if (dev->data->nb_rx_queues > valid_rxq_num) {
>>> +		PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
>>> +			"it should be equal to or less than %d",
>>> +			valid_rxq_num);
>>> +		return -1;
>>> +	} else if (dev->data->nb_rx_queues < valid_rxq_num)
>>> +		PMD_INIT_LOG(ERR, "The number of Rx queue is less "
>>> +			"than the number of available Rx queues:%d, "
>>> +			"packets in Rx queues(q_id >= %d) may loss.",
>>> +			valid_rxq_num, dev->data->nb_rx_queues);
>> Who ever looks in the "INIT_LOG" if everything "work well" and u make it
>> look so by allowing this call to succeed. And then some packets will just
>> silently not arrive?! And what the used should somehow guess to do?
>> - Look in the "INIT_LOG"?! This is a nightmare!
>>
>>> +
>>>    	/*
>>>    	 * When the VF driver issues a IXGBE_VF_RESET request, the PF
>> driver
>>>    	 * disables the VF receipt of packets if the PF MTU is > 1500.
>>> @@ -4094,6 +4163,9 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
>>>    			IXGBE_PSRTYPE_IPV6HDR;
>>>    #endif
>>>
>>> +	/* Set RQPL for VF RSS according to max Rx queue */
>>> +	psrtype |= (valid_rxq_num >> 1) <<
>>> +		IXGBE_PSRTYPE_RQPL_SHIFT;
>>>    	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
>>>
>>>    	if (dev->data->dev_conf.rxmode.enable_scatter) {
Vladislav Zolotarov Dec. 25, 2014, 1:38 p.m. UTC | #6
On 12/25/14 04:43, Ouyang, Changchun wrote:
> Hi,
> Sorry miss some comments, so continue my response below,
>
>> -----Original Message-----
>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>> Sent: Wednesday, December 24, 2014 6:40 PM
>> To: Ouyang, Changchun; dev@dpdk.org
>> Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
>>
>>
>> On 12/24/14 07:23, Ouyang Changchun wrote:
>>> It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable VF
>> RSS.
>>> The psrtype will determine how many queues the received packets will
>>> distribute to, and the value of psrtype should depends on both facet:
>>> max VF rxq number which has been negotiated with PF, and the number of
>> rxq specified in config on guest.
>>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
>>> ---
>>>    lib/librte_pmd_ixgbe/ixgbe_pf.c   | 15 +++++++
>>>    lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 92
>> ++++++++++++++++++++++++++++++++++-----
>>>    2 files changed, 97 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>> b/lib/librte_pmd_ixgbe/ixgbe_pf.c index cbb0145..9c9dad8 100644
>>> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>> @@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev
>> *eth_dev)
>>>    	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw-
>>> mac.num_rar_entries), 0);
>>>    	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw-
>>> mac.num_rar_entries), 0);
>>>
>>> +	/*
>>> +	 * VF RSS can support at most 4 queues for each VF, even if
>>> +	 * 8 queues are available for each VF, it need refine to 4
>>> +	 * queues here due to this limitation, otherwise no queue
>>> +	 * will receive any packet even RSS is enabled.
>> According to Table 7-3 in the 82599 spec RSS is not available when port is
>> configured to have 8 queues per pool. This means that if u see this
>> configuration u may immediately disable RSS flow in your code.
>>
>>> +	 */
>>> +	if (eth_dev->data->dev_conf.rxmode.mq_mode ==
>> ETH_MQ_RX_VMDQ_RSS) {
>>> +		if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
>>> +			RTE_ETH_DEV_SRIOV(eth_dev).active =
>> ETH_32_POOLS;
>>> +			RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
>>> +			RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
>>> +				dev_num_vf(eth_dev) * 4;
>> According to 82599 spec u can't do that since RSS is not allowed when port is
>> configured to have 8 function per-VF. Have u verified that this works? If yes,
>> then spec should be updated.
>>
>>> +		}
>>> +	}
>>> +
>>>    	/* set VMDq map to default PF pool */
>>>    	hw->mac.ops.set_vmdq(hw, 0,
>>> RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
>>>
>>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>>> b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>>> index f69abda..a7c17a4 100644
>>> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>>> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>>> @@ -3327,6 +3327,39 @@ ixgbe_alloc_rx_queue_mbufs(struct
>> igb_rx_queue *rxq)
>>>    }
>>>
>>>    static int
>>> +ixgbe_config_vf_rss(struct rte_eth_dev *dev) {
>>> +	struct ixgbe_hw *hw;
>>> +	uint32_t mrqc;
>>> +
>>> +	ixgbe_rss_configure(dev);
>>> +
>>> +	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
>>> +
>>> +	/* MRQC: enable VF RSS */
>>> +	mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
>>> +	mrqc &= ~IXGBE_MRQC_MRQE_MASK;
>>> +	switch (RTE_ETH_DEV_SRIOV(dev).active) {
>>> +	case ETH_64_POOLS:
>>> +		mrqc |= IXGBE_MRQC_VMDQRSS64EN;
>>> +		break;
>>> +
>>> +	case ETH_32_POOLS:
>>> +	case ETH_16_POOLS:
>>> +		mrqc |= IXGBE_MRQC_VMDQRSS32EN;
>> Again, this contradicts with the spec.
> Yes, the spec say the hw can't support vf rss at all, but experiment find that could be done.

I have just realized something - why did u have to experiment at all? U 
work at Intel, don't u? Can't u just ask a HW engineer that have 
designed this NIC? What do u mean by an "experiment" here?
 From my experience u can't just write some random values in the 
registers and conclude that if it worked for like 5 minutes it will 
continue to work for the next minute... There is always a clear 
procedure of how HW should be initialized and used and that's the only 
way it may be used since this was the way the HW has been tested. U 
can't assume anything in regards to reliability if u don't follow specs 
and programmer manuals of HW provider.

Could u clarify, please?

> We can focus on discussing the implementation firstly.
>   
>>> +		break;
>>> +
>>> +	default:
>>> +		PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode");
>>> +		return -EINVAL;
>>> +	}
>>> +
>>> +	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static int
>>>    ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
>>>    {
>>>    	struct ixgbe_hw *hw =
>>> @@ -3358,24 +3391,38 @@ ixgbe_dev_mq_rx_configure(struct
>> rte_eth_dev *dev)
>>>    			default: ixgbe_rss_disable(dev);
>>>    		}
>>>    	} else {
>>> -		switch (RTE_ETH_DEV_SRIOV(dev).active) {
>>>    		/*
>>>    		 * SRIOV active scheme
>>>    		 * FIXME if support DCB/RSS together with VMDq & SRIOV
>>>    		 */
>>> -		case ETH_64_POOLS:
>>> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
>> IXGBE_MRQC_VMDQEN);
>>> +		switch (dev->data->dev_conf.rxmode.mq_mode) {
>>> +		case ETH_MQ_RX_RSS:
>>> +		case ETH_MQ_RX_VMDQ_RSS:
>>> +			ixgbe_config_vf_rss(dev);
>>>    			break;
>>>
>>> -		case ETH_32_POOLS:
>>> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
>> IXGBE_MRQC_VMDQRT4TCEN);
>>> -			break;
>>> +		default:
>>> +			switch (RTE_ETH_DEV_SRIOV(dev).active) {
>> Sorry for nitpicking but have u considered taking this encapsulated "switch-
>> case" block into a separate function? This could make the code look a lot
>> nicer. ;)
>>
>>> +			case ETH_64_POOLS:
>>> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
>>> +					IXGBE_MRQC_VMDQEN);
>>> +				break;
>>>
>>> -		case ETH_16_POOLS:
>>> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
>> IXGBE_MRQC_VMDQRT8TCEN);
>>> +			case ETH_32_POOLS:
>>> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
>>> +					IXGBE_MRQC_VMDQRT4TCEN);
>>> +				break;
>>> +
>>> +			case ETH_16_POOLS:
>>> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
>>> +					IXGBE_MRQC_VMDQRT8TCEN);
>>> +				break;
>>> +			default:
>>> +				PMD_INIT_LOG(ERR,
>>> +					"invalid pool number in IOV mode");
>>> +				break;
>>> +			}
>>>    			break;
>>> -		default:
>>> -			PMD_INIT_LOG(ERR, "invalid pool number in IOV
>> mode");
>>>    		}
>>>    	}
>>>
>>> @@ -3989,10 +4036,32 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
>>>    	uint16_t buf_size;
>>>    	uint16_t i;
>>>    	int ret;
>>> +	uint16_t valid_rxq_num;
>>>
>>>    	PMD_INIT_FUNC_TRACE();
>>>    	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
>>>
>>> +	valid_rxq_num = RTE_MIN(dev->data->nb_rx_queues,
>>> +hw->mac.max_rx_queues);
>>> +
>>> +	/*
>>> +	 * VMDq RSS can't support 3 queues, so config it into 4 queues,
>>> +	 * and give user a hint that some packets may loss if it doesn't
>>> +	 * poll the queue where those packets are distributed to.
>>> +	 */
>>> +	if (valid_rxq_num == 3)
>>> +		valid_rxq_num = 4;
>> Why to configure more queues that requested and not less (2)? Why to
>> configure anything at all and not return an error?
> Sorry, I don't agree this is "anything" you say, because I don't use 5,6,7, 8, ..., 16, 2014, 2015,... etc.
> By considering 2 or 4,
> I prefer 4, the reason is if user need more than 3 queues per vf to do something,
> And pf has also the capability to setup 4 queues per vf, confining to 2 queues is also not good thing,
> So here try to enable 4 queues, and give user hints here.
> Btw, change it into 2 is another way, depends on other guys' more insight here.
>
>>> +
>>> +	if (dev->data->nb_rx_queues > valid_rxq_num) {
>>> +		PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
>>> +			"it should be equal to or less than %d",
>>> +			valid_rxq_num);
>>> +		return -1;
>>> +	} else if (dev->data->nb_rx_queues < valid_rxq_num)
>>> +		PMD_INIT_LOG(ERR, "The number of Rx queue is less "
>>> +			"than the number of available Rx queues:%d, "
>>> +			"packets in Rx queues(q_id >= %d) may loss.",
>>> +			valid_rxq_num, dev->data->nb_rx_queues);
>> Who ever looks in the "INIT_LOG" if everything "work well" and u make it
>> look so by allowing this call to succeed. And then some packets will just
>> silently not arrive?! And what the used should somehow guess to do?
>> - Look in the "INIT_LOG"?! This is a nightmare!
>>
>>> +
>>>    	/*
>>>    	 * When the VF driver issues a IXGBE_VF_RESET request, the PF
>> driver
>>>    	 * disables the VF receipt of packets if the PF MTU is > 1500.
>>> @@ -4094,6 +4163,9 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
>>>    			IXGBE_PSRTYPE_IPV6HDR;
>>>    #endif
>>>
>>> +	/* Set RQPL for VF RSS according to max Rx queue */
>>> +	psrtype |= (valid_rxq_num >> 1) <<
>>> +		IXGBE_PSRTYPE_RQPL_SHIFT;
>>>    	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
>>>
>>>    	if (dev->data->dev_conf.rxmode.enable_scatter) {
Ouyang Changchun Dec. 26, 2014, 1:26 a.m. UTC | #7
> -----Original Message-----
> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> Sent: Thursday, December 25, 2014 9:39 PM
> To: Ouyang, Changchun; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
> 
> 
> On 12/25/14 04:43, Ouyang, Changchun wrote:
> > Hi,
> > Sorry miss some comments, so continue my response below,
> >
> >> -----Original Message-----
> >> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> >> Sent: Wednesday, December 24, 2014 6:40 PM
> >> To: Ouyang, Changchun; dev@dpdk.org
> >> Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
> >>
> >>
> >> On 12/24/14 07:23, Ouyang Changchun wrote:
> >>> It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable
> VF
> >> RSS.
> >>> The psrtype will determine how many queues the received packets will
> >>> distribute to, and the value of psrtype should depends on both facet:
> >>> max VF rxq number which has been negotiated with PF, and the number
> >>> of
> >> rxq specified in config on guest.
> >>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> >>> ---
> >>>    lib/librte_pmd_ixgbe/ixgbe_pf.c   | 15 +++++++
> >>>    lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 92
> >> ++++++++++++++++++++++++++++++++++-----
> >>>    2 files changed, 97 insertions(+), 10 deletions(-)
> >>>
> >>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> >>> b/lib/librte_pmd_ixgbe/ixgbe_pf.c index cbb0145..9c9dad8 100644
> >>> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> >>> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> >>> @@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev
> >> *eth_dev)
> >>>    	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw-
> mac.num_rar_entries), 0);
> >>>    	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw-
> mac.num_rar_entries), 0);
> >>>
> >>> +	/*
> >>> +	 * VF RSS can support at most 4 queues for each VF, even if
> >>> +	 * 8 queues are available for each VF, it need refine to 4
> >>> +	 * queues here due to this limitation, otherwise no queue
> >>> +	 * will receive any packet even RSS is enabled.
> >> According to Table 7-3 in the 82599 spec RSS is not available when
> >> port is configured to have 8 queues per pool. This means that if u
> >> see this configuration u may immediately disable RSS flow in your code.
> >>
> >>> +	 */
> >>> +	if (eth_dev->data->dev_conf.rxmode.mq_mode ==
> >> ETH_MQ_RX_VMDQ_RSS) {
> >>> +		if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
> >>> +			RTE_ETH_DEV_SRIOV(eth_dev).active =
> >> ETH_32_POOLS;
> >>> +			RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
> >>> +			RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
> >>> +				dev_num_vf(eth_dev) * 4;
> >> According to 82599 spec u can't do that since RSS is not allowed when
> >> port is configured to have 8 function per-VF. Have u verified that
> >> this works? If yes, then spec should be updated.
> >>
> >>> +		}
> >>> +	}
> >>> +
> >>>    	/* set VMDq map to default PF pool */
> >>>    	hw->mac.ops.set_vmdq(hw, 0,
> >>> RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
> >>>
> >>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> >>> b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> >>> index f69abda..a7c17a4 100644
> >>> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> >>> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> >>> @@ -3327,6 +3327,39 @@ ixgbe_alloc_rx_queue_mbufs(struct
> >> igb_rx_queue *rxq)
> >>>    }
> >>>
> >>>    static int
> >>> +ixgbe_config_vf_rss(struct rte_eth_dev *dev) {
> >>> +	struct ixgbe_hw *hw;
> >>> +	uint32_t mrqc;
> >>> +
> >>> +	ixgbe_rss_configure(dev);
> >>> +
> >>> +	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> >>> +
> >>> +	/* MRQC: enable VF RSS */
> >>> +	mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
> >>> +	mrqc &= ~IXGBE_MRQC_MRQE_MASK;
> >>> +	switch (RTE_ETH_DEV_SRIOV(dev).active) {
> >>> +	case ETH_64_POOLS:
> >>> +		mrqc |= IXGBE_MRQC_VMDQRSS64EN;
> >>> +		break;
> >>> +
> >>> +	case ETH_32_POOLS:
> >>> +	case ETH_16_POOLS:
> >>> +		mrqc |= IXGBE_MRQC_VMDQRSS32EN;
> >> Again, this contradicts with the spec.
> > Yes, the spec say the hw can't support vf rss at all, but experiment find that
> could be done.
> 
> I have just realized something - why did u have to experiment at all? U work
> at Intel, don't u? Can't u just ask a HW engineer that have designed this NIC?
> What do u mean by an "experiment" here?

HW and dpdk team are cross-geo and cross division, experiment is often needed
Because no spec, doc, discussion, meeting minutes is perfect and can include everything absolutely correct,
And don't miss one thing, are you agree?
  
Anyway Let's focus on technical question, please. 
 
>  From my experience u can't just write some random values in the registers

I don't think it is random data at all, do you see I use a random seed to generate a random data here?

> and conclude that if it worked for like 5 minutes it will continue to work for
> the next minute... There is always a clear procedure of how HW should be
> initialized and used and that's the only way it may be used since this was the
> way the HW has been tested. U can't assume anything in regards to reliability
> if u don't follow specs and programmer manuals of HW provider.
>
Sorry I don't think it doesn't follow spec and programmer manuals.
and if you have a bit bigger picture here(just mean the overall process for the vf rss enabling, and nothing else,
Don't think too much, hope it is clear), I am sure you won't get confused here.
Or my observation is you probably have another perfect solution to support vf rss in dpdk,
Right? If you have it, you could also submit your patch here, this is open source, I think you are free to do it.

> Could u clarify, please?
> 
> > We can focus on discussing the implementation firstly.
> >
> >>> +		break;
> >>> +
> >>> +	default:
> >>> +		PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode");
> >>> +		return -EINVAL;
> >>> +	}
> >>> +
> >>> +	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
> >>> +
> >>> +	return 0;
> >>> +}
> >>> +
> >>> +static int
> >>>    ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
> >>>    {
> >>>    	struct ixgbe_hw *hw =
> >>> @@ -3358,24 +3391,38 @@ ixgbe_dev_mq_rx_configure(struct
> >> rte_eth_dev *dev)
> >>>    			default: ixgbe_rss_disable(dev);
> >>>    		}
> >>>    	} else {
> >>> -		switch (RTE_ETH_DEV_SRIOV(dev).active) {
> >>>    		/*
> >>>    		 * SRIOV active scheme
> >>>    		 * FIXME if support DCB/RSS together with VMDq & SRIOV
> >>>    		 */
> >>> -		case ETH_64_POOLS:
> >>> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> >> IXGBE_MRQC_VMDQEN);
> >>> +		switch (dev->data->dev_conf.rxmode.mq_mode) {
> >>> +		case ETH_MQ_RX_RSS:
> >>> +		case ETH_MQ_RX_VMDQ_RSS:
> >>> +			ixgbe_config_vf_rss(dev);
> >>>    			break;
> >>>
> >>> -		case ETH_32_POOLS:
> >>> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> >> IXGBE_MRQC_VMDQRT4TCEN);
> >>> -			break;
> >>> +		default:
> >>> +			switch (RTE_ETH_DEV_SRIOV(dev).active) {
> >> Sorry for nitpicking but have u considered taking this encapsulated
> >> "switch- case" block into a separate function? This could make the
> >> code look a lot nicer. ;)
> >>
> >>> +			case ETH_64_POOLS:
> >>> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> >>> +					IXGBE_MRQC_VMDQEN);
> >>> +				break;
> >>>
> >>> -		case ETH_16_POOLS:
> >>> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> >> IXGBE_MRQC_VMDQRT8TCEN);
> >>> +			case ETH_32_POOLS:
> >>> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> >>> +					IXGBE_MRQC_VMDQRT4TCEN);
> >>> +				break;
> >>> +
> >>> +			case ETH_16_POOLS:
> >>> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> >>> +					IXGBE_MRQC_VMDQRT8TCEN);
> >>> +				break;
> >>> +			default:
> >>> +				PMD_INIT_LOG(ERR,
> >>> +					"invalid pool number in IOV mode");
> >>> +				break;
> >>> +			}
> >>>    			break;
> >>> -		default:
> >>> -			PMD_INIT_LOG(ERR, "invalid pool number in IOV
> >> mode");
> >>>    		}
> >>>    	}
> >>>
> >>> @@ -3989,10 +4036,32 @@ ixgbevf_dev_rx_init(struct rte_eth_dev
> *dev)
> >>>    	uint16_t buf_size;
> >>>    	uint16_t i;
> >>>    	int ret;
> >>> +	uint16_t valid_rxq_num;
> >>>
> >>>    	PMD_INIT_FUNC_TRACE();
> >>>    	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> >>>
> >>> +	valid_rxq_num = RTE_MIN(dev->data->nb_rx_queues,
> >>> +hw->mac.max_rx_queues);
> >>> +
> >>> +	/*
> >>> +	 * VMDq RSS can't support 3 queues, so config it into 4 queues,
> >>> +	 * and give user a hint that some packets may loss if it doesn't
> >>> +	 * poll the queue where those packets are distributed to.
> >>> +	 */
> >>> +	if (valid_rxq_num == 3)
> >>> +		valid_rxq_num = 4;
> >> Why to configure more queues that requested and not less (2)? Why to
> >> configure anything at all and not return an error?
> > Sorry, I don't agree this is "anything" you say, because I don't use 5,6,7,
> 8, ..., 16, 2014, 2015,... etc.
> > By considering 2 or 4,
> > I prefer 4, the reason is if user need more than 3 queues per vf to do
> > something, And pf has also the capability to setup 4 queues per vf,
> > confining to 2 queues is also not good thing, So here try to enable 4 queues,
> and give user hints here.
> > Btw, change it into 2 is another way, depends on other guys' more insight
> here.
> >
> >>> +
> >>> +	if (dev->data->nb_rx_queues > valid_rxq_num) {
> >>> +		PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
> >>> +			"it should be equal to or less than %d",
> >>> +			valid_rxq_num);
> >>> +		return -1;
> >>> +	} else if (dev->data->nb_rx_queues < valid_rxq_num)
> >>> +		PMD_INIT_LOG(ERR, "The number of Rx queue is less "
> >>> +			"than the number of available Rx queues:%d, "
> >>> +			"packets in Rx queues(q_id >= %d) may loss.",
> >>> +			valid_rxq_num, dev->data->nb_rx_queues);
> >> Who ever looks in the "INIT_LOG" if everything "work well" and u make
> >> it look so by allowing this call to succeed. And then some packets
> >> will just silently not arrive?! And what the used should somehow guess to
> do?
> >> - Look in the "INIT_LOG"?! This is a nightmare!
> >>
> >>> +
> >>>    	/*
> >>>    	 * When the VF driver issues a IXGBE_VF_RESET request, the PF
> >> driver
> >>>    	 * disables the VF receipt of packets if the PF MTU is > 1500.
> >>> @@ -4094,6 +4163,9 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
> >>>    			IXGBE_PSRTYPE_IPV6HDR;
> >>>    #endif
> >>>
> >>> +	/* Set RQPL for VF RSS according to max Rx queue */
> >>> +	psrtype |= (valid_rxq_num >> 1) <<
> >>> +		IXGBE_PSRTYPE_RQPL_SHIFT;
> >>>    	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
> >>>
> >>>    	if (dev->data->dev_conf.rxmode.enable_scatter) {
Ouyang Changchun Dec. 26, 2014, 1:52 a.m. UTC | #8
> -----Original Message-----
> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> Sent: Thursday, December 25, 2014 9:20 PM
> To: Ouyang, Changchun; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
> 
> 
> On 12/25/14 04:43, Ouyang, Changchun wrote:
> > Hi,
> > Sorry miss some comments, so continue my response below,
> >
> >> -----Original Message-----
> >> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> >> Sent: Wednesday, December 24, 2014 6:40 PM
> >> To: Ouyang, Changchun; dev@dpdk.org
> >> Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
> >>
> >>
> >> On 12/24/14 07:23, Ouyang Changchun wrote:
> >>> It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable
> VF
> >> RSS.
> >>> The psrtype will determine how many queues the received packets will
> >>> distribute to, and the value of psrtype should depends on both facet:
> >>> max VF rxq number which has been negotiated with PF, and the number
> >>> of
> >> rxq specified in config on guest.
> >>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> >>> ---
> >>>    lib/librte_pmd_ixgbe/ixgbe_pf.c   | 15 +++++++
> >>>    lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 92
> >> ++++++++++++++++++++++++++++++++++-----
> >>>    2 files changed, 97 insertions(+), 10 deletions(-)
> >>>
> >>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> >>> b/lib/librte_pmd_ixgbe/ixgbe_pf.c index cbb0145..9c9dad8 100644
> >>> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> >>> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> >>> @@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev
> >> *eth_dev)
> >>>    	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw-
> mac.num_rar_entries), 0);
> >>>    	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw-
> mac.num_rar_entries), 0);
> >>>
> >>> +	/*
> >>> +	 * VF RSS can support at most 4 queues for each VF, even if
> >>> +	 * 8 queues are available for each VF, it need refine to 4
> >>> +	 * queues here due to this limitation, otherwise no queue
> >>> +	 * will receive any packet even RSS is enabled.
> >> According to Table 7-3 in the 82599 spec RSS is not available when
> >> port is configured to have 8 queues per pool. This means that if u
> >> see this configuration u may immediately disable RSS flow in your code.
> >>
> >>> +	 */
> >>> +	if (eth_dev->data->dev_conf.rxmode.mq_mode ==
> >> ETH_MQ_RX_VMDQ_RSS) {
> >>> +		if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
> >>> +			RTE_ETH_DEV_SRIOV(eth_dev).active =
> >> ETH_32_POOLS;
> >>> +			RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
> >>> +			RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
> >>> +				dev_num_vf(eth_dev) * 4;
> >> According to 82599 spec u can't do that since RSS is not allowed when
> >> port is configured to have 8 function per-VF. Have u verified that
> >> this works? If yes, then spec should be updated.
> >>
> >>> +		}
> >>> +	}
> >>> +
> >>>    	/* set VMDq map to default PF pool */
> >>>    	hw->mac.ops.set_vmdq(hw, 0,
> >>> RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
> >>>
> >>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> >>> b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> >>> index f69abda..a7c17a4 100644
> >>> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> >>> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> >>> @@ -3327,6 +3327,39 @@ ixgbe_alloc_rx_queue_mbufs(struct
> >> igb_rx_queue *rxq)
> >>>    }
> >>>
> >>>    static int
> >>> +ixgbe_config_vf_rss(struct rte_eth_dev *dev) {
> >>> +	struct ixgbe_hw *hw;
> >>> +	uint32_t mrqc;
> >>> +
> >>> +	ixgbe_rss_configure(dev);
> >>> +
> >>> +	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> >>> +
> >>> +	/* MRQC: enable VF RSS */
> >>> +	mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
> >>> +	mrqc &= ~IXGBE_MRQC_MRQE_MASK;
> >>> +	switch (RTE_ETH_DEV_SRIOV(dev).active) {
> >>> +	case ETH_64_POOLS:
> >>> +		mrqc |= IXGBE_MRQC_VMDQRSS64EN;
> >>> +		break;
> >>> +
> >>> +	case ETH_32_POOLS:
> >>> +	case ETH_16_POOLS:
> >>> +		mrqc |= IXGBE_MRQC_VMDQRSS32EN;
> >> Again, this contradicts with the spec.
> > Yes, the spec say the hw can't support vf rss at all, but experiment find that
> could be done.
> 
> The spec explicitly say that VF RSS *is* supported in particular in the table
> mentioned above.
But the spec(January 2014 revision 2.9) on my hand says: "in IOV mode, VMDq+RSS mode is not available"  in note of section 4.6.10.2.1
> What your code is doing is that in case of 16 VFs u setup a 32 pools
> configuration and use only 16 out of them.
But I don't see any big issue here, in this case, each vf COULD have 8 queues, like I said before, but this is estimation value, actually only 4 queues
Are really available for one vf, you can refer to spec for the correctness here.
> 
> > We can focus on discussing the implementation firstly.
> 
> >
> >>> +		break;
> >>> +
> >>> +	default:
> >>> +		PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode");
> >>> +		return -EINVAL;
> >>> +	}
> >>> +
> >>> +	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
> >>> +
> >>> +	return 0;
> >>> +}
> >>> +
> >>> +static int
> >>>    ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
> >>>    {
> >>>    	struct ixgbe_hw *hw =
> >>> @@ -3358,24 +3391,38 @@ ixgbe_dev_mq_rx_configure(struct
> >> rte_eth_dev *dev)
> >>>    			default: ixgbe_rss_disable(dev);
> >>>    		}
> >>>    	} else {
> >>> -		switch (RTE_ETH_DEV_SRIOV(dev).active) {
> >>>    		/*
> >>>    		 * SRIOV active scheme
> >>>    		 * FIXME if support DCB/RSS together with VMDq & SRIOV
> >>>    		 */
> >>> -		case ETH_64_POOLS:
> >>> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> >> IXGBE_MRQC_VMDQEN);
> >>> +		switch (dev->data->dev_conf.rxmode.mq_mode) {
> >>> +		case ETH_MQ_RX_RSS:
> >>> +		case ETH_MQ_RX_VMDQ_RSS:
> >>> +			ixgbe_config_vf_rss(dev);
> >>>    			break;
> >>>
> >>> -		case ETH_32_POOLS:
> >>> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> >> IXGBE_MRQC_VMDQRT4TCEN);
> >>> -			break;
> >>> +		default:
> >>> +			switch (RTE_ETH_DEV_SRIOV(dev).active) {
> >> Sorry for nitpicking but have u considered taking this encapsulated
> >> "switch- case" block into a separate function? This could make the
> >> code look a lot nicer. ;)
> >>
> >>> +			case ETH_64_POOLS:
> >>> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> >>> +					IXGBE_MRQC_VMDQEN);
> >>> +				break;
> >>>
> >>> -		case ETH_16_POOLS:
> >>> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> >> IXGBE_MRQC_VMDQRT8TCEN);
> >>> +			case ETH_32_POOLS:
> >>> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> >>> +					IXGBE_MRQC_VMDQRT4TCEN);
> >>> +				break;
> >>> +
> >>> +			case ETH_16_POOLS:
> >>> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> >>> +					IXGBE_MRQC_VMDQRT8TCEN);
> >>> +				break;
> >>> +			default:
> >>> +				PMD_INIT_LOG(ERR,
> >>> +					"invalid pool number in IOV mode");
> >>> +				break;
> >>> +			}
> >>>    			break;
> >>> -		default:
> >>> -			PMD_INIT_LOG(ERR, "invalid pool number in IOV
> >> mode");
> >>>    		}
> >>>    	}
> >>>
> >>> @@ -3989,10 +4036,32 @@ ixgbevf_dev_rx_init(struct rte_eth_dev
> *dev)
> >>>    	uint16_t buf_size;
> >>>    	uint16_t i;
> >>>    	int ret;
> >>> +	uint16_t valid_rxq_num;
> >>>
> >>>    	PMD_INIT_FUNC_TRACE();
> >>>    	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> >>>
> >>> +	valid_rxq_num = RTE_MIN(dev->data->nb_rx_queues,
> >>> +hw->mac.max_rx_queues);
> >>> +
> >>> +	/*
> >>> +	 * VMDq RSS can't support 3 queues, so config it into 4 queues,
> >>> +	 * and give user a hint that some packets may loss if it doesn't
> >>> +	 * poll the queue where those packets are distributed to.
> >>> +	 */
> >>> +	if (valid_rxq_num == 3)
> >>> +		valid_rxq_num = 4;
> >> Why to configure more queues that requested and not less (2)? Why to
> >> configure anything at all and not return an error?
> > Sorry, I don't agree this is "anything" you say, because I don't use 5,6,7,
> 8, ..., 16, 2014, 2015,... etc.
> > By considering 2 or 4,
> > I prefer 4, the reason is if user need more than 3 queues per vf to do
> > something, And pf has also the capability to setup 4 queues per vf,
> > confining to 2 queues is also not good thing, So here try to enable 4 queues,
> and give user hints here.
> > Btw, change it into 2 is another way, depends on other guys' more insight
> here.
> 
> Like I said before, trying to guess what user wants is a way to making a code
> that is very hard to use and to maintain. Pls., just return an error and let the
> user code deal with it the way he/she really wants and not the way u *think*
> he/she wants.
> 
I didn't disagree on this, either :-)
If you have strong reason for this way and more guys agree with it,
I will modify it probably in v4. 
> >
> >>> +
> >>> +	if (dev->data->nb_rx_queues > valid_rxq_num) {
> >>> +		PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
> >>> +			"it should be equal to or less than %d",
> >>> +			valid_rxq_num);
> >>> +		return -1;
> >>> +	} else if (dev->data->nb_rx_queues < valid_rxq_num)
> >>> +		PMD_INIT_LOG(ERR, "The number of Rx queue is less "
> >>> +			"than the number of available Rx queues:%d, "
> >>> +			"packets in Rx queues(q_id >= %d) may loss.",
> >>> +			valid_rxq_num, dev->data->nb_rx_queues);
> >> Who ever looks in the "INIT_LOG" if everything "work well" and u make
> >> it look so by allowing this call to succeed. And then some packets
> >> will just silently not arrive?! And what the used should somehow guess to
> do?
> >> - Look in the "INIT_LOG"?! This is a nightmare!
> >>
> >>> +
> >>>    	/*
> >>>    	 * When the VF driver issues a IXGBE_VF_RESET request, the PF
> >> driver
> >>>    	 * disables the VF receipt of packets if the PF MTU is > 1500.
> >>> @@ -4094,6 +4163,9 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
> >>>    			IXGBE_PSRTYPE_IPV6HDR;
> >>>    #endif
> >>>
> >>> +	/* Set RQPL for VF RSS according to max Rx queue */
> >>> +	psrtype |= (valid_rxq_num >> 1) <<
> >>> +		IXGBE_PSRTYPE_RQPL_SHIFT;
> >>>    	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
> >>>
> >>>    	if (dev->data->dev_conf.rxmode.enable_scatter) {
Ouyang Changchun Dec. 26, 2014, 2:07 a.m. UTC | #9
> -----Original Message-----
> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> Sent: Thursday, December 25, 2014 9:14 PM
> To: Ouyang, Changchun; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
> 
> 
> On 12/25/14 04:14, Ouyang, Changchun wrote:
> > Hi,
> >
> >> -----Original Message-----
> >> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> >> Sent: Wednesday, December 24, 2014 6:40 PM
> >> To: Ouyang, Changchun; dev@dpdk.org
> >> Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
> >>
> >>
> >> On 12/24/14 07:23, Ouyang Changchun wrote:
> >>> It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable
> VF
> >> RSS.
> >>> The psrtype will determine how many queues the received packets will
> >>> distribute to, and the value of psrtype should depends on both facet:
> >>> max VF rxq number which has been negotiated with PF, and the number
> >>> of
> >> rxq specified in config on guest.
> >>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> >>> ---
> >>>    lib/librte_pmd_ixgbe/ixgbe_pf.c   | 15 +++++++
> >>>    lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 92
> >> ++++++++++++++++++++++++++++++++++-----
> >>>    2 files changed, 97 insertions(+), 10 deletions(-)
> >>>
> >>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> >>> b/lib/librte_pmd_ixgbe/ixgbe_pf.c index cbb0145..9c9dad8 100644
> >>> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> >>> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> >>> @@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev
> >> *eth_dev)
> >>>    	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw-
> mac.num_rar_entries), 0);
> >>>    	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw-
> mac.num_rar_entries), 0);
> >>>
> >>> +	/*
> >>> +	 * VF RSS can support at most 4 queues for each VF, even if
> >>> +	 * 8 queues are available for each VF, it need refine to 4
> >>> +	 * queues here due to this limitation, otherwise no queue
> >>> +	 * will receive any packet even RSS is enabled.
> >> According to Table 7-3 in the 82599 spec RSS is not available when
> >> port is configured to have 8 queues per pool. This means that if u
> >> see this configuration u may immediately disable RSS flow in your code.
> >>
> > 8 queues here means the available number queue per vf, it is
> > calculated according to max vfs, e.g. if max vfs is 16(or less than),
> > then each vf 'COULD' have 8 queues evenly, pf early init stage estimate this
> value, but that is not precise, so need refine this.
> > User don't know this estimated value, it is internal value, not come from
> user's input/configure.
> > Hope it is clear to you.
> >>> +	 */
> >>> +	if (eth_dev->data->dev_conf.rxmode.mq_mode ==
> >> ETH_MQ_RX_VMDQ_RSS) {
> >>> +		if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
> >>> +			RTE_ETH_DEV_SRIOV(eth_dev).active =
> >> ETH_32_POOLS;
> >>> +			RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
> >>> +			RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
> >>> +				dev_num_vf(eth_dev) * 4;
> >> According to 82599 spec u can't do that since RSS is not allowed when
> >> port is configured to have 8 function per-VF. Have u verified that
> >> this works? If yes, then spec should be updated.
> >>
> > Response as above,
> > Of course I have validated this. It works well.
> >
> >>> +		}
> >>> +	}
> >>> +
> >>>    	/* set VMDq map to default PF pool */
> >>>    	hw->mac.ops.set_vmdq(hw, 0,
> >>> RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
> >>>
> >>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> >>> b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> >>> index f69abda..a7c17a4 100644
> >>> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> >>> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> >>> @@ -3327,6 +3327,39 @@ ixgbe_alloc_rx_queue_mbufs(struct
> >> igb_rx_queue *rxq)
> >>>    }
> >>>
> >>>    static int
> >>> +ixgbe_config_vf_rss(struct rte_eth_dev *dev) {
> >>> +	struct ixgbe_hw *hw;
> >>> +	uint32_t mrqc;
> >>> +
> >>> +	ixgbe_rss_configure(dev);
> >>> +
> >>> +	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> >>> +
> >>> +	/* MRQC: enable VF RSS */
> >>> +	mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
> >>> +	mrqc &= ~IXGBE_MRQC_MRQE_MASK;
> >>> +	switch (RTE_ETH_DEV_SRIOV(dev).active) {
> >>> +	case ETH_64_POOLS:
> >>> +		mrqc |= IXGBE_MRQC_VMDQRSS64EN;
> >>> +		break;
> >>> +
> >>> +	case ETH_32_POOLS:
> >>> +	case ETH_16_POOLS:
> >>> +		mrqc |= IXGBE_MRQC_VMDQRSS32EN;
> >> Again, this contradicts with the spec.
> >>
> >>> +		break;
> >>> +
> >>> +	default:
> >>> +		PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode");
> >>> +		return -EINVAL;
> >>> +	}
> >>> +
> >>> +	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
> >>> +
> >>> +	return 0;
> >>> +}
> >>> +
> >>> +static int
> >>>    ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
> >>>    {
> >>>    	struct ixgbe_hw *hw =
> >>> @@ -3358,24 +3391,38 @@ ixgbe_dev_mq_rx_configure(struct
> >> rte_eth_dev *dev)
> >>>    			default: ixgbe_rss_disable(dev);
> >>>    		}
> >>>    	} else {
> >>> -		switch (RTE_ETH_DEV_SRIOV(dev).active) {
> >>>    		/*
> >>>    		 * SRIOV active scheme
> >>>    		 * FIXME if support DCB/RSS together with VMDq & SRIOV
> >>>    		 */
> >>> -		case ETH_64_POOLS:
> >>> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> >> IXGBE_MRQC_VMDQEN);
> >>> +		switch (dev->data->dev_conf.rxmode.mq_mode) {
> >>> +		case ETH_MQ_RX_RSS:
> >>> +		case ETH_MQ_RX_VMDQ_RSS:
> >>> +			ixgbe_config_vf_rss(dev);
> >>>    			break;
> >>>
> >>> -		case ETH_32_POOLS:
> >>> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> >> IXGBE_MRQC_VMDQRT4TCEN);
> >>> -			break;
> >>> +		default:
> >>> +			switch (RTE_ETH_DEV_SRIOV(dev).active) {
> >> Sorry for nitpicking but have u considered taking this encapsulated
> >> "switch- case" block into a separate function? This could make the
> >> code look a lot nicer. ;)
> > Only one place use it, so don't need make it a function, And I prefer
> > to the current code.
> 
> Functions may be used not only to have a repeatedly called code but also to
> make a caller code more readable. Encapsulated switch-case is one of the
> examples of a *not* readable code constructs which should be avoided.
> 
> >
> >>> +			case ETH_64_POOLS:
> >>> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> >>> +					IXGBE_MRQC_VMDQEN);
> >>> +				break;
> >>>
> >>> -		case ETH_16_POOLS:
> >>> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> >> IXGBE_MRQC_VMDQRT8TCEN);
> >>> +			case ETH_32_POOLS:
> >>> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> >>> +					IXGBE_MRQC_VMDQRT4TCEN);
> >>> +				break;
> >>> +
> >>> +			case ETH_16_POOLS:
> >>> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> >>> +					IXGBE_MRQC_VMDQRT8TCEN);
> >>> +				break;
> >>> +			default:
> >>> +				PMD_INIT_LOG(ERR,
> >>> +					"invalid pool number in IOV mode");
> >>> +				break;
> >>> +			}
> >>>    			break;
> >>> -		default:
> >>> -			PMD_INIT_LOG(ERR, "invalid pool number in IOV
> >> mode");
> >>>    		}
> >>>    	}
> >>>
> >>> @@ -3989,10 +4036,32 @@ ixgbevf_dev_rx_init(struct rte_eth_dev
> *dev)
> >>>    	uint16_t buf_size;
> >>>    	uint16_t i;
> >>>    	int ret;
> >>> +	uint16_t valid_rxq_num;
> >>>
> >>>    	PMD_INIT_FUNC_TRACE();
> >>>    	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> >>>
> >>> +	valid_rxq_num = RTE_MIN(dev->data->nb_rx_queues,
> >>> +hw->mac.max_rx_queues);
> >>> +
> >>> +	/*
> >>> +	 * VMDq RSS can't support 3 queues, so config it into 4 queues,
> >>> +	 * and give user a hint that some packets may loss if it doesn't
> >>> +	 * poll the queue where those packets are distributed to.
> >>> +	 */
> >>> +	if (valid_rxq_num == 3)
> >>> +		valid_rxq_num = 4;
> >> Why to configure more queues that requested and not less (2)? Why to
> >> configure anything at all and not return an error?
> >>
> >>> +
> >>> +	if (dev->data->nb_rx_queues > valid_rxq_num) {
> >>> +		PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
> >>> +			"it should be equal to or less than %d",
> >>> +			valid_rxq_num);
> >>> +		return -1;
> >>> +	} else if (dev->data->nb_rx_queues < valid_rxq_num)
> >>> +		PMD_INIT_LOG(ERR, "The number of Rx queue is less "
> >>> +			"than the number of available Rx queues:%d, "
> >>> +			"packets in Rx queues(q_id >= %d) may loss.",
> >>> +			valid_rxq_num, dev->data->nb_rx_queues);
> >> Who ever looks in the "INIT_LOG" if everything "work well" and u make it
> >> look so by allowing this call to succeed. And then some packets will just
> >> silently not arrive?! And what the used should somehow guess to do?
> >> - Look in the "INIT_LOG"?! This is a nightmare!
> > Sorry, I don't think so again, if user find any packets loss, he will care for log,
> > Then he can find that log there, then user can refine its rxq number due
> the wrong rxq number,
> > Why is it a nightmare?
> 
> Because usually u expect that if the function call returns with a
> success it means a success. Why a user has to learn that a device
> configuration function was provided with wrong parameters from the
> packet loss? If parameters are not allowed u expect to get an error as a
> return value. Since when errors are returned in a form of a log message?
> Why do u think there is a living person running a DPDK based
> application? How do u expect somebody build an automated environment
> when part of errors are returned in some log? Should he/she add a log
> parser?
If it is automated environment, the log parsing should also be automated,
At least it should report to living person.  But this is another topic we could stop here. Let's focus on vf rss

> On the other hand, why do u think 4 queues is a better option for a user
> than 2 queue when he asked for 3 queues? What kind of heuristics is that?
> 
> To summarize - it would be much better if u just returned an EINVAL
> error in that case.

As I said, it seems you have strong reason to use 2 queues here, and return error
If user try to use 3 queues, I will consider it in v4, but still need insights from more guys here.
> 
> >
> > I don't agree with you about "silently not arrive", because we have hint/log
> there.
> >
> > Return error here is also possible way,
> 
> It's the only possible way! ;)
> 
> > Again need other guys' insight here.
> >
> >>> +
> >>>    	/*
> >>>    	 * When the VF driver issues a IXGBE_VF_RESET request, the PF
> >> driver
> >>>    	 * disables the VF receipt of packets if the PF MTU is > 1500.
> >>> @@ -4094,6 +4163,9 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
> >>>    			IXGBE_PSRTYPE_IPV6HDR;
> >>>    #endif
> >>>
> >>> +	/* Set RQPL for VF RSS according to max Rx queue */
> >>> +	psrtype |= (valid_rxq_num >> 1) <<
> >>> +		IXGBE_PSRTYPE_RQPL_SHIFT;
> >>>    	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
> >>>
> >>>    	if (dev->data->dev_conf.rxmode.enable_scatter) {
Vladislav Zolotarov Dec. 26, 2014, 6:49 a.m. UTC | #10
On Dec 26, 2014 3:52 AM, "Ouyang, Changchun" <changchun.ouyang@intel.com>
wrote:
>
>
>
> > -----Original Message-----
> > From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> > Sent: Thursday, December 25, 2014 9:20 PM
> > To: Ouyang, Changchun; dev@dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
> >
> >
> > On 12/25/14 04:43, Ouyang, Changchun wrote:
> > > Hi,
> > > Sorry miss some comments, so continue my response below,
> > >
> > >> -----Original Message-----
> > >> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> > >> Sent: Wednesday, December 24, 2014 6:40 PM
> > >> To: Ouyang, Changchun; dev@dpdk.org
> > >> Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
> > >>
> > >>
> > >> On 12/24/14 07:23, Ouyang Changchun wrote:
> > >>> It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable
> > VF
> > >> RSS.
> > >>> The psrtype will determine how many queues the received packets will
> > >>> distribute to, and the value of psrtype should depends on both
facet:
> > >>> max VF rxq number which has been negotiated with PF, and the number
> > >>> of
> > >> rxq specified in config on guest.
> > >>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> > >>> ---
> > >>>    lib/librte_pmd_ixgbe/ixgbe_pf.c   | 15 +++++++
> > >>>    lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 92
> > >> ++++++++++++++++++++++++++++++++++-----
> > >>>    2 files changed, 97 insertions(+), 10 deletions(-)
> > >>>
> > >>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > >>> b/lib/librte_pmd_ixgbe/ixgbe_pf.c index cbb0145..9c9dad8 100644
> > >>> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > >>> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > >>> @@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev
> > >> *eth_dev)
> > >>>           IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw-
> > mac.num_rar_entries), 0);
> > >>>           IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw-
> > mac.num_rar_entries), 0);
> > >>>
> > >>> + /*
> > >>> +  * VF RSS can support at most 4 queues for each VF, even if
> > >>> +  * 8 queues are available for each VF, it need refine to 4
> > >>> +  * queues here due to this limitation, otherwise no queue
> > >>> +  * will receive any packet even RSS is enabled.
> > >> According to Table 7-3 in the 82599 spec RSS is not available when
> > >> port is configured to have 8 queues per pool. This means that if u
> > >> see this configuration u may immediately disable RSS flow in your
code.
> > >>
> > >>> +  */
> > >>> + if (eth_dev->data->dev_conf.rxmode.mq_mode ==
> > >> ETH_MQ_RX_VMDQ_RSS) {
> > >>> +         if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
> > >>> +                 RTE_ETH_DEV_SRIOV(eth_dev).active =
> > >> ETH_32_POOLS;
> > >>> +                 RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
> > >>> +                 RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
> > >>> +                         dev_num_vf(eth_dev) * 4;
> > >> According to 82599 spec u can't do that since RSS is not allowed when
> > >> port is configured to have 8 function per-VF. Have u verified that
> > >> this works? If yes, then spec should be updated.
> > >>
> > >>> +         }
> > >>> + }
> > >>> +
> > >>>           /* set VMDq map to default PF pool */
> > >>>           hw->mac.ops.set_vmdq(hw, 0,
> > >>> RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
> > >>>
> > >>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > >>> b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > >>> index f69abda..a7c17a4 100644
> > >>> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > >>> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > >>> @@ -3327,6 +3327,39 @@ ixgbe_alloc_rx_queue_mbufs(struct
> > >> igb_rx_queue *rxq)
> > >>>    }
> > >>>
> > >>>    static int
> > >>> +ixgbe_config_vf_rss(struct rte_eth_dev *dev) {
> > >>> + struct ixgbe_hw *hw;
> > >>> + uint32_t mrqc;
> > >>> +
> > >>> + ixgbe_rss_configure(dev);
> > >>> +
> > >>> + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> > >>> +
> > >>> + /* MRQC: enable VF RSS */
> > >>> + mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
> > >>> + mrqc &= ~IXGBE_MRQC_MRQE_MASK;
> > >>> + switch (RTE_ETH_DEV_SRIOV(dev).active) {
> > >>> + case ETH_64_POOLS:
> > >>> +         mrqc |= IXGBE_MRQC_VMDQRSS64EN;
> > >>> +         break;
> > >>> +
> > >>> + case ETH_32_POOLS:
> > >>> + case ETH_16_POOLS:
> > >>> +         mrqc |= IXGBE_MRQC_VMDQRSS32EN;
> > >> Again, this contradicts with the spec.
> > > Yes, the spec say the hw can't support vf rss at all, but experiment
find that
> > could be done.
> >
> > The spec explicitly say that VF RSS *is* supported in particular in the
table
> > mentioned above.
> But the spec(January 2014 revision 2.9) on my hand says: "in IOV mode,
VMDq+RSS mode is not available"  in note of section 4.6.10.2.1

And still there is the whole section about configuring packet filtering
including Rx in the VF mode (including the table i've referred) . It's
quite confusing i must say...

> > What your code is doing is that in case of 16 VFs u setup a 32 pools
> > configuration and use only 16 out of them.
> But I don't see any big issue here, in this case, each vf COULD have 8
queues, like I said before, but this is estimation value, actually only 4
queues
> Are really available for one vf, you can refer to spec for the
correctness here.

No issues, i just wanted to clarify that it seems like you are doing it
quite according to the spec.

> >
> > > We can focus on discussing the implementation firstly.

Right. So, after we clarified that there is nothing u can do at the moment
about the rss query flow, there is  one more open issue here.
In general we need a way to know how many  queues from those that are
available may be configured as RSS. While the same issue is present with
the PF as well (it's 16 for 82599 but it may be a different number for a
different device) for VF it's more pronounced since it depends on the PF
configuration.

Don't u think it would be logical to add a specific filed for it in the
dev_info struct?

> >
> > >
> > >>> +         break;
> > >>> +
> > >>> + default:
> > >>> +         PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode");
> > >>> +         return -EINVAL;
> > >>> + }
> > >>> +
> > >>> + IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
> > >>> +
> > >>> + return 0;
> > >>> +}
> > >>> +
> > >>> +static int
> > >>>    ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
> > >>>    {
> > >>>           struct ixgbe_hw *hw =
> > >>> @@ -3358,24 +3391,38 @@ ixgbe_dev_mq_rx_configure(struct
> > >> rte_eth_dev *dev)
> > >>>                           default: ixgbe_rss_disable(dev);
> > >>>                   }
> > >>>           } else {
> > >>> -         switch (RTE_ETH_DEV_SRIOV(dev).active) {
> > >>>                   /*
> > >>>                    * SRIOV active scheme
> > >>>                    * FIXME if support DCB/RSS together with VMDq &
SRIOV
> > >>>                    */
> > >>> -         case ETH_64_POOLS:
> > >>> -                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > >> IXGBE_MRQC_VMDQEN);
> > >>> +         switch (dev->data->dev_conf.rxmode.mq_mode) {
> > >>> +         case ETH_MQ_RX_RSS:
> > >>> +         case ETH_MQ_RX_VMDQ_RSS:
> > >>> +                 ixgbe_config_vf_rss(dev);
> > >>>                           break;
> > >>>
> > >>> -         case ETH_32_POOLS:
> > >>> -                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > >> IXGBE_MRQC_VMDQRT4TCEN);
> > >>> -                 break;
> > >>> +         default:
> > >>> +                 switch (RTE_ETH_DEV_SRIOV(dev).active) {
> > >> Sorry for nitpicking but have u considered taking this encapsulated
> > >> "switch- case" block into a separate function? This could make the
> > >> code look a lot nicer. ;)
> > >>
> > >>> +                 case ETH_64_POOLS:
> > >>> +                         IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > >>> +                                 IXGBE_MRQC_VMDQEN);
> > >>> +                         break;
> > >>>
> > >>> -         case ETH_16_POOLS:
> > >>> -                 IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > >> IXGBE_MRQC_VMDQRT8TCEN);
> > >>> +                 case ETH_32_POOLS:
> > >>> +                         IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > >>> +                                 IXGBE_MRQC_VMDQRT4TCEN);
> > >>> +                         break;
> > >>> +
> > >>> +                 case ETH_16_POOLS:
> > >>> +                         IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > >>> +                                 IXGBE_MRQC_VMDQRT8TCEN);
> > >>> +                         break;
> > >>> +                 default:
> > >>> +                         PMD_INIT_LOG(ERR,
> > >>> +                                 "invalid pool number in IOV
mode");
> > >>> +                         break;
> > >>> +                 }
> > >>>                           break;
> > >>> -         default:
> > >>> -                 PMD_INIT_LOG(ERR, "invalid pool number in IOV
> > >> mode");
> > >>>                   }
> > >>>           }
> > >>>
> > >>> @@ -3989,10 +4036,32 @@ ixgbevf_dev_rx_init(struct rte_eth_dev
> > *dev)
> > >>>           uint16_t buf_size;
> > >>>           uint16_t i;
> > >>>           int ret;
> > >>> + uint16_t valid_rxq_num;
> > >>>
> > >>>           PMD_INIT_FUNC_TRACE();
> > >>>           hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> > >>>
> > >>> + valid_rxq_num = RTE_MIN(dev->data->nb_rx_queues,
> > >>> +hw->mac.max_rx_queues);
> > >>> +
> > >>> + /*
> > >>> +  * VMDq RSS can't support 3 queues, so config it into 4 queues,
> > >>> +  * and give user a hint that some packets may loss if it doesn't
> > >>> +  * poll the queue where those packets are distributed to.
> > >>> +  */
> > >>> + if (valid_rxq_num == 3)
> > >>> +         valid_rxq_num = 4;
> > >> Why to configure more queues that requested and not less (2)? Why to
> > >> configure anything at all and not return an error?
> > > Sorry, I don't agree this is "anything" you say, because I don't use
5,6,7,
> > 8, ..., 16, 2014, 2015,... etc.
> > > By considering 2 or 4,
> > > I prefer 4, the reason is if user need more than 3 queues per vf to do
> > > something, And pf has also the capability to setup 4 queues per vf,
> > > confining to 2 queues is also not good thing, So here try to enable 4
queues,
> > and give user hints here.
> > > Btw, change it into 2 is another way, depends on other guys' more
insight
> > here.
> >
> > Like I said before, trying to guess what user wants is a way to making
a code
> > that is very hard to use and to maintain. Pls., just return an error
and let the
> > user code deal with it the way he/she really wants and not the way u
*think*
> > he/she wants.
> >
> I didn't disagree on this, either :-)
> If you have strong reason for this way and more guys agree with it,
> I will modify it probably in v4.
> > >
> > >>> +
> > >>> + if (dev->data->nb_rx_queues > valid_rxq_num) {
> > >>> +         PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
> > >>> +                 "it should be equal to or less than %d",
> > >>> +                 valid_rxq_num);
> > >>> +         return -1;
> > >>> + } else if (dev->data->nb_rx_queues < valid_rxq_num)
> > >>> +         PMD_INIT_LOG(ERR, "The number of Rx queue is less "
> > >>> +                 "than the number of available Rx queues:%d, "
> > >>> +                 "packets in Rx queues(q_id >= %d) may loss.",
> > >>> +                 valid_rxq_num, dev->data->nb_rx_queues);
> > >> Who ever looks in the "INIT_LOG" if everything "work well" and u make
> > >> it look so by allowing this call to succeed. And then some packets
> > >> will just silently not arrive?! And what the used should somehow
guess to
> > do?
> > >> - Look in the "INIT_LOG"?! This is a nightmare!
> > >>
> > >>> +
> > >>>           /*
> > >>>            * When the VF driver issues a IXGBE_VF_RESET request,
the PF
> > >> driver
> > >>>            * disables the VF receipt of packets if the PF MTU is >
1500.
> > >>> @@ -4094,6 +4163,9 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
> > >>>                           IXGBE_PSRTYPE_IPV6HDR;
> > >>>    #endif
> > >>>
> > >>> + /* Set RQPL for VF RSS according to max Rx queue */
> > >>> + psrtype |= (valid_rxq_num >> 1) <<
> > >>> +         IXGBE_PSRTYPE_RQPL_SHIFT;
> > >>>           IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
> > >>>
> > >>>           if (dev->data->dev_conf.rxmode.enable_scatter) {
>
Ouyang Changchun Dec. 26, 2014, 7:26 a.m. UTC | #11
Hi Vladislav,

From: Vladislav Zolotarov [mailto:vladz@cloudius-systems.com]

Sent: Friday, December 26, 2014 2:49 PM
To: Ouyang, Changchun
Cc: dev@dpdk.org
Subject: RE: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS


On Dec 26, 2014 3:52 AM, "Ouyang, Changchun" <changchun.ouyang@intel.com<mailto:changchun.ouyang@intel.com>> wrote:
>

>

>

> > -----Original Message-----

> > From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com<mailto:vladz@cloudius-systems.com>]

> > Sent: Thursday, December 25, 2014 9:20 PM

> > To: Ouyang, Changchun; dev@dpdk.org<mailto:dev@dpdk.org>

> > Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS

> >

> >

> > On 12/25/14 04:43, Ouyang, Changchun wrote:

> > > Hi,

> > > Sorry miss some comments, so continue my response below,

> > >

> > >> -----Original Message-----

> > >> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com<mailto:vladz@cloudius-systems.com>]

> > >> Sent: Wednesday, December 24, 2014 6:40 PM

> > >> To: Ouyang, Changchun; dev@dpdk.org<mailto:dev@dpdk.org>

> > >> Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS

> > >>

> > >>

> > >> On 12/24/14 07:23, Ouyang Changchun wrote:

> > >>> It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable

> > VF

> > >> RSS.

> > >>> The psrtype will determine how many queues the received packets will

> > >>> distribute to, and the value of psrtype should depends on both facet:

> > >>> max VF rxq number which has been negotiated with PF, and the number

> > >>> of

> > >> rxq specified in config on guest.

> > >>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com<mailto:changchun.ouyang@intel.com>>

> > >>> ---

> > >>>    lib/librte_pmd_ixgbe/ixgbe_pf.c   | 15 +++++++

> > >>>    lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 92

> > >> ++++++++++++++++++++++++++++++++++-----

> > >>>    2 files changed, 97 insertions(+), 10 deletions(-)

> > >>>

> > >>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c

> > >>> b/lib/librte_pmd_ixgbe/ixgbe_pf.c index cbb0145..9c9dad8 100644

> > >>> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c

> > >>> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c

> > >>> @@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev

> > >> *eth_dev)

> > >>>           IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw-

> > mac.num_rar_entries), 0);

> > >>>           IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw-

> > mac.num_rar_entries), 0);

> > >>>

> > >>> + /*

> > >>> +  * VF RSS can support at most 4 queues for each VF, even if

> > >>> +  * 8 queues are available for each VF, it need refine to 4

> > >>> +  * queues here due to this limitation, otherwise no queue

> > >>> +  * will receive any packet even RSS is enabled.

> > >> According to Table 7-3 in the 82599 spec RSS is not available when

> > >> port is configured to have 8 queues per pool. This means that if u

> > >> see this configuration u may immediately disable RSS flow in your code.

> > >>

> > >>> +  */

> > >>> + if (eth_dev->data->dev_conf.rxmode.mq_mode ==

> > >> ETH_MQ_RX_VMDQ_RSS) {

> > >>> +         if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {

> > >>> +                 RTE_ETH_DEV_SRIOV(eth_dev).active =

> > >> ETH_32_POOLS;

> > >>> +                 RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;

> > >>> +                 RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =

> > >>> +                         dev_num_vf(eth_dev) * 4;

> > >> According to 82599 spec u can't do that since RSS is not allowed when

> > >> port is configured to have 8 function per-VF. Have u verified that

> > >> this works? If yes, then spec should be updated.

> > >>

> > >>> +         }

> > >>> + }

> > >>> +

> > >>>           /* set VMDq map to default PF pool */

> > >>>           hw->mac.ops.set_vmdq(hw, 0,

> > >>> RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);

> > >>>

> > >>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c

> > >>> b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c

> > >>> index f69abda..a7c17a4 100644

> > >>> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c

> > >>> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c

> > >>> @@ -3327,6 +3327,39 @@ ixgbe_alloc_rx_queue_mbufs(struct

> > >> igb_rx_queue *rxq)

> > >>>    }

> > >>>

> > >>>    static int

> > >>> +ixgbe_config_vf_rss(struct rte_eth_dev *dev) {

> > >>> + struct ixgbe_hw *hw;

> > >>> + uint32_t mrqc;

> > >>> +

> > >>> + ixgbe_rss_configure(dev);

> > >>> +

> > >>> + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

> > >>> +

> > >>> + /* MRQC: enable VF RSS */

> > >>> + mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);

> > >>> + mrqc &= ~IXGBE_MRQC_MRQE_MASK;

> > >>> + switch (RTE_ETH_DEV_SRIOV(dev).active) {

> > >>> + case ETH_64_POOLS:

> > >>> +         mrqc |= IXGBE_MRQC_VMDQRSS64EN;

> > >>> +         break;

> > >>> +

> > >>> + case ETH_32_POOLS:

> > >>> + case ETH_16_POOLS:

> > >>> +         mrqc |= IXGBE_MRQC_VMDQRSS32EN;

> > >> Again, this contradicts with the spec.

> > > Yes, the spec say the hw can't support vf rss at all, but experiment find that

> > could be done.

> >

> > The spec explicitly say that VF RSS *is* supported in particular in the table

> > mentioned above.

> But the spec(January 2014 revision 2.9) on my hand says: "in IOV mode, VMDq+RSS mode is not available"  in note of section 4.6.10.2.1


>And still there is the whole section about configuring packet filtering including Rx in the VF mode (including the table i've referred) . It's quite confusing i must say...


Changchun: do you mind tell me which table you are referring to, I will try to have a look and may share my thought if I can.

> > What your code is doing is that in case of 16 VFs u setup a 32 pools

> > configuration and use only 16 out of them.

> But I don't see any big issue here, in this case, each vf COULD have 8 queues, like I said before, but this is estimation value, actually only 4 queues

> Are really available for one vf, you can refer to spec for the correctness here.


>No issues, i just wanted to clarify that it seems like you are doing it quite according to the spec.


> >

> > > We can focus on discussing the implementation firstly.


>Right. So, after we clarified that there is nothing u can do at the moment about the rss query flow, there is  one more open issue here.

>In general we need a way to know how many  queues from those that are available may be configured as RSS. While the same issue is present with the PF as well (it's 16 for 82599 but it may be a different number for a different device) for VF it's more pronounced since it depends on the PF configuration.


>Don't u think it would be logical to add a specific filed for it in the dev_info struct?


Changchun: you are right, and we have already the max_rx_queues in dev_info,

while negotiating between pf and vf, the negotiated max rx queue number will be set into hw->mac.max_rx_queues,

And after that when you call ixgbe_dev_info_get, that value will be set into dev_info->max_rx_queues.

Then you could get the number of queue all packets will distribute to by getting dev_info->max_rx_queues.

Thanks

Changchun
Vladislav Zolotarov Dec. 26, 2014, 7:37 a.m. UTC | #12
On Dec 26, 2014 9:28 AM, "Ouyang, Changchun" <changchun.ouyang@intel.com>
wrote:
>
> Hi Vladislav,
>
>
>
> From: Vladislav Zolotarov [mailto:vladz@cloudius-systems.com]
> Sent: Friday, December 26, 2014 2:49 PM
> To: Ouyang, Changchun
> Cc: dev@dpdk.org
> Subject: RE: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
>
>
>
>
> On Dec 26, 2014 3:52 AM, "Ouyang, Changchun" <changchun.ouyang@intel.com>
wrote:
> >
> >
> >
> > > -----Original Message-----
> > > From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> > > Sent: Thursday, December 25, 2014 9:20 PM
> > > To: Ouyang, Changchun; dev@dpdk.org
> > > Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
> > >
> > >
> > > On 12/25/14 04:43, Ouyang, Changchun wrote:
> > > > Hi,
> > > > Sorry miss some comments, so continue my response below,
> > > >
> > > >> -----Original Message-----
> > > >> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> > > >> Sent: Wednesday, December 24, 2014 6:40 PM
> > > >> To: Ouyang, Changchun; dev@dpdk.org
> > > >> Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
> > > >>
> > > >>
> > > >> On 12/24/14 07:23, Ouyang Changchun wrote:
> > > >>> It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable
> > > VF
> > > >> RSS.
> > > >>> The psrtype will determine how many queues the received packets
will
> > > >>> distribute to, and the value of psrtype should depends on both
facet:
> > > >>> max VF rxq number which has been negotiated with PF, and the
number
> > > >>> of
> > > >> rxq specified in config on guest.
> > > >>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> > > >>> ---
> > > >>>    lib/librte_pmd_ixgbe/ixgbe_pf.c   | 15 +++++++
> > > >>>    lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 92
> > > >> ++++++++++++++++++++++++++++++++++-----
> > > >>>    2 files changed, 97 insertions(+), 10 deletions(-)
> > > >>>
> > > >>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > > >>> b/lib/librte_pmd_ixgbe/ixgbe_pf.c index cbb0145..9c9dad8 100644
> > > >>> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > > >>> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > > >>> @@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct
rte_eth_dev
> > > >> *eth_dev)
> > > >>>           IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw-
> > > mac.num_rar_entries), 0);
> > > >>>           IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw-
> > > mac.num_rar_entries), 0);
> > > >>>
> > > >>> + /*
> > > >>> +  * VF RSS can support at most 4 queues for each VF, even if
> > > >>> +  * 8 queues are available for each VF, it need refine to 4
> > > >>> +  * queues here due to this limitation, otherwise no queue
> > > >>> +  * will receive any packet even RSS is enabled.
> > > >> According to Table 7-3 in the 82599 spec RSS is not available when
> > > >> port is configured to have 8 queues per pool. This means that if u
> > > >> see this configuration u may immediately disable RSS flow in your
code.
> > > >>
> > > >>> +  */
> > > >>> + if (eth_dev->data->dev_conf.rxmode.mq_mode ==
> > > >> ETH_MQ_RX_VMDQ_RSS) {
> > > >>> +         if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
> > > >>> +                 RTE_ETH_DEV_SRIOV(eth_dev).active =
> > > >> ETH_32_POOLS;
> > > >>> +                 RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
> > > >>> +                 RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
> > > >>> +                         dev_num_vf(eth_dev) * 4;
> > > >> According to 82599 spec u can't do that since RSS is not allowed
when
> > > >> port is configured to have 8 function per-VF. Have u verified that
> > > >> this works? If yes, then spec should be updated.
> > > >>
> > > >>> +         }
> > > >>> + }
> > > >>> +
> > > >>>           /* set VMDq map to default PF pool */
> > > >>>           hw->mac.ops.set_vmdq(hw, 0,
> > > >>> RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
> > > >>>
> > > >>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > > >>> b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > > >>> index f69abda..a7c17a4 100644
> > > >>> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > > >>> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > > >>> @@ -3327,6 +3327,39 @@ ixgbe_alloc_rx_queue_mbufs(struct
> > > >> igb_rx_queue *rxq)
> > > >>>    }
> > > >>>
> > > >>>    static int
> > > >>> +ixgbe_config_vf_rss(struct rte_eth_dev *dev) {
> > > >>> + struct ixgbe_hw *hw;
> > > >>> + uint32_t mrqc;
> > > >>> +
> > > >>> + ixgbe_rss_configure(dev);
> > > >>> +
> > > >>> + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> > > >>> +
> > > >>> + /* MRQC: enable VF RSS */
> > > >>> + mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
> > > >>> + mrqc &= ~IXGBE_MRQC_MRQE_MASK;
> > > >>> + switch (RTE_ETH_DEV_SRIOV(dev).active) {
> > > >>> + case ETH_64_POOLS:
> > > >>> +         mrqc |= IXGBE_MRQC_VMDQRSS64EN;
> > > >>> +         break;
> > > >>> +
> > > >>> + case ETH_32_POOLS:
> > > >>> + case ETH_16_POOLS:
> > > >>> +         mrqc |= IXGBE_MRQC_VMDQRSS32EN;
> > > >> Again, this contradicts with the spec.
> > > > Yes, the spec say the hw can't support vf rss at all, but
experiment find that
> > > could be done.
> > >
> > > The spec explicitly say that VF RSS *is* supported in particular in
the table
> > > mentioned above.
> > But the spec(January 2014 revision 2.9) on my hand says: "in IOV mode,
VMDq+RSS mode is not available"  in note of section 4.6.10.2.1
>
> >And still there is the whole section about configuring packet filtering
including Rx in the VF mode (including the table i've referred) . It's
quite confusing i must say...
>
> Changchun: do you mind tell me which table you are referring to, I will
try to have a look and may share my thought if I can.
>
> > > What your code is doing is that in case of 16 VFs u setup a 32 pools
> > > configuration and use only 16 out of them.
> > But I don't see any big issue here, in this case, each vf COULD have 8
queues, like I said before, but this is estimation value, actually only 4
queues
> > Are really available for one vf, you can refer to spec for the
correctness here.
>
> >No issues, i just wanted to clarify that it seems like you are doing it
quite according to the spec.
>
> > >
> > > > We can focus on discussing the implementation firstly.
>
> >Right. So, after we clarified that there is nothing u can do at the
moment about the rss query flow, there is  one more open issue here.
> >In general we need a way to know how many  queues from those that are
available may be configured as RSS. While the same issue is present with
the PF as well (it's 16 for 82599 but it may be a different number for a
different device) for VF it's more pronounced since it depends on the PF
configuration.
>
> >Don't u think it would be logical to add a specific filed for it in the
dev_info struct?
>
> Changchun: you are right, and we have already the max_rx_queues in
dev_info,
>
> while negotiating between pf and vf, the negotiated max rx queue number
will be set into hw->mac.max_rx_queues,
>
> And after that when you call ixgbe_dev_info_get, that value will be set
into dev_info->max_rx_queues.
>
> Then you could get the number of queue all packets will distribute to by
getting dev_info->max_rx_queues.

I'm afraid u've missed my point here. For instance, for a PF max_rx_queues
will be set to 128 while u may only configure 16 RSS queues. The similar
will happen for a VF in the 16 VF configuration: max_rx_queues will be set
to 8 while u may configure only 4 RSS queues.

This is why i suggested to add a separate info field... 😉

>
> Thanks
>
> Changchun
Ouyang Changchun Dec. 26, 2014, 8:45 a.m. UTC | #13
Hi Vladislav,

From: Vladislav Zolotarov [mailto:vladz@cloudius-systems.com] 

Sent: Friday, December 26, 2014 3:37 PM
To: Ouyang, Changchun
Cc: dev@dpdk.org
Subject: RE: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS


On Dec 26, 2014 9:28 AM, "Ouyang, Changchun" <changchun.ouyang@intel.com> wrote:
>

> Hi Vladislav,

>

>  

>

> From: Vladislav Zolotarov [mailto:vladz@cloudius-systems.com] 

> Sent: Friday, December 26, 2014 2:49 PM

> To: Ouyang, Changchun

> Cc: dev@dpdk.org

> Subject: RE: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS

>

>  

>

>

> On Dec 26, 2014 3:52 AM, "Ouyang, Changchun" <changchun.ouyang@intel.com> wrote:

> >

> >

> >

> > > -----Original Message-----

> > > From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]

> > > Sent: Thursday, December 25, 2014 9:20 PM

> > > To: Ouyang, Changchun; dev@dpdk.org

> > > Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS

> > >

> > >

> > > On 12/25/14 04:43, Ouyang, Changchun wrote:

> > > > Hi,

> > > > Sorry miss some comments, so continue my response below,

> > > >

> > > >> -----Original Message-----

> > > >> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]

> > > >> Sent: Wednesday, December 24, 2014 6:40 PM

> > > >> To: Ouyang, Changchun; dev@dpdk.org

> > > >> Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS

> > > >>

> > > >>

> > > >> On 12/24/14 07:23, Ouyang Changchun wrote:

> > > >>> It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable

> > > VF

> > > >> RSS.

> > > >>> The psrtype will determine how many queues the received packets will

> > > >>> distribute to, and the value of psrtype should depends on both facet:

> > > >>> max VF rxq number which has been negotiated with PF, and the number

> > > >>> of

> > > >> rxq specified in config on guest.

> > > >>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>

> > > >>> ---

> > > >>>    lib/librte_pmd_ixgbe/ixgbe_pf.c   | 15 +++++++

> > > >>>    lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 92

> > > >> ++++++++++++++++++++++++++++++++++-----

> > > >>>    2 files changed, 97 insertions(+), 10 deletions(-)

> > > >>>

> > > >>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c

> > > >>> b/lib/librte_pmd_ixgbe/ixgbe_pf.c index cbb0145..9c9dad8 100644

> > > >>> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c

> > > >>> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c

> > > >>> @@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev

> > > >> *eth_dev)

> > > >>>           IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw-

> > > mac.num_rar_entries), 0);

> > > >>>           IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw-

> > > mac.num_rar_entries), 0);

> > > >>>

> > > >>> + /*

> > > >>> +  * VF RSS can support at most 4 queues for each VF, even if

> > > >>> +  * 8 queues are available for each VF, it need refine to 4

> > > >>> +  * queues here due to this limitation, otherwise no queue

> > > >>> +  * will receive any packet even RSS is enabled.

> > > >> According to Table 7-3 in the 82599 spec RSS is not available when

> > > >> port is configured to have 8 queues per pool. This means that if u

> > > >> see this configuration u may immediately disable RSS flow in your code.

> > > >>

> > > >>> +  */

> > > >>> + if (eth_dev->data->dev_conf.rxmode.mq_mode ==

> > > >> ETH_MQ_RX_VMDQ_RSS) {

> > > >>> +         if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {

> > > >>> +                 RTE_ETH_DEV_SRIOV(eth_dev).active =

> > > >> ETH_32_POOLS;

> > > >>> +                 RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;

> > > >>> +                 RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =

> > > >>> +                         dev_num_vf(eth_dev) * 4;

> > > >> According to 82599 spec u can't do that since RSS is not allowed when

> > > >> port is configured to have 8 function per-VF. Have u verified that

> > > >> this works? If yes, then spec should be updated.

> > > >>

> > > >>> +         }

> > > >>> + }

> > > >>> +

> > > >>>           /* set VMDq map to default PF pool */

> > > >>>           hw->mac.ops.set_vmdq(hw, 0,

> > > >>> RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);

> > > >>>

> > > >>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c

> > > >>> b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c

> > > >>> index f69abda..a7c17a4 100644

> > > >>> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c

> > > >>> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c

> > > >>> @@ -3327,6 +3327,39 @@ ixgbe_alloc_rx_queue_mbufs(struct

> > > >> igb_rx_queue *rxq)

> > > >>>    }

> > > >>>

> > > >>>    static int

> > > >>> +ixgbe_config_vf_rss(struct rte_eth_dev *dev) {

> > > >>> + struct ixgbe_hw *hw;

> > > >>> + uint32_t mrqc;

> > > >>> +

> > > >>> + ixgbe_rss_configure(dev);

> > > >>> +

> > > >>> + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);

> > > >>> +

> > > >>> + /* MRQC: enable VF RSS */

> > > >>> + mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);

> > > >>> + mrqc &= ~IXGBE_MRQC_MRQE_MASK;

> > > >>> + switch (RTE_ETH_DEV_SRIOV(dev).active) {

> > > >>> + case ETH_64_POOLS:

> > > >>> +         mrqc |= IXGBE_MRQC_VMDQRSS64EN;

> > > >>> +         break;

> > > >>> +

> > > >>> + case ETH_32_POOLS:

> > > >>> + case ETH_16_POOLS:

> > > >>> +         mrqc |= IXGBE_MRQC_VMDQRSS32EN;

> > > >> Again, this contradicts with the spec.

> > > > Yes, the spec say the hw can't support vf rss at all, but experiment find that

> > > could be done.

> > >

> > > The spec explicitly say that VF RSS *is* supported in particular in the table

> > > mentioned above.

> > But the spec(January 2014 revision 2.9) on my hand says: "in IOV mode, VMDq+RSS mode is not available"  in note of section 4.6.10.2.1

>

> >And still there is the whole section about configuring packet filtering including Rx in the VF mode (including the table i've referred) . It's quite confusing i must say...

>

> Changchun: do you mind tell me which table you are referring to, I will try to have a look and may share my thought if I can.

>

> > > What your code is doing is that in case of 16 VFs u setup a 32 pools

> > > configuration and use only 16 out of them.

> > But I don't see any big issue here, in this case, each vf COULD have 8 queues, like I said before, but this is estimation value, actually only 4 queues

> > Are really available for one vf, you can refer to spec for the correctness here.

>

> >No issues, i just wanted to clarify that it seems like you are doing it quite according to the spec.

>

> > >

> > > > We can focus on discussing the implementation firstly.

>

> >Right. So, after we clarified that there is nothing u can do at the moment about the rss query flow, there is  one more open issue here. 

> >In general we need a way to know how many  queues from those that are available may be configured as RSS. While the same issue is present with the PF as well (it's 16 for 82599 but it may be a different number for a different device) for VF it's more pronounced since it depends on the PF configuration.

>

> >Don't u think it would be logical to add a specific filed for it in the dev_info struct?

>

> Changchun: you are right, and we have already the max_rx_queues in dev_info,

>

> while negotiating between pf and vf, the negotiated max rx queue number will be set into hw->mac.max_rx_queues,

>

> And after that when you call ixgbe_dev_info_get, that value will be set into dev_info->max_rx_queues.

>

> Then you could get the number of queue all packets will distribute to by getting dev_info->max_rx_queues.

>I'm afraid u've missed my point here. For instance, for a PF max_rx_queues will be set to 128 while u may only configure 16 RSS queues. The similar will happen for a VF in the 16 VF

>configuration: max_rx_queues will be set to 8 while u may configure only 4 RSS queues. 

>This is why i suggested to add a separate info field... 😉 

Yes, I got your point this time, but the issue is that when I have 16 vf, and try to set max_rx_queues as 8, then no queue can rx any packet on vf,
This is why I have to add a logic to refine the rx queue number from 8 to 4 queues.
I have tried to do it in the way as you suggest, but unfortunately rx queue can't work.  If you find any other good method, pls let me know.
Thanks and regards,
Changchun
Vladislav Zolotarov Dec. 28, 2014, 10:14 a.m. UTC | #14
On 12/26/14 10:45, Ouyang, Changchun wrote:
> Hi Vladislav,
>
> From: Vladislav Zolotarov [mailto:vladz@cloudius-systems.com]
> Sent: Friday, December 26, 2014 3:37 PM
> To: Ouyang, Changchun
> Cc: dev@dpdk.org
> Subject: RE: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
>
>
> On Dec 26, 2014 9:28 AM, "Ouyang, Changchun" <changchun.ouyang@intel.com> wrote:
>> Hi Vladislav,
>>
>>   
>>
>> From: Vladislav Zolotarov [mailto:vladz@cloudius-systems.com]
>> Sent: Friday, December 26, 2014 2:49 PM
>> To: Ouyang, Changchun
>> Cc: dev@dpdk.org
>> Subject: RE: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
>>
>>   
>>
>>
>> On Dec 26, 2014 3:52 AM, "Ouyang, Changchun" <changchun.ouyang@intel.com> wrote:
>>>
>>>
>>>> -----Original Message-----
>>>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>>>> Sent: Thursday, December 25, 2014 9:20 PM
>>>> To: Ouyang, Changchun; dev@dpdk.org
>>>> Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
>>>>
>>>>
>>>> On 12/25/14 04:43, Ouyang, Changchun wrote:
>>>>> Hi,
>>>>> Sorry miss some comments, so continue my response below,
>>>>>
>>>>>> -----Original Message-----
>>>>>> From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
>>>>>> Sent: Wednesday, December 24, 2014 6:40 PM
>>>>>> To: Ouyang, Changchun; dev@dpdk.org
>>>>>> Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
>>>>>>
>>>>>>
>>>>>> On 12/24/14 07:23, Ouyang Changchun wrote:
>>>>>>> It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable
>>>> VF
>>>>>> RSS.
>>>>>>> The psrtype will determine how many queues the received packets will
>>>>>>> distribute to, and the value of psrtype should depends on both facet:
>>>>>>> max VF rxq number which has been negotiated with PF, and the number
>>>>>>> of
>>>>>> rxq specified in config on guest.
>>>>>>> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
>>>>>>> ---
>>>>>>>      lib/librte_pmd_ixgbe/ixgbe_pf.c   | 15 +++++++
>>>>>>>      lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 92
>>>>>> ++++++++++++++++++++++++++++++++++-----
>>>>>>>      2 files changed, 97 insertions(+), 10 deletions(-)
>>>>>>>
>>>>>>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>>>>>> b/lib/librte_pmd_ixgbe/ixgbe_pf.c index cbb0145..9c9dad8 100644
>>>>>>> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>>>>>> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
>>>>>>> @@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev
>>>>>> *eth_dev)
>>>>>>>             IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw-
>>>> mac.num_rar_entries), 0);
>>>>>>>             IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw-
>>>> mac.num_rar_entries), 0);
>>>>>>> + /*
>>>>>>> +  * VF RSS can support at most 4 queues for each VF, even if
>>>>>>> +  * 8 queues are available for each VF, it need refine to 4
>>>>>>> +  * queues here due to this limitation, otherwise no queue
>>>>>>> +  * will receive any packet even RSS is enabled.
>>>>>> According to Table 7-3 in the 82599 spec RSS is not available when
>>>>>> port is configured to have 8 queues per pool. This means that if u
>>>>>> see this configuration u may immediately disable RSS flow in your code.
>>>>>>
>>>>>>> +  */
>>>>>>> + if (eth_dev->data->dev_conf.rxmode.mq_mode ==
>>>>>> ETH_MQ_RX_VMDQ_RSS) {
>>>>>>> +         if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
>>>>>>> +                 RTE_ETH_DEV_SRIOV(eth_dev).active =
>>>>>> ETH_32_POOLS;
>>>>>>> +                 RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
>>>>>>> +                 RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
>>>>>>> +                         dev_num_vf(eth_dev) * 4;
>>>>>> According to 82599 spec u can't do that since RSS is not allowed when
>>>>>> port is configured to have 8 function per-VF. Have u verified that
>>>>>> this works? If yes, then spec should be updated.
>>>>>>
>>>>>>> +         }
>>>>>>> + }
>>>>>>> +
>>>>>>>             /* set VMDq map to default PF pool */
>>>>>>>             hw->mac.ops.set_vmdq(hw, 0,
>>>>>>> RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
>>>>>>>
>>>>>>> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>>>>>>> b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>>>>>>> index f69abda..a7c17a4 100644
>>>>>>> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>>>>>>> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
>>>>>>> @@ -3327,6 +3327,39 @@ ixgbe_alloc_rx_queue_mbufs(struct
>>>>>> igb_rx_queue *rxq)
>>>>>>>      }
>>>>>>>
>>>>>>>      static int
>>>>>>> +ixgbe_config_vf_rss(struct rte_eth_dev *dev) {
>>>>>>> + struct ixgbe_hw *hw;
>>>>>>> + uint32_t mrqc;
>>>>>>> +
>>>>>>> + ixgbe_rss_configure(dev);
>>>>>>> +
>>>>>>> + hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
>>>>>>> +
>>>>>>> + /* MRQC: enable VF RSS */
>>>>>>> + mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
>>>>>>> + mrqc &= ~IXGBE_MRQC_MRQE_MASK;
>>>>>>> + switch (RTE_ETH_DEV_SRIOV(dev).active) {
>>>>>>> + case ETH_64_POOLS:
>>>>>>> +         mrqc |= IXGBE_MRQC_VMDQRSS64EN;
>>>>>>> +         break;
>>>>>>> +
>>>>>>> + case ETH_32_POOLS:
>>>>>>> + case ETH_16_POOLS:
>>>>>>> +         mrqc |= IXGBE_MRQC_VMDQRSS32EN;
>>>>>> Again, this contradicts with the spec.
>>>>> Yes, the spec say the hw can't support vf rss at all, but experiment find that
>>>> could be done.
>>>>
>>>> The spec explicitly say that VF RSS *is* supported in particular in the table
>>>> mentioned above.
>>> But the spec(January 2014 revision 2.9) on my hand says: "in IOV mode, VMDq+RSS mode is not available"  in note of section 4.6.10.2.1
>>> And still there is the whole section about configuring packet filtering including Rx in the VF mode (including the table i've referred) . It's quite confusing i must say...
>> Changchun: do you mind tell me which table you are referring to, I will try to have a look and may share my thought if I can.
>>
>>>> What your code is doing is that in case of 16 VFs u setup a 32 pools
>>>> configuration and use only 16 out of them.
>>> But I don't see any big issue here, in this case, each vf COULD have 8 queues, like I said before, but this is estimation value, actually only 4 queues
>>> Are really available for one vf, you can refer to spec for the correctness here.
>>> No issues, i just wanted to clarify that it seems like you are doing it quite according to the spec.
>>>>> We can focus on discussing the implementation firstly.
>>> Right. So, after we clarified that there is nothing u can do at the moment about the rss query flow, there is  one more open issue here.
>>> In general we need a way to know how many  queues from those that are available may be configured as RSS. While the same issue is present with the PF as well (it's 16 for 82599 but it may be a different number for a different device) for VF it's more pronounced since it depends on the PF configuration.
>>> Don't u think it would be logical to add a specific filed for it in the dev_info struct?
>> Changchun: you are right, and we have already the max_rx_queues in dev_info,
>>
>> while negotiating between pf and vf, the negotiated max rx queue number will be set into hw->mac.max_rx_queues,
>>
>> And after that when you call ixgbe_dev_info_get, that value will be set into dev_info->max_rx_queues.
>>
>> Then you could get the number of queue all packets will distribute to by getting dev_info->max_rx_queues.
>> I'm afraid u've missed my point here. For instance, for a PF max_rx_queues will be set to 128 while u may only configure 16 RSS queues. The similar will happen for a VF in the 16 VF
>> configuration: max_rx_queues will be set to 8 while u may configure only 4 RSS queues.
>> This is why i suggested to add a separate info field... 😉
> Yes, I got your point this time, but the issue is that when I have 16 vf, and try to set max_rx_queues as 8, then no queue can rx any packet on vf,
> This is why I have to add a logic to refine the rx queue number from 8 to 4 queues.
> I have tried to do it in the way as you suggest, but unfortunately rx queue can't work.  If you find any other good method, pls let me know.

Pls., note that RSS is not the only multi-queue mode supported by both 
HW and DPDK - there is a DCB mode. This mode is also supported in the VF 
mode according to the same Table 7-3. And, according to the same table, 
there is a 8 TC per 16 pools mode. Therefore if a user desires to 
utilize all 8 available Rx queues of VF he/she could - in a DCB Rx mode.

Now looking at your code a bit mode deeply I see that u cut the number 
of Rx queues per pool down to 4 in a PF configuration when VMDQ_RSS mode 
is requested, which is ok but it still leaves the general issue open. 
Let's describe it in details for PF and VF separately:

For a PF:

  * When a user queries the PF he only gets the maximum number of Rx
    queues and he has no way to know what is a maximum set of RSS/DCB
    queues he/she may configure. E.g. for 82599 PF the maximum Rx queues
    number is 128 and the maximum RSS set size is 16 (see table 7-1 in
    the spec for all set of supported modes).
  * Therefore the user can't write a generic vendor independent code
    that will be able to configure RSS for a PF based on the current
    rte_eth_dev_info_get() output.

For a VF:

  * Similarly to PF above, if VF supports both RSS and DCB
    configurations, having the max_rx_queues is not enough since the
    maximum RSS set may be smaller from that number. "Luckily", 82599
    supports only either RSS or DCB VF configuration at the same time
    and this is configured globally during PF configuration but who said
    that later Intel's NICs or other provider's NICs supported by DCB
    are going to have the same limitation? So, in a general case, we
    find ourselves in the same uncertainty in a VF case like in a PF
    case above.
  * Currently, VF have no tools to know what is a PF multi-queue
    configuration (Table 7-3): is it RSS or DCB. Your patch-set sort of
    assumes that PF is accessible at the same level where VF DPDK code
    runs but this is not the case on the environments when SRIOV was
    originally targeting to - the virtualization environment, where the
    Guest code has no access whatsoever to the PF and may only query VF.
    AWS is one real-life example. So, when a DPDK code has to initialize
    an SRIOV VF in a Guest OS it lacks the information about both the
    available Rx MQ mode and it's capabilities (u may say we
    max_rx_queues for a capabilities but see the bullet above).


What I suggest to address all issues above is to add the following 
fields to the rte_eth_dev_info:

 1. rte_eth_rx_mq_mode rx_mode - for supported Rx MQ modes.
 2. uint16_t max_rss_queues - for the maximum RSS set size.
 3. uint16_t max_dcb_queues - for the maximum number of TCs.

These 3 new fields will clearly describe the Rx MQ capabilities of the 
function. The further correctness checking, like the specific RSS queues 
number configuration for VF, should be implemented as a error code 
returned from the rte_eth_dev_configure().


Pls., comment.
Vlad


> Thanks and regards,
> Changchun
>
Cunming Liang Jan. 4, 2015, 2:10 a.m. UTC | #15
> -----Original Message-----
> From: Ouyang, Changchun
> Sent: Wednesday, December 24, 2014 1:23 PM
> To: dev@dpdk.org
> Cc: Liang, Cunming; Cao, Waterman; Ouyang, Changchun
> Subject: [PATCH v3 5/6] ixgbe: Config VF RSS
> 
> It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable VF RSS.
> 
> The psrtype will determine how many queues the received packets will distribute
> to,
> and the value of psrtype should depends on both facet: max VF rxq number
> which
> has been negotiated with PF, and the number of rxq specified in config on guest.
> 
> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> ---
>  lib/librte_pmd_ixgbe/ixgbe_pf.c   | 15 +++++++
>  lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 92
> ++++++++++++++++++++++++++++++++++-----
>  2 files changed, 97 insertions(+), 10 deletions(-)
> 
> diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> index cbb0145..9c9dad8 100644
> --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> @@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev
> *eth_dev)
>  	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw->mac.num_rar_entries),
> 0);
>  	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw->mac.num_rar_entries),
> 0);
> 
> +	/*
> +	 * VF RSS can support at most 4 queues for each VF, even if
> +	 * 8 queues are available for each VF, it need refine to 4
> +	 * queues here due to this limitation, otherwise no queue
> +	 * will receive any packet even RSS is enabled.
> +	 */
> +	if (eth_dev->data->dev_conf.rxmode.mq_mode ==
> ETH_MQ_RX_VMDQ_RSS) {
> +		if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
> +			RTE_ETH_DEV_SRIOV(eth_dev).active = ETH_32_POOLS;
> +			RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
> +			RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
> +				dev_num_vf(eth_dev) * 4;
> +		}
> +	}
> +
>  	/* set VMDq map to default PF pool */
>  	hw->mac.ops.set_vmdq(hw, 0,
> RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
> 
> diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> index f69abda..a7c17a4 100644
> --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> @@ -3327,6 +3327,39 @@ ixgbe_alloc_rx_queue_mbufs(struct igb_rx_queue
> *rxq)
>  }
> 
>  static int
> +ixgbe_config_vf_rss(struct rte_eth_dev *dev)
> +{
> +	struct ixgbe_hw *hw;
> +	uint32_t mrqc;
> +
> +	ixgbe_rss_configure(dev);
> +
> +	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> +
> +	/* MRQC: enable VF RSS */
> +	mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
> +	mrqc &= ~IXGBE_MRQC_MRQE_MASK;
> +	switch (RTE_ETH_DEV_SRIOV(dev).active) {
> +	case ETH_64_POOLS:
> +		mrqc |= IXGBE_MRQC_VMDQRSS64EN;
> +		break;
> +
> +	case ETH_32_POOLS:
> +	case ETH_16_POOLS:
> +		mrqc |= IXGBE_MRQC_VMDQRSS32EN;
> +		break;
> +
> +	default:
> +		PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode");
> +		return -EINVAL;
> +	}
> +
> +	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
> +
> +	return 0;
> +}
> +
> +static int
>  ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
>  {
>  	struct ixgbe_hw *hw =
> @@ -3358,24 +3391,38 @@ ixgbe_dev_mq_rx_configure(struct rte_eth_dev
> *dev)
>  			default: ixgbe_rss_disable(dev);
>  		}
>  	} else {
> -		switch (RTE_ETH_DEV_SRIOV(dev).active) {
>  		/*
>  		 * SRIOV active scheme
>  		 * FIXME if support DCB/RSS together with VMDq & SRIOV
>  		 */
> -		case ETH_64_POOLS:
> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> IXGBE_MRQC_VMDQEN);
> +		switch (dev->data->dev_conf.rxmode.mq_mode) {
> +		case ETH_MQ_RX_RSS:
> +		case ETH_MQ_RX_VMDQ_RSS:
> +			ixgbe_config_vf_rss(dev);
>  			break;
> 
> -		case ETH_32_POOLS:
> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> IXGBE_MRQC_VMDQRT4TCEN);
> -			break;
> +		default:
> +			switch (RTE_ETH_DEV_SRIOV(dev).active) {
[Liang, Cunming]  Just a minor comments. To avoid a switch branch inside another switch, we can have a  ixgbe_config_vf_default(),
which process all the things if no RSS/DCB required in multi-queue setting.
Then we can put all the 'switch(SRIOV(dev).active){...}'  in it.
> +			case ETH_64_POOLS:
> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> +					IXGBE_MRQC_VMDQEN);
> +				break;
> 
> -		case ETH_16_POOLS:
> -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> IXGBE_MRQC_VMDQRT8TCEN);
> +			case ETH_32_POOLS:
> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> +					IXGBE_MRQC_VMDQRT4TCEN);
> +				break;
> +
> +			case ETH_16_POOLS:
> +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> +					IXGBE_MRQC_VMDQRT8TCEN);
> +				break;
> +			default:
> +				PMD_INIT_LOG(ERR,
> +					"invalid pool number in IOV mode");
> +				break;
> +			}
>  			break;
> -		default:
> -			PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
>  		}
>  	}
> 
> @@ -3989,10 +4036,32 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
>  	uint16_t buf_size;
>  	uint16_t i;
>  	int ret;
> +	uint16_t valid_rxq_num;
> 
>  	PMD_INIT_FUNC_TRACE();
>  	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> 
> +	valid_rxq_num = RTE_MIN(dev->data->nb_rx_queues, hw-
> >mac.max_rx_queues);
> +
> +	/*
> +	 * VMDq RSS can't support 3 queues, so config it into 4 queues,
> +	 * and give user a hint that some packets may loss if it doesn't
> +	 * poll the queue where those packets are distributed to.
> +	 */
> +	if (valid_rxq_num == 3)
[Liang, Cunming] According to the inline comment, it makes more sense to use 'if (valid_rxq_num >= 3)'.
In case, the value returned by max_rx_queues is not less equal than 4. 
> +		valid_rxq_num = 4;
> +
> +	if (dev->data->nb_rx_queues > valid_rxq_num) {
> +		PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
> +			"it should be equal to or less than %d",
> +			valid_rxq_num);
> +		return -1;
> +	} else if (dev->data->nb_rx_queues < valid_rxq_num)
> +		PMD_INIT_LOG(ERR, "The number of Rx queue is less "
> +			"than the number of available Rx queues:%d, "
> +			"packets in Rx queues(q_id >= %d) may loss.",
> +			valid_rxq_num, dev->data->nb_rx_queues);
> +
>  	/*
>  	 * When the VF driver issues a IXGBE_VF_RESET request, the PF driver
>  	 * disables the VF receipt of packets if the PF MTU is > 1500.
> @@ -4094,6 +4163,9 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
>  			IXGBE_PSRTYPE_IPV6HDR;
>  #endif
> 
> +	/* Set RQPL for VF RSS according to max Rx queue */
> +	psrtype |= (valid_rxq_num >> 1) <<
> +		IXGBE_PSRTYPE_RQPL_SHIFT;
>  	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
> 
>  	if (dev->data->dev_conf.rxmode.enable_scatter) {
> --
> 1.8.4.2
Ouyang Changchun Jan. 4, 2015, 6:25 a.m. UTC | #16
Hi Steve,

> -----Original Message-----
> From: Liang, Cunming
> Sent: Sunday, January 4, 2015 10:11 AM
> To: Ouyang, Changchun; dev@dpdk.org
> Cc: Cao, Waterman
> Subject: RE: [PATCH v3 5/6] ixgbe: Config VF RSS
> 
> 
> 
> > -----Original Message-----
> > From: Ouyang, Changchun
> > Sent: Wednesday, December 24, 2014 1:23 PM
> > To: dev@dpdk.org
> > Cc: Liang, Cunming; Cao, Waterman; Ouyang, Changchun
> > Subject: [PATCH v3 5/6] ixgbe: Config VF RSS
> >
> > It needs config RSS and IXGBE_MRQC and IXGBE_VFPSRTYPE to enable VF
> RSS.
> >
> > The psrtype will determine how many queues the received packets will
> > distribute to, and the value of psrtype should depends on both facet:
> > max VF rxq number which has been negotiated with PF, and the number of
> > rxq specified in config on guest.
> >
> > Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> > ---
> >  lib/librte_pmd_ixgbe/ixgbe_pf.c   | 15 +++++++
> >  lib/librte_pmd_ixgbe/ixgbe_rxtx.c | 92
> > ++++++++++++++++++++++++++++++++++-----
> >  2 files changed, 97 insertions(+), 10 deletions(-)
> >
> > diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > b/lib/librte_pmd_ixgbe/ixgbe_pf.c index cbb0145..9c9dad8 100644
> > --- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > +++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
> > @@ -187,6 +187,21 @@ int ixgbe_pf_host_configure(struct rte_eth_dev
> > *eth_dev)
> >  	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw-
> >mac.num_rar_entries),
> > 0);
> >  	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw-
> >mac.num_rar_entries),
> > 0);
> >
> > +	/*
> > +	 * VF RSS can support at most 4 queues for each VF, even if
> > +	 * 8 queues are available for each VF, it need refine to 4
> > +	 * queues here due to this limitation, otherwise no queue
> > +	 * will receive any packet even RSS is enabled.
> > +	 */
> > +	if (eth_dev->data->dev_conf.rxmode.mq_mode ==
> > ETH_MQ_RX_VMDQ_RSS) {
> > +		if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
> > +			RTE_ETH_DEV_SRIOV(eth_dev).active =
> ETH_32_POOLS;
> > +			RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
> > +			RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
> > +				dev_num_vf(eth_dev) * 4;
> > +		}
> > +	}
> > +
> >  	/* set VMDq map to default PF pool */
> >  	hw->mac.ops.set_vmdq(hw, 0,
> > RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
> >
> > diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > index f69abda..a7c17a4 100644
> > --- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > +++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
> > @@ -3327,6 +3327,39 @@ ixgbe_alloc_rx_queue_mbufs(struct
> igb_rx_queue
> > *rxq)
> >  }
> >
> >  static int
> > +ixgbe_config_vf_rss(struct rte_eth_dev *dev) {
> > +	struct ixgbe_hw *hw;
> > +	uint32_t mrqc;
> > +
> > +	ixgbe_rss_configure(dev);
> > +
> > +	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> > +
> > +	/* MRQC: enable VF RSS */
> > +	mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
> > +	mrqc &= ~IXGBE_MRQC_MRQE_MASK;
> > +	switch (RTE_ETH_DEV_SRIOV(dev).active) {
> > +	case ETH_64_POOLS:
> > +		mrqc |= IXGBE_MRQC_VMDQRSS64EN;
> > +		break;
> > +
> > +	case ETH_32_POOLS:
> > +	case ETH_16_POOLS:
> > +		mrqc |= IXGBE_MRQC_VMDQRSS32EN;
> > +		break;
> > +
> > +	default:
> > +		PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode");
> > +		return -EINVAL;
> > +	}
> > +
> > +	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
> > +
> > +	return 0;
> > +}
> > +
> > +static int
> >  ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)  {
> >  	struct ixgbe_hw *hw =
> > @@ -3358,24 +3391,38 @@ ixgbe_dev_mq_rx_configure(struct
> rte_eth_dev
> > *dev)
> >  			default: ixgbe_rss_disable(dev);
> >  		}
> >  	} else {
> > -		switch (RTE_ETH_DEV_SRIOV(dev).active) {
> >  		/*
> >  		 * SRIOV active scheme
> >  		 * FIXME if support DCB/RSS together with VMDq & SRIOV
> >  		 */
> > -		case ETH_64_POOLS:
> > -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > IXGBE_MRQC_VMDQEN);
> > +		switch (dev->data->dev_conf.rxmode.mq_mode) {
> > +		case ETH_MQ_RX_RSS:
> > +		case ETH_MQ_RX_VMDQ_RSS:
> > +			ixgbe_config_vf_rss(dev);
> >  			break;
> >
> > -		case ETH_32_POOLS:
> > -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > IXGBE_MRQC_VMDQRT4TCEN);
> > -			break;
> > +		default:
> > +			switch (RTE_ETH_DEV_SRIOV(dev).active) {
> [Liang, Cunming]  Just a minor comments. To avoid a switch branch inside
> another switch, we can have a  ixgbe_config_vf_default(), which process all
> the things if no RSS/DCB required in multi-queue setting.
> Then we can put all the 'switch(SRIOV(dev).active){...}'  in it.

Yes, will resolve it in v4 patch.

> > +			case ETH_64_POOLS:
> > +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > +					IXGBE_MRQC_VMDQEN);
> > +				break;
> >
> > -		case ETH_16_POOLS:
> > -			IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > IXGBE_MRQC_VMDQRT8TCEN);
> > +			case ETH_32_POOLS:
> > +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > +					IXGBE_MRQC_VMDQRT4TCEN);
> > +				break;
> > +
> > +			case ETH_16_POOLS:
> > +				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
> > +					IXGBE_MRQC_VMDQRT8TCEN);
> > +				break;
> > +			default:
> > +				PMD_INIT_LOG(ERR,
> > +					"invalid pool number in IOV mode");
> > +				break;
> > +			}
> >  			break;
> > -		default:
> > -			PMD_INIT_LOG(ERR, "invalid pool number in IOV
> mode");
> >  		}
> >  	}
> >
> > @@ -3989,10 +4036,32 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
> >  	uint16_t buf_size;
> >  	uint16_t i;
> >  	int ret;
> > +	uint16_t valid_rxq_num;
> >
> >  	PMD_INIT_FUNC_TRACE();
> >  	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
> >
> > +	valid_rxq_num = RTE_MIN(dev->data->nb_rx_queues, hw-
> > >mac.max_rx_queues);
> > +
> > +	/*
> > +	 * VMDq RSS can't support 3 queues, so config it into 4 queues,
> > +	 * and give user a hint that some packets may loss if it doesn't
> > +	 * poll the queue where those packets are distributed to.
> > +	 */
> > +	if (valid_rxq_num == 3)
> [Liang, Cunming] According to the inline comment, it makes more sense to
> use 'if (valid_rxq_num >= 3)'.
> In case, the value returned by max_rx_queues is not less equal than 4.

This will be resolved in v4 patch.
Thanks
Changchun
Bruce Richardson Jan. 5, 2015, 10:29 a.m. UTC | #17
On Fri, Dec 26, 2014 at 01:52:25AM +0000, Ouyang, Changchun wrote:
> 
> 
> > -----Original Message-----
> > From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> > Sent: Thursday, December 25, 2014 9:20 PM
> > To: Ouyang, Changchun; dev@dpdk.org
> > Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
> > 
 > 
> > Like I said before, trying to guess what user wants is a way to making a code
> > that is very hard to use and to maintain. Pls., just return an error and let the
> > user code deal with it the way he/she really wants and not the way u *think*
> > he/she wants.
> > 
> I didn't disagree on this, either :-)
> If you have strong reason for this way and more guys agree with it,
> I will modify it probably in v4. 
> > >
+1 on returning error.
Ouyang Changchun Jan. 6, 2015, 1 a.m. UTC | #18
> -----Original Message-----
> From: Richardson, Bruce
> Sent: Monday, January 5, 2015 6:29 PM
> To: Ouyang, Changchun
> Cc: Vlad Zolotarov; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
> 
> On Fri, Dec 26, 2014 at 01:52:25AM +0000, Ouyang, Changchun wrote:
> >
> >
> > > -----Original Message-----
> > > From: Vlad Zolotarov [mailto:vladz@cloudius-systems.com]
> > > Sent: Thursday, December 25, 2014 9:20 PM
> > > To: Ouyang, Changchun; dev@dpdk.org
> > > Subject: Re: [dpdk-dev] [PATCH v3 5/6] ixgbe: Config VF RSS
> > >
>  >
> > > Like I said before, trying to guess what user wants is a way to
> > > making a code that is very hard to use and to maintain. Pls., just
> > > return an error and let the user code deal with it the way he/she
> > > really wants and not the way u *think* he/she wants.
> > >
> > I didn't disagree on this, either :-)
> > If you have strong reason for this way and more guys agree with it, I
> > will modify it probably in v4.
> > > >
> +1 on returning error.
It does in v4 patch.
diff mbox

Patch

diff --git a/lib/librte_pmd_ixgbe/ixgbe_pf.c b/lib/librte_pmd_ixgbe/ixgbe_pf.c
index cbb0145..9c9dad8 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_pf.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_pf.c
@@ -187,6 +187,21 @@  int ixgbe_pf_host_configure(struct rte_eth_dev *eth_dev)
 	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(hw->mac.num_rar_entries), 0);
 	IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(hw->mac.num_rar_entries), 0);
 
+	/*
+	 * VF RSS can support at most 4 queues for each VF, even if
+	 * 8 queues are available for each VF, it need refine to 4
+	 * queues here due to this limitation, otherwise no queue
+	 * will receive any packet even RSS is enabled.
+	 */
+	if (eth_dev->data->dev_conf.rxmode.mq_mode == ETH_MQ_RX_VMDQ_RSS) {
+		if (RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool == 8) {
+			RTE_ETH_DEV_SRIOV(eth_dev).active = ETH_32_POOLS;
+			RTE_ETH_DEV_SRIOV(eth_dev).nb_q_per_pool = 4;
+			RTE_ETH_DEV_SRIOV(eth_dev).def_pool_q_idx =
+				dev_num_vf(eth_dev) * 4;
+		}
+	}
+
 	/* set VMDq map to default PF pool */
 	hw->mac.ops.set_vmdq(hw, 0, RTE_ETH_DEV_SRIOV(eth_dev).def_vmdq_idx);
 
diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
index f69abda..a7c17a4 100644
--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c
@@ -3327,6 +3327,39 @@  ixgbe_alloc_rx_queue_mbufs(struct igb_rx_queue *rxq)
 }
 
 static int
+ixgbe_config_vf_rss(struct rte_eth_dev *dev)
+{
+	struct ixgbe_hw *hw;
+	uint32_t mrqc;
+
+	ixgbe_rss_configure(dev);
+
+	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
+
+	/* MRQC: enable VF RSS */
+	mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC);
+	mrqc &= ~IXGBE_MRQC_MRQE_MASK;
+	switch (RTE_ETH_DEV_SRIOV(dev).active) {
+	case ETH_64_POOLS:
+		mrqc |= IXGBE_MRQC_VMDQRSS64EN;
+		break;
+
+	case ETH_32_POOLS:
+	case ETH_16_POOLS:
+		mrqc |= IXGBE_MRQC_VMDQRSS32EN;
+		break;
+
+	default:
+		PMD_INIT_LOG(ERR, "Invalid pool number in IOV mode");
+		return -EINVAL;
+	}
+
+	IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc);
+
+	return 0;
+}
+
+static int
 ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
 {
 	struct ixgbe_hw *hw =
@@ -3358,24 +3391,38 @@  ixgbe_dev_mq_rx_configure(struct rte_eth_dev *dev)
 			default: ixgbe_rss_disable(dev);
 		}
 	} else {
-		switch (RTE_ETH_DEV_SRIOV(dev).active) {
 		/*
 		 * SRIOV active scheme
 		 * FIXME if support DCB/RSS together with VMDq & SRIOV
 		 */
-		case ETH_64_POOLS:
-			IXGBE_WRITE_REG(hw, IXGBE_MRQC, IXGBE_MRQC_VMDQEN);
+		switch (dev->data->dev_conf.rxmode.mq_mode) {
+		case ETH_MQ_RX_RSS:
+		case ETH_MQ_RX_VMDQ_RSS:
+			ixgbe_config_vf_rss(dev);
 			break;
 
-		case ETH_32_POOLS:
-			IXGBE_WRITE_REG(hw, IXGBE_MRQC, IXGBE_MRQC_VMDQRT4TCEN);
-			break;
+		default:
+			switch (RTE_ETH_DEV_SRIOV(dev).active) {
+			case ETH_64_POOLS:
+				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
+					IXGBE_MRQC_VMDQEN);
+				break;
 
-		case ETH_16_POOLS:
-			IXGBE_WRITE_REG(hw, IXGBE_MRQC, IXGBE_MRQC_VMDQRT8TCEN);
+			case ETH_32_POOLS:
+				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
+					IXGBE_MRQC_VMDQRT4TCEN);
+				break;
+
+			case ETH_16_POOLS:
+				IXGBE_WRITE_REG(hw, IXGBE_MRQC,
+					IXGBE_MRQC_VMDQRT8TCEN);
+				break;
+			default:
+				PMD_INIT_LOG(ERR,
+					"invalid pool number in IOV mode");
+				break;
+			}
 			break;
-		default:
-			PMD_INIT_LOG(ERR, "invalid pool number in IOV mode");
 		}
 	}
 
@@ -3989,10 +4036,32 @@  ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
 	uint16_t buf_size;
 	uint16_t i;
 	int ret;
+	uint16_t valid_rxq_num;
 
 	PMD_INIT_FUNC_TRACE();
 	hw = IXGBE_DEV_PRIVATE_TO_HW(dev->data->dev_private);
 
+	valid_rxq_num = RTE_MIN(dev->data->nb_rx_queues, hw->mac.max_rx_queues);
+
+	/*
+	 * VMDq RSS can't support 3 queues, so config it into 4 queues,
+	 * and give user a hint that some packets may loss if it doesn't
+	 * poll the queue where those packets are distributed to.
+	 */
+	if (valid_rxq_num == 3)
+		valid_rxq_num = 4;
+
+	if (dev->data->nb_rx_queues > valid_rxq_num) {
+		PMD_INIT_LOG(ERR, "The number of Rx queue invalid, "
+			"it should be equal to or less than %d",
+			valid_rxq_num);
+		return -1;
+	} else if (dev->data->nb_rx_queues < valid_rxq_num)
+		PMD_INIT_LOG(ERR, "The number of Rx queue is less "
+			"than the number of available Rx queues:%d, "
+			"packets in Rx queues(q_id >= %d) may loss.",
+			valid_rxq_num, dev->data->nb_rx_queues);
+
 	/*
 	 * When the VF driver issues a IXGBE_VF_RESET request, the PF driver
 	 * disables the VF receipt of packets if the PF MTU is > 1500.
@@ -4094,6 +4163,9 @@  ixgbevf_dev_rx_init(struct rte_eth_dev *dev)
 			IXGBE_PSRTYPE_IPV6HDR;
 #endif
 
+	/* Set RQPL for VF RSS according to max Rx queue */
+	psrtype |= (valid_rxq_num >> 1) <<
+		IXGBE_PSRTYPE_RQPL_SHIFT;
 	IXGBE_WRITE_REG(hw, IXGBE_VFPSRTYPE, psrtype);
 
 	if (dev->data->dev_conf.rxmode.enable_scatter) {