net/ixgbe: reduce PF mailbox interrupt rate
diff mbox series

Message ID 20180815141430.13421-1-bluca@debian.org
State Not Applicable, archived
Delegated to: Qi Zhang
Headers show
Series
  • net/ixgbe: reduce PF mailbox interrupt rate
Related show

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/Intel-compilation success Compilation OK

Commit Message

Luca Boccassi Aug. 15, 2018, 2:14 p.m. UTC
We have observed high rate of NIC PF interrupts when VNF is using DPDK
APIs rte_eth_link_get_nowait() and rte_eth_link_get() functions, as
they are causing VF driver to send many MBOX ACK messages.

With these changes, the interrupt rates go down significantly. Here's
some testing results:

Without the patch:

$ egrep 'CPU|ens1f' /proc/interrupts ; sleep 10; egrep 'CPU|ens1f' /proc/interrupts
            CPU0       CPU1       CPU2       CPU3       CPU4       CPU5       CPU6       CPU7       CPU8       CPU9       CPU10      CPU11      CPU12      CPU13      CPU14      CPU15
  34:         88          0          0          0          0         41         30        509          0          0        350         24         88        114        461        562   PCI-MSI 1572864-edge      ens1f0-TxRx-0
  35:         49         24          0          0         65        130         64         29         67          0         10          0          0         46         38        764   PCI-MSI 1572865-edge      ens1f0-TxRx-1
  36:         53          0          0         64         15         85        132         71        108          0         30          0        165        215        303        104   PCI-MSI 1572866-edge      ens1f0-TxRx-2
  37:         46        196          0          0         10         48         62         68         51          0          0          0        103         82         54        192   PCI-MSI 1572867-edge      ens1f0-TxRx-3
  38:        226          0          0          0        159        145        749        265          0          0        202          0      69229        166        450          0   PCI-MSI 1572868-edge      ens1f0
  52:         95        896          0          0          0         18         53          0        494          0          0          0          0        265         79        124   PCI-MSI 1574912-edge      ens1f1-TxRx-0
  53:         50          0         18          0         72         33          0        168        330          0          0          0        141         22         12         65   PCI-MSI 1574913-edge      ens1f1-TxRx-1
  54:         65          0          0          0        239        104        166         49        442          0          0          0        126         26        307          0   PCI-MSI 1574914-edge      ens1f1-TxRx-2
  55:         57          0          0          0        123         35         83         54        157        106          0          0         26         29        312         97   PCI-MSI 1574915-edge      ens1f1-TxRx-3
  56:        232          0      13910          0         16         21          0      54422          0          0          0         24         25          0         78          0   PCI-MSI 1574916-edge      ens1f1
            CPU0       CPU1       CPU2       CPU3       CPU4       CPU5       CPU6       CPU7       CPU8       CPU9       CPU10      CPU11      CPU12      CPU13      CPU14      CPU15
  34:         88          0          0          0          0         41         30        509          0          0        350         24         88        119        461        562   PCI-MSI 1572864-edge      ens1f0-TxRx-0
  35:         49         24          0          0         65        130         64         29         67          0         10          0          0         46         38        771   PCI-MSI 1572865-edge      ens1f0-TxRx-1
  36:         53          0          0         64         15         85        132         71        108          0         30          0        165        215        303        113   PCI-MSI 1572866-edge      ens1f0-TxRx-2
  37:         46        196          0          0         10         48         62         68         56          0          0          0        103         82         54        192   PCI-MSI 1572867-edge      ens1f0-TxRx-3
  38:        226          0          0          0        159        145        749        265          0          0        202          0      71281        166        450          0   PCI-MSI 1572868-edge      ens1f0
  52:         95        896          0          0          0         18         53          0        494          0          0          0          0        265         79        133   PCI-MSI 1574912-edge      ens1f1-TxRx-0
  53:         50          0         18          0         72         33          0        173        330          0          0          0        141         22         12         65   PCI-MSI 1574913-edge      ens1f1-TxRx-1
  54:         65          0          0          0        239        104        166         49        442          0          0          0        126         26        312          0   PCI-MSI 1574914-edge      ens1f1-TxRx-2
  55:         57          0          0          0        123         35         83         59        157        106          0          0         26         29        312         97   PCI-MSI 1574915-edge      ens1f1-TxRx-3
  56:        232          0      15910          0         16         21          0      54422          0          0          0         24         25          0         78          0   PCI-MSI 1574916-edge      ens1f1

During the 10s interval, CPU2 jumped by 2000 interrupts, CPU12 by 2051
interrupts, for about 200 interrupts/second. That's on the order of
what we expect. I would have guessed 100/s but perhaps there are two
mailbox messages.

With the patch:

$ egrep 'CPU|ens1f' /proc/interrupts ; sleep 10; egrep 'CPU|ens1f' /proc/interrupts
            CPU0       CPU1       CPU2       CPU3       CPU4       CPU5       CPU6       CPU7       CPU8       CPU9       CPU10      CPU11      CPU12      CPU13      CPU14      CPU15
  34:         88          0          0          0          0         25         19        177          0          0        350         24         88        100        362        559   PCI-MSI 1572864-edge      ens1f0-TxRx-0
  35:         49         19          0          0         65        130         64         29         67          0         10          0          0         46         38        543   PCI-MSI 1572865-edge      ens1f0-TxRx-1
  36:         53          0          0         64         15         53         85         71        108          0         24          0         85        215        292         31   PCI-MSI 1572866-edge      ens1f0-TxRx-2
  37:         46        196          0          0         10         43         57         39         19          0          0          0         78         69         49        149   PCI-MSI 1572867-edge      ens1f0-TxRx-3
  38:        226          0          0          0        159        145        749        247          0          0        202          0      58250          0        450          0   PCI-MSI 1572868-edge      ens1f0
  52:         95        896          0          0          0         18         53          0        189          0          0          0          0        265         79         25   PCI-MSI 1574912-edge      ens1f1-TxRx-0
  53:         50          0         18          0         72         33          0         90        330          0          0          0        136          5         12          0   PCI-MSI 1574913-edge      ens1f1-TxRx-1
  54:         65          0          0          0         10        104        166         49        442          0          0          0        126         26        226          0   PCI-MSI 1574914-edge      ens1f1-TxRx-2
  55:         57          0          0          0         61         35         83         30        157        101          0          0         26         15        312          0   PCI-MSI 1574915-edge      ens1f1-TxRx-3
  56:        232          0       2062          0         16         21          0      54422          0          0          0         24         25          0         78          0   PCI-MSI 1574916-edge      ens1f1
            CPU0       CPU1       CPU2       CPU3       CPU4       CPU5       CPU6       CPU7       CPU8       CPU9       CPU10      CPU11      CPU12      CPU13      CPU14      CPU15
  34:         88          0          0          0          0         25         19        177          0          0        350         24         88        102        362        562   PCI-MSI 1572864-edge      ens1f0-TxRx-0
  35:         49         19          0          0         65        130         64         29         67          0         10          0          0         46         38        548   PCI-MSI 1572865-edge      ens1f0-TxRx-1
  36:         53          0          0         64         15         53         85         71        108          0         24          0         85        215        292         36   PCI-MSI 1572866-edge      ens1f0-TxRx-2
  37:         46        196          0          0         10         45         57         39         19          0          0          0         78         69         49        152   PCI-MSI 1572867-edge      ens1f0-TxRx-3
  38:        226          0          0          0        159        145        749        247          0          0        202          0      58259          0        450          0   PCI-MSI 1572868-edge      ens1f0
  52:         95        896          0          0          0         18         53          0        194          0          0          0          0        265         79         25   PCI-MSI 1574912-edge      ens1f1-TxRx-0
  53:         50          0         18          0         72         33          0         95        330          0          0          0        136          5         12          0   PCI-MSI 1574913-edge      ens1f1-TxRx-1
  54:         65          0          0          0         10        104        166         49        442          0          0          0        126         26        231          0   PCI-MSI 1574914-edge      ens1f1-TxRx-2
  55:         57          0          0          0         66         35         83         30        157        101          0          0         26         15        312          0   PCI-MSI 1574915-edge      ens1f1-TxRx-3
  56:        232          0       2071          0         16         21          0      54422          0          0          0         24         25          0         78          0   PCI-MSI 1574916-edge      ens1f1

Note the interrupt rate has gone way down. During the 10s interval, we
only saw a handful of interrupts.

Note that this patch was originally provided by Intel directly to AT&T
and Vyatta, but unfortunately I am unable to find records of the exact
author.

We have been using this in production for more than a year.

Fixes: af75078fece3 ("first public release")
Cc: stable@dpdk.org

Signed-off-by: Luca Boccassi <bluca@debian.org>
---
 drivers/net/ixgbe/base/ixgbe_vf.c | 33 ++++++++++++++++---------------
 1 file changed, 17 insertions(+), 16 deletions(-)

Comments

Luca Boccassi Sept. 26, 2018, 10:21 a.m. UTC | #1
On Wed, 2018-08-15 at 15:14 +0100, Luca Boccassi wrote:
> We have observed high rate of NIC PF interrupts when VNF is using
> DPDK
> APIs rte_eth_link_get_nowait() and rte_eth_link_get() functions, as
> they are causing VF driver to send many MBOX ACK messages.

Ping. Any chance this could be reviewed? Thanks!
Zhao1, Wei Nov. 7, 2018, 9:17 a.m. UTC | #2
Hi, Luca Boccassi

    The purpose of this patch is to reduce the mailbox interrupt from vf to pf, but there seem some point need for discussion in this patch.
 
First, I do not know why do you change code of function ixgbe_check_mac_link_vf(), because in rte_eth_link_get_nowait() and rte_eth_link_get(),
it will call ixgbe_dev_link_update()->ixgbe_dev_link_update_share()-> ixgbevf_check_link() for VF, NOT ixgbe_check_mac_link_vf() in your patch!

Second, in function ixgbevf_check_link(), there is mailbox message read operation for vf, 
" if (mbx->ops.read(hw, &in_msg, 1, 0))", that is ixgbe_read_mbx_vf() ,
This will cause interrupt from vf to pf, this is just the point of this patch, it is also the problem that you want to solve.
So, you use autoneg_wait_to_complete flag to control this mailbox message read operation, maybe you will use rte_eth_link_get_nowait(), Which set autoneg_wait_to_complete = 0, then the interrupt from vf to pf can be reduced.

But  I do not think this patch is necessary, because in ixgbevf_check_link(), it,has
" 
bool no_pflink_check = wait_to_complete == 0; 

               ////////////////////////

                if (no_pflink_check) {
                                if (*speed == IXGBE_LINK_SPEED_UNKNOWN)
                                                mac->get_link_status = true;
                                else
                                                mac->get_link_status = false;

                                goto out;
                }
"
Comment of "for a quick link status checking, wait_to_compelet == 0, skip PF link status checking " is clear.

That means in rte_eth_link_get_nowait(), code will skip this mailbox read interrupt, only in 
rte_eth_link_get() there will be this interrupt, so I think what you need to is just replace
rte_eth_link_get() with rte_eth_link_get_nowait() in your APP,
that will reduce interrupt from vf to pf in mailbox read.


> -----Original Message-----
> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Luca Boccassi
> Sent: Wednesday, August 15, 2018 10:15 PM
> To: dev@dpdk.org
> Cc: Lu, Wenzhuo <wenzhuo.lu@intel.com>; Ananyev, Konstantin
> <konstantin.ananyev@intel.com>; Luca Boccassi <bluca@debian.org>;
> stable@dpdk.org
> Subject: [dpdk-dev] [PATCH] net/ixgbe: reduce PF mailbox interrupt rate
> 
> We have observed high rate of NIC PF interrupts when VNF is using DPDK
> APIs rte_eth_link_get_nowait() and rte_eth_link_get() functions, as they
> are causing VF driver to send many MBOX ACK messages.
> 
> With these changes, the interrupt rates go down significantly. Here's some
> testing results:
> 
> Without the patch:
> 
> $ egrep 'CPU|ens1f' /proc/interrupts ; sleep 10; egrep 'CPU|ens1f'
> /proc/interrupts
>             CPU0       CPU1       CPU2       CPU3       CPU4       CPU5       CPU6       CPU7
> CPU8       CPU9       CPU10      CPU11      CPU12      CPU13      CPU14      CPU15
>   34:         88          0          0          0          0         41         30        509          0          0        350
> 24         88        114        461        562   PCI-MSI 1572864-edge      ens1f0-TxRx-0
>   35:         49         24          0          0         65        130         64         29         67          0         10
> 0          0         46         38        764   PCI-MSI 1572865-edge      ens1f0-TxRx-1
>   36:         53          0          0         64         15         85        132         71        108          0
> 30          0        165        215        303        104   PCI-MSI 1572866-edge      ens1f0-
> TxRx-2
>   37:         46        196          0          0         10         48         62         68         51          0          0
> 0        103         82         54        192   PCI-MSI 1572867-edge      ens1f0-TxRx-3
>   38:        226          0          0          0        159        145        749        265          0          0
> 202          0      69229        166        450          0   PCI-MSI 1572868-edge      ens1f0
>   52:         95        896          0          0          0         18         53          0        494          0          0
> 0          0        265         79        124   PCI-MSI 1574912-edge      ens1f1-TxRx-0
>   53:         50          0         18          0         72         33          0        168        330          0          0
> 0        141         22         12         65   PCI-MSI 1574913-edge      ens1f1-TxRx-1
>   54:         65          0          0          0        239        104        166         49        442          0
> 0          0        126         26        307          0   PCI-MSI 1574914-edge      ens1f1-TxRx-2
>   55:         57          0          0          0        123         35         83         54        157        106
> 0          0         26         29        312         97   PCI-MSI 1574915-edge      ens1f1-TxRx-3
>   56:        232          0      13910          0         16         21          0      54422          0          0
> 0         24         25          0         78          0   PCI-MSI 1574916-edge      ens1f1
>             CPU0       CPU1       CPU2       CPU3       CPU4       CPU5       CPU6       CPU7
> CPU8       CPU9       CPU10      CPU11      CPU12      CPU13      CPU14      CPU15
>   34:         88          0          0          0          0         41         30        509          0          0        350
> 24         88        119        461        562   PCI-MSI 1572864-edge      ens1f0-TxRx-0
>   35:         49         24          0          0         65        130         64         29         67          0         10
> 0          0         46         38        771   PCI-MSI 1572865-edge      ens1f0-TxRx-1
>   36:         53          0          0         64         15         85        132         71        108          0
> 30          0        165        215        303        113   PCI-MSI 1572866-edge      ens1f0-
> TxRx-2
>   37:         46        196          0          0         10         48         62         68         56          0          0
> 0        103         82         54        192   PCI-MSI 1572867-edge      ens1f0-TxRx-3
>   38:        226          0          0          0        159        145        749        265          0          0
> 202          0      71281        166        450          0   PCI-MSI 1572868-edge      ens1f0
>   52:         95        896          0          0          0         18         53          0        494          0          0
> 0          0        265         79        133   PCI-MSI 1574912-edge      ens1f1-TxRx-0
>   53:         50          0         18          0         72         33          0        173        330          0          0
> 0        141         22         12         65   PCI-MSI 1574913-edge      ens1f1-TxRx-1
>   54:         65          0          0          0        239        104        166         49        442          0
> 0          0        126         26        312          0   PCI-MSI 1574914-edge      ens1f1-TxRx-2
>   55:         57          0          0          0        123         35         83         59        157        106
> 0          0         26         29        312         97   PCI-MSI 1574915-edge      ens1f1-TxRx-3
>   56:        232          0      15910          0         16         21          0      54422          0          0
> 0         24         25          0         78          0   PCI-MSI 1574916-edge      ens1f1
> 
> During the 10s interval, CPU2 jumped by 2000 interrupts, CPU12 by 2051
> interrupts, for about 200 interrupts/second. That's on the order of what we
> expect. I would have guessed 100/s but perhaps there are two mailbox
> messages.
> 
> With the patch:
> 
> $ egrep 'CPU|ens1f' /proc/interrupts ; sleep 10; egrep 'CPU|ens1f'
> /proc/interrupts
>             CPU0       CPU1       CPU2       CPU3       CPU4       CPU5       CPU6       CPU7
> CPU8       CPU9       CPU10      CPU11      CPU12      CPU13      CPU14      CPU15
>   34:         88          0          0          0          0         25         19        177          0          0        350
> 24         88        100        362        559   PCI-MSI 1572864-edge      ens1f0-TxRx-0
>   35:         49         19          0          0         65        130         64         29         67          0         10
> 0          0         46         38        543   PCI-MSI 1572865-edge      ens1f0-TxRx-1
>   36:         53          0          0         64         15         53         85         71        108          0         24
> 0         85        215        292         31   PCI-MSI 1572866-edge      ens1f0-TxRx-2
>   37:         46        196          0          0         10         43         57         39         19          0          0
> 0         78         69         49        149   PCI-MSI 1572867-edge      ens1f0-TxRx-3
>   38:        226          0          0          0        159        145        749        247          0          0
> 202          0      58250          0        450          0   PCI-MSI 1572868-edge      ens1f0
>   52:         95        896          0          0          0         18         53          0        189          0          0
> 0          0        265         79         25   PCI-MSI 1574912-edge      ens1f1-TxRx-0
>   53:         50          0         18          0         72         33          0         90        330          0          0
> 0        136          5         12          0   PCI-MSI 1574913-edge      ens1f1-TxRx-1
>   54:         65          0          0          0         10        104        166         49        442          0          0
> 0        126         26        226          0   PCI-MSI 1574914-edge      ens1f1-TxRx-2
>   55:         57          0          0          0         61         35         83         30        157        101          0
> 0         26         15        312          0   PCI-MSI 1574915-edge      ens1f1-TxRx-3
>   56:        232          0       2062          0         16         21          0      54422          0          0
> 0         24         25          0         78          0   PCI-MSI 1574916-edge      ens1f1
>             CPU0       CPU1       CPU2       CPU3       CPU4       CPU5       CPU6       CPU7
> CPU8       CPU9       CPU10      CPU11      CPU12      CPU13      CPU14      CPU15
>   34:         88          0          0          0          0         25         19        177          0          0        350
> 24         88        102        362        562   PCI-MSI 1572864-edge      ens1f0-TxRx-0
>   35:         49         19          0          0         65        130         64         29         67          0         10
> 0          0         46         38        548   PCI-MSI 1572865-edge      ens1f0-TxRx-1
>   36:         53          0          0         64         15         53         85         71        108          0         24
> 0         85        215        292         36   PCI-MSI 1572866-edge      ens1f0-TxRx-2
>   37:         46        196          0          0         10         45         57         39         19          0          0
> 0         78         69         49        152   PCI-MSI 1572867-edge      ens1f0-TxRx-3
>   38:        226          0          0          0        159        145        749        247          0          0
> 202          0      58259          0        450          0   PCI-MSI 1572868-edge      ens1f0
>   52:         95        896          0          0          0         18         53          0        194          0          0
> 0          0        265         79         25   PCI-MSI 1574912-edge      ens1f1-TxRx-0
>   53:         50          0         18          0         72         33          0         95        330          0          0
> 0        136          5         12          0   PCI-MSI 1574913-edge      ens1f1-TxRx-1
>   54:         65          0          0          0         10        104        166         49        442          0          0
> 0        126         26        231          0   PCI-MSI 1574914-edge      ens1f1-TxRx-2
>   55:         57          0          0          0         66         35         83         30        157        101          0
> 0         26         15        312          0   PCI-MSI 1574915-edge      ens1f1-TxRx-3
>   56:        232          0       2071          0         16         21          0      54422          0          0
> 0         24         25          0         78          0   PCI-MSI 1574916-edge      ens1f1
> 
> Note the interrupt rate has gone way down. During the 10s interval, we only
> saw a handful of interrupts.
> 
> Note that this patch was originally provided by Intel directly to AT&T and
> Vyatta, but unfortunately I am unable to find records of the exact author.
> 
> We have been using this in production for more than a year.
> 
> Fixes: af75078fece3 ("first public release")
> Cc: stable@dpdk.org
> 
> Signed-off-by: Luca Boccassi <bluca@debian.org>
> ---
>  drivers/net/ixgbe/base/ixgbe_vf.c | 33 ++++++++++++++++---------------
>  1 file changed, 17 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/net/ixgbe/base/ixgbe_vf.c
> b/drivers/net/ixgbe/base/ixgbe_vf.c
> index 5b25a6b4d4..16086670b1 100644
> --- a/drivers/net/ixgbe/base/ixgbe_vf.c
> +++ b/drivers/net/ixgbe/base/ixgbe_vf.c
> @@ -586,7 +586,6 @@ s32 ixgbe_check_mac_link_vf(struct ixgbe_hw *hw,
> ixgbe_link_speed *speed,
>  	s32 ret_val = IXGBE_SUCCESS;
>  	u32 links_reg;
>  	u32 in_msg = 0;
> -	UNREFERENCED_1PARAMETER(autoneg_wait_to_complete);
> 
>  	/* If we were hit with a reset drop the link */
>  	if (!mbx->ops.check_for_rst(hw, 0) || !mbx->timeout) @@ -643,23
> +642,25 @@ s32 ixgbe_check_mac_link_vf(struct ixgbe_hw *hw,
> ixgbe_link_speed *speed,
>  		*speed = IXGBE_LINK_SPEED_UNKNOWN;
>  	}
> 
> -	/* if the read failed it could just be a mailbox collision, best wait
> -	 * until we are called again and don't report an error
> -	 */
> -	if (mbx->ops.read(hw, &in_msg, 1, 0))
> -		goto out;
> +	if (autoneg_wait_to_complete) {
> +		/* if the read failed it could just be a mailbox collision, best
> wait
> +		 * until we are called again and don't report an error
> +		 */
> +		if (mbx->ops.read(hw, &in_msg, 1, 0))
> +			goto out;
> 
> -	if (!(in_msg & IXGBE_VT_MSGTYPE_CTS)) {
> -		/* msg is not CTS and is NACK we must have lost CTS status
> */
> -		if (in_msg & IXGBE_VT_MSGTYPE_NACK)
> +		if (!(in_msg & IXGBE_VT_MSGTYPE_CTS)) {
> +			/* msg is not CTS and is NACK we must have lost CTS
> status */
> +			if (in_msg & IXGBE_VT_MSGTYPE_NACK)
> +				ret_val = -1;
> +			goto out;
> +		}
> +
> +		/* the pf is talking, if we timed out in the past we reinit */
> +		if (!mbx->timeout) {
>  			ret_val = -1;
> -		goto out;
> -	}
> -
> -	/* the pf is talking, if we timed out in the past we reinit */
> -	if (!mbx->timeout) {
> -		ret_val = -1;
> -		goto out;
> +			goto out;
> +		}
>  	}
> 
>  	/* if we passed all the tests above then the link is up and we no
> --
> 2.18.0
Chas Williams Nov. 7, 2018, 6:54 p.m. UTC | #3
On 11/07/2018 04:17 AM, Zhao1, Wei wrote:
> Hi, Luca Boccassi
> 
>      The purpose of this patch is to reduce the mailbox interrupt from vf to pf, but there seem some point need for discussion in this patch.
>   
> First, I do not know why do you change code of function ixgbe_check_mac_link_vf(), because in rte_eth_link_get_nowait() and rte_eth_link_get(),
> it will call ixgbe_dev_link_update()->ixgbe_dev_link_update_share()-> ixgbevf_check_link() for VF, NOT ixgbe_check_mac_link_vf() in your patch!
> 
> Second, in function ixgbevf_check_link(), there is mailbox message read operation for vf,
> " if (mbx->ops.read(hw, &in_msg, 1, 0))", that is ixgbe_read_mbx_vf() ,
> This will cause interrupt from vf to pf, this is just the point of this patch, it is also the problem that you want to solve.
> So, you use autoneg_wait_to_complete flag to control this mailbox message read operation, maybe you will use rte_eth_link_get_nowait(), Which set autoneg_wait_to_complete = 0, then the interrupt from vf to pf can be reduced.
> 
> But  I do not think this patch is necessary, because in ixgbevf_check_link(), it,has

I think you are right here. This patch dates to before the addition
of the vf argument to ixgbe_dev_link_update_share() and the split of
.link_update between ixgbe and ixgbevf.  At one point, this patch was
especially beneficial if you were running bonding (which tends to make
quite a few link status checks).

So this patch probably hasn't been helping at this point.  I will try
to get some time to locally test this.

> "
> bool no_pflink_check = wait_to_complete == 0;
> 
>                 ////////////////////////
> 
>                  if (no_pflink_check) {
>                                  if (*speed == IXGBE_LINK_SPEED_UNKNOWN)
>                                                  mac->get_link_status = true;
>                                  else
>                                                  mac->get_link_status = false;
> 
>                                  goto out;
>                  }
> "
> Comment of "for a quick link status checking, wait_to_compelet == 0, skip PF link status checking " is clear.
> 
> That means in rte_eth_link_get_nowait(), code will skip this mailbox read interrupt, only in
> rte_eth_link_get() there will be this interrupt, so I think what you need to is just replace
> rte_eth_link_get() with rte_eth_link_get_nowait() in your APP,
> that will reduce interrupt from vf to pf in mailbox read.
> 
> 
>> -----Original Message-----
>> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Luca Boccassi
>> Sent: Wednesday, August 15, 2018 10:15 PM
>> To: dev@dpdk.org
>> Cc: Lu, Wenzhuo <wenzhuo.lu@intel.com>; Ananyev, Konstantin
>> <konstantin.ananyev@intel.com>; Luca Boccassi <bluca@debian.org>;
>> stable@dpdk.org
>> Subject: [dpdk-dev] [PATCH] net/ixgbe: reduce PF mailbox interrupt rate
>>
>> We have observed high rate of NIC PF interrupts when VNF is using DPDK
>> APIs rte_eth_link_get_nowait() and rte_eth_link_get() functions, as they
>> are causing VF driver to send many MBOX ACK messages.
>>
>> With these changes, the interrupt rates go down significantly. Here's some
>> testing results:
>>
>> Without the patch:
>>
>> $ egrep 'CPU|ens1f' /proc/interrupts ; sleep 10; egrep 'CPU|ens1f'
>> /proc/interrupts
>>              CPU0       CPU1       CPU2       CPU3       CPU4       CPU5       CPU6       CPU7
>> CPU8       CPU9       CPU10      CPU11      CPU12      CPU13      CPU14      CPU15
>>    34:         88          0          0          0          0         41         30        509          0          0        350
>> 24         88        114        461        562   PCI-MSI 1572864-edge      ens1f0-TxRx-0
>>    35:         49         24          0          0         65        130         64         29         67          0         10
>> 0          0         46         38        764   PCI-MSI 1572865-edge      ens1f0-TxRx-1
>>    36:         53          0          0         64         15         85        132         71        108          0
>> 30          0        165        215        303        104   PCI-MSI 1572866-edge      ens1f0-
>> TxRx-2
>>    37:         46        196          0          0         10         48         62         68         51          0          0
>> 0        103         82         54        192   PCI-MSI 1572867-edge      ens1f0-TxRx-3
>>    38:        226          0          0          0        159        145        749        265          0          0
>> 202          0      69229        166        450          0   PCI-MSI 1572868-edge      ens1f0
>>    52:         95        896          0          0          0         18         53          0        494          0          0
>> 0          0        265         79        124   PCI-MSI 1574912-edge      ens1f1-TxRx-0
>>    53:         50          0         18          0         72         33          0        168        330          0          0
>> 0        141         22         12         65   PCI-MSI 1574913-edge      ens1f1-TxRx-1
>>    54:         65          0          0          0        239        104        166         49        442          0
>> 0          0        126         26        307          0   PCI-MSI 1574914-edge      ens1f1-TxRx-2
>>    55:         57          0          0          0        123         35         83         54        157        106
>> 0          0         26         29        312         97   PCI-MSI 1574915-edge      ens1f1-TxRx-3
>>    56:        232          0      13910          0         16         21          0      54422          0          0
>> 0         24         25          0         78          0   PCI-MSI 1574916-edge      ens1f1
>>              CPU0       CPU1       CPU2       CPU3       CPU4       CPU5       CPU6       CPU7
>> CPU8       CPU9       CPU10      CPU11      CPU12      CPU13      CPU14      CPU15
>>    34:         88          0          0          0          0         41         30        509          0          0        350
>> 24         88        119        461        562   PCI-MSI 1572864-edge      ens1f0-TxRx-0
>>    35:         49         24          0          0         65        130         64         29         67          0         10
>> 0          0         46         38        771   PCI-MSI 1572865-edge      ens1f0-TxRx-1
>>    36:         53          0          0         64         15         85        132         71        108          0
>> 30          0        165        215        303        113   PCI-MSI 1572866-edge      ens1f0-
>> TxRx-2
>>    37:         46        196          0          0         10         48         62         68         56          0          0
>> 0        103         82         54        192   PCI-MSI 1572867-edge      ens1f0-TxRx-3
>>    38:        226          0          0          0        159        145        749        265          0          0
>> 202          0      71281        166        450          0   PCI-MSI 1572868-edge      ens1f0
>>    52:         95        896          0          0          0         18         53          0        494          0          0
>> 0          0        265         79        133   PCI-MSI 1574912-edge      ens1f1-TxRx-0
>>    53:         50          0         18          0         72         33          0        173        330          0          0
>> 0        141         22         12         65   PCI-MSI 1574913-edge      ens1f1-TxRx-1
>>    54:         65          0          0          0        239        104        166         49        442          0
>> 0          0        126         26        312          0   PCI-MSI 1574914-edge      ens1f1-TxRx-2
>>    55:         57          0          0          0        123         35         83         59        157        106
>> 0          0         26         29        312         97   PCI-MSI 1574915-edge      ens1f1-TxRx-3
>>    56:        232          0      15910          0         16         21          0      54422          0          0
>> 0         24         25          0         78          0   PCI-MSI 1574916-edge      ens1f1
>>
>> During the 10s interval, CPU2 jumped by 2000 interrupts, CPU12 by 2051
>> interrupts, for about 200 interrupts/second. That's on the order of what we
>> expect. I would have guessed 100/s but perhaps there are two mailbox
>> messages.
>>
>> With the patch:
>>
>> $ egrep 'CPU|ens1f' /proc/interrupts ; sleep 10; egrep 'CPU|ens1f'
>> /proc/interrupts
>>              CPU0       CPU1       CPU2       CPU3       CPU4       CPU5       CPU6       CPU7
>> CPU8       CPU9       CPU10      CPU11      CPU12      CPU13      CPU14      CPU15
>>    34:         88          0          0          0          0         25         19        177          0          0        350
>> 24         88        100        362        559   PCI-MSI 1572864-edge      ens1f0-TxRx-0
>>    35:         49         19          0          0         65        130         64         29         67          0         10
>> 0          0         46         38        543   PCI-MSI 1572865-edge      ens1f0-TxRx-1
>>    36:         53          0          0         64         15         53         85         71        108          0         24
>> 0         85        215        292         31   PCI-MSI 1572866-edge      ens1f0-TxRx-2
>>    37:         46        196          0          0         10         43         57         39         19          0          0
>> 0         78         69         49        149   PCI-MSI 1572867-edge      ens1f0-TxRx-3
>>    38:        226          0          0          0        159        145        749        247          0          0
>> 202          0      58250          0        450          0   PCI-MSI 1572868-edge      ens1f0
>>    52:         95        896          0          0          0         18         53          0        189          0          0
>> 0          0        265         79         25   PCI-MSI 1574912-edge      ens1f1-TxRx-0
>>    53:         50          0         18          0         72         33          0         90        330          0          0
>> 0        136          5         12          0   PCI-MSI 1574913-edge      ens1f1-TxRx-1
>>    54:         65          0          0          0         10        104        166         49        442          0          0
>> 0        126         26        226          0   PCI-MSI 1574914-edge      ens1f1-TxRx-2
>>    55:         57          0          0          0         61         35         83         30        157        101          0
>> 0         26         15        312          0   PCI-MSI 1574915-edge      ens1f1-TxRx-3
>>    56:        232          0       2062          0         16         21          0      54422          0          0
>> 0         24         25          0         78          0   PCI-MSI 1574916-edge      ens1f1
>>              CPU0       CPU1       CPU2       CPU3       CPU4       CPU5       CPU6       CPU7
>> CPU8       CPU9       CPU10      CPU11      CPU12      CPU13      CPU14      CPU15
>>    34:         88          0          0          0          0         25         19        177          0          0        350
>> 24         88        102        362        562   PCI-MSI 1572864-edge      ens1f0-TxRx-0
>>    35:         49         19          0          0         65        130         64         29         67          0         10
>> 0          0         46         38        548   PCI-MSI 1572865-edge      ens1f0-TxRx-1
>>    36:         53          0          0         64         15         53         85         71        108          0         24
>> 0         85        215        292         36   PCI-MSI 1572866-edge      ens1f0-TxRx-2
>>    37:         46        196          0          0         10         45         57         39         19          0          0
>> 0         78         69         49        152   PCI-MSI 1572867-edge      ens1f0-TxRx-3
>>    38:        226          0          0          0        159        145        749        247          0          0
>> 202          0      58259          0        450          0   PCI-MSI 1572868-edge      ens1f0
>>    52:         95        896          0          0          0         18         53          0        194          0          0
>> 0          0        265         79         25   PCI-MSI 1574912-edge      ens1f1-TxRx-0
>>    53:         50          0         18          0         72         33          0         95        330          0          0
>> 0        136          5         12          0   PCI-MSI 1574913-edge      ens1f1-TxRx-1
>>    54:         65          0          0          0         10        104        166         49        442          0          0
>> 0        126         26        231          0   PCI-MSI 1574914-edge      ens1f1-TxRx-2
>>    55:         57          0          0          0         66         35         83         30        157        101          0
>> 0         26         15        312          0   PCI-MSI 1574915-edge      ens1f1-TxRx-3
>>    56:        232          0       2071          0         16         21          0      54422          0          0
>> 0         24         25          0         78          0   PCI-MSI 1574916-edge      ens1f1
>>
>> Note the interrupt rate has gone way down. During the 10s interval, we only
>> saw a handful of interrupts.
>>
>> Note that this patch was originally provided by Intel directly to AT&T and
>> Vyatta, but unfortunately I am unable to find records of the exact author.
>>
>> We have been using this in production for more than a year.
>>
>> Fixes: af75078fece3 ("first public release")
>> Cc: stable@dpdk.org
>>
>> Signed-off-by: Luca Boccassi <bluca@debian.org>
>> ---
>>   drivers/net/ixgbe/base/ixgbe_vf.c | 33 ++++++++++++++++---------------
>>   1 file changed, 17 insertions(+), 16 deletions(-)
>>
>> diff --git a/drivers/net/ixgbe/base/ixgbe_vf.c
>> b/drivers/net/ixgbe/base/ixgbe_vf.c
>> index 5b25a6b4d4..16086670b1 100644
>> --- a/drivers/net/ixgbe/base/ixgbe_vf.c
>> +++ b/drivers/net/ixgbe/base/ixgbe_vf.c
>> @@ -586,7 +586,6 @@ s32 ixgbe_check_mac_link_vf(struct ixgbe_hw *hw,
>> ixgbe_link_speed *speed,
>>   	s32 ret_val = IXGBE_SUCCESS;
>>   	u32 links_reg;
>>   	u32 in_msg = 0;
>> -	UNREFERENCED_1PARAMETER(autoneg_wait_to_complete);
>>
>>   	/* If we were hit with a reset drop the link */
>>   	if (!mbx->ops.check_for_rst(hw, 0) || !mbx->timeout) @@ -643,23
>> +642,25 @@ s32 ixgbe_check_mac_link_vf(struct ixgbe_hw *hw,
>> ixgbe_link_speed *speed,
>>   		*speed = IXGBE_LINK_SPEED_UNKNOWN;
>>   	}
>>
>> -	/* if the read failed it could just be a mailbox collision, best wait
>> -	 * until we are called again and don't report an error
>> -	 */
>> -	if (mbx->ops.read(hw, &in_msg, 1, 0))
>> -		goto out;
>> +	if (autoneg_wait_to_complete) {
>> +		/* if the read failed it could just be a mailbox collision, best
>> wait
>> +		 * until we are called again and don't report an error
>> +		 */
>> +		if (mbx->ops.read(hw, &in_msg, 1, 0))
>> +			goto out;
>>
>> -	if (!(in_msg & IXGBE_VT_MSGTYPE_CTS)) {
>> -		/* msg is not CTS and is NACK we must have lost CTS status
>> */
>> -		if (in_msg & IXGBE_VT_MSGTYPE_NACK)
>> +		if (!(in_msg & IXGBE_VT_MSGTYPE_CTS)) {
>> +			/* msg is not CTS and is NACK we must have lost CTS
>> status */
>> +			if (in_msg & IXGBE_VT_MSGTYPE_NACK)
>> +				ret_val = -1;
>> +			goto out;
>> +		}
>> +
>> +		/* the pf is talking, if we timed out in the past we reinit */
>> +		if (!mbx->timeout) {
>>   			ret_val = -1;
>> -		goto out;
>> -	}
>> -
>> -	/* the pf is talking, if we timed out in the past we reinit */
>> -	if (!mbx->timeout) {
>> -		ret_val = -1;
>> -		goto out;
>> +			goto out;
>> +		}
>>   	}
>>
>>   	/* if we passed all the tests above then the link is up and we no
>> --
>> 2.18.0
>
Luca Boccassi Nov. 7, 2018, 7:10 p.m. UTC | #4
On Wed, 2018-11-07 at 13:54 -0500, Chas Williams wrote:
> 
> On 11/07/2018 04:17 AM, Zhao1, Wei wrote:
> > Hi, Luca Boccassi
> > 
> >      The purpose of this patch is to reduce the mailbox interrupt
> > from vf to pf, but there seem some point need for discussion in
> > this patch.
> >   
> > First, I do not know why do you change code of function
> > ixgbe_check_mac_link_vf(), because in rte_eth_link_get_nowait() and
> > rte_eth_link_get(),
> > it will call ixgbe_dev_link_update()-
> > >ixgbe_dev_link_update_share()-> ixgbevf_check_link() for VF, NOT
> > ixgbe_check_mac_link_vf() in your patch!
> > 
> > Second, in function ixgbevf_check_link(), there is mailbox message
> > read operation for vf,
> > " if (mbx->ops.read(hw, &in_msg, 1, 0))", that is
> > ixgbe_read_mbx_vf() ,
> > This will cause interrupt from vf to pf, this is just the point of
> > this patch, it is also the problem that you want to solve.
> > So, you use autoneg_wait_to_complete flag to control this mailbox
> > message read operation, maybe you will use
> > rte_eth_link_get_nowait(), Which set autoneg_wait_to_complete = 0,
> > then the interrupt from vf to pf can be reduced.
> > 
> > But  I do not think this patch is necessary, because in
> > ixgbevf_check_link(), it,has
> 
> I think you are right here. This patch dates to before the addition
> of the vf argument to ixgbe_dev_link_update_share() and the split of
> .link_update between ixgbe and ixgbevf.  At one point, this patch was
> especially beneficial if you were running bonding (which tends to
> make
> quite a few link status checks).
> 
> So this patch probably hasn't been helping at this point.  I will try
> to get some time to locally test this.

Yes, this looks like to be the case.

Thank you Wei for double checking, and sorry for not having checked
more thoroughly - this patch has been around for a long time, and I
just rebased, tested and sent it without checking if it was still
needed.

I've marked it as "not applicable" on patchwork.

> > "
> > bool no_pflink_check = wait_to_complete == 0;
> > 
> >                 ////////////////////////
> > 
> >                  if (no_pflink_check) {
> >                                  if (*speed ==
> > IXGBE_LINK_SPEED_UNKNOWN)
> >                                                  mac-
> > >get_link_status = true;
> >                                  else
> >                                                  mac-
> > >get_link_status = false;
> > 
> >                                  goto out;
> >                  }
> > "
> > Comment of "for a quick link status checking, wait_to_compelet ==
> > 0, skip PF link status checking " is clear.
> > 
> > That means in rte_eth_link_get_nowait(), code will skip this
> > mailbox read interrupt, only in
> > rte_eth_link_get() there will be this interrupt, so I think what
> > you need to is just replace
> > rte_eth_link_get() with rte_eth_link_get_nowait() in your APP,
> > that will reduce interrupt from vf to pf in mailbox read.
> > 
> > 
> > > -----Original Message-----
> > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Luca
> > > Boccassi
> > > Sent: Wednesday, August 15, 2018 10:15 PM
> > > To: dev@dpdk.org
> > > Cc: Lu, Wenzhuo <wenzhuo.lu@intel.com>; Ananyev, Konstantin
> > > <konstantin.ananyev@intel.com>; Luca Boccassi <bluca@debian.org>;
> > > stable@dpdk.org
> > > Subject: [dpdk-dev] [PATCH] net/ixgbe: reduce PF mailbox
> > > interrupt rate
> > > 
> > > We have observed high rate of NIC PF interrupts when VNF is using
> > > DPDK
> > > APIs rte_eth_link_get_nowait() and rte_eth_link_get() functions,
> > > as they
> > > are causing VF driver to send many MBOX ACK messages.
> > > 
> > > With these changes, the interrupt rates go down significantly.
> > > Here's some
> > > testing results:
> > > 
> > > Without the patch:
> > > 
> > > $ egrep 'CPU|ens1f' /proc/interrupts ; sleep 10; egrep
> > > 'CPU|ens1f'
> > > /proc/interrupts
> > >              CPU0       CPU1       CPU2       CPU3       CPU4    
> > >    CPU5       CPU6       CPU7
> > > CPU8       CPU9       CPU10      CPU11      CPU12      CPU13     
> > >  CPU14      CPU15
> > >    34:         88          0          0          0          0    
> > >      41         30        509          0          0        350
> > > 24         88        114        461        562   PCI-MSI 1572864-
> > > edge      ens1f0-TxRx-0
> > >    35:         49         24          0          0         65    
> > >     130         64         29         67          0         10
> > > 0          0         46         38        764   PCI-MSI 1572865-
> > > edge      ens1f0-TxRx-1
> > >    36:         53          0          0         64         15    
> > >      85        132         71        108          0
> > > 30          0        165        215        303        104   PCI-
> > > MSI 1572866-edge      ens1f0-
> > > TxRx-2
> > >    37:         46        196          0          0         10    
> > >      48         62         68         51          0          0
> > > 0        103         82         54        192   PCI-MSI 1572867-
> > > edge      ens1f0-TxRx-3
> > >    38:        226          0          0          0        159    
> > >     145        749        265          0          0
> > > 202          0      69229        166        450          0   PCI-
> > > MSI 1572868-edge      ens1f0
> > >    52:         95        896          0          0          0    
> > >      18         53          0        494          0          0
> > > 0          0        265         79        124   PCI-MSI 1574912-
> > > edge      ens1f1-TxRx-0
> > >    53:         50          0         18          0         72    
> > >      33          0        168        330          0          0
> > > 0        141         22         12         65   PCI-MSI 1574913-
> > > edge      ens1f1-TxRx-1
> > >    54:         65          0          0          0        239    
> > >     104        166         49        442          0
> > > 0          0        126         26        307          0   PCI-
> > > MSI 1574914-edge      ens1f1-TxRx-2
> > >    55:         57          0          0          0        123    
> > >      35         83         54        157        106
> > > 0          0         26         29        312         97   PCI-
> > > MSI 1574915-edge      ens1f1-TxRx-3
> > >    56:        232          0      13910          0         16    
> > >      21          0      54422          0          0
> > > 0         24         25          0         78          0   PCI-
> > > MSI 1574916-edge      ens1f1
> > >              CPU0       CPU1       CPU2       CPU3       CPU4    
> > >    CPU5       CPU6       CPU7
> > > CPU8       CPU9       CPU10      CPU11      CPU12      CPU13     
> > >  CPU14      CPU15
> > >    34:         88          0          0          0          0    
> > >      41         30        509          0          0        350
> > > 24         88        119        461        562   PCI-MSI 1572864-
> > > edge      ens1f0-TxRx-0
> > >    35:         49         24          0          0         65    
> > >     130         64         29         67          0         10
> > > 0          0         46         38        771   PCI-MSI 1572865-
> > > edge      ens1f0-TxRx-1
> > >    36:         53          0          0         64         15    
> > >      85        132         71        108          0
> > > 30          0        165        215        303        113   PCI-
> > > MSI 1572866-edge      ens1f0-
> > > TxRx-2
> > >    37:         46        196          0          0         10    
> > >      48         62         68         56          0          0
> > > 0        103         82         54        192   PCI-MSI 1572867-
> > > edge      ens1f0-TxRx-3
> > >    38:        226          0          0          0        159    
> > >     145        749        265          0          0
> > > 202          0      71281        166        450          0   PCI-
> > > MSI 1572868-edge      ens1f0
> > >    52:         95        896          0          0          0    
> > >      18         53          0        494          0          0
> > > 0          0        265         79        133   PCI-MSI 1574912-
> > > edge      ens1f1-TxRx-0
> > >    53:         50          0         18          0         72    
> > >      33          0        173        330          0          0
> > > 0        141         22         12         65   PCI-MSI 1574913-
> > > edge      ens1f1-TxRx-1
> > >    54:         65          0          0          0        239    
> > >     104        166         49        442          0
> > > 0          0        126         26        312          0   PCI-
> > > MSI 1574914-edge      ens1f1-TxRx-2
> > >    55:         57          0          0          0        123    
> > >      35         83         59        157        106
> > > 0          0         26         29        312         97   PCI-
> > > MSI 1574915-edge      ens1f1-TxRx-3
> > >    56:        232          0      15910          0         16    
> > >      21          0      54422          0          0
> > > 0         24         25          0         78          0   PCI-
> > > MSI 1574916-edge      ens1f1
> > > 
> > > During the 10s interval, CPU2 jumped by 2000 interrupts, CPU12 by
> > > 2051
> > > interrupts, for about 200 interrupts/second. That's on the order
> > > of what we
> > > expect. I would have guessed 100/s but perhaps there are two
> > > mailbox
> > > messages.
> > > 
> > > With the patch:
> > > 
> > > $ egrep 'CPU|ens1f' /proc/interrupts ; sleep 10; egrep
> > > 'CPU|ens1f'
> > > /proc/interrupts
> > >              CPU0       CPU1       CPU2       CPU3       CPU4    
> > >    CPU5       CPU6       CPU7
> > > CPU8       CPU9       CPU10      CPU11      CPU12      CPU13     
> > >  CPU14      CPU15
> > >    34:         88          0          0          0          0    
> > >      25         19        177          0          0        350
> > > 24         88        100        362        559   PCI-MSI 1572864-
> > > edge      ens1f0-TxRx-0
> > >    35:         49         19          0          0         65    
> > >     130         64         29         67          0         10
> > > 0          0         46         38        543   PCI-MSI 1572865-
> > > edge      ens1f0-TxRx-1
> > >    36:         53          0          0         64         15    
> > >      53         85         71        108          0         24
> > > 0         85        215        292         31   PCI-MSI 1572866-
> > > edge      ens1f0-TxRx-2
> > >    37:         46        196          0          0         10    
> > >      43         57         39         19          0          0
> > > 0         78         69         49        149   PCI-MSI 1572867-
> > > edge      ens1f0-TxRx-3
> > >    38:        226          0          0          0        159    
> > >     145        749        247          0          0
> > > 202          0      58250          0        450          0   PCI-
> > > MSI 1572868-edge      ens1f0
> > >    52:         95        896          0          0          0    
> > >      18         53          0        189          0          0
> > > 0          0        265         79         25   PCI-MSI 1574912-
> > > edge      ens1f1-TxRx-0
> > >    53:         50          0         18          0         72    
> > >      33          0         90        330          0          0
> > > 0        136          5         12          0   PCI-MSI 1574913-
> > > edge      ens1f1-TxRx-1
> > >    54:         65          0          0          0         10    
> > >     104        166         49        442          0          0
> > > 0        126         26        226          0   PCI-MSI 1574914-
> > > edge      ens1f1-TxRx-2
> > >    55:         57          0          0          0         61    
> > >      35         83         30        157        101          0
> > > 0         26         15        312          0   PCI-MSI 1574915-
> > > edge      ens1f1-TxRx-3
> > >    56:        232          0       2062          0         16    
> > >      21          0      54422          0          0
> > > 0         24         25          0         78          0   PCI-
> > > MSI 1574916-edge      ens1f1
> > >              CPU0       CPU1       CPU2       CPU3       CPU4    
> > >    CPU5       CPU6       CPU7
> > > CPU8       CPU9       CPU10      CPU11      CPU12      CPU13     
> > >  CPU14      CPU15
> > >    34:         88          0          0          0          0    
> > >      25         19        177          0          0        350
> > > 24         88        102        362        562   PCI-MSI 1572864-
> > > edge      ens1f0-TxRx-0
> > >    35:         49         19          0          0         65    
> > >     130         64         29         67          0         10
> > > 0          0         46         38        548   PCI-MSI 1572865-
> > > edge      ens1f0-TxRx-1
> > >    36:         53          0          0         64         15    
> > >      53         85         71        108          0         24
> > > 0         85        215        292         36   PCI-MSI 1572866-
> > > edge      ens1f0-TxRx-2
> > >    37:         46        196          0          0         10    
> > >      45         57         39         19          0          0
> > > 0         78         69         49        152   PCI-MSI 1572867-
> > > edge      ens1f0-TxRx-3
> > >    38:        226          0          0          0        159    
> > >     145        749        247          0          0
> > > 202          0      58259          0        450          0   PCI-
> > > MSI 1572868-edge      ens1f0
> > >    52:         95        896          0          0          0    
> > >      18         53          0        194          0          0
> > > 0          0        265         79         25   PCI-MSI 1574912-
> > > edge      ens1f1-TxRx-0
> > >    53:         50          0         18          0         72    
> > >      33          0         95        330          0          0
> > > 0        136          5         12          0   PCI-MSI 1574913-
> > > edge      ens1f1-TxRx-1
> > >    54:         65          0          0          0         10    
> > >     104        166         49        442          0          0
> > > 0        126         26        231          0   PCI-MSI 1574914-
> > > edge      ens1f1-TxRx-2
> > >    55:         57          0          0          0         66    
> > >      35         83         30        157        101          0
> > > 0         26         15        312          0   PCI-MSI 1574915-
> > > edge      ens1f1-TxRx-3
> > >    56:        232          0       2071          0         16    
> > >      21          0      54422          0          0
> > > 0         24         25          0         78          0   PCI-
> > > MSI 1574916-edge      ens1f1
> > > 
> > > Note the interrupt rate has gone way down. During the 10s
> > > interval, we only
> > > saw a handful of interrupts.
> > > 
> > > Note that this patch was originally provided by Intel directly to
> > > AT&T and
> > > Vyatta, but unfortunately I am unable to find records of the
> > > exact author.
> > > 
> > > We have been using this in production for more than a year.
> > > 
> > > Fixes: af75078fece3 ("first public release")
> > > Cc: stable@dpdk.org
> > > 
> > > Signed-off-by: Luca Boccassi <bluca@debian.org>
> > > ---
> > >   drivers/net/ixgbe/base/ixgbe_vf.c | 33 ++++++++++++++++------
> > > ---------
> > >   1 file changed, 17 insertions(+), 16 deletions(-)
> > > 
> > > diff --git a/drivers/net/ixgbe/base/ixgbe_vf.c
> > > b/drivers/net/ixgbe/base/ixgbe_vf.c
> > > index 5b25a6b4d4..16086670b1 100644
> > > --- a/drivers/net/ixgbe/base/ixgbe_vf.c
> > > +++ b/drivers/net/ixgbe/base/ixgbe_vf.c
> > > @@ -586,7 +586,6 @@ s32 ixgbe_check_mac_link_vf(struct ixgbe_hw
> > > *hw,
> > > ixgbe_link_speed *speed,
> > >   	s32 ret_val = IXGBE_SUCCESS;
> > >   	u32 links_reg;
> > >   	u32 in_msg = 0;
> > > -	UNREFERENCED_1PARAMETER(autoneg_wait_to_complete);
> > > 
> > >   	/* If we were hit with a reset drop the link */
> > >   	if (!mbx->ops.check_for_rst(hw, 0) || !mbx->timeout) @@
> > > -643,23
> > > +642,25 @@ s32 ixgbe_check_mac_link_vf(struct ixgbe_hw *hw,
> > > ixgbe_link_speed *speed,
> > >   		*speed = IXGBE_LINK_SPEED_UNKNOWN;
> > >   	}
> > > 
> > > -	/* if the read failed it could just be a mailbox
> > > collision, best wait
> > > -	 * until we are called again and don't report an error
> > > -	 */
> > > -	if (mbx->ops.read(hw, &in_msg, 1, 0))
> > > -		goto out;
> > > +	if (autoneg_wait_to_complete) {
> > > +		/* if the read failed it could just be a mailbox
> > > collision, best
> > > wait
> > > +		 * until we are called again and don't report an
> > > error
> > > +		 */
> > > +		if (mbx->ops.read(hw, &in_msg, 1, 0))
> > > +			goto out;
> > > 
> > > -	if (!(in_msg & IXGBE_VT_MSGTYPE_CTS)) {
> > > -		/* msg is not CTS and is NACK we must have lost
> > > CTS status
> > > */
> > > -		if (in_msg & IXGBE_VT_MSGTYPE_NACK)
> > > +		if (!(in_msg & IXGBE_VT_MSGTYPE_CTS)) {
> > > +			/* msg is not CTS and is NACK we must
> > > have lost CTS
> > > status */
> > > +			if (in_msg & IXGBE_VT_MSGTYPE_NACK)
> > > +				ret_val = -1;
> > > +			goto out;
> > > +		}
> > > +
> > > +		/* the pf is talking, if we timed out in the
> > > past we reinit */
> > > +		if (!mbx->timeout) {
> > >   			ret_val = -1;
> > > -		goto out;
> > > -	}
> > > -
> > > -	/* the pf is talking, if we timed out in the past we
> > > reinit */
> > > -	if (!mbx->timeout) {
> > > -		ret_val = -1;
> > > -		goto out;
> > > +			goto out;
> > > +		}
> > >   	}
> > > 
> > >   	/* if we passed all the tests above then the link is up
> > > and we no
> > > --
> > > 2.18.0
Zhao1, Wei Nov. 8, 2018, 2:55 a.m. UTC | #5
> -----Original Message-----
> From: Luca Boccassi [mailto:bluca@debian.org]
> Sent: Thursday, November 8, 2018 3:10 AM
> To: Chas Williams <3chas3@gmail.com>; Zhao1, Wei <wei.zhao1@intel.com>;
> dev@dpdk.org
> Cc: Lu, Wenzhuo <wenzhuo.lu@intel.com>; Ananyev, Konstantin
> <konstantin.ananyev@intel.com>; stable@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH] net/ixgbe: reduce PF mailbox interrupt rate
> 
> On Wed, 2018-11-07 at 13:54 -0500, Chas Williams wrote:
> >
> > On 11/07/2018 04:17 AM, Zhao1, Wei wrote:
> > > Hi, Luca Boccassi
> > >
> > >      The purpose of this patch is to reduce the mailbox interrupt
> > > from vf to pf, but there seem some point need for discussion in this
> > > patch.
> > >
> > > First, I do not know why do you change code of function
> > > ixgbe_check_mac_link_vf(), because in rte_eth_link_get_nowait() and
> > > rte_eth_link_get(), it will call ixgbe_dev_link_update()-
> > > >ixgbe_dev_link_update_share()-> ixgbevf_check_link() for VF, NOT
> > > ixgbe_check_mac_link_vf() in your patch!
> > >
> > > Second, in function ixgbevf_check_link(), there is mailbox message
> > > read operation for vf, " if (mbx->ops.read(hw, &in_msg, 1, 0))",
> > > that is
> > > ixgbe_read_mbx_vf() ,
> > > This will cause interrupt from vf to pf, this is just the point of
> > > this patch, it is also the problem that you want to solve.
> > > So, you use autoneg_wait_to_complete flag to control this mailbox
> > > message read operation, maybe you will use
> > > rte_eth_link_get_nowait(), Which set autoneg_wait_to_complete = 0,
> > > then the interrupt from vf to pf can be reduced.
> > >
> > > But  I do not think this patch is necessary, because in
> > > ixgbevf_check_link(), it,has
> >
> > I think you are right here. This patch dates to before the addition of
> > the vf argument to ixgbe_dev_link_update_share() and the split of
> > .link_update between ixgbe and ixgbevf.  At one point, this patch was
> > especially beneficial if you were running bonding (which tends to make
> > quite a few link status checks).
> >
> > So this patch probably hasn't been helping at this point.  I will try
> > to get some time to locally test this.
> 
> Yes, this looks like to be the case.
> 
> Thank you Wei for double checking, and sorry for not having checked more
> thoroughly - this patch has been around for a long time, and I just rebased,
> tested and sent it without checking if it was still needed.
> 
> I've marked it as "not applicable" on patchwork.

You are welcome.
BTW, I really appreciate your debug ability to find the root cause of too much interrupt is coming from mailbox read operation!
That need deep knowledge of NIC.


> 
> > > "
> > > bool no_pflink_check = wait_to_complete == 0;
> > >
> > >                 ////////////////////////
> > >
> > >                  if (no_pflink_check) {
> > >                                  if (*speed ==
> > > IXGBE_LINK_SPEED_UNKNOWN)
> > >                                                  mac-
> > > >get_link_status = true;
> > >                                  else
> > >                                                  mac-
> > > >get_link_status = false;
> > >
> > >                                  goto out;
> > >                  }
> > > "
> > > Comment of "for a quick link status checking, wait_to_compelet == 0,
> > > skip PF link status checking " is clear.
> > >
> > > That means in rte_eth_link_get_nowait(), code will skip this mailbox
> > > read interrupt, only in
> > > rte_eth_link_get() there will be this interrupt, so I think what you
> > > need to is just replace
> > > rte_eth_link_get() with rte_eth_link_get_nowait() in your APP, that
> > > will reduce interrupt from vf to pf in mailbox read.
> > >
> > >
> > > > -----Original Message-----
> > > > From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Luca Boccassi
> > > > Sent: Wednesday, August 15, 2018 10:15 PM
> > > > To: dev@dpdk.org
> > > > Cc: Lu, Wenzhuo <wenzhuo.lu@intel.com>; Ananyev, Konstantin
> > > > <konstantin.ananyev@intel.com>; Luca Boccassi <bluca@debian.org>;
> > > > stable@dpdk.org
> > > > Subject: [dpdk-dev] [PATCH] net/ixgbe: reduce PF mailbox interrupt
> > > > rate
> > > >
> > > > We have observed high rate of NIC PF interrupts when VNF is using
> > > > DPDK APIs rte_eth_link_get_nowait() and rte_eth_link_get()
> > > > functions, as they are causing VF driver to send many MBOX ACK
> > > > messages.
> > > >
> > > > With these changes, the interrupt rates go down significantly.
> > > > Here's some
> > > > testing results:
> > > >
> > > > Without the patch:
> > > >
> > > > $ egrep 'CPU|ens1f' /proc/interrupts ; sleep 10; egrep 'CPU|ens1f'
> > > > /proc/interrupts
> > > >              CPU0       CPU1       CPU2       CPU3       CPU4
> > > >    CPU5       CPU6       CPU7
> > > > CPU8       CPU9       CPU10      CPU11      CPU12      CPU13
> > > >  CPU14      CPU15
> > > >    34:         88          0          0          0          0
> > > >      41         30        509          0          0        350
> > > > 24         88        114        461        562   PCI-MSI 1572864-
> > > > edge      ens1f0-TxRx-0
> > > >    35:         49         24          0          0         65
> > > >     130         64         29         67          0         10
> > > > 0          0         46         38        764   PCI-MSI 1572865-
> > > > edge      ens1f0-TxRx-1
> > > >    36:         53          0          0         64         15
> > > >      85        132         71        108          0
> > > > 30          0        165        215        303        104   PCI-
> > > > MSI 1572866-edge      ens1f0-
> > > > TxRx-2
> > > >    37:         46        196          0          0         10
> > > >      48         62         68         51          0          0
> > > > 0        103         82         54        192   PCI-MSI 1572867-
> > > > edge      ens1f0-TxRx-3
> > > >    38:        226          0          0          0        159
> > > >     145        749        265          0          0
> > > > 202          0      69229        166        450          0   PCI-
> > > > MSI 1572868-edge      ens1f0
> > > >    52:         95        896          0          0          0
> > > >      18         53          0        494          0          0
> > > > 0          0        265         79        124   PCI-MSI 1574912-
> > > > edge      ens1f1-TxRx-0
> > > >    53:         50          0         18          0         72
> > > >      33          0        168        330          0          0
> > > > 0        141         22         12         65   PCI-MSI 1574913-
> > > > edge      ens1f1-TxRx-1
> > > >    54:         65          0          0          0        239
> > > >     104        166         49        442          0
> > > > 0          0        126         26        307          0   PCI-
> > > > MSI 1574914-edge      ens1f1-TxRx-2
> > > >    55:         57          0          0          0        123
> > > >      35         83         54        157        106
> > > > 0          0         26         29        312         97   PCI-
> > > > MSI 1574915-edge      ens1f1-TxRx-3
> > > >    56:        232          0      13910          0         16
> > > >      21          0      54422          0          0
> > > > 0         24         25          0         78          0   PCI-
> > > > MSI 1574916-edge      ens1f1
> > > >              CPU0       CPU1       CPU2       CPU3       CPU4
> > > >    CPU5       CPU6       CPU7
> > > > CPU8       CPU9       CPU10      CPU11      CPU12      CPU13
> > > >  CPU14      CPU15
> > > >    34:         88          0          0          0          0
> > > >      41         30        509          0          0        350
> > > > 24         88        119        461        562   PCI-MSI 1572864-
> > > > edge      ens1f0-TxRx-0
> > > >    35:         49         24          0          0         65
> > > >     130         64         29         67          0         10
> > > > 0          0         46         38        771   PCI-MSI 1572865-
> > > > edge      ens1f0-TxRx-1
> > > >    36:         53          0          0         64         15
> > > >      85        132         71        108          0
> > > > 30          0        165        215        303        113   PCI-
> > > > MSI 1572866-edge      ens1f0-
> > > > TxRx-2
> > > >    37:         46        196          0          0         10
> > > >      48         62         68         56          0          0
> > > > 0        103         82         54        192   PCI-MSI 1572867-
> > > > edge      ens1f0-TxRx-3
> > > >    38:        226          0          0          0        159
> > > >     145        749        265          0          0
> > > > 202          0      71281        166        450          0   PCI-
> > > > MSI 1572868-edge      ens1f0
> > > >    52:         95        896          0          0          0
> > > >      18         53          0        494          0          0
> > > > 0          0        265         79        133   PCI-MSI 1574912-
> > > > edge      ens1f1-TxRx-0
> > > >    53:         50          0         18          0         72
> > > >      33          0        173        330          0          0
> > > > 0        141         22         12         65   PCI-MSI 1574913-
> > > > edge      ens1f1-TxRx-1
> > > >    54:         65          0          0          0        239
> > > >     104        166         49        442          0
> > > > 0          0        126         26        312          0   PCI-
> > > > MSI 1574914-edge      ens1f1-TxRx-2
> > > >    55:         57          0          0          0        123
> > > >      35         83         59        157        106
> > > > 0          0         26         29        312         97   PCI-
> > > > MSI 1574915-edge      ens1f1-TxRx-3
> > > >    56:        232          0      15910          0         16
> > > >      21          0      54422          0          0
> > > > 0         24         25          0         78          0   PCI-
> > > > MSI 1574916-edge      ens1f1
> > > >
> > > > During the 10s interval, CPU2 jumped by 2000 interrupts, CPU12 by
> > > > 2051
> > > > interrupts, for about 200 interrupts/second. That's on the order
> > > > of what we expect. I would have guessed 100/s but perhaps there
> > > > are two mailbox messages.
> > > >
> > > > With the patch:
> > > >
> > > > $ egrep 'CPU|ens1f' /proc/interrupts ; sleep 10; egrep 'CPU|ens1f'
> > > > /proc/interrupts
> > > >              CPU0       CPU1       CPU2       CPU3       CPU4
> > > >    CPU5       CPU6       CPU7
> > > > CPU8       CPU9       CPU10      CPU11      CPU12      CPU13
> > > >  CPU14      CPU15
> > > >    34:         88          0          0          0          0
> > > >      25         19        177          0          0        350
> > > > 24         88        100        362        559   PCI-MSI 1572864-
> > > > edge      ens1f0-TxRx-0
> > > >    35:         49         19          0          0         65
> > > >     130         64         29         67          0         10
> > > > 0          0         46         38        543   PCI-MSI 1572865-
> > > > edge      ens1f0-TxRx-1
> > > >    36:         53          0          0         64         15
> > > >      53         85         71        108          0         24
> > > > 0         85        215        292         31   PCI-MSI 1572866-
> > > > edge      ens1f0-TxRx-2
> > > >    37:         46        196          0          0         10
> > > >      43         57         39         19          0          0
> > > > 0         78         69         49        149   PCI-MSI 1572867-
> > > > edge      ens1f0-TxRx-3
> > > >    38:        226          0          0          0        159
> > > >     145        749        247          0          0
> > > > 202          0      58250          0        450          0   PCI-
> > > > MSI 1572868-edge      ens1f0
> > > >    52:         95        896          0          0          0
> > > >      18         53          0        189          0          0
> > > > 0          0        265         79         25   PCI-MSI 1574912-
> > > > edge      ens1f1-TxRx-0
> > > >    53:         50          0         18          0         72
> > > >      33          0         90        330          0          0
> > > > 0        136          5         12          0   PCI-MSI 1574913-
> > > > edge      ens1f1-TxRx-1
> > > >    54:         65          0          0          0         10
> > > >     104        166         49        442          0          0
> > > > 0        126         26        226          0   PCI-MSI 1574914-
> > > > edge      ens1f1-TxRx-2
> > > >    55:         57          0          0          0         61
> > > >      35         83         30        157        101          0
> > > > 0         26         15        312          0   PCI-MSI 1574915-
> > > > edge      ens1f1-TxRx-3
> > > >    56:        232          0       2062          0         16
> > > >      21          0      54422          0          0
> > > > 0         24         25          0         78          0   PCI-
> > > > MSI 1574916-edge      ens1f1
> > > >              CPU0       CPU1       CPU2       CPU3       CPU4
> > > >    CPU5       CPU6       CPU7
> > > > CPU8       CPU9       CPU10      CPU11      CPU12      CPU13
> > > >  CPU14      CPU15
> > > >    34:         88          0          0          0          0
> > > >      25         19        177          0          0        350
> > > > 24         88        102        362        562   PCI-MSI 1572864-
> > > > edge      ens1f0-TxRx-0
> > > >    35:         49         19          0          0         65
> > > >     130         64         29         67          0         10
> > > > 0          0         46         38        548   PCI-MSI 1572865-
> > > > edge      ens1f0-TxRx-1
> > > >    36:         53          0          0         64         15
> > > >      53         85         71        108          0         24
> > > > 0         85        215        292         36   PCI-MSI 1572866-
> > > > edge      ens1f0-TxRx-2
> > > >    37:         46        196          0          0         10
> > > >      45         57         39         19          0          0
> > > > 0         78         69         49        152   PCI-MSI 1572867-
> > > > edge      ens1f0-TxRx-3
> > > >    38:        226          0          0          0        159
> > > >     145        749        247          0          0
> > > > 202          0      58259          0        450          0   PCI-
> > > > MSI 1572868-edge      ens1f0
> > > >    52:         95        896          0          0          0
> > > >      18         53          0        194          0          0
> > > > 0          0        265         79         25   PCI-MSI 1574912-
> > > > edge      ens1f1-TxRx-0
> > > >    53:         50          0         18          0         72
> > > >      33          0         95        330          0          0
> > > > 0        136          5         12          0   PCI-MSI 1574913-
> > > > edge      ens1f1-TxRx-1
> > > >    54:         65          0          0          0         10
> > > >     104        166         49        442          0          0
> > > > 0        126         26        231          0   PCI-MSI 1574914-
> > > > edge      ens1f1-TxRx-2
> > > >    55:         57          0          0          0         66
> > > >      35         83         30        157        101          0
> > > > 0         26         15        312          0   PCI-MSI 1574915-
> > > > edge      ens1f1-TxRx-3
> > > >    56:        232          0       2071          0         16
> > > >      21          0      54422          0          0
> > > > 0         24         25          0         78          0   PCI-
> > > > MSI 1574916-edge      ens1f1
> > > >
> > > > Note the interrupt rate has gone way down. During the 10s
> > > > interval, we only saw a handful of interrupts.
> > > >
> > > > Note that this patch was originally provided by Intel directly to
> > > > AT&T and Vyatta, but unfortunately I am unable to find records of
> > > > the exact author.
> > > >
> > > > We have been using this in production for more than a year.
> > > >
> > > > Fixes: af75078fece3 ("first public release")
> > > > Cc: stable@dpdk.org
> > > >
> > > > Signed-off-by: Luca Boccassi <bluca@debian.org>
> > > > ---
> > > >   drivers/net/ixgbe/base/ixgbe_vf.c | 33 ++++++++++++++++------
> > > > ---------
> > > >   1 file changed, 17 insertions(+), 16 deletions(-)
> > > >
> > > > diff --git a/drivers/net/ixgbe/base/ixgbe_vf.c
> > > > b/drivers/net/ixgbe/base/ixgbe_vf.c
> > > > index 5b25a6b4d4..16086670b1 100644
> > > > --- a/drivers/net/ixgbe/base/ixgbe_vf.c
> > > > +++ b/drivers/net/ixgbe/base/ixgbe_vf.c
> > > > @@ -586,7 +586,6 @@ s32 ixgbe_check_mac_link_vf(struct ixgbe_hw
> > > > *hw, ixgbe_link_speed *speed,
> > > >   	s32 ret_val = IXGBE_SUCCESS;
> > > >   	u32 links_reg;
> > > >   	u32 in_msg = 0;
> > > > -	UNREFERENCED_1PARAMETER(autoneg_wait_to_complete);
> > > >
> > > >   	/* If we were hit with a reset drop the link */
> > > >   	if (!mbx->ops.check_for_rst(hw, 0) || !mbx->timeout) @@
> > > > -643,23
> > > > +642,25 @@ s32 ixgbe_check_mac_link_vf(struct ixgbe_hw *hw,
> > > > ixgbe_link_speed *speed,
> > > >   		*speed = IXGBE_LINK_SPEED_UNKNOWN;
> > > >   	}
> > > >
> > > > -	/* if the read failed it could just be a mailbox
> > > > collision, best wait
> > > > -	 * until we are called again and don't report an error
> > > > -	 */
> > > > -	if (mbx->ops.read(hw, &in_msg, 1, 0))
> > > > -		goto out;
> > > > +	if (autoneg_wait_to_complete) {
> > > > +		/* if the read failed it could just be a mailbox
> > > > collision, best
> > > > wait
> > > > +		 * until we are called again and don't report an
> > > > error
> > > > +		 */
> > > > +		if (mbx->ops.read(hw, &in_msg, 1, 0))
> > > > +			goto out;
> > > >
> > > > -	if (!(in_msg & IXGBE_VT_MSGTYPE_CTS)) {
> > > > -		/* msg is not CTS and is NACK we must have lost
> > > > CTS status
> > > > */
> > > > -		if (in_msg & IXGBE_VT_MSGTYPE_NACK)
> > > > +		if (!(in_msg & IXGBE_VT_MSGTYPE_CTS)) {
> > > > +			/* msg is not CTS and is NACK we must
> > > > have lost CTS
> > > > status */
> > > > +			if (in_msg & IXGBE_VT_MSGTYPE_NACK)
> > > > +				ret_val = -1;
> > > > +			goto out;
> > > > +		}
> > > > +
> > > > +		/* the pf is talking, if we timed out in the
> > > > past we reinit */
> > > > +		if (!mbx->timeout) {
> > > >   			ret_val = -1;
> > > > -		goto out;
> > > > -	}
> > > > -
> > > > -	/* the pf is talking, if we timed out in the past we
> > > > reinit */
> > > > -	if (!mbx->timeout) {
> > > > -		ret_val = -1;
> > > > -		goto out;
> > > > +			goto out;
> > > > +		}
> > > >   	}
> > > >
> > > >   	/* if we passed all the tests above then the link is up and we
> > > > no
> > > > --
> > > > 2.18.0
> 
> --
> Kind regards,
> Luca Boccassi
Zhao1, Wei Nov. 8, 2018, 2:58 a.m. UTC | #6
Hi,Chas Williams


> -----Original Message-----
> From: Chas Williams [mailto:3chas3@gmail.com]
> Sent: Thursday, November 8, 2018 2:55 AM
> To: Zhao1, Wei <wei.zhao1@intel.com>; Luca Boccassi <bluca@debian.org>;
> dev@dpdk.org
> Cc: Lu, Wenzhuo <wenzhuo.lu@intel.com>; Ananyev, Konstantin
> <konstantin.ananyev@intel.com>; stable@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH] net/ixgbe: reduce PF mailbox interrupt rate
> 
> 
> 
> On 11/07/2018 04:17 AM, Zhao1, Wei wrote:
> > Hi, Luca Boccassi
> >
> >      The purpose of this patch is to reduce the mailbox interrupt from vf to pf,
> but there seem some point need for discussion in this patch.
> >
> > First, I do not know why do you change code of function
> > ixgbe_check_mac_link_vf(), because in rte_eth_link_get_nowait() and
> rte_eth_link_get(), it will call ixgbe_dev_link_update()-
> >ixgbe_dev_link_update_share()-> ixgbevf_check_link() for VF, NOT
> ixgbe_check_mac_link_vf() in your patch!
> >
> > Second, in function ixgbevf_check_link(), there is mailbox message
> > read operation for vf, " if (mbx->ops.read(hw, &in_msg, 1, 0))", that
> > is ixgbe_read_mbx_vf() , This will cause interrupt from vf to pf, this is just
> the point of this patch, it is also the problem that you want to solve.
> > So, you use autoneg_wait_to_complete flag to control this mailbox
> message read operation, maybe you will use rte_eth_link_get_nowait(),
> Which set autoneg_wait_to_complete = 0, then the interrupt from vf to pf
> can be reduced.
> >
> > But  I do not think this patch is necessary, because in
> > ixgbevf_check_link(), it,has
> 
> I think you are right here. This patch dates to before the addition of the vf
> argument to ixgbe_dev_link_update_share() and the split of .link_update
> between ixgbe and ixgbevf.  At one point, this patch was especially beneficial
> if you were running bonding (which tends to make quite a few link status
> checks).

If you have other idea based on this patch, I am willing to review and ack.

> 
> So this patch probably hasn't been helping at this point.  I will try to get some
> time to locally test this.
> 
> > "
> > bool no_pflink_check = wait_to_complete == 0;
> >
> >                 ////////////////////////
> >
> >                  if (no_pflink_check) {
> >                                  if (*speed == IXGBE_LINK_SPEED_UNKNOWN)
> >                                                  mac->get_link_status = true;
> >                                  else
> >                                                  mac->get_link_status
> > = false;
> >
> >                                  goto out;
> >                  }
> > "
> > Comment of "for a quick link status checking, wait_to_compelet == 0, skip
> PF link status checking " is clear.
> >
> > That means in rte_eth_link_get_nowait(), code will skip this mailbox
> > read interrupt, only in
> > rte_eth_link_get() there will be this interrupt, so I think what you
> > need to is just replace
> > rte_eth_link_get() with rte_eth_link_get_nowait() in your APP, that
> > will reduce interrupt from vf to pf in mailbox read.
> >
> >
> >> -----Original Message-----
> >> From: dev [mailto:dev-bounces@dpdk.org] On Behalf Of Luca Boccassi
> >> Sent: Wednesday, August 15, 2018 10:15 PM
> >> To: dev@dpdk.org
> >> Cc: Lu, Wenzhuo <wenzhuo.lu@intel.com>; Ananyev, Konstantin
> >> <konstantin.ananyev@intel.com>; Luca Boccassi <bluca@debian.org>;
> >> stable@dpdk.org
> >> Subject: [dpdk-dev] [PATCH] net/ixgbe: reduce PF mailbox interrupt
> >> rate
> >>
> >> We have observed high rate of NIC PF interrupts when VNF is using
> >> DPDK APIs rte_eth_link_get_nowait() and rte_eth_link_get() functions,
> >> as they are causing VF driver to send many MBOX ACK messages.
> >>
> >> With these changes, the interrupt rates go down significantly. Here's
> >> some testing results:
> >>
> >> Without the patch:
> >>
> >> $ egrep 'CPU|ens1f' /proc/interrupts ; sleep 10; egrep 'CPU|ens1f'
> >> /proc/interrupts
> >>              CPU0       CPU1       CPU2       CPU3       CPU4       CPU5       CPU6       CPU7
> >> CPU8       CPU9       CPU10      CPU11      CPU12      CPU13      CPU14      CPU15
> >>    34:         88          0          0          0          0         41         30        509          0          0
> 350
> >> 24         88        114        461        562   PCI-MSI 1572864-edge      ens1f0-TxRx-0
> >>    35:         49         24          0          0         65        130         64         29         67          0
> 10
> >> 0          0         46         38        764   PCI-MSI 1572865-edge      ens1f0-TxRx-1
> >>    36:         53          0          0         64         15         85        132         71        108          0
> >> 30          0        165        215        303        104   PCI-MSI 1572866-edge      ens1f0-
> >> TxRx-2
> >>    37:         46        196          0          0         10         48         62         68         51          0
> 0
> >> 0        103         82         54        192   PCI-MSI 1572867-edge      ens1f0-TxRx-3
> >>    38:        226          0          0          0        159        145        749        265          0          0
> >> 202          0      69229        166        450          0   PCI-MSI 1572868-edge      ens1f0
> >>    52:         95        896          0          0          0         18         53          0        494          0
> 0
> >> 0          0        265         79        124   PCI-MSI 1574912-edge      ens1f1-TxRx-0
> >>    53:         50          0         18          0         72         33          0        168        330          0
> 0
> >> 0        141         22         12         65   PCI-MSI 1574913-edge      ens1f1-TxRx-1
> >>    54:         65          0          0          0        239        104        166         49        442          0
> >> 0          0        126         26        307          0   PCI-MSI 1574914-edge      ens1f1-
> TxRx-2
> >>    55:         57          0          0          0        123         35         83         54        157        106
> >> 0          0         26         29        312         97   PCI-MSI 1574915-edge      ens1f1-
> TxRx-3
> >>    56:        232          0      13910          0         16         21          0      54422          0          0
> >> 0         24         25          0         78          0   PCI-MSI 1574916-edge      ens1f1
> >>              CPU0       CPU1       CPU2       CPU3       CPU4       CPU5       CPU6       CPU7
> >> CPU8       CPU9       CPU10      CPU11      CPU12      CPU13      CPU14      CPU15
> >>    34:         88          0          0          0          0         41         30        509          0          0
> 350
> >> 24         88        119        461        562   PCI-MSI 1572864-edge      ens1f0-TxRx-0
> >>    35:         49         24          0          0         65        130         64         29         67          0
> 10
> >> 0          0         46         38        771   PCI-MSI 1572865-edge      ens1f0-TxRx-1
> >>    36:         53          0          0         64         15         85        132         71        108          0
> >> 30          0        165        215        303        113   PCI-MSI 1572866-edge      ens1f0-
> >> TxRx-2
> >>    37:         46        196          0          0         10         48         62         68         56          0
> 0
> >> 0        103         82         54        192   PCI-MSI 1572867-edge      ens1f0-TxRx-3
> >>    38:        226          0          0          0        159        145        749        265          0          0
> >> 202          0      71281        166        450          0   PCI-MSI 1572868-edge      ens1f0
> >>    52:         95        896          0          0          0         18         53          0        494          0
> 0
> >> 0          0        265         79        133   PCI-MSI 1574912-edge      ens1f1-TxRx-0
> >>    53:         50          0         18          0         72         33          0        173        330          0
> 0
> >> 0        141         22         12         65   PCI-MSI 1574913-edge      ens1f1-TxRx-1
> >>    54:         65          0          0          0        239        104        166         49        442          0
> >> 0          0        126         26        312          0   PCI-MSI 1574914-edge      ens1f1-
> TxRx-2
> >>    55:         57          0          0          0        123         35         83         59        157        106
> >> 0          0         26         29        312         97   PCI-MSI 1574915-edge      ens1f1-
> TxRx-3
> >>    56:        232          0      15910          0         16         21          0      54422          0          0
> >> 0         24         25          0         78          0   PCI-MSI 1574916-edge      ens1f1
> >>
> >> During the 10s interval, CPU2 jumped by 2000 interrupts, CPU12 by
> >> 2051 interrupts, for about 200 interrupts/second. That's on the order
> >> of what we expect. I would have guessed 100/s but perhaps there are
> >> two mailbox messages.
> >>
> >> With the patch:
> >>
> >> $ egrep 'CPU|ens1f' /proc/interrupts ; sleep 10; egrep 'CPU|ens1f'
> >> /proc/interrupts
> >>              CPU0       CPU1       CPU2       CPU3       CPU4       CPU5       CPU6       CPU7
> >> CPU8       CPU9       CPU10      CPU11      CPU12      CPU13      CPU14      CPU15
> >>    34:         88          0          0          0          0         25         19        177          0          0
> 350
> >> 24         88        100        362        559   PCI-MSI 1572864-edge      ens1f0-TxRx-0
> >>    35:         49         19          0          0         65        130         64         29         67          0
> 10
> >> 0          0         46         38        543   PCI-MSI 1572865-edge      ens1f0-TxRx-1
> >>    36:         53          0          0         64         15         53         85         71        108          0
> 24
> >> 0         85        215        292         31   PCI-MSI 1572866-edge      ens1f0-TxRx-2
> >>    37:         46        196          0          0         10         43         57         39         19          0
> 0
> >> 0         78         69         49        149   PCI-MSI 1572867-edge      ens1f0-TxRx-3
> >>    38:        226          0          0          0        159        145        749        247          0          0
> >> 202          0      58250          0        450          0   PCI-MSI 1572868-edge      ens1f0
> >>    52:         95        896          0          0          0         18         53          0        189          0
> 0
> >> 0          0        265         79         25   PCI-MSI 1574912-edge      ens1f1-TxRx-0
> >>    53:         50          0         18          0         72         33          0         90        330          0
> 0
> >> 0        136          5         12          0   PCI-MSI 1574913-edge      ens1f1-TxRx-1
> >>    54:         65          0          0          0         10        104        166         49        442          0
> 0
> >> 0        126         26        226          0   PCI-MSI 1574914-edge      ens1f1-TxRx-2
> >>    55:         57          0          0          0         61         35         83         30        157        101
> 0
> >> 0         26         15        312          0   PCI-MSI 1574915-edge      ens1f1-TxRx-3
> >>    56:        232          0       2062          0         16         21          0      54422          0          0
> >> 0         24         25          0         78          0   PCI-MSI 1574916-edge      ens1f1
> >>              CPU0       CPU1       CPU2       CPU3       CPU4       CPU5       CPU6       CPU7
> >> CPU8       CPU9       CPU10      CPU11      CPU12      CPU13      CPU14      CPU15
> >>    34:         88          0          0          0          0         25         19        177          0          0
> 350
> >> 24         88        102        362        562   PCI-MSI 1572864-edge      ens1f0-TxRx-0
> >>    35:         49         19          0          0         65        130         64         29         67          0
> 10
> >> 0          0         46         38        548   PCI-MSI 1572865-edge      ens1f0-TxRx-1
> >>    36:         53          0          0         64         15         53         85         71        108          0
> 24
> >> 0         85        215        292         36   PCI-MSI 1572866-edge      ens1f0-TxRx-2
> >>    37:         46        196          0          0         10         45         57         39         19          0
> 0
> >> 0         78         69         49        152   PCI-MSI 1572867-edge      ens1f0-TxRx-3
> >>    38:        226          0          0          0        159        145        749        247          0          0
> >> 202          0      58259          0        450          0   PCI-MSI 1572868-edge      ens1f0
> >>    52:         95        896          0          0          0         18         53          0        194          0
> 0
> >> 0          0        265         79         25   PCI-MSI 1574912-edge      ens1f1-TxRx-0
> >>    53:         50          0         18          0         72         33          0         95        330          0
> 0
> >> 0        136          5         12          0   PCI-MSI 1574913-edge      ens1f1-TxRx-1
> >>    54:         65          0          0          0         10        104        166         49        442          0
> 0
> >> 0        126         26        231          0   PCI-MSI 1574914-edge      ens1f1-TxRx-2
> >>    55:         57          0          0          0         66         35         83         30        157        101
> 0
> >> 0         26         15        312          0   PCI-MSI 1574915-edge      ens1f1-TxRx-3
> >>    56:        232          0       2071          0         16         21          0      54422          0          0
> >> 0         24         25          0         78          0   PCI-MSI 1574916-edge      ens1f1
> >>
> >> Note the interrupt rate has gone way down. During the 10s interval,
> >> we only saw a handful of interrupts.
> >>
> >> Note that this patch was originally provided by Intel directly to
> >> AT&T and Vyatta, but unfortunately I am unable to find records of the
> exact author.
> >>
> >> We have been using this in production for more than a year.
> >>
> >> Fixes: af75078fece3 ("first public release")
> >> Cc: stable@dpdk.org
> >>
> >> Signed-off-by: Luca Boccassi <bluca@debian.org>
> >> ---
> >>   drivers/net/ixgbe/base/ixgbe_vf.c | 33 ++++++++++++++++---------------
> >>   1 file changed, 17 insertions(+), 16 deletions(-)
> >>
> >> diff --git a/drivers/net/ixgbe/base/ixgbe_vf.c
> >> b/drivers/net/ixgbe/base/ixgbe_vf.c
> >> index 5b25a6b4d4..16086670b1 100644
> >> --- a/drivers/net/ixgbe/base/ixgbe_vf.c
> >> +++ b/drivers/net/ixgbe/base/ixgbe_vf.c
> >> @@ -586,7 +586,6 @@ s32 ixgbe_check_mac_link_vf(struct ixgbe_hw
> *hw,
> >> ixgbe_link_speed *speed,
> >>   	s32 ret_val = IXGBE_SUCCESS;
> >>   	u32 links_reg;
> >>   	u32 in_msg = 0;
> >> -	UNREFERENCED_1PARAMETER(autoneg_wait_to_complete);
> >>
> >>   	/* If we were hit with a reset drop the link */
> >>   	if (!mbx->ops.check_for_rst(hw, 0) || !mbx->timeout) @@ -643,23
> >> +642,25 @@ s32 ixgbe_check_mac_link_vf(struct ixgbe_hw *hw,
> >> ixgbe_link_speed *speed,
> >>   		*speed = IXGBE_LINK_SPEED_UNKNOWN;
> >>   	}
> >>
> >> -	/* if the read failed it could just be a mailbox collision, best wait
> >> -	 * until we are called again and don't report an error
> >> -	 */
> >> -	if (mbx->ops.read(hw, &in_msg, 1, 0))
> >> -		goto out;
> >> +	if (autoneg_wait_to_complete) {
> >> +		/* if the read failed it could just be a mailbox collision, best
> >> wait
> >> +		 * until we are called again and don't report an error
> >> +		 */
> >> +		if (mbx->ops.read(hw, &in_msg, 1, 0))
> >> +			goto out;
> >>
> >> -	if (!(in_msg & IXGBE_VT_MSGTYPE_CTS)) {
> >> -		/* msg is not CTS and is NACK we must have lost CTS status
> >> */
> >> -		if (in_msg & IXGBE_VT_MSGTYPE_NACK)
> >> +		if (!(in_msg & IXGBE_VT_MSGTYPE_CTS)) {
> >> +			/* msg is not CTS and is NACK we must have lost CTS
> >> status */
> >> +			if (in_msg & IXGBE_VT_MSGTYPE_NACK)
> >> +				ret_val = -1;
> >> +			goto out;
> >> +		}
> >> +
> >> +		/* the pf is talking, if we timed out in the past we reinit */
> >> +		if (!mbx->timeout) {
> >>   			ret_val = -1;
> >> -		goto out;
> >> -	}
> >> -
> >> -	/* the pf is talking, if we timed out in the past we reinit */
> >> -	if (!mbx->timeout) {
> >> -		ret_val = -1;
> >> -		goto out;
> >> +			goto out;
> >> +		}
> >>   	}
> >>
> >>   	/* if we passed all the tests above then the link is up and we no
> >> --
> >> 2.18.0
> >

Patch
diff mbox series

diff --git a/drivers/net/ixgbe/base/ixgbe_vf.c b/drivers/net/ixgbe/base/ixgbe_vf.c
index 5b25a6b4d4..16086670b1 100644
--- a/drivers/net/ixgbe/base/ixgbe_vf.c
+++ b/drivers/net/ixgbe/base/ixgbe_vf.c
@@ -586,7 +586,6 @@  s32 ixgbe_check_mac_link_vf(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
 	s32 ret_val = IXGBE_SUCCESS;
 	u32 links_reg;
 	u32 in_msg = 0;
-	UNREFERENCED_1PARAMETER(autoneg_wait_to_complete);
 
 	/* If we were hit with a reset drop the link */
 	if (!mbx->ops.check_for_rst(hw, 0) || !mbx->timeout)
@@ -643,23 +642,25 @@  s32 ixgbe_check_mac_link_vf(struct ixgbe_hw *hw, ixgbe_link_speed *speed,
 		*speed = IXGBE_LINK_SPEED_UNKNOWN;
 	}
 
-	/* if the read failed it could just be a mailbox collision, best wait
-	 * until we are called again and don't report an error
-	 */
-	if (mbx->ops.read(hw, &in_msg, 1, 0))
-		goto out;
+	if (autoneg_wait_to_complete) {
+		/* if the read failed it could just be a mailbox collision, best wait
+		 * until we are called again and don't report an error
+		 */
+		if (mbx->ops.read(hw, &in_msg, 1, 0))
+			goto out;
 
-	if (!(in_msg & IXGBE_VT_MSGTYPE_CTS)) {
-		/* msg is not CTS and is NACK we must have lost CTS status */
-		if (in_msg & IXGBE_VT_MSGTYPE_NACK)
+		if (!(in_msg & IXGBE_VT_MSGTYPE_CTS)) {
+			/* msg is not CTS and is NACK we must have lost CTS status */
+			if (in_msg & IXGBE_VT_MSGTYPE_NACK)
+				ret_val = -1;
+			goto out;
+		}
+
+		/* the pf is talking, if we timed out in the past we reinit */
+		if (!mbx->timeout) {
 			ret_val = -1;
-		goto out;
-	}
-
-	/* the pf is talking, if we timed out in the past we reinit */
-	if (!mbx->timeout) {
-		ret_val = -1;
-		goto out;
+			goto out;
+		}
 	}
 
 	/* if we passed all the tests above then the link is up and we no