[v8,2/4] net/i40e: use WC store to update queue tail registers

Message ID 1594982985-31551-3-git-send-email-radu.nicolau@intel.com (mailing list archive)
State Superseded, archived
Headers
Series eal: add WC store functions |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Radu Nicolau July 17, 2020, 10:49 a.m. UTC
  Performance improvement: use a write combining store
instead of a regular mmio write to update queue tail
registers.

Signed-off-by: Radu Nicolau <radu.nicolau@intel.com>
Acked-by: Bruce Richardson <bruce.richardson@intel.com>
---
 drivers/net/i40e/base/i40e_osdep.h    | 5 +++++
 drivers/net/i40e/i40e_rxtx.c          | 8 ++++----
 drivers/net/i40e/i40e_rxtx_vec_avx2.c | 4 ++--
 drivers/net/i40e/i40e_rxtx_vec_sse.c  | 4 ++--
 4 files changed, 13 insertions(+), 8 deletions(-)
  

Comments

Ruifeng Wang July 20, 2020, 6:46 a.m. UTC | #1
> -----Original Message-----
> From: Radu Nicolau <radu.nicolau@intel.com>
> Sent: Friday, July 17, 2020 6:50 PM
> To: dev@dpdk.org
> Cc: beilei.xing@intel.com; jia.guo@intel.com; bruce.richardson@intel.com;
> konstantin.ananyev@intel.com; jerinjacobk@gmail.com;
> david.marchand@redhat.com; fiona.trahe@intel.com; wei.zhao1@intel.com;
> Ruifeng Wang <Ruifeng.Wang@arm.com>; Radu Nicolau
> <radu.nicolau@intel.com>
> Subject: [PATCH v8 2/4] net/i40e: use WC store to update queue tail registers
> 
> Performance improvement: use a write combining store instead of a regular
> mmio write to update queue tail registers.
> 
> Signed-off-by: Radu Nicolau <radu.nicolau@intel.com>
> Acked-by: Bruce Richardson <bruce.richardson@intel.com>
> ---
>  drivers/net/i40e/base/i40e_osdep.h    | 5 +++++
>  drivers/net/i40e/i40e_rxtx.c          | 8 ++++----
>  drivers/net/i40e/i40e_rxtx_vec_avx2.c | 4 ++--
> drivers/net/i40e/i40e_rxtx_vec_sse.c  | 4 ++--
>  4 files changed, 13 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/net/i40e/base/i40e_osdep.h
> b/drivers/net/i40e/base/i40e_osdep.h
> index 58be396..69ab717 100644
> --- a/drivers/net/i40e/base/i40e_osdep.h
> +++ b/drivers/net/i40e/base/i40e_osdep.h
> @@ -138,6 +138,11 @@ static inline uint32_t i40e_read_addr(volatile void
> *addr)
>  #define I40E_PCI_REG_WRITE_RELAXED(reg, value)	\
>  	rte_write32_relaxed((rte_cpu_to_le_32(value)), reg)
> 
> +#define I40E_PCI_REG_WC_WRITE(queue, reg, value) \

'queue' is not necessary since it is never used by the macro. Can it be removed?

Thanks.
/Ruifeng
> +	rte_write32_wc((rte_cpu_to_le_32(value)), reg)
> +#define I40E_PCI_REG_WC_WRITE_RELAXED(queue, reg, value) \
> +	rte_write32_wc_relaxed((rte_cpu_to_le_32(value)), reg)
> +
>  #define I40E_WRITE_FLUSH(a) I40E_READ_REG(a, I40E_GLGEN_STAT)
>  #define I40EVF_WRITE_FLUSH(a) I40E_READ_REG(a, I40E_VFGEN_RSTAT)
> 
> diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
> index 840b6f3..64e43ac 100644
> --- a/drivers/net/i40e/i40e_rxtx.c
> +++ b/drivers/net/i40e/i40e_rxtx.c
> @@ -760,7 +760,7 @@ i40e_recv_pkts(void *rx_queue, struct rte_mbuf
> **rx_pkts, uint16_t nb_pkts)
>  	if (nb_hold > rxq->rx_free_thresh) {
>  		rx_id = (uint16_t) ((rx_id == 0) ?
>  			(rxq->nb_rx_desc - 1) : (rx_id - 1));
> -		I40E_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
> +		I40E_PCI_REG_WC_WRITE(rxq, rxq->qrx_tail, rx_id);
>  		nb_hold = 0;
>  	}
>  	rxq->nb_rx_hold = nb_hold;
> @@ -938,7 +938,7 @@ i40e_recv_scattered_pkts(void *rx_queue,
>  	if (nb_hold > rxq->rx_free_thresh) {
>  		rx_id = (uint16_t)(rx_id == 0 ?
>  			(rxq->nb_rx_desc - 1) : (rx_id - 1));
> -		I40E_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
> +		I40E_PCI_REG_WC_WRITE(rxq, rxq->qrx_tail, rx_id);
>  		nb_hold = 0;
>  	}
>  	rxq->nb_rx_hold = nb_hold;
> @@ -1249,7 +1249,7 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf
> **tx_pkts, uint16_t nb_pkts)
>  		   (unsigned) tx_id, (unsigned) nb_tx);
> 
>  	rte_cio_wmb();
> -	I40E_PCI_REG_WRITE_RELAXED(txq->qtx_tail, tx_id);
> +	I40E_PCI_REG_WC_WRITE_RELAXED(txq, txq->qtx_tail, tx_id);
>  	txq->tx_tail = tx_id;
> 
>  	return nb_tx;
> @@ -1400,7 +1400,7 @@ tx_xmit_pkts(struct i40e_tx_queue *txq,
>  		txq->tx_tail = 0;
> 
>  	/* Update the tx tail register */
> -	I40E_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail);
> +	I40E_PCI_REG_WC_WRITE(txq, txq->qtx_tail, txq->tx_tail);
> 
>  	return nb_pkts;
>  }
> diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx2.c
> b/drivers/net/i40e/i40e_rxtx_vec_avx2.c
> index 3bcef13..294c1c4 100644
> --- a/drivers/net/i40e/i40e_rxtx_vec_avx2.c
> +++ b/drivers/net/i40e/i40e_rxtx_vec_avx2.c
> @@ -134,7 +134,7 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
>  			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
> 
>  	/* Update the tail pointer on the NIC */
> -	I40E_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
> +	I40E_PCI_REG_WC_WRITE(rxq, rxq->qrx_tail, rx_id);
>  }
> 
>  #ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
> @@ -921,7 +921,7 @@ i40e_xmit_fixed_burst_vec_avx2(void *tx_queue,
> struct rte_mbuf **tx_pkts,
> 
>  	txq->tx_tail = tx_id;
> 
> -	I40E_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail);
> +	I40E_PCI_REG_WC_WRITE(txq, txq->qtx_tail, txq->tx_tail);
> 
>  	return nb_pkts;
>  }
> diff --git a/drivers/net/i40e/i40e_rxtx_vec_sse.c
> b/drivers/net/i40e/i40e_rxtx_vec_sse.c
> index 6985183..a4635e0 100644
> --- a/drivers/net/i40e/i40e_rxtx_vec_sse.c
> +++ b/drivers/net/i40e/i40e_rxtx_vec_sse.c
> @@ -86,7 +86,7 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
>  			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
> 
>  	/* Update the tail pointer on the NIC */
> -	I40E_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
> +	I40E_PCI_REG_WC_WRITE(rxq, rxq->qrx_tail, rx_id);
>  }
> 
>  #ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
> @@ -733,7 +733,7 @@ i40e_xmit_fixed_burst_vec(void *tx_queue, struct
> rte_mbuf **tx_pkts,
> 
>  	txq->tx_tail = tx_id;
> 
> -	I40E_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail);
> +	I40E_PCI_REG_WC_WRITE(txq, txq->qtx_tail, txq->tx_tail);
> 
>  	return nb_pkts;
>  }
> --
> 2.7.4
  
Radu Nicolau July 20, 2020, 8:54 a.m. UTC | #2
On 7/20/2020 7:46 AM, Ruifeng Wang wrote:
>> -----Original Message-----
>> From: Radu Nicolau <radu.nicolau@intel.com>
>> Sent: Friday, July 17, 2020 6:50 PM
>> To: dev@dpdk.org
>> Cc: beilei.xing@intel.com; jia.guo@intel.com; bruce.richardson@intel.com;
>> konstantin.ananyev@intel.com; jerinjacobk@gmail.com;
>> david.marchand@redhat.com; fiona.trahe@intel.com; wei.zhao1@intel.com;
>> Ruifeng Wang <Ruifeng.Wang@arm.com>; Radu Nicolau
>> <radu.nicolau@intel.com>
>> Subject: [PATCH v8 2/4] net/i40e: use WC store to update queue tail registers
>>
>> Performance improvement: use a write combining store instead of a regular
>> mmio write to update queue tail registers.
>>
>> Signed-off-by: Radu Nicolau <radu.nicolau@intel.com>
>> Acked-by: Bruce Richardson <bruce.richardson@intel.com>
>> ---
>>   drivers/net/i40e/base/i40e_osdep.h    | 5 +++++
>>   drivers/net/i40e/i40e_rxtx.c          | 8 ++++----
>>   drivers/net/i40e/i40e_rxtx_vec_avx2.c | 4 ++--
>> drivers/net/i40e/i40e_rxtx_vec_sse.c  | 4 ++--
>>   4 files changed, 13 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/net/i40e/base/i40e_osdep.h
>> b/drivers/net/i40e/base/i40e_osdep.h
>> index 58be396..69ab717 100644
>> --- a/drivers/net/i40e/base/i40e_osdep.h
>> +++ b/drivers/net/i40e/base/i40e_osdep.h
>> @@ -138,6 +138,11 @@ static inline uint32_t i40e_read_addr(volatile void
>> *addr)
>>   #define I40E_PCI_REG_WRITE_RELAXED(reg, value)	\
>>   	rte_write32_relaxed((rte_cpu_to_le_32(value)), reg)
>>
>> +#define I40E_PCI_REG_WC_WRITE(queue, reg, value) \
> 'queue' is not necessary since it is never used by the macro. Can it be removed?
Yes, I will remove it - in the first version we had a flag in the queue 
struct, and this macro was not properly updated.
>
> Thanks.
> /Ruifeng
>> +	rte_write32_wc((rte_cpu_to_le_32(value)), reg)
>> +#define I40E_PCI_REG_WC_WRITE_RELAXED(queue, reg, value) \
>> +	rte_write32_wc_relaxed((rte_cpu_to_le_32(value)), reg)
>> +
>>   #define I40E_WRITE_FLUSH(a) I40E_READ_REG(a, I40E_GLGEN_STAT)
>>   #define I40EVF_WRITE_FLUSH(a) I40E_READ_REG(a, I40E_VFGEN_RSTAT)
>>
>> diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
>> index 840b6f3..64e43ac 100644
>> --- a/drivers/net/i40e/i40e_rxtx.c
>> +++ b/drivers/net/i40e/i40e_rxtx.c
>> @@ -760,7 +760,7 @@ i40e_recv_pkts(void *rx_queue, struct rte_mbuf
>> **rx_pkts, uint16_t nb_pkts)
>>   	if (nb_hold > rxq->rx_free_thresh) {
>>   		rx_id = (uint16_t) ((rx_id == 0) ?
>>   			(rxq->nb_rx_desc - 1) : (rx_id - 1));
>> -		I40E_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
>> +		I40E_PCI_REG_WC_WRITE(rxq, rxq->qrx_tail, rx_id);
>>   		nb_hold = 0;
>>   	}
>>   	rxq->nb_rx_hold = nb_hold;
>> @@ -938,7 +938,7 @@ i40e_recv_scattered_pkts(void *rx_queue,
>>   	if (nb_hold > rxq->rx_free_thresh) {
>>   		rx_id = (uint16_t)(rx_id == 0 ?
>>   			(rxq->nb_rx_desc - 1) : (rx_id - 1));
>> -		I40E_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
>> +		I40E_PCI_REG_WC_WRITE(rxq, rxq->qrx_tail, rx_id);
>>   		nb_hold = 0;
>>   	}
>>   	rxq->nb_rx_hold = nb_hold;
>> @@ -1249,7 +1249,7 @@ i40e_xmit_pkts(void *tx_queue, struct rte_mbuf
>> **tx_pkts, uint16_t nb_pkts)
>>   		   (unsigned) tx_id, (unsigned) nb_tx);
>>
>>   	rte_cio_wmb();
>> -	I40E_PCI_REG_WRITE_RELAXED(txq->qtx_tail, tx_id);
>> +	I40E_PCI_REG_WC_WRITE_RELAXED(txq, txq->qtx_tail, tx_id);
>>   	txq->tx_tail = tx_id;
>>
>>   	return nb_tx;
>> @@ -1400,7 +1400,7 @@ tx_xmit_pkts(struct i40e_tx_queue *txq,
>>   		txq->tx_tail = 0;
>>
>>   	/* Update the tx tail register */
>> -	I40E_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail);
>> +	I40E_PCI_REG_WC_WRITE(txq, txq->qtx_tail, txq->tx_tail);
>>
>>   	return nb_pkts;
>>   }
>> diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx2.c
>> b/drivers/net/i40e/i40e_rxtx_vec_avx2.c
>> index 3bcef13..294c1c4 100644
>> --- a/drivers/net/i40e/i40e_rxtx_vec_avx2.c
>> +++ b/drivers/net/i40e/i40e_rxtx_vec_avx2.c
>> @@ -134,7 +134,7 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
>>   			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
>>
>>   	/* Update the tail pointer on the NIC */
>> -	I40E_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
>> +	I40E_PCI_REG_WC_WRITE(rxq, rxq->qrx_tail, rx_id);
>>   }
>>
>>   #ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
>> @@ -921,7 +921,7 @@ i40e_xmit_fixed_burst_vec_avx2(void *tx_queue,
>> struct rte_mbuf **tx_pkts,
>>
>>   	txq->tx_tail = tx_id;
>>
>> -	I40E_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail);
>> +	I40E_PCI_REG_WC_WRITE(txq, txq->qtx_tail, txq->tx_tail);
>>
>>   	return nb_pkts;
>>   }
>> diff --git a/drivers/net/i40e/i40e_rxtx_vec_sse.c
>> b/drivers/net/i40e/i40e_rxtx_vec_sse.c
>> index 6985183..a4635e0 100644
>> --- a/drivers/net/i40e/i40e_rxtx_vec_sse.c
>> +++ b/drivers/net/i40e/i40e_rxtx_vec_sse.c
>> @@ -86,7 +86,7 @@ i40e_rxq_rearm(struct i40e_rx_queue *rxq)
>>   			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
>>
>>   	/* Update the tail pointer on the NIC */
>> -	I40E_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
>> +	I40E_PCI_REG_WC_WRITE(rxq, rxq->qrx_tail, rx_id);
>>   }
>>
>>   #ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
>> @@ -733,7 +733,7 @@ i40e_xmit_fixed_burst_vec(void *tx_queue, struct
>> rte_mbuf **tx_pkts,
>>
>>   	txq->tx_tail = tx_id;
>>
>> -	I40E_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail);
>> +	I40E_PCI_REG_WC_WRITE(txq, txq->qtx_tail, txq->tx_tail);
>>
>>   	return nb_pkts;
>>   }
>> --
>> 2.7.4
  

Patch

diff --git a/drivers/net/i40e/base/i40e_osdep.h b/drivers/net/i40e/base/i40e_osdep.h
index 58be396..69ab717 100644
--- a/drivers/net/i40e/base/i40e_osdep.h
+++ b/drivers/net/i40e/base/i40e_osdep.h
@@ -138,6 +138,11 @@  static inline uint32_t i40e_read_addr(volatile void *addr)
 #define I40E_PCI_REG_WRITE_RELAXED(reg, value)	\
 	rte_write32_relaxed((rte_cpu_to_le_32(value)), reg)
 
+#define I40E_PCI_REG_WC_WRITE(queue, reg, value) \
+	rte_write32_wc((rte_cpu_to_le_32(value)), reg)
+#define I40E_PCI_REG_WC_WRITE_RELAXED(queue, reg, value) \
+	rte_write32_wc_relaxed((rte_cpu_to_le_32(value)), reg)
+
 #define I40E_WRITE_FLUSH(a) I40E_READ_REG(a, I40E_GLGEN_STAT)
 #define I40EVF_WRITE_FLUSH(a) I40E_READ_REG(a, I40E_VFGEN_RSTAT)
 
diff --git a/drivers/net/i40e/i40e_rxtx.c b/drivers/net/i40e/i40e_rxtx.c
index 840b6f3..64e43ac 100644
--- a/drivers/net/i40e/i40e_rxtx.c
+++ b/drivers/net/i40e/i40e_rxtx.c
@@ -760,7 +760,7 @@  i40e_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
 	if (nb_hold > rxq->rx_free_thresh) {
 		rx_id = (uint16_t) ((rx_id == 0) ?
 			(rxq->nb_rx_desc - 1) : (rx_id - 1));
-		I40E_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+		I40E_PCI_REG_WC_WRITE(rxq, rxq->qrx_tail, rx_id);
 		nb_hold = 0;
 	}
 	rxq->nb_rx_hold = nb_hold;
@@ -938,7 +938,7 @@  i40e_recv_scattered_pkts(void *rx_queue,
 	if (nb_hold > rxq->rx_free_thresh) {
 		rx_id = (uint16_t)(rx_id == 0 ?
 			(rxq->nb_rx_desc - 1) : (rx_id - 1));
-		I40E_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+		I40E_PCI_REG_WC_WRITE(rxq, rxq->qrx_tail, rx_id);
 		nb_hold = 0;
 	}
 	rxq->nb_rx_hold = nb_hold;
@@ -1249,7 +1249,7 @@  i40e_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
 		   (unsigned) tx_id, (unsigned) nb_tx);
 
 	rte_cio_wmb();
-	I40E_PCI_REG_WRITE_RELAXED(txq->qtx_tail, tx_id);
+	I40E_PCI_REG_WC_WRITE_RELAXED(txq, txq->qtx_tail, tx_id);
 	txq->tx_tail = tx_id;
 
 	return nb_tx;
@@ -1400,7 +1400,7 @@  tx_xmit_pkts(struct i40e_tx_queue *txq,
 		txq->tx_tail = 0;
 
 	/* Update the tx tail register */
-	I40E_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail);
+	I40E_PCI_REG_WC_WRITE(txq, txq->qtx_tail, txq->tx_tail);
 
 	return nb_pkts;
 }
diff --git a/drivers/net/i40e/i40e_rxtx_vec_avx2.c b/drivers/net/i40e/i40e_rxtx_vec_avx2.c
index 3bcef13..294c1c4 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_avx2.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_avx2.c
@@ -134,7 +134,7 @@  i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
 
 	/* Update the tail pointer on the NIC */
-	I40E_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+	I40E_PCI_REG_WC_WRITE(rxq, rxq->qrx_tail, rx_id);
 }
 
 #ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
@@ -921,7 +921,7 @@  i40e_xmit_fixed_burst_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
 
 	txq->tx_tail = tx_id;
 
-	I40E_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail);
+	I40E_PCI_REG_WC_WRITE(txq, txq->qtx_tail, txq->tx_tail);
 
 	return nb_pkts;
 }
diff --git a/drivers/net/i40e/i40e_rxtx_vec_sse.c b/drivers/net/i40e/i40e_rxtx_vec_sse.c
index 6985183..a4635e0 100644
--- a/drivers/net/i40e/i40e_rxtx_vec_sse.c
+++ b/drivers/net/i40e/i40e_rxtx_vec_sse.c
@@ -86,7 +86,7 @@  i40e_rxq_rearm(struct i40e_rx_queue *rxq)
 			     (rxq->nb_rx_desc - 1) : (rxq->rxrearm_start - 1));
 
 	/* Update the tail pointer on the NIC */
-	I40E_PCI_REG_WRITE(rxq->qrx_tail, rx_id);
+	I40E_PCI_REG_WC_WRITE(rxq, rxq->qrx_tail, rx_id);
 }
 
 #ifndef RTE_LIBRTE_I40E_16BYTE_RX_DESC
@@ -733,7 +733,7 @@  i40e_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 
 	txq->tx_tail = tx_id;
 
-	I40E_PCI_REG_WRITE(txq->qtx_tail, txq->tx_tail);
+	I40E_PCI_REG_WC_WRITE(txq, txq->qtx_tail, txq->tx_tail);
 
 	return nb_pkts;
 }