[v3] net/pcap: support buffer size parameter

Message ID 20210828094751.1955-1-chenqiming_huawei@163.com (mailing list archive)
State Changes Requested, archived
Delegated to: Ferruh Yigit
Headers
Series [v3] net/pcap: support buffer size parameter |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/github-robot: build success github build: passed
ci/iol-broadcom-Performance success Performance Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-broadcom-Functional success Functional Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-x86_64-compile-testing success Testing PASS
ci/iol-x86_64-unit-testing success Testing PASS
ci/iol-aarch64-compile-testing success Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-aarch64-unit-testing success Testing PASS
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS

Commit Message

Qiming Chen Aug. 28, 2021, 9:47 a.m. UTC
  When the pcap port is probed, the size of the pcap message buffer is not
set, the default is 2M, and then this value has a great impact on the
message forwarding performance. Therefore, parameters are provided for
users to set.

In order to pass the buffer size parameter parsed by the probe to the
start function, the buf_size member variable is added to the
struct pmd_process_private structure. At the same time, for the uniform
code style, the buf_size member variable is also added to the
struct pmd_devargs structure, which is used by the probe function.

Signed-off-by: Qiming Chen <chenqiming_huawei@163.com>
---
v2:
  Clear coding style warning.
v3:
  When buf_size=0, the modification keeps the old implementation unchanged.
---
 drivers/net/pcap/pcap_ethdev.c | 78 +++++++++++++++++++++++++++++-----
 1 file changed, 68 insertions(+), 10 deletions(-)
  

Comments

Ferruh Yigit Sept. 14, 2021, 1:39 p.m. UTC | #1
On 8/28/2021 10:47 AM, Qiming Chen wrote:
> When the pcap port is probed, the size of the pcap message buffer is not
> set, the default is 2M, and then this value has a great impact on the
> message forwarding performance. Therefore, parameters are provided for
> users to set.
> 

Hi Qiming,

I assume you suggest buffer should be set bigger than 2M for better performance.
I can see following description for "pcap message buffer" [1], I am not clear
why this impacts the performance, can you please clarify?
If the producer rate is higher than consumer rate, performance would be same but
bigger buffer only delays the packet drops. It may only help on the case
producer has peaks, but still not sure why it increase the performance.
I did quick checks and not observed any performance improvement, can you please
detail your usecase?


Another concern is below description mentions "On some platforms, the buffer's
size can be set". Pcap PMD now supports Windows too (cc'ed Dmitry), I wonder if
this features is supported on Windows?


[1]
buffer size
Packets that arrive for a capture are stored in a buffer, so that they do not
have to be read by the application as soon as they arrive. On some platforms,
the buffer's size can be set; a size that's too small could mean that, if too
many packets are being captured and the snapshot length doesn't limit the amount
of data that's buffered, packets could be dropped if the buffer fills up before
the application can read packets from it, while a size that's too large could
use more non-pageable operating system memory than is necessary to prevent
packets from being dropped.
The buffer size is set with pcap_set_buffer_size().


> In order to pass the buffer size parameter parsed by the probe to the
> start function, the buf_size member variable is added to the
> struct pmd_process_private structure. At the same time, for the uniform
> code style, the buf_size member variable is also added to the
> struct pmd_devargs structure, which is used by the probe function.
> 

Why added to process_private data, but not to 'struct pmd_internals'. Process
private data is for the variables that will be different for primary and
secondary process.

> Signed-off-by: Qiming Chen <chenqiming_huawei@163.com>
> ---
> v2:
>   Clear coding style warning.
> v3:
>   When buf_size=0, the modification keeps the old implementation unchanged.
> ---
>  drivers/net/pcap/pcap_ethdev.c | 78 +++++++++++++++++++++++++++++-----
>  1 file changed, 68 insertions(+), 10 deletions(-)

Documentation also needs to be updated: 'doc/guides/nics/pcap_ring.rst'

> 
> diff --git a/drivers/net/pcap/pcap_ethdev.c b/drivers/net/pcap/pcap_ethdev.c
> index a8774b7a43..fdc74313d5 100644
> --- a/drivers/net/pcap/pcap_ethdev.c
> +++ b/drivers/net/pcap/pcap_ethdev.c
> @@ -33,6 +33,7 @@
>  #define ETH_PCAP_IFACE_ARG    "iface"
>  #define ETH_PCAP_PHY_MAC_ARG  "phy_mac"
>  #define ETH_PCAP_INFINITE_RX_ARG  "infinite_rx"
> +#define ETH_PCAP_BUF_SIZE_ARG "buf_size"
>  
>  #define ETH_PCAP_ARG_MAXLEN	64
>  
> @@ -98,6 +99,7 @@ struct pmd_process_private {
>  	pcap_t *rx_pcap[RTE_PMD_PCAP_MAX_QUEUES];
>  	pcap_t *tx_pcap[RTE_PMD_PCAP_MAX_QUEUES];
>  	pcap_dumper_t *tx_dumper[RTE_PMD_PCAP_MAX_QUEUES];
> +	int buf_size;
>  };
>  
>  struct pmd_devargs {
> @@ -109,6 +111,7 @@ struct pmd_devargs {
>  		const char *type;
>  	} queue[RTE_PMD_PCAP_MAX_QUEUES];
>  	int phy_mac;
> +	int buf_size;
>  };
>  
>  struct pmd_devargs_all {
> @@ -131,6 +134,7 @@ static const char *valid_arguments[] = {
>  	ETH_PCAP_IFACE_ARG,
>  	ETH_PCAP_PHY_MAC_ARG,
>  	ETH_PCAP_INFINITE_RX_ARG,
> +	ETH_PCAP_BUF_SIZE_ARG,
>  	NULL
>  };
>  
> @@ -521,11 +525,46 @@ open_iface_live(const char *iface, pcap_t **pcap) {
>  }
>  
>  static int
> -open_single_iface(const char *iface, pcap_t **pcap)
> +open_single_iface(const char *iface, int buf_size, pcap_t **pcap)
>  {
> -	if (open_iface_live(iface, pcap) < 0) {
> -		PMD_LOG(ERR, "Couldn't open interface %s", iface);
> -		return -1;
> +	if (buf_size == 0) {
> +		if (open_iface_live(iface, pcap) < 0) {
> +			PMD_LOG(ERR, "Couldn't open interface %s", iface);
> +			return -1;
> +		}
> +	} else {
> +		pcap_t *p = pcap_create(iface, errbuf);
> +		if (p == NULL) {
> +			PMD_LOG(ERR, "Couldn't create %s pcap", iface);
> +			return -1;
> +		}
> +
> +		if (pcap_set_snaplen(p, RTE_ETH_PCAP_SNAPLEN) < 0) {
> +			PMD_LOG(ERR, "Couldn't set %s pcap snaplen", iface);
> +			return -1;
> +		}
> +
> +		if (pcap_set_promisc(p, RTE_ETH_PCAP_PROMISC) < 0) {
> +			PMD_LOG(ERR, "Couldn't set %s pcap promisc", iface);
> +			return -1;
> +		}
> +
> +		if (pcap_set_timeout(p, RTE_ETH_PCAP_TIMEOUT) < 0) {
> +			PMD_LOG(ERR, "Couldn't set %s pcap timeout", iface);
> +			return -1;
> +		}
> +
> +		if (pcap_set_buffer_size(p, buf_size) < 0) {
> +			PMD_LOG(ERR, "Couldn't set %s pcap buffer size(%d)", iface, buf_size);
> +			return -1;
> +		}
> +
> +		if (pcap_activate(p) < 0) {
> +			PMD_LOG(ERR, "Couldn't activate %s pcap", iface);
> +			return -1;
> +		}
> +
> +		*pcap = p;
>  	}
>  
>  	return 0;
> @@ -608,7 +647,7 @@ eth_dev_start(struct rte_eth_dev *dev)
>  
>  		if (!pp->tx_pcap[0] &&
>  			strcmp(tx->type, ETH_PCAP_IFACE_ARG) == 0) {
> -			if (open_single_iface(tx->name, &pp->tx_pcap[0]) < 0)
> +			if (open_single_iface(tx->name, pp->buf_size, &pp->tx_pcap[0]) < 0)
>  				return -1;
>  			pp->rx_pcap[0] = pp->tx_pcap[0];
>  		}
> @@ -627,7 +666,7 @@ eth_dev_start(struct rte_eth_dev *dev)
>  				return -1;
>  		} else if (!pp->tx_pcap[i] &&
>  				strcmp(tx->type, ETH_PCAP_TX_IFACE_ARG) == 0) {
> -			if (open_single_iface(tx->name, &pp->tx_pcap[i]) < 0)
> +			if (open_single_iface(tx->name, pp->buf_size, &pp->tx_pcap[i]) < 0)

This is when the argument is 'tx_iface=eth0', why we are still passing the
buffer size when having an handler for Tx? Is the buffer for both Rx & Tx?
Isn't this buffer size parameter only for 'iface=...' argument?

>  				return -1;
>  		}
>  	}
> @@ -643,7 +682,7 @@ eth_dev_start(struct rte_eth_dev *dev)
>  			if (open_single_rx_pcap(rx->name, &pp->rx_pcap[i]) < 0)
>  				return -1;
>  		} else if (strcmp(rx->type, ETH_PCAP_RX_IFACE_ARG) == 0) {
> -			if (open_single_iface(rx->name, &pp->rx_pcap[i]) < 0)
> +			if (open_single_iface(rx->name, pp->buf_size, &pp->rx_pcap[i]) < 0)
>  				return -1;
>  		}
>  	}
> @@ -1072,7 +1111,7 @@ open_rx_tx_iface(const char *key, const char *value, void *extra_args)
>  	struct pmd_devargs *tx = extra_args;
>  	pcap_t *pcap = NULL;
>  
> -	if (open_single_iface(iface, &pcap) < 0)
> +	if (open_single_iface(iface, tx->buf_size, &pcap) < 0)
>  		return -1;
>  
>  	tx->queue[0].pcap = pcap;
> @@ -1104,7 +1143,7 @@ open_iface(const char *key, const char *value, void *extra_args)
>  	struct pmd_devargs *pmd = extra_args;
>  	pcap_t *pcap = NULL;
>  
> -	if (open_single_iface(iface, &pcap) < 0)
> +	if (open_single_iface(iface, pmd->buf_size, &pcap) < 0)
>  		return -1;
>  	if (add_queue(pmd, iface, key, pcap, NULL) < 0) {
>  		pcap_close(pcap);
> @@ -1154,6 +1193,15 @@ open_tx_iface(const char *key, const char *value, void *extra_args)
>  	return open_iface(key, value, extra_args);
>  }
>  
> +static int
> +select_buf_size(const char *key __rte_unused, const char *value,
> +		void *extra_args)
> +{
> +	if (extra_args)
> +		*(int *)extra_args = atoi(value);
> +	return 0;
> +}
> +
>  static int
>  select_phy_mac(const char *key __rte_unused, const char *value,
>  		void *extra_args)
> @@ -1413,6 +1461,13 @@ pmd_pcap_probe(struct rte_vdev_device *dev)
>  			return -1;
>  	}
>  
> +	if (rte_kvargs_count(kvlist, ETH_PCAP_BUF_SIZE_ARG) == 1) {
> +		ret = rte_kvargs_process(kvlist, ETH_PCAP_BUF_SIZE_ARG,
> +				&select_buf_size, &pcaps.buf_size);
> +		if (ret < 0)
> +			goto free_kvlist;
> +	}
> +
>  	/*
>  	 * If iface argument is passed we open the NICs and use them for
>  	 * reading / writing
> @@ -1456,6 +1511,7 @@ pmd_pcap_probe(struct rte_vdev_device *dev)
>  	devargs_all.is_tx_iface =
>  		rte_kvargs_count(kvlist, ETH_PCAP_TX_IFACE_ARG) ? 1 : 0;
>  	dumpers.num_of_queue = 0;
> +	dumpers.buf_size = pcaps.buf_size;
>  
>  	if (devargs_all.is_rx_pcap) {
>  		/*
> @@ -1562,6 +1618,7 @@ pmd_pcap_probe(struct rte_vdev_device *dev)
>  			pp->tx_dumper[i] = dumpers.queue[i].dumper;
>  			pp->tx_pcap[i] = dumpers.queue[i].pcap;
>  		}
> +		pp->buf_size = pcaps.buf_size;
>  

This is inside the seconday proccess branch, process private seems not updated
at all for the primary process.

>  		eth_dev->process_private = pp;
>  		eth_dev->rx_pkt_burst = eth_pcap_rx;
> @@ -1618,4 +1675,5 @@ RTE_PMD_REGISTER_PARAM_STRING(net_pcap,
>  	ETH_PCAP_TX_IFACE_ARG "=<ifc> "
>  	ETH_PCAP_IFACE_ARG "=<ifc> "
>  	ETH_PCAP_PHY_MAC_ARG "=<int>"
> -	ETH_PCAP_INFINITE_RX_ARG "=<0|1>");
> +	ETH_PCAP_INFINITE_RX_ARG "=<0|1>"
> +	ETH_PCAP_BUF_SIZE_ARG "=<int>");
>
  

Patch

diff --git a/drivers/net/pcap/pcap_ethdev.c b/drivers/net/pcap/pcap_ethdev.c
index a8774b7a43..fdc74313d5 100644
--- a/drivers/net/pcap/pcap_ethdev.c
+++ b/drivers/net/pcap/pcap_ethdev.c
@@ -33,6 +33,7 @@ 
 #define ETH_PCAP_IFACE_ARG    "iface"
 #define ETH_PCAP_PHY_MAC_ARG  "phy_mac"
 #define ETH_PCAP_INFINITE_RX_ARG  "infinite_rx"
+#define ETH_PCAP_BUF_SIZE_ARG "buf_size"
 
 #define ETH_PCAP_ARG_MAXLEN	64
 
@@ -98,6 +99,7 @@  struct pmd_process_private {
 	pcap_t *rx_pcap[RTE_PMD_PCAP_MAX_QUEUES];
 	pcap_t *tx_pcap[RTE_PMD_PCAP_MAX_QUEUES];
 	pcap_dumper_t *tx_dumper[RTE_PMD_PCAP_MAX_QUEUES];
+	int buf_size;
 };
 
 struct pmd_devargs {
@@ -109,6 +111,7 @@  struct pmd_devargs {
 		const char *type;
 	} queue[RTE_PMD_PCAP_MAX_QUEUES];
 	int phy_mac;
+	int buf_size;
 };
 
 struct pmd_devargs_all {
@@ -131,6 +134,7 @@  static const char *valid_arguments[] = {
 	ETH_PCAP_IFACE_ARG,
 	ETH_PCAP_PHY_MAC_ARG,
 	ETH_PCAP_INFINITE_RX_ARG,
+	ETH_PCAP_BUF_SIZE_ARG,
 	NULL
 };
 
@@ -521,11 +525,46 @@  open_iface_live(const char *iface, pcap_t **pcap) {
 }
 
 static int
-open_single_iface(const char *iface, pcap_t **pcap)
+open_single_iface(const char *iface, int buf_size, pcap_t **pcap)
 {
-	if (open_iface_live(iface, pcap) < 0) {
-		PMD_LOG(ERR, "Couldn't open interface %s", iface);
-		return -1;
+	if (buf_size == 0) {
+		if (open_iface_live(iface, pcap) < 0) {
+			PMD_LOG(ERR, "Couldn't open interface %s", iface);
+			return -1;
+		}
+	} else {
+		pcap_t *p = pcap_create(iface, errbuf);
+		if (p == NULL) {
+			PMD_LOG(ERR, "Couldn't create %s pcap", iface);
+			return -1;
+		}
+
+		if (pcap_set_snaplen(p, RTE_ETH_PCAP_SNAPLEN) < 0) {
+			PMD_LOG(ERR, "Couldn't set %s pcap snaplen", iface);
+			return -1;
+		}
+
+		if (pcap_set_promisc(p, RTE_ETH_PCAP_PROMISC) < 0) {
+			PMD_LOG(ERR, "Couldn't set %s pcap promisc", iface);
+			return -1;
+		}
+
+		if (pcap_set_timeout(p, RTE_ETH_PCAP_TIMEOUT) < 0) {
+			PMD_LOG(ERR, "Couldn't set %s pcap timeout", iface);
+			return -1;
+		}
+
+		if (pcap_set_buffer_size(p, buf_size) < 0) {
+			PMD_LOG(ERR, "Couldn't set %s pcap buffer size(%d)", iface, buf_size);
+			return -1;
+		}
+
+		if (pcap_activate(p) < 0) {
+			PMD_LOG(ERR, "Couldn't activate %s pcap", iface);
+			return -1;
+		}
+
+		*pcap = p;
 	}
 
 	return 0;
@@ -608,7 +647,7 @@  eth_dev_start(struct rte_eth_dev *dev)
 
 		if (!pp->tx_pcap[0] &&
 			strcmp(tx->type, ETH_PCAP_IFACE_ARG) == 0) {
-			if (open_single_iface(tx->name, &pp->tx_pcap[0]) < 0)
+			if (open_single_iface(tx->name, pp->buf_size, &pp->tx_pcap[0]) < 0)
 				return -1;
 			pp->rx_pcap[0] = pp->tx_pcap[0];
 		}
@@ -627,7 +666,7 @@  eth_dev_start(struct rte_eth_dev *dev)
 				return -1;
 		} else if (!pp->tx_pcap[i] &&
 				strcmp(tx->type, ETH_PCAP_TX_IFACE_ARG) == 0) {
-			if (open_single_iface(tx->name, &pp->tx_pcap[i]) < 0)
+			if (open_single_iface(tx->name, pp->buf_size, &pp->tx_pcap[i]) < 0)
 				return -1;
 		}
 	}
@@ -643,7 +682,7 @@  eth_dev_start(struct rte_eth_dev *dev)
 			if (open_single_rx_pcap(rx->name, &pp->rx_pcap[i]) < 0)
 				return -1;
 		} else if (strcmp(rx->type, ETH_PCAP_RX_IFACE_ARG) == 0) {
-			if (open_single_iface(rx->name, &pp->rx_pcap[i]) < 0)
+			if (open_single_iface(rx->name, pp->buf_size, &pp->rx_pcap[i]) < 0)
 				return -1;
 		}
 	}
@@ -1072,7 +1111,7 @@  open_rx_tx_iface(const char *key, const char *value, void *extra_args)
 	struct pmd_devargs *tx = extra_args;
 	pcap_t *pcap = NULL;
 
-	if (open_single_iface(iface, &pcap) < 0)
+	if (open_single_iface(iface, tx->buf_size, &pcap) < 0)
 		return -1;
 
 	tx->queue[0].pcap = pcap;
@@ -1104,7 +1143,7 @@  open_iface(const char *key, const char *value, void *extra_args)
 	struct pmd_devargs *pmd = extra_args;
 	pcap_t *pcap = NULL;
 
-	if (open_single_iface(iface, &pcap) < 0)
+	if (open_single_iface(iface, pmd->buf_size, &pcap) < 0)
 		return -1;
 	if (add_queue(pmd, iface, key, pcap, NULL) < 0) {
 		pcap_close(pcap);
@@ -1154,6 +1193,15 @@  open_tx_iface(const char *key, const char *value, void *extra_args)
 	return open_iface(key, value, extra_args);
 }
 
+static int
+select_buf_size(const char *key __rte_unused, const char *value,
+		void *extra_args)
+{
+	if (extra_args)
+		*(int *)extra_args = atoi(value);
+	return 0;
+}
+
 static int
 select_phy_mac(const char *key __rte_unused, const char *value,
 		void *extra_args)
@@ -1413,6 +1461,13 @@  pmd_pcap_probe(struct rte_vdev_device *dev)
 			return -1;
 	}
 
+	if (rte_kvargs_count(kvlist, ETH_PCAP_BUF_SIZE_ARG) == 1) {
+		ret = rte_kvargs_process(kvlist, ETH_PCAP_BUF_SIZE_ARG,
+				&select_buf_size, &pcaps.buf_size);
+		if (ret < 0)
+			goto free_kvlist;
+	}
+
 	/*
 	 * If iface argument is passed we open the NICs and use them for
 	 * reading / writing
@@ -1456,6 +1511,7 @@  pmd_pcap_probe(struct rte_vdev_device *dev)
 	devargs_all.is_tx_iface =
 		rte_kvargs_count(kvlist, ETH_PCAP_TX_IFACE_ARG) ? 1 : 0;
 	dumpers.num_of_queue = 0;
+	dumpers.buf_size = pcaps.buf_size;
 
 	if (devargs_all.is_rx_pcap) {
 		/*
@@ -1562,6 +1618,7 @@  pmd_pcap_probe(struct rte_vdev_device *dev)
 			pp->tx_dumper[i] = dumpers.queue[i].dumper;
 			pp->tx_pcap[i] = dumpers.queue[i].pcap;
 		}
+		pp->buf_size = pcaps.buf_size;
 
 		eth_dev->process_private = pp;
 		eth_dev->rx_pkt_burst = eth_pcap_rx;
@@ -1618,4 +1675,5 @@  RTE_PMD_REGISTER_PARAM_STRING(net_pcap,
 	ETH_PCAP_TX_IFACE_ARG "=<ifc> "
 	ETH_PCAP_IFACE_ARG "=<ifc> "
 	ETH_PCAP_PHY_MAC_ARG "=<int>"
-	ETH_PCAP_INFINITE_RX_ARG "=<0|1>");
+	ETH_PCAP_INFINITE_RX_ARG "=<0|1>"
+	ETH_PCAP_BUF_SIZE_ARG "=<int>");