[v3] pcap: support MTU set

Message ID 20220530103647.35626-1-ido@cgstowernetworks.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Headers
Series [v3] pcap: support MTU set |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation fail Compilation issues
ci/intel-Testing success Testing PASS
ci/github-robot: build success github build: passed
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-aarch64-unit-testing success Testing PASS
ci/iol-x86_64-compile-testing success Testing PASS
ci/iol-aarch64-compile-testing success Testing PASS
ci/iol-x86_64-unit-testing success Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-abi-testing success Testing PASS

Commit Message

Ido Goshen May 30, 2022, 10:36 a.m. UTC
  Support rte_eth_dev_set_mtu by pcap vdevs
Enforce mtu on rx/tx

Bugzilla ID: 961
Signed-off-by: Ido Goshen <ido@cgstowernetworks.com>

---
v3:
Preserve pcap behavior to support max size packets by default
alternative to v2 in order to limit the code change to pcap only and
avoid abi change.
Enforce mtu only in case rte_eth_dev_set_mtu was explicitly called.

v2:
Preserve pcap behavior to support max size packets by default.
---
 drivers/net/pcap/pcap_ethdev.c | 44 +++++++++++++++++++++++++++++++---
 1 file changed, 41 insertions(+), 3 deletions(-)
  

Comments

Ferruh Yigit May 30, 2022, 6:05 p.m. UTC | #1
On 5/30/2022 11:36 AM, Ido Goshen wrote:
> Support rte_eth_dev_set_mtu by pcap vdevs
> Enforce mtu on rx/tx
> 

Still not sure about enforcing MTU on pcap, but please find comments on 
mechanical issues

> Bugzilla ID: 961
> Signed-off-by: Ido Goshen <ido@cgstowernetworks.com>
> 
> ---
> v3:
> Preserve pcap behavior to support max size packets by default
> alternative to v2 in order to limit the code change to pcap only and
> avoid abi change.
> Enforce mtu only in case rte_eth_dev_set_mtu was explicitly called.
> 
> v2:
> Preserve pcap behavior to support max size packets by default.
> ---
>   drivers/net/pcap/pcap_ethdev.c | 44 +++++++++++++++++++++++++++++++---
>   1 file changed, 41 insertions(+), 3 deletions(-)
> 

Does the documentation need to be updated as well?

And what do you think about updating the release notes for this change?

> diff --git a/drivers/net/pcap/pcap_ethdev.c b/drivers/net/pcap/pcap_ethdev.c
> index ec29fd6bc5..2e7fff9579 100644
> --- a/drivers/net/pcap/pcap_ethdev.c
> +++ b/drivers/net/pcap/pcap_ethdev.c
> @@ -93,6 +93,7 @@ struct pmd_internals {
>   	int single_iface;
>   	int phy_mac;
>   	unsigned int infinite_rx;
> +	int is_mtu_set;
>   };
>   
>   struct pmd_process_private {
> @@ -278,11 +279,13 @@ eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>   	const u_char *packet;
>   	struct rte_mbuf *mbuf;
>   	struct pcap_rx_queue *pcap_q = queue;
> +	struct rte_eth_dev *dev = &rte_eth_devices[pcap_q->port_id];
> +	struct pmd_internals *internals = dev->data->dev_private;

'rte_eth_devices[]' needs to be used for 'process_private' but lets not 
tie it to access 'dev_private'.

You can add "struct pmd_internals *" to the "struct pcap_rx_queue" & 
"struct pcap_tx_queue" structs for the access. Please check the null PMD 
for an example.

>   	uint16_t num_rx = 0;
>   	uint32_t rx_bytes = 0;
>   	pcap_t *pcap;
>   
> -	pp = rte_eth_devices[pcap_q->port_id].process_private;
> +	pp = dev->process_private;
>   	pcap = pp->rx_pcap[pcap_q->queue_id];
>   
>   	if (unlikely(pcap == NULL || nb_pkts == 0))
> @@ -303,6 +306,13 @@ eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>   			break;
>   		}
>   
> +		if (unlikely(header.caplen > dev->data->mtu) &&
> +				internals->is_mtu_set) {
> +			pcap_q->rx_stat.err_pkts++;
> +			rte_pktmbuf_free(mbuf);
> +			break;
> +		}
> +
>   		if (header.caplen <= rte_pktmbuf_tailroom(mbuf)) {
>   			/* pcap packet will fit in the mbuf, can copy it */
>   			rte_memcpy(rte_pktmbuf_mtod(mbuf, void *), packet,
> @@ -378,6 +388,8 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>   	struct rte_mbuf *mbuf;
>   	struct pmd_process_private *pp;
>   	struct pcap_tx_queue *dumper_q = queue;
> +	struct rte_eth_dev *dev = &rte_eth_devices[dumper_q->port_id];
> +	struct pmd_internals *internals = dev->data->dev_private;
>   	uint16_t num_tx = 0;
>   	uint32_t tx_bytes = 0;
>   	struct pcap_pkthdr header;
> @@ -385,7 +397,7 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>   	unsigned char temp_data[RTE_ETH_PCAP_SNAPLEN];
>   	size_t len, caplen;
>   
> -	pp = rte_eth_devices[dumper_q->port_id].process_private;
> +	pp = dev->process_private;
>   	dumper = pp->tx_dumper[dumper_q->queue_id];
>   
>   	if (dumper == NULL || nb_pkts == 0)
> @@ -396,6 +408,13 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>   	for (i = 0; i < nb_pkts; i++) {
>   		mbuf = bufs[i];
>   		len = caplen = rte_pktmbuf_pkt_len(mbuf);
> +
> +		if (unlikely(len > dev->data->mtu) &&
> +				internals->is_mtu_set) {

It is possible to save only some part of the packet to the pcap file, 
please check snaplen patch [1], how MTU config should work with this 
feature?

[1]
https://patchwork.dpdk.org/project/dpdk/patch/20220313112638.3945-1-laitianli@tom.com/

> +			rte_pktmbuf_free(mbuf);
> +			continue;

Normally a PMD should not silently free a packet itself; it should 
return an error and let the application decide whether to free the packet.

> +		}
> +
>   		if (unlikely(!rte_pktmbuf_is_contiguous(mbuf) &&
>   				len > sizeof(temp_data))) {
>   			caplen = sizeof(temp_data);
> @@ -464,13 +483,15 @@ eth_pcap_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>   	struct rte_mbuf *mbuf;
>   	struct pmd_process_private *pp;
>   	struct pcap_tx_queue *tx_queue = queue;
> +	struct rte_eth_dev *dev = &rte_eth_devices[tx_queue->port_id];
> +	struct pmd_internals *internals = dev->data->dev_private;
>   	uint16_t num_tx = 0;
>   	uint32_t tx_bytes = 0;
>   	pcap_t *pcap;
>   	unsigned char temp_data[RTE_ETH_PCAP_SNAPLEN];
>   	size_t len;
>   
> -	pp = rte_eth_devices[tx_queue->port_id].process_private;
> +	pp = dev->process_private;
>   	pcap = pp->tx_pcap[tx_queue->queue_id];
>   
>   	if (unlikely(nb_pkts == 0 || pcap == NULL))
> @@ -479,6 +500,13 @@ eth_pcap_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>   	for (i = 0; i < nb_pkts; i++) {
>   		mbuf = bufs[i];
>   		len = rte_pktmbuf_pkt_len(mbuf);
> +
> +		if (unlikely(len > dev->data->mtu) &&
> +				internals->is_mtu_set) {
> +			rte_pktmbuf_free(mbuf);
> +			continue;

ditto

> +		}
> +
>   		if (unlikely(!rte_pktmbuf_is_contiguous(mbuf) &&
>   				len > sizeof(temp_data))) {
>   			PMD_LOG(ERR,
> @@ -807,6 +835,14 @@ eth_stats_reset(struct rte_eth_dev *dev)
>   	return 0;
>   }
>   
> +static int eth_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)

Please follow coding convention to have return type in a separate line:
   static int
   eth_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
   {

> +{
> +	PMD_LOG(INFO, "mtu set %s %u\n", dev->device->name, mtu);

Can you please move the log after the variable declarations. And it can be good 
to capitalise MTU in the log.

> +	struct pmd_internals *internals = dev->data->dev_private;
> +	internals->is_mtu_set = 1;
> +	return 0;
> +}
> +
>   static inline void
>   infinite_rx_ring_free(struct rte_ring *pkts)
>   {
> @@ -1004,6 +1040,7 @@ static const struct eth_dev_ops ops = {
>   	.link_update = eth_link_update,
>   	.stats_get = eth_stats_get,
>   	.stats_reset = eth_stats_reset,
> +	.mtu_set = eth_mtu_set,
>   };
>   
>   static int
> @@ -1233,6 +1270,7 @@ pmd_init_internals(struct rte_vdev_device *vdev,
>   		.addr_bytes = { 0x02, 0x70, 0x63, 0x61, 0x70, iface_idx++ }
>   	};
>   	(*internals)->phy_mac = 0;
> +	(*internals)->is_mtu_set = 0;
>   	data = (*eth_dev)->data;
>   	data->nb_rx_queues = (uint16_t)nb_rx_queues;
>   	data->nb_tx_queues = (uint16_t)nb_tx_queues;
  
Ido Goshen May 31, 2022, 1:12 p.m. UTC | #2
> -----Original Message-----
> From: Ferruh Yigit <ferruh.yigit@xilinx.com>
> Sent: Monday, 30 May 2022 21:06
> To: Ido Goshen <Ido@cgstowernetworks.com>; ferruh.yigit@xilinx.com;
> stephen@networkplumber.org
> Cc: dev@dpdk.org; Tianli Lai <laitianli@tom.com>
> Subject: Re: [PATCH v3] pcap: support MTU set
> 
> On 5/30/2022 11:36 AM, Ido Goshen wrote:
> > Support rte_eth_dev_set_mtu by pcap vdevs Enforce mtu on rx/tx
> >
> 
> Still not sure about enforcing MTU on pcap, but please find comments on
> mechanical issues

[idog] Trying to give more detail about the use cases:
1. CI tests which are HW independent and work with --vdevs=net_pcap
For testing that mtu feature(s) work correctly in our app, a pcap that contains
mixed-size packets should be used. Using a pcap that has only small packets
will miss the whole point.
2. Customer support - it's much simpler to debug on a workstation and not 
real HW setup. We often get the customer's configuration and a pcap in order
to reproduce an issue. It will be a pain and error-prone to manipulate the pcap
before using it.

I will address the mechanical issues and post v4 patch

> > Bugzilla ID: 961
> > Signed-off-by: Ido Goshen <ido@cgstowernetworks.com>
> >
> > ---
> > v3:
> > Preserve pcap behavior to support max size packets by default
> > alternative to v2 in order to limit the code change to pcap only and
> > avoid abi change.
> > Enforce mtu only in case rte_eth_dev_set_mtu was explicitly called.
> >
> > v2:
> > Preserve pcap behavior to support max size packets by default.
> > ---
> >   drivers/net/pcap/pcap_ethdev.c | 44
> +++++++++++++++++++++++++++++++---
> >   1 file changed, 41 insertions(+), 3 deletions(-)
> >
> 
> Does the documentation need to be updated as well?
 
[idog] I don't think so 
It's using the standard rte_eth_dev_set_mtu() which is already documented in 
features.rst https://doc.dpdk.org/guides/nics/features.html#mtu-update
I don't see other PMDs mention explicitly they support it (it's the normal behaviour)

> And what do you think to update release notes for this update?
[idog] ok

> > +388,8 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs,
> uint16_t nb_pkts)
> > +		if (unlikely(len > dev->data->mtu) &&
> > +				internals->is_mtu_set) {
> 
> It is possible to save only some part of the packet to the pcap file, please
> check snaplen patch [1], how MTU config should work with this feature?
> 
> [1]
> https://patchwork.dpdk.org/project/dpdk/patch/20220313112638.3945-1-
> laitianli@tom.com/
> 

[idog] interesting to know this is being work on.
(To your method, why is it needed?  there are tools like editcap that can be
applied on the dpdk output file and snap the packets ;-)
I think integration of the 2 features is trivial, mtu controls if the packet is 
written to file or not, and snaplen controls what part of it needs to be written. 
i.e. mtu is checked before snaplen. 
Using snaplen > mtu will become meaningless (maybe block/warn on such configuration)

Alternative is to apply mtu only on pcap iface (assuming snaplen is applied only
 on tx_pcap file)

If supporting mtu only for pcap live ifaces and not for pcap files then
the whole thing can be implemented differently by setting the OS netdevice 
mtu (e.g. in linux SIOCSIFMTU) instead of enforcing it by the pcap pmd
but this will require osdep use and I admit I have no idea how to do it for windows
Would this be a better approach?
  
Ido Goshen June 6, 2022, 9:40 a.m. UTC | #3
> -----Original Message-----
> From: Ferruh Yigit <ferruh.yigit@xilinx.com>
> Sent: Monday, 30 May 2022 21:06
> To: Ido Goshen <Ido@cgstowernetworks.com>; ferruh.yigit@xilinx.com;
> stephen@networkplumber.org
> Cc: dev@dpdk.org; Tianli Lai <laitianli@tom.com>
> Subject: Re: [PATCH v3] pcap: support MTU set
> 
> On 5/30/2022 11:36 AM, Ido Goshen wrote:
> > Support rte_eth_dev_set_mtu by pcap vdevs Enforce mtu on rx/tx
> >

> 
> > +			rte_pktmbuf_free(mbuf);
> > +			continue;
> 
> Normally a PMD should not silently free a packet itself; it should return an
> error and let the application decide whether to free the packet.
> 

[idog] 
The docs say:
' The return value can be less than the value of the *tx_pkts* parameter when
   the transmit ring is full or has been filled up.'
which is not the case here.
It would force failing all of the burst's following packets too, even if they are under the MTU.
I think in the HW case an oversized TX packet is dropped by the HW and not left to the app. 
Freeing might mimic it better — simpler and safer.

I do miss incrementing the oerrors for that case
  

Patch

diff --git a/drivers/net/pcap/pcap_ethdev.c b/drivers/net/pcap/pcap_ethdev.c
index ec29fd6bc5..2e7fff9579 100644
--- a/drivers/net/pcap/pcap_ethdev.c
+++ b/drivers/net/pcap/pcap_ethdev.c
@@ -93,6 +93,7 @@  struct pmd_internals {
 	int single_iface;
 	int phy_mac;
 	unsigned int infinite_rx;
+	int is_mtu_set;
 };
 
 struct pmd_process_private {
@@ -278,11 +279,13 @@  eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	const u_char *packet;
 	struct rte_mbuf *mbuf;
 	struct pcap_rx_queue *pcap_q = queue;
+	struct rte_eth_dev *dev = &rte_eth_devices[pcap_q->port_id];
+	struct pmd_internals *internals = dev->data->dev_private;
 	uint16_t num_rx = 0;
 	uint32_t rx_bytes = 0;
 	pcap_t *pcap;
 
-	pp = rte_eth_devices[pcap_q->port_id].process_private;
+	pp = dev->process_private;
 	pcap = pp->rx_pcap[pcap_q->queue_id];
 
 	if (unlikely(pcap == NULL || nb_pkts == 0))
@@ -303,6 +306,13 @@  eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			break;
 		}
 
+		if (unlikely(header.caplen > dev->data->mtu) &&
+				internals->is_mtu_set) {
+			pcap_q->rx_stat.err_pkts++;
+			rte_pktmbuf_free(mbuf);
+			break;
+		}
+
 		if (header.caplen <= rte_pktmbuf_tailroom(mbuf)) {
 			/* pcap packet will fit in the mbuf, can copy it */
 			rte_memcpy(rte_pktmbuf_mtod(mbuf, void *), packet,
@@ -378,6 +388,8 @@  eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct rte_mbuf *mbuf;
 	struct pmd_process_private *pp;
 	struct pcap_tx_queue *dumper_q = queue;
+	struct rte_eth_dev *dev = &rte_eth_devices[dumper_q->port_id];
+	struct pmd_internals *internals = dev->data->dev_private;
 	uint16_t num_tx = 0;
 	uint32_t tx_bytes = 0;
 	struct pcap_pkthdr header;
@@ -385,7 +397,7 @@  eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	unsigned char temp_data[RTE_ETH_PCAP_SNAPLEN];
 	size_t len, caplen;
 
-	pp = rte_eth_devices[dumper_q->port_id].process_private;
+	pp = dev->process_private;
 	dumper = pp->tx_dumper[dumper_q->queue_id];
 
 	if (dumper == NULL || nb_pkts == 0)
@@ -396,6 +408,13 @@  eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	for (i = 0; i < nb_pkts; i++) {
 		mbuf = bufs[i];
 		len = caplen = rte_pktmbuf_pkt_len(mbuf);
+
+		if (unlikely(len > dev->data->mtu) &&
+				internals->is_mtu_set) {
+			rte_pktmbuf_free(mbuf);
+			continue;
+		}
+
 		if (unlikely(!rte_pktmbuf_is_contiguous(mbuf) &&
 				len > sizeof(temp_data))) {
 			caplen = sizeof(temp_data);
@@ -464,13 +483,15 @@  eth_pcap_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct rte_mbuf *mbuf;
 	struct pmd_process_private *pp;
 	struct pcap_tx_queue *tx_queue = queue;
+	struct rte_eth_dev *dev = &rte_eth_devices[tx_queue->port_id];
+	struct pmd_internals *internals = dev->data->dev_private;
 	uint16_t num_tx = 0;
 	uint32_t tx_bytes = 0;
 	pcap_t *pcap;
 	unsigned char temp_data[RTE_ETH_PCAP_SNAPLEN];
 	size_t len;
 
-	pp = rte_eth_devices[tx_queue->port_id].process_private;
+	pp = dev->process_private;
 	pcap = pp->tx_pcap[tx_queue->queue_id];
 
 	if (unlikely(nb_pkts == 0 || pcap == NULL))
@@ -479,6 +500,13 @@  eth_pcap_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	for (i = 0; i < nb_pkts; i++) {
 		mbuf = bufs[i];
 		len = rte_pktmbuf_pkt_len(mbuf);
+
+		if (unlikely(len > dev->data->mtu) &&
+				internals->is_mtu_set) {
+			rte_pktmbuf_free(mbuf);
+			continue;
+		}
+
 		if (unlikely(!rte_pktmbuf_is_contiguous(mbuf) &&
 				len > sizeof(temp_data))) {
 			PMD_LOG(ERR,
@@ -807,6 +835,14 @@  eth_stats_reset(struct rte_eth_dev *dev)
 	return 0;
 }
 
+static int eth_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
+{
+	PMD_LOG(INFO, "mtu set %s %u\n", dev->device->name, mtu);
+	struct pmd_internals *internals = dev->data->dev_private;
+	internals->is_mtu_set = 1;
+	return 0;
+}
+
 static inline void
 infinite_rx_ring_free(struct rte_ring *pkts)
 {
@@ -1004,6 +1040,7 @@  static const struct eth_dev_ops ops = {
 	.link_update = eth_link_update,
 	.stats_get = eth_stats_get,
 	.stats_reset = eth_stats_reset,
+	.mtu_set = eth_mtu_set,
 };
 
 static int
@@ -1233,6 +1270,7 @@  pmd_init_internals(struct rte_vdev_device *vdev,
 		.addr_bytes = { 0x02, 0x70, 0x63, 0x61, 0x70, iface_idx++ }
 	};
 	(*internals)->phy_mac = 0;
+	(*internals)->is_mtu_set = 0;
 	data = (*eth_dev)->data;
 	data->nb_rx_queues = (uint16_t)nb_rx_queues;
 	data->nb_tx_queues = (uint16_t)nb_tx_queues;