[v4] pcap: support MTU set

Message ID 20220606162147.57218-1-ido@cgstowernetworks.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Headers
Series [v4] pcap: support MTU set |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/iol-testing warning apply patch failure
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS

Commit Message

Ido Goshen June 6, 2022, 4:21 p.m. UTC
  Support rte_eth_dev_set_mtu by pcap vdevs
Enforce mtu on rx/tx

Bugzilla ID: 961
Signed-off-by: Ido Goshen <ido@cgstowernetworks.com>

---
v4:
1. Add release notes comment
2. Access pmd internals via queue struct
3. eth_mtu_set code convention fixes

v3:
Preserve pcap behavior to support max size packets by default
alternative to v2 in order to limit the code change to pcap only and
avoid abi change.
Enforce mtu only in case rte_eth_dev_set_mtu was explicitly called.

v2:
Preserve pcap behavior to support max size packets by default.
---
 doc/guides/rel_notes/release_22_07.rst |  3 ++
 drivers/net/pcap/pcap_ethdev.c         | 38 ++++++++++++++++++++++++++
 2 files changed, 41 insertions(+)
  

Comments

Stephen Hemminger June 6, 2022, 5:10 p.m. UTC | #1
On Mon,  6 Jun 2022 19:21:47 +0300
Ido Goshen <ido@cgstowernetworks.com> wrote:

> Support rte_eth_dev_set_mtu by pcap vdevs
> Enforce mtu on rx/tx
> 
> Bugzilla ID: 961

This is not really a bug, it is an enhancement specific to your
test setup. It should not be backported to stable.

Since it is change in behavior it might be better to add a vdev argument
for this rather than overloading meaning of MTU. Also, this does not behave
the same as virtio or hardware drivers.



> Signed-off-by: Ido Goshen <ido@cgstowernetworks.com>
> 
> ---
> v4:
> 1. Add release notes comment
> 2. Access pmd internals via queue struct
> 3. eth_mtu_set code convention fixes
> 
> v3:
> Preserve pcap behavior to support max size packets by default
> alternative to v2 in order to limit the code change to pcap only and
> avoid abi change.
> Enforce mtu only in case rte_eth_dev_set_mtu was explicitly called.
> 
> v2:
> Preserve pcap behavior to support max size packets by default.
> ---
>  doc/guides/rel_notes/release_22_07.rst |  3 ++
>  drivers/net/pcap/pcap_ethdev.c         | 38 ++++++++++++++++++++++++++
>  2 files changed, 41 insertions(+)
> 
> diff --git a/doc/guides/rel_notes/release_22_07.rst b/doc/guides/rel_notes/release_22_07.rst
> index 0ed4f92820..717191d498 100644
> --- a/doc/guides/rel_notes/release_22_07.rst
> +++ b/doc/guides/rel_notes/release_22_07.rst
> @@ -95,6 +95,9 @@ New Features
>    * Added AH mode support in lookaside protocol (IPsec) for CN9K & CN10K.
>    * Added AES-GMAC support in lookaside protocol (IPsec) for CN9K & CN10K.
>  
> +* **Updated pcap driver.**
> +
> + * Added support for MTU via ``rte_eth_dev_set_mtu``
>  
>  Removed Items
>  -------------
> diff --git a/drivers/net/pcap/pcap_ethdev.c b/drivers/net/pcap/pcap_ethdev.c
> index ec29fd6bc5..db1958f20f 100644
> --- a/drivers/net/pcap/pcap_ethdev.c
> +++ b/drivers/net/pcap/pcap_ethdev.c
> @@ -74,6 +74,7 @@ struct pcap_rx_queue {
>  
>  	/* Contains pre-generated packets to be looped through */
>  	struct rte_ring *pkts;
> +	struct pmd_internals *internals;
>  };
>  
>  struct pcap_tx_queue {
> @@ -82,6 +83,7 @@ struct pcap_tx_queue {
>  	struct queue_stat tx_stat;
>  	char name[PATH_MAX];
>  	char type[ETH_PCAP_ARG_MAXLEN];
> +	struct pmd_internals *internals;
>  };
>  
>  struct pmd_internals {
> @@ -93,6 +95,7 @@ struct pmd_internals {
>  	int single_iface;
>  	int phy_mac;
>  	unsigned int infinite_rx;
> +	uint16_t mtu;
>  };

The mtu is already in dev->data->mtu, why copy it?

>  struct pmd_process_private {
> @@ -278,6 +281,7 @@ eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>  	const u_char *packet;
>  	struct rte_mbuf *mbuf;
>  	struct pcap_rx_queue *pcap_q = queue;
> +	struct pmd_internals *internals = pcap_q->internals;
>  	uint16_t num_rx = 0;
>  	uint32_t rx_bytes = 0;
>  	pcap_t *pcap;
> @@ -303,6 +307,12 @@ eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>  			break;
>  		}
>  
> +		if (unlikely(header.caplen > internals->mtu)) {
> +			pcap_q->rx_stat.err_pkts++;
> +			rte_pktmbuf_free(mbuf);
> +			break;
> +		}

This doesn't account for VLAN header.


>  		if (header.caplen <= rte_pktmbuf_tailroom(mbuf)) {
>  			/* pcap packet will fit in the mbuf, can copy it */
>  			rte_memcpy(rte_pktmbuf_mtod(mbuf, void *), packet,
> @@ -378,6 +388,7 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>  	struct rte_mbuf *mbuf;
>  	struct pmd_process_private *pp;
>  	struct pcap_tx_queue *dumper_q = queue;
> +	struct pmd_internals *internals = dumper_q->internals;
>  	uint16_t num_tx = 0;
>  	uint32_t tx_bytes = 0;
>  	struct pcap_pkthdr header;
> @@ -396,6 +407,12 @@ eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>  	for (i = 0; i < nb_pkts; i++) {
>  		mbuf = bufs[i];
>  		len = caplen = rte_pktmbuf_pkt_len(mbuf);
> +
> +		if (unlikely(len > internals->mtu)) {
> +			rte_pktmbuf_free(mbuf);
> +			continue;
> +		}

There needs to be a per-queue counter for any and all drops.

> +
>  		if (unlikely(!rte_pktmbuf_is_contiguous(mbuf) &&
>  				len > sizeof(temp_data))) {
>  			caplen = sizeof(temp_data);
> @@ -464,6 +481,7 @@ eth_pcap_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>  	struct rte_mbuf *mbuf;
>  	struct pmd_process_private *pp;
>  	struct pcap_tx_queue *tx_queue = queue;
> +	struct pmd_internals *internals = tx_queue->internals;
>  	uint16_t num_tx = 0;
>  	uint32_t tx_bytes = 0;
>  	pcap_t *pcap;
> @@ -479,6 +497,12 @@ eth_pcap_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
>  	for (i = 0; i < nb_pkts; i++) {
>  		mbuf = bufs[i];
>  		len = rte_pktmbuf_pkt_len(mbuf);
> +
> +		if (unlikely(len > internals->mtu)) {
> +			rte_pktmbuf_free(mbuf);
> +			continue;
> +		}
> +
>  		if (unlikely(!rte_pktmbuf_is_contiguous(mbuf) &&
>  				len > sizeof(temp_data))) {
>  			PMD_LOG(ERR,
> @@ -807,6 +831,16 @@ eth_stats_reset(struct rte_eth_dev *dev)
>  	return 0;
>  }
>  
> +static int
> +eth_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
> +{
> +	struct pmd_internals *internals = dev->data->dev_private;
> +
> +	PMD_LOG(INFO, "MTU set %s %u\n", dev->device->name, mtu);
> +	internals->mtu = mtu;
> +	return 0;
> +}

If you drop internals->mtu (redundant) then this just becomes stub (ie return 0)

> +
>  static inline void
>  infinite_rx_ring_free(struct rte_ring *pkts)
>  {
> @@ -878,6 +912,7 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
>  	pcap_q->mb_pool = mb_pool;
>  	pcap_q->port_id = dev->data->port_id;
>  	pcap_q->queue_id = rx_queue_id;
> +	pcap_q->internals = internals;
>  	dev->data->rx_queues[rx_queue_id] = pcap_q;
>  
>  	if (internals->infinite_rx) {
> @@ -952,6 +987,7 @@ eth_tx_queue_setup(struct rte_eth_dev *dev,
>  
>  	pcap_q->port_id = dev->data->port_id;
>  	pcap_q->queue_id = tx_queue_id;
> +	pcap_q->internals = internals;
>  	dev->data->tx_queues[tx_queue_id] = pcap_q;
>  
>  	return 0;
> @@ -1004,6 +1040,7 @@ static const struct eth_dev_ops ops = {
>  	.link_update = eth_link_update,
>  	.stats_get = eth_stats_get,
>  	.stats_reset = eth_stats_reset,
> +	.mtu_set = eth_mtu_set,
>  };
>  
>  static int
> @@ -1233,6 +1270,7 @@ pmd_init_internals(struct rte_vdev_device *vdev,
>  		.addr_bytes = { 0x02, 0x70, 0x63, 0x61, 0x70, iface_idx++ }
>  	};
>  	(*internals)->phy_mac = 0;
> +	(*internals)->mtu = RTE_ETH_PCAP_SNAPLEN;


Use dev->data->mtu not internal value.


>  	data = (*eth_dev)->data;
>  	data->nb_rx_queues = (uint16_t)nb_rx_queues;
>  	data->nb_tx_queues = (uint16_t)nb_tx_queues;
  
Ido Goshen June 6, 2022, 7:07 p.m. UTC | #2
> -----Original Message-----
> From: Stephen Hemminger <stephen@networkplumber.org>
> Sent: Monday, 6 June 2022 20:10
> To: Ido Goshen <Ido@cgstowernetworks.com>
> Cc: ferruh.yigit@xilinx.com; dev@dpdk.org
> Subject: Re: [PATCH v4] pcap: support MTU set
> 
> On Mon,  6 Jun 2022 19:21:47 +0300
> Ido Goshen <ido@cgstowernetworks.com> wrote:
> 
> > Support rte_eth_dev_set_mtu by pcap vdevs Enforce mtu on rx/tx
> >
> > Bugzilla ID: 961
> 
> This is not really a bug, it is an enhancement specific to your test setup. It should
> not be backported to stable.
> 
> Since it is change in behavior it might be better to add a vdev argument for this
> rather than overloading meaning of MTU.

[idog] The default behavior stays the same and long packets will continue to pass as they used to;
only if 'rte_eth_dev_set_mtu' is explicitly used will it take effect.
I doubt it'll break anything, because no one could have used it so far, as it returns -ENOTSUP,
and I assume that would be the expected behavior for anyone who does set it.

Adding it as an argument to vdev (e.g. vdev='net_pcap0,iface=eth0,mtu=9400') seems to me 
like a duplication of an existing API.

> Also, this does not behave the same as virtio or hardware drivers.

[idog] The idea of this patch is to make pcap behave more like HW NICs.
Couple of HW NICs (ixgbe, i40e) I've checked do respect MTU
Please see test outputs in https://bugs.dpdk.org/show_bug.cgi?id=961
Though probably it's done by the HW and not by the driver 

Alternative might be to set the network interfaces MTU and not do it in pmd, so
It'll be like the "HW" is doing it, but this will work only for ifaces and not for pcap files.

> 
> The mtu is already in dev->data->mtu, why copy it?
> 

[idog] That's what I was using so far, but I got a request from ferruh.yigit@xilinx.com 
not to use 'dev' but access 'internals' via the 'pcap_rx/tx_queue' struct.

> > +		if (unlikely(header.caplen > internals->mtu)) {
> > +			pcap_q->rx_stat.err_pkts++;
> > +			rte_pktmbuf_free(mbuf);
> > +			break;
> > +		}
> 
> This doesn't account for VLAN header.

[idog] Good point, I'm never sure what overhead should be considered.
Please advise what I should add,
e.g.  '(RTE_ETHER_HDR_LEN + RTE_ETHER_CRC_LEN + RTE_VLAN_HLEN * 2)'

however caller can always set it a bit higher if needed

> > +
> > +		if (unlikely(len > internals->mtu)) {
> > +			rte_pktmbuf_free(mbuf);
> > +			continue;
> > +		}
> 
> There needs to be a per queue counter any and all drops.

[idog] It will be counted few lines below by
	'dumper_q->tx_stat.err_pkts += nb_pkts - num_tx;'
as this case doesn't increment the 'num_tx'

> >
> > +static int
> > +eth_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) {
> > +	struct pmd_internals *internals = dev->data->dev_private;
> > +
> > +	PMD_LOG(INFO, "MTU set %s %u\n", dev->device->name, mtu);
> > +	internals->mtu = mtu;
> > +	return 0;
> > +}
> 
> If you drop internals->mtu (redundant) then this just becomes stub (ie return 0)
> 

[idog] Again I'm not sure if it's right to use 'dev->data->mtu' directly where later needed.
ferruh.yigit@xilinx.com ?
Anyway this function is needed even if it does nothing (or just logs) in order for the
eth_dev_ops.mtu_set to be supported


> >
> >  static int
> > @@ -1233,6 +1270,7 @@ pmd_init_internals(struct rte_vdev_device *vdev,
> >  		.addr_bytes = { 0x02, 0x70, 0x63, 0x61, 0x70, iface_idx++ }
> >  	};
> >  	(*internals)->phy_mac = 0;
> > +	(*internals)->mtu = RTE_ETH_PCAP_SNAPLEN;
> 
> 
> Use dev->data->mtu not internal value.
> 

[idog] This runs early, when the probe creates the device.
'dev->data->mtu' will be overwritten later in 'rte_eth_dev_configure'
to a hard-coded 1500:

	if (dev_conf->rxmode.mtu == 0)
		dev->data->dev_conf.rxmode.mtu = RTE_ETHER_MTU;
	ret = eth_dev_validate_mtu(port_id, &dev_info,
			dev->data->dev_conf.rxmode.mtu);
	if (ret != 0)
		goto rollback;
	dev->data->mtu = dev->data->dev_conf.rxmode.mtu;

I tried to overcome it with [PATCH v2] http://mails.dpdk.org/archives/dev/2022-May/241974.html
but that code change spills out of the pcap PMD and changes the rte_ethdev ABI, which I'd rather avoid
  

Patch

diff --git a/doc/guides/rel_notes/release_22_07.rst b/doc/guides/rel_notes/release_22_07.rst
index 0ed4f92820..717191d498 100644
--- a/doc/guides/rel_notes/release_22_07.rst
+++ b/doc/guides/rel_notes/release_22_07.rst
@@ -95,6 +95,9 @@  New Features
   * Added AH mode support in lookaside protocol (IPsec) for CN9K & CN10K.
   * Added AES-GMAC support in lookaside protocol (IPsec) for CN9K & CN10K.
 
+* **Updated pcap driver.**
+
+ * Added support for MTU via ``rte_eth_dev_set_mtu``
 
 Removed Items
 -------------
diff --git a/drivers/net/pcap/pcap_ethdev.c b/drivers/net/pcap/pcap_ethdev.c
index ec29fd6bc5..db1958f20f 100644
--- a/drivers/net/pcap/pcap_ethdev.c
+++ b/drivers/net/pcap/pcap_ethdev.c
@@ -74,6 +74,7 @@  struct pcap_rx_queue {
 
 	/* Contains pre-generated packets to be looped through */
 	struct rte_ring *pkts;
+	struct pmd_internals *internals;
 };
 
 struct pcap_tx_queue {
@@ -82,6 +83,7 @@  struct pcap_tx_queue {
 	struct queue_stat tx_stat;
 	char name[PATH_MAX];
 	char type[ETH_PCAP_ARG_MAXLEN];
+	struct pmd_internals *internals;
 };
 
 struct pmd_internals {
@@ -93,6 +95,7 @@  struct pmd_internals {
 	int single_iface;
 	int phy_mac;
 	unsigned int infinite_rx;
+	uint16_t mtu;
 };
 
 struct pmd_process_private {
@@ -278,6 +281,7 @@  eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	const u_char *packet;
 	struct rte_mbuf *mbuf;
 	struct pcap_rx_queue *pcap_q = queue;
+	struct pmd_internals *internals = pcap_q->internals;
 	uint16_t num_rx = 0;
 	uint32_t rx_bytes = 0;
 	pcap_t *pcap;
@@ -303,6 +307,12 @@  eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 			break;
 		}
 
+		if (unlikely(header.caplen > internals->mtu)) {
+			pcap_q->rx_stat.err_pkts++;
+			rte_pktmbuf_free(mbuf);
+			break;
+		}
+
 		if (header.caplen <= rte_pktmbuf_tailroom(mbuf)) {
 			/* pcap packet will fit in the mbuf, can copy it */
 			rte_memcpy(rte_pktmbuf_mtod(mbuf, void *), packet,
@@ -378,6 +388,7 @@  eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct rte_mbuf *mbuf;
 	struct pmd_process_private *pp;
 	struct pcap_tx_queue *dumper_q = queue;
+	struct pmd_internals *internals = dumper_q->internals;
 	uint16_t num_tx = 0;
 	uint32_t tx_bytes = 0;
 	struct pcap_pkthdr header;
@@ -396,6 +407,12 @@  eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	for (i = 0; i < nb_pkts; i++) {
 		mbuf = bufs[i];
 		len = caplen = rte_pktmbuf_pkt_len(mbuf);
+
+		if (unlikely(len > internals->mtu)) {
+			rte_pktmbuf_free(mbuf);
+			continue;
+		}
+
 		if (unlikely(!rte_pktmbuf_is_contiguous(mbuf) &&
 				len > sizeof(temp_data))) {
 			caplen = sizeof(temp_data);
@@ -464,6 +481,7 @@  eth_pcap_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct rte_mbuf *mbuf;
 	struct pmd_process_private *pp;
 	struct pcap_tx_queue *tx_queue = queue;
+	struct pmd_internals *internals = tx_queue->internals;
 	uint16_t num_tx = 0;
 	uint32_t tx_bytes = 0;
 	pcap_t *pcap;
@@ -479,6 +497,12 @@  eth_pcap_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	for (i = 0; i < nb_pkts; i++) {
 		mbuf = bufs[i];
 		len = rte_pktmbuf_pkt_len(mbuf);
+
+		if (unlikely(len > internals->mtu)) {
+			rte_pktmbuf_free(mbuf);
+			continue;
+		}
+
 		if (unlikely(!rte_pktmbuf_is_contiguous(mbuf) &&
 				len > sizeof(temp_data))) {
 			PMD_LOG(ERR,
@@ -807,6 +831,16 @@  eth_stats_reset(struct rte_eth_dev *dev)
 	return 0;
 }
 
+static int
+eth_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
+{
+	struct pmd_internals *internals = dev->data->dev_private;
+
+	PMD_LOG(INFO, "MTU set %s %u\n", dev->device->name, mtu);
+	internals->mtu = mtu;
+	return 0;
+}
+
 static inline void
 infinite_rx_ring_free(struct rte_ring *pkts)
 {
@@ -878,6 +912,7 @@  eth_rx_queue_setup(struct rte_eth_dev *dev,
 	pcap_q->mb_pool = mb_pool;
 	pcap_q->port_id = dev->data->port_id;
 	pcap_q->queue_id = rx_queue_id;
+	pcap_q->internals = internals;
 	dev->data->rx_queues[rx_queue_id] = pcap_q;
 
 	if (internals->infinite_rx) {
@@ -952,6 +987,7 @@  eth_tx_queue_setup(struct rte_eth_dev *dev,
 
 	pcap_q->port_id = dev->data->port_id;
 	pcap_q->queue_id = tx_queue_id;
+	pcap_q->internals = internals;
 	dev->data->tx_queues[tx_queue_id] = pcap_q;
 
 	return 0;
@@ -1004,6 +1040,7 @@  static const struct eth_dev_ops ops = {
 	.link_update = eth_link_update,
 	.stats_get = eth_stats_get,
 	.stats_reset = eth_stats_reset,
+	.mtu_set = eth_mtu_set,
 };
 
 static int
@@ -1233,6 +1270,7 @@  pmd_init_internals(struct rte_vdev_device *vdev,
 		.addr_bytes = { 0x02, 0x70, 0x63, 0x61, 0x70, iface_idx++ }
 	};
 	(*internals)->phy_mac = 0;
+	(*internals)->mtu = RTE_ETH_PCAP_SNAPLEN;
 	data = (*eth_dev)->data;
 	data->nb_rx_queues = (uint16_t)nb_rx_queues;
 	data->nb_tx_queues = (uint16_t)nb_tx_queues;