[19.08,v2] net/pcap: enable infinitely rxing a pcap file

Message ID 20190605115620.32047-1-cian.ferriter@intel.com (mailing list archive)
State Changes Requested, archived
Delegated to: Ferruh Yigit
Series [19.08,v2] net/pcap: enable infinitely rxing a pcap file

Checks

Context Check Description
ci/checkpatch warning coding style issues
ci/intel-Performance-Testing success Performance Testing PASS
ci/mellanox-Performance-Testing success Performance Testing PASS
ci/Intel-compilation fail Compilation issues

Commit Message

Cian Ferriter June 5, 2019, 11:56 a.m. UTC
  It can be useful to use pcap files for some rudimental performance
testing. This patch enables this functionality in the pcap driver.

At a high level, this works by creating a ring of sufficient size to
store the packets in the pcap file passed to the application. When the
rx function for this mode is called, packets are dequeued from the ring
for use by the application and also enqueued back on to the ring to be
"received" again.

A tx_drop mode is also added since transmitting to a tx_pcap file isn't
desirable at a high traffic rate.

Jumbo frames are not supported in this mode. When filling the ring at rx
queue setup time, the presence of multi segment mbufs is checked for.
The PMD will exit on detection of these multi segment mbufs.

Signed-off-by: Cian Ferriter <cian.ferriter@intel.com>
---
 doc/guides/nics/pcap_ring.rst   |  19 +++
 drivers/net/pcap/rte_eth_pcap.c | 268 ++++++++++++++++++++++++++++++--
 2 files changed, 277 insertions(+), 10 deletions(-)
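
As a usage sketch (illustrative only: the testpmd invocation, core list and pcap
file name are assumptions, while the devargs come from the documentation update
below), the new mode could be exercised with something like:

  ./testpmd -l 0-1 --vdev 'net_pcap0,rx_pcap=file_rx.pcap,infinite_rx=1,tx_drop=1' -- --forward-mode=rxonly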
  

Comments

Cian Ferriter June 5, 2019, 12:46 p.m. UTC | #1
Adding in my changelog at the top of this, since I forgot to add it in the original mail:

v2:
* Rework the method of filling the ring to infinitely rx from
	* Avoids potential huge allocation of mbufs
	* Removes double allocation of mbufs used during queue setup
* rename count_packets_in_pcaps to count_packets_in_pcap
* initialize pcap_pkt_count in count_packets_in_pcap
* use RTE_PMD_REGISTER_PARAM_STRING <0|1> rather than <int>
* replace calls to rte_panic with proper error returning
* count rx and tx stat bytes in pcap_rx_infinite and tx_drop
* make internals->infinite_rx = infinite_rx assignment unconditional
* add cleanup for infinite_rx in eth_dev_close and pmd_pcap_remove
* add cleanup when multi seg mbufs are found
* add some clarifications to the documentation update

  
Ferruh Yigit June 12, 2019, 2:10 p.m. UTC | #2
On 6/5/2019 1:46 PM, Ferriter, Cian wrote:
> Adding in my changelog at the top of this, since I forgot to add it in the original mail:
> 
> v2:
> * Rework the method of filling the ring to infinitely rx from
> 	* Avoids potential huge allocation of mbufs
> 	* Removes double allocation of mbufs used during queue setup
> * rename count_packets_in_pcaps to count_packets_in_pcap
> * initialize pcap_pkt_count in count_packets_in_pcap
> * use RTE_PMD_REGISTER_PARAM_STRING <0|1> rather than <int>
> * replace calls to rte_panic with proper error returning
> * count rx and tx stat bytes in pcap_rx_infinite and tx_drop
> * make internals->infinite_rx = infinite_rx assignment unconditional
> * add cleanup for infinite_rx in eth_dev_close and pmd_pcap_remove
> * add cleanup when multi seg mbufs are found
> * add some clarifications to the documentation update
> 
>> -----Original Message-----
>> From: Ferriter, Cian
>> Sent: 05 June 2019 12:56
>> To: Richardson, Bruce <bruce.richardson@intel.com>; Yigit, Ferruh
>> <ferruh.yigit@intel.com>; Mcnamara, John <john.mcnamara@intel.com>;
>> Kovacevic, Marko <marko.kovacevic@intel.com>
>> Cc: dev@dpdk.org; Ferriter, Cian <cian.ferriter@intel.com>
>> Subject: [PATCH 19.08 v2] net/pcap: enable infinitely rxing a pcap file
>>
>> It can be useful to use pcap files for some rudimental performance testing.
>> This patch enables this functionality in the pcap driver.
>>
>> At a high level, this works by creating a ring of sufficient size to store the
>> packets in the pcap file passed to the application. When the rx function for
>> this mode is called, packets are dequeued from the ring for use by the
>> application and also enqueued back on to the ring to be "received" again.
>>
>> A tx_drop mode is also added since transmitting to a tx_pcap file isn't
>> desirable at a high traffic rate.
>>
>> Jumbo frames are not supported in this mode. When filling the ring at rx
>> queue setup time, the presence of multi segment mbufs is checked for.
>> The PMD will exit on detection of these multi segment mbufs.
>>
>> Signed-off-by: Cian Ferriter <cian.ferriter@intel.com>
>> ---
>>  doc/guides/nics/pcap_ring.rst   |  19 +++
>>  drivers/net/pcap/rte_eth_pcap.c | 268
>> ++++++++++++++++++++++++++++++--
>>  2 files changed, 277 insertions(+), 10 deletions(-)
>>
>> diff --git a/doc/guides/nics/pcap_ring.rst b/doc/guides/nics/pcap_ring.rst
>> index c1ef9196b..b272e6fe3 100644
>> --- a/doc/guides/nics/pcap_ring.rst
>> +++ b/doc/guides/nics/pcap_ring.rst
>> @@ -106,6 +106,25 @@ Runtime Config Options
>>
>>     --vdev 'net_pcap0,iface=eth0,phy_mac=1'
>>
>> +- Use the RX PCAP file to infinitely receive packets
>> +
>> + In case ``rx_pcap=`` configuration is set, user may want to use the
>> + selected PCAP file for rudimental performance testing. This can be done
>> with a ``devarg`` ``infinite_rx``, for example::
>> +
>> +   --vdev 'net_pcap0,rx_pcap=file_rx.pcap,infinite_rx=1,tx_drop=1'

It can be good to highlight that this flag is not per queue, but should be
provided once (explicitly once, since the code checks it) per Rx.

>> +
>> + When this mode is used, it is recommended to use the ``tx_drop``
>> ``devarg``.
>> +
>> + This option is device wide, so all queues on a device will either have this
>> enabled or disabled.
>> +
>> +- Drop all packets on transmit
>> +
>> + The user may want to drop all packets on tx for a device. This can be done
>> with the ``tx_drop`` ``devarg``, for example::
>> +
>> +   --vdev 'net_pcap0,rx_pcap=file_rx.pcap,tx_drop=1'
>> +
>> + One tx drop queue is created for each rxq on that device.

Can we drop the ``tx_drop`` completely?

What happens when no 'tx_pcap' or 'tx_iface' is provided at all, to imply the tx_drop?

<...>

>> @@ -1105,7 +1290,8 @@ static int
>>  eth_from_pcaps(struct rte_vdev_device *vdev,
>>  		struct pmd_devargs *rx_queues, const unsigned int
>> nb_rx_queues,
>>  		struct pmd_devargs *tx_queues, const unsigned int
>> nb_tx_queues,
>> -		int single_iface, unsigned int using_dumpers)
>> +		int single_iface, unsigned int using_dumpers,
>> +		unsigned int infinite_rx, unsigned int tx_drop)


The argument list keeps increasing. What happens is 'pmd_pcap_probe()'
processes the user input (devargs) and passes the processed output to this
function to create the ethdev.
What do you think about gathering all the processed output into a struct and
passing it to this function, in a patch before this one? Like:

struct pmd_devargs_all {
  struct pmd_devargs pcaps;
  struct pmd_devargs dumpers;
  int single_iface;
  unsigned int is_tx_pcap;
};

And add 'unsigned int infinite_rx;' into that struct in this patch.
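
Purely as an illustration of that suggestion (hypothetical, not code from this
patch), the helper's signature could then collapse to:

static int
eth_from_pcaps(struct rte_vdev_device *vdev,
		struct pmd_devargs_all *devargs_all);

with 'pmd_pcap_probe()' filling the struct, including the new 'infinite_rx'
field, before making the call.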

<...>

>> @@ -1148,6 +1342,7 @@ pmd_pcap_probe(struct rte_vdev_device *dev)  {
>>  	const char *name;
>>  	unsigned int is_rx_pcap = 0, is_tx_pcap = 0;
>> +	unsigned int infinite_rx = 0, infinite_rx_arg_cnt = 0, tx_drop = 0;

Is an initial value required for 'infinite_rx_arg_cnt'?

>>  	struct rte_kvargs *kvlist;
>>  	struct pmd_devargs pcaps = {0};
>>  	struct pmd_devargs dumpers = {0};
>> @@ -1216,7 +1411,25 @@ pmd_pcap_probe(struct rte_vdev_device *dev)
>>  	is_rx_pcap = rte_kvargs_count(kvlist, ETH_PCAP_RX_PCAP_ARG) ? 1
>> : 0;
>>  	pcaps.num_of_queue = 0;
>>
>> +	infinite_rx_arg_cnt = rte_kvargs_count(kvlist,
>> +			ETH_PCAP_INFINITE_RX_ARG);

Can move this under 'is_rx_pcap', since this value only makes sense when Rx is pcap.

>> +
>>  	if (is_rx_pcap) {
>> +		/*
>> +		 * We check whether we want to infinitely rx the pcap file.
>> +		 */
>> +		if (infinite_rx_arg_cnt == 1) {
>> +			ret = rte_kvargs_process(kvlist,
>> +					ETH_PCAP_INFINITE_RX_ARG,
>> +					&get_infinite_rx_arg, &infinite_rx);
>> +			if (ret < 0)
>> +				goto free_kvlist;
>> +		} else if (infinite_rx_arg_cnt >= 1) {

I guess it should be ">" instead of ">="

<...>

>> @@ -1285,8 +1512,12 @@ pmd_pcap_probe(struct rte_vdev_device *dev)
>>  		goto free_kvlist;
>>  	}
>>
>> +	PMD_LOG(INFO, "Configure pmd_pcap: infinite_rx is %s",
>> +			infinite_rx ? "enabled" : "disabled");

What do you think about printing the message only when the feature is requested,
instead of printing it each time that it is disabled, which is the default behaviour?

<...>

>> @@ -1318,6 +1550,20 @@ pmd_pcap_remove(struct rte_vdev_device *dev)
>>  			eth_dev->data->mac_addrs = NULL;
>>  	}
>>
>> +	/* Device wide flag, but cleanup must be performed per queue. */
>> +	if (internals->infinite_rx) {
>> +		for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
>> +			struct pcap_rx_queue *pcap_q = &internals-
>>> rx_queue[i];
>> +			struct rte_mbuf *pcap_buf;
>> +
>> +			while (!rte_ring_dequeue(pcap_q->pkts,
>> +					(void **)&pcap_buf))
>> +				rte_pktmbuf_free(pcap_buf);
>> +
>> +			rte_ring_free(pcap_q->pkts);
>> +		}
>> +	}

Is it possible to call 'eth_dev_close()' instead, since the above code seems to duplicate it?
  
Cian Ferriter June 14, 2019, 2:48 p.m. UTC | #3
Hi Ferruh,

Thanks for the review.

I've sent a v3 and responded to your comments below.

Thanks,
Cian

> -----Original Message-----
> From: Yigit, Ferruh
> Sent: 12 June 2019 15:10
> To: Ferriter, Cian <cian.ferriter@intel.com>; Richardson, Bruce
> <bruce.richardson@intel.com>; Mcnamara, John
> <john.mcnamara@intel.com>; Kovacevic, Marko
> <marko.kovacevic@intel.com>
> Cc: dev@dpdk.org
> Subject: Re: [PATCH 19.08 v2] net/pcap: enable infinitely rxing a pcap file
> 
> On 6/5/2019 1:46 PM, Ferriter, Cian wrote:
> > Adding in my changelog at the top of this, since I forgot to add it in the
> original mail:
> >
> > v2:
> > * Rework the method of filling the ring to infinitely rx from
> > 	* Avoids potential huge allocation of mbufs
> > 	* Removes double allocation of mbufs used during queue setup
> > * rename count_packets_in_pcaps to count_packets_in_pcap
> > * initialize pcap_pkt_count in count_packets_in_pcap
> > * use RTE_PMD_REGISTER_PARAM_STRING <0|1> rather than <int>
> > * replace calls to rte_panic with proper error returning
> > * count rx and tx stat bytes in pcap_rx_infinite and tx_drop
> > * make internals->infinite_rx = infinite_rx assignment unconditional
> > * add cleanup for infinite_rx in eth_dev_close and pmd_pcap_remove
> > * add cleanup when multi seg mbufs are found
> > * add some clarifications to the documentation update
> >
> >> -----Original Message-----
> >> From: Ferriter, Cian
> >> Sent: 05 June 2019 12:56
> >> To: Richardson, Bruce <bruce.richardson@intel.com>; Yigit, Ferruh
> >> <ferruh.yigit@intel.com>; Mcnamara, John <john.mcnamara@intel.com>;
> >> Kovacevic, Marko <marko.kovacevic@intel.com>
> >> Cc: dev@dpdk.org; Ferriter, Cian <cian.ferriter@intel.com>
> >> Subject: [PATCH 19.08 v2] net/pcap: enable infinitely rxing a pcap
> >> file
> >>
> >> It can be useful to use pcap files for some rudimental performance
> testing.
> >> This patch enables this functionality in the pcap driver.
> >>
> >> At a high level, this works by creating a ring of sufficient size to
> >> store the packets in the pcap file passed to the application. When
> >> the rx function for this mode is called, packets are dequeued from
> >> the ring for use by the application and also enqueued back on to the ring
> to be "received" again.
> >>
> >> A tx_drop mode is also added since transmitting to a tx_pcap file
> >> isn't desirable at a high traffic rate.
> >>
> >> Jumbo frames are not supported in this mode. When filling the ring at
> >> rx queue setup time, the presence of multi segment mbufs is checked
> for.
> >> The PMD will exit on detection of these multi segment mbufs.
> >>
> >> Signed-off-by: Cian Ferriter <cian.ferriter@intel.com>
> >> ---
> >>  doc/guides/nics/pcap_ring.rst   |  19 +++
> >>  drivers/net/pcap/rte_eth_pcap.c | 268
> >> ++++++++++++++++++++++++++++++--
> >>  2 files changed, 277 insertions(+), 10 deletions(-)
> >>
> >> diff --git a/doc/guides/nics/pcap_ring.rst
> >> b/doc/guides/nics/pcap_ring.rst index c1ef9196b..b272e6fe3 100644
> >> --- a/doc/guides/nics/pcap_ring.rst
> >> +++ b/doc/guides/nics/pcap_ring.rst
> >> @@ -106,6 +106,25 @@ Runtime Config Options
> >>
> >>     --vdev 'net_pcap0,iface=eth0,phy_mac=1'
> >>
> >> +- Use the RX PCAP file to infinitely receive packets
> >> +
> >> + In case ``rx_pcap=`` configuration is set, user may want to use the
> >> + selected PCAP file for rudimental performance testing. This can be
> >> + done
> >> with a ``devarg`` ``infinite_rx``, for example::
> >> +
> >> +   --vdev 'net_pcap0,rx_pcap=file_rx.pcap,infinite_rx=1,tx_drop=1'
> 
> It can be good to highlight that this flag is not per queue, but should be
> provided once (explicitly once, since the code checks it) per Rx.
> 

Added to the docs in the next version.

> >> +
> >> + When this mode is used, it is recommended to use the ``tx_drop``
> >> ``devarg``.
> >> +
> >> + This option is device wide, so all queues on a device will either
> >> + have this
> >> enabled or disabled.
> >> +
> >> +- Drop all packets on transmit
> >> +
> >> + The user may want to drop all packets on tx for a device. This can
> >> + be done
> >> with the ``tx_drop`` ``devarg``, for example::
> >> +
> >> +   --vdev 'net_pcap0,rx_pcap=file_rx.pcap,tx_drop=1'
> >> +
> >> + One tx drop queue is created for each rxq on that device.
> 
> Can we drop the ``tx_drop`` completely?
> 
> What happens when no 'tx_pcap' or 'tx_iface' is provided at all, to imply the
> tx_drop?
> 

This sounds like nice default behavior to have. I've updated the latest version to implement this, and I've removed the tx_drop args parsing and the related doc section.
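
A rough sketch of that default behaviour (illustrative only; the actual v3 code
may differ, and 'is_tx_iface' is a hypothetical variable name, while the other
identifiers already exist in this patch):

	is_tx_pcap = rte_kvargs_count(kvlist, ETH_PCAP_TX_PCAP_ARG) ? 1 : 0;
	is_tx_iface = rte_kvargs_count(kvlist, ETH_PCAP_TX_IFACE_ARG) ? 1 : 0;
	dumpers.num_of_queue = 0;

	if (!is_tx_pcap && !is_tx_iface) {
		unsigned int i;

		/* No tx devarg given: add one dummy queue per rxq which
		 * counts and drops packets, instead of requiring tx_drop=1.
		 */
		for (i = 0; i < pcaps.num_of_queue; i++)
			ret = add_queue(&dumpers, "dummy", "tx_drop",
					NULL, NULL);
	} else if (is_tx_pcap) {
		ret = rte_kvargs_process(kvlist, ETH_PCAP_TX_PCAP_ARG,
				&open_tx_pcap, &dumpers);
	} else {
		ret = rte_kvargs_process(kvlist, ETH_PCAP_TX_IFACE_ARG,
				&open_tx_iface, &dumpers);
	}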

> <...>
> 
> >> @@ -1105,7 +1290,8 @@ static int
> >>  eth_from_pcaps(struct rte_vdev_device *vdev,
> >>  		struct pmd_devargs *rx_queues, const unsigned int
> nb_rx_queues,
> >>  		struct pmd_devargs *tx_queues, const unsigned int
> nb_tx_queues,
> >> -		int single_iface, unsigned int using_dumpers)
> >> +		int single_iface, unsigned int using_dumpers,
> >> +		unsigned int infinite_rx, unsigned int tx_drop)
> 
> 
> The argument list keeps increasing. What happens is 'pmd_pcap_probe()'
> processes the user input (devargs) and passes the processed output to this
> function to create the ethdev.
> What do you think about gathering all the processed output into a struct and
> passing it to this function, in a patch before this one? Like:
> 
> struct pmd_devargs_all {
>   struct pmd_devargs pcaps;
>   struct pmd_devargs dumpers;
>   int single_iface;
>   unsigned int is_tx_pcap;
> };
> 
> And add 'unsigned int infinite_rx;' into that struct in this patch.
> 

Good idea, I've implemented these changes in the next version.

> <...>
> 
> >> @@ -1148,6 +1342,7 @@ pmd_pcap_probe(struct rte_vdev_device *dev)
> {
> >>  	const char *name;
> >>  	unsigned int is_rx_pcap = 0, is_tx_pcap = 0;
> >> +	unsigned int infinite_rx = 0, infinite_rx_arg_cnt = 0, tx_drop = 0;
> 
> Is an initial value required for 'infinite_rx_arg_cnt'?
> 

I don't think the initial value is required. I was just following the convention used with the 'is_rx_pcap' and 'is_tx_pcap' variables.
I think the comment below takes care of this, since we can move 'infinite_rx_arg_cnt' under 'is_rx_pcap' and assign it directly from the return value of 'rte_kvargs_count'.

> >>  	struct rte_kvargs *kvlist;
> >>  	struct pmd_devargs pcaps = {0};
> >>  	struct pmd_devargs dumpers = {0};
> >> @@ -1216,7 +1411,25 @@ pmd_pcap_probe(struct rte_vdev_device
> *dev)
> >>  	is_rx_pcap = rte_kvargs_count(kvlist, ETH_PCAP_RX_PCAP_ARG) ? 1
> >> : 0;
> >>  	pcaps.num_of_queue = 0;
> >>
> >> +	infinite_rx_arg_cnt = rte_kvargs_count(kvlist,
> >> +			ETH_PCAP_INFINITE_RX_ARG);
> 
> Can move this under 'is_rx_pcap', since this value only makes sense when Rx
> is pcap.
> 

True, that makes sense. Fixed in the next version.
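
i.e. roughly (sketch only):

	if (is_rx_pcap) {
		/* Only meaningful when Rx is a pcap file. */
		unsigned int infinite_rx_arg_cnt = rte_kvargs_count(kvlist,
				ETH_PCAP_INFINITE_RX_ARG);

		/* ... existing infinite_rx handling ... */
	}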

> >> +
> >>  	if (is_rx_pcap) {
> >> +		/*
> >> +		 * We check whether we want to infinitely rx the pcap file.
> >> +		 */
> >> +		if (infinite_rx_arg_cnt == 1) {
> >> +			ret = rte_kvargs_process(kvlist,
> >> +					ETH_PCAP_INFINITE_RX_ARG,
> >> +					&get_infinite_rx_arg, &infinite_rx);
> >> +			if (ret < 0)
> >> +				goto free_kvlist;
> >> +		} else if (infinite_rx_arg_cnt >= 1) {
> 
> I guess it should be ">" instead of ">="
> 

Ooops, you are correct. The '=' part is pointless here. Fixed in the next version.
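
For reference, the corrected branch would then read along these lines:

		} else if (infinite_rx_arg_cnt > 1) {
			PMD_LOG(WARNING, "infinite_rx has not been enabled since the "
					"argument has been provided more than once "
					"for %s", name);
		}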

> <...>
> 
> >> @@ -1285,8 +1512,12 @@ pmd_pcap_probe(struct rte_vdev_device
> *dev)
> >>  		goto free_kvlist;
> >>  	}
> >>
> >> +	PMD_LOG(INFO, "Configure pmd_pcap: infinite_rx is %s",
> >> +			infinite_rx ? "enabled" : "disabled");
> 
> What do you think about printing the message only when the feature is requested,
> instead of printing it each time that it is disabled, which is the default behaviour?
> 

I agree, there's no point in mentioning that infinite_rx has been disabled if the user hasn't requested it in the args.
Fixed in the new version.
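
e.g. something along these lines (exact wording hypothetical):

	if (infinite_rx)
		PMD_LOG(INFO, "infinite_rx has been enabled for %s", name);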

> <...>
> 
> >> @@ -1318,6 +1550,20 @@ pmd_pcap_remove(struct rte_vdev_device
> *dev)
> >>  			eth_dev->data->mac_addrs = NULL;
> >>  	}
> >>
> >> +	/* Device wide flag, but cleanup must be performed per queue. */
> >> +	if (internals->infinite_rx) {
> >> +		for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
> >> +			struct pcap_rx_queue *pcap_q = &internals-
> >>> rx_queue[i];
> >> +			struct rte_mbuf *pcap_buf;
> >> +
> >> +			while (!rte_ring_dequeue(pcap_q->pkts,
> >> +					(void **)&pcap_buf))
> >> +				rte_pktmbuf_free(pcap_buf);
> >> +
> >> +			rte_ring_free(pcap_q->pkts);
> >> +		}
> >> +	}
> 
> Is it possible to call 'eth_dev_close()' instead, since the above code seems
> to duplicate it?

This makes sense. Added this in the next version.
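
i.e. something along these lines in 'pmd_pcap_remove()' (sketch only):

	/* Reuse the per-queue infinite_rx cleanup instead of duplicating it. */
	eth_dev_close(eth_dev);

	rte_free(eth_dev->process_private);
	rte_eth_dev_release_port(eth_dev);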
  

Patch

diff --git a/doc/guides/nics/pcap_ring.rst b/doc/guides/nics/pcap_ring.rst
index c1ef9196b..b272e6fe3 100644
--- a/doc/guides/nics/pcap_ring.rst
+++ b/doc/guides/nics/pcap_ring.rst
@@ -106,6 +106,25 @@  Runtime Config Options
 
    --vdev 'net_pcap0,iface=eth0,phy_mac=1'
 
+- Use the RX PCAP file to infinitely receive packets
+
+ In case ``rx_pcap=`` configuration is set, user may want to use the selected PCAP file for rudimental
+ performance testing. This can be done with a ``devarg`` ``infinite_rx``, for example::
+
+   --vdev 'net_pcap0,rx_pcap=file_rx.pcap,infinite_rx=1,tx_drop=1'
+
+ When this mode is used, it is recommended to use the ``tx_drop`` ``devarg``.
+
+ This option is device wide, so all queues on a device will either have this enabled or disabled.
+
+- Drop all packets on transmit
+
+ The user may want to drop all packets on tx for a device. This can be done with the ``tx_drop`` ``devarg``, for example::
+
+   --vdev 'net_pcap0,rx_pcap=file_rx.pcap,tx_drop=1'
+
+ One tx drop queue is created for each rxq on that device.
+
 Examples of Usage
 ^^^^^^^^^^^^^^^^^
 
diff --git a/drivers/net/pcap/rte_eth_pcap.c b/drivers/net/pcap/rte_eth_pcap.c
index 10277b9b6..9d239d171 100644
--- a/drivers/net/pcap/rte_eth_pcap.c
+++ b/drivers/net/pcap/rte_eth_pcap.c
@@ -39,6 +39,8 @@ 
 #define ETH_PCAP_TX_IFACE_ARG "tx_iface"
 #define ETH_PCAP_IFACE_ARG    "iface"
 #define ETH_PCAP_PHY_MAC_ARG  "phy_mac"
+#define ETH_PCAP_INFINITE_RX_ARG  "infinite_rx"
+#define ETH_PCAP_TX_DROP_ARG  "tx_drop"
 
 #define ETH_PCAP_ARG_MAXLEN	64
 
@@ -64,6 +66,9 @@  struct pcap_rx_queue {
 	struct queue_stat rx_stat;
 	char name[PATH_MAX];
 	char type[ETH_PCAP_ARG_MAXLEN];
+
+	/* Contains pre-generated packets to be looped through */
+	struct rte_ring *pkts;
 };
 
 struct pcap_tx_queue {
@@ -82,6 +87,7 @@  struct pmd_internals {
 	int if_index;
 	int single_iface;
 	int phy_mac;
+	int infinite_rx;
 };
 
 struct pmd_process_private {
@@ -109,6 +115,8 @@  static const char *valid_arguments[] = {
 	ETH_PCAP_TX_IFACE_ARG,
 	ETH_PCAP_IFACE_ARG,
 	ETH_PCAP_PHY_MAC_ARG,
+	ETH_PCAP_INFINITE_RX_ARG,
+	ETH_PCAP_TX_DROP_ARG,
 	NULL
 };
 
@@ -178,6 +186,43 @@  eth_pcap_gather_data(unsigned char *data, struct rte_mbuf *mbuf)
 	}
 }
 
+static uint16_t
+eth_pcap_rx_infinite(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	int i;
+	struct pcap_rx_queue *pcap_q = queue;
+	uint32_t rx_bytes = 0;
+
+	if (unlikely(nb_pkts == 0))
+		return 0;
+
+	if (rte_pktmbuf_alloc_bulk(pcap_q->mb_pool, bufs, nb_pkts) != 0)
+		return 0;
+
+	for (i = 0; i < nb_pkts; i++) {
+		struct rte_mbuf *pcap_buf;
+		int err = rte_ring_dequeue(pcap_q->pkts, (void **)&pcap_buf);
+		if (err)
+			return i;
+
+		rte_memcpy(rte_pktmbuf_mtod(bufs[i], void *),
+				rte_pktmbuf_mtod(pcap_buf, void *),
+				pcap_buf->data_len);
+		bufs[i]->data_len = pcap_buf->data_len;
+		bufs[i]->pkt_len = pcap_buf->pkt_len;
+		bufs[i]->port = pcap_q->port_id;
+		rx_bytes += pcap_buf->data_len;
+
+		/* Enqueue packet back on ring to allow infinite rx. */
+		rte_ring_enqueue(pcap_q->pkts, pcap_buf);
+	}
+
+	pcap_q->rx_stat.pkts += i;
+	pcap_q->rx_stat.bytes += rx_bytes;
+
+	return i;
+}
+
 static uint16_t
 eth_pcap_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
@@ -320,6 +365,30 @@  eth_pcap_tx_dumper(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	return num_tx;
 }
 
+/*
+ * Callback to handle dropping packets in the infinite rx case.
+ */
+static uint16_t
+eth_tx_drop(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	unsigned int i;
+	uint32_t tx_bytes = 0;
+	struct pcap_tx_queue *tx_queue = queue;
+
+	if (unlikely(nb_pkts == 0))
+		return 0;
+
+	for (i = 0; i < nb_pkts; i++) {
+		tx_bytes += bufs[i]->data_len;
+		rte_pktmbuf_free(bufs[i]);
+	}
+
+	tx_queue->tx_stat.pkts += nb_pkts;
+	tx_queue->tx_stat.bytes += tx_bytes;
+
+	return i;
+}
+
 /*
  * Callback to handle sending packets through a real NIC.
  */
@@ -447,6 +516,24 @@  open_single_rx_pcap(const char *pcap_filename, pcap_t **pcap)
 	return 0;
 }
 
+static uint64_t
+count_packets_in_pcap(pcap_t **pcap, struct pcap_rx_queue *pcap_q)
+{
+	const u_char *packet;
+	struct pcap_pkthdr header;
+	uint64_t pcap_pkt_count = 0;
+
+	while ((packet = pcap_next(*pcap, &header)))
+		pcap_pkt_count++;
+
+	/* The pcap is reopened so it can be used as normal later. */
+	pcap_close(*pcap);
+	*pcap = NULL;
+	open_single_rx_pcap(pcap_q->name, pcap);
+
+	return pcap_pkt_count;
+}
+
 static int
 eth_dev_start(struct rte_eth_dev *dev)
 {
@@ -639,8 +726,25 @@  eth_stats_reset(struct rte_eth_dev *dev)
 }
 
 static void
-eth_dev_close(struct rte_eth_dev *dev __rte_unused)
+eth_dev_close(struct rte_eth_dev *dev)
 {
+	unsigned int i;
+	struct pmd_internals *internals = dev->data->dev_private;
+
+	/* Device wide flag, but cleanup must be performed per queue. */
+	if (internals->infinite_rx) {
+		for (i = 0; i < dev->data->nb_rx_queues; i++) {
+			struct pcap_rx_queue *pcap_q = &internals->rx_queue[i];
+			struct rte_mbuf *pcap_buf;
+
+			while (!rte_ring_dequeue(pcap_q->pkts,
+					(void **)&pcap_buf))
+				rte_pktmbuf_free(pcap_buf);
+
+			rte_ring_free(pcap_q->pkts);
+		}
+	}
+
 }
 
 static void
@@ -671,6 +775,59 @@  eth_rx_queue_setup(struct rte_eth_dev *dev,
 	pcap_q->queue_id = rx_queue_id;
 	dev->data->rx_queues[rx_queue_id] = pcap_q;
 
+	if (internals->infinite_rx) {
+		struct pmd_process_private *pp;
+		char ring_name[NAME_MAX];
+		static uint32_t ring_number;
+		uint64_t pcap_pkt_count = 0;
+		struct rte_mbuf *bufs[1];
+		pcap_t **pcap;
+
+		pp = rte_eth_devices[pcap_q->port_id].process_private;
+		pcap = &pp->rx_pcap[pcap_q->queue_id];
+
+		if (unlikely(*pcap == NULL))
+			return -ENOENT;
+
+		pcap_pkt_count = count_packets_in_pcap(pcap, pcap_q);
+
+		snprintf(ring_name, sizeof(ring_name), "PCAP_RING%" PRIu16,
+				ring_number);
+
+		pcap_q->pkts = rte_ring_create(ring_name,
+				rte_align64pow2(pcap_pkt_count + 1), 0,
+				RING_F_SP_ENQ | RING_F_SC_DEQ);
+		ring_number++;
+		if (!pcap_q->pkts)
+			return -ENOENT;
+
+		/* Fill ring with packets from PCAP file one by one. */
+		while (eth_pcap_rx(pcap_q, bufs, 1)) {
+			/* Check for multiseg mbufs. */
+			if (bufs[0]->nb_segs != 1) {
+				rte_pktmbuf_free(*bufs);
+
+				while (!rte_ring_dequeue(pcap_q->pkts,
+						(void **)bufs))
+					rte_pktmbuf_free(*bufs);
+
+				rte_ring_free(pcap_q->pkts);
+				PMD_LOG(ERR, "Multiseg mbufs are not supported in infinite_rx "
+						"mode.");
+				return -EINVAL;
+			}
+
+			rte_ring_enqueue_bulk(pcap_q->pkts,
+					(void * const *)bufs, 1, NULL);
+		}
+		/*
+		 * Reset the stats for this queue since eth_pcap_rx calls above
+		 * didn't result in the application receiving packets.
+		 */
+		pcap_q->rx_stat.pkts = 0;
+		pcap_q->rx_stat.bytes = 0;
+	}
+
 	return 0;
 }
 
@@ -908,6 +1065,34 @@  select_phy_mac(const char *key __rte_unused, const char *value,
 	return 0;
 }
 
+static int
+get_infinite_rx_arg(const char *key __rte_unused,
+		const char *value, void *extra_args)
+{
+	if (extra_args) {
+		const int infinite_rx = atoi(value);
+		int *enable_infinite_rx = extra_args;
+
+		if (infinite_rx)
+			*enable_infinite_rx = 1;
+	}
+	return 0;
+}
+
+static int
+get_tx_drop_arg(const char *key __rte_unused,
+		const char *value, void *extra_args)
+{
+	if (extra_args) {
+		const int tx_drop = atoi(value);
+		int *enable_tx_drop = extra_args;
+
+		if (tx_drop)
+			*enable_tx_drop = 1;
+	}
+	return 0;
+}
+
 static struct rte_vdev_driver pmd_pcap_drv;
 
 static int
@@ -1105,7 +1290,8 @@  static int
 eth_from_pcaps(struct rte_vdev_device *vdev,
 		struct pmd_devargs *rx_queues, const unsigned int nb_rx_queues,
 		struct pmd_devargs *tx_queues, const unsigned int nb_tx_queues,
-		int single_iface, unsigned int using_dumpers)
+		int single_iface, unsigned int using_dumpers,
+		unsigned int infinite_rx, unsigned int tx_drop)
 {
 	struct pmd_internals *internals = NULL;
 	struct rte_eth_dev *eth_dev = NULL;
@@ -1132,9 +1318,17 @@  eth_from_pcaps(struct rte_vdev_device *vdev,
 		}
 	}
 
-	eth_dev->rx_pkt_burst = eth_pcap_rx;
+	internals->infinite_rx = infinite_rx;
+	/* Assign rx ops. */
+	if (infinite_rx)
+		eth_dev->rx_pkt_burst = eth_pcap_rx_infinite;
+	else
+		eth_dev->rx_pkt_burst = eth_pcap_rx;
 
-	if (using_dumpers)
+	/* Assign tx ops. */
+	if (tx_drop)
+		eth_dev->tx_pkt_burst = eth_tx_drop;
+	else if (using_dumpers)
 		eth_dev->tx_pkt_burst = eth_pcap_tx_dumper;
 	else
 		eth_dev->tx_pkt_burst = eth_pcap_tx;
@@ -1148,6 +1342,7 @@  pmd_pcap_probe(struct rte_vdev_device *dev)
 {
 	const char *name;
 	unsigned int is_rx_pcap = 0, is_tx_pcap = 0;
+	unsigned int infinite_rx = 0, infinite_rx_arg_cnt = 0, tx_drop = 0;
 	struct rte_kvargs *kvlist;
 	struct pmd_devargs pcaps = {0};
 	struct pmd_devargs dumpers = {0};
@@ -1216,7 +1411,25 @@  pmd_pcap_probe(struct rte_vdev_device *dev)
 	is_rx_pcap = rte_kvargs_count(kvlist, ETH_PCAP_RX_PCAP_ARG) ? 1 : 0;
 	pcaps.num_of_queue = 0;
 
+	infinite_rx_arg_cnt = rte_kvargs_count(kvlist,
+			ETH_PCAP_INFINITE_RX_ARG);
+
 	if (is_rx_pcap) {
+		/*
+		 * We check whether we want to infinitely rx the pcap file.
+		 */
+		if (infinite_rx_arg_cnt == 1) {
+			ret = rte_kvargs_process(kvlist,
+					ETH_PCAP_INFINITE_RX_ARG,
+					&get_infinite_rx_arg, &infinite_rx);
+			if (ret < 0)
+				goto free_kvlist;
+		} else if (infinite_rx_arg_cnt >= 1) {
+			PMD_LOG(WARNING, "infinite_rx has not been enabled since the "
+					"argument has been provided more than once "
+					"for %s", name);
+		}
+
 		ret = rte_kvargs_process(kvlist, ETH_PCAP_RX_PCAP_ARG,
 				&open_rx_pcap, &pcaps);
 	} else {
@@ -1228,18 +1441,32 @@  pmd_pcap_probe(struct rte_vdev_device *dev)
 		goto free_kvlist;
 
 	/*
-	 * We check whether we want to open a TX stream to a real NIC or a
-	 * pcap file
+	 * We check whether we want to open a TX stream to a real NIC,
+	 * a pcap file, or drop packets on tx
 	 */
 	is_tx_pcap = rte_kvargs_count(kvlist, ETH_PCAP_TX_PCAP_ARG) ? 1 : 0;
 	dumpers.num_of_queue = 0;
 
-	if (is_tx_pcap)
+	if (rte_kvargs_count(kvlist, ETH_PCAP_TX_DROP_ARG) == 1) {
+		ret = rte_kvargs_process(kvlist, ETH_PCAP_TX_DROP_ARG,
+				&get_tx_drop_arg, &tx_drop);
+		if (ret < 0)
+			goto free_kvlist;
+	}
+
+	if (tx_drop) {
+		unsigned int i;
+		/* Add 1 dummy queue per rxq which counts and drops packets. */
+		for (i = 0; i < pcaps.num_of_queue; i++)
+			ret = add_queue(&dumpers, "dummy", "tx_drop", NULL,
+					NULL);
+	} else if (is_tx_pcap) {
 		ret = rte_kvargs_process(kvlist, ETH_PCAP_TX_PCAP_ARG,
 				&open_tx_pcap, &dumpers);
-	else
+	} else {
 		ret = rte_kvargs_process(kvlist, ETH_PCAP_TX_IFACE_ARG,
 				&open_tx_iface, &dumpers);
+	}
 
 	if (ret < 0)
 		goto free_kvlist;
@@ -1285,8 +1512,12 @@  pmd_pcap_probe(struct rte_vdev_device *dev)
 		goto free_kvlist;
 	}
 
+	PMD_LOG(INFO, "Configure pmd_pcap: infinite_rx is %s",
+			infinite_rx ? "enabled" : "disabled");
+
 	ret = eth_from_pcaps(dev, &pcaps, pcaps.num_of_queue, &dumpers,
-		dumpers.num_of_queue, single_iface, is_tx_pcap);
+		dumpers.num_of_queue, single_iface, is_tx_pcap, infinite_rx,
+		tx_drop);
 
 free_kvlist:
 	rte_kvargs_free(kvlist);
@@ -1297,6 +1528,7 @@  pmd_pcap_probe(struct rte_vdev_device *dev)
 static int
 pmd_pcap_remove(struct rte_vdev_device *dev)
 {
+	unsigned int i;
 	struct pmd_internals *internals = NULL;
 	struct rte_eth_dev *eth_dev = NULL;
 
@@ -1318,6 +1550,20 @@  pmd_pcap_remove(struct rte_vdev_device *dev)
 			eth_dev->data->mac_addrs = NULL;
 	}
 
+	/* Device wide flag, but cleanup must be performed per queue. */
+	if (internals->infinite_rx) {
+		for (i = 0; i < eth_dev->data->nb_rx_queues; i++) {
+			struct pcap_rx_queue *pcap_q = &internals->rx_queue[i];
+			struct rte_mbuf *pcap_buf;
+
+			while (!rte_ring_dequeue(pcap_q->pkts,
+					(void **)&pcap_buf))
+				rte_pktmbuf_free(pcap_buf);
+
+			rte_ring_free(pcap_q->pkts);
+		}
+	}
+
 	rte_free(eth_dev->process_private);
 	rte_eth_dev_release_port(eth_dev);
 
@@ -1338,7 +1584,9 @@  RTE_PMD_REGISTER_PARAM_STRING(net_pcap,
 	ETH_PCAP_RX_IFACE_IN_ARG "=<ifc> "
 	ETH_PCAP_TX_IFACE_ARG "=<ifc> "
 	ETH_PCAP_IFACE_ARG "=<ifc> "
-	ETH_PCAP_PHY_MAC_ARG "=<int>");
+	ETH_PCAP_PHY_MAC_ARG "=<int>"
+	ETH_PCAP_INFINITE_RX_ARG "=<0|1>"
+	ETH_PCAP_TX_DROP_ARG "=<0|1>");
 
 RTE_INIT(eth_pcap_init_log)
 {