[v4,1/2] app/testpmd: optimize testpmd txonly mode
Checks
Commit Message
From: Pavan Nikhilesh <pbhagavatula@marvell.com>
Optimize testpmd txonly mode by
1. Moving per packet ethernet header copy above the loop.
2. Use bulk ops for allocating segments instead of having an inner loop
for every segment.
Also, move the packet prepare logic into a separate function so that it
can be reused later.
Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
---
v4 Changes:
- Fix packet len calculation.
v3 Changes:
- Split the patches for easier review. (Thomas)
- Remove unnecessary assignments to 0. (Bernard)
v2 Changes:
- Use bulk ops for fetching segments. (Andrew Rybchenko)
- Fallback to rte_mbuf_raw_alloc if bulk get fails. (Andrew Rybchenko)
- Fix mbufs not being freed when there is no more mbufs available for
segments. (Andrew Rybchenko)
app/test-pmd/txonly.c | 141 +++++++++++++++++++++++-------------------
1 file changed, 77 insertions(+), 64 deletions(-)
--
2.20.1
Comments
Hi Pavan,
> -----Original Message-----
> From: Pavan Nikhilesh Bhagavatula [mailto:pbhagavatula@marvell.com]
> Sent: Tuesday, March 26, 2019 1:03 PM
> To: Jerin Jacob Kollanukkaran <jerinj@marvell.com>; thomas@monjalon.net;
> arybchenko@solarflare.com; Yigit, Ferruh <ferruh.yigit@intel.com>;
> Iremonger, Bernard <bernard.iremonger@intel.com>
> Cc: dev@dpdk.org; Pavan Nikhilesh Bhagavatula
> <pbhagavatula@marvell.com>
> Subject: [dpdk-dev] [PATCH v4 1/2] app/testpmd: optimize testpmd txonly
> mode
>
> From: Pavan Nikhilesh <pbhagavatula@marvell.com>
>
> Optimize testpmd txonly mode by
> 1. Moving per packet ethernet header copy above the loop.
> 2. Use bulk ops for allocating segments instead of having a inner loop for
> every segment.
>
> Also, move the packet prepare logic into a separate function so that it can be
> reused later.
>
> Signed-off-by: Pavan Nikhilesh <pbhagavatula@marvell.com>
> ---
> v4 Changes:
> - Fix packet len calculation.
>
> v3 Changes:
> - Split the patches for easier review. (Thomas)
> - Remove unnecessary assignments to 0. (Bernard)
>
> v2 Changes:
> - Use bulk ops for fetching segments. (Andrew Rybchenko)
> - Fallback to rte_mbuf_raw_alloc if bulk get fails. (Andrew Rybchenko)
> - Fix mbufs not being freed when there is no more mbufs available for
> segments. (Andrew Rybchenko)
>
> app/test-pmd/txonly.c | 141 +++++++++++++++++++++++-------------------
> 1 file changed, 77 insertions(+), 64 deletions(-)
>
> diff --git a/app/test-pmd/txonly.c b/app/test-pmd/txonly.c index
> 1f08b6ed3..8d49e41b1 100644
> --- a/app/test-pmd/txonly.c
> +++ b/app/test-pmd/txonly.c
> @@ -147,6 +147,63 @@ setup_pkt_udp_ip_headers(struct ipv4_hdr
> *ip_hdr,
> ip_hdr->hdr_checksum = (uint16_t) ip_cksum; }
>
> +static inline bool
> +pkt_burst_prepare(struct rte_mbuf *pkt, struct rte_mempool *mbp,
> + struct ether_hdr *eth_hdr, const uint16_t vlan_tci,
> + const uint16_t vlan_tci_outer, const uint64_t ol_flags) {
> + struct rte_mbuf *pkt_segs[RTE_MAX_SEGS_PER_PKT];
> + struct rte_mbuf *pkt_seg;
> + uint32_t nb_segs, pkt_len;
> + uint8_t i;
> +
> + if (unlikely(tx_pkt_split == TX_PKT_SPLIT_RND))
> + nb_segs = random() % tx_pkt_nb_segs + 1;
> + else
> + nb_segs = tx_pkt_nb_segs;
> +
> + if (nb_segs > 1) {
> + if (rte_mempool_get_bulk(mbp, (void **)pkt_segs,
> nb_segs))
> + return false;
> + }
> +
> + rte_pktmbuf_reset_headroom(pkt);
> + pkt->data_len = tx_pkt_seg_lengths[0];
> + pkt->ol_flags = ol_flags;
> + pkt->vlan_tci = vlan_tci;
> + pkt->vlan_tci_outer = vlan_tci_outer;
> + pkt->l2_len = sizeof(struct ether_hdr);
> + pkt->l3_len = sizeof(struct ipv4_hdr);
> +
> + pkt_len = pkt->data_len;
> + pkt_seg = pkt;
> + for (i = 1; i < nb_segs; i++) {
> + pkt_seg->next = pkt_segs[i - 1];
> + pkt_seg = pkt_seg->next;
> + pkt_seg->data_len = tx_pkt_seg_lengths[i];
> + pkt_len += pkt_seg->data_len;
> + }
> + pkt_seg->next = NULL; /* Last segment of packet. */
> + /*
> + * Copy headers in first packet segment(s).
> + */
> + copy_buf_to_pkt(eth_hdr, sizeof(eth_hdr), pkt, 0);
> + copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
> + sizeof(struct ether_hdr));
> + copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
> + sizeof(struct ether_hdr) +
> + sizeof(struct ipv4_hdr));
> +
> + /*
> + * Complete first mbuf of packet and append it to the
> + * burst of packets to be transmitted.
> + */
> + pkt->nb_segs = nb_segs;
> + pkt->pkt_len = pkt_len;
> +
> + return true;
> +}
> +
> /*
> * Transmit a burst of multi-segments packets.
> */
> @@ -154,9 +211,8 @@ static void
> pkt_burst_transmit(struct fwd_stream *fs) {
> struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
> - struct rte_port *txp;
> struct rte_mbuf *pkt;
> - struct rte_mbuf *pkt_seg;
> + struct rte_port *txp;
Unnecessary change to struct rte_port *txp still there.
> struct rte_mempool *mbp;
> struct ether_hdr eth_hdr;
> uint16_t nb_tx;
> @@ -164,14 +220,12 @@ pkt_burst_transmit(struct fwd_stream *fs)
> uint16_t vlan_tci, vlan_tci_outer;
> uint32_t retry;
> uint64_t ol_flags = 0;
> - uint8_t i;
> uint64_t tx_offloads;
> #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
> uint64_t start_tsc;
> uint64_t end_tsc;
> uint64_t core_cycles;
> #endif
> - uint32_t nb_segs, pkt_len;
>
> #ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
> start_tsc = rte_rdtsc();
> @@ -188,72 +242,31 @@ pkt_burst_transmit(struct fwd_stream *fs)
> ol_flags |= PKT_TX_QINQ_PKT;
> if (tx_offloads & DEV_TX_OFFLOAD_MACSEC_INSERT)
> ol_flags |= PKT_TX_MACSEC;
> +
> + /*
> + * Initialize Ethernet header.
> + */
> + ether_addr_copy(&peer_eth_addrs[fs->peer_addr],
> &eth_hdr.d_addr);
> + ether_addr_copy(&ports[fs->tx_port].eth_addr, &eth_hdr.s_addr);
> + eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
> +
> for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
> pkt = rte_mbuf_raw_alloc(mbp);
> - if (pkt == NULL) {
> - nomore_mbuf:
> - if (nb_pkt == 0)
> - return;
> + if (pkt == NULL)
> + break;
> + if (unlikely(!pkt_burst_prepare(pkt, mbp,
> + &eth_hdr, vlan_tci,
> + vlan_tci_outer,
> + ol_flags))) {
> + rte_mempool_put(mbp, pkt);
> break;
> }
> -
> - /*
> - * Using raw alloc is good to improve performance,
> - * but some consumers may use the headroom and so
> - * decrement data_off. We need to make sure it is
> - * reset to default value.
> - */
> - rte_pktmbuf_reset_headroom(pkt);
> - pkt->data_len = tx_pkt_seg_lengths[0];
> - pkt_seg = pkt;
> - if (tx_pkt_split == TX_PKT_SPLIT_RND)
> - nb_segs = random() % tx_pkt_nb_segs + 1;
> - else
> - nb_segs = tx_pkt_nb_segs;
> - pkt_len = pkt->data_len;
> - for (i = 1; i < nb_segs; i++) {
> - pkt_seg->next = rte_mbuf_raw_alloc(mbp);
> - if (pkt_seg->next == NULL) {
> - pkt->nb_segs = i;
> - rte_pktmbuf_free(pkt);
> - goto nomore_mbuf;
> - }
> - pkt_seg = pkt_seg->next;
> - pkt_seg->data_len = tx_pkt_seg_lengths[i];
> - pkt_len += pkt_seg->data_len;
> - }
> - pkt_seg->next = NULL; /* Last segment of packet. */
> -
> - /*
> - * Initialize Ethernet header.
> - */
> - ether_addr_copy(&peer_eth_addrs[fs-
> >peer_addr],&eth_hdr.d_addr);
> - ether_addr_copy(&ports[fs->tx_port].eth_addr,
> &eth_hdr.s_addr);
> - eth_hdr.ether_type =
> rte_cpu_to_be_16(ETHER_TYPE_IPv4);
> -
> - /*
> - * Copy headers in first packet segment(s).
> - */
> - copy_buf_to_pkt(&eth_hdr, sizeof(eth_hdr), pkt, 0);
> - copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
> - sizeof(struct ether_hdr));
> - copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
> - sizeof(struct ether_hdr) +
> - sizeof(struct ipv4_hdr));
> -
> - /*
> - * Complete first mbuf of packet and append it to the
> - * burst of packets to be transmitted.
> - */
> - pkt->nb_segs = nb_segs;
> - pkt->pkt_len = pkt_len;
> - pkt->ol_flags = ol_flags;
> - pkt->vlan_tci = vlan_tci;
> - pkt->vlan_tci_outer = vlan_tci_outer;
> - pkt->l2_len = sizeof(struct ether_hdr);
> - pkt->l3_len = sizeof(struct ipv4_hdr);
> pkts_burst[nb_pkt] = pkt;
> }
> +
> + if (nb_pkt == 0)
> + return;
> +
> nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst,
> nb_pkt);
> /*
> * Retry if necessary
> --
> 2.20.1
Regards,
Bernard.
@@ -147,6 +147,63 @@ setup_pkt_udp_ip_headers(struct ipv4_hdr *ip_hdr,
ip_hdr->hdr_checksum = (uint16_t) ip_cksum;
}
+static inline bool
+pkt_burst_prepare(struct rte_mbuf *pkt, struct rte_mempool *mbp,
+ struct ether_hdr *eth_hdr, const uint16_t vlan_tci,
+ const uint16_t vlan_tci_outer, const uint64_t ol_flags)
+{
+ struct rte_mbuf *pkt_segs[RTE_MAX_SEGS_PER_PKT];
+ struct rte_mbuf *pkt_seg;
+ uint32_t nb_segs, pkt_len;
+ uint8_t i;
+
+ if (unlikely(tx_pkt_split == TX_PKT_SPLIT_RND))
+ nb_segs = random() % tx_pkt_nb_segs + 1;
+ else
+ nb_segs = tx_pkt_nb_segs;
+
+ if (nb_segs > 1) {
+ if (rte_mempool_get_bulk(mbp, (void **)pkt_segs, nb_segs))
+ return false;
+ }
+
+ rte_pktmbuf_reset_headroom(pkt);
+ pkt->data_len = tx_pkt_seg_lengths[0];
+ pkt->ol_flags = ol_flags;
+ pkt->vlan_tci = vlan_tci;
+ pkt->vlan_tci_outer = vlan_tci_outer;
+ pkt->l2_len = sizeof(struct ether_hdr);
+ pkt->l3_len = sizeof(struct ipv4_hdr);
+
+ pkt_len = pkt->data_len;
+ pkt_seg = pkt;
+ for (i = 1; i < nb_segs; i++) {
+ pkt_seg->next = pkt_segs[i - 1];
+ pkt_seg = pkt_seg->next;
+ pkt_seg->data_len = tx_pkt_seg_lengths[i];
+ pkt_len += pkt_seg->data_len;
+ }
+ pkt_seg->next = NULL; /* Last segment of packet. */
+ /*
+ * Copy headers in first packet segment(s).
+ */
+ copy_buf_to_pkt(eth_hdr, sizeof(eth_hdr), pkt, 0);
+ copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
+ sizeof(struct ether_hdr));
+ copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
+ sizeof(struct ether_hdr) +
+ sizeof(struct ipv4_hdr));
+
+ /*
+ * Complete first mbuf of packet and append it to the
+ * burst of packets to be transmitted.
+ */
+ pkt->nb_segs = nb_segs;
+ pkt->pkt_len = pkt_len;
+
+ return true;
+}
+
/*
* Transmit a burst of multi-segments packets.
*/
@@ -154,9 +211,8 @@ static void
pkt_burst_transmit(struct fwd_stream *fs)
{
struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
- struct rte_port *txp;
struct rte_mbuf *pkt;
- struct rte_mbuf *pkt_seg;
+ struct rte_port *txp;
struct rte_mempool *mbp;
struct ether_hdr eth_hdr;
uint16_t nb_tx;
@@ -164,14 +220,12 @@ pkt_burst_transmit(struct fwd_stream *fs)
uint16_t vlan_tci, vlan_tci_outer;
uint32_t retry;
uint64_t ol_flags = 0;
- uint8_t i;
uint64_t tx_offloads;
#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
uint64_t start_tsc;
uint64_t end_tsc;
uint64_t core_cycles;
#endif
- uint32_t nb_segs, pkt_len;
#ifdef RTE_TEST_PMD_RECORD_CORE_CYCLES
start_tsc = rte_rdtsc();
@@ -188,72 +242,31 @@ pkt_burst_transmit(struct fwd_stream *fs)
ol_flags |= PKT_TX_QINQ_PKT;
if (tx_offloads & DEV_TX_OFFLOAD_MACSEC_INSERT)
ol_flags |= PKT_TX_MACSEC;
+
+ /*
+ * Initialize Ethernet header.
+ */
+ ether_addr_copy(&peer_eth_addrs[fs->peer_addr], &eth_hdr.d_addr);
+ ether_addr_copy(&ports[fs->tx_port].eth_addr, &eth_hdr.s_addr);
+ eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
+
for (nb_pkt = 0; nb_pkt < nb_pkt_per_burst; nb_pkt++) {
pkt = rte_mbuf_raw_alloc(mbp);
- if (pkt == NULL) {
- nomore_mbuf:
- if (nb_pkt == 0)
- return;
+ if (pkt == NULL)
+ break;
+ if (unlikely(!pkt_burst_prepare(pkt, mbp,
+ &eth_hdr, vlan_tci,
+ vlan_tci_outer,
+ ol_flags))) {
+ rte_mempool_put(mbp, pkt);
break;
}
-
- /*
- * Using raw alloc is good to improve performance,
- * but some consumers may use the headroom and so
- * decrement data_off. We need to make sure it is
- * reset to default value.
- */
- rte_pktmbuf_reset_headroom(pkt);
- pkt->data_len = tx_pkt_seg_lengths[0];
- pkt_seg = pkt;
- if (tx_pkt_split == TX_PKT_SPLIT_RND)
- nb_segs = random() % tx_pkt_nb_segs + 1;
- else
- nb_segs = tx_pkt_nb_segs;
- pkt_len = pkt->data_len;
- for (i = 1; i < nb_segs; i++) {
- pkt_seg->next = rte_mbuf_raw_alloc(mbp);
- if (pkt_seg->next == NULL) {
- pkt->nb_segs = i;
- rte_pktmbuf_free(pkt);
- goto nomore_mbuf;
- }
- pkt_seg = pkt_seg->next;
- pkt_seg->data_len = tx_pkt_seg_lengths[i];
- pkt_len += pkt_seg->data_len;
- }
- pkt_seg->next = NULL; /* Last segment of packet. */
-
- /*
- * Initialize Ethernet header.
- */
- ether_addr_copy(&peer_eth_addrs[fs->peer_addr],&eth_hdr.d_addr);
- ether_addr_copy(&ports[fs->tx_port].eth_addr, &eth_hdr.s_addr);
- eth_hdr.ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
-
- /*
- * Copy headers in first packet segment(s).
- */
- copy_buf_to_pkt(&eth_hdr, sizeof(eth_hdr), pkt, 0);
- copy_buf_to_pkt(&pkt_ip_hdr, sizeof(pkt_ip_hdr), pkt,
- sizeof(struct ether_hdr));
- copy_buf_to_pkt(&pkt_udp_hdr, sizeof(pkt_udp_hdr), pkt,
- sizeof(struct ether_hdr) +
- sizeof(struct ipv4_hdr));
-
- /*
- * Complete first mbuf of packet and append it to the
- * burst of packets to be transmitted.
- */
- pkt->nb_segs = nb_segs;
- pkt->pkt_len = pkt_len;
- pkt->ol_flags = ol_flags;
- pkt->vlan_tci = vlan_tci;
- pkt->vlan_tci_outer = vlan_tci_outer;
- pkt->l2_len = sizeof(struct ether_hdr);
- pkt->l3_len = sizeof(struct ipv4_hdr);
pkts_burst[nb_pkt] = pkt;
}
+
+ if (nb_pkt == 0)
+ return;
+
nb_tx = rte_eth_tx_burst(fs->tx_port, fs->tx_queue, pkts_burst, nb_pkt);
/*
* Retry if necessary