From patchwork Wed Nov 5 01:49:42 2014 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yong Wang X-Patchwork-Id: 1129 Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [IPv6:::1]) by dpdk.org (Postfix) with ESMTP id CAACC7F39; Wed, 5 Nov 2014 02:40:34 +0100 (CET) Received: from smtp-outbound-2.vmware.com (smtp-outbound-2.vmware.com [208.91.2.13]) by dpdk.org (Postfix) with ESMTP id D3F265944 for ; Wed, 5 Nov 2014 02:40:30 +0100 (CET) Received: from sc9-mailhost1.vmware.com (sc9-mailhost1.vmware.com [10.113.161.71]) by smtp-outbound-2.vmware.com (Postfix) with ESMTP id 2202C289F8 for ; Tue, 4 Nov 2014 17:49:47 -0800 (PST) Received: from sc9-mailhost2.vmware.com (unknown [10.32.43.10]) by sc9-mailhost1.vmware.com (Postfix) with ESMTP id 0452B191B1 for ; Tue, 4 Nov 2014 17:49:46 -0800 (PST) From: Yong Wang To: dev@dpdk.org Date: Tue, 4 Nov 2014 17:49:42 -0800 Message-Id: <1415152183-119796-6-git-send-email-yongwang@vmware.com> X-Mailer: git-send-email 1.9.1 In-Reply-To: <1415152183-119796-1-git-send-email-yongwang@vmware.com> References: <1415152183-119796-1-git-send-email-yongwang@vmware.com> Subject: [dpdk-dev] [PATCH v2 5/6] vmxnet3: Perf improvement on the rx path X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" This patch includes two small performance optimizations on the rx path: (1) It adds unlikely hints on various infrequent error paths to the compiler to make branch prediction more efficient. (2) It also moves a constant assignment out of the pkt polling loop. This saves one branching per packet. Performance evaluation configs: - On the DPDK-side, it's running some l3 forwarding app inside a VM on ESXi with one core assigned for polling. - On the client side, pktgen/dpdk is used to generate 64B tcp packets at line rate (14.8M PPS). Performance results on a Nehalem box (4cores@2.8GHzx2) shown below. CPU usage is collected factoring out the idle loop cost. - Before the patch, ~900K PPS with 65% CPU of a core used for DPDK. - After the patch, only 45% of a core used, while maintaining the same packet rate. Signed-off-by: Yong Wang --- lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c | 242 ++++++++++++++++------------------ 1 file changed, 116 insertions(+), 126 deletions(-) diff --git a/lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c b/lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c index e2fb8a8..4799f4d 100644 --- a/lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c +++ b/lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c @@ -451,6 +451,19 @@ vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id) uint32_t i = 0, val = 0; struct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id]; + if (ring_id == 0) { + /* Usually: One HEAD type buf per packet + * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ? + * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD; + */ + + /* We use single packet buffer so all heads here */ + val = VMXNET3_RXD_BTYPE_HEAD; + } else { + /* All BODY type buffers for 2nd ring */ + val = VMXNET3_RXD_BTYPE_BODY; + } + while (vmxnet3_cmd_ring_desc_avail(ring) > 0) { struct Vmxnet3_RxDesc *rxd; struct rte_mbuf *mbuf; @@ -458,22 +471,9 @@ vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id) rxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill); - if (ring->rid == 0) { - /* Usually: One HEAD type buf per packet - * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ? - * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD; - */ - - /* We use single packet buffer so all heads here */ - val = VMXNET3_RXD_BTYPE_HEAD; - } else { - /* All BODY type buffers for 2nd ring; which won't be used at all by ESXi */ - val = VMXNET3_RXD_BTYPE_BODY; - } - /* Allocate blank mbuf for the current Rx Descriptor */ mbuf = rte_rxmbuf_alloc(rxq->mp); - if (mbuf == NULL) { + if (unlikely(mbuf == NULL)) { PMD_RX_LOG(ERR, "Error allocating mbuf in %s", __func__); rxq->stats.rx_buf_alloc_failure++; err = ENOMEM; @@ -536,151 +536,141 @@ vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd; - if (rxq->stopped) { + if (unlikely(rxq->stopped)) { PMD_RX_LOG(DEBUG, "Rx queue is stopped."); return 0; } while (rcd->gen == rxq->comp_ring.gen) { - if (nb_rx >= nb_pkts) break; + idx = rcd->rxdIdx; ring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1); rxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx; rbi = rxq->cmd_ring[ring_idx].buf_info + idx; - if (rcd->sop != 1 || rcd->eop != 1) { + if (unlikely(rcd->sop != 1 || rcd->eop != 1)) { rte_pktmbuf_free_seg(rbi->m); - PMD_RX_LOG(DEBUG, "Packet spread across multiple buffers\n)"); goto rcd_done; + } - } else { - - PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx); + PMD_RX_LOG(DEBUG, "rxd idx: %d ring idx: %d.", idx, ring_idx); #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER - VMXNET3_ASSERT(rcd->len <= rxd->len); - VMXNET3_ASSERT(rbi->m); + VMXNET3_ASSERT(rcd->len <= rxd->len); + VMXNET3_ASSERT(rbi->m); #endif - if (rcd->len == 0) { - PMD_RX_LOG(DEBUG, "Rx buf was skipped. rxring[%d][%d]\n)", - ring_idx, idx); + if (unlikely(rcd->len == 0)) { + PMD_RX_LOG(DEBUG, "Rx buf was skipped. rxring[%d][%d]\n)", + ring_idx, idx); #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER - VMXNET3_ASSERT(rcd->sop && rcd->eop); + VMXNET3_ASSERT(rcd->sop && rcd->eop); #endif - rte_pktmbuf_free_seg(rbi->m); - - goto rcd_done; - } + rte_pktmbuf_free_seg(rbi->m); + goto rcd_done; + } - /* Assuming a packet is coming in a single packet buffer */ - if (rxd->btype != VMXNET3_RXD_BTYPE_HEAD) { - PMD_RX_LOG(DEBUG, - "Alert : Misbehaving device, incorrect " - " buffer type used. iPacket dropped."); - rte_pktmbuf_free_seg(rbi->m); - goto rcd_done; - } + /* Assuming a packet is coming in a single packet buffer */ + if (unlikely(rxd->btype != VMXNET3_RXD_BTYPE_HEAD)) { + PMD_RX_LOG(DEBUG, + "Alert : Misbehaving device, incorrect " + " buffer type used. iPacket dropped."); + rte_pktmbuf_free_seg(rbi->m); + goto rcd_done; + } #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER - VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD); + VMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD); #endif - /* Get the packet buffer pointer from buf_info */ - rxm = rbi->m; - - /* Clear descriptor associated buf_info to be reused */ - rbi->m = NULL; - rbi->bufPA = 0; - - /* Update the index that we received a packet */ - rxq->cmd_ring[ring_idx].next2comp = idx; - - /* For RCD with EOP set, check if there is frame error */ - if (rcd->err) { - rxq->stats.drop_total++; - rxq->stats.drop_err++; - - if (!rcd->fcs) { - rxq->stats.drop_fcs++; - PMD_RX_LOG(ERR, "Recv packet dropped due to frame err."); - } - PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d", - (int)(rcd - (struct Vmxnet3_RxCompDesc *) - rxq->comp_ring.base), rcd->rxdIdx); - rte_pktmbuf_free_seg(rxm); - - goto rcd_done; - } + /* Get the packet buffer pointer from buf_info */ + rxm = rbi->m; - /* Check for hardware stripped VLAN tag */ - if (rcd->ts) { - PMD_RX_LOG(DEBUG, "Received packet with vlan ID: %d.", - rcd->tci); - rxm->ol_flags = PKT_RX_VLAN_PKT; -#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER - VMXNET3_ASSERT(rxm && - rte_pktmbuf_mtod(rxm, void *)); -#endif - /* Copy vlan tag in packet buffer */ - rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci); - } else { - rxm->ol_flags = 0; - rxm->vlan_tci = 0; - } + /* Clear descriptor associated buf_info to be reused */ + rbi->m = NULL; + rbi->bufPA = 0; - /* Initialize newly received packet buffer */ - rxm->port = rxq->port_id; - rxm->nb_segs = 1; - rxm->next = NULL; - rxm->pkt_len = (uint16_t)rcd->len; - rxm->data_len = (uint16_t)rcd->len; - rxm->port = rxq->port_id; - rxm->data_off = RTE_PKTMBUF_HEADROOM; - - /* Check packet types, rx checksum errors, etc. Only support IPv4 so far. */ - if (rcd->v4) { - struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *); - struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1); - - if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr)) - rxm->ol_flags |= PKT_RX_IPV4_HDR_EXT; - else - rxm->ol_flags |= PKT_RX_IPV4_HDR; - - if (!rcd->cnc) { - if (!rcd->ipc) - rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD; - - if ((rcd->tcp || rcd->udp) && !rcd->tuc) - rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD; - } - } + /* Update the index that we received a packet */ + rxq->cmd_ring[ring_idx].next2comp = idx; - rx_pkts[nb_rx++] = rxm; -rcd_done: - rxq->cmd_ring[ring_idx].next2comp = idx; - VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size); + /* For RCD with EOP set, check if there is frame error */ + if (unlikely(rcd->err)) { + rxq->stats.drop_total++; + rxq->stats.drop_err++; - /* It's time to allocate some new buf and renew descriptors */ - vmxnet3_post_rx_bufs(rxq, ring_idx); - if (unlikely(rxq->shared->ctrl.updateRxProd)) { - VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN), - rxq->cmd_ring[ring_idx].next2fill); + if (!rcd->fcs) { + rxq->stats.drop_fcs++; + PMD_RX_LOG(ERR, "Recv packet dropped due to frame err."); } + PMD_RX_LOG(ERR, "Error in received packet rcd#:%d rxd:%d", + (int)(rcd - (struct Vmxnet3_RxCompDesc *) + rxq->comp_ring.base), rcd->rxdIdx); + rte_pktmbuf_free_seg(rxm); + goto rcd_done; + } - /* Advance to the next descriptor in comp_ring */ - vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring); + /* Check for hardware stripped VLAN tag */ + if (rcd->ts) { + PMD_RX_LOG(DEBUG, "Received packet with vlan ID: %d.", + rcd->tci); + rxm->ol_flags = PKT_RX_VLAN_PKT; + /* Copy vlan tag in packet buffer */ + rxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci); + } else { + rxm->ol_flags = 0; + rxm->vlan_tci = 0; + } - rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd; - nb_rxd++; - if (nb_rxd > rxq->cmd_ring[0].size) { - PMD_RX_LOG(ERR, - "Used up quota of receiving packets," - " relinquish control."); - break; + /* Initialize newly received packet buffer */ + rxm->port = rxq->port_id; + rxm->nb_segs = 1; + rxm->next = NULL; + rxm->pkt_len = (uint16_t)rcd->len; + rxm->data_len = (uint16_t)rcd->len; + rxm->data_off = RTE_PKTMBUF_HEADROOM; + + /* Check packet type, checksum errors, etc. Only support IPv4 for now. */ + if (rcd->v4) { + struct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *); + struct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1); + + if (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr)) + rxm->ol_flags |= PKT_RX_IPV4_HDR_EXT; + else + rxm->ol_flags |= PKT_RX_IPV4_HDR; + + if (!rcd->cnc) { + if (!rcd->ipc) + rxm->ol_flags |= PKT_RX_IP_CKSUM_BAD; + + if ((rcd->tcp || rcd->udp) && !rcd->tuc) + rxm->ol_flags |= PKT_RX_L4_CKSUM_BAD; } } + + rx_pkts[nb_rx++] = rxm; +rcd_done: + rxq->cmd_ring[ring_idx].next2comp = idx; + VMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size); + + /* It's time to allocate some new buf and renew descriptors */ + vmxnet3_post_rx_bufs(rxq, ring_idx); + if (unlikely(rxq->shared->ctrl.updateRxProd)) { + VMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN), + rxq->cmd_ring[ring_idx].next2fill); + } + + /* Advance to the next descriptor in comp_ring */ + vmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring); + + rcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd; + nb_rxd++; + if (nb_rxd > rxq->cmd_ring[0].size) { + PMD_RX_LOG(ERR, + "Used up quota of receiving packets," + " relinquish control."); + break; + } } return nb_rx;