get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/1129/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 1129,
    "url": "https://patches.dpdk.org/api/patches/1129/?format=api",
    "web_url": "https://patches.dpdk.org/project/dpdk/patch/1415152183-119796-6-git-send-email-yongwang@vmware.com/",
    "project": {
        "id": 1,
        "url": "https://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<1415152183-119796-6-git-send-email-yongwang@vmware.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/1415152183-119796-6-git-send-email-yongwang@vmware.com",
    "date": "2014-11-05T01:49:42",
    "name": "[dpdk-dev,v2,5/6] vmxnet3: Perf improvement on the rx path",
    "commit_ref": null,
    "pull_url": null,
    "state": "accepted",
    "archived": true,
    "hash": "552cf9aa33a93bd3daf1a020c95ad3491c472bd6",
    "submitter": {
        "id": 93,
        "url": "https://patches.dpdk.org/api/people/93/?format=api",
        "name": "Yong Wang",
        "email": "yongwang@vmware.com"
    },
    "delegate": null,
    "mbox": "https://patches.dpdk.org/project/dpdk/patch/1415152183-119796-6-git-send-email-yongwang@vmware.com/mbox/",
    "series": [],
    "comments": "https://patches.dpdk.org/api/patches/1129/comments/",
    "check": "pending",
    "checks": "https://patches.dpdk.org/api/patches/1129/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@dpdk.org",
        "Delivered-To": "patchwork@dpdk.org",
        "Received": [
            "from [92.243.14.124] (localhost [IPv6:::1])\n\tby dpdk.org (Postfix) with ESMTP id CAACC7F39;\n\tWed,  5 Nov 2014 02:40:34 +0100 (CET)",
            "from smtp-outbound-2.vmware.com (smtp-outbound-2.vmware.com\n\t[208.91.2.13]) by dpdk.org (Postfix) with ESMTP id D3F265944\n\tfor <dev@dpdk.org>; Wed,  5 Nov 2014 02:40:30 +0100 (CET)",
            "from sc9-mailhost1.vmware.com (sc9-mailhost1.vmware.com\n\t[10.113.161.71])\n\tby smtp-outbound-2.vmware.com (Postfix) with ESMTP id 2202C289F8\n\tfor <dev@dpdk.org>; Tue,  4 Nov 2014 17:49:47 -0800 (PST)",
            "from sc9-mailhost2.vmware.com (unknown [10.32.43.10])\n\tby sc9-mailhost1.vmware.com (Postfix) with ESMTP id 0452B191B1\n\tfor <dev@dpdk.org>; Tue,  4 Nov 2014 17:49:46 -0800 (PST)"
        ],
        "From": "Yong Wang <yongwang@vmware.com>",
        "To": "dev@dpdk.org",
        "Date": "Tue,  4 Nov 2014 17:49:42 -0800",
        "Message-Id": "<1415152183-119796-6-git-send-email-yongwang@vmware.com>",
        "X-Mailer": "git-send-email 1.9.1",
        "In-Reply-To": "<1415152183-119796-1-git-send-email-yongwang@vmware.com>",
        "References": "<1415152183-119796-1-git-send-email-yongwang@vmware.com>",
        "Subject": "[dpdk-dev] [PATCH v2 5/6] vmxnet3: Perf improvement on the rx path",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.15",
        "Precedence": "list",
        "List-Id": "patches and discussions about DPDK <dev.dpdk.org>",
        "List-Unsubscribe": "<http://dpdk.org/ml/options/dev>,\n\t<mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://dpdk.org/ml/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<http://dpdk.org/ml/listinfo/dev>,\n\t<mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>"
    },
    "content": "This patch includes two small performance optimizations\non the rx path:\n\n(1) It adds unlikely hints on various infrequent error\npaths to the compiler to make branch prediction more\nefficient.\n\n(2) It also moves a constant assignment out of the pkt\npolling loop.  This saves one branching per packet.\n\nPerformance evaluation configs:\n- On the DPDK-side, it's running some l3 forwarding app\ninside a VM on ESXi with one core assigned for polling.\n- On the client side, pktgen/dpdk is used to generate\n64B tcp packets at line rate (14.8M PPS).\n\nPerformance results on a Nehalem box (4cores@2.8GHzx2)\nshown below.  CPU usage is collected factoring out the\nidle loop cost.\n- Before the patch, ~900K PPS with 65% CPU of a core\nused for DPDK.\n- After the patch, only 45% of a core used, while\nmaintaining the same packet rate.\n\nSigned-off-by: Yong Wang <yongwang@vmware.com>\n---\n lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c | 242 ++++++++++++++++------------------\n 1 file changed, 116 insertions(+), 126 deletions(-)",
    "diff": "diff --git a/lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c b/lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c\nindex e2fb8a8..4799f4d 100644\n--- a/lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c\n+++ b/lib/librte_pmd_vmxnet3/vmxnet3_rxtx.c\n@@ -451,6 +451,19 @@ vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)\n \tuint32_t i = 0, val = 0;\n \tstruct vmxnet3_cmd_ring *ring = &rxq->cmd_ring[ring_id];\n \n+\tif (ring_id == 0) {\n+\t\t/* Usually: One HEAD type buf per packet\n+\t\t * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?\n+\t\t * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;\n+\t\t */\n+\n+\t\t/* We use single packet buffer so all heads here */\n+\t\tval = VMXNET3_RXD_BTYPE_HEAD;\n+\t} else {\n+\t\t/* All BODY type buffers for 2nd ring */\n+\t\tval = VMXNET3_RXD_BTYPE_BODY;\n+\t}\n+\n \twhile (vmxnet3_cmd_ring_desc_avail(ring) > 0) {\n \t\tstruct Vmxnet3_RxDesc *rxd;\n \t\tstruct rte_mbuf *mbuf;\n@@ -458,22 +471,9 @@ vmxnet3_post_rx_bufs(vmxnet3_rx_queue_t *rxq, uint8_t ring_id)\n \n \t\trxd = (struct Vmxnet3_RxDesc *)(ring->base + ring->next2fill);\n \n-\t\tif (ring->rid == 0) {\n-\t\t\t/* Usually: One HEAD type buf per packet\n-\t\t\t * val = (ring->next2fill % rxq->hw->bufs_per_pkt) ?\n-\t\t\t * VMXNET3_RXD_BTYPE_BODY : VMXNET3_RXD_BTYPE_HEAD;\n-\t\t\t */\n-\n-\t\t\t/* We use single packet buffer so all heads here */\n-\t\t\tval = VMXNET3_RXD_BTYPE_HEAD;\n-\t\t} else {\n-\t\t\t/* All BODY type buffers for 2nd ring; which won't be used at all by ESXi */\n-\t\t\tval = VMXNET3_RXD_BTYPE_BODY;\n-\t\t}\n-\n \t\t/* Allocate blank mbuf for the current Rx Descriptor */\n \t\tmbuf = rte_rxmbuf_alloc(rxq->mp);\n-\t\tif (mbuf == NULL) {\n+\t\tif (unlikely(mbuf == NULL)) {\n \t\t\tPMD_RX_LOG(ERR, \"Error allocating mbuf in %s\", __func__);\n \t\t\trxq->stats.rx_buf_alloc_failure++;\n \t\t\terr = ENOMEM;\n@@ -536,151 +536,141 @@ vmxnet3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)\n \n \trcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;\n \n-\tif (rxq->stopped) {\n+\tif (unlikely(rxq->stopped)) {\n \t\tPMD_RX_LOG(DEBUG, \"Rx queue is stopped.\");\n \t\treturn 0;\n \t}\n \n \twhile (rcd->gen == rxq->comp_ring.gen) {\n-\n \t\tif (nb_rx >= nb_pkts)\n \t\t\tbreak;\n+\n \t\tidx = rcd->rxdIdx;\n \t\tring_idx = (uint8_t)((rcd->rqID == rxq->qid1) ? 0 : 1);\n \t\trxd = (Vmxnet3_RxDesc *)rxq->cmd_ring[ring_idx].base + idx;\n \t\trbi = rxq->cmd_ring[ring_idx].buf_info + idx;\n \n-\t\tif (rcd->sop != 1 || rcd->eop != 1) {\n+\t\tif (unlikely(rcd->sop != 1 || rcd->eop != 1)) {\n \t\t\trte_pktmbuf_free_seg(rbi->m);\n-\n \t\t\tPMD_RX_LOG(DEBUG, \"Packet spread across multiple buffers\\n)\");\n \t\t\tgoto rcd_done;\n+\t\t}\n \n-\t\t} else {\n-\n-\t\t\tPMD_RX_LOG(DEBUG, \"rxd idx: %d ring idx: %d.\", idx, ring_idx);\n+\t\tPMD_RX_LOG(DEBUG, \"rxd idx: %d ring idx: %d.\", idx, ring_idx);\n \n #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER\n-\t\t\tVMXNET3_ASSERT(rcd->len <= rxd->len);\n-\t\t\tVMXNET3_ASSERT(rbi->m);\n+\t\tVMXNET3_ASSERT(rcd->len <= rxd->len);\n+\t\tVMXNET3_ASSERT(rbi->m);\n #endif\n-\t\t\tif (rcd->len == 0) {\n-\t\t\t\tPMD_RX_LOG(DEBUG, \"Rx buf was skipped. rxring[%d][%d]\\n)\",\n-\t\t\t\t\t   ring_idx, idx);\n+\t\tif (unlikely(rcd->len == 0)) {\n+\t\t\tPMD_RX_LOG(DEBUG, \"Rx buf was skipped. rxring[%d][%d]\\n)\",\n+\t\t\t\t   ring_idx, idx);\n #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER\n-\t\t\t\tVMXNET3_ASSERT(rcd->sop && rcd->eop);\n+\t\t\tVMXNET3_ASSERT(rcd->sop && rcd->eop);\n #endif\n-\t\t\t\trte_pktmbuf_free_seg(rbi->m);\n-\n-\t\t\t\tgoto rcd_done;\n-\t\t\t}\n+\t\t\trte_pktmbuf_free_seg(rbi->m);\n+\t\t\tgoto rcd_done;\n+\t\t}\n \n-\t\t\t/* Assuming a packet is coming in a single packet buffer */\n-\t\t\tif (rxd->btype != VMXNET3_RXD_BTYPE_HEAD) {\n-\t\t\t\tPMD_RX_LOG(DEBUG,\n-\t\t\t\t\t   \"Alert : Misbehaving device, incorrect \"\n-\t\t\t\t\t   \" buffer type used. iPacket dropped.\");\n-\t\t\t\trte_pktmbuf_free_seg(rbi->m);\n-\t\t\t\tgoto rcd_done;\n-\t\t\t}\n+\t\t/* Assuming a packet is coming in a single packet buffer */\n+\t\tif (unlikely(rxd->btype != VMXNET3_RXD_BTYPE_HEAD)) {\n+\t\t\tPMD_RX_LOG(DEBUG,\n+\t\t\t\t   \"Alert : Misbehaving device, incorrect \"\n+\t\t\t\t   \" buffer type used. iPacket dropped.\");\n+\t\t\trte_pktmbuf_free_seg(rbi->m);\n+\t\t\tgoto rcd_done;\n+\t\t}\n #ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER\n-\t\t\tVMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);\n+\t\tVMXNET3_ASSERT(rxd->btype == VMXNET3_RXD_BTYPE_HEAD);\n #endif\n-\t\t\t/* Get the packet buffer pointer from buf_info */\n-\t\t\trxm = rbi->m;\n-\n-\t\t\t/* Clear descriptor associated buf_info to be reused */\n-\t\t\trbi->m = NULL;\n-\t\t\trbi->bufPA = 0;\n-\n-\t\t\t/* Update the index that we received a packet */\n-\t\t\trxq->cmd_ring[ring_idx].next2comp = idx;\n-\n-\t\t\t/* For RCD with EOP set, check if there is frame error */\n-\t\t\tif (rcd->err) {\n-\t\t\t\trxq->stats.drop_total++;\n-\t\t\t\trxq->stats.drop_err++;\n-\n-\t\t\t\tif (!rcd->fcs) {\n-\t\t\t\t\trxq->stats.drop_fcs++;\n-\t\t\t\t\tPMD_RX_LOG(ERR, \"Recv packet dropped due to frame err.\");\n-\t\t\t\t}\n-\t\t\t\tPMD_RX_LOG(ERR, \"Error in received packet rcd#:%d rxd:%d\",\n-\t\t\t\t\t   (int)(rcd - (struct Vmxnet3_RxCompDesc *)\n-\t\t\t\t\t\t rxq->comp_ring.base), rcd->rxdIdx);\n-\t\t\t\trte_pktmbuf_free_seg(rxm);\n-\n-\t\t\t\tgoto rcd_done;\n-\t\t\t}\n+\t\t/* Get the packet buffer pointer from buf_info */\n+\t\trxm = rbi->m;\n \n-\t\t\t/* Check for hardware stripped VLAN tag */\n-\t\t\tif (rcd->ts) {\n-\t\t\t\tPMD_RX_LOG(DEBUG, \"Received packet with vlan ID: %d.\",\n-\t\t\t\t\t   rcd->tci);\n-\t\t\t\trxm->ol_flags = PKT_RX_VLAN_PKT;\n-#ifdef RTE_LIBRTE_VMXNET3_DEBUG_DRIVER\n-\t\t\t\tVMXNET3_ASSERT(rxm &&\n-\t\t\t\t\t       rte_pktmbuf_mtod(rxm, void *));\n-#endif\n-\t\t\t\t/* Copy vlan tag in packet buffer */\n-\t\t\t\trxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);\n-\t\t\t} else {\n-\t\t\t\trxm->ol_flags = 0;\n-\t\t\t\trxm->vlan_tci = 0;\n-\t\t\t}\n+\t\t/* Clear descriptor associated buf_info to be reused */\n+\t\trbi->m = NULL;\n+\t\trbi->bufPA = 0;\n \n-\t\t\t/* Initialize newly received packet buffer */\n-\t\t\trxm->port = rxq->port_id;\n-\t\t\trxm->nb_segs = 1;\n-\t\t\trxm->next = NULL;\n-\t\t\trxm->pkt_len = (uint16_t)rcd->len;\n-\t\t\trxm->data_len = (uint16_t)rcd->len;\n-\t\t\trxm->port = rxq->port_id;\n-\t\t\trxm->data_off = RTE_PKTMBUF_HEADROOM;\n-\n-\t\t\t/* Check packet types, rx checksum errors, etc. Only support IPv4 so far. */\n-\t\t\tif (rcd->v4) {\n-\t\t\t\tstruct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);\n-\t\t\t\tstruct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);\n-\n-\t\t\t\tif (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))\n-\t\t\t\t\trxm->ol_flags |= PKT_RX_IPV4_HDR_EXT;\n-\t\t\t\telse\n-\t\t\t\t\trxm->ol_flags |= PKT_RX_IPV4_HDR;\n-\n-\t\t\t\tif (!rcd->cnc) {\n-\t\t\t\t\tif (!rcd->ipc)\n-\t\t\t\t\t\trxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;\n-\n-\t\t\t\t\tif ((rcd->tcp || rcd->udp) && !rcd->tuc)\n-\t\t\t\t\t\trxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;\n-\t\t\t\t}\n-\t\t\t}\n+\t\t/* Update the index that we received a packet */\n+\t\trxq->cmd_ring[ring_idx].next2comp = idx;\n \n-\t\t\trx_pkts[nb_rx++] = rxm;\n-rcd_done:\n-\t\t\trxq->cmd_ring[ring_idx].next2comp = idx;\n-\t\t\tVMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);\n+\t\t/* For RCD with EOP set, check if there is frame error */\n+\t\tif (unlikely(rcd->err)) {\n+\t\t\trxq->stats.drop_total++;\n+\t\t\trxq->stats.drop_err++;\n \n-\t\t\t/* It's time to allocate some new buf and renew descriptors */\n-\t\t\tvmxnet3_post_rx_bufs(rxq, ring_idx);\n-\t\t\tif (unlikely(rxq->shared->ctrl.updateRxProd)) {\n-\t\t\t\tVMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),\n-\t\t\t\t\t\t       rxq->cmd_ring[ring_idx].next2fill);\n+\t\t\tif (!rcd->fcs) {\n+\t\t\t\trxq->stats.drop_fcs++;\n+\t\t\t\tPMD_RX_LOG(ERR, \"Recv packet dropped due to frame err.\");\n \t\t\t}\n+\t\t\tPMD_RX_LOG(ERR, \"Error in received packet rcd#:%d rxd:%d\",\n+\t\t\t\t   (int)(rcd - (struct Vmxnet3_RxCompDesc *)\n+\t\t\t\t\t rxq->comp_ring.base), rcd->rxdIdx);\n+\t\t\trte_pktmbuf_free_seg(rxm);\n+\t\t\tgoto rcd_done;\n+\t\t}\n \n-\t\t\t/* Advance to the next descriptor in comp_ring */\n-\t\t\tvmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);\n+\t\t/* Check for hardware stripped VLAN tag */\n+\t\tif (rcd->ts) {\n+\t\t\tPMD_RX_LOG(DEBUG, \"Received packet with vlan ID: %d.\",\n+\t\t\t\t   rcd->tci);\n+\t\t\trxm->ol_flags = PKT_RX_VLAN_PKT;\n+\t\t\t/* Copy vlan tag in packet buffer */\n+\t\t\trxm->vlan_tci = rte_le_to_cpu_16((uint16_t)rcd->tci);\n+\t\t} else {\n+\t\t\trxm->ol_flags = 0;\n+\t\t\trxm->vlan_tci = 0;\n+\t\t}\n \n-\t\t\trcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;\n-\t\t\tnb_rxd++;\n-\t\t\tif (nb_rxd > rxq->cmd_ring[0].size) {\n-\t\t\t\tPMD_RX_LOG(ERR,\n-\t\t\t\t\t   \"Used up quota of receiving packets,\"\n-\t\t\t\t\t   \" relinquish control.\");\n-\t\t\t\tbreak;\n+\t\t/* Initialize newly received packet buffer */\n+\t\trxm->port = rxq->port_id;\n+\t\trxm->nb_segs = 1;\n+\t\trxm->next = NULL;\n+\t\trxm->pkt_len = (uint16_t)rcd->len;\n+\t\trxm->data_len = (uint16_t)rcd->len;\n+\t\trxm->data_off = RTE_PKTMBUF_HEADROOM;\n+\n+\t\t/* Check packet type, checksum errors, etc. Only support IPv4 for now. */\n+\t\tif (rcd->v4) {\n+\t\t\tstruct ether_hdr *eth = rte_pktmbuf_mtod(rxm, struct ether_hdr *);\n+\t\t\tstruct ipv4_hdr *ip = (struct ipv4_hdr *)(eth + 1);\n+\n+\t\t\tif (((ip->version_ihl & 0xf) << 2) > (int)sizeof(struct ipv4_hdr))\n+\t\t\t\trxm->ol_flags |= PKT_RX_IPV4_HDR_EXT;\n+\t\t\telse\n+\t\t\t\trxm->ol_flags |= PKT_RX_IPV4_HDR;\n+\n+\t\t\tif (!rcd->cnc) {\n+\t\t\t\tif (!rcd->ipc)\n+\t\t\t\t\trxm->ol_flags |= PKT_RX_IP_CKSUM_BAD;\n+\n+\t\t\t\tif ((rcd->tcp || rcd->udp) && !rcd->tuc)\n+\t\t\t\t\trxm->ol_flags |= PKT_RX_L4_CKSUM_BAD;\n \t\t\t}\n \t\t}\n+\n+\t\trx_pkts[nb_rx++] = rxm;\n+rcd_done:\n+\t\trxq->cmd_ring[ring_idx].next2comp = idx;\n+\t\tVMXNET3_INC_RING_IDX_ONLY(rxq->cmd_ring[ring_idx].next2comp, rxq->cmd_ring[ring_idx].size);\n+\n+\t\t/* It's time to allocate some new buf and renew descriptors */\n+\t\tvmxnet3_post_rx_bufs(rxq, ring_idx);\n+\t\tif (unlikely(rxq->shared->ctrl.updateRxProd)) {\n+\t\t\tVMXNET3_WRITE_BAR0_REG(hw, rxprod_reg[ring_idx] + (rxq->queue_id * VMXNET3_REG_ALIGN),\n+\t\t\t\t\t       rxq->cmd_ring[ring_idx].next2fill);\n+\t\t}\n+\n+\t\t/* Advance to the next descriptor in comp_ring */\n+\t\tvmxnet3_comp_ring_adv_next2proc(&rxq->comp_ring);\n+\n+\t\trcd = &rxq->comp_ring.base[rxq->comp_ring.next2proc].rcd;\n+\t\tnb_rxd++;\n+\t\tif (nb_rxd > rxq->cmd_ring[0].size) {\n+\t\t\tPMD_RX_LOG(ERR,\n+\t\t\t\t   \"Used up quota of receiving packets,\"\n+\t\t\t\t   \" relinquish control.\");\n+\t\t\tbreak;\n+\t\t}\n \t}\n \n \treturn nb_rx;\n",
    "prefixes": [
        "dpdk-dev",
        "v2",
        "5/6"
    ]
}