Show a patch.

GET /api/patches/144/
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 144,
    "url": "https://patches.dpdk.org/api/patches/144/",
    "web_url": "https://patches.dpdk.org/patch/144/",
    "project": {
        "id": 1,
        "url": "https://patches.dpdk.org/api/projects/1/",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk"
    },
    "msgid": "<1407789890-17355-10-git-send-email-bruce.richardson@intel.com>",
    "date": "2014-08-11T20:44:45",
    "name": "[dpdk-dev,RFC,09/14] Fix performance regression due to moved pool ptr",
    "commit_ref": "",
    "pull_url": "",
    "state": "rfc",
    "archived": true,
    "hash": "bd3122740e003754548d52cd4411dc40542b2c39",
    "submitter": {
        "id": 20,
        "url": "https://patches.dpdk.org/api/people/20/",
        "name": "Bruce Richardson",
        "email": "bruce.richardson@intel.com"
    },
    "delegate": null,
    "mbox": "https://patches.dpdk.org/patch/144/mbox/",
    "series": [],
    "comments": "https://patches.dpdk.org/api/patches/144/comments/",
    "check": "pending",
    "checks": "https://patches.dpdk.org/api/patches/144/checks/",
    "tags": {},
    "headers": {
        "List-Subscribe": "<http://dpdk.org/ml/listinfo/dev>,\r\n\t<mailto:dev-request@dpdk.org?subject=subscribe>",
        "Date": "Mon, 11 Aug 2014 21:44:45 +0100",
        "In-Reply-To": "<1407789890-17355-1-git-send-email-bruce.richardson@intel.com>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "X-BeenThere": "dev@dpdk.org",
        "References": "<1407789890-17355-1-git-send-email-bruce.richardson@intel.com>",
        "X-Mailer": "git-send-email 1.7.0.7",
        "List-Archive": "<http://dpdk.org/ml/archives/dev/>",
        "To": "dev@dpdk.org",
        "Received": [
            "from mga09.intel.com (mga09.intel.com [134.134.136.24])\r\n\tby dpdk.org (Postfix) with ESMTP id 4C2B0B399\r\n\tfor <dev@dpdk.org>; Mon, 11 Aug 2014 22:44:07 +0200 (CEST)",
            "from orsmga002.jf.intel.com ([10.7.209.21])\r\n\tby orsmga102.jf.intel.com with ESMTP; 11 Aug 2014 13:41:07 -0700",
            "from irvmail001.ir.intel.com ([163.33.26.43])\r\n\tby orsmga002.jf.intel.com with ESMTP; 11 Aug 2014 13:44:53 -0700",
            "from sivswdev02.ir.intel.com (sivswdev02.ir.intel.com\r\n\t[10.237.217.46])\r\n\tby irvmail001.ir.intel.com (8.14.3/8.13.6/MailSET/Hub) with ESMTP id\r\n\ts7BKiqMx020958; Mon, 11 Aug 2014 21:44:52 +0100",
            "from sivswdev02.ir.intel.com (localhost [127.0.0.1])\r\n\tby sivswdev02.ir.intel.com with ESMTP id s7BKiqjv017702;\r\n\tMon, 11 Aug 2014 21:44:52 +0100",
            "(from bricha3@localhost)\r\n\tby sivswdev02.ir.intel.com with  id s7BKiqlk017698;\r\n\tMon, 11 Aug 2014 21:44:52 +0100"
        ],
        "From": "Bruce Richardson <bruce.richardson@intel.com>",
        "Precedence": "list",
        "X-ExtLoop1": "1",
        "Message-Id": "<1407789890-17355-10-git-send-email-bruce.richardson@intel.com>",
        "List-Id": "patches and discussions about DPDK <dev.dpdk.org>",
        "Subject": "[dpdk-dev] [RFC PATCH 09/14] Fix performance regression due to\r\n\tmoved pool ptr",
        "X-List-Received-Date": "Mon, 11 Aug 2014 20:44:08 -0000",
        "List-Unsubscribe": "<http://dpdk.org/ml/options/dev>,\r\n\t<mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "Return-Path": "<bricha3@ecsmtp.ir.intel.com>",
        "X-IronPort-AV": "E=Sophos;i=\"5.01,844,1400050800\"; d=\"scan'208\";a=\"586659693\"",
        "List-Post": "<mailto:dev@dpdk.org>",
        "X-Mailman-Version": "2.1.15"
    },
    "content": "Adjust the fast-path code to fix the regression caused by the pool\npointer moving to the second cache line. This change adjusts the\nprefetching and also the way in which the mbufs are freed back to the\nmempool.\n\nSigned-off-by: Bruce Richardson <bruce.richardson@intel.com>\n---\n lib/librte_pmd_ixgbe/ixgbe_rxtx.c     | 23 +++++----\n lib/librte_pmd_ixgbe/ixgbe_rxtx.h     | 12 -----\n lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c | 93 ++++++++++++++---------------------\n 3 files changed, 47 insertions(+), 81 deletions(-)",
    "diff": "diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c\r\nindex 1b0e272..fa3b357 100644\r\n--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c\r\n+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c\r\n@@ -142,10 +142,6 @@ ixgbe_tx_free_bufs(struct igb_tx_queue *txq)\r\n \t */\r\n \ttxep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);\r\n \r\n-\t/* prefetch the mbufs that are about to be freed */\r\n-\tfor (i = 0; i < txq->tx_rs_thresh; ++i)\r\n-\t\trte_prefetch0((txep + i)->mbuf);\r\n-\r\n \t/* free buffers one at a time */\r\n \tif ((txq->txq_flags & (uint32_t)ETH_TXQ_FLAGS_NOREFCOUNT) != 0) {\r\n \t\tfor (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {\r\n@@ -186,6 +182,7 @@ tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)\r\n \t\t\t\t((uint32_t)DCMD_DTYP_FLAGS | pkt_len);\r\n \t\ttxdp->read.olinfo_status =\r\n \t\t\t\t(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);\r\n+\t\trte_prefetch0(&(*pkts)->pool);\r\n \t}\r\n }\r\n \r\n@@ -205,6 +202,7 @@ tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)\r\n \t\t\t((uint32_t)DCMD_DTYP_FLAGS | pkt_len);\r\n \ttxdp->read.olinfo_status =\r\n \t\t\t(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);\r\n+\trte_prefetch0(&(*pkts)->pool);\r\n }\r\n \r\n /*\r\n@@ -252,14 +250,6 @@ tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,\r\n \tvolatile union ixgbe_adv_tx_desc *tx_r = txq->tx_ring;\r\n \tuint16_t n = 0;\r\n \r\n-\t/*\r\n-\t * Begin scanning the H/W ring for done descriptors when the\r\n-\t * number of available descriptors drops below tx_free_thresh.  For\r\n-\t * each done descriptor, free the associated buffer.\r\n-\t */\r\n-\tif (txq->nb_tx_free < txq->tx_free_thresh)\r\n-\t\tixgbe_tx_free_bufs(txq);\r\n-\r\n \t/* Only use descriptors that are available */\r\n \tnb_pkts = (uint16_t)RTE_MIN(txq->nb_tx_free, nb_pkts);\r\n \tif (unlikely(nb_pkts == 0))\r\n@@ -323,6 +313,15 @@ tx_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,\r\n \tif (txq->tx_tail >= txq->nb_tx_desc)\r\n \t\ttxq->tx_tail = 0;\r\n \r\n+\t/*\r\n+\t * Begin scanning the H/W ring for done descriptors when the\r\n+\t * number of available descriptors drops below tx_free_thresh.  For\r\n+\t * each done descriptor, free the associated buffer.\r\n+\t */\r\n+\tif (txq->nb_tx_free < txq->tx_free_thresh)\r\n+\t\tixgbe_tx_free_bufs(txq);\r\n+\r\n+\r\n \t/* update tail pointer */\r\n \trte_wmb();\r\n \tIXGBE_PCI_REG_WRITE(txq->tdt_reg_addr, txq->tx_tail);\r\ndiff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.h b/lib/librte_pmd_ixgbe/ixgbe_rxtx.h\r\nindex 1861f18..d9889d9 100644\r\n--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.h\r\n+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.h\r\n@@ -96,14 +96,6 @@ struct igb_tx_entry_v {\r\n };\r\n \r\n /**\r\n- * continuous entry sequence, gather by the same mempool\r\n- */\r\n-struct igb_tx_entry_seq {\r\n-\tconst struct rte_mempool* pool;\r\n-\tuint32_t same_pool;\r\n-};\r\n-\r\n-/**\r\n  * Structure associated with each RX queue.\r\n  */\r\n struct igb_rx_queue {\r\n@@ -170,10 +162,6 @@ struct igb_tx_queue {\r\n \tvolatile union ixgbe_adv_tx_desc *tx_ring;\r\n \tuint64_t            tx_ring_phys_addr; /**< TX ring DMA address. */\r\n \tstruct igb_tx_entry *sw_ring;      /**< virtual address of SW ring. */\r\n-#ifdef RTE_IXGBE_INC_VECTOR\r\n-\t/** continuous tx entry sequence within the same mempool */\r\n-\tstruct igb_tx_entry_seq *sw_ring_seq;\r\n-#endif\r\n \tvolatile uint32_t   *tdt_reg_addr; /**< Address of TDT register. */\r\n \tuint16_t            nb_tx_desc;    /**< number of TX descriptors. */\r\n \tuint16_t            tx_tail;       /**< current value of TDT reg. */\r\ndiff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c\r\nindex 780bf1e..c98356e 100644\r\n--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c\r\n+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c\r\n@@ -365,9 +365,8 @@ static inline int __attribute__((always_inline))\r\n ixgbe_tx_free_bufs(struct igb_tx_queue *txq)\r\n {\r\n \tstruct igb_tx_entry_v *txep;\r\n-\tstruct igb_tx_entry_seq *txsp;\r\n \tuint32_t status;\r\n-\tuint32_t n, k;\r\n+\tuint32_t n;\r\n #ifdef RTE_MBUF_REFCNT\r\n \tuint32_t i;\r\n \tint nb_free = 0;\r\n@@ -387,25 +386,42 @@ ixgbe_tx_free_bufs(struct igb_tx_queue *txq)\r\n \t */\r\n \ttxep = &((struct igb_tx_entry_v *)txq->sw_ring)[txq->tx_next_dd -\r\n \t\t\t(n - 1)];\r\n-\ttxsp = &txq->sw_ring_seq[txq->tx_next_dd - (n - 1)];\r\n-\r\n-\twhile (n > 0) {\r\n-\t\tk = RTE_MIN(n, txsp[n-1].same_pool);\r\n #ifdef RTE_MBUF_REFCNT\r\n-\t\tfor (i = 0; i < k; i++) {\r\n-\t\t\tm = __rte_pktmbuf_prefree_seg((txep+n-k+i)->mbuf);\r\n+\r\n+\tm = __rte_pktmbuf_prefree_seg(txep[0].mbuf);\r\n+\tif (likely(m != NULL)) {\r\n+\t\tfree[0] = m;\r\n+\t\tnb_free = 1;\r\n+\t\tfor (i = 1; i < n; i++) {\r\n+\t\t\tm = __rte_pktmbuf_prefree_seg(txep[i].mbuf);\r\n+\t\t\tif (likely(m != NULL)) {\r\n+\t\t\t\tif (likely(m->pool == free[0]->pool))\r\n+\t\t\t\t\tfree[nb_free++] = m;\r\n+\t\t\t\telse {\r\n+\t\t\t\t\trte_mempool_put_bulk(free[0]->pool,\r\n+\t\t\t\t\t\t\t(void *)free, nb_free);\r\n+\t\t\t\t\tfree[0] = m;\r\n+\t\t\t\t\tnb_free = 1;\r\n+\t\t\t\t}\r\n+\t\t\t}\r\n+\t\t}\r\n+\t\trte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);\r\n+\t}\r\n+\telse {\r\n+\t\tfor (i = 1; i < n; i++) {\r\n+\t\t\tm = __rte_pktmbuf_prefree_seg(txep[i].mbuf);\r\n \t\t\tif (m != NULL)\r\n-\t\t\t\tfree[nb_free++] = m;\r\n+\t\t\t\trte_mempool_put(m->pool, m);\r\n \t\t}\r\n-\t\trte_mempool_put_bulk((void *)txsp[n-1].pool,\r\n-\t\t\t\t(void **)free, nb_free);\r\n-#else\r\n-\t\trte_mempool_put_bulk((void *)txsp[n-1].pool,\r\n-\t\t\t\t(void **)(txep+n-k), k);\r\n-#endif\r\n-\t\tn -= k;\r\n \t}\r\n \r\n+#else /* no scatter_gather */\r\n+\tfor (i = 0; i < n; i++) {\r\n+\t\tm = txep[i]->mbuf;\r\n+\t\trte_mempool_put(m->pool,m);\r\n+\t}\r\n+#endif /* scatter_gather */\r\n+\r\n \t/* buffers were freed, update counters */\r\n \ttxq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);\r\n \ttxq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);\r\n@@ -417,19 +433,11 @@ ixgbe_tx_free_bufs(struct igb_tx_queue *txq)\r\n \r\n static inline void __attribute__((always_inline))\r\n tx_backlog_entry(struct igb_tx_entry_v *txep,\r\n-\t\t struct igb_tx_entry_seq *txsp,\r\n \t\t struct rte_mbuf **tx_pkts, uint16_t nb_pkts)\r\n {\r\n \tint i;\r\n-\tfor (i = 0; i < (int)nb_pkts; ++i) {\r\n+\tfor (i = 0; i < (int)nb_pkts; ++i)\r\n \t\ttxep[i].mbuf = tx_pkts[i];\r\n-\t\t/* check and update sequence number */\r\n-\t\ttxsp[i].pool = tx_pkts[i]->pool;\r\n-\t\tif (txsp[i-1].pool == tx_pkts[i]->pool)\r\n-\t\t\ttxsp[i].same_pool = txsp[i-1].same_pool + 1;\r\n-\t\telse\r\n-\t\t\ttxsp[i].same_pool = 1;\r\n-\t}\r\n }\r\n \r\n uint16_t\r\n@@ -439,7 +447,6 @@ ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,\r\n \tstruct igb_tx_queue *txq = (struct igb_tx_queue *)tx_queue;\r\n \tvolatile union ixgbe_adv_tx_desc *txdp;\r\n \tstruct igb_tx_entry_v *txep;\r\n-\tstruct igb_tx_entry_seq *txsp;\r\n \tuint16_t n, nb_commit, tx_id;\r\n \tuint64_t flags = DCMD_DTYP_FLAGS;\r\n \tuint64_t rs = IXGBE_ADVTXD_DCMD_RS|DCMD_DTYP_FLAGS;\r\n@@ -458,14 +465,13 @@ ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,\r\n \ttx_id = txq->tx_tail;\r\n \ttxdp = &txq->tx_ring[tx_id];\r\n \ttxep = &((struct igb_tx_entry_v *)txq->sw_ring)[tx_id];\r\n-\ttxsp = &txq->sw_ring_seq[tx_id];\r\n \r\n \ttxq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);\r\n \r\n \tn = (uint16_t)(txq->nb_tx_desc - tx_id);\r\n \tif (nb_commit >= n) {\r\n \r\n-\t\ttx_backlog_entry(txep, txsp, tx_pkts, n);\r\n+\t\ttx_backlog_entry(txep, tx_pkts, n);\r\n \r\n \t\tfor (i = 0; i < n - 1; ++i, ++tx_pkts, ++txdp)\r\n \t\t\tvtx1(txdp, *tx_pkts, flags);\r\n@@ -480,10 +486,9 @@ ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,\r\n \t\t/* avoid reach the end of ring */\r\n \t\ttxdp = &(txq->tx_ring[tx_id]);\r\n \t\ttxep = &(((struct igb_tx_entry_v *)txq->sw_ring)[tx_id]);\r\n-\t\ttxsp = &(txq->sw_ring_seq[tx_id]);\r\n \t}\r\n \r\n-\ttx_backlog_entry(txep, txsp, tx_pkts, nb_commit);\r\n+\ttx_backlog_entry(txep, tx_pkts, nb_commit);\r\n \r\n \tvtx(txdp, tx_pkts, nb_commit, flags);\r\n \r\n@@ -507,7 +512,6 @@ ixgbe_tx_queue_release_mbufs(struct igb_tx_queue *txq)\r\n {\r\n \tunsigned i;\r\n \tstruct igb_tx_entry_v *txe;\r\n-\tstruct igb_tx_entry_seq *txs;\r\n \tuint16_t nb_free, max_desc;\r\n \r\n \tif (txq->sw_ring != NULL) {\r\n@@ -525,10 +529,6 @@ ixgbe_tx_queue_release_mbufs(struct igb_tx_queue *txq)\r\n \t\tfor (i = 0; i < txq->nb_tx_desc; i++) {\r\n \t\t\ttxe = (struct igb_tx_entry_v *)&txq->sw_ring[i];\r\n \t\t\ttxe->mbuf = NULL;\r\n-\r\n-\t\t\ttxs = &txq->sw_ring_seq[i];\r\n-\t\t\ttxs->pool = NULL;\r\n-\t\t\ttxs->same_pool = 0;\r\n \t\t}\r\n \t}\r\n }\r\n@@ -543,11 +543,6 @@ ixgbe_tx_free_swring(struct igb_tx_queue *txq)\r\n \t\trte_free((struct igb_rx_entry *)txq->sw_ring - 1);\r\n \t\ttxq->sw_ring = NULL;\r\n \t}\r\n-\r\n-\tif (txq->sw_ring_seq != NULL) {\r\n-\t\trte_free(txq->sw_ring_seq - 1);\r\n-\t\ttxq->sw_ring_seq = NULL;\r\n-\t}\r\n }\r\n \r\n static void\r\n@@ -556,7 +551,6 @@ ixgbe_reset_tx_queue(struct igb_tx_queue *txq)\r\n {\r\n \tstatic const union ixgbe_adv_tx_desc zeroed_desc = { .read = {\r\n \t\t\t.buffer_addr = 0} };\r\n \tstruct igb_tx_entry_v *txe = (struct igb_tx_entry_v *)txq->sw_ring;\r\n-\tstruct igb_tx_entry_seq *txs = txq->sw_ring_seq;\r\n \tuint16_t i;\r\n \r\n \t/* Zero out HW ring memory */\r\n@@ -568,8 +562,6 @@ ixgbe_reset_tx_queue(struct igb_tx_queue *txq)\r\n \t\tvolatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];\r\n \t\ttxd->wb.status = IXGBE_TXD_STAT_DD;\r\n \t\ttxe[i].mbuf = NULL;\r\n-\t\ttxs[i].pool = NULL;\r\n-\t\ttxs[i].same_pool = 0;\r\n \t}\r\n \r\n \ttxq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);\r\n@@ -595,27 +587,14 @@ static struct ixgbe_txq_ops vec_txq_ops = {\r\n };\r\n \r\n int ixgbe_txq_vec_setup(struct igb_tx_queue *txq,\r\n-\t\t\tunsigned int socket_id)\r\n+\t\t\tunsigned int socket_id __rte_unused)\r\n {\r\n-\tuint16_t nb_desc;\r\n-\r\n \tif (txq->sw_ring == NULL)\r\n \t\treturn -1;\r\n \r\n-\t/* request addtional one entry for continous sequence check */\r\n-\tnb_desc = (uint16_t)(txq->nb_tx_desc + 1);\r\n-\r\n-\ttxq->sw_ring_seq = rte_zmalloc_socket(\"txq->sw_ring_seq\",\r\n-\t\t\t\tsizeof(struct igb_tx_entry_seq) * nb_desc,\r\n-\t\t\t\tCACHE_LINE_SIZE, socket_id);\r\n-\tif (txq->sw_ring_seq == NULL)\r\n-\t\treturn -1;\r\n-\r\n-\r\n \t/* leave the first one for overflow */\r\n \ttxq->sw_ring = (struct igb_tx_entry *)\r\n \t\t((struct igb_tx_entry_v *)txq->sw_ring + 1);\r\n-\ttxq->sw_ring_seq += 1;\r\n \ttxq->ops = &vec_txq_ops;\r\n \r\n \treturn 0;\r\n",
    "prefixes": [
        "dpdk-dev",
        "RFC",
        "09/14"
    ]
}