Show a patch.

GET /api/patches/381/
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 381,
    "url": "https://patches.dpdk.org/api/patches/381/",
    "web_url": "https://patches.dpdk.org/patch/381/",
    "project": {
        "id": 1,
        "url": "https://patches.dpdk.org/api/projects/1/",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk"
    },
    "msgid": "<1410798013-15936-1-git-send-email-bruce.richardson@intel.com>",
    "date": "2014-09-15T16:20:13",
    "name": "[dpdk-dev,v3,12/13] ixgbe: Fix perf regression due to moved pool ptr",
    "commit_ref": "",
    "pull_url": "",
    "state": "accepted",
    "archived": true,
    "hash": "fbdc478b9932cacb87cd50880df45028b9006177",
    "submitter": {
        "id": 20,
        "url": "https://patches.dpdk.org/api/people/20/",
        "name": "Bruce Richardson",
        "email": "bruce.richardson@intel.com"
    },
    "delegate": null,
    "mbox": "https://patches.dpdk.org/patch/381/mbox/",
    "series": [],
    "comments": "https://patches.dpdk.org/api/patches/381/comments/",
    "check": "pending",
    "checks": "https://patches.dpdk.org/api/patches/381/checks/",
    "tags": {},
    "headers": {
        "Delivered-To": "patchwork@dpdk.org",
        "List-Subscribe": "<http://dpdk.org/ml/listinfo/dev>,\r\n\t<mailto:dev-request@dpdk.org?subject=subscribe>",
        "X-Original-To": "patchwork@dpdk.org",
        "Date": "Mon, 15 Sep 2014 17:20:13 +0100",
        "In-Reply-To": "<1410441347-22840-13-git-send-email-bruce.richardson@intel.com>",
        "Precedence": "list",
        "X-BeenThere": "dev@dpdk.org",
        "References": "<1410441347-22840-13-git-send-email-bruce.richardson@intel.com>",
        "X-Mailer": "git-send-email 1.7.4.1",
        "List-Archive": "<http://dpdk.org/ml/archives/dev/>",
        "To": "dev@dpdk.org",
        "Errors-To": "dev-bounces@dpdk.org",
        "Received": [
            "from [92.243.14.124] (localhost [IPv6:::1])\r\n\tby dpdk.org (Postfix) with ESMTP id 35D843976;\r\n\tMon, 15 Sep 2014 18:15:11 +0200 (CEST)",
            "from mga09.intel.com (mga09.intel.com [134.134.136.24])\r\n\tby dpdk.org (Postfix) with ESMTP id D4C032E81\r\n\tfor <dev@dpdk.org>; Mon, 15 Sep 2014 18:15:08 +0200 (CEST)",
            "from orsmga002.jf.intel.com ([10.7.209.21])\r\n\tby orsmga102.jf.intel.com with ESMTP; 15 Sep 2014 09:14:29 -0700",
            "from irvmail001.ir.intel.com ([163.33.26.43])\r\n\tby orsmga002.jf.intel.com with ESMTP; 15 Sep 2014 09:20:14 -0700",
            "from sivswdev02.ir.intel.com (sivswdev02.ir.intel.com\r\n\t[10.237.217.46])\r\n\tby irvmail001.ir.intel.com (8.14.3/8.13.6/MailSET/Hub) with ESMTP id\r\n\ts8FGKDZ2012136; Mon, 15 Sep 2014 17:20:13 +0100",
            "from sivswdev02.ir.intel.com (localhost [127.0.0.1])\r\n\tby sivswdev02.ir.intel.com with ESMTP id s8FGKDdU015988;\r\n\tMon, 15 Sep 2014 17:20:13 +0100",
            "(from bricha3@localhost)\r\n\tby sivswdev02.ir.intel.com with  id s8FGKDTa015984;\r\n\tMon, 15 Sep 2014 17:20:13 +0100"
        ],
        "From": "Bruce Richardson <bruce.richardson@intel.com>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "X-ExtLoop1": "1",
        "Message-Id": "<1410798013-15936-1-git-send-email-bruce.richardson@intel.com>",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>",
        "List-Id": "patches and discussions about DPDK <dev.dpdk.org>",
        "Subject": "[dpdk-dev] [PATCH v3 12/13] ixgbe: Fix perf regression due to moved\r\n\tpool ptr",
        "List-Unsubscribe": "<http://dpdk.org/ml/options/dev>,\r\n\t<mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-IronPort-AV": "E=Sophos;i=\"5.04,529,1406617200\"; d=\"scan'208\";a=\"603113131\"",
        "List-Post": "<mailto:dev@dpdk.org>",
        "X-Mailman-Version": "2.1.15"
    },
    "content": "Adjust the fast-path code to fix the regression caused by the pool\npointer moving to the second cache line. This change adjusts the\nprefetching and also the way in which the mbufs are freed back to the\nmempool.\nNote: slow-path e.g. path supporting jumbo frames, is still slower, but\nis dealt with by a later commit\n\nUpdates in V2:\n* fixup checkpatch issue\n\nUpdates in V3:\n* The variable definitions for freeing mbufs now need to be included\nwhether or not reference counting is enabled.\n\nSigned-off-by: Bruce Richardson <bruce.richardson@intel.com>\n---\n lib/librte_pmd_ixgbe/ixgbe_rxtx.c     |  8 ++-\n lib/librte_pmd_ixgbe/ixgbe_rxtx.h     | 14 +-----\n lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c | 94 +++++++++++++----------------------\n 3 files changed, 38 insertions(+), 78 deletions(-)",
    "diff": "diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c\r\nindex 1a46393..d6448a4 100644\r\n--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c\r\n+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c\r\n@@ -142,10 +142,6 @@ ixgbe_tx_free_bufs(struct igb_tx_queue *txq)\r\n \t */\r\n \ttxep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);\r\n \r\n-\t/* prefetch the mbufs that are about to be freed */\r\n-\tfor (i = 0; i < txq->tx_rs_thresh; ++i)\r\n-\t\trte_prefetch0((txep + i)->mbuf);\r\n-\r\n \t/* free buffers one at a time */\r\n \tif ((txq->txq_flags & (uint32_t)ETH_TXQ_FLAGS_NOREFCOUNT) != 0) {\r\n \t\tfor (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {\r\n@@ -186,6 +182,7 @@ tx4(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)\r\n \t\t\t\t((uint32_t)DCMD_DTYP_FLAGS | pkt_len);\r\n \t\ttxdp->read.olinfo_status =\r\n \t\t\t\t(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);\r\n+\t\trte_prefetch0(&(*pkts)->pool);\r\n \t}\r\n }\r\n \r\n@@ -205,6 +202,7 @@ tx1(volatile union ixgbe_adv_tx_desc *txdp, struct rte_mbuf **pkts)\r\n \t\t\t((uint32_t)DCMD_DTYP_FLAGS | pkt_len);\r\n \ttxdp->read.olinfo_status =\r\n \t\t\t(pkt_len << IXGBE_ADVTXD_PAYLEN_SHIFT);\r\n+\trte_prefetch0(&(*pkts)->pool);\r\n }\r\n \r\n /*\r\n@@ -1875,7 +1873,7 @@ ixgbe_dev_tx_queue_setup(struct rte_eth_dev *dev,\r\n \t\tPMD_INIT_LOG(INFO, \"Using simple tx code path\\n\");\r\n #ifdef RTE_IXGBE_INC_VECTOR\r\n \t\tif (txq->tx_rs_thresh <= RTE_IXGBE_TX_MAX_FREE_BUF_SZ &&\r\n-\t\t    ixgbe_txq_vec_setup(txq, socket_id) == 0) {\r\n+\t\t    ixgbe_txq_vec_setup(txq) == 0) {\r\n \t\t\tPMD_INIT_LOG(INFO, \"Vector tx enabled.\\n\");\r\n \t\t\tdev->tx_pkt_burst = ixgbe_xmit_pkts_vec;\r\n \t\t}\r\ndiff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.h b/lib/librte_pmd_ixgbe/ixgbe_rxtx.h\r\nindex e92a864..a97fddb 100644\r\n--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.h\r\n+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.h\r\n@@ -96,14 +96,6 @@ struct igb_tx_entry_v {\r\n };\r\n \r\n /**\r\n- * continuous entry sequence, gather by the same mempool\r\n- */\r\n-struct igb_tx_entry_seq {\r\n-\tconst struct rte_mempool* pool;\r\n-\tuint32_t same_pool;\r\n-};\r\n-\r\n-/**\r\n  * Structure associated with each RX queue.\r\n  */\r\n struct igb_rx_queue {\r\n@@ -190,10 +182,6 @@ struct igb_tx_queue {\r\n \tvolatile union ixgbe_adv_tx_desc *tx_ring;\r\n \tuint64_t            tx_ring_phys_addr; /**< TX ring DMA address. */\r\n \tstruct igb_tx_entry *sw_ring;      /**< virtual address of SW ring. */\r\n-#ifdef RTE_IXGBE_INC_VECTOR\r\n-\t/** continuous tx entry sequence within the same mempool */\r\n-\tstruct igb_tx_entry_seq *sw_ring_seq;\r\n-#endif\r\n \tvolatile uint32_t   *tdt_reg_addr; /**< Address of TDT register. */\r\n \tuint16_t            nb_tx_desc;    /**< number of TX descriptors. */\r\n \tuint16_t            tx_tail;       /**< current value of TDT reg. */\r\n@@ -258,7 +246,7 @@ struct ixgbe_txq_ops {\r\n #ifdef RTE_IXGBE_INC_VECTOR\r\n uint16_t ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts);\r\n uint16_t ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);\r\n-int ixgbe_txq_vec_setup(struct igb_tx_queue *txq, unsigned int socket_id);\r\n+int ixgbe_txq_vec_setup(struct igb_tx_queue *txq);\r\n int ixgbe_rxq_vec_setup(struct igb_rx_queue *rxq);\r\n int ixgbe_rx_vec_condition_check(struct rte_eth_dev *dev);\r\n #endif\r\ndiff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c\r\nindex d53e239..9869b8b 100644\r\n--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c\r\n+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c\r\n@@ -342,14 +342,11 @@ static inline int __attribute__((always_inline))\r\n ixgbe_tx_free_bufs(struct igb_tx_queue *txq)\r\n {\r\n \tstruct igb_tx_entry_v *txep;\r\n-\tstruct igb_tx_entry_seq *txsp;\r\n \tuint32_t status;\r\n-\tuint32_t n, k;\r\n-#ifdef RTE_MBUF_REFCNT\r\n+\tuint32_t n;\r\n \tuint32_t i;\r\n \tint nb_free = 0;\r\n \tstruct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];\r\n-#endif\r\n \r\n \t/* check DD bit on threshold descriptor */\r\n \tstatus = txq->tx_ring[txq->tx_next_dd].wb.status;\r\n@@ -364,23 +361,38 @@ ixgbe_tx_free_bufs(struct igb_tx_queue *txq)\r\n \t */\r\n \ttxep = &((struct igb_tx_entry_v *)txq->sw_ring)[txq->tx_next_dd -\r\n \t\t\t(n - 1)];\r\n-\ttxsp = &txq->sw_ring_seq[txq->tx_next_dd - (n - 1)];\r\n-\r\n-\twhile (n > 0) {\r\n-\t\tk = RTE_MIN(n, txsp[n-1].same_pool);\r\n #ifdef RTE_MBUF_REFCNT\r\n-\t\tfor (i = 0; i < k; i++) {\r\n-\t\t\tm = __rte_pktmbuf_prefree_seg((txep+n-k+i)->mbuf);\r\n-\t\t\tif (m != NULL)\r\n-\t\t\t\tfree[nb_free++] = m;\r\n-\t\t}\r\n-\t\trte_mempool_put_bulk((void *)txsp[n-1].pool,\r\n-\t\t\t\t(void **)free, nb_free);\r\n+\tm = __rte_pktmbuf_prefree_seg(txep[0].mbuf);\r\n #else\r\n-\t\trte_mempool_put_bulk((void *)txsp[n-1].pool,\r\n-\t\t\t\t(void **)(txep+n-k), k);\r\n+\tm = txep[0].mbuf;\r\n #endif\r\n-\t\tn -= k;\r\n+\tif (likely(m != NULL)) {\r\n+\t\tfree[0] = m;\r\n+\t\tnb_free = 1;\r\n+\t\tfor (i = 1; i < n; i++) {\r\n+#ifdef RTE_MBUF_REFCNT\r\n+\t\t\tm = __rte_pktmbuf_prefree_seg(txep[i].mbuf);\r\n+#else\r\n+\t\t\tm = txep[i]->mbuf;\r\n+#endif\r\n+\t\t\tif (likely(m != NULL)) {\r\n+\t\t\t\tif (likely(m->pool == free[0]->pool))\r\n+\t\t\t\t\tfree[nb_free++] = m;\r\n+\t\t\t\telse {\r\n+\t\t\t\t\trte_mempool_put_bulk(free[0]->pool,\r\n+\t\t\t\t\t\t\t(void *)free, nb_free);\r\n+\t\t\t\t\tfree[0] = m;\r\n+\t\t\t\t\tnb_free = 1;\r\n+\t\t\t\t}\r\n+\t\t\t}\r\n+\t\t}\r\n+\t\trte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);\r\n+\t} else {\r\n+\t\tfor (i = 1; i < n; i++) {\r\n+\t\t\tm = __rte_pktmbuf_prefree_seg(txep[i].mbuf);\r\n+\t\t\tif (m != NULL)\r\n+\t\t\t\trte_mempool_put(m->pool, m);\r\n+\t\t}\r\n \t}\r\n \r\n \t/* buffers were freed, update counters */\r\n@@ -394,19 +406,11 @@ ixgbe_tx_free_bufs(struct igb_tx_queue *txq)\r\n \r\n static inline void __attribute__((always_inline))\r\n tx_backlog_entry(struct igb_tx_entry_v *txep,\r\n-\t\t struct igb_tx_entry_seq *txsp,\r\n \t\t struct rte_mbuf **tx_pkts, uint16_t nb_pkts)\r\n {\r\n \tint i;\r\n-\tfor (i = 0; i < (int)nb_pkts; ++i) {\r\n+\tfor (i = 0; i < (int)nb_pkts; ++i)\r\n \t\ttxep[i].mbuf = tx_pkts[i];\r\n-\t\t/* check and update sequence number */\r\n-\t\ttxsp[i].pool = tx_pkts[i]->pool;\r\n-\t\tif (txsp[i-1].pool == tx_pkts[i]->pool)\r\n-\t\t\ttxsp[i].same_pool = txsp[i-1].same_pool + 1;\r\n-\t\telse\r\n-\t\t\ttxsp[i].same_pool = 1;\r\n-\t}\r\n }\r\n \r\n uint16_t\r\n@@ -416,7 +420,6 @@ ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,\r\n \tstruct igb_tx_queue *txq = (struct igb_tx_queue *)tx_queue;\r\n \tvolatile union ixgbe_adv_tx_desc *txdp;\r\n \tstruct igb_tx_entry_v *txep;\r\n-\tstruct igb_tx_entry_seq *txsp;\r\n \tuint16_t n, nb_commit, tx_id;\r\n \tuint64_t flags = DCMD_DTYP_FLAGS;\r\n \tuint64_t rs = IXGBE_ADVTXD_DCMD_RS|DCMD_DTYP_FLAGS;\r\n@@ -435,14 +438,13 @@ ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,\r\n \ttx_id = txq->tx_tail;\r\n \ttxdp = &txq->tx_ring[tx_id];\r\n \ttxep = &((struct igb_tx_entry_v *)txq->sw_ring)[tx_id];\r\n-\ttxsp = &txq->sw_ring_seq[tx_id];\r\n \r\n \ttxq->nb_tx_free = (uint16_t)(txq->nb_tx_free - nb_pkts);\r\n \r\n \tn = (uint16_t)(txq->nb_tx_desc - tx_id);\r\n \tif (nb_commit >= n) {\r\n \r\n-\t\ttx_backlog_entry(txep, txsp, tx_pkts, n);\r\n+\t\ttx_backlog_entry(txep, tx_pkts, n);\r\n \r\n \t\tfor (i = 0; i < n - 1; ++i, ++tx_pkts, ++txdp)\r\n \t\t\tvtx1(txdp, *tx_pkts, flags);\r\n@@ -457,10 +459,9 @@ ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,\r\n \t\t/* avoid reach the end of ring */\r\n \t\ttxdp = &(txq->tx_ring[tx_id]);\r\n \t\ttxep = &(((struct igb_tx_entry_v *)txq->sw_ring)[tx_id]);\r\n-\t\ttxsp = &(txq->sw_ring_seq[tx_id]);\r\n \t}\r\n \r\n-\ttx_backlog_entry(txep, txsp, tx_pkts, nb_commit);\r\n+\ttx_backlog_entry(txep, tx_pkts, nb_commit);\r\n \r\n \tvtx(txdp, tx_pkts, nb_commit, flags);\r\n \r\n@@ -484,7 +485,6 @@ ixgbe_tx_queue_release_mbufs(struct igb_tx_queue *txq)\r\n {\r\n \tunsigned i;\r\n \tstruct igb_tx_entry_v *txe;\r\n-\tstruct igb_tx_entry_seq *txs;\r\n \tuint16_t nb_free, max_desc;\r\n \r\n \tif (txq->sw_ring != NULL) {\r\n@@ -502,10 +502,6 @@ ixgbe_tx_queue_release_mbufs(struct igb_tx_queue *txq)\r\n \t\tfor (i = 0; i < txq->nb_tx_desc; i++) {\r\n \t\t\ttxe = (struct igb_tx_entry_v *)&txq->sw_ring[i];\r\n \t\t\ttxe->mbuf = NULL;\r\n-\r\n-\t\t\ttxs = &txq->sw_ring_seq[i];\r\n-\t\t\ttxs->pool = NULL;\r\n-\t\t\ttxs->same_pool = 0;\r\n \t\t}\r\n \t}\r\n }\r\n@@ -520,11 +516,6 @@ ixgbe_tx_free_swring(struct igb_tx_queue *txq)\r\n \t\trte_free((struct igb_rx_entry *)txq->sw_ring - 1);\r\n \t\ttxq->sw_ring = NULL;\r\n \t}\r\n-\r\n-\tif (txq->sw_ring_seq != NULL) {\r\n-\t\trte_free(txq->sw_ring_seq - 1);\r\n-\t\ttxq->sw_ring_seq = NULL;\r\n-\t}\r\n }\r\n \r\n static void\r\n@@ -533,7 +524,6 @@ ixgbe_reset_tx_queue(struct igb_tx_queue *txq)\r\n \tstatic const union ixgbe_adv_tx_desc zeroed_desc = { .read = {\r\n \t\t\t.buffer_addr = 0} };\r\n \tstruct igb_tx_entry_v *txe = (struct igb_tx_entry_v *)txq->sw_ring;\r\n-\tstruct igb_tx_entry_seq *txs = txq->sw_ring_seq;\r\n \tuint16_t i;\r\n \r\n \t/* Zero out HW ring memory */\r\n@@ -545,8 +535,6 @@ ixgbe_reset_tx_queue(struct igb_tx_queue *txq)\r\n \t\tvolatile union ixgbe_adv_tx_desc *txd = &txq->tx_ring[i];\r\n \t\ttxd->wb.status = IXGBE_TXD_STAT_DD;\r\n \t\ttxe[i].mbuf = NULL;\r\n-\t\ttxs[i].pool = NULL;\r\n-\t\ttxs[i].same_pool = 0;\r\n \t}\r\n \r\n \ttxq->tx_next_dd = (uint16_t)(txq->tx_rs_thresh - 1);\r\n@@ -588,28 +576,14 @@ ixgbe_rxq_vec_setup(struct igb_rx_queue *rxq)\r\n \treturn 0;\r\n }\r\n \r\n-int ixgbe_txq_vec_setup(struct igb_tx_queue *txq,\r\n-\t\t\tunsigned int socket_id)\r\n+int ixgbe_txq_vec_setup(struct igb_tx_queue *txq)\r\n {\r\n-\tuint16_t nb_desc;\r\n-\r\n \tif (txq->sw_ring == NULL)\r\n \t\treturn -1;\r\n \r\n-\t/* request addtional one entry for continous sequence check */\r\n-\tnb_desc = (uint16_t)(txq->nb_tx_desc + 1);\r\n-\r\n-\ttxq->sw_ring_seq = rte_zmalloc_socket(\"txq->sw_ring_seq\",\r\n-\t\t\t\tsizeof(struct igb_tx_entry_seq) * nb_desc,\r\n-\t\t\t\tCACHE_LINE_SIZE, socket_id);\r\n-\tif (txq->sw_ring_seq == NULL)\r\n-\t\treturn -1;\r\n-\r\n-\r\n \t/* leave the first one for overflow */\r\n \ttxq->sw_ring = (struct igb_tx_entry *)\r\n \t\t((struct igb_tx_entry_v *)txq->sw_ring + 1);\r\n-\ttxq->sw_ring_seq += 1;\r\n \ttxq->ops = &vec_txq_ops;\r\n \r\n \treturn 0;\r\n",
    "prefixes": [
        "dpdk-dev",
        "v3",
        "12/13"
    ]
}