Show a patch.

GET /api/patches/311/
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 311,
    "url": "http://patches.dpdk.org/api/patches/311/",
    "web_url": "http://patches.dpdk.org/patch/311/",
    "project": {
        "id": 1,
        "url": "http://patches.dpdk.org/api/projects/1/",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk"
    },
    "msgid": "<1409759378-10113-14-git-send-email-bruce.richardson@intel.com>",
    "date": "2014-09-03T15:49:38",
    "name": "[dpdk-dev,13/13] ixgbe: Improve slow-path perf: vector scattered RX",
    "commit_ref": "",
    "pull_url": "",
    "state": "superseded",
    "archived": true,
    "hash": "6fedbd9b2c430ba2f74ee2d830104640f528af88",
    "submitter": {
        "id": 20,
        "url": "http://patches.dpdk.org/api/people/20/",
        "name": "Bruce Richardson",
        "email": "bruce.richardson@intel.com"
    },
    "delegate": null,
    "mbox": "http://patches.dpdk.org/patch/311/mbox/",
    "series": [],
    "comments": "http://patches.dpdk.org/api/patches/311/comments/",
    "check": "pending",
    "checks": "http://patches.dpdk.org/api/patches/311/checks/",
    "tags": {},
    "headers": {
        "Return-Path": "<bricha3@ecsmtp.ir.intel.com>",
        "References": "<1409759378-10113-1-git-send-email-bruce.richardson@intel.com>",
        "X-Mailman-Version": "2.1.15",
        "X-IronPort-AV": "E=Sophos;i=\"5.04,458,1406617200\"; d=\"scan'208\";a=\"585697559\"",
        "From": "Bruce Richardson <bruce.richardson@intel.com>",
        "X-List-Received-Date": "Wed, 03 Sep 2014 15:45:45 -0000",
        "X-BeenThere": "dev@dpdk.org",
        "Message-Id": "<1409759378-10113-14-git-send-email-bruce.richardson@intel.com>",
        "Received": [
            "from mga11.intel.com (mga11.intel.com [192.55.52.93])\r\n\tby dpdk.org (Postfix) with ESMTP id 87EAEB3AB\r\n\tfor <dev@dpdk.org>; Wed,  3 Sep 2014 17:45:44 +0200 (CEST)",
            "from fmsmga001.fm.intel.com ([10.253.24.23])\r\n\tby fmsmga102.fm.intel.com with ESMTP; 03 Sep 2014 08:49:46 -0700",
            "from irvmail001.ir.intel.com ([163.33.26.43])\r\n\tby fmsmga001.fm.intel.com with ESMTP; 03 Sep 2014 08:49:41 -0700",
            "from sivswdev02.ir.intel.com (sivswdev02.ir.intel.com\r\n\t[10.237.217.46])\r\n\tby irvmail001.ir.intel.com (8.14.3/8.13.6/MailSET/Hub) with ESMTP id\r\n\ts83FnefR025442; Wed, 3 Sep 2014 16:49:40 +0100",
            "from sivswdev02.ir.intel.com (localhost [127.0.0.1])\r\n\tby sivswdev02.ir.intel.com with ESMTP id s83FnemY010508;\r\n\tWed, 3 Sep 2014 16:49:40 +0100",
            "(from bricha3@localhost)\r\n\tby sivswdev02.ir.intel.com with  id s83FneeU010503;\r\n\tWed, 3 Sep 2014 16:49:40 +0100"
        ],
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "X-Mailer": "git-send-email 1.7.0.7",
        "Precedence": "list",
        "Date": "Wed,  3 Sep 2014 16:49:38 +0100",
        "Subject": "[dpdk-dev] [PATCH 13/13] ixgbe: Improve slow-path perf: vector\r\n\tscattered RX",
        "List-Archive": "<http://dpdk.org/ml/archives/dev/>",
        "X-ExtLoop1": "1",
        "List-Subscribe": "<http://dpdk.org/ml/listinfo/dev>,\r\n\t<mailto:dev-request@dpdk.org?subject=subscribe>",
        "List-Id": "patches and discussions about DPDK <dev.dpdk.org>",
        "In-Reply-To": "<1409759378-10113-1-git-send-email-bruce.richardson@intel.com>",
        "List-Unsubscribe": "<http://dpdk.org/ml/options/dev>,\r\n\t<mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "To": "dev@dpdk.org"
    },
    "content": "Provide a wrapper routine to enable receive of scattered packets with a\nvector driver.\n\nSigned-off-by: Bruce Richardson <bruce.richardson@intel.com>\n---\n lib/librte_pmd_ixgbe/ixgbe_rxtx.c     |  16 ++++\n lib/librte_pmd_ixgbe/ixgbe_rxtx.h     |   1 +\n lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c | 173 ++++++++++++++++++++++++++++++++--\n 3 files changed, 183 insertions(+), 7 deletions(-)",
    "diff": "diff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c\r\nindex ebbcee8..8a2e0ee 100644\r\n--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.c\r\n+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.c\r\n@@ -3477,12 +3477,20 @@ ixgbe_dev_rx_init(struct rte_eth_dev *dev)\r\n \t\tif ((dev->data->dev_conf.rxmode.max_rx_pkt_len +\r\n \t\t\t\t2 * IXGBE_VLAN_TAG_SIZE) > buf_size){\r\n \t\t\tdev->data->scattered_rx = 1;\r\n+#ifdef RTE_IXGBE_INC_VECTOR\r\n+\t\t\tdev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;\r\n+#else\r\n \t\t\tdev->rx_pkt_burst = ixgbe_recv_scattered_pkts;\r\n+#endif\r\n \t\t}\r\n \t}\r\n \r\n \tif (dev->data->dev_conf.rxmode.enable_scatter) {\r\n+#ifdef RTE_IXGBE_INC_VECTOR\r\n+\t\tdev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;\r\n+#else\r\n \t\tdev->rx_pkt_burst = ixgbe_recv_scattered_pkts;\r\n+#endif\r\n \t\tdev->data->scattered_rx = 1;\r\n \t}\r\n \r\n@@ -3970,12 +3978,20 @@ ixgbevf_dev_rx_init(struct rte_eth_dev *dev)\r\n \t\tif ((dev->data->dev_conf.rxmode.max_rx_pkt_len +\r\n \t\t\t\t2 * IXGBE_VLAN_TAG_SIZE) > buf_size) {\r\n \t\t\tdev->data->scattered_rx = 1;\r\n+#ifdef RTE_IXGBE_INC_VECTOR\r\n+\t\t\tdev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;\r\n+#else\r\n \t\t\tdev->rx_pkt_burst = ixgbe_recv_scattered_pkts;\r\n+#endif\r\n \t\t}\r\n \t}\r\n \r\n \tif (dev->data->dev_conf.rxmode.enable_scatter) {\r\n+#ifdef RTE_IXGBE_INC_VECTOR\r\n+\t\tdev->rx_pkt_burst = ixgbe_recv_scattered_pkts_vec;\r\n+#else\r\n \t\tdev->rx_pkt_burst = ixgbe_recv_scattered_pkts;\r\n+#endif\r\n \t\tdev->data->scattered_rx = 1;\r\n \t}\r\n \r\ndiff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx.h b/lib/librte_pmd_ixgbe/ixgbe_rxtx.h\r\nindex dbb57af..e2e037b 100644\r\n--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx.h\r\n+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx.h\r\n@@ -246,6 +246,7 @@ struct ixgbe_txq_ops {\r\n \r\n #ifdef RTE_IXGBE_INC_VECTOR\r\n uint16_t ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts);\r\n+uint16_t ixgbe_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts);\r\n uint16_t ixgbe_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts);\r\n int ixgbe_txq_vec_setup(struct igb_tx_queue *txq);\r\n int ixgbe_rxq_vec_setup(struct igb_rx_queue *rxq);\r\ndiff --git a/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c b/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c\r\nindex 4f63086..168996f 100644\r\n--- a/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c\r\n+++ b/lib/librte_pmd_ixgbe/ixgbe_rxtx_vec.c\r\n@@ -164,12 +164,11 @@ desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)\r\n  *   numbers of DD bit\r\n  * - don't support ol_flags for rss and csum err\r\n  */\r\n-uint16_t\r\n-ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,\r\n-\t\tuint16_t nb_pkts)\r\n+static inline uint16_t\r\n+_recv_raw_pkts_vec(struct igb_rx_queue *rxq, struct rte_mbuf **rx_pkts,\r\n+\t\tuint16_t nb_pkts, uint8_t *split_packet)\r\n {\r\n \tvolatile union ixgbe_adv_rx_desc *rxdp;\r\n-\tstruct igb_rx_queue *rxq = rx_queue;\r\n \tstruct igb_rx_entry *sw_ring;\r\n \tuint16_t nb_pkts_recd;\r\n \tint pos;\r\n@@ -182,7 +181,7 @@ ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,\r\n \t\t\t\t-rxq->crc_len, /* sub crc on data_len */\r\n \t\t\t\t0            /* ignore pkt_type field */\r\n \t\t\t);\r\n-\t__m128i dd_check;\r\n+\t__m128i dd_check, eop_check;\r\n \r\n \tif (unlikely(nb_pkts < RTE_IXGBE_VPMD_RX_BURST))\r\n \t\treturn 0;\r\n@@ -207,6 +206,9 @@ ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,\r\n \t/* 4 packets DD mask */\r\n \tdd_check = _mm_set_epi64x(0x0000000100000001LL, 0x0000000100000001LL);\r\n \r\n+\t/* 4 packets EOP mask */\r\n+\teop_check = _mm_set_epi64x(0x0000000200000002LL, 0x0000000200000002LL);\r\n+\r\n \t/* mask to shuffle from desc. to mbuf */\r\n \tshuf_msk = _mm_set_epi8(\r\n \t\t7, 6, 5, 4,  /* octet 4~7, 32bits rss */\r\n@@ -218,7 +220,6 @@ ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,\r\n \t\t0xFF, 0xFF   /* skip pkt_type field */\r\n \t\t);\r\n \r\n-\r\n \t/* Cache is empty -> need to scan the buffer rings, but first move\r\n \t * the next 'n' mbufs into the cache */\r\n \tsw_ring = &rxq->sw_ring[rxq->rx_tail];\r\n@@ -227,6 +228,7 @@ ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,\r\n \t * A. load 4 packet in one loop\r\n \t * B. copy 4 mbuf point from swring to rx_pkts\r\n \t * C. calc the number of DD bits among the 4 packets\r\n+\t * [C*. extract the end-of-packet bit, if requested]\r\n \t * D. fill info. from desc to mbuf\r\n \t */\r\n \tfor (pos = 0, nb_pkts_recd = 0; pos < RTE_IXGBE_VPMD_RX_BURST;\r\n@@ -237,6 +239,13 @@ ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,\r\n \t\t__m128i zero, staterr, sterr_tmp1, sterr_tmp2;\r\n \t\t__m128i mbp1, mbp2; /* two mbuf pointer in one XMM reg. */\r\n \r\n+\t\tif (split_packet) {\r\n+\t\t\trte_prefetch0(&rx_pkts[pos]->cacheline1);\r\n+\t\t\trte_prefetch0(&rx_pkts[pos + 1]->cacheline1);\r\n+\t\t\trte_prefetch0(&rx_pkts[pos + 2]->cacheline1);\r\n+\t\t\trte_prefetch0(&rx_pkts[pos + 3]->cacheline1);\r\n+\t\t}\r\n+\r\n \t\t/* B.1 load 1 mbuf point */\r\n \t\tmbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]);\r\n \r\n@@ -295,7 +304,36 @@ ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,\r\n \t\tpkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust);\r\n \t\tpkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust);\r\n \r\n-\t\t/* C.3 calc avaialbe number of desc */\r\n+\t\t/* C* extract and record EOP bit */\r\n+\t\tif (split_packet){\r\n+\t\t\t__m128i eop_shuf_mask = _mm_set_epi8(\r\n+\t\t\t\t\t0xFF, 0xFF, 0xFF, 0xFF,\r\n+\t\t\t\t\t0xFF, 0xFF, 0xFF, 0xFF,\r\n+\t\t\t\t\t0xFF, 0xFF, 0xFF, 0xFF,\r\n+\t\t\t\t\t0x04, 0x0C, 0x00, 0x08\r\n+\t\t\t\t\t);\r\n+\r\n+\t\t\t/* and with mask to extract bits, flipping 1-0 */\r\n+\t\t\t__m128i eop_bits = _mm_andnot_si128(staterr, eop_check);\r\n+\t\t\t/* convert from 0/2 value to a 0/1 value */\r\n+\t\t\t//eop_bits = _mm_srli_epi32(eop_bits, 1);\r\n+\t\t\t/* the staterr values are not in order, as the count\r\n+\t\t\t * count of dd bits doesn't care. However, for end of\r\n+\t\t\t * packet tracking, we do care, so shuffle. This also\r\n+\t\t\t * compresses the 32-bit values to 8-bit */\r\n+\t\t\teop_bits = _mm_shuffle_epi8(eop_bits, eop_shuf_mask);\r\n+\t\t\t/* store the resulting 32-bit value */\r\n+\t\t\t*(int *)split_packet = _mm_cvtsi128_si32(eop_bits);\r\n+\t\t\tsplit_packet += RTE_IXGBE_DESCS_PER_LOOP;\r\n+\r\n+\t\t\t/* zero-out next pointers */\r\n+\t\t\trx_pkts[pos]->next = NULL;\r\n+\t\t\trx_pkts[pos + 1]->next = NULL;\r\n+\t\t\trx_pkts[pos + 2]->next = NULL;\r\n+\t\t\trx_pkts[pos + 3]->next = NULL;\r\n+\t\t}\r\n+\r\n+\t\t/* C.3 calc available number of desc */\r\n \t\tstaterr = _mm_and_si128(staterr, dd_check);\r\n \t\tstaterr = _mm_packs_epi32(staterr, zero);\r\n \r\n@@ -319,6 +357,127 @@ ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,\r\n \r\n \treturn nb_pkts_recd;\r\n }\r\n+\r\n+/*\r\n+ * vPMD receive routine, now only accept (nb_pkts == RTE_IXGBE_VPMD_RX_BURST)\r\n+ * in one loop\r\n+ *\r\n+ * Notice:\r\n+ * - nb_pkts < RTE_IXGBE_VPMD_RX_BURST, just return no packet\r\n+ * - nb_pkts > RTE_IXGBE_VPMD_RX_BURST, only scan RTE_IXGBE_VPMD_RX_BURST\r\n+ *   numbers of DD bit\r\n+ * - don't support ol_flags for rss and csum err\r\n+ */\r\n+uint16_t\r\n+ixgbe_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,\r\n+\t\tuint16_t nb_pkts)\r\n+{\r\n+\treturn _recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);\r\n+}\r\n+\r\n+static inline uint16_t\r\n+reassemble_packets(struct igb_rx_queue *rxq, struct rte_mbuf **rx_bufs,\r\n+\t\tuint16_t nb_bufs, uint8_t *split_flags)\r\n+{\r\n+\tstruct rte_mbuf *pkts[RTE_IXGBE_VPMD_RX_BURST]; /*finished pkts*/\r\n+\tstruct rte_mbuf *start = rxq->pkt_first_seg;\r\n+\tstruct rte_mbuf *end =  rxq->pkt_last_seg;\r\n+\tunsigned pkt_idx = 0, buf_idx = 0;\r\n+\r\n+\r\n+\twhile (buf_idx < nb_bufs) {\r\n+\t\tif (end != NULL) {\r\n+\t\t\t/* processing a split packet */\r\n+\t\t\tend->next = rx_bufs[buf_idx];\r\n+\t\t\trx_bufs[buf_idx]->data_len += rxq->crc_len;\r\n+\r\n+\t\t\tstart->nb_segs++;\r\n+\t\t\tstart->pkt_len += rx_bufs[buf_idx]->data_len;\r\n+\t\t\tend = end->next;\r\n+\r\n+\t\t\tif (!split_flags[buf_idx]) {\r\n+\t\t\t\t/* it's the last packet of the set */\r\n+\t\t\t\tstart->hash = end->hash;\r\n+\t\t\t\tstart->ol_flags = end->ol_flags;\r\n+\t\t\t\t/* we need to strip crc for the whole packet */\r\n+\t\t\t\tstart->pkt_len -= rxq->crc_len;\r\n+\t\t\t\tif (end->data_len > rxq->crc_len)\r\n+\t\t\t\t\tend->data_len -= rxq->crc_len;\r\n+\t\t\t\telse {\r\n+\t\t\t\t\t/* free up last mbuf */\r\n+\t\t\t\t\tstruct rte_mbuf *secondlast = start;\r\n+\t\t\t\t\twhile (secondlast->next != end)\r\n+\t\t\t\t\t\tsecondlast = secondlast->next;\r\n+\t\t\t\t\tsecondlast->data_len -= (rxq->crc_len -\r\n+\t\t\t\t\t\t\tend->data_len);\r\n+\t\t\t\t\tsecondlast->next = NULL;\r\n+\t\t\t\t\trte_pktmbuf_free_seg(end);\r\n+\t\t\t\t\tend = secondlast;\r\n+\t\t\t\t}\r\n+\t\t\t\tpkts[pkt_idx++] = start;\r\n+\t\t\t\tstart = end = NULL;\r\n+\t\t\t}\r\n+\t\t} else {\r\n+\t\t\t/* not processing a split packet */\r\n+\t\t\tif (!split_flags[buf_idx]) {\r\n+\t\t\t\t/* not a split packet, save and skip */\r\n+\t\t\t\tpkts[pkt_idx++] = rx_bufs[buf_idx];\r\n+\t\t\t\tcontinue;\r\n+\t\t\t}\r\n+\t\t\tend = start = rx_bufs[buf_idx];\r\n+\t\t\trx_bufs[buf_idx]->data_len += rxq->crc_len;\r\n+\t\t\trx_bufs[buf_idx]->pkt_len += rxq->crc_len;\r\n+\t\t}\r\n+\t\tbuf_idx++;\r\n+\t}\r\n+\r\n+\t/* save the partial packet for next time */\r\n+\trxq->pkt_first_seg = start;\r\n+\trxq->pkt_last_seg = end;\r\n+\tmemcpy(rx_bufs, pkts, pkt_idx * (sizeof(*pkts)));\r\n+\treturn pkt_idx;\r\n+}\r\n+\r\n+/*\r\n+ * vPMD receive routine that reassembles scattered packets\r\n+ *\r\n+ * Notice:\r\n+ * - don't support ol_flags for rss and csum err\r\n+ * - now only accept (nb_pkts == RTE_IXGBE_VPMD_RX_BURST)\r\n+ */\r\n+uint16_t\r\n+ixgbe_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,\r\n+\t\tuint16_t nb_pkts)\r\n+{\r\n+\tstruct igb_rx_queue *rxq = rx_queue;\r\n+\tuint8_t split_flags[RTE_IXGBE_VPMD_RX_BURST] = {0};\r\n+\r\n+\t/* get some new buffers */\r\n+\tuint16_t nb_bufs = _recv_raw_pkts_vec(rxq, rx_pkts, nb_pkts,\r\n+\t\t\tsplit_flags);\r\n+\tif (nb_bufs == 0)\r\n+\t\treturn 0;\r\n+\r\n+\t/* happy day case, full burst + no packets to be joined */\r\n+\tconst uint32_t *split_fl32 = (uint32_t *)split_flags;\r\n+\tif (rxq->pkt_first_seg == NULL &&\r\n+\t\t\tsplit_fl32[0] == 0 && split_fl32[1] == 0 &&\r\n+\t\t\tsplit_fl32[2] == 0 && split_fl32[3] == 0 )\r\n+\t\treturn nb_bufs;\r\n+\r\n+\t/* reassemble any packets that need reassembly*/\r\n+\tunsigned i = 0;\r\n+\tif (rxq->pkt_first_seg == NULL ){\r\n+\t\t/* find the first split flag, and only reassemble then*/\r\n+\t\twhile (!split_flags[i] && i < nb_bufs)\r\n+\t\t\ti++;\r\n+\t\tif (i == nb_bufs)\r\n+\t\t\treturn nb_bufs;\r\n+\t}\r\n+\treturn i + reassemble_packets(rxq, &rx_pkts[i], nb_bufs - i,\r\n+\t\t&split_flags[i]);\r\n+}\r\n+\r\n static inline void\r\n vtx1(volatile union ixgbe_adv_tx_desc *txdp,\r\n \t\tstruct rte_mbuf *pkt, uint64_t flags)\r\n",
    "prefixes": [
        "dpdk-dev",
        "13/13"
    ]
}