get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/8324/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 8324,
    "url": "http://patches.dpdk.org/api/patches/8324/?format=api",
    "web_url": "http://patches.dpdk.org/project/dpdk/patch/1446192187-1890-5-git-send-email-jing.d.chen@intel.com/",
    "project": {
        "id": 1,
        "url": "http://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<1446192187-1890-5-git-send-email-jing.d.chen@intel.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/1446192187-1890-5-git-send-email-jing.d.chen@intel.com",
    "date": "2015-10-30T08:02:57",
    "name": "[dpdk-dev,v5,04/14] fm10k: add Vector RX function",
    "commit_ref": null,
    "pull_url": null,
    "state": "accepted",
    "archived": true,
    "hash": "863fda9ba7a308967bc6c3c8b6eef0ae6b003446",
    "submitter": {
        "id": 40,
        "url": "http://patches.dpdk.org/api/people/40/?format=api",
        "name": "Chen, Jing D",
        "email": "jing.d.chen@intel.com"
    },
    "delegate": null,
    "mbox": "http://patches.dpdk.org/project/dpdk/patch/1446192187-1890-5-git-send-email-jing.d.chen@intel.com/mbox/",
    "series": [],
    "comments": "http://patches.dpdk.org/api/patches/8324/comments/",
    "check": "pending",
    "checks": "http://patches.dpdk.org/api/patches/8324/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@dpdk.org",
        "Delivered-To": "patchwork@dpdk.org",
        "Received": [
            "from [92.243.14.124] (localhost [IPv6:::1])\n\tby dpdk.org (Postfix) with ESMTP id 80DF88E75;\n\tFri, 30 Oct 2015 09:03:32 +0100 (CET)",
            "from mga09.intel.com (mga09.intel.com [134.134.136.24])\n\tby dpdk.org (Postfix) with ESMTP id 20C228E6D\n\tfor <dev@dpdk.org>; Fri, 30 Oct 2015 09:03:29 +0100 (CET)",
            "from fmsmga002.fm.intel.com ([10.253.24.26])\n\tby orsmga102.jf.intel.com with ESMTP; 30 Oct 2015 01:03:29 -0700",
            "from shvmail01.sh.intel.com ([10.239.29.42])\n\tby fmsmga002.fm.intel.com with ESMTP; 30 Oct 2015 01:03:24 -0700",
            "from shecgisg004.sh.intel.com (shecgisg004.sh.intel.com\n\t[10.239.29.89])\n\tby shvmail01.sh.intel.com with ESMTP id t9U83Mnx027230;\n\tFri, 30 Oct 2015 16:03:22 +0800",
            "from shecgisg004.sh.intel.com (localhost [127.0.0.1])\n\tby shecgisg004.sh.intel.com (8.13.6/8.13.6/SuSE Linux 0.8) with ESMTP\n\tid t9U83IxQ001952; Fri, 30 Oct 2015 16:03:20 +0800",
            "(from jingche2@localhost)\n\tby shecgisg004.sh.intel.com (8.13.6/8.13.6/Submit) id t9U83IE7001948; \n\tFri, 30 Oct 2015 16:03:18 +0800"
        ],
        "X-ExtLoop1": "1",
        "X-IronPort-AV": "E=Sophos;i=\"5.20,217,1444719600\"; d=\"scan'208\";a=\"839122424\"",
        "From": "\"Chen Jing D(Mark)\" <jing.d.chen@intel.com>",
        "To": "dev@dpdk.org",
        "Date": "Fri, 30 Oct 2015 16:02:57 +0800",
        "Message-Id": "<1446192187-1890-5-git-send-email-jing.d.chen@intel.com>",
        "X-Mailer": "git-send-email 1.7.12.2",
        "In-Reply-To": "<1446192187-1890-1-git-send-email-jing.d.chen@intel.com>",
        "References": "<1446110173-13330-2-git-send-email-jing.d.chen@intel.com>\n\t<1446192187-1890-1-git-send-email-jing.d.chen@intel.com>",
        "Subject": "[dpdk-dev] [PATCH v5 04/14] fm10k: add Vector RX function",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.15",
        "Precedence": "list",
        "List-Id": "patches and discussions about DPDK <dev.dpdk.org>",
        "List-Unsubscribe": "<http://dpdk.org/ml/options/dev>,\n\t<mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://dpdk.org/ml/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<http://dpdk.org/ml/listinfo/dev>,\n\t<mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>"
    },
    "content": "From: \"Chen Jing D(Mark)\" <jing.d.chen@intel.com>\n\nThis patch add below functions:\n1. Add function fm10k_rxq_rearm to re-allocate mbuf for used desc\nin RX HW ring.\n2. Add 2 functions, in which using SSE instructions to parse RX desc\nto get pkt_type and ol_flags in mbuf.\n3. Add func fm10k_recv_raw_pkts_vec to parse raw packets, in which\nincludes possible chained packets.\n4. Add func fm10k_recv_pkts_vec to receive single mbuf packet.\n\nSigned-off-by: Chen Jing D(Mark) <jing.d.chen@intel.com>\n---\n drivers/net/fm10k/fm10k.h          |   12 +\n drivers/net/fm10k/fm10k_ethdev.c   |    3 +\n drivers/net/fm10k/fm10k_rxtx_vec.c |  426 ++++++++++++++++++++++++++++++++++++\n 3 files changed, 441 insertions(+), 0 deletions(-)",
    "diff": "diff --git a/drivers/net/fm10k/fm10k.h b/drivers/net/fm10k/fm10k.h\nindex 362a2d0..96b30a7 100644\n--- a/drivers/net/fm10k/fm10k.h\n+++ b/drivers/net/fm10k/fm10k.h\n@@ -123,6 +123,12 @@\n #define FM10K_VFTA_BIT(vlan_id)    (1 << ((vlan_id) & 0x1F))\n #define FM10K_VFTA_IDX(vlan_id)    ((vlan_id) >> 5)\n \n+#define RTE_FM10K_RXQ_REARM_THRESH      32\n+#define RTE_FM10K_VPMD_TX_BURST         32\n+#define RTE_FM10K_MAX_RX_BURST          RTE_FM10K_RXQ_REARM_THRESH\n+#define RTE_FM10K_TX_MAX_FREE_BUF_SZ    64\n+#define RTE_FM10K_DESCS_PER_LOOP    4\n+\n struct fm10k_macvlan_filter_info {\n \tuint16_t vlan_num;       /* Total VLAN number */\n \tuint16_t mac_num;        /* Total mac number */\n@@ -171,6 +177,8 @@ struct fm10k_rx_queue {\n \tstruct rte_mbuf *pkt_last_seg;  /* Last segment of current packet. */\n \tuint64_t hw_ring_phys_addr;\n \tuint64_t mbuf_initializer; /* value to init mbufs */\n+\t/** need to alloc dummy mbuf, for wraparound when scanning hw ring */\n+\tstruct rte_mbuf fake_mbuf;\n \tuint16_t next_dd;\n \tuint16_t next_alloc;\n \tuint16_t next_trigger;\n@@ -178,6 +186,9 @@ struct fm10k_rx_queue {\n \tvolatile uint32_t *tail_ptr;\n \tuint16_t nb_desc;\n \tuint16_t queue_id;\n+\t/* Below 2 fields only valid in case vPMD is applied. */\n+\tuint16_t rxrearm_nb;     /* number of remaining to be re-armed */\n+\tuint16_t rxrearm_start;  /* the idx we start the re-arming from */\n \tuint8_t port_id;\n \tuint8_t drop_en;\n \tuint8_t rx_deferred_start; /* don't start this queue in dev start. */\n@@ -318,4 +329,5 @@ uint16_t fm10k_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,\n \tuint16_t nb_pkts);\n \n int fm10k_rxq_vec_setup(struct fm10k_rx_queue *rxq);\n+uint16_t fm10k_recv_pkts_vec(void *, struct rte_mbuf **, uint16_t);\n #endif\ndiff --git a/drivers/net/fm10k/fm10k_ethdev.c b/drivers/net/fm10k/fm10k_ethdev.c\nindex 8dd64bf..6be764a 100644\n--- a/drivers/net/fm10k/fm10k_ethdev.c\n+++ b/drivers/net/fm10k/fm10k_ethdev.c\n@@ -121,6 +121,9 @@ rx_queue_reset(struct fm10k_rx_queue *q)\n \tq->next_alloc = 0;\n \tq->next_trigger = q->alloc_thresh - 1;\n \tFM10K_PCI_REG_WRITE(q->tail_ptr, q->nb_desc - 1);\n+\tq->rxrearm_start = 0;\n+\tq->rxrearm_nb = 0;\n+\n \treturn 0;\n }\n \ndiff --git a/drivers/net/fm10k/fm10k_rxtx_vec.c b/drivers/net/fm10k/fm10k_rxtx_vec.c\nindex 34b677b..9633f35 100644\n--- a/drivers/net/fm10k/fm10k_rxtx_vec.c\n+++ b/drivers/net/fm10k/fm10k_rxtx_vec.c\n@@ -44,6 +44,133 @@\n #pragma GCC diagnostic ignored \"-Wcast-qual\"\n #endif\n \n+/* Handling the offload flags (olflags) field takes computation\n+ * time when receiving packets. Therefore we provide a flag to disable\n+ * the processing of the olflags field when they are not needed. This\n+ * gives improved performance, at the cost of losing the offload info\n+ * in the received packet\n+ */\n+#ifdef RTE_LIBRTE_FM10K_RX_OLFLAGS_ENABLE\n+\n+/* Vlan present flag shift */\n+#define VP_SHIFT     (2)\n+/* L3 type shift */\n+#define L3TYPE_SHIFT     (4)\n+/* L4 type shift */\n+#define L4TYPE_SHIFT     (7)\n+\n+static inline void\n+fm10k_desc_to_olflags_v(__m128i descs[4], struct rte_mbuf **rx_pkts)\n+{\n+\t__m128i ptype0, ptype1, vtag0, vtag1;\n+\tunion {\n+\t\tuint16_t e[4];\n+\t\tuint64_t dword;\n+\t} vol;\n+\n+\tconst __m128i pkttype_msk = _mm_set_epi16(\n+\t\t\t0x0000, 0x0000, 0x0000, 0x0000,\n+\t\t\tPKT_RX_VLAN_PKT, PKT_RX_VLAN_PKT,\n+\t\t\tPKT_RX_VLAN_PKT, PKT_RX_VLAN_PKT);\n+\n+\t/* mask everything except rss type */\n+\tconst __m128i rsstype_msk = _mm_set_epi16(\n+\t\t\t0x0000, 0x0000, 0x0000, 0x0000,\n+\t\t\t0x000F, 0x000F, 0x000F, 0x000F);\n+\n+\t/* map rss type to rss hash flag */\n+\tconst __m128i rss_flags = _mm_set_epi8(0, 0, 0, 0,\n+\t\t\t0, 0, 0, PKT_RX_RSS_HASH,\n+\t\t\tPKT_RX_RSS_HASH, 0, PKT_RX_RSS_HASH, 0,\n+\t\t\tPKT_RX_RSS_HASH, PKT_RX_RSS_HASH, PKT_RX_RSS_HASH, 0);\n+\n+\tptype0 = _mm_unpacklo_epi16(descs[0], descs[1]);\n+\tptype1 = _mm_unpacklo_epi16(descs[2], descs[3]);\n+\tvtag0 = _mm_unpackhi_epi16(descs[0], descs[1]);\n+\tvtag1 = _mm_unpackhi_epi16(descs[2], descs[3]);\n+\n+\tptype0 = _mm_unpacklo_epi32(ptype0, ptype1);\n+\tptype0 = _mm_and_si128(ptype0, rsstype_msk);\n+\tptype0 = _mm_shuffle_epi8(rss_flags, ptype0);\n+\n+\tvtag1 = _mm_unpacklo_epi32(vtag0, vtag1);\n+\tvtag1 = _mm_srli_epi16(vtag1, VP_SHIFT);\n+\tvtag1 = _mm_and_si128(vtag1, pkttype_msk);\n+\n+\tvtag1 = _mm_or_si128(ptype0, vtag1);\n+\tvol.dword = _mm_cvtsi128_si64(vtag1);\n+\n+\trx_pkts[0]->ol_flags = vol.e[0];\n+\trx_pkts[1]->ol_flags = vol.e[1];\n+\trx_pkts[2]->ol_flags = vol.e[2];\n+\trx_pkts[3]->ol_flags = vol.e[3];\n+}\n+\n+static inline void\n+fm10k_desc_to_pktype_v(__m128i descs[4], struct rte_mbuf **rx_pkts)\n+{\n+\t__m128i l3l4type0, l3l4type1, l3type, l4type;\n+\tunion {\n+\t\tuint16_t e[4];\n+\t\tuint64_t dword;\n+\t} vol;\n+\n+\t/* L3 pkt type mask  Bit4 to Bit6 */\n+\tconst __m128i l3type_msk = _mm_set_epi16(\n+\t\t\t0x0000, 0x0000, 0x0000, 0x0000,\n+\t\t\t0x0070, 0x0070, 0x0070, 0x0070);\n+\n+\t/* L4 pkt type mask  Bit7 to Bit9 */\n+\tconst __m128i l4type_msk = _mm_set_epi16(\n+\t\t\t0x0000, 0x0000, 0x0000, 0x0000,\n+\t\t\t0x0380, 0x0380, 0x0380, 0x0380);\n+\n+\t/* convert RRC l3 type to mbuf format */\n+\tconst __m128i l3type_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,\n+\t\t\t0, 0, 0, RTE_PTYPE_L3_IPV6_EXT,\n+\t\t\tRTE_PTYPE_L3_IPV6, RTE_PTYPE_L3_IPV4_EXT,\n+\t\t\tRTE_PTYPE_L3_IPV4, 0);\n+\n+\t/* Convert RRC l4 type to mbuf format l4type_flags shift-left 8 bits\n+\t * to fill into8 bits length.\n+\t */\n+\tconst __m128i l4type_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0,\n+\t\t\tRTE_PTYPE_TUNNEL_GENEVE >> 8,\n+\t\t\tRTE_PTYPE_TUNNEL_NVGRE >> 8,\n+\t\t\tRTE_PTYPE_TUNNEL_VXLAN >> 8,\n+\t\t\tRTE_PTYPE_TUNNEL_GRE >> 8,\n+\t\t\tRTE_PTYPE_L4_UDP >> 8,\n+\t\t\tRTE_PTYPE_L4_TCP >> 8,\n+\t\t\t0);\n+\n+\tl3l4type0 = _mm_unpacklo_epi16(descs[0], descs[1]);\n+\tl3l4type1 = _mm_unpacklo_epi16(descs[2], descs[3]);\n+\tl3l4type0 = _mm_unpacklo_epi32(l3l4type0, l3l4type1);\n+\n+\tl3type = _mm_and_si128(l3l4type0, l3type_msk);\n+\tl4type = _mm_and_si128(l3l4type0, l4type_msk);\n+\n+\tl3type = _mm_srli_epi16(l3type, L3TYPE_SHIFT);\n+\tl4type = _mm_srli_epi16(l4type, L4TYPE_SHIFT);\n+\n+\tl3type = _mm_shuffle_epi8(l3type_flags, l3type);\n+\t/* l4type_flags shift-left for 8 bits, need shift-right back */\n+\tl4type = _mm_shuffle_epi8(l4type_flags, l4type);\n+\n+\tl4type = _mm_slli_epi16(l4type, 8);\n+\tl3l4type0 = _mm_or_si128(l3type, l4type);\n+\tvol.dword = _mm_cvtsi128_si64(l3l4type0);\n+\n+\trx_pkts[0]->packet_type = vol.e[0];\n+\trx_pkts[1]->packet_type = vol.e[1];\n+\trx_pkts[2]->packet_type = vol.e[2];\n+\trx_pkts[3]->packet_type = vol.e[3];\n+}\n+#else\n+#define fm10k_desc_to_olflags_v(desc, rx_pkts) do {} while (0)\n+#define fm10k_desc_to_pktype_v(desc, rx_pkts) do {} while (0)\n+#endif\n+\n int __attribute__((cold))\n fm10k_rxq_vec_setup(struct fm10k_rx_queue *rxq)\n {\n@@ -64,3 +191,302 @@ fm10k_rxq_vec_setup(struct fm10k_rx_queue *rxq)\n \trxq->mbuf_initializer = *(uint64_t *)p;\n \treturn 0;\n }\n+\n+static inline void\n+fm10k_rxq_rearm(struct fm10k_rx_queue *rxq)\n+{\n+\tint i;\n+\tuint16_t rx_id;\n+\tvolatile union fm10k_rx_desc *rxdp;\n+\tstruct rte_mbuf **mb_alloc = &rxq->sw_ring[rxq->rxrearm_start];\n+\tstruct rte_mbuf *mb0, *mb1;\n+\t__m128i head_off = _mm_set_epi64x(\n+\t\t\tRTE_PKTMBUF_HEADROOM + FM10K_RX_DATABUF_ALIGN - 1,\n+\t\t\tRTE_PKTMBUF_HEADROOM + FM10K_RX_DATABUF_ALIGN - 1);\n+\t__m128i dma_addr0, dma_addr1;\n+\t/* Rx buffer need to be aligned with 512 byte */\n+\tconst __m128i hba_msk = _mm_set_epi64x(0,\n+\t\t\t\tUINT64_MAX - FM10K_RX_DATABUF_ALIGN + 1);\n+\n+\trxdp = rxq->hw_ring + rxq->rxrearm_start;\n+\n+\t/* Pull 'n' more MBUFs into the software ring */\n+\tif (rte_mempool_get_bulk(rxq->mp,\n+\t\t\t\t (void *)mb_alloc,\n+\t\t\t\t RTE_FM10K_RXQ_REARM_THRESH) < 0) {\n+\t\tdma_addr0 = _mm_setzero_si128();\n+\t\t/* Clean up all the HW/SW ring content */\n+\t\tfor (i = 0; i < RTE_FM10K_RXQ_REARM_THRESH; i++) {\n+\t\t\tmb_alloc[i] = &rxq->fake_mbuf;\n+\t\t\t_mm_store_si128((__m128i *)&rxdp[i].q,\n+\t\t\t\t\t\tdma_addr0);\n+\t\t}\n+\n+\t\trte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=\n+\t\t\tRTE_FM10K_RXQ_REARM_THRESH;\n+\t\treturn;\n+\t}\n+\n+\t/* Initialize the mbufs in vector, process 2 mbufs in one loop */\n+\tfor (i = 0; i < RTE_FM10K_RXQ_REARM_THRESH; i += 2, mb_alloc += 2) {\n+\t\t__m128i vaddr0, vaddr1;\n+\t\tuintptr_t p0, p1;\n+\n+\t\tmb0 = mb_alloc[0];\n+\t\tmb1 = mb_alloc[1];\n+\n+\t\t/* Flush mbuf with pkt template.\n+\t\t * Data to be rearmed is 6 bytes long.\n+\t\t * Though, RX will overwrite ol_flags that are coming next\n+\t\t * anyway. So overwrite whole 8 bytes with one load:\n+\t\t * 6 bytes of rearm_data plus first 2 bytes of ol_flags.\n+\t\t */\n+\t\tp0 = (uintptr_t)&mb0->rearm_data;\n+\t\t*(uint64_t *)p0 = rxq->mbuf_initializer;\n+\t\tp1 = (uintptr_t)&mb1->rearm_data;\n+\t\t*(uint64_t *)p1 = rxq->mbuf_initializer;\n+\n+\t\t/* load buf_addr(lo 64bit) and buf_physaddr(hi 64bit) */\n+\t\tvaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr);\n+\t\tvaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr);\n+\n+\t\t/* convert pa to dma_addr hdr/data */\n+\t\tdma_addr0 = _mm_unpackhi_epi64(vaddr0, vaddr0);\n+\t\tdma_addr1 = _mm_unpackhi_epi64(vaddr1, vaddr1);\n+\n+\t\t/* add headroom to pa values */\n+\t\tdma_addr0 = _mm_add_epi64(dma_addr0, head_off);\n+\t\tdma_addr1 = _mm_add_epi64(dma_addr1, head_off);\n+\n+\t\t/* Do 512 byte alignment to satisfy HW requirement, in the\n+\t\t * meanwhile, set Header Buffer Address to zero.\n+\t\t */\n+\t\tdma_addr0 = _mm_and_si128(dma_addr0, hba_msk);\n+\t\tdma_addr1 = _mm_and_si128(dma_addr1, hba_msk);\n+\n+\t\t/* flush desc with pa dma_addr */\n+\t\t_mm_store_si128((__m128i *)&rxdp++->q, dma_addr0);\n+\t\t_mm_store_si128((__m128i *)&rxdp++->q, dma_addr1);\n+\n+\t\t/* enforce 512B alignment on default Rx virtual addresses */\n+\t\tmb0->data_off = (uint16_t)(RTE_PTR_ALIGN((char *)mb0->buf_addr\n+\t\t\t\t+ RTE_PKTMBUF_HEADROOM, FM10K_RX_DATABUF_ALIGN)\n+\t\t\t\t- (char *)mb0->buf_addr);\n+\t\tmb1->data_off = (uint16_t)(RTE_PTR_ALIGN((char *)mb1->buf_addr\n+\t\t\t\t+ RTE_PKTMBUF_HEADROOM, FM10K_RX_DATABUF_ALIGN)\n+\t\t\t\t- (char *)mb1->buf_addr);\n+\t}\n+\n+\trxq->rxrearm_start += RTE_FM10K_RXQ_REARM_THRESH;\n+\tif (rxq->rxrearm_start >= rxq->nb_desc)\n+\t\trxq->rxrearm_start = 0;\n+\n+\trxq->rxrearm_nb -= RTE_FM10K_RXQ_REARM_THRESH;\n+\n+\trx_id = (uint16_t)((rxq->rxrearm_start == 0) ?\n+\t\t\t(rxq->nb_desc - 1) : (rxq->rxrearm_start - 1));\n+\n+\t/* Update the tail pointer on the NIC */\n+\tFM10K_PCI_REG_WRITE(rxq->tail_ptr, rx_id);\n+}\n+\n+static inline uint16_t\n+fm10k_recv_raw_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,\n+\t\tuint16_t nb_pkts, uint8_t *split_packet)\n+{\n+\tvolatile union fm10k_rx_desc *rxdp;\n+\tstruct rte_mbuf **mbufp;\n+\tuint16_t nb_pkts_recd;\n+\tint pos;\n+\tstruct fm10k_rx_queue *rxq = rx_queue;\n+\tuint64_t var;\n+\t__m128i shuf_msk;\n+\t__m128i dd_check, eop_check;\n+\tuint16_t next_dd;\n+\n+\tnext_dd = rxq->next_dd;\n+\n+\t/* Just the act of getting into the function from the application is\n+\t * going to cost about 7 cycles\n+\t */\n+\trxdp = rxq->hw_ring + next_dd;\n+\n+\t_mm_prefetch((const void *)rxdp, _MM_HINT_T0);\n+\n+\t/* See if we need to rearm the RX queue - gives the prefetch a bit\n+\t * of time to act\n+\t */\n+\tif (rxq->rxrearm_nb > RTE_FM10K_RXQ_REARM_THRESH)\n+\t\tfm10k_rxq_rearm(rxq);\n+\n+\t/* Before we start moving massive data around, check to see if\n+\t * there is actually a packet available\n+\t */\n+\tif (!(rxdp->d.staterr & FM10K_RXD_STATUS_DD))\n+\t\treturn 0;\n+\n+\t/* Vecotr RX will process 4 packets at a time, strip the unaligned\n+\t * tails in case it's not multiple of 4.\n+\t */\n+\tnb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_FM10K_DESCS_PER_LOOP);\n+\n+\t/* 4 packets DD mask */\n+\tdd_check = _mm_set_epi64x(0x0000000100000001LL, 0x0000000100000001LL);\n+\n+\t/* 4 packets EOP mask */\n+\teop_check = _mm_set_epi64x(0x0000000200000002LL, 0x0000000200000002LL);\n+\n+\t/* mask to shuffle from desc. to mbuf */\n+\tshuf_msk = _mm_set_epi8(\n+\t\t7, 6, 5, 4,  /* octet 4~7, 32bits rss */\n+\t\t15, 14,      /* octet 14~15, low 16 bits vlan_macip */\n+\t\t13, 12,      /* octet 12~13, 16 bits data_len */\n+\t\t0xFF, 0xFF,  /* skip high 16 bits pkt_len, zero out */\n+\t\t13, 12,      /* octet 12~13, low 16 bits pkt_len */\n+\t\t0xFF, 0xFF,  /* skip high 16 bits pkt_type */\n+\t\t0xFF, 0xFF   /* Skip pkt_type field in shuffle operation */\n+\t\t);\n+\n+\t/* Cache is empty -> need to scan the buffer rings, but first move\n+\t * the next 'n' mbufs into the cache\n+\t */\n+\tmbufp = &rxq->sw_ring[next_dd];\n+\n+\t/* A. load 4 packet in one loop\n+\t * [A*. mask out 4 unused dirty field in desc]\n+\t * B. copy 4 mbuf point from swring to rx_pkts\n+\t * C. calc the number of DD bits among the 4 packets\n+\t * [C*. extract the end-of-packet bit, if requested]\n+\t * D. fill info. from desc to mbuf\n+\t */\n+\tfor (pos = 0, nb_pkts_recd = 0; pos < nb_pkts;\n+\t\t\tpos += RTE_FM10K_DESCS_PER_LOOP,\n+\t\t\trxdp += RTE_FM10K_DESCS_PER_LOOP) {\n+\t\t__m128i descs0[RTE_FM10K_DESCS_PER_LOOP];\n+\t\t__m128i pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;\n+\t\t__m128i zero, staterr, sterr_tmp1, sterr_tmp2;\n+\t\t__m128i mbp1, mbp2; /* two mbuf pointer in one XMM reg. */\n+\n+\t\t/* B.1 load 1 mbuf point */\n+\t\tmbp1 = _mm_loadu_si128((__m128i *)&mbufp[pos]);\n+\n+\t\t/* Read desc statuses backwards to avoid race condition */\n+\t\t/* A.1 load 4 pkts desc */\n+\t\tdescs0[3] = _mm_loadu_si128((__m128i *)(rxdp + 3));\n+\n+\t\t/* B.2 copy 2 mbuf point into rx_pkts  */\n+\t\t_mm_storeu_si128((__m128i *)&rx_pkts[pos], mbp1);\n+\n+\t\t/* B.1 load 1 mbuf point */\n+\t\tmbp2 = _mm_loadu_si128((__m128i *)&mbufp[pos+2]);\n+\n+\t\tdescs0[2] = _mm_loadu_si128((__m128i *)(rxdp + 2));\n+\t\t/* B.1 load 2 mbuf point */\n+\t\tdescs0[1] = _mm_loadu_si128((__m128i *)(rxdp + 1));\n+\t\tdescs0[0] = _mm_loadu_si128((__m128i *)(rxdp));\n+\n+\t\t/* B.2 copy 2 mbuf point into rx_pkts  */\n+\t\t_mm_storeu_si128((__m128i *)&rx_pkts[pos+2], mbp2);\n+\n+\t\t/* avoid compiler reorder optimization */\n+\t\trte_compiler_barrier();\n+\n+\t\tif (split_packet) {\n+\t\t\trte_prefetch0(&rx_pkts[pos]->cacheline1);\n+\t\t\trte_prefetch0(&rx_pkts[pos + 1]->cacheline1);\n+\t\t\trte_prefetch0(&rx_pkts[pos + 2]->cacheline1);\n+\t\t\trte_prefetch0(&rx_pkts[pos + 3]->cacheline1);\n+\t\t}\n+\n+\t\t/* D.1 pkt 3,4 convert format from desc to pktmbuf */\n+\t\tpkt_mb4 = _mm_shuffle_epi8(descs0[3], shuf_msk);\n+\t\tpkt_mb3 = _mm_shuffle_epi8(descs0[2], shuf_msk);\n+\n+\t\t/* C.1 4=>2 filter staterr info only */\n+\t\tsterr_tmp2 = _mm_unpackhi_epi32(descs0[3], descs0[2]);\n+\t\t/* C.1 4=>2 filter staterr info only */\n+\t\tsterr_tmp1 = _mm_unpackhi_epi32(descs0[1], descs0[0]);\n+\n+\t\t/* set ol_flags with vlan packet type */\n+\t\tfm10k_desc_to_olflags_v(descs0, &rx_pkts[pos]);\n+\n+\t\t/* D.1 pkt 1,2 convert format from desc to pktmbuf */\n+\t\tpkt_mb2 = _mm_shuffle_epi8(descs0[1], shuf_msk);\n+\t\tpkt_mb1 = _mm_shuffle_epi8(descs0[0], shuf_msk);\n+\n+\t\t/* C.2 get 4 pkts staterr value  */\n+\t\tzero = _mm_xor_si128(dd_check, dd_check);\n+\t\tstaterr = _mm_unpacklo_epi32(sterr_tmp1, sterr_tmp2);\n+\n+\t\t/* D.3 copy final 3,4 data to rx_pkts */\n+\t\t_mm_storeu_si128((void *)&rx_pkts[pos+3]->rx_descriptor_fields1,\n+\t\t\t\tpkt_mb4);\n+\t\t_mm_storeu_si128((void *)&rx_pkts[pos+2]->rx_descriptor_fields1,\n+\t\t\t\tpkt_mb3);\n+\n+\t\t/* C* extract and record EOP bit */\n+\t\tif (split_packet) {\n+\t\t\t__m128i eop_shuf_mask = _mm_set_epi8(\n+\t\t\t\t\t0xFF, 0xFF, 0xFF, 0xFF,\n+\t\t\t\t\t0xFF, 0xFF, 0xFF, 0xFF,\n+\t\t\t\t\t0xFF, 0xFF, 0xFF, 0xFF,\n+\t\t\t\t\t0x04, 0x0C, 0x00, 0x08\n+\t\t\t\t\t);\n+\n+\t\t\t/* and with mask to extract bits, flipping 1-0 */\n+\t\t\t__m128i eop_bits = _mm_andnot_si128(staterr, eop_check);\n+\t\t\t/* the staterr values are not in order, as the count\n+\t\t\t * count of dd bits doesn't care. However, for end of\n+\t\t\t * packet tracking, we do care, so shuffle. This also\n+\t\t\t * compresses the 32-bit values to 8-bit\n+\t\t\t */\n+\t\t\teop_bits = _mm_shuffle_epi8(eop_bits, eop_shuf_mask);\n+\t\t\t/* store the resulting 32-bit value */\n+\t\t\t*(int *)split_packet = _mm_cvtsi128_si32(eop_bits);\n+\t\t\tsplit_packet += RTE_FM10K_DESCS_PER_LOOP;\n+\n+\t\t\t/* zero-out next pointers */\n+\t\t\trx_pkts[pos]->next = NULL;\n+\t\t\trx_pkts[pos + 1]->next = NULL;\n+\t\t\trx_pkts[pos + 2]->next = NULL;\n+\t\t\trx_pkts[pos + 3]->next = NULL;\n+\t\t}\n+\n+\t\t/* C.3 calc available number of desc */\n+\t\tstaterr = _mm_and_si128(staterr, dd_check);\n+\t\tstaterr = _mm_packs_epi32(staterr, zero);\n+\n+\t\t/* D.3 copy final 1,2 data to rx_pkts */\n+\t\t_mm_storeu_si128((void *)&rx_pkts[pos+1]->rx_descriptor_fields1,\n+\t\t\t\tpkt_mb2);\n+\t\t_mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1,\n+\t\t\t\tpkt_mb1);\n+\n+\t\tfm10k_desc_to_pktype_v(descs0, &rx_pkts[pos]);\n+\n+\t\t/* C.4 calc avaialbe number of desc */\n+\t\tvar = __builtin_popcountll(_mm_cvtsi128_si64(staterr));\n+\t\tnb_pkts_recd += var;\n+\t\tif (likely(var != RTE_FM10K_DESCS_PER_LOOP))\n+\t\t\tbreak;\n+\t}\n+\n+\t/* Update our internal tail pointer */\n+\trxq->next_dd = (uint16_t)(rxq->next_dd + nb_pkts_recd);\n+\trxq->next_dd = (uint16_t)(rxq->next_dd & (rxq->nb_desc - 1));\n+\trxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + nb_pkts_recd);\n+\n+\treturn nb_pkts_recd;\n+}\n+\n+/* vPMD receive routine\n+ *\n+ * Notice:\n+ * - don't support ol_flags for rss and csum err\n+ */\n+uint16_t\n+fm10k_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,\n+\t\tuint16_t nb_pkts)\n+{\n+\treturn fm10k_recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL);\n+}\n",
    "prefixes": [
        "dpdk-dev",
        "v5",
        "04/14"
    ]
}