get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/128645/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 128645,
    "url": "http://patches.dpdk.org/api/patches/128645/?format=api",
    "web_url": "http://patches.dpdk.org/project/dpdk/patch/20230614014948.3495063-4-zhichaox.zeng@intel.com/",
    "project": {
        "id": 1,
        "url": "http://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20230614014948.3495063-4-zhichaox.zeng@intel.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20230614014948.3495063-4-zhichaox.zeng@intel.com",
    "date": "2023-06-14T01:49:48",
    "name": "[v5,3/3] net/iavf: support Rx timestamp offload on SSE",
    "commit_ref": null,
    "pull_url": null,
    "state": "accepted",
    "archived": true,
    "hash": "be2f1a920e7162dfdf2f6fe483db10359719d4f4",
    "submitter": {
        "id": 2644,
        "url": "http://patches.dpdk.org/api/people/2644/?format=api",
        "name": "Zhichao Zeng",
        "email": "zhichaox.zeng@intel.com"
    },
    "delegate": {
        "id": 1540,
        "url": "http://patches.dpdk.org/api/users/1540/?format=api",
        "username": "qzhan15",
        "first_name": "Qi",
        "last_name": "Zhang",
        "email": "qi.z.zhang@intel.com"
    },
    "mbox": "http://patches.dpdk.org/project/dpdk/patch/20230614014948.3495063-4-zhichaox.zeng@intel.com/mbox/",
    "series": [
        {
            "id": 28496,
            "url": "http://patches.dpdk.org/api/series/28496/?format=api",
            "web_url": "http://patches.dpdk.org/project/dpdk/list/?series=28496",
            "date": "2023-06-14T01:49:45",
            "name": "Enable iavf Rx Timestamp offload on vector path",
            "version": 5,
            "mbox": "http://patches.dpdk.org/series/28496/mbox/"
        }
    ],
    "comments": "http://patches.dpdk.org/api/patches/128645/comments/",
    "check": "warning",
    "checks": "http://patches.dpdk.org/api/patches/128645/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@inbox.dpdk.org",
        "Delivered-To": "patchwork@inbox.dpdk.org",
        "Received": [
            "from mails.dpdk.org (mails.dpdk.org [217.70.189.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id 9A8FC42CB0;\n\tWed, 14 Jun 2023 03:44:01 +0200 (CEST)",
            "from mails.dpdk.org (localhost [127.0.0.1])\n\tby mails.dpdk.org (Postfix) with ESMTP id E58EA42D20;\n\tWed, 14 Jun 2023 03:43:48 +0200 (CEST)",
            "from mga06.intel.com (mga06b.intel.com [134.134.136.31])\n by mails.dpdk.org (Postfix) with ESMTP id B86C942BFE\n for <dev@dpdk.org>; Wed, 14 Jun 2023 03:43:47 +0200 (CEST)",
            "from orsmga008.jf.intel.com ([10.7.209.65])\n by orsmga104.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384;\n 13 Jun 2023 18:43:47 -0700",
            "from unknown (HELO zhichao-dpdk..) ([10.239.252.103])\n by orsmga008-auth.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384;\n 13 Jun 2023 18:43:44 -0700"
        ],
        "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/simple;\n d=intel.com; i=@intel.com; q=dns/txt; s=Intel;\n t=1686707027; x=1718243027;\n h=from:to:cc:subject:date:message-id:in-reply-to:\n references:mime-version:content-transfer-encoding;\n bh=daRBQ+j+AFXhWFVWEi6bIPVN7YgaBoHEvRT8cZwJ0Ug=;\n b=c1MADWUtRN232eC0ePqMemGi97F0rdX/OXxjLyI9028Rd3uzQ+78dj0c\n v6wNoDdqyqH62f/eHo+lsKRU6OdGi01Qns1OiiAlbt+ADlhyy1PQOd0iW\n jdTZjvE4tIq/iqSHgvjcuiIr8MDegMmPYF/hFmMdxtjmeVk4ZYn0B2k2w\n /N30kuK3tDNX10KP1n32300r2CXAeVvYSn1oLwTu3NOpa35kGt1YIT8mU\n 1/1jmY4EAGEFdoUVwuv/UcmVw6bPm0OG6zWOPqaiTwBEbw7NfRKz77lhh\n Zl+RggmCOYDxkvaCZp0fGT1nJZjn5gVRKKJg38k87+yInlnMh618XcmBS Q==;",
        "X-IronPort-AV": [
            "E=McAfee;i=\"6600,9927,10740\"; a=\"422095177\"",
            "E=Sophos;i=\"6.00,241,1681196400\"; d=\"scan'208\";a=\"422095177\"",
            "E=McAfee;i=\"6600,9927,10740\"; a=\"741655352\"",
            "E=Sophos;i=\"6.00,241,1681196400\"; d=\"scan'208\";a=\"741655352\""
        ],
        "X-ExtLoop1": "1",
        "From": "Zhichao Zeng <zhichaox.zeng@intel.com>",
        "To": "dev@dpdk.org",
        "Cc": "qi.z.zhang@intel.com, yaqi.tang@intel.com,\n Zhichao Zeng <zhichaox.zeng@intel.com>,\n Bruce Richardson <bruce.richardson@intel.com>,\n Konstantin Ananyev <konstantin.v.ananyev@yandex.ru>,\n Jingjing Wu <jingjing.wu@intel.com>, Beilei Xing <beilei.xing@intel.com>",
        "Subject": "[PATCH v5 3/3] net/iavf: support Rx timestamp offload on SSE",
        "Date": "Wed, 14 Jun 2023 09:49:48 +0800",
        "Message-Id": "<20230614014948.3495063-4-zhichaox.zeng@intel.com>",
        "X-Mailer": "git-send-email 2.34.1",
        "In-Reply-To": "<20230614014948.3495063-1-zhichaox.zeng@intel.com>",
        "References": "<20230526095055.2855121-1-zhichaox.zeng@intel.com>\n <20230614014948.3495063-1-zhichaox.zeng@intel.com>",
        "MIME-Version": "1.0",
        "Content-Transfer-Encoding": "8bit",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.29",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org"
    },
    "content": "This patch enables Rx timestamp offload on the SSE data path.\n\nIt significantly reduces the performance drop when\nRTE_ETH_RX_OFFLOAD_TIMESTAMP is enabled.\n\n---\nv5: fix CI errors\n---\nv4: rework avx2 patch based on offload path\n---\nv3: logging with driver dedicated macro\n---\nv2: fix compile warning and timestamp error\n\nSigned-off-by: Zhichao Zeng <zhichaox.zeng@intel.com>\n---\n drivers/net/iavf/iavf_rxtx_vec_sse.c | 160 ++++++++++++++++++++++++++-\n 1 file changed, 157 insertions(+), 3 deletions(-)",
    "diff": "diff --git a/drivers/net/iavf/iavf_rxtx_vec_sse.c b/drivers/net/iavf/iavf_rxtx_vec_sse.c\nindex 3f30be01aa..892bfa4cf3 100644\n--- a/drivers/net/iavf/iavf_rxtx_vec_sse.c\n+++ b/drivers/net/iavf/iavf_rxtx_vec_sse.c\n@@ -392,6 +392,11 @@ flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],\n \t\t\t_mm_extract_epi32(fdir_id0_3, 3);\n \t} /* if() on fdir_enabled */\n \n+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC\n+\tif (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)\n+\t\tflags = _mm_or_si128(flags, _mm_set1_epi32(iavf_timestamp_dynflag));\n+#endif\n+\n \t/**\n \t * At this point, we have the 4 sets of flags in the low 16-bits\n \t * of each 32-bit value in flags.\n@@ -723,7 +728,9 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,\n \tint pos;\n \tuint64_t var;\n \tstruct iavf_adapter *adapter = rxq->vsi->adapter;\n+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC\n \tuint64_t offloads = adapter->dev_data->dev_conf.rxmode.offloads;\n+#endif\n \tconst uint32_t *ptype_tbl = adapter->ptype_tbl;\n \t__m128i crc_adjust = _mm_set_epi16\n \t\t\t\t(0, 0, 0,       /* ignore non-length fields */\n@@ -793,6 +800,24 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,\n \t      rte_cpu_to_le_32(1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)))\n \t\treturn 0;\n \n+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC\n+\tuint8_t inflection_point = 0;\n+\tbool is_tsinit = false;\n+\t__m128i hw_low_last = _mm_set_epi32(0, 0, 0, (uint32_t)rxq->phc_time);\n+\n+\tif (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {\n+\t\tuint64_t sw_cur_time = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);\n+\n+\t\tif (unlikely(sw_cur_time - rxq->hw_time_update > 4)) {\n+\t\t\thw_low_last = _mm_setzero_si128();\n+\t\t\tis_tsinit = 1;\n+\t\t} else {\n+\t\t\thw_low_last = _mm_set_epi32(0, 0, 0, (uint32_t)rxq->phc_time);\n+\t\t}\n+\t}\n+\n+#endif\n+\n \t/**\n \t * Compile-time verify the shuffle mask\n \t * NOTE: some field positions already verified above, but duplicated\n@@ -825,7 +850,7 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,\n \t     rxdp += IAVF_VPMD_DESCS_PER_LOOP) {\n \t\t__m128i descs[IAVF_VPMD_DESCS_PER_LOOP];\n #ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC\n-\t\t__m128i descs_bh[IAVF_VPMD_DESCS_PER_LOOP];\n+\t\t__m128i descs_bh[IAVF_VPMD_DESCS_PER_LOOP] = {_mm_setzero_si128()};\n #endif\n \t\t__m128i pkt_mb0, pkt_mb1, pkt_mb2, pkt_mb3;\n \t\t__m128i staterr, sterr_tmp1, sterr_tmp2;\n@@ -895,10 +920,11 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,\n \n #ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC\n \t\t/**\n-\t\t * needs to load 2nd 16B of each desc for RSS hash parsing,\n+\t\t * needs to load 2nd 16B of each desc,\n \t\t * will cause performance drop to get into this context.\n \t\t */\n \t\tif (offloads & RTE_ETH_RX_OFFLOAD_RSS_HASH ||\n+\t\t\toffloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP ||\n \t\t\trxq->rx_flags & IAVF_RX_FLAGS_VLAN_TAG_LOC_L2TAG2_2) {\n \t\t\t/* load bottom half of every 32B desc */\n \t\t\tdescs_bh[3] = _mm_load_si128\n@@ -964,7 +990,94 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,\n \t\t\tpkt_mb2 = _mm_or_si128(pkt_mb2, vlan_tci2);\n \t\t\tpkt_mb1 = _mm_or_si128(pkt_mb1, vlan_tci1);\n \t\t\tpkt_mb0 = _mm_or_si128(pkt_mb0, vlan_tci0);\n-\t\t}\n+\t\t} /* if() on Vlan parsing */\n+\n+\t\tif (offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {\n+\t\t\tuint32_t mask = 0xFFFFFFFF;\n+\t\t\t__m128i ts;\n+\t\t\t__m128i ts_low = _mm_setzero_si128();\n+\t\t\t__m128i ts_low1;\n+\t\t\t__m128i max_ret;\n+\t\t\t__m128i cmp_ret;\n+\t\t\tuint8_t ret = 0;\n+\t\t\tuint8_t shift = 4;\n+\t\t\t__m128i ts_desp_mask = _mm_set_epi32(mask, 0, 0, 0);\n+\t\t\t__m128i cmp_mask = _mm_set1_epi32(mask);\n+\n+\t\t\tts = _mm_and_si128(descs_bh[0], ts_desp_mask);\n+\t\t\tts_low = _mm_or_si128(ts_low, _mm_srli_si128(ts, 3 * 4));\n+\t\t\tts = _mm_and_si128(descs_bh[1], ts_desp_mask);\n+\t\t\tts_low = _mm_or_si128(ts_low, _mm_srli_si128(ts, 2 * 4));\n+\t\t\tts = _mm_and_si128(descs_bh[2], ts_desp_mask);\n+\t\t\tts_low = _mm_or_si128(ts_low, _mm_srli_si128(ts, 1 * 4));\n+\t\t\tts = _mm_and_si128(descs_bh[3], ts_desp_mask);\n+\t\t\tts_low = _mm_or_si128(ts_low, ts);\n+\n+\t\t\tts_low1 = _mm_slli_si128(ts_low, 4);\n+\t\t\tts_low1 = _mm_and_si128(ts_low, _mm_set_epi32(mask, mask, mask, 0));\n+\t\t\tts_low1 = _mm_or_si128(ts_low1, hw_low_last);\n+\t\t\thw_low_last = _mm_and_si128(ts_low, _mm_set_epi32(0, 0, 0, mask));\n+\n+\t\t\t*RTE_MBUF_DYNFIELD(rx_pkts[pos + 0],\n+\t\t\t\tiavf_timestamp_dynfield_offset, uint32_t *) = _mm_extract_epi32(ts_low, 0);\n+\t\t\t*RTE_MBUF_DYNFIELD(rx_pkts[pos + 1],\n+\t\t\t\tiavf_timestamp_dynfield_offset, uint32_t *) = _mm_extract_epi32(ts_low, 1);\n+\t\t\t*RTE_MBUF_DYNFIELD(rx_pkts[pos + 2],\n+\t\t\t\tiavf_timestamp_dynfield_offset, uint32_t *) = _mm_extract_epi32(ts_low, 2);\n+\t\t\t*RTE_MBUF_DYNFIELD(rx_pkts[pos + 3],\n+\t\t\t\tiavf_timestamp_dynfield_offset, uint32_t *) = _mm_extract_epi32(ts_low, 3);\n+\n+\t\t\tif (unlikely(is_tsinit)) {\n+\t\t\t\tuint32_t in_timestamp;\n+\n+\t\t\t\tif (iavf_get_phc_time(rxq))\n+\t\t\t\t\tPMD_DRV_LOG(ERR, \"get physical time failed\");\n+\t\t\t\tin_timestamp = *RTE_MBUF_DYNFIELD(rx_pkts[pos + 0],\n+\t\t\t\t\t\t\tiavf_timestamp_dynfield_offset, uint32_t *);\n+\t\t\t\trxq->phc_time = iavf_tstamp_convert_32b_64b(rxq->phc_time, in_timestamp);\n+\t\t\t}\n+\n+\t\t\t*RTE_MBUF_DYNFIELD(rx_pkts[pos + 0],\n+\t\t\t\tiavf_timestamp_dynfield_offset + 4, uint32_t *) = (uint32_t)(rxq->phc_time >> 32);\n+\t\t\t*RTE_MBUF_DYNFIELD(rx_pkts[pos + 1],\n+\t\t\t\tiavf_timestamp_dynfield_offset + 4, uint32_t *) = (uint32_t)(rxq->phc_time >> 32);\n+\t\t\t*RTE_MBUF_DYNFIELD(rx_pkts[pos + 2],\n+\t\t\t\tiavf_timestamp_dynfield_offset + 4, uint32_t *) = (uint32_t)(rxq->phc_time >> 32);\n+\t\t\t*RTE_MBUF_DYNFIELD(rx_pkts[pos + 3],\n+\t\t\t\tiavf_timestamp_dynfield_offset + 4, uint32_t *) = (uint32_t)(rxq->phc_time >> 32);\n+\n+\t\t\tmax_ret = _mm_max_epu32(ts_low, ts_low1);\n+\t\t\tcmp_ret = _mm_andnot_si128(_mm_cmpeq_epi32(max_ret, ts_low), cmp_mask);\n+\n+\t\t\tif (_mm_testz_si128(cmp_ret, cmp_mask)) {\n+\t\t\t\tinflection_point = 0;\n+\t\t\t} else {\n+\t\t\t\tinflection_point = 1;\n+\t\t\t\twhile (shift > 1) {\n+\t\t\t\t\tshift = shift >> 1;\n+\t\t\t\t\t__m128i mask_low = _mm_setzero_si128();\n+\t\t\t\t\t__m128i mask_high = _mm_setzero_si128();\n+\t\t\t\t\tswitch (shift) {\n+\t\t\t\t\tcase 2:\n+\t\t\t\t\t\tmask_low = _mm_set_epi32(0, 0, mask, mask);\n+\t\t\t\t\t\tmask_high = _mm_set_epi32(mask, mask, 0, 0);\n+\t\t\t\t\t\tbreak;\n+\t\t\t\t\tcase 1:\n+\t\t\t\t\t\tmask_low = _mm_srli_si128(cmp_mask, 4);\n+\t\t\t\t\t\tmask_high = _mm_slli_si128(cmp_mask, 4);\n+\t\t\t\t\t\tbreak;\n+\t\t\t\t\t}\n+\t\t\t\t\tret = _mm_testz_si128(cmp_ret, mask_low);\n+\t\t\t\t\tif (ret) {\n+\t\t\t\t\t\tret = _mm_testz_si128(cmp_ret, mask_high);\n+\t\t\t\t\t\tinflection_point += ret ? 0 : shift;\n+\t\t\t\t\t\tcmp_mask = mask_high;\n+\t\t\t\t\t} else {\n+\t\t\t\t\t\tcmp_mask = mask_low;\n+\t\t\t\t\t}\n+\t\t\t\t}\n+\t\t\t}\n+\t\t} /* if() on Timestamp parsing */\n \n \t\tflex_desc_to_olflags_v(rxq, descs, descs_bh, &rx_pkts[pos]);\n #else\n@@ -1011,10 +1124,51 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,\n \t\t/* C.4 calc available number of desc */\n \t\tvar = __builtin_popcountll(_mm_cvtsi128_si64(staterr));\n \t\tnb_pkts_recd += var;\n+\n+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC\n+\t\tif (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {\n+\t\t\tinflection_point = (inflection_point <= var) ? inflection_point : 0;\n+\t\t\tswitch (inflection_point) {\n+\t\t\tcase 1:\n+\t\t\t\t*RTE_MBUF_DYNFIELD(rx_pkts[pos + 0],\n+\t\t\t\t\tiavf_timestamp_dynfield_offset + 4, uint32_t *) += 1;\n+\t\t\t\t/* fallthrough */\n+\t\t\tcase 2:\n+\t\t\t\t*RTE_MBUF_DYNFIELD(rx_pkts[pos + 1],\n+\t\t\t\t\tiavf_timestamp_dynfield_offset + 4, uint32_t *) += 1;\n+\t\t\t\t/* fallthrough */\n+\t\t\tcase 3:\n+\t\t\t\t*RTE_MBUF_DYNFIELD(rx_pkts[pos + 2],\n+\t\t\t\t\tiavf_timestamp_dynfield_offset + 4, uint32_t *) += 1;\n+\t\t\t\t/* fallthrough */\n+\t\t\tcase 4:\n+\t\t\t\t*RTE_MBUF_DYNFIELD(rx_pkts[pos + 3],\n+\t\t\t\t\tiavf_timestamp_dynfield_offset + 4, uint32_t *) += 1;\n+\t\t\t\trxq->phc_time += (uint64_t)1 << 32;\n+\t\t\t\t/* fallthrough */\n+\t\t\tcase 0:\n+\t\t\t\tbreak;\n+\t\t\tdefault:\n+\t\t\t\tPMD_DRV_LOG(ERR, \"invalid inflection point for rx timestamp\");\n+\t\t\t\tbreak;\n+\t\t\t}\n+\n+\t\t\trxq->hw_time_update = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);\n+\t\t}\n+#endif\n+\n \t\tif (likely(var != IAVF_VPMD_DESCS_PER_LOOP))\n \t\t\tbreak;\n \t}\n \n+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC\n+#ifdef IAVF_RX_TS_OFFLOAD\n+\tif (nb_pkts_recd > 0 && (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP))\n+\t\trxq->phc_time = *RTE_MBUF_DYNFIELD(rx_pkts[nb_pkts_recd - 1],\n+\t\t\t\t\t\tiavf_timestamp_dynfield_offset, uint32_t *);\n+#endif\n+#endif\n+\n \t/* Update our internal tail pointer */\n \trxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_pkts_recd);\n \trxq->rx_tail = (uint16_t)(rxq->rx_tail & (rxq->nb_rx_desc - 1));\n",
    "prefixes": [
        "v5",
        "3/3"
    ]
}