From patchwork Mon Apr 10 07:36:45 2023
X-Patchwork-Submitter: Zhichao Zeng
X-Patchwork-Id: 125873
X-Patchwork-Delegate: qi.z.zhang@intel.com
From: Zhichao Zeng
To: dev@dpdk.org
Cc: qi.z.zhang@intel.com, yaqi.tang@intel.com, Zhichao Zeng,
 Bruce Richardson, Konstantin Ananyev, Jingjing Wu, Beilei Xing
Subject: [PATCH 3/3] net/iavf: support Rx timestamp offload on SSE
Date: Mon, 10 Apr 2023 15:36:45 +0800
Message-Id: <20230410073645.2493296-1-zhichaox.zeng@intel.com>

This patch enables Rx timestamp offload on the SSE data path.

Enable the offload with the testpmd option '--enable-rx-timestamp'. Note
that reading the Rx timestamp requires loading the second 16 bytes of each
descriptor, so enabling this offload reduces Rx performance.
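For reference, a minimal sketch (not part of this patch) of how an application
could request the offload and locate the timestamp dynamic field. It uses only
the generic ethdev/mbuf-dynfield APIs; the function name and the single-queue
setup are made up for illustration:

    #include <rte_ethdev.h>
    #include <rte_mbuf_dyn.h>

    /* Illustration only: request Rx timestamping on one port and look up the
     * dynamic field the PMD fills. Depending on the PMD, the field may only
     * be registered once the device is started. */
    static int
    setup_rx_timestamp(uint16_t port_id)
    {
            struct rte_eth_conf conf = {0};

            conf.rxmode.offloads |= RTE_ETH_RX_OFFLOAD_TIMESTAMP;
            if (rte_eth_dev_configure(port_id, 1, 1, &conf) < 0)
                    return -1;

            /* Returned offset is read with
             * RTE_MBUF_DYNFIELD(mbuf, off, rte_mbuf_timestamp_t *). */
            return rte_mbuf_dynfield_lookup(RTE_MBUF_DYNFIELD_TIMESTAMP_NAME, NULL);
    }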
Signed-off-by: Zhichao Zeng
---
 drivers/net/iavf/iavf_rxtx_vec_sse.c | 163 ++++++++++++++++++++++++++-
 1 file changed, 159 insertions(+), 4 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx_vec_sse.c b/drivers/net/iavf/iavf_rxtx_vec_sse.c
index 3f30be01aa..a627fb39a1 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_sse.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_sse.c
@@ -392,6 +392,11 @@ flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4],
 				_mm_extract_epi32(fdir_id0_3, 3);
 	} /* if() on fdir_enabled */
 
+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP)
+		flags = _mm_or_si128(flags, _mm_set1_epi32(iavf_timestamp_dynflag));
+#endif
+
 	/**
 	 * At this point, we have the 4 sets of flags in the low 16-bits
 	 * of each 32-bit value in flags.
@@ -793,6 +798,24 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 			rte_cpu_to_le_32(1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)))
 		return 0;
 
+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+	uint8_t inflection_point = 0;
+	bool is_tsinit = false;
+	__m128i hw_low_last = _mm_set_epi32(0, 0, 0, (uint32_t)rxq->phc_time);
+
+	if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
+		uint64_t sw_cur_time = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
+
+		if (unlikely(sw_cur_time - rxq->hw_time_update > 4)) {
+			hw_low_last = _mm_setzero_si128();
+			is_tsinit = true;
+		} else {
+			hw_low_last = _mm_set_epi32(0, 0, 0, (uint32_t)rxq->phc_time);
+		}
+	}
+
+#endif
+
 	/**
 	 * Compile-time verify the shuffle mask
 	 * NOTE: some field positions already verified above, but duplicated
@@ -895,11 +918,12 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 
 #ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
 		/**
-		 * needs to load 2nd 16B of each desc for RSS hash parsing,
+		 * needs to load 2nd 16B of each desc,
 		 * will cause performance drop to get into this context.
 		 */
-		if (offloads & RTE_ETH_RX_OFFLOAD_RSS_HASH ||
-		    rxq->rx_flags & IAVF_RX_FLAGS_VLAN_TAG_LOC_L2TAG2_2) {
+		if (offloads & (RTE_ETH_RX_OFFLOAD_RSS_HASH |
+				RTE_ETH_RX_OFFLOAD_TIMESTAMP) ||
+		    rxq->rx_flags & IAVF_RX_FLAGS_VLAN_TAG_LOC_L2TAG2_2) {
 			/* load bottom half of every 32B desc */
 			descs_bh[3] = _mm_load_si128
 					((void *)(&rxdp[3].wb.status_error1));
@@ -964,7 +988,94 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 			pkt_mb2 = _mm_or_si128(pkt_mb2, vlan_tci2);
 			pkt_mb1 = _mm_or_si128(pkt_mb1, vlan_tci1);
 			pkt_mb0 = _mm_or_si128(pkt_mb0, vlan_tci0);
-		}
+		} /* if() on Vlan parsing */
+
+		if (offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
+			uint32_t mask = 0xFFFFFFFF;
+			__m128i ts;
+			__m128i ts_low = _mm_setzero_si128();
+			__m128i ts_low1;
+			__m128i max_ret;
+			__m128i cmp_ret;
+			uint8_t ret = 0;
+			uint8_t shift = 4;
+			__m128i ts_desp_mask = _mm_set_epi32(mask, 0, 0, 0);
+			__m128i cmp_mask = _mm_set1_epi32(mask);
+
+			ts = _mm_and_si128(descs_bh[0], ts_desp_mask);
+			ts_low = _mm_or_si128(ts_low, _mm_srli_si128(ts, 3 * 4));
+			ts = _mm_and_si128(descs_bh[1], ts_desp_mask);
+			ts_low = _mm_or_si128(ts_low, _mm_srli_si128(ts, 2 * 4));
+			ts = _mm_and_si128(descs_bh[2], ts_desp_mask);
+			ts_low = _mm_or_si128(ts_low, _mm_srli_si128(ts, 1 * 4));
+			ts = _mm_and_si128(descs_bh[3], ts_desp_mask);
+			ts_low = _mm_or_si128(ts_low, ts);
+
+			ts_low1 = _mm_slli_si128(ts_low, 4);
+			ts_low1 = _mm_and_si128(ts_low, _mm_set_epi32(mask, mask, mask, 0));
+			ts_low1 = _mm_or_si128(ts_low1, hw_low_last);
+			hw_low_last = _mm_and_si128(ts_low, _mm_set_epi32(0, 0, 0, mask));
+
+			*RTE_MBUF_DYNFIELD(rx_pkts[pos + 0],
+				iavf_timestamp_dynfield_offset, uint32_t *) = _mm_extract_epi32(ts_low, 0);
+			*RTE_MBUF_DYNFIELD(rx_pkts[pos + 1],
+				iavf_timestamp_dynfield_offset, uint32_t *) = _mm_extract_epi32(ts_low, 1);
+			*RTE_MBUF_DYNFIELD(rx_pkts[pos + 2],
+				iavf_timestamp_dynfield_offset, uint32_t *) = _mm_extract_epi32(ts_low, 2);
+			*RTE_MBUF_DYNFIELD(rx_pkts[pos + 3],
+				iavf_timestamp_dynfield_offset, uint32_t *) = _mm_extract_epi32(ts_low, 3);
+
+			if (unlikely(is_tsinit)) {
+				uint32_t in_timestamp;
+
+				if (iavf_get_phc_time(rxq))
+					PMD_DRV_LOG(ERR, "get physical time failed");
+				in_timestamp = *RTE_MBUF_DYNFIELD(rx_pkts[pos + 0],
+						iavf_timestamp_dynfield_offset, uint32_t *);
+				rxq->phc_time = iavf_tstamp_convert_32b_64b(rxq->phc_time, in_timestamp);
+			}
+
+			*RTE_MBUF_DYNFIELD(rx_pkts[pos + 0],
+				iavf_timestamp_dynfield_offset + 4, uint32_t *) = (uint32_t)(rxq->phc_time >> 32);
+			*RTE_MBUF_DYNFIELD(rx_pkts[pos + 1],
+				iavf_timestamp_dynfield_offset + 4, uint32_t *) = (uint32_t)(rxq->phc_time >> 32);
+			*RTE_MBUF_DYNFIELD(rx_pkts[pos + 2],
+				iavf_timestamp_dynfield_offset + 4, uint32_t *) = (uint32_t)(rxq->phc_time >> 32);
+			*RTE_MBUF_DYNFIELD(rx_pkts[pos + 3],
+				iavf_timestamp_dynfield_offset + 4, uint32_t *) = (uint32_t)(rxq->phc_time >> 32);
+
+			max_ret = _mm_max_epu32(ts_low, ts_low1);
+			cmp_ret = _mm_andnot_si128(_mm_cmpeq_epi32(max_ret, ts_low1), cmp_mask);
+
+			if (_mm_testz_si128(cmp_ret, cmp_mask)) {
+				inflection_point = 0;
+			} else {
+				inflection_point = 1;
+				while (shift > 1) {
+					shift = shift >> 1;
+					__m128i mask_low;
+					__m128i mask_high;
+					switch (shift) {
+					case 2:
+						mask_low = _mm_set_epi32(0, 0, mask, mask);
+						mask_high = _mm_set_epi32(mask, mask, 0, 0);
+						break;
+					case 1:
+						mask_low = _mm_srli_si128(cmp_mask, 4);
+						mask_high = _mm_slli_si128(cmp_mask, 4);
+						break;
+					}
+					ret = _mm_testz_si128(cmp_ret, mask_low);
+					if (ret) {
+						ret = _mm_testz_si128(cmp_ret, mask_high);
+						inflection_point += ret ? 0 : shift;
+						cmp_mask = mask_high;
+					} else {
+						cmp_mask = mask_low;
+					}
+				}
+			}
+		} /* if() on Timestamp parsing */
 
 		flex_desc_to_olflags_v(rxq, descs, descs_bh, &rx_pkts[pos]);
 #else
@@ -1011,10 +1122,54 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 		/* C.4 calc available number of desc */
 		var = __builtin_popcountll(_mm_cvtsi128_si64(staterr));
 		nb_pkts_recd += var;
+
+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
+		if (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP) {
+			inflection_point = (inflection_point <= var) ? inflection_point : 0;
+			switch (inflection_point) {
+			case 1:
+				*RTE_MBUF_DYNFIELD(rx_pkts[pos + 0],
+					iavf_timestamp_dynfield_offset + 4, uint32_t *) += 1;
+				break;
+			case 2:
+				*RTE_MBUF_DYNFIELD(rx_pkts[pos + 1],
+					iavf_timestamp_dynfield_offset + 4, uint32_t *) += 1;
+				break;
+			case 3:
+				*RTE_MBUF_DYNFIELD(rx_pkts[pos + 2],
+					iavf_timestamp_dynfield_offset + 4, uint32_t *) += 1;
+				break;
+			case 4:
+				*RTE_MBUF_DYNFIELD(rx_pkts[pos + 3],
+					iavf_timestamp_dynfield_offset + 4, uint32_t *) += 1;
+				rxq->phc_time += (uint64_t)1 << 32;
+				break;
+			case 0:
+				break;
+			default:
+				PMD_DRV_LOG(ERR, "invalid inflection point for rx timestamp");
+				break;
+			}
+
+			rxq->hw_time_update = rte_get_timer_cycles() / (rte_get_timer_hz() / 1000);
+		}
+#pragma GCC diagnostic pop
+#endif
+
 		if (likely(var != IAVF_VPMD_DESCS_PER_LOOP))
 			break;
 	}
 
+#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC
+#ifdef IAVF_RX_TS_OFFLOAD
+	if (nb_pkts_recd > 0 && (rxq->offloads & RTE_ETH_RX_OFFLOAD_TIMESTAMP))
+		rxq->phc_time = *RTE_MBUF_DYNFIELD(rx_pkts[nb_pkts_recd - 1],
+				iavf_timestamp_dynfield_offset, uint32_t *);
+#endif
+#endif
+
 	/* Update our internal tail pointer */
 	rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_pkts_recd);
 	rxq->rx_tail = (uint16_t)(rxq->rx_tail & (rxq->nb_rx_desc - 1));
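For clarity, a scalar sketch of the rollover handling that the vector code
above performs with hw_low_last and inflection_point. This is an illustration
only, not code from this patch; the helper name is made up, and it assumes at
most one 32-bit wrap between the cached PHC time and the new hardware value,
which is the same assumption the per-burst path makes:

    #include <stdint.h>

    /* Illustration only: extend a 32-bit hardware timestamp to 64 bits using
     * the previously cached PHC time. */
    static inline uint64_t
    extend_ts_32b_to_64b(uint64_t cached_phc_time, uint32_t hw_low32)
    {
            uint64_t high = cached_phc_time & 0xFFFFFFFF00000000ULL;

            /* A low word smaller than the cached one means the 32-bit counter
             * wrapped, so the upper word must be bumped by one. */
            if (hw_low32 < (uint32_t)cached_phc_time)
                    high += (uint64_t)1 << 32;

            return high | hw_low32;
    }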