From patchwork Mon Mar 16 07:45:52 2020 X-Patchwork-Submitter: Leyi Rong X-Patchwork-Id: 66675 X-Patchwork-Delegate: xiaolong.ye@intel.com From: Leyi Rong To: qi.z.zhang@intel.com, xiaolong.ye@intel.com Cc: dev@dpdk.org, Leyi Rong Date: Mon, 16 Mar 2020 15:45:52 +0800 Message-Id: <20200316074603.10998-2-leyi.rong@intel.com> In-Reply-To: <20200316074603.10998-1-leyi.rong@intel.com> References: <20200316074603.10998-1-leyi.rong@intel.com> Subject: [dpdk-dev] [PATCH 01/12] net/iavf: remove 16B Rx descriptor compile option
Remove CONFIG_RTE_LIBRTE_IAVF_16BYTE_RX_DESC as it's not supported by the ice PF host driver.
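With the option gone, only the 32-byte descriptor layout remains in the driver. As a minimal illustration (not part of this patch; the helper name is invented), the assumption could be pinned at build time with DPDK's RTE_BUILD_BUG_ON:

	/* Illustrative sketch only: iavf_rx_desc now always aliases
	 * iavf_32byte_rx_desc, so a future reintroduction of a 16B
	 * layout would fail to compile here.
	 */
	static inline void
	iavf_assert_rx_desc_size(void)
	{
		RTE_BUILD_BUG_ON(sizeof(union iavf_rx_desc) != 32);
	}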
Signed-off-by: Leyi Rong --- config/common_base | 1 - drivers/net/iavf/iavf_rxtx.c | 2 - drivers/net/iavf/iavf_rxtx.h | 14 +-- drivers/net/iavf/iavf_rxtx_vec_avx2.c | 148 +++++++------------------- 4 files changed, 42 insertions(+), 123 deletions(-) diff --git a/config/common_base b/config/common_base index c31175f9d..eea53cb35 100644 --- a/config/common_base +++ b/config/common_base @@ -346,7 +346,6 @@ CONFIG_RTE_LIBRTE_IAVF_DEBUG_TX=n CONFIG_RTE_LIBRTE_IAVF_DEBUG_TX_FREE=n CONFIG_RTE_LIBRTE_IAVF_DEBUG_RX=n CONFIG_RTE_LIBRTE_IAVF_DEBUG_DUMP_DESC=n -CONFIG_RTE_LIBRTE_IAVF_16BYTE_RX_DESC=n # # Compile burst-oriented IPN3KE PMD driver # diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c index 9eccb7c41..fbb18a713 100644 --- a/drivers/net/iavf/iavf_rxtx.c +++ b/drivers/net/iavf/iavf_rxtx.c @@ -230,10 +230,8 @@ alloc_rxq_mbufs(struct iavf_rx_queue *rxq) rxd = &rxq->rx_ring[i]; rxd->read.pkt_addr = dma_addr; rxd->read.hdr_addr = 0; -#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC rxd->read.rsvd1 = 0; rxd->read.rsvd2 = 0; -#endif rxq->sw_ring[i] = mbuf; } diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h index 09b5bd99e..416433504 100644 --- a/drivers/net/iavf/iavf_rxtx.h +++ b/drivers/net/iavf/iavf_rxtx.h @@ -57,12 +57,8 @@ #define IAVF_TX_OFFLOAD_NOTSUP_MASK \ (PKT_TX_OFFLOAD_MASK ^ IAVF_TX_OFFLOAD_MASK) -/* HW desc structure, both 16-byte and 32-byte types are supported */ -#ifdef RTE_LIBRTE_IAVF_16BYTE_RX_DESC -#define iavf_rx_desc iavf_16byte_rx_desc -#else +/* HW desc structure, only 32-byte type is supported */ #define iavf_rx_desc iavf_32byte_rx_desc -#endif struct iavf_rxq_ops { void (*release_mbufs)(struct iavf_rx_queue *rxq); @@ -224,20 +220,12 @@ void iavf_dump_rx_descriptor(struct iavf_rx_queue *rxq, const volatile void *desc, uint16_t rx_id) { -#ifdef RTE_LIBRTE_IAVF_16BYTE_RX_DESC - const volatile union iavf_16byte_rx_desc *rx_desc = desc; - - printf("Queue %d Rx_desc %d: QW0: 0x%016"PRIx64" QW1: 0x%016"PRIx64"\n", - rxq->queue_id, rx_id, rx_desc->read.pkt_addr, - rx_desc->read.hdr_addr); -#else const volatile union iavf_32byte_rx_desc *rx_desc = desc; printf("Queue %d Rx_desc %d: QW0: 0x%016"PRIx64" QW1: 0x%016"PRIx64 " QW2: 0x%016"PRIx64" QW3: 0x%016"PRIx64"\n", rxq->queue_id, rx_id, rx_desc->read.pkt_addr, rx_desc->read.hdr_addr, rx_desc->read.rsvd1, rx_desc->read.rsvd2); -#endif } /* All the descriptors are 16 bytes, so just use one of them diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/iavf/iavf_rxtx_vec_avx2.c index 2587083d8..4e1231162 100644 --- a/drivers/net/iavf/iavf_rxtx_vec_avx2.c +++ b/drivers/net/iavf/iavf_rxtx_vec_avx2.c @@ -40,7 +40,6 @@ iavf_rxq_rearm(struct iavf_rx_queue *rxq) return; } -#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC struct rte_mbuf *mb0, *mb1; __m128i dma_addr0, dma_addr1; __m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM, @@ -70,54 +69,6 @@ iavf_rxq_rearm(struct iavf_rx_queue *rxq) _mm_store_si128((__m128i *)&rxdp++->read, dma_addr0); _mm_store_si128((__m128i *)&rxdp++->read, dma_addr1); } -#else - struct rte_mbuf *mb0, *mb1, *mb2, *mb3; - __m256i dma_addr0_1, dma_addr2_3; - __m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM); - /* Initialize the mbufs in vector, process 4 mbufs in one loop */ - for (i = 0; i < IAVF_RXQ_REARM_THRESH; - i += 4, rxp += 4, rxdp += 4) { - __m128i vaddr0, vaddr1, vaddr2, vaddr3; - __m256i vaddr0_1, vaddr2_3; - - mb0 = rxp[0]; - mb1 = rxp[1]; - mb2 = rxp[2]; - mb3 = rxp[3]; - - /* load buf_addr(lo 64bit) and buf_physaddr(hi 64bit) */ - 
RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_physaddr) != - offsetof(struct rte_mbuf, buf_addr) + 8); - vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr); - vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr); - vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr); - vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr); - - /** - * merge 0 & 1, by casting 0 to 256-bit and inserting 1 - * into the high lanes. Similarly for 2 & 3 - */ - vaddr0_1 = - _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0), - vaddr1, 1); - vaddr2_3 = - _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2), - vaddr3, 1); - - /* convert pa to dma_addr hdr/data */ - dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1); - dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3); - - /* add headroom to pa values */ - dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room); - dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room); - - /* flush desc with pa dma_addr */ - _mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1); - _mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3); - } - -#endif rxq->rxrearm_start += IAVF_RXQ_REARM_THRESH; if (rxq->rxrearm_start >= rxq->nb_rx_desc) @@ -149,7 +100,6 @@ _iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq, /* struct iavf_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail]; */ struct rte_mbuf **sw_ring = &rxq->sw_ring[rxq->rx_tail]; volatile union iavf_rx_desc *rxdp = rxq->rx_ring + rxq->rx_tail; - const int avx_aligned = ((rxq->rx_tail & 1) == 0); rte_prefetch0(rxdp); @@ -292,8 +242,6 @@ _iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq, PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD | PKT_RX_EIP_CKSUM_BAD); - RTE_SET_USED(avx_aligned); /* for 32B descriptors we don't use this */ - uint16_t i, received; for (i = 0, received = 0; i < nb_pkts; @@ -309,61 +257,47 @@ _iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq, #endif __m256i raw_desc0_1, raw_desc2_3, raw_desc4_5, raw_desc6_7; -#ifdef RTE_LIBRTE_IAVF_16BYTE_RX_DESC - /* for AVX we need alignment otherwise loads are not atomic */ - if (avx_aligned) { - /* load in descriptors, 2 at a time, in reverse order */ - raw_desc6_7 = _mm256_load_si256((void *)(rxdp + 6)); - rte_compiler_barrier(); - raw_desc4_5 = _mm256_load_si256((void *)(rxdp + 4)); - rte_compiler_barrier(); - raw_desc2_3 = _mm256_load_si256((void *)(rxdp + 2)); - rte_compiler_barrier(); - raw_desc0_1 = _mm256_load_si256((void *)(rxdp + 0)); - } else -#endif - { - const __m128i raw_desc7 = - _mm_load_si128((void *)(rxdp + 7)); - rte_compiler_barrier(); - const __m128i raw_desc6 = - _mm_load_si128((void *)(rxdp + 6)); - rte_compiler_barrier(); - const __m128i raw_desc5 = - _mm_load_si128((void *)(rxdp + 5)); - rte_compiler_barrier(); - const __m128i raw_desc4 = - _mm_load_si128((void *)(rxdp + 4)); - rte_compiler_barrier(); - const __m128i raw_desc3 = - _mm_load_si128((void *)(rxdp + 3)); - rte_compiler_barrier(); - const __m128i raw_desc2 = - _mm_load_si128((void *)(rxdp + 2)); - rte_compiler_barrier(); - const __m128i raw_desc1 = - _mm_load_si128((void *)(rxdp + 1)); - rte_compiler_barrier(); - const __m128i raw_desc0 = - _mm_load_si128((void *)(rxdp + 0)); - - raw_desc6_7 = - _mm256_inserti128_si256 - (_mm256_castsi128_si256(raw_desc6), - raw_desc7, 1); - raw_desc4_5 = - _mm256_inserti128_si256 - (_mm256_castsi128_si256(raw_desc4), - raw_desc5, 1); - raw_desc2_3 = - _mm256_inserti128_si256 - (_mm256_castsi128_si256(raw_desc2), - raw_desc3, 1); - raw_desc0_1 = - _mm256_inserti128_si256 - (_mm256_castsi128_si256(raw_desc0), - raw_desc1, 1); - } 
+ + const __m128i raw_desc7 = + _mm_load_si128((void *)(rxdp + 7)); + rte_compiler_barrier(); + const __m128i raw_desc6 = + _mm_load_si128((void *)(rxdp + 6)); + rte_compiler_barrier(); + const __m128i raw_desc5 = + _mm_load_si128((void *)(rxdp + 5)); + rte_compiler_barrier(); + const __m128i raw_desc4 = + _mm_load_si128((void *)(rxdp + 4)); + rte_compiler_barrier(); + const __m128i raw_desc3 = + _mm_load_si128((void *)(rxdp + 3)); + rte_compiler_barrier(); + const __m128i raw_desc2 = + _mm_load_si128((void *)(rxdp + 2)); + rte_compiler_barrier(); + const __m128i raw_desc1 = + _mm_load_si128((void *)(rxdp + 1)); + rte_compiler_barrier(); + const __m128i raw_desc0 = + _mm_load_si128((void *)(rxdp + 0)); + + raw_desc6_7 = + _mm256_inserti128_si256 + (_mm256_castsi128_si256(raw_desc6), + raw_desc7, 1); + raw_desc4_5 = + _mm256_inserti128_si256 + (_mm256_castsi128_si256(raw_desc4), + raw_desc5, 1); + raw_desc2_3 = + _mm256_inserti128_si256 + (_mm256_castsi128_si256(raw_desc2), + raw_desc3, 1); + raw_desc0_1 = + _mm256_inserti128_si256 + (_mm256_castsi128_si256(raw_desc0), + raw_desc1, 1); if (split_packet) { int j;
From patchwork Mon Mar 16 07:45:53 2020 X-Patchwork-Submitter: Leyi Rong X-Patchwork-Id: 66676 X-Patchwork-Delegate: xiaolong.ye@intel.com From: Leyi Rong To: qi.z.zhang@intel.com, xiaolong.ye@intel.com Cc: dev@dpdk.org, Leyi Rong Date: Mon, 16 Mar 2020 15:45:53 +0800 Message-Id: <20200316074603.10998-3-leyi.rong@intel.com> In-Reply-To: <20200316074603.10998-1-leyi.rong@intel.com> References: <20200316074603.10998-1-leyi.rong@intel.com> Subject: [dpdk-dev] [PATCH 02/12] net/iavf: return error if opcode is mismatched
Return an error when the opcode of a message read from the adminQ does not match the pending command.
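To illustrate the effect (a sketch only, not part of this patch): a polling caller can now tell a stale reply, whose opcode does not match vf->pend_cmd, from a matching one. The helper name is invented; MAX_TRY_TIMES and ASQ_DELAY_MS are assumed to be the retry constants iavf_vchnl.c already uses.

	/* Illustrative sketch: keep polling until the reply matches the
	 * pending command instead of treating any reply as completion.
	 */
	static int
	iavf_poll_matching_reply(struct iavf_adapter *adapter,
				 uint16_t buf_len, uint8_t *buf)
	{
		int i, ret = -1;

		for (i = 0; i < MAX_TRY_TIMES; i++) {
			ret = iavf_read_msg_from_pf(adapter, buf_len, buf);
			if (ret == IAVF_SUCCESS)
				break;	/* reply opcode matched pend_cmd */
			/* IAVF_ERR_OPCODE_MISMATCH: stale reply, retry */
			rte_delay_ms(ASQ_DELAY_MS);
		}
		return ret;
	}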
Fixes: 22b123a36d07 ("net/avf: initialize PMD") Signed-off-by: Leyi Rong --- drivers/net/iavf/iavf_vchnl.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/iavf/iavf_vchnl.c b/drivers/net/iavf/iavf_vchnl.c index fa4da3a6d..b7fb05d32 100644 --- a/drivers/net/iavf/iavf_vchnl.c +++ b/drivers/net/iavf/iavf_vchnl.c @@ -52,9 +52,11 @@ iavf_read_msg_from_pf(struct iavf_adapter *adapter, uint16_t buf_len, PMD_DRV_LOG(DEBUG, "AQ from pf carries opcode %u, retval %d", opcode, vf->cmd_retval); - if (opcode != vf->pend_cmd) + if (opcode != vf->pend_cmd) { PMD_DRV_LOG(WARNING, "command mismatch, expect %u, get %u", vf->pend_cmd, opcode); + return IAVF_ERR_OPCODE_MISMATCH; + } return IAVF_SUCCESS; }
From patchwork Mon Mar 16 07:45:54 2020 X-Patchwork-Submitter: Leyi Rong X-Patchwork-Id: 66677 X-Patchwork-Delegate: xiaolong.ye@intel.com From: Leyi Rong To: qi.z.zhang@intel.com, xiaolong.ye@intel.com Cc: dev@dpdk.org, Leyi Rong Date: Mon, 16 Mar 2020 15:45:54 +0800 Message-Id: <20200316074603.10998-4-leyi.rong@intel.com> In-Reply-To: <20200316074603.10998-1-leyi.rong@intel.com> References: <20200316074603.10998-1-leyi.rong@intel.com> Subject: [dpdk-dev] [PATCH 03/12] net/iavf: support to query DDP package info
Advanced iAVF supports querying DDP package info, including the package version, track ID, package name, device serial number, and the list of protocols that the PF supports.
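For illustration, the proto_metadata bitmap carried back in struct virtchnl_pkg_info can be tested against the IAVF_COMMS_PROTO_* bits defined in the diff below. A hedged sketch (the helper name is invented):

	/* Illustrative sketch: report which comms protocols the PF's DDP
	 * package advertises, using the bitmap filled in by
	 * iavf_query_package_info().
	 */
	static void
	iavf_log_pkg_protocols(const struct iavf_info *vf)
	{
		uint64_t proto = vf->pkg_info.proto_metadata;

		if (proto & IAVF_COMMS_PROTO_GTP)
			PMD_DRV_LOG(INFO, "DDP package supports GTP");
		if (proto & IAVF_COMMS_PROTO_PPPOE)
			PMD_DRV_LOG(INFO, "DDP package supports PPPoE");
		if (proto & IAVF_COMMS_PROTO_PFCP)
			PMD_DRV_LOG(INFO, "DDP package supports PFCP");
		if (proto & IAVF_COMMS_PROTO_L2TPV3)
			PMD_DRV_LOG(INFO, "DDP package supports L2TPv3");
		if (proto & IAVF_COMMS_PROTO_ESP)
			PMD_DRV_LOG(INFO, "DDP package supports ESP");
	}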
Signed-off-by: Leyi Rong --- drivers/net/iavf/iavf.h | 9 ++++++++ drivers/net/iavf/iavf_ethdev.c | 7 ++++++ drivers/net/iavf/iavf_vchnl.c | 41 +++++++++++++++++++++++++++++++++- 3 files changed, 56 insertions(+), 1 deletion(-) diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h index 526040c6e..39c6eeec9 100644 --- a/drivers/net/iavf/iavf.h +++ b/drivers/net/iavf/iavf.h @@ -67,6 +67,13 @@ #define IAVF_48_BIT_WIDTH (CHAR_BIT * 6) #define IAVF_48_BIT_MASK RTE_LEN2MASK(IAVF_48_BIT_WIDTH, uint64_t) +/* VF supported comms protocols 64-bits bitmap */ +#define IAVF_COMMS_PROTO_GTP 0x0000000000000001 +#define IAVF_COMMS_PROTO_PPPOE 0x0000000000000002 +#define IAVF_COMMS_PROTO_PFCP 0x0000000000000004 +#define IAVF_COMMS_PROTO_L2TPV3 0x0000000000000008 +#define IAVF_COMMS_PROTO_ESP 0x0000000000000010 + struct iavf_adapter; struct iavf_rx_queue; struct iavf_tx_queue; @@ -97,6 +104,7 @@ struct iavf_info { struct virtchnl_version_info virtchnl_version; struct virtchnl_vf_resource *vf_res; /* VF resource */ struct virtchnl_vsi_resource *vsi_res; /* LAN VSI */ + struct virtchnl_pkg_info pkg_info; /* package info */ volatile enum virtchnl_ops pend_cmd; /* pending command not finished */ uint32_t cmd_retval; /* return value of the cmd response from PF */ @@ -225,6 +233,7 @@ int iavf_disable_queues(struct iavf_adapter *adapter); int iavf_configure_rss_lut(struct iavf_adapter *adapter); int iavf_configure_rss_key(struct iavf_adapter *adapter); int iavf_configure_queues(struct iavf_adapter *adapter); +int iavf_query_package_info(struct iavf_adapter *adapter); int iavf_config_irq_map(struct iavf_adapter *adapter); void iavf_add_del_all_mac_addr(struct iavf_adapter *adapter, bool add); int iavf_dev_link_update(struct rte_eth_dev *dev, diff --git a/drivers/net/iavf/iavf_ethdev.c b/drivers/net/iavf/iavf_ethdev.c index ee9f82249..9c8f789da 100644 --- a/drivers/net/iavf/iavf_ethdev.c +++ b/drivers/net/iavf/iavf_ethdev.c @@ -1236,6 +1236,13 @@ iavf_init_vf(struct rte_eth_dev *dev) goto err_rss; } } + + if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_QUERY_DDP) { + if (iavf_query_package_info(adapter) != 0) { + PMD_INIT_LOG(ERR, "iavf_query_package_info failed"); + goto err_rss; + } + } return 0; err_rss: rte_free(vf->rss_key); diff --git a/drivers/net/iavf/iavf_vchnl.c b/drivers/net/iavf/iavf_vchnl.c index b7fb05d32..288d34e8b 100644 --- a/drivers/net/iavf/iavf_vchnl.c +++ b/drivers/net/iavf/iavf_vchnl.c @@ -88,6 +88,7 @@ iavf_execute_vf_cmd(struct iavf_adapter *adapter, struct iavf_cmd_info *args) break; case VIRTCHNL_OP_VERSION: case VIRTCHNL_OP_GET_VF_RESOURCES: + case VIRTCHNL_OP_PACKAGE_INFO: /* for init virtchnl ops, need to poll the response */ do { ret = iavf_read_msg_from_pf(adapter, args->out_size, @@ -338,7 +339,8 @@ iavf_get_vf_resource(struct iavf_adapter *adapter) * add advanced/optional offload capabilities */ - caps = IAVF_BASIC_OFFLOAD_CAPS | VIRTCHNL_VF_CAP_ADV_LINK_SPEED; + caps = IAVF_BASIC_OFFLOAD_CAPS | VIRTCHNL_VF_CAP_ADV_LINK_SPEED | + VIRTCHNL_VF_OFFLOAD_QUERY_DDP; args.in_args = (uint8_t *)&caps; args.in_args_size = sizeof(caps); @@ -586,6 +588,43 @@ iavf_configure_queues(struct iavf_adapter *adapter) return err; } +int +iavf_query_package_info(struct iavf_adapter *adapter) +{ + struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter); + struct iavf_cmd_info args; + int ret; + + args.ops = VIRTCHNL_OP_PACKAGE_INFO; + args.in_args = NULL; + args.in_args_size = 0; + args.out_buffer = vf->aq_resp; + args.out_size = IAVF_AQ_BUF_SZ; + + ret = iavf_execute_vf_cmd(adapter, &args); + if
(ret) { + PMD_DRV_LOG(ERR, + "Failed to execute command of OP_PACKAGE_INFO"); + return ret; + } + + rte_memcpy(&vf->pkg_info, args.out_buffer, + sizeof(struct virtchnl_pkg_info)); + PMD_DRV_LOG(NOTICE, "pkg version is %d.%d.%d.%d, pkg name is %s," + " track id is %x, serial number is %02x%02x%02x%02x" + "%02x%02x%02x%02x, proto_metadata is 0x%016lx\n", + vf->pkg_info.p_ver.major, vf->pkg_info.p_ver.minor, + vf->pkg_info.p_ver.update, vf->pkg_info.p_ver.draft, + vf->pkg_info.pkg_name, vf->pkg_info.track_id, + vf->pkg_info.dsn[7], vf->pkg_info.dsn[6], + vf->pkg_info.dsn[5], vf->pkg_info.dsn[4], + vf->pkg_info.dsn[3], vf->pkg_info.dsn[2], + vf->pkg_info.dsn[1], vf->pkg_info.dsn[0], + vf->pkg_info.proto_metadata); + + return 0; +} + int iavf_config_irq_map(struct iavf_adapter *adapter) {
From patchwork Mon Mar 16 07:45:55 2020 X-Patchwork-Submitter: Leyi Rong X-Patchwork-Id: 66678 X-Patchwork-Delegate: xiaolong.ye@intel.com From: Leyi Rong To: qi.z.zhang@intel.com, xiaolong.ye@intel.com Cc: dev@dpdk.org, Leyi Rong Date: Mon, 16 Mar 2020 15:45:55 +0800 Message-Id: <20200316074603.10998-5-leyi.rong@intel.com> In-Reply-To: <20200316074603.10998-1-leyi.rong@intel.com> References: <20200316074603.10998-1-leyi.rong@intel.com> Subject: [dpdk-dev] [PATCH 04/12] net/iavf: flexible Rx descriptor support in normal path
Support flexible Rx descriptor format in normal path of iAVF PMD.
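As background for the code below (an illustrative contrast, not part of the patch): the legacy descriptor reports completion through the 64-bit qword1 status/length field, while the flexible layout exposes a 16-bit status_error0 word, so the DD (descriptor done) check reduces to a single-bit test. The helper name is invented:

	/* Illustrative sketch: DD is bit 0 of status_error0 in the
	 * flexible write-back format used by the receive loops below.
	 */
	static inline int
	iavf_flex_desc_done(volatile union iavf_rx_flex_desc *rxdp)
	{
		uint16_t stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0);

		return !!(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S));
	}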
Signed-off-by: Leyi Rong --- drivers/net/iavf/iavf.h | 2 + drivers/net/iavf/iavf_ethdev.c | 8 + drivers/net/iavf/iavf_rxtx.c | 507 ++++++++++++++++++++++++++++++++- drivers/net/iavf/iavf_rxtx.h | 11 + drivers/net/iavf/iavf_vchnl.c | 43 ++- 5 files changed, 567 insertions(+), 4 deletions(-) diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h index 39c6eeec9..4fe15237a 100644 --- a/drivers/net/iavf/iavf.h +++ b/drivers/net/iavf/iavf.h @@ -105,6 +105,7 @@ struct iavf_info { struct virtchnl_vf_resource *vf_res; /* VF resource */ struct virtchnl_vsi_resource *vsi_res; /* LAN VSI */ struct virtchnl_pkg_info pkg_info; /* package info */ + uint64_t supported_rxdid; volatile enum virtchnl_ops pend_cmd; /* pending command not finished */ uint32_t cmd_retval; /* return value of the cmd response from PF */ @@ -233,6 +234,7 @@ int iavf_disable_queues(struct iavf_adapter *adapter); int iavf_configure_rss_lut(struct iavf_adapter *adapter); int iavf_configure_rss_key(struct iavf_adapter *adapter); int iavf_configure_queues(struct iavf_adapter *adapter); +int iavf_get_supported_rxdid(struct iavf_adapter *adapter); int iavf_query_package_info(struct iavf_adapter *adapter); int iavf_config_irq_map(struct iavf_adapter *adapter); void iavf_add_del_all_mac_addr(struct iavf_adapter *adapter, bool add); diff --git a/drivers/net/iavf/iavf_ethdev.c b/drivers/net/iavf/iavf_ethdev.c index 9c8f789da..f7b2562c1 100644 --- a/drivers/net/iavf/iavf_ethdev.c +++ b/drivers/net/iavf/iavf_ethdev.c @@ -1243,6 +1243,14 @@ iavf_init_vf(struct rte_eth_dev *dev) goto err_rss; } } + + if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) { + if (iavf_get_supported_rxdid(adapter) != 0) { + PMD_INIT_LOG(ERR, "failed to do get supported rxdid"); + goto err_rss; + } + } + return 0; err_rss: rte_free(vf->rss_key); diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c index fbb18a713..b9b35bdbb 100644 --- a/drivers/net/iavf/iavf_rxtx.c +++ b/drivers/net/iavf/iavf_rxtx.c @@ -718,6 +718,20 @@ iavf_rxd_to_vlan_tci(struct rte_mbuf *mb, volatile union iavf_rx_desc *rxdp) } } +static inline void +iavf_flex_rxd_to_vlan_tci(struct rte_mbuf *mb, + volatile union iavf_rx_flex_desc *rxdp) +{ + if (rte_le_to_cpu_64(rxdp->wb.status_error0) & + (1 << IAVF_RX_FLEX_DESC_STATUS0_L2TAG1P_S)) { + mb->ol_flags |= PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED; + mb->vlan_tci = + rte_le_to_cpu_16(rxdp->wb.l2tag1); + } else { + mb->vlan_tci = 0; + } +} + /* Translate the rx descriptor status and error fields to pkt flags */ static inline uint64_t iavf_rxd_to_pkt_flags(uint64_t qword) @@ -752,6 +766,63 @@ iavf_rxd_to_pkt_flags(uint64_t qword) return flags; } +/* Translate the rx flex descriptor status to pkt flags */ +static inline void +iavf_rxd_to_pkt_fields(struct rte_mbuf *mb, + volatile union iavf_rx_flex_desc *rxdp) +{ + volatile struct iavf_32b_rx_flex_desc_comms_ovs *desc = + (volatile struct iavf_32b_rx_flex_desc_comms_ovs *)rxdp; + uint16_t stat_err; + +#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC + stat_err = rte_le_to_cpu_16(desc->status_error0); + if (likely(stat_err & (1 << IAVF_RX_FLEX_DESC_STATUS0_RSS_VALID_S))) { + mb->ol_flags |= PKT_RX_RSS_HASH; + mb->hash.rss = rte_le_to_cpu_32(desc->rss_hash); + } +#endif +} + +#define IAVF_RX_FLEX_ERR0_BITS \ + ((1 << IAVF_RX_FLEX_DESC_STATUS0_HBO_S) | \ + (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) | \ + (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_L4E_S) | \ + (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S) | \ + (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S) | \ + (1 <<
IAVF_RX_FLEX_DESC_STATUS0_RXE_S)) + +/* Rx L3/L4 checksum */ +static inline uint64_t +iavf_flex_rxd_error_to_pkt_flags(uint16_t stat_err0) +{ + uint64_t flags = 0; + + /* check if HW has decoded the packet and checksum */ + if (unlikely(!(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_L3L4P_S)))) + return 0; + + if (likely(!(stat_err0 & IAVF_RX_FLEX_ERR0_BITS))) { + flags |= (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD); + return flags; + } + + if (unlikely(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_IPE_S))) + flags |= PKT_RX_IP_CKSUM_BAD; + else + flags |= PKT_RX_IP_CKSUM_GOOD; + + if (unlikely(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_L4E_S))) + flags |= PKT_RX_L4_CKSUM_BAD; + else + flags |= PKT_RX_L4_CKSUM_GOOD; + + if (unlikely(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S))) + flags |= PKT_RX_EIP_CKSUM_BAD; + + return flags; +} + /* implement recv_pkts */ uint16_t iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) @@ -873,6 +944,289 @@ iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) return nb_rx; } +/* implement recv_pkts for flexible Rx descriptor */ +uint16_t +iavf_recv_pkts_flex_rxd(void *rx_queue, + struct rte_mbuf **rx_pkts, uint16_t nb_pkts) +{ + volatile union iavf_rx_desc *rx_ring; + volatile union iavf_rx_flex_desc *rxdp; + struct iavf_rx_queue *rxq; + union iavf_rx_flex_desc rxd; + struct rte_mbuf *rxe; + struct rte_eth_dev *dev; + struct rte_mbuf *rxm; + struct rte_mbuf *nmb; + uint16_t nb_rx; + uint16_t rx_stat_err0; + uint16_t rx_packet_len; + uint16_t rx_id, nb_hold; + uint64_t dma_addr; + uint64_t pkt_flags; + const uint32_t *ptype_tbl; + + nb_rx = 0; + nb_hold = 0; + rxq = rx_queue; + rx_id = rxq->rx_tail; + rx_ring = rxq->rx_ring; + ptype_tbl = rxq->vsi->adapter->ptype_tbl; + + while (nb_rx < nb_pkts) { + rxdp = (volatile union iavf_rx_flex_desc *)&rx_ring[rx_id]; + rx_stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0); + + /* Check the DD bit first */ + if (!(rx_stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S))) + break; + IAVF_DUMP_RX_DESC(rxq, rxdp, rx_id); + + nmb = rte_mbuf_raw_alloc(rxq->mp); + if (unlikely(!nmb)) { + dev = &rte_eth_devices[rxq->port_id]; + dev->data->rx_mbuf_alloc_failed++; + PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u " + "queue_id=%u", rxq->port_id, rxq->queue_id); + break; + } + + rxd = *rxdp; + nb_hold++; + rxe = rxq->sw_ring[rx_id]; + rx_id++; + if (unlikely(rx_id == rxq->nb_rx_desc)) + rx_id = 0; + + /* Prefetch next mbuf */ + rte_prefetch0(rxq->sw_ring[rx_id]); + + /* When next RX descriptor is on a cache line boundary, + * prefetch the next 4 RX descriptors and next 8 pointers + * to mbufs. 
+ */ + if ((rx_id & 0x3) == 0) { + rte_prefetch0(&rx_ring[rx_id]); + rte_prefetch0(rxq->sw_ring[rx_id]); + } + rxm = rxe; + rxe = nmb; + dma_addr = + rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb)); + rxdp->read.hdr_addr = 0; + rxdp->read.pkt_addr = dma_addr; + + rx_packet_len = (rte_le_to_cpu_16(rxd.wb.pkt_len) & + IAVF_RX_FLX_DESC_PKT_LEN_M) - rxq->crc_len; + + rxm->data_off = RTE_PKTMBUF_HEADROOM; + rte_prefetch0(RTE_PTR_ADD(rxm->buf_addr, RTE_PKTMBUF_HEADROOM)); + rxm->nb_segs = 1; + rxm->next = NULL; + rxm->pkt_len = rx_packet_len; + rxm->data_len = rx_packet_len; + rxm->port = rxq->port_id; + rxm->ol_flags = 0; + rxm->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M & + rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)]; + iavf_flex_rxd_to_vlan_tci(rxm, &rxd); + iavf_rxd_to_pkt_fields(rxm, &rxd); + pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0); + rxm->ol_flags |= pkt_flags; + + rx_pkts[nb_rx++] = rxm; + } + rxq->rx_tail = rx_id; + + /* If the number of free RX descriptors is greater than the RX free + * threshold of the queue, advance the receive tail register of queue. + * Update that register with the value of the last processed RX + * descriptor minus 1. + */ + nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold); + if (nb_hold > rxq->rx_free_thresh) { + PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u " + "nb_hold=%u nb_rx=%u", + rxq->port_id, rxq->queue_id, + rx_id, nb_hold, nb_rx); + rx_id = (uint16_t)((rx_id == 0) ? + (rxq->nb_rx_desc - 1) : (rx_id - 1)); + IAVF_PCI_REG_WRITE(rxq->qrx_tail, rx_id); + nb_hold = 0; + } + rxq->nb_rx_hold = nb_hold; + + return nb_rx; +} + +/* implement recv_scattered_pkts for flexible Rx descriptor */ +uint16_t +iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts) +{ + struct iavf_rx_queue *rxq = rx_queue; + union iavf_rx_flex_desc rxd; + struct rte_mbuf *rxe; + struct rte_mbuf *first_seg = rxq->pkt_first_seg; + struct rte_mbuf *last_seg = rxq->pkt_last_seg; + struct rte_mbuf *nmb, *rxm; + uint16_t rx_id = rxq->rx_tail; + uint16_t nb_rx = 0, nb_hold = 0, rx_packet_len; + struct rte_eth_dev *dev; + uint16_t rx_stat_err0; + uint64_t dma_addr; + uint64_t pkt_flags; + + volatile union iavf_rx_desc *rx_ring = rxq->rx_ring; + volatile union iavf_rx_flex_desc *rxdp; + const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl; + + while (nb_rx < nb_pkts) { + rxdp = (volatile union iavf_rx_flex_desc *)&rx_ring[rx_id]; + rx_stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0); + + /* Check the DD bit */ + if (!(rx_stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S))) + break; + IAVF_DUMP_RX_DESC(rxq, rxdp, rx_id); + + nmb = rte_mbuf_raw_alloc(rxq->mp); + if (unlikely(!nmb)) { + PMD_RX_LOG(DEBUG, "RX mbuf alloc failed port_id=%u " + "queue_id=%u", rxq->port_id, rxq->queue_id); + dev = &rte_eth_devices[rxq->port_id]; + dev->data->rx_mbuf_alloc_failed++; + break; + } + + rxd = *rxdp; + nb_hold++; + rxe = rxq->sw_ring[rx_id]; + rx_id++; + if (rx_id == rxq->nb_rx_desc) + rx_id = 0; + + /* Prefetch next mbuf */ + rte_prefetch0(rxq->sw_ring[rx_id]); + + /* When next RX descriptor is on a cache line boundary, + * prefetch the next 4 RX descriptors and next 8 pointers + * to mbufs. 
+ */ + if ((rx_id & 0x3) == 0) { + rte_prefetch0(&rx_ring[rx_id]); + rte_prefetch0(rxq->sw_ring[rx_id]); + } + + rxm = rxe; + rxe = nmb; + dma_addr = + rte_cpu_to_le_64(rte_mbuf_data_iova_default(nmb)); + + /* Set data buffer address and data length of the mbuf */ + rxdp->read.hdr_addr = 0; + rxdp->read.pkt_addr = dma_addr; + rx_packet_len = rte_le_to_cpu_16(rxd.wb.pkt_len) & + IAVF_RX_FLX_DESC_PKT_LEN_M; + rxm->data_len = rx_packet_len; + rxm->data_off = RTE_PKTMBUF_HEADROOM; + + /* If this is the first buffer of the received packet, set the + * pointer to the first mbuf of the packet and initialize its + * context. Otherwise, update the total length and the number + * of segments of the current scattered packet, and update the + * pointer to the last mbuf of the current packet. + */ + if (!first_seg) { + first_seg = rxm; + first_seg->nb_segs = 1; + first_seg->pkt_len = rx_packet_len; + } else { + first_seg->pkt_len = + (uint16_t)(first_seg->pkt_len + + rx_packet_len); + first_seg->nb_segs++; + last_seg->next = rxm; + } + + /* If this is not the last buffer of the received packet, + * update the pointer to the last mbuf of the current scattered + * packet and continue to parse the RX ring. + */ + if (!(rx_stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_EOF_S))) { + last_seg = rxm; + continue; + } + + /* This is the last buffer of the received packet. If the CRC + * is not stripped by the hardware: + * - Subtract the CRC length from the total packet length. + * - If the last buffer only contains the whole CRC or a part + * of it, free the mbuf associated to the last buffer. If part + * of the CRC is also contained in the previous mbuf, subtract + * the length of that CRC part from the data length of the + * previous mbuf. + */ + rxm->next = NULL; + if (unlikely(rxq->crc_len > 0)) { + first_seg->pkt_len -= RTE_ETHER_CRC_LEN; + if (rx_packet_len <= RTE_ETHER_CRC_LEN) { + rte_pktmbuf_free_seg(rxm); + first_seg->nb_segs--; + last_seg->data_len = + (uint16_t)(last_seg->data_len - + (RTE_ETHER_CRC_LEN - rx_packet_len)); + last_seg->next = NULL; + } else { + rxm->data_len = (uint16_t)(rx_packet_len - + RTE_ETHER_CRC_LEN); + } + } + + first_seg->port = rxq->port_id; + first_seg->ol_flags = 0; + first_seg->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M & + rte_le_to_cpu_16(rxd.wb.ptype_flex_flags0)]; + iavf_flex_rxd_to_vlan_tci(first_seg, &rxd); + iavf_rxd_to_pkt_fields(first_seg, &rxd); + pkt_flags = iavf_flex_rxd_error_to_pkt_flags(rx_stat_err0); + + first_seg->ol_flags |= pkt_flags; + + /* Prefetch data of first segment, if configured to do so. */ + rte_prefetch0(RTE_PTR_ADD(first_seg->buf_addr, + first_seg->data_off)); + rx_pkts[nb_rx++] = first_seg; + first_seg = NULL; + } + + /* Record index of the next RX descriptor to probe. */ + rxq->rx_tail = rx_id; + rxq->pkt_first_seg = first_seg; + rxq->pkt_last_seg = last_seg; + + /* If the number of free RX descriptors is greater than the RX free + * threshold of the queue, advance the Receive Descriptor Tail (RDT) + * register. Update the RDT with the value of the last processed RX + * descriptor minus 1, to guarantee that the RDT register is never + * equal to the RDH register, which creates a "full" ring situation + * from the hardware point of view. + */ + nb_hold = (uint16_t)(nb_hold + rxq->nb_rx_hold); + if (nb_hold > rxq->rx_free_thresh) { + PMD_RX_LOG(DEBUG, "port_id=%u queue_id=%u rx_tail=%u " + "nb_hold=%u nb_rx=%u", + rxq->port_id, rxq->queue_id, + rx_id, nb_hold, nb_rx); + rx_id = (uint16_t)(rx_id == 0 ?
+ (rxq->nb_rx_desc - 1) : (rx_id - 1)); + IAVF_PCI_REG_WRITE(rxq->qrx_tail, rx_id); + nb_hold = 0; + } + rxq->nb_rx_hold = nb_hold; + + return nb_rx; +} + /* implement recv_scattered_pkts */ uint16_t iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, @@ -1049,6 +1403,82 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, } #define IAVF_LOOK_AHEAD 8 +static inline int +iavf_rx_scan_hw_ring_flex_rxd(struct iavf_rx_queue *rxq) +{ + volatile union iavf_rx_flex_desc *rxdp; + struct rte_mbuf **rxep; + struct rte_mbuf *mb; + uint16_t stat_err0; + uint16_t pkt_len; + int32_t s[IAVF_LOOK_AHEAD], nb_dd; + int32_t i, j, nb_rx = 0; + uint64_t pkt_flags; + const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl; + + rxdp = (volatile union iavf_rx_flex_desc *)&rxq->rx_ring[rxq->rx_tail]; + rxep = &rxq->sw_ring[rxq->rx_tail]; + + stat_err0 = rte_le_to_cpu_16(rxdp->wb.status_error0); + + /* Make sure there is at least 1 packet to receive */ + if (!(stat_err0 & (1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S))) + return 0; + + /* Scan LOOK_AHEAD descriptors at a time to determine which + * descriptors reference packets that are ready to be received. + */ + for (i = 0; i < IAVF_RX_MAX_BURST; i += IAVF_LOOK_AHEAD, + rxdp += IAVF_LOOK_AHEAD, rxep += IAVF_LOOK_AHEAD) { + /* Read desc statuses backwards to avoid race condition */ + for (j = IAVF_LOOK_AHEAD - 1; j >= 0; j--) + s[j] = rte_le_to_cpu_16(rxdp[j].wb.status_error0); + + rte_smp_rmb(); + + /* Compute how many status bits were set */ + for (j = 0, nb_dd = 0; j < IAVF_LOOK_AHEAD; j++) + nb_dd += s[j] & (1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S); + + nb_rx += nb_dd; + + /* Translate descriptor info to mbuf parameters */ + for (j = 0; j < nb_dd; j++) { + IAVF_DUMP_RX_DESC(rxq, &rxdp[j], + rxq->rx_tail + + i * IAVF_LOOK_AHEAD + j); + + mb = rxep[j]; + pkt_len = (rte_le_to_cpu_16(rxdp[j].wb.pkt_len) & + IAVF_RX_FLX_DESC_PKT_LEN_M) - rxq->crc_len; + mb->data_len = pkt_len; + mb->pkt_len = pkt_len; + mb->ol_flags = 0; + + mb->packet_type = ptype_tbl[IAVF_RX_FLEX_DESC_PTYPE_M & + rte_le_to_cpu_16(rxdp[j].wb.ptype_flex_flags0)]; + iavf_flex_rxd_to_vlan_tci(mb, &rxdp[j]); + iavf_rxd_to_pkt_fields(mb, &rxdp[j]); + stat_err0 = rte_le_to_cpu_16(rxdp[j].wb.status_error0); + pkt_flags = iavf_flex_rxd_error_to_pkt_flags(stat_err0); + + mb->ol_flags |= pkt_flags; + } + + for (j = 0; j < IAVF_LOOK_AHEAD; j++) + rxq->rx_stage[i + j] = rxep[j]; + + if (nb_dd != IAVF_LOOK_AHEAD) + break; + } + + /* Clear software ring entries */ + for (i = 0; i < nb_rx; i++) + rxq->sw_ring[rxq->rx_tail + i] = NULL; + + return nb_rx; +} + static inline int iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq) { @@ -1217,7 +1647,10 @@ rx_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) if (rxq->rx_nb_avail) return iavf_rx_fill_from_stage(rxq, rx_pkts, nb_pkts); - nb_rx = (uint16_t)iavf_rx_scan_hw_ring(rxq); + if (rxq->rxdid == IAVF_RXDID_COMMS_OVS_1) + nb_rx = (uint16_t)iavf_rx_scan_hw_ring_flex_rxd(rxq); + else + nb_rx = (uint16_t)iavf_rx_scan_hw_ring(rxq); rxq->rx_next_avail = 0; rxq->rx_nb_avail = nb_rx; rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_rx); @@ -1661,6 +2094,7 @@ iavf_set_rx_function(struct rte_eth_dev *dev) { struct iavf_adapter *adapter = IAVF_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private); + struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(dev->data->dev_private); #ifdef RTE_ARCH_X86 struct iavf_rx_queue *rxq; int i; @@ -1700,7 +2134,10 @@ iavf_set_rx_function(struct rte_eth_dev *dev) if (dev->data->scattered_rx) { PMD_DRV_LOG(DEBUG, "Using a 
Scattered Rx callback (port=%d).", dev->data->port_id); - dev->rx_pkt_burst = iavf_recv_scattered_pkts; + if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) + dev->rx_pkt_burst = iavf_recv_scattered_pkts_flex_rxd; + else + dev->rx_pkt_burst = iavf_recv_scattered_pkts; } else if (adapter->rx_bulk_alloc_allowed) { PMD_DRV_LOG(DEBUG, "Using bulk Rx callback (port=%d).", dev->data->port_id); @@ -1708,7 +2145,10 @@ iavf_set_rx_function(struct rte_eth_dev *dev) } else { PMD_DRV_LOG(DEBUG, "Using Basic Rx callback (port=%d).", dev->data->port_id); - dev->rx_pkt_burst = iavf_recv_pkts; + if (vf->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) + dev->rx_pkt_burst = iavf_recv_pkts_flex_rxd; + else + dev->rx_pkt_burst = iavf_recv_pkts; } } @@ -1784,6 +2224,35 @@ iavf_dev_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id, qinfo->conf.tx_deferred_start = txq->tx_deferred_start; } +/* Get the number of used descriptors of a rx queue for flexible RXD */ +uint32_t +iavf_dev_rxq_count_flex_rxd(struct rte_eth_dev *dev, uint16_t queue_id) +{ +#define IAVF_RXQ_SCAN_INTERVAL 4 + volatile union iavf_rx_flex_desc *rxdp; + struct iavf_rx_queue *rxq; + uint16_t desc = 0; + + rxq = dev->data->rx_queues[queue_id]; + rxdp = (volatile union iavf_rx_flex_desc *)&rxq->rx_ring[rxq->rx_tail]; + while ((desc < rxq->nb_rx_desc) && + rte_le_to_cpu_16(rxdp->wb.status_error0) & + (1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)) { + /* Check the DD bit of a rx descriptor of each 4 in a group, + * to avoid checking too frequently and downgrading performance + * too much. + */ + desc += IAVF_RXQ_SCAN_INTERVAL; + rxdp += IAVF_RXQ_SCAN_INTERVAL; + if (rxq->rx_tail + desc >= rxq->nb_rx_desc) + rxdp = (volatile union iavf_rx_flex_desc *) + &(rxq->rx_ring[rxq->rx_tail + + desc - rxq->nb_rx_desc]); + } + + return desc; +} + /* Get the number of used descriptors of a rx queue */ uint32_t iavf_dev_rxq_count(struct rte_eth_dev *dev, uint16_t queue_id) @@ -1795,6 +2264,10 @@ iavf_dev_rxq_count(struct rte_eth_dev *dev, uint16_t queue_id) rxq = dev->data->rx_queues[queue_id]; rxdp = &rxq->rx_ring[rxq->rx_tail]; + + if (rxq->rxdid == IAVF_RXDID_COMMS_OVS_1) + return iavf_dev_rxq_count_flex_rxd(dev, queue_id); + while ((desc < rxq->nb_rx_desc) && ((rte_le_to_cpu_64(rxdp->wb.qword1.status_error_len) & IAVF_RXD_QW1_STATUS_MASK) >> IAVF_RXD_QW1_STATUS_SHIFT) & @@ -1813,6 +2286,31 @@ iavf_dev_rxq_count(struct rte_eth_dev *dev, uint16_t queue_id) return desc; } +int +iavf_dev_rx_desc_status_flex_rxd(void *rx_queue, uint16_t offset) +{ + volatile union iavf_rx_flex_desc *rxdp; + struct iavf_rx_queue *rxq = rx_queue; + uint32_t desc; + + if (unlikely(offset >= rxq->nb_rx_desc)) + return -EINVAL; + + if (offset >= rxq->nb_rx_desc - rxq->nb_rx_hold) + return RTE_ETH_RX_DESC_UNAVAIL; + + desc = rxq->rx_tail + offset; + if (desc >= rxq->nb_rx_desc) + desc -= rxq->nb_rx_desc; + + rxdp = (volatile union iavf_rx_flex_desc *)&rxq->rx_ring[desc]; + if (rte_le_to_cpu_16(rxdp->wb.status_error0) & + (1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S)) + return RTE_ETH_RX_DESC_DONE; + + return RTE_ETH_RX_DESC_AVAIL; +} + int iavf_dev_rx_desc_status(void *rx_queue, uint16_t offset) { @@ -1821,6 +2319,9 @@ iavf_dev_rx_desc_status(void *rx_queue, uint16_t offset) uint64_t mask; uint32_t desc; + if (rxq->rxdid == IAVF_RXDID_COMMS_OVS_1) + return iavf_dev_rx_desc_status_flex_rxd(rx_queue, offset); + if (unlikely(offset >= rxq->nb_rx_desc)) return -EINVAL; diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h index 416433504..ee306d400 100644 
--- a/drivers/net/iavf/iavf_rxtx.h +++ b/drivers/net/iavf/iavf_rxtx.h @@ -59,6 +59,7 @@ /* HW desc structure, only 32-byte type is supported */ #define iavf_rx_desc iavf_32byte_rx_desc +#define iavf_rx_flex_desc iavf_32b_rx_flex_desc struct iavf_rxq_ops { void (*release_mbufs)(struct iavf_rx_queue *rxq); @@ -83,6 +84,7 @@ struct iavf_rx_queue { struct rte_mbuf *pkt_first_seg; /* first segment of current packet */ struct rte_mbuf *pkt_last_seg; /* last segment of current packet */ struct rte_mbuf fake_mbuf; /* dummy mbuf */ + uint8_t rxdid; /* used for VPMD */ uint16_t rxrearm_nb; /* number of remaining to be re-armed */ @@ -175,9 +177,15 @@ void iavf_dev_tx_queue_release(void *txq); void iavf_stop_queues(struct rte_eth_dev *dev); uint16_t iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); +uint16_t iavf_recv_pkts_flex_rxd(void *rx_queue, + struct rte_mbuf **rx_pkts, + uint16_t nb_pkts); uint16_t iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); +uint16_t iavf_recv_scattered_pkts_flex_rxd(void *rx_queue, + struct rte_mbuf **rx_pkts, + uint16_t nb_pkts); uint16_t iavf_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); uint16_t iavf_prep_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, @@ -189,7 +197,10 @@ void iavf_dev_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id, void iavf_dev_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id, struct rte_eth_txq_info *qinfo); uint32_t iavf_dev_rxq_count(struct rte_eth_dev *dev, uint16_t queue_id); +uint32_t iavf_dev_rxq_count_flex_rxd(struct rte_eth_dev *dev, + uint16_t queue_id); int iavf_dev_rx_desc_status(void *rx_queue, uint16_t offset); +int iavf_dev_rx_desc_status_flex_rxd(void *rx_queue, uint16_t offset); int iavf_dev_tx_desc_status(void *tx_queue, uint16_t offset); uint16_t iavf_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, diff --git a/drivers/net/iavf/iavf_vchnl.c b/drivers/net/iavf/iavf_vchnl.c index 288d34e8b..f0c283472 100644 --- a/drivers/net/iavf/iavf_vchnl.c +++ b/drivers/net/iavf/iavf_vchnl.c @@ -89,6 +89,7 @@ iavf_execute_vf_cmd(struct iavf_adapter *adapter, struct iavf_cmd_info *args) case VIRTCHNL_OP_VERSION: case VIRTCHNL_OP_GET_VF_RESOURCES: case VIRTCHNL_OP_PACKAGE_INFO: + case VIRTCHNL_OP_RXDID: /* for init virtchnl ops, need to poll the response */ do { ret = iavf_read_msg_from_pf(adapter, args->out_size, @@ -340,7 +341,8 @@ iavf_get_vf_resource(struct iavf_adapter *adapter) */ caps = IAVF_BASIC_OFFLOAD_CAPS | VIRTCHNL_VF_CAP_ADV_LINK_SPEED | - VIRTCHNL_VF_OFFLOAD_QUERY_DDP; + VIRTCHNL_VF_OFFLOAD_QUERY_DDP | + VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC; args.in_args = (uint8_t *)&caps; args.in_args_size = sizeof(caps); @@ -377,6 +379,31 @@ iavf_get_vf_resource(struct iavf_adapter *adapter) return 0; } +int +iavf_get_supported_rxdid(struct iavf_adapter *adapter) +{ + struct iavf_info *vf = IAVF_DEV_PRIVATE_TO_VF(adapter); + struct iavf_cmd_info args; + int ret; + + args.ops = VIRTCHNL_OP_RXDID; + args.in_args = NULL; + args.in_args_size = 0; + args.out_buffer = vf->aq_resp; + args.out_size = IAVF_AQ_BUF_SZ; + + ret = iavf_execute_vf_cmd(adapter, &args); + if (ret) { + PMD_DRV_LOG(ERR, + "Failed to execute command of OP_RXDID"); + return ret; + } + + vf->supported_rxdid = *(uint64_t *)args.out_buffer; + + return 0; +} + int iavf_enable_queues(struct iavf_adapter *adapter) { @@ -569,6 +596,20 @@ iavf_configure_queues(struct iavf_adapter *adapter) vc_qp->rxq.ring_len = rxq[i]->nb_rx_desc; vc_qp->rxq.dma_ring_addr = rxq[i]->rx_ring_phys_addr;
vc_qp->rxq.databuffer_size = rxq[i]->rx_buf_len; + + if (vf->vf_res->vf_cap_flags & + VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC && + vf->supported_rxdid & BIT(IAVF_RXDID_COMMS_OVS_1)) { + vc_qp->rxq.rxdid = IAVF_RXDID_COMMS_OVS_1; + rxq[i]->rxdid = IAVF_RXDID_COMMS_OVS_1; + PMD_DRV_LOG(NOTICE, "request RXDID == %d in " + "Queue[%d]", vc_qp->rxq.rxdid, i); + } else { + vc_qp->rxq.rxdid = IAVF_RXDID_LEGACY_1; + rxq[i]->rxdid = IAVF_RXDID_LEGACY_1; + PMD_DRV_LOG(NOTICE, "request RXDID == %d in " + "Queue[%d]", vc_qp->rxq.rxdid, i); + } } }
From patchwork Mon Mar 16 07:45:56 2020 X-Patchwork-Submitter: Leyi Rong X-Patchwork-Id: 66680 X-Patchwork-Delegate: xiaolong.ye@intel.com From: Leyi Rong To: qi.z.zhang@intel.com, xiaolong.ye@intel.com Cc: dev@dpdk.org, Leyi Rong Date: Mon, 16 Mar 2020 15:45:56 +0800 Message-Id: <20200316074603.10998-6-leyi.rong@intel.com> In-Reply-To: <20200316074603.10998-1-leyi.rong@intel.com> References: <20200316074603.10998-1-leyi.rong@intel.com> Subject: [dpdk-dev] [PATCH 05/12] net/iavf: flexible Rx descriptor support in AVX path
Support flexible Rx descriptor format in AVX path of iAVF PMD.
Signed-off-by: Leyi Rong --- drivers/net/iavf/iavf_rxtx.c | 24 +- drivers/net/iavf/iavf_rxtx.h | 6 + drivers/net/iavf/iavf_rxtx_vec_avx2.c | 550 +++++++++++++++++++++++++- 3 files changed, 570 insertions(+), 10 deletions(-) diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c index b9b35bdbb..57fffece9 100644 --- a/drivers/net/iavf/iavf_rxtx.c +++ b/drivers/net/iavf/iavf_rxtx.c @@ -2115,16 +2115,28 @@ iavf_set_rx_function(struct rte_eth_dev *dev) "Using %sVector Scattered Rx (port %d).", use_avx2 ? "avx2 " : "", dev->data->port_id); - dev->rx_pkt_burst = use_avx2 ? - iavf_recv_scattered_pkts_vec_avx2 : - iavf_recv_scattered_pkts_vec; + if (vf->vf_res->vf_cap_flags & + VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) + dev->rx_pkt_burst = use_avx2 ? + iavf_recv_scattered_pkts_vec_avx2_flex_rxd : + iavf_recv_scattered_pkts_vec; + else + dev->rx_pkt_burst = use_avx2 ?
+ iavf_recv_scattered_pkts_vec_avx2 : + iavf_recv_scattered_pkts_vec; } else { PMD_DRV_LOG(DEBUG, "Using %sVector Rx (port %d).", use_avx2 ? "avx2 " : "", dev->data->port_id); - dev->rx_pkt_burst = use_avx2 ? - iavf_recv_pkts_vec_avx2 : - iavf_recv_pkts_vec; + if (vf->vf_res->vf_cap_flags & + VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) + dev->rx_pkt_burst = use_avx2 ? + iavf_recv_pkts_vec_avx2_flex_rxd : + iavf_recv_pkts_vec; + else + dev->rx_pkt_burst = use_avx2 ? + iavf_recv_pkts_vec_avx2 : + iavf_recv_pkts_vec; } return; diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h index ee306d400..de13fd516 100644 --- a/drivers/net/iavf/iavf_rxtx.h +++ b/drivers/net/iavf/iavf_rxtx.h @@ -212,9 +212,15 @@ uint16_t iavf_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); uint16_t iavf_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); +uint16_t iavf_recv_pkts_vec_avx2_flex_rxd(void *rx_queue, + struct rte_mbuf **rx_pkts, + uint16_t nb_pkts); uint16_t iavf_recv_scattered_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); +uint16_t iavf_recv_scattered_pkts_vec_avx2_flex_rxd(void *rx_queue, + struct rte_mbuf **rx_pkts, + uint16_t nb_pkts); uint16_t iavf_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); uint16_t iavf_xmit_pkts_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts, diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/iavf/iavf_rxtx_vec_avx2.c index 4e1231162..74f672c7e 100644 --- a/drivers/net/iavf/iavf_rxtx_vec_avx2.c +++ b/drivers/net/iavf/iavf_rxtx_vec_avx2.c @@ -11,14 +11,16 @@ #endif static inline void -iavf_rxq_rearm(struct iavf_rx_queue *rxq) +iavf_rxq_rearm(struct iavf_rx_queue *rxq, volatile union iavf_rx_desc *rxdp) { int i; uint16_t rx_id; - volatile union iavf_rx_desc *rxdp; struct rte_mbuf **rxp = &rxq->sw_ring[rxq->rxrearm_start]; - rxdp = rxq->rx_ring + rxq->rxrearm_start; + if (rxq->rxdid == IAVF_RXDID_COMMS_OVS_1) { + volatile union iavf_rx_flex_desc *rxdp = + (union iavf_rx_flex_desc *)rxdp; + } /* Pull 'n' more MBUFs into the software ring */ if (rte_mempool_get_bulk(rxq->mp, @@ -110,7 +112,7 @@ _iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq, * of time to act */ if (rxq->rxrearm_nb > IAVF_RXQ_REARM_THRESH) - iavf_rxq_rearm(rxq); + iavf_rxq_rearm(rxq, rxq->rx_ring + rxq->rxrearm_start); /* Before we start moving massive data around, check to see if * there is actually a packet available @@ -548,6 +550,465 @@ _iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq, return received; } +static inline uint16_t +_iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq, + struct rte_mbuf **rx_pkts, + uint16_t nb_pkts, uint8_t *split_packet) +{ +#define IAVF_DESCS_PER_LOOP_AVX 8 + + const uint32_t *type_table = rxq->vsi->adapter->ptype_tbl; + + const __m256i mbuf_init = _mm256_set_epi64x(0, 0, + 0, rxq->mbuf_initializer); + struct rte_mbuf **sw_ring = &rxq->sw_ring[rxq->rx_tail]; + volatile union iavf_rx_flex_desc *rxdp = + (union iavf_rx_flex_desc *)rxq->rx_ring + rxq->rx_tail; + + rte_prefetch0(rxdp); + + /* nb_pkts has to be floor-aligned to IAVF_DESCS_PER_LOOP_AVX */ + nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, IAVF_DESCS_PER_LOOP_AVX); + + /* See if we need to rearm the RX queue - gives the prefetch a bit + * of time to act + */ + if (rxq->rxrearm_nb > IAVF_RXQ_REARM_THRESH) + /* iavf_rxq_rearm(rxq); */ + iavf_rxq_rearm(rxq, rxq->rx_ring + rxq->rxrearm_start); + + /* Before we start moving massive data around, check to see if + * there 
is actually a packet available + */ + if (!(rxdp->wb.status_error0 & + rte_cpu_to_le_32(1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S))) + return 0; + + /* constants used in processing loop */ + const __m256i crc_adjust = + _mm256_set_epi16 + (/* first descriptor */ + 0, 0, 0, /* ignore non-length fields */ + -rxq->crc_len, /* sub crc on data_len */ + 0, /* ignore high-16bits of pkt_len */ + -rxq->crc_len, /* sub crc on pkt_len */ + 0, 0, /* ignore pkt_type field */ + /* second descriptor */ + 0, 0, 0, /* ignore non-length fields */ + -rxq->crc_len, /* sub crc on data_len */ + 0, /* ignore high-16bits of pkt_len */ + -rxq->crc_len, /* sub crc on pkt_len */ + 0, 0 /* ignore pkt_type field */ + ); + + /* 8 packets DD mask, LSB in each 32-bit value */ + const __m256i dd_check = _mm256_set1_epi32(1); + + /* 8 packets EOP mask, second-LSB in each 32-bit value */ + const __m256i eop_check = _mm256_slli_epi32(dd_check, + IAVF_RX_FLEX_DESC_STATUS0_EOF_S); + + /* mask to shuffle from desc. to mbuf (2 descriptors)*/ + const __m256i shuf_msk = + _mm256_set_epi8 + (/* first descriptor */ + 15, 14, + 13, 12, /* octet 12~15, 32 bits rss */ + 11, 10, /* octet 10~11, 16 bits vlan_macip */ + 5, 4, /* octet 4~5, 16 bits data_len */ + 0xFF, 0xFF, /* skip hi 16 bits pkt_len, zero out */ + 5, 4, /* octet 4~5, 16 bits pkt_len */ + 0xFF, 0xFF, /* pkt_type set as unknown */ + 0xFF, 0xFF, /*pkt_type set as unknown */ + /* second descriptor */ + 15, 14, + 13, 12, /* octet 12~15, 32 bits rss */ + 11, 10, /* octet 10~11, 16 bits vlan_macip */ + 5, 4, /* octet 4~5, 16 bits data_len */ + 0xFF, 0xFF, /* skip hi 16 bits pkt_len, zero out */ + 5, 4, /* octet 4~5, 16 bits pkt_len */ + 0xFF, 0xFF, /* pkt_type set as unknown */ + 0xFF, 0xFF /*pkt_type set as unknown */ + ); + /** + * compile-time check the above crc and shuffle layout is correct. + * NOTE: the first field (lowest address) is given last in set_epi + * calls above. + */ + RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) != + offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4); + RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) != + offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8); + RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, vlan_tci) != + offsetof(struct rte_mbuf, rx_descriptor_fields1) + 10); + RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, hash) != + offsetof(struct rte_mbuf, rx_descriptor_fields1) + 12); + + /* Status/Error flag masks */ + /** + * mask everything except Checksum Reports, RSS indication + * and VLAN indication. + * bit6:4 for IP/L4 checksum errors. + * bit12 is for RSS indication. + * bit13 is for VLAN indication. + */ + const __m256i flags_mask = + _mm256_set1_epi32((7 << 4) | (1 << 12) | (1 << 13)); + /** + * data to be shuffled by the result of the flags mask shifted by 4 + * bits. This gives us the l3_l4 flags.
+ */ + const __m256i l3_l4_flags_shuf = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, + /* shift right 1 bit to make sure it not exceed 255 */ + (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD | + PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1, + /* second 128-bits */ + 0, 0, 0, 0, 0, 0, 0, 0, + (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD | + PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1); + const __m256i cksum_mask = + _mm256_set1_epi32(PKT_RX_IP_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD | + PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD | + PKT_RX_EIP_CKSUM_BAD); + /** + * data to be shuffled by result of flag mask, shifted down 12. + * If RSS(bit12)/VLAN(bit13) are set, + * shuffle moves appropriate flags in place. + */ + const __m256i rss_vlan_flags_shuf = _mm256_set_epi8(0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + PKT_RX_RSS_HASH | PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED, + PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED, + PKT_RX_RSS_HASH, 0, + /* end up 128-bits */ + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + PKT_RX_RSS_HASH | PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED, + PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED, + PKT_RX_RSS_HASH, 0); + + uint16_t i, received; + + for (i = 0, received = 0; i < nb_pkts; + i += IAVF_DESCS_PER_LOOP_AVX, + rxdp += IAVF_DESCS_PER_LOOP_AVX) { + /* step 1, copy over 8 mbuf pointers to rx_pkts array */ + _mm256_storeu_si256((void *)&rx_pkts[i], + _mm256_loadu_si256((void *)&sw_ring[i])); +#ifdef RTE_ARCH_X86_64 + _mm256_storeu_si256 + ((void *)&rx_pkts[i + 4], + _mm256_loadu_si256((void *)&sw_ring[i + 4])); +#endif + + __m256i raw_desc0_1, raw_desc2_3, raw_desc4_5, raw_desc6_7; + + const __m128i raw_desc7 = + _mm_load_si128((void *)(rxdp + 7)); + rte_compiler_barrier(); + const __m128i raw_desc6 = + _mm_load_si128((void *)(rxdp + 6)); + rte_compiler_barrier(); + const __m128i raw_desc5 = + _mm_load_si128((void *)(rxdp + 5)); + rte_compiler_barrier(); + const __m128i raw_desc4 = + _mm_load_si128((void *)(rxdp + 4)); + rte_compiler_barrier(); + const __m128i raw_desc3 = + _mm_load_si128((void *)(rxdp + 3)); + rte_compiler_barrier(); + const __m128i raw_desc2 = + _mm_load_si128((void *)(rxdp + 2)); + rte_compiler_barrier(); + const __m128i raw_desc1 = + _mm_load_si128((void *)(rxdp + 1)); + rte_compiler_barrier(); + const __m128i raw_desc0 = + _mm_load_si128((void *)(rxdp + 0)); + + raw_desc6_7 = + _mm256_inserti128_si256 + (_mm256_castsi128_si256(raw_desc6), + raw_desc7, 1); + raw_desc4_5 = + _mm256_inserti128_si256 + (_mm256_castsi128_si256(raw_desc4), + raw_desc5, 1); + raw_desc2_3 = + _mm256_inserti128_si256 + (_mm256_castsi128_si256(raw_desc2), + raw_desc3, 1); + raw_desc0_1 = + _mm256_inserti128_si256 + (_mm256_castsi128_si256(raw_desc0), + raw_desc1, 1); + + if 
(split_packet) { + int j; + + for (j = 0; j < IAVF_DESCS_PER_LOOP_AVX; j++) + rte_mbuf_prefetch_part2(rx_pkts[i + j]); + } + + /** + * convert descriptors 4-7 into mbufs, re-arrange fields. + * Then write into the mbuf. + */ + __m256i mb6_7 = _mm256_shuffle_epi8(raw_desc6_7, shuf_msk); + __m256i mb4_5 = _mm256_shuffle_epi8(raw_desc4_5, shuf_msk); + + mb6_7 = _mm256_add_epi16(mb6_7, crc_adjust); + mb4_5 = _mm256_add_epi16(mb4_5, crc_adjust); + /** + * to get packet types, ptype is located in bit16-25 + * of each 128bits + */ + const __m256i ptype_mask = + _mm256_set1_epi16(IAVF_RX_FLEX_DESC_PTYPE_M); + const __m256i ptypes6_7 = + _mm256_and_si256(raw_desc6_7, ptype_mask); + const __m256i ptypes4_5 = + _mm256_and_si256(raw_desc4_5, ptype_mask); + const uint16_t ptype7 = _mm256_extract_epi16(ptypes6_7, 9); + const uint16_t ptype6 = _mm256_extract_epi16(ptypes6_7, 1); + const uint16_t ptype5 = _mm256_extract_epi16(ptypes4_5, 9); + const uint16_t ptype4 = _mm256_extract_epi16(ptypes4_5, 1); + + mb6_7 = _mm256_insert_epi32(mb6_7, type_table[ptype7], 4); + mb6_7 = _mm256_insert_epi32(mb6_7, type_table[ptype6], 0); + mb4_5 = _mm256_insert_epi32(mb4_5, type_table[ptype5], 4); + mb4_5 = _mm256_insert_epi32(mb4_5, type_table[ptype4], 0); + /* merge the status bits into one register */ + const __m256i status4_7 = _mm256_unpackhi_epi32(raw_desc6_7, + raw_desc4_5); + + /** + * convert descriptors 0-3 into mbufs, re-arrange fields. + * Then write into the mbuf. + */ + __m256i mb2_3 = _mm256_shuffle_epi8(raw_desc2_3, shuf_msk); + __m256i mb0_1 = _mm256_shuffle_epi8(raw_desc0_1, shuf_msk); + + mb2_3 = _mm256_add_epi16(mb2_3, crc_adjust); + mb0_1 = _mm256_add_epi16(mb0_1, crc_adjust); + /** + * to get packet types, ptype is located in bit16-25 + * of each 128bits + */ + const __m256i ptypes2_3 = + _mm256_and_si256(raw_desc2_3, ptype_mask); + const __m256i ptypes0_1 = + _mm256_and_si256(raw_desc0_1, ptype_mask); + const uint16_t ptype3 = _mm256_extract_epi16(ptypes2_3, 9); + const uint16_t ptype2 = _mm256_extract_epi16(ptypes2_3, 1); + const uint16_t ptype1 = _mm256_extract_epi16(ptypes0_1, 9); + const uint16_t ptype0 = _mm256_extract_epi16(ptypes0_1, 1); + + mb2_3 = _mm256_insert_epi32(mb2_3, type_table[ptype3], 4); + mb2_3 = _mm256_insert_epi32(mb2_3, type_table[ptype2], 0); + mb0_1 = _mm256_insert_epi32(mb0_1, type_table[ptype1], 4); + mb0_1 = _mm256_insert_epi32(mb0_1, type_table[ptype0], 0); + /* merge the status bits into one register */ + const __m256i status0_3 = _mm256_unpackhi_epi32(raw_desc2_3, + raw_desc0_1); + + /** + * take the two sets of status bits and merge to one + * After merge, the packets status flags are in the + * order (hi->lo): [1, 3, 5, 7, 0, 2, 4, 6] + */ + __m256i status0_7 = _mm256_unpacklo_epi64(status4_7, + status0_3); + + /* now do flag manipulation */ + + /* get only flag/error bits we want */ + const __m256i flag_bits = + _mm256_and_si256(status0_7, flags_mask); + /** + * l3_l4_error flags, shuffle, then shift to correct adjustment + * of flags in flags_shuf, and finally mask out extra bits + */ + __m256i l3_l4_flags = _mm256_shuffle_epi8(l3_l4_flags_shuf, + _mm256_srli_epi32(flag_bits, 4)); + l3_l4_flags = _mm256_slli_epi32(l3_l4_flags, 1); + l3_l4_flags = _mm256_and_si256(l3_l4_flags, cksum_mask); + /* set rss and vlan flags */ + const __m256i rss_vlan_flag_bits = + _mm256_srli_epi32(flag_bits, 12); + const __m256i rss_vlan_flags = + _mm256_shuffle_epi8(rss_vlan_flags_shuf, + rss_vlan_flag_bits); + + /* merge flags */ + const __m256i mbuf_flags = 
_mm256_or_si256(l3_l4_flags, + rss_vlan_flags); + /** + * At this point, we have the 8 sets of flags in the low 16-bits + * of each 32-bit value in vlan0. + * We want to extract these, and merge them with the mbuf init + * data so we can do a single write to the mbuf to set the flags + * and all the other initialization fields. Extracting the + * appropriate flags means that we have to do a shift and blend + * for each mbuf before we do the write. However, we can also + * add in the previously computed rx_descriptor fields to + * make a single 256-bit write per mbuf + */ + /* check the structure matches expectations */ + RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) != + offsetof(struct rte_mbuf, rearm_data) + 8); + RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, rearm_data) != + RTE_ALIGN(offsetof(struct rte_mbuf, + rearm_data), + 16)); + /* build up data and do writes */ + __m256i rearm0, rearm1, rearm2, rearm3, rearm4, rearm5, + rearm6, rearm7; + rearm6 = _mm256_blend_epi32(mbuf_init, + _mm256_slli_si256(mbuf_flags, 8), + 0x04); + rearm4 = _mm256_blend_epi32(mbuf_init, + _mm256_slli_si256(mbuf_flags, 4), + 0x04); + rearm2 = _mm256_blend_epi32(mbuf_init, mbuf_flags, 0x04); + rearm0 = _mm256_blend_epi32(mbuf_init, + _mm256_srli_si256(mbuf_flags, 4), + 0x04); + /* permute to add in the rx_descriptor e.g. rss fields */ + rearm6 = _mm256_permute2f128_si256(rearm6, mb6_7, 0x20); + rearm4 = _mm256_permute2f128_si256(rearm4, mb4_5, 0x20); + rearm2 = _mm256_permute2f128_si256(rearm2, mb2_3, 0x20); + rearm0 = _mm256_permute2f128_si256(rearm0, mb0_1, 0x20); + /* write to mbuf */ + _mm256_storeu_si256((__m256i *)&rx_pkts[i + 6]->rearm_data, + rearm6); + _mm256_storeu_si256((__m256i *)&rx_pkts[i + 4]->rearm_data, + rearm4); + _mm256_storeu_si256((__m256i *)&rx_pkts[i + 2]->rearm_data, + rearm2); + _mm256_storeu_si256((__m256i *)&rx_pkts[i + 0]->rearm_data, + rearm0); + + /* repeat for the odd mbufs */ + const __m256i odd_flags = + _mm256_castsi128_si256 + (_mm256_extracti128_si256(mbuf_flags, 1)); + rearm7 = _mm256_blend_epi32(mbuf_init, + _mm256_slli_si256(odd_flags, 8), + 0x04); + rearm5 = _mm256_blend_epi32(mbuf_init, + _mm256_slli_si256(odd_flags, 4), + 0x04); + rearm3 = _mm256_blend_epi32(mbuf_init, odd_flags, 0x04); + rearm1 = _mm256_blend_epi32(mbuf_init, + _mm256_srli_si256(odd_flags, 4), + 0x04); + /* since odd mbufs are already in hi 128-bits use blend */ + rearm7 = _mm256_blend_epi32(rearm7, mb6_7, 0xF0); + rearm5 = _mm256_blend_epi32(rearm5, mb4_5, 0xF0); + rearm3 = _mm256_blend_epi32(rearm3, mb2_3, 0xF0); + rearm1 = _mm256_blend_epi32(rearm1, mb0_1, 0xF0); + /* again write to mbufs */ + _mm256_storeu_si256((__m256i *)&rx_pkts[i + 7]->rearm_data, + rearm7); + _mm256_storeu_si256((__m256i *)&rx_pkts[i + 5]->rearm_data, + rearm5); + _mm256_storeu_si256((__m256i *)&rx_pkts[i + 3]->rearm_data, + rearm3); + _mm256_storeu_si256((__m256i *)&rx_pkts[i + 1]->rearm_data, + rearm1); + + /* extract and record EOP bit */ + if (split_packet) { + const __m128i eop_mask = + _mm_set1_epi16(1 << + IAVF_RX_FLEX_DESC_STATUS0_EOF_S); + const __m256i eop_bits256 = _mm256_and_si256(status0_7, + eop_check); + /* pack status bits into a single 128-bit register */ + const __m128i eop_bits = + _mm_packus_epi32 + (_mm256_castsi256_si128(eop_bits256), + _mm256_extractf128_si256(eop_bits256, + 1)); + /** + * flip bits, and mask out the EOP bit, which is now + * a split-packet bit i.e. !EOP, rather than EOP one. 
+ */ + __m128i split_bits = _mm_andnot_si128(eop_bits, + eop_mask); + /** + * eop bits are out of order, so we need to shuffle them + * back into order again. In doing so, only use low 8 + * bits, which acts like another pack instruction + * The original order is (hi->lo): 1,3,5,7,0,2,4,6 + * [Since we use epi8, the 16-bit positions are + * multiplied by 2 in the eop_shuffle value.] + */ + __m128i eop_shuffle = + _mm_set_epi8(/* zero hi 64b */ + 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, + /* move values to lo 64b */ + 8, 0, 10, 2, + 12, 4, 14, 6); + split_bits = _mm_shuffle_epi8(split_bits, eop_shuffle); + *(uint64_t *)split_packet = + _mm_cvtsi128_si64(split_bits); + split_packet += IAVF_DESCS_PER_LOOP_AVX; + } + + /* perform dd_check */ + status0_7 = _mm256_and_si256(status0_7, dd_check); + status0_7 = _mm256_packs_epi32(status0_7, + _mm256_setzero_si256()); + + uint64_t burst = __builtin_popcountll + (_mm_cvtsi128_si64 + (_mm256_extracti128_si256 + (status0_7, 1))); + burst += __builtin_popcountll + (_mm_cvtsi128_si64 + (_mm256_castsi256_si128(status0_7))); + received += burst; + if (burst != IAVF_DESCS_PER_LOOP_AVX) + break; + } + + /* update tail pointers */ + rxq->rx_tail += received; + rxq->rx_tail &= (rxq->nb_rx_desc - 1); + if ((rxq->rx_tail & 1) == 1 && received > 1) { /* keep avx2 aligned */ + rxq->rx_tail--; + received--; + } + rxq->rxrearm_nb += received; + return received; +} + /** * Notice: * - nb_pkts < IAVF_DESCS_PER_LOOP, just return no packet @@ -559,6 +1020,18 @@ iavf_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts, return _iavf_recv_raw_pkts_vec_avx2(rx_queue, rx_pkts, nb_pkts, NULL); } +/** + * Notice: + * - nb_pkts < IAVF_DESCS_PER_LOOP, just return no packet + */ +uint16_t +iavf_recv_pkts_vec_avx2_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts) +{ + return _iavf_recv_raw_pkts_vec_avx2_flex_rxd(rx_queue, rx_pkts, + nb_pkts, NULL); +} + /** * vPMD receive routine that reassembles single burst of 32 scattered packets * Notice: @@ -624,6 +1097,75 @@ iavf_recv_scattered_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts, rx_pkts + retval, nb_pkts); } +/** + * vPMD receive routine that reassembles single burst of + * 32 scattered packets for flex RxD + * Notice: + * - nb_pkts < IAVF_DESCS_PER_LOOP, just return no packet + */ +static uint16_t +iavf_recv_scattered_burst_vec_avx2_flex_rxd(void *rx_queue, + struct rte_mbuf **rx_pkts, + uint16_t nb_pkts) +{ + struct iavf_rx_queue *rxq = rx_queue; + uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0}; + + /* get some new buffers */ + uint16_t nb_bufs = _iavf_recv_raw_pkts_vec_avx2_flex_rxd(rxq, + rx_pkts, nb_pkts, split_flags); + if (nb_bufs == 0) + return 0; + + /* happy day case, full burst + no packets to be joined */ + const uint64_t *split_fl64 = (uint64_t *)split_flags; + + if (!rxq->pkt_first_seg && + split_fl64[0] == 0 && split_fl64[1] == 0 && + split_fl64[2] == 0 && split_fl64[3] == 0) + return nb_bufs; + + /* reassemble any packets that need reassembly*/ + unsigned int i = 0; + + if (!rxq->pkt_first_seg) { + /* find the first split flag, and only reassemble then*/ + while (i < nb_bufs && !split_flags[i]) + i++; + if (i == nb_bufs) + return nb_bufs; + rxq->pkt_first_seg = rx_pkts[i]; + } + return i + reassemble_packets(rxq, &rx_pkts[i], nb_bufs - i, + &split_flags[i]); +} + +/** + * vPMD receive routine that reassembles scattered packets for flex RxD. 
+ * Main receive routine that can handle arbitrary burst sizes + * Notice: + * - nb_pkts < IAVF_DESCS_PER_LOOP, just return no packet + */ +uint16_t +iavf_recv_scattered_pkts_vec_avx2_flex_rxd(void *rx_queue, + struct rte_mbuf **rx_pkts, + uint16_t nb_pkts) +{ + uint16_t retval = 0; + + while (nb_pkts > IAVF_VPMD_RX_MAX_BURST) { + uint16_t burst = + iavf_recv_scattered_burst_vec_avx2_flex_rxd + (rx_queue, rx_pkts + retval, IAVF_VPMD_RX_MAX_BURST); + retval += burst; + nb_pkts -= burst; + if (burst < IAVF_VPMD_RX_MAX_BURST) + return retval; + } + return retval + iavf_recv_scattered_burst_vec_avx2_flex_rxd(rx_queue, + rx_pkts + retval, nb_pkts); +} + static inline void iavf_vtx1(volatile struct iavf_tx_desc *txdp, struct rte_mbuf *pkt, uint64_t flags) From patchwork Mon Mar 16 07:45:57 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Leyi Rong X-Patchwork-Id: 66681 X-Patchwork-Delegate: xiaolong.ye@intel.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id 3749DA0559; Mon, 16 Mar 2020 08:58:37 +0100 (CET) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id DAB571C0C3; Mon, 16 Mar 2020 08:57:36 +0100 (CET) Received: from mga18.intel.com (mga18.intel.com [134.134.136.126]) by dpdk.org (Postfix) with ESMTP id 075481C0AC for ; Mon, 16 Mar 2020 08:57:32 +0100 (CET) IronPort-SDR: xUaPdymivkrbQbSmGhCXFbRBTCKvqxDTBcPLt+K7IP8K7SxPWSqy+LDTHaneFJMo/QdGxWThp1 zr7gFttUoHow== X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga004.jf.intel.com ([10.7.209.38]) by orsmga106.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 16 Mar 2020 00:57:32 -0700 IronPort-SDR: xJCUJtXWU6zFVZL7y2Zgcv9SLX2Ue1415yoVPE6/dAPG+mpfEsS/JEYJ5uiS7qbZCeDRw3qMSm RKYhtHbtr+yg== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.70,559,1574150400"; d="scan'208";a="390622521" Received: from dpdk-lrong-srv-04.sh.intel.com ([10.67.119.221]) by orsmga004.jf.intel.com with ESMTP; 16 Mar 2020 00:57:31 -0700 From: Leyi Rong To: qi.z.zhang@intel.com, xiaolong.ye@intel.com Cc: dev@dpdk.org, Leyi Rong Date: Mon, 16 Mar 2020 15:45:57 +0800 Message-Id: <20200316074603.10998-7-leyi.rong@intel.com> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20200316074603.10998-1-leyi.rong@intel.com> References: <20200316074603.10998-1-leyi.rong@intel.com> Subject: [dpdk-dev] [PATCH 06/12] net/iavf: flexible Rx descriptor support in SSE path X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Support flexible Rx descriptor format in SSE path of iAVF PMD. Signed-off-by: Leyi Rong --- drivers/net/iavf/iavf_rxtx.c | 4 +- drivers/net/iavf/iavf_rxtx.h | 5 + drivers/net/iavf/iavf_rxtx_vec_sse.c | 414 +++++++++++++++++++++++++++ 3 files changed, 421 insertions(+), 2 deletions(-) diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c index 57fffece9..5d484d5e9 100644 --- a/drivers/net/iavf/iavf_rxtx.c +++ b/drivers/net/iavf/iavf_rxtx.c @@ -2119,7 +2119,7 @@ iavf_set_rx_function(struct rte_eth_dev *dev) VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) dev->rx_pkt_burst = use_avx2 ? 
iavf_recv_scattered_pkts_vec_avx2_flex_rxd : - iavf_recv_scattered_pkts_vec; + iavf_recv_scattered_pkts_vec_flex_rxd; else dev->rx_pkt_burst = use_avx2 ? iavf_recv_scattered_pkts_vec_avx2 : @@ -2132,7 +2132,7 @@ iavf_set_rx_function(struct rte_eth_dev *dev) VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) dev->rx_pkt_burst = use_avx2 ? iavf_recv_pkts_vec_avx2_flex_rxd : - iavf_recv_pkts_vec; + iavf_recv_pkts_vec_flex_rxd; else dev->rx_pkt_burst = use_avx2 ? iavf_recv_pkts_vec_avx2 : diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h index de13fd516..c85207dae 100644 --- a/drivers/net/iavf/iavf_rxtx.h +++ b/drivers/net/iavf/iavf_rxtx.h @@ -205,9 +205,14 @@ int iavf_dev_tx_desc_status(void *tx_queue, uint16_t offset); uint16_t iavf_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); +uint16_t iavf_recv_pkts_vec_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts); uint16_t iavf_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts); +uint16_t iavf_recv_scattered_pkts_vec_flex_rxd(void *rx_queue, + struct rte_mbuf **rx_pkts, + uint16_t nb_pkts); uint16_t iavf_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts); uint16_t iavf_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts, diff --git a/drivers/net/iavf/iavf_rxtx_vec_sse.c b/drivers/net/iavf/iavf_rxtx_vec_sse.c index 0365c49e1..9c1f2a445 100644 --- a/drivers/net/iavf/iavf_rxtx_vec_sse.c +++ b/drivers/net/iavf/iavf_rxtx_vec_sse.c @@ -189,6 +189,109 @@ desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4], _mm_store_si128((__m128i *)&rx_pkts[3]->rearm_data, rearm3); } +static inline void +flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4], + struct rte_mbuf **rx_pkts) +{ + const __m128i mbuf_init = _mm_set_epi64x(0, rxq->mbuf_initializer); + __m128i rearm0, rearm1, rearm2, rearm3; + + __m128i tmp_desc, flags, rss_vlan; + + /* mask everything except checksum, RSS and VLAN flags. + * bit6:4 for checksum. + * bit12 for RSS indication. + * bit13 for VLAN indication. 
+ */ + const __m128i desc_mask = _mm_set_epi32(0x3070, 0x3070, + 0x3070, 0x3070); + + const __m128i cksum_mask = _mm_set_epi32(PKT_RX_IP_CKSUM_MASK | + PKT_RX_L4_CKSUM_MASK | + PKT_RX_EIP_CKSUM_BAD, + PKT_RX_IP_CKSUM_MASK | + PKT_RX_L4_CKSUM_MASK | + PKT_RX_EIP_CKSUM_BAD, + PKT_RX_IP_CKSUM_MASK | + PKT_RX_L4_CKSUM_MASK | + PKT_RX_EIP_CKSUM_BAD, + PKT_RX_IP_CKSUM_MASK | + PKT_RX_L4_CKSUM_MASK | + PKT_RX_EIP_CKSUM_BAD); + + /* map the checksum, rss and vlan fields to the checksum, rss + * and vlan flag + */ + const __m128i cksum_flags = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, + /* shift right 1 bit to make sure it not exceed 255 */ + (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_BAD | + PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD | + PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_EIP_CKSUM_BAD | PKT_RX_L4_CKSUM_GOOD | + PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_L4_CKSUM_BAD | PKT_RX_IP_CKSUM_GOOD) >> 1, + (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_BAD) >> 1, + (PKT_RX_L4_CKSUM_GOOD | PKT_RX_IP_CKSUM_GOOD) >> 1); + + const __m128i rss_vlan_flags = _mm_set_epi8(0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + PKT_RX_RSS_HASH | PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED, + PKT_RX_VLAN | PKT_RX_VLAN_STRIPPED, + PKT_RX_RSS_HASH, 0); + + /* merge 4 descriptors */ + flags = _mm_unpackhi_epi32(descs[0], descs[1]); + tmp_desc = _mm_unpackhi_epi32(descs[2], descs[3]); + tmp_desc = _mm_unpacklo_epi64(flags, tmp_desc); + tmp_desc = _mm_and_si128(tmp_desc, desc_mask); + + /* checksum flags */ + tmp_desc = _mm_srli_epi32(tmp_desc, 4); + flags = _mm_shuffle_epi8(cksum_flags, tmp_desc); + /* then we shift left 1 bit */ + flags = _mm_slli_epi32(flags, 1); + /* we need to mask out the redundant bits introduced by RSS or + * VLAN fields. + */ + flags = _mm_and_si128(flags, cksum_mask); + + /* RSS, VLAN flag */ + tmp_desc = _mm_srli_epi32(tmp_desc, 8); + rss_vlan = _mm_shuffle_epi8(rss_vlan_flags, tmp_desc); + + /* merge the flags */ + flags = _mm_or_si128(flags, rss_vlan); + + /** + * At this point, we have the 4 sets of flags in the low 16-bits + * of each 32-bit value in flags. + * We want to extract these, and merge them with the mbuf init data + * so we can do a single 16-byte write to the mbuf to set the flags + * and all the other initialization fields. Extracting the + * appropriate flags means that we have to do a shift and blend for + * each mbuf before we do the write.
+ */ + rearm0 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 8), 0x10); + rearm1 = _mm_blend_epi16(mbuf_init, _mm_slli_si128(flags, 4), 0x10); + rearm2 = _mm_blend_epi16(mbuf_init, flags, 0x10); + rearm3 = _mm_blend_epi16(mbuf_init, _mm_srli_si128(flags, 4), 0x10); + + /* write the rearm data and the olflags in one write */ + RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) != + offsetof(struct rte_mbuf, rearm_data) + 8); + RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, rearm_data) != + RTE_ALIGN(offsetof(struct rte_mbuf, rearm_data), 16)); + _mm_store_si128((__m128i *)&rx_pkts[0]->rearm_data, rearm0); + _mm_store_si128((__m128i *)&rx_pkts[1]->rearm_data, rearm1); + _mm_store_si128((__m128i *)&rx_pkts[2]->rearm_data, rearm2); + _mm_store_si128((__m128i *)&rx_pkts[3]->rearm_data, rearm3); +} + #define PKTLEN_SHIFT 10 static inline void @@ -207,6 +310,26 @@ desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts, rx_pkts[3]->packet_type = type_table[_mm_extract_epi8(ptype1, 8)]; } +static inline void +flex_desc_to_ptype_v(__m128i descs[4], struct rte_mbuf **rx_pkts, + const uint32_t *type_table) +{ + const __m128i ptype_mask = _mm_set_epi16(0, IAVF_RX_FLEX_DESC_PTYPE_M, + 0, IAVF_RX_FLEX_DESC_PTYPE_M, + 0, IAVF_RX_FLEX_DESC_PTYPE_M, + 0, IAVF_RX_FLEX_DESC_PTYPE_M); + __m128i ptype_01 = _mm_unpacklo_epi32(descs[0], descs[1]); + __m128i ptype_23 = _mm_unpacklo_epi32(descs[2], descs[3]); + __m128i ptype_all = _mm_unpacklo_epi64(ptype_01, ptype_23); + + ptype_all = _mm_and_si128(ptype_all, ptype_mask); + + rx_pkts[0]->packet_type = type_table[_mm_extract_epi16(ptype_all, 1)]; + rx_pkts[1]->packet_type = type_table[_mm_extract_epi16(ptype_all, 3)]; + rx_pkts[2]->packet_type = type_table[_mm_extract_epi16(ptype_all, 5)]; + rx_pkts[3]->packet_type = type_table[_mm_extract_epi16(ptype_all, 7)]; +} + /* Notice: * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST @@ -455,6 +578,243 @@ _recv_raw_pkts_vec(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts, return nb_pkts_recd; } +/* Notice: + * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet + * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST + * numbers of DD bits + */ +static inline uint16_t +_recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq, + struct rte_mbuf **rx_pkts, + uint16_t nb_pkts, uint8_t *split_packet) +{ + volatile union iavf_rx_flex_desc *rxdp; + struct rte_mbuf **sw_ring; + uint16_t nb_pkts_recd; + int pos; + uint64_t var; + const uint32_t *ptype_tbl = rxq->vsi->adapter->ptype_tbl; + __m128i crc_adjust = _mm_set_epi16 + (0, 0, 0, /* ignore non-length fields */ + -rxq->crc_len, /* sub crc on data_len */ + 0, /* ignore high-16bits of pkt_len */ + -rxq->crc_len, /* sub crc on pkt_len */ + 0, 0 /* ignore pkt_type field */ + ); + const __m128i zero = _mm_setzero_si128(); + /* mask to shuffle from desc. to mbuf */ + const __m128i shuf_msk = _mm_set_epi8 + (15, 14, 13, 12, /* octet 12~15, 32 bits rss */ + 11, 10, /* octet 10~11, 16 bits vlan_macip */ + 5, 4, /* octet 4~5, 16 bits data_len */ + 0xFF, 0xFF, /* skip high 16 bits pkt_len, zero out */ + 5, 4, /* octet 4~5, low 16 bits pkt_len */ + 0xFF, 0xFF, /* pkt_type set as unknown */ + 0xFF, 0xFF /* pkt_type set as unknown */ + ); + const __m128i eop_shuf_mask = _mm_set_epi8(0xFF, 0xFF, + 0xFF, 0xFF, + 0xFF, 0xFF, + 0xFF, 0xFF, + 0xFF, 0xFF, + 0xFF, 0xFF, + 0x04, 0x0C, + 0x00, 0x08); + + /** + * compile-time check the above crc_adjust layout is correct. 
+ * NOTE: the first field (lowest address) is given last in set_epi16 + * call above. + */ + RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) != + offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4); + RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) != + offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8); + + /* 4 packets DD mask */ + const __m128i dd_check = _mm_set_epi64x(0x0000000100000001LL, + 0x0000000100000001LL); + /* 4 packets EOP mask */ + const __m128i eop_check = _mm_set_epi64x(0x0000000200000002LL, + 0x0000000200000002LL); + + /* nb_pkts shall be less equal than IAVF_VPMD_RX_MAX_BURST */ + nb_pkts = RTE_MIN(nb_pkts, IAVF_VPMD_RX_MAX_BURST); + + /* nb_pkts has to be floor-aligned to IAVF_VPMD_DESCS_PER_LOOP */ + nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, IAVF_VPMD_DESCS_PER_LOOP); + + /* Just the act of getting into the function from the application is + * going to cost about 7 cycles + */ + rxdp = (union iavf_rx_flex_desc *)rxq->rx_ring + rxq->rx_tail; + + rte_prefetch0(rxdp); + + /* See if we need to rearm the RX queue - gives the prefetch a bit + * of time to act + */ + if (rxq->rxrearm_nb > rxq->rx_free_thresh) + iavf_rxq_rearm(rxq); + + /* Before we start moving massive data around, check to see if + * there is actually a packet available + */ + if (!(rxdp->wb.status_error0 & + rte_cpu_to_le_32(1 << IAVF_RX_FLEX_DESC_STATUS0_DD_S))) + return 0; + + /** + * Compile-time verify the shuffle mask + * NOTE: some field positions already verified above, but duplicated + * here for completeness in case of future modifications. + */ + RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) != + offsetof(struct rte_mbuf, rx_descriptor_fields1) + 4); + RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) != + offsetof(struct rte_mbuf, rx_descriptor_fields1) + 8); + RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, vlan_tci) != + offsetof(struct rte_mbuf, rx_descriptor_fields1) + 10); + RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, hash) != + offsetof(struct rte_mbuf, rx_descriptor_fields1) + 12); + + /* Cache is empty -> need to scan the buffer rings, but first move + * the next 'n' mbufs into the cache + */ + sw_ring = &rxq->sw_ring[rxq->rx_tail]; + + /* A. load 4 packet in one loop + * [A*. mask out 4 unused dirty field in desc] + * B. copy 4 mbuf point from swring to rx_pkts + * C. calc the number of DD bits among the 4 packets + * [C*. extract the end-of-packet bit, if requested] + * D. fill info. from desc to mbuf + */ + + for (pos = 0, nb_pkts_recd = 0; pos < nb_pkts; + pos += IAVF_VPMD_DESCS_PER_LOOP, + rxdp += IAVF_VPMD_DESCS_PER_LOOP) { + __m128i descs[IAVF_VPMD_DESCS_PER_LOOP]; + __m128i pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4; + __m128i staterr, sterr_tmp1, sterr_tmp2; + /* 2 64 bit or 4 32 bit mbuf pointers in one XMM reg. 
*/ + __m128i mbp1; +#if defined(RTE_ARCH_X86_64) + __m128i mbp2; +#endif + + /* B.1 load 2 (64 bit) or 4 (32 bit) mbuf points */ + mbp1 = _mm_loadu_si128((__m128i *)&sw_ring[pos]); + /* Read desc statuses backwards to avoid race condition */ + /* A.1 load 4 pkts desc */ + descs[3] = _mm_loadu_si128((__m128i *)(rxdp + 3)); + rte_compiler_barrier(); + + /* B.2 copy 2 64 bit or 4 32 bit mbuf point into rx_pkts */ + _mm_storeu_si128((__m128i *)&rx_pkts[pos], mbp1); + +#if defined(RTE_ARCH_X86_64) + /* B.1 load 2 64 bit mbuf points */ + mbp2 = _mm_loadu_si128((__m128i *)&sw_ring[pos + 2]); +#endif + + descs[2] = _mm_loadu_si128((__m128i *)(rxdp + 2)); + rte_compiler_barrier(); + /* B.1 load 2 mbuf point */ + descs[1] = _mm_loadu_si128((__m128i *)(rxdp + 1)); + rte_compiler_barrier(); + descs[0] = _mm_loadu_si128((__m128i *)(rxdp)); + +#if defined(RTE_ARCH_X86_64) + /* B.2 copy 2 mbuf point into rx_pkts */ + _mm_storeu_si128((__m128i *)&rx_pkts[pos + 2], mbp2); +#endif + + if (split_packet) { + rte_mbuf_prefetch_part2(rx_pkts[pos]); + rte_mbuf_prefetch_part2(rx_pkts[pos + 1]); + rte_mbuf_prefetch_part2(rx_pkts[pos + 2]); + rte_mbuf_prefetch_part2(rx_pkts[pos + 3]); + } + + /* avoid compiler reorder optimization */ + rte_compiler_barrier(); + + /* D.1 pkt 3,4 convert format from desc to pktmbuf */ + pkt_mb4 = _mm_shuffle_epi8(descs[3], shuf_msk); + pkt_mb3 = _mm_shuffle_epi8(descs[2], shuf_msk); + + /* C.1 4=>2 filter staterr info only */ + sterr_tmp2 = _mm_unpackhi_epi32(descs[3], descs[2]); + /* C.1 4=>2 filter staterr info only */ + sterr_tmp1 = _mm_unpackhi_epi32(descs[1], descs[0]); + + flex_desc_to_olflags_v(rxq, descs, &rx_pkts[pos]); + + /* D.2 pkt 3,4 set in_port/nb_seg and remove crc */ + pkt_mb4 = _mm_add_epi16(pkt_mb4, crc_adjust); + pkt_mb3 = _mm_add_epi16(pkt_mb3, crc_adjust); + + /* D.1 pkt 1,2 convert format from desc to pktmbuf */ + pkt_mb2 = _mm_shuffle_epi8(descs[1], shuf_msk); + pkt_mb1 = _mm_shuffle_epi8(descs[0], shuf_msk); + + /* C.2 get 4 pkts staterr value */ + staterr = _mm_unpacklo_epi32(sterr_tmp1, sterr_tmp2); + + /* D.3 copy final 3,4 data to rx_pkts */ + _mm_storeu_si128 + ((void *)&rx_pkts[pos + 3]->rx_descriptor_fields1, + pkt_mb4); + _mm_storeu_si128 + ((void *)&rx_pkts[pos + 2]->rx_descriptor_fields1, + pkt_mb3); + + /* D.2 pkt 1,2 set in_port/nb_seg and remove crc */ + pkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust); + pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust); + + /* C* extract and record EOP bit */ + if (split_packet) { + /* and with mask to extract bits, flipping 1-0 */ + __m128i eop_bits = _mm_andnot_si128(staterr, eop_check); + /* the staterr values are not in order, as the count + * of dd bits doesn't care. However, for end of + * packet tracking, we do care, so shuffle.
This also + * compresses the 32-bit values to 8-bit + */ + eop_bits = _mm_shuffle_epi8(eop_bits, eop_shuf_mask); + /* store the resulting 32-bit value */ + *(int *)split_packet = _mm_cvtsi128_si32(eop_bits); + split_packet += IAVF_VPMD_DESCS_PER_LOOP; + } + + /* C.3 calc available number of desc */ + staterr = _mm_and_si128(staterr, dd_check); + staterr = _mm_packs_epi32(staterr, zero); + + /* D.3 copy final 1,2 data to rx_pkts */ + _mm_storeu_si128 + ((void *)&rx_pkts[pos + 1]->rx_descriptor_fields1, + pkt_mb2); + _mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1, + pkt_mb1); + flex_desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl); + /* C.4 calc available number of desc */ + var = __builtin_popcountll(_mm_cvtsi128_si64(staterr)); + nb_pkts_recd += var; + if (likely(var != IAVF_VPMD_DESCS_PER_LOOP)) + break; + } + + /* Update our internal tail pointer */ + rxq->rx_tail = (uint16_t)(rxq->rx_tail + nb_pkts_recd); + rxq->rx_tail = (uint16_t)(rxq->rx_tail & (rxq->nb_rx_desc - 1)); + rxq->rxrearm_nb = (uint16_t)(rxq->rxrearm_nb + nb_pkts_recd); + + return nb_pkts_recd; +} + /* Notice: * - nb_pkts < IAVF_DESCS_PER_LOOP, just return no packet * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST @@ -467,6 +827,18 @@ iavf_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, return _recv_raw_pkts_vec(rx_queue, rx_pkts, nb_pkts, NULL); } +/* Notice: + * - nb_pkts < IAVF_DESCS_PER_LOOP, just return no packet + * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST + * numbers of DD bits + */ +uint16_t +iavf_recv_pkts_vec_flex_rxd(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts) +{ + return _recv_raw_pkts_vec_flex_rxd(rx_queue, rx_pkts, nb_pkts, NULL); +} + /* vPMD receive routine that reassembles scattered packets * Notice: * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet @@ -508,6 +880,48 @@ iavf_recv_scattered_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, &split_flags[i]); } +/* vPMD receive routine that reassembles scattered packets for flex RxD + * Notice: + * - nb_pkts < IAVF_VPMD_DESCS_PER_LOOP, just return no packet + * - nb_pkts > IAVF_VPMD_RX_MAX_BURST, only scan IAVF_VPMD_RX_MAX_BURST + * numbers of DD bits + */ +uint16_t +iavf_recv_scattered_pkts_vec_flex_rxd(void *rx_queue, + struct rte_mbuf **rx_pkts, + uint16_t nb_pkts) +{ + struct iavf_rx_queue *rxq = rx_queue; + uint8_t split_flags[IAVF_VPMD_RX_MAX_BURST] = {0}; + unsigned int i = 0; + + /* get some new buffers */ + uint16_t nb_bufs = _recv_raw_pkts_vec_flex_rxd(rxq, rx_pkts, nb_pkts, + split_flags); + if (nb_bufs == 0) + return 0; + + /* happy day case, full burst + no packets to be joined */ + const uint64_t *split_fl64 = (uint64_t *)split_flags; + + if (!rxq->pkt_first_seg && + split_fl64[0] == 0 && split_fl64[1] == 0 && + split_fl64[2] == 0 && split_fl64[3] == 0) + return nb_bufs; + + /* reassemble any packets that need reassembly */ + if (!rxq->pkt_first_seg) { + /* find the first split flag, and only reassemble then */ + while (i < nb_bufs && !split_flags[i]) + i++; + if (i == nb_bufs) + return nb_bufs; + rxq->pkt_first_seg = rx_pkts[i]; + } + return i + reassemble_packets(rxq, &rx_pkts[i], nb_bufs - i, + &split_flags[i]); +} + static inline void vtx1(volatile struct iavf_tx_desc *txdp, struct rte_mbuf *pkt, uint64_t flags) { From patchwork Mon Mar 16 07:45:58 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Leyi Rong X-Patchwork-Id: 66682 X-Patchwork-Delegate: xiaolong.ye@intel.com
Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id 0713DA0559; Mon, 16 Mar 2020 08:58:48 +0100 (CET) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 440B71C0C9; Mon, 16 Mar 2020 08:57:38 +0100 (CET) Received: from mga18.intel.com (mga18.intel.com [134.134.136.126]) by dpdk.org (Postfix) with ESMTP id 62E141C0B4 for ; Mon, 16 Mar 2020 08:57:34 +0100 (CET) IronPort-SDR: z2m6UuHMcl2m2RPzx8DfL8w5pwZuSExK1ZmItK/XTow6SUk6LR9HFOxO4ZsHwE8gEdZ56NwNKB 1jGKtJTxDo8Q== X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga004.jf.intel.com ([10.7.209.38]) by orsmga106.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 16 Mar 2020 00:57:34 -0700 IronPort-SDR: vMmGZRzOoO3awxAlMwfkfr9lIoOag3S1iUcmP3d9+RrtcdkRgjvb31cO0Y03hkIfJH2EfMCeOr Cc9N+fxGszPg== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.70,559,1574150400"; d="scan'208";a="390622528" Received: from dpdk-lrong-srv-04.sh.intel.com ([10.67.119.221]) by orsmga004.jf.intel.com with ESMTP; 16 Mar 2020 00:57:32 -0700 From: Leyi Rong To: qi.z.zhang@intel.com, xiaolong.ye@intel.com Cc: dev@dpdk.org, Leyi Rong Date: Mon, 16 Mar 2020 15:45:58 +0800 Message-Id: <20200316074603.10998-8-leyi.rong@intel.com> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20200316074603.10998-1-leyi.rong@intel.com> References: <20200316074603.10998-1-leyi.rong@intel.com> Subject: [dpdk-dev] [PATCH 07/12] net/iavf: add flow director enabled switch value X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" This commit adds an fdir_enabled flag to the iavf_adapter structure to identify whether an FDIR ID is active. The Rx data path benefits when FDIR ID parsing is not needed, especially in the vector paths. Signed-off-by: Leyi Rong --- drivers/net/iavf/iavf.h | 1 + drivers/net/iavf/iavf_rxtx.h | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h index 4fe15237a..1918a67f1 100644 --- a/drivers/net/iavf/iavf.h +++ b/drivers/net/iavf/iavf.h @@ -142,6 +142,7 @@ struct iavf_adapter { bool tx_vec_allowed; const uint32_t *ptype_tbl; bool stopped; + uint8_t fdir_enabled; }; /* IAVF_DEV_PRIVATE_TO */ diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h index c85207dae..5548d1adb 100644 --- a/drivers/net/iavf/iavf_rxtx.h +++ b/drivers/net/iavf/iavf_rxtx.h @@ -281,6 +281,32 @@ void iavf_dump_tx_descriptor(const struct iavf_tx_queue *txq, tx_desc->cmd_type_offset_bsz); } +/* Enable/disable flow director Rx processing in data path.
*/ +static inline +void iavf_fdir_rx_proc_enable(struct iavf_adapter *ad, bool on) +{ + static uint32_t ref_cnt; + + if (on) { + /* enable flow director processing */ + if (ref_cnt++ == 0) { + ad->fdir_enabled = on; + PMD_DRV_LOG(DEBUG, + "FDIR processing on RX set to %d", on); + } + } else { + if (ref_cnt >= 1) { + ref_cnt--; + + if (ref_cnt == 0) { + ad->fdir_enabled = on; + PMD_DRV_LOG(DEBUG, + "FDIR processing on RX set to %d", on); + } + } + } +} + #ifdef RTE_LIBRTE_IAVF_DEBUG_DUMP_DESC #define IAVF_DUMP_RX_DESC(rxq, desc, rx_id) \ iavf_dump_rx_descriptor(rxq, desc, rx_id) From patchwork Mon Mar 16 07:45:59 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Leyi Rong X-Patchwork-Id: 66683 X-Patchwork-Delegate: xiaolong.ye@intel.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id 0CB64A0559; Mon, 16 Mar 2020 08:59:01 +0100 (CET) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 18FF11C0D4; Mon, 16 Mar 2020 08:57:40 +0100 (CET) Received: from mga18.intel.com (mga18.intel.com [134.134.136.126]) by dpdk.org (Postfix) with ESMTP id 061131C0AE for ; Mon, 16 Mar 2020 08:57:35 +0100 (CET) IronPort-SDR: AvaXviL/+V2nXLUjumZZGAyZ7p0NMNVEWB/t6ga6EXd/vMEWP1jw2uMrC/IsJCgcgerJBNmcOo 1t+85dyFdhEg== X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga004.jf.intel.com ([10.7.209.38]) by orsmga106.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 16 Mar 2020 00:57:35 -0700 IronPort-SDR: JkwDfH/RC5VxD+78KvUDHuAo/ujfrB6kcIUItkt7ApPKuLVG+/J1eON2jOS0FAzWHe63U4KWl2 ELVHEJZTaM7A== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.70,559,1574150400"; d="scan'208";a="390622538" Received: from dpdk-lrong-srv-04.sh.intel.com ([10.67.119.221]) by orsmga004.jf.intel.com with ESMTP; 16 Mar 2020 00:57:34 -0700 From: Leyi Rong To: qi.z.zhang@intel.com, xiaolong.ye@intel.com Cc: dev@dpdk.org, Leyi Rong Date: Mon, 16 Mar 2020 15:45:59 +0800 Message-Id: <20200316074603.10998-9-leyi.rong@intel.com> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20200316074603.10998-1-leyi.rong@intel.com> References: <20200316074603.10998-1-leyi.rong@intel.com> Subject: [dpdk-dev] [PATCH 08/12] net/iavf: support flow mark in normal data path X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Support Flow Director mark ID parsing in normal path. 
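For reference, once the mark is parsed an application consumes it through the standard mbuf API; a minimal sketch (the helper name is illustrative, not part of this patch):

#include <rte_mbuf.h>

/* Illustrative only: read the FDIR mark that this patch parses into
 * the mbuf. PKT_RX_FDIR says the packet hit a flow director filter;
 * PKT_RX_FDIR_ID says hash.fdir.hi carries a valid mark ID.
 */
static inline uint32_t
read_fdir_mark(const struct rte_mbuf *mb, uint32_t no_mark)
{
	if ((mb->ol_flags & (PKT_RX_FDIR | PKT_RX_FDIR_ID)) ==
	    (PKT_RX_FDIR | PKT_RX_FDIR_ID))
		return mb->hash.fdir.hi;
	return no_mark;
}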
Signed-off-by: Leyi Rong --- drivers/net/iavf/iavf.h | 3 +++ drivers/net/iavf/iavf_rxtx.c | 37 ++++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/drivers/net/iavf/iavf.h b/drivers/net/iavf/iavf.h index 1918a67f1..533856f21 100644 --- a/drivers/net/iavf/iavf.h +++ b/drivers/net/iavf/iavf.h @@ -74,6 +74,9 @@ #define IAVF_COMMS_PROTO_L2TPV3 0x0000000000000008 #define IAVF_COMMS_PROTO_ESP 0x0000000000000010 +#define IAVF_RX_DESC_EXT_STATUS_FLEXBH_MASK 0x03 +#define IAVF_RX_DESC_EXT_STATUS_FLEXBH_FD_ID 0x01 + struct iavf_adapter; struct iavf_rx_queue; struct iavf_tx_queue; diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c index 5d484d5e9..d941249e3 100644 --- a/drivers/net/iavf/iavf_rxtx.c +++ b/drivers/net/iavf/iavf_rxtx.c @@ -746,6 +746,10 @@ iavf_rxd_to_pkt_flags(uint64_t qword) IAVF_RX_DESC_FLTSTAT_RSS_HASH) == IAVF_RX_DESC_FLTSTAT_RSS_HASH) ? PKT_RX_RSS_HASH : 0; + /* Check if FDIR Match */ + flags |= (qword & (1 << IAVF_RX_DESC_STATUS_FLM_SHIFT) ? + PKT_RX_FDIR : 0); + if (likely((error_bits & IAVF_RX_ERR_BITS) == 0)) { flags |= (PKT_RX_IP_CKSUM_GOOD | PKT_RX_L4_CKSUM_GOOD); return flags; @@ -766,6 +770,25 @@ iavf_rxd_to_pkt_flags(uint64_t qword) return flags; } +static inline uint64_t +iavf_rxd_build_fdir(volatile union iavf_rx_desc *rxdp, struct rte_mbuf *mb) +{ + uint64_t flags = 0; + uint16_t flexbh; + + flexbh = (rte_le_to_cpu_32(rxdp->wb.qword2.ext_status) >> + IAVF_RX_DESC_EXT_STATUS_FLEXBH_SHIFT) & + IAVF_RX_DESC_EXT_STATUS_FLEXBH_MASK; + + if (flexbh == IAVF_RX_DESC_EXT_STATUS_FLEXBH_FD_ID) { + mb->hash.fdir.hi = + rte_le_to_cpu_32(rxdp->wb.qword3.hi_dword.fd_id); + flags |= PKT_RX_FDIR_ID; + } + + return flags; +} + /* Translate the rx flex descriptor status to pkt flags */ static inline void iavf_rxd_to_pkt_fields(struct rte_mbuf *mb, @@ -782,6 +805,11 @@ iavf_rxd_to_pkt_fields(struct rte_mbuf *mb, mb->hash.rss = rte_le_to_cpu_32(desc->rss_hash); } #endif + + if (desc->flow_id != 0xFFFFFFFF) { + mb->ol_flags |= PKT_RX_FDIR | PKT_RX_FDIR_ID; + mb->hash.fdir.hi = rte_le_to_cpu_32(desc->flow_id); + } } #define IAVF_RX_FLEX_ERR0_BITS \ @@ -917,6 +945,9 @@ iavf_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) rxm->hash.rss = rte_le_to_cpu_32(rxd.wb.qword0.hi_dword.rss); + if (pkt_flags & PKT_RX_FDIR) + pkt_flags |= iavf_rxd_build_fdir(&rxd, rxm); + rxm->ol_flags |= pkt_flags; rx_pkts[nb_rx++] = rxm; @@ -1365,6 +1396,9 @@ iavf_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, first_seg->hash.rss = rte_le_to_cpu_32(rxd.wb.qword0.hi_dword.rss); + if (pkt_flags & PKT_RX_FDIR) + pkt_flags |= iavf_rxd_build_fdir(&rxd, first_seg); + first_seg->ol_flags |= pkt_flags; /* Prefetch data of first segment, if configured to do so. 
*/ @@ -1549,6 +1583,9 @@ iavf_rx_scan_hw_ring(struct iavf_rx_queue *rxq) mb->hash.rss = rte_le_to_cpu_32( rxdp[j].wb.qword0.hi_dword.rss); + if (pkt_flags & PKT_RX_FDIR) + pkt_flags |= iavf_rxd_build_fdir(&rxdp[j], mb); + mb->ol_flags |= pkt_flags; } From patchwork Mon Mar 16 07:46:00 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Leyi Rong X-Patchwork-Id: 66684 X-Patchwork-Delegate: xiaolong.ye@intel.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id DB0CBA0559; Mon, 16 Mar 2020 08:59:08 +0100 (CET) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 556F81C0DC; Mon, 16 Mar 2020 08:57:41 +0100 (CET) Received: from mga18.intel.com (mga18.intel.com [134.134.136.126]) by dpdk.org (Postfix) with ESMTP id 9DF9F1C0C5 for ; Mon, 16 Mar 2020 08:57:37 +0100 (CET) IronPort-SDR: 9abwRfozO5DXdmvdQoFMQit90emslU8xhntDrz4CVVsqBr1uYHkkt9jDLsqg0r0XtsU+22wo8w NlN8k7bkvRmQ== X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga004.jf.intel.com ([10.7.209.38]) by orsmga106.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 16 Mar 2020 00:57:37 -0700 IronPort-SDR: geq9Aa6g9xHR3DHtrcO2E/oMoCxORzjT5Zf12B/kLxUL10JJlPWG4H9+1L2UNuY2QusS0RVdXI Jw7Rm4QlMNRQ== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.70,559,1574150400"; d="scan'208";a="390622543" Received: from dpdk-lrong-srv-04.sh.intel.com ([10.67.119.221]) by orsmga004.jf.intel.com with ESMTP; 16 Mar 2020 00:57:35 -0700 From: Leyi Rong To: qi.z.zhang@intel.com, xiaolong.ye@intel.com Cc: dev@dpdk.org, Leyi Rong Date: Mon, 16 Mar 2020 15:46:00 +0800 Message-Id: <20200316074603.10998-10-leyi.rong@intel.com> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20200316074603.10998-1-leyi.rong@intel.com> References: <20200316074603.10998-1-leyi.rong@intel.com> Subject: [dpdk-dev] [PATCH 09/12] net/iavf: support flow mark in AVX path X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Support Flow Director mark ID parsing from Flex Rx descriptor in AVX path. 
Signed-off-by: Leyi Rong --- drivers/net/iavf/iavf_rxtx_vec_avx2.c | 82 +++++++++++++++++++++++++-- 1 file changed, 77 insertions(+), 5 deletions(-) diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/iavf/iavf_rxtx_vec_avx2.c index 74f672c7e..9bbf75632 100644 --- a/drivers/net/iavf/iavf_rxtx_vec_avx2.c +++ b/drivers/net/iavf/iavf_rxtx_vec_avx2.c @@ -550,6 +550,25 @@ _iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq, return received; } +static inline __m256i +flex_rxd_to_fdir_flags_vec_avx2(const __m256i fdir_id0_7) +{ +#define FDID_MIS_MAGIC 0xFFFFFFFF + RTE_BUILD_BUG_ON(PKT_RX_FDIR != (1 << 2)); + RTE_BUILD_BUG_ON(PKT_RX_FDIR_ID != (1 << 13)); + const __m256i pkt_fdir_bit = _mm256_set1_epi32(PKT_RX_FDIR | + PKT_RX_FDIR_ID); + /* desc->flow_id field == 0xFFFFFFFF means fdir mismatch */ + const __m256i fdir_mis_mask = _mm256_set1_epi32(FDID_MIS_MAGIC); + __m256i fdir_mask = _mm256_cmpeq_epi32(fdir_id0_7, + fdir_mis_mask); + /* this XOR op results to bit-reverse the fdir_mask */ + fdir_mask = _mm256_xor_si256(fdir_mask, fdir_mis_mask); + const __m256i fdir_flags = _mm256_and_si256(fdir_mask, pkt_fdir_bit); + + return fdir_flags; +} + static inline uint16_t _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq, struct rte_mbuf **rx_pkts, @@ -612,8 +631,8 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq, const __m256i shuf_msk = _mm256_set_epi8 (/* first descriptor */ - 15, 14, - 13, 12, /* octet 12~15, 32 bits rss */ + 0xFF, 0xFF, + 0xFF, 0xFF, /* rss not supported */ 11, 10, /* octet 10~11, 16 bits vlan_macip */ 5, 4, /* octet 4~5, 16 bits data_len */ 0xFF, 0xFF, /* skip hi 16 bits pkt_len, zero out */ @@ -621,8 +640,8 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq, 0xFF, 0xFF, /* pkt_type set as unknown */ 0xFF, 0xFF, /*pkt_type set as unknown */ /* second descriptor */ - 15, 14, - 13, 12, /* octet 12~15, 32 bits rss */ + 0xFF, 0xFF, + 0xFF, 0xFF, /* rss not supported */ 11, 10, /* octet 10~11, 16 bits vlan_macip */ 5, 4, /* octet 4~5, 16 bits data_len */ 0xFF, 0xFF, /* skip hi 16 bits pkt_len, zero out */ @@ -864,8 +883,61 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq, rss_vlan_flag_bits); /* merge flags */ - const __m256i mbuf_flags = _mm256_or_si256(l3_l4_flags, + __m256i mbuf_flags = _mm256_or_si256(l3_l4_flags, rss_vlan_flags); + + if (rxq->vsi->adapter->fdir_enabled) { + const __m256i fdir_id4_7 = + _mm256_unpackhi_epi32(raw_desc6_7, raw_desc4_5); + + const __m256i fdir_id0_3 = + _mm256_unpackhi_epi32(raw_desc2_3, raw_desc0_1); + + const __m256i fdir_id0_7 = + _mm256_unpackhi_epi64(fdir_id4_7, fdir_id0_3); + + const __m256i fdir_flags = + flex_rxd_to_fdir_flags_vec_avx2(fdir_id0_7); + + /* merge with fdir_flags */ + mbuf_flags = _mm256_or_si256(mbuf_flags, fdir_flags); + + /* write to mbuf: have to use scalar store here */ + uint32_t fdir_id_extr; + + fdir_id_extr = _mm256_extract_epi32(fdir_id0_7, 3); + if (fdir_id_extr != FDID_MIS_MAGIC) + rx_pkts[i + 0]->hash.fdir.hi = fdir_id_extr; + + fdir_id_extr = _mm256_extract_epi32(fdir_id0_7, 7); + if (fdir_id_extr != FDID_MIS_MAGIC) + rx_pkts[i + 1]->hash.fdir.hi = fdir_id_extr; + + fdir_id_extr = _mm256_extract_epi32(fdir_id0_7, 2); + if (fdir_id_extr != FDID_MIS_MAGIC) + rx_pkts[i + 2]->hash.fdir.hi = fdir_id_extr; + + fdir_id_extr = _mm256_extract_epi32(fdir_id0_7, 6); + if (fdir_id_extr != FDID_MIS_MAGIC) + rx_pkts[i + 3]->hash.fdir.hi = fdir_id_extr; + + fdir_id_extr = _mm256_extract_epi32(fdir_id0_7, 1); + if (fdir_id_extr != FDID_MIS_MAGIC) + 
rx_pkts[i + 4]->hash.fdir.hi = fdir_id_extr; + + fdir_id_extr = _mm256_extract_epi32(fdir_id0_7, 5); + if (fdir_id_extr != FDID_MIS_MAGIC) + rx_pkts[i + 5]->hash.fdir.hi = fdir_id_extr; + + fdir_id_extr = _mm256_extract_epi32(fdir_id0_7, 0); + if (fdir_id_extr != FDID_MIS_MAGIC) + rx_pkts[i + 6]->hash.fdir.hi = fdir_id_extr; + + fdir_id_extr = _mm256_extract_epi32(fdir_id0_7, 4); + if (fdir_id_extr != FDID_MIS_MAGIC) + rx_pkts[i + 7]->hash.fdir.hi = fdir_id_extr; + } /* if() on fdir_enabled */ + /** * At this point, we have the 8 sets of flags in the low 16-bits * of each 32-bit value in vlan0. From patchwork Mon Mar 16 07:46:01 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Leyi Rong X-Patchwork-Id: 66685 X-Patchwork-Delegate: xiaolong.ye@intel.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id 4EC85A0559; Mon, 16 Mar 2020 08:59:21 +0100 (CET) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 836FB1C10D; Mon, 16 Mar 2020 08:57:42 +0100 (CET) Received: from mga18.intel.com (mga18.intel.com [134.134.136.126]) by dpdk.org (Postfix) with ESMTP id 3CDE41C0CD for ; Mon, 16 Mar 2020 08:57:39 +0100 (CET) IronPort-SDR: 29wQXimoQRcgxVW6QMrNUpe7GtZzD+6aZ4P5Fp4ihSmdUi1xC77zW1pMGDJpa0Ag5P685r5PII +6FnfhbtP8GA== X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga004.jf.intel.com ([10.7.209.38]) by orsmga106.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 16 Mar 2020 00:57:38 -0700 IronPort-SDR: 0OIXltztx8E++QU7J8qBSB+b1c2zF5FO3ooLm+GuUrj14bMCmwPwLioJ+1peYH+xk0NhQ3yU9f rcGdum7pWsDA== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.70,559,1574150400"; d="scan'208";a="390622547" Received: from dpdk-lrong-srv-04.sh.intel.com ([10.67.119.221]) by orsmga004.jf.intel.com with ESMTP; 16 Mar 2020 00:57:37 -0700 From: Leyi Rong To: qi.z.zhang@intel.com, xiaolong.ye@intel.com Cc: dev@dpdk.org, Leyi Rong Date: Mon, 16 Mar 2020 15:46:01 +0800 Message-Id: <20200316074603.10998-11-leyi.rong@intel.com> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20200316074603.10998-1-leyi.rong@intel.com> References: <20200316074603.10998-1-leyi.rong@intel.com> Subject: [dpdk-dev] [PATCH 10/12] net/iavf: support flow mark in SSE path X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Support Flow Director mark ID parsing from Flex Rx descriptor in SSE path. 
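The three unpack steps used to gather the flow IDs deserve a note; annotated per lane, they compute the following (a sketch of the lane movement, assuming dword 3 of each 16-byte flex descriptor half carries the flow ID, as the FDID_MIS_MAGIC comparison above implies):

#include <emmintrin.h>

/* Illustrative only: descs[n] viewed as four dwords {n.0, n.1, n.2, n.3} */
static inline __m128i
gather_flow_ids(const __m128i descs[4])
{
	__m128i id01 = _mm_unpackhi_epi32(descs[0], descs[1]);
						/* {0.2, 1.2, 0.3, 1.3} */
	__m128i id23 = _mm_unpackhi_epi32(descs[2], descs[3]);
						/* {2.2, 3.2, 2.3, 3.3} */
	return _mm_unpackhi_epi64(id01, id23);
						/* {0.3, 1.3, 2.3, 3.3} */
}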
Signed-off-by: Leyi Rong --- drivers/net/iavf/iavf_rxtx_vec_sse.c | 48 +++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/drivers/net/iavf/iavf_rxtx_vec_sse.c b/drivers/net/iavf/iavf_rxtx_vec_sse.c index 9c1f2a445..eb784938f 100644 --- a/drivers/net/iavf/iavf_rxtx_vec_sse.c +++ b/drivers/net/iavf/iavf_rxtx_vec_sse.c @@ -189,6 +189,25 @@ desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4], _mm_store_si128((__m128i *)&rx_pkts[3]->rearm_data, rearm3); } +static inline __m128i +flex_rxd_to_fdir_flags_vec(const __m128i fdir_id0_3) +{ +#define FDID_MIS_MAGIC 0xFFFFFFFF + RTE_BUILD_BUG_ON(PKT_RX_FDIR != (1 << 2)); + RTE_BUILD_BUG_ON(PKT_RX_FDIR_ID != (1 << 13)); + const __m128i pkt_fdir_bit = _mm_set1_epi32(PKT_RX_FDIR | + PKT_RX_FDIR_ID); + /* desc->flow_id field == 0xFFFFFFFF means fdir mismatch */ + const __m128i fdir_mis_mask = _mm_set1_epi32(FDID_MIS_MAGIC); + __m128i fdir_mask = _mm_cmpeq_epi32(fdir_id0_3, + fdir_mis_mask); + /* this XOR op results to bit-reverse the fdir_mask */ + fdir_mask = _mm_xor_si128(fdir_mask, fdir_mis_mask); + const __m128i fdir_flags = _mm_and_si128(fdir_mask, pkt_fdir_bit); + + return fdir_flags; +} + static inline void flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4], struct rte_mbuf **rx_pkts) @@ -267,6 +286,32 @@ flex_desc_to_olflags_v(struct iavf_rx_queue *rxq, __m128i descs[4], /* merge the flags */ flags = _mm_or_si128(flags, rss_vlan); + if (rxq->vsi->adapter->fdir_enabled) { + const __m128i fdir_id0_1 = + _mm_unpackhi_epi32(descs[0], descs[1]); + + const __m128i fdir_id2_3 = + _mm_unpackhi_epi32(descs[2], descs[3]); + + const __m128i fdir_id0_3 = + _mm_unpackhi_epi64(fdir_id0_1, fdir_id2_3); + + const __m128i fdir_flags = + flex_rxd_to_fdir_flags_vec(fdir_id0_3); + + /* merge with fdir_flags */ + flags = _mm_or_si128(flags, fdir_flags); + + /* write fdir_id to mbuf if FDIR match */ + uint32_t fdir_id_extr, i; + + for (i = 0; i < IAVF_VPMD_DESCS_PER_LOOP; i++) { + fdir_id_extr = _mm_extract_epi32(fdir_id0_3, i); + if (fdir_id_extr != FDID_MIS_MAGIC) + rx_pkts[i]->hash.fdir.hi = fdir_id_extr; + } + } /* if() on fdir_enabled */ + /** * At this point, we have the 4 sets of flags in the low 16-bits * of each 32-bit value in flags. @@ -604,7 +649,8 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq, const __m128i zero = _mm_setzero_si128(); /* mask to shuffle from desc. 
to mbuf */ const __m128i shuf_msk = _mm_set_epi8 - (15, 14, 13, 12, /* octet 12~15, 32 bits rss */ + (0xFF, 0xFF, + 0xFF, 0xFF, /* rss not supported */ 11, 10, /* octet 10~11, 16 bits vlan_macip */ 5, 4, /* octet 4~5, 16 bits data_len */ 0xFF, 0xFF, /* skip high 16 bits pkt_len, zero out */ From patchwork Mon Mar 16 07:46:02 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Leyi Rong X-Patchwork-Id: 66686 X-Patchwork-Delegate: xiaolong.ye@intel.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id 1ACA1A0559; Mon, 16 Mar 2020 08:59:31 +0100 (CET) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 21FF71C112; Mon, 16 Mar 2020 08:57:44 +0100 (CET) Received: from mga18.intel.com (mga18.intel.com [134.134.136.126]) by dpdk.org (Postfix) with ESMTP id D44431C0D8 for ; Mon, 16 Mar 2020 08:57:40 +0100 (CET) IronPort-SDR: odNWif2H4qB0x7gMnNboyDp/dT8shFXxaPqPXmjP1FPlXHeiGnpyx49aiOJnMoNBJQMn1Eskde /KD586pMVprw== X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga004.jf.intel.com ([10.7.209.38]) by orsmga106.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 16 Mar 2020 00:57:40 -0700 IronPort-SDR: 6dfAYeKJBDm+8bADKfunHOykZkXdVxDWpOjFonYU3ULiDIPidh6dkfXLi6BnotCaPvSA0eI8vo 75lbTxibgZ0w== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.70,559,1574150400"; d="scan'208";a="390622553" Received: from dpdk-lrong-srv-04.sh.intel.com ([10.67.119.221]) by orsmga004.jf.intel.com with ESMTP; 16 Mar 2020 00:57:39 -0700 From: Leyi Rong To: qi.z.zhang@intel.com, xiaolong.ye@intel.com Cc: dev@dpdk.org, Leyi Rong Date: Mon, 16 Mar 2020 15:46:02 +0800 Message-Id: <20200316074603.10998-12-leyi.rong@intel.com> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20200316074603.10998-1-leyi.rong@intel.com> References: <20200316074603.10998-1-leyi.rong@intel.com> Subject: [dpdk-dev] [PATCH 11/12] net/iavf: add RSS hash parsing in AVX path X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Support RSS hash parsing from Flex Rx descriptor in AVX data path. 
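Per 64-bit lane, the slli/and/or sequence added below simply moves the 32-bit hash into the dword that the shuffle mask now leaves zeroed; a scalar sketch of one lane (illustrative, not patch code):

#include <stdint.h>

/* qw1_from_shuffle: data_len/vlan_tci in the low dword, hash dword
 * zeroed by the 0xFF entries of shuf_msk; rss_hash: the hash dword
 * read from the second 16 bytes of the descriptor.
 */
static inline uint64_t
splice_rss_hash(uint64_t qw1_from_shuffle, uint32_t rss_hash)
{
	/* matches slli_epi64(x, 32), then mask, then or */
	return qw1_from_shuffle | ((uint64_t)rss_hash << 32);
}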
Signed-off-by: Leyi Rong --- drivers/net/iavf/iavf_rxtx_vec_avx2.c | 92 ++++++++++++++++++++++++++- 1 file changed, 90 insertions(+), 2 deletions(-) diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/iavf/iavf_rxtx_vec_avx2.c index 9bbf75632..efecdd714 100644 --- a/drivers/net/iavf/iavf_rxtx_vec_avx2.c +++ b/drivers/net/iavf/iavf_rxtx_vec_avx2.c @@ -632,7 +632,7 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq, _mm256_set_epi8 (/* first descriptor */ 0xFF, 0xFF, - 0xFF, 0xFF, /* rss not supported */ + 0xFF, 0xFF, /* rss hash parsed separately */ 11, 10, /* octet 10~11, 16 bits vlan_macip */ 5, 4, /* octet 4~5, 16 bits data_len */ 0xFF, 0xFF, /* skip hi 16 bits pkt_len, zero out */ @@ -641,7 +641,7 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq, 0xFF, 0xFF, /*pkt_type set as unknown */ /* second descriptor */ 0xFF, 0xFF, - 0xFF, 0xFF, /* rss not supported */ + 0xFF, 0xFF, /* rss hash parsed separately */ 11, 10, /* octet 10~11, 16 bits vlan_macip */ 5, 4, /* octet 4~5, 16 bits data_len */ 0xFF, 0xFF, /* skip hi 16 bits pkt_len, zero out */ @@ -938,6 +938,94 @@ _iavf_recv_raw_pkts_vec_avx2_flex_rxd(struct iavf_rx_queue *rxq, rx_pkts[i + 7]->hash.fdir.hi = fdir_id_extr; } /* if() on fdir_enabled */ + /** + * needs to load 2nd 16B of each desc for RSS hash parsing, + * will cause performance drop to get into this context. + */ + if (rxq->vsi->adapter->eth_dev->data->dev_conf.rxmode.offloads & + DEV_RX_OFFLOAD_RSS_HASH) { + /* load bottom half of every 32B desc */ + const __m128i raw_desc_bh7 = + _mm_load_si128 + ((void *)(&rxdp[7].wb.status_error1)); + rte_compiler_barrier(); + const __m128i raw_desc_bh6 = + _mm_load_si128 + ((void *)(&rxdp[6].wb.status_error1)); + rte_compiler_barrier(); + const __m128i raw_desc_bh5 = + _mm_load_si128 + ((void *)(&rxdp[5].wb.status_error1)); + rte_compiler_barrier(); + const __m128i raw_desc_bh4 = + _mm_load_si128 + ((void *)(&rxdp[4].wb.status_error1)); + rte_compiler_barrier(); + const __m128i raw_desc_bh3 = + _mm_load_si128 + ((void *)(&rxdp[3].wb.status_error1)); + rte_compiler_barrier(); + const __m128i raw_desc_bh2 = + _mm_load_si128 + ((void *)(&rxdp[2].wb.status_error1)); + rte_compiler_barrier(); + const __m128i raw_desc_bh1 = + _mm_load_si128 + ((void *)(&rxdp[1].wb.status_error1)); + rte_compiler_barrier(); + const __m128i raw_desc_bh0 = + _mm_load_si128 + ((void *)(&rxdp[0].wb.status_error1)); + + __m256i raw_desc_bh6_7 = + _mm256_inserti128_si256 + (_mm256_castsi128_si256(raw_desc_bh6), + raw_desc_bh7, 1); + __m256i raw_desc_bh4_5 = + _mm256_inserti128_si256 + (_mm256_castsi128_si256(raw_desc_bh4), + raw_desc_bh5, 1); + __m256i raw_desc_bh2_3 = + _mm256_inserti128_si256 + (_mm256_castsi128_si256(raw_desc_bh2), + raw_desc_bh3, 1); + __m256i raw_desc_bh0_1 = + _mm256_inserti128_si256 + (_mm256_castsi128_si256(raw_desc_bh0), + raw_desc_bh1, 1); + + /** + * to shift the 32b RSS hash value to the + * highest 32b of each 128b before mask + */ + __m256i rss_hash6_7 = + _mm256_slli_epi64(raw_desc_bh6_7, 32); + __m256i rss_hash4_5 = + _mm256_slli_epi64(raw_desc_bh4_5, 32); + __m256i rss_hash2_3 = + _mm256_slli_epi64(raw_desc_bh2_3, 32); + __m256i rss_hash0_1 = + _mm256_slli_epi64(raw_desc_bh0_1, 32); + + __m256i rss_hash_msk = + _mm256_set_epi32(0xFFFFFFFF, 0, 0, 0, + 0xFFFFFFFF, 0, 0, 0); + + rss_hash6_7 = _mm256_and_si256 + (rss_hash6_7, rss_hash_msk); + rss_hash4_5 = _mm256_and_si256 + (rss_hash4_5, rss_hash_msk); + rss_hash2_3 = _mm256_and_si256 + (rss_hash2_3, rss_hash_msk); + rss_hash0_1 = 
_mm256_and_si256 + (rss_hash0_1, rss_hash_msk); + + mb6_7 = _mm256_or_si256(mb6_7, rss_hash6_7); + mb4_5 = _mm256_or_si256(mb4_5, rss_hash4_5); + mb2_3 = _mm256_or_si256(mb2_3, rss_hash2_3); + mb0_1 = _mm256_or_si256(mb0_1, rss_hash0_1); + } /* if() on RSS hash parsing */ + /** * At this point, we have the 8 sets of flags in the low 16-bits * of each 32-bit value in vlan0. From patchwork Mon Mar 16 07:46:03 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Leyi Rong X-Patchwork-Id: 66687 X-Patchwork-Delegate: xiaolong.ye@intel.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id B967AA0563; Mon, 16 Mar 2020 08:59:41 +0100 (CET) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 361AB1C118; Mon, 16 Mar 2020 08:57:46 +0100 (CET) Received: from mga18.intel.com (mga18.intel.com [134.134.136.126]) by dpdk.org (Postfix) with ESMTP id 7A4EC1C0D9 for ; Mon, 16 Mar 2020 08:57:42 +0100 (CET) IronPort-SDR: Y+xP3d1sSMOVtn2Sjpkf+hf6lGN0BUSXdrY7b+NWW2XHfkqWt9O6L5AdZH5ib3s8O8ue+YGE6q Gif9wb8BSJxQ== X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga004.jf.intel.com ([10.7.209.38]) by orsmga106.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 16 Mar 2020 00:57:42 -0700 IronPort-SDR: Kknr9pRxkdQ5MaktsqwdcBIzSlVRPqjDUcMCFs8apMdGlYcXZ/sZ11zFqnL9wkFsbfrPY7I9iM Rq3+oTHP2FkQ== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.70,559,1574150400"; d="scan'208";a="390622562" Received: from dpdk-lrong-srv-04.sh.intel.com ([10.67.119.221]) by orsmga004.jf.intel.com with ESMTP; 16 Mar 2020 00:57:40 -0700 From: Leyi Rong To: qi.z.zhang@intel.com, xiaolong.ye@intel.com Cc: dev@dpdk.org, Leyi Rong Date: Mon, 16 Mar 2020 15:46:03 +0800 Message-Id: <20200316074603.10998-13-leyi.rong@intel.com> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20200316074603.10998-1-leyi.rong@intel.com> References: <20200316074603.10998-1-leyi.rong@intel.com> Subject: [dpdk-dev] [PATCH 12/12] net/iavf: add RSS hash parsing in SSE path X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Support RSS hash parsing from Flex Rx descriptor in SSE data path. Signed-off-by: Leyi Rong --- drivers/net/iavf/iavf_rxtx_vec_sse.c | 86 ++++++++++++++++++++++------ 1 file changed, 70 insertions(+), 16 deletions(-) diff --git a/drivers/net/iavf/iavf_rxtx_vec_sse.c b/drivers/net/iavf/iavf_rxtx_vec_sse.c index eb784938f..a615f22e2 100644 --- a/drivers/net/iavf/iavf_rxtx_vec_sse.c +++ b/drivers/net/iavf/iavf_rxtx_vec_sse.c @@ -650,7 +650,7 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq, /* mask to shuffle from desc. 
 drivers/net/iavf/iavf_rxtx_vec_sse.c | 86 ++++++++++++++++++++++------
 1 file changed, 70 insertions(+), 16 deletions(-)

diff --git a/drivers/net/iavf/iavf_rxtx_vec_sse.c b/drivers/net/iavf/iavf_rxtx_vec_sse.c
index eb784938f..a615f22e2 100644
--- a/drivers/net/iavf/iavf_rxtx_vec_sse.c
+++ b/drivers/net/iavf/iavf_rxtx_vec_sse.c
@@ -650,7 +650,7 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 	/* mask to shuffle from desc. to mbuf */
 	const __m128i shuf_msk = _mm_set_epi8
 		(0xFF, 0xFF,
-		 0xFF, 0xFF,	/* rss not supported */
+		 0xFF, 0xFF,	/* rss hash parsed separately */
 		 11, 10,	/* octet 10~11, 16 bits vlan_macip */
 		 5, 4,		/* octet 4~5, 16 bits data_len */
 		 0xFF, 0xFF,	/* skip high 16 bits pkt_len, zero out */
@@ -741,7 +741,7 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 	     pos += IAVF_VPMD_DESCS_PER_LOOP,
 	     rxdp += IAVF_VPMD_DESCS_PER_LOOP) {
 		__m128i descs[IAVF_VPMD_DESCS_PER_LOOP];
-		__m128i pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
+		__m128i pkt_mb0, pkt_mb1, pkt_mb2, pkt_mb3;
 		__m128i staterr, sterr_tmp1, sterr_tmp2;
 		/* 2 64 bit or 4 32 bit mbuf pointers in one XMM reg. */
 		__m128i mbp1;
@@ -787,8 +787,12 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 		rte_compiler_barrier();
 
 		/* D.1 pkt 3,4 convert format from desc to pktmbuf */
-		pkt_mb4 = _mm_shuffle_epi8(descs[3], shuf_msk);
-		pkt_mb3 = _mm_shuffle_epi8(descs[2], shuf_msk);
+		pkt_mb3 = _mm_shuffle_epi8(descs[3], shuf_msk);
+		pkt_mb2 = _mm_shuffle_epi8(descs[2], shuf_msk);
+
+		/* D.1 pkt 1,2 convert format from desc to pktmbuf */
+		pkt_mb1 = _mm_shuffle_epi8(descs[1], shuf_msk);
+		pkt_mb0 = _mm_shuffle_epi8(descs[0], shuf_msk);
 
 		/* C.1 4=>2 filter staterr info only */
 		sterr_tmp2 = _mm_unpackhi_epi32(descs[3], descs[2]);
@@ -798,12 +802,66 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 		flex_desc_to_olflags_v(rxq, descs, &rx_pkts[pos]);
 
 		/* D.2 pkt 3,4 set in_port/nb_seg and remove crc */
-		pkt_mb4 = _mm_add_epi16(pkt_mb4, crc_adjust);
 		pkt_mb3 = _mm_add_epi16(pkt_mb3, crc_adjust);
+		pkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust);
 
-		/* D.1 pkt 1,2 convert format from desc to pktmbuf */
-		pkt_mb2 = _mm_shuffle_epi8(descs[1], shuf_msk);
-		pkt_mb1 = _mm_shuffle_epi8(descs[0], shuf_msk);
+		/* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
+		pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust);
+		pkt_mb0 = _mm_add_epi16(pkt_mb0, crc_adjust);
+
+		/**
+		 * need to load the 2nd 16B of each desc for RSS hash parsing;
+		 * entering this branch costs extra loads and can degrade
+		 * performance.
+		 */
+		if (rxq->vsi->adapter->eth_dev->data->dev_conf.rxmode.offloads &
+				DEV_RX_OFFLOAD_RSS_HASH) {
+			/* load bottom half of every 32B desc */
+			const __m128i raw_desc_bh3 =
+				_mm_load_si128
+					((void *)(&rxdp[3].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh2 =
+				_mm_load_si128
+					((void *)(&rxdp[2].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh1 =
+				_mm_load_si128
+					((void *)(&rxdp[1].wb.status_error1));
+			rte_compiler_barrier();
+			const __m128i raw_desc_bh0 =
+				_mm_load_si128
+					((void *)(&rxdp[0].wb.status_error1));
+
+			/**
+			 * shift the 32b RSS hash value into the highest 32b
+			 * of each 128b lane before masking
+			 */
+			__m128i rss_hash3 =
+				_mm_slli_epi64(raw_desc_bh3, 32);
+			__m128i rss_hash2 =
+				_mm_slli_epi64(raw_desc_bh2, 32);
+			__m128i rss_hash1 =
+				_mm_slli_epi64(raw_desc_bh1, 32);
+			__m128i rss_hash0 =
+				_mm_slli_epi64(raw_desc_bh0, 32);
+
+			__m128i rss_hash_msk =
+				_mm_set_epi32(0xFFFFFFFF, 0, 0, 0);
+
+			rss_hash3 = _mm_and_si128
+					(rss_hash3, rss_hash_msk);
+			rss_hash2 = _mm_and_si128
+					(rss_hash2, rss_hash_msk);
+			rss_hash1 = _mm_and_si128
+					(rss_hash1, rss_hash_msk);
+			rss_hash0 = _mm_and_si128
+					(rss_hash0, rss_hash_msk);
+
+			pkt_mb3 = _mm_or_si128(pkt_mb3, rss_hash3);
+			pkt_mb2 = _mm_or_si128(pkt_mb2, rss_hash2);
+			pkt_mb1 = _mm_or_si128(pkt_mb1, rss_hash1);
+			pkt_mb0 = _mm_or_si128(pkt_mb0, rss_hash0);
+		} /* if() on RSS hash parsing */
 
 		/* C.2 get 4 pkts staterr value */
 		staterr = _mm_unpacklo_epi32(sterr_tmp1, sterr_tmp2);
@@ -811,14 +869,10 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 		/* D.3 copy final 3,4 data to rx_pkts */
 		_mm_storeu_si128
 			((void *)&rx_pkts[pos + 3]->rx_descriptor_fields1,
-			 pkt_mb4);
+			 pkt_mb3);
 		_mm_storeu_si128
 			((void *)&rx_pkts[pos + 2]->rx_descriptor_fields1,
-			 pkt_mb3);
-
-		/* D.2 pkt 1,2 set in_port/nb_seg and remove crc */
-		pkt_mb2 = _mm_add_epi16(pkt_mb2, crc_adjust);
-		pkt_mb1 = _mm_add_epi16(pkt_mb1, crc_adjust);
+			 pkt_mb2);
 
 		/* C* extract and record EOP bit */
 		if (split_packet) {
@@ -842,9 +896,9 @@ _recv_raw_pkts_vec_flex_rxd(struct iavf_rx_queue *rxq,
 		/* D.3 copy final 1,2 data to rx_pkts */
 		_mm_storeu_si128
 			((void *)&rx_pkts[pos + 1]->rx_descriptor_fields1,
-			 pkt_mb2);
+			 pkt_mb1);
 		_mm_storeu_si128((void *)&rx_pkts[pos]->rx_descriptor_fields1,
-				 pkt_mb1);
+				 pkt_mb0);
 
 		flex_desc_to_ptype_v(descs, &rx_pkts[pos], ptype_tbl);
 		/* C.4 calc available number of desc */
 		var = __builtin_popcountll(_mm_cvtsi128_si64(staterr));