From patchwork Mon Mar 16 07:45:52 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Leyi Rong X-Patchwork-Id: 66675 X-Patchwork-Delegate: xiaolong.ye@intel.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id 2C60EA0559; Mon, 16 Mar 2020 08:57:31 +0100 (CET) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 63B241C067; Mon, 16 Mar 2020 08:57:26 +0100 (CET) Received: from mga18.intel.com (mga18.intel.com [134.134.136.126]) by dpdk.org (Postfix) with ESMTP id E110225D9 for ; Mon, 16 Mar 2020 08:57:23 +0100 (CET) IronPort-SDR: rPkNDQvrYD9UrYNzWWjID72AsaK3+DmZQb9pqN2ZEwZBYvac47uYIMMcF02sciOMr3nQ+qmkCs b/tneK2IXDnA== X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga004.jf.intel.com ([10.7.209.38]) by orsmga106.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 16 Mar 2020 00:57:23 -0700 IronPort-SDR: IYDWQmRhufZ1XpsFuJTRRw4XaWjQe5Dpfav6v+NI8aBo4CnBvt1pjOFK6x/empSDhnKNUPxIQX VAUV5e6BvkMw== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.70,559,1574150400"; d="scan'208";a="390622485" Received: from dpdk-lrong-srv-04.sh.intel.com ([10.67.119.221]) by orsmga004.jf.intel.com with ESMTP; 16 Mar 2020 00:57:22 -0700 From: Leyi Rong To: qi.z.zhang@intel.com, xiaolong.ye@intel.com Cc: dev@dpdk.org, Leyi Rong Date: Mon, 16 Mar 2020 15:45:52 +0800 Message-Id: <20200316074603.10998-2-leyi.rong@intel.com> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20200316074603.10998-1-leyi.rong@intel.com> References: <20200316074603.10998-1-leyi.rong@intel.com> Subject: [dpdk-dev] [PATCH 01/12] net/iavf: remove 16B Rx descriptor compile option X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Remove CONFIG_RTE_LIBRTE_IAVF_16BYTE_RX_DESC as it's not supported in ice PF host driver. Signed-off-by: Leyi Rong --- config/common_base | 1 - drivers/net/iavf/iavf_rxtx.c | 2 - drivers/net/iavf/iavf_rxtx.h | 14 +-- drivers/net/iavf/iavf_rxtx_vec_avx2.c | 148 +++++++------------------- 4 files changed, 42 insertions(+), 123 deletions(-) diff --git a/config/common_base b/config/common_base index c31175f9d..eea53cb35 100644 --- a/config/common_base +++ b/config/common_base @@ -346,7 +346,6 @@ CONFIG_RTE_LIBRTE_IAVF_DEBUG_TX=n CONFIG_RTE_LIBRTE_IAVF_DEBUG_TX_FREE=n CONFIG_RTE_LIBRTE_IAVF_DEBUG_RX=n CONFIG_RTE_LIBRTE_IAVF_DEBUG_DUMP_DESC=n -CONFIG_RTE_LIBRTE_IAVF_16BYTE_RX_DESC=n # # Compile burst-oriented IPN3KE PMD driver # diff --git a/drivers/net/iavf/iavf_rxtx.c b/drivers/net/iavf/iavf_rxtx.c index 9eccb7c41..fbb18a713 100644 --- a/drivers/net/iavf/iavf_rxtx.c +++ b/drivers/net/iavf/iavf_rxtx.c @@ -230,10 +230,8 @@ alloc_rxq_mbufs(struct iavf_rx_queue *rxq) rxd = &rxq->rx_ring[i]; rxd->read.pkt_addr = dma_addr; rxd->read.hdr_addr = 0; -#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC rxd->read.rsvd1 = 0; rxd->read.rsvd2 = 0; -#endif rxq->sw_ring[i] = mbuf; } diff --git a/drivers/net/iavf/iavf_rxtx.h b/drivers/net/iavf/iavf_rxtx.h index 09b5bd99e..416433504 100644 --- a/drivers/net/iavf/iavf_rxtx.h +++ b/drivers/net/iavf/iavf_rxtx.h @@ -57,12 +57,8 @@ #define IAVF_TX_OFFLOAD_NOTSUP_MASK \ (PKT_TX_OFFLOAD_MASK ^ IAVF_TX_OFFLOAD_MASK) -/* HW desc structure, both 16-byte and 32-byte types are supported */ -#ifdef RTE_LIBRTE_IAVF_16BYTE_RX_DESC -#define iavf_rx_desc iavf_16byte_rx_desc -#else +/* HW desc structure, only 32-byte type is supported */ #define iavf_rx_desc iavf_32byte_rx_desc -#endif struct iavf_rxq_ops { void (*release_mbufs)(struct iavf_rx_queue *rxq); @@ -224,20 +220,12 @@ void iavf_dump_rx_descriptor(struct iavf_rx_queue *rxq, const volatile void *desc, uint16_t rx_id) { -#ifdef RTE_LIBRTE_IAVF_16BYTE_RX_DESC - const volatile union iavf_16byte_rx_desc *rx_desc = desc; - - printf("Queue %d Rx_desc %d: QW0: 0x%016"PRIx64" QW1: 0x%016"PRIx64"\n", - rxq->queue_id, rx_id, rx_desc->read.pkt_addr, - rx_desc->read.hdr_addr); -#else const volatile union iavf_32byte_rx_desc *rx_desc = desc; printf("Queue %d Rx_desc %d: QW0: 0x%016"PRIx64" QW1: 0x%016"PRIx64 " QW2: 0x%016"PRIx64" QW3: 0x%016"PRIx64"\n", rxq->queue_id, rx_id, rx_desc->read.pkt_addr, rx_desc->read.hdr_addr, rx_desc->read.rsvd1, rx_desc->read.rsvd2); -#endif } /* All the descriptors are 16 bytes, so just use one of them diff --git a/drivers/net/iavf/iavf_rxtx_vec_avx2.c b/drivers/net/iavf/iavf_rxtx_vec_avx2.c index 2587083d8..4e1231162 100644 --- a/drivers/net/iavf/iavf_rxtx_vec_avx2.c +++ b/drivers/net/iavf/iavf_rxtx_vec_avx2.c @@ -40,7 +40,6 @@ iavf_rxq_rearm(struct iavf_rx_queue *rxq) return; } -#ifndef RTE_LIBRTE_IAVF_16BYTE_RX_DESC struct rte_mbuf *mb0, *mb1; __m128i dma_addr0, dma_addr1; __m128i hdr_room = _mm_set_epi64x(RTE_PKTMBUF_HEADROOM, @@ -70,54 +69,6 @@ iavf_rxq_rearm(struct iavf_rx_queue *rxq) _mm_store_si128((__m128i *)&rxdp++->read, dma_addr0); _mm_store_si128((__m128i *)&rxdp++->read, dma_addr1); } -#else - struct rte_mbuf *mb0, *mb1, *mb2, *mb3; - __m256i dma_addr0_1, dma_addr2_3; - __m256i hdr_room = _mm256_set1_epi64x(RTE_PKTMBUF_HEADROOM); - /* Initialize the mbufs in vector, process 4 mbufs in one loop */ - for (i = 0; i < IAVF_RXQ_REARM_THRESH; - i += 4, rxp += 4, rxdp += 4) { - __m128i vaddr0, vaddr1, vaddr2, vaddr3; - __m256i vaddr0_1, vaddr2_3; - - mb0 = rxp[0]; - mb1 = rxp[1]; - mb2 = rxp[2]; - mb3 = rxp[3]; - - /* load buf_addr(lo 64bit) and buf_physaddr(hi 64bit) */ - RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_physaddr) != - offsetof(struct rte_mbuf, buf_addr) + 8); - vaddr0 = _mm_loadu_si128((__m128i *)&mb0->buf_addr); - vaddr1 = _mm_loadu_si128((__m128i *)&mb1->buf_addr); - vaddr2 = _mm_loadu_si128((__m128i *)&mb2->buf_addr); - vaddr3 = _mm_loadu_si128((__m128i *)&mb3->buf_addr); - - /** - * merge 0 & 1, by casting 0 to 256-bit and inserting 1 - * into the high lanes. Similarly for 2 & 3 - */ - vaddr0_1 = - _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr0), - vaddr1, 1); - vaddr2_3 = - _mm256_inserti128_si256(_mm256_castsi128_si256(vaddr2), - vaddr3, 1); - - /* convert pa to dma_addr hdr/data */ - dma_addr0_1 = _mm256_unpackhi_epi64(vaddr0_1, vaddr0_1); - dma_addr2_3 = _mm256_unpackhi_epi64(vaddr2_3, vaddr2_3); - - /* add headroom to pa values */ - dma_addr0_1 = _mm256_add_epi64(dma_addr0_1, hdr_room); - dma_addr2_3 = _mm256_add_epi64(dma_addr2_3, hdr_room); - - /* flush desc with pa dma_addr */ - _mm256_store_si256((__m256i *)&rxdp->read, dma_addr0_1); - _mm256_store_si256((__m256i *)&(rxdp + 2)->read, dma_addr2_3); - } - -#endif rxq->rxrearm_start += IAVF_RXQ_REARM_THRESH; if (rxq->rxrearm_start >= rxq->nb_rx_desc) @@ -149,7 +100,6 @@ _iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq, /* struct iavf_rx_entry *sw_ring = &rxq->sw_ring[rxq->rx_tail]; */ struct rte_mbuf **sw_ring = &rxq->sw_ring[rxq->rx_tail]; volatile union iavf_rx_desc *rxdp = rxq->rx_ring + rxq->rx_tail; - const int avx_aligned = ((rxq->rx_tail & 1) == 0); rte_prefetch0(rxdp); @@ -292,8 +242,6 @@ _iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq, PKT_RX_L4_CKSUM_GOOD | PKT_RX_L4_CKSUM_BAD | PKT_RX_EIP_CKSUM_BAD); - RTE_SET_USED(avx_aligned); /* for 32B descriptors we don't use this */ - uint16_t i, received; for (i = 0, received = 0; i < nb_pkts; @@ -309,61 +257,47 @@ _iavf_recv_raw_pkts_vec_avx2(struct iavf_rx_queue *rxq, #endif __m256i raw_desc0_1, raw_desc2_3, raw_desc4_5, raw_desc6_7; -#ifdef RTE_LIBRTE_IAVF_16BYTE_RX_DESC - /* for AVX we need alignment otherwise loads are not atomic */ - if (avx_aligned) { - /* load in descriptors, 2 at a time, in reverse order */ - raw_desc6_7 = _mm256_load_si256((void *)(rxdp + 6)); - rte_compiler_barrier(); - raw_desc4_5 = _mm256_load_si256((void *)(rxdp + 4)); - rte_compiler_barrier(); - raw_desc2_3 = _mm256_load_si256((void *)(rxdp + 2)); - rte_compiler_barrier(); - raw_desc0_1 = _mm256_load_si256((void *)(rxdp + 0)); - } else -#endif - { - const __m128i raw_desc7 = - _mm_load_si128((void *)(rxdp + 7)); - rte_compiler_barrier(); - const __m128i raw_desc6 = - _mm_load_si128((void *)(rxdp + 6)); - rte_compiler_barrier(); - const __m128i raw_desc5 = - _mm_load_si128((void *)(rxdp + 5)); - rte_compiler_barrier(); - const __m128i raw_desc4 = - _mm_load_si128((void *)(rxdp + 4)); - rte_compiler_barrier(); - const __m128i raw_desc3 = - _mm_load_si128((void *)(rxdp + 3)); - rte_compiler_barrier(); - const __m128i raw_desc2 = - _mm_load_si128((void *)(rxdp + 2)); - rte_compiler_barrier(); - const __m128i raw_desc1 = - _mm_load_si128((void *)(rxdp + 1)); - rte_compiler_barrier(); - const __m128i raw_desc0 = - _mm_load_si128((void *)(rxdp + 0)); - - raw_desc6_7 = - _mm256_inserti128_si256 - (_mm256_castsi128_si256(raw_desc6), - raw_desc7, 1); - raw_desc4_5 = - _mm256_inserti128_si256 - (_mm256_castsi128_si256(raw_desc4), - raw_desc5, 1); - raw_desc2_3 = - _mm256_inserti128_si256 - (_mm256_castsi128_si256(raw_desc2), - raw_desc3, 1); - raw_desc0_1 = - _mm256_inserti128_si256 - (_mm256_castsi128_si256(raw_desc0), - raw_desc1, 1); - } + + const __m128i raw_desc7 = + _mm_load_si128((void *)(rxdp + 7)); + rte_compiler_barrier(); + const __m128i raw_desc6 = + _mm_load_si128((void *)(rxdp + 6)); + rte_compiler_barrier(); + const __m128i raw_desc5 = + _mm_load_si128((void *)(rxdp + 5)); + rte_compiler_barrier(); + const __m128i raw_desc4 = + _mm_load_si128((void *)(rxdp + 4)); + rte_compiler_barrier(); + const __m128i raw_desc3 = + _mm_load_si128((void *)(rxdp + 3)); + rte_compiler_barrier(); + const __m128i raw_desc2 = + _mm_load_si128((void *)(rxdp + 2)); + rte_compiler_barrier(); + const __m128i raw_desc1 = + _mm_load_si128((void *)(rxdp + 1)); + rte_compiler_barrier(); + const __m128i raw_desc0 = + _mm_load_si128((void *)(rxdp + 0)); + + raw_desc6_7 = + _mm256_inserti128_si256 + (_mm256_castsi128_si256(raw_desc6), + raw_desc7, 1); + raw_desc4_5 = + _mm256_inserti128_si256 + (_mm256_castsi128_si256(raw_desc4), + raw_desc5, 1); + raw_desc2_3 = + _mm256_inserti128_si256 + (_mm256_castsi128_si256(raw_desc2), + raw_desc3, 1); + raw_desc0_1 = + _mm256_inserti128_si256 + (_mm256_castsi128_si256(raw_desc0), + raw_desc1, 1); if (split_packet) { int j;