From patchwork Sun Apr 11 23:23:37 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Cristian Dumitrescu X-Patchwork-Id: 91079 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id B3D5EA0C46; Mon, 12 Apr 2021 01:23:42 +0200 (CEST) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 3340514143A; Mon, 12 Apr 2021 01:23:42 +0200 (CEST) Received: from mga04.intel.com (mga04.intel.com [192.55.52.120]) by mails.dpdk.org (Postfix) with ESMTP id 005B940143 for ; Mon, 12 Apr 2021 01:23:40 +0200 (CEST) IronPort-SDR: vQ2KDMjuNI867HIUme4ugT0Y+bbCBg0lI4ayX696udGJEteEccxR2cxzpR+hHQYw0OtKTjnTDQ w2F2XAHMYLDA== X-IronPort-AV: E=McAfee;i="6000,8403,9951"; a="191942977" X-IronPort-AV: E=Sophos;i="5.82,214,1613462400"; d="scan'208";a="191942977" Received: from fmsmga006.fm.intel.com ([10.253.24.20]) by fmsmga104.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 11 Apr 2021 16:23:39 -0700 IronPort-SDR: PzEiAcqKvlt8GkNrjnO1XPCRMdvygtIN8sd/s5mtcQFKd+s08SKwHtqRerPTzlZqVfwLmoYQqu bGGKohQZgLOQ== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.82,214,1613462400"; d="scan'208";a="611165967" Received: from silpixa00400573.ir.intel.com (HELO silpixa00400573.ger.corp.intel.com) ([10.237.223.107]) by fmsmga006.fm.intel.com with ESMTP; 11 Apr 2021 16:23:39 -0700 From: Cristian Dumitrescu To: dev@dpdk.org Date: Mon, 12 Apr 2021 00:23:37 +0100 Message-Id: <20210411232338.4005-1-cristian.dumitrescu@intel.com> X-Mailer: git-send-email 2.17.1 Subject: [dpdk-dev] [PATCH 1/2] pipeline: modularize the instruction optimizer X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , 
Errors-To: dev-bounces@dpdk.org Sender: "dev" Decouple the different instruction optimizers from one another. Allow each optimization to run as a separate iteration on the entire instruction stream. Signed-off-by: Cristian Dumitrescu --- lib/librte_pipeline/rte_swx_pipeline.c | 196 ++++++++++++++++++------- 1 file changed, 142 insertions(+), 54 deletions(-) diff --git a/lib/librte_pipeline/rte_swx_pipeline.c b/lib/librte_pipeline/rte_swx_pipeline.c index dc2a155ed..ba828cbda 100644 --- a/lib/librte_pipeline/rte_swx_pipeline.c +++ b/lib/librte_pipeline/rte_swx_pipeline.c @@ -8085,8 +8085,34 @@ instr_verify(struct rte_swx_pipeline *p __rte_unused, return 0; } +static uint32_t +instr_compact(struct instruction *instructions, + struct instruction_data *instruction_data, + uint32_t n_instructions) +{ + uint32_t i, pos = 0; + + /* Eliminate the invalid instructions that have been optimized out. */ + for (i = 0; i < n_instructions; i++) { + struct instruction *instr = &instructions[i]; + struct instruction_data *data = &instruction_data[i]; + + if (data->invalid) + continue; + + if (i != pos) { + memcpy(&instructions[pos], instr, sizeof(*instr)); + memcpy(&instruction_data[pos], data, sizeof(*data)); + } + + pos++; + } + + return pos; +} + static int -instr_pattern_extract_many_detect(struct instruction *instr, +instr_pattern_extract_many_search(struct instruction *instr, struct instruction_data *data, uint32_t n_instr, uint32_t *n_pattern_instr) @@ -8115,9 +8141,9 @@ instr_pattern_extract_many_detect(struct instruction *instr, } static void -instr_pattern_extract_many_optimize(struct instruction *instr, - struct instruction_data *data, - uint32_t n_instr) +instr_pattern_extract_many_replace(struct instruction *instr, + struct instruction_data *data, + uint32_t n_instr) { uint32_t i; @@ -8131,8 +8157,46 @@ instr_pattern_extract_many_optimize(struct instruction *instr, } } +static uint32_t +instr_pattern_extract_many_optimize(struct instruction *instructions, + struct 
instruction_data *instruction_data, + uint32_t n_instructions) +{ + uint32_t i; + + for (i = 0; i < n_instructions; ) { + struct instruction *instr = &instructions[i]; + struct instruction_data *data = &instruction_data[i]; + uint32_t n_instr = 0; + int detected; + + /* Extract many. */ + detected = instr_pattern_extract_many_search(instr, + data, + n_instructions - i, + &n_instr); + if (detected) { + instr_pattern_extract_many_replace(instr, + data, + n_instr); + i += n_instr; + continue; + } + + /* No pattern starting at the current instruction. */ + i++; + } + + /* Eliminate the invalid instructions that have been optimized out. */ + n_instructions = instr_compact(instructions, + instruction_data, + n_instructions); + + return n_instructions; +} + static int -instr_pattern_emit_many_tx_detect(struct instruction *instr, +instr_pattern_emit_many_tx_search(struct instruction *instr, struct instruction_data *data, uint32_t n_instr, uint32_t *n_pattern_instr) @@ -8169,9 +8233,9 @@ instr_pattern_emit_many_tx_detect(struct instruction *instr, } static void -instr_pattern_emit_many_tx_optimize(struct instruction *instr, - struct instruction_data *data, - uint32_t n_instr) +instr_pattern_emit_many_tx_replace(struct instruction *instr, + struct instruction_data *data, + uint32_t n_instr) { uint32_t i; @@ -8192,8 +8256,46 @@ instr_pattern_emit_many_tx_optimize(struct instruction *instr, data[i].invalid = 1; } +static uint32_t +instr_pattern_emit_many_tx_optimize(struct instruction *instructions, + struct instruction_data *instruction_data, + uint32_t n_instructions) +{ + uint32_t i; + + for (i = 0; i < n_instructions; ) { + struct instruction *instr = &instructions[i]; + struct instruction_data *data = &instruction_data[i]; + uint32_t n_instr = 0; + int detected; + + /* Emit many + TX. 
*/ + detected = instr_pattern_emit_many_tx_search(instr, + data, + n_instructions - i, + &n_instr); + if (detected) { + instr_pattern_emit_many_tx_replace(instr, + data, + n_instr); + i += n_instr; + continue; + } + + /* No pattern starting at the current instruction. */ + i++; + } + + /* Eliminate the invalid instructions that have been optimized out. */ + n_instructions = instr_compact(instructions, + instruction_data, + n_instructions); + + return n_instructions; +} + static int -instr_pattern_dma_many_detect(struct instruction *instr, +instr_pattern_dma_many_search(struct instruction *instr, struct instruction_data *data, uint32_t n_instr, uint32_t *n_pattern_instr) @@ -8222,9 +8324,9 @@ instr_pattern_dma_many_detect(struct instruction *instr, } static void -instr_pattern_dma_many_optimize(struct instruction *instr, - struct instruction_data *data, - uint32_t n_instr) +instr_pattern_dma_many_replace(struct instruction *instr, + struct instruction_data *data, + uint32_t n_instr) { uint32_t i; @@ -8240,11 +8342,11 @@ instr_pattern_dma_many_optimize(struct instruction *instr, } static uint32_t -instr_optimize(struct instruction *instructions, +instr_pattern_dma_many_optimize(struct instruction *instructions, struct instruction_data *instruction_data, uint32_t n_instructions) { - uint32_t i, pos = 0; + uint32_t i; for (i = 0; i < n_instructions; ) { struct instruction *instr = &instructions[i]; @@ -8252,39 +8354,13 @@ instr_optimize(struct instruction *instructions, uint32_t n_instr = 0; int detected; - /* Extract many. */ - detected = instr_pattern_extract_many_detect(instr, - data, - n_instructions - i, - &n_instr); - if (detected) { - instr_pattern_extract_many_optimize(instr, - data, - n_instr); - i += n_instr; - continue; - } - - /* Emit many + TX. 
*/ - detected = instr_pattern_emit_many_tx_detect(instr, - data, - n_instructions - i, - &n_instr); - if (detected) { - instr_pattern_emit_many_tx_optimize(instr, - data, - n_instr); - i += n_instr; - continue; - } - /* DMA many. */ - detected = instr_pattern_dma_many_detect(instr, + detected = instr_pattern_dma_many_search(instr, data, n_instructions - i, &n_instr); if (detected) { - instr_pattern_dma_many_optimize(instr, data, n_instr); + instr_pattern_dma_many_replace(instr, data, n_instr); i += n_instr; continue; } @@ -8294,22 +8370,34 @@ instr_optimize(struct instruction *instructions, } /* Eliminate the invalid instructions that have been optimized out. */ - for (i = 0; i < n_instructions; i++) { - struct instruction *instr = &instructions[i]; - struct instruction_data *data = &instruction_data[i]; + n_instructions = instr_compact(instructions, + instruction_data, + n_instructions); - if (data->invalid) - continue; + return n_instructions; +} - if (i != pos) { - memcpy(&instructions[pos], instr, sizeof(*instr)); - memcpy(&instruction_data[pos], data, sizeof(*data)); - } +static uint32_t +instr_optimize(struct instruction *instructions, + struct instruction_data *instruction_data, + uint32_t n_instructions) +{ + /* Extract many. */ + n_instructions = instr_pattern_extract_many_optimize(instructions, + instruction_data, + n_instructions); - pos++; - } + /* Emit many + TX. */ + n_instructions = instr_pattern_emit_many_tx_optimize(instructions, + instruction_data, + n_instructions); - return pos; + /* DMA many. */ + n_instructions = instr_pattern_dma_many_optimize(instructions, + instruction_data, + n_instructions); + + return n_instructions; } static int