From patchwork Wed Sep 23 18:06:32 2020 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Cristian Dumitrescu X-Patchwork-Id: 78610 X-Patchwork-Delegate: david.marchand@redhat.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from dpdk.org (dpdk.org [92.243.14.124]) by inbox.dpdk.org (Postfix) with ESMTP id 50129A04B1; Wed, 23 Sep 2020 20:11:53 +0200 (CEST) Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id C6BFB1DD54; Wed, 23 Sep 2020 20:08:20 +0200 (CEST) Received: from mga05.intel.com (mga05.intel.com [192.55.52.43]) by dpdk.org (Postfix) with ESMTP id 313751DC91 for ; Wed, 23 Sep 2020 20:07:22 +0200 (CEST) IronPort-SDR: nWBdA2XlfNYYLvX8OmRpFRJkLXFBNktYJPyP0/QSdInqkulB3u+cCwVb4Ne4WBpkU5hWt6ZR4h pUcpr4UjxPnA== X-IronPort-AV: E=McAfee;i="6000,8403,9753"; a="245809569" X-IronPort-AV: E=Sophos;i="5.77,293,1596524400"; d="scan'208";a="245809569" X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga003.jf.intel.com ([10.7.209.27]) by fmsmga105.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 23 Sep 2020 11:07:21 -0700 IronPort-SDR: 0Et0+Ll9IXKAgLEWsSHwRZA8Fb/MTKKDhoSFWw1IpP/s3MSE/eBhDqXiLtUUH656yQUS73hdrG ClkC644lyxgg== X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.77,293,1596524400"; d="scan'208";a="305477944" Received: from silpixa00400573.ir.intel.com (HELO silpixa00400573.ger.corp.intel.com) ([10.237.223.107]) by orsmga003.jf.intel.com with ESMTP; 23 Sep 2020 11:07:20 -0700 From: Cristian Dumitrescu To: dev@dpdk.org Cc: thomas@monjalon.net, david.marchand@redhat.com Date: Wed, 23 Sep 2020 19:06:32 +0100 Message-Id: <20200923180645.55852-29-cristian.dumitrescu@intel.com> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20200923180645.55852-1-cristian.dumitrescu@intel.com> References: <20200910152645.9342-2-cristian.dumitrescu@intel.com> 
<20200923180645.55852-1-cristian.dumitrescu@intel.com>
Subject: [dpdk-dev] [PATCH v5 28/41] pipeline: add SWX instruction optimizer
X-BeenThere: dev@dpdk.org
X-Mailman-Version: 2.1.15
Precedence: list
List-Id: DPDK patches and discussions
List-Unsubscribe: ,
List-Archive:
List-Post:
List-Help:
List-Subscribe: ,
Errors-To: dev-bounces@dpdk.org
Sender: "dev"

Instruction optimizer. Detects frequent patterns and replaces them with
some more powerful vector-like pipeline instructions without any user
effort. Executes at instruction translation, not at run-time.

Signed-off-by: Cristian Dumitrescu
---
 lib/librte_pipeline/rte_swx_pipeline.c | 309 +++++++++++++++++++++++++
 1 file changed, 309 insertions(+)

diff --git a/lib/librte_pipeline/rte_swx_pipeline.c b/lib/librte_pipeline/rte_swx_pipeline.c
index d51fec821..77eae1927 100644
--- a/lib/librte_pipeline/rte_swx_pipeline.c
+++ b/lib/librte_pipeline/rte_swx_pipeline.c
@@ -5700,6 +5700,313 @@ instr_verify(struct rte_swx_pipeline *p __rte_unused,
 	return 0;
 }
 
+/*
+ * Detect a run of back-to-back header extract instructions at instr[0].
+ *
+ * The run ends at the first instruction that is already marked invalid, is not
+ * INSTR_HDR_EXTRACT, has an index equal to RTE_DIM(instr->io.hdr.header_id),
+ * or (other than instr[0]) has a non-zero n_users count.
+ *
+ * Returns 1 and stores the run length in *n_pattern_instr when the run holds
+ * at least two instructions; returns 0 otherwise, leaving *n_pattern_instr
+ * untouched.
+ */
+static int
+instr_pattern_extract_many_detect(struct instruction *instr,
+				  struct instruction_data *data,
+				  uint32_t n_instr,
+				  uint32_t *n_pattern_instr)
+{
+	uint32_t i;
+
+	for (i = 0; i < n_instr; i++) {
+		if (data[i].invalid)
+			break;
+
+		if (instr[i].type != INSTR_HDR_EXTRACT)
+			break;
+
+		if (i == RTE_DIM(instr->io.hdr.header_id))
+			break;
+
+		if (i && data[i].n_users)
+			break;
+	}
+
+	if (i < 2)
+		return 0;
+
+	*n_pattern_instr = i;
+	return 1;
+}
+
+/*
+ * Fold the n_instr extract instructions found by
+ * instr_pattern_extract_many_detect() into instr[0].
+ *
+ * For every merged instruction the type of instr[0] is incremented once, its
+ * header arguments are copied into slot i of instr[0] and it is marked
+ * invalid. NOTE(review): the type increment presumably relies on the
+ * multi-header extract opcodes directly following INSTR_HDR_EXTRACT in the
+ * instruction type enum, which is not part of this patch - confirm.
+ */
+static void
+instr_pattern_extract_many_optimize(struct instruction *instr,
+				    struct instruction_data *data,
+				    uint32_t n_instr)
+{
+	uint32_t i;
+
+	for (i = 1; i < n_instr; i++) {
+		instr[0].type++;
+		instr[0].io.hdr.header_id[i] = instr[i].io.hdr.header_id[0];
+		instr[0].io.hdr.struct_id[i] = instr[i].io.hdr.struct_id[0];
+		instr[0].io.hdr.n_bytes[i] = instr[i].io.hdr.n_bytes[0];
+
+		data[i].invalid = 1;
+	}
+}
+
+/*
+ * Detect a run of back-to-back header emit instructions at instr[0] that is
+ * immediately followed by a TX instruction.
+ *
+ * The emit run ends at the first instruction that is already marked invalid,
+ * is not INSTR_HDR_EMIT, has an index equal to
+ * RTE_DIM(instr->io.hdr.header_id), or (other than instr[0]) has a non-zero
+ * n_users count. The instruction right after the run must exist within the
+ * n_instr instructions, be INSTR_TX and have a zero n_users count.
+ *
+ * Returns 1 and stores the pattern length (the emits plus the TX) in
+ * *n_pattern_instr on a match; returns 0 otherwise.
+ */
+static int
+instr_pattern_emit_many_tx_detect(struct instruction *instr,
+				  struct instruction_data *data,
+				  uint32_t n_instr,
+				  uint32_t *n_pattern_instr)
+{
+	uint32_t i;
+
+	for (i = 0; i < n_instr; i++) {
+		if (data[i].invalid)
+			break;
+
+		if (instr[i].type != INSTR_HDR_EMIT)
+			break;
+
+		if (i == RTE_DIM(instr->io.hdr.header_id))
+			break;
+
+		if (i && data[i].n_users)
+			break;
+	}
+
+	if (!i)
+		return 0;
+
+	/* The run reached the end of the array: no instr[i] to inspect. */
+	if (i == n_instr)
+		return 0;
+
+	if (instr[i].type != INSTR_TX)
+		return 0;
+
+	/* The TX is folded away too: apply the emit n_users rule to it. */
+	if (data[i].n_users)
+		return 0;
+
+	i++;
+
+	*n_pattern_instr = i;
+	return 1;
+}
+
+/*
+ * Fold the pattern found by instr_pattern_emit_many_tx_detect() into instr[0].
+ *
+ * n_instr counts the emit instructions plus the trailing TX. The type of
+ * instr[0] is incremented once per merged emit and once more for the TX, whose
+ * io.io.offset and io.io.n_bits are copied into instr[0]. Every merged
+ * instruction is marked invalid. NOTE(review): the type increments presumably
+ * rely on the ordering of the instruction type enum, which is not part of this
+ * patch - confirm.
+ */
+static void
+instr_pattern_emit_many_tx_optimize(struct instruction *instr,
+				    struct instruction_data *data,
+				    uint32_t n_instr)
+{
+	uint32_t i;
+
+	/* Any emit instruction in addition to the first one. */
+	for (i = 1; i < n_instr - 1; i++) {
+		instr[0].type++;
+		instr[0].io.hdr.header_id[i] = instr[i].io.hdr.header_id[0];
+		instr[0].io.hdr.struct_id[i] = instr[i].io.hdr.struct_id[0];
+		instr[0].io.hdr.n_bytes[i] = instr[i].io.hdr.n_bytes[0];
+
+		data[i].invalid = 1;
+	}
+
+	/* The TX instruction is the last one in the pattern. */
+	instr[0].type++;
+	instr[0].io.io.offset = instr[i].io.io.offset;
+	instr[0].io.io.n_bits = instr[i].io.io.n_bits;
+	data[i].invalid = 1;
+}
+
+/*
+ * Detect a run of back-to-back INSTR_DMA_HT instructions at instr[0].
+ *
+ * The run ends at the first instruction that is already marked invalid, is not
+ * INSTR_DMA_HT, has an index equal to RTE_DIM(instr->dma.dst.header_id), or
+ * (other than instr[0]) has a non-zero n_users count.
+ *
+ * Returns 1 and stores the run length in *n_pattern_instr when the run holds
+ * at least two instructions; returns 0 otherwise, leaving *n_pattern_instr
+ * untouched.
+ */
+static int
+instr_pattern_dma_many_detect(struct instruction *instr,
+			      struct instruction_data *data,
+			      uint32_t n_instr,
+			      uint32_t *n_pattern_instr)
+{
+	uint32_t i;
+
+	for (i = 0; i < n_instr; i++) {
+		if (data[i].invalid)
+			break;
+
+		if (instr[i].type != INSTR_DMA_HT)
+			break;
+
+		if (i == RTE_DIM(instr->dma.dst.header_id))
+			break;
+
+		if (i && data[i].n_users)
+			break;
+	}
+
+	if (i < 2)
+		return 0;
+
+	*n_pattern_instr = i;
+	return 1;
+}
+
+/*
+ * Fold the n_instr DMA instructions found by instr_pattern_dma_many_detect()
+ * into instr[0].
+ *
+ * For every merged instruction the type of instr[0] is incremented once, its
+ * DMA arguments are copied into slot i of instr[0] and it is marked invalid.
+ * NOTE(review): the type increment presumably relies on the ordering of the
+ * instruction type enum, which is not part of this patch - confirm.
+ */
+static void
+instr_pattern_dma_many_optimize(struct instruction *instr,
+				struct instruction_data *data,
+				uint32_t n_instr)
+{
+	uint32_t i;
+
+	for (i = 1; i < n_instr; i++) {
+		instr[0].type++;
+		instr[0].dma.dst.header_id[i] = instr[i].dma.dst.header_id[0];
+		instr[0].dma.dst.struct_id[i] = instr[i].dma.dst.struct_id[0];
+		instr[0].dma.src.offset[i] = instr[i].dma.src.offset[0];
+		instr[0].dma.n_bytes[i] = instr[i].dma.n_bytes[0];
+
+		data[i].invalid = 1;
+	}
+}
+
+/*
+ * Run all pattern optimizations over an instruction array, then compact it.
+ *
+ * At every position the extract, emit + TX and DMA patterns are tried in this
+ * order; on a match the pattern is folded into its first instruction and the
+ * scan resumes right after the pattern. Afterwards the instructions marked
+ * invalid are dropped by moving the valid ones, together with their
+ * instruction_data entries, towards the front of both arrays.
+ *
+ * Returns the number of instructions left after the optimization.
+ */
+static uint32_t
+instr_optimize(struct instruction *instructions,
+	       struct instruction_data *instruction_data,
+	       uint32_t n_instructions)
+{
+	uint32_t i, pos = 0;
+
+	for (i = 0; i < n_instructions; ) {
+		struct instruction *instr = &instructions[i];
+		struct instruction_data *data = &instruction_data[i];
+		uint32_t n_instr = 0;
+		int detected;
+
+		/* Extract many. */
+		detected = instr_pattern_extract_many_detect(instr,
+							     data,
+							     n_instructions - i,
+							     &n_instr);
+		if (detected) {
+			instr_pattern_extract_many_optimize(instr,
+							    data,
+							    n_instr);
+			i += n_instr;
+			continue;
+		}
+
+		/* Emit many + TX. */
+		detected = instr_pattern_emit_many_tx_detect(instr,
+							     data,
+							     n_instructions - i,
+							     &n_instr);
+		if (detected) {
+			instr_pattern_emit_many_tx_optimize(instr,
+							    data,
+							    n_instr);
+			i += n_instr;
+			continue;
+		}
+
+		/* DMA many. */
+		detected = instr_pattern_dma_many_detect(instr,
+							 data,
+							 n_instructions - i,
+							 &n_instr);
+		if (detected) {
+			instr_pattern_dma_many_optimize(instr, data, n_instr);
+			i += n_instr;
+			continue;
+		}
+
+		/* No pattern starting at the current instruction. */
+		i++;
+	}
+
+	/* Eliminate the invalid instructions that have been optimized out. */
+	for (i = 0; i < n_instructions; i++) {
+		struct instruction *instr = &instructions[i];
+		struct instruction_data *data = &instruction_data[i];
+
+		if (data->invalid)
+			continue;
+
+		if (i != pos) {
+			memcpy(&instructions[pos], instr, sizeof(*instr));
+			memcpy(&instruction_data[pos], data, sizeof(*data));
+		}
+
+		pos++;
+	}
+
+	return pos;
+}
+
 static int
 instruction_config(struct rte_swx_pipeline *p,
 		   struct action *a,
@@ -5752,6 +6059,8 @@ instruction_config(struct rte_swx_pipeline *p,
 	if (err)
 		goto error;
 
+	n_instructions = instr_optimize(instr, data, n_instructions);
+
 	err = instr_jmp_resolve(instr, data, n_instructions);
 	if (err)
 		goto error;