pipeline: rework optimization pattern for header generation

Message ID 20220616141645.30366-1-cristian.dumitrescu@intel.com (mailing list archive)
State Accepted, archived
Delegated to: Thomas Monjalon
Headers
Series pipeline: rework optimization pattern for header generation |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/iol-mellanox-Performance success Performance Testing PASS
ci/github-robot: build success github build: passed
ci/iol-aarch64-compile-testing success Testing PASS
ci/iol-aarch64-unit-testing success Testing PASS
ci/iol-intel-Performance success Performance Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-x86_64-unit-testing success Testing PASS
ci/iol-x86_64-compile-testing success Testing PASS
ci/iol-abi-testing success Testing PASS

Commit Message

Cristian Dumitrescu June 16, 2022, 2:16 p.m. UTC
  The P4 language requires marking a header as valid before any of the
header fields are written as opposed to after the writes are done.
Hence, the optimization of replacing the sequence of instructions to
generate a header by reading it from the table action data with a
single DMA internal instruction are reworked from "mov all + validate
-> dma" to "validate + mov all -> dma".

Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>
---
 examples/pipeline/examples/fib.spec      |  2 +-
 examples/pipeline/examples/vxlan.spec    |  8 +--
 lib/pipeline/rte_swx_pipeline.c          | 89 +++++++++++-------------
 lib/pipeline/rte_swx_pipeline_internal.h | 14 +++-
 4 files changed, 57 insertions(+), 56 deletions(-)
  

Comments

Thomas Monjalon June 20, 2022, 2:10 p.m. UTC | #1
16/06/2022 16:16, Cristian Dumitrescu:
> The P4 language requires marking a header as valid before any of the
> header fields are written as opposed to after the writes are done.
> Hence, the optimization of replacing the sequence of instructions to
> generate a header by reading it from the table action data with a
> single DMA internal instruction are reworked from "mov all + validate
> -> dma" to "validate + mov all -> dma".
> 
> Signed-off-by: Cristian Dumitrescu <cristian.dumitrescu@intel.com>

Applied, thanks.
  

Patch

diff --git a/examples/pipeline/examples/fib.spec b/examples/pipeline/examples/fib.spec
index 3f8c76610c..0c2d19c106 100644
--- a/examples/pipeline/examples/fib.spec
+++ b/examples/pipeline/examples/fib.spec
@@ -83,10 +83,10 @@  struct nexthop_action_args_t {
 
 action nexthop_action args instanceof nexthop_action_args_t {
 	//Set Ethernet header.
+	validate h.ethernet
 	mov h.ethernet.dst_addr t.ethernet_dst_addr
 	mov h.ethernet.src_addr t.ethernet_src_addr
 	mov h.ethernet.ethertype t.ethernet_ethertype
-	validate h.ethernet
 
 	//Decrement the TTL and update the checksum within the IPv4 header.
 	cksub h.ipv4.hdr_checksum h.ipv4.ttl
diff --git a/examples/pipeline/examples/vxlan.spec b/examples/pipeline/examples/vxlan.spec
index ac62ab2bec..4ab4309f16 100644
--- a/examples/pipeline/examples/vxlan.spec
+++ b/examples/pipeline/examples/vxlan.spec
@@ -115,12 +115,13 @@  struct vxlan_encap_args_t {
 
 action vxlan_encap args instanceof vxlan_encap_args_t {
 	//Set the outer Ethernet header.
+	validate h.outer_ethernet
 	mov h.outer_ethernet.dst_addr t.ethernet_dst_addr
 	mov h.outer_ethernet.src_addr t.ethernet_src_addr
 	mov h.outer_ethernet.ethertype t.ethernet_ethertype
-	validate h.outer_ethernet
 
 	//Set the outer IPv4 header.
+	validate h.outer_ipv4
 	mov h.outer_ipv4.ver_ihl t.ipv4_ver_ihl
 	mov h.outer_ipv4.diffserv t.ipv4_diffserv
 	mov h.outer_ipv4.total_len t.ipv4_total_len
@@ -131,21 +132,20 @@  action vxlan_encap args instanceof vxlan_encap_args_t {
 	mov h.outer_ipv4.hdr_checksum t.ipv4_hdr_checksum
 	mov h.outer_ipv4.src_addr t.ipv4_src_addr
 	mov h.outer_ipv4.dst_addr t.ipv4_dst_addr
-	validate h.outer_ipv4
 
 	//Set the outer UDP header.
+	validate h.outer_udp
 	mov h.outer_udp.src_port t.udp_src_port
 	mov h.outer_udp.dst_port t.udp_dst_port
 	mov h.outer_udp.length t.udp_length
 	mov h.outer_udp.checksum t.udp_checksum
-	validate h.outer_udp
 
 	//Set the outer VXLAN header.
+	validate h.outer_vxlan
 	mov h.outer_vxlan.flags t.vxlan_flags
 	mov h.outer_vxlan.reserved t.vxlan_reserved
 	mov h.outer_vxlan.vni t.vxlan_vni
 	mov h.outer_vxlan.reserved2 t.vxlan_reserved2
-	validate h.outer_vxlan
 
 	//Set the output port.
 	mov m.port_out t.port_out
diff --git a/lib/pipeline/rte_swx_pipeline.c b/lib/pipeline/rte_swx_pipeline.c
index 066356684e..50805ba821 100644
--- a/lib/pipeline/rte_swx_pipeline.c
+++ b/lib/pipeline/rte_swx_pipeline.c
@@ -2284,6 +2284,7 @@  instr_hdr_validate_translate(struct rte_swx_pipeline *p,
 
 	instr->type = INSTR_HDR_VALIDATE;
 	instr->valid.header_id = h->id;
+	instr->valid.struct_id = h->struct_id;
 	return 0;
 }
 
@@ -6754,7 +6755,7 @@  action_arg_src_mov_count(struct action *a,
 			 uint32_t n_instructions);
 
 static int
-instr_pattern_mov_all_validate_search(struct rte_swx_pipeline *p,
+instr_pattern_validate_mov_all_search(struct rte_swx_pipeline *p,
 				      struct action *a,
 				      struct instruction *instr,
 				      struct instruction_data *data,
@@ -6771,51 +6772,42 @@  instr_pattern_mov_all_validate_search(struct rte_swx_pipeline *p,
 	if (!a || !a->st)
 		return 0;
 
-	/* First instruction: MOV_HM. */
-	if (data[0].invalid || (instr[0].type != INSTR_MOV_HM))
+	/* First instruction: HDR_VALIDATE. Second instruction: MOV_HM. */
+	if (data[0].invalid ||
+	    (instr[0].type != INSTR_HDR_VALIDATE) ||
+	    (n_instr < 2) ||
+	    data[1].invalid ||
+	    (instr[1].type != INSTR_MOV_HM) ||
+	    instr[1].mov.src.struct_id)
 		return 0;
 
-	h = header_find_by_struct_id(p, instr[0].mov.dst.struct_id);
-	if (!h || h->st->var_size)
+	h = header_find_by_struct_id(p, instr[0].valid.struct_id);
+	if (!h ||
+	    h->st->var_size ||
+	    (n_instr < 1 + h->st->n_fields))
 		return 0;
 
 	for (src_field_id = 0; src_field_id < a->st->n_fields; src_field_id++)
-		if (instr[0].mov.src.offset == a->st->fields[src_field_id].offset / 8)
+		if (instr[1].mov.src.offset == a->st->fields[src_field_id].offset / 8)
 			break;
 
-	if (src_field_id == a->st->n_fields)
+	if (src_field_id + h->st->n_fields > a->st->n_fields)
 		return 0;
 
-	if (instr[0].mov.dst.offset ||
-	    (instr[0].mov.dst.n_bits != h->st->fields[0].n_bits) ||
-	    instr[0].mov.src.struct_id ||
-	    (instr[0].mov.src.n_bits != a->st->fields[src_field_id].n_bits) ||
-	    (instr[0].mov.dst.n_bits != instr[0].mov.src.n_bits))
-		return 0;
-
-	if ((n_instr < h->st->n_fields + 1) ||
-	     (a->st->n_fields < src_field_id + h->st->n_fields + 1))
-		return 0;
-
-	/* Subsequent instructions: MOV_HM. */
-	for (i = 1; i < h->st->n_fields; i++)
-		if (data[i].invalid ||
-		    data[i].n_users ||
-		    (instr[i].type != INSTR_MOV_HM) ||
-		    (instr[i].mov.dst.struct_id != h->struct_id) ||
-		    (instr[i].mov.dst.offset != h->st->fields[i].offset / 8) ||
-		    (instr[i].mov.dst.n_bits != h->st->fields[i].n_bits) ||
-		    instr[i].mov.src.struct_id ||
-		    (instr[i].mov.src.offset != a->st->fields[src_field_id + i].offset / 8) ||
-		    (instr[i].mov.src.n_bits != a->st->fields[src_field_id + i].n_bits) ||
-		    (instr[i].mov.dst.n_bits != instr[i].mov.src.n_bits))
+	/* Second and subsequent instructions: MOV_HM. */
+	for (i = 0; i < h->st->n_fields; i++)
+		if (data[1 + i].invalid ||
+		    data[1 + i].n_users ||
+		    (instr[1 + i].type != INSTR_MOV_HM) ||
+		    (instr[1 + i].mov.dst.struct_id != h->struct_id) ||
+		    (instr[1 + i].mov.dst.offset != h->st->fields[i].offset / 8) ||
+		    (instr[1 + i].mov.dst.n_bits != h->st->fields[i].n_bits) ||
+		    instr[1 + i].mov.src.struct_id ||
+		    (instr[1 + i].mov.src.offset != a->st->fields[src_field_id + i].offset / 8) ||
+		    (instr[1 + i].mov.src.n_bits != a->st->fields[src_field_id + i].n_bits) ||
+		    (instr[1 + i].mov.dst.n_bits != instr[1 + i].mov.src.n_bits))
 			return 0;
 
-	/* Last instruction: HDR_VALIDATE. */
-	if ((instr[i].type != INSTR_HDR_VALIDATE) ||
-	    (instr[i].valid.header_id != h->id))
-		return 0;
-
 	/* Check that none of the action args that are used as source for this
 	 * DMA transfer are not used as source in any other mov instruction.
 	 */
@@ -6831,12 +6823,12 @@  instr_pattern_mov_all_validate_search(struct rte_swx_pipeline *p,
 			return 0;
 	}
 
-	*n_pattern_instr = 1 + i;
+	*n_pattern_instr = 1 + h->st->n_fields;
 	return 1;
 }
 
 static void
-instr_pattern_mov_all_validate_replace(struct rte_swx_pipeline *p,
+instr_pattern_validate_mov_all_replace(struct rte_swx_pipeline *p,
 				       struct action *a,
 				       struct instruction *instr,
 				       struct instruction_data *data,
@@ -6846,19 +6838,16 @@  instr_pattern_mov_all_validate_replace(struct rte_swx_pipeline *p,
 	uint32_t src_field_id, src_offset, i;
 
 	/* Read from the instructions before they are modified. */
-	h = header_find_by_struct_id(p, instr[0].mov.dst.struct_id);
+	h = header_find_by_struct_id(p, instr[1].mov.dst.struct_id);
 	if (!h)
 		return;
 
+	src_offset = instr[1].mov.src.offset;
+
 	for (src_field_id = 0; src_field_id < a->st->n_fields; src_field_id++)
-		if (instr[0].mov.src.offset == a->st->fields[src_field_id].offset / 8)
+		if (src_offset == a->st->fields[src_field_id].offset / 8)
 			break;
 
-	if (src_field_id == a->st->n_fields)
-		return;
-
-	src_offset = instr[0].mov.src.offset;
-
 	/* Modify the instructions. */
 	instr[0].type = INSTR_DMA_HT;
 	instr[0].dma.dst.header_id[0] = h->id;
@@ -6875,7 +6864,7 @@  instr_pattern_mov_all_validate_replace(struct rte_swx_pipeline *p,
 }
 
 static uint32_t
-instr_pattern_mov_all_validate_optimize(struct rte_swx_pipeline *p,
+instr_pattern_validate_mov_all_optimize(struct rte_swx_pipeline *p,
 					struct action *a,
 					struct instruction *instructions,
 					struct instruction_data *instruction_data,
@@ -6892,8 +6881,8 @@  instr_pattern_mov_all_validate_optimize(struct rte_swx_pipeline *p,
 		uint32_t n_instr = 0;
 		int detected;
 
-		/* Mov all + validate. */
-		detected = instr_pattern_mov_all_validate_search(p,
+		/* Validate + mov all. */
+		detected = instr_pattern_validate_mov_all_search(p,
 								 a,
 								 instr,
 								 data,
@@ -6903,7 +6892,7 @@  instr_pattern_mov_all_validate_optimize(struct rte_swx_pipeline *p,
 								 n_instructions,
 								 &n_instr);
 		if (detected) {
-			instr_pattern_mov_all_validate_replace(p, a, instr, data, n_instr);
+			instr_pattern_validate_mov_all_replace(p, a, instr, data, n_instr);
 			i += n_instr;
 			continue;
 		}
@@ -7020,8 +7009,8 @@  instr_optimize(struct rte_swx_pipeline *p,
 							     instruction_data,
 							     n_instructions);
 
-	/* Mov all + validate. */
-	n_instructions = instr_pattern_mov_all_validate_optimize(p,
+	/* Validate + mov all. */
+	n_instructions = instr_pattern_validate_mov_all_optimize(p,
 								 a,
 								 instructions,
 								 instruction_data,
diff --git a/lib/pipeline/rte_swx_pipeline_internal.h b/lib/pipeline/rte_swx_pipeline_internal.h
index 5feee8eff6..a35635efb7 100644
--- a/lib/pipeline/rte_swx_pipeline_internal.h
+++ b/lib/pipeline/rte_swx_pipeline_internal.h
@@ -632,6 +632,7 @@  struct instr_io {
 
 struct instr_hdr_validity {
 	uint8_t header_id;
+	uint8_t struct_id;
 };
 
 struct instr_table {
@@ -2228,11 +2229,22 @@  __instr_hdr_validate_exec(struct rte_swx_pipeline *p __rte_unused,
 			  const struct instruction *ip)
 {
 	uint32_t header_id = ip->valid.header_id;
+	uint32_t struct_id = ip->valid.struct_id;
+	uint64_t valid_headers = t->valid_headers;
+	struct header_runtime *h = &t->headers[header_id];
 
 	TRACE("[Thread %2u] validate header %u\n", p->thread_id, header_id);
 
+	/* If this header is already valid, then its associated t->structs[] element is also valid
+	 * and therefore it should not be modified. It could point to the packet buffer (in case of
+	 * extracted header) and setting it to the default location (h->ptr0) would be incorrect.
+	 */
+	if (MASK64_BIT_GET(valid_headers, header_id))
+		return;
+
 	/* Headers. */
-	t->valid_headers = MASK64_BIT_SET(t->valid_headers, header_id);
+	t->structs[struct_id] = h->ptr0;
+	t->valid_headers = MASK64_BIT_SET(valid_headers, header_id);
 }
 
 /*