[v6,2/3] gro: add VXLAN UDP/IPv4 GRO support
diff mbox series

Message ID 20200917034959.194372-3-yang_y_yi@163.com
State Superseded
Delegated to: Thomas Monjalon
Headers show
Series
  • gro: add UDP/IPv4 GRO and VXLAN UDP/IPv4 GRO support
Related show

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

yang_y_yi Sept. 17, 2020, 3:49 a.m. UTC
From: Yi Yang <yangyi01@inspur.com>

VXLAN UDP/IPv4 GRO can help improve VM-to-VM UDP
performance when UFO or GSO is enabled in VM, GRO
must be supported if UFO or GSO is enabled,
otherwise, performance can't get big improvement
if only GSO is there.

With this enabled in DPDK, OVS DPDK can leverage it
to improve VM-to-VM UDP performance, it will reassemble
VXLAN UDP/IPv4 fragments immediate after they are
received from a physical NIC. It is very helpful in
OVS DPDK VXLAN use case.

Note: outer IP ID isn't used to check if two packets
are same flow and can be merged because the difference
between outer IP IDs of two packets isn't always +/-1
in case of OVS DPDK.

Signed-off-by: Yi Yang <yangyi01@inspur.com>
---
 lib/librte_gro/gro_udp4.h       |   1 +
 lib/librte_gro/gro_vxlan_udp4.c | 542 ++++++++++++++++++++++++++++++++++++++++
 lib/librte_gro/gro_vxlan_udp4.h | 154 ++++++++++++
 lib/librte_gro/meson.build      |   2 +-
 lib/librte_gro/rte_gro.c        | 115 +++++++--
 lib/librte_gro/rte_gro.h        |   3 +
 6 files changed, 790 insertions(+), 27 deletions(-)
 create mode 100644 lib/librte_gro/gro_vxlan_udp4.c
 create mode 100644 lib/librte_gro/gro_vxlan_udp4.h

Comments

Hu, Jiayu Sept. 21, 2020, 7:54 a.m. UTC | #1
Hi Yi,

Some comments are inline.

Thanks,
Jiayu

> -----Original Message-----
> From: yang_y_yi@163.com <yang_y_yi@163.com>
> Sent: Thursday, September 17, 2020 11:50 AM
> To: dev@dpdk.org
> Cc: Hu, Jiayu <jiayu.hu@intel.com>; thomas@monjalon.net;
> yangyi01@inspur.com; yang_y_yi@163.com
> Subject: [PATCH v6 2/3] gro: add VXLAN UDP/IPv4 GRO support
> 
> From: Yi Yang <yangyi01@inspur.com>
> 
> VXLAN UDP/IPv4 GRO can help improve VM-to-VM UDP
> performance when UFO or GSO is enabled in VM, GRO
> must be supported if UFO or GSO is enabled,
> otherwise, performance can't get big improvement
> if only GSO is there.
> 
> With this enabled in DPDK, OVS DPDK can leverage it
> to improve VM-to-VM UDP performance, it will reassemble
> VXLAN UDP/IPv4 fragments immediate after they are
> received from a physical NIC. It is very helpful in
> OVS DPDK VXLAN use case.
> 
> Note: outer IP ID isn't used to check if two packets
> are same flow and can be merged because the difference
> between outer IP IDs of two packets isn't always +/-1
> in case of OVS DPDK.
> 
> Signed-off-by: Yi Yang <yangyi01@inspur.com>
> ---
>  lib/librte_gro/gro_udp4.h       |   1 +
>  lib/librte_gro/gro_vxlan_udp4.c | 542
> ++++++++++++++++++++++++++++++++++++++++
>  lib/librte_gro/gro_vxlan_udp4.h | 154 ++++++++++++
>  lib/librte_gro/meson.build      |   2 +-
>  lib/librte_gro/rte_gro.c        | 115 +++++++--
>  lib/librte_gro/rte_gro.h        |   3 +
>  6 files changed, 790 insertions(+), 27 deletions(-)
>  create mode 100644 lib/librte_gro/gro_vxlan_udp4.c
>  create mode 100644 lib/librte_gro/gro_vxlan_udp4.h
> 
> diff --git a/lib/librte_gro/gro_udp4.h b/lib/librte_gro/gro_udp4.h
> index 0a078e4..d38b393 100644
> --- a/lib/librte_gro/gro_udp4.h
> +++ b/lib/librte_gro/gro_udp4.h
> @@ -7,6 +7,7 @@
> 
>  #include <rte_ip.h>
>  #include <rte_udp.h>
> +#include <rte_vxlan.h>
> 
>  #define INVALID_ARRAY_INDEX 0xffffffffUL
>  #define GRO_UDP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
> diff --git a/lib/librte_gro/gro_vxlan_udp4.c b/lib/librte_gro/gro_vxlan_udp4.c
> new file mode 100644
> index 0000000..4eece56
> --- /dev/null
> +++ b/lib/librte_gro/gro_vxlan_udp4.c
> @@ -0,0 +1,542 @@
> +
> +uint16_t
> +gro_vxlan_udp4_tbl_timeout_flush(struct gro_vxlan_udp4_tbl *tbl,
> +		uint64_t flush_timestamp,
> +		struct rte_mbuf **out,
> +		uint16_t nb_out)
> +{
> +	uint16_t k = 0;
> +	uint32_t i, j;
> +	uint32_t max_flow_num = tbl->max_flow_num;
> +
> +	for (i = 0; i < max_flow_num; i++) {
> +		if (unlikely(tbl->flow_num == 0))
> +			return k;
> +
> +		j = tbl->flows[i].start_index;
> +		while (j != INVALID_ARRAY_INDEX) {
> +			if (tbl->items[j].inner_item.start_time <=
> +					flush_timestamp) {
> +				gro_vxlan_udp4_merge_items(tbl, j);
> +				out[k++] = tbl->items[j].inner_item.firstseg;
> +				if (tbl->items[j].inner_item.nb_merged > 1)
> +					update_vxlan_header(&(tbl-
> >items[j]));
> +				/*
> +				 * Delete the item and get the next packet
> +				 * index.
> +				 */
> +				j = delete_item(tbl, j, INVALID_ARRAY_INDEX);
> +				tbl->flows[i].start_index = j;
> +				if (j == INVALID_ARRAY_INDEX)
> +					tbl->flow_num--;
> +
> +				if (unlikely(k == nb_out))
> +					return k;
> +			} else
> +				/*
> +				 * The left packets in the flow won't be
> +				 * timeout. Go to check other flows.
> +				 */
> +				break;

The items of a flow are ordered by frag_oft, and start_time
of these items is not always in ascending order. Therefore,
you cannot skip checking the items after the item whose
start_time is greater than flush_timestamp. This issue also
exists in UDP/IPv4 GRO, and need to correct them both.

> +		}
> +	}
> +	return k;
> +}
> diff --git a/lib/librte_gro/gro_vxlan_udp4.h
> b/lib/librte_gro/gro_vxlan_udp4.h
> new file mode 100644
> index 0000000..6a42fb3
> --- /dev/null
> +++ b/lib/librte_gro/gro_vxlan_udp4.h
> @@ -0,0 +1,154 @@
> +/* SPDX-License-Identifier: BSD-3-Clause
> + * Copyright(c) 2020 Inspur Corporation
> + */
> +
> +#ifndef _GRO_VXLAN_UDP4_H_
> +#define _GRO_VXLAN_UDP4_H_
> +
> +#include "gro_udp4.h"
> +
> +#define GRO_VXLAN_UDP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
> +
> +/* Header fields representing a VxLAN flow */
> +struct vxlan_udp4_flow_key {
> +	struct udp4_flow_key inner_key;
> +	struct rte_vxlan_hdr vxlan_hdr;
> +
> +	struct rte_ether_addr outer_eth_saddr;
> +	struct rte_ether_addr outer_eth_daddr;
> +
> +	uint32_t outer_ip_src_addr;
> +	uint32_t outer_ip_dst_addr;
> +
> +	/* Note: It is unnecessary to save outer_src_port here because it can
> +	 * be different for VxLAN UDP fragments from the same flow.
> +	 */
> +	uint16_t outer_dst_port;
> +

The above empty line is unnecessary.

> +};
> +
> +
> +struct gro_vxlan_udp4_item {
> +	struct gro_udp4_item inner_item;
> +	/* Note: VXLAN UDP/IPv4 GRO needn't check outer_ip_id because
> +	 * the difference between outer_ip_ids of two received packets
> +	 * isn't always +/-1 in case of OVS DPDK. So no outer_ip_id
> +	 * and outer_is_atomic fields here.
> +	 */

It seems the above comments for outer IP ID is enough, and no need
to highlight in the commit log again. How do you think?

> +};
> +
> diff --git a/lib/librte_gro/rte_gro.c b/lib/librte_gro/rte_gro.c
> index f623230..db990cf 100644
> --- a/lib/librte_gro/rte_gro.c
> +++ b/lib/librte_gro/rte_gro.c
> @@ -11,6 +11,7 @@
>  #include "gro_tcp4.h"
>  #include "gro_udp4.h"
>  #include "gro_vxlan_tcp4.h"
> +#include "gro_vxlan_udp4.h"
> 
>  /*
>   * GRO context structure. It keeps the table structures, which are
> @@ -137,19 +148,27 @@ struct gro_ctx {
>  	struct gro_udp4_item udp_items[RTE_GRO_MAX_BURST_ITEM_NUM]
> = {{0} };
> 
>  	/* Allocate a reassembly table for VXLAN TCP GRO */
> -	struct gro_vxlan_tcp4_tbl vxlan_tbl;
> -	struct gro_vxlan_tcp4_flow
> vxlan_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
> -	struct gro_vxlan_tcp4_item
> vxlan_items[RTE_GRO_MAX_BURST_ITEM_NUM]
> +	struct gro_vxlan_tcp4_tbl vxlan_tcp_tbl;
> +	struct gro_vxlan_tcp4_flow
> vxlan_tcp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
> +	struct gro_vxlan_tcp4_item
> vxlan_tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM]
>  			= {{{0}, 0, 0} };
> 
> +	/* Allocate a reassembly table for VXLAN UDP GRO */
> +	struct gro_vxlan_udp4_tbl vxlan_udp_tbl;
> +	struct gro_vxlan_udp4_flow
> vxlan_udp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
> +	struct gro_vxlan_udp4_item
> vxlan_udp_items[RTE_GRO_MAX_BURST_ITEM_NUM]
> +			= {{{0}} };
> +
>  	struct rte_mbuf *unprocess_pkts[nb_pkts];
>  	uint32_t item_num;
>  	int32_t ret;
>  	uint16_t i, unprocess_num = 0, nb_after_gro = nb_pkts;
> -	uint8_t do_tcp4_gro = 0, do_vxlan_gro = 0, do_udp4_gro = 0;
> +	uint8_t do_tcp4_gro = 0, do_vxlan_tcp_gro = 0, do_udp4_gro = 0,
> +		do_vxlan_udp_gro = 0;
> 
>  	if (unlikely((param->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
>  					RTE_GRO_TCP_IPV4 |
> +					RTE_GRO_IPV4_VXLAN_UDP_IPV4 |
>  					RTE_GRO_UDP_IPV4)) == 0))
>  		return nb_pkts;
> 
> @@ -160,15 +179,28 @@ struct gro_ctx {
> 
>  	if (param->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) {
>  		for (i = 0; i < item_num; i++)
> -			vxlan_flows[i].start_index = INVALID_ARRAY_INDEX;
> -
> -		vxlan_tbl.flows = vxlan_flows;
> -		vxlan_tbl.items = vxlan_items;
> -		vxlan_tbl.flow_num = 0;
> -		vxlan_tbl.item_num = 0;
> -		vxlan_tbl.max_flow_num = item_num;
> -		vxlan_tbl.max_item_num = item_num;
> -		do_vxlan_gro = 1;
> +			vxlan_tcp_flows[i].start_index =
> INVALID_ARRAY_INDEX;
> +
> +		vxlan_tcp_tbl.flows = vxlan_tcp_flows;
> +		vxlan_tcp_tbl.items = vxlan_tcp_items;
> +		vxlan_tcp_tbl.flow_num = 0;
> +		vxlan_tcp_tbl.item_num = 0;
> +		vxlan_tcp_tbl.max_flow_num = item_num;
> +		vxlan_tcp_tbl.max_item_num = item_num;
> +		do_vxlan_tcp_gro = 1;
> +	}
> +
> +	if (param->gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) {
> +		for (i = 0; i < item_num; i++)
> +			vxlan_udp_flows[i].start_index =
> INVALID_ARRAY_INDEX;
> +
> +		vxlan_udp_tbl.flows = vxlan_udp_flows;
> +		vxlan_udp_tbl.items = vxlan_udp_items;
> +		vxlan_udp_tbl.flow_num = 0;
> +		vxlan_udp_tbl.item_num = 0;
> +		vxlan_udp_tbl.max_flow_num = item_num;
> +		vxlan_udp_tbl.max_item_num = item_num;
> +		do_vxlan_udp_gro = 1;
>  	}
> 
>  	if (param->gro_types & RTE_GRO_TCP_IPV4) {
> @@ -204,9 +236,18 @@ struct gro_ctx {
>  		 * will be flushed from the tables.
>  		 */
>  		if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
> -				do_vxlan_gro) {
> +				do_vxlan_tcp_gro) {
>  			ret = gro_vxlan_tcp4_reassemble(pkts[i],
> -							&vxlan_tbl, 0);
> +							&vxlan_tcp_tbl, 0);
> +			if (ret > 0)
> +				/* Merge successfully */
> +				nb_after_gro--;
> +			else if (ret < 0)
> +				unprocess_pkts[unprocess_num++] = pkts[i];
> +		} else if (IS_IPV4_VXLAN_UDP4_PKT(pkts[i]->packet_type) &&
> +				do_vxlan_udp_gro) {
> +			ret = gro_vxlan_udp4_reassemble(pkts[i],
> +							&vxlan_udp_tbl, 0);
>  			if (ret > 0)
>  				/* Merge successfully */
>  				nb_after_gro--;
> @@ -236,11 +277,17 @@ struct gro_ctx {
>  		 || (unprocess_num < nb_pkts)) {
>  		i = 0;
>  		/* Flush all packets from the tables */
> -		if (do_vxlan_gro) {
> -			i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tbl,
> +		if (do_vxlan_tcp_gro) {
> +			i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tcp_tbl,
>  					0, pkts, nb_pkts);
>  		}
> 
> +		if (do_vxlan_udp_gro) {
> +			i +=
> gro_vxlan_udp4_tbl_timeout_flush(&vxlan_udp_tbl,
> +					0, &pkts[i], nb_pkts - i);
> +
> +		}
> +
>  		if (do_tcp4_gro) {
>  			i += gro_tcp4_tbl_timeout_flush(&tcp_tbl, 0,
>  					&pkts[i], nb_pkts - i);
> @@ -269,33 +316,42 @@ struct gro_ctx {
>  {
>  	struct rte_mbuf *unprocess_pkts[nb_pkts];
>  	struct gro_ctx *gro_ctx = ctx;
> -	void *tcp_tbl, *udp_tbl, *vxlan_tbl;
> +	void *tcp_tbl, *udp_tbl, *vxlan_tcp_tbl, *vxlan_udp_tbl;
>  	uint64_t current_time;
>  	uint16_t i, unprocess_num = 0;
> -	uint8_t do_tcp4_gro, do_vxlan_gro, do_udp4_gro;
> +	uint8_t do_tcp4_gro, do_vxlan_tcp_gro, do_udp4_gro,
> do_vxlan_udp_gro;
> 
>  	if (unlikely((gro_ctx->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4
> |
>  					RTE_GRO_TCP_IPV4 |
> +					RTE_GRO_IPV4_VXLAN_UDP_IPV4 |
>  					RTE_GRO_UDP_IPV4)) == 0))
>  		return nb_pkts;
> 
>  	tcp_tbl = gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX];
> -	vxlan_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];
> +	vxlan_tcp_tbl = gro_ctx-
> >tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];
>  	udp_tbl = gro_ctx->tbls[RTE_GRO_UDP_IPV4_INDEX];
> +	vxlan_udp_tbl = gro_ctx-
> >tbls[RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX];
> 
>  	do_tcp4_gro = (gro_ctx->gro_types & RTE_GRO_TCP_IPV4) ==
>  		RTE_GRO_TCP_IPV4;
> -	do_vxlan_gro = (gro_ctx->gro_types &
> RTE_GRO_IPV4_VXLAN_TCP_IPV4) ==
> +	do_vxlan_tcp_gro = (gro_ctx->gro_types &
> RTE_GRO_IPV4_VXLAN_TCP_IPV4) ==
>  		RTE_GRO_IPV4_VXLAN_TCP_IPV4;
>  	do_udp4_gro = (gro_ctx->gro_types & RTE_GRO_UDP_IPV4) ==
>  		RTE_GRO_UDP_IPV4;
> +	do_vxlan_udp_gro = (gro_ctx->gro_types &
> RTE_GRO_IPV4_VXLAN_UDP_IPV4) ==
> +		RTE_GRO_IPV4_VXLAN_UDP_IPV4;
> 
>  	current_time = rte_rdtsc();
> 
>  	for (i = 0; i < nb_pkts; i++) {
>  		if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
> -				do_vxlan_gro) {
> -			if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tbl,
> +				do_vxlan_tcp_gro) {
> +			if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tcp_tbl,
> +						current_time) < 0)
> +				unprocess_pkts[unprocess_num++] = pkts[i];
> +		} else if (IS_IPV4_VXLAN_UDP4_PKT(pkts[i]->packet_type) &&
> +				do_vxlan_udp_gro) {
> +			if (gro_vxlan_udp4_reassemble(pkts[i],
> vxlan_udp_tbl,
>  						current_time) < 0)
>  				unprocess_pkts[unprocess_num++] = pkts[i];
>  		} else if (IS_IPV4_TCP_PKT(pkts[i]->packet_type) &&
> @@ -341,6 +397,13 @@ struct gro_ctx {
>  		left_nb_out = max_nb_out - num;
>  	}
> 
> +	if ((gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) && max_nb_out >
> 0) {

Max_nb_out is read-only and not updated. So it cannot check if 'out' array
has enough space. Need to use left_nb_out instead. This issue also existed
in UDP/IPv4 GRO, and please correct them both.

> +		num += gro_vxlan_udp4_tbl_timeout_flush(gro_ctx->tbls[
> +				RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX],
> +				flush_timestamp, &out[num], left_nb_out);
> +		left_nb_out = max_nb_out - num;
> +	}
> +
>  	/* If no available space in 'out', stop flushing. */
>  	if ((gro_types & RTE_GRO_TCP_IPV4) && max_nb_out > 0) {
>  		num += gro_tcp4_tbl_timeout_flush(
> diff --git a/lib/librte_gro/rte_gro.h b/lib/librte_gro/rte_gro.h
> index 470f3ed..9f9ed49 100644
> --- a/lib/librte_gro/rte_gro.h
> +++ b/lib/librte_gro/rte_gro.h
> @@ -35,6 +35,9 @@
>  #define RTE_GRO_UDP_IPV4_INDEX 2
>  #define RTE_GRO_UDP_IPV4 (1ULL << RTE_GRO_UDP_IPV4_INDEX)
>  /**< UDP/IPv4 GRO flag */
> +#define RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX 3
> +#define RTE_GRO_IPV4_VXLAN_UDP_IPV4 (1ULL <<
> RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX)
> +/**< VxLAN UDP/IPv4 GRO flag. */
> 
>  /**
>   * Structure used to create GRO context objects or used to pass
> --
> 1.8.3.1
yang_y_yi Sept. 22, 2020, 1:29 a.m. UTC | #2
Thanks Jiayu, I have fixed other comments except this one:



>The items of a flow are ordered by frag_oft, and start_time
>of these items is not always in ascending order. Therefore,
>you cannot skip checking the items after the item whose
>start_time is greater than flush_timestamp. This issue also
>exists in UDP/IPv4 GRO, and need to correct them both.


I think the issue here is if we should strictly follow flush_timestamp, it is possible there are new items in items chain. we have chance to merge more packets if we don't follow flush_timestamp. So an ideal change can be this. But is it acceptible if we don't use flush_timestamp? It can flush some packets in advance therefore miss next merge window. Maybe current way is most resonable and a tradeoff between two exterem cases.





diff --git a/lib/librte_gro/gro_udp4.c b/lib/librte_gro/gro_udp4.c
index 061e7b0..ffa35a2 100644
--- a/lib/librte_gro/gro_udp4.c
+++ b/lib/librte_gro/gro_udp4.c
@@ -391,7 +391,6 @@

                j = tbl->flows[i].start_index;
                while (j != INVALID_ARRAY_INDEX) {
-                       if (tbl->items[j].start_time <= flush_timestamp) {
                                gro_udp4_merge_items(tbl, j);
                                out[k++] = tbl->items[j].firstseg;
                                if (tbl->items[j].nb_merged > 1)
@@ -407,12 +406,6 @@

                                if (unlikely(k == nb_out))
                                        return k;
-                       } else
-                               /*
-                                * The left packets in this flow won't be
-                                * timeout. Go to check other flows.
-                                */
-                               break;
                }
        }
        return k;









At 2020-09-21 15:54:36, "Hu, Jiayu" <jiayu.hu@intel.com> wrote:
>Hi Yi,
>
>Some comments are inline.
>
>Thanks,
>Jiayu
>
>> -----Original Message-----
>> From: yang_y_yi@163.com <yang_y_yi@163.com>
>> Sent: Thursday, September 17, 2020 11:50 AM
>> To: dev@dpdk.org
>> Cc: Hu, Jiayu <jiayu.hu@intel.com>; thomas@monjalon.net;
>> yangyi01@inspur.com; yang_y_yi@163.com
>> Subject: [PATCH v6 2/3] gro: add VXLAN UDP/IPv4 GRO support
>> 
>> From: Yi Yang <yangyi01@inspur.com>
>> 
>> VXLAN UDP/IPv4 GRO can help improve VM-to-VM UDP
>> performance when UFO or GSO is enabled in VM, GRO
>> must be supported if UFO or GSO is enabled,
>> otherwise, performance can't get big improvement
>> if only GSO is there.
>> 
>> With this enabled in DPDK, OVS DPDK can leverage it
>> to improve VM-to-VM UDP performance, it will reassemble
>> VXLAN UDP/IPv4 fragments immediate after they are
>> received from a physical NIC. It is very helpful in
>> OVS DPDK VXLAN use case.
>> 
>> Note: outer IP ID isn't used to check if two packets
>> are same flow and can be merged because the difference
>> between outer IP IDs of two packets isn't always +/-1
>> in case of OVS DPDK.
>> 
>> Signed-off-by: Yi Yang <yangyi01@inspur.com>
>> ---
>>  lib/librte_gro/gro_udp4.h       |   1 +
>>  lib/librte_gro/gro_vxlan_udp4.c | 542
>> ++++++++++++++++++++++++++++++++++++++++
>>  lib/librte_gro/gro_vxlan_udp4.h | 154 ++++++++++++
>>  lib/librte_gro/meson.build      |   2 +-
>>  lib/librte_gro/rte_gro.c        | 115 +++++++--
>>  lib/librte_gro/rte_gro.h        |   3 +
>>  6 files changed, 790 insertions(+), 27 deletions(-)
>>  create mode 100644 lib/librte_gro/gro_vxlan_udp4.c
>>  create mode 100644 lib/librte_gro/gro_vxlan_udp4.h
>> 
>> diff --git a/lib/librte_gro/gro_udp4.h b/lib/librte_gro/gro_udp4.h
>> index 0a078e4..d38b393 100644
>> --- a/lib/librte_gro/gro_udp4.h
>> +++ b/lib/librte_gro/gro_udp4.h
>> @@ -7,6 +7,7 @@
>> 
>>  #include <rte_ip.h>
>>  #include <rte_udp.h>
>> +#include <rte_vxlan.h>
>> 
>>  #define INVALID_ARRAY_INDEX 0xffffffffUL
>>  #define GRO_UDP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
>> diff --git a/lib/librte_gro/gro_vxlan_udp4.c b/lib/librte_gro/gro_vxlan_udp4.c
>> new file mode 100644
>> index 0000000..4eece56
>> --- /dev/null
>> +++ b/lib/librte_gro/gro_vxlan_udp4.c
>> @@ -0,0 +1,542 @@
>> +
>> +uint16_t
>> +gro_vxlan_udp4_tbl_timeout_flush(struct gro_vxlan_udp4_tbl *tbl,
>> +		uint64_t flush_timestamp,
>> +		struct rte_mbuf **out,
>> +		uint16_t nb_out)
>> +{
>> +	uint16_t k = 0;
>> +	uint32_t i, j;
>> +	uint32_t max_flow_num = tbl->max_flow_num;
>> +
>> +	for (i = 0; i < max_flow_num; i++) {
>> +		if (unlikely(tbl->flow_num == 0))
>> +			return k;
>> +
>> +		j = tbl->flows[i].start_index;
>> +		while (j != INVALID_ARRAY_INDEX) {
>> +			if (tbl->items[j].inner_item.start_time <=
>> +					flush_timestamp) {
>> +				gro_vxlan_udp4_merge_items(tbl, j);
>> +				out[k++] = tbl->items[j].inner_item.firstseg;
>> +				if (tbl->items[j].inner_item.nb_merged > 1)
>> +					update_vxlan_header(&(tbl-
>> >items[j]));
>> +				/*
>> +				 * Delete the item and get the next packet
>> +				 * index.
>> +				 */
>> +				j = delete_item(tbl, j, INVALID_ARRAY_INDEX);
>> +				tbl->flows[i].start_index = j;
>> +				if (j == INVALID_ARRAY_INDEX)
>> +					tbl->flow_num--;
>> +
>> +				if (unlikely(k == nb_out))
>> +					return k;
>> +			} else
>> +				/*
>> +				 * The left packets in the flow won't be
>> +				 * timeout. Go to check other flows.
>> +				 */
>> +				break;
>
>The items of a flow are ordered by frag_oft, and start_time
>of these items is not always in ascending order. Therefore,
>you cannot skip checking the items after the item whose
>start_time is greater than flush_timestamp. This issue also
>exists in UDP/IPv4 GRO, and need to correct them both.
>
>> +		}
>> +	}
>> +	return k;
>> +}
>> diff --git a/lib/librte_gro/gro_vxlan_udp4.h
>> b/lib/librte_gro/gro_vxlan_udp4.h
>> new file mode 100644
>> index 0000000..6a42fb3
>> --- /dev/null
>> +++ b/lib/librte_gro/gro_vxlan_udp4.h
>> @@ -0,0 +1,154 @@
>> +/* SPDX-License-Identifier: BSD-3-Clause
>> + * Copyright(c) 2020 Inspur Corporation
>> + */
>> +
>> +#ifndef _GRO_VXLAN_UDP4_H_
>> +#define _GRO_VXLAN_UDP4_H_
>> +
>> +#include "gro_udp4.h"
>> +
>> +#define GRO_VXLAN_UDP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
>> +
>> +/* Header fields representing a VxLAN flow */
>> +struct vxlan_udp4_flow_key {
>> +	struct udp4_flow_key inner_key;
>> +	struct rte_vxlan_hdr vxlan_hdr;
>> +
>> +	struct rte_ether_addr outer_eth_saddr;
>> +	struct rte_ether_addr outer_eth_daddr;
>> +
>> +	uint32_t outer_ip_src_addr;
>> +	uint32_t outer_ip_dst_addr;
>> +
>> +	/* Note: It is unnecessary to save outer_src_port here because it can
>> +	 * be different for VxLAN UDP fragments from the same flow.
>> +	 */
>> +	uint16_t outer_dst_port;
>> +
>
>The above empty line is unnecessary.
>
>> +};
>> +
>> +
>> +struct gro_vxlan_udp4_item {
>> +	struct gro_udp4_item inner_item;
>> +	/* Note: VXLAN UDP/IPv4 GRO needn't check outer_ip_id because
>> +	 * the difference between outer_ip_ids of two received packets
>> +	 * isn't always +/-1 in case of OVS DPDK. So no outer_ip_id
>> +	 * and outer_is_atomic fields here.
>> +	 */
>
>It seems the above comments for outer IP ID is enough, and no need
>to highlight in the commit log again. How do you think?
>
>> +};
>> +
>> diff --git a/lib/librte_gro/rte_gro.c b/lib/librte_gro/rte_gro.c
>> index f623230..db990cf 100644
>> --- a/lib/librte_gro/rte_gro.c
>> +++ b/lib/librte_gro/rte_gro.c
>> @@ -11,6 +11,7 @@
>>  #include "gro_tcp4.h"
>>  #include "gro_udp4.h"
>>  #include "gro_vxlan_tcp4.h"
>> +#include "gro_vxlan_udp4.h"
>> 
>>  /*
>>   * GRO context structure. It keeps the table structures, which are
>> @@ -137,19 +148,27 @@ struct gro_ctx {
>>  	struct gro_udp4_item udp_items[RTE_GRO_MAX_BURST_ITEM_NUM]
>> = {{0} };
>> 
>>  	/* Allocate a reassembly table for VXLAN TCP GRO */
>> -	struct gro_vxlan_tcp4_tbl vxlan_tbl;
>> -	struct gro_vxlan_tcp4_flow
>> vxlan_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
>> -	struct gro_vxlan_tcp4_item
>> vxlan_items[RTE_GRO_MAX_BURST_ITEM_NUM]
>> +	struct gro_vxlan_tcp4_tbl vxlan_tcp_tbl;
>> +	struct gro_vxlan_tcp4_flow
>> vxlan_tcp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
>> +	struct gro_vxlan_tcp4_item
>> vxlan_tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM]
>>  			= {{{0}, 0, 0} };
>> 
>> +	/* Allocate a reassembly table for VXLAN UDP GRO */
>> +	struct gro_vxlan_udp4_tbl vxlan_udp_tbl;
>> +	struct gro_vxlan_udp4_flow
>> vxlan_udp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
>> +	struct gro_vxlan_udp4_item
>> vxlan_udp_items[RTE_GRO_MAX_BURST_ITEM_NUM]
>> +			= {{{0}} };
>> +
>>  	struct rte_mbuf *unprocess_pkts[nb_pkts];
>>  	uint32_t item_num;
>>  	int32_t ret;
>>  	uint16_t i, unprocess_num = 0, nb_after_gro = nb_pkts;
>> -	uint8_t do_tcp4_gro = 0, do_vxlan_gro = 0, do_udp4_gro = 0;
>> +	uint8_t do_tcp4_gro = 0, do_vxlan_tcp_gro = 0, do_udp4_gro = 0,
>> +		do_vxlan_udp_gro = 0;
>> 
>>  	if (unlikely((param->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
>>  					RTE_GRO_TCP_IPV4 |
>> +					RTE_GRO_IPV4_VXLAN_UDP_IPV4 |
>>  					RTE_GRO_UDP_IPV4)) == 0))
>>  		return nb_pkts;
>> 
>> @@ -160,15 +179,28 @@ struct gro_ctx {
>> 
>>  	if (param->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) {
>>  		for (i = 0; i < item_num; i++)
>> -			vxlan_flows[i].start_index = INVALID_ARRAY_INDEX;
>> -
>> -		vxlan_tbl.flows = vxlan_flows;
>> -		vxlan_tbl.items = vxlan_items;
>> -		vxlan_tbl.flow_num = 0;
>> -		vxlan_tbl.item_num = 0;
>> -		vxlan_tbl.max_flow_num = item_num;
>> -		vxlan_tbl.max_item_num = item_num;
>> -		do_vxlan_gro = 1;
>> +			vxlan_tcp_flows[i].start_index =
>> INVALID_ARRAY_INDEX;
>> +
>> +		vxlan_tcp_tbl.flows = vxlan_tcp_flows;
>> +		vxlan_tcp_tbl.items = vxlan_tcp_items;
>> +		vxlan_tcp_tbl.flow_num = 0;
>> +		vxlan_tcp_tbl.item_num = 0;
>> +		vxlan_tcp_tbl.max_flow_num = item_num;
>> +		vxlan_tcp_tbl.max_item_num = item_num;
>> +		do_vxlan_tcp_gro = 1;
>> +	}
>> +
>> +	if (param->gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) {
>> +		for (i = 0; i < item_num; i++)
>> +			vxlan_udp_flows[i].start_index =
>> INVALID_ARRAY_INDEX;
>> +
>> +		vxlan_udp_tbl.flows = vxlan_udp_flows;
>> +		vxlan_udp_tbl.items = vxlan_udp_items;
>> +		vxlan_udp_tbl.flow_num = 0;
>> +		vxlan_udp_tbl.item_num = 0;
>> +		vxlan_udp_tbl.max_flow_num = item_num;
>> +		vxlan_udp_tbl.max_item_num = item_num;
>> +		do_vxlan_udp_gro = 1;
>>  	}
>> 
>>  	if (param->gro_types & RTE_GRO_TCP_IPV4) {
>> @@ -204,9 +236,18 @@ struct gro_ctx {
>>  		 * will be flushed from the tables.
>>  		 */
>>  		if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
>> -				do_vxlan_gro) {
>> +				do_vxlan_tcp_gro) {
>>  			ret = gro_vxlan_tcp4_reassemble(pkts[i],
>> -							&vxlan_tbl, 0);
>> +							&vxlan_tcp_tbl, 0);
>> +			if (ret > 0)
>> +				/* Merge successfully */
>> +				nb_after_gro--;
>> +			else if (ret < 0)
>> +				unprocess_pkts[unprocess_num++] = pkts[i];
>> +		} else if (IS_IPV4_VXLAN_UDP4_PKT(pkts[i]->packet_type) &&
>> +				do_vxlan_udp_gro) {
>> +			ret = gro_vxlan_udp4_reassemble(pkts[i],
>> +							&vxlan_udp_tbl, 0);
>>  			if (ret > 0)
>>  				/* Merge successfully */
>>  				nb_after_gro--;
>> @@ -236,11 +277,17 @@ struct gro_ctx {
>>  		 || (unprocess_num < nb_pkts)) {
>>  		i = 0;
>>  		/* Flush all packets from the tables */
>> -		if (do_vxlan_gro) {
>> -			i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tbl,
>> +		if (do_vxlan_tcp_gro) {
>> +			i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tcp_tbl,
>>  					0, pkts, nb_pkts);
>>  		}
>> 
>> +		if (do_vxlan_udp_gro) {
>> +			i +=
>> gro_vxlan_udp4_tbl_timeout_flush(&vxlan_udp_tbl,
>> +					0, &pkts[i], nb_pkts - i);
>> +
>> +		}
>> +
>>  		if (do_tcp4_gro) {
>>  			i += gro_tcp4_tbl_timeout_flush(&tcp_tbl, 0,
>>  					&pkts[i], nb_pkts - i);
>> @@ -269,33 +316,42 @@ struct gro_ctx {
>>  {
>>  	struct rte_mbuf *unprocess_pkts[nb_pkts];
>>  	struct gro_ctx *gro_ctx = ctx;
>> -	void *tcp_tbl, *udp_tbl, *vxlan_tbl;
>> +	void *tcp_tbl, *udp_tbl, *vxlan_tcp_tbl, *vxlan_udp_tbl;
>>  	uint64_t current_time;
>>  	uint16_t i, unprocess_num = 0;
>> -	uint8_t do_tcp4_gro, do_vxlan_gro, do_udp4_gro;
>> +	uint8_t do_tcp4_gro, do_vxlan_tcp_gro, do_udp4_gro,
>> do_vxlan_udp_gro;
>> 
>>  	if (unlikely((gro_ctx->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4
>> |
>>  					RTE_GRO_TCP_IPV4 |
>> +					RTE_GRO_IPV4_VXLAN_UDP_IPV4 |
>>  					RTE_GRO_UDP_IPV4)) == 0))
>>  		return nb_pkts;
>> 
>>  	tcp_tbl = gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX];
>> -	vxlan_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];
>> +	vxlan_tcp_tbl = gro_ctx-
>> >tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];
>>  	udp_tbl = gro_ctx->tbls[RTE_GRO_UDP_IPV4_INDEX];
>> +	vxlan_udp_tbl = gro_ctx-
>> >tbls[RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX];
>> 
>>  	do_tcp4_gro = (gro_ctx->gro_types & RTE_GRO_TCP_IPV4) ==
>>  		RTE_GRO_TCP_IPV4;
>> -	do_vxlan_gro = (gro_ctx->gro_types &
>> RTE_GRO_IPV4_VXLAN_TCP_IPV4) ==
>> +	do_vxlan_tcp_gro = (gro_ctx->gro_types &
>> RTE_GRO_IPV4_VXLAN_TCP_IPV4) ==
>>  		RTE_GRO_IPV4_VXLAN_TCP_IPV4;
>>  	do_udp4_gro = (gro_ctx->gro_types & RTE_GRO_UDP_IPV4) ==
>>  		RTE_GRO_UDP_IPV4;
>> +	do_vxlan_udp_gro = (gro_ctx->gro_types &
>> RTE_GRO_IPV4_VXLAN_UDP_IPV4) ==
>> +		RTE_GRO_IPV4_VXLAN_UDP_IPV4;
>> 
>>  	current_time = rte_rdtsc();
>> 
>>  	for (i = 0; i < nb_pkts; i++) {
>>  		if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
>> -				do_vxlan_gro) {
>> -			if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tbl,
>> +				do_vxlan_tcp_gro) {
>> +			if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tcp_tbl,
>> +						current_time) < 0)
>> +				unprocess_pkts[unprocess_num++] = pkts[i];
>> +		} else if (IS_IPV4_VXLAN_UDP4_PKT(pkts[i]->packet_type) &&
>> +				do_vxlan_udp_gro) {
>> +			if (gro_vxlan_udp4_reassemble(pkts[i],
>> vxlan_udp_tbl,
>>  						current_time) < 0)
>>  				unprocess_pkts[unprocess_num++] = pkts[i];
>>  		} else if (IS_IPV4_TCP_PKT(pkts[i]->packet_type) &&
>> @@ -341,6 +397,13 @@ struct gro_ctx {
>>  		left_nb_out = max_nb_out - num;
>>  	}
>> 
>> +	if ((gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) && max_nb_out >
>> 0) {
>
>Max_nb_out is read-only and not updated. So it cannot check if 'out' array
>has enough space. Need to use left_nb_out instead. This issue also existed
>in UDP/IPv4 GRO, and please correct them both.
>
>> +		num += gro_vxlan_udp4_tbl_timeout_flush(gro_ctx->tbls[
>> +				RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX],
>> +				flush_timestamp, &out[num], left_nb_out);
>> +		left_nb_out = max_nb_out - num;
>> +	}
>> +
>>  	/* If no available space in 'out', stop flushing. */
>>  	if ((gro_types & RTE_GRO_TCP_IPV4) && max_nb_out > 0) {
>>  		num += gro_tcp4_tbl_timeout_flush(
>> diff --git a/lib/librte_gro/rte_gro.h b/lib/librte_gro/rte_gro.h
>> index 470f3ed..9f9ed49 100644
>> --- a/lib/librte_gro/rte_gro.h
>> +++ b/lib/librte_gro/rte_gro.h
>> @@ -35,6 +35,9 @@
>>  #define RTE_GRO_UDP_IPV4_INDEX 2
>>  #define RTE_GRO_UDP_IPV4 (1ULL << RTE_GRO_UDP_IPV4_INDEX)
>>  /**< UDP/IPv4 GRO flag */
>> +#define RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX 3
>> +#define RTE_GRO_IPV4_VXLAN_UDP_IPV4 (1ULL <<
>> RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX)
>> +/**< VxLAN UDP/IPv4 GRO flag. */
>> 
>>  /**
>>   * Structure used to create GRO context objects or used to pass
>> --
>> 1.8.3.1
yang_y_yi Sept. 22, 2020, 1:44 a.m. UTC | #3
BTW, start_time is checked for the first packet in a flow, gro_udp4_merge_items(tbl, j) will merge all the packets in this flow once if they can be reassembled, gro_udp4_merge_items(tbl, j) doesn't check start_time, so this still can let some new items in this flow have chance to be merged.

At 2020-09-22 09:29:38, "yang_y_yi" <yang_y_yi@163.com> wrote:
>Thanks Jiayu, I have fixed other comments except this one:
>
>
>
>>The items of a flow are ordered by frag_oft, and start_time
>>of these items is not always in ascending order. Therefore,
>>you cannot skip checking the items after the item whose
>>start_time is greater than flush_timestamp. This issue also
>>exists in UDP/IPv4 GRO, and need to correct them both.
>
>
>I think the issue here is if we should strictly follow flush_timestamp, it is possible there are new items in items chain. we have chance to merge more packets if we don't follow flush_timestamp. So an ideal change can be this. But is it acceptible if we don't use flush_timestamp? It can flush some packets in advance therefore miss next merge window. Maybe current way is most resonable and a tradeoff between two exterem cases.
>
>
>
>
>
>diff --git a/lib/librte_gro/gro_udp4.c b/lib/librte_gro/gro_udp4.c
>index 061e7b0..ffa35a2 100644
>--- a/lib/librte_gro/gro_udp4.c
>+++ b/lib/librte_gro/gro_udp4.c
>@@ -391,7 +391,6 @@
>
>                j = tbl->flows[i].start_index;
>                while (j != INVALID_ARRAY_INDEX) {
>-                       if (tbl->items[j].start_time <= flush_timestamp) {
>                                gro_udp4_merge_items(tbl, j);
>                                out[k++] = tbl->items[j].firstseg;
>                                if (tbl->items[j].nb_merged > 1)
>@@ -407,12 +406,6 @@
>
>                                if (unlikely(k == nb_out))
>                                        return k;
>-                       } else
>-                               /*
>-                                * The left packets in this flow won't be
>-                                * timeout. Go to check other flows.
>-                                */
>-                               break;
>                }
>        }
>        return k;
>
>
>
>
>
>
>
>
>
>At 2020-09-21 15:54:36, "Hu, Jiayu" <jiayu.hu@intel.com> wrote:
>>Hi Yi,
>>
>>Some comments are inline.
>>
>>Thanks,
>>Jiayu
>>
>>> -----Original Message-----
>>> From: yang_y_yi@163.com <yang_y_yi@163.com>
>>> Sent: Thursday, September 17, 2020 11:50 AM
>>> To: dev@dpdk.org
>>> Cc: Hu, Jiayu <jiayu.hu@intel.com>; thomas@monjalon.net;
>>> yangyi01@inspur.com; yang_y_yi@163.com
>>> Subject: [PATCH v6 2/3] gro: add VXLAN UDP/IPv4 GRO support
>>> 
>>> From: Yi Yang <yangyi01@inspur.com>
>>> 
>>> VXLAN UDP/IPv4 GRO can help improve VM-to-VM UDP
>>> performance when UFO or GSO is enabled in VM, GRO
>>> must be supported if UFO or GSO is enabled,
>>> otherwise, performance can't get big improvement
>>> if only GSO is there.
>>> 
>>> With this enabled in DPDK, OVS DPDK can leverage it
>>> to improve VM-to-VM UDP performance, it will reassemble
>>> VXLAN UDP/IPv4 fragments immediate after they are
>>> received from a physical NIC. It is very helpful in
>>> OVS DPDK VXLAN use case.
>>> 
>>> Note: outer IP ID isn't used to check if two packets
>>> are same flow and can be merged because the difference
>>> between outer IP IDs of two packets isn't always +/-1
>>> in case of OVS DPDK.
>>> 
>>> Signed-off-by: Yi Yang <yangyi01@inspur.com>
>>> ---
>>>  lib/librte_gro/gro_udp4.h       |   1 +
>>>  lib/librte_gro/gro_vxlan_udp4.c | 542
>>> ++++++++++++++++++++++++++++++++++++++++
>>>  lib/librte_gro/gro_vxlan_udp4.h | 154 ++++++++++++
>>>  lib/librte_gro/meson.build      |   2 +-
>>>  lib/librte_gro/rte_gro.c        | 115 +++++++--
>>>  lib/librte_gro/rte_gro.h        |   3 +
>>>  6 files changed, 790 insertions(+), 27 deletions(-)
>>>  create mode 100644 lib/librte_gro/gro_vxlan_udp4.c
>>>  create mode 100644 lib/librte_gro/gro_vxlan_udp4.h
>>> 
>>> diff --git a/lib/librte_gro/gro_udp4.h b/lib/librte_gro/gro_udp4.h
>>> index 0a078e4..d38b393 100644
>>> --- a/lib/librte_gro/gro_udp4.h
>>> +++ b/lib/librte_gro/gro_udp4.h
>>> @@ -7,6 +7,7 @@
>>> 
>>>  #include <rte_ip.h>
>>>  #include <rte_udp.h>
>>> +#include <rte_vxlan.h>
>>> 
>>>  #define INVALID_ARRAY_INDEX 0xffffffffUL
>>>  #define GRO_UDP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
>>> diff --git a/lib/librte_gro/gro_vxlan_udp4.c b/lib/librte_gro/gro_vxlan_udp4.c
>>> new file mode 100644
>>> index 0000000..4eece56
>>> --- /dev/null
>>> +++ b/lib/librte_gro/gro_vxlan_udp4.c
>>> @@ -0,0 +1,542 @@
>>> +
>>> +uint16_t
>>> +gro_vxlan_udp4_tbl_timeout_flush(struct gro_vxlan_udp4_tbl *tbl,
>>> +		uint64_t flush_timestamp,
>>> +		struct rte_mbuf **out,
>>> +		uint16_t nb_out)
>>> +{
>>> +	uint16_t k = 0;
>>> +	uint32_t i, j;
>>> +	uint32_t max_flow_num = tbl->max_flow_num;
>>> +
>>> +	for (i = 0; i < max_flow_num; i++) {
>>> +		if (unlikely(tbl->flow_num == 0))
>>> +			return k;
>>> +
>>> +		j = tbl->flows[i].start_index;
>>> +		while (j != INVALID_ARRAY_INDEX) {
>>> +			if (tbl->items[j].inner_item.start_time <=
>>> +					flush_timestamp) {
>>> +				gro_vxlan_udp4_merge_items(tbl, j);
>>> +				out[k++] = tbl->items[j].inner_item.firstseg;
>>> +				if (tbl->items[j].inner_item.nb_merged > 1)
>>> +					update_vxlan_header(&(tbl-
>>> >items[j]));
>>> +				/*
>>> +				 * Delete the item and get the next packet
>>> +				 * index.
>>> +				 */
>>> +				j = delete_item(tbl, j, INVALID_ARRAY_INDEX);
>>> +				tbl->flows[i].start_index = j;
>>> +				if (j == INVALID_ARRAY_INDEX)
>>> +					tbl->flow_num--;
>>> +
>>> +				if (unlikely(k == nb_out))
>>> +					return k;
>>> +			} else
>>> +				/*
>>> +				 * The left packets in the flow won't be
>>> +				 * timeout. Go to check other flows.
>>> +				 */
>>> +				break;
>>
>>The items of a flow are ordered by frag_oft, and start_time
>>of these items is not always in ascending order. Therefore,
>>you cannot skip checking the items after the item whose
>>start_time is greater than flush_timestamp. This issue also
>>exists in UDP/IPv4 GRO, and need to correct them both.
>>
>>> +		}
>>> +	}
>>> +	return k;
>>> +}
>>> diff --git a/lib/librte_gro/gro_vxlan_udp4.h
>>> b/lib/librte_gro/gro_vxlan_udp4.h
>>> new file mode 100644
>>> index 0000000..6a42fb3
>>> --- /dev/null
>>> +++ b/lib/librte_gro/gro_vxlan_udp4.h
>>> @@ -0,0 +1,154 @@
>>> +/* SPDX-License-Identifier: BSD-3-Clause
>>> + * Copyright(c) 2020 Inspur Corporation
>>> + */
>>> +
>>> +#ifndef _GRO_VXLAN_UDP4_H_
>>> +#define _GRO_VXLAN_UDP4_H_
>>> +
>>> +#include "gro_udp4.h"
>>> +
>>> +#define GRO_VXLAN_UDP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
>>> +
>>> +/* Header fields representing a VxLAN flow */
>>> +struct vxlan_udp4_flow_key {
>>> +	struct udp4_flow_key inner_key;
>>> +	struct rte_vxlan_hdr vxlan_hdr;
>>> +
>>> +	struct rte_ether_addr outer_eth_saddr;
>>> +	struct rte_ether_addr outer_eth_daddr;
>>> +
>>> +	uint32_t outer_ip_src_addr;
>>> +	uint32_t outer_ip_dst_addr;
>>> +
>>> +	/* Note: It is unnecessary to save outer_src_port here because it can
>>> +	 * be different for VxLAN UDP fragments from the same flow.
>>> +	 */
>>> +	uint16_t outer_dst_port;
>>> +
>>
>>The above empty line is unnecessary.
>>
>>> +};
>>> +
>>> +
>>> +struct gro_vxlan_udp4_item {
>>> +	struct gro_udp4_item inner_item;
>>> +	/* Note: VXLAN UDP/IPv4 GRO needn't check outer_ip_id because
>>> +	 * the difference between outer_ip_ids of two received packets
>>> +	 * isn't always +/-1 in case of OVS DPDK. So no outer_ip_id
>>> +	 * and outer_is_atomic fields here.
>>> +	 */
>>
>>It seems the above comments for outer IP ID is enough, and no need
>>to highlight in the commit log again. How do you think?
>>
>>> +};
>>> +
>>> diff --git a/lib/librte_gro/rte_gro.c b/lib/librte_gro/rte_gro.c
>>> index f623230..db990cf 100644
>>> --- a/lib/librte_gro/rte_gro.c
>>> +++ b/lib/librte_gro/rte_gro.c
>>> @@ -11,6 +11,7 @@
>>>  #include "gro_tcp4.h"
>>>  #include "gro_udp4.h"
>>>  #include "gro_vxlan_tcp4.h"
>>> +#include "gro_vxlan_udp4.h"
>>> 
>>>  /*
>>>   * GRO context structure. It keeps the table structures, which are
>>> @@ -137,19 +148,27 @@ struct gro_ctx {
>>>  	struct gro_udp4_item udp_items[RTE_GRO_MAX_BURST_ITEM_NUM]
>>> = {{0} };
>>> 
>>>  	/* Allocate a reassembly table for VXLAN TCP GRO */
>>> -	struct gro_vxlan_tcp4_tbl vxlan_tbl;
>>> -	struct gro_vxlan_tcp4_flow
>>> vxlan_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
>>> -	struct gro_vxlan_tcp4_item
>>> vxlan_items[RTE_GRO_MAX_BURST_ITEM_NUM]
>>> +	struct gro_vxlan_tcp4_tbl vxlan_tcp_tbl;
>>> +	struct gro_vxlan_tcp4_flow
>>> vxlan_tcp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
>>> +	struct gro_vxlan_tcp4_item
>>> vxlan_tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM]
>>>  			= {{{0}, 0, 0} };
>>> 
>>> +	/* Allocate a reassembly table for VXLAN UDP GRO */
>>> +	struct gro_vxlan_udp4_tbl vxlan_udp_tbl;
>>> +	struct gro_vxlan_udp4_flow
>>> vxlan_udp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
>>> +	struct gro_vxlan_udp4_item
>>> vxlan_udp_items[RTE_GRO_MAX_BURST_ITEM_NUM]
>>> +			= {{{0}} };
>>> +
>>>  	struct rte_mbuf *unprocess_pkts[nb_pkts];
>>>  	uint32_t item_num;
>>>  	int32_t ret;
>>>  	uint16_t i, unprocess_num = 0, nb_after_gro = nb_pkts;
>>> -	uint8_t do_tcp4_gro = 0, do_vxlan_gro = 0, do_udp4_gro = 0;
>>> +	uint8_t do_tcp4_gro = 0, do_vxlan_tcp_gro = 0, do_udp4_gro = 0,
>>> +		do_vxlan_udp_gro = 0;
>>> 
>>>  	if (unlikely((param->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
>>>  					RTE_GRO_TCP_IPV4 |
>>> +					RTE_GRO_IPV4_VXLAN_UDP_IPV4 |
>>>  					RTE_GRO_UDP_IPV4)) == 0))
>>>  		return nb_pkts;
>>> 
>>> @@ -160,15 +179,28 @@ struct gro_ctx {
>>> 
>>>  	if (param->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) {
>>>  		for (i = 0; i < item_num; i++)
>>> -			vxlan_flows[i].start_index = INVALID_ARRAY_INDEX;
>>> -
>>> -		vxlan_tbl.flows = vxlan_flows;
>>> -		vxlan_tbl.items = vxlan_items;
>>> -		vxlan_tbl.flow_num = 0;
>>> -		vxlan_tbl.item_num = 0;
>>> -		vxlan_tbl.max_flow_num = item_num;
>>> -		vxlan_tbl.max_item_num = item_num;
>>> -		do_vxlan_gro = 1;
>>> +			vxlan_tcp_flows[i].start_index =
>>> INVALID_ARRAY_INDEX;
>>> +
>>> +		vxlan_tcp_tbl.flows = vxlan_tcp_flows;
>>> +		vxlan_tcp_tbl.items = vxlan_tcp_items;
>>> +		vxlan_tcp_tbl.flow_num = 0;
>>> +		vxlan_tcp_tbl.item_num = 0;
>>> +		vxlan_tcp_tbl.max_flow_num = item_num;
>>> +		vxlan_tcp_tbl.max_item_num = item_num;
>>> +		do_vxlan_tcp_gro = 1;
>>> +	}
>>> +
>>> +	if (param->gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) {
>>> +		for (i = 0; i < item_num; i++)
>>> +			vxlan_udp_flows[i].start_index =
>>> INVALID_ARRAY_INDEX;
>>> +
>>> +		vxlan_udp_tbl.flows = vxlan_udp_flows;
>>> +		vxlan_udp_tbl.items = vxlan_udp_items;
>>> +		vxlan_udp_tbl.flow_num = 0;
>>> +		vxlan_udp_tbl.item_num = 0;
>>> +		vxlan_udp_tbl.max_flow_num = item_num;
>>> +		vxlan_udp_tbl.max_item_num = item_num;
>>> +		do_vxlan_udp_gro = 1;
>>>  	}
>>> 
>>>  	if (param->gro_types & RTE_GRO_TCP_IPV4) {
>>> @@ -204,9 +236,18 @@ struct gro_ctx {
>>>  		 * will be flushed from the tables.
>>>  		 */
>>>  		if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
>>> -				do_vxlan_gro) {
>>> +				do_vxlan_tcp_gro) {
>>>  			ret = gro_vxlan_tcp4_reassemble(pkts[i],
>>> -							&vxlan_tbl, 0);
>>> +							&vxlan_tcp_tbl, 0);
>>> +			if (ret > 0)
>>> +				/* Merge successfully */
>>> +				nb_after_gro--;
>>> +			else if (ret < 0)
>>> +				unprocess_pkts[unprocess_num++] = pkts[i];
>>> +		} else if (IS_IPV4_VXLAN_UDP4_PKT(pkts[i]->packet_type) &&
>>> +				do_vxlan_udp_gro) {
>>> +			ret = gro_vxlan_udp4_reassemble(pkts[i],
>>> +							&vxlan_udp_tbl, 0);
>>>  			if (ret > 0)
>>>  				/* Merge successfully */
>>>  				nb_after_gro--;
>>> @@ -236,11 +277,17 @@ struct gro_ctx {
>>>  		 || (unprocess_num < nb_pkts)) {
>>>  		i = 0;
>>>  		/* Flush all packets from the tables */
>>> -		if (do_vxlan_gro) {
>>> -			i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tbl,
>>> +		if (do_vxlan_tcp_gro) {
>>> +			i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tcp_tbl,
>>>  					0, pkts, nb_pkts);
>>>  		}
>>> 
>>> +		if (do_vxlan_udp_gro) {
>>> +			i +=
>>> gro_vxlan_udp4_tbl_timeout_flush(&vxlan_udp_tbl,
>>> +					0, &pkts[i], nb_pkts - i);
>>> +
>>> +		}
>>> +
>>>  		if (do_tcp4_gro) {
>>>  			i += gro_tcp4_tbl_timeout_flush(&tcp_tbl, 0,
>>>  					&pkts[i], nb_pkts - i);
>>> @@ -269,33 +316,42 @@ struct gro_ctx {
>>>  {
>>>  	struct rte_mbuf *unprocess_pkts[nb_pkts];
>>>  	struct gro_ctx *gro_ctx = ctx;
>>> -	void *tcp_tbl, *udp_tbl, *vxlan_tbl;
>>> +	void *tcp_tbl, *udp_tbl, *vxlan_tcp_tbl, *vxlan_udp_tbl;
>>>  	uint64_t current_time;
>>>  	uint16_t i, unprocess_num = 0;
>>> -	uint8_t do_tcp4_gro, do_vxlan_gro, do_udp4_gro;
>>> +	uint8_t do_tcp4_gro, do_vxlan_tcp_gro, do_udp4_gro,
>>> do_vxlan_udp_gro;
>>> 
>>>  	if (unlikely((gro_ctx->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4
>>> |
>>>  					RTE_GRO_TCP_IPV4 |
>>> +					RTE_GRO_IPV4_VXLAN_UDP_IPV4 |
>>>  					RTE_GRO_UDP_IPV4)) == 0))
>>>  		return nb_pkts;
>>> 
>>>  	tcp_tbl = gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX];
>>> -	vxlan_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];
>>> +	vxlan_tcp_tbl = gro_ctx-
>>> >tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];
>>>  	udp_tbl = gro_ctx->tbls[RTE_GRO_UDP_IPV4_INDEX];
>>> +	vxlan_udp_tbl = gro_ctx-
>>> >tbls[RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX];
>>> 
>>>  	do_tcp4_gro = (gro_ctx->gro_types & RTE_GRO_TCP_IPV4) ==
>>>  		RTE_GRO_TCP_IPV4;
>>> -	do_vxlan_gro = (gro_ctx->gro_types &
>>> RTE_GRO_IPV4_VXLAN_TCP_IPV4) ==
>>> +	do_vxlan_tcp_gro = (gro_ctx->gro_types &
>>> RTE_GRO_IPV4_VXLAN_TCP_IPV4) ==
>>>  		RTE_GRO_IPV4_VXLAN_TCP_IPV4;
>>>  	do_udp4_gro = (gro_ctx->gro_types & RTE_GRO_UDP_IPV4) ==
>>>  		RTE_GRO_UDP_IPV4;
>>> +	do_vxlan_udp_gro = (gro_ctx->gro_types &
>>> RTE_GRO_IPV4_VXLAN_UDP_IPV4) ==
>>> +		RTE_GRO_IPV4_VXLAN_UDP_IPV4;
>>> 
>>>  	current_time = rte_rdtsc();
>>> 
>>>  	for (i = 0; i < nb_pkts; i++) {
>>>  		if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
>>> -				do_vxlan_gro) {
>>> -			if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tbl,
>>> +				do_vxlan_tcp_gro) {
>>> +			if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tcp_tbl,
>>> +						current_time) < 0)
>>> +				unprocess_pkts[unprocess_num++] = pkts[i];
>>> +		} else if (IS_IPV4_VXLAN_UDP4_PKT(pkts[i]->packet_type) &&
>>> +				do_vxlan_udp_gro) {
>>> +			if (gro_vxlan_udp4_reassemble(pkts[i],
>>> vxlan_udp_tbl,
>>>  						current_time) < 0)
>>>  				unprocess_pkts[unprocess_num++] = pkts[i];
>>>  		} else if (IS_IPV4_TCP_PKT(pkts[i]->packet_type) &&
>>> @@ -341,6 +397,13 @@ struct gro_ctx {
>>>  		left_nb_out = max_nb_out - num;
>>>  	}
>>> 
>>> +	if ((gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) && max_nb_out >
>>> 0) {
>>
>>Max_nb_out is read-only and not updated. So it cannot check if 'out' array
>>has enough space. Need to use left_nb_out instead. This issue also existed
>>in UDP/IPv4 GRO, and please correct them both.
>>
>>> +		num += gro_vxlan_udp4_tbl_timeout_flush(gro_ctx->tbls[
>>> +				RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX],
>>> +				flush_timestamp, &out[num], left_nb_out);
>>> +		left_nb_out = max_nb_out - num;
>>> +	}
>>> +
>>>  	/* If no available space in 'out', stop flushing. */
>>>  	if ((gro_types & RTE_GRO_TCP_IPV4) && max_nb_out > 0) {
>>>  		num += gro_tcp4_tbl_timeout_flush(
>>> diff --git a/lib/librte_gro/rte_gro.h b/lib/librte_gro/rte_gro.h
>>> index 470f3ed..9f9ed49 100644
>>> --- a/lib/librte_gro/rte_gro.h
>>> +++ b/lib/librte_gro/rte_gro.h
>>> @@ -35,6 +35,9 @@
>>>  #define RTE_GRO_UDP_IPV4_INDEX 2
>>>  #define RTE_GRO_UDP_IPV4 (1ULL << RTE_GRO_UDP_IPV4_INDEX)
>>>  /**< UDP/IPv4 GRO flag */
>>> +#define RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX 3
>>> +#define RTE_GRO_IPV4_VXLAN_UDP_IPV4 (1ULL <<
>>> RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX)
>>> +/**< VxLAN UDP/IPv4 GRO flag. */
>>> 
>>>  /**
>>>   * Structure used to create GRO context objects or used to pass
>>> --
>>> 1.8.3.1
yang_y_yi Sept. 22, 2020, 3 a.m. UTC | #4
Jiayu, you didn't get my point. I just show what you want by code, but that isn't a good way. I think current code is most reasonable. Please check my comments in last email, actually, only the first packet of a flow is checked by timestamp, if this flow includes some new items (you're saying), they can be merged, so it isn't the case you're saying.

At 2020-09-22 11:01:26, "Jiayu Hu" <jiayu.hu@intel.com> wrote:
>On Tue, Sep 22, 2020 at 09:29:38AM +0800, yang_y_yi wrote:
>> Thanks Jiayu, I have fixed other comments except this one:
>> 
>> 
>> >The items of a flow are ordered by frag_oft, and start_time
>> >of these items is not always in ascending order. Therefore,
>> >you cannot skip checking the items after the item whose
>> >start_time is greater than flush_timestamp. This issue also
>> >exists in UDP/IPv4 GRO, and need to correct them both.
>> 
>> I think the issue here is if we should strictly follow flush_timestamp, it is
>> possible there are new items in items chain. we have chance to merge more
>> packets if we don't follow flush_timestamp. So an ideal change can be this. But
>> is it acceptible if we don't use flush_timestamp? It can flush some packets in
>> advance therefore miss next merge window. Maybe current way is most resonable
>> and a tradeoff between two exterem cases.
>
>You cannot simplily delete the code, as rte_gro_timeout_flush()
>is designed to flush packets according to cycles. If users
>want to flush all packets from the table, they can set
>'timeout_cycles' in rte_gro_timeout_flush() to 0.
>rte_gro_reassemble_burst() uses this way to return all
>packets of a burst to users after GRO.
>
>Thanks,
>Jiayu
>> 
>> 
>> diff --git a/lib/librte_gro/gro_udp4.c b/lib/librte_gro/gro_udp4.c
>> index 061e7b0..ffa35a2 100644
>> --- a/lib/librte_gro/gro_udp4.c
>> +++ b/lib/librte_gro/gro_udp4.c
>> @@ -391,7 +391,6 @@
>> 
>>                 j = tbl->flows[i].start_index;
>>                 while (j != INVALID_ARRAY_INDEX) {
>> -                       if (tbl->items[j].start_time <= flush_timestamp) {
>>                                 gro_udp4_merge_items(tbl, j);
>>                                 out[k++] = tbl->items[j].firstseg;
>>                                 if (tbl->items[j].nb_merged > 1)
>> @@ -407,12 +406,6 @@
>> 
>>                                 if (unlikely(k == nb_out))
>>                                         return k;
>> -                       } else
>> -                               /*
>> -                                * The left packets in this flow won't be
>> -                                * timeout. Go to check other flows.
>> -                                */
>> -                               break;
>>                 }
>>         }
>>         return k;
>> 
>> 
>> 
>> 
>> At 2020-09-21 15:54:36, "Hu, Jiayu" <jiayu.hu@intel.com> wrote:
>> >Hi Yi,
>> >
>> >Some comments are inline.
>> >
>> >Thanks,
>> >Jiayu
>> >
>> >> -----Original Message-----
>> >> From: yang_y_yi@163.com <yang_y_yi@163.com>
>> >> Sent: Thursday, September 17, 2020 11:50 AM
>> >> To: dev@dpdk.org
>> >> Cc: Hu, Jiayu <jiayu.hu@intel.com>; thomas@monjalon.net;
>> >> yangyi01@inspur.com; yang_y_yi@163.com
>> >> Subject: [PATCH v6 2/3] gro: add VXLAN UDP/IPv4 GRO support
>> >>
>> >> From: Yi Yang <yangyi01@inspur.com>
>> >>
>> >> VXLAN UDP/IPv4 GRO can help improve VM-to-VM UDP
>> >> performance when UFO or GSO is enabled in VM, GRO
>> >> must be supported if UFO or GSO is enabled,
>> >> otherwise, performance can't get big improvement
>> >> if only GSO is there.
>> >>
>> >> With this enabled in DPDK, OVS DPDK can leverage it
>> >> to improve VM-to-VM UDP performance, it will reassemble
>> >> VXLAN UDP/IPv4 fragments immediate after they are
>> >> received from a physical NIC. It is very helpful in
>> >> OVS DPDK VXLAN use case.
>> >>
>> >> Note: outer IP ID isn't used to check if two packets
>> >> are same flow and can be merged because the difference
>> >> between outer IP IDs of two packets isn't always +/-1
>> >> in case of OVS DPDK.
>> >>
>> >> Signed-off-by: Yi Yang <yangyi01@inspur.com>
>> >> ---
>> >>  lib/librte_gro/gro_udp4.h       |   1 +
>> >>  lib/librte_gro/gro_vxlan_udp4.c | 542
>> >> ++++++++++++++++++++++++++++++++++++++++
>> >>  lib/librte_gro/gro_vxlan_udp4.h | 154 ++++++++++++
>> >>  lib/librte_gro/meson.build      |   2 +-
>> >>  lib/librte_gro/rte_gro.c        | 115 +++++++--
>> >>  lib/librte_gro/rte_gro.h        |   3 +
>> >>  6 files changed, 790 insertions(+), 27 deletions(-)
>> >>  create mode 100644 lib/librte_gro/gro_vxlan_udp4.c
>> >>  create mode 100644 lib/librte_gro/gro_vxlan_udp4.h
>> >>
>> >> diff --git a/lib/librte_gro/gro_udp4.h b/lib/librte_gro/gro_udp4.h
>> >> index 0a078e4..d38b393 100644
>> >> --- a/lib/librte_gro/gro_udp4.h
>> >> +++ b/lib/librte_gro/gro_udp4.h
>> >> @@ -7,6 +7,7 @@
>> >>
>> >>  #include <rte_ip.h>
>> >>  #include <rte_udp.h>
>> >> +#include <rte_vxlan.h>
>> >>
>> >>  #define INVALID_ARRAY_INDEX 0xffffffffUL
>> >>  #define GRO_UDP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
>> >> diff --git a/lib/librte_gro/gro_vxlan_udp4.c b/lib/librte_gro/gro_vxlan_udp4.c
>> >> new file mode 100644
>> >> index 0000000..4eece56
>> >> --- /dev/null
>> >> +++ b/lib/librte_gro/gro_vxlan_udp4.c
>> >> @@ -0,0 +1,542 @@
>> >> +
>> >> +uint16_t
>> >> +gro_vxlan_udp4_tbl_timeout_flush(struct gro_vxlan_udp4_tbl *tbl,
>> >> +            uint64_t flush_timestamp,
>> >> +            struct rte_mbuf **out,
>> >> +            uint16_t nb_out)
>> >> +{
>> >> +    uint16_t k = 0;
>> >> +    uint32_t i, j;
>> >> +    uint32_t max_flow_num = tbl->max_flow_num;
>> >> +
>> >> +    for (i = 0; i < max_flow_num; i++) {
>> >> +            if (unlikely(tbl->flow_num == 0))
>> >> +                    return k;
>> >> +
>> >> +            j = tbl->flows[i].start_index;
>> >> +            while (j != INVALID_ARRAY_INDEX) {
>> >> +                    if (tbl->items[j].inner_item.start_time <=
>> >> +                                    flush_timestamp) {
>> >> +                            gro_vxlan_udp4_merge_items(tbl, j);
>> >> +                            out[k++] = tbl->items[j].inner_item.firstseg;
>> >> +                            if (tbl->items[j].inner_item.nb_merged > 1)
>> >> +                                    update_vxlan_header(&(tbl-
>> >> >items[j]));
>> >> +                            /*
>> >> +                             * Delete the item and get the next packet
>> >> +                             * index.
>> >> +                             */
>> >> +                            j = delete_item(tbl, j, INVALID_ARRAY_INDEX);
>> >> +                            tbl->flows[i].start_index = j;
>> >> +                            if (j == INVALID_ARRAY_INDEX)
>> >> +                                    tbl->flow_num--;
>> >> +
>> >> +                            if (unlikely(k == nb_out))
>> >> +                                    return k;
>> >> +                    } else
>> >> +                            /*
>> >> +                             * The left packets in the flow won't be
>> >> +                             * timeout. Go to check other flows.
>> >> +                             */
>> >> +                            break;
>> >
>> >The items of a flow are ordered by frag_oft, and start_time
>> >of these items is not always in ascending order. Therefore,
>> >you cannot skip checking the items after the item whose
>> >start_time is greater than flush_timestamp. This issue also
>> >exists in UDP/IPv4 GRO, and need to correct them both.
>> >
>> >> +            }
>> >> +    }
>> >> +    return k;
>> >> +}
>> >> diff --git a/lib/librte_gro/gro_vxlan_udp4.h
>> >> b/lib/librte_gro/gro_vxlan_udp4.h
>> >> new file mode 100644
>> >> index 0000000..6a42fb3
>> >> --- /dev/null
>> >> +++ b/lib/librte_gro/gro_vxlan_udp4.h
>> >> @@ -0,0 +1,154 @@
>> >> +/* SPDX-License-Identifier: BSD-3-Clause
>> >> + * Copyright(c) 2020 Inspur Corporation
>> >> + */
>> >> +
>> >> +#ifndef _GRO_VXLAN_UDP4_H_
>> >> +#define _GRO_VXLAN_UDP4_H_
>> >> +
>> >> +#include "gro_udp4.h"
>> >> +
>> >> +#define GRO_VXLAN_UDP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
>> >> +
>> >> +/* Header fields representing a VxLAN flow */
>> >> +struct vxlan_udp4_flow_key {
>> >> +    struct udp4_flow_key inner_key;
>> >> +    struct rte_vxlan_hdr vxlan_hdr;
>> >> +
>> >> +    struct rte_ether_addr outer_eth_saddr;
>> >> +    struct rte_ether_addr outer_eth_daddr;
>> >> +
>> >> +    uint32_t outer_ip_src_addr;
>> >> +    uint32_t outer_ip_dst_addr;
>> >> +
>> >> +    /* Note: It is unnecessary to save outer_src_port here because it can
>> >> +     * be different for VxLAN UDP fragments from the same flow.
>> >> +     */
>> >> +    uint16_t outer_dst_port;
>> >> +
>> >
>> >The above empty line is unnecessary.
>> >
>> >> +};
>> >> +
>> >> +
>> >> +struct gro_vxlan_udp4_item {
>> >> +    struct gro_udp4_item inner_item;
>> >> +    /* Note: VXLAN UDP/IPv4 GRO needn't check outer_ip_id because
>> >> +     * the difference between outer_ip_ids of two received packets
>> >> +     * isn't always +/-1 in case of OVS DPDK. So no outer_ip_id
>> >> +     * and outer_is_atomic fields here.
>> >> +     */
>> >
>> >It seems the above comments for outer IP ID is enough, and no need
>> >to highlight in the commit log again. How do you think?
>> >
>> >> +};
>> >> +
>> >> diff --git a/lib/librte_gro/rte_gro.c b/lib/librte_gro/rte_gro.c
>> >> index f623230..db990cf 100644
>> >> --- a/lib/librte_gro/rte_gro.c
>> >> +++ b/lib/librte_gro/rte_gro.c
>> >> @@ -11,6 +11,7 @@
>> >>  #include "gro_tcp4.h"
>> >>  #include "gro_udp4.h"
>> >>  #include "gro_vxlan_tcp4.h"
>> >> +#include "gro_vxlan_udp4.h"
>> >>
>> >>  /*
>> >>   * GRO context structure. It keeps the table structures, which are
>> >> @@ -137,19 +148,27 @@ struct gro_ctx {
>> >>      struct gro_udp4_item udp_items[RTE_GRO_MAX_BURST_ITEM_NUM]
>> >> = {{0} };
>> >>
>> >>      /* Allocate a reassembly table for VXLAN TCP GRO */
>> >> -    struct gro_vxlan_tcp4_tbl vxlan_tbl;
>> >> -    struct gro_vxlan_tcp4_flow
>> >> vxlan_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
>> >> -    struct gro_vxlan_tcp4_item
>> >> vxlan_items[RTE_GRO_MAX_BURST_ITEM_NUM]
>> >> +    struct gro_vxlan_tcp4_tbl vxlan_tcp_tbl;
>> >> +    struct gro_vxlan_tcp4_flow
>> >> vxlan_tcp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
>> >> +    struct gro_vxlan_tcp4_item
>> >> vxlan_tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM]
>> >>                      = {{{0}, 0, 0} };
>> >>
>> >> +    /* Allocate a reassembly table for VXLAN UDP GRO */
>> >> +    struct gro_vxlan_udp4_tbl vxlan_udp_tbl;
>> >> +    struct gro_vxlan_udp4_flow
>> >> vxlan_udp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
>> >> +    struct gro_vxlan_udp4_item
>> >> vxlan_udp_items[RTE_GRO_MAX_BURST_ITEM_NUM]
>> >> +                    = {{{0}} };
>> >> +
>> >>      struct rte_mbuf *unprocess_pkts[nb_pkts];
>> >>      uint32_t item_num;
>> >>      int32_t ret;
>> >>      uint16_t i, unprocess_num = 0, nb_after_gro = nb_pkts;
>> >> -    uint8_t do_tcp4_gro = 0, do_vxlan_gro = 0, do_udp4_gro = 0;
>> >> +    uint8_t do_tcp4_gro = 0, do_vxlan_tcp_gro = 0, do_udp4_gro = 0,
>> >> +            do_vxlan_udp_gro = 0;
>> >>
>> >>      if (unlikely((param->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
>> >>                                      RTE_GRO_TCP_IPV4 |
>> >> +                                    RTE_GRO_IPV4_VXLAN_UDP_IPV4 |
>> >>                                      RTE_GRO_UDP_IPV4)) == 0))
>> >>              return nb_pkts;
>> >>
>> >> @@ -160,15 +179,28 @@ struct gro_ctx {
>> >>
>> >>      if (param->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) {
>> >>              for (i = 0; i < item_num; i++)
>> >> -                    vxlan_flows[i].start_index = INVALID_ARRAY_INDEX;
>> >> -
>> >> -            vxlan_tbl.flows = vxlan_flows;
>> >> -            vxlan_tbl.items = vxlan_items;
>> >> -            vxlan_tbl.flow_num = 0;
>> >> -            vxlan_tbl.item_num = 0;
>> >> -            vxlan_tbl.max_flow_num = item_num;
>> >> -            vxlan_tbl.max_item_num = item_num;
>> >> -            do_vxlan_gro = 1;
>> >> +                    vxlan_tcp_flows[i].start_index =
>> >> INVALID_ARRAY_INDEX;
>> >> +
>> >> +            vxlan_tcp_tbl.flows = vxlan_tcp_flows;
>> >> +            vxlan_tcp_tbl.items = vxlan_tcp_items;
>> >> +            vxlan_tcp_tbl.flow_num = 0;
>> >> +            vxlan_tcp_tbl.item_num = 0;
>> >> +            vxlan_tcp_tbl.max_flow_num = item_num;
>> >> +            vxlan_tcp_tbl.max_item_num = item_num;
>> >> +            do_vxlan_tcp_gro = 1;
>> >> +    }
>> >> +
>> >> +    if (param->gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) {
>> >> +            for (i = 0; i < item_num; i++)
>> >> +                    vxlan_udp_flows[i].start_index =
>> >> INVALID_ARRAY_INDEX;
>> >> +
>> >> +            vxlan_udp_tbl.flows = vxlan_udp_flows;
>> >> +            vxlan_udp_tbl.items = vxlan_udp_items;
>> >> +            vxlan_udp_tbl.flow_num = 0;
>> >> +            vxlan_udp_tbl.item_num = 0;
>> >> +            vxlan_udp_tbl.max_flow_num = item_num;
>> >> +            vxlan_udp_tbl.max_item_num = item_num;
>> >> +            do_vxlan_udp_gro = 1;
>> >>      }
>> >>
>> >>      if (param->gro_types & RTE_GRO_TCP_IPV4) {
>> >> @@ -204,9 +236,18 @@ struct gro_ctx {
>> >>               * will be flushed from the tables.
>> >>               */
>> >>              if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
>> >> -                            do_vxlan_gro) {
>> >> +                            do_vxlan_tcp_gro) {
>> >>                      ret = gro_vxlan_tcp4_reassemble(pkts[i],
>> >> -                                                    &vxlan_tbl, 0);
>> >> +                                                    &vxlan_tcp_tbl, 0);
>> >> +                    if (ret > 0)
>> >> +                            /* Merge successfully */
>> >> +                            nb_after_gro--;
>> >> +                    else if (ret < 0)
>> >> +                            unprocess_pkts[unprocess_num++] = pkts[i];
>> >> +            } else if (IS_IPV4_VXLAN_UDP4_PKT(pkts[i]->packet_type) &&
>> >> +                            do_vxlan_udp_gro) {
>> >> +                    ret = gro_vxlan_udp4_reassemble(pkts[i],
>> >> +                                                    &vxlan_udp_tbl, 0);
>> >>                      if (ret > 0)
>> >>                              /* Merge successfully */
>> >>                              nb_after_gro--;
>> >> @@ -236,11 +277,17 @@ struct gro_ctx {
>> >>               || (unprocess_num < nb_pkts)) {
>> >>              i = 0;
>> >>              /* Flush all packets from the tables */
>> >> -            if (do_vxlan_gro) {
>> >> -                    i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tbl,
>> >> +            if (do_vxlan_tcp_gro) {
>> >> +                    i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tcp_tbl,
>> >>                                      0, pkts, nb_pkts);
>> >>              }
>> >>
>> >> +            if (do_vxlan_udp_gro) {
>> >> +                    i +=
>> >> gro_vxlan_udp4_tbl_timeout_flush(&vxlan_udp_tbl,
>> >> +                                    0, &pkts[i], nb_pkts - i);
>> >> +
>> >> +            }
>> >> +
>> >>              if (do_tcp4_gro) {
>> >>                      i += gro_tcp4_tbl_timeout_flush(&tcp_tbl, 0,
>> >>                                      &pkts[i], nb_pkts - i);
>> >> @@ -269,33 +316,42 @@ struct gro_ctx {
>> >>  {
>> >>      struct rte_mbuf *unprocess_pkts[nb_pkts];
>> >>      struct gro_ctx *gro_ctx = ctx;
>> >> -    void *tcp_tbl, *udp_tbl, *vxlan_tbl;
>> >> +    void *tcp_tbl, *udp_tbl, *vxlan_tcp_tbl, *vxlan_udp_tbl;
>> >>      uint64_t current_time;
>> >>      uint16_t i, unprocess_num = 0;
>> >> -    uint8_t do_tcp4_gro, do_vxlan_gro, do_udp4_gro;
>> >> +    uint8_t do_tcp4_gro, do_vxlan_tcp_gro, do_udp4_gro,
>> >> do_vxlan_udp_gro;
>> >>
>> >>      if (unlikely((gro_ctx->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4
>> >> |
>> >>                                      RTE_GRO_TCP_IPV4 |
>> >> +                                    RTE_GRO_IPV4_VXLAN_UDP_IPV4 |
>> >>                                      RTE_GRO_UDP_IPV4)) == 0))
>> >>              return nb_pkts;
>> >>
>> >>      tcp_tbl = gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX];
>> >> -    vxlan_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];
>> >> +    vxlan_tcp_tbl = gro_ctx-
>> >> >tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];
>> >>      udp_tbl = gro_ctx->tbls[RTE_GRO_UDP_IPV4_INDEX];
>> >> +    vxlan_udp_tbl = gro_ctx-
>> >> >tbls[RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX];
>> >>
>> >>      do_tcp4_gro = (gro_ctx->gro_types & RTE_GRO_TCP_IPV4) ==
>> >>              RTE_GRO_TCP_IPV4;
>> >> -    do_vxlan_gro = (gro_ctx->gro_types &
>> >> RTE_GRO_IPV4_VXLAN_TCP_IPV4) ==
>> >> +    do_vxlan_tcp_gro = (gro_ctx->gro_types &
>> >> RTE_GRO_IPV4_VXLAN_TCP_IPV4) ==
>> >>              RTE_GRO_IPV4_VXLAN_TCP_IPV4;
>> >>      do_udp4_gro = (gro_ctx->gro_types & RTE_GRO_UDP_IPV4) ==
>> >>              RTE_GRO_UDP_IPV4;
>> >> +    do_vxlan_udp_gro = (gro_ctx->gro_types &
>> >> RTE_GRO_IPV4_VXLAN_UDP_IPV4) ==
>> >> +            RTE_GRO_IPV4_VXLAN_UDP_IPV4;
>> >>
>> >>      current_time = rte_rdtsc();
>> >>
>> >>      for (i = 0; i < nb_pkts; i++) {
>> >>              if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
>> >> -                            do_vxlan_gro) {
>> >> -                    if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tbl,
>> >> +                            do_vxlan_tcp_gro) {
>> >> +                    if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tcp_tbl,
>> >> +                                            current_time) < 0)
>> >> +                            unprocess_pkts[unprocess_num++] = pkts[i];
>> >> +            } else if (IS_IPV4_VXLAN_UDP4_PKT(pkts[i]->packet_type) &&
>> >> +                            do_vxlan_udp_gro) {
>> >> +                    if (gro_vxlan_udp4_reassemble(pkts[i],
>> >> vxlan_udp_tbl,
>> >>                                              current_time) < 0)
>> >>                              unprocess_pkts[unprocess_num++] = pkts[i];
>> >>              } else if (IS_IPV4_TCP_PKT(pkts[i]->packet_type) &&
>> >> @@ -341,6 +397,13 @@ struct gro_ctx {
>> >>              left_nb_out = max_nb_out - num;
>> >>      }
>> >>
>> >> +    if ((gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) && max_nb_out >
>> >> 0) {
>> >
>> >Max_nb_out is read-only and not updated. So it cannot check if 'out' array
>> >has enough space. Need to use left_nb_out instead. This issue also existed
>> >in UDP/IPv4 GRO, and please correct them both.
>> >
>> >> +            num += gro_vxlan_udp4_tbl_timeout_flush(gro_ctx->tbls[
>> >> +                            RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX],
>> >> +                            flush_timestamp, &out[num], left_nb_out);
>> >> +            left_nb_out = max_nb_out - num;
>> >> +    }
>> >> +
>> >>      /* If no available space in 'out', stop flushing. */
>> >>      if ((gro_types & RTE_GRO_TCP_IPV4) && max_nb_out > 0) {
>> >>              num += gro_tcp4_tbl_timeout_flush(
>> >> diff --git a/lib/librte_gro/rte_gro.h b/lib/librte_gro/rte_gro.h
>> >> index 470f3ed..9f9ed49 100644
>> >> --- a/lib/librte_gro/rte_gro.h
>> >> +++ b/lib/librte_gro/rte_gro.h
>> >> @@ -35,6 +35,9 @@
>> >>  #define RTE_GRO_UDP_IPV4_INDEX 2
>> >>  #define RTE_GRO_UDP_IPV4 (1ULL << RTE_GRO_UDP_IPV4_INDEX)
>> >>  /**< UDP/IPv4 GRO flag */
>> >> +#define RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX 3
>> >> +#define RTE_GRO_IPV4_VXLAN_UDP_IPV4 (1ULL <<
>> >> RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX)
>> >> +/**< VxLAN UDP/IPv4 GRO flag. */
>> >>
>> >>  /**
>> >>   * Structure used to create GRO context objects or used to pass
>> >> --
>> >> 1.8.3.1
>> 
>> 
>> 
>>  
>>
Hu, Jiayu Sept. 22, 2020, 3:01 a.m. UTC | #5
On Tue, Sep 22, 2020 at 09:29:38AM +0800, yang_y_yi wrote:
> Thanks Jiayu, I have fixed other comments except this one:
> 
> 
> >The items of a flow are ordered by frag_oft, and start_time
> >of these items is not always in ascending order. Therefore,
> >you cannot skip checking the items after the item whose
> >start_time is greater than flush_timestamp. This issue also
> >exists in UDP/IPv4 GRO, and need to correct them both.
> 
> I think the issue here is if we should strictly follow flush_timestamp, it is
> possible there are new items in items chain. we have chance to merge more
> packets if we don't follow flush_timestamp. So an ideal change can be this. But
> is it acceptible if we don't use flush_timestamp? It can flush some packets in
> advance therefore miss next merge window. Maybe current way is most resonable
> and a tradeoff between two exterem cases.

You cannot simplily delete the code, as rte_gro_timeout_flush()
is designed to flush packets according to cycles. If users
want to flush all packets from the table, they can set
'timeout_cycles' in rte_gro_timeout_flush() to 0.
rte_gro_reassemble_burst() uses this way to return all
packets of a burst to users after GRO.

Thanks,
Jiayu
> 
> 
> diff --git a/lib/librte_gro/gro_udp4.c b/lib/librte_gro/gro_udp4.c
> index 061e7b0..ffa35a2 100644
> --- a/lib/librte_gro/gro_udp4.c
> +++ b/lib/librte_gro/gro_udp4.c
> @@ -391,7 +391,6 @@
> 
>                 j = tbl->flows[i].start_index;
>                 while (j != INVALID_ARRAY_INDEX) {
> -                       if (tbl->items[j].start_time <= flush_timestamp) {
>                                 gro_udp4_merge_items(tbl, j);
>                                 out[k++] = tbl->items[j].firstseg;
>                                 if (tbl->items[j].nb_merged > 1)
> @@ -407,12 +406,6 @@
> 
>                                 if (unlikely(k == nb_out))
>                                         return k;
> -                       } else
> -                               /*
> -                                * The left packets in this flow won't be
> -                                * timeout. Go to check other flows.
> -                                */
> -                               break;
>                 }
>         }
>         return k;
> 
> 
> 
> 
> At 2020-09-21 15:54:36, "Hu, Jiayu" <jiayu.hu@intel.com> wrote:
> >Hi Yi,
> >
> >Some comments are inline.
> >
> >Thanks,
> >Jiayu
> >
> >> -----Original Message-----
> >> From: yang_y_yi@163.com <yang_y_yi@163.com>
> >> Sent: Thursday, September 17, 2020 11:50 AM
> >> To: dev@dpdk.org
> >> Cc: Hu, Jiayu <jiayu.hu@intel.com>; thomas@monjalon.net;
> >> yangyi01@inspur.com; yang_y_yi@163.com
> >> Subject: [PATCH v6 2/3] gro: add VXLAN UDP/IPv4 GRO support
> >>
> >> From: Yi Yang <yangyi01@inspur.com>
> >>
> >> VXLAN UDP/IPv4 GRO can help improve VM-to-VM UDP
> >> performance when UFO or GSO is enabled in VM, GRO
> >> must be supported if UFO or GSO is enabled,
> >> otherwise, performance can't get big improvement
> >> if only GSO is there.
> >>
> >> With this enabled in DPDK, OVS DPDK can leverage it
> >> to improve VM-to-VM UDP performance, it will reassemble
> >> VXLAN UDP/IPv4 fragments immediate after they are
> >> received from a physical NIC. It is very helpful in
> >> OVS DPDK VXLAN use case.
> >>
> >> Note: outer IP ID isn't used to check if two packets
> >> are same flow and can be merged because the difference
> >> between outer IP IDs of two packets isn't always +/-1
> >> in case of OVS DPDK.
> >>
> >> Signed-off-by: Yi Yang <yangyi01@inspur.com>
> >> ---
> >>  lib/librte_gro/gro_udp4.h       |   1 +
> >>  lib/librte_gro/gro_vxlan_udp4.c | 542
> >> ++++++++++++++++++++++++++++++++++++++++
> >>  lib/librte_gro/gro_vxlan_udp4.h | 154 ++++++++++++
> >>  lib/librte_gro/meson.build      |   2 +-
> >>  lib/librte_gro/rte_gro.c        | 115 +++++++--
> >>  lib/librte_gro/rte_gro.h        |   3 +
> >>  6 files changed, 790 insertions(+), 27 deletions(-)
> >>  create mode 100644 lib/librte_gro/gro_vxlan_udp4.c
> >>  create mode 100644 lib/librte_gro/gro_vxlan_udp4.h
> >>
> >> diff --git a/lib/librte_gro/gro_udp4.h b/lib/librte_gro/gro_udp4.h
> >> index 0a078e4..d38b393 100644
> >> --- a/lib/librte_gro/gro_udp4.h
> >> +++ b/lib/librte_gro/gro_udp4.h
> >> @@ -7,6 +7,7 @@
> >>
> >>  #include <rte_ip.h>
> >>  #include <rte_udp.h>
> >> +#include <rte_vxlan.h>
> >>
> >>  #define INVALID_ARRAY_INDEX 0xffffffffUL
> >>  #define GRO_UDP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
> >> diff --git a/lib/librte_gro/gro_vxlan_udp4.c b/lib/librte_gro/gro_vxlan_udp4.c
> >> new file mode 100644
> >> index 0000000..4eece56
> >> --- /dev/null
> >> +++ b/lib/librte_gro/gro_vxlan_udp4.c
> >> @@ -0,0 +1,542 @@
> >> +
> >> +uint16_t
> >> +gro_vxlan_udp4_tbl_timeout_flush(struct gro_vxlan_udp4_tbl *tbl,
> >> +            uint64_t flush_timestamp,
> >> +            struct rte_mbuf **out,
> >> +            uint16_t nb_out)
> >> +{
> >> +    uint16_t k = 0;
> >> +    uint32_t i, j;
> >> +    uint32_t max_flow_num = tbl->max_flow_num;
> >> +
> >> +    for (i = 0; i < max_flow_num; i++) {
> >> +            if (unlikely(tbl->flow_num == 0))
> >> +                    return k;
> >> +
> >> +            j = tbl->flows[i].start_index;
> >> +            while (j != INVALID_ARRAY_INDEX) {
> >> +                    if (tbl->items[j].inner_item.start_time <=
> >> +                                    flush_timestamp) {
> >> +                            gro_vxlan_udp4_merge_items(tbl, j);
> >> +                            out[k++] = tbl->items[j].inner_item.firstseg;
> >> +                            if (tbl->items[j].inner_item.nb_merged > 1)
> >> +                                    update_vxlan_header(&(tbl-
> >> >items[j]));
> >> +                            /*
> >> +                             * Delete the item and get the next packet
> >> +                             * index.
> >> +                             */
> >> +                            j = delete_item(tbl, j, INVALID_ARRAY_INDEX);
> >> +                            tbl->flows[i].start_index = j;
> >> +                            if (j == INVALID_ARRAY_INDEX)
> >> +                                    tbl->flow_num--;
> >> +
> >> +                            if (unlikely(k == nb_out))
> >> +                                    return k;
> >> +                    } else
> >> +                            /*
> >> +                             * The left packets in the flow won't be
> >> +                             * timeout. Go to check other flows.
> >> +                             */
> >> +                            break;
> >
> >The items of a flow are ordered by frag_oft, and start_time
> >of these items is not always in ascending order. Therefore,
> >you cannot skip checking the items after the item whose
> >start_time is greater than flush_timestamp. This issue also
> >exists in UDP/IPv4 GRO, and need to correct them both.
> >
> >> +            }
> >> +    }
> >> +    return k;
> >> +}
> >> diff --git a/lib/librte_gro/gro_vxlan_udp4.h
> >> b/lib/librte_gro/gro_vxlan_udp4.h
> >> new file mode 100644
> >> index 0000000..6a42fb3
> >> --- /dev/null
> >> +++ b/lib/librte_gro/gro_vxlan_udp4.h
> >> @@ -0,0 +1,154 @@
> >> +/* SPDX-License-Identifier: BSD-3-Clause
> >> + * Copyright(c) 2020 Inspur Corporation
> >> + */
> >> +
> >> +#ifndef _GRO_VXLAN_UDP4_H_
> >> +#define _GRO_VXLAN_UDP4_H_
> >> +
> >> +#include "gro_udp4.h"
> >> +
> >> +#define GRO_VXLAN_UDP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
> >> +
> >> +/* Header fields representing a VxLAN flow */
> >> +struct vxlan_udp4_flow_key {
> >> +    struct udp4_flow_key inner_key;
> >> +    struct rte_vxlan_hdr vxlan_hdr;
> >> +
> >> +    struct rte_ether_addr outer_eth_saddr;
> >> +    struct rte_ether_addr outer_eth_daddr;
> >> +
> >> +    uint32_t outer_ip_src_addr;
> >> +    uint32_t outer_ip_dst_addr;
> >> +
> >> +    /* Note: It is unnecessary to save outer_src_port here because it can
> >> +     * be different for VxLAN UDP fragments from the same flow.
> >> +     */
> >> +    uint16_t outer_dst_port;
> >> +
> >
> >The above empty line is unnecessary.
> >
> >> +};
> >> +
> >> +
> >> +struct gro_vxlan_udp4_item {
> >> +    struct gro_udp4_item inner_item;
> >> +    /* Note: VXLAN UDP/IPv4 GRO needn't check outer_ip_id because
> >> +     * the difference between outer_ip_ids of two received packets
> >> +     * isn't always +/-1 in case of OVS DPDK. So no outer_ip_id
> >> +     * and outer_is_atomic fields here.
> >> +     */
> >
> >It seems the above comments for outer IP ID is enough, and no need
> >to highlight in the commit log again. How do you think?
> >
> >> +};
> >> +
> >> diff --git a/lib/librte_gro/rte_gro.c b/lib/librte_gro/rte_gro.c
> >> index f623230..db990cf 100644
> >> --- a/lib/librte_gro/rte_gro.c
> >> +++ b/lib/librte_gro/rte_gro.c
> >> @@ -11,6 +11,7 @@
> >>  #include "gro_tcp4.h"
> >>  #include "gro_udp4.h"
> >>  #include "gro_vxlan_tcp4.h"
> >> +#include "gro_vxlan_udp4.h"
> >>
> >>  /*
> >>   * GRO context structure. It keeps the table structures, which are
> >> @@ -137,19 +148,27 @@ struct gro_ctx {
> >>      struct gro_udp4_item udp_items[RTE_GRO_MAX_BURST_ITEM_NUM]
> >> = {{0} };
> >>
> >>      /* Allocate a reassembly table for VXLAN TCP GRO */
> >> -    struct gro_vxlan_tcp4_tbl vxlan_tbl;
> >> -    struct gro_vxlan_tcp4_flow
> >> vxlan_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
> >> -    struct gro_vxlan_tcp4_item
> >> vxlan_items[RTE_GRO_MAX_BURST_ITEM_NUM]
> >> +    struct gro_vxlan_tcp4_tbl vxlan_tcp_tbl;
> >> +    struct gro_vxlan_tcp4_flow
> >> vxlan_tcp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
> >> +    struct gro_vxlan_tcp4_item
> >> vxlan_tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM]
> >>                      = {{{0}, 0, 0} };
> >>
> >> +    /* Allocate a reassembly table for VXLAN UDP GRO */
> >> +    struct gro_vxlan_udp4_tbl vxlan_udp_tbl;
> >> +    struct gro_vxlan_udp4_flow
> >> vxlan_udp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
> >> +    struct gro_vxlan_udp4_item
> >> vxlan_udp_items[RTE_GRO_MAX_BURST_ITEM_NUM]
> >> +                    = {{{0}} };
> >> +
> >>      struct rte_mbuf *unprocess_pkts[nb_pkts];
> >>      uint32_t item_num;
> >>      int32_t ret;
> >>      uint16_t i, unprocess_num = 0, nb_after_gro = nb_pkts;
> >> -    uint8_t do_tcp4_gro = 0, do_vxlan_gro = 0, do_udp4_gro = 0;
> >> +    uint8_t do_tcp4_gro = 0, do_vxlan_tcp_gro = 0, do_udp4_gro = 0,
> >> +            do_vxlan_udp_gro = 0;
> >>
> >>      if (unlikely((param->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
> >>                                      RTE_GRO_TCP_IPV4 |
> >> +                                    RTE_GRO_IPV4_VXLAN_UDP_IPV4 |
> >>                                      RTE_GRO_UDP_IPV4)) == 0))
> >>              return nb_pkts;
> >>
> >> @@ -160,15 +179,28 @@ struct gro_ctx {
> >>
> >>      if (param->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) {
> >>              for (i = 0; i < item_num; i++)
> >> -                    vxlan_flows[i].start_index = INVALID_ARRAY_INDEX;
> >> -
> >> -            vxlan_tbl.flows = vxlan_flows;
> >> -            vxlan_tbl.items = vxlan_items;
> >> -            vxlan_tbl.flow_num = 0;
> >> -            vxlan_tbl.item_num = 0;
> >> -            vxlan_tbl.max_flow_num = item_num;
> >> -            vxlan_tbl.max_item_num = item_num;
> >> -            do_vxlan_gro = 1;
> >> +                    vxlan_tcp_flows[i].start_index =
> >> INVALID_ARRAY_INDEX;
> >> +
> >> +            vxlan_tcp_tbl.flows = vxlan_tcp_flows;
> >> +            vxlan_tcp_tbl.items = vxlan_tcp_items;
> >> +            vxlan_tcp_tbl.flow_num = 0;
> >> +            vxlan_tcp_tbl.item_num = 0;
> >> +            vxlan_tcp_tbl.max_flow_num = item_num;
> >> +            vxlan_tcp_tbl.max_item_num = item_num;
> >> +            do_vxlan_tcp_gro = 1;
> >> +    }
> >> +
> >> +    if (param->gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) {
> >> +            for (i = 0; i < item_num; i++)
> >> +                    vxlan_udp_flows[i].start_index =
> >> INVALID_ARRAY_INDEX;
> >> +
> >> +            vxlan_udp_tbl.flows = vxlan_udp_flows;
> >> +            vxlan_udp_tbl.items = vxlan_udp_items;
> >> +            vxlan_udp_tbl.flow_num = 0;
> >> +            vxlan_udp_tbl.item_num = 0;
> >> +            vxlan_udp_tbl.max_flow_num = item_num;
> >> +            vxlan_udp_tbl.max_item_num = item_num;
> >> +            do_vxlan_udp_gro = 1;
> >>      }
> >>
> >>      if (param->gro_types & RTE_GRO_TCP_IPV4) {
> >> @@ -204,9 +236,18 @@ struct gro_ctx {
> >>               * will be flushed from the tables.
> >>               */
> >>              if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
> >> -                            do_vxlan_gro) {
> >> +                            do_vxlan_tcp_gro) {
> >>                      ret = gro_vxlan_tcp4_reassemble(pkts[i],
> >> -                                                    &vxlan_tbl, 0);
> >> +                                                    &vxlan_tcp_tbl, 0);
> >> +                    if (ret > 0)
> >> +                            /* Merge successfully */
> >> +                            nb_after_gro--;
> >> +                    else if (ret < 0)
> >> +                            unprocess_pkts[unprocess_num++] = pkts[i];
> >> +            } else if (IS_IPV4_VXLAN_UDP4_PKT(pkts[i]->packet_type) &&
> >> +                            do_vxlan_udp_gro) {
> >> +                    ret = gro_vxlan_udp4_reassemble(pkts[i],
> >> +                                                    &vxlan_udp_tbl, 0);
> >>                      if (ret > 0)
> >>                              /* Merge successfully */
> >>                              nb_after_gro--;
> >> @@ -236,11 +277,17 @@ struct gro_ctx {
> >>               || (unprocess_num < nb_pkts)) {
> >>              i = 0;
> >>              /* Flush all packets from the tables */
> >> -            if (do_vxlan_gro) {
> >> -                    i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tbl,
> >> +            if (do_vxlan_tcp_gro) {
> >> +                    i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tcp_tbl,
> >>                                      0, pkts, nb_pkts);
> >>              }
> >>
> >> +            if (do_vxlan_udp_gro) {
> >> +                    i +=
> >> gro_vxlan_udp4_tbl_timeout_flush(&vxlan_udp_tbl,
> >> +                                    0, &pkts[i], nb_pkts - i);
> >> +
> >> +            }
> >> +
> >>              if (do_tcp4_gro) {
> >>                      i += gro_tcp4_tbl_timeout_flush(&tcp_tbl, 0,
> >>                                      &pkts[i], nb_pkts - i);
> >> @@ -269,33 +316,42 @@ struct gro_ctx {
> >>  {
> >>      struct rte_mbuf *unprocess_pkts[nb_pkts];
> >>      struct gro_ctx *gro_ctx = ctx;
> >> -    void *tcp_tbl, *udp_tbl, *vxlan_tbl;
> >> +    void *tcp_tbl, *udp_tbl, *vxlan_tcp_tbl, *vxlan_udp_tbl;
> >>      uint64_t current_time;
> >>      uint16_t i, unprocess_num = 0;
> >> -    uint8_t do_tcp4_gro, do_vxlan_gro, do_udp4_gro;
> >> +    uint8_t do_tcp4_gro, do_vxlan_tcp_gro, do_udp4_gro,
> >> do_vxlan_udp_gro;
> >>
> >>      if (unlikely((gro_ctx->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4
> >> |
> >>                                      RTE_GRO_TCP_IPV4 |
> >> +                                    RTE_GRO_IPV4_VXLAN_UDP_IPV4 |
> >>                                      RTE_GRO_UDP_IPV4)) == 0))
> >>              return nb_pkts;
> >>
> >>      tcp_tbl = gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX];
> >> -    vxlan_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];
> >> +    vxlan_tcp_tbl = gro_ctx-
> >> >tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];
> >>      udp_tbl = gro_ctx->tbls[RTE_GRO_UDP_IPV4_INDEX];
> >> +    vxlan_udp_tbl = gro_ctx-
> >> >tbls[RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX];
> >>
> >>      do_tcp4_gro = (gro_ctx->gro_types & RTE_GRO_TCP_IPV4) ==
> >>              RTE_GRO_TCP_IPV4;
> >> -    do_vxlan_gro = (gro_ctx->gro_types &
> >> RTE_GRO_IPV4_VXLAN_TCP_IPV4) ==
> >> +    do_vxlan_tcp_gro = (gro_ctx->gro_types &
> >> RTE_GRO_IPV4_VXLAN_TCP_IPV4) ==
> >>              RTE_GRO_IPV4_VXLAN_TCP_IPV4;
> >>      do_udp4_gro = (gro_ctx->gro_types & RTE_GRO_UDP_IPV4) ==
> >>              RTE_GRO_UDP_IPV4;
> >> +    do_vxlan_udp_gro = (gro_ctx->gro_types &
> >> RTE_GRO_IPV4_VXLAN_UDP_IPV4) ==
> >> +            RTE_GRO_IPV4_VXLAN_UDP_IPV4;
> >>
> >>      current_time = rte_rdtsc();
> >>
> >>      for (i = 0; i < nb_pkts; i++) {
> >>              if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
> >> -                            do_vxlan_gro) {
> >> -                    if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tbl,
> >> +                            do_vxlan_tcp_gro) {
> >> +                    if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tcp_tbl,
> >> +                                            current_time) < 0)
> >> +                            unprocess_pkts[unprocess_num++] = pkts[i];
> >> +            } else if (IS_IPV4_VXLAN_UDP4_PKT(pkts[i]->packet_type) &&
> >> +                            do_vxlan_udp_gro) {
> >> +                    if (gro_vxlan_udp4_reassemble(pkts[i],
> >> vxlan_udp_tbl,
> >>                                              current_time) < 0)
> >>                              unprocess_pkts[unprocess_num++] = pkts[i];
> >>              } else if (IS_IPV4_TCP_PKT(pkts[i]->packet_type) &&
> >> @@ -341,6 +397,13 @@ struct gro_ctx {
> >>              left_nb_out = max_nb_out - num;
> >>      }
> >>
> >> +    if ((gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) && max_nb_out >
> >> 0) {
> >
> >Max_nb_out is read-only and not updated. So it cannot check if 'out' array
> >has enough space. Need to use left_nb_out instead. This issue also existed
> >in UDP/IPv4 GRO, and please correct them both.
> >
> >> +            num += gro_vxlan_udp4_tbl_timeout_flush(gro_ctx->tbls[
> >> +                            RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX],
> >> +                            flush_timestamp, &out[num], left_nb_out);
> >> +            left_nb_out = max_nb_out - num;
> >> +    }
> >> +
> >>      /* If no available space in 'out', stop flushing. */
> >>      if ((gro_types & RTE_GRO_TCP_IPV4) && max_nb_out > 0) {
> >>              num += gro_tcp4_tbl_timeout_flush(
> >> diff --git a/lib/librte_gro/rte_gro.h b/lib/librte_gro/rte_gro.h
> >> index 470f3ed..9f9ed49 100644
> >> --- a/lib/librte_gro/rte_gro.h
> >> +++ b/lib/librte_gro/rte_gro.h
> >> @@ -35,6 +35,9 @@
> >>  #define RTE_GRO_UDP_IPV4_INDEX 2
> >>  #define RTE_GRO_UDP_IPV4 (1ULL << RTE_GRO_UDP_IPV4_INDEX)
> >>  /**< UDP/IPv4 GRO flag */
> >> +#define RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX 3
> >> +#define RTE_GRO_IPV4_VXLAN_UDP_IPV4 (1ULL <<
> >> RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX)
> >> +/**< VxLAN UDP/IPv4 GRO flag. */
> >>
> >>  /**
> >>   * Structure used to create GRO context objects or used to pass
> >> --
> >> 1.8.3.1
> 
> 
> 
>  
>
Hu, Jiayu Sept. 22, 2020, 6:14 a.m. UTC | #6
Fragments of a flow are sorted by frag_oft, but they may have different
timestamp. For example, there are three fragments, whose frag_oft is:
frag[0].frag_oft=0, frag[1].frag_oft=4, frag[2].frag_oft=6; and they are
fragments of one UDP packet but are not neighbors. In the first RX burst,
host receives frag[1] and calls rte_gro_reassemble(), and we assume the
timestamp of frag[1] is 10; in the second RX burst, host receives frag[0]
and also call rte_gro_reassemble(), and timestamp of frag[0] is 11; the
third time, host receives frag[2] and timestamp of frag[2] is 12. The three
fragments are stored in three items of a UDP GRO table:
items[0]: frag[0], timestamp is 11
items[1]: frag[1], timestamp is 10
items[2]: frag[2], timestamp is 12
Now we want to flush packets whose timestamp is less than or equal to
10. frag[1] should be returned, but in your code, no packets will be flushed.
Because the timestamp of items[0] is greater than 10, the left two fragments
will not be checked. This is what I want to say.

From: yang_y_yi <yang_y_yi@163.com>
Sent: Tuesday, September 22, 2020 9:44 AM
To: Hu, Jiayu <jiayu.hu@intel.com>
Cc: dev@dpdk.org; thomas@monjalon.net; yangyi01@inspur.com
Subject: Re:Re: [dpdk-dev] [PATCH v6 2/3] gro: add VXLAN UDP/IPv4 GRO support
Importance: High

BTW, start_time is checked for the first packet in a flow, gro_udp4_merge_items(tbl, j) will merge all the packets in this flow once if they can be reassembled, gro_udp4_merge_items(tbl, j) doesn't check start_time, so this still can let some new items in this flow have chance to be merged.

At 2020-09-22 09:29:38, "yang_y_yi" <yang_y_yi@163.com<mailto:yang_y_yi@163.com>> wrote:

>Thanks Jiayu, I have fixed other comments except this one:

>

>

>

>>The items of a flow are ordered by frag_oft, and start_time

>>of these items is not always in ascending order. Therefore,

>>you cannot skip checking the items after the item whose

>>start_time is greater than flush_timestamp. This issue also

>>exists in UDP/IPv4 GRO, and need to correct them both.

>

>

>I think the issue here is if we should strictly follow flush_timestamp, it is possible there are new items in items chain. we have chance to merge more packets if we don't follow flush_timestamp. So an ideal change can be this. But is it acceptible if we don't use flush_timestamp? It can flush some packets in advance therefore miss next merge window. Maybe current way is most resonable and a tradeoff between two exterem cases.

>

>

>

>

>

>diff --git a/lib/librte_gro/gro_udp4.c b/lib/librte_gro/gro_udp4.c

>index 061e7b0..ffa35a2 100644

>--- a/lib/librte_gro/gro_udp4.c

>+++ b/lib/librte_gro/gro_udp4.c

>@@ -391,7 +391,6 @@

>

>                j = tbl->flows[i].start_index;

>                while (j != INVALID_ARRAY_INDEX) {

>-                       if (tbl->items[j].start_time <= flush_timestamp) {

>                                gro_udp4_merge_items(tbl, j);

>                                out[k++] = tbl->items[j].firstseg;

>                                if (tbl->items[j].nb_merged > 1)

>@@ -407,12 +406,6 @@

>

>                                if (unlikely(k == nb_out))

>                                        return k;

>-                       } else

>-                               /*

>-                                * The left packets in this flow won't be

>-                                * timeout. Go to check other flows.

>-                                */

>-                               break;

>                }

>        }

>        return k;

>
yang_y_yi Sept. 22, 2020, 6:23 a.m. UTC | #7
Not a question, in next flush, they will be flushed, we have to check timestamp in the first time unless we don't strictly follow this time limitation.


At 2020-09-22 14:14:00, "Hu, Jiayu" <jiayu.hu@intel.com> wrote:

Fragments of a flow are sorted by frag_oft, but they may have different

timestamp. For example, there are three fragments, whose frag_oft is:

frag[0].frag_oft=0, frag[1].frag_oft=4, frag[2].frag_oft=6; and they are

fragments of one UDP packet but are not neighbors. In the first RX burst,

host receives frag[1] and calls rte_gro_reassemble(), and we assume the

timestamp of frag[1] is 10; in the second RX burst, host receives frag[0]

and also call rte_gro_reassemble(), and timestamp of frag[0] is 11; the

third time, host receives frag[2] and timestamp of frag[2] is 12. The three

fragments are stored in three items of a UDP GRO table:

items[0]: frag[0], timestamp is 11

items[1]: frag[1], timestamp is 10

items[2]: frag[2], timestamp is 12

Now we want to flush packets whose timestamp is less than or equal to

10. frag[1] should be returned, but in your code, no packets will be flushed.

Because the timestamp of items[0] is greater than 10, the left two fragments

will not be checked. This is what I want to say.

 

From: yang_y_yi <yang_y_yi@163.com>
Sent: Tuesday, September 22, 2020 9:44 AM
To: Hu, Jiayu <jiayu.hu@intel.com>
Cc: dev@dpdk.org; thomas@monjalon.net; yangyi01@inspur.com
Subject: Re:Re: [dpdk-dev] [PATCH v6 2/3] gro: add VXLAN UDP/IPv4 GRO support
Importance: High

 

BTW, start_time is checked for the first packet in a flow, gro_udp4_merge_items(tbl, j) will merge all the packets in this flow once if they can be reassembled, gro_udp4_merge_items(tbl, j) doesn't check start_time, so this still can let some new items in this flow have chance to be merged.

At 2020-09-22 09:29:38, "yang_y_yi" <yang_y_yi@163.com> wrote:
>Thanks Jiayu, I have fixed other comments except this one:
> 
> 
> 
>>The items of a flow are ordered by frag_oft, and start_time
>>of these items is not always in ascending order. Therefore,
>>you cannot skip checking the items after the item whose
>>start_time is greater than flush_timestamp. This issue also
>>exists in UDP/IPv4 GRO, and need to correct them both.
> 
> 
>I think the issue here is if we should strictly follow flush_timestamp, it is possible there are new items in items chain. we have chance to merge more packets if we don't follow flush_timestamp. So an ideal change can be this. But is it acceptible if we don't use flush_timestamp? It can flush some packets in advance therefore miss next merge window. Maybe current way is most resonable and a tradeoff between two exterem cases.
> 
> 
> 
> 
> 
>diff --git a/lib/librte_gro/gro_udp4.c b/lib/librte_gro/gro_udp4.c
>index 061e7b0..ffa35a2 100644
>--- a/lib/librte_gro/gro_udp4.c
>+++ b/lib/librte_gro/gro_udp4.c
>@@ -391,7 +391,6 @@
> 
>                j = tbl->flows[i].start_index;
>                while (j != INVALID_ARRAY_INDEX) {
>-                       if (tbl->items[j].start_time <= flush_timestamp) {
>                                gro_udp4_merge_items(tbl, j);
>                                out[k++] = tbl->items[j].firstseg;
>                                if (tbl->items[j].nb_merged > 1)
>@@ -407,12 +406,6 @@
> 
>                                if (unlikely(k == nb_out))
>                                        return k;
>-                       } else
>-                               /*
>-                                * The left packets in this flow won't be
>-                                * timeout. Go to check other flows.
>-                                */
>-                               break;
>                }
>        }
>        return k;
>
Hu, Jiayu Sept. 22, 2020, 6:55 a.m. UTC | #8
On Tue, Sep 22, 2020 at 02:23:39PM +0800, yang_y_yi wrote:
> Not a question, in next flush, they will be flushed, we have to check timestamp
> in the first time unless we don't strictly follow this time limitation.

who will check the timestamp? I did't get the point.

IMO, this will cause inconsistency of the rte_gro_timeout_flush().
BTW, what stops you to traverse all items and check timestamp
before flush them out?

> 
> At 2020-09-22 14:14:00, "Hu, Jiayu" <jiayu.hu@intel.com> wrote:
> 
>     Fragments of a flow are sorted by frag_oft, but they may have different
> 
>     timestamp. For example, there are three fragments, whose frag_oft is:
> 
>     frag[0].frag_oft=0, frag[1].frag_oft=4, frag[2].frag_oft=6; and they are
> 
>     fragments of one UDP packet but are not neighbors. In the first RX burst,
> 
>     host receives frag[1] and calls rte_gro_reassemble(), and we assume the
> 
>     timestamp of frag[1] is 10; in the second RX burst, host receives frag[0]
> 
>     and also call rte_gro_reassemble(), and timestamp of frag[0] is 11; the
> 
>     third time, host receives frag[2] and timestamp of frag[2] is 12. The three
> 
>     fragments are stored in three items of a UDP GRO table:
> 
>     items[0]: frag[0], timestamp is 11
> 
>     items[1]: frag[1], timestamp is 10
> 
>     items[2]: frag[2], timestamp is 12
> 
>     Now we want to flush packets whose timestamp is less than or equal to
> 
>     10. frag[1] should be returned, but in your code, no packets will be
>     flushed.
> 
>     Because the timestamp of items[0] is greater than 10, the left two
>     fragments
> 
>     will not be checked. This is what I want to say.
> 
>      
> 
>     From: yang_y_yi <yang_y_yi@163.com>
>     Sent: Tuesday, September 22, 2020 9:44 AM
>     To: Hu, Jiayu <jiayu.hu@intel.com>
>     Cc: dev@dpdk.org; thomas@monjalon.net; yangyi01@inspur.com
>     Subject: Re:Re: [dpdk-dev] [PATCH v6 2/3] gro: add VXLAN UDP/IPv4 GRO
>     support
>     Importance: High
> 
>      
> 
>     BTW, start_time is checked for the first packet in a flow,
>     gro_udp4_merge_items(tbl, j) will merge all the packets in this flow once
>     if they can be reassembled, gro_udp4_merge_items(tbl, j) doesn't check
>     start_time, so this still can let some new items in this flow have chance
>     to be merged.
> 
>     At 2020-09-22 09:29:38, "yang_y_yi" <yang_y_yi@163.com> wrote:
> 
>     >Thanks Jiayu, I have fixed other comments except this one:
> 
>     > 
> 
>     > 
> 
>     > 
> 
>     >>The items of a flow are ordered by frag_oft, and start_time
> 
>     >>of these items is not always in ascending order. Therefore,
> 
>     >>you cannot skip checking the items after the item whose
> 
>     >>start_time is greater than flush_timestamp. This issue also
> 
>     >>exists in UDP/IPv4 GRO, and need to correct them both.
> 
>     > 
> 
>     > 
> 
>     >I think the issue here is if we should strictly follow flush_timestamp, it is possible there are new items in items chain. we have chance to merge more packets if we don't follow flush_timestamp. So an ideal change can be this. But is it acceptible if we don't use flush_timestamp? It can flush some packets in advance therefore miss next merge window. Maybe current way is most resonable and a tradeoff between two exterem cases.
> 
>     > 
> 
>     > 
> 
>     > 
> 
>     > 
> 
>     > 
> 
>     >diff --git a/lib/librte_gro/gro_udp4.c b/lib/librte_gro/gro_udp4.c
> 
>     >index 061e7b0..ffa35a2 100644
> 
>     >--- a/lib/librte_gro/gro_udp4.c
> 
>     >+++ b/lib/librte_gro/gro_udp4.c
> 
>     >@@ -391,7 +391,6 @@
> 
>     > 
> 
>     >                j = tbl->flows[i].start_index;
> 
>     >                while (j != INVALID_ARRAY_INDEX) {
> 
>     >-                       if (tbl->items[j].start_time <= flush_timestamp) {
> 
>     >                                gro_udp4_merge_items(tbl, j);
> 
>     >                                out[k++] = tbl->items[j].firstseg;
> 
>     >                                if (tbl->items[j].nb_merged > 1)
> 
>     >@@ -407,12 +406,6 @@
> 
>     > 
> 
>     >                                if (unlikely(k == nb_out))
> 
>     >                                        return k;
> 
>     >-                       } else
> 
>     >-                               /*
> 
>     >-                                * The left packets in this flow won't be
> 
>     >-                                * timeout. Go to check other flows.
> 
>     >-                                */
> 
>     >-                               break;
> 
>     >                }
> 
>     >        }
> 
>     >        return k;
> 
>     > 
> 
>      
> 
> 
> 
>  
>
yang_y_yi Sept. 22, 2020, 7:38 a.m. UTC | #9
The problem is timestamp of which one item in a flow we should use as timestamp reference base for this flow, for merge (reassemble), only the first packet can trigger merge of all the packets, merge is forward not backward, if you traverse the whole linked list in this flow to get the oldest timestamp and compare it with flushtime, that will be not worthy, in the worst case (say I send a 64K UDP packet and MTU is 1450), you will have 46 items to check to get the oldest timestamp by linked list.





I'm not sure what inconsistentcy you're saying mean.


At 2020-09-22 14:55:46, "Jiayu Hu" <jiayu.hu@intel.com> wrote:
>On Tue, Sep 22, 2020 at 02:23:39PM +0800, yang_y_yi wrote:
>> Not a question, in next flush, they will be flushed, we have to check timestamp
>> in the first time unless we don't strictly follow this time limitation.
>
>who will check the timestamp? I did't get the point.
>
>IMO, this will cause inconsistency of the rte_gro_timeout_flush().
>BTW, what stops you to traverse all items and check timestamp
>before flush them out?
>
>> 
>> At 2020-09-22 14:14:00, "Hu, Jiayu" <jiayu.hu@intel.com> wrote:
>> 
>>     Fragments of a flow are sorted by frag_oft, but they may have different
>> 
>>     timestamp. For example, there are three fragments, whose frag_oft is:
>> 
>>     frag[0].frag_oft=0, frag[1].frag_oft=4, frag[2].frag_oft=6; and they are
>> 
>>     fragments of one UDP packet but are not neighbors. In the first RX burst,
>> 
>>     host receives frag[1] and calls rte_gro_reassemble(), and we assume the
>> 
>>     timestamp of frag[1] is 10; in the second RX burst, host receives frag[0]
>> 
>>     and also call rte_gro_reassemble(), and timestamp of frag[0] is 11; the
>> 
>>     third time, host receives frag[2] and timestamp of frag[2] is 12. The three
>> 
>>     fragments are stored in three items of a UDP GRO table:
>> 
>>     items[0]: frag[0], timestamp is 11
>> 
>>     items[1]: frag[1], timestamp is 10
>> 
>>     items[2]: frag[2], timestamp is 12
>> 
>>     Now we want to flush packets whose timestamp is less than or equal to
>> 
>>     10. frag[1] should be returned, but in your code, no packets will be
>>     flushed.
>> 
>>     Because the timestamp of items[0] is greater than 10, the left two
>>     fragments
>> 
>>     will not be checked. This is what I want to say.
>> 
>>      
>> 
>>     From: yang_y_yi <yang_y_yi@163.com>
>>     Sent: Tuesday, September 22, 2020 9:44 AM
>>     To: Hu, Jiayu <jiayu.hu@intel.com>
>>     Cc: dev@dpdk.org; thomas@monjalon.net; yangyi01@inspur.com
>>     Subject: Re:Re: [dpdk-dev] [PATCH v6 2/3] gro: add VXLAN UDP/IPv4 GRO
>>     support
>>     Importance: High
>> 
>>      
>> 
>>     BTW, start_time is checked for the first packet in a flow,
>>     gro_udp4_merge_items(tbl, j) will merge all the packets in this flow once
>>     if they can be reassembled, gro_udp4_merge_items(tbl, j) doesn't check
>>     start_time, so this still can let some new items in this flow have chance
>>     to be merged.
>> 
>>     At 2020-09-22 09:29:38, "yang_y_yi" <yang_y_yi@163.com> wrote:
>> 
>>     >Thanks Jiayu, I have fixed other comments except this one:
>> 
>>     > 
>> 
>>     > 
>> 
>>     > 
>> 
>>     >>The items of a flow are ordered by frag_oft, and start_time
>> 
>>     >>of these items is not always in ascending order. Therefore,
>> 
>>     >>you cannot skip checking the items after the item whose
>> 
>>     >>start_time is greater than flush_timestamp. This issue also
>> 
>>     >>exists in UDP/IPv4 GRO, and need to correct them both.
>> 
>>     > 
>> 
>>     > 
>> 
>>     >I think the issue here is if we should strictly follow flush_timestamp, it is possible there are new items in items chain. we have chance to merge more packets if we don't follow flush_timestamp. So an ideal change can be this. But is it acceptible if we don't use flush_timestamp? It can flush some packets in advance therefore miss next merge window. Maybe current way is most resonable and a tradeoff between two exterem cases.
>> 
>>     > 
>> 
>>     > 
>> 
>>     > 
>> 
>>     > 
>> 
>>     > 
>> 
>>     >diff --git a/lib/librte_gro/gro_udp4.c b/lib/librte_gro/gro_udp4.c
>> 
>>     >index 061e7b0..ffa35a2 100644
>> 
>>     >--- a/lib/librte_gro/gro_udp4.c
>> 
>>     >+++ b/lib/librte_gro/gro_udp4.c
>> 
>>     >@@ -391,7 +391,6 @@
>> 
>>     > 
>> 
>>     >                j = tbl->flows[i].start_index;
>> 
>>     >                while (j != INVALID_ARRAY_INDEX) {
>> 
>>     >-                       if (tbl->items[j].start_time <= flush_timestamp) {
>> 
>>     >                                gro_udp4_merge_items(tbl, j);
>> 
>>     >                                out[k++] = tbl->items[j].firstseg;
>> 
>>     >                                if (tbl->items[j].nb_merged > 1)
>> 
>>     >@@ -407,12 +406,6 @@
>> 
>>     > 
>> 
>>     >                                if (unlikely(k == nb_out))
>> 
>>     >                                        return k;
>> 
>>     >-                       } else
>> 
>>     >-                               /*
>> 
>>     >-                                * The left packets in this flow won't be
>> 
>>     >-                                * timeout. Go to check other flows.
>> 
>>     >-                                */
>> 
>>     >-                               break;
>> 
>>     >                }
>> 
>>     >        }
>> 
>>     >        return k;
>> 
>>     > 
>> 
>>      
>> 
>> 
>> 
>>  
>>
Hu, Jiayu Sept. 23, 2020, 2:15 a.m. UTC | #10
On Tue, Sep 22, 2020 at 03:38:29PM +0800, yang_y_yi wrote:
> The problem is timestamp of which one item in a flow we should use as timestamp
> reference base for this flow, for merge (reassemble), only the first packet can
> trigger merge of all the packets, merge is forward not backward, if you
> traverse the whole linked list in this flow to get the oldest timestamp and
> compare it with flushtime, that will be not worthy, in the worst case (say I
> send a 64K UDP packet and MTU is 1450), you will have 46 items to check to get
> the oldest timestamp by linked list.

OK, I got the point. I agree to flush packets without strictly
obeying timestamp. But you need to change the comment in the
code and clarify the design for both UDP and VxLAN GRO patch.
Current comment "The left packets in ..." is not appropriate
for your design.

> 
> 
> I'm not sure what inconsistentcy you're saying mean.
> 
> At 2020-09-22 14:55:46, "Jiayu Hu" <jiayu.hu@intel.com> wrote:
> >On Tue, Sep 22, 2020 at 02:23:39PM +0800, yang_y_yi wrote:
> >> Not a question, in next flush, they will be flushed, we have to check timestamp
> >> in the first time unless we don't strictly follow this time limitation.
> >
> >who will check the timestamp? I did't get the point.
> >
> >IMO, this will cause inconsistency of the rte_gro_timeout_flush().
> >BTW, what stops you to traverse all items and check timestamp
> >before flush them out?
> >
> >>
> >> At 2020-09-22 14:14:00, "Hu, Jiayu" <jiayu.hu@intel.com> wrote:
> >>
> >>     Fragments of a flow are sorted by frag_oft, but they may have different
> >>
> >>     timestamp. For example, there are three fragments, whose frag_oft is:
> >>
> >>     frag[0].frag_oft=0, frag[1].frag_oft=4, frag[2].frag_oft=6; and they are
> >>
> >>     fragments of one UDP packet but are not neighbors. In the first RX burst,
> >>
> >>     host receives frag[1] and calls rte_gro_reassemble(), and we assume the
> >>
> >>     timestamp of frag[1] is 10; in the second RX burst, host receives frag[0]
> >>
> >>     and also call rte_gro_reassemble(), and timestamp of frag[0] is 11; the
> >>
> >>     third time, host receives frag[2] and timestamp of frag[2] is 12. The three
> >>
> >>     fragments are stored in three items of a UDP GRO table:
> >>
> >>     items[0]: frag[0], timestamp is 11
> >>
> >>     items[1]: frag[1], timestamp is 10
> >>
> >>     items[2]: frag[2], timestamp is 12
> >>
> >>     Now we want to flush packets whose timestamp is less than or equal to
> >>
> >>     10. frag[1] should be returned, but in your code, no packets will be
> >>     flushed.
> >>
> >>     Because the timestamp of items[0] is greater than 10, the left two
> >>     fragments
> >>
> >>     will not be checked. This is what I want to say.
> >>
> >>
> >>
> >>     From: yang_y_yi <yang_y_yi@163.com>
> >>     Sent: Tuesday, September 22, 2020 9:44 AM
> >>     To: Hu, Jiayu <jiayu.hu@intel.com>
> >>     Cc: dev@dpdk.org; thomas@monjalon.net; yangyi01@inspur.com
> >>     Subject: Re:Re: [dpdk-dev] [PATCH v6 2/3] gro: add VXLAN UDP/IPv4 GRO
> >>     support
> >>     Importance: High
> >>
> >>
> >>
> >>     BTW, start_time is checked for the first packet in a flow,
> >>     gro_udp4_merge_items(tbl, j) will merge all the packets in this flow once
> >>     if they can be reassembled, gro_udp4_merge_items(tbl, j) doesn't check
> >>     start_time, so this still can let some new items in this flow have chance
> >>     to be merged.
> >>
> >>     At 2020-09-22 09:29:38, "yang_y_yi" <yang_y_yi@163.com> wrote:
> >>
> >>     >Thanks Jiayu, I have fixed other comments except this one:
> >>
> >>     >
> >>
> >>     >
> >>
> >>     >
> >>
> >>     >>The items of a flow are ordered by frag_oft, and start_time
> >>
> >>     >>of these items is not always in ascending order. Therefore,
> >>
> >>     >>you cannot skip checking the items after the item whose
> >>
> >>     >>start_time is greater than flush_timestamp. This issue also
> >>
> >>     >>exists in UDP/IPv4 GRO, and need to correct them both.
> >>
> >>     >
> >>
> >>     >
> >>
> >>     >I think the issue here is if we should strictly follow flush_timestamp, it is possible there are new items in items chain. we have chance to merge more packets if we don't follow flush_timestamp. So an ideal change can be this. But is it acceptible if we don't use flush_timestamp? It can flush some packets in advance therefore miss next merge window. Maybe current way is most resonable and a tradeoff between two exterem cases.
> >>
> >>     >
> >>
> >>     >
> >>
> >>     >
> >>
> >>     >
> >>
> >>     >
> >>
> >>     >diff --git a/lib/librte_gro/gro_udp4.c b/lib/librte_gro/gro_udp4.c
> >>
> >>     >index 061e7b0..ffa35a2 100644
> >>
> >>     >--- a/lib/librte_gro/gro_udp4.c
> >>
> >>     >+++ b/lib/librte_gro/gro_udp4.c
> >>
> >>     >@@ -391,7 +391,6 @@
> >>
> >>     >
> >>
> >>     >                j = tbl->flows[i].start_index;
> >>
> >>     >                while (j != INVALID_ARRAY_INDEX) {
> >>
> >>     >-                       if (tbl->items[j].start_time <= flush_timestamp) {
> >>
> >>     >                                gro_udp4_merge_items(tbl, j);
> >>
> >>     >                                out[k++] = tbl->items[j].firstseg;
> >>
> >>     >                                if (tbl->items[j].nb_merged > 1)
> >>
> >>     >@@ -407,12 +406,6 @@
> >>
> >>     >
> >>
> >>     >                                if (unlikely(k == nb_out))
> >>
> >>     >                                        return k;
> >>
> >>     >-                       } else
> >>
> >>     >-                               /*
> >>
> >>     >-                                * The left packets in this flow won't be
> >>
> >>     >-                                * timeout. Go to check other flows.
> >>
> >>     >-                                */
> >>
> >>     >-                               break;
> >>
> >>     >                }
> >>
> >>     >        }
> >>
> >>     >        return k;
> >>
> >>     >
> >>
> >>
> >>
> >>
> >>
> >>
> >>
> 
> 
> 
>  
>
yang_y_yi Sept. 23, 2020, 2:28 a.m. UTC | #11
Thanks Jiayu, do you mean not comparing timestamp and flush all the packets in a flow as I showed code, right? If so, we shouldn't provide argument flush_timestamp. But this will result in very bad issues, rte_gro_timeout_flush will be called after rte_gro_reassemble every time, so the result may be you can't reassemble out any original UDP packet because every UDP fragments will be flushed very soon, no chance to reassemble.

At 2020-09-23 10:15:12, "Jiayu Hu" <jiayu.hu@intel.com> wrote:
>On Tue, Sep 22, 2020 at 03:38:29PM +0800, yang_y_yi wrote:
>> The problem is timestamp of which one item in a flow we should use as timestamp
>> reference base for this flow, for merge (reassemble), only the first packet can
>> trigger merge of all the packets, merge is forward not backward, if you
>> traverse the whole linked list in this flow to get the oldest timestamp and
>> compare it with flushtime, that will be not worthy, in the worst case (say I
>> send a 64K UDP packet and MTU is 1450), you will have 46 items to check to get
>> the oldest timestamp by linked list.
>
>OK, I got the point. I agree to flush packets without strictly
>obeying timestamp. But you need to change the comment in the
>code and clarify the design for both UDP and VxLAN GRO patch.
>Current comment "The left packets in ..." is not appropriate
>for your design.
>
>> 
>> 
>> I'm not sure what inconsistentcy you're saying mean.
>> 
>> At 2020-09-22 14:55:46, "Jiayu Hu" <jiayu.hu@intel.com> wrote:
>> >On Tue, Sep 22, 2020 at 02:23:39PM +0800, yang_y_yi wrote:
>> >> Not a question, in next flush, they will be flushed, we have to check timestamp
>> >> in the first time unless we don't strictly follow this time limitation.
>> >
>> >who will check the timestamp? I did't get the point.
>> >
>> >IMO, this will cause inconsistency of the rte_gro_timeout_flush().
>> >BTW, what stops you to traverse all items and check timestamp
>> >before flush them out?
>> >
>> >>
>> >> At 2020-09-22 14:14:00, "Hu, Jiayu" <jiayu.hu@intel.com> wrote:
>> >>
>> >>     Fragments of a flow are sorted by frag_oft, but they may have different
>> >>
>> >>     timestamp. For example, there are three fragments, whose frag_oft is:
>> >>
>> >>     frag[0].frag_oft=0, frag[1].frag_oft=4, frag[2].frag_oft=6; and they are
>> >>
>> >>     fragments of one UDP packet but are not neighbors. In the first RX burst,
>> >>
>> >>     host receives frag[1] and calls rte_gro_reassemble(), and we assume the
>> >>
>> >>     timestamp of frag[1] is 10; in the second RX burst, host receives frag[0]
>> >>
>> >>     and also call rte_gro_reassemble(), and timestamp of frag[0] is 11; the
>> >>
>> >>     third time, host receives frag[2] and timestamp of frag[2] is 12. The three
>> >>
>> >>     fragments are stored in three items of a UDP GRO table:
>> >>
>> >>     items[0]: frag[0], timestamp is 11
>> >>
>> >>     items[1]: frag[1], timestamp is 10
>> >>
>> >>     items[2]: frag[2], timestamp is 12
>> >>
>> >>     Now we want to flush packets whose timestamp is less than or equal to
>> >>
>> >>     10. frag[1] should be returned, but in your code, no packets will be
>> >>     flushed.
>> >>
>> >>     Because the timestamp of items[0] is greater than 10, the left two
>> >>     fragments
>> >>
>> >>     will not be checked. This is what I want to say.
>> >>
>> >>
>> >>
>> >>     From: yang_y_yi <yang_y_yi@163.com>
>> >>     Sent: Tuesday, September 22, 2020 9:44 AM
>> >>     To: Hu, Jiayu <jiayu.hu@intel.com>
>> >>     Cc: dev@dpdk.org; thomas@monjalon.net; yangyi01@inspur.com
>> >>     Subject: Re:Re: [dpdk-dev] [PATCH v6 2/3] gro: add VXLAN UDP/IPv4 GRO
>> >>     support
>> >>     Importance: High
>> >>
>> >>
>> >>
>> >>     BTW, start_time is checked for the first packet in a flow,
>> >>     gro_udp4_merge_items(tbl, j) will merge all the packets in this flow once
>> >>     if they can be reassembled, gro_udp4_merge_items(tbl, j) doesn't check
>> >>     start_time, so this still can let some new items in this flow have chance
>> >>     to be merged.
>> >>
>> >>     At 2020-09-22 09:29:38, "yang_y_yi" <yang_y_yi@163.com> wrote:
>> >>
>> >>     >Thanks Jiayu, I have fixed other comments except this one:
>> >>
>> >>     >
>> >>
>> >>     >
>> >>
>> >>     >
>> >>
>> >>     >>The items of a flow are ordered by frag_oft, and start_time
>> >>
>> >>     >>of these items is not always in ascending order. Therefore,
>> >>
>> >>     >>you cannot skip checking the items after the item whose
>> >>
>> >>     >>start_time is greater than flush_timestamp. This issue also
>> >>
>> >>     >>exists in UDP/IPv4 GRO, and need to correct them both.
>> >>
>> >>     >
>> >>
>> >>     >
>> >>
>> >>     >I think the issue here is if we should strictly follow flush_timestamp, it is possible there are new items in items chain. we have chance to merge more packets if we don't follow flush_timestamp. So an ideal change can be this. But is it acceptible if we don't use flush_timestamp? It can flush some packets in advance therefore miss next merge window. Maybe current way is most resonable and a tradeoff between two exterem cases.
>> >>
>> >>     >
>> >>
>> >>     >
>> >>
>> >>     >
>> >>
>> >>     >
>> >>
>> >>     >
>> >>
>> >>     >diff --git a/lib/librte_gro/gro_udp4.c b/lib/librte_gro/gro_udp4.c
>> >>
>> >>     >index 061e7b0..ffa35a2 100644
>> >>
>> >>     >--- a/lib/librte_gro/gro_udp4.c
>> >>
>> >>     >+++ b/lib/librte_gro/gro_udp4.c
>> >>
>> >>     >@@ -391,7 +391,6 @@
>> >>
>> >>     >
>> >>
>> >>     >                j = tbl->flows[i].start_index;
>> >>
>> >>     >                while (j != INVALID_ARRAY_INDEX) {
>> >>
>> >>     >-                       if (tbl->items[j].start_time <= flush_timestamp) {
>> >>
>> >>     >                                gro_udp4_merge_items(tbl, j);
>> >>
>> >>     >                                out[k++] = tbl->items[j].firstseg;
>> >>
>> >>     >                                if (tbl->items[j].nb_merged > 1)
>> >>
>> >>     >@@ -407,12 +406,6 @@
>> >>
>> >>     >
>> >>
>> >>     >                                if (unlikely(k == nb_out))
>> >>
>> >>     >                                        return k;
>> >>
>> >>     >-                       } else
>> >>
>> >>     >-                               /*
>> >>
>> >>     >-                                * The left packets in this flow won't be
>> >>
>> >>     >-                                * timeout. Go to check other flows.
>> >>
>> >>     >-                                */
>> >>
>> >>     >-                               break;
>> >>
>> >>     >                }
>> >>
>> >>     >        }
>> >>
>> >>     >        return k;
>> >>
>> >>     >
>> >>
>> >>
>> >>
>> >>
>> >>
>> >>
>> >>
>> 
>> 
>> 
>>  
>>
Hu, Jiayu Sept. 23, 2020, 2:43 a.m. UTC | #12
On Wed, Sep 23, 2020 at 10:28:00AM +0800, yang_y_yi wrote:
> Thanks Jiayu, do you mean not comparing timestamp and flush all the packets in
> a flow as I showed code, right? If so, we shouldn't provide argument
> flush_timestamp. But this will result in very bad issues, rte_gro_timeout_flush
> will be called after rte_gro_reassemble every time, so the result may be you
> can't reassemble out any original UDP packet because every UDP fragments will
> be flushed very soon, no chance to reassemble.

No, I mean the design in your patch, which stops flushing packets
once find one whose timestamp is greater than flush_timestamp.

> 
> At 2020-09-23 10:15:12, "Jiayu Hu" <jiayu.hu@intel.com> wrote:
> >On Tue, Sep 22, 2020 at 03:38:29PM +0800, yang_y_yi wrote:
> >> The problem is timestamp of which one item in a flow we should use as timestamp
> >> reference base for this flow, for merge (reassemble), only the first packet can
> >> trigger merge of all the packets, merge is forward not backward, if you
> >> traverse the whole linked list in this flow to get the oldest timestamp and
> >> compare it with flushtime, that will be not worthy, in the worst case (say I
> >> send a 64K UDP packet and MTU is 1450), you will have 46 items to check to get
> >> the oldest timestamp by linked list.
> >
> >OK, I got the point. I agree to flush packets without strictly
> >obeying timestamp. But you need to change the comment in the
> >code and clarify the design for both UDP and VxLAN GRO patch.
> >Current comment "The left packets in ..." is not appropriate
> >for your design.
> >
> >>
> >>
> >> I'm not sure what inconsistentcy you're saying mean.
> >>
> >> At 2020-09-22 14:55:46, "Jiayu Hu" <jiayu.hu@intel.com> wrote:
> >> >On Tue, Sep 22, 2020 at 02:23:39PM +0800, yang_y_yi wrote:
> >> >> Not a question, in next flush, they will be flushed, we have to check timestamp
> >> >> in the first time unless we don't strictly follow this time limitation.
> >> >
> >> >who will check the timestamp? I did't get the point.
> >> >
> >> >IMO, this will cause inconsistency of the rte_gro_timeout_flush().
> >> >BTW, what stops you to traverse all items and check timestamp
> >> >before flush them out?
> >> >
> >> >>
> >> >> At 2020-09-22 14:14:00, "Hu, Jiayu" <jiayu.hu@intel.com> wrote:
> >> >>
> >> >>     Fragments of a flow are sorted by frag_oft, but they may have different
> >> >>
> >> >>     timestamp. For example, there are three fragments, whose frag_oft is:
> >> >>
> >> >>     frag[0].frag_oft=0, frag[1].frag_oft=4, frag[2].frag_oft=6; and they are
> >> >>
> >> >>     fragments of one UDP packet but are not neighbors. In the first RX burst,
> >> >>
> >> >>     host receives frag[1] and calls rte_gro_reassemble(), and we assume the
> >> >>
> >> >>     timestamp of frag[1] is 10; in the second RX burst, host receives frag[0]
> >> >>
> >> >>     and also call rte_gro_reassemble(), and timestamp of frag[0] is 11; the
> >> >>
> >> >>     third time, host receives frag[2] and timestamp of frag[2] is 12. The three
> >> >>
> >> >>     fragments are stored in three items of a UDP GRO table:
> >> >>
> >> >>     items[0]: frag[0], timestamp is 11
> >> >>
> >> >>     items[1]: frag[1], timestamp is 10
> >> >>
> >> >>     items[2]: frag[2], timestamp is 12
> >> >>
> >> >>     Now we want to flush packets whose timestamp is less than or equal to
> >> >>
> >> >>     10. frag[1] should be returned, but in your code, no packets will be
> >> >>     flushed.
> >> >>
> >> >>     Because the timestamp of items[0] is greater than 10, the left two
> >> >>     fragments
> >> >>
> >> >>     will not be checked. This is what I want to say.
> >> >>
> >> >>
> >> >>
> >> >>     From: yang_y_yi <yang_y_yi@163.com>
> >> >>     Sent: Tuesday, September 22, 2020 9:44 AM
> >> >>     To: Hu, Jiayu <jiayu.hu@intel.com>
> >> >>     Cc: dev@dpdk.org; thomas@monjalon.net; yangyi01@inspur.com
> >> >>     Subject: Re:Re: [dpdk-dev] [PATCH v6 2/3] gro: add VXLAN UDP/IPv4 GRO
> >> >>     support
> >> >>     Importance: High
> >> >>
> >> >>
> >> >>
> >> >>     BTW, start_time is checked for the first packet in a flow,
> >> >>     gro_udp4_merge_items(tbl, j) will merge all the packets in this flow once
> >> >>     if they can be reassembled, gro_udp4_merge_items(tbl, j) doesn't check
> >> >>     start_time, so this still can let some new items in this flow have chance
> >> >>     to be merged.
> >> >>
> >> >>     At 2020-09-22 09:29:38, "yang_y_yi" <yang_y_yi@163.com> wrote:
> >> >>
> >> >>     >Thanks Jiayu, I have fixed other comments except this one:
> >> >>
> >> >>     >
> >> >>
> >> >>     >
> >> >>
> >> >>     >
> >> >>
> >> >>     >>The items of a flow are ordered by frag_oft, and start_time
> >> >>
> >> >>     >>of these items is not always in ascending order. Therefore,
> >> >>
> >> >>     >>you cannot skip checking the items after the item whose
> >> >>
> >> >>     >>start_time is greater than flush_timestamp. This issue also
> >> >>
> >> >>     >>exists in UDP/IPv4 GRO, and need to correct them both.
> >> >>
> >> >>     >
> >> >>
> >> >>     >
> >> >>
> >> >>     >I think the issue here is if we should strictly follow flush_timestamp, it is possible there are new items in items chain. we have chance to merge more packets if we don't follow flush_timestamp. So an ideal change can be this. But is it acceptible if we don't use flush_timestamp? It can flush some packets in advance therefore miss next merge window. Maybe current way is most resonable and a tradeoff between two exterem cases.
> >> >>
> >> >>     >
> >> >>
> >> >>     >
> >> >>
> >> >>     >
> >> >>
> >> >>     >
> >> >>
> >> >>     >
> >> >>
> >> >>     >diff --git a/lib/librte_gro/gro_udp4.c b/lib/librte_gro/gro_udp4.c
> >> >>
> >> >>     >index 061e7b0..ffa35a2 100644
> >> >>
> >> >>     >--- a/lib/librte_gro/gro_udp4.c
> >> >>
> >> >>     >+++ b/lib/librte_gro/gro_udp4.c
> >> >>
> >> >>     >@@ -391,7 +391,6 @@
> >> >>
> >> >>     >
> >> >>
> >> >>     >                j = tbl->flows[i].start_index;
> >> >>
> >> >>     >                while (j != INVALID_ARRAY_INDEX) {
> >> >>
> >> >>     >-                       if (tbl->items[j].start_time <= flush_timestamp) {
> >> >>
> >> >>     >                                gro_udp4_merge_items(tbl, j);
> >> >>
> >> >>     >                                out[k++] = tbl->items[j].firstseg;
> >> >>
> >> >>     >                                if (tbl->items[j].nb_merged > 1)
> >> >>
> >> >>     >@@ -407,12 +406,6 @@
> >> >>
> >> >>     >
> >> >>
> >> >>     >                                if (unlikely(k == nb_out))
> >> >>
> >> >>     >                                        return k;
> >> >>
> >> >>     >-                       } else
> >> >>
> >> >>     >-                               /*
> >> >>
> >> >>     >-                                * The left packets in this flow won't be
> >> >>
> >> >>     >-                                * timeout. Go to check other flows.
> >> >>
> >> >>     >-                                */
> >> >>
> >> >>     >-                               break;
> >> >>
> >> >>     >                }
> >> >>
> >> >>     >        }
> >> >>
> >> >>     >        return k;
> >> >>
> >> >>     >
> >> >>
> >> >>
> >> >>
> >> >>
> >> >>
> >> >>
> >> >>
> >>
> >>
> >>
> >>
> >>
> 
> 
> 
>  
>
yang_y_yi Sept. 24, 2020, 2:41 a.m. UTC | #13
Jiayu, is this comment ok?



diff --git a/lib/librte_gro/gro_udp4.c b/lib/librte_gro/gro_udp4.c
index 061e7b0..93e368a 100644
--- a/lib/librte_gro/gro_udp4.c
+++ b/lib/librte_gro/gro_udp4.c
@@ -409,8 +409,10 @@
                                        return k;
                        } else
                                /*
-                                * The left packets in this flow won't be
-                                * timeout. Go to check other flows.
+                                * If start_time of one item is greater than
+                                * flush_timestamp, that means the left packets
+                                * in this flow won't be flushed this time. Go
+                                * to check other flows.
                                 */
                                break;
                }






















At 2020-09-23 10:43:03, "Jiayu Hu" <jiayu.hu@intel.com> wrote:
>On Wed, Sep 23, 2020 at 10:28:00AM +0800, yang_y_yi wrote:
>> Thanks Jiayu, do you mean not comparing timestamp and flush all the packets in
>> a flow as I showed code, right? If so, we shouldn't provide argument
>> flush_timestamp. But this will result in very bad issues, rte_gro_timeout_flush
>> will be called after rte_gro_reassemble every time, so the result may be you
>> can't reassemble out any original UDP packet because every UDP fragments will
>> be flushed very soon, no chance to reassemble.
>
>No, I mean the design in your patch, which stops flushing packets
>once find one whose timestamp is greater than flush_timestamp.
>
>> 
>> At 2020-09-23 10:15:12, "Jiayu Hu" <jiayu.hu@intel.com> wrote:
>> >On Tue, Sep 22, 2020 at 03:38:29PM +0800, yang_y_yi wrote:
>> >> The problem is timestamp of which one item in a flow we should use as timestamp
>> >> reference base for this flow, for merge (reassemble), only the first packet can
>> >> trigger merge of all the packets, merge is forward not backward, if you
>> >> traverse the whole linked list in this flow to get the oldest timestamp and
>> >> compare it with flushtime, that will be not worthy, in the worst case (say I
>> >> send a 64K UDP packet and MTU is 1450), you will have 46 items to check to get
>> >> the oldest timestamp by linked list.
>> >
>> >OK, I got the point. I agree to flush packets without strictly
>> >obeying timestamp. But you need to change the comment in the
>> >code and clarify the design for both UDP and VxLAN GRO patch.
>> >Current comment "The left packets in ..." is not appropriate
>> >for your design.
>> >
>> >>
>> >>
>> >> I'm not sure what inconsistentcy you're saying mean.
>> >>
>> >> At 2020-09-22 14:55:46, "Jiayu Hu" <jiayu.hu@intel.com> wrote:
>> >> >On Tue, Sep 22, 2020 at 02:23:39PM +0800, yang_y_yi wrote:
>> >> >> Not a question, in next flush, they will be flushed, we have to check timestamp
>> >> >> in the first time unless we don't strictly follow this time limitation.
>> >> >
>> >> >who will check the timestamp? I did't get the point.
>> >> >
>> >> >IMO, this will cause inconsistency of the rte_gro_timeout_flush().
>> >> >BTW, what stops you to traverse all items and check timestamp
>> >> >before flush them out?
>> >> >
>> >> >>
>> >> >> At 2020-09-22 14:14:00, "Hu, Jiayu" <jiayu.hu@intel.com> wrote:
>> >> >>
>> >> >>     Fragments of a flow are sorted by frag_oft, but they may have different
>> >> >>
>> >> >>     timestamp. For example, there are three fragments, whose frag_oft is:
>> >> >>
>> >> >>     frag[0].frag_oft=0, frag[1].frag_oft=4, frag[2].frag_oft=6; and they are
>> >> >>
>> >> >>     fragments of one UDP packet but are not neighbors. In the first RX burst,
>> >> >>
>> >> >>     host receives frag[1] and calls rte_gro_reassemble(), and we assume the
>> >> >>
>> >> >>     timestamp of frag[1] is 10; in the second RX burst, host receives frag[0]
>> >> >>
>> >> >>     and also call rte_gro_reassemble(), and timestamp of frag[0] is 11; the
>> >> >>
>> >> >>     third time, host receives frag[2] and timestamp of frag[2] is 12. The three
>> >> >>
>> >> >>     fragments are stored in three items of a UDP GRO table:
>> >> >>
>> >> >>     items[0]: frag[0], timestamp is 11
>> >> >>
>> >> >>     items[1]: frag[1], timestamp is 10
>> >> >>
>> >> >>     items[2]: frag[2], timestamp is 12
>> >> >>
>> >> >>     Now we want to flush packets whose timestamp is less than or equal to
>> >> >>
>> >> >>     10. frag[1] should be returned, but in your code, no packets will be
>> >> >>     flushed.
>> >> >>
>> >> >>     Because the timestamp of items[0] is greater than 10, the left two
>> >> >>     fragments
>> >> >>
>> >> >>     will not be checked. This is what I want to say.
>> >> >>
>> >> >>
>> >> >>
>> >> >>     From: yang_y_yi <yang_y_yi@163.com>
>> >> >>     Sent: Tuesday, September 22, 2020 9:44 AM
>> >> >>     To: Hu, Jiayu <jiayu.hu@intel.com>
>> >> >>     Cc: dev@dpdk.org; thomas@monjalon.net; yangyi01@inspur.com
>> >> >>     Subject: Re:Re: [dpdk-dev] [PATCH v6 2/3] gro: add VXLAN UDP/IPv4 GRO
>> >> >>     support
>> >> >>     Importance: High
>> >> >>
>> >> >>
>> >> >>
>> >> >>     BTW, start_time is checked for the first packet in a flow,
>> >> >>     gro_udp4_merge_items(tbl, j) will merge all the packets in this flow once
>> >> >>     if they can be reassembled, gro_udp4_merge_items(tbl, j) doesn't check
>> >> >>     start_time, so this still can let some new items in this flow have chance
>> >> >>     to be merged.
>> >> >>
>> >> >>     At 2020-09-22 09:29:38, "yang_y_yi" <yang_y_yi@163.com> wrote:
>> >> >>
>> >> >>     >Thanks Jiayu, I have fixed other comments except this one:
>> >> >>
>> >> >>     >
>> >> >>
>> >> >>     >
>> >> >>
>> >> >>     >
>> >> >>
>> >> >>     >>The items of a flow are ordered by frag_oft, and start_time
>> >> >>
>> >> >>     >>of these items is not always in ascending order. Therefore,
>> >> >>
>> >> >>     >>you cannot skip checking the items after the item whose
>> >> >>
>> >> >>     >>start_time is greater than flush_timestamp. This issue also
>> >> >>
>> >> >>     >>exists in UDP/IPv4 GRO, and need to correct them both.
>> >> >>
>> >> >>     >
>> >> >>
>> >> >>     >
>> >> >>
>> >> >>     >I think the issue here is if we should strictly follow flush_timestamp, it is possible there are new items in items chain. we have chance to merge more packets if we don't follow flush_timestamp. So an ideal change can be this. But is it acceptible if we don't use flush_timestamp? It can flush some packets in advance therefore miss next merge window. Maybe current way is most resonable and a tradeoff between two exterem cases.
>> >> >>
>> >> >>     >
>> >> >>
>> >> >>     >
>> >> >>
>> >> >>     >
>> >> >>
>> >> >>     >
>> >> >>
>> >> >>     >
>> >> >>
>> >> >>     >diff --git a/lib/librte_gro/gro_udp4.c b/lib/librte_gro/gro_udp4.c
>> >> >>
>> >> >>     >index 061e7b0..ffa35a2 100644
>> >> >>
>> >> >>     >--- a/lib/librte_gro/gro_udp4.c
>> >> >>
>> >> >>     >+++ b/lib/librte_gro/gro_udp4.c
>> >> >>
>> >> >>     >@@ -391,7 +391,6 @@
>> >> >>
>> >> >>     >
>> >> >>
>> >> >>     >                j = tbl->flows[i].start_index;
>> >> >>
>> >> >>     >                while (j != INVALID_ARRAY_INDEX) {
>> >> >>
>> >> >>     >-                       if (tbl->items[j].start_time <= flush_timestamp) {
>> >> >>
>> >> >>     >                                gro_udp4_merge_items(tbl, j);
>> >> >>
>> >> >>     >                                out[k++] = tbl->items[j].firstseg;
>> >> >>
>> >> >>     >                                if (tbl->items[j].nb_merged > 1)
>> >> >>
>> >> >>     >@@ -407,12 +406,6 @@
>> >> >>
>> >> >>     >
>> >> >>
>> >> >>     >                                if (unlikely(k == nb_out))
>> >> >>
>> >> >>     >                                        return k;
>> >> >>
>> >> >>     >-                       } else
>> >> >>
>> >> >>     >-                               /*
>> >> >>
>> >> >>     >-                                * The left packets in this flow won't be
>> >> >>
>> >> >>     >-                                * timeout. Go to check other flows.
>> >> >>
>> >> >>     >-                                */
>> >> >>
>> >> >>     >-                               break;
>> >> >>
>> >> >>     >                }
>> >> >>
>> >> >>     >        }
>> >> >>
>> >> >>     >        return k;
>> >> >>
>> >> >>     >
>> >> >>
>> >> >>
>> >> >>
>> >> >>
>> >> >>
>> >> >>
>> >> >>
>> >>
>> >>
>> >>
>> >>
>> >>
>> 
>> 
>> 
>>  
>>

Patch
diff mbox series

diff --git a/lib/librte_gro/gro_udp4.h b/lib/librte_gro/gro_udp4.h
index 0a078e4..d38b393 100644
--- a/lib/librte_gro/gro_udp4.h
+++ b/lib/librte_gro/gro_udp4.h
@@ -7,6 +7,7 @@ 
 
 #include <rte_ip.h>
 #include <rte_udp.h>
+#include <rte_vxlan.h>
 
 #define INVALID_ARRAY_INDEX 0xffffffffUL
 #define GRO_UDP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
diff --git a/lib/librte_gro/gro_vxlan_udp4.c b/lib/librte_gro/gro_vxlan_udp4.c
new file mode 100644
index 0000000..4eece56
--- /dev/null
+++ b/lib/librte_gro/gro_vxlan_udp4.c
@@ -0,0 +1,542 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Inspur Corporation
+ */
+
+#include <rte_malloc.h>
+#include <rte_mbuf.h>
+#include <rte_cycles.h>
+#include <rte_ethdev.h>
+#include <rte_udp.h>
+
+#include "gro_vxlan_udp4.h"
+
+void *
+gro_vxlan_udp4_tbl_create(uint16_t socket_id,
+		uint16_t max_flow_num,
+		uint16_t max_item_per_flow)
+{
+	struct gro_vxlan_udp4_tbl *tbl;
+	size_t size;
+	uint32_t entries_num, i;
+
+	entries_num = max_flow_num * max_item_per_flow;
+	entries_num = RTE_MIN(entries_num, GRO_VXLAN_UDP4_TBL_MAX_ITEM_NUM);
+
+	if (entries_num == 0)
+		return NULL;
+
+	tbl = rte_zmalloc_socket(__func__,
+			sizeof(struct gro_vxlan_udp4_tbl),
+			RTE_CACHE_LINE_SIZE,
+			socket_id);
+	if (tbl == NULL)
+		return NULL;
+
+	size = sizeof(struct gro_vxlan_udp4_item) * entries_num;
+	tbl->items = rte_zmalloc_socket(__func__,
+			size,
+			RTE_CACHE_LINE_SIZE,
+			socket_id);
+	if (tbl->items == NULL) {
+		rte_free(tbl);
+		return NULL;
+	}
+	tbl->max_item_num = entries_num;
+
+	size = sizeof(struct gro_vxlan_udp4_flow) * entries_num;
+	tbl->flows = rte_zmalloc_socket(__func__,
+			size,
+			RTE_CACHE_LINE_SIZE,
+			socket_id);
+	if (tbl->flows == NULL) {
+		rte_free(tbl->items);
+		rte_free(tbl);
+		return NULL;
+	}
+
+	for (i = 0; i < entries_num; i++)
+		tbl->flows[i].start_index = INVALID_ARRAY_INDEX;
+	tbl->max_flow_num = entries_num;
+
+	return tbl;
+}
+
+void
+gro_vxlan_udp4_tbl_destroy(void *tbl)
+{
+	struct gro_vxlan_udp4_tbl *vxlan_tbl = tbl;
+
+	if (vxlan_tbl) {
+		rte_free(vxlan_tbl->items);
+		rte_free(vxlan_tbl->flows);
+	}
+	rte_free(vxlan_tbl);
+}
+
+static inline uint32_t
+find_an_empty_item(struct gro_vxlan_udp4_tbl *tbl)
+{
+	uint32_t max_item_num = tbl->max_item_num, i;
+
+	for (i = 0; i < max_item_num; i++)
+		if (tbl->items[i].inner_item.firstseg == NULL)
+			return i;
+	return INVALID_ARRAY_INDEX;
+}
+
+static inline uint32_t
+find_an_empty_flow(struct gro_vxlan_udp4_tbl *tbl)
+{
+	uint32_t max_flow_num = tbl->max_flow_num, i;
+
+	for (i = 0; i < max_flow_num; i++)
+		if (tbl->flows[i].start_index == INVALID_ARRAY_INDEX)
+			return i;
+	return INVALID_ARRAY_INDEX;
+}
+
+static inline uint32_t
+insert_new_item(struct gro_vxlan_udp4_tbl *tbl,
+		struct rte_mbuf *pkt,
+		uint64_t start_time,
+		uint32_t prev_idx,
+		uint16_t frag_offset,
+		uint8_t is_last_frag)
+{
+	uint32_t item_idx;
+
+	item_idx = find_an_empty_item(tbl);
+	if (unlikely(item_idx == INVALID_ARRAY_INDEX))
+		return INVALID_ARRAY_INDEX;
+
+	tbl->items[item_idx].inner_item.firstseg = pkt;
+	tbl->items[item_idx].inner_item.lastseg = rte_pktmbuf_lastseg(pkt);
+	tbl->items[item_idx].inner_item.start_time = start_time;
+	tbl->items[item_idx].inner_item.next_pkt_idx = INVALID_ARRAY_INDEX;
+	tbl->items[item_idx].inner_item.frag_offset = frag_offset;
+	tbl->items[item_idx].inner_item.is_last_frag = is_last_frag;
+	tbl->items[item_idx].inner_item.nb_merged = 1;
+	tbl->item_num++;
+
+	/* If the previous packet exists, chain the new one with it. */
+	if (prev_idx != INVALID_ARRAY_INDEX) {
+		tbl->items[item_idx].inner_item.next_pkt_idx =
+			tbl->items[prev_idx].inner_item.next_pkt_idx;
+		tbl->items[prev_idx].inner_item.next_pkt_idx = item_idx;
+	}
+
+	return item_idx;
+}
+
+static inline uint32_t
+delete_item(struct gro_vxlan_udp4_tbl *tbl,
+		uint32_t item_idx,
+		uint32_t prev_item_idx)
+{
+	uint32_t next_idx = tbl->items[item_idx].inner_item.next_pkt_idx;
+
+	/* NULL indicates an empty item. */
+	tbl->items[item_idx].inner_item.firstseg = NULL;
+	tbl->item_num--;
+	if (prev_item_idx != INVALID_ARRAY_INDEX)
+		tbl->items[prev_item_idx].inner_item.next_pkt_idx = next_idx;
+
+	return next_idx;
+}
+
+static inline uint32_t
+insert_new_flow(struct gro_vxlan_udp4_tbl *tbl,
+		struct vxlan_udp4_flow_key *src,
+		uint32_t item_idx)
+{
+	struct vxlan_udp4_flow_key *dst;
+	uint32_t flow_idx;
+
+	flow_idx = find_an_empty_flow(tbl);
+	if (unlikely(flow_idx == INVALID_ARRAY_INDEX))
+		return INVALID_ARRAY_INDEX;
+
+	dst = &(tbl->flows[flow_idx].key);
+
+	rte_ether_addr_copy(&(src->inner_key.eth_saddr),
+			&(dst->inner_key.eth_saddr));
+	rte_ether_addr_copy(&(src->inner_key.eth_daddr),
+			&(dst->inner_key.eth_daddr));
+	dst->inner_key.ip_src_addr = src->inner_key.ip_src_addr;
+	dst->inner_key.ip_dst_addr = src->inner_key.ip_dst_addr;
+	dst->inner_key.ip_id = src->inner_key.ip_id;
+
+	dst->vxlan_hdr.vx_flags = src->vxlan_hdr.vx_flags;
+	dst->vxlan_hdr.vx_vni = src->vxlan_hdr.vx_vni;
+	rte_ether_addr_copy(&(src->outer_eth_saddr), &(dst->outer_eth_saddr));
+	rte_ether_addr_copy(&(src->outer_eth_daddr), &(dst->outer_eth_daddr));
+	dst->outer_ip_src_addr = src->outer_ip_src_addr;
+	dst->outer_ip_dst_addr = src->outer_ip_dst_addr;
+	dst->outer_dst_port = src->outer_dst_port;
+
+	tbl->flows[flow_idx].start_index = item_idx;
+	tbl->flow_num++;
+
+	return flow_idx;
+}
+
+static inline int
+is_same_vxlan_udp4_flow(struct vxlan_udp4_flow_key k1,
+		struct vxlan_udp4_flow_key k2)
+{
+	/* For VxLAN packet, outer udp src port is calculated from
+	 * inner packet RSS hash, udp src port of the first UDP
+	 * fragment is different from one of other UDP fragments
+	 * even if they are same flow, so we have to skip outer udp
+	 * src port comparison here.
+	 */
+	return (rte_is_same_ether_addr(&k1.outer_eth_saddr,
+					&k2.outer_eth_saddr) &&
+			rte_is_same_ether_addr(&k1.outer_eth_daddr,
+				&k2.outer_eth_daddr) &&
+			(k1.outer_ip_src_addr == k2.outer_ip_src_addr) &&
+			(k1.outer_ip_dst_addr == k2.outer_ip_dst_addr) &&
+			(k1.outer_dst_port == k2.outer_dst_port) &&
+			(k1.vxlan_hdr.vx_flags == k2.vxlan_hdr.vx_flags) &&
+			(k1.vxlan_hdr.vx_vni == k2.vxlan_hdr.vx_vni) &&
+			is_same_udp4_flow(k1.inner_key, k2.inner_key));
+}
+
+static inline int
+udp4_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,
+		uint16_t frag_offset,
+		uint16_t ip_dl)
+{
+	struct rte_mbuf *pkt = item->inner_item.firstseg;
+	int cmp;
+	uint16_t l2_offset;
+	int ret = 0;
+
+	/* Note: if outer DF bit is set, i.e outer_is_atomic is 0,
+	 * we needn't compare outer_ip_id because they are same,
+	 * for the case outer_is_atomic is 1, we also have no way
+	 * to compare outer_ip_id because the difference between
+	 * outer_ip_ids of two received packets isn't always +/-1.
+	 * So skip outer_ip_id comparison here.
+	 */
+
+	l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
+	cmp = udp4_check_neighbor(&item->inner_item, frag_offset, ip_dl,
+					l2_offset);
+	if (cmp > 0)
+		/* Append the new packet. */
+		ret = 1;
+	else if (cmp < 0)
+		/* Prepend the new packet. */
+		ret = -1;
+
+	return ret;
+}
+
+static inline int
+merge_two_vxlan_udp4_packets(struct gro_vxlan_udp4_item *item,
+		struct rte_mbuf *pkt,
+		int cmp,
+		uint16_t frag_offset,
+		uint8_t is_last_frag)
+{
+	if (merge_two_udp4_packets(&item->inner_item, pkt, cmp, frag_offset,
+				is_last_frag,
+				pkt->outer_l2_len + pkt->outer_l3_len)) {
+		return 1;
+	}
+
+	return 0;
+}
+
+static inline void
+update_vxlan_header(struct gro_vxlan_udp4_item *item)
+{
+	struct rte_ipv4_hdr *ipv4_hdr;
+	struct rte_udp_hdr *udp_hdr;
+	struct rte_mbuf *pkt = item->inner_item.firstseg;
+	uint16_t len;
+	uint16_t frag_offset;
+
+	/* Update the outer IPv4 header. */
+	len = pkt->pkt_len - pkt->outer_l2_len;
+	ipv4_hdr = (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
+			pkt->outer_l2_len);
+	ipv4_hdr->total_length = rte_cpu_to_be_16(len);
+
+	/* Update the outer UDP header. */
+	len -= pkt->outer_l3_len;
+	udp_hdr = (struct rte_udp_hdr *)((char *)ipv4_hdr + pkt->outer_l3_len);
+	udp_hdr->dgram_len = rte_cpu_to_be_16(len);
+
+	/* Update the inner IPv4 header. */
+	len -= pkt->l2_len;
+	ipv4_hdr = (struct rte_ipv4_hdr *)((char *)udp_hdr + pkt->l2_len);
+	ipv4_hdr->total_length = rte_cpu_to_be_16(len);
+
+	/* Clear MF bit if it is last fragment */
+	if (item->inner_item.is_last_frag) {
+		frag_offset = rte_be_to_cpu_16(ipv4_hdr->fragment_offset);
+		ipv4_hdr->fragment_offset =
+			rte_cpu_to_be_16(frag_offset & ~RTE_IPV4_HDR_MF_FLAG);
+	}
+}
+
+int32_t
+gro_vxlan_udp4_reassemble(struct rte_mbuf *pkt,
+		struct gro_vxlan_udp4_tbl *tbl,
+		uint64_t start_time)
+{
+	struct rte_ether_hdr *outer_eth_hdr, *eth_hdr;
+	struct rte_ipv4_hdr *outer_ipv4_hdr, *ipv4_hdr;
+	struct rte_udp_hdr *udp_hdr;
+	struct rte_vxlan_hdr *vxlan_hdr;
+	uint16_t frag_offset;
+	uint8_t is_last_frag;
+	int16_t ip_dl;
+	uint16_t ip_id;
+
+	struct vxlan_udp4_flow_key key;
+	uint32_t cur_idx, prev_idx, item_idx;
+	uint32_t i, max_flow_num, remaining_flow_num;
+	int cmp;
+	uint16_t hdr_len;
+	uint8_t find;
+
+	outer_eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *);
+	outer_ipv4_hdr = (struct rte_ipv4_hdr *)((char *)outer_eth_hdr +
+			pkt->outer_l2_len);
+
+	udp_hdr = (struct rte_udp_hdr *)((char *)outer_ipv4_hdr +
+			pkt->outer_l3_len);
+	vxlan_hdr = (struct rte_vxlan_hdr *)((char *)udp_hdr +
+			sizeof(struct rte_udp_hdr));
+	eth_hdr = (struct rte_ether_hdr *)((char *)vxlan_hdr +
+			sizeof(struct rte_vxlan_hdr));
+	/* l2_len = outer udp hdr len + vxlan hdr len + inner l2 len */
+	ipv4_hdr = (struct rte_ipv4_hdr *)((char *)udp_hdr + pkt->l2_len);
+
+	/*
+	 * Don't process the packet which has non-fragment inner IP.
+	 */
+	if (!is_ipv4_fragment(ipv4_hdr))
+		return -1;
+
+	hdr_len = pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2_len +
+		pkt->l3_len;
+	/*
+	 * Don't process the packet whose payload length is less than or
+	 * equal to 0.
+	 */
+	if (pkt->pkt_len <= hdr_len)
+		return -1;
+
+	ip_dl = pkt->pkt_len - hdr_len;
+
+	ip_id = rte_be_to_cpu_16(ipv4_hdr->packet_id);
+	frag_offset = rte_be_to_cpu_16(ipv4_hdr->fragment_offset);
+	is_last_frag = ((frag_offset & RTE_IPV4_HDR_MF_FLAG) == 0) ? 1 : 0;
+	frag_offset = (uint16_t)(frag_offset & RTE_IPV4_HDR_OFFSET_MASK) << 3;
+
+	rte_ether_addr_copy(&(eth_hdr->s_addr), &(key.inner_key.eth_saddr));
+	rte_ether_addr_copy(&(eth_hdr->d_addr), &(key.inner_key.eth_daddr));
+	key.inner_key.ip_src_addr = ipv4_hdr->src_addr;
+	key.inner_key.ip_dst_addr = ipv4_hdr->dst_addr;
+	key.inner_key.ip_id = ip_id;
+
+	key.vxlan_hdr.vx_flags = vxlan_hdr->vx_flags;
+	key.vxlan_hdr.vx_vni = vxlan_hdr->vx_vni;
+	rte_ether_addr_copy(&(outer_eth_hdr->s_addr), &(key.outer_eth_saddr));
+	rte_ether_addr_copy(&(outer_eth_hdr->d_addr), &(key.outer_eth_daddr));
+	key.outer_ip_src_addr = outer_ipv4_hdr->src_addr;
+	key.outer_ip_dst_addr = outer_ipv4_hdr->dst_addr;
+	/* Note: It is unnecessary to save outer_src_port here because it can
+	 * be different for VxLAN UDP fragments from the same flow.
+	 */
+	key.outer_dst_port = udp_hdr->dst_port;
+
+	/* Search for a matched flow. */
+	max_flow_num = tbl->max_flow_num;
+	remaining_flow_num = tbl->flow_num;
+	find = 0;
+	for (i = 0; i < max_flow_num && remaining_flow_num; i++) {
+		if (tbl->flows[i].start_index != INVALID_ARRAY_INDEX) {
+			if (is_same_vxlan_udp4_flow(tbl->flows[i].key, key)) {
+				find = 1;
+				break;
+			}
+			remaining_flow_num--;
+		}
+	}
+
+	/*
+	 * Can't find a matched flow. Insert a new flow and store the
+	 * packet into the flow.
+	 */
+	if (find == 0) {
+		item_idx = insert_new_item(tbl, pkt, start_time,
+				INVALID_ARRAY_INDEX, frag_offset,
+				is_last_frag);
+		if (unlikely(item_idx == INVALID_ARRAY_INDEX))
+			return -1;
+		if (insert_new_flow(tbl, &key, item_idx) ==
+				INVALID_ARRAY_INDEX) {
+			/*
+			 * Fail to insert a new flow, so
+			 * delete the inserted packet.
+			 */
+			delete_item(tbl, item_idx, INVALID_ARRAY_INDEX);
+			return -1;
+		}
+		return 0;
+	}
+
+	/* Check all packets in the flow and try to find a neighbor. */
+	cur_idx = tbl->flows[i].start_index;
+	prev_idx = cur_idx;
+	do {
+		cmp = udp4_check_vxlan_neighbor(&(tbl->items[cur_idx]),
+				frag_offset, ip_dl);
+		if (cmp) {
+			if (merge_two_vxlan_udp4_packets(
+						&(tbl->items[cur_idx]),
+						pkt, cmp, frag_offset,
+						is_last_frag)) {
+				return 1;
+			}
+			/*
+			 * Can't merge two packets, as the packet
+			 * length will be greater than the max value.
+			 * Insert the packet into the flow.
+			 */
+			if (insert_new_item(tbl, pkt, start_time, prev_idx,
+						frag_offset, is_last_frag) ==
+					INVALID_ARRAY_INDEX)
+				return -1;
+			return 0;
+		}
+
+		/* Ensure inserted items are ordered by frag_offset */
+		if (frag_offset
+			< tbl->items[cur_idx].inner_item.frag_offset) {
+			break;
+		}
+
+		prev_idx = cur_idx;
+		cur_idx = tbl->items[cur_idx].inner_item.next_pkt_idx;
+	} while (cur_idx != INVALID_ARRAY_INDEX);
+
+	/* Can't find neighbor. Insert the packet into the flow. */
+	if (cur_idx == tbl->flows[i].start_index) {
+		/* Insert it before the first packet of the flow */
+		item_idx = insert_new_item(tbl, pkt, start_time,
+				INVALID_ARRAY_INDEX, frag_offset,
+				is_last_frag);
+		if (unlikely(item_idx == INVALID_ARRAY_INDEX))
+			return -1;
+		tbl->items[item_idx].inner_item.next_pkt_idx = cur_idx;
+		tbl->flows[i].start_index = item_idx;
+	} else {
+		if (insert_new_item(tbl, pkt, start_time, prev_idx,
+					frag_offset, is_last_frag
+					) == INVALID_ARRAY_INDEX)
+			return -1;
+	}
+
+	return 0;
+}
+
+static int
+gro_vxlan_udp4_merge_items(struct gro_vxlan_udp4_tbl *tbl,
+			   uint32_t start_idx)
+{
+	uint16_t frag_offset;
+	uint8_t is_last_frag;
+	int16_t ip_dl;
+	struct rte_mbuf *pkt;
+	int cmp;
+	uint32_t item_idx;
+	uint16_t hdr_len;
+
+	item_idx = tbl->items[start_idx].inner_item.next_pkt_idx;
+	while (item_idx != INVALID_ARRAY_INDEX) {
+		pkt = tbl->items[item_idx].inner_item.firstseg;
+		hdr_len = pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2_len +
+			pkt->l3_len;
+		ip_dl = pkt->pkt_len - hdr_len;
+		frag_offset = tbl->items[item_idx].inner_item.frag_offset;
+		is_last_frag = tbl->items[item_idx].inner_item.is_last_frag;
+		cmp = udp4_check_vxlan_neighbor(&(tbl->items[start_idx]),
+					frag_offset, ip_dl);
+		if (cmp) {
+			if (merge_two_vxlan_udp4_packets(
+					&(tbl->items[start_idx]),
+					pkt, cmp, frag_offset,
+					is_last_frag)) {
+				item_idx = delete_item(tbl, item_idx,
+							INVALID_ARRAY_INDEX);
+				tbl->items[start_idx].inner_item.next_pkt_idx
+					= item_idx;
+			} else
+				return 0;
+		} else
+			return 0;
+	}
+
+	return 0;
+}
+
+uint16_t
+gro_vxlan_udp4_tbl_timeout_flush(struct gro_vxlan_udp4_tbl *tbl,
+		uint64_t flush_timestamp,
+		struct rte_mbuf **out,
+		uint16_t nb_out)
+{
+	uint16_t k = 0;
+	uint32_t i, j;
+	uint32_t max_flow_num = tbl->max_flow_num;
+
+	for (i = 0; i < max_flow_num; i++) {
+		if (unlikely(tbl->flow_num == 0))
+			return k;
+
+		j = tbl->flows[i].start_index;
+		while (j != INVALID_ARRAY_INDEX) {
+			if (tbl->items[j].inner_item.start_time <=
+					flush_timestamp) {
+				gro_vxlan_udp4_merge_items(tbl, j);
+				out[k++] = tbl->items[j].inner_item.firstseg;
+				if (tbl->items[j].inner_item.nb_merged > 1)
+					update_vxlan_header(&(tbl->items[j]));
+				/*
+				 * Delete the item and get the next packet
+				 * index.
+				 */
+				j = delete_item(tbl, j, INVALID_ARRAY_INDEX);
+				tbl->flows[i].start_index = j;
+				if (j == INVALID_ARRAY_INDEX)
+					tbl->flow_num--;
+
+				if (unlikely(k == nb_out))
+					return k;
+			} else
+				/*
+				 * The left packets in the flow won't be
+				 * timeout. Go to check other flows.
+				 */
+				break;
+		}
+	}
+	return k;
+}
+
+uint32_t
+gro_vxlan_udp4_tbl_pkt_count(void *tbl)
+{
+	struct gro_vxlan_udp4_tbl *gro_tbl = tbl;
+
+	if (gro_tbl)
+		return gro_tbl->item_num;
+
+	return 0;
+}
diff --git a/lib/librte_gro/gro_vxlan_udp4.h b/lib/librte_gro/gro_vxlan_udp4.h
new file mode 100644
index 0000000..6a42fb3
--- /dev/null
+++ b/lib/librte_gro/gro_vxlan_udp4.h
@@ -0,0 +1,154 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Inspur Corporation
+ */
+
+#ifndef _GRO_VXLAN_UDP4_H_
+#define _GRO_VXLAN_UDP4_H_
+
+#include "gro_udp4.h"
+
+#define GRO_VXLAN_UDP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
+
+/* Header fields representing a VxLAN flow */
+struct vxlan_udp4_flow_key {
+	struct udp4_flow_key inner_key;
+	struct rte_vxlan_hdr vxlan_hdr;
+
+	struct rte_ether_addr outer_eth_saddr;
+	struct rte_ether_addr outer_eth_daddr;
+
+	uint32_t outer_ip_src_addr;
+	uint32_t outer_ip_dst_addr;
+
+	/* Note: It is unnecessary to save outer_src_port here because it can
+	 * be different for VxLAN UDP fragments from the same flow.
+	 */
+	uint16_t outer_dst_port;
+
+};
+
+struct gro_vxlan_udp4_flow {
+	struct vxlan_udp4_flow_key key;
+	/*
+	 * The index of the first packet in the flow. INVALID_ARRAY_INDEX
+	 * indicates an empty flow.
+	 */
+	uint32_t start_index;
+};
+
+struct gro_vxlan_udp4_item {
+	struct gro_udp4_item inner_item;
+	/* Note: VXLAN UDP/IPv4 GRO needn't check outer_ip_id because
+	 * the difference between outer_ip_ids of two received packets
+	 * isn't always +/-1 in case of OVS DPDK. So no outer_ip_id
+	 * and outer_is_atomic fields here.
+	 */
+};
+
+/*
+ * VxLAN (with an outer IPv4 header and an inner UDP/IPv4 packet)
+ * reassembly table structure
+ */
+struct gro_vxlan_udp4_tbl {
+	/* item array */
+	struct gro_vxlan_udp4_item *items;
+	/* flow array */
+	struct gro_vxlan_udp4_flow *flows;
+	/* current item number */
+	uint32_t item_num;
+	/* current flow number */
+	uint32_t flow_num;
+	/* the maximum item number */
+	uint32_t max_item_num;
+	/* the maximum flow number */
+	uint32_t max_flow_num;
+};
+
+/**
+ * This function creates a VxLAN reassembly table for VxLAN packets
+ * which have an outer IPv4 header and an inner UDP/IPv4 packet.
+ *
+ * @param socket_id
+ *  Socket index for allocating the table
+ * @param max_flow_num
+ *  The maximum number of flows in the table
+ * @param max_item_per_flow
+ *  The maximum number of packets per flow
+ *
+ * @return
+ *  - Return the table pointer on success.
+ *  - Return NULL on failure.
+ */
+void *gro_vxlan_udp4_tbl_create(uint16_t socket_id,
+		uint16_t max_flow_num,
+		uint16_t max_item_per_flow);
+
+/**
+ * This function destroys a VxLAN reassembly table.
+ *
+ * @param tbl
+ *  Pointer pointing to the VxLAN reassembly table
+ */
+void gro_vxlan_udp4_tbl_destroy(void *tbl);
+
+/**
+ * This function merges a VxLAN packet which has an outer IPv4 header and
+ * an inner UDP/IPv4 packet. It does not process the packet which does not
+ * have payload.
+ *
+ * This function does not check if the packet has correct checksums and
+ * does not re-calculate checksums for the merged packet. It returns the
+ * packet if there is no available space in the table.
+ *
+ * @param pkt
+ *  Packet to reassemble
+ * @param tbl
+ *  Pointer pointing to the VxLAN reassembly table
+ * @start_time
+ *  The time when the packet is inserted into the table
+ *
+ * @return
+ *  - Return a positive value if the packet is merged.
+ *  - Return zero if the packet isn't merged but stored in the table.
+ *  - Return a negative value for invalid parameters or no available
+ *    space in the table.
+ */
+int32_t gro_vxlan_udp4_reassemble(struct rte_mbuf *pkt,
+		struct gro_vxlan_udp4_tbl *tbl,
+		uint64_t start_time);
+
+/**
+ * This function flushes timeout packets in the VxLAN reassembly table,
+ * and without updating checksums.
+ *
+ * @param tbl
+ *  Pointer pointing to a VxLAN GRO table
+ * @param flush_timestamp
+ *  This function flushes packets which are inserted into the table
+ *  before or at the flush_timestamp.
+ * @param out
+ *  Pointer array used to keep flushed packets
+ * @param nb_out
+ *  The element number in 'out'. It also determines the maximum number of
+ *  packets that can be flushed finally.
+ *
+ * @return
+ *  The number of flushed packets
+ */
+uint16_t gro_vxlan_udp4_tbl_timeout_flush(struct gro_vxlan_udp4_tbl *tbl,
+		uint64_t flush_timestamp,
+		struct rte_mbuf **out,
+		uint16_t nb_out);
+
+/**
+ * This function returns the number of the packets in a VxLAN
+ * reassembly table.
+ *
+ * @param tbl
+ *  Pointer pointing to the VxLAN reassembly table
+ *
+ * @return
+ *  The number of packets in the table
+ */
+uint32_t gro_vxlan_udp4_tbl_pkt_count(void *tbl);
+#endif
diff --git a/lib/librte_gro/meson.build b/lib/librte_gro/meson.build
index 0d18dc2..ea8b45c 100644
--- a/lib/librte_gro/meson.build
+++ b/lib/librte_gro/meson.build
@@ -1,6 +1,6 @@ 
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation
 
-sources = files('rte_gro.c', 'gro_tcp4.c', 'gro_udp4.c', 'gro_vxlan_tcp4.c')
+sources = files('rte_gro.c', 'gro_tcp4.c', 'gro_udp4.c', 'gro_vxlan_tcp4.c', 'gro_vxlan_udp4.c')
 headers = files('rte_gro.h')
 deps += ['ethdev']
diff --git a/lib/librte_gro/rte_gro.c b/lib/librte_gro/rte_gro.c
index f623230..db990cf 100644
--- a/lib/librte_gro/rte_gro.c
+++ b/lib/librte_gro/rte_gro.c
@@ -11,6 +11,7 @@ 
 #include "gro_tcp4.h"
 #include "gro_udp4.h"
 #include "gro_vxlan_tcp4.h"
+#include "gro_vxlan_udp4.h"
 
 typedef void *(*gro_tbl_create_fn)(uint16_t socket_id,
 		uint16_t max_flow_num,
@@ -20,14 +21,14 @@ 
 
 static gro_tbl_create_fn tbl_create_fn[RTE_GRO_TYPE_MAX_NUM] = {
 		gro_tcp4_tbl_create, gro_vxlan_tcp4_tbl_create,
-		gro_udp4_tbl_create, NULL};
+		gro_udp4_tbl_create, gro_vxlan_udp4_tbl_create, NULL};
 static gro_tbl_destroy_fn tbl_destroy_fn[RTE_GRO_TYPE_MAX_NUM] = {
 			gro_tcp4_tbl_destroy, gro_vxlan_tcp4_tbl_destroy,
-			gro_udp4_tbl_destroy,
+			gro_udp4_tbl_destroy, gro_vxlan_udp4_tbl_destroy,
 			NULL};
 static gro_tbl_pkt_count_fn tbl_pkt_count_fn[RTE_GRO_TYPE_MAX_NUM] = {
 			gro_tcp4_tbl_pkt_count, gro_vxlan_tcp4_tbl_pkt_count,
-			gro_udp4_tbl_pkt_count,
+			gro_udp4_tbl_pkt_count, gro_vxlan_udp4_tbl_pkt_count,
 			NULL};
 
 #define IS_IPV4_TCP_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \
@@ -47,6 +48,16 @@ 
 		     RTE_PTYPE_INNER_L3_IPV4_EXT | \
 		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN)) != 0))
 
+#define IS_IPV4_VXLAN_UDP4_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \
+		((ptype & RTE_PTYPE_L4_UDP) == RTE_PTYPE_L4_UDP) && \
+		((ptype & RTE_PTYPE_TUNNEL_VXLAN) == \
+		 RTE_PTYPE_TUNNEL_VXLAN) && \
+		 ((ptype & RTE_PTYPE_INNER_L4_UDP) == \
+		  RTE_PTYPE_INNER_L4_UDP) && \
+		  (((ptype & RTE_PTYPE_INNER_L3_MASK) & \
+		    (RTE_PTYPE_INNER_L3_IPV4 | \
+		     RTE_PTYPE_INNER_L3_IPV4_EXT | \
+		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN)) != 0))
 
 /*
  * GRO context structure. It keeps the table structures, which are
@@ -137,19 +148,27 @@  struct gro_ctx {
 	struct gro_udp4_item udp_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {{0} };
 
 	/* Allocate a reassembly table for VXLAN TCP GRO */
-	struct gro_vxlan_tcp4_tbl vxlan_tbl;
-	struct gro_vxlan_tcp4_flow vxlan_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
-	struct gro_vxlan_tcp4_item vxlan_items[RTE_GRO_MAX_BURST_ITEM_NUM]
+	struct gro_vxlan_tcp4_tbl vxlan_tcp_tbl;
+	struct gro_vxlan_tcp4_flow vxlan_tcp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
+	struct gro_vxlan_tcp4_item vxlan_tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM]
 			= {{{0}, 0, 0} };
 
+	/* Allocate a reassembly table for VXLAN UDP GRO */
+	struct gro_vxlan_udp4_tbl vxlan_udp_tbl;
+	struct gro_vxlan_udp4_flow vxlan_udp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
+	struct gro_vxlan_udp4_item vxlan_udp_items[RTE_GRO_MAX_BURST_ITEM_NUM]
+			= {{{0}} };
+
 	struct rte_mbuf *unprocess_pkts[nb_pkts];
 	uint32_t item_num;
 	int32_t ret;
 	uint16_t i, unprocess_num = 0, nb_after_gro = nb_pkts;
-	uint8_t do_tcp4_gro = 0, do_vxlan_gro = 0, do_udp4_gro = 0;
+	uint8_t do_tcp4_gro = 0, do_vxlan_tcp_gro = 0, do_udp4_gro = 0,
+		do_vxlan_udp_gro = 0;
 
 	if (unlikely((param->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
 					RTE_GRO_TCP_IPV4 |
+					RTE_GRO_IPV4_VXLAN_UDP_IPV4 |
 					RTE_GRO_UDP_IPV4)) == 0))
 		return nb_pkts;
 
@@ -160,15 +179,28 @@  struct gro_ctx {
 
 	if (param->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) {
 		for (i = 0; i < item_num; i++)
-			vxlan_flows[i].start_index = INVALID_ARRAY_INDEX;
-
-		vxlan_tbl.flows = vxlan_flows;
-		vxlan_tbl.items = vxlan_items;
-		vxlan_tbl.flow_num = 0;
-		vxlan_tbl.item_num = 0;
-		vxlan_tbl.max_flow_num = item_num;
-		vxlan_tbl.max_item_num = item_num;
-		do_vxlan_gro = 1;
+			vxlan_tcp_flows[i].start_index = INVALID_ARRAY_INDEX;
+
+		vxlan_tcp_tbl.flows = vxlan_tcp_flows;
+		vxlan_tcp_tbl.items = vxlan_tcp_items;
+		vxlan_tcp_tbl.flow_num = 0;
+		vxlan_tcp_tbl.item_num = 0;
+		vxlan_tcp_tbl.max_flow_num = item_num;
+		vxlan_tcp_tbl.max_item_num = item_num;
+		do_vxlan_tcp_gro = 1;
+	}
+
+	if (param->gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) {
+		for (i = 0; i < item_num; i++)
+			vxlan_udp_flows[i].start_index = INVALID_ARRAY_INDEX;
+
+		vxlan_udp_tbl.flows = vxlan_udp_flows;
+		vxlan_udp_tbl.items = vxlan_udp_items;
+		vxlan_udp_tbl.flow_num = 0;
+		vxlan_udp_tbl.item_num = 0;
+		vxlan_udp_tbl.max_flow_num = item_num;
+		vxlan_udp_tbl.max_item_num = item_num;
+		do_vxlan_udp_gro = 1;
 	}
 
 	if (param->gro_types & RTE_GRO_TCP_IPV4) {
@@ -204,9 +236,18 @@  struct gro_ctx {
 		 * will be flushed from the tables.
 		 */
 		if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
-				do_vxlan_gro) {
+				do_vxlan_tcp_gro) {
 			ret = gro_vxlan_tcp4_reassemble(pkts[i],
-							&vxlan_tbl, 0);
+							&vxlan_tcp_tbl, 0);
+			if (ret > 0)
+				/* Merge successfully */
+				nb_after_gro--;
+			else if (ret < 0)
+				unprocess_pkts[unprocess_num++] = pkts[i];
+		} else if (IS_IPV4_VXLAN_UDP4_PKT(pkts[i]->packet_type) &&
+				do_vxlan_udp_gro) {
+			ret = gro_vxlan_udp4_reassemble(pkts[i],
+							&vxlan_udp_tbl, 0);
 			if (ret > 0)
 				/* Merge successfully */
 				nb_after_gro--;
@@ -236,11 +277,17 @@  struct gro_ctx {
 		 || (unprocess_num < nb_pkts)) {
 		i = 0;
 		/* Flush all packets from the tables */
-		if (do_vxlan_gro) {
-			i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tbl,
+		if (do_vxlan_tcp_gro) {
+			i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tcp_tbl,
 					0, pkts, nb_pkts);
 		}
 
+		if (do_vxlan_udp_gro) {
+			i += gro_vxlan_udp4_tbl_timeout_flush(&vxlan_udp_tbl,
+					0, &pkts[i], nb_pkts - i);
+
+		}
+
 		if (do_tcp4_gro) {
 			i += gro_tcp4_tbl_timeout_flush(&tcp_tbl, 0,
 					&pkts[i], nb_pkts - i);
@@ -269,33 +316,42 @@  struct gro_ctx {
 {
 	struct rte_mbuf *unprocess_pkts[nb_pkts];
 	struct gro_ctx *gro_ctx = ctx;
-	void *tcp_tbl, *udp_tbl, *vxlan_tbl;
+	void *tcp_tbl, *udp_tbl, *vxlan_tcp_tbl, *vxlan_udp_tbl;
 	uint64_t current_time;
 	uint16_t i, unprocess_num = 0;
-	uint8_t do_tcp4_gro, do_vxlan_gro, do_udp4_gro;
+	uint8_t do_tcp4_gro, do_vxlan_tcp_gro, do_udp4_gro, do_vxlan_udp_gro;
 
 	if (unlikely((gro_ctx->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
 					RTE_GRO_TCP_IPV4 |
+					RTE_GRO_IPV4_VXLAN_UDP_IPV4 |
 					RTE_GRO_UDP_IPV4)) == 0))
 		return nb_pkts;
 
 	tcp_tbl = gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX];
-	vxlan_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];
+	vxlan_tcp_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];
 	udp_tbl = gro_ctx->tbls[RTE_GRO_UDP_IPV4_INDEX];
+	vxlan_udp_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX];
 
 	do_tcp4_gro = (gro_ctx->gro_types & RTE_GRO_TCP_IPV4) ==
 		RTE_GRO_TCP_IPV4;
-	do_vxlan_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) ==
+	do_vxlan_tcp_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) ==
 		RTE_GRO_IPV4_VXLAN_TCP_IPV4;
 	do_udp4_gro = (gro_ctx->gro_types & RTE_GRO_UDP_IPV4) ==
 		RTE_GRO_UDP_IPV4;
+	do_vxlan_udp_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) ==
+		RTE_GRO_IPV4_VXLAN_UDP_IPV4;
 
 	current_time = rte_rdtsc();
 
 	for (i = 0; i < nb_pkts; i++) {
 		if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
-				do_vxlan_gro) {
-			if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tbl,
+				do_vxlan_tcp_gro) {
+			if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tcp_tbl,
+						current_time) < 0)
+				unprocess_pkts[unprocess_num++] = pkts[i];
+		} else if (IS_IPV4_VXLAN_UDP4_PKT(pkts[i]->packet_type) &&
+				do_vxlan_udp_gro) {
+			if (gro_vxlan_udp4_reassemble(pkts[i], vxlan_udp_tbl,
 						current_time) < 0)
 				unprocess_pkts[unprocess_num++] = pkts[i];
 		} else if (IS_IPV4_TCP_PKT(pkts[i]->packet_type) &&
@@ -341,6 +397,13 @@  struct gro_ctx {
 		left_nb_out = max_nb_out - num;
 	}
 
+	if ((gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) && max_nb_out > 0) {
+		num += gro_vxlan_udp4_tbl_timeout_flush(gro_ctx->tbls[
+				RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX],
+				flush_timestamp, &out[num], left_nb_out);
+		left_nb_out = max_nb_out - num;
+	}
+
 	/* If no available space in 'out', stop flushing. */
 	if ((gro_types & RTE_GRO_TCP_IPV4) && max_nb_out > 0) {
 		num += gro_tcp4_tbl_timeout_flush(
diff --git a/lib/librte_gro/rte_gro.h b/lib/librte_gro/rte_gro.h
index 470f3ed..9f9ed49 100644
--- a/lib/librte_gro/rte_gro.h
+++ b/lib/librte_gro/rte_gro.h
@@ -35,6 +35,9 @@ 
 #define RTE_GRO_UDP_IPV4_INDEX 2
 #define RTE_GRO_UDP_IPV4 (1ULL << RTE_GRO_UDP_IPV4_INDEX)
 /**< UDP/IPv4 GRO flag */
+#define RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX 3
+#define RTE_GRO_IPV4_VXLAN_UDP_IPV4 (1ULL << RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX)
+/**< VxLAN UDP/IPv4 GRO flag. */
 
 /**
  * Structure used to create GRO context objects or used to pass