diff mbox series

[v5,2/2] gro: add VXLAN UDP GRO support

Message ID 20200910092914.120452-3-yang_y_yi@163.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers show
Series gro: add UDP GRO and VXLAN UDP GRO support | expand

Checks

Context Check Description
ci/Intel-compilation fail Compilation issues
ci/travis-robot warning Travis build: failed
ci/checkpatch success coding style OK

Commit Message

yang_y_yi Sept. 10, 2020, 9:29 a.m. UTC
From: Yi Yang <yangyi01@inspur.com>

VXLAN UDP GRO can help improve VM-to-VM UDP performance
when VM is enabled UFO or GSO, GRO must be supported if
GSO or UFO is enabled, otherwise, performance gain will
be hurt.

With this enabled in DPDK, OVS DPDK can leverage it to
improve VM-to-VM UDP performance, this will make sure
IP fragments will be reassembled once it is received
from physical NIC. It is very helpful in OVS DPDK VXLAN
TSO case.

Signed-off-by: Yi Yang <yangyi01@inspur.com>
---
 lib/librte_gro/gro_udp4.h       |   1 +
 lib/librte_gro/gro_vxlan_udp4.c | 548 ++++++++++++++++++++++++++++++++++++++++
 lib/librte_gro/gro_vxlan_udp4.h | 152 +++++++++++
 lib/librte_gro/meson.build      |   2 +-
 lib/librte_gro/rte_gro.c        | 115 +++++++--
 lib/librte_gro/rte_gro.h        |   3 +
 6 files changed, 794 insertions(+), 27 deletions(-)
 create mode 100644 lib/librte_gro/gro_vxlan_udp4.c
 create mode 100644 lib/librte_gro/gro_vxlan_udp4.h

Comments

Hu, Jiayu Sept. 14, 2020, 4:21 a.m. UTC | #1
Replies are inline.

BTW, you need to update the programmer guide
doc/guides/prog_guide/generic_receive_offload_lib.rst and
release note doc/guides/rel_notes/release_20_11.rst.

Thanks,
Jiayu

On Mon, Sep 14, 2020 at 10:13:44AM +0800, yang_y_yi wrote:
> Jiayu, thank you so much, please check my replies for some of your comments,
> here is incremental patch. I built it by meson this time :-)
> 
> diff --git a/lib/librte_gro/gro_vxlan_udp4.c b/lib/librte_gro/gro_vxlan_udp4.c
> index 9e9df72..1fcfaf1 100644
> --- a/lib/librte_gro/gro_vxlan_udp4.c
> +++ b/lib/librte_gro/gro_vxlan_udp4.c
> @@ -203,7 +203,7 @@
>  }
> 
>  static inline int
> -udp_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,
> +udp4_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,
>                 uint16_t frag_offset,
>                 uint16_t ip_dl)
>  {
> @@ -213,7 +213,7 @@
>         int ret = 0;
> 
>         l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
> -       cmp = udp_check_neighbor(&item->inner_item, frag_offset, ip_dl,
> +       cmp = udp4_check_neighbor(&item->inner_item, frag_offset, ip_dl,
>                                         l2_offset);
>         /* VXLAN outer IP ID is out of order, so don't touch it and
>          * don't compare it.
> @@ -379,7 +379,7 @@
>                 item_idx = insert_new_item(tbl, pkt, start_time,
>                                 INVALID_ARRAY_INDEX, frag_offset,
>                                 is_last_frag, outer_ip_id, outer_is_atomic);
> -               if (item_idx == INVALID_ARRAY_INDEX)
> +               if (unlikely(item_idx == INVALID_ARRAY_INDEX))
>                         return -1;
>                 if (insert_new_flow(tbl, &key, item_idx) ==
>                                 INVALID_ARRAY_INDEX) {
> @@ -397,7 +397,7 @@
>         cur_idx = tbl->flows[i].start_index;
>         prev_idx = cur_idx;
>         do {
> -               cmp = udp_check_vxlan_neighbor(&(tbl->items[cur_idx]),
> +               cmp = udp4_check_vxlan_neighbor(&(tbl->items[cur_idx]),
>                                 frag_offset, ip_dl);
>                 if (cmp) {
>                         if (merge_two_vxlan_udp4_packets(
> @@ -436,7 +436,7 @@
>                 item_idx = insert_new_item(tbl, pkt, start_time,
>                                 INVALID_ARRAY_INDEX, frag_offset,
>                                 is_last_frag, outer_ip_id, outer_is_atomic);
> -               if (item_idx == INVALID_ARRAY_INDEX)
> +               if (unlikely(item_idx == INVALID_ARRAY_INDEX))
>                         return -1;
>                 tbl->items[item_idx].inner_item.next_pkt_idx = cur_idx;
>                 tbl->flows[i].start_index = item_idx;
> @@ -470,7 +470,7 @@
>                 ip_dl = pkt->pkt_len - hdr_len;
> diff --git a/lib/librte_gro/gro_vxlan_udp4.c b/lib/librte_gro/gro_vxlan_udp4.c
> index 9e9df72..1fcfaf1 100644
> --- a/lib/librte_gro/gro_vxlan_udp4.c
> +++ b/lib/librte_gro/gro_vxlan_udp4.c
> @@ -203,7 +203,7 @@
>  }
> 
>  static inline int
> -udp_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,
> +udp4_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,
>                 uint16_t frag_offset,
>                 uint16_t ip_dl)
>  {
> @@ -213,7 +213,7 @@
>         int ret = 0;
> 
>         l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
> -       cmp = udp_check_neighbor(&item->inner_item, frag_offset, ip_dl,
> +       cmp = udp4_check_neighbor(&item->inner_item, frag_offset, ip_dl,
>                                         l2_offset);
>         /* VXLAN outer IP ID is out of order, so don't touch it and
>          * don't compare it.
> @@ -379,7 +379,7 @@
>                 item_idx = insert_new_item(tbl, pkt, start_time,
>                                 INVALID_ARRAY_INDEX, frag_offset,
>                                 is_last_frag, outer_ip_id, outer_is_atomic);
> -               if (item_idx == INVALID_ARRAY_INDEX)
> +               if (unlikely(item_idx == INVALID_ARRAY_INDEX))
>                         return -1;
>                 if (insert_new_flow(tbl, &key, item_idx) ==
>                                 INVALID_ARRAY_INDEX) {
> @@ -397,7 +397,7 @@
>         cur_idx = tbl->flows[i].start_index;
>         prev_idx = cur_idx;
>         do {
> -               cmp = udp_check_vxlan_neighbor(&(tbl->items[cur_idx]),
> +               cmp = udp4_check_vxlan_neighbor(&(tbl->items[cur_idx]),
>                                 frag_offset, ip_dl);
>                 if (cmp) {
>                         if (merge_two_vxlan_udp4_packets(
> @@ -436,7 +436,7 @@
>                 item_idx = insert_new_item(tbl, pkt, start_time,
>                                 INVALID_ARRAY_INDEX, frag_offset,
>                                 is_last_frag, outer_ip_id, outer_is_atomic);
> -               if (item_idx == INVALID_ARRAY_INDEX)
> +               if (unlikely(item_idx == INVALID_ARRAY_INDEX))
>                         return -1;
>                 tbl->items[item_idx].inner_item.next_pkt_idx = cur_idx;
>                 tbl->flows[i].start_index = item_idx;
> @@ -470,7 +470,7 @@
>                 ip_dl = pkt->pkt_len - hdr_len;
>                 frag_offset = tbl->items[item_idx].inner_item.frag_offset;
>                 is_last_frag = tbl->items[item_idx].inner_item.is_last_frag;
> -               cmp = udp_check_vxlan_neighbor(&(tbl->items[start_idx]),
> +               cmp = udp4_check_vxlan_neighbor(&(tbl->items[start_idx]),
>                                         frag_offset, ip_dl);
>                 if (cmp) {
>                         if (merge_two_vxlan_udp4_packets(
> @@ -481,12 +481,10 @@
>                                                         INVALID_ARRAY_INDEX);
>                                 tbl->items[start_idx].inner_item.next_pkt_idx
>                                         = item_idx;
> -                       } else {
> +                       } else
>                                 return 0;
> -                       }
> -               } else {
> +               } else
>                         return 0;
> -               }
>         }
> 
>         return 0;
> 
> 
> At 2020-09-11 12:24:16, "Jiayu Hu" <jiayu.hu@intel.com> wrote:
> >Some comments are inline.
> >
> >Thanks,
> >Jiayu
> >
> >On Thu, Sep 10, 2020 at 05:29:14PM +0800, yang_y_yi@163.com wrote:
> >> From: Yi Yang <yangyi01@inspur.com>
> >>
> >> VXLAN UDP GRO can help improve VM-to-VM UDP performance
> >> when VM is enabled UFO or GSO, GRO must be supported if
> >> GSO or UFO is enabled, otherwise, performance gain will
> >> be hurt.
> >>
> >> With this enabled in DPDK, OVS DPDK can leverage it to
> >> improve VM-to-VM UDP performance, this will make sure
> >> IP fragments will be reassembled once it is received
> >> from physical NIC. It is very helpful in OVS DPDK VXLAN
> >> TSO case.
> >>
> >> Signed-off-by: Yi Yang <yangyi01@inspur.com>
> >> ---
> >>  lib/librte_gro/gro_udp4.h       |   1 +
> >>  lib/librte_gro/gro_vxlan_udp4.c | 548 ++++++++++++++++++++++++++++++++++++++++
> >>  lib/librte_gro/gro_vxlan_udp4.h | 152 +++++++++++
> >>  lib/librte_gro/meson.build      |   2 +-
> >>  lib/librte_gro/rte_gro.c        | 115 +++++++--
> >>  lib/librte_gro/rte_gro.h        |   3 +
> >>  6 files changed, 794 insertions(+), 27 deletions(-)
> >>  create mode 100644 lib/librte_gro/gro_vxlan_udp4.c
> >>  create mode 100644 lib/librte_gro/gro_vxlan_udp4.h
> >>
> >> diff --git a/lib/librte_gro/gro_udp4.h b/lib/librte_gro/gro_udp4.h
> >> index 0a078e4..d38b393 100644
> >> --- a/lib/librte_gro/gro_udp4.h
> >> +++ b/lib/librte_gro/gro_udp4.h
> >> @@ -7,6 +7,7 @@
> >>
> >>  #include <rte_ip.h>
> >>  #include <rte_udp.h>
> >> +#include <rte_vxlan.h>
> >>
> >>  #define INVALID_ARRAY_INDEX 0xffffffffUL
> >>  #define GRO_UDP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
> >> diff --git a/lib/librte_gro/gro_vxlan_udp4.c b/lib/librte_gro/gro_vxlan_udp4.c
> >> new file mode 100644
> >> index 0000000..9e9df72
> >> --- /dev/null
> >> +++ b/lib/librte_gro/gro_vxlan_udp4.c
> >> @@ -0,0 +1,548 @@
> >> +/* SPDX-License-Identifier: BSD-3-Clause
> >> + * Copyright(c) 2020 Inspur Corporation
> >> + */
> >> +
> >> +
> >> +
> >> +static inline uint32_t
> >> +delete_item(struct gro_vxlan_udp4_tbl *tbl,
> >> +            uint32_t item_idx,
> >> +            uint32_t prev_item_idx)
> >> +{
> >> +    uint32_t next_idx = tbl->items[item_idx].inner_item.next_pkt_idx;
> >> +
> >> +    /* NULL indicates an empty item. */
> >> +    tbl->items[item_idx].inner_item.firstseg = NULL;
> >> +    tbl->item_num--;
> >> +    if (prev_item_idx != INVALID_ARRAY_INDEX)
> >> +            tbl->items[prev_item_idx].inner_item.next_pkt_idx = next_idx;
> >> +
> >> +    return next_idx;
> >> +}
> >> +
> >> +static inline uint32_t
> >> +insert_new_flow(struct gro_vxlan_udp4_tbl *tbl,
> >> +            struct vxlan_udp4_flow_key *src,
> >> +            uint32_t item_idx)
> >> +{
> >> +    struct vxlan_udp4_flow_key *dst;
> >> +    uint32_t flow_idx;
> >> +
> >> +    flow_idx = find_an_empty_flow(tbl);
> >> +    if (unlikely(flow_idx == INVALID_ARRAY_INDEX))
> >> +            return INVALID_ARRAY_INDEX;
> >> +
> >> +    dst = &(tbl->flows[flow_idx].key);
> >> +
> >> +    rte_ether_addr_copy(&(src->inner_key.eth_saddr),
> >> +                    &(dst->inner_key.eth_saddr));
> >> +    rte_ether_addr_copy(&(src->inner_key.eth_daddr),
> >> +                    &(dst->inner_key.eth_daddr));
> >> +    dst->inner_key.ip_src_addr = src->inner_key.ip_src_addr;
> >> +    dst->inner_key.ip_dst_addr = src->inner_key.ip_dst_addr;
> >> +    dst->inner_key.ip_id = src->inner_key.ip_id;
> >> +
> >> +    dst->vxlan_hdr.vx_flags = src->vxlan_hdr.vx_flags;
> >> +    dst->vxlan_hdr.vx_vni = src->vxlan_hdr.vx_vni;
> >> +    rte_ether_addr_copy(&(src->outer_eth_saddr), &(dst->outer_eth_saddr));
> >> +    rte_ether_addr_copy(&(src->outer_eth_daddr), &(dst->outer_eth_daddr));
> >> +    dst->outer_ip_src_addr = src->outer_ip_src_addr;
> >> +    dst->outer_ip_dst_addr = src->outer_ip_dst_addr;
> >> +    dst->outer_src_port = src->outer_src_port;
> >> +    dst->outer_dst_port = src->outer_dst_port;
> >> +
> >> +    tbl->flows[flow_idx].start_index = item_idx;
> >> +    tbl->flow_num++;
> >> +
> >> +    return flow_idx;
> >> +}
> >> +
> >> +static inline int
> >> +is_same_vxlan_udp4_flow(struct vxlan_udp4_flow_key k1,
> >> +            struct vxlan_udp4_flow_key k2)
> >> +{
> >> +    /* src port is changing, so shouldn't use it for flow check */
> >
> >As I know, fragments of a vxlan/udp/ipv4 packet have same outer source
> >port. In what cases outer source port will change?
> 
> In OVS, vxlan udp soure port is calculated from inner packet RSS, so for UDP fragment, RSS of first packet is
> different from other fragments because RSS is calculated based on 5 tuples (src ip, src port, dst ip, dst port,
> protocol type).
> 
> ovs/lib/netdev-native-tnl.h:
> 
> static inline uint32_t *
> dp_packet_rss_ptr(const struct dp_packet *b)
> {
>     return CONST_CAST(uint32_t *, &b->mbuf.hash.rss);
> }
> 
> static inline uint32_t
> dp_packet_get_rss_hash(const struct dp_packet *p)
> {
>     return *dp_packet_rss_ptr(p);
> }
> 
> static inline ovs_be16
> netdev_tnl_get_src_port(struct dp_packet *packet)
> {
>     uint32_t hash;
> 
>     hash = dp_packet_get_rss_hash(packet);
> 
>     return htons((((uint64_t) hash * (tnl_udp_port_max - tnl_udp_port_min)) >> 32) +
>                  tnl_udp_port_min);
> }
> 
> void
> netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED,
>                            struct dp_packet *packet,
>                            const struct ovs_action_push_tnl *data)
> {
>     struct udp_header *udp;
>     int ip_tot_size;
> 
>     udp = netdev_tnl_push_ip_header(packet, data->header, data->header_len, &ip_tot_size);
> 
>     /* set udp src port */
>     udp->udp_src = netdev_tnl_get_src_port(packet);
>     udp->udp_len = htons(ip_tot_size);
> 
>     if (udp->udp_csum) {
>         netdev_tnl_calc_udp_csum(udp, packet, ip_tot_size);
>     }
> }
> 
> This is special for ECMP which only checks outer header to differentiate different flows (innner packet). The
> first fragment of original inner UDP is considered as different flow from other fragments.
> 
> I found this by debugging becuase the same UDP traffic did generate different vxlan UDP source port.
> 

You are right. I checked VxLAN RFC 7348, and outer source port is
calculated from inner packet. In OVS case, the first fragment
will have different outer source port compared with other fragments.
Please add some comments in the code for people to better understand
the code. In addition, need to remove outer_src_port from the structure
vxlan_udp4_flow_key.

> >
> >> +    return (rte_is_same_ether_addr(&k1.outer_eth_saddr,
> >> +                                    &k2.outer_eth_saddr) &&
> >> +                    rte_is_same_ether_addr(&k1.outer_eth_daddr,
> >> +                            &k2.outer_eth_daddr) &&
> >> +                    (k1.outer_ip_src_addr == k2.outer_ip_src_addr) &&
> >> +                    (k1.outer_ip_dst_addr == k2.outer_ip_dst_addr) &&
> >> +                    (k1.outer_dst_port == k2.outer_dst_port) &&
> >> +                    (k1.vxlan_hdr.vx_flags == k2.vxlan_hdr.vx_flags) &&
> >> +                    (k1.vxlan_hdr.vx_vni == k2.vxlan_hdr.vx_vni) &&
> >> +                    is_same_udp4_flow(k1.inner_key, k2.inner_key));
> >> +}
> >> +
> >> +static inline int
> >> +udp_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,
> >> +            uint16_t frag_offset,
> >> +            uint16_t ip_dl)
> >
> >Better rename it as udp4_check_vxlan_neighbor.
> 
> Ok
> 
> >
> >> +{
> >> +    struct rte_mbuf *pkt = item->inner_item.firstseg;
> >> +    int cmp;
> >> +    uint16_t l2_offset;
> >> +    int ret = 0;
> >> +
> >> +    l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
> >> +    cmp = udp_check_neighbor(&item->inner_item, frag_offset, ip_dl,
> >> +                                    l2_offset);
> >
> >Why don't check outer IP ID? If outer DF bit is not set, all fragments
> >should have incremental outer IP ID; if DF bit is set, we don't need
> >to check outer IP ID.
> 
> By default, OVS set DF flag. I don't think we need to compare outer IP id because inner check
> is enough to differentiate if they are same flow, isn't it?

OVS is one of workloads using DPDK, and we can assume
other cases have the same design as OVS. I think IP ID
check is necessary.

> 
> >
> >In addition, udp_check_neighbor is renamed, but you didn't change here.
> 
> Forgot to build it, Makefile has been removed from dpdk main branch, build isn't so convinient, will
> use meson build to check it for next version.
> 
> >
> >> +    /* VXLAN outer IP ID is out of order, so don't touch it and
> >> +     * don't compare it.
> >> +     */
> >> +    if (cmp > 0)
> >> +            /* Append the new packet. */
> >> +            ret = 1;
> >> +    else if (cmp < 0)
> >> +            /* Prepend the new packet. */
> >> +            ret = -1;
> >> +
> >> +    return ret;
> >> +}
> >> +
> >>
> >>  /*
> >>   * GRO context structure. It keeps the table structures, which are
> >> @@ -137,19 +148,27 @@ struct gro_ctx {
> >>      struct gro_udp4_item udp_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {{0} };
> >>
> >>      /* Allocate a reassembly table for VXLAN TCP GRO */
> >> -    struct gro_vxlan_tcp4_tbl vxlan_tbl;
> >> -    struct gro_vxlan_tcp4_flow vxlan_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
> >> -    struct gro_vxlan_tcp4_item vxlan_items[RTE_GRO_MAX_BURST_ITEM_NUM]
> >> +    struct gro_vxlan_tcp4_tbl vxlan_tcp_tbl;
> >> +    struct gro_vxlan_tcp4_flow vxlan_tcp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
> >> +    struct gro_vxlan_tcp4_item vxlan_tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM]
> >> +                    = {{{0}, 0, 0} };
> >> +
> >> +    /* Allocate a reassembly table for VXLAN UDP GRO */
> >> +    struct gro_vxlan_udp4_tbl vxlan_udp_tbl;
> >> +    struct gro_vxlan_udp4_flow vxlan_udp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
> >> +    struct gro_vxlan_udp4_item vxlan_udp_items[RTE_GRO_MAX_BURST_ITEM_NUM]
> >>                      = {{{0}, 0, 0} };
> >>
> >>      struct rte_mbuf *unprocess_pkts[nb_pkts];
> >>      uint32_t item_num;
> >>      int32_t ret;
> >>      uint16_t i, unprocess_num = 0, nb_after_gro = nb_pkts;
> >> -    uint8_t do_tcp4_gro = 0, do_vxlan_gro = 0, do_udp4_gro = 0;
> >> +    uint8_t do_tcp4_gro = 0, do_vxlan_tcp_gro = 0, do_udp4_gro = 0,
> >> +            do_vxlan_udp_gro = 0;
> >>
> >>      if (unlikely((param->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
> >>                                      RTE_GRO_TCP_IPV4 |
> >> +                                    RTE_GRO_IPV4_VXLAN_UDP_IPV4 |
> >>                                      RTE_GRO_UDP_IPV4)) == 0))
> >>              return nb_pkts;
> >>
> >> @@ -160,15 +179,28 @@ struct gro_ctx {
> >>
> >>      if (param->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) {
> >>              for (i = 0; i < item_num; i++)
> >> -                    vxlan_flows[i].start_index = INVALID_ARRAY_INDEX;
> >> -
> >> -            vxlan_tbl.flows = vxlan_flows;
> >> -            vxlan_tbl.items = vxlan_items;
> >> -            vxlan_tbl.flow_num = 0;
> >> -            vxlan_tbl.item_num = 0;
> >> -            vxlan_tbl.max_flow_num = item_num;
> >> -            vxlan_tbl.max_item_num = item_num;
> >> -            do_vxlan_gro = 1;
> >> +                    vxlan_tcp_flows[i].start_index = INVALID_ARRAY_INDEX;
> >> +
> >> +            vxlan_tcp_tbl.flows = vxlan_tcp_flows;
> >> +            vxlan_tcp_tbl.items = vxlan_tcp_items;
> >> +            vxlan_tcp_tbl.flow_num = 0;
> >> +            vxlan_tcp_tbl.item_num = 0;
> >> +            vxlan_tcp_tbl.max_flow_num = item_num;
> >> +            vxlan_tcp_tbl.max_item_num = item_num;
> >> +            do_vxlan_tcp_gro = 1;
> >> +    }
> >> +
> >> +    if (param->gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) {
> >> +            for (i = 0; i < item_num; i++)
> >> +                    vxlan_udp_flows[i].start_index = INVALID_ARRAY_INDEX;
> >> +
> >> +            vxlan_udp_tbl.flows = vxlan_udp_flows;
> >> +            vxlan_udp_tbl.items = vxlan_udp_items;
> >> +            vxlan_udp_tbl.flow_num = 0;
> >> +            vxlan_udp_tbl.item_num = 0;
> >> +            vxlan_udp_tbl.max_flow_num = item_num;
> >> +            vxlan_udp_tbl.max_item_num = item_num;
> >> +            do_vxlan_udp_gro = 1;
> >>      }
> >>
> >>      if (param->gro_types & RTE_GRO_TCP_IPV4) {
> >> @@ -204,9 +236,18 @@ struct gro_ctx {
> >>               * will be flushed from the tables.
> >>               */
> >>              if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
> >> -                            do_vxlan_gro) {
> >> +                            do_vxlan_tcp_gro) {
> >>                      ret = gro_vxlan_tcp4_reassemble(pkts[i],
> >> -                                                    &vxlan_tbl, 0);
> >> +                                                    &vxlan_tcp_tbl, 0);
> >> +                    if (ret > 0)
> >> +                            /* Merge successfully */
> >> +                            nb_after_gro--;
> >> +                    else if (ret < 0)
> >> +                            unprocess_pkts[unprocess_num++] = pkts[i];
> >> +            } else if (IS_IPV4_VXLAN_UDP4_PKT(pkts[i]->packet_type) &&
> >> +                            do_vxlan_udp_gro) {
> >> +                    ret = gro_vxlan_udp4_reassemble(pkts[i],
> >> +                                                    &vxlan_udp_tbl, 0);
> >>                      if (ret > 0)
> >>                              /* Merge successfully */
> >>                              nb_after_gro--;
> >> @@ -236,11 +277,17 @@ struct gro_ctx {
> >>               || (unprocess_num < nb_pkts)) {
> >>              i = 0;
> >>              /* Flush all packets from the tables */
> >> -            if (do_vxlan_gro) {
> >> -                    i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tbl,
> >> +            if (do_vxlan_tcp_gro) {
> >> +                    i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tcp_tbl,
> >>                                      0, pkts, nb_pkts);
> >>              }
> >>
> >> +            if (do_vxlan_udp_gro) {
> >> +                    i += gro_vxlan_udp4_tbl_timeout_flush(&vxlan_udp_tbl,
> >> +                                    0, &pkts[i], nb_pkts - i);
> >> +
> >> +            }
> >> +
> >>              if (do_tcp4_gro) {
> >>                      i += gro_tcp4_tbl_timeout_flush(&tcp_tbl, 0,
> >>                                      &pkts[i], nb_pkts - i);
> >> @@ -269,33 +316,42 @@ struct gro_ctx {
> >>  {
> >>      struct rte_mbuf *unprocess_pkts[nb_pkts];
> >>      struct gro_ctx *gro_ctx = ctx;
> >> -    void *tcp_tbl, *udp_tbl, *vxlan_tbl;
> >> +    void *tcp_tbl, *udp_tbl, *vxlan_tcp_tbl, *vxlan_udp_tbl;
> >>      uint64_t current_time;
> >>      uint16_t i, unprocess_num = 0;
> >> -    uint8_t do_tcp4_gro, do_vxlan_gro, do_udp4_gro;
> >> +    uint8_t do_tcp4_gro, do_vxlan_tcp_gro, do_udp4_gro, do_vxlan_udp_gro;
> >>
> >>      if (unlikely((gro_ctx->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
> >>                                      RTE_GRO_TCP_IPV4 |
> >> +                                    RTE_GRO_IPV4_VXLAN_UDP_IPV4 |
> >>                                      RTE_GRO_UDP_IPV4)) == 0))
> >>              return nb_pkts;
> >>
> >>      tcp_tbl = gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX];
> >> -    vxlan_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];
> >> +    vxlan_tcp_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];
> >>      udp_tbl = gro_ctx->tbls[RTE_GRO_UDP_IPV4_INDEX];
> >> +    vxlan_udp_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX];
> >>
> >>      do_tcp4_gro = (gro_ctx->gro_types & RTE_GRO_TCP_IPV4) ==
> >>              RTE_GRO_TCP_IPV4;
> >> -    do_vxlan_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) ==
> >> +    do_vxlan_tcp_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) ==
> >>              RTE_GRO_IPV4_VXLAN_TCP_IPV4;
> >>      do_udp4_gro = (gro_ctx->gro_types & RTE_GRO_UDP_IPV4) ==
> >>              RTE_GRO_UDP_IPV4;
> >> +    do_vxlan_udp_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) ==
> >> +            RTE_GRO_IPV4_VXLAN_UDP_IPV4;
> >>
> >>      current_time = rte_rdtsc();
> >>
> >>      for (i = 0; i < nb_pkts; i++) {
> >>              if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
> >> -                            do_vxlan_gro) {
> >> -                    if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tbl,
> >> +                            do_vxlan_tcp_gro) {
> >> +                    if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tcp_tbl,
> >> +                                            current_time) < 0)
> >> +                            unprocess_pkts[unprocess_num++] = pkts[i];
> >> +            } else if (IS_IPV4_VXLAN_UDP4_PKT(pkts[i]->packet_type) &&
> >> +                            do_vxlan_udp_gro) {
> >> +                    if (gro_vxlan_udp4_reassemble(pkts[i], vxlan_udp_tbl,
> >>                                              current_time) < 0)
> >>                              unprocess_pkts[unprocess_num++] = pkts[i];
> >>              } else if (IS_IPV4_TCP_PKT(pkts[i]->packet_type) &&
> >> @@ -341,6 +397,13 @@ struct gro_ctx {
> >>              left_nb_out = max_nb_out - num;
> >>      }
> >>
> >> +    if ((gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) && max_nb_out > 0) {
> >> +            num += gro_vxlan_udp4_tbl_timeout_flush(gro_ctx->tbls[
> >> +                            RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX],
> >> +                            flush_timestamp, &out[num], left_nb_out);
> >> +            left_nb_out = max_nb_out - num;
> >
> >Don't need to calculate left_nb_out here. In the previous patch, you
> >also calculate it for UDP GRO. Please remove it both.
> 
> I think keeping it here isn't bad thing, if someone will add a new GRO type here, he/she won't make a mistake.

I don't think so. It's unnecessary for current design.
If others want to add new GRO types, they will write
correct code.

> 
> >
> 
> 
> 
>  
>
yang_y_yi Sept. 14, 2020, 7:11 a.m. UTC | #2
Thanks Jiayu, will change them per your comments. For doument part, is such change ok?





diff --git a/doc/guides/prog_guide/generic_receive_offload_lib.rst b/doc/guides/prog
index 5b3fb91..98a5d29 100644
--- a/doc/guides/prog_guide/generic_receive_offload_lib.rst
+++ b/doc/guides/prog_guide/generic_receive_offload_lib.rst
@@ -30,9 +30,9 @@ assumes the packets are complete (i.e., MF==0 && frag_off==0), whe
 fragmentation is possible (i.e., DF==0). Additionally, it complies RFC
 6864 to process the IPv4 ID field.

-Currently, the GRO library provides GRO supports for TCP/IPv4 packets and
-VxLAN packets which contain an outer IPv4 header and an inner TCP/IPv4
-packet.
+Currently, the GRO library provides GRO supports for TCP/IPv4 and UDP/IPv4
+packets as well as VxLAN packets which contain an outer IPv4 header and an
+inner TCP/IPv4 or UDP/IPv4 packet.

 Two Sets of API
 ---------------
diff --git a/doc/guides/rel_notes/release_20_11.rst b/doc/guides/rel_notes/release_2
index df227a1..25e0387 100644
--- a/doc/guides/rel_notes/release_20_11.rst
+++ b/doc/guides/rel_notes/release_20_11.rst
@@ -55,6 +55,12 @@ New Features
      Also, make sure to start the actual text at the margin.
      =======================================================

+* **Add UDP/IPv4 GRO support for VxLAN and non-VxLAN packets.**
+
+  Add UDP/IPv4 GRO support for VxLAN and non-VxLAN packets. For
+  VxLAN packets, this adds inner UDP/IPv4 support, for non-VxLAN
+  packets, this adds UDP/IPv4 support.
+

 Removed Items
 -------------









At 2020-09-14 12:21:26, "Jiayu Hu" <jiayu.hu@intel.com> wrote:
>Replies are inline.
>
>BTW, you need to update the programmer guide
>doc/guides/prog_guide/generic_receive_offload_lib.rst and
>release note doc/guides/rel_notes/release_20_11.rst.
>
>Thanks,
>Jiayu
>
>On Mon, Sep 14, 2020 at 10:13:44AM +0800, yang_y_yi wrote:
>> Jiayu, thank you so much, please check my replies for some of your comments,
>> here is incremental patch. I built it by meson this time :-)
>> 
>> diff --git a/lib/librte_gro/gro_vxlan_udp4.c b/lib/librte_gro/gro_vxlan_udp4.c
>> index 9e9df72..1fcfaf1 100644
>> --- a/lib/librte_gro/gro_vxlan_udp4.c
>> +++ b/lib/librte_gro/gro_vxlan_udp4.c
>> @@ -203,7 +203,7 @@
>>  }
>> 
>>  static inline int
>> -udp_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,
>> +udp4_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,
>>                 uint16_t frag_offset,
>>                 uint16_t ip_dl)
>>  {
>> @@ -213,7 +213,7 @@
>>         int ret = 0;
>> 
>>         l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
>> -       cmp = udp_check_neighbor(&item->inner_item, frag_offset, ip_dl,
>> +       cmp = udp4_check_neighbor(&item->inner_item, frag_offset, ip_dl,
>>                                         l2_offset);
>>         /* VXLAN outer IP ID is out of order, so don't touch it and
>>          * don't compare it.
>> @@ -379,7 +379,7 @@
>>                 item_idx = insert_new_item(tbl, pkt, start_time,
>>                                 INVALID_ARRAY_INDEX, frag_offset,
>>                                 is_last_frag, outer_ip_id, outer_is_atomic);
>> -               if (item_idx == INVALID_ARRAY_INDEX)
>> +               if (unlikely(item_idx == INVALID_ARRAY_INDEX))
>>                         return -1;
>>                 if (insert_new_flow(tbl, &key, item_idx) ==
>>                                 INVALID_ARRAY_INDEX) {
>> @@ -397,7 +397,7 @@
>>         cur_idx = tbl->flows[i].start_index;
>>         prev_idx = cur_idx;
>>         do {
>> -               cmp = udp_check_vxlan_neighbor(&(tbl->items[cur_idx]),
>> +               cmp = udp4_check_vxlan_neighbor(&(tbl->items[cur_idx]),
>>                                 frag_offset, ip_dl);
>>                 if (cmp) {
>>                         if (merge_two_vxlan_udp4_packets(
>> @@ -436,7 +436,7 @@
>>                 item_idx = insert_new_item(tbl, pkt, start_time,
>>                                 INVALID_ARRAY_INDEX, frag_offset,
>>                                 is_last_frag, outer_ip_id, outer_is_atomic);
>> -               if (item_idx == INVALID_ARRAY_INDEX)
>> +               if (unlikely(item_idx == INVALID_ARRAY_INDEX))
>>                         return -1;
>>                 tbl->items[item_idx].inner_item.next_pkt_idx = cur_idx;
>>                 tbl->flows[i].start_index = item_idx;
>> @@ -470,7 +470,7 @@
>>                 ip_dl = pkt->pkt_len - hdr_len;
>> diff --git a/lib/librte_gro/gro_vxlan_udp4.c b/lib/librte_gro/gro_vxlan_udp4.c
>> index 9e9df72..1fcfaf1 100644
>> --- a/lib/librte_gro/gro_vxlan_udp4.c
>> +++ b/lib/librte_gro/gro_vxlan_udp4.c
>> @@ -203,7 +203,7 @@
>>  }
>> 
>>  static inline int
>> -udp_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,
>> +udp4_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,
>>                 uint16_t frag_offset,
>>                 uint16_t ip_dl)
>>  {
>> @@ -213,7 +213,7 @@
>>         int ret = 0;
>> 
>>         l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
>> -       cmp = udp_check_neighbor(&item->inner_item, frag_offset, ip_dl,
>> +       cmp = udp4_check_neighbor(&item->inner_item, frag_offset, ip_dl,
>>                                         l2_offset);
>>         /* VXLAN outer IP ID is out of order, so don't touch it and
>>          * don't compare it.
>> @@ -379,7 +379,7 @@
>>                 item_idx = insert_new_item(tbl, pkt, start_time,
>>                                 INVALID_ARRAY_INDEX, frag_offset,
>>                                 is_last_frag, outer_ip_id, outer_is_atomic);
>> -               if (item_idx == INVALID_ARRAY_INDEX)
>> +               if (unlikely(item_idx == INVALID_ARRAY_INDEX))
>>                         return -1;
>>                 if (insert_new_flow(tbl, &key, item_idx) ==
>>                                 INVALID_ARRAY_INDEX) {
>> @@ -397,7 +397,7 @@
>>         cur_idx = tbl->flows[i].start_index;
>>         prev_idx = cur_idx;
>>         do {
>> -               cmp = udp_check_vxlan_neighbor(&(tbl->items[cur_idx]),
>> +               cmp = udp4_check_vxlan_neighbor(&(tbl->items[cur_idx]),
>>                                 frag_offset, ip_dl);
>>                 if (cmp) {
>>                         if (merge_two_vxlan_udp4_packets(
>> @@ -436,7 +436,7 @@
>>                 item_idx = insert_new_item(tbl, pkt, start_time,
>>                                 INVALID_ARRAY_INDEX, frag_offset,
>>                                 is_last_frag, outer_ip_id, outer_is_atomic);
>> -               if (item_idx == INVALID_ARRAY_INDEX)
>> +               if (unlikely(item_idx == INVALID_ARRAY_INDEX))
>>                         return -1;
>>                 tbl->items[item_idx].inner_item.next_pkt_idx = cur_idx;
>>                 tbl->flows[i].start_index = item_idx;
>> @@ -470,7 +470,7 @@
>>                 ip_dl = pkt->pkt_len - hdr_len;
>>                 frag_offset = tbl->items[item_idx].inner_item.frag_offset;
>>                 is_last_frag = tbl->items[item_idx].inner_item.is_last_frag;
>> -               cmp = udp_check_vxlan_neighbor(&(tbl->items[start_idx]),
>> +               cmp = udp4_check_vxlan_neighbor(&(tbl->items[start_idx]),
>>                                         frag_offset, ip_dl);
>>                 if (cmp) {
>>                         if (merge_two_vxlan_udp4_packets(
>> @@ -481,12 +481,10 @@
>>                                                         INVALID_ARRAY_INDEX);
>>                                 tbl->items[start_idx].inner_item.next_pkt_idx
>>                                         = item_idx;
>> -                       } else {
>> +                       } else
>>                                 return 0;
>> -                       }
>> -               } else {
>> +               } else
>>                         return 0;
>> -               }
>>         }
>> 
>>         return 0;
>> 
>> 
>> At 2020-09-11 12:24:16, "Jiayu Hu" <jiayu.hu@intel.com> wrote:
>> >Some comments are inline.
>> >
>> >Thanks,
>> >Jiayu
>> >
>> >On Thu, Sep 10, 2020 at 05:29:14PM +0800, yang_y_yi@163.com wrote:
>> >> From: Yi Yang <yangyi01@inspur.com>
>> >>
>> >> VXLAN UDP GRO can help improve VM-to-VM UDP performance
>> >> when VM is enabled UFO or GSO, GRO must be supported if
>> >> GSO or UFO is enabled, otherwise, performance gain will
>> >> be hurt.
>> >>
>> >> With this enabled in DPDK, OVS DPDK can leverage it to
>> >> improve VM-to-VM UDP performance, this will make sure
>> >> IP fragments will be reassembled once it is received
>> >> from physical NIC. It is very helpful in OVS DPDK VXLAN
>> >> TSO case.
>> >>
>> >> Signed-off-by: Yi Yang <yangyi01@inspur.com>
>> >> ---
>> >>  lib/librte_gro/gro_udp4.h       |   1 +
>> >>  lib/librte_gro/gro_vxlan_udp4.c | 548 ++++++++++++++++++++++++++++++++++++++++
>> >>  lib/librte_gro/gro_vxlan_udp4.h | 152 +++++++++++
>> >>  lib/librte_gro/meson.build      |   2 +-
>> >>  lib/librte_gro/rte_gro.c        | 115 +++++++--
>> >>  lib/librte_gro/rte_gro.h        |   3 +
>> >>  6 files changed, 794 insertions(+), 27 deletions(-)
>> >>  create mode 100644 lib/librte_gro/gro_vxlan_udp4.c
>> >>  create mode 100644 lib/librte_gro/gro_vxlan_udp4.h
>> >>
>> >> diff --git a/lib/librte_gro/gro_udp4.h b/lib/librte_gro/gro_udp4.h
>> >> index 0a078e4..d38b393 100644
>> >> --- a/lib/librte_gro/gro_udp4.h
>> >> +++ b/lib/librte_gro/gro_udp4.h
>> >> @@ -7,6 +7,7 @@
>> >>
>> >>  #include <rte_ip.h>
>> >>  #include <rte_udp.h>
>> >> +#include <rte_vxlan.h>
>> >>
>> >>  #define INVALID_ARRAY_INDEX 0xffffffffUL
>> >>  #define GRO_UDP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
>> >> diff --git a/lib/librte_gro/gro_vxlan_udp4.c b/lib/librte_gro/gro_vxlan_udp4.c
>> >> new file mode 100644
>> >> index 0000000..9e9df72
>> >> --- /dev/null
>> >> +++ b/lib/librte_gro/gro_vxlan_udp4.c
>> >> @@ -0,0 +1,548 @@
>> >> +/* SPDX-License-Identifier: BSD-3-Clause
>> >> + * Copyright(c) 2020 Inspur Corporation
>> >> + */
>> >> +
>> >> +
>> >> +
>> >> +static inline uint32_t
>> >> +delete_item(struct gro_vxlan_udp4_tbl *tbl,
>> >> +            uint32_t item_idx,
>> >> +            uint32_t prev_item_idx)
>> >> +{
>> >> +    uint32_t next_idx = tbl->items[item_idx].inner_item.next_pkt_idx;
>> >> +
>> >> +    /* NULL indicates an empty item. */
>> >> +    tbl->items[item_idx].inner_item.firstseg = NULL;
>> >> +    tbl->item_num--;
>> >> +    if (prev_item_idx != INVALID_ARRAY_INDEX)
>> >> +            tbl->items[prev_item_idx].inner_item.next_pkt_idx = next_idx;
>> >> +
>> >> +    return next_idx;
>> >> +}
>> >> +
>> >> +static inline uint32_t
>> >> +insert_new_flow(struct gro_vxlan_udp4_tbl *tbl,
>> >> +            struct vxlan_udp4_flow_key *src,
>> >> +            uint32_t item_idx)
>> >> +{
>> >> +    struct vxlan_udp4_flow_key *dst;
>> >> +    uint32_t flow_idx;
>> >> +
>> >> +    flow_idx = find_an_empty_flow(tbl);
>> >> +    if (unlikely(flow_idx == INVALID_ARRAY_INDEX))
>> >> +            return INVALID_ARRAY_INDEX;
>> >> +
>> >> +    dst = &(tbl->flows[flow_idx].key);
>> >> +
>> >> +    rte_ether_addr_copy(&(src->inner_key.eth_saddr),
>> >> +                    &(dst->inner_key.eth_saddr));
>> >> +    rte_ether_addr_copy(&(src->inner_key.eth_daddr),
>> >> +                    &(dst->inner_key.eth_daddr));
>> >> +    dst->inner_key.ip_src_addr = src->inner_key.ip_src_addr;
>> >> +    dst->inner_key.ip_dst_addr = src->inner_key.ip_dst_addr;
>> >> +    dst->inner_key.ip_id = src->inner_key.ip_id;
>> >> +
>> >> +    dst->vxlan_hdr.vx_flags = src->vxlan_hdr.vx_flags;
>> >> +    dst->vxlan_hdr.vx_vni = src->vxlan_hdr.vx_vni;
>> >> +    rte_ether_addr_copy(&(src->outer_eth_saddr), &(dst->outer_eth_saddr));
>> >> +    rte_ether_addr_copy(&(src->outer_eth_daddr), &(dst->outer_eth_daddr));
>> >> +    dst->outer_ip_src_addr = src->outer_ip_src_addr;
>> >> +    dst->outer_ip_dst_addr = src->outer_ip_dst_addr;
>> >> +    dst->outer_src_port = src->outer_src_port;
>> >> +    dst->outer_dst_port = src->outer_dst_port;
>> >> +
>> >> +    tbl->flows[flow_idx].start_index = item_idx;
>> >> +    tbl->flow_num++;
>> >> +
>> >> +    return flow_idx;
>> >> +}
>> >> +
>> >> +static inline int
>> >> +is_same_vxlan_udp4_flow(struct vxlan_udp4_flow_key k1,
>> >> +            struct vxlan_udp4_flow_key k2)
>> >> +{
>> >> +    /* src port is changing, so shouldn't use it for flow check */
>> >
>> >As I know, fragments of a vxlan/udp/ipv4 packet have same outer source
>> >port. In what cases outer source port will change?
>> 
>> In OVS, vxlan udp soure port is calculated from inner packet RSS, so for UDP fragment, RSS of first packet is
>> different from other fragments because RSS is calculated based on 5 tuples (src ip, src port, dst ip, dst port,
>> protocol type).
>> 
>> ovs/lib/netdev-native-tnl.h:
>> 
>> static inline uint32_t *
>> dp_packet_rss_ptr(const struct dp_packet *b)
>> {
>>     return CONST_CAST(uint32_t *, &b->mbuf.hash.rss);
>> }
>> 
>> static inline uint32_t
>> dp_packet_get_rss_hash(const struct dp_packet *p)
>> {
>>     return *dp_packet_rss_ptr(p);
>> }
>> 
>> static inline ovs_be16
>> netdev_tnl_get_src_port(struct dp_packet *packet)
>> {
>>     uint32_t hash;
>> 
>>     hash = dp_packet_get_rss_hash(packet);
>> 
>>     return htons((((uint64_t) hash * (tnl_udp_port_max - tnl_udp_port_min)) >> 32) +
>>                  tnl_udp_port_min);
>> }
>> 
>> void
>> netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED,
>>                            struct dp_packet *packet,
>>                            const struct ovs_action_push_tnl *data)
>> {
>>     struct udp_header *udp;
>>     int ip_tot_size;
>> 
>>     udp = netdev_tnl_push_ip_header(packet, data->header, data->header_len, &ip_tot_size);
>> 
>>     /* set udp src port */
>>     udp->udp_src = netdev_tnl_get_src_port(packet);
>>     udp->udp_len = htons(ip_tot_size);
>> 
>>     if (udp->udp_csum) {
>>         netdev_tnl_calc_udp_csum(udp, packet, ip_tot_size);
>>     }
>> }
>> 
>> This is special for ECMP which only checks outer header to differentiate different flows (innner packet). The
>> first fragment of original inner UDP is considered as different flow from other fragments.
>> 
>> I found this by debugging becuase the same UDP traffic did generate different vxlan UDP source port.
>> 
>
>You are right. I checked VxLAN RFC 7348, and outer source port is
>calculated from inner packet. In OVS case, the first fragment
>will have different outer source port compared with other fragments.
>Please add some comments in the code for people to better understand
>the code. In addition, need to remove outer_src_port from the structure
>vxlan_udp4_flow_key.
>
>> >
>> >> +    return (rte_is_same_ether_addr(&k1.outer_eth_saddr,
>> >> +                                    &k2.outer_eth_saddr) &&
>> >> +                    rte_is_same_ether_addr(&k1.outer_eth_daddr,
>> >> +                            &k2.outer_eth_daddr) &&
>> >> +                    (k1.outer_ip_src_addr == k2.outer_ip_src_addr) &&
>> >> +                    (k1.outer_ip_dst_addr == k2.outer_ip_dst_addr) &&
>> >> +                    (k1.outer_dst_port == k2.outer_dst_port) &&
>> >> +                    (k1.vxlan_hdr.vx_flags == k2.vxlan_hdr.vx_flags) &&
>> >> +                    (k1.vxlan_hdr.vx_vni == k2.vxlan_hdr.vx_vni) &&
>> >> +                    is_same_udp4_flow(k1.inner_key, k2.inner_key));
>> >> +}
>> >> +
>> >> +static inline int
>> >> +udp_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,
>> >> +            uint16_t frag_offset,
>> >> +            uint16_t ip_dl)
>> >
>> >Better rename it as udp4_check_vxlan_neighbor.
>> 
>> Ok
>> 
>> >
>> >> +{
>> >> +    struct rte_mbuf *pkt = item->inner_item.firstseg;
>> >> +    int cmp;
>> >> +    uint16_t l2_offset;
>> >> +    int ret = 0;
>> >> +
>> >> +    l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
>> >> +    cmp = udp_check_neighbor(&item->inner_item, frag_offset, ip_dl,
>> >> +                                    l2_offset);
>> >
>> >Why don't check outer IP ID? If outer DF bit is not set, all fragments
>> >should have incremental outer IP ID; if DF bit is set, we don't need
>> >to check outer IP ID.
>> 
>> By default, OVS set DF flag. I don't think we need to compare outer IP id because inner check
>> is enough to differentiate if they are same flow, isn't it?
>
>OVS is one of workloads using DPDK, and we can assume
>other cases have the same design as OVS. I think IP ID
>check is necessary.
>
>> 
>> >
>> >In addition, udp_check_neighbor is renamed, but you didn't change here.
>> 
>> Forgot to build it, Makefile has been removed from dpdk main branch, build isn't so convinient, will
>> use meson build to check it for next version.
>> 
>> >
>> >> +    /* VXLAN outer IP ID is out of order, so don't touch it and
>> >> +     * don't compare it.
>> >> +     */
>> >> +    if (cmp > 0)
>> >> +            /* Append the new packet. */
>> >> +            ret = 1;
>> >> +    else if (cmp < 0)
>> >> +            /* Prepend the new packet. */
>> >> +            ret = -1;
>> >> +
>> >> +    return ret;
>> >> +}
>> >> +
>> >>
>> >>  /*
>> >>   * GRO context structure. It keeps the table structures, which are
>> >> @@ -137,19 +148,27 @@ struct gro_ctx {
>> >>      struct gro_udp4_item udp_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {{0} };
>> >>
>> >>      /* Allocate a reassembly table for VXLAN TCP GRO */
>> >> -    struct gro_vxlan_tcp4_tbl vxlan_tbl;
>> >> -    struct gro_vxlan_tcp4_flow vxlan_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
>> >> -    struct gro_vxlan_tcp4_item vxlan_items[RTE_GRO_MAX_BURST_ITEM_NUM]
>> >> +    struct gro_vxlan_tcp4_tbl vxlan_tcp_tbl;
>> >> +    struct gro_vxlan_tcp4_flow vxlan_tcp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
>> >> +    struct gro_vxlan_tcp4_item vxlan_tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM]
>> >> +                    = {{{0}, 0, 0} };
>> >> +
>> >> +    /* Allocate a reassembly table for VXLAN UDP GRO */
>> >> +    struct gro_vxlan_udp4_tbl vxlan_udp_tbl;
>> >> +    struct gro_vxlan_udp4_flow vxlan_udp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
>> >> +    struct gro_vxlan_udp4_item vxlan_udp_items[RTE_GRO_MAX_BURST_ITEM_NUM]
>> >>                      = {{{0}, 0, 0} };
>> >>
>> >>      struct rte_mbuf *unprocess_pkts[nb_pkts];
>> >>      uint32_t item_num;
>> >>      int32_t ret;
>> >>      uint16_t i, unprocess_num = 0, nb_after_gro = nb_pkts;
>> >> -    uint8_t do_tcp4_gro = 0, do_vxlan_gro = 0, do_udp4_gro = 0;
>> >> +    uint8_t do_tcp4_gro = 0, do_vxlan_tcp_gro = 0, do_udp4_gro = 0,
>> >> +            do_vxlan_udp_gro = 0;
>> >>
>> >>      if (unlikely((param->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
>> >>                                      RTE_GRO_TCP_IPV4 |
>> >> +                                    RTE_GRO_IPV4_VXLAN_UDP_IPV4 |
>> >>                                      RTE_GRO_UDP_IPV4)) == 0))
>> >>              return nb_pkts;
>> >>
>> >> @@ -160,15 +179,28 @@ struct gro_ctx {
>> >>
>> >>      if (param->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) {
>> >>              for (i = 0; i < item_num; i++)
>> >> -                    vxlan_flows[i].start_index = INVALID_ARRAY_INDEX;
>> >> -
>> >> -            vxlan_tbl.flows = vxlan_flows;
>> >> -            vxlan_tbl.items = vxlan_items;
>> >> -            vxlan_tbl.flow_num = 0;
>> >> -            vxlan_tbl.item_num = 0;
>> >> -            vxlan_tbl.max_flow_num = item_num;
>> >> -            vxlan_tbl.max_item_num = item_num;
>> >> -            do_vxlan_gro = 1;
>> >> +                    vxlan_tcp_flows[i].start_index = INVALID_ARRAY_INDEX;
>> >> +
>> >> +            vxlan_tcp_tbl.flows = vxlan_tcp_flows;
>> >> +            vxlan_tcp_tbl.items = vxlan_tcp_items;
>> >> +            vxlan_tcp_tbl.flow_num = 0;
>> >> +            vxlan_tcp_tbl.item_num = 0;
>> >> +            vxlan_tcp_tbl.max_flow_num = item_num;
>> >> +            vxlan_tcp_tbl.max_item_num = item_num;
>> >> +            do_vxlan_tcp_gro = 1;
>> >> +    }
>> >> +
>> >> +    if (param->gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) {
>> >> +            for (i = 0; i < item_num; i++)
>> >> +                    vxlan_udp_flows[i].start_index = INVALID_ARRAY_INDEX;
>> >> +
>> >> +            vxlan_udp_tbl.flows = vxlan_udp_flows;
>> >> +            vxlan_udp_tbl.items = vxlan_udp_items;
>> >> +            vxlan_udp_tbl.flow_num = 0;
>> >> +            vxlan_udp_tbl.item_num = 0;
>> >> +            vxlan_udp_tbl.max_flow_num = item_num;
>> >> +            vxlan_udp_tbl.max_item_num = item_num;
>> >> +            do_vxlan_udp_gro = 1;
>> >>      }
>> >>
>> >>      if (param->gro_types & RTE_GRO_TCP_IPV4) {
>> >> @@ -204,9 +236,18 @@ struct gro_ctx {
>> >>               * will be flushed from the tables.
>> >>               */
>> >>              if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
>> >> -                            do_vxlan_gro) {
>> >> +                            do_vxlan_tcp_gro) {
>> >>                      ret = gro_vxlan_tcp4_reassemble(pkts[i],
>> >> -                                                    &vxlan_tbl, 0);
>> >> +                                                    &vxlan_tcp_tbl, 0);
>> >> +                    if (ret > 0)
>> >> +                            /* Merge successfully */
>> >> +                            nb_after_gro--;
>> >> +                    else if (ret < 0)
>> >> +                            unprocess_pkts[unprocess_num++] = pkts[i];
>> >> +            } else if (IS_IPV4_VXLAN_UDP4_PKT(pkts[i]->packet_type) &&
>> >> +                            do_vxlan_udp_gro) {
>> >> +                    ret = gro_vxlan_udp4_reassemble(pkts[i],
>> >> +                                                    &vxlan_udp_tbl, 0);
>> >>                      if (ret > 0)
>> >>                              /* Merge successfully */
>> >>                              nb_after_gro--;
>> >> @@ -236,11 +277,17 @@ struct gro_ctx {
>> >>               || (unprocess_num < nb_pkts)) {
>> >>              i = 0;
>> >>              /* Flush all packets from the tables */
>> >> -            if (do_vxlan_gro) {
>> >> -                    i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tbl,
>> >> +            if (do_vxlan_tcp_gro) {
>> >> +                    i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tcp_tbl,
>> >>                                      0, pkts, nb_pkts);
>> >>              }
>> >>
>> >> +            if (do_vxlan_udp_gro) {
>> >> +                    i += gro_vxlan_udp4_tbl_timeout_flush(&vxlan_udp_tbl,
>> >> +                                    0, &pkts[i], nb_pkts - i);
>> >> +
>> >> +            }
>> >> +
>> >>              if (do_tcp4_gro) {
>> >>                      i += gro_tcp4_tbl_timeout_flush(&tcp_tbl, 0,
>> >>                                      &pkts[i], nb_pkts - i);
>> >> @@ -269,33 +316,42 @@ struct gro_ctx {
>> >>  {
>> >>      struct rte_mbuf *unprocess_pkts[nb_pkts];
>> >>      struct gro_ctx *gro_ctx = ctx;
>> >> -    void *tcp_tbl, *udp_tbl, *vxlan_tbl;
>> >> +    void *tcp_tbl, *udp_tbl, *vxlan_tcp_tbl, *vxlan_udp_tbl;
>> >>      uint64_t current_time;
>> >>      uint16_t i, unprocess_num = 0;
>> >> -    uint8_t do_tcp4_gro, do_vxlan_gro, do_udp4_gro;
>> >> +    uint8_t do_tcp4_gro, do_vxlan_tcp_gro, do_udp4_gro, do_vxlan_udp_gro;
>> >>
>> >>      if (unlikely((gro_ctx->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
>> >>                                      RTE_GRO_TCP_IPV4 |
>> >> +                                    RTE_GRO_IPV4_VXLAN_UDP_IPV4 |
>> >>                                      RTE_GRO_UDP_IPV4)) == 0))
>> >>              return nb_pkts;
>> >>
>> >>      tcp_tbl = gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX];
>> >> -    vxlan_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];
>> >> +    vxlan_tcp_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];
>> >>      udp_tbl = gro_ctx->tbls[RTE_GRO_UDP_IPV4_INDEX];
>> >> +    vxlan_udp_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX];
>> >>
>> >>      do_tcp4_gro = (gro_ctx->gro_types & RTE_GRO_TCP_IPV4) ==
>> >>              RTE_GRO_TCP_IPV4;
>> >> -    do_vxlan_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) ==
>> >> +    do_vxlan_tcp_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) ==
>> >>              RTE_GRO_IPV4_VXLAN_TCP_IPV4;
>> >>      do_udp4_gro = (gro_ctx->gro_types & RTE_GRO_UDP_IPV4) ==
>> >>              RTE_GRO_UDP_IPV4;
>> >> +    do_vxlan_udp_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) ==
>> >> +            RTE_GRO_IPV4_VXLAN_UDP_IPV4;
>> >>
>> >>      current_time = rte_rdtsc();
>> >>
>> >>      for (i = 0; i < nb_pkts; i++) {
>> >>              if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
>> >> -                            do_vxlan_gro) {
>> >> -                    if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tbl,
>> >> +                            do_vxlan_tcp_gro) {
>> >> +                    if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tcp_tbl,
>> >> +                                            current_time) < 0)
>> >> +                            unprocess_pkts[unprocess_num++] = pkts[i];
>> >> +            } else if (IS_IPV4_VXLAN_UDP4_PKT(pkts[i]->packet_type) &&
>> >> +                            do_vxlan_udp_gro) {
>> >> +                    if (gro_vxlan_udp4_reassemble(pkts[i], vxlan_udp_tbl,
>> >>                                              current_time) < 0)
>> >>                              unprocess_pkts[unprocess_num++] = pkts[i];
>> >>              } else if (IS_IPV4_TCP_PKT(pkts[i]->packet_type) &&
>> >> @@ -341,6 +397,13 @@ struct gro_ctx {
>> >>              left_nb_out = max_nb_out - num;
>> >>      }
>> >>
>> >> +    if ((gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) && max_nb_out > 0) {
>> >> +            num += gro_vxlan_udp4_tbl_timeout_flush(gro_ctx->tbls[
>> >> +                            RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX],
>> >> +                            flush_timestamp, &out[num], left_nb_out);
>> >> +            left_nb_out = max_nb_out - num;
>> >
>> >Don't need to calculate left_nb_out here. In the previous patch, you
>> >also calculate it for UDP GRO. Please remove it both.
>> 
>> I think keeping it here isn't bad thing, if someone will add a new GRO type here, he/she won't make a mistake.
>
>I don't think so. It's unnecessary for current design.
>If others want to add new GRO types, they will write
>correct code.
>
>> 
>> >
>> 
>> 
>> 
>>  
>>
yang_y_yi Sept. 14, 2020, 8:26 a.m. UTC | #3
For outer_ip_id, there is same ip_id disorder issue existing as UDP GRO, so we can't use ip_id +/-1 to check if they are same flow, here is my incrmental change for it with more comments.





@@ -189,7 +188,12 @@
 is_same_vxlan_udp4_flow(struct vxlan_udp4_flow_key k1,
                struct vxlan_udp4_flow_key k2)
 {
-       /* src port is changing, so shouldn't use it for flow check */
+       /* For VxLAN packet, outer udp src port is calculated from
+        * inner packet RSS hash, udp src port of the first UDP
+        * fragment is different from one of other UDP fragments
+        * even if they are same flow, so we have to skip outer udp
+        * src port comparison here.
+        */
        return (rte_is_same_ether_addr(&k1.outer_eth_saddr,
                                        &k2.outer_eth_saddr) &&
                        rte_is_same_ether_addr(&k1.outer_eth_daddr,
@@ -212,12 +216,16 @@
        uint16_t l2_offset;
        int ret = 0;

+       /* Note: if outer DF bit is set, i.e outer_is_atomic is 0,
+        * we needn't compare outer_ip_id because they are same,
+        * for the case outer_is_atomic is 1, we also have no way
+        * to comapre outer_ip_id because the received packets can
+        * be out of order. So ignore outer_ip_id comparison here.
+        */
+
        l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
        cmp = udp4_check_neighbor(&item->inner_item, frag_offset, ip_dl,
                                        l2_offset);
-       /* VXLAN outer IP ID is out of order, so don't touch it and
-        * don't compare it.
-        */
        if (cmp > 0)
                /* Append the new packet. */
                ret = 1;
@@ -354,7 +362,9 @@
        rte_ether_addr_copy(&(outer_eth_hdr->d_addr), &(key.outer_eth_daddr));
        key.outer_ip_src_addr = outer_ipv4_hdr->src_addr;
        key.outer_ip_dst_addr = outer_ipv4_hdr->dst_addr;
-       key.outer_src_port = udp_hdr->src_port;
+       /* Note: It is unnecessary to save outer_src_port here because it can
+        * be different for VxLAN UDP fragments from the same flow.
+        */
        key.outer_dst_port = udp_hdr->dst_port;

        /* Search for a matched flow. */












At 2020-09-14 12:21:26, "Jiayu Hu" <jiayu.hu@intel.com> wrote:
>Replies are inline.
>
>BTW, you need to update the programmer guide
>doc/guides/prog_guide/generic_receive_offload_lib.rst and
>release note doc/guides/rel_notes/release_20_11.rst.
>
>Thanks,
>Jiayu
>
>On Mon, Sep 14, 2020 at 10:13:44AM +0800, yang_y_yi wrote:
>> Jiayu, thank you so much, please check my replies for some of your comments,
>> here is incremental patch. I built it by meson this time :-)
>> 
>> diff --git a/lib/librte_gro/gro_vxlan_udp4.c b/lib/librte_gro/gro_vxlan_udp4.c
>> index 9e9df72..1fcfaf1 100644
>> --- a/lib/librte_gro/gro_vxlan_udp4.c
>> +++ b/lib/librte_gro/gro_vxlan_udp4.c
>> @@ -203,7 +203,7 @@
>>  }
>> 
>>  static inline int
>> -udp_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,
>> +udp4_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,
>>                 uint16_t frag_offset,
>>                 uint16_t ip_dl)
>>  {
>> @@ -213,7 +213,7 @@
>>         int ret = 0;
>> 
>>         l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
>> -       cmp = udp_check_neighbor(&item->inner_item, frag_offset, ip_dl,
>> +       cmp = udp4_check_neighbor(&item->inner_item, frag_offset, ip_dl,
>>                                         l2_offset);
>>         /* VXLAN outer IP ID is out of order, so don't touch it and
>>          * don't compare it.
>> @@ -379,7 +379,7 @@
>>                 item_idx = insert_new_item(tbl, pkt, start_time,
>>                                 INVALID_ARRAY_INDEX, frag_offset,
>>                                 is_last_frag, outer_ip_id, outer_is_atomic);
>> -               if (item_idx == INVALID_ARRAY_INDEX)
>> +               if (unlikely(item_idx == INVALID_ARRAY_INDEX))
>>                         return -1;
>>                 if (insert_new_flow(tbl, &key, item_idx) ==
>>                                 INVALID_ARRAY_INDEX) {
>> @@ -397,7 +397,7 @@
>>         cur_idx = tbl->flows[i].start_index;
>>         prev_idx = cur_idx;
>>         do {
>> -               cmp = udp_check_vxlan_neighbor(&(tbl->items[cur_idx]),
>> +               cmp = udp4_check_vxlan_neighbor(&(tbl->items[cur_idx]),
>>                                 frag_offset, ip_dl);
>>                 if (cmp) {
>>                         if (merge_two_vxlan_udp4_packets(
>> @@ -436,7 +436,7 @@
>>                 item_idx = insert_new_item(tbl, pkt, start_time,
>>                                 INVALID_ARRAY_INDEX, frag_offset,
>>                                 is_last_frag, outer_ip_id, outer_is_atomic);
>> -               if (item_idx == INVALID_ARRAY_INDEX)
>> +               if (unlikely(item_idx == INVALID_ARRAY_INDEX))
>>                         return -1;
>>                 tbl->items[item_idx].inner_item.next_pkt_idx = cur_idx;
>>                 tbl->flows[i].start_index = item_idx;
>> @@ -470,7 +470,7 @@
>>                 ip_dl = pkt->pkt_len - hdr_len;
>> diff --git a/lib/librte_gro/gro_vxlan_udp4.c b/lib/librte_gro/gro_vxlan_udp4.c
>> index 9e9df72..1fcfaf1 100644
>> --- a/lib/librte_gro/gro_vxlan_udp4.c
>> +++ b/lib/librte_gro/gro_vxlan_udp4.c
>> @@ -203,7 +203,7 @@
>>  }
>> 
>>  static inline int
>> -udp_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,
>> +udp4_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,
>>                 uint16_t frag_offset,
>>                 uint16_t ip_dl)
>>  {
>> @@ -213,7 +213,7 @@
>>         int ret = 0;
>> 
>>         l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
>> -       cmp = udp_check_neighbor(&item->inner_item, frag_offset, ip_dl,
>> +       cmp = udp4_check_neighbor(&item->inner_item, frag_offset, ip_dl,
>>                                         l2_offset);
>>         /* VXLAN outer IP ID is out of order, so don't touch it and
>>          * don't compare it.
>> @@ -379,7 +379,7 @@
>>                 item_idx = insert_new_item(tbl, pkt, start_time,
>>                                 INVALID_ARRAY_INDEX, frag_offset,
>>                                 is_last_frag, outer_ip_id, outer_is_atomic);
>> -               if (item_idx == INVALID_ARRAY_INDEX)
>> +               if (unlikely(item_idx == INVALID_ARRAY_INDEX))
>>                         return -1;
>>                 if (insert_new_flow(tbl, &key, item_idx) ==
>>                                 INVALID_ARRAY_INDEX) {
>> @@ -397,7 +397,7 @@
>>         cur_idx = tbl->flows[i].start_index;
>>         prev_idx = cur_idx;
>>         do {
>> -               cmp = udp_check_vxlan_neighbor(&(tbl->items[cur_idx]),
>> +               cmp = udp4_check_vxlan_neighbor(&(tbl->items[cur_idx]),
>>                                 frag_offset, ip_dl);
>>                 if (cmp) {
>>                         if (merge_two_vxlan_udp4_packets(
>> @@ -436,7 +436,7 @@
>>                 item_idx = insert_new_item(tbl, pkt, start_time,
>>                                 INVALID_ARRAY_INDEX, frag_offset,
>>                                 is_last_frag, outer_ip_id, outer_is_atomic);
>> -               if (item_idx == INVALID_ARRAY_INDEX)
>> +               if (unlikely(item_idx == INVALID_ARRAY_INDEX))
>>                         return -1;
>>                 tbl->items[item_idx].inner_item.next_pkt_idx = cur_idx;
>>                 tbl->flows[i].start_index = item_idx;
>> @@ -470,7 +470,7 @@
>>                 ip_dl = pkt->pkt_len - hdr_len;
>>                 frag_offset = tbl->items[item_idx].inner_item.frag_offset;
>>                 is_last_frag = tbl->items[item_idx].inner_item.is_last_frag;
>> -               cmp = udp_check_vxlan_neighbor(&(tbl->items[start_idx]),
>> +               cmp = udp4_check_vxlan_neighbor(&(tbl->items[start_idx]),
>>                                         frag_offset, ip_dl);
>>                 if (cmp) {
>>                         if (merge_two_vxlan_udp4_packets(
>> @@ -481,12 +481,10 @@
>>                                                         INVALID_ARRAY_INDEX);
>>                                 tbl->items[start_idx].inner_item.next_pkt_idx
>>                                         = item_idx;
>> -                       } else {
>> +                       } else
>>                                 return 0;
>> -                       }
>> -               } else {
>> +               } else
>>                         return 0;
>> -               }
>>         }
>> 
>>         return 0;
>> 
>> 
>> At 2020-09-11 12:24:16, "Jiayu Hu" <jiayu.hu@intel.com> wrote:
>> >Some comments are inline.
>> >
>> >Thanks,
>> >Jiayu
>> >
>> >On Thu, Sep 10, 2020 at 05:29:14PM +0800, yang_y_yi@163.com wrote:
>> >> From: Yi Yang <yangyi01@inspur.com>
>> >>
>> >> VXLAN UDP GRO can help improve VM-to-VM UDP performance
>> >> when VM is enabled UFO or GSO, GRO must be supported if
>> >> GSO or UFO is enabled, otherwise, performance gain will
>> >> be hurt.
>> >>
>> >> With this enabled in DPDK, OVS DPDK can leverage it to
>> >> improve VM-to-VM UDP performance, this will make sure
>> >> IP fragments will be reassembled once it is received
>> >> from physical NIC. It is very helpful in OVS DPDK VXLAN
>> >> TSO case.
>> >>
>> >> Signed-off-by: Yi Yang <yangyi01@inspur.com>
>> >> ---
>> >>  lib/librte_gro/gro_udp4.h       |   1 +
>> >>  lib/librte_gro/gro_vxlan_udp4.c | 548 ++++++++++++++++++++++++++++++++++++++++
>> >>  lib/librte_gro/gro_vxlan_udp4.h | 152 +++++++++++
>> >>  lib/librte_gro/meson.build      |   2 +-
>> >>  lib/librte_gro/rte_gro.c        | 115 +++++++--
>> >>  lib/librte_gro/rte_gro.h        |   3 +
>> >>  6 files changed, 794 insertions(+), 27 deletions(-)
>> >>  create mode 100644 lib/librte_gro/gro_vxlan_udp4.c
>> >>  create mode 100644 lib/librte_gro/gro_vxlan_udp4.h
>> >>
>> >> diff --git a/lib/librte_gro/gro_udp4.h b/lib/librte_gro/gro_udp4.h
>> >> index 0a078e4..d38b393 100644
>> >> --- a/lib/librte_gro/gro_udp4.h
>> >> +++ b/lib/librte_gro/gro_udp4.h
>> >> @@ -7,6 +7,7 @@
>> >>
>> >>  #include <rte_ip.h>
>> >>  #include <rte_udp.h>
>> >> +#include <rte_vxlan.h>
>> >>
>> >>  #define INVALID_ARRAY_INDEX 0xffffffffUL
>> >>  #define GRO_UDP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
>> >> diff --git a/lib/librte_gro/gro_vxlan_udp4.c b/lib/librte_gro/gro_vxlan_udp4.c
>> >> new file mode 100644
>> >> index 0000000..9e9df72
>> >> --- /dev/null
>> >> +++ b/lib/librte_gro/gro_vxlan_udp4.c
>> >> @@ -0,0 +1,548 @@
>> >> +/* SPDX-License-Identifier: BSD-3-Clause
>> >> + * Copyright(c) 2020 Inspur Corporation
>> >> + */
>> >> +
>> >> +
>> >> +
>> >> +static inline uint32_t
>> >> +delete_item(struct gro_vxlan_udp4_tbl *tbl,
>> >> +            uint32_t item_idx,
>> >> +            uint32_t prev_item_idx)
>> >> +{
>> >> +    uint32_t next_idx = tbl->items[item_idx].inner_item.next_pkt_idx;
>> >> +
>> >> +    /* NULL indicates an empty item. */
>> >> +    tbl->items[item_idx].inner_item.firstseg = NULL;
>> >> +    tbl->item_num--;
>> >> +    if (prev_item_idx != INVALID_ARRAY_INDEX)
>> >> +            tbl->items[prev_item_idx].inner_item.next_pkt_idx = next_idx;
>> >> +
>> >> +    return next_idx;
>> >> +}
>> >> +
>> >> +static inline uint32_t
>> >> +insert_new_flow(struct gro_vxlan_udp4_tbl *tbl,
>> >> +            struct vxlan_udp4_flow_key *src,
>> >> +            uint32_t item_idx)
>> >> +{
>> >> +    struct vxlan_udp4_flow_key *dst;
>> >> +    uint32_t flow_idx;
>> >> +
>> >> +    flow_idx = find_an_empty_flow(tbl);
>> >> +    if (unlikely(flow_idx == INVALID_ARRAY_INDEX))
>> >> +            return INVALID_ARRAY_INDEX;
>> >> +
>> >> +    dst = &(tbl->flows[flow_idx].key);
>> >> +
>> >> +    rte_ether_addr_copy(&(src->inner_key.eth_saddr),
>> >> +                    &(dst->inner_key.eth_saddr));
>> >> +    rte_ether_addr_copy(&(src->inner_key.eth_daddr),
>> >> +                    &(dst->inner_key.eth_daddr));
>> >> +    dst->inner_key.ip_src_addr = src->inner_key.ip_src_addr;
>> >> +    dst->inner_key.ip_dst_addr = src->inner_key.ip_dst_addr;
>> >> +    dst->inner_key.ip_id = src->inner_key.ip_id;
>> >> +
>> >> +    dst->vxlan_hdr.vx_flags = src->vxlan_hdr.vx_flags;
>> >> +    dst->vxlan_hdr.vx_vni = src->vxlan_hdr.vx_vni;
>> >> +    rte_ether_addr_copy(&(src->outer_eth_saddr), &(dst->outer_eth_saddr));
>> >> +    rte_ether_addr_copy(&(src->outer_eth_daddr), &(dst->outer_eth_daddr));
>> >> +    dst->outer_ip_src_addr = src->outer_ip_src_addr;
>> >> +    dst->outer_ip_dst_addr = src->outer_ip_dst_addr;
>> >> +    dst->outer_src_port = src->outer_src_port;
>> >> +    dst->outer_dst_port = src->outer_dst_port;
>> >> +
>> >> +    tbl->flows[flow_idx].start_index = item_idx;
>> >> +    tbl->flow_num++;
>> >> +
>> >> +    return flow_idx;
>> >> +}
>> >> +
>> >> +static inline int
>> >> +is_same_vxlan_udp4_flow(struct vxlan_udp4_flow_key k1,
>> >> +            struct vxlan_udp4_flow_key k2)
>> >> +{
>> >> +    /* src port is changing, so shouldn't use it for flow check */
>> >
>> >As I know, fragments of a vxlan/udp/ipv4 packet have same outer source
>> >port. In what cases outer source port will change?
>> 
>> In OVS, vxlan udp soure port is calculated from inner packet RSS, so for UDP fragment, RSS of first packet is
>> different from other fragments because RSS is calculated based on 5 tuples (src ip, src port, dst ip, dst port,
>> protocol type).
>> 
>> ovs/lib/netdev-native-tnl.h:
>> 
>> static inline uint32_t *
>> dp_packet_rss_ptr(const struct dp_packet *b)
>> {
>>     return CONST_CAST(uint32_t *, &b->mbuf.hash.rss);
>> }
>> 
>> static inline uint32_t
>> dp_packet_get_rss_hash(const struct dp_packet *p)
>> {
>>     return *dp_packet_rss_ptr(p);
>> }
>> 
>> static inline ovs_be16
>> netdev_tnl_get_src_port(struct dp_packet *packet)
>> {
>>     uint32_t hash;
>> 
>>     hash = dp_packet_get_rss_hash(packet);
>> 
>>     return htons((((uint64_t) hash * (tnl_udp_port_max - tnl_udp_port_min)) >> 32) +
>>                  tnl_udp_port_min);
>> }
>> 
>> void
>> netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED,
>>                            struct dp_packet *packet,
>>                            const struct ovs_action_push_tnl *data)
>> {
>>     struct udp_header *udp;
>>     int ip_tot_size;
>> 
>>     udp = netdev_tnl_push_ip_header(packet, data->header, data->header_len, &ip_tot_size);
>> 
>>     /* set udp src port */
>>     udp->udp_src = netdev_tnl_get_src_port(packet);
>>     udp->udp_len = htons(ip_tot_size);
>> 
>>     if (udp->udp_csum) {
>>         netdev_tnl_calc_udp_csum(udp, packet, ip_tot_size);
>>     }
>> }
>> 
>> This is special for ECMP which only checks outer header to differentiate different flows (innner packet). The
>> first fragment of original inner UDP is considered as different flow from other fragments.
>> 
>> I found this by debugging becuase the same UDP traffic did generate different vxlan UDP source port.
>> 
>
>You are right. I checked VxLAN RFC 7348, and outer source port is
>calculated from inner packet. In OVS case, the first fragment
>will have different outer source port compared with other fragments.
>Please add some comments in the code for people to better understand
>the code. In addition, need to remove outer_src_port from the structure
>vxlan_udp4_flow_key.
>
>> >
>> >> +    return (rte_is_same_ether_addr(&k1.outer_eth_saddr,
>> >> +                                    &k2.outer_eth_saddr) &&
>> >> +                    rte_is_same_ether_addr(&k1.outer_eth_daddr,
>> >> +                            &k2.outer_eth_daddr) &&
>> >> +                    (k1.outer_ip_src_addr == k2.outer_ip_src_addr) &&
>> >> +                    (k1.outer_ip_dst_addr == k2.outer_ip_dst_addr) &&
>> >> +                    (k1.outer_dst_port == k2.outer_dst_port) &&
>> >> +                    (k1.vxlan_hdr.vx_flags == k2.vxlan_hdr.vx_flags) &&
>> >> +                    (k1.vxlan_hdr.vx_vni == k2.vxlan_hdr.vx_vni) &&
>> >> +                    is_same_udp4_flow(k1.inner_key, k2.inner_key));
>> >> +}
>> >> +
>> >> +static inline int
>> >> +udp_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,
>> >> +            uint16_t frag_offset,
>> >> +            uint16_t ip_dl)
>> >
>> >Better rename it as udp4_check_vxlan_neighbor.
>> 
>> Ok
>> 
>> >
>> >> +{
>> >> +    struct rte_mbuf *pkt = item->inner_item.firstseg;
>> >> +    int cmp;
>> >> +    uint16_t l2_offset;
>> >> +    int ret = 0;
>> >> +
>> >> +    l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
>> >> +    cmp = udp_check_neighbor(&item->inner_item, frag_offset, ip_dl,
>> >> +                                    l2_offset);
>> >
>> >Why don't check outer IP ID? If outer DF bit is not set, all fragments
>> >should have incremental outer IP ID; if DF bit is set, we don't need
>> >to check outer IP ID.
>> 
>> By default, OVS set DF flag. I don't think we need to compare outer IP id because inner check
>> is enough to differentiate if they are same flow, isn't it?
>
>OVS is one of workloads using DPDK, and we can assume
>other cases have the same design as OVS. I think IP ID
>check is necessary.
>
>> 
>> >
>> >In addition, udp_check_neighbor is renamed, but you didn't change here.
>> 
>> Forgot to build it, Makefile has been removed from dpdk main branch, build isn't so convinient, will
>> use meson build to check it for next version.
>> 
>> >
>> >> +    /* VXLAN outer IP ID is out of order, so don't touch it and
>> >> +     * don't compare it.
>> >> +     */
>> >> +    if (cmp > 0)
>> >> +            /* Append the new packet. */
>> >> +            ret = 1;
>> >> +    else if (cmp < 0)
>> >> +            /* Prepend the new packet. */
>> >> +            ret = -1;
>> >> +
>> >> +    return ret;
>> >> +}
>> >> +
>> >>
>> >>  /*
>> >>   * GRO context structure. It keeps the table structures, which are
>> >> @@ -137,19 +148,27 @@ struct gro_ctx {
>> >>      struct gro_udp4_item udp_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {{0} };
>> >>
>> >>      /* Allocate a reassembly table for VXLAN TCP GRO */
>> >> -    struct gro_vxlan_tcp4_tbl vxlan_tbl;
>> >> -    struct gro_vxlan_tcp4_flow vxlan_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
>> >> -    struct gro_vxlan_tcp4_item vxlan_items[RTE_GRO_MAX_BURST_ITEM_NUM]
>> >> +    struct gro_vxlan_tcp4_tbl vxlan_tcp_tbl;
>> >> +    struct gro_vxlan_tcp4_flow vxlan_tcp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
>> >> +    struct gro_vxlan_tcp4_item vxlan_tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM]
>> >> +                    = {{{0}, 0, 0} };
>> >> +
>> >> +    /* Allocate a reassembly table for VXLAN UDP GRO */
>> >> +    struct gro_vxlan_udp4_tbl vxlan_udp_tbl;
>> >> +    struct gro_vxlan_udp4_flow vxlan_udp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
>> >> +    struct gro_vxlan_udp4_item vxlan_udp_items[RTE_GRO_MAX_BURST_ITEM_NUM]
>> >>                      = {{{0}, 0, 0} };
>> >>
>> >>      struct rte_mbuf *unprocess_pkts[nb_pkts];
>> >>      uint32_t item_num;
>> >>      int32_t ret;
>> >>      uint16_t i, unprocess_num = 0, nb_after_gro = nb_pkts;
>> >> -    uint8_t do_tcp4_gro = 0, do_vxlan_gro = 0, do_udp4_gro = 0;
>> >> +    uint8_t do_tcp4_gro = 0, do_vxlan_tcp_gro = 0, do_udp4_gro = 0,
>> >> +            do_vxlan_udp_gro = 0;
>> >>
>> >>      if (unlikely((param->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
>> >>                                      RTE_GRO_TCP_IPV4 |
>> >> +                                    RTE_GRO_IPV4_VXLAN_UDP_IPV4 |
>> >>                                      RTE_GRO_UDP_IPV4)) == 0))
>> >>              return nb_pkts;
>> >>
>> >> @@ -160,15 +179,28 @@ struct gro_ctx {
>> >>
>> >>      if (param->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) {
>> >>              for (i = 0; i < item_num; i++)
>> >> -                    vxlan_flows[i].start_index = INVALID_ARRAY_INDEX;
>> >> -
>> >> -            vxlan_tbl.flows = vxlan_flows;
>> >> -            vxlan_tbl.items = vxlan_items;
>> >> -            vxlan_tbl.flow_num = 0;
>> >> -            vxlan_tbl.item_num = 0;
>> >> -            vxlan_tbl.max_flow_num = item_num;
>> >> -            vxlan_tbl.max_item_num = item_num;
>> >> -            do_vxlan_gro = 1;
>> >> +                    vxlan_tcp_flows[i].start_index = INVALID_ARRAY_INDEX;
>> >> +
>> >> +            vxlan_tcp_tbl.flows = vxlan_tcp_flows;
>> >> +            vxlan_tcp_tbl.items = vxlan_tcp_items;
>> >> +            vxlan_tcp_tbl.flow_num = 0;
>> >> +            vxlan_tcp_tbl.item_num = 0;
>> >> +            vxlan_tcp_tbl.max_flow_num = item_num;
>> >> +            vxlan_tcp_tbl.max_item_num = item_num;
>> >> +            do_vxlan_tcp_gro = 1;
>> >> +    }
>> >> +
>> >> +    if (param->gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) {
>> >> +            for (i = 0; i < item_num; i++)
>> >> +                    vxlan_udp_flows[i].start_index = INVALID_ARRAY_INDEX;
>> >> +
>> >> +            vxlan_udp_tbl.flows = vxlan_udp_flows;
>> >> +            vxlan_udp_tbl.items = vxlan_udp_items;
>> >> +            vxlan_udp_tbl.flow_num = 0;
>> >> +            vxlan_udp_tbl.item_num = 0;
>> >> +            vxlan_udp_tbl.max_flow_num = item_num;
>> >> +            vxlan_udp_tbl.max_item_num = item_num;
>> >> +            do_vxlan_udp_gro = 1;
>> >>      }
>> >>
>> >>      if (param->gro_types & RTE_GRO_TCP_IPV4) {
>> >> @@ -204,9 +236,18 @@ struct gro_ctx {
>> >>               * will be flushed from the tables.
>> >>               */
>> >>              if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
>> >> -                            do_vxlan_gro) {
>> >> +                            do_vxlan_tcp_gro) {
>> >>                      ret = gro_vxlan_tcp4_reassemble(pkts[i],
>> >> -                                                    &vxlan_tbl, 0);
>> >> +                                                    &vxlan_tcp_tbl, 0);
>> >> +                    if (ret > 0)
>> >> +                            /* Merge successfully */
>> >> +                            nb_after_gro--;
>> >> +                    else if (ret < 0)
>> >> +                            unprocess_pkts[unprocess_num++] = pkts[i];
>> >> +            } else if (IS_IPV4_VXLAN_UDP4_PKT(pkts[i]->packet_type) &&
>> >> +                            do_vxlan_udp_gro) {
>> >> +                    ret = gro_vxlan_udp4_reassemble(pkts[i],
>> >> +                                                    &vxlan_udp_tbl, 0);
>> >>                      if (ret > 0)
>> >>                              /* Merge successfully */
>> >>                              nb_after_gro--;
>> >> @@ -236,11 +277,17 @@ struct gro_ctx {
>> >>               || (unprocess_num < nb_pkts)) {
>> >>              i = 0;
>> >>              /* Flush all packets from the tables */
>> >> -            if (do_vxlan_gro) {
>> >> -                    i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tbl,
>> >> +            if (do_vxlan_tcp_gro) {
>> >> +                    i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tcp_tbl,
>> >>                                      0, pkts, nb_pkts);
>> >>              }
>> >>
>> >> +            if (do_vxlan_udp_gro) {
>> >> +                    i += gro_vxlan_udp4_tbl_timeout_flush(&vxlan_udp_tbl,
>> >> +                                    0, &pkts[i], nb_pkts - i);
>> >> +
>> >> +            }
>> >> +
>> >>              if (do_tcp4_gro) {
>> >>                      i += gro_tcp4_tbl_timeout_flush(&tcp_tbl, 0,
>> >>                                      &pkts[i], nb_pkts - i);
>> >> @@ -269,33 +316,42 @@ struct gro_ctx {
>> >>  {
>> >>      struct rte_mbuf *unprocess_pkts[nb_pkts];
>> >>      struct gro_ctx *gro_ctx = ctx;
>> >> -    void *tcp_tbl, *udp_tbl, *vxlan_tbl;
>> >> +    void *tcp_tbl, *udp_tbl, *vxlan_tcp_tbl, *vxlan_udp_tbl;
>> >>      uint64_t current_time;
>> >>      uint16_t i, unprocess_num = 0;
>> >> -    uint8_t do_tcp4_gro, do_vxlan_gro, do_udp4_gro;
>> >> +    uint8_t do_tcp4_gro, do_vxlan_tcp_gro, do_udp4_gro, do_vxlan_udp_gro;
>> >>
>> >>      if (unlikely((gro_ctx->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
>> >>                                      RTE_GRO_TCP_IPV4 |
>> >> +                                    RTE_GRO_IPV4_VXLAN_UDP_IPV4 |
>> >>                                      RTE_GRO_UDP_IPV4)) == 0))
>> >>              return nb_pkts;
>> >>
>> >>      tcp_tbl = gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX];
>> >> -    vxlan_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];
>> >> +    vxlan_tcp_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];
>> >>      udp_tbl = gro_ctx->tbls[RTE_GRO_UDP_IPV4_INDEX];
>> >> +    vxlan_udp_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX];
>> >>
>> >>      do_tcp4_gro = (gro_ctx->gro_types & RTE_GRO_TCP_IPV4) ==
>> >>              RTE_GRO_TCP_IPV4;
>> >> -    do_vxlan_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) ==
>> >> +    do_vxlan_tcp_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) ==
>> >>              RTE_GRO_IPV4_VXLAN_TCP_IPV4;
>> >>      do_udp4_gro = (gro_ctx->gro_types & RTE_GRO_UDP_IPV4) ==
>> >>              RTE_GRO_UDP_IPV4;
>> >> +    do_vxlan_udp_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) ==
>> >> +            RTE_GRO_IPV4_VXLAN_UDP_IPV4;
>> >>
>> >>      current_time = rte_rdtsc();
>> >>
>> >>      for (i = 0; i < nb_pkts; i++) {
>> >>              if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
>> >> -                            do_vxlan_gro) {
>> >> -                    if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tbl,
>> >> +                            do_vxlan_tcp_gro) {
>> >> +                    if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tcp_tbl,
>> >> +                                            current_time) < 0)
>> >> +                            unprocess_pkts[unprocess_num++] = pkts[i];
>> >> +            } else if (IS_IPV4_VXLAN_UDP4_PKT(pkts[i]->packet_type) &&
>> >> +                            do_vxlan_udp_gro) {
>> >> +                    if (gro_vxlan_udp4_reassemble(pkts[i], vxlan_udp_tbl,
>> >>                                              current_time) < 0)
>> >>                              unprocess_pkts[unprocess_num++] = pkts[i];
>> >>              } else if (IS_IPV4_TCP_PKT(pkts[i]->packet_type) &&
>> >> @@ -341,6 +397,13 @@ struct gro_ctx {
>> >>              left_nb_out = max_nb_out - num;
>> >>      }
>> >>
>> >> +    if ((gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) && max_nb_out > 0) {
>> >> +            num += gro_vxlan_udp4_tbl_timeout_flush(gro_ctx->tbls[
>> >> +                            RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX],
>> >> +                            flush_timestamp, &out[num], left_nb_out);
>> >> +            left_nb_out = max_nb_out - num;
>> >
>> >Don't need to calculate left_nb_out here. In the previous patch, you
>> >also calculate it for UDP GRO. Please remove it both.
>> 
>> I think keeping it here isn't bad thing, if someone will add a new GRO type here, he/she won't make a mistake.
>
>I don't think so. It's unnecessary for current design.
>If others want to add new GRO types, they will write
>correct code.
>
>> 
>> >
>> 
>> 
>> 
>>  
>>
Hu, Jiayu Sept. 14, 2020, 8:50 a.m. UTC | #4
Out IP ID is not used to check if they belong to the same flow,
but check if they are neighbors. If two vxlan/udp fragments are
neighbors, their outer IP ID and outer frag_oft should be incremental.
I still cannot understand why ignore outer IP ID when DF is 0.
Can you give more explanation?

From: yang_y_yi <yang_y_yi@163.com>
Sent: Monday, September 14, 2020 4:27 PM
To: Hu, Jiayu <jiayu.hu@intel.com>
Cc: thomas@monjalon.net; yangyi01@inspur.com; dev@dpdk.org
Subject: Re:Re: Re: [PATCH v5 2/2] gro: add VXLAN UDP GRO support
Importance: High

For outer_ip_id, there is same ip_id disorder issue existing as UDP GRO, so we can't use ip_id +/-1 to check if they are same flow, here is my incrmental change for it with more comments.



@@ -189,7 +188,12 @@
 is_same_vxlan_udp4_flow(struct vxlan_udp4_flow_key k1,
                struct vxlan_udp4_flow_key k2)
 {
-       /* src port is changing, so shouldn't use it for flow check */
+       /* For VxLAN packet, outer udp src port is calculated from
+        * inner packet RSS hash, udp src port of the first UDP
+        * fragment is different from one of other UDP fragments
+        * even if they are same flow, so we have to skip outer udp
+        * src port comparison here.
+        */
        return (rte_is_same_ether_addr(&k1.outer_eth_saddr,
                                        &k2.outer_eth_saddr) &&
                        rte_is_same_ether_addr(&k1.outer_eth_daddr,
@@ -212,12 +216,16 @@
        uint16_t l2_offset;
        int ret = 0;

+       /* Note: if outer DF bit is set, i.e outer_is_atomic is 0,
+        * we needn't compare outer_ip_id because they are same,
+        * for the case outer_is_atomic is 1, we also have no way
+        * to comapre outer_ip_id because the received packets can
+        * be out of order. So ignore outer_ip_id comparison here.
+        */
+
        l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
        cmp = udp4_check_neighbor(&item->inner_item, frag_offset, ip_dl,
                                        l2_offset);
-       /* VXLAN outer IP ID is out of order, so don't touch it and
-        * don't compare it.
-        */
        if (cmp > 0)
                /* Append the new packet. */
                ret = 1;
@@ -354,7 +362,9 @@
        rte_ether_addr_copy(&(outer_eth_hdr->d_addr), &(key.outer_eth_daddr));
        key.outer_ip_src_addr = outer_ipv4_hdr->src_addr;
        key.outer_ip_dst_addr = outer_ipv4_hdr->dst_addr;
-       key.outer_src_port = udp_hdr->src_port;
+       /* Note: It is unnecessary to save outer_src_port here because it can
+        * be different for VxLAN UDP fragments from the same flow.
+        */
        key.outer_dst_port = udp_hdr->dst_port;

        /* Search for a matched flow. */







At 2020-09-14 12:21:26, "Jiayu Hu" <jiayu.hu@intel.com<mailto:jiayu.hu@intel.com>> wrote:

>Replies are inline.

>

>BTW, you need to update the programmer guide

>doc/guides/prog_guide/generic_receive_offload_lib.rst and

>release note doc/guides/rel_notes/release_20_11.rst.

>

>Thanks,

>Jiayu

>

>On Mon, Sep 14, 2020 at 10:13:44AM +0800, yang_y_yi wrote:

>> Jiayu, thank you so much, please check my replies for some of your comments,

>> here is incremental patch. I built it by meson this time :-)

>>

>> diff --git a/lib/librte_gro/gro_vxlan_udp4.c b/lib/librte_gro/gro_vxlan_udp4.c

>> index 9e9df72..1fcfaf1 100644

>> --- a/lib/librte_gro/gro_vxlan_udp4.c

>> +++ b/lib/librte_gro/gro_vxlan_udp4.c

>> @@ -203,7 +203,7 @@

>>  }

>>

>>  static inline int

>> -udp_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,

>> +udp4_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,

>>                 uint16_t frag_offset,

>>                 uint16_t ip_dl)

>>  {

>> @@ -213,7 +213,7 @@

>>         int ret = 0;

>>

>>         l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;

>> -       cmp = udp_check_neighbor(&item->inner_item, frag_offset, ip_dl,

>> +       cmp = udp4_check_neighbor(&item->inner_item, frag_offset, ip_dl,

>>                                         l2_offset);

>>         /* VXLAN outer IP ID is out of order, so don't touch it and

>>          * don't compare it.

>> @@ -379,7 +379,7 @@

>>                 item_idx = insert_new_item(tbl, pkt, start_time,

>>                                 INVALID_ARRAY_INDEX, frag_offset,

>>                                 is_last_frag, outer_ip_id, outer_is_atomic);

>> -               if (item_idx == INVALID_ARRAY_INDEX)

>> +               if (unlikely(item_idx == INVALID_ARRAY_INDEX))

>>                         return -1;

>>                 if (insert_new_flow(tbl, &key, item_idx) ==

>>                                 INVALID_ARRAY_INDEX) {

>> @@ -397,7 +397,7 @@

>>         cur_idx = tbl->flows[i].start_index;

>>         prev_idx = cur_idx;

>>         do {

>> -               cmp = udp_check_vxlan_neighbor(&(tbl->items[cur_idx]),

>> +               cmp = udp4_check_vxlan_neighbor(&(tbl->items[cur_idx]),

>>                                 frag_offset, ip_dl);

>>                 if (cmp) {

>>                         if (merge_two_vxlan_udp4_packets(

>> @@ -436,7 +436,7 @@

>>                 item_idx = insert_new_item(tbl, pkt, start_time,

>>                                 INVALID_ARRAY_INDEX, frag_offset,

>>                                 is_last_frag, outer_ip_id, outer_is_atomic);

>> -               if (item_idx == INVALID_ARRAY_INDEX)

>> +               if (unlikely(item_idx == INVALID_ARRAY_INDEX))

>>                         return -1;

>>                 tbl->items[item_idx].inner_item.next_pkt_idx = cur_idx;

>>                 tbl->flows[i].start_index = item_idx;

>> @@ -470,7 +470,7 @@

>>                 ip_dl = pkt->pkt_len - hdr_len;

>> diff --git a/lib/librte_gro/gro_vxlan_udp4.c b/lib/librte_gro/gro_vxlan_udp4.c

>> index 9e9df72..1fcfaf1 100644

>> --- a/lib/librte_gro/gro_vxlan_udp4.c

>> +++ b/lib/librte_gro/gro_vxlan_udp4.c

>> @@ -203,7 +203,7 @@

>>  }

>>

>>  static inline int

>> -udp_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,

>> +udp4_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,

>>                 uint16_t frag_offset,

>>                 uint16_t ip_dl)

>>  {

>> @@ -213,7 +213,7 @@

>>         int ret = 0;

>>

>>         l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;

>> -       cmp = udp_check_neighbor(&item->inner_item, frag_offset, ip_dl,

>> +       cmp = udp4_check_neighbor(&item->inner_item, frag_offset, ip_dl,

>>                                         l2_offset);

>>         /* VXLAN outer IP ID is out of order, so don't touch it and

>>          * don't compare it.

>> @@ -379,7 +379,7 @@

>>                 item_idx = insert_new_item(tbl, pkt, start_time,

>>                                 INVALID_ARRAY_INDEX, frag_offset,

>>                                 is_last_frag, outer_ip_id, outer_is_atomic);

>> -               if (item_idx == INVALID_ARRAY_INDEX)

>> +               if (unlikely(item_idx == INVALID_ARRAY_INDEX))

>>                         return -1;

>>                 if (insert_new_flow(tbl, &key, item_idx) ==

>>                                 INVALID_ARRAY_INDEX) {

>> @@ -397,7 +397,7 @@

>>         cur_idx = tbl->flows[i].start_index;

>>         prev_idx = cur_idx;

>>         do {

>> -               cmp = udp_check_vxlan_neighbor(&(tbl->items[cur_idx]),

>> +               cmp = udp4_check_vxlan_neighbor(&(tbl->items[cur_idx]),

>>                                 frag_offset, ip_dl);

>>                 if (cmp) {

>>                         if (merge_two_vxlan_udp4_packets(

>> @@ -436,7 +436,7 @@

>>                 item_idx = insert_new_item(tbl, pkt, start_time,

>>                                 INVALID_ARRAY_INDEX, frag_offset,

>>                                 is_last_frag, outer_ip_id, outer_is_atomic);

>> -               if (item_idx == INVALID_ARRAY_INDEX)

>> +               if (unlikely(item_idx == INVALID_ARRAY_INDEX))

>>                         return -1;

>>                 tbl->items[item_idx].inner_item.next_pkt_idx = cur_idx;

>>                 tbl->flows[i].start_index = item_idx;

>> @@ -470,7 +470,7 @@

>>                 ip_dl = pkt->pkt_len - hdr_len;

>>                 frag_offset = tbl->items[item_idx].inner_item.frag_offset;

>>                 is_last_frag = tbl->items[item_idx].inner_item.is_last_frag;

>> -               cmp = udp_check_vxlan_neighbor(&(tbl->items[start_idx]),

>> +               cmp = udp4_check_vxlan_neighbor(&(tbl->items[start_idx]),

>>                                         frag_offset, ip_dl);

>>                 if (cmp) {

>>                         if (merge_two_vxlan_udp4_packets(

>> @@ -481,12 +481,10 @@

>>                                                         INVALID_ARRAY_INDEX);

>>                                 tbl->items[start_idx].inner_item.next_pkt_idx

>>                                         = item_idx;

>> -                       } else {

>> +                       } else

>>                                 return 0;

>> -                       }

>> -               } else {

>> +               } else

>>                         return 0;

>> -               }

>>         }

>>

>>         return 0;

>>

>>

>> At 2020-09-11 12:24:16, "Jiayu Hu" <jiayu.hu@intel.com<mailto:jiayu.hu@intel.com>> wrote:

>> >Some comments are inline.

>> >

>> >Thanks,

>> >Jiayu

>> >

>> >On Thu, Sep 10, 2020 at 05:29:14PM +0800, yang_y_yi@163.com<mailto:yang_y_yi@163.com> wrote:

>> >> From: Yi Yang <yangyi01@inspur.com<mailto:yangyi01@inspur.com>>

>> >>

>> >> VXLAN UDP GRO can help improve VM-to-VM UDP performance

>> >> when VM is enabled UFO or GSO, GRO must be supported if

>> >> GSO or UFO is enabled, otherwise, performance gain will

>> >> be hurt.

>> >>

>> >> With this enabled in DPDK, OVS DPDK can leverage it to

>> >> improve VM-to-VM UDP performance, this will make sure

>> >> IP fragments will be reassembled once it is received

>> >> from physical NIC. It is very helpful in OVS DPDK VXLAN

>> >> TSO case.

>> >>

>> >> Signed-off-by: Yi Yang <yangyi01@inspur.com<mailto:yangyi01@inspur.com>>

>> >> ---

>> >>  lib/librte_gro/gro_udp4.h       |   1 +

>> >>  lib/librte_gro/gro_vxlan_udp4.c | 548 ++++++++++++++++++++++++++++++++++++++++

>> >>  lib/librte_gro/gro_vxlan_udp4.h | 152 +++++++++++

>> >>  lib/librte_gro/meson.build      |   2 +-

>> >>  lib/librte_gro/rte_gro.c        | 115 +++++++--

>> >>  lib/librte_gro/rte_gro.h        |   3 +

>> >>  6 files changed, 794 insertions(+), 27 deletions(-)

>> >>  create mode 100644 lib/librte_gro/gro_vxlan_udp4.c

>> >>  create mode 100644 lib/librte_gro/gro_vxlan_udp4.h

>> >>

>> >> diff --git a/lib/librte_gro/gro_udp4.h b/lib/librte_gro/gro_udp4.h

>> >> index 0a078e4..d38b393 100644

>> >> --- a/lib/librte_gro/gro_udp4.h

>> >> +++ b/lib/librte_gro/gro_udp4.h

>> >> @@ -7,6 +7,7 @@

>> >>

>> >>  #include <rte_ip.h>

>> >>  #include <rte_udp.h>

>> >> +#include <rte_vxlan.h>

>> >>

>> >>  #define INVALID_ARRAY_INDEX 0xffffffffUL

>> >>  #define GRO_UDP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)

>> >> diff --git a/lib/librte_gro/gro_vxlan_udp4.c b/lib/librte_gro/gro_vxlan_udp4.c

>> >> new file mode 100644

>> >> index 0000000..9e9df72

>> >> --- /dev/null

>> >> +++ b/lib/librte_gro/gro_vxlan_udp4.c

>> >> @@ -0,0 +1,548 @@

>> >> +/* SPDX-License-Identifier: BSD-3-Clause

>> >> + * Copyright(c) 2020 Inspur Corporation

>> >> + */

>> >> +

>> >> +

>> >> +

>> >> +static inline uint32_t

>> >> +delete_item(struct gro_vxlan_udp4_tbl *tbl,

>> >> +            uint32_t item_idx,

>> >> +            uint32_t prev_item_idx)

>> >> +{

>> >> +    uint32_t next_idx = tbl->items[item_idx].inner_item.next_pkt_idx;

>> >> +

>> >> +    /* NULL indicates an empty item. */

>> >> +    tbl->items[item_idx].inner_item.firstseg = NULL;

>> >> +    tbl->item_num--;

>> >> +    if (prev_item_idx != INVALID_ARRAY_INDEX)

>> >> +            tbl->items[prev_item_idx].inner_item.next_pkt_idx = next_idx;

>> >> +

>> >> +    return next_idx;

>> >> +}

>> >> +

>> >> +static inline uint32_t

>> >> +insert_new_flow(struct gro_vxlan_udp4_tbl *tbl,

>> >> +            struct vxlan_udp4_flow_key *src,

>> >> +            uint32_t item_idx)

>> >> +{

>> >> +    struct vxlan_udp4_flow_key *dst;

>> >> +    uint32_t flow_idx;

>> >> +

>> >> +    flow_idx = find_an_empty_flow(tbl);

>> >> +    if (unlikely(flow_idx == INVALID_ARRAY_INDEX))

>> >> +            return INVALID_ARRAY_INDEX;

>> >> +

>> >> +    dst = &(tbl->flows[flow_idx].key);

>> >> +

>> >> +    rte_ether_addr_copy(&(src->inner_key.eth_saddr),

>> >> +                    &(dst->inner_key.eth_saddr));

>> >> +    rte_ether_addr_copy(&(src->inner_key.eth_daddr),

>> >> +                    &(dst->inner_key.eth_daddr));

>> >> +    dst->inner_key.ip_src_addr = src->inner_key.ip_src_addr;

>> >> +    dst->inner_key.ip_dst_addr = src->inner_key.ip_dst_addr;

>> >> +    dst->inner_key.ip_id = src->inner_key.ip_id;

>> >> +

>> >> +    dst->vxlan_hdr.vx_flags = src->vxlan_hdr.vx_flags;

>> >> +    dst->vxlan_hdr.vx_vni = src->vxlan_hdr.vx_vni;

>> >> +    rte_ether_addr_copy(&(src->outer_eth_saddr), &(dst->outer_eth_saddr));

>> >> +    rte_ether_addr_copy(&(src->outer_eth_daddr), &(dst->outer_eth_daddr));

>> >> +    dst->outer_ip_src_addr = src->outer_ip_src_addr;

>> >> +    dst->outer_ip_dst_addr = src->outer_ip_dst_addr;

>> >> +    dst->outer_src_port = src->outer_src_port;

>> >> +    dst->outer_dst_port = src->outer_dst_port;

>> >> +

>> >> +    tbl->flows[flow_idx].start_index = item_idx;

>> >> +    tbl->flow_num++;

>> >> +

>> >> +    return flow_idx;

>> >> +}

>> >> +

>> >> +static inline int

>> >> +is_same_vxlan_udp4_flow(struct vxlan_udp4_flow_key k1,

>> >> +            struct vxlan_udp4_flow_key k2)

>> >> +{

>> >> +    /* src port is changing, so shouldn't use it for flow check */

>> >

>> >As I know, fragments of a vxlan/udp/ipv4 packet have same outer source

>> >port. In what cases outer source port will change?

>>

>> In OVS, vxlan udp soure port is calculated from inner packet RSS, so for UDP fragment, RSS of first packet is

>> different from other fragments because RSS is calculated based on 5 tuples (src ip, src port, dst ip, dst port,

>> protocol type).

>>

>> ovs/lib/netdev-native-tnl.h:

>>

>> static inline uint32_t *

>> dp_packet_rss_ptr(const struct dp_packet *b)

>> {

>>     return CONST_CAST(uint32_t *, &b->mbuf.hash.rss);

>> }

>>

>> static inline uint32_t

>> dp_packet_get_rss_hash(const struct dp_packet *p)

>> {

>>     return *dp_packet_rss_ptr(p);

>> }

>>

>> static inline ovs_be16

>> netdev_tnl_get_src_port(struct dp_packet *packet)

>> {

>>     uint32_t hash;

>>

>>     hash = dp_packet_get_rss_hash(packet);

>>

>>     return htons((((uint64_t) hash * (tnl_udp_port_max - tnl_udp_port_min)) >> 32) +

>>                  tnl_udp_port_min);

>> }

>>

>> void

>> netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED,

>>                            struct dp_packet *packet,

>>                            const struct ovs_action_push_tnl *data)

>> {

>>     struct udp_header *udp;

>>     int ip_tot_size;

>>

>>     udp = netdev_tnl_push_ip_header(packet, data->header, data->header_len, &ip_tot_size);

>>

>>     /* set udp src port */

>>     udp->udp_src = netdev_tnl_get_src_port(packet);

>>     udp->udp_len = htons(ip_tot_size);

>>

>>     if (udp->udp_csum) {

>>         netdev_tnl_calc_udp_csum(udp, packet, ip_tot_size);

>>     }

>> }

>>

>> This is special for ECMP which only checks outer header to differentiate different flows (innner packet). The

>> first fragment of original inner UDP is considered as different flow from other fragments.

>>

>> I found this by debugging becuase the same UDP traffic did generate different vxlan UDP source port.

>>

>

>You are right. I checked VxLAN RFC 7348, and outer source port is

>calculated from inner packet. In OVS case, the first fragment

>will have different outer source port compared with other fragments.

>Please add some comments in the code for people to better understand

>the code. In addition, need to remove outer_src_port from the structure

>vxlan_udp4_flow_key.

>

>> >

>> >> +    return (rte_is_same_ether_addr(&k1.outer_eth_saddr,

>> >> +                                    &k2.outer_eth_saddr) &&

>> >> +                    rte_is_same_ether_addr(&k1.outer_eth_daddr,

>> >> +                            &k2.outer_eth_daddr) &&

>> >> +                    (k1.outer_ip_src_addr == k2.outer_ip_src_addr) &&

>> >> +                    (k1.outer_ip_dst_addr == k2.outer_ip_dst_addr) &&

>> >> +                    (k1.outer_dst_port == k2.outer_dst_port) &&

>> >> +                    (k1.vxlan_hdr.vx_flags == k2.vxlan_hdr.vx_flags) &&

>> >> +                    (k1.vxlan_hdr.vx_vni == k2.vxlan_hdr.vx_vni) &&

>> >> +                    is_same_udp4_flow(k1.inner_key, k2.inner_key));

>> >> +}

>> >> +

>> >> +static inline int

>> >> +udp_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,

>> >> +            uint16_t frag_offset,

>> >> +            uint16_t ip_dl)

>> >

>> >Better rename it as udp4_check_vxlan_neighbor.

>>

>> Ok

>>

>> >

>> >> +{

>> >> +    struct rte_mbuf *pkt = item->inner_item.firstseg;

>> >> +    int cmp;

>> >> +    uint16_t l2_offset;

>> >> +    int ret = 0;

>> >> +

>> >> +    l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;

>> >> +    cmp = udp_check_neighbor(&item->inner_item, frag_offset, ip_dl,

>> >> +                                    l2_offset);

>> >

>> >Why don't check outer IP ID? If outer DF bit is not set, all fragments

>> >should have incremental outer IP ID; if DF bit is set, we don't need

>> >to check outer IP ID.

>>

>> By default, OVS set DF flag. I don't think we need to compare outer IP id because inner check

>> is enough to differentiate if they are same flow, isn't it?

>

>OVS is one of workloads using DPDK, and we can assume

>other cases have the same design as OVS. I think IP ID

>check is necessary.

>

>>

>> >

>> >In addition, udp_check_neighbor is renamed, but you didn't change here.

>>

>> Forgot to build it, Makefile has been removed from dpdk main branch, build isn't so convinient, will

>> use meson build to check it for next version.

>>

>> >

>> >> +    /* VXLAN outer IP ID is out of order, so don't touch it and

>> >> +     * don't compare it.

>> >> +     */

>> >> +    if (cmp > 0)

>> >> +            /* Append the new packet. */

>> >> +            ret = 1;

>> >> +    else if (cmp < 0)

>> >> +            /* Prepend the new packet. */

>> >> +            ret = -1;

>> >> +

>> >> +    return ret;

>> >> +}

>> >> +

>> >>

>> >>  /*

>> >>   * GRO context structure. It keeps the table structures, which are

>> >> @@ -137,19 +148,27 @@ struct gro_ctx {

>> >>      struct gro_udp4_item udp_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {{0} };

>> >>

>> >>      /* Allocate a reassembly table for VXLAN TCP GRO */

>> >> -    struct gro_vxlan_tcp4_tbl vxlan_tbl;

>> >> -    struct gro_vxlan_tcp4_flow vxlan_flows[RTE_GRO_MAX_BURST_ITEM_NUM];

>> >> -    struct gro_vxlan_tcp4_item vxlan_items[RTE_GRO_MAX_BURST_ITEM_NUM]

>> >> +    struct gro_vxlan_tcp4_tbl vxlan_tcp_tbl;

>> >> +    struct gro_vxlan_tcp4_flow vxlan_tcp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];

>> >> +    struct gro_vxlan_tcp4_item vxlan_tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM]

>> >> +                    = {{{0}, 0, 0} };

>> >> +

>> >> +    /* Allocate a reassembly table for VXLAN UDP GRO */

>> >> +    struct gro_vxlan_udp4_tbl vxlan_udp_tbl;

>> >> +    struct gro_vxlan_udp4_flow vxlan_udp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];

>> >> +    struct gro_vxlan_udp4_item vxlan_udp_items[RTE_GRO_MAX_BURST_ITEM_NUM]

>> >>                      = {{{0}, 0, 0} };

>> >>

>> >>      struct rte_mbuf *unprocess_pkts[nb_pkts];

>> >>      uint32_t item_num;

>> >>      int32_t ret;

>> >>      uint16_t i, unprocess_num = 0, nb_after_gro = nb_pkts;

>> >> -    uint8_t do_tcp4_gro = 0, do_vxlan_gro = 0, do_udp4_gro = 0;

>> >> +    uint8_t do_tcp4_gro = 0, do_vxlan_tcp_gro = 0, do_udp4_gro = 0,

>> >> +            do_vxlan_udp_gro = 0;

>> >>

>> >>      if (unlikely((param->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |

>> >>                                      RTE_GRO_TCP_IPV4 |

>> >> +                                    RTE_GRO_IPV4_VXLAN_UDP_IPV4 |

>> >>                                      RTE_GRO_UDP_IPV4)) == 0))

>> >>              return nb_pkts;

>> >>

>> >> @@ -160,15 +179,28 @@ struct gro_ctx {

>> >>

>> >>      if (param->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) {

>> >>              for (i = 0; i < item_num; i++)

>> >> -                    vxlan_flows[i].start_index = INVALID_ARRAY_INDEX;

>> >> -

>> >> -            vxlan_tbl.flows = vxlan_flows;

>> >> -            vxlan_tbl.items = vxlan_items;

>> >> -            vxlan_tbl.flow_num = 0;

>> >> -            vxlan_tbl.item_num = 0;

>> >> -            vxlan_tbl.max_flow_num = item_num;

>> >> -            vxlan_tbl.max_item_num = item_num;

>> >> -            do_vxlan_gro = 1;

>> >> +                    vxlan_tcp_flows[i].start_index = INVALID_ARRAY_INDEX;

>> >> +

>> >> +            vxlan_tcp_tbl.flows = vxlan_tcp_flows;

>> >> +            vxlan_tcp_tbl.items = vxlan_tcp_items;

>> >> +            vxlan_tcp_tbl.flow_num = 0;

>> >> +            vxlan_tcp_tbl.item_num = 0;

>> >> +            vxlan_tcp_tbl.max_flow_num = item_num;

>> >> +            vxlan_tcp_tbl.max_item_num = item_num;

>> >> +            do_vxlan_tcp_gro = 1;

>> >> +    }

>> >> +

>> >> +    if (param->gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) {

>> >> +            for (i = 0; i < item_num; i++)

>> >> +                    vxlan_udp_flows[i].start_index = INVALID_ARRAY_INDEX;

>> >> +

>> >> +            vxlan_udp_tbl.flows = vxlan_udp_flows;

>> >> +            vxlan_udp_tbl.items = vxlan_udp_items;

>> >> +            vxlan_udp_tbl.flow_num = 0;

>> >> +            vxlan_udp_tbl.item_num = 0;

>> >> +            vxlan_udp_tbl.max_flow_num = item_num;

>> >> +            vxlan_udp_tbl.max_item_num = item_num;

>> >> +            do_vxlan_udp_gro = 1;

>> >>      }

>> >>

>> >>      if (param->gro_types & RTE_GRO_TCP_IPV4) {

>> >> @@ -204,9 +236,18 @@ struct gro_ctx {

>> >>               * will be flushed from the tables.

>> >>               */

>> >>              if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&

>> >> -                            do_vxlan_gro) {

>> >> +                            do_vxlan_tcp_gro) {

>> >>                      ret = gro_vxlan_tcp4_reassemble(pkts[i],

>> >> -                                                    &vxlan_tbl, 0);

>> >> +                                                    &vxlan_tcp_tbl, 0);

>> >> +                    if (ret > 0)

>> >> +                            /* Merge successfully */

>> >> +                            nb_after_gro--;

>> >> +                    else if (ret < 0)

>> >> +                            unprocess_pkts[unprocess_num++] = pkts[i];

>> >> +            } else if (IS_IPV4_VXLAN_UDP4_PKT(pkts[i]->packet_type) &&

>> >> +                            do_vxlan_udp_gro) {

>> >> +                    ret = gro_vxlan_udp4_reassemble(pkts[i],

>> >> +                                                    &vxlan_udp_tbl, 0);

>> >>                      if (ret > 0)

>> >>                              /* Merge successfully */

>> >>                              nb_after_gro--;

>> >> @@ -236,11 +277,17 @@ struct gro_ctx {

>> >>               || (unprocess_num < nb_pkts)) {

>> >>              i = 0;

>> >>              /* Flush all packets from the tables */

>> >> -            if (do_vxlan_gro) {

>> >> -                    i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tbl,

>> >> +            if (do_vxlan_tcp_gro) {

>> >> +                    i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tcp_tbl,

>> >>                                      0, pkts, nb_pkts);

>> >>              }

>> >>

>> >> +            if (do_vxlan_udp_gro) {

>> >> +                    i += gro_vxlan_udp4_tbl_timeout_flush(&vxlan_udp_tbl,

>> >> +                                    0, &pkts[i], nb_pkts - i);

>> >> +

>> >> +            }

>> >> +

>> >>              if (do_tcp4_gro) {

>> >>                      i += gro_tcp4_tbl_timeout_flush(&tcp_tbl, 0,

>> >>                                      &pkts[i], nb_pkts - i);

>> >> @@ -269,33 +316,42 @@ struct gro_ctx {

>> >>  {

>> >>      struct rte_mbuf *unprocess_pkts[nb_pkts];

>> >>      struct gro_ctx *gro_ctx = ctx;

>> >> -    void *tcp_tbl, *udp_tbl, *vxlan_tbl;

>> >> +    void *tcp_tbl, *udp_tbl, *vxlan_tcp_tbl, *vxlan_udp_tbl;

>> >>      uint64_t current_time;

>> >>      uint16_t i, unprocess_num = 0;

>> >> -    uint8_t do_tcp4_gro, do_vxlan_gro, do_udp4_gro;

>> >> +    uint8_t do_tcp4_gro, do_vxlan_tcp_gro, do_udp4_gro, do_vxlan_udp_gro;

>> >>

>> >>      if (unlikely((gro_ctx->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |

>> >>                                      RTE_GRO_TCP_IPV4 |

>> >> +                                    RTE_GRO_IPV4_VXLAN_UDP_IPV4 |

>> >>                                      RTE_GRO_UDP_IPV4)) == 0))

>> >>              return nb_pkts;

>> >>

>> >>      tcp_tbl = gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX];

>> >> -    vxlan_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];

>> >> +    vxlan_tcp_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];

>> >>      udp_tbl = gro_ctx->tbls[RTE_GRO_UDP_IPV4_INDEX];

>> >> +    vxlan_udp_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX];

>> >>

>> >>      do_tcp4_gro = (gro_ctx->gro_types & RTE_GRO_TCP_IPV4) ==

>> >>              RTE_GRO_TCP_IPV4;

>> >> -    do_vxlan_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) ==

>> >> +    do_vxlan_tcp_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) ==

>> >>              RTE_GRO_IPV4_VXLAN_TCP_IPV4;

>> >>      do_udp4_gro = (gro_ctx->gro_types & RTE_GRO_UDP_IPV4) ==

>> >>              RTE_GRO_UDP_IPV4;

>> >> +    do_vxlan_udp_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) ==

>> >> +            RTE_GRO_IPV4_VXLAN_UDP_IPV4;

>> >>

>> >>      current_time = rte_rdtsc();

>> >>

>> >>      for (i = 0; i < nb_pkts; i++) {

>> >>              if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&

>> >> -                            do_vxlan_gro) {

>> >> -                    if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tbl,

>> >> +                            do_vxlan_tcp_gro) {

>> >> +                    if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tcp_tbl,

>> >> +                                            current_time) < 0)

>> >> +                            unprocess_pkts[unprocess_num++] = pkts[i];

>> >> +            } else if (IS_IPV4_VXLAN_UDP4_PKT(pkts[i]->packet_type) &&

>> >> +                            do_vxlan_udp_gro) {

>> >> +                    if (gro_vxlan_udp4_reassemble(pkts[i], vxlan_udp_tbl,

>> >>                                              current_time) < 0)

>> >>                              unprocess_pkts[unprocess_num++] = pkts[i];

>> >>              } else if (IS_IPV4_TCP_PKT(pkts[i]->packet_type) &&

>> >> @@ -341,6 +397,13 @@ struct gro_ctx {

>> >>              left_nb_out = max_nb_out - num;

>> >>      }

>> >>

>> >> +    if ((gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) && max_nb_out > 0) {

>> >> +            num += gro_vxlan_udp4_tbl_timeout_flush(gro_ctx->tbls[

>> >> +                            RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX],

>> >> +                            flush_timestamp, &out[num], left_nb_out);

>> >> +            left_nb_out = max_nb_out - num;

>> >

>> >Don't need to calculate left_nb_out here. In the previous patch, you

>> >also calculate it for UDP GRO. Please remove it both.

>>

>> I think keeping it here isn't bad thing, if someone will add a new GRO type here, he/she won't make a mistake.

>

>I don't think so. It's unnecessary for current design.

>If others want to add new GRO types, they will write

>correct code.

>

>>

>> >

>>

>>

>>

>>

>>
yang_y_yi Sept. 14, 2020, 9:14 a.m. UTC | #5
Jiayu, VM to VM case for big UDP packet (say udp parload size is 8192K, but mtu 1500). VM will segment it as UDP framgments, but when ovs dpdk uses VxLan to encapsulate them, it doesn't know they are same flow, OVS DPDK has no knowldge about them, just encapsulate them as if they are not related. But on receive side on another machine, OVS DPDK will GRO them, at this point, outer IP ID is even random for every packet. For TCP, I think same issue is there, GRO just considered them as different flow and doesn't do GRO, in my opinion, we also should do GRO for such case.





在 2020-09-14 16:50:08,"Hu, Jiayu" <jiayu.hu@intel.com> 写道:

Out IP ID is not used to check if they belong to the same flow,

but check if they are neighbors. If two vxlan/udp fragments are

neighbors, their outer IP ID and outer frag_oft should be incremental.

I still cannot understand why ignore outer IP ID when DF is 0.

Can you give more explanation?

 

From: yang_y_yi <yang_y_yi@163.com>
Sent: Monday, September 14, 2020 4:27 PM
To: Hu, Jiayu <jiayu.hu@intel.com>
Cc: thomas@monjalon.net; yangyi01@inspur.com; dev@dpdk.org
Subject: Re:Re: Re: [PATCH v5 2/2] gro: add VXLAN UDP GRO support
Importance: High

 

For outer_ip_id, there is same ip_id disorder issue existing as UDP GRO, so we can't use ip_id +/-1 to check if they are same flow, here is my incrmental change for it with more comments.

 

@@ -189,7 +188,12 @@
 is_same_vxlan_udp4_flow(struct vxlan_udp4_flow_key k1,
                struct vxlan_udp4_flow_key k2)
 {
-       /* src port is changing, so shouldn't use it for flow check */
+       /* For VxLAN packet, outer udp src port is calculated from
+        * inner packet RSS hash, udp src port of the first UDP
+        * fragment is different from one of other UDP fragments
+        * even if they are same flow, so we have to skip outer udp
+        * src port comparison here.
+        */
        return (rte_is_same_ether_addr(&k1.outer_eth_saddr,
                                        &k2.outer_eth_saddr) &&
                        rte_is_same_ether_addr(&k1.outer_eth_daddr,
@@ -212,12 +216,16 @@
        uint16_t l2_offset;
        int ret = 0;

+       /* Note: if outer DF bit is set, i.e outer_is_atomic is 0,
+        * we needn't compare outer_ip_id because they are same,
+        * for the case outer_is_atomic is 1, we also have no way
+        * to comapre outer_ip_id because the received packets can
+        * be out of order. So ignore outer_ip_id comparison here.
+        */
+
        l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
        cmp = udp4_check_neighbor(&item->inner_item, frag_offset, ip_dl,
                                        l2_offset);
-       /* VXLAN outer IP ID is out of order, so don't touch it and
-        * don't compare it.
-        */
        if (cmp > 0)
                /* Append the new packet. */
                ret = 1;
@@ -354,7 +362,9 @@
        rte_ether_addr_copy(&(outer_eth_hdr->d_addr), &(key.outer_eth_daddr));
        key.outer_ip_src_addr = outer_ipv4_hdr->src_addr;
        key.outer_ip_dst_addr = outer_ipv4_hdr->dst_addr;
-       key.outer_src_port = udp_hdr->src_port;
+       /* Note: It is unnecessary to save outer_src_port here because it can
+        * be different for VxLAN UDP fragments from the same flow.
+        */
        key.outer_dst_port = udp_hdr->dst_port;

        /* Search for a matched flow. */

 

 

 


At 2020-09-14 12:21:26, "Jiayu Hu" <jiayu.hu@intel.com> wrote:
>Replies are inline.
> 
>BTW, you need to update the programmer guide
>doc/guides/prog_guide/generic_receive_offload_lib.rst and
>release note doc/guides/rel_notes/release_20_11.rst.
> 
>Thanks,
>Jiayu
> 
>On Mon, Sep 14, 2020 at 10:13:44AM +0800, yang_y_yi wrote:
>> Jiayu, thank you so much, please check my replies for some of your comments,
>> here is incremental patch. I built it by meson this time :-)
>> 
>> diff --git a/lib/librte_gro/gro_vxlan_udp4.c b/lib/librte_gro/gro_vxlan_udp4.c
>> index 9e9df72..1fcfaf1 100644
>> --- a/lib/librte_gro/gro_vxlan_udp4.c
>> +++ b/lib/librte_gro/gro_vxlan_udp4.c
>> @@ -203,7 +203,7 @@
>>  }
>> 
>>  static inline int
>> -udp_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,
>> +udp4_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,
>>                 uint16_t frag_offset,
>>                 uint16_t ip_dl)
>>  {
>> @@ -213,7 +213,7 @@
>>         int ret = 0;
>> 
>>         l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
>> -       cmp = udp_check_neighbor(&item->inner_item, frag_offset, ip_dl,
>> +       cmp = udp4_check_neighbor(&item->inner_item, frag_offset, ip_dl,
>>                                         l2_offset);
>>         /* VXLAN outer IP ID is out of order, so don't touch it and
>>          * don't compare it.
>> @@ -379,7 +379,7 @@
>>                 item_idx = insert_new_item(tbl, pkt, start_time,
>>                                 INVALID_ARRAY_INDEX, frag_offset,
>>                                 is_last_frag, outer_ip_id, outer_is_atomic);
>> -               if (item_idx == INVALID_ARRAY_INDEX)
>> +               if (unlikely(item_idx == INVALID_ARRAY_INDEX))
>>                         return -1;
>>                 if (insert_new_flow(tbl, &key, item_idx) ==
>>                                 INVALID_ARRAY_INDEX) {
>> @@ -397,7 +397,7 @@
>>         cur_idx = tbl->flows[i].start_index;
>>         prev_idx = cur_idx;
>>         do {
>> -               cmp = udp_check_vxlan_neighbor(&(tbl->items[cur_idx]),
>> +               cmp = udp4_check_vxlan_neighbor(&(tbl->items[cur_idx]),
>>                                 frag_offset, ip_dl);
>>                 if (cmp) {
>>                         if (merge_two_vxlan_udp4_packets(
>> @@ -436,7 +436,7 @@
>>                 item_idx = insert_new_item(tbl, pkt, start_time,
>>                                 INVALID_ARRAY_INDEX, frag_offset,
>>                                 is_last_frag, outer_ip_id, outer_is_atomic);
>> -               if (item_idx == INVALID_ARRAY_INDEX)
>> +               if (unlikely(item_idx == INVALID_ARRAY_INDEX))
>>                         return -1;
>>                 tbl->items[item_idx].inner_item.next_pkt_idx = cur_idx;
>>                 tbl->flows[i].start_index = item_idx;
>> @@ -470,7 +470,7 @@
>>                 ip_dl = pkt->pkt_len - hdr_len;
>> diff --git a/lib/librte_gro/gro_vxlan_udp4.c b/lib/librte_gro/gro_vxlan_udp4.c
>> index 9e9df72..1fcfaf1 100644
>> --- a/lib/librte_gro/gro_vxlan_udp4.c
>> +++ b/lib/librte_gro/gro_vxlan_udp4.c
>> @@ -203,7 +203,7 @@
>>  }
>> 
>>  static inline int
>> -udp_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,
>> +udp4_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,
>>                 uint16_t frag_offset,
>>                 uint16_t ip_dl)
>>  {
>> @@ -213,7 +213,7 @@
>>         int ret = 0;
>> 
>>         l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
>> -       cmp = udp_check_neighbor(&item->inner_item, frag_offset, ip_dl,
>> +       cmp = udp4_check_neighbor(&item->inner_item, frag_offset, ip_dl,
>>                                         l2_offset);
>>         /* VXLAN outer IP ID is out of order, so don't touch it and
>>          * don't compare it.
>> @@ -379,7 +379,7 @@
>>                 item_idx = insert_new_item(tbl, pkt, start_time,
>>                                 INVALID_ARRAY_INDEX, frag_offset,
>>                                 is_last_frag, outer_ip_id, outer_is_atomic);
>> -               if (item_idx == INVALID_ARRAY_INDEX)
>> +               if (unlikely(item_idx == INVALID_ARRAY_INDEX))
>>                         return -1;
>>                 if (insert_new_flow(tbl, &key, item_idx) ==
>>                                 INVALID_ARRAY_INDEX) {
>> @@ -397,7 +397,7 @@
>>         cur_idx = tbl->flows[i].start_index;
>>         prev_idx = cur_idx;
>>         do {
>> -               cmp = udp_check_vxlan_neighbor(&(tbl->items[cur_idx]),
>> +               cmp = udp4_check_vxlan_neighbor(&(tbl->items[cur_idx]),
>>                                 frag_offset, ip_dl);
>>                 if (cmp) {
>>                         if (merge_two_vxlan_udp4_packets(
>> @@ -436,7 +436,7 @@
>>                 item_idx = insert_new_item(tbl, pkt, start_time,
>>                                 INVALID_ARRAY_INDEX, frag_offset,
>>                                 is_last_frag, outer_ip_id, outer_is_atomic);
>> -               if (item_idx == INVALID_ARRAY_INDEX)
>> +               if (unlikely(item_idx == INVALID_ARRAY_INDEX))
>>                         return -1;
>>                 tbl->items[item_idx].inner_item.next_pkt_idx = cur_idx;
>>                 tbl->flows[i].start_index = item_idx;
>> @@ -470,7 +470,7 @@
>>                 ip_dl = pkt->pkt_len - hdr_len;
>>                 frag_offset = tbl->items[item_idx].inner_item.frag_offset;
>>                 is_last_frag = tbl->items[item_idx].inner_item.is_last_frag;
>> -               cmp = udp_check_vxlan_neighbor(&(tbl->items[start_idx]),
>> +               cmp = udp4_check_vxlan_neighbor(&(tbl->items[start_idx]),
>>                                         frag_offset, ip_dl);
>>                 if (cmp) {
>>                         if (merge_two_vxlan_udp4_packets(
>> @@ -481,12 +481,10 @@
>>                                                         INVALID_ARRAY_INDEX);
>>                                 tbl->items[start_idx].inner_item.next_pkt_idx
>>                                         = item_idx;
>> -                       } else {
>> +                       } else
>>                                 return 0;
>> -                       }
>> -               } else {
>> +               } else
>>                         return 0;
>> -               }
>>         }
>> 
>>         return 0;
>> 
>> 
>> At 2020-09-11 12:24:16, "Jiayu Hu" <jiayu.hu@intel.com> wrote:
>> >Some comments are inline.
>> >
>> >Thanks,
>> >Jiayu
>> >
>> >On Thu, Sep 10, 2020 at 05:29:14PM +0800, yang_y_yi@163.com wrote:
>> >> From: Yi Yang <yangyi01@inspur.com>
>> >>
>> >> VXLAN UDP GRO can help improve VM-to-VM UDP performance
>> >> when VM is enabled UFO or GSO, GRO must be supported if
>> >> GSO or UFO is enabled, otherwise, performance gain will
>> >> be hurt.
>> >>
>> >> With this enabled in DPDK, OVS DPDK can leverage it to
>> >> improve VM-to-VM UDP performance, this will make sure
>> >> IP fragments will be reassembled once it is received
>> >> from physical NIC. It is very helpful in OVS DPDK VXLAN
>> >> TSO case.
>> >>
>> >> Signed-off-by: Yi Yang <yangyi01@inspur.com>
>> >> ---
>> >>  lib/librte_gro/gro_udp4.h       |   1 +
>> >>  lib/librte_gro/gro_vxlan_udp4.c | 548 ++++++++++++++++++++++++++++++++++++++++
>> >>  lib/librte_gro/gro_vxlan_udp4.h | 152 +++++++++++
>> >>  lib/librte_gro/meson.build      |   2 +-
>> >>  lib/librte_gro/rte_gro.c        | 115 +++++++--
>> >>  lib/librte_gro/rte_gro.h        |   3 +
>> >>  6 files changed, 794 insertions(+), 27 deletions(-)
>> >>  create mode 100644 lib/librte_gro/gro_vxlan_udp4.c
>> >>  create mode 100644 lib/librte_gro/gro_vxlan_udp4.h
>> >>
>> >> diff --git a/lib/librte_gro/gro_udp4.h b/lib/librte_gro/gro_udp4.h
>> >> index 0a078e4..d38b393 100644
>> >> --- a/lib/librte_gro/gro_udp4.h
>> >> +++ b/lib/librte_gro/gro_udp4.h
>> >> @@ -7,6 +7,7 @@
>> >>
>> >>  #include <rte_ip.h>
>> >>  #include <rte_udp.h>
>> >> +#include <rte_vxlan.h>
>> >>
>> >>  #define INVALID_ARRAY_INDEX 0xffffffffUL
>> >>  #define GRO_UDP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
>> >> diff --git a/lib/librte_gro/gro_vxlan_udp4.c b/lib/librte_gro/gro_vxlan_udp4.c
>> >> new file mode 100644
>> >> index 0000000..9e9df72
>> >> --- /dev/null
>> >> +++ b/lib/librte_gro/gro_vxlan_udp4.c
>> >> @@ -0,0 +1,548 @@
>> >> +/* SPDX-License-Identifier: BSD-3-Clause
>> >> + * Copyright(c) 2020 Inspur Corporation
>> >> + */
>> >> +
>> >> +
>> >> +
>> >> +static inline uint32_t
>> >> +delete_item(struct gro_vxlan_udp4_tbl *tbl,
>> >> +            uint32_t item_idx,
>> >> +            uint32_t prev_item_idx)
>> >> +{
>> >> +    uint32_t next_idx = tbl->items[item_idx].inner_item.next_pkt_idx;
>> >> +
>> >> +    /* NULL indicates an empty item. */
>> >> +    tbl->items[item_idx].inner_item.firstseg = NULL;
>> >> +    tbl->item_num--;
>> >> +    if (prev_item_idx != INVALID_ARRAY_INDEX)
>> >> +            tbl->items[prev_item_idx].inner_item.next_pkt_idx = next_idx;
>> >> +
>> >> +    return next_idx;
>> >> +}
>> >> +
>> >> +static inline uint32_t
>> >> +insert_new_flow(struct gro_vxlan_udp4_tbl *tbl,
>> >> +            struct vxlan_udp4_flow_key *src,
>> >> +            uint32_t item_idx)
>> >> +{
>> >> +    struct vxlan_udp4_flow_key *dst;
>> >> +    uint32_t flow_idx;
>> >> +
>> >> +    flow_idx = find_an_empty_flow(tbl);
>> >> +    if (unlikely(flow_idx == INVALID_ARRAY_INDEX))
>> >> +            return INVALID_ARRAY_INDEX;
>> >> +
>> >> +    dst = &(tbl->flows[flow_idx].key);
>> >> +
>> >> +    rte_ether_addr_copy(&(src->inner_key.eth_saddr),
>> >> +                    &(dst->inner_key.eth_saddr));
>> >> +    rte_ether_addr_copy(&(src->inner_key.eth_daddr),
>> >> +                    &(dst->inner_key.eth_daddr));
>> >> +    dst->inner_key.ip_src_addr = src->inner_key.ip_src_addr;
>> >> +    dst->inner_key.ip_dst_addr = src->inner_key.ip_dst_addr;
>> >> +    dst->inner_key.ip_id = src->inner_key.ip_id;
>> >> +
>> >> +    dst->vxlan_hdr.vx_flags = src->vxlan_hdr.vx_flags;
>> >> +    dst->vxlan_hdr.vx_vni = src->vxlan_hdr.vx_vni;
>> >> +    rte_ether_addr_copy(&(src->outer_eth_saddr), &(dst->outer_eth_saddr));
>> >> +    rte_ether_addr_copy(&(src->outer_eth_daddr), &(dst->outer_eth_daddr));
>> >> +    dst->outer_ip_src_addr = src->outer_ip_src_addr;
>> >> +    dst->outer_ip_dst_addr = src->outer_ip_dst_addr;
>> >> +    dst->outer_src_port = src->outer_src_port;
>> >> +    dst->outer_dst_port = src->outer_dst_port;
>> >> +
>> >> +    tbl->flows[flow_idx].start_index = item_idx;
>> >> +    tbl->flow_num++;
>> >> +
>> >> +    return flow_idx;
>> >> +}
>> >> +
>> >> +static inline int
>> >> +is_same_vxlan_udp4_flow(struct vxlan_udp4_flow_key k1,
>> >> +            struct vxlan_udp4_flow_key k2)
>> >> +{
>> >> +    /* src port is changing, so shouldn't use it for flow check */
>> >
>> >As I know, fragments of a vxlan/udp/ipv4 packet have same outer source
>> >port. In what cases outer source port will change?
>> 
>> In OVS, vxlan udp soure port is calculated from inner packet RSS, so for UDP fragment, RSS of first packet is
>> different from other fragments because RSS is calculated based on 5 tuples (src ip, src port, dst ip, dst port,
>> protocol type).
>> 
>> ovs/lib/netdev-native-tnl.h:
>> 
>> static inline uint32_t *
>> dp_packet_rss_ptr(const struct dp_packet *b)
>> {
>>     return CONST_CAST(uint32_t *, &b->mbuf.hash.rss);
>> }
>> 
>> static inline uint32_t
>> dp_packet_get_rss_hash(const struct dp_packet *p)
>> {
>>     return *dp_packet_rss_ptr(p);
>> }
>> 
>> static inline ovs_be16
>> netdev_tnl_get_src_port(struct dp_packet *packet)
>> {
>>     uint32_t hash;
>> 
>>     hash = dp_packet_get_rss_hash(packet);
>> 
>>     return htons((((uint64_t) hash * (tnl_udp_port_max - tnl_udp_port_min)) >> 32) +
>>                  tnl_udp_port_min);
>> }
>> 
>> void
>> netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED,
>>                            struct dp_packet *packet,
>>                            const struct ovs_action_push_tnl *data)
>> {
>>     struct udp_header *udp;
>>     int ip_tot_size;
>> 
>>     udp = netdev_tnl_push_ip_header(packet, data->header, data->header_len, &ip_tot_size);
>> 
>>     /* set udp src port */
>>     udp->udp_src = netdev_tnl_get_src_port(packet);
>>     udp->udp_len = htons(ip_tot_size);
>> 
>>     if (udp->udp_csum) {
>>         netdev_tnl_calc_udp_csum(udp, packet, ip_tot_size);
>>     }
>> }
>> 
>> This is special for ECMP which only checks outer header to differentiate different flows (innner packet). The
>> first fragment of original inner UDP is considered as different flow from other fragments.
>> 
>> I found this by debugging becuase the same UDP traffic did generate different vxlan UDP source port.
>> 
> 
>You are right. I checked VxLAN RFC 7348, and outer source port is
>calculated from inner packet. In OVS case, the first fragment
>will have different outer source port compared with other fragments.
>Please add some comments in the code for people to better understand
>the code. In addition, need to remove outer_src_port from the structure
>vxlan_udp4_flow_key.
> 
>> >
>> >> +    return (rte_is_same_ether_addr(&k1.outer_eth_saddr,
>> >> +                                    &k2.outer_eth_saddr) &&
>> >> +                    rte_is_same_ether_addr(&k1.outer_eth_daddr,
>> >> +                            &k2.outer_eth_daddr) &&
>> >> +                    (k1.outer_ip_src_addr == k2.outer_ip_src_addr) &&
>> >> +                    (k1.outer_ip_dst_addr == k2.outer_ip_dst_addr) &&
>> >> +                    (k1.outer_dst_port == k2.outer_dst_port) &&
>> >> +                    (k1.vxlan_hdr.vx_flags == k2.vxlan_hdr.vx_flags) &&
>> >> +                    (k1.vxlan_hdr.vx_vni == k2.vxlan_hdr.vx_vni) &&
>> >> +                    is_same_udp4_flow(k1.inner_key, k2.inner_key));
>> >> +}
>> >> +
>> >> +static inline int
>> >> +udp_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,
>> >> +            uint16_t frag_offset,
>> >> +            uint16_t ip_dl)
>> >
>> >Better rename it as udp4_check_vxlan_neighbor.
>> 
>> Ok
>> 
>> >
>> >> +{
>> >> +    struct rte_mbuf *pkt = item->inner_item.firstseg;
>> >> +    int cmp;
>> >> +    uint16_t l2_offset;
>> >> +    int ret = 0;
>> >> +
>> >> +    l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
>> >> +    cmp = udp_check_neighbor(&item->inner_item, frag_offset, ip_dl,
>> >> +                                    l2_offset);
>> >
>> >Why don't check outer IP ID? If outer DF bit is not set, all fragments
>> >should have incremental outer IP ID; if DF bit is set, we don't need
>> >to check outer IP ID.
>> 
>> By default, OVS set DF flag. I don't think we need to compare outer IP id because inner check
>> is enough to differentiate if they are same flow, isn't it?
> 
>OVS is one of workloads using DPDK, and we can assume
>other cases have the same design as OVS. I think IP ID
>check is necessary.
> 
>> 
>> >
>> >In addition, udp_check_neighbor is renamed, but you didn't change here.
>> 
>> Forgot to build it, Makefile has been removed from dpdk main branch, build isn't so convinient, will
>> use meson build to check it for next version.
>> 
>> >
>> >> +    /* VXLAN outer IP ID is out of order, so don't touch it and
>> >> +     * don't compare it.
>> >> +     */
>> >> +    if (cmp > 0)
>> >> +            /* Append the new packet. */
>> >> +            ret = 1;
>> >> +    else if (cmp < 0)
>> >> +            /* Prepend the new packet. */
>> >> +            ret = -1;
>> >> +
>> >> +    return ret;
>> >> +}
>> >> +
>> >>
>> >>  /*
>> >>   * GRO context structure. It keeps the table structures, which are
>> >> @@ -137,19 +148,27 @@ struct gro_ctx {
>> >>      struct gro_udp4_item udp_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {{0} };
>> >>
>> >>      /* Allocate a reassembly table for VXLAN TCP GRO */
>> >> -    struct gro_vxlan_tcp4_tbl vxlan_tbl;
>> >> -    struct gro_vxlan_tcp4_flow vxlan_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
>> >> -    struct gro_vxlan_tcp4_item vxlan_items[RTE_GRO_MAX_BURST_ITEM_NUM]
>> >> +    struct gro_vxlan_tcp4_tbl vxlan_tcp_tbl;
>> >> +    struct gro_vxlan_tcp4_flow vxlan_tcp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
>> >> +    struct gro_vxlan_tcp4_item vxlan_tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM]
>> >> +                    = {{{0}, 0, 0} };
>> >> +
>> >> +    /* Allocate a reassembly table for VXLAN UDP GRO */
>> >> +    struct gro_vxlan_udp4_tbl vxlan_udp_tbl;
>> >> +    struct gro_vxlan_udp4_flow vxlan_udp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
>> >> +    struct gro_vxlan_udp4_item vxlan_udp_items[RTE_GRO_MAX_BURST_ITEM_NUM]
>> >>                      = {{{0}, 0, 0} };
>> >>
>> >>      struct rte_mbuf *unprocess_pkts[nb_pkts];
>> >>      uint32_t item_num;
>> >>      int32_t ret;
>> >>      uint16_t i, unprocess_num = 0, nb_after_gro = nb_pkts;
>> >> -    uint8_t do_tcp4_gro = 0, do_vxlan_gro = 0, do_udp4_gro = 0;
>> >> +    uint8_t do_tcp4_gro = 0, do_vxlan_tcp_gro = 0, do_udp4_gro = 0,
>> >> +            do_vxlan_udp_gro = 0;
>> >>
>> >>      if (unlikely((param->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
>> >>                                      RTE_GRO_TCP_IPV4 |
>> >> +                                    RTE_GRO_IPV4_VXLAN_UDP_IPV4 |
>> >>                                      RTE_GRO_UDP_IPV4)) == 0))
>> >>              return nb_pkts;
>> >>
>> >> @@ -160,15 +179,28 @@ struct gro_ctx {
>> >>
>> >>      if (param->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) {
>> >>              for (i = 0; i < item_num; i++)
>> >> -                    vxlan_flows[i].start_index = INVALID_ARRAY_INDEX;
>> >> -
>> >> -            vxlan_tbl.flows = vxlan_flows;
>> >> -            vxlan_tbl.items = vxlan_items;
>> >> -            vxlan_tbl.flow_num = 0;
>> >> -            vxlan_tbl.item_num = 0;
>> >> -            vxlan_tbl.max_flow_num = item_num;
>> >> -            vxlan_tbl.max_item_num = item_num;
>> >> -            do_vxlan_gro = 1;
>> >> +                    vxlan_tcp_flows[i].start_index = INVALID_ARRAY_INDEX;
>> >> +
>> >> +            vxlan_tcp_tbl.flows = vxlan_tcp_flows;
>> >> +            vxlan_tcp_tbl.items = vxlan_tcp_items;
>> >> +            vxlan_tcp_tbl.flow_num = 0;
>> >> +            vxlan_tcp_tbl.item_num = 0;
>> >> +            vxlan_tcp_tbl.max_flow_num = item_num;
>> >> +            vxlan_tcp_tbl.max_item_num = item_num;
>> >> +            do_vxlan_tcp_gro = 1;
>> >> +    }
>> >> +
>> >> +    if (param->gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) {
>> >> +            for (i = 0; i < item_num; i++)
>> >> +                    vxlan_udp_flows[i].start_index = INVALID_ARRAY_INDEX;
>> >> +
>> >> +            vxlan_udp_tbl.flows = vxlan_udp_flows;
>> >> +            vxlan_udp_tbl.items = vxlan_udp_items;
>> >> +            vxlan_udp_tbl.flow_num = 0;
>> >> +            vxlan_udp_tbl.item_num = 0;
>> >> +            vxlan_udp_tbl.max_flow_num = item_num;
>> >> +            vxlan_udp_tbl.max_item_num = item_num;
>> >> +            do_vxlan_udp_gro = 1;
>> >>      }
>> >>
>> >>      if (param->gro_types & RTE_GRO_TCP_IPV4) {
>> >> @@ -204,9 +236,18 @@ struct gro_ctx {
>> >>               * will be flushed from the tables.
>> >>               */
>> >>              if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
>> >> -                            do_vxlan_gro) {
>> >> +                            do_vxlan_tcp_gro) {
>> >>                      ret = gro_vxlan_tcp4_reassemble(pkts[i],
>> >> -                                                    &vxlan_tbl, 0);
>> >> +                                                    &vxlan_tcp_tbl, 0);
>> >> +                    if (ret > 0)
>> >> +                            /* Merge successfully */
>> >> +                            nb_after_gro--;
>> >> +                    else if (ret < 0)
>> >> +                            unprocess_pkts[unprocess_num++] = pkts[i];
>> >> +            } else if (IS_IPV4_VXLAN_UDP4_PKT(pkts[i]->packet_type) &&
>> >> +                            do_vxlan_udp_gro) {
>> >> +                    ret = gro_vxlan_udp4_reassemble(pkts[i],
>> >> +                                                    &vxlan_udp_tbl, 0);
>> >>                      if (ret > 0)
>> >>                              /* Merge successfully */
>> >>                              nb_after_gro--;
>> >> @@ -236,11 +277,17 @@ struct gro_ctx {
>> >>               || (unprocess_num < nb_pkts)) {
>> >>              i = 0;
>> >>              /* Flush all packets from the tables */
>> >> -            if (do_vxlan_gro) {
>> >> -                    i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tbl,
>> >> +            if (do_vxlan_tcp_gro) {
>> >> +                    i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tcp_tbl,
>> >>                                      0, pkts, nb_pkts);
>> >>              }
>> >>
>> >> +            if (do_vxlan_udp_gro) {
>> >> +                    i += gro_vxlan_udp4_tbl_timeout_flush(&vxlan_udp_tbl,
>> >> +                                    0, &pkts[i], nb_pkts - i);
>> >> +
>> >> +            }
>> >> +
>> >>              if (do_tcp4_gro) {
>> >>                      i += gro_tcp4_tbl_timeout_flush(&tcp_tbl, 0,
>> >>                                      &pkts[i], nb_pkts - i);
>> >> @@ -269,33 +316,42 @@ struct gro_ctx {
>> >>  {
>> >>      struct rte_mbuf *unprocess_pkts[nb_pkts];
>> >>      struct gro_ctx *gro_ctx = ctx;
>> >> -    void *tcp_tbl, *udp_tbl, *vxlan_tbl;
>> >> +    void *tcp_tbl, *udp_tbl, *vxlan_tcp_tbl, *vxlan_udp_tbl;
>> >>      uint64_t current_time;
>> >>      uint16_t i, unprocess_num = 0;
>> >> -    uint8_t do_tcp4_gro, do_vxlan_gro, do_udp4_gro;
>> >> +    uint8_t do_tcp4_gro, do_vxlan_tcp_gro, do_udp4_gro, do_vxlan_udp_gro;
>> >>
>> >>      if (unlikely((gro_ctx->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
>> >>                                      RTE_GRO_TCP_IPV4 |
>> >> +                                    RTE_GRO_IPV4_VXLAN_UDP_IPV4 |
>> >>                                      RTE_GRO_UDP_IPV4)) == 0))
>> >>              return nb_pkts;
>> >>
>> >>      tcp_tbl = gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX];
>> >> -    vxlan_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];
>> >> +    vxlan_tcp_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];
>> >>      udp_tbl = gro_ctx->tbls[RTE_GRO_UDP_IPV4_INDEX];
>> >> +    vxlan_udp_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX];
>> >>
>> >>      do_tcp4_gro = (gro_ctx->gro_types & RTE_GRO_TCP_IPV4) ==
>> >>              RTE_GRO_TCP_IPV4;
>> >> -    do_vxlan_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) ==
>> >> +    do_vxlan_tcp_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) ==
>> >>              RTE_GRO_IPV4_VXLAN_TCP_IPV4;
>> >>      do_udp4_gro = (gro_ctx->gro_types & RTE_GRO_UDP_IPV4) ==
>> >>              RTE_GRO_UDP_IPV4;
>> >> +    do_vxlan_udp_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) ==
>> >> +            RTE_GRO_IPV4_VXLAN_UDP_IPV4;
>> >>
>> >>      current_time = rte_rdtsc();
>> >>
>> >>      for (i = 0; i < nb_pkts; i++) {
>> >>              if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
>> >> -                            do_vxlan_gro) {
>> >> -                    if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tbl,
>> >> +                            do_vxlan_tcp_gro) {
>> >> +                    if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tcp_tbl,
>> >> +                                            current_time) < 0)
>> >> +                            unprocess_pkts[unprocess_num++] = pkts[i];
>> >> +            } else if (IS_IPV4_VXLAN_UDP4_PKT(pkts[i]->packet_type) &&
>> >> +                            do_vxlan_udp_gro) {
>> >> +                    if (gro_vxlan_udp4_reassemble(pkts[i], vxlan_udp_tbl,
>> >>                                              current_time) < 0)
>> >>                              unprocess_pkts[unprocess_num++] = pkts[i];
>> >>              } else if (IS_IPV4_TCP_PKT(pkts[i]->packet_type) &&
>> >> @@ -341,6 +397,13 @@ struct gro_ctx {
>> >>              left_nb_out = max_nb_out - num;
>> >>      }
>> >>
>> >> +    if ((gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) && max_nb_out > 0) {
>> >> +            num += gro_vxlan_udp4_tbl_timeout_flush(gro_ctx->tbls[
>> >> +                            RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX],
>> >> +                            flush_timestamp, &out[num], left_nb_out);
>> >> +            left_nb_out = max_nb_out - num;
>> >
>> >Don't need to calculate left_nb_out here. In the previous patch, you
>> >also calculate it for UDP GRO. Please remove it both.
>> 
>> I think keeping it here isn't bad thing, if someone will add a new GRO type here, he/she won't make a mistake.
> 
>I don't think so. It's unnecessary for current design.
>If others want to add new GRO types, they will write
>correct code.
> 
>> 
>> >
>> 
>> 
>> 
>>  
>>
Hu, Jiayu Sept. 16, 2020, 2:54 a.m. UTC | #6
On Mon, Sep 14, 2020 at 05:14:59PM +0800, yang_y_yi wrote:
> 
> Jiayu, VM to VM case for big UDP packet (say udp parload size is 8192K, but mtu
> 1500). VM will segment it as UDP framgments, but when ovs dpdk uses VxLan to
> encapsulate them, it doesn't know they are same flow, OVS DPDK has no knowldge
> about them, just encapsulate them as if they are not related. But on receive
> side on another machine, OVS DPDK will GRO them, at this point, outer IP ID is
> even random for every packet. For TCP, I think same issue is there, GRO just
> considered them as different flow and doesn't do GRO, in my opinion, we also
> should do GRO for such case.

I am a little confused about ovs behavior. Does ovs know they are udp fragments?
Or it knows they are fragments but doesn't know they are from one flow?
Given ovs receives 3 udp fragments of a udp packet, if ovs doesn't know they are
fragments of a udp packet, their mbuf->packet_type will be ether/ipv4/udp,
ether/ipv4, and ether/ipv4. dpdk udp gro will not process the second and third
one as their packet type in mbuf is not ether/ipv4/udp.

> 
> 
>    2020-09-14 16:50:08  "Hu, Jiayu" <jiayu.hu@intel.com> д    
> 
>     Out IP ID is not used to check if they belong to the same flow,
> 
>     but check if they are neighbors. If two vxlan/udp fragments are
> 
>     neighbors, their outer IP ID and outer frag_oft should be incremental.
> 
>     I still cannot understand why ignore outer IP ID when DF is 0.
> 
>     Can you give more explanation?
> 
>      
> 
>     From: yang_y_yi <yang_y_yi@163.com>
>     Sent: Monday, September 14, 2020 4:27 PM
>     To: Hu, Jiayu <jiayu.hu@intel.com>
>     Cc: thomas@monjalon.net; yangyi01@inspur.com; dev@dpdk.org
>     Subject: Re:Re: Re: [PATCH v5 2/2] gro: add VXLAN UDP GRO support
>     Importance: High
> 
>      
> 
>     For outer_ip_id, there is same ip_id disorder issue existing as UDP GRO, so
>     we can't use ip_id +/-1 to check if they are same flow, here is my
>     incrmental change for it with more comments.
> 
>      
> 
>     @@ -189,7 +188,12 @@
>      is_same_vxlan_udp4_flow(struct vxlan_udp4_flow_key k1,
>                     struct vxlan_udp4_flow_key k2)
>      {
>     -       /* src port is changing, so shouldn't use it for flow check */
>     +       /* For VxLAN packet, outer udp src port is calculated from
>     +        * inner packet RSS hash, udp src port of the first UDP
>     +        * fragment is different from one of other UDP fragments
>     +        * even if they are same flow, so we have to skip outer udp
>     +        * src port comparison here.
>     +        */
>             return (rte_is_same_ether_addr(&k1.outer_eth_saddr,
>                                             &k2.outer_eth_saddr) &&
>                             rte_is_same_ether_addr(&k1.outer_eth_daddr,
>     @@ -212,12 +216,16 @@
>             uint16_t l2_offset;
>             int ret = 0;
> 
>     +       /* Note: if outer DF bit is set, i.e outer_is_atomic is 0,
>     +        * we needn't compare outer_ip_id because they are same,
>     +        * for the case outer_is_atomic is 1, we also have no way
>     +        * to comapre outer_ip_id because the received packets can
>     +        * be out of order. So ignore outer_ip_id comparison here.
>     +        */
>     +
>             l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
>             cmp = udp4_check_neighbor(&item->inner_item, frag_offset, ip_dl,
>                                             l2_offset);
>     -       /* VXLAN outer IP ID is out of order, so don't touch it and
>     -        * don't compare it.
>     -        */
>             if (cmp > 0)
>                     /* Append the new packet. */
>                     ret = 1;
>     @@ -354,7 +362,9 @@
>             rte_ether_addr_copy(&(outer_eth_hdr->d_addr), &
>     (key.outer_eth_daddr));
>             key.outer_ip_src_addr = outer_ipv4_hdr->src_addr;
>             key.outer_ip_dst_addr = outer_ipv4_hdr->dst_addr;
>     -       key.outer_src_port = udp_hdr->src_port;
>     +       /* Note: It is unnecessary to save outer_src_port here because it
>     can
>     +        * be different for VxLAN UDP fragments from the same flow.
>     +        */
>             key.outer_dst_port = udp_hdr->dst_port;
> 
>             /* Search for a matched flow. */
> 
>      
> 
>      
> 
>      
> 
>     
>     At 2020-09-14 12:21:26, "Jiayu Hu" <jiayu.hu@intel.com> wrote:
> 
>     >Replies are inline.
> 
>     > 
> 
>     >BTW, you need to update the programmer guide
> 
>     >doc/guides/prog_guide/generic_receive_offload_lib.rst and
> 
>     >release note doc/guides/rel_notes/release_20_11.rst.
> 
>     > 
> 
>     >Thanks,
> 
>     >Jiayu
> 
>     > 
> 
>     >On Mon, Sep 14, 2020 at 10:13:44AM +0800, yang_y_yi wrote:
> 
>     >> Jiayu, thank you so much, please check my replies for some of your comments,
> 
>     >> here is incremental patch. I built it by meson this time :-)
> 
>     >> 
> 
>     >> diff --git a/lib/librte_gro/gro_vxlan_udp4.c b/lib/librte_gro/gro_vxlan_udp4.c
> 
>     >> index 9e9df72..1fcfaf1 100644
> 
>     >> --- a/lib/librte_gro/gro_vxlan_udp4.c
> 
>     >> +++ b/lib/librte_gro/gro_vxlan_udp4.c
> 
>     >> @@ -203,7 +203,7 @@
> 
>     >>  }
> 
>     >> 
> 
>     >>  static inline int
> 
>     >> -udp_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,
> 
>     >> +udp4_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,
> 
>     >>                 uint16_t frag_offset,
> 
>     >>                 uint16_t ip_dl)
> 
>     >>  {
> 
>     >> @@ -213,7 +213,7 @@
> 
>     >>         int ret = 0;
> 
>     >> 
> 
>     >>         l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
> 
>     >> -       cmp = udp_check_neighbor(&item->inner_item, frag_offset, ip_dl,
> 
>     >> +       cmp = udp4_check_neighbor(&item->inner_item, frag_offset, ip_dl,
> 
>     >>                                         l2_offset);
> 
>     >>         /* VXLAN outer IP ID is out of order, so don't touch it and
> 
>     >>          * don't compare it.
> 
>     >> @@ -379,7 +379,7 @@
> 
>     >>                 item_idx = insert_new_item(tbl, pkt, start_time,
> 
>     >>                                 INVALID_ARRAY_INDEX, frag_offset,
> 
>     >>                                 is_last_frag, outer_ip_id, outer_is_atomic);
> 
>     >> -               if (item_idx == INVALID_ARRAY_INDEX)
> 
>     >> +               if (unlikely(item_idx == INVALID_ARRAY_INDEX))
> 
>     >>                         return -1;
> 
>     >>                 if (insert_new_flow(tbl, &key, item_idx) ==
> 
>     >>                                 INVALID_ARRAY_INDEX) {
> 
>     >> @@ -397,7 +397,7 @@
> 
>     >>         cur_idx = tbl->flows[i].start_index;
> 
>     >>         prev_idx = cur_idx;
> 
>     >>         do {
> 
>     >> -               cmp = udp_check_vxlan_neighbor(&(tbl->items[cur_idx]),
> 
>     >> +               cmp = udp4_check_vxlan_neighbor(&(tbl->items[cur_idx]),
> 
>     >>                                 frag_offset, ip_dl);
> 
>     >>                 if (cmp) {
> 
>     >>                         if (merge_two_vxlan_udp4_packets(
> 
>     >> @@ -436,7 +436,7 @@
> 
>     >>                 item_idx = insert_new_item(tbl, pkt, start_time,
> 
>     >>                                 INVALID_ARRAY_INDEX, frag_offset,
> 
>     >>                                 is_last_frag, outer_ip_id, outer_is_atomic);
> 
>     >> -               if (item_idx == INVALID_ARRAY_INDEX)
> 
>     >> +               if (unlikely(item_idx == INVALID_ARRAY_INDEX))
> 
>     >>                         return -1;
> 
>     >>                 tbl->items[item_idx].inner_item.next_pkt_idx = cur_idx;
> 
>     >>                 tbl->flows[i].start_index = item_idx;
> 
>     >> @@ -470,7 +470,7 @@
> 
>     >>                 ip_dl = pkt->pkt_len - hdr_len;
> 
>     >> diff --git a/lib/librte_gro/gro_vxlan_udp4.c b/lib/librte_gro/gro_vxlan_udp4.c
> 
>     >> index 9e9df72..1fcfaf1 100644
> 
>     >> --- a/lib/librte_gro/gro_vxlan_udp4.c
> 
>     >> +++ b/lib/librte_gro/gro_vxlan_udp4.c
> 
>     >> @@ -203,7 +203,7 @@
> 
>     >>  }
> 
>     >> 
> 
>     >>  static inline int
> 
>     >> -udp_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,
> 
>     >> +udp4_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,
> 
>     >>                 uint16_t frag_offset,
> 
>     >>                 uint16_t ip_dl)
> 
>     >>  {
> 
>     >> @@ -213,7 +213,7 @@
> 
>     >>         int ret = 0;
> 
>     >> 
> 
>     >>         l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
> 
>     >> -       cmp = udp_check_neighbor(&item->inner_item, frag_offset, ip_dl,
> 
>     >> +       cmp = udp4_check_neighbor(&item->inner_item, frag_offset, ip_dl,
> 
>     >>                                         l2_offset);
> 
>     >>         /* VXLAN outer IP ID is out of order, so don't touch it and
> 
>     >>          * don't compare it.
> 
>     >> @@ -379,7 +379,7 @@
> 
>     >>                 item_idx = insert_new_item(tbl, pkt, start_time,
> 
>     >>                                 INVALID_ARRAY_INDEX, frag_offset,
> 
>     >>                                 is_last_frag, outer_ip_id, outer_is_atomic);
> 
>     >> -               if (item_idx == INVALID_ARRAY_INDEX)
> 
>     >> +               if (unlikely(item_idx == INVALID_ARRAY_INDEX))
> 
>     >>                         return -1;
> 
>     >>                 if (insert_new_flow(tbl, &key, item_idx) ==
> 
>     >>                                 INVALID_ARRAY_INDEX) {
> 
>     >> @@ -397,7 +397,7 @@
> 
>     >>         cur_idx = tbl->flows[i].start_index;
> 
>     >>         prev_idx = cur_idx;
> 
>     >>         do {
> 
>     >> -               cmp = udp_check_vxlan_neighbor(&(tbl->items[cur_idx]),
> 
>     >> +               cmp = udp4_check_vxlan_neighbor(&(tbl->items[cur_idx]),
> 
>     >>                                 frag_offset, ip_dl);
> 
>     >>                 if (cmp) {
> 
>     >>                         if (merge_two_vxlan_udp4_packets(
> 
>     >> @@ -436,7 +436,7 @@
> 
>     >>                 item_idx = insert_new_item(tbl, pkt, start_time,
> 
>     >>                                 INVALID_ARRAY_INDEX, frag_offset,
> 
>     >>                                 is_last_frag, outer_ip_id, outer_is_atomic);
> 
>     >> -               if (item_idx == INVALID_ARRAY_INDEX)
> 
>     >> +               if (unlikely(item_idx == INVALID_ARRAY_INDEX))
> 
>     >>                         return -1;
> 
>     >>                 tbl->items[item_idx].inner_item.next_pkt_idx = cur_idx;
> 
>     >>                 tbl->flows[i].start_index = item_idx;
> 
>     >> @@ -470,7 +470,7 @@
> 
>     >>                 ip_dl = pkt->pkt_len - hdr_len;
> 
>     >>                 frag_offset = tbl->items[item_idx].inner_item.frag_offset;
> 
>     >>                 is_last_frag = tbl->items[item_idx].inner_item.is_last_frag;
> 
>     >> -               cmp = udp_check_vxlan_neighbor(&(tbl->items[start_idx]),
> 
>     >> +               cmp = udp4_check_vxlan_neighbor(&(tbl->items[start_idx]),
> 
>     >>                                         frag_offset, ip_dl);
> 
>     >>                 if (cmp) {
> 
>     >>                         if (merge_two_vxlan_udp4_packets(
> 
>     >> @@ -481,12 +481,10 @@
> 
>     >>                                                         INVALID_ARRAY_INDEX);
> 
>     >>                                 tbl->items[start_idx].inner_item.next_pkt_idx
> 
>     >>                                         = item_idx;
> 
>     >> -                       } else {
> 
>     >> +                       } else
> 
>     >>                                 return 0;
> 
>     >> -                       }
> 
>     >> -               } else {
> 
>     >> +               } else
> 
>     >>                         return 0;
> 
>     >> -               }
> 
>     >>         }
> 
>     >> 
> 
>     >>         return 0;
> 
>     >> 
> 
>     >> 
> 
>     >> At 2020-09-11 12:24:16, "Jiayu Hu" <jiayu.hu@intel.com> wrote:
> 
>     >> >Some comments are inline.
> 
>     >> >
> 
>     >> >Thanks,
> 
>     >> >Jiayu
> 
>     >> >
> 
>     >> >On Thu, Sep 10, 2020 at 05:29:14PM +0800, yang_y_yi@163.com wrote:
> 
>     >> >> From: Yi Yang <yangyi01@inspur.com>
> 
>     >> >>
> 
>     >> >> VXLAN UDP GRO can help improve VM-to-VM UDP performance
> 
>     >> >> when VM is enabled UFO or GSO, GRO must be supported if
> 
>     >> >> GSO or UFO is enabled, otherwise, performance gain will
> 
>     >> >> be hurt.
> 
>     >> >>
> 
>     >> >> With this enabled in DPDK, OVS DPDK can leverage it to
> 
>     >> >> improve VM-to-VM UDP performance, this will make sure
> 
>     >> >> IP fragments will be reassembled once it is received
> 
>     >> >> from physical NIC. It is very helpful in OVS DPDK VXLAN
> 
>     >> >> TSO case.
> 
>     >> >>
> 
>     >> >> Signed-off-by: Yi Yang <yangyi01@inspur.com>
> 
>     >> >> ---
> 
>     >> >>  lib/librte_gro/gro_udp4.h       |   1 +
> 
>     >> >>  lib/librte_gro/gro_vxlan_udp4.c | 548 ++++++++++++++++++++++++++++++++++++++++
> 
>     >> >>  lib/librte_gro/gro_vxlan_udp4.h | 152 +++++++++++
> 
>     >> >>  lib/librte_gro/meson.build      |   2 +-
> 
>     >> >>  lib/librte_gro/rte_gro.c        | 115 +++++++--
> 
>     >> >>  lib/librte_gro/rte_gro.h        |   3 +
> 
>     >> >>  6 files changed, 794 insertions(+), 27 deletions(-)
> 
>     >> >>  create mode 100644 lib/librte_gro/gro_vxlan_udp4.c
> 
>     >> >>  create mode 100644 lib/librte_gro/gro_vxlan_udp4.h
> 
>     >> >>
> 
>     >> >> diff --git a/lib/librte_gro/gro_udp4.h b/lib/librte_gro/gro_udp4.h
> 
>     >> >> index 0a078e4..d38b393 100644
> 
>     >> >> --- a/lib/librte_gro/gro_udp4.h
> 
>     >> >> +++ b/lib/librte_gro/gro_udp4.h
> 
>     >> >> @@ -7,6 +7,7 @@
> 
>     >> >>
> 
>     >> >>  #include <rte_ip.h>
> 
>     >> >>  #include <rte_udp.h>
> 
>     >> >> +#include <rte_vxlan.h>
> 
>     >> >>
> 
>     >> >>  #define INVALID_ARRAY_INDEX 0xffffffffUL
> 
>     >> >>  #define GRO_UDP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
> 
>     >> >> diff --git a/lib/librte_gro/gro_vxlan_udp4.c b/lib/librte_gro/gro_vxlan_udp4.c
> 
>     >> >> new file mode 100644
> 
>     >> >> index 0000000..9e9df72
> 
>     >> >> --- /dev/null
> 
>     >> >> +++ b/lib/librte_gro/gro_vxlan_udp4.c
> 
>     >> >> @@ -0,0 +1,548 @@
> 
>     >> >> +/* SPDX-License-Identifier: BSD-3-Clause
> 
>     >> >> + * Copyright(c) 2020 Inspur Corporation
> 
>     >> >> + */
> 
>     >> >> +
> 
>     >> >> +
> 
>     >> >> +
> 
>     >> >> +static inline uint32_t
> 
>     >> >> +delete_item(struct gro_vxlan_udp4_tbl *tbl,
> 
>     >> >> +            uint32_t item_idx,
> 
>     >> >> +            uint32_t prev_item_idx)
> 
>     >> >> +{
> 
>     >> >> +    uint32_t next_idx = tbl->items[item_idx].inner_item.next_pkt_idx;
> 
>     >> >> +
> 
>     >> >> +    /* NULL indicates an empty item. */
> 
>     >> >> +    tbl->items[item_idx].inner_item.firstseg = NULL;
> 
>     >> >> +    tbl->item_num--;
> 
>     >> >> +    if (prev_item_idx != INVALID_ARRAY_INDEX)
> 
>     >> >> +            tbl->items[prev_item_idx].inner_item.next_pkt_idx = next_idx;
> 
>     >> >> +
> 
>     >> >> +    return next_idx;
> 
>     >> >> +}
> 
>     >> >> +
> 
>     >> >> +static inline uint32_t
> 
>     >> >> +insert_new_flow(struct gro_vxlan_udp4_tbl *tbl,
> 
>     >> >> +            struct vxlan_udp4_flow_key *src,
> 
>     >> >> +            uint32_t item_idx)
> 
>     >> >> +{
> 
>     >> >> +    struct vxlan_udp4_flow_key *dst;
> 
>     >> >> +    uint32_t flow_idx;
> 
>     >> >> +
> 
>     >> >> +    flow_idx = find_an_empty_flow(tbl);
> 
>     >> >> +    if (unlikely(flow_idx == INVALID_ARRAY_INDEX))
> 
>     >> >> +            return INVALID_ARRAY_INDEX;
> 
>     >> >> +
> 
>     >> >> +    dst = &(tbl->flows[flow_idx].key);
> 
>     >> >> +
> 
>     >> >> +    rte_ether_addr_copy(&(src->inner_key.eth_saddr),
> 
>     >> >> +                    &(dst->inner_key.eth_saddr));
> 
>     >> >> +    rte_ether_addr_copy(&(src->inner_key.eth_daddr),
> 
>     >> >> +                    &(dst->inner_key.eth_daddr));
> 
>     >> >> +    dst->inner_key.ip_src_addr = src->inner_key.ip_src_addr;
> 
>     >> >> +    dst->inner_key.ip_dst_addr = src->inner_key.ip_dst_addr;
> 
>     >> >> +    dst->inner_key.ip_id = src->inner_key.ip_id;
> 
>     >> >> +
> 
>     >> >> +    dst->vxlan_hdr.vx_flags = src->vxlan_hdr.vx_flags;
> 
>     >> >> +    dst->vxlan_hdr.vx_vni = src->vxlan_hdr.vx_vni;
> 
>     >> >> +    rte_ether_addr_copy(&(src->outer_eth_saddr), &(dst->outer_eth_saddr));
> 
>     >> >> +    rte_ether_addr_copy(&(src->outer_eth_daddr), &(dst->outer_eth_daddr));
> 
>     >> >> +    dst->outer_ip_src_addr = src->outer_ip_src_addr;
> 
>     >> >> +    dst->outer_ip_dst_addr = src->outer_ip_dst_addr;
> 
>     >> >> +    dst->outer_src_port = src->outer_src_port;
> 
>     >> >> +    dst->outer_dst_port = src->outer_dst_port;
> 
>     >> >> +
> 
>     >> >> +    tbl->flows[flow_idx].start_index = item_idx;
> 
>     >> >> +    tbl->flow_num++;
> 
>     >> >> +
> 
>     >> >> +    return flow_idx;
> 
>     >> >> +}
> 
>     >> >> +
> 
>     >> >> +static inline int
> 
>     >> >> +is_same_vxlan_udp4_flow(struct vxlan_udp4_flow_key k1,
> 
>     >> >> +            struct vxlan_udp4_flow_key k2)
> 
>     >> >> +{
> 
>     >> >> +    /* src port is changing, so shouldn't use it for flow check */
> 
>     >> >
> 
>     >> >As I know, fragments of a vxlan/udp/ipv4 packet have same outer source
> 
>     >> >port. In what cases outer source port will change?
> 
>     >> 
> 
>     >> In OVS, vxlan udp soure port is calculated from inner packet RSS, so for UDP fragment, RSS of first packet is
> 
>     >> different from other fragments because RSS is calculated based on 5 tuples (src ip, src port, dst ip, dst port,
> 
>     >> protocol type).
> 
>     >> 
> 
>     >> ovs/lib/netdev-native-tnl.h:
> 
>     >> 
> 
>     >> static inline uint32_t *
> 
>     >> dp_packet_rss_ptr(const struct dp_packet *b)
> 
>     >> {
> 
>     >>     return CONST_CAST(uint32_t *, &b->mbuf.hash.rss);
> 
>     >> }
> 
>     >> 
> 
>     >> static inline uint32_t
> 
>     >> dp_packet_get_rss_hash(const struct dp_packet *p)
> 
>     >> {
> 
>     >>     return *dp_packet_rss_ptr(p);
> 
>     >> }
> 
>     >> 
> 
>     >> static inline ovs_be16
> 
>     >> netdev_tnl_get_src_port(struct dp_packet *packet)
> 
>     >> {
> 
>     >>     uint32_t hash;
> 
>     >> 
> 
>     >>     hash = dp_packet_get_rss_hash(packet);
> 
>     >> 
> 
>     >>     return htons((((uint64_t) hash * (tnl_udp_port_max - tnl_udp_port_min)) >> 32) +
> 
>     >>                  tnl_udp_port_min);
> 
>     >> }
> 
>     >> 
> 
>     >> void
> 
>     >> netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED,
> 
>     >>                            struct dp_packet *packet,
> 
>     >>                            const struct ovs_action_push_tnl *data)
> 
>     >> {
> 
>     >>     struct udp_header *udp;
> 
>     >>     int ip_tot_size;
> 
>     >> 
> 
>     >>     udp = netdev_tnl_push_ip_header(packet, data->header, data->header_len, &ip_tot_size);
> 
>     >> 
> 
>     >>     /* set udp src port */
> 
>     >>     udp->udp_src = netdev_tnl_get_src_port(packet);
> 
>     >>     udp->udp_len = htons(ip_tot_size);
> 
>     >> 
> 
>     >>     if (udp->udp_csum) {
> 
>     >>         netdev_tnl_calc_udp_csum(udp, packet, ip_tot_size);
> 
>     >>     }
> 
>     >> }
> 
>     >> 
> 
>     >> This is special for ECMP which only checks outer header to differentiate different flows (innner packet). The
> 
>     >> first fragment of original inner UDP is considered as different flow from other fragments.
> 
>     >> 
> 
>     >> I found this by debugging becuase the same UDP traffic did generate different vxlan UDP source port.
> 
>     >> 
> 
>     > 
> 
>     >You are right. I checked VxLAN RFC 7348, and outer source port is
> 
>     >calculated from inner packet. In OVS case, the first fragment
> 
>     >will have different outer source port compared with other fragments.
> 
>     >Please add some comments in the code for people to better understand
> 
>     >the code. In addition, need to remove outer_src_port from the structure
> 
>     >vxlan_udp4_flow_key.
> 
>     > 
> 
>     >> >
> 
>     >> >> +    return (rte_is_same_ether_addr(&k1.outer_eth_saddr,
> 
>     >> >> +                                    &k2.outer_eth_saddr) &&
> 
>     >> >> +                    rte_is_same_ether_addr(&k1.outer_eth_daddr,
> 
>     >> >> +                            &k2.outer_eth_daddr) &&
> 
>     >> >> +                    (k1.outer_ip_src_addr == k2.outer_ip_src_addr) &&
> 
>     >> >> +                    (k1.outer_ip_dst_addr == k2.outer_ip_dst_addr) &&
> 
>     >> >> +                    (k1.outer_dst_port == k2.outer_dst_port) &&
> 
>     >> >> +                    (k1.vxlan_hdr.vx_flags == k2.vxlan_hdr.vx_flags) &&
> 
>     >> >> +                    (k1.vxlan_hdr.vx_vni == k2.vxlan_hdr.vx_vni) &&
> 
>     >> >> +                    is_same_udp4_flow(k1.inner_key, k2.inner_key));
> 
>     >> >> +}
> 
>     >> >> +
> 
>     >> >> +static inline int
> 
>     >> >> +udp_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,
> 
>     >> >> +            uint16_t frag_offset,
> 
>     >> >> +            uint16_t ip_dl)
> 
>     >> >
> 
>     >> >Better rename it as udp4_check_vxlan_neighbor.
> 
>     >> 
> 
>     >> Ok
> 
>     >> 
> 
>     >> >
> 
>     >> >> +{
> 
>     >> >> +    struct rte_mbuf *pkt = item->inner_item.firstseg;
> 
>     >> >> +    int cmp;
> 
>     >> >> +    uint16_t l2_offset;
> 
>     >> >> +    int ret = 0;
> 
>     >> >> +
> 
>     >> >> +    l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
> 
>     >> >> +    cmp = udp_check_neighbor(&item->inner_item, frag_offset, ip_dl,
> 
>     >> >> +                                    l2_offset);
> 
>     >> >
> 
>     >> >Why don't check outer IP ID? If outer DF bit is not set, all fragments
> 
>     >> >should have incremental outer IP ID; if DF bit is set, we don't need
> 
>     >> >to check outer IP ID.
> 
>     >> 
> 
>     >> By default, OVS set DF flag. I don't think we need to compare outer IP id because inner check
> 
>     >> is enough to differentiate if they are same flow, isn't it?
> 
>     > 
> 
>     >OVS is one of workloads using DPDK, and we can assume
> 
>     >other cases have the same design as OVS. I think IP ID
> 
>     >check is necessary.
> 
>     > 
> 
>     >> 
> 
>     >> >
> 
>     >> >In addition, udp_check_neighbor is renamed, but you didn't change here.
> 
>     >> 
> 
>     >> Forgot to build it, Makefile has been removed from dpdk main branch, build isn't so convinient, will
> 
>     >> use meson build to check it for next version.
> 
>     >> 
> 
>     >> >
> 
>     >> >> +    /* VXLAN outer IP ID is out of order, so don't touch it and
> 
>     >> >> +     * don't compare it.
> 
>     >> >> +     */
> 
>     >> >> +    if (cmp > 0)
> 
>     >> >> +            /* Append the new packet. */
> 
>     >> >> +            ret = 1;
> 
>     >> >> +    else if (cmp < 0)
> 
>     >> >> +            /* Prepend the new packet. */
> 
>     >> >> +            ret = -1;
> 
>     >> >> +
> 
>     >> >> +    return ret;
> 
>     >> >> +}
> 
>     >> >> +
> 
>     >> >>
> 
>     >> >>  /*
> 
>     >> >>   * GRO context structure. It keeps the table structures, which are
> 
>     >> >> @@ -137,19 +148,27 @@ struct gro_ctx {
> 
>     >> >>      struct gro_udp4_item udp_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {{0} };
> 
>     >> >>
> 
>     >> >>      /* Allocate a reassembly table for VXLAN TCP GRO */
> 
>     >> >> -    struct gro_vxlan_tcp4_tbl vxlan_tbl;
> 
>     >> >> -    struct gro_vxlan_tcp4_flow vxlan_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
> 
>     >> >> -    struct gro_vxlan_tcp4_item vxlan_items[RTE_GRO_MAX_BURST_ITEM_NUM]
> 
>     >> >> +    struct gro_vxlan_tcp4_tbl vxlan_tcp_tbl;
> 
>     >> >> +    struct gro_vxlan_tcp4_flow vxlan_tcp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
> 
>     >> >> +    struct gro_vxlan_tcp4_item vxlan_tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM]
> 
>     >> >> +                    = {{{0}, 0, 0} };
> 
>     >> >> +
> 
>     >> >> +    /* Allocate a reassembly table for VXLAN UDP GRO */
> 
>     >> >> +    struct gro_vxlan_udp4_tbl vxlan_udp_tbl;
> 
>     >> >> +    struct gro_vxlan_udp4_flow vxlan_udp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
> 
>     >> >> +    struct gro_vxlan_udp4_item vxlan_udp_items[RTE_GRO_MAX_BURST_ITEM_NUM]
> 
>     >> >>                      = {{{0}, 0, 0} };
> 
>     >> >>
> 
>     >> >>      struct rte_mbuf *unprocess_pkts[nb_pkts];
> 
>     >> >>      uint32_t item_num;
> 
>     >> >>      int32_t ret;
> 
>     >> >>      uint16_t i, unprocess_num = 0, nb_after_gro = nb_pkts;
> 
>     >> >> -    uint8_t do_tcp4_gro = 0, do_vxlan_gro = 0, do_udp4_gro = 0;
> 
>     >> >> +    uint8_t do_tcp4_gro = 0, do_vxlan_tcp_gro = 0, do_udp4_gro = 0,
> 
>     >> >> +            do_vxlan_udp_gro = 0;
> 
>     >> >>
> 
>     >> >>      if (unlikely((param->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
> 
>     >> >>                                      RTE_GRO_TCP_IPV4 |
> 
>     >> >> +                                    RTE_GRO_IPV4_VXLAN_UDP_IPV4 |
> 
>     >> >>                                      RTE_GRO_UDP_IPV4)) == 0))
> 
>     >> >>              return nb_pkts;
> 
>     >> >>
> 
>     >> >> @@ -160,15 +179,28 @@ struct gro_ctx {
> 
>     >> >>
> 
>     >> >>      if (param->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) {
> 
>     >> >>              for (i = 0; i < item_num; i++)
> 
>     >> >> -                    vxlan_flows[i].start_index = INVALID_ARRAY_INDEX;
> 
>     >> >> -
> 
>     >> >> -            vxlan_tbl.flows = vxlan_flows;
> 
>     >> >> -            vxlan_tbl.items = vxlan_items;
> 
>     >> >> -            vxlan_tbl.flow_num = 0;
> 
>     >> >> -            vxlan_tbl.item_num = 0;
> 
>     >> >> -            vxlan_tbl.max_flow_num = item_num;
> 
>     >> >> -            vxlan_tbl.max_item_num = item_num;
> 
>     >> >> -            do_vxlan_gro = 1;
> 
>     >> >> +                    vxlan_tcp_flows[i].start_index = INVALID_ARRAY_INDEX;
> 
>     >> >> +
> 
>     >> >> +            vxlan_tcp_tbl.flows = vxlan_tcp_flows;
> 
>     >> >> +            vxlan_tcp_tbl.items = vxlan_tcp_items;
> 
>     >> >> +            vxlan_tcp_tbl.flow_num = 0;
> 
>     >> >> +            vxlan_tcp_tbl.item_num = 0;
> 
>     >> >> +            vxlan_tcp_tbl.max_flow_num = item_num;
> 
>     >> >> +            vxlan_tcp_tbl.max_item_num = item_num;
> 
>     >> >> +            do_vxlan_tcp_gro = 1;
> 
>     >> >> +    }
> 
>     >> >> +
> 
>     >> >> +    if (param->gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) {
> 
>     >> >> +            for (i = 0; i < item_num; i++)
> 
>     >> >> +                    vxlan_udp_flows[i].start_index = INVALID_ARRAY_INDEX;
> 
>     >> >> +
> 
>     >> >> +            vxlan_udp_tbl.flows = vxlan_udp_flows;
> 
>     >> >> +            vxlan_udp_tbl.items = vxlan_udp_items;
> 
>     >> >> +            vxlan_udp_tbl.flow_num = 0;
> 
>     >> >> +            vxlan_udp_tbl.item_num = 0;
> 
>     >> >> +            vxlan_udp_tbl.max_flow_num = item_num;
> 
>     >> >> +            vxlan_udp_tbl.max_item_num = item_num;
> 
>     >> >> +            do_vxlan_udp_gro = 1;
> 
>     >> >>      }
> 
>     >> >>
> 
>     >> >>      if (param->gro_types & RTE_GRO_TCP_IPV4) {
> 
>     >> >> @@ -204,9 +236,18 @@ struct gro_ctx {
> 
>     >> >>               * will be flushed from the tables.
> 
>     >> >>               */
> 
>     >> >>              if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
> 
>     >> >> -                            do_vxlan_gro) {
> 
>     >> >> +                            do_vxlan_tcp_gro) {
> 
>     >> >>                      ret = gro_vxlan_tcp4_reassemble(pkts[i],
> 
>     >> >> -                                                    &vxlan_tbl, 0);
> 
>     >> >> +                                                    &vxlan_tcp_tbl, 0);
> 
>     >> >> +                    if (ret > 0)
> 
>     >> >> +                            /* Merge successfully */
> 
>     >> >> +                            nb_after_gro--;
> 
>     >> >> +                    else if (ret < 0)
> 
>     >> >> +                            unprocess_pkts[unprocess_num++] = pkts[i];
> 
>     >> >> +            } else if (IS_IPV4_VXLAN_UDP4_PKT(pkts[i]->packet_type) &&
> 
>     >> >> +                            do_vxlan_udp_gro) {
> 
>     >> >> +                    ret = gro_vxlan_udp4_reassemble(pkts[i],
> 
>     >> >> +                                                    &vxlan_udp_tbl, 0);
> 
>     >> >>                      if (ret > 0)
> 
>     >> >>                              /* Merge successfully */
> 
>     >> >>                              nb_after_gro--;
> 
>     >> >> @@ -236,11 +277,17 @@ struct gro_ctx {
> 
>     >> >>               || (unprocess_num < nb_pkts)) {
> 
>     >> >>              i = 0;
> 
>     >> >>              /* Flush all packets from the tables */
> 
>     >> >> -            if (do_vxlan_gro) {
> 
>     >> >> -                    i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tbl,
> 
>     >> >> +            if (do_vxlan_tcp_gro) {
> 
>     >> >> +                    i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tcp_tbl,
> 
>     >> >>                                      0, pkts, nb_pkts);
> 
>     >> >>              }
> 
>     >> >>
> 
>     >> >> +            if (do_vxlan_udp_gro) {
> 
>     >> >> +                    i += gro_vxlan_udp4_tbl_timeout_flush(&vxlan_udp_tbl,
> 
>     >> >> +                                    0, &pkts[i], nb_pkts - i);
> 
>     >> >> +
> 
>     >> >> +            }
> 
>     >> >> +
> 
>     >> >>              if (do_tcp4_gro) {
> 
>     >> >>                      i += gro_tcp4_tbl_timeout_flush(&tcp_tbl, 0,
> 
>     >> >>                                      &pkts[i], nb_pkts - i);
> 
>     >> >> @@ -269,33 +316,42 @@ struct gro_ctx {
> 
>     >> >>  {
> 
>     >> >>      struct rte_mbuf *unprocess_pkts[nb_pkts];
> 
>     >> >>      struct gro_ctx *gro_ctx = ctx;
> 
>     >> >> -    void *tcp_tbl, *udp_tbl, *vxlan_tbl;
> 
>     >> >> +    void *tcp_tbl, *udp_tbl, *vxlan_tcp_tbl, *vxlan_udp_tbl;
> 
>     >> >>      uint64_t current_time;
> 
>     >> >>      uint16_t i, unprocess_num = 0;
> 
>     >> >> -    uint8_t do_tcp4_gro, do_vxlan_gro, do_udp4_gro;
> 
>     >> >> +    uint8_t do_tcp4_gro, do_vxlan_tcp_gro, do_udp4_gro, do_vxlan_udp_gro;
> 
>     >> >>
> 
>     >> >>      if (unlikely((gro_ctx->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
> 
>     >> >>                                      RTE_GRO_TCP_IPV4 |
> 
>     >> >> +                                    RTE_GRO_IPV4_VXLAN_UDP_IPV4 |
> 
>     >> >>                                      RTE_GRO_UDP_IPV4)) == 0))
> 
>     >> >>              return nb_pkts;
> 
>     >> >>
> 
>     >> >>      tcp_tbl = gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX];
> 
>     >> >> -    vxlan_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];
> 
>     >> >> +    vxlan_tcp_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];
> 
>     >> >>      udp_tbl = gro_ctx->tbls[RTE_GRO_UDP_IPV4_INDEX];
> 
>     >> >> +    vxlan_udp_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX];
> 
>     >> >>
> 
>     >> >>      do_tcp4_gro = (gro_ctx->gro_types & RTE_GRO_TCP_IPV4) ==
> 
>     >> >>              RTE_GRO_TCP_IPV4;
> 
>     >> >> -    do_vxlan_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) ==
> 
>     >> >> +    do_vxlan_tcp_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) ==
> 
>     >> >>              RTE_GRO_IPV4_VXLAN_TCP_IPV4;
> 
>     >> >>      do_udp4_gro = (gro_ctx->gro_types & RTE_GRO_UDP_IPV4) ==
> 
>     >> >>              RTE_GRO_UDP_IPV4;
> 
>     >> >> +    do_vxlan_udp_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) ==
> 
>     >> >> +            RTE_GRO_IPV4_VXLAN_UDP_IPV4;
> 
>     >> >>
> 
>     >> >>      current_time = rte_rdtsc();
> 
>     >> >>
> 
>     >> >>      for (i = 0; i < nb_pkts; i++) {
> 
>     >> >>              if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
> 
>     >> >> -                            do_vxlan_gro) {
> 
>     >> >> -                    if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tbl,
> 
>     >> >> +                            do_vxlan_tcp_gro) {
> 
>     >> >> +                    if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tcp_tbl,
> 
>     >> >> +                                            current_time) < 0)
> 
>     >> >> +                            unprocess_pkts[unprocess_num++] = pkts[i];
> 
>     >> >> +            } else if (IS_IPV4_VXLAN_UDP4_PKT(pkts[i]->packet_type) &&
> 
>     >> >> +                            do_vxlan_udp_gro) {
> 
>     >> >> +                    if (gro_vxlan_udp4_reassemble(pkts[i], vxlan_udp_tbl,
> 
>     >> >>                                              current_time) < 0)
> 
>     >> >>                              unprocess_pkts[unprocess_num++] = pkts[i];
> 
>     >> >>              } else if (IS_IPV4_TCP_PKT(pkts[i]->packet_type) &&
> 
>     >> >> @@ -341,6 +397,13 @@ struct gro_ctx {
> 
>     >> >>              left_nb_out = max_nb_out - num;
> 
>     >> >>      }
> 
>     >> >>
> 
>     >> >> +    if ((gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) && max_nb_out > 0) {
> 
>     >> >> +            num += gro_vxlan_udp4_tbl_timeout_flush(gro_ctx->tbls[
> 
>     >> >> +                            RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX],
> 
>     >> >> +                            flush_timestamp, &out[num], left_nb_out);
> 
>     >> >> +            left_nb_out = max_nb_out - num;
> 
>     >> >
> 
>     >> >Don't need to calculate left_nb_out here. In the previous patch, you
> 
>     >> >also calculate it for UDP GRO. Please remove it both.
> 
>     >> 
> 
>     >> I think keeping it here isn't bad thing, if someone will add a new GRO type here, he/she won't make a mistake.
> 
>     > 
> 
>     >I don't think so. It's unnecessary for current design.
> 
>     >If others want to add new GRO types, they will write
> 
>     >correct code.
> 
>     > 
> 
>     >> 
> 
>     >> >
> 
>     >> 
> 
>     >> 
> 
>     >> 
> 
>     >>  
> 
>     >> 
> 
>      
> 
>      
> 
> 
> 
>  
>
yang_y_yi Sept. 16, 2020, 3:05 a.m. UTC | #7
No, next_proto_id of inner IP header can clearly identify it is a UDP packet even if it is a no-udp-header ip fragment.





















At 2020-09-16 10:54:24, "Jiayu Hu" <jiayu.hu@intel.com> wrote:
>On Mon, Sep 14, 2020 at 05:14:59PM +0800, yang_y_yi wrote:
>> 
>> Jiayu, VM to VM case for big UDP packet (say udp parload size is 8192K, but mtu
>> 1500). VM will segment it as UDP framgments, but when ovs dpdk uses VxLan to
>> encapsulate them, it doesn't know they are same flow, OVS DPDK has no knowldge
>> about them, just encapsulate them as if they are not related. But on receive
>> side on another machine, OVS DPDK will GRO them, at this point, outer IP ID is
>> even random for every packet. For TCP, I think same issue is there, GRO just
>> considered them as different flow and doesn't do GRO, in my opinion, we also
>> should do GRO for such case.
>
>I am a little confused about ovs behavior. Does ovs know they are udp fragments?
>Or it knows they are fragments but doesn't know they are from one flow?
>Given ovs receives 3 udp fragments of a udp packet, if ovs doesn't know they are
>fragments of a udp packet, their mbuf->packet_type will be ether/ipv4/udp,
>ether/ipv4, and ether/ipv4. dpdk udp gro will not process the second and third
>one as their packet type in mbuf is not ether/ipv4/udp.
>
>> 
>> 
>>    2020-09-14 16:50:08  "Hu, Jiayu" <jiayu.hu@intel.com> д    
>> 
>>     Out IP ID is not used to check if they belong to the same flow,
>> 
>>     but check if they are neighbors. If two vxlan/udp fragments are
>> 
>>     neighbors, their outer IP ID and outer frag_oft should be incremental.
>> 
>>     I still cannot understand why ignore outer IP ID when DF is 0.
>> 
>>     Can you give more explanation?
>> 
>>      
>> 
>>     From: yang_y_yi <yang_y_yi@163.com>
>>     Sent: Monday, September 14, 2020 4:27 PM
>>     To: Hu, Jiayu <jiayu.hu@intel.com>
>>     Cc: thomas@monjalon.net; yangyi01@inspur.com; dev@dpdk.org
>>     Subject: Re:Re: Re: [PATCH v5 2/2] gro: add VXLAN UDP GRO support
>>     Importance: High
>> 
>>      
>> 
>>     For outer_ip_id, there is same ip_id disorder issue existing as UDP GRO, so
>>     we can't use ip_id +/-1 to check if they are same flow, here is my
>>     incrmental change for it with more comments.
>> 
>>      
>> 
>>     @@ -189,7 +188,12 @@
>>      is_same_vxlan_udp4_flow(struct vxlan_udp4_flow_key k1,
>>                     struct vxlan_udp4_flow_key k2)
>>      {
>>     -       /* src port is changing, so shouldn't use it for flow check */
>>     +       /* For VxLAN packet, outer udp src port is calculated from
>>     +        * inner packet RSS hash, udp src port of the first UDP
>>     +        * fragment is different from one of other UDP fragments
>>     +        * even if they are same flow, so we have to skip outer udp
>>     +        * src port comparison here.
>>     +        */
>>             return (rte_is_same_ether_addr(&k1.outer_eth_saddr,
>>                                             &k2.outer_eth_saddr) &&
>>                             rte_is_same_ether_addr(&k1.outer_eth_daddr,
>>     @@ -212,12 +216,16 @@
>>             uint16_t l2_offset;
>>             int ret = 0;
>> 
>>     +       /* Note: if outer DF bit is set, i.e outer_is_atomic is 0,
>>     +        * we needn't compare outer_ip_id because they are same,
>>     +        * for the case outer_is_atomic is 1, we also have no way
>>     +        * to comapre outer_ip_id because the received packets can
>>     +        * be out of order. So ignore outer_ip_id comparison here.
>>     +        */
>>     +
>>             l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
>>             cmp = udp4_check_neighbor(&item->inner_item, frag_offset, ip_dl,
>>                                             l2_offset);
>>     -       /* VXLAN outer IP ID is out of order, so don't touch it and
>>     -        * don't compare it.
>>     -        */
>>             if (cmp > 0)
>>                     /* Append the new packet. */
>>                     ret = 1;
>>     @@ -354,7 +362,9 @@
>>             rte_ether_addr_copy(&(outer_eth_hdr->d_addr), &
>>     (key.outer_eth_daddr));
>>             key.outer_ip_src_addr = outer_ipv4_hdr->src_addr;
>>             key.outer_ip_dst_addr = outer_ipv4_hdr->dst_addr;
>>     -       key.outer_src_port = udp_hdr->src_port;
>>     +       /* Note: It is unnecessary to save outer_src_port here because it
>>     can
>>     +        * be different for VxLAN UDP fragments from the same flow.
>>     +        */
>>             key.outer_dst_port = udp_hdr->dst_port;
>> 
>>             /* Search for a matched flow. */
>> 
>>      
>> 
>>      
>> 
>>      
>> 
>>     
>>     At 2020-09-14 12:21:26, "Jiayu Hu" <jiayu.hu@intel.com> wrote:
>> 
>>     >Replies are inline.
>> 
>>     > 
>> 
>>     >BTW, you need to update the programmer guide
>> 
>>     >doc/guides/prog_guide/generic_receive_offload_lib.rst and
>> 
>>     >release note doc/guides/rel_notes/release_20_11.rst.
>> 
>>     > 
>> 
>>     >Thanks,
>> 
>>     >Jiayu
>> 
>>     > 
>> 
>>     >On Mon, Sep 14, 2020 at 10:13:44AM +0800, yang_y_yi wrote:
>> 
>>     >> Jiayu, thank you so much, please check my replies for some of your comments,
>> 
>>     >> here is incremental patch. I built it by meson this time :-)
>> 
>>     >> 
>> 
>>     >> diff --git a/lib/librte_gro/gro_vxlan_udp4.c b/lib/librte_gro/gro_vxlan_udp4.c
>> 
>>     >> index 9e9df72..1fcfaf1 100644
>> 
>>     >> --- a/lib/librte_gro/gro_vxlan_udp4.c
>> 
>>     >> +++ b/lib/librte_gro/gro_vxlan_udp4.c
>> 
>>     >> @@ -203,7 +203,7 @@
>> 
>>     >>  }
>> 
>>     >> 
>> 
>>     >>  static inline int
>> 
>>     >> -udp_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,
>> 
>>     >> +udp4_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,
>> 
>>     >>                 uint16_t frag_offset,
>> 
>>     >>                 uint16_t ip_dl)
>> 
>>     >>  {
>> 
>>     >> @@ -213,7 +213,7 @@
>> 
>>     >>         int ret = 0;
>> 
>>     >> 
>> 
>>     >>         l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
>> 
>>     >> -       cmp = udp_check_neighbor(&item->inner_item, frag_offset, ip_dl,
>> 
>>     >> +       cmp = udp4_check_neighbor(&item->inner_item, frag_offset, ip_dl,
>> 
>>     >>                                         l2_offset);
>> 
>>     >>         /* VXLAN outer IP ID is out of order, so don't touch it and
>> 
>>     >>          * don't compare it.
>> 
>>     >> @@ -379,7 +379,7 @@
>> 
>>     >>                 item_idx = insert_new_item(tbl, pkt, start_time,
>> 
>>     >>                                 INVALID_ARRAY_INDEX, frag_offset,
>> 
>>     >>                                 is_last_frag, outer_ip_id, outer_is_atomic);
>> 
>>     >> -               if (item_idx == INVALID_ARRAY_INDEX)
>> 
>>     >> +               if (unlikely(item_idx == INVALID_ARRAY_INDEX))
>> 
>>     >>                         return -1;
>> 
>>     >>                 if (insert_new_flow(tbl, &key, item_idx) ==
>> 
>>     >>                                 INVALID_ARRAY_INDEX) {
>> 
>>     >> @@ -397,7 +397,7 @@
>> 
>>     >>         cur_idx = tbl->flows[i].start_index;
>> 
>>     >>         prev_idx = cur_idx;
>> 
>>     >>         do {
>> 
>>     >> -               cmp = udp_check_vxlan_neighbor(&(tbl->items[cur_idx]),
>> 
>>     >> +               cmp = udp4_check_vxlan_neighbor(&(tbl->items[cur_idx]),
>> 
>>     >>                                 frag_offset, ip_dl);
>> 
>>     >>                 if (cmp) {
>> 
>>     >>                         if (merge_two_vxlan_udp4_packets(
>> 
>>     >> @@ -436,7 +436,7 @@
>> 
>>     >>                 item_idx = insert_new_item(tbl, pkt, start_time,
>> 
>>     >>                                 INVALID_ARRAY_INDEX, frag_offset,
>> 
>>     >>                                 is_last_frag, outer_ip_id, outer_is_atomic);
>> 
>>     >> -               if (item_idx == INVALID_ARRAY_INDEX)
>> 
>>     >> +               if (unlikely(item_idx == INVALID_ARRAY_INDEX))
>> 
>>     >>                         return -1;
>> 
>>     >>                 tbl->items[item_idx].inner_item.next_pkt_idx = cur_idx;
>> 
>>     >>                 tbl->flows[i].start_index = item_idx;
>> 
>>     >> @@ -470,7 +470,7 @@
>> 
>>     >>                 ip_dl = pkt->pkt_len - hdr_len;
>> 
>>     >> diff --git a/lib/librte_gro/gro_vxlan_udp4.c b/lib/librte_gro/gro_vxlan_udp4.c
>> 
>>     >> index 9e9df72..1fcfaf1 100644
>> 
>>     >> --- a/lib/librte_gro/gro_vxlan_udp4.c
>> 
>>     >> +++ b/lib/librte_gro/gro_vxlan_udp4.c
>> 
>>     >> @@ -203,7 +203,7 @@
>> 
>>     >>  }
>> 
>>     >> 
>> 
>>     >>  static inline int
>> 
>>     >> -udp_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,
>> 
>>     >> +udp4_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,
>> 
>>     >>                 uint16_t frag_offset,
>> 
>>     >>                 uint16_t ip_dl)
>> 
>>     >>  {
>> 
>>     >> @@ -213,7 +213,7 @@
>> 
>>     >>         int ret = 0;
>> 
>>     >> 
>> 
>>     >>         l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
>> 
>>     >> -       cmp = udp_check_neighbor(&item->inner_item, frag_offset, ip_dl,
>> 
>>     >> +       cmp = udp4_check_neighbor(&item->inner_item, frag_offset, ip_dl,
>> 
>>     >>                                         l2_offset);
>> 
>>     >>         /* VXLAN outer IP ID is out of order, so don't touch it and
>> 
>>     >>          * don't compare it.
>> 
>>     >> @@ -379,7 +379,7 @@
>> 
>>     >>                 item_idx = insert_new_item(tbl, pkt, start_time,
>> 
>>     >>                                 INVALID_ARRAY_INDEX, frag_offset,
>> 
>>     >>                                 is_last_frag, outer_ip_id, outer_is_atomic);
>> 
>>     >> -               if (item_idx == INVALID_ARRAY_INDEX)
>> 
>>     >> +               if (unlikely(item_idx == INVALID_ARRAY_INDEX))
>> 
>>     >>                         return -1;
>> 
>>     >>                 if (insert_new_flow(tbl, &key, item_idx) ==
>> 
>>     >>                                 INVALID_ARRAY_INDEX) {
>> 
>>     >> @@ -397,7 +397,7 @@
>> 
>>     >>         cur_idx = tbl->flows[i].start_index;
>> 
>>     >>         prev_idx = cur_idx;
>> 
>>     >>         do {
>> 
>>     >> -               cmp = udp_check_vxlan_neighbor(&(tbl->items[cur_idx]),
>> 
>>     >> +               cmp = udp4_check_vxlan_neighbor(&(tbl->items[cur_idx]),
>> 
>>     >>                                 frag_offset, ip_dl);
>> 
>>     >>                 if (cmp) {
>> 
>>     >>                         if (merge_two_vxlan_udp4_packets(
>> 
>>     >> @@ -436,7 +436,7 @@
>> 
>>     >>                 item_idx = insert_new_item(tbl, pkt, start_time,
>> 
>>     >>                                 INVALID_ARRAY_INDEX, frag_offset,
>> 
>>     >>                                 is_last_frag, outer_ip_id, outer_is_atomic);
>> 
>>     >> -               if (item_idx == INVALID_ARRAY_INDEX)
>> 
>>     >> +               if (unlikely(item_idx == INVALID_ARRAY_INDEX))
>> 
>>     >>                         return -1;
>> 
>>     >>                 tbl->items[item_idx].inner_item.next_pkt_idx = cur_idx;
>> 
>>     >>                 tbl->flows[i].start_index = item_idx;
>> 
>>     >> @@ -470,7 +470,7 @@
>> 
>>     >>                 ip_dl = pkt->pkt_len - hdr_len;
>> 
>>     >>                 frag_offset = tbl->items[item_idx].inner_item.frag_offset;
>> 
>>     >>                 is_last_frag = tbl->items[item_idx].inner_item.is_last_frag;
>> 
>>     >> -               cmp = udp_check_vxlan_neighbor(&(tbl->items[start_idx]),
>> 
>>     >> +               cmp = udp4_check_vxlan_neighbor(&(tbl->items[start_idx]),
>> 
>>     >>                                         frag_offset, ip_dl);
>> 
>>     >>                 if (cmp) {
>> 
>>     >>                         if (merge_two_vxlan_udp4_packets(
>> 
>>     >> @@ -481,12 +481,10 @@
>> 
>>     >>                                                         INVALID_ARRAY_INDEX);
>> 
>>     >>                                 tbl->items[start_idx].inner_item.next_pkt_idx
>> 
>>     >>                                         = item_idx;
>> 
>>     >> -                       } else {
>> 
>>     >> +                       } else
>> 
>>     >>                                 return 0;
>> 
>>     >> -                       }
>> 
>>     >> -               } else {
>> 
>>     >> +               } else
>> 
>>     >>                         return 0;
>> 
>>     >> -               }
>> 
>>     >>         }
>> 
>>     >> 
>> 
>>     >>         return 0;
>> 
>>     >> 
>> 
>>     >> 
>> 
>>     >> At 2020-09-11 12:24:16, "Jiayu Hu" <jiayu.hu@intel.com> wrote:
>> 
>>     >> >Some comments are inline.
>> 
>>     >> >
>> 
>>     >> >Thanks,
>> 
>>     >> >Jiayu
>> 
>>     >> >
>> 
>>     >> >On Thu, Sep 10, 2020 at 05:29:14PM +0800, yang_y_yi@163.com wrote:
>> 
>>     >> >> From: Yi Yang <yangyi01@inspur.com>
>> 
>>     >> >>
>> 
>>     >> >> VXLAN UDP GRO can help improve VM-to-VM UDP performance
>> 
>>     >> >> when VM is enabled UFO or GSO, GRO must be supported if
>> 
>>     >> >> GSO or UFO is enabled, otherwise, performance gain will
>> 
>>     >> >> be hurt.
>> 
>>     >> >>
>> 
>>     >> >> With this enabled in DPDK, OVS DPDK can leverage it to
>> 
>>     >> >> improve VM-to-VM UDP performance, this will make sure
>> 
>>     >> >> IP fragments will be reassembled once it is received
>> 
>>     >> >> from physical NIC. It is very helpful in OVS DPDK VXLAN
>> 
>>     >> >> TSO case.
>> 
>>     >> >>
>> 
>>     >> >> Signed-off-by: Yi Yang <yangyi01@inspur.com>
>> 
>>     >> >> ---
>> 
>>     >> >>  lib/librte_gro/gro_udp4.h       |   1 +
>> 
>>     >> >>  lib/librte_gro/gro_vxlan_udp4.c | 548 ++++++++++++++++++++++++++++++++++++++++
>> 
>>     >> >>  lib/librte_gro/gro_vxlan_udp4.h | 152 +++++++++++
>> 
>>     >> >>  lib/librte_gro/meson.build      |   2 +-
>> 
>>     >> >>  lib/librte_gro/rte_gro.c        | 115 +++++++--
>> 
>>     >> >>  lib/librte_gro/rte_gro.h        |   3 +
>> 
>>     >> >>  6 files changed, 794 insertions(+), 27 deletions(-)
>> 
>>     >> >>  create mode 100644 lib/librte_gro/gro_vxlan_udp4.c
>> 
>>     >> >>  create mode 100644 lib/librte_gro/gro_vxlan_udp4.h
>> 
>>     >> >>
>> 
>>     >> >> diff --git a/lib/librte_gro/gro_udp4.h b/lib/librte_gro/gro_udp4.h
>> 
>>     >> >> index 0a078e4..d38b393 100644
>> 
>>     >> >> --- a/lib/librte_gro/gro_udp4.h
>> 
>>     >> >> +++ b/lib/librte_gro/gro_udp4.h
>> 
>>     >> >> @@ -7,6 +7,7 @@
>> 
>>     >> >>
>> 
>>     >> >>  #include <rte_ip.h>
>> 
>>     >> >>  #include <rte_udp.h>
>> 
>>     >> >> +#include <rte_vxlan.h>
>> 
>>     >> >>
>> 
>>     >> >>  #define INVALID_ARRAY_INDEX 0xffffffffUL
>> 
>>     >> >>  #define GRO_UDP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
>> 
>>     >> >> diff --git a/lib/librte_gro/gro_vxlan_udp4.c b/lib/librte_gro/gro_vxlan_udp4.c
>> 
>>     >> >> new file mode 100644
>> 
>>     >> >> index 0000000..9e9df72
>> 
>>     >> >> --- /dev/null
>> 
>>     >> >> +++ b/lib/librte_gro/gro_vxlan_udp4.c
>> 
>>     >> >> @@ -0,0 +1,548 @@
>> 
>>     >> >> +/* SPDX-License-Identifier: BSD-3-Clause
>> 
>>     >> >> + * Copyright(c) 2020 Inspur Corporation
>> 
>>     >> >> + */
>> 
>>     >> >> +
>> 
>>     >> >> +
>> 
>>     >> >> +
>> 
>>     >> >> +static inline uint32_t
>> 
>>     >> >> +delete_item(struct gro_vxlan_udp4_tbl *tbl,
>> 
>>     >> >> +            uint32_t item_idx,
>> 
>>     >> >> +            uint32_t prev_item_idx)
>> 
>>     >> >> +{
>> 
>>     >> >> +    uint32_t next_idx = tbl->items[item_idx].inner_item.next_pkt_idx;
>> 
>>     >> >> +
>> 
>>     >> >> +    /* NULL indicates an empty item. */
>> 
>>     >> >> +    tbl->items[item_idx].inner_item.firstseg = NULL;
>> 
>>     >> >> +    tbl->item_num--;
>> 
>>     >> >> +    if (prev_item_idx != INVALID_ARRAY_INDEX)
>> 
>>     >> >> +            tbl->items[prev_item_idx].inner_item.next_pkt_idx = next_idx;
>> 
>>     >> >> +
>> 
>>     >> >> +    return next_idx;
>> 
>>     >> >> +}
>> 
>>     >> >> +
>> 
>>     >> >> +static inline uint32_t
>> 
>>     >> >> +insert_new_flow(struct gro_vxlan_udp4_tbl *tbl,
>> 
>>     >> >> +            struct vxlan_udp4_flow_key *src,
>> 
>>     >> >> +            uint32_t item_idx)
>> 
>>     >> >> +{
>> 
>>     >> >> +    struct vxlan_udp4_flow_key *dst;
>> 
>>     >> >> +    uint32_t flow_idx;
>> 
>>     >> >> +
>> 
>>     >> >> +    flow_idx = find_an_empty_flow(tbl);
>> 
>>     >> >> +    if (unlikely(flow_idx == INVALID_ARRAY_INDEX))
>> 
>>     >> >> +            return INVALID_ARRAY_INDEX;
>> 
>>     >> >> +
>> 
>>     >> >> +    dst = &(tbl->flows[flow_idx].key);
>> 
>>     >> >> +
>> 
>>     >> >> +    rte_ether_addr_copy(&(src->inner_key.eth_saddr),
>> 
>>     >> >> +                    &(dst->inner_key.eth_saddr));
>> 
>>     >> >> +    rte_ether_addr_copy(&(src->inner_key.eth_daddr),
>> 
>>     >> >> +                    &(dst->inner_key.eth_daddr));
>> 
>>     >> >> +    dst->inner_key.ip_src_addr = src->inner_key.ip_src_addr;
>> 
>>     >> >> +    dst->inner_key.ip_dst_addr = src->inner_key.ip_dst_addr;
>> 
>>     >> >> +    dst->inner_key.ip_id = src->inner_key.ip_id;
>> 
>>     >> >> +
>> 
>>     >> >> +    dst->vxlan_hdr.vx_flags = src->vxlan_hdr.vx_flags;
>> 
>>     >> >> +    dst->vxlan_hdr.vx_vni = src->vxlan_hdr.vx_vni;
>> 
>>     >> >> +    rte_ether_addr_copy(&(src->outer_eth_saddr), &(dst->outer_eth_saddr));
>> 
>>     >> >> +    rte_ether_addr_copy(&(src->outer_eth_daddr), &(dst->outer_eth_daddr));
>> 
>>     >> >> +    dst->outer_ip_src_addr = src->outer_ip_src_addr;
>> 
>>     >> >> +    dst->outer_ip_dst_addr = src->outer_ip_dst_addr;
>> 
>>     >> >> +    dst->outer_src_port = src->outer_src_port;
>> 
>>     >> >> +    dst->outer_dst_port = src->outer_dst_port;
>> 
>>     >> >> +
>> 
>>     >> >> +    tbl->flows[flow_idx].start_index = item_idx;
>> 
>>     >> >> +    tbl->flow_num++;
>> 
>>     >> >> +
>> 
>>     >> >> +    return flow_idx;
>> 
>>     >> >> +}
>> 
>>     >> >> +
>> 
>>     >> >> +static inline int
>> 
>>     >> >> +is_same_vxlan_udp4_flow(struct vxlan_udp4_flow_key k1,
>> 
>>     >> >> +            struct vxlan_udp4_flow_key k2)
>> 
>>     >> >> +{
>> 
>>     >> >> +    /* src port is changing, so shouldn't use it for flow check */
>> 
>>     >> >
>> 
>>     >> >As I know, fragments of a vxlan/udp/ipv4 packet have same outer source
>> 
>>     >> >port. In what cases outer source port will change?
>> 
>>     >> 
>> 
>>     >> In OVS, vxlan udp soure port is calculated from inner packet RSS, so for UDP fragment, RSS of first packet is
>> 
>>     >> different from other fragments because RSS is calculated based on 5 tuples (src ip, src port, dst ip, dst port,
>> 
>>     >> protocol type).
>> 
>>     >> 
>> 
>>     >> ovs/lib/netdev-native-tnl.h:
>> 
>>     >> 
>> 
>>     >> static inline uint32_t *
>> 
>>     >> dp_packet_rss_ptr(const struct dp_packet *b)
>> 
>>     >> {
>> 
>>     >>     return CONST_CAST(uint32_t *, &b->mbuf.hash.rss);
>> 
>>     >> }
>> 
>>     >> 
>> 
>>     >> static inline uint32_t
>> 
>>     >> dp_packet_get_rss_hash(const struct dp_packet *p)
>> 
>>     >> {
>> 
>>     >>     return *dp_packet_rss_ptr(p);
>> 
>>     >> }
>> 
>>     >> 
>> 
>>     >> static inline ovs_be16
>> 
>>     >> netdev_tnl_get_src_port(struct dp_packet *packet)
>> 
>>     >> {
>> 
>>     >>     uint32_t hash;
>> 
>>     >> 
>> 
>>     >>     hash = dp_packet_get_rss_hash(packet);
>> 
>>     >> 
>> 
>>     >>     return htons((((uint64_t) hash * (tnl_udp_port_max - tnl_udp_port_min)) >> 32) +
>> 
>>     >>                  tnl_udp_port_min);
>> 
>>     >> }
>> 
>>     >> 
>> 
>>     >> void
>> 
>>     >> netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED,
>> 
>>     >>                            struct dp_packet *packet,
>> 
>>     >>                            const struct ovs_action_push_tnl *data)
>> 
>>     >> {
>> 
>>     >>     struct udp_header *udp;
>> 
>>     >>     int ip_tot_size;
>> 
>>     >> 
>> 
>>     >>     udp = netdev_tnl_push_ip_header(packet, data->header, data->header_len, &ip_tot_size);
>> 
>>     >> 
>> 
>>     >>     /* set udp src port */
>> 
>>     >>     udp->udp_src = netdev_tnl_get_src_port(packet);
>> 
>>     >>     udp->udp_len = htons(ip_tot_size);
>> 
>>     >> 
>> 
>>     >>     if (udp->udp_csum) {
>> 
>>     >>         netdev_tnl_calc_udp_csum(udp, packet, ip_tot_size);
>> 
>>     >>     }
>> 
>>     >> }
>> 
>>     >> 
>> 
>>     >> This is special for ECMP which only checks outer header to differentiate different flows (innner packet). The
>> 
>>     >> first fragment of original inner UDP is considered as different flow from other fragments.
>> 
>>     >> 
>> 
>>     >> I found this by debugging becuase the same UDP traffic did generate different vxlan UDP source port.
>> 
>>     >> 
>> 
>>     > 
>> 
>>     >You are right. I checked VxLAN RFC 7348, and outer source port is
>> 
>>     >calculated from inner packet. In OVS case, the first fragment
>> 
>>     >will have different outer source port compared with other fragments.
>> 
>>     >Please add some comments in the code for people to better understand
>> 
>>     >the code. In addition, need to remove outer_src_port from the structure
>> 
>>     >vxlan_udp4_flow_key.
>> 
>>     > 
>> 
>>     >> >
>> 
>>     >> >> +    return (rte_is_same_ether_addr(&k1.outer_eth_saddr,
>> 
>>     >> >> +                                    &k2.outer_eth_saddr) &&
>> 
>>     >> >> +                    rte_is_same_ether_addr(&k1.outer_eth_daddr,
>> 
>>     >> >> +                            &k2.outer_eth_daddr) &&
>> 
>>     >> >> +                    (k1.outer_ip_src_addr == k2.outer_ip_src_addr) &&
>> 
>>     >> >> +                    (k1.outer_ip_dst_addr == k2.outer_ip_dst_addr) &&
>> 
>>     >> >> +                    (k1.outer_dst_port == k2.outer_dst_port) &&
>> 
>>     >> >> +                    (k1.vxlan_hdr.vx_flags == k2.vxlan_hdr.vx_flags) &&
>> 
>>     >> >> +                    (k1.vxlan_hdr.vx_vni == k2.vxlan_hdr.vx_vni) &&
>> 
>>     >> >> +                    is_same_udp4_flow(k1.inner_key, k2.inner_key));
>> 
>>     >> >> +}
>> 
>>     >> >> +
>> 
>>     >> >> +static inline int
>> 
>>     >> >> +udp_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,
>> 
>>     >> >> +            uint16_t frag_offset,
>> 
>>     >> >> +            uint16_t ip_dl)
>> 
>>     >> >
>> 
>>     >> >Better rename it as udp4_check_vxlan_neighbor.
>> 
>>     >> 
>> 
>>     >> Ok
>> 
>>     >> 
>> 
>>     >> >
>> 
>>     >> >> +{
>> 
>>     >> >> +    struct rte_mbuf *pkt = item->inner_item.firstseg;
>> 
>>     >> >> +    int cmp;
>> 
>>     >> >> +    uint16_t l2_offset;
>> 
>>     >> >> +    int ret = 0;
>> 
>>     >> >> +
>> 
>>     >> >> +    l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
>> 
>>     >> >> +    cmp = udp_check_neighbor(&item->inner_item, frag_offset, ip_dl,
>> 
>>     >> >> +                                    l2_offset);
>> 
>>     >> >
>> 
>>     >> >Why don't check outer IP ID? If outer DF bit is not set, all fragments
>> 
>>     >> >should have incremental outer IP ID; if DF bit is set, we don't need
>> 
>>     >> >to check outer IP ID.
>> 
>>     >> 
>> 
>>     >> By default, OVS set DF flag. I don't think we need to compare outer IP id because inner check
>> 
>>     >> is enough to differentiate if they are same flow, isn't it?
>> 
>>     > 
>> 
>>     >OVS is one of workloads using DPDK, and we can assume
>> 
>>     >other cases have the same design as OVS. I think IP ID
>> 
>>     >check is necessary.
>> 
>>     > 
>> 
>>     >> 
>> 
>>     >> >
>> 
>>     >> >In addition, udp_check_neighbor is renamed, but you didn't change here.
>> 
>>     >> 
>> 
>>     >> Forgot to build it, Makefile has been removed from dpdk main branch, build isn't so convinient, will
>> 
>>     >> use meson build to check it for next version.
>> 
>>     >> 
>> 
>>     >> >
>> 
>>     >> >> +    /* VXLAN outer IP ID is out of order, so don't touch it and
>> 
>>     >> >> +     * don't compare it.
>> 
>>     >> >> +     */
>> 
>>     >> >> +    if (cmp > 0)
>> 
>>     >> >> +            /* Append the new packet. */
>> 
>>     >> >> +            ret = 1;
>> 
>>     >> >> +    else if (cmp < 0)
>> 
>>     >> >> +            /* Prepend the new packet. */
>> 
>>     >> >> +            ret = -1;
>> 
>>     >> >> +
>> 
>>     >> >> +    return ret;
>> 
>>     >> >> +}
>> 
>>     >> >> +
>> 
>>     >> >>
>> 
>>     >> >>  /*
>> 
>>     >> >>   * GRO context structure. It keeps the table structures, which are
>> 
>>     >> >> @@ -137,19 +148,27 @@ struct gro_ctx {
>> 
>>     >> >>      struct gro_udp4_item udp_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {{0} };
>> 
>>     >> >>
>> 
>>     >> >>      /* Allocate a reassembly table for VXLAN TCP GRO */
>> 
>>     >> >> -    struct gro_vxlan_tcp4_tbl vxlan_tbl;
>> 
>>     >> >> -    struct gro_vxlan_tcp4_flow vxlan_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
>> 
>>     >> >> -    struct gro_vxlan_tcp4_item vxlan_items[RTE_GRO_MAX_BURST_ITEM_NUM]
>> 
>>     >> >> +    struct gro_vxlan_tcp4_tbl vxlan_tcp_tbl;
>> 
>>     >> >> +    struct gro_vxlan_tcp4_flow vxlan_tcp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
>> 
>>     >> >> +    struct gro_vxlan_tcp4_item vxlan_tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM]
>> 
>>     >> >> +                    = {{{0}, 0, 0} };
>> 
>>     >> >> +
>> 
>>     >> >> +    /* Allocate a reassembly table for VXLAN UDP GRO */
>> 
>>     >> >> +    struct gro_vxlan_udp4_tbl vxlan_udp_tbl;
>> 
>>     >> >> +    struct gro_vxlan_udp4_flow vxlan_udp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
>> 
>>     >> >> +    struct gro_vxlan_udp4_item vxlan_udp_items[RTE_GRO_MAX_BURST_ITEM_NUM]
>> 
>>     >> >>                      = {{{0}, 0, 0} };
>> 
>>     >> >>
>> 
>>     >> >>      struct rte_mbuf *unprocess_pkts[nb_pkts];
>> 
>>     >> >>      uint32_t item_num;
>> 
>>     >> >>      int32_t ret;
>> 
>>     >> >>      uint16_t i, unprocess_num = 0, nb_after_gro = nb_pkts;
>> 
>>     >> >> -    uint8_t do_tcp4_gro = 0, do_vxlan_gro = 0, do_udp4_gro = 0;
>> 
>>     >> >> +    uint8_t do_tcp4_gro = 0, do_vxlan_tcp_gro = 0, do_udp4_gro = 0,
>> 
>>     >> >> +            do_vxlan_udp_gro = 0;
>> 
>>     >> >>
>> 
>>     >> >>      if (unlikely((param->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
>> 
>>     >> >>                                      RTE_GRO_TCP_IPV4 |
>> 
>>     >> >> +                                    RTE_GRO_IPV4_VXLAN_UDP_IPV4 |
>> 
>>     >> >>                                      RTE_GRO_UDP_IPV4)) == 0))
>> 
>>     >> >>              return nb_pkts;
>> 
>>     >> >>
>> 
>>     >> >> @@ -160,15 +179,28 @@ struct gro_ctx {
>> 
>>     >> >>
>> 
>>     >> >>      if (param->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) {
>> 
>>     >> >>              for (i = 0; i < item_num; i++)
>> 
>>     >> >> -                    vxlan_flows[i].start_index = INVALID_ARRAY_INDEX;
>> 
>>     >> >> -
>> 
>>     >> >> -            vxlan_tbl.flows = vxlan_flows;
>> 
>>     >> >> -            vxlan_tbl.items = vxlan_items;
>> 
>>     >> >> -            vxlan_tbl.flow_num = 0;
>> 
>>     >> >> -            vxlan_tbl.item_num = 0;
>> 
>>     >> >> -            vxlan_tbl.max_flow_num = item_num;
>> 
>>     >> >> -            vxlan_tbl.max_item_num = item_num;
>> 
>>     >> >> -            do_vxlan_gro = 1;
>> 
>>     >> >> +                    vxlan_tcp_flows[i].start_index = INVALID_ARRAY_INDEX;
>> 
>>     >> >> +
>> 
>>     >> >> +            vxlan_tcp_tbl.flows = vxlan_tcp_flows;
>> 
>>     >> >> +            vxlan_tcp_tbl.items = vxlan_tcp_items;
>> 
>>     >> >> +            vxlan_tcp_tbl.flow_num = 0;
>> 
>>     >> >> +            vxlan_tcp_tbl.item_num = 0;
>> 
>>     >> >> +            vxlan_tcp_tbl.max_flow_num = item_num;
>> 
>>     >> >> +            vxlan_tcp_tbl.max_item_num = item_num;
>> 
>>     >> >> +            do_vxlan_tcp_gro = 1;
>> 
>>     >> >> +    }
>> 
>>     >> >> +
>> 
>>     >> >> +    if (param->gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) {
>> 
>>     >> >> +            for (i = 0; i < item_num; i++)
>> 
>>     >> >> +                    vxlan_udp_flows[i].start_index = INVALID_ARRAY_INDEX;
>> 
>>     >> >> +
>> 
>>     >> >> +            vxlan_udp_tbl.flows = vxlan_udp_flows;
>> 
>>     >> >> +            vxlan_udp_tbl.items = vxlan_udp_items;
>> 
>>     >> >> +            vxlan_udp_tbl.flow_num = 0;
>> 
>>     >> >> +            vxlan_udp_tbl.item_num = 0;
>> 
>>     >> >> +            vxlan_udp_tbl.max_flow_num = item_num;
>> 
>>     >> >> +            vxlan_udp_tbl.max_item_num = item_num;
>> 
>>     >> >> +            do_vxlan_udp_gro = 1;
>> 
>>     >> >>      }
>> 
>>     >> >>
>> 
>>     >> >>      if (param->gro_types & RTE_GRO_TCP_IPV4) {
>> 
>>     >> >> @@ -204,9 +236,18 @@ struct gro_ctx {
>> 
>>     >> >>               * will be flushed from the tables.
>> 
>>     >> >>               */
>> 
>>     >> >>              if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
>> 
>>     >> >> -                            do_vxlan_gro) {
>> 
>>     >> >> +                            do_vxlan_tcp_gro) {
>> 
>>     >> >>                      ret = gro_vxlan_tcp4_reassemble(pkts[i],
>> 
>>     >> >> -                                                    &vxlan_tbl, 0);
>> 
>>     >> >> +                                                    &vxlan_tcp_tbl, 0);
>> 
>>     >> >> +                    if (ret > 0)
>> 
>>     >> >> +                            /* Merge successfully */
>> 
>>     >> >> +                            nb_after_gro--;
>> 
>>     >> >> +                    else if (ret < 0)
>> 
>>     >> >> +                            unprocess_pkts[unprocess_num++] = pkts[i];
>> 
>>     >> >> +            } else if (IS_IPV4_VXLAN_UDP4_PKT(pkts[i]->packet_type) &&
>> 
>>     >> >> +                            do_vxlan_udp_gro) {
>> 
>>     >> >> +                    ret = gro_vxlan_udp4_reassemble(pkts[i],
>> 
>>     >> >> +                                                    &vxlan_udp_tbl, 0);
>> 
>>     >> >>                      if (ret > 0)
>> 
>>     >> >>                              /* Merge successfully */
>> 
>>     >> >>                              nb_after_gro--;
>> 
>>     >> >> @@ -236,11 +277,17 @@ struct gro_ctx {
>> 
>>     >> >>               || (unprocess_num < nb_pkts)) {
>> 
>>     >> >>              i = 0;
>> 
>>     >> >>              /* Flush all packets from the tables */
>> 
>>     >> >> -            if (do_vxlan_gro) {
>> 
>>     >> >> -                    i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tbl,
>> 
>>     >> >> +            if (do_vxlan_tcp_gro) {
>> 
>>     >> >> +                    i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tcp_tbl,
>> 
>>     >> >>                                      0, pkts, nb_pkts);
>> 
>>     >> >>              }
>> 
>>     >> >>
>> 
>>     >> >> +            if (do_vxlan_udp_gro) {
>> 
>>     >> >> +                    i += gro_vxlan_udp4_tbl_timeout_flush(&vxlan_udp_tbl,
>> 
>>     >> >> +                                    0, &pkts[i], nb_pkts - i);
>> 
>>     >> >> +
>> 
>>     >> >> +            }
>> 
>>     >> >> +
>> 
>>     >> >>              if (do_tcp4_gro) {
>> 
>>     >> >>                      i += gro_tcp4_tbl_timeout_flush(&tcp_tbl, 0,
>> 
>>     >> >>                                      &pkts[i], nb_pkts - i);
>> 
>>     >> >> @@ -269,33 +316,42 @@ struct gro_ctx {
>> 
>>     >> >>  {
>> 
>>     >> >>      struct rte_mbuf *unprocess_pkts[nb_pkts];
>> 
>>     >> >>      struct gro_ctx *gro_ctx = ctx;
>> 
>>     >> >> -    void *tcp_tbl, *udp_tbl, *vxlan_tbl;
>> 
>>     >> >> +    void *tcp_tbl, *udp_tbl, *vxlan_tcp_tbl, *vxlan_udp_tbl;
>> 
>>     >> >>      uint64_t current_time;
>> 
>>     >> >>      uint16_t i, unprocess_num = 0;
>> 
>>     >> >> -    uint8_t do_tcp4_gro, do_vxlan_gro, do_udp4_gro;
>> 
>>     >> >> +    uint8_t do_tcp4_gro, do_vxlan_tcp_gro, do_udp4_gro, do_vxlan_udp_gro;
>> 
>>     >> >>
>> 
>>     >> >>      if (unlikely((gro_ctx->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
>> 
>>     >> >>                                      RTE_GRO_TCP_IPV4 |
>> 
>>     >> >> +                                    RTE_GRO_IPV4_VXLAN_UDP_IPV4 |
>> 
>>     >> >>                                      RTE_GRO_UDP_IPV4)) == 0))
>> 
>>     >> >>              return nb_pkts;
>> 
>>     >> >>
>> 
>>     >> >>      tcp_tbl = gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX];
>> 
>>     >> >> -    vxlan_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];
>> 
>>     >> >> +    vxlan_tcp_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];
>> 
>>     >> >>      udp_tbl = gro_ctx->tbls[RTE_GRO_UDP_IPV4_INDEX];
>> 
>>     >> >> +    vxlan_udp_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX];
>> 
>>     >> >>
>> 
>>     >> >>      do_tcp4_gro = (gro_ctx->gro_types & RTE_GRO_TCP_IPV4) ==
>> 
>>     >> >>              RTE_GRO_TCP_IPV4;
>> 
>>     >> >> -    do_vxlan_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) ==
>> 
>>     >> >> +    do_vxlan_tcp_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) ==
>> 
>>     >> >>              RTE_GRO_IPV4_VXLAN_TCP_IPV4;
>> 
>>     >> >>      do_udp4_gro = (gro_ctx->gro_types & RTE_GRO_UDP_IPV4) ==
>> 
>>     >> >>              RTE_GRO_UDP_IPV4;
>> 
>>     >> >> +    do_vxlan_udp_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) ==
>> 
>>     >> >> +            RTE_GRO_IPV4_VXLAN_UDP_IPV4;
>> 
>>     >> >>
>> 
>>     >> >>      current_time = rte_rdtsc();
>> 
>>     >> >>
>> 
>>     >> >>      for (i = 0; i < nb_pkts; i++) {
>> 
>>     >> >>              if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
>> 
>>     >> >> -                            do_vxlan_gro) {
>> 
>>     >> >> -                    if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tbl,
>> 
>>     >> >> +                            do_vxlan_tcp_gro) {
>> 
>>     >> >> +                    if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tcp_tbl,
>> 
>>     >> >> +                                            current_time) < 0)
>> 
>>     >> >> +                            unprocess_pkts[unprocess_num++] = pkts[i];
>> 
>>     >> >> +            } else if (IS_IPV4_VXLAN_UDP4_PKT(pkts[i]->packet_type) &&
>> 
>>     >> >> +                            do_vxlan_udp_gro) {
>> 
>>     >> >> +                    if (gro_vxlan_udp4_reassemble(pkts[i], vxlan_udp_tbl,
>> 
>>     >> >>                                              current_time) < 0)
>> 
>>     >> >>                              unprocess_pkts[unprocess_num++] = pkts[i];
>> 
>>     >> >>              } else if (IS_IPV4_TCP_PKT(pkts[i]->packet_type) &&
>> 
>>     >> >> @@ -341,6 +397,13 @@ struct gro_ctx {
>> 
>>     >> >>              left_nb_out = max_nb_out - num;
>> 
>>     >> >>      }
>> 
>>     >> >>
>> 
>>     >> >> +    if ((gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) && max_nb_out > 0) {
>> 
>>     >> >> +            num += gro_vxlan_udp4_tbl_timeout_flush(gro_ctx->tbls[
>> 
>>     >> >> +                            RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX],
>> 
>>     >> >> +                            flush_timestamp, &out[num], left_nb_out);
>> 
>>     >> >> +            left_nb_out = max_nb_out - num;
>> 
>>     >> >
>> 
>>     >> >Don't need to calculate left_nb_out here. In the previous patch, you
>> 
>>     >> >also calculate it for UDP GRO. Please remove it both.
>> 
>>     >> 
>> 
>>     >> I think keeping it here isn't bad thing, if someone will add a new GRO type here, he/she won't make a mistake.
>> 
>>     > 
>> 
>>     >I don't think so. It's unnecessary for current design.
>> 
>>     >If others want to add new GRO types, they will write
>> 
>>     >correct code.
>> 
>>     > 
>> 
>>     >> 
>> 
>>     >> >
>> 
>>     >> 
>> 
>>     >> 
>> 
>>     >> 
>> 
>>     >>  
>> 
>>     >> 
>> 
>>      
>> 
>>      
>> 
>> 
>> 
>>  
>>
Stephen Hemminger Sept. 16, 2020, 4:19 a.m. UTC | #8
On Wed, 16 Sep 2020 11:05:38 +0800 (CST)
yang_y_yi  <yang_y_yi@163.com> wrote:

> No, next_proto_id of inner IP header can clearly identify it is a UDP packet even if it is a no-udp-header ip fragment.

Yes GRO will know its UDP, but it will have no port information (in this packet).
GRO can only safely combine packets from the same flow. Since there is no port in
this packet, it is not sufficient to identify the inner flow.
Hu, Jiayu Sept. 17, 2020, 2:06 a.m. UTC | #9
OK, I agree to ignore processing outer IP ID. But I think the comment need to change, as
it is not accurate. VxLAN/UDP GRO doesn't process outer IP ID is because it is meaningless
for reassembly in the ovs case you mentioned, rather than it's out-of-order. You can
give the ovs example in the commit log, and it's easier to understand the code IMO.

Thanks,
Jiayu

From: yang_y_yi <yang_y_yi@163.com>
Sent: Wednesday, September 16, 2020 11:06 AM
To: Hu, Jiayu <jiayu.hu@intel.com>
Cc: yangyi01@inspur.com; thomas@monjalon.net; dev@dpdk.org
Subject: Re:Re: RE: Re:Re: Re: [PATCH v5 2/2] gro: add VXLAN UDP GRO support
Importance: High

No, next_proto_id of inner IP header can clearly identify it is a UDP packet even if it is a no-udp-header ip fragment.













At 2020-09-16 10:54:24, "Jiayu Hu" <jiayu.hu@intel.com<mailto:jiayu.hu@intel.com>> wrote:

>On Mon, Sep 14, 2020 at 05:14:59PM +0800, yang_y_yi wrote:

>>

>> Jiayu, VM to VM case for big UDP packet (say udp parload size is 8192K, but mtu

>> 1500). VM will segment it as UDP framgments, but when ovs dpdk uses VxLan to

>> encapsulate them, it doesn't know they are same flow, OVS DPDK has no knowldge

>> about them, just encapsulate them as if they are not related. But on receive

>> side on another machine, OVS DPDK will GRO them, at this point, outer IP ID is

>> even random for every packet. For TCP, I think same issue is there, GRO just

>> considered them as different flow and doesn't do GRO, in my opinion, we also

>> should do GRO for such case.

>

>I am a little confused about ovs behavior. Does ovs know they are udp fragments?

>Or it knows they are fragments but doesn't know they are from one flow?

>Given ovs receives 3 udp fragments of a udp packet, if ovs doesn't know they are

>fragments of a udp packet, their mbuf->packet_type will be ether/ipv4/udp,

>ether/ipv4, and ether/ipv4. dpdk udp gro will not process the second and third

>one as their packet type in mbuf is not ether/ipv4/udp.

>

>>

>>

>>    2020-09-14 16:50:08  "Hu, Jiayu" <jiayu.hu@intel.com<mailto:jiayu.hu@intel.com>> д

>>

>>     Out IP ID is not used to check if they belong to the same flow,

>>

>>     but check if they are neighbors. If two vxlan/udp fragments are

>>

>>     neighbors, their outer IP ID and outer frag_oft should be incremental.

>>

>>     I still cannot understand why ignore outer IP ID when DF is 0.

>>

>>     Can you give more explanation?

>>

>>

>>

>>     From: yang_y_yi <yang_y_yi@163.com<mailto:yang_y_yi@163.com>>

>>     Sent: Monday, September 14, 2020 4:27 PM

>>     To: Hu, Jiayu <jiayu.hu@intel.com<mailto:jiayu.hu@intel.com>>

>>     Cc: thomas@monjalon.net<mailto:thomas@monjalon.net>; yangyi01@inspur.com<mailto:yangyi01@inspur.com>; dev@dpdk.org<mailto:dev@dpdk.org>

>>     Subject: Re:Re: Re: [PATCH v5 2/2] gro: add VXLAN UDP GRO support

>>     Importance: High

>>

>>

>>

>>     For outer_ip_id, there is same ip_id disorder issue existing as UDP GRO, so

>>     we can't use ip_id +/-1 to check if they are same flow, here is my

>>     incrmental change for it with more comments.

>>

>>

>>

>>     @@ -189,7 +188,12 @@

>>      is_same_vxlan_udp4_flow(struct vxlan_udp4_flow_key k1,

>>                     struct vxlan_udp4_flow_key k2)

>>      {

>>     -       /* src port is changing, so shouldn't use it for flow check */

>>     +       /* For VxLAN packet, outer udp src port is calculated from

>>     +        * inner packet RSS hash, udp src port of the first UDP

>>     +        * fragment is different from one of other UDP fragments

>>     +        * even if they are same flow, so we have to skip outer udp

>>     +        * src port comparison here.

>>     +        */

>>             return (rte_is_same_ether_addr(&k1.outer_eth_saddr,

>>                                             &k2.outer_eth_saddr) &&

>>                             rte_is_same_ether_addr(&k1.outer_eth_daddr,

>>     @@ -212,12 +216,16 @@

>>             uint16_t l2_offset;

>>             int ret = 0;

>>

>>     +       /* Note: if outer DF bit is set, i.e outer_is_atomic is 0,

>>     +        * we needn't compare outer_ip_id because they are same,

>>     +        * for the case outer_is_atomic is 1, we also have no way

>>     +        * to comapre outer_ip_id because the received packets can

>>     +        * be out of order. So ignore outer_ip_id comparison here.

>>     +        */

>>     +

>>             l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;

>>             cmp = udp4_check_neighbor(&item->inner_item, frag_offset, ip_dl,

>>                                             l2_offset);

>>     -       /* VXLAN outer IP ID is out of order, so don't touch it and

>>     -        * don't compare it.

>>     -        */

>>             if (cmp > 0)

>>                     /* Append the new packet. */

>>                     ret = 1;

>>     @@ -354,7 +362,9 @@

>>             rte_ether_addr_copy(&(outer_eth_hdr->d_addr), &

>>     (key.outer_eth_daddr));

>>             key.outer_ip_src_addr = outer_ipv4_hdr->src_addr;

>>             key.outer_ip_dst_addr = outer_ipv4_hdr->dst_addr;

>>     -       key.outer_src_port = udp_hdr->src_port;

>>     +       /* Note: It is unnecessary to save outer_src_port here because it

>>     can

>>     +        * be different for VxLAN UDP fragments from the same flow.

>>     +        */

>>             key.outer_dst_port = udp_hdr->dst_port;

>>

>>             /* Search for a matched flow. */

>>

>>

>>

>>

>>

>>

>>

>>

>>     At 2020-09-14 12:21:26, "Jiayu Hu" <jiayu.hu@intel.com<mailto:jiayu.hu@intel.com>> wrote:

>>

>>     >Replies are inline.

>>

>>     >

>>

>>     >BTW, you need to update the programmer guide

>>

>>     >doc/guides/prog_guide/generic_receive_offload_lib.rst and

>>

>>     >release note doc/guides/rel_notes/release_20_11.rst.

>>

>>     >

>>

>>     >Thanks,

>>

>>     >Jiayu

>>

>>     >

>>

>>     >On Mon, Sep 14, 2020 at 10:13:44AM +0800, yang_y_yi wrote:

>>

>>     >> Jiayu, thank you so much, please check my replies for some of your comments,

>>

>>     >> here is incremental patch. I built it by meson this time :-)

>>

>>     >>

>>

>>     >> diff --git a/lib/librte_gro/gro_vxlan_udp4.c b/lib/librte_gro/gro_vxlan_udp4.c

>>

>>     >> index 9e9df72..1fcfaf1 100644

>>

>>     >> --- a/lib/librte_gro/gro_vxlan_udp4.c

>>

>>     >> +++ b/lib/librte_gro/gro_vxlan_udp4.c

>>

>>     >> @@ -203,7 +203,7 @@

>>

>>     >>  }

>>

>>     >>

>>

>>     >>  static inline int

>>

>>     >> -udp_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,

>>

>>     >> +udp4_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,

>>

>>     >>                 uint16_t frag_offset,

>>

>>     >>                 uint16_t ip_dl)

>>

>>     >>  {

>>

>>     >> @@ -213,7 +213,7 @@

>>

>>     >>         int ret = 0;

>>

>>     >>

>>

>>     >>         l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;

>>

>>     >> -       cmp = udp_check_neighbor(&item->inner_item, frag_offset, ip_dl,

>>

>>     >> +       cmp = udp4_check_neighbor(&item->inner_item, frag_offset, ip_dl,

>>

>>     >>                                         l2_offset);

>>

>>     >>         /* VXLAN outer IP ID is out of order, so don't touch it and

>>

>>     >>          * don't compare it.

>>

>>     >> @@ -379,7 +379,7 @@

>>

>>     >>                 item_idx = insert_new_item(tbl, pkt, start_time,

>>

>>     >>                                 INVALID_ARRAY_INDEX, frag_offset,

>>

>>     >>                                 is_last_frag, outer_ip_id, outer_is_atomic);

>>

>>     >> -               if (item_idx == INVALID_ARRAY_INDEX)

>>

>>     >> +               if (unlikely(item_idx == INVALID_ARRAY_INDEX))

>>

>>     >>                         return -1;

>>

>>     >>                 if (insert_new_flow(tbl, &key, item_idx) ==

>>

>>     >>                                 INVALID_ARRAY_INDEX) {

>>

>>     >> @@ -397,7 +397,7 @@

>>

>>     >>         cur_idx = tbl->flows[i].start_index;

>>

>>     >>         prev_idx = cur_idx;

>>

>>     >>         do {

>>

>>     >> -               cmp = udp_check_vxlan_neighbor(&(tbl->items[cur_idx]),

>>

>>     >> +               cmp = udp4_check_vxlan_neighbor(&(tbl->items[cur_idx]),

>>

>>     >>                                 frag_offset, ip_dl);

>>

>>     >>                 if (cmp) {

>>

>>     >>                         if (merge_two_vxlan_udp4_packets(

>>

>>     >> @@ -436,7 +436,7 @@

>>

>>     >>                 item_idx = insert_new_item(tbl, pkt, start_time,

>>

>>     >>                                 INVALID_ARRAY_INDEX, frag_offset,

>>

>>     >>                                 is_last_frag, outer_ip_id, outer_is_atomic);

>>

>>     >> -               if (item_idx == INVALID_ARRAY_INDEX)

>>

>>     >> +               if (unlikely(item_idx == INVALID_ARRAY_INDEX))

>>

>>     >>                         return -1;

>>

>>     >>                 tbl->items[item_idx].inner_item.next_pkt_idx = cur_idx;

>>

>>     >>                 tbl->flows[i].start_index = item_idx;

>>

>>     >> @@ -470,7 +470,7 @@

>>

>>     >>                 ip_dl = pkt->pkt_len - hdr_len;

>>

>>     >> diff --git a/lib/librte_gro/gro_vxlan_udp4.c b/lib/librte_gro/gro_vxlan_udp4.c

>>

>>     >> index 9e9df72..1fcfaf1 100644

>>

>>     >> --- a/lib/librte_gro/gro_vxlan_udp4.c

>>

>>     >> +++ b/lib/librte_gro/gro_vxlan_udp4.c

>>

>>     >> @@ -203,7 +203,7 @@

>>

>>     >>  }

>>

>>     >>

>>

>>     >>  static inline int

>>

>>     >> -udp_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,

>>

>>     >> +udp4_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,

>>

>>     >>                 uint16_t frag_offset,

>>

>>     >>                 uint16_t ip_dl)

>>

>>     >>  {

>>

>>     >> @@ -213,7 +213,7 @@

>>

>>     >>         int ret = 0;

>>

>>     >>

>>

>>     >>         l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;

>>

>>     >> -       cmp = udp_check_neighbor(&item->inner_item, frag_offset, ip_dl,

>>

>>     >> +       cmp = udp4_check_neighbor(&item->inner_item, frag_offset, ip_dl,

>>

>>     >>                                         l2_offset);

>>

>>     >>         /* VXLAN outer IP ID is out of order, so don't touch it and

>>

>>     >>          * don't compare it.

>>

>>     >> @@ -379,7 +379,7 @@

>>

>>     >>                 item_idx = insert_new_item(tbl, pkt, start_time,

>>

>>     >>                                 INVALID_ARRAY_INDEX, frag_offset,

>>

>>     >>                                 is_last_frag, outer_ip_id, outer_is_atomic);

>>

>>     >> -               if (item_idx == INVALID_ARRAY_INDEX)

>>

>>     >> +               if (unlikely(item_idx == INVALID_ARRAY_INDEX))

>>

>>     >>                         return -1;

>>

>>     >>                 if (insert_new_flow(tbl, &key, item_idx) ==

>>

>>     >>                                 INVALID_ARRAY_INDEX) {

>>

>>     >> @@ -397,7 +397,7 @@

>>

>>     >>         cur_idx = tbl->flows[i].start_index;

>>

>>     >>         prev_idx = cur_idx;

>>

>>     >>         do {

>>

>>     >> -               cmp = udp_check_vxlan_neighbor(&(tbl->items[cur_idx]),

>>

>>     >> +               cmp = udp4_check_vxlan_neighbor(&(tbl->items[cur_idx]),

>>

>>     >>                                 frag_offset, ip_dl);

>>

>>     >>                 if (cmp) {

>>

>>     >>                         if (merge_two_vxlan_udp4_packets(

>>

>>     >> @@ -436,7 +436,7 @@

>>

>>     >>                 item_idx = insert_new_item(tbl, pkt, start_time,

>>

>>     >>                                 INVALID_ARRAY_INDEX, frag_offset,

>>

>>     >>                                 is_last_frag, outer_ip_id, outer_is_atomic);

>>

>>     >> -               if (item_idx == INVALID_ARRAY_INDEX)

>>

>>     >> +               if (unlikely(item_idx == INVALID_ARRAY_INDEX))

>>

>>     >>                         return -1;

>>

>>     >>                 tbl->items[item_idx].inner_item.next_pkt_idx = cur_idx;

>>

>>     >>                 tbl->flows[i].start_index = item_idx;

>>

>>     >> @@ -470,7 +470,7 @@

>>

>>     >>                 ip_dl = pkt->pkt_len - hdr_len;

>>

>>     >>                 frag_offset = tbl->items[item_idx].inner_item.frag_offset;

>>

>>     >>                 is_last_frag = tbl->items[item_idx].inner_item.is_last_frag;

>>

>>     >> -               cmp = udp_check_vxlan_neighbor(&(tbl->items[start_idx]),

>>

>>     >> +               cmp = udp4_check_vxlan_neighbor(&(tbl->items[start_idx]),

>>

>>     >>                                         frag_offset, ip_dl);

>>

>>     >>                 if (cmp) {

>>

>>     >>                         if (merge_two_vxlan_udp4_packets(

>>

>>     >> @@ -481,12 +481,10 @@

>>

>>     >>                                                         INVALID_ARRAY_INDEX);

>>

>>     >>                                 tbl->items[start_idx].inner_item.next_pkt_idx

>>

>>     >>                                         = item_idx;

>>

>>     >> -                       } else {

>>

>>     >> +                       } else

>>

>>     >>                                 return 0;

>>

>>     >> -                       }

>>

>>     >> -               } else {

>>

>>     >> +               } else

>>

>>     >>                         return 0;

>>

>>     >> -               }

>>

>>     >>         }

>>

>>     >>

>>

>>     >>         return 0;

>>

>>     >>

>>

>>     >>

>>

>>     >> At 2020-09-11 12:24:16, "Jiayu Hu" <jiayu.hu@intel.com<mailto:jiayu.hu@intel.com>> wrote:

>>

>>     >> >Some comments are inline.

>>

>>     >> >

>>

>>     >> >Thanks,

>>

>>     >> >Jiayu

>>

>>     >> >

>>

>>     >> >On Thu, Sep 10, 2020 at 05:29:14PM +0800, yang_y_yi@163.com<mailto:yang_y_yi@163.com> wrote:

>>

>>     >> >> From: Yi Yang <yangyi01@inspur.com<mailto:yangyi01@inspur.com>>

>>

>>     >> >>

>>

>>     >> >> VXLAN UDP GRO can help improve VM-to-VM UDP performance

>>

>>     >> >> when VM is enabled UFO or GSO, GRO must be supported if

>>

>>     >> >> GSO or UFO is enabled, otherwise, performance gain will

>>

>>     >> >> be hurt.

>>

>>     >> >>

>>

>>     >> >> With this enabled in DPDK, OVS DPDK can leverage it to

>>

>>     >> >> improve VM-to-VM UDP performance, this will make sure

>>

>>     >> >> IP fragments will be reassembled once it is received

>>

>>     >> >> from physical NIC. It is very helpful in OVS DPDK VXLAN

>>

>>     >> >> TSO case.

>>

>>     >> >>

>>

>>     >> >> Signed-off-by: Yi Yang <yangyi01@inspur.com<mailto:yangyi01@inspur.com>>

>>

>>     >> >> ---

>>

>>     >> >>  lib/librte_gro/gro_udp4.h       |   1 +

>>

>>     >> >>  lib/librte_gro/gro_vxlan_udp4.c | 548 ++++++++++++++++++++++++++++++++++++++++

>>

>>     >> >>  lib/librte_gro/gro_vxlan_udp4.h | 152 +++++++++++

>>

>>     >> >>  lib/librte_gro/meson.build      |   2 +-

>>

>>     >> >>  lib/librte_gro/rte_gro.c        | 115 +++++++--

>>

>>     >> >>  lib/librte_gro/rte_gro.h        |   3 +

>>

>>     >> >>  6 files changed, 794 insertions(+), 27 deletions(-)

>>

>>     >> >>  create mode 100644 lib/librte_gro/gro_vxlan_udp4.c

>>

>>     >> >>  create mode 100644 lib/librte_gro/gro_vxlan_udp4.h

>>

>>     >> >>

>>

>>     >> >> diff --git a/lib/librte_gro/gro_udp4.h b/lib/librte_gro/gro_udp4.h

>>

>>     >> >> index 0a078e4..d38b393 100644

>>

>>     >> >> --- a/lib/librte_gro/gro_udp4.h

>>

>>     >> >> +++ b/lib/librte_gro/gro_udp4.h

>>

>>     >> >> @@ -7,6 +7,7 @@

>>

>>     >> >>

>>

>>     >> >>  #include <rte_ip.h>

>>

>>     >> >>  #include <rte_udp.h>

>>

>>     >> >> +#include <rte_vxlan.h>

>>

>>     >> >>

>>

>>     >> >>  #define INVALID_ARRAY_INDEX 0xffffffffUL

>>

>>     >> >>  #define GRO_UDP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)

>>

>>     >> >> diff --git a/lib/librte_gro/gro_vxlan_udp4.c b/lib/librte_gro/gro_vxlan_udp4.c

>>

>>     >> >> new file mode 100644

>>

>>     >> >> index 0000000..9e9df72

>>

>>     >> >> --- /dev/null

>>

>>     >> >> +++ b/lib/librte_gro/gro_vxlan_udp4.c

>>

>>     >> >> @@ -0,0 +1,548 @@

>>

>>     >> >> +/* SPDX-License-Identifier: BSD-3-Clause

>>

>>     >> >> + * Copyright(c) 2020 Inspur Corporation

>>

>>     >> >> + */

>>

>>     >> >> +

>>

>>     >> >> +

>>

>>     >> >> +

>>

>>     >> >> +static inline uint32_t

>>

>>     >> >> +delete_item(struct gro_vxlan_udp4_tbl *tbl,

>>

>>     >> >> +            uint32_t item_idx,

>>

>>     >> >> +            uint32_t prev_item_idx)

>>

>>     >> >> +{

>>

>>     >> >> +    uint32_t next_idx = tbl->items[item_idx].inner_item.next_pkt_idx;

>>

>>     >> >> +

>>

>>     >> >> +    /* NULL indicates an empty item. */

>>

>>     >> >> +    tbl->items[item_idx].inner_item.firstseg = NULL;

>>

>>     >> >> +    tbl->item_num--;

>>

>>     >> >> +    if (prev_item_idx != INVALID_ARRAY_INDEX)

>>

>>     >> >> +            tbl->items[prev_item_idx].inner_item.next_pkt_idx = next_idx;

>>

>>     >> >> +

>>

>>     >> >> +    return next_idx;

>>

>>     >> >> +}

>>

>>     >> >> +

>>

>>     >> >> +static inline uint32_t

>>

>>     >> >> +insert_new_flow(struct gro_vxlan_udp4_tbl *tbl,

>>

>>     >> >> +            struct vxlan_udp4_flow_key *src,

>>

>>     >> >> +            uint32_t item_idx)

>>

>>     >> >> +{

>>

>>     >> >> +    struct vxlan_udp4_flow_key *dst;

>>

>>     >> >> +    uint32_t flow_idx;

>>

>>     >> >> +

>>

>>     >> >> +    flow_idx = find_an_empty_flow(tbl);

>>

>>     >> >> +    if (unlikely(flow_idx == INVALID_ARRAY_INDEX))

>>

>>     >> >> +            return INVALID_ARRAY_INDEX;

>>

>>     >> >> +

>>

>>     >> >> +    dst = &(tbl->flows[flow_idx].key);

>>

>>     >> >> +

>>

>>     >> >> +    rte_ether_addr_copy(&(src->inner_key.eth_saddr),

>>

>>     >> >> +                    &(dst->inner_key.eth_saddr));

>>

>>     >> >> +    rte_ether_addr_copy(&(src->inner_key.eth_daddr),

>>

>>     >> >> +                    &(dst->inner_key.eth_daddr));

>>

>>     >> >> +    dst->inner_key.ip_src_addr = src->inner_key.ip_src_addr;

>>

>>     >> >> +    dst->inner_key.ip_dst_addr = src->inner_key.ip_dst_addr;

>>

>>     >> >> +    dst->inner_key.ip_id = src->inner_key.ip_id;

>>

>>     >> >> +

>>

>>     >> >> +    dst->vxlan_hdr.vx_flags = src->vxlan_hdr.vx_flags;

>>

>>     >> >> +    dst->vxlan_hdr.vx_vni = src->vxlan_hdr.vx_vni;

>>

>>     >> >> +    rte_ether_addr_copy(&(src->outer_eth_saddr), &(dst->outer_eth_saddr));

>>

>>     >> >> +    rte_ether_addr_copy(&(src->outer_eth_daddr), &(dst->outer_eth_daddr));

>>

>>     >> >> +    dst->outer_ip_src_addr = src->outer_ip_src_addr;

>>

>>     >> >> +    dst->outer_ip_dst_addr = src->outer_ip_dst_addr;

>>

>>     >> >> +    dst->outer_src_port = src->outer_src_port;

>>

>>     >> >> +    dst->outer_dst_port = src->outer_dst_port;

>>

>>     >> >> +

>>

>>     >> >> +    tbl->flows[flow_idx].start_index = item_idx;

>>

>>     >> >> +    tbl->flow_num++;

>>

>>     >> >> +

>>

>>     >> >> +    return flow_idx;

>>

>>     >> >> +}

>>

>>     >> >> +

>>

>>     >> >> +static inline int

>>

>>     >> >> +is_same_vxlan_udp4_flow(struct vxlan_udp4_flow_key k1,

>>

>>     >> >> +            struct vxlan_udp4_flow_key k2)

>>

>>     >> >> +{

>>

>>     >> >> +    /* src port is changing, so shouldn't use it for flow check */

>>

>>     >> >

>>

>>     >> >As I know, fragments of a vxlan/udp/ipv4 packet have same outer source

>>

>>     >> >port. In what cases outer source port will change?

>>

>>     >>

>>

>>     >> In OVS, vxlan udp soure port is calculated from inner packet RSS, so for UDP fragment, RSS of first packet is

>>

>>     >> different from other fragments because RSS is calculated based on 5 tuples (src ip, src port, dst ip, dst port,

>>

>>     >> protocol type).

>>

>>     >>

>>

>>     >> ovs/lib/netdev-native-tnl.h:

>>

>>     >>

>>

>>     >> static inline uint32_t *

>>

>>     >> dp_packet_rss_ptr(const struct dp_packet *b)

>>

>>     >> {

>>

>>     >>     return CONST_CAST(uint32_t *, &b->mbuf.hash.rss);

>>

>>     >> }

>>

>>     >>

>>

>>     >> static inline uint32_t

>>

>>     >> dp_packet_get_rss_hash(const struct dp_packet *p)

>>

>>     >> {

>>

>>     >>     return *dp_packet_rss_ptr(p);

>>

>>     >> }

>>

>>     >>

>>

>>     >> static inline ovs_be16

>>

>>     >> netdev_tnl_get_src_port(struct dp_packet *packet)

>>

>>     >> {

>>

>>     >>     uint32_t hash;

>>

>>     >>

>>

>>     >>     hash = dp_packet_get_rss_hash(packet);

>>

>>     >>

>>

>>     >>     return htons((((uint64_t) hash * (tnl_udp_port_max - tnl_udp_port_min)) >> 32) +

>>

>>     >>                  tnl_udp_port_min);

>>

>>     >> }

>>

>>     >>

>>

>>     >> void

>>

>>     >> netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED,

>>

>>     >>                            struct dp_packet *packet,

>>

>>     >>                            const struct ovs_action_push_tnl *data)

>>

>>     >> {

>>

>>     >>     struct udp_header *udp;

>>

>>     >>     int ip_tot_size;

>>

>>     >>

>>

>>     >>     udp = netdev_tnl_push_ip_header(packet, data->header, data->header_len, &ip_tot_size);

>>

>>     >>

>>

>>     >>     /* set udp src port */

>>

>>     >>     udp->udp_src = netdev_tnl_get_src_port(packet);

>>

>>     >>     udp->udp_len = htons(ip_tot_size);

>>

>>     >>

>>

>>     >>     if (udp->udp_csum) {

>>

>>     >>         netdev_tnl_calc_udp_csum(udp, packet, ip_tot_size);

>>

>>     >>     }

>>

>>     >> }

>>

>>     >>

>>

>>     >> This is special for ECMP which only checks outer header to differentiate different flows (innner packet). The

>>

>>     >> first fragment of original inner UDP is considered as different flow from other fragments.

>>

>>     >>

>>

>>     >> I found this by debugging becuase the same UDP traffic did generate different vxlan UDP source port.

>>

>>     >>

>>

>>     >

>>

>>     >You are right. I checked VxLAN RFC 7348, and outer source port is

>>

>>     >calculated from inner packet. In OVS case, the first fragment

>>

>>     >will have different outer source port compared with other fragments.

>>

>>     >Please add some comments in the code for people to better understand

>>

>>     >the code. In addition, need to remove outer_src_port from the structure

>>

>>     >vxlan_udp4_flow_key.

>>

>>     >

>>

>>     >> >

>>

>>     >> >> +    return (rte_is_same_ether_addr(&k1.outer_eth_saddr,

>>

>>     >> >> +                                    &k2.outer_eth_saddr) &&

>>

>>     >> >> +                    rte_is_same_ether_addr(&k1.outer_eth_daddr,

>>

>>     >> >> +                            &k2.outer_eth_daddr) &&

>>

>>     >> >> +                    (k1.outer_ip_src_addr == k2.outer_ip_src_addr) &&

>>

>>     >> >> +                    (k1.outer_ip_dst_addr == k2.outer_ip_dst_addr) &&

>>

>>     >> >> +                    (k1.outer_dst_port == k2.outer_dst_port) &&

>>

>>     >> >> +                    (k1.vxlan_hdr.vx_flags == k2.vxlan_hdr.vx_flags) &&

>>

>>     >> >> +                    (k1.vxlan_hdr.vx_vni == k2.vxlan_hdr.vx_vni) &&

>>

>>     >> >> +                    is_same_udp4_flow(k1.inner_key, k2.inner_key));

>>

>>     >> >> +}

>>

>>     >> >> +

>>

>>     >> >> +static inline int

>>

>>     >> >> +udp_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,

>>

>>     >> >> +            uint16_t frag_offset,

>>

>>     >> >> +            uint16_t ip_dl)

>>

>>     >> >

>>

>>     >> >Better rename it as udp4_check_vxlan_neighbor.

>>

>>     >>

>>

>>     >> Ok

>>

>>     >>

>>

>>     >> >

>>

>>     >> >> +{

>>

>>     >> >> +    struct rte_mbuf *pkt = item->inner_item.firstseg;

>>

>>     >> >> +    int cmp;

>>

>>     >> >> +    uint16_t l2_offset;

>>

>>     >> >> +    int ret = 0;

>>

>>     >> >> +

>>

>>     >> >> +    l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;

>>

>>     >> >> +    cmp = udp_check_neighbor(&item->inner_item, frag_offset, ip_dl,

>>

>>     >> >> +                                    l2_offset);

>>

>>     >> >

>>

>>     >> >Why don't check outer IP ID? If outer DF bit is not set, all fragments

>>

>>     >> >should have incremental outer IP ID; if DF bit is set, we don't need

>>

>>     >> >to check outer IP ID.

>>

>>     >>

>>

>>     >> By default, OVS set DF flag. I don't think we need to compare outer IP id because inner check

>>

>>     >> is enough to differentiate if they are same flow, isn't it?

>>

>>     >

>>

>>     >OVS is one of workloads using DPDK, and we can assume

>>

>>     >other cases have the same design as OVS. I think IP ID

>>

>>     >check is necessary.

>>

>>     >

>>

>>     >>

>>

>>     >> >

>>

>>     >> >In addition, udp_check_neighbor is renamed, but you didn't change here.

>>

>>     >>

>>

>>     >> Forgot to build it, Makefile has been removed from dpdk main branch, build isn't so convinient, will

>>

>>     >> use meson build to check it for next version.

>>

>>     >>

>>

>>     >> >

>>

>>     >> >> +    /* VXLAN outer IP ID is out of order, so don't touch it and

>>

>>     >> >> +     * don't compare it.

>>

>>     >> >> +     */

>>

>>     >> >> +    if (cmp > 0)

>>

>>     >> >> +            /* Append the new packet. */

>>

>>     >> >> +            ret = 1;

>>

>>     >> >> +    else if (cmp < 0)

>>

>>     >> >> +            /* Prepend the new packet. */

>>

>>     >> >> +            ret = -1;

>>

>>     >> >> +

>>

>>     >> >> +    return ret;

>>

>>     >> >> +}

>>

>>     >> >> +

>>

>>     >> >>

>>

>>     >> >>  /*

>>

>>     >> >>   * GRO context structure. It keeps the table structures, which are

>>

>>     >> >> @@ -137,19 +148,27 @@ struct gro_ctx {

>>

>>     >> >>      struct gro_udp4_item udp_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {{0} };

>>

>>     >> >>

>>

>>     >> >>      /* Allocate a reassembly table for VXLAN TCP GRO */

>>

>>     >> >> -    struct gro_vxlan_tcp4_tbl vxlan_tbl;

>>

>>     >> >> -    struct gro_vxlan_tcp4_flow vxlan_flows[RTE_GRO_MAX_BURST_ITEM_NUM];

>>

>>     >> >> -    struct gro_vxlan_tcp4_item vxlan_items[RTE_GRO_MAX_BURST_ITEM_NUM]

>>

>>     >> >> +    struct gro_vxlan_tcp4_tbl vxlan_tcp_tbl;

>>

>>     >> >> +    struct gro_vxlan_tcp4_flow vxlan_tcp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];

>>

>>     >> >> +    struct gro_vxlan_tcp4_item vxlan_tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM]

>>

>>     >> >> +                    = {{{0}, 0, 0} };

>>

>>     >> >> +

>>

>>     >> >> +    /* Allocate a reassembly table for VXLAN UDP GRO */

>>

>>     >> >> +    struct gro_vxlan_udp4_tbl vxlan_udp_tbl;

>>

>>     >> >> +    struct gro_vxlan_udp4_flow vxlan_udp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];

>>

>>     >> >> +    struct gro_vxlan_udp4_item vxlan_udp_items[RTE_GRO_MAX_BURST_ITEM_NUM]

>>

>>     >> >>                      = {{{0}, 0, 0} };

>>

>>     >> >>

>>

>>     >> >>      struct rte_mbuf *unprocess_pkts[nb_pkts];

>>

>>     >> >>      uint32_t item_num;

>>

>>     >> >>      int32_t ret;

>>

>>     >> >>      uint16_t i, unprocess_num = 0, nb_after_gro = nb_pkts;

>>

>>     >> >> -    uint8_t do_tcp4_gro = 0, do_vxlan_gro = 0, do_udp4_gro = 0;

>>

>>     >> >> +    uint8_t do_tcp4_gro = 0, do_vxlan_tcp_gro = 0, do_udp4_gro = 0,

>>

>>     >> >> +            do_vxlan_udp_gro = 0;

>>

>>     >> >>

>>

>>     >> >>      if (unlikely((param->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |

>>

>>     >> >>                                      RTE_GRO_TCP_IPV4 |

>>

>>     >> >> +                                    RTE_GRO_IPV4_VXLAN_UDP_IPV4 |

>>

>>     >> >>                                      RTE_GRO_UDP_IPV4)) == 0))

>>

>>     >> >>              return nb_pkts;

>>

>>     >> >>

>>

>>     >> >> @@ -160,15 +179,28 @@ struct gro_ctx {

>>

>>     >> >>

>>

>>     >> >>      if (param->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) {

>>

>>     >> >>              for (i = 0; i < item_num; i++)

>>

>>     >> >> -                    vxlan_flows[i].start_index = INVALID_ARRAY_INDEX;

>>

>>     >> >> -

>>

>>     >> >> -            vxlan_tbl.flows = vxlan_flows;

>>

>>     >> >> -            vxlan_tbl.items = vxlan_items;

>>

>>     >> >> -            vxlan_tbl.flow_num = 0;

>>

>>     >> >> -            vxlan_tbl.item_num = 0;

>>

>>     >> >> -            vxlan_tbl.max_flow_num = item_num;

>>

>>     >> >> -            vxlan_tbl.max_item_num = item_num;

>>

>>     >> >> -            do_vxlan_gro = 1;

>>

>>     >> >> +                    vxlan_tcp_flows[i].start_index = INVALID_ARRAY_INDEX;

>>

>>     >> >> +

>>

>>     >> >> +            vxlan_tcp_tbl.flows = vxlan_tcp_flows;

>>

>>     >> >> +            vxlan_tcp_tbl.items = vxlan_tcp_items;

>>

>>     >> >> +            vxlan_tcp_tbl.flow_num = 0;

>>

>>     >> >> +            vxlan_tcp_tbl.item_num = 0;

>>

>>     >> >> +            vxlan_tcp_tbl.max_flow_num = item_num;

>>

>>     >> >> +            vxlan_tcp_tbl.max_item_num = item_num;

>>

>>     >> >> +            do_vxlan_tcp_gro = 1;

>>

>>     >> >> +    }

>>

>>     >> >> +

>>

>>     >> >> +    if (param->gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) {

>>

>>     >> >> +            for (i = 0; i < item_num; i++)

>>

>>     >> >> +                    vxlan_udp_flows[i].start_index = INVALID_ARRAY_INDEX;

>>

>>     >> >> +

>>

>>     >> >> +            vxlan_udp_tbl.flows = vxlan_udp_flows;

>>

>>     >> >> +            vxlan_udp_tbl.items = vxlan_udp_items;

>>

>>     >> >> +            vxlan_udp_tbl.flow_num = 0;

>>

>>     >> >> +            vxlan_udp_tbl.item_num = 0;

>>

>>     >> >> +            vxlan_udp_tbl.max_flow_num = item_num;

>>

>>     >> >> +            vxlan_udp_tbl.max_item_num = item_num;

>>

>>     >> >> +            do_vxlan_udp_gro = 1;

>>

>>     >> >>      }

>>

>>     >> >>

>>

>>     >> >>      if (param->gro_types & RTE_GRO_TCP_IPV4) {

>>

>>     >> >> @@ -204,9 +236,18 @@ struct gro_ctx {

>>

>>     >> >>               * will be flushed from the tables.

>>

>>     >> >>               */

>>

>>     >> >>              if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&

>>

>>     >> >> -                            do_vxlan_gro) {

>>

>>     >> >> +                            do_vxlan_tcp_gro) {

>>

>>     >> >>                      ret = gro_vxlan_tcp4_reassemble(pkts[i],

>>

>>     >> >> -                                                    &vxlan_tbl, 0);

>>

>>     >> >> +                                                    &vxlan_tcp_tbl, 0);

>>

>>     >> >> +                    if (ret > 0)

>>

>>     >> >> +                            /* Merge successfully */

>>

>>     >> >> +                            nb_after_gro--;

>>

>>     >> >> +                    else if (ret < 0)

>>

>>     >> >> +                            unprocess_pkts[unprocess_num++] = pkts[i];

>>

>>     >> >> +            } else if (IS_IPV4_VXLAN_UDP4_PKT(pkts[i]->packet_type) &&

>>

>>     >> >> +                            do_vxlan_udp_gro) {

>>

>>     >> >> +                    ret = gro_vxlan_udp4_reassemble(pkts[i],

>>

>>     >> >> +                                                    &vxlan_udp_tbl, 0);

>>

>>     >> >>                      if (ret > 0)

>>

>>     >> >>                              /* Merge successfully */

>>

>>     >> >>                              nb_after_gro--;

>>

>>     >> >> @@ -236,11 +277,17 @@ struct gro_ctx {

>>

>>     >> >>               || (unprocess_num < nb_pkts)) {

>>

>>     >> >>              i = 0;

>>

>>     >> >>              /* Flush all packets from the tables */

>>

>>     >> >> -            if (do_vxlan_gro) {

>>

>>     >> >> -                    i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tbl,

>>

>>     >> >> +            if (do_vxlan_tcp_gro) {

>>

>>     >> >> +                    i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tcp_tbl,

>>

>>     >> >>                                      0, pkts, nb_pkts);

>>

>>     >> >>              }

>>

>>     >> >>

>>

>>     >> >> +            if (do_vxlan_udp_gro) {

>>

>>     >> >> +                    i += gro_vxlan_udp4_tbl_timeout_flush(&vxlan_udp_tbl,

>>

>>     >> >> +                                    0, &pkts[i], nb_pkts - i);

>>

>>     >> >> +

>>

>>     >> >> +            }

>>

>>     >> >> +

>>

>>     >> >>              if (do_tcp4_gro) {

>>

>>     >> >>                      i += gro_tcp4_tbl_timeout_flush(&tcp_tbl, 0,

>>

>>     >> >>                                      &pkts[i], nb_pkts - i);

>>

>>     >> >> @@ -269,33 +316,42 @@ struct gro_ctx {

>>

>>     >> >>  {

>>

>>     >> >>      struct rte_mbuf *unprocess_pkts[nb_pkts];

>>

>>     >> >>      struct gro_ctx *gro_ctx = ctx;

>>

>>     >> >> -    void *tcp_tbl, *udp_tbl, *vxlan_tbl;

>>

>>     >> >> +    void *tcp_tbl, *udp_tbl, *vxlan_tcp_tbl, *vxlan_udp_tbl;

>>

>>     >> >>      uint64_t current_time;

>>

>>     >> >>      uint16_t i, unprocess_num = 0;

>>

>>     >> >> -    uint8_t do_tcp4_gro, do_vxlan_gro, do_udp4_gro;

>>

>>     >> >> +    uint8_t do_tcp4_gro, do_vxlan_tcp_gro, do_udp4_gro, do_vxlan_udp_gro;

>>

>>     >> >>

>>

>>     >> >>      if (unlikely((gro_ctx->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |

>>

>>     >> >>                                      RTE_GRO_TCP_IPV4 |

>>

>>     >> >> +                                    RTE_GRO_IPV4_VXLAN_UDP_IPV4 |

>>

>>     >> >>                                      RTE_GRO_UDP_IPV4)) == 0))

>>

>>     >> >>              return nb_pkts;

>>

>>     >> >>

>>

>>     >> >>      tcp_tbl = gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX];

>>

>>     >> >> -    vxlan_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];

>>

>>     >> >> +    vxlan_tcp_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];

>>

>>     >> >>      udp_tbl = gro_ctx->tbls[RTE_GRO_UDP_IPV4_INDEX];

>>

>>     >> >> +    vxlan_udp_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX];

>>

>>     >> >>

>>

>>     >> >>      do_tcp4_gro = (gro_ctx->gro_types & RTE_GRO_TCP_IPV4) ==

>>

>>     >> >>              RTE_GRO_TCP_IPV4;

>>

>>     >> >> -    do_vxlan_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) ==

>>

>>     >> >> +    do_vxlan_tcp_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) ==

>>

>>     >> >>              RTE_GRO_IPV4_VXLAN_TCP_IPV4;

>>

>>     >> >>      do_udp4_gro = (gro_ctx->gro_types & RTE_GRO_UDP_IPV4) ==

>>

>>     >> >>              RTE_GRO_UDP_IPV4;

>>

>>     >> >> +    do_vxlan_udp_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) ==

>>

>>     >> >> +            RTE_GRO_IPV4_VXLAN_UDP_IPV4;

>>

>>     >> >>

>>

>>     >> >>      current_time = rte_rdtsc();

>>

>>     >> >>

>>

>>     >> >>      for (i = 0; i < nb_pkts; i++) {

>>

>>     >> >>              if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&

>>

>>     >> >> -                            do_vxlan_gro) {

>>

>>     >> >> -                    if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tbl,

>>

>>     >> >> +                            do_vxlan_tcp_gro) {

>>

>>     >> >> +                    if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tcp_tbl,

>>

>>     >> >> +                                            current_time) < 0)

>>

>>     >> >> +                            unprocess_pkts[unprocess_num++] = pkts[i];

>>

>>     >> >> +            } else if (IS_IPV4_VXLAN_UDP4_PKT(pkts[i]->packet_type) &&

>>

>>     >> >> +                            do_vxlan_udp_gro) {

>>

>>     >> >> +                    if (gro_vxlan_udp4_reassemble(pkts[i], vxlan_udp_tbl,

>>

>>     >> >>                                              current_time) < 0)

>>

>>     >> >>                              unprocess_pkts[unprocess_num++] = pkts[i];

>>

>>     >> >>              } else if (IS_IPV4_TCP_PKT(pkts[i]->packet_type) &&

>>

>>     >> >> @@ -341,6 +397,13 @@ struct gro_ctx {

>>

>>     >> >>              left_nb_out = max_nb_out - num;

>>

>>     >> >>      }

>>

>>     >> >>

>>

>>     >> >> +    if ((gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) && max_nb_out > 0) {

>>

>>     >> >> +            num += gro_vxlan_udp4_tbl_timeout_flush(gro_ctx->tbls[

>>

>>     >> >> +                            RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX],

>>

>>     >> >> +                            flush_timestamp, &out[num], left_nb_out);

>>

>>     >> >> +            left_nb_out = max_nb_out - num;

>>

>>     >> >

>>

>>     >> >Don't need to calculate left_nb_out here. In the previous patch, you

>>

>>     >> >also calculate it for UDP GRO. Please remove it both.

>>

>>     >>

>>

>>     >> I think keeping it here isn't bad thing, if someone will add a new GRO type here, he/she won't make a mistake.

>>

>>     >

>>

>>     >I don't think so. It's unnecessary for current design.

>>

>>     >If others want to add new GRO types, they will write

>>

>>     >correct code.

>>

>>     >

>>

>>     >>

>>

>>     >> >

>>

>>     >>

>>

>>     >>

>>

>>     >>

>>

>>     >>

>>

>>     >>

>>

>>

>>

>>

>>

>>

>>

>>

>>
yang_y_yi Sept. 17, 2020, 2:21 a.m. UTC | #10
Thanks Jiayu, will add comments to explain it in v5.




















At 2020-09-17 10:06:37, "Hu, Jiayu" <jiayu.hu@intel.com> wrote:

OK, I agree to ignore processing outer IP ID. But I think the comment need to change, as

it is not accurate. VxLAN/UDP GRO doesn’t process outer IP ID is because it is meaningless

for reassembly in the ovs case you mentioned, rather than it’s out-of-order. You can

give the ovs example in the commit log, and it’s easier to understand the code IMO.

 

Thanks,

Jiayu

 

From: yang_y_yi <yang_y_yi@163.com>
Sent: Wednesday, September 16, 2020 11:06 AM
To: Hu, Jiayu <jiayu.hu@intel.com>
Cc: yangyi01@inspur.com; thomas@monjalon.net; dev@dpdk.org
Subject: Re:Re: RE: Re:Re: Re: [PATCH v5 2/2] gro: add VXLAN UDP GRO support
Importance: High

 

No, next_proto_id of inner IP header can clearly identify it is a UDP packet even if it is a no-udp-header ip fragment.

 

 

 

 

 

 


At 2020-09-16 10:54:24, "Jiayu Hu" <jiayu.hu@intel.com> wrote:
>On Mon, Sep 14, 2020 at 05:14:59PM +0800, yang_y_yi wrote:
>> 
>> Jiayu, VM to VM case for big UDP packet (say udp parload size is 8192K, but mtu
>> 1500). VM will segment it as UDP framgments, but when ovs dpdk uses VxLan to
>> encapsulate them, it doesn't know they are same flow, OVS DPDK has no knowldge
>> about them, just encapsulate them as if they are not related. But on receive
>> side on another machine, OVS DPDK will GRO them, at this point, outer IP ID is
>> even random for every packet. For TCP, I think same issue is there, GRO just
>> considered them as different flow and doesn't do GRO, in my opinion, we also
>> should do GRO for such case.
> 
>I am a little confused about ovs behavior. Does ovs know they are udp fragments?
>Or it knows they are fragments but doesn't know they are from one flow?
>Given ovs receives 3 udp fragments of a udp packet, if ovs doesn't know they are
>fragments of a udp packet, their mbuf->packet_type will be ether/ipv4/udp,
>ether/ipv4, and ether/ipv4. dpdk udp gro will not process the second and third
>one as their packet type in mbuf is not ether/ipv4/udp.
> 
>> 
>> 
>>    2020-09-14 16:50:08  "Hu, Jiayu" <jiayu.hu@intel.com> д    
>> 
>>     Out IP ID is not used to check if they belong to the same flow,
>> 
>>     but check if they are neighbors. If two vxlan/udp fragments are
>> 
>>     neighbors, their outer IP ID and outer frag_oft should be incremental.
>> 
>>     I still cannot understand why ignore outer IP ID when DF is 0.
>> 
>>     Can you give more explanation?
>> 
>>      
>> 
>>     From: yang_y_yi <yang_y_yi@163.com>
>>     Sent: Monday, September 14, 2020 4:27 PM
>>     To: Hu, Jiayu <jiayu.hu@intel.com>
>>     Cc: thomas@monjalon.net; yangyi01@inspur.com; dev@dpdk.org
>>     Subject: Re:Re: Re: [PATCH v5 2/2] gro: add VXLAN UDP GRO support
>>     Importance: High
>> 
>>      
>> 
>>     For outer_ip_id, there is same ip_id disorder issue existing as UDP GRO, so
>>     we can't use ip_id +/-1 to check if they are same flow, here is my
>>     incrmental change for it with more comments.
>> 
>>      
>> 
>>     @@ -189,7 +188,12 @@
>>      is_same_vxlan_udp4_flow(struct vxlan_udp4_flow_key k1,
>>                     struct vxlan_udp4_flow_key k2)
>>      {
>>     -       /* src port is changing, so shouldn't use it for flow check */
>>     +       /* For VxLAN packet, outer udp src port is calculated from
>>     +        * inner packet RSS hash, udp src port of the first UDP
>>     +        * fragment is different from one of other UDP fragments
>>     +        * even if they are same flow, so we have to skip outer udp
>>     +        * src port comparison here.
>>     +        */
>>             return (rte_is_same_ether_addr(&k1.outer_eth_saddr,
>>                                             &k2.outer_eth_saddr) &&
>>                             rte_is_same_ether_addr(&k1.outer_eth_daddr,
>>     @@ -212,12 +216,16 @@
>>             uint16_t l2_offset;
>>             int ret = 0;
>> 
>>     +       /* Note: if outer DF bit is set, i.e outer_is_atomic is 0,
>>     +        * we needn't compare outer_ip_id because they are same,
>>     +        * for the case outer_is_atomic is 1, we also have no way
>>     +        * to comapre outer_ip_id because the received packets can
>>     +        * be out of order. So ignore outer_ip_id comparison here.
>>     +        */
>>     +
>>             l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
>>             cmp = udp4_check_neighbor(&item->inner_item, frag_offset, ip_dl,
>>                                             l2_offset);
>>     -       /* VXLAN outer IP ID is out of order, so don't touch it and
>>     -        * don't compare it.
>>     -        */
>>             if (cmp > 0)
>>                     /* Append the new packet. */
>>                     ret = 1;
>>     @@ -354,7 +362,9 @@
>>             rte_ether_addr_copy(&(outer_eth_hdr->d_addr), &
>>     (key.outer_eth_daddr));
>>             key.outer_ip_src_addr = outer_ipv4_hdr->src_addr;
>>             key.outer_ip_dst_addr = outer_ipv4_hdr->dst_addr;
>>     -       key.outer_src_port = udp_hdr->src_port;
>>     +       /* Note: It is unnecessary to save outer_src_port here because it
>>     can
>>     +        * be different for VxLAN UDP fragments from the same flow.
>>     +        */
>>             key.outer_dst_port = udp_hdr->dst_port;
>> 
>>             /* Search for a matched flow. */
>> 
>>      
>> 
>>      
>> 
>>      
>> 
>>     
>>     At 2020-09-14 12:21:26, "Jiayu Hu" <jiayu.hu@intel.com> wrote:
>> 
>>     >Replies are inline.
>> 
>>     > 
>> 
>>     >BTW, you need to update the programmer guide
>> 
>>     >doc/guides/prog_guide/generic_receive_offload_lib.rst and
>> 
>>     >release note doc/guides/rel_notes/release_20_11.rst.
>> 
>>     > 
>> 
>>     >Thanks,
>> 
>>     >Jiayu
>> 
>>     > 
>> 
>>     >On Mon, Sep 14, 2020 at 10:13:44AM +0800, yang_y_yi wrote:
>> 
>>     >> Jiayu, thank you so much, please check my replies for some of your comments,
>> 
>>     >> here is incremental patch. I built it by meson this time :-)
>> 
>>     >> 
>> 
>>     >> diff --git a/lib/librte_gro/gro_vxlan_udp4.c b/lib/librte_gro/gro_vxlan_udp4.c
>> 
>>     >> index 9e9df72..1fcfaf1 100644
>> 
>>     >> --- a/lib/librte_gro/gro_vxlan_udp4.c
>> 
>>     >> +++ b/lib/librte_gro/gro_vxlan_udp4.c
>> 
>>     >> @@ -203,7 +203,7 @@
>> 
>>     >>  }
>> 
>>     >> 
>> 
>>     >>  static inline int
>> 
>>     >> -udp_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,
>> 
>>     >> +udp4_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,
>> 
>>     >>                 uint16_t frag_offset,
>> 
>>     >>                 uint16_t ip_dl)
>> 
>>     >>  {
>> 
>>     >> @@ -213,7 +213,7 @@
>> 
>>     >>         int ret = 0;
>> 
>>     >> 
>> 
>>     >>         l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
>> 
>>     >> -       cmp = udp_check_neighbor(&item->inner_item, frag_offset, ip_dl,
>> 
>>     >> +       cmp = udp4_check_neighbor(&item->inner_item, frag_offset, ip_dl,
>> 
>>     >>                                         l2_offset);
>> 
>>     >>         /* VXLAN outer IP ID is out of order, so don't touch it and
>> 
>>     >>          * don't compare it.
>> 
>>     >> @@ -379,7 +379,7 @@
>> 
>>     >>                 item_idx = insert_new_item(tbl, pkt, start_time,
>> 
>>     >>                                 INVALID_ARRAY_INDEX, frag_offset,
>> 
>>     >>                                 is_last_frag, outer_ip_id, outer_is_atomic);
>> 
>>     >> -               if (item_idx == INVALID_ARRAY_INDEX)
>> 
>>     >> +               if (unlikely(item_idx == INVALID_ARRAY_INDEX))
>> 
>>     >>                         return -1;
>> 
>>     >>                 if (insert_new_flow(tbl, &key, item_idx) ==
>> 
>>     >>                                 INVALID_ARRAY_INDEX) {
>> 
>>     >> @@ -397,7 +397,7 @@
>> 
>>     >>         cur_idx = tbl->flows[i].start_index;
>> 
>>     >>         prev_idx = cur_idx;
>> 
>>     >>         do {
>> 
>>     >> -               cmp = udp_check_vxlan_neighbor(&(tbl->items[cur_idx]),
>> 
>>     >> +               cmp = udp4_check_vxlan_neighbor(&(tbl->items[cur_idx]),
>> 
>>     >>                                 frag_offset, ip_dl);
>> 
>>     >>                 if (cmp) {
>> 
>>     >>                         if (merge_two_vxlan_udp4_packets(
>> 
>>     >> @@ -436,7 +436,7 @@
>> 
>>     >>                 item_idx = insert_new_item(tbl, pkt, start_time,
>> 
>>     >>                                 INVALID_ARRAY_INDEX, frag_offset,
>> 
>>     >>                                 is_last_frag, outer_ip_id, outer_is_atomic);
>> 
>>     >> -               if (item_idx == INVALID_ARRAY_INDEX)
>> 
>>     >> +               if (unlikely(item_idx == INVALID_ARRAY_INDEX))
>> 
>>     >>                         return -1;
>> 
>>     >>                 tbl->items[item_idx].inner_item.next_pkt_idx = cur_idx;
>> 
>>     >>                 tbl->flows[i].start_index = item_idx;
>> 
>>     >> @@ -470,7 +470,7 @@
>> 
>>     >>                 ip_dl = pkt->pkt_len - hdr_len;
>> 
>>     >> diff --git a/lib/librte_gro/gro_vxlan_udp4.c b/lib/librte_gro/gro_vxlan_udp4.c
>> 
>>     >> index 9e9df72..1fcfaf1 100644
>> 
>>     >> --- a/lib/librte_gro/gro_vxlan_udp4.c
>> 
>>     >> +++ b/lib/librte_gro/gro_vxlan_udp4.c
>> 
>>     >> @@ -203,7 +203,7 @@
>> 
>>     >>  }
>> 
>>     >> 
>> 
>>     >>  static inline int
>> 
>>     >> -udp_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,
>> 
>>     >> +udp4_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,
>> 
>>     >>                 uint16_t frag_offset,
>> 
>>     >>                 uint16_t ip_dl)
>> 
>>     >>  {
>> 
>>     >> @@ -213,7 +213,7 @@
>> 
>>     >>         int ret = 0;
>> 
>>     >> 
>> 
>>     >>         l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
>> 
>>     >> -       cmp = udp_check_neighbor(&item->inner_item, frag_offset, ip_dl,
>> 
>>     >> +       cmp = udp4_check_neighbor(&item->inner_item, frag_offset, ip_dl,
>> 
>>     >>                                         l2_offset);
>> 
>>     >>         /* VXLAN outer IP ID is out of order, so don't touch it and
>> 
>>     >>          * don't compare it.
>> 
>>     >> @@ -379,7 +379,7 @@
>> 
>>     >>                 item_idx = insert_new_item(tbl, pkt, start_time,
>> 
>>     >>                                 INVALID_ARRAY_INDEX, frag_offset,
>> 
>>     >>                                 is_last_frag, outer_ip_id, outer_is_atomic);
>> 
>>     >> -               if (item_idx == INVALID_ARRAY_INDEX)
>> 
>>     >> +               if (unlikely(item_idx == INVALID_ARRAY_INDEX))
>> 
>>     >>                         return -1;
>> 
>>     >>                 if (insert_new_flow(tbl, &key, item_idx) ==
>> 
>>     >>                                 INVALID_ARRAY_INDEX) {
>> 
>>     >> @@ -397,7 +397,7 @@
>> 
>>     >>         cur_idx = tbl->flows[i].start_index;
>> 
>>     >>         prev_idx = cur_idx;
>> 
>>     >>         do {
>> 
>>     >> -               cmp = udp_check_vxlan_neighbor(&(tbl->items[cur_idx]),
>> 
>>     >> +               cmp = udp4_check_vxlan_neighbor(&(tbl->items[cur_idx]),
>> 
>>     >>                                 frag_offset, ip_dl);
>> 
>>     >>                 if (cmp) {
>> 
>>     >>                         if (merge_two_vxlan_udp4_packets(
>> 
>>     >> @@ -436,7 +436,7 @@
>> 
>>     >>                 item_idx = insert_new_item(tbl, pkt, start_time,
>> 
>>     >>                                 INVALID_ARRAY_INDEX, frag_offset,
>> 
>>     >>                                 is_last_frag, outer_ip_id, outer_is_atomic);
>> 
>>     >> -               if (item_idx == INVALID_ARRAY_INDEX)
>> 
>>     >> +               if (unlikely(item_idx == INVALID_ARRAY_INDEX))
>> 
>>     >>                         return -1;
>> 
>>     >>                 tbl->items[item_idx].inner_item.next_pkt_idx = cur_idx;
>> 
>>     >>                 tbl->flows[i].start_index = item_idx;
>> 
>>     >> @@ -470,7 +470,7 @@
>> 
>>     >>                 ip_dl = pkt->pkt_len - hdr_len;
>> 
>>     >>                 frag_offset = tbl->items[item_idx].inner_item.frag_offset;
>> 
>>     >>                 is_last_frag = tbl->items[item_idx].inner_item.is_last_frag;
>> 
>>     >> -               cmp = udp_check_vxlan_neighbor(&(tbl->items[start_idx]),
>> 
>>     >> +               cmp = udp4_check_vxlan_neighbor(&(tbl->items[start_idx]),
>> 
>>     >>                                         frag_offset, ip_dl);
>> 
>>     >>                 if (cmp) {
>> 
>>     >>                         if (merge_two_vxlan_udp4_packets(
>> 
>>     >> @@ -481,12 +481,10 @@
>> 
>>     >>                                                         INVALID_ARRAY_INDEX);
>> 
>>     >>                                 tbl->items[start_idx].inner_item.next_pkt_idx
>> 
>>     >>                                         = item_idx;
>> 
>>     >> -                       } else {
>> 
>>     >> +                       } else
>> 
>>     >>                                 return 0;
>> 
>>     >> -                       }
>> 
>>     >> -               } else {
>> 
>>     >> +               } else
>> 
>>     >>                         return 0;
>> 
>>     >> -               }
>> 
>>     >>         }
>> 
>>     >> 
>> 
>>     >>         return 0;
>> 
>>     >> 
>> 
>>     >> 
>> 
>>     >> At 2020-09-11 12:24:16, "Jiayu Hu" <jiayu.hu@intel.com> wrote:
>> 
>>     >> >Some comments are inline.
>> 
>>     >> >
>> 
>>     >> >Thanks,
>> 
>>     >> >Jiayu
>> 
>>     >> >
>> 
>>     >> >On Thu, Sep 10, 2020 at 05:29:14PM +0800, yang_y_yi@163.com wrote:
>> 
>>     >> >> From: Yi Yang <yangyi01@inspur.com>
>> 
>>     >> >>
>> 
>>     >> >> VXLAN UDP GRO can help improve VM-to-VM UDP performance
>> 
>>     >> >> when VM is enabled UFO or GSO, GRO must be supported if
>> 
>>     >> >> GSO or UFO is enabled, otherwise, performance gain will
>> 
>>     >> >> be hurt.
>> 
>>     >> >>
>> 
>>     >> >> With this enabled in DPDK, OVS DPDK can leverage it to
>> 
>>     >> >> improve VM-to-VM UDP performance, this will make sure
>> 
>>     >> >> IP fragments will be reassembled once it is received
>> 
>>     >> >> from physical NIC. It is very helpful in OVS DPDK VXLAN
>> 
>>     >> >> TSO case.
>> 
>>     >> >>
>> 
>>     >> >> Signed-off-by: Yi Yang <yangyi01@inspur.com>
>> 
>>     >> >> ---
>> 
>>     >> >>  lib/librte_gro/gro_udp4.h       |   1 +
>> 
>>     >> >>  lib/librte_gro/gro_vxlan_udp4.c | 548 ++++++++++++++++++++++++++++++++++++++++
>> 
>>     >> >>  lib/librte_gro/gro_vxlan_udp4.h | 152 +++++++++++
>> 
>>     >> >>  lib/librte_gro/meson.build      |   2 +-
>> 
>>     >> >>  lib/librte_gro/rte_gro.c        | 115 +++++++--
>> 
>>     >> >>  lib/librte_gro/rte_gro.h        |   3 +
>> 
>>     >> >>  6 files changed, 794 insertions(+), 27 deletions(-)
>> 
>>     >> >>  create mode 100644 lib/librte_gro/gro_vxlan_udp4.c
>> 
>>     >> >>  create mode 100644 lib/librte_gro/gro_vxlan_udp4.h
>> 
>>     >> >>
>> 
>>     >> >> diff --git a/lib/librte_gro/gro_udp4.h b/lib/librte_gro/gro_udp4.h
>> 
>>     >> >> index 0a078e4..d38b393 100644
>> 
>>     >> >> --- a/lib/librte_gro/gro_udp4.h
>> 
>>     >> >> +++ b/lib/librte_gro/gro_udp4.h
>> 
>>     >> >> @@ -7,6 +7,7 @@
>> 
>>     >> >>
>> 
>>     >> >>  #include <rte_ip.h>
>> 
>>     >> >>  #include <rte_udp.h>
>> 
>>     >> >> +#include <rte_vxlan.h>
>> 
>>     >> >>
>> 
>>     >> >>  #define INVALID_ARRAY_INDEX 0xffffffffUL
>> 
>>     >> >>  #define GRO_UDP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
>> 
>>     >> >> diff --git a/lib/librte_gro/gro_vxlan_udp4.c b/lib/librte_gro/gro_vxlan_udp4.c
>> 
>>     >> >> new file mode 100644
>> 
>>     >> >> index 0000000..9e9df72
>> 
>>     >> >> --- /dev/null
>> 
>>     >> >> +++ b/lib/librte_gro/gro_vxlan_udp4.c
>> 
>>     >> >> @@ -0,0 +1,548 @@
>> 
>>     >> >> +/* SPDX-License-Identifier: BSD-3-Clause
>> 
>>     >> >> + * Copyright(c) 2020 Inspur Corporation
>> 
>>     >> >> + */
>> 
>>     >> >> +
>> 
>>     >> >> +
>> 
>>     >> >> +
>> 
>>     >> >> +static inline uint32_t
>> 
>>     >> >> +delete_item(struct gro_vxlan_udp4_tbl *tbl,
>> 
>>     >> >> +            uint32_t item_idx,
>> 
>>     >> >> +            uint32_t prev_item_idx)
>> 
>>     >> >> +{
>> 
>>     >> >> +    uint32_t next_idx = tbl->items[item_idx].inner_item.next_pkt_idx;
>> 
>>     >> >> +
>> 
>>     >> >> +    /* NULL indicates an empty item. */
>> 
>>     >> >> +    tbl->items[item_idx].inner_item.firstseg = NULL;
>> 
>>     >> >> +    tbl->item_num--;
>> 
>>     >> >> +    if (prev_item_idx != INVALID_ARRAY_INDEX)
>> 
>>     >> >> +            tbl->items[prev_item_idx].inner_item.next_pkt_idx = next_idx;
>> 
>>     >> >> +
>> 
>>     >> >> +    return next_idx;
>> 
>>     >> >> +}
>> 
>>     >> >> +
>> 
>>     >> >> +static inline uint32_t
>> 
>>     >> >> +insert_new_flow(struct gro_vxlan_udp4_tbl *tbl,
>> 
>>     >> >> +            struct vxlan_udp4_flow_key *src,
>> 
>>     >> >> +            uint32_t item_idx)
>> 
>>     >> >> +{
>> 
>>     >> >> +    struct vxlan_udp4_flow_key *dst;
>> 
>>     >> >> +    uint32_t flow_idx;
>> 
>>     >> >> +
>> 
>>     >> >> +    flow_idx = find_an_empty_flow(tbl);
>> 
>>     >> >> +    if (unlikely(flow_idx == INVALID_ARRAY_INDEX))
>> 
>>     >> >> +            return INVALID_ARRAY_INDEX;
>> 
>>     >> >> +
>> 
>>     >> >> +    dst = &(tbl->flows[flow_idx].key);
>> 
>>     >> >> +
>> 
>>     >> >> +    rte_ether_addr_copy(&(src->inner_key.eth_saddr),
>> 
>>     >> >> +                    &(dst->inner_key.eth_saddr));
>> 
>>     >> >> +    rte_ether_addr_copy(&(src->inner_key.eth_daddr),
>> 
>>     >> >> +                    &(dst->inner_key.eth_daddr));
>> 
>>     >> >> +    dst->inner_key.ip_src_addr = src->inner_key.ip_src_addr;
>> 
>>     >> >> +    dst->inner_key.ip_dst_addr = src->inner_key.ip_dst_addr;
>> 
>>     >> >> +    dst->inner_key.ip_id = src->inner_key.ip_id;
>> 
>>     >> >> +
>> 
>>     >> >> +    dst->vxlan_hdr.vx_flags = src->vxlan_hdr.vx_flags;
>> 
>>     >> >> +    dst->vxlan_hdr.vx_vni = src->vxlan_hdr.vx_vni;
>> 
>>     >> >> +    rte_ether_addr_copy(&(src->outer_eth_saddr), &(dst->outer_eth_saddr));
>> 
>>     >> >> +    rte_ether_addr_copy(&(src->outer_eth_daddr), &(dst->outer_eth_daddr));
>> 
>>     >> >> +    dst->outer_ip_src_addr = src->outer_ip_src_addr;
>> 
>>     >> >> +    dst->outer_ip_dst_addr = src->outer_ip_dst_addr;
>> 
>>     >> >> +    dst->outer_src_port = src->outer_src_port;
>> 
>>     >> >> +    dst->outer_dst_port = src->outer_dst_port;
>> 
>>     >> >> +
>> 
>>     >> >> +    tbl->flows[flow_idx].start_index = item_idx;
>> 
>>     >> >> +    tbl->flow_num++;
>> 
>>     >> >> +
>> 
>>     >> >> +    return flow_idx;
>> 
>>     >> >> +}
>> 
>>     >> >> +
>> 
>>     >> >> +static inline int
>> 
>>     >> >> +is_same_vxlan_udp4_flow(struct vxlan_udp4_flow_key k1,
>> 
>>     >> >> +            struct vxlan_udp4_flow_key k2)
>> 
>>     >> >> +{
>> 
>>     >> >> +    /* src port is changing, so shouldn't use it for flow check */
>> 
>>     >> >
>> 
>>     >> >As I know, fragments of a vxlan/udp/ipv4 packet have same outer source
>> 
>>     >> >port. In what cases outer source port will change?
>> 
>>     >> 
>> 
>>     >> In OVS, vxlan udp soure port is calculated from inner packet RSS, so for UDP fragment, RSS of first packet is
>> 
>>     >> different from other fragments because RSS is calculated based on 5 tuples (src ip, src port, dst ip, dst port,
>> 
>>     >> protocol type).
>> 
>>     >> 
>> 
>>     >> ovs/lib/netdev-native-tnl.h:
>> 
>>     >> 
>> 
>>     >> static inline uint32_t *
>> 
>>     >> dp_packet_rss_ptr(const struct dp_packet *b)
>> 
>>     >> {
>> 
>>     >>     return CONST_CAST(uint32_t *, &b->mbuf.hash.rss);
>> 
>>     >> }
>> 
>>     >> 
>> 
>>     >> static inline uint32_t
>> 
>>     >> dp_packet_get_rss_hash(const struct dp_packet *p)
>> 
>>     >> {
>> 
>>     >>     return *dp_packet_rss_ptr(p);
>> 
>>     >> }
>> 
>>     >> 
>> 
>>     >> static inline ovs_be16
>> 
>>     >> netdev_tnl_get_src_port(struct dp_packet *packet)
>> 
>>     >> {
>> 
>>     >>     uint32_t hash;
>> 
>>     >> 
>> 
>>     >>     hash = dp_packet_get_rss_hash(packet);
>> 
>>     >> 
>> 
>>     >>     return htons((((uint64_t) hash * (tnl_udp_port_max - tnl_udp_port_min)) >> 32) +
>> 
>>     >>                  tnl_udp_port_min);
>> 
>>     >> }
>> 
>>     >> 
>> 
>>     >> void
>> 
>>     >> netdev_tnl_push_udp_header(const struct netdev *netdev OVS_UNUSED,
>> 
>>     >>                            struct dp_packet *packet,
>> 
>>     >>                            const struct ovs_action_push_tnl *data)
>> 
>>     >> {
>> 
>>     >>     struct udp_header *udp;
>> 
>>     >>     int ip_tot_size;
>> 
>>     >> 
>> 
>>     >>     udp = netdev_tnl_push_ip_header(packet, data->header, data->header_len, &ip_tot_size);
>> 
>>     >> 
>> 
>>     >>     /* set udp src port */
>> 
>>     >>     udp->udp_src = netdev_tnl_get_src_port(packet);
>> 
>>     >>     udp->udp_len = htons(ip_tot_size);
>> 
>>     >> 
>> 
>>     >>     if (udp->udp_csum) {
>> 
>>     >>         netdev_tnl_calc_udp_csum(udp, packet, ip_tot_size);
>> 
>>     >>     }
>> 
>>     >> }
>> 
>>     >> 
>> 
>>     >> This is special for ECMP which only checks outer header to differentiate different flows (innner packet). The
>> 
>>     >> first fragment of original inner UDP is considered as different flow from other fragments.
>> 
>>     >> 
>> 
>>     >> I found this by debugging becuase the same UDP traffic did generate different vxlan UDP source port.
>> 
>>     >> 
>> 
>>     > 
>> 
>>     >You are right. I checked VxLAN RFC 7348, and outer source port is
>> 
>>     >calculated from inner packet. In OVS case, the first fragment
>> 
>>     >will have different outer source port compared with other fragments.
>> 
>>     >Please add some comments in the code for people to better understand
>> 
>>     >the code. In addition, need to remove outer_src_port from the structure
>> 
>>     >vxlan_udp4_flow_key.
>> 
>>     > 
>> 
>>     >> >
>> 
>>     >> >> +    return (rte_is_same_ether_addr(&k1.outer_eth_saddr,
>> 
>>     >> >> +                                    &k2.outer_eth_saddr) &&
>> 
>>     >> >> +                    rte_is_same_ether_addr(&k1.outer_eth_daddr,
>> 
>>     >> >> +                            &k2.outer_eth_daddr) &&
>> 
>>     >> >> +                    (k1.outer_ip_src_addr == k2.outer_ip_src_addr) &&
>> 
>>     >> >> +                    (k1.outer_ip_dst_addr == k2.outer_ip_dst_addr) &&
>> 
>>     >> >> +                    (k1.outer_dst_port == k2.outer_dst_port) &&
>> 
>>     >> >> +                    (k1.vxlan_hdr.vx_flags == k2.vxlan_hdr.vx_flags) &&
>> 
>>     >> >> +                    (k1.vxlan_hdr.vx_vni == k2.vxlan_hdr.vx_vni) &&
>> 
>>     >> >> +                    is_same_udp4_flow(k1.inner_key, k2.inner_key));
>> 
>>     >> >> +}
>> 
>>     >> >> +
>> 
>>     >> >> +static inline int
>> 
>>     >> >> +udp_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,
>> 
>>     >> >> +            uint16_t frag_offset,
>> 
>>     >> >> +            uint16_t ip_dl)
>> 
>>     >> >
>> 
>>     >> >Better rename it as udp4_check_vxlan_neighbor.
>> 
>>     >> 
>> 
>>     >> Ok
>> 
>>     >> 
>> 
>>     >> >
>> 
>>     >> >> +{
>> 
>>     >> >> +    struct rte_mbuf *pkt = item->inner_item.firstseg;
>> 
>>     >> >> +    int cmp;
>> 
>>     >> >> +    uint16_t l2_offset;
>> 
>>     >> >> +    int ret = 0;
>> 
>>     >> >> +
>> 
>>     >> >> +    l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
>> 
>>     >> >> +    cmp = udp_check_neighbor(&item->inner_item, frag_offset, ip_dl,
>> 
>>     >> >> +                                    l2_offset);
>> 
>>     >> >
>> 
>>     >> >Why don't check outer IP ID? If outer DF bit is not set, all fragments
>> 
>>     >> >should have incremental outer IP ID; if DF bit is set, we don't need
>> 
>>     >> >to check outer IP ID.
>> 
>>     >> 
>> 
>>     >> By default, OVS set DF flag. I don't think we need to compare outer IP id because inner check
>> 
>>     >> is enough to differentiate if they are same flow, isn't it?
>> 
>>     > 
>> 
>>     >OVS is one of workloads using DPDK, and we can assume
>> 
>>     >other cases have the same design as OVS. I think IP ID
>> 
>>     >check is necessary.
>> 
>>     > 
>> 
>>     >> 
>> 
>>     >> >
>> 
>>     >> >In addition, udp_check_neighbor is renamed, but you didn't change here.
>> 
>>     >> 
>> 
>>     >> Forgot to build it, Makefile has been removed from dpdk main branch, build isn't so convinient, will
>> 
>>     >> use meson build to check it for next version.
>> 
>>     >> 
>> 
>>     >> >
>> 
>>     >> >> +    /* VXLAN outer IP ID is out of order, so don't touch it and
>> 
>>     >> >> +     * don't compare it.
>> 
>>     >> >> +     */
>> 
>>     >> >> +    if (cmp > 0)
>> 
>>     >> >> +            /* Append the new packet. */
>> 
>>     >> >> +            ret = 1;
>> 
>>     >> >> +    else if (cmp < 0)
>> 
>>     >> >> +            /* Prepend the new packet. */
>> 
>>     >> >> +            ret = -1;
>> 
>>     >> >> +
>> 
>>     >> >> +    return ret;
>> 
>>     >> >> +}
>> 
>>     >> >> +
>> 
>>     >> >>
>> 
>>     >> >>  /*
>> 
>>     >> >>   * GRO context structure. It keeps the table structures, which are
>> 
>>     >> >> @@ -137,19 +148,27 @@ struct gro_ctx {
>> 
>>     >> >>      struct gro_udp4_item udp_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {{0} };
>> 
>>     >> >>
>> 
>>     >> >>      /* Allocate a reassembly table for VXLAN TCP GRO */
>> 
>>     >> >> -    struct gro_vxlan_tcp4_tbl vxlan_tbl;
>> 
>>     >> >> -    struct gro_vxlan_tcp4_flow vxlan_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
>> 
>>     >> >> -    struct gro_vxlan_tcp4_item vxlan_items[RTE_GRO_MAX_BURST_ITEM_NUM]
>> 
>>     >> >> +    struct gro_vxlan_tcp4_tbl vxlan_tcp_tbl;
>> 
>>     >> >> +    struct gro_vxlan_tcp4_flow vxlan_tcp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
>> 
>>     >> >> +    struct gro_vxlan_tcp4_item vxlan_tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM]
>> 
>>     >> >> +                    = {{{0}, 0, 0} };
>> 
>>     >> >> +
>> 
>>     >> >> +    /* Allocate a reassembly table for VXLAN UDP GRO */
>> 
>>     >> >> +    struct gro_vxlan_udp4_tbl vxlan_udp_tbl;
>> 
>>     >> >> +    struct gro_vxlan_udp4_flow vxlan_udp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
>> 
>>     >> >> +    struct gro_vxlan_udp4_item vxlan_udp_items[RTE_GRO_MAX_BURST_ITEM_NUM]
>> 
>>     >> >>                      = {{{0}, 0, 0} };
>> 
>>     >> >>
>> 
>>     >> >>      struct rte_mbuf *unprocess_pkts[nb_pkts];
>> 
>>     >> >>      uint32_t item_num;
>> 
>>     >> >>      int32_t ret;
>> 
>>     >> >>      uint16_t i, unprocess_num = 0, nb_after_gro = nb_pkts;
>> 
>>     >> >> -    uint8_t do_tcp4_gro = 0, do_vxlan_gro = 0, do_udp4_gro = 0;
>> 
>>     >> >> +    uint8_t do_tcp4_gro = 0, do_vxlan_tcp_gro = 0, do_udp4_gro = 0,
>> 
>>     >> >> +            do_vxlan_udp_gro = 0;
>> 
>>     >> >>
>> 
>>     >> >>      if (unlikely((param->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
>> 
>>     >> >>                                      RTE_GRO_TCP_IPV4 |
>> 
>>     >> >> +                                    RTE_GRO_IPV4_VXLAN_UDP_IPV4 |
>> 
>>     >> >>                                      RTE_GRO_UDP_IPV4)) == 0))
>> 
>>     >> >>              return nb_pkts;
>> 
>>     >> >>
>> 
>>     >> >> @@ -160,15 +179,28 @@ struct gro_ctx {
>> 
>>     >> >>
>> 
>>     >> >>      if (param->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) {
>> 
>>     >> >>              for (i = 0; i < item_num; i++)
>> 
>>     >> >> -                    vxlan_flows[i].start_index = INVALID_ARRAY_INDEX;
>> 
>>     >> >> -
>> 
>>     >> >> -            vxlan_tbl.flows = vxlan_flows;
>> 
>>     >> >> -            vxlan_tbl.items = vxlan_items;
>> 
>>     >> >> -            vxlan_tbl.flow_num = 0;
>> 
>>     >> >> -            vxlan_tbl.item_num = 0;
>> 
>>     >> >> -            vxlan_tbl.max_flow_num = item_num;
>> 
>>     >> >> -            vxlan_tbl.max_item_num = item_num;
>> 
>>     >> >> -            do_vxlan_gro = 1;
>> 
>>     >> >> +                    vxlan_tcp_flows[i].start_index = INVALID_ARRAY_INDEX;
>> 
>>     >> >> +
>> 
>>     >> >> +            vxlan_tcp_tbl.flows = vxlan_tcp_flows;
>> 
>>     >> >> +            vxlan_tcp_tbl.items = vxlan_tcp_items;
>> 
>>     >> >> +            vxlan_tcp_tbl.flow_num = 0;
>> 
>>     >> >> +            vxlan_tcp_tbl.item_num = 0;
>> 
>>     >> >> +            vxlan_tcp_tbl.max_flow_num = item_num;
>> 
>>     >> >> +            vxlan_tcp_tbl.max_item_num = item_num;
>> 
>>     >> >> +            do_vxlan_tcp_gro = 1;
>> 
>>     >> >> +    }
>> 
>>     >> >> +
>> 
>>     >> >> +    if (param->gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) {
>> 
>>     >> >> +            for (i = 0; i < item_num; i++)
>> 
>>     >> >> +                    vxlan_udp_flows[i].start_index = INVALID_ARRAY_INDEX;
>> 
>>     >> >> +
>> 
>>     >> >> +            vxlan_udp_tbl.flows = vxlan_udp_flows;
>> 
>>     >> >> +            vxlan_udp_tbl.items = vxlan_udp_items;
>> 
>>     >> >> +            vxlan_udp_tbl.flow_num = 0;
>> 
>>     >> >> +            vxlan_udp_tbl.item_num = 0;
>> 
>>     >> >> +            vxlan_udp_tbl.max_flow_num = item_num;
>> 
>>     >> >> +            vxlan_udp_tbl.max_item_num = item_num;
>> 
>>     >> >> +            do_vxlan_udp_gro = 1;
>> 
>>     >> >>      }
>> 
>>     >> >>
>> 
>>     >> >>      if (param->gro_types & RTE_GRO_TCP_IPV4) {
>> 
>>     >> >> @@ -204,9 +236,18 @@ struct gro_ctx {
>> 
>>     >> >>               * will be flushed from the tables.
>> 
>>     >> >>               */
>> 
>>     >> >>              if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
>> 
>>     >> >> -                            do_vxlan_gro) {
>> 
>>     >> >> +                            do_vxlan_tcp_gro) {
>> 
>>     >> >>                      ret = gro_vxlan_tcp4_reassemble(pkts[i],
>> 
>>     >> >> -                                                    &vxlan_tbl, 0);
>> 
>>     >> >> +                                                    &vxlan_tcp_tbl, 0);
>> 
>>     >> >> +                    if (ret > 0)
>> 
>>     >> >> +                            /* Merge successfully */
>> 
>>     >> >> +                            nb_after_gro--;
>> 
>>     >> >> +                    else if (ret < 0)
>> 
>>     >> >> +                            unprocess_pkts[unprocess_num++] = pkts[i];
>> 
>>     >> >> +            } else if (IS_IPV4_VXLAN_UDP4_PKT(pkts[i]->packet_type) &&
>> 
>>     >> >> +                            do_vxlan_udp_gro) {
>> 
>>     >> >> +                    ret = gro_vxlan_udp4_reassemble(pkts[i],
>> 
>>     >> >> +                                                    &vxlan_udp_tbl, 0);
>> 
>>     >> >>                      if (ret > 0)
>> 
>>     >> >>                              /* Merge successfully */
>> 
>>     >> >>                              nb_after_gro--;
>> 
>>     >> >> @@ -236,11 +277,17 @@ struct gro_ctx {
>> 
>>     >> >>               || (unprocess_num < nb_pkts)) {
>> 
>>     >> >>              i = 0;
>> 
>>     >> >>              /* Flush all packets from the tables */
>> 
>>     >> >> -            if (do_vxlan_gro) {
>> 
>>     >> >> -                    i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tbl,
>> 
>>     >> >> +            if (do_vxlan_tcp_gro) {
>> 
>>     >> >> +                    i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tcp_tbl,
>> 
>>     >> >>                                      0, pkts, nb_pkts);
>> 
>>     >> >>              }
>> 
>>     >> >>
>> 
>>     >> >> +            if (do_vxlan_udp_gro) {
>> 
>>     >> >> +                    i += gro_vxlan_udp4_tbl_timeout_flush(&vxlan_udp_tbl,
>> 
>>     >> >> +                                    0, &pkts[i], nb_pkts - i);
>> 
>>     >> >> +
>> 
>>     >> >> +            }
>> 
>>     >> >> +
>> 
>>     >> >>              if (do_tcp4_gro) {
>> 
>>     >> >>                      i += gro_tcp4_tbl_timeout_flush(&tcp_tbl, 0,
>> 
>>     >> >>                                      &pkts[i], nb_pkts - i);
>> 
>>     >> >> @@ -269,33 +316,42 @@ struct gro_ctx {
>> 
>>     >> >>  {
>> 
>>     >> >>      struct rte_mbuf *unprocess_pkts[nb_pkts];
>> 
>>     >> >>      struct gro_ctx *gro_ctx = ctx;
>> 
>>     >> >> -    void *tcp_tbl, *udp_tbl, *vxlan_tbl;
>> 
>>     >> >> +    void *tcp_tbl, *udp_tbl, *vxlan_tcp_tbl, *vxlan_udp_tbl;
>> 
>>     >> >>      uint64_t current_time;
>> 
>>     >> >>      uint16_t i, unprocess_num = 0;
>> 
>>     >> >> -    uint8_t do_tcp4_gro, do_vxlan_gro, do_udp4_gro;
>> 
>>     >> >> +    uint8_t do_tcp4_gro, do_vxlan_tcp_gro, do_udp4_gro, do_vxlan_udp_gro;
>> 
>>     >> >>
>> 
>>     >> >>      if (unlikely((gro_ctx->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
>> 
>>     >> >>                                      RTE_GRO_TCP_IPV4 |
>> 
>>     >> >> +                                    RTE_GRO_IPV4_VXLAN_UDP_IPV4 |
>> 
>>     >> >>                                      RTE_GRO_UDP_IPV4)) == 0))
>> 
>>     >> >>              return nb_pkts;
>> 
>>     >> >>
>> 
>>     >> >>      tcp_tbl = gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX];
>> 
>>     >> >> -    vxlan_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];
>> 
>>     >> >> +    vxlan_tcp_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];
>> 
>>     >> >>      udp_tbl = gro_ctx->tbls[RTE_GRO_UDP_IPV4_INDEX];
>> 
>>     >> >> +    vxlan_udp_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX];
>> 
>>     >> >>
>> 
>>     >> >>      do_tcp4_gro = (gro_ctx->gro_types & RTE_GRO_TCP_IPV4) ==
>> 
>>     >> >>              RTE_GRO_TCP_IPV4;
>> 
>>     >> >> -    do_vxlan_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) ==
>> 
>>     >> >> +    do_vxlan_tcp_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) ==
>> 
>>     >> >>              RTE_GRO_IPV4_VXLAN_TCP_IPV4;
>> 
>>     >> >>      do_udp4_gro = (gro_ctx->gro_types & RTE_GRO_UDP_IPV4) ==
>> 
>>     >> >>              RTE_GRO_UDP_IPV4;
>> 
>>     >> >> +    do_vxlan_udp_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) ==
>> 
>>     >> >> +            RTE_GRO_IPV4_VXLAN_UDP_IPV4;
>> 
>>     >> >>
>> 
>>     >> >>      current_time = rte_rdtsc();
>> 
>>     >> >>
>> 
>>     >> >>      for (i = 0; i < nb_pkts; i++) {
>> 
>>     >> >>              if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
>> 
>>     >> >> -                            do_vxlan_gro) {
>> 
>>     >> >> -                    if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tbl,
>> 
>>     >> >> +                            do_vxlan_tcp_gro) {
>> 
>>     >> >> +                    if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tcp_tbl,
>> 
>>     >> >> +                                            current_time) < 0)
>> 
>>     >> >> +                            unprocess_pkts[unprocess_num++] = pkts[i];
>> 
>>     >> >> +            } else if (IS_IPV4_VXLAN_UDP4_PKT(pkts[i]->packet_type) &&
>> 
>>     >> >> +                            do_vxlan_udp_gro) {
>> 
>>     >> >> +                    if (gro_vxlan_udp4_reassemble(pkts[i], vxlan_udp_tbl,
>> 
>>     >> >>                                              current_time) < 0)
>> 
>>     >> >>                              unprocess_pkts[unprocess_num++] = pkts[i];
>> 
>>     >> >>              } else if (IS_IPV4_TCP_PKT(pkts[i]->packet_type) &&
>> 
>>     >> >> @@ -341,6 +397,13 @@ struct gro_ctx {
>> 
>>     >> >>              left_nb_out = max_nb_out - num;
>> 
>>     >> >>      }
>> 
>>     >> >>
>> 
>>     >> >> +    if ((gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) && max_nb_out > 0) {
>> 
>>     >> >> +            num += gro_vxlan_udp4_tbl_timeout_flush(gro_ctx->tbls[
>> 
>>     >> >> +                            RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX],
>> 
>>     >> >> +                            flush_timestamp, &out[num], left_nb_out);
>> 
>>     >> >> +            left_nb_out = max_nb_out - num;
>> 
>>     >> >
>> 
>>     >> >Don't need to calculate left_nb_out here. In the previous patch, you
>> 
>>     >> >also calculate it for UDP GRO. Please remove it both.
>> 
>>     >> 
>> 
>>     >> I think keeping it here isn't bad thing, if someone will add a new GRO type here, he/she won't make a mistake.
>> 
>>     > 
>> 
>>     >I don't think so. It's unnecessary for current design.
>> 
>>     >If others want to add new GRO types, they will write
>> 
>>     >correct code.
>> 
>>     > 
>> 
>>     >> 
>> 
>>     >> >
>> 
>>     >> 
>> 
>>     >> 
>> 
>>     >> 
>> 
>>     >>  
>> 
>>     >> 
>> 
>>      
>> 
>>      
>> 
>> 
>> 
>>  
>>
diff mbox series

Patch

diff --git a/lib/librte_gro/gro_udp4.h b/lib/librte_gro/gro_udp4.h
index 0a078e4..d38b393 100644
--- a/lib/librte_gro/gro_udp4.h
+++ b/lib/librte_gro/gro_udp4.h
@@ -7,6 +7,7 @@ 
 
 #include <rte_ip.h>
 #include <rte_udp.h>
+#include <rte_vxlan.h>
 
 #define INVALID_ARRAY_INDEX 0xffffffffUL
 #define GRO_UDP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
diff --git a/lib/librte_gro/gro_vxlan_udp4.c b/lib/librte_gro/gro_vxlan_udp4.c
new file mode 100644
index 0000000..9e9df72
--- /dev/null
+++ b/lib/librte_gro/gro_vxlan_udp4.c
@@ -0,0 +1,548 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Inspur Corporation
+ */
+
+#include <rte_malloc.h>
+#include <rte_mbuf.h>
+#include <rte_cycles.h>
+#include <rte_ethdev.h>
+#include <rte_udp.h>
+
+#include "gro_vxlan_udp4.h"
+
+void *
+gro_vxlan_udp4_tbl_create(uint16_t socket_id,
+		uint16_t max_flow_num,
+		uint16_t max_item_per_flow)
+{
+	struct gro_vxlan_udp4_tbl *tbl;
+	size_t size;
+	uint32_t entries_num, i;
+
+	entries_num = max_flow_num * max_item_per_flow;
+	entries_num = RTE_MIN(entries_num, GRO_VXLAN_UDP4_TBL_MAX_ITEM_NUM);
+
+	if (entries_num == 0)
+		return NULL;
+
+	tbl = rte_zmalloc_socket(__func__,
+			sizeof(struct gro_vxlan_udp4_tbl),
+			RTE_CACHE_LINE_SIZE,
+			socket_id);
+	if (tbl == NULL)
+		return NULL;
+
+	size = sizeof(struct gro_vxlan_udp4_item) * entries_num;
+	tbl->items = rte_zmalloc_socket(__func__,
+			size,
+			RTE_CACHE_LINE_SIZE,
+			socket_id);
+	if (tbl->items == NULL) {
+		rte_free(tbl);
+		return NULL;
+	}
+	tbl->max_item_num = entries_num;
+
+	size = sizeof(struct gro_vxlan_udp4_flow) * entries_num;
+	tbl->flows = rte_zmalloc_socket(__func__,
+			size,
+			RTE_CACHE_LINE_SIZE,
+			socket_id);
+	if (tbl->flows == NULL) {
+		rte_free(tbl->items);
+		rte_free(tbl);
+		return NULL;
+	}
+
+	for (i = 0; i < entries_num; i++)
+		tbl->flows[i].start_index = INVALID_ARRAY_INDEX;
+	tbl->max_flow_num = entries_num;
+
+	return tbl;
+}
+
+void
+gro_vxlan_udp4_tbl_destroy(void *tbl)
+{
+	struct gro_vxlan_udp4_tbl *vxlan_tbl = tbl;
+
+	if (vxlan_tbl) {
+		rte_free(vxlan_tbl->items);
+		rte_free(vxlan_tbl->flows);
+	}
+	rte_free(vxlan_tbl);
+}
+
+static inline uint32_t
+find_an_empty_item(struct gro_vxlan_udp4_tbl *tbl)
+{
+	uint32_t max_item_num = tbl->max_item_num, i;
+
+	for (i = 0; i < max_item_num; i++)
+		if (tbl->items[i].inner_item.firstseg == NULL)
+			return i;
+	return INVALID_ARRAY_INDEX;
+}
+
+static inline uint32_t
+find_an_empty_flow(struct gro_vxlan_udp4_tbl *tbl)
+{
+	uint32_t max_flow_num = tbl->max_flow_num, i;
+
+	for (i = 0; i < max_flow_num; i++)
+		if (tbl->flows[i].start_index == INVALID_ARRAY_INDEX)
+			return i;
+	return INVALID_ARRAY_INDEX;
+}
+
+static inline uint32_t
+insert_new_item(struct gro_vxlan_udp4_tbl *tbl,
+		struct rte_mbuf *pkt,
+		uint64_t start_time,
+		uint32_t prev_idx,
+		uint16_t frag_offset,
+		uint8_t is_last_frag,
+		uint16_t outer_ip_id,
+		uint8_t outer_is_atomic)
+{
+	uint32_t item_idx;
+
+	item_idx = find_an_empty_item(tbl);
+	if (unlikely(item_idx == INVALID_ARRAY_INDEX))
+		return INVALID_ARRAY_INDEX;
+
+	tbl->items[item_idx].inner_item.firstseg = pkt;
+	tbl->items[item_idx].inner_item.lastseg = rte_pktmbuf_lastseg(pkt);
+	tbl->items[item_idx].inner_item.start_time = start_time;
+	tbl->items[item_idx].inner_item.next_pkt_idx = INVALID_ARRAY_INDEX;
+	tbl->items[item_idx].inner_item.frag_offset = frag_offset;
+	tbl->items[item_idx].inner_item.is_last_frag = is_last_frag;
+	tbl->items[item_idx].inner_item.nb_merged = 1;
+	tbl->items[item_idx].outer_ip_id = outer_ip_id;
+	tbl->items[item_idx].outer_is_atomic = outer_is_atomic;
+	tbl->item_num++;
+
+	/* If the previous packet exists, chain the new one with it. */
+	if (prev_idx != INVALID_ARRAY_INDEX) {
+		tbl->items[item_idx].inner_item.next_pkt_idx =
+			tbl->items[prev_idx].inner_item.next_pkt_idx;
+		tbl->items[prev_idx].inner_item.next_pkt_idx = item_idx;
+	}
+
+	return item_idx;
+}
+
+static inline uint32_t
+delete_item(struct gro_vxlan_udp4_tbl *tbl,
+		uint32_t item_idx,
+		uint32_t prev_item_idx)
+{
+	uint32_t next_idx = tbl->items[item_idx].inner_item.next_pkt_idx;
+
+	/* NULL indicates an empty item. */
+	tbl->items[item_idx].inner_item.firstseg = NULL;
+	tbl->item_num--;
+	if (prev_item_idx != INVALID_ARRAY_INDEX)
+		tbl->items[prev_item_idx].inner_item.next_pkt_idx = next_idx;
+
+	return next_idx;
+}
+
+static inline uint32_t
+insert_new_flow(struct gro_vxlan_udp4_tbl *tbl,
+		struct vxlan_udp4_flow_key *src,
+		uint32_t item_idx)
+{
+	struct vxlan_udp4_flow_key *dst;
+	uint32_t flow_idx;
+
+	flow_idx = find_an_empty_flow(tbl);
+	if (unlikely(flow_idx == INVALID_ARRAY_INDEX))
+		return INVALID_ARRAY_INDEX;
+
+	dst = &(tbl->flows[flow_idx].key);
+
+	rte_ether_addr_copy(&(src->inner_key.eth_saddr),
+			&(dst->inner_key.eth_saddr));
+	rte_ether_addr_copy(&(src->inner_key.eth_daddr),
+			&(dst->inner_key.eth_daddr));
+	dst->inner_key.ip_src_addr = src->inner_key.ip_src_addr;
+	dst->inner_key.ip_dst_addr = src->inner_key.ip_dst_addr;
+	dst->inner_key.ip_id = src->inner_key.ip_id;
+
+	dst->vxlan_hdr.vx_flags = src->vxlan_hdr.vx_flags;
+	dst->vxlan_hdr.vx_vni = src->vxlan_hdr.vx_vni;
+	rte_ether_addr_copy(&(src->outer_eth_saddr), &(dst->outer_eth_saddr));
+	rte_ether_addr_copy(&(src->outer_eth_daddr), &(dst->outer_eth_daddr));
+	dst->outer_ip_src_addr = src->outer_ip_src_addr;
+	dst->outer_ip_dst_addr = src->outer_ip_dst_addr;
+	dst->outer_src_port = src->outer_src_port;
+	dst->outer_dst_port = src->outer_dst_port;
+
+	tbl->flows[flow_idx].start_index = item_idx;
+	tbl->flow_num++;
+
+	return flow_idx;
+}
+
+static inline int
+is_same_vxlan_udp4_flow(struct vxlan_udp4_flow_key k1,
+		struct vxlan_udp4_flow_key k2)
+{
+	/* src port is changing, so shouldn't use it for flow check */
+	return (rte_is_same_ether_addr(&k1.outer_eth_saddr,
+					&k2.outer_eth_saddr) &&
+			rte_is_same_ether_addr(&k1.outer_eth_daddr,
+				&k2.outer_eth_daddr) &&
+			(k1.outer_ip_src_addr == k2.outer_ip_src_addr) &&
+			(k1.outer_ip_dst_addr == k2.outer_ip_dst_addr) &&
+			(k1.outer_dst_port == k2.outer_dst_port) &&
+			(k1.vxlan_hdr.vx_flags == k2.vxlan_hdr.vx_flags) &&
+			(k1.vxlan_hdr.vx_vni == k2.vxlan_hdr.vx_vni) &&
+			is_same_udp4_flow(k1.inner_key, k2.inner_key));
+}
+
+static inline int
+udp_check_vxlan_neighbor(struct gro_vxlan_udp4_item *item,
+		uint16_t frag_offset,
+		uint16_t ip_dl)
+{
+	struct rte_mbuf *pkt = item->inner_item.firstseg;
+	int cmp;
+	uint16_t l2_offset;
+	int ret = 0;
+
+	l2_offset = pkt->outer_l2_len + pkt->outer_l3_len;
+	cmp = udp_check_neighbor(&item->inner_item, frag_offset, ip_dl,
+					l2_offset);
+	/* VXLAN outer IP ID is out of order, so don't touch it and
+	 * don't compare it.
+	 */
+	if (cmp > 0)
+		/* Append the new packet. */
+		ret = 1;
+	else if (cmp < 0)
+		/* Prepend the new packet. */
+		ret = -1;
+
+	return ret;
+}
+
+static inline int
+merge_two_vxlan_udp4_packets(struct gro_vxlan_udp4_item *item,
+		struct rte_mbuf *pkt,
+		int cmp,
+		uint16_t frag_offset,
+		uint8_t is_last_frag)
+{
+	if (merge_two_udp4_packets(&item->inner_item, pkt, cmp, frag_offset,
+				is_last_frag,
+				pkt->outer_l2_len + pkt->outer_l3_len)) {
+		return 1;
+	}
+
+	return 0;
+}
+
+static inline void
+update_vxlan_header(struct gro_vxlan_udp4_item *item)
+{
+	struct rte_ipv4_hdr *ipv4_hdr;
+	struct rte_udp_hdr *udp_hdr;
+	struct rte_mbuf *pkt = item->inner_item.firstseg;
+	uint16_t len;
+	uint16_t frag_offset;
+
+	/* Update the outer IPv4 header. */
+	len = pkt->pkt_len - pkt->outer_l2_len;
+	ipv4_hdr = (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
+			pkt->outer_l2_len);
+	ipv4_hdr->total_length = rte_cpu_to_be_16(len);
+
+	/* Update the outer UDP header. */
+	len -= pkt->outer_l3_len;
+	udp_hdr = (struct rte_udp_hdr *)((char *)ipv4_hdr + pkt->outer_l3_len);
+	udp_hdr->dgram_len = rte_cpu_to_be_16(len);
+
+	/* Update the inner IPv4 header. */
+	len -= pkt->l2_len;
+	ipv4_hdr = (struct rte_ipv4_hdr *)((char *)udp_hdr + pkt->l2_len);
+	ipv4_hdr->total_length = rte_cpu_to_be_16(len);
+
+	/* Clear MF bit if it is last fragment */
+	if (item->inner_item.is_last_frag) {
+		frag_offset = rte_be_to_cpu_16(ipv4_hdr->fragment_offset);
+		ipv4_hdr->fragment_offset =
+			rte_cpu_to_be_16(frag_offset & ~RTE_IPV4_HDR_MF_FLAG);
+	}
+}
+
+int32_t
+gro_vxlan_udp4_reassemble(struct rte_mbuf *pkt,
+		struct gro_vxlan_udp4_tbl *tbl,
+		uint64_t start_time)
+{
+	struct rte_ether_hdr *outer_eth_hdr, *eth_hdr;
+	struct rte_ipv4_hdr *outer_ipv4_hdr, *ipv4_hdr;
+	struct rte_udp_hdr *udp_hdr;
+	struct rte_vxlan_hdr *vxlan_hdr;
+	uint16_t frag_offset;
+	uint8_t is_last_frag;
+	int16_t ip_dl;
+	uint16_t outer_ip_id, ip_id;
+	uint8_t outer_is_atomic;
+
+	struct vxlan_udp4_flow_key key;
+	uint32_t cur_idx, prev_idx, item_idx;
+	uint32_t i, max_flow_num, remaining_flow_num;
+	int cmp;
+	uint16_t hdr_len;
+	uint8_t find;
+
+	outer_eth_hdr = rte_pktmbuf_mtod(pkt, struct rte_ether_hdr *);
+	outer_ipv4_hdr = (struct rte_ipv4_hdr *)((char *)outer_eth_hdr +
+			pkt->outer_l2_len);
+
+	udp_hdr = (struct rte_udp_hdr *)((char *)outer_ipv4_hdr +
+			pkt->outer_l3_len);
+	vxlan_hdr = (struct rte_vxlan_hdr *)((char *)udp_hdr +
+			sizeof(struct rte_udp_hdr));
+	eth_hdr = (struct rte_ether_hdr *)((char *)vxlan_hdr +
+			sizeof(struct rte_vxlan_hdr));
+	/* l2_len = outer udp hdr len + vxlan hdr len + inner l2 len */
+	ipv4_hdr = (struct rte_ipv4_hdr *)((char *)udp_hdr + pkt->l2_len);
+
+	/*
+	 * Don't process the packet which has non-fragment inner IP.
+	 */
+	if (!is_ipv4_fragment(ipv4_hdr))
+		return -1;
+
+	hdr_len = pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2_len +
+		pkt->l3_len;
+	/*
+	 * Don't process the packet whose payload length is less than or
+	 * equal to 0.
+	 */
+	ip_dl = pkt->pkt_len - hdr_len;
+	if (ip_dl <= 0)
+		return -1;
+
+	/*
+	 * Save IPv4 ID for the packet whose DF bit is 0. For the packet
+	 * whose DF bit is 1, IPv4 ID is ignored.
+	 */
+	frag_offset = rte_be_to_cpu_16(outer_ipv4_hdr->fragment_offset);
+	outer_is_atomic =
+		((frag_offset & RTE_IPV4_HDR_DF_FLAG) == RTE_IPV4_HDR_DF_FLAG);
+	outer_ip_id = outer_is_atomic ? 0 :
+		rte_be_to_cpu_16(outer_ipv4_hdr->packet_id);
+	ip_id = rte_be_to_cpu_16(ipv4_hdr->packet_id);
+	frag_offset = rte_be_to_cpu_16(ipv4_hdr->fragment_offset);
+	is_last_frag = ((frag_offset & RTE_IPV4_HDR_MF_FLAG) == 0) ? 1 : 0;
+	frag_offset = (uint16_t)(frag_offset & RTE_IPV4_HDR_OFFSET_MASK) << 3;
+
+	rte_ether_addr_copy(&(eth_hdr->s_addr), &(key.inner_key.eth_saddr));
+	rte_ether_addr_copy(&(eth_hdr->d_addr), &(key.inner_key.eth_daddr));
+	key.inner_key.ip_src_addr = ipv4_hdr->src_addr;
+	key.inner_key.ip_dst_addr = ipv4_hdr->dst_addr;
+	key.inner_key.ip_id = ip_id;
+
+	key.vxlan_hdr.vx_flags = vxlan_hdr->vx_flags;
+	key.vxlan_hdr.vx_vni = vxlan_hdr->vx_vni;
+	rte_ether_addr_copy(&(outer_eth_hdr->s_addr), &(key.outer_eth_saddr));
+	rte_ether_addr_copy(&(outer_eth_hdr->d_addr), &(key.outer_eth_daddr));
+	key.outer_ip_src_addr = outer_ipv4_hdr->src_addr;
+	key.outer_ip_dst_addr = outer_ipv4_hdr->dst_addr;
+	key.outer_src_port = udp_hdr->src_port;
+	key.outer_dst_port = udp_hdr->dst_port;
+
+	/* Search for a matched flow. */
+	max_flow_num = tbl->max_flow_num;
+	remaining_flow_num = tbl->flow_num;
+	find = 0;
+	for (i = 0; i < max_flow_num && remaining_flow_num; i++) {
+		if (tbl->flows[i].start_index != INVALID_ARRAY_INDEX) {
+			if (is_same_vxlan_udp4_flow(tbl->flows[i].key, key)) {
+				find = 1;
+				break;
+			}
+			remaining_flow_num--;
+		}
+	}
+
+	/*
+	 * Can't find a matched flow. Insert a new flow and store the
+	 * packet into the flow.
+	 */
+	if (find == 0) {
+		item_idx = insert_new_item(tbl, pkt, start_time,
+				INVALID_ARRAY_INDEX, frag_offset,
+				is_last_frag, outer_ip_id, outer_is_atomic);
+		if (item_idx == INVALID_ARRAY_INDEX)
+			return -1;
+		if (insert_new_flow(tbl, &key, item_idx) ==
+				INVALID_ARRAY_INDEX) {
+			/*
+			 * Fail to insert a new flow, so
+			 * delete the inserted packet.
+			 */
+			delete_item(tbl, item_idx, INVALID_ARRAY_INDEX);
+			return -1;
+		}
+		return 0;
+	}
+
+	/* Check all packets in the flow and try to find a neighbor. */
+	cur_idx = tbl->flows[i].start_index;
+	prev_idx = cur_idx;
+	do {
+		cmp = udp_check_vxlan_neighbor(&(tbl->items[cur_idx]),
+				frag_offset, ip_dl);
+		if (cmp) {
+			if (merge_two_vxlan_udp4_packets(
+						&(tbl->items[cur_idx]),
+						pkt, cmp, frag_offset,
+						is_last_frag)) {
+				return 1;
+			}
+			/*
+			 * Can't merge two packets, as the packet
+			 * length will be greater than the max value.
+			 * Insert the packet into the flow.
+			 */
+			if (insert_new_item(tbl, pkt, start_time, prev_idx,
+						frag_offset, is_last_frag,
+						outer_ip_id,
+						outer_is_atomic) ==
+					INVALID_ARRAY_INDEX)
+				return -1;
+			return 0;
+		}
+
+		/* Ensure inserted items are ordered by frag_offset */
+		if (frag_offset
+			< tbl->items[cur_idx].inner_item.frag_offset) {
+			break;
+		}
+
+		prev_idx = cur_idx;
+		cur_idx = tbl->items[cur_idx].inner_item.next_pkt_idx;
+	} while (cur_idx != INVALID_ARRAY_INDEX);
+
+	/* Can't find neighbor. Insert the packet into the flow. */
+	if (cur_idx == tbl->flows[i].start_index) {
+		/* Insert it before the first packet of the flow */
+		item_idx = insert_new_item(tbl, pkt, start_time,
+				INVALID_ARRAY_INDEX, frag_offset,
+				is_last_frag, outer_ip_id, outer_is_atomic);
+		if (item_idx == INVALID_ARRAY_INDEX)
+			return -1;
+		tbl->items[item_idx].inner_item.next_pkt_idx = cur_idx;
+		tbl->flows[i].start_index = item_idx;
+	} else {
+		if (insert_new_item(tbl, pkt, start_time, prev_idx,
+				frag_offset, is_last_frag, outer_ip_id,
+				outer_is_atomic) == INVALID_ARRAY_INDEX)
+			return -1;
+	}
+
+	return 0;
+}
+
+static int
+gro_vxlan_udp4_merge_items(struct gro_vxlan_udp4_tbl *tbl,
+			   uint32_t start_idx)
+{
+	uint16_t frag_offset;
+	uint8_t is_last_frag;
+	int16_t ip_dl;
+	struct rte_mbuf *pkt;
+	int cmp;
+	uint32_t item_idx;
+	uint16_t hdr_len;
+
+	item_idx = tbl->items[start_idx].inner_item.next_pkt_idx;
+	while (item_idx != INVALID_ARRAY_INDEX) {
+		pkt = tbl->items[item_idx].inner_item.firstseg;
+		hdr_len = pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2_len +
+			pkt->l3_len;
+		ip_dl = pkt->pkt_len - hdr_len;
+		frag_offset = tbl->items[item_idx].inner_item.frag_offset;
+		is_last_frag = tbl->items[item_idx].inner_item.is_last_frag;
+		cmp = udp_check_vxlan_neighbor(&(tbl->items[start_idx]),
+					frag_offset, ip_dl);
+		if (cmp) {
+			if (merge_two_vxlan_udp4_packets(
+					&(tbl->items[start_idx]),
+					pkt, cmp, frag_offset,
+					is_last_frag)) {
+				item_idx = delete_item(tbl, item_idx,
+							INVALID_ARRAY_INDEX);
+				tbl->items[start_idx].inner_item.next_pkt_idx
+					= item_idx;
+			} else {
+				return 0;
+			}
+		} else {
+			return 0;
+		}
+	}
+
+	return 0;
+}
+
+uint16_t
+gro_vxlan_udp4_tbl_timeout_flush(struct gro_vxlan_udp4_tbl *tbl,
+		uint64_t flush_timestamp,
+		struct rte_mbuf **out,
+		uint16_t nb_out)
+{
+	uint16_t k = 0;
+	uint32_t i, j;
+	uint32_t max_flow_num = tbl->max_flow_num;
+
+	for (i = 0; i < max_flow_num; i++) {
+		if (unlikely(tbl->flow_num == 0))
+			return k;
+
+		j = tbl->flows[i].start_index;
+		while (j != INVALID_ARRAY_INDEX) {
+			if (tbl->items[j].inner_item.start_time <=
+					flush_timestamp) {
+				gro_vxlan_udp4_merge_items(tbl, j);
+				out[k++] = tbl->items[j].inner_item.firstseg;
+				if (tbl->items[j].inner_item.nb_merged > 1)
+					update_vxlan_header(&(tbl->items[j]));
+				/*
+				 * Delete the item and get the next packet
+				 * index.
+				 */
+				j = delete_item(tbl, j, INVALID_ARRAY_INDEX);
+				tbl->flows[i].start_index = j;
+				if (j == INVALID_ARRAY_INDEX)
+					tbl->flow_num--;
+
+				if (unlikely(k == nb_out))
+					return k;
+			} else
+				/*
+				 * The left packets in the flow won't be
+				 * timeout. Go to check other flows.
+				 */
+				break;
+		}
+	}
+	return k;
+}
+
+uint32_t
+gro_vxlan_udp4_tbl_pkt_count(void *tbl)
+{
+	struct gro_vxlan_udp4_tbl *gro_tbl = tbl;
+
+	if (gro_tbl)
+		return gro_tbl->item_num;
+
+	return 0;
+}
diff --git a/lib/librte_gro/gro_vxlan_udp4.h b/lib/librte_gro/gro_vxlan_udp4.h
new file mode 100644
index 0000000..a16e327
--- /dev/null
+++ b/lib/librte_gro/gro_vxlan_udp4.h
@@ -0,0 +1,152 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Inspur Corporation
+ */
+
+#ifndef _GRO_VXLAN_UDP4_H_
+#define _GRO_VXLAN_UDP4_H_
+
+#include "gro_udp4.h"
+
+#define GRO_VXLAN_UDP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL)
+
+/* Header fields representing a VxLAN flow */
+struct vxlan_udp4_flow_key {
+	struct udp4_flow_key inner_key;
+	struct rte_vxlan_hdr vxlan_hdr;
+
+	struct rte_ether_addr outer_eth_saddr;
+	struct rte_ether_addr outer_eth_daddr;
+
+	uint32_t outer_ip_src_addr;
+	uint32_t outer_ip_dst_addr;
+
+	/* Outer UDP ports */
+	uint16_t outer_src_port;
+	uint16_t outer_dst_port;
+
+};
+
+struct gro_vxlan_udp4_flow {
+	struct vxlan_udp4_flow_key key;
+	/*
+	 * The index of the first packet in the flow. INVALID_ARRAY_INDEX
+	 * indicates an empty flow.
+	 */
+	uint32_t start_index;
+};
+
+struct gro_vxlan_udp4_item {
+	struct gro_udp4_item inner_item;
+	/* IPv4 ID in the outer IPv4 header */
+	uint16_t outer_ip_id;
+	/* Indicate if outer IPv4 ID can be ignored */
+	uint8_t outer_is_atomic;
+};
+
+/*
+ * VxLAN (with an outer IPv4 header and an inner UDP/IPv4 packet)
+ * reassembly table structure
+ */
+struct gro_vxlan_udp4_tbl {
+	/* item array */
+	struct gro_vxlan_udp4_item *items;
+	/* flow array */
+	struct gro_vxlan_udp4_flow *flows;
+	/* current item number */
+	uint32_t item_num;
+	/* current flow number */
+	uint32_t flow_num;
+	/* the maximum item number */
+	uint32_t max_item_num;
+	/* the maximum flow number */
+	uint32_t max_flow_num;
+};
+
+/**
+ * This function creates a VxLAN reassembly table for VxLAN packets
+ * which have an outer IPv4 header and an inner UDP/IPv4 packet.
+ *
+ * @param socket_id
+ *  Socket index for allocating the table
+ * @param max_flow_num
+ *  The maximum number of flows in the table
+ * @param max_item_per_flow
+ *  The maximum number of packets per flow
+ *
+ * @return
+ *  - Return the table pointer on success.
+ *  - Return NULL on failure.
+ */
+void *gro_vxlan_udp4_tbl_create(uint16_t socket_id,
+		uint16_t max_flow_num,
+		uint16_t max_item_per_flow);
+
+/**
+ * This function destroys a VxLAN reassembly table.
+ *
+ * @param tbl
+ *  Pointer pointing to the VxLAN reassembly table
+ */
+void gro_vxlan_udp4_tbl_destroy(void *tbl);
+
+/**
+ * This function merges a VxLAN packet which has an outer IPv4 header and
+ * an inner UDP/IPv4 packet. It does not process the packet which does not
+ * have payload.
+ *
+ * This function does not check if the packet has correct checksums and
+ * does not re-calculate checksums for the merged packet. It returns the
+ * packet if there is no available space in the table.
+ *
+ * @param pkt
+ *  Packet to reassemble
+ * @param tbl
+ *  Pointer pointing to the VxLAN reassembly table
+ * @start_time
+ *  The time when the packet is inserted into the table
+ *
+ * @return
+ *  - Return a positive value if the packet is merged.
+ *  - Return zero if the packet isn't merged but stored in the table.
+ *  - Return a negative value for invalid parameters or no available
+ *    space in the table.
+ */
+int32_t gro_vxlan_udp4_reassemble(struct rte_mbuf *pkt,
+		struct gro_vxlan_udp4_tbl *tbl,
+		uint64_t start_time);
+
+/**
+ * This function flushes timeout packets in the VxLAN reassembly table,
+ * and without updating checksums.
+ *
+ * @param tbl
+ *  Pointer pointing to a VxLAN GRO table
+ * @param flush_timestamp
+ *  This function flushes packets which are inserted into the table
+ *  before or at the flush_timestamp.
+ * @param out
+ *  Pointer array used to keep flushed packets
+ * @param nb_out
+ *  The element number in 'out'. It also determines the maximum number of
+ *  packets that can be flushed finally.
+ *
+ * @return
+ *  The number of flushed packets
+ */
+uint16_t gro_vxlan_udp4_tbl_timeout_flush(struct gro_vxlan_udp4_tbl *tbl,
+		uint64_t flush_timestamp,
+		struct rte_mbuf **out,
+		uint16_t nb_out);
+
+/**
+ * This function returns the number of the packets in a VxLAN
+ * reassembly table.
+ *
+ * @param tbl
+ *  Pointer pointing to the VxLAN reassembly table
+ *
+ * @return
+ *  The number of packets in the table
+ */
+uint32_t gro_vxlan_udp4_tbl_pkt_count(void *tbl);
+#endif
diff --git a/lib/librte_gro/meson.build b/lib/librte_gro/meson.build
index 0d18dc2..ea8b45c 100644
--- a/lib/librte_gro/meson.build
+++ b/lib/librte_gro/meson.build
@@ -1,6 +1,6 @@ 
 # SPDX-License-Identifier: BSD-3-Clause
 # Copyright(c) 2017 Intel Corporation
 
-sources = files('rte_gro.c', 'gro_tcp4.c', 'gro_udp4.c', 'gro_vxlan_tcp4.c')
+sources = files('rte_gro.c', 'gro_tcp4.c', 'gro_udp4.c', 'gro_vxlan_tcp4.c', 'gro_vxlan_udp4.c')
 headers = files('rte_gro.h')
 deps += ['ethdev']
diff --git a/lib/librte_gro/rte_gro.c b/lib/librte_gro/rte_gro.c
index d094129..6295fbb 100644
--- a/lib/librte_gro/rte_gro.c
+++ b/lib/librte_gro/rte_gro.c
@@ -11,6 +11,7 @@ 
 #include "gro_tcp4.h"
 #include "gro_udp4.h"
 #include "gro_vxlan_tcp4.h"
+#include "gro_vxlan_udp4.h"
 
 typedef void *(*gro_tbl_create_fn)(uint16_t socket_id,
 		uint16_t max_flow_num,
@@ -20,14 +21,14 @@ 
 
 static gro_tbl_create_fn tbl_create_fn[RTE_GRO_TYPE_MAX_NUM] = {
 		gro_tcp4_tbl_create, gro_vxlan_tcp4_tbl_create,
-		gro_udp4_tbl_create, NULL};
+		gro_udp4_tbl_create, gro_vxlan_udp4_tbl_create, NULL};
 static gro_tbl_destroy_fn tbl_destroy_fn[RTE_GRO_TYPE_MAX_NUM] = {
 			gro_tcp4_tbl_destroy, gro_vxlan_tcp4_tbl_destroy,
-			gro_udp4_tbl_destroy,
+			gro_udp4_tbl_destroy, gro_vxlan_udp4_tbl_destroy,
 			NULL};
 static gro_tbl_pkt_count_fn tbl_pkt_count_fn[RTE_GRO_TYPE_MAX_NUM] = {
 			gro_tcp4_tbl_pkt_count, gro_vxlan_tcp4_tbl_pkt_count,
-			gro_udp4_tbl_pkt_count,
+			gro_udp4_tbl_pkt_count, gro_vxlan_udp4_tbl_pkt_count,
 			NULL};
 
 #define IS_IPV4_TCP_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \
@@ -47,6 +48,16 @@ 
 		     RTE_PTYPE_INNER_L3_IPV4_EXT | \
 		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN)) != 0))
 
+#define IS_IPV4_VXLAN_UDP4_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \
+		((ptype & RTE_PTYPE_L4_UDP) == RTE_PTYPE_L4_UDP) && \
+		((ptype & RTE_PTYPE_TUNNEL_VXLAN) == \
+		 RTE_PTYPE_TUNNEL_VXLAN) && \
+		 ((ptype & RTE_PTYPE_INNER_L4_UDP) == \
+		  RTE_PTYPE_INNER_L4_UDP) && \
+		  (((ptype & RTE_PTYPE_INNER_L3_MASK) & \
+		    (RTE_PTYPE_INNER_L3_IPV4 | \
+		     RTE_PTYPE_INNER_L3_IPV4_EXT | \
+		     RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN)) != 0))
 
 /*
  * GRO context structure. It keeps the table structures, which are
@@ -137,19 +148,27 @@  struct gro_ctx {
 	struct gro_udp4_item udp_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {{0} };
 
 	/* Allocate a reassembly table for VXLAN TCP GRO */
-	struct gro_vxlan_tcp4_tbl vxlan_tbl;
-	struct gro_vxlan_tcp4_flow vxlan_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
-	struct gro_vxlan_tcp4_item vxlan_items[RTE_GRO_MAX_BURST_ITEM_NUM]
+	struct gro_vxlan_tcp4_tbl vxlan_tcp_tbl;
+	struct gro_vxlan_tcp4_flow vxlan_tcp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
+	struct gro_vxlan_tcp4_item vxlan_tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM]
+			= {{{0}, 0, 0} };
+
+	/* Allocate a reassembly table for VXLAN UDP GRO */
+	struct gro_vxlan_udp4_tbl vxlan_udp_tbl;
+	struct gro_vxlan_udp4_flow vxlan_udp_flows[RTE_GRO_MAX_BURST_ITEM_NUM];
+	struct gro_vxlan_udp4_item vxlan_udp_items[RTE_GRO_MAX_BURST_ITEM_NUM]
 			= {{{0}, 0, 0} };
 
 	struct rte_mbuf *unprocess_pkts[nb_pkts];
 	uint32_t item_num;
 	int32_t ret;
 	uint16_t i, unprocess_num = 0, nb_after_gro = nb_pkts;
-	uint8_t do_tcp4_gro = 0, do_vxlan_gro = 0, do_udp4_gro = 0;
+	uint8_t do_tcp4_gro = 0, do_vxlan_tcp_gro = 0, do_udp4_gro = 0,
+		do_vxlan_udp_gro = 0;
 
 	if (unlikely((param->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
 					RTE_GRO_TCP_IPV4 |
+					RTE_GRO_IPV4_VXLAN_UDP_IPV4 |
 					RTE_GRO_UDP_IPV4)) == 0))
 		return nb_pkts;
 
@@ -160,15 +179,28 @@  struct gro_ctx {
 
 	if (param->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) {
 		for (i = 0; i < item_num; i++)
-			vxlan_flows[i].start_index = INVALID_ARRAY_INDEX;
-
-		vxlan_tbl.flows = vxlan_flows;
-		vxlan_tbl.items = vxlan_items;
-		vxlan_tbl.flow_num = 0;
-		vxlan_tbl.item_num = 0;
-		vxlan_tbl.max_flow_num = item_num;
-		vxlan_tbl.max_item_num = item_num;
-		do_vxlan_gro = 1;
+			vxlan_tcp_flows[i].start_index = INVALID_ARRAY_INDEX;
+
+		vxlan_tcp_tbl.flows = vxlan_tcp_flows;
+		vxlan_tcp_tbl.items = vxlan_tcp_items;
+		vxlan_tcp_tbl.flow_num = 0;
+		vxlan_tcp_tbl.item_num = 0;
+		vxlan_tcp_tbl.max_flow_num = item_num;
+		vxlan_tcp_tbl.max_item_num = item_num;
+		do_vxlan_tcp_gro = 1;
+	}
+
+	if (param->gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) {
+		for (i = 0; i < item_num; i++)
+			vxlan_udp_flows[i].start_index = INVALID_ARRAY_INDEX;
+
+		vxlan_udp_tbl.flows = vxlan_udp_flows;
+		vxlan_udp_tbl.items = vxlan_udp_items;
+		vxlan_udp_tbl.flow_num = 0;
+		vxlan_udp_tbl.item_num = 0;
+		vxlan_udp_tbl.max_flow_num = item_num;
+		vxlan_udp_tbl.max_item_num = item_num;
+		do_vxlan_udp_gro = 1;
 	}
 
 	if (param->gro_types & RTE_GRO_TCP_IPV4) {
@@ -204,9 +236,18 @@  struct gro_ctx {
 		 * will be flushed from the tables.
 		 */
 		if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
-				do_vxlan_gro) {
+				do_vxlan_tcp_gro) {
 			ret = gro_vxlan_tcp4_reassemble(pkts[i],
-							&vxlan_tbl, 0);
+							&vxlan_tcp_tbl, 0);
+			if (ret > 0)
+				/* Merge successfully */
+				nb_after_gro--;
+			else if (ret < 0)
+				unprocess_pkts[unprocess_num++] = pkts[i];
+		} else if (IS_IPV4_VXLAN_UDP4_PKT(pkts[i]->packet_type) &&
+				do_vxlan_udp_gro) {
+			ret = gro_vxlan_udp4_reassemble(pkts[i],
+							&vxlan_udp_tbl, 0);
 			if (ret > 0)
 				/* Merge successfully */
 				nb_after_gro--;
@@ -236,11 +277,17 @@  struct gro_ctx {
 		 || (unprocess_num < nb_pkts)) {
 		i = 0;
 		/* Flush all packets from the tables */
-		if (do_vxlan_gro) {
-			i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tbl,
+		if (do_vxlan_tcp_gro) {
+			i = gro_vxlan_tcp4_tbl_timeout_flush(&vxlan_tcp_tbl,
 					0, pkts, nb_pkts);
 		}
 
+		if (do_vxlan_udp_gro) {
+			i += gro_vxlan_udp4_tbl_timeout_flush(&vxlan_udp_tbl,
+					0, &pkts[i], nb_pkts - i);
+
+		}
+
 		if (do_tcp4_gro) {
 			i += gro_tcp4_tbl_timeout_flush(&tcp_tbl, 0,
 					&pkts[i], nb_pkts - i);
@@ -269,33 +316,42 @@  struct gro_ctx {
 {
 	struct rte_mbuf *unprocess_pkts[nb_pkts];
 	struct gro_ctx *gro_ctx = ctx;
-	void *tcp_tbl, *udp_tbl, *vxlan_tbl;
+	void *tcp_tbl, *udp_tbl, *vxlan_tcp_tbl, *vxlan_udp_tbl;
 	uint64_t current_time;
 	uint16_t i, unprocess_num = 0;
-	uint8_t do_tcp4_gro, do_vxlan_gro, do_udp4_gro;
+	uint8_t do_tcp4_gro, do_vxlan_tcp_gro, do_udp4_gro, do_vxlan_udp_gro;
 
 	if (unlikely((gro_ctx->gro_types & (RTE_GRO_IPV4_VXLAN_TCP_IPV4 |
 					RTE_GRO_TCP_IPV4 |
+					RTE_GRO_IPV4_VXLAN_UDP_IPV4 |
 					RTE_GRO_UDP_IPV4)) == 0))
 		return nb_pkts;
 
 	tcp_tbl = gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX];
-	vxlan_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];
+	vxlan_tcp_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_TCP_IPV4_INDEX];
 	udp_tbl = gro_ctx->tbls[RTE_GRO_UDP_IPV4_INDEX];
+	vxlan_udp_tbl = gro_ctx->tbls[RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX];
 
 	do_tcp4_gro = (gro_ctx->gro_types & RTE_GRO_TCP_IPV4) ==
 		RTE_GRO_TCP_IPV4;
-	do_vxlan_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) ==
+	do_vxlan_tcp_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_TCP_IPV4) ==
 		RTE_GRO_IPV4_VXLAN_TCP_IPV4;
 	do_udp4_gro = (gro_ctx->gro_types & RTE_GRO_UDP_IPV4) ==
 		RTE_GRO_UDP_IPV4;
+	do_vxlan_udp_gro = (gro_ctx->gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) ==
+		RTE_GRO_IPV4_VXLAN_UDP_IPV4;
 
 	current_time = rte_rdtsc();
 
 	for (i = 0; i < nb_pkts; i++) {
 		if (IS_IPV4_VXLAN_TCP4_PKT(pkts[i]->packet_type) &&
-				do_vxlan_gro) {
-			if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tbl,
+				do_vxlan_tcp_gro) {
+			if (gro_vxlan_tcp4_reassemble(pkts[i], vxlan_tcp_tbl,
+						current_time) < 0)
+				unprocess_pkts[unprocess_num++] = pkts[i];
+		} else if (IS_IPV4_VXLAN_UDP4_PKT(pkts[i]->packet_type) &&
+				do_vxlan_udp_gro) {
+			if (gro_vxlan_udp4_reassemble(pkts[i], vxlan_udp_tbl,
 						current_time) < 0)
 				unprocess_pkts[unprocess_num++] = pkts[i];
 		} else if (IS_IPV4_TCP_PKT(pkts[i]->packet_type) &&
@@ -341,6 +397,13 @@  struct gro_ctx {
 		left_nb_out = max_nb_out - num;
 	}
 
+	if ((gro_types & RTE_GRO_IPV4_VXLAN_UDP_IPV4) && max_nb_out > 0) {
+		num += gro_vxlan_udp4_tbl_timeout_flush(gro_ctx->tbls[
+				RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX],
+				flush_timestamp, &out[num], left_nb_out);
+		left_nb_out = max_nb_out - num;
+	}
+
 	/* If no available space in 'out', stop flushing. */
 	if ((gro_types & RTE_GRO_TCP_IPV4) && max_nb_out > 0) {
 		num += gro_tcp4_tbl_timeout_flush(
diff --git a/lib/librte_gro/rte_gro.h b/lib/librte_gro/rte_gro.h
index 470f3ed..9f9ed49 100644
--- a/lib/librte_gro/rte_gro.h
+++ b/lib/librte_gro/rte_gro.h
@@ -35,6 +35,9 @@ 
 #define RTE_GRO_UDP_IPV4_INDEX 2
 #define RTE_GRO_UDP_IPV4 (1ULL << RTE_GRO_UDP_IPV4_INDEX)
 /**< UDP/IPv4 GRO flag */
+#define RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX 3
+#define RTE_GRO_IPV4_VXLAN_UDP_IPV4 (1ULL << RTE_GRO_IPV4_VXLAN_UDP_IPV4_INDEX)
+/**< VxLAN UDP/IPv4 GRO flag. */
 
 /**
  * Structure used to create GRO context objects or used to pass