gso: add VXLAN UDP GSO support
Checks
Commit Message
From: Yi Yang <yangyi01@inspur.com>
Many NICs cannot offload VXLAN UFO (UDP fragmentation offload), so
doing VXLAN UDP GSO in software is very important for VM-to-VM UDP
performance, especially when the VM MTU is only 1500 rather than
9000.
With this enabled in DPDK, OVS DPDK can leverage it to
improve VM-to-VM UDP performance; the gain is large,
more than 2x.
Signed-off-by: Yi Yang <yangyi01@inspur.com>
---
lib/librte_gso/Makefile | 1 +
lib/librte_gso/gso_common.h | 5 ++
lib/librte_gso/gso_tunnel_udp4.c | 108 +++++++++++++++++++++++++++++++++++++++
lib/librte_gso/gso_tunnel_udp4.h | 43 ++++++++++++++++
lib/librte_gso/meson.build | 2 +-
lib/librte_gso/rte_gso.c | 8 +++
6 files changed, 166 insertions(+), 1 deletion(-)
create mode 100644 lib/librte_gso/gso_tunnel_udp4.c
create mode 100644 lib/librte_gso/gso_tunnel_udp4.h
Comments
Jiayu, also please help review this GSO patch, thanks a lot.
At 2020-07-01 14:46:43, yang_y_yi@163.com wrote:
>From: Yi Yang <yangyi01@inspur.com>
>
>Many NICs can't offload VXLAN UFO, so it is very important
>to do VXLAN UDP GSO by software to improve VM-to-VM UDP
>performance, especially for the case that VM MTU is just
>1500 but not 9000.
>
>With this enabled in DPDK, OVS DPDK can leverage it to
>improve VM-to-VM UDP performance, performance gain is very
>huge, over 2 times.
>
>Signed-off-by: Yi Yang <yangyi01@inspur.com>
>---
> lib/librte_gso/Makefile | 1 +
> lib/librte_gso/gso_common.h | 5 ++
> lib/librte_gso/gso_tunnel_udp4.c | 108 +++++++++++++++++++++++++++++++++++++++
> lib/librte_gso/gso_tunnel_udp4.h | 43 ++++++++++++++++
> lib/librte_gso/meson.build | 2 +-
> lib/librte_gso/rte_gso.c | 8 +++
> 6 files changed, 166 insertions(+), 1 deletion(-)
> create mode 100644 lib/librte_gso/gso_tunnel_udp4.c
> create mode 100644 lib/librte_gso/gso_tunnel_udp4.h
>
>diff --git a/lib/librte_gso/Makefile b/lib/librte_gso/Makefile
>index a34846e..3005817 100644
>--- a/lib/librte_gso/Makefile
>+++ b/lib/librte_gso/Makefile
>@@ -17,6 +17,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_GSO) += rte_gso.c
> SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_common.c
> SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_tcp4.c
> SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_tunnel_tcp4.c
>+SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_tunnel_udp4.c
> SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_udp4.c
>
> # install this header file
>diff --git a/lib/librte_gso/gso_common.h b/lib/librte_gso/gso_common.h
>index a0b8343..4d5f303 100644
>--- a/lib/librte_gso/gso_common.h
>+++ b/lib/librte_gso/gso_common.h
>@@ -26,6 +26,11 @@
> (PKT_TX_TCP_SEG | PKT_TX_IPV4 | PKT_TX_OUTER_IPV4 | \
> PKT_TX_TUNNEL_VXLAN))
>
>+#define IS_IPV4_VXLAN_UDP4(flag) (((flag) & (PKT_TX_UDP_SEG | PKT_TX_IPV4 | \
>+ PKT_TX_OUTER_IPV4 | PKT_TX_TUNNEL_MASK)) == \
>+ (PKT_TX_UDP_SEG | PKT_TX_IPV4 | PKT_TX_OUTER_IPV4 | \
>+ PKT_TX_TUNNEL_VXLAN))
>+
> #define IS_IPV4_GRE_TCP4(flag) (((flag) & (PKT_TX_TCP_SEG | PKT_TX_IPV4 | \
> PKT_TX_OUTER_IPV4 | PKT_TX_TUNNEL_MASK)) == \
> (PKT_TX_TCP_SEG | PKT_TX_IPV4 | PKT_TX_OUTER_IPV4 | \
>diff --git a/lib/librte_gso/gso_tunnel_udp4.c b/lib/librte_gso/gso_tunnel_udp4.c
>new file mode 100644
>index 0000000..1a018ee
>--- /dev/null
>+++ b/lib/librte_gso/gso_tunnel_udp4.c
>@@ -0,0 +1,108 @@
>+/* SPDX-License-Identifier: BSD-3-Clause
>+ * Copyright(c) 2020 Inspur Corporation
>+ */
>+
>+#include "gso_common.h"
>+#include "gso_tunnel_udp4.h"
>+
>+#define IPV4_HDR_MF_BIT (1U << 13)
>+
>+static void
>+update_tunnel_ipv4_udp_headers(struct rte_mbuf *pkt, struct rte_mbuf **segs,
>+ uint16_t nb_segs)
>+{
>+ struct rte_ipv4_hdr *ipv4_hdr;
>+ uint16_t outer_id, inner_id, tail_idx, i, length;
>+ uint16_t outer_ipv4_offset, inner_ipv4_offset;
>+ uint16_t udp_gre_offset, udp_offset;
>+ uint8_t update_udp_hdr;
>+ uint16_t frag_offset = 0, is_mf;
>+
>+ outer_ipv4_offset = pkt->outer_l2_len;
>+ udp_gre_offset = outer_ipv4_offset + pkt->outer_l3_len;
>+ inner_ipv4_offset = udp_gre_offset + pkt->l2_len;
>+ udp_offset = inner_ipv4_offset + pkt->l3_len;
>+
>+ /* Outer IPv4 header. */
>+ ipv4_hdr = (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
>+ outer_ipv4_offset);
>+ outer_id = rte_be_to_cpu_16(ipv4_hdr->packet_id);
>+
>+ /* Inner IPv4 header. */
>+ ipv4_hdr = (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
>+ inner_ipv4_offset);
>+ inner_id = rte_be_to_cpu_16(ipv4_hdr->packet_id);
>+
>+ tail_idx = nb_segs - 1;
>+
>+ /* Only update UDP header for VxLAN packets. */
>+ update_udp_hdr = (pkt->ol_flags & PKT_TX_TUNNEL_VXLAN) ? 1 : 0;
>+
>+ for (i = 0; i < nb_segs; i++) {
>+ update_ipv4_header(segs[i], outer_ipv4_offset, outer_id);
>+ if (update_udp_hdr)
>+ update_udp_header(segs[i], udp_gre_offset);
>+ update_ipv4_header(segs[i], inner_ipv4_offset, inner_id);
>+ update_udp_header(segs[i], udp_offset);
>+ /* For the case inner packet is UDP, we must keep UDP
>+ * datagram boundary, it must be handled as IP fragment.
>+ *
>+ * Set IP fragment offset for inner IP header.
>+ */
>+ ipv4_hdr = (struct rte_ipv4_hdr *)
>+ (rte_pktmbuf_mtod(segs[i], char *) +
>+ inner_ipv4_offset);
>+ is_mf = i < tail_idx ? IPV4_HDR_MF_BIT : 0;
>+ ipv4_hdr->fragment_offset =
>+ rte_cpu_to_be_16(frag_offset | is_mf);
>+ length = segs[i]->pkt_len - inner_ipv4_offset - pkt->l3_len;
>+ frag_offset += (length >> 3);
>+ outer_id++;
>+ }
>+}
>+
>+int
>+gso_tunnel_udp4_segment(struct rte_mbuf *pkt,
>+ uint16_t gso_size,
>+ struct rte_mempool *direct_pool,
>+ struct rte_mempool *indirect_pool,
>+ struct rte_mbuf **pkts_out,
>+ uint16_t nb_pkts_out)
>+{
>+ struct rte_ipv4_hdr *inner_ipv4_hdr;
>+ uint16_t pyld_unit_size, hdr_offset, frag_off;
>+ int ret = 1;
>+
>+ hdr_offset = pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2_len;
>+ inner_ipv4_hdr = (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
>+ hdr_offset);
>+ /*
>+ * Don't process the packet whose MF bit or offset in the inner
>+ * IPv4 header are non-zero.
>+ */
>+ frag_off = rte_be_to_cpu_16(inner_ipv4_hdr->fragment_offset);
>+ if (unlikely(IS_FRAGMENTED(frag_off))) {
>+ pkts_out[0] = pkt;
>+ return 1;
>+ }
>+
>+ hdr_offset += pkt->l3_len;
>+ /* Don't process the packet without data */
>+ if ((hdr_offset + pkt->l4_len) >= pkt->pkt_len) {
>+ pkts_out[0] = pkt;
>+ return 1;
>+ }
>+
>+ /* pyld_unit_size must be a multiple of 8 because frag_off
>+ * uses 8 bytes as unit.
>+ */
>+ pyld_unit_size = (gso_size - hdr_offset) & ~7U;
>+
>+ /* Segment the payload */
>+ ret = gso_do_segment(pkt, hdr_offset, pyld_unit_size, direct_pool,
>+ indirect_pool, pkts_out, nb_pkts_out);
>+ if (ret > 1)
>+ update_tunnel_ipv4_udp_headers(pkt, pkts_out, ret);
>+
>+ return ret;
>+}
>diff --git a/lib/librte_gso/gso_tunnel_udp4.h b/lib/librte_gso/gso_tunnel_udp4.h
>new file mode 100644
>index 0000000..d56e342
>--- /dev/null
>+++ b/lib/librte_gso/gso_tunnel_udp4.h
>@@ -0,0 +1,43 @@
>+/* SPDX-License-Identifier: BSD-3-Clause
>+ * Copyright(c) 2020 Inspur Corporation
>+ */
>+
>+#ifndef _GSO_TUNNEL_UDP4_H_
>+#define _GSO_TUNNEL_UDP4_H_
>+
>+#include <stdint.h>
>+#include <rte_mbuf.h>
>+
>+/**
>+ * Segment a tunneling packet with inner TCP/IPv4 headers. This function
>+ * does not check if the input packet has correct checksums, and does not
>+ * update checksums for output GSO segments. Furthermore, it does not
>+ * process IP fragment packets.
>+ *
>+ * @param pkt
>+ * The packet mbuf to segment.
>+ * @param gso_size
>+ * The max length of a GSO segment, measured in bytes.
>+ * @param direct_pool
>+ * MBUF pool used for allocating direct buffers for output segments.
>+ * @param indirect_pool
>+ * MBUF pool used for allocating indirect buffers for output segments.
>+ * @param pkts_out
>+ * Pointer array used to store the MBUF addresses of output GSO
>+ * segments, when it succeeds. If the memory space in pkts_out is
>+ * insufficient, it fails and returns -EINVAL.
>+ * @param nb_pkts_out
>+ * The max number of items that 'pkts_out' can keep.
>+ *
>+ * @return
>+ * - The number of GSO segments filled in pkts_out on success.
>+ * - Return -ENOMEM if run out of memory in MBUF pools.
>+ * - Return -EINVAL for invalid parameters.
>+ */
>+int gso_tunnel_udp4_segment(struct rte_mbuf *pkt,
>+ uint16_t gso_size,
>+ struct rte_mempool *direct_pool,
>+ struct rte_mempool *indirect_pool,
>+ struct rte_mbuf **pkts_out,
>+ uint16_t nb_pkts_out);
>+#endif
>diff --git a/lib/librte_gso/meson.build b/lib/librte_gso/meson.build
>index ad8dd85..05904f2 100644
>--- a/lib/librte_gso/meson.build
>+++ b/lib/librte_gso/meson.build
>@@ -2,6 +2,6 @@
> # Copyright(c) 2017 Intel Corporation
>
> sources = files('gso_common.c', 'gso_tcp4.c', 'gso_udp4.c',
>- 'gso_tunnel_tcp4.c', 'rte_gso.c')
>+ 'gso_tunnel_tcp4.c', 'gso_tunnel_udp4.c', 'rte_gso.c')
> headers = files('rte_gso.h')
> deps += ['ethdev']
>diff --git a/lib/librte_gso/rte_gso.c b/lib/librte_gso/rte_gso.c
>index 751b5b6..cf401b2 100644
>--- a/lib/librte_gso/rte_gso.c
>+++ b/lib/librte_gso/rte_gso.c
>@@ -11,6 +11,7 @@
> #include "gso_common.h"
> #include "gso_tcp4.h"
> #include "gso_tunnel_tcp4.h"
>+#include "gso_tunnel_udp4.h"
> #include "gso_udp4.h"
>
> #define ILLEGAL_UDP_GSO_CTX(ctx) \
>@@ -62,6 +63,13 @@
> ret = gso_tunnel_tcp4_segment(pkt, gso_size, ipid_delta,
> direct_pool, indirect_pool,
> pkts_out, nb_pkts_out);
>+ } else if (IS_IPV4_VXLAN_UDP4(pkt->ol_flags) &&
>+ (gso_ctx->gso_types & (DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
>+ DEV_TX_OFFLOAD_UDP_TSO))) {
>+ pkt->ol_flags &= (~PKT_TX_UDP_SEG);
>+ ret = gso_tunnel_udp4_segment(pkt, gso_size,
>+ direct_pool, indirect_pool,
>+ pkts_out, nb_pkts_out);
> } else if (IS_IPV4_TCP(pkt->ol_flags) &&
> (gso_ctx->gso_types & DEV_TX_OFFLOAD_TCP_TSO)) {
> pkt->ol_flags &= (~PKT_TX_TCP_SEG);
>--
>1.8.3.1
@@ -17,6 +17,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_GSO) += rte_gso.c
SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_common.c
SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_tcp4.c
SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_tunnel_tcp4.c
+SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_tunnel_udp4.c
SRCS-$(CONFIG_RTE_LIBRTE_GSO) += gso_udp4.c
# install this header file
@@ -26,6 +26,11 @@
(PKT_TX_TCP_SEG | PKT_TX_IPV4 | PKT_TX_OUTER_IPV4 | \
PKT_TX_TUNNEL_VXLAN))
+#define IS_IPV4_VXLAN_UDP4(flag) (((flag) & (PKT_TX_UDP_SEG | PKT_TX_IPV4 | \
+ PKT_TX_OUTER_IPV4 | PKT_TX_TUNNEL_MASK)) == \
+ (PKT_TX_UDP_SEG | PKT_TX_IPV4 | PKT_TX_OUTER_IPV4 | \
+ PKT_TX_TUNNEL_VXLAN))
+
#define IS_IPV4_GRE_TCP4(flag) (((flag) & (PKT_TX_TCP_SEG | PKT_TX_IPV4 | \
PKT_TX_OUTER_IPV4 | PKT_TX_TUNNEL_MASK)) == \
(PKT_TX_TCP_SEG | PKT_TX_IPV4 | PKT_TX_OUTER_IPV4 | \
new file mode 100644
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Inspur Corporation
+ */
+
+#include <errno.h>
+
+#include "gso_common.h"
+#include "gso_tunnel_udp4.h"
+
+#define IPV4_HDR_MF_BIT (1U << 13)
+
+/*
+ * Rewrite the per-segment headers of a VXLAN packet whose inner payload
+ * is UDP/IPv4.
+ *
+ * gso_tunnel_udp4_segment() hands gso_do_segment() a header length that
+ * ends at the inner IPv4 header, so the inner UDP header is part of the
+ * payload and the output segments are inner IPv4 fragments of a single
+ * datagram. Consequently:
+ *  - the outer IPv4 header is refreshed with an ID that grows by one per
+ *    segment;
+ *  - the outer UDP header is refreshed for VXLAN packets;
+ *  - the inner IPv4 header is refreshed with the same ID for every
+ *    segment, and its fragment offset / MF bit are set here;
+ *  - the inner UDP header is left untouched, because it is not one of the
+ *    replicated per-segment headers.
+ *
+ * @param pkt
+ *  The original packet; supplies the header lengths, ol_flags and the
+ *  initial outer/inner IPv4 packet IDs.
+ * @param segs
+ *  The output segments to update.
+ * @param nb_segs
+ *  The number of entries in 'segs'.
+ */
+static void
+update_tunnel_ipv4_udp_headers(struct rte_mbuf *pkt, struct rte_mbuf **segs,
+		uint16_t nb_segs)
+{
+	struct rte_ipv4_hdr *ipv4_hdr;
+	uint16_t outer_id, inner_id, tail_idx, i, length;
+	uint16_t outer_ipv4_offset, inner_ipv4_offset;
+	uint16_t outer_udp_offset, inner_l4_offset;
+	uint8_t update_udp_hdr;
+	uint16_t frag_offset = 0, is_mf;
+
+	outer_ipv4_offset = pkt->outer_l2_len;
+	outer_udp_offset = outer_ipv4_offset + pkt->outer_l3_len;
+	inner_ipv4_offset = outer_udp_offset + pkt->l2_len;
+	inner_l4_offset = inner_ipv4_offset + pkt->l3_len;
+
+	/* Outer IPv4 header. */
+	ipv4_hdr = (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
+			outer_ipv4_offset);
+	outer_id = rte_be_to_cpu_16(ipv4_hdr->packet_id);
+
+	/* Inner IPv4 header. */
+	ipv4_hdr = (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
+			inner_ipv4_offset);
+	inner_id = rte_be_to_cpu_16(ipv4_hdr->packet_id);
+
+	tail_idx = nb_segs - 1;
+
+	/* Only update the outer UDP header for VxLAN packets. The tunnel
+	 * type is a multi-bit field, so compare it under the mask (as
+	 * IS_IPV4_VXLAN_UDP4 does) instead of testing a single bit.
+	 */
+	update_udp_hdr = ((pkt->ol_flags & PKT_TX_TUNNEL_MASK) ==
+			PKT_TX_TUNNEL_VXLAN) ? 1 : 0;
+
+	for (i = 0; i < nb_segs; i++) {
+		update_ipv4_header(segs[i], outer_ipv4_offset, outer_id);
+		if (update_udp_hdr)
+			update_udp_header(segs[i], outer_udp_offset);
+		update_ipv4_header(segs[i], inner_ipv4_offset, inner_id);
+		/* For the case inner packet is UDP, we must keep UDP
+		 * datagram boundary, it must be handled as IP fragment.
+		 * The inner UDP header is payload of the first fragment
+		 * only, so it is not rewritten per segment.
+		 *
+		 * Set IP fragment offset for inner IP header.
+		 */
+		ipv4_hdr = (struct rte_ipv4_hdr *)
+			(rte_pktmbuf_mtod(segs[i], char *) +
+			inner_ipv4_offset);
+		is_mf = i < tail_idx ? IPV4_HDR_MF_BIT : 0;
+		ipv4_hdr->fragment_offset =
+			rte_cpu_to_be_16(frag_offset | is_mf);
+		/* Inner L3 payload carried by this segment, in bytes;
+		 * the fragment offset field counts 8-byte units.
+		 */
+		length = segs[i]->pkt_len - inner_l4_offset;
+		frag_offset += (length >> 3);
+		outer_id++;
+	}
+}
+
+/*
+ * Segment a VXLAN packet that carries an inner UDP/IPv4 datagram.
+ *
+ * The packet is cut after the inner IPv4 header, so every output segment
+ * is an inner IPv4 fragment of the original datagram.
+ *
+ * The input packet is handed back unchanged in pkts_out[0] (return value
+ * 1) when its inner IPv4 header is already fragmented or when it carries
+ * no data beyond the headers.
+ *
+ * Returns -EINVAL when 'gso_size' cannot hold all headers plus at least
+ * one 8-byte fragment unit; otherwise returns whatever gso_do_segment()
+ * returns.
+ */
+int
+gso_tunnel_udp4_segment(struct rte_mbuf *pkt,
+		uint16_t gso_size,
+		struct rte_mempool *direct_pool,
+		struct rte_mempool *indirect_pool,
+		struct rte_mbuf **pkts_out,
+		uint16_t nb_pkts_out)
+{
+	struct rte_ipv4_hdr *inner_ipv4_hdr;
+	uint16_t pyld_unit_size, hdr_offset, frag_off;
+	int ret;
+
+	hdr_offset = pkt->outer_l2_len + pkt->outer_l3_len + pkt->l2_len;
+	inner_ipv4_hdr = (struct rte_ipv4_hdr *)(rte_pktmbuf_mtod(pkt, char *) +
+			hdr_offset);
+	/*
+	 * Don't process the packet whose MF bit or offset in the inner
+	 * IPv4 header are non-zero.
+	 */
+	frag_off = rte_be_to_cpu_16(inner_ipv4_hdr->fragment_offset);
+	if (unlikely(IS_FRAGMENTED(frag_off))) {
+		pkts_out[0] = pkt;
+		return 1;
+	}
+
+	hdr_offset += pkt->l3_len;
+	/* Don't process the packet without data */
+	if ((hdr_offset + pkt->l4_len) >= pkt->pkt_len) {
+		pkts_out[0] = pkt;
+		return 1;
+	}
+
+	/* Each segment must hold every header up to the inner IPv4 header
+	 * plus at least one 8-byte fragment unit. Without this check the
+	 * subtraction below wraps when gso_size < hdr_offset, or yields a
+	 * zero-sized payload unit.
+	 */
+	if (unlikely(gso_size < hdr_offset + 8))
+		return -EINVAL;
+
+	/* pyld_unit_size must be a multiple of 8 because frag_off
+	 * uses 8 bytes as unit.
+	 */
+	pyld_unit_size = (gso_size - hdr_offset) & ~7U;
+
+	/* Segment the payload */
+	ret = gso_do_segment(pkt, hdr_offset, pyld_unit_size, direct_pool,
+			indirect_pool, pkts_out, nb_pkts_out);
+	/* Headers only need fixing when the packet was actually split. */
+	if (ret > 1)
+		update_tunnel_ipv4_udp_headers(pkt, pkts_out, ret);
+
+	return ret;
+}
new file mode 100644
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2020 Inspur Corporation
+ */
+
+#ifndef _GSO_TUNNEL_UDP4_H_
+#define _GSO_TUNNEL_UDP4_H_
+
+#include <stdint.h>
+#include <rte_mbuf.h>
+
+/**
+ * Segment a VXLAN tunneling packet with inner UDP/IPv4 headers. The
+ * inner datagram is split into inner IPv4 fragments: the fragment offset
+ * and MF bit of the inner IPv4 header are set on every output segment.
+ * This function does not check if the input packet has correct checksums,
+ * and does not update checksums for output GSO segments. Furthermore, it
+ * does not process packets whose inner IPv4 header is already fragmented,
+ * nor packets without payload; such a packet is stored unchanged in
+ * pkts_out[0] and 1 is returned.
+ *
+ * @param pkt
+ * The packet mbuf to segment.
+ * @param gso_size
+ * The max length of a GSO segment, measured in bytes.
+ * @param direct_pool
+ * MBUF pool used for allocating direct buffers for output segments.
+ * @param indirect_pool
+ * MBUF pool used for allocating indirect buffers for output segments.
+ * @param pkts_out
+ * Pointer array used to store the MBUF addresses of output GSO
+ * segments, when it succeeds. If the memory space in pkts_out is
+ * insufficient, it fails and returns -EINVAL.
+ * @param nb_pkts_out
+ * The max number of items that 'pkts_out' can keep.
+ *
+ * @return
+ * - The number of GSO segments filled in pkts_out on success.
+ * - Return -ENOMEM if run out of memory in MBUF pools.
+ * - Return -EINVAL for invalid parameters.
+ */
+int gso_tunnel_udp4_segment(struct rte_mbuf *pkt,
+ uint16_t gso_size,
+ struct rte_mempool *direct_pool,
+ struct rte_mempool *indirect_pool,
+ struct rte_mbuf **pkts_out,
+ uint16_t nb_pkts_out);
+#endif
@@ -2,6 +2,6 @@
# Copyright(c) 2017 Intel Corporation
sources = files('gso_common.c', 'gso_tcp4.c', 'gso_udp4.c',
- 'gso_tunnel_tcp4.c', 'rte_gso.c')
+ 'gso_tunnel_tcp4.c', 'gso_tunnel_udp4.c', 'rte_gso.c')
headers = files('rte_gso.h')
deps += ['ethdev']
@@ -11,6 +11,7 @@
#include "gso_common.h"
#include "gso_tcp4.h"
#include "gso_tunnel_tcp4.h"
+#include "gso_tunnel_udp4.h"
#include "gso_udp4.h"
#define ILLEGAL_UDP_GSO_CTX(ctx) \
@@ -62,6 +63,13 @@
ret = gso_tunnel_tcp4_segment(pkt, gso_size, ipid_delta,
direct_pool, indirect_pool,
pkts_out, nb_pkts_out);
+ } else if (IS_IPV4_VXLAN_UDP4(pkt->ol_flags) &&
+ (gso_ctx->gso_types & (DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
+ DEV_TX_OFFLOAD_UDP_TSO))) {
+ pkt->ol_flags &= (~PKT_TX_UDP_SEG);
+ ret = gso_tunnel_udp4_segment(pkt, gso_size,
+ direct_pool, indirect_pool,
+ pkts_out, nb_pkts_out);
} else if (IS_IPV4_TCP(pkt->ol_flags) &&
(gso_ctx->gso_types & DEV_TX_OFFLOAD_TCP_TSO)) {
pkt->ol_flags &= (~PKT_TX_TCP_SEG);