From patchwork Thu Nov 24 08:56:38 2016 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Olivier Matz X-Patchwork-Id: 17230 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [IPv6:::1]) by dpdk.org (Postfix) with ESMTP id 2FF4B56A2; Thu, 24 Nov 2016 09:58:04 +0100 (CET) Received: from proxy.6wind.com (host.76.145.23.62.rev.coltfrance.com [62.23.145.76]) by dpdk.org (Postfix) with ESMTP id 9146C4CE6 for ; Thu, 24 Nov 2016 09:57:03 +0100 (CET) Received: from glumotte.dev.6wind.com (unknown [10.16.0.195]) by proxy.6wind.com (Postfix) with ESMTP id C4F7B26D30; Thu, 24 Nov 2016 09:56:52 +0100 (CET) From: Olivier Matz To: dev@dpdk.org, yuanhan.liu@linux.intel.com Cc: maxime.coquelin@redhat.com, huawei.xie@intel.com, stephen@networkplumber.org Date: Thu, 24 Nov 2016 09:56:38 +0100 Message-Id: <1479977798-13417-6-git-send-email-olivier.matz@6wind.com> X-Mailer: git-send-email 2.8.1 In-Reply-To: <1479977798-13417-1-git-send-email-olivier.matz@6wind.com> References: <1479977798-13417-1-git-send-email-olivier.matz@6wind.com> Subject: [dpdk-dev] [PATCH 5/5] net/virtio: fix Tso when mbuf is shared X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" With virtio, doing tso requires to modify the network packet data: - the dpdk API requires to set the l4 checksum to an Intel-Nic-like pseudo header checksum that does not include the ip length - the virtio peer expects that the l4 checksum is a standard pseudo header checksum. This is a problem with shared packets, because they should not be modified. This patch fixes this issue by copying the headers into a linear buffer in that case. This buffer is located in the virtio_tx_region, at the same place where the virtio header is stored. The size of this buffer is set to 256, which should be enough in all cases: sizeof(ethernet) + sizeof(vlan) * 2 + sizeof(ip6) sizeof(ip6-ext) + sizeof(tcp) + sizeof(tcp-opts) = 14 + 8 + 40 + sizeof(ip6-ext) + 40 + sizeof(tcp-opts) = 102 + sizeof(ip6-ext) + sizeof(tcp-opts) Fixes: 696573046e9e ("net/virtio: support TSO") Signed-off-by: Olivier Matz --- drivers/net/virtio/virtio_rxtx.c | 119 +++++++++++++++++++++++++++------------ drivers/net/virtio/virtqueue.h | 2 + 2 files changed, 85 insertions(+), 36 deletions(-) diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c index 22d97a4..577c775 100644 --- a/drivers/net/virtio/virtio_rxtx.c +++ b/drivers/net/virtio/virtio_rxtx.c @@ -211,43 +211,73 @@ virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie) /* When doing TSO, the IP length is not included in the pseudo header * checksum of the packet given to the PMD, but for virtio it is - * expected. + * expected. Fix the mbuf or a copy if the mbuf is shared. */ -static void -virtio_tso_fix_cksum(struct rte_mbuf *m) +static unsigned int +virtio_tso_fix_cksum(struct rte_mbuf *m, char *hdr, size_t hdr_sz) { - /* common case: header is not fragmented */ - if (likely(rte_pktmbuf_data_len(m) >= m->l2_len + m->l3_len + - m->l4_len)) { - struct ipv4_hdr *iph; - struct ipv6_hdr *ip6h; - struct tcp_hdr *th; - uint16_t prev_cksum, new_cksum, ip_len, ip_paylen; - uint32_t tmp; - - iph = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len); - th = RTE_PTR_ADD(iph, m->l3_len); - if ((iph->version_ihl >> 4) == 4) { - iph->hdr_checksum = 0; - iph->hdr_checksum = rte_ipv4_cksum(iph); - ip_len = iph->total_length; - ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) - - m->l3_len); - } else { - ip6h = (struct ipv6_hdr *)iph; - ip_paylen = ip6h->payload_len; + struct ipv4_hdr *iph, iph_copy; + struct ipv6_hdr *ip6h = NULL, ip6h_copy; + struct tcp_hdr *th, th_copy; + size_t hdrlen = m->l2_len + m->l3_len + m->l4_len; + uint16_t prev_cksum, new_cksum, ip_len, ip_paylen; + uint32_t tmp; + int shared = 0; + + /* mbuf is write-only, we need to copy the headers in a linear buffer */ + if (unlikely(rte_pktmbuf_data_is_shared(m, 0, hdrlen))) { + shared = 1; + + /* network headers are too big, there's nothing we can do */ + if (hdrlen > hdr_sz) + return 0; + + rte_pktmbuf_read_copy(m, 0, hdrlen, hdr); + iph = (struct ipv4_hdr *)(hdr + m->l2_len); + ip6h = (struct ipv6_hdr *)(hdr + m->l2_len); + th = (struct tcp_hdr *)(hdr + m->l2_len + m->l3_len); + } else { + iph = rte_pktmbuf_read(m, m->l2_len, sizeof(*iph), &iph_copy); + th = rte_pktmbuf_read(m, m->l2_len + m->l3_len, sizeof(*th), + &th_copy); + } + + if ((iph->version_ihl >> 4) == 4) { + iph->hdr_checksum = 0; + iph->hdr_checksum = rte_ipv4_cksum(iph); + ip_len = iph->total_length; + ip_paylen = rte_cpu_to_be_16(rte_be_to_cpu_16(ip_len) - + m->l3_len); + } else { + if (!shared) { + ip6h = rte_pktmbuf_read(m, m->l2_len, sizeof(*ip6h), + &ip6h_copy); } + ip_paylen = ip6h->payload_len; + } - /* calculate the new phdr checksum not including ip_paylen */ - prev_cksum = th->cksum; - tmp = prev_cksum; - tmp += ip_paylen; - tmp = (tmp & 0xffff) + (tmp >> 16); - new_cksum = tmp; + /* calculate the new phdr checksum not including ip_paylen */ + prev_cksum = th->cksum; + tmp = prev_cksum; + tmp += ip_paylen; + tmp = (tmp & 0xffff) + (tmp >> 16); + new_cksum = tmp; - /* replace it in the packet */ - th->cksum = new_cksum; - } + /* replace it in the header */ + th->cksum = new_cksum; + + /* the update was done in the linear buffer, return */ + if (shared) + return hdrlen; + + /* copy from local buffer into mbuf if required */ + if ((iph->version_ihl >> 4) == 4) + rte_pktmbuf_write(m, m->l2_len, sizeof(*iph), iph); + else + rte_pktmbuf_write(m, m->l2_len, sizeof(*ip6h), ip6h); + rte_pktmbuf_write(m, m->l2_len + m->l3_len, sizeof(*th), th); + + return 0; } static inline int @@ -268,7 +298,9 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie, struct vring_desc *start_dp; uint16_t seg_num = cookie->nb_segs; uint16_t head_idx, idx; + uint16_t hdr_idx = 0; uint16_t head_size = vq->hw->vtnet_hdr_size; + unsigned int offset = 0; struct virtio_net_hdr *hdr; int offload; @@ -303,6 +335,8 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie, /* loop below will fill in rest of the indirect elements */ start_dp = txr[idx].tx_indir; + hdr_idx = 0; + start_dp[hdr_idx].len = vq->hw->vtnet_hdr_size; idx = 1; } else { /* setup first tx ring slot to point to header @@ -313,7 +347,7 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie, start_dp[idx].len = vq->hw->vtnet_hdr_size; start_dp[idx].flags = VRING_DESC_F_NEXT; hdr = (struct virtio_net_hdr *)&txr[idx].tx_hdr; - + hdr_idx = idx; idx = start_dp[idx].next; } @@ -345,7 +379,14 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie, /* TCP Segmentation Offload */ if (cookie->ol_flags & PKT_TX_TCP_SEG) { - virtio_tso_fix_cksum(cookie); + offset = virtio_tso_fix_cksum(cookie, + RTE_PTR_ADD(hdr, start_dp[hdr_idx].len), + VIRTIO_MAX_HDR_SZ); + if (offset > 0) { + RTE_ASSERT(can_push != 0); + start_dp[hdr_idx].len += offset; + } + hdr->gso_type = (cookie->ol_flags & PKT_TX_IPV6) ? VIRTIO_NET_HDR_GSO_TCPV6 : VIRTIO_NET_HDR_GSO_TCPV4; @@ -362,10 +403,16 @@ virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie, } do { - start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq); - start_dp[idx].len = cookie->data_len; + if (offset > cookie->data_len) { + offset -= cookie->data_len; + continue; + } + start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq) + + offset; + start_dp[idx].len = cookie->data_len - offset; start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0; idx = start_dp[idx].next; + offset = 0; } while ((cookie = cookie->next) != NULL); if (use_indirect) diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h index f0bb089..edfe0dd 100644 --- a/drivers/net/virtio/virtqueue.h +++ b/drivers/net/virtio/virtqueue.h @@ -254,8 +254,10 @@ struct virtio_net_hdr_mrg_rxbuf { /* Region reserved to allow for transmit header and indirect ring */ #define VIRTIO_MAX_TX_INDIRECT 8 +#define VIRTIO_MAX_HDR_SZ 256 struct virtio_tx_region { struct virtio_net_hdr_mrg_rxbuf tx_hdr; + char net_headers[VIRTIO_MAX_HDR_SZ]; /* for offload if mbuf is RO */ struct vring_desc tx_indir[VIRTIO_MAX_TX_INDIRECT] __attribute__((__aligned__(16))); };