From patchwork Sat Nov 25 03:17:39 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: "Hu, Jiayu" X-Patchwork-Id: 31666 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id 8F63B2A6C; Sat, 25 Nov 2017 04:15:29 +0100 (CET) Received: from mga03.intel.com (mga03.intel.com [134.134.136.65]) by dpdk.org (Postfix) with ESMTP id 26670223 for ; Sat, 25 Nov 2017 04:15:25 +0100 (CET) Received: from fmsmga002.fm.intel.com ([10.253.24.26]) by orsmga103.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 24 Nov 2017 19:15:23 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos; i="5.44,450,1505804400"; d="scan'208"; a="1248340430" Received: from dpdk15.sh.intel.com ([10.67.111.77]) by fmsmga002.fm.intel.com with ESMTP; 24 Nov 2017 19:15:22 -0800 From: Jiayu Hu To: dev@dpdk.org Cc: jianfeng.tan@intel.com, konstantin.ananyev@intel.com, Jiayu Hu Date: Sat, 25 Nov 2017 11:17:39 +0800 Message-Id: <1511579860-37020-2-git-send-email-jiayu.hu@intel.com> X-Mailer: git-send-email 2.7.4 In-Reply-To: <1511579860-37020-1-git-send-email-jiayu.hu@intel.com> References: <1511579860-37020-1-git-send-email-jiayu.hu@intel.com> Subject: [dpdk-dev] [PATCH 1/2] gro: TCP/IPV4 GRO codes cleanup X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" This patch updates TCP/IPv4 GRO as follows: - remove IP identification check when merge TCP/IPv4 packets - extract common internal functions for supporting tunneled GRO - rename internal functions and variants for better understanding - update comments Signed-off-by: Jiayu Hu --- lib/librte_gro/gro_tcp4.c | 294 +++++++++++++--------------------------------- 
lib/librte_gro/gro_tcp4.h | 236 ++++++++++++++++++++++++++----------- lib/librte_gro/rte_gro.c | 60 +++++----- 3 files changed, 278 insertions(+), 312 deletions(-) diff --git a/lib/librte_gro/gro_tcp4.c b/lib/librte_gro/gro_tcp4.c index 61a0423..a560a84 100644 --- a/lib/librte_gro/gro_tcp4.c +++ b/lib/librte_gro/gro_tcp4.c @@ -34,8 +34,6 @@ #include #include #include -#include -#include #include "gro_tcp4.h" @@ -72,20 +70,20 @@ gro_tcp4_tbl_create(uint16_t socket_id, } tbl->max_item_num = entries_num; - size = sizeof(struct gro_tcp4_key) * entries_num; - tbl->keys = rte_zmalloc_socket(__func__, + size = sizeof(struct gro_tcp4_flow) * entries_num; + tbl->flows = rte_zmalloc_socket(__func__, size, RTE_CACHE_LINE_SIZE, socket_id); - if (tbl->keys == NULL) { + if (tbl->flows == NULL) { rte_free(tbl->items); rte_free(tbl); return NULL; } - /* INVALID_ARRAY_INDEX indicates empty key */ + /* INVALID_ARRAY_INDEX indicates an empty flow */ for (i = 0; i < entries_num; i++) - tbl->keys[i].start_index = INVALID_ARRAY_INDEX; - tbl->max_key_num = entries_num; + tbl->flows[i].start_index = INVALID_ARRAY_INDEX; + tbl->max_flow_num = entries_num; return tbl; } @@ -97,111 +95,11 @@ gro_tcp4_tbl_destroy(void *tbl) if (tcp_tbl) { rte_free(tcp_tbl->items); - rte_free(tcp_tbl->keys); + rte_free(tcp_tbl->flows); } rte_free(tcp_tbl); } -/* - * merge two TCP/IPv4 packets without updating checksums. - * If cmp is larger than 0, append the new packet to the - * original packet. Otherwise, pre-pend the new packet to - * the original packet. 
- */ -static inline int -merge_two_tcp4_packets(struct gro_tcp4_item *item_src, - struct rte_mbuf *pkt, - uint16_t ip_id, - uint32_t sent_seq, - int cmp) -{ - struct rte_mbuf *pkt_head, *pkt_tail, *lastseg; - uint16_t tcp_datalen; - - if (cmp > 0) { - pkt_head = item_src->firstseg; - pkt_tail = pkt; - } else { - pkt_head = pkt; - pkt_tail = item_src->firstseg; - } - - /* check if the packet length will be beyond the max value */ - tcp_datalen = pkt_tail->pkt_len - pkt_tail->l2_len - - pkt_tail->l3_len - pkt_tail->l4_len; - if (pkt_head->pkt_len - pkt_head->l2_len + tcp_datalen > - TCP4_MAX_L3_LENGTH) - return 0; - - /* remove packet header for the tail packet */ - rte_pktmbuf_adj(pkt_tail, - pkt_tail->l2_len + - pkt_tail->l3_len + - pkt_tail->l4_len); - - /* chain two packets together */ - if (cmp > 0) { - item_src->lastseg->next = pkt; - item_src->lastseg = rte_pktmbuf_lastseg(pkt); - /* update IP ID to the larger value */ - item_src->ip_id = ip_id; - } else { - lastseg = rte_pktmbuf_lastseg(pkt); - lastseg->next = item_src->firstseg; - item_src->firstseg = pkt; - /* update sent_seq to the smaller value */ - item_src->sent_seq = sent_seq; - } - item_src->nb_merged++; - - /* update mbuf metadata for the merged packet */ - pkt_head->nb_segs += pkt_tail->nb_segs; - pkt_head->pkt_len += pkt_tail->pkt_len; - - return 1; -} - -static inline int -check_seq_option(struct gro_tcp4_item *item, - struct tcp_hdr *tcp_hdr, - uint16_t tcp_hl, - uint16_t tcp_dl, - uint16_t ip_id, - uint32_t sent_seq) -{ - struct rte_mbuf *pkt0 = item->firstseg; - struct ipv4_hdr *ipv4_hdr0; - struct tcp_hdr *tcp_hdr0; - uint16_t tcp_hl0, tcp_dl0; - uint16_t len; - - ipv4_hdr0 = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt0, char *) + - pkt0->l2_len); - tcp_hdr0 = (struct tcp_hdr *)((char *)ipv4_hdr0 + pkt0->l3_len); - tcp_hl0 = pkt0->l4_len; - - /* check if TCP option fields equal. If not, return 0. 
*/ - len = RTE_MAX(tcp_hl, tcp_hl0) - sizeof(struct tcp_hdr); - if ((tcp_hl != tcp_hl0) || - ((len > 0) && (memcmp(tcp_hdr + 1, - tcp_hdr0 + 1, - len) != 0))) - return 0; - - /* check if the two packets are neighbors */ - tcp_dl0 = pkt0->pkt_len - pkt0->l2_len - pkt0->l3_len - tcp_hl0; - if ((sent_seq == (item->sent_seq + tcp_dl0)) && - (ip_id == (item->ip_id + 1))) - /* append the new packet */ - return 1; - else if (((sent_seq + tcp_dl) == item->sent_seq) && - ((ip_id + item->nb_merged) == item->ip_id)) - /* pre-pend the new packet */ - return -1; - else - return 0; -} - static inline uint32_t find_an_empty_item(struct gro_tcp4_tbl *tbl) { @@ -215,13 +113,13 @@ find_an_empty_item(struct gro_tcp4_tbl *tbl) } static inline uint32_t -find_an_empty_key(struct gro_tcp4_tbl *tbl) +find_an_empty_flow(struct gro_tcp4_tbl *tbl) { uint32_t i; - uint32_t max_key_num = tbl->max_key_num; + uint32_t max_flow_num = tbl->max_flow_num; - for (i = 0; i < max_key_num; i++) - if (tbl->keys[i].start_index == INVALID_ARRAY_INDEX) + for (i = 0; i < max_flow_num; i++) + if (tbl->flows[i].start_index == INVALID_ARRAY_INDEX) return i; return INVALID_ARRAY_INDEX; } @@ -229,7 +127,6 @@ find_an_empty_key(struct gro_tcp4_tbl *tbl) static inline uint32_t insert_new_item(struct gro_tcp4_tbl *tbl, struct rte_mbuf *pkt, - uint16_t ip_id, uint32_t sent_seq, uint32_t prev_idx, uint64_t start_time) @@ -245,7 +142,6 @@ insert_new_item(struct gro_tcp4_tbl *tbl, tbl->items[item_idx].start_time = start_time; tbl->items[item_idx].next_pkt_idx = INVALID_ARRAY_INDEX; tbl->items[item_idx].sent_seq = sent_seq; - tbl->items[item_idx].ip_id = ip_id; tbl->items[item_idx].nb_merged = 1; tbl->item_num++; @@ -265,7 +161,7 @@ delete_item(struct gro_tcp4_tbl *tbl, uint32_t item_idx, { uint32_t next_idx = tbl->items[item_idx].next_pkt_idx; - /* set NULL to firstseg to indicate it's an empty item */ + /* NULL indicates an empty item */ tbl->items[item_idx].firstseg = NULL; tbl->item_num--; if (prev_item_idx != 
INVALID_ARRAY_INDEX) @@ -275,53 +171,33 @@ delete_item(struct gro_tcp4_tbl *tbl, uint32_t item_idx, } static inline uint32_t -insert_new_key(struct gro_tcp4_tbl *tbl, - struct tcp4_key *key_src, +insert_new_flow(struct gro_tcp4_tbl *tbl, + struct tcp4_flow_key *src, uint32_t item_idx) { - struct tcp4_key *key_dst; - uint32_t key_idx; + struct tcp4_flow_key *dst; + uint32_t flow_idx; - key_idx = find_an_empty_key(tbl); - if (key_idx == INVALID_ARRAY_INDEX) + flow_idx = find_an_empty_flow(tbl); + if (flow_idx == INVALID_ARRAY_INDEX) return INVALID_ARRAY_INDEX; - key_dst = &(tbl->keys[key_idx].key); + dst = &(tbl->flows[flow_idx].key); - ether_addr_copy(&(key_src->eth_saddr), &(key_dst->eth_saddr)); - ether_addr_copy(&(key_src->eth_daddr), &(key_dst->eth_daddr)); - key_dst->ip_src_addr = key_src->ip_src_addr; - key_dst->ip_dst_addr = key_src->ip_dst_addr; - key_dst->recv_ack = key_src->recv_ack; - key_dst->src_port = key_src->src_port; - key_dst->dst_port = key_src->dst_port; + ether_addr_copy(&(src->eth_saddr), &(dst->eth_saddr)); + ether_addr_copy(&(src->eth_daddr), &(dst->eth_daddr)); + dst->ip_src_addr = src->ip_src_addr; + dst->ip_dst_addr = src->ip_dst_addr; + dst->recv_ack = src->recv_ack; + dst->src_port = src->src_port; + dst->dst_port = src->dst_port; - /* non-INVALID_ARRAY_INDEX value indicates this key is valid */ - tbl->keys[key_idx].start_index = item_idx; - tbl->key_num++; + tbl->flows[flow_idx].start_index = item_idx; + tbl->flow_num++; - return key_idx; + return flow_idx; } -static inline int -is_same_key(struct tcp4_key k1, struct tcp4_key k2) -{ - if (is_same_ether_addr(&k1.eth_saddr, &k2.eth_saddr) == 0) - return 0; - - if (is_same_ether_addr(&k1.eth_daddr, &k2.eth_daddr) == 0) - return 0; - - return ((k1.ip_src_addr == k2.ip_src_addr) && - (k1.ip_dst_addr == k2.ip_dst_addr) && - (k1.recv_ack == k2.recv_ack) && - (k1.src_port == k2.src_port) && - (k1.dst_port == k2.dst_port)); -} - -/* - * update packet length for the flushed packet. 
- */ static inline void update_header(struct gro_tcp4_item *item) { @@ -343,30 +219,32 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt, struct ipv4_hdr *ipv4_hdr; struct tcp_hdr *tcp_hdr; uint32_t sent_seq; - uint16_t tcp_dl, ip_id; + uint16_t tcp_dl, hdr_len; - struct tcp4_key key; + struct tcp4_flow_key key; uint32_t cur_idx, prev_idx, item_idx; - uint32_t i, max_key_num; + uint32_t i, max_flow_num; int cmp; eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *); ipv4_hdr = (struct ipv4_hdr *)((char *)eth_hdr + pkt->l2_len); tcp_hdr = (struct tcp_hdr *)((char *)ipv4_hdr + pkt->l3_len); + hdr_len = pkt->l2_len + pkt->l3_len + pkt->l4_len; /* - * if FIN, SYN, RST, PSH, URG, ECE or - * CWR is set, return immediately. + * Don't process the packet which has FIN, SYN, RST, PSH, URG, ECE + * or CWR set. */ if (tcp_hdr->tcp_flags != TCP_ACK_FLAG) return -1; - /* if payload length is 0, return immediately */ - tcp_dl = rte_be_to_cpu_16(ipv4_hdr->total_length) - pkt->l3_len - - pkt->l4_len; - if (tcp_dl == 0) + /* + * Don't process the packet whose payload length is less than or + * equal to 0. + */ + tcp_dl = pkt->pkt_len - hdr_len; + if (tcp_dl <= 0) return -1; - ip_id = rte_be_to_cpu_16(ipv4_hdr->packet_id); sent_seq = rte_be_to_cpu_32(tcp_hdr->sent_seq); ether_addr_copy(&(eth_hdr->s_addr), &(key.eth_saddr)); @@ -377,49 +255,51 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt, key.dst_port = tcp_hdr->dst_port; key.recv_ack = tcp_hdr->recv_ack; - /* search for a key */ - max_key_num = tbl->max_key_num; - for (i = 0; i < max_key_num; i++) { - if ((tbl->keys[i].start_index != INVALID_ARRAY_INDEX) && - is_same_key(tbl->keys[i].key, key)) + /* Search for a matched flow. */ + max_flow_num = tbl->max_flow_num; + for (i = 0; i < max_flow_num; i++) { + if ((tbl->flows[i].start_index != INVALID_ARRAY_INDEX) && + is_same_tcp4_flow(tbl->flows[i].key, key)) break; } - /* can't find a key, so insert a new key and a new item. 
*/ - if (i == tbl->max_key_num) { - item_idx = insert_new_item(tbl, pkt, ip_id, sent_seq, + /* + * Fail to find a matched flow. Insert a new flow and store the + * packet into the flow. + */ + if (i == tbl->max_flow_num) { + item_idx = insert_new_item(tbl, pkt, sent_seq, INVALID_ARRAY_INDEX, start_time); if (item_idx == INVALID_ARRAY_INDEX) return -1; - if (insert_new_key(tbl, &key, item_idx) == + if (insert_new_flow(tbl, &key, item_idx) == INVALID_ARRAY_INDEX) { - /* - * fail to insert a new key, so - * delete the inserted item - */ + /* Fail to insert a new flow. */ delete_item(tbl, item_idx, INVALID_ARRAY_INDEX); return -1; } return 0; } - /* traverse all packets in the item group to find one to merge */ - cur_idx = tbl->keys[i].start_index; + /* + * Check all packets in the flow and try to find a neighbor for + * the input packet. + */ + cur_idx = tbl->flows[i].start_index; prev_idx = cur_idx; do { cmp = check_seq_option(&(tbl->items[cur_idx]), tcp_hdr, - pkt->l4_len, tcp_dl, ip_id, sent_seq); + pkt->l4_len, tcp_dl, sent_seq, 0); if (cmp) { if (merge_two_tcp4_packets(&(tbl->items[cur_idx]), - pkt, ip_id, - sent_seq, cmp)) + pkt, sent_seq, cmp, 0)) return 1; /* - * fail to merge two packets since the packet - * length will be greater than the max value. - * So insert the packet into the item group. + * Fail to merge the two packets, as the packet + * length is greater than the max value. Store + * the packet into the flow. */ - if (insert_new_item(tbl, pkt, ip_id, sent_seq, + if (insert_new_item(tbl, pkt, sent_seq, prev_idx, start_time) == INVALID_ARRAY_INDEX) return -1; @@ -429,11 +309,8 @@ gro_tcp4_reassemble(struct rte_mbuf *pkt, cur_idx = tbl->items[cur_idx].next_pkt_idx; } while (cur_idx != INVALID_ARRAY_INDEX); - /* - * can't find a packet in the item group to merge, - * so insert the packet into the item group. - */ - if (insert_new_item(tbl, pkt, ip_id, sent_seq, prev_idx, + /* Fail to find a neighbor, so store the packet into the flow. 
*/ + if (insert_new_item(tbl, pkt, sent_seq, prev_idx, start_time) == INVALID_ARRAY_INDEX) return -1; @@ -448,44 +325,33 @@ gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl, { uint16_t k = 0; uint32_t i, j; - uint32_t max_key_num = tbl->max_key_num; + uint32_t max_flow_num = tbl->max_flow_num; - for (i = 0; i < max_key_num; i++) { - /* all keys have been checked, return immediately */ - if (tbl->key_num == 0) + for (i = 0; i < max_flow_num; i++) { + if (unlikely(tbl->flow_num == 0)) return k; - j = tbl->keys[i].start_index; + j = tbl->flows[i].start_index; while (j != INVALID_ARRAY_INDEX) { if (tbl->items[j].start_time <= flush_timestamp) { out[k++] = tbl->items[j].firstseg; if (tbl->items[j].nb_merged > 1) update_header(&(tbl->items[j])); /* - * delete the item and get - * the next packet index + * Delete the packet and get the next + * packet in the flow. */ - j = delete_item(tbl, j, - INVALID_ARRAY_INDEX); + j = delete_item(tbl, j, INVALID_ARRAY_INDEX); + tbl->flows[i].start_index = j; + if (j == INVALID_ARRAY_INDEX) + tbl->flow_num--; - /* - * delete the key as all of - * packets are flushed - */ - if (j == INVALID_ARRAY_INDEX) { - tbl->keys[i].start_index = - INVALID_ARRAY_INDEX; - tbl->key_num--; - } else - /* update start_index of the key */ - tbl->keys[i].start_index = j; - - if (k == nb_out) + if (unlikely(k == nb_out)) return k; } else /* - * left packets of this key won't be - * timeout, so go to check other keys. + * The left packets in this flow won't be + * timeout. Go to check other flows. */ break; } diff --git a/lib/librte_gro/gro_tcp4.h b/lib/librte_gro/gro_tcp4.h index 0a81716..de9925e 100644 --- a/lib/librte_gro/gro_tcp4.h +++ b/lib/librte_gro/gro_tcp4.h @@ -33,17 +33,20 @@ #ifndef _GRO_TCP4_H_ #define _GRO_TCP4_H_ +#include +#include + #define INVALID_ARRAY_INDEX 0xffffffffUL #define GRO_TCP4_TBL_MAX_ITEM_NUM (1024UL * 1024UL) /* - * the max L3 length of a TCP/IPv4 packet. 
The L3 length - * is the sum of ipv4 header, tcp header and L4 payload. + * The max length of a IPv4 packet, which includes the length of L3 + * header, L4 header and the payload. */ -#define TCP4_MAX_L3_LENGTH UINT16_MAX +#define MAX_IPV4_PKT_LENGTH UINT16_MAX -/* criteria of mergeing packets */ -struct tcp4_key { +/* Header fields representing a TCP/IPv4 flow. */ +struct tcp4_flow_key { struct ether_addr eth_saddr; struct ether_addr eth_daddr; uint32_t ip_src_addr; @@ -54,43 +57,39 @@ struct tcp4_key { uint16_t dst_port; }; -struct gro_tcp4_key { - struct tcp4_key key; +struct gro_tcp4_flow { + struct tcp4_flow_key key; /* - * the index of the first packet in the item group. - * If the value is INVALID_ARRAY_INDEX, it means - * the key is empty. + * The index of the first packet in the flow. + * INVALID_ARRAY_INDEX indicates an empty flow. */ uint32_t start_index; }; struct gro_tcp4_item { /* - * first segment of the packet. If the value + * First segment of the packet. If the value * is NULL, it means the item is empty. */ struct rte_mbuf *firstseg; - /* last segment of the packet */ + /* Last segment of the packet */ struct rte_mbuf *lastseg; /* - * the time when the first packet is inserted + * The time when the first packet is inserted * into the table. If a packet in the table is * merged with an incoming packet, this value - * won't be updated. We set this value only - * when the first packet is inserted into the - * table. + * won't be updated. */ uint64_t start_time; /* - * we use next_pkt_idx to chain the packets that - * have same key value but can't be merged together. + * next_pkt_idx is used to chain the packets that + * are in the same flow but can't be merged together + * (i.e. caused by packet reordering). 
*/ uint32_t next_pkt_idx; - /* the sequence number of the packet */ + /* TCP sequence number of the packet */ uint32_t sent_seq; - /* the IP ID of the packet */ - uint16_t ip_id; - /* the number of merged packets */ + /* The number of merged packets */ uint16_t nb_merged; }; @@ -100,31 +99,31 @@ struct gro_tcp4_item { struct gro_tcp4_tbl { /* item array */ struct gro_tcp4_item *items; - /* key array */ - struct gro_tcp4_key *keys; + /* flow array */ + struct gro_tcp4_flow *flows; /* current item number */ uint32_t item_num; - /* current key num */ - uint32_t key_num; + /* current flow num */ + uint32_t flow_num; /* item array size */ uint32_t max_item_num; - /* key array size */ - uint32_t max_key_num; + /* flow array size */ + uint32_t max_flow_num; }; /** * This function creates a TCP/IPv4 reassembly table. * * @param socket_id - * socket index for allocating TCP/IPv4 reassemble table + * Socket index for allocating the TCP/IPv4 reassembly table * @param max_flow_num - * the maximum number of flows in the TCP/IPv4 GRO table + * The maximum number of flows in the TCP/IPv4 GRO table * @param max_item_per_flow - * the maximum packet number per flow. + * The maximum number of packets per flow * * @return - * if create successfully, return a pointer which points to the - * created TCP/IPv4 GRO table. Otherwise, return NULL. + * - Return the table pointer on success. + * - Return NULL on failure. */ void *gro_tcp4_tbl_create(uint16_t socket_id, uint16_t max_flow_num, @@ -134,62 +133,53 @@ void *gro_tcp4_tbl_create(uint16_t socket_id, * This function destroys a TCP/IPv4 reassembly table. * * @param tbl - * a pointer points to the TCP/IPv4 reassembly table. + * Pointer pointing to the TCP/IPv4 reassembly table. */ void gro_tcp4_tbl_destroy(void *tbl); /** - * This function searches for a packet in the TCP/IPv4 reassembly table - * to merge with the inputted one. To merge two packets is to chain them - * together and update packet headers. 
Packets, whose SYN, FIN, RST, PSH - * CWR, ECE or URG bit is set, are returned immediately. Packets which - * only have packet headers (i.e. without data) are also returned - * immediately. Otherwise, the packet is either merged, or inserted into - * the table. Besides, if there is no available space to insert the - * packet, this function returns immediately too. + * This function merges a TCP/IPv4 packet. It doesn't process the packet, + * which has SYN, FIN, RST, PSH, CWR, ECE or URG set, or doesn't have + * payload. It returns the packet if there is no available space in the + * table. * - * This function assumes the inputted packet is with correct IPv4 and - * TCP checksums. And if two packets are merged, it won't re-calculate - * IPv4 and TCP checksums. Besides, if the inputted packet is IP - * fragmented, it assumes the packet is complete (with TCP header). + * This function doesn't check if the packet has correct checksums. + * Additionally, it doesn't re-calculate checksums for the merged packet. + * If the input packet is IP fragmented, it assumes the packet is complete. * * @param pkt - * packet to reassemble. + * Packet to reassemble * @param tbl - * a pointer that points to a TCP/IPv4 reassembly table. + * Pointer pointing to the TCP/IPv4 reassembly table * @start_time - * the start time that the packet is inserted into the table + * The time when the packet is inserted into the table * * @return - * if the packet doesn't have data, or SYN, FIN, RST, PSH, CWR, ECE - * or URG bit is set, or there is no available space in the table to - * insert a new item or a new key, return a negative value. If the - * packet is merged successfully, return an positive value. If the - * packet is inserted into the table, return 0. + * - Return a positive value if the input packet is merged. + * - Return zero if the input packet isn't merged but stored in the table. + * - Return a negative value if the packet is neither merged nor stored. 
*/ int32_t gro_tcp4_reassemble(struct rte_mbuf *pkt, struct gro_tcp4_tbl *tbl, uint64_t start_time); /** - * This function flushes timeout packets in a TCP/IPv4 reassembly table - * to applications, and without updating checksums for merged packets. - * The max number of flushed timeout packets is the element number of - * the array which is used to keep flushed packets. + * This function flushes timeout packets in a TCP/IPv4 reassembly table, + * without updating checksums. * * @param tbl - * a pointer that points to a TCP GRO table. + * Pointer pointing to a TCP/IPv4 reassembly table * @param flush_timestamp - * this function flushes packets which are inserted into the table - * before or at the flush_timestamp. + * Flush packets which are inserted into the table before or at the + * flush_timestamp * @param out - * pointer array which is used to keep flushed packets. + * Pointer array used to keep flushed packets * @param nb_out - * the element number of out. It's also the max number of timeout + * The element number in 'out'. It also determines the maximum number of * packets that can be flushed finally. * * @return - * the number of packets that are returned. + * The number of flushed packets */ uint16_t gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl, uint64_t flush_timestamp, @@ -201,10 +191,124 @@ uint16_t gro_tcp4_tbl_timeout_flush(struct gro_tcp4_tbl *tbl, * reassembly table. * * @param tbl - * pointer points to a TCP/IPv4 reassembly table. + * Pointer pointing to a TCP/IPv4 reassembly table * * @return - * the number of packets in the table + * The number of packets in the table */ uint32_t gro_tcp4_tbl_pkt_count(void *tbl); + +/* + * Check if two TCP/IPv4 packets belong to the same flow. 
+ */ +static inline int +is_same_tcp4_flow(struct tcp4_flow_key k1, struct tcp4_flow_key k2) +{ + if (is_same_ether_addr(&k1.eth_saddr, &k2.eth_saddr) == 0) + return 0; + + if (is_same_ether_addr(&k1.eth_daddr, &k2.eth_daddr) == 0) + return 0; + + return ((k1.ip_src_addr == k2.ip_src_addr) && + (k1.ip_dst_addr == k2.ip_dst_addr) && + (k1.recv_ack == k2.recv_ack) && + (k1.src_port == k2.src_port) && + (k1.dst_port == k2.dst_port)); +} + +/* + * Check if two TCP/IPv4 packets are neighbors. + */ +static inline int +check_seq_option(struct gro_tcp4_item *item, + struct tcp_hdr *tcph, + uint16_t tcp_hl, + uint16_t tcp_dl, + uint32_t sent_seq, + uint16_t l2_offset) +{ + struct rte_mbuf *pkt_orig = item->firstseg; + struct ipv4_hdr *iph_orig; + struct tcp_hdr *tcph_orig; + uint16_t len, l4_len_orig; + + iph_orig = (struct ipv4_hdr *)(rte_pktmbuf_mtod(pkt_orig, char *) + + l2_offset + pkt_orig->l2_len); + tcph_orig = (struct tcp_hdr *)((char *)iph_orig + pkt_orig->l3_len); + l4_len_orig = pkt_orig->l4_len; + + /* Check if TCP option fields equal */ + len = RTE_MAX(tcp_hl, l4_len_orig) - sizeof(struct tcp_hdr); + if ((tcp_hl != l4_len_orig) || ((len > 0) && + (memcmp(tcph + 1, tcph_orig + 1, + len) != 0))) + return 0; + + /* Check if the two packets are neighbors */ + len = pkt_orig->pkt_len - l2_offset - pkt_orig->l2_len - + pkt_orig->l3_len - l4_len_orig; + if (sent_seq == item->sent_seq + len) + /* Append the new packet */ + return 1; + else if (sent_seq + tcp_dl == item->sent_seq) + /* Pre-pend the new packet */ + return -1; + else + return 0; +} + +/* + * Merge two TCP/IPv4 packets without updating checksums. + * If cmp is larger than 0, append the new packet to the + * original packet. Otherwise, pre-pend the new packet to + * the original packet. 
+ */ +static inline int +merge_two_tcp4_packets(struct gro_tcp4_item *item, + struct rte_mbuf *pkt, + uint32_t sent_seq, + int cmp, + uint16_t l2_offset) +{ + struct rte_mbuf *pkt_head, *pkt_tail, *lastseg; + uint16_t hdr_len; + + if (cmp > 0) { + pkt_head = item->firstseg; + pkt_tail = pkt; + } else { + pkt_head = pkt; + pkt_tail = item->firstseg; + } + + /* Check if the length is greater than the max value */ + hdr_len = l2_offset + pkt_head->l2_len + pkt_head->l3_len + + pkt_head->l4_len; + if (pkt_head->pkt_len - l2_offset - pkt_head->l2_len + + pkt_tail->pkt_len - hdr_len > MAX_IPV4_PKT_LENGTH) + return 0; + + /* Remove packet header for the tail packet */ + rte_pktmbuf_adj(pkt_tail, hdr_len); + + /* Chain two packets together */ + if (cmp > 0) { + item->lastseg->next = pkt; + item->lastseg = rte_pktmbuf_lastseg(pkt); + } else { + lastseg = rte_pktmbuf_lastseg(pkt); + lastseg->next = item->firstseg; + item->firstseg = pkt; + /* Update sent_seq to the smaller value */ + item->sent_seq = sent_seq; + } + item->nb_merged++; + + /* Update mbuf metadata for the merged packet */ + pkt_head->nb_segs += pkt_tail->nb_segs; + pkt_head->pkt_len += pkt_tail->pkt_len; + + return 1; +} #endif diff --git a/lib/librte_gro/rte_gro.c b/lib/librte_gro/rte_gro.c index 7853246..dfee932 100644 --- a/lib/librte_gro/rte_gro.c +++ b/lib/librte_gro/rte_gro.c @@ -51,6 +51,9 @@ static gro_tbl_destroy_fn tbl_destroy_fn[RTE_GRO_TYPE_MAX_NUM] = { static gro_tbl_pkt_count_fn tbl_pkt_count_fn[RTE_GRO_TYPE_MAX_NUM] = { gro_tcp4_tbl_pkt_count, NULL}; +#define IS_IPV4_TCP_PKT(ptype) (RTE_ETH_IS_IPV4_HDR(ptype) && \ + ((ptype & RTE_PTYPE_L4_TCP) == RTE_PTYPE_L4_TCP)) + /* * GRO context structure, which is used to merge packets. It keeps * many reassembly tables of desired GRO types. 
Applications need to @@ -131,62 +134,55 @@ rte_gro_reassemble_burst(struct rte_mbuf **pkts, uint16_t nb_pkts, const struct rte_gro_param *param) { - uint16_t i; - uint16_t nb_after_gro = nb_pkts; - uint32_t item_num; - - /* allocate a reassembly table for TCP/IPv4 GRO */ + /* Allocate a reassembly table for TCP/IPv4 GRO. */ struct gro_tcp4_tbl tcp_tbl; - struct gro_tcp4_key tcp_keys[RTE_GRO_MAX_BURST_ITEM_NUM]; + struct gro_tcp4_flow tcp_flows[RTE_GRO_MAX_BURST_ITEM_NUM]; struct gro_tcp4_item tcp_items[RTE_GRO_MAX_BURST_ITEM_NUM] = {{0} }; struct rte_mbuf *unprocess_pkts[nb_pkts]; - uint16_t unprocess_num = 0; - int32_t ret; uint64_t current_time; + uint32_t item_num; + int32_t ret; + uint16_t i, unprocess_num = 0, nb_after_gro = nb_pkts; if ((param->gro_types & RTE_GRO_TCP_IPV4) == 0) return nb_pkts; - /* get the actual number of packets */ + /* Get the actual number of packets. */ item_num = RTE_MIN(nb_pkts, (param->max_flow_num * - param->max_item_per_flow)); + param->max_item_per_flow)); item_num = RTE_MIN(item_num, RTE_GRO_MAX_BURST_ITEM_NUM); for (i = 0; i < item_num; i++) - tcp_keys[i].start_index = INVALID_ARRAY_INDEX; + tcp_flows[i].start_index = INVALID_ARRAY_INDEX; - tcp_tbl.keys = tcp_keys; + tcp_tbl.flows = tcp_flows; tcp_tbl.items = tcp_items; - tcp_tbl.key_num = 0; + tcp_tbl.flow_num = 0; tcp_tbl.item_num = 0; - tcp_tbl.max_key_num = item_num; + tcp_tbl.max_flow_num = item_num; tcp_tbl.max_item_num = item_num; current_time = rte_rdtsc(); for (i = 0; i < nb_pkts; i++) { - if ((pkts[i]->packet_type & (RTE_PTYPE_L3_IPV4 | - RTE_PTYPE_L4_TCP)) == - (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP)) { - ret = gro_tcp4_reassemble(pkts[i], - &tcp_tbl, + if (IS_IPV4_TCP_PKT(pkts[i]->packet_type)) { + ret = gro_tcp4_reassemble(pkts[i], &tcp_tbl, current_time); if (ret > 0) - /* merge successfully */ + /* Merge successfully */ nb_after_gro--; - else if (ret < 0) { - unprocess_pkts[unprocess_num++] = - pkts[i]; - } + else if (ret < 0) + unprocess_pkts[unprocess_num++] = 
pkts[i]; } else unprocess_pkts[unprocess_num++] = pkts[i]; } - /* re-arrange GROed packets */ if (nb_after_gro < nb_pkts) { + /* Flush packets from the tables. */ i = gro_tcp4_tbl_timeout_flush(&tcp_tbl, current_time, pkts, nb_pkts); + /* Copy unprocessed packets. */ if (unprocess_num > 0) { memcpy(&pkts[i], unprocess_pkts, sizeof(struct rte_mbuf *) * @@ -202,10 +198,11 @@ rte_gro_reassemble(struct rte_mbuf **pkts, uint16_t nb_pkts, void *ctx) { - uint16_t i, unprocess_num = 0; struct rte_mbuf *unprocess_pkts[nb_pkts]; struct gro_ctx *gro_ctx = ctx; + void *tbl; uint64_t current_time; + uint16_t i, unprocess_num = 0; if ((gro_ctx->gro_types & RTE_GRO_TCP_IPV4) == 0) return nb_pkts; @@ -213,12 +210,9 @@ rte_gro_reassemble(struct rte_mbuf **pkts, current_time = rte_rdtsc(); for (i = 0; i < nb_pkts; i++) { - if ((pkts[i]->packet_type & (RTE_PTYPE_L3_IPV4 | - RTE_PTYPE_L4_TCP)) == - (RTE_PTYPE_L3_IPV4 | RTE_PTYPE_L4_TCP)) { - if (gro_tcp4_reassemble(pkts[i], - gro_ctx->tbls - [RTE_GRO_TCP_IPV4_INDEX], + if (IS_IPV4_TCP_PKT(pkts[i]->packet_type)) { + tbl = gro_ctx->tbls[RTE_GRO_TCP_IPV4_INDEX]; + if (gro_tcp4_reassemble(pkts[i], tbl, current_time) < 0) unprocess_pkts[unprocess_num++] = pkts[i]; } else @@ -252,6 +246,7 @@ rte_gro_timeout_flush(void *ctx, flush_timestamp, out, max_nb_out); } + return 0; } @@ -274,5 +269,6 @@ rte_gro_get_pkt_count(void *ctx) continue; item_num += pkt_count_fn(gro_ctx->tbls[i]); } + return item_num; }