From patchwork Tue Jul 7 17:12:28 2015
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Rahul Lakkireddy
X-Patchwork-Id: 6161
X-Original-To: patchwork@dpdk.org
Delivered-To: patchwork@dpdk.org
Received: from [92.243.14.124] (localhost [IPv6:::1])
	by dpdk.org (Postfix) with ESMTP id B052E5A40;
	Tue, 7 Jul 2015 19:12:53 +0200 (CEST)
Received: from stargate3.asicdesigners.com (stargate.chelsio.com [67.207.112.58])
	by dpdk.org (Postfix) with ESMTP id 591B65A0A
	for ; Tue, 7 Jul 2015 19:12:51 +0200 (CEST)
Received: from localhost (scalar.blr.asicdesigners.com [10.193.185.94])
	by stargate3.asicdesigners.com (8.13.8/8.13.8) with ESMTP id t67HCk03015230;
	Tue, 7 Jul 2015 10:12:47 -0700
From: Rahul Lakkireddy
To: dev@dpdk.org
Date: Tue, 7 Jul 2015 22:42:28 +0530
Message-Id: <96172d6f608d59f6d8463d407f21d08a963d6d5a.1436288467.git.rahul.lakkireddy@chelsio.com>
X-Mailer: git-send-email 2.4.1
Cc: Felix Marti, Kumar Sanghvi, Nirranjan Kirubaharan
Subject: [dpdk-dev] [PATCH 1/3] cxgbe: Fix RX performance for cxgbe PMD.
X-BeenThere: dev@dpdk.org
X-Mailman-Version: 2.1.15
Precedence: list
List-Id: patches and discussions about DPDK
Errors-To: dev-bounces@dpdk.org
Sender: "dev"

The CXGBE PMD allocates a new mbuf for every packet received, which can
lead to a performance hit. Instead, allocate mbufs in bulk and re-use
them. Also, simplify the overall Rx handler and update its logic to
improve Rx performance.

Signed-off-by: Rahul Lakkireddy
Signed-off-by: Kumar Sanghvi
---
 drivers/net/cxgbe/base/adapter.h        |   2 +
 drivers/net/cxgbe/base/t4_regs_values.h |   1 +
 drivers/net/cxgbe/sge.c                 | 239 ++++++++++++--------------------
 3 files changed, 89 insertions(+), 153 deletions(-)

diff --git a/drivers/net/cxgbe/base/adapter.h b/drivers/net/cxgbe/base/adapter.h
index 0ea1c95..a1e8ef7 100644
--- a/drivers/net/cxgbe/base/adapter.h
+++ b/drivers/net/cxgbe/base/adapter.h
@@ -149,6 +149,7 @@ struct sge_rspq { /* state for an SGE response queue */
 	unsigned int bar2_qid;      /* Queue ID for BAR2 Queue registers */
 	unsigned int cidx;          /* consumer index */
+	unsigned int gts_idx;       /* last gts write sent */
 	unsigned int iqe_len;       /* entry size */
 	unsigned int size;          /* capacity of response queue */
 	int offset;                 /* offset into current Rx buffer */
@@ -157,6 +158,7 @@ struct sge_rspq { /* state for an SGE response queue */
 	u8 intr_params;             /* interrupt holdoff parameters */
 	u8 next_intr_params;        /* holdoff params for next interrupt */
 	u8 pktcnt_idx;              /* interrupt packet threshold */
+	u8 port_id;                 /* associated port-id */
 	u8 idx;                     /* queue index within its group */
 	u16 cntxt_id;               /* SGE relative QID for the response Q */
 	u16 abs_id;                 /* absolute SGE id for the response q */
diff --git a/drivers/net/cxgbe/base/t4_regs_values.h b/drivers/net/cxgbe/base/t4_regs_values.h
index 181bd9d..d7d3144 100644
--- a/drivers/net/cxgbe/base/t4_regs_values.h
+++ b/drivers/net/cxgbe/base/t4_regs_values.h
@@ -68,6 +68,7 @@
  * Egress Context field values
  */
 #define X_FETCHBURSTMIN_64B		2
+#define X_FETCHBURSTMIN_128B		3
 #define X_FETCHBURSTMAX_256B		2
 #define X_FETCHBURSTMAX_512B		3
 
diff --git a/drivers/net/cxgbe/sge.c b/drivers/net/cxgbe/sge.c
index 359296e..b737183 100644
--- a/drivers/net/cxgbe/sge.c
+++ b/drivers/net/cxgbe/sge.c
@@ -74,7 +74,7 @@ static inline void ship_tx_pkt_coalesce_wr(struct adapter *adap,
 /*
  * Max number of Rx buffers we replenish at a time.
  */
-#define MAX_RX_REFILL 16U
+#define MAX_RX_REFILL 64U
 
 #define NOMEM_TMR_IDX (SGE_NTIMERS - 1)
 
@@ -238,39 +238,6 @@ static inline bool fl_starving(const struct adapter *adapter,
 	return fl->avail - fl->pend_cred <= s->fl_starve_thres;
 }
 
-static inline unsigned int get_buf_size(struct adapter *adapter,
-					const struct rx_sw_desc *d)
-{
-	struct sge *s = &adapter->sge;
-	unsigned int rx_buf_size_idx = d->dma_addr & RX_BUF_SIZE;
-	unsigned int buf_size;
-
-	switch (rx_buf_size_idx) {
-	case RX_SMALL_PG_BUF:
-		buf_size = PAGE_SIZE;
-		break;
-
-	case RX_LARGE_PG_BUF:
-		buf_size = PAGE_SIZE << s->fl_pg_order;
-		break;
-
-	case RX_SMALL_MTU_BUF:
-		buf_size = FL_MTU_SMALL_BUFSIZE(adapter);
-		break;
-
-	case RX_LARGE_MTU_BUF:
-		buf_size = FL_MTU_LARGE_BUFSIZE(adapter);
-		break;
-
-	default:
-		BUG_ON(1);
-		buf_size = 0; /* deal with bogus compiler warnings */
-		/* NOTREACHED */
-	}
-
-	return buf_size;
-}
-
 /**
  * free_rx_bufs - free the Rx buffers on an SGE free list
  * @q: the SGE free list to free buffers from
@@ -319,7 +286,8 @@ static void unmap_rx_buf(struct sge_fl *q)
 
 static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q)
 {
-	if (q->pend_cred >= 8) {
+	/* see if we have exceeded q->size / 4 */
+	if (q->pend_cred >= (q->size / 4)) {
 		u32 val = adap->params.arch.sge_fl_db;
 
 		if (is_t4(adap->params.chip))
@@ -356,15 +324,6 @@ static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q)
 	}
 }
 
-static inline struct rte_mbuf *cxgbe_rxmbuf_alloc(struct rte_mempool *mp)
-{
-	struct rte_mbuf *m;
-
-	m = __rte_mbuf_raw_alloc(mp);
-	__rte_mbuf_sanity_check_raw(m, 0);
-	return m;
-}
-
 static inline void set_rx_sw_desc(struct rx_sw_desc *sd, void *buf,
 				  dma_addr_t mapping)
 {
@@ -393,9 +352,20 @@ static unsigned int refill_fl_usembufs(struct adapter *adap, struct sge_fl *q,
 	__be64 *d = &q->desc[q->pidx];
 	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
 	unsigned int buf_size_idx = RX_SMALL_MTU_BUF;
+	struct rte_mbuf *buf_bulk[n];
+	int ret, i;
 
-	while (n--) {
-		struct rte_mbuf *mbuf = cxgbe_rxmbuf_alloc(rxq->rspq.mb_pool);
+	ret = rte_mempool_get_bulk(rxq->rspq.mb_pool, (void *)buf_bulk, n);
+	if (unlikely(ret != 0)) {
+		dev_debug(adap, "%s: failed to allocated fl entries in bulk ..\n",
+			  __func__);
+		q->alloc_failed++;
+		rxq->rspq.eth_dev->data->rx_mbuf_alloc_failed++;
+		goto out;
+	}
+
+	for (i = 0; i < n; i++) {
+		struct rte_mbuf *mbuf = buf_bulk[i];
 		dma_addr_t mapping;
 
 		if (!mbuf) {
@@ -405,11 +375,13 @@ static unsigned int refill_fl_usembufs(struct adapter *adap, struct sge_fl *q,
 			goto out;
 		}
 
+		rte_mbuf_refcnt_set(mbuf, 1);
 		mbuf->data_off = RTE_PKTMBUF_HEADROOM;
 		mbuf->next = NULL;
+		mbuf->nb_segs = 1;
+		mbuf->port = rxq->rspq.port_id;
 
 		mapping = (dma_addr_t)(mbuf->buf_physaddr + mbuf->data_off);
-		mapping |= buf_size_idx;
 		*d++ = cpu_to_be64(mapping);
 		set_rx_sw_desc(sd, mbuf, mapping);
@@ -668,6 +640,7 @@ static void write_sgl(struct rte_mbuf *mbuf, struct sge_txq *q,
 	((head) >= (tail) ? (head) - (tail) : (wrap) - (tail) + (head))
 
 #define Q_IDXDIFF(q, idx) IDXDIFF((q)->pidx, (q)->idx, (q)->size)
+#define R_IDXDIFF(q, idx) IDXDIFF((q)->cidx, (q)->idx, (q)->size)
 
 /**
  * ring_tx_db - ring a Tx queue's doorbell
@@ -1354,31 +1327,6 @@ int t4_ethrx_handler(struct sge_rspq *q, const __be64 *rsp,
 }
 
 /**
- * restore_rx_bufs - put back a packet's Rx buffers
- * @q: the SGE free list
- * @frags: number of FL buffers to restore
- *
- * Puts back on an FL the Rx buffers. The buffers have already been
- * unmapped and are left unmapped, we mark them so to prevent further
- * unmapping attempts.
- *
- * This function undoes a series of @unmap_rx_buf calls when we find out
- * that the current packet can't be processed right away afterall and we
- * need to come back to it later. This is a very rare event and there's
- * no effort to make this particularly efficient.
- */
-static void restore_rx_bufs(struct sge_fl *q, int frags)
-{
-	while (frags--) {
-		if (q->cidx == 0)
-			q->cidx = q->size - 1;
-		else
-			q->cidx--;
-		q->avail++;
-	}
-}
-
-/**
  * is_new_response - check if a response is newly written
  * @r: the response descriptor
  * @q: the response queue
@@ -1431,7 +1379,6 @@ static int process_responses(struct sge_rspq *q, int budget,
 	int budget_left = budget;
 	const struct rsp_ctrl *rc;
 	struct sge_eth_rxq *rxq = container_of(q, struct sge_eth_rxq, rspq);
-	struct adapter *adapter = q->adapter;
 
 	while (likely(budget_left)) {
 		rc = (const struct rsp_ctrl *)
@@ -1447,63 +1394,46 @@ static int process_responses(struct sge_rspq *q, int budget,
 		rsp_type = G_RSPD_TYPE(rc->u.type_gen);
 
 		if (likely(rsp_type == X_RSPD_TYPE_FLBUF)) {
-			struct pkt_gl si;
-			const struct rx_sw_desc *rsd;
-			struct rte_mbuf *pkt = NULL;
-			u32 len = ntohl(rc->pldbuflen_qid), bufsz, frags;
+			const struct rx_sw_desc *rsd =
+						&rxq->fl.sdesc[rxq->fl.cidx];
+			const struct rss_header *rss_hdr =
+						(const void *)q->cur_desc;
+			const struct cpl_rx_pkt *cpl =
+						(const void *)&q->cur_desc[1];
+			bool csum_ok = cpl->csum_calc && !cpl->err_vec;
+			struct rte_mbuf *pkt;
+			u32 len = ntohl(rc->pldbuflen_qid);
 
-			si.usembufs = rxq->usembufs;
-			/*
-			 * In "use mbufs" mode, we don't pack multiple
-			 * ingress packets per buffer (mbuf) so we
-			 * should _always_ get a "New Buffer" flags
-			 * from the SGE. Also, since we hand the
-			 * mbuf's up to the host stack for it to
-			 * eventually free, we don't release the mbuf's
-			 * in the driver (in contrast to the "packed
-			 * page" mode where the driver needs to
-			 * release its reference on the page buffers).
-			 */
 			BUG_ON(!(len & F_RSPD_NEWBUF));
-			len = G_RSPD_LEN(len);
-			si.tot_len = len;
-
-			/* gather packet fragments */
-			for (frags = 0; len; frags++) {
-				rsd = &rxq->fl.sdesc[rxq->fl.cidx];
-				bufsz = min(get_buf_size(adapter, rsd), len);
-				pkt = rsd->buf;
-				pkt->data_len = bufsz;
-				pkt->pkt_len = bufsz;
-				si.mbufs[frags] = pkt;
-				len -= bufsz;
-				unmap_rx_buf(&rxq->fl);
+			pkt = rsd->buf;
+			pkt->data_len = G_RSPD_LEN(len);
+			pkt->pkt_len = pkt->data_len;
+			unmap_rx_buf(&rxq->fl);
+
+			if (cpl->l2info & htonl(F_RXF_IP)) {
+				pkt->ol_flags |= PKT_RX_IPV4_HDR;
+				if (unlikely(!csum_ok))
+					pkt->ol_flags |= PKT_RX_IP_CKSUM_BAD;
+
+				if ((cpl->l2info &
+				     htonl(F_RXF_UDP | F_RXF_TCP)) && !csum_ok)
+					pkt->ol_flags |= PKT_RX_L4_CKSUM_BAD;
+			} else if (cpl->l2info & htonl(F_RXF_IP6)) {
+				pkt->ol_flags |= PKT_RX_IPV6_HDR;
 			}
 
-			si.va = RTE_PTR_ADD(si.mbufs[0]->buf_addr,
-					    si.mbufs[0]->data_off);
-			rte_prefetch1(si.va);
-
-			/*
-			 * For the "use mbuf" case here, we can end up
-			 * chewing through our Free List very rapidly
-			 * with one entry per Ingress packet getting
-			 * consumed. So if the handler() successfully
-			 * consumed the mbuf, check to see if we can
-			 * refill the Free List incrementally in the
-			 * loop ...
-			 */
-			si.nfrags = frags;
-			ret = q->handler(q, q->cur_desc, &si);
-
-			if (unlikely(ret != 0)) {
-				restore_rx_bufs(&rxq->fl, frags);
-			} else {
-				rx_pkts[budget - budget_left] = pkt;
-				if (fl_cap(&rxq->fl) - rxq->fl.avail >= 8)
-					__refill_fl(q->adapter, &rxq->fl);
+			if (!rss_hdr->filter_tid && rss_hdr->hash_type) {
+				pkt->ol_flags |= PKT_RX_RSS_HASH;
+				pkt->hash.rss = ntohl(rss_hdr->hash_val);
 			}
 
+			if (cpl->vlan_ex) {
+				pkt->ol_flags |= PKT_RX_VLAN_PKT;
+				pkt->vlan_tci = ntohs(cpl->vlan);
+			}
+			rxq->stats.pkts++;
+			rxq->stats.rx_bytes += pkt->pkt_len;
+			rx_pkts[budget - budget_left] = pkt;
 		} else if (likely(rsp_type == X_RSPD_TYPE_CPL)) {
 			ret = q->handler(q, q->cur_desc, NULL);
 		} else {
@@ -1518,6 +1448,34 @@ static int process_responses(struct sge_rspq *q, int budget,
 
 		rspq_next(q);
 		budget_left--;
+
+		if (R_IDXDIFF(q, gts_idx) >= 64) {
+			unsigned int cidx_inc = R_IDXDIFF(q, gts_idx);
+			unsigned int params;
+			u32 val;
+
+			__refill_fl(q->adapter, &rxq->fl);
+			params = V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX);
+			q->next_intr_params = params;
+			val = V_CIDXINC(cidx_inc) | V_SEINTARM(params);
+
+			if (unlikely(!q->bar2_addr))
+				t4_write_reg(q->adapter, MYPF_REG(A_SGE_PF_GTS),
					     val |
+					     V_INGRESSQID((u32)q->cntxt_id));
+			else {
+				writel(val | V_INGRESSQID(q->bar2_qid),
+				       (void *)((uintptr_t)q->bar2_addr +
+				       SGE_UDB_GTS));
+				/*
+				 * This Write memory Barrier will force the
+				 * write to the User Doorbell area to be
+				 * flushed.
+				 */
+				wmb();
+			}
+			q->gts_idx = q->cidx;
+		}
 	}
 
 	/*
@@ -1526,7 +1484,7 @@ static int process_responses(struct sge_rspq *q, int budget,
 	 * refill the Free List.
 	 */
 
-	if (q->offset >= 0 && fl_cap(&rxq->fl) - rxq->fl.avail >= 8)
+	if (q->offset >= 0 && fl_cap(&rxq->fl) - rxq->fl.avail >= 64)
 		__refill_fl(q->adapter, &rxq->fl);
 
 	return budget - budget_left;
@@ -1535,36 +1493,9 @@ static int process_responses(struct sge_rspq *q, int budget,
 int cxgbe_poll(struct sge_rspq *q, struct rte_mbuf **rx_pkts,
 	       unsigned int budget, unsigned int *work_done)
 {
-	unsigned int params;
-	u32 val;
 	int err = 0;
 
 	*work_done = process_responses(q, budget, rx_pkts);
-	params = V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX);
-	q->next_intr_params = params;
-	val = V_CIDXINC(*work_done) | V_SEINTARM(params);
-
-	if (*work_done) {
-		/*
-		 * If we don't have access to the new User GTS (T5+),
-		 * use the old doorbell mechanism; otherwise use the new
-		 * BAR2 mechanism.
-		 */
-		if (unlikely(!q->bar2_addr))
-			t4_write_reg(q->adapter, MYPF_REG(A_SGE_PF_GTS),
-				     val | V_INGRESSQID((u32)q->cntxt_id));
-		else {
-			writel(val | V_INGRESSQID(q->bar2_qid),
-			       (void *)((uintptr_t)q->bar2_addr +
-			       SGE_UDB_GTS));
-			/*
-			 * This Write memory Barrier will force the write to
-			 * the User Doorbell area to be flushed.
-			 */
-			wmb();
-		}
-	}
-
 	return err;
 }
 
@@ -1717,7 +1648,7 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
 	 * Hence maximum allowed burst size will be 448 bytes.
 	 */
 	c.fl0dcaen_to_fl0cidxfthresh =
-		htons(V_FW_IQ_CMD_FL0FBMIN(X_FETCHBURSTMIN_64B) |
+		htons(V_FW_IQ_CMD_FL0FBMIN(X_FETCHBURSTMIN_128B) |
 		      V_FW_IQ_CMD_FL0FBMAX((chip <= CHELSIO_T5) ?
 		      X_FETCHBURSTMAX_512B : X_FETCHBURSTMAX_256B));
 	c.fl0size = htons(flsz);
@@ -1730,6 +1661,7 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
 
 	iq->cur_desc = iq->desc;
 	iq->cidx = 0;
+	iq->gts_idx = 0;
 	iq->gen = 1;
 	iq->next_intr_params = iq->intr_params;
 	iq->cntxt_id = ntohs(c.iqid);
@@ -1739,6 +1671,7 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
 	iq->size--;			/* subtract status entry */
 	iq->eth_dev = eth_dev;
 	iq->handler = hnd;
+	iq->port_id = pi->port_id;
 	iq->mb_pool = mp;
 
 	/* set offset to -1 to distinguish ingress queues without FL */
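
Note for reviewers (illustration only, not part of the patch): the core of the
change is that refill_fl_usembufs() now takes free-list mbufs from the mempool
in a single bulk call instead of one allocation per descriptor, and resets each
mbuf before handing it to the hardware ring. The sketch below shows that
pattern in isolation against the public DPDK mempool/mbuf API; the function
name, the fixed 64-entry batch and the port_id parameter are hypothetical.

#include <rte_mbuf.h>
#include <rte_mempool.h>

/* Illustrative sketch: refill up to 'n' ring slots with mbufs taken from
 * the mempool in a single bulk operation. Returns the number of mbufs
 * placed in 'ring', or -1 if the bulk get failed. */
static int
example_bulk_refill(struct rte_mempool *mp, struct rte_mbuf **ring,
		    unsigned int n, uint8_t port_id)
{
	struct rte_mbuf *bulk[64];
	unsigned int i;

	if (n > 64)
		n = 64;

	/* rte_mempool_get_bulk() is all-or-nothing: on failure no objects
	 * are taken, so the caller can simply retry on a later poll. */
	if (rte_mempool_get_bulk(mp, (void **)bulk, n) != 0)
		return -1;

	for (i = 0; i < n; i++) {
		struct rte_mbuf *m = bulk[i];

		/* Reset the fields the Rx path relies on before re-use. */
		rte_mbuf_refcnt_set(m, 1);
		m->data_off = RTE_PKTMBUF_HEADROOM;
		m->next = NULL;
		m->nb_segs = 1;
		m->port = port_id;
		ring[i] = m;
	}

	return (int)n;
}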