From patchwork Thu Jan 15 05:15:24 2015 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Ouyang Changchun X-Patchwork-Id: 2318 Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [IPv6:::1]) by dpdk.org (Postfix) with ESMTP id 15F505ABB; Thu, 15 Jan 2015 06:16:32 +0100 (CET) Received: from mga01.intel.com (mga01.intel.com [192.55.52.88]) by dpdk.org (Postfix) with ESMTP id E1E365A1F for ; Thu, 15 Jan 2015 06:16:17 +0100 (CET) Received: from fmsmga002.fm.intel.com ([10.253.24.26]) by fmsmga101.fm.intel.com with ESMTP; 14 Jan 2015 21:16:14 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.09,401,1418112000"; d="scan'208";a="662128046" Received: from shvmail01.sh.intel.com ([10.239.29.42]) by fmsmga002.fm.intel.com with ESMTP; 14 Jan 2015 21:16:13 -0800 Received: from shecgisg004.sh.intel.com (shecgisg004.sh.intel.com [10.239.29.89]) by shvmail01.sh.intel.com with ESMTP id t0F5GBra001769; Thu, 15 Jan 2015 13:16:11 +0800 Received: from shecgisg004.sh.intel.com (localhost [127.0.0.1]) by shecgisg004.sh.intel.com (8.13.6/8.13.6/SuSE Linux 0.8) with ESMTP id t0F5G8M3015356; Thu, 15 Jan 2015 13:16:10 +0800 Received: (from couyang@localhost) by shecgisg004.sh.intel.com (8.13.6/8.13.6/Submit) id t0F5G8AA015352; Thu, 15 Jan 2015 13:16:08 +0800 From: Ouyang Changchun To: dev@dpdk.org Date: Thu, 15 Jan 2015 13:15:24 +0800 Message-Id: <1421298930-15210-17-git-send-email-changchun.ouyang@intel.com> X-Mailer: git-send-email 1.7.12.2 In-Reply-To: <1421298930-15210-1-git-send-email-changchun.ouyang@intel.com> References: <1421298930-15210-1-git-send-email-changchun.ouyang@intel.com> Subject: [dpdk-dev] [PATCH 16/22] virtio: Free mbuf's with threshold X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: patches and discussions about DPDK List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: 
dev-bounces@dpdk.org Sender: "dev" This makes the virtio driver work like ixgbe. Transmit buffers are held until a transmit threshold is reached. The previous behavior was to hold mbufs until the ring entry was reused, which caused more memory usage than needed. Signed-off-by: Stephen Hemminger Signed-off-by: Changchun Ouyang --- lib/librte_pmd_virtio/virtio_ethdev.c | 7 ++-- lib/librte_pmd_virtio/virtio_rxtx.c | 75 +++++++++++++++++++++++++---------- lib/librte_pmd_virtio/virtqueue.h | 3 +- 3 files changed, 60 insertions(+), 25 deletions(-) diff --git a/lib/librte_pmd_virtio/virtio_ethdev.c b/lib/librte_pmd_virtio/virtio_ethdev.c index c5f21c1..1ec29e1 100644 --- a/lib/librte_pmd_virtio/virtio_ethdev.c +++ b/lib/librte_pmd_virtio/virtio_ethdev.c @@ -176,15 +176,16 @@ virtio_send_command(struct virtqueue *vq, struct virtio_pmd_ctrl *ctrl, virtqueue_notify(vq); - while (vq->vq_used_cons_idx == vq->vq_ring.used->idx) + rte_rmb(); + while (vq->vq_used_cons_idx == vq->vq_ring.used->idx) { + rte_rmb(); usleep(100); + } while (vq->vq_used_cons_idx != vq->vq_ring.used->idx) { uint32_t idx, desc_idx, used_idx; struct vring_used_elem *uep; - virtio_rmb(); - used_idx = (uint32_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1)); uep = &vq->vq_ring.used->ring[used_idx]; diff --git a/lib/librte_pmd_virtio/virtio_rxtx.c b/lib/librte_pmd_virtio/virtio_rxtx.c index b44f091..12c2310 100644 --- a/lib/librte_pmd_virtio/virtio_rxtx.c +++ b/lib/librte_pmd_virtio/virtio_rxtx.c @@ -129,17 +129,32 @@ virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts, return i; } +#ifndef DEFAULT_TX_FREE_THRESH +#define DEFAULT_TX_FREE_THRESH 32 +#endif + +/* Cleanup from completed transmits. 
*/ static void -virtqueue_dequeue_pkt_tx(struct virtqueue *vq) +virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num) { - struct vring_used_elem *uep; - uint16_t used_idx, desc_idx; + uint16_t i, used_idx, desc_idx; + for (i = 0; i < num; i++) { + struct vring_used_elem *uep; + struct vq_desc_extra *dxp; + + used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1)); + uep = &vq->vq_ring.used->ring[used_idx]; + dxp = &vq->vq_descx[used_idx]; + + desc_idx = (uint16_t) uep->id; + vq->vq_used_cons_idx++; + vq_ring_free_chain(vq, desc_idx); - used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1)); - uep = &vq->vq_ring.used->ring[used_idx]; - desc_idx = (uint16_t) uep->id; - vq->vq_used_cons_idx++; - vq_ring_free_chain(vq, desc_idx); + if (dxp->cookie != NULL) { + rte_pktmbuf_free(dxp->cookie); + dxp->cookie = NULL; + } + } } @@ -203,8 +218,6 @@ virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie) idx = head_idx; dxp = &txvq->vq_descx[idx]; - if (dxp->cookie != NULL) - rte_pktmbuf_free(dxp->cookie); dxp->cookie = (void *)cookie; dxp->ndescs = needed; @@ -404,6 +417,7 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev, { uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX; struct virtqueue *vq; + uint16_t tx_free_thresh; int ret; PMD_INIT_FUNC_TRACE(); @@ -421,6 +435,22 @@ virtio_dev_tx_queue_setup(struct rte_eth_dev *dev, return ret; } + tx_free_thresh = tx_conf->tx_free_thresh; + if (tx_free_thresh == 0) + tx_free_thresh = + RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH); + + if (tx_free_thresh >= (vq->vq_nentries - 3)) { + RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the " + "number of TX entries minus 3 (%u)." 
+ " (tx_free_thresh=%u port=%u queue=%u)\n", + vq->vq_nentries - 3, + tx_free_thresh, dev->data->port_id, queue_idx); + return -EINVAL; + } + + vq->vq_free_thresh = tx_free_thresh; + dev->data->tx_queues[queue_idx] = vq; return 0; } @@ -688,11 +718,9 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) { struct virtqueue *txvq = tx_queue; struct rte_mbuf *txm; - uint16_t nb_used, nb_tx, num; + uint16_t nb_used, nb_tx; int error; - nb_tx = 0; - if (unlikely(nb_pkts < 1)) return nb_pkts; @@ -700,21 +728,26 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) nb_used = VIRTQUEUE_NUSED(txvq); virtio_rmb(); + if (likely(nb_used > txvq->vq_free_thresh)) + virtio_xmit_cleanup(txvq, nb_used); - num = (uint16_t)(likely(nb_used < VIRTIO_MBUF_BURST_SZ) ? nb_used : VIRTIO_MBUF_BURST_SZ); + nb_tx = 0; while (nb_tx < nb_pkts) { /* Need one more descriptor for virtio header. */ int need = tx_pkts[nb_tx]->nb_segs - txvq->vq_free_cnt + 1; - int deq_cnt = RTE_MIN(need, (int)num); - num -= (deq_cnt > 0) ? deq_cnt : 0; - while (deq_cnt > 0) { - virtqueue_dequeue_pkt_tx(txvq); - deq_cnt--; + /*Positive value indicates it need free vring descriptors */ + if (unlikely(need > 0)) { + nb_used = VIRTQUEUE_NUSED(txvq); + virtio_rmb(); + need = RTE_MIN(need, (int)nb_used); + + virtio_xmit_cleanup(txvq, need); + need = (int)tx_pkts[nb_tx]->nb_segs - + txvq->vq_free_cnt + 1; } - need = (int)tx_pkts[nb_tx]->nb_segs - txvq->vq_free_cnt + 1; /* * Zero or negative value indicates it has enough free * descriptors to use for transmitting. 
@@ -723,7 +756,7 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) txm = tx_pkts[nb_tx]; /* Do VLAN tag insertion */ - if (txm->ol_flags & PKT_TX_VLAN_PKT) { + if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) { error = rte_vlan_insert(&txm); if (unlikely(error)) { rte_pktmbuf_free(txm); diff --git a/lib/librte_pmd_virtio/virtqueue.h b/lib/librte_pmd_virtio/virtqueue.h index d210f4f..6c45c27 100644 --- a/lib/librte_pmd_virtio/virtqueue.h +++ b/lib/librte_pmd_virtio/virtqueue.h @@ -164,6 +164,7 @@ struct virtqueue { struct rte_mempool *mpool; /**< mempool for mbuf allocation */ uint16_t queue_id; /**< DPDK queue index. */ uint8_t port_id; /**< Device port identifier. */ + uint16_t vq_queue_index; /**< PCI queue index */ void *vq_ring_virt_mem; /**< linear address of vring*/ unsigned int vq_ring_size; @@ -172,7 +173,7 @@ struct virtqueue { struct vring vq_ring; /**< vring keeping desc, used and avail */ uint16_t vq_free_cnt; /**< num of desc available */ uint16_t vq_nentries; /**< vring desc numbers */ - uint16_t vq_queue_index; /**< PCI queue index */ + uint16_t vq_free_thresh; /**< free threshold */ /** * Head of the free chain in the descriptor table. If * there are no free descriptors, this will be set to