[dpdk-dev,v3] virtio: Support mergeable buffer in virtio pmd
Commit Message
v3 change:
- Address the comments from Huawei and fix a potential issue of a wrong offset to
the number of descriptors in the buffer; also address other minor comments.
v2 change:
- Resolve conflicts with the tip of the code tree;
- Resolve 2 issues:
-- fix an mbuf leak when discarding an incomplete packet.
-- refine pkt.data to point to the actual start of the payload data.
v1 change:
- This patch supports the mergeable buffer feature in the DPDK-based virtio PMD, which
can receive jumbo frames with larger sizes, such as 3K, 4K or even 9K.
Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
Acked-by: Huawei Xie <huawei.xie@intel.com>
---
lib/librte_pmd_virtio/virtio_ethdev.c | 20 +--
lib/librte_pmd_virtio/virtio_ethdev.h | 3 +
lib/librte_pmd_virtio/virtio_rxtx.c | 221 +++++++++++++++++++++++++++++-----
3 files changed, 207 insertions(+), 37 deletions(-)
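For context: when VIRTIO_NET_F_MRG_RXBUF is negotiated, each receive buffer is
prefixed with a virtio_net_hdr_mrg_rxbuf header whose num_buffers field tells
the driver how many descriptors the packet spans, and the PMD links the extra
buffers into one mbuf chain. Below is a minimal sketch of that idea; the header
layout follows the virtio spec, but the seg type and chain_mergeable() helper
are simplified stand-ins for illustration, not the PMD's real API.

#include <stdint.h>
#include <stddef.h>

/* Per-buffer header when VIRTIO_NET_F_MRG_RXBUF is negotiated
 * (field order per the virtio specification). */
struct mrg_rxbuf_hdr {
	uint8_t  flags;
	uint8_t  gso_type;
	uint16_t hdr_len;
	uint16_t gso_size;
	uint16_t csum_start;
	uint16_t csum_offset;
	uint16_t num_buffers;	/* descriptors consumed by this packet */
};

/* Hypothetical segment type standing in for rte_mbuf. */
struct seg {
	void       *data;	/* first segment: starts at the header */
	struct seg *next;
};

/* Link num_buffers received segments into one packet; the extra
 * segments carry payload only. Returns the total payload length. */
static uint32_t
chain_mergeable(struct seg **segs, const uint32_t *lens)
{
	const struct mrg_rxbuf_hdr *hdr = segs[0]->data;
	uint16_t n = hdr->num_buffers ? hdr->num_buffers : 1;
	uint32_t pkt_len = lens[0] - sizeof(*hdr);
	struct seg *prev = segs[0];
	uint16_t i;

	for (i = 1; i < n; i++) {
		prev->next = segs[i];	/* append payload-only segment */
		pkt_len += lens[i];
		prev = segs[i];
	}
	prev->next = NULL;
	return pkt_len;
}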
Comments
Hi all,
Any comments on this patch?
And what is the status of merging it into the mainline?
Thanks in advance
Changchun
> -----Original Message-----
> From: Ouyang, Changchun
> Sent: Thursday, August 14, 2014 4:55 PM
> To: dev@dpdk.org
> Cc: Cao, Waterman; Ouyang, Changchun
> Subject: [PATCH v3] virtio: Support mergeable buffer in virtio pmd
2014-08-14 16:54, Ouyang Changchun:
> Signed-off-by: Changchun Ouyang <changchun.ouyang@intel.com>
> Acked-by: Huawei Xie <huawei.xie@intel.com>
Applied for version 1.7.1.
Thanks
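In the diff below, the transmit path now needs 1 + nb_segs descriptors per
packet: one for the virtio header plus one per mbuf segment, and the
VRING_DESC_F_NEXT flag is cleared on the last descriptor to terminate the
chain. A rough sketch of that chaining logic follows, using hypothetical
simplified types in place of the PMD's real vring and mbuf structures.

#include <stdint.h>

#define VRING_DESC_F_NEXT  1	/* chain continues via 'next' */

/* Simplified stand-ins for struct vring_desc and rte_mbuf. */
struct desc {
	uint64_t addr;
	uint32_t len;
	uint16_t flags;
	uint16_t next;
};

struct tx_seg {
	uint64_t dma_addr;
	uint16_t data_len;
	struct tx_seg *next;
};

/* Fill one descriptor for the virtio header, then one per segment;
 * the final descriptor ends the chain. Returns the new free-head index. */
static uint16_t
enqueue_xmit_sketch(struct desc *ring, uint16_t idx,
		    uint64_t hdr_addr, uint16_t hdr_len,
		    struct tx_seg *pkt)
{
	ring[idx].addr = hdr_addr;	/* per-queue header memzone slot */
	ring[idx].len = hdr_len;	/* hw->vtnet_hdr_size in the PMD */
	ring[idx].flags = VRING_DESC_F_NEXT;

	for (; pkt != NULL; pkt = pkt->next) {
		idx = ring[idx].next;
		ring[idx].addr = pkt->dma_addr;
		ring[idx].len = pkt->data_len;
		ring[idx].flags = VRING_DESC_F_NEXT;
	}
	ring[idx].flags &= ~VRING_DESC_F_NEXT;	/* last descriptor */
	return ring[idx].next;
}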
diff --git a/lib/librte_pmd_virtio/virtio_ethdev.c b/lib/librte_pmd_virtio/virtio_ethdev.c
index b9f5529..535d798 100644
--- a/lib/librte_pmd_virtio/virtio_ethdev.c
+++ b/lib/librte_pmd_virtio/virtio_ethdev.c
@@ -337,7 +337,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d_hdrzone",
dev->data->port_id, queue_idx);
vq->virtio_net_hdr_mz = rte_memzone_reserve_aligned(vq_name,
- vq_size * sizeof(struct virtio_net_hdr),
+ vq_size * hw->vtnet_hdr_size,
socket_id, 0, CACHE_LINE_SIZE);
if (vq->virtio_net_hdr_mz == NULL) {
rte_free(vq);
@@ -346,7 +346,7 @@ int virtio_dev_queue_setup(struct rte_eth_dev *dev,
vq->virtio_net_hdr_mem =
vq->virtio_net_hdr_mz->phys_addr;
memset(vq->virtio_net_hdr_mz->addr, 0,
- vq_size * sizeof(struct virtio_net_hdr));
+ vq_size * hw->vtnet_hdr_size);
} else if (queue_type == VTNET_CQ) {
/* Allocate a page for control vq command, data and status */
snprintf(vq_name, sizeof(vq_name), "port%d_cvq_hdrzone",
@@ -571,9 +571,6 @@ virtio_negotiate_features(struct virtio_hw *hw)
mask |= VIRTIO_NET_F_GUEST_TSO4 | VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_ECN;
mask |= VTNET_LRO_FEATURES;
- /* rx_mbuf should not be in multiple merged segments */
- mask |= VIRTIO_NET_F_MRG_RXBUF;
-
/* not negotiating INDIRECT descriptor table support */
mask |= VIRTIO_RING_F_INDIRECT_DESC;
@@ -746,7 +743,6 @@ eth_virtio_dev_init(__rte_unused struct eth_driver *eth_drv,
}
eth_dev->dev_ops = &virtio_eth_dev_ops;
- eth_dev->rx_pkt_burst = &virtio_recv_pkts;
eth_dev->tx_pkt_burst = &virtio_xmit_pkts;
if (rte_eal_process_type() == RTE_PROC_SECONDARY)
@@ -801,10 +797,13 @@ eth_virtio_dev_init(__rte_unused struct eth_driver *eth_drv,
virtio_negotiate_features(hw);
/* Setting up rx_header size for the device */
- if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF))
+ if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) {
+ eth_dev->rx_pkt_burst = &virtio_recv_mergeable_pkts;
hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
- else
+ } else {
+ eth_dev->rx_pkt_burst = &virtio_recv_pkts;
hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
+ }
/* Allocate memory for storing MAC addresses */
eth_dev->data->mac_addrs = rte_zmalloc("virtio", ETHER_ADDR_LEN, 0);
@@ -1009,7 +1008,7 @@ static void virtio_dev_free_mbufs(struct rte_eth_dev *dev)
while ((buf = (struct rte_mbuf *)virtqueue_detatch_unused(
dev->data->rx_queues[i])) != NULL) {
- rte_pktmbuf_free_seg(buf);
+ rte_pktmbuf_free(buf);
mbuf_num++;
}
@@ -1028,7 +1027,8 @@ static void virtio_dev_free_mbufs(struct rte_eth_dev *dev)
mbuf_num = 0;
while ((buf = (struct rte_mbuf *)virtqueue_detatch_unused(
dev->data->tx_queues[i])) != NULL) {
- rte_pktmbuf_free_seg(buf);
+ rte_pktmbuf_free(buf);
+
mbuf_num++;
}
diff --git a/lib/librte_pmd_virtio/virtio_ethdev.h b/lib/librte_pmd_virtio/virtio_ethdev.h
index 858e644..d2e1eed 100644
--- a/lib/librte_pmd_virtio/virtio_ethdev.h
+++ b/lib/librte_pmd_virtio/virtio_ethdev.h
@@ -104,6 +104,9 @@ int virtio_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
uint16_t virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);
+uint16_t virtio_recv_mergeable_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts);
+
uint16_t virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);
diff --git a/lib/librte_pmd_virtio/virtio_rxtx.c b/lib/librte_pmd_virtio/virtio_rxtx.c
index fcd8bd1..0b10108 100644
--- a/lib/librte_pmd_virtio/virtio_rxtx.c
+++ b/lib/librte_pmd_virtio/virtio_rxtx.c
@@ -146,6 +146,7 @@ static inline int
virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
{
struct vq_desc_extra *dxp;
+ struct virtio_hw *hw = vq->hw;
struct vring_desc *start_dp;
uint16_t needed = 1;
uint16_t head_idx, idx;
@@ -165,9 +166,11 @@ virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie)
dxp->ndescs = needed;
start_dp = vq->vq_ring.desc;
- start_dp[idx].addr =
- (uint64_t) (cookie->buf_physaddr + RTE_PKTMBUF_HEADROOM - sizeof(struct virtio_net_hdr));
- start_dp[idx].len = cookie->buf_len - RTE_PKTMBUF_HEADROOM + sizeof(struct virtio_net_hdr);
+ start_dp[idx].addr =
+ (uint64_t)(cookie->buf_physaddr + RTE_PKTMBUF_HEADROOM
+ - hw->vtnet_hdr_size);
+ start_dp[idx].len =
+ cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;
start_dp[idx].flags = VRING_DESC_F_WRITE;
idx = start_dp[idx].next;
vq->vq_desc_head_idx = idx;
@@ -184,8 +187,10 @@ virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie)
{
struct vq_desc_extra *dxp;
struct vring_desc *start_dp;
- uint16_t needed = 2;
+ uint16_t seg_num = cookie->pkt.nb_segs;
+ uint16_t needed = 1 + seg_num;
uint16_t head_idx, idx;
+ uint16_t head_size = txvq->hw->vtnet_hdr_size;
if (unlikely(txvq->vq_free_cnt == 0))
return -ENOSPC;
@@ -198,19 +203,25 @@ virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie)
idx = head_idx;
dxp = &txvq->vq_descx[idx];
if (dxp->cookie != NULL)
- rte_pktmbuf_free_seg(dxp->cookie);
+ rte_pktmbuf_free(dxp->cookie);
dxp->cookie = (void *)cookie;
dxp->ndescs = needed;
start_dp = txvq->vq_ring.desc;
- start_dp[idx].addr =
- txvq->virtio_net_hdr_mem + idx * sizeof(struct virtio_net_hdr);
- start_dp[idx].len = sizeof(struct virtio_net_hdr);
+ start_dp[idx].addr =
+ txvq->virtio_net_hdr_mem + idx * head_size;
+ start_dp[idx].len = (uint32_t)head_size;
start_dp[idx].flags = VRING_DESC_F_NEXT;
- idx = start_dp[idx].next;
- start_dp[idx].addr = RTE_MBUF_DATA_DMA_ADDR(cookie);
- start_dp[idx].len = cookie->pkt.data_len;
- start_dp[idx].flags = 0;
+
+ for (; ((seg_num > 0) && (cookie != NULL)); seg_num--) {
+ idx = start_dp[idx].next;
+ start_dp[idx].addr = RTE_MBUF_DATA_DMA_ADDR(cookie);
+ start_dp[idx].len = cookie->pkt.data_len;
+ start_dp[idx].flags = VRING_DESC_F_NEXT;
+ cookie = cookie->pkt.next;
+ }
+
+ start_dp[idx].flags &= ~VRING_DESC_F_NEXT;
idx = start_dp[idx].next;
txvq->vq_desc_head_idx = idx;
if (txvq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END)
@@ -284,7 +295,7 @@ virtio_dev_vring_start(struct virtqueue *vq, int queue_type)
error = virtqueue_enqueue_recv_refill(vq, m);
if (error) {
- rte_pktmbuf_free_seg(m);
+ rte_pktmbuf_free(m);
break;
}
nbufs++;
@@ -423,7 +434,7 @@ virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m)
error = virtqueue_enqueue_recv_refill(vq, m);
if (unlikely(error)) {
RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf");
- rte_pktmbuf_free_seg(m);
+ rte_pktmbuf_free(m);
}
}
@@ -433,13 +444,13 @@ uint16_t
virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
struct virtqueue *rxvq = rx_queue;
- struct virtio_hw *hw = rxvq->hw;
struct rte_mbuf *rxm, *new_mbuf;
uint16_t nb_used, num, nb_rx = 0;
uint32_t len[VIRTIO_MBUF_BURST_SZ];
struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
int error;
uint32_t i, nb_enqueued = 0;
+ const uint32_t hdr_size = sizeof(struct virtio_net_hdr);
nb_used = VIRTQUEUE_NUSED(rxvq);
@@ -460,8 +471,7 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
PMD_RX_LOG(DEBUG, "packet len:%d", len[i]);
- if (unlikely(len[i]
- < (uint32_t)hw->vtnet_hdr_size + ETHER_HDR_LEN)) {
+ if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) {
PMD_RX_LOG(ERR, "Packet drop");
nb_enqueued++;
virtio_discard_rxbuf(rxvq, rxm);
@@ -471,17 +481,16 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
rxm->pkt.in_port = rxvq->port_id;
rxm->pkt.data = (char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM;
+
rxm->pkt.nb_segs = 1;
rxm->pkt.next = NULL;
- rxm->pkt.pkt_len = (uint32_t)(len[i]
- - sizeof(struct virtio_net_hdr));
- rxm->pkt.data_len = (uint16_t)(len[i]
- - sizeof(struct virtio_net_hdr));
+ rxm->pkt.pkt_len = (uint32_t)(len[i] - hdr_size);
+ rxm->pkt.data_len = (uint16_t)(len[i] - hdr_size);
VIRTIO_DUMP_PACKET(rxm, rxm->pkt.data_len);
rx_pkts[nb_rx++] = rxm;
- rxvq->bytes += len[i] - sizeof(struct virtio_net_hdr);
+ rxvq->bytes += rx_pkts[nb_rx - 1]->pkt.pkt_len;
}
rxvq->packets += nb_rx;
@@ -498,11 +507,165 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
}
error = virtqueue_enqueue_recv_refill(rxvq, new_mbuf);
if (unlikely(error)) {
- rte_pktmbuf_free_seg(new_mbuf);
+ rte_pktmbuf_free(new_mbuf);
break;
}
nb_enqueued++;
}
+
+ if (likely(nb_enqueued)) {
+ if (unlikely(virtqueue_kick_prepare(rxvq))) {
+ virtqueue_notify(rxvq);
+ PMD_RX_LOG(DEBUG, "Notified\n");
+ }
+ }
+
+ vq_update_avail_idx(rxvq);
+
+ return nb_rx;
+}
+
+uint16_t
+virtio_recv_mergeable_pkts(void *rx_queue,
+ struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts)
+{
+ struct virtqueue *rxvq = rx_queue;
+ struct rte_mbuf *rxm, *new_mbuf;
+ uint16_t nb_used, num, nb_rx = 0;
+ uint32_t len[VIRTIO_MBUF_BURST_SZ];
+ struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ];
+ struct rte_mbuf *prev;
+ int error;
+ uint32_t i = 0, nb_enqueued = 0;
+ uint32_t seg_num = 0;
+ uint16_t extra_idx = 0;
+ uint32_t seg_res = 0;
+ const uint32_t hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
+
+ nb_used = VIRTQUEUE_NUSED(rxvq);
+
+ rmb();
+
+ if (nb_used == 0)
+ return 0;
+
+ PMD_RX_LOG(DEBUG, "used:%d\n", nb_used);
+
+ while (i < nb_used) {
+ struct virtio_net_hdr_mrg_rxbuf *header;
+
+ if (nb_rx == nb_pkts)
+ break;
+
+ num = virtqueue_dequeue_burst_rx(rxvq, rcv_pkts, len, 1);
+ if (num != 1)
+ continue;
+
+ i++;
+
+ PMD_RX_LOG(DEBUG, "dequeue:%d\n", num);
+ PMD_RX_LOG(DEBUG, "packet len:%d\n", len[0]);
+
+ rxm = rcv_pkts[0];
+
+ if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) {
+ PMD_RX_LOG(ERR, "Packet drop\n");
+ nb_enqueued++;
+ virtio_discard_rxbuf(rxvq, rxm);
+ rxvq->errors++;
+ continue;
+ }
+
+ header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)rxm->buf_addr +
+ RTE_PKTMBUF_HEADROOM - hdr_size);
+ seg_num = header->num_buffers;
+
+ if (seg_num == 0)
+ seg_num = 1;
+
+ rxm->pkt.data = (char *)rxm->buf_addr + RTE_PKTMBUF_HEADROOM;
+ rxm->pkt.nb_segs = seg_num;
+ rxm->pkt.next = NULL;
+ rxm->pkt.pkt_len = (uint32_t)(len[0] - hdr_size);
+ rxm->pkt.data_len = (uint16_t)(len[0] - hdr_size);
+
+ rxm->pkt.in_port = rxvq->port_id;
+ rx_pkts[nb_rx] = rxm;
+ prev = rxm;
+
+ seg_res = seg_num - 1;
+
+ while (seg_res != 0) {
+ /*
+ * Get extra segments for current uncompleted packet.
+ */
+ uint32_t rcv_cnt =
+ RTE_MIN(seg_res, RTE_DIM(rcv_pkts));
+ if (likely(VIRTQUEUE_NUSED(rxvq) >= rcv_cnt)) {
+ uint32_t rx_num =
+ virtqueue_dequeue_burst_rx(rxvq,
+ rcv_pkts, len, rcv_cnt);
+ i += rx_num;
+ rcv_cnt = rx_num;
+ } else {
+ PMD_RX_LOG(ERR,
+ "No enough segments for packet.\n");
+ nb_enqueued++;
+ virtio_discard_rxbuf(rxvq, rxm);
+ rxvq->errors++;
+ break;
+ }
+
+ extra_idx = 0;
+
+ while (extra_idx < rcv_cnt) {
+ rxm = rcv_pkts[extra_idx];
+
+ rxm->pkt.data =
+ (char *)rxm->buf_addr +
+ RTE_PKTMBUF_HEADROOM - hdr_size;
+ rxm->pkt.next = NULL;
+ rxm->pkt.pkt_len = (uint32_t)(len[extra_idx]);
+ rxm->pkt.data_len = (uint16_t)(len[extra_idx]);
+
+ if (prev)
+ prev->pkt.next = rxm;
+
+ prev = rxm;
+ rx_pkts[nb_rx]->pkt.pkt_len += rxm->pkt.pkt_len;
+ extra_idx++;
+ };
+ seg_res -= rcv_cnt;
+ }
+
+ VIRTIO_DUMP_PACKET(rx_pkts[nb_rx],
+ rx_pkts[nb_rx]->pkt.data_len);
+
+ rxvq->bytes += rx_pkts[nb_rx]->pkt.pkt_len;
+ nb_rx++;
+ }
+
+ rxvq->packets += nb_rx;
+
+ /* Allocate new mbuf for the used descriptor */
+ error = ENOSPC;
+ while (likely(!virtqueue_full(rxvq))) {
+ new_mbuf = rte_rxmbuf_alloc(rxvq->mpool);
+ if (unlikely(new_mbuf == NULL)) {
+ struct rte_eth_dev *dev
+ = &rte_eth_devices[rxvq->port_id];
+ dev->data->rx_mbuf_alloc_failed++;
+ break;
+ }
+ error = virtqueue_enqueue_recv_refill(rxvq, new_mbuf);
+ if (unlikely(error)) {
+ rte_pktmbuf_free(new_mbuf);
+ break;
+ }
+ nb_enqueued++;
+ }
+
if (likely(nb_enqueued)) {
if (unlikely(virtqueue_kick_prepare(rxvq))) {
virtqueue_notify(rxvq);
@@ -536,12 +699,16 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
num = (uint16_t)(likely(nb_used < VIRTIO_MBUF_BURST_SZ) ? nb_used : VIRTIO_MBUF_BURST_SZ);
while (nb_tx < nb_pkts) {
- if (virtqueue_full(txvq) && num) {
+ int need = tx_pkts[nb_tx]->pkt.nb_segs - txvq->vq_free_cnt;
+ int deq_cnt = RTE_MIN(need, (int)num);
+
+ num -= (deq_cnt > 0) ? deq_cnt : 0;
+ while (deq_cnt > 0) {
virtqueue_dequeue_pkt_tx(txvq);
- num--;
+ deq_cnt--;
}
- if (!virtqueue_full(txvq)) {
+ if (tx_pkts[nb_tx]->pkt.nb_segs <= txvq->vq_free_cnt) {
txm = tx_pkts[nb_tx];
/* Enqueue Packet buffers */
error = virtqueue_enqueue_xmit(txvq, txm);
@@ -555,7 +722,7 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
break;
}
nb_tx++;
- txvq->bytes += txm->pkt.data_len;
+ txvq->bytes += txm->pkt.pkt_len;
} else {
PMD_TX_LOG(ERR, "No free tx descriptors to transmit");
break;