get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/67268/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 67268,
    "url": "https://patches.dpdk.org/api/patches/67268/?format=api",
    "web_url": "https://patches.dpdk.org/project/dpdk/patch/20200327165412.87359-4-yong.liu@intel.com/",
    "project": {
        "id": 1,
        "url": "https://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20200327165412.87359-4-yong.liu@intel.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20200327165412.87359-4-yong.liu@intel.com",
    "date": "2020-03-27T16:54:08",
    "name": "[v2,3/7] net/virtio: add vectorized packed ring Rx function",
    "commit_ref": null,
    "pull_url": null,
    "state": "superseded",
    "archived": true,
    "hash": "902d8a2fa4dae3d5a6d485d8ee2e0d20b415be7a",
    "submitter": {
        "id": 17,
        "url": "https://patches.dpdk.org/api/people/17/?format=api",
        "name": "Marvin Liu",
        "email": "yong.liu@intel.com"
    },
    "delegate": {
        "id": 2642,
        "url": "https://patches.dpdk.org/api/users/2642/?format=api",
        "username": "mcoquelin",
        "first_name": "Maxime",
        "last_name": "Coquelin",
        "email": "maxime.coquelin@redhat.com"
    },
    "mbox": "https://patches.dpdk.org/project/dpdk/patch/20200327165412.87359-4-yong.liu@intel.com/mbox/",
    "series": [
        {
            "id": 9075,
            "url": "https://patches.dpdk.org/api/series/9075/?format=api",
            "web_url": "https://patches.dpdk.org/project/dpdk/list/?series=9075",
            "date": "2020-03-27T16:54:05",
            "name": "add packed ring vectorized datapath",
            "version": 2,
            "mbox": "https://patches.dpdk.org/series/9075/mbox/"
        }
    ],
    "comments": "https://patches.dpdk.org/api/patches/67268/comments/",
    "check": "warning",
    "checks": "https://patches.dpdk.org/api/patches/67268/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@inbox.dpdk.org",
        "Delivered-To": "patchwork@inbox.dpdk.org",
        "Received": [
            "from dpdk.org (dpdk.org [92.243.14.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id 0911BA0589;\n\tFri, 27 Mar 2020 10:18:42 +0100 (CET)",
            "from [92.243.14.124] (localhost [127.0.0.1])\n\tby dpdk.org (Postfix) with ESMTP id EA20B1C0CA;\n\tFri, 27 Mar 2020 10:18:19 +0100 (CET)",
            "from mga05.intel.com (mga05.intel.com [192.55.52.43])\n by dpdk.org (Postfix) with ESMTP id 13FF21C0B4\n for <dev@dpdk.org>; Fri, 27 Mar 2020 10:18:12 +0100 (CET)",
            "from fmsmga004.fm.intel.com ([10.253.24.48])\n by fmsmga105.fm.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384;\n 27 Mar 2020 02:18:12 -0700",
            "from npg-dpdk-virtual-marvin-dev.sh.intel.com ([10.67.119.58])\n by fmsmga004.fm.intel.com with ESMTP; 27 Mar 2020 02:18:11 -0700"
        ],
        "IronPort-SDR": [
            "\n 7ts0To/yyHsii/sRbG357MU7Tn4LA0B34TJUxL/QGKCYNtjnb57yyFcuXC/Jmzzia2r5F2svyD\n 3t5tTzV9p+pQ==",
            "\n fd21nc21EN/Z8gly4/Mts3aQxAhv/BG3HTjdOE25DSxtxm+kbAvltdjci3JNCSSn0qNmQQRosp\n ZuOT5sJDw7HA=="
        ],
        "X-Amp-Result": "SKIPPED(no attachment in message)",
        "X-Amp-File-Uploaded": "False",
        "X-ExtLoop1": "1",
        "X-IronPort-AV": "E=Sophos;i=\"5.72,311,1580803200\"; d=\"scan'208\";a=\"271507917\"",
        "From": "Marvin Liu <yong.liu@intel.com>",
        "To": "maxime.coquelin@redhat.com, xiaolong.ye@intel.com, zhihong.wang@intel.com",
        "Cc": "dev@dpdk.org,\n\tMarvin Liu <yong.liu@intel.com>",
        "Date": "Sat, 28 Mar 2020 00:54:08 +0800",
        "Message-Id": "<20200327165412.87359-4-yong.liu@intel.com>",
        "X-Mailer": "git-send-email 2.17.1",
        "In-Reply-To": "<20200327165412.87359-1-yong.liu@intel.com>",
        "References": "<20200313174230.74661-1-yong.liu@intel.com>\n <20200327165412.87359-1-yong.liu@intel.com>",
        "Subject": "[dpdk-dev] [PATCH v2 3/7] net/virtio: add vectorized packed ring Rx\n\tfunction",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.15",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org",
        "Sender": "\"dev\" <dev-bounces@dpdk.org>"
    },
    "content": "Optimize packed ring Rx datapath when AVX512 enabled and mergeable\nbuffer/Rx LRO offloading are not required. Solution of optimization\nis pretty like vhost, is that split datapath into batch and single\nfunctions. Batch function is further optimized by vector instructions.\nAlso pad desc extra structure to 16 bytes aligned, thus four elements\nwill be saved in one batch.\n\nSigned-off-by: Marvin Liu <yong.liu@intel.com>",
    "diff": "diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile\nindex efdcb0d93..7bdb87c49 100644\n--- a/drivers/net/virtio/Makefile\n+++ b/drivers/net/virtio/Makefile\n@@ -37,6 +37,34 @@ else ifneq ($(filter y,$(CONFIG_RTE_ARCH_ARM) $(CONFIG_RTE_ARCH_ARM64)),)\n SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple_neon.c\n endif\n \n+ifeq ($(RTE_TOOLCHAIN), gcc)\n+ifeq ($(shell test $(GCC_VERSION) -ge 83 && echo 1), 1)\n+CFLAGS += -DVIRTIO_GCC_UNROLL_PRAGMA\n+endif\n+endif\n+\n+ifeq ($(RTE_TOOLCHAIN), clang)\n+ifeq ($(shell test $(CLANG_MAJOR_VERSION)$(CLANG_MINOR_VERSION) -ge 37 && echo 1), 1)\n+CFLAGS += -DVIRTIO_CLANG_UNROLL_PRAGMA\n+endif\n+endif\n+\n+ifeq ($(RTE_TOOLCHAIN), icc)\n+ifeq ($(shell test $(ICC_MAJOR_VERSION) -ge 16 && echo 1), 1)\n+CFLAGS += -DVIRTIO_ICC_UNROLL_PRAGMA\n+endif\n+endif\n+\n+ifeq ($(findstring RTE_MACHINE_CPUFLAG_AVX512F,$(CFLAGS)),RTE_MACHINE_CPUFLAG_AVX512F)\n+ifneq ($(FORCE_DISABLE_AVX512), y)\n+CFLAGS += -DCC_AVX512_SUPPORT\n+ifeq ($(shell test $(GCC_VERSION) -ge 100 && echo 1), 1)\n+CFLAGS_virtio_rxtx_packed_avx.o += -Wno-zero-length-bounds\n+endif\n+SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_packed_avx.c\n+endif\n+endif\n+\n ifeq ($(CONFIG_RTE_VIRTIO_USER),y)\n SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_user.c\n SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_kernel.c\ndiff --git a/drivers/net/virtio/meson.build b/drivers/net/virtio/meson.build\nindex 04c7fdf25..652ae39af 100644\n--- a/drivers/net/virtio/meson.build\n+++ b/drivers/net/virtio/meson.build\n@@ -11,6 +11,17 @@ deps += ['kvargs', 'bus_pci']\n \n if arch_subdir == 'x86'\n \tsources += files('virtio_rxtx_simple_sse.c')\n+\tif dpdk_conf.has('RTE_MACHINE_CPUFLAG_AVX512F')\n+\t\tcflags += ['-DCC_AVX512_SUPPORT']\n+\t\tif (toolchain == 'gcc' and cc.version().version_compare('>=8.3.0'))\n+\t\t\tcflags += '-DVHOST_GCC_UNROLL_PRAGMA'\n+\t\telif (toolchain == 'clang' and cc.version().version_compare('>=3.7.0'))\n+\t\t\tcflags += '-DVHOST_CLANG_UNROLL_PRAGMA'\n+\t\telif (toolchain == 'icc' and cc.version().version_compare('>=16.0.0'))\n+\t\t\tcflags += '-DVHOST_ICC_UNROLL_PRAGMA'\n+\t\tendif\n+\t\tsources += files('virtio_rxtx_packed_avx.c')\n+\tendif\n elif arch_subdir == 'ppc_64'\n \tsources += files('virtio_rxtx_simple_altivec.c')\n elif arch_subdir == 'arm' and host_machine.cpu_family().startswith('aarch64')\ndiff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h\nindex cd8947656..10e39670e 100644\n--- a/drivers/net/virtio/virtio_ethdev.h\n+++ b/drivers/net/virtio/virtio_ethdev.h\n@@ -104,6 +104,9 @@ uint16_t virtio_xmit_pkts_inorder(void *tx_queue, struct rte_mbuf **tx_pkts,\n uint16_t virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,\n \t\tuint16_t nb_pkts);\n \n+uint16_t virtio_recv_pkts_packed_vec(void *rx_queue, struct rte_mbuf **rx_pkts,\n+\t\tuint16_t nb_pkts);\n+\n int eth_virtio_dev_init(struct rte_eth_dev *eth_dev);\n \n void virtio_interrupt_handler(void *param);\ndiff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c\nindex 3a2dbc2e0..ac417232b 100644\n--- a/drivers/net/virtio/virtio_rxtx.c\n+++ b/drivers/net/virtio/virtio_rxtx.c\n@@ -1245,7 +1245,6 @@ virtio_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)\n \treturn 0;\n }\n \n-#define VIRTIO_MBUF_BURST_SZ 64\n #define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))\n uint16_t\n virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)\n@@ -2328,3 +2327,11 @@ virtio_xmit_pkts_inorder(void *tx_queue,\n \n \treturn nb_tx;\n }\n+\n+__rte_weak uint16_t\n+virtio_recv_pkts_packed_vec(void __rte_unused *rx_queue,\n+\t\t\t    struct rte_mbuf __rte_unused **rx_pkts,\n+\t\t\t    uint16_t __rte_unused nb_pkts)\n+{\n+\treturn 0;\n+}\ndiff --git a/drivers/net/virtio/virtio_rxtx_packed_avx.c b/drivers/net/virtio/virtio_rxtx_packed_avx.c\nnew file mode 100644\nindex 000000000..e2310d74e\n--- /dev/null\n+++ b/drivers/net/virtio/virtio_rxtx_packed_avx.c\n@@ -0,0 +1,361 @@\n+/* SPDX-License-Identifier: BSD-3-Clause\n+ * Copyright(c) 2010-2020 Intel Corporation\n+ */\n+\n+#include <stdint.h>\n+#include <stdio.h>\n+#include <stdlib.h>\n+#include <string.h>\n+#include <errno.h>\n+\n+#include <rte_net.h>\n+\n+#include \"virtio_logs.h\"\n+#include \"virtio_ethdev.h\"\n+#include \"virtio_pci.h\"\n+#include \"virtqueue.h\"\n+\n+#define PACKED_FLAGS_MASK (1ULL << 55 | 1ULL << 63)\n+\n+#define PACKED_BATCH_SIZE (RTE_CACHE_LINE_SIZE / \\\n+\tsizeof(struct vring_packed_desc))\n+#define PACKED_BATCH_MASK (PACKED_BATCH_SIZE - 1)\n+\n+#ifdef VIRTIO_GCC_UNROLL_PRAGMA\n+#define virtio_for_each_try_unroll(iter, val, size) _Pragma(\"GCC unroll 4\") \\\n+\tfor (iter = val; iter < size; iter++)\n+#endif\n+\n+#ifdef VIRTIO_CLANG_UNROLL_PRAGMA\n+#define virtio_for_each_try_unroll(iter, val, size) _Pragma(\"unroll 4\") \\\n+\tfor (iter = val; iter < size; iter++)\n+#endif\n+\n+#ifdef VIRTIO_ICC_UNROLL_PRAGMA\n+#define virtio_for_each_try_unroll(iter, val, size) _Pragma(\"unroll (4)\") \\\n+\tfor (iter = val; iter < size; iter++)\n+#endif\n+\n+#ifndef virtio_for_each_try_unroll\n+#define virtio_for_each_try_unroll(iter, val, num) \\\n+\tfor (iter = val; iter < num; iter++)\n+#endif\n+\n+\n+static inline void\n+virtio_update_batch_stats(struct virtnet_stats *stats,\n+\t\t\t  uint16_t pkt_len1,\n+\t\t\t  uint16_t pkt_len2,\n+\t\t\t  uint16_t pkt_len3,\n+\t\t\t  uint16_t pkt_len4)\n+{\n+\tstats->bytes += pkt_len1;\n+\tstats->bytes += pkt_len2;\n+\tstats->bytes += pkt_len3;\n+\tstats->bytes += pkt_len4;\n+}\n+/* Optionally fill offload information in structure */\n+static inline int\n+virtio_vec_rx_offload(struct rte_mbuf *m, struct virtio_net_hdr *hdr)\n+{\n+\tstruct rte_net_hdr_lens hdr_lens;\n+\tuint32_t hdrlen, ptype;\n+\tint l4_supported = 0;\n+\n+\t/* nothing to do */\n+\tif (hdr->flags == 0)\n+\t\treturn 0;\n+\n+\t/* GSO not support in vec path, skip check */\n+\tm->ol_flags |= PKT_RX_IP_CKSUM_UNKNOWN;\n+\n+\tptype = rte_net_get_ptype(m, &hdr_lens, RTE_PTYPE_ALL_MASK);\n+\tm->packet_type = ptype;\n+\tif ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_TCP ||\n+\t    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP ||\n+\t    (ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_SCTP)\n+\t\tl4_supported = 1;\n+\n+\tif (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {\n+\t\thdrlen = hdr_lens.l2_len + hdr_lens.l3_len + hdr_lens.l4_len;\n+\t\tif (hdr->csum_start <= hdrlen && l4_supported) {\n+\t\t\tm->ol_flags |= PKT_RX_L4_CKSUM_NONE;\n+\t\t} else {\n+\t\t\t/* Unknown proto or tunnel, do sw cksum. We can assume\n+\t\t\t * the cksum field is in the first segment since the\n+\t\t\t * buffers we provided to the host are large enough.\n+\t\t\t * In case of SCTP, this will be wrong since it's a CRC\n+\t\t\t * but there's nothing we can do.\n+\t\t\t */\n+\t\t\tuint16_t csum = 0, off;\n+\n+\t\t\trte_raw_cksum_mbuf(m, hdr->csum_start,\n+\t\t\t\trte_pktmbuf_pkt_len(m) - hdr->csum_start,\n+\t\t\t\t&csum);\n+\t\t\tif (likely(csum != 0xffff))\n+\t\t\t\tcsum = ~csum;\n+\t\t\toff = hdr->csum_offset + hdr->csum_start;\n+\t\t\tif (rte_pktmbuf_data_len(m) >= off + 1)\n+\t\t\t\t*rte_pktmbuf_mtod_offset(m, uint16_t *,\n+\t\t\t\t\toff) = csum;\n+\t\t}\n+\t} else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID && l4_supported) {\n+\t\tm->ol_flags |= PKT_RX_L4_CKSUM_GOOD;\n+\t}\n+\n+\treturn 0;\n+}\n+\n+static uint16_t\n+virtqueue_dequeue_batch_packed_vec(struct virtnet_rx *rxvq,\n+\t\t\t\t   struct rte_mbuf **rx_pkts)\n+{\n+\tstruct virtqueue *vq = rxvq->vq;\n+\tstruct virtio_hw *hw = vq->hw;\n+\tuint16_t hdr_size = hw->vtnet_hdr_size;\n+\tstruct virtio_net_hdr *hdrs[PACKED_BATCH_SIZE];\n+\tuint64_t addrs[PACKED_BATCH_SIZE << 1];\n+\tuint16_t id = vq->vq_used_cons_idx;\n+\tuint8_t desc_stats;\n+\tuint16_t i;\n+\tvoid *desc_addr;\n+\n+\tif (id & PACKED_BATCH_MASK)\n+\t\treturn -1;\n+\n+\t/* only care avail/used bits */\n+\t__m512i desc_flags = _mm512_maskz_set1_epi64(0xaa, PACKED_FLAGS_MASK);\n+\tdesc_addr = &vq->vq_packed.ring.desc[id];\n+\n+\trte_smp_rmb();\n+\t__m512i packed_desc = _mm512_loadu_si512(desc_addr);\n+\t__m512i flags_mask  = _mm512_maskz_and_epi64(0xff, packed_desc,\n+\t\t\tdesc_flags);\n+\n+\t__m512i used_flags;\n+\tif (vq->vq_packed.used_wrap_counter)\n+\t\tused_flags = _mm512_maskz_set1_epi64(0xaa, PACKED_FLAGS_MASK);\n+\telse\n+\t\tused_flags = _mm512_setzero_si512();\n+\n+\t/* Check all descs are used */\n+\tdesc_stats = _mm512_cmp_epu64_mask(flags_mask, used_flags,\n+\t\t\t_MM_CMPINT_EQ);\n+\tif (desc_stats != 0xff)\n+\t\treturn -1;\n+\n+\tvirtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {\n+\t\trx_pkts[i] = (struct rte_mbuf *)vq->vq_descx[id + i].cookie;\n+\t\trte_packet_prefetch(rte_pktmbuf_mtod(rx_pkts[i], void *));\n+\n+\t\taddrs[i << 1] = (uint64_t)rx_pkts[i]->rx_descriptor_fields1;\n+\t\taddrs[(i << 1) + 1] =\n+\t\t\t(uint64_t)rx_pkts[i]->rx_descriptor_fields1 + 8;\n+\t}\n+\n+\tvirtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {\n+\t\tchar *addr = (char *)rx_pkts[i]->buf_addr +\n+\t\t\tRTE_PKTMBUF_HEADROOM - hdr_size;\n+\t\thdrs[i] = (struct virtio_net_hdr *)addr;\n+\t}\n+\n+\t/* addresses of pkt_len and data_len */\n+\t__m512i vindex = _mm512_loadu_si512((void *)addrs);\n+\n+\t/*\n+\t * select 10b*4 load 32bit from packed_desc[95:64]\n+\t * mmask  0110b*4 save 32bit into pkt_len and data_len\n+\t */\n+\t__m512i value = _mm512_maskz_shuffle_epi32(0x6666, packed_desc, 0xAA);\n+\n+\t/* mmask 0110b*4 reduce hdr_len from pkt_len and data_len */\n+\t__m512i mbuf_len_offset = _mm512_maskz_set1_epi32(0x6666,\n+\t\t\t(uint32_t)-hdr_size);\n+\n+\tvalue = _mm512_add_epi32(value, mbuf_len_offset);\n+\t/* batch store into mbufs */\n+\t_mm512_i64scatter_epi64(0, vindex, value, 1);\n+\n+\tif (hw->has_rx_offload) {\n+\t\tvirtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE)\n+\t\t\tvirtio_vec_rx_offload(rx_pkts[i], hdrs[i]);\n+\t}\n+\n+\tvirtio_update_batch_stats(&rxvq->stats, rx_pkts[0]->pkt_len,\n+\t\t\trx_pkts[1]->pkt_len, rx_pkts[2]->pkt_len,\n+\t\t\trx_pkts[3]->pkt_len);\n+\n+\tvq->vq_free_cnt += PACKED_BATCH_SIZE;\n+\n+\tvq->vq_used_cons_idx += PACKED_BATCH_SIZE;\n+\tif (vq->vq_used_cons_idx >= vq->vq_nentries) {\n+\t\tvq->vq_used_cons_idx -= vq->vq_nentries;\n+\t\tvq->vq_packed.used_wrap_counter ^= 1;\n+\t}\n+\n+\treturn 0;\n+}\n+\n+static uint16_t\n+virtqueue_dequeue_single_packed_vec(struct virtnet_rx *rxvq,\n+\t\t\t\t    struct rte_mbuf **rx_pkts)\n+{\n+\tuint16_t used_idx, id;\n+\tuint32_t len;\n+\tstruct virtqueue *vq = rxvq->vq;\n+\tstruct virtio_hw *hw = vq->hw;\n+\tuint32_t hdr_size = hw->vtnet_hdr_size;\n+\tstruct virtio_net_hdr *hdr;\n+\tstruct vring_packed_desc *desc;\n+\tstruct rte_mbuf *cookie;\n+\n+\tdesc = vq->vq_packed.ring.desc;\n+\tused_idx = vq->vq_used_cons_idx;\n+\tif (!desc_is_used(&desc[used_idx], vq))\n+\t\treturn -1;\n+\n+\tlen = desc[used_idx].len;\n+\tid = desc[used_idx].id;\n+\tcookie = (struct rte_mbuf *)vq->vq_descx[id].cookie;\n+\tif (unlikely(cookie == NULL)) {\n+\t\tPMD_DRV_LOG(ERR, \"vring descriptor with no mbuf cookie at %u\",\n+\t\t\t\tvq->vq_used_cons_idx);\n+\t\treturn -1;\n+\t}\n+\trte_prefetch0(cookie);\n+\trte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *));\n+\n+\tcookie->data_off = RTE_PKTMBUF_HEADROOM;\n+\tcookie->ol_flags = 0;\n+\tcookie->pkt_len = (uint32_t)(len - hdr_size);\n+\tcookie->data_len = (uint32_t)(len - hdr_size);\n+\n+\thdr = (struct virtio_net_hdr *)((char *)cookie->buf_addr +\n+\t\t\t\t\tRTE_PKTMBUF_HEADROOM - hdr_size);\n+\tif (hw->has_rx_offload)\n+\t\tvirtio_vec_rx_offload(cookie, hdr);\n+\n+\t*rx_pkts = cookie;\n+\n+\trxvq->stats.bytes += cookie->pkt_len;\n+\n+\tvq->vq_free_cnt++;\n+\tvq->vq_used_cons_idx++;\n+\tif (vq->vq_used_cons_idx >= vq->vq_nentries) {\n+\t\tvq->vq_used_cons_idx -= vq->vq_nentries;\n+\t\tvq->vq_packed.used_wrap_counter ^= 1;\n+\t}\n+\n+\treturn 0;\n+}\n+\n+static inline void\n+virtio_recv_refill_packed_vec(struct virtnet_rx *rxvq,\n+\t\t\t      struct rte_mbuf **cookie,\n+\t\t\t      uint16_t num)\n+{\n+\tstruct virtqueue *vq = rxvq->vq;\n+\tstruct vring_packed_desc *start_dp = vq->vq_packed.ring.desc;\n+\tuint16_t flags = vq->vq_packed.cached_flags;\n+\tstruct virtio_hw *hw = vq->hw;\n+\tstruct vq_desc_extra *dxp;\n+\tuint16_t idx, i;\n+\tuint16_t total_num = 0;\n+\tuint16_t head_idx = vq->vq_avail_idx;\n+\tuint16_t head_flag = vq->vq_packed.cached_flags;\n+\tuint64_t addr;\n+\n+\tdo {\n+\t\tidx = vq->vq_avail_idx;\n+\t\tvirtio_for_each_try_unroll(i, 0, PACKED_BATCH_SIZE) {\n+\t\t\tdxp = &vq->vq_descx[idx + i];\n+\t\t\tdxp->cookie = (void *)cookie[total_num + i];\n+\n+\t\t\taddr = VIRTIO_MBUF_ADDR(cookie[total_num + i], vq) +\n+\t\t\t\tRTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size;\n+\t\t\tstart_dp[idx + i].addr = addr;\n+\t\t\tstart_dp[idx + i].len = cookie[total_num + i]->buf_len\n+\t\t\t\t- RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size;\n+\t\t\tif (total_num || i) {\n+\t\t\t\tvirtqueue_store_flags_packed(&start_dp[idx + i],\n+\t\t\t\t\t\tflags, hw->weak_barriers);\n+\t\t\t}\n+\t\t}\n+\n+\t\tvq->vq_avail_idx += PACKED_BATCH_SIZE;\n+\t\tif (vq->vq_avail_idx >= vq->vq_nentries) {\n+\t\t\tvq->vq_avail_idx -= vq->vq_nentries;\n+\t\t\tvq->vq_packed.cached_flags ^=\n+\t\t\t\tVRING_PACKED_DESC_F_AVAIL_USED;\n+\t\t\tflags = vq->vq_packed.cached_flags;\n+\t\t}\n+\t\ttotal_num += PACKED_BATCH_SIZE;\n+\t} while (total_num < num);\n+\n+\tvirtqueue_store_flags_packed(&start_dp[head_idx], head_flag,\n+\t\t\t\thw->weak_barriers);\n+\tvq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - num);\n+}\n+\n+uint16_t\n+virtio_recv_pkts_packed_vec(void *rx_queue,\n+\t\t\t    struct rte_mbuf **rx_pkts,\n+\t\t\t    uint16_t nb_pkts)\n+{\n+\tstruct virtnet_rx *rxvq = rx_queue;\n+\tstruct virtqueue *vq = rxvq->vq;\n+\tstruct virtio_hw *hw = vq->hw;\n+\tuint16_t num, nb_rx = 0;\n+\tuint32_t nb_enqueued = 0;\n+\tuint16_t free_cnt = vq->vq_free_thresh;\n+\n+\tif (unlikely(hw->started == 0))\n+\t\treturn nb_rx;\n+\n+\tnum = RTE_MIN(VIRTIO_MBUF_BURST_SZ, nb_pkts);\n+\tif (likely(num > PACKED_BATCH_SIZE))\n+\t\tnum = num - ((vq->vq_used_cons_idx + num) % PACKED_BATCH_SIZE);\n+\n+\twhile (num) {\n+\t\tif (!virtqueue_dequeue_batch_packed_vec(rxvq,\n+\t\t\t\t\t&rx_pkts[nb_rx])) {\n+\t\t\tnb_rx += PACKED_BATCH_SIZE;\n+\t\t\tnum -= PACKED_BATCH_SIZE;\n+\t\t\tcontinue;\n+\t\t}\n+\t\tif (!virtqueue_dequeue_single_packed_vec(rxvq,\n+\t\t\t\t\t&rx_pkts[nb_rx])) {\n+\t\t\tnb_rx++;\n+\t\t\tnum--;\n+\t\t\tcontinue;\n+\t\t}\n+\t\tbreak;\n+\t};\n+\n+\tPMD_RX_LOG(DEBUG, \"dequeue:%d\", num);\n+\n+\trxvq->stats.packets += nb_rx;\n+\n+\tif (likely(vq->vq_free_cnt >= free_cnt)) {\n+\t\tstruct rte_mbuf *new_pkts[free_cnt];\n+\t\tif (likely(rte_pktmbuf_alloc_bulk(rxvq->mpool, new_pkts,\n+\t\t\t\t\t\tfree_cnt) == 0)) {\n+\t\t\tvirtio_recv_refill_packed_vec(rxvq, new_pkts,\n+\t\t\t\t\tfree_cnt);\n+\t\t\tnb_enqueued += free_cnt;\n+\t\t} else {\n+\t\t\tstruct rte_eth_dev *dev =\n+\t\t\t\t&rte_eth_devices[rxvq->port_id];\n+\t\t\tdev->data->rx_mbuf_alloc_failed += free_cnt;\n+\t\t}\n+\t}\n+\n+\tif (likely(nb_enqueued)) {\n+\t\tif (unlikely(virtqueue_kick_prepare_packed(vq))) {\n+\t\t\tvirtqueue_notify(vq);\n+\t\t\tPMD_RX_LOG(DEBUG, \"Notified\");\n+\t\t}\n+\t}\n+\n+\treturn nb_rx;\n+}\ndiff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h\nindex 6301c56b2..43e305ecc 100644\n--- a/drivers/net/virtio/virtqueue.h\n+++ b/drivers/net/virtio/virtqueue.h\n@@ -20,6 +20,7 @@ struct rte_mbuf;\n \n #define DEFAULT_RX_FREE_THRESH 32\n \n+#define VIRTIO_MBUF_BURST_SZ 64\n /*\n  * Per virtio_ring.h in Linux.\n  *     For virtio_pci on SMP, we don't need to order with respect to MMIO\n@@ -236,7 +237,8 @@ struct vq_desc_extra {\n \tvoid *cookie;\n \tuint16_t ndescs;\n \tuint16_t next;\n-};\n+\tuint8_t padding[4];\n+} __rte_packed __rte_aligned(16);\n \n struct virtqueue {\n \tstruct virtio_hw  *hw; /**< virtio_hw structure pointer. */\n",
    "prefixes": [
        "v2",
        "3/7"
    ]
}