From patchwork Tue Mar 9 10:19:56 2021
From: Ciara Loftus <ciara.loftus@intel.com>
To: dev@dpdk.org
Cc: Ciara Loftus <ciara.loftus@intel.com>
Date: Tue, 9 Mar 2021 10:19:56 +0000
Message-Id: <20210309101958.27355-2-ciara.loftus@intel.com>
In-Reply-To: <20210309101958.27355-1-ciara.loftus@intel.com>
References: <20210224111852.11947-1-ciara.loftus@intel.com>
 <20210309101958.27355-1-ciara.loftus@intel.com>
Subject: [dpdk-dev] [PATCH v2 1/3] net/af_xdp: allow bigger batch sizes
X-Patchwork-Id: 88745

Prior to this commit, the maximum batch sizes for zero-copy and
copy-mode rx and for copy-mode tx were set to 32. Apart from zero-copy
tx, the user could never rx/tx more than 32 packets at a time, and
would not be aware of this limit without inspecting the code.

This commit removes that upper limit placed on the user and instead
sets an internal batch size equal to the default ring size (2048).
Batches larger than this are still processed, however they are split
into smaller batches, similar to how it's done in other drivers. This
is necessary because some arrays used during rx/tx need to be sized at
compile time.

Allowing a larger batch size allows for fewer batches and thus larger
bulk operations, fewer ring accesses and fewer syscalls, which should
yield improved performance.
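As a caller-side illustration (a minimal sketch, not part of the patch;
the port/queue IDs, the 512-packet burst and the helper name are
arbitrary), an application can now pass bursts larger than 32 straight
to rte_eth_rx_burst() and the PMD services them, splitting internally
at ETH_AF_XDP_RX_BATCH_SIZE boundaries:

#include <rte_ethdev.h>
#include <rte_mbuf.h>

static uint16_t
rx_large_burst(uint16_t port_id, uint16_t queue_id)
{
	/* Before this patch the driver silently capped each burst at 32. */
	struct rte_mbuf *pkts[512];
	uint16_t nb_rx, i;

	nb_rx = rte_eth_rx_burst(port_id, queue_id, pkts, 512);

	for (i = 0; i < nb_rx; i++)
		rte_pktmbuf_free(pkts[i]); /* placeholder for real processing */

	return nb_rx;
}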
Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
---
 drivers/net/af_xdp/rte_eth_af_xdp.c | 67 ++++++++++++++++++++++++-----
 1 file changed, 57 insertions(+), 10 deletions(-)

diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index 3957227bf0..be524e4784 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -66,8 +66,8 @@ RTE_LOG_REGISTER(af_xdp_logtype, pmd.net.af_xdp, NOTICE);
 
 #define ETH_AF_XDP_DFLT_START_QUEUE_IDX	0
 #define ETH_AF_XDP_DFLT_QUEUE_COUNT	1
 
-#define ETH_AF_XDP_RX_BATCH_SIZE	32
-#define ETH_AF_XDP_TX_BATCH_SIZE	32
+#define ETH_AF_XDP_RX_BATCH_SIZE	XSK_RING_CONS__DEFAULT_NUM_DESCS
+#define ETH_AF_XDP_TX_BATCH_SIZE	XSK_RING_CONS__DEFAULT_NUM_DESCS
 
 struct xsk_umem_info {
@@ -329,8 +329,7 @@ af_xdp_rx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	struct rte_mbuf *mbufs[ETH_AF_XDP_RX_BATCH_SIZE];
 
 	if (xsk_prod_nb_free(fq, free_thresh) >= free_thresh)
-		(void)reserve_fill_queue(umem, ETH_AF_XDP_RX_BATCH_SIZE,
-					 NULL, fq);
+		(void)reserve_fill_queue(umem, nb_pkts, NULL, fq);
 
 	nb_pkts = xsk_ring_cons__peek(rx, nb_pkts, &idx_rx);
 	if (nb_pkts == 0) {
@@ -379,10 +378,8 @@ af_xdp_rx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 #endif
 
 static uint16_t
-eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 {
-	nb_pkts = RTE_MIN(nb_pkts, ETH_AF_XDP_RX_BATCH_SIZE);
-
 #if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
 	return af_xdp_rx_zc(queue, bufs, nb_pkts);
 #else
@@ -390,6 +387,32 @@ eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 #endif
 }
 
+static uint16_t
+eth_af_xdp_rx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	uint16_t nb_rx;
+
+	if (likely(nb_pkts <= ETH_AF_XDP_RX_BATCH_SIZE))
+		return af_xdp_rx(queue, bufs, nb_pkts);
+
+	/* Split larger batch into smaller batches of size
+	 * ETH_AF_XDP_RX_BATCH_SIZE or less.
+	 */
+	nb_rx = 0;
+	while (nb_pkts) {
+		uint16_t ret, n;
+
+		n = (uint16_t)RTE_MIN(nb_pkts, ETH_AF_XDP_RX_BATCH_SIZE);
+		ret = af_xdp_rx(queue, &bufs[nb_rx], n);
+		nb_rx = (uint16_t)(nb_rx + ret);
+		nb_pkts = (uint16_t)(nb_pkts - ret);
+		if (ret < n)
+			break;
+	}
+
+	return nb_rx;
+}
+
 static void
 pull_umem_cq(struct xsk_umem_info *umem, int size, struct xsk_ring_cons *cq)
 {
@@ -535,8 +558,6 @@ af_xdp_tx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	uint32_t idx_tx;
 	struct xsk_ring_cons *cq = &txq->pair->cq;
 
-	nb_pkts = RTE_MIN(nb_pkts, ETH_AF_XDP_TX_BATCH_SIZE);
-
 	pull_umem_cq(umem, nb_pkts, cq);
 
 	nb_pkts = rte_ring_dequeue_bulk(umem->buf_ring, addrs,
@@ -575,6 +596,32 @@ af_xdp_tx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	return nb_pkts;
 }
 
+static uint16_t
+af_xdp_tx_cp_batch(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
+{
+	uint16_t nb_tx;
+
+	if (likely(nb_pkts <= ETH_AF_XDP_TX_BATCH_SIZE))
+		return af_xdp_tx_cp(queue, bufs, nb_pkts);
+
+	nb_tx = 0;
+	while (nb_pkts) {
+		uint16_t ret, n;
+
+		/* Split larger batch into smaller batches of size
+		 * ETH_AF_XDP_TX_BATCH_SIZE or less.
+		 */
+		n = (uint16_t)RTE_MIN(nb_pkts, ETH_AF_XDP_TX_BATCH_SIZE);
+		ret = af_xdp_tx_cp(queue, &bufs[nb_tx], n);
+		nb_tx = (uint16_t)(nb_tx + ret);
+		nb_pkts = (uint16_t)(nb_pkts - ret);
+		if (ret < n)
+			break;
+	}
+
+	return nb_tx;
+}
 #endif
 
 static uint16_t
@@ -583,7 +630,7 @@ eth_af_xdp_tx(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 #if defined(XDP_UMEM_UNALIGNED_CHUNK_FLAG)
 	return af_xdp_tx_zc(queue, bufs, nb_pkts);
 #else
-	return af_xdp_tx_cp(queue, bufs, nb_pkts);
+	return af_xdp_tx_cp_batch(queue, bufs, nb_pkts);
 #endif
 }

From patchwork Tue Mar 9 10:19:57 2021
From: Ciara Loftus <ciara.loftus@intel.com>
To: dev@dpdk.org
Cc: Ciara Loftus <ciara.loftus@intel.com>
Date: Tue, 9 Mar 2021 10:19:57 +0000
Message-Id: <20210309101958.27355-3-ciara.loftus@intel.com>
In-Reply-To: <20210309101958.27355-1-ciara.loftus@intel.com>
References: <20210224111852.11947-1-ciara.loftus@intel.com>
 <20210309101958.27355-1-ciara.loftus@intel.com>
Subject: [dpdk-dev] [PATCH v2 2/3] net/af_xdp: Use recvfrom() instead of poll()
X-Patchwork-Id: 88746

poll() is more expensive and requires more tuning when used with the
upcoming busy polling functionality.
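To illustrate the new wakeup idiom (a minimal sketch; the kick_rx()
helper is illustrative, and the fd is assumed to come from
xsk_socket__fd()), a zero-length non-blocking recvfrom() kicks the
kernel into processing the socket's rings and returns immediately,
unlike the previous poll() call with its 1000 ms timeout:

#include <sys/socket.h>

static void
kick_rx(int xsk_fd)
{
	/* No data is copied; this only wakes the kernel to service the
	 * fill/rx rings, and MSG_DONTWAIT keeps the call non-blocking. */
	recvfrom(xsk_fd, NULL, 0, MSG_DONTWAIT, NULL, NULL);
}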
Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
---
 drivers/net/af_xdp/rte_eth_af_xdp.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index be524e4784..9c0e935cd3 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -263,7 +263,8 @@ af_xdp_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	if (nb_pkts == 0) {
 #if defined(XDP_USE_NEED_WAKEUP)
 		if (xsk_ring_prod__needs_wakeup(fq))
-			(void)poll(rxq->fds, 1, 1000);
+			recvfrom(xsk_socket__fd(rxq->xsk), NULL, 0,
+				 MSG_DONTWAIT, NULL, NULL);
 #endif
 
 		return 0;
@@ -335,7 +336,8 @@ af_xdp_rx_cp(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 	if (nb_pkts == 0) {
 #if defined(XDP_USE_NEED_WAKEUP)
 		if (xsk_ring_prod__needs_wakeup(fq))
-			(void)poll(rxq->fds, 1, 1000);
+			recvfrom(xsk_socket__fd(rxq->xsk), NULL, 0,
+				 MSG_DONTWAIT, NULL, NULL);
 #endif
 		return 0;
 	}

From patchwork Tue Mar 9 10:19:58 2021
From: Ciara Loftus <ciara.loftus@intel.com>
To: dev@dpdk.org
Cc: Ciara Loftus <ciara.loftus@intel.com>
Date: Tue, 9 Mar 2021 10:19:58 +0000
Message-Id: <20210309101958.27355-4-ciara.loftus@intel.com>
In-Reply-To: <20210309101958.27355-1-ciara.loftus@intel.com>
References: <20210224111852.11947-1-ciara.loftus@intel.com>
 <20210309101958.27355-1-ciara.loftus@intel.com>
Subject: [dpdk-dev] [PATCH v2 3/3] net/af_xdp: preferred busy polling
X-Patchwork-Id: 88747

This commit introduces support for preferred busy polling to the AF_XDP
PMD. This feature aims to improve single-core performance for AF_XDP
sockets under heavy load.

A new vdev arg called 'busy_budget' is introduced, with a default value
of 64. busy_budget is the value supplied to the kernel with the
SO_BUSY_POLL_BUDGET socket option and represents the busy-polling NAPI
budget.
To set the budget to a different value, e.g. 256:

--vdev=net_af_xdp0,iface=eth0,busy_budget=256

Preferred busy polling is enabled by default, provided a kernel of
version v5.11 or later is in use. To disable it, set the budget to
zero.

The following settings are also strongly recommended for use in
conjunction with this feature:

echo 2 | sudo tee /sys/class/net/eth0/napi_defer_hard_irqs
echo 200000 | sudo tee /sys/class/net/eth0/gro_flush_timeout

.. where eth0 is the interface being used by the PMD.

Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
---
 doc/guides/nics/af_xdp.rst             |  38 +++++++-
 doc/guides/rel_notes/release_21_05.rst |   4 +
 drivers/net/af_xdp/compat.h            |  14 +++
 drivers/net/af_xdp/rte_eth_af_xdp.c    | 117 ++++++++++++++++++++++---
 4 files changed, 161 insertions(+), 12 deletions(-)

diff --git a/doc/guides/nics/af_xdp.rst b/doc/guides/nics/af_xdp.rst
index 5ed24374f8..8bf40b5f0f 100644
--- a/doc/guides/nics/af_xdp.rst
+++ b/doc/guides/nics/af_xdp.rst
@@ -35,6 +35,7 @@ The following options can be provided to set up an af_xdp port in DPDK.
 * ``shared_umem`` - PMD will attempt to share UMEM with others (optional,
   default 0);
 * ``xdp_prog`` - path to custom xdp program (optional, default none);
+* ``busy_budget`` - busy polling budget (optional, default 64);
 
 Prerequisites
 -------------
@@ -51,6 +52,7 @@ This is a Linux-specific PMD, thus the following prerequisites apply:
 * For shared_umem, it requires kernel version v5.10 or later and libbpf version
   v0.2.0 or later.
 * For 32-bit OS, a kernel with version 5.4 or later is required.
+* For busy polling, kernel version v5.11 or later is required.
 
 Set up an af_xdp interface
 -----------------------------
@@ -107,4 +109,38 @@ Limitations
   .. code-block:: console
 
     --vdev net_af_xdp0,iface=ens786f1,shared_umem=1 \
-    --vdev net_af_xdp1,iface=ens786f2,shared_umem=1 \
\ No newline at end of file
+    --vdev net_af_xdp1,iface=ens786f2,shared_umem=1 \
+
+- **Preferred Busy Polling**
+
+  The SO_PREFER_BUSY_POLL socket option was introduced in kernel v5.11. It can
+  deliver a performance improvement for sockets with heavy traffic loads and
+  can significantly improve single-core performance in this context.
+
+  The feature is enabled by default in the AF_XDP PMD. To disable it, set the
+  'busy_budget' vdevarg to zero:
+
+  .. code-block:: console
+
+    --vdev net_af_xdp0,iface=ens786f1,busy_budget=0
+
+  The default 'busy_budget' is 64 and it represents the number of packets the
+  kernel will attempt to process in the netdev's NAPI context. You can change
+  the value for example to 256 like so:
+
+  .. code-block:: console
+
+    --vdev net_af_xdp0,iface=ens786f1,busy_budget=256
+
+  It is also strongly recommended to set the following for optimal performance:
+
+  .. code-block:: console
+
+    echo 2 | sudo tee /sys/class/net/ens786f1/napi_defer_hard_irqs
+    echo 200000 | sudo tee /sys/class/net/ens786f1/gro_flush_timeout
+
+  The above defers interrupts for interface ens786f1 and schedules its NAPI
+  context from a watchdog timer instead of from softirqs. More information
+  on this feature can be found at [1].
+
+  [1] https://lwn.net/Articles/837010/
\ No newline at end of file
diff --git a/doc/guides/rel_notes/release_21_05.rst b/doc/guides/rel_notes/release_21_05.rst
index 23f7f0bff9..2d4794aa21 100644
--- a/doc/guides/rel_notes/release_21_05.rst
+++ b/doc/guides/rel_notes/release_21_05.rst
@@ -70,6 +70,10 @@ New Features
 
   * Added command to display Rx queue used descriptor count.
     ``show port (port_id) rxq (queue_id) desc used count``
 
+* **Updated the AF_XDP driver.**
+
+  * Added support for preferred busy polling.
+
 
 Removed Items
 -------------
diff --git a/drivers/net/af_xdp/compat.h b/drivers/net/af_xdp/compat.h
index 7aa40d522e..545c8aa395 100644
--- a/drivers/net/af_xdp/compat.h
+++ b/drivers/net/af_xdp/compat.h
@@ -39,3 +39,17 @@ create_shared_socket(struct xsk_socket **xsk_ptr __rte_unused,
 	return -1;
 }
 #endif
+
+#ifdef XDP_USE_NEED_WAKEUP
+static int
+syscall_needed(struct xsk_ring_prod *q, uint32_t busy_budget)
+{
+	return xsk_ring_prod__needs_wakeup(q) | busy_budget;
+}
+#else
+static int
+syscall_needed(struct xsk_ring_prod *q __rte_unused, uint32_t busy_budget)
+{
+	return busy_budget;
+}
+#endif
diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c
index 9c0e935cd3..4953525484 100644
--- a/drivers/net/af_xdp/rte_eth_af_xdp.c
+++ b/drivers/net/af_xdp/rte_eth_af_xdp.c
@@ -41,6 +41,13 @@
 
 #include "compat.h"
 
+#ifndef SO_PREFER_BUSY_POLL
+#define SO_PREFER_BUSY_POLL 69
+#endif
+#ifndef SO_BUSY_POLL_BUDGET
+#define SO_BUSY_POLL_BUDGET 70
+#endif
+
 #ifndef SOL_XDP
 #define SOL_XDP 283
@@ -65,6 +72,8 @@ RTE_LOG_REGISTER(af_xdp_logtype, pmd.net.af_xdp, NOTICE);
 #define ETH_AF_XDP_DFLT_NUM_DESCS	XSK_RING_CONS__DEFAULT_NUM_DESCS
 #define ETH_AF_XDP_DFLT_START_QUEUE_IDX	0
 #define ETH_AF_XDP_DFLT_QUEUE_COUNT	1
+#define ETH_AF_XDP_DFLT_BUSY_BUDGET	64
+#define ETH_AF_XDP_DFLT_BUSY_TIMEOUT	20
 
 #define ETH_AF_XDP_RX_BATCH_SIZE	XSK_RING_CONS__DEFAULT_NUM_DESCS
 #define ETH_AF_XDP_TX_BATCH_SIZE	XSK_RING_CONS__DEFAULT_NUM_DESCS
@@ -100,6 +109,7 @@ struct pkt_rx_queue {
 	struct pkt_tx_queue *pair;
 	struct pollfd fds[1];
 	int xsk_queue_idx;
+	uint32_t busy_budget;
 };
 
 struct tx_stats {
@@ -140,6 +150,7 @@ struct pmd_internals {
 #define ETH_AF_XDP_QUEUE_COUNT_ARG		"queue_count"
 #define ETH_AF_XDP_SHARED_UMEM_ARG		"shared_umem"
 #define ETH_AF_XDP_PROG_ARG			"xdp_prog"
+#define ETH_AF_XDP_BUDGET_ARG			"busy_budget"
 
 static const char * const valid_arguments[] = {
 	ETH_AF_XDP_IFACE_ARG,
@@ -147,6 +158,7 @@ static const char * const valid_arguments[] = {
 	ETH_AF_XDP_QUEUE_COUNT_ARG,
 	ETH_AF_XDP_SHARED_UMEM_ARG,
 	ETH_AF_XDP_PROG_ARG,
+	ETH_AF_XDP_BUDGET_ARG,
 	NULL
 };
 
@@ -261,11 +273,9 @@ af_xdp_rx_zc(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 
 	nb_pkts = xsk_ring_cons__peek(rx, nb_pkts, &idx_rx);
 	if (nb_pkts == 0) {
-#if defined(XDP_USE_NEED_WAKEUP)
-		if (xsk_ring_prod__needs_wakeup(fq))
+		if (syscall_needed(&rxq->fq, rxq->busy_budget))
 			recvfrom(xsk_socket__fd(rxq->xsk), NULL, 0,
 				 MSG_DONTWAIT, NULL, NULL);
-#endif
 
 		return 0;
 	}
@@ -446,9 +456,7 @@ kick_tx(struct pkt_tx_queue *txq, struct xsk_ring_cons *cq)
 
 	pull_umem_cq(umem, XSK_RING_CONS__DEFAULT_NUM_DESCS, cq);
 
-#if defined(XDP_USE_NEED_WAKEUP)
-	if (xsk_ring_prod__needs_wakeup(&txq->tx))
-#endif
+	if (syscall_needed(&txq->tx, txq->pair->busy_budget))
 		while (send(xsk_socket__fd(txq->pair->xsk), NULL, 0,
 			    MSG_DONTWAIT) < 0) {
 			/* some thing unexpected */
@@ -795,6 +803,8 @@ eth_dev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
 	dev_info->max_mtu = ETH_AF_XDP_FRAME_SIZE - XDP_PACKET_HEADROOM;
 #endif
 
+	dev_info->default_rxportconf.burst_size = ETH_AF_XDP_DFLT_BUSY_BUDGET;
+	dev_info->default_txportconf.burst_size = ETH_AF_XDP_DFLT_BUSY_BUDGET;
 	dev_info->default_rxportconf.nb_queues = 1;
 	dev_info->default_txportconf.nb_queues = 1;
 	dev_info->default_rxportconf.ring_size = ETH_AF_XDP_DFLT_NUM_DESCS;
@@ -1142,6 +1152,65 @@ load_custom_xdp_prog(const char *prog_path, int if_index)
 	return 0;
 }
 
+/* Detect support for busy polling through setsockopt(). */
+static int
+configure_preferred_busy_poll(struct pkt_rx_queue *rxq)
+{
+	int sock_opt = 1;
+	int fd = xsk_socket__fd(rxq->xsk);
+	int ret = 0;
+
+	ret = setsockopt(fd, SOL_SOCKET, SO_PREFER_BUSY_POLL,
+			 (void *)&sock_opt, sizeof(sock_opt));
+	if (ret < 0) {
+		AF_XDP_LOG(DEBUG, "Failed to set SO_PREFER_BUSY_POLL\n");
+		goto err_prefer;
+	}
+
+	sock_opt = ETH_AF_XDP_DFLT_BUSY_TIMEOUT;
+	ret = setsockopt(fd, SOL_SOCKET, SO_BUSY_POLL, (void *)&sock_opt,
+			 sizeof(sock_opt));
+	if (ret < 0) {
+		AF_XDP_LOG(DEBUG, "Failed to set SO_BUSY_POLL\n");
+		goto err_timeout;
+	}
+
+	sock_opt = rxq->busy_budget;
+	ret = setsockopt(fd, SOL_SOCKET, SO_BUSY_POLL_BUDGET,
+			 (void *)&sock_opt, sizeof(sock_opt));
+	if (ret < 0) {
+		AF_XDP_LOG(DEBUG, "Failed to set SO_BUSY_POLL_BUDGET\n");
+	} else {
+		AF_XDP_LOG(INFO, "Busy polling budget set to: %u\n",
+			   rxq->busy_budget);
+		return 0;
+	}
+
+	/* setsockopt failure - attempt to restore xsk to default state and
+	 * proceed without busy polling support.
+	 */
+	sock_opt = 0;
+	ret = setsockopt(fd, SOL_SOCKET, SO_BUSY_POLL, (void *)&sock_opt,
+			 sizeof(sock_opt));
+	if (ret < 0) {
+		AF_XDP_LOG(ERR, "Failed to unset SO_BUSY_POLL\n");
+		return -1;
+	}
+
+err_timeout:
+	sock_opt = 0;
+	ret = setsockopt(fd, SOL_SOCKET, SO_PREFER_BUSY_POLL,
+			 (void *)&sock_opt, sizeof(sock_opt));
+	if (ret < 0) {
+		AF_XDP_LOG(ERR, "Failed to unset SO_PREFER_BUSY_POLL\n");
+		return -1;
+	}
+
+err_prefer:
+	rxq->busy_budget = 0;
+	return 0;
+}
+
 static int
 xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq,
 	      int ring_size)
@@ -1200,6 +1269,15 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq,
 		goto err;
 	}
 #endif
+
+	if (rxq->busy_budget) {
+		ret = configure_preferred_busy_poll(rxq);
+		if (ret) {
+			AF_XDP_LOG(ERR, "Failed to configure busy polling.\n");
+			goto err;
+		}
+	}
+
 	ret = reserve_fill_queue(rxq->umem, reserve_size, fq_bufs, &rxq->fq);
 	if (ret) {
 		xsk_socket__delete(rxq->xsk);
@@ -1257,6 +1335,9 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
 		goto err;
 	}
 
+	if (!rxq->busy_budget)
+		AF_XDP_LOG(DEBUG, "Preferred busy polling not enabled\n");
+
 	rxq->fds[0].fd = xsk_socket__fd(rxq->xsk);
 	rxq->fds[0].events = POLLIN;
 
@@ -1465,7 +1546,8 @@ xdp_get_channels_info(const char *if_name, int *max_queues,
 
 static int
 parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue,
-		 int *queue_cnt, int *shared_umem, char *prog_path)
+		 int *queue_cnt, int *shared_umem, char *prog_path,
+		 int *busy_budget)
 {
 	int ret;
 
@@ -1496,6 +1578,11 @@ parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue,
 	if (ret < 0)
 		goto free_kvlist;
 
+	ret = rte_kvargs_process(kvlist, ETH_AF_XDP_BUDGET_ARG,
+				 &parse_integer_arg, busy_budget);
+	if (ret < 0)
+		goto free_kvlist;
+
 free_kvlist:
 	rte_kvargs_free(kvlist);
 	return ret;
@@ -1534,7 +1621,7 @@ get_iface_info(const char *if_name,
 static struct rte_eth_dev *
 init_internals(struct rte_vdev_device *dev, const char *if_name,
 	       int start_queue_idx, int queue_cnt, int shared_umem,
-	       const char *prog_path)
+	       const char *prog_path, int busy_budget)
 {
 	const char *name = rte_vdev_device_name(dev);
 	const unsigned int numa_node = dev->device.numa_node;
@@ -1595,6 +1682,7 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
 		internals->rx_queues[i].pair = &internals->tx_queues[i];
 		internals->rx_queues[i].xsk_queue_idx = start_queue_idx + i;
 		internals->tx_queues[i].xsk_queue_idx = start_queue_idx + i;
+		internals->rx_queues[i].busy_budget = busy_budget;
 	}
 
 	ret = get_iface_info(if_name, &internals->eth_addr,
@@ -1638,6 +1726,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 	int xsk_queue_cnt = ETH_AF_XDP_DFLT_QUEUE_COUNT;
 	int shared_umem = 0;
 	char prog_path[PATH_MAX] = {'\0'};
+	int busy_budget = -1;
 	struct rte_eth_dev *eth_dev = NULL;
 	const char *name;
 
@@ -1667,7 +1756,8 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 		dev->device.numa_node = rte_socket_id();
 
 	if (parse_parameters(kvlist, if_name, &xsk_start_queue_idx,
-			     &xsk_queue_cnt, &shared_umem, prog_path) < 0) {
+			     &xsk_queue_cnt, &shared_umem, prog_path,
+			     &busy_budget) < 0) {
 		AF_XDP_LOG(ERR, "Invalid kvargs value\n");
 		return -EINVAL;
 	}
@@ -1677,8 +1767,12 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
 		return -EINVAL;
 	}
 
+	busy_budget = busy_budget == -1 ? ETH_AF_XDP_DFLT_BUSY_BUDGET :
+					busy_budget;
+
 	eth_dev = init_internals(dev, if_name, xsk_start_queue_idx,
-				 xsk_queue_cnt, shared_umem, prog_path);
+				 xsk_queue_cnt, shared_umem, prog_path,
+				 busy_budget);
 	if (eth_dev == NULL) {
 		AF_XDP_LOG(ERR, "Failed to init internals\n");
 		return -1;
@@ -1723,4 +1817,5 @@ RTE_PMD_REGISTER_PARAM_STRING(net_af_xdp,
 			      "start_queue=<int> "
 			      "queue_count=<int> "
 			      "shared_umem=<int> "
-			      "xdp_prog=<string> ");
+			      "xdp_prog=<string> "
+			      "busy_budget=<int>");
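For reference, a standalone sketch of the same setsockopt() sequence the
PMD applies in configure_preferred_busy_poll() (the enable_busy_poll()
helper and its error handling are illustrative; the 20 us timeout and
the fallback option numbers 69/70 mirror the patch's default and compat
defines, and Linux headers are assumed for SO_BUSY_POLL):

#include <sys/socket.h>

#ifndef SO_PREFER_BUSY_POLL
#define SO_PREFER_BUSY_POLL 69	/* fallback for pre-v5.11 headers */
#endif
#ifndef SO_BUSY_POLL_BUDGET
#define SO_BUSY_POLL_BUDGET 70	/* fallback for pre-v5.11 headers */
#endif

static int
enable_busy_poll(int fd, int budget)
{
	int one = 1;
	int timeout_us = 20;	/* ETH_AF_XDP_DFLT_BUSY_TIMEOUT, in us */

	/* Prefer busy polling over interrupt-driven NAPI processing... */
	if (setsockopt(fd, SOL_SOCKET, SO_PREFER_BUSY_POLL,
		       &one, sizeof(one)) < 0)
		return -1;	/* likely a kernel older than v5.11 */
	/* ...busy poll for up to timeout_us microseconds per syscall... */
	if (setsockopt(fd, SOL_SOCKET, SO_BUSY_POLL,
		       &timeout_us, sizeof(timeout_us)) < 0)
		return -1;
	/* ...and process at most 'budget' packets per NAPI poll. */
	return setsockopt(fd, SOL_SOCKET, SO_BUSY_POLL_BUDGET,
			  &budget, sizeof(budget));
}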