From patchwork Fri Jul 29 07:18:40 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: =?utf-8?q?M=C3=A1rio_Kuka?= X-Patchwork-Id: 114448 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id 329ADA00C4; Fri, 29 Jul 2022 18:03:20 +0200 (CEST) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 0E7E342C3F; Fri, 29 Jul 2022 18:03:16 +0200 (CEST) Received: from office2.cesnet.cz (office2.cesnet.cz [195.113.144.244]) by mails.dpdk.org (Postfix) with ESMTP id 0BD9140151 for ; Fri, 29 Jul 2022 09:19:15 +0200 (CEST) Received: from dpdk-test8.liberouter.org (rt-tmc-kou.liberouter.org [195.113.172.126]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by office2.cesnet.cz (Postfix) with ESMTPSA id BE98440006D; Fri, 29 Jul 2022 09:19:14 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=cesnet.cz; s=office2-2020; t=1659079154; bh=ocVp0ZqgrtDMbLLkYjufngBhYo39g2yceTpg6KPkw24=; h=From:To:Cc:Subject:Date:In-Reply-To:References; b=Jb7PNcijb2bfjgJzqzD9mWuGhgo3lTQMH9SiaOsJAbHaNYvop0AiC32mBBr2eiD9m KVEbouA6MyB+Ivwvx5FN+NHKOBfS1fXIrBaCpS8l2+5d2Gl+sI5VqaxZOGIRJ29GEM 1sU1qwKsgMSesSsynRkyuJ39ZuKXGRlS5bn/0E8VAK43FfFNoRddh4dm0Ctc5T19DW IEaSybmtiUC5qjBCUQJMIAiKEnKJACfggwcmfv901uHSkncHohb8ObMCI7CilC0DgU u84MaQdxJJ1wzFg6UlNGUcIMEdpOJ3HI1s/c7ShD3Eb0QpxJ006+j2E1YMpB4suOOd L840By2sh98PA== From: =?utf-8?q?M=C3=A1rio_Kuka?= To: kuka@cesnet.cz Cc: dev@dpdk.org, mdr@ashroe.eu, reshma.pattan@intel.com, stephen@networkplumber.org Subject: [PATCH v2 1/2] pcapng: fix write more packets than IOV_MAX limit Date: Fri, 29 Jul 2022 09:18:40 +0200 Message-Id: 
<20220729071841.18198-2-kuka@cesnet.cz> X-Mailer: git-send-email 2.31.1 In-Reply-To: <20220729071841.18198-1-kuka@cesnet.cz> References: <20220725152811.409447-1-kuka@cesnet.cz> <20220729071841.18198-1-kuka@cesnet.cz> MIME-Version: 1.0 X-Mailman-Approved-At: Fri, 29 Jul 2022 18:03:14 +0200 X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org The rte_pcapng_write_packets() function fails when we try to write more packets than the IOV_MAX limit. The writev() system call is limited by the IOV_MAX limit. The iovcnt argument is valid if it is greater than 0 and less than or equal to IOV_MAX as defined in <limits.h>. To avoid this problem, we can check that all segments of the next packet will fit into the iovec buffer, whose capacity will be limited by the IOV_MAX limit. If not, we flush the current iovec buffer to the file by calling writev() and, if successful, fit the current packet at the beginning of the flushed iovec buffer. 
Fixes: 8d23ce8f5ee9 ("pcapng: add new library for writing pcapng files") Cc: stephen@networkplumber.org Signed-off-by: Mário Kuka --- app/test/test_pcapng.c | 42 +++++++++++++++++++++++++++++++++++- lib/pcapng/rte_pcapng.c | 47 ++++++++++++++++++++--------------------- 2 files changed, 64 insertions(+), 25 deletions(-) diff --git a/app/test/test_pcapng.c b/app/test/test_pcapng.c index 320dacea34..7f51946fff 100644 --- a/app/test/test_pcapng.c +++ b/app/test/test_pcapng.c @@ -110,7 +110,7 @@ test_setup(void) } /* Make a pool for cloned packets */ - mp = rte_pktmbuf_pool_create_by_ops("pcapng_test_pool", NUM_PACKETS, + mp = rte_pktmbuf_pool_create_by_ops("pcapng_test_pool", IOV_MAX + NUM_PACKETS, 0, 0, rte_pcapng_mbuf_size(pkt_len), SOCKET_ID_ANY, "ring_mp_sc"); @@ -237,6 +237,45 @@ test_validate(void) return ret; } +static int +test_write_over_limit_iov_max(void) +{ + struct rte_mbuf *orig; + struct rte_mbuf *clones[IOV_MAX + NUM_PACKETS] = { }; + struct dummy_mbuf mbfs; + unsigned int i; + ssize_t len; + + /* make a dummy packet */ + mbuf1_prepare(&mbfs, pkt_len); + + /* clone them */ + orig = &mbfs.mb[0]; + for (i = 0; i < IOV_MAX + NUM_PACKETS; i++) { + struct rte_mbuf *mc; + + mc = rte_pcapng_copy(port_id, 0, orig, mp, pkt_len, + rte_get_tsc_cycles(), 0); + if (mc == NULL) { + fprintf(stderr, "Cannot copy packet\n"); + return -1; + } + clones[i] = mc; + } + + /* write it to capture file */ + len = rte_pcapng_write_packets(pcapng, clones, IOV_MAX + NUM_PACKETS); + + rte_pktmbuf_free_bulk(clones, IOV_MAX + NUM_PACKETS); + + if (len <= 0) { + fprintf(stderr, "Write of packets failed\n"); + return -1; + } + + return 0; +} + static void test_cleanup(void) { @@ -256,6 +295,7 @@ unit_test_suite test_pcapng_suite = { TEST_CASE(test_write_packets), TEST_CASE(test_write_stats), TEST_CASE(test_validate), + TEST_CASE(test_write_over_limit_iov_max), TEST_CASES_END() } }; diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c index 06ad712bd1..e41cf909e1 100644 --- 
a/lib/pcapng/rte_pcapng.c +++ b/lib/pcapng/rte_pcapng.c @@ -551,33 +551,16 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue, return NULL; } -/* Count how many segments are in this array of mbufs */ -static unsigned int -mbuf_burst_segs(struct rte_mbuf *pkts[], unsigned int n) -{ - unsigned int i, iovcnt; - - for (iovcnt = 0, i = 0; i < n; i++) { - const struct rte_mbuf *m = pkts[i]; - - __rte_mbuf_sanity_check(m, 1); - - iovcnt += m->nb_segs; - } - return iovcnt; -} - /* Write pre-formatted packets to file. */ ssize_t rte_pcapng_write_packets(rte_pcapng_t *self, struct rte_mbuf *pkts[], uint16_t nb_pkts) { - int iovcnt = mbuf_burst_segs(pkts, nb_pkts); - struct iovec iov[iovcnt]; - unsigned int i, cnt; - ssize_t ret; + struct iovec iov[IOV_MAX]; + unsigned int i, cnt = 0; + ssize_t ret, total = 0; - for (i = cnt = 0; i < nb_pkts; i++) { + for (i = 0; i < nb_pkts; i++) { struct rte_mbuf *m = pkts[i]; struct pcapng_enhance_packet_block *epb; @@ -589,6 +572,20 @@ rte_pcapng_write_packets(rte_pcapng_t *self, return -1; } + /* + * Handle case of highly fragmented and large burst size + * Note: this assumes that max segments per mbuf < IOV_MAX + */ + if (unlikely(cnt + m->nb_segs >= IOV_MAX)) { + ret = writev(self->outfd, iov, cnt); + if (unlikely(ret < 0)) { + rte_errno = errno; + return -1; + } + total += ret; + cnt = 0; + } + /* * The DPDK port is recorded during pcapng_copy. * Map that to PCAPNG interface in file. 
@@ -601,10 +598,12 @@ rte_pcapng_write_packets(rte_pcapng_t *self, } while ((m = m->next)); } - ret = writev(self->outfd, iov, iovcnt); - if (unlikely(ret < 0)) + ret = writev(self->outfd, iov, cnt); + if (unlikely(ret < 0)) { rte_errno = errno; - return ret; + return -1; + } + return total + ret; } /* Create new pcapng writer handle */ From patchwork Fri Jul 29 07:18:41 2022 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 8bit X-Patchwork-Submitter: =?utf-8?q?M=C3=A1rio_Kuka?= X-Patchwork-Id: 114449 X-Patchwork-Delegate: thomas@monjalon.net Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id D7866A00C4; Fri, 29 Jul 2022 18:03:25 +0200 (CEST) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id 342B242C46; Fri, 29 Jul 2022 18:03:17 +0200 (CEST) Received: from office2.cesnet.cz (office2.cesnet.cz [195.113.144.244]) by mails.dpdk.org (Postfix) with ESMTP id 217A842C05 for ; Fri, 29 Jul 2022 09:19:15 +0200 (CEST) Received: from dpdk-test8.liberouter.org (rt-tmc-kou.liberouter.org [195.113.172.126]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest SHA256) (No client certificate requested) by office2.cesnet.cz (Postfix) with ESMTPSA id DB99840006E; Fri, 29 Jul 2022 09:19:14 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=cesnet.cz; s=office2-2020; t=1659079154; bh=RAJzHFS3EQkQbwg5eobqWAt9P7/eF7rSZS+/AN6Ya5o=; h=From:To:Cc:Subject:Date:In-Reply-To:References; b=dJaEL/Nq55qjd+7lRtgeyZYqxYRlC0WznIBx11Wp724QBPFXUaGvI0gPUPH2xe582 3D2ka/rZDPC5hGnmqQtNg3yoCqT9ydrwrpY5/RpOW+9MJNdCreeeD/t1xMbODpKNzf /ME6FNxgz5hJs+6izJKcI17kjCwpFOm1COBvnD9zSYA8sveOiro4WKwIi+ya6ynvEy YPfqcOr6F2k34RC0Cc7Ys1DGUCvgza0F+4oCckPmuglrV1CmfoBt8H/PY12W7DmIc/ 
/RTBTGdK8mlomSksCwWYotc+tAEIBmwFRjJ0Ocw+CiRV2w3jnBgjYot+P6y3gP/CoG j9x6Lu/m0uUnA== From: =?utf-8?q?M=C3=A1rio_Kuka?= To: kuka@cesnet.cz Cc: dev@dpdk.org, mdr@ashroe.eu, reshma.pattan@intel.com, stephen@networkplumber.org Subject: [PATCH v2 2/2] pcapng: check if writev() returns a partial write Date: Fri, 29 Jul 2022 09:18:41 +0200 Message-Id: <20220729071841.18198-3-kuka@cesnet.cz> X-Mailer: git-send-email 2.31.1 In-Reply-To: <20220729071841.18198-1-kuka@cesnet.cz> References: <20220725152811.409447-1-kuka@cesnet.cz> <20220729071841.18198-1-kuka@cesnet.cz> MIME-Version: 1.0 X-Mailman-Approved-At: Fri, 29 Jul 2022 18:03:14 +0200 X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org The result from writev() is not checked. When writev() returns a partial write, the output file will not contain all packets from the pkts buffer and some packets may be partially written, which is undesirable behavior. To avoid this problem, we have to check the number of bytes returned from the writev(), and if we get a partial write, we need to call the writev() function again on any iov buffers that were not written or were written partially. Fixes: 8d23ce8f5ee9 ("pcapng: add new library for writing pcapng files") Cc: stephen@networkplumber.org Signed-off-by: Mário Kuka --- lib/pcapng/rte_pcapng.c | 67 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 65 insertions(+), 2 deletions(-) diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c index e41cf909e1..7c1136337c 100644 --- a/lib/pcapng/rte_pcapng.c +++ b/lib/pcapng/rte_pcapng.c @@ -551,6 +551,69 @@ rte_pcapng_copy(uint16_t port_id, uint32_t queue, return NULL; } +/* + * Update iov after writev() has returned written. We must find how many iov + * buffers (from beginning) have been written. 
The first buffer that was not + * written fully is to be updated accordingly. + * + * Returns offset of buffer that was not written fully. + */ +static int +pcapng_update_iov(struct iovec *iov, const int count, size_t written) +{ + int i; + + for (i = 0; written > 0 && i < count; ++i) { + if (written < iov[i].iov_len) { + /* found buffer that was not written fully */ + iov[i].iov_base = RTE_PTR_ADD(iov[i].iov_base, written); + iov[i].iov_len -= written; + + return i; + } + + written -= iov[i].iov_len; + } + + return count; +} + +/* + * Writes all iovcnt buffers of data described by iov to the file associated with + * the file descriptor fd. + */ +static ssize_t +pcapng_writev(int fd, struct iovec *iov, const int count) +{ + size_t total = 0; + int at = 0; + + while (at < count) { + /* + * Note: writev() can return the following on a write request: + * Complete: + * written = [sum of all iov.iov_len] + * Partial: + * written < [sum of all iov.iov_len] + * Deferred: + * written = -1, errno = [EAGAIN] + * + * Partial and deferred writes are only possible with O_NONBLOCK set. + * + * If we get a partial result, we have to call the writev() again on any ivo buffers + * that have not been fully written. + */ + ssize_t written = writev(fd, &iov[at], count - at); + if (unlikely(written < 0)) + return written; + + total += written; + at += pcapng_update_iov(&iov[at], count - at, written); + } + + return total; +} + /* Write pre-formatted packets to file. 
*/ ssize_t rte_pcapng_write_packets(rte_pcapng_t *self, @@ -577,7 +640,7 @@ rte_pcapng_write_packets(rte_pcapng_t *self, * Note: this assumes that max segments per mbuf < IOV_MAX */ if (unlikely(cnt + m->nb_segs >= IOV_MAX)) { - ret = writev(self->outfd, iov, cnt); + ret = pcapng_writev(self->outfd, iov, cnt); if (unlikely(ret < 0)) { rte_errno = errno; return -1; @@ -598,7 +661,7 @@ rte_pcapng_write_packets(rte_pcapng_t *self, } while ((m = m->next)); } - ret = writev(self->outfd, iov, cnt); + ret = pcapng_writev(self->outfd, iov, cnt); if (unlikely(ret < 0)) { rte_errno = errno; return -1;