[v2,2/2] pcapng: check if writev() returns a partial write

Message ID 20220729071841.18198-3-kuka@cesnet.cz (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series pcapng: fix some issues with writing packets. |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK
ci/intel-Testing success Testing PASS
ci/iol-aarch64-compile-testing success Testing PASS
ci/iol-mellanox-Performance success Performance Testing PASS
ci/iol-x86_64-unit-testing success Testing PASS
ci/iol-x86_64-compile-testing success Testing PASS
ci/iol-intel-Functional success Functional Testing PASS
ci/iol-aarch64-unit-testing success Testing PASS
ci/iol-intel-Performance success Performance Testing PASS

Commit Message

Mário Kuka July 29, 2022, 7:18 a.m. UTC
  The result from writev() is not checked. When writev() returns
a partial write, the output file will not contain all packets from the
pkts buffer and some packets may be partially written, which is
undesirable behavior.

To avoid this problem, we have to check the number of bytes returned
from writev(), and if we get a partial write, we need to call the
writev() function again on any iov buffers that were not written or
were written partially.

Fixes: 8d23ce8f5ee9 ("pcapng: add new library for writing pcapng files")
Cc: stephen@networkplumber.org

Signed-off-by: Mário Kuka <kuka@cesnet.cz>
---
 lib/pcapng/rte_pcapng.c | 67 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 65 insertions(+), 2 deletions(-)
  

Patch

diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c
index e41cf909e1..7c1136337c 100644
--- a/lib/pcapng/rte_pcapng.c
+++ b/lib/pcapng/rte_pcapng.c
@@ -551,6 +551,69 @@  rte_pcapng_copy(uint16_t port_id, uint32_t queue,
 	return NULL;
 }
 
+/*
+ * Advance iov past the bytes consumed by writev(): fully written buffers are
+ * skipped and the first buffer with data left is trimmed in place.
+ *
+ * Returns index of the first buffer with data left, or count if there is none.
+ */
+static int
+pcapng_update_iov(struct iovec *iov, const int count, size_t written)
+{
+	int i;
+
+	/* keep scanning when written hits 0: later buffers are still pending */
+	for (i = 0; i < count; ++i) {
+		if (written < iov[i].iov_len) {
+			/* found buffer that was not written fully */
+			iov[i].iov_base = RTE_PTR_ADD(iov[i].iov_base, written);
+			iov[i].iov_len -= written;
+
+			return i;
+		}
+
+		written -= iov[i].iov_len;
+	}
+
+	return count;
+}
+
+/*
+ * Write all count buffers described by iov to the file descriptor fd, calling
+ * writev() again after a partial write. The iov array is modified in place.
+ *
+ * Returns total bytes written, or the negative writev() result on failure.
+ */
+static ssize_t
+pcapng_writev(int fd, struct iovec *iov, const int count)
+{
+	size_t nbytes = 0;
+	int pos;
+
+	for (pos = 0; pos < count; ) {
+		struct iovec *pending = &iov[pos];
+		const int remaining = count - pos;
+		ssize_t rc;
+
+		/*
+		 * writev() outcomes handled here:
+		 *     complete: rc == sum of the pending iov_len values
+		 *     partial:  rc <  sum of the pending iov_len values
+		 *     failure:  rc == -1 with errno set (e.g. EAGAIN)
+		 * After a partial write the loop retries from the first iov
+		 * buffer that has not been fully written.
+		 */
+		rc = writev(fd, pending, remaining);
+		if (unlikely(rc < 0))
+			return rc;
+
+		nbytes += rc;
+		pos += pcapng_update_iov(pending, remaining, rc);
+	}
+
+	return nbytes;
+}
+
 /* Write pre-formatted packets to file. */
 ssize_t
 rte_pcapng_write_packets(rte_pcapng_t *self,
@@ -577,7 +640,7 @@  rte_pcapng_write_packets(rte_pcapng_t *self,
 		 * Note: this assumes that max segments per mbuf < IOV_MAX
 		 */
 		if (unlikely(cnt + m->nb_segs >= IOV_MAX)) {
-			ret = writev(self->outfd, iov, cnt);
+			ret = pcapng_writev(self->outfd, iov, cnt);
 			if (unlikely(ret < 0)) {
 				rte_errno = errno;
 				return -1;
@@ -598,7 +661,7 @@  rte_pcapng_write_packets(rte_pcapng_t *self,
 		} while ((m = m->next));
 	}
 
-	ret = writev(self->outfd, iov, cnt);
+	ret = pcapng_writev(self->outfd, iov, cnt);
 	if (unlikely(ret < 0)) {
 		rte_errno = errno;
 		return -1;