[v14,02/12] librte_pcapng: add new library for writing pcapng files

Message ID 20211015201129.63220-3-stephen@networkplumber.org (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series Packet capture framework update |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Stephen Hemminger Oct. 15, 2021, 8:11 p.m. UTC
  This is utility library for writing pcapng format files
used by Wireshark family of utilities. Older tcpdump
also knows how to read (but not write) this format.

See
  https://github.com/pcapng/pcapng/

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 lib/meson.build           |   1 +
 lib/pcapng/meson.build    |   8 +
 lib/pcapng/pcapng_proto.h | 129 ++++++++
 lib/pcapng/rte_pcapng.c   | 607 ++++++++++++++++++++++++++++++++++++++
 lib/pcapng/rte_pcapng.h   | 195 ++++++++++++
 lib/pcapng/version.map    |  12 +
 6 files changed, 952 insertions(+)
 create mode 100644 lib/pcapng/meson.build
 create mode 100644 lib/pcapng/pcapng_proto.h
 create mode 100644 lib/pcapng/rte_pcapng.c
 create mode 100644 lib/pcapng/rte_pcapng.h
 create mode 100644 lib/pcapng/version.map
  

Comments

Pattan, Reshma Oct. 19, 2021, 10:24 a.m. UTC | #1
> -----Original Message-----
> From: Stephen Hemminger <stephen@networkplumber.org>
> Subject: [PATCH v14 02/12] librte_pcapng: add new library for writing pcapng
> files
> 
> This is utility library for writing pcapng format files used by Wireshark family of
> utilities. Older tcpdump also knows how to read (but not write) this format.
> 
> See
>   https://github.com/pcapng/pcapng/
> 
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>

Acked-by: Reshma Pattan <reshma.pattan@intel.com>
  

Patch

diff --git a/lib/meson.build b/lib/meson.build
index ef5ff522aeaf..15150efa19a7 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -41,6 +41,7 @@  libraries = [
         'latencystats',
         'lpm',
         'member',
+        'pcapng',
         'power',
         'pdump',
         'rawdev',
diff --git a/lib/pcapng/meson.build b/lib/pcapng/meson.build
new file mode 100644
index 000000000000..fe636bdf3c0b
--- /dev/null
+++ b/lib/pcapng/meson.build
@@ -0,0 +1,8 @@ 
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2019 Microsoft Corporation
+
+version = 1
+sources = files('rte_pcapng.c')
+headers = files('rte_pcapng.h')
+
+deps += ['ethdev']
diff --git a/lib/pcapng/pcapng_proto.h b/lib/pcapng/pcapng_proto.h
new file mode 100644
index 000000000000..47161d8a1213
--- /dev/null
+++ b/lib/pcapng/pcapng_proto.h
@@ -0,0 +1,129 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019-2020 Microsoft Corporation
+ *
+ * PCAP Next Generation Capture File writer
+ *
+ * See: https://github.com/pcapng/pcapng/ for the file format.
+ */
+
+enum pcapng_block_types {
+	PCAPNG_INTERFACE_BLOCK		= 1,
+	PCAPNG_PACKET_BLOCK,		/* Obsolete */
+	PCAPNG_SIMPLE_PACKET_BLOCK,
+	PCAPNG_NAME_RESOLUTION_BLOCK,
+	PCAPNG_INTERFACE_STATS_BLOCK,
+	PCAPNG_ENHANCED_PACKET_BLOCK,
+
+	PCAPNG_SECTION_BLOCK		= 0x0A0D0D0A,
+};
+
+struct pcapng_option {
+	uint16_t code;
+	uint16_t length;
+	uint8_t data[];
+};
+
+#define PCAPNG_BYTE_ORDER_MAGIC 0x1A2B3C4D
+#define PCAPNG_MAJOR_VERS 1
+#define PCAPNG_MINOR_VERS 0
+
+enum pcapng_opt {
+	PCAPNG_OPT_END	= 0,
+	PCAPNG_OPT_COMMENT = 1,
+};
+
+struct pcapng_section_header {
+	uint32_t block_type;
+	uint32_t block_length;
+	uint32_t byte_order_magic;
+	uint16_t major_version;
+	uint16_t minor_version;
+	uint64_t section_length;
+};
+
+enum pcapng_section_opt {
+	PCAPNG_SHB_HARDWARE = 2,
+	PCAPNG_SHB_OS	    = 3,
+	PCAPNG_SHB_USERAPPL = 4,
+};
+
+struct pcapng_interface_block {
+	uint32_t block_type;	/* 1 */
+	uint32_t block_length;
+	uint16_t link_type;
+	uint16_t reserved;
+	uint32_t snap_len;
+};
+
+enum pcapng_interface_options {
+	PCAPNG_IFB_NAME	 = 2,
+	PCAPNG_IFB_DESCRIPTION,
+	PCAPNG_IFB_IPV4ADDR,
+	PCAPNG_IFB_IPV6ADDR,
+	PCAPNG_IFB_MACADDR,
+	PCAPNG_IFB_EUIADDR,
+	PCAPNG_IFB_SPEED,
+	PCAPNG_IFB_TSRESOL,
+	PCAPNG_IFB_TZONE,
+	PCAPNG_IFB_FILTER,
+	PCAPNG_IFB_OS,
+	PCAPNG_IFB_FCSLEN,
+	PCAPNG_IFB_TSOFFSET,
+	PCAPNG_IFB_HARDWARE,
+};
+
+struct pcapng_enhance_packet_block {
+	uint32_t block_type;	/* 6 */
+	uint32_t block_length;
+	uint32_t interface_id;
+	uint32_t timestamp_hi;
+	uint32_t timestamp_lo;
+	uint32_t capture_length;
+	uint32_t original_length;
+};
+
+/* Flags values */
+#define PCAPNG_IFB_INBOUND   0b01
+#define PCAPNG_IFB_OUTBOUND  0b10
+
+enum pcapng_epb_options {
+	PCAPNG_EPB_FLAGS = 2,
+	PCAPNG_EPB_HASH,
+	PCAPNG_EPB_DROPCOUNT,
+	PCAPNG_EPB_PACKETID,
+	PCAPNG_EPB_QUEUE,
+	PCAPNG_EPB_VERDICT,
+};
+
+enum pcapng_epb_hash {
+	PCAPNG_HASH_2COMP = 0,
+	PCAPNG_HASH_XOR,
+	PCAPNG_HASH_CRC32,
+	PCAPNG_HASH_MD5,
+	PCAPNG_HASH_SHA1,
+	PCAPNG_HASH_TOEPLITZ,
+};
+
+struct pcapng_simple_packet {
+	uint32_t block_type;	/* 3 */
+	uint32_t block_length;
+	uint32_t packet_length;
+};
+
+struct pcapng_statistics {
+	uint32_t block_type;	/* 5 */
+	uint32_t block_length;
+	uint32_t interface_id;
+	uint32_t timestamp_hi;
+	uint32_t timestamp_lo;
+};
+
+enum pcapng_isb_options {
+	PCAPNG_ISB_STARTTIME = 2,
+	PCAPNG_ISB_ENDTIME,
+	PCAPNG_ISB_IFRECV,
+	PCAPNG_ISB_IFDROP,
+	PCAPNG_ISB_FILTERACCEPT,
+	PCAPNG_ISB_OSDROP,
+	PCAPNG_ISB_USRDELIV,
+};
diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c
new file mode 100644
index 000000000000..3a399de3d037
--- /dev/null
+++ b/lib/pcapng/rte_pcapng.c
@@ -0,0 +1,607 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Microsoft Corporation
+ */
+
+#include <errno.h>
+#include <net/if.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/uio.h>
+#include <sys/utsname.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_dev.h>
+#include <rte_errno.h>
+#include <rte_ethdev.h>
+#include <rte_ether.h>
+#include <rte_mbuf.h>
+#include <rte_pcapng.h>
+#include <rte_time.h>
+
+#include "pcapng_proto.h"
+
+/* conversion from DPDK speed to PCAPNG */
+#define PCAPNG_MBPS_SPEED 1000000ull
+
+/* Format of the capture file handle */
+struct rte_pcapng {
+	int  outfd;		/* output file */
+	/* DPDK port id to interface index in file */
+	uint32_t port_index[RTE_MAX_ETHPORTS];
+};
+
+/* For converting TSC cycles to PCAPNG ns format */
+struct pcapng_time {
+	uint64_t ns;
+	uint64_t cycles;
+} pcapng_time;
+
+RTE_INIT(pcapng_init)
+{
+	struct timespec ts;
+
+	pcapng_time.cycles = rte_get_tsc_cycles();
+	clock_gettime(CLOCK_REALTIME, &ts);
+	pcapng_time.ns = rte_timespec_to_ns(&ts);
+}
+
+/* PCAPNG timestamps are in nanoseconds */
+static uint64_t pcapng_tsc_to_ns(uint64_t cycles)
+{
+	uint64_t delta;
+
+	delta = cycles - pcapng_time.cycles;
+	return pcapng_time.ns + (delta * NSEC_PER_SEC) / rte_get_tsc_hz();
+}
+
+/* length of option including padding */
+static uint16_t pcapng_optlen(uint16_t len)
+{
+	return RTE_ALIGN(sizeof(struct pcapng_option) + len,
+			 sizeof(uint32_t));
+}
+
+/* build TLV option and return location of next */
+static struct pcapng_option *
+pcapng_add_option(struct pcapng_option *popt, uint16_t code,
+		  const void *data, uint16_t len)
+{
+	popt->code = code;
+	popt->length = len;
+	memcpy(popt->data, data, len);
+
+	return (struct pcapng_option *)((uint8_t *)popt + pcapng_optlen(len));
+}
+
+/*
+ * Write required initial section header describing the capture
+ */
+static int
+pcapng_section_block(rte_pcapng_t *self,
+		    const char *os, const char *hw,
+		    const char *app, const char *comment)
+{
+	struct pcapng_section_header *hdr;
+	struct pcapng_option *opt;
+	void *buf;
+	uint32_t len;
+	ssize_t cc;
+
+	len = sizeof(*hdr);
+	if (hw)
+		len += pcapng_optlen(strlen(hw));
+	if (os)
+		len += pcapng_optlen(strlen(os));
+	if (app)
+		len += pcapng_optlen(strlen(app));
+	if (comment)
+		len += pcapng_optlen(strlen(comment));
+
+	/* reserve space for OPT_END */
+	len += pcapng_optlen(0);
+	len += sizeof(uint32_t);
+
+	buf = calloc(1, len);
+	if (!buf)
+		return -1;
+
+	hdr = (struct pcapng_section_header *)buf;
+	*hdr = (struct pcapng_section_header) {
+		.block_type = PCAPNG_SECTION_BLOCK,
+		.block_length = len,
+		.byte_order_magic = PCAPNG_BYTE_ORDER_MAGIC,
+		.major_version = PCAPNG_MAJOR_VERS,
+		.minor_version = PCAPNG_MINOR_VERS,
+		.section_length = UINT64_MAX,
+	};
+
+	/* After the section header insert variable length options. */
+	opt = (struct pcapng_option *)(hdr + 1);
+	if (comment)
+		opt = pcapng_add_option(opt, PCAPNG_OPT_COMMENT,
+					comment, strlen(comment));
+	if (hw)
+		opt = pcapng_add_option(opt, PCAPNG_SHB_HARDWARE,
+					hw, strlen(hw));
+	if (os)
+		opt = pcapng_add_option(opt, PCAPNG_SHB_OS,
+					os, strlen(os));
+	if (app)
+		opt = pcapng_add_option(opt, PCAPNG_SHB_USERAPPL,
+					app, strlen(app));
+
+	/* The standard requires last option to be OPT_END */
+	opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0);
+
+	/* clone block_length after option */
+	memcpy(opt, &hdr->block_length, sizeof(uint32_t));
+
+	cc = write(self->outfd, buf, len);
+	free(buf);
+
+	return cc;
+}
+
+/* Write an interface block for a DPDK port */
+static int
+pcapng_add_interface(rte_pcapng_t *self, uint16_t port)
+{
+	struct pcapng_interface_block *hdr;
+	struct rte_eth_dev_info dev_info;
+	struct rte_ether_addr *ea, macaddr;
+	const struct rte_device *dev;
+	struct rte_eth_link link;
+	struct pcapng_option *opt;
+	const uint8_t tsresol = 9;	/* nanosecond resolution */
+	uint32_t len;
+	void *buf;
+	char ifname[IF_NAMESIZE];
+	char ifhw[256];
+	uint64_t speed = 0;
+
+	if (rte_eth_dev_info_get(port, &dev_info) < 0)
+		return -1;
+
+	/* make something like an interface name */
+	if (if_indextoname(dev_info.if_index, ifname) == NULL)
+		snprintf(ifname, IF_NAMESIZE, "dpdk:%u", port);
+
+	/* make a useful device hardware string */
+	dev = dev_info.device;
+	if (dev)
+		snprintf(ifhw, sizeof(ifhw),
+			 "%s-%s", dev->bus->name, dev->name);
+
+	/* DPDK reports in units of Mbps */
+	rte_eth_link_get(port, &link);
+	if (link.link_status == ETH_LINK_UP)
+		speed = link.link_speed * PCAPNG_MBPS_SPEED;
+
+	if (rte_eth_macaddr_get(port, &macaddr) < 0)
+		ea = NULL;
+	else
+		ea = &macaddr;
+
+	/* Compute length of interface block options */
+	len = sizeof(*hdr);
+
+	len += pcapng_optlen(sizeof(tsresol));	/* timestamp */
+	len += pcapng_optlen(strlen(ifname));	/* ifname */
+
+	if (ea)
+		len += pcapng_optlen(RTE_ETHER_ADDR_LEN); /* macaddr */
+	if (speed != 0)
+		len += pcapng_optlen(sizeof(uint64_t));
+	if (dev)
+		len += pcapng_optlen(strlen(ifhw));
+
+	len += pcapng_optlen(0);
+	len += sizeof(uint32_t);
+
+	buf = alloca(len);
+	if (!buf)
+		return -1;
+
+	hdr = (struct pcapng_interface_block *)buf;
+	*hdr = (struct pcapng_interface_block) {
+		.block_type = PCAPNG_INTERFACE_BLOCK,
+		.link_type = 1,		/* DLT_EN10MB - Ethernet */
+		.block_length = len,
+	};
+
+	opt = (struct pcapng_option *)(hdr + 1);
+	opt = pcapng_add_option(opt, PCAPNG_IFB_TSRESOL,
+				&tsresol, sizeof(tsresol));
+	opt = pcapng_add_option(opt, PCAPNG_IFB_NAME,
+				ifname, strlen(ifname));
+	if (ea)
+		opt = pcapng_add_option(opt, PCAPNG_IFB_MACADDR,
+					ea, RTE_ETHER_ADDR_LEN);
+	if (speed != 0)
+		opt = pcapng_add_option(opt, PCAPNG_IFB_SPEED,
+					 &speed, sizeof(uint64_t));
+	if (dev)
+		opt = pcapng_add_option(opt, PCAPNG_IFB_HARDWARE,
+					 ifhw, strlen(ifhw));
+	opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0);
+
+	/* clone block_length after optionsa */
+	memcpy(opt, &hdr->block_length, sizeof(uint32_t));
+
+	return write(self->outfd, buf, len);
+}
+
+/*
+ * Write the list of possible interfaces at the start
+ * of the file.
+ */
+static int
+pcapng_interfaces(rte_pcapng_t *self)
+{
+	uint16_t port_id;
+	uint16_t index = 0;
+
+	RTE_ETH_FOREACH_DEV(port_id) {
+		/* The list if ports in pcapng needs to be contiguous */
+		self->port_index[port_id] = index++;
+		if (pcapng_add_interface(self, port_id) < 0)
+			return -1;
+	}
+	return 0;
+}
+
+/*
+ * Write an Interface statistics block at the end of capture.
+ */
+ssize_t
+rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id,
+		       const char *comment,
+		       uint64_t start_time, uint64_t end_time,
+		       uint64_t ifrecv, uint64_t ifdrop)
+{
+	struct pcapng_statistics *hdr;
+	struct pcapng_option *opt;
+	uint32_t optlen, len;
+	uint8_t *buf;
+	uint64_t ns;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+
+	optlen = 0;
+
+	if (ifrecv != UINT64_MAX)
+		optlen += pcapng_optlen(sizeof(ifrecv));
+	if (ifdrop != UINT64_MAX)
+		optlen += pcapng_optlen(sizeof(ifdrop));
+	if (start_time != 0)
+		optlen += pcapng_optlen(sizeof(start_time));
+	if (end_time != 0)
+		optlen += pcapng_optlen(sizeof(end_time));
+	if (comment)
+		optlen += pcapng_optlen(strlen(comment));
+	if (optlen != 0)
+		optlen += pcapng_optlen(0);
+
+	len = sizeof(*hdr) + optlen + sizeof(uint32_t);
+	buf = alloca(len);
+	if (buf == NULL)
+		return -1;
+
+	hdr = (struct pcapng_statistics *)buf;
+	opt = (struct pcapng_option *)(hdr + 1);
+
+	if (comment)
+		opt = pcapng_add_option(opt, PCAPNG_OPT_COMMENT,
+					comment, strlen(comment));
+	if (start_time != 0)
+		opt = pcapng_add_option(opt, PCAPNG_ISB_STARTTIME,
+					 &start_time, sizeof(start_time));
+	if (end_time != 0)
+		opt = pcapng_add_option(opt, PCAPNG_ISB_ENDTIME,
+					 &end_time, sizeof(end_time));
+	if (ifrecv != UINT64_MAX)
+		opt = pcapng_add_option(opt, PCAPNG_ISB_IFRECV,
+				&ifrecv, sizeof(ifrecv));
+	if (ifdrop != UINT64_MAX)
+		opt = pcapng_add_option(opt, PCAPNG_ISB_IFDROP,
+				&ifdrop, sizeof(ifdrop));
+	if (optlen != 0)
+		opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0);
+
+	hdr->block_type = PCAPNG_INTERFACE_STATS_BLOCK;
+	hdr->block_length = len;
+	hdr->interface_id = self->port_index[port_id];
+
+	ns = pcapng_tsc_to_ns(rte_get_tsc_cycles());
+	hdr->timestamp_hi = ns >> 32;
+	hdr->timestamp_lo = (uint32_t)ns;
+
+	/* clone block_length after option */
+	memcpy(opt, &len, sizeof(uint32_t));
+
+	return write(self->outfd, buf, len);
+}
+
+uint32_t
+rte_pcapng_mbuf_size(uint32_t length)
+{
+	/* The VLAN and EPB header must fit in the mbuf headroom. */
+	RTE_ASSERT(sizeof(struct pcapng_enhance_packet_block) +
+		   sizeof(struct rte_vlan_hdr) <= RTE_PKTMBUF_HEADROOM);
+
+	/* The flags and queue information are added at the end. */
+	return sizeof(struct rte_mbuf)
+		+ RTE_ALIGN(length, sizeof(uint32_t))
+		+ pcapng_optlen(sizeof(uint32_t)) /* flag option */
+		+ pcapng_optlen(sizeof(uint32_t)) /* queue option */
+		+ sizeof(uint32_t);		  /*  length */
+}
+
+/* More generalized version rte_vlan_insert() */
+static int
+pcapng_vlan_insert(struct rte_mbuf *m, uint16_t ether_type, uint16_t tci)
+{
+	struct rte_ether_hdr *nh, *oh;
+	struct rte_vlan_hdr *vh;
+
+	if (!RTE_MBUF_DIRECT(m) || rte_mbuf_refcnt_read(m) > 1)
+		return -EINVAL;
+
+	if (rte_pktmbuf_data_len(m) < sizeof(*oh))
+		return -EINVAL;
+
+	oh = rte_pktmbuf_mtod(m, struct rte_ether_hdr *);
+	nh = (struct rte_ether_hdr *)
+		rte_pktmbuf_prepend(m, sizeof(struct rte_vlan_hdr));
+	if (nh == NULL)
+		return -ENOSPC;
+
+	memmove(nh, oh, 2 * RTE_ETHER_ADDR_LEN);
+	nh->ether_type = rte_cpu_to_be_16(ether_type);
+
+	vh = (struct rte_vlan_hdr *) (nh + 1);
+	vh->vlan_tci = rte_cpu_to_be_16(tci);
+
+	return 0;
+}
+
+/*
+ *   The mbufs created use the Pcapng standard enhanced packet  block.
+ *
+ *                         1                   2                   3
+ *     0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *  0 |                    Block Type = 0x00000006                    |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *  4 |                      Block Total Length                       |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *  8 |                         Interface ID                          |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * 12 |                        Timestamp (High)                       |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * 16 |                        Timestamp (Low)                        |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * 20 |                    Captured Packet Length                     |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * 24 |                    Original Packet Length                     |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * 28 /                                                               /
+ *    /                          Packet Data                          /
+ *    /              variable length, padded to 32 bits               /
+ *    /                                                               /
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *    |      Option Code = 0x0002     |     Option Length = 0x004     |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *    |              Flags (direction)                                |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *    |      Option Code = 0x0006     |     Option Length = 0x002     |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *    |              Queue id                                         |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *    |                      Block Total Length                       |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+
+/* Make a copy of original mbuf with pcapng header and options */
+struct rte_mbuf *
+rte_pcapng_copy(uint16_t port_id, uint32_t queue,
+		const struct rte_mbuf *md,
+		struct rte_mempool *mp,
+		uint32_t length, uint64_t cycles,
+		enum rte_pcapng_direction direction)
+{
+	struct pcapng_enhance_packet_block *epb;
+	uint32_t orig_len, data_len, padding, flags;
+	struct pcapng_option *opt;
+	const uint16_t optlen = pcapng_optlen(sizeof(flags)) + pcapng_optlen(sizeof(queue));
+	struct rte_mbuf *mc;
+	uint64_t ns;
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, NULL);
+#endif
+	ns = pcapng_tsc_to_ns(cycles);
+
+	orig_len = rte_pktmbuf_pkt_len(md);
+
+	/* Take snapshot of the data */
+	mc = rte_pktmbuf_copy(md, mp, 0, length);
+	if (unlikely(mc == NULL))
+		return NULL;
+
+	/* Expand any offloaded VLAN information */
+	if ((direction == RTE_PCAPNG_DIRECTION_IN &&
+	     (md->ol_flags & PKT_RX_VLAN_STRIPPED)) ||
+	    (direction == RTE_PCAPNG_DIRECTION_OUT &&
+	     (md->ol_flags & PKT_TX_VLAN))) {
+		if (pcapng_vlan_insert(mc, RTE_ETHER_TYPE_VLAN,
+				       md->vlan_tci) != 0)
+			goto fail;
+	}
+
+	if ((direction == RTE_PCAPNG_DIRECTION_IN &&
+	     (md->ol_flags & PKT_RX_QINQ_STRIPPED)) ||
+	    (direction == RTE_PCAPNG_DIRECTION_OUT &&
+	     (md->ol_flags & PKT_TX_QINQ))) {
+		if (pcapng_vlan_insert(mc, RTE_ETHER_TYPE_QINQ,
+				       md->vlan_tci_outer) != 0)
+			goto fail;
+	}
+
+	/* pad the packet to 32 bit boundary */
+	data_len = rte_pktmbuf_data_len(mc);
+	padding = RTE_ALIGN(data_len, sizeof(uint32_t)) - data_len;
+	if (padding > 0) {
+		void *tail = rte_pktmbuf_append(mc, padding);
+
+		if (tail == NULL)
+			goto fail;
+		memset(tail, 0, padding);
+	}
+
+	/* reserve trailing options and block length */
+	opt = (struct pcapng_option *)
+		rte_pktmbuf_append(mc, optlen + sizeof(uint32_t));
+	if (unlikely(opt == NULL))
+		goto fail;
+
+	switch (direction) {
+	case RTE_PCAPNG_DIRECTION_IN:
+		flags = PCAPNG_IFB_INBOUND;
+		break;
+	case RTE_PCAPNG_DIRECTION_OUT:
+		flags = PCAPNG_IFB_OUTBOUND;
+		break;
+	default:
+		flags = 0;
+	}
+
+	opt = pcapng_add_option(opt, PCAPNG_EPB_FLAGS,
+				&flags, sizeof(flags));
+
+	opt = pcapng_add_option(opt, PCAPNG_EPB_QUEUE,
+				&queue, sizeof(queue));
+
+	/* Note: END_OPT necessary here. Wireshark doesn't do it. */
+
+	/* Add PCAPNG packet header */
+	epb = (struct pcapng_enhance_packet_block *)
+		rte_pktmbuf_prepend(mc, sizeof(*epb));
+	if (unlikely(epb == NULL))
+		goto fail;
+
+	epb->block_type = PCAPNG_ENHANCED_PACKET_BLOCK;
+	epb->block_length = rte_pktmbuf_data_len(mc);
+
+	/* Interface index is filled in later during write */
+	mc->port = port_id;
+
+	epb->timestamp_hi = ns >> 32;
+	epb->timestamp_lo = (uint32_t)ns;
+	epb->capture_length = data_len;
+	epb->original_length = orig_len;
+
+	/* set trailer of block length */
+	*(uint32_t *)opt = epb->block_length;
+
+	return mc;
+
+fail:
+	rte_pktmbuf_free(mc);
+	return NULL;
+}
+
+/* Count how many segments are in this array of mbufs */
+static unsigned int
+mbuf_burst_segs(struct rte_mbuf *pkts[], unsigned int n)
+{
+	unsigned int i, iovcnt;
+
+	for (iovcnt = 0, i = 0; i < n; i++) {
+		const struct rte_mbuf *m = pkts[i];
+
+		__rte_mbuf_sanity_check(m, 1);
+
+		iovcnt += m->nb_segs;
+	}
+	return iovcnt;
+}
+
+/* Write pre-formatted packets to file. */
+ssize_t
+rte_pcapng_write_packets(rte_pcapng_t *self,
+			 struct rte_mbuf *pkts[], uint16_t nb_pkts)
+{
+	int iovcnt = mbuf_burst_segs(pkts, nb_pkts);
+	struct iovec iov[iovcnt];
+	unsigned int i, cnt;
+	ssize_t ret;
+
+	for (i = cnt = 0; i < nb_pkts; i++) {
+		struct rte_mbuf *m = pkts[i];
+		struct pcapng_enhance_packet_block *epb;
+
+		/* sanity check that is really a pcapng mbuf */
+		epb = rte_pktmbuf_mtod(m, struct pcapng_enhance_packet_block *);
+		if (unlikely(epb->block_type != PCAPNG_ENHANCED_PACKET_BLOCK ||
+			     epb->block_length != rte_pktmbuf_data_len(m))) {
+			rte_errno = EINVAL;
+			return -1;
+		}
+
+		/*
+		 * The DPDK port is recorded during pcapng_copy.
+		 * Map that to PCAPNG interface in file.
+		 */
+		epb->interface_id = self->port_index[m->port];
+		do {
+			iov[cnt].iov_base = rte_pktmbuf_mtod(m, void *);
+			iov[cnt].iov_len = rte_pktmbuf_data_len(m);
+			++cnt;
+		} while ((m = m->next));
+	}
+
+	ret = writev(self->outfd, iov, iovcnt);
+	if (unlikely(ret < 0))
+		rte_errno = errno;
+	return ret;
+}
+
+/* Create new pcapng writer handle */
+rte_pcapng_t *
+rte_pcapng_fdopen(int fd,
+		  const char *osname, const char *hardware,
+		  const char *appname, const char *comment)
+{
+	rte_pcapng_t *self;
+
+	self = malloc(sizeof(*self));
+	if (!self) {
+		rte_errno = ENOMEM;
+		return NULL;
+	}
+
+	self->outfd = fd;
+
+	if (pcapng_section_block(self, osname, hardware, appname, comment) < 0)
+		goto fail;
+
+	if (pcapng_interfaces(self) < 0)
+		goto fail;
+
+	return self;
+fail:
+	free(self);
+	return NULL;
+}
+
+void
+rte_pcapng_close(rte_pcapng_t *self)
+{
+	close(self->outfd);
+	free(self);
+}
diff --git a/lib/pcapng/rte_pcapng.h b/lib/pcapng/rte_pcapng.h
new file mode 100644
index 000000000000..8d3fbb1941b4
--- /dev/null
+++ b/lib/pcapng/rte_pcapng.h
@@ -0,0 +1,195 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Microsoft Corporation
+ */
+
+/**
+ * @file
+ * RTE pcapng
+ *
+ * @warning
+ * @b EXPERIMENTAL:
+ * All functions in this file may be changed or removed without prior notice.
+ *
+ * Pcapng is an evolution from the pcap format, created to address some of
+ * its deficiencies. Namely, the lack of extensibility and inability to store
+ * additional information.
+ *
+ * For details about the file format see RFC:
+ *   https://www.ietf.org/id/draft-tuexen-opsawg-pcapng-03.html
+ *  and
+ *    https://github.com/pcapng/pcapng/
+ */
+
+#ifndef _RTE_PCAPNG_H_
+#define _RTE_PCAPNG_H_
+
+#include <stdint.h>
+#include <sys/types.h>
+#include <rte_compat.h>
+#include <rte_common.h>
+#include <rte_mempool.h>
+#include <rte_ring.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Opaque handle used for functions in this library. */
+typedef struct rte_pcapng rte_pcapng_t;
+
+/**
+ * Write data to existing open file
+ *
+ * @param fd
+ *   file descriptor
+ * @param osname
+ *   Optional description of the operating system.
+ *   Examples: "Debian 11", "Windows Server 22"
+ * @param hardware
+ *   Optional description of the hardware used to create this file.
+ *   Examples: "x86 Virtual Machine"
+ * @param appname
+ *   Optional: application name recorded in the pcapng file.
+ *   Example: "dpdk-dumpcap 1.0 (DPDK 20.11)"
+ * @param comment
+ *   Optional comment to add to file header.
+ * @return
+ *   handle to library, or NULL in case of error (and rte_errno is set).
+ */
+__rte_experimental
+rte_pcapng_t *
+rte_pcapng_fdopen(int fd,
+		  const char *osname, const char *hardware,
+		  const char *appname, const char *comment);
+
+/**
+ * Close capture file
+ *
+ * @param self
+ *  handle to library
+ */
+__rte_experimental
+void
+rte_pcapng_close(rte_pcapng_t *self);
+
+/**
+ * Direction flag
+ * These should match Enhanced Packet Block flag bits
+ */
+enum rte_pcapng_direction {
+	RTE_PCAPNG_DIRECTION_UNKNOWN = 0,
+	RTE_PCAPNG_DIRECTION_IN  = 1,
+	RTE_PCAPNG_DIRECTION_OUT = 2,
+};
+
+/**
+ * Format an mbuf for writing to file.
+ *
+ * @param port_id
+ *   The Ethernet port on which packet was received
+ *   or is going to be transmitted.
+ * @param queue
+ *   The queue on the Ethernet port where packet was received
+ *   or is going to be transmitted.
+ * @param mp
+ *   The mempool from which the "clone" mbufs are allocated.
+ * @param m
+ *   The mbuf to copy
+ * @param length
+ *   The upper limit on bytes to copy.  Passing UINT32_MAX
+ *   means all data (after offset).
+ * @param timestamp
+ *   The timestamp in TSC cycles.
+ * @param direction
+ *   The direction of the packer: receive, transmit or unknown.
+ *
+ * @return
+ *   - The pointer to the new mbuf formatted for pcapng_write
+ *   - NULL if allocation fails.
+ *
+ */
+__rte_experimental
+struct rte_mbuf *
+rte_pcapng_copy(uint16_t port_id, uint32_t queue,
+		const struct rte_mbuf *m, struct rte_mempool *mp,
+		uint32_t length, uint64_t timestamp,
+		enum rte_pcapng_direction direction);
+
+
+/**
+ * Determine optimum mbuf data size.
+ *
+ * @param length
+ *   The largest packet that will be copied.
+ * @return
+ *   The minimum size of mbuf data to handle packet with length bytes.
+ *   Accounting for required header and trailer fields
+ */
+__rte_experimental
+uint32_t
+rte_pcapng_mbuf_size(uint32_t length);
+
+/**
+ * Write packets to the capture file.
+ *
+ * Packets to be captured are copied by rte_pcapng_copy()
+ * and then this function is called to write them to the file.
+ *
+ * @warning
+ * Do not pass original mbufs from transmit or receive
+ * or file will be invalid pcapng format.
+ *
+ * @param self
+ *  The handle to the packet capture file
+ * @param pkts
+ *  The address of an array of *nb_pkts* pointers to *rte_mbuf* structures
+ *  which contain the output packets
+ * @param nb_pkts
+ *  The number of packets to write to the file.
+ * @return
+ *  The number of bytes written to file, -1 on failure to write file.
+ *  The mbuf's in *pkts* are always freed.
+ */
+__rte_experimental
+ssize_t
+rte_pcapng_write_packets(rte_pcapng_t *self,
+			 struct rte_mbuf *pkts[], uint16_t nb_pkts);
+
+/**
+ * Write an Interface statistics block.
+ * For statistics, use 0 if don't know or care to report it.
+ * Should be called before closing capture to report results.
+ *
+ * @param self
+ *  The handle to the packet capture file
+ * @param port
+ *  The Ethernet port to report stats on.
+ * @param comment
+ *   Optional comment to add to statistics.
+ * @param start_time
+ *  The time when packet capture was started in nanoseconds.
+ *  Optional: can be zero if not known.
+ * @param end_time
+ *  The time when packet capture was stopped in nanoseconds.
+ *  Optional: can be zero if not finished;
+ * @param ifrecv
+ *  The number of packets received by capture.
+ *  Optional: use UINT64_MAX if not known.
+ * @param ifdrop
+ *  The number of packets missed by the capture process.
+ *  Optional: use UINT64_MAX if not known.
+ * @return
+ *  number of bytes written to file, -1 on failure to write file
+ */
+__rte_experimental
+ssize_t
+rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port,
+		       const char *comment,
+		       uint64_t start_time, uint64_t end_time,
+		       uint64_t ifrecv, uint64_t ifdrop);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PCAPNG_H_ */
diff --git a/lib/pcapng/version.map b/lib/pcapng/version.map
new file mode 100644
index 000000000000..05a9c86a7d91
--- /dev/null
+++ b/lib/pcapng/version.map
@@ -0,0 +1,12 @@ 
+EXPERIMENTAL {
+	global:
+
+	rte_pcapng_close;
+	rte_pcapng_copy;
+	rte_pcapng_fdopen;
+	rte_pcapng_mbuf_size;
+	rte_pcapng_write_packets;
+	rte_pcapng_write_stats;
+
+	local: *;
+};