[v6,01/10] librte_pcapng: add new library for writing pcapng files

Message ID 20210909233329.190021-2-stephen@networkplumber.org (mailing list archive)
State Superseded, archived
Headers
Series Packet capture framework enhancements |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Stephen Hemminger Sept. 9, 2021, 11:33 p.m. UTC
  This is a utility library for writing pcapng format files
used by Wireshark family of utilities. Older tcpdump
also knows how to read (but not write) this format.

See draft RFC
  https://www.ietf.org/id/draft-tuexen-opsawg-pcapng-03.html
and
  https://github.com/pcapng/pcapng/

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 lib/meson.build           |   1 +
 lib/pcapng/meson.build    |   8 +
 lib/pcapng/pcapng_proto.h | 129 +++++++++
 lib/pcapng/rte_pcapng.c   | 574 ++++++++++++++++++++++++++++++++++++++
 lib/pcapng/rte_pcapng.h   | 194 +++++++++++++
 lib/pcapng/version.map    |  12 +
 6 files changed, 918 insertions(+)
 create mode 100644 lib/pcapng/meson.build
 create mode 100644 lib/pcapng/pcapng_proto.h
 create mode 100644 lib/pcapng/rte_pcapng.c
 create mode 100644 lib/pcapng/rte_pcapng.h
 create mode 100644 lib/pcapng/version.map
  

Patch

diff --git a/lib/meson.build b/lib/meson.build
index 1673ca4323c0..51bf9c2d11f0 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -41,6 +41,7 @@  libraries = [
         'latencystats',
         'lpm',
         'member',
+        'pcapng',
         'power',
         'pdump',
         'rawdev',
diff --git a/lib/pcapng/meson.build b/lib/pcapng/meson.build
new file mode 100644
index 000000000000..fe636bdf3c0b
--- /dev/null
+++ b/lib/pcapng/meson.build
@@ -0,0 +1,8 @@ 
+# SPDX-License-Identifier: BSD-3-Clause
+# Copyright(c) 2019 Microsoft Corporation
+
+version = 1
+sources = files('rte_pcapng.c')
+headers = files('rte_pcapng.h')
+
+deps += ['ethdev']
diff --git a/lib/pcapng/pcapng_proto.h b/lib/pcapng/pcapng_proto.h
new file mode 100644
index 000000000000..47161d8a1213
--- /dev/null
+++ b/lib/pcapng/pcapng_proto.h
@@ -0,0 +1,129 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019-2020 Microsoft Corporation
+ *
+ * PCAP Next Generation Capture File writer
+ *
+ * See: https://github.com/pcapng/pcapng/ for the file format.
+ */
+
+enum pcapng_block_types {	/* type code at the start of every block */
+	PCAPNG_INTERFACE_BLOCK		= 1,
+	PCAPNG_PACKET_BLOCK		= 2,	/* Obsolete */
+	PCAPNG_SIMPLE_PACKET_BLOCK	= 3,
+	PCAPNG_NAME_RESOLUTION_BLOCK	= 4,
+	PCAPNG_INTERFACE_STATS_BLOCK	= 5,
+	PCAPNG_ENHANCED_PACKET_BLOCK	= 6,
+
+	PCAPNG_SECTION_BLOCK		= 0x0A0D0D0A,
+};
+
+struct pcapng_option {
+	uint16_t code;	/* option type, e.g. PCAPNG_OPT_COMMENT or a block specific code */
+	uint16_t length;	/* number of bytes in data[], not counting padding */
+	uint8_t data[];	/* option value; writers pad the option to a 32-bit boundary */
+};
+
+#define PCAPNG_BYTE_ORDER_MAGIC 0x1A2B3C4D	/* stored in the section header */
+#define PCAPNG_MAJOR_VERS 1	/* major_version written in the section header */
+#define PCAPNG_MINOR_VERS 0	/* minor_version written in the section header */
+
+enum pcapng_opt {
+	PCAPNG_OPT_END	= 0,	/* zero length option that ends an option list */
+	PCAPNG_OPT_COMMENT = 1,	/* comment string */
+};
+
+struct pcapng_section_header {
+	uint32_t block_type;	/* PCAPNG_SECTION_BLOCK */
+	uint32_t block_length;	/* total block length; repeated after the options */
+	uint32_t byte_order_magic;	/* PCAPNG_BYTE_ORDER_MAGIC */
+	uint16_t major_version;	/* PCAPNG_MAJOR_VERS */
+	uint16_t minor_version;	/* PCAPNG_MINOR_VERS */
+	uint64_t section_length;	/* the writer stores UINT64_MAX here */
+};
+
+enum pcapng_section_opt {
+	PCAPNG_SHB_HARDWARE = 2,	/* hardware description string */
+	PCAPNG_SHB_OS	    = 3,	/* operating system description string */
+	PCAPNG_SHB_USERAPPL = 4,	/* application name string */
+};
+
+struct pcapng_interface_block {
+	uint32_t block_type;	/* PCAPNG_INTERFACE_BLOCK (1) */
+	uint32_t block_length;	/* total block length; repeated after the options */
+	uint16_t link_type;	/* the writer uses 1 (Ethernet) */
+	uint16_t reserved;	/* left zero by the writer */
+	uint32_t snap_len;	/* left zero by the writer */
+};
+
+enum pcapng_interface_options {	/* option codes for the interface block */
+	PCAPNG_IFB_NAME		= 2,
+	PCAPNG_IFB_DESCRIPTION	= 3,
+	PCAPNG_IFB_IPV4ADDR	= 4,
+	PCAPNG_IFB_IPV6ADDR	= 5,
+	PCAPNG_IFB_MACADDR	= 6,
+	PCAPNG_IFB_EUIADDR	= 7,
+	PCAPNG_IFB_SPEED	= 8,
+	PCAPNG_IFB_TSRESOL	= 9,
+	PCAPNG_IFB_TZONE	= 10,
+	PCAPNG_IFB_FILTER	= 11,
+	PCAPNG_IFB_OS		= 12,
+	PCAPNG_IFB_FCSLEN	= 13,
+	PCAPNG_IFB_TSOFFSET	= 14,
+	PCAPNG_IFB_HARDWARE	= 15,
+};
+
+struct pcapng_enhance_packet_block {
+	uint32_t block_type;	/* PCAPNG_ENHANCED_PACKET_BLOCK (6) */
+	uint32_t block_length;	/* total block length; repeated after the options */
+	uint32_t interface_id;	/* index of the interface block in the file */
+	uint32_t timestamp_hi;	/* upper 32 bits of the timestamp */
+	uint32_t timestamp_lo;	/* lower 32 bits of the timestamp */
+	uint32_t capture_length;	/* bytes of packet data stored, before padding */
+	uint32_t original_length;	/* length of the packet when it was captured */
+};
+
+/* Direction values for PCAPNG_EPB_FLAGS (hex: 0b literals are not ISO C11) */
+#define PCAPNG_IFB_INBOUND   0x1
+#define PCAPNG_IFB_OUTBOUND  0x2
+
+enum pcapng_epb_options {	/* option codes for the enhanced packet block */
+	PCAPNG_EPB_FLAGS	= 2,
+	PCAPNG_EPB_HASH		= 3,
+	PCAPNG_EPB_DROPCOUNT	= 4,
+	PCAPNG_EPB_PACKETID	= 5,
+	PCAPNG_EPB_QUEUE	= 6,
+	PCAPNG_EPB_VERDICT	= 7,
+};
+
+enum pcapng_epb_hash {	/* hash algorithm identifiers */
+	PCAPNG_HASH_2COMP	= 0,
+	PCAPNG_HASH_XOR		= 1,
+	PCAPNG_HASH_CRC32	= 2,
+	PCAPNG_HASH_MD5		= 3,
+	PCAPNG_HASH_SHA1	= 4,
+	PCAPNG_HASH_TOEPLITZ	= 5,
+};
+
+struct pcapng_simple_packet {
+	uint32_t block_type;	/* PCAPNG_SIMPLE_PACKET_BLOCK (3) */
+	uint32_t block_length;	/* total block length */
+	uint32_t packet_length;	/* NOTE(review): presumably the original packet length - confirm against the pcapng spec */
+};
+
+struct pcapng_statistics {
+	uint32_t block_type;	/* PCAPNG_INTERFACE_STATS_BLOCK (5) */
+	uint32_t block_length;	/* total block length; repeated after the options */
+	uint32_t interface_id;	/* index of the interface block in the file */
+	uint32_t timestamp_hi;	/* upper 32 bits of the timestamp */
+	uint32_t timestamp_lo;	/* lower 32 bits of the timestamp */
+};
+
+enum pcapng_isb_options {	/* option codes for the statistics block */
+	PCAPNG_ISB_STARTTIME	= 2,
+	PCAPNG_ISB_ENDTIME	= 3,
+	PCAPNG_ISB_IFRECV	= 4,
+	PCAPNG_ISB_IFDROP	= 5,
+	PCAPNG_ISB_FILTERACCEPT	= 6,
+	PCAPNG_ISB_OSDROP	= 7,
+	PCAPNG_ISB_USRDELIV	= 8,
+};
diff --git a/lib/pcapng/rte_pcapng.c b/lib/pcapng/rte_pcapng.c
new file mode 100644
index 000000000000..7bde8d9b75fd
--- /dev/null
+++ b/lib/pcapng/rte_pcapng.c
@@ -0,0 +1,574 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Microsoft Corporation
+ */
+
+#include <errno.h>
+#include <net/if.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/uio.h>
+#include <sys/utsname.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_dev.h>
+#include <rte_errno.h>
+#include <rte_ethdev.h>
+#include <rte_ether.h>
+#include <rte_mbuf.h>
+#include <rte_pcapng.h>
+#include <rte_time.h>
+
+#include "pcapng_proto.h"
+
+/* conversion from DPDK speed to PCAPNG */
+#define PCAPNG_MBPS_SPEED 1000000ull
+
+/* Format of the capture file handle */
+struct rte_pcapng {
+	int  outfd;		/* output file descriptor, closed by rte_pcapng_close() */
+	/* DPDK port id to interface index in file, filled in when the file is opened */
+	uint32_t port_index[RTE_MAX_ETHPORTS];
+};
+
+/*
+ * Reference point for converting TSC cycles to PCAPNG ns format:
+ * the wall clock time and the TSC value sampled together at startup.
+ * Only used inside this file, so it is given internal linkage.
+ */
+static struct pcapng_time {
+	uint64_t ns;		/* CLOCK_REALTIME at startup, in nanoseconds */
+	uint64_t cycles;	/* TSC value sampled at startup */
+} pcapng_time;
+
+/* Constructor: sample the TSC and the wall clock as the timestamp reference */
+RTE_INIT(pcapng_init)
+{
+	struct timespec now;
+	const uint64_t tsc = rte_get_tsc_cycles();
+
+	clock_gettime(CLOCK_REALTIME, &now);
+
+	pcapng_time.cycles = tsc;
+	pcapng_time.ns = rte_timespec_to_ns(&now);
+}
+
+/*
+ * Convert a TSC cycle count into a PCAPNG timestamp in nanoseconds.
+ *
+ * The elapsed cycles are split into whole seconds and a sub-second
+ * remainder before scaling. Multiplying the full delta by NSEC_PER_SEC
+ * would overflow 64 bits after only a few seconds at GHz TSC rates.
+ */
+static uint64_t pcapng_tsc_to_ns(uint64_t cycles)
+{
+	const uint64_t hz = rte_get_tsc_hz();
+	const uint64_t delta = cycles - pcapng_time.cycles;
+	const uint64_t secs = delta / hz;
+	const uint64_t rem = delta % hz;
+
+	/* rem < hz, so rem * NSEC_PER_SEC fits for any hz below ~18 GHz */
+	return pcapng_time.ns + secs * NSEC_PER_SEC
+		+ (rem * NSEC_PER_SEC) / hz;
+}
+
+/*
+ * Length of an option including its header and the padding
+ * up to a 32-bit boundary.
+ *
+ * The result is 32 bits wide: a 16-bit result wraps for data lengths
+ * close to UINT16_MAX, which would under-size the caller's buffer.
+ */
+static uint32_t pcapng_optlen(uint16_t len)
+{
+	return RTE_ALIGN(sizeof(struct pcapng_option) + len,
+			 sizeof(uint32_t));
+}
+
+/*
+ * Build a TLV option at popt and return the location of the next one.
+ *
+ * data may be NULL when len is zero, as for the PCAPNG_OPT_END marker.
+ * Padding bytes after the data are not written here, so the caller's
+ * buffer must already be zeroed.
+ */
+static struct pcapng_option *
+pcapng_add_option(struct pcapng_option *popt, uint16_t code,
+		  const void *data, uint16_t len)
+{
+	popt->code = code;
+	popt->length = len;
+
+	/* memcpy() from a NULL pointer is undefined even for zero length */
+	if (len > 0)
+		memcpy(popt->data, data, len);
+
+	return (struct pcapng_option *)((uint8_t *)popt + pcapng_optlen(len));
+}
+
+/*
+ * Write required initial section header describing the capture.
+ *
+ * os, hw, app and comment are optional strings (NULL to omit); each one
+ * given is stored as an option of the Section Header Block.
+ * Returns a non-negative value on success, -1 if memory could not be
+ * allocated or the block could not be written completely.
+ */
+static int
+pcapng_section_block(rte_pcapng_t *self,
+		    const char *os, const char *hw,
+		    const char *app, const char *comment)
+{
+	struct pcapng_section_header *hdr;
+	struct pcapng_option *opt;
+	void *buf;
+	uint32_t len;
+	ssize_t cc;
+
+	/* fixed header, then each option that is present */
+	len = sizeof(*hdr);
+	if (hw)
+		len += pcapng_optlen(strlen(hw));
+	if (os)
+		len += pcapng_optlen(strlen(os));
+	if (app)
+		len += pcapng_optlen(strlen(app));
+	if (comment)
+		len += pcapng_optlen(strlen(comment));
+
+	/* end of options marker and the trailing copy of block length */
+	len += pcapng_optlen(0);
+	len += sizeof(uint32_t);
+
+	/* zeroed so that the padding after each option is zero */
+	buf = calloc(1, len);
+	if (!buf)
+		return -1;
+
+	hdr = (struct pcapng_section_header *)buf;
+	*hdr = (struct pcapng_section_header) {
+		.block_type = PCAPNG_SECTION_BLOCK,
+		.block_length = len,
+		.byte_order_magic = PCAPNG_BYTE_ORDER_MAGIC,
+		.major_version = PCAPNG_MAJOR_VERS,
+		.minor_version = PCAPNG_MINOR_VERS,
+		.section_length = UINT64_MAX,
+	};
+
+	opt = (struct pcapng_option *)(hdr + 1);
+	if (comment)
+		opt = pcapng_add_option(opt, PCAPNG_OPT_COMMENT,
+					comment, strlen(comment));
+	if (hw)
+		opt = pcapng_add_option(opt, PCAPNG_SHB_HARDWARE,
+					hw, strlen(hw));
+	if (os)
+		opt = pcapng_add_option(opt, PCAPNG_SHB_OS,
+					os, strlen(os));
+	if (app)
+		opt = pcapng_add_option(opt, PCAPNG_SHB_USERAPPL,
+					app, strlen(app));
+
+	opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0);
+	/* clone block_length after option */
+	memcpy(opt, &hdr->block_length, sizeof(uint32_t));
+
+	cc = write(self->outfd, buf, len);
+	free(buf);
+
+	/* a short write would leave a truncated block in the file */
+	if (cc != (ssize_t)len)
+		return -1;
+
+	return (int)cc;
+}
+
+/*
+ * Write an Interface Description Block for one interface.
+ *
+ * if_name, mac_addr, if_hw and comment are optional (NULL to omit) and
+ * if_speed is omitted when zero. The timestamp resolution option is
+ * always written.
+ * Returns the number of bytes written, -ENOMEM if the block could not
+ * be allocated, -EIO on a short write, or -1 if write() failed.
+ */
+static ssize_t
+pcapng_interface_block(rte_pcapng_t *self, const char *if_name,
+		      uint64_t if_speed, const uint8_t *mac_addr,
+		      const char *if_hw, const char *comment)
+{
+	struct pcapng_interface_block *hdr;
+	struct pcapng_option *opt;
+	const uint8_t tsresol = 9;	/* nanosecond resolution */
+	uint32_t len = sizeof(*hdr);
+	ssize_t cc;
+	void *buf;
+
+	/* the size must account for exactly the options added below */
+	len += pcapng_optlen(sizeof(tsresol));
+	if (if_name)
+		len += pcapng_optlen(strlen(if_name));
+	if (mac_addr)
+		len += pcapng_optlen(RTE_ETHER_ADDR_LEN);
+	if (if_speed)
+		len += pcapng_optlen(sizeof(uint64_t));
+	if (if_hw)
+		len += pcapng_optlen(strlen(if_hw));
+	if (comment)
+		len += pcapng_optlen(strlen(comment));
+
+	/* end of options marker and the trailing copy of block length */
+	len += pcapng_optlen(0);
+	len += sizeof(uint32_t);
+
+	/* zeroed so that reserved, snap_len and option padding are zero */
+	buf = calloc(1, len);
+	if (!buf)
+		return -ENOMEM;
+
+	hdr = (struct pcapng_interface_block *)buf;
+	hdr->block_type = PCAPNG_INTERFACE_BLOCK;
+	hdr->link_type = 1;	/* Ethernet */
+	hdr->block_length = len;
+
+	opt = (struct pcapng_option *)(hdr + 1);
+	if (if_name)
+		opt = pcapng_add_option(opt, PCAPNG_IFB_NAME,
+					 if_name, strlen(if_name));
+	if (mac_addr)
+		opt = pcapng_add_option(opt, PCAPNG_IFB_MACADDR,
+					mac_addr, RTE_ETHER_ADDR_LEN);
+	if (if_speed)
+		opt = pcapng_add_option(opt, PCAPNG_IFB_SPEED,
+					 &if_speed, sizeof(uint64_t));
+	opt = pcapng_add_option(opt, PCAPNG_IFB_TSRESOL,
+				&tsresol, sizeof(tsresol));
+	if (if_hw)
+		opt = pcapng_add_option(opt, PCAPNG_IFB_HARDWARE,
+					 if_hw, strlen(if_hw));
+	if (comment)
+		opt = pcapng_add_option(opt, PCAPNG_OPT_COMMENT,
+					comment, strlen(comment));
+
+	opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0);
+
+	/* clone block_length after option */
+	memcpy(opt, &hdr->block_length, sizeof(uint32_t));
+	cc = write(self->outfd, buf, len);
+	free(buf);
+
+	/* a short write would leave a truncated block in the file */
+	if (cc >= 0 && (uint32_t)cc != len)
+		return -EIO;
+
+	return cc;
+}
+
+/*
+ * Describe one DPDK port as a PCAPNG interface and write its
+ * Interface Description Block.
+ *
+ * Link speed and MAC address are only recorded when the port reports
+ * them successfully.
+ * Returns a negative value on failure.
+ */
+static int
+pcapng_add_interface(rte_pcapng_t *self, uint16_t port)
+{
+	struct rte_eth_dev_info dev_info;
+	struct rte_ether_addr macaddr;
+	const uint8_t *mac = NULL;
+	const struct rte_device *dev;
+	struct rte_eth_link link;
+	char ifname[IF_NAMESIZE];
+	char ifhw[256];
+	uint64_t speed = 0;
+
+	if (rte_eth_dev_info_get(port, &dev_info) < 0)
+		return -1;
+
+	/* make something like an interface name */
+	if (if_indextoname(dev_info.if_index, ifname) == NULL)
+		snprintf(ifname, IF_NAMESIZE, "dpdk:%u", port);
+
+	/* make a useful device hardware string */
+	dev = dev_info.device;
+	if (dev)
+		snprintf(ifhw, sizeof(ifhw),
+			 "%s-%s", dev->bus->name, dev->name);
+
+	/* DPDK reports in units of Mbps; link is not valid if the query fails */
+	if (rte_eth_link_get(port, &link) == 0 &&
+	    link.link_status == ETH_LINK_UP)
+		speed = link.link_speed * PCAPNG_MBPS_SPEED;
+
+	/* macaddr is only filled in when the query succeeds */
+	if (rte_eth_macaddr_get(port, &macaddr) == 0)
+		mac = macaddr.addr_bytes;
+
+	return pcapng_interface_block(self, ifname, speed, mac,
+				      dev ? ifhw : NULL, NULL);
+}
+
+/*
+ * Emit one interface block per available DPDK port at the start of
+ * the file, recording which file interface index each port was given.
+ */
+static int
+pcapng_interfaces(rte_pcapng_t *self)
+{
+	uint16_t next_index = 0;
+	uint16_t pid;
+
+	RTE_ETH_FOREACH_DEV(pid) {
+		/* The list of ports in pcapng needs to be contiguous */
+		self->port_index[pid] = next_index;
+		next_index++;
+
+		if (pcapng_add_interface(self, pid) < 0)
+			return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Write an Interface statistics block at the end of capture.
+ *
+ * Values that are not given (start_time or end_time of 0, ifrecv or
+ * ifdrop of UINT64_MAX, NULL comment) are left out of the block.
+ * Returns the number of bytes written, -EINVAL if port_id is not valid,
+ * or -1 if the block could not be allocated or written.
+ */
+ssize_t
+rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port_id,
+		       const char *comment,
+		       uint64_t start_time, uint64_t end_time,
+		       uint64_t ifrecv, uint64_t ifdrop)
+{
+	struct pcapng_statistics *hdr;
+	struct pcapng_option *opt;
+	uint32_t optlen, len;
+	uint8_t *buf;
+	uint64_t ns;
+	ssize_t cc;
+	int saved_errno;
+
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL);
+
+	optlen = 0;
+
+	if (ifrecv != UINT64_MAX)
+		optlen += pcapng_optlen(sizeof(ifrecv));
+	if (ifdrop != UINT64_MAX)
+		optlen += pcapng_optlen(sizeof(ifdrop));
+	if (start_time != 0)
+		optlen += pcapng_optlen(sizeof(start_time));
+	if (end_time != 0)
+		optlen += pcapng_optlen(sizeof(end_time));
+	if (comment)
+		optlen += pcapng_optlen(strlen(comment));
+	/* the end marker is only needed when there is at least one option */
+	if (optlen != 0)
+		optlen += pcapng_optlen(0);
+
+	len = sizeof(*hdr) + optlen + sizeof(uint32_t);
+
+	/*
+	 * Allocate from the heap because the comment length is chosen by
+	 * the caller, and zero fill so the padding after the comment does
+	 * not carry uninitialized memory into the file.
+	 */
+	buf = calloc(1, len);
+	if (buf == NULL)
+		return -1;
+
+	hdr = (struct pcapng_statistics *)buf;
+	opt = (struct pcapng_option *)(hdr + 1);
+
+	if (comment)
+		opt = pcapng_add_option(opt, PCAPNG_OPT_COMMENT,
+					comment, strlen(comment));
+	if (start_time != 0)
+		opt = pcapng_add_option(opt, PCAPNG_ISB_STARTTIME,
+					 &start_time, sizeof(start_time));
+	if (end_time != 0)
+		opt = pcapng_add_option(opt, PCAPNG_ISB_ENDTIME,
+					 &end_time, sizeof(end_time));
+	if (ifrecv != UINT64_MAX)
+		opt = pcapng_add_option(opt, PCAPNG_ISB_IFRECV,
+				&ifrecv, sizeof(ifrecv));
+	if (ifdrop != UINT64_MAX)
+		opt = pcapng_add_option(opt, PCAPNG_ISB_IFDROP,
+				&ifdrop, sizeof(ifdrop));
+	if (optlen != 0)
+		opt = pcapng_add_option(opt, PCAPNG_OPT_END, NULL, 0);
+
+	hdr->block_type = PCAPNG_INTERFACE_STATS_BLOCK;
+	hdr->block_length = len;
+	hdr->interface_id = self->port_index[port_id];
+
+	/* the block is stamped with the time it is written */
+	ns = pcapng_tsc_to_ns(rte_get_tsc_cycles());
+	hdr->timestamp_hi = ns >> 32;
+	hdr->timestamp_lo = (uint32_t)ns;
+
+	/* clone block_length after option */
+	memcpy(opt, &len, sizeof(uint32_t));
+
+	cc = write(self->outfd, buf, len);
+
+	/* keep errno from write() visible to the caller across free() */
+	saved_errno = errno;
+	free(buf);
+	errno = saved_errno;
+
+	return cc;
+}
+
+/*
+ * Size of mbuf needed to hold a captured packet of up to length bytes
+ * together with the trailing options and block length.
+ */
+uint32_t
+rte_pcapng_mbuf_size(uint32_t length)
+{
+	uint32_t size;
+
+	/* The VLAN and EPB header must fit in the mbuf headroom. */
+	RTE_ASSERT(sizeof(struct pcapng_enhance_packet_block) +
+		   sizeof(struct rte_vlan_hdr) <= RTE_PKTMBUF_HEADROOM);
+
+	size = sizeof(struct rte_mbuf);
+
+	/* packet data is padded to 32 bits */
+	size += RTE_ALIGN(length, sizeof(uint32_t));
+
+	/* The flags and queue information are added at the end. */
+	size += pcapng_optlen(sizeof(uint32_t));	/* flag option */
+	size += pcapng_optlen(sizeof(uint32_t));	/* queue option */
+	size += sizeof(uint32_t);			/* length */
+
+	return size;
+}
+
+/*
+ *   The mbufs created use the Pcapng standard enhanced packet  block.
+ *
+ *                         1                   2                   3
+ *     0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *  0 |                    Block Type = 0x00000006                    |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *  4 |                      Block Total Length                       |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *  8 |                         Interface ID                          |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * 12 |                        Timestamp (High)                       |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * 16 |                        Timestamp (Low)                        |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * 20 |                    Captured Packet Length                     |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * 24 |                    Original Packet Length                     |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * 28 /                                                               /
+ *    /                          Packet Data                          /
+ *    /              variable length, padded to 32 bits               /
+ *    /                                                               /
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *    |      Option Code = 0x0002     |     Option Length = 0x004     |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *    |              Flags (direction)                                |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *    |      Option Code = 0x0006     |     Option Length = 0x004     |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *    |              Queue id                                         |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *    |                      Block Total Length                       |
+ *    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+
+/*
+ * Make a copy of original mbuf with pcapng header and options.
+ *
+ * The copy is formatted as an Enhanced Packet Block with flags and
+ * queue options. The interface id is not set here: the port is stored
+ * in the mbuf and mapped by rte_pcapng_write_packets().
+ * Returns the new mbuf, or NULL on failure.
+ */
+struct rte_mbuf *
+rte_pcapng_copy(uint16_t port_id, uint32_t queue,
+		const struct rte_mbuf *md,
+		struct rte_mempool *mp,
+		uint32_t length, uint64_t cycles,
+		enum rte_pcapng_direction direction)
+{
+	struct pcapng_enhance_packet_block *epb;
+	uint32_t orig_len, data_len, padding, flags;
+	struct pcapng_option *opt;
+	const uint16_t optlen = pcapng_optlen(sizeof(flags)) + pcapng_optlen(sizeof(queue));
+	struct rte_mbuf *mc;
+	uint64_t ns;
+
+#ifdef RTE_LIBRTE_ETHDEV_DEBUG
+	/* this function returns a pointer, so the error value is NULL */
+	RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, NULL);
+#endif
+	ns = pcapng_tsc_to_ns(cycles);
+
+	orig_len = rte_pktmbuf_pkt_len(md);
+
+	/* Take snapshot of the data */
+	mc = rte_pktmbuf_copy(md, mp, 0, length);
+	if (unlikely(mc == NULL))
+		return NULL;
+
+	/* If packet had offloaded VLAN, expand it */
+	if (md->ol_flags & (PKT_RX_VLAN_STRIPPED | PKT_TX_VLAN)) {
+		if (rte_vlan_insert(&mc) != 0)
+			goto fail;
+
+		orig_len += sizeof(struct rte_vlan_hdr);
+	}
+
+	/* pad the packet to 32 bit boundary */
+	data_len = rte_pktmbuf_data_len(mc);
+	padding = RTE_ALIGN(data_len, sizeof(uint32_t)) - data_len;
+	if (padding > 0) {
+		void *tail = rte_pktmbuf_append(mc, padding);
+
+		if (tail == NULL)
+			goto fail;
+		memset(tail, 0, padding);
+	}
+
+	/* reserve trailing options and block length */
+	opt = (struct pcapng_option *)
+		rte_pktmbuf_append(mc, optlen + sizeof(uint32_t));
+	if (unlikely(opt == NULL))
+		goto fail;
+
+	switch (direction) {
+	case RTE_PCAPNG_DIRECTION_IN:
+		flags = PCAPNG_IFB_INBOUND;
+		break;
+	case RTE_PCAPNG_DIRECTION_OUT:
+		flags = PCAPNG_IFB_OUTBOUND;
+		break;
+	default:
+		flags = 0;
+		break;
+	}
+
+	opt = pcapng_add_option(opt, PCAPNG_EPB_FLAGS,
+				&flags, sizeof(flags));
+
+	opt = pcapng_add_option(opt, PCAPNG_EPB_QUEUE,
+				&queue, sizeof(queue));
+
+	/* Add PCAPNG packet header */
+	epb = (struct pcapng_enhance_packet_block *)
+		rte_pktmbuf_prepend(mc, sizeof(*epb));
+	if (unlikely(epb == NULL))
+		goto fail;
+
+	epb->block_type = PCAPNG_ENHANCED_PACKET_BLOCK;
+	epb->block_length = rte_pktmbuf_data_len(mc);
+
+	/* Interface index is filled in later during write */
+	mc->port = port_id;
+
+	epb->timestamp_hi = ns >> 32;
+	epb->timestamp_lo = (uint32_t)ns;
+	/* data_len was sampled before padding, so padding is not counted */
+	epb->capture_length = data_len;
+	epb->original_length = orig_len;
+
+	/* set trailer of block length */
+	memcpy(opt, &epb->block_length, sizeof(uint32_t));
+
+	return mc;
+
+fail:
+	rte_pktmbuf_free(mc);
+	return NULL;
+}
+
+/* Total number of segments, summed over every mbuf in the burst */
+static unsigned int
+mbuf_burst_segs(struct rte_mbuf *pkts[], unsigned int n)
+{
+	unsigned int nsegs = 0;
+	unsigned int idx = 0;
+
+	while (idx < n) {
+		const struct rte_mbuf *pkt = pkts[idx++];
+
+		__rte_mbuf_sanity_check(pkt, 1);
+
+		nsegs += pkt->nb_segs;
+	}
+
+	return nsegs;
+}
+
+/*
+ * Write pre-formatted packets to file.
+ *
+ * Each mbuf must start with an Enhanced Packet Block as built by
+ * rte_pcapng_copy(). The call fails with rte_errno = EINVAL if a block
+ * header is not consistent or the recorded port is out of range.
+ * Returns the number of bytes written, or -1 with rte_errno set.
+ *
+ * NOTE(review): all segments go to a single writev() call, which is
+ * expected to fail when the burst has more than IOV_MAX segments -
+ * confirm that callers bound the burst size.
+ */
+ssize_t
+rte_pcapng_write_packets(rte_pcapng_t *self,
+			 struct rte_mbuf *pkts[], uint16_t nb_pkts)
+{
+	int iovcnt = mbuf_burst_segs(pkts, nb_pkts);
+	unsigned int i, cnt;
+	ssize_t ret;
+
+	/* empty burst: nothing to write, and a zero length VLA is undefined */
+	if (iovcnt <= 0)
+		return 0;
+
+	struct iovec iov[iovcnt];
+
+	for (i = cnt = 0; i < nb_pkts; i++) {
+		struct rte_mbuf *m = pkts[i];
+		struct pcapng_enhance_packet_block *epb;
+
+		/* sanity check that is really a pcapng mbuf */
+		epb = rte_pktmbuf_mtod(m, struct pcapng_enhance_packet_block *);
+		if (unlikely(epb->block_type != PCAPNG_ENHANCED_PACKET_BLOCK ||
+			     epb->block_length != rte_pktmbuf_data_len(m) ||
+			     m->port >= RTE_MAX_ETHPORTS)) {
+			rte_errno = EINVAL;
+			return -1;
+		}
+
+		/*
+		 * The DPDK port is recorded during pcapng_copy.
+		 * Map that to PCAPNG interface in file.
+		 */
+		epb->interface_id = self->port_index[m->port];
+
+		/* one iovec entry per segment of the mbuf chain */
+		do {
+			iov[cnt].iov_base = rte_pktmbuf_mtod(m, void *);
+			iov[cnt].iov_len = rte_pktmbuf_data_len(m);
+			++cnt;
+		} while ((m = m->next));
+	}
+
+	ret = writev(self->outfd, iov, iovcnt);
+	if (unlikely(ret < 0))
+		rte_errno = errno;
+	return ret;
+}
+
+/*
+ * Create new pcapng writer handle.
+ *
+ * Writes the section header followed by one interface block per
+ * available port to fd.
+ * Returns the handle, or NULL with rte_errno set on failure. The file
+ * descriptor is left open on failure.
+ */
+rte_pcapng_t *
+rte_pcapng_fdopen(int fd,
+		  const char *osname, const char *hardware,
+		  const char *appname, const char *comment)
+{
+	rte_pcapng_t *self;
+	int err;
+
+	/* zeroed so that port_index[] entries of unused ports are defined */
+	self = calloc(1, sizeof(*self));
+	if (!self) {
+		rte_errno = ENOMEM;
+		return NULL;
+	}
+
+	self->outfd = fd;
+
+	/* cleared so that a stale value is not reported on failure */
+	errno = 0;
+
+	if (pcapng_section_block(self, osname, hardware, appname, comment) < 0)
+		goto fail;
+
+	if (pcapng_interfaces(self) < 0)
+		goto fail;
+
+	return self;
+fail:
+	/*
+	 * The helpers do not return an error code, so report errno on a
+	 * best effort basis and fall back to EIO when it was not set.
+	 */
+	err = errno;
+	free(self);
+	rte_errno = (err != 0) ? err : EIO;
+	return NULL;
+}
+
+/*
+ * Close the capture file descriptor and release the handle.
+ * A NULL handle is ignored, as with free().
+ */
+void
+rte_pcapng_close(rte_pcapng_t *self)
+{
+	if (self == NULL)
+		return;
+
+	close(self->outfd);
+	free(self);
+}
diff --git a/lib/pcapng/rte_pcapng.h b/lib/pcapng/rte_pcapng.h
new file mode 100644
index 000000000000..2f1bb073df08
--- /dev/null
+++ b/lib/pcapng/rte_pcapng.h
@@ -0,0 +1,194 @@ 
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Microsoft Corporation
+ */
+
+/**
+ * @file
+ * RTE pcapng
+ *
+ * @warning
+ * @b EXPERIMENTAL:
+ * All functions in this file may be changed or removed without prior notice.
+ *
+ * Pcapng is an evolution from the pcap format, created to address some of
+ * its deficiencies. Namely, the lack of extensibility and inability to store
+ * additional information.
+ *
+ * For details about the file format see RFC:
+ *   https://www.ietf.org/id/draft-tuexen-opsawg-pcapng-03.html
+ *  and
+ *    https://github.com/pcapng/pcapng/
+ */
+
+#ifndef _RTE_PCAPNG_H_
+#define _RTE_PCAPNG_H_
+
+#include <stdint.h>
+#include <sys/types.h>
+#include <rte_compat.h>
+#include <rte_common.h>
+#include <rte_mempool.h>
+#include <rte_ring.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Opaque handle used for functions in this library. */
+typedef struct rte_pcapng rte_pcapng_t;
+
+/**
+ * Write data to existing open file
+ *
+ * @param fd
+ *   file descriptor
+ * @param osname
+ *   Optional description of the operating system.
+ *   Examples: "Debian 11", "Windows Server 22"
+ * @param hardware
+ *   Optional description of the hardware used to create this file.
+ *   Examples: "x86 Virtual Machine"
+ * @param appname
+ *   Optional: application name recorded in the pcapng file.
+ *   Example: "dpdk-dumpcap 1.0 (DPDK 20.11)"
+ * @param comment
+ *   Optional comment to add to file header.
+ * @return
+ *   handle to library, or NULL in case of error (and rte_errno is set).
+ */
+__rte_experimental
+rte_pcapng_t *
+rte_pcapng_fdopen(int fd,
+		  const char *osname, const char *hardware,
+		  const char *appname, const char *comment);
+
+/**
+ * Close capture file
+ *
+ * @param self
+ *  handle to library
+ */
+__rte_experimental
+void
+rte_pcapng_close(rte_pcapng_t *self);
+
+/**
+ * Direction flag
+ * These should match Enhanced Packet Block flag bits
+ */
+enum rte_pcapng_direction {
+	RTE_PCAPNG_DIRECTION_UNKNOWN = 0,	/* no direction is recorded */
+	RTE_PCAPNG_DIRECTION_IN  = 1,	/* recorded as inbound */
+	RTE_PCAPNG_DIRECTION_OUT = 2,	/* recorded as outbound */
+};
+
+/**
+ * Format an mbuf for writing to file.
+ *
+ * @param port_id
+ *   The Ethernet port on which packet was received
+ *   or is going to be transmitted.
+ * @param queue
+ *   The queue on the Ethernet port where packet was received
+ *   or is going to be transmitted.
+ * @param mp
+ *   The mempool from which the "clone" mbufs are allocated.
+ * @param m
+ *   The mbuf to copy
+ * @param length
+ *   The upper limit on bytes to copy.  Passing UINT32_MAX
+ *   means all data.
+ * @param timestamp
+ *   The timestamp in TSC cycles.
+ * @param direction
+ *   The direction of the packet: receive, transmit or unknown.
+ *
+ * @return
+ *   - The pointer to the new mbuf formatted for pcapng_write
+ *   - NULL if allocation fails.
+ *
+ */
+__rte_experimental
+struct rte_mbuf *
+rte_pcapng_copy(uint16_t port_id, uint32_t queue,
+		const struct rte_mbuf *m, struct rte_mempool *mp,
+		uint32_t length, uint64_t timestamp,
+		enum rte_pcapng_direction direction);
+
+
+/**
+ * Determine optimum mbuf data size.
+ *
+ * @param length
+ *   The maximum number of packet data bytes that will be captured
+ *   into the mbuf.
+ * @return
+ *   The minimum size of mbuf data to handle packet with length bytes.
+ *   Accounting for required header and trailer fields
+ */
+__rte_experimental
+uint32_t
+rte_pcapng_mbuf_size(uint32_t length);
+
+/**
+ * Write packets to the capture file.
+ *
+ * Packets to be captured are copied by rte_pcapng_copy()
+ * and then this function is called to write them to the file.
+ * @warning
+ * Do not pass original mbufs
+ *
+ * @param self
+ *  The handle to the packet capture file
+ * @param pkts
+ *  The address of an array of *nb_pkts* pointers to *rte_mbuf* structures
+ *  which contain the output packets
+ * @param nb_pkts
+ *  The number of packets to write to the file.
+ * @return
+ *  The number of bytes written to file, -1 on failure to write file.
+ *  The mbufs in *pkts* are not freed by this function; the caller must free them.
+ */
+__rte_experimental
+ssize_t
+rte_pcapng_write_packets(rte_pcapng_t *self,
+			 struct rte_mbuf *pkts[], uint16_t nb_pkts);
+
+/**
+ * Write an Interface statistics block.
+ * Values that are not known may be left out as described for each parameter.
+ * Should be called before closing capture to report results.
+ *
+ * @param self
+ *  The handle to the packet capture file
+ * @param port
+ *  The Ethernet port to report stats on.
+ * @param comment
+ *   Optional comment to add to statistics.
+ * @param start_time
+ *  The time when packet capture was started in nanoseconds.
+ *  Optional: can be zero if not known.
+ * @param end_time
+ *  The time when packet capture was stopped in nanoseconds.
+ *  Optional: can be zero if not finished;
+ * @param ifrecv
+ *  The number of packets received by capture.
+ *  Optional: use UINT64_MAX if not known.
+ * @param ifdrop
+ *  The number of packets missed by the capture process.
+ *  Optional: use UINT64_MAX if not known.
+ * @return
+ *  number of bytes written to file, -1 on failure to write file,
+ *  -EINVAL if the port is not valid
+ */
+__rte_experimental
+ssize_t
+rte_pcapng_write_stats(rte_pcapng_t *self, uint16_t port,
+		       const char *comment,
+		       uint64_t start_time, uint64_t end_time,
+		       uint64_t ifrecv, uint64_t ifdrop);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_PCAPNG_H_ */
diff --git a/lib/pcapng/version.map b/lib/pcapng/version.map
new file mode 100644
index 000000000000..05a9c86a7d91
--- /dev/null
+++ b/lib/pcapng/version.map
@@ -0,0 +1,12 @@ 
+EXPERIMENTAL {
+	global:
+
+	rte_pcapng_close;
+	rte_pcapng_copy;
+	rte_pcapng_fdopen;
+	rte_pcapng_mbuf_size;
+	rte_pcapng_write_packets;
+	rte_pcapng_write_stats;
+
+	local: *;
+};