[dpdk-dev,3/5] app/proc_info: add tcpdump support in secondary process

Message ID 1454073052-27025-4-git-send-email-reshma.pattan@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers

Commit Message

Pattan, Reshma Jan. 29, 2016, 1:10 p.m. UTC
Added "--tcpdump" and "--src-ip-filter" command line options
for tcpdump support.
Added pcap device creation and writing of packets to pcap device
for tcpdump.
Added socket functionality to communicate with primary process.

Signed-off-by: Reshma Pattan <reshma.pattan@intel.com>
---
 app/proc_info/main.c |  454 +++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 451 insertions(+), 3 deletions(-)
  

Patch

diff --git a/app/proc_info/main.c b/app/proc_info/main.c
index 6448d7b..9be1a37 100644
--- a/app/proc_info/main.c
+++ b/app/proc_info/main.c
@@ -1,7 +1,7 @@ 
 /*
  *   BSD LICENSE
  *
- *   Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
  *   All rights reserved.
  *
  *   Redistribution and use in source and binary forms, with or without
@@ -38,8 +38,25 @@ 
 #include <stdarg.h>
 #include <inttypes.h>
 #include <sys/queue.h>
+#include <sys/socket.h>
 #include <stdlib.h>
 #include <getopt.h>
+#include <unistd.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <arpa/inet.h>
+
+/* sys/un.h with __USE_MISC uses strlen, which is unsafe */
+#ifdef __USE_MISC
+#define REMOVED_USE_MISC
+#undef __USE_MISC
+#endif
+#include <sys/un.h>
+/* make sure we redefine __USE_MISC only if it was previously undefined */
+#ifdef REMOVED_USE_MISC
+#define __USE_MISC
+#undef REMOVED_USE_MISC
+#endif
 
 #include <rte_eal.h>
 #include <rte_config.h>
@@ -58,11 +75,42 @@ 
 #include <rte_atomic.h>
 #include <rte_branch_prediction.h>
 #include <rte_string_fns.h>
+#include <rte_errno.h>
+
+#ifdef RTE_LIBRTE_PMD_PCAP
+#include <rte_eth_pcap.h>
+#endif
 
 /* Maximum long option length for option parsing. */
 #define MAX_LONG_OPT_SZ 64
 #define RTE_LOGTYPE_APP RTE_LOGTYPE_USER1
+#define APP_ARG_TCPDUMP_MAX_TUPLES 54
+#define TCPDUMP_SOCKET_PATH "%s/tcpdump_mp_socket"
+#define CMSGLEN CMSG_LEN(sizeof(int))
+#define TX_DESC_PER_QUEUE 512
+#define RX_DESC_PER_QUEUE 128
+#define BURST_SIZE 32
+#define MBUF_PER_POOL 65535
+#define MBUF_POOL_CACHE_SIZE 250
+
+
+uint32_t src_ip_filter;
+
+int socket_fd =  -1;
 
+enum tcpdump_msg_type {
+	REMOVE_RXTX_CBS =  1,
+	REGISTER_RXTX_CBS = 2
+};
+
+enum rx_tx_type {
+	RX = 1,
+	TX = 2,
+	RX_TX_TYPES = 2
+};
+
+static struct rte_eth_conf port_conf_default;
+volatile uint8_t quit_signal;
 /**< mask of enabled ports */
 static uint32_t enabled_port_mask;
 /**< Enable stats. */
@@ -76,13 +124,46 @@  static uint32_t reset_xstats;
 /**< Enable memory info. */
 static uint32_t mem_info;
 
+bool is_tcpdump_enabled;
+static volatile struct tcpdump_app_stats {
+	struct {
+		uint64_t dequeue_pkts;
+		uint64_t tx_pkts;
+		uint64_t freed_pkts;
+	} in __rte_cache_aligned;
+	struct {
+		uint64_t dequeue_pkts;
+		uint64_t tx_pkts;
+		uint64_t freed_pkts;
+	} out __rte_cache_aligned;
+} tcpdump_app_stats __rte_cache_aligned;
+
+struct tcpdump_port_queue_tuples {
+	int num_pq_tuples;
+	uint8_t port_id[APP_ARG_TCPDUMP_MAX_TUPLES];
+	uint8_t queue_id[APP_ARG_TCPDUMP_MAX_TUPLES];
+} __rte_cache_aligned;
+
+int pcap_vdev_port_id[RX_TX_TYPES];
+
+static struct tcpdump_port_queue_tuples tcpdump_pq_t;
+
+struct output_buffer {
+	unsigned count;
+	struct rte_mbuf *mbufs[BURST_SIZE];
+};
+
 /**< display usage */
+
 static void
 proc_info_usage(const char *prgname)
 {
 	printf("%s [EAL options] -- -p PORTMASK\n"
 		"  -m to display DPDK memory zones, segments and TAILQ information\n"
 		"  -p PORTMASK: hexadecimal bitmask of ports to retrieve stats for\n"
+		"  --tcpdump (port,queue): port and queue info for capturing packets "
+			"for tcpdump\n"
+		"  --src-ip-filter \"A.B.C.D\": src ip for tcpdump filtering\n"
 		"  --stats: to display port statistics, enabled by default\n"
 		"  --xstats: to display extended port statistics, disabled by "
 			"default\n"
@@ -117,14 +198,79 @@  parse_portmask(const char *portmask)
 
 }
 
+/*
+ * Parse a "--tcpdump" argument made of one or more "(port,queue)" tuples and
+ * append every tuple to tcpdump_pq_t.
+ *
+ * Each field is converted with strtoul() (base auto-detected) and must not
+ * exceed 255. Tuples parsed before an error stay stored in tcpdump_pq_t.
+ *
+ * Returns 0 on success, -1 when a tuple is malformed, a value is out of
+ * range, tcpdump_pq_t is already full, or the argument holds no tuple.
+ */
+static int
+parse_tcpdump(const char *q_arg)
+{
+	char s[256];
+	const char *p, *p0 = q_arg;
+	char *end;
+
+	enum fieldnames {
+		FLD_PORT = 0,
+		FLD_QUEUE,
+		_NUM_FLD
+	};
+
+	unsigned long int_fld[_NUM_FLD];
+	char *str_fld[_NUM_FLD];
+	int i;
+	unsigned size;
+	uint32_t nb_tcpdump_params;
+
+	/* number of tuples found in this argument */
+	nb_tcpdump_params = 0;
+
+	while ((p = strchr(p0, '(')) != NULL) {
+		++p;
+		p0 = strchr(p, ')');
+		if (p0 == NULL)
+			return -1;
+
+		size = p0 - p;
+		if (size >= sizeof(s))
+			return -1;
+
+		/* "%.*s" expects an int precision; size < sizeof(s) here */
+		snprintf(s, sizeof(s), "%.*s", (int)size, p);
+		if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != _NUM_FLD)
+			return -1;
+		for (i = 0; i < _NUM_FLD; i++) {
+			errno = 0;
+			int_fld[i] = strtoul(str_fld[i], &end, 0);
+			if (errno != 0 || end == str_fld[i] || int_fld[i] > 255)
+				return -1;
+		}
+		/*
+		 * Bound the store on the number of tuples already held in
+		 * tcpdump_pq_t, which is the index written just below.
+		 */
+		if (tcpdump_pq_t.num_pq_tuples >= APP_ARG_TCPDUMP_MAX_TUPLES) {
+			printf("exceeded max number of port params: %"PRIu32"\n",
+					(uint32_t)tcpdump_pq_t.num_pq_tuples);
+			return -1;
+		}
+		tcpdump_pq_t.port_id[tcpdump_pq_t.num_pq_tuples] =
+							(uint8_t)int_fld[FLD_PORT];
+		tcpdump_pq_t.queue_id[tcpdump_pq_t.num_pq_tuples] =
+							(uint8_t)int_fld[FLD_QUEUE];
+		tcpdump_pq_t.num_pq_tuples++;
+		nb_tcpdump_params++;
+	}
+
+	/* an argument without any "(port,queue)" tuple is not a valid request */
+	if (nb_tcpdump_params == 0)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Parse the dotted-quad IPv4 address in q_arg into src_ip_filter.
+ *
+ * Returns 0 on success, 1 when the text cannot be converted.
+ */
+static int
+parse_ip(const char *q_arg)
+{
+	/* inet_pton() yields 1 on success, 0 for invalid text, -1 on error */
+	if (inet_pton(AF_INET, q_arg, &src_ip_filter) != 1)
+		return 1;
+
+	return 0;
+}
+
 /* Parse the argument given in the command line of the application */
 static int
 proc_info_parse_args(int argc, char **argv)
 {
-	int opt;
+	int opt, ret;
 	int option_index;
 	char *prgname = argv[0];
 	static struct option long_option[] = {
+		{"tcpdump", 1, 0, 0},
+		{"src-ip-filter", 1, 0, 0},
 		{"stats", 0, NULL, 0},
 		{"stats-reset", 0, NULL, 0},
 		{"xstats", 0, NULL, 0},
@@ -152,6 +298,27 @@  proc_info_parse_args(int argc, char **argv)
 			mem_info = 1;
 			break;
 		case 0:
+			if (!strncmp(long_option[option_index].name, "tcpdump",
+					MAX_LONG_OPT_SZ)) {
+				ret = parse_tcpdump(optarg);
+				if (ret) {
+					printf("invalid tcpdump\n");
+					proc_info_usage(prgname);
+					return -1;
+				}
+				is_tcpdump_enabled = true;
+			}
+
+			if (!strncmp(long_option[option_index].name, "src-ip-filter",
+					MAX_LONG_OPT_SZ)) {
+				ret = parse_ip(optarg);
+				if (ret) {
+					printf("invalid src-ip-filter\n");
+					proc_info_usage(prgname);
+					return -1;
+				}
+			}
+
 			/* Print stats */
 			if (!strncmp(long_option[option_index].name, "stats",
 					MAX_LONG_OPT_SZ))
@@ -286,6 +453,202 @@  nic_xstats_clear(uint8_t port_id)
 	printf("\n  NIC extended statistics for port %d cleared\n", port_id);
 }
 
+/*
+ * Build the path of the tcpdump AF_UNIX socket into buffer (at most bufsz
+ * bytes, always NUL-terminated by snprintf()).
+ *
+ * The socket lives in "<base>/tcpdump_socket", where <base> is /var/run for
+ * root (or when HOME is not set) and $HOME otherwise.
+ *
+ * NOTE(review): the primary process must derive the very same path for the
+ * connection to succeed - confirm against its implementation.
+ */
+static void
+tcpdump_get_socket_path(char *buffer, int bufsz)
+{
+	/* a directory longer than sun_path could never be connected to anyway */
+	char dir[sizeof(((struct sockaddr_un *)NULL)->sun_path)];
+	const char *base = "/var/run";
+	/* getenv() takes a variable name, not a path: look up HOME itself */
+	const char *home_dir = getenv("HOME");
+
+	if (getuid() != 0 && home_dir != NULL)
+		base = home_dir;
+
+	snprintf(dir, sizeof(dir), "%s/tcpdump_socket", base);
+	snprintf(buffer, bufsz, TCPDUMP_SOCKET_PATH, dir);
+}
+
+/*
+ * Create an AF_UNIX SOCK_SEQPACKET socket and connect it to the path given
+ * by tcpdump_get_socket_path().
+ *
+ * On success the descriptor is stored in the global socket_fd and returned.
+ * On failure the descriptor (if any) is closed, socket_fd is left at -1 and
+ * -1 is returned.
+ */
+static int
+tcpdump_connect_to_primary(void)
+{
+	struct sockaddr_un addr;
+	int fd;
+
+	/* never leave a closed descriptor in the global if we fail below */
+	socket_fd = -1;
+
+	/* set up a socket */
+	fd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
+	if (fd < 0) {
+		RTE_LOG(ERR, EAL, "Failed to create socket!\n");
+		return -1;
+	}
+
+	/* start from a fully defined address structure */
+	memset(&addr, 0, sizeof(addr));
+	addr.sun_family = AF_UNIX;
+	tcpdump_get_socket_path(addr.sun_path, sizeof(addr.sun_path));
+
+	if (connect(fd, (struct sockaddr *) &addr, sizeof(addr)) != 0) {
+		/* if connect failed */
+		close(fd);
+		return -1;
+	}
+
+	socket_fd = fd;
+	return fd;
+}
+
+/*
+ * Send one request of the given type to the peer connected on 'socket'.
+ *
+ * The message is a three-element iovec:
+ *   - one byte holding the message type,
+ *   - a fixed 256-byte, NUL-padded text buffer with the "(port,queue)"
+ *     tuples currently stored in tcpdump_pq_t,
+ *   - src_ip_filter.
+ *
+ * Returns 0 on success, -1 if the tuples do not fit in the text buffer or
+ * if sendmsg() fails.
+ */
+static int
+tcpdump_send_request(int socket, enum tcpdump_msg_type type)
+{
+	char buffer[256];
+	struct msghdr reg_cb_msg;
+	struct iovec msg[3];
+	/* a real one-byte object, so the byte sent does not depend on endianness */
+	uint8_t msg_type = (uint8_t)type;
+	size_t used = 0;
+	int wc, i;
+
+	/* the whole buffer goes on the wire: never send stale stack bytes */
+	memset(buffer, 0, sizeof(buffer));
+
+	for (i = 0; i < tcpdump_pq_t.num_pq_tuples; i++) {
+		wc = snprintf(buffer + used, sizeof(buffer) - used, "(%d,%d)",
+			tcpdump_pq_t.port_id[i], tcpdump_pq_t.queue_id[i]);
+		/* stop before the offset can run past the end of buffer */
+		if (wc < 0 || (size_t)wc >= sizeof(buffer) - used)
+			return -1;
+		used += (size_t)wc;
+	}
+
+	memset(msg, 0, sizeof(msg));
+	msg[0].iov_base = (char *) &msg_type;
+	msg[0].iov_len = sizeof(msg_type);
+	msg[1].iov_base = (char *)buffer;
+	msg[1].iov_len = sizeof(buffer);
+	msg[2].iov_base = (char *) &src_ip_filter;
+	msg[2].iov_len = sizeof(src_ip_filter);
+
+	memset(&reg_cb_msg, 0, sizeof(reg_cb_msg));
+	reg_cb_msg.msg_iov =  msg;
+	reg_cb_msg.msg_iovlen = 3;
+
+	if (sendmsg(socket, &reg_cb_msg, 0) < 0)
+		return -1;
+	return 0;
+}
+
+/*
+ * Signal handler: ask the primary process to remove its RX/TX callbacks,
+ * report the signal number and raise quit_signal.
+ *
+ * The removal request is best effort: a failure to connect or to send is
+ * reported and quit_signal is raised regardless.
+ *
+ * NOTE(review): printf() is not on the POSIX list of async-signal-safe
+ * functions; consider only setting quit_signal here and doing the socket
+ * work and printing from the main loop.
+ */
+static void
+int_handler(int sig_num)
+{
+	int fd;
+
+	/* connect to primary process using AF_UNIX socket */
+	fd = tcpdump_connect_to_primary();
+	if (fd < 0) {
+		/* nothing to send on and nothing to close */
+		printf("cannot connect to primary process for RX/TX CBs removal!\n");
+	} else {
+		/* send request to remove rx/tx callbacks */
+		if (tcpdump_send_request(fd, REMOVE_RXTX_CBS) < 0)
+			printf("cannot send tcpdump remove rxtx cbs request!\n");
+
+		/* close tcpdump socket fd exactly once */
+		close(fd);
+		socket_fd = -1;
+	}
+
+	printf("Exiting on signal %d\n", sig_num);
+	quit_signal = 1;
+}
+
+/*
+ * Configure and start the ethernet device port_id with no RX queue and one
+ * TX queue of TX_DESC_PER_QUEUE descriptors, print its MAC address and
+ * enable promiscuous mode.
+ *
+ * Returns 0 on success, -1 if port_id is not below rte_eth_dev_count(), or
+ * the error returned by the device configure/start call. A TX queue setup
+ * failure ends the application through rte_exit().
+ */
+static inline int
+configure_pcap_vdev(uint8_t port_id)
+{
+	struct ether_addr addr;
+	const uint16_t rxRings = 0, txRings = 1;
+	const uint8_t nb_ports = rte_eth_dev_count();
+	int ret;
+	uint16_t q;
+
+	/* valid ids are 0 .. nb_ports - 1, so nb_ports itself is out of range */
+	if (port_id >= nb_ports)
+		return -1;
+
+	ret = rte_eth_dev_configure(port_id, rxRings, txRings, &port_conf_default);
+	if (ret != 0)
+		return ret;
+
+	for (q = 0; q < txRings; q++) {
+		ret = rte_eth_tx_queue_setup(port_id, q, TX_DESC_PER_QUEUE,
+				rte_eth_dev_socket_id(port_id), NULL);
+		if (ret < 0) {
+			rte_exit(EXIT_FAILURE, "queue setup failed\n");
+			return ret;
+		}
+	}
+
+	ret = rte_eth_dev_start(port_id);
+	if (ret < 0)
+		return ret;
+
+	rte_eth_macaddr_get(port_id, &addr);
+	printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
+			" %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
+			(unsigned)port_id,
+			addr.addr_bytes[0], addr.addr_bytes[1],
+			addr.addr_bytes[2], addr.addr_bytes[3],
+			addr.addr_bytes[4], addr.addr_bytes[5]);
+
+	rte_eth_promiscuous_enable(port_id);
+
+	return 0;
+}
+
+/*
+ * Create the pcap virtual device used to dump one traffic direction.
+ *
+ * The device is named "eth_pcap_tcpdump_RX" or "eth_pcap_tcpdump_TX" and
+ * writes to "/tmp/RX_pcap.pcap" or "/tmp/TX_pcap.pcap" respectively.
+ *
+ * Returns the port id of the new device. The application is ended through
+ * rte_exit() when type is neither RX nor TX, when the device cannot be
+ * created, or when RTE_LIBRTE_PMD_PCAP is not defined.
+ */
+static int
+create_pcap_pmd_vdev(enum rx_tx_type type)
+{
+	char pcap_vdev_name[32];
+	char pcap_filename[32];
+#ifdef RTE_LIBRTE_PMD_PCAP
+	struct rx_pcaps rxpcap;
+	struct tx_pcaps txpcap;
+#endif
+	const char *direction = NULL;
+	int port_id;
+
+	if (type == RX)
+		direction = "RX";
+	else if (type == TX)
+		direction = "TX";
+
+	/* never build a device from uninitialised name buffers */
+	if (direction == NULL)
+		rte_exit(EXIT_FAILURE, "Invalid pcap_vdev type %d\n", (int)type);
+
+	snprintf(pcap_vdev_name, sizeof(pcap_vdev_name),
+			"eth_pcap_tcpdump_%s", direction);
+	snprintf(pcap_filename, sizeof(pcap_filename),
+			"/tmp/%s_pcap.pcap", direction);
+
+#ifdef RTE_LIBRTE_PMD_PCAP
+	/* no RX side: the device is only written to */
+	rxpcap.names[0] = "";
+	rxpcap.types[0] = "";
+	rxpcap.num_of_rx = 0;
+	txpcap.names[0] = pcap_filename;
+	txpcap.types[0] =  "tx_pcap";
+	txpcap.num_of_tx = 1;
+
+	port_id  = rte_eth_from_pcapsndumpers(pcap_vdev_name,
+				&rxpcap, rxpcap.num_of_rx,
+				&txpcap, txpcap.num_of_tx, rte_socket_id());
+#else
+	port_id = -1;
+#endif
+	if (port_id < 0)
+		rte_exit(EXIT_FAILURE, "Failed to create pcap_vdev\n");
+
+	return port_id;
+}
+
+/*
+ * Print the counters held in tcpdump_app_stats to stdout: dequeued,
+ * transmitted-to-pcap and freed packet counts, input direction first and
+ * output direction second, framed by a banner line above and below.
+ */
+static void
+print_tcpdump_stats(void)
+{
+	uint64_t count;
+
+	printf("##### TCPDUMP DEBUG STATS #####\n");
+
+	/* input direction */
+	count = tcpdump_app_stats.in.dequeue_pkts;
+	printf(" - Input packets dequeued:		%"PRIu64"\n", count);
+	count = tcpdump_app_stats.in.tx_pkts;
+	printf(" - Input packets transmitted to pcap:	%"PRIu64"\n", count);
+	count = tcpdump_app_stats.in.freed_pkts;
+	printf(" - Input packets freed:			%"PRIu64"\n", count);
+
+	/* output direction */
+	count = tcpdump_app_stats.out.dequeue_pkts;
+	printf(" - Output packets dequeued:		%"PRIu64"\n", count);
+	count = tcpdump_app_stats.out.tx_pkts;
+	printf(" - Output packets transmitted to pcap:	%"PRIu64"\n", count);
+	count = tcpdump_app_stats.out.freed_pkts;
+	printf(" - Output packets freed:		%"PRIu64"\n", count);
+
+	printf("################################\n");
+}
+
 int
 main(int argc, char **argv)
 {
@@ -296,6 +659,11 @@  main(int argc, char **argv)
 	char mp_flag[] = "--proc-type=secondary";
 	char *argp[argc + 3];
 	uint8_t nb_ports;
+	struct rte_ring *rx_ring, *tx_ring;
+	int socket_fd;
+
+	/* catch ctrl-c so we can print on exit */
+	signal(SIGINT, int_handler);
 
 	argp[0] = argv[0];
 	argp[1] = c_flag;
@@ -328,7 +696,6 @@  main(int argc, char **argv)
 	if (nb_ports == 0)
 		rte_exit(EXIT_FAILURE, "No Ethernet ports - bye\n");
 
-
 	if (nb_ports > RTE_MAX_ETHPORTS)
 		nb_ports = RTE_MAX_ETHPORTS;
 
@@ -349,5 +716,86 @@  main(int argc, char **argv)
 		}
 	}
 
+	if (is_tcpdump_enabled == true) {
+
+		/* create pcap virtual devices for rx and tx */
+		pcap_vdev_port_id[0] = create_pcap_pmd_vdev(RX);
+		configure_pcap_vdev(pcap_vdev_port_id[0]);
+
+		pcap_vdev_port_id[1] = create_pcap_pmd_vdev(TX);
+		configure_pcap_vdev(pcap_vdev_port_id[1]);
+
+		/* connect to primary process using AF_UNIX socket */
+		socket_fd = tcpdump_connect_to_primary();
+		if (socket_fd < 0) {
+			printf("cannot connect to primary process!\n");
+			return -1;
+		}
+
+		if (tcpdump_send_request(socket_fd, REGISTER_RXTX_CBS) < 0) {
+			printf("cannot send tcpdump register rxtx cbs request!\n");
+			close(socket_fd);
+			return -1;
+		}
+
+		while (1) {
+			rx_ring  =  rte_ring_lookup("prim_to_sec_rx");
+			tx_ring = rte_ring_lookup("prim_to_sec_tx");
+			if (rx_ring != NULL && tx_ring !=  NULL)
+				break;
+		}
+
+		while (!quit_signal) {
+			/* write input packets of port to pcap file for tcpdump */
+			struct rte_mbuf *rx_bufs[BURST_SIZE];
+
+			/* first dequeue packets from ring of primary process */
+			const uint16_t nb_in_deq = rte_ring_dequeue_burst(rx_ring,
+					(void *)rx_bufs, BURST_SIZE);
+			tcpdump_app_stats.in.dequeue_pkts += nb_in_deq;
+
+			if (nb_in_deq) {
+				/* then sent on pcap file */
+				uint16_t nb_in_txd = rte_eth_tx_burst(
+						pcap_vdev_port_id[0],
+						0, rx_bufs, nb_in_deq);
+				tcpdump_app_stats.in.tx_pkts += nb_in_txd;
+
+				if (unlikely(nb_in_txd < nb_in_deq)) {
+					do {
+						rte_pktmbuf_free(rx_bufs[nb_in_txd]);
+						tcpdump_app_stats.in.freed_pkts++;
+					} while (++nb_in_txd < nb_in_deq);
+				}
+
+			}
+
+			/* write output packets of port to pcap file for tcpdump */
+			struct rte_mbuf *tx_bufs[BURST_SIZE];
+
+			/* first dequeue from ring of primary process */
+			const uint16_t nb_out_deq = rte_ring_dequeue_burst(tx_ring,
+							(void *)tx_bufs, BURST_SIZE);
+			tcpdump_app_stats.out.dequeue_pkts += nb_out_deq;
+
+			if (nb_out_deq) {
+				/* then sent on pcap file */
+				uint16_t nb_out_txd = rte_eth_tx_burst(
+						pcap_vdev_port_id[1],
+						0, tx_bufs, nb_out_deq);
+				tcpdump_app_stats.out.tx_pkts += nb_out_txd;
+				if (unlikely(nb_out_txd < nb_out_deq)) {
+					do {
+						rte_pktmbuf_free(tx_bufs[nb_out_txd]);
+						tcpdump_app_stats.out.freed_pkts++;
+					} while (++nb_out_txd < nb_out_deq);
+
+				}
+			}
+		}
+
+		print_tcpdump_stats();
+
+	}
 	return 0;
 }