@@ -1,7 +1,7 @@
/*
* BSD LICENSE
*
- * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -38,8 +38,25 @@
#include <stdarg.h>
#include <inttypes.h>
#include <sys/queue.h>
+#include <sys/socket.h>
#include <stdlib.h>
#include <getopt.h>
+#include <unistd.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <arpa/inet.h>
+
+/* sys/un.h with __USE_MISC uses strlen, which is unsafe */
+#ifdef __USE_MISC
+#define REMOVED_USE_MISC
+#undef __USE_MISC
+#endif
+#include <sys/un.h>
+/* restore __USE_MISC only if it was defined before we removed it above */
+#ifdef REMOVED_USE_MISC
+#define __USE_MISC
+#undef REMOVED_USE_MISC
+#endif
#include <rte_eal.h>
#include <rte_config.h>
@@ -58,11 +75,42 @@
#include <rte_atomic.h>
#include <rte_branch_prediction.h>
#include <rte_string_fns.h>
+#include <rte_errno.h>
+
+#ifdef RTE_LIBRTE_PMD_PCAP
+#include <rte_eth_pcap.h>
+#endif
/* Maximum long option length for option parsing. */
#define MAX_LONG_OPT_SZ 64
#define RTE_LOGTYPE_APP RTE_LOGTYPE_USER1
+#define APP_ARG_TCPDUMP_MAX_TUPLES 54
+#define TCPDUMP_SOCKET_PATH "%s/tcpdump_mp_socket"
+#define CMSGLEN CMSG_LEN(sizeof(int))
+#define TX_DESC_PER_QUEUE 512
+#define RX_DESC_PER_QUEUE 128
+#define BURST_SIZE 32
+#define MBUF_PER_POOL 65535
+#define MBUF_POOL_CACHE_SIZE 250
+
+
+uint32_t src_ip_filter;
+
+int socket_fd = -1;
+/* Request codes passed to tcpdump_send_request() as the message type. */
+enum tcpdump_msg_type {
+ REMOVE_RXTX_CBS = 1, /* sent by int_handler() */
+ REGISTER_RXTX_CBS = 2 /* sent by main() when tcpdump is enabled */
+};
+
+/*
+ * Capture direction handed to create_pcap_pmd_vdev().
+ * RX_TX_TYPES is used as the length of pcap_vdev_port_id[]; note that main()
+ * indexes that array with 0 and 1, not with the RX/TX enumerator values.
+ */
+enum rx_tx_type {
+ RX = 1,
+ TX = 2,
+ RX_TX_TYPES = 2 /* same value as TX; serves as an array length */
+};
+
+static struct rte_eth_conf port_conf_default;
+volatile uint8_t quit_signal;
/**< mask of enabled ports */
static uint32_t enabled_port_mask;
/**< Enable stats. */
@@ -76,13 +124,46 @@ static uint32_t reset_xstats;
/**< Enable memory info. */
static uint32_t mem_info;
+bool is_tcpdump_enabled;
+/*
+ * Packet counters for the tcpdump capture loop in main(), printed by
+ * print_tcpdump_stats().  "in" is updated for packets taken from the
+ * "prim_to_sec_rx" ring and "out" for packets taken from the
+ * "prim_to_sec_tx" ring.
+ */
+static volatile struct tcpdump_app_stats {
+ struct {
+ uint64_t dequeue_pkts; /* packets returned by rte_ring_dequeue_burst() */
+ uint64_t tx_pkts; /* packets accepted by rte_eth_tx_burst() */
+ uint64_t freed_pkts; /* dequeued packets freed because tx_burst left them */
+ } in __rte_cache_aligned;
+ struct {
+ uint64_t dequeue_pkts; /* packets returned by rte_ring_dequeue_burst() */
+ uint64_t tx_pkts; /* packets accepted by rte_eth_tx_burst() */
+ uint64_t freed_pkts; /* dequeued packets freed because tx_burst left them */
+ } out __rte_cache_aligned;
+} tcpdump_app_stats __rte_cache_aligned;
+
+/*
+ * Table of (port, queue) pairs collected by parse_tcpdump() and serialised
+ * by tcpdump_send_request().  port_id[i] and queue_id[i] form one pair.
+ */
+struct tcpdump_port_queue_tuples {
+ int num_pq_tuples; /* number of pairs currently stored */
+ uint8_t port_id[APP_ARG_TCPDUMP_MAX_TUPLES];
+ uint8_t queue_id[APP_ARG_TCPDUMP_MAX_TUPLES];
+} __rte_cache_aligned;
+
+int pcap_vdev_port_id[RX_TX_TYPES];
+
+static struct tcpdump_port_queue_tuples tcpdump_pq_t;
+
+/*
+ * A batch of up to BURST_SIZE mbuf pointers plus a fill count.
+ * NOTE(review): no user of this type is visible in this part of the file --
+ * confirm it is needed.
+ */
+struct output_buffer {
+ unsigned count;
+ struct rte_mbuf *mbufs[BURST_SIZE];
+};
+
/**< display usage */
+
static void
proc_info_usage(const char *prgname)
{
printf("%s [EAL options] -- -p PORTMASK\n"
" -m to display DPDK memory zones, segments and TAILQ information\n"
" -p PORTMASK: hexadecimal bitmask of ports to retrieve stats for\n"
+ " --tcpdump (port,queue): port and queue info for capturing packets "
+ "for tcpdump\n"
+ " --src-ip-filter \"A.B.C.D\": src ip for tcpdump filtering\n"
" --stats: to display port statistics, enabled by default\n"
" --xstats: to display extended port statistics, disabled by "
"default\n"
@@ -117,14 +198,79 @@ parse_portmask(const char *portmask)
}
+/*
+ * Parse a --tcpdump argument made of one or more "(port,queue)" tuples and
+ * append every tuple to tcpdump_pq_t.
+ *
+ * Each field is converted with strtoul() (base auto-detected) and must be in
+ * the range 0..255.
+ *
+ * Returns 0 on success (also when q_arg contains no tuple at all) and -1 on a
+ * malformed tuple or when tcpdump_pq_t already holds
+ * APP_ARG_TCPDUMP_MAX_TUPLES entries.  Tuples stored before the failing one
+ * stay in the table.
+ */
+static int
+parse_tcpdump(const char *q_arg)
+{
+	enum fieldnames {
+		FLD_PORT = 0,
+		FLD_QUEUE,
+		_NUM_FLD
+	};
+
+	char s[256];
+	const char *p, *p0 = q_arg;
+	char *end;
+	unsigned long int_fld[_NUM_FLD];
+	char *str_fld[_NUM_FLD];
+	int i;
+	unsigned size;
+
+	while ((p = strchr(p0, '(')) != NULL) {
+		++p;
+		p0 = strchr(p, ')');
+		if (p0 == NULL)
+			return -1;
+
+		size = p0 - p;
+		if (size >= sizeof(s))
+			return -1;
+
+		/* "%.*s" wants an int precision; size < sizeof(s) so the cast is safe */
+		snprintf(s, sizeof(s), "%.*s", (int)size, p);
+		if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != _NUM_FLD)
+			return -1;
+		for (i = 0; i < _NUM_FLD; i++) {
+			errno = 0;
+			int_fld[i] = strtoul(str_fld[i], &end, 0);
+			if (errno != 0 || end == str_fld[i] || int_fld[i] > 255)
+				return -1;
+		}
+
+		/*
+		 * Bound the write on the table's real fill level; a separate
+		 * counter that is never advanced would let the arrays overflow.
+		 */
+		if (tcpdump_pq_t.num_pq_tuples >= APP_ARG_TCPDUMP_MAX_TUPLES) {
+			printf("exceeded max number of port params: %d\n",
+				tcpdump_pq_t.num_pq_tuples);
+			return -1;
+		}
+		tcpdump_pq_t.port_id[tcpdump_pq_t.num_pq_tuples] =
+			(uint8_t)int_fld[FLD_PORT];
+		tcpdump_pq_t.queue_id[tcpdump_pq_t.num_pq_tuples] =
+			(uint8_t)int_fld[FLD_QUEUE];
+		tcpdump_pq_t.num_pq_tuples++;
+	}
+	return 0;
+}
+
+/*
+ * Parse a dotted-decimal IPv4 address from q_arg into src_ip_filter.
+ *
+ * Returns 0 on success and 1 when the text cannot be converted.
+ */
+static int
+parse_ip(const char *q_arg)
+{
+	/*
+	 * inet_pton() returns 1 on success, 0 for unparsable text and -1 on
+	 * error, so only an exact 1 may be treated as success.
+	 */
+	if (inet_pton(AF_INET, q_arg, &src_ip_filter) != 1)
+		return 1;
+
+	return 0;
+}
+
/* Parse the argument given in the command line of the application */
static int
proc_info_parse_args(int argc, char **argv)
{
- int opt;
+ int opt, ret;
int option_index;
char *prgname = argv[0];
static struct option long_option[] = {
+ {"tcpdump", 1, 0, 0},
+ {"src-ip-filter", 1, 0, 0},
{"stats", 0, NULL, 0},
{"stats-reset", 0, NULL, 0},
{"xstats", 0, NULL, 0},
@@ -152,6 +298,27 @@ proc_info_parse_args(int argc, char **argv)
mem_info = 1;
break;
case 0:
+ if (!strncmp(long_option[option_index].name, "tcpdump",
+ MAX_LONG_OPT_SZ)) {
+ ret = parse_tcpdump(optarg);
+ if (ret) {
+ printf("invalid tcpdump\n");
+ proc_info_usage(prgname);
+ return -1;
+ }
+ is_tcpdump_enabled = true;
+ }
+
+ if (!strncmp(long_option[option_index].name, "src-ip-filter",
+ MAX_LONG_OPT_SZ)) {
+ ret = parse_ip(optarg);
+ if (ret) {
+ printf("invalid src-ip-filter\n");
+ proc_info_usage(prgname);
+ return -1;
+ }
+ }
+
/* Print stats */
if (!strncmp(long_option[option_index].name, "stats",
MAX_LONG_OPT_SZ))
@@ -286,6 +453,202 @@ nic_xstats_clear(uint8_t port_id)
printf("\n NIC extended statistics for port %d cleared\n", port_id);
}
+/*
+ * Format the tcpdump socket path into buffer (at most bufsz bytes, written
+ * with snprintf()).
+ *
+ * The directory part is "/var/run/tcpdump_socket" unless the caller is not
+ * root and the environment lookup below returns a value.
+ *
+ * NOTE(review): getenv() is asked for a variable literally named
+ * "HOME/tcpdump_socket", not for $HOME, so the lookup is expected to return
+ * NULL and the /var/run directory to be used for every user.  The resulting
+ * path has to match the one the primary process listens on -- check that
+ * side before changing the lookup.
+ */
+static void
+tcpdump_get_socket_path(char *buffer, int bufsz)
+{
+	const char *socket_dir = getenv("HOME/tcpdump_socket");
+
+	/* root, or nothing found in the environment: use the system directory */
+	if (getuid() == 0 || socket_dir == NULL)
+		socket_dir = "/var/run/tcpdump_socket";
+
+	snprintf(buffer, bufsz, TCPDUMP_SOCKET_PATH, socket_dir);
+}
+
+/*
+ * Create an AF_UNIX/SOCK_SEQPACKET socket and connect it to the path
+ * produced by tcpdump_get_socket_path().
+ *
+ * The descriptor is kept in the file-scope socket_fd as well as returned.
+ *
+ * Returns the connected descriptor, or -1 on failure; on failure the
+ * descriptor is closed and socket_fd is left at a negative value.
+ */
+static int
+tcpdump_connect_to_primary(void)
+{
+	struct sockaddr_un addr;
+
+	/* set up a socket */
+	socket_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0);
+	if (socket_fd < 0) {
+		RTE_LOG(ERR, EAL, "Failed to create socket!\n");
+		return -1;
+	}
+
+	/* clear the whole address so connect() never sees indeterminate bytes */
+	memset(&addr, 0, sizeof(addr));
+	addr.sun_family = AF_UNIX;
+	tcpdump_get_socket_path(addr.sun_path, sizeof(addr.sun_path));
+
+	if (connect(socket_fd, (struct sockaddr *)&addr, sizeof(addr)) == 0)
+		return socket_fd;
+
+	/* connect failed: release the descriptor and drop the stale value */
+	close(socket_fd);
+	socket_fd = -1;
+	return -1;
+}
+
+/*
+ * Send one request to the primary process over the given socket.
+ *
+ * The message consists of three parts, in this order:
+ *   1. one byte holding the request type,
+ *   2. a 256-byte text buffer with every tuple of tcpdump_pq_t formatted as
+ *      "(port,queue)", zero-filled after the text,
+ *   3. src_ip_filter.
+ *
+ * Returns 0 on success, -1 if the tuples do not fit into the text buffer or
+ * if sendmsg() fails.
+ */
+static int
+tcpdump_send_request(int socket, enum tcpdump_msg_type type)
+{
+	char buffer[256];
+	struct msghdr reg_cb_msg;
+	struct iovec msg[3];
+	/* a real one-byte object, so the byte sent is the same on any endianness */
+	uint8_t msg_type = (uint8_t)type;
+	size_t used = 0;
+	int wc, i;
+
+	/* the whole buffer goes on the wire: never send uninitialised stack */
+	memset(buffer, 0, sizeof(buffer));
+	for (i = 0; i < tcpdump_pq_t.num_pq_tuples; i++) {
+		wc = snprintf(buffer + used, sizeof(buffer) - used, "(%d,%d)",
+			tcpdump_pq_t.port_id[i], tcpdump_pq_t.queue_id[i]);
+		/* stop before the remaining-space computation can wrap around */
+		if (wc < 0 || (size_t)wc >= sizeof(buffer) - used)
+			return -1;
+		used += (size_t)wc;
+	}
+
+	memset(msg, 0, sizeof(msg));
+	msg[0].iov_base = &msg_type;
+	msg[0].iov_len = sizeof(msg_type);
+	msg[1].iov_base = buffer;
+	msg[1].iov_len = sizeof(buffer);
+	msg[2].iov_base = &src_ip_filter;
+	msg[2].iov_len = sizeof(src_ip_filter);
+
+	memset(&reg_cb_msg, 0, sizeof(reg_cb_msg));
+	reg_cb_msg.msg_iov = msg;
+	reg_cb_msg.msg_iovlen = 3;
+
+	if (sendmsg(socket, &reg_cb_msg, 0) < 0)
+		return -1;
+	return 0;
+}
+
+/*
+ * Signal handler: ask the primary process to remove the RX/TX callbacks,
+ * then set quit_signal.
+ *
+ * A fresh connection is opened for the request.  The request is only sent
+ * when that connection succeeded, and the descriptor is closed exactly once.
+ *
+ * NOTE(review): printf() is not on the POSIX list of async-signal-safe
+ * functions; consider only setting quit_signal here and doing the rest from
+ * the main loop.
+ */
+static void
+int_handler(int sig_num)
+{
+	/* connect to primary process using AF_UNIX socket */
+	socket_fd = tcpdump_connect_to_primary();
+	if (socket_fd < 0) {
+		printf("cannot connect to primary process for RX/TX CBs removal!\n");
+	} else {
+		/* send request to remove rx/tx callbacks */
+		if (tcpdump_send_request(socket_fd, REMOVE_RXTX_CBS) < 0)
+			printf("cannot send tcpdump remove rxtx cbs eequest!\n");
+
+		/* single close for both outcomes; a second close could hit a reused fd */
+		close(socket_fd);
+		socket_fd = -1;
+	}
+
+	printf("Exiting on signal %d\n", sig_num);
+	quit_signal = 1;
+}
+
+/*
+ * Configure and start a pcap virtual device used as a capture sink.
+ *
+ * The port is configured with no RX queue and one TX queue (using
+ * port_conf_default), the TX queue is set up with TX_DESC_PER_QUEUE
+ * descriptors, the port is started, its MAC address is printed and
+ * promiscuous mode is enabled.
+ *
+ * Returns 0 on success, -1 if port_id is not below rte_eth_dev_count(), or
+ * the error returned by rte_eth_dev_configure()/rte_eth_dev_start().
+ * A TX queue setup failure terminates the process through rte_exit().
+ */
+static inline int
+configure_pcap_vdev(uint8_t port_id)
+{
+	struct ether_addr addr;
+	const uint16_t rxRings = 0, txRings = 1;
+	const uint8_t nb_ports = rte_eth_dev_count();
+	int ret;
+	uint16_t q;
+
+	/* valid ids are 0 .. nb_ports - 1, so nb_ports itself is out of range */
+	if (port_id >= nb_ports)
+		return -1;
+
+	ret = rte_eth_dev_configure(port_id, rxRings, txRings, &port_conf_default);
+	if (ret != 0)
+		return ret;
+
+	for (q = 0; q < txRings; q++) {
+		ret = rte_eth_tx_queue_setup(port_id, q, TX_DESC_PER_QUEUE,
+				rte_eth_dev_socket_id(port_id), NULL);
+		if (ret < 0)
+			rte_exit(EXIT_FAILURE, "queue setup failed\n");
+	}
+
+	ret = rte_eth_dev_start(port_id);
+	if (ret < 0)
+		return ret;
+
+	rte_eth_macaddr_get(port_id, &addr);
+	printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
+			" %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
+			(unsigned)port_id,
+			addr.addr_bytes[0], addr.addr_bytes[1],
+			addr.addr_bytes[2], addr.addr_bytes[3],
+			addr.addr_bytes[4], addr.addr_bytes[5]);
+
+	rte_eth_promiscuous_enable(port_id);
+
+	return 0;
+}
+
+/*
+ * Create the pcap virtual device for one capture direction.
+ *
+ * The device is named "eth_pcap_tcpdump_RX" or "eth_pcap_tcpdump_TX" and
+ * dumps to "/tmp/RX_pcap.pcap" or "/tmp/TX_pcap.pcap" respectively.
+ *
+ * Returns the new port id.  The process is terminated through rte_exit()
+ * when type is neither RX nor TX, when the device cannot be created, or
+ * when the build has no RTE_LIBRTE_PMD_PCAP.
+ */
+static int
+create_pcap_pmd_vdev(enum rx_tx_type type)
+{
+	char pcap_vdev_name[32];
+	char pcap_filename[32];
+#ifdef RTE_LIBRTE_PMD_PCAP
+	struct rx_pcaps rxpcap;
+	struct tx_pcaps txpcap;
+#endif
+	const char *direction = NULL;
+	int port_id;
+
+	if (type == RX)
+		direction = "RX";
+	else if (type == TX)
+		direction = "TX";
+	else
+		/* refuse to go on with unset name/filename buffers */
+		rte_exit(EXIT_FAILURE, "Invalid pcap_vdev type\n");
+
+	snprintf(pcap_vdev_name, sizeof(pcap_vdev_name),
+			"eth_pcap_tcpdump_%s", direction);
+	snprintf(pcap_filename, sizeof(pcap_filename),
+			"/tmp/%s_pcap.pcap", direction);
+
+#ifdef RTE_LIBRTE_PMD_PCAP
+	rxpcap.names[0] = "";
+	rxpcap.types[0] = "";
+	rxpcap.num_of_rx = 0;
+	txpcap.names[0] = pcap_filename;
+	txpcap.types[0] = "tx_pcap";
+	txpcap.num_of_tx = 1;
+
+	port_id = rte_eth_from_pcapsndumpers(pcap_vdev_name,
+			&rxpcap, rxpcap.num_of_rx,
+			&txpcap, txpcap.num_of_tx, rte_socket_id());
+#else
+	port_id = -1;
+#endif
+	if (port_id < 0)
+		rte_exit(EXIT_FAILURE, "Failed to create pcap_vdev\n");
+
+	return port_id;
+}
+
+/*
+ * Print the six tcpdump_app_stats counters (dequeued, transmitted to pcap
+ * and freed, for the input and the output direction) between banner lines.
+ */
+static void
+print_tcpdump_stats(void)
+{
+	/* read the counters in the order they are printed */
+	const uint64_t in_dequeued = tcpdump_app_stats.in.dequeue_pkts;
+	const uint64_t in_sent = tcpdump_app_stats.in.tx_pkts;
+	const uint64_t in_freed = tcpdump_app_stats.in.freed_pkts;
+	const uint64_t out_dequeued = tcpdump_app_stats.out.dequeue_pkts;
+	const uint64_t out_sent = tcpdump_app_stats.out.tx_pkts;
+	const uint64_t out_freed = tcpdump_app_stats.out.freed_pkts;
+
+	printf("##### TCPDUMP DEBUG STATS #####\n");
+
+	/* input direction */
+	printf(" - Input packets dequeued: %"PRIu64"\n", in_dequeued);
+	printf(" - Input packets transmitted to pcap: %"PRIu64"\n", in_sent);
+	printf(" - Input packets freed: %"PRIu64"\n", in_freed);
+
+	/* output direction */
+	printf(" - Output packets dequeued: %"PRIu64"\n", out_dequeued);
+	printf(" - Output packets transmitted to pcap: %"PRIu64"\n", out_sent);
+	printf(" - Output packets freed: %"PRIu64"\n", out_freed);
+
+	printf("################################\n");
+}
+
int
main(int argc, char **argv)
{
@@ -296,6 +659,11 @@ main(int argc, char **argv)
char mp_flag[] = "--proc-type=secondary";
char *argp[argc + 3];
uint8_t nb_ports;
+ struct rte_ring *rx_ring, *tx_ring;
+ int socket_fd;
+
+ /* catch ctrl-c so we can print on exit */
+ signal(SIGINT, int_handler);
argp[0] = argv[0];
argp[1] = c_flag;
@@ -328,7 +696,6 @@ main(int argc, char **argv)
if (nb_ports == 0)
rte_exit(EXIT_FAILURE, "No Ethernet ports - bye\n");
-
if (nb_ports > RTE_MAX_ETHPORTS)
nb_ports = RTE_MAX_ETHPORTS;
@@ -349,5 +716,86 @@ main(int argc, char **argv)
}
}
+ if (is_tcpdump_enabled == true) {
+
+ /* create pcap virtual devices for rx and tx */
+ pcap_vdev_port_id[0] = create_pcap_pmd_vdev(RX);
+ configure_pcap_vdev(pcap_vdev_port_id[0]);
+
+ pcap_vdev_port_id[1] = create_pcap_pmd_vdev(TX);
+ configure_pcap_vdev(pcap_vdev_port_id[1]);
+
+ /* connect to primary process using AF_UNIX socket */
+ socket_fd = tcpdump_connect_to_primary();
+ if (socket_fd < 0) {
+ printf("cannot connect to primary process!\n");
+ return -1;
+ }
+
+ if (tcpdump_send_request(socket_fd, REGISTER_RXTX_CBS) < 0) {
+ printf("cannot send tcpdump register rxtx cbs request!\n");
+ close(socket_fd);
+ return -1;
+ }
+
+ while (1) {
+ rx_ring = rte_ring_lookup("prim_to_sec_rx");
+ tx_ring = rte_ring_lookup("prim_to_sec_tx");
+ if (rx_ring != NULL && tx_ring != NULL)
+ break;
+ }
+
+ while (!quit_signal) {
+ /* write input packets of port to pcap file for tcpdump */
+ struct rte_mbuf *rx_bufs[BURST_SIZE];
+
+ /* first dequeue packets from ring of primary process */
+ const uint16_t nb_in_deq = rte_ring_dequeue_burst(rx_ring,
+ (void *)rx_bufs, BURST_SIZE);
+ tcpdump_app_stats.in.dequeue_pkts += nb_in_deq;
+
+ if (nb_in_deq) {
+ /* then sent on pcap file */
+ uint16_t nb_in_txd = rte_eth_tx_burst(
+ pcap_vdev_port_id[0],
+ 0, rx_bufs, nb_in_deq);
+ tcpdump_app_stats.in.tx_pkts += nb_in_txd;
+
+ if (unlikely(nb_in_txd < nb_in_deq)) {
+ do {
+ rte_pktmbuf_free(rx_bufs[nb_in_txd]);
+ tcpdump_app_stats.in.freed_pkts++;
+ } while (++nb_in_txd < nb_in_deq);
+ }
+
+ }
+
+ /* write output packets of port to pcap file for tcpdump */
+ struct rte_mbuf *tx_bufs[BURST_SIZE];
+
+ /* first dequeue from ring of primary process */
+ const uint16_t nb_out_deq = rte_ring_dequeue_burst(tx_ring,
+ (void *)tx_bufs, BURST_SIZE);
+ tcpdump_app_stats.out.dequeue_pkts += nb_out_deq;
+
+ if (nb_out_deq) {
+ /* then sent on pcap file */
+ uint16_t nb_out_txd = rte_eth_tx_burst(
+ pcap_vdev_port_id[1],
+ 0, tx_bufs, nb_out_deq);
+ tcpdump_app_stats.out.tx_pkts += nb_out_txd;
+ if (unlikely(nb_out_txd < nb_out_deq)) {
+ do {
+ rte_pktmbuf_free(tx_bufs[nb_out_txd]);
+ tcpdump_app_stats.out.freed_pkts++;
+ } while (++nb_out_txd < nb_out_deq);
+
+ }
+ }
+ }
+
+ print_tcpdump_stats();
+
+ }
return 0;
}