[v15,02/11] net/tap: do not duplicate fd's

Message ID 20240521201410.126116-3-stephen@networkplumber.org (mailing list archive)
State Accepted
Delegated to: Ferruh Yigit
Headers
Series net/tap: make RSS work again |

Checks

Context Check Description
ci/checkpatch success coding style OK

Commit Message

Stephen Hemminger May 21, 2024, 8:12 p.m. UTC
  The TAP device can use same file descriptor for both rx and tx queues
which reduces the number of fd's required.

MP process support passes file descriptors from primary
to secondary process; but because of the restriction on
max fd's passed RTE_MP_MAX_FD_NUM (8) the TAP device was restricted
to only 4 queues if using secondary.
This allows up to 8 queues (versus 4).

The restriction on max fd's should be changed in eal in
future, but it will break ABI compatibility.
The max Linux supports which is SCM_MAX_FD (253).

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
---
 doc/guides/rel_notes/release_24_07.rst |   4 +
 drivers/net/tap/rte_eth_tap.c          | 192 ++++++++++---------------
 drivers/net/tap/rte_eth_tap.h          |   3 +-
 drivers/net/tap/tap_flow.c             |   3 +-
 drivers/net/tap/tap_intr.c             |   7 +-
 5 files changed, 89 insertions(+), 120 deletions(-)
  

Patch

diff --git a/doc/guides/rel_notes/release_24_07.rst b/doc/guides/rel_notes/release_24_07.rst
index a69f24cf99..a6295359b1 100644
--- a/doc/guides/rel_notes/release_24_07.rst
+++ b/doc/guides/rel_notes/release_24_07.rst
@@ -55,6 +55,10 @@  New Features
      Also, make sure to start the actual text at the margin.
      =======================================================
 
+* **Update Tap PMD driver.**
+
+  * Updated to support up to 8 queues when used by secondary process.
+
 
 Removed Items
 -------------
diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index 69d9da695b..b84fc01856 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -124,8 +124,7 @@  enum ioctl_mode {
 /* Message header to synchronize queues via IPC */
 struct ipc_queues {
 	char port_name[RTE_DEV_NAME_MAX_LEN];
-	int rxq_count;
-	int txq_count;
+	int q_count;
 	/*
 	 * The file descriptors are in the dedicated part
 	 * of the Unix message to be translated by the kernel.
@@ -446,7 +445,7 @@  pmd_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
 		uint16_t data_off = rte_pktmbuf_headroom(mbuf);
 		int len;
 
-		len = readv(process_private->rxq_fds[rxq->queue_id],
+		len = readv(process_private->fds[rxq->queue_id],
 			*rxq->iovecs,
 			1 + (rxq->rxmode->offloads & RTE_ETH_RX_OFFLOAD_SCATTER ?
 			     rxq->nb_rx_desc : 1));
@@ -643,7 +642,7 @@  tap_write_mbufs(struct tx_queue *txq, uint16_t num_mbufs,
 		}
 
 		/* copy the tx frame data */
-		n = writev(process_private->txq_fds[txq->queue_id], iovecs, k);
+		n = writev(process_private->fds[txq->queue_id], iovecs, k);
 		if (n <= 0)
 			return -1;
 
@@ -851,7 +850,6 @@  tap_mp_req_on_rxtx(struct rte_eth_dev *dev)
 	struct rte_mp_msg msg;
 	struct ipc_queues *request_param = (struct ipc_queues *)msg.param;
 	int err;
-	int fd_iterator = 0;
 	struct pmd_process_private *process_private = dev->process_private;
 	int i;
 
@@ -859,16 +857,13 @@  tap_mp_req_on_rxtx(struct rte_eth_dev *dev)
 	strlcpy(msg.name, TAP_MP_REQ_START_RXTX, sizeof(msg.name));
 	strlcpy(request_param->port_name, dev->data->name, sizeof(request_param->port_name));
 	msg.len_param = sizeof(*request_param);
-	for (i = 0; i < dev->data->nb_tx_queues; i++) {
-		msg.fds[fd_iterator++] = process_private->txq_fds[i];
-		msg.num_fds++;
-		request_param->txq_count++;
-	}
-	for (i = 0; i < dev->data->nb_rx_queues; i++) {
-		msg.fds[fd_iterator++] = process_private->rxq_fds[i];
-		msg.num_fds++;
-		request_param->rxq_count++;
-	}
+
+	/* rx and tx share file descriptors and nb_tx_queues == nb_rx_queues */
+	for (i = 0; i < dev->data->nb_rx_queues; i++)
+		msg.fds[i] = process_private->fds[i];
+
+	request_param->q_count = dev->data->nb_rx_queues;
+	msg.num_fds = dev->data->nb_rx_queues;
 
 	err = rte_mp_sendmsg(&msg);
 	if (err < 0) {
@@ -910,8 +905,6 @@  tap_mp_req_start_rxtx(const struct rte_mp_msg *request, __rte_unused const void
 	struct rte_eth_dev *dev;
 	const struct ipc_queues *request_param =
 		(const struct ipc_queues *)request->param;
-	int fd_iterator;
-	int queue;
 	struct pmd_process_private *process_private;
 
 	dev = rte_eth_dev_get_by_name(request_param->port_name);
@@ -920,14 +913,13 @@  tap_mp_req_start_rxtx(const struct rte_mp_msg *request, __rte_unused const void
 			request_param->port_name);
 		return -1;
 	}
+
 	process_private = dev->process_private;
-	fd_iterator = 0;
-	TAP_LOG(DEBUG, "tap_attach rx_q:%d tx_q:%d\n", request_param->rxq_count,
-		request_param->txq_count);
-	for (queue = 0; queue < request_param->txq_count; queue++)
-		process_private->txq_fds[queue] = request->fds[fd_iterator++];
-	for (queue = 0; queue < request_param->rxq_count; queue++)
-		process_private->rxq_fds[queue] = request->fds[fd_iterator++];
+	TAP_LOG(DEBUG, "tap_attach q:%d\n", request_param->q_count);
+
+	for (int q = 0; q < request_param->q_count; q++)
+		process_private->fds[q] = request->fds[q];
+
 
 	return 0;
 }
@@ -1115,13 +1107,21 @@  tap_stats_reset(struct rte_eth_dev *dev)
 	return 0;
 }
 
+static void
+tap_queue_close(struct pmd_process_private *process_private, uint16_t qid)
+{
+	if (process_private->fds[qid] != -1) {
+		close(process_private->fds[qid]);
+		process_private->fds[qid] = -1;
+	}
+}
+
 static int
 tap_dev_close(struct rte_eth_dev *dev)
 {
 	int i;
 	struct pmd_internals *internals = dev->data->dev_private;
 	struct pmd_process_private *process_private = dev->process_private;
-	struct rx_queue *rxq;
 
 	if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
 		rte_free(dev->process_private);
@@ -1141,19 +1141,14 @@  tap_dev_close(struct rte_eth_dev *dev)
 	}
 
 	for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
-		if (process_private->rxq_fds[i] != -1) {
-			rxq = &internals->rxq[i];
-			close(process_private->rxq_fds[i]);
-			process_private->rxq_fds[i] = -1;
-			tap_rxq_pool_free(rxq->pool);
-			rte_free(rxq->iovecs);
-			rxq->pool = NULL;
-			rxq->iovecs = NULL;
-		}
-		if (process_private->txq_fds[i] != -1) {
-			close(process_private->txq_fds[i]);
-			process_private->txq_fds[i] = -1;
-		}
+		struct rx_queue *rxq = &internals->rxq[i];
+
+		tap_queue_close(process_private, i);
+
+		tap_rxq_pool_free(rxq->pool);
+		rte_free(rxq->iovecs);
+		rxq->pool = NULL;
+		rxq->iovecs = NULL;
 	}
 
 	if (internals->remote_if_index) {
@@ -1206,15 +1201,16 @@  tap_rx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
 
 	if (!rxq)
 		return;
+
 	process_private = rte_eth_devices[rxq->in_port].process_private;
-	if (process_private->rxq_fds[rxq->queue_id] != -1) {
-		close(process_private->rxq_fds[rxq->queue_id]);
-		process_private->rxq_fds[rxq->queue_id] = -1;
-		tap_rxq_pool_free(rxq->pool);
-		rte_free(rxq->iovecs);
-		rxq->pool = NULL;
-		rxq->iovecs = NULL;
-	}
+
+	tap_rxq_pool_free(rxq->pool);
+	rte_free(rxq->iovecs);
+	rxq->pool = NULL;
+	rxq->iovecs = NULL;
+
+	if (dev->data->tx_queues[qid] == NULL)
+		tap_queue_close(process_private, qid);
 }
 
 static void
@@ -1225,12 +1221,10 @@  tap_tx_queue_release(struct rte_eth_dev *dev, uint16_t qid)
 
 	if (!txq)
 		return;
-	process_private = rte_eth_devices[txq->out_port].process_private;
 
-	if (process_private->txq_fds[txq->queue_id] != -1) {
-		close(process_private->txq_fds[txq->queue_id]);
-		process_private->txq_fds[txq->queue_id] = -1;
-	}
+	process_private = rte_eth_devices[txq->out_port].process_private;
+	if (dev->data->rx_queues[qid] == NULL)
+		tap_queue_close(process_private, qid);
 }
 
 static int
@@ -1482,52 +1476,31 @@  tap_setup_queue(struct rte_eth_dev *dev,
 		uint16_t qid,
 		int is_rx)
 {
-	int ret;
-	int *fd;
-	int *other_fd;
-	const char *dir;
+	int fd, ret;
 	struct pmd_internals *pmd = dev->data->dev_private;
 	struct pmd_process_private *process_private = dev->process_private;
 	struct rx_queue *rx = &internals->rxq[qid];
 	struct tx_queue *tx = &internals->txq[qid];
-	struct rte_gso_ctx *gso_ctx;
+	struct rte_gso_ctx *gso_ctx = is_rx ? NULL : &tx->gso_ctx;
+	const char *dir = is_rx ? "rx" : "tx";
 
-	if (is_rx) {
-		fd = &process_private->rxq_fds[qid];
-		other_fd = &process_private->txq_fds[qid];
-		dir = "rx";
-		gso_ctx = NULL;
-	} else {
-		fd = &process_private->txq_fds[qid];
-		other_fd = &process_private->rxq_fds[qid];
-		dir = "tx";
-		gso_ctx = &tx->gso_ctx;
-	}
-	if (*fd != -1) {
+	fd = process_private->fds[qid];
+	if (fd != -1) {
 		/* fd for this queue already exists */
 		TAP_LOG(DEBUG, "%s: fd %d for %s queue qid %d exists",
-			pmd->name, *fd, dir, qid);
+			pmd->name, fd, dir, qid);
 		gso_ctx = NULL;
-	} else if (*other_fd != -1) {
-		/* Only other_fd exists. dup it */
-		*fd = dup(*other_fd);
-		if (*fd < 0) {
-			*fd = -1;
-			TAP_LOG(ERR, "%s: dup() failed.", pmd->name);
-			return -1;
-		}
-		TAP_LOG(DEBUG, "%s: dup fd %d for %s queue qid %d (%d)",
-			pmd->name, *other_fd, dir, qid, *fd);
 	} else {
-		/* Both RX and TX fds do not exist (equal -1). Create fd */
-		*fd = tun_alloc(pmd, 0, 0);
-		if (*fd < 0) {
-			*fd = -1; /* restore original value */
+		fd = tun_alloc(pmd, 0, 0);
+		if (fd < 0) {
 			TAP_LOG(ERR, "%s: tun_alloc() failed.", pmd->name);
 			return -1;
 		}
+
 		TAP_LOG(DEBUG, "%s: add %s queue for qid %d fd %d",
-			pmd->name, dir, qid, *fd);
+			pmd->name, dir, qid, fd);
+
+		process_private->fds[qid] = fd;
 	}
 
 	tx->mtu = &dev->data->mtu;
@@ -1540,7 +1513,7 @@  tap_setup_queue(struct rte_eth_dev *dev,
 
 	tx->type = pmd->type;
 
-	return *fd;
+	return fd;
 }
 
 static int
@@ -1620,7 +1593,7 @@  tap_rx_queue_setup(struct rte_eth_dev *dev,
 
 	TAP_LOG(DEBUG, "  RX TUNTAP device name %s, qid %d on fd %d",
 		internals->name, rx_queue_id,
-		process_private->rxq_fds[rx_queue_id]);
+		process_private->fds[rx_queue_id]);
 
 	return 0;
 
@@ -1664,7 +1637,7 @@  tap_tx_queue_setup(struct rte_eth_dev *dev,
 	TAP_LOG(DEBUG,
 		"  TX TUNTAP device name %s, qid %d on fd %d csum %s",
 		internals->name, tx_queue_id,
-		process_private->txq_fds[tx_queue_id],
+		process_private->fds[tx_queue_id],
 		txq->csum ? "on" : "off");
 
 	return 0;
@@ -2001,10 +1974,9 @@  eth_dev_tap_create(struct rte_vdev_device *vdev, const char *tap_name,
 	dev->intr_handle = pmd->intr_handle;
 
 	/* Presetup the fds to -1 as being not valid */
-	for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++) {
-		process_private->rxq_fds[i] = -1;
-		process_private->txq_fds[i] = -1;
-	}
+	for (i = 0; i < RTE_PMD_TAP_MAX_QUEUES; i++)
+		process_private->fds[i] = -1;
+
 
 	if (pmd->type == ETH_TUNTAP_TYPE_TAP) {
 		if (rte_is_zero_ether_addr(mac_addr))
@@ -2332,7 +2304,6 @@  tap_mp_attach_queues(const char *port_name, struct rte_eth_dev *dev)
 	struct ipc_queues *request_param = (struct ipc_queues *)request.param;
 	struct ipc_queues *reply_param;
 	struct pmd_process_private *process_private = dev->process_private;
-	int queue, fd_iterator;
 
 	/* Prepare the request */
 	memset(&request, 0, sizeof(request));
@@ -2352,18 +2323,17 @@  tap_mp_attach_queues(const char *port_name, struct rte_eth_dev *dev)
 	TAP_LOG(DEBUG, "Received IPC reply for %s", reply_param->port_name);
 
 	/* Attach the queues from received file descriptors */
-	if (reply_param->rxq_count + reply_param->txq_count != reply->num_fds) {
+	if (reply_param->q_count != reply->num_fds) {
 		TAP_LOG(ERR, "Unexpected number of fds received");
 		return -1;
 	}
 
-	dev->data->nb_rx_queues = reply_param->rxq_count;
-	dev->data->nb_tx_queues = reply_param->txq_count;
-	fd_iterator = 0;
-	for (queue = 0; queue < reply_param->rxq_count; queue++)
-		process_private->rxq_fds[queue] = reply->fds[fd_iterator++];
-	for (queue = 0; queue < reply_param->txq_count; queue++)
-		process_private->txq_fds[queue] = reply->fds[fd_iterator++];
+	dev->data->nb_rx_queues = reply_param->q_count;
+	dev->data->nb_tx_queues = reply_param->q_count;
+
+	for (int q = 0; q < reply_param->q_count; q++)
+		process_private->fds[q] = reply->fds[q];
+
 	free(reply);
 	return 0;
 }
@@ -2393,25 +2363,19 @@  tap_mp_sync_queues(const struct rte_mp_msg *request, const void *peer)
 
 	/* Fill file descriptors for all queues */
 	reply.num_fds = 0;
-	reply_param->rxq_count = 0;
-	if (dev->data->nb_rx_queues + dev->data->nb_tx_queues >
-			RTE_MP_MAX_FD_NUM){
-		TAP_LOG(ERR, "Number of rx/tx queues exceeds max number of fds");
+	reply_param->q_count = 0;
+
+	RTE_ASSERT(dev->data->nb_rx_queues == dev->data->nb_tx_queues);
+	if (dev->data->nb_rx_queues > RTE_MP_MAX_FD_NUM) {
+		TAP_LOG(ERR, "Number of rx/tx queues %u exceeds max number of fds %u",
+			dev->data->nb_rx_queues, RTE_MP_MAX_FD_NUM);
 		return -1;
 	}
 
 	for (queue = 0; queue < dev->data->nb_rx_queues; queue++) {
-		reply.fds[reply.num_fds++] = process_private->rxq_fds[queue];
-		reply_param->rxq_count++;
-	}
-	RTE_ASSERT(reply_param->rxq_count == dev->data->nb_rx_queues);
-
-	reply_param->txq_count = 0;
-	for (queue = 0; queue < dev->data->nb_tx_queues; queue++) {
-		reply.fds[reply.num_fds++] = process_private->txq_fds[queue];
-		reply_param->txq_count++;
+		reply.fds[reply.num_fds++] = process_private->fds[queue];
+		reply_param->q_count++;
 	}
-	RTE_ASSERT(reply_param->txq_count == dev->data->nb_tx_queues);
 
 	/* Send reply */
 	strlcpy(reply.name, request->name, sizeof(reply.name));
diff --git a/drivers/net/tap/rte_eth_tap.h b/drivers/net/tap/rte_eth_tap.h
index 5ac93f93e9..dc8201020b 100644
--- a/drivers/net/tap/rte_eth_tap.h
+++ b/drivers/net/tap/rte_eth_tap.h
@@ -96,8 +96,7 @@  struct pmd_internals {
 };
 
 struct pmd_process_private {
-	int rxq_fds[RTE_PMD_TAP_MAX_QUEUES];
-	int txq_fds[RTE_PMD_TAP_MAX_QUEUES];
+	int fds[RTE_PMD_TAP_MAX_QUEUES];
 };
 
 /* tap_intr.c */
diff --git a/drivers/net/tap/tap_flow.c b/drivers/net/tap/tap_flow.c
index 79cd6a12ca..a78fd50cd4 100644
--- a/drivers/net/tap/tap_flow.c
+++ b/drivers/net/tap/tap_flow.c
@@ -1595,8 +1595,9 @@  tap_flow_isolate(struct rte_eth_dev *dev,
 	 * If netdevice is there, setup appropriate flow rules immediately.
 	 * Otherwise it will be set when bringing up the netdevice (tun_alloc).
 	 */
-	if (process_private->rxq_fds[0] == -1)
+	if (process_private->fds[0] == -1)
 		return 0;
+
 	if (set) {
 		struct rte_flow *remote_flow;
 
diff --git a/drivers/net/tap/tap_intr.c b/drivers/net/tap/tap_intr.c
index a9097def1a..1908f71f97 100644
--- a/drivers/net/tap/tap_intr.c
+++ b/drivers/net/tap/tap_intr.c
@@ -68,9 +68,11 @@  tap_rx_intr_vec_install(struct rte_eth_dev *dev)
 	}
 	for (i = 0; i < n; i++) {
 		struct rx_queue *rxq = pmd->dev->data->rx_queues[i];
+		int fd = process_private->fds[i];
 
 		/* Skip queues that cannot request interrupts. */
-		if (!rxq || process_private->rxq_fds[i] == -1) {
+		if (!rxq || fd == -1) {
+			/* Use invalid intr_vec[] index to disable entry. */
 			/* Use invalid intr_vec[] index to disable entry. */
 			if (rte_intr_vec_list_index_set(intr_handle, i,
 			RTE_INTR_VEC_RXTX_OFFSET + RTE_MAX_RXTX_INTR_VEC_ID))
@@ -80,8 +82,7 @@  tap_rx_intr_vec_install(struct rte_eth_dev *dev)
 		if (rte_intr_vec_list_index_set(intr_handle, i,
 					RTE_INTR_VEC_RXTX_OFFSET + count))
 			return -rte_errno;
-		if (rte_intr_efds_index_set(intr_handle, count,
-						   process_private->rxq_fds[i]))
+		if (rte_intr_efds_index_set(intr_handle, count, fd))
 			return -rte_errno;
 		count++;
 	}