[v3,2/2] net/tap: add queues when attaching from secondary process

Message ID 1538047196-13789-2-git-send-email-rasland@mellanox.com (mailing list archive)
State Superseded, archived
Delegated to: Ferruh Yigit
Headers
Series [v3,1/2] net/tap: change queue fd to be pointers to process private |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation success Compilation OK

Commit Message

Raslan Darawsheh Sept. 27, 2018, 11:19 a.m. UTC
  In the case the device is created by the primary process,
the secondary must request some file descriptors to attach the queues.
The file descriptors are shared via IPC Unix socket.

Thanks to the IPC synchronization, the secondary process
is now able to do Rx/Tx on a TAP created by the primary process.

Signed-off-by: Raslan Darawsheh <rasland@mellanox.com>
Signed-off-by: Thomas Monjalon <thomas@monjalon.net>

---
v2:
   - translate file descriptors via IPC API
   - add documentation
v3:
   - rabse the commit
   - use private static array for fd's to be local for each process

---
---
 doc/guides/nics/tap.rst                |  16 ++++
 doc/guides/rel_notes/release_18_11.rst |   4 +
 drivers/net/tap/Makefile               |   1 +
 drivers/net/tap/rte_eth_tap.c          | 133 +++++++++++++++++++++++++++++++++
 4 files changed, 154 insertions(+)
  

Comments

Wiles, Keith Sept. 27, 2018, 1:04 p.m. UTC | #1
> On Sep 27, 2018, at 6:19 AM, Raslan Darawsheh <rasland@mellanox.com> wrote:
> 
> In the case the device is created by the primary process,
> the secondary must request some file descriptors to attach the queues.
> The file descriptors are shared via IPC Unix socket.
> 
> Thanks to the IPC synchronization, the secondary process
> is now able to do Rx/Tx on a TAP created by the primary process.
> 
> Signed-off-by: Raslan Darawsheh <rasland@mellanox.com>
> Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
> 
> ---
> v2:
>   - translate file descriptors via IPC API
>   - add documentation
> v3:
>   - rabse the commit
>   - use private static array for fd's to be local for each process
> 
> ---
> ---
> doc/guides/nics/tap.rst                |  16 ++++
> doc/guides/rel_notes/release_18_11.rst |   4 +
> drivers/net/tap/Makefile               |   1 +
> drivers/net/tap/rte_eth_tap.c          | 133 +++++++++++++++++++++++++++++++++
> 4 files changed, 154 insertions(+)
> 
> diff --git a/doc/guides/nics/tap.rst b/doc/guides/nics/tap.rst
> index 2714868..d1f3e1c 100644
> --- a/doc/guides/nics/tap.rst
> +++ b/doc/guides/nics/tap.rst
> @@ -152,6 +152,22 @@ Distribute IPv4 TCP packets using RSS to a given MAC address over queues 0-3::
>    testpmd> flow create 0 priority 4 ingress pattern eth dst is 0a:0b:0c:0d:0e:0f \
>             / ipv4 / tcp / end actions rss queues 0 1 2 3 end / end
> 
> +Multi-process sharing
> +---------------------
> +
> +It is possible to attach an existing TAP device in a secondary process,
> +by declaring it as a vdev with the same name as in the primary process,
> +and without any parameter.
> +
> +The port attached in a secondary process will give access to the
> +statistics and the queues.
> +Therefore it can be used for monitoring or Rx/Tx processing.
> +
> +The IPC synchronization of Rx/Tx queues is currently limited:
> +
> +  - Only 8 queues
> +  - Synchronized on probing, but not on later port update
> +
> Example
> -------
> 
> diff --git a/doc/guides/rel_notes/release_18_11.rst b/doc/guides/rel_notes/release_18_11.rst
> index 8c4bb54..a9dda5b 100644
> --- a/doc/guides/rel_notes/release_18_11.rst
> +++ b/doc/guides/rel_notes/release_18_11.rst
> @@ -67,6 +67,10 @@ New Features
>   SR-IOV option in Hyper-V and Azure. This is an alternative to the previous
>   vdev_netvsc, tap, and failsafe drivers combination.
> 
> +* **Added TAP Rx/Tx queues sharing with a secondary process.**
> +
> +  A secondary process can attach a TAP device created in the primary process,
> +  probe the queues, and process Rx/Tx in a secondary process.
> 
> API Changes
> -----------
> diff --git a/drivers/net/tap/Makefile b/drivers/net/tap/Makefile
> index 3243365..7748283 100644
> --- a/drivers/net/tap/Makefile
> +++ b/drivers/net/tap/Makefile
> @@ -22,6 +22,7 @@ CFLAGS += -O3
> CFLAGS += -I$(SRCDIR)
> CFLAGS += -I.
> CFLAGS += $(WERROR_FLAGS)
> +CFLAGS += -DALLOW_EXPERIMENTAL_API
> LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
> LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs -lrte_hash
> LDLIBS += -lrte_bus_vdev -lrte_gso
> diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
> index 8cc4552..8d276eb 100644
> --- a/drivers/net/tap/rte_eth_tap.c
> +++ b/drivers/net/tap/rte_eth_tap.c
> @@ -16,6 +16,8 @@
> #include <rte_debug.h>
> #include <rte_ip.h>
> #include <rte_string_fns.h>
> +#include <rte_ethdev.h>
> +#include <rte_errno.h>
> 
> #include <assert.h>
> #include <sys/types.h>
> @@ -62,6 +64,9 @@
> #define TAP_GSO_MBUFS_NUM \
> 	(TAP_GSO_MBUFS_PER_CORE * TAP_GSO_MBUF_CACHE_SIZE)
> 
> +/* IPC key for queue fds sync */
> +#define TAP_MP_KEY "tap_mp_sync_queues"
> +
> static struct rte_vdev_driver pmd_tap_drv;
> static struct rte_vdev_driver pmd_tun_drv;
> static struct pmd_process_private *process_private;
> @@ -101,6 +106,17 @@ enum ioctl_mode {
> 	REMOTE_ONLY,
> };
> 
> +/* Message header to synchronize queues via IPC */
> +struct ipc_queues {
> +	char port_name[RTE_DEV_NAME_MAX_LEN];
> +	int rxq_count;
> +	int txq_count;
> +	/*
> +	 * The file descriptors are in the dedicated part
> +	 * of the Unix message to be translated by the kernel.
> +	 */
> +};
> +
> static int tap_intr_handle_set(struct rte_eth_dev *dev, int set);
> 
> /**
> @@ -1980,6 +1996,100 @@ rte_pmd_tun_probe(struct rte_vdev_device *dev)
> 	return ret;
> }
> 
> +/* Request queue file descriptors from secondary to primary. */
> +static int
> +tap_mp_attach_queues(const char *port_name, struct rte_eth_dev *dev)
> +{
> +	int ret;
> +	struct timespec timeout = {.tv_sec = 1, .tv_nsec = 0};
> +	struct rte_mp_msg request, *reply;
> +	struct rte_mp_reply replies;
> +	struct ipc_queues *request_param = (struct ipc_queues *)request.param;
> +	struct ipc_queues *reply_param;
> +	int queue, fd_iterator;
> +
> +	/* Prepare the request */
> +	strcpy(request.name, TAP_MP_KEY);
> +	strcpy(request_param->port_name, port_name);

Should we not be using the strlcpy() functions here.
> +	request.len_param = sizeof(*request_param);
> +	/* Send request and receive reply */
> +	ret = rte_mp_request_sync(&request, &replies, &timeout);
> +	if (ret < 0) {
> +		TAP_LOG(ERR, "Failed to request queues from primary: %d",
> +			rte_errno);
> +		return -1;
> +	}
> +	/* FIXME: handle replies.nb_received > 1 */
> +	reply = &replies.msgs[0];
> +	reply_param = (struct ipc_queues *)reply->param;
> +	TAP_LOG(DEBUG, "Received IPC reply for %s", reply_param->port_name);
> +
> +	/* Attach the queues from received file descriptors */
> +
> +	dev->data->nb_rx_queues = reply_param->rxq_count;
> +	dev->data->nb_tx_queues = reply_param->txq_count;

Do we really need a rxq_count and txq_count as they are always the same it seems? Just a comment not a request to change it.
> +	fd_iterator = 0;
> +	for (queue = 0; queue < reply_param->rxq_count; queue++)
> +		process_private->rxq_fds[queue] = reply->fds[fd_iterator++];
> +	for (queue = 0; queue < reply_param->txq_count; queue++)
> +		process_private->txq_fds[queue] = reply->fds[fd_iterator++];
> +
> +

Extra blank line here, needs to be removed.
> +	return 0;
> +}
> +
> +/* Send the queue file descriptors from the primary process to secondary. */
> +static int
> +tap_mp_sync_queues(const struct rte_mp_msg *request, const void *peer)
> +{
> +	struct rte_eth_dev *dev;
> +	struct rte_mp_msg reply;
> +	const struct ipc_queues *request_param =
> +		(const struct ipc_queues *)request->param;
> +	struct ipc_queues *reply_param =
> +		(struct ipc_queues *)reply.param;
> +	uint16_t port_id;
> +	int queue;
> +	int ret;
> +
> +	/* Get requested port */
> +	TAP_LOG(DEBUG, "Received IPC request for %s", request_param->port_name);
> +	ret = rte_eth_dev_get_port_by_name(request_param->port_name, &port_id);
> +	if (ret) {
> +		TAP_LOG(ERR, "Failed to get port id for %s",
> +			request_param->port_name);
> +		return -1;
> +	}
> +	dev = &rte_eth_devices[port_id];
> +
> +	/* Fill file descriptors for all queues */
> +	reply.num_fds = 0;
> +	reply_param->rxq_count = 0;
> +	for (queue = 0; queue < dev->data->nb_rx_queues; queue++) {
> +		reply.fds[reply.num_fds++] = process_private->rxq_fds[queue];
> +		reply_param->rxq_count++;
> +	}
> +	assert(reply_param->rxq_count == dev->data->nb_rx_queues);

Pick an assert method assert() or RTE_ASSERT() why have both? I would suggest using RTE_ASSERT everywhere.
> +	reply_param->txq_count = 0;
> +	for (queue = 0; queue < dev->data->nb_tx_queues; queue++) {
> +		reply.fds[reply.num_fds++] = process_private->txq_fds[queue];
> +		reply_param->txq_count++;
> +	}
> +	assert(reply_param->txq_count == dev->data->nb_tx_queues);
> +	/* FIXME: split message if more queues than RTE_MP_MAX_FD_NUM */
> +	RTE_ASSERT(reply.num_fds <= RTE_MP_MAX_FD_NUM);

I do not like FIXME or TODO statements in the code. For FIXME we need to fix it or remove the comment. For TODO we need to put it in a enhancement list and do it later.
> +
> +	/* Send reply */
> +	strcpy(reply.name, request->name);
> +	strcpy(reply_param->port_name, request_param->port_name);

strlcpy() here as well.
> +	reply.len_param = sizeof(*reply_param);
> +	if (rte_mp_reply(&reply, peer) < 0) {
> +		TAP_LOG(ERR, "Failed to reply an IPC request to sync queues");
> +		return -1;
> +	}
> +	return 0;
> +}
> +
> /* Open a TAP interface device.
>  */
> static int
> @@ -2009,6 +2119,21 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
> 		/* TODO: request info from primary to set up Rx and Tx */
> 		eth_dev->dev_ops = &ops;
> 		eth_dev->device = &dev->device;
> +		eth_dev->rx_pkt_burst = pmd_rx_burst;
> +		eth_dev->tx_pkt_burst = pmd_tx_burst;
> +		if (!rte_eal_primary_proc_alive(NULL)) {
> +			TAP_LOG(ERR, "Primary process is missing");
> +			return -1;
> +		}
> +		process_private = (struct pmd_process_private *)
> +			rte_zmalloc_socket(name,
> +				sizeof(struct pmd_process_private),
> +					RTE_CACHE_LINE_SIZE,
> +					eth_dev->device->numa_node);
> +
> +		ret = tap_mp_attach_queues(name, eth_dev);
> +		if (ret != 0)
> +			return -1;
> 		rte_eth_dev_probing_finish(eth_dev);
> 		return 0;
> 	}
> @@ -2056,6 +2181,13 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
> 	TAP_LOG(NOTICE, "Initializing pmd_tap for %s as %s",
> 		name, tap_name);
> 
> +	/* Register IPC feed callback */
> +	ret = rte_mp_action_register(TAP_MP_KEY, tap_mp_sync_queues);
> +	if (ret < 0 && rte_errno != EEXIST) {
> +		TAP_LOG(ERR, "%s: Failed to register IPC callback: %s",
> +			tuntap_name, strerror(rte_errno));
> +		goto leave;
> +	}
> 	ret = eth_dev_tap_create(dev, tap_name, remote_iface, &user_mac,
> 		ETH_TUNTAP_TYPE_TAP);
> 
> @@ -2063,6 +2195,7 @@ rte_pmd_tap_probe(struct rte_vdev_device *dev)
> 	if (ret == -1) {
> 		TAP_LOG(ERR, "Failed to create pmd for %s as %s",
> 			name, tap_name);
> +		rte_mp_action_unregister(TAP_MP_KEY);
> 		tap_unit--;		/* Restore the unit number */
> 	}
> 	rte_kvargs_free(kvlist);
> -- 
> 2.7.4
> 

Regards,
Keith
  
Thomas Monjalon Sept. 27, 2018, 6:53 p.m. UTC | #2
Hi Keith and Raslan,

Thanks for your review Keith.
It reminds me that you had some comments in my v3 which were not addressed.

Raslan, it seems you missed the v3 I had sent (you sent another v3).
Please check the comments done by Keith and Ferruh during the summer.

Thank you all
  

Patch

diff --git a/doc/guides/nics/tap.rst b/doc/guides/nics/tap.rst
index 2714868..d1f3e1c 100644
--- a/doc/guides/nics/tap.rst
+++ b/doc/guides/nics/tap.rst
@@ -152,6 +152,22 @@  Distribute IPv4 TCP packets using RSS to a given MAC address over queues 0-3::
    testpmd> flow create 0 priority 4 ingress pattern eth dst is 0a:0b:0c:0d:0e:0f \
             / ipv4 / tcp / end actions rss queues 0 1 2 3 end / end
 
+Multi-process sharing
+---------------------
+
+It is possible to attach an existing TAP device in a secondary process,
+by declaring it as a vdev with the same name as in the primary process,
+and without any parameter.
+
+The port attached in a secondary process will give access to the
+statistics and the queues.
+Therefore it can be used for monitoring or Rx/Tx processing.
+
+The IPC synchronization of Rx/Tx queues is currently limited:
+
+  - Only 8 queues
+  - Synchronized on probing, but not on later port update
+
 Example
 -------
 
diff --git a/doc/guides/rel_notes/release_18_11.rst b/doc/guides/rel_notes/release_18_11.rst
index 8c4bb54..a9dda5b 100644
--- a/doc/guides/rel_notes/release_18_11.rst
+++ b/doc/guides/rel_notes/release_18_11.rst
@@ -67,6 +67,10 @@  New Features
   SR-IOV option in Hyper-V and Azure. This is an alternative to the previous
   vdev_netvsc, tap, and failsafe drivers combination.
 
+* **Added TAP Rx/Tx queues sharing with a secondary process.**
+
+  A secondary process can attach a TAP device created in the primary process,
+  probe the queues, and process Rx/Tx in a secondary process.
 
 API Changes
 -----------
diff --git a/drivers/net/tap/Makefile b/drivers/net/tap/Makefile
index 3243365..7748283 100644
--- a/drivers/net/tap/Makefile
+++ b/drivers/net/tap/Makefile
@@ -22,6 +22,7 @@  CFLAGS += -O3
 CFLAGS += -I$(SRCDIR)
 CFLAGS += -I.
 CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -DALLOW_EXPERIMENTAL_API
 LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
 LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs -lrte_hash
 LDLIBS += -lrte_bus_vdev -lrte_gso
diff --git a/drivers/net/tap/rte_eth_tap.c b/drivers/net/tap/rte_eth_tap.c
index 8cc4552..8d276eb 100644
--- a/drivers/net/tap/rte_eth_tap.c
+++ b/drivers/net/tap/rte_eth_tap.c
@@ -16,6 +16,8 @@ 
 #include <rte_debug.h>
 #include <rte_ip.h>
 #include <rte_string_fns.h>
+#include <rte_ethdev.h>
+#include <rte_errno.h>
 
 #include <assert.h>
 #include <sys/types.h>
@@ -62,6 +64,9 @@ 
 #define TAP_GSO_MBUFS_NUM \
 	(TAP_GSO_MBUFS_PER_CORE * TAP_GSO_MBUF_CACHE_SIZE)
 
+/* IPC key for queue fds sync */
+#define TAP_MP_KEY "tap_mp_sync_queues"
+
 static struct rte_vdev_driver pmd_tap_drv;
 static struct rte_vdev_driver pmd_tun_drv;
 static struct pmd_process_private *process_private;
@@ -101,6 +106,17 @@  enum ioctl_mode {
 	REMOTE_ONLY,
 };
 
+/* Message header to synchronize queues via IPC */
+struct ipc_queues {
+	char port_name[RTE_DEV_NAME_MAX_LEN];
+	int rxq_count;
+	int txq_count;
+	/*
+	 * The file descriptors are in the dedicated part
+	 * of the Unix message to be translated by the kernel.
+	 */
+};
+
 static int tap_intr_handle_set(struct rte_eth_dev *dev, int set);
 
 /**
@@ -1980,6 +1996,100 @@  rte_pmd_tun_probe(struct rte_vdev_device *dev)
 	return ret;
 }
 
+/* Request queue file descriptors from secondary to primary. */
+static int
+tap_mp_attach_queues(const char *port_name, struct rte_eth_dev *dev)
+{
+	int ret;
+	struct timespec timeout = {.tv_sec = 1, .tv_nsec = 0};
+	struct rte_mp_msg request, *reply;
+	struct rte_mp_reply replies;
+	struct ipc_queues *request_param = (struct ipc_queues *)request.param;
+	struct ipc_queues *reply_param;
+	int queue, fd_iterator;
+
+	/* Prepare the request */
+	strcpy(request.name, TAP_MP_KEY);
+	strcpy(request_param->port_name, port_name);
+	request.len_param = sizeof(*request_param);
+	/* Send request and receive reply */
+	ret = rte_mp_request_sync(&request, &replies, &timeout);
+	if (ret < 0) {
+		TAP_LOG(ERR, "Failed to request queues from primary: %d",
+			rte_errno);
+		return -1;
+	}
+	/* FIXME: handle replies.nb_received > 1 */
+	reply = &replies.msgs[0];
+	reply_param = (struct ipc_queues *)reply->param;
+	TAP_LOG(DEBUG, "Received IPC reply for %s", reply_param->port_name);
+
+	/* Attach the queues from received file descriptors */
+
+	dev->data->nb_rx_queues = reply_param->rxq_count;
+	dev->data->nb_tx_queues = reply_param->txq_count;
+	fd_iterator = 0;
+	for (queue = 0; queue < reply_param->rxq_count; queue++)
+		process_private->rxq_fds[queue] = reply->fds[fd_iterator++];
+	for (queue = 0; queue < reply_param->txq_count; queue++)
+		process_private->txq_fds[queue] = reply->fds[fd_iterator++];
+
+
+	return 0;
+}
+
+/* Send the queue file descriptors from the primary process to secondary. */
+static int
+tap_mp_sync_queues(const struct rte_mp_msg *request, const void *peer)
+{
+	struct rte_eth_dev *dev;
+	struct rte_mp_msg reply;
+	const struct ipc_queues *request_param =
+		(const struct ipc_queues *)request->param;
+	struct ipc_queues *reply_param =
+		(struct ipc_queues *)reply.param;
+	uint16_t port_id;
+	int queue;
+	int ret;
+
+	/* Get requested port */
+	TAP_LOG(DEBUG, "Received IPC request for %s", request_param->port_name);
+	ret = rte_eth_dev_get_port_by_name(request_param->port_name, &port_id);
+	if (ret) {
+		TAP_LOG(ERR, "Failed to get port id for %s",
+			request_param->port_name);
+		return -1;
+	}
+	dev = &rte_eth_devices[port_id];
+
+	/* Fill file descriptors for all queues */
+	reply.num_fds = 0;
+	reply_param->rxq_count = 0;
+	for (queue = 0; queue < dev->data->nb_rx_queues; queue++) {
+		reply.fds[reply.num_fds++] = process_private->rxq_fds[queue];
+		reply_param->rxq_count++;
+	}
+	assert(reply_param->rxq_count == dev->data->nb_rx_queues);
+	reply_param->txq_count = 0;
+	for (queue = 0; queue < dev->data->nb_tx_queues; queue++) {
+		reply.fds[reply.num_fds++] = process_private->txq_fds[queue];
+		reply_param->txq_count++;
+	}
+	assert(reply_param->txq_count == dev->data->nb_tx_queues);
+	/* FIXME: split message if more queues than RTE_MP_MAX_FD_NUM */
+	RTE_ASSERT(reply.num_fds <= RTE_MP_MAX_FD_NUM);
+
+	/* Send reply */
+	strcpy(reply.name, request->name);
+	strcpy(reply_param->port_name, request_param->port_name);
+	reply.len_param = sizeof(*reply_param);
+	if (rte_mp_reply(&reply, peer) < 0) {
+		TAP_LOG(ERR, "Failed to reply an IPC request to sync queues");
+		return -1;
+	}
+	return 0;
+}
+
 /* Open a TAP interface device.
  */
 static int
@@ -2009,6 +2119,21 @@  rte_pmd_tap_probe(struct rte_vdev_device *dev)
 		/* TODO: request info from primary to set up Rx and Tx */
 		eth_dev->dev_ops = &ops;
 		eth_dev->device = &dev->device;
+		eth_dev->rx_pkt_burst = pmd_rx_burst;
+		eth_dev->tx_pkt_burst = pmd_tx_burst;
+		if (!rte_eal_primary_proc_alive(NULL)) {
+			TAP_LOG(ERR, "Primary process is missing");
+			return -1;
+		}
+		process_private = (struct pmd_process_private *)
+			rte_zmalloc_socket(name,
+				sizeof(struct pmd_process_private),
+					RTE_CACHE_LINE_SIZE,
+					eth_dev->device->numa_node);
+
+		ret = tap_mp_attach_queues(name, eth_dev);
+		if (ret != 0)
+			return -1;
 		rte_eth_dev_probing_finish(eth_dev);
 		return 0;
 	}
@@ -2056,6 +2181,13 @@  rte_pmd_tap_probe(struct rte_vdev_device *dev)
 	TAP_LOG(NOTICE, "Initializing pmd_tap for %s as %s",
 		name, tap_name);
 
+	/* Register IPC feed callback */
+	ret = rte_mp_action_register(TAP_MP_KEY, tap_mp_sync_queues);
+	if (ret < 0 && rte_errno != EEXIST) {
+		TAP_LOG(ERR, "%s: Failed to register IPC callback: %s",
+			tuntap_name, strerror(rte_errno));
+		goto leave;
+	}
 	ret = eth_dev_tap_create(dev, tap_name, remote_iface, &user_mac,
 		ETH_TUNTAP_TYPE_TAP);
 
@@ -2063,6 +2195,7 @@  rte_pmd_tap_probe(struct rte_vdev_device *dev)
 	if (ret == -1) {
 		TAP_LOG(ERR, "Failed to create pmd for %s as %s",
 			name, tap_name);
+		rte_mp_action_unregister(TAP_MP_KEY);
 		tap_unit--;		/* Restore the unit number */
 	}
 	rte_kvargs_free(kvlist);