[02/11] telemetry: add initial connection socket

Message ID 1535026093-101872-3-git-send-email-ciara.power@intel.com (mailing list archive)
State Superseded, archived
Delegated to: Thomas Monjalon
Headers
Series introduce telemetry library |

Checks

Context Check Description
ci/checkpatch success coding style OK
ci/Intel-compilation fail Compilation issues

Commit Message

Power, Ciara Aug. 23, 2018, 12:08 p.m. UTC
  This patch adds the telemetry UNIX socket. It is used to
allow connections from external clients.

On the initial connection from a client, ethdev stats are
registered in the metrics library, to allow for their retrieval
at a later stage.

Signed-off-by: Ciara Power <ciara.power@intel.com>
Signed-off-by: Brian Archbold <brian.archbold@intel.com>
---
 lib/librte_telemetry/rte_telemetry.c          | 205 ++++++++++++++++++++++++++
 lib/librte_telemetry/rte_telemetry_internal.h |   4 +
 2 files changed, 209 insertions(+)
  

Comments

Gaëtan Rivet Aug. 28, 2018, 4:40 p.m. UTC | #1
Hi Ciara,

On Thu, Aug 23, 2018 at 01:08:04PM +0100, Ciara Power wrote:
> This patch adds the telemetry UNIX socket. It is used to
> allow connections from external clients.
> 
> On the initial connection from a client, ethdev stats are
> registered in the metrics library, to allow for their retrieval
> at a later stage.
> 
> Signed-off-by: Ciara Power <ciara.power@intel.com>
> Signed-off-by: Brian Archbold <brian.archbold@intel.com>
> ---
>  lib/librte_telemetry/rte_telemetry.c          | 205 ++++++++++++++++++++++++++
>  lib/librte_telemetry/rte_telemetry_internal.h |   4 +
>  2 files changed, 209 insertions(+)
> 
> diff --git a/lib/librte_telemetry/rte_telemetry.c b/lib/librte_telemetry/rte_telemetry.c
> index 8d7b0e3..f984929 100644
> --- a/lib/librte_telemetry/rte_telemetry.c
> +++ b/lib/librte_telemetry/rte_telemetry.c
> @@ -3,21 +3,159 @@
>   */
>  
>  #include <unistd.h>
> +#include <fcntl.h>
> +#include <sys/socket.h>
> +#include <sys/un.h>
>  
>  #include <rte_eal.h>
>  #include <rte_ethdev.h>
>  #include <rte_metrics.h>
> +#include <rte_string_fns.h>
>  
>  #include "rte_telemetry.h"
>  #include "rte_telemetry_internal.h"
>  
>  #define SLEEP_TIME 10
>  
> +#define DEFAULT_DPDK_PATH "/var/run/.rte_telemetry"
> +
> +const char *socket_path = DEFAULT_DPDK_PATH;
>  static telemetry_impl *static_telemetry;
>  
> +int32_t
> +rte_telemetry_check_port_activity(int port_id)
> +{
> +	int pid;
> +
> +	RTE_ETH_FOREACH_DEV(pid) {
> +		if (pid == port_id)
> +			return 1;
> +	}
> +	TELEMETRY_LOG_ERR("Error - port_id: %d is invalid, not active\n",
> +		port_id);
> +	return 0;
> +}
> +

This function seems more about ethdev than telemetry.
Maybe add it as part of librte_ethdev.

The "active" semantic is blurry however. With this implementation, this
is checking that the device is currently attached and owned by the
default user. Should telemetry be limited to devices owned by default
user?

Also, this function does not seem used in this patch, can it be added
when used?

> +static int32_t
> +rte_telemetry_reg_ethdev_to_metrics(uint16_t port_id)

"_to" might not be necessary.

> +{
> +	int ret, num_xstats, start_index, i;
> +	struct rte_eth_xstat *eth_xstats;
> +
> +	if (!rte_eth_dev_is_valid_port(port_id)) {
> +		TELEMETRY_LOG_ERR("Error - port_id: %d is invalid\n", port_id);
> +		return -EINVAL;
> +	}
> +
> +	num_xstats = rte_eth_xstats_get(port_id, NULL, 0);
> +	if (num_xstats < 0) {
> +		TELEMETRY_LOG_ERR("Error - rte_eth_xstats_get(%u) failed:"
> +			" %d\n", port_id, num_xstats);

I guess there isn't really a consensus yet, as the checkpatch.sh tool
might be misconfigured, but the cesura is very awkward here.

Same for other logs.

> +		return -EPERM;
> +	}
> +
> +	eth_xstats = malloc(sizeof(struct rte_eth_xstat) * num_xstats);
> +	if (eth_xstats == NULL) {
> +		TELEMETRY_LOG_ERR("Error - Failed to malloc memory for"
> +			" xstats\n");
> +		return -ENOMEM;
> +	}
> +	ret = rte_eth_xstats_get(port_id, eth_xstats, num_xstats);
> +	if (ret < 0 || ret > num_xstats) {
> +		free(eth_xstats);
> +		TELEMETRY_LOG_ERR("Error - rte_eth_xstats_get(%u) len%i"
> +			" failed: %d\n", port_id, num_xstats, ret);
> +		return -EPERM;
> +	}
> +	struct rte_eth_xstat_name *eth_xstats_names;
> +	eth_xstats_names = malloc(sizeof(struct rte_eth_xstat_name) *
> +		 num_xstats);
> +	if (eth_xstats_names == NULL) {

You are sometimes checking pointers against NULL, sometimes using "!".
You can choose either in your library, but it would be better to be
consistent and use a unified coding style.

> +		free(eth_xstats);
> +		TELEMETRY_LOG_ERR("Error - Failed to malloc memory for"
> +			" xstats_names\n");
> +		return -ENOMEM;
> +	}
> +	ret = rte_eth_xstats_get_names(port_id, eth_xstats_names,
> +		 num_xstats);
> +	if (ret < 0 || ret > num_xstats) {
> +		free(eth_xstats);
> +		free(eth_xstats_names);
> +		TELEMETRY_LOG_ERR("Error - rte_eth_xstats_get_names(%u)"
> +			" len%i failed: %d\n", port_id, num_xstats,
> +				 ret);
> +		return -EPERM;
> +	}
> +	const char *xstats_names[num_xstats];
> +
> +	for (i = 0; i < num_xstats; i++)
> +		xstats_names[i] = eth_xstats_names[eth_xstats[i].id].name;
> +
> +	start_index = rte_metrics_reg_names(xstats_names, num_xstats);
> +
> +	if (start_index < 0) {
> +		TELEMETRY_LOG_ERR("Error - rte_metrics_reg_names failed -"
> +			" metrics may already be registered\n");
> +		free(eth_xstats);
> +		free(eth_xstats_names);
> +		return -1;
> +	}
> +	free(eth_xstats_names);
> +	free(eth_xstats);

At this point, you have repeated 3 times those frees().
It would be cleaner to define proper labels and use goto instead.

[snip]
  
Van Haaren, Harry Aug. 28, 2018, 5:03 p.m. UTC | #2
> From: Gaëtan Rivet [mailto:gaetan.rivet@6wind.com]
> Sent: Tuesday, August 28, 2018 5:40 PM
> To: Power, Ciara <ciara.power@intel.com>
> Cc: Van Haaren, Harry <harry.van.haaren@intel.com>; Archbold, Brian
> <brian.archbold@intel.com>; Kenny, Emma <emma.kenny@intel.com>; dev@dpdk.org
> Subject: Re: [dpdk-dev] [PATCH 02/11] telemetry: add initial connection
> socket

<snip>

> > +int32_t
> > +rte_telemetry_check_port_activity(int port_id)
> > +{
> > +	int pid;
> > +
> > +	RTE_ETH_FOREACH_DEV(pid) {
> > +		if (pid == port_id)
> > +			return 1;
> > +	}
> > +	TELEMETRY_LOG_ERR("Error - port_id: %d is invalid, not active\n",
> > +		port_id);
> > +	return 0;
> > +}
> > +
> 
> This function seems more about ethdev than telemetry.
> Maybe add it as part of librte_ethdev.

Yep that might be a better place, making it a generic ethdev function.


> The "active" semantic is blurry however. With this implementation, this
> is checking that the device is currently attached and owned by the
> default user. Should telemetry be limited to devices owned by default
> user?

Good question - perhaps one that we can get input on from others.
Would you (app X) want App Y to read telemetry from your ethdev/cryptodev?

Perhaps keeping telemetry to an "owned" port is a feature, perhaps
a limitation.

I'd like input on this one :D


> Also, this function does not seem used in this patch, can it be added
> when used?

Sure.


> > +static int32_t
> > +rte_telemetry_reg_ethdev_to_metrics(uint16_t port_id)
> 
> "_to" might not be necessary.
> 
> > +{
> > +	int ret, num_xstats, start_index, i;
> > +	struct rte_eth_xstat *eth_xstats;
> > +
> > +	if (!rte_eth_dev_is_valid_port(port_id)) {
> > +		TELEMETRY_LOG_ERR("Error - port_id: %d is invalid\n", port_id);
> > +		return -EINVAL;
> > +	}
> > +
> > +	num_xstats = rte_eth_xstats_get(port_id, NULL, 0);
> > +	if (num_xstats < 0) {
> > +		TELEMETRY_LOG_ERR("Error - rte_eth_xstats_get(%u) failed:"
> > +			" %d\n", port_id, num_xstats);
> 
> I guess there isn't really a consensus yet, as the checkpatch.sh tool
> might be misconfigured, but the cesura is very awkward here.
> 
> Same for other logs.

Agreed, improvements possible here.


> > +		return -EPERM;
> > +	}
> > +
> > +	eth_xstats = malloc(sizeof(struct rte_eth_xstat) * num_xstats);
> > +	if (eth_xstats == NULL) {
> > +		TELEMETRY_LOG_ERR("Error - Failed to malloc memory for"
> > +			" xstats\n");
> > +		return -ENOMEM;
> > +	}
> > +	ret = rte_eth_xstats_get(port_id, eth_xstats, num_xstats);
> > +	if (ret < 0 || ret > num_xstats) {
> > +		free(eth_xstats);
> > +		TELEMETRY_LOG_ERR("Error - rte_eth_xstats_get(%u) len%i"
> > +			" failed: %d\n", port_id, num_xstats, ret);
> > +		return -EPERM;
> > +	}
> > +	struct rte_eth_xstat_name *eth_xstats_names;
> > +	eth_xstats_names = malloc(sizeof(struct rte_eth_xstat_name) *
> > +		 num_xstats);
> > +	if (eth_xstats_names == NULL) {
> 
> You are sometimes checking pointers against NULL, sometimes using "!".
> You can choose either in your library, but it would be better to be
> consistent and use a unified coding style.

Heh, I guess that's down to dev-style, and you'll note multiple sign-offs ;)

Can review for v2.


> > +		free(eth_xstats);
> > +		TELEMETRY_LOG_ERR("Error - Failed to malloc memory for"
> > +			" xstats_names\n");
> > +		return -ENOMEM;
> > +	}
> > +	ret = rte_eth_xstats_get_names(port_id, eth_xstats_names,
> > +		 num_xstats);
> > +	if (ret < 0 || ret > num_xstats) {
> > +		free(eth_xstats);
> > +		free(eth_xstats_names);
> > +		TELEMETRY_LOG_ERR("Error - rte_eth_xstats_get_names(%u)"
> > +			" len%i failed: %d\n", port_id, num_xstats,
> > +				 ret);
> > +		return -EPERM;
> > +	}
> > +	const char *xstats_names[num_xstats];
> > +
> > +	for (i = 0; i < num_xstats; i++)
> > +		xstats_names[i] = eth_xstats_names[eth_xstats[i].id].name;
> > +
> > +	start_index = rte_metrics_reg_names(xstats_names, num_xstats);
> > +
> > +	if (start_index < 0) {
> > +		TELEMETRY_LOG_ERR("Error - rte_metrics_reg_names failed -"
> > +			" metrics may already be registered\n");
> > +		free(eth_xstats);
> > +		free(eth_xstats_names);
> > +		return -1;
> > +	}
> > +	free(eth_xstats_names);
> > +	free(eth_xstats);
> 
> At this point, you have repeated 3 times those frees().
> It would be cleaner to define proper labels and use goto instead.

Agreed.

Thanks for review!
  
Burakov, Anatoly Sept. 7, 2018, 9:48 a.m. UTC | #3
On 23-Aug-18 1:08 PM, Ciara Power wrote:
> This patch adds the telemetry UNIX socket. It is used to
> allow connections from external clients.
> 
> On the initial connection from a client, ethdev stats are
> registered in the metrics library, to allow for their retrieval
> at a later stage.
> 
> Signed-off-by: Ciara Power <ciara.power@intel.com>
> Signed-off-by: Brian Archbold <brian.archbold@intel.com>
> ---

Quick question: is there really a need for a separate thread? Can't you 
just register the socket as an interrupt, and just get a callback when 
something arrives? IPC works like that right now, i see no reason to not 
do it this way for telemetry?
  

Patch

diff --git a/lib/librte_telemetry/rte_telemetry.c b/lib/librte_telemetry/rte_telemetry.c
index 8d7b0e3..f984929 100644
--- a/lib/librte_telemetry/rte_telemetry.c
+++ b/lib/librte_telemetry/rte_telemetry.c
@@ -3,21 +3,159 @@ 
  */
 
 #include <unistd.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <sys/un.h>
 
 #include <rte_eal.h>
 #include <rte_ethdev.h>
 #include <rte_metrics.h>
+#include <rte_string_fns.h>
 
 #include "rte_telemetry.h"
 #include "rte_telemetry_internal.h"
 
 #define SLEEP_TIME 10
 
+#define DEFAULT_DPDK_PATH "/var/run/.rte_telemetry"
+
+const char *socket_path = DEFAULT_DPDK_PATH;
 static telemetry_impl *static_telemetry;
 
+int32_t
+rte_telemetry_check_port_activity(int port_id)
+{
+	int pid;
+
+	RTE_ETH_FOREACH_DEV(pid) {
+		if (pid == port_id)
+			return 1;
+	}
+	TELEMETRY_LOG_ERR("Error - port_id: %d is invalid, not active\n",
+		port_id);
+	return 0;
+}
+
+static int32_t
+rte_telemetry_reg_ethdev_to_metrics(uint16_t port_id)
+{
+	int ret, num_xstats, start_index, i;
+	struct rte_eth_xstat *eth_xstats;
+
+	if (!rte_eth_dev_is_valid_port(port_id)) {
+		TELEMETRY_LOG_ERR("Error - port_id: %d is invalid\n", port_id);
+		return -EINVAL;
+	}
+
+	num_xstats = rte_eth_xstats_get(port_id, NULL, 0);
+	if (num_xstats < 0) {
+		TELEMETRY_LOG_ERR("Error - rte_eth_xstats_get(%u) failed:"
+			" %d\n", port_id, num_xstats);
+		return -EPERM;
+	}
+
+	eth_xstats = malloc(sizeof(struct rte_eth_xstat) * num_xstats);
+	if (eth_xstats == NULL) {
+		TELEMETRY_LOG_ERR("Error - Failed to malloc memory for"
+			" xstats\n");
+		return -ENOMEM;
+	}
+	ret = rte_eth_xstats_get(port_id, eth_xstats, num_xstats);
+	if (ret < 0 || ret > num_xstats) {
+		free(eth_xstats);
+		TELEMETRY_LOG_ERR("Error - rte_eth_xstats_get(%u) len%i"
+			" failed: %d\n", port_id, num_xstats, ret);
+		return -EPERM;
+	}
+	struct rte_eth_xstat_name *eth_xstats_names;
+	eth_xstats_names = malloc(sizeof(struct rte_eth_xstat_name) *
+		 num_xstats);
+	if (eth_xstats_names == NULL) {
+		free(eth_xstats);
+		TELEMETRY_LOG_ERR("Error - Failed to malloc memory for"
+			" xstats_names\n");
+		return -ENOMEM;
+	}
+	ret = rte_eth_xstats_get_names(port_id, eth_xstats_names,
+		 num_xstats);
+	if (ret < 0 || ret > num_xstats) {
+		free(eth_xstats);
+		free(eth_xstats_names);
+		TELEMETRY_LOG_ERR("Error - rte_eth_xstats_get_names(%u)"
+			" len%i failed: %d\n", port_id, num_xstats,
+				 ret);
+		return -EPERM;
+	}
+	const char *xstats_names[num_xstats];
+
+	for (i = 0; i < num_xstats; i++)
+		xstats_names[i] = eth_xstats_names[eth_xstats[i].id].name;
+
+	start_index = rte_metrics_reg_names(xstats_names, num_xstats);
+
+	if (start_index < 0) {
+		TELEMETRY_LOG_ERR("Error - rte_metrics_reg_names failed -"
+			" metrics may already be registered\n");
+		free(eth_xstats);
+		free(eth_xstats_names);
+		return -1;
+	}
+	free(eth_xstats_names);
+	free(eth_xstats);
+	return start_index;
+}
+
+static int32_t
+rte_telemetry_initial_accept(struct telemetry_impl *telemetry)
+{
+	int pid;
+
+	RTE_ETH_FOREACH_DEV(pid) {
+		telemetry->reg_index =
+			rte_telemetry_reg_ethdev_to_metrics(pid);
+		break;
+	}
+
+	if (telemetry->reg_index < 0) {
+		TELEMETRY_LOG_ERR("Error - failed to register ethdev "
+			"metrics\n");
+		return -1;
+	}
+	telemetry->metrics_register_done = 1;
+
+	return 0;
+}
+
+static int32_t
+rte_telemetry_accept_new_client(struct telemetry_impl *telemetry)
+{
+	int ret;
+	if (telemetry->accept_fd == 0 || telemetry->accept_fd == -1) {
+		ret = listen(telemetry->server_fd, 1);
+		if (ret < 0) {
+			TELEMETRY_LOG_ERR("Error - Listening error with "
+				"server fd\n");
+			return -1;
+		}
+		telemetry->accept_fd = accept(telemetry->server_fd, NULL, NULL);
+
+		if (telemetry->accept_fd > 0 &&
+			telemetry->metrics_register_done == 0) {
+			ret = rte_telemetry_initial_accept(telemetry);
+			if (ret < 0) {
+				TELEMETRY_LOG_ERR("Error - Failed to run "
+					"initial configurations/tests\n");
+				return -1;
+			}
+		}
+	}
+	return 0;
+}
+
 static int32_t
 rte_telemetry_run(void *userdata)
 {
+	int ret;
 	struct telemetry_impl *telemetry = (struct telemetry_impl *)userdata;
 	if (!telemetry) {
 		TELEMETRY_LOG_WARN("Warning - TELEMETRY could not be "
@@ -25,6 +163,12 @@  rte_telemetry_run(void *userdata)
 		return -1;
 	}
 
+	ret = rte_telemetry_accept_new_client(telemetry);
+	if (ret < 0) {
+		TELEMETRY_LOG_ERR("Error - Accept and read new client "
+			"failed\n");
+		return -1;
+	}
 	return 0;
 }
 
@@ -50,6 +194,51 @@  static void
 	pthread_exit(0);
 }
 
+static int32_t
+rte_telemetry_set_socket_nonblock(int fd)
+{
+	if (fd < 0) {
+		TELEMETRY_LOG_ERR("Error - Invalid fd provided\n");
+		return -1;
+	}
+
+	int flags = fcntl(fd, F_GETFL, 0);
+	if (flags < 0)
+		flags = 0;
+	return fcntl(fd, F_SETFL, flags | O_NONBLOCK);
+}
+
+static int32_t
+rte_telemetry_create_socket(struct telemetry_impl *telemetry)
+{
+	int ret;
+
+	if (!telemetry)
+		return -1;
+
+	telemetry->server_fd = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (telemetry->server_fd == -1) {
+		TELEMETRY_LOG_ERR("Error - Failed to open socket\n");
+		return -1;
+	}
+	ret  = rte_telemetry_set_socket_nonblock(telemetry->server_fd);
+	if (ret < 0) {
+		TELEMETRY_LOG_ERR("Error - Could not set socket to NONBLOCK\n");
+		return -1;
+	}
+	struct sockaddr_un addr = {0};
+	addr.sun_family = AF_UNIX;
+	strlcpy(addr.sun_path, socket_path, sizeof(addr.sun_path));
+	unlink(socket_path);
+
+	if (bind(telemetry->server_fd, (struct sockaddr *)&addr,
+		sizeof(addr)) < 0) {
+		TELEMETRY_LOG_ERR("Error - Socket binding error\n");
+		return -1;
+	}
+	return 0;
+}
+
 int32_t
 rte_telemetry_init(uint32_t socket_id)
 {
@@ -71,6 +260,14 @@  rte_telemetry_init(uint32_t socket_id)
 
 	static_telemetry->socket_id = socket_id;
 	rte_metrics_init(static_telemetry->socket_id);
+	ret = rte_telemetry_create_socket(static_telemetry);
+	if (ret < 0) {
+		ret = rte_telemetry_cleanup();
+		if (ret < 0)
+			TELEMETRY_LOG_ERR("Error - TELEMETRY cleanup failed\n");
+		return -EPERM;
+	}
+
 	pthread_attr_init(&attr);
 	ret = rte_ctrl_thread_create(&static_telemetry->thread_id,
 		telemetry_ctrl_thread, &attr, rte_telemetry_run_thread_func,
@@ -88,7 +285,15 @@  rte_telemetry_init(uint32_t socket_id)
 int32_t
 rte_telemetry_cleanup(void)
 {
+	int ret;
 	struct telemetry_impl *telemetry = static_telemetry;
+
+	ret = close(telemetry->server_fd);
+	if (ret < 0) {
+		TELEMETRY_LOG_ERR("Error - Close TELEMETRY socket failed\n");
+		free(telemetry);
+		return -EPERM;
+	}
 	telemetry->thread_status = 0;
 	pthread_join(telemetry->thread_id, NULL);
 	free(telemetry);
diff --git a/lib/librte_telemetry/rte_telemetry_internal.h b/lib/librte_telemetry/rte_telemetry_internal.h
index 4e810a8..569d56a 100644
--- a/lib/librte_telemetry/rte_telemetry_internal.h
+++ b/lib/librte_telemetry/rte_telemetry_internal.h
@@ -24,9 +24,13 @@  extern int telemetry_log_level;
 	TELEMETRY_LOG(INFO, fmt, ## args)
 
 typedef struct telemetry_impl {
+	int accept_fd;
+	int server_fd;
 	pthread_t thread_id;
 	int thread_status;
 	uint32_t socket_id;
+	int reg_index;
+	int metrics_register_done;
 } telemetry_impl;
 
 #endif