@@ -289,6 +289,8 @@
close(priv->nl_socket_rdma);
if (priv->mnl_socket)
mlx5_flow_tcf_socket_destroy(priv->mnl_socket);
+ if (priv->mnl_rcvbuf)
+ mlx5_flow_tcf_rcv_buf_destroy(priv->mnl_rcvbuf);
ret = mlx5_hrxq_ibv_verify(dev);
if (ret)
DRV_LOG(WARNING, "port %u some hash Rx queue still remain",
@@ -1139,7 +1141,8 @@
if (vf && config.vf_nl_en)
mlx5_nl_mac_addr_sync(eth_dev);
priv->mnl_socket = mlx5_flow_tcf_socket_create();
- if (!priv->mnl_socket) {
+ priv->mnl_rcvbuf = mlx5_flow_tcf_rcv_buf_create();
+ if (!priv->mnl_socket || !priv->mnl_rcvbuf) {
err = -rte_errno;
DRV_LOG(WARNING,
"flow rules relying on switch offloads will not be"
@@ -1163,7 +1166,9 @@
" not be supported: %s: %s",
error.message, strerror(rte_errno));
mlx5_flow_tcf_socket_destroy(priv->mnl_socket);
+ mlx5_flow_tcf_rcv_buf_destroy(priv->mnl_rcvbuf);
priv->mnl_socket = NULL;
+ priv->mnl_rcvbuf = NULL;
}
}
TAILQ_INIT(&priv->flows);
@@ -1220,6 +1225,8 @@
close(priv->nl_socket_rdma);
if (priv->mnl_socket)
mlx5_flow_tcf_socket_destroy(priv->mnl_socket);
+ if (priv->mnl_rcvbuf)
+ mlx5_flow_tcf_rcv_buf_destroy(priv->mnl_rcvbuf);
if (own_domain_id)
claim_zero(rte_eth_switch_domain_free(priv->domain_id));
rte_free(priv);
@@ -158,6 +158,7 @@ struct mlx5_drop {
};
struct mnl_socket;
+struct mlx5_flow_tcf_rbuf;
struct priv {
LIST_ENTRY(priv) mem_event_cb; /* Called by memory event callback. */
@@ -225,6 +226,8 @@ struct priv {
/* UAR same-page access control required in 32bit implementations. */
#endif
struct mnl_socket *mnl_socket; /* Libmnl socket. */
+ struct mlx5_flow_tcf_rbuf *mnl_rcvbuf;
+	/* Buffer for receiving Netlink messages via libmnl. */
};
#define PORT_ID(priv) ((priv)->dev_data->port_id)
@@ -1599,6 +1599,17 @@ int mlx5_flow_validate_item_mpls(uint64_t item_flags __rte_unused,
return -rte_errno;
}
+int
+flow_null_query(struct rte_eth_dev *dev __rte_unused,
+ struct rte_flow *flow __rte_unused,
+ enum rte_flow_action_type type __rte_unused,
+ void *data __rte_unused,
+ struct rte_flow_error *error __rte_unused)
+{
+ rte_errno = ENOTSUP;
+ return -rte_errno;
+}
+
/* Void driver to protect from null pointer reference. */
const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = {
.validate = flow_null_validate,
@@ -1606,6 +1617,7 @@ int mlx5_flow_validate_item_mpls(uint64_t item_flags __rte_unused,
.translate = flow_null_translate,
.apply = flow_null_apply,
.remove = flow_null_remove,
+ .query = flow_null_query,
};
/**
@@ -2262,10 +2274,19 @@ struct rte_flow *
* 0 on success, a negative errno value otherwise and rte_errno is set.
*/
static int
-mlx5_flow_query_count(struct rte_flow *flow __rte_unused,
- void *data __rte_unused,
+mlx5_flow_query_count(struct rte_eth_dev *dev,
+ struct rte_flow *flow,
+ void *data,
struct rte_flow_error *error)
{
+ const struct mlx5_flow_driver_ops *fops;
+ enum mlx5_flow_drv_type ftype = flow->drv_type;
+
+ assert(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX);
+ fops = flow_get_drv_ops(ftype);
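+	/*
+	 * E-Switch (TCF) rules keep their counters in the kernel;
+	 * dispatch the query to the TCF driver which retrieves them
+	 * over Netlink.
+	 */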
+ if (ftype == MLX5_FLOW_TYPE_TCF)
+ return fops->query(dev, flow,
+ RTE_FLOW_ACTION_TYPE_COUNT, data, error);
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
if (flow->actions & MLX5_ACTION_COUNT) {
struct rte_flow_query_count *qc = data;
@@ -2315,7 +2336,7 @@ struct rte_flow *
* @see rte_flow_ops
*/
int
-mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
+mlx5_flow_query(struct rte_eth_dev *dev,
struct rte_flow *flow,
const struct rte_flow_action *actions,
void *data,
@@ -2328,7 +2349,7 @@ struct rte_flow *
case RTE_FLOW_ACTION_TYPE_VOID:
break;
case RTE_FLOW_ACTION_TYPE_COUNT:
- ret = mlx5_flow_query_count(flow, data, error);
+ ret = mlx5_flow_query_count(dev, flow, data, error);
break;
default:
return rte_flow_error_set(error, ENOTSUP,
@@ -127,6 +127,8 @@ enum mlx5_flow_drv_type {
struct mlx5_flow_tcf {
struct nlmsghdr *nlh;
struct tcmsg *tcm;
+ uint64_t hits;
+ uint64_t bytes;
};
/** Handles information leading to a drop fate. */
@@ -204,7 +206,6 @@ struct rte_flow {
struct rte_flow_action_rss rss;/**< RSS context. */
uint8_t key[MLX5_RSS_HASH_KEY_LEN]; /**< RSS hash key. */
uint16_t (*queue)[]; /**< Destination queues to redirect traffic to. */
- void *nl_flow; /**< Netlink flow buffer if relevant. */
LIST_HEAD(dev_flows, mlx5_flow) dev_flows;
uint32_t actions;
};
@@ -229,15 +230,26 @@ typedef int (*mlx5_flow_apply_t)(struct rte_eth_dev *dev, struct rte_flow *flow,
typedef int (*mlx5_flow_remove_t)(struct rte_eth_dev *dev,
struct rte_flow *flow,
struct rte_flow_error *error);
+typedef int (*mlx5_flow_query_t)(struct rte_eth_dev *dev,
+ struct rte_flow *flow,
+ enum rte_flow_action_type type,
+ void *data,
+ struct rte_flow_error *error);
struct mlx5_flow_driver_ops {
mlx5_flow_validate_t validate;
mlx5_flow_prepare_t prepare;
mlx5_flow_translate_t translate;
mlx5_flow_apply_t apply;
mlx5_flow_remove_t remove;
+ mlx5_flow_query_t query;
};
/* mlx5_flow.c */
+int flow_null_query(struct rte_eth_dev *dev,
+ struct rte_flow *flow,
+ enum rte_flow_action_type type,
+ void *data,
+ struct rte_flow_error *error);
int mlx5_flow_validate_action_flag(uint64_t action_flags,
struct rte_flow_error *error);
int mlx5_flow_validate_action_mark(uint64_t action_flags,
@@ -302,5 +314,7 @@ int mlx5_flow_tcf_init(struct mnl_socket *nl, unsigned int ifindex,
struct rte_flow_error *error);
struct mnl_socket *mlx5_flow_tcf_socket_create(void);
void mlx5_flow_tcf_socket_destroy(struct mnl_socket *nl);
+struct mlx5_flow_tcf_rbuf *mlx5_flow_tcf_rcv_buf_create(void);
+void mlx5_flow_tcf_rcv_buf_destroy(struct mlx5_flow_tcf_rbuf *rb);
#endif
@@ -1429,6 +1429,7 @@ static int flow_dv_validate_attributes(struct rte_eth_dev *dev,
.translate = flow_dv_translate,
.apply = flow_dv_apply,
.remove = flow_dv_remove,
+ .query = flow_null_query,
};
#endif
@@ -6,6 +6,7 @@
#include <assert.h>
#include <errno.h>
#include <libmnl/libmnl.h>
+#include <linux/gen_stats.h>
#include <linux/if_ether.h>
#include <linux/netlink.h>
#include <linux/pkt_cls.h>
@@ -153,6 +154,16 @@ struct tc_vlan {
#define IPV6_ADDR_LEN 16
#endif
+/**
+ * Structure holding a Netlink receive buffer of MNL_SOCKET_BUFFER_SIZE bytes
+ * (system dependent, usually 8KB). Using this buffer size ensures that
+ * received Netlink messages are never truncated.
+ */
+struct mlx5_flow_tcf_rbuf {
+	uint8_t *buf; /**< Buffer for Netlink replies. */
+	uint16_t bsize; /**< Size of the buffer in bytes. */
+};
+
/** Empty masks for known item types. */
static const union {
struct rte_flow_item_port_id port_id;
@@ -704,6 +715,9 @@ struct flow_tcf_ptoi {
"can't have multiple fate actions");
action_flags |= MLX5_ACTION_DROP;
break;
+ case RTE_FLOW_ACTION_TYPE_COUNT:
+ action_flags |= MLX5_ACTION_COUNT;
+ break;
case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
action_flags |= MLX5_ACTION_OF_POP_VLAN;
break;
@@ -844,6 +858,9 @@ struct flow_tcf_ptoi {
SZ_NLATTR_TYPE_OF(struct tc_gact);
flags |= MLX5_ACTION_DROP;
break;
+ case RTE_FLOW_ACTION_TYPE_COUNT:
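+			/*
+			 * The count action is added implicitly for every
+			 * rule (see the translate stage), hence it needs no
+			 * room in the Netlink message.
+			 */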
+ flags |= MLX5_ACTION_COUNT;
+ break;
case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
flags |= MLX5_ACTION_OF_POP_VLAN;
goto action_of_vlan;
@@ -1330,6 +1347,12 @@ struct flow_tcf_ptoi {
mnl_attr_nest_end(nlh, na_act);
mnl_attr_nest_end(nlh, na_act_index);
break;
+ case RTE_FLOW_ACTION_TYPE_COUNT:
+ /*
+			 * The driver adds the count action implicitly for
+			 * each rule it creates.
+ */
+ break;
case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
conf.of_push_vlan = NULL;
vlan_act = TCA_VLAN_ACT_POP;
@@ -1511,12 +1534,328 @@ struct flow_tcf_ptoi {
return 0;
}
+/**
+ * Parse rtnetlink message attributes, filling the attribute table with the
+ * info retrieved.
+ *
+ * @param[out] tb
+ *   Attribute table to be filled.
+ * @param max
+ *   Maximum attribute type the table can hold (the table has max + 1 entries).
+ * @param rta
+ *   The attributes section in the message to be parsed.
+ * @param len
+ *   The length of the attributes section in the message.
+ */
+static void
+tc_parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
+{
+	unsigned short type;
+
+	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));
+ while (RTA_OK(rta, len)) {
+ type = rta->rta_type;
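+		/* Keep only the first occurrence of each attribute type. */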
+ if (type <= max && !tb[type])
+ tb[type] = rta;
+ rta = RTA_NEXT(rta, len);
+ }
+}
+
+/**
+ * Extract action counters from flower action.
+ *
+ * @param rta
+ *   Flower action stats section in the Netlink message received.
+ * @param[out] qc
+ *   Count statistics retrieved from the message query.
+ *
+ * @return
+ *   0 on successful extraction of action counts, -1 otherwise.
+ */
+static int
+tc_flow_extract_stats_attr(struct rtattr *rta, struct rte_flow_query_count *qc)
+{
+ struct rtattr *tbs[TCA_STATS_MAX + 1];
+
+ tc_parse_rtattr(tbs, TCA_STATS_MAX, RTA_DATA(rta), RTA_PAYLOAD(rta));
+ if (tbs[TCA_STATS_BASIC]) {
+ struct gnet_stats_basic bs = {0};
+
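+		/*
+		 * Copy no more than the attribute payload provides in case
+		 * the kernel structure differs in size.
+		 */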
+ memcpy(&bs, RTA_DATA(tbs[TCA_STATS_BASIC]),
+ RTE_MIN(RTA_PAYLOAD(tbs[TCA_STATS_BASIC]),
+ sizeof(bs)));
+ qc->bytes = bs.bytes;
+ qc->hits = bs.packets;
+ qc->bytes_set = 1;
+ qc->hits_set = 1;
+ return 0;
+ }
+ return -1;
+}
+
+/**
+ * Parse a single flower action, retrieving the flow counters from it if
+ * present.
+ *
+ * @param arg
+ *   Flower action properties in the Netlink message received.
+ * @param[out] qc
+ *   Count statistics retrieved from the message query.
+ *
+ * @return
+ *   0 on successful retrieval of action counts, -1 otherwise.
+ */
+static int
+tc_flow_parse_one_action(struct rtattr *arg, struct rte_flow_query_count *qc)
+{
+ struct rtattr *tb[TCA_ACT_MAX + 1];
+
+ if (arg == NULL)
+ return -1;
+ tc_parse_rtattr(tb, TCA_ACT_MAX, RTA_DATA(arg), RTA_PAYLOAD(arg));
+ if (tb[TCA_ACT_KIND] == NULL)
+ return -1;
+ if (tb[TCA_ACT_STATS])
+ return tc_flow_extract_stats_attr(tb[TCA_ACT_STATS], qc);
+ return -1;
+}
+
+/**
+ * Parse the flower action section in the message, retrieving the flow
+ * counters from the first action that contains them.
+ * Flow counters are stored in the actions defined by the flow and not in
+ * the flow itself, therefore we need to traverse the flower actions in
+ * search of them.
+ *
+ * @param arg
+ *   Flower action section in the Netlink message received.
+ * @param[out] qc
+ *   Count statistics retrieved from the message query.
+ */
+static void
+tc_flow_parse_action(const struct rtattr *arg, struct rte_flow_query_count *qc)
+{
+ struct rtattr *tb[TCA_ACT_MAX_PRIO + 1];
+ int i;
+
+ if (arg == NULL)
+ return;
+ tc_parse_rtattr(tb, TCA_ACT_MAX_PRIO, RTA_DATA(arg), RTA_PAYLOAD(arg));
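+	/*
+	 * Actions are indexed by their order in the rule, scan all the
+	 * slots and stop at the first one carrying statistics.
+	 */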
+ for (i = 0; i <= TCA_ACT_MAX_PRIO; i++)
+ if (tb[i])
+ if (tc_flow_parse_one_action(tb[i], qc) == 0)
+ break;
+}
+
+/**
+ * Parse the Netlink reply on flower type filters, retrieving the flow
+ * counters from it.
+ *
+ * @param opt
+ *   Flower options section in the Netlink message received.
+ * @param[out] qc
+ *   Count statistics retrieved from the message query.
+ */
+static void
+tc_flower_parse_opt(struct rtattr *opt,
+ struct rte_flow_query_count *qc)
+{
+ struct rtattr *tb[TCA_FLOWER_MAX + 1];
+
+ if (!opt)
+ return;
+ tc_parse_rtattr(tb, TCA_FLOWER_MAX, RTA_DATA(opt), RTA_PAYLOAD(opt));
+ if (tb[TCA_FLOWER_ACT])
+ tc_flow_parse_action(tb[TCA_FLOWER_ACT], qc);
+}
+
+/**
+ * Parse the Netlink reply on a filter query, retrieving the flow counters.
+ *
+ * @param nlh
+ *   Message received from Netlink.
+ * @param[out] qc
+ *   Count statistics retrieved from the message query.
+ *
+ * @return
+ *   MNL_CB_ERROR on error, MNL_CB_OK otherwise.
+ */
+static int
+mlx5_nl_flow_parse_filter(const struct nlmsghdr *nlh,
+ struct rte_flow_query_count *qc)
+{
+ struct tcmsg *t = NLMSG_DATA(nlh);
+ int len = nlh->nlmsg_len;
+ struct rtattr *tb[TCA_MAX + 1] = { };
+
+ if (nlh->nlmsg_type != RTM_NEWTFILTER &&
+ nlh->nlmsg_type != RTM_GETTFILTER &&
+ nlh->nlmsg_type != RTM_DELTFILTER)
+ return MNL_CB_OK;
+ len -= NLMSG_LENGTH(sizeof(*t));
+ if (len < 0)
+ return MNL_CB_ERROR;
+ tc_parse_rtattr(tb, TCA_MAX, TCA_RTA(t), len);
+ if (tb[TCA_KIND])
+ if (strcmp(RTA_DATA(tb[TCA_KIND]), "flower") == 0)
+ tc_flower_parse_opt(tb[TCA_OPTIONS], qc);
+ return MNL_CB_OK;
+}
+
+/**
+ * Callback to parse the Netlink reply on a filter query, attempting to
+ * retrieve the flow counters if present.
+ *
+ * @param nlh
+ *   Message received from Netlink.
+ * @param[out] data
+ *   Pointer to the count statistics to be filled in by the routine.
+ *
+ * @return
+ *   MNL_CB_ERROR on error, MNL_CB_OK otherwise.
+ */
+static int
+mlx5_nl_flow_parse_message(const struct nlmsghdr *nlh, void *data)
+{
+ struct rte_flow_query_count *qc = (struct rte_flow_query_count *)data;
+
+ switch (nlh->nlmsg_type) {
+ case NLMSG_NOOP:
+ return MNL_CB_OK;
+ case NLMSG_ERROR:
+ case NLMSG_OVERRUN:
+ return MNL_CB_ERROR;
+ default:
+ break;
+ }
+ return mlx5_nl_flow_parse_filter(nlh, qc);
+}
+
+/**
+ * Query a TC flower (tcf) rule for its statistics via Netlink.
+ *
+ * @param[in] dev
+ *   Pointer to Ethernet device.
+ * @param[in] flow
+ *   Pointer to the flow.
+ * @param[in] type
+ *   Action type to query, only RTE_FLOW_ACTION_TYPE_COUNT is supported.
+ * @param[out] data
+ *   Data retrieved by the query.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   0 on success, a negative errno value otherwise and rte_errno is set.
+ */
+static int
+mlx5_flow_tcf_query(struct rte_eth_dev *dev,
+ struct rte_flow *flow,
+ enum rte_flow_action_type type,
+ void *data,
+ struct rte_flow_error *error)
+{
+ struct rte_flow_query_count *qc = data;
+ struct priv *priv = dev->data->dev_private;
+ struct mnl_socket *nl = priv->mnl_socket;
+ struct mlx5_flow_tcf_rbuf *rbuf = priv->mnl_rcvbuf;
+ struct mlx5_flow *dev_flow;
+ struct nlmsghdr *nlh;
+ uint32_t seq = random();
+ ssize_t ret;
+
+ assert(qc);
+ assert(rbuf);
+ dev_flow = LIST_FIRST(&flow->dev_flows);
+ /* E-Switch flow can't be expanded. */
+ assert(!LIST_NEXT(dev_flow, next));
+ /* Currently only query count is supported. */
+ if (type != RTE_FLOW_ACTION_TYPE_COUNT)
+ goto error_nosup;
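+	/*
+	 * Reuse the Netlink message built for rule creation, turning it
+	 * into a filter GET request; the kernel reply carries the matching
+	 * filter along with its statistics.
+	 */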
+ nlh = dev_flow->tcf.nlh;
+ nlh->nlmsg_type = RTM_GETTFILTER;
+ nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ECHO;
+ nlh->nlmsg_seq = seq;
+ if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) == -1)
+ goto error_exit;
+ ret = mnl_socket_recvfrom(nl, rbuf->buf, rbuf->bsize);
+ if (ret == -1)
+ goto error_exit;
+ while (ret > 0) {
+ ret = mnl_cb_run(rbuf->buf, ret, seq,
+ mnl_socket_get_portid(nl),
+ mlx5_nl_flow_parse_message, qc);
+ if (ret <= MNL_CB_STOP)
+ break;
+ ret = mnl_socket_recvfrom(nl, rbuf->buf, rbuf->bsize);
+ }
+	/* Return the delta from the last reset. */
+	qc->hits -= dev_flow->tcf.hits;
+	qc->bytes -= dev_flow->tcf.bytes;
+	if (qc->reset) {
+		/*
+		 * Move the baselines forward so the next query reports
+		 * the delta from this point.
+		 */
+		dev_flow->tcf.hits += qc->hits;
+		dev_flow->tcf.bytes += qc->bytes;
+	}
+ return 0;
+error_nosup:
+ return rte_flow_error_set
+ (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
+ NULL, "tcf: unsupported query");
+error_exit:
+ return rte_flow_error_set
+ (error, errno, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+ NULL, "netlink: failed to read flow rule statistics");
+}
+
+/**
+ * Create a Netlink receive buffer.
+ * Netlink queries may result in a large reply; with a short receive buffer
+ * the reply message may be truncated. To avoid this, allocate a buffer of
+ * MNL_SOCKET_BUFFER_SIZE bytes (which is system dependent and usually 8KB
+ * long). Using this buffer size ensures that Netlink messages are stored
+ * without truncation.
+ *
+ * @return
+ *   Pointer to the mlx5_flow_tcf_rbuf created, NULL otherwise.
+ */
+struct mlx5_flow_tcf_rbuf *
+mlx5_flow_tcf_rcv_buf_create(void)
+{
+ struct mlx5_flow_tcf_rbuf *rbuf =
+ rte_zmalloc(__func__,
+ sizeof(struct mlx5_flow_tcf_rbuf),
+ sizeof(uint32_t));
+ uint8_t *buf = rte_zmalloc(__func__,
+ MNL_SOCKET_BUFFER_SIZE,
+ sizeof(uint32_t));
+ if (!buf || !rbuf) {
+ rte_free(buf);
+ rte_free(rbuf);
+ return NULL;
+ }
+ rbuf->buf = buf;
+ rbuf->bsize = MNL_SOCKET_BUFFER_SIZE;
+ return rbuf;
+}
+
+/**
+ * Destroy a Netlink receive buffer (mlx5_flow_tcf_rbuf).
+ *
+ * @param rb
+ *   The receive buffer to destroy.
+ */
+void
+mlx5_flow_tcf_rcv_buf_destroy(struct mlx5_flow_tcf_rbuf *rb)
+{
+ if (rb) {
+ rte_free(rb->buf);
+ rte_free(rb);
+ }
+}
+
const struct mlx5_flow_driver_ops mlx5_flow_tcf_drv_ops = {
.validate = flow_tcf_validate,
.prepare = flow_tcf_prepare,
.translate = flow_tcf_translate,
.apply = flow_tcf_apply,
.remove = flow_tcf_remove,
+ .query = mlx5_flow_tcf_query,
};
/**
@@ -1775,4 +1775,5 @@ struct ibv_spec_header {
.translate = flow_verbs_translate,
.apply = flow_verbs_apply,
.remove = flow_verbs_remove,
+ .query = flow_null_query,
};