@@ -1482,7 +1482,7 @@ struct flow_tcf_ptoi {
default:
return rte_flow_error_set(error, ENOTSUP,
RTE_FLOW_ERROR_TYPE_ITEM,
- NULL, "item not supported");
+ items, "item not supported");
}
}
for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
@@ -2886,6 +2886,291 @@ struct flow_tcf_ptoi {
return 0;
}
+/* VTEP device list shared between PMD port instances, and its lock. */
+static pthread_mutex_t vtep_list_mutex = PTHREAD_MUTEX_INITIALIZER;
+static LIST_HEAD(, mlx5_flow_tcf_vtep) vtep_list_vxlan =
+	LIST_HEAD_INITIALIZER();
+static struct mlx5_flow_tcf_vtep *vtep_encap; /* Cached encap VTEP. */
+
+/**
+ * Deletes VTEP network device by sending an RTM_DELLINK Netlink request.
+ *
+ * @param[in] tcf
+ *   Context object initialized by mlx5_flow_tcf_socket_open().
+ * @param[in] vtep
+ *   VTEP descriptor; its ifindex field selects the device to delete.
+ */
+static void
+flow_tcf_delete_iface(struct mlx5_tcf_socket *tcf,
+		      struct mlx5_flow_tcf_vtep *vtep)
+{
+	struct ifinfomsg *info;
+	struct nlmsghdr *msg;
+	alignas(struct nlmsghdr)
+	uint8_t buf[mnl_nlmsg_size(MNL_ALIGN(sizeof(*info))) + 8];
+
+	DRV_LOG(NOTICE, "VTEP delete (%d)", vtep->port);
+	/* Compose the request: header plus ifinfomsg naming the device. */
+	msg = mnl_nlmsg_put_header(buf);
+	msg->nlmsg_type = RTM_DELLINK;
+	msg->nlmsg_flags = NLM_F_REQUEST;
+	info = mnl_nlmsg_put_extra_header(msg, sizeof(*info));
+	info->ifi_family = AF_UNSPEC;
+	info->ifi_index = vtep->ifindex;
+	/* Failure is only logged, the caller is not notified. */
+	if (flow_tcf_nl_ack(tcf, msg))
+		DRV_LOG(DEBUG, "error deleting VXLAN encap/decap ifindex %u",
+			info->ifi_index);
+}
+
+/**
+ * Creates VTEP network device.
+ *
+ * @param[in] tcf
+ *   Context object initialized by mlx5_flow_tcf_socket_open().
+ * @param[in] port
+ *   UDP port of created VTEP device.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   Pointer to created device structure on success, NULL otherwise
+ *   and rte_errno is set.
+ */
+static struct mlx5_flow_tcf_vtep*
+flow_tcf_create_iface(struct mlx5_tcf_socket *tcf, uint16_t port,
+		      struct rte_flow_error *error)
+{
+	struct mlx5_flow_tcf_vtep *vtep;
+	struct nlmsghdr *nlh;
+	struct ifinfomsg *ifm;
+	char name[sizeof(MLX5_VXLAN_DEVICE_PFX) + 24];
+	alignas(struct nlmsghdr) /* Must qualify buf, not name. */
+	uint8_t buf[mnl_nlmsg_size(sizeof(*ifm)) +
+		    SZ_NLATTR_DATA_OF(sizeof(name)) +
+		    SZ_NLATTR_NEST * 2 +
+		    SZ_NLATTR_STRZ_OF("vxlan") +
+		    SZ_NLATTR_TYPE_OF_UINT32 +
+		    SZ_NLATTR_TYPE_OF_UINT16 +
+		    SZ_NLATTR_TYPE_OF_UINT8 + 128];
+	struct nlattr *na_info;
+	struct nlattr *na_vxlan;
+	rte_be16_t vxlan_port = rte_cpu_to_be_16(port);
+	int ret;
+
+	vtep = rte_zmalloc(__func__, sizeof(*vtep),
+			   alignof(struct mlx5_flow_tcf_vtep));
+	if (!vtep) {
+		rte_flow_error_set
+			(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
+			 NULL, "unable to allocate memory for VTEP desc");
+		return NULL;
+	}
+	*vtep = (struct mlx5_flow_tcf_vtep){
+		.refcnt = 0,
+		.port = port,
+		.notcreated = 0,
+	};
+	memset(buf, 0, sizeof(buf));
+	nlh = mnl_nlmsg_put_header(buf);
+	nlh->nlmsg_type = RTM_NEWLINK;
+	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
+	ifm = mnl_nlmsg_put_extra_header(nlh, sizeof(*ifm));
+	ifm->ifi_family = AF_UNSPEC;
+	ifm->ifi_type = 0;
+	ifm->ifi_index = 0;
+	ifm->ifi_flags = IFF_UP;
+	ifm->ifi_change = 0xffffffff;
+	snprintf(name, sizeof(name), "%s%u", MLX5_VXLAN_DEVICE_PFX, port);
+	mnl_attr_put_strz(nlh, IFLA_IFNAME, name);
+	na_info = mnl_attr_nest_start(nlh, IFLA_LINKINFO);
+	assert(na_info);
+	mnl_attr_put_strz(nlh, IFLA_INFO_KIND, "vxlan");
+	na_vxlan = mnl_attr_nest_start(nlh, IFLA_INFO_DATA);
+	assert(na_vxlan);
+	mnl_attr_put_u8(nlh, IFLA_VXLAN_COLLECT_METADATA, 1);
+	mnl_attr_put_u8(nlh, IFLA_VXLAN_UDP_ZERO_CSUM6_RX, 1);
+	mnl_attr_put_u8(nlh, IFLA_VXLAN_LEARNING, 0);
+	mnl_attr_put_u16(nlh, IFLA_VXLAN_PORT, vxlan_port);
+	mnl_attr_nest_end(nlh, na_vxlan);
+	mnl_attr_nest_end(nlh, na_info);
+	assert(sizeof(buf) >= nlh->nlmsg_len);
+	ret = flow_tcf_nl_ack(tcf, nlh);
+	if (ret) {
+		DRV_LOG(WARNING, "VTEP %s create failure (%d)",
+			name, rte_errno);
+		vtep->notcreated = 1; /* Assume the device exists. */
+	}
+	vtep->ifindex = if_nametoindex(name);
+	if (!vtep->ifindex) {
+		int err = errno; /* Saved before logging can clobber it. */
+
+		DRV_LOG(WARNING,
+			"VTEP %s failed to get index (%d)", name, err);
+		rte_flow_error_set
+			(error, err,
+			 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+			 vtep->notcreated ? "netlink: failed to create VTEP" :
+			 "netlink: failed to retrieve VTEP ifindex");
+		goto cleanup;
+	}
+	/* Second request brings the (new or pre-existing) link up. */
+	memset(buf, 0, sizeof(buf));
+	nlh = mnl_nlmsg_put_header(buf);
+	nlh->nlmsg_type = RTM_NEWLINK;
+	nlh->nlmsg_flags = NLM_F_REQUEST;
+	ifm = mnl_nlmsg_put_extra_header(nlh, sizeof(*ifm));
+	ifm->ifi_family = AF_UNSPEC;
+	ifm->ifi_type = 0;
+	ifm->ifi_index = vtep->ifindex;
+	ifm->ifi_flags = IFF_UP;
+	ifm->ifi_change = IFF_UP;
+	ret = flow_tcf_nl_ack(tcf, nlh);
+	if (ret) {
+		DRV_LOG(WARNING,
+			"VTEP %s set link up failure (%d)", name, rte_errno);
+		rte_flow_error_set
+			(error, rte_errno,
+			 RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+			 "netlink: failed to set VTEP link up");
+		goto cleanup;
+	}
+	ret = mlx5_flow_tcf_ifindex_init(tcf, vtep->ifindex, error);
+	if (ret) {
+		DRV_LOG(WARNING,
+			"VTEP %s init failure (%d)", name, rte_errno);
+		goto cleanup;
+	}
+	DRV_LOG(NOTICE, "VTEP create (%d, %s)", port, "OK");
+	return vtep;
+cleanup:
+	/* Delete the device only when this call created it, then free once. */
+	if (!vtep->notcreated && vtep->ifindex)
+		flow_tcf_delete_iface(tcf, vtep);
+	rte_free(vtep);
+	DRV_LOG(NOTICE, "VTEP create (%d, %s)", port, "error");
+	return NULL;
+}
+
+/**
+ * Acquires a reference to the VTEP device matching the flow tunnel action.
+ *
+ * @param tcf
+ *   Context object initialized by mlx5_flow_tcf_socket_open().
+ * @param[in] dev_flow
+ *   Flow tcf object with tunnel structure pointer set.
+ * @param[out] error
+ *   Perform verbose error reporting if not NULL.
+ *
+ * @return
+ *   Interface index on success, zero otherwise and rte_errno is set.
+ */
+static unsigned int
+flow_tcf_tunnel_vtep_create(struct mlx5_tcf_socket *tcf,
+			    struct mlx5_flow *dev_flow,
+			    struct rte_flow_error *error)
+{
+	struct mlx5_flow_tcf_vtep *vtep = NULL;
+	unsigned int ifindex = 0;
+	uint16_t port;
+
+	assert(dev_flow->tcf.tunnel);
+	pthread_mutex_lock(&vtep_list_mutex);
+	switch (dev_flow->tcf.tunnel->type) {
+	case MLX5_FLOW_TCF_TUNACT_VXLAN_ENCAP:
+		/* All encap rules share one device on the default port. */
+		if (!vtep_encap) {
+			vtep_encap = flow_tcf_create_iface
+				(tcf, MLX5_VXLAN_DEFAULT_PORT, error);
+			if (vtep_encap)
+				LIST_INSERT_HEAD(&vtep_list_vxlan,
+						 vtep_encap, next);
+		}
+		vtep = vtep_encap;
+		break;
+	case MLX5_FLOW_TCF_TUNACT_VXLAN_DECAP:
+		/* Decap devices are looked up by their UDP port. */
+		port = dev_flow->tcf.vxlan_decap->udp_port;
+		LIST_FOREACH(vtep, &vtep_list_vxlan, next) {
+			if (vtep->port == port)
+				break;
+		}
+		if (!vtep) {
+			vtep = flow_tcf_create_iface(tcf, port, error);
+			if (vtep)
+				LIST_INSERT_HEAD(&vtep_list_vxlan,
+						 vtep, next);
+		}
+		break;
+	default:
+		rte_flow_error_set(error, ENOTSUP,
+				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
+				   "unsupported tunnel type");
+		break;
+	}
+	/*
+	 * vtep is still NULL when device creation failed or the tunnel type
+	 * is unsupported; zero is returned in both cases.
+	 */
+	if (vtep) {
+		vtep->refcnt++;
+		ifindex = vtep->ifindex;
+		assert(ifindex);
+	}
+	pthread_mutex_unlock(&vtep_list_mutex);
+	return ifindex;
+}
+
+/**
+ * Releases a VTEP device reference, removing the device with the last one.
+ *
+ * @param[in] tcf
+ *   Context object initialized by mlx5_flow_tcf_socket_open().
+ * @param[in] dev_flow
+ *   Flow tcf object with tunnel structure pointer set.
+ */
+static void
+flow_tcf_tunnel_vtep_delete(struct mlx5_tcf_socket *tcf,
+			    struct mlx5_flow *dev_flow)
+{
+	struct mlx5_flow_tcf_vtep *vtep;
+	uint16_t port = MLX5_VXLAN_DEFAULT_PORT;
+
+	assert(dev_flow->tcf.tunnel);
+	pthread_mutex_lock(&vtep_list_mutex);
+	switch (dev_flow->tcf.tunnel->type) {
+	case MLX5_FLOW_TCF_TUNACT_VXLAN_ENCAP:
+		break; /* Encap device is looked up by the default port. */
+	case MLX5_FLOW_TCF_TUNACT_VXLAN_DECAP:
+		port = dev_flow->tcf.vxlan_decap->udp_port;
+		break;
+	default:
+		assert(false);
+		DRV_LOG(WARNING, "Unsupported tunnel type");
+		goto unlock;
+	}
+	LIST_FOREACH(vtep, &vtep_list_vxlan, next) {
+		if (vtep->port == port)
+			break;
+	}
+	if (!vtep) {
+		DRV_LOG(WARNING, "No VTEP device found in the list");
+		goto unlock;
+	}
+	assert(dev_flow->tcf.tunnel->ifindex_tun == vtep->ifindex);
+	assert(vtep->refcnt);
+	if (vtep->refcnt && --vtep->refcnt)
+		goto unlock; /* Device is still referenced. */
+	if (!vtep->notcreated) /* Only remove devices created here. */
+		flow_tcf_delete_iface(tcf, vtep);
+	LIST_REMOVE(vtep, next);
+	if (vtep_encap == vtep)
+		vtep_encap = NULL;
+	rte_free(vtep);
+unlock:
+	pthread_mutex_unlock(&vtep_list_mutex);
+}
+
/**
* Apply flow to E-Switch by sending Netlink message.
*
@@ -2917,12 +3202,45 @@ struct flow_tcf_ptoi {
nlh = dev_flow->tcf.nlh;
nlh->nlmsg_type = RTM_NEWTFILTER;
nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
+ if (dev_flow->tcf.tunnel) {
+ /*
+ * Replace the interface index, target for
+ * encapsulation, source for decapsulation
+ */
+ assert(!dev_flow->tcf.tunnel->ifindex_tun);
+ assert(dev_flow->tcf.tunnel->ifindex_ptr);
+ /* Create actual VTEP device when rule is being applied. */
+ dev_flow->tcf.tunnel->ifindex_tun
+ = flow_tcf_tunnel_vtep_create(&priv->tcf_socket,
+ dev_flow, error);
+ DRV_LOG(INFO, "Replace ifindex: %d->%d",
+ dev_flow->tcf.tunnel->ifindex_tun,
+ *dev_flow->tcf.tunnel->ifindex_ptr);
+ if (!dev_flow->tcf.tunnel->ifindex_tun)
+ return -rte_errno;
+ dev_flow->tcf.tunnel->ifindex_org
+ = *dev_flow->tcf.tunnel->ifindex_ptr;
+ *dev_flow->tcf.tunnel->ifindex_ptr
+ = dev_flow->tcf.tunnel->ifindex_tun;
+ }
ret = flow_tcf_nl_ack(tcf, nlh);
+ if (dev_flow->tcf.tunnel) {
+ DRV_LOG(INFO, "Restore ifindex: %d->%d",
+ dev_flow->tcf.tunnel->ifindex_org,
+ *dev_flow->tcf.tunnel->ifindex_ptr);
+ *dev_flow->tcf.tunnel->ifindex_ptr
+ = dev_flow->tcf.tunnel->ifindex_org;
+ dev_flow->tcf.tunnel->ifindex_org = 0;
+ }
if (!ret) {
dev_flow->tcf.applied = 1;
return 0;
}
DRV_LOG(WARNING, "Failed to create TC rule (%d)", rte_errno);
+ if (dev_flow->tcf.tunnel->ifindex_tun) {
+ flow_tcf_tunnel_vtep_delete(&priv->tcf_socket, dev_flow);
+ dev_flow->tcf.tunnel->ifindex_tun = 0;
+ }
return rte_flow_error_set(error, rte_errno,
RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
"netlink: failed to create TC flow rule");
@@ -2951,10 +3269,34 @@ struct flow_tcf_ptoi {
return;
/* E-Switch flow can't be expanded. */
assert(!LIST_NEXT(dev_flow, next));
+ if (!dev_flow->tcf.applied)
+ return;
+ if (dev_flow->tcf.tunnel) {
+ /*
+ * Replace the interface index, target for
+ * encapsulation, source for decapsulation
+ */
+ assert(dev_flow->tcf.tunnel->ifindex_tun);
+ assert(dev_flow->tcf.tunnel->ifindex_ptr);
+ dev_flow->tcf.tunnel->ifindex_org
+ = *dev_flow->tcf.tunnel->ifindex_ptr;
+ *dev_flow->tcf.tunnel->ifindex_ptr
+ = dev_flow->tcf.tunnel->ifindex_tun;
+ }
nlh = dev_flow->tcf.nlh;
nlh->nlmsg_type = RTM_DELTFILTER;
nlh->nlmsg_flags = NLM_F_REQUEST;
flow_tcf_nl_ack(tcf, nlh);
+ if (dev_flow->tcf.tunnel) {
+ *dev_flow->tcf.tunnel->ifindex_ptr
+ = dev_flow->tcf.tunnel->ifindex_org;
+ dev_flow->tcf.tunnel->ifindex_org = 0;
+ if (dev_flow->tcf.tunnel->ifindex_tun) {
+ flow_tcf_tunnel_vtep_delete(&priv->tcf_socket,
+ dev_flow);
+ dev_flow->tcf.tunnel->ifindex_tun = 0;
+ }
+ }
dev_flow->tcf.applied = 0;
}