From patchwork Thu Mar 8 13:52:48 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Qi Zhang X-Patchwork-Id: 35784 X-Patchwork-Delegate: ferruh.yigit@amd.com Return-Path: X-Original-To: patchwork@dpdk.org Delivered-To: patchwork@dpdk.org Received: from [92.243.14.124] (localhost [127.0.0.1]) by dpdk.org (Postfix) with ESMTP id F1E937CDA; Thu, 8 Mar 2018 14:52:57 +0100 (CET) Received: from mga07.intel.com (mga07.intel.com [134.134.136.100]) by dpdk.org (Postfix) with ESMTP id 96AC25B34 for ; Thu, 8 Mar 2018 14:52:54 +0100 (CET) X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from fmsmga002.fm.intel.com ([10.253.24.26]) by orsmga105.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 08 Mar 2018 05:52:54 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.47,441,1515484800"; d="scan'208";a="26294184" Received: from dpdk51.sh.intel.com ([10.67.110.184]) by fmsmga002.fm.intel.com with ESMTP; 08 Mar 2018 05:52:53 -0800 From: Qi Zhang To: dev@dpdk.org Cc: magnus.karlsson@intel.com, bjorn.topel@intel.com, Qi Zhang Date: Thu, 8 Mar 2018 21:52:48 +0800 Message-Id: <20180308135249.28187-7-qi.z.zhang@intel.com> X-Mailer: git-send-email 2.13.6 In-Reply-To: <20180308135249.28187-1-qi.z.zhang@intel.com> References: <20180308135249.28187-1-qi.z.zhang@intel.com> Subject: [dpdk-dev] [RFC v2 6/7] net/af_xdp: load BPF file X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.15 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" Add the libbpf and libelf dependencies in the Makefile. During initialization, a bpf prog which calls imm "xdpsk_redirect" will be loaded. The driver then always tries to link the XDP fd in DRV mode first, and falls back to SKB mode if the DRV-mode link fails. The link is released during dev_close. 
Note: this is workaround solution, af_xdp may remove BPF dependency in future. Signed-off-by: Qi Zhang --- drivers/net/af_xdp/Makefile | 5 +- drivers/net/af_xdp/bpf_load.c | 168 ++++++++++++++++++++++++++++++++++++ drivers/net/af_xdp/bpf_load.h | 11 +++ drivers/net/af_xdp/rte_eth_af_xdp.c | 80 ++++++++++++++--- mk/rte.app.mk | 2 +- 5 files changed, 254 insertions(+), 12 deletions(-) create mode 100644 drivers/net/af_xdp/bpf_load.c create mode 100644 drivers/net/af_xdp/bpf_load.h diff --git a/drivers/net/af_xdp/Makefile b/drivers/net/af_xdp/Makefile index 990073655..f16b5306b 100644 --- a/drivers/net/af_xdp/Makefile +++ b/drivers/net/af_xdp/Makefile @@ -12,7 +12,9 @@ EXPORT_MAP := rte_pmd_af_xdp_version.map LIBABIVER := 1 -CFLAGS += -O3 -I/opt/af_xdp/linux_headers/include +LINUX_HEADER_DIR := /opt/af_xdp/linux_headers/include + +CFLAGS += -O3 -I$(LINUX_HEADER_DIR) CFLAGS += $(WERROR_FLAGS) LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs @@ -22,5 +24,6 @@ LDLIBS += -lrte_bus_vdev # all source are stored in SRCS-y # SRCS-$(CONFIG_RTE_LIBRTE_PMD_AF_XDP) += rte_eth_af_xdp.c +SRCS-$(CONFIG_RTE_LIBRTE_PMD_AF_XDP) += bpf_load.c include $(RTE_SDK)/mk/rte.lib.mk diff --git a/drivers/net/af_xdp/bpf_load.c b/drivers/net/af_xdp/bpf_load.c new file mode 100644 index 000000000..255e67187 --- /dev/null +++ b/drivers/net/af_xdp/bpf_load.c @@ -0,0 +1,168 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "bpf_load.h" + +static char bpf_log_buf[BPF_LOG_BUF_SIZE]; + +struct bpf_insn prog[] = { + { + .code = 0x85, //call imm + .dst_reg = 0, + .src_reg = 0, + .off = 0, + .imm = BPF_FUNC_xdpsk_redirect, + }, + { + .code = 0x95, //exit + .dst_reg = 0, 
+ .src_reg = 0, + .off = 0, + .imm = 0, + }, +}; + +int load_bpf_file(void) +{ + int fd; + + fd = bpf_load_program(BPF_PROG_TYPE_XDP, prog, + ARRAY_SIZE(prog), + "GPL", 0, + bpf_log_buf, BPF_LOG_BUF_SIZE); + + if (fd < 0) { + printf("bpf_load_program() err=%d\n%s", errno, bpf_log_buf); + return -1; + } + + return fd; +} + +int set_link_xdp_fd(int ifindex, int fd, __u32 flags) +{ + struct sockaddr_nl sa; + int sock, len, ret = -1; + uint32_t seq = 0; + char buf[4096]; + struct nlattr *nla, *nla_xdp; + struct { + struct nlmsghdr nh; + struct ifinfomsg ifinfo; + char attrbuf[64]; + } req; + struct nlmsghdr *nh; + struct nlmsgerr *err; + + memset(&sa, 0, sizeof(sa)); + sa.nl_family = AF_NETLINK; + + sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (sock < 0) { + printf("open netlink socket: %s\n", strerror(errno)); + return -1; + } + + if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) { + printf("bind to netlink: %s\n", strerror(errno)); + goto cleanup; + } + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_type = RTM_SETLINK; + req.nh.nlmsg_pid = 0; + req.nh.nlmsg_seq = ++seq; + req.ifinfo.ifi_family = AF_UNSPEC; + req.ifinfo.ifi_index = ifindex; + + /* started nested attribute for XDP */ + nla = (struct nlattr *)(((char *)&req) + + NLMSG_ALIGN(req.nh.nlmsg_len)); + nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/; + nla->nla_len = NLA_HDRLEN; + + /* add XDP fd */ + nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len); + nla_xdp->nla_type = 1/*IFLA_XDP_FD*/; + nla_xdp->nla_len = NLA_HDRLEN + sizeof(int); + memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd)); + nla->nla_len += nla_xdp->nla_len; + + /* if user passed in any flags, add those too */ + if (flags) { + nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len); + nla_xdp->nla_type = 3/*IFLA_XDP_FLAGS*/; + nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags); + memcpy((char *)nla_xdp + 
NLA_HDRLEN, &flags, sizeof(flags)); + nla->nla_len += nla_xdp->nla_len; + } + + req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len); + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + printf("send to netlink: %s\n", strerror(errno)); + goto cleanup; + } + + len = recv(sock, buf, sizeof(buf), 0); + if (len < 0) { + printf("recv from netlink: %s\n", strerror(errno)); + goto cleanup; + } + + for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, (unsigned int)len); + nh = NLMSG_NEXT(nh, len)) { + if (nh->nlmsg_pid != (uint32_t)getpid()) { + printf("Wrong pid %d, expected %d\n", + nh->nlmsg_pid, getpid()); + goto cleanup; + } + if (nh->nlmsg_seq != seq) { + printf("Wrong seq %d, expected %d\n", + nh->nlmsg_seq, seq); + goto cleanup; + } + switch (nh->nlmsg_type) { + case NLMSG_ERROR: + err = (struct nlmsgerr *)NLMSG_DATA(nh); + if (!err->error) + continue; + printf("nlmsg error %s\n", strerror(-err->error)); + goto cleanup; + case NLMSG_DONE: + break; + } + } + + ret = 0; + +cleanup: + close(sock); + return ret; +} diff --git a/drivers/net/af_xdp/bpf_load.h b/drivers/net/af_xdp/bpf_load.h new file mode 100644 index 000000000..2561ede55 --- /dev/null +++ b/drivers/net/af_xdp/bpf_load.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2018 Intel Corporation + */ +#ifndef __BPF_LOAD_H +#define __BPF_LOAD_H + +#include + +int load_bpf_file(void); +int set_link_xdp_fd(int ifindex, int fd, __u32 flags); +#endif diff --git a/drivers/net/af_xdp/rte_eth_af_xdp.c b/drivers/net/af_xdp/rte_eth_af_xdp.c index 7e839f0da..825273c11 100644 --- a/drivers/net/af_xdp/rte_eth_af_xdp.c +++ b/drivers/net/af_xdp/rte_eth_af_xdp.c @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -20,6 +21,7 @@ #include #include #include "xdpsock_queue.h" +#include "bpf_load.h" #ifndef SOL_XDP #define SOL_XDP 283 @@ -81,6 +83,9 @@ struct pmd_internals { uint16_t port_id; uint16_t queue_idx; int ring_size; + + uint32_t xdp_flags; + int bpf_fd; }; static const char * 
const valid_arguments[] = { @@ -97,6 +102,39 @@ static struct rte_eth_link pmd_link = { .link_autoneg = ETH_LINK_AUTONEG }; +static int load_bpf(struct pmd_internals *internals) +{ + /* need fix: hard coded bpf file */ + int fd = load_bpf_file(); + + if (fd < 0) + return -1; + + internals->bpf_fd = fd; + return 0; +} + +static int link_bpf_file(struct pmd_internals *internals) +{ + if (!set_link_xdp_fd(internals->if_index, + internals->bpf_fd, + XDP_FLAGS_DRV_MODE)) + internals->xdp_flags = XDP_FLAGS_DRV_MODE; + else if (!set_link_xdp_fd(internals->if_index, + internals->bpf_fd, + XDP_FLAGS_SKB_MODE)) + internals->xdp_flags = XDP_FLAGS_SKB_MODE; + else + return -1; + + return 0; +} + +static void unlink_bpf_file(struct pmd_internals *internals) +{ + set_link_xdp_fd(internals->if_index, -1, internals->xdp_flags); +} + static void *get_pkt_data(struct pmd_internals *internals, uint32_t index, uint32_t offset) @@ -380,8 +418,26 @@ eth_stats_reset(struct rte_eth_dev *dev) } static void -eth_dev_close(struct rte_eth_dev *dev __rte_unused) +eth_dev_close(struct rte_eth_dev *dev) { + struct pmd_internals *internals = dev->data->dev_private; + + if (internals->xdp_flags) { + unlink_bpf_file(internals); + internals->xdp_flags = 0; + } + + if (internals->umem) { + if (internals->umem->mb_pool && !internals->share_mb_pool) + rte_mempool_free(internals->umem->mb_pool); + free(internals->umem); + internals->umem = NULL; + } + + if (internals->sfd != -1) { + close(internals->sfd); + internals->sfd = -1; + } } static void @@ -743,9 +799,17 @@ init_internals(struct rte_vdev_device *dev, if (ret) goto error_3; + if (load_bpf(internals)) { + printf("load bpf file failed\n"); + goto error_3; + } + + if (link_bpf_file(internals)) + goto error_3; + eth_dev = rte_eth_vdev_allocate(dev, 0); if (!eth_dev) - goto error_3; + goto error_4; rte_memcpy(data, eth_dev->data, sizeof(*data)); internals->port_id = eth_dev->data->port_id; @@ -763,6 +827,9 @@ init_internals(struct rte_vdev_device 
*dev, return 0; +error_4: + unlink_bpf_file(internals); + error_3: close(internals->sfd); @@ -808,7 +875,6 @@ static int rte_pmd_af_xdp_remove(struct rte_vdev_device *dev) { struct rte_eth_dev *eth_dev = NULL; - struct pmd_internals *internals; RTE_LOG(INFO, PMD, "Closing AF_XDP ethdev on numa socket %u\n", rte_socket_id()); @@ -821,15 +887,9 @@ rte_pmd_af_xdp_remove(struct rte_vdev_device *dev) if (!eth_dev) return -1; - internals = eth_dev->data->dev_private; - if (internals->umem) { - if (internals->umem->mb_pool && !internals->share_mb_pool) - rte_mempool_free(internals->umem->mb_pool); - rte_free(internals->umem); - } + eth_dev_close(eth_dev); rte_free(eth_dev->data->dev_private); rte_free(eth_dev->data); - close(internals->sfd); rte_eth_dev_release_port(eth_dev); diff --git a/mk/rte.app.mk b/mk/rte.app.mk index bc26e1457..d05e6c0e4 100644 --- a/mk/rte.app.mk +++ b/mk/rte.app.mk @@ -120,7 +120,7 @@ ifeq ($(CONFIG_RTE_BUILD_SHARED_LIB),n) _LDLIBS-$(CONFIG_RTE_DRIVER_MEMPOOL_STACK) += -lrte_mempool_stack _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AF_PACKET) += -lrte_pmd_af_packet -_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AF_XDP) += -lrte_pmd_af_xdp +_LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_AF_XDP) += -lrte_pmd_af_xdp -lelf -lbpf _LDLIBS-$(CONFIG_RTE_LIBRTE_ARK_PMD) += -lrte_pmd_ark _LDLIBS-$(CONFIG_RTE_LIBRTE_AVF_PMD) += -lrte_pmd_avf _LDLIBS-$(CONFIG_RTE_LIBRTE_AVP_PMD) += -lrte_pmd_avp