[2/3] net/af_xdp: support pinning of IRQs
Checks
Commit Message
Network devices using the AF_XDP PMD will trigger interrupts
on reception of packets. The new PMD argument 'queue_irq'
allows the user to specify a core on which to pin interrupts
for a given queue. Multiple queue_irq arguments can be specified.
For example:
--vdev=net_af_xdp1,iface=eth0,queue_count=2,
queue_irq=0:2,queue_irq=1:5
..will pin queue 0 interrupts to core 2 and queue 1 interrupts
to core 5.
The queue argument refers to the ethdev queue as opposed to the
netdev queue. These values are the same unless a value greater
than 0 is specified in a start_queue argument.
The drivers supported for this feature are those with support for
AF_XDP zero copy in the kernel, namely ixgbe, i40e and mlx5_core.
Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
---
doc/guides/nics/af_xdp.rst | 15 ++
doc/guides/rel_notes/release_19_11.rst | 7 +
drivers/net/af_xdp/rte_eth_af_xdp.c | 357 ++++++++++++++++++++++++-
3 files changed, 374 insertions(+), 5 deletions(-)
Comments
On 09/19, Ciara Loftus wrote:
[snip]
>+/* drivers supported for the queue_irq option */
>+enum {I40E_DRIVER, IXGBE_DRIVER, MLX5_DRIVER, NUM_DRIVERS};
Minor nit, how about using below format for readability and align with other
enum type definition in DPDK?
enum supported_driver {
I40E_DRIVER,
IXGBE_DRIVER,
MLX5_DRIVER,
NUM_DRIVERS
};
>+char driver_array[NUM_DRIVERS][NAME_MAX] = {"i40e", "ixgbe", "mlx5_core"};
[snip]
>+ * function for getting the index into driver_handlers array that corresponds
>+ * to 'driver'
>+ */
>+static int
>+get_driver_idx(char *driver)
const char *driver
>+{
>+ for (int i = 0; i < NUM_DRIVERS; i++) {
>+ if (strncmp(driver, driver_array[i], strlen(driver_array[i])))
>+ continue;
>+ return i;
>+ }
>+
>+ return -1;
>+}
>+
>+/** generate /proc/interrupts search regex based on driver type */
>+static int
>+generate_search_regex(const char *driver, struct pmd_internals *internals,
>+ uint16_t netdev_qid, regex_t *r)
I'd prefer put *internals as the first parameter.
>+{
>+ char iface_regex_str[128];
>+ int ret = -1;
>+ char *driver_dup = strdup(driver);
>+ int idx = get_driver_idx(driver_dup);
Why not using driver directly?
>+
>+ if (idx == -1) {
>+ AF_XDP_LOG(ERR, "Error getting driver index for %s\n",
>+ internals->if_name);
>+ goto out;
>+ }
>+
>+ if (driver_handlers[idx](iface_regex_str, internals, netdev_qid)) {
>+ AF_XDP_LOG(ERR, "Error getting regex string for %s\n",
>+ internals->if_name);
>+ goto out;
>+ }
Need to check whether driver_handlers[idex] exists.
>+
>+ if (regcomp(r, iface_regex_str, 0)) {
>+ AF_XDP_LOG(ERR, "Error computing regex %s\n", iface_regex_str);
>+ goto out;
>+ }
>+
>+ ret = 0;
>+
>+out:
>+ free(driver_dup);
>+ return ret;
>+}
>+
>+/** get interrupt number associated with the given interface qid */
>+static int
>+get_interrupt_number(regex_t *r, int *interrupt,
>+ struct pmd_internals *internals)
Better to put the input before output in parameter list, I'd prefer
get_interrupt_number(struct pmd_internals *internals,
regex_t *r, int *interrupt)
>+{
>+ FILE *f_int_proc;
>+ int found = 0;
>+ char line[4096];
>+ int ret = 0;
>+
>+ f_int_proc = fopen("/proc/interrupts", "r");
>+ if (f_int_proc == NULL) {
>+ AF_XDP_LOG(ERR, "Failed to open /proc/interrupts.\n");
>+ return -1;
>+ }
>+
>+ while (!feof(f_int_proc) && !found) {
>+ /* Make sure to read a full line at a time */
>+ if (fgets(line, sizeof(line), f_int_proc) == NULL ||
>+ line[strlen(line) - 1] != '\n') {
>+ AF_XDP_LOG(ERR, "Error reading from interrupts file\n");
>+ ret = -1;
>+ break;
>+ }
>+
>+ /* Extract interrupt number from line */
>+ if (regexec(r, line, 0, NULL, 0) == 0) {
>+ *interrupt = atoi(line);
>+ found = true;
>+ AF_XDP_LOG(INFO, "Got interrupt %d for %s\n",
>+ *interrupt, internals->if_name);
>+ }
>+ }
>+
>+ fclose(f_int_proc);
>+
>+ return ret;
>+}
>+
>+/** affinitise interrupts for the given qid to the given coreid */
>+static int
>+set_irq_affinity(int coreid, struct pmd_internals *internals,
>+ uint16_t rx_queue_id, uint16_t netdev_qid, int interrupt)
Prefer to put *internals in the beginning.
>+{
>+ char bitmask[128];
>+ char smp_affinity_filename[NAME_MAX];
>+ FILE *f_int_smp_affinity;
>+ int i;
>+
>+ /* Create affinity bitmask. Every 32 bits are separated by a comma */
>+ snprintf(bitmask, sizeof(bitmask), "%x", 1 << (coreid % 32));
>+ for (i = 0; i < coreid / 32; i++)
>+ strlcat(bitmask, ",00000000", sizeof(bitmask));
>+
>+ /* Write the new affinity bitmask */
>+ snprintf(smp_affinity_filename, sizeof(smp_affinity_filename),
>+ "/proc/irq/%d/smp_affinity", interrupt);
>+ f_int_smp_affinity = fopen(smp_affinity_filename, "w");
>+ if (f_int_smp_affinity == NULL) {
>+ AF_XDP_LOG(ERR, "Error opening %s\n", smp_affinity_filename);
>+ return -1;
>+ }
>+ fwrite(bitmask, strlen(bitmask), 1, f_int_smp_affinity);
Need to check the return value of fwrite, otherwise coverity may complain.
>+ fclose(f_int_smp_affinity);
>+ AF_XDP_LOG(INFO, "IRQs for %s ethdev queue %i (netdev queue %i)"
>+ " affinitised to core %i\n",
>+ internals->if_name, rx_queue_id,
>+ netdev_qid, coreid);
>+
>+ return 0;
>+}
>+
>+static void
>+configure_irqs(struct pmd_internals *internals, uint16_t rx_queue_id)
>+{
>+ int coreid = internals->queue_irqs[rx_queue_id];
>+ char driver[NAME_MAX];
>+ uint16_t netdev_qid = rx_queue_id + internals->start_queue_idx;
>+ regex_t r;
>+ int interrupt;
>+
>+ if (coreid < 0)
>+ return;
>+
>+ if (coreid > (get_nprocs() - 1)) {
>+ AF_XDP_LOG(ERR, "Affinitisation failed - invalid coreid %i\n",
>+ coreid);
>+ return;
>+ }
I think we can combine above 2 sanity checks together.
>+
>+ if (get_driver_name(internals, driver)) {
>+ AF_XDP_LOG(ERR, "Error retrieving driver name for %s\n",
>+ internals->if_name);
>+ return;
>+ }
>+
>+ if (generate_search_regex(driver, internals, netdev_qid, &r)) {
>+ AF_XDP_LOG(ERR, "Error generating search regex for %s\n",
>+ internals->if_name);
>+ return;
>+ }
>+
>+ if (get_interrupt_number(&r, &interrupt, internals)) {
>+ AF_XDP_LOG(ERR, "Error getting interrupt number for %s\n",
>+ internals->if_name);
>+ return;
>+ }
>+
>+ if (set_irq_affinity(coreid, internals, rx_queue_id, netdev_qid,
>+ interrupt)) {
>+ AF_XDP_LOG(ERR, "Error setting interrupt affinity for %s\n",
>+ internals->if_name);
>+ return;
>+ }
>+}
>+
> static int
> eth_rx_queue_setup(struct rte_eth_dev *dev,
> uint16_t rx_queue_id,
>@@ -697,6 +996,8 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
> goto err;
> }
>
>+ configure_irqs(internals, rx_queue_id);
>+
> rxq->fds[0].fd = xsk_socket__fd(rxq->xsk);
> rxq->fds[0].events = POLLIN;
>
>@@ -834,6 +1135,39 @@ parse_name_arg(const char *key __rte_unused,
> return 0;
> }
>
>+/** parse queue irq argument */
>+static int
>+parse_queue_irq_arg(const char *key __rte_unused,
>+ const char *value, void *extra_args)
>+{
>+ int (*queue_irqs)[RTE_MAX_QUEUES_PER_PORT] = extra_args;
>+ char *parse_str = strdup(value);
>+ char delimiter[] = ":";
>+ char *queue_str;
>+
>+ queue_str = strtok(parse_str, delimiter);
>+ if (queue_str != NULL && strncmp(queue_str, value, strlen(value))) {
>+ char *end;
>+ long queue = strtol(queue_str, &end, 10);
>+
>+ if (*end == '\0' && queue >= 0 &&
>+ queue < RTE_MAX_QUEUES_PER_PORT) {
>+ char *core_str = strtok(NULL, delimiter);
>+ long core = strtol(core_str, &end, 10);
>+
>+ if (*end == '\0' && core >= 0 && core < get_nprocs()) {
>+ (*queue_irqs)[queue] = core;
>+ free(parse_str);
>+ return 0;
>+ }
>+ }
>+ }
>+
>+ AF_XDP_LOG(ERR, "Invalid queue_irq argument.\n");
>+ free(parse_str);
>+ return -1;
>+}
>+
> static int
> xdp_get_channels_info(const char *if_name, int *max_queues,
> int *combined_queues)
>@@ -877,7 +1211,8 @@ xdp_get_channels_info(const char *if_name, int *max_queues,
>
> static int
> parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue,
>- int *queue_cnt, int *pmd_zc)
>+ int *queue_cnt, int *pmd_zc,
>+ int (*queue_irqs)[RTE_MAX_QUEUES_PER_PORT])
> {
> int ret;
>
>@@ -903,6 +1238,11 @@ parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue,
> if (ret < 0)
> goto free_kvlist;
>
>+ ret = rte_kvargs_process(kvlist, ETH_AF_XDP_QUEUE_IRQ_ARG,
>+ &parse_queue_irq_arg, queue_irqs);
>+ if (ret < 0)
>+ goto free_kvlist;
>+
> free_kvlist:
> rte_kvargs_free(kvlist);
> return ret;
>@@ -940,7 +1280,8 @@ get_iface_info(const char *if_name,
>
> static struct rte_eth_dev *
> init_internals(struct rte_vdev_device *dev, const char *if_name,
>- int start_queue_idx, int queue_cnt, int pmd_zc)
>+ int start_queue_idx, int queue_cnt, int pmd_zc,
>+ int queue_irqs[RTE_MAX_QUEUES_PER_PORT])
> {
> const char *name = rte_vdev_device_name(dev);
> const unsigned int numa_node = dev->device.numa_node;
>@@ -957,6 +1298,8 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
> internals->queue_cnt = queue_cnt;
> internals->pmd_zc = pmd_zc;
> strlcpy(internals->if_name, if_name, IFNAMSIZ);
>+ memcpy(internals->queue_irqs, queue_irqs,
Use rte_memcpy instead.
Thanks,
Xiaolong
>+ sizeof(int) * RTE_MAX_QUEUES_PER_PORT);
>
> if (xdp_get_channels_info(if_name, &internals->max_queue_cnt,
> &internals->combined_queue_cnt)) {
>@@ -1035,6 +1378,9 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
> struct rte_eth_dev *eth_dev = NULL;
> const char *name;
> int pmd_zc = 0;
>+ int queue_irqs[RTE_MAX_QUEUES_PER_PORT];
>+
>+ memset(queue_irqs, -1, sizeof(int) * RTE_MAX_QUEUES_PER_PORT);
>
> AF_XDP_LOG(INFO, "Initializing pmd_af_xdp for %s\n",
> rte_vdev_device_name(dev));
>@@ -1062,7 +1408,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
> dev->device.numa_node = rte_socket_id();
>
> if (parse_parameters(kvlist, if_name, &xsk_start_queue_idx,
>- &xsk_queue_cnt, &pmd_zc) < 0) {
>+ &xsk_queue_cnt, &pmd_zc, &queue_irqs) < 0) {
> AF_XDP_LOG(ERR, "Invalid kvargs value\n");
> return -EINVAL;
> }
>@@ -1073,7 +1419,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
> }
>
> eth_dev = init_internals(dev, if_name, xsk_start_queue_idx,
>- xsk_queue_cnt, pmd_zc);
>+ xsk_queue_cnt, pmd_zc, queue_irqs);
> if (eth_dev == NULL) {
> AF_XDP_LOG(ERR, "Failed to init internals\n");
> return -1;
>@@ -1117,7 +1463,8 @@ RTE_PMD_REGISTER_PARAM_STRING(net_af_xdp,
> "iface=<string> "
> "start_queue=<int> "
> "queue_count=<int> "
>- "pmd_zero_copy=<0|1>");
>+ "pmd_zero_copy=<0|1> "
>+ "queue_irq=<int>:<int>");
>
> RTE_INIT(af_xdp_init_log)
> {
>--
>2.17.1
>
On Thu, 19 Sep 2019 14:15:19 +0000
Ciara Loftus <ciara.loftus@intel.com> wrote:
> +char driver_array[NUM_DRIVERS][NAME_MAX] = {"i40e", "ixgbe", "mlx5_core"};
This only makes sense in this file. Should be static.
[snip]
> >+
> >+static void
> >+configure_irqs(struct pmd_internals *internals, uint16_t rx_queue_id)
> >+{
> >+ int coreid = internals->queue_irqs[rx_queue_id];
> >+ char driver[NAME_MAX];
> >+ uint16_t netdev_qid = rx_queue_id + internals->start_queue_idx;
> >+ regex_t r;
> >+ int interrupt;
> >+
> >+ if (coreid < 0)
> >+ return;
> >+
> >+ if (coreid > (get_nprocs() - 1)) {
> >+ AF_XDP_LOG(ERR, "Affinitisation failed - invalid coreid %i\n",
> >+ coreid);
> >+ return;
> >+ }
>
> I think we can combine above 2 sanity checks together.
>
Hi Xiaolong,
Thanks for your review. I agree with all of your feedback except this one.
configure_irqs() is called for every queue. The queues with no affinity have a coreid initialized to -1. So coreid < 0 is a valid value and we should return with no error. However for the case where coreid > nprocs, this is an actual error and we should report that with a log.
What do you think?
Thanks,
Ciara
[snip]
> >@@ -697,6 +996,8 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
> > goto err;
> > }
> >
> >+ configure_irqs(internals, rx_queue_id);
> >+
> > rxq->fds[0].fd = xsk_socket__fd(rxq->xsk);
> > rxq->fds[0].events = POLLIN;
> >
> >@@ -834,6 +1135,39 @@ parse_name_arg(const char *key __rte_unused,
> > return 0;
> > }
> >
On 09/27, Loftus, Ciara wrote:
>[snip]
>
>> >+
>> >+static void
>> >+configure_irqs(struct pmd_internals *internals, uint16_t rx_queue_id)
>> >+{
>> >+ int coreid = internals->queue_irqs[rx_queue_id];
>> >+ char driver[NAME_MAX];
>> >+ uint16_t netdev_qid = rx_queue_id + internals->start_queue_idx;
>> >+ regex_t r;
>> >+ int interrupt;
>> >+
>> >+ if (coreid < 0)
>> >+ return;
>> >+
>> >+ if (coreid > (get_nprocs() - 1)) {
>> >+ AF_XDP_LOG(ERR, "Affinitisation failed - invalid coreid %i\n",
>> >+ coreid);
>> >+ return;
>> >+ }
>>
>> I think we can combine above 2 sanity checks together.
>>
>
>Hi Xiaolong,
>
>Thanks for your review. I agree with all of your feedback except this one.
>
>configure_irqs() is called for every queue. The queues with no affinity have a coreid initialized to -1. So coreid < 0 is a valid value and we should return with no error. However for the case where coreid > nprocs, this is an actual error and we should report that with a log.
>What do you think?
It makes sense, thanks for the explanation.
Thanks,
Xiaolong
>
>Thanks,
>Ciara
>
>[snip]
>
>> >@@ -697,6 +996,8 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
>> > goto err;
>> > }
>> >
>> >+ configure_irqs(internals, rx_queue_id);
>> >+
>> > rxq->fds[0].fd = xsk_socket__fd(rxq->xsk);
>> > rxq->fds[0].events = POLLIN;
>> >
>> >@@ -834,6 +1135,39 @@ parse_name_arg(const char *key __rte_unused,
>> > return 0;
>> > }
>> >
>
@@ -36,6 +36,11 @@ The following options can be provided to set up an af_xdp port in DPDK.
* ``start_queue`` - starting netdev queue id (optional, default 0);
* ``queue_count`` - total netdev queue number (optional, default 1);
* ``pmd_zero_copy`` - enable zero copy or not (optional, default 0);
+* ``queue_irq`` - pin queue irqs to specified core <queue:core> (optional,
+ default no pinning). The queue argument refers to the ethdev queue as
+ opposed to the netdev queue. These values are the same unless a value
+ greater than 0 is specified for start_queue. ixgbe, i40e and mlx5 drivers
+ supported;
Prerequisites
-------------
@@ -57,3 +62,13 @@ The following example will set up an af_xdp interface in DPDK:
.. code-block:: console
--vdev net_af_xdp,iface=ens786f1
+
+Pin queue IRQs
+--------------
+The following example will pin queue 0 interrupts to core 2 and queue 1
+interrupts to core 5.
+
+.. code-block:: console
+
+ --vdev=net_af_xdp1,iface=eth0,queue_count=2,
+ queue_irq=0:2,queue_irq=1:5
@@ -56,6 +56,13 @@ New Features
Also, make sure to start the actual text at the margin.
=========================================================
+* **Updated the AF_XDP PMD.**
+
+ Updated the AF_XDP PMD. The new features include:
+
+ * Support for pinning netdev queue IRQs to cores specified by the user.
+ Available for ixgbe, i40e and mlx5 drivers.
+
Removed Items
-------------
@@ -3,6 +3,7 @@
*/
#include <unistd.h>
#include <errno.h>
+#include <regex.h>
#include <stdlib.h>
#include <string.h>
#include <poll.h>
@@ -10,6 +11,7 @@
#include <net/if.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
+#include <sys/sysinfo.h>
#include <linux/if_ether.h>
#include <linux/if_xdp.h>
#include <linux/if_link.h>
@@ -17,6 +19,8 @@
#include <linux/sockios.h>
#include "af_xdp_deps.h"
#include <bpf/xsk.h>
+#include <sys/stat.h>
+#include <libgen.h>
#include <rte_ethdev.h>
#include <rte_ethdev_driver.h>
@@ -116,6 +120,7 @@ struct pmd_internals {
int queue_cnt;
int max_queue_cnt;
int combined_queue_cnt;
+ int queue_irqs[RTE_MAX_QUEUES_PER_PORT];
int pmd_zc;
struct rte_ether_addr eth_addr;
@@ -128,12 +133,14 @@ struct pmd_internals {
#define ETH_AF_XDP_START_QUEUE_ARG "start_queue"
#define ETH_AF_XDP_QUEUE_COUNT_ARG "queue_count"
#define ETH_AF_XDP_PMD_ZC_ARG "pmd_zero_copy"
+#define ETH_AF_XDP_QUEUE_IRQ_ARG "queue_irq"
static const char * const valid_arguments[] = {
ETH_AF_XDP_IFACE_ARG,
ETH_AF_XDP_START_QUEUE_ARG,
ETH_AF_XDP_QUEUE_COUNT_ARG,
ETH_AF_XDP_PMD_ZC_ARG,
+ ETH_AF_XDP_QUEUE_IRQ_ARG,
NULL
};
@@ -144,6 +151,21 @@ static const struct rte_eth_link pmd_link = {
.link_autoneg = ETH_LINK_AUTONEG
};
+/* drivers supported for the queue_irq option */
+enum {I40E_DRIVER, IXGBE_DRIVER, MLX5_DRIVER, NUM_DRIVERS};
+char driver_array[NUM_DRIVERS][NAME_MAX] = {"i40e", "ixgbe", "mlx5_core"};
+
+/*
+ * function pointer template to be implemented for each driver in 'driver_array'
+ * to generate the appropriate regular expression to search for in
+ * /proc/interrupts in order to identify the IRQ number for the netdev_qid of
+ * the given interface.
+ */
+typedef
+int (*generate_driver_regex_func)(char *iface_regex_str,
+ struct pmd_internals *internals,
+ uint16_t netdev_qid);
+
static inline int
reserve_fill_queue(struct xsk_umem_info *umem, uint16_t reserve_size)
{
@@ -660,6 +682,283 @@ xsk_configure(struct pmd_internals *internals, struct pkt_rx_queue *rxq,
return ret;
}
+/** get interface's driver name to determine /proc/interrupts entry format */
+static int
+get_driver_name(struct pmd_internals *internals, char *driver)
+{
+ char driver_path[PATH_MAX];
+ struct stat s;
+ char link[PATH_MAX];
+ int len;
+
+ snprintf(driver_path, sizeof(driver_path),
+ "/sys/class/net/%s/device/driver", internals->if_name);
+ if (lstat(driver_path, &s)) {
+ AF_XDP_LOG(ERR, "Error reading %s: %s\n",
+ driver_path, strerror(errno));
+ return -errno;
+ }
+
+ /* driver_path should link to /sys/bus/pci/drivers/<driver_name> */
+ len = readlink(driver_path, link, PATH_MAX - 1);
+ if (len == -1) {
+ AF_XDP_LOG(ERR, "Error reading symbolic link %s: %s\n",
+ driver_path, strerror(errno));
+ return -errno;
+ }
+
+ link[len] = '\0';
+ strlcpy(driver, basename(link), NAME_MAX);
+ if (!strncmp(driver, ".", strlen(driver))) {
+ AF_XDP_LOG(ERR, "Error getting driver name from %s: %s\n",
+ link, strerror(errno));
+ return -errno;
+ }
+
+ return 0;
+}
+
+static int
+generate_ixgbe_i40e_regex(char *iface_regex_str,
+ struct pmd_internals *internals, uint16_t netdev_qid)
+{
+ if (snprintf(iface_regex_str, 128,
+ "-%s.*-%d", internals->if_name, netdev_qid) >= 128) {
+ AF_XDP_LOG(INFO, "Cannot get interrupt for %s q %i\n",
+ internals->if_name, netdev_qid);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int
+generate_mlx5_regex(char *iface_regex_str, struct pmd_internals *internals,
+ uint16_t netdev_qid)
+{
+ char pci_path[PATH_MAX];
+ char *pci;
+ int ret = -1;
+ struct stat s;
+ char *link;
+ int len;
+
+ snprintf(pci_path, sizeof(pci_path),
+ "/sys/class/net/%s/device", internals->if_name);
+ if (lstat(pci_path, &s)) {
+ AF_XDP_LOG(ERR, "Error reading %s: %s\n",
+ pci_path, strerror(errno));
+ return -errno;
+ }
+
+ /* pci_path should link to a directory whose name is the pci addr */
+ link = malloc(s.st_size + 1);
+ len = readlink(pci_path, link, PATH_MAX - 1);
+ if (len == -1) {
+ AF_XDP_LOG(ERR, "Error reading symbolic link %s: %s\n",
+ pci_path, strerror(errno));
+ ret = -errno;
+ goto out;
+ }
+
+ link[len] = '\0';
+ pci = basename(link);
+ if (!strncmp(pci, ".", strlen(pci))) {
+ AF_XDP_LOG(ERR, "Error getting pci from %s\n", link);
+ goto out;
+ }
+
+ if (snprintf(iface_regex_str, 128, ".*p%i@pci:%s", netdev_qid, pci) >=
+ 128) {
+ AF_XDP_LOG(INFO, "Cannot get interrupt for %s q %i\n",
+ internals->if_name, netdev_qid);
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ if (link)
+ free(link);
+
+ return ret;
+}
+
+/*
+ * array of handlers for different drivers for generating appropriate regex
+ * format for searching /proc/interrupts
+ */
+generate_driver_regex_func driver_handlers[NUM_DRIVERS] = {
+ generate_ixgbe_i40e_regex,
+ generate_ixgbe_i40e_regex,
+ generate_mlx5_regex};
+
+/*
+ * function for getting the index into driver_handlers array that corresponds
+ * to 'driver'
+ */
+static int
+get_driver_idx(char *driver)
+{
+ for (int i = 0; i < NUM_DRIVERS; i++) {
+ if (strncmp(driver, driver_array[i], strlen(driver_array[i])))
+ continue;
+ return i;
+ }
+
+ return -1;
+}
+
+/** generate /proc/interrupts search regex based on driver type */
+static int
+generate_search_regex(const char *driver, struct pmd_internals *internals,
+ uint16_t netdev_qid, regex_t *r)
+{
+ char iface_regex_str[128];
+ int ret = -1;
+ char *driver_dup = strdup(driver);
+ int idx = get_driver_idx(driver_dup);
+
+ if (idx == -1) {
+ AF_XDP_LOG(ERR, "Error getting driver index for %s\n",
+ internals->if_name);
+ goto out;
+ }
+
+ if (driver_handlers[idx](iface_regex_str, internals, netdev_qid)) {
+ AF_XDP_LOG(ERR, "Error getting regex string for %s\n",
+ internals->if_name);
+ goto out;
+ }
+
+ if (regcomp(r, iface_regex_str, 0)) {
+ AF_XDP_LOG(ERR, "Error computing regex %s\n", iface_regex_str);
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ free(driver_dup);
+ return ret;
+}
+
+/** get interrupt number associated with the given interface qid */
+static int
+get_interrupt_number(regex_t *r, int *interrupt,
+ struct pmd_internals *internals)
+{
+ FILE *f_int_proc;
+ int found = 0;
+ char line[4096];
+ int ret = 0;
+
+ f_int_proc = fopen("/proc/interrupts", "r");
+ if (f_int_proc == NULL) {
+ AF_XDP_LOG(ERR, "Failed to open /proc/interrupts.\n");
+ return -1;
+ }
+
+ while (!feof(f_int_proc) && !found) {
+ /* Make sure to read a full line at a time */
+ if (fgets(line, sizeof(line), f_int_proc) == NULL ||
+ line[strlen(line) - 1] != '\n') {
+ AF_XDP_LOG(ERR, "Error reading from interrupts file\n");
+ ret = -1;
+ break;
+ }
+
+ /* Extract interrupt number from line */
+ if (regexec(r, line, 0, NULL, 0) == 0) {
+ *interrupt = atoi(line);
+ found = true;
+ AF_XDP_LOG(INFO, "Got interrupt %d for %s\n",
+ *interrupt, internals->if_name);
+ }
+ }
+
+ fclose(f_int_proc);
+
+ return ret;
+}
+
+/** affinitise interrupts for the given qid to the given coreid */
+static int
+set_irq_affinity(int coreid, struct pmd_internals *internals,
+ uint16_t rx_queue_id, uint16_t netdev_qid, int interrupt)
+{
+ char bitmask[128];
+ char smp_affinity_filename[NAME_MAX];
+ FILE *f_int_smp_affinity;
+ int i;
+
+ /* Create affinity bitmask. Every 32 bits are separated by a comma */
+ snprintf(bitmask, sizeof(bitmask), "%x", 1 << (coreid % 32));
+ for (i = 0; i < coreid / 32; i++)
+ strlcat(bitmask, ",00000000", sizeof(bitmask));
+
+ /* Write the new affinity bitmask */
+ snprintf(smp_affinity_filename, sizeof(smp_affinity_filename),
+ "/proc/irq/%d/smp_affinity", interrupt);
+ f_int_smp_affinity = fopen(smp_affinity_filename, "w");
+ if (f_int_smp_affinity == NULL) {
+ AF_XDP_LOG(ERR, "Error opening %s\n", smp_affinity_filename);
+ return -1;
+ }
+ fwrite(bitmask, strlen(bitmask), 1, f_int_smp_affinity);
+ fclose(f_int_smp_affinity);
+ AF_XDP_LOG(INFO, "IRQs for %s ethdev queue %i (netdev queue %i)"
+ " affinitised to core %i\n",
+ internals->if_name, rx_queue_id,
+ netdev_qid, coreid);
+
+ return 0;
+}
+
+static void
+configure_irqs(struct pmd_internals *internals, uint16_t rx_queue_id)
+{
+ int coreid = internals->queue_irqs[rx_queue_id];
+ char driver[NAME_MAX];
+ uint16_t netdev_qid = rx_queue_id + internals->start_queue_idx;
+ regex_t r;
+ int interrupt;
+
+ if (coreid < 0)
+ return;
+
+ if (coreid > (get_nprocs() - 1)) {
+ AF_XDP_LOG(ERR, "Affinitisation failed - invalid coreid %i\n",
+ coreid);
+ return;
+ }
+
+ if (get_driver_name(internals, driver)) {
+ AF_XDP_LOG(ERR, "Error retrieving driver name for %s\n",
+ internals->if_name);
+ return;
+ }
+
+ if (generate_search_regex(driver, internals, netdev_qid, &r)) {
+ AF_XDP_LOG(ERR, "Error generating search regex for %s\n",
+ internals->if_name);
+ return;
+ }
+
+ if (get_interrupt_number(&r, &interrupt, internals)) {
+ AF_XDP_LOG(ERR, "Error getting interrupt number for %s\n",
+ internals->if_name);
+ return;
+ }
+
+ if (set_irq_affinity(coreid, internals, rx_queue_id, netdev_qid,
+ interrupt)) {
+ AF_XDP_LOG(ERR, "Error setting interrupt affinity for %s\n",
+ internals->if_name);
+ return;
+ }
+}
+
static int
eth_rx_queue_setup(struct rte_eth_dev *dev,
uint16_t rx_queue_id,
@@ -697,6 +996,8 @@ eth_rx_queue_setup(struct rte_eth_dev *dev,
goto err;
}
+ configure_irqs(internals, rx_queue_id);
+
rxq->fds[0].fd = xsk_socket__fd(rxq->xsk);
rxq->fds[0].events = POLLIN;
@@ -834,6 +1135,39 @@ parse_name_arg(const char *key __rte_unused,
return 0;
}
+/** parse queue irq argument */
+static int
+parse_queue_irq_arg(const char *key __rte_unused,
+ const char *value, void *extra_args)
+{
+ int (*queue_irqs)[RTE_MAX_QUEUES_PER_PORT] = extra_args;
+ char *parse_str = strdup(value);
+ char delimiter[] = ":";
+ char *queue_str;
+
+ queue_str = strtok(parse_str, delimiter);
+ if (queue_str != NULL && strncmp(queue_str, value, strlen(value))) {
+ char *end;
+ long queue = strtol(queue_str, &end, 10);
+
+ if (*end == '\0' && queue >= 0 &&
+ queue < RTE_MAX_QUEUES_PER_PORT) {
+ char *core_str = strtok(NULL, delimiter);
+ long core = strtol(core_str, &end, 10);
+
+ if (*end == '\0' && core >= 0 && core < get_nprocs()) {
+ (*queue_irqs)[queue] = core;
+ free(parse_str);
+ return 0;
+ }
+ }
+ }
+
+ AF_XDP_LOG(ERR, "Invalid queue_irq argument.\n");
+ free(parse_str);
+ return -1;
+}
+
static int
xdp_get_channels_info(const char *if_name, int *max_queues,
int *combined_queues)
@@ -877,7 +1211,8 @@ xdp_get_channels_info(const char *if_name, int *max_queues,
static int
parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue,
- int *queue_cnt, int *pmd_zc)
+ int *queue_cnt, int *pmd_zc,
+ int (*queue_irqs)[RTE_MAX_QUEUES_PER_PORT])
{
int ret;
@@ -903,6 +1238,11 @@ parse_parameters(struct rte_kvargs *kvlist, char *if_name, int *start_queue,
if (ret < 0)
goto free_kvlist;
+ ret = rte_kvargs_process(kvlist, ETH_AF_XDP_QUEUE_IRQ_ARG,
+ &parse_queue_irq_arg, queue_irqs);
+ if (ret < 0)
+ goto free_kvlist;
+
free_kvlist:
rte_kvargs_free(kvlist);
return ret;
@@ -940,7 +1280,8 @@ get_iface_info(const char *if_name,
static struct rte_eth_dev *
init_internals(struct rte_vdev_device *dev, const char *if_name,
- int start_queue_idx, int queue_cnt, int pmd_zc)
+ int start_queue_idx, int queue_cnt, int pmd_zc,
+ int queue_irqs[RTE_MAX_QUEUES_PER_PORT])
{
const char *name = rte_vdev_device_name(dev);
const unsigned int numa_node = dev->device.numa_node;
@@ -957,6 +1298,8 @@ init_internals(struct rte_vdev_device *dev, const char *if_name,
internals->queue_cnt = queue_cnt;
internals->pmd_zc = pmd_zc;
strlcpy(internals->if_name, if_name, IFNAMSIZ);
+ memcpy(internals->queue_irqs, queue_irqs,
+ sizeof(int) * RTE_MAX_QUEUES_PER_PORT);
if (xdp_get_channels_info(if_name, &internals->max_queue_cnt,
&internals->combined_queue_cnt)) {
@@ -1035,6 +1378,9 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
struct rte_eth_dev *eth_dev = NULL;
const char *name;
int pmd_zc = 0;
+ int queue_irqs[RTE_MAX_QUEUES_PER_PORT];
+
+ memset(queue_irqs, -1, sizeof(int) * RTE_MAX_QUEUES_PER_PORT);
AF_XDP_LOG(INFO, "Initializing pmd_af_xdp for %s\n",
rte_vdev_device_name(dev));
@@ -1062,7 +1408,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
dev->device.numa_node = rte_socket_id();
if (parse_parameters(kvlist, if_name, &xsk_start_queue_idx,
- &xsk_queue_cnt, &pmd_zc) < 0) {
+ &xsk_queue_cnt, &pmd_zc, &queue_irqs) < 0) {
AF_XDP_LOG(ERR, "Invalid kvargs value\n");
return -EINVAL;
}
@@ -1073,7 +1419,7 @@ rte_pmd_af_xdp_probe(struct rte_vdev_device *dev)
}
eth_dev = init_internals(dev, if_name, xsk_start_queue_idx,
- xsk_queue_cnt, pmd_zc);
+ xsk_queue_cnt, pmd_zc, queue_irqs);
if (eth_dev == NULL) {
AF_XDP_LOG(ERR, "Failed to init internals\n");
return -1;
@@ -1117,7 +1463,8 @@ RTE_PMD_REGISTER_PARAM_STRING(net_af_xdp,
"iface=<string> "
"start_queue=<int> "
"queue_count=<int> "
- "pmd_zero_copy=<0|1>");
+ "pmd_zero_copy=<0|1> "
+ "queue_irq=<int>:<int>");
RTE_INIT(af_xdp_init_log)
{