From patchwork Wed Nov 10 03:01:33 2021 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Narcisa Ana Maria Vasile X-Patchwork-Id: 104076 X-Patchwork-Delegate: david.marchand@redhat.com Return-Path: X-Original-To: patchwork@inbox.dpdk.org Delivered-To: patchwork@inbox.dpdk.org Received: from mails.dpdk.org (mails.dpdk.org [217.70.189.124]) by inbox.dpdk.org (Postfix) with ESMTP id 1BCAEA034F; Wed, 10 Nov 2021 04:02:14 +0100 (CET) Received: from [217.70.189.124] (localhost [127.0.0.1]) by mails.dpdk.org (Postfix) with ESMTP id C21BA41134; Wed, 10 Nov 2021 04:01:52 +0100 (CET) Received: from linux.microsoft.com (linux.microsoft.com [13.77.154.182]) by mails.dpdk.org (Postfix) with ESMTP id 0B72D40E03 for ; Wed, 10 Nov 2021 04:01:47 +0100 (CET) Received: by linux.microsoft.com (Postfix, from userid 1059) id 3386320C3548; Tue, 9 Nov 2021 19:01:46 -0800 (PST) DKIM-Filter: OpenDKIM Filter v2.11.0 linux.microsoft.com 3386320C3548 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linux.microsoft.com; s=default; t=1636513306; bh=4LVnA6HfcZgzX9eXbDGMjHALoh0WFVFuZQz6pNzJaYE=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=oMMGxgAFlrjrXeZvpj/t/o8XQpA3bTK2/XQFBDCl3VfOy+1VAVfbuySN3FH2XzTFD L+jiyGIMZVsUwma3jKpYhheV2Hj03nQw5FE5DZPaguApChNar4vw+XGwRPuhyf2/iZ gumn0LlW0pZ4JlQabc/N35SS8n5MRrloaQIqB72E= From: Narcisa Ana Maria Vasile To: dev@dpdk.org, thomas@monjalon.net, dmitry.kozliuk@gmail.com, khot@microsoft.com, navasile@microsoft.com, dmitrym@microsoft.com, roretzla@microsoft.com, talshn@nvidia.com, ocardona@microsoft.com Cc: bruce.richardson@intel.com, david.marchand@redhat.com, pallavi.kadam@intel.com Date: Tue, 9 Nov 2021 19:01:33 -0800 Message-Id: <1636513302-7359-5-git-send-email-navasile@linux.microsoft.com> X-Mailer: git-send-email 1.8.3.1 In-Reply-To: <1636513302-7359-1-git-send-email-navasile@linux.microsoft.com> References: <1633765318-28356-1-git-send-email-navasile@linux.microsoft.com> <1636513302-7359-1-git-send-email-navasile@linux.microsoft.com> Subject: [dpdk-dev] [PATCH v17 04/13] eal: implement functions for thread affinity management X-BeenThere: dev@dpdk.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: DPDK patches and discussions List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: dev-bounces@dpdk.org Sender: "dev" From: Narcisa Vasile Implement functions for getting/setting thread affinity. Threads can be pinned to specific cores by setting their affinity attribute. Signed-off-by: Narcisa Vasile Signed-off-by: Dmitry Malloy --- lib/eal/common/rte_thread.c | 16 ++++ lib/eal/include/rte_thread.h | 36 +++++++ lib/eal/version.map | 2 + lib/eal/windows/eal_lcore.c | 176 +++++++++++++++++++++++++--------- lib/eal/windows/eal_windows.h | 10 ++ lib/eal/windows/rte_thread.c | 125 +++++++++++++++++++++++- 6 files changed, 319 insertions(+), 46 deletions(-) diff --git a/lib/eal/common/rte_thread.c b/lib/eal/common/rte_thread.c index 27ad1c7eb0..73b7b3141c 100644 --- a/lib/eal/common/rte_thread.c +++ b/lib/eal/common/rte_thread.c @@ -34,6 +34,22 @@ rte_thread_equal(rte_thread_t t1, rte_thread_t t2) return pthread_equal((pthread_t)t1.opaque_id, (pthread_t)t2.opaque_id); } +int +rte_thread_set_affinity_by_id(rte_thread_t thread_id, + const rte_cpuset_t *cpuset) +{ + return pthread_setaffinity_np((pthread_t)thread_id.opaque_id, + sizeof(*cpuset), cpuset); +} + +int +rte_thread_get_affinity_by_id(rte_thread_t thread_id, + rte_cpuset_t *cpuset) +{ + return pthread_getaffinity_np((pthread_t)thread_id.opaque_id, + sizeof(*cpuset), cpuset); +} + int rte_thread_attr_init(rte_thread_attr_t *attr) { diff --git a/lib/eal/include/rte_thread.h b/lib/eal/include/rte_thread.h index 8a20215a94..5b100cafda 100644 --- a/lib/eal/include/rte_thread.h +++ b/lib/eal/include/rte_thread.h @@ -85,6 +85,42 @@ int rte_thread_equal(rte_thread_t t1, rte_thread_t t2); #ifdef RTE_HAS_CPUSET +/** + * Set the affinity of thread 'thread_id' to the cpu set + * specified by 'cpuset'. + * + * @param thread_id + * Id of the thread for which to set the affinity. + * + * @param cpuset + * Pointer to CPU affinity to set. + * + * @return + * On success, return 0. + * On failure, return a positive errno-style error number. + */ +__rte_experimental +int rte_thread_set_affinity_by_id(rte_thread_t thread_id, + const rte_cpuset_t *cpuset); + +/** + * Get the affinity of thread 'thread_id' and store it + * in 'cpuset'. + * + * @param thread_id + * Id of the thread for which to get the affinity. + * + * @param cpuset + * Pointer for storing the affinity value. + * + * @return + * On success, return 0. + * On failure, return a positive errno-style error number. + */ +__rte_experimental +int rte_thread_get_affinity_by_id(rte_thread_t thread_id, + rte_cpuset_t *cpuset); + /** * Initialize the attributes of a thread. * These attributes can be passed to the rte_thread_create() function diff --git a/lib/eal/version.map b/lib/eal/version.map index 3fc33fcd70..193a79a4d2 100644 --- a/lib/eal/version.map +++ b/lib/eal/version.map @@ -427,6 +427,8 @@ EXPERIMENTAL { rte_thread_attr_get_affinity; rte_thread_attr_set_affinity; rte_thread_attr_set_priority; + rte_thread_get_affinity_by_id; + rte_thread_set_affinity_by_id; }; INTERNAL { diff --git a/lib/eal/windows/eal_lcore.c b/lib/eal/windows/eal_lcore.c index 476c2d2bdf..295af50698 100644 --- a/lib/eal/windows/eal_lcore.c +++ b/lib/eal/windows/eal_lcore.c @@ -2,7 +2,6 @@ * Copyright(c) 2019 Intel Corporation */ -#include #include #include @@ -27,13 +26,15 @@ struct socket_map { }; struct cpu_map { - unsigned int socket_count; unsigned int lcore_count; + unsigned int socket_count; + unsigned int cpu_count; struct lcore_map lcores[RTE_MAX_LCORE]; struct socket_map sockets[RTE_MAX_NUMA_NODES]; + GROUP_AFFINITY cpus[CPU_SETSIZE]; }; -static struct cpu_map cpu_map = { 0 }; +static struct cpu_map cpu_map; /* eal_create_cpu_map() is called before logging is initialized */ static void @@ -47,13 +48,118 @@ log_early(const char *format, ...) va_end(va); } +static int +eal_query_group_affinity(void) +{ + SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *infos = NULL; + unsigned int *cpu_count = &cpu_map.cpu_count; + DWORD infos_size = 0; + int ret = 0; + USHORT group_count; + KAFFINITY affinity; + USHORT group_no; + unsigned int i; + + if (!GetLogicalProcessorInformationEx(RelationGroup, NULL, + &infos_size)) { + DWORD error = GetLastError(); + if (error != ERROR_INSUFFICIENT_BUFFER) { + log_early("Cannot get group information size, " + "error %lu\n", error); + rte_errno = EINVAL; + ret = -1; + goto cleanup; + } + } + + infos = malloc(infos_size); + if (infos == NULL) { + log_early("Cannot allocate memory for NUMA node information\n"); + rte_errno = ENOMEM; + ret = -1; + goto cleanup; + } + + if (!GetLogicalProcessorInformationEx(RelationGroup, infos, + &infos_size)) { + log_early("Cannot get group information, error %lu\n", + GetLastError()); + rte_errno = EINVAL; + ret = -1; + goto cleanup; + } + + *cpu_count = 0; + group_count = infos->Group.ActiveGroupCount; + for (group_no = 0; group_no < group_count; group_no++) { + + affinity = infos->Group.GroupInfo[group_no].ActiveProcessorMask; + + for (i = 0; i < EAL_PROCESSOR_GROUP_SIZE; i++) { + if ((affinity & ((KAFFINITY)1 << i)) == 0) + continue; + cpu_map.cpus[*cpu_count].Group = group_no; + cpu_map.cpus[*cpu_count].Mask = (KAFFINITY)1 << i; + (*cpu_count)++; + } + } + +cleanup: + free(infos); + return ret; +} + +static bool +eal_create_lcore_map(const SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *info) +{ + const unsigned int node_id = info->NumaNode.NodeNumber; + const GROUP_AFFINITY *cores = &info->NumaNode.GroupMask; + struct lcore_map *lcore; + unsigned int socket_id; + unsigned int i; + + /* NUMA node may be reported multiple times if it includes + * cores from different processor groups, e. g. 80 cores + * of a physical processor comprise one NUMA node, but two + * processor groups, because group size is limited by 32/64. + */ + for (socket_id = 0; socket_id < cpu_map.socket_count; socket_id++) { + if (cpu_map.sockets[socket_id].node_id == node_id) + break; + } + + if (socket_id == cpu_map.socket_count) { + if (socket_id == RTE_DIM(cpu_map.sockets)) + return true; + + cpu_map.sockets[socket_id].node_id = node_id; + cpu_map.socket_count++; + } + + for (i = 0; i < EAL_PROCESSOR_GROUP_SIZE; i++) { + if ((cores->Mask & ((KAFFINITY)1 << i)) == 0) + continue; + + if (cpu_map.lcore_count == RTE_DIM(cpu_map.lcores)) + return true; + + lcore = &cpu_map.lcores[cpu_map.lcore_count]; + lcore->socket_id = socket_id; + lcore->core_id = cores->Group * EAL_PROCESSOR_GROUP_SIZE + i; + cpu_map.lcore_count++; + } + return false; +} + int eal_create_cpu_map(void) { SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *infos, *info; DWORD infos_size; bool full = false; + int ret = 0; + infos = NULL; infos_size = 0; if (!GetLogicalProcessorInformationEx( RelationNumaNode, NULL, &infos_size)) { @@ -78,57 +184,29 @@ eal_create_cpu_map(void) log_early("Cannot get NUMA node information, error %lu\n", GetLastError()); rte_errno = EINVAL; - return -1; + ret = -1; + goto exit; } info = infos; while ((uint8_t *)info - (uint8_t *)infos < infos_size) { - unsigned int node_id = info->NumaNode.NodeNumber; - GROUP_AFFINITY *cores = &info->NumaNode.GroupMask; - struct lcore_map *lcore; - unsigned int i, socket_id; - - /* NUMA node may be reported multiple times if it includes - * cores from different processor groups, e. g. 80 cores - * of a physical processor comprise one NUMA node, but two - * processor groups, because group size is limited by 32/64. - */ - for (socket_id = 0; socket_id < cpu_map.socket_count; - socket_id++) { - if (cpu_map.sockets[socket_id].node_id == node_id) - break; - } - - if (socket_id == cpu_map.socket_count) { - if (socket_id == RTE_DIM(cpu_map.sockets)) { - full = true; - goto exit; - } - - cpu_map.sockets[socket_id].node_id = node_id; - cpu_map.socket_count++; - } - - for (i = 0; i < EAL_PROCESSOR_GROUP_SIZE; i++) { - if ((cores->Mask & ((KAFFINITY)1 << i)) == 0) - continue; - - if (cpu_map.lcore_count == RTE_DIM(cpu_map.lcores)) { - full = true; - goto exit; - } - - lcore = &cpu_map.lcores[cpu_map.lcore_count]; - lcore->socket_id = socket_id; - lcore->core_id = - cores->Group * EAL_PROCESSOR_GROUP_SIZE + i; - cpu_map.lcore_count++; + if (eal_create_lcore_map(info)) { + full = true; + break; } info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)( (uint8_t *)info + info->Size); } + if (eal_query_group_affinity()) { + /* + * No need to set rte_errno here. + * It is set by eal_query_group_affinity(). + */ + ret = -1; + goto exit; + } exit: if (full) { /* Not a fatal error, but important for troubleshooting. */ @@ -138,7 +216,7 @@ eal_create_cpu_map(void) free(infos); - return 0; + return ret; } int @@ -164,3 +242,11 @@ eal_socket_numa_node(unsigned int socket_id) { return cpu_map.sockets[socket_id].node_id; } + +PGROUP_AFFINITY +eal_get_cpu_affinity(size_t cpu_index) +{ + RTE_VERIFY(cpu_index < CPU_SETSIZE); + + return &cpu_map.cpus[cpu_index]; +} diff --git a/lib/eal/windows/eal_windows.h b/lib/eal/windows/eal_windows.h index 23ead6d30c..355ef181a5 100644 --- a/lib/eal/windows/eal_windows.h +++ b/lib/eal/windows/eal_windows.h @@ -55,6 +55,16 @@ int eal_thread_create(pthread_t *thread); */ unsigned int eal_socket_numa_node(unsigned int socket_id); +/** + * Get pointer to the group affinity for the cpu. + * + * @param cpu_index + * Index of the cpu, as it comes from rte_cpuset_t. + * @return + * Pointer to the group affinity for the cpu. + */ +PGROUP_AFFINITY eal_get_cpu_affinity(size_t cpu_index); + /** * Schedule code for execution in the interrupt thread. * diff --git a/lib/eal/windows/rte_thread.c b/lib/eal/windows/rte_thread.c index c1ecfbd6ae..0127119f49 100644 --- a/lib/eal/windows/rte_thread.c +++ b/lib/eal/windows/rte_thread.c @@ -7,7 +7,8 @@ #include #include #include -#include + +#include "eal_windows.h" struct eal_tls_key { DWORD thread_index; @@ -77,6 +78,128 @@ rte_thread_equal(rte_thread_t t1, rte_thread_t t2) return t1.opaque_id == t2.opaque_id; } +static int +rte_convert_cpuset_to_affinity(const rte_cpuset_t *cpuset, + PGROUP_AFFINITY affinity) +{ + int ret = 0; + PGROUP_AFFINITY cpu_affinity = NULL; + unsigned int cpu_idx; + + memset(affinity, 0, sizeof(GROUP_AFFINITY)); + affinity->Group = (USHORT)-1; + + /* Check that all cpus of the set belong to the same processor group and + * accumulate thread affinity to be applied. + */ + for (cpu_idx = 0; cpu_idx < CPU_SETSIZE; cpu_idx++) { + if (!CPU_ISSET(cpu_idx, cpuset)) + continue; + + cpu_affinity = eal_get_cpu_affinity(cpu_idx); + + if (affinity->Group == (USHORT)-1) { + affinity->Group = cpu_affinity->Group; + } else if (affinity->Group != cpu_affinity->Group) { + ret = EINVAL; + goto cleanup; + } + + affinity->Mask |= cpu_affinity->Mask; + } + + if (affinity->Mask == 0) { + ret = EINVAL; + goto cleanup; + } + +cleanup: + return ret; +} + +int +rte_thread_set_affinity_by_id(rte_thread_t thread_id, + const rte_cpuset_t *cpuset) +{ + int ret = 0; + GROUP_AFFINITY thread_affinity; + HANDLE thread_handle = NULL; + + RTE_VERIFY(cpuset != NULL); + + ret = rte_convert_cpuset_to_affinity(cpuset, &thread_affinity); + if (ret != 0) { + RTE_LOG(DEBUG, EAL, "Unable to convert cpuset to thread affinity\n"); + goto cleanup; + } + + thread_handle = OpenThread(THREAD_ALL_ACCESS, FALSE, + thread_id.opaque_id); + if (thread_handle == NULL) { + ret = thread_log_last_error("OpenThread()"); + goto cleanup; + } + + if (!SetThreadGroupAffinity(thread_handle, &thread_affinity, NULL)) { + ret = thread_log_last_error("SetThreadGroupAffinity()"); + goto cleanup; + } + +cleanup: + if (thread_handle != NULL) { + CloseHandle(thread_handle); + thread_handle = NULL; + } + + return ret; +} + +int +rte_thread_get_affinity_by_id(rte_thread_t thread_id, + rte_cpuset_t *cpuset) +{ + HANDLE thread_handle = NULL; + PGROUP_AFFINITY cpu_affinity; + GROUP_AFFINITY thread_affinity; + unsigned int cpu_idx; + int ret = 0; + + RTE_VERIFY(cpuset != NULL); + + thread_handle = OpenThread(THREAD_ALL_ACCESS, FALSE, + thread_id.opaque_id); + if (thread_handle == NULL) { + ret = thread_log_last_error("OpenThread()"); + goto cleanup; + } + + /* obtain previous thread affinity */ + if (!GetThreadGroupAffinity(thread_handle, &thread_affinity)) { + ret = thread_log_last_error("GetThreadGroupAffinity()"); + goto cleanup; + } + + CPU_ZERO(cpuset); + + /* Convert affinity to DPDK cpu set */ + for (cpu_idx = 0; cpu_idx < CPU_SETSIZE; cpu_idx++) { + + cpu_affinity = eal_get_cpu_affinity(cpu_idx); + + if ((cpu_affinity->Group == thread_affinity.Group) && + ((cpu_affinity->Mask & thread_affinity.Mask) != 0)) { + CPU_SET(cpu_idx, cpuset); + } + } + +cleanup: + if (thread_handle != NULL) { + CloseHandle(thread_handle); + thread_handle = NULL; + } + return ret; +} + int rte_thread_attr_init(rte_thread_attr_t *attr) {