get:
Show a patch.

patch:
Update a patch.

put:
Update a patch.

GET /api/patches/139214/?format=api
HTTP 200 OK
Allow: GET, PUT, PATCH, HEAD, OPTIONS
Content-Type: application/json
Vary: Accept

{
    "id": 139214,
    "url": "http://patches.dpdk.org/api/patches/139214/?format=api",
    "web_url": "http://patches.dpdk.org/project/dpdk/patch/20240409114845.1336403-6-maxime.coquelin@redhat.com/",
    "project": {
        "id": 1,
        "url": "http://patches.dpdk.org/api/projects/1/?format=api",
        "name": "DPDK",
        "link_name": "dpdk",
        "list_id": "dev.dpdk.org",
        "list_email": "dev@dpdk.org",
        "web_url": "http://core.dpdk.org",
        "scm_url": "git://dpdk.org/dpdk",
        "webscm_url": "http://git.dpdk.org/dpdk",
        "list_archive_url": "https://inbox.dpdk.org/dev",
        "list_archive_url_format": "https://inbox.dpdk.org/dev/{}",
        "commit_url_format": ""
    },
    "msgid": "<20240409114845.1336403-6-maxime.coquelin@redhat.com>",
    "list_archive_url": "https://inbox.dpdk.org/dev/20240409114845.1336403-6-maxime.coquelin@redhat.com",
    "date": "2024-04-09T11:48:45",
    "name": "[v3,5/5] vhost: manage FD with epoll",
    "commit_ref": null,
    "pull_url": null,
    "state": "new",
    "archived": false,
    "hash": "1d8fb64c9f802f51da4ee9c1d66f7e8b611ecef1",
    "submitter": {
        "id": 512,
        "url": "http://patches.dpdk.org/api/people/512/?format=api",
        "name": "Maxime Coquelin",
        "email": "maxime.coquelin@redhat.com"
    },
    "delegate": {
        "id": 2642,
        "url": "http://patches.dpdk.org/api/users/2642/?format=api",
        "username": "mcoquelin",
        "first_name": "Maxime",
        "last_name": "Coquelin",
        "email": "maxime.coquelin@redhat.com"
    },
    "mbox": "http://patches.dpdk.org/project/dpdk/patch/20240409114845.1336403-6-maxime.coquelin@redhat.com/mbox/",
    "series": [
        {
            "id": 31711,
            "url": "http://patches.dpdk.org/api/series/31711/?format=api",
            "web_url": "http://patches.dpdk.org/project/dpdk/list/?series=31711",
            "date": "2024-04-09T11:48:40",
            "name": "vhost: FD manager improvements",
            "version": 3,
            "mbox": "http://patches.dpdk.org/series/31711/mbox/"
        }
    ],
    "comments": "http://patches.dpdk.org/api/patches/139214/comments/",
    "check": "success",
    "checks": "http://patches.dpdk.org/api/patches/139214/checks/",
    "tags": {},
    "related": [],
    "headers": {
        "Return-Path": "<dev-bounces@dpdk.org>",
        "X-Original-To": "patchwork@inbox.dpdk.org",
        "Delivered-To": "patchwork@inbox.dpdk.org",
        "Received": [
            "from mails.dpdk.org (mails.dpdk.org [217.70.189.124])\n\tby inbox.dpdk.org (Postfix) with ESMTP id A3B6543E29;\n\tTue,  9 Apr 2024 13:49:32 +0200 (CEST)",
            "from mails.dpdk.org (localhost [127.0.0.1])\n\tby mails.dpdk.org (Postfix) with ESMTP id B01BE406FF;\n\tTue,  9 Apr 2024 13:49:06 +0200 (CEST)",
            "from us-smtp-delivery-124.mimecast.com\n (us-smtp-delivery-124.mimecast.com [170.10.129.124])\n by mails.dpdk.org (Postfix) with ESMTP id F3C974068A\n for <dev@dpdk.org>; Tue,  9 Apr 2024 13:49:03 +0200 (CEST)",
            "from mimecast-mx02.redhat.com (mx-ext.redhat.com [66.187.233.73])\n by relay.mimecast.com with ESMTP with STARTTLS (version=TLSv1.3,\n cipher=TLS_AES_256_GCM_SHA384) id us-mta-141-KJnUv4SWOPS2v_tYi3ADfg-1; Tue,\n 09 Apr 2024 07:49:02 -0400",
            "from smtp.corp.redhat.com (int-mx03.intmail.prod.int.rdu2.redhat.com\n [10.11.54.3])\n (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits)\n key-exchange X25519 server-signature RSA-PSS (2048 bits) server-digest\n SHA256)\n (No client certificate requested)\n by mimecast-mx02.redhat.com (Postfix) with ESMTPS id B2B8038107B7;\n Tue,  9 Apr 2024 11:49:01 +0000 (UTC)",
            "from max-p1.redhat.com (unknown [10.39.208.27])\n by smtp.corp.redhat.com (Postfix) with ESMTP id 483AF10060FE;\n Tue,  9 Apr 2024 11:49:00 +0000 (UTC)"
        ],
        "DKIM-Signature": "v=1; a=rsa-sha256; c=relaxed/relaxed; d=redhat.com;\n s=mimecast20190719; t=1712663343;\n h=from:from:reply-to:subject:subject:date:date:message-id:message-id:\n to:to:cc:cc:mime-version:mime-version:content-type:content-type:\n content-transfer-encoding:content-transfer-encoding:\n in-reply-to:in-reply-to:references:references;\n bh=UkehagZcbZwuThpik0rBz4rDUxpt0YcZCblGv9du2LI=;\n b=Os++10gPNi6I7jKkW1qMcV4axqJ6IW8iPlRgtq33Qo5wXuL7tW11zsIM8ophZqc4eEKb2a\n BIqCtzAR1+UpH85INhG+07IoHWj5KQOL88HJKzaBLfM+sgOmkZUDn6UJXb+kDCfvpz5E22\n 9ybV9dLmQZxopDIiVf4qvjUVaZkTD7g=",
        "X-MC-Unique": "KJnUv4SWOPS2v_tYi3ADfg-1",
        "From": "Maxime Coquelin <maxime.coquelin@redhat.com>",
        "To": "dev@dpdk.org,\n\tdavid.marchand@redhat.com,\n\tchenbox@nvidia.com",
        "Cc": "Maxime Coquelin <maxime.coquelin@redhat.com>",
        "Subject": "[PATCH v3 5/5] vhost: manage FD with epoll",
        "Date": "Tue,  9 Apr 2024 13:48:45 +0200",
        "Message-ID": "<20240409114845.1336403-6-maxime.coquelin@redhat.com>",
        "In-Reply-To": "<20240409114845.1336403-1-maxime.coquelin@redhat.com>",
        "References": "<20240409114845.1336403-1-maxime.coquelin@redhat.com>",
        "MIME-Version": "1.0",
        "X-Scanned-By": "MIMEDefang 3.4.1 on 10.11.54.3",
        "X-Mimecast-Spam-Score": "0",
        "X-Mimecast-Originator": "redhat.com",
        "Content-Transfer-Encoding": "8bit",
        "Content-Type": "text/plain; charset=\"US-ASCII\"; x-default=true",
        "X-BeenThere": "dev@dpdk.org",
        "X-Mailman-Version": "2.1.29",
        "Precedence": "list",
        "List-Id": "DPDK patches and discussions <dev.dpdk.org>",
        "List-Unsubscribe": "<https://mails.dpdk.org/options/dev>,\n <mailto:dev-request@dpdk.org?subject=unsubscribe>",
        "List-Archive": "<http://mails.dpdk.org/archives/dev/>",
        "List-Post": "<mailto:dev@dpdk.org>",
        "List-Help": "<mailto:dev-request@dpdk.org?subject=help>",
        "List-Subscribe": "<https://mails.dpdk.org/listinfo/dev>,\n <mailto:dev-request@dpdk.org?subject=subscribe>",
        "Errors-To": "dev-bounces@dpdk.org"
    },
    "content": "From: David Marchand <david.marchand@redhat.com>\n\nSwitch to epoll so that the concern over the poll() fd array\nis removed.\nAdd a simple list of used entries and track the next free entry.\n\nepoll() is thread safe, we no more need a synchronization\nmechanism and so can remove the notification pipe.\n\nSigned-off-by: David Marchand <david.marchand@redhat.com>\nSigned-off-by: Maxime Coquelin <maxime.coquelin@redhat.com>\n---\n lib/vhost/fd_man.c | 399 ++++++++++++---------------------------------\n lib/vhost/fd_man.h |   5 +-\n 2 files changed, 106 insertions(+), 298 deletions(-)",
    "diff": "diff --git a/lib/vhost/fd_man.c b/lib/vhost/fd_man.c\nindex 8b47c97d45..a4a2965da1 100644\n--- a/lib/vhost/fd_man.c\n+++ b/lib/vhost/fd_man.c\n@@ -3,9 +3,9 @@\n  */\n \n #include <errno.h>\n-#include <pthread.h>\n #include <stdio.h>\n #include <string.h>\n+#include <sys/epoll.h>\n #include <unistd.h>\n \n #include <rte_common.h>\n@@ -21,49 +21,34 @@ RTE_LOG_REGISTER_SUFFIX(vhost_fdset_logtype, fdset, INFO);\n #define VHOST_FDMAN_LOG(level, ...) \\\n \tRTE_LOG_LINE(level, VHOST_FDMAN, \"\" __VA_ARGS__)\n \n-#define FDPOLLERR (POLLERR | POLLHUP | POLLNVAL)\n-\n struct fdentry {\n \tint fd;\t\t/* -1 indicates this entry is empty */\n \tfd_cb rcb;\t/* callback when this fd is readable. */\n \tfd_cb wcb;\t/* callback when this fd is writeable.*/\n \tvoid *dat;\t/* fd context */\n \tint busy;\t/* whether this entry is being used in cb. */\n+\tLIST_ENTRY(fdentry) next;\n };\n \n struct fdset {\n \tchar name[RTE_THREAD_NAME_SIZE];\n-\tstruct pollfd rwfds[MAX_FDS];\n+\tint epfd;\n \tstruct fdentry fd[MAX_FDS];\n+\tLIST_HEAD(, fdentry) fdlist;\n+\tint next_free_idx;\n \trte_thread_t tid;\n \tpthread_mutex_t fd_mutex;\n-\tpthread_mutex_t fd_polling_mutex;\n-\tint num;\t/* current fd number of this fdset */\n-\n-\tunion pipefds {\n-\t\tstruct {\n-\t\t\tint pipefd[2];\n-\t\t};\n-\t\tstruct {\n-\t\t\tint readfd;\n-\t\t\tint writefd;\n-\t\t};\n-\t} u;\n-\n-\tpthread_mutex_t sync_mutex;\n-\tpthread_cond_t sync_cond;\n-\tbool sync;\n+\n \tbool destroy;\n };\n \n-static int fdset_add_no_sync(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat);\n-static uint32_t fdset_event_dispatch(void *arg);\n-\n #define MAX_FDSETS 8\n \n static struct fdset *fdsets[MAX_FDSETS];\n pthread_mutex_t fdsets_mutex = PTHREAD_MUTEX_INITIALIZER;\n \n+static uint32_t fdset_event_dispatch(void *arg);\n+\n static struct fdset *\n fdset_lookup(const char *name)\n {\n@@ -96,166 +81,6 @@ fdset_insert(struct fdset *fdset)\n \treturn -1;\n }\n \n-static void\n-fdset_pipe_read_cb(int readfd, void *dat,\n-\t\t   int *remove __rte_unused)\n-{\n-\tchar charbuf[16];\n-\tstruct fdset *fdset = dat;\n-\tint r = read(readfd, charbuf, sizeof(charbuf));\n-\t/*\n-\t * Just an optimization, we don't care if read() failed\n-\t * so ignore explicitly its return value to make the\n-\t * compiler happy\n-\t */\n-\tRTE_SET_USED(r);\n-\n-\tpthread_mutex_lock(&fdset->sync_mutex);\n-\tfdset->sync = true;\n-\tpthread_cond_broadcast(&fdset->sync_cond);\n-\tpthread_mutex_unlock(&fdset->sync_mutex);\n-}\n-\n-static void\n-fdset_pipe_uninit(struct fdset *fdset)\n-{\n-\tfdset_del(fdset, fdset->u.readfd);\n-\tclose(fdset->u.readfd);\n-\tfdset->u.readfd = -1;\n-\tclose(fdset->u.writefd);\n-\tfdset->u.writefd = -1;\n-}\n-\n-static int\n-fdset_pipe_init(struct fdset *fdset)\n-{\n-\tint ret;\n-\n-\tpthread_mutex_init(&fdset->sync_mutex, NULL);\n-\tpthread_cond_init(&fdset->sync_cond, NULL);\n-\n-\tif (pipe(fdset->u.pipefd) < 0) {\n-\t\tVHOST_FDMAN_LOG(ERR,\n-\t\t\t\"failed to create pipe for vhost fdset\");\n-\t\treturn -1;\n-\t}\n-\n-\tret = fdset_add_no_sync(fdset, fdset->u.readfd,\n-\t\t\tfdset_pipe_read_cb, NULL, fdset);\n-\tif (ret < 0) {\n-\t\tVHOST_FDMAN_LOG(ERR,\n-\t\t\t\"failed to add pipe readfd %d into vhost server fdset\",\n-\t\t\tfdset->u.readfd);\n-\n-\t\tfdset_pipe_uninit(fdset);\n-\t\treturn -1;\n-\t}\n-\n-\treturn 0;\n-}\n-\n-static void\n-fdset_sync(struct fdset *fdset)\n-{\n-\tint ret;\n-\n-\tpthread_mutex_lock(&fdset->sync_mutex);\n-\n-\tfdset->sync = false;\n-\tret = write(fdset->u.writefd, \"1\", 1);\n-\tif (ret < 0) {\n-\t\tVHOST_FDMAN_LOG(ERR,\n-\t\t\t\"Failed to write to notification pipe: %s\",\n-\t\t\tstrerror(errno));\n-\t\tgoto out_unlock;\n-\t}\n-\n-\twhile (!fdset->sync)\n-\t\tpthread_cond_wait(&fdset->sync_cond, &fdset->sync_mutex);\n-\n-out_unlock:\n-\tpthread_mutex_unlock(&fdset->sync_mutex);\n-}\n-\n-static int\n-get_last_valid_idx(struct fdset *pfdset, int last_valid_idx)\n-{\n-\tint i;\n-\n-\tfor (i = last_valid_idx; i >= 0 && pfdset->fd[i].fd == -1; i--)\n-\t\t;\n-\n-\treturn i;\n-}\n-\n-static void\n-fdset_move(struct fdset *pfdset, int dst, int src)\n-{\n-\tpfdset->fd[dst]    = pfdset->fd[src];\n-\tpfdset->rwfds[dst] = pfdset->rwfds[src];\n-}\n-\n-static void\n-fdset_shrink_nolock(struct fdset *pfdset)\n-{\n-\tint i;\n-\tint last_valid_idx = get_last_valid_idx(pfdset, pfdset->num - 1);\n-\n-\tfor (i = 0; i < last_valid_idx; i++) {\n-\t\tif (pfdset->fd[i].fd != -1)\n-\t\t\tcontinue;\n-\n-\t\tfdset_move(pfdset, i, last_valid_idx);\n-\t\tlast_valid_idx = get_last_valid_idx(pfdset, last_valid_idx - 1);\n-\t}\n-\tpfdset->num = last_valid_idx + 1;\n-}\n-\n-/*\n- * Find deleted fd entries and remove them\n- */\n-static void\n-fdset_shrink(struct fdset *pfdset)\n-{\n-\tpthread_mutex_lock(&pfdset->fd_mutex);\n-\tfdset_shrink_nolock(pfdset);\n-\tpthread_mutex_unlock(&pfdset->fd_mutex);\n-}\n-\n-/**\n- * Returns the index in the fdset for a given fd.\n- * @return\n- *   index for the fd, or -1 if fd isn't in the fdset.\n- */\n-static int\n-fdset_find_fd(struct fdset *pfdset, int fd)\n-{\n-\tint i;\n-\n-\tfor (i = 0; i < pfdset->num && pfdset->fd[i].fd != fd; i++)\n-\t\t;\n-\n-\treturn i == pfdset->num ? -1 : i;\n-}\n-\n-static void\n-fdset_add_fd(struct fdset *pfdset, int idx, int fd,\n-\tfd_cb rcb, fd_cb wcb, void *dat)\n-{\n-\tstruct fdentry *pfdentry = &pfdset->fd[idx];\n-\tstruct pollfd *pfd = &pfdset->rwfds[idx];\n-\n-\tpfdentry->fd  = fd;\n-\tpfdentry->rcb = rcb;\n-\tpfdentry->wcb = wcb;\n-\tpfdentry->dat = dat;\n-\n-\tpfd->fd = fd;\n-\tpfd->events  = rcb ? POLLIN : 0;\n-\tpfd->events |= wcb ? POLLOUT : 0;\n-\tpfd->revents = 0;\n-}\n-\n struct fdset *\n fdset_init(const char *name)\n {\n@@ -284,16 +109,20 @@ fdset_init(const char *name)\n \trte_strscpy(fdset->name, name, RTE_THREAD_NAME_SIZE);\n \n \tpthread_mutex_init(&fdset->fd_mutex, NULL);\n-\tpthread_mutex_init(&fdset->fd_polling_mutex, NULL);\n \n-\tfor (i = 0; i < MAX_FDS; i++) {\n+\tfor (i = 0; i < (int)RTE_DIM(fdset->fd); i++) {\n \t\tfdset->fd[i].fd = -1;\n \t\tfdset->fd[i].dat = NULL;\n \t}\n-\tfdset->num = 0;\n+\tLIST_INIT(&fdset->fdlist);\n \n-\tif (fdset_pipe_init(fdset)) {\n-\t\tVHOST_FDMAN_LOG(ERR, \"Failed to init pipe for %s\", name);\n+\t/*\n+\t * Any non-zero value would work (see man epoll_create),\n+\t * but pass MAX_FDS for consistency.\n+\t */\n+\tfdset->epfd = epoll_create(MAX_FDS);\n+\tif (fdset->epfd < 0) {\n+\t\tVHOST_FDMAN_LOG(ERR, \"failed to create epoll for %s fdset\", name);\n \t\tgoto err_free;\n \t}\n \n@@ -301,7 +130,7 @@ fdset_init(const char *name)\n \t\t\t\t\tfdset_event_dispatch, fdset)) {\n \t\tVHOST_FDMAN_LOG(ERR, \"Failed to create %s event dispatch thread\",\n \t\t\t\tfdset->name);\n-\t\tgoto err_pipe;\n+\t\tgoto err_epoll;\n \t}\n \n \tif (fdset_insert(fdset)) {\n@@ -315,10 +144,9 @@ fdset_init(const char *name)\n \n err_thread:\n \tfdset->destroy = true;\n-\tfdset_sync(fdset);\n \trte_thread_join(fdset->tid, &val);\n-err_pipe:\n-\tfdset_pipe_uninit(fdset);\n+err_epoll:\n+\tclose(fdset->epfd);\n err_free:\n \trte_free(fdset);\n err_unlock:\n@@ -330,78 +158,99 @@ fdset_init(const char *name)\n /**\n  * Register the fd in the fdset with read/write handler and context.\n  */\n-static int\n-fdset_add_no_sync(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)\n+int\n+fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)\n {\n-\tint i;\n+\tstruct fdentry *pfdentry;\n+\tstruct epoll_event ev;\n \n \tif (pfdset == NULL || fd == -1)\n \t\treturn -1;\n \n \tpthread_mutex_lock(&pfdset->fd_mutex);\n-\ti = pfdset->num < MAX_FDS ? pfdset->num++ : -1;\n-\tif (i == -1) {\n-\t\tpthread_mutex_lock(&pfdset->fd_polling_mutex);\n-\t\tfdset_shrink_nolock(pfdset);\n-\t\tpthread_mutex_unlock(&pfdset->fd_polling_mutex);\n-\t\ti = pfdset->num < MAX_FDS ? pfdset->num++ : -1;\n-\t\tif (i == -1) {\n-\t\t\tpthread_mutex_unlock(&pfdset->fd_mutex);\n-\t\t\treturn -2;\n-\t\t}\n+\tif (pfdset->next_free_idx >= (int)RTE_DIM(pfdset->fd)) {\n+\t\tpthread_mutex_unlock(&pfdset->fd_mutex);\n+\t\treturn -2;\n \t}\n \n-\tfdset_add_fd(pfdset, i, fd, rcb, wcb, dat);\n+\tpfdentry = &pfdset->fd[pfdset->next_free_idx];\n+\tpfdentry->fd  = fd;\n+\tpfdentry->rcb = rcb;\n+\tpfdentry->wcb = wcb;\n+\tpfdentry->dat = dat;\n+\n+\tLIST_INSERT_HEAD(&pfdset->fdlist, pfdentry, next);\n+\n+\t/* Find next free slot */\n+\tpfdset->next_free_idx++;\n+\tfor (; pfdset->next_free_idx < (int)RTE_DIM(pfdset->fd); pfdset->next_free_idx++) {\n+\t\tif (pfdset->fd[pfdset->next_free_idx].fd != -1)\n+\t\t\tcontinue;\n+\t\tbreak;\n+\t}\n \tpthread_mutex_unlock(&pfdset->fd_mutex);\n \n+\tev.events = EPOLLERR;\n+\tev.events |= rcb ? EPOLLIN : 0;\n+\tev.events |= wcb ? EPOLLOUT : 0;\n+\tev.data.fd = fd;\n+\n+\tif (epoll_ctl(pfdset->epfd, EPOLL_CTL_ADD, fd, &ev) == -1)\n+\t\tVHOST_FDMAN_LOG(ERR, \"could not add %d fd to %d epfd: %s\",\n+\t\t\tfd, pfdset->epfd, strerror(errno));\n+\n \treturn 0;\n }\n \n-int\n-fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat)\n+static struct fdentry *\n+fdset_find_entry_locked(struct fdset *pfdset, int fd)\n {\n-\tint ret;\n+\tstruct fdentry *pfdentry;\n \n-\tret = fdset_add_no_sync(pfdset, fd, rcb, wcb, dat);\n-\tif (ret < 0)\n-\t\treturn ret;\n+\tLIST_FOREACH(pfdentry, &pfdset->fdlist, next) {\n+\t\tif (pfdentry->fd != fd)\n+\t\t\tcontinue;\n+\t\treturn pfdentry;\n+\t}\n \n-\tfdset_sync(pfdset);\n+\treturn NULL;\n+}\n \n-\treturn 0;\n+static void\n+fdset_del_locked(struct fdset *pfdset, struct fdentry *pfdentry)\n+{\n+\tint entry_idx;\n+\n+\tif (epoll_ctl(pfdset->epfd, EPOLL_CTL_DEL, pfdentry->fd, NULL) == -1)\n+\t\tVHOST_FDMAN_LOG(ERR, \"could not remove %d fd from %d epfd: %s\",\n+\t\t\tpfdentry->fd, pfdset->epfd, strerror(errno));\n+\n+\tpfdentry->fd = -1;\n+\tpfdentry->rcb = pfdentry->wcb = NULL;\n+\tpfdentry->dat = NULL;\n+\tentry_idx = pfdentry - pfdset->fd;\n+\tif (entry_idx < pfdset->next_free_idx)\n+\t\tpfdset->next_free_idx = entry_idx;\n+\tLIST_REMOVE(pfdentry, next);\n }\n \n-/**\n- *  Unregister the fd from the fdset.\n- *  Returns context of a given fd or NULL.\n- */\n-void *\n+void\n fdset_del(struct fdset *pfdset, int fd)\n {\n-\tint i;\n-\tvoid *dat = NULL;\n+\tstruct fdentry *pfdentry;\n \n \tif (pfdset == NULL || fd == -1)\n-\t\treturn NULL;\n+\t\treturn;\n \n \tdo {\n \t\tpthread_mutex_lock(&pfdset->fd_mutex);\n-\n-\t\ti = fdset_find_fd(pfdset, fd);\n-\t\tif (i != -1 && pfdset->fd[i].busy == 0) {\n-\t\t\t/* busy indicates r/wcb is executing! */\n-\t\t\tdat = pfdset->fd[i].dat;\n-\t\t\tpfdset->fd[i].fd = -1;\n-\t\t\tpfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL;\n-\t\t\tpfdset->fd[i].dat = NULL;\n-\t\t\ti = -1;\n+\t\tpfdentry = fdset_find_entry_locked(pfdset, fd);\n+\t\tif (pfdentry != NULL && pfdentry->busy == 0) {\n+\t\t\tfdset_del_locked(pfdset, pfdentry);\n+\t\t\tpfdentry = NULL;\n \t\t}\n \t\tpthread_mutex_unlock(&pfdset->fd_mutex);\n-\t} while (i != -1);\n-\n-\tfdset_sync(pfdset);\n-\n-\treturn dat;\n+\t} while (pfdentry != NULL);\n }\n \n /**\n@@ -415,28 +264,22 @@ fdset_del(struct fdset *pfdset, int fd)\n int\n fdset_try_del(struct fdset *pfdset, int fd)\n {\n-\tint i;\n+\tstruct fdentry *pfdentry;\n \n \tif (pfdset == NULL || fd == -1)\n \t\treturn -2;\n \n \tpthread_mutex_lock(&pfdset->fd_mutex);\n-\ti = fdset_find_fd(pfdset, fd);\n-\tif (i != -1 && pfdset->fd[i].busy) {\n+\tpfdentry = fdset_find_entry_locked(pfdset, fd);\n+\tif (pfdentry != NULL && pfdentry->busy != 0) {\n \t\tpthread_mutex_unlock(&pfdset->fd_mutex);\n \t\treturn -1;\n \t}\n \n-\tif (i != -1) {\n-\t\tpfdset->fd[i].fd = -1;\n-\t\tpfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL;\n-\t\tpfdset->fd[i].dat = NULL;\n-\t}\n+\tif (pfdentry != NULL)\n+\t\tfdset_del_locked(pfdset, pfdentry);\n \n \tpthread_mutex_unlock(&pfdset->fd_mutex);\n-\n-\tfdset_sync(pfdset);\n-\n \treturn 0;\n }\n \n@@ -453,53 +296,29 @@ static uint32_t\n fdset_event_dispatch(void *arg)\n {\n \tint i;\n-\tstruct pollfd *pfd;\n-\tstruct fdentry *pfdentry;\n \tfd_cb rcb, wcb;\n \tvoid *dat;\n \tint fd, numfds;\n \tint remove1, remove2;\n-\tint need_shrink;\n \tstruct fdset *pfdset = arg;\n-\tint val;\n \n \tif (pfdset == NULL)\n \t\treturn 0;\n \n \twhile (1) {\n+\t\tstruct epoll_event events[MAX_FDS];\n+\t\tstruct fdentry *pfdentry;\n \n-\t\t/*\n-\t\t * When poll is blocked, other threads might unregister\n-\t\t * listenfds from and register new listenfds into fdset.\n-\t\t * When poll returns, the entries for listenfds in the fdset\n-\t\t * might have been updated. It is ok if there is unwanted call\n-\t\t * for new listenfds.\n-\t\t */\n-\t\tpthread_mutex_lock(&pfdset->fd_mutex);\n-\t\tnumfds = pfdset->num;\n-\t\tpthread_mutex_unlock(&pfdset->fd_mutex);\n-\n-\t\tpthread_mutex_lock(&pfdset->fd_polling_mutex);\n-\t\tval = poll(pfdset->rwfds, numfds, 1000 /* millisecs */);\n-\t\tpthread_mutex_unlock(&pfdset->fd_polling_mutex);\n-\t\tif (val < 0)\n+\t\tnumfds = epoll_wait(pfdset->epfd, events, RTE_DIM(events), 1000);\n+\t\tif (numfds < 0)\n \t\t\tcontinue;\n \n-\t\tneed_shrink = 0;\n \t\tfor (i = 0; i < numfds; i++) {\n \t\t\tpthread_mutex_lock(&pfdset->fd_mutex);\n \n-\t\t\tpfdentry = &pfdset->fd[i];\n-\t\t\tfd = pfdentry->fd;\n-\t\t\tpfd = &pfdset->rwfds[i];\n-\n-\t\t\tif (fd < 0) {\n-\t\t\t\tneed_shrink = 1;\n-\t\t\t\tpthread_mutex_unlock(&pfdset->fd_mutex);\n-\t\t\t\tcontinue;\n-\t\t\t}\n-\n-\t\t\tif (!pfd->revents) {\n+\t\t\tfd = events[i].data.fd;\n+\t\t\tpfdentry = fdset_find_entry_locked(pfdset, fd);\n+\t\t\tif (pfdentry == NULL) {\n \t\t\t\tpthread_mutex_unlock(&pfdset->fd_mutex);\n \t\t\t\tcontinue;\n \t\t\t}\n@@ -513,9 +332,9 @@ fdset_event_dispatch(void *arg)\n \n \t\t\tpthread_mutex_unlock(&pfdset->fd_mutex);\n \n-\t\t\tif (rcb && pfd->revents & (POLLIN | FDPOLLERR))\n+\t\t\tif (rcb && events[i].events & (EPOLLIN | EPOLLERR | EPOLLHUP))\n \t\t\t\trcb(fd, dat, &remove1);\n-\t\t\tif (wcb && pfd->revents & (POLLOUT | FDPOLLERR))\n+\t\t\tif (wcb && events[i].events & (EPOLLOUT | EPOLLERR | EPOLLHUP))\n \t\t\t\twcb(fd, dat, &remove2);\n \t\t\tpfdentry->busy = 0;\n \t\t\t/*\n@@ -524,23 +343,13 @@ fdset_event_dispatch(void *arg)\n \t\t\t * directly.\n \t\t\t */\n \t\t\t/*\n-\t\t\t * When we are to clean up the fd from fdset,\n-\t\t\t * because the fd is closed in the cb,\n-\t\t\t * the old fd val could be reused by when creates new\n-\t\t\t * listen fd in another thread, we couldn't call\n-\t\t\t * fdset_del.\n+\t\t\t * A concurrent fdset_del may have been waiting for the\n+\t\t\t * fdentry not to be busy, so we can't call\n+\t\t\t * fdset_del_locked().\n \t\t\t */\n-\t\t\tif (remove1 || remove2) {\n-\t\t\t\tpfdentry->fd = -1;\n-\t\t\t\tneed_shrink = 1;\n-\t\t\t}\n+\t\t\tif (remove1 || remove2)\n+\t\t\t\tfdset_del(pfdset, fd);\n \t\t}\n-\n-\t\tif (need_shrink)\n-\t\t\tfdset_shrink(pfdset);\n-\n-\t\tif (pfdset->destroy)\n-\t\t\tbreak;\n \t}\n \n \treturn 0;\ndiff --git a/lib/vhost/fd_man.h b/lib/vhost/fd_man.h\nindex 079fa0155f..6398343a6a 100644\n--- a/lib/vhost/fd_man.h\n+++ b/lib/vhost/fd_man.h\n@@ -6,7 +6,7 @@\n #define _FD_MAN_H_\n #include <pthread.h>\n #include <poll.h>\n-#include <stdbool.h>\n+#include <sys/queue.h>\n \n struct fdset;\n \n@@ -19,8 +19,7 @@ struct fdset *fdset_init(const char *name);\n int fdset_add(struct fdset *pfdset, int fd,\n \tfd_cb rcb, fd_cb wcb, void *dat);\n \n-void *fdset_del(struct fdset *pfdset, int fd);\n-\n+void fdset_del(struct fdset *pfdset, int fd);\n int fdset_try_del(struct fdset *pfdset, int fd);\n \n #endif\n",
    "prefixes": [
        "v3",
        "5/5"
    ]
}