1
0
mirror of https://github.com/libuv/libuv synced 2025-03-28 21:13:16 +00:00

linux: don't delay EPOLL_CTL_DEL operations (#4328)

Perform EPOLL_CTL_DEL immediately instead of going through
io_uring's submit queue, otherwise the file descriptor may
be closed by the time the kernel starts the operation.

Fixes: https://github.com/libuv/libuv/issues/4323
This commit is contained in:
Ben Noordhuis 2024-03-21 09:23:08 +01:00 committed by GitHub
parent cc23e204d7
commit 3ecce91410
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 163 additions and 47 deletions

View File

@ -566,6 +566,7 @@ if(LIBUV_BUILD_TESTS)
test/test-hrtime.c
test/test-idle.c
test/test-idna.c
test/test-iouring-pollhup.c
test/test-ip4-addr.c
test/test-ip6-addr.c
test/test-ip-name.c

View File

@ -198,6 +198,7 @@ test_run_tests_SOURCES = test/blackhole-server.c \
test/test-hrtime.c \
test/test-idle.c \
test/test-idna.c \
test/test-iouring-pollhup.c \
test/test-ip4-addr.c \
test/test-ip6-addr.c \
test/test-ip-name.c \

View File

@ -712,23 +712,17 @@ void uv__platform_invalidate_fd(uv_loop_t* loop, int fd) {
* This avoids a problem where the same file description remains open
* in another process, causing repeated junk epoll events.
*
* Perform EPOLL_CTL_DEL immediately instead of going through
* io_uring's submit queue, otherwise the file descriptor may
* be closed by the time the kernel starts the operation.
*
* We pass in a dummy epoll_event, to work around a bug in old kernels.
*
* Work around a bug in kernels 3.10 to 3.19 where passing a struct that
* has the EPOLLWAKEUP flag set generates spurious audit syslog warnings.
*/
memset(&dummy, 0, sizeof(dummy));
if (inv == NULL) {
epoll_ctl(loop->backend_fd, EPOLL_CTL_DEL, fd, &dummy);
} else {
uv__epoll_ctl_prep(loop->backend_fd,
&lfields->ctl,
inv->prep,
EPOLL_CTL_DEL,
fd,
&dummy);
}
epoll_ctl(loop->backend_fd, EPOLL_CTL_DEL, fd, &dummy);
}
@ -1215,6 +1209,10 @@ static void uv__poll_io_uring(uv_loop_t* loop, struct uv__iou* iou) {
}
/* Only for EPOLL_CTL_ADD and EPOLL_CTL_MOD. EPOLL_CTL_DEL should always be
* executed immediately, otherwise the file descriptor may have been closed
* by the time the kernel starts the operation.
*/
static void uv__epoll_ctl_prep(int epollfd,
struct uv__iou* ctl,
struct epoll_event (*events)[256],
@ -1226,45 +1224,28 @@ static void uv__epoll_ctl_prep(int epollfd,
uint32_t mask;
uint32_t slot;
if (ctl->ringfd == -1) {
if (!epoll_ctl(epollfd, op, fd, e))
return;
assert(op == EPOLL_CTL_ADD || op == EPOLL_CTL_MOD);
assert(ctl->ringfd != -1);
if (op == EPOLL_CTL_DEL)
return; /* Ignore errors, may be racing with another thread. */
mask = ctl->sqmask;
slot = (*ctl->sqtail)++ & mask;
if (op != EPOLL_CTL_ADD)
abort();
pe = &(*events)[slot];
*pe = *e;
if (errno != EEXIST)
abort();
sqe = ctl->sqe;
sqe = &sqe[slot];
/* File descriptor that's been watched before, update event mask. */
if (!epoll_ctl(epollfd, EPOLL_CTL_MOD, fd, e))
return;
memset(sqe, 0, sizeof(*sqe));
sqe->addr = (uintptr_t) pe;
sqe->fd = epollfd;
sqe->len = op;
sqe->off = fd;
sqe->opcode = UV__IORING_OP_EPOLL_CTL;
sqe->user_data = op | slot << 2 | (int64_t) fd << 32;
abort();
} else {
mask = ctl->sqmask;
slot = (*ctl->sqtail)++ & mask;
pe = &(*events)[slot];
*pe = *e;
sqe = ctl->sqe;
sqe = &sqe[slot];
memset(sqe, 0, sizeof(*sqe));
sqe->addr = (uintptr_t) pe;
sqe->fd = epollfd;
sqe->len = op;
sqe->off = fd;
sqe->opcode = UV__IORING_OP_EPOLL_CTL;
sqe->user_data = op | slot << 2 | (int64_t) fd << 32;
if ((*ctl->sqhead & mask) == (*ctl->sqtail & mask))
uv__epoll_ctl_flush(epollfd, ctl, events);
}
if ((*ctl->sqhead & mask) == (*ctl->sqtail & mask))
uv__epoll_ctl_flush(epollfd, ctl, events);
}
@ -1405,8 +1386,22 @@ void uv__io_poll(uv_loop_t* loop, int timeout) {
w->events = w->pevents;
e.events = w->pevents;
e.data.fd = w->fd;
fd = w->fd;
uv__epoll_ctl_prep(epollfd, ctl, &prep, op, w->fd, &e);
if (ctl->ringfd != -1) {
uv__epoll_ctl_prep(epollfd, ctl, &prep, op, fd, &e);
continue;
}
if (!epoll_ctl(epollfd, op, fd, &e))
continue;
assert(op == EPOLL_CTL_ADD);
assert(errno == EEXIST);
/* File descriptor that's been watched before, update event mask. */
if (epoll_ctl(epollfd, EPOLL_CTL_MOD, fd, &e))
abort();
}
inv.events = events;
@ -1494,8 +1489,12 @@ void uv__io_poll(uv_loop_t* loop, int timeout) {
*
* Ignore all errors because we may be racing with another thread
* when the file descriptor is closed.
*
* Perform EPOLL_CTL_DEL immediately instead of going through
* io_uring's submit queue, otherwise the file descriptor may
* be closed by the time the kernel starts the operation.
*/
uv__epoll_ctl_prep(epollfd, ctl, &prep, EPOLL_CTL_DEL, fd, pe);
epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, pe);
continue;
}

111
test/test-iouring-pollhup.c Normal file
View File

@ -0,0 +1,111 @@
/* Copyright libuv project contributors. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include "uv.h"
#include "task.h"
#ifdef _WIN32
TEST_IMPL(iouring_pollhup) {
RETURN_SKIP("Not on Windows.");
}
#else /* !_WIN32 */
#include <unistd.h> /* close() */
static uv_pipe_t p1;
static uv_pipe_t p2;
static uv_idle_t idle_handle;
static int iters;
static int duped_fd;
static int newpipefds[2];
static void alloc_buffer(uv_handle_t* handle,
size_t suggested_size,
uv_buf_t* buf) {
static char slab[32];
*buf = uv_buf_init(slab, sizeof(slab));
}
static void read_data2(uv_stream_t* stream,
ssize_t nread,
const uv_buf_t* buf) {
if (nread < 0) {
ASSERT_EQ(nread, UV_EOF);
ASSERT_OK(close(duped_fd));
duped_fd = -1;
uv_close((uv_handle_t*) &p2, NULL);
uv_close((uv_handle_t*) &idle_handle, NULL);
} else {
/* If nread == 0 is because of POLLHUP received still from pipefds[0] file
* description which is still referenced in duped_fd. It should not happen
* if close(p1) was called after EPOLL_CTL_DEL.
*/
ASSERT_GT(nread, 0);
}
}
static void idle_cb(uv_idle_t* handle) {
if (++iters == 1) {
ASSERT_OK(uv_pipe_open(&p2, newpipefds[0]));
ASSERT_OK(uv_read_start((uv_stream_t*) &p2, alloc_buffer, read_data2));
} else {
ASSERT_OK(uv_idle_stop(handle));
ASSERT_OK(close(newpipefds[1]));
newpipefds[1] = -1;
}
}
static void read_data(uv_stream_t* stream,
ssize_t nread,
const uv_buf_t* buf) {
ASSERT_EQ(nread, UV_EOF);
uv_close((uv_handle_t*) stream, NULL);
ASSERT_OK(uv_idle_start(&idle_handle, idle_cb));
}
TEST_IMPL(iouring_pollhup) {
uv_loop_t* loop;
int pipefds[2];
loop = uv_default_loop();
ASSERT_OK(uv_pipe_init(loop, &p1, 0));
ASSERT_OK(uv_pipe_init(loop, &p2, 0));
ASSERT_OK(uv_idle_init(loop, &idle_handle));
ASSERT_OK(pipe(pipefds));
ASSERT_OK(pipe(newpipefds));
ASSERT_OK(uv_pipe_open(&p1, pipefds[0]));
duped_fd = dup(pipefds[0]);
ASSERT_NE(duped_fd, -1);
ASSERT_OK(uv_read_start((uv_stream_t*) &p1, alloc_buffer, read_data));
ASSERT_OK(close(pipefds[1])); /* Close write end, generate POLLHUP. */
pipefds[1] = -1;
ASSERT_OK(uv_run(loop, UV_RUN_DEFAULT));
MAKE_VALGRIND_HAPPY(uv_default_loop());
return 0;
}
#endif /* !_WIN32 */

View File

@ -563,6 +563,8 @@ TEST_DECLARE (fork_threadpool_queue_work_simple)
#endif
#endif
TEST_DECLARE (iouring_pollhup)
TEST_DECLARE (idna_toascii)
TEST_DECLARE (utf8_decode1)
TEST_DECLARE (utf8_decode1_overrun)
@ -1204,6 +1206,8 @@ TASK_LIST_START
#endif
#endif
TEST_ENTRY (iouring_pollhup)
TEST_ENTRY (utf8_decode1)
TEST_ENTRY (utf8_decode1_overrun)
TEST_ENTRY (uname)