diff --git a/CMakeLists.txt b/CMakeLists.txt index 62609708..f5bb871b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -566,6 +566,7 @@ if(LIBUV_BUILD_TESTS) test/test-hrtime.c test/test-idle.c test/test-idna.c + test/test-iouring-pollhup.c test/test-ip4-addr.c test/test-ip6-addr.c test/test-ip-name.c diff --git a/Makefile.am b/Makefile.am index a14228da..c344f7dc 100644 --- a/Makefile.am +++ b/Makefile.am @@ -198,6 +198,7 @@ test_run_tests_SOURCES = test/blackhole-server.c \ test/test-hrtime.c \ test/test-idle.c \ test/test-idna.c \ + test/test-iouring-pollhup.c \ test/test-ip4-addr.c \ test/test-ip6-addr.c \ test/test-ip-name.c \ diff --git a/src/unix/linux.c b/src/unix/linux.c index 960af5c9..8fdcb12c 100644 --- a/src/unix/linux.c +++ b/src/unix/linux.c @@ -712,23 +712,17 @@ void uv__platform_invalidate_fd(uv_loop_t* loop, int fd) { * This avoids a problem where the same file description remains open * in another process, causing repeated junk epoll events. * + * Perform EPOLL_CTL_DEL immediately instead of going through + * io_uring's submit queue, otherwise the file descriptor may + * be closed by the time the kernel starts the operation. + * * We pass in a dummy epoll_event, to work around a bug in old kernels. * * Work around a bug in kernels 3.10 to 3.19 where passing a struct that * has the EPOLLWAKEUP flag set generates spurious audit syslog warnings. */ memset(&dummy, 0, sizeof(dummy)); - - if (inv == NULL) { - epoll_ctl(loop->backend_fd, EPOLL_CTL_DEL, fd, &dummy); - } else { - uv__epoll_ctl_prep(loop->backend_fd, - &lfields->ctl, - inv->prep, - EPOLL_CTL_DEL, - fd, - &dummy); - } + epoll_ctl(loop->backend_fd, EPOLL_CTL_DEL, fd, &dummy); } @@ -1215,6 +1209,10 @@ static void uv__poll_io_uring(uv_loop_t* loop, struct uv__iou* iou) { } +/* Only for EPOLL_CTL_ADD and EPOLL_CTL_MOD. EPOLL_CTL_DEL should always be + * executed immediately, otherwise the file descriptor may have been closed + * by the time the kernel starts the operation. + */ static void uv__epoll_ctl_prep(int epollfd, struct uv__iou* ctl, struct epoll_event (*events)[256], @@ -1226,45 +1224,28 @@ static void uv__epoll_ctl_prep(int epollfd, uint32_t mask; uint32_t slot; - if (ctl->ringfd == -1) { - if (!epoll_ctl(epollfd, op, fd, e)) - return; + assert(op == EPOLL_CTL_ADD || op == EPOLL_CTL_MOD); + assert(ctl->ringfd != -1); - if (op == EPOLL_CTL_DEL) - return; /* Ignore errors, may be racing with another thread. */ + mask = ctl->sqmask; + slot = (*ctl->sqtail)++ & mask; - if (op != EPOLL_CTL_ADD) - abort(); + pe = &(*events)[slot]; + *pe = *e; - if (errno != EEXIST) - abort(); + sqe = ctl->sqe; + sqe = &sqe[slot]; - /* File descriptor that's been watched before, update event mask. */ - if (!epoll_ctl(epollfd, EPOLL_CTL_MOD, fd, e)) - return; + memset(sqe, 0, sizeof(*sqe)); + sqe->addr = (uintptr_t) pe; + sqe->fd = epollfd; + sqe->len = op; + sqe->off = fd; + sqe->opcode = UV__IORING_OP_EPOLL_CTL; + sqe->user_data = op | slot << 2 | (int64_t) fd << 32; - abort(); - } else { - mask = ctl->sqmask; - slot = (*ctl->sqtail)++ & mask; - - pe = &(*events)[slot]; - *pe = *e; - - sqe = ctl->sqe; - sqe = &sqe[slot]; - - memset(sqe, 0, sizeof(*sqe)); - sqe->addr = (uintptr_t) pe; - sqe->fd = epollfd; - sqe->len = op; - sqe->off = fd; - sqe->opcode = UV__IORING_OP_EPOLL_CTL; - sqe->user_data = op | slot << 2 | (int64_t) fd << 32; - - if ((*ctl->sqhead & mask) == (*ctl->sqtail & mask)) - uv__epoll_ctl_flush(epollfd, ctl, events); - } + if ((*ctl->sqhead & mask) == (*ctl->sqtail & mask)) + uv__epoll_ctl_flush(epollfd, ctl, events); } @@ -1405,8 +1386,22 @@ void uv__io_poll(uv_loop_t* loop, int timeout) { w->events = w->pevents; e.events = w->pevents; e.data.fd = w->fd; + fd = w->fd; - uv__epoll_ctl_prep(epollfd, ctl, &prep, op, w->fd, &e); + if (ctl->ringfd != -1) { + uv__epoll_ctl_prep(epollfd, ctl, &prep, op, fd, &e); + continue; + } + + if (!epoll_ctl(epollfd, op, fd, &e)) + continue; + + assert(op == EPOLL_CTL_ADD); + assert(errno == EEXIST); + + /* File descriptor that's been watched before, update event mask. */ + if (epoll_ctl(epollfd, EPOLL_CTL_MOD, fd, &e)) + abort(); } inv.events = events; @@ -1494,8 +1489,12 @@ void uv__io_poll(uv_loop_t* loop, int timeout) { * * Ignore all errors because we may be racing with another thread * when the file descriptor is closed. + * + * Perform EPOLL_CTL_DEL immediately instead of going through + * io_uring's submit queue, otherwise the file descriptor may + * be closed by the time the kernel starts the operation. */ - uv__epoll_ctl_prep(epollfd, ctl, &prep, EPOLL_CTL_DEL, fd, pe); + epoll_ctl(epollfd, EPOLL_CTL_DEL, fd, pe); continue; } diff --git a/test/test-iouring-pollhup.c b/test/test-iouring-pollhup.c new file mode 100644 index 00000000..342789aa --- /dev/null +++ b/test/test-iouring-pollhup.c @@ -0,0 +1,111 @@ +/* Copyright libuv project contributors. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "uv.h" +#include "task.h" + +#ifdef _WIN32 + +TEST_IMPL(iouring_pollhup) { + RETURN_SKIP("Not on Windows."); +} + +#else /* !_WIN32 */ + +#include /* close() */ + +static uv_pipe_t p1; +static uv_pipe_t p2; +static uv_idle_t idle_handle; +static int iters; +static int duped_fd; +static int newpipefds[2]; + +static void alloc_buffer(uv_handle_t* handle, + size_t suggested_size, + uv_buf_t* buf) { + static char slab[32]; + *buf = uv_buf_init(slab, sizeof(slab)); +} + +static void read_data2(uv_stream_t* stream, + ssize_t nread, + const uv_buf_t* buf) { + if (nread < 0) { + ASSERT_EQ(nread, UV_EOF); + ASSERT_OK(close(duped_fd)); + duped_fd = -1; + uv_close((uv_handle_t*) &p2, NULL); + uv_close((uv_handle_t*) &idle_handle, NULL); + } else { + /* If nread == 0 is because of POLLHUP received still from pipefds[0] file + * description which is still referenced in duped_fd. It should not happen + * if close(p1) was called after EPOLL_CTL_DEL. + */ + ASSERT_GT(nread, 0); + } +} + +static void idle_cb(uv_idle_t* handle) { + if (++iters == 1) { + ASSERT_OK(uv_pipe_open(&p2, newpipefds[0])); + ASSERT_OK(uv_read_start((uv_stream_t*) &p2, alloc_buffer, read_data2)); + } else { + ASSERT_OK(uv_idle_stop(handle)); + ASSERT_OK(close(newpipefds[1])); + newpipefds[1] = -1; + } +} + +static void read_data(uv_stream_t* stream, + ssize_t nread, + const uv_buf_t* buf) { + ASSERT_EQ(nread, UV_EOF); + uv_close((uv_handle_t*) stream, NULL); + ASSERT_OK(uv_idle_start(&idle_handle, idle_cb)); +} + +TEST_IMPL(iouring_pollhup) { + uv_loop_t* loop; + int pipefds[2]; + + loop = uv_default_loop(); + ASSERT_OK(uv_pipe_init(loop, &p1, 0)); + ASSERT_OK(uv_pipe_init(loop, &p2, 0)); + ASSERT_OK(uv_idle_init(loop, &idle_handle)); + ASSERT_OK(pipe(pipefds)); + ASSERT_OK(pipe(newpipefds)); + + ASSERT_OK(uv_pipe_open(&p1, pipefds[0])); + duped_fd = dup(pipefds[0]); + ASSERT_NE(duped_fd, -1); + + ASSERT_OK(uv_read_start((uv_stream_t*) &p1, alloc_buffer, read_data)); + ASSERT_OK(close(pipefds[1])); /* Close write end, generate POLLHUP. */ + pipefds[1] = -1; + + ASSERT_OK(uv_run(loop, UV_RUN_DEFAULT)); + + MAKE_VALGRIND_HAPPY(uv_default_loop()); + return 0; +} + +#endif /* !_WIN32 */ diff --git a/test/test-list.h b/test/test-list.h index d30f02fa..84c885cc 100644 --- a/test/test-list.h +++ b/test/test-list.h @@ -563,6 +563,8 @@ TEST_DECLARE (fork_threadpool_queue_work_simple) #endif #endif +TEST_DECLARE (iouring_pollhup) + TEST_DECLARE (idna_toascii) TEST_DECLARE (utf8_decode1) TEST_DECLARE (utf8_decode1_overrun) @@ -1204,6 +1206,8 @@ TASK_LIST_START #endif #endif + TEST_ENTRY (iouring_pollhup) + TEST_ENTRY (utf8_decode1) TEST_ENTRY (utf8_decode1_overrun) TEST_ENTRY (uname)