From ced77975b84d4892f23eab5cdd2a55e25dbc0af3 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Fri, 18 Oct 2024 09:51:27 -0400 Subject: [PATCH] Revert "Revert "linux: eliminate a read on eventfd per wakeup (#4400)" (#4585)" This reverts commit 18d48bc13ce4a44efb9c37fd81cfdd9db7bc92b8. --- src/unix/async.c | 128 +++++++++++++++++++++++++++++++++++++++----- src/unix/internal.h | 22 ++++++++ src/unix/kqueue.c | 11 ++++ src/unix/linux.c | 6 +++ 4 files changed, 153 insertions(+), 14 deletions(-) diff --git a/src/unix/async.c b/src/unix/async.c index 0ff2669e..bc97ec54 100644 --- a/src/unix/async.c +++ b/src/unix/async.c @@ -38,6 +38,34 @@ #include #endif +#if UV__KQUEUE_EVFILT_USER +static uv_once_t kqueue_runtime_detection_guard = UV_ONCE_INIT; +static int kqueue_evfilt_user_support = 1; + + +static void uv__kqueue_runtime_detection(void) { + int kq; + struct kevent ev[2]; + struct timespec timeout = {0, 0}; + + /* Perform the runtime detection to ensure that kqueue with + * EVFILT_USER actually works: register the event, trigger it, and check that kevent() reports it back. */ + kq = kqueue(); /* NOTE(review): kq is not checked for failure before use below; confirm. */ + EV_SET(ev, UV__KQUEUE_EVFILT_USER_IDENT, EVFILT_USER, + EV_ADD | EV_CLEAR, 0, 0, 0); + EV_SET(ev + 1, UV__KQUEUE_EVFILT_USER_IDENT, EVFILT_USER, + 0, NOTE_TRIGGER, 0, 0); + if (kevent(kq, ev, 2, ev, 1, &timeout) < 1 || + ev[0].filter != EVFILT_USER || + ev[0].ident != UV__KQUEUE_EVFILT_USER_IDENT || + ev[0].flags & EV_ERROR) + /* If we wind up here, we can assume that EVFILT_USER is defined but + * broken on the current system. 
*/ + kqueue_evfilt_user_support = 0; + uv__close(kq); +} +#endif + static void uv__async_send(uv_loop_t* loop); static int uv__async_start(uv_loop_t* loop); static void uv__cpu_relax(void); @@ -130,8 +158,10 @@ void uv__async_close(uv_async_t* handle) { static void uv__async_io(uv_loop_t* loop, uv__io_t* w, unsigned int events) { +#ifndef __linux__ char buf[1024]; ssize_t r; +#endif struct uv__queue queue; struct uv__queue* q; uv_async_t* h; @@ -139,7 +169,12 @@ static void uv__async_io(uv_loop_t* loop, uv__io_t* w, unsigned int events) { assert(w == &loop->async_io_watcher); +#ifndef __linux__ +#if UV__KQUEUE_EVFILT_USER + for (;!kqueue_evfilt_user_support;) { /* body never runs when EVFILT_USER is in use */ +#else for (;;) { +#endif r = read(w->fd, buf, sizeof(buf)); if (r == sizeof(buf)) @@ -156,6 +191,7 @@ static void uv__async_io(uv_loop_t* loop, uv__io_t* w, unsigned int events) { abort(); } +#endif /* !__linux__ */ uv__queue_move(&loop->async_handles, &queue); while (!uv__queue_empty(&queue)) { @@ -179,34 +215,58 @@ static void uv__async_io(uv_loop_t* loop, uv__io_t* w, unsigned int events) { static void uv__async_send(uv_loop_t* loop) { - const void* buf; - ssize_t len; int fd; - int r; + ssize_t r; +#ifdef __linux__ + uint64_t val; - buf = ""; - len = 1; - fd = loop->async_wfd; + fd = loop->async_io_watcher.fd; /* eventfd */ + for (val = 1; /* empty */; val = 1) { + r = write(fd, &val, sizeof(uint64_t)); + if (r < 0) { + /* When EAGAIN occurs, the eventfd counter has hit the maximum value of an unsigned 64-bit integer. + * We need to first drain the eventfd and then write again. + * + * See https://man7.org/linux/man-pages/man2/eventfd.2.html for details. + */ + if (errno == EAGAIN) { + /* Drain the counter; retry the write if read() succeeds or errno is EAGAIN afterwards. */ + if (read(fd, &val, sizeof(uint64_t)) > 0 || errno == EAGAIN) { + continue; + } + } + /* Any other error: leave the loop and reach the abort() after the #endif. 
*/ + break; + } + return; + } +#else +#if UV__KQUEUE_EVFILT_USER + struct kevent ev; -#if defined(__linux__) - if (fd == -1) { - static const uint64_t val = 1; - buf = &val; - len = sizeof(val); - fd = loop->async_io_watcher.fd; /* eventfd */ + if (kqueue_evfilt_user_support) { + fd = loop->async_io_watcher.fd; /* used as the EVFILT_USER ident */ + EV_SET(&ev, fd, EVFILT_USER, 0, NOTE_TRIGGER, 0, 0); + r = kevent(loop->backend_fd, &ev, 1, NULL, 0, NULL); + if (r == 0) + return; + else + abort(); } #endif + fd = loop->async_wfd; /* write end of the pipe */ do - r = write(fd, buf, len); + r = write(fd, "x", 1); while (r == -1 && errno == EINTR); - if (r == len) + if (r == 1) return; if (r == -1) if (errno == EAGAIN || errno == EWOULDBLOCK) return; +#endif abort(); } @@ -215,6 +275,9 @@ static void uv__async_send(uv_loop_t* loop) { static int uv__async_start(uv_loop_t* loop) { int pipefd[2]; int err; +#if UV__KQUEUE_EVFILT_USER + struct kevent ev; +#endif if (loop->async_io_watcher.fd != -1) return 0; @@ -226,6 +289,36 @@ static int uv__async_start(uv_loop_t* loop) { pipefd[0] = err; pipefd[1] = -1; +#elif UV__KQUEUE_EVFILT_USER + uv_once(&kqueue_runtime_detection_guard, uv__kqueue_runtime_detection); + if (kqueue_evfilt_user_support) { + /* In order not to break the generic pattern of I/O polling, a valid + * file descriptor is required to occupy a slot in loop->watchers, + * thus we open one for that, but this fd is not used for any I/O; + * it is just a placeholder and the EVFILT_USER identifier, and it is closed + * during the cleanup, like other fds. */ + err = uv__open_cloexec("/dev/null", O_RDONLY); + if (err < 0) + return err; + + pipefd[0] = err; + pipefd[1] = -1; + + /* When using an EVFILT_USER event to wake up the kqueue, this event must be + * registered beforehand. Otherwise, calling kevent() to trigger an + * unregistered EVFILT_USER event fails with ENOENT. 
+ * Since uv__async_send() may run before uv__io_poll() when several threads are involved, + * we can't defer the registration of this EVFILT_USER event as we do for other + * events, but must perform it right away. */ + EV_SET(&ev, err, EVFILT_USER, EV_ADD | EV_CLEAR, 0, 0, 0); + err = kevent(loop->backend_fd, &ev, 1, NULL, 0, NULL); + if (err < 0) + return UV__ERR(errno); /* NOTE(review): the fd in pipefd[0] is not closed on this path; confirm. */ + } else { + err = uv__make_pipe(pipefd, UV_NONBLOCK_PIPE); + if (err < 0) + return err; + } #else err = uv__make_pipe(pipefd, UV_NONBLOCK_PIPE); if (err < 0) @@ -236,6 +329,13 @@ static int uv__async_start(uv_loop_t* loop) { uv__io_start(loop, &loop->async_io_watcher, POLLIN); loop->async_wfd = pipefd[1]; +#if UV__KQUEUE_EVFILT_USER + /* Set events to pevents to prevent the EVFILT_USER event from being added to kqueue redundantly + * and mistakenly later in uv__io_poll(). */ + if (kqueue_evfilt_user_support) + loop->async_io_watcher.events = loop->async_io_watcher.pevents; +#endif + return 0; } diff --git a/src/unix/internal.h b/src/unix/internal.h index 8d586b0b..568a55b5 100644 --- a/src/unix/internal.h +++ b/src/unix/internal.h @@ -35,6 +35,10 @@ #include #include #include +#if defined(__APPLE__) || defined(__DragonFly__) || \ + defined(__FreeBSD__) || defined(__NetBSD__) +#include +#endif #define uv__msan_unpoison(p, n) \ do { \ @@ -504,4 +508,22 @@ int uv__get_constrained_cpu(uv__cpu_constraint* constraint); #endif #endif +#if defined(EVFILT_USER) && defined(NOTE_TRIGGER) +/* EVFILT_USER is available since OS X 10.6, DragonFlyBSD 4.0, + * FreeBSD 8.1, and NetBSD 10.0. + * + * Note that even though EVFILT_USER is defined on the current system, + * it may still fail to work at runtime somehow. In that case, we fall + * back to pipe-based signaling. + */ +#define UV__KQUEUE_EVFILT_USER 1 +/* Magic identifier used for EVFILT_USER during runtime detection. + * There were no Google hits for this number when it was created. 
That way, + * people will be directed here if this number gets printed due to some + * kqueue error and they google for help. */ +#define UV__KQUEUE_EVFILT_USER_IDENT 0x1e7e7711 +#else +#define UV__KQUEUE_EVFILT_USER 0 +#endif + #endif /* UV_UNIX_INTERNAL_H_ */ diff --git a/src/unix/kqueue.c b/src/unix/kqueue.c index 66aa166f..876b7170 100644 --- a/src/unix/kqueue.c +++ b/src/unix/kqueue.c @@ -367,6 +367,17 @@ void uv__io_poll(uv_loop_t* loop, int timeout) { continue; } +#if UV__KQUEUE_EVFILT_USER + if (ev->filter == EVFILT_USER) { /* async wakeup: dispatch straight to the async watcher callback */ + w = &loop->async_io_watcher; + assert(fd == w->fd); + uv__metrics_update_idle_time(loop); + w->cb(loop, w, w->events); + nevents++; + continue; + } +#endif + if (ev->filter == EVFILT_VNODE) { assert(w->events == POLLIN); assert(w->pevents == POLLIN); diff --git a/src/unix/linux.c b/src/unix/linux.c index 857a4ef8..803a9a9d 100644 --- a/src/unix/linux.c +++ b/src/unix/linux.c @@ -1414,6 +1414,12 @@ void uv__io_poll(uv_loop_t* loop, int timeout) { w->events = w->pevents; e.events = w->pevents; + if (w == &loop->async_io_watcher) + /* Enable edge-triggered mode on async_io_watcher(eventfd), + * so that we're able to eliminate the overhead of reading + * the eventfd via system call on each event loop wakeup. + */ + e.events |= EPOLLET; e.data.fd = w->fd; fd = w->fd;