diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 9ec7356cc..a5730eb53 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -486,6 +486,7 @@ jobs: patch -p1 < $GITHUB_WORKSPACE/ci/tiocgsid.patch patch -p1 < $GITHUB_WORKSPACE/ci/more-sockopts.patch patch -p1 < $GITHUB_WORKSPACE/ci/pidfd-open.patch + patch -p1 < $GITHUB_WORKSPACE/ci/select-setsize.patch ./configure --target-list=${{ matrix.qemu_target }} --prefix=${{ runner.tool_cache }}/qemu --disable-tools --disable-slirp --disable-fdt --disable-capstone --disable-docs ninja -C build install if: matrix.qemu != '' && matrix.os == 'ubuntu-latest' @@ -624,6 +625,7 @@ jobs: patch -p1 < $GITHUB_WORKSPACE/ci/tiocgsid.patch patch -p1 < $GITHUB_WORKSPACE/ci/more-sockopts.patch patch -p1 < $GITHUB_WORKSPACE/ci/pidfd-open.patch + patch -p1 < $GITHUB_WORKSPACE/ci/select-setsize.patch ./configure --target-list=${{ matrix.qemu_target }} --prefix=${{ runner.tool_cache }}/qemu --disable-tools --disable-slirp --disable-fdt --disable-capstone --disable-docs ninja -C build install if: matrix.qemu != '' && matrix.os == 'ubuntu-latest' @@ -718,6 +720,7 @@ jobs: patch -p1 < $GITHUB_WORKSPACE/ci/tiocgsid.patch patch -p1 < $GITHUB_WORKSPACE/ci/more-sockopts.patch patch -p1 < $GITHUB_WORKSPACE/ci/pidfd-open.patch + patch -p1 < $GITHUB_WORKSPACE/ci/select-setsize.patch ./configure --target-list=${{ matrix.qemu_target }} --prefix=${{ runner.tool_cache }}/qemu --disable-tools --disable-slirp --disable-fdt --disable-capstone --disable-docs ninja -C build install if: matrix.qemu != '' && matrix.os == 'ubuntu-latest' diff --git a/ci/select-setsize.patch b/ci/select-setsize.patch new file mode 100644 index 000000000..48f9824d7 --- /dev/null +++ b/ci/select-setsize.patch @@ -0,0 +1,269 @@ +From Dan Gohman +Subject: [PATCH] Remove the `FD_SETSIZE` limitation in `select` + +The `fd_set` type is limited to a fixed `FD_SETSIZE` number of file +descriptors, however Linux's `select` has no such limitation. 
Change +the `select` implementation to using manual bit-vector logic to better +implement the Linux semantics. + +diff -ur a/linux-user/syscall.c b/linux-user/syscall.c +--- a/linux-user/syscall.c ++++ b/linux-user/syscall.c +@@ -664,8 +664,9 @@ + char **, argv, char **, envp, int, flags) + #if defined(TARGET_NR_select) || defined(TARGET_NR__newselect) || \ + defined(TARGET_NR_pselect6) || defined(TARGET_NR_pselect6_time64) +-safe_syscall6(int, pselect6, int, nfds, fd_set *, readfds, fd_set *, writefds, \ +- fd_set *, exceptfds, struct timespec *, timeout, void *, sig) ++safe_syscall6(int, pselect6, int, nfds, unsigned long *, readfds, \ ++ unsigned long *, writefds, unsigned long *, exceptfds, \ ++ struct timespec *, timeout, void *, sig) + #endif + #if defined(TARGET_NR_ppoll) || defined(TARGET_NR_ppoll_time64) + safe_syscall5(int, ppoll, struct pollfd *, ufds, unsigned int, nfds, +@@ -861,7 +862,7 @@ + + #if defined(TARGET_NR_select) || defined(TARGET_NR__newselect) || \ + defined(TARGET_NR_pselect6) || defined(TARGET_NR_pselect6_time64) +-static inline abi_long copy_from_user_fdset(fd_set *fds, ++static inline abi_long copy_from_user_fdset(unsigned long *fds, + abi_ulong target_fds_addr, + int n) + { +@@ -875,7 +876,8 @@ + 1))) + return -TARGET_EFAULT; + +- FD_ZERO(fds); ++ memset(fds, 0, DIV_ROUND_UP(n, sizeof(unsigned long) * 8) * ++ sizeof(unsigned long)); + k = 0; + for (i = 0; i < nw; i++) { + /* grab the abi_ulong */ +@@ -883,7 +885,8 @@ + for (j = 0; j < TARGET_ABI_BITS; j++) { + /* check the bit inside the abi_ulong */ + if ((b >> j) & 1) +- FD_SET(k, fds); ++ fds[k / (sizeof(unsigned long) * 8)] |= ++ 1ul << (k % (sizeof(unsigned long) * 8)); + k++; + } + } +@@ -893,7 +896,8 @@ + return 0; + } + +-static inline abi_ulong copy_from_user_fdset_ptr(fd_set *fds, fd_set **fds_ptr, ++static inline abi_ulong copy_from_user_fdset_ptr(unsigned long *fds, ++ unsigned long **fds_ptr, + abi_ulong target_fds_addr, + int n) + { +@@ -908,7 +912,7 @@ + } + + static 
inline abi_long copy_to_user_fdset(abi_ulong target_fds_addr, +- const fd_set *fds, ++ const unsigned long *fds, + int n) + { + int i, nw, j, k; +@@ -926,7 +930,10 @@ + for (i = 0; i < nw; i++) { + v = 0; + for (j = 0; j < TARGET_ABI_BITS; j++) { +- v |= ((abi_ulong)(FD_ISSET(k, fds) != 0) << j); ++ bool set = ++ (fds[k / (sizeof(unsigned long) * 8)] & ++ (1ul << (k % (sizeof(unsigned long) * 8)))) != 0; ++ v |= ((abi_ulong)set << j); + k++; + } + __put_user(v, &target_fds[i]); +@@ -1295,28 +1302,40 @@ + abi_ulong rfd_addr, abi_ulong wfd_addr, + abi_ulong efd_addr, abi_ulong target_tv_addr) + { +- fd_set rfds, wfds, efds; +- fd_set *rfds_ptr, *wfds_ptr, *efds_ptr; ++ unsigned long *rfds, *wfds, *efds; ++ unsigned long *rfds_ptr, *wfds_ptr, *efds_ptr; + struct timeval tv; + struct timespec ts, *ts_ptr; + abi_long ret; + +- ret = copy_from_user_fdset_ptr(&rfds, &rfds_ptr, rfd_addr, n); ++ rfds = malloc(DIV_ROUND_UP(n, sizeof(unsigned long) * 8) * ++ sizeof(unsigned long)); ++ wfds = malloc(DIV_ROUND_UP(n, sizeof(unsigned long) * 8) * ++ sizeof(unsigned long)); ++ efds = malloc(DIV_ROUND_UP(n, sizeof(unsigned long) * 8) * ++ sizeof(unsigned long)); ++ ++ ret = copy_from_user_fdset_ptr(rfds, &rfds_ptr, rfd_addr, n); + if (ret) { ++ free(rfds); free(wfds); free(efds); + return ret; + } +- ret = copy_from_user_fdset_ptr(&wfds, &wfds_ptr, wfd_addr, n); ++ ret = copy_from_user_fdset_ptr(wfds, &wfds_ptr, wfd_addr, n); + if (ret) { ++ free(rfds); free(wfds); free(efds); + return ret; + } +- ret = copy_from_user_fdset_ptr(&efds, &efds_ptr, efd_addr, n); ++ ret = copy_from_user_fdset_ptr(efds, &efds_ptr, efd_addr, n); + if (ret) { ++ free(rfds); free(wfds); free(efds); + return ret; + } + + if (target_tv_addr) { +- if (copy_from_user_timeval(&tv, target_tv_addr)) ++ if (copy_from_user_timeval(&tv, target_tv_addr)) { ++ free(rfds); free(wfds); free(efds); + return -TARGET_EFAULT; ++ } + ts.tv_sec = tv.tv_sec; + ts.tv_nsec = tv.tv_usec * 1000; + ts_ptr = &ts; +@@ -1328,22 
+1347,30 @@ + ts_ptr, NULL)); + + if (!is_error(ret)) { +- if (rfd_addr && copy_to_user_fdset(rfd_addr, &rfds, n)) ++ if (rfd_addr && copy_to_user_fdset(rfd_addr, rfds, n)) { ++ free(rfds); free(wfds); free(efds); + return -TARGET_EFAULT; +- if (wfd_addr && copy_to_user_fdset(wfd_addr, &wfds, n)) ++ } ++ if (wfd_addr && copy_to_user_fdset(wfd_addr, wfds, n)) { ++ free(rfds); free(wfds); free(efds); + return -TARGET_EFAULT; +- if (efd_addr && copy_to_user_fdset(efd_addr, &efds, n)) ++ } ++ if (efd_addr && copy_to_user_fdset(efd_addr, efds, n)) { ++ free(rfds); free(wfds); free(efds); + return -TARGET_EFAULT; ++ } + + if (target_tv_addr) { + tv.tv_sec = ts.tv_sec; + tv.tv_usec = ts.tv_nsec / 1000; + if (copy_to_user_timeval(target_tv_addr, &tv)) { ++ free(rfds); free(wfds); free(efds); + return -TARGET_EFAULT; + } + } + } + ++ free(rfds); free(wfds); free(efds); + return ret; + } + +@@ -1377,8 +1404,8 @@ + bool time64) + { + abi_long rfd_addr, wfd_addr, efd_addr, n, ts_addr; +- fd_set rfds, wfds, efds; +- fd_set *rfds_ptr, *wfds_ptr, *efds_ptr; ++ unsigned long *rfds, *wfds, *efds; ++ unsigned long *rfds_ptr, *wfds_ptr, *efds_ptr; + struct timespec ts, *ts_ptr; + abi_long ret; + +@@ -1399,16 +1426,26 @@ + efd_addr = arg4; + ts_addr = arg5; + +- ret = copy_from_user_fdset_ptr(&rfds, &rfds_ptr, rfd_addr, n); ++ rfds = malloc(DIV_ROUND_UP(n, sizeof(unsigned long) * 8) * ++ sizeof(unsigned long)); ++ wfds = malloc(DIV_ROUND_UP(n, sizeof(unsigned long) * 8) * ++ sizeof(unsigned long)); ++ efds = malloc(DIV_ROUND_UP(n, sizeof(unsigned long) * 8) * ++ sizeof(unsigned long)); ++ ++ ret = copy_from_user_fdset_ptr(rfds, &rfds_ptr, rfd_addr, n); + if (ret) { ++ free(rfds); free(wfds); free(efds); + return ret; + } +- ret = copy_from_user_fdset_ptr(&wfds, &wfds_ptr, wfd_addr, n); ++ ret = copy_from_user_fdset_ptr(wfds, &wfds_ptr, wfd_addr, n); + if (ret) { ++ free(rfds); free(wfds); free(efds); + return ret; + } +- ret = copy_from_user_fdset_ptr(&efds, &efds_ptr, efd_addr, n); 
++ ret = copy_from_user_fdset_ptr(efds, &efds_ptr, efd_addr, n); + if (ret) { ++ free(rfds); free(wfds); free(efds); + return ret; + } + +@@ -1419,10 +1456,12 @@ + if (ts_addr) { + if (time64) { + if (target_to_host_timespec64(&ts, ts_addr)) { ++ free(rfds); free(wfds); free(efds); + return -TARGET_EFAULT; + } + } else { + if (target_to_host_timespec(&ts, ts_addr)) { ++ free(rfds); free(wfds); free(efds); + return -TARGET_EFAULT; + } + } +@@ -1436,6 +1475,7 @@ + if (arg6) { + arg7 = lock_user(VERIFY_READ, arg6, sizeof(*arg7) * 2, 1); + if (!arg7) { ++ free(rfds); free(wfds); free(efds); + return -TARGET_EFAULT; + } + arg_sigset = tswapal(arg7[0]); +@@ -1445,6 +1485,7 @@ + if (arg_sigset) { + ret = process_sigsuspend_mask(&sig.set, arg_sigset, arg_sigsize); + if (ret != 0) { ++ free(rfds); free(wfds); free(efds); + return ret; + } + sig_ptr = &sig; +@@ -1460,25 +1501,31 @@ + } + + if (!is_error(ret)) { +- if (rfd_addr && copy_to_user_fdset(rfd_addr, &rfds, n)) { ++ if (rfd_addr && copy_to_user_fdset(rfd_addr, rfds, n)) { ++ free(rfds); free(wfds); free(efds); + return -TARGET_EFAULT; + } +- if (wfd_addr && copy_to_user_fdset(wfd_addr, &wfds, n)) { ++ if (wfd_addr && copy_to_user_fdset(wfd_addr, wfds, n)) { ++ free(rfds); free(wfds); free(efds); + return -TARGET_EFAULT; + } +- if (efd_addr && copy_to_user_fdset(efd_addr, &efds, n)) { ++ if (efd_addr && copy_to_user_fdset(efd_addr, efds, n)) { ++ free(rfds); free(wfds); free(efds); + return -TARGET_EFAULT; + } + if (time64) { + if (ts_addr && host_to_target_timespec64(ts_addr, &ts)) { ++ free(rfds); free(wfds); free(efds); + return -TARGET_EFAULT; + } + } else { + if (ts_addr && host_to_target_timespec(ts_addr, &ts)) { ++ free(rfds); free(wfds); free(efds); + return -TARGET_EFAULT; + } + } + } ++ free(rfds); free(wfds); free(efds); + return ret; + } + #endif diff --git a/src/backend/libc/event/syscalls.rs b/src/backend/libc/event/syscalls.rs index dcd0135f0..763f2e2c0 100644 --- a/src/backend/libc/event/syscalls.rs 
+++ b/src/backend/libc/event/syscalls.rs @@ -16,6 +16,8 @@ use crate::event::port::Event; target_os = "espidf" ))] use crate::event::EventfdFlags; +#[cfg(any(bsd, linux_kernel, target_os = "wasi"))] +use crate::event::FdSetElement; use crate::event::PollFd; use crate::io; #[cfg(solarish)] @@ -28,7 +30,9 @@ use crate::utils::as_ptr; all(feature = "alloc", any(linux_kernel, target_os = "redox")), ))] use core::mem::MaybeUninit; -#[cfg(any(linux_kernel, solarish, target_os = "redox"))] +#[cfg(any(bsd, linux_kernel, target_os = "wasi"))] +use core::ptr::null; +#[cfg(any(bsd, linux_kernel, solarish, target_os = "redox", target_os = "wasi"))] use core::ptr::null_mut; #[cfg(any( linux_kernel, @@ -48,7 +52,7 @@ use {crate::backend::conv::borrowed_fd, crate::fd::BorrowedFd}; ))] use {crate::backend::conv::ret_owned_fd, crate::fd::OwnedFd}; #[cfg(all(feature = "alloc", bsd))] -use {crate::event::kqueue::Event, crate::utils::as_ptr, core::ptr::null}; +use {crate::event::kqueue::Event, crate::utils::as_ptr}; #[cfg(any( linux_kernel, @@ -125,6 +129,137 @@ pub(crate) fn poll(fds: &mut [PollFd<'_>], timeout: c::c_int) -> io::Result, + writefds: Option<&mut [FdSetElement]>, + exceptfds: Option<&mut [FdSetElement]>, + timeout: Option<&crate::timespec::Timespec>, +) -> io::Result { + let len = crate::event::fd_set_num_elements_for_bitvector(nfds); + + let readfds = match readfds { + Some(readfds) => { + assert!(readfds.len() >= len); + readfds.as_mut_ptr() + } + None => null_mut(), + }; + let writefds = match writefds { + Some(writefds) => { + assert!(writefds.len() >= len); + writefds.as_mut_ptr() + } + None => null_mut(), + }; + let exceptfds = match exceptfds { + Some(exceptfds) => { + assert!(exceptfds.len() >= len); + exceptfds.as_mut_ptr() + } + None => null_mut(), + }; + + let timeout_data; + let timeout_ptr = match timeout { + Some(timeout) => { + // Convert from `Timespec` to `c::timeval`. 
+ timeout_data = c::timeval { + tv_sec: timeout.tv_sec.try_into().map_err(|_| io::Errno::OVERFLOW)?, + tv_usec: ((timeout.tv_nsec + 999) / 1000) as _, + }; + &timeout_data + } + None => null(), + }; + + // On Apple platforms, use the specially mangled `select` which doesn't + // have an `FD_SETSIZE` limitation. + #[cfg(apple)] + { + extern "C" { + #[link_name = "select$DARWIN_EXTSN$NOCANCEL"] + fn select( + nfds: c::c_int, + readfds: *mut FdSetElement, + writefds: *mut FdSetElement, + errorfds: *mut FdSetElement, + timeout: *const c::timeval, + ) -> c::c_int; + } + + ret_c_int(select(nfds, readfds, writefds, exceptfds, timeout_ptr)) + } + + // Otherwise just use the normal `select`. + #[cfg(not(apple))] + { + ret_c_int(c::select( + nfds, + readfds.cast(), + writefds.cast(), + exceptfds.cast(), + timeout_ptr as *mut c::timeval, + )) + } +} + +// WASI uses a count + array instead of a bitvector. +#[cfg(target_os = "wasi")] +pub(crate) unsafe fn select( + nfds: i32, + readfds: Option<&mut [FdSetElement]>, + writefds: Option<&mut [FdSetElement]>, + exceptfds: Option<&mut [FdSetElement]>, + timeout: Option<&crate::timespec::Timespec>, +) -> io::Result { + let len = crate::event::fd_set_num_elements_for_fd_array(nfds as usize); + + let readfds = match readfds { + Some(readfds) => { + assert!(readfds.len() >= len); + readfds.as_mut_ptr() + } + None => null_mut(), + }; + let writefds = match writefds { + Some(writefds) => { + assert!(writefds.len() >= len); + writefds.as_mut_ptr() + } + None => null_mut(), + }; + let exceptfds = match exceptfds { + Some(exceptfds) => { + assert!(exceptfds.len() >= len); + exceptfds.as_mut_ptr() + } + None => null_mut(), + }; + + let timeout_data; + let timeout_ptr = match timeout { + Some(timeout) => { + // Convert from `Timespec` to `c::timeval`. 
+ timeout_data = c::timeval { + tv_sec: timeout.tv_sec.try_into().map_err(|_| io::Errno::OVERFLOW)?, + tv_usec: ((timeout.tv_nsec + 999) / 1000) as _, + }; + &timeout_data + } + None => null(), + }; + + ret_c_int(c::select( + nfds, + readfds.cast(), + writefds.cast(), + exceptfds.cast(), + timeout_ptr as *mut c::timeval, + )) +} + #[cfg(solarish)] pub(crate) fn port_create() -> io::Result { unsafe { ret_owned_fd(c::port_create()) } @@ -207,7 +342,7 @@ pub(crate) fn port_send( unsafe { ret(c::port_send(borrowed_fd(port), events, userdata)) } } -#[cfg(not(any(windows, target_os = "redox", target_os = "wasi")))] +#[cfg(not(any(target_os = "redox", target_os = "wasi")))] pub(crate) fn pause() { let r = unsafe { c::pause() }; let errno = libc_errno::errno().0; diff --git a/src/backend/libc/event/windows_syscalls.rs b/src/backend/libc/event/windows_syscalls.rs index 8ccad4794..c152411bf 100644 --- a/src/backend/libc/event/windows_syscalls.rs +++ b/src/backend/libc/event/windows_syscalls.rs @@ -2,7 +2,7 @@ use crate::backend::c; use crate::backend::conv::ret_c_int; -use crate::event::PollFd; +use crate::event::{FdSetElement, PollFd}; use crate::io; pub(crate) fn poll(fds: &mut [PollFd<'_>], timeout: c::c_int) -> io::Result { @@ -14,3 +14,61 @@ pub(crate) fn poll(fds: &mut [PollFd<'_>], timeout: c::c_int) -> io::Result, + writefds: Option<&mut [FdSetElement]>, + exceptfds: Option<&mut [FdSetElement]>, + timeout: Option<&crate::timespec::Timespec>, +) -> io::Result { + use core::ptr::{null, null_mut}; + + let readfds = match readfds { + Some(readfds) => { + assert!(readfds.len() >= readfds[0].0 as usize); + readfds.as_mut_ptr() + } + None => null_mut(), + }; + let writefds = match writefds { + Some(writefds) => { + assert!(writefds.len() >= writefds[0].0 as usize); + writefds.as_mut_ptr() + } + None => null_mut(), + }; + let exceptfds = match exceptfds { + Some(exceptfds) => { + assert!(exceptfds.len() >= exceptfds[0].0 as usize); + exceptfds.as_mut_ptr() + } + None => 
null_mut(), + }; + + let timeout_data; + let timeout_ptr = match timeout { + Some(timeout) => { + // Convert from `Timespec` to `TIMEVAL`. + timeout_data = c::TIMEVAL { + tv_sec: timeout + .tv_sec + .try_into() + .map_err(|_| io::Errno::OPNOTSUPP)?, + tv_usec: ((timeout.tv_nsec + 999) / 1000) as _, + }; + &timeout_data + } + None => null(), + }; + + unsafe { + ret_c_int(c::select( + nfds, + readfds.cast(), + writefds.cast(), + exceptfds.cast(), + timeout_ptr, + )) + } +} diff --git a/src/backend/libc/winsock_c.rs b/src/backend/libc/winsock_c.rs index ee2704ade..007cda69c 100644 --- a/src/backend/libc/winsock_c.rs +++ b/src/backend/libc/winsock_c.rs @@ -57,3 +57,11 @@ pub(crate) use WinSock::{ WSAEWOULDBLOCK as EWOULDBLOCK, WSAEWOULDBLOCK as EAGAIN, WSAPOLLFD as pollfd, WSA_E_CANCELLED as ECANCELED, *, }; + +// Windows doesn't have `timespec`, just `timeval`. Rustix only uses `timespec` +// in its public API. So define one, and we'll convert it internally. +pub struct timespec { + pub tv_sec: time_t, + pub tv_nsec: i64, +} +pub type time_t = i64; diff --git a/src/backend/linux_raw/c.rs b/src/backend/linux_raw/c.rs index 4035bf945..95f701b16 100644 --- a/src/backend/linux_raw/c.rs +++ b/src/backend/linux_raw/c.rs @@ -9,6 +9,7 @@ pub(crate) type size_t = usize; pub(crate) use linux_raw_sys::ctypes::*; pub(crate) use linux_raw_sys::errno::EINVAL; +pub(crate) use linux_raw_sys::general::{__kernel_fd_set as fd_set, __FD_SETSIZE as FD_SETSIZE}; pub(crate) use linux_raw_sys::ioctl::{FIONBIO, FIONREAD}; // Import the kernel's `uid_t` and `gid_t` if they're 32-bit. 
#[cfg(not(any(target_arch = "arm", target_arch = "sparc", target_arch = "x86")))] diff --git a/src/backend/linux_raw/event/syscalls.rs b/src/backend/linux_raw/event/syscalls.rs index ac199adfa..3886fa7e6 100644 --- a/src/backend/linux_raw/event/syscalls.rs +++ b/src/backend/linux_raw/event/syscalls.rs @@ -7,13 +7,15 @@ use crate::backend::c; use crate::backend::conv::{ - by_ref, c_int, c_uint, ret, ret_error, ret_owned_fd, ret_usize, slice_mut, zero, + by_ref, c_int, c_uint, ret, ret_c_int, ret_error, ret_owned_fd, ret_usize, slice_mut, zero, }; -use crate::event::{epoll, EventfdFlags, PollFd}; +use crate::event::{epoll, EventfdFlags, FdSetElement, PollFd}; use crate::fd::{BorrowedFd, OwnedFd}; use crate::io; +use crate::utils::as_mut_ptr; #[cfg(feature = "alloc")] use core::mem::MaybeUninit; +use core::ptr::null_mut; use linux_raw_sys::general::{EPOLL_CTL_ADD, EPOLL_CTL_DEL, EPOLL_CTL_MOD}; #[cfg(any(target_arch = "aarch64", target_arch = "riscv64"))] use { @@ -50,6 +52,85 @@ pub(crate) fn poll(fds: &mut [PollFd<'_>], timeout: c::c_int) -> io::Result, + writefds: Option<&mut [FdSetElement]>, + exceptfds: Option<&mut [FdSetElement]>, + timeout: Option<&crate::timespec::Timespec>, +) -> io::Result { + let len = crate::event::fd_set_num_elements_for_bitvector(nfds); + + let readfds = match readfds { + Some(readfds) => { + assert!(readfds.len() >= len); + readfds.as_mut_ptr() + } + None => null_mut(), + }; + let writefds = match writefds { + Some(writefds) => { + assert!(writefds.len() >= len); + writefds.as_mut_ptr() + } + None => null_mut(), + }; + let exceptfds = match exceptfds { + Some(exceptfds) => { + assert!(exceptfds.len() >= len); + exceptfds.as_mut_ptr() + } + None => null_mut(), + }; + + // Linux's `pselect6` mutates the timeout argument. Our public interface + // does not do this, because it's not portable to other platforms, so we + // create a temporary value to hide this behavior. 
+ let mut timeout_data; + let timeout_ptr = match timeout { + Some(timeout) => { + timeout_data = *timeout; + as_mut_ptr(&mut timeout_data) + } + None => null_mut(), + }; + + #[cfg(any( + target_arch = "arm", + target_arch = "powerpc", + target_arch = "sparc", + target_arch = "csky", + target_arch = "x86", + target_arch = "mips32r6", + target_arch = "riscv32", + target_arch = "mips" + ))] + { + ret_c_int(syscall!( + __NR_pselect6_time64, + c_int(nfds), + readfds, + writefds, + exceptfds, + timeout_ptr, + zero() + )) + } + + #[cfg(target_pointer_width = "64")] + { + ret_c_int(syscall!( + __NR_pselect6, + c_int(nfds), + readfds, + writefds, + exceptfds, + timeout_ptr, + zero() + )) + } +} + #[inline] pub(crate) fn epoll_create(flags: epoll::CreateFlags) -> io::Result { // SAFETY: `__NR_epoll_create1` doesn't access any user memory. diff --git a/src/backend/linux_raw/process/syscalls.rs b/src/backend/linux_raw/process/syscalls.rs index d562aab33..85c6fbbad 100644 --- a/src/backend/linux_raw/process/syscalls.rs +++ b/src/backend/linux_raw/process/syscalls.rs @@ -348,12 +348,12 @@ pub(crate) fn prlimit(pid: Option, limit: Resource, new: Rlimit) -> io::Res /// Convert a C `rlimit64` to a Rust `Rlimit`. 
#[inline] fn rlimit_from_linux(lim: rlimit64) -> Rlimit { - let current = if lim.rlim_cur == RLIM64_INFINITY as _ { + let current = if lim.rlim_cur == RLIM64_INFINITY as u64 { None } else { Some(lim.rlim_cur) }; - let maximum = if lim.rlim_max == RLIM64_INFINITY as _ { + let maximum = if lim.rlim_max == RLIM64_INFINITY as u64 { None } else { Some(lim.rlim_max) diff --git a/src/event/mod.rs b/src/event/mod.rs index dab9c6932..a1a51c14f 100644 --- a/src/event/mod.rs +++ b/src/event/mod.rs @@ -16,6 +16,8 @@ mod pause; mod poll; #[cfg(solarish)] pub mod port; +#[cfg(any(bsd, linux_kernel, windows, target_os = "wasi"))] +mod select; #[cfg(any( linux_kernel, @@ -27,3 +29,5 @@ pub use eventfd::{eventfd, EventfdFlags}; #[cfg(not(any(windows, target_os = "redox", target_os = "wasi")))] pub use pause::*; pub use poll::{poll, PollFd, PollFlags}; +#[cfg(any(bsd, linux_kernel, windows, target_os = "wasi"))] +pub use select::*; diff --git a/src/event/poll.rs b/src/event/poll.rs index 0937dd6fd..8c86a0f3f 100644 --- a/src/event/poll.rs +++ b/src/event/poll.rs @@ -2,7 +2,12 @@ use crate::{backend, io}; pub use backend::event::poll_fd::{PollFd, PollFlags}; -/// `poll(self.fds, timeout)` +/// `poll(self.fds, timeout)`—Wait for events on lists of file descriptors. +/// +/// On macOS, `poll` doesn't work on fds for /dev/tty or /dev/null, however +/// [`select`] is available and does work on these fds. +/// +/// [`select`]: crate::event::select /// /// # References /// - [Beej's Guide to Network Programming] diff --git a/src/event/select.rs b/src/event/select.rs new file mode 100644 index 000000000..d124e13c3 --- /dev/null +++ b/src/event/select.rs @@ -0,0 +1,373 @@ +//! The `select` function. +//! +//! # Safety +//! +//! `select` is unsafe due to I/O safety. 
+#![allow(unsafe_code)] + +#[cfg(any(linux_like, target_os = "wasi"))] +use crate::backend::c; +use crate::fd::RawFd; +use crate::{backend, io}; +#[cfg(any(windows, target_os = "wasi"))] +use core::mem::{align_of, size_of}; +#[cfg(any(windows, target_os = "wasi"))] +use core::slice; + +pub use crate::timespec::{Nsecs, Secs, Timespec}; + +/// wasi-libc's `fd_set` type. The libc bindings for it have private fields, +/// so we redeclare it for ourselves so that we can access the fields. They're +/// publicly exposed in wasi-libc. +#[cfg(target_os = "wasi")] +#[repr(C)] +struct FD_SET { + /// The wasi-libc headers call this `__nfds`. + fd_count: usize, + /// The wasi-libc headers call this `__fds`. + fd_array: [i32; c::FD_SETSIZE], +} + +#[cfg(windows)] +use windows_sys::Win32::Networking::WinSock::FD_SET; + +/// Storage element type for use with [`select`]. +#[cfg(any( + windows, + all( + target_pointer_width = "64", + any(target_os = "freebsd", target_os = "dragonfly") + ) +))] +#[repr(transparent)] +#[derive(Copy, Clone, Default)] +pub struct FdSetElement(pub(crate) u64); + +/// Storage element type for use with [`select`]. +#[cfg(linux_like)] +#[repr(transparent)] +#[derive(Copy, Clone, Default)] +pub struct FdSetElement(pub(crate) c::c_ulong); + +/// Storage element type for use with [`select`]. +#[cfg(not(any( + linux_like, + windows, + target_os = "wasi", + all( + target_pointer_width = "64", + any(target_os = "freebsd", target_os = "dragonfly") + ) +)))] +#[repr(transparent)] +#[derive(Copy, Clone, Default)] +pub struct FdSetElement(pub(crate) u32); + +/// Storage element type for use with [`select`]. +#[cfg(target_os = "wasi")] +#[repr(transparent)] +#[derive(Copy, Clone, Default)] +pub struct FdSetElement(pub(crate) usize); + +/// `select(nfds, readfds, writefds, exceptfds, timeout)`—Wait for events on +/// sets of file descriptors. 
+/// +/// `readfds`, `writefds`, `exceptfds` must point to arrays of `FdSetElement` +/// containing at least `nfds.div_ceil(size_of::<FdSetElement>())` elements. +/// +/// This `select` wrapper differs from POSIX in that `nfds` is not limited to +/// `FD_SETSIZE`. Instead of using the fixed-sized `fd_set` type, this function +/// takes slices of at least `fd_set_num_elements(num_fds, max_fd + 1)` elements, +/// where `max_fd` is the maximum value of any fd that will be inserted into +/// the set, and `num_fds` is the maximum number of fds that will be inserted +/// into the set. +/// +/// In particular, on Apple platforms, this function behaves as if +/// `_DARWIN_UNLIMITED_SELECT` were predefined. +/// +/// On illumos, this function is not defined because the `select` function on +/// this platform always has an `FD_SETSIZE` limitation, following POSIX. This +/// platform's documentation recommends using [`poll`] instead. +/// +/// [`fd_set_insert`], [`fd_set_remove`], and [`FdSetIter`] are provided for +/// setting, clearing, and iterating with sets. +/// +/// [`poll`]: crate::event::poll() +/// +/// # Safety +/// +/// All fds in all the sets must correspond to open file descriptors. 
+/// +/// # References +/// - [POSIX] +/// - [Linux] +/// - [Apple] +/// - [FreeBSD] +/// - [NetBSD] +/// - [OpenBSD] +/// - [DragonFly BSD] +/// - [Winsock] +/// - [glibc] +/// +/// [POSIX]: https://pubs.opengroup.org/onlinepubs/9799919799/functions/select.html +/// [Linux]: https://man7.org/linux/man-pages/man2/select.2.html +/// [Apple]: https://developer.apple.com/library/archive/documentation/System/Conceptual/ManPages_iPhoneOS/man2/select.2.html +/// [FreeBSD]: https://man.freebsd.org/cgi/man.cgi?query=select&sektion=2 +/// [NetBSD]: https://man.netbsd.org/select.2 +/// [OpenBSD]: https://man.openbsd.org/select.2 +/// [DragonFly BSD]: https://man.dragonflybsd.org/?command=select&section=2 +/// [Winsock]: https://learn.microsoft.com/en-us/windows/win32/api/winsock2/nf-winsock2-select +/// [glibc]: https://sourceware.org/glibc/manual/latest/html_node/Waiting-for-I_002fO.html#index-select +pub unsafe fn select( + nfds: i32, + readfds: Option<&mut [FdSetElement]>, + writefds: Option<&mut [FdSetElement]>, + exceptfds: Option<&mut [FdSetElement]>, + timeout: Option<&Timespec>, +) -> io::Result { + backend::event::syscalls::select(nfds, readfds, writefds, exceptfds, timeout) +} + +#[cfg(not(any(windows, target_os = "wasi")))] +const BITS: usize = core::mem::size_of::() * 8; + +/// Set `fd` in the set pointed to by `fds`. 
+#[doc(alias = "FD_SET")] +#[inline] +pub fn fd_set_insert(fds: &mut [FdSetElement], fd: RawFd) { + #[cfg(not(any(windows, target_os = "wasi")))] + { + let fd = fd as usize; + fds[fd / BITS].0 |= 1 << (fd % BITS); + } + + #[cfg(any(windows, target_os = "wasi"))] + { + let set = unsafe { &mut *fds.as_mut_ptr().cast::<FD_SET>() }; + let fd_count = set.fd_count; + let fd_array = unsafe { slice::from_raw_parts(set.fd_array.as_ptr(), fd_count as usize) }; + + if !fd_array.iter().any(|p| *p as RawFd == fd) { + let fd_array = unsafe { + slice::from_raw_parts_mut(set.fd_array.as_mut_ptr(), fd_count as usize + 1) + }; + set.fd_count = fd_count + 1; + fd_array[fd_count as usize] = fd as _; + } + } +} + +/// Clear `fd` in the set pointed to by `fds`. +#[doc(alias = "FD_CLR")] +#[inline] +pub fn fd_set_remove(fds: &mut [FdSetElement], fd: RawFd) { + #[cfg(not(any(windows, target_os = "wasi")))] + { + let fd = fd as usize; + fds[fd / BITS].0 &= !(1 << (fd % BITS)); + } + + #[cfg(any(windows, target_os = "wasi"))] + { + let set = unsafe { &mut *fds.as_mut_ptr().cast::<FD_SET>() }; + let fd_count = set.fd_count; + let fd_array = unsafe { slice::from_raw_parts(set.fd_array.as_ptr(), fd_count as usize) }; + + if let Some(pos) = fd_array.iter().position(|p| *p as RawFd == fd) { + set.fd_count = fd_count - 1; + set.fd_array[pos] = *fd_array.last().unwrap(); // last live entry fills the hole + } + } +} + +/// Compute the minimum `nfds` value needed for the set pointed to by +/// `fds`. 
+#[inline] +pub fn fd_set_bound(fds: &[FdSetElement]) -> RawFd { + #[cfg(not(any(windows, target_os = "wasi")))] + { + if let Some(position) = fds.iter().rposition(|element| element.0 != 0) { + let element = fds[position].0; + (position * BITS + (BITS - element.leading_zeros() as usize)) as RawFd + } else { + 0 + } + } + + #[cfg(any(windows, target_os = "wasi"))] + { + let set = unsafe { &*fds.as_ptr().cast::() }; + let fd_count = set.fd_count; + let fd_array = unsafe { slice::from_raw_parts(set.fd_array.as_ptr(), fd_count as usize) }; + let mut max = 0; + for fd in fd_array { + if *fd >= max { + max = *fd + 1; + } + } + max as RawFd + } +} + +/// Compute the number of `FdSetElement`s needed to hold a set which can +/// contain up to `set_count` file descriptors with values less than `nfds`. +#[inline] +pub fn fd_set_num_elements(set_count: usize, nfds: RawFd) -> usize { + #[cfg(any(windows, target_os = "wasi"))] + { + let _ = nfds; + + fd_set_num_elements_for_fd_array(set_count) + } + + #[cfg(not(any(windows, target_os = "wasi")))] + { + let _ = set_count; + + fd_set_num_elements_for_bitvector(nfds) + } +} + +/// `fd_set_num_elements` implementation on platforms with fd array +/// implementations. +#[cfg(any(windows, target_os = "wasi"))] +#[inline] +pub(crate) fn fd_set_num_elements_for_fd_array(set_count: usize) -> usize { + // Allocate space for an `fd_count` field, plus `set_count` elements + // for the `fd_array` field. + div_ceil( + align_of::() + set_count * size_of::(), + size_of::(), + ) +} + +/// `fd_set_num_elements` implementation on platforms with bitvector +/// implementations. +#[cfg(not(any(windows, target_os = "wasi")))] +#[inline] +pub(crate) fn fd_set_num_elements_for_bitvector(nfds: RawFd) -> usize { + // Allocate space for a dense bitvector for `nfds` bits. 
+ let nfds = nfds as usize; + div_ceil(nfds, BITS) +} + +fn div_ceil(lhs: usize, rhs: usize) -> usize { + let d = lhs / rhs; + let r = lhs % rhs; + if r > 0 { + d + 1 + } else { + d + } +} + +/// An iterator over the fds in a set. +#[doc(alias = "FD_ISSET")] +#[cfg(not(any(windows, target_os = "wasi")))] +pub struct FdSetIter<'a> { + current: RawFd, + fds: &'a [FdSetElement], +} + +/// An iterator over the fds in a set. +#[doc(alias = "FD_ISSET")] +#[cfg(any(windows, target_os = "wasi"))] +pub struct FdSetIter<'a> { + current: usize, + fds: &'a [FdSetElement], +} + +impl<'a> FdSetIter<'a> { + /// Construct a `FdSetIter` for the given set. + pub fn new(fds: &'a [FdSetElement]) -> Self { + Self { current: 0, fds } + } +} + +#[cfg(not(any(windows, target_os = "wasi")))] +impl<'a> Iterator for FdSetIter<'a> { + type Item = RawFd; + + fn next(&mut self) -> Option { + if let Some(element) = self.fds.get(self.current as usize / BITS) { + // Test whether the current element has more bits set. + let shifted = element.0 >> ((self.current as usize % BITS) as u32); + if shifted != 0 { + let fd = self.current + shifted.trailing_zeros() as RawFd; + self.current = fd + 1; + return Some(fd); + } + + // Search through the array for the next element with bits set. + if let Some(index) = self.fds[(self.current as usize / BITS) + 1..] 
+ .iter() + .position(|element| element.0 != 0) + { + let index = index + (self.current as usize / BITS) + 1; + let element = self.fds[index].0; + let fd = (index * BITS) as RawFd + element.trailing_zeros() as RawFd; + self.current = fd + 1; + return Some(fd); + } + } + None + } +} + +#[cfg(any(windows, target_os = "wasi"))] +impl<'a> Iterator for FdSetIter<'a> { + type Item = RawFd; + + fn next(&mut self) -> Option { + let current = self.current; + + let set = unsafe { &*self.fds.as_ptr().cast::() }; + let fd_count = set.fd_count; + let fd_array = unsafe { slice::from_raw_parts(set.fd_array.as_ptr(), fd_count as usize) }; + + if current == fd_count as usize { + return None; + } + let fd = fd_array[current as usize]; + self.current = current + 1; + Some(fd as RawFd) + } +} + +#[cfg(test)] +mod test { + use super::*; + use core::mem::{align_of, size_of}; + + #[test] + #[cfg(any(windows, target_os = "wasi"))] + fn layouts() { + // The `FdSetElement` array should be suitably aligned. + assert_eq!(align_of::(), align_of::()); + + // The layout of `FD_SET` should match our layout of a set of the same + // size. + assert_eq!( + fd_set_num_elements_for_fd_array( + memoffset::span_of!(FD_SET, fd_array).len() / size_of::() + ) * size_of::(), + size_of::() + ); + } + + #[test] + #[cfg(any(bsd, linux_kernel))] + fn layouts() { + use crate::backend::c; + + // The `FdSetElement` array should be suitably aligned. + assert_eq!(align_of::(), align_of::()); + + // The layout of `fd_set` should match our layout of a set of the same + // size. 
+ assert_eq!( + fd_set_num_elements_for_bitvector(c::FD_SETSIZE as RawFd) * size_of::(), + size_of::() + ); + } +} diff --git a/src/io_uring.rs b/src/io_uring.rs index de272188f..bc15595c0 100644 --- a/src/io_uring.rs +++ b/src/io_uring.rs @@ -40,7 +40,7 @@ pub use crate::fs::{ }; pub use crate::io::ReadWriteFlags; pub use crate::net::{RecvFlags, SendFlags, SocketFlags}; -pub use crate::timespec::Timespec; +pub use crate::timespec::{Nsecs, Secs, Timespec}; pub use linux_raw_sys::general::sigset_t; pub use net::{__kernel_sockaddr_storage as sockaddr_storage, msghdr, sockaddr, socklen_t}; diff --git a/src/lib.rs b/src/lib.rs index 0fd0dc4f8..e53e263ab 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -71,6 +71,7 @@ //! - Provide y2038 compatibility, on platforms which support this. //! - Correct selected platform bugs, such as behavioral differences when //! running under seccomp. +//! - Use `timespec` for timestamps instead of `timeval`. //! //! Things they don't do include: //! - Detecting whether functions are supported at runtime, except in specific @@ -355,13 +356,13 @@ mod prctl; #[cfg(not(any(windows, target_os = "espidf", target_os = "wasi")))] #[cfg(any(feature = "process", feature = "runtime", all(bsd, feature = "event")))] mod signal; -#[cfg(not(windows))] #[cfg(any( feature = "fs", feature = "process", feature = "runtime", feature = "thread", feature = "time", + all(feature = "event", any(bsd, linux_kernel, windows, target_os = "wasi")), all( linux_raw, not(feature = "use-libc-auxv"), diff --git a/src/thread/clock.rs b/src/thread/clock.rs index 8023f5466..a5302ba3a 100644 --- a/src/thread/clock.rs +++ b/src/thread/clock.rs @@ -1,7 +1,7 @@ use crate::{backend, io}; use core::fmt; -pub use crate::timespec::Timespec; +pub use crate::timespec::{Nsecs, Secs, Timespec}; #[cfg(not(any( apple, diff --git a/src/thread/futex.rs b/src/thread/futex.rs index 35c1f550d..d05ab5257 100644 --- a/src/thread/futex.rs +++ b/src/thread/futex.rs @@ -34,7 +34,7 @@ use 
crate::fd::{FromRawFd, OwnedFd, RawFd}; use crate::utils::option_as_ptr; use crate::{backend, io}; -pub use crate::timespec::Timespec; +pub use crate::timespec::{Nsecs, Secs, Timespec}; pub use backend::thread::futex::{Flags, OWNER_DIED, WAITERS}; diff --git a/tests/event/main.rs b/tests/event/main.rs index 68f999737..5e8ef6936 100644 --- a/tests/event/main.rs +++ b/tests/event/main.rs @@ -10,3 +10,34 @@ mod epoll; #[cfg(not(target_os = "wasi"))] mod eventfd; mod poll; +#[cfg(any(bsd, linux_kernel, windows, target_os = "wasi"))] +mod select; + +#[cfg(windows)] +mod windows { + use std::sync::OnceLock; + + pub struct Thing; + + impl Thing { + pub fn new() -> Self { + let _ = rustix::net::wsa_startup().unwrap(); + Self + } + } + + impl Drop for Thing { + fn drop(&mut self) { + rustix::net::wsa_cleanup().unwrap(); + } + } + + pub static CLEANUP: OnceLock = OnceLock::new(); +} + +/// Checks whether the Windows socket interface has been started already, and +/// if not, starts it. +pub fn init() { + #[cfg(windows)] + let _ = windows::CLEANUP.get_or_init(|| windows::Thing::new()); +} diff --git a/tests/event/select.rs b/tests/event/select.rs new file mode 100644 index 000000000..66919824c --- /dev/null +++ b/tests/event/select.rs @@ -0,0 +1,428 @@ +use rustix::event::{ + fd_set_bound, fd_set_insert, fd_set_num_elements, fd_set_remove, FdSetElement, FdSetIter, +}; +use rustix::event::{select, Timespec}; +use rustix::fd::{AsRawFd, RawFd}; +#[cfg(feature = "pipe")] +#[cfg(not(windows))] +use rustix::fd::{FromRawFd, OwnedFd}; +use rustix::io::retry_on_intr; +use serial_test::serial; +use std::cmp::max; + +#[cfg(feature = "pipe")] +#[cfg(not(windows))] +#[test] +fn test_select_with_pipes() { + use rustix::io::{read, write}; + use rustix::pipe::pipe; + + // Create a pipe. + let (reader, writer) = pipe().unwrap(); + let nfds = max(reader.as_raw_fd(), writer.as_raw_fd()) + 1; + + // `select` should say there's nothing ready to be read from the pipe. 
+ let mut readfds = vec![FdSetElement::default(); fd_set_num_elements(2, nfds)]; + fd_set_insert(&mut readfds, reader.as_raw_fd()); + let num = retry_on_intr(|| unsafe { + select( + nfds, + Some(&mut readfds), + None, + None, + Some(&Timespec { + tv_sec: 0, + tv_nsec: 0, + }), + ) + }) + .unwrap(); + assert_eq!(num, 0); + assert!(!fd_set_contains(&readfds, reader.as_raw_fd())); + assert_eq!(fd_set_bound(&readfds), 0); + + // Write a byte to the pipe. + assert_eq!(retry_on_intr(|| write(&writer, b"a")).unwrap(), 1); + + // `select` should now say there's data to be read. + let mut readfds = vec![FdSetElement::default(); fd_set_num_elements(2, nfds)]; + fd_set_insert(&mut readfds, reader.as_raw_fd()); + let num = + retry_on_intr(|| unsafe { select(nfds, Some(&mut readfds), None, None, None) }).unwrap(); + assert_eq!(num, 1); + assert!(fd_set_contains(&readfds, reader.as_raw_fd())); + assert_eq!(fd_set_bound(&readfds), reader.as_raw_fd() + 1); + fd_set_remove(&mut readfds, reader.as_raw_fd()); + assert!(!fd_set_contains(&readfds, reader.as_raw_fd())); + assert_eq!(fd_set_bound(&readfds), 0); + + // Read the byte from the pipe. + let mut buf = [b'\0']; + assert_eq!(retry_on_intr(|| read(&reader, &mut buf)).unwrap(), 1); + assert_eq!(buf[0], b'a'); + + // Select should now say there's no more data to be read. + fd_set_insert(&mut readfds, reader.as_raw_fd()); + let num = retry_on_intr(|| unsafe { + select( + nfds, + Some(&mut readfds), + None, + None, + Some(&Timespec { + tv_sec: 0, + tv_nsec: 0, + }), + ) + }) + .unwrap(); + assert_eq!(num, 0); + assert!(!fd_set_contains(&readfds, reader.as_raw_fd())); + assert_eq!(fd_set_bound(&readfds), 0); +} + +#[cfg(feature = "pipe")] +#[cfg(not(windows))] +#[test] +#[serial] // for `setrlimit` usage +fn test_select_with_great_fds() { + use core::cmp::max; + use rustix::io::{read, write}; + use rustix::pipe::pipe; + use rustix::process::{getrlimit, setrlimit, Resource}; + + // Create a pipe. 
+ let (reader, writer) = pipe().unwrap(); + + // Raise the file descriptor limit so that we can test fds above + // `FD_SETSIZE`. + let orig_rlimit = getrlimit(Resource::Nofile); + let mut rlimit = orig_rlimit; + if let Some(current) = rlimit.current { + rlimit.current = Some(max(current, libc::FD_SETSIZE as u64 + 2)); + } + setrlimit(Resource::Nofile, rlimit).unwrap(); + + // Create a fd at `FD_SETSIZE + 1` out of thin air. Use `libc` instead + // of `OwnedFd::from_raw_fd` because grabbing a fd out of thin air + // violates Rust's concept of I/O safety (and wouldn't make sense to do + // in anything other than a test like this). + let great_fd = unsafe { libc::dup2(reader.as_raw_fd(), libc::FD_SETSIZE as RawFd + 1) }; + let reader = unsafe { OwnedFd::from_raw_fd(great_fd) }; + + let nfds = max(reader.as_raw_fd(), writer.as_raw_fd()) + 1; + + // `select` should say there's nothing ready to be read from the pipe. + let mut readfds = vec![FdSetElement::default(); fd_set_num_elements(2, nfds)]; + fd_set_insert(&mut readfds, reader.as_raw_fd()); + let num = retry_on_intr(|| unsafe { + select( + nfds, + Some(&mut readfds), + None, + None, + Some(&Timespec { + tv_sec: 0, + tv_nsec: 0, + }), + ) + }) + .unwrap(); + assert_eq!(num, 0); + assert!(!fd_set_contains(&readfds, reader.as_raw_fd())); + assert_eq!(fd_set_bound(&readfds), 0); + + // Write a byte to the pipe. + assert_eq!(retry_on_intr(|| write(&writer, b"a")).unwrap(), 1); + + // `select` should now say there's data to be read. 
+ let mut readfds = vec![FdSetElement::default(); fd_set_num_elements(2, nfds)]; + fd_set_insert(&mut readfds, reader.as_raw_fd()); + let num = + retry_on_intr(|| unsafe { select(nfds, Some(&mut readfds), None, None, None) }).unwrap(); + assert_eq!(num, 1); + assert!(fd_set_contains(&readfds, reader.as_raw_fd())); + assert_eq!(fd_set_bound(&readfds), reader.as_raw_fd() + 1); + fd_set_remove(&mut readfds, reader.as_raw_fd()); + assert!(!fd_set_contains(&readfds, reader.as_raw_fd())); + assert_eq!(fd_set_bound(&readfds), 0); + + // Read the byte from the pipe. + let mut buf = [b'\0']; + assert_eq!(retry_on_intr(|| read(&reader, &mut buf)).unwrap(), 1); + assert_eq!(buf[0], b'a'); + + // Select should now say there's no more data to be read. + fd_set_insert(&mut readfds, reader.as_raw_fd()); + let num = retry_on_intr(|| unsafe { + select( + nfds, + Some(&mut readfds), + None, + None, + Some(&Timespec { + tv_sec: 0, + tv_nsec: 0, + }), + ) + }) + .unwrap(); + assert_eq!(num, 0); + assert!(!fd_set_contains(&readfds, reader.as_raw_fd())); + assert_eq!(fd_set_bound(&readfds), 0); + + // Reset the process limit. + setrlimit(Resource::Nofile, orig_rlimit).unwrap(); +} + +#[cfg(feature = "net")] +#[test] +#[serial] // for `crate::init` +fn test_select_with_sockets() { + use rustix::net::{recv, send, AddressFamily, RecvFlags, SendFlags, SocketType}; + use std::net::{IpAddr, Ipv4Addr, SocketAddr}; + + crate::init(); + + // Create a socket pair (but don't use `socketpair` because we want this + // to work on Windows too). 
+ + let localhost = IpAddr::V4(Ipv4Addr::LOCALHOST); + let addr = SocketAddr::new(localhost, 0); + let listener = rustix::net::socket(AddressFamily::INET, SocketType::STREAM, None).unwrap(); + rustix::net::bind(&listener, &addr).expect("bind"); + rustix::net::listen(&listener, 1).expect("listen"); + let local_addr = rustix::net::getsockname(&listener).unwrap(); + let writer = rustix::net::socket(AddressFamily::INET, SocketType::STREAM, None).unwrap(); + rustix::net::connect_any(&writer, &local_addr).expect("connect"); + let reader = rustix::net::accept(&listener).expect("accept"); + + let nfds = max(reader.as_raw_fd(), writer.as_raw_fd()) + 1; + + #[cfg(windows)] + let nfds: i32 = nfds.try_into().unwrap(); + + // `select` should say there's nothing ready to be read from the pipe. + let mut readfds = vec![FdSetElement::default(); fd_set_num_elements(2, nfds as RawFd)]; + fd_set_insert(&mut readfds, reader.as_raw_fd()); + let num = retry_on_intr(|| unsafe { + select( + nfds, + Some(&mut readfds), + None, + None, + Some(&Timespec { + tv_sec: 0, + tv_nsec: 0, + }), + ) + }) + .unwrap(); + assert_eq!(num, 0); + assert!(!fd_set_contains(&readfds, reader.as_raw_fd())); + assert_eq!(fd_set_bound(&readfds), 0); + + // Write a byte to the pipe. + assert_eq!( + retry_on_intr(|| send(&writer, b"a", SendFlags::empty())).unwrap(), + 1 + ); + + // `select` should now say there's data to be read. + let mut readfds = vec![FdSetElement::default(); fd_set_num_elements(2, nfds as RawFd)]; + fd_set_insert(&mut readfds, reader.as_raw_fd()); + let num = + retry_on_intr(|| unsafe { select(nfds, Some(&mut readfds), None, None, None) }).unwrap(); + assert_eq!(num, 1); + assert!(fd_set_contains(&readfds, reader.as_raw_fd())); + assert_eq!(fd_set_bound(&readfds), reader.as_raw_fd() + 1); + fd_set_remove(&mut readfds, reader.as_raw_fd()); + assert!(!fd_set_contains(&readfds, reader.as_raw_fd())); + assert_eq!(fd_set_bound(&readfds), 0); + + // Read the byte from the pipe. 
+ let mut buf = [b'\0']; + assert_eq!( + retry_on_intr(|| recv(&reader, &mut buf, RecvFlags::empty())).unwrap(), + 1 + ); + assert_eq!(buf[0], b'a'); + + // Select should now say there's no more data to be read. + fd_set_insert(&mut readfds, reader.as_raw_fd()); + let num = retry_on_intr(|| unsafe { + select( + nfds, + Some(&mut readfds), + None, + None, + Some(&Timespec { + tv_sec: 0, + tv_nsec: 0, + }), + ) + }) + .unwrap(); + assert_eq!(num, 0); + assert!(!fd_set_contains(&readfds, reader.as_raw_fd())); + assert_eq!(fd_set_bound(&readfds), 0); +} + +// Like `test_select_with_sockets` but test with the maximum permitted +// fd value. +#[cfg(feature = "net")] +#[cfg(not(windows))] // for `dup2` usage +#[test] +#[serial] // for `setrlimit` usage, and `crate::init` +fn test_select_with_maxfd_sockets() { + use rustix::net::{recv, send, AddressFamily, RecvFlags, SendFlags, SocketType}; + use rustix::process::{getrlimit, setrlimit, Resource}; + use std::net::{IpAddr, Ipv4Addr, SocketAddr}; + + crate::init(); + + let localhost = IpAddr::V4(Ipv4Addr::LOCALHOST); + let addr = SocketAddr::new(localhost, 0); + let listener = rustix::net::socket(AddressFamily::INET, SocketType::STREAM, None).unwrap(); + rustix::net::bind(&listener, &addr).expect("bind"); + rustix::net::listen(&listener, 1).expect("listen"); + let local_addr = rustix::net::getsockname(&listener).unwrap(); + let writer = rustix::net::socket(AddressFamily::INET, SocketType::STREAM, None).unwrap(); + rustix::net::connect_any(&writer, &local_addr).expect("connect"); + let reader = rustix::net::accept(&listener).expect("accept"); + + // Raise the fd limit to the maximum. + let orig_rlimit = getrlimit(Resource::Nofile); + let mut rlimit = orig_rlimit; + let mut fd_limit = libc::FD_SETSIZE as RawFd; + if let Some(maximum) = rlimit.maximum { + rlimit.current = Some(maximum); + fd_limit = maximum as RawFd; + } + setrlimit(Resource::Nofile, rlimit).unwrap(); + + // Renumber the fds to the maximum possible values. 
+ let great_fd = unsafe { libc::dup2(reader.as_raw_fd(), fd_limit as RawFd - 1) }; + + // On old versions of macOS, the above `dup2` call fails with `EBADF`. Just + // skip the rest of this test in that case. + #[cfg(apple)] + if great_fd == -1 && libc_errno::errno().0 == libc::EBADF { + return; + } + + let reader = unsafe { OwnedFd::from_raw_fd(great_fd) }; + let great_fd = unsafe { libc::dup2(writer.as_raw_fd(), fd_limit as RawFd - 2) }; + let writer = unsafe { OwnedFd::from_raw_fd(great_fd) }; + + let nfds = max(reader.as_raw_fd(), writer.as_raw_fd()) + 1; + + #[cfg(windows)] + let nfds: i32 = nfds.try_into().unwrap(); + + // `select` should say there's nothing ready to be read from the pipe. + let mut readfds = vec![FdSetElement::default(); fd_set_num_elements(2, nfds as RawFd)]; + fd_set_insert(&mut readfds, reader.as_raw_fd()); + let num = retry_on_intr(|| unsafe { + select( + nfds, + Some(&mut readfds), + None, + None, + Some(&Timespec { + tv_sec: 0, + tv_nsec: 0, + }), + ) + }) + .unwrap(); + assert_eq!(num, 0); + assert!(!fd_set_contains(&readfds, reader.as_raw_fd())); + assert_eq!(fd_set_bound(&readfds), 0); + + // Write a byte to the pipe. + assert_eq!( + retry_on_intr(|| send(&writer, b"a", SendFlags::empty())).unwrap(), + 1 + ); + + // `select` should now say there's data to be read. + let mut readfds = vec![FdSetElement::default(); fd_set_num_elements(2, nfds as RawFd)]; + fd_set_insert(&mut readfds, reader.as_raw_fd()); + let num = + retry_on_intr(|| unsafe { select(nfds, Some(&mut readfds), None, None, None) }).unwrap(); + assert_eq!(num, 1); + assert!(fd_set_contains(&readfds, reader.as_raw_fd())); + assert_eq!(fd_set_bound(&readfds), reader.as_raw_fd() + 1); + fd_set_remove(&mut readfds, reader.as_raw_fd()); + assert!(!fd_set_contains(&readfds, reader.as_raw_fd())); + assert_eq!(fd_set_bound(&readfds), 0); + + // Read the byte from the pipe. 
+ let mut buf = [b'\0']; + assert_eq!( + retry_on_intr(|| recv(&reader, &mut buf, RecvFlags::empty())).unwrap(), + 1 + ); + assert_eq!(buf[0], b'a'); + + // Select should now say there's no more data to be read. + fd_set_insert(&mut readfds, reader.as_raw_fd()); + let num = retry_on_intr(|| unsafe { + select( + nfds, + Some(&mut readfds), + None, + None, + Some(&Timespec { + tv_sec: 0, + tv_nsec: 0, + }), + ) + }) + .unwrap(); + assert_eq!(num, 0); + assert!(!fd_set_contains(&readfds, reader.as_raw_fd())); + assert_eq!(fd_set_bound(&readfds), 0); + + setrlimit(Resource::Nofile, orig_rlimit).unwrap(); +} + +#[test] +fn test_select_iter() { + for stuff in [ + &[1, 3, 31, 64, 128, 1024, 1025, 1030][..], + &[100, 101, 102, 103, 104, 105, 106, 107, 2999][..], + &[0, 8, 32, 64, 128][..], + &[0, 1, 2, 3, 31, 32, 33, 34, 35][..], + &[500][..], + &[128][..], + &[127][..], + &[0][..], + &[][..], + ] { + let nfds = if stuff.is_empty() { + 0 + } else { + *stuff.last().unwrap() + 1 + }; + let mut fds = vec![FdSetElement::default(); fd_set_num_elements(stuff.len(), nfds)]; + for fd in stuff { + assert!(!fd_set_contains(&fds, *fd)); + fd_set_insert(&mut fds, *fd); + assert!(fd_set_contains(&fds, *fd)); + fd_set_remove(&mut fds, *fd); + assert!(!fd_set_contains(&fds, *fd)); + fd_set_insert(&mut fds, *fd); + assert!(fd_set_contains(&fds, *fd)); + } + assert_eq!(fd_set_bound(&fds), nfds); + assert_eq!(FdSetIter::new(&fds).collect::>(), stuff); + } +} + +// This isn't in rustix's public API because it isn't constant time. On +// bitvector platforms it could be, but on fd array platforms it can't be. +fn fd_set_contains(fds: &[FdSetElement], fd: RawFd) -> bool { + FdSetIter::new(fds).any(|x| x == fd) +} diff --git a/tests/path/arg.rs b/tests/path/arg.rs index 66f0bfd23..2b330b95b 100644 --- a/tests/path/arg.rs +++ b/tests/path/arg.rs @@ -1,3 +1,6 @@ +// TODO: Rename `Arg::as_str` to avoid collisions. 
+#![allow(unstable_name_collisions)] + use rustix::ffi::{CStr, CString}; use rustix::io; use rustix::path::Arg; diff --git a/tests/process/wait.rs b/tests/process/wait.rs index 23c41b1bb..1149d660c 100644 --- a/tests/process/wait.rs +++ b/tests/process/wait.rs @@ -23,6 +23,15 @@ fn test_waitpid_none() { .unwrap(); assert_eq!(pid, process::Pid::from_child(&child)); assert!(status.stopped()); + + // Clean up the child process. + unsafe { kill(child.id() as _, SIGKILL) }; + + let (pid, status) = process::waitpid(None, process::WaitOptions::UNTRACED) + .expect("failed to wait") + .unwrap(); + assert_eq!(pid, process::Pid::from_child(&child)); + assert!(status.signaled()); } #[test] @@ -41,6 +50,15 @@ fn test_waitpid_some() { .unwrap(); assert_eq!(rpid, pid); assert!(status.stopped()); + + // Clean up the child process. + unsafe { kill(child.id() as _, SIGKILL) }; + + let (rpid, status) = process::waitpid(Some(pid), process::WaitOptions::UNTRACED) + .expect("failed to wait") + .unwrap(); + assert_eq!(rpid, pid); + assert!(status.signaled()); } #[test] @@ -59,6 +77,15 @@ fn test_waitpgid() { .unwrap(); assert_eq!(pid, process::Pid::from_child(&child)); assert!(status.stopped()); + + // Clean up the child process. + unsafe { kill(child.id() as _, SIGKILL) }; + + let (pid, status) = process::waitpgid(pgid, process::WaitOptions::UNTRACED) + .expect("failed to wait") + .unwrap(); + assert_eq!(pid, process::Pid::from_child(&child)); + assert!(status.signaled()); } #[cfg(not(any(