diff --git a/bootstrap.c b/bootstrap.c index 1f17fc8455..44ba071476 100644 --- a/bootstrap.c +++ b/bootstrap.c @@ -143,6 +143,7 @@ int main(int argc, char **argv) { "pub const skip_non_native = false;\n" "pub const debug_gpa = false;\n" "pub const dev = .core;\n" + "pub const io_mode: enum { threaded, evented } = .threaded;\n" "pub const value_interpret_mode = .direct;\n" , zig_version); if (written < 100) diff --git a/build.zig b/build.zig index ed16d78dad..1ec1fa9caf 100644 --- a/build.zig +++ b/build.zig @@ -13,6 +13,7 @@ const DevEnv = @import("src/dev.zig").Env; const zig_version: std.SemanticVersion = .{ .major = 0, .minor = 16, .patch = 0 }; const stack_size = 46 * 1024 * 1024; +const IoMode = enum { threaded, evented }; const ValueInterpretMode = enum { direct, by_name }; pub fn build(b: *std.Build) !void { @@ -188,6 +189,7 @@ pub fn build(b: *std.Build) !void { const strip = b.option(bool, "strip", "Omit debug information"); const valgrind = b.option(bool, "valgrind", "Enable valgrind integration"); const pie = b.option(bool, "pie", "Produce a Position Independent Executable"); + const io_mode = b.option(IoMode, "io-mode", "How the compiler performs IO") orelse .threaded; const value_interpret_mode = b.option(ValueInterpretMode, "value-interpret-mode", "How the compiler translates between 'std.builtin' types and its internal datastructures") orelse .direct; const value_tracing = b.option(bool, "value-tracing", "Enable extra state tracking to help troubleshoot bugs in the compiler (using the std.debug.Trace API)") orelse false; @@ -236,6 +238,7 @@ pub fn build(b: *std.Build) !void { exe_options.addOption(bool, "llvm_has_xtensa", llvm_has_xtensa); exe_options.addOption(bool, "debug_gpa", debug_gpa); exe_options.addOption(DevEnv, "dev", b.option(DevEnv, "dev", "Build a compiler with a reduced feature set for development of specific features") orelse if (only_c) .bootstrap else .full); + exe_options.addOption(IoMode, "io_mode", io_mode); 
exe_options.addOption(ValueInterpretMode, "value_interpret_mode", value_interpret_mode); if (link_libc) { @@ -710,6 +713,7 @@ fn addWasiUpdateStep(b: *std.Build, version: [:0]const u8) !void { exe_options.addOption(u32, "tracy_callstack_depth", 0); exe_options.addOption(bool, "value_tracing", false); exe_options.addOption(DevEnv, "dev", .bootstrap); + exe_options.addOption(IoMode, "io_mode", .threaded); // zig1 chooses to interpret values by name. The tradeoff is as follows: // diff --git a/lib/std/Io.zig b/lib/std/Io.zig index c56ec68866..b22345ed94 100644 --- a/lib/std/Io.zig +++ b/lib/std/Io.zig @@ -378,7 +378,9 @@ pub const Operation = union(enum) { pub const Pending = struct { node: List.DoubleNode, tag: Tag, - context: [3]usize, + context: Context align(@max(@alignOf(usize), 4)), + + pub const Context = [3]usize; }; pub const Completion = struct { @@ -426,10 +428,10 @@ pub fn operate(io: Io, operation: Operation) Cancelable!Operation.Result { pub const Batch = struct { storage: []Operation.Storage, unused: Operation.List, - submissions: Operation.List, + submitted: Operation.List, pending: Operation.List, - completions: Operation.List, - context: ?*anyopaque, + completed: Operation.List, + context: ?*anyopaque align(@max(@alignOf(?*anyopaque), 4)), /// After calling this, it is safe to unconditionally defer a call to /// `cancel`. 
`storage` is a pre-allocated buffer of undefined memory that @@ -448,9 +450,9 @@ pub const Batch = struct { .head = .fromIndex(0), .tail = .fromIndex(storage.len - 1), }, - .submissions = .empty, + .submitted = .empty, .pending = .empty, - .completions = .empty, + .completed = .empty, .context = null, }; } @@ -471,20 +473,20 @@ pub const Batch = struct { const storage = &b.storage[index]; const unused = storage.unused; switch (unused.prev) { - .none => b.unused.head = .none, + .none => b.unused.head = unused.next, else => |prev_index| b.storage[prev_index.toIndex()].unused.next = unused.next, } switch (unused.next) { - .none => b.unused.tail = .none, + .none => b.unused.tail = unused.prev, else => |next_index| b.storage[next_index.toIndex()].unused.prev = unused.prev, } - switch (b.submissions.tail) { - .none => b.submissions.head = .fromIndex(index), + switch (b.submitted.tail) { + .none => b.submitted.head = .fromIndex(index), else => |tail_index| b.storage[tail_index.toIndex()].submission.node.next = .fromIndex(index), } storage.* = .{ .submission = .{ .node = .{ .next = .none }, .operation = operation } }; - b.submissions.tail = .fromIndex(index); + b.submitted.tail = .fromIndex(index); } pub const Completion = struct { @@ -501,13 +503,13 @@ pub const Batch = struct { /// Each completion returned from this function dequeues from the `Batch`. /// It is not required to dequeue all completions before awaiting again. 
pub fn next(b: *Batch) ?Completion { - const index = b.completions.head; + const index = b.completed.head; if (index == .none) return null; const storage = &b.storage[index.toIndex()]; const completion = storage.completion; const next_index = completion.node.next; - b.completions.head = next_index; - if (next_index == .none) b.completions.tail = .none; + b.completed.head = next_index; + if (next_index == .none) b.completed.tail = .none; const tail_index = b.unused.tail; switch (tail_index) { @@ -551,7 +553,27 @@ pub const Batch = struct { /// may have successfully completed regardless of the cancel request and /// will appear in the iteration. pub fn cancel(b: *Batch, io: Io) void { - return io.vtable.batchCancel(io.userdata, b); + { // abort pending submissions + var tail_index = b.unused.tail; + defer b.unused.tail = tail_index; + var index = b.submitted.head; + errdefer b.submitted.head = index; + while (index != .none) { + const next_index = b.storage[index.toIndex()].submission.node.next; + switch (tail_index) { + .none => b.unused.head = index, + else => b.storage[tail_index.toIndex()].unused.next = index, + } + b.storage[index.toIndex()] = .{ .unused = .{ .prev = tail_index, .next = .none } }; + tail_index = index; + index = next_index; + } + b.submitted = .{ .head = .none, .tail = .none }; + } + io.vtable.batchCancel(io.userdata, b); + assert(b.submitted.head == .none and b.submitted.tail == .none); + assert(b.pending.head == .none and b.pending.tail == .none); + assert(b.context == null); // that was the last chance to deallocate resources } }; @@ -1117,13 +1139,13 @@ pub fn recancel(io: Io) void { /// To modify a task's cancel protection state, see `swapCancelProtection`. /// /// For a description of cancelation and cancelation points, see `Future.cancel`. -pub const CancelProtection = enum { +pub const CancelProtection = enum(u1) { /// Any call to an `Io` function with `error.Canceled` in its error set is a cancelation point. 
/// /// This is the default state, which all tasks are created in. - unblocked, + unblocked = 0, /// No `Io` function introduces a cancelation point (`error.Canceled` will never be returned). - blocked, + blocked = 1, }; /// Updates the current task's cancel protection state (see `CancelProtection`). /// @@ -1292,8 +1314,7 @@ pub fn futexWake(io: Io, comptime T: type, ptr: *align(@alignOf(u32)) const T, m /// shared region of code known as the "critical section". /// /// Mutex is an extern struct so that it may be used as a field inside another -/// extern struct. Having a guaranteed memory layout including mutexes is -/// important for IPC over shared memory (mmap). +/// extern struct. pub const Mutex = extern struct { state: std.atomic.Value(State), diff --git a/lib/std/Io/File.zig b/lib/std/Io/File.zig index ba7f5d01e0..07f4285cef 100644 --- a/lib/std/Io/File.zig +++ b/lib/std/Io/File.zig @@ -477,12 +477,17 @@ pub const Permissions = std.Options.FilePermissions orelse if (is_windows) enum( /// libc implementations use `0o666` inside `fopen` and then rely on the /// process-scoped "umask" setting to adjust this number for file creation. default_file = 0o666, - default_dir = 0o755, - executable_file = 0o777, + /// This is the default mode given to POSIX operating systems for creating + /// directories. `0o777` is "drwxrwxrwx" which is counter-intuitive at first, + /// since most people would expect "drwxr-xr-x", for example, when using + /// the `mkdir` command, which would correspond to `0o755`. 
+ default_dir = 0o777, _, pub const has_executable_bit = native_os != .wasi; + pub const executable_file: @This() = .default_dir; + pub fn toMode(self: @This()) std.posix.mode_t { return @intFromEnum(self); } diff --git a/lib/std/Io/IoUring.zig b/lib/std/Io/IoUring.zig index 8ff3ae22ef..f853aaa08f 100644 --- a/lib/std/Io/IoUring.zig +++ b/lib/std/Io/IoUring.zig @@ -1,21 +1,80 @@ -const EventLoop = @This(); -const builtin = @import("builtin"); - -const std = @import("../std.zig"); -const Io = std.Io; -const assert = std.debug.assert; -const Allocator = std.mem.Allocator; +const addressFromPosix = Io.Threaded.addressFromPosix; +const addressToPosix = Io.Threaded.addressToPosix; const Alignment = std.mem.Alignment; -const IoUring = std.os.linux.IoUring; +const Allocator = std.mem.Allocator; +const Argv0 = Io.Threaded.Argv0; +const assert = std.debug.assert; +const builtin = @import("builtin"); +const ChdirError = Io.Threaded.ChdirError; +const clockToPosix = Io.Threaded.clockToPosix; +const Csprng = Io.Threaded.Csprng; +const default_PATH = Io.Threaded.default_PATH; +const Dir = Io.Dir; +const Environ = Io.Threaded.Environ; +const errnoBug = Io.Threaded.errnoBug; +const Evented = @This(); +const fallbackSeed = Io.Threaded.fallbackSeed; +const fd_t = linux.fd_t; +const File = Io.File; +const Io = std.Io; +const IoUring = linux.IoUring; +const iovec = std.posix.iovec; +const iovec_const = std.posix.iovec_const; +const linux = std.os.linux; +const linux_statx_request = Io.Threaded.linux_statx_request; +const LOCK = std.posix.LOCK; +const log = std.log.scoped(.@"io-uring"); +const max_iovecs_len = Io.Threaded.max_iovecs_len; +const nanosecondsFromPosix = Io.Threaded.nanosecondsFromPosix; +const net = Io.net; +const PATH_MAX = linux.PATH_MAX; +const pathToPosix = Io.Threaded.pathToPosix; +const pid_t = linux.pid_t; +const PosixAddress = Io.Threaded.PosixAddress; +const posixAddressFamily = Io.Threaded.posixAddressFamily; +const posixProtocol = Io.Threaded.posixProtocol; 
+const posixSocketMode = Io.Threaded.posixSocketMode; +const process = std.process; +const recoverableOsBugDetected = Io.Threaded.recoverableOsBugDetected; +const setTimestampToPosix = Io.Threaded.setTimestampToPosix; +const splat_buffer_size = Io.Threaded.splat_buffer_size; +const statFromLinux = Io.Threaded.statFromLinux; +const std = @import("../std.zig"); +const timestampFromPosix = Io.Threaded.timestampFromPosix; +const unexpectedErrno = std.posix.unexpectedErrno; +const winsize = std.posix.winsize; -/// Must be a thread-safe allocator. -gpa: Allocator, -mutex: Io.Mutex, -main_fiber_buffer: [@sizeOf(Fiber) + Fiber.max_result_size]u8 align(@alignOf(Fiber)), +backing_allocator_needs_mutex: bool, +backing_allocator_mutex: Io.Mutex, +/// Does not need to be thread-safe if not used elsewhere. +backing_allocator: Allocator, +main_fiber_buffer: [ + std.mem.alignForward(usize, @sizeOf(Fiber), @alignOf(Completion)) + @sizeOf(Completion) +]u8 align(@max(@alignOf(Fiber), @alignOf(Completion))), threads: Thread.List, +stderr_mutex: Io.Mutex, +stderr_writer: File.Writer = .{ + .io = undefined, + .interface = Io.File.Writer.initInterface(&.{}), + .file = .stderr(), + .mode = .streaming, +}, +stderr_mode: Io.Terminal.Mode = .no_color, +stderr_writer_initialized: bool = false, + +environ_mutex: Io.Mutex, +environ: Environ, + +null_fd: CachedFd, +random_fd: CachedFd, + +csprng_mutex: Io.Mutex, +csprng: Csprng, + /// Empirically saw >128KB being used by the self-hosted backend to panic. -const idle_stack_size = 256 * 1024; +/// Empirically saw glibc complain about 256KB. 
+const idle_stack_size = 512 * 1024; const max_idle_search = 4; const max_steal_ready_search = 4; @@ -23,6 +82,7 @@ const max_steal_ready_search = 4; const io_uring_entries = 64; const Thread = struct { + required_align: void align(4), thread: std.Thread, idle_context: Context, current_context: *Context, @@ -30,19 +90,33 @@ const Thread = struct { io_uring: IoUring, idle_search_index: u32, steal_ready_search_index: u32, + csprng: Csprng, - const canceling: ?*Thread = @ptrFromInt(@alignOf(Thread)); + threadlocal var self: ?*Thread = null; - threadlocal var self: *Thread = undefined; - - fn current() *Thread { - return self; + noinline fn current() *Thread { + return self.?; } fn currentFiber(thread: *Thread) *Fiber { + assert(thread.current_context != &thread.idle_context); return @fieldParentPtr("context", thread.current_context); } + fn enqueue(thread: *Thread) *linux.io_uring_sqe { + while (true) return thread.io_uring.get_sqe() catch { + thread.submit(); + continue; + }; + } + + fn submit(thread: *Thread) void { + _ = thread.io_uring.submit() catch |err| switch (err) { + error.SignalInterrupt => {}, + else => |e| @panic(@errorName(e)), + }; + } + const List = struct { allocated: []Thread, reserved: u32, @@ -53,18 +127,109 @@ const Thread = struct { const Fiber = struct { required_align: void align(4), context: Context, - awaiter: ?*Fiber, - queue_next: ?*Fiber, - cancel_thread: ?*Thread, - awaiting_completions: std.StaticBitSet(3), + await_count: i32, + link: union { + awaiter: ?*Fiber, + group: struct { prev: ?*Fiber, next: ?*Fiber }, + }, + status: union(enum) { + queue_next: ?*Fiber, + awaiting_group: Group, + }, + cancel_status: CancelStatus, + cancel_protection: CancelProtection, + + const CancelStatus = packed struct(u32) { + requested: bool, + awaiting: Awaiting, + + const unrequested: CancelStatus = .{ .requested = false, .awaiting = .nothing }; + + const Awaiting = enum(u31) { + nothing = std.math.maxInt(u31), + group = std.math.maxInt(u31) - 1, + 
select = std.math.maxInt(u31) - 2, + /// An io_uring fd. + _, + + fn subWrap(lhs: Awaiting, rhs: Awaiting) Awaiting { + return @enumFromInt(@intFromEnum(lhs) -% @intFromEnum(rhs)); + } + + fn fromIoUringFd(fd: fd_t) Awaiting { + const awaiting: Awaiting = @enumFromInt(fd); + switch (awaiting) { + .nothing, .group, .select => unreachable, + _ => return awaiting, + } + } + + fn toIoUringFd(awaiting: Awaiting) fd_t { + switch (awaiting) { + .nothing, .group => unreachable, + _ => return @intFromEnum(awaiting), + } + } + }; + + fn changeAwaiting( + cancel_status: *CancelStatus, + old_awaiting: Awaiting, + new_awaiting: Awaiting, + ) bool { + const old_cancel_status = @atomicRmw(CancelStatus, cancel_status, .Add, .{ + .requested = false, + .awaiting = new_awaiting.subWrap(old_awaiting), + }, .monotonic); + assert(old_cancel_status.awaiting == old_awaiting); + return old_cancel_status.requested; + } + }; + + const CancelProtection = packed struct { + user: Io.CancelProtection, + acknowledged: bool, + + const unblocked: CancelProtection = .{ .user = .unblocked, .acknowledged = false }; + + fn check(cancel_protection: CancelProtection) Io.CancelProtection { + return @enumFromInt(@intFromBool(cancel_protection != unblocked)); + } + + fn acknowledge(cancel_protection: *CancelProtection) void { + assert(!cancel_protection.acknowledged); + cancel_protection.acknowledged = true; + } + + fn recancel(cancel_protection: *CancelProtection) void { + assert(cancel_protection.acknowledged); + cancel_protection.acknowledged = false; + } + + test check { + try std.testing.expectEqual(Io.CancelProtection.unblocked, check(.unblocked)); + try std.testing.expectEqual(Io.CancelProtection.blocked, check(.{ + .user = .unblocked, + .acknowledged = true, + })); + try std.testing.expectEqual(Io.CancelProtection.blocked, check(.{ + .user = .blocked, + .acknowledged = false, + })); + try std.testing.expectEqual(Io.CancelProtection.blocked, check(.{ + .user = .blocked, + .acknowledged = true, + })); 
+ } + }; const finished: ?*Fiber = @ptrFromInt(@alignOf(Thread)); const max_result_align: Alignment = .@"16"; - const max_result_size = max_result_align.forward(64); + const max_result_size = max_result_align.forward(512); /// This includes any stack realignments that need to happen, and also the /// initial frame return address slot and argument frame, depending on target. - const min_stack_size = 4 * 1024 * 1024; + const min_stack_size = 60 * 1024 * 1024; const max_context_align: Alignment = .@"16"; const max_context_size = max_context_align.forward(1024); const max_closure_size: usize = @sizeOf(AsyncClosure); @@ -76,9 +241,19 @@ const Fiber = struct { ) + max_closure_size + max_context_size, std.heap.page_size_max, ); + comptime { + assert(max_result_align.compare(.gte, .of(Completion))); + assert(max_result_size >= @sizeOf(Completion)); + } - fn allocate(el: *EventLoop) error{OutOfMemory}!*Fiber { - return @ptrCast(try el.gpa.alignedAlloc(u8, .of(Fiber), allocation_size)); + fn create(ev: *Evented) error{OutOfMemory}!*Fiber { + return @ptrCast(try ev.allocator().alignedAlloc(u8, .of(Fiber), allocation_size)); + } + + fn destroy(fiber: *Fiber, gpa: std.mem.Allocator) void { + log.debug("destroying {*}", .{fiber}); + assert(fiber.status.queue_next == null); + gpa.free(fiber.allocatedSlice()); } fn allocatedSlice(f: *Fiber) []align(@alignOf(Fiber)) u8 { @@ -98,98 +273,513 @@ const Fiber = struct { return @ptrFromInt(alignment.forward(@intFromPtr(f) + @sizeOf(Fiber))); } - fn enterCancelRegion(fiber: *Fiber, thread: *Thread) error{Canceled}!void { - if (@cmpxchgStrong( - ?*Thread, - &fiber.cancel_thread, - null, - thread, + const Queue = struct { head: *Fiber, tail: *Fiber }; + + /// Like a `*Fiber`, but 2 bits smaller than a pointer (because the LSBs are always 0 due to + /// alignment) so that those two bits can be used in a `packed struct`. 
+ const PackedPtr = enum(@Int(.unsigned, @bitSizeOf(usize) - 2)) { + null = 0, + all_ones = std.math.maxInt(@Int(.unsigned, @bitSizeOf(usize) - 2)), + _, + + const Split = packed struct(usize) { low: u2, high: PackedPtr }; + fn pack(ptr: ?*Fiber) PackedPtr { + const split: Split = @bitCast(@intFromPtr(ptr)); + assert(split.low == 0); + return split.high; + } + fn unpack(ptr: PackedPtr) ?*Fiber { + const split: Split = .{ .low = 0, .high = ptr }; + return @ptrFromInt(@as(usize, @bitCast(split))); + } + }; + + fn requestCancel(fiber: *Fiber, ev: *Evented) void { + const cancel_status = @atomicRmw( + Fiber.CancelStatus, + &fiber.cancel_status, + .Or, + .{ .requested = true, .awaiting = @enumFromInt(0) }, .acq_rel, - .acquire, - )) |cancel_thread| { - assert(cancel_thread == Thread.canceling); + ); + assert(!cancel_status.requested); + switch (cancel_status.awaiting) { + .nothing => {}, + .group => { + // The awaiter received a cancelation request while awaiting a group, + // so propagate the cancelation to the group. 
+ if (fiber.status.awaiting_group.cancel(ev, null)) { + fiber.status = .{ .queue_next = null }; + _ = ev.schedule(.current(), .{ .head = fiber, .tail = fiber }); + } + }, + .select => if (@atomicRmw(i32, &fiber.await_count, .Add, 1, .monotonic) == -1) { + _ = ev.schedule(.current(), .{ .head = fiber, .tail = fiber }); + }, + _ => |cancel_io_uring_fd| { + const thread: *Thread = .current(); + thread.enqueue().* = if (thread.io_uring.fd == @intFromEnum(cancel_io_uring_fd)) .{ + .opcode = .ASYNC_CANCEL, + .flags = linux.IOSQE_CQE_SKIP_SUCCESS, + .ioprio = 0, + .fd = 0, + .off = 0, + .addr = @intFromPtr(fiber), + .len = 0, + .rw_flags = 0, + .user_data = @intFromEnum(Completion.UserData.wakeup), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + } else .{ + .opcode = .MSG_RING, + .flags = linux.IOSQE_CQE_SKIP_SUCCESS, + .ioprio = 0, + .fd = @intFromEnum(cancel_io_uring_fd), + .off = @intFromPtr(fiber) | 0b01, + .addr = @intFromEnum(linux.IORING_MSG_RING_COMMAND.DATA), + .len = 0, + .rw_flags = 0, + .user_data = @intFromEnum(Completion.UserData.cleanup), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + }, + } + } +}; + +const CancelRegion = struct { + fiber: *Fiber, + status: Fiber.CancelStatus, + fn init() CancelRegion { + const fiber = Thread.current().currentFiber(); + return .{ + .fiber = fiber, + .status = .{ + .requested = fiber.cancel_protection.check() == .unblocked, + .awaiting = .nothing, + }, + }; + } + fn initBlocked() CancelRegion { + return .{ + .fiber = Thread.current().currentFiber(), + .status = .{ .requested = false, .awaiting = .nothing }, + }; + } + fn deinit(cancel_region: *CancelRegion) void { + if (cancel_region.status.requested) _ = cancel_region.fiber.cancel_status.changeAwaiting( + cancel_region.status.awaiting, + .nothing, + ); + cancel_region.* = undefined; + } + fn await(cancel_region: *CancelRegion, awaiting: Fiber.CancelStatus.Awaiting) Io.Cancelable!void { + 
if (!cancel_region.status.requested) return; + const status: Fiber.CancelStatus = .{ .requested = true, .awaiting = awaiting }; + if (cancel_region.fiber.cancel_status.changeAwaiting( + cancel_region.status.awaiting, + status.awaiting, + )) { + cancel_region.fiber.cancel_protection.acknowledge(); + cancel_region.status = .unrequested; return error.Canceled; } + cancel_region.status = status; + } + fn awaitIoUring(cancel_region: *CancelRegion) Io.Cancelable!*Thread { + const thread: *Thread = .current(); + try cancel_region.await(.fromIoUringFd(thread.io_uring.fd)); + return thread; + } + fn completion(cancel_region: *const CancelRegion) Completion { + return cancel_region.fiber.resultPointer(Completion).*; + } + fn errno(cancel_region: *const CancelRegion) linux.E { + return cancel_region.completion().errno(); + } +}; + +const CachedFd = struct { + once: Once, + + const Once = enum(fd_t) { + uninitialized = -1, + initializing = -2, + /// fd + _, + + fn fromFd(fd: fd_t) Once { + return @enumFromInt(@as(u31, @intCast(fd))); + } + + fn toFd(once: Once) fd_t { + return @as(u31, @intCast(@intFromEnum(once))); + } + }; + + const init: CachedFd = .{ .once = .uninitialized }; + + fn close(cached_fd: *CachedFd) void { + switch (cached_fd.once) { + .uninitialized => {}, + .initializing => unreachable, + _ => |fd| { + assert(@intFromEnum(fd) >= 0); + std.posix.close(@intFromEnum(fd)); + cached_fd.* = .init; + }, + } } - fn exitCancelRegion(fiber: *Fiber, thread: *Thread) void { - if (@cmpxchgStrong( - ?*Thread, - &fiber.cancel_thread, - thread, - null, - .acq_rel, - .acquire, - )) |cancel_thread| assert(cancel_thread == Thread.canceling); + fn open( + cached_fd: *CachedFd, + ev: *Evented, + cancel_region: *CancelRegion, + path: [*:0]const u8, + flags: linux.O, + ) File.OpenError!fd_t { + var once = @atomicLoad(Once, &cached_fd.once, .monotonic); + while (true) { + switch (once) { + .uninitialized => {}, + .initializing => try futexWait( + ev, + @ptrCast(&cached_fd.once), + 
@bitCast(@intFromEnum(once)), + .none, + ), + _ => |fd| { + @branchHint(.likely); + return fd.toFd(); + }, + } + once = @cmpxchgWeak( + Once, + &cached_fd.once, + .uninitialized, + .initializing, + .monotonic, + .monotonic, + ) orelse { + errdefer { + @atomicStore(Once, &cached_fd.once, .uninitialized, .monotonic); + futexWake(ev, @ptrCast(&cached_fd.once), 1); + } + const fd = try ev.openat(cancel_region, linux.AT.FDCWD, path, flags, 0); + @atomicStore(Once, &cached_fd.once, .fromFd(fd), .monotonic); + futexWake(ev, @ptrCast(&cached_fd.once), std.math.maxInt(u32)); + return fd; + }; + } } - - const Queue = struct { head: *Fiber, tail: *Fiber }; }; -fn recycle(el: *EventLoop, fiber: *Fiber) void { - std.log.debug("recyling {*}", .{fiber}); - assert(fiber.queue_next == null); - el.gpa.free(fiber.allocatedSlice()); +pub fn allocator(ev: *Evented) std.mem.Allocator { + return if (ev.backing_allocator_needs_mutex) .{ + .ptr = ev, + .vtable = &.{ + .alloc = alloc, + .resize = resize, + .remap = remap, + .free = free, + }, + } else ev.backing_allocator; } -pub fn io(el: *EventLoop) Io { +fn alloc(userdata: *anyopaque, len: usize, alignment: std.mem.Alignment, ret_addr: usize) ?[*]u8 { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + const ev_io = ev.io(); + ev.backing_allocator_mutex.lockUncancelable(ev_io); + defer ev.backing_allocator_mutex.unlock(ev_io); + return ev.backing_allocator.rawAlloc(len, alignment, ret_addr); +} + +fn resize( + userdata: *anyopaque, + memory: []u8, + alignment: std.mem.Alignment, + new_len: usize, + ret_addr: usize, +) bool { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + const ev_io = ev.io(); + ev.backing_allocator_mutex.lockUncancelable(ev_io); + defer ev.backing_allocator_mutex.unlock(ev_io); + return ev.backing_allocator.rawResize(memory, alignment, new_len, ret_addr); +} + +fn remap( + userdata: *anyopaque, + memory: []u8, + alignment: Alignment, + new_len: usize, + ret_addr: usize, +) ?[*]u8 { + const ev: *Evented = 
@ptrCast(@alignCast(userdata)); + const ev_io = ev.io(); + ev.backing_allocator_mutex.lockUncancelable(ev_io); + defer ev.backing_allocator_mutex.unlock(ev_io); + return ev.backing_allocator.rawRemap(memory, alignment, new_len, ret_addr); +} + +fn free(userdata: *anyopaque, memory: []u8, alignment: std.mem.Alignment, ret_addr: usize) void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + const ev_io = ev.io(); + ev.backing_allocator_mutex.lockUncancelable(ev_io); + defer ev.backing_allocator_mutex.unlock(ev_io); + return ev.backing_allocator.rawFree(memory, alignment, ret_addr); +} + +pub fn io(ev: *Evented) Io { return .{ - .userdata = el, + .userdata = ev, .vtable = &.{ .async = async, .concurrent = concurrent, .await = await, - .select = select, .cancel = cancel, - .cancelRequested = cancelRequested, - .mutexLock = mutexLock, - .mutexUnlock = mutexUnlock, + .groupAsync = groupAsync, + .groupConcurrent = groupConcurrent, + .groupAwait = groupAwait, + .groupCancel = groupCancel, - .conditionWait = conditionWait, - .conditionWake = conditionWake, + .recancel = recancel, + .swapCancelProtection = swapCancelProtection, + .checkCancel = checkCancel, - .createFile = createFile, - .fileOpen = fileOpen, + .select = select, + + .futexWait = futexWait, + .futexWaitUncancelable = futexWaitUncancelable, + .futexWake = futexWake, + + .operate = operate, + .batchAwaitAsync = batchAwaitAsync, + .batchAwaitConcurrent = batchAwaitConcurrent, + .batchCancel = batchCancel, + + .dirCreateDir = dirCreateDir, + .dirCreateDirPath = dirCreateDirPath, + .dirCreateDirPathOpen = dirCreateDirPathOpen, + .dirOpenDir = dirOpenDir, + .dirStat = dirStat, + .dirStatFile = dirStatFile, + .dirAccess = dirAccess, + .dirCreateFile = dirCreateFile, + .dirCreateFileAtomic = dirCreateFileAtomic, + .dirOpenFile = dirOpenFile, + .dirClose = dirClose, + .dirRead = dirRead, + .dirRealPath = dirRealPath, + .dirRealPathFile = dirRealPathFile, + .dirDeleteFile = dirDeleteFile, + .dirDeleteDir = 
dirDeleteDir, + .dirRename = dirRename, + .dirRenamePreserve = dirRenamePreserve, + .dirSymLink = dirSymLink, + .dirReadLink = dirReadLink, + .dirSetOwner = dirSetOwner, + .dirSetFileOwner = dirSetFileOwner, + .dirSetPermissions = dirSetPermissions, + .dirSetFilePermissions = dirSetFilePermissions, + .dirSetTimestamps = dirSetTimestamps, + .dirHardLink = dirHardLink, + + .fileStat = fileStat, + .fileLength = fileLength, .fileClose = fileClose, - .pread = pread, - .pwrite = pwrite, + .fileWritePositional = fileWritePositional, + .fileWriteFileStreaming = fileWriteFileStreaming, + .fileWriteFilePositional = fileWriteFilePositional, + .fileReadPositional = fileReadPositional, + .fileSeekBy = fileSeekBy, + .fileSeekTo = fileSeekTo, + .fileSync = fileSync, + .fileIsTty = fileIsTty, + .fileEnableAnsiEscapeCodes = fileEnableAnsiEscapeCodes, + .fileSupportsAnsiEscapeCodes = fileIsTty, + .fileSetLength = fileSetLength, + .fileSetOwner = fileSetOwner, + .fileSetPermissions = fileSetPermissions, + .fileSetTimestamps = fileSetTimestamps, + .fileLock = fileLock, + .fileTryLock = fileTryLock, + .fileUnlock = fileUnlock, + .fileDowngradeLock = fileDowngradeLock, + .fileRealPath = fileRealPath, + .fileHardLink = fileHardLink, + + .fileMemoryMapCreate = fileMemoryMapCreate, + .fileMemoryMapDestroy = fileMemoryMapDestroy, + .fileMemoryMapSetLength = fileMemoryMapSetLength, + .fileMemoryMapRead = fileMemoryMapRead, + .fileMemoryMapWrite = fileMemoryMapWrite, + + .processExecutableOpen = processExecutableOpen, + .processExecutablePath = processExecutablePath, + .lockStderr = lockStderr, + .tryLockStderr = tryLockStderr, + .unlockStderr = unlockStderr, + .processCurrentPath = processCurrentPath, + .processSetCurrentDir = processSetCurrentDir, + .processReplace = processReplace, + .processReplacePath = processReplacePath, + .processSpawn = processSpawn, + .processSpawnPath = processSpawnPath, + .childWait = childWait, + .childKill = childKill, + + .progressParentFile = 
progressParentFile, .now = now, + .clockResolution = clockResolution, .sleep = sleep, + + .random = random, + .randomSecure = randomSecure, + + .netListenIp = netListenIpUnavailable, + .netAccept = netAcceptUnavailable, + .netBindIp = netBindIp, + .netConnectIp = netConnectIpUnavailable, + .netListenUnix = netListenUnixUnavailable, + .netConnectUnix = netConnectUnixUnavailable, + .netSocketCreatePair = netSocketCreatePairUnavailable, + .netSend = netSendUnavailable, + .netReceive = netReceive, + .netRead = netReadUnavailable, + .netWrite = netWriteUnavailable, + .netWriteFile = netWriteFileUnavailable, + .netClose = netClose, + .netShutdown = netShutdown, + .netInterfaceNameResolve = netInterfaceNameResolveUnavailable, + .netInterfaceName = netInterfaceNameUnavailable, + .netLookup = netLookupUnavailable, }, }; } -pub fn init(el: *EventLoop, gpa: Allocator) !void { +fn fileMemoryMapSetLength( + userdata: ?*anyopaque, + mm: *File.MemoryMap, + new_len: usize, +) File.MemoryMap.SetLengthError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + const page_size = std.heap.pageSize(); + const alignment: Alignment = .fromByteUnits(page_size); + const page_align = std.heap.page_size_min; + const old_memory = mm.memory; + + if (alignment.forward(new_len) == alignment.forward(old_memory.len)) { + mm.memory.len = new_len; + return; + } + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + const flags: linux.MREMAP = .{ .MAYMOVE = true }; + const addr_hint: ?[*]const u8 = null; + const new_memory = while (true) { + try cancel_region.await(.nothing); + const rc = linux.mremap(old_memory.ptr, old_memory.len, new_len, flags, addr_hint); + switch (linux.errno(rc)) { + .SUCCESS => break @as([*]align(page_align) u8, @ptrFromInt(rc))[0..new_len], + .INTR => continue, + .AGAIN => return error.LockedMemoryLimitExceeded, + .NOMEM => return error.OutOfMemory, + .INVAL => |err| return errnoBug(err), + .FAULT => |err| return errnoBug(err), + 
else => |err| return unexpectedErrno(err), + } + }; + mm.memory = new_memory; +} + +fn fileMemoryMapRead(userdata: ?*anyopaque, mm: *File.MemoryMap) File.ReadPositionalError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = mm; +} + +fn fileMemoryMapWrite(userdata: ?*anyopaque, mm: *File.MemoryMap) File.WritePositionalError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = mm; +} + +pub const InitOptions = struct { + backing_allocator_needs_mutex: bool = true, + + /// Affects the following operations: + /// * `processExecutablePath` on OpenBSD and Haiku. + argv0: Argv0 = .empty, + /// Affects the following operations: + /// * `fileIsTty` + /// * `processSpawn`, `processSpawnPath`, `processReplace`, `processReplacePath` + environ: process.Environ, +}; + +pub fn init(ev: *Evented, backing_allocator: Allocator, options: InitOptions) !void { const threads_size = @max(std.Thread.getCpuCount() catch 1, 1) * @sizeOf(Thread); - const idle_stack_end_offset = std.mem.alignForward(usize, threads_size + idle_stack_size, std.heap.page_size_max); - const allocated_slice = try gpa.alignedAlloc(u8, .of(Thread), idle_stack_end_offset); - errdefer gpa.free(allocated_slice); - el.* = .{ - .gpa = gpa, - .mutex = .{}, + const idle_stack_end_offset = + std.mem.alignForward(usize, threads_size + idle_stack_size, std.heap.page_size_max); + const allocated_slice = try backing_allocator.alignedAlloc(u8, .of(Thread), idle_stack_end_offset); + errdefer backing_allocator.free(allocated_slice); + ev.* = .{ + .backing_allocator_needs_mutex = options.backing_allocator_needs_mutex, + .backing_allocator_mutex = .init, + .backing_allocator = backing_allocator, .main_fiber_buffer = undefined, .threads = .{ .allocated = @ptrCast(allocated_slice[0..threads_size]), .reserved = 1, .active = 1, }, + + .stderr_mutex = .init, + .stderr_writer = .{ + .io = ev.io(), + .interface = Io.File.Writer.initInterface(&.{}), + .file = .stderr(), + .mode = 
.streaming, + }, + .stderr_mode = .no_color, + .stderr_writer_initialized = false, + + .environ_mutex = .init, + .environ = .{ .process_environ = options.environ }, + + .null_fd = .init, + .random_fd = .init, + + .csprng_mutex = .init, + .csprng = .uninitialized, }; - const main_fiber: *Fiber = @ptrCast(&el.main_fiber_buffer); + const main_fiber: *Fiber = @ptrCast(&ev.main_fiber_buffer); main_fiber.* = .{ .required_align = {}, .context = undefined, - .awaiter = null, - .queue_next = null, - .cancel_thread = null, - .awaiting_completions = .initEmpty(), + .await_count = 0, + .link = .{ .awaiter = null }, + .status = .{ .queue_next = null }, + .cancel_status = .unrequested, + .cancel_protection = .unblocked, }; - const main_thread = &el.threads.allocated[0]; + const main_thread = &ev.threads.allocated[0]; Thread.self = main_thread; - const idle_stack_end: [*]align(16) usize = @ptrCast(@alignCast(allocated_slice[idle_stack_end_offset..].ptr)); - (idle_stack_end - 1)[0..1].* = .{@intFromPtr(el)}; + const idle_stack_end: [*]align(16) usize = + @ptrCast(@alignCast(allocated_slice[idle_stack_end_offset..].ptr)); + (idle_stack_end - 1)[0..1].* = .{@intFromPtr(ev)}; main_thread.* = .{ + .required_align = {}, .thread = undefined, .idle_context = switch (builtin.cpu.arch) { .aarch64 => .{ @@ -206,42 +796,56 @@ pub fn init(el: *EventLoop, gpa: Allocator) !void { }, .current_context = &main_fiber.context, .ready_queue = null, - .io_uring = try IoUring.init(io_uring_entries, 0), + .io_uring = try .init( + io_uring_entries, + linux.IORING_SETUP_COOP_TASKRUN | linux.IORING_SETUP_SINGLE_ISSUER, + ), .idle_search_index = 1, .steal_ready_search_index = 1, + .csprng = .uninitialized, }; errdefer main_thread.io_uring.deinit(); - std.log.debug("created main idle {*}", .{&main_thread.idle_context}); - std.log.debug("created main {*}", .{main_fiber}); + log.debug("created main idle {*}", .{&main_thread.idle_context}); + log.debug("created main {*}", .{main_fiber}); } -pub fn deinit(el: 
*EventLoop) void { - const active_threads = @atomicLoad(u32, &el.threads.active, .acquire); - for (el.threads.allocated[0..active_threads]) |*thread| { +pub fn deinit(ev: *Evented) void { + const active_threads = @atomicLoad(u32, &ev.threads.active, .acquire); + for (ev.threads.allocated[0..active_threads]) |*thread| { const ready_fiber = @atomicLoad(?*Fiber, &thread.ready_queue, .monotonic); assert(ready_fiber == null or ready_fiber == Fiber.finished); // pending async } - el.yield(null, .exit); - const allocated_ptr: [*]align(@alignOf(Thread)) u8 = @ptrCast(@alignCast(el.threads.allocated.ptr)); - const idle_stack_end_offset = std.mem.alignForward(usize, el.threads.allocated.len * @sizeOf(Thread) + idle_stack_size, std.heap.page_size_max); - for (el.threads.allocated[1..active_threads]) |*thread| thread.thread.join(); - el.gpa.free(allocated_ptr[0..idle_stack_end_offset]); - el.* = undefined; + ev.yield(null, .exit); + ev.threads.allocated[0].io_uring.deinit(); + ev.null_fd.close(); + ev.random_fd.close(); + const allocated_ptr: [*]align(@alignOf(Thread)) u8 = @ptrCast(@alignCast(ev.threads.allocated.ptr)); + const idle_stack_end_offset = std.mem.alignForward( + usize, + ev.threads.allocated.len * @sizeOf(Thread) + idle_stack_size, + std.heap.page_size_max, + ); + for (ev.threads.allocated[1..active_threads]) |*thread| thread.thread.join(); + assert(active_threads == ev.threads.active); // spawned threads while there was no pending async? 
+ ev.backing_allocator.free(allocated_ptr[0..idle_stack_end_offset]); + ev.* = undefined; } -fn findReadyFiber(el: *EventLoop, thread: *Thread) ?*Fiber { +fn findReadyFiber(ev: *Evented, thread: *Thread) ?*Fiber { if (@atomicRmw(?*Fiber, &thread.ready_queue, .Xchg, Fiber.finished, .acquire)) |ready_fiber| { - @atomicStore(?*Fiber, &thread.ready_queue, ready_fiber.queue_next, .release); - ready_fiber.queue_next = null; + @atomicStore(?*Fiber, &thread.ready_queue, ready_fiber.status.queue_next, .release); + ready_fiber.status.queue_next = null; return ready_fiber; } - const active_threads = @atomicLoad(u32, &el.threads.active, .acquire); + const active_threads = @atomicLoad(u32, &ev.threads.active, .acquire); for (0..@min(max_steal_ready_search, active_threads)) |_| { defer thread.steal_ready_search_index += 1; if (thread.steal_ready_search_index == active_threads) thread.steal_ready_search_index = 0; - const steal_ready_search_thread = &el.threads.allocated[0..active_threads][thread.steal_ready_search_index]; + const steal_ready_search_thread = + &ev.threads.allocated[0..active_threads][thread.steal_ready_search_index]; if (steal_ready_search_thread == thread) continue; - const ready_fiber = @atomicLoad(?*Fiber, &steal_ready_search_thread.ready_queue, .acquire) orelse continue; + const ready_fiber = + @atomicLoad(?*Fiber, &steal_ready_search_thread.ready_queue, .acquire) orelse continue; if (ready_fiber == Fiber.finished) continue; if (@cmpxchgWeak( ?*Fiber, @@ -251,8 +855,8 @@ fn findReadyFiber(el: *EventLoop, thread: *Thread) ?*Fiber { .acquire, .monotonic, )) |_| continue; - @atomicStore(?*Fiber, &thread.ready_queue, ready_fiber.queue_next, .release); - ready_fiber.queue_next = null; + @atomicStore(?*Fiber, &thread.ready_queue, ready_fiber.status.queue_next, .release); + ready_fiber.status.queue_next = null; return ready_fiber; } // couldn't find anything to do, so we are now open for business @@ -260,9 +864,9 @@ fn findReadyFiber(el: *EventLoop, thread: *Thread) 
?*Fiber { return null; } -fn yield(el: *EventLoop, maybe_ready_fiber: ?*Fiber, pending_task: SwitchMessage.PendingTask) void { +fn yield(ev: *Evented, maybe_ready_fiber: ?*Fiber, pending_task: SwitchMessage.PendingTask) void { const thread: *Thread = .current(); - const ready_context = if (maybe_ready_fiber orelse el.findReadyFiber(thread)) |ready_fiber| + const ready_context = if (maybe_ready_fiber orelse ev.findReadyFiber(thread)) |ready_fiber| &ready_fiber.context else &thread.idle_context; @@ -273,25 +877,25 @@ fn yield(el: *EventLoop, maybe_ready_fiber: ?*Fiber, pending_task: SwitchMessage }, .pending_task = pending_task, }; - std.log.debug("switching from {*} to {*}", .{ message.contexts.prev, message.contexts.ready }); - contextSwitch(&message).handle(el); + log.debug("switching from {*} to {*}", .{ message.contexts.prev, message.contexts.ready }); + contextSwitch(&message).handle(ev); } -fn schedule(el: *EventLoop, thread: *Thread, ready_queue: Fiber.Queue) void { +fn schedule(ev: *Evented, thread: *Thread, ready_queue: Fiber.Queue) bool { { var fiber = ready_queue.head; while (true) { - std.log.debug("scheduling {*}", .{fiber}); - fiber = fiber.queue_next orelse break; + log.debug("scheduling {*}", .{fiber}); + fiber = fiber.status.queue_next orelse break; } assert(fiber == ready_queue.tail); } // shared fields of previous `Thread` must be initialized before later ones are marked as active - const new_thread_index = @atomicLoad(u32, &el.threads.active, .acquire); + const new_thread_index = @atomicLoad(u32, &ev.threads.active, .acquire); for (0..@min(max_idle_search, new_thread_index)) |_| { defer thread.idle_search_index += 1; if (thread.idle_search_index == new_thread_index) thread.idle_search_index = 0; - const idle_search_thread = &el.threads.allocated[0..new_thread_index][thread.idle_search_index]; + const idle_search_thread = &ev.threads.allocated[0..new_thread_index][thread.idle_search_index]; if (idle_search_thread == thread) continue; if 
(@cmpxchgWeak( ?*Fiber, @@ -301,13 +905,13 @@ fn schedule(el: *EventLoop, thread: *Thread, ready_queue: Fiber.Queue) void { .release, .monotonic, )) |_| continue; - getSqe(&thread.io_uring).* = .{ + thread.enqueue().* = .{ .opcode = .MSG_RING, - .flags = std.os.linux.IOSQE_CQE_SKIP_SUCCESS, + .flags = linux.IOSQE_CQE_SKIP_SUCCESS, .ioprio = 0, .fd = idle_search_thread.io_uring.fd, .off = @intFromEnum(Completion.UserData.wakeup), - .addr = 0, + .addr = @intFromEnum(linux.IORING_MSG_RING_COMMAND.DATA), .len = 0, .rw_flags = 0, .user_data = @intFromEnum(Completion.UserData.wakeup), @@ -317,145 +921,221 @@ fn schedule(el: *EventLoop, thread: *Thread, ready_queue: Fiber.Queue) void { .addr3 = 0, .resv = 0, }; - return; + return true; } spawn_thread: { // previous failed reservations must have completed before retrying - if (new_thread_index == el.threads.allocated.len or @cmpxchgWeak( + if (new_thread_index == ev.threads.allocated.len or @cmpxchgWeak( u32, - &el.threads.reserved, + &ev.threads.reserved, new_thread_index, new_thread_index + 1, .acquire, .monotonic, ) != null) break :spawn_thread; - const new_thread = &el.threads.allocated[new_thread_index]; + const new_thread = &ev.threads.allocated[new_thread_index]; const next_thread_index = new_thread_index + 1; + var params = std.mem.zeroInit(linux.io_uring_params, .{ + .flags = linux.IORING_SETUP_ATTACH_WQ | + linux.IORING_SETUP_R_DISABLED | + linux.IORING_SETUP_COOP_TASKRUN | + linux.IORING_SETUP_SINGLE_ISSUER, + .wq_fd = @as(u32, @intCast(ev.threads.allocated[0].io_uring.fd)), + }); new_thread.* = .{ + .required_align = {}, .thread = undefined, .idle_context = undefined, .current_context = &new_thread.idle_context, .ready_queue = ready_queue.head, - .io_uring = IoUring.init(io_uring_entries, 0) catch |err| { - @atomicStore(u32, &el.threads.reserved, new_thread_index, .release); + .io_uring = IoUring.init_params(io_uring_entries, ¶ms) catch |err| { + @atomicStore(u32, &ev.threads.reserved, new_thread_index, 
.release); // no more access to `thread` after giving up reservation - std.log.warn("unable to create worker thread due to io_uring init failure: {s}", .{@errorName(err)}); + log.warn("unable to create worker thread due to io_uring init failure: {s}", .{ + @errorName(err), + }); break :spawn_thread; }, .idle_search_index = 0, .steal_ready_search_index = 0, + .csprng = .uninitialized, }; new_thread.thread = std.Thread.spawn(.{ .stack_size = idle_stack_size, - .allocator = el.gpa, - }, threadEntry, .{ el, new_thread_index }) catch |err| { + .allocator = ev.allocator(), + }, threadEntry, .{ ev, new_thread_index }) catch |err| { new_thread.io_uring.deinit(); - @atomicStore(u32, &el.threads.reserved, new_thread_index, .release); + @atomicStore(u32, &ev.threads.reserved, new_thread_index, .release); // no more access to `thread` after giving up reservation - std.log.warn("unable to create worker thread due spawn failure: {s}", .{@errorName(err)}); + log.warn("unable to create worker thread due spawn failure: {s}", .{@errorName(err)}); break :spawn_thread; }; // shared fields of `Thread` must be initialized before being marked active - @atomicStore(u32, &el.threads.active, next_thread_index, .release); - return; + @atomicStore(u32, &ev.threads.active, next_thread_index, .release); + return false; } // nobody wanted it, so just queue it on ourselves while (@cmpxchgWeak( ?*Fiber, &thread.ready_queue, - ready_queue.tail.queue_next, + ready_queue.tail.status.queue_next, ready_queue.head, .acq_rel, .acquire, - )) |old_head| ready_queue.tail.queue_next = old_head; + )) |old_head| ready_queue.tail.status.queue_next = old_head; + return false; } -fn mainIdle(el: *EventLoop, message: *const SwitchMessage) callconv(.withStackAlign(.c, @max(@alignOf(Thread), @alignOf(Context)))) noreturn { - message.handle(el); - el.idle(&el.threads.allocated[0]); - el.yield(@ptrCast(&el.main_fiber_buffer), .nothing); +fn mainIdle( + ev: *Evented, + message: *const SwitchMessage, +) 
callconv(.withStackAlign(.c, @max(@alignOf(Thread), @alignOf(Context)))) noreturn { + message.handle(ev); + ev.idle(&ev.threads.allocated[0]); + ev.yield(@ptrCast(&ev.main_fiber_buffer), .nothing); unreachable; // switched to dead fiber } -fn threadEntry(el: *EventLoop, index: u32) void { - const thread: *Thread = &el.threads.allocated[index]; +fn threadEntry(ev: *Evented, index: u32) void { + const thread: *Thread = &ev.threads.allocated[index]; Thread.self = thread; - std.log.debug("created thread idle {*}", .{&thread.idle_context}); - el.idle(thread); + defer thread.io_uring.deinit(); + log.debug("created thread idle {*}", .{&thread.idle_context}); + switch (linux.errno(linux.io_uring_register(thread.io_uring.fd, .REGISTER_ENABLE_RINGS, null, 0))) { + .SUCCESS => ev.idle(thread), + else => |err| @panic(@tagName(err)), + } } const Completion = struct { + result: i32, + flags: u32, + const UserData = enum(usize) { unused, wakeup, + futex_wake, cleanup, exit, - /// *Fiber + /// If bit 0 is 1, a pointer to the `context` field of `Io.Batch.Storage.Pending`. + /// If bits 0 and 1 are 0, a `*Fiber`. 
_, }; - result: i32, - flags: u32, + + fn errno(completion: Completion) linux.E { + return linux.errno(@bitCast(@as(isize, completion.result))); + } }; -fn idle(el: *EventLoop, thread: *Thread) void { +fn idle(ev: *Evented, thread: *Thread) void { var maybe_ready_fiber: ?*Fiber = null; while (true) { - while (maybe_ready_fiber orelse el.findReadyFiber(thread)) |ready_fiber| { - el.yield(ready_fiber, .nothing); + while (maybe_ready_fiber orelse ev.findReadyFiber(thread)) |ready_fiber| { + ev.yield(ready_fiber, .nothing); maybe_ready_fiber = null; } _ = thread.io_uring.submit_and_wait(1) catch |err| switch (err) { - error.SignalInterrupt => std.log.warn("submit_and_wait failed with SignalInterrupt", .{}), + error.SignalInterrupt => {}, else => |e| @panic(@errorName(e)), }; - var cqes_buffer: [io_uring_entries]std.os.linux.io_uring_cqe = undefined; + var cqes_buffer: [io_uring_entries]linux.io_uring_cqe = undefined; var maybe_ready_queue: ?Fiber.Queue = null; for (cqes_buffer[0 .. thread.io_uring.copy_cqes(&cqes_buffer, 0) catch |err| switch (err) { - error.SignalInterrupt => cqes_len: { - std.log.warn("copy_cqes failed with SignalInterrupt", .{}); - break :cqes_len 0; - }, + error.SignalInterrupt => 0, else => |e| @panic(@errorName(e)), - }]) |cqe| switch (@as(Completion.UserData, @enumFromInt(cqe.user_data))) { + }]) |cqe| if (cqe.flags & linux.IORING_CQE_F_SKIP == 0) switch (@as( + Completion.UserData, + @enumFromInt(cqe.user_data), + )) { .unused => unreachable, // bad submission queued? 
.wakeup => {}, + .futex_wake => switch (Completion.errno(.{ .result = cqe.res, .flags = cqe.flags })) { + .SUCCESS => recoverableOsBugDetected(), // success is skipped + .INVAL => {}, // invalid futex_wait() on ptr done elsewhere + .INTR, .CANCELED => recoverableOsBugDetected(), // `Completion.UserData.futex_wake` is not cancelable + .FAULT => {}, // pointer became invalid while doing the wake + else => recoverableOsBugDetected(), // deadlock due to operating system bug + }, .cleanup => @panic("failed to notify other threads that we are exiting"), .exit => { assert(maybe_ready_fiber == null and maybe_ready_queue == null); // pending async return; }, - _ => switch (errno(cqe.res)) { - .INTR => getSqe(&thread.io_uring).* = .{ - .opcode = .ASYNC_CANCEL, - .flags = std.os.linux.IOSQE_CQE_SKIP_SUCCESS, - .ioprio = 0, - .fd = 0, - .off = 0, - .addr = cqe.user_data, - .len = 0, - .rw_flags = 0, - .user_data = @intFromEnum(Completion.UserData.wakeup), - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - }, - else => { - const fiber: *Fiber = @ptrFromInt(cqe.user_data); - assert(fiber.queue_next == null); - fiber.resultPointer(Completion).* = .{ + _ => if (@as(?*Fiber, ready_fiber: switch (@as(u2, @truncate(cqe.user_data))) { + 0b00 => { + const ready_fiber: *Fiber = @ptrFromInt(cqe.user_data & ~@as(usize, 0b11)); + ready_fiber.resultPointer(Completion).* = .{ .result = cqe.res, .flags = cqe.flags, }; - if (maybe_ready_fiber == null) maybe_ready_fiber = fiber else if (maybe_ready_queue) |*ready_queue| { - ready_queue.tail.queue_next = fiber; - ready_queue.tail = fiber; - } else maybe_ready_queue = .{ .head = fiber, .tail = fiber }; + break :ready_fiber ready_fiber; }, + 0b01 => { + thread.enqueue().* = .{ + .opcode = .ASYNC_CANCEL, + .flags = linux.IOSQE_CQE_SKIP_SUCCESS, + .ioprio = 0, + .fd = 0, + .off = 0, + .addr = cqe.user_data & ~@as(usize, 0b11), + .len = 0, + .rw_flags = 0, + .user_data = @intFromEnum(Completion.UserData.wakeup), + 
.buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + break :ready_fiber null; + }, + 0b10 => { + const context: *Io.Operation.Storage.Pending.Context = + @ptrFromInt(cqe.user_data & ~@as(usize, 0b11)); + const batch: *Io.Batch = @ptrFromInt(context[0]); + var next: usize = 0b00; + context[0..3].* = .{ next, @as(u32, @bitCast(cqe.res)), cqe.flags }; + while (true) { + next = @cmpxchgWeak( + usize, + @as(*usize, @ptrCast(&batch.context)), + next, + cqe.user_data, + .release, + .acquire, + ) orelse break; + context[0] = next; + } + break :ready_fiber switch (@as(u2, @truncate(next))) { + 0b00, 0b01 => @ptrFromInt(next & ~@as(usize, 0b11)), + 0b10, 0b11 => null, + }; + }, + 0b11 => switch (Completion.errno(.{ .result = cqe.res, .flags = cqe.flags })) { + .SUCCESS => unreachable, // no event count specified + .TIME => { + const context: *usize = @ptrFromInt(cqe.user_data & ~@as(usize, 0b11)); + const fiber = @atomicRmw(usize, context, .Add, 0b01, .acquire); + break :ready_fiber switch (@as(u2, @truncate(fiber))) { + else => unreachable, // timeout completed multiple times + 0b00 => @ptrFromInt(fiber & ~@as(usize, 0b11)), + 0b10 => null, + }; + }, + .CANCELED => null, // user data may have been invalidated + else => |err| unexpectedErrno(err) catch null, + }, + })) |ready_fiber| { + assert(ready_fiber.status.queue_next == null); + if (maybe_ready_fiber == null) { + maybe_ready_fiber = ready_fiber; + } else if (maybe_ready_queue) |*ready_queue| { + ready_queue.tail.status.queue_next = ready_fiber; + ready_queue.tail = ready_fiber; + } else maybe_ready_queue = .{ .head = ready_fiber, .tail = ready_fiber }; }, }; - if (maybe_ready_queue) |ready_queue| el.schedule(thread, ready_queue); + if (maybe_ready_queue) |ready_queue| _ = ev.schedule(thread, ready_queue); } } @@ -469,113 +1149,68 @@ const SwitchMessage = struct { const PendingTask = union(enum) { nothing, reschedule, - recycle: *Fiber, - register_awaiter: *?*Fiber, - 
register_select: []const *Io.AnyFuture, - mutex_lock: struct { - prev_state: Io.Mutex.State, - mutex: *Io.Mutex, - }, - condition_wait: struct { - cond: *Io.Condition, - mutex: *Io.Mutex, - }, + await: u31, + group_await: Group, + group_cancel: Group, + batch_await: *Io.Batch, + destroy, exit, }; - fn handle(message: *const SwitchMessage, el: *EventLoop) void { + fn handle(message: *const SwitchMessage, ev: *Evented) void { const thread: *Thread = .current(); thread.current_context = message.contexts.ready; switch (message.pending_task) { .nothing => {}, .reschedule => if (message.contexts.prev != &thread.idle_context) { - const prev_fiber: *Fiber = @alignCast(@fieldParentPtr("context", message.contexts.prev)); - assert(prev_fiber.queue_next == null); - el.schedule(thread, .{ .head = prev_fiber, .tail = prev_fiber }); + const fiber: *Fiber = @alignCast(@fieldParentPtr("context", message.contexts.prev)); + assert(fiber.status.queue_next == null); + _ = ev.schedule(thread, .{ .head = fiber, .tail = fiber }); }, - .recycle => |fiber| { - el.recycle(fiber); + .await => |count| { + const fiber: *Fiber = @alignCast(@fieldParentPtr("context", message.contexts.prev)); + if (@atomicRmw(i32, &fiber.await_count, .Sub, count, .monotonic) > 0) + _ = ev.schedule(thread, .{ .head = fiber, .tail = fiber }); }, - .register_awaiter => |awaiter| { - const prev_fiber: *Fiber = @alignCast(@fieldParentPtr("context", message.contexts.prev)); - assert(prev_fiber.queue_next == null); - if (@atomicRmw(?*Fiber, awaiter, .Xchg, prev_fiber, .acq_rel) == Fiber.finished) - el.schedule(thread, .{ .head = prev_fiber, .tail = prev_fiber }); + .group_await => |group| { + const fiber: *Fiber = @alignCast(@fieldParentPtr("context", message.contexts.prev)); + if (group.await(ev, fiber)) + _ = ev.schedule(thread, .{ .head = fiber, .tail = fiber }); }, - .register_select => |futures| { - const prev_fiber: *Fiber = @alignCast(@fieldParentPtr("context", message.contexts.prev)); - 
assert(prev_fiber.queue_next == null); - for (futures) |any_future| { - const future_fiber: *Fiber = @ptrCast(@alignCast(any_future)); - if (@atomicRmw(?*Fiber, &future_fiber.awaiter, .Xchg, prev_fiber, .acq_rel) == Fiber.finished) { - const closure: *AsyncClosure = .fromFiber(future_fiber); - if (!@atomicRmw(bool, &closure.already_awaited, .Xchg, true, .seq_cst)) { - el.schedule(thread, .{ .head = prev_fiber, .tail = prev_fiber }); - } - } - } + .group_cancel => |group| { + const fiber: *Fiber = @alignCast(@fieldParentPtr("context", message.contexts.prev)); + if (group.cancel(ev, fiber)) + _ = ev.schedule(thread, .{ .head = fiber, .tail = fiber }); }, - .mutex_lock => |mutex_lock| { - const prev_fiber: *Fiber = @alignCast(@fieldParentPtr("context", message.contexts.prev)); - assert(prev_fiber.queue_next == null); - var prev_state = mutex_lock.prev_state; - while (switch (prev_state) { - else => next_state: { - prev_fiber.queue_next = @ptrFromInt(@intFromEnum(prev_state)); - break :next_state @cmpxchgWeak( - Io.Mutex.State, - &mutex_lock.mutex.state, - prev_state, - @enumFromInt(@intFromPtr(prev_fiber)), - .release, - .acquire, - ); - }, - .unlocked => @cmpxchgWeak( - Io.Mutex.State, - &mutex_lock.mutex.state, - .unlocked, - .locked_once, - .acquire, - .acquire, - ) orelse { - prev_fiber.queue_next = null; - el.schedule(thread, .{ .head = prev_fiber, .tail = prev_fiber }); - return; - }, - }) |next_state| prev_state = next_state; - }, - .condition_wait => |condition_wait| { - const prev_fiber: *Fiber = @alignCast(@fieldParentPtr("context", message.contexts.prev)); - assert(prev_fiber.queue_next == null); - const cond_impl = prev_fiber.resultPointer(ConditionImpl); - cond_impl.* = .{ - .tail = prev_fiber, - .event = .queued, - }; + .batch_await => |batch| { + const fiber: *Fiber = @alignCast(@fieldParentPtr("context", message.contexts.prev)); if (@cmpxchgStrong( - ?*Fiber, - @as(*?*Fiber, @ptrCast(&condition_wait.cond.state)), + ?*anyopaque, + &batch.context, null, 
- prev_fiber, + fiber, .release, - .acquire, - )) |waiting_fiber| { - const waiting_cond_impl = waiting_fiber.?.resultPointer(ConditionImpl); - assert(waiting_cond_impl.tail.queue_next == null); - waiting_cond_impl.tail.queue_next = prev_fiber; - waiting_cond_impl.tail = prev_fiber; + .monotonic, + )) |head| { + assert(@as(u2, @truncate(@intFromPtr(head))) != 0b00); + _ = ev.schedule(thread, .{ .head = fiber, .tail = fiber }); } - condition_wait.mutex.unlock(el.io()); }, - .exit => for (el.threads.allocated[0..@atomicLoad(u32, &el.threads.active, .acquire)]) |*each_thread| { - getSqe(&thread.io_uring).* = .{ + .destroy => { + const fiber: *Fiber = @alignCast(@fieldParentPtr("context", message.contexts.prev)); + fiber.destroy(ev.backing_allocator); + ev.backing_allocator_mutex.unlock(ev.io()); + }, + .exit => for ( + ev.threads.allocated[0..@atomicLoad(u32, &ev.threads.active, .acquire)], + ) |*each_thread| { + thread.enqueue().* = .{ .opcode = .MSG_RING, - .flags = std.os.linux.IOSQE_CQE_SKIP_SUCCESS, + .flags = linux.IOSQE_CQE_SKIP_SUCCESS, .ioprio = 0, .fd = each_thread.io_uring.fd, .off = @intFromEnum(Completion.UserData.exit), - .addr = 0, + .addr = @intFromEnum(linux.IORING_MSG_RING_COMMAND.DATA), .len = 0, .rw_flags = 0, .user_data = @intFromEnum(Completion.UserData.cleanup), @@ -784,65 +1419,73 @@ inline fn contextSwitch(message: *const SwitchMessage) *const SwitchMessage { fn mainIdleEntry() callconv(.naked) void { switch (builtin.cpu.arch) { - .x86_64 => asm volatile ( - \\ movq (%%rsp), %%rdi - \\ jmp %[mainIdle:P] - : - : [mainIdle] "X" (&mainIdle), - ), .aarch64 => asm volatile ( \\ ldr x0, [sp, #-8] \\ b %[mainIdle] : : [mainIdle] "X" (&mainIdle), ), - else => |arch| @compileError("unimplemented architecture: " ++ @tagName(arch)), - } -} - -fn fiberEntry() callconv(.naked) void { - switch (builtin.cpu.arch) { .x86_64 => asm volatile ( - \\ leaq 8(%%rsp), %%rdi - \\ jmp %[AsyncClosure_call:P] + \\ movq (%%rsp), %%rdi + \\ jmp %[mainIdle:P] : - : 
[AsyncClosure_call] "X" (&AsyncClosure.call), + : [mainIdle] "X" (&mainIdle), ), else => |arch| @compileError("unimplemented architecture: " ++ @tagName(arch)), } } const AsyncClosure = struct { - event_loop: *EventLoop, + ev: *Evented, fiber: *Fiber, start: *const fn (context: *const anyopaque, result: *anyopaque) void, result_align: Alignment, - already_awaited: bool, - - fn contextPointer(closure: *AsyncClosure) [*]align(Fiber.max_context_align.toByteUnits()) u8 { - return @alignCast(@as([*]u8, @ptrCast(closure)) + @sizeOf(AsyncClosure)); - } - - fn call(closure: *AsyncClosure, message: *const SwitchMessage) callconv(.withStackAlign(.c, @alignOf(AsyncClosure))) noreturn { - message.handle(closure.event_loop); - const fiber = closure.fiber; - std.log.debug("{*} performing async", .{fiber}); - closure.start(closure.contextPointer(), fiber.resultBytes(closure.result_align)); - const awaiter = @atomicRmw(?*Fiber, &fiber.awaiter, .Xchg, Fiber.finished, .acq_rel); - const ready_awaiter = r: { - const a = awaiter orelse break :r null; - if (@atomicRmw(bool, &closure.already_awaited, .Xchg, true, .acq_rel)) break :r null; - break :r a; - }; - closure.event_loop.yield(ready_awaiter, .nothing); - unreachable; // switched to dead fiber - } fn fromFiber(fiber: *Fiber) *AsyncClosure { return @ptrFromInt(Fiber.max_context_align.max(.of(AsyncClosure)).backward( @intFromPtr(fiber.allocatedEnd()) - Fiber.max_context_size, ) - @sizeOf(AsyncClosure)); } + + fn contextPointer(closure: *AsyncClosure) [*]align(Fiber.max_context_align.toByteUnits()) u8 { + return @alignCast(@as([*]u8, @ptrCast(closure)) + @sizeOf(AsyncClosure)); + } + + fn entry() callconv(.naked) void { + switch (builtin.cpu.arch) { + .aarch64 => asm volatile ( + \\ mov x0, sp + \\ b %[call] + : + : [call] "X" (&call), + ), + .x86_64 => asm volatile ( + \\ leaq 8(%%rsp), %%rdi + \\ jmp %[call:P] + : + : [call] "X" (&call), + ), + else => |arch| @compileError("unimplemented architecture: " ++ @tagName(arch)), + } + } 
+ + fn call( + closure: *AsyncClosure, + message: *const SwitchMessage, + ) callconv(.withStackAlign(.c, @alignOf(AsyncClosure))) noreturn { + message.handle(closure.ev); + const fiber = closure.fiber; + log.debug("{*} performing async", .{fiber}); + closure.start(closure.contextPointer(), fiber.resultBytes(closure.result_align)); + closure.ev.yield( + if (@atomicRmw(?*Fiber, &fiber.link.awaiter, .Xchg, Fiber.finished, .acq_rel)) |awaiter| + if (@atomicRmw(i32, &awaiter.await_count, .Add, 1, .monotonic) == -1) awaiter else null + else + null, + .nothing, + ); + unreachable; // switched to dead fiber + } }; fn async( @@ -853,7 +1496,8 @@ fn async( context_alignment: Alignment, start: *const fn (context: *const anyopaque, result: *anyopaque) void, ) ?*std.Io.AnyFuture { - return concurrent(userdata, result.len, result_alignment, context, context_alignment, start) catch { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + return concurrent(ev, result.len, result_alignment, context, context_alignment, start) catch { start(context.ptr, result.ptr); return null; }; @@ -872,509 +1516,3330 @@ fn concurrent( assert(result_len <= Fiber.max_result_size); // TODO assert(context.len <= Fiber.max_context_size); // TODO - const event_loop: *EventLoop = @ptrCast(@alignCast(userdata)); - const fiber = try Fiber.allocate(event_loop); - std.log.debug("allocated {*}", .{fiber}); + const ev: *Evented = @ptrCast(@alignCast(userdata)); + const fiber = Fiber.create(ev) catch |err| switch (err) { + error.OutOfMemory => return error.ConcurrencyUnavailable, + }; + log.debug("allocated {*}", .{fiber}); const closure: *AsyncClosure = .fromFiber(fiber); fiber.* = .{ .required_align = {}, .context = switch (builtin.cpu.arch) { - .x86_64 => .{ - .rsp = @intFromPtr(closure) - @sizeOf(usize), - .rbp = 0, - .rip = @intFromPtr(&fiberEntry), - }, .aarch64 => .{ .sp = @intFromPtr(closure), .fp = 0, - .pc = @intFromPtr(&fiberEntry), + .pc = @intFromPtr(&AsyncClosure.entry), + }, + .x86_64 => .{ + 
.rsp = @intFromPtr(closure) - @sizeOf(usize), + .rbp = 0, + .rip = @intFromPtr(&AsyncClosure.entry), }, else => |arch| @compileError("unimplemented architecture: " ++ @tagName(arch)), }, - .awaiter = null, - .queue_next = null, - .cancel_thread = null, - .awaiting_completions = .initEmpty(), + .await_count = 0, + .link = .{ .awaiter = null }, + .status = .{ .queue_next = null }, + .cancel_status = .unrequested, + .cancel_protection = .unblocked, }; closure.* = .{ - .event_loop = event_loop, + .ev = ev, .fiber = fiber, .start = start, .result_align = result_alignment, - .already_awaited = false, }; @memcpy(closure.contextPointer(), context); - event_loop.schedule(.current(), .{ .head = fiber, .tail = fiber }); + const thread: *Thread = .current(); + if (ev.schedule(thread, .{ .head = fiber, .tail = fiber })) thread.submit(); return @ptrCast(fiber); } fn await( userdata: ?*anyopaque, - any_future: *std.Io.AnyFuture, + future: *std.Io.AnyFuture, result: []u8, result_alignment: Alignment, ) void { - const event_loop: *EventLoop = @ptrCast(@alignCast(userdata)); - const future_fiber: *Fiber = @ptrCast(@alignCast(any_future)); - if (@atomicLoad(?*Fiber, &future_fiber.awaiter, .acquire) != Fiber.finished) - event_loop.yield(null, .{ .register_awaiter = &future_fiber.awaiter }); + const ev: *Evented = @ptrCast(@alignCast(userdata)); + const fiber = Thread.current().currentFiber(); + const future_fiber: *Fiber = @ptrCast(@alignCast(future)); + if (@atomicRmw(?*Fiber, &future_fiber.link.awaiter, .Xchg, fiber, .acq_rel)) |awaiter| { + assert(awaiter == Fiber.finished); + } else while (true) { + ev.yield(null, .{ .await = 1 }); + const awaiter = @atomicLoad(?*Fiber, &future_fiber.link.awaiter, .acquire); + if (awaiter == Fiber.finished) break; + assert(awaiter == fiber); // spurious wakeup + } @memcpy(result, future_fiber.resultBytes(result_alignment)); - event_loop.recycle(future_fiber); -} - -fn select(userdata: ?*anyopaque, futures: []const *Io.AnyFuture) usize { - const 
el: *EventLoop = @ptrCast(@alignCast(userdata)); - - // Optimization to avoid the yield below. - for (futures, 0..) |any_future, i| { - const future_fiber: *Fiber = @ptrCast(@alignCast(any_future)); - if (@atomicLoad(?*Fiber, &future_fiber.awaiter, .acquire) == Fiber.finished) - return i; - } - - el.yield(null, .{ .register_select = futures }); - - std.log.debug("back from select yield", .{}); - - const my_thread: *Thread = .current(); - const my_fiber = my_thread.currentFiber(); - var result: ?usize = null; - - for (futures, 0..) |any_future, i| { - const future_fiber: *Fiber = @ptrCast(@alignCast(any_future)); - if (@cmpxchgStrong(?*Fiber, &future_fiber.awaiter, my_fiber, null, .seq_cst, .seq_cst)) |awaiter| { - if (awaiter == Fiber.finished) { - if (result == null) result = i; - } else if (awaiter) |a| { - const closure: *AsyncClosure = .fromFiber(a); - closure.already_awaited = false; - } - } else { - const closure: *AsyncClosure = .fromFiber(my_fiber); - closure.already_awaited = false; - } - } - - return result.?; + future_fiber.destroy(ev.allocator()); } fn cancel( userdata: ?*anyopaque, - any_future: *std.Io.AnyFuture, + future: *std.Io.AnyFuture, result: []u8, result_alignment: Alignment, ) void { - const future_fiber: *Fiber = @ptrCast(@alignCast(any_future)); - if (@atomicRmw( - ?*Thread, - &future_fiber.cancel_thread, - .Xchg, - Thread.canceling, - .acq_rel, - )) |cancel_thread| if (cancel_thread != Thread.canceling) { - getSqe(&Thread.current().io_uring).* = .{ - .opcode = .MSG_RING, - .flags = std.os.linux.IOSQE_CQE_SKIP_SUCCESS, + const ev: *Evented = @ptrCast(@alignCast(userdata)); + const future_fiber: *Fiber = @ptrCast(@alignCast(future)); + future_fiber.requestCancel(ev); + await(ev, future, result, result_alignment); +} + +const Group = struct { + ptr: *Io.Group, + + const List = packed struct(usize) { + cancel_requested: bool, + awaiter_delayed: bool, + fibers: Fiber.PackedPtr, + }; + fn listPtr(group: Group) *List { + return 
@ptrCast(&group.ptr.token); + } + + const Mutex = packed struct(u32) { + locked: bool, + contended: bool, + shared2: u30, + }; + fn mutexPtr(group: Group) *Mutex { + return switch (comptime builtin.cpu.arch.endian()) { + .little => @ptrCast(&group.ptr.state), + .big => @ptrCast(@alignCast( + @as([*]u8, @ptrCast(&group.ptr.state)) + @sizeOf(usize) - @sizeOf(u32), + )), + }; + } + + const Awaiter = packed struct(usize) { + locked: bool, + contended: bool, + awaiter: Fiber.PackedPtr, + }; + fn awaiterPtr(group: Group) *Awaiter { + return @ptrCast(&group.ptr.state); + } + + fn lock(group: Group, ev: *Evented) void { + const mutex = group.mutexPtr(); + { + const old_state = @atomicRmw( + Mutex, + mutex, + .Or, + .{ .locked = true, .contended = false, .shared2 = 0 }, + .acquire, + ); + if (!old_state.locked) { + @branchHint(.likely); + return; + } + if (old_state.contended) { + futexWaitUncancelable(ev, @ptrCast(mutex), @bitCast(old_state)); + } + } + while (true) { + var old_state = @atomicRmw( + Mutex, + mutex, + .Or, + .{ .locked = true, .contended = true, .shared2 = 0 }, + .acquire, + ); + if (!old_state.locked) { + @branchHint(.likely); + return; + } + old_state.contended = true; + futexWaitUncancelable(ev, @ptrCast(mutex), @bitCast(old_state)); + } + } + + fn unlock(group: Group, ev: *Evented) void { + const mutex = group.mutexPtr(); + const old_state = @atomicRmw( + Mutex, + mutex, + .And, + .{ .locked = false, .contended = false, .shared2 = std.math.maxInt(u30) }, + .release, + ); + assert(old_state.locked); + if (old_state.contended) futexWake(ev, @ptrCast(mutex), 1); + } + + fn addFiber(group: Group, ev: *Evented, fiber: *Fiber) void { + group.lock(ev); + defer group.unlock(ev); + const list_ptr = group.listPtr(); + const list = @atomicLoad(List, list_ptr, .monotonic); + if (list.cancel_requested) fiber.cancel_status = .{ .requested = true, .awaiting = .nothing }; + const old_head = list.fibers.unpack(); + if (old_head) |head| head.link.group.prev = fiber; + 
fiber.link.group.next = old_head; + @atomicStore(List, list_ptr, .{ + .cancel_requested = list.cancel_requested, + .awaiter_delayed = list.awaiter_delayed, + .fibers = .pack(fiber), + }, .monotonic); + } + + fn removeFiber(group: Group, ev: *Evented, fiber: *Fiber) ?*Fiber { + group.lock(ev); + defer group.unlock(ev); + const list_ptr = group.listPtr(); + const list = @atomicLoad(List, list_ptr, .monotonic); + if (fiber.link.group.next) |next| next.link.group.prev = fiber.link.group.prev; + if (fiber.link.group.prev) |prev| { + prev.link.group.next = fiber.link.group.next; + } else if (fiber.link.group.next) |new_head| { + @atomicStore(List, list_ptr, .{ + .cancel_requested = list.cancel_requested, + .awaiter_delayed = list.awaiter_delayed, + .fibers = .pack(new_head), + }, .monotonic); + } else if (@atomicLoad(Awaiter, group.awaiterPtr(), .monotonic).awaiter.unpack()) |awaiter| { + if (!awaiter.cancel_status.changeAwaiting(.group, .nothing) or list.cancel_requested) { + @atomicStore(List, list_ptr, .{ + .cancel_requested = false, + .awaiter_delayed = false, + .fibers = .null, + }, .release); + assert(awaiter.status.awaiting_group.ptr == group.ptr); + awaiter.status = .{ .queue_next = null }; + return awaiter; + } + // Race with `Fiber.requestCancel` + @atomicStore(List, list_ptr, .{ + .cancel_requested = false, + .awaiter_delayed = true, + .fibers = .null, + }, .monotonic); + } else @atomicStore(List, list_ptr, .{ + .cancel_requested = false, + .awaiter_delayed = false, + .fibers = .null, + }, .release); + return null; + } + + fn await(group: Group, ev: *Evented, awaiter: *Fiber) bool { + group.lock(ev); + defer group.unlock(ev); + if (@atomicLoad(List, group.listPtr(), .monotonic).fibers.unpack()) |_| { + if (group.registerAwaiter(awaiter) and awaiter.cancel_protection.check() == .unblocked) { + // The awaiter already had an unacknowledged cancelation request before + // attempting to await a group, so propagate the cancelation to the group. 
+ assert(!group.cancelLocked(ev, null)); + } + return false; + } + return true; + } + + fn cancel(group: Group, ev: *Evented, maybe_awaiter: ?*Fiber) bool { + group.lock(ev); + defer group.unlock(ev); + return group.cancelLocked(ev, maybe_awaiter); + } + + /// Assumes the mutex is held. + fn cancelLocked(group: Group, ev: *Evented, maybe_awaiter: ?*Fiber) bool { + const list_ptr = group.listPtr(); + const list = @atomicRmw( + List, + list_ptr, + .Add, + .{ .cancel_requested = true, .awaiter_delayed = false, .fibers = .null }, + .monotonic, + ); + assert(!list.cancel_requested); + if (list.fibers.unpack()) |head| { + var maybe_fiber: ?*Fiber = head; + while (maybe_fiber) |fiber| { + fiber.requestCancel(ev); + maybe_fiber = fiber.link.group.next; + } + if (maybe_awaiter) |awaiter| _ = group.registerAwaiter(awaiter); + return false; + } + @atomicStore( + List, + list_ptr, + .{ .cancel_requested = false, .awaiter_delayed = false, .fibers = .null }, + .release, + ); + return if (maybe_awaiter) |_| true else list.awaiter_delayed; + } + + /// Assumes the mutex is held. 
+ fn registerAwaiter(group: Group, awaiter: *Fiber) bool { + assert(awaiter.status.queue_next == null); + awaiter.status = .{ .awaiting_group = group }; + assert(@atomicRmw( + Awaiter, + group.awaiterPtr(), + .Add, + .{ .locked = false, .contended = false, .awaiter = .pack(awaiter) }, + .monotonic, + ).awaiter == .null); + return awaiter.cancel_status.changeAwaiting(.nothing, .group); + } + + const AsyncClosure = struct { + ev: *Evented, + group: Group, + fiber: *Fiber, + start: *const fn (context: *const anyopaque) Io.Cancelable!void, + + fn fromFiber(fiber: *Fiber) *Group.AsyncClosure { + return @ptrFromInt(Fiber.max_context_align.max(.of(Group.AsyncClosure)).backward( + @intFromPtr(fiber.allocatedEnd()) - Fiber.max_context_size, + ) - @sizeOf(Group.AsyncClosure)); + } + + fn contextPointer( + closure: *Group.AsyncClosure, + ) [*]align(Fiber.max_context_align.toByteUnits()) u8 { + return @alignCast(@as([*]u8, @ptrCast(closure)) + @sizeOf(Group.AsyncClosure)); + } + + fn entry() callconv(.naked) void { + switch (builtin.cpu.arch) { + .aarch64 => asm volatile ( + \\ mov x0, sp + \\ b %[call] + : + : [call] "X" (&call), + ), + .x86_64 => asm volatile ( + \\ leaq 8(%%rsp), %%rdi + \\ jmp %[call:P] + : + : [call] "X" (&call), + ), + else => |arch| @compileError("unimplemented architecture: " ++ @tagName(arch)), + } + } + + fn call( + closure: *Group.AsyncClosure, + message: *const SwitchMessage, + ) callconv(.withStackAlign(.c, @alignOf(Group.AsyncClosure))) noreturn { + message.handle(closure.ev); + assert(closure.fiber.status.queue_next == null); + log.debug("{*} performing group async", .{closure.fiber}); + const result = closure.start(closure.contextPointer()); + const ev = closure.ev; + const group = closure.group; + const fiber = closure.fiber; + const cancel_acknowledged = fiber.cancel_protection.acknowledged; + if (result) { + assert(!cancel_acknowledged); // group task acknowledged cancelation but did not return `error.Canceled` + } else |err| switch (err) { 
+ error.Canceled => assert(cancel_acknowledged), // group task returned `error.Canceled` but was never canceled + } + const awaiter = group.removeFiber(ev, fiber); + ev.backing_allocator_mutex.lockUncancelable(ev.io()); + ev.yield(awaiter, .destroy); + unreachable; // switched to dead fiber + } + }; +}; + +fn groupAsync( + userdata: ?*anyopaque, + type_erased: *Io.Group, + context: []const u8, + context_alignment: Alignment, + start: *const fn (context: *const anyopaque) Io.Cancelable!void, +) void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + return groupConcurrent(ev, type_erased, context, context_alignment, start) catch { + const fiber = Thread.current().currentFiber(); + const pre_acknowledged = fiber.cancel_protection.acknowledged; + const result = start(context.ptr); + const post_acknowledged = fiber.cancel_protection.acknowledged; + if (result) { + if (pre_acknowledged) { + assert(post_acknowledged); // group task called `recancel` but was not canceled + } else { + assert(!post_acknowledged); // group task acknowledged cancelation but did not return `error.Canceled` + } + } else |err| switch (err) { + // Don't swallow the cancelation: make it visible to the `Group.async` caller. 
+ error.Canceled => { + assert(!pre_acknowledged); // group task called `recancel` but was not canceled + assert(post_acknowledged); // group task returned `error.Canceled` but was never canceled + recancel(userdata); + }, + } + }; +} + +fn groupConcurrent( + userdata: ?*anyopaque, + type_erased: *Io.Group, + context: []const u8, + context_alignment: Alignment, + start: *const fn (context: *const anyopaque) Io.Cancelable!void, +) Io.ConcurrentError!void { + assert(context_alignment.compare(.lte, Fiber.max_context_align)); // TODO + assert(context.len <= Fiber.max_context_size); // TODO + + const ev: *Evented = @ptrCast(@alignCast(userdata)); + const group: Group = .{ .ptr = type_erased }; + const fiber = Fiber.create(ev) catch |err| switch (err) { + error.OutOfMemory => return error.ConcurrencyUnavailable, + }; + log.debug("allocated {*}", .{fiber}); + + const closure: *Group.AsyncClosure = .fromFiber(fiber); + fiber.* = .{ + .required_align = {}, + .context = switch (builtin.cpu.arch) { + .aarch64 => .{ + .sp = @intFromPtr(closure), + .fp = 0, + .pc = @intFromPtr(&Group.AsyncClosure.entry), + }, + .x86_64 => .{ + .rsp = @intFromPtr(closure) - @sizeOf(usize), + .rbp = 0, + .rip = @intFromPtr(&Group.AsyncClosure.entry), + }, + else => |arch| @compileError("unimplemented architecture: " ++ @tagName(arch)), + }, + .await_count = 0, + .link = .{ .group = .{ .prev = null, .next = null } }, + .status = .{ .queue_next = null }, + .cancel_status = .unrequested, + .cancel_protection = .unblocked, + }; + closure.* = .{ + .ev = ev, + .group = group, + .fiber = fiber, + .start = start, + }; + @memcpy(closure.contextPointer(), context); + group.addFiber(ev, fiber); + const thread: *Thread = .current(); + if (ev.schedule(thread, .{ .head = fiber, .tail = fiber })) thread.submit(); +} + +fn groupAwait( + userdata: ?*anyopaque, + type_erased: *Io.Group, + initial_token: *anyopaque, +) Io.Cancelable!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = initial_token; 
+ ev.yield(null, .{ .group_await = .{ .ptr = type_erased } }); +} + +fn groupCancel(userdata: ?*anyopaque, type_erased: *Io.Group, initial_token: *anyopaque) void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = initial_token; + ev.yield(null, .{ .group_cancel = .{ .ptr = type_erased } }); +} + +fn recancel(userdata: ?*anyopaque) void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + const cancel_protection = &Thread.current().currentFiber().cancel_protection; + assert(cancel_protection.acknowledged); + cancel_protection.acknowledged = false; +} + +fn swapCancelProtection(userdata: ?*anyopaque, new: Io.CancelProtection) Io.CancelProtection { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + const cancel_protection = &Thread.current().currentFiber().cancel_protection; + defer cancel_protection.user = new; + return cancel_protection.user; +} + +fn checkCancel(userdata: ?*anyopaque) Io.Cancelable!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + const fiber = Thread.current().currentFiber(); + switch (fiber.cancel_protection.check()) { + .blocked => {}, + .unblocked => if (@atomicLoad(Fiber.CancelStatus, &fiber.cancel_status, .monotonic).requested) { + fiber.cancel_protection.acknowledge(); + return error.Canceled; + }, + } +} + +fn select(userdata: ?*anyopaque, futures: []const *Io.AnyFuture) Io.Cancelable!usize { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + var await_count: u31, var result = for (futures, 0..) 
|future, future_index| { + const future_fiber: *Fiber = @ptrCast(@alignCast(future)); + if (@atomicRmw( + ?*Fiber, + &future_fiber.link.awaiter, + .Xchg, + cancel_region.fiber, + .acq_rel, + )) |awaiter| { + assert(awaiter == Fiber.finished); + break .{ @intCast(future_index), future_index }; + } + } else result: { + const await_count: u31 = @intCast(futures.len); + cancel_region.await(.select) catch |err| switch (err) { + error.Canceled => |e| break :result .{ await_count + 1, e }, + }; + ev.yield(null, .{ .await = 1 }); + cancel_region.await(.nothing) catch |err| switch (err) { + error.Canceled => |e| break :result .{ await_count, e }, + }; + break :result .{ await_count - 1, futures.len }; + }; + for (futures[0 .. result catch futures.len], 0..) |future, future_index| { + const future_fiber: *Fiber = @ptrCast(@alignCast(future)); + const awaiter = @atomicRmw(?*Fiber, &future_fiber.link.awaiter, .Xchg, null, .monotonic); + if (awaiter == Fiber.finished) { + @atomicStore(?*Fiber, &future_fiber.link.awaiter, Fiber.finished, .monotonic); + result = if (result) |finished_index| @min(future_index, finished_index) else |e| e; + } else { + assert(awaiter == cancel_region.fiber); + await_count -= 1; + } + } + // Equivalent to `ev.yield(null, .{ .await = await_count });`, + // but avoiding a context switch in the common case. + switch (std.math.order( + @atomicRmw(i32, &cancel_region.fiber.await_count, .Sub, await_count, .monotonic), + await_count, + )) { + .lt => ev.yield(null, .{ .await = 0 }), + .eq => {}, + .gt => unreachable, + } + return result; +} + +fn futexWait( + userdata: ?*anyopaque, + ptr: *const u32, + expected: u32, + timeout: Io.Timeout, +) Io.Cancelable!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + if (builtin.single_threaded) unreachable; // Deadlock. 
+ const timespec: ?linux.kernel_timespec, const clock: Io.Clock, const timeout_flags: u32 = timespec: switch (timeout) { + .none => .{ + null, + .awake, + linux.IORING_TIMEOUT_ABS, + }, + .duration => |duration| { + const ns = duration.raw.toNanoseconds(); + break :timespec .{ + .{ + .sec = @intCast(@divFloor(ns, std.time.ns_per_s)), + .nsec = @intCast(@mod(ns, std.time.ns_per_s)), + }, + duration.clock, + 0, + }; + }, + .deadline => |deadline| { + const ns = deadline.raw.toNanoseconds(); + break :timespec .{ + .{ + .sec = @intCast(@divFloor(ns, std.time.ns_per_s)), + .nsec = @intCast(@mod(ns, std.time.ns_per_s)), + }, + deadline.clock, + linux.IORING_TIMEOUT_ABS, + }; + }, + }; + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + const thread = try cancel_region.awaitIoUring(); + thread.enqueue().* = .{ + .opcode = .FUTEX_WAIT, + .flags = if (timespec) |_| linux.IOSQE_IO_LINK else 0, + .ioprio = 0, + .fd = @bitCast(linux.FUTEX2_FLAGS{ .size = .U32, .private = true }), + .off = expected, + .addr = @intFromPtr(ptr), + .len = 0, + .rw_flags = 0, + .user_data = @intFromPtr(cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = std.math.maxInt(u32), + .resv = 0, + }; + if (timespec) |*timespec_ptr| thread.enqueue().* = .{ + .opcode = .LINK_TIMEOUT, + .flags = linux.IOSQE_CQE_SKIP_SUCCESS, + .ioprio = 0, + .fd = 0, + .off = 0, + .addr = @intFromPtr(timespec_ptr), + .len = 1, + .rw_flags = timeout_flags | @as(u32, switch (clock) { + .real => linux.IORING_TIMEOUT_REALTIME, + else => 0, + .boot => linux.IORING_TIMEOUT_BOOTTIME, + }), + .user_data = @intFromEnum(Completion.UserData.wakeup), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + ev.yield(null, .nothing); + switch (cancel_region.errno()) { + .SUCCESS => {}, // notified by `wake()` + .INTR, .CANCELED => {}, // caller's responsibility to retry + .AGAIN => {}, // ptr.* != expect + .INVAL => {}, // possibly 
timeout overflow + .TIMEDOUT => unreachable, + .FAULT => recoverableOsBugDetected(), // ptr was invalid + else => recoverableOsBugDetected(), + } +} + +fn futexWaitUncancelable(userdata: ?*anyopaque, ptr: *const u32, expected: u32) void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + if (builtin.single_threaded) unreachable; // Deadlock. + var cancel_region: CancelRegion = .initBlocked(); + defer cancel_region.deinit(); + const thread = cancel_region.awaitIoUring() catch |err| switch (err) { + error.Canceled => unreachable, // blocked + }; + thread.enqueue().* = .{ + .opcode = .FUTEX_WAIT, + .flags = 0, + .ioprio = 0, + .fd = @bitCast(linux.FUTEX2_FLAGS{ .size = .U32, .private = true }), + .off = expected, + .addr = @intFromPtr(ptr), + .len = 0, + .rw_flags = 0, + .user_data = @intFromPtr(cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = std.math.maxInt(u32), + .resv = 0, + }; + ev.yield(null, .nothing); + switch (cancel_region.errno()) { + .SUCCESS => {}, // notified by `wake()` + .INTR, .CANCELED => {}, // caller's responsibility to retry + .AGAIN => {}, // ptr.* != expect + .INVAL => {}, // possibly timeout overflow + .FAULT => recoverableOsBugDetected(), // ptr was invalid + else => recoverableOsBugDetected(), + } +} + +fn futexWake(userdata: ?*anyopaque, ptr: *const u32, max_waiters: u32) void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + if (builtin.single_threaded) unreachable; // Nothing to wake up. 
+ const thread: *Thread = .current(); + thread.enqueue().* = .{ + .opcode = .FUTEX_WAKE, + .flags = linux.IOSQE_CQE_SKIP_SUCCESS, + .ioprio = 0, + .fd = @bitCast(linux.FUTEX2_FLAGS{ .size = .U32, .private = true }), + .off = max_waiters, + .addr = @intFromPtr(ptr), + .len = 0, + .rw_flags = 0, + .user_data = @intFromEnum(Completion.UserData.futex_wake), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = std.math.maxInt(u32), + .resv = 0, + }; + thread.submit(); +} + +fn operate(userdata: ?*anyopaque, operation: Io.Operation) Io.Cancelable!Io.Operation.Result { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + switch (operation) { + .file_read_streaming => |o| return .{ + .file_read_streaming = ev.fileReadStreaming(o.file, o.data) catch |err| switch (err) { + error.Canceled => |e| return e, + else => |e| e, + }, + }, + .file_write_streaming => |o| return .{ + .file_write_streaming = ev.fileWriteStreaming(o.file, o.header, o.data, o.splat) catch |err| switch (err) { + error.Canceled => |e| return e, + else => |e| e, + }, + }, + .device_io_control => |*o| return .{ .device_io_control = try deviceIoControl(o) }, + } +} + +fn fileReadStreaming(ev: *Evented, file: File, data: []const []u8) File.Reader.Error!usize { + var iovecs_buffer: [max_iovecs_len]iovec = undefined; + var i: usize = 0; + for (data) |buf| { + if (iovecs_buffer.len - i == 0) break; + if (buf.len != 0) { + iovecs_buffer[i] = .{ .base = buf.ptr, .len = buf.len }; + i += 1; + } + } + const dest = iovecs_buffer[0..i]; + assert(dest[0].len > 0); + + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + return ev.preadv(&cancel_region, file.handle, dest, null); +} + +fn fileWriteStreaming( + ev: *Evented, + file: File, + header: []const u8, + data: []const []const u8, + splat: usize, +) File.Writer.Error!usize { + var iovecs: [max_iovecs_len]iovec_const = undefined; + var iovlen: iovlen_t = 0; + addBuf(&iovecs, &iovlen, header); + for (data[0 .. 
data.len - 1]) |bytes| addBuf(&iovecs, &iovlen, bytes); + const pattern = data[data.len - 1]; + if (iovecs.len - iovlen != 0) switch (splat) { + 0 => {}, + 1 => addBuf(&iovecs, &iovlen, pattern), + else => switch (pattern.len) { + 0 => {}, + 1 => { + var backup_buffer: [splat_buffer_size]u8 = undefined; + const splat_buffer = &backup_buffer; + const memset_len = @min(splat_buffer.len, splat); + const buf = splat_buffer[0..memset_len]; + @memset(buf, pattern[0]); + addBuf(&iovecs, &iovlen, buf); + var remaining_splat = splat - buf.len; + while (remaining_splat > splat_buffer.len and iovecs.len - iovlen != 0) { + assert(buf.len == splat_buffer.len); + addBuf(&iovecs, &iovlen, splat_buffer); + remaining_splat -= splat_buffer.len; + } + addBuf(&iovecs, &iovlen, splat_buffer[0..@min(remaining_splat, splat_buffer.len)]); + }, + else => for (0..@min(splat, iovecs.len - iovlen)) |_| { + addBuf(&iovecs, &iovlen, pattern); + }, + }, + }; + + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + return ev.pwritev(&cancel_region, file.handle, iovecs[0..iovlen], null); +} + +fn deviceIoControl(o: *const Io.Operation.DeviceIoControl) Io.Cancelable!i32 { + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + while (true) { + try cancel_region.await(.nothing); + const rc = linux.ioctl(o.file.handle, @bitCast(o.code), @intFromPtr(o.arg)); + switch (linux.errno(rc)) { + .SUCCESS => return @bitCast(@as(u32, @truncate(rc))), + .INTR => continue, + else => |err| return -@as(i32, @intFromEnum(err)), + } + } +} + +fn batchAwaitAsync(userdata: ?*anyopaque, batch: *Io.Batch) Io.Cancelable!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + batchDrainSubmitted(batch, &cancel_region, false) catch |err| switch (err) { + error.ConcurrencyUnavailable => unreachable, // passed concurrency=false + else => |e| return e, + }; + while (true) { + 
batchDrainReady(batch) catch |err| switch (err) { + error.Timeout => unreachable, // no timeout + }; + if (batch.completed.head != .none) return; + ev.yield(null, .{ .batch_await = batch }); + } +} + +fn batchAwaitConcurrent( + userdata: ?*anyopaque, + batch: *Io.Batch, + timeout: Io.Timeout, +) Io.Batch.AwaitConcurrentError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + try batchDrainSubmitted(batch, &cancel_region, true); + const timespec: linux.kernel_timespec, const clock: Io.Clock, const timeout_flags: u32 = while (true) { + batchDrainReady(batch) catch |err| switch (err) { + error.Timeout => unreachable, // no timeout + }; + if (batch.completed.head != .none) return; + switch (timeout) { + .none => ev.yield(null, .{ .batch_await = batch }), + .duration => |duration| { + const ns = duration.raw.toNanoseconds(); + break .{ + .{ + .sec = @intCast(@divFloor(ns, std.time.ns_per_s)), + .nsec = @intCast(@mod(ns, std.time.ns_per_s)), + }, + duration.clock, + 0, + }; + }, + .deadline => |deadline| { + const ns = deadline.raw.toNanoseconds(); + break .{ + .{ + .sec = @intCast(@divFloor(ns, std.time.ns_per_s)), + .nsec = @intCast(@mod(ns, std.time.ns_per_s)), + }, + deadline.clock, + linux.IORING_TIMEOUT_ABS, + }; + }, + } + }; + { + const thread = try cancel_region.awaitIoUring(); + thread.enqueue().* = .{ + .opcode = .TIMEOUT, + .flags = 0, .ioprio = 0, - .fd = cancel_thread.io_uring.fd, - .off = @intFromPtr(future_fiber), - .addr = 0, - .len = @bitCast(-@as(i32, @intFromEnum(std.os.linux.E.INTR))), - .rw_flags = 0, - .user_data = @intFromEnum(Completion.UserData.cleanup), + .fd = 0, + .off = 0, + .addr = @intFromPtr(×pec), + .len = 1, + .rw_flags = timeout_flags | @as(u32, switch (clock) { + .real => linux.IORING_TIMEOUT_REALTIME, + else => 0, + .boot => linux.IORING_TIMEOUT_BOOTTIME, + }), + .user_data = @intFromPtr(&batch.context) | 0b11, .buf_index = 0, .personality = 0, 
.splice_fd_in = 0, .addr3 = 0, .resv = 0, }; - }; - await(userdata, any_future, result, result_alignment); -} - -fn cancelRequested(userdata: ?*anyopaque) bool { - _ = userdata; - return @atomicLoad(?*Thread, &Thread.current().currentFiber().cancel_thread, .acquire) == Thread.canceling; -} - -fn createFile( - userdata: ?*anyopaque, - dir: Io.Dir, - sub_path: []const u8, - flags: Io.File.CreateFlags, -) Io.File.OpenError!Io.File { - const el: *EventLoop = @ptrCast(@alignCast(userdata)); - const thread: *Thread = .current(); - const iou = &thread.io_uring; - const fiber = thread.currentFiber(); - try fiber.enterCancelRegion(thread); - - const posix = std.posix; - const sub_path_c = try posix.toPosixPath(sub_path); - - var os_flags: posix.O = .{ - .ACCMODE = if (flags.read) .RDWR else .WRONLY, - .CREAT = true, - .TRUNC = flags.truncate, - .EXCL = flags.exclusive, - }; - if (@hasField(posix.O, "LARGEFILE")) os_flags.LARGEFILE = true; - if (@hasField(posix.O, "CLOEXEC")) os_flags.CLOEXEC = true; - - // Use the O locking flags if the os supports them to acquire the lock - // atomically. Note that the NONBLOCK flag is removed after the openat() - // call is successful. 
- const has_flock_open_flags = @hasField(posix.O, "EXLOCK"); - if (has_flock_open_flags) switch (flags.lock) { - .none => {}, - .shared => { - os_flags.SHLOCK = true; - os_flags.NONBLOCK = flags.lock_nonblocking; - }, - .exclusive => { - os_flags.EXLOCK = true; - os_flags.NONBLOCK = flags.lock_nonblocking; - }, - }; - const have_flock = @TypeOf(posix.system.flock) != void; - - if (have_flock and !has_flock_open_flags and flags.lock != .none) { - @panic("TODO"); } - - if (has_flock_open_flags and flags.lock_nonblocking) { - @panic("TODO"); + while (batch.completed.head == .none) { + ev.yield(null, .{ .batch_await = batch }); + batchDrainReady(batch) catch |err| switch (err) { + error.Timeout => |e| return if (batch.completed.head == .none) e, + }; + if (batch.completed.head == .none) continue; } - - getSqe(iou).* = .{ - .opcode = .OPENAT, + const thread = try cancel_region.awaitIoUring(); + thread.enqueue().* = .{ + .opcode = .TIMEOUT_REMOVE, .flags = 0, .ioprio = 0, - .fd = dir.handle, + .fd = 0, .off = 0, - .addr = @intFromPtr(&sub_path_c), - .len = @intCast(flags.mode), - .rw_flags = @bitCast(os_flags), - .user_data = @intFromPtr(fiber), + .addr = @intFromPtr(&batch.context) | 0b11, + .len = 0, + .rw_flags = 0, + .user_data = @intFromPtr(cancel_region.fiber), .buf_index = 0, .personality = 0, .splice_fd_in = 0, .addr3 = 0, .resv = 0, }; - - el.yield(null, .nothing); - fiber.exitCancelRegion(thread); - - const completion = fiber.resultPointer(Completion); - switch (errno(completion.result)) { - .SUCCESS => return .{ .handle = completion.result }, - .INTR => unreachable, - .CANCELED => return error.Canceled, - - .FAULT => unreachable, - .INVAL => return error.BadPathName, - .BADF => unreachable, - .ACCES => return error.AccessDenied, - .FBIG => return error.FileTooBig, - .OVERFLOW => return error.FileTooBig, - .ISDIR => return error.IsDir, - .LOOP => return error.SymLinkLoop, - .MFILE => return error.ProcessFdQuotaExceeded, - .NAMETOOLONG => return 
error.NameTooLong, - .NFILE => return error.SystemFdQuotaExceeded, - .NODEV => return error.NoDevice, - .NOENT => return error.FileNotFound, - .NOMEM => return error.SystemResources, - .NOSPC => return error.NoSpaceLeft, - .NOTDIR => return error.NotDir, - .PERM => return error.PermissionDenied, - .EXIST => return error.PathAlreadyExists, - .BUSY => return error.DeviceBusy, - .OPNOTSUPP => return error.FileLocksUnsupported, - .AGAIN => return error.WouldBlock, - .TXTBSY => return error.FileBusy, - .NXIO => return error.NoDevice, - else => |err| return posix.unexpectedErrno(err), + ev.yield(null, .nothing); + switch (cancel_region.errno()) { + .SUCCESS => return, + .BUSY, .NOENT => {}, + else => |err| unexpectedErrno(err) catch {}, + } + while (true) { + batchDrainReady(batch) catch |err| switch (err) { + error.Timeout => return, + }; + ev.yield(null, .{ .batch_await = batch }); } } -fn fileOpen( +/// If `concurrency` is false, `error.ConcurrencyUnavailable` is unreachable. +fn batchDrainSubmitted( + batch: *Io.Batch, + cancel_region: *CancelRegion, + concurrency: bool, +) (Io.ConcurrentError || Io.Cancelable)!void { + var index = batch.submitted.head; + if (index == .none) return; + errdefer batch.submitted.head = index; + const thread = try cancel_region.awaitIoUring(); + while (index != .none) { + const storage = &batch.storage[index.toIndex()]; + const next_index = storage.submission.node.next; + if (@as(?Io.Operation.Result, operation: switch (storage.submission.operation) { + .file_read_streaming => |o| { + const buffer = for (o.data) |buffer| { + if (buffer.len != 0) break buffer; + } else break :operation .{ .file_read_streaming = 0 }; + const fd = o.file.handle; + storage.* = .{ .pending = .{ + .node = .{ .prev = batch.pending.tail, .next = .none }, + .tag = .file_read_streaming, + .context = undefined, + } }; + thread.enqueue().* = .{ + .opcode = .READ, + .flags = 0, + .ioprio = 0, + .fd = fd, + .off = std.math.maxInt(u64), + .addr = 
@intFromPtr(buffer.ptr), + .len = @min(buffer.len, 0xfffff000), + .rw_flags = 0, + .user_data = @intFromPtr(&storage.pending.context) | 0b10, + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + break :operation null; + }, + .file_write_streaming => |o| { + const buffer = buffer: { + if (o.header.len != 0) break :buffer o.header; + for (o.data[0 .. o.data.len - 1]) |buffer| { + if (buffer.len != 0) break :buffer buffer; + } + if (o.splat > 0) break :buffer o.data[o.data.len - 1]; + break :operation .{ .file_write_streaming = 0 }; + }; + const fd = o.file.handle; + storage.* = .{ .pending = .{ + .node = .{ .prev = batch.pending.tail, .next = .none }, + .tag = .file_write_streaming, + .context = undefined, + } }; + thread.enqueue().* = .{ + .opcode = .WRITE, + .flags = 0, + .ioprio = 0, + .fd = fd, + .off = std.math.maxInt(u64), + .addr = @intFromPtr(buffer.ptr), + .len = @min(buffer.len, 0xfffff000), + .rw_flags = 0, + .user_data = @intFromPtr(&storage.pending.context) | 0b10, + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + break :operation null; + }, + .device_io_control => |o| if (concurrency) + return error.ConcurrencyUnavailable + else + .{ .device_io_control = try deviceIoControl(&o) }, + })) |result| { + switch (batch.completed.tail) { + .none => batch.completed.head = index, + else => |tail_index| batch.storage[tail_index.toIndex()].completion.node.next = index, + } + batch.completed.tail = index; + storage.* = .{ .completion = .{ .node = .{ .next = .none }, .result = result } }; + } else { + switch (batch.pending.tail) { + .none => batch.pending.head = index, + else => |tail_index| batch.storage[tail_index.toIndex()].pending.node.next = index, + } + batch.pending.tail = index; + storage.pending.context[0] = @intFromPtr(batch); + } + index = next_index; + } + batch.submitted = .{ .head = .none, .tail = .none }; +} + +fn batchDrainReady(batch: *Io.Batch) Io.Timeout.Error!void 
{ + while (@atomicRmw(?*anyopaque, &batch.context, .Xchg, null, .acquire)) |head| { + var next: usize = @intFromPtr(head); + var timeout = false; + while (cond: switch (@as(u2, @truncate(next))) { + 0b00 => if (timeout) return error.Timeout else false, + 0b01 => { + assert(!timeout); + return error.Timeout; + }, + 0b10 => true, + 0b11 => { + assert(!timeout); + timeout = true; + break :cond true; + }, + }) { + var context: *Io.Operation.Storage.Pending.Context = @ptrFromInt(next & ~@as(usize, 0b11)); + next = context[0]; + const completion: Completion = .{ + .result = @bitCast(@as(u32, @intCast(context[1]))), + .flags = @intCast(context[2]), + }; + const pending: *Io.Operation.Storage.Pending = @fieldParentPtr("context", context); + const storage: *Io.Operation.Storage = @fieldParentPtr("pending", pending); + const index: Io.Operation.OptionalIndex = .fromIndex(storage - batch.storage.ptr); + assert(completion.flags & linux.IORING_CQE_F_SKIP == 0); + switch (pending.node.prev) { + .none => batch.pending.head = pending.node.next, + else => |prev_index| batch.storage[prev_index.toIndex()].pending.node.next = + pending.node.next, + } + switch (pending.node.next) { + .none => batch.pending.tail = pending.node.prev, + else => |prev_index| batch.storage[prev_index.toIndex()].pending.node.prev = + pending.node.prev, + } + if (@as(?Io.Operation.Result, result: switch (pending.tag) { + .file_read_streaming => .{ + .file_read_streaming = switch (completion.errno()) { + .SUCCESS => @as(u32, @bitCast(completion.result)), + .INTR => 0, + .CANCELED => break :result null, + .INVAL => |err| errnoBug(err), + .FAULT => |err| errnoBug(err), + .AGAIN => error.WouldBlock, + .BADF => |err| errnoBug(err), // File descriptor used after closed + .IO => error.InputOutput, + .ISDIR => error.IsDir, + .NOBUFS => error.SystemResources, + .NOMEM => error.SystemResources, + .NOTCONN => error.SocketUnconnected, + .CONNRESET => error.ConnectionResetByPeer, + else => |err| unexpectedErrno(err), + }, 
+ }, + .file_write_streaming => .{ + .file_write_streaming = switch (completion.errno()) { + .SUCCESS => @as(u32, @bitCast(completion.result)), + .INTR => 0, + .CANCELED => break :result null, + .INVAL => |err| errnoBug(err), + .FAULT => |err| errnoBug(err), + .AGAIN => error.WouldBlock, + .BADF => error.NotOpenForWriting, // Can be a race condition. + .DESTADDRREQ => |err| errnoBug(err), // `connect` was never called. + .DQUOT => error.DiskQuota, + .FBIG => error.FileTooBig, + .IO => error.InputOutput, + .NOSPC => error.NoSpaceLeft, + .PERM => error.PermissionDenied, + .PIPE => error.BrokenPipe, + .CONNRESET => |err| errnoBug(err), // Not a socket handle. + .BUSY => error.DeviceBusy, + else => |err| unexpectedErrno(err), + }, + }, + .device_io_control => unreachable, + })) |result| { + switch (batch.completed.tail) { + .none => batch.completed.head = index, + else => |tail_index| batch.storage[tail_index.toIndex()].completion.node.next = + index, + } + storage.* = .{ .completion = .{ .node = .{ .next = .none }, .result = result } }; + batch.completed.tail = index; + } else { + switch (batch.unused.tail) { + .none => batch.unused.head = index, + else => |tail_index| batch.storage[tail_index.toIndex()].unused.next = index, + } + storage.* = .{ .unused = .{ .prev = batch.unused.tail, .next = .none } }; + batch.unused.tail = index; + } + } + } +} + +fn batchCancel(userdata: ?*anyopaque, batch: *Io.Batch) void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + batchDrainReady(batch) catch |err| switch (err) { + error.Timeout => unreachable, // no timeout + }; + var index = batch.pending.head; + if (index == .none) return; + var cancel_region: CancelRegion = .initBlocked(); + defer cancel_region.deinit(); + const thread = cancel_region.awaitIoUring() catch |err| switch (err) { + error.Canceled => unreachable, // blocked + }; + while (index != .none) { + const pending = &batch.storage[index.toIndex()].pending; + thread.enqueue().* = .{ + .opcode = 
.ASYNC_CANCEL, + .flags = linux.IOSQE_CQE_SKIP_SUCCESS, + .ioprio = 0, + .fd = 0, + .off = 0, + .addr = @intFromPtr(&pending.context) | 0b10, + .len = 0, + .rw_flags = 0, + .user_data = @intFromEnum(Completion.UserData.wakeup), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + index = pending.node.next; + } + while (batch.pending.head != .none) batchDrainReady(batch) catch |err| switch (err) { + error.Timeout => unreachable, // no timeout + }; +} + +fn dirCreateDir( userdata: ?*anyopaque, - dir: Io.Dir, + dir: Dir, sub_path: []const u8, - flags: Io.File.OpenFlags, -) Io.File.OpenError!Io.File { - const el: *EventLoop = @ptrCast(@alignCast(userdata)); - const thread: *Thread = .current(); - const iou = &thread.io_uring; - const fiber = thread.currentFiber(); - try fiber.enterCancelRegion(thread); + permissions: Dir.Permissions, +) Dir.CreateDirError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); - const posix = std.posix; - const sub_path_c = try posix.toPosixPath(sub_path); + var path_buffer: [PATH_MAX]u8 = undefined; + const sub_path_posix = try pathToPosix(sub_path, &path_buffer); - var os_flags: posix.O = .{ + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + while (true) { + const thread = try cancel_region.awaitIoUring(); + thread.enqueue().* = .{ + .opcode = .MKDIRAT, + .flags = 0, + .ioprio = 0, + .fd = dir.handle, + .off = 0, + .addr = @intFromPtr(sub_path_posix.ptr), + .len = permissions.toMode(), + .rw_flags = 0, + .user_data = @intFromPtr(cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + ev.yield(null, .nothing); + switch (cancel_region.errno()) { + .SUCCESS => return, + .INTR, .CANCELED => continue, + .ACCES => return error.AccessDenied, + .BADF => |err| return errnoBug(err), // File descriptor used after closed. 
+ .PERM => return error.PermissionDenied, + .DQUOT => return error.DiskQuota, + .EXIST => return error.PathAlreadyExists, + .FAULT => |err| return errnoBug(err), + .LOOP => return error.SymLinkLoop, + .MLINK => return error.LinkQuotaExceeded, + .NAMETOOLONG => return error.NameTooLong, + .NOENT => return error.FileNotFound, + .NOMEM => return error.SystemResources, + .NOSPC => return error.NoSpaceLeft, + .NOTDIR => return error.NotDir, + .ROFS => return error.ReadOnlyFileSystem, + // dragonfly: when dir_fd is unlinked from filesystem + .NOTCONN => return error.FileNotFound, + .ILSEQ => return error.BadPathName, + else => |err| return unexpectedErrno(err), + } + } +} + +fn dirCreateDirPath( + userdata: ?*anyopaque, + dir: Dir, + sub_path: []const u8, + permissions: Dir.Permissions, +) Dir.CreateDirPathError!Dir.CreatePathStatus { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + + var it = Dir.path.componentIterator(sub_path); + var status: Dir.CreatePathStatus = .existed; + var component = it.last() orelse return error.BadPathName; + while (true) { + if (dirCreateDir(ev, dir, component.path, permissions)) |_| { + status = .created; + } else |err| switch (err) { + error.PathAlreadyExists => { + // stat the file and return an error if it's not a directory + // this is important because otherwise a dangling symlink + // could cause an infinite loop + const fstat = try dirStatFile(ev, dir, component.path, .{}); + if (fstat.kind != .directory) return error.NotDir; + }, + error.FileNotFound => |e| { + component = it.previous() orelse return e; + continue; + }, + else => |e| return e, + } + component = it.next() orelse return status; + } +} + +fn dirCreateDirPathOpen( + userdata: ?*anyopaque, + dir: Dir, + sub_path: []const u8, + permissions: Dir.Permissions, + options: Dir.OpenOptions, +) Dir.CreateDirPathOpenError!Dir { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + return dirOpenDir(ev, dir, sub_path, options) catch |err| switch (err) { + 
error.FileNotFound => { + _ = try dirCreateDirPath(ev, dir, sub_path, permissions); + return dirOpenDir(ev, dir, sub_path, options); + }, + else => |e| return e, + }; +} + +fn dirOpenDir( + userdata: ?*anyopaque, + dir: Dir, + sub_path: []const u8, + options: Dir.OpenOptions, +) Dir.OpenError!Dir { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + + var path_buffer: [PATH_MAX]u8 = undefined; + const sub_path_posix = try pathToPosix(sub_path, &path_buffer); + + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + return .{ + .handle = ev.openat(&cancel_region, dir.handle, sub_path_posix, .{ + .ACCMODE = .RDONLY, + .DIRECTORY = true, + .NOFOLLOW = !options.follow_symlinks, + .CLOEXEC = true, + .PATH = !options.iterate, + }, 0) catch |err| switch (err) { + error.IsDir => return errnoBug(.ISDIR), + error.WouldBlock => return errnoBug(.AGAIN), + error.FileTooBig => return errnoBug(.FBIG), + error.NoSpaceLeft => return errnoBug(.NOSPC), + error.DeviceBusy => return errnoBug(.BUSY), // O_EXCL not passed + error.FileBusy => return errnoBug(.TXTBSY), + error.PathAlreadyExists => return errnoBug(.EXIST), // Not creating. + error.PipeBusy => return error.Unexpected, // Not opening a pipe. + error.AntivirusInterference => unreachable, // Windows-only + error.FileLocksUnsupported => return errnoBug(.OPNOTSUPP), // Not asking for locks. 
+ else => |e| return e, + }, + }; +} + +fn dirStat(userdata: ?*anyopaque, dir: Dir) Dir.StatError!Dir.Stat { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + return ev.stat(&cancel_region, dir.handle); +} + +fn dirStatFile( + userdata: ?*anyopaque, + dir: Dir, + sub_path: []const u8, + options: Dir.StatFileOptions, +) Dir.StatFileError!File.Stat { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var path_buffer: [PATH_MAX]u8 = undefined; + const sub_path_posix = try pathToPosix(sub_path, &path_buffer); + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + return ev.statx(&cancel_region, dir.handle, sub_path_posix, linux.AT.NO_AUTOMOUNT | + @as(u32, if (options.follow_symlinks) 0 else linux.AT.SYMLINK_NOFOLLOW)); +} + +fn dirAccess( + userdata: ?*anyopaque, + dir: Dir, + sub_path: []const u8, + options: Dir.AccessOptions, +) Dir.AccessError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + + var path_buffer: [PATH_MAX]u8 = undefined; + const sub_path_posix = try pathToPosix(sub_path, &path_buffer); + + const mode: u32 = + @as(u32, if (options.read) linux.R_OK else 0) | + @as(u32, if (options.write) linux.W_OK else 0) | + @as(u32, if (options.execute) linux.X_OK else 0); + const flags: u32 = if (options.follow_symlinks) 0 else linux.AT.SYMLINK_NOFOLLOW; + + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + while (true) { + try cancel_region.await(.nothing); + switch (linux.errno(linux.faccessat(dir.handle, sub_path_posix, mode, flags))) { + .SUCCESS => return, + .INTR => continue, + .ACCES => return error.AccessDenied, + .PERM => return error.PermissionDenied, + .ROFS => return error.ReadOnlyFileSystem, + .LOOP => return error.SymLinkLoop, + .TXTBSY => return error.FileBusy, + .NOTDIR => return error.FileNotFound, + .NOENT => return error.FileNotFound, + .NAMETOOLONG => return error.NameTooLong, + 
.INVAL => |err| return errnoBug(err), + .FAULT => |err| return errnoBug(err), + .IO => return error.InputOutput, + .NOMEM => return error.SystemResources, + .ILSEQ => return error.BadPathName, + else => |err| return unexpectedErrno(err), + } + } +} + +fn dirCreateFile( + userdata: ?*anyopaque, + dir: Dir, + sub_path: []const u8, + flags: File.CreateFlags, +) File.OpenError!File { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + + var path_buffer: [PATH_MAX]u8 = undefined; + const sub_path_posix = try pathToPosix(sub_path, &path_buffer); + + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + const fd = try ev.openat(&cancel_region, dir.handle, sub_path_posix, .{ + .ACCMODE = if (flags.read) .RDWR else .WRONLY, + .CREAT = true, + .TRUNC = flags.truncate, + .EXCL = flags.exclusive, + .CLOEXEC = true, + }, flags.permissions.toMode()); + errdefer ev.close(fd); + + switch (flags.lock) { + .none => {}, + .shared, .exclusive => try ev.flock( + &cancel_region, + fd, + flags.lock, + if (flags.lock_nonblocking) .nonblocking else .blocking, + ), + } + + return .{ .handle = fd, .flags = .{ .nonblocking = false } }; +} + +fn dirCreateFileAtomic( + userdata: ?*anyopaque, + dir: Dir, + dest_path: []const u8, + options: Dir.CreateFileAtomicOptions, +) Dir.CreateFileAtomicError!File.Atomic { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + // Linux has O_TMPFILE, but linkat() does not support AT_REPLACE, so it's + // useless when we have to make up a bogus path name to do the rename() + // anyway. 
+ if (!options.replace) tmpfile: { + const flags: linux.O = if (@hasField(linux.O, "TMPFILE")) .{ + .ACCMODE = .RDWR, + .TMPFILE = true, + .DIRECTORY = true, + .CLOEXEC = true, + } else if (@hasField(linux.O, "TMPFILE0") and !@hasField(linux.O, "TMPFILE2")) .{ + .ACCMODE = .RDWR, + .TMPFILE0 = true, + .TMPFILE1 = true, + .DIRECTORY = true, + .CLOEXEC = true, + } else break :tmpfile; + + const dest_dirname = Dir.path.dirname(dest_path); + if (dest_dirname) |dirname| { + // This has a nice side effect of preemptively triggering EISDIR or + // ENOENT, avoiding the ambiguity below. + _ = dirCreateDirPath(ev, dir, dirname, .default_dir) catch |err| switch (err) { + // None of these make sense in this context. + error.IsDir, + error.Streaming, + error.DiskQuota, + error.PathAlreadyExists, + error.LinkQuotaExceeded, + error.PipeBusy, + error.FileTooBig, + error.DeviceBusy, + error.FileLocksUnsupported, + error.FileBusy, + => return error.Unexpected, + + else => |e| return e, + }; + } + + var path_buffer: [PATH_MAX]u8 = undefined; + const sub_path_posix = try pathToPosix(dest_dirname orelse ".", &path_buffer); + + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + return .{ + .file = .{ + .handle = ev.openat( + &cancel_region, + dir.handle, + sub_path_posix, + flags, + options.permissions.toMode(), + ) catch |err| switch (err) { + error.IsDir, error.FileNotFound => { + // Ambiguous error code. It might mean the file system + // does not support O_TMPFILE. Therefore, we must fall + // back to not using O_TMPFILE. + break :tmpfile; + }, + error.FileTooBig => return errnoBug(.FBIG), + error.DeviceBusy => return errnoBug(.BUSY), // O_EXCL not passed + error.PathAlreadyExists => return errnoBug(.EXIST), // Not creating. + error.PipeBusy => return error.Unexpected, // Not opening a pipe. + error.AntivirusInterference => unreachable, // Windows-only + error.FileLocksUnsupported => return errnoBug(.OPNOTSUPP), // Not asking for locks. 
+ else => |e| return e, + }, + .flags = .{ .nonblocking = false }, + }, + .file_basename_hex = 0, + .dest_sub_path = dest_path, + .file_open = true, + .file_exists = false, + .close_dir_on_deinit = false, + .dir = dir, + }; + } + + if (Dir.path.dirname(dest_path)) |dirname| { + const new_dir = if (options.make_path) + dirCreateDirPathOpen(ev, dir, dirname, .default_dir, .{}) catch |err| switch (err) { + // None of these make sense in this context. + error.IsDir, + error.Streaming, + error.DiskQuota, + error.PathAlreadyExists, + error.LinkQuotaExceeded, + error.PipeBusy, + error.FileTooBig, + error.FileLocksUnsupported, + error.DeviceBusy, + => return error.Unexpected, + + else => |e| return e, + } + else + try dirOpenDir(ev, dir, dirname, .{}); + + return ev.atomicFileInit(Dir.path.basename(dest_path), options.permissions, new_dir, true); + } + + return ev.atomicFileInit(dest_path, options.permissions, dir, false); +} + +fn atomicFileInit( + ev: *Evented, + dest_basename: []const u8, + permissions: File.Permissions, + dir: Dir, + close_dir_on_deinit: bool, +) Dir.CreateFileAtomicError!File.Atomic { + while (true) { + var random_integer: u64 = undefined; + random(ev, @ptrCast(&random_integer)); + const tmp_sub_path = std.fmt.hex(random_integer); + const file = dirCreateFile(ev, dir, &tmp_sub_path, .{ + .permissions = permissions, + .exclusive = true, + }) catch |err| switch (err) { + error.PathAlreadyExists => continue, + error.DeviceBusy => continue, + error.FileBusy => continue, + + error.IsDir => return error.Unexpected, // No path components. + error.FileTooBig => return error.Unexpected, // Creating, not opening. + error.FileLocksUnsupported => return error.Unexpected, // Not asking for locks. + error.PipeBusy => return error.Unexpected, // Not opening a pipe. 
+ + else => |e| return e, + }; + return .{ + .file = file, + .file_basename_hex = random_integer, + .dest_sub_path = dest_basename, + .file_open = true, + .file_exists = true, + .close_dir_on_deinit = close_dir_on_deinit, + .dir = dir, + }; + } +} + +fn dirOpenFile( + userdata: ?*anyopaque, + dir: Dir, + sub_path: []const u8, + flags: File.OpenFlags, +) File.OpenError!File { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + + var path_buffer: [PATH_MAX]u8 = undefined; + const sub_path_posix = try pathToPosix(sub_path, &path_buffer); + + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + const fd = try ev.openat(&cancel_region, dir.handle, sub_path_posix, .{ .ACCMODE = switch (flags.mode) { .read_only => .RDONLY, .write_only => .WRONLY, .read_write => .RDWR, }, + .NOCTTY = !flags.allow_ctty, + .NOFOLLOW = !flags.follow_symlinks, + .CLOEXEC = true, + .PATH = flags.path_only, + }, 0); + errdefer ev.close(fd); + + if (!flags.allow_directory) { + const is_dir = is_dir: { + const s = ev.stat(&cancel_region, fd) catch |err| switch (err) { + // The directory-ness is either unknown or unknowable + error.Streaming => break :is_dir false, + else => |e| return e, + }; + break :is_dir s.kind == .directory; + }; + if (is_dir) return error.IsDir; + } + + switch (flags.lock) { + .none => {}, + .shared, .exclusive => try ev.flock( + &cancel_region, + fd, + flags.lock, + if (flags.lock_nonblocking) .nonblocking else .blocking, + ), + } + + return .{ .handle = fd, .flags = .{ .nonblocking = false } }; +} + +fn dirClose(userdata: ?*anyopaque, dirs: []const Dir) void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + for (dirs) |dir| ev.close(dir.handle); +} + +fn dirRead(userdata: ?*anyopaque, dr: *Dir.Reader, buffer: []Dir.Entry) Dir.Reader.Error!usize { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var buffer_index: usize = 0; + while (buffer.len - buffer_index != 0) { + if (dr.end - dr.index == 0) { + // Refill the buffer, unless 
we've already created references to + // buffered data. + if (buffer_index != 0) break; + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + if (dr.state == .reset) { + ev.lseek(&cancel_region, dr.dir.handle, 0, linux.SEEK.SET) catch |err| switch (err) { + error.Unseekable => return error.Unexpected, + else => |e| return e, + }; + dr.state = .reading; + } + const n = while (true) { + try cancel_region.await(.nothing); + const rc = linux.getdents64(dr.dir.handle, dr.buffer.ptr, dr.buffer.len); + switch (linux.errno(rc)) { + .SUCCESS => break rc, + .INTR => continue, + .BADF => |err| return errnoBug(err), // Dir is invalid or was opened without iteration ability. + .FAULT => |err| return errnoBug(err), + .NOTDIR => |err| return errnoBug(err), + // To be consistent across platforms, iteration + // ends if the directory being iterated is deleted + // during iteration. This matches the behavior of + // non-Linux, non-WASI UNIX platforms. + .NOENT => { + dr.state = .finished; + return 0; + }, + // This can occur when reading /proc/$PID/net, or + // if the provided buffer is too small. Neither + // scenario is intended to be handled by this API. + .INVAL => return error.Unexpected, + .ACCES => return error.AccessDenied, // Lacking permission to iterate this directory. + else => |err| return unexpectedErrno(err), + } + }; + if (n == 0) { + dr.state = .finished; + return 0; + } + dr.index = 0; + dr.end = n; + } + // Linux aligns the header by padding after the null byte of the name + // to align the next entry. This means we can find the end of the name + // by looking at only the 8 bytes before the next record. However since + // file names are usually short it's better to keep the machine code + // simpler. + // + // Furthermore, I observed qemu user mode to not align this struct, so + // this code makes the conservative choice to not assume alignment. 
+ const linux_entry: *align(1) linux.dirent64 = @ptrCast(&dr.buffer[dr.index]); + const next_index = dr.index + linux_entry.reclen; + dr.index = next_index; + const name_ptr: [*]u8 = &linux_entry.name; + const padded_name = name_ptr[0 .. linux_entry.reclen - @offsetOf(linux.dirent64, "name")]; + const name_len = std.mem.findScalar(u8, padded_name, 0).?; + const name = name_ptr[0..name_len :0]; + + if (std.mem.eql(u8, name, ".") or std.mem.eql(u8, name, "..")) continue; + + const entry_kind: File.Kind = switch (linux_entry.type) { + linux.DT.BLK => .block_device, + linux.DT.CHR => .character_device, + linux.DT.DIR => .directory, + linux.DT.FIFO => .named_pipe, + linux.DT.LNK => .sym_link, + linux.DT.REG => .file, + linux.DT.SOCK => .unix_domain_socket, + else => .unknown, + }; + buffer[buffer_index] = .{ + .name = name, + .kind = entry_kind, + .inode = linux_entry.ino, + }; + buffer_index += 1; + } + return buffer_index; +} + +fn dirRealPath(userdata: ?*anyopaque, dir: Dir, out_buffer: []u8) Dir.RealPathError!usize { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + return ev.realPath(&cancel_region, dir.handle, out_buffer); +} + +fn dirRealPathFile( + userdata: ?*anyopaque, + dir: Dir, + sub_path: []const u8, + out_buffer: []u8, +) Dir.RealPathFileError!usize { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + + var path_buffer: [PATH_MAX]u8 = undefined; + const sub_path_posix = try pathToPosix(sub_path, &path_buffer); + + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + const fd = ev.openat(&cancel_region, dir.handle, sub_path_posix, .{ + .CLOEXEC = true, + .PATH = true, + }, 0) catch |err| switch (err) { + error.WouldBlock => return errnoBug(.AGAIN), + error.FileLocksUnsupported => return errnoBug(.OPNOTSUPP), // Not asking for locks. 
+ else => |e| return e, }; + defer ev.close(fd); + return ev.realPath(&cancel_region, fd, out_buffer); +} - if (@hasField(posix.O, "CLOEXEC")) os_flags.CLOEXEC = true; - if (@hasField(posix.O, "LARGEFILE")) os_flags.LARGEFILE = true; - if (@hasField(posix.O, "NOCTTY")) os_flags.NOCTTY = !flags.allow_ctty; +fn dirDeleteFile(userdata: ?*anyopaque, dir: Dir, sub_path: []const u8) Dir.DeleteFileError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); - // Use the O locking flags if the os supports them to acquire the lock - // atomically. - const has_flock_open_flags = @hasField(posix.O, "EXLOCK"); - if (has_flock_open_flags) { - // Note that the NONBLOCK flag is removed after the openat() call - // is successful. - switch (flags.lock) { - .none => {}, - .shared => { - os_flags.SHLOCK = true; - os_flags.NONBLOCK = flags.lock_nonblocking; - }, - .exclusive => { - os_flags.EXLOCK = true; - os_flags.NONBLOCK = flags.lock_nonblocking; - }, + var path_buffer: [PATH_MAX]u8 = undefined; + const sub_path_posix = try pathToPosix(sub_path, &path_buffer); + + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + while (true) { + const thread = try cancel_region.awaitIoUring(); + thread.enqueue().* = .{ + .opcode = .UNLINKAT, + .flags = 0, + .ioprio = 0, + .fd = dir.handle, + .off = 0, + .addr = @intFromPtr(sub_path_posix.ptr), + .len = 0, + .rw_flags = 0, + .user_data = @intFromPtr(cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + ev.yield(null, .nothing); + switch (cancel_region.errno()) { + .SUCCESS => return, + .INTR, .CANCELED => continue, + .PERM => return error.PermissionDenied, + .ACCES => return error.AccessDenied, + .BUSY => return error.FileBusy, + .FAULT => |err| return errnoBug(err), + .IO => return error.FileSystem, + .ISDIR => return error.IsDir, + .LOOP => return error.SymLinkLoop, + .NAMETOOLONG => return error.NameTooLong, + .NOENT => return error.FileNotFound, + 
.NOTDIR => return error.NotDir, + .NOMEM => return error.SystemResources, + .ROFS => return error.ReadOnlyFileSystem, + .EXIST => |err| return errnoBug(err), + .NOTEMPTY => |err| return errnoBug(err), // Not passing AT.REMOVEDIR + .ILSEQ => return error.BadPathName, + .INVAL => |err| return errnoBug(err), // invalid flags, or pathname has . as last component + .BADF => |err| return errnoBug(err), // File descriptor used after closed. + else => |err| return unexpectedErrno(err), } } - const have_flock = @TypeOf(posix.system.flock) != void; +} - if (have_flock and !has_flock_open_flags and flags.lock != .none) { - @panic("TODO"); +fn dirDeleteDir(userdata: ?*anyopaque, dir: Dir, sub_path: []const u8) Dir.DeleteDirError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + + var path_buffer: [PATH_MAX]u8 = undefined; + const sub_path_posix = try pathToPosix(sub_path, &path_buffer); + + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + while (true) { + const thread = try cancel_region.awaitIoUring(); + thread.enqueue().* = .{ + .opcode = .UNLINKAT, + .flags = 0, + .ioprio = 0, + .fd = dir.handle, + .off = 0, + .addr = @intFromPtr(sub_path_posix.ptr), + .len = 0, + .rw_flags = linux.AT.REMOVEDIR, + .user_data = @intFromPtr(cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + ev.yield(null, .nothing); + switch (cancel_region.errno()) { + .SUCCESS => return, + .INTR, .CANCELED => continue, + .ACCES => return error.AccessDenied, + .PERM => return error.PermissionDenied, + .BUSY => return error.FileBusy, + .FAULT => |err| return errnoBug(err), + .IO => return error.FileSystem, + .ISDIR => |err| return errnoBug(err), + .LOOP => return error.SymLinkLoop, + .NAMETOOLONG => return error.NameTooLong, + .NOENT => return error.FileNotFound, + .NOTDIR => return error.NotDir, + .NOMEM => return error.SystemResources, + .ROFS => return error.ReadOnlyFileSystem, + .EXIST => |err| return 
errnoBug(err), + .NOTEMPTY => return error.DirNotEmpty, + .ILSEQ => return error.BadPathName, + .INVAL => |err| return errnoBug(err), // invalid flags, or pathname has . as last component + .BADF => |err| return errnoBug(err), // File descriptor used after closed. + else => |err| return unexpectedErrno(err), + } } +} - if (has_flock_open_flags and flags.lock_nonblocking) { - @panic("TODO"); +fn dirRename( + userdata: ?*anyopaque, + old_dir: Dir, + old_sub_path: []const u8, + new_dir: Dir, + new_sub_path: []const u8, +) Dir.RenameError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + + var old_path_buffer: [PATH_MAX]u8 = undefined; + var new_path_buffer: [PATH_MAX]u8 = undefined; + + const old_sub_path_posix = try pathToPosix(old_sub_path, &old_path_buffer); + const new_sub_path_posix = try pathToPosix(new_sub_path, &new_path_buffer); + + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + return ev.renameat( + &cancel_region, + old_dir.handle, + old_sub_path_posix, + new_dir.handle, + new_sub_path_posix, + .{}, + ); +} + +fn dirRenamePreserve( + userdata: ?*anyopaque, + old_dir: Dir, + old_sub_path: []const u8, + new_dir: Dir, + new_sub_path: []const u8, +) Dir.RenamePreserveError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + + var old_path_buffer: [PATH_MAX]u8 = undefined; + var new_path_buffer: [PATH_MAX]u8 = undefined; + + const old_sub_path_posix = try pathToPosix(old_sub_path, &old_path_buffer); + const new_sub_path_posix = try pathToPosix(new_sub_path, &new_path_buffer); + + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + return ev.renameat( + &cancel_region, + old_dir.handle, + old_sub_path_posix, + new_dir.handle, + new_sub_path_posix, + .{ .NOREPLACE = true }, + ); +} + +fn dirSymLink( + userdata: ?*anyopaque, + dir: Dir, + target_path: []const u8, + sym_link_path: []const u8, + flags: Dir.SymLinkFlags, +) Dir.SymLinkError!void { + const ev: *Evented = 
@ptrCast(@alignCast(userdata)); + _ = flags; + + var target_path_buffer: [PATH_MAX]u8 = undefined; + var sym_link_path_buffer: [PATH_MAX]u8 = undefined; + + const target_path_posix = try pathToPosix(target_path, &target_path_buffer); + const sym_link_path_posix = try pathToPosix(sym_link_path, &sym_link_path_buffer); + + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + while (true) { + const thread = try cancel_region.awaitIoUring(); + thread.enqueue().* = .{ + .opcode = .SYMLINKAT, + .flags = 0, + .ioprio = 0, + .fd = dir.handle, + .off = @intFromPtr(sym_link_path_posix.ptr), + .addr = @intFromPtr(target_path_posix.ptr), + .len = 0, + .rw_flags = 0, + .user_data = @intFromPtr(cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + ev.yield(null, .nothing); + switch (cancel_region.errno()) { + .SUCCESS => return, + .INTR, .CANCELED => continue, + .FAULT => |err| return errnoBug(err), + .INVAL => |err| return errnoBug(err), + .ACCES => return error.AccessDenied, + .PERM => return error.PermissionDenied, + .DQUOT => return error.DiskQuota, + .EXIST => return error.PathAlreadyExists, + .IO => return error.FileSystem, + .LOOP => return error.SymLinkLoop, + .NAMETOOLONG => return error.NameTooLong, + .NOENT => return error.FileNotFound, + .NOTDIR => return error.NotDir, + .NOMEM => return error.SystemResources, + .NOSPC => return error.NoSpaceLeft, + .ROFS => return error.ReadOnlyFileSystem, + .ILSEQ => return error.BadPathName, + else => |err| return unexpectedErrno(err), + } } +} - getSqe(iou).* = .{ - .opcode = .OPENAT, - .flags = 0, - .ioprio = 0, - .fd = dir.handle, - .off = 0, - .addr = @intFromPtr(&sub_path_c), - .len = 0, - .rw_flags = @bitCast(os_flags), - .user_data = @intFromPtr(fiber), - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, +fn dirReadLink( + userdata: ?*anyopaque, + dir: Dir, + sub_path: []const u8, + buffer: []u8, +) 
Dir.ReadLinkError!usize { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + + var sub_path_buffer: [PATH_MAX]u8 = undefined; + const sub_path_posix = try pathToPosix(sub_path, &sub_path_buffer); + + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + while (true) { + try cancel_region.await(.nothing); + const rc = linux.readlinkat(dir.handle, sub_path_posix, buffer.ptr, buffer.len); + switch (linux.errno(rc)) { + .SUCCESS => { + const len: usize = @bitCast(rc); + return len; + }, + .INTR => continue, + .ACCES => return error.AccessDenied, + .FAULT => |err| return errnoBug(err), + .INVAL => return error.NotLink, + .IO => return error.FileSystem, + .LOOP => return error.SymLinkLoop, + .NAMETOOLONG => return error.NameTooLong, + .NOENT => return error.FileNotFound, + .NOMEM => return error.SystemResources, + .NOTDIR => return error.NotDir, + .ILSEQ => return error.BadPathName, + else => |err| return unexpectedErrno(err), + } + } +} + +fn dirSetOwner( + userdata: ?*anyopaque, + dir: Dir, + owner: ?File.Uid, + group: ?File.Gid, +) Dir.SetOwnerError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + try ev.fchownat( + &cancel_region, + dir.handle, + "", + owner orelse std.math.maxInt(linux.uid_t), + group orelse std.math.maxInt(linux.gid_t), + linux.AT.EMPTY_PATH, + ); +} + +fn dirSetFileOwner( + userdata: ?*anyopaque, + dir: Dir, + sub_path: []const u8, + owner: ?File.Uid, + group: ?File.Gid, + options: Dir.SetFileOwnerOptions, +) Dir.SetFileOwnerError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var path_buffer: [PATH_MAX]u8 = undefined; + const sub_path_posix = try pathToPosix(sub_path, &path_buffer); + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + try ev.fchownat( + &cancel_region, + dir.handle, + sub_path_posix, + owner orelse std.math.maxInt(linux.uid_t), + group orelse 
std.math.maxInt(linux.gid_t), + if (options.follow_symlinks) 0 else linux.AT.SYMLINK_NOFOLLOW, + ); +} + +fn dirSetPermissions( + userdata: ?*anyopaque, + dir: Dir, + permissions: Dir.Permissions, +) Dir.SetPermissionsError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + ev.fchmodat( + &cancel_region, + dir.handle, + "", + permissions.toMode(), + linux.AT.EMPTY_PATH, + ) catch |err| switch (err) { + error.NameTooLong => return errnoBug(.NAMETOOLONG), + error.BadPathName => return errnoBug(.ILSEQ), + error.ProcessFdQuotaExceeded => return errnoBug(.MFILE), + error.SystemFdQuotaExceeded => return errnoBug(.NFILE), + error.OperationUnsupported => return errnoBug(.OPNOTSUPP), + else => |e| return e, + }; +} + +fn dirSetFilePermissions( + userdata: ?*anyopaque, + dir: Dir, + sub_path: []const u8, + permissions: Dir.Permissions, + options: Dir.SetFilePermissionsOptions, +) Dir.SetFilePermissionsError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var path_buffer: [PATH_MAX]u8 = undefined; + const sub_path_posix = try pathToPosix(sub_path, &path_buffer); + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + try ev.fchmodat( + &cancel_region, + dir.handle, + sub_path_posix, + permissions.toMode(), + if (options.follow_symlinks) 0 else linux.AT.SYMLINK_NOFOLLOW, + ); +} + +fn dirSetTimestamps( + userdata: ?*anyopaque, + dir: Dir, + sub_path: []const u8, + options: Dir.SetTimestampsOptions, +) Dir.SetTimestampsError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var path_buffer: [PATH_MAX]u8 = undefined; + const sub_path_posix = try pathToPosix(sub_path, &path_buffer); + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + try ev.utimensat( + &cancel_region, + dir.handle, + sub_path_posix, + if (options.modify_timestamp != .now or options.access_timestamp != .now) &.{ + 
setTimestampToPosix(options.access_timestamp), + setTimestampToPosix(options.modify_timestamp), + } else null, + if (options.follow_symlinks) 0 else linux.AT.SYMLINK_NOFOLLOW, + ); +} + +fn dirHardLink( + userdata: ?*anyopaque, + old_dir: Dir, + old_sub_path: []const u8, + new_dir: Dir, + new_sub_path: []const u8, + options: Dir.HardLinkOptions, +) Dir.HardLinkError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + + var old_path_buffer: [PATH_MAX]u8 = undefined; + var new_path_buffer: [PATH_MAX]u8 = undefined; + + const old_sub_path_posix = try pathToPosix(old_sub_path, &old_path_buffer); + const new_sub_path_posix = try pathToPosix(new_sub_path, &new_path_buffer); + + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + return ev.linkat( + &cancel_region, + old_dir.handle, + old_sub_path_posix, + new_dir.handle, + new_sub_path_posix, + if (options.follow_symlinks) 0 else linux.AT.SYMLINK_NOFOLLOW, + ); +} + +fn fileStat(userdata: ?*anyopaque, file: File) File.StatError!File.Stat { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + return ev.stat(&cancel_region, file.handle); +} + +fn fileLength(userdata: ?*anyopaque, file: File) File.LengthError!u64 { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + while (true) { + var statx_buf = std.mem.zeroes(linux.Statx); + const thread = try cancel_region.awaitIoUring(); + thread.enqueue().* = .{ + .opcode = .STATX, + .flags = 0, + .ioprio = 0, + .fd = file.handle, + .off = @intFromPtr(&statx_buf), + .addr = @intFromPtr(""), + .len = @bitCast(linux.STATX{ .SIZE = true }), + .rw_flags = linux.AT.EMPTY_PATH, + .user_data = @intFromPtr(cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + ev.yield(null, .nothing); + switch (cancel_region.errno()) { + .SUCCESS => { 
+ if (!statx_buf.mask.SIZE) return error.Unexpected; + return statx_buf.size; + }, + .INTR, .CANCELED => continue, + .ACCES => |err| return errnoBug(err), + .BADF => |err| return errnoBug(err), // File descriptor used after closed. + .FAULT => |err| return errnoBug(err), + .INVAL => |err| return errnoBug(err), + .LOOP => |err| return errnoBug(err), + .NAMETOOLONG => |err| return errnoBug(err), + .NOENT => |err| return errnoBug(err), + .NOMEM => return error.SystemResources, + .NOTDIR => |err| return errnoBug(err), + else => |err| return unexpectedErrno(err), + } + } +} + +fn fileClose(userdata: ?*anyopaque, files: []const File) void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + for (files) |file| ev.close(file.handle); +} + +fn fileWritePositional( + userdata: ?*anyopaque, + file: File, + header: []const u8, + data: []const []const u8, + splat: usize, + offset: u64, +) File.WritePositionalError!usize { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + + var iovecs: [max_iovecs_len]iovec_const = undefined; + var iovlen: iovlen_t = 0; + addBuf(&iovecs, &iovlen, header); + for (data[0 .. 
data.len - 1]) |bytes| addBuf(&iovecs, &iovlen, bytes); + const pattern = data[data.len - 1]; + if (iovecs.len - iovlen != 0) switch (splat) { + 0 => {}, + 1 => addBuf(&iovecs, &iovlen, pattern), + else => switch (pattern.len) { + 0 => {}, + 1 => { + var backup_buffer: [splat_buffer_size]u8 = undefined; + const splat_buffer = &backup_buffer; + const memset_len = @min(splat_buffer.len, splat); + const buf = splat_buffer[0..memset_len]; + @memset(buf, pattern[0]); + addBuf(&iovecs, &iovlen, buf); + var remaining_splat = splat - buf.len; + while (remaining_splat > splat_buffer.len and iovecs.len - iovlen != 0) { + assert(buf.len == splat_buffer.len); + addBuf(&iovecs, &iovlen, splat_buffer); + remaining_splat -= splat_buffer.len; + } + addBuf(&iovecs, &iovlen, splat_buffer[0..@min(remaining_splat, splat_buffer.len)]); + }, + else => for (0..@min(splat, iovecs.len - iovlen)) |_| { + addBuf(&iovecs, &iovlen, pattern); + }, + }, }; - el.yield(null, .nothing); - fiber.exitCancelRegion(thread); + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + return ev.pwritev(&cancel_region, file.handle, iovecs[0..iovlen], offset); +} - const completion = fiber.resultPointer(Completion); - switch (errno(completion.result)) { - .SUCCESS => return .{ .handle = completion.result }, - .INTR => unreachable, - .CANCELED => return error.Canceled, +/// This is either usize or u32. Since, either is fine, let's use the same +/// `addBuf` function for both writing to a file and sending network messages. 
+const iovlen_t = @FieldType(linux.msghdr_const, "iovlen"); - .FAULT => unreachable, - .INVAL => return error.BadPathName, - .BADF => unreachable, - .ACCES => return error.AccessDenied, - .FBIG => return error.FileTooBig, - .OVERFLOW => return error.FileTooBig, - .ISDIR => return error.IsDir, - .LOOP => return error.SymLinkLoop, +fn addBuf(v: []iovec_const, i: *iovlen_t, bytes: []const u8) void { + // OS checks ptr addr before length so zero length vectors must be omitted. + if (bytes.len == 0) return; + if (v.len - i.* == 0) return; + v[i.*] = .{ .base = bytes.ptr, .len = bytes.len }; + i.* += 1; +} + +fn fileWriteFileStreaming( + userdata: ?*anyopaque, + file: File, + header: []const u8, + file_reader: *File.Reader, + limit: Io.Limit, +) File.Writer.WriteFileError!usize { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = file; + _ = header; + _ = file_reader; + _ = limit; + return error.Unimplemented; +} + +fn fileWriteFilePositional( + userdata: ?*anyopaque, + file: File, + header: []const u8, + file_reader: *File.Reader, + limit: Io.Limit, + offset: u64, +) File.WriteFilePositionalError!usize { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = file; + _ = header; + _ = file_reader; + _ = limit; + _ = offset; + return error.Unimplemented; +} + +fn fileReadPositional( + userdata: ?*anyopaque, + file: File, + data: []const []u8, + offset: u64, +) File.ReadPositionalError!usize { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + + var iovecs_buffer: [max_iovecs_len]iovec = undefined; + var i: usize = 0; + for (data) |buf| { + if (iovecs_buffer.len - i == 0) break; + if (buf.len != 0) { + iovecs_buffer[i] = .{ .base = buf.ptr, .len = buf.len }; + i += 1; + } + } + if (i == 0) return 0; + const dest = iovecs_buffer[0..i]; + assert(dest[0].len > 0); + + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + return ev.preadv(&cancel_region, file.handle, dest, offset) catch |err| switch (err) { + 
error.SocketUnconnected => errnoBug(.NOTCONN), // not a socket + error.ConnectionResetByPeer => errnoBug(.CONNRESET), // not a socket + else => |e| e, + }; +} + +fn fileSeekBy(userdata: ?*anyopaque, file: File, offset: i64) File.SeekError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + try ev.lseek(&cancel_region, file.handle, @bitCast(offset), linux.SEEK.CUR); +} + +fn fileSeekTo(userdata: ?*anyopaque, file: File, offset: u64) File.SeekError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + try ev.lseek(&cancel_region, file.handle, offset, linux.SEEK.SET); +} + +fn fileSync(userdata: ?*anyopaque, file: File) File.SyncError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + while (true) { + const thread = try cancel_region.awaitIoUring(); + thread.enqueue().* = .{ + .opcode = .FSYNC, + .flags = 0, + .ioprio = 0, + .fd = file.handle, + .off = 0, + .addr = 0, + .len = 0, + .rw_flags = 0, + .user_data = @intFromPtr(cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + ev.yield(null, .nothing); + switch (cancel_region.errno()) { + .SUCCESS => return, + .INTR, .CANCELED => continue, + .BADF => |err| return errnoBug(err), + .INVAL => |err| return errnoBug(err), + .ROFS => |err| return errnoBug(err), + .IO => return error.InputOutput, + .NOSPC => return error.NoSpaceLeft, + .DQUOT => return error.DiskQuota, + else => |err| return unexpectedErrno(err), + } + } +} + +fn fileIsTty(userdata: ?*anyopaque, file: File) Io.Cancelable!bool { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + while (true) { + try cancel_region.await(.nothing); + var wsz: winsize 
= undefined; + const fd: usize = @bitCast(@as(isize, file.handle)); + const rc = linux.syscall3(.ioctl, fd, linux.T.IOCGWINSZ, @intFromPtr(&wsz)); + switch (linux.errno(rc)) { + .SUCCESS => return true, + .INTR => continue, + else => return false, + } + } +} + +fn fileEnableAnsiEscapeCodes(userdata: ?*anyopaque, file: File) File.EnableAnsiEscapeCodesError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + if (!try fileIsTty(ev, file)) return error.NotTerminalDevice; +} + +fn fileSetLength(userdata: ?*anyopaque, file: File, length: u64) File.SetLengthError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + while (true) { + const thread = try cancel_region.awaitIoUring(); + thread.enqueue().* = .{ + .opcode = .FTRUNCATE, + .flags = 0, + .ioprio = 0, + .fd = file.handle, + .off = length, + .addr = 0, + .len = 0, + .rw_flags = 0, + .user_data = @intFromPtr(cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + ev.yield(null, .nothing); + switch (cancel_region.errno()) { + .SUCCESS => return, + .INTR, .CANCELED => continue, + .FBIG => return error.FileTooBig, + .IO => return error.InputOutput, + .PERM => return error.PermissionDenied, + .TXTBSY => return error.FileBusy, + .BADF => |err| return errnoBug(err), // Handle not open for writing. + .INVAL => return error.NonResizable, // This is returned for /dev/null for example. 
+ else => |err| return unexpectedErrno(err), + } + } +} + +fn fileSetOwner( + userdata: ?*anyopaque, + file: File, + owner: ?File.Uid, + group: ?File.Gid, +) File.SetOwnerError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + try ev.fchownat( + &cancel_region, + file.handle, + "", + owner orelse std.math.maxInt(linux.uid_t), + group orelse std.math.maxInt(linux.gid_t), + linux.AT.EMPTY_PATH, + ); +} + +fn fileSetPermissions( + userdata: ?*anyopaque, + file: File, + permissions: File.Permissions, +) File.SetPermissionsError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + ev.fchmodat( + &cancel_region, + file.handle, + "", + permissions.toMode(), + linux.AT.EMPTY_PATH, + ) catch |err| switch (err) { + error.NameTooLong => return errnoBug(.NAMETOOLONG), + error.BadPathName => return errnoBug(.ILSEQ), + error.ProcessFdQuotaExceeded => return errnoBug(.MFILE), + error.SystemFdQuotaExceeded => return errnoBug(.NFILE), + error.OperationUnsupported => return errnoBug(.OPNOTSUPP), + else => |e| return e, + }; +} + +fn fileSetTimestamps( + userdata: ?*anyopaque, + file: File, + options: File.SetTimestampsOptions, +) File.SetTimestampsError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + try ev.utimensat( + &cancel_region, + file.handle, + "", + if (options.modify_timestamp != .now or options.access_timestamp != .now) &.{ + setTimestampToPosix(options.access_timestamp), + setTimestampToPosix(options.modify_timestamp), + } else null, + linux.AT.EMPTY_PATH, + ); +} + +fn fileLock(userdata: ?*anyopaque, file: File, lock: File.Lock) File.LockError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + 
ev.flock(&cancel_region, file.handle, lock, .blocking) catch |err| switch (err) { + error.WouldBlock => unreachable, // blocking + else => |e| return e, + }; +} + +fn fileTryLock(userdata: ?*anyopaque, file: File, lock: File.Lock) File.LockError!bool { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + ev.flock(&cancel_region, file.handle, lock, switch (lock) { + .none => .blocking, + .shared, .exclusive => .nonblocking, + }) catch |err| switch (err) { + error.WouldBlock => return false, + else => |e| return e, + }; + return true; +} + +fn fileUnlock(userdata: ?*anyopaque, file: File) void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var cancel_region: CancelRegion = .initBlocked(); + defer cancel_region.deinit(); + ev.flock(&cancel_region, file.handle, .none, .blocking) catch |err| switch (err) { + error.Canceled => unreachable, // blocked + error.WouldBlock => unreachable, // blocking + error.SystemResources => return recoverableOsBugDetected(), // Resource deallocation. + error.FileLocksUnsupported => return recoverableOsBugDetected(), // We already got the lock. + error.Unexpected => return recoverableOsBugDetected(), // Resource deallocation must succeed. + }; +} + +fn fileDowngradeLock(userdata: ?*anyopaque, file: File) File.DowngradeLockError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + ev.flock(&cancel_region, file.handle, .shared, .nonblocking) catch |err| switch (err) { + error.WouldBlock => return errnoBug(.AGAIN), // File was not locked in exclusive mode. + error.SystemResources => return errnoBug(.NOLCK), // Lock already obtained. + error.FileLocksUnsupported => return errnoBug(.OPNOTSUPP), // Lock already obtained. 
+ else => |e| return e, + }; +} + +fn fileRealPath(userdata: ?*anyopaque, file: File, out_buffer: []u8) File.RealPathError!usize { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + return ev.realPath(&cancel_region, file.handle, out_buffer); +} + +fn fileHardLink( + userdata: ?*anyopaque, + file: File, + new_dir: Dir, + new_sub_path: []const u8, + options: File.HardLinkOptions, +) File.HardLinkError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + + var new_path_buffer: [PATH_MAX]u8 = undefined; + const new_sub_path_posix = try pathToPosix(new_sub_path, &new_path_buffer); + + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + return ev.linkat( + &cancel_region, + file.handle, + "", + new_dir.handle, + new_sub_path_posix, + linux.AT.EMPTY_PATH | @as(u32, if (options.follow_symlinks) 0 else linux.AT.SYMLINK_NOFOLLOW), + ); +} + +fn fileMemoryMapCreate( + userdata: ?*anyopaque, + file: File, + options: File.MemoryMap.CreateOptions, +) File.MemoryMap.CreateError!File.MemoryMap { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + const prot: linux.PROT = .{ + .READ = options.protection.read, + .WRITE = options.protection.write, + .EXEC = options.protection.execute, + }; + const flags: linux.MAP = .{ + .TYPE = .SHARED_VALIDATE, + .POPULATE = options.populate, + }; + + const page_align = std.heap.page_size_min; + + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + const contents = while (true) { + try cancel_region.await(.nothing); + const casted_offset = std.math.cast(i64, options.offset) orelse return error.Unseekable; + const rc = linux.mmap(null, options.len, prot, flags, file.handle, casted_offset); + switch (linux.errno(rc)) { + .SUCCESS => break @as([*]align(page_align) u8, @ptrFromInt(rc))[0..options.len], + .INTR => continue, + .ACCES => return error.AccessDenied, + .AGAIN => return 
error.LockedMemoryLimitExceeded, + .MFILE => return error.ProcessFdQuotaExceeded, + .NFILE => return error.SystemFdQuotaExceeded, + .NOMEM => return error.OutOfMemory, + .PERM => return error.PermissionDenied, + .OVERFLOW => return error.Unseekable, + .BADF => |err| return errnoBug(err), // Always a race condition. + .INVAL => |err| return errnoBug(err), // Invalid parameters to mmap() + .OPNOTSUPP => |err| return errnoBug(err), // Bad flags with MAP.SHARED_VALIDATE on Linux. + else => |err| return unexpectedErrno(err), + } + }; + return .{ + .file = file, + .offset = options.offset, + .memory = contents, + .section = {}, + }; +} + +fn fileMemoryMapDestroy(userdata: ?*anyopaque, mm: *File.MemoryMap) void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + const memory = mm.memory; + if (memory.len == 0) return; + switch (linux.errno(linux.munmap(memory.ptr, memory.len))) { + .SUCCESS => {}, + else => |err| if (builtin.mode == .Debug) + std.log.err("failed to unmap {d} bytes at {*}: {t}", .{ memory.len, memory.ptr, err }), + } + mm.* = undefined; +} + +fn processExecutableOpen( + userdata: ?*anyopaque, + flags: File.OpenFlags, +) process.OpenExecutableError!File { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + return dirOpenFile(ev, .{ .handle = linux.AT.FDCWD }, "/proc/self/exe", flags); +} + +fn processExecutablePath(userdata: ?*anyopaque, out_buffer: []u8) process.ExecutablePathError!usize { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + return dirReadLink(ev, .cwd(), "/proc/self/exe", out_buffer) catch |err| switch (err) { + error.UnsupportedReparsePointType => unreachable, // Windows-only + error.NetworkNotFound => unreachable, // Windows-only + error.FileBusy => unreachable, // Windows-only + else => |e| return e, + }; +} + +fn lockStderr(userdata: ?*anyopaque, terminal_mode: ?Io.Terminal.Mode) Io.Cancelable!Io.LockedStderr { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + const ev_io = ev.io(); + 
ev.stderr_mutex.lockUncancelable(ev_io); + errdefer ev.stderr_mutex.unlock(ev_io); + return ev.initLockedStderr(terminal_mode); +} + +fn tryLockStderr( + userdata: ?*anyopaque, + terminal_mode: ?Io.Terminal.Mode, +) Io.Cancelable!?Io.LockedStderr { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + const ev_io = ev.io(); + if (!ev.stderr_mutex.tryLock()) return null; + errdefer ev.stderr_mutex.unlock(ev_io); + return try ev.initLockedStderr(terminal_mode); +} + +fn initLockedStderr(ev: *Evented, terminal_mode: ?Io.Terminal.Mode) Io.Cancelable!Io.LockedStderr { + if (!ev.stderr_writer_initialized) { + const ev_io = ev.io(); + try ev.scanEnviron(); + const NO_COLOR = ev.environ.exist.NO_COLOR; + const CLICOLOR_FORCE = ev.environ.exist.CLICOLOR_FORCE; + ev.stderr_mode = terminal_mode orelse + try .detect(ev_io, ev.stderr_writer.file, NO_COLOR, CLICOLOR_FORCE); + ev.stderr_writer_initialized = true; + } + return .{ + .file_writer = &ev.stderr_writer, + .terminal_mode = terminal_mode orelse ev.stderr_mode, + }; +} + +fn unlockStderr(userdata: ?*anyopaque) void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + ev.stderr_writer.interface.flush() catch |err| switch (err) { + error.WriteFailed => switch (ev.stderr_writer.err.?) 
{ + error.Canceled => recancel(ev), + else => {}, + }, + }; + ev.stderr_writer.interface.end = 0; + ev.stderr_writer.interface.buffer = &.{}; + ev.stderr_mutex.unlock(ev.io()); +} + +fn processCurrentPath(userdata: ?*anyopaque, buffer: []u8) process.CurrentPathError!usize { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + while (true) { + try cancel_region.await(.nothing); + switch (linux.errno(linux.getcwd(buffer.ptr, buffer.len))) { + .SUCCESS => return std.mem.findScalar(u8, buffer, 0).?, + .INTR => continue, + .NOENT => return error.CurrentDirUnlinked, + .RANGE => return error.NameTooLong, + .FAULT => |err| return errnoBug(err), + .INVAL => |err| return errnoBug(err), + else => |err| return unexpectedErrno(err), + } + } +} + +fn processSetCurrentDir(userdata: ?*anyopaque, dir: Dir) process.SetCurrentDirError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + if (dir.handle == linux.AT.FDCWD) return; + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + while (true) { + try cancel_region.await(.nothing); + switch (linux.errno(linux.fchdir(dir.handle))) { + .SUCCESS => return, + .INTR => continue, + .ACCES => return error.AccessDenied, + .NOTDIR => return error.NotDir, + .IO => return error.FileSystem, + .BADF => |err| return errnoBug(err), + else => |err| return unexpectedErrno(err), + } + } +} + +fn processSetCurrentPath(userdata: ?*anyopaque, dir_path: []const u8) ChdirError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + var path_buffer: [PATH_MAX]u8 = undefined; + const dir_path_posix = try pathToPosix(dir_path, &path_buffer); + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + while (true) { + try cancel_region.await(.nothing); + switch (linux.errno(linux.chdir(dir_path_posix))) { + .SUCCESS => return, + .INTR => continue, + .ACCES => return error.AccessDenied, + 
.IO => return error.FileSystem, + .LOOP => return error.SymLinkLoop, + .NAMETOOLONG => return error.NameTooLong, + .NOENT => return error.FileNotFound, + .NOMEM => return error.SystemResources, + .NOTDIR => return error.NotDir, + .ILSEQ => return error.BadPathName, + .FAULT => |err| return errnoBug(err), + else => |err| return unexpectedErrno(err), + } + } +} + +fn processReplace(userdata: ?*anyopaque, options: process.ReplaceOptions) process.ReplaceError { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + + try ev.scanEnviron(); // for PATH + const PATH = ev.environ.string.PATH orelse default_PATH; + + var arena_allocator = std.heap.ArenaAllocator.init(ev.allocator()); + defer arena_allocator.deinit(); + const arena = arena_allocator.allocator(); + + const argv_buf = try arena.allocSentinel(?[*:0]const u8, options.argv.len, null); + for (options.argv, 0..) |arg, i| argv_buf[i] = (try arena.dupeZ(u8, arg)).ptr; + + const env_block = env_block: { + const prog_fd: i32 = -1; + if (options.environ_map) |environ_map| break :env_block try environ_map.createPosixBlock(arena, .{ + .zig_progress_fd = prog_fd, + }); + break :env_block try ev.environ.process_environ.createPosixBlock(arena, .{ + .zig_progress_fd = prog_fd, + }); + }; + + return execv(options.expand_arg0, argv_buf.ptr[0].?, argv_buf.ptr, env_block, PATH); +} + +fn processReplacePath( + userdata: ?*anyopaque, + dir: Dir, + options: process.ReplaceOptions, +) process.ReplaceError { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = dir; + _ = options; + @panic("TODO processReplacePath"); +} + +fn processSpawn(userdata: ?*anyopaque, options: process.SpawnOptions) process.SpawnError!process.Child { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + const spawned = try ev.spawn(options); + defer ev.close(spawned.err_fd); + + // Wait for the child to report any errors in or before `execvpe`. 
+ var child_err: ForkBailError = undefined; + var cancel_region: CancelRegion = .initBlocked(); + defer cancel_region.deinit(); + ev.readAll(&cancel_region, spawned.err_fd, @ptrCast(&child_err)) catch |read_err| { + switch (read_err) { + error.Canceled => unreachable, // blocked + error.EndOfStream => { + // Write end closed by CLOEXEC at the time of the `execvpe` call, + // indicating success. + }, + else => { + // Problem reading the error from the error reporting pipe. We + // don't know if the child is alive or dead. Better to assume it is + // alive so the resource does not risk being leaked. + }, + } + return .{ + .id = spawned.pid, + .thread_handle = {}, + .stdin = spawned.stdin, + .stdout = spawned.stdout, + .stderr = spawned.stderr, + .request_resource_usage_statistics = options.request_resource_usage_statistics, + }; + }; + return child_err; +} + +fn processSpawnPath( + userdata: ?*anyopaque, + dir: Dir, + options: process.SpawnOptions, +) process.SpawnError!process.Child { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = dir; + _ = options; + @panic("TODO processSpawnPath"); +} + +const Spawned = struct { + pid: pid_t, + err_fd: fd_t, + stdin: ?File, + stdout: ?File, + stderr: ?File, +}; +fn spawn(ev: *Evented, options: process.SpawnOptions) process.SpawnError!Spawned { + // The child process does need to access (one end of) these pipes. However, + // we must initially set CLOEXEC to avoid a race condition. If another thread + // is racing to spawn a different child process, we don't want it to inherit + // these FDs in any scenario; that would mean that, for instance, calls to + // `poll` from the parent would not report the child's stdout as closing when + // expected, since the other child may retain a reference to the write end of + // the pipe. So, we create the pipes with CLOEXEC initially. After fork, we + // need to do something in the new child to make sure we preserve the reference + // we want. 
We could use `fcntl` to remove CLOEXEC from the FD, but as it + // turns out, we `dup2` everything anyway, so there's no need! + const pipe_flags: linux.O = .{ .CLOEXEC = true }; + + const stdin_pipe = if (options.stdin == .pipe) try pipe2(pipe_flags) else undefined; + errdefer if (options.stdin == .pipe) { + ev.destroyPipe(stdin_pipe); + }; + + const stdout_pipe = if (options.stdout == .pipe) try pipe2(pipe_flags) else undefined; + errdefer if (options.stdout == .pipe) { + ev.destroyPipe(stdout_pipe); + }; + + const stderr_pipe = if (options.stderr == .pipe) try pipe2(pipe_flags) else undefined; + errdefer if (options.stderr == .pipe) { + ev.destroyPipe(stderr_pipe); + }; + + const any_ignore = + options.stdin == .ignore or options.stdout == .ignore or options.stderr == .ignore; + const dev_null_fd = if (any_ignore) dev_null_fd: { + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + break :dev_null_fd try ev.null_fd.open(ev, &cancel_region, "/dev/null", .{ + .ACCMODE = .RDWR, + }); + } else undefined; + + const prog_pipe: [2]fd_t = if (options.progress_node.index != .none) pipe: { + // We use CLOEXEC for the same reason as in `pipe_flags`. + const pipe = try pipe2(.{ .NONBLOCK = true, .CLOEXEC = true }); + _ = linux.fcntl(pipe[0], linux.F.SETPIPE_SZ, @as(u32, std.Progress.max_packet_len * 2)); + break :pipe pipe; + } else .{ -1, -1 }; + errdefer ev.destroyPipe(prog_pipe); + + var arena_allocator = std.heap.ArenaAllocator.init(ev.allocator()); + defer arena_allocator.deinit(); + const arena = arena_allocator.allocator(); + + // The POSIX standard does not allow malloc() between fork() and execve(), + // and this allocator may be a libc allocator. + // I have personally observed the child process deadlocking when it tries + // to call malloc() due to a heap allocation between fork() and execve(), + // in musl v1.1.24. + // Additionally, we want to reduce the number of possible ways things + // can fail between fork() and execve(). 
+ // Therefore, we do all the allocation for the execve() before the fork(). + // This means we must do the null-termination of argv and env vars here. + const argv_buf = try arena.allocSentinel(?[*:0]const u8, options.argv.len, null); + for (options.argv, 0..) |arg, i| argv_buf[i] = (try arena.dupeZ(u8, arg)).ptr; + + const prog_fileno = 3; + comptime assert(@max(linux.STDIN_FILENO, linux.STDOUT_FILENO, linux.STDERR_FILENO) + 1 == prog_fileno); + + const env_block = env_block: { + const prog_fd: i32 = if (prog_pipe[1] == -1) -1 else prog_fileno; + if (options.environ_map) |environ_map| break :env_block try environ_map.createPosixBlock(arena, .{ + .zig_progress_fd = prog_fd, + }); + break :env_block try ev.environ.process_environ.createPosixBlock(arena, .{ + .zig_progress_fd = prog_fd, + }); + }; + + // This pipe communicates to the parent errors in the child between `fork` and `execvpe`. + // It is closed by the child (via CLOEXEC) without writing if `execvpe` succeeds. + const err_pipe: [2]fd_t = try pipe2(.{ .CLOEXEC = true }); + errdefer ev.destroyPipe(err_pipe); + + try ev.scanEnviron(); // for PATH + const PATH = ev.environ.string.PATH orelse default_PATH; + + const pid_result: pid_t = fork: { + const rc = linux.fork(); + switch (linux.errno(rc)) { + .SUCCESS => break :fork @intCast(rc), + .AGAIN => return error.SystemResources, + .NOMEM => return error.SystemResources, + .NOSYS => return error.OperationUnsupported, + else => |err| return unexpectedErrno(err), + } + }; + + if (pid_result == 0) { + defer comptime unreachable; // We are the child. 
+ _ = swapCancelProtection(ev, .blocked); + const ep1 = err_pipe[1]; + + ev.setUpChildIo(options.stdin, stdin_pipe[0], linux.STDIN_FILENO, dev_null_fd) catch |err| + ev.forkBail(ep1, err); + ev.setUpChildIo(options.stdout, stdout_pipe[1], linux.STDOUT_FILENO, dev_null_fd) catch |err| + ev.forkBail(ep1, err); + ev.setUpChildIo(options.stderr, stderr_pipe[1], linux.STDERR_FILENO, dev_null_fd) catch |err| + ev.forkBail(ep1, err); + + switch (options.cwd) { + .inherit => {}, + .dir => |cwd| processSetCurrentDir(ev, cwd) catch |err| ev.forkBail(ep1, err), + .path => |cwd| processSetCurrentPath(ev, cwd) catch |err| ev.forkBail(ep1, err), + } + + // Must happen after fchdir above, the cwd file descriptor might be + // equal to prog_fileno and be clobbered by this dup2 call. + if (prog_pipe[1] != -1) dup2(prog_pipe[1], prog_fileno) catch |err| ev.forkBail(ep1, err); + + if (options.gid) |gid| { + switch (linux.errno(linux.setregid(gid, gid))) { + .SUCCESS => {}, + .AGAIN => ev.forkBail(ep1, error.ResourceLimitReached), + .INVAL => ev.forkBail(ep1, error.InvalidUserId), + .PERM => ev.forkBail(ep1, error.PermissionDenied), + else => ev.forkBail(ep1, error.Unexpected), + } + } + + if (options.uid) |uid| { + switch (linux.errno(linux.setreuid(uid, uid))) { + .SUCCESS => {}, + .AGAIN => ev.forkBail(ep1, error.ResourceLimitReached), + .INVAL => ev.forkBail(ep1, error.InvalidUserId), + .PERM => ev.forkBail(ep1, error.PermissionDenied), + else => ev.forkBail(ep1, error.Unexpected), + } + } + + if (options.pgid) |pid| { + switch (linux.errno(linux.setpgid(0, pid))) { + .SUCCESS => {}, + .ACCES => ev.forkBail(ep1, error.ProcessAlreadyExec), + .INVAL => ev.forkBail(ep1, error.InvalidProcessGroupId), + .PERM => ev.forkBail(ep1, error.PermissionDenied), + else => ev.forkBail(ep1, error.Unexpected), + } + } + + if (options.start_suspended) { + switch (linux.errno(linux.kill(linux.getpid(), .STOP))) { + .SUCCESS => {}, + .PERM => ev.forkBail(ep1, error.PermissionDenied), + else => 
ev.forkBail(ep1, error.Unexpected), + } + } + + const err = execv(options.expand_arg0, argv_buf.ptr[0].?, argv_buf.ptr, env_block, PATH); + ev.forkBail(ep1, err); + } + + const pid: pid_t = @intCast(pid_result); // We are the parent. + errdefer comptime unreachable; // The child is forked; we must not error from now on + + ev.close(err_pipe[1]); // make sure only the child holds the write end open + + if (options.stdin == .pipe) ev.close(stdin_pipe[0]); + if (options.stdout == .pipe) ev.close(stdout_pipe[1]); + if (options.stderr == .pipe) ev.close(stderr_pipe[1]); + + if (prog_pipe[1] != -1) ev.close(prog_pipe[1]); + + options.progress_node.setIpcFile(ev, .{ .handle = prog_pipe[0], .flags = .{ .nonblocking = true } }); + + return .{ + .pid = pid, + .err_fd = err_pipe[0], + .stdin = switch (options.stdin) { + .pipe => .{ .handle = stdin_pipe[1], .flags = .{ .nonblocking = false } }, + else => null, + }, + .stdout = switch (options.stdout) { + .pipe => .{ .handle = stdout_pipe[0], .flags = .{ .nonblocking = false } }, + else => null, + }, + .stderr = switch (options.stderr) { + .pipe => .{ .handle = stderr_pipe[0], .flags = .{ .nonblocking = false } }, + else => null, + }, + }; +} + +pub const PipeError = error{ + SystemFdQuotaExceeded, + ProcessFdQuotaExceeded, +} || Io.UnexpectedError; +pub fn pipe2(flags: linux.O) PipeError![2]fd_t { + var fds: [2]fd_t = undefined; + switch (linux.errno(linux.pipe2(&fds, flags))) { + .SUCCESS => return fds, + .INVAL => |err| return errnoBug(err), // Invalid flags + .NFILE => return error.SystemFdQuotaExceeded, + .MFILE => return error.ProcessFdQuotaExceeded, + else => |err| return unexpectedErrno(err), + } +} +fn destroyPipe(ev: *Evented, pipe: [2]fd_t) void { + if (pipe[0] != -1) ev.close(pipe[0]); + if (pipe[0] != pipe[1]) ev.close(pipe[1]); +} + +fn setUpChildIo( + ev: *Evented, + stdio: process.SpawnOptions.StdIo, + pipe_fd: fd_t, + std_fileno: i32, + dev_null_fd: fd_t, +) !void { + switch (stdio) { + .pipe => try 
dup2(pipe_fd, std_fileno), + .close => ev.close(std_fileno), + .inherit => {}, + .ignore => try dup2(dev_null_fd, std_fileno), + .file => |file| try dup2(file.handle, std_fileno), + } +} + +pub const DupError = error{ + ProcessFdQuotaExceeded, + SystemResources, +} || Io.UnexpectedError || Io.Cancelable; +pub fn dup2(old_fd: fd_t, new_fd: fd_t) DupError!void { + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + while (true) { + try cancel_region.await(.nothing); + switch (linux.errno(linux.dup2(old_fd, new_fd))) { + .SUCCESS => {}, + .BUSY, .INTR => continue, + .INVAL => |err| return errnoBug(err), // invalid parameters + .BADF => |err| return errnoBug(err), // use after free + .MFILE => return error.ProcessFdQuotaExceeded, + .NOMEM => return error.SystemResources, + else => |err| return unexpectedErrno(err), + } + } +} + +/// Errors that can occur between fork() and execv() +const ForkBailError = process.SetCurrentDirError || ChdirError || + process.SpawnError || process.ReplaceError; +/// Child of fork calls this to report an error to the fork parent. Then the +/// child exits. +fn forkBail(ev: *Evented, fd: fd_t, err: ForkBailError) noreturn { + var cancel_region: CancelRegion = .initBlocked(); + defer cancel_region.deinit(); + ev.writeAll(&cancel_region, fd, @ptrCast(&err)) catch {}; + const exit = if (builtin.single_threaded) linux.exit else linux.exit_group; + exit(1); +} + +fn execv( + arg0_expand: process.ArgExpansion, + file: [*:0]const u8, + child_argv: [*:null]?[*:0]const u8, + env_block: process.Environ.PosixBlock, + PATH: []const u8, +) process.ReplaceError { + const file_slice = std.mem.sliceTo(file, 0); + if (std.mem.findScalar(u8, file_slice, '/') != null) return execvPath(file, child_argv, env_block); + + // Use of PATH_MAX here is valid as the path_buf will be passed + // directly to the operating system in posixExecvPath. 
+ var path_buf: [PATH_MAX]u8 = undefined; + var it = std.mem.tokenizeScalar(u8, PATH, ':'); + var seen_eacces = false; + var err: process.ReplaceError = error.FileNotFound; + + // In case of expanding arg0 we must put it back if we return with an error. + const prev_arg0 = child_argv[0]; + defer switch (arg0_expand) { + .expand => child_argv[0] = prev_arg0, + .no_expand => {}, + }; + + while (it.next()) |search_path| { + const path_len = search_path.len + file_slice.len + 1; + if (path_buf.len < path_len + 1) return error.NameTooLong; + @memcpy(path_buf[0..search_path.len], search_path); + path_buf[search_path.len] = '/'; + @memcpy(path_buf[search_path.len + 1 ..][0..file_slice.len], file_slice); + path_buf[path_len] = 0; + const full_path = path_buf[0..path_len :0].ptr; + switch (arg0_expand) { + .expand => child_argv[0] = full_path, + .no_expand => {}, + } + err = execvPath(full_path, child_argv, env_block); + switch (err) { + error.AccessDenied => seen_eacces = true, + error.FileNotFound, error.NotDir => {}, + else => |e| return e, + } + } + if (seen_eacces) return error.AccessDenied; + return err; +} +/// This function ignores PATH environment variable. +pub fn execvPath( + path: [*:0]const u8, + child_argv: [*:null]const ?[*:0]const u8, + env_block: process.Environ.PosixBlock, +) process.ReplaceError { + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + try cancel_region.await(.nothing); + switch (linux.errno(linux.execve(path, child_argv, env_block.slice.ptr))) { + .FAULT => |err| return errnoBug(err), // Bad pointer parameter. 
+ .@"2BIG" => return error.SystemResources, .MFILE => return error.ProcessFdQuotaExceeded, .NAMETOOLONG => return error.NameTooLong, .NFILE => return error.SystemFdQuotaExceeded, - .NODEV => return error.NoDevice, - .NOENT => return error.FileNotFound, .NOMEM => return error.SystemResources, - .NOSPC => return error.NoSpaceLeft, - .NOTDIR => return error.NotDir, - .PERM => return error.PermissionDenied, - .EXIST => return error.PathAlreadyExists, - .BUSY => return error.DeviceBusy, - .OPNOTSUPP => return error.FileLocksUnsupported, - .AGAIN => return error.WouldBlock, - .TXTBSY => return error.FileBusy, - .NXIO => return error.NoDevice, - else => |err| return posix.unexpectedErrno(err), - } -} - -fn fileClose(userdata: ?*anyopaque, file: Io.File) void { - const el: *EventLoop = @ptrCast(@alignCast(userdata)); - const thread: *Thread = .current(); - const iou = &thread.io_uring; - const fiber = thread.currentFiber(); - - getSqe(iou).* = .{ - .opcode = .CLOSE, - .flags = 0, - .ioprio = 0, - .fd = file.handle, - .off = 0, - .addr = 0, - .len = 0, - .rw_flags = 0, - .user_data = @intFromPtr(fiber), - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - }; - - el.yield(null, .nothing); - - const completion = fiber.resultPointer(Completion); - switch (errno(completion.result)) { - .SUCCESS => return, - .INTR => unreachable, - .CANCELED => return, - - .BADF => unreachable, // Always a race condition. 
- else => return, - } -} - -fn pread(userdata: ?*anyopaque, file: Io.File, buffer: []u8, offset: std.posix.off_t) Io.File.PReadError!usize { - const el: *EventLoop = @ptrCast(@alignCast(userdata)); - const thread: *Thread = .current(); - const iou = &thread.io_uring; - const fiber = thread.currentFiber(); - try fiber.enterCancelRegion(thread); - - getSqe(iou).* = .{ - .opcode = .READ, - .flags = 0, - .ioprio = 0, - .fd = file.handle, - .off = @bitCast(offset), - .addr = @intFromPtr(buffer.ptr), - .len = @min(buffer.len, 0x7ffff000), - .rw_flags = 0, - .user_data = @intFromPtr(fiber), - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - }; - - el.yield(null, .nothing); - fiber.exitCancelRegion(thread); - - const completion = fiber.resultPointer(Completion); - switch (errno(completion.result)) { - .SUCCESS => return @as(u32, @bitCast(completion.result)), - .INTR => unreachable, - .CANCELED => return error.Canceled, - - .INVAL => unreachable, - .FAULT => unreachable, - .NOENT => return error.ProcessNotFound, - .AGAIN => return error.WouldBlock, - .BADF => return error.NotOpenForReading, // Can be a race condition. 
- .IO => return error.InputOutput, - .ISDIR => return error.IsDir, - .NOBUFS => return error.SystemResources, - .NOMEM => return error.SystemResources, - .NOTCONN => return error.SocketUnconnected, - .CONNRESET => return error.ConnectionResetByPeer, - .TIMEDOUT => return error.Timeout, - .NXIO => return error.Unseekable, - .SPIPE => return error.Unseekable, - .OVERFLOW => return error.Unseekable, - else => |err| return std.posix.unexpectedErrno(err), - } -} - -fn pwrite(userdata: ?*anyopaque, file: Io.File, buffer: []const u8, offset: std.posix.off_t) Io.File.PWriteError!usize { - const el: *EventLoop = @ptrCast(@alignCast(userdata)); - const thread: *Thread = .current(); - const iou = &thread.io_uring; - const fiber = thread.currentFiber(); - try fiber.enterCancelRegion(thread); - - getSqe(iou).* = .{ - .opcode = .WRITE, - .flags = 0, - .ioprio = 0, - .fd = file.handle, - .off = @bitCast(offset), - .addr = @intFromPtr(buffer.ptr), - .len = @min(buffer.len, 0x7ffff000), - .rw_flags = 0, - .user_data = @intFromPtr(fiber), - .buf_index = 0, - .personality = 0, - .splice_fd_in = 0, - .addr3 = 0, - .resv = 0, - }; - - el.yield(null, .nothing); - fiber.exitCancelRegion(thread); - - const completion = fiber.resultPointer(Completion); - switch (errno(completion.result)) { - .SUCCESS => return @as(u32, @bitCast(completion.result)), - .INTR => unreachable, - .CANCELED => return error.Canceled, - - .INVAL => return error.InvalidArgument, - .FAULT => unreachable, - .NOENT => return error.ProcessNotFound, - .AGAIN => return error.WouldBlock, - .BADF => return error.NotOpenForWriting, // can be a race condition. - .DESTADDRREQ => unreachable, // `connect` was never called. 
- .DQUOT => return error.DiskQuota, - .FBIG => return error.FileTooBig, - .IO => return error.InputOutput, - .NOSPC => return error.NoSpaceLeft, .ACCES => return error.AccessDenied, .PERM => return error.PermissionDenied, - .PIPE => return error.BrokenPipe, - .NXIO => return error.Unseekable, - .SPIPE => return error.Unseekable, - .OVERFLOW => return error.Unseekable, - .BUSY => return error.DeviceBusy, - .CONNRESET => return error.ConnectionResetByPeer, - .MSGSIZE => return error.MessageOversize, - else => |err| return std.posix.unexpectedErrno(err), + .INVAL => return error.InvalidExe, + .NOEXEC => return error.InvalidExe, + .IO => return error.FileSystem, + .LOOP => return error.FileSystem, + .ISDIR => return error.IsDir, + .NOENT => return error.FileNotFound, + .NOTDIR => return error.NotDir, + .TXTBSY => return error.FileBusy, + .LIBBAD => return error.InvalidExe, + else => |err| return unexpectedErrno(err), } } -fn now(userdata: ?*anyopaque, clockid: std.posix.clockid_t) Io.ClockGetTimeError!Io.Timestamp { - _ = userdata; - const timespec = try std.posix.clock_gettime(clockid); - return @enumFromInt(@as(i128, timespec.sec) * std.time.ns_per_s + timespec.nsec); +fn childWait(userdata: ?*anyopaque, child: *process.Child) process.Child.WaitError!process.Child.Term { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + defer ev.childCleanup(child); + + const pid = child.id.?; + var info: linux.siginfo_t = undefined; + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + while (true) { + const thread = try cancel_region.awaitIoUring(); + thread.enqueue().* = .{ + .opcode = .WAITID, + .flags = 0, + .ioprio = 0, + .fd = pid, + .off = @intFromPtr(&info), + .addr = 0, + .len = @intFromEnum(linux.P.PID), + .rw_flags = 0, + .user_data = @intFromPtr(cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = linux.W.EXITED | + @as(i32, if (child.request_resource_usage_statistics) linux.W.NOWAIT else 0), + .addr3 = 0, + .resv 
= 0, + }; + ev.yield(null, .nothing); + switch (cancel_region.errno()) { + .SUCCESS => { + if (child.request_resource_usage_statistics) while (true) { + try cancel_region.await(.nothing); + var rusage: linux.rusage = undefined; + switch (linux.errno(linux.waitid( + .PID, + pid, + &info, + linux.W.EXITED | linux.W.NOHANG, + &rusage, + ))) { + .SUCCESS => { + child.resource_usage_statistics.rusage = rusage; + break; + }, + .INTR, .CANCELED => continue, + .CHILD => |err| return errnoBug(err), // Double-free. + else => |err| return unexpectedErrno(err), + } + }; + const status: u32 = @bitCast(info.fields.common.second.sigchld.status); + const code: linux.CLD = @enumFromInt(info.code); + return switch (code) { + .EXITED => .{ .exited = @truncate(status) }, + .KILLED, .DUMPED => .{ .signal = @enumFromInt(status) }, + .TRAPPED, .STOPPED => .{ .stopped = status }, + _, .CONTINUED => .{ .unknown = status }, + }; + }, + .INTR, .CANCELED => continue, + .CHILD => |err| return errnoBug(err), // Double-free. 
+ else => |err| return unexpectedErrno(err), + } + } } -fn sleep(userdata: ?*anyopaque, clockid: std.posix.clockid_t, deadline: Io.Deadline) Io.SleepError!void { - const el: *EventLoop = @ptrCast(@alignCast(userdata)); - const thread: *Thread = .current(); - const iou = &thread.io_uring; - const fiber = thread.currentFiber(); - try fiber.enterCancelRegion(thread); +fn childKill(userdata: ?*anyopaque, child: *process.Child) void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + defer ev.childCleanup(child); - const deadline_nanoseconds: i96 = switch (deadline) { - .duration => |duration| duration.nanoseconds, - .timestamp => |timestamp| @intFromEnum(timestamp), + const pid = child.id.?; + var cancel_region: CancelRegion = .initBlocked(); + defer cancel_region.deinit(); + while (true) switch (linux.errno(linux.kill(pid, .TERM))) { + .SUCCESS => break, + .INTR => continue, + .PERM => return, + .INVAL => |err| return errnoBug(err) catch {}, + .SRCH => |err| return errnoBug(err) catch {}, + else => |err| return unexpectedErrno(err) catch {}, }; - const timespec: std.os.linux.kernel_timespec = .{ - .sec = @intCast(@divFloor(deadline_nanoseconds, std.time.ns_per_s)), - .nsec = @intCast(@mod(deadline_nanoseconds, std.time.ns_per_s)), + + var info: linux.siginfo_t = undefined; + while (true) { + const thread = cancel_region.awaitIoUring() catch |err| switch (err) { + error.Canceled => unreachable, // blocked + }; + thread.enqueue().* = .{ + .opcode = .WAITID, + .flags = 0, + .ioprio = 0, + .fd = pid, + .off = @intFromPtr(&info), + .addr = 0, + .len = @intFromEnum(linux.P.PID), + .rw_flags = 0, + .user_data = @intFromPtr(cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = linux.W.EXITED, + .addr3 = 0, + .resv = 0, + }; + ev.yield(null, .nothing); + switch (cancel_region.errno()) { + .SUCCESS => return, + .INTR, .CANCELED => continue, + .CHILD => |err| return errnoBug(err) catch {}, // Double-free. 
+ else => |err| return unexpectedErrno(err) catch {}, + } + } +} + +fn childCleanup(ev: *Evented, child: *process.Child) void { + if (child.stdin) |*stdin| { + ev.close(stdin.handle); + child.stdin = null; + } + if (child.stdout) |*stdout| { + ev.close(stdout.handle); + child.stdout = null; + } + if (child.stderr) |*stderr| { + ev.close(stderr.handle); + child.stderr = null; + } + child.id = null; +} + +fn progressParentFile(userdata: ?*anyopaque) std.Progress.ParentFileError!File { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + const cancel_protection = swapCancelProtection(ev, .blocked); + defer assert(swapCancelProtection(ev, cancel_protection) == .blocked); + ev.scanEnviron() catch |err| switch (err) { + error.Canceled => unreachable, // blocked }; - getSqe(iou).* = .{ + return ev.environ.zig_progress_file; +} + +fn scanEnviron(ev: *Evented) Io.Cancelable!void { + const ev_io = ev.io(); + try ev.environ_mutex.lock(ev_io); + defer ev.environ_mutex.unlock(ev_io); + ev.environ.scan(ev.allocator()); +} + +fn clockResolution(userdata: ?*anyopaque, clock: Io.Clock) Io.Clock.ResolutionError!Io.Duration { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + const clock_id = clockToPosix(clock); + var timespec: linux.timespec = undefined; + return switch (linux.errno(linux.clock_getres(clock_id, ×pec))) { + .SUCCESS => .fromNanoseconds(nanosecondsFromPosix(×pec)), + .INVAL => return error.ClockUnavailable, + else => |err| return unexpectedErrno(err), + }; +} + +fn now(userdata: ?*anyopaque, clock: Io.Clock) Io.Timestamp { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + var tp: linux.timespec = undefined; + switch (linux.errno(linux.clock_gettime(clockToPosix(clock), &tp))) { + .SUCCESS => return timestampFromPosix(&tp), + else => return .zero, + } +} + +fn sleep(userdata: ?*anyopaque, timeout: Io.Timeout) Io.Cancelable!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + + const timespec: linux.kernel_timespec, const 
clock: Io.Clock, const timeout_flags: u32 = timespec: switch (timeout) { + .none => .{ + .{ + .sec = std.math.maxInt(i64), + .nsec = std.time.ns_per_s - 1, + }, + .awake, + linux.IORING_TIMEOUT_ABS, + }, + .duration => |duration| { + const ns = duration.raw.toNanoseconds(); + break :timespec .{ + .{ + .sec = @intCast(@divFloor(ns, std.time.ns_per_s)), + .nsec = @intCast(@mod(ns, std.time.ns_per_s)), + }, + duration.clock, + 0, + }; + }, + .deadline => |deadline| { + const ns = deadline.raw.toNanoseconds(); + break :timespec .{ + .{ + .sec = @intCast(@divFloor(ns, std.time.ns_per_s)), + .nsec = @intCast(@mod(ns, std.time.ns_per_s)), + }, + deadline.clock, + linux.IORING_TIMEOUT_ABS, + }; + }, + }; + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + const thread = try cancel_region.awaitIoUring(); + thread.enqueue().* = .{ .opcode = .TIMEOUT, .flags = 0, .ioprio = 0, @@ -1382,116 +4847,1257 @@ fn sleep(userdata: ?*anyopaque, clockid: std.posix.clockid_t, deadline: Io.Deadl .off = 0, .addr = @intFromPtr(×pec), .len = 1, - .rw_flags = @as(u32, switch (deadline) { - .duration => 0, - .timestamp => std.os.linux.IORING_TIMEOUT_ABS, - }) | @as(u32, switch (clockid) { - .REALTIME => std.os.linux.IORING_TIMEOUT_REALTIME, - .MONOTONIC => 0, - .BOOTTIME => std.os.linux.IORING_TIMEOUT_BOOTTIME, - else => return error.UnsupportedClock, + .rw_flags = timeout_flags | @as(u32, switch (clock) { + .real => linux.IORING_TIMEOUT_REALTIME, + else => 0, + .boot => linux.IORING_TIMEOUT_BOOTTIME, }), - .user_data = @intFromPtr(fiber), + .user_data = @intFromPtr(cancel_region.fiber), .buf_index = 0, .personality = 0, .splice_fd_in = 0, .addr3 = 0, .resv = 0, }; - - el.yield(null, .nothing); - fiber.exitCancelRegion(thread); - - const completion = fiber.resultPointer(Completion); - switch (errno(completion.result)) { - .SUCCESS, .TIME => return, - .INTR => unreachable, - .CANCELED => return error.Canceled, - - else => |err| return std.posix.unexpectedErrno(err), + 
ev.yield(null, .nothing); + switch (cancel_region.errno()) { + // Handles SUCCESS as well as clock not available and unexpected + // errors. The user had a chance to check clock resolution before + // getting here, which would have reported 0, making this a legal + // amount of time to sleep. + else => return, + .INTR, .CANCELED => return error.Canceled, } } -fn mutexLock(userdata: ?*anyopaque, prev_state: Io.Mutex.State, mutex: *Io.Mutex) error{Canceled}!void { - const el: *EventLoop = @ptrCast(@alignCast(userdata)); - el.yield(null, .{ .mutex_lock = .{ .prev_state = prev_state, .mutex = mutex } }); -} -fn mutexUnlock(userdata: ?*anyopaque, prev_state: Io.Mutex.State, mutex: *Io.Mutex) void { - var maybe_waiting_fiber: ?*Fiber = @ptrFromInt(@intFromEnum(prev_state)); - while (if (maybe_waiting_fiber) |waiting_fiber| @cmpxchgWeak( - Io.Mutex.State, - &mutex.state, - @enumFromInt(@intFromPtr(waiting_fiber)), - @enumFromInt(@intFromPtr(waiting_fiber.queue_next)), - .release, - .acquire, - ) else @cmpxchgWeak( - Io.Mutex.State, - &mutex.state, - .locked_once, - .unlocked, - .release, - .acquire, - ) orelse return) |next_state| maybe_waiting_fiber = @ptrFromInt(@intFromEnum(next_state)); - maybe_waiting_fiber.?.queue_next = null; - const el: *EventLoop = @ptrCast(@alignCast(userdata)); - el.yield(maybe_waiting_fiber.?, .reschedule); -} - -const ConditionImpl = struct { - tail: *Fiber, - event: union(enum) { - queued, - wake: Io.Condition.Wake, - }, -}; - -fn conditionWait(userdata: ?*anyopaque, cond: *Io.Condition, mutex: *Io.Mutex) Io.Cancelable!void { - const el: *EventLoop = @ptrCast(@alignCast(userdata)); - el.yield(null, .{ .condition_wait = .{ .cond = cond, .mutex = mutex } }); - const thread = Thread.current(); - const fiber = thread.currentFiber(); - const cond_impl = fiber.resultPointer(ConditionImpl); - try mutex.lock(el.io()); - switch (cond_impl.event) { - .queued => {}, - .wake => |wake| if (fiber.queue_next) |next_fiber| switch (wake) { - .one => if 
(@cmpxchgStrong( - ?*Fiber, - @as(*?*Fiber, @ptrCast(&cond.state)), - null, - next_fiber, - .release, - .acquire, - )) |old_fiber| { - const old_cond_impl = old_fiber.?.resultPointer(ConditionImpl); - assert(old_cond_impl.tail.queue_next == null); - old_cond_impl.tail.queue_next = next_fiber; - old_cond_impl.tail = cond_impl.tail; - }, - .all => el.schedule(thread, .{ .head = next_fiber, .tail = cond_impl.tail }), - }, +fn random(userdata: ?*anyopaque, buffer: []u8) void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var thread: *Thread = .current(); + if (!thread.csprng.isInitialized()) { + @branchHint(.unlikely); + var seed: [Csprng.seed_len]u8 = undefined; + { + const ev_io = ev.io(); + ev.csprng_mutex.lockUncancelable(ev_io); + defer ev.csprng_mutex.unlock(ev_io); + if (!ev.csprng.isInitialized()) { + @branchHint(.unlikely); + var cancel_region: CancelRegion = .initBlocked(); + defer cancel_region.deinit(); + ev.urandomReadAll(&cancel_region, &seed) catch |err| switch (err) { + error.Canceled => unreachable, // blocked + else => fallbackSeed(ev, &seed), + }; + ev.csprng.rng = .init(seed); + thread = .current(); + } + ev.csprng.rng.fill(&seed); + } + if (!thread.csprng.isInitialized()) { + @branchHint(.likely); + thread.csprng.rng = .init(seed); + } else thread.csprng.rng.addEntropy(&seed); } - fiber.queue_next = null; + thread.csprng.rng.fill(buffer); } -fn conditionWake(userdata: ?*anyopaque, cond: *Io.Condition, wake: Io.Condition.Wake) void { - const el: *EventLoop = @ptrCast(@alignCast(userdata)); - const waiting_fiber = @atomicRmw(?*Fiber, @as(*?*Fiber, @ptrCast(&cond.state)), .Xchg, null, .acquire) orelse return; - waiting_fiber.resultPointer(ConditionImpl).event = .{ .wake = wake }; - el.yield(waiting_fiber, .reschedule); -} - -fn errno(signed: i32) std.os.linux.E { - return .init(@bitCast(@as(isize, signed))); -} - -fn getSqe(iou: *IoUring) *std.os.linux.io_uring_sqe { - while (true) return iou.get_sqe() catch { - _ = iou.submit_and_wait(0) 
catch |err| switch (err) { - error.SignalInterrupt => std.log.warn("submit_and_wait failed with SignalInterrupt", .{}), - else => |e| @panic(@errorName(e)), - }; - continue; +fn randomSecure(userdata: ?*anyopaque, buffer: []u8) Io.RandomSecureError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + if (buffer.len == 0) return; + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + ev.urandomReadAll(&cancel_region, buffer) catch |err| switch (err) { + error.Canceled => return error.Canceled, + else => return error.EntropyUnavailable, }; } + +fn netListenIpUnavailable( + userdata: ?*anyopaque, + address: net.IpAddress, + options: net.IpAddress.ListenOptions, +) net.IpAddress.ListenError!net.Server { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = address; + _ = options; + return error.NetworkDown; +} + +fn netAcceptUnavailable( + userdata: ?*anyopaque, + listen_handle: net.Socket.Handle, +) net.Server.AcceptError!net.Stream { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = listen_handle; + return error.NetworkDown; +} + +fn netBindIp( + userdata: ?*anyopaque, + address: *const net.IpAddress, + options: net.IpAddress.BindOptions, +) net.IpAddress.BindError!net.Socket { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + const family = posixAddressFamily(address); + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + const socket_fd = try ev.socket(&cancel_region, family, options); + errdefer ev.close(socket_fd); + var storage: PosixAddress = undefined; + var addr_len = addressToPosix(address, &storage); + try ev.bind(&cancel_region, socket_fd, &storage.any, addr_len); + try ev.getsockname(&cancel_region, socket_fd, &storage.any, &addr_len); + return .{ + .handle = socket_fd, + .address = addressFromPosix(&storage), + }; +} + +fn netBindIpUnavailable( + userdata: ?*anyopaque, + address: *const net.IpAddress, + options: net.IpAddress.BindOptions, +) 
net.IpAddress.BindError!net.Socket { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = address; + _ = options; + return error.NetworkDown; +} + +fn netConnectIpUnavailable( + userdata: ?*anyopaque, + address: *const net.IpAddress, + options: net.IpAddress.ConnectOptions, +) net.IpAddress.ConnectError!net.Stream { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = address; + _ = options; + return error.NetworkDown; +} + +fn netListenUnixUnavailable( + userdata: ?*anyopaque, + address: *const net.UnixAddress, + options: net.UnixAddress.ListenOptions, +) net.UnixAddress.ListenError!net.Socket.Handle { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = address; + _ = options; + return error.AddressFamilyUnsupported; +} + +fn netConnectUnixUnavailable( + userdata: ?*anyopaque, + address: *const net.UnixAddress, +) net.UnixAddress.ConnectError!net.Socket.Handle { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = address; + return error.AddressFamilyUnsupported; +} + +fn netSocketCreatePairUnavailable( + userdata: ?*anyopaque, + options: net.Socket.CreatePairOptions, +) net.Socket.CreatePairError![2]net.Socket { + _ = userdata; + _ = options; + return error.OperationUnsupported; +} + +fn netSendUnavailable( + userdata: ?*anyopaque, + handle: net.Socket.Handle, + messages: []net.OutgoingMessage, + flags: net.SendFlags, +) struct { ?net.Socket.SendError, usize } { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = handle; + _ = messages; + _ = flags; + return .{ error.NetworkDown, 0 }; +} + +fn netReceive( + userdata: ?*anyopaque, + handle: net.Socket.Handle, + message_buffer: []net.IncomingMessage, + data_buffer: []u8, + flags: net.ReceiveFlags, + timeout: Io.Timeout, +) struct { ?net.Socket.ReceiveTimeoutError, usize } { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + const ev_io = ev.io(); + + var message_i: usize = 0; + var data_i: usize = 0; + + const 
deadline: ?struct { + raw: Io.Timestamp, + timespec: linux.kernel_timespec, + clock: Io.Clock, + } = if (timeout.toTimestamp(ev_io)) |deadline| deadline: { + const ns = deadline.raw.toNanoseconds(); + break :deadline .{ + .raw = deadline.raw, + .timespec = .{ + .sec = @intCast(@divFloor(ns, std.time.ns_per_s)), + .nsec = @intCast(@mod(ns, std.time.ns_per_s)), + }, + .clock = deadline.clock, + }; + } else null; + + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + while (true) { + if (message_buffer.len - message_i == 0) return .{ null, message_i }; + const message = &message_buffer[message_i]; + const remaining_data_buffer = data_buffer[data_i..]; + var storage: PosixAddress = undefined; + var iov: iovec = .{ .base = remaining_data_buffer.ptr, .len = remaining_data_buffer.len }; + var msg: linux.msghdr = .{ + .name = &storage.any, + .namelen = @sizeOf(PosixAddress), + .iov = (&iov)[0..1], + .iovlen = 1, + .control = message.control.ptr, + .controllen = @intCast(message.control.len), + .flags = undefined, + }; + + const thread = cancel_region.awaitIoUring() catch |err| return .{ err, message_i }; + thread.enqueue().* = .{ + .opcode = .RECVMSG, + .flags = if (deadline) |_| linux.IOSQE_IO_LINK else 0, + .ioprio = 0, + .fd = handle, + .off = 0, + .addr = @intFromPtr(&msg), + .len = 0, + .rw_flags = linux.MSG.NOSIGNAL | + @as(u32, if (flags.oob) linux.MSG.OOB else 0) | + @as(u32, if (flags.peek) linux.MSG.PEEK else 0) | + @as(u32, if (flags.trunc) linux.MSG.TRUNC else 0), + .user_data = @intFromPtr(cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + if (deadline) |*deadline_ptr| thread.enqueue().* = .{ + .opcode = .LINK_TIMEOUT, + .flags = linux.IOSQE_CQE_SKIP_SUCCESS, + .ioprio = 0, + .fd = 0, + .off = 0, + .addr = @intFromPtr(&deadline_ptr.timespec), + .len = 1, + .rw_flags = linux.IORING_TIMEOUT_ABS | @as(u32, switch (deadline_ptr.clock) { + .real => 
linux.IORING_TIMEOUT_REALTIME, + else => 0, + .boot => linux.IORING_TIMEOUT_BOOTTIME, + }), + .user_data = @intFromEnum(Completion.UserData.wakeup), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + ev.yield(null, .nothing); + const completion = cancel_region.completion(); + switch (completion.errno()) { + .SUCCESS => { + const data = remaining_data_buffer[0..@intCast(completion.result)]; + data_i += data.len; + message.* = .{ + .from = addressFromPosix(&storage), + .data = data, + .control = if (msg.control) |ptr| @as([*]u8, @ptrCast(ptr))[0..msg.controllen] else message.control, + .flags = .{ + .eor = (msg.flags & linux.MSG.EOR) != 0, + .trunc = (msg.flags & linux.MSG.TRUNC) != 0, + .ctrunc = (msg.flags & linux.MSG.CTRUNC) != 0, + .oob = (msg.flags & linux.MSG.OOB) != 0, + .errqueue = if (@hasDecl(linux.MSG, "ERRQUEUE")) (msg.flags & linux.MSG.ERRQUEUE) != 0 else false, + }, + }; + message_i += 1; + continue; + }, + .AGAIN => unreachable, + .INTR, .CANCELED => { + if (deadline) |d| { + if (now(ev, d.clock).nanoseconds >= d.raw.nanoseconds) return .{ error.Timeout, message_i }; + } + continue; + }, + + .BADF => |err| return .{ errnoBug(err), message_i }, + .NFILE => return .{ error.SystemFdQuotaExceeded, message_i }, + .MFILE => return .{ error.ProcessFdQuotaExceeded, message_i }, + .FAULT => |err| return .{ errnoBug(err), message_i }, + .INVAL => |err| return .{ errnoBug(err), message_i }, + .NOBUFS => return .{ error.SystemResources, message_i }, + .NOMEM => return .{ error.SystemResources, message_i }, + .NOTCONN => return .{ error.SocketUnconnected, message_i }, + .NOTSOCK => |err| return .{ errnoBug(err), message_i }, + .MSGSIZE => return .{ error.MessageOversize, message_i }, + .PIPE => return .{ error.SocketUnconnected, message_i }, + .OPNOTSUPP => |err| return .{ errnoBug(err), message_i }, + .CONNRESET => return .{ error.ConnectionResetByPeer, message_i }, + .NETDOWN => return .{ error.NetworkDown, message_i }, + 
else => |err| return .{ unexpectedErrno(err), message_i }, + } + } +} + +fn netReceiveUnavailable( + userdata: ?*anyopaque, + handle: net.Socket.Handle, + message_buffer: []net.IncomingMessage, + data_buffer: []u8, + flags: net.ReceiveFlags, + timeout: Io.Timeout, +) struct { ?net.Socket.ReceiveTimeoutError, usize } { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = handle; + _ = message_buffer; + _ = data_buffer; + _ = flags; + _ = timeout; + return .{ error.NetworkDown, 0 }; +} + +fn netReadUnavailable( + userdata: ?*anyopaque, + fd: net.Socket.Handle, + data: [][]u8, +) net.Stream.Reader.Error!usize { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = fd; + _ = data; + return error.NetworkDown; +} + +fn netWriteUnavailable( + userdata: ?*anyopaque, + handle: net.Socket.Handle, + header: []const u8, + data: []const []const u8, + splat: usize, +) net.Stream.Writer.Error!usize { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = handle; + _ = header; + _ = data; + _ = splat; + return error.NetworkDown; +} + +fn netWriteFileUnavailable( + userdata: ?*anyopaque, + socket_handle: net.Socket.Handle, + header: []const u8, + file_reader: *File.Reader, + limit: Io.Limit, +) net.Stream.Writer.WriteFileError!usize { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = socket_handle; + _ = header; + _ = file_reader; + _ = limit; + return error.NetworkDown; +} + +fn netClose(userdata: ?*anyopaque, handles: []const net.Socket.Handle) void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + for (handles) |handle| ev.close(handle); +} + +fn netCloseUnavailable(userdata: ?*anyopaque, handles: []const net.Socket.Handle) void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = handles; + unreachable; // How you gonna close something that was impossible to open? 
+} + +fn netShutdown( + userdata: ?*anyopaque, + handle: net.Socket.Handle, + how: net.ShutdownHow, +) net.ShutdownError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + var cancel_region: CancelRegion = .init(); + defer cancel_region.deinit(); + while (true) { + const thread = try cancel_region.awaitIoUring(); + thread.enqueue().* = .{ + .opcode = .SHUTDOWN, + .flags = 0, + .ioprio = 0, + .fd = handle, + .off = 0, + .addr = 0, + .len = switch (how) { + .recv => linux.SHUT.RD, + .send => linux.SHUT.WR, + .both => linux.SHUT.RDWR, + }, + .rw_flags = 0, + .user_data = @intFromPtr(cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + ev.yield(null, .nothing); + switch (cancel_region.errno()) { + .SUCCESS => return, + .INTR, .CANCELED => continue, + .BADF, .NOTSOCK, .INVAL => |err| return errnoBug(err), + .NOTCONN => return error.SocketUnconnected, + .NOBUFS => return error.SystemResources, + else => |err| return unexpectedErrno(err), + } + } +} + +fn netShutdownUnavailable( + userdata: ?*anyopaque, + handle: net.Socket.Handle, + how: net.ShutdownHow, +) net.ShutdownError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = handle; + _ = how; + unreachable; // How you gonna shutdown something that was impossible to open? 
+} + +fn netInterfaceNameResolveUnavailable( + userdata: ?*anyopaque, + name: *const net.Interface.Name, +) net.Interface.Name.ResolveError!net.Interface { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = name; + return error.InterfaceNotFound; +} + +fn netInterfaceNameUnavailable( + userdata: ?*anyopaque, + interface: net.Interface, +) net.Interface.NameError!net.Interface.Name { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = ev; + _ = interface; + return error.Unexpected; +} + +fn netLookupUnavailable( + userdata: ?*anyopaque, + host_name: net.HostName, + resolved: *Io.Queue(net.HostName.LookupResult), + options: net.HostName.LookupOptions, +) net.HostName.LookupError!void { + const ev: *Evented = @ptrCast(@alignCast(userdata)); + _ = host_name; + _ = options; + resolved.close(ev.io()); + return error.NetworkDown; +} + +fn bind( + ev: *Evented, + cancel_region: *CancelRegion, + socket_fd: fd_t, + addr: *const linux.sockaddr, + addr_len: linux.socklen_t, +) !void { + while (true) { + const thread = try cancel_region.awaitIoUring(); + thread.enqueue().* = .{ + .opcode = .BIND, + .flags = 0, + .ioprio = 0, + .fd = socket_fd, + .off = addr_len, + .addr = @intFromPtr(addr), + .len = 0, + .rw_flags = 0, + .user_data = @intFromPtr(cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + ev.yield(null, .nothing); + switch (cancel_region.errno()) { + .SUCCESS => return, + .INTR, .CANCELED => continue, + .ADDRINUSE => return error.AddressInUse, + .BADF => |err| return errnoBug(err), // File descriptor used after closed. 
+ .INVAL => |err| return errnoBug(err), // invalid parameters + .NOTSOCK => |err| return errnoBug(err), // invalid `sockfd` + .AFNOSUPPORT => return error.AddressFamilyUnsupported, + .ADDRNOTAVAIL => return error.AddressUnavailable, + .FAULT => |err| return errnoBug(err), // invalid `addr` pointer + .NOMEM => return error.SystemResources, + else => |err| return unexpectedErrno(err), + } + } +} + +fn close(ev: *Evented, fd: fd_t) void { + var cancel_region: CancelRegion = .initBlocked(); + defer cancel_region.deinit(); + while (true) { + const thread = cancel_region.awaitIoUring() catch |err| switch (err) { + error.Canceled => unreachable, // blocked + }; + thread.enqueue().* = .{ + .opcode = .CLOSE, + .flags = 0, + .ioprio = 0, + .fd = fd, + .off = 0, + .addr = 0, + .len = 0, + .rw_flags = 0, + .user_data = @intFromPtr(cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + ev.yield(null, .nothing); + switch (cancel_region.errno()) { + .SUCCESS => return, + .INTR, .CANCELED => continue, + .BADF => unreachable, // Always a race condition. 
+ else => break, + } + } +} + +fn fchmodat( + ev: *Evented, + cancel_region: *CancelRegion, + dir: fd_t, + path: [*:0]const u8, + mode: linux.mode_t, + flags: u32, +) Dir.SetFilePermissionsError!void { + _ = ev; + while (true) { + try cancel_region.await(.nothing); + switch (linux.errno(linux.fchmodat2(dir, path, mode, flags))) { + .SUCCESS => return, + .INTR => continue, + .BADF => |err| return errnoBug(err), + .FAULT => |err| return errnoBug(err), + .INVAL => |err| return errnoBug(err), + .ACCES => return error.AccessDenied, + .IO => return error.InputOutput, + .LOOP => return error.SymLinkLoop, + .NOENT => return error.FileNotFound, + .NOMEM => return error.SystemResources, + .NOTDIR => return error.FileNotFound, + .OPNOTSUPP => return error.OperationUnsupported, + .PERM => return error.PermissionDenied, + .ROFS => return error.ReadOnlyFileSystem, + else => |err| return unexpectedErrno(err), + } + } +} + +fn fchownat( + ev: *Evented, + cancel_region: *CancelRegion, + dir: fd_t, + path: [*:0]const u8, + owner: linux.uid_t, + group: linux.gid_t, + flags: u32, +) File.SetOwnerError!void { + _ = ev; + while (true) { + try cancel_region.await(.nothing); + switch (linux.errno(linux.fchownat(dir, path, owner, group, flags))) { + .SUCCESS => return, + .INTR => continue, + .BADF => |err| return errnoBug(err), // likely fd refers to directory opened without `Dir.OpenOptions.iterate` + .FAULT => |err| return errnoBug(err), + .INVAL => |err| return errnoBug(err), + .ACCES => return error.AccessDenied, + .IO => return error.InputOutput, + .LOOP => return error.SymLinkLoop, + .NOENT => return error.FileNotFound, + .NOMEM => return error.SystemResources, + .NOTDIR => return error.FileNotFound, + .PERM => return error.PermissionDenied, + .ROFS => return error.ReadOnlyFileSystem, + else => |err| return unexpectedErrno(err), + } + } +} + +fn flock( + ev: *Evented, + cancel_region: *CancelRegion, + fd: fd_t, + op: File.Lock, + blocking: enum { blocking, nonblocking }, +) 
(File.LockError || error{WouldBlock})!void { + while (true) { + try cancel_region.await(.nothing); + switch (linux.errno(linux.flock(fd, LOCK.NB | @as(i32, switch (op) { + .none => LOCK.UN, + .shared => LOCK.SH, + .exclusive => LOCK.EX, + })))) { + .SUCCESS => return, + .INTR => continue, + .BADF => |err| return errnoBug(err), + .INVAL => |err| return errnoBug(err), // invalid parameters + .NOLCK => return error.SystemResources, + .AGAIN => { + const thread = try cancel_region.awaitIoUring(); + thread.enqueue().* = .{ + .opcode = .NOP, + .flags = 0, + .ioprio = 0, + .fd = 0, + .off = 0, + .addr = 0, + .len = 0, + .rw_flags = 0, + .user_data = @intFromPtr(cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + ev.yield(null, .nothing); + switch (cancel_region.errno()) { + .SUCCESS, .INTR, .CANCELED => {}, + else => unreachable, + } + switch (blocking) { + .blocking => continue, + .nonblocking => return error.WouldBlock, + } + }, + .OPNOTSUPP => return error.FileLocksUnsupported, + else => |err| return unexpectedErrno(err), + } + } +} + +fn getsockname( + ev: *Evented, + cancel_region: *CancelRegion, + socket_fd: fd_t, + addr: *linux.sockaddr, + addr_len: *linux.socklen_t, +) !void { + _ = ev; + while (true) { + try cancel_region.await(.nothing); + switch (linux.errno(linux.getsockname(socket_fd, addr, addr_len))) { + .SUCCESS => return, + .INTR => continue, + .BADF => |err| return errnoBug(err), // File descriptor used after closed. 
+ .FAULT => |err| return errnoBug(err), + .INVAL => |err| return errnoBug(err), // invalid parameters + .NOTSOCK => |err| return errnoBug(err), // always a race condition + .NOBUFS => return error.SystemResources, + else => |err| return unexpectedErrno(err), + } + } +} + +fn linkat( + ev: *Evented, + cancel_region: *CancelRegion, + old_dir: fd_t, + old_path: [*:0]const u8, + new_dir: fd_t, + new_path: [*:0]const u8, + flags: u32, +) File.HardLinkError!void { + while (true) { + const thread = try cancel_region.awaitIoUring(); + thread.enqueue().* = .{ + .opcode = .LINKAT, + .flags = 0, + .ioprio = 0, + .fd = old_dir, + .off = @intFromPtr(new_path), + .addr = @intFromPtr(old_path), + .len = @bitCast(new_dir), + .rw_flags = flags, + .user_data = @intFromPtr(cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + ev.yield(null, .nothing); + switch (cancel_region.errno()) { + .SUCCESS => return, + .INTR, .CANCELED => continue, + .ACCES => return error.AccessDenied, + .DQUOT => return error.DiskQuota, + .EXIST => return error.PathAlreadyExists, + .IO => return error.HardwareFailure, + .LOOP => return error.SymLinkLoop, + .MLINK => return error.LinkQuotaExceeded, + .NAMETOOLONG => return error.NameTooLong, + .NOENT => return error.FileNotFound, + .NOMEM => return error.SystemResources, + .NOSPC => return error.NoSpaceLeft, + .NOTDIR => return error.NotDir, + .PERM => return error.PermissionDenied, + .ROFS => return error.ReadOnlyFileSystem, + .XDEV => return error.CrossDevice, + .ILSEQ => return error.BadPathName, + .FAULT => |err| return errnoBug(err), + .INVAL => |err| return errnoBug(err), + else => |err| return unexpectedErrno(err), + } + } +} + +fn lseek( + ev: *Evented, + cancel_region: *CancelRegion, + fd: fd_t, + offset: u64, + whence: u32, +) File.SeekError!void { + _ = ev; + while (true) { + try cancel_region.await(.nothing); + var result: u64 = undefined; + switch (linux.errno(switch (@sizeOf(usize)) { 
+ else => comptime unreachable, + 4 => linux.llseek(fd, offset, &result, whence), + 8 => linux.lseek(fd, @bitCast(offset), whence), + })) { + .SUCCESS => return, + .INTR => continue, + .BADF => |err| return errnoBug(err), // File descriptor used after closed. + .INVAL => return error.Unseekable, + .OVERFLOW => return error.Unseekable, + .SPIPE => return error.Unseekable, + .NXIO => return error.Unseekable, + else => |err| return unexpectedErrno(err), + } + } +} + +fn openat( + ev: *Evented, + cancel_region: *CancelRegion, + dir: fd_t, + path: [*:0]const u8, + flags: linux.O, + mode: linux.mode_t, +) File.OpenError!fd_t { + var mut_flags = flags; + if (@hasField(linux.O, "LARGEFILE")) mut_flags.LARGEFILE = true; + while (true) { + const thread = try cancel_region.awaitIoUring(); + thread.enqueue().* = .{ + .opcode = .OPENAT, + .flags = 0, + .ioprio = 0, + .fd = dir, + .off = 0, + .addr = @intFromPtr(path), + .len = mode, + .rw_flags = @bitCast(mut_flags), + .user_data = @intFromPtr(cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + ev.yield(null, .nothing); + const completion = cancel_region.completion(); + switch (completion.errno()) { + .SUCCESS => return completion.result, + .INTR, .CANCELED => continue, + .FAULT => |err| return errnoBug(err), + .INVAL => return error.BadPathName, + .BADF => |err| return errnoBug(err), // File descriptor used after closed. + .ACCES => return error.AccessDenied, + .FBIG => return error.FileTooBig, + .OVERFLOW => return error.FileTooBig, + .ISDIR => return error.IsDir, + .LOOP => return error.SymLinkLoop, + .MFILE => return error.ProcessFdQuotaExceeded, + .NAMETOOLONG => return error.NameTooLong, + .NFILE => return error.SystemFdQuotaExceeded, + .NODEV => return error.NoDevice, + .NOENT => return error.FileNotFound, + .SRCH => return error.FileNotFound, // Linux when opening procfs files. 
+ .NOMEM => return error.SystemResources, + .NOSPC => return error.NoSpaceLeft, + .NOTDIR => return error.NotDir, + .PERM => return error.PermissionDenied, + .EXIST => return error.PathAlreadyExists, + .BUSY => return error.DeviceBusy, + .OPNOTSUPP => return error.FileLocksUnsupported, + .AGAIN => return error.WouldBlock, + .TXTBSY => return error.FileBusy, + .NXIO => return error.NoDevice, + .ILSEQ => return error.BadPathName, + else => |err| return unexpectedErrno(err), + } + } +} + +fn preadv( + ev: *Evented, + cancel_region: *CancelRegion, + fd: fd_t, + iov: []const iovec, + offset: ?u64, +) File.Reader.Error!usize { + if (iov.len == 0) return 0; + const gather = iov.len > 1 or iov[0].len > 0xfffff000; + while (true) { + const thread = try cancel_region.awaitIoUring(); + thread.enqueue().* = .{ + .opcode = if (gather) .READV else .READ, + .flags = 0, + .ioprio = 0, + .fd = fd, + .off = offset orelse std.math.maxInt(u64), + .addr = if (gather) @intFromPtr(iov.ptr) else @intFromPtr(iov[0].base), + .len = @intCast(if (gather) iov.len else iov[0].len), + .rw_flags = 0, + .user_data = @intFromPtr(cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + ev.yield(null, .nothing); + const completion = cancel_region.completion(); + switch (completion.errno()) { + .SUCCESS => return @as(u32, @bitCast(completion.result)), + .INTR, .CANCELED => continue, + .INVAL => |err| return errnoBug(err), + .FAULT => |err| return errnoBug(err), + .AGAIN => return error.WouldBlock, + .BADF => |err| return errnoBug(err), // File descriptor used after closed + .IO => return error.InputOutput, + .ISDIR => return error.IsDir, + .NOBUFS => return error.SystemResources, + .NOMEM => return error.SystemResources, + .NOTCONN => return error.SocketUnconnected, + .CONNRESET => return error.ConnectionResetByPeer, + else => |err| return unexpectedErrno(err), + } + } +} + +fn pwritev( + ev: *Evented, + cancel_region: *CancelRegion, + fd: 
fd_t, + iov: []const iovec_const, + offset: ?u64, +) File.Writer.Error!usize { + if (iov.len == 0) return 0; + const scatter = iov.len > 1 or iov[0].len > 0xfffff000; + while (true) { + const thread = try cancel_region.awaitIoUring(); + thread.enqueue().* = .{ + .opcode = if (scatter) .WRITEV else .WRITE, + .flags = 0, + .ioprio = 0, + .fd = fd, + .off = offset orelse std.math.maxInt(u64), + .addr = if (scatter) @intFromPtr(iov.ptr) else @intFromPtr(iov[0].base), + .len = @intCast(if (scatter) iov.len else iov[0].len), + .rw_flags = 0, + .user_data = @intFromPtr(cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + ev.yield(null, .nothing); + const completion = cancel_region.completion(); + switch (completion.errno()) { + .SUCCESS => return @as(u32, @bitCast(completion.result)), + .INTR, .CANCELED => continue, + .INVAL => |err| return errnoBug(err), + .FAULT => |err| return errnoBug(err), + .AGAIN => return error.WouldBlock, + .BADF => return error.NotOpenForWriting, // Can be a race condition. + .DESTADDRREQ => |err| return errnoBug(err), // `connect` was never called. + .DQUOT => return error.DiskQuota, + .FBIG => return error.FileTooBig, + .IO => return error.InputOutput, + .NOSPC => return error.NoSpaceLeft, + .PERM => return error.PermissionDenied, + .PIPE => return error.BrokenPipe, + .CONNRESET => |err| return errnoBug(err), // Not a socket handle. 
+ .BUSY => return error.DeviceBusy, + else => |err| return unexpectedErrno(err), + } + } +} + +fn readAll( + ev: *Evented, + cancel_region: *CancelRegion, + fd: fd_t, + buffer: []u8, +) (File.Reader.Error || error{EndOfStream})!void { + var index: usize = 0; + while (buffer.len - index != 0) { + const len = try ev.preadv(cancel_region, fd, &.{ + .{ .base = buffer[index..].ptr, .len = buffer.len - index }, + }, null); + if (len == 0) return error.EndOfStream; + index += len; + } +} + +fn realPath( + ev: *Evented, + cancel_region: *CancelRegion, + fd: fd_t, + out_buffer: []u8, +) File.RealPathError!usize { + _ = ev; + var procfs_buf: [std.fmt.count("/proc/self/fd/{d}\x00", .{std.math.minInt(fd_t)})]u8 = undefined; + const proc_path = std.fmt.bufPrintSentinel(&procfs_buf, "/proc/self/fd/{d}", .{fd}, 0) catch + unreachable; + while (true) { + try cancel_region.await(.nothing); + const rc = linux.readlink(proc_path, out_buffer.ptr, out_buffer.len); + switch (linux.errno(rc)) { + .SUCCESS => return rc, + .INTR => continue, + .ACCES => return error.AccessDenied, + .FAULT => |err| return errnoBug(err), + .IO => return error.FileSystem, + .LOOP => return error.SymLinkLoop, + .NAMETOOLONG => return error.NameTooLong, + .NOENT => return error.FileNotFound, + .NOMEM => return error.SystemResources, + .NOTDIR => return error.NotDir, + .ILSEQ => |err| return errnoBug(err), + else => |err| return unexpectedErrno(err), + } + } +} + +fn renameat( + ev: *Evented, + cancel_region: *CancelRegion, + old_dir: fd_t, + old_path: [*:0]const u8, + new_dir: fd_t, + new_path: [*:0]const u8, + flags: linux.RENAME, +) Dir.RenameError!void { + while (true) { + const thread = try cancel_region.awaitIoUring(); + thread.enqueue().* = .{ + .opcode = .RENAMEAT, + .flags = 0, + .ioprio = 0, + .fd = old_dir, + .off = @intFromPtr(new_path), + .addr = @intFromPtr(old_path), + .len = @bitCast(new_dir), + .rw_flags = @bitCast(flags), + .user_data = @intFromPtr(cancel_region.fiber), + .buf_index = 0, + 
.personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + ev.yield(null, .nothing); + switch (cancel_region.errno()) { + .SUCCESS => return, + .INTR, .CANCELED => continue, + .ACCES => return error.AccessDenied, + .PERM => return error.PermissionDenied, + .BUSY => return error.FileBusy, + .DQUOT => return error.DiskQuota, + .ISDIR => return error.IsDir, + .IO => return error.HardwareFailure, + .LOOP => return error.SymLinkLoop, + .MLINK => return error.LinkQuotaExceeded, + .NAMETOOLONG => return error.NameTooLong, + .NOENT => return error.FileNotFound, + .NOTDIR => return error.NotDir, + .NOMEM => return error.SystemResources, + .NOSPC => return error.NoSpaceLeft, + .EXIST => return error.DirNotEmpty, + .NOTEMPTY => return error.DirNotEmpty, + .ROFS => return error.ReadOnlyFileSystem, + .XDEV => return error.CrossDevice, + .ILSEQ => return error.BadPathName, + .FAULT => |err| return errnoBug(err), + .INVAL => |err| return errnoBug(err), + else => |err| return unexpectedErrno(err), + } + } +} + +fn setsockopt( + ev: *Evented, + cancel_region: *CancelRegion, + fd: fd_t, + level: i32, + opt_name: u32, + option: u32, +) !void { + const o: []const u8 = @ptrCast(&option); + while (true) { + const off: extern struct { + cmd_op: linux.IO_URING_SOCKET_OP, + pad: u32, + } align(@alignOf(u64)) = .{ + .cmd_op = .SETSOCKOPT, + .pad = 0, + }; + const addr: extern struct { level: i32, opt_name: u32 } align(@alignOf(u64)) = .{ + .level = level, + .opt_name = opt_name, + }; + const thread = try cancel_region.awaitIoUring(); + thread.enqueue().* = .{ + .opcode = .URING_CMD, + .flags = 0, + .ioprio = 0, + .fd = fd, + .off = @as(*const u64, @ptrCast(&off)).*, + .addr = @as(*const u64, @ptrCast(&addr)).*, + .len = 0, + .rw_flags = 0, + .user_data = @intFromPtr(cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = @intCast(o.len), + .addr3 = @intFromPtr(o.ptr), + .resv = 0, + }; + ev.yield(null, .nothing); + switch (cancel_region.errno()) { + 
.SUCCESS => return, + .INTR, .CANCELED => continue, + .BADF => |err| return errnoBug(err), // File descriptor used after closed. + .NOTSOCK => |err| return errnoBug(err), + .INVAL => |err| return errnoBug(err), + .FAULT => |err| return errnoBug(err), + else => |err| return unexpectedErrno(err), + } + } +} + +fn socket( + ev: *Evented, + cancel_region: *CancelRegion, + family: linux.sa_family_t, + options: net.IpAddress.BindOptions, +) error{ + AddressFamilyUnsupported, + ProtocolUnsupportedBySystem, + ProcessFdQuotaExceeded, + SystemFdQuotaExceeded, + SystemResources, + ProtocolUnsupportedByAddressFamily, + SocketModeUnsupported, + OptionUnsupported, + Unexpected, + Canceled, +}!fd_t { + const mode = posixSocketMode(options.mode); + const protocol = posixProtocol(options.protocol); + const socket_fd = while (true) { + const thread = try cancel_region.awaitIoUring(); + thread.enqueue().* = .{ + .opcode = .SOCKET, + .flags = 0, + .ioprio = 0, + .fd = family, + .off = mode | linux.SOCK.CLOEXEC, + .addr = 0, + .len = protocol, + .rw_flags = 0, + .user_data = @intFromPtr(cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + ev.yield(null, .nothing); + const completion = cancel_region.completion(); + switch (completion.errno()) { + .SUCCESS => break completion.result, + .INTR, .CANCELED => continue, + .AFNOSUPPORT => return error.AddressFamilyUnsupported, + .INVAL => return error.ProtocolUnsupportedBySystem, + .MFILE => return error.ProcessFdQuotaExceeded, + .NFILE => return error.SystemFdQuotaExceeded, + .NOBUFS => return error.SystemResources, + .NOMEM => return error.SystemResources, + .PROTONOSUPPORT => return error.ProtocolUnsupportedByAddressFamily, + .PROTOTYPE => return error.SocketModeUnsupported, + else => |err| return unexpectedErrno(err), + } + }; + errdefer ev.close(socket_fd); + + if (options.ip6_only) { + if (linux.IPV6 == void) return error.OptionUnsupported; + try ev.setsockopt(cancel_region, 
socket_fd, linux.IPPROTO.IPV6, linux.IPV6.V6ONLY, 0); + } + + return socket_fd; +} + +fn stat(ev: *Evented, cancel_region: *CancelRegion, fd: fd_t) Dir.StatError!Dir.Stat { + return ev.statx(cancel_region, fd, "", linux.AT.EMPTY_PATH) catch |err| switch (err) { + error.BadPathName, error.NameTooLong => unreachable, // path is empty + error.AccessDenied => return errnoBug(.ACCES), + error.SymLinkLoop => return errnoBug(.LOOP), + error.FileNotFound => return errnoBug(.NOENT), + error.NotDir => return errnoBug(.NOTDIR), + else => |e| return e, + }; +} + +fn statx( + ev: *Evented, + cancel_region: *CancelRegion, + dir: fd_t, + path: [*:0]const u8, + flags: u32, +) (Dir.StatError || Dir.PathNameError || error{ FileNotFound, NotDir, SymLinkLoop })!Dir.Stat { + while (true) { + var statx_buf = std.mem.zeroes(linux.Statx); + const thread = try cancel_region.awaitIoUring(); + thread.enqueue().* = .{ + .opcode = .STATX, + .flags = 0, + .ioprio = 0, + .fd = dir, + .off = @intFromPtr(&statx_buf), + .addr = @intFromPtr(path), + .len = @bitCast(linux_statx_request), + .rw_flags = flags, + .user_data = @intFromPtr(cancel_region.fiber), + .buf_index = 0, + .personality = 0, + .splice_fd_in = 0, + .addr3 = 0, + .resv = 0, + }; + ev.yield(null, .nothing); + switch (cancel_region.errno()) { + .SUCCESS => return statFromLinux(&statx_buf), + .INTR, .CANCELED => continue, + .ACCES => return error.AccessDenied, + .BADF => |err| return errnoBug(err), // File descriptor used after closed. 
+ .FAULT => |err| return errnoBug(err), + .INVAL => |err| return errnoBug(err), + .LOOP => return error.SymLinkLoop, + .NAMETOOLONG => |err| return errnoBug(err), + .NOENT => return error.FileNotFound, + .NOTDIR => return error.NotDir, + .NOMEM => return error.SystemResources, + else => |err| return unexpectedErrno(err), + } + } +} + +fn urandomReadAll( + ev: *Evented, + cancel_region: *CancelRegion, + buffer: []u8, +) (File.OpenError || File.Reader.Error || error{EndOfStream})!void { + return ev.readAll(cancel_region, try ev.random_fd.open(ev, cancel_region, "/dev/urandom", .{ + .ACCMODE = .RDONLY, + .CLOEXEC = true, + }), buffer); +} + +fn utimensat( + ev: *Evented, + cancel_region: *CancelRegion, + dir: fd_t, + path: [*:0]const u8, + times: ?*const [2]linux.timespec, + flags: u32, +) File.SetTimestampsError!void { + _ = ev; + while (true) { + try cancel_region.await(.nothing); + switch (linux.errno(linux.utimensat(dir, path, times, flags))) { + .SUCCESS => return, + .INTR => continue, + .BADF => |err| return errnoBug(err), // always a race condition + .FAULT => |err| return errnoBug(err), + .INVAL => |err| return errnoBug(err), + .ACCES => return error.AccessDenied, + .PERM => return error.PermissionDenied, + .ROFS => return error.ReadOnlyFileSystem, + else => |err| return unexpectedErrno(err), + } + } +} + +fn writeAll( + ev: *Evented, + cancel_region: *CancelRegion, + fd: fd_t, + buffer: []const u8, +) (File.Writer.Error || error{EndOfStream})!void { + var index: usize = 0; + while (buffer.len - index != 0) { + const len = try ev.pwritev(cancel_region, fd, &.{ + .{ .base = buffer[index..].ptr, .len = buffer.len - index }, + }, null); + if (len == 0) return error.EndOfStream; + index += len; + } +} + +test { + _ = Fiber.CancelProtection; +} diff --git a/lib/std/Io/Threaded.zig b/lib/std/Io/Threaded.zig index 1f450d1dac..dac4ada39d 100644 --- a/lib/std/Io/Threaded.zig +++ b/lib/std/Io/Threaded.zig @@ -78,7 +78,7 @@ null_file: NullFile = .{}, random_file: 
RandomFile = .{}, pipe_file: PipeFile = .{}, -csprng: Csprng = .{}, +csprng: Csprng = .uninitialized, system_basic_information: SystemBasicInformation = .{}, @@ -88,10 +88,12 @@ const SystemBasicInformation = if (!is_windows) struct {} else struct { }; pub const Csprng = struct { - rng: std.Random.DefaultCsprng = .{ + rng: std.Random.DefaultCsprng, + + pub const uninitialized: Csprng = .{ .rng = .{ .state = undefined, .offset = std.math.maxInt(usize), - }, + } }; pub const seed_len = std.Random.DefaultCsprng.secret_seed_length; @@ -120,7 +122,7 @@ pub const Argv0 = switch (native_os) { }, }; -const Environ = struct { +pub const Environ = struct { /// Unmodified data directly from the OS. process_environ: process.Environ, /// Protected by `mutex`. Determines whether the other fields have been @@ -157,6 +159,127 @@ const Environ = struct { HOME: ?[:0]const u8 = null, }, }; + + pub fn scan(environ: *Environ, allocator: std.mem.Allocator) void { + if (environ.initialized) return; + environ.initialized = true; + + if (is_windows) { + // This value expires with any call that modifies the environment, + // which is outside of this Io implementation's control, so references + // must be short-lived. + const peb = windows.peb(); + assert(windows.ntdll.RtlEnterCriticalSection(peb.FastPebLock) == .SUCCESS); + defer assert(windows.ntdll.RtlLeaveCriticalSection(peb.FastPebLock) == .SUCCESS); + const ptr = peb.ProcessParameters.Environment; + + var i: usize = 0; + while (ptr[i] != 0) { + // There are some special environment variables that start with =, + // so we need a special case to not treat = as a key/value separator + // if it's the first character. 
+ // https://devblogs.microsoft.com/oldnewthing/20100506-00/?p=14133 + const key_start = i; + if (ptr[i] == '=') i += 1; + while (ptr[i] != 0 and ptr[i] != '=') : (i += 1) {} + const key_w = ptr[key_start..i]; + + const value_start = i + 1; + while (ptr[i] != 0) : (i += 1) {} // skip over '=' and value + const value_w = ptr[value_start..i]; + i += 1; // skip over null byte + + if (windows.eqlIgnoreCaseWtf16(key_w, &.{ 'N', 'O', '_', 'C', 'O', 'L', 'O', 'R' })) { + environ.exist.NO_COLOR = true; + } else if (windows.eqlIgnoreCaseWtf16(key_w, &.{ 'C', 'L', 'I', 'C', 'O', 'L', 'O', 'R', '_', 'F', 'O', 'R', 'C', 'E' })) { + environ.exist.CLICOLOR_FORCE = true; + } else if (windows.eqlIgnoreCaseWtf16(key_w, &.{ 'Z', 'I', 'G', '_', 'P', 'R', 'O', 'G', 'R', 'E', 'S', 'S' })) { + environ.zig_progress_file = file: { + var value_buf: [std.fmt.count("{d}", .{std.math.maxInt(usize)})]u8 = undefined; + const len = std.unicode.calcWtf8Len(value_w); + if (len > value_buf.len) break :file error.UnrecognizedFormat; + assert(std.unicode.wtf16LeToWtf8(&value_buf, value_w) == len); + break :file .{ + .handle = @ptrFromInt(std.fmt.parseInt(usize, value_buf[0..len], 10) catch + break :file error.UnrecognizedFormat), + .flags = .{ .nonblocking = true }, + }; + }; + } + comptime assert(@sizeOf(String) == 0); + } + } else if (native_os == .wasi and !builtin.link_libc) { + var environ_size: usize = undefined; + var environ_buf_size: usize = undefined; + + switch (std.os.wasi.environ_sizes_get(&environ_size, &environ_buf_size)) { + .SUCCESS => {}, + else => |err| { + environ.err = posix.unexpectedErrno(err); + return; + }, + } + if (environ_size == 0) return; + + const wasi_environ = allocator.alloc([*:0]u8, environ_size) catch |err| { + environ.err = err; + return; + }; + defer allocator.free(wasi_environ); + const wasi_environ_buf = allocator.alloc(u8, environ_buf_size) catch |err| { + environ.err = err; + return; + }; + defer allocator.free(wasi_environ_buf); + + switch 
(std.os.wasi.environ_get(wasi_environ.ptr, wasi_environ_buf.ptr)) { + .SUCCESS => {}, + else => |err| { + environ.err = posix.unexpectedErrno(err); + return; + }, + } + + for (wasi_environ) |env| { + const pair = std.mem.sliceTo(env, 0); + var parts = std.mem.splitScalar(u8, pair, '='); + const key = parts.first(); + if (std.mem.eql(u8, key, "NO_COLOR")) { + environ.exist.NO_COLOR = true; + } else if (std.mem.eql(u8, key, "CLICOLOR_FORCE")) { + environ.exist.CLICOLOR_FORCE = true; + } + comptime assert(@sizeOf(String) == 0); + } + } else { + for (environ.process_environ.block.slice) |opt_entry| { + const entry = opt_entry.?; + var entry_i: usize = 0; + while (entry[entry_i] != 0 and entry[entry_i] != '=') : (entry_i += 1) {} + const key = entry[0..entry_i]; + + var end_i: usize = entry_i; + while (entry[end_i] != 0) : (end_i += 1) {} + const value = entry[entry_i + 1 .. end_i :0]; + + if (std.mem.eql(u8, key, "NO_COLOR")) { + environ.exist.NO_COLOR = true; + } else if (std.mem.eql(u8, key, "CLICOLOR_FORCE")) { + environ.exist.CLICOLOR_FORCE = true; + } else if (std.mem.eql(u8, key, "ZIG_PROGRESS")) { + environ.zig_progress_file = file: { + break :file .{ + .handle = std.fmt.parseInt(u31, value, 10) catch + break :file error.UnrecognizedFormat, + .flags = .{ .nonblocking = true }, + }; + }; + } else inline for (@typeInfo(String).@"struct".fields) |field| { + if (std.mem.eql(u8, key, field.name)) @field(environ.string, field.name) = value; + } + } + } + } }; pub const NullFile = switch (native_os) { @@ -1397,13 +1520,13 @@ pub fn waitForApcOrAlert() void { _ = windows.ntdll.NtDelayExecution(windows.TRUE, &infinite_timeout); } -const max_iovecs_len = 8; -const splat_buffer_size = 64; +pub const max_iovecs_len = 8; +pub const splat_buffer_size = 64; /// Happens to be the same number that matches maximum number of handles that /// NtWaitForMultipleObjects accepts. We use this value also for poll() on /// posix systems. 
const poll_buffer_len = 64; -const default_PATH = "/usr/local/bin:/bin/:/usr/bin"; +pub const default_PATH = "/usr/local/bin:/bin/:/usr/bin"; /// There are multiple kernel bugs being worked around with retries. const max_windows_kernel_bug_retries = 13; @@ -1588,7 +1711,7 @@ fn worker(t: *Threaded) void { .cancel_protection = .unblocked, .futex_waiter = undefined, .unpark_flag = unpark_flag_init, - .csprng = .{}, + .csprng = .uninitialized, }; Thread.current = &thread; @@ -2563,12 +2686,12 @@ fn operate(userdata: ?*anyopaque, operation: Io.Operation) Io.Cancelable!Io.Oper fn batchAwaitAsync(userdata: ?*anyopaque, b: *Io.Batch) Io.Cancelable!void { const t: *Threaded = @ptrCast(@alignCast(userdata)); if (is_windows) { - batchAwaitWindows(b, false) catch |err| switch (err) { + batchDrainSubmittedWindows(b, false) catch |err| switch (err) { error.ConcurrencyUnavailable => unreachable, // passed concurrency=false else => |e| return e, }; const alertable_syscall = try AlertableSyscall.start(); - while (b.pending.head != .none and b.completions.head == .none) waitForApcOrAlert(); + while (b.pending.head != .none and b.completed.head == .none) waitForApcOrAlert(); alertable_syscall.finish(); return; } @@ -2576,7 +2699,7 @@ fn batchAwaitAsync(userdata: ?*anyopaque, b: *Io.Batch) Io.Cancelable!void { var poll_buffer: [poll_buffer_len]posix.pollfd = undefined; var poll_len: u32 = 0; { - var index = b.submissions.head; + var index = b.submitted.head; while (index != .none and poll_len < poll_buffer_len) { const submission = &b.storage[index.toIndex()].submission; switch (submission.operation) { @@ -2605,7 +2728,7 @@ fn batchAwaitAsync(userdata: ?*anyopaque, b: *Io.Batch) Io.Cancelable!void { 1 => {}, else => while (true) { const timeout_ms: i32 = t: { - if (b.completions.head != .none) { + if (b.completed.head != .none) { // It is legal to call batchWait with already completed // operations in the ring. 
In such case, we need to avoid // blocking in the poll syscall, but we can still take this @@ -2620,7 +2743,7 @@ fn batchAwaitAsync(userdata: ?*anyopaque, b: *Io.Batch) Io.Cancelable!void { switch (posix.errno(rc)) { .SUCCESS => { if (rc == 0) { - if (b.completions.head != .none) { + if (b.completed.head != .none) { // Since there are already completions available in the // queue, this is neither a timeout nor a case for // retrying. @@ -2629,7 +2752,7 @@ fn batchAwaitAsync(userdata: ?*anyopaque, b: *Io.Batch) Io.Cancelable!void { continue; } var prev_index: Io.Operation.OptionalIndex = .none; - var index = b.submissions.head; + var index = b.submitted.head; for (poll_buffer[0..poll_len]) |poll_entry| { const storage = &b.storage[index.toIndex()]; const submission = &storage.submission; @@ -2638,17 +2761,17 @@ fn batchAwaitAsync(userdata: ?*anyopaque, b: *Io.Batch) Io.Cancelable!void { const result = try operate(t, submission.operation); switch (prev_index) { - .none => b.submissions.head = next_index, + .none => b.submitted.head = next_index, else => b.storage[prev_index.toIndex()].submission.node.next = next_index, } - if (next_index == .none) b.submissions.tail = prev_index; + if (next_index == .none) b.submitted.tail = prev_index; - switch (b.completions.tail) { - .none => b.completions.head = index, + switch (b.completed.tail) { + .none => b.completed.head = index, else => |tail_index| b.storage[tail_index.toIndex()].completion.node.next = index, } storage.* = .{ .completion = .{ .node = .{ .next = .none }, .result = result } }; - b.completions.tail = index; + b.completed.tail = index; } else prev_index = index; index = next_index; } @@ -2662,10 +2785,10 @@ fn batchAwaitAsync(userdata: ?*anyopaque, b: *Io.Batch) Io.Cancelable!void { } } - var tail_index = b.completions.tail; - defer b.completions.tail = tail_index; - var index = b.submissions.head; - errdefer b.submissions.head = index; + var tail_index = b.completed.tail; + defer b.completed.tail = 
tail_index; + var index = b.submitted.head; + errdefer b.submitted.head = index; while (index != .none) { const storage = &b.storage[index.toIndex()]; const submission = &storage.submission; @@ -2673,22 +2796,22 @@ fn batchAwaitAsync(userdata: ?*anyopaque, b: *Io.Batch) Io.Cancelable!void { const result = try operate(t, submission.operation); switch (tail_index) { - .none => b.completions.head = index, + .none => b.completed.head = index, else => b.storage[tail_index.toIndex()].completion.node.next = index, } storage.* = .{ .completion = .{ .node = .{ .next = .none }, .result = result } }; tail_index = index; index = next_index; } - b.submissions = .{ .head = .none, .tail = .none }; + b.submitted = .{ .head = .none, .tail = .none }; } fn batchAwaitConcurrent(userdata: ?*anyopaque, b: *Io.Batch, timeout: Io.Timeout) Io.Batch.AwaitConcurrentError!void { const t: *Threaded = @ptrCast(@alignCast(userdata)); if (is_windows) { const deadline: ?Io.Clock.Timestamp = timeout.toTimestamp(ioBasic(t)); - try batchAwaitWindows(b, true); - while (b.pending.head != .none and b.completions.head == .none) { + try batchDrainSubmittedWindows(b, true); + while (b.pending.head != .none and b.completed.head == .none) { var delay_interval: windows.LARGE_INTEGER = interval: { const d = deadline orelse break :interval std.math.minInt(windows.LARGE_INTEGER); break :interval timeoutToWindowsInterval(.{ .deadline = d }).?; @@ -2701,7 +2824,7 @@ fn batchAwaitConcurrent(userdata: ?*anyopaque, b: *Io.Batch, timeout: Io.Timeout // The thread woke due to the timeout. Although spurious // timeouts are OK, when no deadline is passed we must not // return `error.Timeout`. 
- if (timeout != .none and b.completions.head == .none) return error.Timeout; + if (timeout != .none and b.completed.head == .none) return error.Timeout; }, else => {}, } @@ -2743,7 +2866,7 @@ fn batchAwaitConcurrent(userdata: ?*anyopaque, b: *Io.Batch, timeout: Io.Timeout } } = .{ .gpa = t.allocator, .b = b, .slice = &poll_buffer, .len = 0 }; { - var index = b.submissions.head; + var index = b.submitted.head; while (index != .none) { const submission = &b.storage[index.toIndex()].submission; switch (submission.operation) { @@ -2757,18 +2880,18 @@ fn batchAwaitConcurrent(userdata: ?*anyopaque, b: *Io.Batch, timeout: Io.Timeout switch (poll_storage.len) { 0 => return, 1 => if (timeout == .none) { - const index = b.submissions.head; + const index = b.submitted.head; const storage = &b.storage[index.toIndex()]; const result = try operate(t, storage.submission.operation); - b.submissions = .{ .head = .none, .tail = .none }; + b.submitted = .{ .head = .none, .tail = .none }; - switch (b.completions.tail) { - .none => b.completions.head = index, + switch (b.completed.tail) { + .none => b.completed.head = index, else => |tail_index| b.storage[tail_index.toIndex()].completion.node.next = index, } storage.* = .{ .completion = .{ .node = .{ .next = .none }, .result = result } }; - b.completions.tail = index; + b.completed.tail = index; return; }, else => {}, @@ -2777,7 +2900,7 @@ fn batchAwaitConcurrent(userdata: ?*anyopaque, b: *Io.Batch, timeout: Io.Timeout const deadline = timeout.toTimestamp(t_io); while (true) { const timeout_ms: i32 = t: { - if (b.completions.head != .none) { + if (b.completed.head != .none) { // It is legal to call batchWait with already completed // operations in the ring. 
In such case, we need to avoid // blocking in the poll syscall, but we can still take this @@ -2794,7 +2917,7 @@ fn batchAwaitConcurrent(userdata: ?*anyopaque, b: *Io.Batch, timeout: Io.Timeout switch (posix.errno(rc)) { .SUCCESS => { if (rc == 0) { - if (b.completions.head != .none) { + if (b.completed.head != .none) { // Since there are already completions available in the // queue, this is neither a timeout nor a case for // retrying. @@ -2806,7 +2929,7 @@ fn batchAwaitConcurrent(userdata: ?*anyopaque, b: *Io.Batch, timeout: Io.Timeout return error.Timeout; } var prev_index: Io.Operation.OptionalIndex = .none; - var index = b.submissions.head; + var index = b.submitted.head; for (poll_storage.slice[0..poll_storage.len]) |poll_entry| { const submission = &b.storage[index.toIndex()].submission; const next_index = submission.node.next; @@ -2814,17 +2937,20 @@ fn batchAwaitConcurrent(userdata: ?*anyopaque, b: *Io.Batch, timeout: Io.Timeout const result = try operate(t, submission.operation); switch (prev_index) { - .none => b.submissions.head = next_index, + .none => b.submitted.head = next_index, else => b.storage[prev_index.toIndex()].submission.node.next = next_index, } - if (next_index == .none) b.submissions.tail = prev_index; + if (next_index == .none) b.submitted.tail = prev_index; - switch (b.completions.tail) { - .none => b.completions.head = index, + switch (b.completed.tail) { + .none => b.completed.head = index, else => |tail_index| b.storage[tail_index.toIndex()].completion.node.next = index, } - b.completions.tail = index; - b.storage[index.toIndex()] = .{ .completion = .{ .node = .{ .next = .none }, .result = result } }; + b.completed.tail = index; + b.storage[index.toIndex()] = .{ .completion = .{ + .node = .{ .next = .none }, + .result = result, + } }; } else prev_index = index; index = next_index; } @@ -2841,7 +2967,7 @@ const WindowsBatchPendingOperationContext = extern struct { file: windows.HANDLE, iosb: windows.IO_STATUS_BLOCK, - const Erased = 
[3]usize; + const Erased = Io.Operation.Storage.Pending.Context; comptime { assert(@sizeOf(Erased) <= @sizeOf(WindowsBatchPendingOperationContext)); @@ -2858,24 +2984,9 @@ const WindowsBatchPendingOperationContext = extern struct { fn batchCancel(userdata: ?*anyopaque, b: *Io.Batch) void { const t: *Threaded = @ptrCast(@alignCast(userdata)); - { - var tail_index = b.unused.tail; - defer b.unused.tail = tail_index; - var index = b.submissions.head; - errdefer b.submissions.head = index; - while (index != .none) { - const next_index = b.storage[index.toIndex()].submission.node.next; - switch (tail_index) { - .none => b.unused.head = index, - else => b.storage[tail_index.toIndex()].unused.next = index, - } - b.storage[index.toIndex()] = .{ .unused = .{ .prev = tail_index, .next = .none } }; - tail_index = index; - index = next_index; - } - b.submissions = .{ .head = .none, .tail = .none }; - } if (is_windows) { + if (b.pending.head == .none) return; + waitForApcOrAlert(); var index = b.pending.head; while (index != .none) { const pending = &b.storage[index.toIndex()].pending; @@ -2889,10 +3000,13 @@ fn batchCancel(userdata: ?*anyopaque, b: *Io.Batch) void { t.allocator.free(@as([*]posix.pollfd, @ptrCast(@alignCast(context)))[0..b.storage.len]); b.context = null; } - assert(b.pending.head == .none); } -fn batchApc(apc_context: ?*anyopaque, iosb: *windows.IO_STATUS_BLOCK, _: windows.ULONG) callconv(.winapi) void { +fn batchApc( + apc_context: ?*anyopaque, + iosb: *windows.IO_STATUS_BLOCK, + _: windows.ULONG, +) callconv(.winapi) void { const b: *Io.Batch = @ptrCast(@alignCast(apc_context)); const context: *WindowsBatchPendingOperationContext = @fieldParentPtr("iosb", iosb); const erased_context = context.toErased(); @@ -2918,11 +3032,12 @@ fn batchApc(apc_context: ?*anyopaque, iosb: *windows.IO_STATUS_BLOCK, _: windows b.unused.tail = .fromIndex(index); }, else => { - switch (b.completions.tail) { - .none => b.completions.head = .fromIndex(index), - else => |tail_index| 
b.storage[tail_index.toIndex()].completion.node.next = .fromIndex(index), + switch (b.completed.tail) { + .none => b.completed.head = .fromIndex(index), + else => |tail_index| b.storage[tail_index.toIndex()].completion.node.next = + .fromIndex(index), } - b.completions.tail = .fromIndex(index); + b.completed.tail = .fromIndex(index); const result: Io.Operation.Result = switch (pending.tag) { .file_read_streaming => .{ .file_read_streaming = ntReadFileResult(iosb) }, .file_write_streaming => .{ .file_write_streaming = ntWriteFileResult(iosb) }, @@ -2934,9 +3049,9 @@ fn batchApc(apc_context: ?*anyopaque, iosb: *windows.IO_STATUS_BLOCK, _: windows } /// If `concurrency` is false, `error.ConcurrencyUnavailable` is unreachable. -fn batchAwaitWindows(b: *Io.Batch, concurrency: bool) error{ Canceled, ConcurrencyUnavailable }!void { - var index = b.submissions.head; - errdefer b.submissions.head = index; +fn batchDrainSubmittedWindows(b: *Io.Batch, concurrency: bool) (Io.ConcurrentError || Io.Cancelable)!void { + var index = b.submitted.head; + errdefer b.submitted.head = index; while (index != .none) { const storage = &b.storage[index.toIndex()]; const submission = storage.submission; @@ -2952,7 +3067,7 @@ fn batchAwaitWindows(b: *Io.Batch, concurrency: bool) error{ Canceled, Concurren b.pending.tail = index; const context: *WindowsBatchPendingOperationContext = .fromErased(&storage.pending.context); errdefer { - context.iosb.u.Status = .CANCELLED; + context.iosb = .{ .u = .{ .Status = .CANCELLED }, .Information = undefined }; batchApc(b, &context.iosb, 0); } switch (submission.operation) { @@ -2960,10 +3075,7 @@ fn batchAwaitWindows(b: *Io.Batch, concurrency: bool) error{ Canceled, Concurren var data_index: usize = 0; while (o.data.len - data_index != 0 and o.data[data_index].len == 0) data_index += 1; if (o.data.len - data_index == 0) { - context.iosb = .{ - .u = .{ .Status = .SUCCESS }, - .Information = 0, - }; + context.iosb = .{ .u = .{ .Status = .SUCCESS }, 
.Information = 0 }; batchApc(b, &context.iosb, 0); break :o; } @@ -3023,10 +3135,7 @@ fn batchAwaitWindows(b: *Io.Batch, concurrency: bool) error{ Canceled, Concurren .file_write_streaming => |o| o: { const buffer = windowsWriteBuffer(o.header, o.data, o.splat); if (buffer.len == 0) { - context.iosb = .{ - .u = .{ .Status = .SUCCESS }, - .Information = 0, - }; + context.iosb = .{ .u = .{ .Status = .SUCCESS }, .Information = 0 }; batchApc(b, &context.iosb, 0); break :o; } @@ -3140,7 +3249,7 @@ fn batchAwaitWindows(b: *Io.Batch, concurrency: bool) error{ Canceled, Concurren } index = submission.node.next; } - b.submissions = .{ .head = .none, .tail = .none }; + b.submitted = .{ .head = .none, .tail = .none }; } /// Since Windows only supports writing one contiguous buffer, returns the @@ -3155,7 +3264,7 @@ fn windowsWriteBuffer(header: []const u8, data: []const []const u8, splat: usize if (splat == 0) return &.{}; break :b data[data.len - 1]; }; - return buffer[0..@min(buffer.len, std.math.maxInt(u32))]; + return buffer[0..std.math.lossyCast(u32, buffer.len)]; } fn submitComplete(ring: []u32, complete_tail: *Io.Batch.RingIndex, op: u32) void { @@ -4677,8 +4786,8 @@ fn atomicFileInit( dir: Dir, close_dir_on_deinit: bool, ) Dir.CreateFileAtomicError!File.Atomic { - var random_integer: u64 = undefined; while (true) { + var random_integer: u64 = undefined; t_io.random(@ptrCast(&random_integer)); const tmp_sub_path = std.fmt.hex(random_integer); const file = dir.createFile(t_io, &tmp_sub_path, .{ @@ -14317,11 +14426,11 @@ pub fn posixProtocol(protocol: ?net.Protocol) u32 { return @intFromEnum(protocol orelse return 0); } -fn recoverableOsBugDetected() void { +pub fn recoverableOsBugDetected() void { if (is_debug) unreachable; } -fn clockToPosix(clock: Io.Clock) posix.clockid_t { +pub fn clockToPosix(clock: Io.Clock) posix.clockid_t { return switch (clock) { .real => posix.CLOCK.REALTIME, .awake => switch (native_os) { @@ -14355,7 +14464,7 @@ fn clockToWasi(clock: 
Io.Clock) std.os.wasi.clockid_t { }; } -const linux_statx_request: std.os.linux.STATX = .{ +pub const linux_statx_request: std.os.linux.STATX = .{ .TYPE = true, .MODE = true, .ATIME = true, @@ -14367,7 +14476,7 @@ const linux_statx_request: std.os.linux.STATX = .{ .BLOCKS = true, }; -const linux_statx_check: std.os.linux.STATX = .{ +pub const linux_statx_check: std.os.linux.STATX = .{ .TYPE = true, .MODE = true, .ATIME = false, @@ -14379,7 +14488,7 @@ const linux_statx_check: std.os.linux.STATX = .{ .BLOCKS = false, }; -fn statFromLinux(stx: *const std.os.linux.Statx) Io.UnexpectedError!File.Stat { +pub fn statFromLinux(stx: *const std.os.linux.Statx) Io.UnexpectedError!File.Stat { const actual_mask_int: u32 = @bitCast(stx.mask); const wanted_mask_int: u32 = @bitCast(linux_statx_check); if ((actual_mask_int | wanted_mask_int) != actual_mask_int) return error.Unexpected; @@ -14470,11 +14579,11 @@ fn statFromWasi(st: *const std.os.wasi.filestat_t) File.Stat { }; } -fn timestampFromPosix(timespec: *const posix.timespec) Io.Timestamp { +pub fn timestampFromPosix(timespec: *const posix.timespec) Io.Timestamp { return .{ .nanoseconds = nanosecondsFromPosix(timespec) }; } -fn nanosecondsFromPosix(timespec: *const posix.timespec) i96 { +pub fn nanosecondsFromPosix(timespec: *const posix.timespec) i96 { return @intCast(@as(i128, timespec.sec) * std.time.ns_per_s + timespec.nsec); } @@ -14492,7 +14601,7 @@ fn timestampToPosix(nanoseconds: i96) posix.timespec { }; } -fn setTimestampToPosix(set_ts: File.SetTimestamp) posix.timespec { +pub fn setTimestampToPosix(set_ts: File.SetTimestamp) posix.timespec { return switch (set_ts) { .unchanged => .OMIT, .now => .NOW, @@ -14500,7 +14609,7 @@ fn setTimestampToPosix(set_ts: File.SetTimestamp) posix.timespec { }; } -fn pathToPosix(file_path: []const u8, buffer: *[posix.PATH_MAX]u8) Dir.PathNameError![:0]u8 { +pub fn pathToPosix(file_path: []const u8, buffer: *[posix.PATH_MAX]u8) Dir.PathNameError![:0]u8 { if 
(std.mem.containsAtLeastScalar2(u8, file_path, 0, 1)) return error.BadPathName; // >= rather than > to make room for the null byte if (file_path.len >= buffer.len) return error.NameTooLong; @@ -14996,126 +15105,7 @@ const WindowsEnvironStrings = struct { fn scanEnviron(t: *Threaded) void { mutexLock(&t.mutex); defer mutexUnlock(&t.mutex); - - if (t.environ.initialized) return; - t.environ.initialized = true; - - if (is_windows) { - // This value expires with any call that modifies the environment, - // which is outside of this Io implementation's control, so references - // must be short-lived. - const peb = windows.peb(); - assert(windows.ntdll.RtlEnterCriticalSection(peb.FastPebLock) == .SUCCESS); - defer assert(windows.ntdll.RtlLeaveCriticalSection(peb.FastPebLock) == .SUCCESS); - const ptr = peb.ProcessParameters.Environment; - - var i: usize = 0; - while (ptr[i] != 0) { - - // There are some special environment variables that start with =, - // so we need a special case to not treat = as a key/value separator - // if it's the first character. 
- // https://devblogs.microsoft.com/oldnewthing/20100506-00/?p=14133 - const key_start = i; - if (ptr[i] == '=') i += 1; - while (ptr[i] != 0 and ptr[i] != '=') : (i += 1) {} - const key_w = ptr[key_start..i]; - - const value_start = i + 1; - while (ptr[i] != 0) : (i += 1) {} // skip over '=' and value - const value_w = ptr[value_start..i]; - i += 1; // skip over null byte - - if (windows.eqlIgnoreCaseWtf16(key_w, &.{ 'N', 'O', '_', 'C', 'O', 'L', 'O', 'R' })) { - t.environ.exist.NO_COLOR = true; - } else if (windows.eqlIgnoreCaseWtf16(key_w, &.{ 'C', 'L', 'I', 'C', 'O', 'L', 'O', 'R', '_', 'F', 'O', 'R', 'C', 'E' })) { - t.environ.exist.CLICOLOR_FORCE = true; - } else if (windows.eqlIgnoreCaseWtf16(key_w, &.{ 'Z', 'I', 'G', '_', 'P', 'R', 'O', 'G', 'R', 'E', 'S', 'S' })) { - t.environ.zig_progress_file = file: { - var value_buf: [std.fmt.count("{d}", .{std.math.maxInt(usize)})]u8 = undefined; - const len = std.unicode.calcWtf8Len(value_w); - if (len > value_buf.len) break :file error.UnrecognizedFormat; - assert(std.unicode.wtf16LeToWtf8(&value_buf, value_w) == len); - break :file .{ - .handle = @ptrFromInt(std.fmt.parseInt(usize, value_buf[0..len], 10) catch - break :file error.UnrecognizedFormat), - .flags = .{ .nonblocking = true }, - }; - }; - } - comptime assert(@sizeOf(Environ.String) == 0); - } - } else if (native_os == .wasi and !builtin.link_libc) { - var environ_count: usize = undefined; - var environ_buf_size: usize = undefined; - - switch (std.os.wasi.environ_sizes_get(&environ_count, &environ_buf_size)) { - .SUCCESS => {}, - else => |err| { - t.environ.err = posix.unexpectedErrno(err); - return; - }, - } - if (environ_count == 0) return; - - const environ = t.allocator.alloc([*:0]u8, environ_count) catch |err| { - t.environ.err = err; - return; - }; - defer t.allocator.free(environ); - const environ_buf = t.allocator.alloc(u8, environ_buf_size) catch |err| { - t.environ.err = err; - return; - }; - defer t.allocator.free(environ_buf); - - switch 
(std.os.wasi.environ_get(environ.ptr, environ_buf.ptr)) { - .SUCCESS => {}, - else => |err| { - t.environ.err = posix.unexpectedErrno(err); - return; - }, - } - - for (environ) |env| { - const pair = std.mem.sliceTo(env, 0); - var parts = std.mem.splitScalar(u8, pair, '='); - const key = parts.first(); - if (std.mem.eql(u8, key, "NO_COLOR")) { - t.environ.exist.NO_COLOR = true; - } else if (std.mem.eql(u8, key, "CLICOLOR_FORCE")) { - t.environ.exist.CLICOLOR_FORCE = true; - } - comptime assert(@sizeOf(Environ.String) == 0); - } - } else { - for (t.environ.process_environ.block.slice) |opt_entry| { - const entry = opt_entry.?; - var entry_i: usize = 0; - while (entry[entry_i] != 0 and entry[entry_i] != '=') : (entry_i += 1) {} - const key = entry[0..entry_i]; - - var end_i: usize = entry_i; - while (entry[end_i] != 0) : (end_i += 1) {} - const value = entry[entry_i + 1 .. end_i :0]; - - if (std.mem.eql(u8, key, "NO_COLOR")) { - t.environ.exist.NO_COLOR = true; - } else if (std.mem.eql(u8, key, "CLICOLOR_FORCE")) { - t.environ.exist.CLICOLOR_FORCE = true; - } else if (std.mem.eql(u8, key, "ZIG_PROGRESS")) { - t.environ.zig_progress_file = file: { - break :file .{ - .handle = std.fmt.parseInt(u31, value, 10) catch - break :file error.UnrecognizedFormat, - .flags = .{ .nonblocking = true }, - }; - }; - } else inline for (@typeInfo(Environ.String).@"struct".fields) |field| { - if (std.mem.eql(u8, key, field.name)) @field(t.environ.string, field.name) = value; - } - } - } + t.environ.scan(t.allocator); } fn processReplace(userdata: ?*anyopaque, options: process.ReplaceOptions) process.ReplaceError { @@ -15213,17 +15203,17 @@ fn spawnPosix(t: *Threaded, options: process.SpawnOptions) process.SpawnError!Sp const any_ignore = (options.stdin == .ignore or options.stdout == .ignore or options.stderr == .ignore); const dev_null_fd = if (any_ignore) try getDevNullFd(t) else undefined; - const prog_pipe: [2]posix.fd_t = if (options.progress_node.index != .none) + const 
prog_pipe: [2]posix.fd_t = if (options.progress_node.index != .none) pipe: { // We use CLOEXEC for the same reason as in `pipe_flags`. - try pipe2(.{ .NONBLOCK = true, .CLOEXEC = true }) - else - .{ -1, -1 }; + const pipe = try pipe2(.{ .NONBLOCK = true, .CLOEXEC = true }); + switch (native_os) { + .linux => _ = posix.system.fcntl(pipe[0], posix.F.SETPIPE_SZ, @as(u32, std.Progress.max_packet_len * 2)), + else => {}, + } + break :pipe pipe; + } else .{ -1, -1 }; errdefer destroyPipe(prog_pipe); - if (native_os == .linux and prog_pipe[0] != -1) { - _ = posix.system.fcntl(prog_pipe[0], posix.F.SETPIPE_SZ, @as(u32, std.Progress.max_packet_len * 2)); - } - var arena_allocator = std.heap.ArenaAllocator.init(t.allocator); defer arena_allocator.deinit(); const arena = arena_allocator.allocator(); @@ -17241,16 +17231,7 @@ fn randomMainThread(t: *Threaded, buffer: []u8) void { randomSecure(t, &seed) catch |err| switch (err) { error.Canceled => unreachable, - error.EntropyUnavailable => { - @memset(&seed, 0); - const aslr_addr = @intFromPtr(t); - std.mem.writeInt(usize, seed[seed.len - @sizeOf(usize) ..][0..@sizeOf(usize)], aslr_addr, .native); - switch (native_os) { - .windows => fallbackSeedWindows(&seed), - .wasi => if (builtin.link_libc) fallbackSeedPosix(&seed) else fallbackSeedWasi(&seed), - else => fallbackSeedPosix(&seed), - } - }, + error.EntropyUnavailable => fallbackSeed(t, &seed), }; } t.csprng.rng = .init(seed); @@ -17259,6 +17240,17 @@ fn randomMainThread(t: *Threaded, buffer: []u8) void { t.csprng.rng.fill(buffer); } +pub fn fallbackSeed(aslr_addr: ?*anyopaque, seed: *[Csprng.seed_len]u8) void { + @memset(seed, 0); + std.mem.writeInt(usize, seed[seed.len - @sizeOf(usize) ..][0..@sizeOf(usize)], @intFromPtr(aslr_addr), .native); + const fallbackSeedImpl = switch (native_os) { + .windows => fallbackSeedWindows, + .wasi => if (builtin.link_libc) fallbackSeedPosix else fallbackSeedWasi, + else => fallbackSeedPosix, + }; + fallbackSeedImpl(seed); +} + fn 
fallbackSeedPosix(seed: *[Csprng.seed_len]u8) void { std.mem.writeInt(posix.pid_t, seed[0..@sizeOf(posix.pid_t)], posix.system.getpid(), .native); const i_1 = @sizeOf(posix.pid_t); diff --git a/lib/std/os/linux.zig b/lib/std/os/linux.zig index c89585b9ba..7f16899432 100644 --- a/lib/std/os/linux.zig +++ b/lib/std/os/linux.zig @@ -6717,9 +6717,10 @@ pub const IORING_ACCEPT_MULTISHOT = 1 << 0; /// IORING_OP_MSG_RING command types, stored in sqe->addr pub const IORING_MSG_RING_COMMAND = enum(u8) { /// pass sqe->len as 'res' and off as user_data - DATA, + DATA = 0, /// send a registered fd to another ring - SEND_FD, + SEND_FD = 1, + _, }; // io_uring_sqe.msg_ring_flags (rw_flags in the Zig struct) @@ -6772,6 +6773,8 @@ pub const IORING_CQE_F_SOCK_NONEMPTY = 1 << 2; pub const IORING_CQE_F_NOTIF = 1 << 3; /// If set, the buffer ID set in the completion will get more completions. pub const IORING_CQE_F_BUF_MORE = 1 << 4; +pub const IORING_CQE_F_SKIP = 1 << 5; +pub const IORING_CQE_F_32 = 1 << 15; pub const IORING_CQE_BUFFER_SHIFT = 16; @@ -7068,7 +7071,7 @@ pub const IORING_RESTRICTION = enum(u16) { _, }; -pub const IO_URING_SOCKET_OP = enum(u16) { +pub const IO_URING_SOCKET_OP = enum(u32) { SIOCIN = 0, SIOCOUTQ = 1, GETSOCKOPT = 2, diff --git a/lib/std/process.zig b/lib/std/process.zig index 2e3056b960..fca2d0c773 100644 --- a/lib/std/process.zig +++ b/lib/std/process.zig @@ -60,7 +60,7 @@ pub const CurrentPathError = error{ NameTooLong, /// Not possible on Windows. Always returned on WASI. CurrentDirUnlinked, -} || Io.UnexpectedError; +} || Io.Cancelable || Io.UnexpectedError; /// On Windows, the result is encoded as [WTF-8](https://wtf-8.codeberg.page/). /// On other platforms, the result is an opaque sequence of bytes with no @@ -72,7 +72,7 @@ pub fn currentPath(io: Io, buffer: []u8) CurrentPathError!usize { pub const CurrentPathAllocError = Allocator.Error || error{ /// Not possible on Windows. Always returned on WASI. 
CurrentDirUnlinked, -} || Io.UnexpectedError; +} || Io.Cancelable || Io.UnexpectedError; /// On Windows, the result is encoded as [WTF-8](https://wtf-8.codeberg.page/). /// On other platforms, the result is an opaque sequence of bytes with no @@ -355,7 +355,7 @@ pub const SpawnError = error{ /// On Windows, the volume does not contain a recognized file system. File /// system drivers might not be loaded, or the volume may be corrupt. UnrecognizedVolume, -} || Io.Dir.PathNameError || Io.Cancelable || Io.UnexpectedError; +} || Io.File.OpenError || Io.Dir.PathNameError || Io.Cancelable || Io.UnexpectedError; pub const SpawnOptions = struct { argv: []const []const u8, diff --git a/lib/std/tar.zig b/lib/std/tar.zig index 024a425919..d108a03219 100644 --- a/lib/std/tar.zig +++ b/lib/std/tar.zig @@ -1128,10 +1128,10 @@ fn filePermissions(mode: u32, options: PipeOptions) Io.File.Permissions { test filePermissions { if (!Io.File.Permissions.has_executable_bit) return error.SkipZigTest; - try testing.expectEqual(.default_file, filePermissions(0o744, .{ .mode_mode = .ignore })); - try testing.expectEqual(.executable_file, filePermissions(0o744, .{})); - try testing.expectEqual(.default_file, filePermissions(0o644, .{})); - try testing.expectEqual(.default_file, filePermissions(0o655, .{})); + try testing.expectEqual(Io.File.Permissions.default_file, filePermissions(0o744, .{ .mode_mode = .ignore })); + try testing.expectEqual(Io.File.Permissions.executable_file, filePermissions(0o744, .{})); + try testing.expectEqual(Io.File.Permissions.default_file, filePermissions(0o644, .{})); + try testing.expectEqual(Io.File.Permissions.default_file, filePermissions(0o655, .{})); } test "executable bit" { diff --git a/src/Compilation.zig b/src/Compilation.zig index 4f671b71b9..608bbe4a94 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -4891,11 +4891,7 @@ fn performAllTheWork( work: while (true) { for (&comp.work_queues) |*work_queue| if (work_queue.popFront()) |job| { - try 
processOneJob( - @intFromEnum(Zcu.PerThread.Id.main), - comp, - job, - ); + try processOneJob(.main, comp, job); continue :work; }; if (comp.zcu) |zcu| { @@ -5160,11 +5156,7 @@ pub fn queueJobs(comp: *Compilation, jobs: []const Job) !void { for (jobs) |job| try comp.queueJob(job); } -fn processOneJob( - tid: usize, - comp: *Compilation, - job: Job, -) JobError!void { +fn processOneJob(tid: Zcu.PerThread.Id, comp: *Compilation, job: Job) JobError!void { switch (job) { .codegen_func => |func| { const zcu = comp.zcu.?; @@ -5232,7 +5224,7 @@ fn processOneJob( const named_frame = tracy.namedFrame("analyze_func"); defer named_frame.end(); - const pt: Zcu.PerThread = .activate(comp.zcu.?, @enumFromInt(tid)); + const pt: Zcu.PerThread = .activate(comp.zcu.?, tid); defer pt.deactivate(); pt.ensureFuncBodyUpToDate(func) catch |err| switch (err) { @@ -5245,7 +5237,7 @@ fn processOneJob( const named_frame = tracy.namedFrame("analyze_comptime_unit"); defer named_frame.end(); - const pt: Zcu.PerThread = .activate(comp.zcu.?, @enumFromInt(tid)); + const pt: Zcu.PerThread = .activate(comp.zcu.?, tid); defer pt.deactivate(); const maybe_err: Zcu.SemaError!void = switch (unit.unwrap()) { @@ -5285,7 +5277,7 @@ fn processOneJob( const named_frame = tracy.namedFrame("resolve_type_fully"); defer named_frame.end(); - const pt: Zcu.PerThread = .activate(comp.zcu.?, @enumFromInt(tid)); + const pt: Zcu.PerThread = .activate(comp.zcu.?, tid); defer pt.deactivate(); Type.fromInterned(ty).resolveFully(pt) catch |err| switch (err) { error.OutOfMemory, error.Canceled => |e| return e, @@ -5296,7 +5288,7 @@ fn processOneJob( const named_frame = tracy.namedFrame("analyze_mod"); defer named_frame.end(); - const pt: Zcu.PerThread = .activate(comp.zcu.?, @enumFromInt(tid)); + const pt: Zcu.PerThread = .activate(comp.zcu.?, tid); defer pt.deactivate(); pt.semaMod(mod) catch |err| switch (err) { error.OutOfMemory, error.Canceled => |e| return e, @@ -5642,13 +5634,14 @@ fn workerUpdateFile( prog_node: 
std.Progress.Node, group: *Io.Group, ) void { - const tid = Compilation.getTid(); const io = comp.io; + const tid: Zcu.PerThread.Id = .acquire(io); + defer tid.release(io); const child_prog_node = prog_node.start(fs.path.basename(file.path.sub_path), 0); defer child_prog_node.end(); - const pt: Zcu.PerThread = .activate(comp.zcu.?, @enumFromInt(tid)); + const pt: Zcu.PerThread = .activate(comp.zcu.?, tid); defer pt.deactivate(); pt.updateFile(file_index, file) catch |err| { pt.reportRetryableFileError(file_index, "unable to load '{s}': {s}", .{ fs.path.basename(file.path.sub_path), @errorName(err) }) catch |oom| switch (oom) { @@ -5708,9 +5701,10 @@ fn workerUpdateBuiltinFile(comp: *Compilation, file: *Zcu.File) void { } fn workerUpdateEmbedFile(comp: *Compilation, ef_index: Zcu.EmbedFile.Index, ef: *Zcu.EmbedFile) void { - const tid = Compilation.getTid(); const io = comp.io; - comp.detectEmbedFileUpdate(@enumFromInt(tid), ef_index, ef) catch |err| switch (err) { + const tid: Zcu.PerThread.Id = .acquire(io); + defer tid.release(io); + comp.detectEmbedFileUpdate(tid, ef_index, ef) catch |err| switch (err) { error.OutOfMemory => { comp.mutex.lockUncancelable(io); defer comp.mutex.unlock(io); @@ -5868,7 +5862,7 @@ pub fn translateC( } var stdout: []u8 = undefined; - try @import("main.zig").translateC(gpa, arena, io, argv.items, environ_map, prog_node, &stdout); + try @import("main.zig").translateC(gpa, arena, io, argv.items, environ_map, prog_node, comp.thread_limit, &stdout); if (out_dep_path) |dep_file_path| add_deps: { if (comp.verbose_cimport) log.info("processing dep file at {s}", .{dep_file_path}); @@ -8394,17 +8388,3 @@ pub fn compilerRtOptMode(comp: Compilation) std.builtin.OptimizeMode { pub fn compilerRtStrip(comp: Compilation) bool { return comp.root_mod.strip; } - -/// This is a temporary workaround put in place to migrate from `std.Thread.Pool` -/// to `std.Io.Threaded` for asynchronous/concurrent work. 
The eventual solution -/// will likely involve significant changes to the `InternPool` implementation. -pub fn getTid() usize { - if (my_tid == null) my_tid = next_tid.fetchAdd(1, .monotonic); - return my_tid.?; -} -pub fn setMainThread() void { - my_tid = 0; -} -/// TID 0 is reserved for the main thread. -var next_tid: std.atomic.Value(usize) = .init(1); -threadlocal var my_tid: ?usize = null; diff --git a/src/InternPool.zig b/src/InternPool.zig index d713adcd30..595bedd547 100644 --- a/src/InternPool.zig +++ b/src/InternPool.zig @@ -3,6 +3,7 @@ const InternPool = @This(); const builtin = @import("builtin"); +const build_options = @import("build_options"); const std = @import("std"); const Io = std.Io; @@ -86,13 +87,11 @@ dep_entries: std.ArrayList(DepEntry), /// garbage collection pass. free_dep_entries: std.ArrayList(DepEntry.Index), -/// Whether a multi-threaded intern pool is useful. -/// Currently `false` until the intern pool is actually accessed -/// from multiple threads to reduce the cost of this data structure. -const want_multi_threaded = true; - /// Whether a single-threaded intern pool impl is in use. -pub const single_threaded = builtin.single_threaded or !want_multi_threaded; +pub const single_threaded = switch (build_options.io_mode) { + .threaded => builtin.single_threaded, + .evented => false, // even without threads, evented can be accessed from multiple tasks at a time +}; pub const empty: InternPool = .{ .locals = &.{}, @@ -6915,7 +6914,7 @@ pub fn init(ip: *InternPool, gpa: Allocator, io: Io, available_threads: usize) !
assert(ip.locals.len == 0 and ip.shards.len == 0); assert(available_threads > 0 and available_threads <= std.math.maxInt(u8)); - const used_threads = if (single_threaded) 1 else available_threads; + const used_threads = if (single_threaded) 1 else @max(available_threads, 2); ip.locals = try gpa.alloc(Local, used_threads); @memset(ip.locals, .{ .shared = .{ diff --git a/src/Zcu.zig b/src/Zcu.zig index 6a2872eae8..f2d6dbf497 100644 --- a/src/Zcu.zig +++ b/src/Zcu.zig @@ -4954,8 +4954,10 @@ pub const CodegenTaskPool = struct { // We own `air` now, so we are responsbile for freeing it. var air = orig_air; defer air.deinit(zcu.comp.gpa); - const tid = Compilation.getTid(); - const pt: Zcu.PerThread = .activate(zcu, @enumFromInt(tid)); + const io = zcu.comp.io; + const tid: Zcu.PerThread.Id = .acquire(io); + defer tid.release(io); + const pt: Zcu.PerThread = .activate(zcu, tid); defer pt.deactivate(); return pt.runCodegen(func_index, &air); } @@ -4964,8 +4966,10 @@ pub const CodegenTaskPool = struct { func_index: InternPool.Index, air: *Air, ) CodegenResult { - const tid = Compilation.getTid(); - const pt: Zcu.PerThread = .activate(zcu, @enumFromInt(tid)); + const io = zcu.comp.io; + const tid: Zcu.PerThread.Id = .acquire(io); + defer tid.release(io); + const pt: Zcu.PerThread = .activate(zcu, tid); defer pt.deactivate(); return pt.runCodegen(func_index, air); } diff --git a/src/Zcu/PerThread.zig b/src/Zcu/PerThread.zig index 56b09d9da1..5472afc5f0 100644 --- a/src/Zcu/PerThread.zig +++ b/src/Zcu/PerThread.zig @@ -41,13 +41,86 @@ zcu: *Zcu, tid: Id, pub const IdBacking = u7; -pub const Id = if (InternPool.single_threaded) enum { main } else enum(IdBacking) { main, _ }; +pub const Id = if (InternPool.single_threaded) enum { + main, + + pub fn allocate(arena: Allocator, n: usize) Allocator.Error!void { + _ = arena; + _ = n; + } + pub fn acquire(io: std.Io) Id { + _ = io; + return .main; + } + pub fn release(tid: Id, io: std.Io) void { + _ = io; + _ = tid; + } +} else 
enum(IdBacking) { + main, + _, + + var tid_mutex: std.Io.Mutex = .init; + var tid_cond: std.Io.Condition = .init; + /// This is a temporary workaround put in place to migrate from `std.Thread.Pool` + /// to `std.Io.Threaded` for asynchronous/concurrent work. The eventual solution + /// will likely involve significant changes to the `InternPool` implementation. + var available_tids: std.ArrayList(Id) = .empty; + threadlocal var recursive_depth: usize = 0; + threadlocal var recursive_tid: Id = .main; + + pub fn allocate(arena: Allocator, n: usize) Allocator.Error!void { + assert(available_tids.items.len == 0); + try available_tids.ensureTotalCapacityPrecise(arena, n - 1); + for (1..n) |tid| available_tids.appendAssumeCapacity(@enumFromInt(tid)); + } + pub fn acquire(io: std.Io) Id { + switch (build_options.io_mode) { + .threaded => { + recursive_depth += 1; + if (recursive_depth > 1) { + assert(recursive_tid != .main); + return recursive_tid; + } + }, + .evented => {}, + } + tid_mutex.lockUncancelable(io); + defer tid_mutex.unlock(io); + while (true) { + if (available_tids.pop()) |tid| { + switch (build_options.io_mode) { + .threaded => recursive_tid = tid, + .evented => {}, + } + return tid; + } + tid_cond.waitUncancelable(io, &tid_mutex); + } + } + pub fn release(tid: Id, io: std.Io) void { + switch (build_options.io_mode) { + .threaded => { + assert(recursive_tid == tid); + recursive_depth -= 1; + if (recursive_depth > 0) return; + recursive_tid = .main; + }, + .evented => {}, + } + { + tid_mutex.lockUncancelable(io); + defer tid_mutex.unlock(io); + available_tids.appendAssumeCapacity(tid); + } + tid_cond.signal(io); + } +}; pub fn activate(zcu: *Zcu, tid: Id) Zcu.PerThread { zcu.intern_pool.activate(); return .{ .zcu = zcu, .tid = tid }; } - pub fn deactivate(pt: Zcu.PerThread) void { pt.zcu.intern_pool.deactivate(); } diff --git a/src/crash_report.zig b/src/crash_report.zig index e56bc7cec5..41d56e8aaf 100644 --- a/src/crash_report.zig +++ b/src/crash_report.zig 
@@ -1,34 +1,14 @@ -/// We override the panic implementation to our own one, so we can print our own information before -/// calling the default panic handler. This declaration must be re-exposed from `@import("root")`. -pub const panic = if (dev.env == .bootstrap) - std.debug.simple_panic -else - std.debug.FullPanic(panicImpl); - -/// We let std install its segfault handler, but we override the target-agnostic handler it calls, -/// so we can print our own information before calling the default segfault logic. This declaration -/// must be re-exposed from `@import("root")`. -pub const debug = struct { - pub const handleSegfault = handleSegfaultImpl; -}; - /// Printed in panic messages when suggesting a command to run, allowing copy-pasting the command. /// Set by `main` as soon as arguments are known. The value here is a default in case we somehow /// crash earlier than that. pub var zig_argv0: []const u8 = "zig"; -fn handleSegfaultImpl(addr: ?usize, name: []const u8, opt_ctx: ?std.debug.CpuContextPtr) noreturn { - @branchHint(.cold); - dumpCrashContext() catch {}; - std.debug.defaultHandleSegfault(addr, name, opt_ctx); -} -fn panicImpl(msg: []const u8, first_trace_addr: ?usize) noreturn { - @branchHint(.cold); - dumpCrashContext() catch {}; - std.debug.defaultPanic(msg, first_trace_addr orelse @returnAddress()); -} +const enabled = switch (build_options.io_mode) { + .threaded => build_options.enable_debug_extensions, + .evented => false, // would use threadlocals in a way incompatible with evented +}; -pub const AnalyzeBody = if (build_options.enable_debug_extensions) struct { +pub const AnalyzeBody = if (enabled) struct { parent: ?*AnalyzeBody, sema: *Sema, block: *Sema.Block, @@ -63,7 +43,7 @@ pub const AnalyzeBody = if (build_options.enable_debug_extensions) struct { pub inline fn setBodyIndex(_: @This(), _: usize) void {} }; -pub const CodegenFunc = if (build_options.enable_debug_extensions) struct { +pub const CodegenFunc = if (enabled) struct { zcu: *const 
Zcu, func_index: InternPool.Index, threadlocal var current: ?CodegenFunc = null; @@ -82,23 +62,14 @@ pub const CodegenFunc = if (build_options.enable_debug_extensions) struct { pub fn stop(_: InternPool.Index) void {} }; -fn dumpCrashContext() Io.Writer.Error!void { +pub fn dumpCrashContext(terminal: Io.Terminal) Io.Writer.Error!void { const S = struct { - /// In the case of recursive panics or segfaults, don't print the context for a second time. - threadlocal var already_dumped = false; /// TODO: make this unnecessary. It exists because `print_zir` currently needs an allocator, /// but that shouldn't be necessary---it's already only used in one place. - threadlocal var crash_heap: [64 * 1024]u8 = undefined; + var crash_heap: [64 * 1024]u8 = undefined; }; - if (S.already_dumped) return; - S.already_dumped = true; - - // TODO: this does mean that a different thread could grab the stderr mutex between the context - // and the actual panic printing, which would be quite confusing. - const stderr = std.debug.lockStderr(&.{}); - defer std.debug.unlockStderr(); - const w = &stderr.file_writer.interface; + const w = terminal.writer; try w.writeAll("Compiler crash context:\n"); if (CodegenFunc.current) |*cg| { diff --git a/src/introspect.zig b/src/introspect.zig index 0cba9fdcc4..13d0052093 100644 --- a/src/introspect.zig +++ b/src/introspect.zig @@ -54,11 +54,7 @@ pub fn findZigLibDir(gpa: Allocator, io: Io) !Cache.Directory { /// Like `std.process.currentPathAlloc`, but also resolves the path with `Dir.path.resolve`. This /// means the path has no repeated separators, no "." or ".." components, and no trailing separator. /// On WASI, "" is returned instead of ".". 
-pub fn getResolvedCwd(io: Io, gpa: Allocator) error{ - OutOfMemory, - CurrentDirUnlinked, - Unexpected, -}![]u8 { +pub fn getResolvedCwd(io: Io, gpa: Allocator) std.process.CurrentPathAllocError![]u8 { if (builtin.target.os.tag == .wasi) { if (std.debug.runtime_safety) { const cwd = try std.process.currentPathAlloc(io, gpa); diff --git a/src/link.zig b/src/link.zig index c8e46540ba..72c5d27105 100644 --- a/src/link.zig +++ b/src/link.zig @@ -1500,12 +1500,12 @@ pub fn doPrelinkTask(comp: *Compilation, task: PrelinkTask) void { }, } } -pub fn doZcuTask(comp: *Compilation, tid: usize, task: ZcuTask) void { +pub fn doZcuTask(comp: *Compilation, tid: Zcu.PerThread.Id, task: ZcuTask) void { const io = comp.io; const diags = &comp.link_diags; const zcu = comp.zcu.?; const ip = &zcu.intern_pool; - const pt: Zcu.PerThread = .activate(zcu, @enumFromInt(tid)); + const pt: Zcu.PerThread = .activate(zcu, tid); defer pt.deactivate(); var timer = comp.startTimer(); @@ -1610,8 +1610,8 @@ pub fn doZcuTask(comp: *Compilation, tid: usize, task: ZcuTask) void { } } } -pub fn doIdleTask(comp: *Compilation, tid: usize) error{ OutOfMemory, LinkFailure }!bool { - return if (comp.bin_file) |lf| lf.idle(@enumFromInt(tid)) else false; +pub fn doIdleTask(comp: *Compilation, tid: Zcu.PerThread.Id) error{ OutOfMemory, LinkFailure }!bool { + return if (comp.bin_file) |lf| lf.idle(tid) else false; } /// After the main pipeline is done, but before flush, the compilation may need to link one final /// `Nav` into the binary: the `builtin.test_functions` value. 
Since the link thread isn't running diff --git a/src/link/Queue.zig b/src/link/Queue.zig index b716800bae..a49c191567 100644 --- a/src/link/Queue.zig +++ b/src/link/Queue.zig @@ -96,12 +96,12 @@ pub fn enqueuePrelink(q: *Queue, comp: *Compilation, tasks: []const PrelinkTask) pub fn enqueueZcu( q: *Queue, comp: *Compilation, - tid: usize, + tid: Zcu.PerThread.Id, task: ZcuTask, ) Io.Cancelable!void { const io = comp.io; - assert(tid == 0); + assert(tid == .main); if (q.future != null) { if (q.zcu_queue.putOne(io, task)) |_| { @@ -148,8 +148,9 @@ pub fn finishZcuQueue(q: *Queue, comp: *Compilation) void { } fn runLinkTasks(q: *Queue, comp: *Compilation) void { - const tid = Compilation.getTid(); const io = comp.io; + const tid: Zcu.PerThread.Id = .acquire(io); + defer tid.release(io); var have_idle_tasks = true; @@ -198,7 +199,7 @@ fn runLinkTasks(q: *Queue, comp: *Compilation) void { } } } -fn runIdleTask(comp: *Compilation, tid: usize) bool { +fn runIdleTask(comp: *Compilation, tid: Zcu.PerThread.Id) bool { return link.doIdleTask(comp, tid) catch |err| switch (err) { error.OutOfMemory => have_more: { comp.link_diags.setAllocFailure(); @@ -217,5 +218,6 @@ const Compilation = @import("../Compilation.zig"); const InternPool = @import("../InternPool.zig"); const link = @import("../link.zig"); const PrelinkTask = link.PrelinkTask; -const ZcuTask = link.ZcuTask; const Queue = @This(); +const Zcu = @import("../Zcu.zig"); +const ZcuTask = link.ZcuTask; diff --git a/src/main.zig b/src/main.zig index ed194b4767..caa076f417 100644 --- a/src/main.zig +++ b/src/main.zig @@ -52,8 +52,11 @@ pub const std_options: std.Options = .{ }; pub const std_options_cwd = if (native_os == .wasi) wasi_cwd else null; -pub const panic = crash_report.panic; -pub const debug = crash_report.debug; +pub const debug = struct { + pub fn printCrashContext(terminal: Io.Terminal) void { + crash_report.dumpCrashContext(terminal) catch {}; + } +}; var preopens: std.process.Preopens = .empty; pub fn 
wasi_cwd() Io.Dir { @@ -158,25 +161,55 @@ pub fn log( std.log.defaultLog(level, scope, format, args); } -var debug_allocator: std.heap.DebugAllocator(.{ - .stack_trace_frames = build_options.mem_leak_frames, -}) = .init; - const use_debug_allocator = build_options.debug_gpa or (native_os != .wasi and !builtin.link_libc and switch (builtin.mode) { .Debug, .ReleaseSafe => true, .ReleaseFast, .ReleaseSmall => false, }); +const RootAllocator = if (use_debug_allocator) std.heap.DebugAllocator(.{ + .stack_trace_frames = build_options.mem_leak_frames, + .thread_safe = switch (build_options.io_mode) { + .threaded => true, + .evented => false, + }, +}) else struct { + pub const init: RootAllocator = .{}; + pub fn allocator(_: RootAllocator) Allocator { + if (native_os == .wasi) return std.heap.wasm_allocator; + if (builtin.link_libc) return std.heap.c_allocator; + return std.heap.smp_allocator; + } + pub fn deinit(_: RootAllocator) std.heap.Check { + return .ok; + } +}; + pub fn main(init: std.process.Init.Minimal) anyerror!void { - const gpa = gpa: { - if (use_debug_allocator) break :gpa debug_allocator.allocator(); - if (native_os == .wasi) break :gpa std.heap.wasm_allocator; - if (builtin.link_libc) break :gpa std.heap.c_allocator; - break :gpa std.heap.smp_allocator; - }; - defer if (use_debug_allocator) { - _ = debug_allocator.deinit(); + var root_allocator: RootAllocator = .init; + defer _ = root_allocator.deinit(); + const root_gpa = root_allocator.allocator(); + var io_impl: IoImpl = undefined; + switch (build_options.io_mode) { + .threaded => io_impl = .init(root_gpa, .{ + .stack_size = thread_stack_size, + + .argv0 = .init(init.args), + .environ = init.environ, + }), + .evented => try io_impl.init(root_gpa, .{ + .argv0 = .init(init.args), + .environ = init.environ, + + .backing_allocator_needs_mutex = use_debug_allocator, + }), + } + defer io_impl.deinit(); + io_impl_ptr = &io_impl; + const io = io_impl.io(); + const gpa = switch (build_options.io_mode) { + 
.threaded => root_gpa, + .evented => io_impl.allocator(), }; var arena_instance = std.heap.ArenaAllocator.init(gpa); defer arena_instance.deinit(); @@ -193,17 +226,6 @@ pub fn main(init: std.process.Init.Minimal) anyerror!void { var environ_map = init.environ.createMap(arena) catch |err| fatal("failed to parse environment: {t}", .{err}); - Compilation.setMainThread(); - - var threaded: Io.Threaded = .init(gpa, .{ - .argv0 = .init(init.args), - .environ = init.environ, - }); - defer threaded.deinit(); - threaded_impl_ptr = &threaded; - threaded.stack_size = thread_stack_size; - const io = threaded.io(); - if (tracy.enable_allocation) { var gpa_tracy = tracy.tracyAllocator(gpa); return mainArgs(gpa_tracy.allocator(), arena, io, args, &environ_map); @@ -3400,7 +3422,7 @@ fn buildOutputType( @max(n_jobs orelse std.Thread.getCpuCount() catch 1, 1), std.math.maxInt(Zcu.PerThread.IdBacking), ); - setThreadLimit(thread_limit); + try setThreadLimit(arena, thread_limit); for (create_module.c_source_files.items) |*src| { dev.check(.c_compiler); @@ -4731,13 +4753,13 @@ pub fn translateC( argv: []const []const u8, environ_map: *const process.Environ.Map, prog_node: std.Progress.Node, + thread_limit: usize, capture: ?*[]u8, ) !void { - try jitCmd(gpa, arena, io, argv, environ_map, .{ + try jitCmdInner(gpa, arena, io, argv, environ_map, prog_node, thread_limit, .{ .cmd_name = "translate-c", .root_src_path = "translate-c/main.zig", .depend_on_aro = true, - .progress_node = prog_node, .capture = capture, }); } @@ -5187,7 +5209,7 @@ fn cmdBuild(gpa: Allocator, arena: Allocator, io: Io, args: []const []const u8, @max(n_jobs orelse std.Thread.getCpuCount() catch 1, 1), std.math.maxInt(Zcu.PerThread.IdBacking), ); - setThreadLimit(thread_limit); + try setThreadLimit(arena, thread_limit); // Dummy http client that is not actually used when fetch_command is unsupported. // Prevents bootstrap from depending on a bunch of unnecessary stuff. 
@@ -5651,7 +5673,7 @@ const JitCmdOptions = struct { capture: ?*[]u8 = null, /// Send error bundles via std.zig.Server over stdout server: bool = false, - progress_node: ?std.Progress.Node = null, + color: Color = .auto, }; fn jitCmd( @@ -5664,12 +5686,30 @@ fn jitCmd( ) !void { dev.check(.jit_command); - const color: Color = .auto; - const root_prog_node = if (options.progress_node) |node| node else std.Progress.start(io, .{ - .disable_printing = (color == .off), + const root_prog_node = std.Progress.start(io, .{ + .disable_printing = (options.color == .off), }); defer root_prog_node.end(); + const thread_limit = @min( + @max(std.Thread.getCpuCount() catch 1, 1), + std.math.maxInt(Zcu.PerThread.IdBacking), + ); + try setThreadLimit(arena, thread_limit); + + return jitCmdInner(gpa, arena, io, args, environ_map, root_prog_node, thread_limit, options); +} + +fn jitCmdInner( + gpa: Allocator, + arena: Allocator, + io: Io, + args: []const []const u8, + environ_map: *const process.Environ.Map, + root_prog_node: std.Progress.Node, + thread_limit: usize, + options: JitCmdOptions, +) !void { const target_query: std.Target.Query = .{}; const resolved_target: Package.Module.ResolvedTarget = .{ .result = std.zig.resolveTargetQueryOrFatal(io, target_query), @@ -5702,12 +5742,6 @@ fn jitCmd( ); defer dirs.deinit(io); - const thread_limit = @min( - @max(std.Thread.getCpuCount() catch 1, 1), - std.math.maxInt(Zcu.PerThread.IdBacking), - ); - setThreadLimit(thread_limit); - var child_argv: std.ArrayList([]const u8) = .empty; try child_argv.ensureUnusedCapacity(arena, args.len + 4); @@ -5795,7 +5829,7 @@ fn jitCmd( process.exit(2); } } else { - updateModule(comp, color, root_prog_node) catch |err| switch (err) { + updateModule(comp, options.color, root_prog_node) catch |err| switch (err) { error.CompileErrorsReported => process.exit(2), else => |e| return e, }; @@ -7777,15 +7811,25 @@ fn addLibDirectoryWarn2( }); } -var threaded_impl_ptr: *Io.Threaded = undefined; -fn 
setThreadLimit(n: usize) void { - // We want a maximum of n total threads to keep the InternPool happy, but - // the main thread doesn't count towards the limits, so use n-1. Also, the - // linker can run concurrently, so we need to set both the async *and* the - // concurrency limit. - const limit: Io.Limit = .limited(n - 1); - threaded_impl_ptr.setAsyncLimit(limit); - threaded_impl_ptr.concurrent_limit = limit; +const IoImpl = switch (build_options.io_mode) { + .threaded => Io.Threaded, + .evented => Io.Evented, +}; +var io_impl_ptr: *IoImpl = undefined; +fn setThreadLimit(arena: std.mem.Allocator, n: usize) Allocator.Error!void { + switch (build_options.io_mode) { + .threaded => { + // We want a maximum of n total threads to keep the InternPool happy, but + // the main thread doesn't count towards the limits, so use n-1. Also, the + // linker can run concurrently, so we need to set both the async *and* the + // concurrency limit. + const limit: Io.Limit = .limited(n - 1); + io_impl_ptr.setAsyncLimit(limit); + io_impl_ptr.concurrent_limit = limit; + }, + .evented => {}, + } + try Zcu.PerThread.Id.allocate(arena, @max(n, 2)); } fn randInt(io: Io, comptime T: type) T { diff --git a/stage1/config.zig.in b/stage1/config.zig.in index d5c9a7ebbf..087585cd7e 100644 --- a/stage1/config.zig.in +++ b/stage1/config.zig.in @@ -13,4 +13,5 @@ pub const value_tracing = false; pub const skip_non_native = false; pub const debug_gpa = false; pub const dev = .core; +pub const io_mode: enum { threaded, evented } = .threaded; pub const value_interpret_mode = .direct;