From d6a1e73142396732be80a7e0757cca1c07551d30 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sun, 28 Dec 2025 20:46:02 -0800 Subject: [PATCH] std: start wrangling environment variables and process args this commit is unfinished. It marks a spot where I wanted to start moving child process stuff below the std.Io.VTable --- lib/compiler/build_runner.zig | 14 +- lib/std/Build.zig | 15 +- lib/std/Build/Step.zig | 40 +- lib/std/Build/Step/Options.zig | 2 +- lib/std/Build/Step/Run.zig | 31 +- lib/std/Build/WebServer.zig | 12 +- lib/std/Io.zig | 6 + lib/std/Io/Dir.zig | 7 +- lib/std/Io/Threaded.zig | 4 +- lib/std/Progress.zig | 2 +- lib/std/debug.zig | 2 +- lib/std/debug/ElfFile.zig | 8 +- lib/std/debug/SelfInfo/Elf.zig | 5 +- lib/std/os.zig | 65 +- lib/std/posix.zig | 118 +- lib/std/process.zig | 1898 ++------------------- lib/std/process/Args.zig | 958 +++++++++++ lib/std/process/Child.zig | 68 +- lib/std/process/Environ.zig | 764 +++++++++ lib/std/start.zig | 132 +- lib/std/std.zig | 6 +- lib/std/zig.zig | 17 +- lib/std/zig/system/darwin.zig | 7 +- test/link/macho.zig | 2 +- test/standalone/ios/build.zig | 2 +- test/standalone/windows_bat_args/fuzz.zig | 4 +- test/standalone/windows_bat_args/test.zig | 4 +- test/standalone/windows_paths/test.zig | 6 +- tools/doctest.zig | 2 +- 29 files changed, 2095 insertions(+), 2106 deletions(-) create mode 100644 lib/std/process/Args.zig create mode 100644 lib/std/process/Environ.zig diff --git a/lib/compiler/build_runner.zig b/lib/compiler/build_runner.zig index f5315d6496..91e9d20dbb 100644 --- a/lib/compiler/build_runner.zig +++ b/lib/compiler/build_runner.zig @@ -24,7 +24,7 @@ pub const std_options: std.Options = .{ .crypto_fork_safety = false, }; -pub fn main() !void { +pub fn main(init: process.Init.Minimal) !void { // The build runner is often short-lived, but thanks to `--watch` and `--webui`, that's not // always the case. So, we do need a true gpa for some things. 
var debug_gpa_state: std.heap.DebugAllocator(.{}) = .init; @@ -37,7 +37,7 @@ pub fn main() !void { var thread_safe_arena: std.heap.ThreadSafeAllocator = .{ .child_allocator = single_threaded_arena.allocator() }; const arena = thread_safe_arena.allocator(); - const args = try process.argsAlloc(arena); + const args = try init.args.toSlice(arena); var threaded: std.Io.Threaded = .init(gpa, .{}); defer threaded.deinit(); @@ -83,7 +83,7 @@ pub fn main() !void { .manifest_dir = try local_cache_directory.handle.createDirPathOpen(io, "h", .{}), }, .zig_exe = zig_exe, - .env_map = try process.getEnvMap(arena), + .env_map = try init.environ.createMap(arena), .global_cache_root = global_cache_directory, .zig_lib_directory = zig_lib_directory, .host = .{ @@ -126,13 +126,13 @@ pub fn main() !void { var debounce_interval_ms: u16 = 50; var webui_listen: ?Io.net.IpAddress = null; - if (try std.zig.EnvVar.ZIG_BUILD_ERROR_STYLE.get(arena)) |str| { + if (std.zig.EnvVar.ZIG_BUILD_ERROR_STYLE.get(&graph.env_map)) |str| { if (std.meta.stringToEnum(ErrorStyle, str)) |style| { error_style = style; } } - if (try std.zig.EnvVar.ZIG_BUILD_MULTILINE_ERRORS.get(arena)) |str| { + if (std.zig.EnvVar.ZIG_BUILD_MULTILINE_ERRORS.get(&graph.env_map)) |str| { if (std.meta.stringToEnum(MultilineErrors, str)) |style| { multiline_errors = style; } @@ -429,8 +429,8 @@ pub fn main() !void { } } - const NO_COLOR = std.zig.EnvVar.NO_COLOR.isSet(); - const CLICOLOR_FORCE = std.zig.EnvVar.CLICOLOR_FORCE.isSet(); + const NO_COLOR = std.zig.EnvVar.NO_COLOR.isSet(&graph.env_map); + const CLICOLOR_FORCE = std.zig.EnvVar.CLICOLOR_FORCE.isSet(&graph.env_map); graph.stderr_mode = switch (color) { .auto => try .detect(io, .stderr(), NO_COLOR, CLICOLOR_FORCE), diff --git a/lib/std/Build.zig b/lib/std/Build.zig index dc3489bba3..8867ac05df 100644 --- a/lib/std/Build.zig +++ b/lib/std/Build.zig @@ -12,7 +12,7 @@ const StringHashMap = std.StringHashMap; const Allocator = std.mem.Allocator; const Target = std.Target; 
const process = std.process; -const EnvMap = std.process.EnvMap; +const EnvMap = std.process.Environ.Map; const File = std.Io.File; const Sha256 = std.crypto.hash.sha2.Sha256; const ArrayList = std.ArrayList; @@ -1840,13 +1840,12 @@ pub fn runAllowFail( const io = b.graph.io; const max_output_size = 400 * 1024; - var child = std.process.Child.init(argv, b.allocator); + var child = std.process.Child.init(b.allocator, argv, .{ .map = &b.graph.env_map }); child.stdin_behavior = .Ignore; child.stdout_behavior = .Pipe; child.stderr_behavior = stderr_behavior; - child.env_map = &b.graph.env_map; - try Step.handleVerbose2(b, null, child.env_map, argv); + try Step.handleVerbose2(b, null, child.environ.map, argv); try child.spawn(io); var stdout_reader = child.stdout.?.readerStreaming(io, &.{}); @@ -1877,17 +1876,15 @@ pub fn runAllowFail( pub fn run(b: *Build, argv: []const []const u8) []u8 { if (!process.can_spawn) { std.debug.print("unable to spawn the following command: cannot spawn child process\n{s}\n", .{ - try Step.allocPrintCmd(b.allocator, null, argv), + try Step.allocPrintCmd(b.allocator, null, null, argv), }); process.exit(1); } var code: u8 = undefined; return b.runAllowFail(argv, &code, .Inherit) catch |err| { - const printed_cmd = Step.allocPrintCmd(b.allocator, null, argv) catch @panic("OOM"); - std.debug.print("unable to spawn the following command: {s}\n{s}\n", .{ - @errorName(err), printed_cmd, - }); + const printed_cmd = Step.allocPrintCmd(b.allocator, null, null, argv) catch @panic("OOM"); + std.debug.print("unable to spawn the following command: {t}\n{s}\n", .{ err, printed_cmd }); process.exit(1); }; } diff --git a/lib/std/Build/Step.zig b/lib/std/Build/Step.zig index 243dee8604..5d6daf9a89 100644 --- a/lib/std/Build/Step.zig +++ b/lib/std/Build/Step.zig @@ -349,18 +349,20 @@ pub fn captureChildProcess( progress_node: std.Progress.Node, argv: []const []const u8, ) !std.process.Child.RunResult { - const arena = s.owner.allocator; - const io = 
s.owner.graph.io; + const graph = s.owner.graph; + const arena = graph.arena; + const io = graph.io; // If an error occurs, it's happened in this command: assert(s.result_failed_command == null); - s.result_failed_command = try allocPrintCmd(gpa, null, argv); + s.result_failed_command = try allocPrintCmd(gpa, null, null, argv); try handleChildProcUnsupported(s); try handleVerbose(s.owner, null, argv); const result = std.process.Child.run(arena, io, .{ .argv = argv, + .environ = .{ .map = &graph.env_map }, .progress_node = progress_node, }) catch |err| return s.fail("failed to run {s}: {t}", .{ argv[0], err }); @@ -406,7 +408,7 @@ pub fn evalZigProcess( // If an error occurs, it's happened in this command: assert(s.result_failed_command == null); - s.result_failed_command = try allocPrintCmd(gpa, null, argv); + s.result_failed_command = try allocPrintCmd(gpa, null, null, argv); if (s.getZigProcess()) |zp| update: { assert(watch); @@ -447,8 +449,7 @@ pub fn evalZigProcess( try handleChildProcUnsupported(s); try handleVerbose(s.owner, null, argv); - var child = std.process.Child.init(argv, arena); - child.env_map = &b.graph.env_map; + var child = std.process.Child.init(arena, argv, .{ .map = &b.graph.env_map }); child.stdin_behavior = .Pipe; child.stdout_behavior = .Pipe; child.stderr_behavior = .Pipe; @@ -692,13 +693,17 @@ pub fn handleVerbose( pub fn handleVerbose2( b: *Build, opt_cwd: ?[]const u8, - opt_env: ?*const std.process.EnvMap, + opt_env: ?*const std.process.Environ.Map, argv: []const []const u8, ) error{OutOfMemory}!void { if (b.verbose) { + const graph = b.graph; // Intention of verbose is to print all sub-process command lines to // stderr before spawning them. 
- const text = try allocPrintCmd2(b.allocator, opt_cwd, opt_env, argv); + const text = try allocPrintCmd(b.allocator, opt_cwd, if (opt_env) |env| .{ + .child = env, + .parent = &graph.env_map, + } else null, argv); std.debug.print("{s}\n", .{text}); } } @@ -728,15 +733,10 @@ pub fn handleChildProcessTerm(s: *Step, term: std.process.Child.Term) error{ Mak pub fn allocPrintCmd( gpa: Allocator, opt_cwd: ?[]const u8, - argv: []const []const u8, -) Allocator.Error![]u8 { - return allocPrintCmd2(gpa, opt_cwd, null, argv); -} - -pub fn allocPrintCmd2( - gpa: Allocator, - opt_cwd: ?[]const u8, - opt_env: ?*const std.process.EnvMap, + opt_env: ?struct { + child: *const std.process.Environ.Map, + parent: *const std.process.Environ.Map, + }, argv: []const []const u8, ) Allocator.Error![]u8 { const shell = struct { @@ -779,13 +779,11 @@ pub fn allocPrintCmd2( const writer = &aw.writer; if (opt_cwd) |cwd| writer.print("cd {s} && ", .{cwd}) catch return error.OutOfMemory; if (opt_env) |env| { - var process_env_map = std.process.getEnvMap(gpa) catch std.process.EnvMap.init(gpa); - defer process_env_map.deinit(); - var it = env.iterator(); + var it = env.child.iterator(); while (it.next()) |entry| { const key = entry.key_ptr.*; const value = entry.value_ptr.*; - if (process_env_map.get(key)) |process_value| { + if (env.parent.get(key)) |process_value| { if (std.mem.eql(u8, value, process_value)) continue; } writer.print("{s}=", .{key}) catch return error.OutOfMemory; diff --git a/lib/std/Build/Step/Options.zig b/lib/std/Build/Step/Options.zig index 610d417aea..8cfa7c1261 100644 --- a/lib/std/Build/Step/Options.zig +++ b/lib/std/Build/Step/Options.zig @@ -546,7 +546,7 @@ test Options { .manifest_dir = Io.Dir.cwd(), }, .zig_exe = "test", - .env_map = std.process.EnvMap.init(arena.allocator()), + .env_map = std.process.Environ.Map.init(arena.allocator()), .global_cache_root = .{ .path = "test", .handle = Io.Dir.cwd() }, .host = .{ .query = .{}, diff --git a/lib/std/Build/Step/Run.zig 
b/lib/std/Build/Step/Run.zig index 258fadd06c..4a5386f8cd 100644 --- a/lib/std/Build/Step/Run.zig +++ b/lib/std/Build/Step/Run.zig @@ -8,7 +8,7 @@ const Step = std.Build.Step; const Dir = std.Io.Dir; const mem = std.mem; const process = std.process; -const EnvMap = std.process.EnvMap; +const EnvMap = std.process.Environ.Map; const assert = std.debug.assert; const Path = std.Build.Cache.Path; @@ -581,23 +581,24 @@ pub fn getEnvMap(run: *Run) *EnvMap { } fn getEnvMapInternal(run: *Run) *EnvMap { - const arena = run.step.owner.allocator; + const graph = run.step.owner.graph; + const arena = graph.arena; return run.env_map orelse { - const env_map = arena.create(EnvMap) catch @panic("OOM"); - env_map.* = process.getEnvMap(arena) catch @panic("unhandled error"); - run.env_map = env_map; - return env_map; + const cloned_map = arena.create(EnvMap) catch @panic("OOM"); + cloned_map.* = graph.env_map.clone(arena) catch @panic("OOM"); + run.env_map = cloned_map; + return cloned_map; }; } pub fn setEnvironmentVariable(run: *Run, key: []const u8, value: []const u8) void { - const b = run.step.owner; const env_map = run.getEnvMap(); - env_map.put(b.dupe(key), b.dupe(value)) catch @panic("unhandled error"); + // This data structure already dupes keys and values. + env_map.put(key, value) catch @panic("OOM"); } pub fn removeEnvironmentVariable(run: *Run, key: []const u8) void { - run.getEnvMap().remove(key); + _ = run.getEnvMap().swapRemove(key); } /// Adds a check for exact stderr match. Does not add any other checks. 
@@ -1563,11 +1564,10 @@ fn spawnChildAndCollect( assert(run.stdio == .zig_test); } - var child = std.process.Child.init(argv, arena); + var child = std.process.Child.init(arena, argv, .{ .map = env_map }); if (run.cwd) |lazy_cwd| { child.cwd = lazy_cwd.getPath2(b, &run.step); } - child.env_map = env_map; child.request_resource_usage_statistics = true; child.stdin_behavior = switch (run.stdio) { @@ -1597,7 +1597,10 @@ fn spawnChildAndCollect( // If an error occurs, it's caused by this command: assert(run.step.result_failed_command == null); - run.step.result_failed_command = try Step.allocPrintCmd(options.gpa, child.cwd, argv); + run.step.result_failed_command = try Step.allocPrintCmd(options.gpa, child.cwd, .{ + .child = env_map, + .parent = &graph.env_map, + }, argv); if (run.stdio == .zig_test) { var timer = try std.time.Timer.start(); @@ -1627,11 +1630,11 @@ fn setColorEnvironmentVariables(run: *Run, env_map: *EnvMap, terminal_mode: Io.T .manual => {}, .enable => { try env_map.put("CLICOLOR_FORCE", "1"); - env_map.remove("NO_COLOR"); + _ = env_map.swapRemove("NO_COLOR"); }, .disable => { try env_map.put("NO_COLOR", "1"); - env_map.remove("CLICOLOR_FORCE"); + _ = env_map.swapRemove("CLICOLOR_FORCE"); }, .inherit => switch (terminal_mode) { .no_color, .windows_api => continue :color .disable, diff --git a/lib/std/Build/WebServer.zig b/lib/std/Build/WebServer.zig index a2b35e3522..2c53e103cc 100644 --- a/lib/std/Build/WebServer.zig +++ b/lib/std/Build/WebServer.zig @@ -528,13 +528,13 @@ pub fn serveTarFile(ws: *WebServer, request: *http.Server.Request, paths: []cons } fn buildClientWasm(ws: *WebServer, arena: Allocator, optimize: std.builtin.OptimizeMode) !Cache.Path { - const io = ws.graph.io; const root_name = "build-web"; const arch_os_abi = "wasm32-freestanding"; const cpu_features = "baseline+atomics+bulk_memory+multivalue+mutable_globals+nontrapping_fptoint+reference_types+sign_ext"; const gpa = ws.gpa; const graph = ws.graph; + const io = graph.io; const 
main_src_path: Cache.Path = .{ .root_dir = graph.zig_lib_directory, @@ -572,7 +572,7 @@ fn buildClientWasm(ws: *WebServer, arena: Allocator, optimize: std.builtin.Optim "--listen=-", }); - var child: std.process.Child = .init(argv.items, gpa); + var child: std.process.Child = .init(gpa, argv.items, .{ .map = &graph.env_map }); child.stdin_behavior = .Pipe; child.stdout_behavior = .Pipe; child.stderr_behavior = .Pipe; @@ -640,7 +640,7 @@ fn buildClientWasm(ws: *WebServer, arena: Allocator, optimize: std.builtin.Optim if (code != 0) { log.err( "the following command exited with error code {d}:\n{s}", - .{ code, try Build.Step.allocPrintCmd(arena, null, argv.items) }, + .{ code, try Build.Step.allocPrintCmd(arena, null, null, argv.items) }, ); return error.WasmCompilationFailed; } @@ -648,7 +648,7 @@ fn buildClientWasm(ws: *WebServer, arena: Allocator, optimize: std.builtin.Optim .Signal, .Stopped, .Unknown => { log.err( "the following command terminated unexpectedly:\n{s}", - .{try Build.Step.allocPrintCmd(arena, null, argv.items)}, + .{try Build.Step.allocPrintCmd(arena, null, null, argv.items)}, ); return error.WasmCompilationFailed; }, @@ -658,14 +658,14 @@ fn buildClientWasm(ws: *WebServer, arena: Allocator, optimize: std.builtin.Optim try result_error_bundle.renderToStderr(io, .{}, .auto); log.err("the following command failed with {d} compilation errors:\n{s}", .{ result_error_bundle.errorMessageCount(), - try Build.Step.allocPrintCmd(arena, null, argv.items), + try Build.Step.allocPrintCmd(arena, null, null, argv.items), }); return error.WasmCompilationFailed; } const base_path = result orelse { log.err("child process failed to report result\n{s}", .{ - try Build.Step.allocPrintCmd(arena, null, argv.items), + try Build.Step.allocPrintCmd(arena, null, null, argv.items), }); return error.WasmCompilationFailed; }; diff --git a/lib/std/Io.zig b/lib/std/Io.zig index 21fb71286f..31dd500df7 100644 --- a/lib/std/Io.zig +++ b/lib/std/Io.zig @@ -2232,3 +2232,9 @@ pub fn 
tryLockStderr(io: Io, buffer: []u8, terminal_mode: ?Terminal.Mode) Cancel pub fn unlockStderr(io: Io) void { return io.vtable.unlockStderr(io.userdata); } + +pub fn environ(io: Io, name: []const u8) ?[]const u8 { + _ = io; + _ = name; + if (true) @panic("TODO"); +} diff --git a/lib/std/Io/Dir.zig b/lib/std/Io/Dir.zig index 82bf3b927d..cc1dec8efe 100644 --- a/lib/std/Io/Dir.zig +++ b/lib/std/Io/Dir.zig @@ -82,13 +82,14 @@ pub const Entry = struct { /// /// On POSIX targets, this function is comptime-callable. /// -/// On WASI, the value this returns is application-configurable. +/// This function is overridable via `std.Options.cwd`. pub fn cwd() Dir { - return switch (native_os) { + const cwdFn = std.Options.cwd orelse return switch (native_os) { .windows => .{ .handle = std.os.windows.peb().ProcessParameters.CurrentDirectory.Handle }, - .wasi => .{ .handle = std.options.wasiCwd() }, + .wasi => .{ .handle = 3 }, // Expect the first preopen to be current working directory. else => .{ .handle = std.posix.AT.FDCWD }, }; + return cwdFn(); } pub const Reader = struct { diff --git a/lib/std/Io/Threaded.zig b/lib/std/Io/Threaded.zig index e16e70d6a7..50e9f4cf15 100644 --- a/lib/std/Io/Threaded.zig +++ b/lib/std/Io/Threaded.zig @@ -72,7 +72,7 @@ pub const Argv0 = switch (native_os) { pub const Environ = struct { /// Unmodified data directly from the OS. - block: Block = &.{}, + block: std.process.Environ.Block = &.{}, /// Protected by `mutex`. Determines whether the other fields have been /// memoized based on `block`. 
initialized: bool = false, @@ -89,8 +89,6 @@ pub const Environ = struct { pub const Error = Allocator.Error || Io.UnexpectedError; - pub const Block = []const [*:0]const u8; - pub const Exist = struct { NO_COLOR: bool = false, CLICOLOR_FORCE: bool = false, diff --git a/lib/std/Progress.zig b/lib/std/Progress.zig index f0baca2784..94fd6b47a0 100644 --- a/lib/std/Progress.zig +++ b/lib/std/Progress.zig @@ -476,7 +476,7 @@ pub fn start(io: Io, options: Options) Node { global_progress.io = io; - if (std.process.parseEnvVarInt("ZIG_PROGRESS", u31, 10)) |ipc_fd| { + if (std.process.Environ.parseInt(io, "ZIG_PROGRESS", u31, 10)) |ipc_fd| { global_progress.update_worker = io.concurrent(ipcThreadRun, .{ io, @as(Io.File, .{ .handle = switch (@typeInfo(Io.File.Handle)) { diff --git a/lib/std/debug.zig b/lib/std/debug.zig index d000bda62e..0f151447fc 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -40,7 +40,7 @@ pub const cpu_context = @import("debug/cpu_context.zig"); /// pub fn deinit(si: *SelfInfo, gpa: Allocator) void; /// /// /// Returns the symbol and source location of the instruction at `address`. -/// pub fn getSymbol(si: *SelfInfo, gpa: Allocator, address: usize) SelfInfoError!Symbol; +/// pub fn getSymbol(si: *SelfInfo, gpa: Allocator, io: Io, address: usize) SelfInfoError!Symbol; /// /// Returns a name for the "module" (e.g. shared library or executable image) containing `address`. 
/// pub fn getModuleName(si: *SelfInfo, gpa: Allocator, address: usize) SelfInfoError![]const u8; /// diff --git a/lib/std/debug/ElfFile.zig b/lib/std/debug/ElfFile.zig index a101309d22..e17c518271 100644 --- a/lib/std/debug/ElfFile.zig +++ b/lib/std/debug/ElfFile.zig @@ -66,16 +66,16 @@ pub const DebugInfoSearchPaths = struct { .exe_dir = null, }; - pub fn native(exe_path: []const u8) DebugInfoSearchPaths { + pub fn native(exe_path: []const u8, io: Io) DebugInfoSearchPaths { return .{ .debuginfod_client = p: { - if (std.posix.getenv("DEBUGINFOD_CACHE_PATH")) |p| { + if (io.environ("DEBUGINFOD_CACHE_PATH")) |p| { break :p .{ p, "" }; } - if (std.posix.getenv("XDG_CACHE_HOME")) |cache_path| { + if (io.environ("XDG_CACHE_HOME")) |cache_path| { break :p .{ cache_path, "/debuginfod_client" }; } - if (std.posix.getenv("HOME")) |home_path| { + if (io.environ("HOME")) |home_path| { break :p .{ home_path, "/.cache/debuginfod_client" }; } break :p null; diff --git a/lib/std/debug/SelfInfo/Elf.zig b/lib/std/debug/SelfInfo/Elf.zig index be76f3a8c2..c62c2df4b8 100644 --- a/lib/std/debug/SelfInfo/Elf.zig +++ b/lib/std/debug/SelfInfo/Elf.zig @@ -322,11 +322,12 @@ const Module = struct { if (mod.loaded_elf == null) mod.loaded_elf = loadElf(mod, gpa, io); return if (mod.loaded_elf.?) 
|*elf| elf else |err| err; } + fn loadElf(mod: *Module, gpa: Allocator, io: Io) Error!LoadedElf { const load_result = if (mod.name.len > 0) res: { var file = Io.Dir.cwd().openFile(io, mod.name, .{}) catch return error.MissingDebugInfo; defer file.close(io); - break :res std.debug.ElfFile.load(gpa, io, file, mod.build_id, &.native(mod.name)); + break :res std.debug.ElfFile.load(gpa, io, file, mod.build_id, &.native(mod.name, io)); } else res: { const path = std.process.executablePathAlloc(io, gpa) catch |err| switch (err) { error.OutOfMemory => |e| return e, @@ -335,7 +336,7 @@ const Module = struct { defer gpa.free(path); var file = Io.Dir.cwd().openFile(io, path, .{}) catch return error.MissingDebugInfo; defer file.close(io); - break :res std.debug.ElfFile.load(gpa, io, file, mod.build_id, &.native(path)); + break :res std.debug.ElfFile.load(gpa, io, file, mod.build_id, &.native(path, io)); }; var elf_file = load_result catch |err| switch (err) { diff --git a/lib/std/os.zig b/lib/std/os.zig index 667d743f3d..4f6643c3af 100644 --- a/lib/std/os.zig +++ b/lib/std/os.zig @@ -1,27 +1,4 @@ -//! This file contains thin wrappers around OS-specific APIs, with these -//! specific goals in mind: -//! * Convert "errno"-style error codes into Zig errors. -//! * When null-terminated byte buffers are required, provide APIs which accept -//! slices as well as APIs which accept null-terminated byte buffers. Same goes -//! for WTF-16LE encoding. -//! * Where operating systems share APIs, e.g. POSIX, these thin wrappers provide -//! cross platform abstracting. -//! * When there exists a corresponding libc function and linking libc, the libc -//! implementation is used. Exceptions are made for known buggy areas of libc. -//! On Linux libc can be side-stepped by using `std.os.linux` directly. -//! * For Windows, this file represents the API that libc would provide for -//! Windows. For thin wrappers around Windows-specific APIs, see `std.os.windows`. 
- -const root = @import("root"); -const std = @import("std.zig"); const builtin = @import("builtin"); -const assert = std.debug.assert; -const math = std.math; -const mem = std.mem; -const elf = std.elf; -const fs = std.fs; -const dl = @import("dynamic_library.zig"); -const posix = std.posix; const native_os = builtin.os.tag; pub const linux = @import("os/linux.zig"); @@ -33,47 +10,7 @@ pub const windows = @import("os/windows.zig"); test { _ = linux; - if (native_os == .uefi) { - _ = uefi; - } + if (native_os == .uefi) _ = uefi; _ = wasi; _ = windows; } - -/// See also `getenv`. Populated by startup code before main(). -/// TODO this is a footgun because the value will be undefined when using `zig build-lib`. -/// https://github.com/ziglang/zig/issues/4524 -pub var environ: [][*:0]u8 = undefined; - -/// Populated by startup code before main(). -/// Not available on WASI or Windows without libc. See `std.process.argsAlloc` -/// or `std.process.argsWithAllocator` for a cross-platform alternative. -pub var argv: [][*:0]u8 = if (builtin.link_libc) undefined else switch (native_os) { - .windows => @compileError("argv isn't supported on Windows: use std.process.argsAlloc instead"), - .wasi => @compileError("argv isn't supported on WASI: use std.process.argsAlloc instead"), - else => undefined, -}; - -pub const FstatError = error{ - SystemResources, - AccessDenied, - Unexpected, -}; - -pub fn fstat_wasi(fd: posix.fd_t) FstatError!wasi.filestat_t { - var stat: wasi.filestat_t = undefined; - switch (wasi.fd_filestat_get(fd, &stat)) { - .SUCCESS => return stat, - .INVAL => unreachable, - .BADF => unreachable, // Always a race condition. - .NOMEM => return error.SystemResources, - .ACCES => return error.AccessDenied, - .NOTCAPABLE => return error.AccessDenied, - else => |err| return posix.unexpectedErrno(err), - } -} - -pub fn defaultWasiCwd() std.os.wasi.fd_t { - // Expect the first preopen to be current working directory. 
- return 3; -} diff --git a/lib/std/posix.zig b/lib/std/posix.zig index 8bcf7e82ee..55b19df995 100644 --- a/lib/std/posix.zig +++ b/lib/std/posix.zig @@ -857,127 +857,15 @@ pub fn execveZ( } } -pub const Arg0Expand = enum { - expand, - no_expand, -}; - -/// Like `execvpeZ` except if `arg0_expand` is `.expand`, then `argv` is mutable, -/// and `argv[0]` is expanded to be the same absolute path that is passed to the execve syscall. -/// If this function returns with an error, `argv[0]` will be restored to the value it was when it was passed in. -pub fn execvpeZ_expandArg0( - comptime arg0_expand: Arg0Expand, - file: [*:0]const u8, - child_argv: switch (arg0_expand) { - .expand => [*:null]?[*:0]const u8, - .no_expand => [*:null]const ?[*:0]const u8, - }, - envp: [*:null]const ?[*:0]const u8, -) ExecveError { - const file_slice = mem.sliceTo(file, 0); - if (mem.findScalar(u8, file_slice, '/') != null) return execveZ(file, child_argv, envp); - - const PATH = getenvZ("PATH") orelse "/usr/local/bin:/bin/:/usr/bin"; - // Use of PATH_MAX here is valid as the path_buf will be passed - // directly to the operating system in execveZ. - var path_buf: [PATH_MAX]u8 = undefined; - var it = mem.tokenizeScalar(u8, PATH, ':'); - var seen_eacces = false; - var err: ExecveError = error.FileNotFound; - - // In case of expanding arg0 we must put it back if we return with an error. 
- const prev_arg0 = child_argv[0]; - defer switch (arg0_expand) { - .expand => child_argv[0] = prev_arg0, - .no_expand => {}, - }; - - while (it.next()) |search_path| { - const path_len = search_path.len + file_slice.len + 1; - if (path_buf.len < path_len + 1) return error.NameTooLong; - @memcpy(path_buf[0..search_path.len], search_path); - path_buf[search_path.len] = '/'; - @memcpy(path_buf[search_path.len + 1 ..][0..file_slice.len], file_slice); - path_buf[path_len] = 0; - const full_path = path_buf[0..path_len :0].ptr; - switch (arg0_expand) { - .expand => child_argv[0] = full_path, - .no_expand => {}, - } - err = execveZ(full_path, child_argv, envp); - switch (err) { - error.AccessDenied => seen_eacces = true, - error.FileNotFound, error.NotDir => {}, - else => |e| return e, - } - } - if (seen_eacces) return error.AccessDenied; - return err; -} - /// This function also uses the PATH environment variable to get the full path to the executable. /// If `file` is an absolute path, this is the same as `execveZ`. pub fn execvpeZ( file: [*:0]const u8, argv_ptr: [*:null]const ?[*:0]const u8, envp: [*:null]const ?[*:0]const u8, + optional_PATH: ?[]const u8, ) ExecveError { - return execvpeZ_expandArg0(.no_expand, file, argv_ptr, envp); -} - -/// Get an environment variable. -/// See also `getenvZ`. -pub fn getenv(key: []const u8) ?[:0]const u8 { - if (native_os == .windows) { - @compileError("std.posix.getenv is unavailable for Windows because environment strings are in WTF-16 format. 
See std.process.getEnvVarOwned for a cross-platform API or std.process.getenvW for a Windows-specific API."); - } - if (mem.findScalar(u8, key, '=') != null) { - return null; - } - if (builtin.link_libc) { - var ptr = std.c.environ; - while (ptr[0]) |line| : (ptr += 1) { - var line_i: usize = 0; - while (line[line_i] != 0) : (line_i += 1) { - if (line_i == key.len) break; - if (line[line_i] != key[line_i]) break; - } - if ((line_i != key.len) or (line[line_i] != '=')) continue; - - return mem.sliceTo(line + line_i + 1, 0); - } - return null; - } - if (native_os == .wasi) { - @compileError("std.posix.getenv is unavailable for WASI. See std.process.getEnvMap or std.process.getEnvVarOwned for a cross-platform API."); - } - // The simplified start logic doesn't populate environ. - if (std.start.simplified_logic) return null; - // TODO see https://github.com/ziglang/zig/issues/4524 - for (std.os.environ) |ptr| { - var line_i: usize = 0; - while (ptr[line_i] != 0) : (line_i += 1) { - if (line_i == key.len) break; - if (ptr[line_i] != key[line_i]) break; - } - if ((line_i != key.len) or (ptr[line_i] != '=')) continue; - - return mem.sliceTo(ptr + line_i + 1, 0); - } - return null; -} - -/// Get an environment variable with a null-terminated name. -/// See also `getenv`. -pub fn getenvZ(key: [*:0]const u8) ?[:0]const u8 { - if (builtin.link_libc) { - const value = system.getenv(key) orelse return null; - return mem.sliceTo(value, 0); - } - if (native_os == .windows) { - @compileError("std.posix.getenvZ is unavailable for Windows because environment string is in WTF-16 format. See std.process.getEnvVarOwned for cross-platform API or std.process.getenvW for Windows-specific API."); - } - return getenv(mem.sliceTo(key, 0)); + return execvpeZ_expandArg0(.no_expand, file, argv_ptr, envp, optional_PATH); } pub const GetCwdError = error{ @@ -1945,7 +1833,7 @@ pub const FStatError = std.Io.File.StatError; /// Return information about a file descriptor. 
pub fn fstat(fd: fd_t) FStatError!Stat { if (native_os == .wasi and !builtin.link_libc) { - return Stat.fromFilestat(try std.os.fstat_wasi(fd)); + @compileError("unsupported OS"); } var stat = mem.zeroes(Stat); diff --git a/lib/std/process.zig b/lib/std/process.zig index 865376d907..4221356bdd 100644 --- a/lib/std/process.zig +++ b/lib/std/process.zig @@ -16,10 +16,8 @@ const unicode = std.unicode; const max_path_bytes = std.fs.max_path_bytes; pub const Child = @import("process/Child.zig"); -pub const changeCurDir = posix.chdir; -pub const changeCurDirZ = posix.chdirZ; - -pub const GetCwdError = posix.GetCwdError; +pub const Args = @import("process/Args.zig"); +pub const Environ = @import("process/Environ.zig"); /// This is the global, process-wide protection to coordinate stderr writes. /// @@ -28,6 +26,39 @@ pub const GetCwdError = posix.GetCwdError; /// information. pub var stderr_thread_mutex: std.Thread.Mutex.Recursive = .init; +/// A standard set of pre-initialized useful APIs for programs to take +/// advantage of. This is the type of the first parameter of the main function. +/// Applications wanting more flexibility can accept `Init.Minimal` instead. +/// +/// Completion of https://github.com/ziglang/zig/issues/24510 will also allow +/// the second parameter of the main function to be a custom struct that +/// contains auto-parsed CLI arguments. +pub const Init = struct { + /// `Init` is a superset of `Minimal`; the latter is included here. + minimal: Minimal, + /// Permanent storage for the entire process, cleaned automatically on + /// exit. Not threadsafe. + arena: *std.heap.ArenaAllocator, + /// A default-selected general purpose allocator for temporary heap + /// allocations. Debug mode will set up leak checking. Threadsafe. + gpa: Allocator, + /// An appropriate default Io implementation based on the target + /// configuration. Debug mode will set up leak checking. + io: Io, + /// Environment variables, initialized with `gpa`. Not threadsafe. 
+ env_map: *Environ.Map, + + /// Alternative to `Init` as the first parameter of the main function. + pub const Minimal = struct { + /// Environment variables. + environ: Environ, + /// Command line arguments. + args: Args, + }; +}; + +pub const GetCwdError = posix.GetCwdError; + /// The result is a slice of `out_buffer`, from index `0`. /// On Windows, the result is encoded as [WTF-8](https://wtf-8.codeberg.page/). /// On other platforms, the result is an opaque sequence of bytes with no particular encoding. @@ -73,1484 +104,6 @@ test getCwdAlloc { testing.allocator.free(cwd); } -pub const EnvMap = struct { - hash_map: HashMap, - - const HashMap = std.HashMap( - []const u8, - []const u8, - EnvNameHashContext, - std.hash_map.default_max_load_percentage, - ); - - pub const Size = HashMap.Size; - - pub const EnvNameHashContext = struct { - fn upcase(c: u21) u21 { - if (c <= std.math.maxInt(u16)) - return windows.ntdll.RtlUpcaseUnicodeChar(@as(u16, @intCast(c))); - return c; - } - - pub fn hash(self: @This(), s: []const u8) u64 { - _ = self; - if (native_os == .windows) { - var h = std.hash.Wyhash.init(0); - var it = unicode.Wtf8View.initUnchecked(s).iterator(); - while (it.nextCodepoint()) |cp| { - const cp_upper = upcase(cp); - h.update(&[_]u8{ - @as(u8, @intCast((cp_upper >> 16) & 0xff)), - @as(u8, @intCast((cp_upper >> 8) & 0xff)), - @as(u8, @intCast((cp_upper >> 0) & 0xff)), - }); - } - return h.final(); - } - return std.hash_map.hashString(s); - } - - pub fn eql(self: @This(), a: []const u8, b: []const u8) bool { - _ = self; - if (native_os == .windows) { - var it_a = unicode.Wtf8View.initUnchecked(a).iterator(); - var it_b = unicode.Wtf8View.initUnchecked(b).iterator(); - while (true) { - const c_a = it_a.nextCodepoint() orelse break; - const c_b = it_b.nextCodepoint() orelse return false; - if (upcase(c_a) != upcase(c_b)) - return false; - } - return if (it_b.nextCodepoint()) |_| false else true; - } - return std.hash_map.eqlString(a, b); - } - }; - - /// 
Create a EnvMap backed by a specific allocator. - /// That allocator will be used for both backing allocations - /// and string deduplication. - pub fn init(allocator: Allocator) EnvMap { - return EnvMap{ .hash_map = HashMap.init(allocator) }; - } - - /// Free the backing storage of the map, as well as all - /// of the stored keys and values. - pub fn deinit(self: *EnvMap) void { - var it = self.hash_map.iterator(); - while (it.next()) |entry| { - self.free(entry.key_ptr.*); - self.free(entry.value_ptr.*); - } - - self.hash_map.deinit(); - } - - /// Same as `put` but the key and value become owned by the EnvMap rather - /// than being copied. - /// If `putMove` fails, the ownership of key and value does not transfer. - /// On Windows `key` must be a valid [WTF-8](https://wtf-8.codeberg.page/) string. - pub fn putMove(self: *EnvMap, key: []u8, value: []u8) !void { - assert(unicode.wtf8ValidateSlice(key)); - const get_or_put = try self.hash_map.getOrPut(key); - if (get_or_put.found_existing) { - self.free(get_or_put.key_ptr.*); - self.free(get_or_put.value_ptr.*); - get_or_put.key_ptr.* = key; - } - get_or_put.value_ptr.* = value; - } - - /// `key` and `value` are copied into the EnvMap. - /// On Windows `key` must be a valid [WTF-8](https://wtf-8.codeberg.page/) string. - pub fn put(self: *EnvMap, key: []const u8, value: []const u8) !void { - assert(unicode.wtf8ValidateSlice(key)); - const value_copy = try self.copy(value); - errdefer self.free(value_copy); - const get_or_put = try self.hash_map.getOrPut(key); - if (get_or_put.found_existing) { - self.free(get_or_put.value_ptr.*); - } else { - get_or_put.key_ptr.* = self.copy(key) catch |err| { - _ = self.hash_map.remove(key); - return err; - }; - } - get_or_put.value_ptr.* = value_copy; - } - - /// Find the address of the value associated with a key. - /// The returned pointer is invalidated if the map resizes. - /// On Windows `key` must be a valid [WTF-8](https://wtf-8.codeberg.page/) string. 
- pub fn getPtr(self: EnvMap, key: []const u8) ?*[]const u8 { - assert(unicode.wtf8ValidateSlice(key)); - return self.hash_map.getPtr(key); - } - - /// Return the map's copy of the value associated with - /// a key. The returned string is invalidated if this - /// key is removed from the map. - /// On Windows `key` must be a valid [WTF-8](https://wtf-8.codeberg.page/) string. - pub fn get(self: EnvMap, key: []const u8) ?[]const u8 { - assert(unicode.wtf8ValidateSlice(key)); - return self.hash_map.get(key); - } - - /// Removes the item from the map and frees its value. - /// This invalidates the value returned by get() for this key. - /// On Windows `key` must be a valid [WTF-8](https://wtf-8.codeberg.page/) string. - pub fn remove(self: *EnvMap, key: []const u8) void { - assert(unicode.wtf8ValidateSlice(key)); - const kv = self.hash_map.fetchRemove(key) orelse return; - self.free(kv.key); - self.free(kv.value); - } - - /// Returns the number of KV pairs stored in the map. - pub fn count(self: EnvMap) HashMap.Size { - return self.hash_map.count(); - } - - /// Returns an iterator over entries in the map. - pub fn iterator(self: *const EnvMap) HashMap.Iterator { - return self.hash_map.iterator(); - } - - /// Returns a full copy of `em` allocated with `gpa`, which is not necessarily - /// the same allocator used to allocate `em`. - pub fn clone(em: *const EnvMap, gpa: Allocator) Allocator.Error!EnvMap { - var new: EnvMap = .init(gpa); - errdefer new.deinit(); - // Since we need to dupe the keys and values, the only way for error handling to not be a - // nightmare is to add keys to an empty map one-by-one. This could be avoided if this - // abstraction were a bit less... OOP-esque. 
- try new.hash_map.ensureUnusedCapacity(em.hash_map.count()); - var it = em.hash_map.iterator(); - while (it.next()) |entry| { - try new.put(entry.key_ptr.*, entry.value_ptr.*); - } - return new; - } - - fn free(self: EnvMap, value: []const u8) void { - self.hash_map.allocator.free(value); - } - - fn copy(self: EnvMap, value: []const u8) ![]u8 { - return self.hash_map.allocator.dupe(u8, value); - } -}; - -test EnvMap { - var env = EnvMap.init(testing.allocator); - defer env.deinit(); - - try env.put("SOMETHING_NEW", "hello"); - try testing.expectEqualStrings("hello", env.get("SOMETHING_NEW").?); - try testing.expectEqual(@as(EnvMap.Size, 1), env.count()); - - // overwrite - try env.put("SOMETHING_NEW", "something"); - try testing.expectEqualStrings("something", env.get("SOMETHING_NEW").?); - try testing.expectEqual(@as(EnvMap.Size, 1), env.count()); - - // a new longer name to test the Windows-specific conversion buffer - try env.put("SOMETHING_NEW_AND_LONGER", "1"); - try testing.expectEqualStrings("1", env.get("SOMETHING_NEW_AND_LONGER").?); - try testing.expectEqual(@as(EnvMap.Size, 2), env.count()); - - // case insensitivity on Windows only - if (native_os == .windows) { - try testing.expectEqualStrings("1", env.get("something_New_aNd_LONGER").?); - } else { - try testing.expect(null == env.get("something_New_aNd_LONGER")); - } - - var it = env.iterator(); - var count: EnvMap.Size = 0; - while (it.next()) |entry| { - const is_an_expected_name = std.mem.eql(u8, "SOMETHING_NEW", entry.key_ptr.*) or std.mem.eql(u8, "SOMETHING_NEW_AND_LONGER", entry.key_ptr.*); - try testing.expect(is_an_expected_name); - count += 1; - } - try testing.expectEqual(@as(EnvMap.Size, 2), count); - - env.remove("SOMETHING_NEW"); - try testing.expect(env.get("SOMETHING_NEW") == null); - - try testing.expectEqual(@as(EnvMap.Size, 1), env.count()); - - if (native_os == .windows) { - // test Unicode case-insensitivity on Windows - try env.put("КИРиллИЦА", "something else"); - try 
testing.expectEqualStrings("something else", env.get("кириллица").?); - - // and WTF-8 that's not valid UTF-8 - const wtf8_with_surrogate_pair = try unicode.wtf16LeToWtf8Alloc(testing.allocator, &[_]u16{ - std.mem.nativeToLittle(u16, 0xD83D), // unpaired high surrogate - }); - defer testing.allocator.free(wtf8_with_surrogate_pair); - - try env.put(wtf8_with_surrogate_pair, wtf8_with_surrogate_pair); - try testing.expectEqualSlices(u8, wtf8_with_surrogate_pair, env.get(wtf8_with_surrogate_pair).?); - } -} - -pub const GetEnvMapError = error{ - OutOfMemory, - /// WASI-only. `environ_sizes_get` or `environ_get` - /// failed for an unexpected reason. - Unexpected, -}; - -/// Returns a snapshot of the environment variables of the current process. -/// Any modifications to the resulting EnvMap will not be reflected in the environment, and -/// likewise, any future modifications to the environment will not be reflected in the EnvMap. -/// Caller owns resulting `EnvMap` and should call its `deinit` fn when done. -pub fn getEnvMap(allocator: Allocator) GetEnvMapError!EnvMap { - var result = EnvMap.init(allocator); - errdefer result.deinit(); - - if (native_os == .windows) { - const ptr = windows.peb().ProcessParameters.Environment; - - var i: usize = 0; - while (ptr[i] != 0) { - const key_start = i; - - // There are some special environment variables that start with =, - // so we need a special case to not treat = as a key/value separator - // if it's the first character. 
- // https://devblogs.microsoft.com/oldnewthing/20100506-00/?p=14133 - if (ptr[key_start] == '=') i += 1; - - while (ptr[i] != 0 and ptr[i] != '=') : (i += 1) {} - const key_w = ptr[key_start..i]; - const key = try unicode.wtf16LeToWtf8Alloc(allocator, key_w); - errdefer allocator.free(key); - - if (ptr[i] == '=') i += 1; - - const value_start = i; - while (ptr[i] != 0) : (i += 1) {} - const value_w = ptr[value_start..i]; - const value = try unicode.wtf16LeToWtf8Alloc(allocator, value_w); - errdefer allocator.free(value); - - i += 1; // skip over null byte - - try result.putMove(key, value); - } - return result; - } else if (native_os == .wasi and !builtin.link_libc) { - var environ_count: usize = undefined; - var environ_buf_size: usize = undefined; - - const environ_sizes_get_ret = std.os.wasi.environ_sizes_get(&environ_count, &environ_buf_size); - if (environ_sizes_get_ret != .SUCCESS) { - return posix.unexpectedErrno(environ_sizes_get_ret); - } - - if (environ_count == 0) { - return result; - } - - const environ = try allocator.alloc([*:0]u8, environ_count); - defer allocator.free(environ); - const environ_buf = try allocator.alloc(u8, environ_buf_size); - defer allocator.free(environ_buf); - - const environ_get_ret = std.os.wasi.environ_get(environ.ptr, environ_buf.ptr); - if (environ_get_ret != .SUCCESS) { - return posix.unexpectedErrno(environ_get_ret); - } - - for (environ) |env| { - const pair = mem.sliceTo(env, 0); - var parts = mem.splitScalar(u8, pair, '='); - const key = parts.first(); - const value = parts.rest(); - try result.put(key, value); - } - return result; - } else if (builtin.link_libc) { - var ptr = std.c.environ; - while (ptr[0]) |line| : (ptr += 1) { - var line_i: usize = 0; - while (line[line_i] != 0 and line[line_i] != '=') : (line_i += 1) {} - const key = line[0..line_i]; - - var end_i: usize = line_i; - while (line[end_i] != 0) : (end_i += 1) {} - const value = line[line_i + 1 .. 
end_i]; - - try result.put(key, value); - } - return result; - } else { - for (std.os.environ) |line| { - var line_i: usize = 0; - while (line[line_i] != 0 and line[line_i] != '=') : (line_i += 1) {} - const key = line[0..line_i]; - - var end_i: usize = line_i; - while (line[end_i] != 0) : (end_i += 1) {} - const value = line[line_i + 1 .. end_i]; - - try result.put(key, value); - } - return result; - } -} - -test getEnvMap { - var env = try getEnvMap(testing.allocator); - defer env.deinit(); -} - -pub const GetEnvVarOwnedError = error{ - OutOfMemory, - EnvironmentVariableNotFound, - - /// On Windows, environment variable keys provided by the user must be valid WTF-8. - /// https://wtf-8.codeberg.page/ - InvalidWtf8, -}; - -/// Caller must free returned memory. -/// On Windows, if `key` is not valid [WTF-8](https://wtf-8.codeberg.page/), -/// then `error.InvalidWtf8` is returned. -/// On Windows, the value is encoded as [WTF-8](https://wtf-8.codeberg.page/). -/// On other platforms, the value is an opaque sequence of bytes with no particular encoding. 
-pub fn getEnvVarOwned(allocator: Allocator, key: []const u8) GetEnvVarOwnedError![]u8 { - if (native_os == .windows) { - const result_w = blk: { - var stack_alloc = std.heap.stackFallback(256 * @sizeOf(u16), allocator); - const stack_allocator = stack_alloc.get(); - const key_w = try unicode.wtf8ToWtf16LeAllocZ(stack_allocator, key); - defer stack_allocator.free(key_w); - - break :blk getenvW(key_w) orelse return error.EnvironmentVariableNotFound; - }; - // wtf16LeToWtf8Alloc can only fail with OutOfMemory - return unicode.wtf16LeToWtf8Alloc(allocator, result_w); - } else if (native_os == .wasi and !builtin.link_libc) { - var envmap = getEnvMap(allocator) catch return error.OutOfMemory; - defer envmap.deinit(); - const val = envmap.get(key) orelse return error.EnvironmentVariableNotFound; - return allocator.dupe(u8, val); - } else { - const result = posix.getenv(key) orelse return error.EnvironmentVariableNotFound; - return allocator.dupe(u8, result); - } -} - -/// On Windows, `key` must be valid WTF-8. -pub inline fn hasEnvVarConstant(comptime key: []const u8) bool { - if (native_os == .windows) { - const key_w = comptime unicode.wtf8ToWtf16LeStringLiteral(key); - return getenvW(key_w) != null; - } else if (native_os == .wasi and !builtin.link_libc) { - return false; - } else { - return posix.getenv(key) != null; - } -} - -/// On Windows, `key` must be valid WTF-8. -pub inline fn hasNonEmptyEnvVarConstant(comptime key: []const u8) bool { - if (native_os == .windows) { - const key_w = comptime unicode.wtf8ToWtf16LeStringLiteral(key); - const value = getenvW(key_w) orelse return false; - return value.len != 0; - } else if (native_os == .wasi and !builtin.link_libc) { - return false; - } else { - const value = posix.getenv(key) orelse return false; - return value.len != 0; - } -} - -pub const ParseEnvVarIntError = std.fmt.ParseIntError || error{EnvironmentVariableNotFound}; - -/// Parses an environment variable as an integer. 
-/// -/// Since the key is comptime-known, no allocation is needed. -/// -/// On Windows, `key` must be valid WTF-8. -pub fn parseEnvVarInt(comptime key: []const u8, comptime I: type, base: u8) ParseEnvVarIntError!I { - if (native_os == .windows) { - const key_w = comptime std.unicode.wtf8ToWtf16LeStringLiteral(key); - const text = getenvW(key_w) orelse return error.EnvironmentVariableNotFound; - return std.fmt.parseIntWithGenericCharacter(I, u16, text, base); - } else if (native_os == .wasi and !builtin.link_libc) { - @compileError("parseEnvVarInt is not supported for WASI without libc"); - } else { - const text = posix.getenv(key) orelse return error.EnvironmentVariableNotFound; - return std.fmt.parseInt(I, text, base); - } -} - -pub const HasEnvVarError = error{ - OutOfMemory, - - /// On Windows, environment variable keys provided by the user must be valid WTF-8. - /// https://wtf-8.codeberg.page/ - InvalidWtf8, -}; - -/// On Windows, if `key` is not valid [WTF-8](https://wtf-8.codeberg.page/), -/// then `error.InvalidWtf8` is returned. -pub fn hasEnvVar(allocator: Allocator, key: []const u8) HasEnvVarError!bool { - if (native_os == .windows) { - var stack_alloc = std.heap.stackFallback(256 * @sizeOf(u16), allocator); - const stack_allocator = stack_alloc.get(); - const key_w = try unicode.wtf8ToWtf16LeAllocZ(stack_allocator, key); - defer stack_allocator.free(key_w); - return getenvW(key_w) != null; - } else if (native_os == .wasi and !builtin.link_libc) { - var envmap = getEnvMap(allocator) catch return error.OutOfMemory; - defer envmap.deinit(); - return envmap.getPtr(key) != null; - } else { - return posix.getenv(key) != null; - } -} - -/// On Windows, if `key` is not valid [WTF-8](https://wtf-8.codeberg.page/), -/// then `error.InvalidWtf8` is returned. 
-pub fn hasNonEmptyEnvVar(allocator: Allocator, key: []const u8) HasEnvVarError!bool { - if (native_os == .windows) { - var stack_alloc = std.heap.stackFallback(256 * @sizeOf(u16), allocator); - const stack_allocator = stack_alloc.get(); - const key_w = try unicode.wtf8ToWtf16LeAllocZ(stack_allocator, key); - defer stack_allocator.free(key_w); - const value = getenvW(key_w) orelse return false; - return value.len != 0; - } else if (native_os == .wasi and !builtin.link_libc) { - var envmap = getEnvMap(allocator) catch return error.OutOfMemory; - defer envmap.deinit(); - const value = envmap.getPtr(key) orelse return false; - return value.len != 0; - } else { - const value = posix.getenv(key) orelse return false; - return value.len != 0; - } -} - -/// Windows-only. Get an environment variable with a null-terminated, WTF-16 encoded name. -/// The returned slice points to memory in the PEB. -/// -/// This function performs a Unicode-aware case-insensitive lookup using RtlEqualUnicodeString. -/// -/// See also: -/// * `std.posix.getenv` -/// * `getEnvMap` -/// * `getEnvVarOwned` -/// * `hasEnvVarConstant` -/// * `hasEnvVar` -pub fn getenvW(key: [*:0]const u16) ?[:0]const u16 { - if (native_os != .windows) { - @compileError("Windows-only"); - } - const key_slice = mem.sliceTo(key, 0); - // '=' anywhere but the start makes this an invalid environment variable name - if (key_slice.len > 0 and std.mem.findScalar(u16, key_slice[1..], '=') != null) { - return null; - } - const ptr = windows.peb().ProcessParameters.Environment; - var i: usize = 0; - while (ptr[i] != 0) { - const key_value = mem.sliceTo(ptr[i..], 0); - - // There are some special environment variables that start with =, - // so we need a special case to not treat = as a key/value separator - // if it's the first character. 
- // https://devblogs.microsoft.com/oldnewthing/20100506-00/?p=14133 - const equal_search_start: usize = if (key_value[0] == '=') 1 else 0; - const equal_index = std.mem.findScalarPos(u16, key_value, equal_search_start, '=') orelse { - // This is enforced by CreateProcess. - // If violated, CreateProcess will fail with INVALID_PARAMETER. - unreachable; // must contain a = - }; - - const this_key = key_value[0..equal_index]; - if (windows.eqlIgnoreCaseWtf16(key_slice, this_key)) { - return key_value[equal_index + 1 ..]; - } - - // skip past the NUL terminator - i += key_value.len + 1; - } - return null; -} - -test getEnvVarOwned { - try testing.expectError( - error.EnvironmentVariableNotFound, - getEnvVarOwned(std.testing.allocator, "BADENV"), - ); -} - -test hasEnvVarConstant { - if (native_os == .wasi and !builtin.link_libc) return error.SkipZigTest; - - try testing.expect(!hasEnvVarConstant("BADENV")); -} - -test hasEnvVar { - const has_env = try hasEnvVar(std.testing.allocator, "BADENV"); - try testing.expect(!has_env); -} - -pub const ArgIteratorPosix = struct { - index: usize, - count: usize, - - pub const InitError = error{}; - - pub fn init() ArgIteratorPosix { - return ArgIteratorPosix{ - .index = 0, - .count = std.os.argv.len, - }; - } - - pub fn next(self: *ArgIteratorPosix) ?[:0]const u8 { - if (self.index == self.count) return null; - - const s = std.os.argv[self.index]; - self.index += 1; - return mem.sliceTo(s, 0); - } - - pub fn skip(self: *ArgIteratorPosix) bool { - if (self.index == self.count) return false; - - self.index += 1; - return true; - } -}; - -pub const ArgIteratorWasi = struct { - allocator: Allocator, - index: usize, - args: [][:0]u8, - - pub const InitError = error{OutOfMemory} || posix.UnexpectedError; - - /// You must call deinit to free the internal buffer of the - /// iterator after you are done. 
- pub fn init(allocator: Allocator) InitError!ArgIteratorWasi { - const fetched_args = try ArgIteratorWasi.internalInit(allocator); - return ArgIteratorWasi{ - .allocator = allocator, - .index = 0, - .args = fetched_args, - }; - } - - fn internalInit(allocator: Allocator) InitError![][:0]u8 { - var count: usize = undefined; - var buf_size: usize = undefined; - - switch (std.os.wasi.args_sizes_get(&count, &buf_size)) { - .SUCCESS => {}, - else => |err| return posix.unexpectedErrno(err), - } - - if (count == 0) { - return &[_][:0]u8{}; - } - - const argv = try allocator.alloc([*:0]u8, count); - defer allocator.free(argv); - - const argv_buf = try allocator.alloc(u8, buf_size); - - switch (std.os.wasi.args_get(argv.ptr, argv_buf.ptr)) { - .SUCCESS => {}, - else => |err| return posix.unexpectedErrno(err), - } - - var result_args = try allocator.alloc([:0]u8, count); - var i: usize = 0; - while (i < count) : (i += 1) { - result_args[i] = mem.sliceTo(argv[i], 0); - } - - return result_args; - } - - pub fn next(self: *ArgIteratorWasi) ?[:0]const u8 { - if (self.index == self.args.len) return null; - - const arg = self.args[self.index]; - self.index += 1; - return arg; - } - - pub fn skip(self: *ArgIteratorWasi) bool { - if (self.index == self.args.len) return false; - - self.index += 1; - return true; - } - - /// Call to free the internal buffer of the iterator. - pub fn deinit(self: *ArgIteratorWasi) void { - // Nothing is allocated when there are no args - if (self.args.len == 0) return; - - const last_item = self.args[self.args.len - 1]; - const last_byte_addr = @intFromPtr(last_item.ptr) + last_item.len + 1; // null terminated - const first_item_ptr = self.args[0].ptr; - const len = last_byte_addr - @intFromPtr(first_item_ptr); - self.allocator.free(first_item_ptr[0..len]); - self.allocator.free(self.args); - } -}; - -/// Iterator that implements the Windows command-line parsing algorithm. 
-/// The implementation is intended to be compatible with the post-2008 C runtime, -/// but is *not* intended to be compatible with `CommandLineToArgvW` since -/// `CommandLineToArgvW` uses the pre-2008 parsing rules. -/// -/// This iterator faithfully implements the parsing behavior observed from the C runtime with -/// one exception: if the command-line string is empty, the iterator will immediately complete -/// without returning any arguments (whereas the C runtime will return a single argument -/// representing the name of the current executable). -/// -/// The essential parts of the algorithm are described in Microsoft's documentation: -/// -/// - https://learn.microsoft.com/en-us/cpp/cpp/main-function-command-line-args?view=msvc-170#parsing-c-command-line-arguments -/// -/// David Deley explains some additional undocumented quirks in great detail: -/// -/// - https://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULES -pub const ArgIteratorWindows = struct { - allocator: Allocator, - /// Encoded as WTF-16 LE. - cmd_line: []const u16, - index: usize = 0, - /// Owned by the iterator. Long enough to hold contiguous NUL-terminated slices - /// of each argument encoded as WTF-8. - buffer: []u8, - start: usize = 0, - end: usize = 0, - - pub const InitError = error{OutOfMemory}; - - /// `cmd_line_w` *must* be a WTF16-LE-encoded string. - /// - /// The iterator stores and uses `cmd_line_w`, so its memory must be valid for - /// at least as long as the returned ArgIteratorWindows. - pub fn init(allocator: Allocator, cmd_line_w: []const u16) InitError!ArgIteratorWindows { - const wtf8_len = unicode.calcWtf8Len(cmd_line_w); - - // This buffer must be large enough to contain contiguous NUL-terminated slices - // of each argument. 
- // - During parsing, the length of a parsed argument will always be equal to - // to less than its unparsed length - // - The first argument needs one extra byte of space allocated for its NUL - // terminator, but for each subsequent argument the necessary whitespace - // between arguments guarantees room for their NUL terminator(s). - const buffer = try allocator.alloc(u8, wtf8_len + 1); - errdefer allocator.free(buffer); - - return .{ - .allocator = allocator, - .cmd_line = cmd_line_w, - .buffer = buffer, - }; - } - - /// Returns the next argument and advances the iterator. Returns `null` if at the end of the - /// command-line string. The iterator owns the returned slice. - /// The result is encoded as [WTF-8](https://wtf-8.codeberg.page/). - pub fn next(self: *ArgIteratorWindows) ?[:0]const u8 { - return self.nextWithStrategy(next_strategy); - } - - /// Skips the next argument and advances the iterator. Returns `true` if an argument was - /// skipped, `false` if at the end of the command-line string. - pub fn skip(self: *ArgIteratorWindows) bool { - return self.nextWithStrategy(skip_strategy); - } - - const next_strategy = struct { - const T = ?[:0]const u8; - - const eof = null; - - /// Returns '\' if any backslashes are emitted, otherwise returns `last_emitted_code_unit`. - fn emitBackslashes(self: *ArgIteratorWindows, count: usize, last_emitted_code_unit: ?u16) ?u16 { - for (0..count) |_| { - self.buffer[self.end] = '\\'; - self.end += 1; - } - return if (count != 0) '\\' else last_emitted_code_unit; - } - - /// If `last_emitted_code_unit` and `code_unit` form a surrogate pair, then - /// the previously emitted high surrogate is overwritten by the codepoint encoded - /// by the surrogate pair, and `null` is returned. - /// Otherwise, `code_unit` is emitted and returned. 
- fn emitCharacter(self: *ArgIteratorWindows, code_unit: u16, last_emitted_code_unit: ?u16) ?u16 { - // Because we are emitting WTF-8, we need to - // check to see if we've emitted two consecutive surrogate - // codepoints that form a valid surrogate pair in order - // to ensure that we're always emitting well-formed WTF-8 - // (https://wtf-8.codeberg.page/#concatenating). - // - // If we do have a valid surrogate pair, we need to emit - // the UTF-8 sequence for the codepoint that they encode - // instead of the WTF-8 encoding for the two surrogate pairs - // separately. - // - // This is relevant when dealing with a WTF-16 encoded - // command line like this: - // "<0xD801>"<0xDC37> - // which would get parsed and converted to WTF-8 as: - // <0xED><0xA0><0x81><0xED><0xB0><0xB7> - // but instead, we need to recognize the surrogate pair - // and emit the codepoint it encodes, which in this - // example is U+10437 (𐐷), which is encoded in UTF-8 as: - // <0xF0><0x90><0x90><0xB7> - if (last_emitted_code_unit != null and - std.unicode.utf16IsLowSurrogate(code_unit) and - std.unicode.utf16IsHighSurrogate(last_emitted_code_unit.?)) - { - const codepoint = std.unicode.utf16DecodeSurrogatePair(&.{ last_emitted_code_unit.?, code_unit }) catch unreachable; - - // Unpaired surrogate is 3 bytes long - const dest = self.buffer[self.end - 3 ..]; - const len = unicode.utf8Encode(codepoint, dest) catch unreachable; - // All codepoints that require a surrogate pair (> U+FFFF) are encoded as 4 bytes - assert(len == 4); - self.end += 1; - return null; - } - - const wtf8_len = std.unicode.wtf8Encode(code_unit, self.buffer[self.end..]) catch unreachable; - self.end += wtf8_len; - return code_unit; - } - - fn yieldArg(self: *ArgIteratorWindows) [:0]const u8 { - self.buffer[self.end] = 0; - const arg = self.buffer[self.start..self.end :0]; - self.end += 1; - self.start = self.end; - return arg; - } - }; - - const skip_strategy = struct { - const T = bool; - - const eof = false; - - fn 
emitBackslashes(_: *ArgIteratorWindows, _: usize, last_emitted_code_unit: ?u16) ?u16 { - return last_emitted_code_unit; - } - - fn emitCharacter(_: *ArgIteratorWindows, _: u16, last_emitted_code_unit: ?u16) ?u16 { - return last_emitted_code_unit; - } - - fn yieldArg(_: *ArgIteratorWindows) bool { - return true; - } - }; - - fn nextWithStrategy(self: *ArgIteratorWindows, comptime strategy: type) strategy.T { - var last_emitted_code_unit: ?u16 = null; - // The first argument (the executable name) uses different parsing rules. - if (self.index == 0) { - if (self.cmd_line.len == 0 or self.cmd_line[0] == 0) { - // Immediately complete the iterator. - // The C runtime would return the name of the current executable here. - return strategy.eof; - } - - var inside_quotes = false; - while (true) : (self.index += 1) { - const char = if (self.index != self.cmd_line.len) - mem.littleToNative(u16, self.cmd_line[self.index]) - else - 0; - switch (char) { - 0 => { - return strategy.yieldArg(self); - }, - '"' => { - inside_quotes = !inside_quotes; - }, - ' ', '\t' => { - if (inside_quotes) { - last_emitted_code_unit = strategy.emitCharacter(self, char, last_emitted_code_unit); - } else { - self.index += 1; - return strategy.yieldArg(self); - } - }, - else => { - last_emitted_code_unit = strategy.emitCharacter(self, char, last_emitted_code_unit); - }, - } - } - } - - // Skip spaces and tabs. The iterator completes if we reach the end of the string here. - while (true) : (self.index += 1) { - const char = if (self.index != self.cmd_line.len) - mem.littleToNative(u16, self.cmd_line[self.index]) - else - 0; - switch (char) { - 0 => return strategy.eof, - ' ', '\t' => continue, - else => break, - } - } - - // Parsing rules for subsequent arguments: - // - // - The end of the string always terminates the current argument. - // - When not in 'inside_quotes' mode, a space or tab terminates the current argument. 
- // - 2n backslashes followed by a quote emit n backslashes (note: n can be zero). - // If in 'inside_quotes' and the quote is immediately followed by a second quote, - // one quote is emitted and the other is skipped, otherwise, the quote is skipped - // and 'inside_quotes' is toggled. - // - 2n + 1 backslashes followed by a quote emit n backslashes followed by a quote. - // - n backslashes not followed by a quote emit n backslashes. - var backslash_count: usize = 0; - var inside_quotes = false; - while (true) : (self.index += 1) { - const char = if (self.index != self.cmd_line.len) - mem.littleToNative(u16, self.cmd_line[self.index]) - else - 0; - switch (char) { - 0 => { - last_emitted_code_unit = strategy.emitBackslashes(self, backslash_count, last_emitted_code_unit); - return strategy.yieldArg(self); - }, - ' ', '\t' => { - last_emitted_code_unit = strategy.emitBackslashes(self, backslash_count, last_emitted_code_unit); - backslash_count = 0; - if (inside_quotes) { - last_emitted_code_unit = strategy.emitCharacter(self, char, last_emitted_code_unit); - } else return strategy.yieldArg(self); - }, - '"' => { - const char_is_escaped_quote = backslash_count % 2 != 0; - last_emitted_code_unit = strategy.emitBackslashes(self, backslash_count / 2, last_emitted_code_unit); - backslash_count = 0; - if (char_is_escaped_quote) { - last_emitted_code_unit = strategy.emitCharacter(self, '"', last_emitted_code_unit); - } else { - if (inside_quotes and - self.index + 1 != self.cmd_line.len and - mem.littleToNative(u16, self.cmd_line[self.index + 1]) == '"') - { - last_emitted_code_unit = strategy.emitCharacter(self, '"', last_emitted_code_unit); - self.index += 1; - } else { - inside_quotes = !inside_quotes; - } - } - }, - '\\' => { - backslash_count += 1; - }, - else => { - last_emitted_code_unit = strategy.emitBackslashes(self, backslash_count, last_emitted_code_unit); - backslash_count = 0; - last_emitted_code_unit = strategy.emitCharacter(self, char, 
last_emitted_code_unit); - }, - } - } - } - - /// Frees the iterator's copy of the command-line string and all previously returned - /// argument slices. - pub fn deinit(self: *ArgIteratorWindows) void { - self.allocator.free(self.buffer); - } -}; - -/// Optional parameters for `ArgIteratorGeneral` -pub const ArgIteratorGeneralOptions = struct { - comments: bool = false, - single_quotes: bool = false, -}; - -/// A general Iterator to parse a string into a set of arguments -pub fn ArgIteratorGeneral(comptime options: ArgIteratorGeneralOptions) type { - return struct { - allocator: Allocator, - index: usize = 0, - cmd_line: []const u8, - - /// Should the cmd_line field be free'd (using the allocator) on deinit()? - free_cmd_line_on_deinit: bool, - - /// buffer MUST be long enough to hold the cmd_line plus a null terminator. - /// buffer will we free'd (using the allocator) on deinit() - buffer: []u8, - start: usize = 0, - end: usize = 0, - - pub const Self = @This(); - - pub const InitError = error{OutOfMemory}; - - /// cmd_line_utf8 MUST remain valid and constant while using this instance - pub fn init(allocator: Allocator, cmd_line_utf8: []const u8) InitError!Self { - const buffer = try allocator.alloc(u8, cmd_line_utf8.len + 1); - errdefer allocator.free(buffer); - - return Self{ - .allocator = allocator, - .cmd_line = cmd_line_utf8, - .free_cmd_line_on_deinit = false, - .buffer = buffer, - }; - } - - /// cmd_line_utf8 will be free'd (with the allocator) on deinit() - pub fn initTakeOwnership(allocator: Allocator, cmd_line_utf8: []const u8) InitError!Self { - const buffer = try allocator.alloc(u8, cmd_line_utf8.len + 1); - errdefer allocator.free(buffer); - - return Self{ - .allocator = allocator, - .cmd_line = cmd_line_utf8, - .free_cmd_line_on_deinit = true, - .buffer = buffer, - }; - } - - // Skips over whitespace in the cmd_line. - // Returns false if the terminating sentinel is reached, true otherwise. - // Also skips over comments (if supported). 
- fn skipWhitespace(self: *Self) bool { - while (true) : (self.index += 1) { - const character = if (self.index != self.cmd_line.len) self.cmd_line[self.index] else 0; - switch (character) { - 0 => return false, - ' ', '\t', '\r', '\n' => continue, - '#' => { - if (options.comments) { - while (true) : (self.index += 1) { - switch (self.cmd_line[self.index]) { - '\n' => break, - 0 => return false, - else => continue, - } - } - continue; - } else { - break; - } - }, - else => break, - } - } - return true; - } - - pub fn skip(self: *Self) bool { - if (!self.skipWhitespace()) { - return false; - } - - var backslash_count: usize = 0; - var in_quote = false; - while (true) : (self.index += 1) { - const character = if (self.index != self.cmd_line.len) self.cmd_line[self.index] else 0; - switch (character) { - 0 => return true, - '"', '\'' => { - if (!options.single_quotes and character == '\'') { - backslash_count = 0; - continue; - } - const quote_is_real = backslash_count % 2 == 0; - if (quote_is_real) { - in_quote = !in_quote; - } - }, - '\\' => { - backslash_count += 1; - }, - ' ', '\t', '\r', '\n' => { - if (!in_quote) { - return true; - } - backslash_count = 0; - }, - else => { - backslash_count = 0; - continue; - }, - } - } - } - - /// Returns a slice of the internal buffer that contains the next argument. - /// Returns null when it reaches the end. 
- pub fn next(self: *Self) ?[:0]const u8 { - if (!self.skipWhitespace()) { - return null; - } - - var backslash_count: usize = 0; - var in_quote = false; - while (true) : (self.index += 1) { - const character = if (self.index != self.cmd_line.len) self.cmd_line[self.index] else 0; - switch (character) { - 0 => { - self.emitBackslashes(backslash_count); - self.buffer[self.end] = 0; - const token = self.buffer[self.start..self.end :0]; - self.end += 1; - self.start = self.end; - return token; - }, - '"', '\'' => { - if (!options.single_quotes and character == '\'') { - self.emitBackslashes(backslash_count); - backslash_count = 0; - self.emitCharacter(character); - continue; - } - const quote_is_real = backslash_count % 2 == 0; - self.emitBackslashes(backslash_count / 2); - backslash_count = 0; - - if (quote_is_real) { - in_quote = !in_quote; - } else { - self.emitCharacter('"'); - } - }, - '\\' => { - backslash_count += 1; - }, - ' ', '\t', '\r', '\n' => { - self.emitBackslashes(backslash_count); - backslash_count = 0; - if (in_quote) { - self.emitCharacter(character); - } else { - self.buffer[self.end] = 0; - const token = self.buffer[self.start..self.end :0]; - self.end += 1; - self.start = self.end; - return token; - } - }, - else => { - self.emitBackslashes(backslash_count); - backslash_count = 0; - self.emitCharacter(character); - }, - } - } - } - - fn emitBackslashes(self: *Self, emit_count: usize) void { - var i: usize = 0; - while (i < emit_count) : (i += 1) { - self.emitCharacter('\\'); - } - } - - fn emitCharacter(self: *Self, char: u8) void { - self.buffer[self.end] = char; - self.end += 1; - } - - /// Call to free the internal buffer of the iterator. - pub fn deinit(self: *Self) void { - self.allocator.free(self.buffer); - - if (self.free_cmd_line_on_deinit) { - self.allocator.free(self.cmd_line); - } - } - }; -} - -/// Cross-platform command line argument iterator. 
-pub const ArgIterator = struct { - const InnerType = switch (native_os) { - .windows => ArgIteratorWindows, - .wasi => if (builtin.link_libc) ArgIteratorPosix else ArgIteratorWasi, - else => ArgIteratorPosix, - }; - - inner: InnerType, - - /// Initialize the args iterator. Consider using initWithAllocator() instead - /// for cross-platform compatibility. - pub fn init() ArgIterator { - if (native_os == .wasi) { - @compileError("In WASI, use initWithAllocator instead."); - } - if (native_os == .windows) { - @compileError("In Windows, use initWithAllocator instead."); - } - - return ArgIterator{ .inner = InnerType.init() }; - } - - pub const InitError = InnerType.InitError; - - /// You must deinitialize iterator's internal buffers by calling `deinit` when done. - pub fn initWithAllocator(allocator: Allocator) InitError!ArgIterator { - if (native_os == .wasi and !builtin.link_libc) { - return ArgIterator{ .inner = try InnerType.init(allocator) }; - } - if (native_os == .windows) { - const cmd_line = std.os.windows.peb().ProcessParameters.CommandLine; - const cmd_line_w = cmd_line.Buffer.?[0 .. cmd_line.Length / 2]; - return ArgIterator{ .inner = try InnerType.init(allocator, cmd_line_w) }; - } - - return ArgIterator{ .inner = InnerType.init() }; - } - - /// Get the next argument. Returns 'null' if we are at the end. - /// Returned slice is pointing to the iterator's internal buffer. - /// On Windows, the result is encoded as [WTF-8](https://wtf-8.codeberg.page/). - /// On other platforms, the result is an opaque sequence of bytes with no particular encoding. - pub fn next(self: *ArgIterator) ?([:0]const u8) { - return self.inner.next(); - } - - /// Parse past 1 argument without capturing it. - /// Returns `true` if skipped an arg, `false` if we are at the end. - pub fn skip(self: *ArgIterator) bool { - return self.inner.skip(); - } - - /// Call this to free the iterator's internal buffer if the iterator - /// was created with `initWithAllocator` function. 
- pub fn deinit(self: *ArgIterator) void { - // Unless we're targeting WASI or Windows, this is a no-op. - if (native_os == .wasi and !builtin.link_libc) { - self.inner.deinit(); - } - - if (native_os == .windows) { - self.inner.deinit(); - } - } -}; - -/// Holds the command-line arguments, with the program name as the first entry. -/// Use argsWithAllocator() for cross-platform code. -pub fn args() ArgIterator { - return ArgIterator.init(); -} - -/// You must deinitialize iterator's internal buffers by calling `deinit` when done. -pub fn argsWithAllocator(allocator: Allocator) ArgIterator.InitError!ArgIterator { - return ArgIterator.initWithAllocator(allocator); -} - -/// Caller must call argsFree on result. -/// On Windows, the result is encoded as [WTF-8](https://wtf-8.codeberg.page/). -/// On other platforms, the result is an opaque sequence of bytes with no particular encoding. -pub fn argsAlloc(allocator: Allocator) ![][:0]u8 { - // TODO refactor to only make 1 allocation. - var it = try argsWithAllocator(allocator); - defer it.deinit(); - - var contents = std.array_list.Managed(u8).init(allocator); - defer contents.deinit(); - - var slice_list = std.array_list.Managed(usize).init(allocator); - defer slice_list.deinit(); - - while (it.next()) |arg| { - try contents.appendSlice(arg[0 .. arg.len + 1]); - try slice_list.append(arg.len); - } - - const contents_slice = contents.items; - const slice_sizes = slice_list.items; - const slice_list_bytes = try math.mul(usize, @sizeOf([]u8), slice_sizes.len); - const total_bytes = try math.add(usize, slice_list_bytes, contents_slice.len); - const buf = try allocator.alignedAlloc(u8, .of([]u8), total_bytes); - errdefer allocator.free(buf); - - const result_slice_list = mem.bytesAsSlice([:0]u8, buf[0..slice_list_bytes]); - const result_contents = buf[slice_list_bytes..]; - @memcpy(result_contents[0..contents_slice.len], contents_slice); - - var contents_index: usize = 0; - for (slice_sizes, 0..) 
|len, i| { - const new_index = contents_index + len; - result_slice_list[i] = result_contents[contents_index..new_index :0]; - contents_index = new_index + 1; - } - - return result_slice_list; -} - -pub fn argsFree(allocator: Allocator, args_alloc: []const [:0]u8) void { - var total_bytes: usize = 0; - for (args_alloc) |arg| { - total_bytes += @sizeOf([]u8) + arg.len + 1; - } - const unaligned_allocated_buf = @as([*]const u8, @ptrCast(args_alloc.ptr))[0..total_bytes]; - const aligned_allocated_buf: []align(@alignOf([]u8)) const u8 = @alignCast(unaligned_allocated_buf); - return allocator.free(aligned_allocated_buf); -} - -test ArgIteratorWindows { - const t = testArgIteratorWindows; - - try t( - \\"C:\Program Files\zig\zig.exe" run .\src\main.zig -target x86_64-windows-gnu -O ReleaseSafe -- --emoji=🗿 --eval="new Regex(\"Dwayne \\\"The Rock\\\" Johnson\")" - , &.{ - \\C:\Program Files\zig\zig.exe - , - \\run - , - \\.\src\main.zig - , - \\-target - , - \\x86_64-windows-gnu - , - \\-O - , - \\ReleaseSafe - , - \\-- - , - \\--emoji=🗿 - , - \\--eval=new Regex("Dwayne \"The Rock\" Johnson") - , - }); - - // Empty - try t("", &.{}); - - // Separators - try t("aa bb cc", &.{ "aa", "bb", "cc" }); - try t("aa\tbb\tcc", &.{ "aa", "bb", "cc" }); - try t("aa\nbb\ncc", &.{"aa\nbb\ncc"}); - try t("aa\r\nbb\r\ncc", &.{"aa\r\nbb\r\ncc"}); - try t("aa\rbb\rcc", &.{"aa\rbb\rcc"}); - try t("aa\x07bb\x07cc", &.{"aa\x07bb\x07cc"}); - try t("aa\x7Fbb\x7Fcc", &.{"aa\x7Fbb\x7Fcc"}); - try t("aa🦎bb🦎cc", &.{"aa🦎bb🦎cc"}); - - // Leading/trailing whitespace - try t(" ", &.{""}); - try t(" aa bb ", &.{ "", "aa", "bb" }); - try t("\t\t", &.{""}); - try t("\t\taa\t\tbb\t\t", &.{ "", "aa", "bb" }); - try t("\n\n", &.{"\n\n"}); - try t("\n\naa\n\nbb\n\n", &.{"\n\naa\n\nbb\n\n"}); - - // Executable name with quotes/backslashes - try t("\"aa bb\tcc\ndd\"", &.{"aa bb\tcc\ndd"}); - try t("\"", &.{""}); - try t("\"\"", &.{""}); - try t("\"\"\"", &.{""}); - try t("\"\"\"\"", &.{""}); - try 
t("\"\"\"\"\"", &.{""}); - try t("aa\"bb\"cc\"dd", &.{"aabbccdd"}); - try t("aa\"bb cc\"dd", &.{"aabb ccdd"}); - try t("\"aa\\\"bb\"", &.{"aa\\bb"}); - try t("\"aa\\\\\"", &.{"aa\\\\"}); - try t("aa\\\"bb", &.{"aa\\bb"}); - try t("aa\\\\\"bb", &.{"aa\\\\bb"}); - - // Arguments with quotes/backslashes - try t(". \"aa bb\tcc\ndd\"", &.{ ".", "aa bb\tcc\ndd" }); - try t(". aa\" \"bb\"\t\"cc\"\n\"dd\"", &.{ ".", "aa bb\tcc\ndd" }); - try t(". ", &.{"."}); - try t(". \"", &.{ ".", "" }); - try t(". \"\"", &.{ ".", "" }); - try t(". \"\"\"", &.{ ".", "\"" }); - try t(". \"\"\"\"", &.{ ".", "\"" }); - try t(". \"\"\"\"\"", &.{ ".", "\"\"" }); - try t(". \"\"\"\"\"\"", &.{ ".", "\"\"" }); - try t(". \" \"", &.{ ".", " " }); - try t(". \" \"\"", &.{ ".", " \"" }); - try t(". \" \"\"\"", &.{ ".", " \"" }); - try t(". \" \"\"\"\"", &.{ ".", " \"\"" }); - try t(". \" \"\"\"\"\"", &.{ ".", " \"\"" }); - try t(". \" \"\"\"\"\"\"", &.{ ".", " \"\"\"" }); - try t(". \\\"", &.{ ".", "\"" }); - try t(". \\\"\"", &.{ ".", "\"" }); - try t(". \\\"\"\"", &.{ ".", "\"" }); - try t(". \\\"\"\"\"", &.{ ".", "\"\"" }); - try t(". \\\"\"\"\"\"", &.{ ".", "\"\"" }); - try t(". \\\"\"\"\"\"\"", &.{ ".", "\"\"\"" }); - try t(". \" \\\"", &.{ ".", " \"" }); - try t(". \" \\\"\"", &.{ ".", " \"" }); - try t(". \" \\\"\"\"", &.{ ".", " \"\"" }); - try t(". \" \\\"\"\"\"", &.{ ".", " \"\"" }); - try t(". \" \\\"\"\"\"\"", &.{ ".", " \"\"\"" }); - try t(". \" \\\"\"\"\"\"\"", &.{ ".", " \"\"\"" }); - try t(". aa\\bb\\\\cc\\\\\\dd", &.{ ".", "aa\\bb\\\\cc\\\\\\dd" }); - try t(". \\\\\\\"aa bb\"", &.{ ".", "\\\"aa", "bb" }); - try t(". 
\\\\\\\\\"aa bb\"", &.{ ".", "\\\\aa bb" }); - - // From https://learn.microsoft.com/en-us/cpp/cpp/main-function-command-line-args#results-of-parsing-command-lines - try t( - \\foo.exe "abc" d e - , &.{ "foo.exe", "abc", "d", "e" }); - try t( - \\foo.exe a\\b d"e f"g h - , &.{ "foo.exe", "a\\\\b", "de fg", "h" }); - try t( - \\foo.exe a\\\"b c d - , &.{ "foo.exe", "a\\\"b", "c", "d" }); - try t( - \\foo.exe a\\\\"b c" d e - , &.{ "foo.exe", "a\\\\b c", "d", "e" }); - try t( - \\foo.exe a"b"" c d - , &.{ "foo.exe", "ab\" c d" }); - - // From https://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULESEX - try t("foo.exe CallMeIshmael", &.{ "foo.exe", "CallMeIshmael" }); - try t("foo.exe \"Call Me Ishmael\"", &.{ "foo.exe", "Call Me Ishmael" }); - try t("foo.exe Cal\"l Me I\"shmael", &.{ "foo.exe", "Call Me Ishmael" }); - try t("foo.exe CallMe\\\"Ishmael", &.{ "foo.exe", "CallMe\"Ishmael" }); - try t("foo.exe \"CallMe\\\"Ishmael\"", &.{ "foo.exe", "CallMe\"Ishmael" }); - try t("foo.exe \"Call Me Ishmael\\\\\"", &.{ "foo.exe", "Call Me Ishmael\\" }); - try t("foo.exe \"CallMe\\\\\\\"Ishmael\"", &.{ "foo.exe", "CallMe\\\"Ishmael" }); - try t("foo.exe a\\\\\\b", &.{ "foo.exe", "a\\\\\\b" }); - try t("foo.exe \"a\\\\\\b\"", &.{ "foo.exe", "a\\\\\\b" }); - - // Surrogate pair encoding of 𐐷 separated by quotes. - // Encoded as WTF-16: - // "<0xD801>"<0xDC37> - // Encoded as WTF-8: - // "<0xED><0xA0><0x81>"<0xED><0xB0><0xB7> - // During parsing, the quotes drop out and the surrogate pair - // should end up encoded as its normal UTF-8 representation. 
- try t("foo.exe \"\xed\xa0\x81\"\xed\xb0\xb7", &.{ "foo.exe", "𐐷" }); -} - -fn testArgIteratorWindows(cmd_line: []const u8, expected_args: []const []const u8) !void { - const cmd_line_w = try unicode.wtf8ToWtf16LeAllocZ(testing.allocator, cmd_line); - defer testing.allocator.free(cmd_line_w); - - // next - { - var it = try ArgIteratorWindows.init(testing.allocator, cmd_line_w); - defer it.deinit(); - - for (expected_args) |expected| { - if (it.next()) |actual| { - try testing.expectEqualStrings(expected, actual); - } else { - return error.TestUnexpectedResult; - } - } - try testing.expect(it.next() == null); - } - - // skip - { - var it = try ArgIteratorWindows.init(testing.allocator, cmd_line_w); - defer it.deinit(); - - for (0..expected_args.len) |_| { - try testing.expect(it.skip()); - } - try testing.expect(!it.skip()); - } -} - -test "general arg parsing" { - try testGeneralCmdLine("a b\tc d", &.{ "a", "b", "c", "d" }); - try testGeneralCmdLine("\"abc\" d e", &.{ "abc", "d", "e" }); - try testGeneralCmdLine("a\\\\\\b d\"e f\"g h", &.{ "a\\\\\\b", "de fg", "h" }); - try testGeneralCmdLine("a\\\\\\\"b c d", &.{ "a\\\"b", "c", "d" }); - try testGeneralCmdLine("a\\\\\\\\\"b c\" d e", &.{ "a\\\\b c", "d", "e" }); - try testGeneralCmdLine("a b\tc \"d f", &.{ "a", "b", "c", "d f" }); - try testGeneralCmdLine("j k l\\", &.{ "j", "k", "l\\" }); - try testGeneralCmdLine("\"\" x y z\\\\", &.{ "", "x", "y", "z\\\\" }); - - try testGeneralCmdLine("\".\\..\\zig-cache\\build\" \"bin\\zig.exe\" \".\\..\" \".\\..\\zig-cache\" \"--help\"", &.{ - ".\\..\\zig-cache\\build", - "bin\\zig.exe", - ".\\..", - ".\\..\\zig-cache", - "--help", - }); - - try testGeneralCmdLine( - \\ 'foo' "bar" - , &.{ "'foo'", "bar" }); -} - -fn testGeneralCmdLine(input_cmd_line: []const u8, expected_args: []const []const u8) !void { - var it = try ArgIteratorGeneral(.{}).init(std.testing.allocator, input_cmd_line); - defer it.deinit(); - for (expected_args) |expected_arg| { - const arg = it.next().?; - 
try testing.expectEqualStrings(expected_arg, arg); - } - try testing.expect(it.next() == null); -} - -test "response file arg parsing" { - try testResponseFileCmdLine( - \\a b - \\c d\ - , &.{ "a", "b", "c", "d\\" }); - try testResponseFileCmdLine("a b c d\\", &.{ "a", "b", "c", "d\\" }); - - try testResponseFileCmdLine( - \\j - \\ k l # this is a comment \\ \\\ \\\\ "none" "\\" "\\\" - \\ "m" #another comment - \\ - , &.{ "j", "k", "l", "m" }); - - try testResponseFileCmdLine( - \\ "" q "" - \\ "r s # t" "u\" v" #another comment - \\ - , &.{ "", "q", "", "r s # t", "u\" v" }); - - try testResponseFileCmdLine( - \\ -l"advapi32" a# b#c d# - \\e\\\ - , &.{ "-ladvapi32", "a#", "b#c", "d#", "e\\\\\\" }); - - try testResponseFileCmdLine( - \\ 'foo' "bar" - , &.{ "foo", "bar" }); -} - -fn testResponseFileCmdLine(input_cmd_line: []const u8, expected_args: []const []const u8) !void { - var it = try ArgIteratorGeneral(.{ .comments = true, .single_quotes = true }) - .init(std.testing.allocator, input_cmd_line); - defer it.deinit(); - for (expected_args) |expected_arg| { - const arg = it.next().?; - try testing.expectEqualStrings(expected_arg, arg); - } - try testing.expect(it.next() == null); -} - pub const UserInfo = struct { uid: posix.uid_t, gid: posix.gid_t, @@ -1706,73 +259,143 @@ pub fn getBaseAddress() usize { } } -/// Tells whether calling the `execv` or `execve` functions will be a compile error. -pub const can_execv = switch (native_os) { +/// Deprecated in favor of `Child.can_spawn`. +pub const can_spawn = Child.can_spawn; +/// Deprecated in favor of `can_replace`. +pub const can_execv = can_replace; + +/// Tells whether the target operating system supports replacing the current +/// process image. If this is `false` then calling `execv` or `replace` +/// functions will cause compilation to fail. +pub const can_replace = switch (native_os) { .windows, .haiku, .wasi => false, else => true, }; -/// Tells whether spawning child processes is supported (e.g. 
via Child) -pub const can_spawn = switch (native_os) { - .wasi, .ios, .tvos, .visionos, .watchos => false, - else => true, -}; +pub const ReplaceError = std.posix.ExecveError || error{OutOfMemory}; -pub const ExecvError = std.posix.ExecveError || error{OutOfMemory}; +/// Replaces the current process image with the executed process. If this +/// function succeeds, it does not return. +/// +/// `argv[0]` is the name of the process to replace the current one with. If it +/// is not already a file path (i.e. it contains '/'), it is resolved into a +/// file path based on PATH from the parent environment. +/// +/// This operation is not available on targets for which `can_replace` is +/// `false`. +/// +/// This function must allocate memory to add a null terminating byte to the path +/// and to each arg. +/// +/// Due to the heap allocation, it is illegal to call this function in a fork() +/// child. +pub fn replace(io: Io, gpa: Allocator, argv: []const []const u8, env: Environ.Block) ReplaceError { + if (!can_replace) @compileError("unsupported operation: replace"); -/// Replaces the current process image with the executed process. -/// This function must allocate memory to add a null terminating bytes on path and each arg. -/// It must also convert to KEY=VALUE\0 format for environment variables, and include null -/// pointers after the args and after the environment variables. -/// `argv[0]` is the executable path. -/// This function also uses the PATH environment variable to get the full path to the executable. -/// Due to the heap-allocation, it is illegal to call this function in a fork() child. -/// For that use case, use the `std.posix` functions directly. -pub fn execv(allocator: Allocator, argv: []const []const u8) ExecvError { - return execve(allocator, argv, null); -} - -/// Replaces the current process image with the executed process. -/// This function must allocate memory to add a null terminating bytes on path and each arg.
-/// It must also convert to KEY=VALUE\0 format for environment variables, and include null -/// pointers after the args and after the environment variables. -/// `argv[0]` is the executable path. -/// This function also uses the PATH environment variable to get the full path to the executable. -/// Due to the heap-allocation, it is illegal to call this function in a fork() child. -/// For that use case, use the `std.posix` functions directly. -pub fn execve( - allocator: Allocator, - argv: []const []const u8, - env_map: ?*const EnvMap, -) ExecvError { - if (!can_execv) @compileError("The target OS does not support execv"); - - var arena_allocator = std.heap.ArenaAllocator.init(allocator); + var arena_allocator = std.heap.ArenaAllocator.init(gpa); defer arena_allocator.deinit(); const arena = arena_allocator.allocator(); const argv_buf = try arena.allocSentinel(?[*:0]const u8, argv.len, null); for (argv, 0..) |arg, i| argv_buf[i] = (try arena.dupeZ(u8, arg)).ptr; - const envp = m: { - if (env_map) |m| { - const envp_buf = try createNullDelimitedEnvMap(arena, m); - break :m envp_buf.ptr; - } else if (builtin.link_libc) { - break :m std.c.environ; - } else if (builtin.output_mode == .Exe) { - // Then we have Zig start code and this works. - // TODO type-safety for null-termination of `os.environ`. - break :m @as([*:null]const ?[*:0]const u8, @ptrCast(std.os.environ.ptr)); - } else { - // TODO come up with a solution for this. - @compileError("missing std lib enhancement: std.process.execv implementation has no way to collect the environment variables to forward to the child process"); - } + return posix.execvpeZ_expandArg0(.no_expand, argv_buf.ptr[0].?, argv_buf.ptr, env); +} + +/// Replaces the current process image with the executed process. If this +/// function succeeds, it does not return. +/// +/// `argv[0]` is the file path of the process to replace the current one with, +/// relative to `dir`. 
It is *always* treated as a file path, even if it does +/// not contain '/'. +/// +/// This operation is not available on targets for which `can_replace` is +/// `false`. +/// +/// This function must allocate memory to add a null terminating byte to the path +/// and to each arg. +/// +/// Due to the heap allocation, it is illegal to call this +/// function in a fork() child. For that use case, use the `std.posix` +/// functions directly. +pub fn replaceFile(io: Io, gpa: Allocator, argv: []const []const u8, env: Environ.Block) ReplaceError { + if (!can_replace) @compileError("unsupported operation: replaceFile"); + + var arena_allocator = std.heap.ArenaAllocator.init(gpa); + defer arena_allocator.deinit(); + const arena = arena_allocator.allocator(); + + const argv_buf = try arena.allocSentinel(?[*:0]const u8, argv.len, null); + for (argv, 0..) |arg, i| argv_buf[i] = (try arena.dupeZ(u8, arg)).ptr; + + return posix.execvpeZ_expandArg0(.no_expand, argv_buf.ptr[0].?, argv_buf.ptr, env); +} + +pub const Arg0Expand = enum { expand, no_expand }; + +/// Replaces the current process image with the executed process. If this +/// function succeeds, it does not return. +/// +/// This operation is not available on all targets; see `can_replace`. +/// +/// This function also uses the PATH environment variable to get the full path to the executable. +/// If `file` contains a '/', this is the same as `execveZ`. +/// +/// Like `execvpeZ` except if `arg0_expand` is `.expand`, then `argv` is mutable, +/// and `argv[0]` is expanded to be the same absolute path that is passed to the execve syscall. +/// If this function returns with an error, `argv[0]` will be restored to the value it was when it was passed in.
+pub fn replace( + comptime arg0_expand: Arg0Expand, + file: [*:0]const u8, + child_argv: switch (arg0_expand) { + .expand => [*:null]?[*:0]const u8, + .no_expand => [*:null]const ?[*:0]const u8, + }, + envp: [*:null]const ?[*:0]const u8, + optional_PATH: ?[]const u8, +) ExecveError { + const file_slice = mem.sliceTo(file, 0); + if (mem.findScalar(u8, file_slice, '/') != null) return execveZ(file, child_argv, envp); + + const PATH = optional_PATH orelse "/usr/local/bin:/bin/:/usr/bin"; + // Use of PATH_MAX here is valid as the path_buf will be passed + // directly to the operating system in execveZ. + var path_buf: [PATH_MAX]u8 = undefined; + var it = mem.tokenizeScalar(u8, PATH, ':'); + var seen_eacces = false; + var err: ExecveError = error.FileNotFound; + + // In case of expanding arg0 we must put it back if we return with an error. + const prev_arg0 = child_argv[0]; + defer switch (arg0_expand) { + .expand => child_argv[0] = prev_arg0, + .no_expand => {}, }; - return posix.execvpeZ_expandArg0(.no_expand, argv_buf.ptr[0].?, argv_buf.ptr, envp); + while (it.next()) |search_path| { + const path_len = search_path.len + file_slice.len + 1; + if (path_buf.len < path_len + 1) return error.NameTooLong; + @memcpy(path_buf[0..search_path.len], search_path); + path_buf[search_path.len] = '/'; + @memcpy(path_buf[search_path.len + 1 ..][0..file_slice.len], file_slice); + path_buf[path_len] = 0; + const full_path = path_buf[0..path_len :0].ptr; + switch (arg0_expand) { + .expand => child_argv[0] = full_path, + .no_expand => {}, + } + err = execveZ(full_path, child_argv, envp); + switch (err) { + error.AccessDenied => seen_eacces = true, + error.FileNotFound, error.NotDir => {}, + else => |e| return e, + } + } + if (seen_eacces) return error.AccessDenied; + return err; } + pub const TotalSystemMemoryError = error{ UnknownTotalSystemMemory, }; @@ -1903,215 +526,6 @@ test raiseFileDescriptorLimit { raiseFileDescriptorLimit(); } -pub const CreateEnvironOptions = struct { - /// 
`null` means to leave the `ZIG_PROGRESS` environment variable unmodified. - /// If non-null, negative means to remove the environment variable, and >= 0 - /// means to provide it with the given integer. - zig_progress_fd: ?i32 = null, -}; - -/// Creates a null-delimited environment variable block in the format -/// expected by POSIX, from a hash map plus options. -pub fn createEnvironFromMap( - arena: Allocator, - map: *const EnvMap, - options: CreateEnvironOptions, -) Allocator.Error![:null]?[*:0]u8 { - const ZigProgressAction = enum { nothing, edit, delete, add }; - const zig_progress_action: ZigProgressAction = a: { - const fd = options.zig_progress_fd orelse break :a .nothing; - const contains = map.get("ZIG_PROGRESS") != null; - if (fd >= 0) { - break :a if (contains) .edit else .add; - } else { - if (contains) break :a .delete; - } - break :a .nothing; - }; - - const envp_count: usize = c: { - var count: usize = map.count(); - switch (zig_progress_action) { - .add => count += 1, - .delete => count -= 1, - .nothing, .edit => {}, - } - break :c count; - }; - - const envp_buf = try arena.allocSentinel(?[*:0]u8, envp_count, null); - var i: usize = 0; - - if (zig_progress_action == .add) { - envp_buf[i] = try std.fmt.allocPrintSentinel(arena, "ZIG_PROGRESS={d}", .{options.zig_progress_fd.?}, 0); - i += 1; - } - - { - var it = map.iterator(); - while (it.next()) |pair| { - if (mem.eql(u8, pair.key_ptr.*, "ZIG_PROGRESS")) switch (zig_progress_action) { - .add => unreachable, - .delete => continue, - .edit => { - envp_buf[i] = try std.fmt.allocPrintSentinel(arena, "{s}={d}", .{ - pair.key_ptr.*, options.zig_progress_fd.?, - }, 0); - i += 1; - continue; - }, - .nothing => {}, - }; - - envp_buf[i] = try std.fmt.allocPrintSentinel(arena, "{s}={s}", .{ pair.key_ptr.*, pair.value_ptr.* }, 0); - i += 1; - } - } - - assert(i == envp_count); - return envp_buf; -} - -/// Creates a null-delimited environment variable block in the format -/// expected by POSIX, from a hash map 
plus options. -pub fn createEnvironFromExisting( - arena: Allocator, - existing: [*:null]const ?[*:0]const u8, - options: CreateEnvironOptions, -) Allocator.Error![:null]?[*:0]u8 { - const existing_count, const contains_zig_progress = c: { - var count: usize = 0; - var contains = false; - while (existing[count]) |line| : (count += 1) { - contains = contains or mem.eql(u8, mem.sliceTo(line, '='), "ZIG_PROGRESS"); - } - break :c .{ count, contains }; - }; - const ZigProgressAction = enum { nothing, edit, delete, add }; - const zig_progress_action: ZigProgressAction = a: { - const fd = options.zig_progress_fd orelse break :a .nothing; - if (fd >= 0) { - break :a if (contains_zig_progress) .edit else .add; - } else { - if (contains_zig_progress) break :a .delete; - } - break :a .nothing; - }; - - const envp_count: usize = c: { - var count: usize = existing_count; - switch (zig_progress_action) { - .add => count += 1, - .delete => count -= 1, - .nothing, .edit => {}, - } - break :c count; - }; - - const envp_buf = try arena.allocSentinel(?[*:0]u8, envp_count, null); - var i: usize = 0; - var existing_index: usize = 0; - - if (zig_progress_action == .add) { - envp_buf[i] = try std.fmt.allocPrintSentinel(arena, "ZIG_PROGRESS={d}", .{options.zig_progress_fd.?}, 0); - i += 1; - } - - while (existing[existing_index]) |line| : (existing_index += 1) { - if (mem.eql(u8, mem.sliceTo(line, '='), "ZIG_PROGRESS")) switch (zig_progress_action) { - .add => unreachable, - .delete => continue, - .edit => { - envp_buf[i] = try std.fmt.allocPrintSentinel(arena, "ZIG_PROGRESS={d}", .{options.zig_progress_fd.?}, 0); - i += 1; - continue; - }, - .nothing => {}, - }; - envp_buf[i] = try arena.dupeZ(u8, mem.span(line)); - i += 1; - } - - assert(i == envp_count); - return envp_buf; -} - -pub fn createNullDelimitedEnvMap(arena: mem.Allocator, env_map: *const EnvMap) Allocator.Error![:null]?[*:0]u8 { - return createEnvironFromMap(arena, env_map, .{}); -} - -test createNullDelimitedEnvMap { - 
const allocator = testing.allocator; - var envmap = EnvMap.init(allocator); - defer envmap.deinit(); - - try envmap.put("HOME", "/home/ifreund"); - try envmap.put("WAYLAND_DISPLAY", "wayland-1"); - try envmap.put("DISPLAY", ":1"); - try envmap.put("DEBUGINFOD_URLS", " "); - try envmap.put("XCURSOR_SIZE", "24"); - - var arena = std.heap.ArenaAllocator.init(allocator); - defer arena.deinit(); - const environ = try createNullDelimitedEnvMap(arena.allocator(), &envmap); - - try testing.expectEqual(@as(usize, 5), environ.len); - - inline for (.{ - "HOME=/home/ifreund", - "WAYLAND_DISPLAY=wayland-1", - "DISPLAY=:1", - "DEBUGINFOD_URLS= ", - "XCURSOR_SIZE=24", - }) |target| { - for (environ) |variable| { - if (mem.eql(u8, mem.span(variable orelse continue), target)) break; - } else { - try testing.expect(false); // Environment variable not found - } - } -} - -/// Caller must free result. -pub fn createWindowsEnvBlock(allocator: mem.Allocator, env_map: *const EnvMap) ![]u16 { - // count bytes needed - const max_chars_needed = x: { - // Only need 2 trailing NUL code units for an empty environment - var max_chars_needed: usize = if (env_map.count() == 0) 2 else 1; - var it = env_map.iterator(); - while (it.next()) |pair| { - // +1 for '=' - // +1 for null byte - max_chars_needed += pair.key_ptr.len + pair.value_ptr.len + 2; - } - break :x max_chars_needed; - }; - const result = try allocator.alloc(u16, max_chars_needed); - errdefer allocator.free(result); - - var it = env_map.iterator(); - var i: usize = 0; - while (it.next()) |pair| { - i += try unicode.wtf8ToWtf16Le(result[i..], pair.key_ptr.*); - result[i] = '='; - i += 1; - i += try unicode.wtf8ToWtf16Le(result[i..], pair.value_ptr.*); - result[i] = 0; - i += 1; - } - result[i] = 0; - i += 1; - // An empty environment is a special case that requires a redundant - // NUL terminator. 
CreateProcess will read the second code unit even
-    // though theoretically the first should be enough to recognize that the
-    // environment is empty (see https://nullprogram.com/blog/2023/08/23/)
-    if (env_map.count() == 0) {
-        result[i] = 0;
-        i += 1;
-    }
-    return try allocator.realloc(result, i);
-}
-
 /// Logs an error and then terminates the process with exit code 1.
 pub fn fatal(comptime format: []const u8, format_arguments: anytype) noreturn {
     std.log.err(format, format_arguments);
diff --git a/lib/std/process/Args.zig b/lib/std/process/Args.zig
new file mode 100644
index 0000000000..0eb5d81309
--- /dev/null
+++ b/lib/std/process/Args.zig
@@ -0,0 +1,958 @@
+const Args = @This();
+
+const builtin = @import("builtin");
+const native_os = builtin.os.tag;
+
+const std = @import("../std.zig");
+const Allocator = std.mem.Allocator;
+const assert = std.debug.assert;
+// The test helpers live in `std.testing`; there is no `std.debug.testing`.
+const testing = std.testing;
+
+/// Raw, platform-specific representation of the arguments (see `Vector`).
+vector: Vector,
+
+/// Platform-specific storage for the process arguments.
+pub const Vector = switch (native_os) {
+    .windows => []const u16, // WTF-16 encoded
+    else => []const [*:0]const u8,
+};
+
+/// Cross-platform access to command line one argument at a time.
+pub const Iterator = struct {
+    // Implementation selected at compile time from the target OS.
+    const Inner = switch (native_os) {
+        .windows => Windows,
+        .wasi => if (builtin.link_libc) Posix else Wasi,
+        else => Posix,
+    };
+
+    inner: Inner,
+
+    /// Initialize the args iterator. Consider using `initAllocator` instead
+    /// for cross-platform compatibility.
+    pub fn init(a: Args) Iterator {
+        if (native_os == .wasi) {
+            @compileError("In WASI, use initAllocator instead.");
+        }
+        if (native_os == .windows) {
+            @compileError("In Windows, use initAllocator instead.");
+        }
+
+        return .{ .inner = .init(a) };
+    }
+
+    pub const InitError = Inner.InitError;
+
+    /// You must deinitialize iterator's internal buffers by calling `deinit` when done.
+    ///
+    /// `gpa` is only used by the implementations that need an owned buffer
+    /// (Windows, and WASI without libc); otherwise no allocation happens.
+    pub fn initAllocator(a: Args, gpa: Allocator) InitError!Iterator {
+        if (native_os == .wasi and !builtin.link_libc) {
+            // `Wasi.init` takes only the allocator; it queries the arguments itself.
+            return .{ .inner = try .init(gpa) };
+        }
+        if (native_os == .windows) {
+            // `Windows.init` takes the allocator first, then the WTF-16 command line.
+            return .{ .inner = try .init(gpa, a.vector) };
+        }
+
+        return .{ .inner = .init(a) };
+    }
+
+    /// Return subsequent argument, or `null` if no more remaining.
+    ///
+    /// Returned slice is pointing to the iterator's internal buffer.
+    /// On Windows, the result is encoded as [WTF-8](https://wtf-8.codeberg.page/).
+    /// On other platforms, the result is an opaque sequence of bytes with no particular encoding.
+    pub fn next(it: *Iterator) ?([:0]const u8) {
+        return it.inner.next();
+    }
+
+    /// Parse past 1 argument without capturing it.
+    /// Returns `true` if skipped an arg, `false` if we are at the end.
+    pub fn skip(it: *Iterator) bool {
+        return it.inner.skip();
+    }
+
+    /// Required to release resources if the iterator was initialized with
+    /// `initAllocator` function.
+    pub fn deinit(it: *Iterator) void {
+        // Unless we're targeting WASI or Windows, this is a no-op.
+        if (native_os == .wasi and !builtin.link_libc) it.inner.deinit();
+        if (native_os == .windows) it.inner.deinit();
+    }
+
+    /// Iterator that implements the Windows command-line parsing algorithm.
+    ///
+    /// The implementation is intended to be compatible with the post-2008 C runtime,
+    /// but is *not* intended to be compatible with `CommandLineToArgvW` since
+    /// `CommandLineToArgvW` uses the pre-2008 parsing rules.
+    ///
+    /// This iterator faithfully implements the parsing behavior observed from the C runtime with
+    /// one exception: if the command-line string is empty, the iterator will immediately complete
+    /// without returning any arguments (whereas the C runtime will return a single argument
+    /// representing the name of the current executable).
+    ///
+    /// The essential parts of the algorithm are described in Microsoft's documentation:
+    ///
+    /// - https://learn.microsoft.com/en-us/cpp/cpp/main-function-command-line-args?view=msvc-170#parsing-c-command-line-arguments
+    ///
+    /// David Deley explains some additional undocumented quirks in great detail:
+    ///
+    /// - https://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULES
+    pub const Windows = struct {
+        /// Allocator that owns `buffer`.
+        allocator: Allocator,
+        /// Encoded as WTF-16 LE.
+        cmd_line: []const u16,
+        /// Position of the next unread code unit in `cmd_line`.
+        index: usize = 0,
+        /// Owned by the iterator. Long enough to hold contiguous NUL-terminated slices
+        /// of each argument encoded as WTF-8.
+        buffer: []u8,
+        /// Offset in `buffer` where the argument currently being built starts.
+        start: usize = 0,
+        /// Offset in `buffer` one past the last byte written so far.
+        end: usize = 0,
+
+        pub const InitError = error{OutOfMemory};
+
+        /// `cmd_line_w` *must* be a WTF16-LE-encoded string.
+        ///
+        /// The iterator stores and uses `cmd_line_w`, so its memory must be valid for
+        /// at least as long as the returned Windows.
+        ///
+        /// Call `deinit` to free the buffer allocated here.
+        pub fn init(allocator: Allocator, cmd_line_w: []const u16) Windows.InitError!Windows {
+            const wtf8_len = std.unicode.calcWtf8Len(cmd_line_w);
+
+            // This buffer must be large enough to contain contiguous NUL-terminated slices
+            // of each argument.
+            // - During parsing, the length of a parsed argument will always be less than
+            //   or equal to its unparsed length
+            // - The first argument needs one extra byte of space allocated for its NUL
+            //   terminator, but for each subsequent argument the necessary whitespace
+            //   between arguments guarantees room for their NUL terminator(s).
+            const buffer = try allocator.alloc(u8, wtf8_len + 1);
+            errdefer allocator.free(buffer);
+
+            return .{
+                .allocator = allocator,
+                .cmd_line = cmd_line_w,
+                .buffer = buffer,
+            };
+        }
+
+        /// Returns the next argument and advances the iterator. Returns `null` if at the end of the
+        /// command-line string. The iterator owns the returned slice.
+        /// The result is encoded as [WTF-8](https://wtf-8.codeberg.page/).
+ pub fn next(self: *Windows) ?[:0]const u8 { + return self.nextWithStrategy(next_strategy); + } + + /// Skips the next argument and advances the iterator. Returns `true` if an argument was + /// skipped, `false` if at the end of the command-line string. + pub fn skip(self: *Windows) bool { + return self.nextWithStrategy(skip_strategy); + } + + const next_strategy = struct { + const T = ?[:0]const u8; + + const eof = null; + + /// Returns '\' if any backslashes are emitted, otherwise returns `last_emitted_code_unit`. + fn emitBackslashes(self: *Windows, count: usize, last_emitted_code_unit: ?u16) ?u16 { + for (0..count) |_| { + self.buffer[self.end] = '\\'; + self.end += 1; + } + return if (count != 0) '\\' else last_emitted_code_unit; + } + + /// If `last_emitted_code_unit` and `code_unit` form a surrogate pair, then + /// the previously emitted high surrogate is overwritten by the codepoint encoded + /// by the surrogate pair, and `null` is returned. + /// Otherwise, `code_unit` is emitted and returned. + fn emitCharacter(self: *Windows, code_unit: u16, last_emitted_code_unit: ?u16) ?u16 { + // Because we are emitting WTF-8, we need to + // check to see if we've emitted two consecutive surrogate + // codepoints that form a valid surrogate pair in order + // to ensure that we're always emitting well-formed WTF-8 + // (https://wtf-8.codeberg.page/#concatenating). + // + // If we do have a valid surrogate pair, we need to emit + // the UTF-8 sequence for the codepoint that they encode + // instead of the WTF-8 encoding for the two surrogate pairs + // separately. 
+ // + // This is relevant when dealing with a WTF-16 encoded + // command line like this: + // "<0xD801>"<0xDC37> + // which would get parsed and converted to WTF-8 as: + // <0xED><0xA0><0x81><0xED><0xB0><0xB7> + // but instead, we need to recognize the surrogate pair + // and emit the codepoint it encodes, which in this + // example is U+10437 (𐐷), which is encoded in UTF-8 as: + // <0xF0><0x90><0x90><0xB7> + if (last_emitted_code_unit != null and + std.unicode.utf16IsLowSurrogate(code_unit) and + std.unicode.utf16IsHighSurrogate(last_emitted_code_unit.?)) + { + const codepoint = std.unicode.utf16DecodeSurrogatePair(&.{ last_emitted_code_unit.?, code_unit }) catch unreachable; + + // Unpaired surrogate is 3 bytes long + const dest = self.buffer[self.end - 3 ..]; + const len = std.unicode.utf8Encode(codepoint, dest) catch unreachable; + // All codepoints that require a surrogate pair (> U+FFFF) are encoded as 4 bytes + assert(len == 4); + self.end += 1; + return null; + } + + const wtf8_len = std.unicode.wtf8Encode(code_unit, self.buffer[self.end..]) catch unreachable; + self.end += wtf8_len; + return code_unit; + } + + fn yieldArg(self: *Windows) [:0]const u8 { + self.buffer[self.end] = 0; + const arg = self.buffer[self.start..self.end :0]; + self.end += 1; + self.start = self.end; + return arg; + } + }; + + const skip_strategy = struct { + const T = bool; + + const eof = false; + + fn emitBackslashes(_: *Windows, _: usize, last_emitted_code_unit: ?u16) ?u16 { + return last_emitted_code_unit; + } + + fn emitCharacter(_: *Windows, _: u16, last_emitted_code_unit: ?u16) ?u16 { + return last_emitted_code_unit; + } + + fn yieldArg(_: *Windows) bool { + return true; + } + }; + + fn nextWithStrategy(self: *Windows, comptime strategy: type) strategy.T { + var last_emitted_code_unit: ?u16 = null; + // The first argument (the executable name) uses different parsing rules. 
+ if (self.index == 0) { + if (self.cmd_line.len == 0 or self.cmd_line[0] == 0) { + // Immediately complete the iterator. + // The C runtime would return the name of the current executable here. + return strategy.eof; + } + + var inside_quotes = false; + while (true) : (self.index += 1) { + const char = if (self.index != self.cmd_line.len) + std.mem.littleToNative(u16, self.cmd_line[self.index]) + else + 0; + switch (char) { + 0 => { + return strategy.yieldArg(self); + }, + '"' => { + inside_quotes = !inside_quotes; + }, + ' ', '\t' => { + if (inside_quotes) { + last_emitted_code_unit = strategy.emitCharacter(self, char, last_emitted_code_unit); + } else { + self.index += 1; + return strategy.yieldArg(self); + } + }, + else => { + last_emitted_code_unit = strategy.emitCharacter(self, char, last_emitted_code_unit); + }, + } + } + } + + // Skip spaces and tabs. The iterator completes if we reach the end of the string here. + while (true) : (self.index += 1) { + const char = if (self.index != self.cmd_line.len) + std.mem.littleToNative(u16, self.cmd_line[self.index]) + else + 0; + switch (char) { + 0 => return strategy.eof, + ' ', '\t' => continue, + else => break, + } + } + + // Parsing rules for subsequent arguments: + // + // - The end of the string always terminates the current argument. + // - When not in 'inside_quotes' mode, a space or tab terminates the current argument. + // - 2n backslashes followed by a quote emit n backslashes (note: n can be zero). + // If in 'inside_quotes' and the quote is immediately followed by a second quote, + // one quote is emitted and the other is skipped, otherwise, the quote is skipped + // and 'inside_quotes' is toggled. + // - 2n + 1 backslashes followed by a quote emit n backslashes followed by a quote. + // - n backslashes not followed by a quote emit n backslashes. 
+ var backslash_count: usize = 0; + var inside_quotes = false; + while (true) : (self.index += 1) { + const char = if (self.index != self.cmd_line.len) + std.mem.littleToNative(u16, self.cmd_line[self.index]) + else + 0; + switch (char) { + 0 => { + last_emitted_code_unit = strategy.emitBackslashes(self, backslash_count, last_emitted_code_unit); + return strategy.yieldArg(self); + }, + ' ', '\t' => { + last_emitted_code_unit = strategy.emitBackslashes(self, backslash_count, last_emitted_code_unit); + backslash_count = 0; + if (inside_quotes) { + last_emitted_code_unit = strategy.emitCharacter(self, char, last_emitted_code_unit); + } else return strategy.yieldArg(self); + }, + '"' => { + const char_is_escaped_quote = backslash_count % 2 != 0; + last_emitted_code_unit = strategy.emitBackslashes(self, backslash_count / 2, last_emitted_code_unit); + backslash_count = 0; + if (char_is_escaped_quote) { + last_emitted_code_unit = strategy.emitCharacter(self, '"', last_emitted_code_unit); + } else { + if (inside_quotes and + self.index + 1 != self.cmd_line.len and + std.mem.littleToNative(u16, self.cmd_line[self.index + 1]) == '"') + { + last_emitted_code_unit = strategy.emitCharacter(self, '"', last_emitted_code_unit); + self.index += 1; + } else { + inside_quotes = !inside_quotes; + } + } + }, + '\\' => { + backslash_count += 1; + }, + else => { + last_emitted_code_unit = strategy.emitBackslashes(self, backslash_count, last_emitted_code_unit); + backslash_count = 0; + last_emitted_code_unit = strategy.emitCharacter(self, char, last_emitted_code_unit); + }, + } + } + } + + /// Frees the iterator's copy of the command-line string and all previously returned + /// argument slices. 
+        pub fn deinit(self: *Windows) void {
+            self.allocator.free(self.buffer);
+        }
+    };
+
+    /// Iterator over a vector of NUL-terminated argument strings.
+    /// Does not allocate, so it has no `deinit`.
+    pub const Posix = struct {
+        /// Arguments not yet returned by `next` or consumed by `skip`.
+        remaining: Vector,
+
+        pub const InitError = error{};
+
+        pub fn init(a: Args) Posix {
+            return .{ .remaining = a.vector };
+        }
+
+        /// Returns the next argument, or `null` when none remain.
+        pub fn next(it: *Posix) ?[:0]const u8 {
+            if (it.remaining.len == 0) return null;
+            const arg = it.remaining[0];
+            it.remaining = it.remaining[1..];
+            return std.mem.sliceTo(arg, 0);
+        }
+
+        /// Drops the next argument. Returns `false` when none remain.
+        pub fn skip(it: *Posix) bool {
+            if (it.remaining.len == 0) return false;
+            it.remaining = it.remaining[1..];
+            return true;
+        }
+    };
+
+    /// Iterator that obtains the arguments through the WASI
+    /// `args_sizes_get` / `args_get` calls and keeps an owned copy of them.
+    pub const Wasi = struct {
+        allocator: Allocator,
+        /// Index into `args` of the next argument to return.
+        index: usize,
+        /// One slice per argument, all pointing into a single owned byte buffer.
+        args: [][:0]u8,
+
+        pub const InitError = error{OutOfMemory} || std.posix.UnexpectedError;
+
+        /// You must call deinit to free the internal buffer of the
+        /// iterator after you are done.
+        pub fn init(allocator: Allocator) Wasi.InitError!Wasi {
+            const fetched_args = try Wasi.internalInit(allocator);
+            return Wasi{
+                .allocator = allocator,
+                .index = 0,
+                .args = fetched_args,
+            };
+        }
+
+        /// Fetches all arguments from the host.
+        ///
+        /// On success the caller owns both the returned slice list and the
+        /// byte buffer the slices point into. On failure nothing stays allocated.
+        fn internalInit(allocator: Allocator) Wasi.InitError![][:0]u8 {
+            var count: usize = undefined;
+            var buf_size: usize = undefined;
+
+            switch (std.os.wasi.args_sizes_get(&count, &buf_size)) {
+                .SUCCESS => {},
+                else => |err| return std.posix.unexpectedErrno(err),
+            }
+
+            if (count == 0) {
+                return &[_][:0]u8{};
+            }
+
+            // Temporary pointer table; only needed until the slices are built.
+            const argv = try allocator.alloc([*:0]u8, count);
+            defer allocator.free(argv);
+
+            const argv_buf = try allocator.alloc(u8, buf_size);
+            // Previously leaked when `args_get` or the allocation below failed.
+            errdefer allocator.free(argv_buf);
+
+            switch (std.os.wasi.args_get(argv.ptr, argv_buf.ptr)) {
+                .SUCCESS => {},
+                else => |err| return std.posix.unexpectedErrno(err),
+            }
+
+            const result_args = try allocator.alloc([:0]u8, count);
+            for (result_args, argv) |*dst, src| {
+                dst.* = std.mem.sliceTo(src, 0);
+            }
+
+            return result_args;
+        }
+
+        /// Returns the next argument, or `null` when none remain.
+        pub fn next(self: *Wasi) ?[:0]const u8 {
+            if (self.index == self.args.len) return null;
+
+            const arg = self.args[self.index];
+            self.index += 1;
+            return arg;
+        }
+
+        /// Drops the next argument. Returns `false` when none remain.
+        pub fn skip(self: *Wasi) bool {
+            if (self.index == self.args.len) return false;
+
+            self.index += 1;
+            return true;
+        }
+
+        /// Call to free the internal buffer of the iterator.
+        pub fn deinit(self: *Wasi) void {
+            // Nothing is allocated when there are no args
+            if (self.args.len == 0) return;
+
+            // The byte buffer is not stored; its extent is recomputed from the
+            // first argument's start to the last argument's NUL terminator.
+            // NOTE(review): assumes the host wrote the arguments contiguously and
+            // filled the whole buffer — confirm against the WASI `args_get` contract.
+            const last_item = self.args[self.args.len - 1];
+            const last_byte_addr = @intFromPtr(last_item.ptr) + last_item.len + 1; // null terminated
+            const first_item_ptr = self.args[0].ptr;
+            const len = last_byte_addr - @intFromPtr(first_item_ptr);
+            self.allocator.free(first_item_ptr[0..len]);
+            self.allocator.free(self.args);
+        }
+    };
+};
+
+/// Returns an iterator over the arguments without allocating.
+/// This does not compile on Windows or WASI; use `iterateAllocator` for
+/// cross-platform code.
+pub fn iterate(a: Args) Iterator {
+    return .init(a);
+}
+
+/// You must deinitialize iterator's internal buffers by calling `deinit` when
+/// done.
+pub fn iterateAllocator(a: Args, gpa: Allocator) Iterator.InitError!Iterator {
+    return .initAllocator(a, gpa);
+}
+
+/// Copies all arguments into one allocation that holds the slice list
+/// followed by the NUL-terminated argument bytes; call `freeSlice` to
+/// release.
+///
+/// * On Windows, the result is encoded as
+///   [WTF-8](https://wtf-8.codeberg.page/).
+/// * On other platforms, the result is an opaque sequence of bytes with no
+///   particular encoding.
+pub fn toSlice(a: Args, gpa: Allocator) Allocator.Error![][:0]u8 {
+    // NOTE(review): on WASI without libc `Iterator.InitError` also includes
+    // `std.posix.UnexpectedError`, which `Allocator.Error` does not cover —
+    // confirm this `try` compiles for that target.
+    var it = try a.iterateAllocator(gpa);
+    defer it.deinit();
+
+    // Scratch storage: argument bytes (with terminators) and each argument's length.
+    var contents = std.array_list.Managed(u8).init(gpa);
+    defer contents.deinit();
+
+    var slice_list = std.array_list.Managed(usize).init(gpa);
+    defer slice_list.deinit();
+
+    while (it.next()) |arg| {
+        // `arg.len + 1` also copies the NUL terminator.
+        try contents.appendSlice(arg[0 .. arg.len + 1]);
+        try slice_list.append(arg.len);
+    }
+
+    const contents_slice = contents.items;
+    const slice_sizes = slice_list.items;
+    const slice_list_bytes = std.math.mul(usize, @sizeOf([]u8), slice_sizes.len) catch return error.OutOfMemory;
+    const total_bytes = std.math.add(usize, slice_list_bytes, contents_slice.len) catch return error.OutOfMemory;
+    const buf = try gpa.alignedAlloc(u8, .of([]u8), total_bytes);
+    errdefer gpa.free(buf);
+
+    // Layout of `buf`: [slice headers][argument bytes].
+    const result_slice_list = std.mem.bytesAsSlice([:0]u8, buf[0..slice_list_bytes]);
+    const result_contents = buf[slice_list_bytes..];
+    @memcpy(result_contents[0..contents_slice.len], contents_slice);
+
+    var contents_index: usize = 0;
+    for (slice_sizes, 0..) |len, i| {
+        const new_index = contents_index + len;
+        result_slice_list[i] = result_contents[contents_index..new_index :0];
+        // Step over the NUL terminator to the start of the next argument.
+        contents_index = new_index + 1;
+    }
+
+    return result_slice_list;
+}
+
+/// Frees memory allocated by `toSlice`.
+pub fn freeSlice(gpa: Allocator, to_slice_result: []const [:0]u8) void {
+    // Recompute the size of the single allocation made by `toSlice`:
+    // one slice header plus the bytes and terminator of every argument.
+    var total_bytes: usize = 0;
+    for (to_slice_result) |arg| {
+        total_bytes += @sizeOf([]u8) + arg.len + 1;
+    }
+    const unaligned_allocated_buf = @as([*]const u8, @ptrCast(to_slice_result.ptr))[0..total_bytes];
+    const aligned_allocated_buf: []align(@alignOf([]u8)) const u8 = @alignCast(unaligned_allocated_buf);
+    return gpa.free(aligned_allocated_buf);
+}
+
+test "Iterator.Windows" {
+    const t = testArgIteratorWindows;
+
+    try t(
+        \\"C:\Program Files\zig\zig.exe" run .\src\main.zig -target x86_64-windows-gnu -O ReleaseSafe -- --emoji=🗿 --eval="new Regex(\"Dwayne \\\"The Rock\\\" Johnson\")"
+    , &.{
+        \\C:\Program Files\zig\zig.exe
+        ,
+        \\run
+        ,
+        \\.\src\main.zig
+        ,
+        \\-target
+        ,
+        \\x86_64-windows-gnu
+        ,
+        \\-O
+        ,
+        \\ReleaseSafe
+        ,
+        \\--
+        ,
+        \\--emoji=🗿
+        ,
+        \\--eval=new Regex("Dwayne \"The Rock\" Johnson")
+        ,
+    });
+
+    // Empty
+    try t("", &.{});
+
+    // Separators
+    try t("aa bb cc", &.{ "aa", "bb", "cc" });
+    try t("aa\tbb\tcc", &.{ "aa",
"bb", "cc" }); + try t("aa\nbb\ncc", &.{"aa\nbb\ncc"}); + try t("aa\r\nbb\r\ncc", &.{"aa\r\nbb\r\ncc"}); + try t("aa\rbb\rcc", &.{"aa\rbb\rcc"}); + try t("aa\x07bb\x07cc", &.{"aa\x07bb\x07cc"}); + try t("aa\x7Fbb\x7Fcc", &.{"aa\x7Fbb\x7Fcc"}); + try t("aa🦎bb🦎cc", &.{"aa🦎bb🦎cc"}); + + // Leading/trailing whitespace + try t(" ", &.{""}); + try t(" aa bb ", &.{ "", "aa", "bb" }); + try t("\t\t", &.{""}); + try t("\t\taa\t\tbb\t\t", &.{ "", "aa", "bb" }); + try t("\n\n", &.{"\n\n"}); + try t("\n\naa\n\nbb\n\n", &.{"\n\naa\n\nbb\n\n"}); + + // Executable name with quotes/backslashes + try t("\"aa bb\tcc\ndd\"", &.{"aa bb\tcc\ndd"}); + try t("\"", &.{""}); + try t("\"\"", &.{""}); + try t("\"\"\"", &.{""}); + try t("\"\"\"\"", &.{""}); + try t("\"\"\"\"\"", &.{""}); + try t("aa\"bb\"cc\"dd", &.{"aabbccdd"}); + try t("aa\"bb cc\"dd", &.{"aabb ccdd"}); + try t("\"aa\\\"bb\"", &.{"aa\\bb"}); + try t("\"aa\\\\\"", &.{"aa\\\\"}); + try t("aa\\\"bb", &.{"aa\\bb"}); + try t("aa\\\\\"bb", &.{"aa\\\\bb"}); + + // Arguments with quotes/backslashes + try t(". \"aa bb\tcc\ndd\"", &.{ ".", "aa bb\tcc\ndd" }); + try t(". aa\" \"bb\"\t\"cc\"\n\"dd\"", &.{ ".", "aa bb\tcc\ndd" }); + try t(". ", &.{"."}); + try t(". \"", &.{ ".", "" }); + try t(". \"\"", &.{ ".", "" }); + try t(". \"\"\"", &.{ ".", "\"" }); + try t(". \"\"\"\"", &.{ ".", "\"" }); + try t(". \"\"\"\"\"", &.{ ".", "\"\"" }); + try t(". \"\"\"\"\"\"", &.{ ".", "\"\"" }); + try t(". \" \"", &.{ ".", " " }); + try t(". \" \"\"", &.{ ".", " \"" }); + try t(". \" \"\"\"", &.{ ".", " \"" }); + try t(". \" \"\"\"\"", &.{ ".", " \"\"" }); + try t(". \" \"\"\"\"\"", &.{ ".", " \"\"" }); + try t(". \" \"\"\"\"\"\"", &.{ ".", " \"\"\"" }); + try t(". \\\"", &.{ ".", "\"" }); + try t(". \\\"\"", &.{ ".", "\"" }); + try t(". \\\"\"\"", &.{ ".", "\"" }); + try t(". \\\"\"\"\"", &.{ ".", "\"\"" }); + try t(". \\\"\"\"\"\"", &.{ ".", "\"\"" }); + try t(". \\\"\"\"\"\"\"", &.{ ".", "\"\"\"" }); + try t(". 
\" \\\"", &.{ ".", " \"" }); + try t(". \" \\\"\"", &.{ ".", " \"" }); + try t(". \" \\\"\"\"", &.{ ".", " \"\"" }); + try t(". \" \\\"\"\"\"", &.{ ".", " \"\"" }); + try t(". \" \\\"\"\"\"\"", &.{ ".", " \"\"\"" }); + try t(". \" \\\"\"\"\"\"\"", &.{ ".", " \"\"\"" }); + try t(". aa\\bb\\\\cc\\\\\\dd", &.{ ".", "aa\\bb\\\\cc\\\\\\dd" }); + try t(". \\\\\\\"aa bb\"", &.{ ".", "\\\"aa", "bb" }); + try t(". \\\\\\\\\"aa bb\"", &.{ ".", "\\\\aa bb" }); + + // From https://learn.microsoft.com/en-us/cpp/cpp/main-function-command-line-args#results-of-parsing-command-lines + try t( + \\foo.exe "abc" d e + , &.{ "foo.exe", "abc", "d", "e" }); + try t( + \\foo.exe a\\b d"e f"g h + , &.{ "foo.exe", "a\\\\b", "de fg", "h" }); + try t( + \\foo.exe a\\\"b c d + , &.{ "foo.exe", "a\\\"b", "c", "d" }); + try t( + \\foo.exe a\\\\"b c" d e + , &.{ "foo.exe", "a\\\\b c", "d", "e" }); + try t( + \\foo.exe a"b"" c d + , &.{ "foo.exe", "ab\" c d" }); + + // From https://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULESEX + try t("foo.exe CallMeIshmael", &.{ "foo.exe", "CallMeIshmael" }); + try t("foo.exe \"Call Me Ishmael\"", &.{ "foo.exe", "Call Me Ishmael" }); + try t("foo.exe Cal\"l Me I\"shmael", &.{ "foo.exe", "Call Me Ishmael" }); + try t("foo.exe CallMe\\\"Ishmael", &.{ "foo.exe", "CallMe\"Ishmael" }); + try t("foo.exe \"CallMe\\\"Ishmael\"", &.{ "foo.exe", "CallMe\"Ishmael" }); + try t("foo.exe \"Call Me Ishmael\\\\\"", &.{ "foo.exe", "Call Me Ishmael\\" }); + try t("foo.exe \"CallMe\\\\\\\"Ishmael\"", &.{ "foo.exe", "CallMe\\\"Ishmael" }); + try t("foo.exe a\\\\\\b", &.{ "foo.exe", "a\\\\\\b" }); + try t("foo.exe \"a\\\\\\b\"", &.{ "foo.exe", "a\\\\\\b" }); + + // Surrogate pair encoding of 𐐷 separated by quotes. + // Encoded as WTF-16: + // "<0xD801>"<0xDC37> + // Encoded as WTF-8: + // "<0xED><0xA0><0x81>"<0xED><0xB0><0xB7> + // During parsing, the quotes drop out and the surrogate pair + // should end up encoded as its normal UTF-8 representation. 
+ try t("foo.exe \"\xed\xa0\x81\"\xed\xb0\xb7", &.{ "foo.exe", "𐐷" }); +} + +fn testArgIteratorWindows(cmd_line: []const u8, expected_args: []const []const u8) !void { + const cmd_line_w = try std.unicode.wtf8ToWtf16LeAllocZ(testing.allocator, cmd_line); + defer testing.allocator.free(cmd_line_w); + + // next + { + var it = try Iterator.Windows.init(testing.allocator, cmd_line_w); + defer it.deinit(); + + for (expected_args) |expected| { + if (it.next()) |actual| { + try testing.expectEqualStrings(expected, actual); + } else { + return error.TestUnexpectedResult; + } + } + try testing.expect(it.next() == null); + } + + // skip + { + var it = try Iterator.Windows.init(testing.allocator, cmd_line_w); + defer it.deinit(); + + for (0..expected_args.len) |_| { + try testing.expect(it.skip()); + } + try testing.expect(!it.skip()); + } +} + +test "general arg parsing" { + try testGeneralCmdLine("a b\tc d", &.{ "a", "b", "c", "d" }); + try testGeneralCmdLine("\"abc\" d e", &.{ "abc", "d", "e" }); + try testGeneralCmdLine("a\\\\\\b d\"e f\"g h", &.{ "a\\\\\\b", "de fg", "h" }); + try testGeneralCmdLine("a\\\\\\\"b c d", &.{ "a\\\"b", "c", "d" }); + try testGeneralCmdLine("a\\\\\\\\\"b c\" d e", &.{ "a\\\\b c", "d", "e" }); + try testGeneralCmdLine("a b\tc \"d f", &.{ "a", "b", "c", "d f" }); + try testGeneralCmdLine("j k l\\", &.{ "j", "k", "l\\" }); + try testGeneralCmdLine("\"\" x y z\\\\", &.{ "", "x", "y", "z\\\\" }); + + try testGeneralCmdLine("\".\\..\\zig-cache\\build\" \"bin\\zig.exe\" \".\\..\" \".\\..\\zig-cache\" \"--help\"", &.{ + ".\\..\\zig-cache\\build", + "bin\\zig.exe", + ".\\..", + ".\\..\\zig-cache", + "--help", + }); + + try testGeneralCmdLine( + \\ 'foo' "bar" + , &.{ "'foo'", "bar" }); +} + +fn testGeneralCmdLine(input_cmd_line: []const u8, expected_args: []const []const u8) !void { + var it = try ArgIteratorGeneral(.{}).init(std.testing.allocator, input_cmd_line); + defer it.deinit(); + for (expected_args) |expected_arg| { + const arg = it.next().?; + 
try testing.expectEqualStrings(expected_arg, arg);
+    }
+    try testing.expect(it.next() == null);
+}
+
+/// Optional parameters for `ArgIteratorGeneral`
+pub const ArgIteratorGeneralOptions = struct {
+    /// When true, a `#` found where a new argument would start begins a
+    /// comment that runs to the end of the line.
+    comments: bool = false,
+    /// When true, `'` is handled as a quote character like `"`;
+    /// when false it is emitted as an ordinary character.
+    single_quotes: bool = false,
+};
+
+/// A general Iterator to parse a string into a set of arguments
+pub fn ArgIteratorGeneral(comptime options: ArgIteratorGeneralOptions) type {
+    return struct {
+        allocator: Allocator,
+        /// Position of the next unread byte in `cmd_line`.
+        index: usize = 0,
+        cmd_line: []const u8,
+
+        /// Should the cmd_line field be freed (using the allocator) on deinit()?
+        free_cmd_line_on_deinit: bool,
+
+        /// buffer MUST be long enough to hold the cmd_line plus a null terminator.
+        /// buffer will be freed (using the allocator) on deinit()
+        buffer: []u8,
+        /// Offset in `buffer` where the argument currently being built starts.
+        start: usize = 0,
+        /// Offset in `buffer` one past the last byte written so far.
+        end: usize = 0,
+
+        pub const Self = @This();
+
+        pub const InitError = error{OutOfMemory};
+
+        /// cmd_line_utf8 MUST remain valid and constant while using this instance
+        pub fn init(allocator: Allocator, cmd_line_utf8: []const u8) InitError!Self {
+            // One extra byte for the NUL terminator of the first argument.
+            const buffer = try allocator.alloc(u8, cmd_line_utf8.len + 1);
+            errdefer allocator.free(buffer);
+
+            return Self{
+                .allocator = allocator,
+                .cmd_line = cmd_line_utf8,
+                .free_cmd_line_on_deinit = false,
+                .buffer = buffer,
+            };
+        }
+
+        /// cmd_line_utf8 will be freed (with the allocator) on deinit()
+        pub fn initTakeOwnership(allocator: Allocator, cmd_line_utf8: []const u8) InitError!Self {
+            // One extra byte for the NUL terminator of the first argument.
+            const buffer = try allocator.alloc(u8, cmd_line_utf8.len + 1);
+            errdefer allocator.free(buffer);
+
+            return Self{
+                .allocator = allocator,
+                .cmd_line = cmd_line_utf8,
+                .free_cmd_line_on_deinit = true,
+                .buffer = buffer,
+            };
+        }
+
+        // Skips over whitespace in the cmd_line.
+        // Returns false if the end of cmd_line (or a NUL byte) is reached, true otherwise.
+        // Also skips over comments (if supported).
+        fn skipWhitespace(self: *Self) bool {
+            while (true) : (self.index += 1) {
+                // `cmd_line` is a plain slice, so the end of input is mapped to 0.
+                const character = if (self.index != self.cmd_line.len) self.cmd_line[self.index] else 0;
+                switch (character) {
+                    0 => return false,
+                    ' ', '\t', '\r', '\n' => continue,
+                    '#' => {
+                        if (options.comments) {
+                            while (true) : (self.index += 1) {
+                                // Bounds-checked like the outer loop: a comment that ends
+                                // the input without a newline must not index past the slice.
+                                const comment_char = if (self.index != self.cmd_line.len) self.cmd_line[self.index] else 0;
+                                switch (comment_char) {
+                                    '\n' => break,
+                                    0 => return false,
+                                    else => continue,
+                                }
+                            }
+                            // The outer loop's increment steps over the newline.
+                            continue;
+                        } else {
+                            break;
+                        }
+                    },
+                    else => break,
+                }
+            }
+            return true;
+        }
+
+        /// Advances past the next argument without storing it.
+        /// Returns `true` if an argument was skipped, `false` at the end.
+        pub fn skip(self: *Self) bool {
+            if (!self.skipWhitespace()) {
+                return false;
+            }
+
+            var backslash_count: usize = 0;
+            var in_quote = false;
+            while (true) : (self.index += 1) {
+                const character = if (self.index != self.cmd_line.len) self.cmd_line[self.index] else 0;
+                switch (character) {
+                    0 => return true,
+                    '"', '\'' => {
+                        if (!options.single_quotes and character == '\'') {
+                            backslash_count = 0;
+                            continue;
+                        }
+                        const quote_is_real = backslash_count % 2 == 0;
+                        // Reset exactly as `next` does, so both functions agree on
+                        // whether a following quote is escaped.
+                        backslash_count = 0;
+                        if (quote_is_real) {
+                            in_quote = !in_quote;
+                        }
+                    },
+                    '\\' => {
+                        backslash_count += 1;
+                    },
+                    ' ', '\t', '\r', '\n' => {
+                        if (!in_quote) {
+                            return true;
+                        }
+                        backslash_count = 0;
+                    },
+                    else => {
+                        backslash_count = 0;
+                        continue;
+                    },
+                }
+            }
+        }
+
+        /// Returns a slice of the internal buffer that contains the next argument.
+        /// Returns null when it reaches the end.
+        ///
+        /// Each argument is written to a fresh region of `buffer`, followed by
+        /// a NUL terminator.
+        pub fn next(self: *Self) ?[:0]const u8 {
+            if (!self.skipWhitespace()) {
+                return null;
+            }
+
+            // Backslashes are buffered until the following character shows
+            // whether they escape a quote or are literal.
+            var backslash_count: usize = 0;
+            var in_quote = false;
+            while (true) : (self.index += 1) {
+                // `cmd_line` is a plain slice, so the end of input is mapped to 0.
+                const character = if (self.index != self.cmd_line.len) self.cmd_line[self.index] else 0;
+                switch (character) {
+                    0 => {
+                        self.emitBackslashes(backslash_count);
+                        self.buffer[self.end] = 0;
+                        const token = self.buffer[self.start..self.end :0];
+                        self.end += 1;
+                        self.start = self.end;
+                        return token;
+                    },
+                    '"', '\'' => {
+                        if (!options.single_quotes and character == '\'') {
+                            // Single quotes are ordinary characters in this mode.
+                            self.emitBackslashes(backslash_count);
+                            backslash_count = 0;
+                            self.emitCharacter(character);
+                            continue;
+                        }
+                        // An even number of preceding backslashes leaves the quote
+                        // unescaped; either way half of them are emitted.
+                        const quote_is_real = backslash_count % 2 == 0;
+                        self.emitBackslashes(backslash_count / 2);
+                        backslash_count = 0;
+
+                        if (quote_is_real) {
+                            in_quote = !in_quote;
+                        } else {
+                            // Emit the quote that was actually escaped; this used to
+                            // emit `"` even for an escaped `'`.
+                            self.emitCharacter(character);
+                        }
+                    },
+                    '\\' => {
+                        backslash_count += 1;
+                    },
+                    ' ', '\t', '\r', '\n' => {
+                        self.emitBackslashes(backslash_count);
+                        backslash_count = 0;
+                        if (in_quote) {
+                            self.emitCharacter(character);
+                        } else {
+                            self.buffer[self.end] = 0;
+                            const token = self.buffer[self.start..self.end :0];
+                            self.end += 1;
+                            self.start = self.end;
+                            return token;
+                        }
+                    },
+                    else => {
+                        self.emitBackslashes(backslash_count);
+                        backslash_count = 0;
+                        self.emitCharacter(character);
+                    },
+                }
+            }
+        }
+
+        /// Appends `emit_count` backslashes to the argument being built.
+        fn emitBackslashes(self: *Self, emit_count: usize) void {
+            for (0..emit_count) |_| {
+                self.emitCharacter('\\');
+            }
+        }
+
+        /// Appends one byte to the argument being built.
+        fn emitCharacter(self: *Self, char: u8) void {
+            self.buffer[self.end] = char;
+            self.end += 1;
+        }
+
+        /// Call to free the internal buffer of the iterator.
+ pub fn deinit(self: *Self) void { + self.allocator.free(self.buffer); + + if (self.free_cmd_line_on_deinit) { + self.allocator.free(self.cmd_line); + } + } + }; +} + +test "response file arg parsing" { + try testResponseFileCmdLine( + \\a b + \\c d\ + , &.{ "a", "b", "c", "d\\" }); + try testResponseFileCmdLine("a b c d\\", &.{ "a", "b", "c", "d\\" }); + + try testResponseFileCmdLine( + \\j + \\ k l # this is a comment \\ \\\ \\\\ "none" "\\" "\\\" + \\ "m" #another comment + \\ + , &.{ "j", "k", "l", "m" }); + + try testResponseFileCmdLine( + \\ "" q "" + \\ "r s # t" "u\" v" #another comment + \\ + , &.{ "", "q", "", "r s # t", "u\" v" }); + + try testResponseFileCmdLine( + \\ -l"advapi32" a# b#c d# + \\e\\\ + , &.{ "-ladvapi32", "a#", "b#c", "d#", "e\\\\\\" }); + + try testResponseFileCmdLine( + \\ 'foo' "bar" + , &.{ "foo", "bar" }); +} + +fn testResponseFileCmdLine(input_cmd_line: []const u8, expected_args: []const []const u8) !void { + var it = try ArgIteratorGeneral(.{ .comments = true, .single_quotes = true }) + .init(std.testing.allocator, input_cmd_line); + defer it.deinit(); + for (expected_args) |expected_arg| { + const arg = it.next().?; + try testing.expectEqualStrings(expected_arg, arg); + } + try testing.expect(it.next() == null); +} diff --git a/lib/std/process/Child.zig b/lib/std/process/Child.zig index 7c737c318b..f2e096bca2 100644 --- a/lib/std/process/Child.zig +++ b/lib/std/process/Child.zig @@ -13,12 +13,17 @@ const windows = std.os.windows; const linux = std.os.linux; const posix = std.posix; const mem = std.mem; -const EnvMap = std.process.EnvMap; const maxInt = std.math.maxInt; const assert = std.debug.assert; const Allocator = std.mem.Allocator; const ArrayList = std.ArrayList; +/// Tells whether spawning child processes is supported. 
+pub const can_spawn = switch (native_os) { + .wasi, .ios, .tvos, .visionos, .watchos => false, + else => true, +}; + pub const Id = switch (native_os) { .windows => windows.HANDLE, .wasi => void, @@ -54,9 +59,8 @@ term: ?(SpawnError!Term), argv: []const []const u8, -/// Leave as null to use the current env map using the supplied allocator. -/// Required if unable to access the current env map (e.g. building a library on -/// some platforms). +parent_environ: process.Environ, +/// `null` means to use `parent_environ` also for the spawned process. env_map: ?*const EnvMap, stdin_behavior: StdIo, @@ -229,15 +233,15 @@ pub const StdIo = enum { }; /// First argument in argv is the executable. -pub fn init(argv: []const []const u8, allocator: Allocator) Child { +pub fn init(gpa: Allocator, argv: []const []const u8, environ: Environ) Child { return .{ - .allocator = allocator, + .allocator = gpa, .argv = argv, + .environ = environ, .id = undefined, .thread_handle = undefined, .err_pipe = if (native_os == .windows) {} else null, .term = null, - .env_map = null, .cwd = null, .uid = if (native_os == .windows or native_os == .wasi) {} else null, .gid = if (native_os == .windows or native_os == .wasi) {} else null, @@ -435,41 +439,38 @@ pub const RunError = posix.GetCwdError || posix.ReadError || SpawnError || posix /// Spawns a child process, waits for it, collecting stdout and stderr, and then returns. /// If it succeeds, the caller owns result.stdout and result.stderr memory. -pub fn run(allocator: Allocator, io: Io, args: struct { +pub fn run(gpa: Allocator, io: Io, args: struct { argv: []const []const u8, + environ: Environ, cwd: ?[]const u8 = null, cwd_dir: ?Io.Dir = null, - /// Required if unable to access the current env map (e.g. building a - /// library on some platforms). 
- env_map: ?*const EnvMap = null, max_output_bytes: usize = 50 * 1024, expand_arg0: Arg0Expand = .no_expand, progress_node: std.Progress.Node = std.Progress.Node.none, }) RunError!RunResult { - var child = Child.init(args.argv, allocator); + var child = Child.init(gpa, args.argv, args.environ); child.stdin_behavior = .Ignore; child.stdout_behavior = .Pipe; child.stderr_behavior = .Pipe; child.cwd = args.cwd; child.cwd_dir = args.cwd_dir; - child.env_map = args.env_map; child.expand_arg0 = args.expand_arg0; child.progress_node = args.progress_node; var stdout: ArrayList(u8) = .empty; - defer stdout.deinit(allocator); + defer stdout.deinit(gpa); var stderr: ArrayList(u8) = .empty; - defer stderr.deinit(allocator); + defer stderr.deinit(gpa); try child.spawn(io); errdefer { _ = child.kill(io) catch {}; } - try child.collectOutput(allocator, &stdout, &stderr, args.max_output_bytes); + try child.collectOutput(gpa, &stdout, &stderr, args.max_output_bytes); return .{ - .stdout = try stdout.toOwnedSlice(allocator), - .stderr = try stderr.toOwnedSlice(allocator), + .stdout = try stdout.toOwnedSlice(gpa), + .stderr = try stderr.toOwnedSlice(gpa), .term = try child.wait(io), }; } @@ -643,23 +644,16 @@ fn spawnPosix(self: *Child, io: Io) SpawnError!void { const envp: [*:null]const ?[*:0]const u8 = m: { const prog_fd: i32 = if (prog_pipe[1] == -1) -1 else prog_fileno; - if (self.env_map) |env_map| { - break :m (try process.createEnvironFromMap(arena, env_map, .{ + switch (self.environ) { + .empty => break :m (try process.Environ.createBlock(.{ .block = &.{} }, arena, .{ .zig_progress_fd = prog_fd, - })).ptr; - } else if (builtin.link_libc) { - break :m (try process.createEnvironFromExisting(arena, std.c.environ, .{ + })).ptr, + .inherit => |b| break :m (try b.createBlock(arena, .{ .zig_progress_fd = prog_fd, - })).ptr; - } else if (builtin.output_mode == .Exe) { - // Then we have Zig start code and this works. - // TODO type-safety for null-termination of `os.environ`. 
- break :m (try process.createEnvironFromExisting(arena, @ptrCast(std.os.environ.ptr), .{ + })).ptr, + .map => |m| break :m (try m.createBlock(arena, .{ .zig_progress_fd = prog_fd, - })).ptr; - } else { - // TODO come up with a solution for this. - @panic("missing std lib enhancement: std.process.Child implementation has no way to collect the environment variables to forward to the child process"); + })).ptr, } }; @@ -701,9 +695,15 @@ fn spawnPosix(self: *Child, io: Io) SpawnError!void { posix.kill(posix.getpid(), .STOP) catch |err| forkChildErrReport(io, err_pipe[1], err); } + const parent_PATH: ?[]const u8 = switch(self.environ) { + .empty => null, + .inherit => + .map => |m| m.get("PATH"), + }; + const err = switch (self.expand_arg0) { - .expand => posix.execvpeZ_expandArg0(.expand, argv_buf.ptr[0].?, argv_buf.ptr, envp), - .no_expand => posix.execvpeZ_expandArg0(.no_expand, argv_buf.ptr[0].?, argv_buf.ptr, envp), + .expand => posix.execvpeZ_expandArg0(.expand, argv_buf.ptr[0].?, argv_buf.ptr, envp, parent_PATH), + .no_expand => posix.execvpeZ_expandArg0(.no_expand, argv_buf.ptr[0].?, argv_buf.ptr, envp, parent_PATH), }; forkChildErrReport(io, err_pipe[1], err); } diff --git a/lib/std/process/Environ.zig b/lib/std/process/Environ.zig new file mode 100644 index 0000000000..5f5386f02d --- /dev/null +++ b/lib/std/process/Environ.zig @@ -0,0 +1,764 @@ +const Environ = @This(); + +const builtin = @import("builtin"); +const native_os = builtin.os.tag; + +const std = @import("../std.zig"); +const Allocator = std.mem.Allocator; +const assert = std.debug.assert; +const testing = std.debug.testing; +const unicode = std.unicode; +const posix = std.posix; +const mem = std.mem; + +block: Block, + +pub const Block = switch (native_os) { + .windows => []const u16, + else => []const [*:0]const u8, +}; + +pub const Map = struct { + array_hash_map: ArrayHashMap, + allocator: Allocator, + + const ArrayHashMap = std.ArrayHashMapUnmanaged([]const u8, []const u8, EnvNameHashContext, 
false);
+
+    pub const Size = usize;
+
+    /// Hash and equality functions for environment variable names.
+    /// On Windows names are compared after upper-casing every code point;
+    /// on all other targets they are compared byte for byte.
+    pub const EnvNameHashContext = struct {
+        /// Upper-cases `c` with `RtlUpcaseUnicodeChar` when it fits in 16 bits.
+        /// Larger code points are returned unchanged.
+        fn upcase(c: u21) u21 {
+            if (c <= std.math.maxInt(u16))
+                return std.os.windows.ntdll.RtlUpcaseUnicodeChar(@as(u16, @intCast(c)));
+            return c;
+        }
+
+        pub fn hash(self: @This(), s: []const u8) u32 {
+            _ = self;
+            if (native_os == .windows) {
+                var h = std.hash.Wyhash.init(0);
+                var it = unicode.Wtf8View.initUnchecked(s).iterator();
+                while (it.nextCodepoint()) |cp| {
+                    // Feed the three low bytes of the upper-cased code point,
+                    // most significant first.
+                    const cp_upper = upcase(cp);
+                    h.update(&[_]u8{
+                        @as(u8, @intCast((cp_upper >> 16) & 0xff)),
+                        @as(u8, @intCast((cp_upper >> 8) & 0xff)),
+                        @as(u8, @intCast((cp_upper >> 0) & 0xff)),
+                    });
+                }
+                // `Wyhash.final` yields 64 bits; the context must return 32.
+                return @truncate(h.final());
+            }
+            return std.array_hash_map.hashString(s);
+        }
+
+        pub fn eql(self: @This(), a: []const u8, b: []const u8, b_index: usize) bool {
+            _ = self;
+            _ = b_index;
+            if (native_os == .windows) {
+                var it_a = unicode.Wtf8View.initUnchecked(a).iterator();
+                var it_b = unicode.Wtf8View.initUnchecked(b).iterator();
+                while (true) {
+                    const c_a = it_a.nextCodepoint() orelse break;
+                    const c_b = it_b.nextCodepoint() orelse return false;
+                    if (upcase(c_a) != upcase(c_b))
+                        return false;
+                }
+                // `a` is exhausted; the names match only if `b` is as well.
+                return if (it_b.nextCodepoint()) |_| false else true;
+            }
+            return std.array_hash_map.eqlString(a, b);
+        }
+    };
+
+    /// Create a Map backed by a specific allocator.
+    /// That allocator will be used for both backing allocations
+    /// and string duplication.
+    pub fn init(allocator: Allocator) Map {
+        return .{ .array_hash_map = .empty, .allocator = allocator };
+    }
+
+    /// Free the backing storage of the map, as well as all
+    /// of the stored keys and values.
+    pub fn deinit(self: *Map) void {
+        const gpa = self.allocator;
+        // Keys and values are owned by the map, so release each of them
+        // before the table itself.
+        var it = self.array_hash_map.iterator();
+        while (it.next()) |entry| {
+            gpa.free(entry.key_ptr.*);
+            gpa.free(entry.value_ptr.*);
+        }
+        self.array_hash_map.deinit(gpa);
+        // The map must not be used after this point.
+        self.* = undefined;
+    }
+
+    /// Returns the keys of the underlying array hash map.
+    pub fn keys(m: *Map) [][]const u8 {
+        return m.array_hash_map.keys();
+    }
+
+    /// Returns the values of the underlying array hash map.
+    pub fn values(m: *Map) [][]const u8 {
+        return m.array_hash_map.values();
+    }
+
+    /// Same as `put` but the key and value become owned by the Map rather
+    /// than being copied.
+    /// If `putMove` fails, the ownership of key and value does not transfer.
+    /// On Windows `key` must be a valid [WTF-8](https://wtf-8.codeberg.page/) string.
+    pub fn putMove(self: *Map, key: []u8, value: []u8) !void {
+        const gpa = self.allocator;
+        assert(unicode.wtf8ValidateSlice(key));
+        const get_or_put = try self.array_hash_map.getOrPut(gpa, key);
+        if (get_or_put.found_existing) {
+            // Replace the stored key as well as the value: the previously
+            // stored strings are released and the new ones take their place.
+            gpa.free(get_or_put.key_ptr.*);
+            gpa.free(get_or_put.value_ptr.*);
+            get_or_put.key_ptr.* = key;
+        }
+        get_or_put.value_ptr.* = value;
+    }
+
+    /// `key` and `value` are copied into the Map.
+    /// On Windows `key` must be a valid [WTF-8](https://wtf-8.codeberg.page/) string.
+    pub fn put(self: *Map, key: []const u8, value: []const u8) !void {
+        assert(unicode.wtf8ValidateSlice(key));
+        const gpa = self.allocator;
+        const value_copy = try gpa.dupe(u8, value);
+        errdefer gpa.free(value_copy);
+        const get_or_put = try self.array_hash_map.getOrPut(gpa, key);
+        // If duplicating the key fails below, remove the entry that
+        // `getOrPut` just appended so the map never holds a borrowed key.
+        errdefer {
+            if (!get_or_put.found_existing) assert(self.array_hash_map.pop() != null);
+        }
+        if (get_or_put.found_existing) {
+            // Keep the stored key; only the value is replaced.
+            gpa.free(get_or_put.value_ptr.*);
+        } else {
+            get_or_put.key_ptr.* = try gpa.dupe(u8, key);
+        }
+        get_or_put.value_ptr.* = value_copy;
+    }
+
+    /// Find the address of the value associated with a key.
+    /// The returned pointer is invalidated if the map resizes.
+    /// On Windows `key` must be a valid [WTF-8](https://wtf-8.codeberg.page/) string.
+    pub fn getPtr(self: Map, key: []const u8) ?*[]const u8 {
+        assert(unicode.wtf8ValidateSlice(key));
+        return self.array_hash_map.getPtr(key);
+    }
+
+    /// Return the map's copy of the value associated with
+    /// a key. The returned string is invalidated if this
+    /// key is removed from the map.
+    /// On Windows `key` must be a valid [WTF-8](https://wtf-8.codeberg.page/) string.
+    pub fn get(self: Map, key: []const u8) ?[]const u8 {
+        assert(unicode.wtf8ValidateSlice(key));
+        return self.array_hash_map.get(key);
+    }
+
+    /// Returns whether the map holds an entry for `key`.
+    /// On Windows `key` must be a valid [WTF-8](https://wtf-8.codeberg.page/) string.
+    pub fn contains(m: *const Map, key: []const u8) bool {
+        assert(unicode.wtf8ValidateSlice(key));
+        // Query the underlying table; calling `m.contains` here would recurse
+        // into this function forever.
+        return m.array_hash_map.contains(key);
+    }
+
+    /// If there is an entry with a matching key, it is deleted from the hash
+    /// map. The entry is removed from the underlying array by swapping it with
+    /// the last element.
+    ///
+    /// Returns true if an entry was removed, false otherwise.
+    ///
+    /// This invalidates the value returned by get() for this key.
+    /// On Windows `key` must be a valid [WTF-8](https://wtf-8.codeberg.page/) string.
+    pub fn swapRemove(self: *Map, key: []const u8) bool {
+        assert(unicode.wtf8ValidateSlice(key));
+        const kv = self.array_hash_map.fetchSwapRemove(key) orelse return false;
+        // The map owned both strings, so they are released with the entry.
+        const gpa = self.allocator;
+        gpa.free(kv.key);
+        gpa.free(kv.value);
+        return true;
+    }
+
+    /// If there is an entry with a matching key, it is deleted from the map.
+    /// The entry is removed from the underlying array by shifting all elements
+    /// forward, thereby maintaining the current ordering.
+    ///
+    /// Returns true if an entry was removed, false otherwise.
+    ///
+    /// This invalidates the value returned by get() for this key.
+    /// On Windows `key` must be a valid [WTF-8](https://wtf-8.codeberg.page/) string.
+    pub fn orderedRemove(self: *Map, key: []const u8) bool {
+        assert(unicode.wtf8ValidateSlice(key));
+        if (self.array_hash_map.fetchOrderedRemove(key)) |removed| {
+            // Both strings belonged to the map and go away with the entry.
+            self.allocator.free(removed.key);
+            self.allocator.free(removed.value);
+            return true;
+        }
+        return false;
+    }
+
+    /// Number of key/value pairs currently held by the map.
+    pub fn count(self: Map) Size {
+        const table = self.array_hash_map;
+        return table.count();
+    }
+
+    /// Iterator over the entries of the map.
+    pub fn iterator(self: *const Map) ArrayHashMap.Iterator {
+        const table = self.array_hash_map;
+        return table.iterator();
+    }
+
+    /// Returns a full copy of `m` whose table, keys and values are allocated
+    /// with `gpa`, which does not have to be the allocator `m` was created with.
+    pub fn clone(m: *const Map, gpa: Allocator) Allocator.Error!Map {
+        // Entries are inserted one at a time into a fresh map so that a failed
+        // allocation only has to tear down `copy`.
+        var copy: Map = .init(gpa);
+        errdefer copy.deinit();
+
+        const entry_count = m.array_hash_map.count();
+        try copy.array_hash_map.ensureUnusedCapacity(gpa, entry_count);
+
+        const source_keys = m.array_hash_map.keys();
+        const source_values = m.array_hash_map.values();
+        for (source_keys, source_values) |key, value| try copy.put(key, value);
+
+        return copy;
+    }
+
+    /// Creates a null-delimited environment variable block in the format
+    /// expected by POSIX, from a hash map plus options.
+    ///
+    /// Every string of the result is allocated with `arena`.
+    /// `options.zig_progress_fd` decides whether the `ZIG_PROGRESS` entry is
+    /// kept, rewritten, added or dropped.
+    pub fn createBlock(
+        map: *const Map,
+        arena: Allocator,
+        options: CreateBlockOptions,
+    ) Allocator.Error![:null]?[*:0]u8 {
+        const ZigProgressAction = enum { nothing, edit, delete, add };
+        const action: ZigProgressAction = a: {
+            const fd = options.zig_progress_fd orelse break :a .nothing;
+            const exists = map.get("ZIG_PROGRESS") != null;
+            // A negative descriptor asks for the variable to be removed.
+            if (fd < 0) break :a if (exists) .delete else .nothing;
+            break :a if (exists) .edit else .add;
+        };
+
+        const envp_count: usize = switch (action) {
+            .add => map.count() + 1,
+            .delete => map.count() - 1,
+            .nothing, .edit => map.count(),
+        };
+
+        const envp = try arena.allocSentinel(?[*:0]u8, envp_count, null);
+        var filled: usize = 0;
+
+        // A newly added `ZIG_PROGRESS` goes first, ahead of the map entries.
+        if (action == .add) {
+            envp[filled] = try std.fmt.allocPrintSentinel(arena, "ZIG_PROGRESS={d}", .{options.zig_progress_fd.?}, 0);
+            filled += 1;
+        }
+
+        var entries = map.iterator();
+        while (entries.next()) |entry| {
+            const name = entry.key_ptr.*;
+            const is_progress = mem.eql(u8, name, "ZIG_PROGRESS");
+            if (is_progress) switch (action) {
+                // `.add` is only chosen when the map has no such entry.
+                .add => unreachable,
+                .delete => continue,
+                .edit, .nothing => {},
+            };
+
+            envp[filled] = if (is_progress and action == .edit)
+                try std.fmt.allocPrintSentinel(arena, "{s}={d}", .{
+                    name, options.zig_progress_fd.?,
+                }, 0)
+            else
+                try std.fmt.allocPrintSentinel(arena, "{s}={s}", .{ name, entry.value_ptr.* }, 0);
+            filled += 1;
+        }
+
+        assert(filled == envp_count);
+        return envp;
+    }
+};
+
+pub const CreateMapError = error{
+    OutOfMemory,
+    /// WASI-only. `environ_sizes_get` or `environ_get` failed for an
+    /// unexpected reason.
+    Unexpected,
+};
+
+/// Allocates a `Map` and copies environment block into it.
+///
+/// On WASI without libc the variables are queried from the runtime and
+/// `env.block` is not consulted.
+/// The caller owns the returned map and releases it with `Map.deinit`.
+pub fn createMap(env: Environ, allocator: Allocator) CreateMapError!Map {
+    var result = Map.init(allocator);
+    errdefer result.deinit();
+
+    if (native_os == .windows) {
+        const ptr = env.block;
+
+        var i: usize = 0;
+        while (ptr[i] != 0) {
+            const key_start = i;
+
+            // There are some special environment variables that start with =,
+            // so we need a special case to not treat = as a key/value separator
+            // if it's the first character.
+            // https://devblogs.microsoft.com/oldnewthing/20100506-00/?p=14133
+            if (ptr[key_start] == '=') i += 1;
+
+            while (ptr[i] != 0 and ptr[i] != '=') : (i += 1) {}
+            const key_w = ptr[key_start..i];
+            const key = try unicode.wtf16LeToWtf8Alloc(allocator, key_w);
+            errdefer allocator.free(key);
+
+            if (ptr[i] == '=') i += 1;
+
+            const value_start = i;
+            while (ptr[i] != 0) : (i += 1) {}
+            const value_w = ptr[value_start..i];
+            const value = try unicode.wtf16LeToWtf8Alloc(allocator, value_w);
+            errdefer allocator.free(value);
+
+            i += 1; // skip over null byte
+
+            // Ownership of `key` and `value` moves into the map on success.
+            try result.putMove(key, value);
+        }
+        return result;
+    } else if (native_os == .wasi and !builtin.link_libc) {
+        var environ_count: usize = undefined;
+        var environ_buf_size: usize = undefined;
+
+        const environ_sizes_get_ret = std.os.wasi.environ_sizes_get(&environ_count, &environ_buf_size);
+        if (environ_sizes_get_ret != .SUCCESS) {
+            return posix.unexpectedErrno(environ_sizes_get_ret);
+        }
+
+        if (environ_count == 0) {
+            return result;
+        }
+
+        // Scratch buffers for the runtime call; `put` copies what it keeps.
+        const environ = try allocator.alloc([*:0]u8, environ_count);
+        defer allocator.free(environ);
+        const environ_buf = try allocator.alloc(u8, environ_buf_size);
+        defer allocator.free(environ_buf);
+
+        const environ_get_ret = std.os.wasi.environ_get(environ.ptr, environ_buf.ptr);
+        if (environ_get_ret != .SUCCESS) {
+            return posix.unexpectedErrno(environ_get_ret);
+        }
+
+        for (environ) |line| {
+            const pair = mem.sliceTo(line, 0);
+            var parts = mem.splitScalar(u8, pair, '=');
+            const key = parts.first();
+            const value = parts.rest();
+            try result.put(key, value);
+        }
+        return result;
+    } else {
+        // `Block` is a slice of NUL-terminated `NAME=value` strings on every
+        // remaining target, whether or not libc is linked, so it is walked by
+        // length. An entry without `=` is stored with an empty value.
+        for (env.block) |line| {
+            var parts = mem.splitScalar(u8, mem.sliceTo(line, 0), '=');
+            const key = parts.first();
+            const value = parts.rest();
+            try result.put(key, value);
+        }
+        return result;
+    }
+}
+
+test createMap {
+    // An empty environment: a Windows block ends with a NUL code unit, the
+    // other targets use a slice without entries.
+    const empty_block: Block = switch (native_os) {
+        .windows => &.{0},
+        else => &.{},
+    };
+    const empty: Environ = .{ .block = empty_block };
+    var map = try empty.createMap(std.testing.allocator);
+    defer map.deinit();
+}
+
+pub const GetEnvVarOwnedError = error{
+    OutOfMemory,
+    EnvironmentVariableNotFound,
+
+    /// On Windows, environment variable keys provided by the user must be valid WTF-8.
+    /// https://wtf-8.codeberg.page/
+    InvalidWtf8,
+};
+
+/// Caller must free returned memory.
+/// On Windows, if `key` is not valid [WTF-8](https://wtf-8.codeberg.page/),
+/// then `error.InvalidWtf8` is returned.
+/// On Windows, the value is encoded as [WTF-8](https://wtf-8.codeberg.page/).
+/// On other platforms, the value is an opaque sequence of bytes with no particular encoding.
+pub fn getEnvVarOwned(allocator: Allocator, key: []const u8) GetEnvVarOwnedError![]u8 {
+    if (native_os == .windows) {
+        const result_w = blk: {
+            // Short names are converted on the stack; longer ones fall back
+            // to `allocator`.
+            var stack_alloc = std.heap.stackFallback(256 * @sizeOf(u16), allocator);
+            const stack_allocator = stack_alloc.get();
+            const key_w = try unicode.wtf8ToWtf16LeAllocZ(stack_allocator, key);
+            defer stack_allocator.free(key_w);
+
+            break :blk getenvW(key_w) orelse return error.EnvironmentVariableNotFound;
+        };
+        // wtf16LeToWtf8Alloc can only fail with OutOfMemory
+        return unicode.wtf16LeToWtf8Alloc(allocator, result_w);
+    } else if (native_os == .wasi and !builtin.link_libc) {
+        // On this target `createMap` reads the variables from the WASI
+        // runtime and does not look at the block, so an empty one is passed.
+        // Any failure is reported as `OutOfMemory`.
+        var envmap = createMap(.{ .block = &.{} }, allocator) catch return error.OutOfMemory;
+        defer envmap.deinit();
+        const val = envmap.get(key) orelse return error.EnvironmentVariableNotFound;
+        return allocator.dupe(u8, val);
+    } else {
+        const result = posix.getenv(key) orelse return error.EnvironmentVariableNotFound;
+        return allocator.dupe(u8, result);
+    }
+}
+
+/// Returns whether the comptime-known variable `key` exists.
+/// Always `false` on WASI without libc.
+/// On Windows, `key` must be valid WTF-8.
+pub inline fn hasEnvVarConstant(comptime key: []const u8) bool {
+    if (native_os == .windows) {
+        const key_w = comptime unicode.wtf8ToWtf16LeStringLiteral(key);
+        return getenvW(key_w) != null;
+    } else if (native_os == .wasi and !builtin.link_libc) {
+        return false;
+    } else {
+        return posix.getenv(key) != null;
+    }
+}
+
+/// Returns whether the comptime-known variable `key` exists and has a
+/// non-empty value. Always `false` on WASI without libc.
+/// On Windows, `key` must be valid WTF-8.
+pub inline fn hasNonEmptyEnvVarConstant(comptime key: []const u8) bool {
+    if (native_os == .windows) {
+        const key_w = comptime unicode.wtf8ToWtf16LeStringLiteral(key);
+        const value = getenvW(key_w) orelse return false;
+        return value.len != 0;
+    } else if (native_os == .wasi and !builtin.link_libc) {
+        return false;
+    } else {
+        const value = posix.getenv(key) orelse return false;
+        return value.len != 0;
+    }
+}
+
+pub const ParseIntError = std.fmt.ParseIntError || error{EnvironmentVariableNotFound};
+
+/// Parses an environment variable as an integer.
+///
+/// On Windows, `key` must be valid WTF-8.
+pub fn parseInt(io: std.Io, key: []const u8, comptime I: type, base: u8) ParseIntError!I {
+    const text = io.environ(key) orelse return error.EnvironmentVariableNotFound;
+    return std.fmt.parseInt(I, text, base);
+}
+
+pub const HasEnvVarError = error{
+    OutOfMemory,
+
+    /// On Windows, environment variable keys provided by the user must be valid WTF-8.
+    /// https://wtf-8.codeberg.page/
+    InvalidWtf8,
+};
+
+/// Returns whether the variable `key` exists.
+/// On Windows, if `key` is not valid [WTF-8](https://wtf-8.codeberg.page/),
+/// then `error.InvalidWtf8` is returned.
+pub fn hasEnvVar(allocator: Allocator, key: []const u8) HasEnvVarError!bool {
+    if (native_os == .windows) {
+        var stack_alloc = std.heap.stackFallback(256 * @sizeOf(u16), allocator);
+        const stack_allocator = stack_alloc.get();
+        const key_w = try unicode.wtf8ToWtf16LeAllocZ(stack_allocator, key);
+        defer stack_allocator.free(key_w);
+        return getenvW(key_w) != null;
+    } else if (native_os == .wasi and !builtin.link_libc) {
+        // See `getEnvVarOwned`: the block is not consulted on this target.
+        var envmap = createMap(.{ .block = &.{} }, allocator) catch return error.OutOfMemory;
+        defer envmap.deinit();
+        return envmap.getPtr(key) != null;
+    } else {
+        return posix.getenv(key) != null;
+    }
+}
+
+/// On Windows, if `key` is not valid [WTF-8](https://wtf-8.codeberg.page/),
+/// then `error.InvalidWtf8` is returned.
+pub fn hasNonEmptyEnvVar(allocator: Allocator, key: []const u8) HasEnvVarError!bool {
+    if (native_os == .windows) {
+        var stack_alloc = std.heap.stackFallback(256 * @sizeOf(u16), allocator);
+        const stack_allocator = stack_alloc.get();
+        const key_w = try unicode.wtf8ToWtf16LeAllocZ(stack_allocator, key);
+        defer stack_allocator.free(key_w);
+        const value = getenvW(key_w) orelse return false;
+        return value.len != 0;
+    } else if (native_os == .wasi and !builtin.link_libc) {
+        // On this target `createMap` reads the variables from the WASI
+        // runtime and does not look at the block, so an empty one is passed.
+        var envmap = createMap(.{ .block = &.{} }, allocator) catch return error.OutOfMemory;
+        defer envmap.deinit();
+        const value = envmap.getPtr(key) orelse return false;
+        return value.len != 0;
+    } else {
+        const value = posix.getenv(key) orelse return false;
+        return value.len != 0;
+    }
+}
+
+/// Windows-only. Get an environment variable with a null-terminated, WTF-16 encoded name.
+/// The returned slice points to memory in the PEB.
+///
+/// This function performs a Unicode-aware case-insensitive lookup using RtlEqualUnicodeString.
+///
+/// See also:
+/// * `std.posix.getenv`
+/// * `createMap`
+/// * `getEnvVarOwned`
+/// * `hasEnvVarConstant`
+/// * `hasEnvVar`
+pub fn getenvW(key: [*:0]const u16) ?[:0]const u16 {
+    if (native_os != .windows) {
+        @compileError("Windows-only");
+    }
+    const key_slice = mem.sliceTo(key, 0);
+    // '=' anywhere but the start makes this an invalid environment variable name
+    if (key_slice.len > 0 and std.mem.findScalar(u16, key_slice[1..], '=') != null) {
+        return null;
+    }
+    const ptr = std.os.windows.peb().ProcessParameters.Environment;
+    var i: usize = 0;
+    while (ptr[i] != 0) {
+        const key_value = mem.sliceTo(ptr[i..], 0);
+
+        // There are some special environment variables that start with =,
+        // so we need a special case to not treat = as a key/value separator
+        // if it's the first character.
+        // https://devblogs.microsoft.com/oldnewthing/20100506-00/?p=14133
+        const equal_search_start: usize = if (key_value[0] == '=') 1 else 0;
+        const equal_index = std.mem.findScalarPos(u16, key_value, equal_search_start, '=') orelse {
+            // This is enforced by CreateProcess.
+            // If violated, CreateProcess will fail with INVALID_PARAMETER.
+            unreachable; // must contain a =
+        };
+
+        const this_key = key_value[0..equal_index];
+        if (std.os.windows.eqlIgnoreCaseWtf16(key_slice, this_key)) {
+            return key_value[equal_index + 1 ..];
+        }
+
+        // skip past the NUL terminator
+        i += key_value.len + 1;
+    }
+    return null;
+}
+
+test getEnvVarOwned {
+    try std.testing.expectError(
+        error.EnvironmentVariableNotFound,
+        getEnvVarOwned(std.testing.allocator, "BADENV"),
+    );
+}
+
+test hasEnvVarConstant {
+    if (native_os == .wasi and !builtin.link_libc) return error.SkipZigTest;
+
+    try std.testing.expect(!hasEnvVarConstant("BADENV"));
+}
+
+test hasEnvVar {
+    const has_env = try hasEnvVar(std.testing.allocator, "BADENV");
+    try std.testing.expect(!has_env);
+}
+
+pub const CreateBlockOptions = struct {
+    /// `null` means to leave the `ZIG_PROGRESS` environment variable unmodified.
+    /// If non-null, negative means to remove the environment variable, and >= 0
+    /// means to provide it with the given integer.
+    zig_progress_fd: ?i32 = null,
+};
+
+/// Creates a null-delimited environment variable block in the format expected
+/// by POSIX, from a different one.
+///
+/// Every string of the result is allocated with `arena`.
+pub fn createBlock(existing: Environ, arena: Allocator, options: CreateBlockOptions) Allocator.Error![:null]?[*:0]u8 {
+    // `Block` is a slice, so it is walked by length instead of probing for a
+    // terminating null entry.
+    var zig_progress_count: usize = 0;
+    for (existing.block) |line| {
+        if (mem.eql(u8, mem.sliceTo(line, '='), "ZIG_PROGRESS")) zig_progress_count += 1;
+    }
+    const contains_zig_progress = zig_progress_count != 0;
+
+    const ZigProgressAction = enum { nothing, edit, delete, add };
+    const zig_progress_action: ZigProgressAction = a: {
+        const fd = options.zig_progress_fd orelse break :a .nothing;
+        if (fd >= 0) {
+            break :a if (contains_zig_progress) .edit else .add;
+        } else {
+            if (contains_zig_progress) break :a .delete;
+        }
+        break :a .nothing;
+    };
+
+    const envp_count: usize = switch (zig_progress_action) {
+        .add => existing.block.len + 1,
+        // The copy loop below skips every matching entry, not only the first.
+        .delete => existing.block.len - zig_progress_count,
+        .nothing, .edit => existing.block.len,
+    };
+
+    const envp_buf = try arena.allocSentinel(?[*:0]u8, envp_count, null);
+    var i: usize = 0;
+
+    if (zig_progress_action == .add) {
+        envp_buf[i] = try std.fmt.allocPrintSentinel(arena, "ZIG_PROGRESS={d}", .{options.zig_progress_fd.?}, 0);
+        i += 1;
+    }
+
+    for (existing.block) |line| {
+        if (mem.eql(u8, mem.sliceTo(line, '='), "ZIG_PROGRESS")) switch (zig_progress_action) {
+            // `.add` is only chosen when the block has no such entry.
+            .add => unreachable,
+            .delete => continue,
+            .edit => {
+                envp_buf[i] = try std.fmt.allocPrintSentinel(arena, "ZIG_PROGRESS={d}", .{options.zig_progress_fd.?}, 0);
+                i += 1;
+                continue;
+            },
+            .nothing => {},
+        };
+        envp_buf[i] = try arena.dupeZ(u8, mem.span(line));
+        i += 1;
+    }
+
+    assert(i == envp_count);
+    return envp_buf;
+}
+
+test "Map.createBlock" {
+    const allocator = std.testing.allocator;
+    var envmap = Map.init(allocator);
+    defer envmap.deinit();
+
+    try envmap.put("HOME", "/home/ifreund");
+    try envmap.put("WAYLAND_DISPLAY", "wayland-1");
+    try envmap.put("DISPLAY", ":1");
+    try envmap.put("DEBUGINFOD_URLS", " ");
+    try envmap.put("XCURSOR_SIZE", "24");
+
+    var arena = std.heap.ArenaAllocator.init(allocator);
+    defer arena.deinit();
+    const environ = try envmap.createBlock(arena.allocator(), .{});
+
+    try std.testing.expectEqual(@as(usize, 5), environ.len);
+
+    inline for (.{
+        "HOME=/home/ifreund",
+        "WAYLAND_DISPLAY=wayland-1",
+        "DISPLAY=:1",
+        "DEBUGINFOD_URLS= ",
+        "XCURSOR_SIZE=24",
+    }) |target| {
+        for (environ) |variable| {
+            if (mem.eql(u8, mem.span(variable orelse continue), target)) break;
+        } else {
+            try std.testing.expect(false); // Environment variable not found
+        }
+    }
+}
+
+/// Creates a WTF-16 environment block (`NAME=value` entries, each followed by
+/// a NUL code unit, then a final NUL) from `env_map`.
+/// Caller must free result.
+pub fn createWindowsEnvBlock(allocator: mem.Allocator, env_map: *const Map) ![]u16 {
+    // count bytes needed
+    const max_chars_needed = x: {
+        // Only need 2 trailing NUL code units for an empty environment
+        var max_chars_needed: usize = if (env_map.count() == 0) 2 else 1;
+        var it = env_map.iterator();
+        while (it.next()) |pair| {
+            // +1 for '='
+            // +1 for null byte
+            max_chars_needed += pair.key_ptr.len + pair.value_ptr.len + 2;
+        }
+        break :x max_chars_needed;
+    };
+    const result = try allocator.alloc(u16, max_chars_needed);
+    errdefer allocator.free(result);
+
+    var it = env_map.iterator();
+    var i: usize = 0;
+    while (it.next()) |pair| {
+        i += try unicode.wtf8ToWtf16Le(result[i..], pair.key_ptr.*);
+        result[i] = '=';
+        i += 1;
+        i += try unicode.wtf8ToWtf16Le(result[i..], pair.value_ptr.*);
+        result[i] = 0;
+        i += 1;
+    }
+    result[i] = 0;
+    i += 1;
+    // An empty environment is a special case that requires a redundant
+    // NUL terminator. CreateProcess will read the second code unit even
+    // though theoretically the first should be enough to recognize that the
+    // environment is empty (see https://nullprogram.com/blog/2023/08/23/)
+    if (env_map.count() == 0) {
+        result[i] = 0;
+        i += 1;
+    }
+    // Shrink the allocation to the number of code units actually written.
+    return try allocator.realloc(result, i);
+}
+
+test Map {
+    var env = Map.init(testing.allocator);
+    defer env.deinit();
+
+    try env.put("SOMETHING_NEW", "hello");
+    try testing.expectEqualStrings("hello", env.get("SOMETHING_NEW").?);
+    try testing.expectEqual(@as(Map.Size, 1), env.count());
+
+    // overwrite
+    try env.put("SOMETHING_NEW", "something");
+    try testing.expectEqualStrings("something", env.get("SOMETHING_NEW").?);
+    try testing.expectEqual(@as(Map.Size, 1), env.count());
+
+    // a new longer name to test the Windows-specific conversion buffer
+    try env.put("SOMETHING_NEW_AND_LONGER", "1");
+    try testing.expectEqualStrings("1", env.get("SOMETHING_NEW_AND_LONGER").?);
+    try testing.expectEqual(@as(Map.Size, 2), env.count());
+
+    // case insensitivity on Windows only
+    if (native_os == .windows) {
+        try testing.expectEqualStrings("1", env.get("something_New_aNd_LONGER").?);
+    } else {
+        try testing.expect(null == env.get("something_New_aNd_LONGER"));
+    }
+
+    var it = env.iterator();
+    var count: Map.Size = 0;
+    while (it.next()) |entry| {
+        const is_an_expected_name = std.mem.eql(u8, "SOMETHING_NEW", entry.key_ptr.*) or std.mem.eql(u8, "SOMETHING_NEW_AND_LONGER", entry.key_ptr.*);
+        try testing.expect(is_an_expected_name);
+        count += 1;
+    }
+    try testing.expectEqual(@as(Map.Size, 2), count);
+
+    // `Map` has no `remove`; `swapRemove` reports whether an entry was removed.
+    try testing.expect(env.swapRemove("SOMETHING_NEW"));
+    try testing.expect(env.get("SOMETHING_NEW") == null);
+
+    try testing.expectEqual(@as(Map.Size, 1), env.count());
+
+    if (native_os == .windows) {
+        // test Unicode case-insensitivity on Windows
+        try env.put("КИРиллИЦА", "something else");
+        try testing.expectEqualStrings("something else", env.get("кириллица").?);
+
+        // and WTF-8 that's not valid UTF-8
+        const
wtf8_with_surrogate_pair = try unicode.wtf16LeToWtf8Alloc(testing.allocator, &[_]u16{ + std.mem.nativeToLittle(u16, 0xD83D), // unpaired high surrogate + }); + defer testing.allocator.free(wtf8_with_surrogate_pair); + + try env.put(wtf8_with_surrogate_pair, wtf8_with_surrogate_pair); + try testing.expectEqualSlices(u8, wtf8_with_surrogate_pair, env.get(wtf8_with_surrogate_pair).?); + } +} diff --git a/lib/std/start.zig b/lib/std/start.zig index c6a9f06724..51a498c554 100644 --- a/lib/std/start.zig +++ b/lib/std/start.zig @@ -524,7 +524,10 @@ fn WinStartup() callconv(.withStackAlign(.c, 1)) noreturn { std.debug.maybeEnableSegfaultHandler(); - std.os.windows.ntdll.RtlExitUserProcess(callMain()); + std.os.windows.ntdll.RtlExitUserProcess(callMain( + std.os.windows.peb().ProcessParameters.CommandLine, + std.os.windows.peb().ProcessParameters.Environment, + )); } fn wWinMainCRTStartup() callconv(.withStackAlign(.c, 1)) noreturn { @@ -666,17 +669,12 @@ fn expandStackSize(phdrs: []elf.Phdr) void { } inline fn callMainWithArgs(argc: usize, argv: [*][*:0]u8, envp: [][*:0]u8) u8 { - std.os.argv = argv[0..argc]; - std.os.environ = envp; - if (std.Options.debug_threaded_io) |t| { if (@sizeOf(std.Io.Threaded.Argv0) != 0) t.argv0.value = argv[0]; t.environ = .{ .block = envp }; } - std.debug.maybeEnableSegfaultHandler(); - - return callMain(); + return callMain(argv[0..argc], envp); } fn main(c_argc: c_int, c_argv: [*][*:0]c_char, c_envp: [*:null]?[*:0]c_char) callconv(.c) c_int { @@ -695,62 +693,94 @@ fn main(c_argc: c_int, c_argv: [*][*:0]c_char, c_envp: [*:null]?[*:0]c_char) cal } fn mainWithoutEnv(c_argc: c_int, c_argv: [*][*:0]c_char) callconv(.c) c_int { - std.os.argv = @as([*][*:0]u8, @ptrCast(c_argv))[0..@intCast(c_argc)]; - + const argv = @as([*][*:0]u8, @ptrCast(c_argv))[0..@intCast(c_argc)]; if (@sizeOf(std.Io.Threaded.Argv0) != 0) { - if (std.Options.debug_threaded_io) |t| t.argv0.value = std.os.argv[0]; + if (std.Options.debug_threaded_io) |t| t.argv0.value = 
argv[0]; } - - return callMain(); + return callMain(argv, &.{}); } -// General error message for a malformed return type +/// General error message for a malformed return type const bad_main_ret = "expected return type of main to be 'void', '!void', 'noreturn', 'u8', or '!u8'"; -pub inline fn callMain() u8 { - const ReturnType = @typeInfo(@TypeOf(root.main)).@"fn".return_type.?; +const use_debug_allocator = !builtin.link_libc and !native_arch.isWasm() and builtin.mode == .Debug; +var debug_allocator: std.heap.DebugAllocator(.{}) = .init; - switch (ReturnType) { - void => { - root.main(); - return 0; - }, - noreturn, u8 => { - return root.main(); - }, - else => { - if (@typeInfo(ReturnType) != .error_union) @compileError(bad_main_ret); +inline fn callMain(args: std.process.Args.Vector, environ: std.process.Environ.Block) u8 { + const fn_info = @typeInfo(@TypeOf(root.main)).@"fn"; + if (fn_info.params.len == 0) return wrapMain(root.main()); + if (fn_info.params[0].type.? == std.process.Init.Minimal) return wrapMain(root.main(.{ + .args = .{ .vector = args }, + .environ = .{ .block = environ }, + })); - const result = root.main() catch |err| { - switch (builtin.zig_backend) { - .stage2_powerpc, - .stage2_riscv64, - => { - _ = std.posix.write(std.posix.STDERR_FILENO, "error: failed with error\n") catch {}; - return 1; - }, - else => {}, - } - std.log.err("{s}", .{@errorName(err)}); - switch (native_os) { - .freestanding, .other => {}, - else => if (@errorReturnTrace()) |trace| { - std.debug.dumpStackTrace(trace); - }, - } - return 1; - }; + const gpa = if (builtin.link_libc) + std.heap.c_allocator + else if (native_arch.isWasm()) + std.heap.wasm_allocator + else if (use_debug_allocator) + debug_allocator.allocator() + else + std.heap.smp_allocator; - return switch (@TypeOf(result)) { - void => 0, - u8 => result, - else => @compileError(bad_main_ret), - }; + defer if (use_debug_allocator) switch (debug_allocator.deinit()) { + .leak => std.process.exit(1), + .ok => {}, + 
}; + + var arena_allocator = std.heap.ArenaAllocator.init(std.heap.page_allocator); + defer arena_allocator.deinit(); + + var threaded: std.Io.Threaded = .init(gpa, .{ + .argv0 = if (@sizeOf(std.Io.Threaded.Argv0) != 0) .{ .value = args[0] } else .{}, + .environ = environ, + }); + defer threaded.deinit(); + + var env_map = environ.getEnvMap(gpa) catch |err| + std.process.fatal("failed to parse environment variables: {t}", .{err}); + defer env_map.deinit(); + + return wrapMain(root.main(.{ + .minimal = .{ + .args = .{ .vector = args }, + .environ = .{ .block = environ }, }, - } + .arena = &arena_allocator, + .gpa = gpa, + .io = threaded.io(), + .env_map = env_map, + })); } -pub fn call_wWinMain() std.os.windows.INT { +inline fn wrapMain(result: anytype) u8 { + const ReturnType = @TypeOf(result); + switch (ReturnType) { + void => return 0, + noreturn => unreachable, + u8 => return result, + else => {}, + } + if (@typeInfo(ReturnType) != .error_union) @compileError(bad_main_ret); + + const unwrapped_result = result catch |err| { + std.log.err("{t}", .{err}); + switch (native_os) { + .freestanding, .other => {}, + else => if (@errorReturnTrace()) |trace| std.debug.dumpStackTrace(trace), + } + return 1; + }; + + return switch (@TypeOf(unwrapped_result)) { + noreturn => unreachable, + void => 0, + u8 => unwrapped_result, + else => @compileError(bad_main_ret), + }; +} + +fn call_wWinMain() std.os.windows.INT { const peb = std.os.windows.peb(); const MAIN_HINSTANCE = @typeInfo(@TypeOf(root.wWinMain)).@"fn".params[0].type.?; const hInstance: MAIN_HINSTANCE = @ptrCast(peb.ImageBaseAddress); diff --git a/lib/std/std.zig b/lib/std/std.zig index 6ec39306ea..c4cb3b1537 100644 --- a/lib/std/std.zig +++ b/lib/std/std.zig @@ -114,9 +114,6 @@ pub const options: Options = if (@hasDecl(root, "std_options")) root.std_options pub const Options = struct { enable_segfault_handler: bool = debug.default_enable_segfault_handler, - /// Function used to implement `std.Io.Dir.cwd` for WASI. 
- wasiCwd: fn () os.wasi.fd_t = os.defaultWasiCwd, - /// The current log level. log_level: log.Level = log.default_level, @@ -193,6 +190,9 @@ pub const Options = struct { /// Overrides `std.Io.File.Permissions`. pub const FilePermissions: ?type = if (@hasDecl(root, "std_options_FilePermissions")) root.std_options_FilePermissions else null; + + /// Overrides `std.Io.Dir.cwd`. + pub const cwd: ?fn () Io.Dir = if (@hasDecl(root, "std_options_cwd")) root.std_options_cwd else null; }; // This forces the start.zig file to be imported, and the comptime logic inside that diff --git a/lib/std/zig.zig b/lib/std/zig.zig index abc213ba27..5524cac22d 100644 --- a/lib/std/zig.zig +++ b/lib/std/zig.zig @@ -746,21 +746,12 @@ pub const EnvVar = enum { LOCALAPPDATA, HOME, - pub fn isSet(comptime ev: EnvVar) bool { - return std.process.hasNonEmptyEnvVarConstant(@tagName(ev)); + pub fn isSet(ev: EnvVar, map: *const std.process.Environ.Map) bool { + return map.contains(@tagName(ev)); } - pub fn get(ev: EnvVar, arena: std.mem.Allocator) !?[]u8 { - if (std.process.getEnvVarOwned(arena, @tagName(ev))) |value| { - return value; - } else |err| switch (err) { - error.EnvironmentVariableNotFound => return null, - else => |e| return e, - } - } - - pub fn getPosix(comptime ev: EnvVar) ?[:0]const u8 { - return std.posix.getenvZ(@tagName(ev)); + pub fn get(ev: EnvVar, map: *const std.process.Environ.Map) ?[]const u8 { + return map.get(@tagName(ev)); } }; diff --git a/lib/std/zig/system/darwin.zig b/lib/std/zig/system/darwin.zig index b493ccf0ec..e69de48d26 100644 --- a/lib/std/zig/system/darwin.zig +++ b/lib/std/zig/system/darwin.zig @@ -35,7 +35,7 @@ pub fn isSdkInstalled(gpa: Allocator, io: Io) bool { /// Caller owns the memory. /// stderr from xcrun is ignored. /// If error.OutOfMemory occurs in Allocator, this function returns null. 
-pub fn getSdk(gpa: Allocator, io: Io, target: *const Target) ?[]const u8 { +pub fn getSdk(gpa: Allocator, io: Io, environ: std.process.Child.Environ, target: *const Target) ?[]const u8 { const is_simulator_abi = target.abi == .simulator; const sdk = switch (target.os.tag) { .driverkit => "driverkit", @@ -47,7 +47,10 @@ pub fn getSdk(gpa: Allocator, io: Io, target: *const Target) ?[]const u8 { else => return null, }; const argv = &[_][]const u8{ "xcrun", "--sdk", sdk, "--show-sdk-path" }; - const result = std.process.Child.run(gpa, io, .{ .argv = argv }) catch return null; + const result = std.process.Child.run(gpa, io, .{ + .argv = argv, + .environ = environ, + }) catch return null; defer { gpa.free(result.stderr); gpa.free(result.stdout); diff --git a/test/link/macho.zig b/test/link/macho.zig index ccfecefa44..844273b8e5 100644 --- a/test/link/macho.zig +++ b/test/link/macho.zig @@ -871,7 +871,7 @@ fn testLinkDirectlyCppTbd(b: *Build, opts: Options) *Step { const io = b.graph.io; const test_step = addTestStep(b, "link-directly-cpp-tbd", opts); - const sdk = std.zig.system.darwin.getSdk(b.allocator, io, &opts.target.result) orelse + const sdk = std.zig.system.darwin.getSdk(b.allocator, io, .{ .map = &b.graph.env_map }, &opts.target.result) orelse @panic("macOS SDK is required to run the test"); const exe = addExecutable(b, opts, .{ diff --git a/test/standalone/ios/build.zig b/test/standalone/ios/build.zig index b87d55993b..d9bd93875b 100644 --- a/test/standalone/ios/build.zig +++ b/test/standalone/ios/build.zig @@ -25,7 +25,7 @@ pub fn build(b: *std.Build) void { const io = b.graph.io; - if (std.zig.system.darwin.getSdk(b.allocator, io, &target.result)) |sdk| { + if (std.zig.system.darwin.getSdk(b.allocator, io, .{ .map = &b.graph.env_map }, &target.result)) |sdk| { b.sysroot = sdk; exe.root_module.addSystemIncludePath(.{ .cwd_relative = b.pathJoin(&.{ sdk, "/usr/include" }) }); exe.root_module.addSystemFrameworkPath(.{ .cwd_relative = b.pathJoin(&.{ sdk, 
"/System/Library/Frameworks" }) }); diff --git a/test/standalone/windows_bat_args/fuzz.zig b/test/standalone/windows_bat_args/fuzz.zig index 28749259f7..f0da2321b1 100644 --- a/test/standalone/windows_bat_args/fuzz.zig +++ b/test/standalone/windows_bat_args/fuzz.zig @@ -84,13 +84,13 @@ pub fn main() anyerror!void { } } -fn testExec(gpa: Allocator, io: Io, args: []const []const u8, env: ?*std.process.EnvMap) !void { +fn testExec(gpa: Allocator, io: Io, args: []const []const u8, env: ?*std.process.Environ.Map) !void { try testExecBat(gpa, io, "args1.bat", args, env); try testExecBat(gpa, io, "args2.bat", args, env); try testExecBat(gpa, io, "args3.bat", args, env); } -fn testExecBat(gpa: Allocator, io: Io, bat: []const u8, args: []const []const u8, env: ?*std.process.EnvMap) !void { +fn testExecBat(gpa: Allocator, io: Io, bat: []const u8, args: []const []const u8, env: ?*std.process.Environ.Map) !void { const argv = try gpa.alloc([]const u8, 1 + args.len); defer gpa.free(argv); argv[0] = bat; diff --git a/test/standalone/windows_bat_args/test.zig b/test/standalone/windows_bat_args/test.zig index e0d1abe806..ac851cf8f6 100644 --- a/test/standalone/windows_bat_args/test.zig +++ b/test/standalone/windows_bat_args/test.zig @@ -130,13 +130,13 @@ fn testExecError(err: anyerror, gpa: Allocator, io: Io, args: []const []const u8 return std.testing.expectError(err, testExec(gpa, io, args, null)); } -fn testExec(gpa: Allocator, io: Io, args: []const []const u8, env: ?*std.process.EnvMap) !void { +fn testExec(gpa: Allocator, io: Io, args: []const []const u8, env: ?*std.process.Environ.Map) !void { try testExecBat(gpa, io, "args1.bat", args, env); try testExecBat(gpa, io, "args2.bat", args, env); try testExecBat(gpa, io, "args3.bat", args, env); } -fn testExecBat(gpa: Allocator, io: Io, bat: []const u8, args: []const []const u8, env: ?*std.process.EnvMap) !void { +fn testExecBat(gpa: Allocator, io: Io, bat: []const u8, args: []const []const u8, env: ?*std.process.Environ.Map) 
!void { const argv = try gpa.alloc([]const u8, 1 + args.len); defer gpa.free(argv); argv[0] = bat; diff --git a/test/standalone/windows_paths/test.zig b/test/standalone/windows_paths/test.zig index ed4069dc61..1fbdca2115 100644 --- a/test/standalone/windows_paths/test.zig +++ b/test/standalone/windows_paths/test.zig @@ -25,10 +25,10 @@ pub fn main() anyerror!void { const alt_drive_letter = try getAltDriveLetter(cwd_path); const alt_drive_cwd_key = try std.fmt.allocPrint(arena, "={c}:", .{alt_drive_letter}); const alt_drive_cwd = try std.fmt.allocPrint(arena, "{c}:\\baz", .{alt_drive_letter}); - var alt_drive_env_map = std.process.EnvMap.init(arena); + var alt_drive_env_map = std.process.Environ.Map.init(arena); try alt_drive_env_map.put(alt_drive_cwd_key, alt_drive_cwd); - const empty_env = std.process.EnvMap.init(arena); + const empty_env = std.process.Environ.Map.init(arena); { const drive_rel = try std.fmt.allocPrint(arena, "{c}:foo", .{alt_drive_letter}); @@ -96,7 +96,7 @@ fn checkRelative( expected_stdout: []const u8, argv: []const []const u8, cwd: ?[]const u8, - env_map: ?*const std.process.EnvMap, + env_map: ?*const std.process.Environ.Map, ) !void { const result = try std.process.Child.run(allocator, io, .{ .argv = argv, diff --git a/tools/doctest.zig b/tools/doctest.zig index 3a67210a59..cd836c624e 100644 --- a/tools/doctest.zig +++ b/tools/doctest.zig @@ -1128,7 +1128,7 @@ fn in(slice: []const u8, number: u8) bool { fn run( allocator: Allocator, io: Io, - env_map: *process.EnvMap, + env_map: *process.Environ.Map, cwd: []const u8, args: []const []const u8, ) !process.Child.RunResult {