mirror of
https://codeberg.org/ziglang/zig.git
synced 2026-03-08 03:44:46 +01:00
976 lines
37 KiB
Zig
976 lines
37 KiB
Zig
const Args = @This();
|
|
|
|
const builtin = @import("builtin");
|
|
const native_os = builtin.os.tag;
|
|
|
|
const std = @import("../std.zig");
|
|
const Allocator = std.mem.Allocator;
|
|
const assert = std.debug.assert;
|
|
const testing = std.testing;
|
|
|
|
vector: Vector,
|
|
|
|
/// On WASI without libc, this is `void` because the environment has to be
|
|
/// queried and heap-allocated at runtime.
|
|
pub const Vector = switch (native_os) {
|
|
.windows => []const u16, // WTF-16 encoded
|
|
.wasi => switch (builtin.link_libc) {
|
|
false => void,
|
|
true => []const [*:0]const u8,
|
|
},
|
|
.freestanding, .other => void,
|
|
else => []const [*:0]const u8,
|
|
};
|
|
|
|
/// Cross-platform access to command line one argument at a time.
|
|
pub const Iterator = struct {
|
|
const Inner = switch (native_os) {
|
|
.windows => Windows,
|
|
.wasi => if (builtin.link_libc) Posix else Wasi,
|
|
else => Posix,
|
|
};
|
|
|
|
inner: Inner,
|
|
|
|
/// Initialize the args iterator. Consider using `initAllocator` instead
|
|
/// for cross-platform compatibility.
|
|
pub fn init(a: Args) Iterator {
|
|
if (native_os == .wasi) @compileError("In WASI, use initAllocator instead.");
|
|
if (native_os == .windows) @compileError("In Windows, use initAllocator instead.");
|
|
return .{ .inner = .init(a) };
|
|
}
|
|
|
|
pub const InitError = Inner.InitError;
|
|
|
|
/// You must deinitialize iterator's internal buffers by calling `deinit` when done.
|
|
pub fn initAllocator(a: Args, gpa: Allocator) InitError!Iterator {
|
|
if (native_os == .wasi and !builtin.link_libc) {
|
|
return .{ .inner = try .init(gpa) };
|
|
}
|
|
if (native_os == .windows) {
|
|
return .{ .inner = try .init(gpa, a.vector) };
|
|
}
|
|
|
|
return .{ .inner = .init(a) };
|
|
}
|
|
|
|
/// Return subsequent argument, or `null` if no more remaining.
|
|
///
|
|
/// Returned slice is pointing to the iterator's internal buffer.
|
|
/// On Windows, the result is encoded as [WTF-8](https://wtf-8.codeberg.page/).
|
|
/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.
|
|
pub fn next(it: *Iterator) ?[:0]const u8 {
|
|
return it.inner.next();
|
|
}
|
|
|
|
/// Parse past 1 argument without capturing it.
|
|
/// Returns `true` if skipped an arg, `false` if we are at the end.
|
|
pub fn skip(it: *Iterator) bool {
|
|
return it.inner.skip();
|
|
}
|
|
|
|
/// Required to release resources if the iterator was initialized with
|
|
/// `initAllocator` function.
|
|
pub fn deinit(it: *Iterator) void {
|
|
// Unless we're targeting WASI or Windows, this is a no-op.
|
|
if (native_os == .wasi and !builtin.link_libc) it.inner.deinit();
|
|
if (native_os == .windows) it.inner.deinit();
|
|
}
|
|
|
|
/// Iterator that implements the Windows command-line parsing algorithm.
|
|
///
|
|
/// The implementation is intended to be compatible with the post-2008 C runtime,
|
|
/// but is *not* intended to be compatible with `CommandLineToArgvW` since
|
|
/// `CommandLineToArgvW` uses the pre-2008 parsing rules.
|
|
///
|
|
/// This iterator faithfully implements the parsing behavior observed from the C runtime with
|
|
/// one exception: if the command-line string is empty, the iterator will immediately complete
|
|
/// without returning any arguments (whereas the C runtime will return a single argument
|
|
/// representing the name of the current executable).
|
|
///
|
|
/// The essential parts of the algorithm are described in Microsoft's documentation:
|
|
///
|
|
/// - https://learn.microsoft.com/en-us/cpp/cpp/main-function-command-line-args?view=msvc-170#parsing-c-command-line-arguments
|
|
///
|
|
/// David Deley explains some additional undocumented quirks in great detail:
|
|
///
|
|
/// - https://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULES
|
|
pub const Windows = struct {
|
|
allocator: Allocator,
|
|
/// Encoded as WTF-16 LE.
|
|
cmd_line: []const u16,
|
|
index: usize = 0,
|
|
/// Owned by the iterator. Long enough to hold contiguous NUL-terminated slices
|
|
/// of each argument encoded as WTF-8.
|
|
buffer: []u8,
|
|
start: usize = 0,
|
|
end: usize = 0,
|
|
|
|
pub const InitError = error{OutOfMemory};
|
|
|
|
/// `cmd_line_w` *must* be a WTF16-LE-encoded string.
|
|
///
|
|
/// The iterator stores and uses `cmd_line_w`, so its memory must be valid for
|
|
/// at least as long as the returned Windows.
|
|
pub fn init(gpa: Allocator, cmd_line_w: []const u16) Windows.InitError!Windows {
|
|
const wtf8_len = std.unicode.calcWtf8Len(cmd_line_w);
|
|
|
|
// This buffer must be large enough to contain contiguous NUL-terminated slices
|
|
// of each argument.
|
|
// - During parsing, the length of a parsed argument will always be equal to
|
|
// to less than its unparsed length
|
|
// - The first argument needs one extra byte of space allocated for its NUL
|
|
// terminator, but for each subsequent argument the necessary whitespace
|
|
// between arguments guarantees room for their NUL terminator(s).
|
|
const buffer = try gpa.alloc(u8, wtf8_len + 1);
|
|
errdefer gpa.free(buffer);
|
|
|
|
return .{
|
|
.allocator = gpa,
|
|
.cmd_line = cmd_line_w,
|
|
.buffer = buffer,
|
|
};
|
|
}
|
|
|
|
/// Returns the next argument and advances the iterator. Returns `null` if at the end of the
|
|
/// command-line string. The iterator owns the returned slice.
|
|
/// The result is encoded as [WTF-8](https://wtf-8.codeberg.page/).
|
|
pub fn next(self: *Windows) ?[:0]const u8 {
|
|
return self.nextWithStrategy(next_strategy);
|
|
}
|
|
|
|
/// Skips the next argument and advances the iterator. Returns `true` if an argument was
|
|
/// skipped, `false` if at the end of the command-line string.
|
|
pub fn skip(self: *Windows) bool {
|
|
return self.nextWithStrategy(skip_strategy);
|
|
}
|
|
|
|
const next_strategy = struct {
|
|
const T = ?[:0]const u8;
|
|
|
|
const eof = null;
|
|
|
|
/// Returns '\' if any backslashes are emitted, otherwise returns `last_emitted_code_unit`.
|
|
fn emitBackslashes(self: *Windows, count: usize, last_emitted_code_unit: ?u16) ?u16 {
|
|
for (0..count) |_| {
|
|
self.buffer[self.end] = '\\';
|
|
self.end += 1;
|
|
}
|
|
return if (count != 0) '\\' else last_emitted_code_unit;
|
|
}
|
|
|
|
/// If `last_emitted_code_unit` and `code_unit` form a surrogate pair, then
|
|
/// the previously emitted high surrogate is overwritten by the codepoint encoded
|
|
/// by the surrogate pair, and `null` is returned.
|
|
/// Otherwise, `code_unit` is emitted and returned.
|
|
fn emitCharacter(self: *Windows, code_unit: u16, last_emitted_code_unit: ?u16) ?u16 {
|
|
// Because we are emitting WTF-8, we need to
|
|
// check to see if we've emitted two consecutive surrogate
|
|
// codepoints that form a valid surrogate pair in order
|
|
// to ensure that we're always emitting well-formed WTF-8
|
|
// (https://wtf-8.codeberg.page/#concatenating).
|
|
//
|
|
// If we do have a valid surrogate pair, we need to emit
|
|
// the UTF-8 sequence for the codepoint that they encode
|
|
// instead of the WTF-8 encoding for the two surrogate pairs
|
|
// separately.
|
|
//
|
|
// This is relevant when dealing with a WTF-16 encoded
|
|
// command line like this:
|
|
// "<0xD801>"<0xDC37>
|
|
// which would get parsed and converted to WTF-8 as:
|
|
// <0xED><0xA0><0x81><0xED><0xB0><0xB7>
|
|
// but instead, we need to recognize the surrogate pair
|
|
// and emit the codepoint it encodes, which in this
|
|
// example is U+10437 (𐐷), which is encoded in UTF-8 as:
|
|
// <0xF0><0x90><0x90><0xB7>
|
|
if (last_emitted_code_unit != null and
|
|
std.unicode.utf16IsLowSurrogate(code_unit) and
|
|
std.unicode.utf16IsHighSurrogate(last_emitted_code_unit.?))
|
|
{
|
|
const codepoint = std.unicode.utf16DecodeSurrogatePair(&.{ last_emitted_code_unit.?, code_unit }) catch unreachable;
|
|
|
|
// Unpaired surrogate is 3 bytes long
|
|
const dest = self.buffer[self.end - 3 ..];
|
|
const len = std.unicode.utf8Encode(codepoint, dest) catch unreachable;
|
|
// All codepoints that require a surrogate pair (> U+FFFF) are encoded as 4 bytes
|
|
assert(len == 4);
|
|
self.end += 1;
|
|
return null;
|
|
}
|
|
|
|
const wtf8_len = std.unicode.wtf8Encode(code_unit, self.buffer[self.end..]) catch unreachable;
|
|
self.end += wtf8_len;
|
|
return code_unit;
|
|
}
|
|
|
|
fn yieldArg(self: *Windows) [:0]const u8 {
|
|
self.buffer[self.end] = 0;
|
|
const arg = self.buffer[self.start..self.end :0];
|
|
self.end += 1;
|
|
self.start = self.end;
|
|
return arg;
|
|
}
|
|
};
|
|
|
|
const skip_strategy = struct {
|
|
const T = bool;
|
|
|
|
const eof = false;
|
|
|
|
fn emitBackslashes(_: *Windows, _: usize, last_emitted_code_unit: ?u16) ?u16 {
|
|
return last_emitted_code_unit;
|
|
}
|
|
|
|
fn emitCharacter(_: *Windows, _: u16, last_emitted_code_unit: ?u16) ?u16 {
|
|
return last_emitted_code_unit;
|
|
}
|
|
|
|
fn yieldArg(_: *Windows) bool {
|
|
return true;
|
|
}
|
|
};
|
|
|
|
fn nextWithStrategy(self: *Windows, comptime strategy: type) strategy.T {
|
|
var last_emitted_code_unit: ?u16 = null;
|
|
// The first argument (the executable name) uses different parsing rules.
|
|
if (self.index == 0) {
|
|
if (self.cmd_line.len == 0 or self.cmd_line[0] == 0) {
|
|
// Immediately complete the iterator.
|
|
// The C runtime would return the name of the current executable here.
|
|
return strategy.eof;
|
|
}
|
|
|
|
var inside_quotes = false;
|
|
while (true) : (self.index += 1) {
|
|
const char = if (self.index != self.cmd_line.len)
|
|
std.mem.littleToNative(u16, self.cmd_line[self.index])
|
|
else
|
|
0;
|
|
switch (char) {
|
|
0 => {
|
|
return strategy.yieldArg(self);
|
|
},
|
|
'"' => {
|
|
inside_quotes = !inside_quotes;
|
|
},
|
|
' ', '\t' => {
|
|
if (inside_quotes) {
|
|
last_emitted_code_unit = strategy.emitCharacter(self, char, last_emitted_code_unit);
|
|
} else {
|
|
self.index += 1;
|
|
return strategy.yieldArg(self);
|
|
}
|
|
},
|
|
else => {
|
|
last_emitted_code_unit = strategy.emitCharacter(self, char, last_emitted_code_unit);
|
|
},
|
|
}
|
|
}
|
|
}
|
|
|
|
// Skip spaces and tabs. The iterator completes if we reach the end of the string here.
|
|
while (true) : (self.index += 1) {
|
|
const char = if (self.index != self.cmd_line.len)
|
|
std.mem.littleToNative(u16, self.cmd_line[self.index])
|
|
else
|
|
0;
|
|
switch (char) {
|
|
0 => return strategy.eof,
|
|
' ', '\t' => continue,
|
|
else => break,
|
|
}
|
|
}
|
|
|
|
// Parsing rules for subsequent arguments:
|
|
//
|
|
// - The end of the string always terminates the current argument.
|
|
// - When not in 'inside_quotes' mode, a space or tab terminates the current argument.
|
|
// - 2n backslashes followed by a quote emit n backslashes (note: n can be zero).
|
|
// If in 'inside_quotes' and the quote is immediately followed by a second quote,
|
|
// one quote is emitted and the other is skipped, otherwise, the quote is skipped
|
|
// and 'inside_quotes' is toggled.
|
|
// - 2n + 1 backslashes followed by a quote emit n backslashes followed by a quote.
|
|
// - n backslashes not followed by a quote emit n backslashes.
|
|
var backslash_count: usize = 0;
|
|
var inside_quotes = false;
|
|
while (true) : (self.index += 1) {
|
|
const char = if (self.index != self.cmd_line.len)
|
|
std.mem.littleToNative(u16, self.cmd_line[self.index])
|
|
else
|
|
0;
|
|
switch (char) {
|
|
0 => {
|
|
last_emitted_code_unit = strategy.emitBackslashes(self, backslash_count, last_emitted_code_unit);
|
|
return strategy.yieldArg(self);
|
|
},
|
|
' ', '\t' => {
|
|
last_emitted_code_unit = strategy.emitBackslashes(self, backslash_count, last_emitted_code_unit);
|
|
backslash_count = 0;
|
|
if (inside_quotes) {
|
|
last_emitted_code_unit = strategy.emitCharacter(self, char, last_emitted_code_unit);
|
|
} else return strategy.yieldArg(self);
|
|
},
|
|
'"' => {
|
|
const char_is_escaped_quote = backslash_count % 2 != 0;
|
|
last_emitted_code_unit = strategy.emitBackslashes(self, backslash_count / 2, last_emitted_code_unit);
|
|
backslash_count = 0;
|
|
if (char_is_escaped_quote) {
|
|
last_emitted_code_unit = strategy.emitCharacter(self, '"', last_emitted_code_unit);
|
|
} else {
|
|
if (inside_quotes and
|
|
self.index + 1 != self.cmd_line.len and
|
|
std.mem.littleToNative(u16, self.cmd_line[self.index + 1]) == '"')
|
|
{
|
|
last_emitted_code_unit = strategy.emitCharacter(self, '"', last_emitted_code_unit);
|
|
self.index += 1;
|
|
} else {
|
|
inside_quotes = !inside_quotes;
|
|
}
|
|
}
|
|
},
|
|
'\\' => {
|
|
backslash_count += 1;
|
|
},
|
|
else => {
|
|
last_emitted_code_unit = strategy.emitBackslashes(self, backslash_count, last_emitted_code_unit);
|
|
backslash_count = 0;
|
|
last_emitted_code_unit = strategy.emitCharacter(self, char, last_emitted_code_unit);
|
|
},
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Frees the iterator's copy of the command-line string and all previously returned
|
|
/// argument slices.
|
|
pub fn deinit(self: *Windows) void {
|
|
self.allocator.free(self.buffer);
|
|
}
|
|
};
|
|
|
|
pub const Posix = struct {
|
|
remaining: Vector,
|
|
|
|
pub const InitError = error{};
|
|
|
|
pub fn init(a: Args) Posix {
|
|
return .{ .remaining = a.vector };
|
|
}
|
|
|
|
pub fn next(it: *Posix) ?[:0]const u8 {
|
|
if (it.remaining.len == 0) return null;
|
|
const arg = it.remaining[0];
|
|
it.remaining = it.remaining[1..];
|
|
return std.mem.sliceTo(arg, 0);
|
|
}
|
|
|
|
pub fn skip(it: *Posix) bool {
|
|
if (it.remaining.len == 0) return false;
|
|
it.remaining = it.remaining[1..];
|
|
return true;
|
|
}
|
|
};
|
|
|
|
pub const Wasi = struct {
|
|
allocator: Allocator,
|
|
index: usize,
|
|
args: [][:0]u8,
|
|
|
|
pub const InitError = error{OutOfMemory} || std.posix.UnexpectedError;
|
|
|
|
/// You must call deinit to free the internal buffer of the
|
|
/// iterator after you are done.
|
|
pub fn init(allocator: Allocator) Wasi.InitError!Wasi {
|
|
const fetched_args = try Wasi.internalInit(allocator);
|
|
return Wasi{
|
|
.allocator = allocator,
|
|
.index = 0,
|
|
.args = fetched_args,
|
|
};
|
|
}
|
|
|
|
fn internalInit(allocator: Allocator) Wasi.InitError![][:0]u8 {
|
|
var count: usize = undefined;
|
|
var buf_size: usize = undefined;
|
|
|
|
switch (std.os.wasi.args_sizes_get(&count, &buf_size)) {
|
|
.SUCCESS => {},
|
|
else => |err| return std.posix.unexpectedErrno(err),
|
|
}
|
|
|
|
if (count == 0) {
|
|
return &[_][:0]u8{};
|
|
}
|
|
|
|
const argv = try allocator.alloc([*:0]u8, count);
|
|
defer allocator.free(argv);
|
|
|
|
const argv_buf = try allocator.alloc(u8, buf_size);
|
|
|
|
switch (std.os.wasi.args_get(argv.ptr, argv_buf.ptr)) {
|
|
.SUCCESS => {},
|
|
else => |err| return std.posix.unexpectedErrno(err),
|
|
}
|
|
|
|
var result_args = try allocator.alloc([:0]u8, count);
|
|
var i: usize = 0;
|
|
while (i < count) : (i += 1) {
|
|
result_args[i] = std.mem.sliceTo(argv[i], 0);
|
|
}
|
|
|
|
return result_args;
|
|
}
|
|
|
|
pub fn next(self: *Wasi) ?[:0]const u8 {
|
|
if (self.index == self.args.len) return null;
|
|
|
|
const arg = self.args[self.index];
|
|
self.index += 1;
|
|
return arg;
|
|
}
|
|
|
|
pub fn skip(self: *Wasi) bool {
|
|
if (self.index == self.args.len) return false;
|
|
|
|
self.index += 1;
|
|
return true;
|
|
}
|
|
|
|
/// Call to free the internal buffer of the iterator.
|
|
pub fn deinit(self: *Wasi) void {
|
|
// Nothing is allocated when there are no args
|
|
if (self.args.len == 0) return;
|
|
|
|
const last_item = self.args[self.args.len - 1];
|
|
const last_byte_addr = @intFromPtr(last_item.ptr) + last_item.len + 1; // null terminated
|
|
const first_item_ptr = self.args[0].ptr;
|
|
const len = last_byte_addr - @intFromPtr(first_item_ptr);
|
|
self.allocator.free(first_item_ptr[0..len]);
|
|
self.allocator.free(self.args);
|
|
}
|
|
};
|
|
};
|
|
|
|
/// Holds the command-line arguments, with the program name as the first entry.
|
|
/// Use `iterateAllocator` for cross-platform code.
|
|
pub fn iterate(a: Args) Iterator {
|
|
return .init(a);
|
|
}
|
|
|
|
/// You must deinitialize iterator's internal buffers by calling `deinit` when
|
|
/// done.
|
|
pub fn iterateAllocator(a: Args, gpa: Allocator) Iterator.InitError!Iterator {
|
|
return .initAllocator(a, gpa);
|
|
}
|
|
|
|
pub const ToSliceError = Iterator.Windows.InitError || Iterator.Wasi.InitError;
|
|
|
|
/// Returned value may reference several allocations and may point into `a`.
|
|
/// Thefore, an arena-style allocator must be used.
|
|
///
|
|
/// * On Windows, the result is encoded as
|
|
/// [WTF-8](https://wtf-8.codeberg.page/).
|
|
/// * On other platforms, the result is an opaque sequence of bytes with no
|
|
/// particular encoding.
|
|
///
|
|
/// See also:
|
|
/// * `iterate`
|
|
/// * `iterateAllocator`
|
|
pub fn toSlice(a: Args, arena: Allocator) ToSliceError![][:0]const u8 {
|
|
if (native_os == .windows) {
|
|
var it = try a.iterateAllocator(arena);
|
|
var contents: std.ArrayList(u8) = .empty;
|
|
var slice_list: std.ArrayList(usize) = .empty;
|
|
while (it.next()) |arg| {
|
|
try contents.appendSlice(arena, arg[0 .. arg.len + 1]);
|
|
try slice_list.append(arena, arg.len);
|
|
}
|
|
const contents_slice = contents.items;
|
|
const slice_sizes = slice_list.items;
|
|
const slice_list_bytes = std.math.mul(usize, @sizeOf([]u8), slice_sizes.len) catch return error.OutOfMemory;
|
|
const total_bytes = std.math.add(usize, slice_list_bytes, contents_slice.len) catch return error.OutOfMemory;
|
|
const buf = try arena.alignedAlloc(u8, .of([]u8), total_bytes);
|
|
errdefer arena.free(buf);
|
|
|
|
const result_slice_list = std.mem.bytesAsSlice([:0]u8, buf[0..slice_list_bytes]);
|
|
const result_contents = buf[slice_list_bytes..];
|
|
@memcpy(result_contents[0..contents_slice.len], contents_slice);
|
|
|
|
var contents_index: usize = 0;
|
|
for (slice_sizes, 0..) |len, i| {
|
|
const new_index = contents_index + len;
|
|
result_slice_list[i] = result_contents[contents_index..new_index :0];
|
|
contents_index = new_index + 1;
|
|
}
|
|
|
|
return @ptrCast(result_slice_list);
|
|
} else if (native_os == .wasi and !builtin.link_libc) {
|
|
var count: usize = undefined;
|
|
var buf_size: usize = undefined;
|
|
|
|
switch (std.os.wasi.args_sizes_get(&count, &buf_size)) {
|
|
.SUCCESS => {},
|
|
else => |err| return std.posix.unexpectedErrno(err),
|
|
}
|
|
|
|
if (count == 0) return &.{};
|
|
|
|
const argv = try arena.alloc([*:0]u8, count);
|
|
const argv_buf = try arena.alloc(u8, buf_size);
|
|
|
|
switch (std.os.wasi.args_get(argv.ptr, argv_buf.ptr)) {
|
|
.SUCCESS => {},
|
|
else => |err| return std.posix.unexpectedErrno(err),
|
|
}
|
|
|
|
const args = try arena.alloc([:0]const u8, count);
|
|
for (args, argv) |*dst, src| dst.* = std.mem.sliceTo(src, 0);
|
|
return args;
|
|
} else {
|
|
const args = try arena.alloc([:0]const u8, a.vector.len);
|
|
for (args, a.vector) |*dst, src| dst.* = std.mem.sliceTo(src, 0);
|
|
return args;
|
|
}
|
|
}
|
|
|
|
test "Iterator.Windows" {
|
|
const t = testIteratorWindows;
|
|
|
|
try t(
|
|
\\"C:\Program Files\zig\zig.exe" run .\src\main.zig -target x86_64-windows-gnu -O ReleaseSafe -- --emoji=🗿 --eval="new Regex(\"Dwayne \\\"The Rock\\\" Johnson\")"
|
|
, &.{
|
|
\\C:\Program Files\zig\zig.exe
|
|
,
|
|
\\run
|
|
,
|
|
\\.\src\main.zig
|
|
,
|
|
\\-target
|
|
,
|
|
\\x86_64-windows-gnu
|
|
,
|
|
\\-O
|
|
,
|
|
\\ReleaseSafe
|
|
,
|
|
\\--
|
|
,
|
|
\\--emoji=🗿
|
|
,
|
|
\\--eval=new Regex("Dwayne \"The Rock\" Johnson")
|
|
,
|
|
});
|
|
|
|
// Empty
|
|
try t("", &.{});
|
|
|
|
// Separators
|
|
try t("aa bb cc", &.{ "aa", "bb", "cc" });
|
|
try t("aa\tbb\tcc", &.{ "aa", "bb", "cc" });
|
|
try t("aa\nbb\ncc", &.{"aa\nbb\ncc"});
|
|
try t("aa\r\nbb\r\ncc", &.{"aa\r\nbb\r\ncc"});
|
|
try t("aa\rbb\rcc", &.{"aa\rbb\rcc"});
|
|
try t("aa\x07bb\x07cc", &.{"aa\x07bb\x07cc"});
|
|
try t("aa\x7Fbb\x7Fcc", &.{"aa\x7Fbb\x7Fcc"});
|
|
try t("aa🦎bb🦎cc", &.{"aa🦎bb🦎cc"});
|
|
|
|
// Leading/trailing whitespace
|
|
try t(" ", &.{""});
|
|
try t(" aa bb ", &.{ "", "aa", "bb" });
|
|
try t("\t\t", &.{""});
|
|
try t("\t\taa\t\tbb\t\t", &.{ "", "aa", "bb" });
|
|
try t("\n\n", &.{"\n\n"});
|
|
try t("\n\naa\n\nbb\n\n", &.{"\n\naa\n\nbb\n\n"});
|
|
|
|
// Executable name with quotes/backslashes
|
|
try t("\"aa bb\tcc\ndd\"", &.{"aa bb\tcc\ndd"});
|
|
try t("\"", &.{""});
|
|
try t("\"\"", &.{""});
|
|
try t("\"\"\"", &.{""});
|
|
try t("\"\"\"\"", &.{""});
|
|
try t("\"\"\"\"\"", &.{""});
|
|
try t("aa\"bb\"cc\"dd", &.{"aabbccdd"});
|
|
try t("aa\"bb cc\"dd", &.{"aabb ccdd"});
|
|
try t("\"aa\\\"bb\"", &.{"aa\\bb"});
|
|
try t("\"aa\\\\\"", &.{"aa\\\\"});
|
|
try t("aa\\\"bb", &.{"aa\\bb"});
|
|
try t("aa\\\\\"bb", &.{"aa\\\\bb"});
|
|
|
|
// Arguments with quotes/backslashes
|
|
try t(". \"aa bb\tcc\ndd\"", &.{ ".", "aa bb\tcc\ndd" });
|
|
try t(". aa\" \"bb\"\t\"cc\"\n\"dd\"", &.{ ".", "aa bb\tcc\ndd" });
|
|
try t(". ", &.{"."});
|
|
try t(". \"", &.{ ".", "" });
|
|
try t(". \"\"", &.{ ".", "" });
|
|
try t(". \"\"\"", &.{ ".", "\"" });
|
|
try t(". \"\"\"\"", &.{ ".", "\"" });
|
|
try t(". \"\"\"\"\"", &.{ ".", "\"\"" });
|
|
try t(". \"\"\"\"\"\"", &.{ ".", "\"\"" });
|
|
try t(". \" \"", &.{ ".", " " });
|
|
try t(". \" \"\"", &.{ ".", " \"" });
|
|
try t(". \" \"\"\"", &.{ ".", " \"" });
|
|
try t(". \" \"\"\"\"", &.{ ".", " \"\"" });
|
|
try t(". \" \"\"\"\"\"", &.{ ".", " \"\"" });
|
|
try t(". \" \"\"\"\"\"\"", &.{ ".", " \"\"\"" });
|
|
try t(". \\\"", &.{ ".", "\"" });
|
|
try t(". \\\"\"", &.{ ".", "\"" });
|
|
try t(". \\\"\"\"", &.{ ".", "\"" });
|
|
try t(". \\\"\"\"\"", &.{ ".", "\"\"" });
|
|
try t(". \\\"\"\"\"\"", &.{ ".", "\"\"" });
|
|
try t(". \\\"\"\"\"\"\"", &.{ ".", "\"\"\"" });
|
|
try t(". \" \\\"", &.{ ".", " \"" });
|
|
try t(". \" \\\"\"", &.{ ".", " \"" });
|
|
try t(". \" \\\"\"\"", &.{ ".", " \"\"" });
|
|
try t(". \" \\\"\"\"\"", &.{ ".", " \"\"" });
|
|
try t(". \" \\\"\"\"\"\"", &.{ ".", " \"\"\"" });
|
|
try t(". \" \\\"\"\"\"\"\"", &.{ ".", " \"\"\"" });
|
|
try t(". aa\\bb\\\\cc\\\\\\dd", &.{ ".", "aa\\bb\\\\cc\\\\\\dd" });
|
|
try t(". \\\\\\\"aa bb\"", &.{ ".", "\\\"aa", "bb" });
|
|
try t(". \\\\\\\\\"aa bb\"", &.{ ".", "\\\\aa bb" });
|
|
|
|
// From https://learn.microsoft.com/en-us/cpp/cpp/main-function-command-line-args#results-of-parsing-command-lines
|
|
try t(
|
|
\\foo.exe "abc" d e
|
|
, &.{ "foo.exe", "abc", "d", "e" });
|
|
try t(
|
|
\\foo.exe a\\b d"e f"g h
|
|
, &.{ "foo.exe", "a\\\\b", "de fg", "h" });
|
|
try t(
|
|
\\foo.exe a\\\"b c d
|
|
, &.{ "foo.exe", "a\\\"b", "c", "d" });
|
|
try t(
|
|
\\foo.exe a\\\\"b c" d e
|
|
, &.{ "foo.exe", "a\\\\b c", "d", "e" });
|
|
try t(
|
|
\\foo.exe a"b"" c d
|
|
, &.{ "foo.exe", "ab\" c d" });
|
|
|
|
// From https://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULESEX
|
|
try t("foo.exe CallMeIshmael", &.{ "foo.exe", "CallMeIshmael" });
|
|
try t("foo.exe \"Call Me Ishmael\"", &.{ "foo.exe", "Call Me Ishmael" });
|
|
try t("foo.exe Cal\"l Me I\"shmael", &.{ "foo.exe", "Call Me Ishmael" });
|
|
try t("foo.exe CallMe\\\"Ishmael", &.{ "foo.exe", "CallMe\"Ishmael" });
|
|
try t("foo.exe \"CallMe\\\"Ishmael\"", &.{ "foo.exe", "CallMe\"Ishmael" });
|
|
try t("foo.exe \"Call Me Ishmael\\\\\"", &.{ "foo.exe", "Call Me Ishmael\\" });
|
|
try t("foo.exe \"CallMe\\\\\\\"Ishmael\"", &.{ "foo.exe", "CallMe\\\"Ishmael" });
|
|
try t("foo.exe a\\\\\\b", &.{ "foo.exe", "a\\\\\\b" });
|
|
try t("foo.exe \"a\\\\\\b\"", &.{ "foo.exe", "a\\\\\\b" });
|
|
|
|
// Surrogate pair encoding of 𐐷 separated by quotes.
|
|
// Encoded as WTF-16:
|
|
// "<0xD801>"<0xDC37>
|
|
// Encoded as WTF-8:
|
|
// "<0xED><0xA0><0x81>"<0xED><0xB0><0xB7>
|
|
// During parsing, the quotes drop out and the surrogate pair
|
|
// should end up encoded as its normal UTF-8 representation.
|
|
try t("foo.exe \"\xed\xa0\x81\"\xed\xb0\xb7", &.{ "foo.exe", "𐐷" });
|
|
}
|
|
|
|
fn testIteratorWindows(cmd_line: []const u8, expected_args: []const []const u8) !void {
|
|
const cmd_line_w = try std.unicode.wtf8ToWtf16LeAllocZ(testing.allocator, cmd_line);
|
|
defer testing.allocator.free(cmd_line_w);
|
|
|
|
// next
|
|
{
|
|
var it = try Iterator.Windows.init(testing.allocator, cmd_line_w);
|
|
defer it.deinit();
|
|
|
|
for (expected_args) |expected| {
|
|
if (it.next()) |actual| {
|
|
try testing.expectEqualStrings(expected, actual);
|
|
} else {
|
|
return error.TestUnexpectedResult;
|
|
}
|
|
}
|
|
try testing.expect(it.next() == null);
|
|
}
|
|
|
|
// skip
|
|
{
|
|
var it = try Iterator.Windows.init(testing.allocator, cmd_line_w);
|
|
defer it.deinit();
|
|
|
|
for (0..expected_args.len) |_| {
|
|
try testing.expect(it.skip());
|
|
}
|
|
try testing.expect(!it.skip());
|
|
}
|
|
}
|
|
|
|
test "general parsing" {
|
|
try testGeneralCmdLine("a b\tc d", &.{ "a", "b", "c", "d" });
|
|
try testGeneralCmdLine("\"abc\" d e", &.{ "abc", "d", "e" });
|
|
try testGeneralCmdLine("a\\\\\\b d\"e f\"g h", &.{ "a\\\\\\b", "de fg", "h" });
|
|
try testGeneralCmdLine("a\\\\\\\"b c d", &.{ "a\\\"b", "c", "d" });
|
|
try testGeneralCmdLine("a\\\\\\\\\"b c\" d e", &.{ "a\\\\b c", "d", "e" });
|
|
try testGeneralCmdLine("a b\tc \"d f", &.{ "a", "b", "c", "d f" });
|
|
try testGeneralCmdLine("j k l\\", &.{ "j", "k", "l\\" });
|
|
try testGeneralCmdLine("\"\" x y z\\\\", &.{ "", "x", "y", "z\\\\" });
|
|
|
|
try testGeneralCmdLine("\".\\..\\zig-cache\\build\" \"bin\\zig.exe\" \".\\..\" \".\\..\\zig-cache\" \"--help\"", &.{
|
|
".\\..\\zig-cache\\build",
|
|
"bin\\zig.exe",
|
|
".\\..",
|
|
".\\..\\zig-cache",
|
|
"--help",
|
|
});
|
|
|
|
try testGeneralCmdLine(
|
|
\\ 'foo' "bar"
|
|
, &.{ "'foo'", "bar" });
|
|
}
|
|
|
|
fn testGeneralCmdLine(input_cmd_line: []const u8, expected_args: []const []const u8) !void {
|
|
var it = try IteratorGeneral(.{}).init(std.testing.allocator, input_cmd_line);
|
|
defer it.deinit();
|
|
for (expected_args) |expected_arg| {
|
|
const arg = it.next().?;
|
|
try testing.expectEqualStrings(expected_arg, arg);
|
|
}
|
|
try testing.expect(it.next() == null);
|
|
}
|
|
|
|
/// Optional parameters for `IteratorGeneral`
|
|
pub const IteratorGeneralOptions = struct {
|
|
comments: bool = false,
|
|
single_quotes: bool = false,
|
|
};
|
|
|
|
/// A general Iterator to parse a string into a set of arguments
|
|
pub fn IteratorGeneral(comptime options: IteratorGeneralOptions) type {
|
|
return struct {
|
|
allocator: Allocator,
|
|
index: usize = 0,
|
|
cmd_line: []const u8,
|
|
|
|
/// Should the cmd_line field be free'd (using the allocator) on deinit()?
|
|
free_cmd_line_on_deinit: bool,
|
|
|
|
/// buffer MUST be long enough to hold the cmd_line plus a null terminator.
|
|
/// buffer will we free'd (using the allocator) on deinit()
|
|
buffer: []u8,
|
|
start: usize = 0,
|
|
end: usize = 0,
|
|
|
|
pub const Self = @This();
|
|
|
|
pub const InitError = error{OutOfMemory};
|
|
|
|
/// cmd_line_utf8 MUST remain valid and constant while using this instance
|
|
pub fn init(allocator: Allocator, cmd_line_utf8: []const u8) InitError!Self {
|
|
const buffer = try allocator.alloc(u8, cmd_line_utf8.len + 1);
|
|
errdefer allocator.free(buffer);
|
|
|
|
return Self{
|
|
.allocator = allocator,
|
|
.cmd_line = cmd_line_utf8,
|
|
.free_cmd_line_on_deinit = false,
|
|
.buffer = buffer,
|
|
};
|
|
}
|
|
|
|
/// cmd_line_utf8 will be free'd (with the allocator) on deinit()
|
|
pub fn initTakeOwnership(allocator: Allocator, cmd_line_utf8: []const u8) InitError!Self {
|
|
const buffer = try allocator.alloc(u8, cmd_line_utf8.len + 1);
|
|
errdefer allocator.free(buffer);
|
|
|
|
return Self{
|
|
.allocator = allocator,
|
|
.cmd_line = cmd_line_utf8,
|
|
.free_cmd_line_on_deinit = true,
|
|
.buffer = buffer,
|
|
};
|
|
}
|
|
|
|
// Skips over whitespace in the cmd_line.
|
|
// Returns false if the terminating sentinel is reached, true otherwise.
|
|
// Also skips over comments (if supported).
|
|
fn skipWhitespace(self: *Self) bool {
|
|
while (true) : (self.index += 1) {
|
|
const character = if (self.index != self.cmd_line.len) self.cmd_line[self.index] else 0;
|
|
switch (character) {
|
|
0 => return false,
|
|
' ', '\t', '\r', '\n' => continue,
|
|
'#' => {
|
|
if (options.comments) {
|
|
while (true) : (self.index += 1) {
|
|
switch (self.cmd_line[self.index]) {
|
|
'\n' => break,
|
|
0 => return false,
|
|
else => continue,
|
|
}
|
|
}
|
|
continue;
|
|
} else {
|
|
break;
|
|
}
|
|
},
|
|
else => break,
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
pub fn skip(self: *Self) bool {
|
|
if (!self.skipWhitespace()) {
|
|
return false;
|
|
}
|
|
|
|
var backslash_count: usize = 0;
|
|
var in_quote = false;
|
|
while (true) : (self.index += 1) {
|
|
const character = if (self.index != self.cmd_line.len) self.cmd_line[self.index] else 0;
|
|
switch (character) {
|
|
0 => return true,
|
|
'"', '\'' => {
|
|
if (!options.single_quotes and character == '\'') {
|
|
backslash_count = 0;
|
|
continue;
|
|
}
|
|
const quote_is_real = backslash_count % 2 == 0;
|
|
if (quote_is_real) {
|
|
in_quote = !in_quote;
|
|
}
|
|
},
|
|
'\\' => {
|
|
backslash_count += 1;
|
|
},
|
|
' ', '\t', '\r', '\n' => {
|
|
if (!in_quote) {
|
|
return true;
|
|
}
|
|
backslash_count = 0;
|
|
},
|
|
else => {
|
|
backslash_count = 0;
|
|
continue;
|
|
},
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Returns a slice of the internal buffer that contains the next argument.
|
|
/// Returns null when it reaches the end.
|
|
pub fn next(self: *Self) ?[:0]const u8 {
|
|
if (!self.skipWhitespace()) {
|
|
return null;
|
|
}
|
|
|
|
var backslash_count: usize = 0;
|
|
var in_quote = false;
|
|
while (true) : (self.index += 1) {
|
|
const character = if (self.index != self.cmd_line.len) self.cmd_line[self.index] else 0;
|
|
switch (character) {
|
|
0 => {
|
|
self.emitBackslashes(backslash_count);
|
|
self.buffer[self.end] = 0;
|
|
const token = self.buffer[self.start..self.end :0];
|
|
self.end += 1;
|
|
self.start = self.end;
|
|
return token;
|
|
},
|
|
'"', '\'' => {
|
|
if (!options.single_quotes and character == '\'') {
|
|
self.emitBackslashes(backslash_count);
|
|
backslash_count = 0;
|
|
self.emitCharacter(character);
|
|
continue;
|
|
}
|
|
const quote_is_real = backslash_count % 2 == 0;
|
|
self.emitBackslashes(backslash_count / 2);
|
|
backslash_count = 0;
|
|
|
|
if (quote_is_real) {
|
|
in_quote = !in_quote;
|
|
} else {
|
|
self.emitCharacter('"');
|
|
}
|
|
},
|
|
'\\' => {
|
|
backslash_count += 1;
|
|
},
|
|
' ', '\t', '\r', '\n' => {
|
|
self.emitBackslashes(backslash_count);
|
|
backslash_count = 0;
|
|
if (in_quote) {
|
|
self.emitCharacter(character);
|
|
} else {
|
|
self.buffer[self.end] = 0;
|
|
const token = self.buffer[self.start..self.end :0];
|
|
self.end += 1;
|
|
self.start = self.end;
|
|
return token;
|
|
}
|
|
},
|
|
else => {
|
|
self.emitBackslashes(backslash_count);
|
|
backslash_count = 0;
|
|
self.emitCharacter(character);
|
|
},
|
|
}
|
|
}
|
|
}
|
|
|
|
fn emitBackslashes(self: *Self, emit_count: usize) void {
|
|
var i: usize = 0;
|
|
while (i < emit_count) : (i += 1) {
|
|
self.emitCharacter('\\');
|
|
}
|
|
}
|
|
|
|
fn emitCharacter(self: *Self, char: u8) void {
|
|
self.buffer[self.end] = char;
|
|
self.end += 1;
|
|
}
|
|
|
|
/// Call to free the internal buffer of the iterator.
|
|
pub fn deinit(self: *Self) void {
|
|
self.allocator.free(self.buffer);
|
|
|
|
if (self.free_cmd_line_on_deinit) {
|
|
self.allocator.free(self.cmd_line);
|
|
}
|
|
}
|
|
};
|
|
}
|
|
|
|
test "response file arg parsing" {
|
|
try testResponseFileCmdLine(
|
|
\\a b
|
|
\\c d\
|
|
, &.{ "a", "b", "c", "d\\" });
|
|
try testResponseFileCmdLine("a b c d\\", &.{ "a", "b", "c", "d\\" });
|
|
|
|
try testResponseFileCmdLine(
|
|
\\j
|
|
\\ k l # this is a comment \\ \\\ \\\\ "none" "\\" "\\\"
|
|
\\ "m" #another comment
|
|
\\
|
|
, &.{ "j", "k", "l", "m" });
|
|
|
|
try testResponseFileCmdLine(
|
|
\\ "" q ""
|
|
\\ "r s # t" "u\" v" #another comment
|
|
\\
|
|
, &.{ "", "q", "", "r s # t", "u\" v" });
|
|
|
|
try testResponseFileCmdLine(
|
|
\\ -l"advapi32" a# b#c d#
|
|
\\e\\\
|
|
, &.{ "-ladvapi32", "a#", "b#c", "d#", "e\\\\\\" });
|
|
|
|
try testResponseFileCmdLine(
|
|
\\ 'foo' "bar"
|
|
, &.{ "foo", "bar" });
|
|
}
|
|
|
|
fn testResponseFileCmdLine(input_cmd_line: []const u8, expected_args: []const []const u8) !void {
|
|
var it = try IteratorGeneral(.{ .comments = true, .single_quotes = true })
|
|
.init(std.testing.allocator, input_cmd_line);
|
|
defer it.deinit();
|
|
for (expected_args) |expected_arg| {
|
|
const arg = it.next().?;
|
|
try testing.expectEqualStrings(expected_arg, arg);
|
|
}
|
|
try testing.expect(it.next() == null);
|
|
}
|