mirror of
https://codeberg.org/ziglang/zig.git
synced 2026-03-08 04:24:33 +01:00
this simplifies the implementation, allows specializing it, and allows deleting the corresponding free function. In practice this is how it is always used anyway.
976 lines
37 KiB
Zig
976 lines
37 KiB
Zig
const Args = @This();
|
|
|
|
const builtin = @import("builtin");
|
|
const native_os = builtin.os.tag;
|
|
|
|
const std = @import("../std.zig");
|
|
const Allocator = std.mem.Allocator;
|
|
const assert = std.debug.assert;
|
|
const testing = std.testing;
|
|
|
|
vector: Vector,
|
|
|
|
/// On WASI without libc, this is `void` because the environment has to be
|
|
/// queried and heap-allocated at runtime.
|
|
pub const Vector = switch (native_os) {
|
|
.windows => []const u16, // WTF-16 encoded
|
|
.wasi => switch (builtin.link_libc) {
|
|
false => void,
|
|
true => []const [*:0]const u8,
|
|
},
|
|
.freestanding, .other => void,
|
|
else => []const [*:0]const u8,
|
|
};
|
|
|
|
/// Cross-platform access to command line one argument at a time.
|
|
pub const Iterator = struct {
|
|
const Inner = switch (native_os) {
|
|
.windows => Windows,
|
|
.wasi => if (builtin.link_libc) Posix else Wasi,
|
|
else => Posix,
|
|
};
|
|
|
|
inner: Inner,
|
|
|
|
/// Initialize the args iterator. Consider using `initAllocator` instead
|
|
/// for cross-platform compatibility.
|
|
pub fn init(a: Args) Iterator {
|
|
if (native_os == .wasi) @compileError("In WASI, use initAllocator instead.");
|
|
if (native_os == .windows) @compileError("In Windows, use initAllocator instead.");
|
|
return .{ .inner = .init(a) };
|
|
}
|
|
|
|
pub const InitError = Inner.InitError;
|
|
|
|
/// You must deinitialize iterator's internal buffers by calling `deinit` when done.
|
|
pub fn initAllocator(a: Args, gpa: Allocator) InitError!Iterator {
|
|
if (native_os == .wasi and !builtin.link_libc) {
|
|
return .{ .inner = try .init(gpa) };
|
|
}
|
|
if (native_os == .windows) {
|
|
return .{ .inner = try .init(gpa, a.vector) };
|
|
}
|
|
|
|
return .{ .inner = .init(a) };
|
|
}
|
|
|
|
/// Return subsequent argument, or `null` if no more remaining.
|
|
///
|
|
/// Returned slice is pointing to the iterator's internal buffer.
|
|
/// On Windows, the result is encoded as [WTF-8](https://wtf-8.codeberg.page/).
|
|
/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.
|
|
pub fn next(it: *Iterator) ?[:0]const u8 {
|
|
return it.inner.next();
|
|
}
|
|
|
|
/// Parse past 1 argument without capturing it.
|
|
/// Returns `true` if skipped an arg, `false` if we are at the end.
|
|
pub fn skip(it: *Iterator) bool {
|
|
return it.inner.skip();
|
|
}
|
|
|
|
/// Required to release resources if the iterator was initialized with
|
|
/// `initAllocator` function.
|
|
pub fn deinit(it: *Iterator) void {
|
|
// Unless we're targeting WASI or Windows, this is a no-op.
|
|
if (native_os == .wasi and !builtin.link_libc) it.inner.deinit();
|
|
if (native_os == .windows) it.inner.deinit();
|
|
}
|
|
|
|
/// Iterator that implements the Windows command-line parsing algorithm.
|
|
///
|
|
/// The implementation is intended to be compatible with the post-2008 C runtime,
|
|
/// but is *not* intended to be compatible with `CommandLineToArgvW` since
|
|
/// `CommandLineToArgvW` uses the pre-2008 parsing rules.
|
|
///
|
|
/// This iterator faithfully implements the parsing behavior observed from the C runtime with
|
|
/// one exception: if the command-line string is empty, the iterator will immediately complete
|
|
/// without returning any arguments (whereas the C runtime will return a single argument
|
|
/// representing the name of the current executable).
|
|
///
|
|
/// The essential parts of the algorithm are described in Microsoft's documentation:
|
|
///
|
|
/// - https://learn.microsoft.com/en-us/cpp/cpp/main-function-command-line-args?view=msvc-170#parsing-c-command-line-arguments
|
|
///
|
|
/// David Deley explains some additional undocumented quirks in great detail:
|
|
///
|
|
/// - https://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULES
|
|
pub const Windows = struct {
|
|
allocator: Allocator,
|
|
/// Encoded as WTF-16 LE.
|
|
cmd_line: []const u16,
|
|
index: usize = 0,
|
|
/// Owned by the iterator. Long enough to hold contiguous NUL-terminated slices
|
|
/// of each argument encoded as WTF-8.
|
|
buffer: []u8,
|
|
start: usize = 0,
|
|
end: usize = 0,
|
|
|
|
pub const InitError = error{OutOfMemory};
|
|
|
|
/// `cmd_line_w` *must* be a WTF16-LE-encoded string.
|
|
///
|
|
/// The iterator stores and uses `cmd_line_w`, so its memory must be valid for
|
|
/// at least as long as the returned Windows.
|
|
pub fn init(gpa: Allocator, cmd_line_w: []const u16) Windows.InitError!Windows {
|
|
const wtf8_len = std.unicode.calcWtf8Len(cmd_line_w);
|
|
|
|
// This buffer must be large enough to contain contiguous NUL-terminated slices
|
|
// of each argument.
|
|
// - During parsing, the length of a parsed argument will always be equal to
|
|
// to less than its unparsed length
|
|
// - The first argument needs one extra byte of space allocated for its NUL
|
|
// terminator, but for each subsequent argument the necessary whitespace
|
|
// between arguments guarantees room for their NUL terminator(s).
|
|
const buffer = try gpa.alloc(u8, wtf8_len + 1);
|
|
errdefer gpa.free(buffer);
|
|
|
|
return .{
|
|
.allocator = gpa,
|
|
.cmd_line = cmd_line_w,
|
|
.buffer = buffer,
|
|
};
|
|
}
|
|
|
|
/// Returns the next argument and advances the iterator. Returns `null` if at the end of the
|
|
/// command-line string. The iterator owns the returned slice.
|
|
/// The result is encoded as [WTF-8](https://wtf-8.codeberg.page/).
|
|
pub fn next(self: *Windows) ?[:0]const u8 {
|
|
return self.nextWithStrategy(next_strategy);
|
|
}
|
|
|
|
/// Skips the next argument and advances the iterator. Returns `true` if an argument was
|
|
/// skipped, `false` if at the end of the command-line string.
|
|
pub fn skip(self: *Windows) bool {
|
|
return self.nextWithStrategy(skip_strategy);
|
|
}
|
|
|
|
const next_strategy = struct {
|
|
const T = ?[:0]const u8;
|
|
|
|
const eof = null;
|
|
|
|
/// Returns '\' if any backslashes are emitted, otherwise returns `last_emitted_code_unit`.
|
|
fn emitBackslashes(self: *Windows, count: usize, last_emitted_code_unit: ?u16) ?u16 {
|
|
for (0..count) |_| {
|
|
self.buffer[self.end] = '\\';
|
|
self.end += 1;
|
|
}
|
|
return if (count != 0) '\\' else last_emitted_code_unit;
|
|
}
|
|
|
|
/// If `last_emitted_code_unit` and `code_unit` form a surrogate pair, then
|
|
/// the previously emitted high surrogate is overwritten by the codepoint encoded
|
|
/// by the surrogate pair, and `null` is returned.
|
|
/// Otherwise, `code_unit` is emitted and returned.
|
|
fn emitCharacter(self: *Windows, code_unit: u16, last_emitted_code_unit: ?u16) ?u16 {
|
|
// Because we are emitting WTF-8, we need to
|
|
// check to see if we've emitted two consecutive surrogate
|
|
// codepoints that form a valid surrogate pair in order
|
|
// to ensure that we're always emitting well-formed WTF-8
|
|
// (https://wtf-8.codeberg.page/#concatenating).
|
|
//
|
|
// If we do have a valid surrogate pair, we need to emit
|
|
// the UTF-8 sequence for the codepoint that they encode
|
|
// instead of the WTF-8 encoding for the two surrogate pairs
|
|
// separately.
|
|
//
|
|
// This is relevant when dealing with a WTF-16 encoded
|
|
// command line like this:
|
|
// "<0xD801>"<0xDC37>
|
|
// which would get parsed and converted to WTF-8 as:
|
|
// <0xED><0xA0><0x81><0xED><0xB0><0xB7>
|
|
// but instead, we need to recognize the surrogate pair
|
|
// and emit the codepoint it encodes, which in this
|
|
// example is U+10437 (𐐷), which is encoded in UTF-8 as:
|
|
// <0xF0><0x90><0x90><0xB7>
|
|
if (last_emitted_code_unit != null and
|
|
std.unicode.utf16IsLowSurrogate(code_unit) and
|
|
std.unicode.utf16IsHighSurrogate(last_emitted_code_unit.?))
|
|
{
|
|
const codepoint = std.unicode.utf16DecodeSurrogatePair(&.{ last_emitted_code_unit.?, code_unit }) catch unreachable;
|
|
|
|
// Unpaired surrogate is 3 bytes long
|
|
const dest = self.buffer[self.end - 3 ..];
|
|
const len = std.unicode.utf8Encode(codepoint, dest) catch unreachable;
|
|
// All codepoints that require a surrogate pair (> U+FFFF) are encoded as 4 bytes
|
|
assert(len == 4);
|
|
self.end += 1;
|
|
return null;
|
|
}
|
|
|
|
const wtf8_len = std.unicode.wtf8Encode(code_unit, self.buffer[self.end..]) catch unreachable;
|
|
self.end += wtf8_len;
|
|
return code_unit;
|
|
}
|
|
|
|
fn yieldArg(self: *Windows) [:0]const u8 {
|
|
self.buffer[self.end] = 0;
|
|
const arg = self.buffer[self.start..self.end :0];
|
|
self.end += 1;
|
|
self.start = self.end;
|
|
return arg;
|
|
}
|
|
};
|
|
|
|
const skip_strategy = struct {
|
|
const T = bool;
|
|
|
|
const eof = false;
|
|
|
|
fn emitBackslashes(_: *Windows, _: usize, last_emitted_code_unit: ?u16) ?u16 {
|
|
return last_emitted_code_unit;
|
|
}
|
|
|
|
fn emitCharacter(_: *Windows, _: u16, last_emitted_code_unit: ?u16) ?u16 {
|
|
return last_emitted_code_unit;
|
|
}
|
|
|
|
fn yieldArg(_: *Windows) bool {
|
|
return true;
|
|
}
|
|
};
|
|
|
|
fn nextWithStrategy(self: *Windows, comptime strategy: type) strategy.T {
|
|
var last_emitted_code_unit: ?u16 = null;
|
|
// The first argument (the executable name) uses different parsing rules.
|
|
if (self.index == 0) {
|
|
if (self.cmd_line.len == 0 or self.cmd_line[0] == 0) {
|
|
// Immediately complete the iterator.
|
|
// The C runtime would return the name of the current executable here.
|
|
return strategy.eof;
|
|
}
|
|
|
|
var inside_quotes = false;
|
|
while (true) : (self.index += 1) {
|
|
const char = if (self.index != self.cmd_line.len)
|
|
std.mem.littleToNative(u16, self.cmd_line[self.index])
|
|
else
|
|
0;
|
|
switch (char) {
|
|
0 => {
|
|
return strategy.yieldArg(self);
|
|
},
|
|
'"' => {
|
|
inside_quotes = !inside_quotes;
|
|
},
|
|
' ', '\t' => {
|
|
if (inside_quotes) {
|
|
last_emitted_code_unit = strategy.emitCharacter(self, char, last_emitted_code_unit);
|
|
} else {
|
|
self.index += 1;
|
|
return strategy.yieldArg(self);
|
|
}
|
|
},
|
|
else => {
|
|
last_emitted_code_unit = strategy.emitCharacter(self, char, last_emitted_code_unit);
|
|
},
|
|
}
|
|
}
|
|
}
|
|
|
|
// Skip spaces and tabs. The iterator completes if we reach the end of the string here.
|
|
while (true) : (self.index += 1) {
|
|
const char = if (self.index != self.cmd_line.len)
|
|
std.mem.littleToNative(u16, self.cmd_line[self.index])
|
|
else
|
|
0;
|
|
switch (char) {
|
|
0 => return strategy.eof,
|
|
' ', '\t' => continue,
|
|
else => break,
|
|
}
|
|
}
|
|
|
|
// Parsing rules for subsequent arguments:
|
|
//
|
|
// - The end of the string always terminates the current argument.
|
|
// - When not in 'inside_quotes' mode, a space or tab terminates the current argument.
|
|
// - 2n backslashes followed by a quote emit n backslashes (note: n can be zero).
|
|
// If in 'inside_quotes' and the quote is immediately followed by a second quote,
|
|
// one quote is emitted and the other is skipped, otherwise, the quote is skipped
|
|
// and 'inside_quotes' is toggled.
|
|
// - 2n + 1 backslashes followed by a quote emit n backslashes followed by a quote.
|
|
// - n backslashes not followed by a quote emit n backslashes.
|
|
var backslash_count: usize = 0;
|
|
var inside_quotes = false;
|
|
while (true) : (self.index += 1) {
|
|
const char = if (self.index != self.cmd_line.len)
|
|
std.mem.littleToNative(u16, self.cmd_line[self.index])
|
|
else
|
|
0;
|
|
switch (char) {
|
|
0 => {
|
|
last_emitted_code_unit = strategy.emitBackslashes(self, backslash_count, last_emitted_code_unit);
|
|
return strategy.yieldArg(self);
|
|
},
|
|
' ', '\t' => {
|
|
last_emitted_code_unit = strategy.emitBackslashes(self, backslash_count, last_emitted_code_unit);
|
|
backslash_count = 0;
|
|
if (inside_quotes) {
|
|
last_emitted_code_unit = strategy.emitCharacter(self, char, last_emitted_code_unit);
|
|
} else return strategy.yieldArg(self);
|
|
},
|
|
'"' => {
|
|
const char_is_escaped_quote = backslash_count % 2 != 0;
|
|
last_emitted_code_unit = strategy.emitBackslashes(self, backslash_count / 2, last_emitted_code_unit);
|
|
backslash_count = 0;
|
|
if (char_is_escaped_quote) {
|
|
last_emitted_code_unit = strategy.emitCharacter(self, '"', last_emitted_code_unit);
|
|
} else {
|
|
if (inside_quotes and
|
|
self.index + 1 != self.cmd_line.len and
|
|
std.mem.littleToNative(u16, self.cmd_line[self.index + 1]) == '"')
|
|
{
|
|
last_emitted_code_unit = strategy.emitCharacter(self, '"', last_emitted_code_unit);
|
|
self.index += 1;
|
|
} else {
|
|
inside_quotes = !inside_quotes;
|
|
}
|
|
}
|
|
},
|
|
'\\' => {
|
|
backslash_count += 1;
|
|
},
|
|
else => {
|
|
last_emitted_code_unit = strategy.emitBackslashes(self, backslash_count, last_emitted_code_unit);
|
|
backslash_count = 0;
|
|
last_emitted_code_unit = strategy.emitCharacter(self, char, last_emitted_code_unit);
|
|
},
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Frees the iterator's copy of the command-line string and all previously returned
|
|
/// argument slices.
|
|
pub fn deinit(self: *Windows) void {
|
|
self.allocator.free(self.buffer);
|
|
}
|
|
};
|
|
|
|
pub const Posix = struct {
|
|
remaining: Vector,
|
|
|
|
pub const InitError = error{};
|
|
|
|
pub fn init(a: Args) Posix {
|
|
return .{ .remaining = a.vector };
|
|
}
|
|
|
|
pub fn next(it: *Posix) ?[:0]const u8 {
|
|
if (it.remaining.len == 0) return null;
|
|
const arg = it.remaining[0];
|
|
it.remaining = it.remaining[1..];
|
|
return std.mem.sliceTo(arg, 0);
|
|
}
|
|
|
|
pub fn skip(it: *Posix) bool {
|
|
if (it.remaining.len == 0) return false;
|
|
it.remaining = it.remaining[1..];
|
|
return true;
|
|
}
|
|
};
|
|
|
|
pub const Wasi = struct {
|
|
allocator: Allocator,
|
|
index: usize,
|
|
args: [][:0]u8,
|
|
|
|
pub const InitError = error{OutOfMemory} || std.posix.UnexpectedError;
|
|
|
|
/// You must call deinit to free the internal buffer of the
|
|
/// iterator after you are done.
|
|
pub fn init(allocator: Allocator) Wasi.InitError!Wasi {
|
|
const fetched_args = try Wasi.internalInit(allocator);
|
|
return Wasi{
|
|
.allocator = allocator,
|
|
.index = 0,
|
|
.args = fetched_args,
|
|
};
|
|
}
|
|
|
|
fn internalInit(allocator: Allocator) Wasi.InitError![][:0]u8 {
|
|
var count: usize = undefined;
|
|
var buf_size: usize = undefined;
|
|
|
|
switch (std.os.wasi.args_sizes_get(&count, &buf_size)) {
|
|
.SUCCESS => {},
|
|
else => |err| return std.posix.unexpectedErrno(err),
|
|
}
|
|
|
|
if (count == 0) {
|
|
return &[_][:0]u8{};
|
|
}
|
|
|
|
const argv = try allocator.alloc([*:0]u8, count);
|
|
defer allocator.free(argv);
|
|
|
|
const argv_buf = try allocator.alloc(u8, buf_size);
|
|
|
|
switch (std.os.wasi.args_get(argv.ptr, argv_buf.ptr)) {
|
|
.SUCCESS => {},
|
|
else => |err| return std.posix.unexpectedErrno(err),
|
|
}
|
|
|
|
var result_args = try allocator.alloc([:0]u8, count);
|
|
var i: usize = 0;
|
|
while (i < count) : (i += 1) {
|
|
result_args[i] = std.mem.sliceTo(argv[i], 0);
|
|
}
|
|
|
|
return result_args;
|
|
}
|
|
|
|
pub fn next(self: *Wasi) ?[:0]const u8 {
|
|
if (self.index == self.args.len) return null;
|
|
|
|
const arg = self.args[self.index];
|
|
self.index += 1;
|
|
return arg;
|
|
}
|
|
|
|
pub fn skip(self: *Wasi) bool {
|
|
if (self.index == self.args.len) return false;
|
|
|
|
self.index += 1;
|
|
return true;
|
|
}
|
|
|
|
/// Call to free the internal buffer of the iterator.
|
|
pub fn deinit(self: *Wasi) void {
|
|
// Nothing is allocated when there are no args
|
|
if (self.args.len == 0) return;
|
|
|
|
const last_item = self.args[self.args.len - 1];
|
|
const last_byte_addr = @intFromPtr(last_item.ptr) + last_item.len + 1; // null terminated
|
|
const first_item_ptr = self.args[0].ptr;
|
|
const len = last_byte_addr - @intFromPtr(first_item_ptr);
|
|
self.allocator.free(first_item_ptr[0..len]);
|
|
self.allocator.free(self.args);
|
|
}
|
|
};
|
|
};
|
|
|
|
/// Holds the command-line arguments, with the program name as the first entry.
|
|
/// Use `iterateAllocator` for cross-platform code.
|
|
pub fn iterate(a: Args) Iterator {
|
|
return .init(a);
|
|
}
|
|
|
|
/// You must deinitialize iterator's internal buffers by calling `deinit` when
|
|
/// done.
|
|
pub fn iterateAllocator(a: Args, gpa: Allocator) Iterator.InitError!Iterator {
|
|
return .initAllocator(a, gpa);
|
|
}
|
|
|
|
pub const ToSliceError = Iterator.Windows.InitError || Iterator.Wasi.InitError;
|
|
|
|
/// Returned value may reference several allocations and may point into `a`.
|
|
/// Thefore, an arena-style allocator must be used.
|
|
///
|
|
/// * On Windows, the result is encoded as
|
|
/// [WTF-8](https://wtf-8.codeberg.page/).
|
|
/// * On other platforms, the result is an opaque sequence of bytes with no
|
|
/// particular encoding.
|
|
///
|
|
/// See also:
|
|
/// * `iterate`
|
|
/// * `iterateAllocator`
|
|
pub fn toSlice(a: Args, arena: Allocator) ToSliceError![]const [:0]const u8 {
|
|
if (native_os == .windows) {
|
|
var it = try a.iterateAllocator(arena);
|
|
var contents: std.ArrayList(u8) = .empty;
|
|
var slice_list: std.ArrayList(usize) = .empty;
|
|
while (it.next()) |arg| {
|
|
try contents.appendSlice(arena, arg[0 .. arg.len + 1]);
|
|
try slice_list.append(arena, arg.len);
|
|
}
|
|
const contents_slice = contents.items;
|
|
const slice_sizes = slice_list.items;
|
|
const slice_list_bytes = std.math.mul(usize, @sizeOf([]u8), slice_sizes.len) catch return error.OutOfMemory;
|
|
const total_bytes = std.math.add(usize, slice_list_bytes, contents_slice.len) catch return error.OutOfMemory;
|
|
const buf = try arena.alignedAlloc(u8, .of([]u8), total_bytes);
|
|
errdefer arena.free(buf);
|
|
|
|
const result_slice_list = std.mem.bytesAsSlice([:0]u8, buf[0..slice_list_bytes]);
|
|
const result_contents = buf[slice_list_bytes..];
|
|
@memcpy(result_contents[0..contents_slice.len], contents_slice);
|
|
|
|
var contents_index: usize = 0;
|
|
for (slice_sizes, 0..) |len, i| {
|
|
const new_index = contents_index + len;
|
|
result_slice_list[i] = result_contents[contents_index..new_index :0];
|
|
contents_index = new_index + 1;
|
|
}
|
|
|
|
return result_slice_list;
|
|
} else if (native_os == .wasi and !builtin.link_libc) {
|
|
var count: usize = undefined;
|
|
var buf_size: usize = undefined;
|
|
|
|
switch (std.os.wasi.args_sizes_get(&count, &buf_size)) {
|
|
.SUCCESS => {},
|
|
else => |err| return std.posix.unexpectedErrno(err),
|
|
}
|
|
|
|
if (count == 0) return &.{};
|
|
|
|
const argv = try arena.alloc([*:0]u8, count);
|
|
const argv_buf = try arena.alloc(u8, buf_size);
|
|
|
|
switch (std.os.wasi.args_get(argv.ptr, argv_buf.ptr)) {
|
|
.SUCCESS => {},
|
|
else => |err| return std.posix.unexpectedErrno(err),
|
|
}
|
|
|
|
const args = try arena.alloc([:0]const u8, count);
|
|
for (args, argv) |*dst, src| dst.* = std.mem.sliceTo(src, 0);
|
|
return args;
|
|
} else {
|
|
const args = try arena.alloc([:0]const u8, a.vector.len);
|
|
for (args, a.vector) |*dst, src| dst.* = std.mem.sliceTo(src, 0);
|
|
return args;
|
|
}
|
|
}
|
|
|
|
test "Iterator.Windows" {
|
|
const t = testIteratorWindows;
|
|
|
|
try t(
|
|
\\"C:\Program Files\zig\zig.exe" run .\src\main.zig -target x86_64-windows-gnu -O ReleaseSafe -- --emoji=🗿 --eval="new Regex(\"Dwayne \\\"The Rock\\\" Johnson\")"
|
|
, &.{
|
|
\\C:\Program Files\zig\zig.exe
|
|
,
|
|
\\run
|
|
,
|
|
\\.\src\main.zig
|
|
,
|
|
\\-target
|
|
,
|
|
\\x86_64-windows-gnu
|
|
,
|
|
\\-O
|
|
,
|
|
\\ReleaseSafe
|
|
,
|
|
\\--
|
|
,
|
|
\\--emoji=🗿
|
|
,
|
|
\\--eval=new Regex("Dwayne \"The Rock\" Johnson")
|
|
,
|
|
});
|
|
|
|
// Empty
|
|
try t("", &.{});
|
|
|
|
// Separators
|
|
try t("aa bb cc", &.{ "aa", "bb", "cc" });
|
|
try t("aa\tbb\tcc", &.{ "aa", "bb", "cc" });
|
|
try t("aa\nbb\ncc", &.{"aa\nbb\ncc"});
|
|
try t("aa\r\nbb\r\ncc", &.{"aa\r\nbb\r\ncc"});
|
|
try t("aa\rbb\rcc", &.{"aa\rbb\rcc"});
|
|
try t("aa\x07bb\x07cc", &.{"aa\x07bb\x07cc"});
|
|
try t("aa\x7Fbb\x7Fcc", &.{"aa\x7Fbb\x7Fcc"});
|
|
try t("aa🦎bb🦎cc", &.{"aa🦎bb🦎cc"});
|
|
|
|
// Leading/trailing whitespace
|
|
try t(" ", &.{""});
|
|
try t(" aa bb ", &.{ "", "aa", "bb" });
|
|
try t("\t\t", &.{""});
|
|
try t("\t\taa\t\tbb\t\t", &.{ "", "aa", "bb" });
|
|
try t("\n\n", &.{"\n\n"});
|
|
try t("\n\naa\n\nbb\n\n", &.{"\n\naa\n\nbb\n\n"});
|
|
|
|
// Executable name with quotes/backslashes
|
|
try t("\"aa bb\tcc\ndd\"", &.{"aa bb\tcc\ndd"});
|
|
try t("\"", &.{""});
|
|
try t("\"\"", &.{""});
|
|
try t("\"\"\"", &.{""});
|
|
try t("\"\"\"\"", &.{""});
|
|
try t("\"\"\"\"\"", &.{""});
|
|
try t("aa\"bb\"cc\"dd", &.{"aabbccdd"});
|
|
try t("aa\"bb cc\"dd", &.{"aabb ccdd"});
|
|
try t("\"aa\\\"bb\"", &.{"aa\\bb"});
|
|
try t("\"aa\\\\\"", &.{"aa\\\\"});
|
|
try t("aa\\\"bb", &.{"aa\\bb"});
|
|
try t("aa\\\\\"bb", &.{"aa\\\\bb"});
|
|
|
|
// Arguments with quotes/backslashes
|
|
try t(". \"aa bb\tcc\ndd\"", &.{ ".", "aa bb\tcc\ndd" });
|
|
try t(". aa\" \"bb\"\t\"cc\"\n\"dd\"", &.{ ".", "aa bb\tcc\ndd" });
|
|
try t(". ", &.{"."});
|
|
try t(". \"", &.{ ".", "" });
|
|
try t(". \"\"", &.{ ".", "" });
|
|
try t(". \"\"\"", &.{ ".", "\"" });
|
|
try t(". \"\"\"\"", &.{ ".", "\"" });
|
|
try t(". \"\"\"\"\"", &.{ ".", "\"\"" });
|
|
try t(". \"\"\"\"\"\"", &.{ ".", "\"\"" });
|
|
try t(". \" \"", &.{ ".", " " });
|
|
try t(". \" \"\"", &.{ ".", " \"" });
|
|
try t(". \" \"\"\"", &.{ ".", " \"" });
|
|
try t(". \" \"\"\"\"", &.{ ".", " \"\"" });
|
|
try t(". \" \"\"\"\"\"", &.{ ".", " \"\"" });
|
|
try t(". \" \"\"\"\"\"\"", &.{ ".", " \"\"\"" });
|
|
try t(". \\\"", &.{ ".", "\"" });
|
|
try t(". \\\"\"", &.{ ".", "\"" });
|
|
try t(". \\\"\"\"", &.{ ".", "\"" });
|
|
try t(". \\\"\"\"\"", &.{ ".", "\"\"" });
|
|
try t(". \\\"\"\"\"\"", &.{ ".", "\"\"" });
|
|
try t(". \\\"\"\"\"\"\"", &.{ ".", "\"\"\"" });
|
|
try t(". \" \\\"", &.{ ".", " \"" });
|
|
try t(". \" \\\"\"", &.{ ".", " \"" });
|
|
try t(". \" \\\"\"\"", &.{ ".", " \"\"" });
|
|
try t(". \" \\\"\"\"\"", &.{ ".", " \"\"" });
|
|
try t(". \" \\\"\"\"\"\"", &.{ ".", " \"\"\"" });
|
|
try t(". \" \\\"\"\"\"\"\"", &.{ ".", " \"\"\"" });
|
|
try t(". aa\\bb\\\\cc\\\\\\dd", &.{ ".", "aa\\bb\\\\cc\\\\\\dd" });
|
|
try t(". \\\\\\\"aa bb\"", &.{ ".", "\\\"aa", "bb" });
|
|
try t(". \\\\\\\\\"aa bb\"", &.{ ".", "\\\\aa bb" });
|
|
|
|
// From https://learn.microsoft.com/en-us/cpp/cpp/main-function-command-line-args#results-of-parsing-command-lines
|
|
try t(
|
|
\\foo.exe "abc" d e
|
|
, &.{ "foo.exe", "abc", "d", "e" });
|
|
try t(
|
|
\\foo.exe a\\b d"e f"g h
|
|
, &.{ "foo.exe", "a\\\\b", "de fg", "h" });
|
|
try t(
|
|
\\foo.exe a\\\"b c d
|
|
, &.{ "foo.exe", "a\\\"b", "c", "d" });
|
|
try t(
|
|
\\foo.exe a\\\\"b c" d e
|
|
, &.{ "foo.exe", "a\\\\b c", "d", "e" });
|
|
try t(
|
|
\\foo.exe a"b"" c d
|
|
, &.{ "foo.exe", "ab\" c d" });
|
|
|
|
// From https://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULESEX
|
|
try t("foo.exe CallMeIshmael", &.{ "foo.exe", "CallMeIshmael" });
|
|
try t("foo.exe \"Call Me Ishmael\"", &.{ "foo.exe", "Call Me Ishmael" });
|
|
try t("foo.exe Cal\"l Me I\"shmael", &.{ "foo.exe", "Call Me Ishmael" });
|
|
try t("foo.exe CallMe\\\"Ishmael", &.{ "foo.exe", "CallMe\"Ishmael" });
|
|
try t("foo.exe \"CallMe\\\"Ishmael\"", &.{ "foo.exe", "CallMe\"Ishmael" });
|
|
try t("foo.exe \"Call Me Ishmael\\\\\"", &.{ "foo.exe", "Call Me Ishmael\\" });
|
|
try t("foo.exe \"CallMe\\\\\\\"Ishmael\"", &.{ "foo.exe", "CallMe\\\"Ishmael" });
|
|
try t("foo.exe a\\\\\\b", &.{ "foo.exe", "a\\\\\\b" });
|
|
try t("foo.exe \"a\\\\\\b\"", &.{ "foo.exe", "a\\\\\\b" });
|
|
|
|
// Surrogate pair encoding of 𐐷 separated by quotes.
|
|
// Encoded as WTF-16:
|
|
// "<0xD801>"<0xDC37>
|
|
// Encoded as WTF-8:
|
|
// "<0xED><0xA0><0x81>"<0xED><0xB0><0xB7>
|
|
// During parsing, the quotes drop out and the surrogate pair
|
|
// should end up encoded as its normal UTF-8 representation.
|
|
try t("foo.exe \"\xed\xa0\x81\"\xed\xb0\xb7", &.{ "foo.exe", "𐐷" });
|
|
}
|
|
|
|
fn testIteratorWindows(cmd_line: []const u8, expected_args: []const []const u8) !void {
|
|
const cmd_line_w = try std.unicode.wtf8ToWtf16LeAllocZ(testing.allocator, cmd_line);
|
|
defer testing.allocator.free(cmd_line_w);
|
|
|
|
// next
|
|
{
|
|
var it = try Iterator.Windows.init(testing.allocator, cmd_line_w);
|
|
defer it.deinit();
|
|
|
|
for (expected_args) |expected| {
|
|
if (it.next()) |actual| {
|
|
try testing.expectEqualStrings(expected, actual);
|
|
} else {
|
|
return error.TestUnexpectedResult;
|
|
}
|
|
}
|
|
try testing.expect(it.next() == null);
|
|
}
|
|
|
|
// skip
|
|
{
|
|
var it = try Iterator.Windows.init(testing.allocator, cmd_line_w);
|
|
defer it.deinit();
|
|
|
|
for (0..expected_args.len) |_| {
|
|
try testing.expect(it.skip());
|
|
}
|
|
try testing.expect(!it.skip());
|
|
}
|
|
}
|
|
|
|
test "general parsing" {
|
|
try testGeneralCmdLine("a b\tc d", &.{ "a", "b", "c", "d" });
|
|
try testGeneralCmdLine("\"abc\" d e", &.{ "abc", "d", "e" });
|
|
try testGeneralCmdLine("a\\\\\\b d\"e f\"g h", &.{ "a\\\\\\b", "de fg", "h" });
|
|
try testGeneralCmdLine("a\\\\\\\"b c d", &.{ "a\\\"b", "c", "d" });
|
|
try testGeneralCmdLine("a\\\\\\\\\"b c\" d e", &.{ "a\\\\b c", "d", "e" });
|
|
try testGeneralCmdLine("a b\tc \"d f", &.{ "a", "b", "c", "d f" });
|
|
try testGeneralCmdLine("j k l\\", &.{ "j", "k", "l\\" });
|
|
try testGeneralCmdLine("\"\" x y z\\\\", &.{ "", "x", "y", "z\\\\" });
|
|
|
|
try testGeneralCmdLine("\".\\..\\zig-cache\\build\" \"bin\\zig.exe\" \".\\..\" \".\\..\\zig-cache\" \"--help\"", &.{
|
|
".\\..\\zig-cache\\build",
|
|
"bin\\zig.exe",
|
|
".\\..",
|
|
".\\..\\zig-cache",
|
|
"--help",
|
|
});
|
|
|
|
try testGeneralCmdLine(
|
|
\\ 'foo' "bar"
|
|
, &.{ "'foo'", "bar" });
|
|
}
|
|
|
|
fn testGeneralCmdLine(input_cmd_line: []const u8, expected_args: []const []const u8) !void {
|
|
var it = try IteratorGeneral(.{}).init(std.testing.allocator, input_cmd_line);
|
|
defer it.deinit();
|
|
for (expected_args) |expected_arg| {
|
|
const arg = it.next().?;
|
|
try testing.expectEqualStrings(expected_arg, arg);
|
|
}
|
|
try testing.expect(it.next() == null);
|
|
}
|
|
|
|
/// Optional parameters for `IteratorGeneral`
|
|
pub const IteratorGeneralOptions = struct {
|
|
comments: bool = false,
|
|
single_quotes: bool = false,
|
|
};
|
|
|
|
/// A general Iterator to parse a string into a set of arguments
|
|
pub fn IteratorGeneral(comptime options: IteratorGeneralOptions) type {
|
|
return struct {
|
|
allocator: Allocator,
|
|
index: usize = 0,
|
|
cmd_line: []const u8,
|
|
|
|
/// Should the cmd_line field be free'd (using the allocator) on deinit()?
|
|
free_cmd_line_on_deinit: bool,
|
|
|
|
/// buffer MUST be long enough to hold the cmd_line plus a null terminator.
|
|
/// buffer will we free'd (using the allocator) on deinit()
|
|
buffer: []u8,
|
|
start: usize = 0,
|
|
end: usize = 0,
|
|
|
|
pub const Self = @This();
|
|
|
|
pub const InitError = error{OutOfMemory};
|
|
|
|
/// cmd_line_utf8 MUST remain valid and constant while using this instance
|
|
pub fn init(allocator: Allocator, cmd_line_utf8: []const u8) InitError!Self {
|
|
const buffer = try allocator.alloc(u8, cmd_line_utf8.len + 1);
|
|
errdefer allocator.free(buffer);
|
|
|
|
return Self{
|
|
.allocator = allocator,
|
|
.cmd_line = cmd_line_utf8,
|
|
.free_cmd_line_on_deinit = false,
|
|
.buffer = buffer,
|
|
};
|
|
}
|
|
|
|
/// cmd_line_utf8 will be free'd (with the allocator) on deinit()
|
|
pub fn initTakeOwnership(allocator: Allocator, cmd_line_utf8: []const u8) InitError!Self {
|
|
const buffer = try allocator.alloc(u8, cmd_line_utf8.len + 1);
|
|
errdefer allocator.free(buffer);
|
|
|
|
return Self{
|
|
.allocator = allocator,
|
|
.cmd_line = cmd_line_utf8,
|
|
.free_cmd_line_on_deinit = true,
|
|
.buffer = buffer,
|
|
};
|
|
}
|
|
|
|
// Skips over whitespace in the cmd_line.
|
|
// Returns false if the terminating sentinel is reached, true otherwise.
|
|
// Also skips over comments (if supported).
|
|
fn skipWhitespace(self: *Self) bool {
|
|
while (true) : (self.index += 1) {
|
|
const character = if (self.index != self.cmd_line.len) self.cmd_line[self.index] else 0;
|
|
switch (character) {
|
|
0 => return false,
|
|
' ', '\t', '\r', '\n' => continue,
|
|
'#' => {
|
|
if (options.comments) {
|
|
while (true) : (self.index += 1) {
|
|
switch (self.cmd_line[self.index]) {
|
|
'\n' => break,
|
|
0 => return false,
|
|
else => continue,
|
|
}
|
|
}
|
|
continue;
|
|
} else {
|
|
break;
|
|
}
|
|
},
|
|
else => break,
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
pub fn skip(self: *Self) bool {
|
|
if (!self.skipWhitespace()) {
|
|
return false;
|
|
}
|
|
|
|
var backslash_count: usize = 0;
|
|
var in_quote = false;
|
|
while (true) : (self.index += 1) {
|
|
const character = if (self.index != self.cmd_line.len) self.cmd_line[self.index] else 0;
|
|
switch (character) {
|
|
0 => return true,
|
|
'"', '\'' => {
|
|
if (!options.single_quotes and character == '\'') {
|
|
backslash_count = 0;
|
|
continue;
|
|
}
|
|
const quote_is_real = backslash_count % 2 == 0;
|
|
if (quote_is_real) {
|
|
in_quote = !in_quote;
|
|
}
|
|
},
|
|
'\\' => {
|
|
backslash_count += 1;
|
|
},
|
|
' ', '\t', '\r', '\n' => {
|
|
if (!in_quote) {
|
|
return true;
|
|
}
|
|
backslash_count = 0;
|
|
},
|
|
else => {
|
|
backslash_count = 0;
|
|
continue;
|
|
},
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Returns a slice of the internal buffer that contains the next argument.
|
|
/// Returns null when it reaches the end.
|
|
pub fn next(self: *Self) ?[:0]const u8 {
|
|
if (!self.skipWhitespace()) {
|
|
return null;
|
|
}
|
|
|
|
var backslash_count: usize = 0;
|
|
var in_quote = false;
|
|
while (true) : (self.index += 1) {
|
|
const character = if (self.index != self.cmd_line.len) self.cmd_line[self.index] else 0;
|
|
switch (character) {
|
|
0 => {
|
|
self.emitBackslashes(backslash_count);
|
|
self.buffer[self.end] = 0;
|
|
const token = self.buffer[self.start..self.end :0];
|
|
self.end += 1;
|
|
self.start = self.end;
|
|
return token;
|
|
},
|
|
'"', '\'' => {
|
|
if (!options.single_quotes and character == '\'') {
|
|
self.emitBackslashes(backslash_count);
|
|
backslash_count = 0;
|
|
self.emitCharacter(character);
|
|
continue;
|
|
}
|
|
const quote_is_real = backslash_count % 2 == 0;
|
|
self.emitBackslashes(backslash_count / 2);
|
|
backslash_count = 0;
|
|
|
|
if (quote_is_real) {
|
|
in_quote = !in_quote;
|
|
} else {
|
|
self.emitCharacter('"');
|
|
}
|
|
},
|
|
'\\' => {
|
|
backslash_count += 1;
|
|
},
|
|
' ', '\t', '\r', '\n' => {
|
|
self.emitBackslashes(backslash_count);
|
|
backslash_count = 0;
|
|
if (in_quote) {
|
|
self.emitCharacter(character);
|
|
} else {
|
|
self.buffer[self.end] = 0;
|
|
const token = self.buffer[self.start..self.end :0];
|
|
self.end += 1;
|
|
self.start = self.end;
|
|
return token;
|
|
}
|
|
},
|
|
else => {
|
|
self.emitBackslashes(backslash_count);
|
|
backslash_count = 0;
|
|
self.emitCharacter(character);
|
|
},
|
|
}
|
|
}
|
|
}
|
|
|
|
fn emitBackslashes(self: *Self, emit_count: usize) void {
|
|
var i: usize = 0;
|
|
while (i < emit_count) : (i += 1) {
|
|
self.emitCharacter('\\');
|
|
}
|
|
}
|
|
|
|
fn emitCharacter(self: *Self, char: u8) void {
|
|
self.buffer[self.end] = char;
|
|
self.end += 1;
|
|
}
|
|
|
|
/// Call to free the internal buffer of the iterator.
|
|
pub fn deinit(self: *Self) void {
|
|
self.allocator.free(self.buffer);
|
|
|
|
if (self.free_cmd_line_on_deinit) {
|
|
self.allocator.free(self.cmd_line);
|
|
}
|
|
}
|
|
};
|
|
}
|
|
|
|
test "response file arg parsing" {
|
|
try testResponseFileCmdLine(
|
|
\\a b
|
|
\\c d\
|
|
, &.{ "a", "b", "c", "d\\" });
|
|
try testResponseFileCmdLine("a b c d\\", &.{ "a", "b", "c", "d\\" });
|
|
|
|
try testResponseFileCmdLine(
|
|
\\j
|
|
\\ k l # this is a comment \\ \\\ \\\\ "none" "\\" "\\\"
|
|
\\ "m" #another comment
|
|
\\
|
|
, &.{ "j", "k", "l", "m" });
|
|
|
|
try testResponseFileCmdLine(
|
|
\\ "" q ""
|
|
\\ "r s # t" "u\" v" #another comment
|
|
\\
|
|
, &.{ "", "q", "", "r s # t", "u\" v" });
|
|
|
|
try testResponseFileCmdLine(
|
|
\\ -l"advapi32" a# b#c d#
|
|
\\e\\\
|
|
, &.{ "-ladvapi32", "a#", "b#c", "d#", "e\\\\\\" });
|
|
|
|
try testResponseFileCmdLine(
|
|
\\ 'foo' "bar"
|
|
, &.{ "foo", "bar" });
|
|
}
|
|
|
|
fn testResponseFileCmdLine(input_cmd_line: []const u8, expected_args: []const []const u8) !void {
|
|
var it = try IteratorGeneral(.{ .comments = true, .single_quotes = true })
|
|
.init(std.testing.allocator, input_cmd_line);
|
|
defer it.deinit();
|
|
for (expected_args) |expected_arg| {
|
|
const arg = it.next().?;
|
|
try testing.expectEqualStrings(expected_arg, arg);
|
|
}
|
|
try testing.expect(it.next() == null);
|
|
}
|