zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

blob e10764ce (37453B) - Raw


      1 const Args = @This();
      2 
      3 const builtin = @import("builtin");
      4 const native_os = builtin.os.tag;
      5 
      6 const std = @import("../std.zig");
      7 const Allocator = std.mem.Allocator;
      8 const assert = std.debug.assert;
      9 const testing = std.testing;
     10 
     11 vector: Vector,
     12 
     13 pub const Vector = switch (native_os) {
     14     .windows => []const u16, // WTF-16 encoded
     15     .freestanding, .other => void,
     16     else => []const [*:0]const u8,
     17 };
     18 
     19 /// Cross-platform access to command line one argument at a time.
     20 pub const Iterator = struct {
     21     const Inner = switch (native_os) {
     22         .windows => Windows,
     23         .wasi => if (builtin.link_libc) Posix else Wasi,
     24         else => Posix,
     25     };
     26 
     27     inner: Inner,
     28 
     29     /// Initialize the args iterator. Consider using `initAllocator` instead
     30     /// for cross-platform compatibility.
     31     pub fn init(a: Args) Iterator {
     32         if (native_os == .wasi) {
     33             @compileError("In WASI, use initAllocator instead.");
     34         }
     35         if (native_os == .windows) {
     36             @compileError("In Windows, use initAllocator instead.");
     37         }
     38 
     39         return .{ .inner = .init(a) };
     40     }
     41 
     42     pub const InitError = Inner.InitError;
     43 
     44     /// You must deinitialize iterator's internal buffers by calling `deinit` when done.
     45     pub fn initAllocator(a: Args, gpa: Allocator) InitError!Iterator {
     46         if (native_os == .wasi and !builtin.link_libc) {
     47             return .{ .inner = try .init(a, gpa) };
     48         }
     49         if (native_os == .windows) {
     50             return .{ .inner = try .init(a, gpa) };
     51         }
     52 
     53         return .{ .inner = .init(a) };
     54     }
     55 
     56     /// Return subsequent argument, or `null` if no more remaining.
     57     ///
     58     /// Returned slice is pointing to the iterator's internal buffer.
     59     /// On Windows, the result is encoded as [WTF-8](https://wtf-8.codeberg.page/).
     60     /// On other platforms, the result is an opaque sequence of bytes with no particular encoding.
     61     pub fn next(it: *Iterator) ?[:0]const u8 {
     62         return it.inner.next();
     63     }
     64 
     65     /// Parse past 1 argument without capturing it.
     66     /// Returns `true` if skipped an arg, `false` if we are at the end.
     67     pub fn skip(it: *Iterator) bool {
     68         return it.inner.skip();
     69     }
     70 
     71     /// Required to release resources if the iterator was initialized with
     72     /// `initAllocator` function.
     73     pub fn deinit(it: *Iterator) void {
     74         // Unless we're targeting WASI or Windows, this is a no-op.
     75         if (native_os == .wasi and !builtin.link_libc) it.inner.deinit();
     76         if (native_os == .windows) it.inner.deinit();
     77     }
     78 
     79     /// Iterator that implements the Windows command-line parsing algorithm.
     80     ///
     81     /// The implementation is intended to be compatible with the post-2008 C runtime,
     82     /// but is *not* intended to be compatible with `CommandLineToArgvW` since
     83     /// `CommandLineToArgvW` uses the pre-2008 parsing rules.
     84     ///
     85     /// This iterator faithfully implements the parsing behavior observed from the C runtime with
     86     /// one exception: if the command-line string is empty, the iterator will immediately complete
     87     /// without returning any arguments (whereas the C runtime will return a single argument
     88     /// representing the name of the current executable).
     89     ///
     90     /// The essential parts of the algorithm are described in Microsoft's documentation:
     91     ///
     92     /// - https://learn.microsoft.com/en-us/cpp/cpp/main-function-command-line-args?view=msvc-170#parsing-c-command-line-arguments
     93     ///
     94     /// David Deley explains some additional undocumented quirks in great detail:
     95     ///
     96     /// - https://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULES
     97     pub const Windows = struct {
     98         allocator: Allocator,
     99         /// Encoded as WTF-16 LE.
    100         cmd_line: []const u16,
    101         index: usize = 0,
    102         /// Owned by the iterator. Long enough to hold contiguous NUL-terminated slices
    103         /// of each argument encoded as WTF-8.
    104         buffer: []u8,
    105         start: usize = 0,
    106         end: usize = 0,
    107 
    108         pub const InitError = error{OutOfMemory};
    109 
    110         /// `cmd_line_w` *must* be a WTF16-LE-encoded string.
    111         ///
    112         /// The iterator stores and uses `cmd_line_w`, so its memory must be valid for
    113         /// at least as long as the returned Windows.
    114         pub fn init(allocator: Allocator, cmd_line_w: []const u16) Windows.InitError!Windows {
    115             const wtf8_len = std.unicode.calcWtf8Len(cmd_line_w);
    116 
    117             // This buffer must be large enough to contain contiguous NUL-terminated slices
    118             // of each argument.
    119             // - During parsing, the length of a parsed argument will always be equal to
    120             //   to less than its unparsed length
    121             // - The first argument needs one extra byte of space allocated for its NUL
    122             //   terminator, but for each subsequent argument the necessary whitespace
    123             //   between arguments guarantees room for their NUL terminator(s).
    124             const buffer = try allocator.alloc(u8, wtf8_len + 1);
    125             errdefer allocator.free(buffer);
    126 
    127             return .{
    128                 .allocator = allocator,
    129                 .cmd_line = cmd_line_w,
    130                 .buffer = buffer,
    131             };
    132         }
    133 
    134         /// Returns the next argument and advances the iterator. Returns `null` if at the end of the
    135         /// command-line string. The iterator owns the returned slice.
    136         /// The result is encoded as [WTF-8](https://wtf-8.codeberg.page/).
    137         pub fn next(self: *Windows) ?[:0]const u8 {
    138             return self.nextWithStrategy(next_strategy);
    139         }
    140 
    141         /// Skips the next argument and advances the iterator. Returns `true` if an argument was
    142         /// skipped, `false` if at the end of the command-line string.
    143         pub fn skip(self: *Windows) bool {
    144             return self.nextWithStrategy(skip_strategy);
    145         }
    146 
    147         const next_strategy = struct {
    148             const T = ?[:0]const u8;
    149 
    150             const eof = null;
    151 
    152             /// Returns '\' if any backslashes are emitted, otherwise returns `last_emitted_code_unit`.
    153             fn emitBackslashes(self: *Windows, count: usize, last_emitted_code_unit: ?u16) ?u16 {
    154                 for (0..count) |_| {
    155                     self.buffer[self.end] = '\\';
    156                     self.end += 1;
    157                 }
    158                 return if (count != 0) '\\' else last_emitted_code_unit;
    159             }
    160 
    161             /// If `last_emitted_code_unit` and `code_unit` form a surrogate pair, then
    162             /// the previously emitted high surrogate is overwritten by the codepoint encoded
    163             /// by the surrogate pair, and `null` is returned.
    164             /// Otherwise, `code_unit` is emitted and returned.
    165             fn emitCharacter(self: *Windows, code_unit: u16, last_emitted_code_unit: ?u16) ?u16 {
    166                 // Because we are emitting WTF-8, we need to
    167                 // check to see if we've emitted two consecutive surrogate
    168                 // codepoints that form a valid surrogate pair in order
    169                 // to ensure that we're always emitting well-formed WTF-8
    170                 // (https://wtf-8.codeberg.page/#concatenating).
    171                 //
    172                 // If we do have a valid surrogate pair, we need to emit
    173                 // the UTF-8 sequence for the codepoint that they encode
    174                 // instead of the WTF-8 encoding for the two surrogate pairs
    175                 // separately.
    176                 //
    177                 // This is relevant when dealing with a WTF-16 encoded
    178                 // command line like this:
    179                 // "<0xD801>"<0xDC37>
    180                 // which would get parsed and converted to WTF-8 as:
    181                 // <0xED><0xA0><0x81><0xED><0xB0><0xB7>
    182                 // but instead, we need to recognize the surrogate pair
    183                 // and emit the codepoint it encodes, which in this
    184                 // example is U+10437 (𐐷), which is encoded in UTF-8 as:
    185                 // <0xF0><0x90><0x90><0xB7>
    186                 if (last_emitted_code_unit != null and
    187                     std.unicode.utf16IsLowSurrogate(code_unit) and
    188                     std.unicode.utf16IsHighSurrogate(last_emitted_code_unit.?))
    189                 {
    190                     const codepoint = std.unicode.utf16DecodeSurrogatePair(&.{ last_emitted_code_unit.?, code_unit }) catch unreachable;
    191 
    192                     // Unpaired surrogate is 3 bytes long
    193                     const dest = self.buffer[self.end - 3 ..];
    194                     const len = std.unicode.utf8Encode(codepoint, dest) catch unreachable;
    195                     // All codepoints that require a surrogate pair (> U+FFFF) are encoded as 4 bytes
    196                     assert(len == 4);
    197                     self.end += 1;
    198                     return null;
    199                 }
    200 
    201                 const wtf8_len = std.unicode.wtf8Encode(code_unit, self.buffer[self.end..]) catch unreachable;
    202                 self.end += wtf8_len;
    203                 return code_unit;
    204             }
    205 
    206             fn yieldArg(self: *Windows) [:0]const u8 {
    207                 self.buffer[self.end] = 0;
    208                 const arg = self.buffer[self.start..self.end :0];
    209                 self.end += 1;
    210                 self.start = self.end;
    211                 return arg;
    212             }
    213         };
    214 
    215         const skip_strategy = struct {
    216             const T = bool;
    217 
    218             const eof = false;
    219 
    220             fn emitBackslashes(_: *Windows, _: usize, last_emitted_code_unit: ?u16) ?u16 {
    221                 return last_emitted_code_unit;
    222             }
    223 
    224             fn emitCharacter(_: *Windows, _: u16, last_emitted_code_unit: ?u16) ?u16 {
    225                 return last_emitted_code_unit;
    226             }
    227 
    228             fn yieldArg(_: *Windows) bool {
    229                 return true;
    230             }
    231         };
    232 
    233         fn nextWithStrategy(self: *Windows, comptime strategy: type) strategy.T {
    234             var last_emitted_code_unit: ?u16 = null;
    235             // The first argument (the executable name) uses different parsing rules.
    236             if (self.index == 0) {
    237                 if (self.cmd_line.len == 0 or self.cmd_line[0] == 0) {
    238                     // Immediately complete the iterator.
    239                     // The C runtime would return the name of the current executable here.
    240                     return strategy.eof;
    241                 }
    242 
    243                 var inside_quotes = false;
    244                 while (true) : (self.index += 1) {
    245                     const char = if (self.index != self.cmd_line.len)
    246                         std.mem.littleToNative(u16, self.cmd_line[self.index])
    247                     else
    248                         0;
    249                     switch (char) {
    250                         0 => {
    251                             return strategy.yieldArg(self);
    252                         },
    253                         '"' => {
    254                             inside_quotes = !inside_quotes;
    255                         },
    256                         ' ', '\t' => {
    257                             if (inside_quotes) {
    258                                 last_emitted_code_unit = strategy.emitCharacter(self, char, last_emitted_code_unit);
    259                             } else {
    260                                 self.index += 1;
    261                                 return strategy.yieldArg(self);
    262                             }
    263                         },
    264                         else => {
    265                             last_emitted_code_unit = strategy.emitCharacter(self, char, last_emitted_code_unit);
    266                         },
    267                     }
    268                 }
    269             }
    270 
    271             // Skip spaces and tabs. The iterator completes if we reach the end of the string here.
    272             while (true) : (self.index += 1) {
    273                 const char = if (self.index != self.cmd_line.len)
    274                     std.mem.littleToNative(u16, self.cmd_line[self.index])
    275                 else
    276                     0;
    277                 switch (char) {
    278                     0 => return strategy.eof,
    279                     ' ', '\t' => continue,
    280                     else => break,
    281                 }
    282             }
    283 
    284             // Parsing rules for subsequent arguments:
    285             //
    286             // - The end of the string always terminates the current argument.
    287             // - When not in 'inside_quotes' mode, a space or tab terminates the current argument.
    288             // - 2n backslashes followed by a quote emit n backslashes (note: n can be zero).
    289             //   If in 'inside_quotes' and the quote is immediately followed by a second quote,
    290             //   one quote is emitted and the other is skipped, otherwise, the quote is skipped
    291             //   and 'inside_quotes' is toggled.
    292             // - 2n + 1 backslashes followed by a quote emit n backslashes followed by a quote.
    293             // - n backslashes not followed by a quote emit n backslashes.
    294             var backslash_count: usize = 0;
    295             var inside_quotes = false;
    296             while (true) : (self.index += 1) {
    297                 const char = if (self.index != self.cmd_line.len)
    298                     std.mem.littleToNative(u16, self.cmd_line[self.index])
    299                 else
    300                     0;
    301                 switch (char) {
    302                     0 => {
    303                         last_emitted_code_unit = strategy.emitBackslashes(self, backslash_count, last_emitted_code_unit);
    304                         return strategy.yieldArg(self);
    305                     },
    306                     ' ', '\t' => {
    307                         last_emitted_code_unit = strategy.emitBackslashes(self, backslash_count, last_emitted_code_unit);
    308                         backslash_count = 0;
    309                         if (inside_quotes) {
    310                             last_emitted_code_unit = strategy.emitCharacter(self, char, last_emitted_code_unit);
    311                         } else return strategy.yieldArg(self);
    312                     },
    313                     '"' => {
    314                         const char_is_escaped_quote = backslash_count % 2 != 0;
    315                         last_emitted_code_unit = strategy.emitBackslashes(self, backslash_count / 2, last_emitted_code_unit);
    316                         backslash_count = 0;
    317                         if (char_is_escaped_quote) {
    318                             last_emitted_code_unit = strategy.emitCharacter(self, '"', last_emitted_code_unit);
    319                         } else {
    320                             if (inside_quotes and
    321                                 self.index + 1 != self.cmd_line.len and
    322                                 std.mem.littleToNative(u16, self.cmd_line[self.index + 1]) == '"')
    323                             {
    324                                 last_emitted_code_unit = strategy.emitCharacter(self, '"', last_emitted_code_unit);
    325                                 self.index += 1;
    326                             } else {
    327                                 inside_quotes = !inside_quotes;
    328                             }
    329                         }
    330                     },
    331                     '\\' => {
    332                         backslash_count += 1;
    333                     },
    334                     else => {
    335                         last_emitted_code_unit = strategy.emitBackslashes(self, backslash_count, last_emitted_code_unit);
    336                         backslash_count = 0;
    337                         last_emitted_code_unit = strategy.emitCharacter(self, char, last_emitted_code_unit);
    338                     },
    339                 }
    340             }
    341         }
    342 
    343         /// Frees the iterator's copy of the command-line string and all previously returned
    344         /// argument slices.
    345         pub fn deinit(self: *Windows) void {
    346             self.allocator.free(self.buffer);
    347         }
    348     };
    349 
    /// Backend that walks the argument vector directly; it never allocates.
    pub const Posix = struct {
        /// Arguments not yet returned or skipped.
        remaining: Vector,

        pub const InitError = error{};

        /// Starts iterating at the first entry of `a.vector`.
        pub fn init(a: Args) Posix {
            return .{ .remaining = a.vector };
        }

        /// Returns the front argument and drops it from `remaining`,
        /// or returns `null` when nothing is left.
        pub fn next(it: *Posix) ?[:0]const u8 {
            const rest = it.remaining;
            if (rest.len == 0) return null;
            it.remaining = rest[1..];
            return std.mem.sliceTo(rest[0], 0);
        }

        /// Drops the front argument. Returns whether there was one to drop.
        pub fn skip(it: *Posix) bool {
            const has_arg = it.remaining.len != 0;
            if (has_arg) it.remaining = it.remaining[1..];
            return has_arg;
        }
    };
    372 
    /// Backend for WASI without libc: fetches all arguments up front through
    /// `args_sizes_get` / `args_get` and then iterates over the copies.
    pub const Wasi = struct {
        allocator: Allocator,
        /// Index into `args` of the next argument to return.
        index: usize,
        /// One slice per argument; see `deinit` for how the backing memory is freed.
        args: [][:0]u8,

        pub const InitError = error{OutOfMemory} || std.posix.UnexpectedError;

        /// You must call deinit to free the internal buffer of the
        /// iterator after you are done.
        pub fn init(allocator: Allocator) Wasi.InitError!Wasi {
            return .{
                .allocator = allocator,
                .index = 0,
                .args = try internalInit(allocator),
            };
        }

        /// Fetches the arguments from the WASI host.
        ///
        /// On success with at least one argument, the caller owns two allocations:
        /// the returned slice list and the string buffer its entries point into.
        /// With zero arguments nothing is allocated and an empty slice is returned.
        fn internalInit(allocator: Allocator) Wasi.InitError![][:0]u8 {
            var count: usize = undefined;
            var buf_size: usize = undefined;

            switch (std.os.wasi.args_sizes_get(&count, &buf_size)) {
                .SUCCESS => {},
                else => |err| return std.posix.unexpectedErrno(err),
            }

            if (count == 0) return &[_][:0]u8{};

            // Scratch pointer table; only needed until the slices are built.
            const argv = try allocator.alloc([*:0]u8, count);
            defer allocator.free(argv);

            const argv_buf = try allocator.alloc(u8, buf_size);
            // Do not leak the string buffer if `args_get` or the allocation
            // of the slice list below fails.
            errdefer allocator.free(argv_buf);

            switch (std.os.wasi.args_get(argv.ptr, argv_buf.ptr)) {
                .SUCCESS => {},
                else => |err| return std.posix.unexpectedErrno(err),
            }

            const result_args = try allocator.alloc([:0]u8, count);
            for (result_args, argv) |*dst, src| {
                dst.* = std.mem.sliceTo(src, 0);
            }

            return result_args;
        }

        /// Returns the next argument, or `null` when all have been returned.
        pub fn next(self: *Wasi) ?[:0]const u8 {
            if (self.index == self.args.len) return null;

            const arg = self.args[self.index];
            self.index += 1;
            return arg;
        }

        /// Skips one argument. Returns `false` when none are left.
        pub fn skip(self: *Wasi) bool {
            if (self.index == self.args.len) return false;

            self.index += 1;
            return true;
        }

        /// Call to free the internal buffer of the iterator.
        pub fn deinit(self: *Wasi) void {
            // Nothing is allocated when there are no args
            if (self.args.len == 0) return;

            // The string buffer is reconstructed as the span from the first
            // argument's first byte through the last argument's NUL terminator.
            // NOTE(review): this assumes the host wrote the strings contiguously,
            // in order, and filling the whole buffer - confirm against the WASI spec.
            const last_item = self.args[self.args.len - 1];
            const last_byte_addr = @intFromPtr(last_item.ptr) + last_item.len + 1; // null terminated
            const first_item_ptr = self.args[0].ptr;
            const len = last_byte_addr - @intFromPtr(first_item_ptr);
            self.allocator.free(first_item_ptr[0..len]);
            self.allocator.free(self.args);
        }
    };
    451 };
    452 
    453 /// Holds the command-line arguments, with the program name as the first entry.
    454 /// Use `iterateAllocator` for cross-platform code.
    455 pub fn iterate(a: Args) Iterator {
    456     return .init(a);
    457 }
    458 
    459 /// You must deinitialize iterator's internal buffers by calling `deinit` when
    460 /// done.
    461 pub fn iterateAllocator(a: Args, gpa: Allocator) Iterator.InitError!Iterator {
    462     return .initAllocator(a, gpa);
    463 }
    464 
    465 /// Returned value may reference several allocations; call `freeSlice` to
    466 /// release.
    467 ///
    468 /// * On Windows, the result is encoded as
    469 ///   [WTF-8](https://wtf-8.codeberg.page/).
    470 /// * On other platforms, the result is an opaque sequence of bytes with no
    471 ///   particular encoding.
    472 pub fn toSlice(a: Args, gpa: Allocator) Allocator.Error![][:0]u8 {
    473     var it = try a.iterateAllocator(gpa);
    474     defer it.deinit();
    475 
    476     var contents = std.array_list.Managed(u8).init(gpa);
    477     defer contents.deinit();
    478 
    479     var slice_list = std.array_list.Managed(usize).init(gpa);
    480     defer slice_list.deinit();
    481 
    482     while (it.next()) |arg| {
    483         try contents.appendSlice(arg[0 .. arg.len + 1]);
    484         try slice_list.append(arg.len);
    485     }
    486 
    487     const contents_slice = contents.items;
    488     const slice_sizes = slice_list.items;
    489     const slice_list_bytes = std.math.mul(usize, @sizeOf([]u8), slice_sizes.len) catch return error.OutOfMemory;
    490     const total_bytes = std.math.add(usize, slice_list_bytes, contents_slice.len) catch return error.OutOfMemory;
    491     const buf = try gpa.alignedAlloc(u8, .of([]u8), total_bytes);
    492     errdefer gpa.free(buf);
    493 
    494     const result_slice_list = std.mem.bytesAsSlice([:0]u8, buf[0..slice_list_bytes]);
    495     const result_contents = buf[slice_list_bytes..];
    496     @memcpy(result_contents[0..contents_slice.len], contents_slice);
    497 
    498     var contents_index: usize = 0;
    499     for (slice_sizes, 0..) |len, i| {
    500         const new_index = contents_index + len;
    501         result_slice_list[i] = result_contents[contents_index..new_index :0];
    502         contents_index = new_index + 1;
    503     }
    504 
    505     return result_slice_list;
    506 }
    507 
    508 /// Frees memory allocate by `toSlice`.
    509 pub fn freeSlice(gpa: Allocator, to_slice_result: []const [:0]u8) void {
    510     var total_bytes: usize = 0;
    511     for (to_slice_result) |arg| {
    512         total_bytes += @sizeOf([]u8) + arg.len + 1;
    513     }
    514     const unaligned_allocated_buf = @as([*]const u8, @ptrCast(to_slice_result.ptr))[0..total_bytes];
    515     const aligned_allocated_buf: []align(@alignOf([]u8)) const u8 = @alignCast(unaligned_allocated_buf);
    516     return gpa.free(aligned_allocated_buf);
    517 }
    518 
// Table-driven checks of `Iterator.Windows`: every `t(cmd_line, expected)` call
// parses `cmd_line` and requires exactly the arguments in `expected`, both
// through `next` and through `skip`.
test "Iterator.Windows" {
    const t = testIteratorWindows;

    // Realistic command line mixing a quoted executable path, plain arguments,
    // non-ASCII text and backslash-escaped quotes.
    try t(
        \\"C:\Program Files\zig\zig.exe" run .\src\main.zig -target x86_64-windows-gnu -O ReleaseSafe -- --emoji=🗿 --eval="new Regex(\"Dwayne \\\"The Rock\\\" Johnson\")"
    , &.{
        \\C:\Program Files\zig\zig.exe
        ,
        \\run
        ,
        \\.\src\main.zig
        ,
        \\-target
        ,
        \\x86_64-windows-gnu
        ,
        \\-O
        ,
        \\ReleaseSafe
        ,
        \\--
        ,
        \\--emoji=🗿
        ,
        \\--eval=new Regex("Dwayne \"The Rock\" Johnson")
        ,
    });

    // Empty
    try t("", &.{});

    // Separators
    // Only space and tab split arguments; other control characters do not.
    try t("aa bb cc", &.{ "aa", "bb", "cc" });
    try t("aa\tbb\tcc", &.{ "aa", "bb", "cc" });
    try t("aa\nbb\ncc", &.{"aa\nbb\ncc"});
    try t("aa\r\nbb\r\ncc", &.{"aa\r\nbb\r\ncc"});
    try t("aa\rbb\rcc", &.{"aa\rbb\rcc"});
    try t("aa\x07bb\x07cc", &.{"aa\x07bb\x07cc"});
    try t("aa\x7Fbb\x7Fcc", &.{"aa\x7Fbb\x7Fcc"});
    try t("aa🦎bb🦎cc", &.{"aa🦎bb🦎cc"});

    // Leading/trailing whitespace
    // Leading whitespace yields an empty first argument (the executable name).
    try t("  ", &.{""});
    try t("  aa  bb  ", &.{ "", "aa", "bb" });
    try t("\t\t", &.{""});
    try t("\t\taa\t\tbb\t\t", &.{ "", "aa", "bb" });
    try t("\n\n", &.{"\n\n"});
    try t("\n\naa\n\nbb\n\n", &.{"\n\naa\n\nbb\n\n"});

    // Executable name with quotes/backslashes
    // In the first argument quotes only toggle quoting and backslashes are literal.
    try t("\"aa bb\tcc\ndd\"", &.{"aa bb\tcc\ndd"});
    try t("\"", &.{""});
    try t("\"\"", &.{""});
    try t("\"\"\"", &.{""});
    try t("\"\"\"\"", &.{""});
    try t("\"\"\"\"\"", &.{""});
    try t("aa\"bb\"cc\"dd", &.{"aabbccdd"});
    try t("aa\"bb cc\"dd", &.{"aabb ccdd"});
    try t("\"aa\\\"bb\"", &.{"aa\\bb"});
    try t("\"aa\\\\\"", &.{"aa\\\\"});
    try t("aa\\\"bb", &.{"aa\\bb"});
    try t("aa\\\\\"bb", &.{"aa\\\\bb"});

    // Arguments with quotes/backslashes
    try t(". \"aa bb\tcc\ndd\"", &.{ ".", "aa bb\tcc\ndd" });
    try t(". aa\" \"bb\"\t\"cc\"\n\"dd\"", &.{ ".", "aa bb\tcc\ndd" });
    try t(". ", &.{"."});
    try t(". \"", &.{ ".", "" });
    try t(". \"\"", &.{ ".", "" });
    try t(". \"\"\"", &.{ ".", "\"" });
    try t(". \"\"\"\"", &.{ ".", "\"" });
    try t(". \"\"\"\"\"", &.{ ".", "\"\"" });
    try t(". \"\"\"\"\"\"", &.{ ".", "\"\"" });
    try t(". \" \"", &.{ ".", " " });
    try t(". \" \"\"", &.{ ".", " \"" });
    try t(". \" \"\"\"", &.{ ".", " \"" });
    try t(". \" \"\"\"\"", &.{ ".", " \"\"" });
    try t(". \" \"\"\"\"\"", &.{ ".", " \"\"" });
    try t(". \" \"\"\"\"\"\"", &.{ ".", " \"\"\"" });
    try t(". \\\"", &.{ ".", "\"" });
    try t(". \\\"\"", &.{ ".", "\"" });
    try t(". \\\"\"\"", &.{ ".", "\"" });
    try t(". \\\"\"\"\"", &.{ ".", "\"\"" });
    try t(". \\\"\"\"\"\"", &.{ ".", "\"\"" });
    try t(". \\\"\"\"\"\"\"", &.{ ".", "\"\"\"" });
    try t(". \" \\\"", &.{ ".", " \"" });
    try t(". \" \\\"\"", &.{ ".", " \"" });
    try t(". \" \\\"\"\"", &.{ ".", " \"\"" });
    try t(". \" \\\"\"\"\"", &.{ ".", " \"\"" });
    try t(". \" \\\"\"\"\"\"", &.{ ".", " \"\"\"" });
    try t(". \" \\\"\"\"\"\"\"", &.{ ".", " \"\"\"" });
    try t(". aa\\bb\\\\cc\\\\\\dd", &.{ ".", "aa\\bb\\\\cc\\\\\\dd" });
    try t(". \\\\\\\"aa bb\"", &.{ ".", "\\\"aa", "bb" });
    try t(". \\\\\\\\\"aa bb\"", &.{ ".", "\\\\aa bb" });

    // From https://learn.microsoft.com/en-us/cpp/cpp/main-function-command-line-args#results-of-parsing-command-lines
    try t(
        \\foo.exe "abc" d e
    , &.{ "foo.exe", "abc", "d", "e" });
    try t(
        \\foo.exe a\\b d"e f"g h
    , &.{ "foo.exe", "a\\\\b", "de fg", "h" });
    try t(
        \\foo.exe a\\\"b c d
    , &.{ "foo.exe", "a\\\"b", "c", "d" });
    try t(
        \\foo.exe a\\\\"b c" d e
    , &.{ "foo.exe", "a\\\\b c", "d", "e" });
    try t(
        \\foo.exe a"b"" c d
    , &.{ "foo.exe", "ab\" c d" });

    // From https://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULESEX
    try t("foo.exe CallMeIshmael", &.{ "foo.exe", "CallMeIshmael" });
    try t("foo.exe \"Call Me Ishmael\"", &.{ "foo.exe", "Call Me Ishmael" });
    try t("foo.exe Cal\"l Me I\"shmael", &.{ "foo.exe", "Call Me Ishmael" });
    try t("foo.exe CallMe\\\"Ishmael", &.{ "foo.exe", "CallMe\"Ishmael" });
    try t("foo.exe \"CallMe\\\"Ishmael\"", &.{ "foo.exe", "CallMe\"Ishmael" });
    try t("foo.exe \"Call Me Ishmael\\\\\"", &.{ "foo.exe", "Call Me Ishmael\\" });
    try t("foo.exe \"CallMe\\\\\\\"Ishmael\"", &.{ "foo.exe", "CallMe\\\"Ishmael" });
    try t("foo.exe a\\\\\\b", &.{ "foo.exe", "a\\\\\\b" });
    try t("foo.exe \"a\\\\\\b\"", &.{ "foo.exe", "a\\\\\\b" });

    // Surrogate pair encoding of 𐐷 separated by quotes.
    // Encoded as WTF-16:
    // "<0xD801>"<0xDC37>
    // Encoded as WTF-8:
    // "<0xED><0xA0><0x81>"<0xED><0xB0><0xB7>
    // During parsing, the quotes drop out and the surrogate pair
    // should end up encoded as its normal UTF-8 representation.
    try t("foo.exe \"\xed\xa0\x81\"\xed\xb0\xb7", &.{ "foo.exe", "𐐷" });
}
    651 
    652 fn testIteratorWindows(cmd_line: []const u8, expected_args: []const []const u8) !void {
    653     const cmd_line_w = try std.unicode.wtf8ToWtf16LeAllocZ(testing.allocator, cmd_line);
    654     defer testing.allocator.free(cmd_line_w);
    655 
    656     // next
    657     {
    658         var it = try Iterator.Windows.init(testing.allocator, cmd_line_w);
    659         defer it.deinit();
    660 
    661         for (expected_args) |expected| {
    662             if (it.next()) |actual| {
    663                 try testing.expectEqualStrings(expected, actual);
    664             } else {
    665                 return error.TestUnexpectedResult;
    666             }
    667         }
    668         try testing.expect(it.next() == null);
    669     }
    670 
    671     // skip
    672     {
    673         var it = try Iterator.Windows.init(testing.allocator, cmd_line_w);
    674         defer it.deinit();
    675 
    676         for (0..expected_args.len) |_| {
    677             try testing.expect(it.skip());
    678         }
    679         try testing.expect(!it.skip());
    680     }
    681 }
    682 
    683 test "general parsing" {
    684     try testGeneralCmdLine("a   b\tc d", &.{ "a", "b", "c", "d" });
    685     try testGeneralCmdLine("\"abc\" d e", &.{ "abc", "d", "e" });
    686     try testGeneralCmdLine("a\\\\\\b d\"e f\"g h", &.{ "a\\\\\\b", "de fg", "h" });
    687     try testGeneralCmdLine("a\\\\\\\"b c d", &.{ "a\\\"b", "c", "d" });
    688     try testGeneralCmdLine("a\\\\\\\\\"b c\" d e", &.{ "a\\\\b c", "d", "e" });
    689     try testGeneralCmdLine("a   b\tc \"d f", &.{ "a", "b", "c", "d f" });
    690     try testGeneralCmdLine("j k l\\", &.{ "j", "k", "l\\" });
    691     try testGeneralCmdLine("\"\" x y z\\\\", &.{ "", "x", "y", "z\\\\" });
    692 
    693     try testGeneralCmdLine("\".\\..\\zig-cache\\build\" \"bin\\zig.exe\" \".\\..\" \".\\..\\zig-cache\" \"--help\"", &.{
    694         ".\\..\\zig-cache\\build",
    695         "bin\\zig.exe",
    696         ".\\..",
    697         ".\\..\\zig-cache",
    698         "--help",
    699     });
    700 
    701     try testGeneralCmdLine(
    702         \\ 'foo' "bar"
    703     , &.{ "'foo'", "bar" });
    704 }
    705 
    706 fn testGeneralCmdLine(input_cmd_line: []const u8, expected_args: []const []const u8) !void {
    707     var it = try IteratorGeneral(.{}).init(std.testing.allocator, input_cmd_line);
    708     defer it.deinit();
    709     for (expected_args) |expected_arg| {
    710         const arg = it.next().?;
    711         try testing.expectEqualStrings(expected_arg, arg);
    712     }
    713     try testing.expect(it.next() == null);
    714 }
    715 
    716 /// Optional parameters for `IteratorGeneral`
    717 pub const IteratorGeneralOptions = struct {
    718     comments: bool = false,
    719     single_quotes: bool = false,
    720 };
    721 
    722 /// A general Iterator to parse a string into a set of arguments
    723 pub fn IteratorGeneral(comptime options: IteratorGeneralOptions) type {
    724     return struct {
    725         allocator: Allocator,
    726         index: usize = 0,
    727         cmd_line: []const u8,
    728 
    729         /// Should the cmd_line field be free'd (using the allocator) on deinit()?
    730         free_cmd_line_on_deinit: bool,
    731 
    732         /// buffer MUST be long enough to hold the cmd_line plus a null terminator.
    733         /// buffer will we free'd (using the allocator) on deinit()
    734         buffer: []u8,
    735         start: usize = 0,
    736         end: usize = 0,
    737 
    738         pub const Self = @This();
    739 
    740         pub const InitError = error{OutOfMemory};
    741 
    742         /// cmd_line_utf8 MUST remain valid and constant while using this instance
    743         pub fn init(allocator: Allocator, cmd_line_utf8: []const u8) InitError!Self {
    744             const buffer = try allocator.alloc(u8, cmd_line_utf8.len + 1);
    745             errdefer allocator.free(buffer);
    746 
    747             return Self{
    748                 .allocator = allocator,
    749                 .cmd_line = cmd_line_utf8,
    750                 .free_cmd_line_on_deinit = false,
    751                 .buffer = buffer,
    752             };
    753         }
    754 
    755         /// cmd_line_utf8 will be free'd (with the allocator) on deinit()
    756         pub fn initTakeOwnership(allocator: Allocator, cmd_line_utf8: []const u8) InitError!Self {
    757             const buffer = try allocator.alloc(u8, cmd_line_utf8.len + 1);
    758             errdefer allocator.free(buffer);
    759 
    760             return Self{
    761                 .allocator = allocator,
    762                 .cmd_line = cmd_line_utf8,
    763                 .free_cmd_line_on_deinit = true,
    764                 .buffer = buffer,
    765             };
    766         }
    767 
    768         // Skips over whitespace in the cmd_line.
    769         // Returns false if the terminating sentinel is reached, true otherwise.
    770         // Also skips over comments (if supported).
    771         fn skipWhitespace(self: *Self) bool {
    772             while (true) : (self.index += 1) {
    773                 const character = if (self.index != self.cmd_line.len) self.cmd_line[self.index] else 0;
    774                 switch (character) {
    775                     0 => return false,
    776                     ' ', '\t', '\r', '\n' => continue,
    777                     '#' => {
    778                         if (options.comments) {
    779                             while (true) : (self.index += 1) {
    780                                 switch (self.cmd_line[self.index]) {
    781                                     '\n' => break,
    782                                     0 => return false,
    783                                     else => continue,
    784                                 }
    785                             }
    786                             continue;
    787                         } else {
    788                             break;
    789                         }
    790                     },
    791                     else => break,
    792                 }
    793             }
    794             return true;
    795         }
    796 
    797         pub fn skip(self: *Self) bool {
    798             if (!self.skipWhitespace()) {
    799                 return false;
    800             }
    801 
    802             var backslash_count: usize = 0;
    803             var in_quote = false;
    804             while (true) : (self.index += 1) {
    805                 const character = if (self.index != self.cmd_line.len) self.cmd_line[self.index] else 0;
    806                 switch (character) {
    807                     0 => return true,
    808                     '"', '\'' => {
    809                         if (!options.single_quotes and character == '\'') {
    810                             backslash_count = 0;
    811                             continue;
    812                         }
    813                         const quote_is_real = backslash_count % 2 == 0;
    814                         if (quote_is_real) {
    815                             in_quote = !in_quote;
    816                         }
    817                     },
    818                     '\\' => {
    819                         backslash_count += 1;
    820                     },
    821                     ' ', '\t', '\r', '\n' => {
    822                         if (!in_quote) {
    823                             return true;
    824                         }
    825                         backslash_count = 0;
    826                     },
    827                     else => {
    828                         backslash_count = 0;
    829                         continue;
    830                     },
    831                 }
    832             }
    833         }
    834 
    835         /// Returns a slice of the internal buffer that contains the next argument.
    836         /// Returns null when it reaches the end.
    837         pub fn next(self: *Self) ?[:0]const u8 {
    838             if (!self.skipWhitespace()) {
    839                 return null;
    840             }
    841 
    842             var backslash_count: usize = 0;
    843             var in_quote = false;
    844             while (true) : (self.index += 1) {
    845                 const character = if (self.index != self.cmd_line.len) self.cmd_line[self.index] else 0;
    846                 switch (character) {
    847                     0 => {
    848                         self.emitBackslashes(backslash_count);
    849                         self.buffer[self.end] = 0;
    850                         const token = self.buffer[self.start..self.end :0];
    851                         self.end += 1;
    852                         self.start = self.end;
    853                         return token;
    854                     },
    855                     '"', '\'' => {
    856                         if (!options.single_quotes and character == '\'') {
    857                             self.emitBackslashes(backslash_count);
    858                             backslash_count = 0;
    859                             self.emitCharacter(character);
    860                             continue;
    861                         }
    862                         const quote_is_real = backslash_count % 2 == 0;
    863                         self.emitBackslashes(backslash_count / 2);
    864                         backslash_count = 0;
    865 
    866                         if (quote_is_real) {
    867                             in_quote = !in_quote;
    868                         } else {
    869                             self.emitCharacter('"');
    870                         }
    871                     },
    872                     '\\' => {
    873                         backslash_count += 1;
    874                     },
    875                     ' ', '\t', '\r', '\n' => {
    876                         self.emitBackslashes(backslash_count);
    877                         backslash_count = 0;
    878                         if (in_quote) {
    879                             self.emitCharacter(character);
    880                         } else {
    881                             self.buffer[self.end] = 0;
    882                             const token = self.buffer[self.start..self.end :0];
    883                             self.end += 1;
    884                             self.start = self.end;
    885                             return token;
    886                         }
    887                     },
    888                     else => {
    889                         self.emitBackslashes(backslash_count);
    890                         backslash_count = 0;
    891                         self.emitCharacter(character);
    892                     },
    893                 }
    894             }
    895         }
    896 
    897         fn emitBackslashes(self: *Self, emit_count: usize) void {
    898             var i: usize = 0;
    899             while (i < emit_count) : (i += 1) {
    900                 self.emitCharacter('\\');
    901             }
    902         }
    903 
    904         fn emitCharacter(self: *Self, char: u8) void {
    905             self.buffer[self.end] = char;
    906             self.end += 1;
    907         }
    908 
    909         /// Call to free the internal buffer of the iterator.
    910         pub fn deinit(self: *Self) void {
    911             self.allocator.free(self.buffer);
    912 
    913             if (self.free_cmd_line_on_deinit) {
    914                 self.allocator.free(self.cmd_line);
    915             }
    916         }
    917     };
    918 }
    919 
    920 test "response file arg parsing" {
    921     try testResponseFileCmdLine(
    922         \\a b
    923         \\c d\
    924     , &.{ "a", "b", "c", "d\\" });
    925     try testResponseFileCmdLine("a b c d\\", &.{ "a", "b", "c", "d\\" });
    926 
    927     try testResponseFileCmdLine(
    928         \\j
    929         \\ k l # this is a comment \\ \\\ \\\\ "none" "\\" "\\\"
    930         \\ "m" #another comment
    931         \\
    932     , &.{ "j", "k", "l", "m" });
    933 
    934     try testResponseFileCmdLine(
    935         \\ "" q ""
    936         \\ "r s # t" "u\" v" #another comment
    937         \\
    938     , &.{ "", "q", "", "r s # t", "u\" v" });
    939 
    940     try testResponseFileCmdLine(
    941         \\ -l"advapi32" a# b#c d#
    942         \\e\\\
    943     , &.{ "-ladvapi32", "a#", "b#c", "d#", "e\\\\\\" });
    944 
    945     try testResponseFileCmdLine(
    946         \\ 'foo' "bar"
    947     , &.{ "foo", "bar" });
    948 }
    949 
    950 fn testResponseFileCmdLine(input_cmd_line: []const u8, expected_args: []const []const u8) !void {
    951     var it = try IteratorGeneral(.{ .comments = true, .single_quotes = true })
    952         .init(std.testing.allocator, input_cmd_line);
    953     defer it.deinit();
    954     for (expected_args) |expected_arg| {
    955         const arg = it.next().?;
    956         try testing.expectEqualStrings(expected_arg, arg);
    957     }
    958     try testing.expect(it.next() == null);
    959 }