// blob e10764ce (37453B) - Raw
const Args = @This();

const builtin = @import("builtin");
const native_os = builtin.os.tag;

const std = @import("../std.zig");
const Allocator = std.mem.Allocator;
const assert = std.debug.assert;
const testing = std.testing;

/// Raw, platform-specific representation of the command line (see `Vector`).
vector: Vector,

/// Platform-specific storage for the command line:
/// * Windows: the whole command line as one WTF-16 string.
/// * freestanding / other: nothing.
/// * everything else: a slice of NUL-terminated argument strings.
pub const Vector = switch (native_os) {
    .windows => []const u16, // WTF-16 encoded
    .freestanding, .other => void,
    else => []const [*:0]const u8,
};

/// Cross-platform access to command line one argument at a time.
pub const Iterator = struct {
    const Inner = switch (native_os) {
        .windows => Windows,
        .wasi => if (builtin.link_libc) Posix else Wasi,
        else => Posix,
    };

    inner: Inner,

    /// Initialize the args iterator. Consider using `initAllocator` instead
    /// for cross-platform compatibility.
    ///
    /// This is a compile error on WASI and Windows.
    pub fn init(a: Args) Iterator {
        if (native_os == .wasi) {
            @compileError("In WASI, use initAllocator instead.");
        }
        if (native_os == .windows) {
            @compileError("In Windows, use initAllocator instead.");
        }

        return .{ .inner = .init(a) };
    }

    pub const InitError = Inner.InitError;

    /// Initialize the args iterator, allocating with `gpa` where the platform
    /// backend needs internal buffers (Windows, and WASI without libc).
    ///
    /// You must deinitialize iterator's internal buffers by calling `deinit` when done.
    pub fn initAllocator(a: Args, gpa: Allocator) InitError!Iterator {
        if (native_os == .wasi and !builtin.link_libc) {
            // `Wasi.init` fetches the arguments itself and only needs an allocator.
            return .{ .inner = try .init(gpa) };
        }
        if (native_os == .windows) {
            // `Windows.init` takes the allocator first, then the WTF-16 command line.
            return .{ .inner = try .init(gpa, a.vector) };
        }

        return .{ .inner = .init(a) };
    }

    /// Return subsequent argument, or `null` if no more remaining.
    ///
    /// Returned slice is pointing to the iterator's internal buffer.
    /// On Windows, the result is encoded as [WTF-8](https://wtf-8.codeberg.page/).
    /// On other platforms, the result is an opaque sequence of bytes with no particular encoding.
    pub fn next(it: *Iterator) ?[:0]const u8 {
        return it.inner.next();
    }

    /// Parse past 1 argument without capturing it.
    /// Returns `true` if skipped an arg, `false` if we are at the end.
    pub fn skip(it: *Iterator) bool {
        return it.inner.skip();
    }

    /// Required to release resources if the iterator was initialized with
    /// `initAllocator` function.
    pub fn deinit(it: *Iterator) void {
        // Unless we're targeting WASI or Windows, this is a no-op.
        if (native_os == .wasi and !builtin.link_libc) it.inner.deinit();
        if (native_os == .windows) it.inner.deinit();
    }

    /// Iterator that implements the Windows command-line parsing algorithm.
    ///
    /// The implementation is intended to be compatible with the post-2008 C runtime,
    /// but is *not* intended to be compatible with `CommandLineToArgvW` since
    /// `CommandLineToArgvW` uses the pre-2008 parsing rules.
    ///
    /// This iterator faithfully implements the parsing behavior observed from the C runtime with
    /// one exception: if the command-line string is empty, the iterator will immediately complete
    /// without returning any arguments (whereas the C runtime will return a single argument
    /// representing the name of the current executable).
    ///
    /// The essential parts of the algorithm are described in Microsoft's documentation:
    ///
    /// - https://learn.microsoft.com/en-us/cpp/cpp/main-function-command-line-args?view=msvc-170#parsing-c-command-line-arguments
    ///
    /// David Deley explains some additional undocumented quirks in great detail:
    ///
    /// - https://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULES
    pub const Windows = struct {
        allocator: Allocator,
        /// Encoded as WTF-16 LE.
        cmd_line: []const u16,
        index: usize = 0,
        /// Owned by the iterator. Long enough to hold contiguous NUL-terminated slices
        /// of each argument encoded as WTF-8.
        buffer: []u8,
        start: usize = 0,
        end: usize = 0,

        pub const InitError = error{OutOfMemory};

        /// `cmd_line_w` *must* be a WTF16-LE-encoded string.
        ///
        /// The iterator stores and uses `cmd_line_w`, so its memory must be valid for
        /// at least as long as the returned Windows.
        pub fn init(allocator: Allocator, cmd_line_w: []const u16) Windows.InitError!Windows {
            const wtf8_len = std.unicode.calcWtf8Len(cmd_line_w);

            // This buffer must be large enough to contain contiguous NUL-terminated slices
            // of each argument.
            // - During parsing, the length of a parsed argument will always be less than
            //   or equal to its unparsed length
            // - The first argument needs one extra byte of space allocated for its NUL
            //   terminator, but for each subsequent argument the necessary whitespace
            //   between arguments guarantees room for their NUL terminator(s).
            const buffer = try allocator.alloc(u8, wtf8_len + 1);
            errdefer allocator.free(buffer);

            return .{
                .allocator = allocator,
                .cmd_line = cmd_line_w,
                .buffer = buffer,
            };
        }

        /// Returns the next argument and advances the iterator. Returns `null` if at the end of the
        /// command-line string. The iterator owns the returned slice.
        /// The result is encoded as [WTF-8](https://wtf-8.codeberg.page/).
        pub fn next(self: *Windows) ?[:0]const u8 {
            return self.nextWithStrategy(next_strategy);
        }

        /// Skips the next argument and advances the iterator. Returns `true` if an argument was
        /// skipped, `false` if at the end of the command-line string.
        pub fn skip(self: *Windows) bool {
            return self.nextWithStrategy(skip_strategy);
        }

        /// Strategy used by `next`: writes parsed code units into `buffer` and
        /// yields the resulting NUL-terminated slice.
        const next_strategy = struct {
            const T = ?[:0]const u8;

            const eof = null;

            /// Returns '\' if any backslashes are emitted, otherwise returns `last_emitted_code_unit`.
            fn emitBackslashes(self: *Windows, count: usize, last_emitted_code_unit: ?u16) ?u16 {
                for (0..count) |_| {
                    self.buffer[self.end] = '\\';
                    self.end += 1;
                }
                return if (count != 0) '\\' else last_emitted_code_unit;
            }

            /// If `last_emitted_code_unit` and `code_unit` form a surrogate pair, then
            /// the previously emitted high surrogate is overwritten by the codepoint encoded
            /// by the surrogate pair, and `null` is returned.
            /// Otherwise, `code_unit` is emitted and returned.
            fn emitCharacter(self: *Windows, code_unit: u16, last_emitted_code_unit: ?u16) ?u16 {
                // Because we are emitting WTF-8, we need to
                // check to see if we've emitted two consecutive surrogate
                // codepoints that form a valid surrogate pair in order
                // to ensure that we're always emitting well-formed WTF-8
                // (https://wtf-8.codeberg.page/#concatenating).
                //
                // If we do have a valid surrogate pair, we need to emit
                // the UTF-8 sequence for the codepoint that they encode
                // instead of the WTF-8 encoding for the two surrogate pairs
                // separately.
                //
                // This is relevant when dealing with a WTF-16 encoded
                // command line like this:
                // "<0xD801>"<0xDC37>
                // which would get parsed and converted to WTF-8 as:
                // <0xED><0xA0><0x81><0xED><0xB0><0xB7>
                // but instead, we need to recognize the surrogate pair
                // and emit the codepoint it encodes, which in this
                // example is U+10437 (𐐷), which is encoded in UTF-8 as:
                // <0xF0><0x90><0x90><0xB7>
                if (last_emitted_code_unit != null and
                    std.unicode.utf16IsLowSurrogate(code_unit) and
                    std.unicode.utf16IsHighSurrogate(last_emitted_code_unit.?))
                {
                    const codepoint = std.unicode.utf16DecodeSurrogatePair(&.{ last_emitted_code_unit.?, code_unit }) catch unreachable;

                    // Unpaired surrogate is 3 bytes long
                    const dest = self.buffer[self.end - 3 ..];
                    const len = std.unicode.utf8Encode(codepoint, dest) catch unreachable;
                    // All codepoints that require a surrogate pair (> U+FFFF) are encoded as 4 bytes
                    assert(len == 4);
                    // 3 bytes were overwritten and 1 new byte was appended.
                    self.end += 1;
                    return null;
                }

                const wtf8_len = std.unicode.wtf8Encode(code_unit, self.buffer[self.end..]) catch unreachable;
                self.end += wtf8_len;
                return code_unit;
            }

            /// NUL-terminates the bytes emitted since the previous argument and
            /// returns them; the next argument starts right after the terminator.
            fn yieldArg(self: *Windows) [:0]const u8 {
                self.buffer[self.end] = 0;
                const arg = self.buffer[self.start..self.end :0];
                self.end += 1;
                self.start = self.end;
                return arg;
            }
        };

        /// Strategy used by `skip`: parses identically but never touches `buffer`.
        const skip_strategy = struct {
            const T = bool;

            const eof = false;

            fn emitBackslashes(_: *Windows, _: usize, last_emitted_code_unit: ?u16) ?u16 {
                return last_emitted_code_unit;
            }

            fn emitCharacter(_: *Windows, _: u16, last_emitted_code_unit: ?u16) ?u16 {
                return last_emitted_code_unit;
            }

            fn yieldArg(_: *Windows) bool {
                return true;
            }
        };

        /// Shared parser behind `next` and `skip`. `strategy` supplies the result
        /// type `T`, the end-of-input value `eof`, and the emit/yield callbacks.
        fn nextWithStrategy(self: *Windows, comptime strategy: type) strategy.T {
            var last_emitted_code_unit: ?u16 = null;
            // The first argument (the executable name) uses different parsing rules.
            if (self.index == 0) {
                if (self.cmd_line.len == 0 or self.cmd_line[0] == 0) {
                    // Immediately complete the iterator.
                    // The C runtime would return the name of the current executable here.
                    return strategy.eof;
                }

                var inside_quotes = false;
                while (true) : (self.index += 1) {
                    // The end of the slice is treated like a NUL terminator.
                    const char = if (self.index != self.cmd_line.len)
                        std.mem.littleToNative(u16, self.cmd_line[self.index])
                    else
                        0;
                    switch (char) {
                        0 => {
                            return strategy.yieldArg(self);
                        },
                        '"' => {
                            inside_quotes = !inside_quotes;
                        },
                        ' ', '\t' => {
                            if (inside_quotes) {
                                last_emitted_code_unit = strategy.emitCharacter(self, char, last_emitted_code_unit);
                            } else {
                                self.index += 1;
                                return strategy.yieldArg(self);
                            }
                        },
                        else => {
                            last_emitted_code_unit = strategy.emitCharacter(self, char, last_emitted_code_unit);
                        },
                    }
                }
            }

            // Skip spaces and tabs. The iterator completes if we reach the end of the string here.
            while (true) : (self.index += 1) {
                const char = if (self.index != self.cmd_line.len)
                    std.mem.littleToNative(u16, self.cmd_line[self.index])
                else
                    0;
                switch (char) {
                    0 => return strategy.eof,
                    ' ', '\t' => continue,
                    else => break,
                }
            }

            // Parsing rules for subsequent arguments:
            //
            // - The end of the string always terminates the current argument.
            // - When not in 'inside_quotes' mode, a space or tab terminates the current argument.
            // - 2n backslashes followed by a quote emit n backslashes (note: n can be zero).
            //   If in 'inside_quotes' and the quote is immediately followed by a second quote,
            //   one quote is emitted and the other is skipped, otherwise, the quote is skipped
            //   and 'inside_quotes' is toggled.
            // - 2n + 1 backslashes followed by a quote emit n backslashes followed by a quote.
            // - n backslashes not followed by a quote emit n backslashes.
            var backslash_count: usize = 0;
            var inside_quotes = false;
            while (true) : (self.index += 1) {
                const char = if (self.index != self.cmd_line.len)
                    std.mem.littleToNative(u16, self.cmd_line[self.index])
                else
                    0;
                switch (char) {
                    0 => {
                        last_emitted_code_unit = strategy.emitBackslashes(self, backslash_count, last_emitted_code_unit);
                        return strategy.yieldArg(self);
                    },
                    ' ', '\t' => {
                        last_emitted_code_unit = strategy.emitBackslashes(self, backslash_count, last_emitted_code_unit);
                        backslash_count = 0;
                        if (inside_quotes) {
                            last_emitted_code_unit = strategy.emitCharacter(self, char, last_emitted_code_unit);
                        } else return strategy.yieldArg(self);
                    },
                    '"' => {
                        const char_is_escaped_quote = backslash_count % 2 != 0;
                        last_emitted_code_unit = strategy.emitBackslashes(self, backslash_count / 2, last_emitted_code_unit);
                        backslash_count = 0;
                        if (char_is_escaped_quote) {
                            last_emitted_code_unit = strategy.emitCharacter(self, '"', last_emitted_code_unit);
                        } else {
                            if (inside_quotes and
                                self.index + 1 != self.cmd_line.len and
                                std.mem.littleToNative(u16, self.cmd_line[self.index + 1]) == '"')
                            {
                                last_emitted_code_unit = strategy.emitCharacter(self, '"', last_emitted_code_unit);
                                self.index += 1;
                            } else {
                                inside_quotes = !inside_quotes;
                            }
                        }
                    },
                    '\\' => {
                        backslash_count += 1;
                    },
                    else => {
                        last_emitted_code_unit = strategy.emitBackslashes(self, backslash_count, last_emitted_code_unit);
                        backslash_count = 0;
                        last_emitted_code_unit = strategy.emitCharacter(self, char, last_emitted_code_unit);
                    },
                }
            }
        }

        /// Frees the iterator's copy of the command-line string and all previously returned
        /// argument slices.
        pub fn deinit(self: *Windows) void {
            self.allocator.free(self.buffer);
        }
    };

    /// Iterator over an already-split argument vector. Never allocates.
    pub const Posix = struct {
        remaining: Vector,

        pub const InitError = error{};

        pub fn init(a: Args) Posix {
            return .{ .remaining = a.vector };
        }

        /// Pops the first remaining argument, or returns `null` when none are left.
        pub fn next(it: *Posix) ?[:0]const u8 {
            if (it.remaining.len == 0) return null;
            const arg = it.remaining[0];
            it.remaining = it.remaining[1..];
            return std.mem.sliceTo(arg, 0);
        }

        /// Drops the first remaining argument. Returns `false` when none are left.
        pub fn skip(it: *Posix) bool {
            if (it.remaining.len == 0) return false;
            it.remaining = it.remaining[1..];
            return true;
        }
    };

    /// Iterator that fetches the arguments through the WASI `args_sizes_get` /
    /// `args_get` calls and stores them in buffers owned by the iterator.
    pub const Wasi = struct {
        allocator: Allocator,
        index: usize,
        args: [][:0]u8,

        pub const InitError = error{OutOfMemory} || std.posix.UnexpectedError;

        /// You must call deinit to free the internal buffer of the
        /// iterator after you are done.
        pub fn init(allocator: Allocator) Wasi.InitError!Wasi {
            const fetched_args = try Wasi.internalInit(allocator);
            return Wasi{
                .allocator = allocator,
                .index = 0,
                .args = fetched_args,
            };
        }

        /// Fetches all arguments. On success with at least one argument, the result
        /// references two allocations (the slice list and the argument bytes), both
        /// released by `deinit`. On failure nothing stays allocated.
        fn internalInit(allocator: Allocator) Wasi.InitError![][:0]u8 {
            var count: usize = undefined;
            var buf_size: usize = undefined;

            switch (std.os.wasi.args_sizes_get(&count, &buf_size)) {
                .SUCCESS => {},
                else => |err| return std.posix.unexpectedErrno(err),
            }

            if (count == 0) {
                return &[_][:0]u8{};
            }

            // Temporary pointer table; only needed until the slices are built.
            const argv = try allocator.alloc([*:0]u8, count);
            defer allocator.free(argv);

            const argv_buf = try allocator.alloc(u8, buf_size);
            // Both `args_get` and the allocation below can fail; do not leak the
            // argument bytes in that case.
            errdefer allocator.free(argv_buf);

            switch (std.os.wasi.args_get(argv.ptr, argv_buf.ptr)) {
                .SUCCESS => {},
                else => |err| return std.posix.unexpectedErrno(err),
            }

            const result_args = try allocator.alloc([:0]u8, count);
            for (result_args, argv) |*result_arg, arg_ptr| {
                result_arg.* = std.mem.sliceTo(arg_ptr, 0);
            }

            return result_args;
        }

        /// Returns the next argument, or `null` when all have been consumed.
        pub fn next(self: *Wasi) ?[:0]const u8 {
            if (self.index == self.args.len) return null;

            const arg = self.args[self.index];
            self.index += 1;
            return arg;
        }

        /// Skips the next argument. Returns `false` when all have been consumed.
        pub fn skip(self: *Wasi) bool {
            if (self.index == self.args.len) return false;

            self.index += 1;
            return true;
        }

        /// Call to free the internal buffer of the iterator.
        pub fn deinit(self: *Wasi) void {
            // Nothing is allocated when there are no args
            if (self.args.len == 0) return;

            // The argument bytes are freed as the span from the first argument's
            // first byte through the last argument's NUL terminator.
            // NOTE(review): this relies on `args_get` laying the arguments out
            // contiguously and in order, filling the whole buffer - confirm.
            const last_item = self.args[self.args.len - 1];
            const last_byte_addr = @intFromPtr(last_item.ptr) + last_item.len + 1; // null terminated
            const first_item_ptr = self.args[0].ptr;
            const len = last_byte_addr - @intFromPtr(first_item_ptr);
            self.allocator.free(first_item_ptr[0..len]);
            self.allocator.free(self.args);
        }
    };
};

/// Returns an iterator over the command-line arguments, with the program name
/// as the first entry.
/// Use `iterateAllocator` for cross-platform code.
pub fn iterate(a: Args) Iterator {
    return .init(a);
}

/// You must deinitialize iterator's internal buffers by calling `deinit` when
/// done.
pub fn iterateAllocator(a: Args, gpa: Allocator) Iterator.InitError!Iterator {
    return .initAllocator(a, gpa);
}

/// Collects all arguments into a single allocation holding the slice list
/// followed by the NUL-terminated argument bytes; call `freeSlice` to
/// release.
///
/// * On Windows, the result is encoded as
///   [WTF-8](https://wtf-8.codeberg.page/).
/// * On other platforms, the result is an opaque sequence of bytes with no
///   particular encoding.
pub fn toSlice(a: Args, gpa: Allocator) Allocator.Error![][:0]u8 {
    // NOTE(review): `Iterator.InitError` is wider than `Allocator.Error` when
    // the `Wasi` backend is selected - confirm this compiles for that target.
    var it = try a.iterateAllocator(gpa);
    defer it.deinit();

    // All argument bytes, each including its NUL terminator.
    var contents = std.array_list.Managed(u8).init(gpa);
    defer contents.deinit();

    // Length of each argument, excluding the terminator.
    var slice_list = std.array_list.Managed(usize).init(gpa);
    defer slice_list.deinit();

    while (it.next()) |arg| {
        try contents.appendSlice(arg[0 .. arg.len + 1]);
        try slice_list.append(arg.len);
    }

    const contents_slice = contents.items;
    const slice_sizes = slice_list.items;
    const slice_list_bytes = std.math.mul(usize, @sizeOf([]u8), slice_sizes.len) catch return error.OutOfMemory;
    const total_bytes = std.math.add(usize, slice_list_bytes, contents_slice.len) catch return error.OutOfMemory;
    const buf = try gpa.alignedAlloc(u8, .of([]u8), total_bytes);
    errdefer gpa.free(buf);

    // Layout of `buf`: [slice headers][argument bytes].
    const result_slice_list = std.mem.bytesAsSlice([:0]u8, buf[0..slice_list_bytes]);
    const result_contents = buf[slice_list_bytes..];
    @memcpy(result_contents[0..contents_slice.len], contents_slice);

    var contents_index: usize = 0;
    for (slice_sizes, 0..) |len, i| {
        const new_index = contents_index + len;
        result_slice_list[i] = result_contents[contents_index..new_index :0];
        // Step over the NUL terminator.
        contents_index = new_index + 1;
    }

    return result_slice_list;
}

/// Frees memory allocated by `toSlice`.
pub fn freeSlice(gpa: Allocator, to_slice_result: []const [:0]u8) void {
    // `toSlice` makes one allocation: a slice header per argument followed by
    // each argument's bytes and NUL terminator. Recompute its size from that.
    var total_bytes: usize = 0;
    for (to_slice_result) |arg| {
        total_bytes += @sizeOf([]u8) + arg.len + 1;
    }
    const unaligned_allocated_buf = @as([*]const u8, @ptrCast(to_slice_result.ptr))[0..total_bytes];
    const aligned_allocated_buf: []align(@alignOf([]u8)) const u8 = @alignCast(unaligned_allocated_buf);
    return gpa.free(aligned_allocated_buf);
}

test "Iterator.Windows" {
    const t = testIteratorWindows;

    try t(
        \\"C:\Program Files\zig\zig.exe" run .\src\main.zig -target x86_64-windows-gnu -O ReleaseSafe -- --emoji=🗿 --eval="new Regex(\"Dwayne \\\"The Rock\\\" Johnson\")"
    , &.{
        \\C:\Program Files\zig\zig.exe
        ,
        \\run
        ,
        \\.\src\main.zig
        ,
        \\-target
        ,
        \\x86_64-windows-gnu
        ,
        \\-O
        ,
        \\ReleaseSafe
        ,
        \\--
        ,
        \\--emoji=🗿
        ,
        \\--eval=new Regex("Dwayne \"The Rock\" Johnson")
        ,
    });

    // Empty
    try t("", &.{});

    // Separators
    try t("aa bb cc", &.{ "aa", "bb", "cc" });
    try t("aa\tbb\tcc", &.{ "aa", "bb", "cc" });
    try t("aa\nbb\ncc", &.{"aa\nbb\ncc"});
    try t("aa\r\nbb\r\ncc", &.{"aa\r\nbb\r\ncc"});
    try t("aa\rbb\rcc", &.{"aa\rbb\rcc"});
    try t("aa\x07bb\x07cc", &.{"aa\x07bb\x07cc"});
    try t("aa\x7Fbb\x7Fcc", &.{"aa\x7Fbb\x7Fcc"});
    try t("aa🦎bb🦎cc", &.{"aa🦎bb🦎cc"});

    // Leading/trailing whitespace
    try t(" ", &.{""});
    try t(" aa bb ", &.{ "", "aa", "bb" });
    try t("\t\t", &.{""});
    try t("\t\taa\t\tbb\t\t", &.{ "", "aa", "bb" });
    try t("\n\n", &.{"\n\n"});
    try t("\n\naa\n\nbb\n\n", &.{"\n\naa\n\nbb\n\n"});

    // Executable name with quotes/backslashes
    try t("\"aa bb\tcc\ndd\"", &.{"aa bb\tcc\ndd"});
    try t("\"", &.{""});
    try t("\"\"", &.{""});
    try t("\"\"\"", &.{""});
    try t("\"\"\"\"", &.{""});
    try t("\"\"\"\"\"", &.{""});
    try t("aa\"bb\"cc\"dd", &.{"aabbccdd"});
    try t("aa\"bb cc\"dd", &.{"aabb ccdd"});
    try t("\"aa\\\"bb\"", &.{"aa\\bb"});
    try t("\"aa\\\\\"", &.{"aa\\\\"});
    try t("aa\\\"bb", &.{"aa\\bb"});
    try t("aa\\\\\"bb", &.{"aa\\\\bb"});

    // Arguments with quotes/backslashes
    try t(". \"aa bb\tcc\ndd\"", &.{ ".", "aa bb\tcc\ndd" });
    try t(". aa\" \"bb\"\t\"cc\"\n\"dd\"", &.{ ".", "aa bb\tcc\ndd" });
    try t(". ", &.{"."});
    try t(". \"", &.{ ".", "" });
    try t(". \"\"", &.{ ".", "" });
    try t(". \"\"\"", &.{ ".", "\"" });
    try t(". \"\"\"\"", &.{ ".", "\"" });
    try t(". \"\"\"\"\"", &.{ ".", "\"\"" });
    try t(". \"\"\"\"\"\"", &.{ ".", "\"\"" });
    try t(". \" \"", &.{ ".", " " });
    try t(". \" \"\"", &.{ ".", " \"" });
    try t(". \" \"\"\"", &.{ ".", " \"" });
    try t(". \" \"\"\"\"", &.{ ".", " \"\"" });
    try t(". \" \"\"\"\"\"", &.{ ".", " \"\"" });
    try t(". \" \"\"\"\"\"\"", &.{ ".", " \"\"\"" });
    try t(". \\\"", &.{ ".", "\"" });
    try t(". \\\"\"", &.{ ".", "\"" });
    try t(". \\\"\"\"", &.{ ".", "\"" });
    try t(". \\\"\"\"\"", &.{ ".", "\"\"" });
    try t(". \\\"\"\"\"\"", &.{ ".", "\"\"" });
    try t(". \\\"\"\"\"\"\"", &.{ ".", "\"\"\"" });
    try t(". \" \\\"", &.{ ".", " \"" });
    try t(". \" \\\"\"", &.{ ".", " \"" });
    try t(". \" \\\"\"\"", &.{ ".", " \"\"" });
    try t(". \" \\\"\"\"\"", &.{ ".", " \"\"" });
    try t(". \" \\\"\"\"\"\"", &.{ ".", " \"\"\"" });
    try t(". \" \\\"\"\"\"\"\"", &.{ ".", " \"\"\"" });
    try t(". aa\\bb\\\\cc\\\\\\dd", &.{ ".", "aa\\bb\\\\cc\\\\\\dd" });
    try t(". \\\\\\\"aa bb\"", &.{ ".", "\\\"aa", "bb" });
    try t(". \\\\\\\\\"aa bb\"", &.{ ".", "\\\\aa bb" });

    // From https://learn.microsoft.com/en-us/cpp/cpp/main-function-command-line-args#results-of-parsing-command-lines
    try t(
        \\foo.exe "abc" d e
    , &.{ "foo.exe", "abc", "d", "e" });
    try t(
        \\foo.exe a\\b d"e f"g h
    , &.{ "foo.exe", "a\\\\b", "de fg", "h" });
    try t(
        \\foo.exe a\\\"b c d
    , &.{ "foo.exe", "a\\\"b", "c", "d" });
    try t(
        \\foo.exe a\\\\"b c" d e
    , &.{ "foo.exe", "a\\\\b c", "d", "e" });
    try t(
        \\foo.exe a"b"" c d
    , &.{ "foo.exe", "ab\" c d" });

    // From https://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULESEX
    try t("foo.exe CallMeIshmael", &.{ "foo.exe", "CallMeIshmael" });
    try t("foo.exe \"Call Me Ishmael\"", &.{ "foo.exe", "Call Me Ishmael" });
    try t("foo.exe Cal\"l Me I\"shmael", &.{ "foo.exe", "Call Me Ishmael" });
    try t("foo.exe CallMe\\\"Ishmael", &.{ "foo.exe", "CallMe\"Ishmael" });
    try t("foo.exe \"CallMe\\\"Ishmael\"", &.{ "foo.exe", "CallMe\"Ishmael" });
    try t("foo.exe \"Call Me Ishmael\\\\\"", &.{ "foo.exe", "Call Me Ishmael\\" });
    try t("foo.exe \"CallMe\\\\\\\"Ishmael\"", &.{ "foo.exe", "CallMe\\\"Ishmael" });
    try t("foo.exe a\\\\\\b", &.{ "foo.exe", "a\\\\\\b" });
    try t("foo.exe \"a\\\\\\b\"", &.{ "foo.exe", "a\\\\\\b" });

    // Surrogate pair encoding of 𐐷 separated by quotes.
    // Encoded as WTF-16:
    // "<0xD801>"<0xDC37>
    // Encoded as WTF-8:
    // "<0xED><0xA0><0x81>"<0xED><0xB0><0xB7>
    // During parsing, the quotes drop out and the surrogate pair
    // should end up encoded as its normal UTF-8 representation.
    try t("foo.exe \"\xed\xa0\x81\"\xed\xb0\xb7", &.{ "foo.exe", "𐐷" });
}

/// Parses `cmd_line` (WTF-8) with `Iterator.Windows` twice: once checking every
/// argument returned by `next`, once checking that `skip` sees the same count.
fn testIteratorWindows(cmd_line: []const u8, expected_args: []const []const u8) !void {
    const cmd_line_w = try std.unicode.wtf8ToWtf16LeAllocZ(testing.allocator, cmd_line);
    defer testing.allocator.free(cmd_line_w);

    // next
    {
        var it = try Iterator.Windows.init(testing.allocator, cmd_line_w);
        defer it.deinit();

        for (expected_args) |expected| {
            if (it.next()) |actual| {
                try testing.expectEqualStrings(expected, actual);
            } else {
                return error.TestUnexpectedResult;
            }
        }
        try testing.expect(it.next() == null);
    }

    // skip
    {
        var it = try Iterator.Windows.init(testing.allocator, cmd_line_w);
        defer it.deinit();

        for (0..expected_args.len) |_| {
            try testing.expect(it.skip());
        }
        try testing.expect(!it.skip());
    }
}

test "general parsing" {
    try testGeneralCmdLine("a b\tc d", &.{ "a", "b", "c", "d" });
    try testGeneralCmdLine("\"abc\" d e", &.{ "abc", "d", "e" });
    try testGeneralCmdLine("a\\\\\\b d\"e f\"g h", &.{ "a\\\\\\b", "de fg", "h" });
    try testGeneralCmdLine("a\\\\\\\"b c d", &.{ "a\\\"b", "c", "d" });
    try testGeneralCmdLine("a\\\\\\\\\"b c\" d e", &.{ "a\\\\b c", "d", "e" });
    try testGeneralCmdLine("a b\tc \"d f", &.{ "a", "b", "c", "d f" });
    try testGeneralCmdLine("j k l\\", &.{ "j", "k", "l\\" });
    try testGeneralCmdLine("\"\" x y z\\\\", &.{ "", "x", "y", "z\\\\" });

    try testGeneralCmdLine("\".\\..\\zig-cache\\build\" \"bin\\zig.exe\" \".\\..\" \".\\..\\zig-cache\" \"--help\"", &.{
        ".\\..\\zig-cache\\build",
        "bin\\zig.exe",
        ".\\..",
        ".\\..\\zig-cache",
        "--help",
    });

    try testGeneralCmdLine(
        \\ 'foo' "bar"
    , &.{ "'foo'", "bar" });
}

/// Checks that `IteratorGeneral` with default options splits `input_cmd_line`
/// into exactly `expected_args`.
fn testGeneralCmdLine(input_cmd_line: []const u8, expected_args: []const []const u8) !void {
    var it = try IteratorGeneral(.{}).init(std.testing.allocator, input_cmd_line);
    defer it.deinit();
    for (expected_args) |expected_arg| {
        const arg = it.next().?;
        try testing.expectEqualStrings(expected_arg, arg);
    }
    try testing.expect(it.next() == null);
}

test "IteratorGeneral.skip agrees with next after an escaped quote" {
    // `\"` is an escaped quote, the following `"` opens a quoted region that
    // ends after ` b`, so the first argument is `" b` and the second is `c`.
    var it = try IteratorGeneral(.{}).init(std.testing.allocator, "\\\"\" b\" c");
    defer it.deinit();
    try testing.expect(it.skip());
    try testing.expectEqualStrings("c", it.next().?);
    try testing.expect(it.next() == null);
    try testing.expect(!it.skip());
}

/// Optional parameters for `IteratorGeneral`
pub const IteratorGeneralOptions = struct {
    /// Treat `#` at the start of an argument as a comment running to end of line.
    comments: bool = false,
    /// Treat `'` as a quote character in addition to `"`.
    single_quotes: bool = false,
};

/// A general Iterator to parse a string into a set of arguments
pub fn IteratorGeneral(comptime options: IteratorGeneralOptions) type {
    return struct {
        allocator: Allocator,
        index: usize = 0,
        cmd_line: []const u8,

        /// Should the cmd_line field be freed (using the allocator) on deinit()?
        free_cmd_line_on_deinit: bool,

        /// buffer MUST be long enough to hold the cmd_line plus a null terminator.
        /// buffer will be freed (using the allocator) on deinit()
        buffer: []u8,
        start: usize = 0,
        end: usize = 0,

        pub const Self = @This();

        pub const InitError = error{OutOfMemory};

        /// cmd_line_utf8 MUST remain valid and constant while using this instance
        pub fn init(allocator: Allocator, cmd_line_utf8: []const u8) InitError!Self {
            const buffer = try allocator.alloc(u8, cmd_line_utf8.len + 1);
            errdefer allocator.free(buffer);

            return Self{
                .allocator = allocator,
                .cmd_line = cmd_line_utf8,
                .free_cmd_line_on_deinit = false,
                .buffer = buffer,
            };
        }

        /// cmd_line_utf8 will be freed (with the allocator) on deinit()
        pub fn initTakeOwnership(allocator: Allocator, cmd_line_utf8: []const u8) InitError!Self {
            const buffer = try allocator.alloc(u8, cmd_line_utf8.len + 1);
            errdefer allocator.free(buffer);

            return Self{
                .allocator = allocator,
                .cmd_line = cmd_line_utf8,
                .free_cmd_line_on_deinit = true,
                .buffer = buffer,
            };
        }

        /// Returns the byte at `index`, treating the end of `cmd_line` as a
        /// NUL terminator so callers never index out of bounds.
        fn peek(self: *const Self) u8 {
            return if (self.index != self.cmd_line.len) self.cmd_line[self.index] else 0;
        }

        // Skips over whitespace in the cmd_line.
        // Returns false if the terminating sentinel is reached, true otherwise.
        // Also skips over comments (if supported).
        fn skipWhitespace(self: *Self) bool {
            while (true) : (self.index += 1) {
                switch (self.peek()) {
                    0 => return false,
                    ' ', '\t', '\r', '\n' => continue,
                    '#' => {
                        if (options.comments) {
                            // Consume the comment up to its newline. The read is
                            // bounds-checked because a comment may run to the end
                            // of the input without a trailing newline.
                            while (true) : (self.index += 1) {
                                switch (self.peek()) {
                                    '\n' => break,
                                    0 => return false,
                                    else => continue,
                                }
                            }
                            continue;
                        } else {
                            break;
                        }
                    },
                    else => break,
                }
            }
            return true;
        }

        /// Skips the next argument without writing it to the internal buffer.
        /// Returns `false` when the end of the command line is reached first.
        pub fn skip(self: *Self) bool {
            if (!self.skipWhitespace()) {
                return false;
            }

            var backslash_count: usize = 0;
            var in_quote = false;
            while (true) : (self.index += 1) {
                const character = self.peek();
                switch (character) {
                    0 => return true,
                    '"', '\'' => {
                        if (!options.single_quotes and character == '\'') {
                            backslash_count = 0;
                            continue;
                        }
                        const quote_is_real = backslash_count % 2 == 0;
                        // The pending backslashes belong to this quote; forget them
                        // so a following quote is judged on its own, as in `next`.
                        backslash_count = 0;
                        if (quote_is_real) {
                            in_quote = !in_quote;
                        }
                    },
                    '\\' => {
                        backslash_count += 1;
                    },
                    ' ', '\t', '\r', '\n' => {
                        if (!in_quote) {
                            return true;
                        }
                        backslash_count = 0;
                    },
                    else => {
                        backslash_count = 0;
                        continue;
                    },
                }
            }
        }

        /// Returns a slice of the internal buffer that contains the next argument.
        /// Returns null when it reaches the end.
        pub fn next(self: *Self) ?[:0]const u8 {
            if (!self.skipWhitespace()) {
                return null;
            }

            var backslash_count: usize = 0;
            var in_quote = false;
            while (true) : (self.index += 1) {
                const character = self.peek();
                switch (character) {
                    0 => {
                        self.emitBackslashes(backslash_count);
                        return self.yieldToken();
                    },
                    '"', '\'' => {
                        if (!options.single_quotes and character == '\'') {
                            // Single quotes are ordinary characters in this mode.
                            self.emitBackslashes(backslash_count);
                            backslash_count = 0;
                            self.emitCharacter(character);
                            continue;
                        }
                        // 2n backslashes + quote: n backslashes, quote toggles quoting.
                        // 2n+1 backslashes + quote: n backslashes, literal quote.
                        const quote_is_real = backslash_count % 2 == 0;
                        self.emitBackslashes(backslash_count / 2);
                        backslash_count = 0;

                        if (quote_is_real) {
                            in_quote = !in_quote;
                        } else {
                            // Emit the quote character that was actually escaped.
                            self.emitCharacter(character);
                        }
                    },
                    '\\' => {
                        backslash_count += 1;
                    },
                    ' ', '\t', '\r', '\n' => {
                        self.emitBackslashes(backslash_count);
                        backslash_count = 0;
                        if (in_quote) {
                            self.emitCharacter(character);
                        } else {
                            return self.yieldToken();
                        }
                    },
                    else => {
                        self.emitBackslashes(backslash_count);
                        backslash_count = 0;
                        self.emitCharacter(character);
                    },
                }
            }
        }

        /// NUL-terminates the bytes emitted since the previous token and returns
        /// them; the next token starts right after the terminator.
        fn yieldToken(self: *Self) [:0]const u8 {
            self.buffer[self.end] = 0;
            const token = self.buffer[self.start..self.end :0];
            self.end += 1;
            self.start = self.end;
            return token;
        }

        fn emitBackslashes(self: *Self, emit_count: usize) void {
            var i: usize = 0;
            while (i < emit_count) : (i += 1) {
                self.emitCharacter('\\');
            }
        }

        fn emitCharacter(self: *Self, char: u8) void {
            self.buffer[self.end] = char;
            self.end += 1;
        }

        /// Call to free the internal buffer of the iterator.
        pub fn deinit(self: *Self) void {
            self.allocator.free(self.buffer);

            if (self.free_cmd_line_on_deinit) {
                self.allocator.free(self.cmd_line);
            }
        }
    };
}

test "response file arg parsing" {
    try testResponseFileCmdLine(
        \\a b
        \\c d\
    , &.{ "a", "b", "c", "d\\" });
    try testResponseFileCmdLine("a b c d\\", &.{ "a", "b", "c", "d\\" });

    try testResponseFileCmdLine(
        \\j
        \\ k l # this is a comment \\ \\\ \\\\ "none" "\\" "\\\"
        \\ "m" #another comment
        \\
    , &.{ "j", "k", "l", "m" });

    try testResponseFileCmdLine(
        \\ "" q ""
        \\ "r s # t" "u\" v" #another comment
        \\
    , &.{ "", "q", "", "r s # t", "u\" v" });

    try testResponseFileCmdLine(
        \\ -l"advapi32" a# b#c d#
        \\e\\\
    , &.{ "-ladvapi32", "a#", "b#c", "d#", "e\\\\\\" });

    try testResponseFileCmdLine(
        \\ 'foo' "bar"
    , &.{ "foo", "bar" });

    // A comment that runs to the end of the input without a newline.
    try testResponseFileCmdLine("a b # trailing comment", &.{ "a", "b" });

    // A backslash-escaped single quote yields a single quote.
    try testResponseFileCmdLine("'it\\'s'", &.{"it's"});
}

/// Checks that `IteratorGeneral` with comments and single quotes enabled
/// splits `input_cmd_line` into exactly `expected_args`.
fn testResponseFileCmdLine(input_cmd_line: []const u8, expected_args: []const []const u8) !void {
    var it = try IteratorGeneral(.{ .comments = true, .single_quotes = true })
        .init(std.testing.allocator, input_cmd_line);
    defer it.deinit();
    for (expected_args) |expected_arg| {
        const arg = it.next().?;
        try testing.expectEqualStrings(expected_arg, arg);
    }
    try testing.expect(it.next() == null);
}