// source_mapping.zig (50401B) - Raw
const std = @import("std");
const Allocator = std.mem.Allocator;
const utils = @import("utils.zig");
const UncheckedSliceWriter = utils.UncheckedSliceWriter;

/// The output of `parseAndRemoveLineCommands`.
pub const ParseLineCommandsResult = struct {
    /// The written portion of the output buffer (with trailing whitespace trimmed).
    result: []u8,
    /// Mapping from lines of `result` to lines in their original files.
    /// Must be released with `mappings.deinit`.
    mappings: SourceMappings,
};

/// Tracks the file/line that the next emitted line corresponds to while parsing.
const CurrentMapping = struct {
    /// Line number within `filename` that the next emitted line maps to (starts at 1).
    line_num: usize = 1,
    /// Filename taken from the most recent `#line` command (or the initial filename).
    filename: std.ArrayListUnmanaged(u8) = .empty,
    /// Set to true by `handleLineCommand` and to false by `handleLineEnd`.
    pending: bool = true,
    /// True when `filename` ends with `.c` or `.h` (case-insensitive); in that
    /// case everything except preprocessor `#line` commands is dropped.
    ignore_contents: bool = false,
};

pub const ParseAndRemoveLineCommandsOptions = struct {
    /// If non-null, used as the filename for lines that appear before any
    /// `#line` command, and recorded as the root filename of the mappings.
    initial_filename: ?[]const u8 = null,
};

/// Parses and removes #line commands as well as all source code that is within a file
/// with .c or .h extensions.
///
/// > RC treats files with the .c and .h extensions in a special manner. It
/// > assumes that a file with one of these extensions does not contain
/// > resources. If a file has the .c or .h file name extension, RC ignores all
/// > lines in the file except the preprocessor directives. Therefore, to
/// > include a file that contains resources in another resource script, give
/// > the file to be included an extension other than .c or .h.
/// from https://learn.microsoft.com/en-us/windows/win32/menurc/preprocessor-directives
///
/// Returns a slice of `buf` with the aforementioned stuff removed as well as a mapping
/// between the lines and their corresponding lines in their original files.
///
/// `buf` must be at least as long as `source`
/// In-place transformation is supported (i.e. `source` and `buf` can be the same slice)
///
/// If `options.initial_filename` is provided, that filename is guaranteed to be
/// within the `mappings.files` table and `root_filename_offset` will be set appropriately.
pub fn parseAndRemoveLineCommands(allocator: Allocator, source: []const u8, buf: []u8, options: ParseAndRemoveLineCommandsOptions) error{ OutOfMemory, InvalidLineCommand, LineNumberOverflow }!ParseLineCommandsResult {
    var parse_result = ParseLineCommandsResult{
        .result = undefined,
        .mappings = .{},
    };
    errdefer parse_result.mappings.deinit(allocator);

    var current_mapping: CurrentMapping = .{};
    defer current_mapping.filename.deinit(allocator);

    if (options.initial_filename) |initial_filename| {
        try current_mapping.filename.appendSlice(allocator, initial_filename);
        parse_result.mappings.root_filename_offset = try parse_result.mappings.files.put(allocator, initial_filename);
    }

    // This implementation attempts to be comment and string aware in order
    // to avoid errant #line <num> "<filename>" within multiline comments
    // leading to problems in the source mapping after comments are removed,
    // but it is not a perfect implementation (intentionally).
    //
    // The current implementation does not handle cases like
    // /* foo */ #line ...
    // #line ... // foo
    // #line ... /* foo ...
    // etc
    //
    // (the first example will not be recognized as a #line command, the second
    // and third will error with InvalidLineCommand)
    //
    // This is fine, though, since #line commands are generated by the
    // preprocessor so in normal circumstances they will be well-formed and
    // consistent. The only realistic way the imperfect implementation could
    // affect a 'real' use-case would be someone taking the output of a
    // preprocessor, editing it manually to add comments before/after #line
    // commands, and then running it through resinator with /:no-preprocess.

    std.debug.assert(buf.len >= source.len);
    var result = UncheckedSliceWriter{ .slice = buf };
    const State = enum {
        line_start,
        preprocessor,
        non_preprocessor,
        forward_slash,
        line_comment,
        multiline_comment,
        multiline_comment_end,
        single_quoted,
        single_quoted_escape,
        double_quoted,
        double_quoted_escape,
    };
    var state: State = .line_start;
    var index: usize = 0;
    // Start of the not-yet-written leading whitespace / '#' of the current line.
    var pending_start: ?usize = null;
    var preprocessor_start: usize = 0;
    // Line number within the output (post-removal) text.
    var line_number: usize = 1;
    while (index < source.len) : (index += 1) {
        var c = source[index];
        state: switch (state) {
            .line_start => switch (c) {
                '#' => {
                    preprocessor_start = index;
                    state = .preprocessor;
                    if (pending_start == null) {
                        pending_start = index;
                    }
                },
                '\r', '\n' => {
                    const is_crlf = formsLineEndingPair(source, c, index + 1);
                    if (!current_mapping.ignore_contents) {
                        try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);

                        result.write(c);
                        if (is_crlf) result.write(source[index + 1]);
                        line_number += 1;
                    }
                    if (is_crlf) index += 1;
                    pending_start = null;
                },
                ' ', '\t', '\x0b', '\x0c' => {
                    if (pending_start == null) {
                        pending_start = index;
                    }
                },
                '/' => {
                    if (!current_mapping.ignore_contents) {
                        result.writeSlice(source[pending_start orelse index .. index + 1]);
                        pending_start = null;
                    }
                    state = .forward_slash;
                },
                '\'' => {
                    if (!current_mapping.ignore_contents) {
                        result.writeSlice(source[pending_start orelse index .. index + 1]);
                        pending_start = null;
                    }
                    state = .single_quoted;
                },
                '"' => {
                    if (!current_mapping.ignore_contents) {
                        result.writeSlice(source[pending_start orelse index .. index + 1]);
                        pending_start = null;
                    }
                    state = .double_quoted;
                },
                else => {
                    state = .non_preprocessor;
                    if (pending_start != null) {
                        if (!current_mapping.ignore_contents) {
                            result.writeSlice(source[pending_start.? .. index + 1]);
                        }
                        pending_start = null;
                        continue;
                    }
                    if (!current_mapping.ignore_contents) {
                        result.write(c);
                    }
                },
            },
            .forward_slash => switch (c) {
                '\r', '\n' => {
                    const is_crlf = formsLineEndingPair(source, c, index + 1);
                    if (!current_mapping.ignore_contents) {
                        try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);

                        result.write(c);
                        if (is_crlf) result.write(source[index + 1]);
                        line_number += 1;
                    }
                    if (is_crlf) index += 1;
                    state = .line_start;
                    pending_start = null;
                },
                '/' => {
                    if (!current_mapping.ignore_contents) {
                        result.write(c);
                    }
                    state = .line_comment;
                },
                '*' => {
                    if (!current_mapping.ignore_contents) {
                        result.write(c);
                    }
                    state = .multiline_comment;
                },
                else => {
                    if (!current_mapping.ignore_contents) {
                        result.write(c);
                    }
                    state = .non_preprocessor;
                },
            },
            .line_comment => switch (c) {
                '\r', '\n' => {
                    const is_crlf = formsLineEndingPair(source, c, index + 1);
                    if (!current_mapping.ignore_contents) {
                        try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);

                        result.write(c);
                        if (is_crlf) result.write(source[index + 1]);
                        line_number += 1;
                    }
                    if (is_crlf) index += 1;
                    state = .line_start;
                    pending_start = null;
                },
                else => {
                    if (!current_mapping.ignore_contents) {
                        result.write(c);
                    }
                },
            },
            .multiline_comment => switch (c) {
                '\r', '\n' => {
                    const is_crlf = formsLineEndingPair(source, c, index + 1);
                    if (!current_mapping.ignore_contents) {
                        try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);

                        result.write(c);
                        if (is_crlf) result.write(source[index + 1]);
                        line_number += 1;
                    }
                    if (is_crlf) index += 1;
                    pending_start = null;
                },
                '*' => {
                    if (!current_mapping.ignore_contents) {
                        result.write(c);
                    }
                    state = .multiline_comment_end;
                },
                else => {
                    if (!current_mapping.ignore_contents) {
                        result.write(c);
                    }
                },
            },
            .multiline_comment_end => switch (c) {
                '\r', '\n' => {
                    const is_crlf = formsLineEndingPair(source, c, index + 1);
                    if (!current_mapping.ignore_contents) {
                        try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);

                        result.write(c);
                        if (is_crlf) result.write(source[index + 1]);
                        line_number += 1;
                    }
                    if (is_crlf) index += 1;
                    state = .multiline_comment;
                    pending_start = null;
                },
                '/' => {
                    if (!current_mapping.ignore_contents) {
                        result.write(c);
                    }
                    state = .non_preprocessor;
                },
                '*' => {
                    if (!current_mapping.ignore_contents) {
                        result.write(c);
                    }
                    // stay in multiline_comment_end state
                },
                else => {
                    if (!current_mapping.ignore_contents) {
                        result.write(c);
                    }
                    state = .multiline_comment;
                },
            },
            .single_quoted => switch (c) {
                '\r', '\n' => {
                    const is_crlf = formsLineEndingPair(source, c, index + 1);
                    if (!current_mapping.ignore_contents) {
                        try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);

                        result.write(c);
                        if (is_crlf) result.write(source[index + 1]);
                        line_number += 1;
                    }
                    if (is_crlf) index += 1;
                    state = .line_start;
                    pending_start = null;
                },
                '\\' => {
                    if (!current_mapping.ignore_contents) {
                        result.write(c);
                    }
                    state = .single_quoted_escape;
                },
                '\'' => {
                    if (!current_mapping.ignore_contents) {
                        result.write(c);
                    }
                    state = .non_preprocessor;
                },
                else => {
                    if (!current_mapping.ignore_contents) {
                        result.write(c);
                    }
                },
            },
            .single_quoted_escape => switch (c) {
                '\r', '\n' => {
                    const is_crlf = formsLineEndingPair(source, c, index + 1);
                    if (!current_mapping.ignore_contents) {
                        try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);

                        result.write(c);
                        if (is_crlf) result.write(source[index + 1]);
                        line_number += 1;
                    }
                    if (is_crlf) index += 1;
                    state = .line_start;
                    pending_start = null;
                },
                else => {
                    if (!current_mapping.ignore_contents) {
                        result.write(c);
                    }
                    state = .single_quoted;
                },
            },
            .double_quoted => switch (c) {
                '\r', '\n' => {
                    const is_crlf = formsLineEndingPair(source, c, index + 1);
                    if (!current_mapping.ignore_contents) {
                        try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);

                        result.write(c);
                        if (is_crlf) result.write(source[index + 1]);
                        line_number += 1;
                    }
                    if (is_crlf) index += 1;
                    state = .line_start;
                    pending_start = null;
                },
                '\\' => {
                    if (!current_mapping.ignore_contents) {
                        result.write(c);
                    }
                    state = .double_quoted_escape;
                },
                '"' => {
                    if (!current_mapping.ignore_contents) {
                        result.write(c);
                    }
                    state = .non_preprocessor;
                },
                else => {
                    if (!current_mapping.ignore_contents) {
                        result.write(c);
                    }
                },
            },
            .double_quoted_escape => switch (c) {
                '\r', '\n' => {
                    const is_crlf = formsLineEndingPair(source, c, index + 1);
                    if (!current_mapping.ignore_contents) {
                        try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);

                        result.write(c);
                        if (is_crlf) result.write(source[index + 1]);
                        line_number += 1;
                    }
                    if (is_crlf) index += 1;
                    state = .line_start;
                    pending_start = null;
                },
                else => {
                    if (!current_mapping.ignore_contents) {
                        result.write(c);
                    }
                    state = .double_quoted;
                },
            },
            .preprocessor => switch (c) {
                '\r', '\n' => {
                    // Now that we have the full line we can decide what to do with it
                    const preprocessor_str = source[preprocessor_start..index];
                    if (std.mem.startsWith(u8, preprocessor_str, "#line")) {
                        try handleLineCommand(allocator, preprocessor_str, &current_mapping);
                        const is_crlf = formsLineEndingPair(source, c, index + 1);
                        if (is_crlf) index += 1;
                        state = .line_start;
                        pending_start = null;
                    } else {
                        // Backtrack and reparse the line in the non_preprocessor state,
                        // since it's possible that this line contains a multiline comment
                        // start, etc.
                        state = .non_preprocessor;
                        index = pending_start.?;
                        pending_start = null;
                        // TODO: This is a hacky way to implement this, c needs to be
                        // updated since we're using continue :state here
                        c = source[index];
                        // continue to avoid the index += 1 of the while loop
                        continue :state .non_preprocessor;
                    }
                },
                else => {},
            },
            .non_preprocessor => switch (c) {
                '\r', '\n' => {
                    const is_crlf = formsLineEndingPair(source, c, index + 1);
                    if (!current_mapping.ignore_contents) {
                        try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);

                        result.write(c);
                        if (is_crlf) result.write(source[index + 1]);
                        line_number += 1;
                    }
                    if (is_crlf) index += 1;
                    state = .line_start;
                    pending_start = null;
                },
                '/' => {
                    if (!current_mapping.ignore_contents) {
                        result.write(c);
                    }
                    state = .forward_slash;
                },
                '\'' => {
                    if (!current_mapping.ignore_contents) {
                        result.write(c);
                    }
                    state = .single_quoted;
                },
                '"' => {
                    if (!current_mapping.ignore_contents) {
                        result.write(c);
                    }
                    state = .double_quoted;
                },
                else => {
                    if (!current_mapping.ignore_contents) {
                        result.write(c);
                    }
                },
            },
        }
    } else {
        // End of input: finish off whatever line was in progress.
        switch (state) {
            .line_start => {},
            .forward_slash,
            .line_comment,
            .multiline_comment,
            .multiline_comment_end,
            .single_quoted,
            .single_quoted_escape,
            .double_quoted,
            .double_quoted_escape,
            .non_preprocessor,
            => {
                try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
            },
            .preprocessor => {
                // Now that we have the full line we can decide what to do with it
                const preprocessor_str = source[preprocessor_start..index];
                if (std.mem.startsWith(u8, preprocessor_str, "#line")) {
                    try handleLineCommand(allocator, preprocessor_str, &current_mapping);
                } else {
                    try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
                    if (!current_mapping.ignore_contents) {
                        result.writeSlice(source[pending_start.?..index]);
                    }
                }
            },
        }
    }

    parse_result.result = result.getWritten();

    // Remove whitespace from the end of the result. This avoids issues when the
    // preprocessor adds a newline to the end of the file, since then the
    // post-preprocessed source could have more lines than the corresponding input source and
    // the inserted line can't be mapped to any lines in the original file.
    // There's no way that whitespace at the end of a file can affect the parsing
    // of the RC script so this is okay to do unconditionally.
    // TODO: There might be a better way around this
    while (parse_result.result.len > 0 and std.ascii.isWhitespace(parse_result.result[parse_result.result.len - 1])) {
        parse_result.result.len -= 1;
    }

    // If there have been no line mappings at all, then we're dealing with an empty file.
    // In this case, we want to fake a line mapping just so that we return something
    // that is useable in the same way that a non-empty mapping would be.
    if (parse_result.mappings.sources.root == null) {
        try handleLineEnd(allocator, line_number, &parse_result.mappings, &current_mapping);
    }

    return parse_result;
}

/// Returns true if `next_index` is within `source` and the byte there forms a
/// line ending pair with `line_ending` (as decided by `utils.isLineEndingPair`).
///
/// Note: This should function the same as lex.LineHandler.currentIndexFormsLineEndingPair
pub fn formsLineEndingPair(source: []const u8, line_ending: u8, next_index: usize) bool {
    if (next_index >= source.len) return false;

    const next_ending = source[next_index];
    return utils.isLineEndingPair(line_ending, next_ending);
}

/// Records that output line `post_processed_line_number` corresponds to
/// `current_mapping.line_num` of `current_mapping.filename`, then advances
/// `current_mapping.line_num` by one and clears `current_mapping.pending`.
///
/// Returns `error.LineNumberOverflow` if advancing the line number would overflow.
pub fn handleLineEnd(allocator: Allocator, post_processed_line_number: usize, mapping: *SourceMappings, current_mapping: *CurrentMapping) !void {
    const filename_offset = try mapping.files.put(allocator, current_mapping.filename.items);

    try mapping.set(post_processed_line_number, current_mapping.line_num, filename_offset);

    current_mapping.line_num = std.math.add(usize, current_mapping.line_num, 1) catch return error.LineNumberOverflow;
    current_mapping.pending = false;
}

/// Parses a `#line <num> "<filename>"` command (without its line ending) and
/// updates `current_mapping` accordingly. Commands naming `<builtin>` or
/// `<command line>` are accepted but leave `current_mapping` untouched.
///
/// Returns `error.InvalidLineCommand` if the command is malformed: missing or
/// zero line number, missing/unquoted filename, invalid escapes in the filename,
/// or a filename containing a NUL byte.
// TODO: Might want to provide diagnostics on invalid line commands instead of just returning
pub fn handleLineCommand(allocator: Allocator, line_command: []const u8, current_mapping: *CurrentMapping) error{ OutOfMemory, InvalidLineCommand }!void {
    // TODO: Are there other whitespace characters that should be included?
    var tokenizer = std.mem.tokenizeAny(u8, line_command, " \t");
    const line_directive = tokenizer.next() orelse return error.InvalidLineCommand; // #line
    if (!std.mem.eql(u8, line_directive, "#line")) return error.InvalidLineCommand;
    const linenum_str = tokenizer.next() orelse return error.InvalidLineCommand;
    const linenum = std.fmt.parseUnsigned(usize, linenum_str, 10) catch return error.InvalidLineCommand;
    if (linenum == 0) return error.InvalidLineCommand;

    var filename_literal = tokenizer.rest();
    while (filename_literal.len > 0 and std.ascii.isWhitespace(filename_literal[filename_literal.len - 1])) {
        filename_literal.len -= 1;
    }
    if (filename_literal.len < 2) return error.InvalidLineCommand;
    const is_quoted = filename_literal[0] == '"' and filename_literal[filename_literal.len - 1] == '"';
    if (!is_quoted) return error.InvalidLineCommand;
    const unquoted_filename = filename_literal[1 .. filename_literal.len - 1];

    // Ignore <builtin> and <command line>
    if (std.mem.eql(u8, unquoted_filename, "<builtin>") or std.mem.eql(u8, unquoted_filename, "<command line>")) return;

    const filename = parseFilename(allocator, unquoted_filename) catch |err| switch (err) {
        error.OutOfMemory => |e| return e,
        else => return error.InvalidLineCommand,
    };
    defer allocator.free(filename);

    // \x00 bytes in the filename is incompatible with how StringTable works
    if (std.mem.indexOfScalar(u8, filename, '\x00') != null) return error.InvalidLineCommand;

    current_mapping.line_num = linenum;
    current_mapping.filename.clearRetainingCapacity();
    try current_mapping.filename.appendSlice(allocator, filename);
    current_mapping.pending = true;
    current_mapping.ignore_contents = std.ascii.endsWithIgnoreCase(filename, ".c") or std.ascii.endsWithIgnoreCase(filename, ".h");
}

/// Same as `parseAndRemoveLineCommands`, but allocates the output buffer.
/// On success the caller owns `result.result` (free with `allocator`) and
/// `result.mappings` (release with `mappings.deinit(allocator)`).
pub fn parseAndRemoveLineCommandsAlloc(allocator: Allocator, source: []const u8, options: ParseAndRemoveLineCommandsOptions) !ParseLineCommandsResult {
    const buf = try allocator.alloc(u8, source.len);
    errdefer allocator.free(buf);
    var result = try parseAndRemoveLineCommands(allocator, source, buf, options);
    // The mappings are owned by us until we return successfully, so they must
    // be released if shrinking the buffer fails.
    errdefer result.mappings.deinit(allocator);
    result.result = try allocator.realloc(buf, result.result.len);
    return result;
}

/// C-style string parsing with a few caveats:
/// - The str cannot contain newlines or carriage returns
/// - Hex and octal escape are limited to u8
/// - No handling/support for L, u, or U prefixed strings
/// - The start and end double quotes should be omitted from the `str`
/// - Other than the above, does not assume any validity of the strings (i.e. there
///   may be unescaped double quotes within the str) and will return error.InvalidString
///   on any problems found.
///
/// The result is a UTF-8 encoded string.
/// Caller owns the returned slice.
fn parseFilename(allocator: Allocator, str: []const u8) error{ OutOfMemory, InvalidString }![]u8 {
    const State = enum {
        string,
        escape,
        escape_hex,
        escape_octal,
        escape_u,
    };

    var filename = try std.array_list.Managed(u8).initCapacity(allocator, str.len);
    errdefer filename.deinit();
    var state: State = .string;
    var index: usize = 0;
    var escape_len: usize = undefined;
    var escape_val: u64 = undefined;
    var escape_expected_len: u8 = undefined;
    while (index < str.len) : (index += 1) {
        const c = str[index];
        switch (state) {
            .string => switch (c) {
                '\\' => state = .escape,
                '"' => return error.InvalidString,
                else => filename.appendAssumeCapacity(c),
            },
            .escape => switch (c) {
                '\'', '"', '\\', '?', 'n', 'r', 't', 'a', 'b', 'e', 'f', 'v' => {
                    const escaped_c = switch (c) {
                        '\'', '"', '\\', '?' => c,
                        'n' => '\n',
                        'r' => '\r',
                        't' => '\t',
                        'a' => '\x07',
                        'b' => '\x08',
                        'e' => '\x1b', // non-standard
                        'f' => '\x0c',
                        'v' => '\x0b',
                        else => unreachable,
                    };
                    filename.appendAssumeCapacity(escaped_c);
                    state = .string;
                },
                'x' => {
                    escape_val = 0;
                    escape_len = 0;
                    state = .escape_hex;
                },
                '0'...'7' => {
                    escape_val = std.fmt.charToDigit(c, 8) catch unreachable;
                    escape_len = 1;
                    state = .escape_octal;
                },
                'u' => {
                    escape_val = 0;
                    escape_len = 0;
                    state = .escape_u;
                    escape_expected_len = 4;
                },
                'U' => {
                    escape_val = 0;
                    escape_len = 0;
                    state = .escape_u;
                    escape_expected_len = 8;
                },
                else => return error.InvalidString,
            },
            .escape_hex => switch (c) {
                '0'...'9', 'a'...'f', 'A'...'F' => {
                    const digit = std.fmt.charToDigit(c, 16) catch unreachable;
                    if (escape_val != 0) escape_val = std.math.mul(u8, @as(u8, @intCast(escape_val)), 16) catch return error.InvalidString;
                    escape_val = std.math.add(u8, @as(u8, @intCast(escape_val)), digit) catch return error.InvalidString;
                    escape_len += 1;
                },
                else => {
                    if (escape_len == 0) return error.InvalidString;
                    filename.appendAssumeCapacity(@intCast(escape_val));
                    state = .string;
                    index -= 1; // reconsume
                },
            },
            .escape_octal => switch (c) {
                '0'...'7' => {
                    const digit = std.fmt.charToDigit(c, 8) catch unreachable;
                    if (escape_val != 0) escape_val = std.math.mul(u8, @as(u8, @intCast(escape_val)), 8) catch return error.InvalidString;
                    escape_val = std.math.add(u8, @as(u8, @intCast(escape_val)), digit) catch return error.InvalidString;
                    escape_len += 1;
                    if (escape_len == 3) {
                        filename.appendAssumeCapacity(@intCast(escape_val));
                        state = .string;
                    }
                },
                else => {
                    if (escape_len == 0) return error.InvalidString;
                    filename.appendAssumeCapacity(@intCast(escape_val));
                    state = .string;
                    index -= 1; // reconsume
                },
            },
            .escape_u => switch (c) {
                '0'...'9', 'a'...'f', 'A'...'F' => {
                    const digit = std.fmt.charToDigit(c, 16) catch unreachable;
                    if (escape_val != 0) escape_val = std.math.mul(u21, @as(u21, @intCast(escape_val)), 16) catch return error.InvalidString;
                    escape_val = std.math.add(u21, @as(u21, @intCast(escape_val)), digit) catch return error.InvalidString;
                    escape_len += 1;
                    if (escape_len == escape_expected_len) {
                        var buf: [4]u8 = undefined;
                        const utf8_len = std.unicode.utf8Encode(@intCast(escape_val), &buf) catch return error.InvalidString;
                        filename.appendSliceAssumeCapacity(buf[0..utf8_len]);
                        state = .string;
                    }
                },
                // Requires escape_expected_len valid hex digits
                else => return error.InvalidString,
            },
        }
    } else {
        // End of input: flush or reject an escape sequence that was still in progress.
        switch (state) {
            .string => {},
            .escape, .escape_u => return error.InvalidString,
            .escape_hex => {
                if (escape_len == 0) return error.InvalidString;
                filename.appendAssumeCapacity(@intCast(escape_val));
            },
            .escape_octal => {
                filename.appendAssumeCapacity(@intCast(escape_val));
            },
        }
    }

    return filename.toOwnedSlice();
}

fn testParseFilename(expected: []const u8, input: []const u8) !void {
    const parsed = try parseFilename(std.testing.allocator, input);
    defer std.testing.allocator.free(parsed);

    return std.testing.expectEqualSlices(u8, expected, parsed);
}

test parseFilename {
    try testParseFilename("'\"?\\\t\n\r\x11", "\\'\\\"\\?\\\\\\t\\n\\r\\x11");
    try testParseFilename("\xABz\x53", "\\xABz\\123");
    try testParseFilename("⚡⚡", "\\u26A1\\U000026A1");
    try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\""));
    try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\"));
    try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\u"));
    try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\U"));
    try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\x"));
    try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\xZZ"));
    try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\xABCDEF"));
    try std.testing.expectError(error.InvalidString, parseFilename(std.testing.allocator, "\\777"));
}

/// Maps line numbers of the post-processed source to lines in their original files.
pub const SourceMappings = struct {
    sources: Sources = .{},
    files: StringTable = .{},
    /// The default assumes that the first filename added is the root file.
    /// The value should be set to the correct offset if that assumption does not hold.
    root_filename_offset: u32 = 0,
    source_node_pool: std.heap.MemoryPool(Sources.Node) = std.heap.MemoryPool(Sources.Node).init(std.heap.page_allocator),
    /// The largest line number that has been passed to `set` (adjusted by `collapse`).
    end_line: usize = 0,

    const sourceCompare = struct {
        fn compare(a: Source, b: Source) std.math.Order {
            return std.math.order(a.start_line, b.start_line);
        }
    }.compare;
    const Sources = std.Treap(Source, sourceCompare);

    pub const Source = struct {
        start_line: usize,
        span: usize = 0,
        corresponding_start_line: usize,
        filename_offset: u32,
    };

    pub fn deinit(self: *SourceMappings, allocator: Allocator) void {
        self.files.deinit(allocator);
        self.source_node_pool.deinit();
    }

    /// Find the node that 'contains' the `line`, i.e. the node with the
    /// largest start_line that is <= `line`. Returns null if there is no
    /// such node.
    fn findNode(self: SourceMappings, line: usize) ?*Sources.Node {
        var node = self.sources.root;
        var last_gt: ?*Sources.Node = null;

        var search_key: Source = undefined;
        search_key.start_line = line;
        while (node) |current| {
            const order = sourceCompare(search_key, current.key);
            if (order == .eq) break;
            if (order == .gt) last_gt = current;

            node = current.children[@intFromBool(order == .gt)] orelse {
                // Regardless of the current order, last_gt will contain the
                // the node we want to return.
                //
                // If search key is > current node's key, then last_gt will be
                // current which we now know is the closest node that is <=
                // the search key.
                //
                //
                // If the key is < current node's key, we want to jump back to the
                // node that the search key was most recently greater than.
                // This is necessary for scenarios like (where the search key is 2):
                //
                //   1
                //    \
                //     6
                //    /
                //   3
                //
                // In this example, we'll get down to the '3' node but ultimately want
                // to return the '1' node.
                //
                // Note: If we've never seen a key that the search key is greater than,
                // then we know that there's no valid node, so last_gt will be null.
                return last_gt;
            };
        }

        return node;
    }

    /// Records that `line_num` corresponds to `corresponding_line_num` of the file at
    /// `filename_offset`. A new node is only created when the line can't be derived
    /// from the node that currently contains it (different file or different line delta).
    ///
    /// Note: `line_num` and `corresponding_line_num` start at 1
    pub fn set(self: *SourceMappings, line_num: usize, corresponding_line_num: usize, filename_offset: u32) !void {
        const maybe_node = self.findNode(line_num);

        const need_new_node = need_new_node: {
            if (maybe_node) |node| {
                if (node.key.filename_offset != filename_offset) {
                    break :need_new_node true;
                }
                // TODO: These use i65 to avoid truncation when any of the line number values
                //       use all 64 bits of the usize. In reality, line numbers can't really
                //       get that large so limiting the line number and using a smaller iX
                //       type here might be a better solution.
                const exist_delta = @as(i65, @intCast(node.key.corresponding_start_line)) - @as(i65, @intCast(node.key.start_line));
                const cur_delta = @as(i65, @intCast(corresponding_line_num)) - @as(i65, @intCast(line_num));
                if (exist_delta != cur_delta) {
                    break :need_new_node true;
                }
                break :need_new_node false;
            }
            break :need_new_node true;
        };
        if (need_new_node) {
            // spans must not overlap
            if (maybe_node) |node| {
                std.debug.assert(node.key.start_line != line_num);
            }

            const key = Source{
                .start_line = line_num,
                .corresponding_start_line = corresponding_line_num,
                .filename_offset = filename_offset,
            };
            var entry = self.sources.getEntryFor(key);
            var new_node = try self.source_node_pool.create();
            new_node.key = key;
            entry.set(new_node);
        }
        if (line_num > self.end_line) {
            self.end_line = line_num;
        }
    }

    /// Returns the `Source` of the node that contains `line_num`, or null if there is none.
    ///
    /// Note: `line_num` starts at 1
    pub fn get(self: SourceMappings, line_num: usize) ?Source {
        const node = self.findNode(line_num) orelse return null;
        return node.key;
    }

    pub const CorrespondingSpan = struct {
        start_line: usize,
        end_line: usize,
        filename_offset: u32,
    };

    /// Returns the range of lines in the original file that `line_num` corresponds to,
    /// or null if no node contains `line_num`.
    pub fn getCorrespondingSpan(self: SourceMappings, line_num: usize) ?CorrespondingSpan {
        const source = self.get(line_num) orelse return null;
        const diff = line_num - source.start_line;
        const start_line = source.corresponding_start_line + (if (line_num == source.start_line) 0 else source.span + diff);
        const end_line = start_line + (if (line_num == source.start_line) source.span else 0);
        return CorrespondingSpan{
            .start_line = start_line,
            .end_line = end_line,
            .filename_offset = source.filename_offset,
        };
    }

    /// Folds the `num_following_lines_to_collapse` lines after `line_num` into
    /// `line_num` itself, growing its span and shifting the start line of all
    /// following nodes (and `end_line`) down by the same amount.
    ///
    /// Asserts that `num_following_lines_to_collapse > 0` and that a node
    /// containing `line_num` exists.
    pub fn collapse(self: *SourceMappings, line_num: usize, num_following_lines_to_collapse: usize) !void {
        std.debug.assert(num_following_lines_to_collapse > 0);
        var node = self.findNode(line_num).?;
        const span_diff = num_following_lines_to_collapse;
        if (node.key.start_line != line_num) {
            const offset = line_num - node.key.start_line;
            const key = Source{
                .start_line = line_num,
                .span = num_following_lines_to_collapse,
                .corresponding_start_line = node.key.corresponding_start_line + node.key.span + offset,
                .filename_offset = node.key.filename_offset,
            };
            var entry = self.sources.getEntryFor(key);
            var new_node = try self.source_node_pool.create();
            new_node.key = key;
            entry.set(new_node);
            node = new_node;
        } else {
            node.key.span += span_diff;
        }

        // now subtract the span diff from the start line number of all of
        // the following nodes in order
        var it = Sources.InorderIterator{ .current = node };
        // skip past current, but store it
        var prev = it.next().?;
        while (it.next()) |inorder_node| {
            inorder_node.key.start_line -= span_diff;

            // This can only really happen if there are #line commands within
            // a multiline comment, which should be skipped over.
            std.debug.assert(prev.key.start_line <= inorder_node.key.start_line);
            prev = inorder_node;
        }
        self.end_line -= span_diff;
    }

    /// Returns true if the line is from the main/root file (i.e. not a file that has been
    /// `#include`d).
    pub fn isRootFile(self: *const SourceMappings, line_num: usize) bool {
        const source = self.get(line_num) orelse return false;
        return source.filename_offset == self.root_filename_offset;
    }
};

test "SourceMappings collapse" {
    const allocator = std.testing.allocator;

    var mappings = SourceMappings{};
    defer mappings.deinit(allocator);
    const filename_offset = try mappings.files.put(allocator, "test.rc");

    try mappings.set(1, 1, filename_offset);
    try mappings.set(5, 5, filename_offset);

    try mappings.collapse(2, 2);

    try std.testing.expectEqual(@as(usize, 3), mappings.end_line);
    const span_1 = mappings.getCorrespondingSpan(1).?;
    try std.testing.expectEqual(@as(usize, 1), span_1.start_line);
    try std.testing.expectEqual(@as(usize, 1), span_1.end_line);
    const span_2 = mappings.getCorrespondingSpan(2).?;
    try std.testing.expectEqual(@as(usize, 2), span_2.start_line);
    try std.testing.expectEqual(@as(usize, 4), span_2.end_line);
    const span_3 = mappings.getCorrespondingSpan(3).?;
    try std.testing.expectEqual(@as(usize, 5), span_3.start_line);
    try std.testing.expectEqual(@as(usize, 5), span_3.end_line);
}

/// Same thing as StringTable in Zig's src/Wasm.zig
pub const StringTable = struct {
    /// Backing storage: every string is stored followed by a 0 byte.
    data: std.ArrayListUnmanaged(u8) = .empty,
    /// Set of offsets into `data`, hashed/compared by the string they point at.
    map: std.HashMapUnmanaged(u32, void, std.hash_map.StringIndexContext, std.hash_map.default_max_load_percentage) = .empty,

    pub fn deinit(self: *StringTable, allocator: Allocator) void {
        self.data.deinit(allocator);
        self.map.deinit(allocator);
    }

    /// Returns the offset of `value` within the table, appending a
    /// 0-terminated copy of it to `data` if it is not already present.
    pub fn put(self: *StringTable, allocator: Allocator, value: []const u8) !u32 {
        const result = try self.map.getOrPutContextAdapted(
            allocator,
            value,
            std.hash_map.StringIndexAdapter{ .bytes = &self.data },
            .{ .bytes = &self.data },
        );
        if (result.found_existing) {
            return result.key_ptr.*;
        }

        try self.data.ensureUnusedCapacity(allocator, value.len + 1);
        const offset: u32 = @intCast(self.data.items.len);

        self.data.appendSliceAssumeCapacity(value);
        self.data.appendAssumeCapacity(0);

        result.key_ptr.* = offset;

        return offset;
    }

    /// Returns the string starting at `offset`, up to (not including) its 0 terminator.
    /// Asserts that `offset` is within `data`.
    pub fn get(self: StringTable, offset: u32) []const u8 {
        std.debug.assert(offset < self.data.items.len);
        return std.mem.sliceTo(@as([*:0]const u8, @ptrCast(self.data.items.ptr + offset)), 0);
    }

    /// Returns the offset of `value` if it is present in the table, otherwise null.
    pub fn getOffset(self: *StringTable, value: []const u8) ?u32 {
        return self.map.getKeyAdapted(
            value,
            std.hash_map.StringIndexAdapter{ .bytes = &self.data },
        );
    }
};

const ExpectedSourceSpan = struct {
    start_line: usize,
    end_line: usize,
    filename: []const u8,
};

fn testParseAndRemoveLineCommands(
    expected: []const u8,
    comptime expected_spans: []const ExpectedSourceSpan,
    source: []const u8,
    options: ParseAndRemoveLineCommandsOptions,
) !void {
    var results = try parseAndRemoveLineCommandsAlloc(std.testing.allocator, source, options);
    defer std.testing.allocator.free(results.result);
    defer results.mappings.deinit(std.testing.allocator);

    try std.testing.expectEqualStrings(expected, results.result);

    expectEqualMappings(expected_spans, results.mappings) catch |err| {
        // Dump both sets of mappings to make the failure easier to diagnose.
        std.debug.print("\nexpected mappings:\n", .{});
        for (expected_spans, 0..) |span, i| {
            const line_num = i + 1;
            std.debug.print("{}: {s}:{}-{}\n", .{ line_num, span.filename, span.start_line, span.end_line });
        }
        std.debug.print("\nactual mappings:\n", .{});
        var i: usize = 1;
        while (i <= results.mappings.end_line) : (i += 1) {
            const span = results.mappings.getCorrespondingSpan(i).?;
            const filename = results.mappings.files.get(span.filename_offset);
            std.debug.print("{}: {s}:{}-{}\n", .{ i, filename, span.start_line, span.end_line });
        }
        std.debug.print("\n", .{});
        return err;
    };
}

fn expectEqualMappings(expected_spans: []const ExpectedSourceSpan, mappings: SourceMappings) !void {
    try std.testing.expectEqual(expected_spans.len, mappings.end_line);
    for (expected_spans, 0..) |expected_span, i| {
        const line_num = i + 1;
        const span = mappings.getCorrespondingSpan(line_num) orelse return error.MissingLineNum;
        const filename = mappings.files.get(span.filename_offset);
        try std.testing.expectEqual(expected_span.start_line, span.start_line);
        try std.testing.expectEqual(expected_span.end_line, span.end_line);
        try std.testing.expectEqualStrings(expected_span.filename, filename);
    }
}

test "basic" {
    try testParseAndRemoveLineCommands("", &[_]ExpectedSourceSpan{
        .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" },
    }, "#line 1 \"blah.rc\"", .{});
}

test "only removes line commands" {
    try testParseAndRemoveLineCommands(
        \\#pragma code_page(65001)
    , &[_]ExpectedSourceSpan{
        .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" },
    },
        \\#line 1 "blah.rc"
        \\#pragma code_page(65001)
    , .{});
}

test "whitespace and line endings" {
    try testParseAndRemoveLineCommands("", &[_]ExpectedSourceSpan{
        .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" },
    }, "#line \t 1 \t \"blah.rc\"\r\n", .{});
}

test "example" {
1045 try testParseAndRemoveLineCommands( 1046 \\ 1047 \\included RCDATA {"hello"} 1048 , &[_]ExpectedSourceSpan{ 1049 .{ .start_line = 1, .end_line = 1, .filename = "./included.rc" }, 1050 .{ .start_line = 2, .end_line = 2, .filename = "./included.rc" }, 1051 }, 1052 \\#line 1 "rcdata.rc" 1053 \\#line 1 "<built-in>" 1054 \\#line 1 "<built-in>" 1055 \\#line 355 "<built-in>" 1056 \\#line 1 "<command line>" 1057 \\#line 1 "<built-in>" 1058 \\#line 1 "rcdata.rc" 1059 \\#line 1 "./header.h" 1060 \\ 1061 \\ 1062 \\2 RCDATA {"blah"} 1063 \\ 1064 \\ 1065 \\#line 1 "./included.rc" 1066 \\ 1067 \\included RCDATA {"hello"} 1068 \\#line 7 "./header.h" 1069 \\#line 1 "rcdata.rc" 1070 , .{}); 1071 } 1072 1073 test "CRLF and other line endings" { 1074 try testParseAndRemoveLineCommands( 1075 "hello\r\n#pragma code_page(65001)\r\nworld", 1076 &[_]ExpectedSourceSpan{ 1077 .{ .start_line = 1, .end_line = 1, .filename = "crlf.rc" }, 1078 .{ .start_line = 2, .end_line = 2, .filename = "crlf.rc" }, 1079 .{ .start_line = 3, .end_line = 3, .filename = "crlf.rc" }, 1080 }, 1081 "#line 1 \"crlf.rc\"\r\n#line 1 \"<built-in>\"\r#line 1 \"crlf.rc\"\n\rhello\r\n#pragma code_page(65001)\r\nworld\r\n", 1082 .{}, 1083 ); 1084 } 1085 1086 test "no line commands" { 1087 try testParseAndRemoveLineCommands( 1088 \\1 RCDATA {"blah"} 1089 \\2 RCDATA {"blah"} 1090 , &[_]ExpectedSourceSpan{ 1091 .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" }, 1092 .{ .start_line = 2, .end_line = 2, .filename = "blah.rc" }, 1093 }, 1094 \\1 RCDATA {"blah"} 1095 \\2 RCDATA {"blah"} 1096 , .{ .initial_filename = "blah.rc" }); 1097 } 1098 1099 test "in place" { 1100 var mut_source = "#line 1 \"blah.rc\"".*; 1101 var result = try parseAndRemoveLineCommands(std.testing.allocator, &mut_source, &mut_source, .{}); 1102 defer result.mappings.deinit(std.testing.allocator); 1103 try std.testing.expectEqualStrings("", result.result); 1104 } 1105 1106 test "line command within a multiline comment" { 1107 try 
testParseAndRemoveLineCommands( 1108 \\/* 1109 \\#line 1 "irrelevant.rc" 1110 \\ 1111 \\ 1112 \\*/ 1113 , &[_]ExpectedSourceSpan{ 1114 .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" }, 1115 .{ .start_line = 2, .end_line = 2, .filename = "blah.rc" }, 1116 .{ .start_line = 3, .end_line = 3, .filename = "blah.rc" }, 1117 .{ .start_line = 4, .end_line = 4, .filename = "blah.rc" }, 1118 .{ .start_line = 5, .end_line = 5, .filename = "blah.rc" }, 1119 }, 1120 \\/* 1121 \\#line 1 "irrelevant.rc" 1122 \\ 1123 \\ 1124 \\*/ 1125 , .{ .initial_filename = "blah.rc" }); 1126 1127 // * but without / directly after 1128 try testParseAndRemoveLineCommands( 1129 \\/** / 1130 \\#line 1 "irrelevant.rc" 1131 \\*/ 1132 , &[_]ExpectedSourceSpan{ 1133 .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" }, 1134 .{ .start_line = 2, .end_line = 2, .filename = "blah.rc" }, 1135 .{ .start_line = 3, .end_line = 3, .filename = "blah.rc" }, 1136 }, 1137 \\/** / 1138 \\#line 1 "irrelevant.rc" 1139 \\*/ 1140 , .{ .initial_filename = "blah.rc" }); 1141 1142 // /** and **/ 1143 try testParseAndRemoveLineCommands( 1144 \\/** 1145 \\#line 1 "irrelevant.rc" 1146 \\**/ 1147 \\foo 1148 , &[_]ExpectedSourceSpan{ 1149 .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" }, 1150 .{ .start_line = 2, .end_line = 2, .filename = "blah.rc" }, 1151 .{ .start_line = 3, .end_line = 3, .filename = "blah.rc" }, 1152 .{ .start_line = 20, .end_line = 20, .filename = "blah.rc" }, 1153 }, 1154 \\/** 1155 \\#line 1 "irrelevant.rc" 1156 \\**/ 1157 \\#line 20 "blah.rc" 1158 \\foo 1159 , .{ .initial_filename = "blah.rc" }); 1160 } 1161 1162 test "whitespace preservation" { 1163 try testParseAndRemoveLineCommands( 1164 \\ / 1165 \\/ 1166 , &[_]ExpectedSourceSpan{ 1167 .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" }, 1168 .{ .start_line = 2, .end_line = 2, .filename = "blah.rc" }, 1169 }, 1170 \\ / 1171 \\/ 1172 , .{ .initial_filename = "blah.rc" }); 1173 } 1174 1175 test "preprocessor line with a 
multiline comment after" { 1176 try testParseAndRemoveLineCommands( 1177 \\#pragma test /* 1178 \\ 1179 \\*/ 1180 , &[_]ExpectedSourceSpan{ 1181 .{ .start_line = 1, .end_line = 1, .filename = "blah.rc" }, 1182 .{ .start_line = 2, .end_line = 2, .filename = "blah.rc" }, 1183 .{ .start_line = 3, .end_line = 3, .filename = "blah.rc" }, 1184 }, 1185 \\#pragma test /* 1186 \\ 1187 \\*/ 1188 , .{ .initial_filename = "blah.rc" }); 1189 } 1190 1191 test "comment after line command" { 1192 var mut_source = "#line 1 \"blah.rc\" /*".*; 1193 try std.testing.expectError(error.InvalidLineCommand, parseAndRemoveLineCommands(std.testing.allocator, &mut_source, &mut_source, .{})); 1194 } 1195 1196 test "line command with 0 as line number" { 1197 var mut_source = "#line 0 \"blah.rc\"".*; 1198 try std.testing.expectError(error.InvalidLineCommand, parseAndRemoveLineCommands(std.testing.allocator, &mut_source, &mut_source, .{})); 1199 } 1200 1201 test "line number limits" { 1202 // TODO: Avoid usize for line numbers 1203 if (@sizeOf(usize) != 8) return error.SkipZigTest; 1204 1205 // greater than i64 max 1206 try testParseAndRemoveLineCommands( 1207 \\ 1208 , &[_]ExpectedSourceSpan{ 1209 .{ .start_line = 11111111111111111111, .end_line = 11111111111111111111, .filename = "blah.rc" }, 1210 }, 1211 \\#line 11111111111111111111 "blah.rc" 1212 , .{ .initial_filename = "blah.rc" }); 1213 1214 // equal to u64 max, overflows on line number increment 1215 { 1216 var mut_source = "#line 18446744073709551615 \"blah.rc\"".*; 1217 try std.testing.expectError(error.LineNumberOverflow, parseAndRemoveLineCommands(std.testing.allocator, &mut_source, &mut_source, .{})); 1218 } 1219 1220 // greater than u64 max 1221 { 1222 var mut_source = "#line 18446744073709551616 \"blah.rc\"".*; 1223 try std.testing.expectError(error.InvalidLineCommand, parseAndRemoveLineCommands(std.testing.allocator, &mut_source, &mut_source, .{})); 1224 } 1225 }