stage2: improve compile errors from tokenizer

In order to not regress the quality of compile errors, some improvements
had to be made.

 * std.zig.parseCharLiteral is improved to return more detailed parse
   failure information.
 * tokenizer is improved to handle null bytes in the middle of strings,
   character literals, and line comments.
 * validating how many unicode escape digits in string literals is moved
   to std.zig.parseStringLiteral rather than handled in the tokenizer.
 * when a tokenizer error occurs, if the reported token is the 'invalid'
   tag, an error note is added to point to the invalid byte location.
   Further improvements would be:
   - Mention the expected set of allowed bytes at this location.
   - Display the invalid byte (if printable, print it, otherwise
     escape-print it).
This commit is contained in:
Andrew Kelley
2021-07-01 00:14:58 -07:00
parent 3f680abbe2
commit 24c432608f
6 changed files with 306 additions and 103 deletions

View File

@@ -2466,6 +2466,7 @@ pub fn astGenFile(mod: *Module, file: *Scope.File) !void {
defer msg.deinit();
const token_starts = file.tree.tokens.items(.start);
const token_tags = file.tree.tokens.items(.tag);
try file.tree.renderError(parse_err, msg.writer());
const err_msg = try gpa.create(ErrorMsg);
@@ -2477,6 +2478,14 @@ pub fn astGenFile(mod: *Module, file: *Scope.File) !void {
},
.msg = msg.toOwnedSlice(),
};
if (token_tags[parse_err.token] == .invalid) {
const bad_off = @intCast(u32, file.tree.tokenSlice(parse_err.token).len);
try mod.errNoteNonLazy(.{
.file_scope = file,
.parent_decl_node = 0,
.lazy = .{ .byte_abs = token_starts[parse_err.token] + bad_off },
}, err_msg, "invalid byte here", .{});
}
{
const lock = comp.mutex.acquire();