From b759308fb30b130ea5d084c3814b1820a589ceee Mon Sep 17 00:00:00 2001 From: Vexu Date: Sat, 19 Sep 2020 14:08:32 +0300 Subject: [PATCH] stage2: DepTokenizer print errors --- src-self-hosted/DepTokenizer.zig | 107 ++++++++++++++++++++++--------- 1 file changed, 78 insertions(+), 29 deletions(-) diff --git a/src-self-hosted/DepTokenizer.zig b/src-self-hosted/DepTokenizer.zig index 3ced32ce71..52cddd1606 100644 --- a/src-self-hosted/DepTokenizer.zig +++ b/src-self-hosted/DepTokenizer.zig @@ -228,7 +228,7 @@ pub fn next(self: *Tokenizer) ?Token { .rhs_continuation_linefeed, => return null, .target => { - return Token{ .incomplete_target = self.bytes[start..] }; + return errorPosition(.incomplete_target, start, self.bytes[start..]); }, .target_reverse_solidus, .target_dollar_sign, @@ -259,7 +259,7 @@ pub fn next(self: *Tokenizer) ?Token { return null; }, .prereq_quote => { - return Token{ .incomplete_quoted_prerequisite = self.bytes[start..] }; + return errorPosition(.incomplete_quoted_prerequisite, start, self.bytes[start..]); }, .prereq => { self.state = .lhs; @@ -278,6 +278,10 @@ pub fn next(self: *Tokenizer) ?Token { unreachable; } +fn errorPosition(comptime id: @TagType(Token), index: usize, bytes: []const u8) Token { + return @unionInit(Token, @tagName(id), .{ .index = index, .bytes = bytes }); +} + fn errorIllegalChar(comptime id: @TagType(Token), index: usize, char: u8) Token { return @unionInit(Token, @tagName(id), .{ .index = index, .char = char }); } @@ -309,8 +313,10 @@ pub const Token = union(enum) { target: []const u8, target_must_resolve: []const u8, prereq: []const u8, - incomplete_quoted_prerequisite: []const u8, - incomplete_target: []const u8, + + incomplete_quoted_prerequisite: IndexAndBytes, + incomplete_target: IndexAndBytes, + invalid_target: IndexAndChar, bad_target_escape: IndexAndChar, expected_dollar_sign: IndexAndChar, @@ -322,11 +328,15 @@ pub const Token = union(enum) { char: u8, }; + pub const IndexAndBytes = struct { + index: usize, + bytes: []const u8, + }; + /// Resolve escapes in target. Only valid with .target_must_resolve. - pub fn resolve(self: Token, buf: *std.ArrayList(u8)) std.mem.Allocator.Error!void { + pub fn resolve(self: Token, writer: anytype) @TypeOf(writer).Error!void { const bytes = self.target_must_resolve; // resolve called on incorrect token - try buf.ensureCapacity(bytes.len); // cannot be longer than the unescaped string var state: enum { start, escape, dollar } = .start; for (bytes) |c| { switch (state) { @@ -334,33 +344,74 @@ pub const Token = union(enum) { switch (c) { '\\' => state = .escape, '$' => state = .dollar, - else => buf.appendAssumeCapacity(c), + else => try writer.writeByte(c), } }, .escape => { switch (c) { ' ', '#', '\\' => {}, '$' => { - buf.appendAssumeCapacity('\\'); + try writer.writeByte('\\'); state = .dollar; continue; }, - else => buf.appendAssumeCapacity('\\'), + else => try writer.writeByte('\\'), } - buf.appendAssumeCapacity(c); + try writer.writeByte(c); state = .start; }, .dollar => { - buf.appendAssumeCapacity('$'); + try writer.writeByte('$'); switch (c) { '$' => {}, - else => buf.appendAssumeCapacity(c), + else => try writer.writeByte(c), } state = .start; }, } } } + + pub fn printError(self: Token, writer: anytype) @TypeOf(writer).Error!void { + switch (self) { + .target, .target_must_resolve, .prereq => unreachable, // not an error + .incomplete_quoted_prerequisite, + .incomplete_target, + => |index_and_bytes| { + try writer.print("{} '", .{self.errStr()}); + if (self == .incomplete_target) { + const tmp = Token{ .target_must_resolve = index_and_bytes.bytes }; + try tmp.resolve(writer); + } else { + try printCharValues(writer, index_and_bytes.bytes); + } + try writer.print("' at position {}", .{index_and_bytes.index}); + }, + .invalid_target, + .bad_target_escape, + .expected_dollar_sign, + .continuation_eol, + .incomplete_escape, + => |index_and_char| { + try writer.writeAll("illegal char "); + try printUnderstandableChar(writer, index_and_char.char); + try writer.print(" at position {}: {}", .{ index_and_char.index, self.errStr() }); + }, + } + } + + fn errStr(self: Token) []const u8 { + return switch (self) { + .target, .target_must_resolve, .prereq => unreachable, // not an error + .incomplete_quoted_prerequisite => "incomplete quoted prerequisite", + .incomplete_target => "incomplete target", + .invalid_target => "invalid target", + .bad_target_escape => "bad target escape", + .expected_dollar_sign => "expecting '$'", + .continuation_eol => "continuation expecting end-of-line", + .incomplete_escape => "incomplete escape", + }; + } }; test "empty file" { @@ -755,16 +806,16 @@ test "error incomplete target" { ); try depTokenizer("\\ foo.o", - \\ERROR: incomplete target ' foo.o' at position 1 + \\ERROR: incomplete target ' foo.o' at position 0 ); try depTokenizer("\\#foo.o", - \\ERROR: incomplete target '#foo.o' at position 1 + \\ERROR: incomplete target '#foo.o' at position 0 ); try depTokenizer("\\\\foo.o", - \\ERROR: incomplete target '\foo.o' at position 1 + \\ERROR: incomplete target '\foo.o' at position 0 ); try depTokenizer("$$foo.o", - \\ERROR: incomplete target '$foo.o' at position 1 + \\ERROR: incomplete target '$foo.o' at position 0 ); } @@ -862,7 +913,7 @@ fn depTokenizer(input: []const u8, expect: []const u8) !void { }, .target_must_resolve => { try buffer.appendSlice("target = {"); - try token.resolve(&resolve_buf); + try token.resolve(resolve_buf.writer()); for (resolve_buf.items) |b| { try buffer.append(printable_char_tab[b]); } @@ -870,7 +921,9 @@ fn depTokenizer(input: []const u8, expect: []const u8) !void { try buffer.appendSlice("}"); }, else => { - @panic("TODO"); + try buffer.appendSlice("ERROR: "); + try token.printError(buffer.outStream()); + break; }, } i += 1; @@ -1005,23 +1058,19 @@ fn printCharValues(out: anytype, bytes: []const u8) !void { } } -fn printUnderstandableChar(buffer: *std.ArrayListSentineled(u8, 0), char: u8) !void { +fn printUnderstandableChar(out: anytype, char: u8) !void { if (!std.ascii.isPrint(char) or char == ' ') { - try buffer.outStream().print("\\x{X:0>2}", .{char}); + try out.print("\\x{X:0>2}", .{char}); } else { - try buffer.appendSlice("'"); - try buffer.append(printable_char_tab[char]); - try buffer.appendSlice("'"); + try out.print("'{c}'", .{printable_char_tab[char]}); } } // zig fmt: off -const printable_char_tab: []const u8 = +const printable_char_tab: [256]u8 = ( "................................ !\"#$%&'()*+,-./0123456789:;<=>?" ++ "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~." ++ "................................................................" ++ - "................................................................"; -// zig fmt: on -comptime { - assert(printable_char_tab.len == 256); -} + "................................................................" +).*; +