commit 0cc2489d22a27b2dc82ee9ef72e945b9fa97c8fe (tree)
parent 534014f84e2e9605022ba6d6c2d2b7be1e575468
Author: Andrew Kelley <andrew@ziglang.org>
Date: Wed, 29 Jan 2020 12:13:53 -0500
Merge pull request #4317 from Vexu/std-c
Add (unfinished) C parser to std lib
Diffstat:
4 files changed, 3701 insertions(+), 0 deletions(-)
diff --git a/lib/std/c.zig b/lib/std/c.zig
@@ -2,6 +2,12 @@ const builtin = @import("builtin");
const std = @import("std");
const page_size = std.mem.page_size;
+pub const tokenizer = @import("c/tokenizer.zig");
+pub const Token = tokenizer.Token;
+pub const Tokenizer = tokenizer.Tokenizer;
+pub const parse = @import("c/parse.zig").parse;
+pub const ast = @import("c/ast.zig");
+
pub usingnamespace @import("os/bits.zig");
pub usingnamespace switch (builtin.os) {
diff --git a/lib/std/c/ast.zig b/lib/std/c/ast.zig
@@ -0,0 +1,681 @@
+const std = @import("std");
+const SegmentedList = std.SegmentedList;
+const Token = std.c.Token;
+const Source = std.c.tokenizer.Source;
+
+pub const TokenIndex = usize;
+
+/// Result of parsing: the token list, the sources, the root node and the
+/// diagnostics, together with the arena that backs them.
+pub const Tree = struct {
+ tokens: TokenList,
+ sources: SourceList,
+ root_node: *Node.Root,
+ arena_allocator: std.heap.ArenaAllocator,
+ msgs: MsgList,
+
+ pub const SourceList = SegmentedList(Source, 4);
+ pub const TokenList = Source.TokenList;
+ pub const MsgList = SegmentedList(Msg, 0);
+
+ /// Releases the arena, and with it the tree itself.
+ pub fn deinit(self: *Tree) void {
+ // The arena state is moved onto the stack before it is torn down;
+ // deinitializing it in place would free the memory that holds the
+ // allocator while it is still running.
+ var arena_copy = self.arena_allocator;
+ arena_copy.deinit();
+ // `self` must not be touched past this point.
+ }
+
+ /// Returns the text of the token at index `token`.
+ pub fn tokenSlice(tree: *Tree, token: TokenIndex) []const u8 {
+ const tok = tree.tokens.at(token);
+ return tok.slice();
+ }
+
+ /// Compares the tokens at indices `a` and `b` using `Token.eql`.
+ pub fn tokenEql(tree: *Tree, a: TokenIndex, b: TokenIndex) bool {
+ return tree.tokens.at(a).eql(tree.tokens.at(b).*);
+ }
+};
+
+/// One diagnostic stored in `Tree.msgs`.
+pub const Msg = struct {
+ /// Severity of the diagnostic.
+ kind: enum {
+ Error,
+ Warning,
+ Note,
+ },
+ /// The diagnostic payload; `Error.render` produces its text and
+ /// `Error.loc` the token it refers to.
+ inner: Error,
+};
+
+/// A parser diagnostic. Every payload records the index of the token it
+/// refers to; `render` writes the human-readable message to a stream and
+/// `loc` returns the token index the diagnostic should be reported at.
+pub const Error = union(enum) {
+ InvalidToken: SingleTokenError("invalid token '{}'"),
+ ExpectedToken: ExpectedToken,
+ ExpectedExpr: SingleTokenError("expected expression, found '{}'"),
+ ExpectedTypeName: SingleTokenError("expected type name, found '{}'"),
+ ExpectedFnBody: SingleTokenError("expected function body, found '{}'"),
+ ExpectedDeclarator: SingleTokenError("expected declarator, found '{}'"),
+ ExpectedInitializer: SingleTokenError("expected initializer, found '{}'"),
+ ExpectedEnumField: SingleTokenError("expected enum field, found '{}'"),
+ ExpectedType: SingleTokenError("expected type, found '{}'"),
+ InvalidTypeSpecifier: InvalidTypeSpecifier,
+ InvalidStorageClass: SingleTokenError("invalid storage class, found '{}'"),
+ InvalidDeclarator: SimpleError("invalid declarator"),
+ DuplicateQualifier: SingleTokenError("duplicate type qualifier '{}'"),
+ DuplicateSpecifier: SingleTokenError("duplicate declaration specifier '{}'"),
+ MustUseKwToRefer: MustUseKwToRefer,
+ FnSpecOnNonFn: SingleTokenError("function specifier '{}' on non function"),
+ NothingDeclared: SimpleError("declaration doesn't declare anything"),
+ QualifierIgnored: SingleTokenError("qualifier '{}' ignored"),
+
+ /// Writes the message for the active variant to `stream`.
+ pub fn render(self: *const Error, tree: *Tree, stream: var) !void {
+ switch (self.*) {
+ .InvalidToken => |*x| return x.render(tree, stream),
+ .ExpectedToken => |*x| return x.render(tree, stream),
+ .ExpectedExpr => |*x| return x.render(tree, stream),
+ .ExpectedTypeName => |*x| return x.render(tree, stream),
+ .ExpectedDeclarator => |*x| return x.render(tree, stream),
+ .ExpectedFnBody => |*x| return x.render(tree, stream),
+ .ExpectedInitializer => |*x| return x.render(tree, stream),
+ .ExpectedEnumField => |*x| return x.render(tree, stream),
+ .ExpectedType => |*x| return x.render(tree, stream),
+ .InvalidTypeSpecifier => |*x| return x.render(tree, stream),
+ .InvalidStorageClass => |*x| return x.render(tree, stream),
+ .InvalidDeclarator => |*x| return x.render(tree, stream),
+ .DuplicateQualifier => |*x| return x.render(tree, stream),
+ .DuplicateSpecifier => |*x| return x.render(tree, stream),
+ .MustUseKwToRefer => |*x| return x.render(tree, stream),
+ .FnSpecOnNonFn => |*x| return x.render(tree, stream),
+ .NothingDeclared => |*x| return x.render(tree, stream),
+ .QualifierIgnored => |*x| return x.render(tree, stream),
+ }
+ }
+
+ /// Returns the index of the token the active variant refers to.
+ /// Every payload stores it in `token`, except `MustUseKwToRefer`, which
+ /// is located at the type name it complains about.
+ pub fn loc(self: *const Error) TokenIndex {
+ switch (self.*) {
+ .InvalidToken => |x| return x.token,
+ .ExpectedToken => |x| return x.token,
+ .ExpectedExpr => |x| return x.token,
+ .ExpectedTypeName => |x| return x.token,
+ .ExpectedDeclarator => |x| return x.token,
+ .ExpectedFnBody => |x| return x.token,
+ .ExpectedInitializer => |x| return x.token,
+ .ExpectedEnumField => |x| return x.token,
+ .ExpectedType => |x| return x.token,
+ .InvalidTypeSpecifier => |x| return x.token,
+ .InvalidStorageClass => |x| return x.token,
+ .InvalidDeclarator => |x| return x.token,
+ .DuplicateQualifier => |x| return x.token,
+ .DuplicateSpecifier => |x| return x.token,
+ .MustUseKwToRefer => |x| return x.name,
+ .FnSpecOnNonFn => |x| return x.token,
+ .NothingDeclared => |x| return x.token,
+ .QualifierIgnored => |x| return x.token,
+ }
+ }
+
+ /// A specific token id was required but a different token was found.
+ pub const ExpectedToken = struct {
+ token: TokenIndex,
+ expected_id: @TagType(Token.Id),
+
+ pub fn render(self: *const ExpectedToken, tree: *Tree, stream: var) !void {
+ const found_token = tree.tokens.at(self.token);
+ if (found_token.id == .Invalid) {
+ return stream.print("expected '{}', found invalid bytes", .{self.expected_id.symbol()});
+ } else {
+ const token_name = found_token.id.symbol();
+ return stream.print("expected '{}', found '{}'", .{ self.expected_id.symbol(), token_name });
+ }
+ }
+ };
+
+ /// `token` cannot be combined with the specifiers already collected in `type_spec`.
+ pub const InvalidTypeSpecifier = struct {
+ token: TokenIndex,
+ type_spec: *Node.TypeSpec,
+
+ pub fn render(self: *const InvalidTypeSpecifier, tree: *Tree, stream: var) !void {
+ try stream.write("invalid type specifier '");
+ try self.type_spec.spec.print(tree, stream);
+ const token_name = tree.tokens.at(self.token).id.symbol();
+ // Separate the already-collected specifier from the offending token.
+ return stream.print(" {}'", .{token_name});
+ }
+ };
+
+ /// A tag name was used as a type without its `enum`/`struct`/`union` keyword.
+ pub const MustUseKwToRefer = struct {
+ kw: TokenIndex,
+ name: TokenIndex,
+
+ pub fn render(self: *const MustUseKwToRefer, tree: *Tree, stream: var) !void {
+ return stream.print("must use '{}' tag to refer to type '{}'", .{ tree.tokenSlice(self.kw), tree.tokenSlice(self.name) });
+ }
+ };
+
+ /// Builds a payload type whose message is `msg` formatted with the
+ /// symbol of the token at `token`.
+ fn SingleTokenError(comptime msg: []const u8) type {
+ return struct {
+ token: TokenIndex,
+
+ pub fn render(self: *const @This(), tree: *Tree, stream: var) !void {
+ const actual_token = tree.tokens.at(self.token);
+ return stream.print(msg, .{actual_token.id.symbol()});
+ }
+ };
+ }
+
+ /// Builds a payload type whose message is the fixed text `msg`.
+ /// `render` takes the tree (unused) so that it has the same signature as
+ /// the other payloads and can be called uniformly from `Error.render`.
+ fn SimpleError(comptime msg: []const u8) type {
+ return struct {
+ const ThisError = @This();
+
+ token: TokenIndex,
+
+ pub fn render(self: *const ThisError, tree: *Tree, stream: var) !void {
+ return stream.write(msg);
+ }
+ };
+ }
+};
+
+/// A C type: a set of qualifier flags plus the kind-specific payload in `id`.
+pub const Type = struct {
+ pub const TypeList = std.SegmentedList(*Type, 4);
+ // Qualifier flags; all default to unset.
+ @"const": bool = false,
+ atomic: bool = false,
+ @"volatile": bool = false,
+ restrict: bool = false,
+
+ /// Which kind of type this is, with the data specific to that kind.
+ id: union(enum) {
+ Int: struct {
+ id: Id,
+ is_signed: bool,
+
+ pub const Id = enum {
+ Char,
+ Short,
+ Int,
+ Long,
+ LongLong,
+ };
+ },
+ Float: struct {
+ id: Id,
+
+ pub const Id = enum {
+ Float,
+ Double,
+ LongDouble,
+ };
+ },
+ /// Payload is presumably the pointee type — TODO confirm.
+ Pointer: *Type,
+ Function: struct {
+ return_type: *Type,
+ param_types: TypeList,
+ },
+ /// Payload is presumably the aliased type — TODO confirm.
+ Typedef: *Type,
+ Record: *Node.RecordType,
+ Enum: *Node.EnumType,
+
+ /// Special case for macro parameters that can be any type.
+ /// Only present if `retain_macros == true`.
+ Macro,
+ },
+};
+
+pub const Node = struct {
+ id: Id,
+
+ /// Discriminates the concrete node type that embeds a `Node` as its `base` field.
+ /// There is one tag for every node struct that sets `base.id`.
+ pub const Id = enum {
+ Root,
+ EnumField,
+ RecordField,
+ RecordDeclarator,
+ JumpStmt,
+ ExprStmt,
+ LabeledStmt,
+ CompoundStmt,
+ IfStmt,
+ SwitchStmt,
+ WhileStmt,
+ DoStmt,
+ ForStmt,
+ StaticAssert,
+ Declarator,
+ Pointer,
+ FnDecl,
+ Typedef,
+ VarDecl,
+ Initialized,
+ Macro,
+ };
+
+ pub const Root = struct {
+ base: Node = Node{ .id = .Root },
+ decls: DeclList,
+ eof: TokenIndex,
+
+ pub const DeclList = SegmentedList(*Node, 4);
+ };
+
+ /// Declaration specifiers collected by the parser. Each recorded
+ /// specifier keeps the index of the token that introduced it.
+ pub const DeclSpec = struct {
+ /// Storage class keyword, or `.None` when none has been recorded.
+ storage_class: union(enum) {
+ Auto: TokenIndex,
+ Extern: TokenIndex,
+ Register: TokenIndex,
+ Static: TokenIndex,
+ Typedef: TokenIndex,
+ None,
+ } = .None,
+ /// Token of the thread-local keyword, if present.
+ thread_local: ?TokenIndex = null,
+ type_spec: TypeSpec = TypeSpec{},
+ /// Function specifier, or `.None` when none has been recorded.
+ fn_spec: union(enum) {
+ Inline: TokenIndex,
+ Noreturn: TokenIndex,
+ None,
+ } = .None,
+ /// Alignment specifier, if present.
+ align_spec: ?struct {
+ alignas: TokenIndex,
+ expr: *Node,
+ rparen: TokenIndex,
+ } = null,
+ };
+
+ /// Type specifier plus qualifiers of a declaration. Every keyword that
+ /// contributed to the specifier is recorded by token index so that the
+ /// specifier can be printed back from the source text.
+ pub const TypeSpec = struct {
+ qual: TypeQual = TypeQual{},
+ spec: union(enum) {
+ /// error or default to int
+ None,
+ Void: TokenIndex,
+ Char: struct {
+ sign: ?TokenIndex = null,
+ char: TokenIndex,
+ },
+ Short: struct {
+ sign: ?TokenIndex = null,
+ short: TokenIndex,
+ int: ?TokenIndex = null,
+ },
+ Int: struct {
+ sign: ?TokenIndex = null,
+ int: ?TokenIndex = null,
+ },
+ Long: struct {
+ sign: ?TokenIndex = null,
+ long: TokenIndex,
+ longlong: ?TokenIndex = null,
+ int: ?TokenIndex = null,
+ },
+ Float: struct {
+ float: TokenIndex,
+ complex: ?TokenIndex = null,
+ },
+ Double: struct {
+ long: ?TokenIndex = null,
+ /// null when only the complex keyword has been seen so far
+ double: ?TokenIndex,
+ complex: ?TokenIndex = null,
+ },
+ Bool: TokenIndex,
+ Atomic: struct {
+ atomic: TokenIndex,
+ typename: *Node,
+ rparen: TokenIndex,
+ },
+ Enum: *EnumType,
+ Record: *RecordType,
+ Typedef: struct {
+ sym: TokenIndex,
+ sym_type: *Type,
+ },
+
+ /// Writes the specifier to `stream` as the source text of its
+ /// keywords, separated by single spaces.
+ /// Must not be called on `.None`.
+ pub fn print(self: *const @This(), tree: *Tree, stream: var) !void {
+ switch (self.*) {
+ .None => unreachable,
+ .Void => |index| try stream.write(tree.tokenSlice(index)),
+ .Char => |char| {
+ if (char.sign) |s| {
+ try stream.write(tree.tokenSlice(s));
+ try stream.writeByte(' ');
+ }
+ try stream.write(tree.tokenSlice(char.char));
+ },
+ .Short => |short| {
+ if (short.sign) |s| {
+ try stream.write(tree.tokenSlice(s));
+ try stream.writeByte(' ');
+ }
+ try stream.write(tree.tokenSlice(short.short));
+ if (short.int) |i| {
+ try stream.writeByte(' ');
+ try stream.write(tree.tokenSlice(i));
+ }
+ },
+ .Int => |int| {
+ if (int.sign) |s| {
+ try stream.write(tree.tokenSlice(s));
+ }
+ if (int.int) |i| {
+ // Only separate when a sign keyword was written first.
+ if (int.sign != null) try stream.writeByte(' ');
+ try stream.write(tree.tokenSlice(i));
+ }
+ },
+ .Long => |long| {
+ if (long.sign) |s| {
+ try stream.write(tree.tokenSlice(s));
+ try stream.writeByte(' ');
+ }
+ try stream.write(tree.tokenSlice(long.long));
+ if (long.longlong) |l| {
+ try stream.writeByte(' ');
+ try stream.write(tree.tokenSlice(l));
+ }
+ if (long.int) |i| {
+ try stream.writeByte(' ');
+ try stream.write(tree.tokenSlice(i));
+ }
+ },
+ .Float => |float| {
+ try stream.write(tree.tokenSlice(float.float));
+ if (float.complex) |c| {
+ try stream.writeByte(' ');
+ try stream.write(tree.tokenSlice(c));
+ }
+ },
+ .Double => |double| {
+ // Every part is optional here, so track whether a
+ // separator is needed before the next keyword.
+ var need_space = false;
+ if (double.long) |l| {
+ try stream.write(tree.tokenSlice(l));
+ need_space = true;
+ }
+ if (double.double) |d| {
+ if (need_space) try stream.writeByte(' ');
+ try stream.write(tree.tokenSlice(d));
+ need_space = true;
+ }
+ if (double.complex) |c| {
+ if (need_space) try stream.writeByte(' ');
+ try stream.write(tree.tokenSlice(c));
+ }
+ },
+ .Bool => |index| try stream.write(tree.tokenSlice(index)),
+ .Typedef => |typedef| try stream.write(tree.tokenSlice(typedef.sym)),
+ else => try stream.print("TODO print {}", .{@tagName(self.*)}),
+ }
+ }
+ } = .None,
+ };
+
+ pub const EnumType = struct {
+ tok: TokenIndex,
+ name: ?TokenIndex,
+ body: ?struct {
+ lbrace: TokenIndex,
+
+ /// always EnumField
+ fields: FieldList,
+ rbrace: TokenIndex,
+ },
+
+ pub const FieldList = Root.DeclList;
+ };
+
+ pub const EnumField = struct {
+ base: Node = Node{ .id = .EnumField },
+ name: TokenIndex,
+ value: ?*Node,
+ };
+
+ pub const RecordType = struct {
+ tok: TokenIndex,
+ kind: enum {
+ Struct,
+ Union,
+ },
+ name: ?TokenIndex,
+ body: ?struct {
+ lbrace: TokenIndex,
+
+ /// RecordField or StaticAssert
+ fields: FieldList,
+ rbrace: TokenIndex,
+ },
+
+ pub const FieldList = Root.DeclList;
+ };
+
+ pub const RecordField = struct {
+ base: Node = Node{ .id = .RecordField },
+ type_spec: TypeSpec,
+ declarators: DeclaratorList,
+ semicolon: TokenIndex,
+
+ pub const DeclaratorList = Root.DeclList;
+ };
+
+ pub const RecordDeclarator = struct {
+ base: Node = Node{ .id = .RecordDeclarator },
+ declarator: ?*Declarator,
+ bit_field_expr: ?*Expr,
+ };
+
+ pub const TypeQual = struct {
+ @"const": ?TokenIndex = null,
+ atomic: ?TokenIndex = null,
+ @"volatile": ?TokenIndex = null,
+ restrict: ?TokenIndex = null,
+ };
+
+ pub const JumpStmt = struct {
+ base: Node = Node{ .id = .JumpStmt },
+ ltoken: TokenIndex,
+ kind: union(enum) {
+ Break,
+ Continue,
+ Return: ?*Node,
+ Goto: TokenIndex,
+ },
+ semicolon: TokenIndex,
+ };
+
+ pub const ExprStmt = struct {
+ base: Node = Node{ .id = .ExprStmt },
+ expr: ?*Expr,
+ semicolon: TokenIndex,
+ };
+
+ pub const LabeledStmt = struct {
+ base: Node = Node{ .id = .LabeledStmt },
+ kind: union(enum) {
+ Label: TokenIndex,
+ Case: TokenIndex,
+ Default: TokenIndex,
+ },
+ stmt: *Node,
+ };
+
+ pub const CompoundStmt = struct {
+ base: Node = Node{ .id = .CompoundStmt },
+ lbrace: TokenIndex,
+ statements: StmtList,
+ rbrace: TokenIndex,
+
+ pub const StmtList = Root.DeclList;
+ };
+
+ pub const IfStmt = struct {
+ base: Node = Node{ .id = .IfStmt },
+ @"if": TokenIndex,
+ cond: *Node,
+ body: *Node,
+ @"else": ?struct {
+ tok: TokenIndex,
+ body: *Node,
+ },
+ };
+
+ pub const SwitchStmt = struct {
+ base: Node = Node{ .id = .SwitchStmt },
+ @"switch": TokenIndex,
+ expr: *Expr,
+ rparen: TokenIndex,
+ stmt: *Node,
+ };
+
+ pub const WhileStmt = struct {
+ base: Node = Node{ .id = .WhileStmt },
+ @"while": TokenIndex,
+ cond: *Expr,
+ rparen: TokenIndex,
+ body: *Node,
+ };
+
+ pub const DoStmt = struct {
+ base: Node = Node{ .id = .DoStmt },
+ do: TokenIndex,
+ body: *Node,
+ @"while": TokenIndex,
+ cond: *Expr,
+ semicolon: TokenIndex,
+ };
+
+ pub const ForStmt = struct {
+ base: Node = Node{ .id = .ForStmt },
+ @"for": TokenIndex,
+ init: ?*Node,
+ cond: ?*Expr,
+ semicolon: TokenIndex,
+ incr: ?*Expr,
+ rparen: TokenIndex,
+ body: *Node,
+ };
+
+ pub const StaticAssert = struct {
+ base: Node = Node{ .id = .StaticAssert },
+ assert: TokenIndex,
+ expr: *Node,
+ semicolon: TokenIndex,
+ };
+
+ /// A declarator: an optional pointer chain, a prefix (nothing, an
+ /// identifier, or a parenthesized inner node) and a suffix (nothing, a
+ /// function parameter list, or a list of array parts).
+ pub const Declarator = struct {
+ base: Node = Node{ .id = .Declarator },
+ pointer: ?*Pointer,
+ prefix: union(enum) {
+ None,
+ // NOTE(review): spelled `Identifer`; the parser refers to this tag by the same spelling.
+ Identifer: TokenIndex,
+ Complex: struct {
+ lparen: TokenIndex,
+ inner: *Node,
+ rparen: TokenIndex,
+ },
+ },
+ suffix: union(enum) {
+ None,
+ Fn: struct {
+ lparen: TokenIndex,
+ params: Params,
+ rparen: TokenIndex,
+ },
+ Array: Arrays,
+ },
+
+ pub const Arrays = std.SegmentedList(*Array, 2);
+ pub const Params = std.SegmentedList(*Param, 4);
+ };
+
+ pub const Array = struct {
+ lbracket: TokenIndex,
+ inner: union(enum) {
+ Inferred,
+ Unspecified: TokenIndex,
+ Variable: struct {
+ asterisk: ?TokenIndex,
+ static: ?TokenIndex,
+ qual: TypeQual,
+ expr: *Expr,
+ },
+ },
+ rbracket: TokenIndex,
+ };
+
+ pub const Pointer = struct {
+ base: Node = Node{ .id = .Pointer },
+ asterisk: TokenIndex,
+ qual: TypeQual,
+ pointer: ?*Pointer,
+ };
+
+ pub const Param = struct {
+ kind: union(enum) {
+ Variable,
+ Old: TokenIndex,
+ Normal: struct {
+ decl_spec: *DeclSpec,
+ declarator: *Node,
+ },
+ },
+ };
+
+ pub const FnDecl = struct {
+ base: Node = Node{ .id = .FnDecl },
+ decl_spec: DeclSpec,
+ declarator: *Declarator,
+ old_decls: OldDeclList,
+ body: ?*CompoundStmt,
+
+ pub const OldDeclList = SegmentedList(*Node, 0);
+ };
+
+ pub const Typedef = struct {
+ base: Node = Node{ .id = .Typedef },
+ decl_spec: DeclSpec,
+ declarators: DeclaratorList,
+ semicolon: TokenIndex,
+
+ pub const DeclaratorList = Root.DeclList;
+ };
+
+ pub const VarDecl = struct {
+ base: Node = Node{ .id = .VarDecl },
+ decl_spec: DeclSpec,
+ initializers: Initializers,
+ semicolon: TokenIndex,
+
+ pub const Initializers = Root.DeclList;
+ };
+
+ /// A declarator together with the initializer that follows `eq`.
+ pub const Initialized = struct {
+ // The id must be the enum tag; a bare `Initialized` would name this struct type.
+ base: Node = Node{ .id = .Initialized },
+ declarator: *Declarator,
+ eq: TokenIndex,
+ init: Initializer,
+ };
+
+ pub const Initializer = union(enum) {
+ list: struct {
+ initializers: InitializerList,
+ rbrace: TokenIndex,
+ },
+ expr: *Expr,
+ pub const InitializerList = std.SegmentedList(*Initializer, 4);
+ };
+
+ /// A preprocessor macro kept in the tree: an undefinition, a
+ /// function-like macro with parameter names, or a plain expression.
+ pub const Macro = struct {
+ // The id must be the enum tag; a bare `Macro` would name this struct type.
+ base: Node = Node{ .id = .Macro },
+ kind: union(enum) {
+ Undef: []const u8,
+ Fn: struct {
+ params: []const []const u8,
+ expr: *Expr,
+ },
+ Expr: *Expr,
+ },
+ };
+};
+
+/// Header shared by expression nodes: the node kind, its type and a value slot.
+/// Only the skeleton is present; `value` has a single `None` variant and
+/// `Infix.Op` has no members yet.
+pub const Expr = struct {
+ id: Id,
+ ty: *Type,
+ value: union(enum) {
+ None,
+ },
+
+ pub const Id = enum {
+ Infix,
+ Literal,
+ };
+
+ /// Binary operator expression `lhs op rhs`.
+ pub const Infix = struct {
+ // NOTE(review): `Expr` declares no defaults for `ty` and `value`, so this
+ // initializer looks incomplete — confirm once expressions are implemented.
+ base: Expr = Expr{ .id = .Infix },
+ lhs: *Expr,
+ op_token: TokenIndex,
+ op: Op,
+ rhs: *Expr,
+
+ pub const Op = enum {};
+ };
+};
diff --git a/lib/std/c/parse.zig b/lib/std/c/parse.zig
@@ -0,0 +1,1431 @@
+const std = @import("std");
+const mem = std.mem;
+const assert = std.debug.assert;
+const Allocator = std.mem.Allocator;
+const ast = std.c.ast;
+const Node = ast.Node;
+const Type = ast.Type;
+const Tree = ast.Tree;
+const TokenIndex = ast.TokenIndex;
+const Token = std.c.Token;
+const TokenIterator = ast.Tree.TokenList.Iterator;
+
+pub const Error = error{ParseError} || Allocator.Error;
+
+/// Options accepted by `parse`.
+pub const Options = struct {
+ // /// Keep simple macros unexpanded and add the definitions to the ast
+ // retain_macros: bool = false,
+ /// Selects which warnings are treated as errors; defaults to all of them.
+ warn_as_err: union(enum) {
+ /// All warnings are warnings
+ None,
+
+ /// Some warnings are errors
+ Some: []@TagType(ast.Error),
+
+ /// All warnings are errors
+ All,
+ } = .All,
+};
+
+/// Tokenizes `source` and parses it into a syntax tree.
+/// The tree, its tokens, its messages and all of its nodes live in the
+/// tree's own arena, which is backed by `allocator`.
+/// Result should be freed with tree.deinit() when there are
+/// no more references to any of the tokens or nodes.
+pub fn parse(allocator: *Allocator, source: []const u8, options: Options) !*Tree {
+ const tree = blk: {
+ // This block looks unnecessary, but is a "foot-shield" to prevent the SegmentedLists
+ // from being initialized with a pointer to this `arena`, which is created on
+ // the stack. Following code should instead refer to `&tree.arena_allocator`, a
+ // pointer to data which lives safely on the heap and will outlive `parse`.
+ var arena = std.heap.ArenaAllocator.init(allocator);
+ errdefer arena.deinit();
+ const tree = try arena.allocator.create(ast.Tree);
+ tree.* = .{
+ .root_node = undefined,
+ .arena_allocator = arena,
+ .tokens = undefined,
+ .sources = undefined,
+ .msgs = undefined,
+ };
+ break :blk tree;
+ };
+ errdefer tree.deinit();
+ const arena = &tree.arena_allocator.allocator;
+
+ tree.tokens = ast.Tree.TokenList.init(arena);
+ tree.sources = ast.Tree.SourceList.init(arena);
+ tree.msgs = ast.Tree.MsgList.init(arena);
+
+ // NOTE(review): this is the Zig tokenizer, while `tree.tokens` stores C tokens;
+ // presumably it should be the C tokenizer — confirm against c/tokenizer.zig.
+ var tokenizer = std.zig.Tokenizer.init(source);
+ while (true) {
+ const tree_token = try tree.tokens.addOne();
+ tree_token.* = tokenizer.next();
+ if (tree_token.id == .Eof) break;
+ }
+ // TODO preprocess here
+ var it = tree.tokens.iterator(0);
+
+ // Skip any comments at the very start of the token stream.
+ while (true) {
+ const id = it.peek().?.id;
+ switch (id) {
+ .LineComment,
+ .MultiLineComment,
+ => {
+ _ = it.next();
+ },
+ else => break,
+ }
+ }
+
+ var parser = Parser{
+ .scopes = Parser.ScopeList.init(allocator),
+ // Nodes are created through `parser.arena` and are reachable from the
+ // returned tree, so they must come from the tree's arena rather than
+ // from an arena that is freed when this function returns.
+ .arena = arena,
+ .it = &it,
+ .tree = tree,
+ .options = options,
+ };
+ defer parser.scopes.deinit();
+
+ tree.root_node = try parser.root();
+ return tree;
+}
+
+const Parser = struct {
+ /// Allocator used for AST nodes and for the per-scope symbol lists.
+ arena: *Allocator,
+ it: *TokenIterator,
+ tree: *Tree,
+
+ /// Stack of open scopes; the last element is the innermost scope.
+ scopes: ScopeList,
+ options: Options,
+
+ // SegmentedList takes the number of preallocated items as its second argument.
+ const ScopeList = std.SegmentedList(Scope, 4);
+ const SymbolList = std.SegmentedList(Symbol, 4);
+
+ const Scope = struct {
+ kind: ScopeKind,
+ syms: SymbolList,
+ };
+
+ const Symbol = struct {
+ name: []const u8,
+ ty: *Type,
+ };
+
+ const ScopeKind = enum {
+ Block,
+ Loop,
+ Root,
+ Switch,
+ };
+
+ /// Appends a new scope of the given kind, with an empty symbol list
+ /// allocated from the parser arena, to the scope stack.
+ fn pushScope(parser: *Parser, kind: ScopeKind) !void {
+ const scope = try parser.scopes.addOne();
+ scope.* = Scope{
+ .kind = kind,
+ .syms = SymbolList.init(parser.arena),
+ };
+ }
+
+ /// Removes the most recently pushed scope from the scope stack.
+ /// Takes no argument besides the parser, matching the call in `root`.
+ fn popScope(parser: *Parser) void {
+ _ = parser.scopes.pop();
+ }
+
+ /// Looks up the symbol whose name equals the text of token `tok`.
+ /// Scopes are walked backwards from the end of the scope list, and the
+ /// symbols of each scope backwards from the end of its symbol list.
+ /// Returns null when no symbol matches.
+ fn getSymbol(parser: *Parser, tok: TokenIndex) ?*Symbol {
+ const wanted = parser.tree.tokenSlice(tok);
+ var scopes = parser.scopes.iterator(parser.scopes.len);
+ while (scopes.prev()) |scope| {
+ var syms = scope.syms.iterator(scope.syms.len);
+ while (syms.prev()) |sym| {
+ if (!mem.eql(u8, sym.name, wanted)) continue;
+ return sym;
+ }
+ }
+ return null;
+ }
+
+ /// Placeholder for recording the symbol introduced by declarator `dr`
+ /// with type specifier `type_spec`. Currently does nothing.
+ fn declareSymbol(parser: *Parser, type_spec: Node.TypeSpec, dr: *Node.Declarator) Error!void {
+ return; // TODO
+ }
+
+ /// Root <- ExternalDeclaration* eof
+ /// Parses external declarations inside a `.Root` scope until none is left.
+ /// A parse error does not propagate: the node built so far is returned,
+ /// with `eof` left undefined. Only allocation failure is returned as an error.
+ fn root(parser: *Parser) Allocator.Error!*Node.Root {
+ try parser.pushScope(.Root);
+ defer parser.popScope();
+ const node = try parser.arena.create(Node.Root);
+ node.* = .{
+ .decls = Node.Root.DeclList.init(parser.arena),
+ .eof = undefined,
+ };
+ while (parser.externalDeclarations() catch |e| switch (e) {
+ error.OutOfMemory => return error.OutOfMemory,
+ // Stop at the first parse error and hand back the partial tree.
+ error.ParseError => return node,
+ }) |decl| {
+ try node.decls.push(decl);
+ }
+ // If the next token is not Eof, `eof` stays undefined.
+ node.eof = parser.eatToken(.Eof) orelse return node;
+ return node;
+ }
+
+ /// ExternalDeclaration
+ /// <- DeclSpec Declarator OldStyleDecl* CompoundStmt
+ /// / Declaration
+ /// OldStyleDecl <- DeclSpec Declarator (COMMA Declarator)* SEMICOLON
+ /// Implemented as `declarationExtra` with `local == false`.
+ fn externalDeclarations(parser: *Parser) !?*Node {
+ return parser.declarationExtra(false);
+ }
+
+ /// Declaration
+ /// <- DeclSpec DeclInit SEMICOLON
+ /// / StaticAssert
+ /// DeclInit <- Declarator (EQUAL Initializer)? (COMMA Declarator (EQUAL Initializer)?)*
+ /// Implemented as `declarationExtra` with `local == true`, which returns
+ /// null when no declaration specifier is found.
+ fn declaration(parser: *Parser) !?*Node {
+ return parser.declarationExtra(true);
+ }
+
+ /// Shared implementation of `declaration` (`local == true`) and
+ /// `externalDeclarations` (`local == false`).
+ /// Produces a StaticAssert, Typedef, FnDecl or VarDecl node.
+ /// Returns null only when `local` is set and no declaration specifier
+ /// was found, i.e. the input is not a declaration.
+ fn declarationExtra(parser: *Parser, local: bool) !?*Node {
+ if (try parser.staticAssert()) |decl| return decl;
+ const begin = parser.it.index + 1;
+ var ds = Node.DeclSpec{};
+ const got_ds = try parser.declSpec(&ds);
+ if (local and !got_ds) {
+ // not a declaration
+ return null;
+ }
+ switch (ds.storage_class) {
+ .Auto, .Register => |tok| return parser.err(.{
+ .InvalidStorageClass = .{ .token = tok },
+ }),
+ .Typedef => {
+ const node = try parser.arena.create(Node.Typedef);
+ node.* = .{
+ .decl_spec = ds,
+ .declarators = Node.Typedef.DeclaratorList.init(parser.arena),
+ .semicolon = undefined,
+ };
+ while (true) {
+ const dr = @fieldParentPtr(Node.Declarator, "base", (try parser.declarator(.Must)) orelse return parser.err(.{
+ .ExpectedDeclarator = .{ .token = parser.it.index },
+ }));
+ try parser.declareSymbol(ds.type_spec, dr);
+ try node.declarators.push(&dr.base);
+ if (parser.eatToken(.Comma)) |_| {} else break;
+ }
+ // The declarator list is terminated by a semicolon, as in the VarDecl path below.
+ node.semicolon = try parser.expectToken(.Semicolon);
+ return &node.base;
+ },
+ else => {},
+ }
+ var first_dr = try parser.declarator(.Must);
+ if (first_dr != null and declaratorIsFunction(first_dr.?)) {
+ // TODO typedeffed fn proto-only
+ const dr = @fieldParentPtr(Node.Declarator, "base", first_dr.?);
+ try parser.declareSymbol(ds.type_spec, dr);
+ var old_decls = Node.FnDecl.OldDeclList.init(parser.arena);
+ // A semicolon right after the declarator means a prototype without a body.
+ const body = if (parser.eatToken(.Semicolon)) |_|
+ null
+ else blk: {
+ if (local) {
+ // TODO nested function warning
+ }
+ // TODO first_dr.is_old
+ // while (true) {
+ // var old_ds = Node.DeclSpec{};
+ // if (!(try parser.declSpec(&old_ds))) {
+ // // not old decl
+ // break;
+ // }
+ // var old_dr = (try parser.declarator(.Must));
+ // // if (old_dr == null)
+ // // try parser.err(.{
+ // // .NoParamName = .{ .token = parser.it.index },
+ // // });
+ // // try old_decls.push(decl);
+ // }
+ const body_node = (try parser.compoundStmt()) orelse return parser.err(.{
+ .ExpectedFnBody = .{ .token = parser.it.index },
+ });
+ break :blk @fieldParentPtr(Node.CompoundStmt, "base", body_node);
+ };
+
+ const node = try parser.arena.create(Node.FnDecl);
+ node.* = .{
+ .decl_spec = ds,
+ .declarator = dr,
+ .old_decls = old_decls,
+ .body = body,
+ };
+ return &node.base;
+ } else {
+ switch (ds.fn_spec) {
+ .Inline, .Noreturn => |tok| return parser.err(.{
+ .FnSpecOnNonFn = .{ .token = tok },
+ }),
+ else => {},
+ }
+ // TODO threadlocal without static or extern on local variable
+ const node = try parser.arena.create(Node.VarDecl);
+ node.* = .{
+ .decl_spec = ds,
+ .initializers = Node.VarDecl.Initializers.init(parser.arena),
+ .semicolon = undefined,
+ };
+ if (first_dr == null) {
+ // No declarator at all: only a named enum or record declares something.
+ node.semicolon = try parser.expectToken(.Semicolon);
+ const ok = switch (ds.type_spec.spec) {
+ .Enum => |e| e.name != null,
+ .Record => |r| r.name != null,
+ else => false,
+ };
+ const q = ds.type_spec.qual;
+ if (!ok)
+ try parser.warn(.{
+ .NothingDeclared = .{ .token = begin },
+ })
+ else if (q.@"const" orelse q.atomic orelse q.@"volatile" orelse q.restrict) |tok|
+ try parser.warn(.{
+ .QualifierIgnored = .{ .token = tok },
+ });
+ return &node.base;
+ }
+ var dr = @fieldParentPtr(Node.Declarator, "base", first_dr.?);
+ while (true) {
+ try parser.declareSymbol(ds.type_spec, dr);
+ if (parser.eatToken(.Equal)) |tok| {
+ try node.initializers.push((try parser.initializer(dr)) orelse return parser.err(.{
+ .ExpectedInitializer = .{ .token = parser.it.index },
+ }));
+ } else
+ try node.initializers.push(&dr.base);
+ // Another declarator follows only after a comma; without one the list is finished.
+ if (parser.eatToken(.Comma) == null) break;
+ dr = @fieldParentPtr(Node.Declarator, "base", (try parser.declarator(.Must)) orelse return parser.err(.{
+ .ExpectedDeclarator = .{ .token = parser.it.index },
+ }));
+ }
+ node.semicolon = try parser.expectToken(.Semicolon);
+ return &node.base;
+ }
+ }
+
+ /// Reports whether `node` is a Declarator with a function suffix whose
+ /// prefix is empty, an identifier, or a chain of parenthesized
+ /// declarators that ends in one of those without any inner declarator
+ /// having a pointer.
+ fn declaratorIsFunction(node: *Node) bool {
+ if (node.id != .Declarator) return false;
+ const outer = @fieldParentPtr(Node.Declarator, "base", node);
+ if (outer.suffix != .Fn) return false;
+ var current = switch (outer.prefix) {
+ .None, .Identifer => return true,
+ .Complex => |paren| paren.inner,
+ };
+ // Unwrap nested parentheses until a terminal prefix or a disqualifier is found.
+ while (true) {
+ if (current.id != .Declarator) return false;
+ const nested = @fieldParentPtr(Node.Declarator, "base", current);
+ if (nested.pointer != null) return false;
+ current = switch (nested.prefix) {
+ .None, .Identifer => return true,
+ .Complex => |paren| paren.inner,
+ };
+ }
+ }
+
+ /// StaticAssert <- Keyword_static_assert LPAREN ConstExpr COMMA STRINGLITERAL RPAREN SEMICOLON
+ /// Returns null when the next token is not `Keyword_static_assert`.
+ fn staticAssert(parser: *Parser) !?*Node {
+ const tok = parser.eatToken(.Keyword_static_assert) orelse return null;
+ _ = try parser.expectToken(.LParen);
+ // A missing expression is an error that must leave this function.
+ const const_expr = (try parser.constExpr()) orelse return parser.err(.{
+ .ExpectedExpr = .{ .token = parser.it.index },
+ });
+ _ = try parser.expectToken(.Comma);
+ // The message literal is required by the grammar, but StaticAssert has no field for it.
+ _ = try parser.expectToken(.StringLiteral);
+ _ = try parser.expectToken(.RParen);
+ const node = try parser.arena.create(Node.StaticAssert);
+ node.* = .{
+ .assert = tok,
+ .expr = const_expr,
+ .semicolon = try parser.expectToken(.Semicolon),
+ };
+ return &node.base;
+ }
+
+ /// DeclSpec <- (StorageClassSpec / TypeSpec / FnSpec / AlignSpec)*
+ /// Repeatedly tries the four specifier parsers, in that order, until none
+ /// of them reports success. Returns true if at least one round succeeded.
+ fn declSpec(parser: *Parser, ds: *Node.DeclSpec) !bool {
+ var consumed_any = false;
+ while (true) {
+ // `or` short-circuits, so later parsers only run when the earlier ones reported false.
+ const progressed = (try parser.storageClassSpec(ds)) or (try parser.typeSpec(&ds.type_spec)) or (try parser.fnSpec(ds)) or (try parser.alignSpec(ds));
+ if (!progressed) break;
+ consumed_any = true;
+ }
+ return consumed_any;
+ }
+
+ /// StorageClassSpec
+ /// <- Keyword_typedef / Keyword_extern / Keyword_static / Keyword_thread_local / Keyword_auto / Keyword_register
+ /// Tries to consume one storage class keyword and record it in `ds`.
+ /// Returns false when the next token is none of the keywords above.
+ /// Returns true otherwise, including when the keyword conflicts with
+ /// what `ds` already holds; in that case a DuplicateSpecifier warning is
+ /// emitted and `ds` is left unchanged.
+ fn storageClassSpec(parser: *Parser, ds: *Node.DeclSpec) !bool {
+ // `break :blk` leaves this block for the warning below; a keyword that
+ // was recorded successfully returns from inside the block instead.
+ blk: {
+ if (parser.eatToken(.Keyword_typedef)) |tok| {
+ if (ds.storage_class != .None or ds.thread_local != null)
+ break :blk;
+ ds.storage_class = .{ .Typedef = tok };
+ } else if (parser.eatToken(.Keyword_extern)) |tok| {
+ if (ds.storage_class != .None)
+ break :blk;
+ ds.storage_class = .{ .Extern = tok };
+ } else if (parser.eatToken(.Keyword_static)) |tok| {
+ if (ds.storage_class != .None)
+ break :blk;
+ ds.storage_class = .{ .Static = tok };
+ } else if (parser.eatToken(.Keyword_thread_local)) |tok| {
+ // thread_local is accepted alone or together with extern/static.
+ switch (ds.storage_class) {
+ .None, .Extern, .Static => {},
+ else => break :blk,
+ }
+ ds.thread_local = tok;
+ } else if (parser.eatToken(.Keyword_auto)) |tok| {
+ if (ds.storage_class != .None or ds.thread_local != null)
+ break :blk;
+ ds.storage_class = .{ .Auto = tok };
+ } else if (parser.eatToken(.Keyword_register)) |tok| {
+ if (ds.storage_class != .None or ds.thread_local != null)
+ break :blk;
+ ds.storage_class = .{ .Register = tok };
+ } else return false;
+ return true;
+ }
+ // NOTE(review): the warning is located at the iterator's current index, read
+ // after the keyword was eaten — confirm this points at the intended token.
+ try parser.warn(.{
+ .DuplicateSpecifier = .{ .token = parser.it.index },
+ });
+ return true;
+ }
+
+ /// TypeSpec
+ /// <- Keyword_void / Keyword_char / Keyword_short / Keyword_int / Keyword_long / Keyword_float / Keyword_double
+ /// / Keyword_signed / Keyword_unsigned / Keyword_bool / Keyword_complex / Keyword_imaginary /
+ /// / Keyword_atomic LPAREN TypeName RPAREN
+ /// / EnumSpec
+ /// / RecordSpec
+ /// / IDENTIFIER // typedef name
+ /// / TypeQual
+ /// Tries to consume one type specifier or type qualifier and merge it into `type_spec`.
+ /// Returns true when a specifier was consumed. When the next token is not a
+ /// specifier the result is whatever `typeQual` reports. An identifier that is
+ /// not a known typedef name is put back and false is returned.
+ /// A keyword that cannot be combined with the specifier collected so far
+ /// yields an InvalidTypeSpecifier error.
+ fn typeSpec(parser: *Parser, type_spec: *Node.TypeSpec) !bool {
+ // `break :blk` leaves this block for the InvalidTypeSpecifier error below.
+ blk: {
+ if (parser.eatToken(.Keyword_void)) |tok| {
+ if (type_spec.spec != .None)
+ break :blk;
+ type_spec.spec = .{ .Void = tok };
+ } else if (parser.eatToken(.Keyword_char)) |tok| {
+ switch (type_spec.spec) {
+ .None => {
+ type_spec.spec = .{
+ .Char = .{
+ .char = tok,
+ },
+ };
+ },
+ .Int => |int| {
+ if (int.int != null)
+ break :blk;
+ type_spec.spec = .{
+ .Char = .{
+ .char = tok,
+ .sign = int.sign,
+ },
+ };
+ },
+ else => break :blk,
+ }
+ } else if (parser.eatToken(.Keyword_short)) |tok| {
+ switch (type_spec.spec) {
+ .None => {
+ type_spec.spec = .{
+ .Short = .{
+ .short = tok,
+ },
+ };
+ },
+ .Int => |int| {
+ if (int.int != null)
+ break :blk;
+ type_spec.spec = .{
+ .Short = .{
+ .short = tok,
+ .sign = int.sign,
+ },
+ };
+ },
+ else => break :blk,
+ }
+ } else if (parser.eatToken(.Keyword_long)) |tok| {
+ switch (type_spec.spec) {
+ .None => {
+ type_spec.spec = .{
+ .Long = .{
+ .long = tok,
+ },
+ };
+ },
+ .Int => |int| {
+ type_spec.spec = .{
+ .Long = .{
+ .long = tok,
+ .sign = int.sign,
+ .int = int.int,
+ },
+ };
+ },
+ .Long => |*long| {
+ if (long.longlong != null)
+ break :blk;
+ long.longlong = tok;
+ },
+ .Double => |*double| {
+ if (double.long != null)
+ break :blk;
+ double.long = tok;
+ },
+ else => break :blk,
+ }
+ } else if (parser.eatToken(.Keyword_int)) |tok| {
+ switch (type_spec.spec) {
+ .None => {
+ type_spec.spec = .{
+ .Int = .{
+ .int = tok,
+ },
+ };
+ },
+ .Short => |*short| {
+ if (short.int != null)
+ break :blk;
+ short.int = tok;
+ },
+ .Int => |*int| {
+ if (int.int != null)
+ break :blk;
+ int.int = tok;
+ },
+ .Long => |*long| {
+ if (long.int != null)
+ break :blk;
+ long.int = tok;
+ },
+ else => break :blk,
+ }
+ } else if (parser.eatToken(.Keyword_signed) orelse parser.eatToken(.Keyword_unsigned)) |tok| {
+ switch (type_spec.spec) {
+ .None => {
+ type_spec.spec = .{
+ .Int = .{
+ .sign = tok,
+ },
+ };
+ },
+ .Char => |*char| {
+ if (char.sign != null)
+ break :blk;
+ char.sign = tok;
+ },
+ .Short => |*short| {
+ if (short.sign != null)
+ break :blk;
+ short.sign = tok;
+ },
+ .Int => |*int| {
+ if (int.sign != null)
+ break :blk;
+ int.sign = tok;
+ },
+ .Long => |*long| {
+ if (long.sign != null)
+ break :blk;
+ long.sign = tok;
+ },
+ else => break :blk,
+ }
+ } else if (parser.eatToken(.Keyword_float)) |tok| {
+ if (type_spec.spec != .None)
+ break :blk;
+ type_spec.spec = .{
+ .Float = .{
+ .float = tok,
+ },
+ };
+ } else if (parser.eatToken(.Keyword_double)) |tok| {
+ if (type_spec.spec != .None)
+ break :blk;
+ type_spec.spec = .{
+ .Double = .{
+ .double = tok,
+ },
+ };
+ } else if (parser.eatToken(.Keyword_complex)) |tok| {
+ switch (type_spec.spec) {
+ .None => {
+ type_spec.spec = .{
+ .Double = .{
+ .complex = tok,
+ .double = null,
+ },
+ };
+ },
+ .Float => |*float| {
+ if (float.complex != null)
+ break :blk;
+ float.complex = tok;
+ },
+ .Double => |*double| {
+ if (double.complex != null)
+ break :blk;
+ double.complex = tok;
+ },
+ else => break :blk,
+ }
+ } else if (parser.eatToken(.Keyword_bool)) |tok| {
+ if (type_spec.spec != .None)
+ break :blk;
+ type_spec.spec = .{ .Bool = tok };
+ } else if (parser.eatToken(.Keyword_atomic)) |tok| {
+ // might be _Atomic qualifier
+ if (parser.eatToken(.LParen)) |_| {
+ if (type_spec.spec != .None)
+ break :blk;
+ const name = (try parser.typeName()) orelse return parser.err(.{
+ .ExpectedTypeName = .{ .token = parser.it.index },
+ });
+ // Assign the whole union so that the active tag changes with the payload.
+ type_spec.spec = .{
+ .Atomic = .{
+ .atomic = tok,
+ .typename = name,
+ .rparen = try parser.expectToken(.RParen),
+ },
+ };
+ } else {
+ // Not a specifier: hand the keyword over to the qualifier parser.
+ parser.putBackToken(tok);
+ return parser.typeQual(&type_spec.qual);
+ }
+ } else if (parser.eatToken(.Keyword_enum)) |tok| {
+ if (type_spec.spec != .None)
+ break :blk;
+ type_spec.spec = .{ .Enum = try parser.enumSpec(tok) };
+ } else if (parser.eatToken(.Keyword_union) orelse parser.eatToken(.Keyword_struct)) |tok| {
+ if (type_spec.spec != .None)
+ break :blk;
+ type_spec.spec = .{ .Record = try parser.recordSpec(tok) };
+ } else if (parser.eatToken(.Identifier)) |tok| {
+ const sym = parser.getSymbol(tok) orelse {
+ parser.putBackToken(tok);
+ return false;
+ };
+ // The symbol's type decides whether the identifier is a typedef name.
+ switch (sym.ty.id) {
+ .Enum => |e| enum_blk: {
+ if (e.name) |some|
+ if (!parser.tree.tokenEql(some, tok))
+ break :enum_blk;
+ return parser.err(.{
+ .MustUseKwToRefer = .{ .kw = e.tok, .name = tok },
+ });
+ },
+ .Record => |r| record_blk: {
+ if (r.name) |some|
+ if (!parser.tree.tokenEql(some, tok))
+ break :record_blk;
+ return parser.err(.{
+ .MustUseKwToRefer = .{
+ .kw = r.tok,
+ .name = tok,
+ },
+ });
+ },
+ .Typedef => {
+ type_spec.spec = .{
+ .Typedef = .{
+ .sym = tok,
+ .sym_type = sym.ty,
+ },
+ };
+ return true;
+ },
+ else => {},
+ }
+ parser.putBackToken(tok);
+ return false;
+ } else {
+ // No specifier keyword: the only remaining alternative is a qualifier.
+ return parser.typeQual(&type_spec.qual);
+ }
+ // A specifier keyword was consumed and merged into `type_spec`.
+ return true;
+ }
+ return parser.err(.{
+ .InvalidTypeSpecifier = .{
+ .token = parser.it.index,
+ .type_spec = type_spec,
+ },
+ });
+ }
+
+ /// TypeQual <- Keyword_const / Keyword_restrict / Keyword_volatile / Keyword_atomic
+ ///
+ /// Tries to consume one type qualifier and records its token in `qual`.
+ /// Returns `false` when the next token is not a qualifier and `true` when one
+ /// was consumed. A qualifier whose slot is already filled is reported through
+ /// `parser.warn` as `DuplicateQualifier`; the first token is kept.
+ fn typeQual(parser: *Parser, qual: *Node.TypeQual) !bool {
+ blk: {
+ if (parser.eatToken(.Keyword_const)) |tok| {
+ if (qual.@"const" != null)
+ break :blk;
+ qual.@"const" = tok;
+ } else if (parser.eatToken(.Keyword_restrict)) |tok| {
+ // `restrict` gets its own slot. It used to be checked against and stored
+ // in `atomic`, which clobbered `_Atomic` and mis-reported duplicates.
+ // NOTE(review): relies on Node.TypeQual declaring a `restrict` field.
+ if (qual.restrict != null)
+ break :blk;
+ qual.restrict = tok;
+ } else if (parser.eatToken(.Keyword_volatile)) |tok| {
+ if (qual.@"volatile" != null)
+ break :blk;
+ qual.@"volatile" = tok;
+ } else if (parser.eatToken(.Keyword_atomic)) |tok| {
+ if (qual.atomic != null)
+ break :blk;
+ qual.atomic = tok;
+ } else return false;
+ return true;
+ }
+ // Only reached through `break :blk`, i.e. for a repeated qualifier.
+ try parser.warn(.{
+ .DuplicateQualifier = .{ .token = parser.it.index },
+ });
+ return true;
+ }
+
+ /// FnSpec <- Keyword_inline / Keyword_noreturn
+ ///
+ /// Consumes one function specifier and stores it in `ds.fn_spec`.
+ /// Returns `false` when the next token is neither keyword. When `ds.fn_spec`
+ /// is already set, the keyword is still consumed, a `DuplicateSpecifier`
+ /// warning is issued and the stored specifier is left untouched.
+ fn fnSpec(parser: *Parser, ds: *Node.DeclSpec) !bool {
+ const already_set = ds.fn_spec != .None;
+ if (parser.eatToken(.Keyword_inline)) |inline_tok| {
+ if (!already_set) {
+ ds.fn_spec = .{ .Inline = inline_tok };
+ return true;
+ }
+ } else if (parser.eatToken(.Keyword_noreturn)) |noreturn_tok| {
+ if (!already_set) {
+ ds.fn_spec = .{ .Noreturn = noreturn_tok };
+ return true;
+ }
+ } else {
+ return false;
+ }
+ // A specifier keyword was consumed although one was recorded before.
+ try parser.warn(.{
+ .DuplicateSpecifier = .{ .token = parser.it.index },
+ });
+ return true;
+ }
+
+ /// AlignSpec <- Keyword_alignas LPAREN (TypeName / ConstExpr) RPAREN
+ ///
+ /// Parses an `_Alignas(...)` specifier into `ds.align_spec`.
+ /// Returns `false` when the next token is not `_Alignas`. A second specifier
+ /// produces a `DuplicateSpecifier` warning and then replaces the first one.
+ /// Fails with `ExpectedExpr` when the parentheses hold neither a type name nor
+ /// a constant expression.
+ fn alignSpec(parser: *Parser, ds: *Node.DeclSpec) !bool {
+ if (parser.eatToken(.Keyword_alignas)) |tok| {
+ _ = try parser.expectToken(.LParen);
+ // `return` was missing here: the error has to leave the function, as at the
+ // other `orelse return parser.err(...)` sites, instead of becoming the operand.
+ // NOTE(review): typeName yields `?*Node` and constExpr `?*Expr`; confirm the
+ // two alternatives can share one operand type.
+ const node = (try parser.typeName()) orelse (try parser.constExpr()) orelse return parser.err(.{
+ .ExpectedExpr = .{ .token = parser.it.index },
+ });
+ if (ds.align_spec != null) {
+ try parser.warn(.{
+ .DuplicateSpecifier = .{ .token = parser.it.index },
+ });
+ }
+ ds.align_spec = .{
+ .alignas = tok,
+ .expr = node,
+ .rparen = try parser.expectToken(.RParen),
+ };
+ return true;
+ }
+ return false;
+ }
+
+ /// EnumSpec <- Keyword_enum IDENTIFIER? (LBRACE EnumField RBRACE)?
+ ///
+ /// `tok` is the already consumed `enum` keyword. Creates the `Node.EnumType`,
+ /// registers a named enum in `parser.symbols` before its body is parsed, and
+ /// parses the optional brace-enclosed field list (at least one field, a
+ /// trailing comma is accepted).
+ fn enumSpec(parser: *Parser, tok: TokenIndex) !*Node.EnumType {
+ const enum_node = try parser.arena.create(Node.EnumType);
+ const maybe_name = parser.eatToken(.Identifier);
+ enum_node.* = .{
+ .tok = tok,
+ .name = maybe_name,
+ .body = null,
+ };
+ const enum_ty = try parser.arena.create(Type);
+ enum_ty.* = .{
+ .id = .{
+ .Enum = enum_node,
+ },
+ };
+ if (maybe_name) |name_tok| {
+ try parser.symbols.append(.{
+ .name = parser.tree.tokenSlice(name_tok),
+ .ty = enum_ty,
+ });
+ }
+ // Without a body this is only a reference or forward declaration.
+ const lbrace = parser.eatToken(.LBrace) orelse return enum_node;
+ var fields = Node.EnumType.FieldList.init(parser.arena);
+ const first_field = (try parser.enumField()) orelse return parser.err(.{
+ .ExpectedEnumField = .{ .token = parser.it.index },
+ });
+ try fields.push(first_field);
+ while (parser.eatToken(.Comma) != null) {
+ // A comma that is not followed by a field is a trailing comma.
+ const field = (try parser.enumField()) orelse break;
+ try fields.push(field);
+ }
+ const rbrace = try parser.expectToken(.RBrace);
+ enum_node.body = .{
+ .lbrace = lbrace,
+ .fields = fields,
+ .rbrace = rbrace,
+ };
+ return enum_node;
+ }
+
+ /// EnumField <- IDENTIFIER (EQUAL ConstExpr)?
+ ///
+ /// Parses a single enumerator; the separating commas are handled by `enumSpec`.
+ /// Returns null when the next token is not an identifier. Fails with
+ /// `ExpectedExpr` when `=` is not followed by a constant expression.
+ fn enumField(parser: *Parser) !?*Node {
+ const name = parser.eatToken(.Identifier) orelse return null;
+ const node = try parser.arena.create(Node.EnumField);
+ node.* = .{
+ .name = name,
+ .value = null,
+ };
+ if (parser.eatToken(.Equal)) |_| {
+ // `return` was missing: the error must propagate instead of being
+ // assigned to `value`.
+ node.value = (try parser.constExpr()) orelse return parser.err(.{
+ .ExpectedExpr = .{ .token = parser.it.index },
+ });
+ }
+ return &node.base;
+ }
+
+ /// RecordSpec <- (Keyword_struct / Keyword_union) IDENTIFIER? (LBRACE RecordField+ RBRACE)?
+ ///
+ /// `tok` is the already consumed `struct` or `union` keyword. Creates the
+ /// `Node.RecordType`, registers a named record in `parser.symbols` before its
+ /// body is parsed, and parses the optional field list inside a `.Block` scope.
+ fn recordSpec(parser: *Parser, tok: TokenIndex) !*Node.RecordType {
+ const record = try parser.arena.create(Node.RecordType);
+ const maybe_name = parser.eatToken(.Identifier);
+ // The keyword text starts with 's' for `struct`; anything else is `union`.
+ const keyword_is_struct = parser.tree.tokenSlice(tok)[0] == 's';
+ record.* = .{
+ .tok = tok,
+ .kind = if (keyword_is_struct) .Struct else .Union,
+ .name = maybe_name,
+ .body = null,
+ };
+ const record_ty = try parser.arena.create(Type);
+ record_ty.* = .{
+ .id = .{
+ .Record = record,
+ },
+ };
+ if (maybe_name) |name_tok| {
+ try parser.symbols.append(.{
+ .name = parser.tree.tokenSlice(name_tok),
+ .ty = record_ty,
+ });
+ }
+ if (parser.eatToken(.LBrace)) |lbrace| {
+ try parser.pushScope(.Block);
+ defer parser.popScope();
+ var fields = Node.RecordType.FieldList.init(parser.arena);
+ // Keep reading fields until the closing brace shows up.
+ var closing = parser.eatToken(.RBrace);
+ while (closing == null) : (closing = parser.eatToken(.RBrace)) {
+ try fields.push(try parser.recordField());
+ }
+ record.body = .{
+ .lbrace = lbrace,
+ .fields = fields,
+ .rbrace = closing.?,
+ };
+ }
+ return record;
+ }
+
+ /// RecordField
+ /// <- TypeSpec* (RecordDeclarator (COMMA RecordDeclarator))? SEMICOLON
+ /// / StaticAssert
+ ///
+ /// Parses one member declaration of a struct or union. A static assertion is
+ /// returned as is. Otherwise at least one type specifier is required
+ /// (`ExpectedType`), followed by a comma separated list of declarators, each of
+ /// which is declared as a symbol, and a terminating semicolon.
+ fn recordField(parser: *Parser) Error!*Node {
+ if (try parser.staticAssert()) |static_assert| return static_assert;
+ var type_spec = Node.TypeSpec{};
+ var saw_type_spec = false;
+ while (try parser.typeSpec(&type_spec)) {
+ saw_type_spec = true;
+ }
+ if (!saw_type_spec) {
+ return parser.err(.{
+ .ExpectedType = .{ .token = parser.it.index },
+ });
+ }
+ const field = try parser.arena.create(Node.RecordField);
+ field.* = .{
+ .type_spec = type_spec,
+ .declarators = Node.RecordField.DeclaratorList.init(parser.arena),
+ .semicolon = undefined,
+ };
+ while (true) {
+ const record_declarator = try parser.recordDeclarator();
+ try parser.declareSymbol(type_spec, record_declarator.declarator);
+ try field.declarators.push(&record_declarator.base);
+ if (parser.eatToken(.Comma) == null) break;
+ }
+
+ field.semicolon = try parser.expectToken(.Semicolon);
+ return &field.base;
+ }
+
+ /// TypeName <- TypeSpec* AbstractDeclarator?
+ ///
+ /// Not implemented yet: the body unconditionally panics with "TODO".
+ fn typeName(parser: *Parser) Error!?*Node {
+ @panic("TODO");
+ }
+
+ /// RecordDeclarator <- Declarator? (COLON ConstExpr)?
+ ///
+ /// Not implemented yet: the body unconditionally panics with "TODO".
+ fn recordDeclarator(parser: *Parser) Error!*Node.RecordDeclarator {
+ @panic("TODO");
+ }
+
+ /// Pointer <- ASTERISK TypeQual* Pointer?
+ ///
+ /// Parses a chain of `*` tokens, each with its own qualifiers, into nested
+ /// `Node.Pointer` nodes. Returns null when the next token is not `*`.
+ fn pointer(parser: *Parser) Error!?*Node.Pointer {
+ if (parser.eatToken(.Asterisk)) |star| {
+ const ptr_node = try parser.arena.create(Node.Pointer);
+ ptr_node.* = .{
+ .asterisk = star,
+ .qual = .{},
+ .pointer = null,
+ };
+ // Collect qualifiers until typeQual reports that none is left.
+ var more_quals = true;
+ while (more_quals) {
+ more_quals = try parser.typeQual(&ptr_node.qual);
+ }
+ // Further levels of indirection are parsed recursively.
+ ptr_node.pointer = try parser.pointer();
+ return ptr_node;
+ }
+ return null;
+ }
+
+ /// Tells `declarator` whether the declarator being parsed may carry a name.
+ const Named = enum {
+ /// An identifier (or a parenthesized declarator) is required; `declarator`
+ /// reports `ExpectedToken` otherwise.
+ Must,
+ /// An identifier is accepted but not required.
+ Allowed,
+ /// `declarator` does not look for an identifier at all.
+ Forbidden,
+ };
+
+ /// Declarator <- Pointer? DeclaratorSuffix
+ /// DeclaratorPrefix
+ /// <- IDENTIFIER // if named != .Forbidden
+ /// / LPAREN Declarator RPAREN
+ /// / (none) // if named != .Must
+ /// DeclaratorSuffix
+ /// <- DeclaratorPrefix (LBRACKET ArrayDeclarator? RBRACKET)*
+ /// / DeclaratorPrefix LPAREN (ParamDecl (COMMA ParamDecl)* (COMMA ELLIPSIS)?)? RPAREN
+ ///
+ /// Parses the optional pointer part, then the prefix (parenthesized inner
+ /// declarator, identifier, or nothing, depending on `named`) and finally at
+ /// most one kind of suffix (a parameter list or a run of array brackets).
+ /// Returns null only when `named` is not `.Forbidden`, no prefix is present,
+ /// `named` is not `.Must` and there was no pointer either.
+ fn declarator(parser: *Parser, named: Named) Error!?*Node {
+ const ptr = try parser.pointer();
+ // Assigned by every prefix branch that does not return early.
+ var node: *Node.Declarator = undefined;
+ // Set when a parenthesized inner declarator is a function; such a
+ // declarator must not be followed by another suffix.
+ var inner_fn = false;
+
+ // TODO sizeof(int (int))
+ // prefix
+ if (parser.eatToken(.LParen)) |lparen| {
+ const inner = (try parser.declarator(named)) orelse return parser.err(.{
+ .ExpectedDeclarator = .{ .token = lparen + 1 },
+ });
+ inner_fn = declaratorIsFunction(inner);
+ node = try parser.arena.create(Node.Declarator);
+ node.* = .{
+ .pointer = ptr,
+ .prefix = .{
+ .Complex = .{
+ .lparen = lparen,
+ .inner = inner,
+ .rparen = try parser.expectToken(.RParen),
+ },
+ },
+ .suffix = .None,
+ };
+ } else if (named != .Forbidden) {
+ if (parser.eatToken(.Identifier)) |tok| {
+ node = try parser.arena.create(Node.Declarator);
+ node.* = .{
+ .pointer = ptr,
+ .prefix = .{ .Identifer = tok },
+ .suffix = .None,
+ };
+ } else if (named == .Must) {
+ return parser.err(.{
+ .ExpectedToken = .{ .token = parser.it.index, .expected_id = .Identifier },
+ });
+ } else {
+ // No name and none required: the result is the bare pointer node, if
+ // any. No suffix is parsed on this path.
+ if (ptr) |some|
+ return &some.base;
+ return null;
+ }
+ } else {
+ node = try parser.arena.create(Node.Declarator);
+ node.* = .{
+ .pointer = ptr,
+ .prefix = .None,
+ .suffix = .None,
+ };
+ }
+ // suffix
+ if (parser.eatToken(.LParen)) |lparen| {
+ if (inner_fn)
+ return parser.err(.{
+ .InvalidDeclarator = .{ .token = lparen },
+ });
+ node.suffix = .{
+ .Fn = .{
+ .lparen = lparen,
+ .params = Node.Declarator.Params.init(parser.arena),
+ .rparen = undefined,
+ },
+ };
+ try parser.paramDecl(node);
+ node.suffix.Fn.rparen = try parser.expectToken(.RParen);
+ } else if (parser.eatToken(.LBracket)) |tok| {
+ if (inner_fn)
+ return parser.err(.{
+ .InvalidDeclarator = .{ .token = tok },
+ });
+ node.suffix = .{ .Array = Node.Declarator.Arrays.init(parser.arena) };
+ // Holds the most recently consumed `[` token for the next array suffix.
+ var lbrace = tok;
+ while (true) {
+ try node.suffix.Array.push(try parser.arrayDeclarator(lbrace));
+ if (parser.eatToken(.LBracket)) |t| lbrace = t else break;
+ }
+ }
+ // Anything that still looks like a suffix after the first one is rejected.
+ if (parser.eatToken(.LParen) orelse parser.eatToken(.LBracket)) |tok|
+ return parser.err(.{
+ .InvalidDeclarator = .{ .token = tok },
+ });
+ return &node.base;
+ }
+
+ /// ArrayDeclarator
+ /// <- ASTERISK
+ /// / Keyword_static TypeQual* AssignmentExpr
+ /// / TypeQual+ (ASTERISK / Keyword_static AssignmentExpr)
+ /// / TypeQual+ AssignmentExpr?
+ /// / AssignmentExpr
+ ///
+ /// `lbracket` is the already consumed `[`. Only the `[*]` and `[]` forms are
+ /// handled so far; the node is completed with the closing `]`.
+ fn arrayDeclarator(parser: *Parser, lbracket: TokenIndex) !*Node.Array {
+ const array_node = try parser.arena.create(Node.Array);
+ array_node.* = .{
+ .lbracket = lbracket,
+ .inner = .Inferred,
+ .rbracket = undefined,
+ };
+ if (parser.eatToken(.Asterisk)) |star| {
+ array_node.inner = .{ .Unspecified = star };
+ }
+ // TODO: the remaining alternatives of the grammar above
+ array_node.rbracket = try parser.expectToken(.RBracket);
+ return array_node;
+ }
+
+ /// Params <- ParamDecl (COMMA ParamDecl)* (COMMA ELLIPSIS)?
+ /// ParamDecl <- DeclSpec (Declarator / AbstractDeclarator)
+ ///
+ /// Walks the parameter list of the function declarator `dr` up to, but not
+ /// including, the closing parenthesis. Still unfinished: nothing is stored in
+ /// `dr` yet. The loop used to have no exit and never consumed the separating
+ /// commas, so it could not terminate; it now stops after `...`, when no
+ /// parameter can be parsed, or when a parameter is not followed by a comma.
+ fn paramDecl(parser: *Parser, dr: *Node.Declarator) !void {
+ var old_style = false;
+ while (true) {
+ var ds = Node.DeclSpec{};
+ if (try parser.declSpec(&ds)) {
+ //TODO
+ // TODO try parser.declareSymbol(ds.type_spec, dr);
+ } else if (parser.eatToken(.Identifier)) |_| {
+ // A bare identifier list is an old-style (K&R) parameter list.
+ old_style = true;
+ } else if (parser.eatToken(.Ellipsis)) |_| {
+ // TODO
+ // `...` is always the last entry of the list.
+ break;
+ } else {
+ // Nothing that can start a parameter (for example an empty list).
+ break;
+ }
+ if (parser.eatToken(.Comma) == null) break;
+ }
+ }
+
+ /// Expr <- AssignmentExpr (COMMA Expr)*
+ ///
+ /// Not implemented yet: the body unconditionally panics with "TODO".
+ fn expr(parser: *Parser) Error!?*Expr {
+ @panic("TODO");
+ }
+
+ /// AssignmentExpr
+ /// <- ConditionalExpr // TODO recursive?
+ /// / UnaryExpr (EQUAL / ASTERISKEQUAL / SLASHEQUAL / PERCENTEQUAL / PLUSEQUAL / MINUSEQUAL /
+ /// ANGLEBRACKETANGLEBRACKETLEFTEQUAL / ANGLEBRACKETANGLEBRACKETRIGHTEQUAL /
+ /// AMPERSANDEQUAL / CARETEQUAL / PIPEEQUAL) AssignmentExpr
+ ///
+ /// Not implemented yet: the body unconditionally panics with "TODO".
+ fn assignmentExpr(parser: *Parser) !?*Expr {
+ @panic("TODO");
+ }
+
+ /// ConstExpr <- ConditionalExpr
+ ///
+ /// Parses a conditional expression and requires it to carry a value.
+ /// Returns null when no expression is present; an expression whose `value` is
+ /// `.None` is rejected with a `ConsExpr` error located at its first token.
+ fn constExpr(parser: *Parser) Error!?*Expr {
+ const first_token = parser.it.index;
+ const parsed = (try parser.conditionalExpr()) orelse return null;
+ if (parsed.value == .None) {
+ return parser.err(.{
+ .ConsExpr = first_token,
+ });
+ }
+ return parsed;
+ }
+
+ /// ConditionalExpr <- LogicalOrExpr (QUESTIONMARK Expr COLON ConditionalExpr)?
+ ///
+ /// Not implemented yet: the body unconditionally panics with "TODO".
+ fn conditionalExpr(parser: *Parser) Error!?*Expr {
+ @panic("TODO");
+ }
+
+ /// LogicalOrExpr <- LogicalAndExpr (PIPEPIPE LogicalOrExpr)*
+ ///
+ /// Not implemented yet: the body unconditionally panics with "TODO", like the
+ /// other expression stubs. The previous draft applied `orelse` to the
+ /// non-optional result of `logicalAndExpr`, returned `null` from a function
+ /// declared `!*Node`, and had no final return.
+ fn logicalOrExpr(parser: *Parser) !*Node {
+ // TODO: parse `logicalAndExpr` as the left operand, then fold the
+ // `PIPEPIPE` operands.
+ @panic("TODO");
+ }
+
+ /// LogicalAndExpr <- BinOrExpr (AMPERSANDAMPERSAND LogicalAndExpr)*
+ ///
+ /// Not implemented yet: the body unconditionally panics with "TODO".
+ fn logicalAndExpr(parser: *Parser) !*Node {
+ @panic("TODO");
+ }
+
+ /// BinOrExpr <- BinXorExpr (PIPE BinOrExpr)*
+ ///
+ /// Not implemented yet: the body unconditionally panics with "TODO".
+ fn binOrExpr(parser: *Parser) !*Node {
+ @panic("TODO");
+ }
+
+ /// BinXorExpr <- BinAndExpr (CARET BinXorExpr)*
+ ///
+ /// Not implemented yet: the body unconditionally panics with "TODO".
+ fn binXorExpr(parser: *Parser) !*Node {
+ @panic("TODO");
+ }
+
+ /// BinAndExpr <- EqualityExpr (AMPERSAND BinAndExpr)*
+ ///
+ /// Not implemented yet: the body unconditionally panics with "TODO".
+ fn binAndExpr(parser: *Parser) !*Node {
+ @panic("TODO");
+ }
+
+ /// EqualityExpr <- ComparisionExpr ((EQUALEQUAL / BANGEQUAL) EqualityExpr)*
+ ///
+ /// Not implemented yet: the body unconditionally panics with "TODO".
+ fn equalityExpr(parser: *Parser) !*Node {
+ @panic("TODO");
+ }
+
+ /// ComparisionExpr <- ShiftExpr ((ANGLEBRACKETLEFT / ANGLEBRACKETLEFTEQUAL / ANGLEBRACKETRIGHT / ANGLEBRACKETRIGHTEQUAL) ComparisionExpr)*
+ ///
+ /// Not implemented yet: the body unconditionally panics with "TODO".
+ fn comparisionExpr(parser: *Parser) !*Node {
+ @panic("TODO");
+ }
+
+ /// ShiftExpr <- AdditiveExpr ((ANGLEBRACKETANGLEBRACKETLEFT / ANGLEBRACKETANGLEBRACKETRIGHT) ShiftExpr)*
+ ///
+ /// Not implemented yet: the body unconditionally panics with "TODO".
+ fn shiftExpr(parser: *Parser) !*Node {
+ @panic("TODO");
+ }
+
+ /// AdditiveExpr <- MultiplicativeExpr ((PLUS / MINUS) AdditiveExpr)*
+ ///
+ /// Not implemented yet: the body unconditionally panics with "TODO".
+ fn additiveExpr(parser: *Parser) !*Node {
+ @panic("TODO");
+ }
+
+ /// MultiplicativeExpr <- UnaryExpr ((ASTERISK / SLASH / PERCENT) MultiplicativeExpr)*
+ ///
+ /// Not implemented yet: the body unconditionally panics with "TODO".
+ fn multiplicativeExpr(parser: *Parser) !*Node {
+ @panic("TODO");
+ }
+
+ /// UnaryExpr
+ /// <- LPAREN TypeName RPAREN UnaryExpr
+ /// / Keyword_sizeof LPAREN TypeName RPAREN
+ /// / Keyword_sizeof UnaryExpr
+ /// / Keyword_alignof LPAREN TypeName RPAREN
+ /// / (AMPERSAND / ASTERISK / PLUS / PLUSPLUS / MINUS / MINUSMINUS / TILDE / BANG) UnaryExpr
+ /// / PrimaryExpr PostFixExpr*
+ ///
+ /// Not implemented yet: the body unconditionally panics with "TODO".
+ fn unaryExpr(parser: *Parser) !*Node {
+ @panic("TODO");
+ }
+
+ /// PrimaryExpr
+ /// <- IDENTIFIER
+ /// / INTEGERLITERAL / FLOATLITERAL / STRINGLITERAL / CHARLITERAL
+ /// / LPAREN Expr RPAREN
+ /// / Keyword_generic LPAREN AssignmentExpr (COMMA Generic)+ RPAREN
+ ///
+ /// Not implemented yet: the body unconditionally panics with "TODO".
+ fn primaryExpr(parser: *Parser) !*Node {
+ @panic("TODO");
+ }
+
+ /// Generic
+ /// <- TypeName COLON AssignmentExpr
+ /// / Keyword_default COLON AssignmentExpr
+ ///
+ /// Not implemented yet: the body unconditionally panics with "TODO".
+ fn generic(parser: *Parser) !*Node {
+ @panic("TODO");
+ }
+
+ /// PostFixExpr
+ /// <- LPAREN TypeName RPAREN LBRACE Initializers RBRACE
+ /// / LBRACKET Expr RBRACKET
+ /// / LPAREN (AssignmentExpr (COMMA AssignmentExpr)*)? RPAREN
+ /// / (PERIOD / ARROW) IDENTIFIER
+ /// / (PLUSPLUS / MINUSMINUS)
+ ///
+ /// Not implemented yet: the body unconditionally panics with "TODO".
+ fn postFixExpr(parser: *Parser) !*Node {
+ @panic("TODO");
+ }
+
+ /// Initializers <- ((Designator+ EQUAL)? Initializer COMMA)* (Designator+ EQUAL)? Initializer COMMA?
+ ///
+ /// Not implemented yet: the body unconditionally panics with "TODO".
+ fn initializers(parser: *Parser) !*Node {
+ @panic("TODO");
+ }
+
+ /// Initializer
+ /// <- LBRACE Initializers RBRACE
+ /// / AssignmentExpr
+ ///
+ /// Not implemented yet: the body unconditionally panics with "TODO".
+ fn initializer(parser: *Parser, dr: *Node.Declarator) Error!?*Node {
+ @panic("TODO");
+ }
+
+ /// Designator
+ /// <- LBRACKET ConstExpr RBRACKET
+ /// / PERIOD IDENTIFIER
+ ///
+ /// Not implemented yet: the body unconditionally panics with "TODO".
+ fn designator(parser: *Parser) !*Node {
+ @panic("TODO");
+ }
+
+ /// CompoundStmt <- LBRACE (Declaration / Stmt)* RBRACE
+ ///
+ /// Parses a brace-enclosed block inside its own `.Block` scope. Returns null
+ /// when the next token is not `{`. Every item is tried as a declaration first
+ /// and as a statement otherwise.
+ fn compoundStmt(parser: *Parser) Error!?*Node {
+ const lbrace = parser.eatToken(.LBrace) orelse return null;
+ try parser.pushScope(.Block);
+ defer parser.popScope();
+ const body_node = try parser.arena.create(Node.CompoundStmt);
+ body_node.* = .{
+ .lbrace = lbrace,
+ .statements = Node.CompoundStmt.StmtList.init(parser.arena),
+ .rbrace = undefined,
+ };
+ while (true) {
+ // The token tag is spelled `RBrace`; `.RBRACE` is not a member of Token.Id.
+ if (parser.eatToken(.RBrace)) |rbrace| {
+ body_node.rbrace = rbrace;
+ break;
+ }
+ try body_node.statements.push((try parser.declaration()) orelse (try parser.stmt()));
+ }
+ return &body_node.base;
+ }
+
+ /// Stmt
+ /// <- CompoundStmt
+ /// / Keyword_if LPAREN Expr RPAREN Stmt (Keyword_ELSE Stmt)?
+ /// / Keyword_switch LPAREN Expr RPAREN Stmt
+ /// / Keyword_while LPAREN Expr RPAREN Stmt
+ /// / Keyword_do statement Keyword_while LPAREN Expr RPAREN SEMICOLON
+ /// / Keyword_for LPAREN (Declaration / ExprStmt) ExprStmt Expr? RPAREN Stmt
+ /// / Keyword_default COLON Stmt
+ /// / Keyword_case ConstExpr COLON Stmt
+ /// / Keyword_goto IDENTIFIER SEMICOLON
+ /// / Keyword_continue SEMICOLON
+ /// / Keyword_break SEMICOLON
+ /// / Keyword_return Expr? SEMICOLON
+ /// / IDENTIFIER COLON Stmt
+ /// / ExprStmt
+ ///
+ /// Parses one statement. The alternatives are tried in the order of the
+ /// branches below; anything not introduced by a statement keyword or a label
+ /// is parsed as an expression statement. Loops and `switch` push a scope of
+ /// the matching kind for the duration of the statement.
+ fn stmt(parser: *Parser) Error!*Node {
+ if (try parser.compoundStmt()) |node| return node;
+ if (parser.eatToken(.Keyword_if)) |tok| {
+ const node = try parser.arena.create(Node.IfStmt);
+ _ = try parser.expectToken(.LParen);
+ node.* = .{
+ .@"if" = tok,
+ .cond = (try parser.expr()) orelse return parser.err(.{
+ .ExpectedExpr = .{ .token = parser.it.index },
+ }),
+ .body = undefined,
+ .@"else" = null,
+ };
+ _ = try parser.expectToken(.RParen);
+ node.body = try parser.stmt();
+ if (parser.eatToken(.Keyword_else)) |else_tok| {
+ node.@"else" = .{
+ .tok = else_tok,
+ .body = try parser.stmt(),
+ };
+ }
+ return &node.base;
+ }
+ if (parser.eatToken(.Keyword_while)) |tok| {
+ try parser.pushScope(.Loop);
+ defer parser.popScope();
+ _ = try parser.expectToken(.LParen);
+ const cond = (try parser.expr()) orelse return parser.err(.{
+ .ExpectedExpr = .{ .token = parser.it.index },
+ });
+ const rparen = try parser.expectToken(.RParen);
+ const node = try parser.arena.create(Node.WhileStmt);
+ // NOTE(review): a `;` is expected after the body here although the
+ // grammar above lists none for `while`.
+ node.* = .{
+ .@"while" = tok,
+ .cond = cond,
+ .rparen = rparen,
+ .body = try parser.stmt(),
+ .semicolon = try parser.expectToken(.Semicolon),
+ };
+ return &node.base;
+ }
+ if (parser.eatToken(.Keyword_do)) |tok| {
+ try parser.pushScope(.Loop);
+ defer parser.popScope();
+ const body = try parser.stmt();
+ // The `while` keyword between body and condition was never consumed, and
+ // the node referred to an undefined `@"while"` identifier.
+ const while_tok = try parser.expectToken(.Keyword_while);
+ _ = try parser.expectToken(.LParen);
+ const cond = (try parser.expr()) orelse return parser.err(.{
+ .ExpectedExpr = .{ .token = parser.it.index },
+ });
+ _ = try parser.expectToken(.RParen);
+ const node = try parser.arena.create(Node.DoStmt);
+ node.* = .{
+ .do = tok,
+ .body = body,
+ .cond = cond,
+ .@"while" = while_tok,
+ .semicolon = try parser.expectToken(.Semicolon),
+ };
+ return &node.base;
+ }
+ if (parser.eatToken(.Keyword_for)) |tok| {
+ try parser.pushScope(.Loop);
+ defer parser.popScope();
+ _ = try parser.expectToken(.LParen);
+ const init = if (try parser.declaration()) |decl| blk: {
+ // TODO disallow storage class other than auto and register
+ break :blk decl;
+ } else try parser.exprStmt();
+ const cond = try parser.expr();
+ const semicolon = try parser.expectToken(.Semicolon);
+ const incr = try parser.expr();
+ const rparen = try parser.expectToken(.RParen);
+ const node = try parser.arena.create(Node.ForStmt);
+ node.* = .{
+ .@"for" = tok,
+ .init = init,
+ .cond = cond,
+ .semicolon = semicolon,
+ .incr = incr,
+ .rparen = rparen,
+ .body = try parser.stmt(),
+ };
+ return &node.base;
+ }
+ if (parser.eatToken(.Keyword_switch)) |tok| {
+ try parser.pushScope(.Switch);
+ defer parser.popScope();
+ _ = try parser.expectToken(.LParen);
+ // NOTE(review): exprStmt also requires a `;`, so this expects one before
+ // the `)`; the grammar above asks for a plain Expr.
+ const switch_expr = try parser.exprStmt();
+ const rparen = try parser.expectToken(.RParen);
+ const node = try parser.arena.create(Node.SwitchStmt);
+ node.* = .{
+ .@"switch" = tok,
+ .expr = switch_expr,
+ .rparen = rparen,
+ .body = try parser.stmt(),
+ };
+ return &node.base;
+ }
+ if (parser.eatToken(.Keyword_default)) |tok| {
+ _ = try parser.expectToken(.Colon);
+ const node = try parser.arena.create(Node.LabeledStmt);
+ node.* = .{
+ .kind = .{ .Default = tok },
+ .stmt = try parser.stmt(),
+ };
+ return &node.base;
+ }
+ if (parser.eatToken(.Keyword_case)) |tok| {
+ // TODO(review): the grammar requires a ConstExpr between `case` and `:`;
+ // it is not parsed here.
+ _ = try parser.expectToken(.Colon);
+ const node = try parser.arena.create(Node.LabeledStmt);
+ node.* = .{
+ .kind = .{ .Case = tok },
+ .stmt = try parser.stmt(),
+ };
+ return &node.base;
+ }
+ if (parser.eatToken(.Keyword_goto)) |tok| {
+ const node = try parser.arena.create(Node.JumpStmt);
+ // The jump target is the label identifier that follows the keyword; the
+ // `goto` token itself used to be stored and the label was left unconsumed.
+ const label = try parser.expectToken(.Identifier);
+ node.* = .{
+ .ltoken = tok,
+ .kind = .{ .Goto = label },
+ .semicolon = try parser.expectToken(.Semicolon),
+ };
+ return &node.base;
+ }
+ if (parser.eatToken(.Keyword_continue)) |tok| {
+ const node = try parser.arena.create(Node.JumpStmt);
+ node.* = .{
+ .ltoken = tok,
+ .kind = .Continue,
+ .semicolon = try parser.expectToken(.Semicolon),
+ };
+ return &node.base;
+ }
+ if (parser.eatToken(.Keyword_break)) |tok| {
+ const node = try parser.arena.create(Node.JumpStmt);
+ node.* = .{
+ .ltoken = tok,
+ .kind = .Break,
+ .semicolon = try parser.expectToken(.Semicolon),
+ };
+ return &node.base;
+ }
+ if (parser.eatToken(.Keyword_return)) |tok| {
+ const node = try parser.arena.create(Node.JumpStmt);
+ node.* = .{
+ .ltoken = tok,
+ .kind = .{ .Return = try parser.expr() },
+ .semicolon = try parser.expectToken(.Semicolon),
+ };
+ return &node.base;
+ }
+ if (parser.eatToken(.Identifier)) |tok| {
+ if (parser.eatToken(.Colon)) |_| {
+ const node = try parser.arena.create(Node.LabeledStmt);
+ node.* = .{
+ .kind = .{ .Label = tok },
+ .stmt = try parser.stmt(),
+ };
+ return &node.base;
+ }
+ // Not a label: hand the identifier back so it is parsed as an expression.
+ parser.putBackToken(tok);
+ }
+ return parser.exprStmt();
+ }
+
+ /// ExprStmt <- Expr? SEMICOLON
+ ///
+ /// Parses an optional expression followed by the mandatory semicolon and
+ /// wraps both in a `Node.ExprStmt`.
+ fn exprStmt(parser: *Parser) !*Node {
+ const stmt_node = try parser.arena.create(Node.ExprStmt);
+ const expression = try parser.expr();
+ const semicolon = try parser.expectToken(.Semicolon);
+ stmt_node.* = .{
+ .expr = expression,
+ .semicolon = semicolon,
+ };
+ return &stmt_node.base;
+ }
+
+ /// Consumes the next significant token when its tag is `id` and returns the
+ /// index of that token; otherwise un-reads it and returns null. Line
+ /// comments, multi-line comments and newlines in front of it are skipped.
+ ///
+ /// The returned index identifies the consumed token itself, which is what
+ /// callers rely on (`tokenSlice(tok)`, `lparen + 1`, `putBackToken(tok)`).
+ /// It used to be the iterator position after the token.
+ fn eatToken(parser: *Parser, id: @TagType(Token.Id)) ?TokenIndex {
+ while (true) {
+ switch ((parser.it.next() orelse return null).id) {
+ .LineComment, .MultiLineComment, .Nl => continue,
+ else => |next_id| if (next_id == id) {
+ // `next()` has already advanced past the token it returned.
+ return parser.it.index - 1;
+ } else {
+ _ = parser.it.prev();
+ return null;
+ },
+ }
+ }
+ }
+
+ /// Like `eatToken`, but a mismatch is an error: an `ExpectedToken` message
+ /// pointing at the offending token is recorded and `error.ParseError` is
+ /// returned. Running out of tokens returns `error.ParseError` without a
+ /// message. On a mismatch the offending token stays consumed.
+ fn expectToken(parser: *Parser, id: @TagType(Token.Id)) Error!TokenIndex {
+ while (true) {
+ switch ((parser.it.next() orelse return error.ParseError).id) {
+ .LineComment, .MultiLineComment, .Nl => continue,
+ else => |next_id| if (next_id != id) {
+ return parser.err(.{
+ // Index of the token just read, not of the one after it.
+ .ExpectedToken = .{ .token = parser.it.index - 1, .expected_id = id },
+ });
+ } else {
+ return parser.it.index - 1;
+ },
+ }
+ }
+ }
+
+ /// Rewinds the iterator so that the token `putting_back`, which must be the
+ /// most recently consumed significant token, is read again next. Comments
+ /// and newlines between the current position and that token are stepped over.
+ fn putBackToken(parser: *Parser, putting_back: TokenIndex) void {
+ while (true) {
+ // Step backwards; this used to call `next()`, which consumed one more
+ // token instead of returning one.
+ const prev_tok = parser.it.prev() orelse return;
+ switch (prev_tok.id) {
+ .LineComment, .MultiLineComment, .Nl => continue,
+ else => {
+ assert(parser.it.list.at(putting_back) == prev_tok);
+ return;
+ },
+ }
+ }
+ }
+
+ /// Records `msg` as an error message on the tree and yields
+ /// `error.ParseError`, so call sites can write `return parser.err(...)`.
+ fn err(parser: *Parser, msg: ast.Error) Error {
+ const entry = ast.Msg{
+ .kind = .Error,
+ .inner = msg,
+ };
+ try parser.tree.msgs.push(entry);
+ return error.ParseError;
+ }
+
+ /// Records `msg` as a warning, or as an error when
+ /// `parser.options.warn_as_err` promotes it (`.All`, or `.Some` with a
+ /// matching entry). A promoted message also makes this function return
+ /// `error.ParseError`.
+ fn warn(parser: *Parser, msg: ast.Error) Error!void {
+ var promoted = false;
+ switch (parser.options.warn_as_err) {
+ .None => {},
+ .Some => |list| for (list) |item| {
+ if (item == msg) {
+ promoted = true;
+ break;
+ }
+ },
+ .All => promoted = true,
+ }
+ try parser.tree.msgs.push(.{
+ .kind = if (promoted) .Error else .Warning,
+ .inner = msg,
+ });
+ if (promoted) return error.ParseError;
+ }
+
+ /// Records `msg` as a note on the tree; parsing continues.
+ fn note(parser: *Parser, msg: ast.Error) Error!void {
+ const entry = ast.Msg{
+ .kind = .Note,
+ .inner = msg,
+ };
+ try parser.tree.msgs.push(entry);
+ }
+};
+
diff --git a/lib/std/c/tokenizer.zig b/lib/std/c/tokenizer.zig
@@ -0,0 +1,1583 @@
+const std = @import("std");
+const mem = std.mem;
+
+ /// One input buffer together with its file name and a list of tokens.
+ pub const Source = struct {
+ /// Text of the source; `Token.slice` takes its sub-slices from here.
+ buffer: []const u8,
+ file_name: []const u8,
+ tokens: TokenList,
+
+ pub const TokenList = std.SegmentedList(Token, 64);
+ };
+
+pub const Token = struct {
+ id: Id,
+ start: usize,
+ end: usize,
+ source: *Source,
+
+ /// Kind of a token. String and character literals carry their `StrKind`,
+ /// numeric literals carry their `NumSuffix`; all other tags have no payload.
+ pub const Id = union(enum) {
+ Invalid,
+ Eof,
+ Nl,
+ Identifier,
+
+ /// special case for #include <...>
+ MacroString,
+ StringLiteral: StrKind,
+ CharLiteral: StrKind,
+ IntegerLiteral: NumSuffix,
+ FloatLiteral: NumSuffix,
+ Bang,
+ BangEqual,
+ Pipe,
+ PipePipe,
+ PipeEqual,
+ Equal,
+ EqualEqual,
+ LParen,
+ RParen,
+ LBrace,
+ RBrace,
+ LBracket,
+ RBracket,
+ Period,
+ Ellipsis,
+ Caret,
+ CaretEqual,
+ Plus,
+ PlusPlus,
+ PlusEqual,
+ Minus,
+ MinusMinus,
+ MinusEqual,
+ Asterisk,
+ AsteriskEqual,
+ Percent,
+ PercentEqual,
+ Arrow,
+ Colon,
+ Semicolon,
+ Slash,
+ SlashEqual,
+ Comma,
+ Ampersand,
+ AmpersandAmpersand,
+ AmpersandEqual,
+ QuestionMark,
+ AngleBracketLeft,
+ AngleBracketLeftEqual,
+ AngleBracketAngleBracketLeft,
+ AngleBracketAngleBracketLeftEqual,
+ AngleBracketRight,
+ AngleBracketRightEqual,
+ AngleBracketAngleBracketRight,
+ AngleBracketAngleBracketRightEqual,
+ Tilde,
+ LineComment,
+ MultiLineComment,
+ Hash,
+ HashHash,
+
+ Keyword_auto,
+ Keyword_break,
+ Keyword_case,
+ Keyword_char,
+ Keyword_const,
+ Keyword_continue,
+ Keyword_default,
+ Keyword_do,
+ Keyword_double,
+ Keyword_else,
+ Keyword_enum,
+ Keyword_extern,
+ Keyword_float,
+ Keyword_for,
+ Keyword_goto,
+ Keyword_if,
+ Keyword_int,
+ Keyword_long,
+ Keyword_register,
+ Keyword_return,
+ Keyword_short,
+ Keyword_signed,
+ Keyword_sizeof,
+ Keyword_static,
+ Keyword_struct,
+ Keyword_switch,
+ Keyword_typedef,
+ Keyword_union,
+ Keyword_unsigned,
+ Keyword_void,
+ Keyword_volatile,
+ Keyword_while,
+
+ // ISO C99
+ Keyword_bool,
+ Keyword_complex,
+ Keyword_imaginary,
+ Keyword_inline,
+ Keyword_restrict,
+
+ // ISO C11
+ Keyword_alignas,
+ Keyword_alignof,
+ Keyword_atomic,
+ Keyword_generic,
+ Keyword_noreturn,
+ Keyword_static_assert,
+ Keyword_thread_local,
+
+ // Preprocessor directives
+ Keyword_include,
+ Keyword_define,
+ Keyword_ifdef,
+ Keyword_ifndef,
+ Keyword_error,
+ Keyword_pragma,
+
+ /// Returns a fixed display string for a token tag: the spelling for
+ /// punctuation and keywords, and a descriptive name for tags without a fixed
+ /// spelling (e.g. "Identifier", "StringLiteral").
+ pub fn symbol(id: @TagType(Id)) []const u8 {
+ return switch (id) {
+ .Invalid => "Invalid",
+ .Eof => "Eof",
+ .Nl => "NewLine",
+ .Identifier => "Identifier",
+ .MacroString => "MacroString",
+ .StringLiteral => "StringLiteral",
+ .CharLiteral => "CharLiteral",
+ .IntegerLiteral => "IntegerLiteral",
+ .FloatLiteral => "FloatLiteral",
+ .LineComment => "LineComment",
+ .MultiLineComment => "MultiLineComment",
+
+ .Bang => "!",
+ .BangEqual => "!=",
+ .Pipe => "|",
+ .PipePipe => "||",
+ .PipeEqual => "|=",
+ .Equal => "=",
+ .EqualEqual => "==",
+ .LParen => "(",
+ .RParen => ")",
+ .LBrace => "{",
+ .RBrace => "}",
+ .LBracket => "[",
+ .RBracket => "]",
+ .Period => ".",
+ .Ellipsis => "...",
+ .Caret => "^",
+ .CaretEqual => "^=",
+ .Plus => "+",
+ .PlusPlus => "++",
+ .PlusEqual => "+=",
+ .Minus => "-",
+ .MinusMinus => "--",
+ .MinusEqual => "-=",
+ .Asterisk => "*",
+ .AsteriskEqual => "*=",
+ .Percent => "%",
+ .PercentEqual => "%=",
+ .Arrow => "->",
+ .Colon => ":",
+ .Semicolon => ";",
+ .Slash => "/",
+ .SlashEqual => "/=",
+ .Comma => ",",
+ .Ampersand => "&",
+ .AmpersandAmpersand => "&&",
+ .AmpersandEqual => "&=",
+ .QuestionMark => "?",
+ .AngleBracketLeft => "<",
+ .AngleBracketLeftEqual => "<=",
+ .AngleBracketAngleBracketLeft => "<<",
+ .AngleBracketAngleBracketLeftEqual => "<<=",
+ .AngleBracketRight => ">",
+ .AngleBracketRightEqual => ">=",
+ .AngleBracketAngleBracketRight => ">>",
+ .AngleBracketAngleBracketRightEqual => ">>=",
+ .Tilde => "~",
+ .Hash => "#",
+ .HashHash => "##",
+ .Keyword_auto => "auto",
+ .Keyword_break => "break",
+ .Keyword_case => "case",
+ .Keyword_char => "char",
+ .Keyword_const => "const",
+ .Keyword_continue => "continue",
+ .Keyword_default => "default",
+ .Keyword_do => "do",
+ .Keyword_double => "double",
+ .Keyword_else => "else",
+ .Keyword_enum => "enum",
+ .Keyword_extern => "extern",
+ .Keyword_float => "float",
+ .Keyword_for => "for",
+ .Keyword_goto => "goto",
+ .Keyword_if => "if",
+ .Keyword_int => "int",
+ .Keyword_long => "long",
+ .Keyword_register => "register",
+ .Keyword_return => "return",
+ .Keyword_short => "short",
+ .Keyword_signed => "signed",
+ .Keyword_sizeof => "sizeof",
+ .Keyword_static => "static",
+ .Keyword_struct => "struct",
+ .Keyword_switch => "switch",
+ .Keyword_typedef => "typedef",
+ .Keyword_union => "union",
+ .Keyword_unsigned => "unsigned",
+ .Keyword_void => "void",
+ .Keyword_volatile => "volatile",
+ .Keyword_while => "while",
+ .Keyword_bool => "_Bool",
+ .Keyword_complex => "_Complex",
+ .Keyword_imaginary => "_Imaginary",
+ .Keyword_inline => "inline",
+ .Keyword_restrict => "restrict",
+ .Keyword_alignas => "_Alignas",
+ .Keyword_alignof => "_Alignof",
+ .Keyword_atomic => "_Atomic",
+ .Keyword_generic => "_Generic",
+ .Keyword_noreturn => "_Noreturn",
+ .Keyword_static_assert => "_Static_assert",
+ .Keyword_thread_local => "_Thread_local",
+ .Keyword_include => "include",
+ .Keyword_define => "define",
+ .Keyword_ifdef => "ifdef",
+ .Keyword_ifndef => "ifndef",
+ .Keyword_error => "error",
+ .Keyword_pragma => "pragma",
+ };
+ }
+ };
+
+ /// Two tokens are equal when they have the same tag and the same source
+ /// text; their positions and sources are not compared.
+ pub fn eql(a: Token, b: Token) bool {
+ // do we really need this cast here
+ const same_tag = @as(@TagType(Id), a.id) == b.id;
+ return same_tag and mem.eql(u8, a.slice(), b.slice());
+ }
+
+ /// Returns the text of the token: bytes `start..end` of its source buffer.
+ pub fn slice(tok: Token) []const u8 {
+ const text = tok.source.buffer;
+ return text[tok.start..tok.end];
+ }
+
+ /// Entry of the keyword table: spelling, token tag and the precomputed hash
+ /// of the spelling that `getKeyword` compares before the bytes.
+ pub const Keyword = struct {
+ bytes: []const u8,
+ id: Id,
+ hash: u32,
+
+ /// Builds a table entry, hashing `bytes` with `std.hash_map.hashString`.
+ fn init(bytes: []const u8, id: Id) Keyword {
+ @setEvalBranchQuota(2000);
+ const spelling_hash = std.hash_map.hashString(bytes);
+ return Keyword{
+ .bytes = bytes,
+ .id = id,
+ .hash = spelling_hash,
+ };
+ }
+ };
+
+ // TODO extensions
+ /// Table of all recognized keywords, searched linearly by `getKeyword`.
+ /// The last group holds preprocessor directive names, which `getKeyword`
+ /// only reports when asked for directives.
+ pub const keywords = [_]Keyword{
+ Keyword.init("auto", .Keyword_auto),
+ Keyword.init("break", .Keyword_break),
+ Keyword.init("case", .Keyword_case),
+ Keyword.init("char", .Keyword_char),
+ Keyword.init("const", .Keyword_const),
+ Keyword.init("continue", .Keyword_continue),
+ Keyword.init("default", .Keyword_default),
+ Keyword.init("do", .Keyword_do),
+ Keyword.init("double", .Keyword_double),
+ Keyword.init("else", .Keyword_else),
+ Keyword.init("enum", .Keyword_enum),
+ Keyword.init("extern", .Keyword_extern),
+ Keyword.init("float", .Keyword_float),
+ Keyword.init("for", .Keyword_for),
+ Keyword.init("goto", .Keyword_goto),
+ Keyword.init("if", .Keyword_if),
+ Keyword.init("int", .Keyword_int),
+ Keyword.init("long", .Keyword_long),
+ Keyword.init("register", .Keyword_register),
+ Keyword.init("return", .Keyword_return),
+ Keyword.init("short", .Keyword_short),
+ Keyword.init("signed", .Keyword_signed),
+ Keyword.init("sizeof", .Keyword_sizeof),
+ Keyword.init("static", .Keyword_static),
+ Keyword.init("struct", .Keyword_struct),
+ Keyword.init("switch", .Keyword_switch),
+ Keyword.init("typedef", .Keyword_typedef),
+ Keyword.init("union", .Keyword_union),
+ Keyword.init("unsigned", .Keyword_unsigned),
+ Keyword.init("void", .Keyword_void),
+ Keyword.init("volatile", .Keyword_volatile),
+ Keyword.init("while", .Keyword_while),
+
+ // ISO C99
+ Keyword.init("_Bool", .Keyword_bool),
+ Keyword.init("_Complex", .Keyword_complex),
+ Keyword.init("_Imaginary", .Keyword_imaginary),
+ Keyword.init("inline", .Keyword_inline),
+ Keyword.init("restrict", .Keyword_restrict),
+
+ // ISO C11
+ Keyword.init("_Alignas", .Keyword_alignas),
+ Keyword.init("_Alignof", .Keyword_alignof),
+ Keyword.init("_Atomic", .Keyword_atomic),
+ Keyword.init("_Generic", .Keyword_generic),
+ Keyword.init("_Noreturn", .Keyword_noreturn),
+ Keyword.init("_Static_assert", .Keyword_static_assert),
+ Keyword.init("_Thread_local", .Keyword_thread_local),
+
+ // Preprocessor directives
+ Keyword.init("include", .Keyword_include),
+ Keyword.init("define", .Keyword_define),
+ Keyword.init("ifdef", .Keyword_ifdef),
+ Keyword.init("ifndef", .Keyword_ifndef),
+ Keyword.init("error", .Keyword_error),
+ Keyword.init("pragma", .Keyword_pragma),
+ };
+
+ // TODO perfect hash at comptime
+ // TODO do this in the preprocessor
+ /// Looks `bytes` up in `keywords` and returns the matching token tag, or
+ /// null when it is not a keyword. Preprocessor directive names are only
+ /// reported when `pp_directive` is true; otherwise they yield null.
+ pub fn getKeyword(bytes: []const u8, pp_directive: bool) ?Id {
+ const wanted_hash = std.hash_map.hashString(bytes);
+ for (keywords) |kw| {
+ // Cheap hash comparison first, byte comparison only on a hash match.
+ if (kw.hash != wanted_hash or !mem.eql(u8, kw.bytes, bytes)) continue;
+ const directive_only = switch (kw.id) {
+ .Keyword_include,
+ .Keyword_define,
+ .Keyword_ifdef,
+ .Keyword_ifndef,
+ .Keyword_error,
+ .Keyword_pragma,
+ => true,
+ else => false,
+ };
+ if (directive_only and !pp_directive) return null;
+ return kw.id;
+ }
+ return null;
+ }
+
+ /// Suffix recorded on a numeric literal token.
+ /// NOTE(review): appears to be the payload of the `IntegerLiteral` and
+ /// `FloatLiteral` token ids (declared outside this view) — confirm.
+ pub const NumSuffix = enum {
+ /// No suffix.
+ None,
+ /// `f` / `F`; the tokenizer produces it for float literals.
+ F,
+ /// `l` / `L`; the tokenizer produces it for both integer and float literals.
+ L,
+ /// `u` / `U`.
+ U,
+ /// A single `l` combined with `u`, in either order.
+ LU,
+ /// `ll`.
+ LL,
+ /// `ll` combined with `u`, in either order.
+ LLU,
+ };
+
+ /// Encoding prefix recorded on a string or character literal token.
+ /// NOTE(review): appears to be the payload of the `StringLiteral` and
+ /// `CharLiteral` token ids (declared outside this view) — confirm.
+ pub const StrKind = enum {
+ /// No prefix.
+ None,
+ /// `L` prefix.
+ Wide,
+ /// `u8` prefix; the tokenizer only produces it for string literals.
+ Utf8,
+ /// `u` prefix.
+ Utf16,
+ /// `U` prefix.
+ Utf32,
+ };
+};
+
+ /// Hand-written state machine that splits a C source buffer into tokens.
+ ///
+ /// Each call to `next` scans forward from `index` and returns one token;
+ /// `.Eof` is returned once the end of `source.buffer` has been reached.
+ /// `prev_tok_id` and `pp_directive` carry the context needed to recognize
+ /// preprocessor directive names and `#include <...>` header names.
+ pub const Tokenizer = struct {
+     /// Source whose `buffer` is scanned; also stored in every returned token.
+     source: *Source,
+     /// Offset into `source.buffer` of the next byte to scan.
+     index: usize = 0,
+     /// Tag of the token most recently returned by `next`.
+     prev_tok_id: @TagType(Token.Id) = .Invalid,
+     /// True once the word following a `#` has been scanned on the current
+     /// line; cleared again when the line ends.
+     pp_directive: bool = false,
+
+     /// Scans and returns the next token, advancing `index` past it.
+     ///
+     /// Horizontal whitespace and backslash-newline continuations between
+     /// tokens are skipped. Newlines are reported as `.Nl` and comments as
+     /// `.LineComment` (including its terminating newline) or
+     /// `.MultiLineComment`. Input that cannot start or complete a token
+     /// yields `.Invalid`; at the end of the buffer `.Eof` is returned.
+     pub fn next(self: *Tokenizer) Token {
+         var result = Token{
+             .id = .Eof,
+             .start = self.index,
+             .end = undefined,
+             .source = self.source,
+         };
+         var state: enum {
+             Start,
+             Cr,
+             BackSlash,
+             BackSlashCr,
+             u,
+             u8,
+             U,
+             L,
+             StringLiteral,
+             CharLiteralStart,
+             CharLiteral,
+             EscapeSequence,
+             CrEscape,
+             OctalEscape,
+             HexEscape,
+             UnicodeEscape,
+             Identifier,
+             Equal,
+             Bang,
+             Pipe,
+             Percent,
+             Asterisk,
+             Plus,
+
+             /// special case for #include <...>
+             MacroString,
+             AngleBracketLeft,
+             AngleBracketAngleBracketLeft,
+             AngleBracketRight,
+             AngleBracketAngleBracketRight,
+             Caret,
+             Period,
+             Period2,
+             Minus,
+             Slash,
+             Ampersand,
+             Hash,
+             LineComment,
+             MultiLineComment,
+             MultiLineCommentAsterisk,
+             Zero,
+             IntegerLiteralOct,
+             IntegerLiteralBinary,
+             IntegerLiteralHex,
+             IntegerLiteral,
+             IntegerSuffix,
+             IntegerSuffixU,
+             IntegerSuffixL,
+             IntegerSuffixLL,
+             IntegerSuffixUL,
+             FloatFraction,
+             FloatFractionHex,
+             FloatExponent,
+             FloatExponentDigits,
+             FloatSuffix,
+         } = .Start;
+         // Whether the escape sequence being scanned belongs to a string
+         // literal (true) or to a character literal (false).
+         var string = false;
+         // Octal escape: digits seen so far. Unicode escape: hex digits still
+         // required. Float exponent: digits seen so far.
+         var counter: u32 = 0;
+         // Convention used below: a state that has to look at a byte it does
+         // not own either breaks without advancing `index`, or decrements
+         // `index` so that the loop's increment re-scans the byte in the new
+         // state.
+         while (self.index < self.source.buffer.len) : (self.index += 1) {
+             const c = self.source.buffer[self.index];
+             switch (state) {
+                 .Start => switch (c) {
+                     '\n' => {
+                         self.pp_directive = false;
+                         result.id = .Nl;
+                         self.index += 1;
+                         break;
+                     },
+                     '\r' => state = .Cr,
+                     '"' => {
+                         result.id = .{ .StringLiteral = .None };
+                         state = .StringLiteral;
+                     },
+                     '\'' => {
+                         result.id = .{ .CharLiteral = .None };
+                         state = .CharLiteralStart;
+                     },
+                     // These letters may start a literal prefix (u, u8, U, L).
+                     'u' => state = .u,
+                     'U' => state = .U,
+                     'L' => state = .L,
+                     'a'...'t', 'v'...'z', 'A'...'K', 'M'...'T', 'V'...'Z', '_' => state = .Identifier,
+                     '=' => state = .Equal,
+                     '!' => state = .Bang,
+                     '|' => state = .Pipe,
+                     '(' => {
+                         result.id = .LParen;
+                         self.index += 1;
+                         break;
+                     },
+                     ')' => {
+                         result.id = .RParen;
+                         self.index += 1;
+                         break;
+                     },
+                     '[' => {
+                         result.id = .LBracket;
+                         self.index += 1;
+                         break;
+                     },
+                     ']' => {
+                         result.id = .RBracket;
+                         self.index += 1;
+                         break;
+                     },
+                     ';' => {
+                         result.id = .Semicolon;
+                         self.index += 1;
+                         break;
+                     },
+                     ',' => {
+                         result.id = .Comma;
+                         self.index += 1;
+                         break;
+                     },
+                     '?' => {
+                         result.id = .QuestionMark;
+                         self.index += 1;
+                         break;
+                     },
+                     ':' => {
+                         result.id = .Colon;
+                         self.index += 1;
+                         break;
+                     },
+                     '%' => state = .Percent,
+                     '*' => state = .Asterisk,
+                     '+' => state = .Plus,
+                     '<' => {
+                         // Directly after `include`, `<` opens a header name
+                         // rather than being a comparison operator.
+                         if (self.prev_tok_id == .Keyword_include)
+                             state = .MacroString
+                         else
+                             state = .AngleBracketLeft;
+                     },
+                     '>' => state = .AngleBracketRight,
+                     '^' => state = .Caret,
+                     '{' => {
+                         result.id = .LBrace;
+                         self.index += 1;
+                         break;
+                     },
+                     '}' => {
+                         result.id = .RBrace;
+                         self.index += 1;
+                         break;
+                     },
+                     '~' => {
+                         result.id = .Tilde;
+                         self.index += 1;
+                         break;
+                     },
+                     '.' => state = .Period,
+                     '-' => state = .Minus,
+                     '/' => state = .Slash,
+                     '&' => state = .Ampersand,
+                     '#' => state = .Hash,
+                     '0' => state = .Zero,
+                     '1'...'9' => state = .IntegerLiteral,
+                     '\\' => state = .BackSlash,
+                     '\t', '\x0B', '\x0C', ' ' => {
+                         // Skip horizontal whitespace between tokens.
+                         result.start = self.index + 1;
+                     },
+                     else => {
+                         // TODO handle invalid bytes better
+                         result.id = .Invalid;
+                         self.index += 1;
+                         break;
+                     },
+                 },
+                 .Cr => switch (c) {
+                     '\n' => {
+                         self.pp_directive = false;
+                         result.id = .Nl;
+                         self.index += 1;
+                         break;
+                     },
+                     else => {
+                         result.id = .Invalid;
+                         break;
+                     },
+                 },
+                 .BackSlash => switch (c) {
+                     '\n' => {
+                         // Line continuation: the next token starts after it,
+                         // not at the backslash.
+                         result.start = self.index + 1;
+                         state = .Start;
+                     },
+                     '\r' => state = .BackSlashCr,
+                     '\t', '\x0B', '\x0C', ' ' => {
+                         // TODO warn
+                     },
+                     else => {
+                         result.id = .Invalid;
+                         break;
+                     },
+                 },
+                 .BackSlashCr => switch (c) {
+                     '\n' => {
+                         result.start = self.index + 1;
+                         state = .Start;
+                     },
+                     else => {
+                         result.id = .Invalid;
+                         break;
+                     },
+                 },
+                 .u => switch (c) {
+                     '8' => state = .u8,
+                     '\'' => {
+                         result.id = .{ .CharLiteral = .Utf16 };
+                         state = .CharLiteralStart;
+                     },
+                     '"' => {
+                         result.id = .{ .StringLiteral = .Utf16 };
+                         state = .StringLiteral;
+                     },
+                     else => {
+                         // Not a literal prefix: re-scan this byte as part of
+                         // (or as the terminator of) an identifier.
+                         self.index -= 1;
+                         state = .Identifier;
+                     },
+                 },
+                 .u8 => switch (c) {
+                     '"' => {
+                         result.id = .{ .StringLiteral = .Utf8 };
+                         state = .StringLiteral;
+                     },
+                     else => {
+                         self.index -= 1;
+                         state = .Identifier;
+                     },
+                 },
+                 .U => switch (c) {
+                     '\'' => {
+                         result.id = .{ .CharLiteral = .Utf32 };
+                         state = .CharLiteralStart;
+                     },
+                     '"' => {
+                         result.id = .{ .StringLiteral = .Utf32 };
+                         state = .StringLiteral;
+                     },
+                     else => {
+                         self.index -= 1;
+                         state = .Identifier;
+                     },
+                 },
+                 .L => switch (c) {
+                     '\'' => {
+                         result.id = .{ .CharLiteral = .Wide };
+                         state = .CharLiteralStart;
+                     },
+                     '"' => {
+                         result.id = .{ .StringLiteral = .Wide };
+                         state = .StringLiteral;
+                     },
+                     else => {
+                         self.index -= 1;
+                         state = .Identifier;
+                     },
+                 },
+                 .StringLiteral => switch (c) {
+                     '\\' => {
+                         string = true;
+                         state = .EscapeSequence;
+                     },
+                     '"' => {
+                         self.index += 1;
+                         break;
+                     },
+                     '\n', '\r' => {
+                         result.id = .Invalid;
+                         break;
+                     },
+                     else => {},
+                 },
+                 .CharLiteralStart => switch (c) {
+                     '\\' => {
+                         string = false;
+                         state = .EscapeSequence;
+                     },
+                     '\'', '\n' => {
+                         result.id = .Invalid;
+                         break;
+                     },
+                     else => state = .CharLiteral,
+                 },
+                 .CharLiteral => switch (c) {
+                     '\\' => {
+                         string = false;
+                         state = .EscapeSequence;
+                     },
+                     '\'' => {
+                         self.index += 1;
+                         break;
+                     },
+                     '\n' => {
+                         result.id = .Invalid;
+                         break;
+                     },
+                     else => {},
+                 },
+                 .EscapeSequence => switch (c) {
+                     '\'', '"', '?', '\\', 'a', 'b', 'f', 'n', 'r', 't', 'v', '\n' => {
+                         state = if (string) .StringLiteral else .CharLiteral;
+                     },
+                     '\r' => state = .CrEscape,
+                     '0'...'7' => {
+                         counter = 1;
+                         state = .OctalEscape;
+                     },
+                     'x' => state = .HexEscape,
+                     'u' => {
+                         // \uXXXX: exactly four hex digits must follow.
+                         counter = 4;
+                         state = .UnicodeEscape;
+                     },
+                     'U' => {
+                         // \UXXXXXXXX: exactly eight hex digits must follow.
+                         counter = 8;
+                         state = .UnicodeEscape;
+                     },
+                     else => {
+                         result.id = .Invalid;
+                         break;
+                     },
+                 },
+                 .CrEscape => switch (c) {
+                     '\n' => {
+                         state = if (string) .StringLiteral else .CharLiteral;
+                     },
+                     else => {
+                         result.id = .Invalid;
+                         break;
+                     },
+                 },
+                 .OctalEscape => switch (c) {
+                     '0'...'7' => {
+                         // An octal escape has at most three digits.
+                         counter += 1;
+                         if (counter == 3) {
+                             state = if (string) .StringLiteral else .CharLiteral;
+                         }
+                     },
+                     else => {
+                         // The escape is over; this byte belongs to the literal
+                         // (it may be the closing quote), so re-scan it there.
+                         self.index -= 1;
+                         state = if (string) .StringLiteral else .CharLiteral;
+                     },
+                 },
+                 .HexEscape => switch (c) {
+                     '0'...'9', 'a'...'f', 'A'...'F' => {},
+                     else => {
+                         self.index -= 1;
+                         state = if (string) .StringLiteral else .CharLiteral;
+                     },
+                 },
+                 .UnicodeEscape => switch (c) {
+                     '0'...'9', 'a'...'f', 'A'...'F' => {
+                         counter -= 1;
+                         if (counter == 0) {
+                             state = if (string) .StringLiteral else .CharLiteral;
+                         }
+                     },
+                     else => {
+                         // This state is left as soon as `counter` reaches 0,
+                         // so getting here means digits are missing.
+                         result.id = .Invalid;
+                         break;
+                     },
+                 },
+                 .Identifier => switch (c) {
+                     'a'...'z', 'A'...'Z', '_', '0'...'9' => {},
+                     else => {
+                         // Directive names are only keywords for the first word
+                         // after a `#`.
+                         result.id = Token.getKeyword(self.source.buffer[result.start..self.index], self.prev_tok_id == .Hash and !self.pp_directive) orelse .Identifier;
+                         if (self.prev_tok_id == .Hash)
+                             self.pp_directive = true;
+                         break;
+                     },
+                 },
+                 .Equal => switch (c) {
+                     '=' => {
+                         result.id = .EqualEqual;
+                         self.index += 1;
+                         break;
+                     },
+                     else => {
+                         result.id = .Equal;
+                         break;
+                     },
+                 },
+                 .Bang => switch (c) {
+                     '=' => {
+                         result.id = .BangEqual;
+                         self.index += 1;
+                         break;
+                     },
+                     else => {
+                         result.id = .Bang;
+                         break;
+                     },
+                 },
+                 .Pipe => switch (c) {
+                     '=' => {
+                         result.id = .PipeEqual;
+                         self.index += 1;
+                         break;
+                     },
+                     '|' => {
+                         result.id = .PipePipe;
+                         self.index += 1;
+                         break;
+                     },
+                     else => {
+                         result.id = .Pipe;
+                         break;
+                     },
+                 },
+                 .Percent => switch (c) {
+                     '=' => {
+                         result.id = .PercentEqual;
+                         self.index += 1;
+                         break;
+                     },
+                     else => {
+                         result.id = .Percent;
+                         break;
+                     },
+                 },
+                 .Asterisk => switch (c) {
+                     '=' => {
+                         result.id = .AsteriskEqual;
+                         self.index += 1;
+                         break;
+                     },
+                     else => {
+                         result.id = .Asterisk;
+                         break;
+                     },
+                 },
+                 .Plus => switch (c) {
+                     '=' => {
+                         result.id = .PlusEqual;
+                         self.index += 1;
+                         break;
+                     },
+                     '+' => {
+                         result.id = .PlusPlus;
+                         self.index += 1;
+                         break;
+                     },
+                     else => {
+                         result.id = .Plus;
+                         break;
+                     },
+                 },
+                 .MacroString => switch (c) {
+                     '>' => {
+                         result.id = .MacroString;
+                         self.index += 1;
+                         break;
+                     },
+                     '\n' => {
+                         // A header name cannot span lines; leave the newline
+                         // to be returned as the next token.
+                         result.id = .Invalid;
+                         break;
+                     },
+                     else => {},
+                 },
+                 .AngleBracketLeft => switch (c) {
+                     '<' => state = .AngleBracketAngleBracketLeft,
+                     '=' => {
+                         result.id = .AngleBracketLeftEqual;
+                         self.index += 1;
+                         break;
+                     },
+                     else => {
+                         result.id = .AngleBracketLeft;
+                         break;
+                     },
+                 },
+                 .AngleBracketAngleBracketLeft => switch (c) {
+                     '=' => {
+                         result.id = .AngleBracketAngleBracketLeftEqual;
+                         self.index += 1;
+                         break;
+                     },
+                     else => {
+                         result.id = .AngleBracketAngleBracketLeft;
+                         break;
+                     },
+                 },
+                 .AngleBracketRight => switch (c) {
+                     '>' => state = .AngleBracketAngleBracketRight,
+                     '=' => {
+                         result.id = .AngleBracketRightEqual;
+                         self.index += 1;
+                         break;
+                     },
+                     else => {
+                         result.id = .AngleBracketRight;
+                         break;
+                     },
+                 },
+                 .AngleBracketAngleBracketRight => switch (c) {
+                     '=' => {
+                         result.id = .AngleBracketAngleBracketRightEqual;
+                         self.index += 1;
+                         break;
+                     },
+                     else => {
+                         result.id = .AngleBracketAngleBracketRight;
+                         break;
+                     },
+                 },
+                 .Caret => switch (c) {
+                     '=' => {
+                         result.id = .CaretEqual;
+                         self.index += 1;
+                         break;
+                     },
+                     else => {
+                         result.id = .Caret;
+                         break;
+                     },
+                 },
+                 .Period => switch (c) {
+                     '.' => state = .Period2,
+                     '0'...'9' => state = .FloatFraction,
+                     else => {
+                         result.id = .Period;
+                         break;
+                     },
+                 },
+                 .Period2 => switch (c) {
+                     '.' => {
+                         result.id = .Ellipsis;
+                         self.index += 1;
+                         break;
+                     },
+                     else => {
+                         // `..` is not a token: return the first `.` alone and
+                         // let the second one be scanned again.
+                         result.id = .Period;
+                         self.index -= 1;
+                         break;
+                     },
+                 },
+                 .Minus => switch (c) {
+                     '>' => {
+                         result.id = .Arrow;
+                         self.index += 1;
+                         break;
+                     },
+                     '=' => {
+                         result.id = .MinusEqual;
+                         self.index += 1;
+                         break;
+                     },
+                     '-' => {
+                         result.id = .MinusMinus;
+                         self.index += 1;
+                         break;
+                     },
+                     else => {
+                         result.id = .Minus;
+                         break;
+                     },
+                 },
+                 .Slash => switch (c) {
+                     '/' => state = .LineComment,
+                     '*' => state = .MultiLineComment,
+                     '=' => {
+                         result.id = .SlashEqual;
+                         self.index += 1;
+                         break;
+                     },
+                     else => {
+                         result.id = .Slash;
+                         break;
+                     },
+                 },
+                 .Ampersand => switch (c) {
+                     '&' => {
+                         result.id = .AmpersandAmpersand;
+                         self.index += 1;
+                         break;
+                     },
+                     '=' => {
+                         result.id = .AmpersandEqual;
+                         self.index += 1;
+                         break;
+                     },
+                     else => {
+                         result.id = .Ampersand;
+                         break;
+                     },
+                 },
+                 .Hash => switch (c) {
+                     '#' => {
+                         result.id = .HashHash;
+                         self.index += 1;
+                         break;
+                     },
+                     else => {
+                         result.id = .Hash;
+                         break;
+                     },
+                 },
+                 .LineComment => switch (c) {
+                     '\n' => {
+                         // The comment token swallows the newline, so the
+                         // end-of-line bookkeeping has to happen here too.
+                         self.pp_directive = false;
+                         result.id = .LineComment;
+                         self.index += 1;
+                         break;
+                     },
+                     else => {},
+                 },
+                 .MultiLineComment => switch (c) {
+                     '*' => state = .MultiLineCommentAsterisk,
+                     else => {},
+                 },
+                 .MultiLineCommentAsterisk => switch (c) {
+                     '/' => {
+                         result.id = .MultiLineComment;
+                         self.index += 1;
+                         break;
+                     },
+                     else => state = .MultiLineComment,
+                 },
+                 .Zero => switch (c) {
+                     '0'...'9' => state = .IntegerLiteralOct,
+                     'b', 'B' => state = .IntegerLiteralBinary,
+                     'x', 'X' => state = .IntegerLiteralHex,
+                     // `0.5` and `0e1` are floating constants, not `0` followed
+                     // by something else.
+                     '.' => state = .FloatFraction,
+                     'e', 'E' => state = .FloatExponent,
+                     else => {
+                         state = .IntegerSuffix;
+                         self.index -= 1;
+                     },
+                 },
+                 .IntegerLiteralOct => switch (c) {
+                     '0'...'7' => {},
+                     else => {
+                         state = .IntegerSuffix;
+                         self.index -= 1;
+                     },
+                 },
+                 .IntegerLiteralBinary => switch (c) {
+                     '0', '1' => {},
+                     else => {
+                         state = .IntegerSuffix;
+                         self.index -= 1;
+                     },
+                 },
+                 .IntegerLiteralHex => switch (c) {
+                     '0'...'9', 'a'...'f', 'A'...'F' => {},
+                     '.' => state = .FloatFractionHex,
+                     'p', 'P' => state = .FloatExponent,
+                     else => {
+                         state = .IntegerSuffix;
+                         self.index -= 1;
+                     },
+                 },
+                 .IntegerLiteral => switch (c) {
+                     '0'...'9' => {},
+                     '.' => state = .FloatFraction,
+                     'e', 'E' => state = .FloatExponent,
+                     else => {
+                         state = .IntegerSuffix;
+                         self.index -= 1;
+                     },
+                 },
+                 .IntegerSuffix => switch (c) {
+                     'u', 'U' => state = .IntegerSuffixU,
+                     'l', 'L' => state = .IntegerSuffixL,
+                     else => {
+                         result.id = .{ .IntegerLiteral = .None };
+                         break;
+                     },
+                 },
+                 .IntegerSuffixU => switch (c) {
+                     'l', 'L' => state = .IntegerSuffixUL,
+                     else => {
+                         result.id = .{ .IntegerLiteral = .U };
+                         break;
+                     },
+                 },
+                 .IntegerSuffixL => switch (c) {
+                     'l', 'L' => state = .IntegerSuffixLL,
+                     'u', 'U' => {
+                         result.id = .{ .IntegerLiteral = .LU };
+                         self.index += 1;
+                         break;
+                     },
+                     else => {
+                         result.id = .{ .IntegerLiteral = .L };
+                         break;
+                     },
+                 },
+                 .IntegerSuffixLL => switch (c) {
+                     'u', 'U' => {
+                         result.id = .{ .IntegerLiteral = .LLU };
+                         self.index += 1;
+                         break;
+                     },
+                     else => {
+                         result.id = .{ .IntegerLiteral = .LL };
+                         break;
+                     },
+                 },
+                 .IntegerSuffixUL => switch (c) {
+                     'l', 'L' => {
+                         result.id = .{ .IntegerLiteral = .LLU };
+                         self.index += 1;
+                         break;
+                     },
+                     else => {
+                         result.id = .{ .IntegerLiteral = .LU };
+                         break;
+                     },
+                 },
+                 .FloatFraction => switch (c) {
+                     '0'...'9' => {},
+                     'e', 'E' => state = .FloatExponent,
+                     else => {
+                         self.index -= 1;
+                         state = .FloatSuffix;
+                     },
+                 },
+                 .FloatFractionHex => switch (c) {
+                     '0'...'9', 'a'...'f', 'A'...'F' => {},
+                     'p', 'P' => state = .FloatExponent,
+                     else => {
+                         // A hexadecimal float needs a `p` exponent.
+                         result.id = .Invalid;
+                         break;
+                     },
+                 },
+                 .FloatExponent => switch (c) {
+                     '+', '-' => state = .FloatExponentDigits,
+                     else => {
+                         self.index -= 1;
+                         state = .FloatExponentDigits;
+                     },
+                 },
+                 .FloatExponentDigits => switch (c) {
+                     '0'...'9' => counter += 1,
+                     else => {
+                         if (counter == 0) {
+                             result.id = .Invalid;
+                             break;
+                         }
+                         // Re-scan this byte as a possible `f` / `l` suffix.
+                         self.index -= 1;
+                         state = .FloatSuffix;
+                     },
+                 },
+                 .FloatSuffix => switch (c) {
+                     'l', 'L' => {
+                         result.id = .{ .FloatLiteral = .L };
+                         self.index += 1;
+                         break;
+                     },
+                     'f', 'F' => {
+                         result.id = .{ .FloatLiteral = .F };
+                         self.index += 1;
+                         break;
+                     },
+                     else => {
+                         result.id = .{ .FloatLiteral = .None };
+                         break;
+                     },
+                 },
+             }
+         } else if (self.index == self.source.buffer.len) {
+             // The buffer ended in the middle of (or right after) a token:
+             // decide what the state reached so far amounts to.
+             switch (state) {
+                 .Start => {},
+                 .u, .u8, .U, .L, .Identifier => {
+                     result.id = Token.getKeyword(self.source.buffer[result.start..self.index], self.prev_tok_id == .Hash and !self.pp_directive) orelse .Identifier;
+                 },
+
+                 .Cr,
+                 .BackSlash,
+                 .BackSlashCr,
+                 .Period2,
+                 .StringLiteral,
+                 .CharLiteralStart,
+                 .CharLiteral,
+                 .EscapeSequence,
+                 .CrEscape,
+                 .OctalEscape,
+                 .HexEscape,
+                 .UnicodeEscape,
+                 .MultiLineComment,
+                 .MultiLineCommentAsterisk,
+                 .FloatFractionHex,
+                 .FloatExponent,
+                 .MacroString,
+                 => result.id = .Invalid,
+
+                 .IntegerLiteralOct,
+                 .IntegerLiteralBinary,
+                 .IntegerLiteralHex,
+                 .IntegerLiteral,
+                 .IntegerSuffix,
+                 .Zero,
+                 => result.id = .{ .IntegerLiteral = .None },
+                 .IntegerSuffixU => result.id = .{ .IntegerLiteral = .U },
+                 .IntegerSuffixL => result.id = .{ .IntegerLiteral = .L },
+                 .IntegerSuffixLL => result.id = .{ .IntegerLiteral = .LL },
+                 .IntegerSuffixUL => result.id = .{ .IntegerLiteral = .LU },
+
+                 // `1.` and `1.5` are complete floating constants.
+                 .FloatFraction, .FloatSuffix => result.id = .{ .FloatLiteral = .None },
+                 .FloatExponentDigits => {
+                     // Complete only if at least one exponent digit was seen.
+                     if (counter == 0) {
+                         result.id = .Invalid;
+                     } else {
+                         result.id = .{ .FloatLiteral = .None };
+                     }
+                 },
+                 .Equal => result.id = .Equal,
+                 .Bang => result.id = .Bang,
+                 .Minus => result.id = .Minus,
+                 .Slash => result.id = .Slash,
+                 .Ampersand => result.id = .Ampersand,
+                 .Hash => result.id = .Hash,
+                 .Period => result.id = .Period,
+                 .Pipe => result.id = .Pipe,
+                 .AngleBracketAngleBracketRight => result.id = .AngleBracketAngleBracketRight,
+                 .AngleBracketRight => result.id = .AngleBracketRight,
+                 .AngleBracketAngleBracketLeft => result.id = .AngleBracketAngleBracketLeft,
+                 .AngleBracketLeft => result.id = .AngleBracketLeft,
+                 .Plus => result.id = .Plus,
+                 .Percent => result.id = .Percent,
+                 .Caret => result.id = .Caret,
+                 .Asterisk => result.id = .Asterisk,
+                 .LineComment => result.id = .LineComment,
+             }
+         }
+
+         self.prev_tok_id = result.id;
+         result.end = self.index;
+         return result;
+     }
+ };
+
+ test "operators" {
+     // Every punctuator, including the multi-character forms. A lone `..`
+     // must come out as two separate `.Period` tokens.
+     const source =
+         \\ ! != | || |= = ==
+         \\ ( ) { } [ ] . .. ...
+         \\ ^ ^= + ++ += - -- -=
+         \\ * *= % %= -> : ; / /=
+         \\ , & && &= ? < <= <<
+         \\ <<= > >= >> >>= ~ # ##
+         \\
+     ;
+     const expected = [_]Token.Id{
+         .Bang,
+         .BangEqual,
+         .Pipe,
+         .PipePipe,
+         .PipeEqual,
+         .Equal,
+         .EqualEqual,
+         .Nl,
+         .LParen,
+         .RParen,
+         .LBrace,
+         .RBrace,
+         .LBracket,
+         .RBracket,
+         .Period,
+         .Period,
+         .Period,
+         .Ellipsis,
+         .Nl,
+         .Caret,
+         .CaretEqual,
+         .Plus,
+         .PlusPlus,
+         .PlusEqual,
+         .Minus,
+         .MinusMinus,
+         .MinusEqual,
+         .Nl,
+         .Asterisk,
+         .AsteriskEqual,
+         .Percent,
+         .PercentEqual,
+         .Arrow,
+         .Colon,
+         .Semicolon,
+         .Slash,
+         .SlashEqual,
+         .Nl,
+         .Comma,
+         .Ampersand,
+         .AmpersandAmpersand,
+         .AmpersandEqual,
+         .QuestionMark,
+         .AngleBracketLeft,
+         .AngleBracketLeftEqual,
+         .AngleBracketAngleBracketLeft,
+         .Nl,
+         .AngleBracketAngleBracketLeftEqual,
+         .AngleBracketRight,
+         .AngleBracketRightEqual,
+         .AngleBracketAngleBracketRight,
+         .AngleBracketAngleBracketRightEqual,
+         .Tilde,
+         .Hash,
+         .HashHash,
+         .Nl,
+     };
+     expectTokens(source, &expected);
+ }
+
+ test "keywords" {
+     // The language keywords, in the same order as the expected ids below.
+     const source =
+         \\auto break case char const continue default do
+         \\double else enum extern float for goto if int
+         \\long register return short signed sizeof static
+         \\struct switch typedef union unsigned void volatile
+         \\while _Bool _Complex _Imaginary inline restrict _Alignas
+         \\_Alignof _Atomic _Generic _Noreturn _Static_assert _Thread_local
+         \\
+     ;
+     const expected = [_]Token.Id{
+         .Keyword_auto,
+         .Keyword_break,
+         .Keyword_case,
+         .Keyword_char,
+         .Keyword_const,
+         .Keyword_continue,
+         .Keyword_default,
+         .Keyword_do,
+         .Nl,
+         .Keyword_double,
+         .Keyword_else,
+         .Keyword_enum,
+         .Keyword_extern,
+         .Keyword_float,
+         .Keyword_for,
+         .Keyword_goto,
+         .Keyword_if,
+         .Keyword_int,
+         .Nl,
+         .Keyword_long,
+         .Keyword_register,
+         .Keyword_return,
+         .Keyword_short,
+         .Keyword_signed,
+         .Keyword_sizeof,
+         .Keyword_static,
+         .Nl,
+         .Keyword_struct,
+         .Keyword_switch,
+         .Keyword_typedef,
+         .Keyword_union,
+         .Keyword_unsigned,
+         .Keyword_void,
+         .Keyword_volatile,
+         .Nl,
+         .Keyword_while,
+         .Keyword_bool,
+         .Keyword_complex,
+         .Keyword_imaginary,
+         .Keyword_inline,
+         .Keyword_restrict,
+         .Keyword_alignas,
+         .Nl,
+         .Keyword_alignof,
+         .Keyword_atomic,
+         .Keyword_generic,
+         .Keyword_noreturn,
+         .Keyword_static_assert,
+         .Keyword_thread_local,
+         .Nl,
+     };
+     expectTokens(source, &expected);
+ }
+
+ test "preprocessor keywords" {
+     // Directive names are keywords only as the first word after `#` on a
+     // line: the second `#include` on the `#define` line is a plain
+     // identifier, so its `<` is an ordinary operator.
+     const source =
+         \\#include <test>
+         \\#define #include <1
+         \\#ifdef
+         \\#ifndef
+         \\#error
+         \\#pragma
+         \\
+     ;
+     const expected = [_]Token.Id{
+         .Hash,
+         .Keyword_include,
+         .MacroString,
+         .Nl,
+         .Hash,
+         .Keyword_define,
+         .Hash,
+         .Identifier,
+         .AngleBracketLeft,
+         .{ .IntegerLiteral = .None },
+         .Nl,
+         .Hash,
+         .Keyword_ifdef,
+         .Nl,
+         .Hash,
+         .Keyword_ifndef,
+         .Nl,
+         .Hash,
+         .Keyword_error,
+         .Nl,
+         .Hash,
+         .Keyword_pragma,
+         .Nl,
+     };
+     expectTokens(source, &expected);
+ }
+
+ test "line continuation" {
+     // A backslash-newline between tokens produces no `.Nl`; inside a string
+     // it is accepted as an escape. A newline without a backslash still
+     // yields `.Nl`.
+     const source =
+         \\#define foo \
+         \\ bar
+         \\"foo\
+         \\ bar"
+         \\#define "foo"
+         \\ "bar"
+         \\#define "foo" \
+         \\ "bar"
+     ;
+     const expected = [_]Token.Id{
+         .Hash,
+         .Keyword_define,
+         .Identifier,
+         .Identifier,
+         .Nl,
+         .{ .StringLiteral = .None },
+         .Nl,
+         .Hash,
+         .Keyword_define,
+         .{ .StringLiteral = .None },
+         .Nl,
+         .{ .StringLiteral = .None },
+         .Nl,
+         .Hash,
+         .Keyword_define,
+         .{ .StringLiteral = .None },
+         .{ .StringLiteral = .None },
+     };
+     expectTokens(source, &expected);
+ }
+
+ test "string prefix" {
+     // Each encoding prefix on string literals, then on character literals.
+     const source =
+         \\"foo"
+         \\u"foo"
+         \\u8"foo"
+         \\U"foo"
+         \\L"foo"
+         \\'foo'
+         \\u'foo'
+         \\U'foo'
+         \\L'foo'
+         \\
+     ;
+     const expected = [_]Token.Id{
+         .{ .StringLiteral = .None },
+         .Nl,
+         .{ .StringLiteral = .Utf16 },
+         .Nl,
+         .{ .StringLiteral = .Utf8 },
+         .Nl,
+         .{ .StringLiteral = .Utf32 },
+         .Nl,
+         .{ .StringLiteral = .Wide },
+         .Nl,
+         .{ .CharLiteral = .None },
+         .Nl,
+         .{ .CharLiteral = .Utf16 },
+         .Nl,
+         .{ .CharLiteral = .Utf32 },
+         .Nl,
+         .{ .CharLiteral = .Wide },
+         .Nl,
+     };
+     expectTokens(source, &expected);
+ }
+
+ test "num suffixes" {
+     // Float literals with and without suffix, then every integer suffix
+     // combination; `ul` and `lu` both map to `.LU`, `ull` and `llu` to `.LLU`.
+     const source =
+         \\ 1.0f 1.0L 1.0 .0 1.
+         \\ 0l 0lu 0ll 0llu 0
+         \\ 1u 1ul 1ull 1
+         \\
+     ;
+     const expected = [_]Token.Id{
+         .{ .FloatLiteral = .F },
+         .{ .FloatLiteral = .L },
+         .{ .FloatLiteral = .None },
+         .{ .FloatLiteral = .None },
+         .{ .FloatLiteral = .None },
+         .Nl,
+         .{ .IntegerLiteral = .L },
+         .{ .IntegerLiteral = .LU },
+         .{ .IntegerLiteral = .LL },
+         .{ .IntegerLiteral = .LLU },
+         .{ .IntegerLiteral = .None },
+         .Nl,
+         .{ .IntegerLiteral = .U },
+         .{ .IntegerLiteral = .LU },
+         .{ .IntegerLiteral = .LLU },
+         .{ .IntegerLiteral = .None },
+         .Nl,
+     };
+     expectTokens(source, &expected);
+ }
+
+ /// Test helper: tokenizes `source` and panics on the first token whose id
+ /// differs from the corresponding entry of `expected_tokens`; afterwards the
+ /// tokenizer must report `.Eof`.
+ fn expectTokens(source: []const u8, expected_tokens: []const Token.Id) void {
+     // Only `buffer` is read by the tokenizer, so the other fields stay undefined.
+     var input = Source{
+         .buffer = source,
+         .file_name = undefined,
+         .tokens = undefined,
+     };
+     var tokenizer = Tokenizer{ .source = &input };
+
+     for (expected_tokens) |want| {
+         const got = tokenizer.next();
+         if (std.meta.eql(got.id, want)) continue;
+         std.debug.panic("expected {}, found {}\n", .{ @tagName(want), @tagName(got.id) });
+     }
+
+     const trailing = tokenizer.next();
+     std.testing.expect(trailing.id == .Eof);
+ }