commit 1fb308ceeea0259ad021d67945ea5adc10960a85 (tree)
parent 3919afcad26d2359efe52f98cd4f2f0573527369
Author: Andrew Kelley <superjoe30@gmail.com>
Date: Fri, 9 Feb 2018 13:08:02 -0500
self hosted compiler: move tokenization and parsing to std lib
Diffstat:
12 files changed, 2110 insertions(+), 2102 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -477,6 +477,10 @@ set(ZIG_STD_FILES
"special/panic.zig"
"special/test_runner.zig"
"unicode.zig"
+ "zig/ast.zig"
+ "zig/index.zig"
+ "zig/parser.zig"
+ "zig/tokenizer.zig"
)
set(ZIG_C_HEADER_FILES
diff --git a/build.zig b/build.zig
@@ -108,10 +108,6 @@ pub fn build(b: &Builder) !void {
"std/special/compiler_rt/index.zig", "compiler-rt", "Run the compiler_rt tests",
with_lldb));
- test_step.dependOn(tests.addPkgTests(b, test_filter,
- "src-self-hosted/main.zig", "fmt", "Run the fmt tests",
- with_lldb));
-
test_step.dependOn(tests.addCompareOutputTests(b, test_filter));
test_step.dependOn(tests.addBuildExampleTests(b, test_filter));
test_step.dependOn(tests.addCompileErrorTests(b, test_filter));
diff --git a/src-self-hosted/ast.zig b/src-self-hosted/ast.zig
@@ -1,271 +0,0 @@
-const std = @import("std");
-const assert = std.debug.assert;
-const ArrayList = std.ArrayList;
-const Token = @import("tokenizer.zig").Token;
-const mem = std.mem;
-
-pub const Node = struct {
- id: Id,
-
- pub const Id = enum {
- Root,
- VarDecl,
- Identifier,
- FnProto,
- ParamDecl,
- Block,
- InfixOp,
- PrefixOp,
- IntegerLiteral,
- FloatLiteral,
- };
-
- pub fn iterate(base: &Node, index: usize) ?&Node {
- return switch (base.id) {
- Id.Root => @fieldParentPtr(NodeRoot, "base", base).iterate(index),
- Id.VarDecl => @fieldParentPtr(NodeVarDecl, "base", base).iterate(index),
- Id.Identifier => @fieldParentPtr(NodeIdentifier, "base", base).iterate(index),
- Id.FnProto => @fieldParentPtr(NodeFnProto, "base", base).iterate(index),
- Id.ParamDecl => @fieldParentPtr(NodeParamDecl, "base", base).iterate(index),
- Id.Block => @fieldParentPtr(NodeBlock, "base", base).iterate(index),
- Id.InfixOp => @fieldParentPtr(NodeInfixOp, "base", base).iterate(index),
- Id.PrefixOp => @fieldParentPtr(NodePrefixOp, "base", base).iterate(index),
- Id.IntegerLiteral => @fieldParentPtr(NodeIntegerLiteral, "base", base).iterate(index),
- Id.FloatLiteral => @fieldParentPtr(NodeFloatLiteral, "base", base).iterate(index),
- };
- }
-
- pub fn destroy(base: &Node, allocator: &mem.Allocator) void {
- return switch (base.id) {
- Id.Root => allocator.destroy(@fieldParentPtr(NodeRoot, "base", base)),
- Id.VarDecl => allocator.destroy(@fieldParentPtr(NodeVarDecl, "base", base)),
- Id.Identifier => allocator.destroy(@fieldParentPtr(NodeIdentifier, "base", base)),
- Id.FnProto => allocator.destroy(@fieldParentPtr(NodeFnProto, "base", base)),
- Id.ParamDecl => allocator.destroy(@fieldParentPtr(NodeParamDecl, "base", base)),
- Id.Block => allocator.destroy(@fieldParentPtr(NodeBlock, "base", base)),
- Id.InfixOp => allocator.destroy(@fieldParentPtr(NodeInfixOp, "base", base)),
- Id.PrefixOp => allocator.destroy(@fieldParentPtr(NodePrefixOp, "base", base)),
- Id.IntegerLiteral => allocator.destroy(@fieldParentPtr(NodeIntegerLiteral, "base", base)),
- Id.FloatLiteral => allocator.destroy(@fieldParentPtr(NodeFloatLiteral, "base", base)),
- };
- }
-};
-
-pub const NodeRoot = struct {
- base: Node,
- decls: ArrayList(&Node),
-
- pub fn iterate(self: &NodeRoot, index: usize) ?&Node {
- if (index < self.decls.len) {
- return self.decls.items[self.decls.len - index - 1];
- }
- return null;
- }
-};
-
-pub const NodeVarDecl = struct {
- base: Node,
- visib_token: ?Token,
- name_token: Token,
- eq_token: Token,
- mut_token: Token,
- comptime_token: ?Token,
- extern_token: ?Token,
- lib_name: ?&Node,
- type_node: ?&Node,
- align_node: ?&Node,
- init_node: ?&Node,
-
- pub fn iterate(self: &NodeVarDecl, index: usize) ?&Node {
- var i = index;
-
- if (self.type_node) |type_node| {
- if (i < 1) return type_node;
- i -= 1;
- }
-
- if (self.align_node) |align_node| {
- if (i < 1) return align_node;
- i -= 1;
- }
-
- if (self.init_node) |init_node| {
- if (i < 1) return init_node;
- i -= 1;
- }
-
- return null;
- }
-};
-
-pub const NodeIdentifier = struct {
- base: Node,
- name_token: Token,
-
- pub fn iterate(self: &NodeIdentifier, index: usize) ?&Node {
- return null;
- }
-};
-
-pub const NodeFnProto = struct {
- base: Node,
- visib_token: ?Token,
- fn_token: Token,
- name_token: ?Token,
- params: ArrayList(&Node),
- return_type: &Node,
- var_args_token: ?Token,
- extern_token: ?Token,
- inline_token: ?Token,
- cc_token: ?Token,
- body_node: ?&Node,
- lib_name: ?&Node, // populated if this is an extern declaration
- align_expr: ?&Node, // populated if align(A) is present
-
- pub fn iterate(self: &NodeFnProto, index: usize) ?&Node {
- var i = index;
-
- if (self.body_node) |body_node| {
- if (i < 1) return body_node;
- i -= 1;
- }
-
- if (i < 1) return self.return_type;
- i -= 1;
-
- if (self.align_expr) |align_expr| {
- if (i < 1) return align_expr;
- i -= 1;
- }
-
- if (i < self.params.len) return self.params.items[self.params.len - i - 1];
- i -= self.params.len;
-
- if (self.lib_name) |lib_name| {
- if (i < 1) return lib_name;
- i -= 1;
- }
-
- return null;
- }
-};
-
-pub const NodeParamDecl = struct {
- base: Node,
- comptime_token: ?Token,
- noalias_token: ?Token,
- name_token: ?Token,
- type_node: &Node,
- var_args_token: ?Token,
-
- pub fn iterate(self: &NodeParamDecl, index: usize) ?&Node {
- var i = index;
-
- if (i < 1) return self.type_node;
- i -= 1;
-
- return null;
- }
-};
-
-pub const NodeBlock = struct {
- base: Node,
- begin_token: Token,
- end_token: Token,
- statements: ArrayList(&Node),
-
- pub fn iterate(self: &NodeBlock, index: usize) ?&Node {
- var i = index;
-
- if (i < self.statements.len) return self.statements.items[i];
- i -= self.statements.len;
-
- return null;
- }
-};
-
-pub const NodeInfixOp = struct {
- base: Node,
- op_token: Token,
- lhs: &Node,
- op: InfixOp,
- rhs: &Node,
-
- const InfixOp = enum {
- EqualEqual,
- BangEqual,
- };
-
- pub fn iterate(self: &NodeInfixOp, index: usize) ?&Node {
- var i = index;
-
- if (i < 1) return self.lhs;
- i -= 1;
-
- switch (self.op) {
- InfixOp.EqualEqual => {},
- InfixOp.BangEqual => {},
- }
-
- if (i < 1) return self.rhs;
- i -= 1;
-
- return null;
- }
-};
-
-pub const NodePrefixOp = struct {
- base: Node,
- op_token: Token,
- op: PrefixOp,
- rhs: &Node,
-
- const PrefixOp = union(enum) {
- Return,
- AddrOf: AddrOfInfo,
- };
- const AddrOfInfo = struct {
- align_expr: ?&Node,
- bit_offset_start_token: ?Token,
- bit_offset_end_token: ?Token,
- const_token: ?Token,
- volatile_token: ?Token,
- };
-
- pub fn iterate(self: &NodePrefixOp, index: usize) ?&Node {
- var i = index;
-
- switch (self.op) {
- PrefixOp.Return => {},
- PrefixOp.AddrOf => |addr_of_info| {
- if (addr_of_info.align_expr) |align_expr| {
- if (i < 1) return align_expr;
- i -= 1;
- }
- },
- }
-
- if (i < 1) return self.rhs;
- i -= 1;
-
- return null;
- }
-};
-
-pub const NodeIntegerLiteral = struct {
- base: Node,
- token: Token,
-
- pub fn iterate(self: &NodeIntegerLiteral, index: usize) ?&Node {
- return null;
- }
-};
-
-pub const NodeFloatLiteral = struct {
- base: Node,
- token: Token,
-
- pub fn iterate(self: &NodeFloatLiteral, index: usize) ?&Node {
- return null;
- }
-};
diff --git a/src-self-hosted/main.zig b/src-self-hosted/main.zig
@@ -622,8 +622,3 @@ fn findZigLibDir(allocator: &mem.Allocator) ![]u8 {
return error.FileNotFound;
}
-
-test "import tests" {
- _ = @import("tokenizer.zig");
- _ = @import("parser.zig");
-}
diff --git a/src-self-hosted/module.zig b/src-self-hosted/module.zig
@@ -8,9 +8,9 @@ const c = @import("c.zig");
const builtin = @import("builtin");
const Target = @import("target.zig").Target;
const warn = std.debug.warn;
-const Tokenizer = @import("tokenizer.zig").Tokenizer;
-const Token = @import("tokenizer.zig").Token;
-const Parser = @import("parser.zig").Parser;
+const Tokenizer = std.zig.Tokenizer;
+const Token = std.zig.Token;
+const Parser = std.zig.Parser;
const ArrayList = std.ArrayList;
pub const Module = struct {
diff --git a/src-self-hosted/parser.zig b/src-self-hosted/parser.zig
@@ -1,1160 +0,0 @@
-const std = @import("std");
-const assert = std.debug.assert;
-const ArrayList = std.ArrayList;
-const mem = std.mem;
-const ast = @import("ast.zig");
-const Tokenizer = @import("tokenizer.zig").Tokenizer;
-const Token = @import("tokenizer.zig").Token;
-const builtin = @import("builtin");
-const io = std.io;
-
-// TODO when we make parse errors into error types instead of printing directly,
-// get rid of this
-const warn = std.debug.warn;
-
-pub const Parser = struct {
- allocator: &mem.Allocator,
- tokenizer: &Tokenizer,
- put_back_tokens: [2]Token,
- put_back_count: usize,
- source_file_name: []const u8,
-
- pub const Tree = struct {
- root_node: &ast.NodeRoot,
-
- pub fn deinit(self: &const Tree) void {
- // TODO free the whole arena
- }
- };
-
- // This memory contents are used only during a function call. It's used to repurpose memory;
- // we reuse the same bytes for the stack data structure used by parsing, tree rendering, and
- // source rendering.
- const utility_bytes_align = @alignOf( union { a: RenderAstFrame, b: State, c: RenderState } );
- utility_bytes: []align(utility_bytes_align) u8,
-
- /// `allocator` should be an arena allocator. Parser never calls free on anything. After you're
- /// done with a Parser, free the arena. After the arena is freed, no member functions of Parser
- /// may be called.
- pub fn init(tokenizer: &Tokenizer, allocator: &mem.Allocator, source_file_name: []const u8) Parser {
- return Parser {
- .allocator = allocator,
- .tokenizer = tokenizer,
- .put_back_tokens = undefined,
- .put_back_count = 0,
- .source_file_name = source_file_name,
- .utility_bytes = []align(utility_bytes_align) u8{},
- };
- }
-
- pub fn deinit(self: &Parser) void {
- self.allocator.free(self.utility_bytes);
- }
-
- const TopLevelDeclCtx = struct {
- visib_token: ?Token,
- extern_token: ?Token,
- };
-
- const DestPtr = union(enum) {
- Field: &&ast.Node,
- NullableField: &?&ast.Node,
- List: &ArrayList(&ast.Node),
-
- pub fn store(self: &const DestPtr, value: &ast.Node) !void {
- switch (*self) {
- DestPtr.Field => |ptr| *ptr = value,
- DestPtr.NullableField => |ptr| *ptr = value,
- DestPtr.List => |list| try list.append(value),
- }
- }
- };
-
- const State = union(enum) {
- TopLevel,
- TopLevelExtern: ?Token,
- TopLevelDecl: TopLevelDeclCtx,
- Expression: DestPtr,
- ExpectOperand,
- Operand: &ast.Node,
- AfterOperand,
- InfixOp: &ast.NodeInfixOp,
- PrefixOp: &ast.NodePrefixOp,
- AddrOfModifiers: &ast.NodePrefixOp.AddrOfInfo,
- TypeExpr: DestPtr,
- VarDecl: &ast.NodeVarDecl,
- VarDeclAlign: &ast.NodeVarDecl,
- VarDeclEq: &ast.NodeVarDecl,
- ExpectToken: @TagType(Token.Id),
- FnProto: &ast.NodeFnProto,
- FnProtoAlign: &ast.NodeFnProto,
- ParamDecl: &ast.NodeFnProto,
- ParamDeclComma,
- FnDef: &ast.NodeFnProto,
- Block: &ast.NodeBlock,
- Statement: &ast.NodeBlock,
- };
-
- /// Returns an AST tree, allocated with the parser's allocator.
- /// Result should be freed with `freeAst` when done.
- pub fn parse(self: &Parser) !Tree {
- var stack = self.initUtilityArrayList(State);
- defer self.deinitUtilityArrayList(stack);
-
- const root_node = try self.createRoot();
- // TODO errdefer arena free root node
-
- try stack.append(State.TopLevel);
-
- while (true) {
- //{
- // const token = self.getNextToken();
- // warn("{} ", @tagName(token.id));
- // self.putBackToken(token);
- // var i: usize = stack.len;
- // while (i != 0) {
- // i -= 1;
- // warn("{} ", @tagName(stack.items[i]));
- // }
- // warn("\n");
- //}
-
- // This gives us 1 free append that can't fail
- const state = stack.pop();
-
- switch (state) {
- State.TopLevel => {
- const token = self.getNextToken();
- switch (token.id) {
- Token.Id.Keyword_pub, Token.Id.Keyword_export => {
- stack.append(State { .TopLevelExtern = token }) catch unreachable;
- continue;
- },
- Token.Id.Eof => return Tree {.root_node = root_node},
- else => {
- self.putBackToken(token);
- // TODO shouldn't need this cast
- stack.append(State { .TopLevelExtern = null }) catch unreachable;
- continue;
- },
- }
- },
- State.TopLevelExtern => |visib_token| {
- const token = self.getNextToken();
- if (token.id == Token.Id.Keyword_extern) {
- stack.append(State {
- .TopLevelDecl = TopLevelDeclCtx {
- .visib_token = visib_token,
- .extern_token = token,
- },
- }) catch unreachable;
- continue;
- }
- self.putBackToken(token);
- stack.append(State {
- .TopLevelDecl = TopLevelDeclCtx {
- .visib_token = visib_token,
- .extern_token = null,
- },
- }) catch unreachable;
- continue;
- },
- State.TopLevelDecl => |ctx| {
- const token = self.getNextToken();
- switch (token.id) {
- Token.Id.Keyword_var, Token.Id.Keyword_const => {
- stack.append(State.TopLevel) catch unreachable;
- // TODO shouldn't need these casts
- const var_decl_node = try self.createAttachVarDecl(&root_node.decls, ctx.visib_token,
- token, (?Token)(null), ctx.extern_token);
- try stack.append(State { .VarDecl = var_decl_node });
- continue;
- },
- Token.Id.Keyword_fn => {
- stack.append(State.TopLevel) catch unreachable;
- // TODO shouldn't need these casts
- const fn_proto = try self.createAttachFnProto(&root_node.decls, token,
- ctx.extern_token, (?Token)(null), (?Token)(null), (?Token)(null));
- try stack.append(State { .FnDef = fn_proto });
- try stack.append(State { .FnProto = fn_proto });
- continue;
- },
- Token.Id.StringLiteral => {
- @panic("TODO extern with string literal");
- },
- Token.Id.Keyword_nakedcc, Token.Id.Keyword_stdcallcc => {
- stack.append(State.TopLevel) catch unreachable;
- const fn_token = try self.eatToken(Token.Id.Keyword_fn);
- // TODO shouldn't need this cast
- const fn_proto = try self.createAttachFnProto(&root_node.decls, fn_token,
- ctx.extern_token, (?Token)(token), (?Token)(null), (?Token)(null));
- try stack.append(State { .FnDef = fn_proto });
- try stack.append(State { .FnProto = fn_proto });
- continue;
- },
- else => return self.parseError(token, "expected variable declaration or function, found {}", @tagName(token.id)),
- }
- },
- State.VarDecl => |var_decl| {
- var_decl.name_token = try self.eatToken(Token.Id.Identifier);
- stack.append(State { .VarDeclAlign = var_decl }) catch unreachable;
-
- const next_token = self.getNextToken();
- if (next_token.id == Token.Id.Colon) {
- try stack.append(State { .TypeExpr = DestPtr {.NullableField = &var_decl.type_node} });
- continue;
- }
-
- self.putBackToken(next_token);
- continue;
- },
- State.VarDeclAlign => |var_decl| {
- stack.append(State { .VarDeclEq = var_decl }) catch unreachable;
-
- const next_token = self.getNextToken();
- if (next_token.id == Token.Id.Keyword_align) {
- _ = try self.eatToken(Token.Id.LParen);
- try stack.append(State { .ExpectToken = Token.Id.RParen });
- try stack.append(State { .Expression = DestPtr{.NullableField = &var_decl.align_node} });
- continue;
- }
-
- self.putBackToken(next_token);
- continue;
- },
- State.VarDeclEq => |var_decl| {
- const token = self.getNextToken();
- if (token.id == Token.Id.Equal) {
- var_decl.eq_token = token;
- stack.append(State { .ExpectToken = Token.Id.Semicolon }) catch unreachable;
- try stack.append(State {
- .Expression = DestPtr {.NullableField = &var_decl.init_node},
- });
- continue;
- }
- if (token.id == Token.Id.Semicolon) {
- continue;
- }
- return self.parseError(token, "expected '=' or ';', found {}", @tagName(token.id));
- },
- State.ExpectToken => |token_id| {
- _ = try self.eatToken(token_id);
- continue;
- },
-
- State.Expression => |dest_ptr| {
- // save the dest_ptr for later
- stack.append(state) catch unreachable;
- try stack.append(State.ExpectOperand);
- continue;
- },
- State.ExpectOperand => {
- // we'll either get an operand (like 1 or x),
- // or a prefix operator (like ~ or return).
- const token = self.getNextToken();
- switch (token.id) {
- Token.Id.Keyword_return => {
- try stack.append(State { .PrefixOp = try self.createPrefixOp(token,
- ast.NodePrefixOp.PrefixOp.Return) });
- try stack.append(State.ExpectOperand);
- continue;
- },
- Token.Id.Ampersand => {
- const prefix_op = try self.createPrefixOp(token, ast.NodePrefixOp.PrefixOp{
- .AddrOf = ast.NodePrefixOp.AddrOfInfo {
- .align_expr = null,
- .bit_offset_start_token = null,
- .bit_offset_end_token = null,
- .const_token = null,
- .volatile_token = null,
- }
- });
- try stack.append(State { .PrefixOp = prefix_op });
- try stack.append(State.ExpectOperand);
- try stack.append(State { .AddrOfModifiers = &prefix_op.op.AddrOf });
- continue;
- },
- Token.Id.Identifier => {
- try stack.append(State {
- .Operand = &(try self.createIdentifier(token)).base
- });
- try stack.append(State.AfterOperand);
- continue;
- },
- Token.Id.IntegerLiteral => {
- try stack.append(State {
- .Operand = &(try self.createIntegerLiteral(token)).base
- });
- try stack.append(State.AfterOperand);
- continue;
- },
- Token.Id.FloatLiteral => {
- try stack.append(State {
- .Operand = &(try self.createFloatLiteral(token)).base
- });
- try stack.append(State.AfterOperand);
- continue;
- },
- else => return self.parseError(token, "expected primary expression, found {}", @tagName(token.id)),
- }
- },
-
- State.AfterOperand => {
- // we'll either get an infix operator (like != or ^),
- // or a postfix operator (like () or {}),
- // otherwise this expression is done (like on a ; or else).
- var token = self.getNextToken();
- switch (token.id) {
- Token.Id.EqualEqual => {
- try stack.append(State {
- .InfixOp = try self.createInfixOp(token, ast.NodeInfixOp.InfixOp.EqualEqual)
- });
- try stack.append(State.ExpectOperand);
- continue;
- },
- Token.Id.BangEqual => {
- try stack.append(State {
- .InfixOp = try self.createInfixOp(token, ast.NodeInfixOp.InfixOp.BangEqual)
- });
- try stack.append(State.ExpectOperand);
- continue;
- },
- else => {
- // no postfix/infix operator after this operand.
- self.putBackToken(token);
- // reduce the stack
- var expression: &ast.Node = stack.pop().Operand;
- while (true) {
- switch (stack.pop()) {
- State.Expression => |dest_ptr| {
- // we're done
- try dest_ptr.store(expression);
- break;
- },
- State.InfixOp => |infix_op| {
- infix_op.rhs = expression;
- infix_op.lhs = stack.pop().Operand;
- expression = &infix_op.base;
- continue;
- },
- State.PrefixOp => |prefix_op| {
- prefix_op.rhs = expression;
- expression = &prefix_op.base;
- continue;
- },
- else => unreachable,
- }
- }
- continue;
- },
- }
- },
-
- State.AddrOfModifiers => |addr_of_info| {
- var token = self.getNextToken();
- switch (token.id) {
- Token.Id.Keyword_align => {
- stack.append(state) catch unreachable;
- if (addr_of_info.align_expr != null) return self.parseError(token, "multiple align qualifiers");
- _ = try self.eatToken(Token.Id.LParen);
- try stack.append(State { .ExpectToken = Token.Id.RParen });
- try stack.append(State { .Expression = DestPtr{.NullableField = &addr_of_info.align_expr} });
- continue;
- },
- Token.Id.Keyword_const => {
- stack.append(state) catch unreachable;
- if (addr_of_info.const_token != null) return self.parseError(token, "duplicate qualifier: const");
- addr_of_info.const_token = token;
- continue;
- },
- Token.Id.Keyword_volatile => {
- stack.append(state) catch unreachable;
- if (addr_of_info.volatile_token != null) return self.parseError(token, "duplicate qualifier: volatile");
- addr_of_info.volatile_token = token;
- continue;
- },
- else => {
- self.putBackToken(token);
- continue;
- },
- }
- },
-
- State.TypeExpr => |dest_ptr| {
- const token = self.getNextToken();
- if (token.id == Token.Id.Keyword_var) {
- @panic("TODO param with type var");
- }
- self.putBackToken(token);
-
- stack.append(State { .Expression = dest_ptr }) catch unreachable;
- continue;
- },
-
- State.FnProto => |fn_proto| {
- stack.append(State { .FnProtoAlign = fn_proto }) catch unreachable;
- try stack.append(State { .ParamDecl = fn_proto });
- try stack.append(State { .ExpectToken = Token.Id.LParen });
-
- const next_token = self.getNextToken();
- if (next_token.id == Token.Id.Identifier) {
- fn_proto.name_token = next_token;
- continue;
- }
- self.putBackToken(next_token);
- continue;
- },
-
- State.FnProtoAlign => |fn_proto| {
- const token = self.getNextToken();
- if (token.id == Token.Id.Keyword_align) {
- @panic("TODO fn proto align");
- }
- self.putBackToken(token);
- stack.append(State {
- .TypeExpr = DestPtr {.Field = &fn_proto.return_type},
- }) catch unreachable;
- continue;
- },
-
- State.ParamDecl => |fn_proto| {
- var token = self.getNextToken();
- if (token.id == Token.Id.RParen) {
- continue;
- }
- const param_decl = try self.createAttachParamDecl(&fn_proto.params);
- if (token.id == Token.Id.Keyword_comptime) {
- param_decl.comptime_token = token;
- token = self.getNextToken();
- } else if (token.id == Token.Id.Keyword_noalias) {
- param_decl.noalias_token = token;
- token = self.getNextToken();
- }
- if (token.id == Token.Id.Identifier) {
- const next_token = self.getNextToken();
- if (next_token.id == Token.Id.Colon) {
- param_decl.name_token = token;
- token = self.getNextToken();
- } else {
- self.putBackToken(next_token);
- }
- }
- if (token.id == Token.Id.Ellipsis3) {
- param_decl.var_args_token = token;
- stack.append(State { .ExpectToken = Token.Id.RParen }) catch unreachable;
- continue;
- } else {
- self.putBackToken(token);
- }
-
- stack.append(State { .ParamDecl = fn_proto }) catch unreachable;
- try stack.append(State.ParamDeclComma);
- try stack.append(State {
- .TypeExpr = DestPtr {.Field = &param_decl.type_node}
- });
- continue;
- },
-
- State.ParamDeclComma => {
- const token = self.getNextToken();
- switch (token.id) {
- Token.Id.RParen => {
- _ = stack.pop(); // pop off the ParamDecl
- continue;
- },
- Token.Id.Comma => continue,
- else => return self.parseError(token, "expected ',' or ')', found {}", @tagName(token.id)),
- }
- },
-
- State.FnDef => |fn_proto| {
- const token = self.getNextToken();
- switch(token.id) {
- Token.Id.LBrace => {
- const block = try self.createBlock(token);
- fn_proto.body_node = &block.base;
- stack.append(State { .Block = block }) catch unreachable;
- continue;
- },
- Token.Id.Semicolon => continue,
- else => return self.parseError(token, "expected ';' or '{{', found {}", @tagName(token.id)),
- }
- },
-
- State.Block => |block| {
- const token = self.getNextToken();
- switch (token.id) {
- Token.Id.RBrace => {
- block.end_token = token;
- continue;
- },
- else => {
- self.putBackToken(token);
- stack.append(State { .Block = block }) catch unreachable;
- try stack.append(State { .Statement = block });
- continue;
- },
- }
- },
-
- State.Statement => |block| {
- {
- // Look for comptime var, comptime const
- const comptime_token = self.getNextToken();
- if (comptime_token.id == Token.Id.Keyword_comptime) {
- const mut_token = self.getNextToken();
- if (mut_token.id == Token.Id.Keyword_var or mut_token.id == Token.Id.Keyword_const) {
- // TODO shouldn't need these casts
- const var_decl = try self.createAttachVarDecl(&block.statements, (?Token)(null),
- mut_token, (?Token)(comptime_token), (?Token)(null));
- try stack.append(State { .VarDecl = var_decl });
- continue;
- }
- self.putBackToken(mut_token);
- }
- self.putBackToken(comptime_token);
- }
- {
- // Look for const, var
- const mut_token = self.getNextToken();
- if (mut_token.id == Token.Id.Keyword_var or mut_token.id == Token.Id.Keyword_const) {
- // TODO shouldn't need these casts
- const var_decl = try self.createAttachVarDecl(&block.statements, (?Token)(null),
- mut_token, (?Token)(null), (?Token)(null));
- try stack.append(State { .VarDecl = var_decl });
- continue;
- }
- self.putBackToken(mut_token);
- }
-
- stack.append(State { .ExpectToken = Token.Id.Semicolon }) catch unreachable;
- try stack.append(State { .Expression = DestPtr{.List = &block.statements} });
- continue;
- },
-
- // These are data, not control flow.
- State.InfixOp => unreachable,
- State.PrefixOp => unreachable,
- State.Operand => unreachable,
- }
- @import("std").debug.panic("{}", @tagName(state));
- //unreachable;
- }
- }
-
- fn createRoot(self: &Parser) !&ast.NodeRoot {
- const node = try self.allocator.create(ast.NodeRoot);
-
- *node = ast.NodeRoot {
- .base = ast.Node {.id = ast.Node.Id.Root},
- .decls = ArrayList(&ast.Node).init(self.allocator),
- };
- return node;
- }
-
- fn createVarDecl(self: &Parser, visib_token: &const ?Token, mut_token: &const Token, comptime_token: &const ?Token,
- extern_token: &const ?Token) !&ast.NodeVarDecl
- {
- const node = try self.allocator.create(ast.NodeVarDecl);
-
- *node = ast.NodeVarDecl {
- .base = ast.Node {.id = ast.Node.Id.VarDecl},
- .visib_token = *visib_token,
- .mut_token = *mut_token,
- .comptime_token = *comptime_token,
- .extern_token = *extern_token,
- .type_node = null,
- .align_node = null,
- .init_node = null,
- .lib_name = null,
- // initialized later
- .name_token = undefined,
- .eq_token = undefined,
- };
- return node;
- }
-
- fn createFnProto(self: &Parser, fn_token: &const Token, extern_token: &const ?Token,
- cc_token: &const ?Token, visib_token: &const ?Token, inline_token: &const ?Token) !&ast.NodeFnProto
- {
- const node = try self.allocator.create(ast.NodeFnProto);
-
- *node = ast.NodeFnProto {
- .base = ast.Node {.id = ast.Node.Id.FnProto},
- .visib_token = *visib_token,
- .name_token = null,
- .fn_token = *fn_token,
- .params = ArrayList(&ast.Node).init(self.allocator),
- .return_type = undefined,
- .var_args_token = null,
- .extern_token = *extern_token,
- .inline_token = *inline_token,
- .cc_token = *cc_token,
- .body_node = null,
- .lib_name = null,
- .align_expr = null,
- };
- return node;
- }
-
- fn createParamDecl(self: &Parser) !&ast.NodeParamDecl {
- const node = try self.allocator.create(ast.NodeParamDecl);
-
- *node = ast.NodeParamDecl {
- .base = ast.Node {.id = ast.Node.Id.ParamDecl},
- .comptime_token = null,
- .noalias_token = null,
- .name_token = null,
- .type_node = undefined,
- .var_args_token = null,
- };
- return node;
- }
-
- fn createBlock(self: &Parser, begin_token: &const Token) !&ast.NodeBlock {
- const node = try self.allocator.create(ast.NodeBlock);
-
- *node = ast.NodeBlock {
- .base = ast.Node {.id = ast.Node.Id.Block},
- .begin_token = *begin_token,
- .end_token = undefined,
- .statements = ArrayList(&ast.Node).init(self.allocator),
- };
- return node;
- }
-
- fn createInfixOp(self: &Parser, op_token: &const Token, op: &const ast.NodeInfixOp.InfixOp) !&ast.NodeInfixOp {
- const node = try self.allocator.create(ast.NodeInfixOp);
-
- *node = ast.NodeInfixOp {
- .base = ast.Node {.id = ast.Node.Id.InfixOp},
- .op_token = *op_token,
- .lhs = undefined,
- .op = *op,
- .rhs = undefined,
- };
- return node;
- }
-
- fn createPrefixOp(self: &Parser, op_token: &const Token, op: &const ast.NodePrefixOp.PrefixOp) !&ast.NodePrefixOp {
- const node = try self.allocator.create(ast.NodePrefixOp);
-
- *node = ast.NodePrefixOp {
- .base = ast.Node {.id = ast.Node.Id.PrefixOp},
- .op_token = *op_token,
- .op = *op,
- .rhs = undefined,
- };
- return node;
- }
-
- fn createIdentifier(self: &Parser, name_token: &const Token) !&ast.NodeIdentifier {
- const node = try self.allocator.create(ast.NodeIdentifier);
-
- *node = ast.NodeIdentifier {
- .base = ast.Node {.id = ast.Node.Id.Identifier},
- .name_token = *name_token,
- };
- return node;
- }
-
- fn createIntegerLiteral(self: &Parser, token: &const Token) !&ast.NodeIntegerLiteral {
- const node = try self.allocator.create(ast.NodeIntegerLiteral);
-
- *node = ast.NodeIntegerLiteral {
- .base = ast.Node {.id = ast.Node.Id.IntegerLiteral},
- .token = *token,
- };
- return node;
- }
-
- fn createFloatLiteral(self: &Parser, token: &const Token) !&ast.NodeFloatLiteral {
- const node = try self.allocator.create(ast.NodeFloatLiteral);
-
- *node = ast.NodeFloatLiteral {
- .base = ast.Node {.id = ast.Node.Id.FloatLiteral},
- .token = *token,
- };
- return node;
- }
-
- fn createAttachIdentifier(self: &Parser, dest_ptr: &const DestPtr, name_token: &const Token) !&ast.NodeIdentifier {
- const node = try self.createIdentifier(name_token);
- try dest_ptr.store(&node.base);
- return node;
- }
-
- fn createAttachParamDecl(self: &Parser, list: &ArrayList(&ast.Node)) !&ast.NodeParamDecl {
- const node = try self.createParamDecl();
- try list.append(&node.base);
- return node;
- }
-
- fn createAttachFnProto(self: &Parser, list: &ArrayList(&ast.Node), fn_token: &const Token,
- extern_token: &const ?Token, cc_token: &const ?Token, visib_token: &const ?Token,
- inline_token: &const ?Token) !&ast.NodeFnProto
- {
- const node = try self.createFnProto(fn_token, extern_token, cc_token, visib_token, inline_token);
- try list.append(&node.base);
- return node;
- }
-
- fn createAttachVarDecl(self: &Parser, list: &ArrayList(&ast.Node), visib_token: &const ?Token,
- mut_token: &const Token, comptime_token: &const ?Token, extern_token: &const ?Token) !&ast.NodeVarDecl
- {
- const node = try self.createVarDecl(visib_token, mut_token, comptime_token, extern_token);
- try list.append(&node.base);
- return node;
- }
-
- fn parseError(self: &Parser, token: &const Token, comptime fmt: []const u8, args: ...) error {
- const loc = self.tokenizer.getTokenLocation(token);
- warn("{}:{}:{}: error: " ++ fmt ++ "\n", self.source_file_name, loc.line + 1, loc.column + 1, args);
- warn("{}\n", self.tokenizer.buffer[loc.line_start..loc.line_end]);
- {
- var i: usize = 0;
- while (i < loc.column) : (i += 1) {
- warn(" ");
- }
- }
- {
- const caret_count = token.end - token.start;
- var i: usize = 0;
- while (i < caret_count) : (i += 1) {
- warn("~");
- }
- }
- warn("\n");
- return error.ParseError;
- }
-
- fn expectToken(self: &Parser, token: &const Token, id: @TagType(Token.Id)) !void {
- if (token.id != id) {
- return self.parseError(token, "expected {}, found {}", @tagName(id), @tagName(token.id));
- }
- }
-
- fn eatToken(self: &Parser, id: @TagType(Token.Id)) !Token {
- const token = self.getNextToken();
- try self.expectToken(token, id);
- return token;
- }
-
- fn putBackToken(self: &Parser, token: &const Token) void {
- self.put_back_tokens[self.put_back_count] = *token;
- self.put_back_count += 1;
- }
-
- fn getNextToken(self: &Parser) Token {
- if (self.put_back_count != 0) {
- const put_back_index = self.put_back_count - 1;
- const put_back_token = self.put_back_tokens[put_back_index];
- self.put_back_count = put_back_index;
- return put_back_token;
- } else {
- return self.tokenizer.next();
- }
- }
-
- const RenderAstFrame = struct {
- node: &ast.Node,
- indent: usize,
- };
-
- pub fn renderAst(self: &Parser, stream: var, root_node: &ast.NodeRoot) !void {
- var stack = self.initUtilityArrayList(RenderAstFrame);
- defer self.deinitUtilityArrayList(stack);
-
- try stack.append(RenderAstFrame {
- .node = &root_node.base,
- .indent = 0,
- });
-
- while (stack.popOrNull()) |frame| {
- {
- var i: usize = 0;
- while (i < frame.indent) : (i += 1) {
- try stream.print(" ");
- }
- }
- try stream.print("{}\n", @tagName(frame.node.id));
- var child_i: usize = 0;
- while (frame.node.iterate(child_i)) |child| : (child_i += 1) {
- try stack.append(RenderAstFrame {
- .node = child,
- .indent = frame.indent + 2,
- });
- }
- }
- }
-
- const RenderState = union(enum) {
- TopLevelDecl: &ast.Node,
- FnProtoRParen: &ast.NodeFnProto,
- ParamDecl: &ast.Node,
- Text: []const u8,
- Expression: &ast.Node,
- VarDecl: &ast.NodeVarDecl,
- Statement: &ast.Node,
- PrintIndent,
- Indent: usize,
- };
-
- pub fn renderSource(self: &Parser, stream: var, root_node: &ast.NodeRoot) !void {
- var stack = self.initUtilityArrayList(RenderState);
- defer self.deinitUtilityArrayList(stack);
-
- {
- var i = root_node.decls.len;
- while (i != 0) {
- i -= 1;
- const decl = root_node.decls.items[i];
- try stack.append(RenderState {.TopLevelDecl = decl});
- }
- }
-
- const indent_delta = 4;
- var indent: usize = 0;
- while (stack.popOrNull()) |state| {
- switch (state) {
- RenderState.TopLevelDecl => |decl| {
- switch (decl.id) {
- ast.Node.Id.FnProto => {
- const fn_proto = @fieldParentPtr(ast.NodeFnProto, "base", decl);
- if (fn_proto.visib_token) |visib_token| {
- switch (visib_token.id) {
- Token.Id.Keyword_pub => try stream.print("pub "),
- Token.Id.Keyword_export => try stream.print("export "),
- else => unreachable,
- }
- }
- if (fn_proto.extern_token) |extern_token| {
- try stream.print("{} ", self.tokenizer.getTokenSlice(extern_token));
- }
- try stream.print("fn");
-
- if (fn_proto.name_token) |name_token| {
- try stream.print(" {}", self.tokenizer.getTokenSlice(name_token));
- }
-
- try stream.print("(");
-
- try stack.append(RenderState { .Text = "\n" });
- if (fn_proto.body_node == null) {
- try stack.append(RenderState { .Text = ";" });
- }
-
- try stack.append(RenderState { .FnProtoRParen = fn_proto});
- var i = fn_proto.params.len;
- while (i != 0) {
- i -= 1;
- const param_decl_node = fn_proto.params.items[i];
- try stack.append(RenderState { .ParamDecl = param_decl_node});
- if (i != 0) {
- try stack.append(RenderState { .Text = ", " });
- }
- }
- },
- ast.Node.Id.VarDecl => {
- const var_decl = @fieldParentPtr(ast.NodeVarDecl, "base", decl);
- try stack.append(RenderState { .Text = "\n"});
- try stack.append(RenderState { .VarDecl = var_decl});
-
- },
- else => unreachable,
- }
- },
-
- RenderState.VarDecl => |var_decl| {
- if (var_decl.visib_token) |visib_token| {
- try stream.print("{} ", self.tokenizer.getTokenSlice(visib_token));
- }
- if (var_decl.extern_token) |extern_token| {
- try stream.print("{} ", self.tokenizer.getTokenSlice(extern_token));
- if (var_decl.lib_name != null) {
- @panic("TODO");
- }
- }
- if (var_decl.comptime_token) |comptime_token| {
- try stream.print("{} ", self.tokenizer.getTokenSlice(comptime_token));
- }
- try stream.print("{} ", self.tokenizer.getTokenSlice(var_decl.mut_token));
- try stream.print("{}", self.tokenizer.getTokenSlice(var_decl.name_token));
-
- try stack.append(RenderState { .Text = ";" });
- if (var_decl.init_node) |init_node| {
- try stack.append(RenderState { .Expression = init_node });
- try stack.append(RenderState { .Text = " = " });
- }
- if (var_decl.align_node) |align_node| {
- try stack.append(RenderState { .Text = ")" });
- try stack.append(RenderState { .Expression = align_node });
- try stack.append(RenderState { .Text = " align(" });
- }
- if (var_decl.type_node) |type_node| {
- try stream.print(": ");
- try stack.append(RenderState { .Expression = type_node });
- }
- },
-
- RenderState.ParamDecl => |base| {
- const param_decl = @fieldParentPtr(ast.NodeParamDecl, "base", base);
- if (param_decl.comptime_token) |comptime_token| {
- try stream.print("{} ", self.tokenizer.getTokenSlice(comptime_token));
- }
- if (param_decl.noalias_token) |noalias_token| {
- try stream.print("{} ", self.tokenizer.getTokenSlice(noalias_token));
- }
- if (param_decl.name_token) |name_token| {
- try stream.print("{}: ", self.tokenizer.getTokenSlice(name_token));
- }
- if (param_decl.var_args_token) |var_args_token| {
- try stream.print("{}", self.tokenizer.getTokenSlice(var_args_token));
- } else {
- try stack.append(RenderState { .Expression = param_decl.type_node});
- }
- },
- RenderState.Text => |bytes| {
- try stream.write(bytes);
- },
- RenderState.Expression => |base| switch (base.id) {
- ast.Node.Id.Identifier => {
- const identifier = @fieldParentPtr(ast.NodeIdentifier, "base", base);
- try stream.print("{}", self.tokenizer.getTokenSlice(identifier.name_token));
- },
- ast.Node.Id.Block => {
- const block = @fieldParentPtr(ast.NodeBlock, "base", base);
- try stream.write("{");
- try stack.append(RenderState { .Text = "}"});
- try stack.append(RenderState.PrintIndent);
- try stack.append(RenderState { .Indent = indent});
- try stack.append(RenderState { .Text = "\n"});
- var i = block.statements.len;
- while (i != 0) {
- i -= 1;
- const statement_node = block.statements.items[i];
- try stack.append(RenderState { .Statement = statement_node});
- try stack.append(RenderState.PrintIndent);
- try stack.append(RenderState { .Indent = indent + indent_delta});
- try stack.append(RenderState { .Text = "\n" });
- }
- },
- ast.Node.Id.InfixOp => {
- const prefix_op_node = @fieldParentPtr(ast.NodeInfixOp, "base", base);
- try stack.append(RenderState { .Expression = prefix_op_node.rhs });
- switch (prefix_op_node.op) {
- ast.NodeInfixOp.InfixOp.EqualEqual => {
- try stack.append(RenderState { .Text = " == "});
- },
- ast.NodeInfixOp.InfixOp.BangEqual => {
- try stack.append(RenderState { .Text = " != "});
- },
- else => unreachable,
- }
- try stack.append(RenderState { .Expression = prefix_op_node.lhs });
- },
- ast.Node.Id.PrefixOp => {
- const prefix_op_node = @fieldParentPtr(ast.NodePrefixOp, "base", base);
- try stack.append(RenderState { .Expression = prefix_op_node.rhs });
- switch (prefix_op_node.op) {
- ast.NodePrefixOp.PrefixOp.Return => {
- try stream.write("return ");
- },
- ast.NodePrefixOp.PrefixOp.AddrOf => |addr_of_info| {
- try stream.write("&");
- if (addr_of_info.volatile_token != null) {
- try stack.append(RenderState { .Text = "volatile "});
- }
- if (addr_of_info.const_token != null) {
- try stack.append(RenderState { .Text = "const "});
- }
- if (addr_of_info.align_expr) |align_expr| {
- try stream.print("align(");
- try stack.append(RenderState { .Text = ") "});
- try stack.append(RenderState { .Expression = align_expr});
- }
- },
- else => unreachable,
- }
- },
- ast.Node.Id.IntegerLiteral => {
- const integer_literal = @fieldParentPtr(ast.NodeIntegerLiteral, "base", base);
- try stream.print("{}", self.tokenizer.getTokenSlice(integer_literal.token));
- },
- ast.Node.Id.FloatLiteral => {
- const float_literal = @fieldParentPtr(ast.NodeFloatLiteral, "base", base);
- try stream.print("{}", self.tokenizer.getTokenSlice(float_literal.token));
- },
- else => unreachable,
- },
- RenderState.FnProtoRParen => |fn_proto| {
- try stream.print(")");
- if (fn_proto.align_expr != null) {
- @panic("TODO");
- }
- try stream.print(" ");
- if (fn_proto.body_node) |body_node| {
- try stack.append(RenderState { .Expression = body_node});
- try stack.append(RenderState { .Text = " "});
- }
- try stack.append(RenderState { .Expression = fn_proto.return_type});
- },
- RenderState.Statement => |base| {
- switch (base.id) {
- ast.Node.Id.VarDecl => {
- const var_decl = @fieldParentPtr(ast.NodeVarDecl, "base", base);
- try stack.append(RenderState { .VarDecl = var_decl});
- },
- else => {
- try stack.append(RenderState { .Text = ";"});
- try stack.append(RenderState { .Expression = base});
- },
- }
- },
- RenderState.Indent => |new_indent| indent = new_indent,
- RenderState.PrintIndent => try stream.writeByteNTimes(' ', indent),
- }
- }
- }
-
- fn initUtilityArrayList(self: &Parser, comptime T: type) ArrayList(T) {
- const new_byte_count = self.utility_bytes.len - self.utility_bytes.len % @sizeOf(T);
- self.utility_bytes = self.allocator.alignedShrink(u8, utility_bytes_align, self.utility_bytes, new_byte_count);
- const typed_slice = ([]T)(self.utility_bytes);
- return ArrayList(T) {
- .allocator = self.allocator,
- .items = typed_slice,
- .len = 0,
- };
- }
-
- fn deinitUtilityArrayList(self: &Parser, list: var) void {
- self.utility_bytes = ([]align(utility_bytes_align) u8)(list.items);
- }
-
-};
-
-var fixed_buffer_mem: [100 * 1024]u8 = undefined;
-
-fn testParse(source: []const u8, allocator: &mem.Allocator) ![]u8 {
- var padded_source: [0x100]u8 = undefined;
- std.mem.copy(u8, padded_source[0..source.len], source);
- padded_source[source.len + 0] = '\n';
- padded_source[source.len + 1] = '\n';
- padded_source[source.len + 2] = '\n';
-
- var tokenizer = Tokenizer.init(padded_source[0..source.len + 3]);
- var parser = Parser.init(&tokenizer, allocator, "(memory buffer)");
- defer parser.deinit();
-
- const tree = try parser.parse();
- defer tree.deinit();
-
- var buffer = try std.Buffer.initSize(allocator, 0);
- var buffer_out_stream = io.BufferOutStream.init(&buffer);
- try parser.renderSource(&buffer_out_stream.stream, tree.root_node);
- return buffer.toOwnedSlice();
-}
-
-// TODO test for memory leaks
-// TODO test for valid frees
-fn testCanonical(source: []const u8) !void {
- const needed_alloc_count = x: {
- // Try it once with unlimited memory, make sure it works
- var fixed_allocator = mem.FixedBufferAllocator.init(fixed_buffer_mem[0..]);
- var failing_allocator = std.debug.FailingAllocator.init(&fixed_allocator.allocator, @maxValue(usize));
- const result_source = try testParse(source, &failing_allocator.allocator);
- if (!mem.eql(u8, result_source, source)) {
- warn("\n====== expected this output: =========\n");
- warn("{}", source);
- warn("\n======== instead found this: =========\n");
- warn("{}", result_source);
- warn("\n======================================\n");
- return error.TestFailed;
- }
- failing_allocator.allocator.free(result_source);
- break :x failing_allocator.index;
- };
-
- var fail_index: usize = 0;
- while (fail_index < needed_alloc_count) : (fail_index += 1) {
- var fixed_allocator = mem.FixedBufferAllocator.init(fixed_buffer_mem[0..]);
- var failing_allocator = std.debug.FailingAllocator.init(&fixed_allocator.allocator, fail_index);
- if (testParse(source, &failing_allocator.allocator)) |_| {
- return error.NondeterministicMemoryUsage;
- } else |err| {
- assert(err == error.OutOfMemory);
- // TODO make this pass
- //if (failing_allocator.allocated_bytes != failing_allocator.freed_bytes) {
- // warn("\nfail_index: {}/{}\nallocated bytes: {}\nfreed bytes: {}\nallocations: {}\ndeallocations: {}\n",
- // fail_index, needed_alloc_count,
- // failing_allocator.allocated_bytes, failing_allocator.freed_bytes,
- // failing_allocator.index, failing_allocator.deallocations);
- // return error.MemoryLeakDetected;
- //}
- }
- }
-}
-
-test "zig fmt" {
- try testCanonical(
- \\extern fn puts(s: &const u8) c_int;
- \\
- );
-
- try testCanonical(
- \\const a = b;
- \\pub const a = b;
- \\var a = b;
- \\pub var a = b;
- \\const a: i32 = b;
- \\pub const a: i32 = b;
- \\var a: i32 = b;
- \\pub var a: i32 = b;
- \\
- );
-
- try testCanonical(
- \\extern var foo: c_int;
- \\
- );
-
- try testCanonical(
- \\var foo: c_int align(1);
- \\
- );
-
- try testCanonical(
- \\fn main(argc: c_int, argv: &&u8) c_int {
- \\ const a = b;
- \\}
- \\
- );
-
- try testCanonical(
- \\fn foo(argc: c_int, argv: &&u8) c_int {
- \\ return 0;
- \\}
- \\
- );
-
- try testCanonical(
- \\extern fn f1(s: &align(&u8) u8) c_int;
- \\
- );
-
- try testCanonical(
- \\extern fn f1(s: &&align(1) &const &volatile u8) c_int;
- \\extern fn f2(s: &align(1) const &align(1) volatile &const volatile u8) c_int;
- \\extern fn f3(s: &align(1) const volatile u8) c_int;
- \\
- );
-
- try testCanonical(
- \\fn f1(a: bool, b: bool) bool {
- \\ a != b;
- \\ return a == b;
- \\}
- \\
- );
-}
diff --git a/src-self-hosted/tokenizer.zig b/src-self-hosted/tokenizer.zig
@@ -1,659 +0,0 @@
-const std = @import("std");
-const mem = std.mem;
-
-pub const Token = struct {
- id: Id,
- start: usize,
- end: usize,
-
- const KeywordId = struct {
- bytes: []const u8,
- id: Id,
- };
-
- const keywords = []KeywordId {
- KeywordId{.bytes="align", .id = Id.Keyword_align},
- KeywordId{.bytes="and", .id = Id.Keyword_and},
- KeywordId{.bytes="asm", .id = Id.Keyword_asm},
- KeywordId{.bytes="break", .id = Id.Keyword_break},
- KeywordId{.bytes="comptime", .id = Id.Keyword_comptime},
- KeywordId{.bytes="const", .id = Id.Keyword_const},
- KeywordId{.bytes="continue", .id = Id.Keyword_continue},
- KeywordId{.bytes="defer", .id = Id.Keyword_defer},
- KeywordId{.bytes="else", .id = Id.Keyword_else},
- KeywordId{.bytes="enum", .id = Id.Keyword_enum},
- KeywordId{.bytes="error", .id = Id.Keyword_error},
- KeywordId{.bytes="export", .id = Id.Keyword_export},
- KeywordId{.bytes="extern", .id = Id.Keyword_extern},
- KeywordId{.bytes="false", .id = Id.Keyword_false},
- KeywordId{.bytes="fn", .id = Id.Keyword_fn},
- KeywordId{.bytes="for", .id = Id.Keyword_for},
- KeywordId{.bytes="goto", .id = Id.Keyword_goto},
- KeywordId{.bytes="if", .id = Id.Keyword_if},
- KeywordId{.bytes="inline", .id = Id.Keyword_inline},
- KeywordId{.bytes="nakedcc", .id = Id.Keyword_nakedcc},
- KeywordId{.bytes="noalias", .id = Id.Keyword_noalias},
- KeywordId{.bytes="null", .id = Id.Keyword_null},
- KeywordId{.bytes="or", .id = Id.Keyword_or},
- KeywordId{.bytes="packed", .id = Id.Keyword_packed},
- KeywordId{.bytes="pub", .id = Id.Keyword_pub},
- KeywordId{.bytes="return", .id = Id.Keyword_return},
- KeywordId{.bytes="stdcallcc", .id = Id.Keyword_stdcallcc},
- KeywordId{.bytes="struct", .id = Id.Keyword_struct},
- KeywordId{.bytes="switch", .id = Id.Keyword_switch},
- KeywordId{.bytes="test", .id = Id.Keyword_test},
- KeywordId{.bytes="this", .id = Id.Keyword_this},
- KeywordId{.bytes="true", .id = Id.Keyword_true},
- KeywordId{.bytes="undefined", .id = Id.Keyword_undefined},
- KeywordId{.bytes="union", .id = Id.Keyword_union},
- KeywordId{.bytes="unreachable", .id = Id.Keyword_unreachable},
- KeywordId{.bytes="use", .id = Id.Keyword_use},
- KeywordId{.bytes="var", .id = Id.Keyword_var},
- KeywordId{.bytes="volatile", .id = Id.Keyword_volatile},
- KeywordId{.bytes="while", .id = Id.Keyword_while},
- };
-
- fn getKeyword(bytes: []const u8) ?Id {
- for (keywords) |kw| {
- if (mem.eql(u8, kw.bytes, bytes)) {
- return kw.id;
- }
- }
- return null;
- }
-
- const StrLitKind = enum {Normal, C};
-
- pub const Id = union(enum) {
- Invalid,
- Identifier,
- StringLiteral: StrLitKind,
- Eof,
- Builtin,
- Bang,
- Equal,
- EqualEqual,
- BangEqual,
- LParen,
- RParen,
- Semicolon,
- Percent,
- LBrace,
- RBrace,
- Period,
- Ellipsis2,
- Ellipsis3,
- Minus,
- Arrow,
- Colon,
- Slash,
- Comma,
- Ampersand,
- AmpersandEqual,
- IntegerLiteral,
- FloatLiteral,
- Keyword_align,
- Keyword_and,
- Keyword_asm,
- Keyword_break,
- Keyword_comptime,
- Keyword_const,
- Keyword_continue,
- Keyword_defer,
- Keyword_else,
- Keyword_enum,
- Keyword_error,
- Keyword_export,
- Keyword_extern,
- Keyword_false,
- Keyword_fn,
- Keyword_for,
- Keyword_goto,
- Keyword_if,
- Keyword_inline,
- Keyword_nakedcc,
- Keyword_noalias,
- Keyword_null,
- Keyword_or,
- Keyword_packed,
- Keyword_pub,
- Keyword_return,
- Keyword_stdcallcc,
- Keyword_struct,
- Keyword_switch,
- Keyword_test,
- Keyword_this,
- Keyword_true,
- Keyword_undefined,
- Keyword_union,
- Keyword_unreachable,
- Keyword_use,
- Keyword_var,
- Keyword_volatile,
- Keyword_while,
- };
-};
-
-pub const Tokenizer = struct {
- buffer: []const u8,
- index: usize,
- pending_invalid_token: ?Token,
-
- pub const Location = struct {
- line: usize,
- column: usize,
- line_start: usize,
- line_end: usize,
- };
-
- pub fn getTokenLocation(self: &Tokenizer, token: &const Token) Location {
- var loc = Location {
- .line = 0,
- .column = 0,
- .line_start = 0,
- .line_end = 0,
- };
- for (self.buffer) |c, i| {
- if (i == token.start) {
- loc.line_end = i;
- while (loc.line_end < self.buffer.len and self.buffer[loc.line_end] != '\n') : (loc.line_end += 1) {}
- return loc;
- }
- if (c == '\n') {
- loc.line += 1;
- loc.column = 0;
- loc.line_start = i + 1;
- } else {
- loc.column += 1;
- }
- }
- return loc;
- }
-
- /// For debugging purposes
- pub fn dump(self: &Tokenizer, token: &const Token) void {
- std.debug.warn("{} \"{}\"\n", @tagName(token.id), self.buffer[token.start..token.end]);
- }
-
- /// buffer must end with "\n\n\n". This is so that attempting to decode
- /// a the 3 trailing bytes of a 4-byte utf8 sequence is never a buffer overflow.
- pub fn init(buffer: []const u8) Tokenizer {
- std.debug.assert(buffer[buffer.len - 1] == '\n');
- std.debug.assert(buffer[buffer.len - 2] == '\n');
- std.debug.assert(buffer[buffer.len - 3] == '\n');
- return Tokenizer {
- .buffer = buffer,
- .index = 0,
- .pending_invalid_token = null,
- };
- }
-
- const State = enum {
- Start,
- Identifier,
- Builtin,
- C,
- StringLiteral,
- StringLiteralBackslash,
- Equal,
- Bang,
- Minus,
- Slash,
- LineComment,
- Zero,
- IntegerLiteral,
- IntegerLiteralWithRadix,
- NumberDot,
- FloatFraction,
- FloatExponentUnsigned,
- FloatExponentNumber,
- Ampersand,
- Period,
- Period2,
- };
-
- pub fn next(self: &Tokenizer) Token {
- if (self.pending_invalid_token) |token| {
- self.pending_invalid_token = null;
- return token;
- }
- var state = State.Start;
- var result = Token {
- .id = Token.Id.Eof,
- .start = self.index,
- .end = undefined,
- };
- while (self.index < self.buffer.len) : (self.index += 1) {
- const c = self.buffer[self.index];
- switch (state) {
- State.Start => switch (c) {
- ' ', '\n' => {
- result.start = self.index + 1;
- },
- 'c' => {
- state = State.C;
- result.id = Token.Id.Identifier;
- },
- '"' => {
- state = State.StringLiteral;
- result.id = Token.Id { .StringLiteral = Token.StrLitKind.Normal };
- },
- 'a'...'b', 'd'...'z', 'A'...'Z', '_' => {
- state = State.Identifier;
- result.id = Token.Id.Identifier;
- },
- '@' => {
- state = State.Builtin;
- result.id = Token.Id.Builtin;
- },
- '=' => {
- state = State.Equal;
- },
- '!' => {
- state = State.Bang;
- },
- '(' => {
- result.id = Token.Id.LParen;
- self.index += 1;
- break;
- },
- ')' => {
- result.id = Token.Id.RParen;
- self.index += 1;
- break;
- },
- ';' => {
- result.id = Token.Id.Semicolon;
- self.index += 1;
- break;
- },
- ',' => {
- result.id = Token.Id.Comma;
- self.index += 1;
- break;
- },
- ':' => {
- result.id = Token.Id.Colon;
- self.index += 1;
- break;
- },
- '%' => {
- result.id = Token.Id.Percent;
- self.index += 1;
- break;
- },
- '{' => {
- result.id = Token.Id.LBrace;
- self.index += 1;
- break;
- },
- '}' => {
- result.id = Token.Id.RBrace;
- self.index += 1;
- break;
- },
- '.' => {
- state = State.Period;
- },
- '-' => {
- state = State.Minus;
- },
- '/' => {
- state = State.Slash;
- },
- '&' => {
- state = State.Ampersand;
- },
- '0' => {
- state = State.Zero;
- result.id = Token.Id.IntegerLiteral;
- },
- '1'...'9' => {
- state = State.IntegerLiteral;
- result.id = Token.Id.IntegerLiteral;
- },
- else => {
- result.id = Token.Id.Invalid;
- self.index += 1;
- break;
- },
- },
- State.Ampersand => switch (c) {
- '=' => {
- result.id = Token.Id.AmpersandEqual;
- self.index += 1;
- break;
- },
- else => {
- result.id = Token.Id.Ampersand;
- break;
- },
- },
- State.Identifier => switch (c) {
- 'a'...'z', 'A'...'Z', '_', '0'...'9' => {},
- else => {
- if (Token.getKeyword(self.buffer[result.start..self.index])) |id| {
- result.id = id;
- }
- break;
- },
- },
- State.Builtin => switch (c) {
- 'a'...'z', 'A'...'Z', '_', '0'...'9' => {},
- else => break,
- },
- State.C => switch (c) {
- '\\' => @panic("TODO"),
- '"' => {
- state = State.StringLiteral;
- result.id = Token.Id { .StringLiteral = Token.StrLitKind.C };
- },
- 'a'...'z', 'A'...'Z', '_', '0'...'9' => {
- state = State.Identifier;
- },
- else => break,
- },
- State.StringLiteral => switch (c) {
- '\\' => {
- state = State.StringLiteralBackslash;
- },
- '"' => {
- self.index += 1;
- break;
- },
- '\n' => break, // Look for this error later.
- else => self.checkLiteralCharacter(),
- },
-
- State.StringLiteralBackslash => switch (c) {
- '\n' => break, // Look for this error later.
- else => {
- state = State.StringLiteral;
- },
- },
-
- State.Bang => switch (c) {
- '=' => {
- result.id = Token.Id.BangEqual;
- self.index += 1;
- break;
- },
- else => {
- result.id = Token.Id.Bang;
- break;
- },
- },
-
- State.Equal => switch (c) {
- '=' => {
- result.id = Token.Id.EqualEqual;
- self.index += 1;
- break;
- },
- else => {
- result.id = Token.Id.Equal;
- break;
- },
- },
-
- State.Minus => switch (c) {
- '>' => {
- result.id = Token.Id.Arrow;
- self.index += 1;
- break;
- },
- else => {
- result.id = Token.Id.Minus;
- break;
- },
- },
-
- State.Period => switch (c) {
- '.' => {
- state = State.Period2;
- },
- else => {
- result.id = Token.Id.Period;
- break;
- },
- },
-
- State.Period2 => switch (c) {
- '.' => {
- result.id = Token.Id.Ellipsis3;
- self.index += 1;
- break;
- },
- else => {
- result.id = Token.Id.Ellipsis2;
- break;
- },
- },
-
- State.Slash => switch (c) {
- '/' => {
- result.id = undefined;
- state = State.LineComment;
- },
- else => {
- result.id = Token.Id.Slash;
- break;
- },
- },
- State.LineComment => switch (c) {
- '\n' => {
- state = State.Start;
- result = Token {
- .id = Token.Id.Eof,
- .start = self.index + 1,
- .end = undefined,
- };
- },
- else => self.checkLiteralCharacter(),
- },
- State.Zero => switch (c) {
- 'b', 'o', 'x' => {
- state = State.IntegerLiteralWithRadix;
- },
- else => {
- // reinterpret as a normal number
- self.index -= 1;
- state = State.IntegerLiteral;
- },
- },
- State.IntegerLiteral => switch (c) {
- '.' => {
- state = State.NumberDot;
- },
- 'p', 'P', 'e', 'E' => {
- state = State.FloatExponentUnsigned;
- },
- '0'...'9' => {},
- else => break,
- },
- State.IntegerLiteralWithRadix => switch (c) {
- '.' => {
- state = State.NumberDot;
- },
- 'p', 'P' => {
- state = State.FloatExponentUnsigned;
- },
- '0'...'9', 'a'...'f', 'A'...'F' => {},
- else => break,
- },
- State.NumberDot => switch (c) {
- '.' => {
- self.index -= 1;
- state = State.Start;
- break;
- },
- else => {
- self.index -= 1;
- result.id = Token.Id.FloatLiteral;
- state = State.FloatFraction;
- },
- },
- State.FloatFraction => switch (c) {
- 'p', 'P' => {
- state = State.FloatExponentUnsigned;
- },
- '0'...'9', 'a'...'f', 'A'...'F' => {},
- else => break,
- },
- State.FloatExponentUnsigned => switch (c) {
- '+', '-' => {
- state = State.FloatExponentNumber;
- },
- else => {
- // reinterpret as a normal exponent number
- self.index -= 1;
- state = State.FloatExponentNumber;
- }
- },
- State.FloatExponentNumber => switch (c) {
- '0'...'9', 'a'...'f', 'A'...'F' => {},
- else => break,
- },
- }
- }
- result.end = self.index;
-
- if (result.id == Token.Id.Eof) {
- if (self.pending_invalid_token) |token| {
- self.pending_invalid_token = null;
- return token;
- }
- }
-
- return result;
- }
-
- pub fn getTokenSlice(self: &const Tokenizer, token: &const Token) []const u8 {
- return self.buffer[token.start..token.end];
- }
-
- fn checkLiteralCharacter(self: &Tokenizer) void {
- if (self.pending_invalid_token != null) return;
- const invalid_length = self.getInvalidCharacterLength();
- if (invalid_length == 0) return;
- self.pending_invalid_token = Token {
- .id = Token.Id.Invalid,
- .start = self.index,
- .end = self.index + invalid_length,
- };
- }
-
- fn getInvalidCharacterLength(self: &Tokenizer) u3 {
- const c0 = self.buffer[self.index];
- if (c0 < 0x80) {
- if (c0 < 0x20 or c0 == 0x7f) {
- // ascii control codes are never allowed
- // (note that \n was checked before we got here)
- return 1;
- }
- // looks fine to me.
- return 0;
- } else {
- // check utf8-encoded character.
- const length = std.unicode.utf8ByteSequenceLength(c0) catch return 1;
- // the last 3 bytes in the buffer are guaranteed to be '\n',
- // which means we don't need to do any bounds checking here.
- const bytes = self.buffer[self.index..self.index + length];
- switch (length) {
- 2 => {
- const value = std.unicode.utf8Decode2(bytes) catch return length;
- if (value == 0x85) return length; // U+0085 (NEL)
- },
- 3 => {
- const value = std.unicode.utf8Decode3(bytes) catch return length;
- if (value == 0x2028) return length; // U+2028 (LS)
- if (value == 0x2029) return length; // U+2029 (PS)
- },
- 4 => {
- _ = std.unicode.utf8Decode4(bytes) catch return length;
- },
- else => unreachable,
- }
- self.index += length - 1;
- return 0;
- }
- }
-};
-
-
-
-test "tokenizer" {
- testTokenize("test", []Token.Id {
- Token.Id.Keyword_test,
- });
-}
-
-test "tokenizer - invalid token characters" {
- testTokenize("#", []Token.Id{Token.Id.Invalid});
- testTokenize("`", []Token.Id{Token.Id.Invalid});
-}
-
-test "tokenizer - invalid literal/comment characters" {
- testTokenize("\"\x00\"", []Token.Id {
- Token.Id { .StringLiteral = Token.StrLitKind.Normal },
- Token.Id.Invalid,
- });
- testTokenize("//\x00", []Token.Id {
- Token.Id.Invalid,
- });
- testTokenize("//\x1f", []Token.Id {
- Token.Id.Invalid,
- });
- testTokenize("//\x7f", []Token.Id {
- Token.Id.Invalid,
- });
-}
-
-test "tokenizer - utf8" {
- testTokenize("//\xc2\x80", []Token.Id{});
- testTokenize("//\xf4\x8f\xbf\xbf", []Token.Id{});
-}
-
-test "tokenizer - invalid utf8" {
- testTokenize("//\x80", []Token.Id{Token.Id.Invalid});
- testTokenize("//\xbf", []Token.Id{Token.Id.Invalid});
- testTokenize("//\xf8", []Token.Id{Token.Id.Invalid});
- testTokenize("//\xff", []Token.Id{Token.Id.Invalid});
- testTokenize("//\xc2\xc0", []Token.Id{Token.Id.Invalid});
- testTokenize("//\xe0", []Token.Id{Token.Id.Invalid});
- testTokenize("//\xf0", []Token.Id{Token.Id.Invalid});
- testTokenize("//\xf0\x90\x80\xc0", []Token.Id{Token.Id.Invalid});
-}
-
-test "tokenizer - illegal unicode codepoints" {
- // unicode newline characters.U+0085, U+2028, U+2029
- testTokenize("//\xc2\x84", []Token.Id{});
- testTokenize("//\xc2\x85", []Token.Id{Token.Id.Invalid});
- testTokenize("//\xc2\x86", []Token.Id{});
- testTokenize("//\xe2\x80\xa7", []Token.Id{});
- testTokenize("//\xe2\x80\xa8", []Token.Id{Token.Id.Invalid});
- testTokenize("//\xe2\x80\xa9", []Token.Id{Token.Id.Invalid});
- testTokenize("//\xe2\x80\xaa", []Token.Id{});
-}
-
-fn testTokenize(source: []const u8, expected_tokens: []const Token.Id) void {
- // (test authors, just make this bigger if you need it)
- var padded_source: [0x100]u8 = undefined;
- std.mem.copy(u8, padded_source[0..source.len], source);
- padded_source[source.len + 0] = '\n';
- padded_source[source.len + 1] = '\n';
- padded_source[source.len + 2] = '\n';
-
- var tokenizer = Tokenizer.init(padded_source[0..source.len + 3]);
- for (expected_tokens) |expected_token_id| {
- const token = tokenizer.next();
- std.debug.assert(@TagType(Token.Id)(token.id) == @TagType(Token.Id)(expected_token_id));
- switch (expected_token_id) {
- Token.Id.StringLiteral => |expected_kind| {
- std.debug.assert(expected_kind == switch (token.id) { Token.Id.StringLiteral => |kind| kind, else => unreachable });
- },
- else => {},
- }
- }
- std.debug.assert(tokenizer.next().id == Token.Id.Eof);
-}
diff --git a/std/index.zig b/std/index.zig
@@ -28,6 +28,7 @@ pub const os = @import("os/index.zig");
pub const rand = @import("rand.zig");
pub const sort = @import("sort.zig");
pub const unicode = @import("unicode.zig");
+pub const zig = @import("zig/index.zig");
test "std" {
// run tests from these
@@ -58,4 +59,5 @@ test "std" {
_ = @import("rand.zig");
_ = @import("sort.zig");
_ = @import("unicode.zig");
+ _ = @import("zig/index.zig");
}
diff --git a/std/zig/ast.zig b/std/zig/ast.zig
@@ -0,0 +1,271 @@
+const std = @import("../index.zig");
+const assert = std.debug.assert;
+const ArrayList = std.ArrayList;
+const Token = std.zig.Token;
+const mem = std.mem;
+
+pub const Node = struct {
+ id: Id,
+
+ pub const Id = enum {
+ Root,
+ VarDecl,
+ Identifier,
+ FnProto,
+ ParamDecl,
+ Block,
+ InfixOp,
+ PrefixOp,
+ IntegerLiteral,
+ FloatLiteral,
+ };
+
+ pub fn iterate(base: &Node, index: usize) ?&Node {
+ return switch (base.id) {
+ Id.Root => @fieldParentPtr(NodeRoot, "base", base).iterate(index),
+ Id.VarDecl => @fieldParentPtr(NodeVarDecl, "base", base).iterate(index),
+ Id.Identifier => @fieldParentPtr(NodeIdentifier, "base", base).iterate(index),
+ Id.FnProto => @fieldParentPtr(NodeFnProto, "base", base).iterate(index),
+ Id.ParamDecl => @fieldParentPtr(NodeParamDecl, "base", base).iterate(index),
+ Id.Block => @fieldParentPtr(NodeBlock, "base", base).iterate(index),
+ Id.InfixOp => @fieldParentPtr(NodeInfixOp, "base", base).iterate(index),
+ Id.PrefixOp => @fieldParentPtr(NodePrefixOp, "base", base).iterate(index),
+ Id.IntegerLiteral => @fieldParentPtr(NodeIntegerLiteral, "base", base).iterate(index),
+ Id.FloatLiteral => @fieldParentPtr(NodeFloatLiteral, "base", base).iterate(index),
+ };
+ }
+
+ pub fn destroy(base: &Node, allocator: &mem.Allocator) void {
+ return switch (base.id) {
+ Id.Root => allocator.destroy(@fieldParentPtr(NodeRoot, "base", base)),
+ Id.VarDecl => allocator.destroy(@fieldParentPtr(NodeVarDecl, "base", base)),
+ Id.Identifier => allocator.destroy(@fieldParentPtr(NodeIdentifier, "base", base)),
+ Id.FnProto => allocator.destroy(@fieldParentPtr(NodeFnProto, "base", base)),
+ Id.ParamDecl => allocator.destroy(@fieldParentPtr(NodeParamDecl, "base", base)),
+ Id.Block => allocator.destroy(@fieldParentPtr(NodeBlock, "base", base)),
+ Id.InfixOp => allocator.destroy(@fieldParentPtr(NodeInfixOp, "base", base)),
+ Id.PrefixOp => allocator.destroy(@fieldParentPtr(NodePrefixOp, "base", base)),
+ Id.IntegerLiteral => allocator.destroy(@fieldParentPtr(NodeIntegerLiteral, "base", base)),
+ Id.FloatLiteral => allocator.destroy(@fieldParentPtr(NodeFloatLiteral, "base", base)),
+ };
+ }
+};
+
+pub const NodeRoot = struct {
+ base: Node,
+ decls: ArrayList(&Node),
+
+ pub fn iterate(self: &NodeRoot, index: usize) ?&Node {
+ if (index < self.decls.len) {
+ return self.decls.items[self.decls.len - index - 1];
+ }
+ return null;
+ }
+};
+
+pub const NodeVarDecl = struct {
+ base: Node,
+ visib_token: ?Token,
+ name_token: Token,
+ eq_token: Token,
+ mut_token: Token,
+ comptime_token: ?Token,
+ extern_token: ?Token,
+ lib_name: ?&Node,
+ type_node: ?&Node,
+ align_node: ?&Node,
+ init_node: ?&Node,
+
+ pub fn iterate(self: &NodeVarDecl, index: usize) ?&Node {
+ var i = index;
+
+ if (self.type_node) |type_node| {
+ if (i < 1) return type_node;
+ i -= 1;
+ }
+
+ if (self.align_node) |align_node| {
+ if (i < 1) return align_node;
+ i -= 1;
+ }
+
+ if (self.init_node) |init_node| {
+ if (i < 1) return init_node;
+ i -= 1;
+ }
+
+ return null;
+ }
+};
+
+pub const NodeIdentifier = struct {
+ base: Node,
+ name_token: Token,
+
+ pub fn iterate(self: &NodeIdentifier, index: usize) ?&Node {
+ return null;
+ }
+};
+
+pub const NodeFnProto = struct {
+ base: Node,
+ visib_token: ?Token,
+ fn_token: Token,
+ name_token: ?Token,
+ params: ArrayList(&Node),
+ return_type: &Node,
+ var_args_token: ?Token,
+ extern_token: ?Token,
+ inline_token: ?Token,
+ cc_token: ?Token,
+ body_node: ?&Node,
+ lib_name: ?&Node, // populated if this is an extern declaration
+ align_expr: ?&Node, // populated if align(A) is present
+
+ pub fn iterate(self: &NodeFnProto, index: usize) ?&Node {
+ var i = index;
+
+ if (self.body_node) |body_node| {
+ if (i < 1) return body_node;
+ i -= 1;
+ }
+
+ if (i < 1) return self.return_type;
+ i -= 1;
+
+ if (self.align_expr) |align_expr| {
+ if (i < 1) return align_expr;
+ i -= 1;
+ }
+
+ if (i < self.params.len) return self.params.items[self.params.len - i - 1];
+ i -= self.params.len;
+
+ if (self.lib_name) |lib_name| {
+ if (i < 1) return lib_name;
+ i -= 1;
+ }
+
+ return null;
+ }
+};
+
+pub const NodeParamDecl = struct {
+ base: Node,
+ comptime_token: ?Token,
+ noalias_token: ?Token,
+ name_token: ?Token,
+ type_node: &Node,
+ var_args_token: ?Token,
+
+ pub fn iterate(self: &NodeParamDecl, index: usize) ?&Node {
+ var i = index;
+
+ if (i < 1) return self.type_node;
+ i -= 1;
+
+ return null;
+ }
+};
+
+pub const NodeBlock = struct {
+ base: Node,
+ begin_token: Token,
+ end_token: Token,
+ statements: ArrayList(&Node),
+
+ pub fn iterate(self: &NodeBlock, index: usize) ?&Node {
+ var i = index;
+
+ if (i < self.statements.len) return self.statements.items[i];
+ i -= self.statements.len;
+
+ return null;
+ }
+};
+
+pub const NodeInfixOp = struct {
+ base: Node,
+ op_token: Token,
+ lhs: &Node,
+ op: InfixOp,
+ rhs: &Node,
+
+ const InfixOp = enum {
+ EqualEqual,
+ BangEqual,
+ };
+
+ pub fn iterate(self: &NodeInfixOp, index: usize) ?&Node {
+ var i = index;
+
+ if (i < 1) return self.lhs;
+ i -= 1;
+
+ switch (self.op) {
+ InfixOp.EqualEqual => {},
+ InfixOp.BangEqual => {},
+ }
+
+ if (i < 1) return self.rhs;
+ i -= 1;
+
+ return null;
+ }
+};
+
+pub const NodePrefixOp = struct {
+ base: Node,
+ op_token: Token,
+ op: PrefixOp,
+ rhs: &Node,
+
+ const PrefixOp = union(enum) {
+ Return,
+ AddrOf: AddrOfInfo,
+ };
+ const AddrOfInfo = struct {
+ align_expr: ?&Node,
+ bit_offset_start_token: ?Token,
+ bit_offset_end_token: ?Token,
+ const_token: ?Token,
+ volatile_token: ?Token,
+ };
+
+ pub fn iterate(self: &NodePrefixOp, index: usize) ?&Node {
+ var i = index;
+
+ switch (self.op) {
+ PrefixOp.Return => {},
+ PrefixOp.AddrOf => |addr_of_info| {
+ if (addr_of_info.align_expr) |align_expr| {
+ if (i < 1) return align_expr;
+ i -= 1;
+ }
+ },
+ }
+
+ if (i < 1) return self.rhs;
+ i -= 1;
+
+ return null;
+ }
+};
+
+pub const NodeIntegerLiteral = struct {
+ base: Node,
+ token: Token,
+
+ pub fn iterate(self: &NodeIntegerLiteral, index: usize) ?&Node {
+ return null;
+ }
+};
+
+pub const NodeFloatLiteral = struct {
+ base: Node,
+ token: Token,
+
+ pub fn iterate(self: &NodeFloatLiteral, index: usize) ?&Node {
+ return null;
+ }
+};
diff --git a/std/zig/index.zig b/std/zig/index.zig
@@ -0,0 +1,11 @@
+const tokenizer = @import("tokenizer.zig");
+pub const Token = tokenizer.Token;
+pub const Tokenizer = tokenizer.Tokenizer;
+pub const Parser = @import("parser.zig").Parser;
+pub const ast = @import("ast.zig");
+
+// References each sub-file of this package from a test block; presumably this is what
+// pulls the tests declared in those files into the test build (confirm against the
+// compiler's lazy-analysis rules).
+test "std.zig tests" {
+ _ = @import("tokenizer.zig");
+ _ = @import("parser.zig");
+ _ = @import("ast.zig");
+}
diff --git a/std/zig/parser.zig b/std/zig/parser.zig
@@ -0,0 +1,1160 @@
+const std = @import("../index.zig");
+const assert = std.debug.assert;
+const ArrayList = std.ArrayList;
+const mem = std.mem;
+const ast = std.zig.ast;
+const Tokenizer = std.zig.Tokenizer;
+const Token = std.zig.Token;
+const builtin = @import("builtin");
+const io = std.io;
+
+// TODO when we make parse errors into error types instead of printing directly,
+// get rid of this
+const warn = std.debug.warn;
+
+pub const Parser = struct {
+ allocator: &mem.Allocator,
+ tokenizer: &Tokenizer,
+ put_back_tokens: [2]Token,
+ put_back_count: usize,
+ source_file_name: []const u8,
+
+ /// Result of `parse`: a handle to the root node of the parsed file.
+ pub const Tree = struct {
+ root_node: &ast.NodeRoot,
+
+ /// Currently a no-op: nothing is released here yet (see the TODO below).
+ pub fn deinit(self: &const Tree) void {
+ // TODO free the whole arena
+ }
+ };
+
+ // The contents of this memory are only meaningful for the duration of a single function call.
+ // The same bytes are reused as backing storage for the stack data structure used by parsing,
+ // tree rendering, and source rendering.
+ const utility_bytes_align = @alignOf( union { a: RenderAstFrame, b: State, c: RenderState } );
+ utility_bytes: []align(utility_bytes_align) u8,
+
+ /// Creates a parser that reads tokens from `tokenizer`. `source_file_name` is only used
+ /// as the file name printed in parse error messages.
+ ///
+ /// `allocator` should be an arena allocator. Parser never calls free on anything. After you're
+ /// done with a Parser, free the arena. After the arena is freed, no member functions of Parser
+ /// may be called.
+ /// NOTE(review): `deinit` does call `allocator.free` on the utility byte buffer, so the
+ /// "never calls free" statement above does not cover that buffer.
+ pub fn init(tokenizer: &Tokenizer, allocator: &mem.Allocator, source_file_name: []const u8) Parser {
+ return Parser {
+ .allocator = allocator,
+ .tokenizer = tokenizer,
+ // slots are only read after putBackToken has written them
+ .put_back_tokens = undefined,
+ .put_back_count = 0,
+ .source_file_name = source_file_name,
+ // starts empty; grows when one of the utility array lists grows
+ .utility_bytes = []align(utility_bytes_align) u8{},
+ };
+ }
+
+ /// Frees the utility byte buffer through the parser's allocator. AST nodes are not
+ /// touched here.
+ pub fn deinit(self: &Parser) void {
+ self.allocator.free(self.utility_bytes);
+ }
+
+ /// Tokens collected in front of a top-level declaration before its kind is known.
+ const TopLevelDeclCtx = struct {
+ // `pub` or `export` token, if one preceded the declaration
+ visib_token: ?Token,
+ // `extern` token, if one preceded the declaration
+ extern_token: ?Token,
+ };
+
+    /// Describes where a finished node has to be written: through a pointer to a
+    /// node field, through a pointer to a nullable node field, or onto a node list.
+    const DestPtr = union(enum) {
+        Field: &&ast.Node,
+        NullableField: &?&ast.Node,
+        List: &ArrayList(&ast.Node),
+
+        /// Stores `value` at the destination. Only the `List` case can fail,
+        /// because appending may need to allocate.
+        pub fn store(self: &const DestPtr, value: &ast.Node) !void {
+            switch (*self) {
+                DestPtr.List => |node_list| {
+                    try node_list.append(value);
+                },
+                DestPtr.NullableField => |slot| {
+                    *slot = value;
+                },
+                DestPtr.Field => |slot| {
+                    *slot = value;
+                },
+            }
+        }
+    };
+
+ /// Entries of the explicit stack that drives `parse`. Most variants are control-flow
+ /// states to execute next; `Operand`, `InfixOp` and `PrefixOp` are data left on the
+ /// stack while an expression is being assembled and are never executed as states.
+ const State = union(enum) {
+ TopLevel,
+ // payload: the visibility token (`pub`/`export`) already consumed, if any
+ TopLevelExtern: ?Token,
+ TopLevelDecl: TopLevelDeclCtx,
+ // payload: where the finished expression is stored
+ Expression: DestPtr,
+ ExpectOperand,
+ Operand: &ast.Node,
+ AfterOperand,
+ InfixOp: &ast.NodeInfixOp,
+ PrefixOp: &ast.NodePrefixOp,
+ AddrOfModifiers: &ast.NodePrefixOp.AddrOfInfo,
+ TypeExpr: DestPtr,
+ VarDecl: &ast.NodeVarDecl,
+ VarDeclAlign: &ast.NodeVarDecl,
+ VarDeclEq: &ast.NodeVarDecl,
+ // payload: the token id that must come next
+ ExpectToken: @TagType(Token.Id),
+ FnProto: &ast.NodeFnProto,
+ FnProtoAlign: &ast.NodeFnProto,
+ ParamDecl: &ast.NodeFnProto,
+ ParamDeclComma,
+ FnDef: &ast.NodeFnProto,
+ Block: &ast.NodeBlock,
+ Statement: &ast.NodeBlock,
+ };
+
+    /// Parses all top-level declarations from the tokenizer and returns the resulting tree.
+    /// Every node is allocated with the parser's allocator; call `Tree.deinit` on the
+    /// result when done.
+    ///
+    /// Syntax errors are reported through `parseError` (which prints a diagnostic) and
+    /// returned as `error.ParseError`; allocation failures are propagated.
+    ///
+    /// The parser is an explicit state machine: each loop iteration pops one `State`
+    /// and pushes the states that have to run next. States pushed later run earlier.
+    pub fn parse(self: &Parser) !Tree {
+        var stack = self.initUtilityArrayList(State);
+        defer self.deinitUtilityArrayList(stack);
+
+        const root_node = try self.createRoot();
+        // TODO errdefer arena free root node
+
+        try stack.append(State.TopLevel);
+
+        while (true) {
+            //{
+            //    const token = self.getNextToken();
+            //    warn("{} ", @tagName(token.id));
+            //    self.putBackToken(token);
+            //    var i: usize = stack.len;
+            //    while (i != 0) {
+            //        i -= 1;
+            //        warn("{} ", @tagName(stack.items[i]));
+            //    }
+            //    warn("\n");
+            //}
+
+            // This gives us 1 free append that can't fail
+            const state = stack.pop();
+
+            switch (state) {
+                State.TopLevel => {
+                    const token = self.getNextToken();
+                    switch (token.id) {
+                        Token.Id.Keyword_pub, Token.Id.Keyword_export => {
+                            stack.append(State { .TopLevelExtern = token }) catch unreachable;
+                            continue;
+                        },
+                        Token.Id.Eof => return Tree {.root_node = root_node},
+                        else => {
+                            self.putBackToken(token);
+                            // TODO shouldn't need this cast
+                            stack.append(State { .TopLevelExtern = null }) catch unreachable;
+                            continue;
+                        },
+                    }
+                },
+                State.TopLevelExtern => |visib_token| {
+                    const token = self.getNextToken();
+                    if (token.id == Token.Id.Keyword_extern) {
+                        stack.append(State {
+                            .TopLevelDecl = TopLevelDeclCtx {
+                                .visib_token = visib_token,
+                                .extern_token = token,
+                            },
+                        }) catch unreachable;
+                        continue;
+                    }
+                    self.putBackToken(token);
+                    stack.append(State {
+                        .TopLevelDecl = TopLevelDeclCtx {
+                            .visib_token = visib_token,
+                            .extern_token = null,
+                        },
+                    }) catch unreachable;
+                    continue;
+                },
+                State.TopLevelDecl => |ctx| {
+                    const token = self.getNextToken();
+                    switch (token.id) {
+                        Token.Id.Keyword_var, Token.Id.Keyword_const => {
+                            stack.append(State.TopLevel) catch unreachable;
+                            // TODO shouldn't need these casts
+                            const var_decl_node = try self.createAttachVarDecl(&root_node.decls, ctx.visib_token,
+                                token, (?Token)(null), ctx.extern_token);
+                            try stack.append(State { .VarDecl = var_decl_node });
+                            continue;
+                        },
+                        Token.Id.Keyword_fn => {
+                            stack.append(State.TopLevel) catch unreachable;
+                            // The visibility token collected earlier is forwarded so that
+                            // `pub fn` / `export fn` keep their qualifier in the tree.
+                            // TODO shouldn't need these casts
+                            const fn_proto = try self.createAttachFnProto(&root_node.decls, token,
+                                ctx.extern_token, (?Token)(null), ctx.visib_token, (?Token)(null));
+                            try stack.append(State { .FnDef = fn_proto });
+                            try stack.append(State { .FnProto = fn_proto });
+                            continue;
+                        },
+                        Token.Id.StringLiteral => {
+                            @panic("TODO extern with string literal");
+                        },
+                        Token.Id.Keyword_nakedcc, Token.Id.Keyword_stdcallcc => {
+                            stack.append(State.TopLevel) catch unreachable;
+                            const fn_token = try self.eatToken(Token.Id.Keyword_fn);
+                            // `token` is the calling convention keyword here.
+                            // TODO shouldn't need this cast
+                            const fn_proto = try self.createAttachFnProto(&root_node.decls, fn_token,
+                                ctx.extern_token, (?Token)(token), ctx.visib_token, (?Token)(null));
+                            try stack.append(State { .FnDef = fn_proto });
+                            try stack.append(State { .FnProto = fn_proto });
+                            continue;
+                        },
+                        else => return self.parseError(token, "expected variable declaration or function, found {}", @tagName(token.id)),
+                    }
+                },
+                State.VarDecl => |var_decl| {
+                    var_decl.name_token = try self.eatToken(Token.Id.Identifier);
+                    stack.append(State { .VarDeclAlign = var_decl }) catch unreachable;
+
+                    const next_token = self.getNextToken();
+                    if (next_token.id == Token.Id.Colon) {
+                        try stack.append(State { .TypeExpr = DestPtr {.NullableField = &var_decl.type_node} });
+                        continue;
+                    }
+
+                    self.putBackToken(next_token);
+                    continue;
+                },
+                State.VarDeclAlign => |var_decl| {
+                    stack.append(State { .VarDeclEq = var_decl }) catch unreachable;
+
+                    const next_token = self.getNextToken();
+                    if (next_token.id == Token.Id.Keyword_align) {
+                        _ = try self.eatToken(Token.Id.LParen);
+                        try stack.append(State { .ExpectToken = Token.Id.RParen });
+                        try stack.append(State { .Expression = DestPtr{.NullableField = &var_decl.align_node} });
+                        continue;
+                    }
+
+                    self.putBackToken(next_token);
+                    continue;
+                },
+                State.VarDeclEq => |var_decl| {
+                    const token = self.getNextToken();
+                    if (token.id == Token.Id.Equal) {
+                        var_decl.eq_token = token;
+                        stack.append(State { .ExpectToken = Token.Id.Semicolon }) catch unreachable;
+                        try stack.append(State {
+                            .Expression = DestPtr {.NullableField = &var_decl.init_node},
+                        });
+                        continue;
+                    }
+                    if (token.id == Token.Id.Semicolon) {
+                        continue;
+                    }
+                    return self.parseError(token, "expected '=' or ';', found {}", @tagName(token.id));
+                },
+                State.ExpectToken => |token_id| {
+                    _ = try self.eatToken(token_id);
+                    continue;
+                },
+
+                State.Expression => |dest_ptr| {
+                    // save the dest_ptr for later
+                    stack.append(state) catch unreachable;
+                    try stack.append(State.ExpectOperand);
+                    continue;
+                },
+                State.ExpectOperand => {
+                    // we'll either get an operand (like 1 or x),
+                    // or a prefix operator (like ~ or return).
+                    const token = self.getNextToken();
+                    switch (token.id) {
+                        Token.Id.Keyword_return => {
+                            try stack.append(State { .PrefixOp = try self.createPrefixOp(token,
+                                ast.NodePrefixOp.PrefixOp.Return) });
+                            try stack.append(State.ExpectOperand);
+                            continue;
+                        },
+                        Token.Id.Ampersand => {
+                            const prefix_op = try self.createPrefixOp(token, ast.NodePrefixOp.PrefixOp{
+                                .AddrOf = ast.NodePrefixOp.AddrOfInfo {
+                                    .align_expr = null,
+                                    .bit_offset_start_token = null,
+                                    .bit_offset_end_token = null,
+                                    .const_token = null,
+                                    .volatile_token = null,
+                                }
+                            });
+                            try stack.append(State { .PrefixOp = prefix_op });
+                            try stack.append(State.ExpectOperand);
+                            try stack.append(State { .AddrOfModifiers = &prefix_op.op.AddrOf });
+                            continue;
+                        },
+                        Token.Id.Identifier => {
+                            try stack.append(State {
+                                .Operand = &(try self.createIdentifier(token)).base
+                            });
+                            try stack.append(State.AfterOperand);
+                            continue;
+                        },
+                        Token.Id.IntegerLiteral => {
+                            try stack.append(State {
+                                .Operand = &(try self.createIntegerLiteral(token)).base
+                            });
+                            try stack.append(State.AfterOperand);
+                            continue;
+                        },
+                        Token.Id.FloatLiteral => {
+                            try stack.append(State {
+                                .Operand = &(try self.createFloatLiteral(token)).base
+                            });
+                            try stack.append(State.AfterOperand);
+                            continue;
+                        },
+                        else => return self.parseError(token, "expected primary expression, found {}", @tagName(token.id)),
+                    }
+                },
+
+                State.AfterOperand => {
+                    // we'll either get an infix operator (like != or ^),
+                    // or a postfix operator (like () or {}),
+                    // otherwise this expression is done (like on a ; or else).
+                    var token = self.getNextToken();
+                    switch (token.id) {
+                        Token.Id.EqualEqual => {
+                            try stack.append(State {
+                                .InfixOp = try self.createInfixOp(token, ast.NodeInfixOp.InfixOp.EqualEqual)
+                            });
+                            try stack.append(State.ExpectOperand);
+                            continue;
+                        },
+                        Token.Id.BangEqual => {
+                            try stack.append(State {
+                                .InfixOp = try self.createInfixOp(token, ast.NodeInfixOp.InfixOp.BangEqual)
+                            });
+                            try stack.append(State.ExpectOperand);
+                            continue;
+                        },
+                        else => {
+                            // no postfix/infix operator after this operand.
+                            self.putBackToken(token);
+                            // reduce the stack
+                            var expression: &ast.Node = stack.pop().Operand;
+                            while (true) {
+                                switch (stack.pop()) {
+                                    State.Expression => |dest_ptr| {
+                                        // we're done
+                                        try dest_ptr.store(expression);
+                                        break;
+                                    },
+                                    State.InfixOp => |infix_op| {
+                                        infix_op.rhs = expression;
+                                        infix_op.lhs = stack.pop().Operand;
+                                        expression = &infix_op.base;
+                                        continue;
+                                    },
+                                    State.PrefixOp => |prefix_op| {
+                                        prefix_op.rhs = expression;
+                                        expression = &prefix_op.base;
+                                        continue;
+                                    },
+                                    else => unreachable,
+                                }
+                            }
+                            continue;
+                        },
+                    }
+                },
+
+                State.AddrOfModifiers => |addr_of_info| {
+                    var token = self.getNextToken();
+                    switch (token.id) {
+                        Token.Id.Keyword_align => {
+                            stack.append(state) catch unreachable;
+                            if (addr_of_info.align_expr != null) return self.parseError(token, "multiple align qualifiers");
+                            _ = try self.eatToken(Token.Id.LParen);
+                            try stack.append(State { .ExpectToken = Token.Id.RParen });
+                            try stack.append(State { .Expression = DestPtr{.NullableField = &addr_of_info.align_expr} });
+                            continue;
+                        },
+                        Token.Id.Keyword_const => {
+                            stack.append(state) catch unreachable;
+                            if (addr_of_info.const_token != null) return self.parseError(token, "duplicate qualifier: const");
+                            addr_of_info.const_token = token;
+                            continue;
+                        },
+                        Token.Id.Keyword_volatile => {
+                            stack.append(state) catch unreachable;
+                            if (addr_of_info.volatile_token != null) return self.parseError(token, "duplicate qualifier: volatile");
+                            addr_of_info.volatile_token = token;
+                            continue;
+                        },
+                        else => {
+                            self.putBackToken(token);
+                            continue;
+                        },
+                    }
+                },
+
+                State.TypeExpr => |dest_ptr| {
+                    const token = self.getNextToken();
+                    if (token.id == Token.Id.Keyword_var) {
+                        @panic("TODO param with type var");
+                    }
+                    self.putBackToken(token);
+
+                    stack.append(State { .Expression = dest_ptr }) catch unreachable;
+                    continue;
+                },
+
+                State.FnProto => |fn_proto| {
+                    stack.append(State { .FnProtoAlign = fn_proto }) catch unreachable;
+                    try stack.append(State { .ParamDecl = fn_proto });
+                    try stack.append(State { .ExpectToken = Token.Id.LParen });
+
+                    const next_token = self.getNextToken();
+                    if (next_token.id == Token.Id.Identifier) {
+                        fn_proto.name_token = next_token;
+                        continue;
+                    }
+                    self.putBackToken(next_token);
+                    continue;
+                },
+
+                State.FnProtoAlign => |fn_proto| {
+                    const token = self.getNextToken();
+                    if (token.id == Token.Id.Keyword_align) {
+                        @panic("TODO fn proto align");
+                    }
+                    self.putBackToken(token);
+                    stack.append(State {
+                        .TypeExpr = DestPtr {.Field = &fn_proto.return_type},
+                    }) catch unreachable;
+                    continue;
+                },
+
+                State.ParamDecl => |fn_proto| {
+                    var token = self.getNextToken();
+                    if (token.id == Token.Id.RParen) {
+                        continue;
+                    }
+                    const param_decl = try self.createAttachParamDecl(&fn_proto.params);
+                    if (token.id == Token.Id.Keyword_comptime) {
+                        param_decl.comptime_token = token;
+                        token = self.getNextToken();
+                    } else if (token.id == Token.Id.Keyword_noalias) {
+                        param_decl.noalias_token = token;
+                        token = self.getNextToken();
+                    }
+                    if (token.id == Token.Id.Identifier) {
+                        // An identifier is the parameter name only when a ':' follows;
+                        // otherwise it is the start of the type expression.
+                        const next_token = self.getNextToken();
+                        if (next_token.id == Token.Id.Colon) {
+                            param_decl.name_token = token;
+                            token = self.getNextToken();
+                        } else {
+                            self.putBackToken(next_token);
+                        }
+                    }
+                    if (token.id == Token.Id.Ellipsis3) {
+                        param_decl.var_args_token = token;
+                        stack.append(State { .ExpectToken = Token.Id.RParen }) catch unreachable;
+                        continue;
+                    } else {
+                        self.putBackToken(token);
+                    }
+
+                    stack.append(State { .ParamDecl = fn_proto }) catch unreachable;
+                    try stack.append(State.ParamDeclComma);
+                    try stack.append(State {
+                        .TypeExpr = DestPtr {.Field = &param_decl.type_node}
+                    });
+                    continue;
+                },
+
+                State.ParamDeclComma => {
+                    const token = self.getNextToken();
+                    switch (token.id) {
+                        Token.Id.RParen => {
+                            _ = stack.pop(); // pop off the ParamDecl
+                            continue;
+                        },
+                        Token.Id.Comma => continue,
+                        else => return self.parseError(token, "expected ',' or ')', found {}", @tagName(token.id)),
+                    }
+                },
+
+                State.FnDef => |fn_proto| {
+                    const token = self.getNextToken();
+                    switch(token.id) {
+                        Token.Id.LBrace => {
+                            const block = try self.createBlock(token);
+                            fn_proto.body_node = &block.base;
+                            stack.append(State { .Block = block }) catch unreachable;
+                            continue;
+                        },
+                        Token.Id.Semicolon => continue,
+                        else => return self.parseError(token, "expected ';' or '{{', found {}", @tagName(token.id)),
+                    }
+                },
+
+                State.Block => |block| {
+                    const token = self.getNextToken();
+                    switch (token.id) {
+                        Token.Id.RBrace => {
+                            block.end_token = token;
+                            continue;
+                        },
+                        else => {
+                            self.putBackToken(token);
+                            stack.append(State { .Block = block }) catch unreachable;
+                            try stack.append(State { .Statement = block });
+                            continue;
+                        },
+                    }
+                },
+
+                State.Statement => |block| {
+                    {
+                        // Look for comptime var, comptime const
+                        const comptime_token = self.getNextToken();
+                        if (comptime_token.id == Token.Id.Keyword_comptime) {
+                            const mut_token = self.getNextToken();
+                            if (mut_token.id == Token.Id.Keyword_var or mut_token.id == Token.Id.Keyword_const) {
+                                // TODO shouldn't need these casts
+                                const var_decl = try self.createAttachVarDecl(&block.statements, (?Token)(null),
+                                    mut_token, (?Token)(comptime_token), (?Token)(null));
+                                try stack.append(State { .VarDecl = var_decl });
+                                continue;
+                            }
+                            self.putBackToken(mut_token);
+                        }
+                        self.putBackToken(comptime_token);
+                    }
+                    {
+                        // Look for const, var
+                        const mut_token = self.getNextToken();
+                        if (mut_token.id == Token.Id.Keyword_var or mut_token.id == Token.Id.Keyword_const) {
+                            // TODO shouldn't need these casts
+                            const var_decl = try self.createAttachVarDecl(&block.statements, (?Token)(null),
+                                mut_token, (?Token)(null), (?Token)(null));
+                            try stack.append(State { .VarDecl = var_decl });
+                            continue;
+                        }
+                        self.putBackToken(mut_token);
+                    }
+
+                    stack.append(State { .ExpectToken = Token.Id.Semicolon }) catch unreachable;
+                    try stack.append(State { .Expression = DestPtr{.List = &block.statements} });
+                    continue;
+                },
+
+                // These are data, not control flow.
+                State.InfixOp => unreachable,
+                State.PrefixOp => unreachable,
+                State.Operand => unreachable,
+            }
+            // Every prong above continues, returns or panics; reaching this line means
+            // a state fell through without scheduling what comes next.
+            std.debug.panic("{}", @tagName(state));
+            //unreachable;
+        }
+    }
+
+    /// Allocates the root node with an empty declaration list.
+    fn createRoot(self: &Parser) !&ast.NodeRoot {
+        const root = try self.allocator.create(ast.NodeRoot);
+        *root = ast.NodeRoot {
+            .base = ast.Node {.id = ast.Node.Id.Root},
+            .decls = ArrayList(&ast.Node).init(self.allocator),
+        };
+        return root;
+    }
+
+    /// Allocates a variable declaration node from the tokens already consumed.
+    /// `name_token` and `eq_token` are left undefined for the caller to fill in;
+    /// all optional child nodes start out as null.
+    fn createVarDecl(self: &Parser, visib_token: &const ?Token, mut_token: &const Token, comptime_token: &const ?Token,
+        extern_token: &const ?Token) !&ast.NodeVarDecl
+    {
+        const var_decl = try self.allocator.create(ast.NodeVarDecl);
+        *var_decl = ast.NodeVarDecl {
+            .base = ast.Node {.id = ast.Node.Id.VarDecl},
+            .visib_token = *visib_token,
+            .mut_token = *mut_token,
+            .comptime_token = *comptime_token,
+            .extern_token = *extern_token,
+            .type_node = null,
+            .align_node = null,
+            .init_node = null,
+            .lib_name = null,
+            // set later, while the declaration is being parsed
+            .name_token = undefined,
+            .eq_token = undefined,
+        };
+        return var_decl;
+    }
+
+    /// Allocates a function prototype node with an empty parameter list.
+    /// `return_type` is left undefined for the caller to fill in; the name, body
+    /// and remaining optional parts start out as null.
+    fn createFnProto(self: &Parser, fn_token: &const Token, extern_token: &const ?Token,
+        cc_token: &const ?Token, visib_token: &const ?Token, inline_token: &const ?Token) !&ast.NodeFnProto
+    {
+        const fn_proto = try self.allocator.create(ast.NodeFnProto);
+        *fn_proto = ast.NodeFnProto {
+            .base = ast.Node {.id = ast.Node.Id.FnProto},
+            .visib_token = *visib_token,
+            .name_token = null,
+            .fn_token = *fn_token,
+            .params = ArrayList(&ast.Node).init(self.allocator),
+            .return_type = undefined,
+            .var_args_token = null,
+            .extern_token = *extern_token,
+            .inline_token = *inline_token,
+            .cc_token = *cc_token,
+            .body_node = null,
+            .lib_name = null,
+            .align_expr = null,
+        };
+        return fn_proto;
+    }
+
+    /// Allocates a parameter declaration node with every optional token unset.
+    /// `type_node` is left undefined for the caller to fill in.
+    fn createParamDecl(self: &Parser) !&ast.NodeParamDecl {
+        const param = try self.allocator.create(ast.NodeParamDecl);
+        *param = ast.NodeParamDecl {
+            .base = ast.Node {.id = ast.Node.Id.ParamDecl},
+            .comptime_token = null,
+            .noalias_token = null,
+            .name_token = null,
+            .type_node = undefined,
+            .var_args_token = null,
+        };
+        return param;
+    }
+
+    /// Allocates a block node with an empty statement list. `end_token` is left
+    /// undefined for the caller to fill in.
+    fn createBlock(self: &Parser, begin_token: &const Token) !&ast.NodeBlock {
+        const block = try self.allocator.create(ast.NodeBlock);
+        *block = ast.NodeBlock {
+            .base = ast.Node {.id = ast.Node.Id.Block},
+            .begin_token = *begin_token,
+            .end_token = undefined,
+            .statements = ArrayList(&ast.Node).init(self.allocator),
+        };
+        return block;
+    }
+
+    /// Allocates an infix operator node. Both operands are left undefined for the
+    /// caller to fill in.
+    fn createInfixOp(self: &Parser, op_token: &const Token, op: &const ast.NodeInfixOp.InfixOp) !&ast.NodeInfixOp {
+        const infix = try self.allocator.create(ast.NodeInfixOp);
+        *infix = ast.NodeInfixOp {
+            .base = ast.Node {.id = ast.Node.Id.InfixOp},
+            .op_token = *op_token,
+            .lhs = undefined,
+            .op = *op,
+            .rhs = undefined,
+        };
+        return infix;
+    }
+
+    /// Allocates a prefix operator node. The operand `rhs` is left undefined for
+    /// the caller to fill in.
+    fn createPrefixOp(self: &Parser, op_token: &const Token, op: &const ast.NodePrefixOp.PrefixOp) !&ast.NodePrefixOp {
+        const prefix = try self.allocator.create(ast.NodePrefixOp);
+        *prefix = ast.NodePrefixOp {
+            .base = ast.Node {.id = ast.Node.Id.PrefixOp},
+            .op_token = *op_token,
+            .op = *op,
+            .rhs = undefined,
+        };
+        return prefix;
+    }
+
+    /// Allocates an identifier node that refers to `name_token`.
+    fn createIdentifier(self: &Parser, name_token: &const Token) !&ast.NodeIdentifier {
+        const identifier = try self.allocator.create(ast.NodeIdentifier);
+        *identifier = ast.NodeIdentifier {
+            .base = ast.Node {.id = ast.Node.Id.Identifier},
+            .name_token = *name_token,
+        };
+        return identifier;
+    }
+
+    /// Allocates an integer literal node that refers to `token`.
+    fn createIntegerLiteral(self: &Parser, token: &const Token) !&ast.NodeIntegerLiteral {
+        const literal = try self.allocator.create(ast.NodeIntegerLiteral);
+        *literal = ast.NodeIntegerLiteral {
+            .base = ast.Node {.id = ast.Node.Id.IntegerLiteral},
+            .token = *token,
+        };
+        return literal;
+    }
+
+    /// Allocates a float literal node that refers to `token`.
+    fn createFloatLiteral(self: &Parser, token: &const Token) !&ast.NodeFloatLiteral {
+        const literal = try self.allocator.create(ast.NodeFloatLiteral);
+        *literal = ast.NodeFloatLiteral {
+            .base = ast.Node {.id = ast.Node.Id.FloatLiteral},
+            .token = *token,
+        };
+        return literal;
+    }
+
+    /// Creates an identifier node and stores it at `dest_ptr`.
+    fn createAttachIdentifier(self: &Parser, dest_ptr: &const DestPtr, name_token: &const Token) !&ast.NodeIdentifier {
+        const identifier = try self.createIdentifier(name_token);
+        try dest_ptr.store(&identifier.base);
+        return identifier;
+    }
+
+    /// Creates a parameter declaration node and appends it to `list`.
+    fn createAttachParamDecl(self: &Parser, list: &ArrayList(&ast.Node)) !&ast.NodeParamDecl {
+        const param = try self.createParamDecl();
+        try list.append(&param.base);
+        return param;
+    }
+
+    /// Creates a function prototype node and appends it to `list`.
+    /// Note the token parameter order: fn, extern, calling convention, visibility, inline.
+    fn createAttachFnProto(self: &Parser, list: &ArrayList(&ast.Node), fn_token: &const Token,
+        extern_token: &const ?Token, cc_token: &const ?Token, visib_token: &const ?Token,
+        inline_token: &const ?Token) !&ast.NodeFnProto
+    {
+        const fn_proto = try self.createFnProto(fn_token, extern_token, cc_token, visib_token, inline_token);
+        try list.append(&fn_proto.base);
+        return fn_proto;
+    }
+
+    /// Creates a variable declaration node and appends it to `list`.
+    fn createAttachVarDecl(self: &Parser, list: &ArrayList(&ast.Node), visib_token: &const ?Token,
+        mut_token: &const Token, comptime_token: &const ?Token, extern_token: &const ?Token) !&ast.NodeVarDecl
+    {
+        const var_decl = try self.createVarDecl(visib_token, mut_token, comptime_token, extern_token);
+        try list.append(&var_decl.base);
+        return var_decl;
+    }
+
+    /// Prints a diagnostic for `token` with `warn` and returns `error.ParseError`.
+    /// The output is a `file:line:column: error: ...` header (line and column
+    /// are printed 1-based), the offending source line, and a row of `~` under the token.
+    fn parseError(self: &Parser, token: &const Token, comptime fmt: []const u8, args: ...) error {
+        const loc = self.tokenizer.getTokenLocation(token);
+        warn("{}:{}:{}: error: " ++ fmt ++ "\n", self.source_file_name, loc.line + 1, loc.column + 1, args);
+        warn("{}\n", self.tokenizer.buffer[loc.line_start..loc.line_end]);
+
+        // pad up to the column where the token starts
+        var spaces_left: usize = loc.column;
+        while (spaces_left != 0) : (spaces_left -= 1) {
+            warn(" ");
+        }
+
+        // one marker per byte of the token
+        var markers_left: usize = token.end - token.start;
+        while (markers_left != 0) : (markers_left -= 1) {
+            warn("~");
+        }
+
+        warn("\n");
+        return error.ParseError;
+    }
+
+    /// Succeeds when `token` has the given `id`; otherwise reports a parse error
+    /// that names both the expected and the found token kind.
+    fn expectToken(self: &Parser, token: &const Token, id: @TagType(Token.Id)) !void {
+        if (token.id == id) return;
+        return self.parseError(token, "expected {}, found {}", @tagName(id), @tagName(token.id));
+    }
+
+    /// Consumes the next token and returns it, failing with a parse error when
+    /// it is not of kind `id`.
+    fn eatToken(self: &Parser, id: @TagType(Token.Id)) !Token {
+        const next = self.getNextToken();
+        try self.expectToken(next, id);
+        return next;
+    }
+
+    /// Pushes `token` back so that the next `getNextToken` call returns it.
+    /// The put-back buffer holds two tokens.
+    fn putBackToken(self: &Parser, token: &const Token) void {
+        const slot = self.put_back_count;
+        self.put_back_tokens[slot] = *token;
+        self.put_back_count = slot + 1;
+    }
+
+    /// Returns the most recently put-back token if there is one, otherwise
+    /// reads a fresh token from the tokenizer.
+    fn getNextToken(self: &Parser) Token {
+        if (self.put_back_count == 0) {
+            return self.tokenizer.next();
+        }
+        self.put_back_count -= 1;
+        return self.put_back_tokens[self.put_back_count];
+    }
+
+ /// Work item of `renderAst`: a node still to be printed and the number of spaces
+ /// to print in front of it.
+ const RenderAstFrame = struct {
+ node: &ast.Node,
+ indent: usize,
+ };
+
+    /// Writes one line per node to `stream`: the node's id name, indented two
+    /// spaces deeper than its parent. Traversal uses an explicit stack instead
+    /// of recursion.
+    /// NOTE(review): children are pushed in ascending index order and the stack
+    /// is consumed with `popOrNull`, so siblings presumably come out last-first.
+    pub fn renderAst(self: &Parser, stream: var, root_node: &ast.NodeRoot) !void {
+        var pending = self.initUtilityArrayList(RenderAstFrame);
+        defer self.deinitUtilityArrayList(pending);
+
+        try pending.append(RenderAstFrame { .node = &root_node.base, .indent = 0 });
+
+        while (pending.popOrNull()) |frame| {
+            var column: usize = 0;
+            while (column < frame.indent) : (column += 1) {
+                try stream.print(" ");
+            }
+            try stream.print("{}\n", @tagName(frame.node.id));
+
+            const child_indent = frame.indent + 2;
+            var child_index: usize = 0;
+            while (frame.node.iterate(child_index)) |child| : (child_index += 1) {
+                try pending.append(RenderAstFrame { .node = child, .indent = child_indent });
+            }
+        }
+    }
+
+ /// Work items of `renderSource`. Each one either prints something or schedules
+ /// further work items on the render stack.
+ const RenderState = union(enum) {
+ TopLevelDecl: &ast.Node,
+ // prints the ")" that closes a parameter list, then schedules return type and body
+ FnProtoRParen: &ast.NodeFnProto,
+ ParamDecl: &ast.Node,
+ // literal text written to the stream as-is
+ Text: []const u8,
+ Expression: &ast.Node,
+ VarDecl: &ast.NodeVarDecl,
+ Statement: &ast.Node,
+ // writes the current indentation as spaces
+ PrintIndent,
+ // sets the current indentation to the payload value
+ Indent: usize,
+ };
+
+    /// Writes source text for the tree rooted at `root_node` to `stream`.
+    ///
+    /// Rendering is driven by an explicit stack of `RenderState` items. Items are
+    /// popped from the end, so whatever has to be printed first is pushed last;
+    /// text that must precede everything scheduled by a state is written to the
+    /// stream directly.
+    pub fn renderSource(self: &Parser, stream: var, root_node: &ast.NodeRoot) !void {
+        var stack = self.initUtilityArrayList(RenderState);
+        defer self.deinitUtilityArrayList(stack);
+
+        {
+            // push in reverse so the first declaration is popped first
+            var i = root_node.decls.len;
+            while (i != 0) {
+                i -= 1;
+                const decl = root_node.decls.items[i];
+                try stack.append(RenderState {.TopLevelDecl = decl});
+            }
+        }
+
+        const indent_delta = 4;
+        var indent: usize = 0;
+        while (stack.popOrNull()) |state| {
+            switch (state) {
+                RenderState.TopLevelDecl => |decl| {
+                    switch (decl.id) {
+                        ast.Node.Id.FnProto => {
+                            const fn_proto = @fieldParentPtr(ast.NodeFnProto, "base", decl);
+                            if (fn_proto.visib_token) |visib_token| {
+                                switch (visib_token.id) {
+                                    Token.Id.Keyword_pub => try stream.print("pub "),
+                                    Token.Id.Keyword_export => try stream.print("export "),
+                                    else => unreachable,
+                                }
+                            }
+                            if (fn_proto.extern_token) |extern_token| {
+                                try stream.print("{} ", self.tokenizer.getTokenSlice(extern_token));
+                            }
+                            // The parser accepts a calling convention keyword between
+                            // `extern` and `fn`; print it back in the same position.
+                            if (fn_proto.cc_token) |cc_token| {
+                                try stream.print("{} ", self.tokenizer.getTokenSlice(cc_token));
+                            }
+                            try stream.print("fn");
+
+                            if (fn_proto.name_token) |name_token| {
+                                try stream.print(" {}", self.tokenizer.getTokenSlice(name_token));
+                            }
+
+                            try stream.print("(");
+
+                            try stack.append(RenderState { .Text = "\n" });
+                            if (fn_proto.body_node == null) {
+                                try stack.append(RenderState { .Text = ";" });
+                            }
+
+                            try stack.append(RenderState { .FnProtoRParen = fn_proto});
+                            var i = fn_proto.params.len;
+                            while (i != 0) {
+                                i -= 1;
+                                const param_decl_node = fn_proto.params.items[i];
+                                try stack.append(RenderState { .ParamDecl = param_decl_node});
+                                if (i != 0) {
+                                    try stack.append(RenderState { .Text = ", " });
+                                }
+                            }
+                        },
+                        ast.Node.Id.VarDecl => {
+                            const var_decl = @fieldParentPtr(ast.NodeVarDecl, "base", decl);
+                            try stack.append(RenderState { .Text = "\n"});
+                            try stack.append(RenderState { .VarDecl = var_decl});
+
+                        },
+                        else => unreachable,
+                    }
+                },
+
+                RenderState.VarDecl => |var_decl| {
+                    if (var_decl.visib_token) |visib_token| {
+                        try stream.print("{} ", self.tokenizer.getTokenSlice(visib_token));
+                    }
+                    if (var_decl.extern_token) |extern_token| {
+                        try stream.print("{} ", self.tokenizer.getTokenSlice(extern_token));
+                        if (var_decl.lib_name != null) {
+                            @panic("TODO");
+                        }
+                    }
+                    if (var_decl.comptime_token) |comptime_token| {
+                        try stream.print("{} ", self.tokenizer.getTokenSlice(comptime_token));
+                    }
+                    try stream.print("{} ", self.tokenizer.getTokenSlice(var_decl.mut_token));
+                    try stream.print("{}", self.tokenizer.getTokenSlice(var_decl.name_token));
+
+                    // Pushed in reverse of the printed order: type, align, init, ";".
+                    try stack.append(RenderState { .Text = ";" });
+                    if (var_decl.init_node) |init_node| {
+                        try stack.append(RenderState { .Expression = init_node });
+                        try stack.append(RenderState { .Text = " = " });
+                    }
+                    if (var_decl.align_node) |align_node| {
+                        try stack.append(RenderState { .Text = ")" });
+                        try stack.append(RenderState { .Expression = align_node });
+                        try stack.append(RenderState { .Text = " align(" });
+                    }
+                    if (var_decl.type_node) |type_node| {
+                        try stream.print(": ");
+                        try stack.append(RenderState { .Expression = type_node });
+                    }
+                },
+
+                RenderState.ParamDecl => |base| {
+                    const param_decl = @fieldParentPtr(ast.NodeParamDecl, "base", base);
+                    if (param_decl.comptime_token) |comptime_token| {
+                        try stream.print("{} ", self.tokenizer.getTokenSlice(comptime_token));
+                    }
+                    if (param_decl.noalias_token) |noalias_token| {
+                        try stream.print("{} ", self.tokenizer.getTokenSlice(noalias_token));
+                    }
+                    if (param_decl.name_token) |name_token| {
+                        try stream.print("{}: ", self.tokenizer.getTokenSlice(name_token));
+                    }
+                    if (param_decl.var_args_token) |var_args_token| {
+                        try stream.print("{}", self.tokenizer.getTokenSlice(var_args_token));
+                    } else {
+                        try stack.append(RenderState { .Expression = param_decl.type_node});
+                    }
+                },
+                RenderState.Text => |bytes| {
+                    try stream.write(bytes);
+                },
+                RenderState.Expression => |base| switch (base.id) {
+                    ast.Node.Id.Identifier => {
+                        const identifier = @fieldParentPtr(ast.NodeIdentifier, "base", base);
+                        try stream.print("{}", self.tokenizer.getTokenSlice(identifier.name_token));
+                    },
+                    ast.Node.Id.Block => {
+                        const block = @fieldParentPtr(ast.NodeBlock, "base", base);
+                        try stream.write("{");
+                        // closing brace goes back to the indentation the block started at
+                        try stack.append(RenderState { .Text = "}"});
+                        try stack.append(RenderState.PrintIndent);
+                        try stack.append(RenderState { .Indent = indent});
+                        try stack.append(RenderState { .Text = "\n"});
+                        var i = block.statements.len;
+                        while (i != 0) {
+                            i -= 1;
+                            const statement_node = block.statements.items[i];
+                            try stack.append(RenderState { .Statement = statement_node});
+                            try stack.append(RenderState.PrintIndent);
+                            try stack.append(RenderState { .Indent = indent + indent_delta});
+                            try stack.append(RenderState { .Text = "\n" });
+                        }
+                    },
+                    ast.Node.Id.InfixOp => {
+                        const infix_op_node = @fieldParentPtr(ast.NodeInfixOp, "base", base);
+                        try stack.append(RenderState { .Expression = infix_op_node.rhs });
+                        switch (infix_op_node.op) {
+                            ast.NodeInfixOp.InfixOp.EqualEqual => {
+                                try stack.append(RenderState { .Text = " == "});
+                            },
+                            ast.NodeInfixOp.InfixOp.BangEqual => {
+                                try stack.append(RenderState { .Text = " != "});
+                            },
+                            else => unreachable,
+                        }
+                        try stack.append(RenderState { .Expression = infix_op_node.lhs });
+                    },
+                    ast.Node.Id.PrefixOp => {
+                        const prefix_op_node = @fieldParentPtr(ast.NodePrefixOp, "base", base);
+                        try stack.append(RenderState { .Expression = prefix_op_node.rhs });
+                        switch (prefix_op_node.op) {
+                            ast.NodePrefixOp.PrefixOp.Return => {
+                                try stream.write("return ");
+                            },
+                            ast.NodePrefixOp.PrefixOp.AddrOf => |addr_of_info| {
+                                try stream.write("&");
+                                // Pushed in reverse of the printed order: align(...), const, volatile.
+                                if (addr_of_info.volatile_token != null) {
+                                    try stack.append(RenderState { .Text = "volatile "});
+                                }
+                                if (addr_of_info.const_token != null) {
+                                    try stack.append(RenderState { .Text = "const "});
+                                }
+                                if (addr_of_info.align_expr) |align_expr| {
+                                    try stream.print("align(");
+                                    try stack.append(RenderState { .Text = ") "});
+                                    try stack.append(RenderState { .Expression = align_expr});
+                                }
+                            },
+                            else => unreachable,
+                        }
+                    },
+                    ast.Node.Id.IntegerLiteral => {
+                        const integer_literal = @fieldParentPtr(ast.NodeIntegerLiteral, "base", base);
+                        try stream.print("{}", self.tokenizer.getTokenSlice(integer_literal.token));
+                    },
+                    ast.Node.Id.FloatLiteral => {
+                        const float_literal = @fieldParentPtr(ast.NodeFloatLiteral, "base", base);
+                        try stream.print("{}", self.tokenizer.getTokenSlice(float_literal.token));
+                    },
+                    else => unreachable,
+                },
+                RenderState.FnProtoRParen => |fn_proto| {
+                    try stream.print(")");
+                    if (fn_proto.align_expr != null) {
+                        @panic("TODO");
+                    }
+                    try stream.print(" ");
+                    if (fn_proto.body_node) |body_node| {
+                        try stack.append(RenderState { .Expression = body_node});
+                        try stack.append(RenderState { .Text = " "});
+                    }
+                    try stack.append(RenderState { .Expression = fn_proto.return_type});
+                },
+                RenderState.Statement => |base| {
+                    switch (base.id) {
+                        ast.Node.Id.VarDecl => {
+                            const var_decl = @fieldParentPtr(ast.NodeVarDecl, "base", base);
+                            try stack.append(RenderState { .VarDecl = var_decl});
+                        },
+                        else => {
+                            try stack.append(RenderState { .Text = ";"});
+                            try stack.append(RenderState { .Expression = base});
+                        },
+                    }
+                },
+                RenderState.Indent => |new_indent| indent = new_indent,
+                RenderState.PrintIndent => try stream.writeByteNTimes(' ', indent),
+            }
+        }
+    }
+
+ /// Returns an empty `ArrayList(T)` whose `items` storage is the parser's utility
+ /// bytes reinterpreted as a slice of `T`. Pair every call with
+ /// `deinitUtilityArrayList` so the storage is handed back to the parser.
+ fn initUtilityArrayList(self: &Parser, comptime T: type) ArrayList(T) {
+ // round the byte length down to a whole number of T elements
+ const new_byte_count = self.utility_bytes.len - self.utility_bytes.len % @sizeOf(T);
+ self.utility_bytes = self.allocator.alignedShrink(u8, utility_bytes_align, self.utility_bytes, new_byte_count);
+ const typed_slice = ([]T)(self.utility_bytes);
+ return ArrayList(T) {
+ .allocator = self.allocator,
+ .items = typed_slice,
+ // the list starts empty; the typed slice only provides its capacity
+ .len = 0,
+ };
+ }
+
+ /// Takes the storage of a list obtained from `initUtilityArrayList` back as the
+ /// parser's utility bytes, so the next utility list can reuse it. Nothing is freed here.
+ fn deinitUtilityArrayList(self: &Parser, list: var) void {
+ self.utility_bytes = ([]align(utility_bytes_align) u8)(list.items);
+ }
+
+};
+
+var fixed_buffer_mem: [100 * 1024]u8 = undefined;
+
+/// Parses `source` and renders it back to source text, returning the rendered
+/// bytes allocated with `allocator`. The input is copied into a 0x100-byte
+/// stack buffer and followed by three newline bytes before tokenizing.
+fn testParse(source: []const u8, allocator: &mem.Allocator) ![]u8 {
+    var padded_source: [0x100]u8 = undefined;
+    const padded_len = source.len + 3;
+    std.mem.copy(u8, padded_source[0..source.len], source);
+    var pad_index = source.len;
+    while (pad_index < padded_len) : (pad_index += 1) {
+        padded_source[pad_index] = '\n';
+    }
+
+    var tokenizer = Tokenizer.init(padded_source[0..padded_len]);
+    var parser = Parser.init(&tokenizer, allocator, "(memory buffer)");
+    defer parser.deinit();
+
+    const tree = try parser.parse();
+    defer tree.deinit();
+
+    var rendered = try std.Buffer.initSize(allocator, 0);
+    var rendered_stream = io.BufferOutStream.init(&rendered);
+    try parser.renderSource(&rendered_stream.stream, tree.root_node);
+    return rendered.toOwnedSlice();
+}
+
+// TODO test for memory leaks
+// TODO test for valid frees
+/// Checks that `source` is already in canonical form: parsing and re-rendering it must
+/// reproduce it byte for byte. Afterwards the same parse is repeated once for every
+/// allocation index below the count recorded by the first run, with the allocator set
+/// up to fail at that index, and each run is expected to end in `error.OutOfMemory`.
+fn testCanonical(source: []const u8) !void {
+ const needed_alloc_count = x: {
+ // Try it once with unlimited memory, make sure it works
+ var fixed_allocator = mem.FixedBufferAllocator.init(fixed_buffer_mem[0..]);
+ var failing_allocator = std.debug.FailingAllocator.init(&fixed_allocator.allocator, @maxValue(usize));
+ const result_source = try testParse(source, &failing_allocator.allocator);
+ if (!mem.eql(u8, result_source, source)) {
+ warn("\n====== expected this output: =========\n");
+ warn("{}", source);
+ warn("\n======== instead found this: =========\n");
+ warn("{}", result_source);
+ warn("\n======================================\n");
+ return error.TestFailed;
+ }
+ failing_allocator.allocator.free(result_source);
+ // the allocator's index after a full run is taken as the number of allocations needed
+ break :x failing_allocator.index;
+ };
+
+ var fail_index: usize = 0;
+ while (fail_index < needed_alloc_count) : (fail_index += 1) {
+ // a fresh fixed buffer allocator per attempt, so every run starts from the same state
+ var fixed_allocator = mem.FixedBufferAllocator.init(fixed_buffer_mem[0..]);
+ var failing_allocator = std.debug.FailingAllocator.init(&fixed_allocator.allocator, fail_index);
+ if (testParse(source, &failing_allocator.allocator)) |_| {
+ // succeeding with fewer allocations than the first run needed means memory use varies
+ return error.NondeterministicMemoryUsage;
+ } else |err| {
+ assert(err == error.OutOfMemory);
+ // TODO make this pass
+ //if (failing_allocator.allocated_bytes != failing_allocator.freed_bytes) {
+ // warn("\nfail_index: {}/{}\nallocated bytes: {}\nfreed bytes: {}\nallocations: {}\ndeallocations: {}\n",
+ // fail_index, needed_alloc_count,
+ // failing_allocator.allocated_bytes, failing_allocator.freed_bytes,
+ // failing_allocator.index, failing_allocator.deallocations);
+ // return error.MemoryLeakDetected;
+ //}
+ }
+ }
+}
+
+// Each case is source text that is already canonical: rendering the parsed tree
+// must reproduce it exactly. Statements inside a block are indented by four
+// spaces, matching `indent_delta` in `renderSource`.
+test "zig fmt" {
+    try testCanonical(
+        \\extern fn puts(s: &const u8) c_int;
+        \\
+    );
+
+    try testCanonical(
+        \\const a = b;
+        \\pub const a = b;
+        \\var a = b;
+        \\pub var a = b;
+        \\const a: i32 = b;
+        \\pub const a: i32 = b;
+        \\var a: i32 = b;
+        \\pub var a: i32 = b;
+        \\
+    );
+
+    try testCanonical(
+        \\extern var foo: c_int;
+        \\
+    );
+
+    try testCanonical(
+        \\var foo: c_int align(1);
+        \\
+    );
+
+    try testCanonical(
+        \\fn main(argc: c_int, argv: &&u8) c_int {
+        \\    const a = b;
+        \\}
+        \\
+    );
+
+    try testCanonical(
+        \\fn foo(argc: c_int, argv: &&u8) c_int {
+        \\    return 0;
+        \\}
+        \\
+    );
+
+    try testCanonical(
+        \\extern fn f1(s: &align(&u8) u8) c_int;
+        \\
+    );
+
+    try testCanonical(
+        \\extern fn f1(s: &&align(1) &const &volatile u8) c_int;
+        \\extern fn f2(s: &align(1) const &align(1) volatile &const volatile u8) c_int;
+        \\extern fn f3(s: &align(1) const volatile u8) c_int;
+        \\
+    );
+
+    try testCanonical(
+        \\fn f1(a: bool, b: bool) bool {
+        \\    a != b;
+        \\    return a == b;
+        \\}
+        \\
+    );
+}
diff --git a/std/zig/tokenizer.zig b/std/zig/tokenizer.zig
@@ -0,0 +1,659 @@
+const std = @import("../index.zig");
+const mem = std.mem;
+
+pub const Token = struct {
+ id: Id,
+ start: usize,
+ end: usize,
+
+ const KeywordId = struct { // one row of the keyword table: source spelling -> token id
+ bytes: []const u8, // keyword text that getKeyword compares against
+ id: Id, // token id returned by getKeyword when the text matches
+ };
+
+ const keywords = []KeywordId { // keyword table, scanned front to back by getKeyword
+ KeywordId{.bytes="align", .id = Id.Keyword_align},
+ KeywordId{.bytes="and", .id = Id.Keyword_and},
+ KeywordId{.bytes="asm", .id = Id.Keyword_asm},
+ KeywordId{.bytes="break", .id = Id.Keyword_break},
+ KeywordId{.bytes="comptime", .id = Id.Keyword_comptime},
+ KeywordId{.bytes="const", .id = Id.Keyword_const},
+ KeywordId{.bytes="continue", .id = Id.Keyword_continue},
+ KeywordId{.bytes="defer", .id = Id.Keyword_defer},
+ KeywordId{.bytes="else", .id = Id.Keyword_else},
+ KeywordId{.bytes="enum", .id = Id.Keyword_enum},
+ KeywordId{.bytes="error", .id = Id.Keyword_error},
+ KeywordId{.bytes="export", .id = Id.Keyword_export},
+ KeywordId{.bytes="extern", .id = Id.Keyword_extern},
+ KeywordId{.bytes="false", .id = Id.Keyword_false},
+ KeywordId{.bytes="fn", .id = Id.Keyword_fn},
+ KeywordId{.bytes="for", .id = Id.Keyword_for},
+ KeywordId{.bytes="goto", .id = Id.Keyword_goto},
+ KeywordId{.bytes="if", .id = Id.Keyword_if},
+ KeywordId{.bytes="inline", .id = Id.Keyword_inline},
+ KeywordId{.bytes="nakedcc", .id = Id.Keyword_nakedcc},
+ KeywordId{.bytes="noalias", .id = Id.Keyword_noalias},
+ KeywordId{.bytes="null", .id = Id.Keyword_null},
+ KeywordId{.bytes="or", .id = Id.Keyword_or},
+ KeywordId{.bytes="packed", .id = Id.Keyword_packed},
+ KeywordId{.bytes="pub", .id = Id.Keyword_pub},
+ KeywordId{.bytes="return", .id = Id.Keyword_return},
+ KeywordId{.bytes="stdcallcc", .id = Id.Keyword_stdcallcc},
+ KeywordId{.bytes="struct", .id = Id.Keyword_struct},
+ KeywordId{.bytes="switch", .id = Id.Keyword_switch},
+ KeywordId{.bytes="test", .id = Id.Keyword_test},
+ KeywordId{.bytes="this", .id = Id.Keyword_this},
+ KeywordId{.bytes="true", .id = Id.Keyword_true},
+ KeywordId{.bytes="undefined", .id = Id.Keyword_undefined},
+ KeywordId{.bytes="union", .id = Id.Keyword_union},
+ KeywordId{.bytes="unreachable", .id = Id.Keyword_unreachable},
+ KeywordId{.bytes="use", .id = Id.Keyword_use},
+ KeywordId{.bytes="var", .id = Id.Keyword_var},
+ KeywordId{.bytes="volatile", .id = Id.Keyword_volatile},
+ KeywordId{.bytes="while", .id = Id.Keyword_while},
+ };
+
+ // Looks `bytes` up in the keyword table; null when it is not a keyword.
+ fn getKeyword(bytes: []const u8) ?Id {
+     for (keywords) |entry| {
+         if (!mem.eql(u8, entry.bytes, bytes)) continue;
+         return entry.id;
+     }
+     return null;
+ }
+
+ const StrLitKind = enum {Normal, C}; // Normal: "..." literal; C: literal introduced by a leading 'c'
+ // Kinds of token produced by Tokenizer.next. Only StringLiteral carries a payload.
+ pub const Id = union(enum) {
+ Invalid, // a byte that cannot start a token, or an invalid sequence inside a literal/comment
+ Identifier,
+ StringLiteral: StrLitKind,
+ Eof, // returned once the buffer holds no further token
+ Builtin, // '@' followed by identifier characters
+ Bang,
+ Equal,
+ EqualEqual,
+ BangEqual,
+ LParen,
+ RParen,
+ Semicolon,
+ Percent,
+ LBrace,
+ RBrace,
+ Period,
+ Ellipsis2, // ".."
+ Ellipsis3, // "..."
+ Minus,
+ Arrow, // "->"
+ Colon,
+ Slash,
+ Comma,
+ Ampersand,
+ AmpersandEqual,
+ IntegerLiteral,
+ FloatLiteral,
+ Keyword_align, // Keyword_* tags mirror the entries of the keywords table one-to-one
+ Keyword_and,
+ Keyword_asm,
+ Keyword_break,
+ Keyword_comptime,
+ Keyword_const,
+ Keyword_continue,
+ Keyword_defer,
+ Keyword_else,
+ Keyword_enum,
+ Keyword_error,
+ Keyword_export,
+ Keyword_extern,
+ Keyword_false,
+ Keyword_fn,
+ Keyword_for,
+ Keyword_goto,
+ Keyword_if,
+ Keyword_inline,
+ Keyword_nakedcc,
+ Keyword_noalias,
+ Keyword_null,
+ Keyword_or,
+ Keyword_packed,
+ Keyword_pub,
+ Keyword_return,
+ Keyword_stdcallcc,
+ Keyword_struct,
+ Keyword_switch,
+ Keyword_test,
+ Keyword_this,
+ Keyword_true,
+ Keyword_undefined,
+ Keyword_union,
+ Keyword_unreachable,
+ Keyword_use,
+ Keyword_var,
+ Keyword_volatile,
+ Keyword_while,
+ };
+};
+
+pub const Tokenizer = struct {
+ buffer: []const u8,
+ index: usize,
+ pending_invalid_token: ?Token,
+
+ pub const Location = struct { // result of getTokenLocation; all values are 0-based byte counts/indices
+ line: usize, // number of '\n' bytes before the token start
+ column: usize, // bytes between line_start and the token start
+ line_start: usize, // buffer index just after the last '\n' before the token (0 on the first line)
+ line_end: usize, // index of the first '\n' at or after the token start (buffer.len if none)
+ };
+
+ /// Returns the 0-based line/column of `token.start` and the byte range
+ /// [line_start, line_end) of its line, found by scanning the buffer from index 0.
+ pub fn getTokenLocation(self: &Tokenizer, token: &const Token) Location {
+     var loc = Location { .line = 0, .column = 0, .line_start = 0, .line_end = 0 };
+     for (self.buffer) |c, i| {
+         if (i == token.start) {
+             loc.line_end = i;
+             while (loc.line_end < self.buffer.len and self.buffer[loc.line_end] != '\n') : (loc.line_end += 1) {}
+             return loc;
+         }
+         if (c == '\n') {
+             loc.line += 1;
+             loc.column = 0;
+             loc.line_start = i + 1;
+         } else {
+             loc.column += 1;
+         }
+     }
+     // token.start is at or past the end of the buffer (e.g. the Eof token): the
+     // line is empty, so keep line_start <= line_end for callers that slice it.
+     loc.line_end = self.buffer.len;
+     return loc;
+ }
+
+ /// Debugging aid: prints the token's tag name followed by its quoted source text.
+ pub fn dump(self: &Tokenizer, token: &const Token) void {
+     std.debug.warn("{} \"{}\"\n", @tagName(token.id), self.getTokenSlice(token));
+ }
+
+ /// buffer must end with "\n\n\n". This is so that attempting to decode
+ /// the 3 trailing bytes of a 4-byte utf8 sequence is never a buffer overflow.
+ pub fn init(buffer: []const u8) Tokenizer {
+     // Check the length first so the index arithmetic below cannot underflow.
+     std.debug.assert(buffer.len >= 3);
+     std.debug.assert(mem.eql(u8, buffer[buffer.len - 3..], "\n\n\n"));
+     return Tokenizer {
+         .buffer = buffer,
+         .index = 0,
+         .pending_invalid_token = null,
+     };
+ }
+
+ const State = enum { // states of the scanner loop in next()
+ Start, // between tokens: whitespace is skipped here
+ Identifier,
+ Builtin, // after '@'
+ C, // after a leading 'c': an identifier or the prefix of a C string literal
+ StringLiteral,
+ StringLiteralBackslash, // the byte after a backslash inside a string literal
+ Equal, // after '=': Equal or EqualEqual
+ Bang, // after '!': Bang or BangEqual
+ Minus, // after '-': Minus or Arrow
+ Slash, // after '/': Slash or the start of a line comment
+ LineComment, // inside "//" up to the next '\n'
+ Zero, // after a leading '0': may be followed by a radix letter b/o/x
+ IntegerLiteral,
+ IntegerLiteralWithRadix,
+ NumberDot, // after digits and one '.': a fraction, or the first dot of ".."
+ FloatFraction,
+ FloatExponentUnsigned, // after the exponent letter, before an optional sign
+ FloatExponentNumber,
+ Ampersand, // after '&': Ampersand or AmpersandEqual
+ Period, // after '.'
+ Period2, // after "..": Ellipsis2 or Ellipsis3
+ };
+
+ pub fn next(self: &Tokenizer) Token { // scans and returns the next token; Eof once the buffer is exhausted
+ if (self.pending_invalid_token) |token| { // an Invalid token recorded during the previous call is handed out first
+ self.pending_invalid_token = null;
+ return token;
+ }
+ var state = State.Start;
+ var result = Token {
+ .id = Token.Id.Eof,
+ .start = self.index,
+ .end = undefined,
+ };
+ while (self.index < self.buffer.len) : (self.index += 1) { // one byte per iteration; `break` skips the increment
+ const c = self.buffer[self.index];
+ switch (state) {
+ State.Start => switch (c) {
+ ' ', '\n' => { // whitespace: the token has not started yet
+ result.start = self.index + 1;
+ },
+ 'c' => { // identifier, or the prefix of a C string literal
+ state = State.C;
+ result.id = Token.Id.Identifier;
+ },
+ '"' => {
+ state = State.StringLiteral;
+ result.id = Token.Id { .StringLiteral = Token.StrLitKind.Normal };
+ },
+ 'a'...'b', 'd'...'z', 'A'...'Z', '_' => {
+ state = State.Identifier;
+ result.id = Token.Id.Identifier;
+ },
+ '@' => {
+ state = State.Builtin;
+ result.id = Token.Id.Builtin;
+ },
+ '=' => {
+ state = State.Equal;
+ },
+ '!' => {
+ state = State.Bang;
+ },
+ '(' => {
+ result.id = Token.Id.LParen;
+ self.index += 1; // consume the byte by hand: `break` skips the loop's increment
+ break;
+ },
+ ')' => {
+ result.id = Token.Id.RParen;
+ self.index += 1;
+ break;
+ },
+ ';' => {
+ result.id = Token.Id.Semicolon;
+ self.index += 1;
+ break;
+ },
+ ',' => {
+ result.id = Token.Id.Comma;
+ self.index += 1;
+ break;
+ },
+ ':' => {
+ result.id = Token.Id.Colon;
+ self.index += 1;
+ break;
+ },
+ '%' => {
+ result.id = Token.Id.Percent;
+ self.index += 1;
+ break;
+ },
+ '{' => {
+ result.id = Token.Id.LBrace;
+ self.index += 1;
+ break;
+ },
+ '}' => {
+ result.id = Token.Id.RBrace;
+ self.index += 1;
+ break;
+ },
+ '.' => {
+ state = State.Period;
+ },
+ '-' => {
+ state = State.Minus;
+ },
+ '/' => {
+ state = State.Slash;
+ },
+ '&' => {
+ state = State.Ampersand;
+ },
+ '0' => {
+ state = State.Zero;
+ result.id = Token.Id.IntegerLiteral;
+ },
+ '1'...'9' => {
+ state = State.IntegerLiteral;
+ result.id = Token.Id.IntegerLiteral;
+ },
+ else => { // any other byte becomes a one-byte Invalid token
+ result.id = Token.Id.Invalid;
+ self.index += 1;
+ break;
+ },
+ },
+ State.Ampersand => switch (c) {
+ '=' => {
+ result.id = Token.Id.AmpersandEqual;
+ self.index += 1;
+ break;
+ },
+ else => {
+ result.id = Token.Id.Ampersand;
+ break; // c is not consumed; it starts the next token
+ },
+ },
+ State.Identifier => switch (c) {
+ 'a'...'z', 'A'...'Z', '_', '0'...'9' => {},
+ else => { // identifier ended: promote it to a keyword id if the text is in the table
+ if (Token.getKeyword(self.buffer[result.start..self.index])) |id| {
+ result.id = id;
+ }
+ break;
+ },
+ },
+ State.Builtin => switch (c) {
+ 'a'...'z', 'A'...'Z', '_', '0'...'9' => {},
+ else => break,
+ },
+ State.C => switch (c) {
+ '\\' => @panic("TODO"), // unimplemented: a backslash right after a leading 'c' panics
+ '"' => {
+ state = State.StringLiteral;
+ result.id = Token.Id { .StringLiteral = Token.StrLitKind.C };
+ },
+ 'a'...'z', 'A'...'Z', '_', '0'...'9' => {
+ state = State.Identifier;
+ },
+ else => break, // a lone "c" identifier
+ },
+ State.StringLiteral => switch (c) {
+ '\\' => {
+ state = State.StringLiteralBackslash;
+ },
+ '"' => {
+ self.index += 1;
+ break;
+ },
+ '\n' => break, // Look for this error later.
+ else => self.checkLiteralCharacter(), // may record a pending Invalid token or skip utf8 continuation bytes
+ },
+ // The byte after a backslash is accepted without inspection, so \" does not end the literal.
+ State.StringLiteralBackslash => switch (c) {
+ '\n' => break, // Look for this error later.
+ else => {
+ state = State.StringLiteral;
+ },
+ },
+ // One- or two-byte operators: the current byte decides which token is produced.
+ State.Bang => switch (c) {
+ '=' => {
+ result.id = Token.Id.BangEqual;
+ self.index += 1;
+ break;
+ },
+ else => {
+ result.id = Token.Id.Bang;
+ break;
+ },
+ },
+
+ State.Equal => switch (c) {
+ '=' => {
+ result.id = Token.Id.EqualEqual;
+ self.index += 1;
+ break;
+ },
+ else => {
+ result.id = Token.Id.Equal;
+ break;
+ },
+ },
+
+ State.Minus => switch (c) {
+ '>' => {
+ result.id = Token.Id.Arrow;
+ self.index += 1;
+ break;
+ },
+ else => {
+ result.id = Token.Id.Minus;
+ break;
+ },
+ },
+
+ State.Period => switch (c) {
+ '.' => {
+ state = State.Period2;
+ },
+ else => {
+ result.id = Token.Id.Period;
+ break;
+ },
+ },
+
+ State.Period2 => switch (c) {
+ '.' => {
+ result.id = Token.Id.Ellipsis3;
+ self.index += 1;
+ break;
+ },
+ else => {
+ result.id = Token.Id.Ellipsis2;
+ break;
+ },
+ },
+ // A second '/' starts a line comment; anything else makes the first '/' a Slash token.
+ State.Slash => switch (c) {
+ '/' => {
+ result.id = undefined; // a comment is not a token; result is rebuilt when the comment ends
+ state = State.LineComment;
+ },
+ else => {
+ result.id = Token.Id.Slash;
+ break;
+ },
+ },
+ State.LineComment => switch (c) {
+ '\n' => { // comment finished: start over as if the token began on the next line
+ state = State.Start;
+ result = Token {
+ .id = Token.Id.Eof,
+ .start = self.index + 1,
+ .end = undefined,
+ };
+ },
+ else => self.checkLiteralCharacter(),
+ },
+ State.Zero => switch (c) {
+ 'b', 'o', 'x' => { // radix letter after the leading 0
+ state = State.IntegerLiteralWithRadix;
+ },
+ else => {
+ // reinterpret as a normal number
+ self.index -= 1; // the loop's increment brings us back to this same byte
+ state = State.IntegerLiteral;
+ },
+ },
+ State.IntegerLiteral => switch (c) {
+ '.' => {
+ state = State.NumberDot;
+ },
+ 'p', 'P', 'e', 'E' => { // NOTE(review): result.id stays IntegerLiteral on this path (e.g. "1e5") - confirm intended
+ state = State.FloatExponentUnsigned;
+ },
+ '0'...'9' => {},
+ else => break,
+ },
+ State.IntegerLiteralWithRadix => switch (c) {
+ '.' => {
+ state = State.NumberDot;
+ },
+ 'p', 'P' => {
+ state = State.FloatExponentUnsigned;
+ },
+ '0'...'9', 'a'...'f', 'A'...'F' => {},
+ else => break,
+ },
+ State.NumberDot => switch (c) {
+ '.' => { // digits followed by "..": give the first '.' back and end the integer token before it
+ self.index -= 1;
+ state = State.Start;
+ break;
+ },
+ else => { // a single '.': the literal is a float; revisit this byte in FloatFraction
+ self.index -= 1;
+ result.id = Token.Id.FloatLiteral;
+ state = State.FloatFraction;
+ },
+ },
+ State.FloatFraction => switch (c) {
+ 'p', 'P' => {
+ state = State.FloatExponentUnsigned;
+ },
+ '0'...'9', 'a'...'f', 'A'...'F' => {},
+ else => break,
+ },
+ State.FloatExponentUnsigned => switch (c) {
+ '+', '-' => {
+ state = State.FloatExponentNumber;
+ },
+ else => {
+ // reinterpret as a normal exponent number
+ self.index -= 1;
+ state = State.FloatExponentNumber;
+ }
+ },
+ State.FloatExponentNumber => switch (c) {
+ '0'...'9', 'a'...'f', 'A'...'F' => {},
+ else => break,
+ },
+ }
+ }
+ result.end = self.index;
+ // If only comments/whitespace remained, report an Invalid token recorded inside them before Eof.
+ if (result.id == Token.Id.Eof) {
+ if (self.pending_invalid_token) |token| {
+ self.pending_invalid_token = null;
+ return token;
+ }
+ }
+
+ return result;
+ }
+
+ pub fn getTokenSlice(self: &const Tokenizer, token: &const Token) []const u8 { // source bytes spanned by `token`
+ return self.buffer[token.start..token.end]; // a slice of self.buffer, not a copy
+ }
+
+ // Validates the character at self.index inside a string literal or comment.
+ // Only the first invalid sequence is kept (in pending_invalid_token) for next().
+ fn checkLiteralCharacter(self: &Tokenizer) void {
+     if (self.pending_invalid_token == null) {
+         const bad_len = self.getInvalidCharacterLength();
+         if (bad_len != 0) {
+             self.pending_invalid_token = Token { .id = Token.Id.Invalid, .start = self.index, .end = self.index + bad_len };
+         }
+     }
+ }
+
+ // Returns the byte length of the invalid sequence starting at self.index, or 0
+ // if the character there is acceptable. For a valid multi-byte utf8 character
+ // self.index is moved to its last byte so the caller's loop steps past it.
+ fn getInvalidCharacterLength(self: &Tokenizer) u3 {
+     const first_byte = self.buffer[self.index];
+     if (first_byte < 0x80) {
+         // ascii control codes are never allowed
+         // (note that \n was checked before we got here)
+         const is_control = first_byte < 0x20 or first_byte == 0x7f;
+         if (is_control) return 1;
+         return 0;
+     }
+     // check utf8-encoded character.
+     const length = std.unicode.utf8ByteSequenceLength(first_byte) catch return 1;
+     // the last 3 bytes in the buffer are guaranteed to be '\n',
+     // which means we don't need to do any bounds checking here.
+     const bytes = self.buffer[self.index..self.index + length];
+     switch (length) {
+         2 => {
+             const codepoint = std.unicode.utf8Decode2(bytes) catch return length;
+             if (codepoint == 0x85) return length; // U+0085 (NEL)
+         },
+         3 => {
+             const codepoint = std.unicode.utf8Decode3(bytes) catch return length;
+             // U+2028 (LS) and U+2029 (PS)
+             if (codepoint == 0x2028 or codepoint == 0x2029) return length;
+         },
+         4 => {
+             _ = std.unicode.utf8Decode4(bytes) catch return length;
+         },
+         else => unreachable,
+     }
+     self.index += length - 1;
+     return 0;
+ }
+};
+
+
+
+test "tokenizer" {
+    // Smoke test: the source text "test" is itself a keyword, so exactly one
+    // Keyword_test token is expected before Eof.
+    testTokenize("test", []Token.Id{Token.Id.Keyword_test});
+}
+
+test "tokenizer - invalid token characters" { // bytes that cannot start any token
+ testTokenize("#", []Token.Id{Token.Id.Invalid});
+ testTokenize("`", []Token.Id{Token.Id.Invalid});
+}
+
+test "tokenizer - invalid literal/comment characters" { // control bytes inside literals and comments
+ testTokenize("\"\x00\"", []Token.Id { // the string literal is returned first, then the Invalid token found inside it
+ Token.Id { .StringLiteral = Token.StrLitKind.Normal },
+ Token.Id.Invalid,
+ });
+ testTokenize("//\x00", []Token.Id { // a comment yields no token of its own, only the Invalid one
+ Token.Id.Invalid,
+ });
+ testTokenize("//\x1f", []Token.Id { // highest control code below 0x20
+ Token.Id.Invalid,
+ });
+ testTokenize("//\x7f", []Token.Id { // DEL
+ Token.Id.Invalid,
+ });
+}
+
+test "tokenizer - utf8" { // well-formed multi-byte sequences inside a comment produce no token
+ testTokenize("//\xc2\x80", []Token.Id{}); // U+0080, the smallest 2-byte code point
+ testTokenize("//\xf4\x8f\xbf\xbf", []Token.Id{}); // U+10FFFF, the largest code point
+}
+
+test "tokenizer - invalid utf8" { // malformed sequences inside a comment yield one Invalid token
+ testTokenize("//\x80", []Token.Id{Token.Id.Invalid}); // lone continuation byte
+ testTokenize("//\xbf", []Token.Id{Token.Id.Invalid}); // lone continuation byte
+ testTokenize("//\xf8", []Token.Id{Token.Id.Invalid}); // not a valid lead byte
+ testTokenize("//\xff", []Token.Id{Token.Id.Invalid}); // not a valid lead byte
+ testTokenize("//\xc2\xc0", []Token.Id{Token.Id.Invalid}); // 2-byte lead followed by a non-continuation byte
+ testTokenize("//\xe0", []Token.Id{Token.Id.Invalid}); // 3-byte lead followed only by the '\n' padding
+ testTokenize("//\xf0", []Token.Id{Token.Id.Invalid}); // 4-byte lead followed only by the '\n' padding
+ testTokenize("//\xf0\x90\x80\xc0", []Token.Id{Token.Id.Invalid}); // 4-byte sequence with a bad last byte
+}
+
+test "tokenizer - illegal unicode codepoints" {
+ // Unicode newline characters U+0085, U+2028 and U+2029 are rejected; their neighbours are accepted.
+ testTokenize("//\xc2\x84", []Token.Id{}); // U+0084
+ testTokenize("//\xc2\x85", []Token.Id{Token.Id.Invalid}); // U+0085 (NEL)
+ testTokenize("//\xc2\x86", []Token.Id{}); // U+0086
+ testTokenize("//\xe2\x80\xa7", []Token.Id{}); // U+2027
+ testTokenize("//\xe2\x80\xa8", []Token.Id{Token.Id.Invalid}); // U+2028 (LS)
+ testTokenize("//\xe2\x80\xa9", []Token.Id{Token.Id.Invalid}); // U+2029 (PS)
+ testTokenize("//\xe2\x80\xaa", []Token.Id{}); // U+202A
+}
+
+// Test helper: asserts `source` tokenizes to `expected_tokens` (string-literal kind included), then Eof.
+fn testTokenize(source: []const u8, expected_tokens: []const Token.Id) void {
+    // (test authors, just make this bigger if you need it)
+    var padded_source: [0x100]u8 = undefined;
+    const padded_len = source.len + 3;
+    std.mem.copy(u8, padded_source[0..source.len], source);
+    // Tokenizer.init requires the buffer to end with "\n\n\n".
+    std.mem.set(u8, padded_source[source.len..padded_len], '\n');
+    var tokenizer = Tokenizer.init(padded_source[0..padded_len]);
+    for (expected_tokens) |want| {
+        const got = tokenizer.next();
+        std.debug.assert(@TagType(Token.Id)(got.id) == @TagType(Token.Id)(want));
+        switch (want) {
+            Token.Id.StringLiteral => |want_kind| {
+                std.debug.assert(want_kind == switch (got.id) { Token.Id.StringLiteral => |kind| kind, else => unreachable });
+            },
+            else => {},
+        }
+    }
+    std.debug.assert(tokenizer.next().id == Token.Id.Eof);
+}