start with parser tests

2025-01-03 19:23:22 +02:00
parent 49c910b8b2
commit 1f134595de
6 changed files with 488 additions and 366 deletions
--- a/ast.h
+++ b/ast.h
@@ -9,91 +9,91 @@

 typedef enum {
    /// sub_list[lhs...rhs]
-    AST_NODE_TAG_ROOT,
+    AST_NODE_ROOT,
    /// `usingnamespace lhs;`. rhs unused. main_token is `usingnamespace`.
-    AST_NODE_TAG_USINGNAMESPACE,
+    AST_NODE_USINGNAMESPACE,
    /// lhs is test name token (must be string literal or identifier), if any.
    /// rhs is the body node.
-    AST_NODE_TAG_TEST_DECL,
+    AST_NODE_TEST_DECL,
    /// lhs is the index into extra_data.
    /// rhs is the initialization expression, if any.
    /// main_token is `var` or `const`.
-    AST_NODE_TAG_GLOBAL_VAR_DECL,
+    AST_NODE_GLOBAL_VAR_DECL,
    /// `var a: x align(y) = rhs`
    /// lhs is the index into extra_data.
    /// main_token is `var` or `const`.
-    AST_NODE_TAG_LOCAL_VAR_DECL,
+    AST_NODE_LOCAL_VAR_DECL,
    /// `var a: lhs = rhs`. lhs and rhs may be unused.
    /// Can be local or global.
    /// main_token is `var` or `const`.
-    AST_NODE_TAG_SIMPLE_VAR_DECL,
+    AST_NODE_SIMPLE_VAR_DECL,
    /// `var a align(lhs) = rhs`. lhs and rhs may be unused.
    /// Can be local or global.
    /// main_token is `var` or `const`.
-    AST_NODE_TAG_ALIGNED_VAR_DECL,
+    AST_NODE_ALIGNED_VAR_DECL,
    /// lhs is the identifier token payload if any,
    /// rhs is the deferred expression.
-    AST_NODE_TAG_ERRDEFER,
+    AST_NODE_ERRDEFER,
    /// lhs is unused.
    /// rhs is the deferred expression.
-    AST_NODE_TAG_DEFER,
+    AST_NODE_DEFER,
    /// lhs catch rhs
    /// lhs catch |err| rhs
    /// main_token is the `catch` keyword.
    /// payload is determined by looking at the next token after the `catch` keyword.
-    AST_NODE_TAG_CATCH,
+    AST_NODE_CATCH,
    /// `lhs.a`. main_token is the dot. rhs is the identifier token index.
-    AST_NODE_TAG_FIELD_ACCESS,
+    AST_NODE_FIELD_ACCESS,
    /// `lhs.?`. main_token is the dot. rhs is the `?` token index.
-    AST_NODE_TAG_UNWRAP_OPTIONAL,
+    AST_NODE_UNWRAP_OPTIONAL,
    /// `lhs == rhs`. main_token is op.
-    AST_NODE_TAG_EQUAL_EQUAL,
+    AST_NODE_EQUAL_EQUAL,
    /// `lhs != rhs`. main_token is op.
-    AST_NODE_TAG_BANG_EQUAL,
+    AST_NODE_BANG_EQUAL,
    /// `lhs < rhs`. main_token is op.
-    AST_NODE_TAG_LESS_THAN,
+    AST_NODE_LESS_THAN,
    /// `lhs > rhs`. main_token is op.
-    AST_NODE_TAG_GREATER_THAN,
+    AST_NODE_GREATER_THAN,
    /// `lhs <= rhs`. main_token is op.
-    AST_NODE_TAG_LESS_OR_EQUAL,
+    AST_NODE_LESS_OR_EQUAL,
    /// `lhs >= rhs`. main_token is op.
-    AST_NODE_TAG_GREATER_OR_EQUAL,
+    AST_NODE_GREATER_OR_EQUAL,
    /// `lhs *= rhs`. main_token is op.
-    AST_NODE_TAG_ASSIGN_MUL,
+    AST_NODE_ASSIGN_MUL,
    /// `lhs /= rhs`. main_token is op.
-    AST_NODE_TAG_ASSIGN_DIV,
+    AST_NODE_ASSIGN_DIV,
    /// `lhs %= rhs`. main_token is op.
-    AST_NODE_TAG_ASSIGN_MOD,
+    AST_NODE_ASSIGN_MOD,
    /// `lhs += rhs`. main_token is op.
-    AST_NODE_TAG_ASSIGN_ADD,
+    AST_NODE_ASSIGN_ADD,
    /// `lhs -= rhs`. main_token is op.
-    AST_NODE_TAG_ASSIGN_SUB,
+    AST_NODE_ASSIGN_SUB,
    /// `lhs <<= rhs`. main_token is op.
-    AST_NODE_TAG_ASSIGN_SHL,
+    AST_NODE_ASSIGN_SHL,
    /// `lhs <<|= rhs`. main_token is op.
-    AST_NODE_TAG_ASSIGN_SHL_SAT,
+    AST_NODE_ASSIGN_SHL_SAT,
    /// `lhs >>= rhs`. main_token is op.
-    AST_NODE_TAG_ASSIGN_SHR,
+    AST_NODE_ASSIGN_SHR,
    /// `lhs &= rhs`. main_token is op.
-    AST_NODE_TAG_ASSIGN_BIT_AND,
+    AST_NODE_ASSIGN_BIT_AND,
    /// `lhs ^= rhs`. main_token is op.
-    AST_NODE_TAG_ASSIGN_BIT_XOR,
+    AST_NODE_ASSIGN_BIT_XOR,
    /// `lhs |= rhs`. main_token is op.
-    AST_NODE_TAG_ASSIGN_BIT_OR,
+    AST_NODE_ASSIGN_BIT_OR,
    /// `lhs *%= rhs`. main_token is op.
-    AST_NODE_TAG_ASSIGN_MUL_WRAP,
+    AST_NODE_ASSIGN_MUL_WRAP,
    /// `lhs +%= rhs`. main_token is op.
-    AST_NODE_TAG_ASSIGN_ADD_WRAP,
+    AST_NODE_ASSIGN_ADD_WRAP,
    /// `lhs -%= rhs`. main_token is op.
-    AST_NODE_TAG_ASSIGN_SUB_WRAP,
+    AST_NODE_ASSIGN_SUB_WRAP,
    /// `lhs *|= rhs`. main_token is op.
-    AST_NODE_TAG_ASSIGN_MUL_SAT,
+    AST_NODE_ASSIGN_MUL_SAT,
    /// `lhs +|= rhs`. main_token is op.
-    AST_NODE_TAG_ASSIGN_ADD_SAT,
+    AST_NODE_ASSIGN_ADD_SAT,
    /// `lhs -|= rhs`. main_token is op.
-    AST_NODE_TAG_ASSIGN_SUB_SAT,
+    AST_NODE_ASSIGN_SUB_SAT,
    /// `lhs = rhs`. main_token is op.
-    AST_NODE_TAG_ASSIGN,
+    AST_NODE_ASSIGN,
    /// `a, b, ... = rhs`. main_token is op. lhs is index into `extra_data`
    /// of an lhs elem count followed by an array of that many `Node.Index`,
    /// with each node having one of the following types:
@@ -107,73 +107,73 @@ typedef enum {
    /// standard assignment LHS (which must be evaluated as an lvalue).
    /// There may be a preceding `comptime` token, which does not create a
    /// corresponding `comptime` node so must be manually detected.
-    AST_NODE_TAG_ASSIGN_DESTRUCTURE,
+    AST_NODE_ASSIGN_DESTRUCTURE,
    /// `lhs || rhs`. main_token is the `||`.
-    AST_NODE_TAG_MERGE_ERROR_SETS,
+    AST_NODE_MERGE_ERROR_SETS,
    /// `lhs * rhs`. main_token is the `*`.
-    AST_NODE_TAG_MUL,
+    AST_NODE_MUL,
    /// `lhs / rhs`. main_token is the `/`.
-    AST_NODE_TAG_DIV,
+    AST_NODE_DIV,
    /// `lhs % rhs`. main_token is the `%`.
-    AST_NODE_TAG_MOD,
+    AST_NODE_MOD,
    /// `lhs ** rhs`. main_token is the `**`.
-    AST_NODE_TAG_ARRAY_MULT,
+    AST_NODE_ARRAY_MULT,
    /// `lhs *% rhs`. main_token is the `*%`.
-    AST_NODE_TAG_MUL_WRAP,
+    AST_NODE_MUL_WRAP,
    /// `lhs *| rhs`. main_token is the `*|`.
-    AST_NODE_TAG_MUL_SAT,
+    AST_NODE_MUL_SAT,
    /// `lhs + rhs`. main_token is the `+`.
-    AST_NODE_TAG_ADD,
+    AST_NODE_ADD,
    /// `lhs - rhs`. main_token is the `-`.
-    AST_NODE_TAG_SUB,
+    AST_NODE_SUB,
    /// `lhs ++ rhs`. main_token is the `++`.
-    AST_NODE_TAG_ARRAY_CAT,
+    AST_NODE_ARRAY_CAT,
    /// `lhs +% rhs`. main_token is the `+%`.
-    AST_NODE_TAG_ADD_WRAP,
+    AST_NODE_ADD_WRAP,
    /// `lhs -% rhs`. main_token is the `-%`.
-    AST_NODE_TAG_SUB_WRAP,
+    AST_NODE_SUB_WRAP,
    /// `lhs +| rhs`. main_token is the `+|`.
-    AST_NODE_TAG_ADD_SAT,
+    AST_NODE_ADD_SAT,
    /// `lhs -| rhs`. main_token is the `-|`.
-    AST_NODE_TAG_SUB_SAT,
+    AST_NODE_SUB_SAT,
    /// `lhs << rhs`. main_token is the `<<`.
-    AST_NODE_TAG_SHL,
+    AST_NODE_SHL,
    /// `lhs <<| rhs`. main_token is the `<<|`.
-    AST_NODE_TAG_SHL_SAT,
+    AST_NODE_SHL_SAT,
    /// `lhs >> rhs`. main_token is the `>>`.
-    AST_NODE_TAG_SHR,
+    AST_NODE_SHR,
    /// `lhs & rhs`. main_token is the `&`.
-    AST_NODE_TAG_BIT_AND,
+    AST_NODE_BIT_AND,
    /// `lhs ^ rhs`. main_token is the `^`.
-    AST_NODE_TAG_BIT_XOR,
+    AST_NODE_BIT_XOR,
    /// `lhs | rhs`. main_token is the `|`.
-    AST_NODE_TAG_BIT_OR,
+    AST_NODE_BIT_OR,
    /// `lhs orelse rhs`. main_token is the `orelse`.
-    AST_NODE_TAG_ORELSE,
+    AST_NODE_ORELSE,
    /// `lhs and rhs`. main_token is the `and`.
-    AST_NODE_TAG_BOOL_AND,
+    AST_NODE_BOOL_AND,
    /// `lhs or rhs`. main_token is the `or`.
-    AST_NODE_TAG_BOOL_OR,
+    AST_NODE_BOOL_OR,
    /// `op lhs`. rhs unused. main_token is op.
-    AST_NODE_TAG_BOOL_NOT,
+    AST_NODE_BOOL_NOT,
    /// `op lhs`. rhs unused. main_token is op.
-    AST_NODE_TAG_NEGATION,
+    AST_NODE_NEGATION,
    /// `op lhs`. rhs unused. main_token is op.
-    AST_NODE_TAG_BIT_NOT,
+    AST_NODE_BIT_NOT,
    /// `op lhs`. rhs unused. main_token is op.
-    AST_NODE_TAG_NEGATION_WRAP,
+    AST_NODE_NEGATION_WRAP,
    /// `op lhs`. rhs unused. main_token is op.
-    AST_NODE_TAG_ADDRESS_OF,
+    AST_NODE_ADDRESS_OF,
    /// `op lhs`. rhs unused. main_token is op.
-    AST_NODE_TAG_TRY,
+    AST_NODE_TRY,
    /// `op lhs`. rhs unused. main_token is op.
-    AST_NODE_TAG_AWAIT,
+    AST_NODE_AWAIT,
    /// `?lhs`. rhs unused. main_token is the `?`.
-    AST_NODE_TAG_OPTIONAL_TYPE,
+    AST_NODE_OPTIONAL_TYPE,
    /// `[lhs]rhs`.
-    AST_NODE_TAG_ARRAY_TYPE,
+    AST_NODE_ARRAY_TYPE,
    /// `[lhs:a]b`. `ArrayTypeSentinel[rhs]`.
-    AST_NODE_TAG_ARRAY_TYPE_SENTINEL,
+    AST_NODE_ARRAY_TYPE_SENTINEL,
    /// `[*]align(lhs) rhs`. lhs can be omitted.
    /// `*align(lhs) rhs`. lhs can be omitted.
    /// `[]rhs`.
@@ -181,7 +181,7 @@ typedef enum {
    /// if a slice, many-item pointer, or C-pointer
    /// main_token might be a ** token, which is shared with a parent/child
    /// pointer type and may require special handling.
-    AST_NODE_TAG_PTR_TYPE_ALIGNED,
+    AST_NODE_PTR_TYPE_ALIGNED,
    /// `[*:lhs]rhs`. lhs can be omitted.
    /// `*rhs`.
    /// `[:lhs]rhs`.
@@ -189,297 +189,297 @@ typedef enum {
    /// if a slice, many-item pointer, or C-pointer
    /// main_token might be a ** token, which is shared with a parent/child
    /// pointer type and may require special handling.
-    AST_NODE_TAG_PTR_TYPE_SENTINEL,
+    AST_NODE_PTR_TYPE_SENTINEL,
    /// lhs is index into ptr_type. rhs is the element type expression.
    /// main_token is the asterisk if a single item pointer or the lbracket
    /// if a slice, many-item pointer, or C-pointer
    /// main_token might be a ** token, which is shared with a parent/child
    /// pointer type and may require special handling.
-    AST_NODE_TAG_PTR_TYPE,
+    AST_NODE_PTR_TYPE,
    /// lhs is index into ptr_type_bit_range. rhs is the element type expression.
    /// main_token is the asterisk if a single item pointer or the lbracket
    /// if a slice, many-item pointer, or C-pointer
    /// main_token might be a ** token, which is shared with a parent/child
    /// pointer type and may require special handling.
-    AST_NODE_TAG_PTR_TYPE_BIT_RANGE,
+    AST_NODE_PTR_TYPE_BIT_RANGE,
    /// `lhs[rhs..]`
    /// main_token is the lbracket.
-    AST_NODE_TAG_SLICE_OPEN,
+    AST_NODE_SLICE_OPEN,
    /// `lhs[b..c]`. rhs is index into Slice
    /// main_token is the lbracket.
-    AST_NODE_TAG_SLICE,
+    AST_NODE_SLICE,
    /// `lhs[b..c :d]`. rhs is index into SliceSentinel. Slice end c can be omitted.
    /// main_token is the lbracket.
-    AST_NODE_TAG_SLICE_SENTINEL,
+    AST_NODE_SLICE_SENTINEL,
    /// `lhs.*`. rhs is unused.
-    AST_NODE_TAG_DEREF,
+    AST_NODE_DEREF,
    /// `lhs[rhs]`.
-    AST_NODE_TAG_ARRAY_ACCESS,
+    AST_NODE_ARRAY_ACCESS,
    /// `lhs{rhs}`. rhs can be omitted.
-    AST_NODE_TAG_ARRAY_INIT_ONE,
+    AST_NODE_ARRAY_INIT_ONE,
    /// `lhs{rhs,}`. rhs can *not* be omitted
-    AST_NODE_TAG_ARRAY_INIT_ONE_COMMA,
+    AST_NODE_ARRAY_INIT_ONE_COMMA,
    /// `.{lhs, rhs}`. lhs and rhs can be omitted.
-    AST_NODE_TAG_ARRAY_INIT_DOT_TWO,
+    AST_NODE_ARRAY_INIT_DOT_TWO,
    /// Same as `array_init_dot_two` except there is known to be a trailing comma
    /// before the final rbrace.
-    AST_NODE_TAG_ARRAY_INIT_DOT_TWO_COMMA,
+    AST_NODE_ARRAY_INIT_DOT_TWO_COMMA,
    /// `.{a, b}`. `sub_list[lhs..rhs]`.
-    AST_NODE_TAG_ARRAY_INIT_DOT,
+    AST_NODE_ARRAY_INIT_DOT,
    /// Same as `array_init_dot` except there is known to be a trailing comma
    /// before the final rbrace.
-    AST_NODE_TAG_ARRAY_INIT_DOT_COMMA,
+    AST_NODE_ARRAY_INIT_DOT_COMMA,
    /// `lhs{a, b}`. `sub_range_list[rhs]`. lhs can be omitted which means `.{a, b}`.
-    AST_NODE_TAG_ARRAY_INIT,
+    AST_NODE_ARRAY_INIT,
    /// Same as `array_init` except there is known to be a trailing comma
    /// before the final rbrace.
-    AST_NODE_TAG_ARRAY_INIT_COMMA,
+    AST_NODE_ARRAY_INIT_COMMA,
    /// `lhs{.a = rhs}`. rhs can be omitted making it empty.
    /// main_token is the lbrace.
-    AST_NODE_TAG_STRUCT_INIT_ONE,
+    AST_NODE_STRUCT_INIT_ONE,
    /// `lhs{.a = rhs,}`. rhs can *not* be omitted.
    /// main_token is the lbrace.
-    AST_NODE_TAG_STRUCT_INIT_ONE_COMMA,
+    AST_NODE_STRUCT_INIT_ONE_COMMA,
    /// `.{.a = lhs, .b = rhs}`. lhs and rhs can be omitted.
    /// main_token is the lbrace.
    /// No trailing comma before the rbrace.
-    AST_NODE_TAG_STRUCT_INIT_DOT_TWO,
+    AST_NODE_STRUCT_INIT_DOT_TWO,
    /// Same as `struct_init_dot_two` except there is known to be a trailing comma
    /// before the final rbrace.
-    AST_NODE_TAG_STRUCT_INIT_DOT_TWO_COMMA,
+    AST_NODE_STRUCT_INIT_DOT_TWO_COMMA,
    /// `.{.a = b, .c = d}`. `sub_list[lhs..rhs]`.
    /// main_token is the lbrace.
-    AST_NODE_TAG_STRUCT_INIT_DOT,
+    AST_NODE_STRUCT_INIT_DOT,
    /// Same as `struct_init_dot` except there is known to be a trailing comma
    /// before the final rbrace.
-    AST_NODE_TAG_STRUCT_INIT_DOT_COMMA,
+    AST_NODE_STRUCT_INIT_DOT_COMMA,
    /// `lhs{.a = b, .c = d}`. `sub_range_list[rhs]`.
    /// lhs can be omitted which means `.{.a = b, .c = d}`.
    /// main_token is the lbrace.
-    AST_NODE_TAG_STRUCT_INIT,
+    AST_NODE_STRUCT_INIT,
    /// Same as `struct_init` except there is known to be a trailing comma
    /// before the final rbrace.
-    AST_NODE_TAG_STRUCT_INIT_COMMA,
+    AST_NODE_STRUCT_INIT_COMMA,
    /// `lhs(rhs)`. rhs can be omitted.
    /// main_token is the lparen.
-    AST_NODE_TAG_CALL_ONE,
+    AST_NODE_CALL_ONE,
    /// `lhs(rhs,)`. rhs can be omitted.
    /// main_token is the lparen.
-    AST_NODE_TAG_CALL_ONE_COMMA,
+    AST_NODE_CALL_ONE_COMMA,
    /// `async lhs(rhs)`. rhs can be omitted.
-    AST_NODE_TAG_ASYNC_CALL_ONE,
+    AST_NODE_ASYNC_CALL_ONE,
    /// `async lhs(rhs,)`.
-    AST_NODE_TAG_ASYNC_CALL_ONE_COMMA,
+    AST_NODE_ASYNC_CALL_ONE_COMMA,
    /// `lhs(a, b, c)`. `SubRange[rhs]`.
    /// main_token is the `(`.
-    AST_NODE_TAG_CALL,
+    AST_NODE_CALL,
    /// `lhs(a, b, c,)`. `SubRange[rhs]`.
    /// main_token is the `(`.
-    AST_NODE_TAG_CALL_COMMA,
+    AST_NODE_CALL_COMMA,
    /// `async lhs(a, b, c)`. `SubRange[rhs]`.
    /// main_token is the `(`.
-    AST_NODE_TAG_ASYNC_CALL,
+    AST_NODE_ASYNC_CALL,
    /// `async lhs(a, b, c,)`. `SubRange[rhs]`.
    /// main_token is the `(`.
-    AST_NODE_TAG_ASYNC_CALL_COMMA,
+    AST_NODE_ASYNC_CALL_COMMA,
    /// `switch(lhs) {}`. `SubRange[rhs]`.
    /// `main_token` is the identifier of a preceding label, if any; otherwise `switch`.
-    AST_NODE_TAG_SWITCH,
+    AST_NODE_SWITCH,
    /// Same as switch except there is known to be a trailing comma
    /// before the final rbrace
-    AST_NODE_TAG_SWITCH_COMMA,
+    AST_NODE_SWITCH_COMMA,
    /// `lhs => rhs`. If lhs is omitted it means `else`.
    /// main_token is the `=>`
-    AST_NODE_TAG_SWITCH_CASE_ONE,
+    AST_NODE_SWITCH_CASE_ONE,
    /// Same as `switch_case_one` but the case is inline
-    AST_NODE_TAG_SWITCH_CASE_INLINE_ONE,
+    AST_NODE_SWITCH_CASE_INLINE_ONE,
    /// `a, b, c => rhs`. `SubRange[lhs]`.
    /// main_token is the `=>`
-    AST_NODE_TAG_SWITCH_CASE,
+    AST_NODE_SWITCH_CASE,
    /// Same as `switch_case` but the case is inline
-    AST_NODE_TAG_SWITCH_CASE_INLINE,
+    AST_NODE_SWITCH_CASE_INLINE,
    /// `lhs...rhs`.
-    AST_NODE_TAG_SWITCH_RANGE,
+    AST_NODE_SWITCH_RANGE,
    /// `while (lhs) rhs`.
    /// `while (lhs) |x| rhs`.
-    AST_NODE_TAG_WHILE_SIMPLE,
+    AST_NODE_WHILE_SIMPLE,
    /// `while (lhs) : (a) b`. `WhileCont[rhs]`.
    /// `while (lhs) |x| : (a) b`. `WhileCont[rhs]`.
-    AST_NODE_TAG_WHILE_CONT,
+    AST_NODE_WHILE_CONT,
    /// `while (lhs) : (a) b else c`. `While[rhs]`.
    /// `while (lhs) |x| : (a) b else c`. `While[rhs]`.
    /// `while (lhs) |x| : (a) b else |y| c`. `While[rhs]`.
    /// The cont expression part `: (a)` may be omitted.
-    AST_NODE_TAG_WHILE,
+    AST_NODE_WHILE,
    /// `for (lhs) rhs`.
-    AST_NODE_TAG_FOR_SIMPLE,
+    AST_NODE_FOR_SIMPLE,
    /// `for (lhs[0..inputs]) lhs[inputs + 1] else lhs[inputs + 2]`. `For[rhs]`.
-    AST_NODE_TAG_FOR,
+    AST_NODE_FOR,
    /// `lhs..rhs`. rhs can be omitted.
-    AST_NODE_TAG_FOR_RANGE,
+    AST_NODE_FOR_RANGE,
    /// `if (lhs) rhs`.
    /// `if (lhs) |a| rhs`.
-    AST_NODE_TAG_IF_SIMPLE,
+    AST_NODE_IF_SIMPLE,
    /// `if (lhs) a else b`. `If[rhs]`.
    /// `if (lhs) |x| a else b`. `If[rhs]`.
    /// `if (lhs) |x| a else |y| b`. `If[rhs]`.
-    AST_NODE_TAG_IF,
+    AST_NODE_IF,
    /// `suspend lhs`. lhs can be omitted. rhs is unused.
-    AST_NODE_TAG_SUSPEND,
+    AST_NODE_SUSPEND,
    /// `resume lhs`. rhs is unused.
-    AST_NODE_TAG_RESUME,
+    AST_NODE_RESUME,
    /// `continue :lhs rhs`
    /// both lhs and rhs may be omitted.
-    AST_NODE_TAG_CONTINUE,
+    AST_NODE_CONTINUE,
    /// `break :lhs rhs`
    /// both lhs and rhs may be omitted.
-    AST_NODE_TAG_BREAK,
+    AST_NODE_BREAK,
    /// `return lhs`. lhs can be omitted. rhs is unused.
-    AST_NODE_TAG_RETURN,
+    AST_NODE_RETURN,
    /// `fn (a: lhs) rhs`. lhs can be omitted.
    /// anytype and ... parameters are omitted from the AST tree.
    /// main_token is the `fn` keyword.
    /// extern function declarations use this tag.
-    AST_NODE_TAG_FN_PROTO_SIMPLE,
+    AST_NODE_FN_PROTO_SIMPLE,
    /// `fn (a: b, c: d) rhs`. `sub_range_list[lhs]`.
    /// anytype and ... parameters are omitted from the AST tree.
    /// main_token is the `fn` keyword.
    /// extern function declarations use this tag.
-    AST_NODE_TAG_FN_PROTO_MULTI,
+    AST_NODE_FN_PROTO_MULTI,
    /// `fn (a: b) addrspace(e) linksection(f) callconv(g) rhs`. `FnProtoOne[lhs]`.
    /// zero or one parameters.
    /// anytype and ... parameters are omitted from the AST tree.
    /// main_token is the `fn` keyword.
    /// extern function declarations use this tag.
-    AST_NODE_TAG_FN_PROTO_ONE,
+    AST_NODE_FN_PROTO_ONE,
    /// `fn (a: b, c: d) addrspace(e) linksection(f) callconv(g) rhs`. `FnProto[lhs]`.
    /// anytype and ... parameters are omitted from the AST tree.
    /// main_token is the `fn` keyword.
    /// extern function declarations use this tag.
-    AST_NODE_TAG_FN_PROTO,
+    AST_NODE_FN_PROTO,
    /// lhs is the fn_proto.
    /// rhs is the function body block.
    /// Note that extern function declarations use the fn_proto tags rather
    /// than this one.
-    AST_NODE_TAG_FN_DECL,
+    AST_NODE_FN_DECL,
    /// `anyframe->rhs`. main_token is `anyframe`. `lhs` is arrow token index.
-    AST_NODE_TAG_ANYFRAME_TYPE,
+    AST_NODE_ANYFRAME_TYPE,
    /// Both lhs and rhs unused.
-    AST_NODE_TAG_ANYFRAME_LITERAL,
+    AST_NODE_ANYFRAME_LITERAL,
    /// Both lhs and rhs unused.
-    AST_NODE_TAG_CHAR_LITERAL,
+    AST_NODE_CHAR_LITERAL,
    /// Both lhs and rhs unused.
-    AST_NODE_TAG_NUMBER_LITERAL,
+    AST_NODE_NUMBER_LITERAL,
    /// Both lhs and rhs unused.
-    AST_NODE_TAG_UNREACHABLE_LITERAL,
+    AST_NODE_UNREACHABLE_LITERAL,
    /// Both lhs and rhs unused.
    /// Most identifiers will not have explicit AST nodes, however for expressions
    /// which could be one of many different kinds of AST nodes, there will be an
    /// identifier AST node for it.
-    AST_NODE_TAG_IDENTIFIER,
+    AST_NODE_IDENTIFIER,
    /// lhs is the dot token index, rhs unused, main_token is the identifier.
-    AST_NODE_TAG_ENUM_LITERAL,
+    AST_NODE_ENUM_LITERAL,
    /// main_token is the string literal token
    /// Both lhs and rhs unused.
-    AST_NODE_TAG_STRING_LITERAL,
+    AST_NODE_STRING_LITERAL,
    /// main_token is the first token index (redundant with lhs)
    /// lhs is the first token index; rhs is the last token index.
    /// Could be a series of multiline_string_literal_line tokens, or a single
    /// string_literal token.
-    AST_NODE_TAG_MULTILINE_STRING_LITERAL,
+    AST_NODE_MULTILINE_STRING_LITERAL,
    /// `(lhs)`. main_token is the `(`; rhs is the token index of the `)`.
-    AST_NODE_TAG_GROUPED_EXPRESSION,
+    AST_NODE_GROUPED_EXPRESSION,
    /// `@a(lhs, rhs)`. lhs and rhs may be omitted.
    /// main_token is the builtin token.
-    AST_NODE_TAG_BUILTIN_CALL_TWO,
+    AST_NODE_BUILTIN_CALL_TWO,
    /// Same as builtin_call_two but there is known to be a trailing comma before the rparen.
-    AST_NODE_TAG_BUILTIN_CALL_TWO_COMMA,
+    AST_NODE_BUILTIN_CALL_TWO_COMMA,
    /// `@a(b, c)`. `sub_list[lhs..rhs]`.
    /// main_token is the builtin token.
-    AST_NODE_TAG_BUILTIN_CALL,
+    AST_NODE_BUILTIN_CALL,
    /// Same as builtin_call but there is known to be a trailing comma before the rparen.
-    AST_NODE_TAG_BUILTIN_CALL_COMMA,
+    AST_NODE_BUILTIN_CALL_COMMA,
    /// `error{a, b}`.
    /// rhs is the rbrace, lhs is unused.
-    AST_NODE_TAG_ERROR_SET_DECL,
+    AST_NODE_ERROR_SET_DECL,
    /// `struct {}`, `union {}`, `opaque {}`, `enum {}`. `extra_data[lhs..rhs]`.
    /// main_token is `struct`, `union`, `opaque`, `enum` keyword.
-    AST_NODE_TAG_CONTAINER_DECL,
+    AST_NODE_CONTAINER_DECL,
    /// Same as ContainerDecl but there is known to be a trailing comma
    /// or semicolon before the rbrace.
-    AST_NODE_TAG_CONTAINER_DECL_TRAILING,
+    AST_NODE_CONTAINER_DECL_TRAILING,
    /// `struct {lhs, rhs}`, `union {lhs, rhs}`, `opaque {lhs, rhs}`, `enum {lhs, rhs}`.
    /// lhs or rhs can be omitted.
    /// main_token is `struct`, `union`, `opaque`, `enum` keyword.
-    AST_NODE_TAG_CONTAINER_DECL_TWO,
+    AST_NODE_CONTAINER_DECL_TWO,
    /// Same as ContainerDeclTwo except there is known to be a trailing comma
    /// or semicolon before the rbrace.
-    AST_NODE_TAG_CONTAINER_DECL_TWO_TRAILING,
+    AST_NODE_CONTAINER_DECL_TWO_TRAILING,
    /// `struct(lhs)` / `union(lhs)` / `enum(lhs)`. `SubRange[rhs]`.
-    AST_NODE_TAG_CONTAINER_DECL_ARG,
+    AST_NODE_CONTAINER_DECL_ARG,
    /// Same as container_decl_arg but there is known to be a trailing
    /// comma or semicolon before the rbrace.
-    AST_NODE_TAG_CONTAINER_DECL_ARG_TRAILING,
+    AST_NODE_CONTAINER_DECL_ARG_TRAILING,
    /// `union(enum) {}`. `sub_list[lhs..rhs]`.
    /// Note that tagged unions with explicitly provided enums are represented
    /// by `container_decl_arg`.
-    AST_NODE_TAG_TAGGED_UNION,
+    AST_NODE_TAGGED_UNION,
    /// Same as tagged_union but there is known to be a trailing comma
    /// or semicolon before the rbrace.
-    AST_NODE_TAG_TAGGED_UNION_TRAILING,
+    AST_NODE_TAGGED_UNION_TRAILING,
    /// `union(enum) {lhs, rhs}`. lhs or rhs may be omitted.
    /// Note that tagged unions with explicitly provided enums are represented
    /// by `container_decl_arg`.
-    AST_NODE_TAG_TAGGED_UNION_TWO,
+    AST_NODE_TAGGED_UNION_TWO,
    /// Same as tagged_union_two but there is known to be a trailing comma
    /// or semicolon before the rbrace.
-    AST_NODE_TAG_TAGGED_UNION_TWO_TRAILING,
+    AST_NODE_TAGGED_UNION_TWO_TRAILING,
    /// `union(enum(lhs)) {}`. `SubRange[rhs]`.
-    AST_NODE_TAG_TAGGED_UNION_ENUM_TAG,
+    AST_NODE_TAGGED_UNION_ENUM_TAG,
    /// Same as tagged_union_enum_tag but there is known to be a trailing comma
    /// or semicolon before the rbrace.
-    AST_NODE_TAG_TAGGED_UNION_ENUM_TAG_TRAILING,
+    AST_NODE_TAGGED_UNION_ENUM_TAG_TRAILING,
    /// `a: lhs = rhs,`. lhs and rhs can be omitted.
    /// main_token is the field name identifier.
    /// lastToken() does not include the possible trailing comma.
-    AST_NODE_TAG_CONTAINER_FIELD_INIT,
+    AST_NODE_CONTAINER_FIELD_INIT,
    /// `a: lhs align(rhs),`. rhs can be omitted.
    /// main_token is the field name identifier.
    /// lastToken() does not include the possible trailing comma.
-    AST_NODE_TAG_CONTAINER_FIELD_ALIGN,
+    AST_NODE_CONTAINER_FIELD_ALIGN,
    /// `a: lhs align(c) = d,`. `container_field_list[rhs]`.
    /// main_token is the field name identifier.
    /// lastToken() does not include the possible trailing comma.
-    AST_NODE_TAG_CONTAINER_FIELD,
+    AST_NODE_CONTAINER_FIELD,
    /// `comptime lhs`. rhs unused.
-    AST_NODE_TAG_COMPTIME,
+    AST_NODE_COMPTIME,
    /// `nosuspend lhs`. rhs unused.
-    AST_NODE_TAG_NOSUSPEND,
+    AST_NODE_NOSUSPEND,
    /// `{lhs rhs}`. rhs or lhs can be omitted.
    /// main_token points at the lbrace.
-    AST_NODE_TAG_BLOCK_TWO,
+    AST_NODE_BLOCK_TWO,
    /// Same as block_two but there is known to be a semicolon before the rbrace.
-    AST_NODE_TAG_BLOCK_TWO_SEMICOLON,
+    AST_NODE_BLOCK_TWO_SEMICOLON,
    /// `{}`. `sub_list[lhs..rhs]`.
    /// main_token points at the lbrace.
-    AST_NODE_TAG_BLOCK,
+    AST_NODE_BLOCK,
    /// Same as block but there is known to be a semicolon before the rbrace.
-    AST_NODE_TAG_BLOCK_SEMICOLON,
+    AST_NODE_BLOCK_SEMICOLON,
    /// `asm(lhs)`. rhs is the token index of the rparen.
-    AST_NODE_TAG_ASM_SIMPLE,
+    AST_NODE_ASM_SIMPLE,
    /// `asm(lhs, a)`. `Asm[rhs]`.
-    AST_NODE_TAG_ASM,
+    AST_NODE_ASM,
    /// `[a] "b" (c)`. lhs is 0, rhs is token index of the rparen.
    /// `[a] "b" (-> lhs)`. rhs is token index of the rparen.
    /// main_token is `a`.
-    AST_NODE_TAG_ASM_OUTPUT,
+    AST_NODE_ASM_OUTPUT,
    /// `[a] "b" (lhs)`. rhs is token index of the rparen.
    /// main_token is `a`.
-    AST_NODE_TAG_ASM_INPUT,
+    AST_NODE_ASM_INPUT,
    /// `error.a`. lhs is token index of `.`. rhs is token index of `a`.
-    AST_NODE_TAG_ERROR_VALUE,
+    AST_NODE_ERROR_VALUE,
    /// `lhs!rhs`. main_token is the `!`.
-    AST_NODE_TAG_ERROR_UNION,
+    AST_NODE_ERROR_UNION,
 } AstNodeTag;

 typedef uint32_t AstTokenIndex;
--- a/parser.c
+++ b/parser.c
@@ -182,7 +182,7 @@ static AstNodeIndex expectContainerField(Parser* p) {
        return addNode(
            &p->nodes,
            (AstNodeItem) {
-                .tag = AST_NODE_TAG_CONTAINER_FIELD_INIT,
+                .tag = AST_NODE_CONTAINER_FIELD_INIT,
                .main_token = main_token,
                .data = {
                    .lhs = type_expr,
@@ -193,7 +193,7 @@ static AstNodeIndex expectContainerField(Parser* p) {
        return addNode(
            &p->nodes,
            (AstNodeItem) {
-                .tag = AST_NODE_TAG_CONTAINER_FIELD_ALIGN,
+                .tag = AST_NODE_CONTAINER_FIELD_ALIGN,
                .main_token = main_token,
                .data = {
                    .lhs = type_expr,
@@ -204,7 +204,7 @@ static AstNodeIndex expectContainerField(Parser* p) {
        return addNode(
            &p->nodes,
            (AstNodeItem) {
-                .tag = AST_NODE_TAG_CONTAINER_FIELD,
+                .tag = AST_NODE_CONTAINER_FIELD,
                .main_token = main_token,
                .data = {
                    .lhs = type_expr,
@@ -244,7 +244,7 @@ static AstNodeIndex parsePrimaryTypeExpr(Parser* p) {
        return addNode(
            &p->nodes,
            (AstNodeItem) {
-                .tag = AST_NODE_TAG_IDENTIFIER,
+                .tag = AST_NODE_IDENTIFIER,
                .main_token = nextToken(p),
                .data = {} });
    case TOKEN_KEYWORD_INLINE:
@@ -310,7 +310,7 @@ static AstNodeIndex parseSuffixExpr(Parser* p) {
            return addNode(
                &p->nodes,
                (AstNodeItem) {
-                    .tag = comma ? AST_NODE_TAG_CALL_ONE_COMMA : AST_NODE_TAG_CALL_ONE,
+                    .tag = comma ? AST_NODE_CALL_ONE_COMMA : AST_NODE_CALL_ONE,
                    .main_token = lparen,
                    .data = {
                        .lhs = res,
@@ -321,7 +321,7 @@ static AstNodeIndex parseSuffixExpr(Parser* p) {
            return addNode(
                &p->nodes,
                (AstNodeItem) {
-                    .tag = comma ? AST_NODE_TAG_CALL_ONE_COMMA : AST_NODE_TAG_CALL_ONE,
+                    .tag = comma ? AST_NODE_CALL_ONE_COMMA : AST_NODE_CALL_ONE,
                    .main_token = lparen,
                    .data = {
                        .lhs = res,
@@ -333,7 +333,7 @@ static AstNodeIndex parseSuffixExpr(Parser* p) {
            return addNode(
                &p->nodes,
                (AstNodeItem) {
-                    .tag = comma ? AST_NODE_TAG_CALL_COMMA : AST_NODE_TAG_CALL,
+                    .tag = comma ? AST_NODE_CALL_COMMA : AST_NODE_CALL,
                    .main_token = lparen,
                    .data = {
                        .lhs = res,
@@ -368,7 +368,7 @@ static AstNodeIndex parseErrorUnionExpr(Parser* p) {
    return addNode(
        &p->nodes,
        (AstNodeItem) {
-            .tag = AST_NODE_TAG_ERROR_UNION,
+            .tag = AST_NODE_ERROR_UNION,
            .main_token = bang,
            .data = {
                .lhs = suffix_expr,
@@ -413,7 +413,7 @@ static AstNodeIndex parseFnProto(Parser* p) {
    if (fn_token == null_node)
        return null_node;

-    AstNodeIndex fn_proto_index = reserveNode(p, AST_NODE_TAG_FN_PROTO);
+    AstNodeIndex fn_proto_index = reserveNode(p, AST_NODE_FN_PROTO);

    eatToken(p, TOKEN_IDENTIFIER);

@@ -432,7 +432,7 @@ static AstNodeIndex parseFnProto(Parser* p) {
                p,
                fn_proto_index,
                (AstNodeItem) {
-                    .tag = AST_NODE_TAG_FN_PROTO_SIMPLE,
+                    .tag = AST_NODE_FN_PROTO_SIMPLE,
                    .main_token = fn_token,
                    .data = {
                        .lhs = params.payload.zero_or_one,
@@ -568,7 +568,7 @@ static AstNodeIndex parseBlock(Parser* p) {
        return addNode(
            &p->nodes,
            (AstNodeItem) {
-                .tag = AST_NODE_TAG_BLOCK_TWO,
+                .tag = AST_NODE_BLOCK_TWO,
                .main_token = lbrace,
                .data = {
                    .lhs = 0,
@@ -579,7 +579,7 @@ static AstNodeIndex parseBlock(Parser* p) {
        return addNode(
            &p->nodes,
            (AstNodeItem) {
-                .tag = semicolon ? AST_NODE_TAG_BLOCK_TWO_SEMICOLON : AST_NODE_TAG_BLOCK_TWO,
+                .tag = semicolon ? AST_NODE_BLOCK_TWO_SEMICOLON : AST_NODE_BLOCK_TWO,
                .main_token = lbrace,
                .data = {
                    .lhs = p->scratch.arr[scratch_top.old_len],
@@ -590,7 +590,7 @@ static AstNodeIndex parseBlock(Parser* p) {
        return addNode(
            &p->nodes,
            (AstNodeItem) {
-                .tag = semicolon ? AST_NODE_TAG_BLOCK_TWO_SEMICOLON : AST_NODE_TAG_BLOCK_TWO,
+                .tag = semicolon ? AST_NODE_BLOCK_TWO_SEMICOLON : AST_NODE_BLOCK_TWO,
                .main_token = lbrace,
                .data = {
                    .lhs = p->scratch.arr[scratch_top.old_len],
@@ -602,7 +602,7 @@ static AstNodeIndex parseBlock(Parser* p) {
        return addNode(
            &p->nodes,
            (AstNodeItem) {
-                .tag = semicolon ? AST_NODE_TAG_BLOCK_SEMICOLON : AST_NODE_TAG_BLOCK,
+                .tag = semicolon ? AST_NODE_BLOCK_SEMICOLON : AST_NODE_BLOCK,
                .main_token = lbrace,
                .data = {
                    .lhs = span.start,
@@ -673,13 +673,13 @@ static AstNodeIndex expectTopLevelDecl(Parser* p) {
            p->tok_i++;
            return fn_proto;
        case TOKEN_L_BRACE:;
-            AstNodeIndex fn_decl_index = reserveNode(p, AST_NODE_TAG_FN_DECL);
+            AstNodeIndex fn_decl_index = reserveNode(p, AST_NODE_FN_DECL);
            AstNodeIndex body_block = parseBlock(p);
            return setNode(
                p,
                fn_decl_index,
                (AstNodeItem) {
-                    .tag = AST_NODE_TAG_FN_DECL,
+                    .tag = AST_NODE_FN_DECL,
                    .main_token = p->nodes.main_tokens[fn_proto],
                    .data = { .lhs = fn_proto, .rhs = body_block },
                });
@@ -885,7 +885,7 @@ break_loop:;
 }

 void parseRoot(Parser* p) {
-    addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_TAG_ROOT, .main_token = 0 });
+    addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_ROOT, .main_token = 0 });

    Members root_members = parseContainerMembers(p);
    AstSubRange root_decls = membersToSpan(root_members, p);
--- a/parser_test.zig
+++ b/parser_test.zig
@@ -2,194 +2,314 @@ const std = @import("std");
 const testing = std.testing;

 const Ast = std.zig.Ast;
+const Allocator = std.mem.Allocator;

 const c = @cImport({
    @cInclude("ast.h");
 });

+const zigToken = @import("./tokenizer_test.zig").zigToken;
+
 fn zigNode(token: c_uint) Ast.Node.Tag {
    return switch (token) {
-        c.AST_NODE_TAG_ROOT => .root,
-        c.AST_NODE_TAG_USINGNAMESPACE => .@"usingnamespace",
-        c.AST_NODE_TAG_TEST_DECL => .test_decl,
-        c.AST_NODE_TAG_GLOBAL_VAR_DECL => .global_var_decl,
-        c.AST_NODE_TAG_LOCAL_VAR_DECL => .local_var_decl,
-        c.AST_NODE_TAG_SIMPLE_VAR_DECL => .simple_var_decl,
-        c.AST_NODE_TAG_ALIGNED_VAR_DECL => .aligned_var_decl,
-        c.AST_NODE_TAG_ERRDEFER => .@"errdefer",
-        c.AST_NODE_TAG_DEFER => .@"defer",
-        c.AST_NODE_TAG_CATCH => .@"catch",
-        c.AST_NODE_TAG_FIELD_ACCESS => .field_access,
-        c.AST_NODE_TAG_UNWRAP_OPTIONAL => .unwrap_optional,
-        c.AST_NODE_TAG_EQUAL_EQUAL => .equal_equal,
-        c.AST_NODE_TAG_BANG_EQUAL => .bang_equal,
-        c.AST_NODE_TAG_LESS_THAN => .less_than,
-        c.AST_NODE_TAG_GREATER_THAN => .greater_than,
-        c.AST_NODE_TAG_LESS_OR_EQUAL => .less_or_equal,
-        c.AST_NODE_TAG_GREATER_OR_EQUAL => .greater_or_equal,
-        c.AST_NODE_TAG_ASSIGN_MUL => .assign_mul,
-        c.AST_NODE_TAG_ASSIGN_DIV => .assign_div,
-        c.AST_NODE_TAG_ASSIGN_MOD => .assign_mod,
-        c.AST_NODE_TAG_ASSIGN_ADD => .assign_add,
-        c.AST_NODE_TAG_ASSIGN_SUB => .assign_sub,
-        c.AST_NODE_TAG_ASSIGN_SHL => .assign_shl,
-        c.AST_NODE_TAG_ASSIGN_SHL_SAT => .assign_shl_sat,
-        c.AST_NODE_TAG_ASSIGN_SHR => .assign_shr,
-        c.AST_NODE_TAG_ASSIGN_BIT_AND => .assign_bit_and,
-        c.AST_NODE_TAG_ASSIGN_BIT_XOR => .assign_bit_xor,
-        c.AST_NODE_TAG_ASSIGN_BIT_OR => .assign_bit_or,
-        c.AST_NODE_TAG_ASSIGN_MUL_WRAP => .assign_mul_wrap,
-        c.AST_NODE_TAG_ASSIGN_ADD_WRAP => .assign_add_wrap,
-        c.AST_NODE_TAG_ASSIGN_SUB_WRAP => .assign_sub_wrap,
-        c.AST_NODE_TAG_ASSIGN_MUL_SAT => .assign_mul_sat,
-        c.AST_NODE_TAG_ASSIGN_ADD_SAT => .assign_add_sat,
-        c.AST_NODE_TAG_ASSIGN_SUB_SAT => .assign_sub_sat,
-        c.AST_NODE_TAG_ASSIGN => .assign,
-        c.AST_NODE_TAG_ASSIGN_DESTRUCTURE => .assign_destructure,
-        c.AST_NODE_TAG_MERGE_ERROR_SETS => .merge_error_sets,
-        c.AST_NODE_TAG_MUL => .mul,
-        c.AST_NODE_TAG_DIV => .div,
-        c.AST_NODE_TAG_MOD => .mod,
-        c.AST_NODE_TAG_ARRAY_MULT => .array_mult,
-        c.AST_NODE_TAG_MUL_WRAP => .mul_wrap,
-        c.AST_NODE_TAG_MUL_SAT => .mul_sat,
-        c.AST_NODE_TAG_ADD => .add,
-        c.AST_NODE_TAG_SUB => .sub,
-        c.AST_NODE_TAG_ARRAY_CAT => .array_cat,
-        c.AST_NODE_TAG_ADD_WRAP => .add_wrap,
-        c.AST_NODE_TAG_SUB_WRAP => .sub_wrap,
-        c.AST_NODE_TAG_ADD_SAT => .add_sat,
-        c.AST_NODE_TAG_SUB_SAT => .sub_sat,
-        c.AST_NODE_TAG_SHL => .shl,
-        c.AST_NODE_TAG_SHL_SAT => .shl_sat,
-        c.AST_NODE_TAG_SHR => .shr,
-        c.AST_NODE_TAG_BIT_AND => .bit_and,
-        c.AST_NODE_TAG_BIT_XOR => .bit_xor,
-        c.AST_NODE_TAG_BIT_OR => .bit_or,
-        c.AST_NODE_TAG_ORELSE => .@"orelse",
-        c.AST_NODE_TAG_BOOL_AND => .bool_and,
-        c.AST_NODE_TAG_BOOL_OR => .bool_or,
-        c.AST_NODE_TAG_BOOL_NOT => .bool_not,
-        c.AST_NODE_TAG_NEGATION => .negation,
-        c.AST_NODE_TAG_BIT_NOT => .bit_not,
-        c.AST_NODE_TAG_NEGATION_WRAP => .negation_wrap,
-        c.AST_NODE_TAG_ADDRESS_OF => .address_of,
-        c.AST_NODE_TAG_TRY => .@"try",
-        c.AST_NODE_TAG_AWAIT => .@"await",
-        c.AST_NODE_TAG_OPTIONAL_TYPE => .optional_type,
-        c.AST_NODE_TAG_ARRAY_TYPE => .array_type,
-        c.AST_NODE_TAG_ARRAY_TYPE_SENTINEL => .array_type_sentinel,
-        c.AST_NODE_TAG_PTR_TYPE_ALIGNED => .ptr_type_aligned,
-        c.AST_NODE_TAG_PTR_TYPE_SENTINEL => .ptr_type_sentinel,
-        c.AST_NODE_TAG_PTR_TYPE => .ptr_type,
-        c.AST_NODE_TAG_PTR_TYPE_BIT_RANGE => .ptr_type_bit_range,
-        c.AST_NODE_TAG_SLICE_OPEN => .slice_open,
-        c.AST_NODE_TAG_SLICE => .slice,
-        c.AST_NODE_TAG_SLICE_SENTINEL => .slice_sentinel,
-        c.AST_NODE_TAG_DEREF => .deref,
-        c.AST_NODE_TAG_ARRAY_ACCESS => .array_access,
-        c.AST_NODE_TAG_ARRAY_INIT_ONE => .array_init_one,
-        c.AST_NODE_TAG_ARRAY_INIT_ONE_COMMA => .array_init_one_comma,
-        c.AST_NODE_TAG_ARRAY_INIT_DOT_TWO => .array_init_dot_two,
-        c.AST_NODE_TAG_ARRAY_INIT_DOT_TWO_COMMA => .array_init_dot_two_comma,
-        c.AST_NODE_TAG_ARRAY_INIT_DOT => .array_init_dot,
-        c.AST_NODE_TAG_ARRAY_INIT_DOT_COMMA => .array_init_dot_comma,
-        c.AST_NODE_TAG_ARRAY_INIT => .array_init,
-        c.AST_NODE_TAG_ARRAY_INIT_COMMA => .array_init_comma,
-        c.AST_NODE_TAG_STRUCT_INIT_ONE => .struct_init_one,
-        c.AST_NODE_TAG_STRUCT_INIT_ONE_COMMA => .struct_init_one_comma,
-        c.AST_NODE_TAG_STRUCT_INIT_DOT_TWO => .struct_init_dot_two,
-        c.AST_NODE_TAG_STRUCT_INIT_DOT_TWO_COMMA => .struct_init_dot_two_comma,
-        c.AST_NODE_TAG_STRUCT_INIT_DOT => .struct_init_dot,
-        c.AST_NODE_TAG_STRUCT_INIT_DOT_COMMA => .struct_init_dot_comma,
-        c.AST_NODE_TAG_STRUCT_INIT => .struct_init,
-        c.AST_NODE_TAG_STRUCT_INIT_COMMA => .struct_init_comma,
-        c.AST_NODE_TAG_CALL_ONE => .call_one,
-        c.AST_NODE_TAG_CALL_ONE_COMMA => .call_one_comma,
-        c.AST_NODE_TAG_ASYNC_CALL_ONE => .async_call_one,
-        c.AST_NODE_TAG_ASYNC_CALL_ONE_COMMA => .async_call_one_comma,
-        c.AST_NODE_TAG_CALL => .call,
-        c.AST_NODE_TAG_CALL_COMMA => .call_comma,
-        c.AST_NODE_TAG_ASYNC_CALL => .async_call,
-        c.AST_NODE_TAG_ASYNC_CALL_COMMA => .async_call_comma,
-        c.AST_NODE_TAG_SWITCH => .@"switch",
-        c.AST_NODE_TAG_SWITCH_COMMA => .switch_comma,
-        c.AST_NODE_TAG_SWITCH_CASE_ONE => .switch_case_one,
-        c.AST_NODE_TAG_SWITCH_CASE_INLINE_ONE => .switch_case_inline_one,
-        c.AST_NODE_TAG_SWITCH_CASE => .switch_case,
-        c.AST_NODE_TAG_SWITCH_CASE_INLINE => .switch_case_inline,
-        c.AST_NODE_TAG_SWITCH_RANGE => .switch_range,
-        c.AST_NODE_TAG_WHILE_SIMPLE => .while_simple,
-        c.AST_NODE_TAG_WHILE_CONT => .while_cont,
-        c.AST_NODE_TAG_WHILE => .@"while",
-        c.AST_NODE_TAG_FOR_SIMPLE => .for_simple,
-        c.AST_NODE_TAG_FOR => .@"for",
-        c.AST_NODE_TAG_FOR_RANGE => .for_range,
-        c.AST_NODE_TAG_IF_SIMPLE => .if_simple,
-        c.AST_NODE_TAG_IF => .@"if",
-        c.AST_NODE_TAG_SUSPEND => .@"suspend",
-        c.AST_NODE_TAG_RESUME => .@"resume",
-        c.AST_NODE_TAG_CONTINUE => .@"continue",
-        c.AST_NODE_TAG_BREAK => .@"break",
-        c.AST_NODE_TAG_RETURN => .@"return",
-        c.AST_NODE_TAG_FN_PROTO_SIMPLE => .fn_proto_simple,
-        c.AST_NODE_TAG_FN_PROTO_MULTI => .fn_proto_multi,
-        c.AST_NODE_TAG_FN_PROTO_ONE => .fn_proto_one,
-        c.AST_NODE_TAG_FN_PROTO => .fn_proto,
-        c.AST_NODE_TAG_FN_DECL => .fn_decl,
-        c.AST_NODE_TAG_ANYFRAME_TYPE => .anyframe_type,
-        c.AST_NODE_TAG_ANYFRAME_LITERAL => .anyframe_literal,
-        c.AST_NODE_TAG_CHAR_LITERAL => .char_literal,
-        c.AST_NODE_TAG_NUMBER_LITERAL => .number_literal,
-        c.AST_NODE_TAG_UNREACHABLE_LITERAL => .unreachable_literal,
-        c.AST_NODE_TAG_IDENTIFIER => .identifier,
-        c.AST_NODE_TAG_ENUM_LITERAL => .enum_literal,
-        c.AST_NODE_TAG_STRING_LITERAL => .string_literal,
-        c.AST_NODE_TAG_MULTILINE_STRING_LITERAL => .multiline_string_literal,
-        c.AST_NODE_TAG_GROUPED_EXPRESSION => .grouped_expression,
-        c.AST_NODE_TAG_BUILTIN_CALL_TWO => .builtin_call_two,
-        c.AST_NODE_TAG_BUILTIN_CALL_TWO_COMMA => .builtin_call_two_comma,
-        c.AST_NODE_TAG_BUILTIN_CALL => .builtin_call,
-        c.AST_NODE_TAG_BUILTIN_CALL_COMMA => .builtin_call_comma,
-        c.AST_NODE_TAG_ERROR_SET_DECL => .error_set_decl,
-        c.AST_NODE_TAG_CONTAINER_DECL => .container_decl,
-        c.AST_NODE_TAG_CONTAINER_DECL_TRAILING => .container_decl_trailing,
-        c.AST_NODE_TAG_CONTAINER_DECL_TWO => .container_decl_two,
-        c.AST_NODE_TAG_CONTAINER_DECL_TWO_TRAILING => .container_decl_two_trailing,
-        c.AST_NODE_TAG_CONTAINER_DECL_ARG => .container_decl_arg,
-        c.AST_NODE_TAG_CONTAINER_DECL_ARG_TRAILING => .container_decl_arg_trailing,
-        c.AST_NODE_TAG_TAGGED_UNION => .tagged_union,
-        c.AST_NODE_TAG_TAGGED_UNION_TRAILING => .tagged_union_trailing,
-        c.AST_NODE_TAG_TAGGED_UNION_TWO => .tagged_union_two,
-        c.AST_NODE_TAG_TAGGED_UNION_TWO_TRAILING => .tagged_union_two_trailing,
-        c.AST_NODE_TAG_TAGGED_UNION_ENUM_TAG => .tagged_union_enum_tag,
-        c.AST_NODE_TAG_TAGGED_UNION_ENUM_TAG_TRAILING => .tagged_union_enum_tag_trailing,
-        c.AST_NODE_TAG_CONTAINER_FIELD_INIT => .container_field_init,
-        c.AST_NODE_TAG_CONTAINER_FIELD_ALIGN => .container_field_align,
-        c.AST_NODE_TAG_CONTAINER_FIELD => .container_field,
-        c.AST_NODE_TAG_COMPTIME => .@"comptime",
-        c.AST_NODE_TAG_NOSUSPEND => .@"nosuspend",
-        c.AST_NODE_TAG_BLOCK_TWO => .block_two,
-        c.AST_NODE_TAG_BLOCK_TWO_SEMICOLON => .block_two_semicolon,
-        c.AST_NODE_TAG_BLOCK => .block,
-        c.AST_NODE_TAG_BLOCK_SEMICOLON => .block_semicolon,
-        c.AST_NODE_TAG_ASM_SIMPLE => .asm_simple,
-        c.AST_NODE_TAG_ASM => .@"asm",
-        c.AST_NODE_TAG_ASM_OUTPUT => .asm_output,
-        c.AST_NODE_TAG_ASM_INPUT => .asm_input,
-        c.AST_NODE_TAG_ERROR_VALUE => .error_value,
-        c.AST_NODE_TAG_ERROR_UNION => .error_union,
+        c.AST_NODE_ROOT => .root,
+        c.AST_NODE_USINGNAMESPACE => .@"usingnamespace",
+        c.AST_NODE_TEST_DECL => .test_decl,
+        c.AST_NODE_GLOBAL_VAR_DECL => .global_var_decl,
+        c.AST_NODE_LOCAL_VAR_DECL => .local_var_decl,
+        c.AST_NODE_SIMPLE_VAR_DECL => .simple_var_decl,
+        c.AST_NODE_ALIGNED_VAR_DECL => .aligned_var_decl,
+        c.AST_NODE_ERRDEFER => .@"errdefer",
+        c.AST_NODE_DEFER => .@"defer",
+        c.AST_NODE_CATCH => .@"catch",
+        c.AST_NODE_FIELD_ACCESS => .field_access,
+        c.AST_NODE_UNWRAP_OPTIONAL => .unwrap_optional,
+        c.AST_NODE_EQUAL_EQUAL => .equal_equal,
+        c.AST_NODE_BANG_EQUAL => .bang_equal,
+        c.AST_NODE_LESS_THAN => .less_than,
+        c.AST_NODE_GREATER_THAN => .greater_than,
+        c.AST_NODE_LESS_OR_EQUAL => .less_or_equal,
+        c.AST_NODE_GREATER_OR_EQUAL => .greater_or_equal,
+        c.AST_NODE_ASSIGN_MUL => .assign_mul,
+        c.AST_NODE_ASSIGN_DIV => .assign_div,
+        c.AST_NODE_ASSIGN_MOD => .assign_mod,
+        c.AST_NODE_ASSIGN_ADD => .assign_add,
+        c.AST_NODE_ASSIGN_SUB => .assign_sub,
+        c.AST_NODE_ASSIGN_SHL => .assign_shl,
+        c.AST_NODE_ASSIGN_SHL_SAT => .assign_shl_sat,
+        c.AST_NODE_ASSIGN_SHR => .assign_shr,
+        c.AST_NODE_ASSIGN_BIT_AND => .assign_bit_and,
+        c.AST_NODE_ASSIGN_BIT_XOR => .assign_bit_xor,
+        c.AST_NODE_ASSIGN_BIT_OR => .assign_bit_or,
+        c.AST_NODE_ASSIGN_MUL_WRAP => .assign_mul_wrap,
+        c.AST_NODE_ASSIGN_ADD_WRAP => .assign_add_wrap,
+        c.AST_NODE_ASSIGN_SUB_WRAP => .assign_sub_wrap,
+        c.AST_NODE_ASSIGN_MUL_SAT => .assign_mul_sat,
+        c.AST_NODE_ASSIGN_ADD_SAT => .assign_add_sat,
+        c.AST_NODE_ASSIGN_SUB_SAT => .assign_sub_sat,
+        c.AST_NODE_ASSIGN => .assign,
+        c.AST_NODE_ASSIGN_DESTRUCTURE => .assign_destructure,
+        c.AST_NODE_MERGE_ERROR_SETS => .merge_error_sets,
+        c.AST_NODE_MUL => .mul,
+        c.AST_NODE_DIV => .div,
+        c.AST_NODE_MOD => .mod,
+        c.AST_NODE_ARRAY_MULT => .array_mult,
+        c.AST_NODE_MUL_WRAP => .mul_wrap,
+        c.AST_NODE_MUL_SAT => .mul_sat,
+        c.AST_NODE_ADD => .add,
+        c.AST_NODE_SUB => .sub,
+        c.AST_NODE_ARRAY_CAT => .array_cat,
+        c.AST_NODE_ADD_WRAP => .add_wrap,
+        c.AST_NODE_SUB_WRAP => .sub_wrap,
+        c.AST_NODE_ADD_SAT => .add_sat,
+        c.AST_NODE_SUB_SAT => .sub_sat,
+        c.AST_NODE_SHL => .shl,
+        c.AST_NODE_SHL_SAT => .shl_sat,
+        c.AST_NODE_SHR => .shr,
+        c.AST_NODE_BIT_AND => .bit_and,
+        c.AST_NODE_BIT_XOR => .bit_xor,
+        c.AST_NODE_BIT_OR => .bit_or,
+        c.AST_NODE_ORELSE => .@"orelse",
+        c.AST_NODE_BOOL_AND => .bool_and,
+        c.AST_NODE_BOOL_OR => .bool_or,
+        c.AST_NODE_BOOL_NOT => .bool_not,
+        c.AST_NODE_NEGATION => .negation,
+        c.AST_NODE_BIT_NOT => .bit_not,
+        c.AST_NODE_NEGATION_WRAP => .negation_wrap,
+        c.AST_NODE_ADDRESS_OF => .address_of,
+        c.AST_NODE_TRY => .@"try",
+        c.AST_NODE_AWAIT => .@"await",
+        c.AST_NODE_OPTIONAL_TYPE => .optional_type,
+        c.AST_NODE_ARRAY_TYPE => .array_type,
+        c.AST_NODE_ARRAY_TYPE_SENTINEL => .array_type_sentinel,
+        c.AST_NODE_PTR_TYPE_ALIGNED => .ptr_type_aligned,
+        c.AST_NODE_PTR_TYPE_SENTINEL => .ptr_type_sentinel,
+        c.AST_NODE_PTR_TYPE => .ptr_type,
+        c.AST_NODE_PTR_TYPE_BIT_RANGE => .ptr_type_bit_range,
+        c.AST_NODE_SLICE_OPEN => .slice_open,
+        c.AST_NODE_SLICE => .slice,
+        c.AST_NODE_SLICE_SENTINEL => .slice_sentinel,
+        c.AST_NODE_DEREF => .deref,
+        c.AST_NODE_ARRAY_ACCESS => .array_access,
+        c.AST_NODE_ARRAY_INIT_ONE => .array_init_one,
+        c.AST_NODE_ARRAY_INIT_ONE_COMMA => .array_init_one_comma,
+        c.AST_NODE_ARRAY_INIT_DOT_TWO => .array_init_dot_two,
+        c.AST_NODE_ARRAY_INIT_DOT_TWO_COMMA => .array_init_dot_two_comma,
+        c.AST_NODE_ARRAY_INIT_DOT => .array_init_dot,
+        c.AST_NODE_ARRAY_INIT_DOT_COMMA => .array_init_dot_comma,
+        c.AST_NODE_ARRAY_INIT => .array_init,
+        c.AST_NODE_ARRAY_INIT_COMMA => .array_init_comma,
+        c.AST_NODE_STRUCT_INIT_ONE => .struct_init_one,
+        c.AST_NODE_STRUCT_INIT_ONE_COMMA => .struct_init_one_comma,
+        c.AST_NODE_STRUCT_INIT_DOT_TWO => .struct_init_dot_two,
+        c.AST_NODE_STRUCT_INIT_DOT_TWO_COMMA => .struct_init_dot_two_comma,
+        c.AST_NODE_STRUCT_INIT_DOT => .struct_init_dot,
+        c.AST_NODE_STRUCT_INIT_DOT_COMMA => .struct_init_dot_comma,
+        c.AST_NODE_STRUCT_INIT => .struct_init,
+        c.AST_NODE_STRUCT_INIT_COMMA => .struct_init_comma,
+        c.AST_NODE_CALL_ONE => .call_one,
+        c.AST_NODE_CALL_ONE_COMMA => .call_one_comma,
+        c.AST_NODE_ASYNC_CALL_ONE => .async_call_one,
+        c.AST_NODE_ASYNC_CALL_ONE_COMMA => .async_call_one_comma,
+        c.AST_NODE_CALL => .call,
+        c.AST_NODE_CALL_COMMA => .call_comma,
+        c.AST_NODE_ASYNC_CALL => .async_call,
+        c.AST_NODE_ASYNC_CALL_COMMA => .async_call_comma,
+        c.AST_NODE_SWITCH => .@"switch",
+        c.AST_NODE_SWITCH_COMMA => .switch_comma,
+        c.AST_NODE_SWITCH_CASE_ONE => .switch_case_one,
+        c.AST_NODE_SWITCH_CASE_INLINE_ONE => .switch_case_inline_one,
+        c.AST_NODE_SWITCH_CASE => .switch_case,
+        c.AST_NODE_SWITCH_CASE_INLINE => .switch_case_inline,
+        c.AST_NODE_SWITCH_RANGE => .switch_range,
+        c.AST_NODE_WHILE_SIMPLE => .while_simple,
+        c.AST_NODE_WHILE_CONT => .while_cont,
+        c.AST_NODE_WHILE => .@"while",
+        c.AST_NODE_FOR_SIMPLE => .for_simple,
+        c.AST_NODE_FOR => .@"for",
+        c.AST_NODE_FOR_RANGE => .for_range,
+        c.AST_NODE_IF_SIMPLE => .if_simple,
+        c.AST_NODE_IF => .@"if",
+        c.AST_NODE_SUSPEND => .@"suspend",
+        c.AST_NODE_RESUME => .@"resume",
+        c.AST_NODE_CONTINUE => .@"continue",
+        c.AST_NODE_BREAK => .@"break",
+        c.AST_NODE_RETURN => .@"return",
+        c.AST_NODE_FN_PROTO_SIMPLE => .fn_proto_simple,
+        c.AST_NODE_FN_PROTO_MULTI => .fn_proto_multi,
+        c.AST_NODE_FN_PROTO_ONE => .fn_proto_one,
+        c.AST_NODE_FN_PROTO => .fn_proto,
+        c.AST_NODE_FN_DECL => .fn_decl,
+        c.AST_NODE_ANYFRAME_TYPE => .anyframe_type,
+        c.AST_NODE_ANYFRAME_LITERAL => .anyframe_literal,
+        c.AST_NODE_CHAR_LITERAL => .char_literal,
+        c.AST_NODE_NUMBER_LITERAL => .number_literal,
+        c.AST_NODE_UNREACHABLE_LITERAL => .unreachable_literal,
+        c.AST_NODE_IDENTIFIER => .identifier,
+        c.AST_NODE_ENUM_LITERAL => .enum_literal,
+        c.AST_NODE_STRING_LITERAL => .string_literal,
+        c.AST_NODE_MULTILINE_STRING_LITERAL => .multiline_string_literal,
+        c.AST_NODE_GROUPED_EXPRESSION => .grouped_expression,
+        c.AST_NODE_BUILTIN_CALL_TWO => .builtin_call_two,
+        c.AST_NODE_BUILTIN_CALL_TWO_COMMA => .builtin_call_two_comma,
+        c.AST_NODE_BUILTIN_CALL => .builtin_call,
+        c.AST_NODE_BUILTIN_CALL_COMMA => .builtin_call_comma,
+        c.AST_NODE_ERROR_SET_DECL => .error_set_decl,
+        c.AST_NODE_CONTAINER_DECL => .container_decl,
+        c.AST_NODE_CONTAINER_DECL_TRAILING => .container_decl_trailing,
+        c.AST_NODE_CONTAINER_DECL_TWO => .container_decl_two,
+        c.AST_NODE_CONTAINER_DECL_TWO_TRAILING => .container_decl_two_trailing,
+        c.AST_NODE_CONTAINER_DECL_ARG => .container_decl_arg,
+        c.AST_NODE_CONTAINER_DECL_ARG_TRAILING => .container_decl_arg_trailing,
+        c.AST_NODE_TAGGED_UNION => .tagged_union,
+        c.AST_NODE_TAGGED_UNION_TRAILING => .tagged_union_trailing,
+        c.AST_NODE_TAGGED_UNION_TWO => .tagged_union_two,
+        c.AST_NODE_TAGGED_UNION_TWO_TRAILING => .tagged_union_two_trailing,
+        c.AST_NODE_TAGGED_UNION_ENUM_TAG => .tagged_union_enum_tag,
+        c.AST_NODE_TAGGED_UNION_ENUM_TAG_TRAILING => .tagged_union_enum_tag_trailing,
+        c.AST_NODE_CONTAINER_FIELD_INIT => .container_field_init,
+        c.AST_NODE_CONTAINER_FIELD_ALIGN => .container_field_align,
+        c.AST_NODE_CONTAINER_FIELD => .container_field,
+        c.AST_NODE_COMPTIME => .@"comptime",
+        c.AST_NODE_NOSUSPEND => .@"nosuspend",
+        c.AST_NODE_BLOCK_TWO => .block_two,
+        c.AST_NODE_BLOCK_TWO_SEMICOLON => .block_two_semicolon,
+        c.AST_NODE_BLOCK => .block,
+        c.AST_NODE_BLOCK_SEMICOLON => .block_semicolon,
+        c.AST_NODE_ASM_SIMPLE => .asm_simple,
+        c.AST_NODE_ASM => .@"asm",
+        c.AST_NODE_ASM_OUTPUT => .asm_output,
+        c.AST_NODE_ASM_INPUT => .asm_input,
+        c.AST_NODE_ERROR_VALUE => .error_value,
+        c.AST_NODE_ERROR_UNION => .error_union,
        else => undefined,
    };
 }

-fn zigAst(c_ast: c.Ast) Ast {
+// zigAst converts a c.Ast to std.zig.Ast (source is borrowed from c_ast). Free the result with deinit(gpa).
+fn zigAst(gpa: Allocator, c_ast: c.Ast) !Ast {
+    // resize() sets len as well as capacity, so set() and slice() below cover every element.
+    var tokens = Ast.TokenList{};
+    try tokens.resize(gpa, c_ast.tokens.len);
+    errdefer tokens.deinit(gpa);
+    for (0..c_ast.tokens.len) |i|
+        tokens.set(i, .{
+            .tag = zigToken(c_ast.tokens.tags[i]),
+            .start = c_ast.tokens.starts[i],
+        });
+
+    var nodes = Ast.NodeList{};
+    try nodes.resize(gpa, c_ast.nodes.len);
+    errdefer nodes.deinit(gpa);
+
+    for (0..c_ast.nodes.len) |i|
+        nodes.set(i, .{
+            .tag = zigNode(c_ast.nodes.tags[i]),
+            .main_token = c_ast.nodes.main_tokens[i],
+            .data = Ast.Node.Data{
+                .lhs = c_ast.nodes.datas[i].lhs,
+                .rhs = c_ast.nodes.datas[i].rhs,
+            },
+        });
+
+    const extra_data = try gpa.alloc(Ast.Node.Index, c_ast.extra_data.len);
+    errdefer gpa.free(extra_data);
+    std.mem.copyForwards(
+        Ast.Node.Index,
+        extra_data[0..],
+        c_ast.extra_data.arr[0..c_ast.extra_data.len],
+    );
+
+    // creating a dummy `errors` slice, so deinit can free it.
+    const errors = try gpa.alloc(Ast.Error, 0);
+    errdefer gpa.free(errors);
+
    return Ast{
-        .source = c_ast.source[0..c_ast.source_len],
-        //.tokens =
+        .source = c_ast.source[0..c_ast.source_len :0],
+        .mode = .zig,
+        .tokens = tokens.slice(),
+        .nodes = nodes.slice(),
+        .extra_data = extra_data,
+        .errors = errors,
    };
 }

 test "Ast header smoke test" {
-    try std.testing.expectEqual(zigNode(c.AST_NODE_TAG_IF), Ast.Node.Tag.@"if");
+    try std.testing.expectEqual(Ast.Node.Tag.@"if", zigNode(c.AST_NODE_IF)); // (expected, actual)
+}
+
+// copy-pasted from parser_test.zig
+const mem = std.mem;
+const print = std.debug.print;
+const io = std.io;
+const maxInt = std.math.maxInt;
+
+var fixed_buffer_mem: [100 * 1024]u8 = undefined;
+
+fn testParse(source: [:0]const u8, allocator: mem.Allocator, anything_changed: *bool) ![]u8 {
+    const stderr = io.getStdErr().writer();
+
+    // Parse with the C parser rather than std.zig.Ast.parse, then convert the result to a std.zig.Ast.
+    const c_tree = c.astParse(source, @intCast(source.len));
+    var tree = try zigAst(allocator, c_tree);
+    defer tree.deinit(allocator);
+
+    for (tree.errors) |parse_error| {
+        const loc = tree.tokenLocation(0, parse_error.token);
+        try stderr.print("(memory buffer):{d}:{d}: error: ", .{ loc.line + 1, loc.column + 1 });
+        try tree.renderError(parse_error, stderr);
+        try stderr.print("\n{s}\n", .{source[loc.line_start..loc.line_end]});
+        {
+            var i: usize = 0;
+            while (i < loc.column) : (i += 1) {
+                try stderr.writeAll(" ");
+            }
+            try stderr.writeAll("^");
+        }
+        try stderr.writeAll("\n");
+    }
+    if (tree.errors.len != 0) {
+        return error.ParseError;
+    }
+
+    const formatted = try tree.render(allocator);
+    anything_changed.* = !mem.eql(u8, formatted, source);
+    return formatted;
+}
+fn testTransformImpl(allocator: mem.Allocator, fba: *std.heap.FixedBufferAllocator, source: [:0]const u8, expected_source: []const u8) !void {
+    // reset the fixed buffer allocator each run so that it can be re-used for each
+    // iteration of the failing index
+    fba.reset();
+    var anything_changed: bool = undefined;
+    const result_source = try testParse(source, allocator, &anything_changed);
+    // release the rendered text on every exit path, including the failing checks below
+    defer allocator.free(result_source);
+    try std.testing.expectEqualStrings(expected_source, result_source);
+    const changes_expected = source.ptr != expected_source.ptr;
+    if (anything_changed != changes_expected) {
+        print("std.zig.render returned {} instead of {}\n", .{ anything_changed, changes_expected });
+        return error.TestFailed;
+    }
+}
+fn testTransform(source: [:0]const u8, expected_source: []const u8) !void {
+    var fixed_allocator = std.heap.FixedBufferAllocator.init(fixed_buffer_mem[0..]);
+    return std.testing.checkAllAllocationFailures(fixed_allocator.allocator(), testTransformImpl, .{ &fixed_allocator, source, expected_source });
+}
+fn testCanonical(source: [:0]const u8) !void {
+    return testTransform(source, source);
+}
+
+test "zig fmt: remove extra whitespace at start and end of file with comment between" {
+    if (true) return error.SkipZigTest;
+
+    try testTransform(
+        \\
+        \\
+        \\// hello
+        \\
+        \\
+    ,
+        \\// hello
+        \\
+    );
 }
--- a/test_all.zig
+++ b/test_all.zig
@@ -1,3 +1,4 @@
 test "zig0 test suite" {
    _ = @import("tokenizer_test.zig");
+    _ = @import("parser_test.zig");
 }
--- a/tokenizer.c
+++ b/tokenizer.c
@@ -461,10 +461,9 @@ state:
            const char* start = self->buffer + result.loc.start;
            uint32_t len = self->index - result.loc.start;
            TokenizerTag tag = getKeyword(start, len);
-            if (tag != TOKEN_INVALID) {
+            if (tag != TOKEN_INVALID)
                result.tag = tag;
        }
-        }
        break;

    case TOKENIZER_STATE_BUILTIN:
@@ -865,7 +864,8 @@ state:
                    .tag = TOKEN_EOF,
                    .loc = {
                        .start = self->index,
-                        .end = self->index }
+                        .end = self->index,
+                    }
                };
            }
            break;
@@ -939,7 +939,8 @@ state:
                    .tag = TOKEN_EOF,
                    .loc = {
                        .start = self->index,
-                        .end = self->index }
+                        .end = self->index,
+                    }
                };
            }
            break;
--- a/tokenizer_test.zig
+++ b/tokenizer_test.zig
@@ -8,7 +8,7 @@ const c = @cImport({
    @cInclude("tokenizer.h");
 });

-fn zigToken(token: c_uint) Token.Tag {
+pub fn zigToken(token: c_uint) Token.Tag {
    return switch (token) {
        c.TOKEN_INVALID => .invalid,
        c.TOKEN_INVALID_PERIODASTERISKS => .invalid_periodasterisks,