start with parser tests

This commit is contained in:
2025-01-03 19:23:22 +02:00
parent 49c910b8b2
commit 1f134595de
6 changed files with 488 additions and 366 deletions

340
ast.h
View File

@@ -9,91 +9,91 @@
typedef enum {
/// sub_list[lhs...rhs]
AST_NODE_TAG_ROOT,
AST_NODE_ROOT,
/// `usingnamespace lhs;`. rhs unused. main_token is `usingnamespace`.
AST_NODE_TAG_USINGNAMESPACE,
AST_NODE_USINGNAMESPACE,
/// lhs is test name token (must be string literal or identifier), if any.
/// rhs is the body node.
AST_NODE_TAG_TEST_DECL,
AST_NODE_TEST_DECL,
/// lhs is the index into extra_data.
/// rhs is the initialization expression, if any.
/// main_token is `var` or `const`.
AST_NODE_TAG_GLOBAL_VAR_DECL,
AST_NODE_GLOBAL_VAR_DECL,
/// `var a: x align(y) = rhs`
/// lhs is the index into extra_data.
/// main_token is `var` or `const`.
AST_NODE_TAG_LOCAL_VAR_DECL,
AST_NODE_LOCAL_VAR_DECL,
/// `var a: lhs = rhs`. lhs and rhs may be unused.
/// Can be local or global.
/// main_token is `var` or `const`.
AST_NODE_TAG_SIMPLE_VAR_DECL,
AST_NODE_SIMPLE_VAR_DECL,
/// `var a align(lhs) = rhs`. lhs and rhs may be unused.
/// Can be local or global.
/// main_token is `var` or `const`.
AST_NODE_TAG_ALIGNED_VAR_DECL,
AST_NODE_ALIGNED_VAR_DECL,
/// lhs is the identifier token payload if any,
/// rhs is the deferred expression.
AST_NODE_TAG_ERRDEFER,
AST_NODE_ERRDEFER,
/// lhs is unused.
/// rhs is the deferred expression.
AST_NODE_TAG_DEFER,
AST_NODE_DEFER,
/// lhs catch rhs
/// lhs catch |err| rhs
/// main_token is the `catch` keyword.
/// payload is determined by looking at the next token after the `catch` keyword.
AST_NODE_TAG_CATCH,
AST_NODE_CATCH,
/// `lhs.a`. main_token is the dot. rhs is the identifier token index.
AST_NODE_TAG_FIELD_ACCESS,
AST_NODE_FIELD_ACCESS,
/// `lhs.?`. main_token is the dot. rhs is the `?` token index.
AST_NODE_TAG_UNWRAP_OPTIONAL,
AST_NODE_UNWRAP_OPTIONAL,
/// `lhs == rhs`. main_token is op.
AST_NODE_TAG_EQUAL_EQUAL,
AST_NODE_EQUAL_EQUAL,
/// `lhs != rhs`. main_token is op.
AST_NODE_TAG_BANG_EQUAL,
AST_NODE_BANG_EQUAL,
/// `lhs < rhs`. main_token is op.
AST_NODE_TAG_LESS_THAN,
AST_NODE_LESS_THAN,
/// `lhs > rhs`. main_token is op.
AST_NODE_TAG_GREATER_THAN,
AST_NODE_GREATER_THAN,
/// `lhs <= rhs`. main_token is op.
AST_NODE_TAG_LESS_OR_EQUAL,
AST_NODE_LESS_OR_EQUAL,
/// `lhs >= rhs`. main_token is op.
AST_NODE_TAG_GREATER_OR_EQUAL,
AST_NODE_GREATER_OR_EQUAL,
/// `lhs *= rhs`. main_token is op.
AST_NODE_TAG_ASSIGN_MUL,
AST_NODE_ASSIGN_MUL,
/// `lhs /= rhs`. main_token is op.
AST_NODE_TAG_ASSIGN_DIV,
AST_NODE_ASSIGN_DIV,
/// `lhs %= rhs`. main_token is op.
AST_NODE_TAG_ASSIGN_MOD,
AST_NODE_ASSIGN_MOD,
/// `lhs += rhs`. main_token is op.
AST_NODE_TAG_ASSIGN_ADD,
AST_NODE_ASSIGN_ADD,
/// `lhs -= rhs`. main_token is op.
AST_NODE_TAG_ASSIGN_SUB,
AST_NODE_ASSIGN_SUB,
/// `lhs <<= rhs`. main_token is op.
AST_NODE_TAG_ASSIGN_SHL,
AST_NODE_ASSIGN_SHL,
/// `lhs <<|= rhs`. main_token is op.
AST_NODE_TAG_ASSIGN_SHL_SAT,
AST_NODE_ASSIGN_SHL_SAT,
/// `lhs >>= rhs`. main_token is op.
AST_NODE_TAG_ASSIGN_SHR,
AST_NODE_ASSIGN_SHR,
/// `lhs &= rhs`. main_token is op.
AST_NODE_TAG_ASSIGN_BIT_AND,
AST_NODE_ASSIGN_BIT_AND,
/// `lhs ^= rhs`. main_token is op.
AST_NODE_TAG_ASSIGN_BIT_XOR,
AST_NODE_ASSIGN_BIT_XOR,
/// `lhs |= rhs`. main_token is op.
AST_NODE_TAG_ASSIGN_BIT_OR,
AST_NODE_ASSIGN_BIT_OR,
/// `lhs *%= rhs`. main_token is op.
AST_NODE_TAG_ASSIGN_MUL_WRAP,
AST_NODE_ASSIGN_MUL_WRAP,
/// `lhs +%= rhs`. main_token is op.
AST_NODE_TAG_ASSIGN_ADD_WRAP,
AST_NODE_ASSIGN_ADD_WRAP,
/// `lhs -%= rhs`. main_token is op.
AST_NODE_TAG_ASSIGN_SUB_WRAP,
AST_NODE_ASSIGN_SUB_WRAP,
/// `lhs *|= rhs`. main_token is op.
AST_NODE_TAG_ASSIGN_MUL_SAT,
AST_NODE_ASSIGN_MUL_SAT,
/// `lhs +|= rhs`. main_token is op.
AST_NODE_TAG_ASSIGN_ADD_SAT,
AST_NODE_ASSIGN_ADD_SAT,
/// `lhs -|= rhs`. main_token is op.
AST_NODE_TAG_ASSIGN_SUB_SAT,
AST_NODE_ASSIGN_SUB_SAT,
/// `lhs = rhs`. main_token is op.
AST_NODE_TAG_ASSIGN,
AST_NODE_ASSIGN,
/// `a, b, ... = rhs`. main_token is op. lhs is index into `extra_data`
/// of an lhs elem count followed by an array of that many `Node.Index`,
/// with each node having one of the following types:
@@ -107,73 +107,73 @@ typedef enum {
/// standard assignment LHS (which must be evaluated as an lvalue).
/// There may be a preceding `comptime` token, which does not create a
/// corresponding `comptime` node so must be manually detected.
AST_NODE_TAG_ASSIGN_DESTRUCTURE,
AST_NODE_ASSIGN_DESTRUCTURE,
/// `lhs || rhs`. main_token is the `||`.
AST_NODE_TAG_MERGE_ERROR_SETS,
AST_NODE_MERGE_ERROR_SETS,
/// `lhs * rhs`. main_token is the `*`.
AST_NODE_TAG_MUL,
AST_NODE_MUL,
/// `lhs / rhs`. main_token is the `/`.
AST_NODE_TAG_DIV,
AST_NODE_DIV,
/// `lhs % rhs`. main_token is the `%`.
AST_NODE_TAG_MOD,
AST_NODE_MOD,
/// `lhs ** rhs`. main_token is the `**`.
AST_NODE_TAG_ARRAY_MULT,
AST_NODE_ARRAY_MULT,
/// `lhs *% rhs`. main_token is the `*%`.
AST_NODE_TAG_MUL_WRAP,
AST_NODE_MUL_WRAP,
/// `lhs *| rhs`. main_token is the `*|`.
AST_NODE_TAG_MUL_SAT,
AST_NODE_MUL_SAT,
/// `lhs + rhs`. main_token is the `+`.
AST_NODE_TAG_ADD,
AST_NODE_ADD,
/// `lhs - rhs`. main_token is the `-`.
AST_NODE_TAG_SUB,
AST_NODE_SUB,
/// `lhs ++ rhs`. main_token is the `++`.
AST_NODE_TAG_ARRAY_CAT,
AST_NODE_ARRAY_CAT,
/// `lhs +% rhs`. main_token is the `+%`.
AST_NODE_TAG_ADD_WRAP,
AST_NODE_ADD_WRAP,
/// `lhs -% rhs`. main_token is the `-%`.
AST_NODE_TAG_SUB_WRAP,
AST_NODE_SUB_WRAP,
/// `lhs +| rhs`. main_token is the `+|`.
AST_NODE_TAG_ADD_SAT,
AST_NODE_ADD_SAT,
/// `lhs -| rhs`. main_token is the `-|`.
AST_NODE_TAG_SUB_SAT,
AST_NODE_SUB_SAT,
/// `lhs << rhs`. main_token is the `<<`.
AST_NODE_TAG_SHL,
AST_NODE_SHL,
/// `lhs <<| rhs`. main_token is the `<<|`.
AST_NODE_TAG_SHL_SAT,
AST_NODE_SHL_SAT,
/// `lhs >> rhs`. main_token is the `>>`.
AST_NODE_TAG_SHR,
AST_NODE_SHR,
/// `lhs & rhs`. main_token is the `&`.
AST_NODE_TAG_BIT_AND,
AST_NODE_BIT_AND,
/// `lhs ^ rhs`. main_token is the `^`.
AST_NODE_TAG_BIT_XOR,
AST_NODE_BIT_XOR,
/// `lhs | rhs`. main_token is the `|`.
AST_NODE_TAG_BIT_OR,
AST_NODE_BIT_OR,
/// `lhs orelse rhs`. main_token is the `orelse`.
AST_NODE_TAG_ORELSE,
AST_NODE_ORELSE,
/// `lhs and rhs`. main_token is the `and`.
AST_NODE_TAG_BOOL_AND,
AST_NODE_BOOL_AND,
/// `lhs or rhs`. main_token is the `or`.
AST_NODE_TAG_BOOL_OR,
AST_NODE_BOOL_OR,
/// `op lhs`. rhs unused. main_token is op.
AST_NODE_TAG_BOOL_NOT,
AST_NODE_BOOL_NOT,
/// `op lhs`. rhs unused. main_token is op.
AST_NODE_TAG_NEGATION,
AST_NODE_NEGATION,
/// `op lhs`. rhs unused. main_token is op.
AST_NODE_TAG_BIT_NOT,
AST_NODE_BIT_NOT,
/// `op lhs`. rhs unused. main_token is op.
AST_NODE_TAG_NEGATION_WRAP,
AST_NODE_NEGATION_WRAP,
/// `op lhs`. rhs unused. main_token is op.
AST_NODE_TAG_ADDRESS_OF,
AST_NODE_ADDRESS_OF,
/// `op lhs`. rhs unused. main_token is op.
AST_NODE_TAG_TRY,
AST_NODE_TRY,
/// `op lhs`. rhs unused. main_token is op.
AST_NODE_TAG_AWAIT,
AST_NODE_AWAIT,
/// `?lhs`. rhs unused. main_token is the `?`.
AST_NODE_TAG_OPTIONAL_TYPE,
AST_NODE_OPTIONAL_TYPE,
/// `[lhs]rhs`.
AST_NODE_TAG_ARRAY_TYPE,
AST_NODE_ARRAY_TYPE,
/// `[lhs:a]b`. `ArrayTypeSentinel[rhs]`.
AST_NODE_TAG_ARRAY_TYPE_SENTINEL,
AST_NODE_ARRAY_TYPE_SENTINEL,
/// `[*]align(lhs) rhs`. lhs can be omitted.
/// `*align(lhs) rhs`. lhs can be omitted.
/// `[]rhs`.
@@ -181,7 +181,7 @@ typedef enum {
/// if a slice, many-item pointer, or C-pointer
/// main_token might be a ** token, which is shared with a parent/child
/// pointer type and may require special handling.
AST_NODE_TAG_PTR_TYPE_ALIGNED,
AST_NODE_PTR_TYPE_ALIGNED,
/// `[*:lhs]rhs`. lhs can be omitted.
/// `*rhs`.
/// `[:lhs]rhs`.
@@ -189,297 +189,297 @@ typedef enum {
/// if a slice, many-item pointer, or C-pointer
/// main_token might be a ** token, which is shared with a parent/child
/// pointer type and may require special handling.
AST_NODE_TAG_PTR_TYPE_SENTINEL,
AST_NODE_PTR_TYPE_SENTINEL,
/// lhs is index into ptr_type. rhs is the element type expression.
/// main_token is the asterisk if a single item pointer or the lbracket
/// if a slice, many-item pointer, or C-pointer
/// main_token might be a ** token, which is shared with a parent/child
/// pointer type and may require special handling.
AST_NODE_TAG_PTR_TYPE,
AST_NODE_PTR_TYPE,
/// lhs is index into ptr_type_bit_range. rhs is the element type expression.
/// main_token is the asterisk if a single item pointer or the lbracket
/// if a slice, many-item pointer, or C-pointer
/// main_token might be a ** token, which is shared with a parent/child
/// pointer type and may require special handling.
AST_NODE_TAG_PTR_TYPE_BIT_RANGE,
AST_NODE_PTR_TYPE_BIT_RANGE,
/// `lhs[rhs..]`
/// main_token is the lbracket.
AST_NODE_TAG_SLICE_OPEN,
AST_NODE_SLICE_OPEN,
/// `lhs[b..c]`. rhs is index into Slice
/// main_token is the lbracket.
AST_NODE_TAG_SLICE,
AST_NODE_SLICE,
/// `lhs[b..c :d]`. rhs is index into SliceSentinel. Slice end c can be omitted.
/// main_token is the lbracket.
AST_NODE_TAG_SLICE_SENTINEL,
AST_NODE_SLICE_SENTINEL,
/// `lhs.*`. rhs is unused.
AST_NODE_TAG_DEREF,
AST_NODE_DEREF,
/// `lhs[rhs]`.
AST_NODE_TAG_ARRAY_ACCESS,
AST_NODE_ARRAY_ACCESS,
/// `lhs{rhs}`. rhs can be omitted.
AST_NODE_TAG_ARRAY_INIT_ONE,
AST_NODE_ARRAY_INIT_ONE,
/// `lhs{rhs,}`. rhs can *not* be omitted
AST_NODE_TAG_ARRAY_INIT_ONE_COMMA,
AST_NODE_ARRAY_INIT_ONE_COMMA,
/// `.{lhs, rhs}`. lhs and rhs can be omitted.
AST_NODE_TAG_ARRAY_INIT_DOT_TWO,
AST_NODE_ARRAY_INIT_DOT_TWO,
/// Same as `array_init_dot_two` except there is known to be a trailing comma
/// before the final rbrace.
AST_NODE_TAG_ARRAY_INIT_DOT_TWO_COMMA,
AST_NODE_ARRAY_INIT_DOT_TWO_COMMA,
/// `.{a, b}`. `sub_list[lhs..rhs]`.
AST_NODE_TAG_ARRAY_INIT_DOT,
AST_NODE_ARRAY_INIT_DOT,
/// Same as `array_init_dot` except there is known to be a trailing comma
/// before the final rbrace.
AST_NODE_TAG_ARRAY_INIT_DOT_COMMA,
AST_NODE_ARRAY_INIT_DOT_COMMA,
/// `lhs{a, b}`. `sub_range_list[rhs]`. lhs can be omitted which means `.{a, b}`.
AST_NODE_TAG_ARRAY_INIT,
AST_NODE_ARRAY_INIT,
/// Same as `array_init` except there is known to be a trailing comma
/// before the final rbrace.
AST_NODE_TAG_ARRAY_INIT_COMMA,
AST_NODE_ARRAY_INIT_COMMA,
/// `lhs{.a = rhs}`. rhs can be omitted making it empty.
/// main_token is the lbrace.
AST_NODE_TAG_STRUCT_INIT_ONE,
AST_NODE_STRUCT_INIT_ONE,
/// `lhs{.a = rhs,}`. rhs can *not* be omitted.
/// main_token is the lbrace.
AST_NODE_TAG_STRUCT_INIT_ONE_COMMA,
AST_NODE_STRUCT_INIT_ONE_COMMA,
/// `.{.a = lhs, .b = rhs}`. lhs and rhs can be omitted.
/// main_token is the lbrace.
/// No trailing comma before the rbrace.
AST_NODE_TAG_STRUCT_INIT_DOT_TWO,
AST_NODE_STRUCT_INIT_DOT_TWO,
/// Same as `struct_init_dot_two` except there is known to be a trailing comma
/// before the final rbrace.
AST_NODE_TAG_STRUCT_INIT_DOT_TWO_COMMA,
AST_NODE_STRUCT_INIT_DOT_TWO_COMMA,
/// `.{.a = b, .c = d}`. `sub_list[lhs..rhs]`.
/// main_token is the lbrace.
AST_NODE_TAG_STRUCT_INIT_DOT,
AST_NODE_STRUCT_INIT_DOT,
/// Same as `struct_init_dot` except there is known to be a trailing comma
/// before the final rbrace.
AST_NODE_TAG_STRUCT_INIT_DOT_COMMA,
AST_NODE_STRUCT_INIT_DOT_COMMA,
/// `lhs{.a = b, .c = d}`. `sub_range_list[rhs]`.
/// lhs can be omitted which means `.{.a = b, .c = d}`.
/// main_token is the lbrace.
AST_NODE_TAG_STRUCT_INIT,
AST_NODE_STRUCT_INIT,
/// Same as `struct_init` except there is known to be a trailing comma
/// before the final rbrace.
AST_NODE_TAG_STRUCT_INIT_COMMA,
AST_NODE_STRUCT_INIT_COMMA,
/// `lhs(rhs)`. rhs can be omitted.
/// main_token is the lparen.
AST_NODE_TAG_CALL_ONE,
AST_NODE_CALL_ONE,
/// `lhs(rhs,)`. rhs can be omitted.
/// main_token is the lparen.
AST_NODE_TAG_CALL_ONE_COMMA,
AST_NODE_CALL_ONE_COMMA,
/// `async lhs(rhs)`. rhs can be omitted.
AST_NODE_TAG_ASYNC_CALL_ONE,
AST_NODE_ASYNC_CALL_ONE,
/// `async lhs(rhs,)`.
AST_NODE_TAG_ASYNC_CALL_ONE_COMMA,
AST_NODE_ASYNC_CALL_ONE_COMMA,
/// `lhs(a, b, c)`. `SubRange[rhs]`.
/// main_token is the `(`.
AST_NODE_TAG_CALL,
AST_NODE_CALL,
/// `lhs(a, b, c,)`. `SubRange[rhs]`.
/// main_token is the `(`.
AST_NODE_TAG_CALL_COMMA,
AST_NODE_CALL_COMMA,
/// `async lhs(a, b, c)`. `SubRange[rhs]`.
/// main_token is the `(`.
AST_NODE_TAG_ASYNC_CALL,
AST_NODE_ASYNC_CALL,
/// `async lhs(a, b, c,)`. `SubRange[rhs]`.
/// main_token is the `(`.
AST_NODE_TAG_ASYNC_CALL_COMMA,
AST_NODE_ASYNC_CALL_COMMA,
/// `switch(lhs) {}`. `SubRange[rhs]`.
/// `main_token` is the identifier of a preceding label, if any; otherwise `switch`.
AST_NODE_TAG_SWITCH,
AST_NODE_SWITCH,
/// Same as switch except there is known to be a trailing comma
/// before the final rbrace
AST_NODE_TAG_SWITCH_COMMA,
AST_NODE_SWITCH_COMMA,
/// `lhs => rhs`. If lhs is omitted it means `else`.
/// main_token is the `=>`
AST_NODE_TAG_SWITCH_CASE_ONE,
AST_NODE_SWITCH_CASE_ONE,
/// Same as `switch_case_one` but the case is inline
AST_NODE_TAG_SWITCH_CASE_INLINE_ONE,
AST_NODE_SWITCH_CASE_INLINE_ONE,
/// `a, b, c => rhs`. `SubRange[lhs]`.
/// main_token is the `=>`
AST_NODE_TAG_SWITCH_CASE,
AST_NODE_SWITCH_CASE,
/// Same as `switch_case` but the case is inline
AST_NODE_TAG_SWITCH_CASE_INLINE,
AST_NODE_SWITCH_CASE_INLINE,
/// `lhs...rhs`.
AST_NODE_TAG_SWITCH_RANGE,
AST_NODE_SWITCH_RANGE,
/// `while (lhs) rhs`.
/// `while (lhs) |x| rhs`.
AST_NODE_TAG_WHILE_SIMPLE,
AST_NODE_WHILE_SIMPLE,
/// `while (lhs) : (a) b`. `WhileCont[rhs]`.
/// `while (lhs) |x| : (a) b`. `WhileCont[rhs]`.
AST_NODE_TAG_WHILE_CONT,
AST_NODE_WHILE_CONT,
/// `while (lhs) : (a) b else c`. `While[rhs]`.
/// `while (lhs) |x| : (a) b else c`. `While[rhs]`.
/// `while (lhs) |x| : (a) b else |y| c`. `While[rhs]`.
/// The cont expression part `: (a)` may be omitted.
AST_NODE_TAG_WHILE,
AST_NODE_WHILE,
/// `for (lhs) rhs`.
AST_NODE_TAG_FOR_SIMPLE,
AST_NODE_FOR_SIMPLE,
/// `for (lhs[0..inputs]) lhs[inputs + 1] else lhs[inputs + 2]`. `For[rhs]`.
AST_NODE_TAG_FOR,
AST_NODE_FOR,
/// `lhs..rhs`. rhs can be omitted.
AST_NODE_TAG_FOR_RANGE,
AST_NODE_FOR_RANGE,
/// `if (lhs) rhs`.
/// `if (lhs) |a| rhs`.
AST_NODE_TAG_IF_SIMPLE,
AST_NODE_IF_SIMPLE,
/// `if (lhs) a else b`. `If[rhs]`.
/// `if (lhs) |x| a else b`. `If[rhs]`.
/// `if (lhs) |x| a else |y| b`. `If[rhs]`.
AST_NODE_TAG_IF,
AST_NODE_IF,
/// `suspend lhs`. lhs can be omitted. rhs is unused.
AST_NODE_TAG_SUSPEND,
AST_NODE_SUSPEND,
/// `resume lhs`. rhs is unused.
AST_NODE_TAG_RESUME,
AST_NODE_RESUME,
/// `continue :lhs rhs`
/// both lhs and rhs may be omitted.
AST_NODE_TAG_CONTINUE,
AST_NODE_CONTINUE,
/// `break :lhs rhs`
/// both lhs and rhs may be omitted.
AST_NODE_TAG_BREAK,
AST_NODE_BREAK,
/// `return lhs`. lhs can be omitted. rhs is unused.
AST_NODE_TAG_RETURN,
AST_NODE_RETURN,
/// `fn (a: lhs) rhs`. lhs can be omitted.
/// anytype and ... parameters are omitted from the AST tree.
/// main_token is the `fn` keyword.
/// extern function declarations use this tag.
AST_NODE_TAG_FN_PROTO_SIMPLE,
AST_NODE_FN_PROTO_SIMPLE,
/// `fn (a: b, c: d) rhs`. `sub_range_list[lhs]`.
/// anytype and ... parameters are omitted from the AST tree.
/// main_token is the `fn` keyword.
/// extern function declarations use this tag.
AST_NODE_TAG_FN_PROTO_MULTI,
AST_NODE_FN_PROTO_MULTI,
/// `fn (a: b) addrspace(e) linksection(f) callconv(g) rhs`. `FnProtoOne[lhs]`.
/// zero or one parameters.
/// anytype and ... parameters are omitted from the AST tree.
/// main_token is the `fn` keyword.
/// extern function declarations use this tag.
AST_NODE_TAG_FN_PROTO_ONE,
AST_NODE_FN_PROTO_ONE,
/// `fn (a: b, c: d) addrspace(e) linksection(f) callconv(g) rhs`. `FnProto[lhs]`.
/// anytype and ... parameters are omitted from the AST tree.
/// main_token is the `fn` keyword.
/// extern function declarations use this tag.
AST_NODE_TAG_FN_PROTO,
AST_NODE_FN_PROTO,
/// lhs is the fn_proto.
/// rhs is the function body block.
/// Note that extern function declarations use the fn_proto tags rather
/// than this one.
AST_NODE_TAG_FN_DECL,
AST_NODE_FN_DECL,
/// `anyframe->rhs`. main_token is `anyframe`. `lhs` is arrow token index.
AST_NODE_TAG_ANYFRAME_TYPE,
AST_NODE_ANYFRAME_TYPE,
/// Both lhs and rhs unused.
AST_NODE_TAG_ANYFRAME_LITERAL,
AST_NODE_ANYFRAME_LITERAL,
/// Both lhs and rhs unused.
AST_NODE_TAG_CHAR_LITERAL,
AST_NODE_CHAR_LITERAL,
/// Both lhs and rhs unused.
AST_NODE_TAG_NUMBER_LITERAL,
AST_NODE_NUMBER_LITERAL,
/// Both lhs and rhs unused.
AST_NODE_TAG_UNREACHABLE_LITERAL,
AST_NODE_UNREACHABLE_LITERAL,
/// Both lhs and rhs unused.
/// Most identifiers will not have explicit AST nodes, however for expressions
/// which could be one of many different kinds of AST nodes, there will be an
/// identifier AST node for it.
AST_NODE_TAG_IDENTIFIER,
AST_NODE_IDENTIFIER,
/// lhs is the dot token index, rhs unused, main_token is the identifier.
AST_NODE_TAG_ENUM_LITERAL,
AST_NODE_ENUM_LITERAL,
/// main_token is the string literal token
/// Both lhs and rhs unused.
AST_NODE_TAG_STRING_LITERAL,
AST_NODE_STRING_LITERAL,
/// main_token is the first token index (redundant with lhs)
/// lhs is the first token index; rhs is the last token index.
/// Could be a series of multiline_string_literal_line tokens, or a single
/// string_literal token.
AST_NODE_TAG_MULTILINE_STRING_LITERAL,
AST_NODE_MULTILINE_STRING_LITERAL,
/// `(lhs)`. main_token is the `(`; rhs is the token index of the `)`.
AST_NODE_TAG_GROUPED_EXPRESSION,
AST_NODE_GROUPED_EXPRESSION,
/// `@a(lhs, rhs)`. lhs and rhs may be omitted.
/// main_token is the builtin token.
AST_NODE_TAG_BUILTIN_CALL_TWO,
AST_NODE_BUILTIN_CALL_TWO,
/// Same as builtin_call_two but there is known to be a trailing comma before the rparen.
AST_NODE_TAG_BUILTIN_CALL_TWO_COMMA,
AST_NODE_BUILTIN_CALL_TWO_COMMA,
/// `@a(b, c)`. `sub_list[lhs..rhs]`.
/// main_token is the builtin token.
AST_NODE_TAG_BUILTIN_CALL,
AST_NODE_BUILTIN_CALL,
/// Same as builtin_call but there is known to be a trailing comma before the rparen.
AST_NODE_TAG_BUILTIN_CALL_COMMA,
AST_NODE_BUILTIN_CALL_COMMA,
/// `error{a, b}`.
/// rhs is the rbrace, lhs is unused.
AST_NODE_TAG_ERROR_SET_DECL,
AST_NODE_ERROR_SET_DECL,
/// `struct {}`, `union {}`, `opaque {}`, `enum {}`. `extra_data[lhs..rhs]`.
/// main_token is `struct`, `union`, `opaque`, `enum` keyword.
AST_NODE_TAG_CONTAINER_DECL,
AST_NODE_CONTAINER_DECL,
/// Same as ContainerDecl but there is known to be a trailing comma
/// or semicolon before the rbrace.
AST_NODE_TAG_CONTAINER_DECL_TRAILING,
AST_NODE_CONTAINER_DECL_TRAILING,
/// `struct {lhs, rhs}`, `union {lhs, rhs}`, `opaque {lhs, rhs}`, `enum {lhs, rhs}`.
/// lhs or rhs can be omitted.
/// main_token is `struct`, `union`, `opaque`, `enum` keyword.
AST_NODE_TAG_CONTAINER_DECL_TWO,
AST_NODE_CONTAINER_DECL_TWO,
/// Same as ContainerDeclTwo except there is known to be a trailing comma
/// or semicolon before the rbrace.
AST_NODE_TAG_CONTAINER_DECL_TWO_TRAILING,
AST_NODE_CONTAINER_DECL_TWO_TRAILING,
/// `struct(lhs)` / `union(lhs)` / `enum(lhs)`. `SubRange[rhs]`.
AST_NODE_TAG_CONTAINER_DECL_ARG,
AST_NODE_CONTAINER_DECL_ARG,
/// Same as container_decl_arg but there is known to be a trailing
/// comma or semicolon before the rbrace.
AST_NODE_TAG_CONTAINER_DECL_ARG_TRAILING,
AST_NODE_CONTAINER_DECL_ARG_TRAILING,
/// `union(enum) {}`. `sub_list[lhs..rhs]`.
/// Note that tagged unions with explicitly provided enums are represented
/// by `container_decl_arg`.
AST_NODE_TAG_TAGGED_UNION,
AST_NODE_TAGGED_UNION,
/// Same as tagged_union but there is known to be a trailing comma
/// or semicolon before the rbrace.
AST_NODE_TAG_TAGGED_UNION_TRAILING,
AST_NODE_TAGGED_UNION_TRAILING,
/// `union(enum) {lhs, rhs}`. lhs or rhs may be omitted.
/// Note that tagged unions with explicitly provided enums are represented
/// by `container_decl_arg`.
AST_NODE_TAG_TAGGED_UNION_TWO,
AST_NODE_TAGGED_UNION_TWO,
/// Same as tagged_union_two but there is known to be a trailing comma
/// or semicolon before the rbrace.
AST_NODE_TAG_TAGGED_UNION_TWO_TRAILING,
AST_NODE_TAGGED_UNION_TWO_TRAILING,
/// `union(enum(lhs)) {}`. `SubRange[rhs]`.
AST_NODE_TAG_TAGGED_UNION_ENUM_TAG,
AST_NODE_TAGGED_UNION_ENUM_TAG,
/// Same as tagged_union_enum_tag but there is known to be a trailing comma
/// or semicolon before the rbrace.
AST_NODE_TAG_TAGGED_UNION_ENUM_TAG_TRAILING,
AST_NODE_TAGGED_UNION_ENUM_TAG_TRAILING,
/// `a: lhs = rhs,`. lhs and rhs can be omitted.
/// main_token is the field name identifier.
/// lastToken() does not include the possible trailing comma.
AST_NODE_TAG_CONTAINER_FIELD_INIT,
AST_NODE_CONTAINER_FIELD_INIT,
/// `a: lhs align(rhs),`. rhs can be omitted.
/// main_token is the field name identifier.
/// lastToken() does not include the possible trailing comma.
AST_NODE_TAG_CONTAINER_FIELD_ALIGN,
AST_NODE_CONTAINER_FIELD_ALIGN,
/// `a: lhs align(c) = d,`. `container_field_list[rhs]`.
/// main_token is the field name identifier.
/// lastToken() does not include the possible trailing comma.
AST_NODE_TAG_CONTAINER_FIELD,
AST_NODE_CONTAINER_FIELD,
/// `comptime lhs`. rhs unused.
AST_NODE_TAG_COMPTIME,
AST_NODE_COMPTIME,
/// `nosuspend lhs`. rhs unused.
AST_NODE_TAG_NOSUSPEND,
AST_NODE_NOSUSPEND,
/// `{lhs rhs}`. rhs or lhs can be omitted.
/// main_token points at the lbrace.
AST_NODE_TAG_BLOCK_TWO,
AST_NODE_BLOCK_TWO,
/// Same as block_two but there is known to be a semicolon before the rbrace.
AST_NODE_TAG_BLOCK_TWO_SEMICOLON,
AST_NODE_BLOCK_TWO_SEMICOLON,
/// `{}`. `sub_list[lhs..rhs]`.
/// main_token points at the lbrace.
AST_NODE_TAG_BLOCK,
AST_NODE_BLOCK,
/// Same as block but there is known to be a semicolon before the rbrace.
AST_NODE_TAG_BLOCK_SEMICOLON,
AST_NODE_BLOCK_SEMICOLON,
/// `asm(lhs)`. rhs is the token index of the rparen.
AST_NODE_TAG_ASM_SIMPLE,
AST_NODE_ASM_SIMPLE,
/// `asm(lhs, a)`. `Asm[rhs]`.
AST_NODE_TAG_ASM,
AST_NODE_ASM,
/// `[a] "b" (c)`. lhs is 0, rhs is token index of the rparen.
/// `[a] "b" (-> lhs)`. rhs is token index of the rparen.
/// main_token is `a`.
AST_NODE_TAG_ASM_OUTPUT,
AST_NODE_ASM_OUTPUT,
/// `[a] "b" (lhs)`. rhs is token index of the rparen.
/// main_token is `a`.
AST_NODE_TAG_ASM_INPUT,
AST_NODE_ASM_INPUT,
/// `error.a`. lhs is token index of `.`. rhs is token index of `a`.
AST_NODE_TAG_ERROR_VALUE,
AST_NODE_ERROR_VALUE,
/// `lhs!rhs`. main_token is the `!`.
AST_NODE_TAG_ERROR_UNION,
AST_NODE_ERROR_UNION,
} AstNodeTag;
typedef uint32_t AstTokenIndex;

View File

@@ -182,7 +182,7 @@ static AstNodeIndex expectContainerField(Parser* p) {
return addNode(
&p->nodes,
(AstNodeItem) {
.tag = AST_NODE_TAG_CONTAINER_FIELD_INIT,
.tag = AST_NODE_CONTAINER_FIELD_INIT,
.main_token = main_token,
.data = {
.lhs = type_expr,
@@ -193,7 +193,7 @@ static AstNodeIndex expectContainerField(Parser* p) {
return addNode(
&p->nodes,
(AstNodeItem) {
.tag = AST_NODE_TAG_CONTAINER_FIELD_ALIGN,
.tag = AST_NODE_CONTAINER_FIELD_ALIGN,
.main_token = main_token,
.data = {
.lhs = type_expr,
@@ -204,7 +204,7 @@ static AstNodeIndex expectContainerField(Parser* p) {
return addNode(
&p->nodes,
(AstNodeItem) {
.tag = AST_NODE_TAG_CONTAINER_FIELD,
.tag = AST_NODE_CONTAINER_FIELD,
.main_token = main_token,
.data = {
.lhs = type_expr,
@@ -244,7 +244,7 @@ static AstNodeIndex parsePrimaryTypeExpr(Parser* p) {
return addNode(
&p->nodes,
(AstNodeItem) {
.tag = AST_NODE_TAG_IDENTIFIER,
.tag = AST_NODE_IDENTIFIER,
.main_token = nextToken(p),
.data = {} });
case TOKEN_KEYWORD_INLINE:
@@ -310,7 +310,7 @@ static AstNodeIndex parseSuffixExpr(Parser* p) {
return addNode(
&p->nodes,
(AstNodeItem) {
.tag = comma ? AST_NODE_TAG_CALL_ONE_COMMA : AST_NODE_TAG_CALL_ONE,
.tag = comma ? AST_NODE_CALL_ONE_COMMA : AST_NODE_CALL_ONE,
.main_token = lparen,
.data = {
.lhs = res,
@@ -321,7 +321,7 @@ static AstNodeIndex parseSuffixExpr(Parser* p) {
return addNode(
&p->nodes,
(AstNodeItem) {
.tag = comma ? AST_NODE_TAG_CALL_ONE_COMMA : AST_NODE_TAG_CALL_ONE,
.tag = comma ? AST_NODE_CALL_ONE_COMMA : AST_NODE_CALL_ONE,
.main_token = lparen,
.data = {
.lhs = res,
@@ -333,7 +333,7 @@ static AstNodeIndex parseSuffixExpr(Parser* p) {
return addNode(
&p->nodes,
(AstNodeItem) {
.tag = comma ? AST_NODE_TAG_CALL_COMMA : AST_NODE_TAG_CALL,
.tag = comma ? AST_NODE_CALL_COMMA : AST_NODE_CALL,
.main_token = lparen,
.data = {
.lhs = res,
@@ -368,7 +368,7 @@ static AstNodeIndex parseErrorUnionExpr(Parser* p) {
return addNode(
&p->nodes,
(AstNodeItem) {
.tag = AST_NODE_TAG_ERROR_UNION,
.tag = AST_NODE_ERROR_UNION,
.main_token = bang,
.data = {
.lhs = suffix_expr,
@@ -413,7 +413,7 @@ static AstNodeIndex parseFnProto(Parser* p) {
if (fn_token == null_node)
return null_node;
AstNodeIndex fn_proto_index = reserveNode(p, AST_NODE_TAG_FN_PROTO);
AstNodeIndex fn_proto_index = reserveNode(p, AST_NODE_FN_PROTO);
eatToken(p, TOKEN_IDENTIFIER);
@@ -432,7 +432,7 @@ static AstNodeIndex parseFnProto(Parser* p) {
p,
fn_proto_index,
(AstNodeItem) {
.tag = AST_NODE_TAG_FN_PROTO_SIMPLE,
.tag = AST_NODE_FN_PROTO_SIMPLE,
.main_token = fn_token,
.data = {
.lhs = params.payload.zero_or_one,
@@ -568,7 +568,7 @@ static AstNodeIndex parseBlock(Parser* p) {
return addNode(
&p->nodes,
(AstNodeItem) {
.tag = AST_NODE_TAG_BLOCK_TWO,
.tag = AST_NODE_BLOCK_TWO,
.main_token = lbrace,
.data = {
.lhs = 0,
@@ -579,7 +579,7 @@ static AstNodeIndex parseBlock(Parser* p) {
return addNode(
&p->nodes,
(AstNodeItem) {
.tag = semicolon ? AST_NODE_TAG_BLOCK_TWO_SEMICOLON : AST_NODE_TAG_BLOCK_TWO,
.tag = semicolon ? AST_NODE_BLOCK_TWO_SEMICOLON : AST_NODE_BLOCK_TWO,
.main_token = lbrace,
.data = {
.lhs = p->scratch.arr[scratch_top.old_len],
@@ -590,7 +590,7 @@ static AstNodeIndex parseBlock(Parser* p) {
return addNode(
&p->nodes,
(AstNodeItem) {
.tag = semicolon ? AST_NODE_TAG_BLOCK_TWO_SEMICOLON : AST_NODE_TAG_BLOCK_TWO,
.tag = semicolon ? AST_NODE_BLOCK_TWO_SEMICOLON : AST_NODE_BLOCK_TWO,
.main_token = lbrace,
.data = {
.lhs = p->scratch.arr[scratch_top.old_len],
@@ -602,7 +602,7 @@ static AstNodeIndex parseBlock(Parser* p) {
return addNode(
&p->nodes,
(AstNodeItem) {
.tag = semicolon ? AST_NODE_TAG_BLOCK_SEMICOLON : AST_NODE_TAG_BLOCK,
.tag = semicolon ? AST_NODE_BLOCK_SEMICOLON : AST_NODE_BLOCK,
.main_token = lbrace,
.data = {
.lhs = span.start,
@@ -673,13 +673,13 @@ static AstNodeIndex expectTopLevelDecl(Parser* p) {
p->tok_i++;
return fn_proto;
case TOKEN_L_BRACE:;
AstNodeIndex fn_decl_index = reserveNode(p, AST_NODE_TAG_FN_DECL);
AstNodeIndex fn_decl_index = reserveNode(p, AST_NODE_FN_DECL);
AstNodeIndex body_block = parseBlock(p);
return setNode(
p,
fn_decl_index,
(AstNodeItem) {
.tag = AST_NODE_TAG_FN_DECL,
.tag = AST_NODE_FN_DECL,
.main_token = p->nodes.main_tokens[fn_proto],
.data = { .lhs = fn_proto, .rhs = body_block },
});
@@ -885,7 +885,7 @@ break_loop:;
}
void parseRoot(Parser* p) {
addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_TAG_ROOT, .main_token = 0 });
addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_ROOT, .main_token = 0 });
Members root_members = parseContainerMembers(p);
AstSubRange root_decls = membersToSpan(root_members, p);

View File

@@ -2,194 +2,314 @@ const std = @import("std");
const testing = std.testing;
const Ast = std.zig.Ast;
const Allocator = std.mem.Allocator;
const c = @cImport({
@cInclude("ast.h");
});
const zigToken = @import("./tokenizer_test.zig").zigToken;
/// Maps a node tag constant from `ast.h` (`c.AST_NODE_*`) to the matching
/// `std.zig.Ast.Node.Tag`.
///
/// Only the current `AST_NODE_*` names are listed; the former `AST_NODE_TAG_*`
/// spellings were renamed in the header and must not be referenced here.
fn zigNode(token: c_uint) Ast.Node.Tag {
    return switch (token) {
        c.AST_NODE_ROOT => .root,
        c.AST_NODE_USINGNAMESPACE => .@"usingnamespace",
        c.AST_NODE_TEST_DECL => .test_decl,
        c.AST_NODE_GLOBAL_VAR_DECL => .global_var_decl,
        c.AST_NODE_LOCAL_VAR_DECL => .local_var_decl,
        c.AST_NODE_SIMPLE_VAR_DECL => .simple_var_decl,
        c.AST_NODE_ALIGNED_VAR_DECL => .aligned_var_decl,
        c.AST_NODE_ERRDEFER => .@"errdefer",
        c.AST_NODE_DEFER => .@"defer",
        c.AST_NODE_CATCH => .@"catch",
        c.AST_NODE_FIELD_ACCESS => .field_access,
        c.AST_NODE_UNWRAP_OPTIONAL => .unwrap_optional,
        c.AST_NODE_EQUAL_EQUAL => .equal_equal,
        c.AST_NODE_BANG_EQUAL => .bang_equal,
        c.AST_NODE_LESS_THAN => .less_than,
        c.AST_NODE_GREATER_THAN => .greater_than,
        c.AST_NODE_LESS_OR_EQUAL => .less_or_equal,
        c.AST_NODE_GREATER_OR_EQUAL => .greater_or_equal,
        c.AST_NODE_ASSIGN_MUL => .assign_mul,
        c.AST_NODE_ASSIGN_DIV => .assign_div,
        c.AST_NODE_ASSIGN_MOD => .assign_mod,
        c.AST_NODE_ASSIGN_ADD => .assign_add,
        c.AST_NODE_ASSIGN_SUB => .assign_sub,
        c.AST_NODE_ASSIGN_SHL => .assign_shl,
        c.AST_NODE_ASSIGN_SHL_SAT => .assign_shl_sat,
        c.AST_NODE_ASSIGN_SHR => .assign_shr,
        c.AST_NODE_ASSIGN_BIT_AND => .assign_bit_and,
        c.AST_NODE_ASSIGN_BIT_XOR => .assign_bit_xor,
        c.AST_NODE_ASSIGN_BIT_OR => .assign_bit_or,
        c.AST_NODE_ASSIGN_MUL_WRAP => .assign_mul_wrap,
        c.AST_NODE_ASSIGN_ADD_WRAP => .assign_add_wrap,
        c.AST_NODE_ASSIGN_SUB_WRAP => .assign_sub_wrap,
        c.AST_NODE_ASSIGN_MUL_SAT => .assign_mul_sat,
        c.AST_NODE_ASSIGN_ADD_SAT => .assign_add_sat,
        c.AST_NODE_ASSIGN_SUB_SAT => .assign_sub_sat,
        c.AST_NODE_ASSIGN => .assign,
        c.AST_NODE_ASSIGN_DESTRUCTURE => .assign_destructure,
        c.AST_NODE_MERGE_ERROR_SETS => .merge_error_sets,
        c.AST_NODE_MUL => .mul,
        c.AST_NODE_DIV => .div,
        c.AST_NODE_MOD => .mod,
        c.AST_NODE_ARRAY_MULT => .array_mult,
        c.AST_NODE_MUL_WRAP => .mul_wrap,
        c.AST_NODE_MUL_SAT => .mul_sat,
        c.AST_NODE_ADD => .add,
        c.AST_NODE_SUB => .sub,
        c.AST_NODE_ARRAY_CAT => .array_cat,
        c.AST_NODE_ADD_WRAP => .add_wrap,
        c.AST_NODE_SUB_WRAP => .sub_wrap,
        c.AST_NODE_ADD_SAT => .add_sat,
        c.AST_NODE_SUB_SAT => .sub_sat,
        c.AST_NODE_SHL => .shl,
        c.AST_NODE_SHL_SAT => .shl_sat,
        c.AST_NODE_SHR => .shr,
        c.AST_NODE_BIT_AND => .bit_and,
        c.AST_NODE_BIT_XOR => .bit_xor,
        c.AST_NODE_BIT_OR => .bit_or,
        c.AST_NODE_ORELSE => .@"orelse",
        c.AST_NODE_BOOL_AND => .bool_and,
        c.AST_NODE_BOOL_OR => .bool_or,
        c.AST_NODE_BOOL_NOT => .bool_not,
        c.AST_NODE_NEGATION => .negation,
        c.AST_NODE_BIT_NOT => .bit_not,
        c.AST_NODE_NEGATION_WRAP => .negation_wrap,
        c.AST_NODE_ADDRESS_OF => .address_of,
        c.AST_NODE_TRY => .@"try",
        c.AST_NODE_AWAIT => .@"await",
        c.AST_NODE_OPTIONAL_TYPE => .optional_type,
        c.AST_NODE_ARRAY_TYPE => .array_type,
        c.AST_NODE_ARRAY_TYPE_SENTINEL => .array_type_sentinel,
        c.AST_NODE_PTR_TYPE_ALIGNED => .ptr_type_aligned,
        c.AST_NODE_PTR_TYPE_SENTINEL => .ptr_type_sentinel,
        c.AST_NODE_PTR_TYPE => .ptr_type,
        c.AST_NODE_PTR_TYPE_BIT_RANGE => .ptr_type_bit_range,
        c.AST_NODE_SLICE_OPEN => .slice_open,
        c.AST_NODE_SLICE => .slice,
        c.AST_NODE_SLICE_SENTINEL => .slice_sentinel,
        c.AST_NODE_DEREF => .deref,
        c.AST_NODE_ARRAY_ACCESS => .array_access,
        c.AST_NODE_ARRAY_INIT_ONE => .array_init_one,
        c.AST_NODE_ARRAY_INIT_ONE_COMMA => .array_init_one_comma,
        c.AST_NODE_ARRAY_INIT_DOT_TWO => .array_init_dot_two,
        c.AST_NODE_ARRAY_INIT_DOT_TWO_COMMA => .array_init_dot_two_comma,
        c.AST_NODE_ARRAY_INIT_DOT => .array_init_dot,
        c.AST_NODE_ARRAY_INIT_DOT_COMMA => .array_init_dot_comma,
        c.AST_NODE_ARRAY_INIT => .array_init,
        c.AST_NODE_ARRAY_INIT_COMMA => .array_init_comma,
        c.AST_NODE_STRUCT_INIT_ONE => .struct_init_one,
        c.AST_NODE_STRUCT_INIT_ONE_COMMA => .struct_init_one_comma,
        c.AST_NODE_STRUCT_INIT_DOT_TWO => .struct_init_dot_two,
        c.AST_NODE_STRUCT_INIT_DOT_TWO_COMMA => .struct_init_dot_two_comma,
        c.AST_NODE_STRUCT_INIT_DOT => .struct_init_dot,
        c.AST_NODE_STRUCT_INIT_DOT_COMMA => .struct_init_dot_comma,
        c.AST_NODE_STRUCT_INIT => .struct_init,
        c.AST_NODE_STRUCT_INIT_COMMA => .struct_init_comma,
        c.AST_NODE_CALL_ONE => .call_one,
        c.AST_NODE_CALL_ONE_COMMA => .call_one_comma,
        c.AST_NODE_ASYNC_CALL_ONE => .async_call_one,
        c.AST_NODE_ASYNC_CALL_ONE_COMMA => .async_call_one_comma,
        c.AST_NODE_CALL => .call,
        c.AST_NODE_CALL_COMMA => .call_comma,
        c.AST_NODE_ASYNC_CALL => .async_call,
        c.AST_NODE_ASYNC_CALL_COMMA => .async_call_comma,
        c.AST_NODE_SWITCH => .@"switch",
        c.AST_NODE_SWITCH_COMMA => .switch_comma,
        c.AST_NODE_SWITCH_CASE_ONE => .switch_case_one,
        c.AST_NODE_SWITCH_CASE_INLINE_ONE => .switch_case_inline_one,
        c.AST_NODE_SWITCH_CASE => .switch_case,
        c.AST_NODE_SWITCH_CASE_INLINE => .switch_case_inline,
        c.AST_NODE_SWITCH_RANGE => .switch_range,
        c.AST_NODE_WHILE_SIMPLE => .while_simple,
        c.AST_NODE_WHILE_CONT => .while_cont,
        c.AST_NODE_WHILE => .@"while",
        c.AST_NODE_FOR_SIMPLE => .for_simple,
        c.AST_NODE_FOR => .@"for",
        c.AST_NODE_FOR_RANGE => .for_range,
        c.AST_NODE_IF_SIMPLE => .if_simple,
        c.AST_NODE_IF => .@"if",
        c.AST_NODE_SUSPEND => .@"suspend",
        c.AST_NODE_RESUME => .@"resume",
        c.AST_NODE_CONTINUE => .@"continue",
        c.AST_NODE_BREAK => .@"break",
        c.AST_NODE_RETURN => .@"return",
        c.AST_NODE_FN_PROTO_SIMPLE => .fn_proto_simple,
        c.AST_NODE_FN_PROTO_MULTI => .fn_proto_multi,
        c.AST_NODE_FN_PROTO_ONE => .fn_proto_one,
        c.AST_NODE_FN_PROTO => .fn_proto,
        c.AST_NODE_FN_DECL => .fn_decl,
        c.AST_NODE_ANYFRAME_TYPE => .anyframe_type,
        c.AST_NODE_ANYFRAME_LITERAL => .anyframe_literal,
        c.AST_NODE_CHAR_LITERAL => .char_literal,
        c.AST_NODE_NUMBER_LITERAL => .number_literal,
        c.AST_NODE_UNREACHABLE_LITERAL => .unreachable_literal,
        c.AST_NODE_IDENTIFIER => .identifier,
        c.AST_NODE_ENUM_LITERAL => .enum_literal,
        c.AST_NODE_STRING_LITERAL => .string_literal,
        c.AST_NODE_MULTILINE_STRING_LITERAL => .multiline_string_literal,
        c.AST_NODE_GROUPED_EXPRESSION => .grouped_expression,
        c.AST_NODE_BUILTIN_CALL_TWO => .builtin_call_two,
        c.AST_NODE_BUILTIN_CALL_TWO_COMMA => .builtin_call_two_comma,
        c.AST_NODE_BUILTIN_CALL => .builtin_call,
        c.AST_NODE_BUILTIN_CALL_COMMA => .builtin_call_comma,
        c.AST_NODE_ERROR_SET_DECL => .error_set_decl,
        c.AST_NODE_CONTAINER_DECL => .container_decl,
        c.AST_NODE_CONTAINER_DECL_TRAILING => .container_decl_trailing,
        c.AST_NODE_CONTAINER_DECL_TWO => .container_decl_two,
        c.AST_NODE_CONTAINER_DECL_TWO_TRAILING => .container_decl_two_trailing,
        c.AST_NODE_CONTAINER_DECL_ARG => .container_decl_arg,
        c.AST_NODE_CONTAINER_DECL_ARG_TRAILING => .container_decl_arg_trailing,
        c.AST_NODE_TAGGED_UNION => .tagged_union,
        c.AST_NODE_TAGGED_UNION_TRAILING => .tagged_union_trailing,
        c.AST_NODE_TAGGED_UNION_TWO => .tagged_union_two,
        c.AST_NODE_TAGGED_UNION_TWO_TRAILING => .tagged_union_two_trailing,
        c.AST_NODE_TAGGED_UNION_ENUM_TAG => .tagged_union_enum_tag,
        c.AST_NODE_TAGGED_UNION_ENUM_TAG_TRAILING => .tagged_union_enum_tag_trailing,
        c.AST_NODE_CONTAINER_FIELD_INIT => .container_field_init,
        c.AST_NODE_CONTAINER_FIELD_ALIGN => .container_field_align,
        c.AST_NODE_CONTAINER_FIELD => .container_field,
        c.AST_NODE_COMPTIME => .@"comptime",
        c.AST_NODE_NOSUSPEND => .@"nosuspend",
        c.AST_NODE_BLOCK_TWO => .block_two,
        c.AST_NODE_BLOCK_TWO_SEMICOLON => .block_two_semicolon,
        c.AST_NODE_BLOCK => .block,
        c.AST_NODE_BLOCK_SEMICOLON => .block_semicolon,
        c.AST_NODE_ASM_SIMPLE => .asm_simple,
        c.AST_NODE_ASM => .@"asm",
        c.AST_NODE_ASM_OUTPUT => .asm_output,
        c.AST_NODE_ASM_INPUT => .asm_input,
        c.AST_NODE_ERROR_VALUE => .error_value,
        c.AST_NODE_ERROR_UNION => .error_union,
        // NOTE(review): an unrecognised value yields an undefined tag rather
        // than failing loudly; consider `unreachable` or a panic once the C
        // enum is known to be fully covered.
        else => undefined,
    };
}
/// Converts a `c.Ast` produced by the C parser into a `std.zig.Ast`.
///
/// Tokens, nodes and extra data are copied into memory allocated from `gpa`.
/// `source` is not copied: the result borrows `c_ast.source`, so that buffer
/// must outlive the returned Ast. The returned Ast should be freed with
/// `deinit(gpa)`.
fn zigAst(gpa: Allocator, c_ast: c.Ast) !Ast {
    var tokens = Ast.TokenList{};
    errdefer tokens.deinit(gpa);
    // `resize` rather than `ensureTotalCapacity`: `set` indexes within `len`,
    // and `slice()` only exposes `len` elements.
    try tokens.resize(gpa, c_ast.tokens.len);
    for (0..c_ast.tokens.len) |i| {
        tokens.set(i, .{
            .tag = zigToken(c_ast.tokens.tags[i]),
            .start = c_ast.tokens.starts[i],
        });
    }

    var nodes = Ast.NodeList{};
    errdefer nodes.deinit(gpa);
    try nodes.resize(gpa, c_ast.nodes.len);
    for (0..c_ast.nodes.len) |i| {
        nodes.set(i, .{
            .tag = zigNode(c_ast.nodes.tags[i]),
            .main_token = c_ast.nodes.main_tokens[i],
            .data = Ast.Node.Data{
                .lhs = c_ast.nodes.datas[i].lhs,
                .rhs = c_ast.nodes.datas[i].rhs,
            },
        });
    }

    const extra_data = try gpa.alloc(Ast.Node.Index, c_ast.extra_data.len);
    errdefer gpa.free(extra_data);
    std.mem.copyForwards(
        Ast.Node.Index,
        extra_data,
        c_ast.extra_data.arr[0..c_ast.extra_data.len],
    );

    // An empty `errors` slice is allocated so that deinit has something
    // valid to free.
    const errors = try gpa.alloc(Ast.Error, 0);
    errdefer gpa.free(errors);

    return Ast{
        .source = c_ast.source[0..c_ast.source_len :0],
        .mode = .zig,
        .tokens = tokens.slice(),
        .nodes = nodes.slice(),
        .extra_data = extra_data,
        .errors = errors,
    };
}
// Sanity check that the cImport'ed header is reachable and that zigNode maps
// at least one C tag onto the expected Zig tag.
test "Ast header smoke test" {
    try std.testing.expectEqual(Ast.Node.Tag.@"if", zigNode(c.AST_NODE_IF));
}
// copy-pasted from parser_test.zig
const mem = std.mem;
const print = std.debug.print;
const io = std.io;
const maxInt = std.math.maxInt;
var fixed_buffer_mem: [100 * 1024]u8 = undefined;
/// Parses `source` with the C parser (`c.astParse`), converts the result via
/// `zigAst`, and renders it back to text.
///
/// Any parse errors recorded in the tree are printed to stderr with a caret
/// under the offending column, after which `error.ParseError` is returned.
/// On success the rendered text is returned (allocated with `allocator`;
/// caller frees) and `anything_changed` is set to whether it differs from
/// `source`.
fn testParse(source: [:0]const u8, allocator: mem.Allocator, anything_changed: *bool) ![]u8 {
    const err_out = io.getStdErr().writer();

    const c_tree = c.astParse(source, @intCast(source.len));
    var tree = try zigAst(allocator, c_tree);
    defer tree.deinit(allocator);

    for (tree.errors) |parse_error| {
        const loc = tree.tokenLocation(0, parse_error.token);
        try err_out.print("(memory buffer):{d}:{d}: error: ", .{ loc.line + 1, loc.column + 1 });
        try tree.renderError(parse_error, err_out);
        try err_out.print("\n{s}\n", .{source[loc.line_start..loc.line_end]});
        // Pad up to the error column, then mark it.
        for (0..loc.column) |_| {
            try err_out.writeAll(" ");
        }
        try err_out.writeAll("^");
        try err_out.writeAll("\n");
    }
    if (tree.errors.len != 0) return error.ParseError;

    const rendered = try tree.render(allocator);
    anything_changed.* = !mem.eql(u8, rendered, source);
    return rendered;
}
/// Runs one parse-and-render pass over `source` and checks the output against
/// `expected_source`.
///
/// `fba` is reset first so the same fixed buffer can be reused for every
/// iteration of the allocation-failure sweep. A change is expected exactly
/// when `source` and `expected_source` are different pointers; a mismatch with
/// what the render reported yields `error.TestFailed`.
fn testTransformImpl(allocator: mem.Allocator, fba: *std.heap.FixedBufferAllocator, source: [:0]const u8, expected_source: []const u8) !void {
    // reset the fixed buffer allocator each run so that it can be re-used for each
    // iteration of the failing index
    fba.reset();

    var anything_changed: bool = undefined;
    const result_source = try testParse(source, allocator, &anything_changed);
    // Release the rendered text on every exit path, including failed expectations.
    defer allocator.free(result_source);

    try std.testing.expectEqualStrings(expected_source, result_source);

    const changes_expected = source.ptr != expected_source.ptr;
    if (anything_changed != changes_expected) {
        print("std.zig.render returned {} instead of {}\n", .{ anything_changed, changes_expected });
        return error.TestFailed;
    }
}
/// Checks that `source` renders to `expected_source`, re-running the check
/// through `std.testing.checkAllAllocationFailures` with a fixed-buffer
/// allocator backed by `fixed_buffer_mem`.
fn testTransform(source: [:0]const u8, expected_source: []const u8) !void {
    var fba = std.heap.FixedBufferAllocator.init(fixed_buffer_mem[0..]);
    const backing = fba.allocator();
    return std.testing.checkAllAllocationFailures(
        backing,
        testTransformImpl,
        .{ &fba, source, expected_source },
    );
}
/// Checks that `source` is already canonical: it is passed as both input and
/// expected output, so the render must report no change.
fn testCanonical(source: [:0]const u8) !void {
    try testTransform(source, source);
}
// Expects blank lines before and after a lone comment to be removed by the
// parse-and-render round trip. The guard on the first line skips the test
// unconditionally, so testTransform is never reached.
// NOTE(review): presumably skipped until the C parser can handle this input — confirm and re-enable.
test "zig fmt: remove extra whitespace at start and end of file with comment between" {
if (true) return error.SkipZigTest;
try testTransform(
\\
\\
\\// hello
\\
\\
,
\\// hello
\\
);
}

View File

@@ -1,3 +1,4 @@
// Suite entry point: imports each test file and discards the result, the usual
// Zig idiom for making the `test` blocks of those files part of this run.
test "zig0 test suite" {
_ = @import("tokenizer_test.zig");
_ = @import("parser_test.zig");
}

View File

@@ -461,10 +461,9 @@ state:
const char* start = self->buffer + result.loc.start;
uint32_t len = self->index - result.loc.start;
TokenizerTag tag = getKeyword(start, len);
if (tag != TOKEN_INVALID) {
if (tag != TOKEN_INVALID)
result.tag = tag;
}
}
break;
case TOKENIZER_STATE_BUILTIN:
@@ -865,7 +864,8 @@ state:
.tag = TOKEN_EOF,
.loc = {
.start = self->index,
.end = self->index }
.end = self->index,
}
};
}
break;
@@ -939,7 +939,8 @@ state:
.tag = TOKEN_EOF,
.loc = {
.start = self->index,
.end = self->index }
.end = self->index,
}
};
}
break;

View File

@@ -8,7 +8,7 @@ const c = @cImport({
@cInclude("tokenizer.h");
});
fn zigToken(token: c_uint) Token.Tag {
pub fn zigToken(token: c_uint) Token.Tag {
return switch (token) {
c.TOKEN_INVALID => .invalid,
c.TOKEN_INVALID_PERIODASTERISKS => .invalid_periodasterisks,