From 1f134595de812b82b8ea175aebb088d603a660f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?= Date: Fri, 3 Jan 2025 19:23:22 +0200 Subject: [PATCH] start with parser tests --- ast.h | 340 ++++++++++++++++---------------- parser.c | 34 ++-- parser_test.zig | 468 ++++++++++++++++++++++++++++----------------- test_all.zig | 1 + tokenizer.c | 9 +- tokenizer_test.zig | 2 +- 6 files changed, 488 insertions(+), 366 deletions(-) diff --git a/ast.h b/ast.h index db9a3ba..7a28abc 100644 --- a/ast.h +++ b/ast.h @@ -9,91 +9,91 @@ typedef enum { /// sub_list[lhs...rhs] - AST_NODE_TAG_ROOT, + AST_NODE_ROOT, /// `usingnamespace lhs;`. rhs unused. main_token is `usingnamespace`. - AST_NODE_TAG_USINGNAMESPACE, + AST_NODE_USINGNAMESPACE, /// lhs is test name token (must be string literal or identifier), if any. /// rhs is the body node. - AST_NODE_TAG_TEST_DECL, + AST_NODE_TEST_DECL, /// lhs is the index into extra_data. /// rhs is the initialization expression, if any. /// main_token is `var` or `const`. - AST_NODE_TAG_GLOBAL_VAR_DECL, + AST_NODE_GLOBAL_VAR_DECL, /// `var a: x align(y) = rhs` /// lhs is the index into extra_data. /// main_token is `var` or `const`. - AST_NODE_TAG_LOCAL_VAR_DECL, + AST_NODE_LOCAL_VAR_DECL, /// `var a: lhs = rhs`. lhs and rhs may be unused. /// Can be local or global. /// main_token is `var` or `const`. - AST_NODE_TAG_SIMPLE_VAR_DECL, + AST_NODE_SIMPLE_VAR_DECL, /// `var a align(lhs) = rhs`. lhs and rhs may be unused. /// Can be local or global. /// main_token is `var` or `const`. - AST_NODE_TAG_ALIGNED_VAR_DECL, + AST_NODE_ALIGNED_VAR_DECL, /// lhs is the identifier token payload if any, /// rhs is the deferred expression. - AST_NODE_TAG_ERRDEFER, + AST_NODE_ERRDEFER, /// lhs is unused. /// rhs is the deferred expression. - AST_NODE_TAG_DEFER, + AST_NODE_DEFER, /// lhs catch rhs /// lhs catch |err| rhs /// main_token is the `catch` keyword. /// payload is determined by looking at the next token after the `catch` keyword. 
- AST_NODE_TAG_CATCH, + AST_NODE_CATCH, /// `lhs.a`. main_token is the dot. rhs is the identifier token index. - AST_NODE_TAG_FIELD_ACCESS, + AST_NODE_FIELD_ACCESS, /// `lhs.?`. main_token is the dot. rhs is the `?` token index. - AST_NODE_TAG_UNWRAP_OPTIONAL, + AST_NODE_UNWRAP_OPTIONAL, /// `lhs == rhs`. main_token is op. - AST_NODE_TAG_EQUAL_EQUAL, + AST_NODE_EQUAL_EQUAL, /// `lhs != rhs`. main_token is op. - AST_NODE_TAG_BANG_EQUAL, + AST_NODE_BANG_EQUAL, /// `lhs < rhs`. main_token is op. - AST_NODE_TAG_LESS_THAN, + AST_NODE_LESS_THAN, /// `lhs > rhs`. main_token is op. - AST_NODE_TAG_GREATER_THAN, + AST_NODE_GREATER_THAN, /// `lhs <= rhs`. main_token is op. - AST_NODE_TAG_LESS_OR_EQUAL, + AST_NODE_LESS_OR_EQUAL, /// `lhs >= rhs`. main_token is op. - AST_NODE_TAG_GREATER_OR_EQUAL, + AST_NODE_GREATER_OR_EQUAL, /// `lhs *= rhs`. main_token is op. - AST_NODE_TAG_ASSIGN_MUL, + AST_NODE_ASSIGN_MUL, /// `lhs /= rhs`. main_token is op. - AST_NODE_TAG_ASSIGN_DIV, + AST_NODE_ASSIGN_DIV, /// `lhs %= rhs`. main_token is op. - AST_NODE_TAG_ASSIGN_MOD, + AST_NODE_ASSIGN_MOD, /// `lhs += rhs`. main_token is op. - AST_NODE_TAG_ASSIGN_ADD, + AST_NODE_ASSIGN_ADD, /// `lhs -= rhs`. main_token is op. - AST_NODE_TAG_ASSIGN_SUB, + AST_NODE_ASSIGN_SUB, /// `lhs <<= rhs`. main_token is op. - AST_NODE_TAG_ASSIGN_SHL, + AST_NODE_ASSIGN_SHL, /// `lhs <<|= rhs`. main_token is op. - AST_NODE_TAG_ASSIGN_SHL_SAT, + AST_NODE_ASSIGN_SHL_SAT, /// `lhs >>= rhs`. main_token is op. - AST_NODE_TAG_ASSIGN_SHR, + AST_NODE_ASSIGN_SHR, /// `lhs &= rhs`. main_token is op. - AST_NODE_TAG_ASSIGN_BIT_AND, + AST_NODE_ASSIGN_BIT_AND, /// `lhs ^= rhs`. main_token is op. - AST_NODE_TAG_ASSIGN_BIT_XOR, + AST_NODE_ASSIGN_BIT_XOR, /// `lhs |= rhs`. main_token is op. - AST_NODE_TAG_ASSIGN_BIT_OR, + AST_NODE_ASSIGN_BIT_OR, /// `lhs *%= rhs`. main_token is op. - AST_NODE_TAG_ASSIGN_MUL_WRAP, + AST_NODE_ASSIGN_MUL_WRAP, /// `lhs +%= rhs`. main_token is op. 
- AST_NODE_TAG_ASSIGN_ADD_WRAP, + AST_NODE_ASSIGN_ADD_WRAP, /// `lhs -%= rhs`. main_token is op. - AST_NODE_TAG_ASSIGN_SUB_WRAP, + AST_NODE_ASSIGN_SUB_WRAP, /// `lhs *|= rhs`. main_token is op. - AST_NODE_TAG_ASSIGN_MUL_SAT, + AST_NODE_ASSIGN_MUL_SAT, /// `lhs +|= rhs`. main_token is op. - AST_NODE_TAG_ASSIGN_ADD_SAT, + AST_NODE_ASSIGN_ADD_SAT, /// `lhs -|= rhs`. main_token is op. - AST_NODE_TAG_ASSIGN_SUB_SAT, + AST_NODE_ASSIGN_SUB_SAT, /// `lhs = rhs`. main_token is op. - AST_NODE_TAG_ASSIGN, + AST_NODE_ASSIGN, /// `a, b, ... = rhs`. main_token is op. lhs is index into `extra_data` /// of an lhs elem count followed by an array of that many `Node.Index`, /// with each node having one of the following types: @@ -107,73 +107,73 @@ typedef enum { /// standard assignment LHS (which must be evaluated as an lvalue). /// There may be a preceding `comptime` token, which does not create a /// corresponding `comptime` node so must be manually detected. - AST_NODE_TAG_ASSIGN_DESTRUCTURE, + AST_NODE_ASSIGN_DESTRUCTURE, /// `lhs || rhs`. main_token is the `||`. - AST_NODE_TAG_MERGE_ERROR_SETS, + AST_NODE_MERGE_ERROR_SETS, /// `lhs * rhs`. main_token is the `*`. - AST_NODE_TAG_MUL, + AST_NODE_MUL, /// `lhs / rhs`. main_token is the `/`. - AST_NODE_TAG_DIV, + AST_NODE_DIV, /// `lhs % rhs`. main_token is the `%`. - AST_NODE_TAG_MOD, + AST_NODE_MOD, /// `lhs ** rhs`. main_token is the `**`. - AST_NODE_TAG_ARRAY_MULT, + AST_NODE_ARRAY_MULT, /// `lhs *% rhs`. main_token is the `*%`. - AST_NODE_TAG_MUL_WRAP, + AST_NODE_MUL_WRAP, /// `lhs *| rhs`. main_token is the `*|`. - AST_NODE_TAG_MUL_SAT, + AST_NODE_MUL_SAT, /// `lhs + rhs`. main_token is the `+`. - AST_NODE_TAG_ADD, + AST_NODE_ADD, /// `lhs - rhs`. main_token is the `-`. - AST_NODE_TAG_SUB, + AST_NODE_SUB, /// `lhs ++ rhs`. main_token is the `++`. - AST_NODE_TAG_ARRAY_CAT, + AST_NODE_ARRAY_CAT, /// `lhs +% rhs`. main_token is the `+%`. - AST_NODE_TAG_ADD_WRAP, + AST_NODE_ADD_WRAP, /// `lhs -% rhs`. main_token is the `-%`. 
- AST_NODE_TAG_SUB_WRAP, + AST_NODE_SUB_WRAP, /// `lhs +| rhs`. main_token is the `+|`. - AST_NODE_TAG_ADD_SAT, + AST_NODE_ADD_SAT, /// `lhs -| rhs`. main_token is the `-|`. - AST_NODE_TAG_SUB_SAT, + AST_NODE_SUB_SAT, /// `lhs << rhs`. main_token is the `<<`. - AST_NODE_TAG_SHL, + AST_NODE_SHL, /// `lhs <<| rhs`. main_token is the `<<|`. - AST_NODE_TAG_SHL_SAT, + AST_NODE_SHL_SAT, /// `lhs >> rhs`. main_token is the `>>`. - AST_NODE_TAG_SHR, + AST_NODE_SHR, /// `lhs & rhs`. main_token is the `&`. - AST_NODE_TAG_BIT_AND, + AST_NODE_BIT_AND, /// `lhs ^ rhs`. main_token is the `^`. - AST_NODE_TAG_BIT_XOR, + AST_NODE_BIT_XOR, /// `lhs | rhs`. main_token is the `|`. - AST_NODE_TAG_BIT_OR, + AST_NODE_BIT_OR, /// `lhs orelse rhs`. main_token is the `orelse`. - AST_NODE_TAG_ORELSE, + AST_NODE_ORELSE, /// `lhs and rhs`. main_token is the `and`. - AST_NODE_TAG_BOOL_AND, + AST_NODE_BOOL_AND, /// `lhs or rhs`. main_token is the `or`. - AST_NODE_TAG_BOOL_OR, + AST_NODE_BOOL_OR, /// `op lhs`. rhs unused. main_token is op. - AST_NODE_TAG_BOOL_NOT, + AST_NODE_BOOL_NOT, /// `op lhs`. rhs unused. main_token is op. - AST_NODE_TAG_NEGATION, + AST_NODE_NEGATION, /// `op lhs`. rhs unused. main_token is op. - AST_NODE_TAG_BIT_NOT, + AST_NODE_BIT_NOT, /// `op lhs`. rhs unused. main_token is op. - AST_NODE_TAG_NEGATION_WRAP, + AST_NODE_NEGATION_WRAP, /// `op lhs`. rhs unused. main_token is op. - AST_NODE_TAG_ADDRESS_OF, + AST_NODE_ADDRESS_OF, /// `op lhs`. rhs unused. main_token is op. - AST_NODE_TAG_TRY, + AST_NODE_TRY, /// `op lhs`. rhs unused. main_token is op. - AST_NODE_TAG_AWAIT, + AST_NODE_AWAIT, /// `?lhs`. rhs unused. main_token is the `?`. - AST_NODE_TAG_OPTIONAL_TYPE, + AST_NODE_OPTIONAL_TYPE, /// `[lhs]rhs`. - AST_NODE_TAG_ARRAY_TYPE, + AST_NODE_ARRAY_TYPE, /// `[lhs:a]b`. `ArrayTypeSentinel[rhs]`. - AST_NODE_TAG_ARRAY_TYPE_SENTINEL, + AST_NODE_ARRAY_TYPE_SENTINEL, /// `[*]align(lhs) rhs`. lhs can be omitted. /// `*align(lhs) rhs`. lhs can be omitted. /// `[]rhs`. 
@@ -181,7 +181,7 @@ typedef enum { /// if a slice, many-item pointer, or C-pointer /// main_token might be a ** token, which is shared with a parent/child /// pointer type and may require special handling. - AST_NODE_TAG_PTR_TYPE_ALIGNED, + AST_NODE_PTR_TYPE_ALIGNED, /// `[*:lhs]rhs`. lhs can be omitted. /// `*rhs`. /// `[:lhs]rhs`. @@ -189,297 +189,297 @@ typedef enum { /// if a slice, many-item pointer, or C-pointer /// main_token might be a ** token, which is shared with a parent/child /// pointer type and may require special handling. - AST_NODE_TAG_PTR_TYPE_SENTINEL, + AST_NODE_PTR_TYPE_SENTINEL, /// lhs is index into ptr_type. rhs is the element type expression. /// main_token is the asterisk if a single item pointer or the lbracket /// if a slice, many-item pointer, or C-pointer /// main_token might be a ** token, which is shared with a parent/child /// pointer type and may require special handling. - AST_NODE_TAG_PTR_TYPE, + AST_NODE_PTR_TYPE, /// lhs is index into ptr_type_bit_range. rhs is the element type expression. /// main_token is the asterisk if a single item pointer or the lbracket /// if a slice, many-item pointer, or C-pointer /// main_token might be a ** token, which is shared with a parent/child /// pointer type and may require special handling. - AST_NODE_TAG_PTR_TYPE_BIT_RANGE, + AST_NODE_PTR_TYPE_BIT_RANGE, /// `lhs[rhs..]` /// main_token is the lbracket. - AST_NODE_TAG_SLICE_OPEN, + AST_NODE_SLICE_OPEN, /// `lhs[b..c]`. rhs is index into Slice /// main_token is the lbracket. - AST_NODE_TAG_SLICE, + AST_NODE_SLICE, /// `lhs[b..c :d]`. rhs is index into SliceSentinel. Slice end c can be omitted. /// main_token is the lbracket. - AST_NODE_TAG_SLICE_SENTINEL, + AST_NODE_SLICE_SENTINEL, /// `lhs.*`. rhs is unused. - AST_NODE_TAG_DEREF, + AST_NODE_DEREF, /// `lhs[rhs]`. - AST_NODE_TAG_ARRAY_ACCESS, + AST_NODE_ARRAY_ACCESS, /// `lhs{rhs}`. rhs can be omitted. - AST_NODE_TAG_ARRAY_INIT_ONE, + AST_NODE_ARRAY_INIT_ONE, /// `lhs{rhs,}`. 
rhs can *not* be omitted - AST_NODE_TAG_ARRAY_INIT_ONE_COMMA, + AST_NODE_ARRAY_INIT_ONE_COMMA, /// `.{lhs, rhs}`. lhs and rhs can be omitted. - AST_NODE_TAG_ARRAY_INIT_DOT_TWO, + AST_NODE_ARRAY_INIT_DOT_TWO, /// Same as `array_init_dot_two` except there is known to be a trailing comma /// before the final rbrace. - AST_NODE_TAG_ARRAY_INIT_DOT_TWO_COMMA, + AST_NODE_ARRAY_INIT_DOT_TWO_COMMA, /// `.{a, b}`. `sub_list[lhs..rhs]`. - AST_NODE_TAG_ARRAY_INIT_DOT, + AST_NODE_ARRAY_INIT_DOT, /// Same as `array_init_dot` except there is known to be a trailing comma /// before the final rbrace. - AST_NODE_TAG_ARRAY_INIT_DOT_COMMA, + AST_NODE_ARRAY_INIT_DOT_COMMA, /// `lhs{a, b}`. `sub_range_list[rhs]`. lhs can be omitted which means `.{a, b}`. - AST_NODE_TAG_ARRAY_INIT, + AST_NODE_ARRAY_INIT, /// Same as `array_init` except there is known to be a trailing comma /// before the final rbrace. - AST_NODE_TAG_ARRAY_INIT_COMMA, + AST_NODE_ARRAY_INIT_COMMA, /// `lhs{.a = rhs}`. rhs can be omitted making it empty. /// main_token is the lbrace. - AST_NODE_TAG_STRUCT_INIT_ONE, + AST_NODE_STRUCT_INIT_ONE, /// `lhs{.a = rhs,}`. rhs can *not* be omitted. /// main_token is the lbrace. - AST_NODE_TAG_STRUCT_INIT_ONE_COMMA, + AST_NODE_STRUCT_INIT_ONE_COMMA, /// `.{.a = lhs, .b = rhs}`. lhs and rhs can be omitted. /// main_token is the lbrace. /// No trailing comma before the rbrace. - AST_NODE_TAG_STRUCT_INIT_DOT_TWO, + AST_NODE_STRUCT_INIT_DOT_TWO, /// Same as `struct_init_dot_two` except there is known to be a trailing comma /// before the final rbrace. - AST_NODE_TAG_STRUCT_INIT_DOT_TWO_COMMA, + AST_NODE_STRUCT_INIT_DOT_TWO_COMMA, /// `.{.a = b, .c = d}`. `sub_list[lhs..rhs]`. /// main_token is the lbrace. - AST_NODE_TAG_STRUCT_INIT_DOT, + AST_NODE_STRUCT_INIT_DOT, /// Same as `struct_init_dot` except there is known to be a trailing comma /// before the final rbrace. - AST_NODE_TAG_STRUCT_INIT_DOT_COMMA, + AST_NODE_STRUCT_INIT_DOT_COMMA, /// `lhs{.a = b, .c = d}`. `sub_range_list[rhs]`. 
/// lhs can be omitted which means `.{.a = b, .c = d}`. /// main_token is the lbrace. - AST_NODE_TAG_STRUCT_INIT, + AST_NODE_STRUCT_INIT, /// Same as `struct_init` except there is known to be a trailing comma /// before the final rbrace. - AST_NODE_TAG_STRUCT_INIT_COMMA, + AST_NODE_STRUCT_INIT_COMMA, /// `lhs(rhs)`. rhs can be omitted. /// main_token is the lparen. - AST_NODE_TAG_CALL_ONE, + AST_NODE_CALL_ONE, /// `lhs(rhs,)`. rhs can be omitted. /// main_token is the lparen. - AST_NODE_TAG_CALL_ONE_COMMA, + AST_NODE_CALL_ONE_COMMA, /// `async lhs(rhs)`. rhs can be omitted. - AST_NODE_TAG_ASYNC_CALL_ONE, + AST_NODE_ASYNC_CALL_ONE, /// `async lhs(rhs,)`. - AST_NODE_TAG_ASYNC_CALL_ONE_COMMA, + AST_NODE_ASYNC_CALL_ONE_COMMA, /// `lhs(a, b, c)`. `SubRange[rhs]`. /// main_token is the `(`. - AST_NODE_TAG_CALL, + AST_NODE_CALL, /// `lhs(a, b, c,)`. `SubRange[rhs]`. /// main_token is the `(`. - AST_NODE_TAG_CALL_COMMA, + AST_NODE_CALL_COMMA, /// `async lhs(a, b, c)`. `SubRange[rhs]`. /// main_token is the `(`. - AST_NODE_TAG_ASYNC_CALL, + AST_NODE_ASYNC_CALL, /// `async lhs(a, b, c,)`. `SubRange[rhs]`. /// main_token is the `(`. - AST_NODE_TAG_ASYNC_CALL_COMMA, + AST_NODE_ASYNC_CALL_COMMA, /// `switch(lhs) {}`. `SubRange[rhs]`. /// `main_token` is the identifier of a preceding label, if any; otherwise `switch`. - AST_NODE_TAG_SWITCH, + AST_NODE_SWITCH, /// Same as switch except there is known to be a trailing comma /// before the final rbrace - AST_NODE_TAG_SWITCH_COMMA, + AST_NODE_SWITCH_COMMA, /// `lhs => rhs`. If lhs is omitted it means `else`. /// main_token is the `=>` - AST_NODE_TAG_SWITCH_CASE_ONE, + AST_NODE_SWITCH_CASE_ONE, /// Same ast `switch_case_one` but the case is inline - AST_NODE_TAG_SWITCH_CASE_INLINE_ONE, + AST_NODE_SWITCH_CASE_INLINE_ONE, /// `a, b, c => rhs`. `SubRange[lhs]`. 
/// main_token is the `=>` - AST_NODE_TAG_SWITCH_CASE, + AST_NODE_SWITCH_CASE, /// Same ast `switch_case` but the case is inline - AST_NODE_TAG_SWITCH_CASE_INLINE, + AST_NODE_SWITCH_CASE_INLINE, /// `lhs...rhs`. - AST_NODE_TAG_SWITCH_RANGE, + AST_NODE_SWITCH_RANGE, /// `while (lhs) rhs`. /// `while (lhs) |x| rhs`. - AST_NODE_TAG_WHILE_SIMPLE, + AST_NODE_WHILE_SIMPLE, /// `while (lhs) : (a) b`. `WhileCont[rhs]`. /// `while (lhs) : (a) b`. `WhileCont[rhs]`. - AST_NODE_TAG_WHILE_CONT, + AST_NODE_WHILE_CONT, /// `while (lhs) : (a) b else c`. `While[rhs]`. /// `while (lhs) |x| : (a) b else c`. `While[rhs]`. /// `while (lhs) |x| : (a) b else |y| c`. `While[rhs]`. /// The cont expression part `: (a)` may be omitted. - AST_NODE_TAG_WHILE, + AST_NODE_WHILE, /// `for (lhs) rhs`. - AST_NODE_TAG_FOR_SIMPLE, + AST_NODE_FOR_SIMPLE, /// `for (lhs[0..inputs]) lhs[inputs + 1] else lhs[inputs + 2]`. `For[rhs]`. - AST_NODE_TAG_FOR, + AST_NODE_FOR, /// `lhs..rhs`. rhs can be omitted. - AST_NODE_TAG_FOR_RANGE, + AST_NODE_FOR_RANGE, /// `if (lhs) rhs`. /// `if (lhs) |a| rhs`. - AST_NODE_TAG_IF_SIMPLE, + AST_NODE_IF_SIMPLE, /// `if (lhs) a else b`. `If[rhs]`. /// `if (lhs) |x| a else b`. `If[rhs]`. /// `if (lhs) |x| a else |y| b`. `If[rhs]`. - AST_NODE_TAG_IF, + AST_NODE_IF, /// `suspend lhs`. lhs can be omitted. rhs is unused. - AST_NODE_TAG_SUSPEND, + AST_NODE_SUSPEND, /// `resume lhs`. rhs is unused. - AST_NODE_TAG_RESUME, + AST_NODE_RESUME, /// `continue :lhs rhs` /// both lhs and rhs may be omitted. - AST_NODE_TAG_CONTINUE, + AST_NODE_CONTINUE, /// `break :lhs rhs` /// both lhs and rhs may be omitted. - AST_NODE_TAG_BREAK, + AST_NODE_BREAK, /// `return lhs`. lhs can be omitted. rhs is unused. - AST_NODE_TAG_RETURN, + AST_NODE_RETURN, /// `fn (a: lhs) rhs`. lhs can be omitted. /// anytype and ... parameters are omitted from the AST tree. /// main_token is the `fn` keyword. /// extern function declarations use this tag. 
- AST_NODE_TAG_FN_PROTO_SIMPLE, + AST_NODE_FN_PROTO_SIMPLE, /// `fn (a: b, c: d) rhs`. `sub_range_list[lhs]`. /// anytype and ... parameters are omitted from the AST tree. /// main_token is the `fn` keyword. /// extern function declarations use this tag. - AST_NODE_TAG_FN_PROTO_MULTI, + AST_NODE_FN_PROTO_MULTI, /// `fn (a: b) addrspace(e) linksection(f) callconv(g) rhs`. `FnProtoOne[lhs]`. /// zero or one parameters. /// anytype and ... parameters are omitted from the AST tree. /// main_token is the `fn` keyword. /// extern function declarations use this tag. - AST_NODE_TAG_FN_PROTO_ONE, + AST_NODE_FN_PROTO_ONE, /// `fn (a: b, c: d) addrspace(e) linksection(f) callconv(g) rhs`. `FnProto[lhs]`. /// anytype and ... parameters are omitted from the AST tree. /// main_token is the `fn` keyword. /// extern function declarations use this tag. - AST_NODE_TAG_FN_PROTO, + AST_NODE_FN_PROTO, /// lhs is the fn_proto. /// rhs is the function body block. /// Note that extern function declarations use the fn_proto tags rather /// than this one. - AST_NODE_TAG_FN_DECL, + AST_NODE_FN_DECL, /// `anyframe->rhs`. main_token is `anyframe`. `lhs` is arrow token index. - AST_NODE_TAG_ANYFRAME_TYPE, + AST_NODE_ANYFRAME_TYPE, /// Both lhs and rhs unused. - AST_NODE_TAG_ANYFRAME_LITERAL, + AST_NODE_ANYFRAME_LITERAL, /// Both lhs and rhs unused. - AST_NODE_TAG_CHAR_LITERAL, + AST_NODE_CHAR_LITERAL, /// Both lhs and rhs unused. - AST_NODE_TAG_NUMBER_LITERAL, + AST_NODE_NUMBER_LITERAL, /// Both lhs and rhs unused. - AST_NODE_TAG_UNREACHABLE_LITERAL, + AST_NODE_UNREACHABLE_LITERAL, /// Both lhs and rhs unused. /// Most identifiers will not have explicit AST nodes, however for expressions /// which could be one of many different kinds of AST nodes, there will be an /// identifier AST node for it. - AST_NODE_TAG_IDENTIFIER, + AST_NODE_IDENTIFIER, /// lhs is the dot token index, rhs unused, main_token is the identifier. 
- AST_NODE_TAG_ENUM_LITERAL, + AST_NODE_ENUM_LITERAL, /// main_token is the string literal token /// Both lhs and rhs unused. - AST_NODE_TAG_STRING_LITERAL, + AST_NODE_STRING_LITERAL, /// main_token is the first token index (redundant with lhs) /// lhs is the first token index; rhs is the last token index. /// Could be a series of multiline_string_literal_line tokens, or a single /// string_literal token. - AST_NODE_TAG_MULTILINE_STRING_LITERAL, + AST_NODE_MULTILINE_STRING_LITERAL, /// `(lhs)`. main_token is the `(`; rhs is the token index of the `)`. - AST_NODE_TAG_GROUPED_EXPRESSION, + AST_NODE_GROUPED_EXPRESSION, /// `@a(lhs, rhs)`. lhs and rhs may be omitted. /// main_token is the builtin token. - AST_NODE_TAG_BUILTIN_CALL_TWO, + AST_NODE_BUILTIN_CALL_TWO, /// Same as builtin_call_two but there is known to be a trailing comma before the rparen. - AST_NODE_TAG_BUILTIN_CALL_TWO_COMMA, + AST_NODE_BUILTIN_CALL_TWO_COMMA, /// `@a(b, c)`. `sub_list[lhs..rhs]`. /// main_token is the builtin token. - AST_NODE_TAG_BUILTIN_CALL, + AST_NODE_BUILTIN_CALL, /// Same as builtin_call but there is known to be a trailing comma before the rparen. - AST_NODE_TAG_BUILTIN_CALL_COMMA, + AST_NODE_BUILTIN_CALL_COMMA, /// `error{a, b}`. /// rhs is the rbrace, lhs is unused. - AST_NODE_TAG_ERROR_SET_DECL, + AST_NODE_ERROR_SET_DECL, /// `struct {}`, `union {}`, `opaque {}`, `enum {}`. `extra_data[lhs..rhs]`. /// main_token is `struct`, `union`, `opaque`, `enum` keyword. - AST_NODE_TAG_CONTAINER_DECL, + AST_NODE_CONTAINER_DECL, /// Same as ContainerDecl but there is known to be a trailing comma /// or semicolon before the rbrace. - AST_NODE_TAG_CONTAINER_DECL_TRAILING, + AST_NODE_CONTAINER_DECL_TRAILING, /// `struct {lhs, rhs}`, `union {lhs, rhs}`, `opaque {lhs, rhs}`, `enum {lhs, rhs}`. /// lhs or rhs can be omitted. /// main_token is `struct`, `union`, `opaque`, `enum` keyword. 
- AST_NODE_TAG_CONTAINER_DECL_TWO, + AST_NODE_CONTAINER_DECL_TWO, /// Same as ContainerDeclTwo except there is known to be a trailing comma /// or semicolon before the rbrace. - AST_NODE_TAG_CONTAINER_DECL_TWO_TRAILING, + AST_NODE_CONTAINER_DECL_TWO_TRAILING, /// `struct(lhs)` / `union(lhs)` / `enum(lhs)`. `SubRange[rhs]`. - AST_NODE_TAG_CONTAINER_DECL_ARG, + AST_NODE_CONTAINER_DECL_ARG, /// Same as container_decl_arg but there is known to be a trailing /// comma or semicolon before the rbrace. - AST_NODE_TAG_CONTAINER_DECL_ARG_TRAILING, + AST_NODE_CONTAINER_DECL_ARG_TRAILING, /// `union(enum) {}`. `sub_list[lhs..rhs]`. /// Note that tagged unions with explicitly provided enums are represented /// by `container_decl_arg`. - AST_NODE_TAG_TAGGED_UNION, + AST_NODE_TAGGED_UNION, /// Same as tagged_union but there is known to be a trailing comma /// or semicolon before the rbrace. - AST_NODE_TAG_TAGGED_UNION_TRAILING, + AST_NODE_TAGGED_UNION_TRAILING, /// `union(enum) {lhs, rhs}`. lhs or rhs may be omitted. /// Note that tagged unions with explicitly provided enums are represented /// by `container_decl_arg`. - AST_NODE_TAG_TAGGED_UNION_TWO, + AST_NODE_TAGGED_UNION_TWO, /// Same as tagged_union_two but there is known to be a trailing comma /// or semicolon before the rbrace. - AST_NODE_TAG_TAGGED_UNION_TWO_TRAILING, + AST_NODE_TAGGED_UNION_TWO_TRAILING, /// `union(enum(lhs)) {}`. `SubRange[rhs]`. - AST_NODE_TAG_TAGGED_UNION_ENUM_TAG, + AST_NODE_TAGGED_UNION_ENUM_TAG, /// Same as tagged_union_enum_tag but there is known to be a trailing comma /// or semicolon before the rbrace. - AST_NODE_TAG_TAGGED_UNION_ENUM_TAG_TRAILING, + AST_NODE_TAGGED_UNION_ENUM_TAG_TRAILING, /// `a: lhs = rhs,`. lhs and rhs can be omitted. /// main_token is the field name identifier. /// lastToken() does not include the possible trailing comma. - AST_NODE_TAG_CONTAINER_FIELD_INIT, + AST_NODE_CONTAINER_FIELD_INIT, /// `a: lhs align(rhs),`. rhs can be omitted. 
/// main_token is the field name identifier. /// lastToken() does not include the possible trailing comma. - AST_NODE_TAG_CONTAINER_FIELD_ALIGN, + AST_NODE_CONTAINER_FIELD_ALIGN, /// `a: lhs align(c) = d,`. `container_field_list[rhs]`. /// main_token is the field name identifier. /// lastToken() does not include the possible trailing comma. - AST_NODE_TAG_CONTAINER_FIELD, + AST_NODE_CONTAINER_FIELD, /// `comptime lhs`. rhs unused. - AST_NODE_TAG_COMPTIME, + AST_NODE_COMPTIME, /// `nosuspend lhs`. rhs unused. - AST_NODE_TAG_NOSUSPEND, + AST_NODE_NOSUSPEND, /// `{lhs rhs}`. rhs or lhs can be omitted. /// main_token points at the lbrace. - AST_NODE_TAG_BLOCK_TWO, + AST_NODE_BLOCK_TWO, /// Same as block_two but there is known to be a semicolon before the rbrace. - AST_NODE_TAG_BLOCK_TWO_SEMICOLON, + AST_NODE_BLOCK_TWO_SEMICOLON, /// `{}`. `sub_list[lhs..rhs]`. /// main_token points at the lbrace. - AST_NODE_TAG_BLOCK, + AST_NODE_BLOCK, /// Same as block but there is known to be a semicolon before the rbrace. - AST_NODE_TAG_BLOCK_SEMICOLON, + AST_NODE_BLOCK_SEMICOLON, /// `asm(lhs)`. rhs is the token index of the rparen. - AST_NODE_TAG_ASM_SIMPLE, + AST_NODE_ASM_SIMPLE, /// `asm(lhs, a)`. `Asm[rhs]`. - AST_NODE_TAG_ASM, + AST_NODE_ASM, /// `[a] "b" (c)`. lhs is 0, rhs is token index of the rparen. /// `[a] "b" (-> lhs)`. rhs is token index of the rparen. /// main_token is `a`. - AST_NODE_TAG_ASM_OUTPUT, + AST_NODE_ASM_OUTPUT, /// `[a] "b" (lhs)`. rhs is token index of the rparen. /// main_token is `a`. - AST_NODE_TAG_ASM_INPUT, + AST_NODE_ASM_INPUT, /// `error.a`. lhs is token index of `.`. rhs is token index of `a`. - AST_NODE_TAG_ERROR_VALUE, + AST_NODE_ERROR_VALUE, /// `lhs!rhs`. main_token is the `!`. 
- AST_NODE_TAG_ERROR_UNION, + AST_NODE_ERROR_UNION, } AstNodeTag; typedef uint32_t AstTokenIndex; diff --git a/parser.c b/parser.c index 131f2c4..d85a0ab 100644 --- a/parser.c +++ b/parser.c @@ -182,7 +182,7 @@ static AstNodeIndex expectContainerField(Parser* p) { return addNode( &p->nodes, (AstNodeItem) { - .tag = AST_NODE_TAG_CONTAINER_FIELD_INIT, + .tag = AST_NODE_CONTAINER_FIELD_INIT, .main_token = main_token, .data = { .lhs = type_expr, @@ -193,7 +193,7 @@ static AstNodeIndex expectContainerField(Parser* p) { return addNode( &p->nodes, (AstNodeItem) { - .tag = AST_NODE_TAG_CONTAINER_FIELD_ALIGN, + .tag = AST_NODE_CONTAINER_FIELD_ALIGN, .main_token = main_token, .data = { .lhs = type_expr, @@ -204,7 +204,7 @@ static AstNodeIndex expectContainerField(Parser* p) { return addNode( &p->nodes, (AstNodeItem) { - .tag = AST_NODE_TAG_CONTAINER_FIELD, + .tag = AST_NODE_CONTAINER_FIELD, .main_token = main_token, .data = { .lhs = type_expr, @@ -244,7 +244,7 @@ static AstNodeIndex parsePrimaryTypeExpr(Parser* p) { return addNode( &p->nodes, (AstNodeItem) { - .tag = AST_NODE_TAG_IDENTIFIER, + .tag = AST_NODE_IDENTIFIER, .main_token = nextToken(p), .data = {} }); case TOKEN_KEYWORD_INLINE: @@ -310,7 +310,7 @@ static AstNodeIndex parseSuffixExpr(Parser* p) { return addNode( &p->nodes, (AstNodeItem) { - .tag = comma ? AST_NODE_TAG_CALL_ONE_COMMA : AST_NODE_TAG_CALL_ONE, + .tag = comma ? AST_NODE_CALL_ONE_COMMA : AST_NODE_CALL_ONE, .main_token = lparen, .data = { .lhs = res, @@ -321,7 +321,7 @@ static AstNodeIndex parseSuffixExpr(Parser* p) { return addNode( &p->nodes, (AstNodeItem) { - .tag = comma ? AST_NODE_TAG_CALL_ONE_COMMA : AST_NODE_TAG_CALL_ONE, + .tag = comma ? AST_NODE_CALL_ONE_COMMA : AST_NODE_CALL_ONE, .main_token = lparen, .data = { .lhs = res, @@ -333,7 +333,7 @@ static AstNodeIndex parseSuffixExpr(Parser* p) { return addNode( &p->nodes, (AstNodeItem) { - .tag = comma ? AST_NODE_TAG_CALL_COMMA : AST_NODE_TAG_CALL, + .tag = comma ? 
AST_NODE_CALL_COMMA : AST_NODE_CALL, .main_token = lparen, .data = { .lhs = res, @@ -368,7 +368,7 @@ static AstNodeIndex parseErrorUnionExpr(Parser* p) { return addNode( &p->nodes, (AstNodeItem) { - .tag = AST_NODE_TAG_ERROR_UNION, + .tag = AST_NODE_ERROR_UNION, .main_token = bang, .data = { .lhs = suffix_expr, @@ -413,7 +413,7 @@ static AstNodeIndex parseFnProto(Parser* p) { if (fn_token == null_node) return null_node; - AstNodeIndex fn_proto_index = reserveNode(p, AST_NODE_TAG_FN_PROTO); + AstNodeIndex fn_proto_index = reserveNode(p, AST_NODE_FN_PROTO); eatToken(p, TOKEN_IDENTIFIER); @@ -432,7 +432,7 @@ static AstNodeIndex parseFnProto(Parser* p) { p, fn_proto_index, (AstNodeItem) { - .tag = AST_NODE_TAG_FN_PROTO_SIMPLE, + .tag = AST_NODE_FN_PROTO_SIMPLE, .main_token = fn_token, .data = { .lhs = params.payload.zero_or_one, @@ -568,7 +568,7 @@ static AstNodeIndex parseBlock(Parser* p) { return addNode( &p->nodes, (AstNodeItem) { - .tag = AST_NODE_TAG_BLOCK_TWO, + .tag = AST_NODE_BLOCK_TWO, .main_token = lbrace, .data = { .lhs = 0, @@ -579,7 +579,7 @@ static AstNodeIndex parseBlock(Parser* p) { return addNode( &p->nodes, (AstNodeItem) { - .tag = semicolon ? AST_NODE_TAG_BLOCK_TWO_SEMICOLON : AST_NODE_TAG_BLOCK_TWO, + .tag = semicolon ? AST_NODE_BLOCK_TWO_SEMICOLON : AST_NODE_BLOCK_TWO, .main_token = lbrace, .data = { .lhs = p->scratch.arr[scratch_top.old_len], @@ -590,7 +590,7 @@ static AstNodeIndex parseBlock(Parser* p) { return addNode( &p->nodes, (AstNodeItem) { - .tag = semicolon ? AST_NODE_TAG_BLOCK_TWO_SEMICOLON : AST_NODE_TAG_BLOCK_TWO, + .tag = semicolon ? AST_NODE_BLOCK_TWO_SEMICOLON : AST_NODE_BLOCK_TWO, .main_token = lbrace, .data = { .lhs = p->scratch.arr[scratch_top.old_len], @@ -602,7 +602,7 @@ static AstNodeIndex parseBlock(Parser* p) { return addNode( &p->nodes, (AstNodeItem) { - .tag = semicolon ? AST_NODE_TAG_BLOCK_SEMICOLON : AST_NODE_TAG_BLOCK, + .tag = semicolon ? 
AST_NODE_BLOCK_SEMICOLON : AST_NODE_BLOCK, .main_token = lbrace, .data = { .lhs = span.start, @@ -673,13 +673,13 @@ static AstNodeIndex expectTopLevelDecl(Parser* p) { p->tok_i++; return fn_proto; case TOKEN_L_BRACE:; - AstNodeIndex fn_decl_index = reserveNode(p, AST_NODE_TAG_FN_DECL); + AstNodeIndex fn_decl_index = reserveNode(p, AST_NODE_FN_DECL); AstNodeIndex body_block = parseBlock(p); return setNode( p, fn_decl_index, (AstNodeItem) { - .tag = AST_NODE_TAG_FN_DECL, + .tag = AST_NODE_FN_DECL, .main_token = p->nodes.main_tokens[fn_proto], .data = { .lhs = fn_proto, .rhs = body_block }, }); @@ -885,7 +885,7 @@ break_loop:; } void parseRoot(Parser* p) { - addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_TAG_ROOT, .main_token = 0 }); + addNode(&p->nodes, (AstNodeItem) { .tag = AST_NODE_ROOT, .main_token = 0 }); Members root_members = parseContainerMembers(p); AstSubRange root_decls = membersToSpan(root_members, p); diff --git a/parser_test.zig b/parser_test.zig index 2ffb2ba..0b1ee79 100644 --- a/parser_test.zig +++ b/parser_test.zig @@ -2,194 +2,314 @@ const std = @import("std"); const testing = std.testing; const Ast = std.zig.Ast; +const Allocator = std.mem.Allocator; const c = @cImport({ @cInclude("ast.h"); }); +const zigToken = @import("./tokenizer_test.zig").zigToken; + fn zigNode(token: c_uint) Ast.Node.Tag { return switch (token) { - c.AST_NODE_TAG_ROOT => .root, - c.AST_NODE_TAG_USINGNAMESPACE => .@"usingnamespace", - c.AST_NODE_TAG_TEST_DECL => .test_decl, - c.AST_NODE_TAG_GLOBAL_VAR_DECL => .global_var_decl, - c.AST_NODE_TAG_LOCAL_VAR_DECL => .local_var_decl, - c.AST_NODE_TAG_SIMPLE_VAR_DECL => .simple_var_decl, - c.AST_NODE_TAG_ALIGNED_VAR_DECL => .aligned_var_decl, - c.AST_NODE_TAG_ERRDEFER => .@"errdefer", - c.AST_NODE_TAG_DEFER => .@"defer", - c.AST_NODE_TAG_CATCH => .@"catch", - c.AST_NODE_TAG_FIELD_ACCESS => .field_access, - c.AST_NODE_TAG_UNWRAP_OPTIONAL => .unwrap_optional, - c.AST_NODE_TAG_EQUAL_EQUAL => .equal_equal, - 
c.AST_NODE_TAG_BANG_EQUAL => .bang_equal, - c.AST_NODE_TAG_LESS_THAN => .less_than, - c.AST_NODE_TAG_GREATER_THAN => .greater_than, - c.AST_NODE_TAG_LESS_OR_EQUAL => .less_or_equal, - c.AST_NODE_TAG_GREATER_OR_EQUAL => .greater_or_equal, - c.AST_NODE_TAG_ASSIGN_MUL => .assign_mul, - c.AST_NODE_TAG_ASSIGN_DIV => .assign_div, - c.AST_NODE_TAG_ASSIGN_MOD => .assign_mod, - c.AST_NODE_TAG_ASSIGN_ADD => .assign_add, - c.AST_NODE_TAG_ASSIGN_SUB => .assign_sub, - c.AST_NODE_TAG_ASSIGN_SHL => .assign_shl, - c.AST_NODE_TAG_ASSIGN_SHL_SAT => .assign_shl_sat, - c.AST_NODE_TAG_ASSIGN_SHR => .assign_shr, - c.AST_NODE_TAG_ASSIGN_BIT_AND => .assign_bit_and, - c.AST_NODE_TAG_ASSIGN_BIT_XOR => .assign_bit_xor, - c.AST_NODE_TAG_ASSIGN_BIT_OR => .assign_bit_or, - c.AST_NODE_TAG_ASSIGN_MUL_WRAP => .assign_mul_wrap, - c.AST_NODE_TAG_ASSIGN_ADD_WRAP => .assign_add_wrap, - c.AST_NODE_TAG_ASSIGN_SUB_WRAP => .assign_sub_wrap, - c.AST_NODE_TAG_ASSIGN_MUL_SAT => .assign_mul_sat, - c.AST_NODE_TAG_ASSIGN_ADD_SAT => .assign_add_sat, - c.AST_NODE_TAG_ASSIGN_SUB_SAT => .assign_sub_sat, - c.AST_NODE_TAG_ASSIGN => .assign, - c.AST_NODE_TAG_ASSIGN_DESTRUCTURE => .assign_destructure, - c.AST_NODE_TAG_MERGE_ERROR_SETS => .merge_error_sets, - c.AST_NODE_TAG_MUL => .mul, - c.AST_NODE_TAG_DIV => .div, - c.AST_NODE_TAG_MOD => .mod, - c.AST_NODE_TAG_ARRAY_MULT => .array_mult, - c.AST_NODE_TAG_MUL_WRAP => .mul_wrap, - c.AST_NODE_TAG_MUL_SAT => .mul_sat, - c.AST_NODE_TAG_ADD => .add, - c.AST_NODE_TAG_SUB => .sub, - c.AST_NODE_TAG_ARRAY_CAT => .array_cat, - c.AST_NODE_TAG_ADD_WRAP => .add_wrap, - c.AST_NODE_TAG_SUB_WRAP => .sub_wrap, - c.AST_NODE_TAG_ADD_SAT => .add_sat, - c.AST_NODE_TAG_SUB_SAT => .sub_sat, - c.AST_NODE_TAG_SHL => .shl, - c.AST_NODE_TAG_SHL_SAT => .shl_sat, - c.AST_NODE_TAG_SHR => .shr, - c.AST_NODE_TAG_BIT_AND => .bit_and, - c.AST_NODE_TAG_BIT_XOR => .bit_xor, - c.AST_NODE_TAG_BIT_OR => .bit_or, - c.AST_NODE_TAG_ORELSE => .@"orelse", - c.AST_NODE_TAG_BOOL_AND => .bool_and, - 
c.AST_NODE_TAG_BOOL_OR => .bool_or, - c.AST_NODE_TAG_BOOL_NOT => .bool_not, - c.AST_NODE_TAG_NEGATION => .negation, - c.AST_NODE_TAG_BIT_NOT => .bit_not, - c.AST_NODE_TAG_NEGATION_WRAP => .negation_wrap, - c.AST_NODE_TAG_ADDRESS_OF => .address_of, - c.AST_NODE_TAG_TRY => .@"try", - c.AST_NODE_TAG_AWAIT => .@"await", - c.AST_NODE_TAG_OPTIONAL_TYPE => .optional_type, - c.AST_NODE_TAG_ARRAY_TYPE => .array_type, - c.AST_NODE_TAG_ARRAY_TYPE_SENTINEL => .array_type_sentinel, - c.AST_NODE_TAG_PTR_TYPE_ALIGNED => .ptr_type_aligned, - c.AST_NODE_TAG_PTR_TYPE_SENTINEL => .ptr_type_sentinel, - c.AST_NODE_TAG_PTR_TYPE => .ptr_type, - c.AST_NODE_TAG_PTR_TYPE_BIT_RANGE => .ptr_type_bit_range, - c.AST_NODE_TAG_SLICE_OPEN => .slice_open, - c.AST_NODE_TAG_SLICE => .slice, - c.AST_NODE_TAG_SLICE_SENTINEL => .slice_sentinel, - c.AST_NODE_TAG_DEREF => .deref, - c.AST_NODE_TAG_ARRAY_ACCESS => .array_access, - c.AST_NODE_TAG_ARRAY_INIT_ONE => .array_init_one, - c.AST_NODE_TAG_ARRAY_INIT_ONE_COMMA => .array_init_one_comma, - c.AST_NODE_TAG_ARRAY_INIT_DOT_TWO => .array_init_dot_two, - c.AST_NODE_TAG_ARRAY_INIT_DOT_TWO_COMMA => .array_init_dot_two_comma, - c.AST_NODE_TAG_ARRAY_INIT_DOT => .array_init_dot, - c.AST_NODE_TAG_ARRAY_INIT_DOT_COMMA => .array_init_dot_comma, - c.AST_NODE_TAG_ARRAY_INIT => .array_init, - c.AST_NODE_TAG_ARRAY_INIT_COMMA => .array_init_comma, - c.AST_NODE_TAG_STRUCT_INIT_ONE => .struct_init_one, - c.AST_NODE_TAG_STRUCT_INIT_ONE_COMMA => .struct_init_one_comma, - c.AST_NODE_TAG_STRUCT_INIT_DOT_TWO => .struct_init_dot_two, - c.AST_NODE_TAG_STRUCT_INIT_DOT_TWO_COMMA => .struct_init_dot_two_comma, - c.AST_NODE_TAG_STRUCT_INIT_DOT => .struct_init_dot, - c.AST_NODE_TAG_STRUCT_INIT_DOT_COMMA => .struct_init_dot_comma, - c.AST_NODE_TAG_STRUCT_INIT => .struct_init, - c.AST_NODE_TAG_STRUCT_INIT_COMMA => .struct_init_comma, - c.AST_NODE_TAG_CALL_ONE => .call_one, - c.AST_NODE_TAG_CALL_ONE_COMMA => .call_one_comma, - c.AST_NODE_TAG_ASYNC_CALL_ONE => .async_call_one, - 
c.AST_NODE_TAG_ASYNC_CALL_ONE_COMMA => .async_call_one_comma, - c.AST_NODE_TAG_CALL => .call, - c.AST_NODE_TAG_CALL_COMMA => .call_comma, - c.AST_NODE_TAG_ASYNC_CALL => .async_call, - c.AST_NODE_TAG_ASYNC_CALL_COMMA => .async_call_comma, - c.AST_NODE_TAG_SWITCH => .@"switch", - c.AST_NODE_TAG_SWITCH_COMMA => .switch_comma, - c.AST_NODE_TAG_SWITCH_CASE_ONE => .switch_case_one, - c.AST_NODE_TAG_SWITCH_CASE_INLINE_ONE => .switch_case_inline_one, - c.AST_NODE_TAG_SWITCH_CASE => .switch_case, - c.AST_NODE_TAG_SWITCH_CASE_INLINE => .switch_case_inline, - c.AST_NODE_TAG_SWITCH_RANGE => .switch_range, - c.AST_NODE_TAG_WHILE_SIMPLE => .while_simple, - c.AST_NODE_TAG_WHILE_CONT => .while_cont, - c.AST_NODE_TAG_WHILE => .@"while", - c.AST_NODE_TAG_FOR_SIMPLE => .for_simple, - c.AST_NODE_TAG_FOR => .@"for", - c.AST_NODE_TAG_FOR_RANGE => .for_range, - c.AST_NODE_TAG_IF_SIMPLE => .if_simple, - c.AST_NODE_TAG_IF => .@"if", - c.AST_NODE_TAG_SUSPEND => .@"suspend", - c.AST_NODE_TAG_RESUME => .@"resume", - c.AST_NODE_TAG_CONTINUE => .@"continue", - c.AST_NODE_TAG_BREAK => .@"break", - c.AST_NODE_TAG_RETURN => .@"return", - c.AST_NODE_TAG_FN_PROTO_SIMPLE => .fn_proto_simple, - c.AST_NODE_TAG_FN_PROTO_MULTI => .fn_proto_multi, - c.AST_NODE_TAG_FN_PROTO_ONE => .fn_proto_one, - c.AST_NODE_TAG_FN_PROTO => .fn_proto, - c.AST_NODE_TAG_FN_DECL => .fn_decl, - c.AST_NODE_TAG_ANYFRAME_TYPE => .anyframe_type, - c.AST_NODE_TAG_ANYFRAME_LITERAL => .anyframe_literal, - c.AST_NODE_TAG_CHAR_LITERAL => .char_literal, - c.AST_NODE_TAG_NUMBER_LITERAL => .number_literal, - c.AST_NODE_TAG_UNREACHABLE_LITERAL => .unreachable_literal, - c.AST_NODE_TAG_IDENTIFIER => .identifier, - c.AST_NODE_TAG_ENUM_LITERAL => .enum_literal, - c.AST_NODE_TAG_STRING_LITERAL => .string_literal, - c.AST_NODE_TAG_MULTILINE_STRING_LITERAL => .multiline_string_literal, - c.AST_NODE_TAG_GROUPED_EXPRESSION => .grouped_expression, - c.AST_NODE_TAG_BUILTIN_CALL_TWO => .builtin_call_two, - c.AST_NODE_TAG_BUILTIN_CALL_TWO_COMMA => 
.builtin_call_two_comma, - c.AST_NODE_TAG_BUILTIN_CALL => .builtin_call, - c.AST_NODE_TAG_BUILTIN_CALL_COMMA => .builtin_call_comma, - c.AST_NODE_TAG_ERROR_SET_DECL => .error_set_decl, - c.AST_NODE_TAG_CONTAINER_DECL => .container_decl, - c.AST_NODE_TAG_CONTAINER_DECL_TRAILING => .container_decl_trailing, - c.AST_NODE_TAG_CONTAINER_DECL_TWO => .container_decl_two, - c.AST_NODE_TAG_CONTAINER_DECL_TWO_TRAILING => .container_decl_two_trailing, - c.AST_NODE_TAG_CONTAINER_DECL_ARG => .container_decl_arg, - c.AST_NODE_TAG_CONTAINER_DECL_ARG_TRAILING => .container_decl_arg_trailing, - c.AST_NODE_TAG_TAGGED_UNION => .tagged_union, - c.AST_NODE_TAG_TAGGED_UNION_TRAILING => .tagged_union_trailing, - c.AST_NODE_TAG_TAGGED_UNION_TWO => .tagged_union_two, - c.AST_NODE_TAG_TAGGED_UNION_TWO_TRAILING => .tagged_union_two_trailing, - c.AST_NODE_TAG_TAGGED_UNION_ENUM_TAG => .tagged_union_enum_tag, - c.AST_NODE_TAG_TAGGED_UNION_ENUM_TAG_TRAILING => .tagged_union_enum_tag_trailing, - c.AST_NODE_TAG_CONTAINER_FIELD_INIT => .container_field_init, - c.AST_NODE_TAG_CONTAINER_FIELD_ALIGN => .container_field_align, - c.AST_NODE_TAG_CONTAINER_FIELD => .container_field, - c.AST_NODE_TAG_COMPTIME => .@"comptime", - c.AST_NODE_TAG_NOSUSPEND => .@"nosuspend", - c.AST_NODE_TAG_BLOCK_TWO => .block_two, - c.AST_NODE_TAG_BLOCK_TWO_SEMICOLON => .block_two_semicolon, - c.AST_NODE_TAG_BLOCK => .block, - c.AST_NODE_TAG_BLOCK_SEMICOLON => .block_semicolon, - c.AST_NODE_TAG_ASM_SIMPLE => .asm_simple, - c.AST_NODE_TAG_ASM => .@"asm", - c.AST_NODE_TAG_ASM_OUTPUT => .asm_output, - c.AST_NODE_TAG_ASM_INPUT => .asm_input, - c.AST_NODE_TAG_ERROR_VALUE => .error_value, - c.AST_NODE_TAG_ERROR_UNION => .error_union, + c.AST_NODE_ROOT => .root, + c.AST_NODE_USINGNAMESPACE => .@"usingnamespace", + c.AST_NODE_TEST_DECL => .test_decl, + c.AST_NODE_GLOBAL_VAR_DECL => .global_var_decl, + c.AST_NODE_LOCAL_VAR_DECL => .local_var_decl, + c.AST_NODE_SIMPLE_VAR_DECL => .simple_var_decl, + c.AST_NODE_ALIGNED_VAR_DECL => 
.aligned_var_decl, + c.AST_NODE_ERRDEFER => .@"errdefer", + c.AST_NODE_DEFER => .@"defer", + c.AST_NODE_CATCH => .@"catch", + c.AST_NODE_FIELD_ACCESS => .field_access, + c.AST_NODE_UNWRAP_OPTIONAL => .unwrap_optional, + c.AST_NODE_EQUAL_EQUAL => .equal_equal, + c.AST_NODE_BANG_EQUAL => .bang_equal, + c.AST_NODE_LESS_THAN => .less_than, + c.AST_NODE_GREATER_THAN => .greater_than, + c.AST_NODE_LESS_OR_EQUAL => .less_or_equal, + c.AST_NODE_GREATER_OR_EQUAL => .greater_or_equal, + c.AST_NODE_ASSIGN_MUL => .assign_mul, + c.AST_NODE_ASSIGN_DIV => .assign_div, + c.AST_NODE_ASSIGN_MOD => .assign_mod, + c.AST_NODE_ASSIGN_ADD => .assign_add, + c.AST_NODE_ASSIGN_SUB => .assign_sub, + c.AST_NODE_ASSIGN_SHL => .assign_shl, + c.AST_NODE_ASSIGN_SHL_SAT => .assign_shl_sat, + c.AST_NODE_ASSIGN_SHR => .assign_shr, + c.AST_NODE_ASSIGN_BIT_AND => .assign_bit_and, + c.AST_NODE_ASSIGN_BIT_XOR => .assign_bit_xor, + c.AST_NODE_ASSIGN_BIT_OR => .assign_bit_or, + c.AST_NODE_ASSIGN_MUL_WRAP => .assign_mul_wrap, + c.AST_NODE_ASSIGN_ADD_WRAP => .assign_add_wrap, + c.AST_NODE_ASSIGN_SUB_WRAP => .assign_sub_wrap, + c.AST_NODE_ASSIGN_MUL_SAT => .assign_mul_sat, + c.AST_NODE_ASSIGN_ADD_SAT => .assign_add_sat, + c.AST_NODE_ASSIGN_SUB_SAT => .assign_sub_sat, + c.AST_NODE_ASSIGN => .assign, + c.AST_NODE_ASSIGN_DESTRUCTURE => .assign_destructure, + c.AST_NODE_MERGE_ERROR_SETS => .merge_error_sets, + c.AST_NODE_MUL => .mul, + c.AST_NODE_DIV => .div, + c.AST_NODE_MOD => .mod, + c.AST_NODE_ARRAY_MULT => .array_mult, + c.AST_NODE_MUL_WRAP => .mul_wrap, + c.AST_NODE_MUL_SAT => .mul_sat, + c.AST_NODE_ADD => .add, + c.AST_NODE_SUB => .sub, + c.AST_NODE_ARRAY_CAT => .array_cat, + c.AST_NODE_ADD_WRAP => .add_wrap, + c.AST_NODE_SUB_WRAP => .sub_wrap, + c.AST_NODE_ADD_SAT => .add_sat, + c.AST_NODE_SUB_SAT => .sub_sat, + c.AST_NODE_SHL => .shl, + c.AST_NODE_SHL_SAT => .shl_sat, + c.AST_NODE_SHR => .shr, + c.AST_NODE_BIT_AND => .bit_and, + c.AST_NODE_BIT_XOR => .bit_xor, + c.AST_NODE_BIT_OR => .bit_or, + 
c.AST_NODE_ORELSE => .@"orelse", + c.AST_NODE_BOOL_AND => .bool_and, + c.AST_NODE_BOOL_OR => .bool_or, + c.AST_NODE_BOOL_NOT => .bool_not, + c.AST_NODE_NEGATION => .negation, + c.AST_NODE_BIT_NOT => .bit_not, + c.AST_NODE_NEGATION_WRAP => .negation_wrap, + c.AST_NODE_ADDRESS_OF => .address_of, + c.AST_NODE_TRY => .@"try", + c.AST_NODE_AWAIT => .@"await", + c.AST_NODE_OPTIONAL_TYPE => .optional_type, + c.AST_NODE_ARRAY_TYPE => .array_type, + c.AST_NODE_ARRAY_TYPE_SENTINEL => .array_type_sentinel, + c.AST_NODE_PTR_TYPE_ALIGNED => .ptr_type_aligned, + c.AST_NODE_PTR_TYPE_SENTINEL => .ptr_type_sentinel, + c.AST_NODE_PTR_TYPE => .ptr_type, + c.AST_NODE_PTR_TYPE_BIT_RANGE => .ptr_type_bit_range, + c.AST_NODE_SLICE_OPEN => .slice_open, + c.AST_NODE_SLICE => .slice, + c.AST_NODE_SLICE_SENTINEL => .slice_sentinel, + c.AST_NODE_DEREF => .deref, + c.AST_NODE_ARRAY_ACCESS => .array_access, + c.AST_NODE_ARRAY_INIT_ONE => .array_init_one, + c.AST_NODE_ARRAY_INIT_ONE_COMMA => .array_init_one_comma, + c.AST_NODE_ARRAY_INIT_DOT_TWO => .array_init_dot_two, + c.AST_NODE_ARRAY_INIT_DOT_TWO_COMMA => .array_init_dot_two_comma, + c.AST_NODE_ARRAY_INIT_DOT => .array_init_dot, + c.AST_NODE_ARRAY_INIT_DOT_COMMA => .array_init_dot_comma, + c.AST_NODE_ARRAY_INIT => .array_init, + c.AST_NODE_ARRAY_INIT_COMMA => .array_init_comma, + c.AST_NODE_STRUCT_INIT_ONE => .struct_init_one, + c.AST_NODE_STRUCT_INIT_ONE_COMMA => .struct_init_one_comma, + c.AST_NODE_STRUCT_INIT_DOT_TWO => .struct_init_dot_two, + c.AST_NODE_STRUCT_INIT_DOT_TWO_COMMA => .struct_init_dot_two_comma, + c.AST_NODE_STRUCT_INIT_DOT => .struct_init_dot, + c.AST_NODE_STRUCT_INIT_DOT_COMMA => .struct_init_dot_comma, + c.AST_NODE_STRUCT_INIT => .struct_init, + c.AST_NODE_STRUCT_INIT_COMMA => .struct_init_comma, + c.AST_NODE_CALL_ONE => .call_one, + c.AST_NODE_CALL_ONE_COMMA => .call_one_comma, + c.AST_NODE_ASYNC_CALL_ONE => .async_call_one, + c.AST_NODE_ASYNC_CALL_ONE_COMMA => .async_call_one_comma, + c.AST_NODE_CALL => .call, + 
c.AST_NODE_CALL_COMMA => .call_comma, + c.AST_NODE_ASYNC_CALL => .async_call, + c.AST_NODE_ASYNC_CALL_COMMA => .async_call_comma, + c.AST_NODE_SWITCH => .@"switch", + c.AST_NODE_SWITCH_COMMA => .switch_comma, + c.AST_NODE_SWITCH_CASE_ONE => .switch_case_one, + c.AST_NODE_SWITCH_CASE_INLINE_ONE => .switch_case_inline_one, + c.AST_NODE_SWITCH_CASE => .switch_case, + c.AST_NODE_SWITCH_CASE_INLINE => .switch_case_inline, + c.AST_NODE_SWITCH_RANGE => .switch_range, + c.AST_NODE_WHILE_SIMPLE => .while_simple, + c.AST_NODE_WHILE_CONT => .while_cont, + c.AST_NODE_WHILE => .@"while", + c.AST_NODE_FOR_SIMPLE => .for_simple, + c.AST_NODE_FOR => .@"for", + c.AST_NODE_FOR_RANGE => .for_range, + c.AST_NODE_IF_SIMPLE => .if_simple, + c.AST_NODE_IF => .@"if", + c.AST_NODE_SUSPEND => .@"suspend", + c.AST_NODE_RESUME => .@"resume", + c.AST_NODE_CONTINUE => .@"continue", + c.AST_NODE_BREAK => .@"break", + c.AST_NODE_RETURN => .@"return", + c.AST_NODE_FN_PROTO_SIMPLE => .fn_proto_simple, + c.AST_NODE_FN_PROTO_MULTI => .fn_proto_multi, + c.AST_NODE_FN_PROTO_ONE => .fn_proto_one, + c.AST_NODE_FN_PROTO => .fn_proto, + c.AST_NODE_FN_DECL => .fn_decl, + c.AST_NODE_ANYFRAME_TYPE => .anyframe_type, + c.AST_NODE_ANYFRAME_LITERAL => .anyframe_literal, + c.AST_NODE_CHAR_LITERAL => .char_literal, + c.AST_NODE_NUMBER_LITERAL => .number_literal, + c.AST_NODE_UNREACHABLE_LITERAL => .unreachable_literal, + c.AST_NODE_IDENTIFIER => .identifier, + c.AST_NODE_ENUM_LITERAL => .enum_literal, + c.AST_NODE_STRING_LITERAL => .string_literal, + c.AST_NODE_MULTILINE_STRING_LITERAL => .multiline_string_literal, + c.AST_NODE_GROUPED_EXPRESSION => .grouped_expression, + c.AST_NODE_BUILTIN_CALL_TWO => .builtin_call_two, + c.AST_NODE_BUILTIN_CALL_TWO_COMMA => .builtin_call_two_comma, + c.AST_NODE_BUILTIN_CALL => .builtin_call, + c.AST_NODE_BUILTIN_CALL_COMMA => .builtin_call_comma, + c.AST_NODE_ERROR_SET_DECL => .error_set_decl, + c.AST_NODE_CONTAINER_DECL => .container_decl, + c.AST_NODE_CONTAINER_DECL_TRAILING 
=> .container_decl_trailing, + c.AST_NODE_CONTAINER_DECL_TWO => .container_decl_two, + c.AST_NODE_CONTAINER_DECL_TWO_TRAILING => .container_decl_two_trailing, + c.AST_NODE_CONTAINER_DECL_ARG => .container_decl_arg, + c.AST_NODE_CONTAINER_DECL_ARG_TRAILING => .container_decl_arg_trailing, + c.AST_NODE_TAGGED_UNION => .tagged_union, + c.AST_NODE_TAGGED_UNION_TRAILING => .tagged_union_trailing, + c.AST_NODE_TAGGED_UNION_TWO => .tagged_union_two, + c.AST_NODE_TAGGED_UNION_TWO_TRAILING => .tagged_union_two_trailing, + c.AST_NODE_TAGGED_UNION_ENUM_TAG => .tagged_union_enum_tag, + c.AST_NODE_TAGGED_UNION_ENUM_TAG_TRAILING => .tagged_union_enum_tag_trailing, + c.AST_NODE_CONTAINER_FIELD_INIT => .container_field_init, + c.AST_NODE_CONTAINER_FIELD_ALIGN => .container_field_align, + c.AST_NODE_CONTAINER_FIELD => .container_field, + c.AST_NODE_COMPTIME => .@"comptime", + c.AST_NODE_NOSUSPEND => .@"nosuspend", + c.AST_NODE_BLOCK_TWO => .block_two, + c.AST_NODE_BLOCK_TWO_SEMICOLON => .block_two_semicolon, + c.AST_NODE_BLOCK => .block, + c.AST_NODE_BLOCK_SEMICOLON => .block_semicolon, + c.AST_NODE_ASM_SIMPLE => .asm_simple, + c.AST_NODE_ASM => .@"asm", + c.AST_NODE_ASM_OUTPUT => .asm_output, + c.AST_NODE_ASM_INPUT => .asm_input, + c.AST_NODE_ERROR_VALUE => .error_value, + c.AST_NODE_ERROR_UNION => .error_union, else => undefined, }; } -fn zigAst(c_ast: c.Ast) Ast { +// zigAst converts a c.Ast to std.Zig.Ast. The resulting Ast should be freed with deinit(). 
+fn zigAst(gpa: Allocator, c_ast: c.Ast) !Ast {
+    var tokens = Ast.TokenList{};
+    errdefer tokens.deinit(gpa);
+    try tokens.resize(gpa, c_ast.tokens.len); // resize sets len, so set() below stays in bounds
+
+    for (0..c_ast.tokens.len) |i|
+        tokens.set(i, .{
+            .tag = zigToken(c_ast.tokens.tags[i]),
+            .start = c_ast.tokens.starts[i],
+        });
+
+    var nodes = Ast.NodeList{};
+    errdefer nodes.deinit(gpa);
+    try nodes.resize(gpa, c_ast.nodes.len); // same as tokens: len must cover every index we set
+
+    for (0..c_ast.nodes.len) |i|
+        nodes.set(i, .{
+            .tag = zigNode(c_ast.nodes.tags[i]),
+            .main_token = c_ast.nodes.main_tokens[i],
+            .data = Ast.Node.Data{
+                .lhs = c_ast.nodes.datas[i].lhs,
+                .rhs = c_ast.nodes.datas[i].rhs,
+            },
+        });
+
+    const extra_data = try gpa.alloc(Ast.Node.Index, c_ast.extra_data.len);
+    errdefer gpa.free(extra_data);
+    std.mem.copyForwards(
+        Ast.Node.Index,
+        extra_data,
+        c_ast.extra_data.arr[0..c_ast.extra_data.len],
+    );
+
+    // creating a dummy `errors` slice, so deinit can free it.
+    const errors = try gpa.alloc(Ast.Error, 0);
+    errdefer gpa.free(errors);
+
     return Ast{
-        .source = c_ast.source[0..c_ast.source_len],
-        //.tokens =
+        .source = c_ast.source[0..c_ast.source_len :0],
+        .mode = .zig,
+        .tokens = tokens.slice(),
+        .nodes = nodes.slice(),
+        .extra_data = extra_data,
+        .errors = errors,
     };
 }
 
 test "Ast header smoke test" {
-    try std.testing.expectEqual(zigNode(c.AST_NODE_TAG_IF), Ast.Node.Tag.@"if");
+    try std.testing.expectEqual(zigNode(c.AST_NODE_IF), Ast.Node.Tag.@"if");
+}
+
+// copy-paste from parser_test.zig
+const mem = std.mem;
+const print = std.debug.print;
+const io = std.io;
+const maxInt = std.math.maxInt;
+
+var fixed_buffer_mem: [100 * 1024]u8 = undefined;
+
+fn testParse(source: [:0]const u8, allocator: mem.Allocator, anything_changed: *bool) ![]u8 {
+    const stderr = io.getStdErr().writer();
+
+    //var tree = try std.zig.Ast.parse(allocator, source, .zig);
+    const c_tree = c.astParse(source, @intCast(source.len)); // NOTE(review): c_tree is never released here; confirm whether the C API needs an explicit free
+    var tree = try zigAst(allocator, c_tree);
+    defer tree.deinit(allocator);
+
+    for (tree.errors) |parse_error| {
+        const loc = tree.tokenLocation(0, parse_error.token);
+        try stderr.print("(memory buffer):{d}:{d}: error: ", .{ loc.line + 1, loc.column + 1 });
+        try tree.renderError(parse_error, stderr);
+        try stderr.print("\n{s}\n", .{source[loc.line_start..loc.line_end]});
+        {
+            var i: usize = 0;
+            while (i < loc.column) : (i += 1) {
+                try stderr.writeAll(" ");
+            }
+            try stderr.writeAll("^");
+        }
+        try stderr.writeAll("\n");
+    }
+    if (tree.errors.len != 0) {
+        return error.ParseError;
+    }
+
+    const formatted = try tree.render(allocator);
+    anything_changed.* = !mem.eql(u8, formatted, source);
+    return formatted;
+}
+fn testTransformImpl(allocator: mem.Allocator, fba: *std.heap.FixedBufferAllocator, source: [:0]const u8, expected_source: []const u8) !void {
+    // reset the fixed buffer allocator each run so that it can be re-used for each
+    // iteration of the failing index
+    fba.reset();
+    var anything_changed: bool = undefined;
+    const result_source = try testParse(source, allocator, &anything_changed);
+    try std.testing.expectEqualStrings(expected_source, result_source);
+    const changes_expected = source.ptr != expected_source.ptr;
+    if (anything_changed != changes_expected) {
+        print("std.zig.render returned {} instead of {}\n", .{ anything_changed, changes_expected });
+        return error.TestFailed;
+    }
+    try std.testing.expect(anything_changed == changes_expected);
+    allocator.free(result_source);
+}
+fn testTransform(source: [:0]const u8, expected_source: []const u8) !void {
+    var fixed_allocator = std.heap.FixedBufferAllocator.init(fixed_buffer_mem[0..]);
+    return std.testing.checkAllAllocationFailures(fixed_allocator.allocator(), testTransformImpl, .{ &fixed_allocator, source, expected_source });
+}
+fn testCanonical(source: [:0]const u8) !void {
+    return testTransform(source, source);
+}
+
+test "zig fmt: remove extra whitespace at start and end of file with comment between" {
+    if (true) return error.SkipZigTest;
+
+    try testTransform(
+        \\
+        \\
+        \\// hello
+        \\
+        \\
+    ,
+        \\// hello
+        \\
+    );
 }
diff --git a/test_all.zig b/test_all.zig
index 7be8d27..4486132 100644
--- a/test_all.zig
+++ b/test_all.zig
@@ -1,3 +1,4 @@
 test "zig0 test suite" {
     _ = @import("tokenizer_test.zig");
+    _ = @import("parser_test.zig");
 }
diff --git a/tokenizer.c b/tokenizer.c
index d7a2278..dd64a56 100644
--- a/tokenizer.c
+++ b/tokenizer.c
@@ -461,9 +461,8 @@ state:
                 const char* start = self->buffer + result.loc.start;
                 uint32_t len = self->index - result.loc.start;
                 TokenizerTag tag = getKeyword(start, len);
-                if (tag != TOKEN_INVALID) {
+                if (tag != TOKEN_INVALID)
                     result.tag = tag;
-                }
             }
             break;
 
@@ -865,7 +864,8 @@ state:
                     .tag = TOKEN_EOF,
                     .loc = {
                         .start = self->index,
-                        .end = self->index }
+                        .end = self->index,
+                    }
                 };
             }
             break;
@@ -939,7 +939,8 @@ state:
                     .tag = TOKEN_EOF,
                     .loc = {
                         .start = self->index,
-                        .end = self->index }
+                        .end = self->index,
+                    }
                 };
             }
             break;
diff --git a/tokenizer_test.zig b/tokenizer_test.zig
index d685434..a2fd5a9 100644
--- a/tokenizer_test.zig
+++ b/tokenizer_test.zig
@@ -8,7 +8,7 @@ const c = @cImport({
     @cInclude("tokenizer.h");
 });
 
-fn zigToken(token: c_uint) Token.Tag {
+pub fn zigToken(token: c_uint) Token.Tag {
     return switch (token) {
         c.TOKEN_INVALID => .invalid,
         c.TOKEN_INVALID_PERIODASTERISKS => .invalid_periodasterisks,