diff --git a/README.md b/README.md
new file mode 100644
index 0000000..42ece43
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+zig1 aspires to be a zig 0.13.0-2448-gd48611ba67c7 interpreter.
diff --git a/ast.c b/ast.c
new file mode 100644
index 0000000..f94ddb7
--- /dev/null
+++ b/ast.c
@@ -0,0 +1,42 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "ast.h"
+
+int ast_parse(const char* source, uint32_t len, ast *result) {
+    uint32_t estimated_token_count = len / 8;
+
+    tokenizer_tag* tokens_tag = calloc(estimated_token_count, sizeof(tokenizer_tag));
+    if (tokens_tag == NULL) {
+        perror("calloc");
+        return 1;
+    }
+    ast_index* tokens_start = calloc(estimated_token_count, sizeof(ast_index));
+    if (tokens_start == NULL) {
+        free(tokens_tag);
+        perror("calloc");
+        return 1;
+    }
+
+    tokenizer tokenizer = tokenizer_init(source, len);
+    for (uint32_t i = 0; i <= estimated_token_count; i++) {
+        if (i == estimated_token_count) {
+            fprintf(stderr, "too many tokens, bump estimated_token_count\n");
+            free(tokens_tag);
+            free(tokens_start);
+            return 1;
+        }
+        tokenizer_token token = tokenizer_next(&tokenizer);
+        tokens_tag[i] = token.tag;
+        tokens_start[i] = token.loc.start;
+        /* stop once the EOF token has been stored */
+        if (token.tag == TOKENIZER_TAG_EOF) {
+            break;
+        }
+    }
+
+    /* TODO parser */
+
+    return 0;
+}
diff --git a/ast.h b/ast.h
new file mode 100644
index 0000000..443e5f8
--- /dev/null
+++ b/ast.h
@@ -0,0 +1,510 @@
+#ifndef _ZIG1_AST_H__
+#define _ZIG1_AST_H__
+
+#include <stdint.h>
+
+#include "tokenizer.h"
+
+typedef enum {
+    /// sub_list[lhs...rhs]
+    AST_NODE_TAG_ROOT,
+    /// `usingnamespace lhs;`. rhs unused. main_token is `usingnamespace`.
+    AST_NODE_TAG_USINGNAMESPACE,
+    /// lhs is test name token (must be string literal or identifier), if any.
+    /// rhs is the body node.
+    AST_NODE_TAG_TEST_DECL,
+    /// lhs is the index into extra_data.
+    /// rhs is the initialization expression, if any.
+    /// main_token is `var` or `const`.
+    AST_NODE_TAG_GLOBAL_VAR_DECL,
+    /// `var a: x align(y) = rhs`
+    /// lhs is the index into extra_data.
+    /// main_token is `var` or `const`.
+    AST_NODE_TAG_LOCAL_VAR_DECL,
+    /// `var a: lhs = rhs`. lhs and rhs may be unused.
+    /// Can be local or global.
+    /// main_token is `var` or `const`.
+    AST_NODE_TAG_SIMPLE_VAR_DECL,
+    /// `var a align(lhs) = rhs`. lhs and rhs may be unused.
+    /// Can be local or global.
+    /// main_token is `var` or `const`.
+    AST_NODE_TAG_ALIGNED_VAR_DECL,
+    /// lhs is the identifier token payload if any,
+    /// rhs is the deferred expression.
+    AST_NODE_TAG_ERRDEFER,
+    /// lhs is unused.
+    /// rhs is the deferred expression.
+    AST_NODE_TAG_DEFER,
+    /// lhs catch rhs
+    /// lhs catch |err| rhs
+    /// main_token is the `catch` keyword.
+    /// payload is determined by looking at the next token after the `catch` keyword.
+    AST_NODE_TAG_CATCH,
+    /// `lhs.a`. main_token is the dot. rhs is the identifier token index.
+    AST_NODE_TAG_FIELD_ACCESS,
+    /// `lhs.?`. main_token is the dot. rhs is the `?` token index.
+    AST_NODE_TAG_UNWRAP_OPTIONAL,
+    /// `lhs == rhs`. main_token is op.
+    AST_NODE_TAG_EQUAL_EQUAL,
+    /// `lhs != rhs`. main_token is op.
+    AST_NODE_TAG_BANG_EQUAL,
+    /// `lhs < rhs`. main_token is op.
+    AST_NODE_TAG_LESS_THAN,
+    /// `lhs > rhs`. main_token is op.
+    AST_NODE_TAG_GREATER_THAN,
+    /// `lhs <= rhs`. main_token is op.
+    AST_NODE_TAG_LESS_OR_EQUAL,
+    /// `lhs >= rhs`. main_token is op.
+    AST_NODE_TAG_GREATER_OR_EQUAL,
+    /// `lhs *= rhs`. main_token is op.
+    AST_NODE_TAG_ASSIGN_MUL,
+    /// `lhs /= rhs`. main_token is op.
+    AST_NODE_TAG_ASSIGN_DIV,
+    /// `lhs %= rhs`. main_token is op.
+    AST_NODE_TAG_ASSIGN_MOD,
+    /// `lhs += rhs`. main_token is op.
+    AST_NODE_TAG_ASSIGN_ADD,
+    /// `lhs -= rhs`. main_token is op.
+    AST_NODE_TAG_ASSIGN_SUB,
+    /// `lhs <<= rhs`. main_token is op.
+    AST_NODE_TAG_ASSIGN_SHL,
+    /// `lhs <<|= rhs`. main_token is op.
+    AST_NODE_TAG_ASSIGN_SHL_SAT,
+    /// `lhs >>= rhs`. main_token is op.
+    AST_NODE_TAG_ASSIGN_SHR,
+    /// `lhs &= rhs`. main_token is op.
+    AST_NODE_TAG_ASSIGN_BIT_AND,
+    /// `lhs ^= rhs`. main_token is op.
+    AST_NODE_TAG_ASSIGN_BIT_XOR,
+    /// `lhs |= rhs`. main_token is op.
+    AST_NODE_TAG_ASSIGN_BIT_OR,
+    /// `lhs *%= rhs`. main_token is op.
+    AST_NODE_TAG_ASSIGN_MUL_WRAP,
+    /// `lhs +%= rhs`. main_token is op.
+    AST_NODE_TAG_ASSIGN_ADD_WRAP,
+    /// `lhs -%= rhs`. main_token is op.
+    AST_NODE_TAG_ASSIGN_SUB_WRAP,
+    /// `lhs *|= rhs`. main_token is op.
+    AST_NODE_TAG_ASSIGN_MUL_SAT,
+    /// `lhs +|= rhs`. main_token is op.
+    AST_NODE_TAG_ASSIGN_ADD_SAT,
+    /// `lhs -|= rhs`. main_token is op.
+    AST_NODE_TAG_ASSIGN_SUB_SAT,
+    /// `lhs = rhs`. main_token is op.
+    AST_NODE_TAG_ASSIGN,
+    /// `a, b, ... = rhs`. main_token is op. lhs is index into `extra_data`
+    /// of an lhs elem count followed by an array of that many `Node.Index`,
+    /// with each node having one of the following types:
+    /// * `global_var_decl`
+    /// * `local_var_decl`
+    /// * `simple_var_decl`
+    /// * `aligned_var_decl`
+    /// * Any expression node
+    /// The first 3 types correspond to a `var` or `const` lhs node (note
+    /// that their `rhs` is always 0). An expression node corresponds to a
+    /// standard assignment LHS (which must be evaluated as an lvalue).
+    /// There may be a preceding `comptime` token, which does not create a
+    /// corresponding `comptime` node so must be manually detected.
+    AST_NODE_TAG_ASSIGN_DESTRUCTURE,
+    /// `lhs || rhs`. main_token is the `||`.
+    AST_NODE_TAG_MERGE_ERROR_SETS,
+    /// `lhs * rhs`. main_token is the `*`.
+    AST_NODE_TAG_MUL,
+    /// `lhs / rhs`. main_token is the `/`.
+    AST_NODE_TAG_DIV,
+    /// `lhs % rhs`. main_token is the `%`.
+    AST_NODE_TAG_MOD,
+    /// `lhs ** rhs`. main_token is the `**`.
+    AST_NODE_TAG_ARRAY_MULT,
+    /// `lhs *% rhs`. main_token is the `*%`.
+    AST_NODE_TAG_MUL_WRAP,
+    /// `lhs *| rhs`. main_token is the `*|`.
+    AST_NODE_TAG_MUL_SAT,
+    /// `lhs + rhs`. main_token is the `+`.
+    AST_NODE_TAG_ADD,
+    /// `lhs - rhs`. main_token is the `-`.
+    AST_NODE_TAG_SUB,
+    /// `lhs ++ rhs`. main_token is the `++`.
+    AST_NODE_TAG_ARRAY_CAT,
+    /// `lhs +% rhs`. main_token is the `+%`.
+    AST_NODE_TAG_ADD_WRAP,
+    /// `lhs -% rhs`. main_token is the `-%`.
+    AST_NODE_TAG_SUB_WRAP,
+    /// `lhs +| rhs`. main_token is the `+|`.
+    AST_NODE_TAG_ADD_SAT,
+    /// `lhs -| rhs`. main_token is the `-|`.
+    AST_NODE_TAG_SUB_SAT,
+    /// `lhs << rhs`. main_token is the `<<`.
+    AST_NODE_TAG_SHL,
+    /// `lhs <<| rhs`. main_token is the `<<|`.
+    AST_NODE_TAG_SHL_SAT,
+    /// `lhs >> rhs`. main_token is the `>>`.
+    AST_NODE_TAG_SHR,
+    /// `lhs & rhs`. main_token is the `&`.
+    AST_NODE_TAG_BIT_AND,
+    /// `lhs ^ rhs`. main_token is the `^`.
+    AST_NODE_TAG_BIT_XOR,
+    /// `lhs | rhs`. main_token is the `|`.
+    AST_NODE_TAG_BIT_OR,
+    /// `lhs orelse rhs`. main_token is the `orelse`.
+    AST_NODE_TAG_ORELSE,
+    /// `lhs and rhs`. main_token is the `and`.
+    AST_NODE_TAG_BOOL_AND,
+    /// `lhs or rhs`. main_token is the `or`.
+    AST_NODE_TAG_BOOL_OR,
+    /// `op lhs`. rhs unused. main_token is op.
+    AST_NODE_TAG_BOOL_NOT,
+    /// `op lhs`. rhs unused. main_token is op.
+    AST_NODE_TAG_NEGATION,
+    /// `op lhs`. rhs unused. main_token is op.
+    AST_NODE_TAG_BIT_NOT,
+    /// `op lhs`. rhs unused. main_token is op.
+    AST_NODE_TAG_NEGATION_WRAP,
+    /// `op lhs`. rhs unused. main_token is op.
+    AST_NODE_TAG_ADDRESS_OF,
+    /// `op lhs`. rhs unused. main_token is op.
+    AST_NODE_TAG_TRY,
+    /// `op lhs`. rhs unused. main_token is op.
+    AST_NODE_TAG_AWAIT,
+    /// `?lhs`. rhs unused. main_token is the `?`.
+    AST_NODE_TAG_OPTIONAL_TYPE,
+    /// `[lhs]rhs`.
+    AST_NODE_TAG_ARRAY_TYPE,
+    /// `[lhs:a]b`. `ArrayTypeSentinel[rhs]`.
+    AST_NODE_TAG_ARRAY_TYPE_SENTINEL,
+    /// `[*]align(lhs) rhs`. lhs can be omitted.
+    /// `*align(lhs) rhs`. lhs can be omitted.
+    /// `[]rhs`.
+    /// main_token is the asterisk if a single item pointer or the lbracket
+    /// if a slice, many-item pointer, or C-pointer
+    /// main_token might be a ** token, which is shared with a parent/child
+    /// pointer type and may require special handling.
+    AST_NODE_TAG_PTR_TYPE_ALIGNED,
+    /// `[*:lhs]rhs`. lhs can be omitted.
+    /// `*rhs`.
+    /// `[:lhs]rhs`.
+    /// main_token is the asterisk if a single item pointer or the lbracket
+    /// if a slice, many-item pointer, or C-pointer
+    /// main_token might be a ** token, which is shared with a parent/child
+    /// pointer type and may require special handling.
+    AST_NODE_TAG_PTR_TYPE_SENTINEL,
+    /// lhs is index into ptr_type. rhs is the element type expression.
+    /// main_token is the asterisk if a single item pointer or the lbracket
+    /// if a slice, many-item pointer, or C-pointer
+    /// main_token might be a ** token, which is shared with a parent/child
+    /// pointer type and may require special handling.
+    AST_NODE_TAG_PTR_TYPE,
+    /// lhs is index into ptr_type_bit_range. rhs is the element type expression.
+    /// main_token is the asterisk if a single item pointer or the lbracket
+    /// if a slice, many-item pointer, or C-pointer
+    /// main_token might be a ** token, which is shared with a parent/child
+    /// pointer type and may require special handling.
+    AST_NODE_TAG_PTR_TYPE_BIT_RANGE,
+    /// `lhs[rhs..]`
+    /// main_token is the lbracket.
+    AST_NODE_TAG_SLICE_OPEN,
+    /// `lhs[b..c]`. rhs is index into Slice
+    /// main_token is the lbracket.
+    AST_NODE_TAG_SLICE,
+    /// `lhs[b..c :d]`. rhs is index into SliceSentinel. Slice end c can be omitted.
+    /// main_token is the lbracket.
+    AST_NODE_TAG_SLICE_SENTINEL,
+    /// `lhs.*`. rhs is unused.
+    AST_NODE_TAG_DEREF,
+    /// `lhs[rhs]`.
+    AST_NODE_TAG_ARRAY_ACCESS,
+    /// `lhs{rhs}`. rhs can be omitted.
+    AST_NODE_TAG_ARRAY_INIT_ONE,
+    /// `lhs{rhs,}`. rhs can *not* be omitted
+    AST_NODE_TAG_ARRAY_INIT_ONE_COMMA,
+    /// `.{lhs, rhs}`. lhs and rhs can be omitted.
+    AST_NODE_TAG_ARRAY_INIT_DOT_TWO,
+    /// Same as `array_init_dot_two` except there is known to be a trailing comma
+    /// before the final rbrace.
+    AST_NODE_TAG_ARRAY_INIT_DOT_TWO_COMMA,
+    /// `.{a, b}`. `sub_list[lhs..rhs]`.
+    AST_NODE_TAG_ARRAY_INIT_DOT,
+    /// Same as `array_init_dot` except there is known to be a trailing comma
+    /// before the final rbrace.
+    AST_NODE_TAG_ARRAY_INIT_DOT_COMMA,
+    /// `lhs{a, b}`. `sub_range_list[rhs]`. lhs can be omitted which means `.{a, b}`.
+    AST_NODE_TAG_ARRAY_INIT,
+    /// Same as `array_init` except there is known to be a trailing comma
+    /// before the final rbrace.
+    AST_NODE_TAG_ARRAY_INIT_COMMA,
+    /// `lhs{.a = rhs}`. rhs can be omitted making it empty.
+    /// main_token is the lbrace.
+    AST_NODE_TAG_STRUCT_INIT_ONE,
+    /// `lhs{.a = rhs,}`. rhs can *not* be omitted.
+    /// main_token is the lbrace.
+    AST_NODE_TAG_STRUCT_INIT_ONE_COMMA,
+    /// `.{.a = lhs, .b = rhs}`. lhs and rhs can be omitted.
+    /// main_token is the lbrace.
+    /// No trailing comma before the rbrace.
+    AST_NODE_TAG_STRUCT_INIT_DOT_TWO,
+    /// Same as `struct_init_dot_two` except there is known to be a trailing comma
+    /// before the final rbrace.
+    AST_NODE_TAG_STRUCT_INIT_DOT_TWO_COMMA,
+    /// `.{.a = b, .c = d}`. `sub_list[lhs..rhs]`.
+    /// main_token is the lbrace.
+    AST_NODE_TAG_STRUCT_INIT_DOT,
+    /// Same as `struct_init_dot` except there is known to be a trailing comma
+    /// before the final rbrace.
+    AST_NODE_TAG_STRUCT_INIT_DOT_COMMA,
+    /// `lhs{.a = b, .c = d}`. `sub_range_list[rhs]`.
+    /// lhs can be omitted which means `.{.a = b, .c = d}`.
+    /// main_token is the lbrace.
+    AST_NODE_TAG_STRUCT_INIT,
+    /// Same as `struct_init` except there is known to be a trailing comma
+    /// before the final rbrace.
+    AST_NODE_TAG_STRUCT_INIT_COMMA,
+    /// `lhs(rhs)`. rhs can be omitted.
+    /// main_token is the lparen.
+    AST_NODE_TAG_CALL_ONE,
+    /// `lhs(rhs,)`. rhs can be omitted.
+    /// main_token is the lparen.
+    AST_NODE_TAG_CALL_ONE_COMMA,
+    /// `async lhs(rhs)`. rhs can be omitted.
+    AST_NODE_TAG_ASYNC_CALL_ONE,
+    /// `async lhs(rhs,)`.
+    AST_NODE_TAG_ASYNC_CALL_ONE_COMMA,
+    /// `lhs(a, b, c)`. `SubRange[rhs]`.
+    /// main_token is the `(`.
+    AST_NODE_TAG_CALL,
+    /// `lhs(a, b, c,)`. `SubRange[rhs]`.
+    /// main_token is the `(`.
+    AST_NODE_TAG_CALL_COMMA,
+    /// `async lhs(a, b, c)`. `SubRange[rhs]`.
+    /// main_token is the `(`.
+    AST_NODE_TAG_ASYNC_CALL,
+    /// `async lhs(a, b, c,)`. `SubRange[rhs]`.
+    /// main_token is the `(`.
+    AST_NODE_TAG_ASYNC_CALL_COMMA,
+    /// `switch(lhs) {}`. `SubRange[rhs]`.
+    /// `main_token` is the identifier of a preceding label, if any; otherwise `switch`.
+    AST_NODE_TAG_SWITCH,
+    /// Same as switch except there is known to be a trailing comma
+    /// before the final rbrace
+    AST_NODE_TAG_SWITCH_COMMA,
+    /// `lhs => rhs`. If lhs is omitted it means `else`.
+    /// main_token is the `=>`
+    AST_NODE_TAG_SWITCH_CASE_ONE,
+    /// Same as `switch_case_one` but the case is inline
+    AST_NODE_TAG_SWITCH_CASE_INLINE_ONE,
+    /// `a, b, c => rhs`. `SubRange[lhs]`.
+    /// main_token is the `=>`
+    AST_NODE_TAG_SWITCH_CASE,
+    /// Same as `switch_case` but the case is inline
+    AST_NODE_TAG_SWITCH_CASE_INLINE,
+    /// `lhs...rhs`.
+    AST_NODE_TAG_SWITCH_RANGE,
+    /// `while (lhs) rhs`.
+    /// `while (lhs) |x| rhs`.
+    AST_NODE_TAG_WHILE_SIMPLE,
+    /// `while (lhs) : (a) b`. `WhileCont[rhs]`.
+    /// `while (lhs) |x| : (a) b`. `WhileCont[rhs]`.
+    AST_NODE_TAG_WHILE_CONT,
+    /// `while (lhs) : (a) b else c`. `While[rhs]`.
+    /// `while (lhs) |x| : (a) b else c`. `While[rhs]`.
+    /// `while (lhs) |x| : (a) b else |y| c`. `While[rhs]`.
+    /// The cont expression part `: (a)` may be omitted.
+    AST_NODE_TAG_WHILE,
+    /// `for (lhs) rhs`.
+    AST_NODE_TAG_FOR_SIMPLE,
+    /// `for (lhs[0..inputs]) lhs[inputs + 1] else lhs[inputs + 2]`. `For[rhs]`.
+    AST_NODE_TAG_FOR,
+    /// `lhs..rhs`. rhs can be omitted.
+    AST_NODE_TAG_FOR_RANGE,
+    /// `if (lhs) rhs`.
+    /// `if (lhs) |a| rhs`.
+    AST_NODE_TAG_IF_SIMPLE,
+    /// `if (lhs) a else b`. `If[rhs]`.
+    /// `if (lhs) |x| a else b`. `If[rhs]`.
+    /// `if (lhs) |x| a else |y| b`. `If[rhs]`.
+    AST_NODE_TAG_IF,
+    /// `suspend lhs`. lhs can be omitted. rhs is unused.
+    AST_NODE_TAG_SUSPEND,
+    /// `resume lhs`. rhs is unused.
+    AST_NODE_TAG_RESUME,
+    /// `continue :lhs rhs`
+    /// both lhs and rhs may be omitted.
+    AST_NODE_TAG_CONTINUE,
+    /// `break :lhs rhs`
+    /// both lhs and rhs may be omitted.
+    AST_NODE_TAG_BREAK,
+    /// `return lhs`. lhs can be omitted. rhs is unused.
+    AST_NODE_TAG_RETURN,
+    /// `fn (a: lhs) rhs`. lhs can be omitted.
+    /// anytype and ... parameters are omitted from the AST tree.
+    /// main_token is the `fn` keyword.
+    /// extern function declarations use this tag.
+    AST_NODE_TAG_FN_PROTO_SIMPLE,
+    /// `fn (a: b, c: d) rhs`. `sub_range_list[lhs]`.
+    /// anytype and ... parameters are omitted from the AST tree.
+    /// main_token is the `fn` keyword.
+    /// extern function declarations use this tag.
+    AST_NODE_TAG_FN_PROTO_MULTI,
+    /// `fn (a: b) addrspace(e) linksection(f) callconv(g) rhs`. `FnProtoOne[lhs]`.
+    /// zero or one parameters.
+    /// anytype and ... parameters are omitted from the AST tree.
+    /// main_token is the `fn` keyword.
+    /// extern function declarations use this tag.
+    AST_NODE_TAG_FN_PROTO_ONE,
+    /// `fn (a: b, c: d) addrspace(e) linksection(f) callconv(g) rhs`. `FnProto[lhs]`.
+    /// anytype and ... parameters are omitted from the AST tree.
+    /// main_token is the `fn` keyword.
+    /// extern function declarations use this tag.
+    AST_NODE_TAG_FN_PROTO,
+    /// lhs is the fn_proto.
+    /// rhs is the function body block.
+    /// Note that extern function declarations use the fn_proto tags rather
+    /// than this one.
+    AST_NODE_TAG_FN_DECL,
+    /// `anyframe->rhs`. main_token is `anyframe`. `lhs` is arrow token index.
+    AST_NODE_TAG_ANYFRAME_TYPE,
+    /// Both lhs and rhs unused.
+    AST_NODE_TAG_ANYFRAME_LITERAL,
+    /// Both lhs and rhs unused.
+    AST_NODE_TAG_CHAR_LITERAL,
+    /// Both lhs and rhs unused.
+    AST_NODE_TAG_NUMBER_LITERAL,
+    /// Both lhs and rhs unused.
+    AST_NODE_TAG_UNREACHABLE_LITERAL,
+    /// Both lhs and rhs unused.
+    /// Most identifiers will not have explicit AST nodes, however for expressions
+    /// which could be one of many different kinds of AST nodes, there will be an
+    /// identifier AST node for it.
+    AST_NODE_TAG_IDENTIFIER,
+    /// lhs is the dot token index, rhs unused, main_token is the identifier.
+    AST_NODE_TAG_ENUM_LITERAL,
+    /// main_token is the string literal token
+    /// Both lhs and rhs unused.
+    AST_NODE_TAG_STRING_LITERAL,
+    /// main_token is the first token index (redundant with lhs)
+    /// lhs is the first token index; rhs is the last token index.
+    /// Could be a series of multiline_string_literal_line tokens, or a single
+    /// string_literal token.
+    AST_NODE_TAG_MULTILINE_STRING_LITERAL,
+    /// `(lhs)`. main_token is the `(`; rhs is the token index of the `)`.
+    AST_NODE_TAG_GROUPED_EXPRESSION,
+    /// `@a(lhs, rhs)`. lhs and rhs may be omitted.
+    /// main_token is the builtin token.
+    AST_NODE_TAG_BUILTIN_CALL_TWO,
+    /// Same as builtin_call_two but there is known to be a trailing comma before the rparen.
+    AST_NODE_TAG_BUILTIN_CALL_TWO_COMMA,
+    /// `@a(b, c)`. `sub_list[lhs..rhs]`.
+    /// main_token is the builtin token.
+    AST_NODE_TAG_BUILTIN_CALL,
+    /// Same as builtin_call but there is known to be a trailing comma before the rparen.
+    AST_NODE_TAG_BUILTIN_CALL_COMMA,
+    /// `error{a, b}`.
+    /// rhs is the rbrace, lhs is unused.
+    AST_NODE_TAG_ERROR_SET_DECL,
+    /// `struct {}`, `union {}`, `opaque {}`, `enum {}`. `extra_data[lhs..rhs]`.
+    /// main_token is `struct`, `union`, `opaque`, `enum` keyword.
+    AST_NODE_TAG_CONTAINER_DECL,
+    /// Same as ContainerDecl but there is known to be a trailing comma
+    /// or semicolon before the rbrace.
+    AST_NODE_TAG_CONTAINER_DECL_TRAILING,
+    /// `struct {lhs, rhs}`, `union {lhs, rhs}`, `opaque {lhs, rhs}`, `enum {lhs, rhs}`.
+    /// lhs or rhs can be omitted.
+    /// main_token is `struct`, `union`, `opaque`, `enum` keyword.
+    AST_NODE_TAG_CONTAINER_DECL_TWO,
+    /// Same as ContainerDeclTwo except there is known to be a trailing comma
+    /// or semicolon before the rbrace.
+    AST_NODE_TAG_CONTAINER_DECL_TWO_TRAILING,
+    /// `struct(lhs)` / `union(lhs)` / `enum(lhs)`. `SubRange[rhs]`.
+    AST_NODE_TAG_CONTAINER_DECL_ARG,
+    /// Same as container_decl_arg but there is known to be a trailing
+    /// comma or semicolon before the rbrace.
+    AST_NODE_TAG_CONTAINER_DECL_ARG_TRAILING,
+    /// `union(enum) {}`. `sub_list[lhs..rhs]`.
+    /// Note that tagged unions with explicitly provided enums are represented
+    /// by `container_decl_arg`.
+    AST_NODE_TAG_TAGGED_UNION,
+    /// Same as tagged_union but there is known to be a trailing comma
+    /// or semicolon before the rbrace.
+    AST_NODE_TAG_TAGGED_UNION_TRAILING,
+    /// `union(enum) {lhs, rhs}`. lhs or rhs may be omitted.
+    /// Note that tagged unions with explicitly provided enums are represented
+    /// by `container_decl_arg`.
+    AST_NODE_TAG_TAGGED_UNION_TWO,
+    /// Same as tagged_union_two but there is known to be a trailing comma
+    /// or semicolon before the rbrace.
+    AST_NODE_TAG_TAGGED_UNION_TWO_TRAILING,
+    /// `union(enum(lhs)) {}`. `SubRange[rhs]`.
+    AST_NODE_TAG_TAGGED_UNION_ENUM_TAG,
+    /// Same as tagged_union_enum_tag but there is known to be a trailing comma
+    /// or semicolon before the rbrace.
+    AST_NODE_TAG_TAGGED_UNION_ENUM_TAG_TRAILING,
+    /// `a: lhs = rhs,`. lhs and rhs can be omitted.
+    /// main_token is the field name identifier.
+    /// lastToken() does not include the possible trailing comma.
+    AST_NODE_TAG_CONTAINER_FIELD_INIT,
+    /// `a: lhs align(rhs),`. rhs can be omitted.
+    /// main_token is the field name identifier.
+    /// lastToken() does not include the possible trailing comma.
+    AST_NODE_TAG_CONTAINER_FIELD_ALIGN,
+    /// `a: lhs align(c) = d,`. `container_field_list[rhs]`.
+    /// main_token is the field name identifier.
+    /// lastToken() does not include the possible trailing comma.
+    AST_NODE_TAG_CONTAINER_FIELD,
+    /// `comptime lhs`. rhs unused.
+    AST_NODE_TAG_COMPTIME,
+    /// `nosuspend lhs`. rhs unused.
+    AST_NODE_TAG_NOSUSPEND,
+    /// `{lhs rhs}`. rhs or lhs can be omitted.
+    /// main_token points at the lbrace.
+    AST_NODE_TAG_BLOCK_TWO,
+    /// Same as block_two but there is known to be a semicolon before the rbrace.
+    AST_NODE_TAG_BLOCK_TWO_SEMICOLON,
+    /// `{}`. `sub_list[lhs..rhs]`.
+    /// main_token points at the lbrace.
+    AST_NODE_TAG_BLOCK,
+    /// Same as block but there is known to be a semicolon before the rbrace.
+    AST_NODE_TAG_BLOCK_SEMICOLON,
+    /// `asm(lhs)`. rhs is the token index of the rparen.
+    AST_NODE_TAG_ASM_SIMPLE,
+    /// `asm(lhs, a)`. `Asm[rhs]`.
+    AST_NODE_TAG_ASM,
+    /// `[a] "b" (c)`. lhs is 0, rhs is token index of the rparen.
+    /// `[a] "b" (-> lhs)`. rhs is token index of the rparen.
+    /// main_token is `a`.
+    AST_NODE_TAG_ASM_OUTPUT,
+    /// `[a] "b" (lhs)`. rhs is token index of the rparen.
+    /// main_token is `a`.
+    AST_NODE_TAG_ASM_INPUT,
+    /// `error.a`. lhs is token index of `.`. rhs is token index of `a`.
+    AST_NODE_TAG_ERROR_VALUE,
+    /// `lhs!rhs`. main_token is the `!`.
+    AST_NODE_TAG_ERROR_UNION,
+} ast_node_tag;
+
+typedef uint32_t ast_token_index;
+typedef uint32_t ast_node_index;
+typedef uint32_t ast_index;
+
+typedef struct {
+    ast_node_tag tag;
+    ast_token_index main_token;
+    struct {
+        ast_index lhs, rhs;
+    } data;
+} ast_node;
+
+typedef struct {
+    const char* source;
+    uint32_t source_len;
+
+    tokenizer_tag* tokens_tag;
+    ast_index* tokens_start;
+    uint32_t tokens_len;
+
+    ast_node* nodes;
+    uint32_t nodes_len;
+    ast_node_index* extra_data;
+} ast;
+
+int ast_parse(const char* source, uint32_t len, ast *result);
+
+#endif
diff --git a/tokenizer.c b/tokenizer.c
index 90af352..9e1bb41 100644
--- a/tokenizer.c
+++ b/tokenizer.c
@@ -1,5 +1,3 @@
-// tokenizer for zig d48611ba67c7871cb348f28a01b89d8771170dd8
-
 #include
 #include
 #include
@@ -9,63 +7,63 @@
 typedef struct {
     const char* keyword;
-    token_tag tag;
+    tokenizer_tag tag;
 } keyword_map;
 
 const keyword_map keywords[] = {
-    { "addrspace", TOKEN_TAG_KEYWORD_ADDRSPACE },
-    { "align", TOKEN_TAG_KEYWORD_ALIGN },
-    { "allowzero", TOKEN_TAG_KEYWORD_ALLOWZERO },
-    { "and", TOKEN_TAG_KEYWORD_AND },
-    { "anyframe", TOKEN_TAG_KEYWORD_ANYFRAME },
-    { "anytype", TOKEN_TAG_KEYWORD_ANYTYPE },
-    { "asm", TOKEN_TAG_KEYWORD_ASM },
-    { "async", TOKEN_TAG_KEYWORD_ASYNC },
-    { "await", TOKEN_TAG_KEYWORD_AWAIT },
-    { "break", TOKEN_TAG_KEYWORD_BREAK },
-    { "callconv", TOKEN_TAG_KEYWORD_CALLCONV },
-    { "catch", TOKEN_TAG_KEYWORD_CATCH },
-    { "comptime", TOKEN_TAG_KEYWORD_COMPTIME },
-    { "const", TOKEN_TAG_KEYWORD_CONST },
-    { "continue", TOKEN_TAG_KEYWORD_CONTINUE },
-    { "defer", TOKEN_TAG_KEYWORD_DEFER },
-    { "else", TOKEN_TAG_KEYWORD_ELSE },
-    { "enum", TOKEN_TAG_KEYWORD_ENUM },
-    { "errdefer", TOKEN_TAG_KEYWORD_ERRDEFER },
-    { "error", TOKEN_TAG_KEYWORD_ERROR },
-    { "export", TOKEN_TAG_KEYWORD_EXPORT },
-    { "extern", TOKEN_TAG_KEYWORD_EXTERN },
-    { "fn", TOKEN_TAG_KEYWORD_FN },
-    { "for", TOKEN_TAG_KEYWORD_FOR },
-    { "if", TOKEN_TAG_KEYWORD_IF },
-    { "inline", TOKEN_TAG_KEYWORD_INLINE },
-    { "linksection", TOKEN_TAG_KEYWORD_LINKSECTION },
-    { "noalias", TOKEN_TAG_KEYWORD_NOALIAS },
-    { "noinline", TOKEN_TAG_KEYWORD_NOINLINE },
-    { "nosuspend", TOKEN_TAG_KEYWORD_NOSUSPEND },
-    { "opaque", TOKEN_TAG_KEYWORD_OPAQUE },
-    { "or", TOKEN_TAG_KEYWORD_OR },
-    { "orelse", TOKEN_TAG_KEYWORD_ORELSE },
-    { "packed", TOKEN_TAG_KEYWORD_PACKED },
-    { "pub", TOKEN_TAG_KEYWORD_PUB },
-    { "resume", TOKEN_TAG_KEYWORD_RESUME },
-    { "return", TOKEN_TAG_KEYWORD_RETURN },
-    { "struct", TOKEN_TAG_KEYWORD_STRUCT },
-    { "suspend", TOKEN_TAG_KEYWORD_SUSPEND },
-    { "switch", TOKEN_TAG_KEYWORD_SWITCH },
-    { "test", TOKEN_TAG_KEYWORD_TEST },
-    { "threadlocal", TOKEN_TAG_KEYWORD_THREADLOCAL },
-    { "try", TOKEN_TAG_KEYWORD_TRY },
-    { "union", TOKEN_TAG_KEYWORD_UNION },
-    { "unreachable", TOKEN_TAG_KEYWORD_UNREACHABLE },
-    { "usingnamespace", TOKEN_TAG_KEYWORD_USINGNAMESPACE },
-    { "var", TOKEN_TAG_KEYWORD_VAR },
-    { "volatile", TOKEN_TAG_KEYWORD_VOLATILE },
-    { "while", TOKEN_TAG_KEYWORD_WHILE }
+    { "addrspace", TOKENIZER_TAG_KEYWORD_ADDRSPACE },
+    { "align", TOKENIZER_TAG_KEYWORD_ALIGN },
+    { "allowzero", TOKENIZER_TAG_KEYWORD_ALLOWZERO },
+    { "and", TOKENIZER_TAG_KEYWORD_AND },
+    { "anyframe", TOKENIZER_TAG_KEYWORD_ANYFRAME },
+    { "anytype", TOKENIZER_TAG_KEYWORD_ANYTYPE },
+    { "asm", TOKENIZER_TAG_KEYWORD_ASM },
+    { "async", TOKENIZER_TAG_KEYWORD_ASYNC },
+    { "await", TOKENIZER_TAG_KEYWORD_AWAIT },
+    { "break", TOKENIZER_TAG_KEYWORD_BREAK },
+    { "callconv", TOKENIZER_TAG_KEYWORD_CALLCONV },
+    { "catch", TOKENIZER_TAG_KEYWORD_CATCH },
+    {
"comptime", TOKENIZER_TAG_KEYWORD_COMPTIME }, + { "const", TOKENIZER_TAG_KEYWORD_CONST }, + { "continue", TOKENIZER_TAG_KEYWORD_CONTINUE }, + { "defer", TOKENIZER_TAG_KEYWORD_DEFER }, + { "else", TOKENIZER_TAG_KEYWORD_ELSE }, + { "enum", TOKENIZER_TAG_KEYWORD_ENUM }, + { "errdefer", TOKENIZER_TAG_KEYWORD_ERRDEFER }, + { "error", TOKENIZER_TAG_KEYWORD_ERROR }, + { "export", TOKENIZER_TAG_KEYWORD_EXPORT }, + { "extern", TOKENIZER_TAG_KEYWORD_EXTERN }, + { "fn", TOKENIZER_TAG_KEYWORD_FN }, + { "for", TOKENIZER_TAG_KEYWORD_FOR }, + { "if", TOKENIZER_TAG_KEYWORD_IF }, + { "inline", TOKENIZER_TAG_KEYWORD_INLINE }, + { "linksection", TOKENIZER_TAG_KEYWORD_LINKSECTION }, + { "noalias", TOKENIZER_TAG_KEYWORD_NOALIAS }, + { "noinline", TOKENIZER_TAG_KEYWORD_NOINLINE }, + { "nosuspend", TOKENIZER_TAG_KEYWORD_NOSUSPEND }, + { "opaque", TOKENIZER_TAG_KEYWORD_OPAQUE }, + { "or", TOKENIZER_TAG_KEYWORD_OR }, + { "orelse", TOKENIZER_TAG_KEYWORD_ORELSE }, + { "packed", TOKENIZER_TAG_KEYWORD_PACKED }, + { "pub", TOKENIZER_TAG_KEYWORD_PUB }, + { "resume", TOKENIZER_TAG_KEYWORD_RESUME }, + { "return", TOKENIZER_TAG_KEYWORD_RETURN }, + { "struct", TOKENIZER_TAG_KEYWORD_STRUCT }, + { "suspend", TOKENIZER_TAG_KEYWORD_SUSPEND }, + { "switch", TOKENIZER_TAG_KEYWORD_SWITCH }, + { "test", TOKENIZER_TAG_KEYWORD_TEST }, + { "threadlocal", TOKENIZER_TAG_KEYWORD_THREADLOCAL }, + { "try", TOKENIZER_TAG_KEYWORD_TRY }, + { "union", TOKENIZER_TAG_KEYWORD_UNION }, + { "unreachable", TOKENIZER_TAG_KEYWORD_UNREACHABLE }, + { "usingnamespace", TOKENIZER_TAG_KEYWORD_USINGNAMESPACE }, + { "var", TOKENIZER_TAG_KEYWORD_VAR }, + { "volatile", TOKENIZER_TAG_KEYWORD_VOLATILE }, + { "while", TOKENIZER_TAG_KEYWORD_WHILE } }; // TODO binary search -static token_tag get_keyword(const char* bytes, uint32_t len) +static tokenizer_tag get_keyword(const char* bytes, uint32_t len) { for (unsigned long i = 0; i < sizeof(keywords) / sizeof(keyword_map); i++) { size_t klen = strlen(keywords[i].keyword); @@ -75,14 +73,14 @@ static token_tag get_keyword(const char* bytes, uint32_t len) if (len == klen) { return keywords[i].tag; } else { - return TOKEN_TAG_INVALID; + return TOKENIZER_TAG_INVALID; } } else if (cmp < 0) { - return TOKEN_TAG_INVALID; + return TOKENIZER_TAG_INVALID; } continue; } - return TOKEN_TAG_INVALID; + return TOKENIZER_TAG_INVALID; } tokenizer tokenizer_init(const char* buffer, uint32_t len) @@ -94,32 +92,32 @@ tokenizer tokenizer_init(const char* buffer, uint32_t len) }; } -token tokenizer_next(tokenizer* self) +tokenizer_token tokenizer_next(tokenizer* self) { - token result = (token) { - .tag = TOKEN_TAG_INVALID, + tokenizer_token result = (tokenizer_token) { + .tag = TOKENIZER_TAG_INVALID, .loc = { .start = 0, }, }; - token_state state = TOKEN_STATE_START; + tokenizer_state state = TOKENIZER_STATE_START; state: switch (state) { - case TOKEN_STATE_START: + case TOKENIZER_STATE_START: switch (self->buffer[self->index]) { case 0: if (self->index == self->buffer_len) { - return (token) { - .tag = TOKEN_TAG_EOF, + return (tokenizer_token) { + .tag = TOKENIZER_TAG_EOF, .loc = { .start = self->index, .end = self->index, } }; } else { - state = TOKEN_STATE_INVALID; + state = TOKENIZER_STATE_INVALID; goto state; } case ' ': @@ -130,388 +128,388 @@ state: result.loc.start = self->index; goto state; case '"': - result.tag = TOKEN_TAG_STRING_LITERAL; - state = TOKEN_STATE_STRING_LITERAL; + result.tag = TOKENIZER_TAG_STRING_LITERAL; + state = TOKENIZER_STATE_STRING_LITERAL; goto state; case '\'': - result.tag = TOKEN_TAG_CHAR_LITERAL; - 
state = TOKEN_STATE_CHAR_LITERAL; + result.tag = TOKENIZER_TAG_CHAR_LITERAL; + state = TOKENIZER_STATE_CHAR_LITERAL; goto state; case 'a' ... 'z': case 'A' ... 'Z': case '_': - result.tag = TOKEN_TAG_IDENTIFIER; - state = TOKEN_STATE_IDENTIFIER; + result.tag = TOKENIZER_TAG_IDENTIFIER; + state = TOKENIZER_STATE_IDENTIFIER; goto state; case '@': - state = TOKEN_STATE_SAW_AT_SIGN; + state = TOKENIZER_STATE_SAW_AT_SIGN; goto state; case '=': - state = TOKEN_STATE_EQUAL; + state = TOKENIZER_STATE_EQUAL; goto state; case '!': - state = TOKEN_STATE_BANG; + state = TOKENIZER_STATE_BANG; goto state; case '|': - state = TOKEN_STATE_PIPE; + state = TOKENIZER_STATE_PIPE; goto state; case '(': - result.tag = TOKEN_TAG_L_PAREN; + result.tag = TOKENIZER_TAG_L_PAREN; self->index++; break; case ')': - result.tag = TOKEN_TAG_R_PAREN; + result.tag = TOKENIZER_TAG_R_PAREN; self->index++; break; case '[': - result.tag = TOKEN_TAG_L_BRACKET; + result.tag = TOKENIZER_TAG_L_BRACKET; self->index++; break; case ']': - result.tag = TOKEN_TAG_R_BRACKET; + result.tag = TOKENIZER_TAG_R_BRACKET; self->index++; break; case ';': - result.tag = TOKEN_TAG_SEMICOLON; + result.tag = TOKENIZER_TAG_SEMICOLON; self->index++; break; case ',': - result.tag = TOKEN_TAG_COMMA; + result.tag = TOKENIZER_TAG_COMMA; self->index++; break; case '?': - result.tag = TOKEN_TAG_QUESTION_MARK; + result.tag = TOKENIZER_TAG_QUESTION_MARK; self->index++; break; case ':': - result.tag = TOKEN_TAG_COLON; + result.tag = TOKENIZER_TAG_COLON; self->index++; break; case '%': - state = TOKEN_STATE_PERCENT; + state = TOKENIZER_STATE_PERCENT; goto state; case '*': - state = TOKEN_STATE_ASTERISK; + state = TOKENIZER_STATE_ASTERISK; goto state; case '+': - state = TOKEN_STATE_PLUS; + state = TOKENIZER_STATE_PLUS; goto state; case '<': - state = TOKEN_STATE_ANGLE_BRACKET_LEFT; + state = TOKENIZER_STATE_ANGLE_BRACKET_LEFT; goto state; case '>': - state = TOKEN_STATE_ANGLE_BRACKET_RIGHT; + state = TOKENIZER_STATE_ANGLE_BRACKET_RIGHT; goto state; case '^': - state = TOKEN_STATE_CARET; + state = TOKENIZER_STATE_CARET; goto state; case '\\': - result.tag = TOKEN_TAG_MULTILINE_STRING_LITERAL_LINE; - state = TOKEN_STATE_BACKSLASH; + result.tag = TOKENIZER_TAG_MULTILINE_STRING_LITERAL_LINE; + state = TOKENIZER_STATE_BACKSLASH; goto state; case '{': - result.tag = TOKEN_TAG_L_BRACE; + result.tag = TOKENIZER_TAG_L_BRACE; self->index++; break; case '}': - result.tag = TOKEN_TAG_R_BRACE; + result.tag = TOKENIZER_TAG_R_BRACE; self->index++; break; case '~': - result.tag = TOKEN_TAG_TILDE; + result.tag = TOKENIZER_TAG_TILDE; self->index++; break; case '.': - state = TOKEN_STATE_PERIOD; + state = TOKENIZER_STATE_PERIOD; goto state; case '-': - state = TOKEN_STATE_MINUS; + state = TOKENIZER_STATE_MINUS; goto state; case '/': - state = TOKEN_STATE_SLASH; + state = TOKENIZER_STATE_SLASH; goto state; case '&': - state = TOKEN_STATE_AMPERSAND; + state = TOKENIZER_STATE_AMPERSAND; goto state; case '0' ... 
'9': - result.tag = TOKEN_TAG_NUMBER_LITERAL; + result.tag = TOKENIZER_TAG_NUMBER_LITERAL; self->index++; - state = TOKEN_STATE_INT; + state = TOKENIZER_STATE_INT; goto state; default: - state = TOKEN_STATE_INVALID; + state = TOKENIZER_STATE_INVALID; goto state; }; break; - case TOKEN_STATE_EXPECT_NEWLINE: + case TOKENIZER_STATE_EXPECT_NEWLINE: self->index++; switch (self->buffer[self->index]) { case 0: if (self->index == self->buffer_len) { - result.tag = TOKEN_TAG_INVALID; + result.tag = TOKENIZER_TAG_INVALID; } else { - state = TOKEN_STATE_INVALID; + state = TOKENIZER_STATE_INVALID; goto state; } break; case '\n': self->index++; result.loc.start = self->index; - state = TOKEN_STATE_START; + state = TOKENIZER_STATE_START; goto state; default: - state = TOKEN_STATE_INVALID; + state = TOKENIZER_STATE_INVALID; goto state; } break; - case TOKEN_STATE_INVALID: + case TOKENIZER_STATE_INVALID: self->index++; switch (self->buffer[self->index]) { case 0: if (self->index == self->buffer_len) { - result.tag = TOKEN_TAG_INVALID; + result.tag = TOKENIZER_TAG_INVALID; } else { - state = TOKEN_STATE_INVALID; + state = TOKENIZER_STATE_INVALID; goto state; } break; case '\n': - result.tag = TOKEN_TAG_INVALID; + result.tag = TOKENIZER_TAG_INVALID; break; default: - state = TOKEN_STATE_INVALID; + state = TOKENIZER_STATE_INVALID; goto state; } break; - case TOKEN_STATE_SAW_AT_SIGN: + case TOKENIZER_STATE_SAW_AT_SIGN: self->index++; switch (self->buffer[self->index]) { case 0: case '\n': - result.tag = TOKEN_TAG_INVALID; + result.tag = TOKENIZER_TAG_INVALID; break; case '"': - result.tag = TOKEN_TAG_IDENTIFIER; - state = TOKEN_STATE_STRING_LITERAL; + result.tag = TOKENIZER_TAG_IDENTIFIER; + state = TOKENIZER_STATE_STRING_LITERAL; goto state; case 'a' ... 'z': case 'A' ... 
'Z': case '_': - result.tag = TOKEN_TAG_BUILTIN; - state = TOKEN_STATE_BUILTIN; + result.tag = TOKENIZER_TAG_BUILTIN; + state = TOKENIZER_STATE_BUILTIN; goto state; default: - state = TOKEN_STATE_INVALID; + state = TOKENIZER_STATE_INVALID; goto state; } break; - case TOKEN_STATE_AMPERSAND: + case TOKENIZER_STATE_AMPERSAND: self->index++; switch (self->buffer[self->index]) { case '=': - result.tag = TOKEN_TAG_AMPERSAND_EQUAL; + result.tag = TOKENIZER_TAG_AMPERSAND_EQUAL; self->index++; break; default: - result.tag = TOKEN_TAG_AMPERSAND; + result.tag = TOKENIZER_TAG_AMPERSAND; break; } break; - case TOKEN_STATE_ASTERISK: + case TOKENIZER_STATE_ASTERISK: self->index++; switch (self->buffer[self->index]) { case '=': - result.tag = TOKEN_TAG_ASTERISK_EQUAL; + result.tag = TOKENIZER_TAG_ASTERISK_EQUAL; self->index++; break; case '*': - result.tag = TOKEN_TAG_ASTERISK_ASTERISK; + result.tag = TOKENIZER_TAG_ASTERISK_ASTERISK; self->index++; break; case '%': - state = TOKEN_STATE_ASTERISK_PERCENT; + state = TOKENIZER_STATE_ASTERISK_PERCENT; goto state; case '|': - state = TOKEN_STATE_ASTERISK_PIPE; + state = TOKENIZER_STATE_ASTERISK_PIPE; goto state; default: - result.tag = TOKEN_TAG_ASTERISK; + result.tag = TOKENIZER_TAG_ASTERISK; break; } break; - case TOKEN_STATE_ASTERISK_PERCENT: + case TOKENIZER_STATE_ASTERISK_PERCENT: self->index++; switch (self->buffer[self->index]) { case '=': - result.tag = TOKEN_TAG_ASTERISK_PERCENT_EQUAL; + result.tag = TOKENIZER_TAG_ASTERISK_PERCENT_EQUAL; self->index++; break; default: - result.tag = TOKEN_TAG_ASTERISK_PERCENT; + result.tag = TOKENIZER_TAG_ASTERISK_PERCENT; break; } break; - case TOKEN_STATE_ASTERISK_PIPE: + case TOKENIZER_STATE_ASTERISK_PIPE: self->index++; switch (self->buffer[self->index]) { case '=': - result.tag = TOKEN_TAG_ASTERISK_PIPE_EQUAL; + result.tag = TOKENIZER_TAG_ASTERISK_PIPE_EQUAL; self->index++; break; default: - result.tag = TOKEN_TAG_ASTERISK_PIPE; + result.tag = TOKENIZER_TAG_ASTERISK_PIPE; break; } break; - case TOKEN_STATE_PERCENT: + case TOKENIZER_STATE_PERCENT: self->index++; switch (self->buffer[self->index]) { case '=': - result.tag = TOKEN_TAG_PERCENT_EQUAL; + result.tag = TOKENIZER_TAG_PERCENT_EQUAL; self->index++; break; default: - result.tag = TOKEN_TAG_PERCENT; + result.tag = TOKENIZER_TAG_PERCENT; break; } break; - case TOKEN_STATE_PLUS: + case TOKENIZER_STATE_PLUS: self->index++; switch (self->buffer[self->index]) { case '=': - result.tag = TOKEN_TAG_PLUS_EQUAL; + result.tag = TOKENIZER_TAG_PLUS_EQUAL; self->index++; break; case '+': - result.tag = TOKEN_TAG_PLUS_PLUS; + result.tag = TOKENIZER_TAG_PLUS_PLUS; self->index++; break; case '%': - state = TOKEN_STATE_PLUS_PERCENT; + state = TOKENIZER_STATE_PLUS_PERCENT; goto state; case '|': - state = TOKEN_STATE_PLUS_PIPE; + state = TOKENIZER_STATE_PLUS_PIPE; goto state; default: - result.tag = TOKEN_TAG_PLUS; + result.tag = TOKENIZER_TAG_PLUS; break; } break; - case TOKEN_STATE_PLUS_PERCENT: + case TOKENIZER_STATE_PLUS_PERCENT: self->index++; switch (self->buffer[self->index]) { case '=': - result.tag = TOKEN_TAG_PLUS_PERCENT_EQUAL; + result.tag = TOKENIZER_TAG_PLUS_PERCENT_EQUAL; self->index++; break; default: - result.tag = TOKEN_TAG_PLUS_PERCENT; + result.tag = TOKENIZER_TAG_PLUS_PERCENT; break; } break; - case TOKEN_STATE_PLUS_PIPE: + case TOKENIZER_STATE_PLUS_PIPE: self->index++; switch (self->buffer[self->index]) { case '=': - result.tag = TOKEN_TAG_PLUS_PIPE_EQUAL; + result.tag = TOKENIZER_TAG_PLUS_PIPE_EQUAL; self->index++; break; default: - result.tag = 
TOKEN_TAG_PLUS_PIPE; + result.tag = TOKENIZER_TAG_PLUS_PIPE; break; } break; - case TOKEN_STATE_CARET: + case TOKENIZER_STATE_CARET: self->index++; switch (self->buffer[self->index]) { case '=': - result.tag = TOKEN_TAG_CARET_EQUAL; + result.tag = TOKENIZER_TAG_CARET_EQUAL; self->index++; break; default: - result.tag = TOKEN_TAG_CARET; + result.tag = TOKENIZER_TAG_CARET; break; } break; - case TOKEN_STATE_IDENTIFIER: + case TOKENIZER_STATE_IDENTIFIER: self->index++; switch (self->buffer[self->index]) { case 'a' ... 'z': case 'A' ... 'Z': case '_': case '0' ... '9': - state = TOKEN_STATE_IDENTIFIER; + state = TOKENIZER_STATE_IDENTIFIER; goto state; default:; // Once we're at C23, this semicolon can be removed. const char* start = self->buffer + result.loc.start; uint32_t len = self->index - result.loc.start; - token_tag tag = get_keyword(start, len); - if (tag != TOKEN_TAG_INVALID) { + tokenizer_tag tag = get_keyword(start, len); + if (tag != TOKENIZER_TAG_INVALID) { result.tag = tag; } } break; - case TOKEN_STATE_BUILTIN: + case TOKENIZER_STATE_BUILTIN: self->index++; switch (self->buffer[self->index]) { case 'a' ... 'z': case 'A' ... 'Z': case '_': case '0' ... '9': - state = TOKEN_STATE_BUILTIN; + state = TOKENIZER_STATE_BUILTIN; goto state; break; } break; - case TOKEN_STATE_BACKSLASH: + case TOKENIZER_STATE_BACKSLASH: self->index++; switch (self->buffer[self->index]) { case 0: - result.tag = TOKEN_TAG_INVALID; + result.tag = TOKENIZER_TAG_INVALID; break; case '\\': - state = TOKEN_STATE_MULTILINE_STRING_LITERAL_LINE; + state = TOKENIZER_STATE_MULTILINE_STRING_LITERAL_LINE; goto state; case '\n': - result.tag = TOKEN_TAG_INVALID; + result.tag = TOKENIZER_TAG_INVALID; break; default: - state = TOKEN_STATE_INVALID; + state = TOKENIZER_STATE_INVALID; goto state; } break; - case TOKEN_STATE_STRING_LITERAL: + case TOKENIZER_STATE_STRING_LITERAL: self->index++; switch (self->buffer[self->index]) { case 0: if (self->index != self->buffer_len) { - state = TOKEN_STATE_INVALID; + state = TOKENIZER_STATE_INVALID; goto state; } else { - result.tag = TOKEN_TAG_INVALID; + result.tag = TOKENIZER_TAG_INVALID; } break; case '\n': - result.tag = TOKEN_TAG_INVALID; + result.tag = TOKENIZER_TAG_INVALID; break; case '\\': - state = TOKEN_STATE_STRING_LITERAL_BACKSLASH; + state = TOKENIZER_STATE_STRING_LITERAL_BACKSLASH; goto state; case '"': self->index++; @@ -519,43 +517,43 @@ state: case 0x01 ... 0x09: case 0x0b ... 
0x1f: case 0x7f: - state = TOKEN_STATE_INVALID; + state = TOKENIZER_STATE_INVALID; goto state; default: - state = TOKEN_STATE_STRING_LITERAL; + state = TOKENIZER_STATE_STRING_LITERAL; goto state; } break; - case TOKEN_STATE_STRING_LITERAL_BACKSLASH: + case TOKENIZER_STATE_STRING_LITERAL_BACKSLASH: self->index++; switch (self->buffer[self->index]) { case 0: case '\n': - result.tag = TOKEN_TAG_INVALID; + result.tag = TOKENIZER_TAG_INVALID; break; default: - state = TOKEN_STATE_STRING_LITERAL; + state = TOKENIZER_STATE_STRING_LITERAL; goto state; } break; - case TOKEN_STATE_CHAR_LITERAL: + case TOKENIZER_STATE_CHAR_LITERAL: self->index++; switch (self->buffer[self->index]) { case 0: if (self->index != self->buffer_len) { - state = TOKEN_STATE_INVALID; + state = TOKENIZER_STATE_INVALID; goto state; } else { - result.tag = TOKEN_TAG_INVALID; + result.tag = TOKENIZER_TAG_INVALID; } break; case '\n': - result.tag = TOKEN_TAG_INVALID; + result.tag = TOKENIZER_TAG_INVALID; break; case '\\': - state = TOKEN_STATE_CHAR_LITERAL_BACKSLASH; + state = TOKENIZER_STATE_CHAR_LITERAL_BACKSLASH; goto state; case '\'': self->index++; @@ -563,45 +561,45 @@ state: case 0x01 ... 0x09: case 0x0b ... 0x1f: case 0x7f: - state = TOKEN_STATE_INVALID; + state = TOKENIZER_STATE_INVALID; goto state; default: - state = TOKEN_STATE_CHAR_LITERAL; + state = TOKENIZER_STATE_CHAR_LITERAL; goto state; } break; - case TOKEN_STATE_CHAR_LITERAL_BACKSLASH: + case TOKENIZER_STATE_CHAR_LITERAL_BACKSLASH: self->index++; switch (self->buffer[self->index]) { case 0: if (self->index != self->buffer_len) { - state = TOKEN_STATE_INVALID; + state = TOKENIZER_STATE_INVALID; goto state; } else { - result.tag = TOKEN_TAG_INVALID; + result.tag = TOKENIZER_TAG_INVALID; } break; case '\n': - result.tag = TOKEN_TAG_INVALID; + result.tag = TOKENIZER_TAG_INVALID; break; case 0x01 ... 0x09: case 0x0b ... 0x1f: case 0x7f: - state = TOKEN_STATE_INVALID; + state = TOKENIZER_STATE_INVALID; goto state; default: - state = TOKEN_STATE_CHAR_LITERAL; + state = TOKENIZER_STATE_CHAR_LITERAL; goto state; } break; - case TOKEN_STATE_MULTILINE_STRING_LITERAL_LINE: + case TOKENIZER_STATE_MULTILINE_STRING_LITERAL_LINE: self->index++; switch (self->buffer[self->index]) { case 0: if (self->index != self->buffer_len) { - state = TOKEN_STATE_INVALID; + state = TOKENIZER_STATE_INVALID; goto state; } break; @@ -609,7 +607,7 @@ state: break; case '\r': if (self->buffer[self->index + 1] != '\n') { - state = TOKEN_STATE_INVALID; + state = TOKENIZER_STATE_INVALID; goto state; } break; @@ -617,250 +615,250 @@ state: case 0x0b ... 0x0c: case 0x0e ... 
0x1f: case 0x7f: - state = TOKEN_STATE_INVALID; + state = TOKENIZER_STATE_INVALID; goto state; default: - state = TOKEN_STATE_MULTILINE_STRING_LITERAL_LINE; + state = TOKENIZER_STATE_MULTILINE_STRING_LITERAL_LINE; goto state; } break; - case TOKEN_STATE_BANG: + case TOKENIZER_STATE_BANG: self->index++; switch (self->buffer[self->index]) { case '=': - result.tag = TOKEN_TAG_BANG_EQUAL; + result.tag = TOKENIZER_TAG_BANG_EQUAL; self->index++; break; default: - result.tag = TOKEN_TAG_BANG; + result.tag = TOKENIZER_TAG_BANG; break; } break; - case TOKEN_STATE_PIPE: + case TOKENIZER_STATE_PIPE: self->index++; switch (self->buffer[self->index]) { case '=': - result.tag = TOKEN_TAG_PIPE_EQUAL; + result.tag = TOKENIZER_TAG_PIPE_EQUAL; self->index++; break; case '|': - result.tag = TOKEN_TAG_PIPE_PIPE; + result.tag = TOKENIZER_TAG_PIPE_PIPE; self->index++; break; default: - result.tag = TOKEN_TAG_PIPE; + result.tag = TOKENIZER_TAG_PIPE; break; } break; - case TOKEN_STATE_EQUAL: + case TOKENIZER_STATE_EQUAL: self->index++; switch (self->buffer[self->index]) { case '=': - result.tag = TOKEN_TAG_EQUAL_EQUAL; + result.tag = TOKENIZER_TAG_EQUAL_EQUAL; self->index++; break; case '>': - result.tag = TOKEN_TAG_EQUAL_ANGLE_BRACKET_RIGHT; + result.tag = TOKENIZER_TAG_EQUAL_ANGLE_BRACKET_RIGHT; self->index++; break; default: - result.tag = TOKEN_TAG_EQUAL; + result.tag = TOKENIZER_TAG_EQUAL; break; } break; - case TOKEN_STATE_MINUS: + case TOKENIZER_STATE_MINUS: self->index++; switch (self->buffer[self->index]) { case '>': - result.tag = TOKEN_TAG_ARROW; + result.tag = TOKENIZER_TAG_ARROW; self->index++; break; case '=': - result.tag = TOKEN_TAG_MINUS_EQUAL; + result.tag = TOKENIZER_TAG_MINUS_EQUAL; self->index++; break; case '%': - state = TOKEN_STATE_MINUS_PERCENT; + state = TOKENIZER_STATE_MINUS_PERCENT; goto state; case '|': - state = TOKEN_STATE_MINUS_PIPE; + state = TOKENIZER_STATE_MINUS_PIPE; goto state; default: - result.tag = TOKEN_TAG_MINUS; + result.tag = TOKENIZER_TAG_MINUS; break; } break; - case TOKEN_STATE_MINUS_PERCENT: + case TOKENIZER_STATE_MINUS_PERCENT: self->index++; switch (self->buffer[self->index]) { case '=': - result.tag = TOKEN_TAG_MINUS_PERCENT_EQUAL; + result.tag = TOKENIZER_TAG_MINUS_PERCENT_EQUAL; self->index++; break; default: - result.tag = TOKEN_TAG_MINUS_PERCENT; + result.tag = TOKENIZER_TAG_MINUS_PERCENT; break; } break; - case TOKEN_STATE_MINUS_PIPE: + case TOKENIZER_STATE_MINUS_PIPE: self->index++; switch (self->buffer[self->index]) { case '=': - result.tag = TOKEN_TAG_MINUS_PIPE_EQUAL; + result.tag = TOKENIZER_TAG_MINUS_PIPE_EQUAL; self->index++; break; default: - result.tag = TOKEN_TAG_MINUS_PIPE; + result.tag = TOKENIZER_TAG_MINUS_PIPE; break; } break; - case TOKEN_STATE_ANGLE_BRACKET_LEFT: + case TOKENIZER_STATE_ANGLE_BRACKET_LEFT: self->index++; switch (self->buffer[self->index]) { case '<': - state = TOKEN_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT; + state = TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT; goto state; case '=': - result.tag = TOKEN_TAG_ANGLE_BRACKET_LEFT_EQUAL; + result.tag = TOKENIZER_TAG_ANGLE_BRACKET_LEFT_EQUAL; self->index++; break; default: - result.tag = TOKEN_TAG_ANGLE_BRACKET_LEFT; + result.tag = TOKENIZER_TAG_ANGLE_BRACKET_LEFT; break; } break; - case TOKEN_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT: + case TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT: self->index++; switch (self->buffer[self->index]) { case '=': - result.tag = TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_EQUAL; + result.tag = 
TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_EQUAL; self->index++; break; case '|': - state = TOKEN_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE; + state = TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE; goto state; default: - result.tag = TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT; + result.tag = TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT; break; } break; - case TOKEN_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE: + case TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE: self->index++; switch (self->buffer[self->index]) { case '=': - result.tag = TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE_EQUAL; + result.tag = TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE_EQUAL; self->index++; break; default: - result.tag = TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE; + result.tag = TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE; break; } break; - case TOKEN_STATE_ANGLE_BRACKET_RIGHT: + case TOKENIZER_STATE_ANGLE_BRACKET_RIGHT: self->index++; switch (self->buffer[self->index]) { case '>': - state = TOKEN_STATE_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT; + state = TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT; goto state; case '=': - result.tag = TOKEN_TAG_ANGLE_BRACKET_RIGHT_EQUAL; + result.tag = TOKENIZER_TAG_ANGLE_BRACKET_RIGHT_EQUAL; self->index++; break; default: - result.tag = TOKEN_TAG_ANGLE_BRACKET_RIGHT; + result.tag = TOKENIZER_TAG_ANGLE_BRACKET_RIGHT; break; } break; - case TOKEN_STATE_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT: + case TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT: self->index++; switch (self->buffer[self->index]) { case '=': - result.tag = TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT_EQUAL; + result.tag = TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT_EQUAL; self->index++; break; default: - result.tag = TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT; + result.tag = TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT; break; } break; - case TOKEN_STATE_PERIOD: + case TOKENIZER_STATE_PERIOD: self->index++; switch (self->buffer[self->index]) { case '.': - state = TOKEN_STATE_PERIOD_2; + state = TOKENIZER_STATE_PERIOD_2; goto state; case '*': - state = TOKEN_STATE_PERIOD_ASTERISK; + state = TOKENIZER_STATE_PERIOD_ASTERISK; goto state; default: - result.tag = TOKEN_TAG_PERIOD; + result.tag = TOKENIZER_TAG_PERIOD; break; } break; - case TOKEN_STATE_PERIOD_2: + case TOKENIZER_STATE_PERIOD_2: self->index++; switch (self->buffer[self->index]) { case '.': - result.tag = TOKEN_TAG_ELLIPSIS3; + result.tag = TOKENIZER_TAG_ELLIPSIS3; self->index++; break; default: - result.tag = TOKEN_TAG_ELLIPSIS2; + result.tag = TOKENIZER_TAG_ELLIPSIS2; break; } break; - case TOKEN_STATE_PERIOD_ASTERISK: + case TOKENIZER_STATE_PERIOD_ASTERISK: self->index++; switch (self->buffer[self->index]) { case '*': - result.tag = TOKEN_TAG_INVALID_PERIODASTERISKS; + result.tag = TOKENIZER_TAG_INVALID_PERIODASTERISKS; break; default: - result.tag = TOKEN_TAG_PERIOD_ASTERISK; + result.tag = TOKENIZER_TAG_PERIOD_ASTERISK; break; } break; - case TOKEN_STATE_SLASH: + case TOKENIZER_STATE_SLASH: self->index++; switch (self->buffer[self->index]) { case '/': - state = TOKEN_STATE_LINE_COMMENT_START; + state = TOKENIZER_STATE_LINE_COMMENT_START; goto state; case '=': - result.tag = TOKEN_TAG_SLASH_EQUAL; + result.tag = TOKENIZER_TAG_SLASH_EQUAL; self->index++; break; default: - result.tag = TOKEN_TAG_SLASH; + result.tag = TOKENIZER_TAG_SLASH; break; } break; - case TOKEN_STATE_LINE_COMMENT_START: + case TOKENIZER_STATE_LINE_COMMENT_START: self->index++; switch (self->buffer[self->index]) { case 0: if 
(self->index != self->buffer_len) { - state = TOKEN_STATE_INVALID; + state = TOKENIZER_STATE_INVALID; goto state; } else { - return (token) { - .tag = TOKEN_TAG_EOF, + return (tokenizer_token) { + .tag = TOKENIZER_TAG_EOF, .loc = { .start = self->index, .end = self->index } @@ -868,73 +866,73 @@ state: } break; case '!': - result.tag = TOKEN_TAG_CONTAINER_DOC_COMMENT; - state = TOKEN_STATE_DOC_COMMENT; + result.tag = TOKENIZER_TAG_CONTAINER_DOC_COMMENT; + state = TOKENIZER_STATE_DOC_COMMENT; goto state; case '\n': self->index++; result.loc.start = self->index; - state = TOKEN_STATE_START; + state = TOKENIZER_STATE_START; goto state; case '/': - state = TOKEN_STATE_DOC_COMMENT_START; + state = TOKENIZER_STATE_DOC_COMMENT_START; goto state; case '\r': - state = TOKEN_STATE_EXPECT_NEWLINE; + state = TOKENIZER_STATE_EXPECT_NEWLINE; goto state; case 0x01 ... 0x09: case 0x0b ... 0x0c: case 0x0e ... 0x1f: case 0x7f: - state = TOKEN_STATE_INVALID; + state = TOKENIZER_STATE_INVALID; goto state; default: - state = TOKEN_STATE_LINE_COMMENT; + state = TOKENIZER_STATE_LINE_COMMENT; goto state; } break; - case TOKEN_STATE_DOC_COMMENT_START: + case TOKENIZER_STATE_DOC_COMMENT_START: self->index++; switch (self->buffer[self->index]) { case 0: case '\n': - result.tag = TOKEN_TAG_DOC_COMMENT; + result.tag = TOKENIZER_TAG_DOC_COMMENT; break; case '\r': if (self->buffer[self->index + 1] == '\n') { - result.tag = TOKEN_TAG_DOC_COMMENT; + result.tag = TOKENIZER_TAG_DOC_COMMENT; } else { - state = TOKEN_STATE_INVALID; + state = TOKENIZER_STATE_INVALID; goto state; } break; case '/': - state = TOKEN_STATE_LINE_COMMENT; + state = TOKENIZER_STATE_LINE_COMMENT; goto state; case 0x01 ... 0x09: case 0x0b ... 0x0c: case 0x0e ... 0x1f: case 0x7f: - state = TOKEN_STATE_INVALID; + state = TOKENIZER_STATE_INVALID; goto state; default: - result.tag = TOKEN_TAG_DOC_COMMENT; - state = TOKEN_STATE_DOC_COMMENT; + result.tag = TOKENIZER_TAG_DOC_COMMENT; + state = TOKENIZER_STATE_DOC_COMMENT; goto state; } break; - case TOKEN_STATE_LINE_COMMENT: + case TOKENIZER_STATE_LINE_COMMENT: self->index++; switch (self->buffer[self->index]) { case 0: if (self->index != self->buffer_len) { - state = TOKEN_STATE_INVALID; + state = TOKENIZER_STATE_INVALID; goto state; } else { - return (token) { - .tag = TOKEN_TAG_EOF, + return (tokenizer_token) { + .tag = TOKENIZER_TAG_EOF, .loc = { .start = self->index, .end = self->index } @@ -944,24 +942,24 @@ state: case '\n': self->index++; result.loc.start = self->index; - state = TOKEN_STATE_START; + state = TOKENIZER_STATE_START; goto state; case '\r': - state = TOKEN_STATE_EXPECT_NEWLINE; + state = TOKENIZER_STATE_EXPECT_NEWLINE; goto state; case 0x01 ... 0x09: case 0x0b ... 0x0c: case 0x0e ... 0x1f: case 0x7f: - state = TOKEN_STATE_INVALID; + state = TOKENIZER_STATE_INVALID; goto state; default: - state = TOKEN_STATE_LINE_COMMENT; + state = TOKENIZER_STATE_LINE_COMMENT; goto state; } break; - case TOKEN_STATE_DOC_COMMENT: + case TOKENIZER_STATE_DOC_COMMENT: self->index++; switch (self->buffer[self->index]) { case 0: @@ -969,7 +967,7 @@ state: break; case '\r': if (self->buffer[self->index + 1] != '\n') { - state = TOKEN_STATE_INVALID; + state = TOKENIZER_STATE_INVALID; goto state; } break; @@ -977,18 +975,18 @@ state: case 0x0b ... 0x0c: case 0x0e ... 
0x1f: case 0x7f: - state = TOKEN_STATE_INVALID; + state = TOKENIZER_STATE_INVALID; goto state; default: - state = TOKEN_STATE_DOC_COMMENT; + state = TOKENIZER_STATE_DOC_COMMENT; goto state; } break; - case TOKEN_STATE_INT: + case TOKENIZER_STATE_INT: switch (self->buffer[self->index]) { case '.': - state = TOKEN_STATE_INT_PERIOD; + state = TOKENIZER_STATE_INT_PERIOD; goto state; case '_': case 'a' ... 'd': @@ -999,34 +997,34 @@ state: case 'Q' ... 'Z': case '0' ... '9': self->index++; - state = TOKEN_STATE_INT; + state = TOKENIZER_STATE_INT; goto state; case 'e': case 'E': case 'p': case 'P': - state = TOKEN_STATE_INT_EXPONENT; + state = TOKENIZER_STATE_INT_EXPONENT; goto state; default: break; } break; - case TOKEN_STATE_INT_EXPONENT: + case TOKENIZER_STATE_INT_EXPONENT: self->index++; switch (self->buffer[self->index]) { case '-': case '+': self->index++; - state = TOKEN_STATE_FLOAT; + state = TOKENIZER_STATE_FLOAT; goto state; default: - state = TOKEN_STATE_INT; + state = TOKENIZER_STATE_INT; goto state; } break; - case TOKEN_STATE_INT_PERIOD: + case TOKENIZER_STATE_INT_PERIOD: self->index++; switch (self->buffer[self->index]) { case '_': @@ -1038,13 +1036,13 @@ state: case 'Q' ... 'Z': case '0' ... '9': self->index++; - state = TOKEN_STATE_FLOAT; + state = TOKENIZER_STATE_FLOAT; goto state; case 'e': case 'E': case 'p': case 'P': - state = TOKEN_STATE_FLOAT_EXPONENT; + state = TOKENIZER_STATE_FLOAT_EXPONENT; goto state; default: self->index--; @@ -1052,7 +1050,7 @@ state: } break; - case TOKEN_STATE_FLOAT: + case TOKENIZER_STATE_FLOAT: switch (self->buffer[self->index]) { case '_': case 'a' ... 'd': @@ -1063,29 +1061,29 @@ state: case 'Q' ... 'Z': case '0' ... '9': self->index++; - state = TOKEN_STATE_FLOAT; + state = TOKENIZER_STATE_FLOAT; goto state; case 'e': case 'E': case 'p': case 'P': - state = TOKEN_STATE_FLOAT_EXPONENT; + state = TOKENIZER_STATE_FLOAT_EXPONENT; goto state; default: break; } break; - case TOKEN_STATE_FLOAT_EXPONENT: + case TOKENIZER_STATE_FLOAT_EXPONENT: self->index++; switch (self->buffer[self->index]) { case '-': case '+': self->index++; - state = TOKEN_STATE_FLOAT; + state = TOKENIZER_STATE_FLOAT; goto state; default: - state = TOKEN_STATE_FLOAT; + state = TOKENIZER_STATE_FLOAT; goto state; } break; diff --git a/tokenizer.h b/tokenizer.h index 81cc196..206ab50 100644 --- a/tokenizer.h +++ b/tokenizer.h @@ -1,188 +1,188 @@ -#ifndef __ZIG1_TOKENIZER_H__ -#define __ZIG1_TOKENIZER_H__ +#ifndef _ZIG1_TOKENIZER_H__ +#define _ZIG1_TOKENIZER_H__ #include #include typedef enum { - TOKEN_TAG_INVALID, - TOKEN_TAG_INVALID_PERIODASTERISKS, - TOKEN_TAG_IDENTIFIER, - TOKEN_TAG_STRING_LITERAL, - TOKEN_TAG_MULTILINE_STRING_LITERAL_LINE, - TOKEN_TAG_CHAR_LITERAL, - TOKEN_TAG_EOF, - TOKEN_TAG_BUILTIN, - TOKEN_TAG_BANG, - TOKEN_TAG_PIPE, - TOKEN_TAG_PIPE_PIPE, - TOKEN_TAG_PIPE_EQUAL, - TOKEN_TAG_EQUAL, - TOKEN_TAG_EQUAL_EQUAL, - TOKEN_TAG_EQUAL_ANGLE_BRACKET_RIGHT, - TOKEN_TAG_BANG_EQUAL, - TOKEN_TAG_L_PAREN, - TOKEN_TAG_R_PAREN, - TOKEN_TAG_SEMICOLON, - TOKEN_TAG_PERCENT, - TOKEN_TAG_PERCENT_EQUAL, - TOKEN_TAG_L_BRACE, - TOKEN_TAG_R_BRACE, - TOKEN_TAG_L_BRACKET, - TOKEN_TAG_R_BRACKET, - TOKEN_TAG_PERIOD, - TOKEN_TAG_PERIOD_ASTERISK, - TOKEN_TAG_ELLIPSIS2, - TOKEN_TAG_ELLIPSIS3, - TOKEN_TAG_CARET, - TOKEN_TAG_CARET_EQUAL, - TOKEN_TAG_PLUS, - TOKEN_TAG_PLUS_PLUS, - TOKEN_TAG_PLUS_EQUAL, - TOKEN_TAG_PLUS_PERCENT, - TOKEN_TAG_PLUS_PERCENT_EQUAL, - TOKEN_TAG_PLUS_PIPE, - TOKEN_TAG_PLUS_PIPE_EQUAL, - TOKEN_TAG_MINUS, - TOKEN_TAG_MINUS_EQUAL, - TOKEN_TAG_MINUS_PERCENT, - 
TOKEN_TAG_MINUS_PERCENT_EQUAL, - TOKEN_TAG_MINUS_PIPE, - TOKEN_TAG_MINUS_PIPE_EQUAL, - TOKEN_TAG_ASTERISK, - TOKEN_TAG_ASTERISK_EQUAL, - TOKEN_TAG_ASTERISK_ASTERISK, - TOKEN_TAG_ASTERISK_PERCENT, - TOKEN_TAG_ASTERISK_PERCENT_EQUAL, - TOKEN_TAG_ASTERISK_PIPE, - TOKEN_TAG_ASTERISK_PIPE_EQUAL, - TOKEN_TAG_ARROW, - TOKEN_TAG_COLON, - TOKEN_TAG_SLASH, - TOKEN_TAG_SLASH_EQUAL, - TOKEN_TAG_COMMA, - TOKEN_TAG_AMPERSAND, - TOKEN_TAG_AMPERSAND_EQUAL, - TOKEN_TAG_QUESTION_MARK, - TOKEN_TAG_ANGLE_BRACKET_LEFT, - TOKEN_TAG_ANGLE_BRACKET_LEFT_EQUAL, - TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT, - TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_EQUAL, - TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE, - TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE_EQUAL, - TOKEN_TAG_ANGLE_BRACKET_RIGHT, - TOKEN_TAG_ANGLE_BRACKET_RIGHT_EQUAL, - TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT, - TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT_EQUAL, - TOKEN_TAG_TILDE, - TOKEN_TAG_NUMBER_LITERAL, - TOKEN_TAG_DOC_COMMENT, - TOKEN_TAG_CONTAINER_DOC_COMMENT, - TOKEN_TAG_KEYWORD_ADDRSPACE, - TOKEN_TAG_KEYWORD_ALIGN, - TOKEN_TAG_KEYWORD_ALLOWZERO, - TOKEN_TAG_KEYWORD_AND, - TOKEN_TAG_KEYWORD_ANYFRAME, - TOKEN_TAG_KEYWORD_ANYTYPE, - TOKEN_TAG_KEYWORD_ASM, - TOKEN_TAG_KEYWORD_ASYNC, - TOKEN_TAG_KEYWORD_AWAIT, - TOKEN_TAG_KEYWORD_BREAK, - TOKEN_TAG_KEYWORD_CALLCONV, - TOKEN_TAG_KEYWORD_CATCH, - TOKEN_TAG_KEYWORD_COMPTIME, - TOKEN_TAG_KEYWORD_CONST, - TOKEN_TAG_KEYWORD_CONTINUE, - TOKEN_TAG_KEYWORD_DEFER, - TOKEN_TAG_KEYWORD_ELSE, - TOKEN_TAG_KEYWORD_ENUM, - TOKEN_TAG_KEYWORD_ERRDEFER, - TOKEN_TAG_KEYWORD_ERROR, - TOKEN_TAG_KEYWORD_EXPORT, - TOKEN_TAG_KEYWORD_EXTERN, - TOKEN_TAG_KEYWORD_FN, - TOKEN_TAG_KEYWORD_FOR, - TOKEN_TAG_KEYWORD_IF, - TOKEN_TAG_KEYWORD_INLINE, - TOKEN_TAG_KEYWORD_NOALIAS, - TOKEN_TAG_KEYWORD_NOINLINE, - TOKEN_TAG_KEYWORD_NOSUSPEND, - TOKEN_TAG_KEYWORD_OPAQUE, - TOKEN_TAG_KEYWORD_OR, - TOKEN_TAG_KEYWORD_ORELSE, - TOKEN_TAG_KEYWORD_PACKED, - TOKEN_TAG_KEYWORD_PUB, - TOKEN_TAG_KEYWORD_RESUME, - TOKEN_TAG_KEYWORD_RETURN, - TOKEN_TAG_KEYWORD_LINKSECTION, - TOKEN_TAG_KEYWORD_STRUCT, - TOKEN_TAG_KEYWORD_SUSPEND, - TOKEN_TAG_KEYWORD_SWITCH, - TOKEN_TAG_KEYWORD_TEST, - TOKEN_TAG_KEYWORD_THREADLOCAL, - TOKEN_TAG_KEYWORD_TRY, - TOKEN_TAG_KEYWORD_UNION, - TOKEN_TAG_KEYWORD_UNREACHABLE, - TOKEN_TAG_KEYWORD_USINGNAMESPACE, - TOKEN_TAG_KEYWORD_VAR, - TOKEN_TAG_KEYWORD_VOLATILE, - TOKEN_TAG_KEYWORD_WHILE, -} token_tag; + TOKENIZER_TAG_INVALID, + TOKENIZER_TAG_INVALID_PERIODASTERISKS, + TOKENIZER_TAG_IDENTIFIER, + TOKENIZER_TAG_STRING_LITERAL, + TOKENIZER_TAG_MULTILINE_STRING_LITERAL_LINE, + TOKENIZER_TAG_CHAR_LITERAL, + TOKENIZER_TAG_EOF, + TOKENIZER_TAG_BUILTIN, + TOKENIZER_TAG_BANG, + TOKENIZER_TAG_PIPE, + TOKENIZER_TAG_PIPE_PIPE, + TOKENIZER_TAG_PIPE_EQUAL, + TOKENIZER_TAG_EQUAL, + TOKENIZER_TAG_EQUAL_EQUAL, + TOKENIZER_TAG_EQUAL_ANGLE_BRACKET_RIGHT, + TOKENIZER_TAG_BANG_EQUAL, + TOKENIZER_TAG_L_PAREN, + TOKENIZER_TAG_R_PAREN, + TOKENIZER_TAG_SEMICOLON, + TOKENIZER_TAG_PERCENT, + TOKENIZER_TAG_PERCENT_EQUAL, + TOKENIZER_TAG_L_BRACE, + TOKENIZER_TAG_R_BRACE, + TOKENIZER_TAG_L_BRACKET, + TOKENIZER_TAG_R_BRACKET, + TOKENIZER_TAG_PERIOD, + TOKENIZER_TAG_PERIOD_ASTERISK, + TOKENIZER_TAG_ELLIPSIS2, + TOKENIZER_TAG_ELLIPSIS3, + TOKENIZER_TAG_CARET, + TOKENIZER_TAG_CARET_EQUAL, + TOKENIZER_TAG_PLUS, + TOKENIZER_TAG_PLUS_PLUS, + TOKENIZER_TAG_PLUS_EQUAL, + TOKENIZER_TAG_PLUS_PERCENT, + TOKENIZER_TAG_PLUS_PERCENT_EQUAL, + TOKENIZER_TAG_PLUS_PIPE, + TOKENIZER_TAG_PLUS_PIPE_EQUAL, + TOKENIZER_TAG_MINUS, + TOKENIZER_TAG_MINUS_EQUAL, + 
+    TOKENIZER_TAG_MINUS_PERCENT,
+    TOKENIZER_TAG_MINUS_PERCENT_EQUAL,
+    TOKENIZER_TAG_MINUS_PIPE,
+    TOKENIZER_TAG_MINUS_PIPE_EQUAL,
+    TOKENIZER_TAG_ASTERISK,
+    TOKENIZER_TAG_ASTERISK_EQUAL,
+    TOKENIZER_TAG_ASTERISK_ASTERISK,
+    TOKENIZER_TAG_ASTERISK_PERCENT,
+    TOKENIZER_TAG_ASTERISK_PERCENT_EQUAL,
+    TOKENIZER_TAG_ASTERISK_PIPE,
+    TOKENIZER_TAG_ASTERISK_PIPE_EQUAL,
+    TOKENIZER_TAG_ARROW,
+    TOKENIZER_TAG_COLON,
+    TOKENIZER_TAG_SLASH,
+    TOKENIZER_TAG_SLASH_EQUAL,
+    TOKENIZER_TAG_COMMA,
+    TOKENIZER_TAG_AMPERSAND,
+    TOKENIZER_TAG_AMPERSAND_EQUAL,
+    TOKENIZER_TAG_QUESTION_MARK,
+    TOKENIZER_TAG_ANGLE_BRACKET_LEFT,
+    TOKENIZER_TAG_ANGLE_BRACKET_LEFT_EQUAL,
+    TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT,
+    TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_EQUAL,
+    TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE,
+    TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE_EQUAL,
+    TOKENIZER_TAG_ANGLE_BRACKET_RIGHT,
+    TOKENIZER_TAG_ANGLE_BRACKET_RIGHT_EQUAL,
+    TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT,
+    TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT_EQUAL,
+    TOKENIZER_TAG_TILDE,
+    TOKENIZER_TAG_NUMBER_LITERAL,
+    TOKENIZER_TAG_DOC_COMMENT,
+    TOKENIZER_TAG_CONTAINER_DOC_COMMENT,
+    TOKENIZER_TAG_KEYWORD_ADDRSPACE,
+    TOKENIZER_TAG_KEYWORD_ALIGN,
+    TOKENIZER_TAG_KEYWORD_ALLOWZERO,
+    TOKENIZER_TAG_KEYWORD_AND,
+    TOKENIZER_TAG_KEYWORD_ANYFRAME,
+    TOKENIZER_TAG_KEYWORD_ANYTYPE,
+    TOKENIZER_TAG_KEYWORD_ASM,
+    TOKENIZER_TAG_KEYWORD_ASYNC,
+    TOKENIZER_TAG_KEYWORD_AWAIT,
+    TOKENIZER_TAG_KEYWORD_BREAK,
+    TOKENIZER_TAG_KEYWORD_CALLCONV,
+    TOKENIZER_TAG_KEYWORD_CATCH,
+    TOKENIZER_TAG_KEYWORD_COMPTIME,
+    TOKENIZER_TAG_KEYWORD_CONST,
+    TOKENIZER_TAG_KEYWORD_CONTINUE,
+    TOKENIZER_TAG_KEYWORD_DEFER,
+    TOKENIZER_TAG_KEYWORD_ELSE,
+    TOKENIZER_TAG_KEYWORD_ENUM,
+    TOKENIZER_TAG_KEYWORD_ERRDEFER,
+    TOKENIZER_TAG_KEYWORD_ERROR,
+    TOKENIZER_TAG_KEYWORD_EXPORT,
+    TOKENIZER_TAG_KEYWORD_EXTERN,
+    TOKENIZER_TAG_KEYWORD_FN,
+    TOKENIZER_TAG_KEYWORD_FOR,
+    TOKENIZER_TAG_KEYWORD_IF,
+    TOKENIZER_TAG_KEYWORD_INLINE,
+    TOKENIZER_TAG_KEYWORD_NOALIAS,
+    TOKENIZER_TAG_KEYWORD_NOINLINE,
+    TOKENIZER_TAG_KEYWORD_NOSUSPEND,
+    TOKENIZER_TAG_KEYWORD_OPAQUE,
+    TOKENIZER_TAG_KEYWORD_OR,
+    TOKENIZER_TAG_KEYWORD_ORELSE,
+    TOKENIZER_TAG_KEYWORD_PACKED,
+    TOKENIZER_TAG_KEYWORD_PUB,
+    TOKENIZER_TAG_KEYWORD_RESUME,
+    TOKENIZER_TAG_KEYWORD_RETURN,
+    TOKENIZER_TAG_KEYWORD_LINKSECTION,
+    TOKENIZER_TAG_KEYWORD_STRUCT,
+    TOKENIZER_TAG_KEYWORD_SUSPEND,
+    TOKENIZER_TAG_KEYWORD_SWITCH,
+    TOKENIZER_TAG_KEYWORD_TEST,
+    TOKENIZER_TAG_KEYWORD_THREADLOCAL,
+    TOKENIZER_TAG_KEYWORD_TRY,
+    TOKENIZER_TAG_KEYWORD_UNION,
+    TOKENIZER_TAG_KEYWORD_UNREACHABLE,
+    TOKENIZER_TAG_KEYWORD_USINGNAMESPACE,
+    TOKENIZER_TAG_KEYWORD_VAR,
+    TOKENIZER_TAG_KEYWORD_VOLATILE,
+    TOKENIZER_TAG_KEYWORD_WHILE,
+} tokenizer_tag;
 typedef enum {
-    TOKEN_STATE_START,
-    TOKEN_STATE_EXPECT_NEWLINE,
-    TOKEN_STATE_IDENTIFIER,
-    TOKEN_STATE_BUILTIN,
-    TOKEN_STATE_STRING_LITERAL,
-    TOKEN_STATE_STRING_LITERAL_BACKSLASH,
-    TOKEN_STATE_MULTILINE_STRING_LITERAL_LINE,
-    TOKEN_STATE_CHAR_LITERAL,
-    TOKEN_STATE_CHAR_LITERAL_BACKSLASH,
-    TOKEN_STATE_BACKSLASH,
-    TOKEN_STATE_EQUAL,
-    TOKEN_STATE_BANG,
-    TOKEN_STATE_PIPE,
-    TOKEN_STATE_MINUS,
-    TOKEN_STATE_MINUS_PERCENT,
-    TOKEN_STATE_MINUS_PIPE,
-    TOKEN_STATE_ASTERISK,
-    TOKEN_STATE_ASTERISK_PERCENT,
-    TOKEN_STATE_ASTERISK_PIPE,
-    TOKEN_STATE_SLASH,
-    TOKEN_STATE_LINE_COMMENT_START,
-    TOKEN_STATE_LINE_COMMENT,
-    TOKEN_STATE_DOC_COMMENT_START,
-    TOKEN_STATE_DOC_COMMENT,
-    TOKEN_STATE_INT,
-    TOKEN_STATE_INT_EXPONENT,
-    TOKEN_STATE_INT_PERIOD,
-    TOKEN_STATE_FLOAT,
-    TOKEN_STATE_FLOAT_EXPONENT,
-    TOKEN_STATE_AMPERSAND,
-    TOKEN_STATE_CARET,
-    TOKEN_STATE_PERCENT,
-    TOKEN_STATE_PLUS,
-    TOKEN_STATE_PLUS_PERCENT,
-    TOKEN_STATE_PLUS_PIPE,
-    TOKEN_STATE_ANGLE_BRACKET_LEFT,
-    TOKEN_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT,
-    TOKEN_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE,
-    TOKEN_STATE_ANGLE_BRACKET_RIGHT,
-    TOKEN_STATE_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT,
-    TOKEN_STATE_PERIOD,
-    TOKEN_STATE_PERIOD_2,
-    TOKEN_STATE_PERIOD_ASTERISK,
-    TOKEN_STATE_SAW_AT_SIGN,
-    TOKEN_STATE_INVALID,
-} token_state;
+    TOKENIZER_STATE_START,
+    TOKENIZER_STATE_EXPECT_NEWLINE,
+    TOKENIZER_STATE_IDENTIFIER,
+    TOKENIZER_STATE_BUILTIN,
+    TOKENIZER_STATE_STRING_LITERAL,
+    TOKENIZER_STATE_STRING_LITERAL_BACKSLASH,
+    TOKENIZER_STATE_MULTILINE_STRING_LITERAL_LINE,
+    TOKENIZER_STATE_CHAR_LITERAL,
+    TOKENIZER_STATE_CHAR_LITERAL_BACKSLASH,
+    TOKENIZER_STATE_BACKSLASH,
+    TOKENIZER_STATE_EQUAL,
+    TOKENIZER_STATE_BANG,
+    TOKENIZER_STATE_PIPE,
+    TOKENIZER_STATE_MINUS,
+    TOKENIZER_STATE_MINUS_PERCENT,
+    TOKENIZER_STATE_MINUS_PIPE,
+    TOKENIZER_STATE_ASTERISK,
+    TOKENIZER_STATE_ASTERISK_PERCENT,
+    TOKENIZER_STATE_ASTERISK_PIPE,
+    TOKENIZER_STATE_SLASH,
+    TOKENIZER_STATE_LINE_COMMENT_START,
+    TOKENIZER_STATE_LINE_COMMENT,
+    TOKENIZER_STATE_DOC_COMMENT_START,
+    TOKENIZER_STATE_DOC_COMMENT,
+    TOKENIZER_STATE_INT,
+    TOKENIZER_STATE_INT_EXPONENT,
+    TOKENIZER_STATE_INT_PERIOD,
+    TOKENIZER_STATE_FLOAT,
+    TOKENIZER_STATE_FLOAT_EXPONENT,
+    TOKENIZER_STATE_AMPERSAND,
+    TOKENIZER_STATE_CARET,
+    TOKENIZER_STATE_PERCENT,
+    TOKENIZER_STATE_PLUS,
+    TOKENIZER_STATE_PLUS_PERCENT,
+    TOKENIZER_STATE_PLUS_PIPE,
+    TOKENIZER_STATE_ANGLE_BRACKET_LEFT,
+    TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT,
+    TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE,
+    TOKENIZER_STATE_ANGLE_BRACKET_RIGHT,
+    TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT,
+    TOKENIZER_STATE_PERIOD,
+    TOKENIZER_STATE_PERIOD_2,
+    TOKENIZER_STATE_PERIOD_ASTERISK,
+    TOKENIZER_STATE_SAW_AT_SIGN,
+    TOKENIZER_STATE_INVALID,
+} tokenizer_state;
 typedef struct {
-    token_tag tag;
+    tokenizer_tag tag;
     struct {
         uint32_t start, end;
     } loc;
-} token;
+} tokenizer_token;
 typedef struct {
     const char* buffer;
@@ -191,6 +191,6 @@ typedef struct {
 } tokenizer;
 tokenizer tokenizer_init(const char* buffer, uint32_t len);
-token tokenizer_next(tokenizer* self);
+tokenizer_token tokenizer_next(tokenizer* self);
 #endif
diff --git a/tokenizer_test.zig b/tokenizer_test.zig
index c7847ac..e36920a 100644
--- a/tokenizer_test.zig
+++ b/tokenizer_test.zig
@@ -10,128 +10,128 @@ const c = @cImport({
 fn zigToken(token: c_uint) Token.Tag {
     return switch (token) {
-        c.TOKEN_TAG_INVALID => .invalid,
-        c.TOKEN_TAG_INVALID_PERIODASTERISKS => .invalid_periodasterisks,
-        c.TOKEN_TAG_IDENTIFIER => .identifier,
-        c.TOKEN_TAG_STRING_LITERAL => .string_literal,
-        c.TOKEN_TAG_MULTILINE_STRING_LITERAL_LINE => .multiline_string_literal_line,
-        c.TOKEN_TAG_CHAR_LITERAL => .char_literal,
-        c.TOKEN_TAG_EOF => .eof,
-        c.TOKEN_TAG_BUILTIN => .builtin,
-        c.TOKEN_TAG_BANG => .bang,
-        c.TOKEN_TAG_PIPE => .pipe,
-        c.TOKEN_TAG_PIPE_PIPE => .pipe_pipe,
-        c.TOKEN_TAG_PIPE_EQUAL => .pipe_equal,
-        c.TOKEN_TAG_EQUAL => .equal,
-        c.TOKEN_TAG_EQUAL_EQUAL => .equal_equal,
-        c.TOKEN_TAG_EQUAL_ANGLE_BRACKET_RIGHT => .equal_angle_bracket_right,
-        c.TOKEN_TAG_BANG_EQUAL => .bang_equal,
-        c.TOKEN_TAG_L_PAREN => .l_paren,
-        c.TOKEN_TAG_R_PAREN => .r_paren,
-        c.TOKEN_TAG_SEMICOLON => .semicolon,
-        c.TOKEN_TAG_PERCENT => .percent,
-        c.TOKEN_TAG_PERCENT_EQUAL => .percent_equal,
-        c.TOKEN_TAG_L_BRACE => .l_brace,
-        c.TOKEN_TAG_R_BRACE => .r_brace,
-        c.TOKEN_TAG_L_BRACKET => .l_bracket,
-        c.TOKEN_TAG_R_BRACKET => .r_bracket,
-        c.TOKEN_TAG_PERIOD => .period,
-        c.TOKEN_TAG_PERIOD_ASTERISK => .period_asterisk,
-        c.TOKEN_TAG_ELLIPSIS2 => .ellipsis2,
-        c.TOKEN_TAG_ELLIPSIS3 => .ellipsis3,
-        c.TOKEN_TAG_CARET => .caret,
-        c.TOKEN_TAG_CARET_EQUAL => .caret_equal,
-        c.TOKEN_TAG_PLUS => .plus,
-        c.TOKEN_TAG_PLUS_PLUS => .plus_plus,
-        c.TOKEN_TAG_PLUS_EQUAL => .plus_equal,
-        c.TOKEN_TAG_PLUS_PERCENT => .plus_percent,
-        c.TOKEN_TAG_PLUS_PERCENT_EQUAL => .plus_percent_equal,
-        c.TOKEN_TAG_PLUS_PIPE => .plus_pipe,
-        c.TOKEN_TAG_PLUS_PIPE_EQUAL => .plus_pipe_equal,
-        c.TOKEN_TAG_MINUS => .minus,
-        c.TOKEN_TAG_MINUS_EQUAL => .minus_equal,
-        c.TOKEN_TAG_MINUS_PERCENT => .minus_percent,
-        c.TOKEN_TAG_MINUS_PERCENT_EQUAL => .minus_percent_equal,
-        c.TOKEN_TAG_MINUS_PIPE => .minus_pipe,
-        c.TOKEN_TAG_MINUS_PIPE_EQUAL => .minus_pipe_equal,
-        c.TOKEN_TAG_ASTERISK => .asterisk,
-        c.TOKEN_TAG_ASTERISK_EQUAL => .asterisk_equal,
-        c.TOKEN_TAG_ASTERISK_ASTERISK => .asterisk_asterisk,
-        c.TOKEN_TAG_ASTERISK_PERCENT => .asterisk_percent,
-        c.TOKEN_TAG_ASTERISK_PERCENT_EQUAL => .asterisk_percent_equal,
-        c.TOKEN_TAG_ASTERISK_PIPE => .asterisk_pipe,
-        c.TOKEN_TAG_ASTERISK_PIPE_EQUAL => .asterisk_pipe_equal,
-        c.TOKEN_TAG_ARROW => .arrow,
-        c.TOKEN_TAG_COLON => .colon,
-        c.TOKEN_TAG_SLASH => .slash,
-        c.TOKEN_TAG_SLASH_EQUAL => .slash_equal,
-        c.TOKEN_TAG_COMMA => .comma,
-        c.TOKEN_TAG_AMPERSAND => .ampersand,
-        c.TOKEN_TAG_AMPERSAND_EQUAL => .ampersand_equal,
-        c.TOKEN_TAG_QUESTION_MARK => .question_mark,
-        c.TOKEN_TAG_ANGLE_BRACKET_LEFT => .angle_bracket_left,
-        c.TOKEN_TAG_ANGLE_BRACKET_LEFT_EQUAL => .angle_bracket_left_equal,
-        c.TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT => .angle_bracket_angle_bracket_left,
-        c.TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_EQUAL => .angle_bracket_angle_bracket_left_equal,
-        c.TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE => .angle_bracket_angle_bracket_left_pipe,
-        c.TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE_EQUAL => .angle_bracket_angle_bracket_left_pipe_equal,
-        c.TOKEN_TAG_ANGLE_BRACKET_RIGHT => .angle_bracket_right,
-        c.TOKEN_TAG_ANGLE_BRACKET_RIGHT_EQUAL => .angle_bracket_right_equal,
-        c.TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT => .angle_bracket_angle_bracket_right,
-        c.TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT_EQUAL => .angle_bracket_angle_bracket_right_equal,
-        c.TOKEN_TAG_TILDE => .tilde,
-        c.TOKEN_TAG_NUMBER_LITERAL => .number_literal,
-        c.TOKEN_TAG_DOC_COMMENT => .doc_comment,
-        c.TOKEN_TAG_CONTAINER_DOC_COMMENT => .container_doc_comment,
-        c.TOKEN_TAG_KEYWORD_ADDRSPACE => .keyword_addrspace,
-        c.TOKEN_TAG_KEYWORD_ALIGN => .keyword_align,
-        c.TOKEN_TAG_KEYWORD_ALLOWZERO => .keyword_allowzero,
-        c.TOKEN_TAG_KEYWORD_AND => .keyword_and,
-        c.TOKEN_TAG_KEYWORD_ANYFRAME => .keyword_anyframe,
-        c.TOKEN_TAG_KEYWORD_ANYTYPE => .keyword_anytype,
-        c.TOKEN_TAG_KEYWORD_ASM => .keyword_asm,
-        c.TOKEN_TAG_KEYWORD_ASYNC => .keyword_async,
-        c.TOKEN_TAG_KEYWORD_AWAIT => .keyword_await,
-        c.TOKEN_TAG_KEYWORD_BREAK => .keyword_break,
-        c.TOKEN_TAG_KEYWORD_CALLCONV => .keyword_callconv,
-        c.TOKEN_TAG_KEYWORD_CATCH => .keyword_catch,
-        c.TOKEN_TAG_KEYWORD_COMPTIME => .keyword_comptime,
-        c.TOKEN_TAG_KEYWORD_CONST => .keyword_const,
-        c.TOKEN_TAG_KEYWORD_CONTINUE => .keyword_continue,
-        c.TOKEN_TAG_KEYWORD_DEFER => .keyword_defer,
-        c.TOKEN_TAG_KEYWORD_ELSE => .keyword_else,
-        c.TOKEN_TAG_KEYWORD_ENUM => .keyword_enum,
-        c.TOKEN_TAG_KEYWORD_ERRDEFER => .keyword_errdefer,
-        c.TOKEN_TAG_KEYWORD_ERROR => .keyword_error,
-        c.TOKEN_TAG_KEYWORD_EXPORT => .keyword_export,
-        c.TOKEN_TAG_KEYWORD_EXTERN => .keyword_extern,
-        c.TOKEN_TAG_KEYWORD_FN => .keyword_fn,
-        c.TOKEN_TAG_KEYWORD_FOR => .keyword_for,
-        c.TOKEN_TAG_KEYWORD_IF => .keyword_if,
-        c.TOKEN_TAG_KEYWORD_INLINE => .keyword_inline,
-        c.TOKEN_TAG_KEYWORD_NOALIAS => .keyword_noalias,
-        c.TOKEN_TAG_KEYWORD_NOINLINE => .keyword_noinline,
-        c.TOKEN_TAG_KEYWORD_NOSUSPEND => .keyword_nosuspend,
-        c.TOKEN_TAG_KEYWORD_OPAQUE => .keyword_opaque,
-        c.TOKEN_TAG_KEYWORD_OR => .keyword_or,
-        c.TOKEN_TAG_KEYWORD_ORELSE => .keyword_orelse,
-        c.TOKEN_TAG_KEYWORD_PACKED => .keyword_packed,
-        c.TOKEN_TAG_KEYWORD_PUB => .keyword_pub,
-        c.TOKEN_TAG_KEYWORD_RESUME => .keyword_resume,
-        c.TOKEN_TAG_KEYWORD_RETURN => .keyword_return,
-        c.TOKEN_TAG_KEYWORD_LINKSECTION => .keyword_linksection,
-        c.TOKEN_TAG_KEYWORD_STRUCT => .keyword_struct,
-        c.TOKEN_TAG_KEYWORD_SUSPEND => .keyword_suspend,
-        c.TOKEN_TAG_KEYWORD_SWITCH => .keyword_switch,
-        c.TOKEN_TAG_KEYWORD_TEST => .keyword_test,
-        c.TOKEN_TAG_KEYWORD_THREADLOCAL => .keyword_threadlocal,
-        c.TOKEN_TAG_KEYWORD_TRY => .keyword_try,
-        c.TOKEN_TAG_KEYWORD_UNION => .keyword_union,
-        c.TOKEN_TAG_KEYWORD_UNREACHABLE => .keyword_unreachable,
-        c.TOKEN_TAG_KEYWORD_USINGNAMESPACE => .keyword_usingnamespace,
-        c.TOKEN_TAG_KEYWORD_VAR => .keyword_var,
-        c.TOKEN_TAG_KEYWORD_VOLATILE => .keyword_volatile,
-        c.TOKEN_TAG_KEYWORD_WHILE => .keyword_while,
+        c.TOKENIZER_TAG_INVALID => .invalid,
+        c.TOKENIZER_TAG_INVALID_PERIODASTERISKS => .invalid_periodasterisks,
+        c.TOKENIZER_TAG_IDENTIFIER => .identifier,
+        c.TOKENIZER_TAG_STRING_LITERAL => .string_literal,
+        c.TOKENIZER_TAG_MULTILINE_STRING_LITERAL_LINE => .multiline_string_literal_line,
+        c.TOKENIZER_TAG_CHAR_LITERAL => .char_literal,
+        c.TOKENIZER_TAG_EOF => .eof,
+        c.TOKENIZER_TAG_BUILTIN => .builtin,
+        c.TOKENIZER_TAG_BANG => .bang,
+        c.TOKENIZER_TAG_PIPE => .pipe,
+        c.TOKENIZER_TAG_PIPE_PIPE => .pipe_pipe,
+        c.TOKENIZER_TAG_PIPE_EQUAL => .pipe_equal,
+        c.TOKENIZER_TAG_EQUAL => .equal,
+        c.TOKENIZER_TAG_EQUAL_EQUAL => .equal_equal,
+        c.TOKENIZER_TAG_EQUAL_ANGLE_BRACKET_RIGHT => .equal_angle_bracket_right,
+        c.TOKENIZER_TAG_BANG_EQUAL => .bang_equal,
+        c.TOKENIZER_TAG_L_PAREN => .l_paren,
+        c.TOKENIZER_TAG_R_PAREN => .r_paren,
+        c.TOKENIZER_TAG_SEMICOLON => .semicolon,
+        c.TOKENIZER_TAG_PERCENT => .percent,
+        c.TOKENIZER_TAG_PERCENT_EQUAL => .percent_equal,
+        c.TOKENIZER_TAG_L_BRACE => .l_brace,
+        c.TOKENIZER_TAG_R_BRACE => .r_brace,
+        c.TOKENIZER_TAG_L_BRACKET => .l_bracket,
+        c.TOKENIZER_TAG_R_BRACKET => .r_bracket,
+        c.TOKENIZER_TAG_PERIOD => .period,
+        c.TOKENIZER_TAG_PERIOD_ASTERISK => .period_asterisk,
+        c.TOKENIZER_TAG_ELLIPSIS2 => .ellipsis2,
+        c.TOKENIZER_TAG_ELLIPSIS3 => .ellipsis3,
+        c.TOKENIZER_TAG_CARET => .caret,
+        c.TOKENIZER_TAG_CARET_EQUAL => .caret_equal,
+        c.TOKENIZER_TAG_PLUS => .plus,
+        c.TOKENIZER_TAG_PLUS_PLUS => .plus_plus,
+        c.TOKENIZER_TAG_PLUS_EQUAL => .plus_equal,
+        c.TOKENIZER_TAG_PLUS_PERCENT => .plus_percent,
+        c.TOKENIZER_TAG_PLUS_PERCENT_EQUAL => .plus_percent_equal,
+        c.TOKENIZER_TAG_PLUS_PIPE => .plus_pipe,
+        c.TOKENIZER_TAG_PLUS_PIPE_EQUAL => .plus_pipe_equal,
+        c.TOKENIZER_TAG_MINUS => .minus,
+        c.TOKENIZER_TAG_MINUS_EQUAL => .minus_equal,
+        c.TOKENIZER_TAG_MINUS_PERCENT => .minus_percent,
+        c.TOKENIZER_TAG_MINUS_PERCENT_EQUAL => .minus_percent_equal,
+        c.TOKENIZER_TAG_MINUS_PIPE => .minus_pipe,
+        c.TOKENIZER_TAG_MINUS_PIPE_EQUAL => .minus_pipe_equal,
+        c.TOKENIZER_TAG_ASTERISK => .asterisk,
+        c.TOKENIZER_TAG_ASTERISK_EQUAL => .asterisk_equal,
+        c.TOKENIZER_TAG_ASTERISK_ASTERISK => .asterisk_asterisk,
+        c.TOKENIZER_TAG_ASTERISK_PERCENT => .asterisk_percent,
+        c.TOKENIZER_TAG_ASTERISK_PERCENT_EQUAL => .asterisk_percent_equal,
+        c.TOKENIZER_TAG_ASTERISK_PIPE => .asterisk_pipe,
+        c.TOKENIZER_TAG_ASTERISK_PIPE_EQUAL => .asterisk_pipe_equal,
+        c.TOKENIZER_TAG_ARROW => .arrow,
+        c.TOKENIZER_TAG_COLON => .colon,
+        c.TOKENIZER_TAG_SLASH => .slash,
+        c.TOKENIZER_TAG_SLASH_EQUAL => .slash_equal,
+        c.TOKENIZER_TAG_COMMA => .comma,
+        c.TOKENIZER_TAG_AMPERSAND => .ampersand,
+        c.TOKENIZER_TAG_AMPERSAND_EQUAL => .ampersand_equal,
+        c.TOKENIZER_TAG_QUESTION_MARK => .question_mark,
+        c.TOKENIZER_TAG_ANGLE_BRACKET_LEFT => .angle_bracket_left,
+        c.TOKENIZER_TAG_ANGLE_BRACKET_LEFT_EQUAL => .angle_bracket_left_equal,
+        c.TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT => .angle_bracket_angle_bracket_left,
+        c.TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_EQUAL => .angle_bracket_angle_bracket_left_equal,
+        c.TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE => .angle_bracket_angle_bracket_left_pipe,
+        c.TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE_EQUAL => .angle_bracket_angle_bracket_left_pipe_equal,
+        c.TOKENIZER_TAG_ANGLE_BRACKET_RIGHT => .angle_bracket_right,
+        c.TOKENIZER_TAG_ANGLE_BRACKET_RIGHT_EQUAL => .angle_bracket_right_equal,
+        c.TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT => .angle_bracket_angle_bracket_right,
+        c.TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT_EQUAL => .angle_bracket_angle_bracket_right_equal,
+        c.TOKENIZER_TAG_TILDE => .tilde,
+        c.TOKENIZER_TAG_NUMBER_LITERAL => .number_literal,
+        c.TOKENIZER_TAG_DOC_COMMENT => .doc_comment,
+        c.TOKENIZER_TAG_CONTAINER_DOC_COMMENT => .container_doc_comment,
+        c.TOKENIZER_TAG_KEYWORD_ADDRSPACE => .keyword_addrspace,
+        c.TOKENIZER_TAG_KEYWORD_ALIGN => .keyword_align,
+        c.TOKENIZER_TAG_KEYWORD_ALLOWZERO => .keyword_allowzero,
+        c.TOKENIZER_TAG_KEYWORD_AND => .keyword_and,
+        c.TOKENIZER_TAG_KEYWORD_ANYFRAME => .keyword_anyframe,
+        c.TOKENIZER_TAG_KEYWORD_ANYTYPE => .keyword_anytype,
+        c.TOKENIZER_TAG_KEYWORD_ASM => .keyword_asm,
+        c.TOKENIZER_TAG_KEYWORD_ASYNC => .keyword_async,
+        c.TOKENIZER_TAG_KEYWORD_AWAIT => .keyword_await,
+        c.TOKENIZER_TAG_KEYWORD_BREAK => .keyword_break,
+        c.TOKENIZER_TAG_KEYWORD_CALLCONV => .keyword_callconv,
+        c.TOKENIZER_TAG_KEYWORD_CATCH => .keyword_catch,
+        c.TOKENIZER_TAG_KEYWORD_COMPTIME => .keyword_comptime,
+        c.TOKENIZER_TAG_KEYWORD_CONST => .keyword_const,
+        c.TOKENIZER_TAG_KEYWORD_CONTINUE => .keyword_continue,
+        c.TOKENIZER_TAG_KEYWORD_DEFER => .keyword_defer,
+        c.TOKENIZER_TAG_KEYWORD_ELSE => .keyword_else,
+        c.TOKENIZER_TAG_KEYWORD_ENUM => .keyword_enum,
+        c.TOKENIZER_TAG_KEYWORD_ERRDEFER => .keyword_errdefer,
+        c.TOKENIZER_TAG_KEYWORD_ERROR => .keyword_error,
+        c.TOKENIZER_TAG_KEYWORD_EXPORT => .keyword_export,
+        c.TOKENIZER_TAG_KEYWORD_EXTERN => .keyword_extern,
+        c.TOKENIZER_TAG_KEYWORD_FN => .keyword_fn,
+        c.TOKENIZER_TAG_KEYWORD_FOR => .keyword_for,
+        c.TOKENIZER_TAG_KEYWORD_IF => .keyword_if,
+        c.TOKENIZER_TAG_KEYWORD_INLINE => .keyword_inline,
+        c.TOKENIZER_TAG_KEYWORD_NOALIAS => .keyword_noalias,
+        c.TOKENIZER_TAG_KEYWORD_NOINLINE => .keyword_noinline,
+        c.TOKENIZER_TAG_KEYWORD_NOSUSPEND => .keyword_nosuspend,
+        c.TOKENIZER_TAG_KEYWORD_OPAQUE => .keyword_opaque,
+        c.TOKENIZER_TAG_KEYWORD_OR => .keyword_or,
+        c.TOKENIZER_TAG_KEYWORD_ORELSE => .keyword_orelse,
+        c.TOKENIZER_TAG_KEYWORD_PACKED => .keyword_packed,
+        c.TOKENIZER_TAG_KEYWORD_PUB => .keyword_pub,
+        c.TOKENIZER_TAG_KEYWORD_RESUME => .keyword_resume,
+        c.TOKENIZER_TAG_KEYWORD_RETURN => .keyword_return,
+        c.TOKENIZER_TAG_KEYWORD_LINKSECTION => .keyword_linksection,
+        c.TOKENIZER_TAG_KEYWORD_STRUCT => .keyword_struct,
+        c.TOKENIZER_TAG_KEYWORD_SUSPEND => .keyword_suspend,
+        c.TOKENIZER_TAG_KEYWORD_SWITCH => .keyword_switch,
+        c.TOKENIZER_TAG_KEYWORD_TEST => .keyword_test,
+        c.TOKENIZER_TAG_KEYWORD_THREADLOCAL => .keyword_threadlocal,
+        c.TOKENIZER_TAG_KEYWORD_TRY => .keyword_try,
+        c.TOKENIZER_TAG_KEYWORD_UNION => .keyword_union,
+        c.TOKENIZER_TAG_KEYWORD_UNREACHABLE => .keyword_unreachable,
+        c.TOKENIZER_TAG_KEYWORD_USINGNAMESPACE => .keyword_usingnamespace,
+        c.TOKENIZER_TAG_KEYWORD_VAR => .keyword_var,
+        c.TOKENIZER_TAG_KEYWORD_VOLATILE => .keyword_volatile,
+        c.TOKENIZER_TAG_KEYWORD_WHILE => .keyword_while,
         else => undefined,
     };
 }
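
As a quick orientation for readers of the patch (this sketch is not part of the diff itself): after the rename, tokenizer.h exposes tokenizer_init, tokenizer_next, tokenizer_token, and the TOKENIZER_TAG_* values, and a caller simply loops on tokenizer_next until it sees TOKENIZER_TAG_EOF. The driver below is a minimal sketch assuming only those declarations; print_tokens, main, and the demo source string are illustrative names, not repository code.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#include "tokenizer.h"

/* Hypothetical driver: walk a buffer with the renamed API and print each token. */
static void print_tokens(const char* source) {
    tokenizer t = tokenizer_init(source, (uint32_t)strlen(source));
    for (;;) {
        tokenizer_token token = tokenizer_next(&t);
        printf("tag=%d start=%u end=%u\n",
               (int)token.tag,
               (unsigned)token.loc.start,
               (unsigned)token.loc.end);
        if (token.tag == TOKENIZER_TAG_EOF) {
            break;
        }
    }
}

int main(void) {
    print_tokens("const x = 42;");
    return 0;
}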