commit c2915d2eaa7dfe79d219505ee9f750a195f86673 (tree)
parent 6863e34fbcca71d4fc1c72dbf81b317c6afaebda
Author: Motiejus Jakštys <motiejus@jakstys.lt>
Date: Sun, 15 Dec 2024 00:04:23 +0200
start ast + fix type names in tokenizer
Diffstat:
| A | README.md | | | 1 | + |
| A | ast.c | | | 38 | ++++++++++++++++++++++++++++++++++++++ |
| A | ast.h | | | 511 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| M | tokenizer.c | | | 580 | +++++++++++++++++++++++++++++++++++++++---------------------------------------- |
| M | tokenizer.h | | | 348 | ++++++++++++++++++++++++++++++++++++++++---------------------------------------- |
| M | tokenizer_test.zig | | | 244 | ++++++++++++++++++++++++++++++++++++++++---------------------------------------- |
6 files changed, 1135 insertions(+), 587 deletions(-)
diff --git a/README.md b/README.md
@@ -0,0 +1 @@
+zig1 aspires to be a zig 0.13.0-2448-gd48611ba67c7 interpreter.
diff --git a/ast.c b/ast.c
@@ -0,0 +1,38 @@
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "ast.h"
+
+/*
+ * Tokenize `len` bytes of `source` and record, for every token, its tag and
+ * its start offset.
+ *
+ * The tokenizer detects the end of input by reading a 0 byte at index `len`,
+ * so `source` must be readable (and 0) at `source[len]`.
+ *
+ * On success returns 0. If `result` is non-NULL it is overwritten: it takes
+ * ownership of the two token arrays (release them with free()) and
+ * `tokens_len` counts every stored token, including the final EOF token.
+ * `nodes`, `nodes_len` and `extra_data` are zeroed until the parser exists.
+ * If `result` is NULL the arrays are released before returning.
+ *
+ * On failure returns 1 after printing a diagnostic to stderr; `*result` is
+ * left untouched and nothing is leaked.
+ */
+int ast_parse(const char* source, uint32_t len, ast *result) {
+    int rc = 1;
+    uint32_t tokens_len = 0;
+    tokenizer_tag* tokens_tag = NULL;
+    ast_index* tokens_start = NULL;
+    tokenizer tok = tokenizer_init(source, len);
+
+    /*
+     * Fixed-capacity guess of one token per 8 source bytes. Keep at least
+     * one slot: the EOF token must fit, and a zero-element calloc() is
+     * allowed to return NULL, which would be reported as an error.
+     */
+    uint32_t estimated_token_count = len / 8;
+    if (estimated_token_count == 0) {
+        estimated_token_count = 1;
+    }
+
+    tokens_tag = calloc(estimated_token_count, sizeof *tokens_tag);
+    if (tokens_tag == NULL) {
+        perror("calloc");
+        goto out;
+    }
+    tokens_start = calloc(estimated_token_count, sizeof *tokens_start);
+    if (tokens_start == NULL) {
+        /* Report before any free() so errno still belongs to calloc. */
+        perror("calloc");
+        goto out;
+    }
+
+    for (;;) {
+        if (tokens_len == estimated_token_count) {
+            fprintf(stderr, "too many tokens, bump estimated_token_count\n");
+            goto out;
+        }
+        tokenizer_token token = tokenizer_next(&tok);
+        tokens_tag[tokens_len] = token.tag;
+        tokens_start[tokens_len] = token.loc.start;
+        tokens_len++;
+        /* The EOF token is stored too, then scanning stops. */
+        if (token.tag == TOKENIZER_TAG_EOF) {
+            break;
+        }
+    }
+
+    /* TODO parser */
+
+    if (result != NULL) {
+        *result = (ast) {
+            .source = source,
+            .source_len = len,
+            .tokens_tag = tokens_tag,
+            .tokens_start = tokens_start,
+            .tokens_len = tokens_len,
+            .nodes = NULL,
+            .nodes_len = 0,
+            .extra_data = NULL,
+        };
+        /* Ownership moved to *result; keep the cleanup below from freeing. */
+        tokens_tag = NULL;
+        tokens_start = NULL;
+    }
+    rc = 0;
+
+out:
+    free(tokens_tag);
+    free(tokens_start);
+    return rc;
+}
diff --git a/ast.h b/ast.h
@@ -0,0 +1,511 @@
+#ifndef _ZIG1_AST_H__
+#define _ZIG1_AST_H__
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "tokenizer.h"
+
+/// Kind of an AST node. The comment above each tag describes how that
+/// node's `lhs`, `rhs` and `main_token` are to be interpreted.
+/// The tag set appears to mirror the `Node.Tag` enum of the Zig compiler's
+/// own AST (the comments refer to `Node.Index`) -- TODO confirm.
+/// NOTE(review): several tags carry a repeated `AST_NODE_TAG_` prefix
+/// (e.g. AST_NODE_TAG_AST_NODE_TAG_ERRDEFER,
+/// AST_NODE_TAG_AST_NODE_TAG_AST_NODE_TAG_FOR). This looks like a
+/// search-and-replace artifact; the names are left as they are here because
+/// renaming them would change the public identifiers.
+typedef enum {
+ /// sub_list[lhs...rhs]
+ AST_NODE_TAG_ROOT,
+ /// `usingnamespace lhs;`. rhs unused. main_token is `usingnamespace`.
+ AST_NODE_TAG_USINGNAMESPACE,
+ /// lhs is test name token (must be string literal or identifier), if any.
+ /// rhs is the body node.
+ AST_NODE_TAG_TEST_DECL,
+ /// lhs is the index into extra_data.
+ /// rhs is the initialization expression, if any.
+ /// main_token is `var` or `const`.
+ AST_NODE_TAG_GLOBAL_VAR_DECL,
+ /// `var a: x align(y) = rhs`
+ /// lhs is the index into extra_data.
+ /// main_token is `var` or `const`.
+ AST_NODE_TAG_LOCAL_VAR_DECL,
+ /// `var a: lhs = rhs`. lhs and rhs may be unused.
+ /// Can be local or global.
+ /// main_token is `var` or `const`.
+ AST_NODE_TAG_SIMPLE_VAR_DECL,
+ /// `var a align(lhs) = rhs`. lhs and rhs may be unused.
+ /// Can be local or global.
+ /// main_token is `var` or `const`.
+ AST_NODE_TAG_ALIGNED_VAR_DECL,
+ /// lhs is the identifier token payload if any,
+ /// rhs is the deferred expression.
+ AST_NODE_TAG_AST_NODE_TAG_ERRDEFER,
+ /// lhs is unused.
+ /// rhs is the deferred expression.
+ AST_NODE_TAG_AST_NODE_TAG_DEFER,
+ /// lhs catch rhs
+ /// lhs catch |err| rhs
+ /// main_token is the `catch` keyword.
+ /// payload is determined by looking at the next token after the `catch` keyword.
+ AST_NODE_TAG_AST_NODE_TAG_CATCH,
+ /// `lhs.a`. main_token is the dot. rhs is the identifier token index.
+ AST_NODE_TAG_FIELD_ACCESS,
+ /// `lhs.?`. main_token is the dot. rhs is the `?` token index.
+ AST_NODE_TAG_UNWRAP_OPTIONAL,
+ /// `lhs == rhs`. main_token is op.
+ AST_NODE_TAG_EQUAL_EQUAL,
+ /// `lhs != rhs`. main_token is op.
+ AST_NODE_TAG_BANG_EQUAL,
+ /// `lhs < rhs`. main_token is op.
+ AST_NODE_TAG_LESS_THAN,
+ /// `lhs > rhs`. main_token is op.
+ AST_NODE_TAG_GREATER_THAN,
+ /// `lhs <= rhs`. main_token is op.
+ AST_NODE_TAG_LESS_OR_EQUAL,
+ /// `lhs >= rhs`. main_token is op.
+ AST_NODE_TAG_GREATER_OR_EQUAL,
+ /// `lhs *= rhs`. main_token is op.
+ AST_NODE_TAG_ASSIGN_MUL,
+ /// `lhs /= rhs`. main_token is op.
+ AST_NODE_TAG_ASSIGN_DIV,
+ /// `lhs %= rhs`. main_token is op.
+ AST_NODE_TAG_ASSIGN_MOD,
+ /// `lhs += rhs`. main_token is op.
+ AST_NODE_TAG_ASSIGN_ADD,
+ /// `lhs -= rhs`. main_token is op.
+ AST_NODE_TAG_ASSIGN_SUB,
+ /// `lhs <<= rhs`. main_token is op.
+ AST_NODE_TAG_ASSIGN_SHL,
+ /// `lhs <<|= rhs`. main_token is op.
+ AST_NODE_TAG_ASSIGN_SHL_SAT,
+ /// `lhs >>= rhs`. main_token is op.
+ AST_NODE_TAG_ASSIGN_SHR,
+ /// `lhs &= rhs`. main_token is op.
+ AST_NODE_TAG_ASSIGN_BIT_AND,
+ /// `lhs ^= rhs`. main_token is op.
+ AST_NODE_TAG_ASSIGN_BIT_XOR,
+ /// `lhs |= rhs`. main_token is op.
+ AST_NODE_TAG_ASSIGN_BIT_OR,
+ /// `lhs *%= rhs`. main_token is op.
+ AST_NODE_TAG_ASSIGN_MUL_WRAP,
+ /// `lhs +%= rhs`. main_token is op.
+ AST_NODE_TAG_ASSIGN_ADD_WRAP,
+ /// `lhs -%= rhs`. main_token is op.
+ AST_NODE_TAG_ASSIGN_SUB_WRAP,
+ /// `lhs *|= rhs`. main_token is op.
+ AST_NODE_TAG_ASSIGN_MUL_SAT,
+ /// `lhs +|= rhs`. main_token is op.
+ AST_NODE_TAG_ASSIGN_ADD_SAT,
+ /// `lhs -|= rhs`. main_token is op.
+ AST_NODE_TAG_ASSIGN_SUB_SAT,
+ /// `lhs = rhs`. main_token is op.
+ AST_NODE_TAG_ASSIGN,
+ /// `a, b, ... = rhs`. main_token is op. lhs is index into `extra_data`
+ /// of an lhs elem count followed by an array of that many `Node.Index`,
+ /// with each node having one of the following types:
+ /// * `global_var_decl`
+ /// * `local_var_decl`
+ /// * `simple_var_decl`
+ /// * `aligned_var_decl`
+ /// * Any expression node
+ /// The first 4 types correspond to a `var` or `const` lhs node (note
+ /// that their `rhs` is always 0). An expression node corresponds to a
+ /// standard assignment LHS (which must be evaluated as an lvalue).
+ /// There may be a preceding `comptime` token, which does not create a
+ /// corresponding `comptime` node so must be manually detected.
+ AST_NODE_TAG_ASSIGN_DESTRUCTURE,
+ /// `lhs || rhs`. main_token is the `||`.
+ AST_NODE_TAG_MERGE_ERROR_SETS,
+ /// `lhs * rhs`. main_token is the `*`.
+ AST_NODE_TAG_MUL,
+ /// `lhs / rhs`. main_token is the `/`.
+ AST_NODE_TAG_DIV,
+ /// `lhs % rhs`. main_token is the `%`.
+ AST_NODE_TAG_MOD,
+ /// `lhs ** rhs`. main_token is the `**`.
+ AST_NODE_TAG_ARRAY_MULT,
+ /// `lhs *% rhs`. main_token is the `*%`.
+ AST_NODE_TAG_MUL_WRAP,
+ /// `lhs *| rhs`. main_token is the `*|`.
+ AST_NODE_TAG_MUL_SAT,
+ /// `lhs + rhs`. main_token is the `+`.
+ AST_NODE_TAG_ADD,
+ /// `lhs - rhs`. main_token is the `-`.
+ AST_NODE_TAG_SUB,
+ /// `lhs ++ rhs`. main_token is the `++`.
+ AST_NODE_TAG_ARRAY_CAT,
+ /// `lhs +% rhs`. main_token is the `+%`.
+ AST_NODE_TAG_ADD_WRAP,
+ /// `lhs -% rhs`. main_token is the `-%`.
+ AST_NODE_TAG_SUB_WRAP,
+ /// `lhs +| rhs`. main_token is the `+|`.
+ AST_NODE_TAG_ADD_SAT,
+ /// `lhs -| rhs`. main_token is the `-|`.
+ AST_NODE_TAG_SUB_SAT,
+ /// `lhs << rhs`. main_token is the `<<`.
+ AST_NODE_TAG_SHL,
+ /// `lhs <<| rhs`. main_token is the `<<|`.
+ AST_NODE_TAG_SHL_SAT,
+ /// `lhs >> rhs`. main_token is the `>>`.
+ AST_NODE_TAG_SHR,
+ /// `lhs & rhs`. main_token is the `&`.
+ AST_NODE_TAG_BIT_AND,
+ /// `lhs ^ rhs`. main_token is the `^`.
+ AST_NODE_TAG_BIT_XOR,
+ /// `lhs | rhs`. main_token is the `|`.
+ AST_NODE_TAG_BIT_OR,
+ /// `lhs orelse rhs`. main_token is the `orelse`.
+ AST_NODE_TAG_AST_NODE_TAG_ORELSE,
+ /// `lhs and rhs`. main_token is the `and`.
+ AST_NODE_TAG_BOOL_AND,
+ /// `lhs or rhs`. main_token is the `or`.
+ AST_NODE_TAG_BOOL_OR,
+ /// `op lhs`. rhs unused. main_token is op.
+ AST_NODE_TAG_BOOL_NOT,
+ /// `op lhs`. rhs unused. main_token is op.
+ AST_NODE_TAG_NEGATION,
+ /// `op lhs`. rhs unused. main_token is op.
+ AST_NODE_TAG_BIT_NOT,
+ /// `op lhs`. rhs unused. main_token is op.
+ AST_NODE_TAG_NEGATION_WRAP,
+ /// `op lhs`. rhs unused. main_token is op.
+ AST_NODE_TAG_ADDRESS_OF,
+ /// `op lhs`. rhs unused. main_token is op.
+ AST_NODE_TAG_AST_NODE_TAG_TRY,
+ /// `op lhs`. rhs unused. main_token is op.
+ AST_NODE_TAG_AST_NODE_TAG_AWAIT,
+ /// `?lhs`. rhs unused. main_token is the `?`.
+ AST_NODE_TAG_OPTIONAL_TYPE,
+ /// `[lhs]rhs`.
+ AST_NODE_TAG_ARRAY_TYPE,
+ /// `[lhs:a]b`. `ArrayTypeSentinel[rhs]`.
+ AST_NODE_TAG_ARRAY_TYPE_SENTINEL,
+ /// `[*]align(lhs) rhs`. lhs can be omitted.
+ /// `*align(lhs) rhs`. lhs can be omitted.
+ /// `[]rhs`.
+ /// main_token is the asterisk if a single item pointer or the lbracket
+ /// if a slice, many-item pointer, or C-pointer
+ /// main_token might be a ** token, which is shared with a parent/child
+ /// pointer type and may require special handling.
+ AST_NODE_TAG_PTR_TYPE_ALIGNED,
+ /// `[*:lhs]rhs`. lhs can be omitted.
+ /// `*rhs`.
+ /// `[:lhs]rhs`.
+ /// main_token is the asterisk if a single item pointer or the lbracket
+ /// if a slice, many-item pointer, or C-pointer
+ /// main_token might be a ** token, which is shared with a parent/child
+ /// pointer type and may require special handling.
+ AST_NODE_TAG_PTR_TYPE_SENTINEL,
+ /// lhs is index into ptr_type. rhs is the element type expression.
+ /// main_token is the asterisk if a single item pointer or the lbracket
+ /// if a slice, many-item pointer, or C-pointer
+ /// main_token might be a ** token, which is shared with a parent/child
+ /// pointer type and may require special handling.
+ AST_NODE_TAG_PTR_TYPE,
+ /// lhs is index into ptr_type_bit_range. rhs is the element type expression.
+ /// main_token is the asterisk if a single item pointer or the lbracket
+ /// if a slice, many-item pointer, or C-pointer
+ /// main_token might be a ** token, which is shared with a parent/child
+ /// pointer type and may require special handling.
+ AST_NODE_TAG_PTR_TYPE_BIT_RANGE,
+ /// `lhs[rhs..]`
+ /// main_token is the lbracket.
+ AST_NODE_TAG_SLICE_OPEN,
+ /// `lhs[b..c]`. rhs is index into Slice
+ /// main_token is the lbracket.
+ AST_NODE_TAG_SLICE,
+ /// `lhs[b..c :d]`. rhs is index into SliceSentinel. Slice end c can be omitted.
+ /// main_token is the lbracket.
+ AST_NODE_TAG_SLICE_SENTINEL,
+ /// `lhs.*`. rhs is unused.
+ AST_NODE_TAG_DEREF,
+ /// `lhs[rhs]`.
+ AST_NODE_TAG_ARRAY_ACCESS,
+ /// `lhs{rhs}`. rhs can be omitted.
+ AST_NODE_TAG_ARRAY_INIT_ONE,
+ /// `lhs{rhs,}`. rhs can *not* be omitted
+ AST_NODE_TAG_ARRAY_INIT_ONE_COMMA,
+ /// `.{lhs, rhs}`. lhs and rhs can be omitted.
+ AST_NODE_TAG_ARRAY_INIT_DOT_TWO,
+ /// Same as `array_init_dot_two` except there is known to be a trailing comma
+ /// before the final rbrace.
+ AST_NODE_TAG_ARRAY_INIT_DOT_TWO_COMMA,
+ /// `.{a, b}`. `sub_list[lhs..rhs]`.
+ AST_NODE_TAG_ARRAY_INIT_DOT,
+ /// Same as `array_init_dot` except there is known to be a trailing comma
+ /// before the final rbrace.
+ AST_NODE_TAG_ARRAY_INIT_DOT_COMMA,
+ /// `lhs{a, b}`. `sub_range_list[rhs]`. lhs can be omitted which means `.{a, b}`.
+ AST_NODE_TAG_ARRAY_INIT,
+ /// Same as `array_init` except there is known to be a trailing comma
+ /// before the final rbrace.
+ AST_NODE_TAG_ARRAY_INIT_COMMA,
+ /// `lhs{.a = rhs}`. rhs can be omitted making it empty.
+ /// main_token is the lbrace.
+ AST_NODE_TAG_STRUCT_INIT_ONE,
+ /// `lhs{.a = rhs,}`. rhs can *not* be omitted.
+ /// main_token is the lbrace.
+ AST_NODE_TAG_STRUCT_INIT_ONE_COMMA,
+ /// `.{.a = lhs, .b = rhs}`. lhs and rhs can be omitted.
+ /// main_token is the lbrace.
+ /// No trailing comma before the rbrace.
+ AST_NODE_TAG_STRUCT_INIT_DOT_TWO,
+ /// Same as `struct_init_dot_two` except there is known to be a trailing comma
+ /// before the final rbrace.
+ AST_NODE_TAG_STRUCT_INIT_DOT_TWO_COMMA,
+ /// `.{.a = b, .c = d}`. `sub_list[lhs..rhs]`.
+ /// main_token is the lbrace.
+ AST_NODE_TAG_STRUCT_INIT_DOT,
+ /// Same as `struct_init_dot` except there is known to be a trailing comma
+ /// before the final rbrace.
+ AST_NODE_TAG_STRUCT_INIT_DOT_COMMA,
+ /// `lhs{.a = b, .c = d}`. `sub_range_list[rhs]`.
+ /// lhs can be omitted which means `.{.a = b, .c = d}`.
+ /// main_token is the lbrace.
+ AST_NODE_TAG_STRUCT_INIT,
+ /// Same as `struct_init` except there is known to be a trailing comma
+ /// before the final rbrace.
+ AST_NODE_TAG_STRUCT_INIT_COMMA,
+ /// `lhs(rhs)`. rhs can be omitted.
+ /// main_token is the lparen.
+ AST_NODE_TAG_CALL_ONE,
+ /// `lhs(rhs,)`. rhs can be omitted.
+ /// main_token is the lparen.
+ AST_NODE_TAG_CALL_ONE_COMMA,
+ /// `async lhs(rhs)`. rhs can be omitted.
+ AST_NODE_TAG_ASYNC_CALL_ONE,
+ /// `async lhs(rhs,)`.
+ AST_NODE_TAG_ASYNC_CALL_ONE_COMMA,
+ /// `lhs(a, b, c)`. `SubRange[rhs]`.
+ /// main_token is the `(`.
+ AST_NODE_TAG_CALL,
+ /// `lhs(a, b, c,)`. `SubRange[rhs]`.
+ /// main_token is the `(`.
+ AST_NODE_TAG_CALL_COMMA,
+ /// `async lhs(a, b, c)`. `SubRange[rhs]`.
+ /// main_token is the `(`.
+ AST_NODE_TAG_ASYNC_CALL,
+ /// `async lhs(a, b, c,)`. `SubRange[rhs]`.
+ /// main_token is the `(`.
+ AST_NODE_TAG_ASYNC_CALL_COMMA,
+ /// `switch(lhs) {}`. `SubRange[rhs]`.
+ /// `main_token` is the identifier of a preceding label, if any; otherwise `switch`.
+ AST_NODE_TAG_AST_NODE_TAG_SWITCH,
+ /// Same as switch except there is known to be a trailing comma
+ /// before the final rbrace
+ AST_NODE_TAG_SWITCH_COMMA,
+ /// `lhs => rhs`. If lhs is omitted it means `else`.
+ /// main_token is the `=>`
+ AST_NODE_TAG_SWITCH_CASE_ONE,
+ /// Same as `switch_case_one` but the case is inline
+ AST_NODE_TAG_SWITCH_CASE_INLINE_ONE,
+ /// `a, b, c => rhs`. `SubRange[lhs]`.
+ /// main_token is the `=>`
+ AST_NODE_TAG_SWITCH_CASE,
+ /// Same as `switch_case` but the case is inline
+ AST_NODE_TAG_SWITCH_CASE_INLINE,
+ /// `lhs...rhs`.
+ AST_NODE_TAG_SWITCH_RANGE,
+ /// `while (lhs) rhs`.
+ /// `while (lhs) |x| rhs`.
+ AST_NODE_TAG_WHILE_SIMPLE,
+ /// `while (lhs) : (a) b`. `WhileCont[rhs]`.
+ /// `while (lhs) |x| : (a) b`. `WhileCont[rhs]`.
+ AST_NODE_TAG_WHILE_CONT,
+ /// `while (lhs) : (a) b else c`. `While[rhs]`.
+ /// `while (lhs) |x| : (a) b else c`. `While[rhs]`.
+ /// `while (lhs) |x| : (a) b else |y| c`. `While[rhs]`.
+ /// The cont expression part `: (a)` may be omitted.
+ AST_NODE_TAG_AST_NODE_TAG_WHILE,
+ /// `for (lhs) rhs`.
+ AST_NODE_TAG_FOR_SIMPLE,
+ /// `for (lhs[0..inputs]) lhs[inputs + 1] else lhs[inputs + 2]`. `For[rhs]`.
+ AST_NODE_TAG_AST_NODE_TAG_AST_NODE_TAG_FOR,
+ /// `lhs..rhs`. rhs can be omitted.
+ AST_NODE_TAG_AST_NODE_TAG_FOR_RANGE,
+ /// `if (lhs) rhs`.
+ /// `if (lhs) |a| rhs`.
+ AST_NODE_TAG_IF_SIMPLE,
+ /// `if (lhs) a else b`. `If[rhs]`.
+ /// `if (lhs) |x| a else b`. `If[rhs]`.
+ /// `if (lhs) |x| a else |y| b`. `If[rhs]`.
+ AST_NODE_TAG_AST_NODE_TAG_AST_NODE_TAG_IF,
+ /// `suspend lhs`. lhs can be omitted. rhs is unused.
+ AST_NODE_TAG_AST_NODE_TAG_AST_NODE_TAG_SUSPEND,
+ /// `resume lhs`. rhs is unused.
+ AST_NODE_TAG_AST_NODE_TAG_AST_NODE_TAG_RESUME,
+ /// `continue :lhs rhs`
+ /// both lhs and rhs may be omitted.
+ AST_NODE_TAG_AST_NODE_TAG_AST_NODE_TAG_CONTINUE,
+ /// `break :lhs rhs`
+ /// both lhs and rhs may be omitted.
+ AST_NODE_TAG_AST_NODE_TAG_AST_NODE_TAG_BREAK,
+ /// `return lhs`. lhs can be omitted. rhs is unused.
+ AST_NODE_TAG_AST_NODE_TAG_RETURN,
+ /// `fn (a: lhs) rhs`. lhs can be omitted.
+ /// anytype and ... parameters are omitted from the AST tree.
+ /// main_token is the `fn` keyword.
+ /// extern function declarations use this tag.
+ AST_NODE_TAG_FN_PROTO_SIMPLE,
+ /// `fn (a: b, c: d) rhs`. `sub_range_list[lhs]`.
+ /// anytype and ... parameters are omitted from the AST tree.
+ /// main_token is the `fn` keyword.
+ /// extern function declarations use this tag.
+ AST_NODE_TAG_FN_PROTO_MULTI,
+ /// `fn (a: b) addrspace(e) linksection(f) callconv(g) rhs`. `FnProtoOne[lhs]`.
+ /// zero or one parameters.
+ /// anytype and ... parameters are omitted from the AST tree.
+ /// main_token is the `fn` keyword.
+ /// extern function declarations use this tag.
+ AST_NODE_TAG_FN_PROTO_ONE,
+ /// `fn (a: b, c: d) addrspace(e) linksection(f) callconv(g) rhs`. `FnProto[lhs]`.
+ /// anytype and ... parameters are omitted from the AST tree.
+ /// main_token is the `fn` keyword.
+ /// extern function declarations use this tag.
+ AST_NODE_TAG_FN_PROTO,
+ /// lhs is the fn_proto.
+ /// rhs is the function body block.
+ /// Note that extern function declarations use the fn_proto tags rather
+ /// than this one.
+ AST_NODE_TAG_FN_DECL,
+ /// `anyframe->rhs`. main_token is `anyframe`. `lhs` is arrow token index.
+ AST_NODE_TAG_ANYFRAME_TYPE,
+ /// Both lhs and rhs unused.
+ AST_NODE_TAG_ANYFRAME_LITERAL,
+ /// Both lhs and rhs unused.
+ AST_NODE_TAG_CHAR_LITERAL,
+ /// Both lhs and rhs unused.
+ AST_NODE_TAG_NUMBER_LITERAL,
+ /// Both lhs and rhs unused.
+ AST_NODE_TAG_UNREACHABLE_LITERAL,
+ /// Both lhs and rhs unused.
+ /// Most identifiers will not have explicit AST nodes, however for expressions
+ /// which could be one of many different kinds of AST nodes, there will be an
+ /// identifier AST node for it.
+ AST_NODE_TAG_IDENTIFIER,
+ /// lhs is the dot token index, rhs unused, main_token is the identifier.
+ AST_NODE_TAG_ENUM_LITERAL,
+ /// main_token is the string literal token
+ /// Both lhs and rhs unused.
+ AST_NODE_TAG_STRING_LITERAL,
+ /// main_token is the first token index (redundant with lhs)
+ /// lhs is the first token index; rhs is the last token index.
+ /// Could be a series of multiline_string_literal_line tokens, or a single
+ /// string_literal token.
+ AST_NODE_TAG_MULTILINE_STRING_LITERAL,
+ /// `(lhs)`. main_token is the `(`; rhs is the token index of the `)`.
+ AST_NODE_TAG_GROUPED_EXPRESSION,
+ /// `@a(lhs, rhs)`. lhs and rhs may be omitted.
+ /// main_token is the builtin token.
+ AST_NODE_TAG_BUILTIN_CALL_TWO,
+ /// Same as builtin_call_two but there is known to be a trailing comma before the rparen.
+ AST_NODE_TAG_BUILTIN_CALL_TWO_COMMA,
+ /// `@a(b, c)`. `sub_list[lhs..rhs]`.
+ /// main_token is the builtin token.
+ AST_NODE_TAG_BUILTIN_CALL,
+ /// Same as builtin_call but there is known to be a trailing comma before the rparen.
+ AST_NODE_TAG_BUILTIN_CALL_COMMA,
+ /// `error{a, b}`.
+ /// rhs is the rbrace, lhs is unused.
+ AST_NODE_TAG_ERROR_SET_DECL,
+ /// `struct {}`, `union {}`, `opaque {}`, `enum {}`. `extra_data[lhs..rhs]`.
+ /// main_token is `struct`, `union`, `opaque`, `enum` keyword.
+ AST_NODE_TAG_CONTAINER_DECL,
+ /// Same as ContainerDecl but there is known to be a trailing comma
+ /// or semicolon before the rbrace.
+ AST_NODE_TAG_CONTAINER_DECL_TRAILING,
+ /// `struct {lhs, rhs}`, `union {lhs, rhs}`, `opaque {lhs, rhs}`, `enum {lhs, rhs}`.
+ /// lhs or rhs can be omitted.
+ /// main_token is `struct`, `union`, `opaque`, `enum` keyword.
+ AST_NODE_TAG_CONTAINER_DECL_TWO,
+ /// Same as ContainerDeclTwo except there is known to be a trailing comma
+ /// or semicolon before the rbrace.
+ AST_NODE_TAG_CONTAINER_DECL_TWO_TRAILING,
+ /// `struct(lhs)` / `union(lhs)` / `enum(lhs)`. `SubRange[rhs]`.
+ AST_NODE_TAG_CONTAINER_DECL_ARG,
+ /// Same as container_decl_arg but there is known to be a trailing
+ /// comma or semicolon before the rbrace.
+ AST_NODE_TAG_CONTAINER_DECL_ARG_TRAILING,
+ /// `union(enum) {}`. `sub_list[lhs..rhs]`.
+ /// Note that tagged unions with explicitly provided enums are represented
+ /// by `container_decl_arg`.
+ AST_NODE_TAG_TAGGED_UNION,
+ /// Same as tagged_union but there is known to be a trailing comma
+ /// or semicolon before the rbrace.
+ AST_NODE_TAG_TAGGED_UNION_TRAILING,
+ /// `union(enum) {lhs, rhs}`. lhs or rhs may be omitted.
+ /// Note that tagged unions with explicitly provided enums are represented
+ /// by `container_decl_arg`.
+ AST_NODE_TAG_TAGGED_UNION_TWO,
+ /// Same as tagged_union_two but there is known to be a trailing comma
+ /// or semicolon before the rbrace.
+ AST_NODE_TAG_TAGGED_UNION_TWO_TRAILING,
+ /// `union(enum(lhs)) {}`. `SubRange[rhs]`.
+ AST_NODE_TAG_TAGGED_UNION_ENUM_TAG,
+ /// Same as tagged_union_enum_tag but there is known to be a trailing comma
+ /// or semicolon before the rbrace.
+ AST_NODE_TAG_TAGGED_UNION_ENUM_TAG_TRAILING,
+ /// `a: lhs = rhs,`. lhs and rhs can be omitted.
+ /// main_token is the field name identifier.
+ /// lastToken() does not include the possible trailing comma.
+ AST_NODE_TAG_CONTAINER_FIELD_INIT,
+ /// `a: lhs align(rhs),`. rhs can be omitted.
+ /// main_token is the field name identifier.
+ /// lastToken() does not include the possible trailing comma.
+ AST_NODE_TAG_CONTAINER_FIELD_ALIGN,
+ /// `a: lhs align(c) = d,`. `container_field_list[rhs]`.
+ /// main_token is the field name identifier.
+ /// lastToken() does not include the possible trailing comma.
+ AST_NODE_TAG_CONTAINER_FIELD,
+ /// `comptime lhs`. rhs unused.
+ AST_NODE_TAG_COMPTIME,
+ /// `nosuspend lhs`. rhs unused.
+ AST_NODE_TAG_NOSUSPEND,
+ /// `{lhs rhs}`. rhs or lhs can be omitted.
+ /// main_token points at the lbrace.
+ AST_NODE_TAG_BLOCK_TWO,
+ /// Same as block_two but there is known to be a semicolon before the rbrace.
+ AST_NODE_TAG_BLOCK_TWO_SEMICOLON,
+ /// `{}`. `sub_list[lhs..rhs]`.
+ /// main_token points at the lbrace.
+ AST_NODE_TAG_BLOCK,
+ /// Same as block but there is known to be a semicolon before the rbrace.
+ AST_NODE_TAG_BLOCK_SEMICOLON,
+ /// `asm(lhs)`. rhs is the token index of the rparen.
+ AST_NODE_TAG_ASM_SIMPLE,
+ /// `asm(lhs, a)`. `Asm[rhs]`.
+ AST_NODE_TAG_ASM,
+ /// `[a] "b" (c)`. lhs is 0, rhs is token index of the rparen.
+ /// `[a] "b" (-> lhs)`. rhs is token index of the rparen.
+ /// main_token is `a`.
+ AST_NODE_TAG_ASM_OUTPUT,
+ /// `[a] "b" (lhs)`. rhs is token index of the rparen.
+ /// main_token is `a`.
+ AST_NODE_TAG_ASM_INPUT,
+ /// `error.a`. lhs is token index of `.`. rhs is token index of `a`.
+ AST_NODE_TAG_ERROR_VALUE,
+ /// `lhs!rhs`. main_token is the `!`.
+ AST_NODE_TAG_ERROR_UNION,
+} ast_node_tag;
+
+/// Index types used by the AST. All three are plain 32-bit unsigned
+/// integers; the distinct names only document what is being indexed.
+/// ast_token_index is the type of a node's `main_token`.
+typedef uint32_t ast_token_index;
+/// Element type of the `extra_data` array.
+typedef uint32_t ast_node_index;
+/// Type of a node's `lhs`/`rhs` and of the token start offsets.
+typedef uint32_t ast_index;
+
+/// One AST node. How `main_token`, `data.lhs` and `data.rhs` are to be read
+/// depends on `tag`; the per-tag comments on ast_node_tag spell this out
+/// (a token index, an index into extra_data, or unused).
+typedef struct {
+ ast_node_tag tag;
+ ast_token_index main_token;
+ struct {
+ ast_index lhs, rhs;
+ } data;
+} ast_node;
+
+/// Everything ast_parse is meant to produce for one source buffer: the
+/// source itself, the token arrays and the node storage.
+typedef struct {
+ /// Source text and its length.
+ /// NOTE(review): presumably the `source`/`len` pair passed to ast_parse,
+ /// i.e. not owned by this struct -- confirm against ast.c.
+ const char* source;
+ uint32_t source_len;
+
+ /// Per-token tag and start offset.
+ /// NOTE(review): looks like two parallel arrays of `tokens_len` entries
+ /// indexed by token index -- confirm.
+ tokenizer_tag* tokens_tag;
+ ast_index* tokens_start;
+ uint32_t tokens_len;
+
+ /// Node storage; presumably `nodes_len` entries -- TODO confirm.
+ ast_node* nodes;
+ uint32_t nodes_len;
+ /// Side array that some node tags index through lhs/rhs (see the
+ /// "index into extra_data" comments on ast_node_tag). No length field is
+ /// declared for it here.
+ ast_node_index* extra_data;
+} ast;
+
+/// Process `len` bytes of `source`; `result` is the output parameter.
+/// Returns 0 on success and 1 on failure, in which case a diagnostic has
+/// been written to stderr.
+/// NOTE(review): check ast.c for which fields of `*result` are actually
+/// filled in; the parser itself is still a TODO there.
+int ast_parse(const char* source, uint32_t len, ast *result);
+
+#endif
diff --git a/tokenizer.c b/tokenizer.c
@@ -1,5 +1,3 @@
-// tokenizer for zig d48611ba67c7871cb348f28a01b89d8771170dd8
-
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
@@ -9,63 +7,63 @@
typedef struct {
const char* keyword;
- token_tag tag;
+ tokenizer_tag tag;
} keyword_map;
const keyword_map keywords[] = {
- { "addrspace", TOKEN_TAG_KEYWORD_ADDRSPACE },
- { "align", TOKEN_TAG_KEYWORD_ALIGN },
- { "allowzero", TOKEN_TAG_KEYWORD_ALLOWZERO },
- { "and", TOKEN_TAG_KEYWORD_AND },
- { "anyframe", TOKEN_TAG_KEYWORD_ANYFRAME },
- { "anytype", TOKEN_TAG_KEYWORD_ANYTYPE },
- { "asm", TOKEN_TAG_KEYWORD_ASM },
- { "async", TOKEN_TAG_KEYWORD_ASYNC },
- { "await", TOKEN_TAG_KEYWORD_AWAIT },
- { "break", TOKEN_TAG_KEYWORD_BREAK },
- { "callconv", TOKEN_TAG_KEYWORD_CALLCONV },
- { "catch", TOKEN_TAG_KEYWORD_CATCH },
- { "comptime", TOKEN_TAG_KEYWORD_COMPTIME },
- { "const", TOKEN_TAG_KEYWORD_CONST },
- { "continue", TOKEN_TAG_KEYWORD_CONTINUE },
- { "defer", TOKEN_TAG_KEYWORD_DEFER },
- { "else", TOKEN_TAG_KEYWORD_ELSE },
- { "enum", TOKEN_TAG_KEYWORD_ENUM },
- { "errdefer", TOKEN_TAG_KEYWORD_ERRDEFER },
- { "error", TOKEN_TAG_KEYWORD_ERROR },
- { "export", TOKEN_TAG_KEYWORD_EXPORT },
- { "extern", TOKEN_TAG_KEYWORD_EXTERN },
- { "fn", TOKEN_TAG_KEYWORD_FN },
- { "for", TOKEN_TAG_KEYWORD_FOR },
- { "if", TOKEN_TAG_KEYWORD_IF },
- { "inline", TOKEN_TAG_KEYWORD_INLINE },
- { "linksection", TOKEN_TAG_KEYWORD_LINKSECTION },
- { "noalias", TOKEN_TAG_KEYWORD_NOALIAS },
- { "noinline", TOKEN_TAG_KEYWORD_NOINLINE },
- { "nosuspend", TOKEN_TAG_KEYWORD_NOSUSPEND },
- { "opaque", TOKEN_TAG_KEYWORD_OPAQUE },
- { "or", TOKEN_TAG_KEYWORD_OR },
- { "orelse", TOKEN_TAG_KEYWORD_ORELSE },
- { "packed", TOKEN_TAG_KEYWORD_PACKED },
- { "pub", TOKEN_TAG_KEYWORD_PUB },
- { "resume", TOKEN_TAG_KEYWORD_RESUME },
- { "return", TOKEN_TAG_KEYWORD_RETURN },
- { "struct", TOKEN_TAG_KEYWORD_STRUCT },
- { "suspend", TOKEN_TAG_KEYWORD_SUSPEND },
- { "switch", TOKEN_TAG_KEYWORD_SWITCH },
- { "test", TOKEN_TAG_KEYWORD_TEST },
- { "threadlocal", TOKEN_TAG_KEYWORD_THREADLOCAL },
- { "try", TOKEN_TAG_KEYWORD_TRY },
- { "union", TOKEN_TAG_KEYWORD_UNION },
- { "unreachable", TOKEN_TAG_KEYWORD_UNREACHABLE },
- { "usingnamespace", TOKEN_TAG_KEYWORD_USINGNAMESPACE },
- { "var", TOKEN_TAG_KEYWORD_VAR },
- { "volatile", TOKEN_TAG_KEYWORD_VOLATILE },
- { "while", TOKEN_TAG_KEYWORD_WHILE }
+ { "addrspace", TOKENIZER_TAG_KEYWORD_ADDRSPACE },
+ { "align", TOKENIZER_TAG_KEYWORD_ALIGN },
+ { "allowzero", TOKENIZER_TAG_KEYWORD_ALLOWZERO },
+ { "and", TOKENIZER_TAG_KEYWORD_AND },
+ { "anyframe", TOKENIZER_TAG_KEYWORD_ANYFRAME },
+ { "anytype", TOKENIZER_TAG_KEYWORD_ANYTYPE },
+ { "asm", TOKENIZER_TAG_KEYWORD_ASM },
+ { "async", TOKENIZER_TAG_KEYWORD_ASYNC },
+ { "await", TOKENIZER_TAG_KEYWORD_AWAIT },
+ { "break", TOKENIZER_TAG_KEYWORD_BREAK },
+ { "callconv", TOKENIZER_TAG_KEYWORD_CALLCONV },
+ { "catch", TOKENIZER_TAG_KEYWORD_CATCH },
+ { "comptime", TOKENIZER_TAG_KEYWORD_COMPTIME },
+ { "const", TOKENIZER_TAG_KEYWORD_CONST },
+ { "continue", TOKENIZER_TAG_KEYWORD_CONTINUE },
+ { "defer", TOKENIZER_TAG_KEYWORD_DEFER },
+ { "else", TOKENIZER_TAG_KEYWORD_ELSE },
+ { "enum", TOKENIZER_TAG_KEYWORD_ENUM },
+ { "errdefer", TOKENIZER_TAG_KEYWORD_ERRDEFER },
+ { "error", TOKENIZER_TAG_KEYWORD_ERROR },
+ { "export", TOKENIZER_TAG_KEYWORD_EXPORT },
+ { "extern", TOKENIZER_TAG_KEYWORD_EXTERN },
+ { "fn", TOKENIZER_TAG_KEYWORD_FN },
+ { "for", TOKENIZER_TAG_KEYWORD_FOR },
+ { "if", TOKENIZER_TAG_KEYWORD_IF },
+ { "inline", TOKENIZER_TAG_KEYWORD_INLINE },
+ { "linksection", TOKENIZER_TAG_KEYWORD_LINKSECTION },
+ { "noalias", TOKENIZER_TAG_KEYWORD_NOALIAS },
+ { "noinline", TOKENIZER_TAG_KEYWORD_NOINLINE },
+ { "nosuspend", TOKENIZER_TAG_KEYWORD_NOSUSPEND },
+ { "opaque", TOKENIZER_TAG_KEYWORD_OPAQUE },
+ { "or", TOKENIZER_TAG_KEYWORD_OR },
+ { "orelse", TOKENIZER_TAG_KEYWORD_ORELSE },
+ { "packed", TOKENIZER_TAG_KEYWORD_PACKED },
+ { "pub", TOKENIZER_TAG_KEYWORD_PUB },
+ { "resume", TOKENIZER_TAG_KEYWORD_RESUME },
+ { "return", TOKENIZER_TAG_KEYWORD_RETURN },
+ { "struct", TOKENIZER_TAG_KEYWORD_STRUCT },
+ { "suspend", TOKENIZER_TAG_KEYWORD_SUSPEND },
+ { "switch", TOKENIZER_TAG_KEYWORD_SWITCH },
+ { "test", TOKENIZER_TAG_KEYWORD_TEST },
+ { "threadlocal", TOKENIZER_TAG_KEYWORD_THREADLOCAL },
+ { "try", TOKENIZER_TAG_KEYWORD_TRY },
+ { "union", TOKENIZER_TAG_KEYWORD_UNION },
+ { "unreachable", TOKENIZER_TAG_KEYWORD_UNREACHABLE },
+ { "usingnamespace", TOKENIZER_TAG_KEYWORD_USINGNAMESPACE },
+ { "var", TOKENIZER_TAG_KEYWORD_VAR },
+ { "volatile", TOKENIZER_TAG_KEYWORD_VOLATILE },
+ { "while", TOKENIZER_TAG_KEYWORD_WHILE }
};
// TODO binary search
-static token_tag get_keyword(const char* bytes, uint32_t len)
+static tokenizer_tag get_keyword(const char* bytes, uint32_t len)
{
for (unsigned long i = 0; i < sizeof(keywords) / sizeof(keyword_map); i++) {
size_t klen = strlen(keywords[i].keyword);
@@ -75,14 +73,14 @@ static token_tag get_keyword(const char* bytes, uint32_t len)
if (len == klen) {
return keywords[i].tag;
} else {
- return TOKEN_TAG_INVALID;
+ return TOKENIZER_TAG_INVALID;
}
} else if (cmp < 0) {
- return TOKEN_TAG_INVALID;
+ return TOKENIZER_TAG_INVALID;
}
continue;
}
- return TOKEN_TAG_INVALID;
+ return TOKENIZER_TAG_INVALID;
}
tokenizer tokenizer_init(const char* buffer, uint32_t len)
@@ -94,32 +92,32 @@ tokenizer tokenizer_init(const char* buffer, uint32_t len)
};
}
-token tokenizer_next(tokenizer* self)
+tokenizer_token tokenizer_next(tokenizer* self)
{
- token result = (token) {
- .tag = TOKEN_TAG_INVALID,
+ tokenizer_token result = (tokenizer_token) {
+ .tag = TOKENIZER_TAG_INVALID,
.loc = {
.start = 0,
},
};
- token_state state = TOKEN_STATE_START;
+ tokenizer_state state = TOKENIZER_STATE_START;
state:
switch (state) {
- case TOKEN_STATE_START:
+ case TOKENIZER_STATE_START:
switch (self->buffer[self->index]) {
case 0:
if (self->index == self->buffer_len) {
- return (token) {
- .tag = TOKEN_TAG_EOF,
+ return (tokenizer_token) {
+ .tag = TOKENIZER_TAG_EOF,
.loc = {
.start = self->index,
.end = self->index,
}
};
} else {
- state = TOKEN_STATE_INVALID;
+ state = TOKENIZER_STATE_INVALID;
goto state;
}
case ' ':
@@ -130,388 +128,388 @@ state:
result.loc.start = self->index;
goto state;
case '"':
- result.tag = TOKEN_TAG_STRING_LITERAL;
- state = TOKEN_STATE_STRING_LITERAL;
+ result.tag = TOKENIZER_TAG_STRING_LITERAL;
+ state = TOKENIZER_STATE_STRING_LITERAL;
goto state;
case '\'':
- result.tag = TOKEN_TAG_CHAR_LITERAL;
- state = TOKEN_STATE_CHAR_LITERAL;
+ result.tag = TOKENIZER_TAG_CHAR_LITERAL;
+ state = TOKENIZER_STATE_CHAR_LITERAL;
goto state;
case 'a' ... 'z':
case 'A' ... 'Z':
case '_':
- result.tag = TOKEN_TAG_IDENTIFIER;
- state = TOKEN_STATE_IDENTIFIER;
+ result.tag = TOKENIZER_TAG_IDENTIFIER;
+ state = TOKENIZER_STATE_IDENTIFIER;
goto state;
case '@':
- state = TOKEN_STATE_SAW_AT_SIGN;
+ state = TOKENIZER_STATE_SAW_AT_SIGN;
goto state;
case '=':
- state = TOKEN_STATE_EQUAL;
+ state = TOKENIZER_STATE_EQUAL;
goto state;
case '!':
- state = TOKEN_STATE_BANG;
+ state = TOKENIZER_STATE_BANG;
goto state;
case '|':
- state = TOKEN_STATE_PIPE;
+ state = TOKENIZER_STATE_PIPE;
goto state;
case '(':
- result.tag = TOKEN_TAG_L_PAREN;
+ result.tag = TOKENIZER_TAG_L_PAREN;
self->index++;
break;
case ')':
- result.tag = TOKEN_TAG_R_PAREN;
+ result.tag = TOKENIZER_TAG_R_PAREN;
self->index++;
break;
case '[':
- result.tag = TOKEN_TAG_L_BRACKET;
+ result.tag = TOKENIZER_TAG_L_BRACKET;
self->index++;
break;
case ']':
- result.tag = TOKEN_TAG_R_BRACKET;
+ result.tag = TOKENIZER_TAG_R_BRACKET;
self->index++;
break;
case ';':
- result.tag = TOKEN_TAG_SEMICOLON;
+ result.tag = TOKENIZER_TAG_SEMICOLON;
self->index++;
break;
case ',':
- result.tag = TOKEN_TAG_COMMA;
+ result.tag = TOKENIZER_TAG_COMMA;
self->index++;
break;
case '?':
- result.tag = TOKEN_TAG_QUESTION_MARK;
+ result.tag = TOKENIZER_TAG_QUESTION_MARK;
self->index++;
break;
case ':':
- result.tag = TOKEN_TAG_COLON;
+ result.tag = TOKENIZER_TAG_COLON;
self->index++;
break;
case '%':
- state = TOKEN_STATE_PERCENT;
+ state = TOKENIZER_STATE_PERCENT;
goto state;
case '*':
- state = TOKEN_STATE_ASTERISK;
+ state = TOKENIZER_STATE_ASTERISK;
goto state;
case '+':
- state = TOKEN_STATE_PLUS;
+ state = TOKENIZER_STATE_PLUS;
goto state;
case '<':
- state = TOKEN_STATE_ANGLE_BRACKET_LEFT;
+ state = TOKENIZER_STATE_ANGLE_BRACKET_LEFT;
goto state;
case '>':
- state = TOKEN_STATE_ANGLE_BRACKET_RIGHT;
+ state = TOKENIZER_STATE_ANGLE_BRACKET_RIGHT;
goto state;
case '^':
- state = TOKEN_STATE_CARET;
+ state = TOKENIZER_STATE_CARET;
goto state;
case '\\':
- result.tag = TOKEN_TAG_MULTILINE_STRING_LITERAL_LINE;
- state = TOKEN_STATE_BACKSLASH;
+ result.tag = TOKENIZER_TAG_MULTILINE_STRING_LITERAL_LINE;
+ state = TOKENIZER_STATE_BACKSLASH;
goto state;
case '{':
- result.tag = TOKEN_TAG_L_BRACE;
+ result.tag = TOKENIZER_TAG_L_BRACE;
self->index++;
break;
case '}':
- result.tag = TOKEN_TAG_R_BRACE;
+ result.tag = TOKENIZER_TAG_R_BRACE;
self->index++;
break;
case '~':
- result.tag = TOKEN_TAG_TILDE;
+ result.tag = TOKENIZER_TAG_TILDE;
self->index++;
break;
case '.':
- state = TOKEN_STATE_PERIOD;
+ state = TOKENIZER_STATE_PERIOD;
goto state;
case '-':
- state = TOKEN_STATE_MINUS;
+ state = TOKENIZER_STATE_MINUS;
goto state;
case '/':
- state = TOKEN_STATE_SLASH;
+ state = TOKENIZER_STATE_SLASH;
goto state;
case '&':
- state = TOKEN_STATE_AMPERSAND;
+ state = TOKENIZER_STATE_AMPERSAND;
goto state;
case '0' ... '9':
- result.tag = TOKEN_TAG_NUMBER_LITERAL;
+ result.tag = TOKENIZER_TAG_NUMBER_LITERAL;
self->index++;
- state = TOKEN_STATE_INT;
+ state = TOKENIZER_STATE_INT;
goto state;
default:
- state = TOKEN_STATE_INVALID;
+ state = TOKENIZER_STATE_INVALID;
goto state;
};
break;
- case TOKEN_STATE_EXPECT_NEWLINE:
+ case TOKENIZER_STATE_EXPECT_NEWLINE:
self->index++;
switch (self->buffer[self->index]) {
case 0:
if (self->index == self->buffer_len) {
- result.tag = TOKEN_TAG_INVALID;
+ result.tag = TOKENIZER_TAG_INVALID;
} else {
- state = TOKEN_STATE_INVALID;
+ state = TOKENIZER_STATE_INVALID;
goto state;
}
break;
case '\n':
self->index++;
result.loc.start = self->index;
- state = TOKEN_STATE_START;
+ state = TOKENIZER_STATE_START;
goto state;
default:
- state = TOKEN_STATE_INVALID;
+ state = TOKENIZER_STATE_INVALID;
goto state;
}
break;
- case TOKEN_STATE_INVALID:
+ case TOKENIZER_STATE_INVALID:
self->index++;
switch (self->buffer[self->index]) {
case 0:
if (self->index == self->buffer_len) {
- result.tag = TOKEN_TAG_INVALID;
+ result.tag = TOKENIZER_TAG_INVALID;
} else {
- state = TOKEN_STATE_INVALID;
+ state = TOKENIZER_STATE_INVALID;
goto state;
}
break;
case '\n':
- result.tag = TOKEN_TAG_INVALID;
+ result.tag = TOKENIZER_TAG_INVALID;
break;
default:
- state = TOKEN_STATE_INVALID;
+ state = TOKENIZER_STATE_INVALID;
goto state;
}
break;
- case TOKEN_STATE_SAW_AT_SIGN:
+ case TOKENIZER_STATE_SAW_AT_SIGN:
self->index++;
switch (self->buffer[self->index]) {
case 0:
case '\n':
- result.tag = TOKEN_TAG_INVALID;
+ result.tag = TOKENIZER_TAG_INVALID;
break;
case '"':
- result.tag = TOKEN_TAG_IDENTIFIER;
- state = TOKEN_STATE_STRING_LITERAL;
+ result.tag = TOKENIZER_TAG_IDENTIFIER;
+ state = TOKENIZER_STATE_STRING_LITERAL;
goto state;
case 'a' ... 'z':
case 'A' ... 'Z':
case '_':
- result.tag = TOKEN_TAG_BUILTIN;
- state = TOKEN_STATE_BUILTIN;
+ result.tag = TOKENIZER_TAG_BUILTIN;
+ state = TOKENIZER_STATE_BUILTIN;
goto state;
default:
- state = TOKEN_STATE_INVALID;
+ state = TOKENIZER_STATE_INVALID;
goto state;
}
break;
- case TOKEN_STATE_AMPERSAND:
+ case TOKENIZER_STATE_AMPERSAND:
self->index++;
switch (self->buffer[self->index]) {
case '=':
- result.tag = TOKEN_TAG_AMPERSAND_EQUAL;
+ result.tag = TOKENIZER_TAG_AMPERSAND_EQUAL;
self->index++;
break;
default:
- result.tag = TOKEN_TAG_AMPERSAND;
+ result.tag = TOKENIZER_TAG_AMPERSAND;
break;
}
break;
- case TOKEN_STATE_ASTERISK:
+ case TOKENIZER_STATE_ASTERISK:
self->index++;
switch (self->buffer[self->index]) {
case '=':
- result.tag = TOKEN_TAG_ASTERISK_EQUAL;
+ result.tag = TOKENIZER_TAG_ASTERISK_EQUAL;
self->index++;
break;
case '*':
- result.tag = TOKEN_TAG_ASTERISK_ASTERISK;
+ result.tag = TOKENIZER_TAG_ASTERISK_ASTERISK;
self->index++;
break;
case '%':
- state = TOKEN_STATE_ASTERISK_PERCENT;
+ state = TOKENIZER_STATE_ASTERISK_PERCENT;
goto state;
case '|':
- state = TOKEN_STATE_ASTERISK_PIPE;
+ state = TOKENIZER_STATE_ASTERISK_PIPE;
goto state;
default:
- result.tag = TOKEN_TAG_ASTERISK;
+ result.tag = TOKENIZER_TAG_ASTERISK;
break;
}
break;
- case TOKEN_STATE_ASTERISK_PERCENT:
+ case TOKENIZER_STATE_ASTERISK_PERCENT:
self->index++;
switch (self->buffer[self->index]) {
case '=':
- result.tag = TOKEN_TAG_ASTERISK_PERCENT_EQUAL;
+ result.tag = TOKENIZER_TAG_ASTERISK_PERCENT_EQUAL;
self->index++;
break;
default:
- result.tag = TOKEN_TAG_ASTERISK_PERCENT;
+ result.tag = TOKENIZER_TAG_ASTERISK_PERCENT;
break;
}
break;
- case TOKEN_STATE_ASTERISK_PIPE:
+ case TOKENIZER_STATE_ASTERISK_PIPE:
self->index++;
switch (self->buffer[self->index]) {
case '=':
- result.tag = TOKEN_TAG_ASTERISK_PIPE_EQUAL;
+ result.tag = TOKENIZER_TAG_ASTERISK_PIPE_EQUAL;
self->index++;
break;
default:
- result.tag = TOKEN_TAG_ASTERISK_PIPE;
+ result.tag = TOKENIZER_TAG_ASTERISK_PIPE;
break;
}
break;
- case TOKEN_STATE_PERCENT:
+ case TOKENIZER_STATE_PERCENT:
self->index++;
switch (self->buffer[self->index]) {
case '=':
- result.tag = TOKEN_TAG_PERCENT_EQUAL;
+ result.tag = TOKENIZER_TAG_PERCENT_EQUAL;
self->index++;
break;
default:
- result.tag = TOKEN_TAG_PERCENT;
+ result.tag = TOKENIZER_TAG_PERCENT;
break;
}
break;
- case TOKEN_STATE_PLUS:
+ case TOKENIZER_STATE_PLUS:
self->index++;
switch (self->buffer[self->index]) {
case '=':
- result.tag = TOKEN_TAG_PLUS_EQUAL;
+ result.tag = TOKENIZER_TAG_PLUS_EQUAL;
self->index++;
break;
case '+':
- result.tag = TOKEN_TAG_PLUS_PLUS;
+ result.tag = TOKENIZER_TAG_PLUS_PLUS;
self->index++;
break;
case '%':
- state = TOKEN_STATE_PLUS_PERCENT;
+ state = TOKENIZER_STATE_PLUS_PERCENT;
goto state;
case '|':
- state = TOKEN_STATE_PLUS_PIPE;
+ state = TOKENIZER_STATE_PLUS_PIPE;
goto state;
default:
- result.tag = TOKEN_TAG_PLUS;
+ result.tag = TOKENIZER_TAG_PLUS;
break;
}
break;
- case TOKEN_STATE_PLUS_PERCENT:
+ case TOKENIZER_STATE_PLUS_PERCENT:
self->index++;
switch (self->buffer[self->index]) {
case '=':
- result.tag = TOKEN_TAG_PLUS_PERCENT_EQUAL;
+ result.tag = TOKENIZER_TAG_PLUS_PERCENT_EQUAL;
self->index++;
break;
default:
- result.tag = TOKEN_TAG_PLUS_PERCENT;
+ result.tag = TOKENIZER_TAG_PLUS_PERCENT;
break;
}
break;
- case TOKEN_STATE_PLUS_PIPE:
+ case TOKENIZER_STATE_PLUS_PIPE:
self->index++;
switch (self->buffer[self->index]) {
case '=':
- result.tag = TOKEN_TAG_PLUS_PIPE_EQUAL;
+ result.tag = TOKENIZER_TAG_PLUS_PIPE_EQUAL;
self->index++;
break;
default:
- result.tag = TOKEN_TAG_PLUS_PIPE;
+ result.tag = TOKENIZER_TAG_PLUS_PIPE;
break;
}
break;
- case TOKEN_STATE_CARET:
+ case TOKENIZER_STATE_CARET:
self->index++;
switch (self->buffer[self->index]) {
case '=':
- result.tag = TOKEN_TAG_CARET_EQUAL;
+ result.tag = TOKENIZER_TAG_CARET_EQUAL;
self->index++;
break;
default:
- result.tag = TOKEN_TAG_CARET;
+ result.tag = TOKENIZER_TAG_CARET;
break;
}
break;
- case TOKEN_STATE_IDENTIFIER:
+ case TOKENIZER_STATE_IDENTIFIER:
self->index++;
switch (self->buffer[self->index]) {
case 'a' ... 'z':
case 'A' ... 'Z':
case '_':
case '0' ... '9':
- state = TOKEN_STATE_IDENTIFIER;
+ state = TOKENIZER_STATE_IDENTIFIER;
goto state;
default:; // Once we're at C23, this semicolon can be removed.
const char* start = self->buffer + result.loc.start;
uint32_t len = self->index - result.loc.start;
- token_tag tag = get_keyword(start, len);
- if (tag != TOKEN_TAG_INVALID) {
+ tokenizer_tag tag = get_keyword(start, len);
+ if (tag != TOKENIZER_TAG_INVALID) {
result.tag = tag;
}
}
break;
- case TOKEN_STATE_BUILTIN:
+ case TOKENIZER_STATE_BUILTIN:
self->index++;
switch (self->buffer[self->index]) {
case 'a' ... 'z':
case 'A' ... 'Z':
case '_':
case '0' ... '9':
- state = TOKEN_STATE_BUILTIN;
+ state = TOKENIZER_STATE_BUILTIN;
goto state;
break;
}
break;
- case TOKEN_STATE_BACKSLASH:
+ case TOKENIZER_STATE_BACKSLASH:
self->index++;
switch (self->buffer[self->index]) {
case 0:
- result.tag = TOKEN_TAG_INVALID;
+ result.tag = TOKENIZER_TAG_INVALID;
break;
case '\\':
- state = TOKEN_STATE_MULTILINE_STRING_LITERAL_LINE;
+ state = TOKENIZER_STATE_MULTILINE_STRING_LITERAL_LINE;
goto state;
case '\n':
- result.tag = TOKEN_TAG_INVALID;
+ result.tag = TOKENIZER_TAG_INVALID;
break;
default:
- state = TOKEN_STATE_INVALID;
+ state = TOKENIZER_STATE_INVALID;
goto state;
}
break;
- case TOKEN_STATE_STRING_LITERAL:
+ case TOKENIZER_STATE_STRING_LITERAL:
self->index++;
switch (self->buffer[self->index]) {
case 0:
if (self->index != self->buffer_len) {
- state = TOKEN_STATE_INVALID;
+ state = TOKENIZER_STATE_INVALID;
goto state;
} else {
- result.tag = TOKEN_TAG_INVALID;
+ result.tag = TOKENIZER_TAG_INVALID;
}
break;
case '\n':
- result.tag = TOKEN_TAG_INVALID;
+ result.tag = TOKENIZER_TAG_INVALID;
break;
case '\\':
- state = TOKEN_STATE_STRING_LITERAL_BACKSLASH;
+ state = TOKENIZER_STATE_STRING_LITERAL_BACKSLASH;
goto state;
case '"':
self->index++;
@@ -519,43 +517,43 @@ state:
case 0x01 ... 0x09:
case 0x0b ... 0x1f:
case 0x7f:
- state = TOKEN_STATE_INVALID;
+ state = TOKENIZER_STATE_INVALID;
goto state;
default:
- state = TOKEN_STATE_STRING_LITERAL;
+ state = TOKENIZER_STATE_STRING_LITERAL;
goto state;
}
break;
- case TOKEN_STATE_STRING_LITERAL_BACKSLASH:
+ case TOKENIZER_STATE_STRING_LITERAL_BACKSLASH:
self->index++;
switch (self->buffer[self->index]) {
case 0:
case '\n':
- result.tag = TOKEN_TAG_INVALID;
+ result.tag = TOKENIZER_TAG_INVALID;
break;
default:
- state = TOKEN_STATE_STRING_LITERAL;
+ state = TOKENIZER_STATE_STRING_LITERAL;
goto state;
}
break;
- case TOKEN_STATE_CHAR_LITERAL:
+ case TOKENIZER_STATE_CHAR_LITERAL:
self->index++;
switch (self->buffer[self->index]) {
case 0:
if (self->index != self->buffer_len) {
- state = TOKEN_STATE_INVALID;
+ state = TOKENIZER_STATE_INVALID;
goto state;
} else {
- result.tag = TOKEN_TAG_INVALID;
+ result.tag = TOKENIZER_TAG_INVALID;
}
break;
case '\n':
- result.tag = TOKEN_TAG_INVALID;
+ result.tag = TOKENIZER_TAG_INVALID;
break;
case '\\':
- state = TOKEN_STATE_CHAR_LITERAL_BACKSLASH;
+ state = TOKENIZER_STATE_CHAR_LITERAL_BACKSLASH;
goto state;
case '\'':
self->index++;
@@ -563,45 +561,45 @@ state:
case 0x01 ... 0x09:
case 0x0b ... 0x1f:
case 0x7f:
- state = TOKEN_STATE_INVALID;
+ state = TOKENIZER_STATE_INVALID;
goto state;
default:
- state = TOKEN_STATE_CHAR_LITERAL;
+ state = TOKENIZER_STATE_CHAR_LITERAL;
goto state;
}
break;
- case TOKEN_STATE_CHAR_LITERAL_BACKSLASH:
+ case TOKENIZER_STATE_CHAR_LITERAL_BACKSLASH:
self->index++;
switch (self->buffer[self->index]) {
case 0:
if (self->index != self->buffer_len) {
- state = TOKEN_STATE_INVALID;
+ state = TOKENIZER_STATE_INVALID;
goto state;
} else {
- result.tag = TOKEN_TAG_INVALID;
+ result.tag = TOKENIZER_TAG_INVALID;
}
break;
case '\n':
- result.tag = TOKEN_TAG_INVALID;
+ result.tag = TOKENIZER_TAG_INVALID;
break;
case 0x01 ... 0x09:
case 0x0b ... 0x1f:
case 0x7f:
- state = TOKEN_STATE_INVALID;
+ state = TOKENIZER_STATE_INVALID;
goto state;
default:
- state = TOKEN_STATE_CHAR_LITERAL;
+ state = TOKENIZER_STATE_CHAR_LITERAL;
goto state;
}
break;
- case TOKEN_STATE_MULTILINE_STRING_LITERAL_LINE:
+ case TOKENIZER_STATE_MULTILINE_STRING_LITERAL_LINE:
self->index++;
switch (self->buffer[self->index]) {
case 0:
if (self->index != self->buffer_len) {
- state = TOKEN_STATE_INVALID;
+ state = TOKENIZER_STATE_INVALID;
goto state;
}
break;
@@ -609,7 +607,7 @@ state:
break;
case '\r':
if (self->buffer[self->index + 1] != '\n') {
- state = TOKEN_STATE_INVALID;
+ state = TOKENIZER_STATE_INVALID;
goto state;
}
break;
@@ -617,250 +615,250 @@ state:
case 0x0b ... 0x0c:
case 0x0e ... 0x1f:
case 0x7f:
- state = TOKEN_STATE_INVALID;
+ state = TOKENIZER_STATE_INVALID;
goto state;
default:
- state = TOKEN_STATE_MULTILINE_STRING_LITERAL_LINE;
+ state = TOKENIZER_STATE_MULTILINE_STRING_LITERAL_LINE;
goto state;
}
break;
- case TOKEN_STATE_BANG:
+ case TOKENIZER_STATE_BANG:
self->index++;
switch (self->buffer[self->index]) {
case '=':
- result.tag = TOKEN_TAG_BANG_EQUAL;
+ result.tag = TOKENIZER_TAG_BANG_EQUAL;
self->index++;
break;
default:
- result.tag = TOKEN_TAG_BANG;
+ result.tag = TOKENIZER_TAG_BANG;
break;
}
break;
- case TOKEN_STATE_PIPE:
+ case TOKENIZER_STATE_PIPE:
self->index++;
switch (self->buffer[self->index]) {
case '=':
- result.tag = TOKEN_TAG_PIPE_EQUAL;
+ result.tag = TOKENIZER_TAG_PIPE_EQUAL;
self->index++;
break;
case '|':
- result.tag = TOKEN_TAG_PIPE_PIPE;
+ result.tag = TOKENIZER_TAG_PIPE_PIPE;
self->index++;
break;
default:
- result.tag = TOKEN_TAG_PIPE;
+ result.tag = TOKENIZER_TAG_PIPE;
break;
}
break;
- case TOKEN_STATE_EQUAL:
+ case TOKENIZER_STATE_EQUAL:
self->index++;
switch (self->buffer[self->index]) {
case '=':
- result.tag = TOKEN_TAG_EQUAL_EQUAL;
+ result.tag = TOKENIZER_TAG_EQUAL_EQUAL;
self->index++;
break;
case '>':
- result.tag = TOKEN_TAG_EQUAL_ANGLE_BRACKET_RIGHT;
+ result.tag = TOKENIZER_TAG_EQUAL_ANGLE_BRACKET_RIGHT;
self->index++;
break;
default:
- result.tag = TOKEN_TAG_EQUAL;
+ result.tag = TOKENIZER_TAG_EQUAL;
break;
}
break;
- case TOKEN_STATE_MINUS:
+ case TOKENIZER_STATE_MINUS:
self->index++;
switch (self->buffer[self->index]) {
case '>':
- result.tag = TOKEN_TAG_ARROW;
+ result.tag = TOKENIZER_TAG_ARROW;
self->index++;
break;
case '=':
- result.tag = TOKEN_TAG_MINUS_EQUAL;
+ result.tag = TOKENIZER_TAG_MINUS_EQUAL;
self->index++;
break;
case '%':
- state = TOKEN_STATE_MINUS_PERCENT;
+ state = TOKENIZER_STATE_MINUS_PERCENT;
goto state;
case '|':
- state = TOKEN_STATE_MINUS_PIPE;
+ state = TOKENIZER_STATE_MINUS_PIPE;
goto state;
default:
- result.tag = TOKEN_TAG_MINUS;
+ result.tag = TOKENIZER_TAG_MINUS;
break;
}
break;
- case TOKEN_STATE_MINUS_PERCENT:
+ case TOKENIZER_STATE_MINUS_PERCENT:
self->index++;
switch (self->buffer[self->index]) {
case '=':
- result.tag = TOKEN_TAG_MINUS_PERCENT_EQUAL;
+ result.tag = TOKENIZER_TAG_MINUS_PERCENT_EQUAL;
self->index++;
break;
default:
- result.tag = TOKEN_TAG_MINUS_PERCENT;
+ result.tag = TOKENIZER_TAG_MINUS_PERCENT;
break;
}
break;
- case TOKEN_STATE_MINUS_PIPE:
+ case TOKENIZER_STATE_MINUS_PIPE:
self->index++;
switch (self->buffer[self->index]) {
case '=':
- result.tag = TOKEN_TAG_MINUS_PIPE_EQUAL;
+ result.tag = TOKENIZER_TAG_MINUS_PIPE_EQUAL;
self->index++;
break;
default:
- result.tag = TOKEN_TAG_MINUS_PIPE;
+ result.tag = TOKENIZER_TAG_MINUS_PIPE;
break;
}
break;
- case TOKEN_STATE_ANGLE_BRACKET_LEFT:
+ case TOKENIZER_STATE_ANGLE_BRACKET_LEFT:
self->index++;
switch (self->buffer[self->index]) {
case '<':
- state = TOKEN_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT;
+ state = TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT;
goto state;
case '=':
- result.tag = TOKEN_TAG_ANGLE_BRACKET_LEFT_EQUAL;
+ result.tag = TOKENIZER_TAG_ANGLE_BRACKET_LEFT_EQUAL;
self->index++;
break;
default:
- result.tag = TOKEN_TAG_ANGLE_BRACKET_LEFT;
+ result.tag = TOKENIZER_TAG_ANGLE_BRACKET_LEFT;
break;
}
break;
- case TOKEN_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT:
+ case TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT:
self->index++;
switch (self->buffer[self->index]) {
case '=':
- result.tag = TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_EQUAL;
+ result.tag = TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_EQUAL;
self->index++;
break;
case '|':
- state = TOKEN_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE;
+ state = TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE;
goto state;
default:
- result.tag = TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT;
+ result.tag = TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT;
break;
}
break;
- case TOKEN_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE:
+ case TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE:
self->index++;
switch (self->buffer[self->index]) {
case '=':
- result.tag = TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE_EQUAL;
+ result.tag = TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE_EQUAL;
self->index++;
break;
default:
- result.tag = TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE;
+ result.tag = TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE;
break;
}
break;
- case TOKEN_STATE_ANGLE_BRACKET_RIGHT:
+ case TOKENIZER_STATE_ANGLE_BRACKET_RIGHT:
self->index++;
switch (self->buffer[self->index]) {
case '>':
- state = TOKEN_STATE_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT;
+ state = TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT;
goto state;
case '=':
- result.tag = TOKEN_TAG_ANGLE_BRACKET_RIGHT_EQUAL;
+ result.tag = TOKENIZER_TAG_ANGLE_BRACKET_RIGHT_EQUAL;
self->index++;
break;
default:
- result.tag = TOKEN_TAG_ANGLE_BRACKET_RIGHT;
+ result.tag = TOKENIZER_TAG_ANGLE_BRACKET_RIGHT;
break;
}
break;
- case TOKEN_STATE_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT:
+ case TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT:
self->index++;
switch (self->buffer[self->index]) {
case '=':
- result.tag = TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT_EQUAL;
+ result.tag = TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT_EQUAL;
self->index++;
break;
default:
- result.tag = TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT;
+ result.tag = TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT;
break;
}
break;
- case TOKEN_STATE_PERIOD:
+ case TOKENIZER_STATE_PERIOD:
self->index++;
switch (self->buffer[self->index]) {
case '.':
- state = TOKEN_STATE_PERIOD_2;
+ state = TOKENIZER_STATE_PERIOD_2;
goto state;
case '*':
- state = TOKEN_STATE_PERIOD_ASTERISK;
+ state = TOKENIZER_STATE_PERIOD_ASTERISK;
goto state;
default:
- result.tag = TOKEN_TAG_PERIOD;
+ result.tag = TOKENIZER_TAG_PERIOD;
break;
}
break;
- case TOKEN_STATE_PERIOD_2:
+ case TOKENIZER_STATE_PERIOD_2:
self->index++;
switch (self->buffer[self->index]) {
case '.':
- result.tag = TOKEN_TAG_ELLIPSIS3;
+ result.tag = TOKENIZER_TAG_ELLIPSIS3;
self->index++;
break;
default:
- result.tag = TOKEN_TAG_ELLIPSIS2;
+ result.tag = TOKENIZER_TAG_ELLIPSIS2;
break;
}
break;
- case TOKEN_STATE_PERIOD_ASTERISK:
+ case TOKENIZER_STATE_PERIOD_ASTERISK:
self->index++;
switch (self->buffer[self->index]) {
case '*':
- result.tag = TOKEN_TAG_INVALID_PERIODASTERISKS;
+ result.tag = TOKENIZER_TAG_INVALID_PERIODASTERISKS;
break;
default:
- result.tag = TOKEN_TAG_PERIOD_ASTERISK;
+ result.tag = TOKENIZER_TAG_PERIOD_ASTERISK;
break;
}
break;
- case TOKEN_STATE_SLASH:
+ case TOKENIZER_STATE_SLASH:
self->index++;
switch (self->buffer[self->index]) {
case '/':
- state = TOKEN_STATE_LINE_COMMENT_START;
+ state = TOKENIZER_STATE_LINE_COMMENT_START;
goto state;
case '=':
- result.tag = TOKEN_TAG_SLASH_EQUAL;
+ result.tag = TOKENIZER_TAG_SLASH_EQUAL;
self->index++;
break;
default:
- result.tag = TOKEN_TAG_SLASH;
+ result.tag = TOKENIZER_TAG_SLASH;
break;
}
break;
- case TOKEN_STATE_LINE_COMMENT_START:
+ case TOKENIZER_STATE_LINE_COMMENT_START:
self->index++;
switch (self->buffer[self->index]) {
case 0:
if (self->index != self->buffer_len) {
- state = TOKEN_STATE_INVALID;
+ state = TOKENIZER_STATE_INVALID;
goto state;
} else {
- return (token) {
- .tag = TOKEN_TAG_EOF,
+ return (tokenizer_token) {
+ .tag = TOKENIZER_TAG_EOF,
.loc = {
.start = self->index,
.end = self->index }
@@ -868,73 +866,73 @@ state:
}
break;
case '!':
- result.tag = TOKEN_TAG_CONTAINER_DOC_COMMENT;
- state = TOKEN_STATE_DOC_COMMENT;
+ result.tag = TOKENIZER_TAG_CONTAINER_DOC_COMMENT;
+ state = TOKENIZER_STATE_DOC_COMMENT;
goto state;
case '\n':
self->index++;
result.loc.start = self->index;
- state = TOKEN_STATE_START;
+ state = TOKENIZER_STATE_START;
goto state;
case '/':
- state = TOKEN_STATE_DOC_COMMENT_START;
+ state = TOKENIZER_STATE_DOC_COMMENT_START;
goto state;
case '\r':
- state = TOKEN_STATE_EXPECT_NEWLINE;
+ state = TOKENIZER_STATE_EXPECT_NEWLINE;
goto state;
case 0x01 ... 0x09:
case 0x0b ... 0x0c:
case 0x0e ... 0x1f:
case 0x7f:
- state = TOKEN_STATE_INVALID;
+ state = TOKENIZER_STATE_INVALID;
goto state;
default:
- state = TOKEN_STATE_LINE_COMMENT;
+ state = TOKENIZER_STATE_LINE_COMMENT;
goto state;
}
break;
- case TOKEN_STATE_DOC_COMMENT_START:
+ case TOKENIZER_STATE_DOC_COMMENT_START:
self->index++;
switch (self->buffer[self->index]) {
case 0:
case '\n':
- result.tag = TOKEN_TAG_DOC_COMMENT;
+ result.tag = TOKENIZER_TAG_DOC_COMMENT;
break;
case '\r':
if (self->buffer[self->index + 1] == '\n') {
- result.tag = TOKEN_TAG_DOC_COMMENT;
+ result.tag = TOKENIZER_TAG_DOC_COMMENT;
} else {
- state = TOKEN_STATE_INVALID;
+ state = TOKENIZER_STATE_INVALID;
goto state;
}
break;
case '/':
- state = TOKEN_STATE_LINE_COMMENT;
+ state = TOKENIZER_STATE_LINE_COMMENT;
goto state;
case 0x01 ... 0x09:
case 0x0b ... 0x0c:
case 0x0e ... 0x1f:
case 0x7f:
- state = TOKEN_STATE_INVALID;
+ state = TOKENIZER_STATE_INVALID;
goto state;
default:
- result.tag = TOKEN_TAG_DOC_COMMENT;
- state = TOKEN_STATE_DOC_COMMENT;
+ result.tag = TOKENIZER_TAG_DOC_COMMENT;
+ state = TOKENIZER_STATE_DOC_COMMENT;
goto state;
}
break;
- case TOKEN_STATE_LINE_COMMENT:
+ case TOKENIZER_STATE_LINE_COMMENT:
self->index++;
switch (self->buffer[self->index]) {
case 0:
if (self->index != self->buffer_len) {
- state = TOKEN_STATE_INVALID;
+ state = TOKENIZER_STATE_INVALID;
goto state;
} else {
- return (token) {
- .tag = TOKEN_TAG_EOF,
+ return (tokenizer_token) {
+ .tag = TOKENIZER_TAG_EOF,
.loc = {
.start = self->index,
.end = self->index }
@@ -944,24 +942,24 @@ state:
case '\n':
self->index++;
result.loc.start = self->index;
- state = TOKEN_STATE_START;
+ state = TOKENIZER_STATE_START;
goto state;
case '\r':
- state = TOKEN_STATE_EXPECT_NEWLINE;
+ state = TOKENIZER_STATE_EXPECT_NEWLINE;
goto state;
case 0x01 ... 0x09:
case 0x0b ... 0x0c:
case 0x0e ... 0x1f:
case 0x7f:
- state = TOKEN_STATE_INVALID;
+ state = TOKENIZER_STATE_INVALID;
goto state;
default:
- state = TOKEN_STATE_LINE_COMMENT;
+ state = TOKENIZER_STATE_LINE_COMMENT;
goto state;
}
break;
- case TOKEN_STATE_DOC_COMMENT:
+ case TOKENIZER_STATE_DOC_COMMENT:
self->index++;
switch (self->buffer[self->index]) {
case 0:
@@ -969,7 +967,7 @@ state:
break;
case '\r':
if (self->buffer[self->index + 1] != '\n') {
- state = TOKEN_STATE_INVALID;
+ state = TOKENIZER_STATE_INVALID;
goto state;
}
break;
@@ -977,18 +975,18 @@ state:
case 0x0b ... 0x0c:
case 0x0e ... 0x1f:
case 0x7f:
- state = TOKEN_STATE_INVALID;
+ state = TOKENIZER_STATE_INVALID;
goto state;
default:
- state = TOKEN_STATE_DOC_COMMENT;
+ state = TOKENIZER_STATE_DOC_COMMENT;
goto state;
}
break;
- case TOKEN_STATE_INT:
+ case TOKENIZER_STATE_INT:
switch (self->buffer[self->index]) {
case '.':
- state = TOKEN_STATE_INT_PERIOD;
+ state = TOKENIZER_STATE_INT_PERIOD;
goto state;
case '_':
case 'a' ... 'd':
@@ -999,34 +997,34 @@ state:
case 'Q' ... 'Z':
case '0' ... '9':
self->index++;
- state = TOKEN_STATE_INT;
+ state = TOKENIZER_STATE_INT;
goto state;
case 'e':
case 'E':
case 'p':
case 'P':
- state = TOKEN_STATE_INT_EXPONENT;
+ state = TOKENIZER_STATE_INT_EXPONENT;
goto state;
default:
break;
}
break;
- case TOKEN_STATE_INT_EXPONENT:
+ case TOKENIZER_STATE_INT_EXPONENT:
self->index++;
switch (self->buffer[self->index]) {
case '-':
case '+':
self->index++;
- state = TOKEN_STATE_FLOAT;
+ state = TOKENIZER_STATE_FLOAT;
goto state;
default:
- state = TOKEN_STATE_INT;
+ state = TOKENIZER_STATE_INT;
goto state;
}
break;
- case TOKEN_STATE_INT_PERIOD:
+ case TOKENIZER_STATE_INT_PERIOD:
self->index++;
switch (self->buffer[self->index]) {
case '_':
@@ -1038,13 +1036,13 @@ state:
case 'Q' ... 'Z':
case '0' ... '9':
self->index++;
- state = TOKEN_STATE_FLOAT;
+ state = TOKENIZER_STATE_FLOAT;
goto state;
case 'e':
case 'E':
case 'p':
case 'P':
- state = TOKEN_STATE_FLOAT_EXPONENT;
+ state = TOKENIZER_STATE_FLOAT_EXPONENT;
goto state;
default:
self->index--;
@@ -1052,7 +1050,7 @@ state:
}
break;
- case TOKEN_STATE_FLOAT:
+ case TOKENIZER_STATE_FLOAT:
switch (self->buffer[self->index]) {
case '_':
case 'a' ... 'd':
@@ -1063,29 +1061,29 @@ state:
case 'Q' ... 'Z':
case '0' ... '9':
self->index++;
- state = TOKEN_STATE_FLOAT;
+ state = TOKENIZER_STATE_FLOAT;
goto state;
case 'e':
case 'E':
case 'p':
case 'P':
- state = TOKEN_STATE_FLOAT_EXPONENT;
+ state = TOKENIZER_STATE_FLOAT_EXPONENT;
goto state;
default:
break;
}
break;
- case TOKEN_STATE_FLOAT_EXPONENT:
+ case TOKENIZER_STATE_FLOAT_EXPONENT:
self->index++;
switch (self->buffer[self->index]) {
case '-':
case '+':
self->index++;
- state = TOKEN_STATE_FLOAT;
+ state = TOKENIZER_STATE_FLOAT;
goto state;
default:
- state = TOKEN_STATE_FLOAT;
+ state = TOKENIZER_STATE_FLOAT;
goto state;
}
break;
diff --git a/tokenizer.h b/tokenizer.h
@@ -1,188 +1,188 @@
-#ifndef __ZIG1_TOKENIZER_H__
-#define __ZIG1_TOKENIZER_H__
+#ifndef _ZIG1_TOKENIZER_H__
+#define _ZIG1_TOKENIZER_H__
#include <stdbool.h>
#include <stdint.h>
typedef enum {
- TOKEN_TAG_INVALID,
- TOKEN_TAG_INVALID_PERIODASTERISKS,
- TOKEN_TAG_IDENTIFIER,
- TOKEN_TAG_STRING_LITERAL,
- TOKEN_TAG_MULTILINE_STRING_LITERAL_LINE,
- TOKEN_TAG_CHAR_LITERAL,
- TOKEN_TAG_EOF,
- TOKEN_TAG_BUILTIN,
- TOKEN_TAG_BANG,
- TOKEN_TAG_PIPE,
- TOKEN_TAG_PIPE_PIPE,
- TOKEN_TAG_PIPE_EQUAL,
- TOKEN_TAG_EQUAL,
- TOKEN_TAG_EQUAL_EQUAL,
- TOKEN_TAG_EQUAL_ANGLE_BRACKET_RIGHT,
- TOKEN_TAG_BANG_EQUAL,
- TOKEN_TAG_L_PAREN,
- TOKEN_TAG_R_PAREN,
- TOKEN_TAG_SEMICOLON,
- TOKEN_TAG_PERCENT,
- TOKEN_TAG_PERCENT_EQUAL,
- TOKEN_TAG_L_BRACE,
- TOKEN_TAG_R_BRACE,
- TOKEN_TAG_L_BRACKET,
- TOKEN_TAG_R_BRACKET,
- TOKEN_TAG_PERIOD,
- TOKEN_TAG_PERIOD_ASTERISK,
- TOKEN_TAG_ELLIPSIS2,
- TOKEN_TAG_ELLIPSIS3,
- TOKEN_TAG_CARET,
- TOKEN_TAG_CARET_EQUAL,
- TOKEN_TAG_PLUS,
- TOKEN_TAG_PLUS_PLUS,
- TOKEN_TAG_PLUS_EQUAL,
- TOKEN_TAG_PLUS_PERCENT,
- TOKEN_TAG_PLUS_PERCENT_EQUAL,
- TOKEN_TAG_PLUS_PIPE,
- TOKEN_TAG_PLUS_PIPE_EQUAL,
- TOKEN_TAG_MINUS,
- TOKEN_TAG_MINUS_EQUAL,
- TOKEN_TAG_MINUS_PERCENT,
- TOKEN_TAG_MINUS_PERCENT_EQUAL,
- TOKEN_TAG_MINUS_PIPE,
- TOKEN_TAG_MINUS_PIPE_EQUAL,
- TOKEN_TAG_ASTERISK,
- TOKEN_TAG_ASTERISK_EQUAL,
- TOKEN_TAG_ASTERISK_ASTERISK,
- TOKEN_TAG_ASTERISK_PERCENT,
- TOKEN_TAG_ASTERISK_PERCENT_EQUAL,
- TOKEN_TAG_ASTERISK_PIPE,
- TOKEN_TAG_ASTERISK_PIPE_EQUAL,
- TOKEN_TAG_ARROW,
- TOKEN_TAG_COLON,
- TOKEN_TAG_SLASH,
- TOKEN_TAG_SLASH_EQUAL,
- TOKEN_TAG_COMMA,
- TOKEN_TAG_AMPERSAND,
- TOKEN_TAG_AMPERSAND_EQUAL,
- TOKEN_TAG_QUESTION_MARK,
- TOKEN_TAG_ANGLE_BRACKET_LEFT,
- TOKEN_TAG_ANGLE_BRACKET_LEFT_EQUAL,
- TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT,
- TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_EQUAL,
- TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE,
- TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE_EQUAL,
- TOKEN_TAG_ANGLE_BRACKET_RIGHT,
- TOKEN_TAG_ANGLE_BRACKET_RIGHT_EQUAL,
- TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT,
- TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT_EQUAL,
- TOKEN_TAG_TILDE,
- TOKEN_TAG_NUMBER_LITERAL,
- TOKEN_TAG_DOC_COMMENT,
- TOKEN_TAG_CONTAINER_DOC_COMMENT,
- TOKEN_TAG_KEYWORD_ADDRSPACE,
- TOKEN_TAG_KEYWORD_ALIGN,
- TOKEN_TAG_KEYWORD_ALLOWZERO,
- TOKEN_TAG_KEYWORD_AND,
- TOKEN_TAG_KEYWORD_ANYFRAME,
- TOKEN_TAG_KEYWORD_ANYTYPE,
- TOKEN_TAG_KEYWORD_ASM,
- TOKEN_TAG_KEYWORD_ASYNC,
- TOKEN_TAG_KEYWORD_AWAIT,
- TOKEN_TAG_KEYWORD_BREAK,
- TOKEN_TAG_KEYWORD_CALLCONV,
- TOKEN_TAG_KEYWORD_CATCH,
- TOKEN_TAG_KEYWORD_COMPTIME,
- TOKEN_TAG_KEYWORD_CONST,
- TOKEN_TAG_KEYWORD_CONTINUE,
- TOKEN_TAG_KEYWORD_DEFER,
- TOKEN_TAG_KEYWORD_ELSE,
- TOKEN_TAG_KEYWORD_ENUM,
- TOKEN_TAG_KEYWORD_ERRDEFER,
- TOKEN_TAG_KEYWORD_ERROR,
- TOKEN_TAG_KEYWORD_EXPORT,
- TOKEN_TAG_KEYWORD_EXTERN,
- TOKEN_TAG_KEYWORD_FN,
- TOKEN_TAG_KEYWORD_FOR,
- TOKEN_TAG_KEYWORD_IF,
- TOKEN_TAG_KEYWORD_INLINE,
- TOKEN_TAG_KEYWORD_NOALIAS,
- TOKEN_TAG_KEYWORD_NOINLINE,
- TOKEN_TAG_KEYWORD_NOSUSPEND,
- TOKEN_TAG_KEYWORD_OPAQUE,
- TOKEN_TAG_KEYWORD_OR,
- TOKEN_TAG_KEYWORD_ORELSE,
- TOKEN_TAG_KEYWORD_PACKED,
- TOKEN_TAG_KEYWORD_PUB,
- TOKEN_TAG_KEYWORD_RESUME,
- TOKEN_TAG_KEYWORD_RETURN,
- TOKEN_TAG_KEYWORD_LINKSECTION,
- TOKEN_TAG_KEYWORD_STRUCT,
- TOKEN_TAG_KEYWORD_SUSPEND,
- TOKEN_TAG_KEYWORD_SWITCH,
- TOKEN_TAG_KEYWORD_TEST,
- TOKEN_TAG_KEYWORD_THREADLOCAL,
- TOKEN_TAG_KEYWORD_TRY,
- TOKEN_TAG_KEYWORD_UNION,
- TOKEN_TAG_KEYWORD_UNREACHABLE,
- TOKEN_TAG_KEYWORD_USINGNAMESPACE,
- TOKEN_TAG_KEYWORD_VAR,
- TOKEN_TAG_KEYWORD_VOLATILE,
- TOKEN_TAG_KEYWORD_WHILE,
-} token_tag;
+ TOKENIZER_TAG_INVALID,
+ TOKENIZER_TAG_INVALID_PERIODASTERISKS,
+ TOKENIZER_TAG_IDENTIFIER,
+ TOKENIZER_TAG_STRING_LITERAL,
+ TOKENIZER_TAG_MULTILINE_STRING_LITERAL_LINE,
+ TOKENIZER_TAG_CHAR_LITERAL,
+ TOKENIZER_TAG_EOF,
+ TOKENIZER_TAG_BUILTIN,
+ TOKENIZER_TAG_BANG,
+ TOKENIZER_TAG_PIPE,
+ TOKENIZER_TAG_PIPE_PIPE,
+ TOKENIZER_TAG_PIPE_EQUAL,
+ TOKENIZER_TAG_EQUAL,
+ TOKENIZER_TAG_EQUAL_EQUAL,
+ TOKENIZER_TAG_EQUAL_ANGLE_BRACKET_RIGHT,
+ TOKENIZER_TAG_BANG_EQUAL,
+ TOKENIZER_TAG_L_PAREN,
+ TOKENIZER_TAG_R_PAREN,
+ TOKENIZER_TAG_SEMICOLON,
+ TOKENIZER_TAG_PERCENT,
+ TOKENIZER_TAG_PERCENT_EQUAL,
+ TOKENIZER_TAG_L_BRACE,
+ TOKENIZER_TAG_R_BRACE,
+ TOKENIZER_TAG_L_BRACKET,
+ TOKENIZER_TAG_R_BRACKET,
+ TOKENIZER_TAG_PERIOD,
+ TOKENIZER_TAG_PERIOD_ASTERISK,
+ TOKENIZER_TAG_ELLIPSIS2,
+ TOKENIZER_TAG_ELLIPSIS3,
+ TOKENIZER_TAG_CARET,
+ TOKENIZER_TAG_CARET_EQUAL,
+ TOKENIZER_TAG_PLUS,
+ TOKENIZER_TAG_PLUS_PLUS,
+ TOKENIZER_TAG_PLUS_EQUAL,
+ TOKENIZER_TAG_PLUS_PERCENT,
+ TOKENIZER_TAG_PLUS_PERCENT_EQUAL,
+ TOKENIZER_TAG_PLUS_PIPE,
+ TOKENIZER_TAG_PLUS_PIPE_EQUAL,
+ TOKENIZER_TAG_MINUS,
+ TOKENIZER_TAG_MINUS_EQUAL,
+ TOKENIZER_TAG_MINUS_PERCENT,
+ TOKENIZER_TAG_MINUS_PERCENT_EQUAL,
+ TOKENIZER_TAG_MINUS_PIPE,
+ TOKENIZER_TAG_MINUS_PIPE_EQUAL,
+ TOKENIZER_TAG_ASTERISK,
+ TOKENIZER_TAG_ASTERISK_EQUAL,
+ TOKENIZER_TAG_ASTERISK_ASTERISK,
+ TOKENIZER_TAG_ASTERISK_PERCENT,
+ TOKENIZER_TAG_ASTERISK_PERCENT_EQUAL,
+ TOKENIZER_TAG_ASTERISK_PIPE,
+ TOKENIZER_TAG_ASTERISK_PIPE_EQUAL,
+ TOKENIZER_TAG_ARROW,
+ TOKENIZER_TAG_COLON,
+ TOKENIZER_TAG_SLASH,
+ TOKENIZER_TAG_SLASH_EQUAL,
+ TOKENIZER_TAG_COMMA,
+ TOKENIZER_TAG_AMPERSAND,
+ TOKENIZER_TAG_AMPERSAND_EQUAL,
+ TOKENIZER_TAG_QUESTION_MARK,
+ TOKENIZER_TAG_ANGLE_BRACKET_LEFT,
+ TOKENIZER_TAG_ANGLE_BRACKET_LEFT_EQUAL,
+ TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT,
+ TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_EQUAL,
+ TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE,
+ TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE_EQUAL,
+ TOKENIZER_TAG_ANGLE_BRACKET_RIGHT,
+ TOKENIZER_TAG_ANGLE_BRACKET_RIGHT_EQUAL,
+ TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT,
+ TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT_EQUAL,
+ TOKENIZER_TAG_TILDE,
+ TOKENIZER_TAG_NUMBER_LITERAL,
+ TOKENIZER_TAG_DOC_COMMENT,
+ TOKENIZER_TAG_CONTAINER_DOC_COMMENT,
+ TOKENIZER_TAG_KEYWORD_ADDRSPACE,
+ TOKENIZER_TAG_KEYWORD_ALIGN,
+ TOKENIZER_TAG_KEYWORD_ALLOWZERO,
+ TOKENIZER_TAG_KEYWORD_AND,
+ TOKENIZER_TAG_KEYWORD_ANYFRAME,
+ TOKENIZER_TAG_KEYWORD_ANYTYPE,
+ TOKENIZER_TAG_KEYWORD_ASM,
+ TOKENIZER_TAG_KEYWORD_ASYNC,
+ TOKENIZER_TAG_KEYWORD_AWAIT,
+ TOKENIZER_TAG_KEYWORD_BREAK,
+ TOKENIZER_TAG_KEYWORD_CALLCONV,
+ TOKENIZER_TAG_KEYWORD_CATCH,
+ TOKENIZER_TAG_KEYWORD_COMPTIME,
+ TOKENIZER_TAG_KEYWORD_CONST,
+ TOKENIZER_TAG_KEYWORD_CONTINUE,
+ TOKENIZER_TAG_KEYWORD_DEFER,
+ TOKENIZER_TAG_KEYWORD_ELSE,
+ TOKENIZER_TAG_KEYWORD_ENUM,
+ TOKENIZER_TAG_KEYWORD_ERRDEFER,
+ TOKENIZER_TAG_KEYWORD_ERROR,
+ TOKENIZER_TAG_KEYWORD_EXPORT,
+ TOKENIZER_TAG_KEYWORD_EXTERN,
+ TOKENIZER_TAG_KEYWORD_FN,
+ TOKENIZER_TAG_KEYWORD_FOR,
+ TOKENIZER_TAG_KEYWORD_IF,
+ TOKENIZER_TAG_KEYWORD_INLINE,
+ TOKENIZER_TAG_KEYWORD_NOALIAS,
+ TOKENIZER_TAG_KEYWORD_NOINLINE,
+ TOKENIZER_TAG_KEYWORD_NOSUSPEND,
+ TOKENIZER_TAG_KEYWORD_OPAQUE,
+ TOKENIZER_TAG_KEYWORD_OR,
+ TOKENIZER_TAG_KEYWORD_ORELSE,
+ TOKENIZER_TAG_KEYWORD_PACKED,
+ TOKENIZER_TAG_KEYWORD_PUB,
+ TOKENIZER_TAG_KEYWORD_RESUME,
+ TOKENIZER_TAG_KEYWORD_RETURN,
+ TOKENIZER_TAG_KEYWORD_LINKSECTION,
+ TOKENIZER_TAG_KEYWORD_STRUCT,
+ TOKENIZER_TAG_KEYWORD_SUSPEND,
+ TOKENIZER_TAG_KEYWORD_SWITCH,
+ TOKENIZER_TAG_KEYWORD_TEST,
+ TOKENIZER_TAG_KEYWORD_THREADLOCAL,
+ TOKENIZER_TAG_KEYWORD_TRY,
+ TOKENIZER_TAG_KEYWORD_UNION,
+ TOKENIZER_TAG_KEYWORD_UNREACHABLE,
+ TOKENIZER_TAG_KEYWORD_USINGNAMESPACE,
+ TOKENIZER_TAG_KEYWORD_VAR,
+ TOKENIZER_TAG_KEYWORD_VOLATILE,
+ TOKENIZER_TAG_KEYWORD_WHILE,
+} tokenizer_tag;
typedef enum {
- TOKEN_STATE_START,
- TOKEN_STATE_EXPECT_NEWLINE,
- TOKEN_STATE_IDENTIFIER,
- TOKEN_STATE_BUILTIN,
- TOKEN_STATE_STRING_LITERAL,
- TOKEN_STATE_STRING_LITERAL_BACKSLASH,
- TOKEN_STATE_MULTILINE_STRING_LITERAL_LINE,
- TOKEN_STATE_CHAR_LITERAL,
- TOKEN_STATE_CHAR_LITERAL_BACKSLASH,
- TOKEN_STATE_BACKSLASH,
- TOKEN_STATE_EQUAL,
- TOKEN_STATE_BANG,
- TOKEN_STATE_PIPE,
- TOKEN_STATE_MINUS,
- TOKEN_STATE_MINUS_PERCENT,
- TOKEN_STATE_MINUS_PIPE,
- TOKEN_STATE_ASTERISK,
- TOKEN_STATE_ASTERISK_PERCENT,
- TOKEN_STATE_ASTERISK_PIPE,
- TOKEN_STATE_SLASH,
- TOKEN_STATE_LINE_COMMENT_START,
- TOKEN_STATE_LINE_COMMENT,
- TOKEN_STATE_DOC_COMMENT_START,
- TOKEN_STATE_DOC_COMMENT,
- TOKEN_STATE_INT,
- TOKEN_STATE_INT_EXPONENT,
- TOKEN_STATE_INT_PERIOD,
- TOKEN_STATE_FLOAT,
- TOKEN_STATE_FLOAT_EXPONENT,
- TOKEN_STATE_AMPERSAND,
- TOKEN_STATE_CARET,
- TOKEN_STATE_PERCENT,
- TOKEN_STATE_PLUS,
- TOKEN_STATE_PLUS_PERCENT,
- TOKEN_STATE_PLUS_PIPE,
- TOKEN_STATE_ANGLE_BRACKET_LEFT,
- TOKEN_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT,
- TOKEN_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE,
- TOKEN_STATE_ANGLE_BRACKET_RIGHT,
- TOKEN_STATE_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT,
- TOKEN_STATE_PERIOD,
- TOKEN_STATE_PERIOD_2,
- TOKEN_STATE_PERIOD_ASTERISK,
- TOKEN_STATE_SAW_AT_SIGN,
- TOKEN_STATE_INVALID,
-} token_state;
+ TOKENIZER_STATE_START,
+ TOKENIZER_STATE_EXPECT_NEWLINE,
+ TOKENIZER_STATE_IDENTIFIER,
+ TOKENIZER_STATE_BUILTIN,
+ TOKENIZER_STATE_STRING_LITERAL,
+ TOKENIZER_STATE_STRING_LITERAL_BACKSLASH,
+ TOKENIZER_STATE_MULTILINE_STRING_LITERAL_LINE,
+ TOKENIZER_STATE_CHAR_LITERAL,
+ TOKENIZER_STATE_CHAR_LITERAL_BACKSLASH,
+ TOKENIZER_STATE_BACKSLASH,
+ TOKENIZER_STATE_EQUAL,
+ TOKENIZER_STATE_BANG,
+ TOKENIZER_STATE_PIPE,
+ TOKENIZER_STATE_MINUS,
+ TOKENIZER_STATE_MINUS_PERCENT,
+ TOKENIZER_STATE_MINUS_PIPE,
+ TOKENIZER_STATE_ASTERISK,
+ TOKENIZER_STATE_ASTERISK_PERCENT,
+ TOKENIZER_STATE_ASTERISK_PIPE,
+ TOKENIZER_STATE_SLASH,
+ TOKENIZER_STATE_LINE_COMMENT_START,
+ TOKENIZER_STATE_LINE_COMMENT,
+ TOKENIZER_STATE_DOC_COMMENT_START,
+ TOKENIZER_STATE_DOC_COMMENT,
+ TOKENIZER_STATE_INT,
+ TOKENIZER_STATE_INT_EXPONENT,
+ TOKENIZER_STATE_INT_PERIOD,
+ TOKENIZER_STATE_FLOAT,
+ TOKENIZER_STATE_FLOAT_EXPONENT,
+ TOKENIZER_STATE_AMPERSAND,
+ TOKENIZER_STATE_CARET,
+ TOKENIZER_STATE_PERCENT,
+ TOKENIZER_STATE_PLUS,
+ TOKENIZER_STATE_PLUS_PERCENT,
+ TOKENIZER_STATE_PLUS_PIPE,
+ TOKENIZER_STATE_ANGLE_BRACKET_LEFT,
+ TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT,
+ TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE,
+ TOKENIZER_STATE_ANGLE_BRACKET_RIGHT,
+ TOKENIZER_STATE_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT,
+ TOKENIZER_STATE_PERIOD,
+ TOKENIZER_STATE_PERIOD_2,
+ TOKENIZER_STATE_PERIOD_ASTERISK,
+ TOKENIZER_STATE_SAW_AT_SIGN,
+ TOKENIZER_STATE_INVALID,
+} tokenizer_state;
typedef struct {
- token_tag tag;
+ tokenizer_tag tag;
struct {
uint32_t start, end;
} loc;
-} token;
+} tokenizer_token;
typedef struct {
const char* buffer;
@@ -191,6 +191,6 @@ typedef struct {
} tokenizer;
tokenizer tokenizer_init(const char* buffer, uint32_t len);
-token tokenizer_next(tokenizer* self);
+tokenizer_token tokenizer_next(tokenizer* self);
#endif
diff --git a/tokenizer_test.zig b/tokenizer_test.zig
@@ -10,128 +10,128 @@ const c = @cImport({
fn zigToken(token: c_uint) Token.Tag {
return switch (token) {
- c.TOKEN_TAG_INVALID => .invalid,
- c.TOKEN_TAG_INVALID_PERIODASTERISKS => .invalid_periodasterisks,
- c.TOKEN_TAG_IDENTIFIER => .identifier,
- c.TOKEN_TAG_STRING_LITERAL => .string_literal,
- c.TOKEN_TAG_MULTILINE_STRING_LITERAL_LINE => .multiline_string_literal_line,
- c.TOKEN_TAG_CHAR_LITERAL => .char_literal,
- c.TOKEN_TAG_EOF => .eof,
- c.TOKEN_TAG_BUILTIN => .builtin,
- c.TOKEN_TAG_BANG => .bang,
- c.TOKEN_TAG_PIPE => .pipe,
- c.TOKEN_TAG_PIPE_PIPE => .pipe_pipe,
- c.TOKEN_TAG_PIPE_EQUAL => .pipe_equal,
- c.TOKEN_TAG_EQUAL => .equal,
- c.TOKEN_TAG_EQUAL_EQUAL => .equal_equal,
- c.TOKEN_TAG_EQUAL_ANGLE_BRACKET_RIGHT => .equal_angle_bracket_right,
- c.TOKEN_TAG_BANG_EQUAL => .bang_equal,
- c.TOKEN_TAG_L_PAREN => .l_paren,
- c.TOKEN_TAG_R_PAREN => .r_paren,
- c.TOKEN_TAG_SEMICOLON => .semicolon,
- c.TOKEN_TAG_PERCENT => .percent,
- c.TOKEN_TAG_PERCENT_EQUAL => .percent_equal,
- c.TOKEN_TAG_L_BRACE => .l_brace,
- c.TOKEN_TAG_R_BRACE => .r_brace,
- c.TOKEN_TAG_L_BRACKET => .l_bracket,
- c.TOKEN_TAG_R_BRACKET => .r_bracket,
- c.TOKEN_TAG_PERIOD => .period,
- c.TOKEN_TAG_PERIOD_ASTERISK => .period_asterisk,
- c.TOKEN_TAG_ELLIPSIS2 => .ellipsis2,
- c.TOKEN_TAG_ELLIPSIS3 => .ellipsis3,
- c.TOKEN_TAG_CARET => .caret,
- c.TOKEN_TAG_CARET_EQUAL => .caret_equal,
- c.TOKEN_TAG_PLUS => .plus,
- c.TOKEN_TAG_PLUS_PLUS => .plus_plus,
- c.TOKEN_TAG_PLUS_EQUAL => .plus_equal,
- c.TOKEN_TAG_PLUS_PERCENT => .plus_percent,
- c.TOKEN_TAG_PLUS_PERCENT_EQUAL => .plus_percent_equal,
- c.TOKEN_TAG_PLUS_PIPE => .plus_pipe,
- c.TOKEN_TAG_PLUS_PIPE_EQUAL => .plus_pipe_equal,
- c.TOKEN_TAG_MINUS => .minus,
- c.TOKEN_TAG_MINUS_EQUAL => .minus_equal,
- c.TOKEN_TAG_MINUS_PERCENT => .minus_percent,
- c.TOKEN_TAG_MINUS_PERCENT_EQUAL => .minus_percent_equal,
- c.TOKEN_TAG_MINUS_PIPE => .minus_pipe,
- c.TOKEN_TAG_MINUS_PIPE_EQUAL => .minus_pipe_equal,
- c.TOKEN_TAG_ASTERISK => .asterisk,
- c.TOKEN_TAG_ASTERISK_EQUAL => .asterisk_equal,
- c.TOKEN_TAG_ASTERISK_ASTERISK => .asterisk_asterisk,
- c.TOKEN_TAG_ASTERISK_PERCENT => .asterisk_percent,
- c.TOKEN_TAG_ASTERISK_PERCENT_EQUAL => .asterisk_percent_equal,
- c.TOKEN_TAG_ASTERISK_PIPE => .asterisk_pipe,
- c.TOKEN_TAG_ASTERISK_PIPE_EQUAL => .asterisk_pipe_equal,
- c.TOKEN_TAG_ARROW => .arrow,
- c.TOKEN_TAG_COLON => .colon,
- c.TOKEN_TAG_SLASH => .slash,
- c.TOKEN_TAG_SLASH_EQUAL => .slash_equal,
- c.TOKEN_TAG_COMMA => .comma,
- c.TOKEN_TAG_AMPERSAND => .ampersand,
- c.TOKEN_TAG_AMPERSAND_EQUAL => .ampersand_equal,
- c.TOKEN_TAG_QUESTION_MARK => .question_mark,
- c.TOKEN_TAG_ANGLE_BRACKET_LEFT => .angle_bracket_left,
- c.TOKEN_TAG_ANGLE_BRACKET_LEFT_EQUAL => .angle_bracket_left_equal,
- c.TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT => .angle_bracket_angle_bracket_left,
- c.TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_EQUAL => .angle_bracket_angle_bracket_left_equal,
- c.TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE => .angle_bracket_angle_bracket_left_pipe,
- c.TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE_EQUAL => .angle_bracket_angle_bracket_left_pipe_equal,
- c.TOKEN_TAG_ANGLE_BRACKET_RIGHT => .angle_bracket_right,
- c.TOKEN_TAG_ANGLE_BRACKET_RIGHT_EQUAL => .angle_bracket_right_equal,
- c.TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT => .angle_bracket_angle_bracket_right,
- c.TOKEN_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT_EQUAL => .angle_bracket_angle_bracket_right_equal,
- c.TOKEN_TAG_TILDE => .tilde,
- c.TOKEN_TAG_NUMBER_LITERAL => .number_literal,
- c.TOKEN_TAG_DOC_COMMENT => .doc_comment,
- c.TOKEN_TAG_CONTAINER_DOC_COMMENT => .container_doc_comment,
- c.TOKEN_TAG_KEYWORD_ADDRSPACE => .keyword_addrspace,
- c.TOKEN_TAG_KEYWORD_ALIGN => .keyword_align,
- c.TOKEN_TAG_KEYWORD_ALLOWZERO => .keyword_allowzero,
- c.TOKEN_TAG_KEYWORD_AND => .keyword_and,
- c.TOKEN_TAG_KEYWORD_ANYFRAME => .keyword_anyframe,
- c.TOKEN_TAG_KEYWORD_ANYTYPE => .keyword_anytype,
- c.TOKEN_TAG_KEYWORD_ASM => .keyword_asm,
- c.TOKEN_TAG_KEYWORD_ASYNC => .keyword_async,
- c.TOKEN_TAG_KEYWORD_AWAIT => .keyword_await,
- c.TOKEN_TAG_KEYWORD_BREAK => .keyword_break,
- c.TOKEN_TAG_KEYWORD_CALLCONV => .keyword_callconv,
- c.TOKEN_TAG_KEYWORD_CATCH => .keyword_catch,
- c.TOKEN_TAG_KEYWORD_COMPTIME => .keyword_comptime,
- c.TOKEN_TAG_KEYWORD_CONST => .keyword_const,
- c.TOKEN_TAG_KEYWORD_CONTINUE => .keyword_continue,
- c.TOKEN_TAG_KEYWORD_DEFER => .keyword_defer,
- c.TOKEN_TAG_KEYWORD_ELSE => .keyword_else,
- c.TOKEN_TAG_KEYWORD_ENUM => .keyword_enum,
- c.TOKEN_TAG_KEYWORD_ERRDEFER => .keyword_errdefer,
- c.TOKEN_TAG_KEYWORD_ERROR => .keyword_error,
- c.TOKEN_TAG_KEYWORD_EXPORT => .keyword_export,
- c.TOKEN_TAG_KEYWORD_EXTERN => .keyword_extern,
- c.TOKEN_TAG_KEYWORD_FN => .keyword_fn,
- c.TOKEN_TAG_KEYWORD_FOR => .keyword_for,
- c.TOKEN_TAG_KEYWORD_IF => .keyword_if,
- c.TOKEN_TAG_KEYWORD_INLINE => .keyword_inline,
- c.TOKEN_TAG_KEYWORD_NOALIAS => .keyword_noalias,
- c.TOKEN_TAG_KEYWORD_NOINLINE => .keyword_noinline,
- c.TOKEN_TAG_KEYWORD_NOSUSPEND => .keyword_nosuspend,
- c.TOKEN_TAG_KEYWORD_OPAQUE => .keyword_opaque,
- c.TOKEN_TAG_KEYWORD_OR => .keyword_or,
- c.TOKEN_TAG_KEYWORD_ORELSE => .keyword_orelse,
- c.TOKEN_TAG_KEYWORD_PACKED => .keyword_packed,
- c.TOKEN_TAG_KEYWORD_PUB => .keyword_pub,
- c.TOKEN_TAG_KEYWORD_RESUME => .keyword_resume,
- c.TOKEN_TAG_KEYWORD_RETURN => .keyword_return,
- c.TOKEN_TAG_KEYWORD_LINKSECTION => .keyword_linksection,
- c.TOKEN_TAG_KEYWORD_STRUCT => .keyword_struct,
- c.TOKEN_TAG_KEYWORD_SUSPEND => .keyword_suspend,
- c.TOKEN_TAG_KEYWORD_SWITCH => .keyword_switch,
- c.TOKEN_TAG_KEYWORD_TEST => .keyword_test,
- c.TOKEN_TAG_KEYWORD_THREADLOCAL => .keyword_threadlocal,
- c.TOKEN_TAG_KEYWORD_TRY => .keyword_try,
- c.TOKEN_TAG_KEYWORD_UNION => .keyword_union,
- c.TOKEN_TAG_KEYWORD_UNREACHABLE => .keyword_unreachable,
- c.TOKEN_TAG_KEYWORD_USINGNAMESPACE => .keyword_usingnamespace,
- c.TOKEN_TAG_KEYWORD_VAR => .keyword_var,
- c.TOKEN_TAG_KEYWORD_VOLATILE => .keyword_volatile,
- c.TOKEN_TAG_KEYWORD_WHILE => .keyword_while,
+ c.TOKENIZER_TAG_INVALID => .invalid,
+ c.TOKENIZER_TAG_INVALID_PERIODASTERISKS => .invalid_periodasterisks,
+ c.TOKENIZER_TAG_IDENTIFIER => .identifier,
+ c.TOKENIZER_TAG_STRING_LITERAL => .string_literal,
+ c.TOKENIZER_TAG_MULTILINE_STRING_LITERAL_LINE => .multiline_string_literal_line,
+ c.TOKENIZER_TAG_CHAR_LITERAL => .char_literal,
+ c.TOKENIZER_TAG_EOF => .eof,
+ c.TOKENIZER_TAG_BUILTIN => .builtin,
+ c.TOKENIZER_TAG_BANG => .bang,
+ c.TOKENIZER_TAG_PIPE => .pipe,
+ c.TOKENIZER_TAG_PIPE_PIPE => .pipe_pipe,
+ c.TOKENIZER_TAG_PIPE_EQUAL => .pipe_equal,
+ c.TOKENIZER_TAG_EQUAL => .equal,
+ c.TOKENIZER_TAG_EQUAL_EQUAL => .equal_equal,
+ c.TOKENIZER_TAG_EQUAL_ANGLE_BRACKET_RIGHT => .equal_angle_bracket_right,
+ c.TOKENIZER_TAG_BANG_EQUAL => .bang_equal,
+ c.TOKENIZER_TAG_L_PAREN => .l_paren,
+ c.TOKENIZER_TAG_R_PAREN => .r_paren,
+ c.TOKENIZER_TAG_SEMICOLON => .semicolon,
+ c.TOKENIZER_TAG_PERCENT => .percent,
+ c.TOKENIZER_TAG_PERCENT_EQUAL => .percent_equal,
+ c.TOKENIZER_TAG_L_BRACE => .l_brace,
+ c.TOKENIZER_TAG_R_BRACE => .r_brace,
+ c.TOKENIZER_TAG_L_BRACKET => .l_bracket,
+ c.TOKENIZER_TAG_R_BRACKET => .r_bracket,
+ c.TOKENIZER_TAG_PERIOD => .period,
+ c.TOKENIZER_TAG_PERIOD_ASTERISK => .period_asterisk,
+ c.TOKENIZER_TAG_ELLIPSIS2 => .ellipsis2,
+ c.TOKENIZER_TAG_ELLIPSIS3 => .ellipsis3,
+ c.TOKENIZER_TAG_CARET => .caret,
+ c.TOKENIZER_TAG_CARET_EQUAL => .caret_equal,
+ c.TOKENIZER_TAG_PLUS => .plus,
+ c.TOKENIZER_TAG_PLUS_PLUS => .plus_plus,
+ c.TOKENIZER_TAG_PLUS_EQUAL => .plus_equal,
+ c.TOKENIZER_TAG_PLUS_PERCENT => .plus_percent,
+ c.TOKENIZER_TAG_PLUS_PERCENT_EQUAL => .plus_percent_equal,
+ c.TOKENIZER_TAG_PLUS_PIPE => .plus_pipe,
+ c.TOKENIZER_TAG_PLUS_PIPE_EQUAL => .plus_pipe_equal,
+ c.TOKENIZER_TAG_MINUS => .minus,
+ c.TOKENIZER_TAG_MINUS_EQUAL => .minus_equal,
+ c.TOKENIZER_TAG_MINUS_PERCENT => .minus_percent,
+ c.TOKENIZER_TAG_MINUS_PERCENT_EQUAL => .minus_percent_equal,
+ c.TOKENIZER_TAG_MINUS_PIPE => .minus_pipe,
+ c.TOKENIZER_TAG_MINUS_PIPE_EQUAL => .minus_pipe_equal,
+ c.TOKENIZER_TAG_ASTERISK => .asterisk,
+ c.TOKENIZER_TAG_ASTERISK_EQUAL => .asterisk_equal,
+ c.TOKENIZER_TAG_ASTERISK_ASTERISK => .asterisk_asterisk,
+ c.TOKENIZER_TAG_ASTERISK_PERCENT => .asterisk_percent,
+ c.TOKENIZER_TAG_ASTERISK_PERCENT_EQUAL => .asterisk_percent_equal,
+ c.TOKENIZER_TAG_ASTERISK_PIPE => .asterisk_pipe,
+ c.TOKENIZER_TAG_ASTERISK_PIPE_EQUAL => .asterisk_pipe_equal,
+ c.TOKENIZER_TAG_ARROW => .arrow,
+ c.TOKENIZER_TAG_COLON => .colon,
+ c.TOKENIZER_TAG_SLASH => .slash,
+ c.TOKENIZER_TAG_SLASH_EQUAL => .slash_equal,
+ c.TOKENIZER_TAG_COMMA => .comma,
+ c.TOKENIZER_TAG_AMPERSAND => .ampersand,
+ c.TOKENIZER_TAG_AMPERSAND_EQUAL => .ampersand_equal,
+ c.TOKENIZER_TAG_QUESTION_MARK => .question_mark,
+ c.TOKENIZER_TAG_ANGLE_BRACKET_LEFT => .angle_bracket_left,
+ c.TOKENIZER_TAG_ANGLE_BRACKET_LEFT_EQUAL => .angle_bracket_left_equal,
+ c.TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT => .angle_bracket_angle_bracket_left,
+ c.TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_EQUAL => .angle_bracket_angle_bracket_left_equal,
+ c.TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE => .angle_bracket_angle_bracket_left_pipe,
+ c.TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE_EQUAL => .angle_bracket_angle_bracket_left_pipe_equal,
+ c.TOKENIZER_TAG_ANGLE_BRACKET_RIGHT => .angle_bracket_right,
+ c.TOKENIZER_TAG_ANGLE_BRACKET_RIGHT_EQUAL => .angle_bracket_right_equal,
+ c.TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT => .angle_bracket_angle_bracket_right,
+ c.TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT_EQUAL => .angle_bracket_angle_bracket_right_equal,
+ c.TOKENIZER_TAG_TILDE => .tilde,
+ c.TOKENIZER_TAG_NUMBER_LITERAL => .number_literal,
+ c.TOKENIZER_TAG_DOC_COMMENT => .doc_comment,
+ c.TOKENIZER_TAG_CONTAINER_DOC_COMMENT => .container_doc_comment,
+ c.TOKENIZER_TAG_KEYWORD_ADDRSPACE => .keyword_addrspace,
+ c.TOKENIZER_TAG_KEYWORD_ALIGN => .keyword_align,
+ c.TOKENIZER_TAG_KEYWORD_ALLOWZERO => .keyword_allowzero,
+ c.TOKENIZER_TAG_KEYWORD_AND => .keyword_and,
+ c.TOKENIZER_TAG_KEYWORD_ANYFRAME => .keyword_anyframe,
+ c.TOKENIZER_TAG_KEYWORD_ANYTYPE => .keyword_anytype,
+ c.TOKENIZER_TAG_KEYWORD_ASM => .keyword_asm,
+ c.TOKENIZER_TAG_KEYWORD_ASYNC => .keyword_async,
+ c.TOKENIZER_TAG_KEYWORD_AWAIT => .keyword_await,
+ c.TOKENIZER_TAG_KEYWORD_BREAK => .keyword_break,
+ c.TOKENIZER_TAG_KEYWORD_CALLCONV => .keyword_callconv,
+ c.TOKENIZER_TAG_KEYWORD_CATCH => .keyword_catch,
+ c.TOKENIZER_TAG_KEYWORD_COMPTIME => .keyword_comptime,
+ c.TOKENIZER_TAG_KEYWORD_CONST => .keyword_const,
+ c.TOKENIZER_TAG_KEYWORD_CONTINUE => .keyword_continue,
+ c.TOKENIZER_TAG_KEYWORD_DEFER => .keyword_defer,
+ c.TOKENIZER_TAG_KEYWORD_ELSE => .keyword_else,
+ c.TOKENIZER_TAG_KEYWORD_ENUM => .keyword_enum,
+ c.TOKENIZER_TAG_KEYWORD_ERRDEFER => .keyword_errdefer,
+ c.TOKENIZER_TAG_KEYWORD_ERROR => .keyword_error,
+ c.TOKENIZER_TAG_KEYWORD_EXPORT => .keyword_export,
+ c.TOKENIZER_TAG_KEYWORD_EXTERN => .keyword_extern,
+ c.TOKENIZER_TAG_KEYWORD_FN => .keyword_fn,
+ c.TOKENIZER_TAG_KEYWORD_FOR => .keyword_for,
+ c.TOKENIZER_TAG_KEYWORD_IF => .keyword_if,
+ c.TOKENIZER_TAG_KEYWORD_INLINE => .keyword_inline,
+ c.TOKENIZER_TAG_KEYWORD_NOALIAS => .keyword_noalias,
+ c.TOKENIZER_TAG_KEYWORD_NOINLINE => .keyword_noinline,
+ c.TOKENIZER_TAG_KEYWORD_NOSUSPEND => .keyword_nosuspend,
+ c.TOKENIZER_TAG_KEYWORD_OPAQUE => .keyword_opaque,
+ c.TOKENIZER_TAG_KEYWORD_OR => .keyword_or,
+ c.TOKENIZER_TAG_KEYWORD_ORELSE => .keyword_orelse,
+ c.TOKENIZER_TAG_KEYWORD_PACKED => .keyword_packed,
+ c.TOKENIZER_TAG_KEYWORD_PUB => .keyword_pub,
+ c.TOKENIZER_TAG_KEYWORD_RESUME => .keyword_resume,
+ c.TOKENIZER_TAG_KEYWORD_RETURN => .keyword_return,
+ c.TOKENIZER_TAG_KEYWORD_LINKSECTION => .keyword_linksection,
+ c.TOKENIZER_TAG_KEYWORD_STRUCT => .keyword_struct,
+ c.TOKENIZER_TAG_KEYWORD_SUSPEND => .keyword_suspend,
+ c.TOKENIZER_TAG_KEYWORD_SWITCH => .keyword_switch,
+ c.TOKENIZER_TAG_KEYWORD_TEST => .keyword_test,
+ c.TOKENIZER_TAG_KEYWORD_THREADLOCAL => .keyword_threadlocal,
+ c.TOKENIZER_TAG_KEYWORD_TRY => .keyword_try,
+ c.TOKENIZER_TAG_KEYWORD_UNION => .keyword_union,
+ c.TOKENIZER_TAG_KEYWORD_UNREACHABLE => .keyword_unreachable,
+ c.TOKENIZER_TAG_KEYWORD_USINGNAMESPACE => .keyword_usingnamespace,
+ c.TOKENIZER_TAG_KEYWORD_VAR => .keyword_var,
+ c.TOKENIZER_TAG_KEYWORD_VOLATILE => .keyword_volatile,
+ c.TOKENIZER_TAG_KEYWORD_WHILE => .keyword_while,
else => undefined,
};
}