diff --git a/ast.c b/ast.c
index f8bd3cd..80fb9d1 100644
--- a/ast.c
+++ b/ast.c
@@ -4,24 +4,26 @@ #include
 #include "ast.h"
-#include "parse.h"
+#include "parser.h"
+
+#define N 1024
 
 ast ast_parse(const char* source, const uint32_t len, int* err)
 {
     uint32_t estimated_token_count = len / 8;
 
-    tokenizer_tag* token_tags = NULL;
-    ast_index* token_starts = NULL;
-    ast_node_tag* nodes_tags = NULL;
-    ast_token_index* main_tokens = NULL;
-    ast_data* nodes_datas = NULL;
-    ast_node_index* extra_data_arr = NULL;
-    ast_node_index* scratch_arr = NULL;
+    tokenizerTag* token_tags = NULL;
+    astIndex* token_starts = NULL;
+    astNodeTag* nodes_tags = NULL;
+    astTokenIndex* main_tokens = NULL;
+    astData* nodes_datas = NULL;
+    astNodeIndex* extra_data_arr = NULL;
+    astNodeIndex* scratch_arr = NULL;
 
-    if (!(token_tags = calloc(estimated_token_count, sizeof(tokenizer_tag))))
+    if (!(token_tags = calloc(estimated_token_count, sizeof(tokenizerTag))))
         goto err;
-    if (!(token_starts = calloc(estimated_token_count, sizeof(ast_index))))
+    if (!(token_starts = calloc(estimated_token_count, sizeof(astIndex))))
         goto err;
 
     tokenizer tok = tokenizer_init(source, len);
@@ -31,26 +33,26 @@ ast ast_parse(const char* source, const uint32_t len, int* err)
             fprintf(stderr, "too many tokens, bump estimated_token_count\n");
             goto err;
         }
-        tokenizer_token token = tokenizer_next(&tok);
+        tokenizerToken token = tokenizer_next(&tok);
         token_tags[tokens_len] = token.tag;
         token_starts[tokens_len] = token.loc.start;
     }
 
     uint32_t estimated_node_count = (tokens_len + 2) / 2;
 
-    if (!(nodes_tags = calloc(estimated_node_count, sizeof(ast_node_tag))))
+    if (!(nodes_tags = calloc(estimated_node_count, sizeof(astNodeTag))))
         goto err;
 
-    if (!(main_tokens = calloc(estimated_node_count, sizeof(ast_token_index))))
+    if (!(main_tokens = calloc(estimated_node_count, sizeof(astTokenIndex))))
         goto err;
 
-    if (!(nodes_datas = calloc(estimated_node_count, sizeof(ast_data))))
+    if (!(nodes_datas = calloc(estimated_node_count, sizeof(astData))))
         goto err;
 
-    if (!(extra_data_arr = calloc(16, sizeof(ast_token_index))))
+    if (!(extra_data_arr = calloc(N, sizeof(astNodeIndex))))
         goto err;
 
-    if (!(scratch_arr = calloc(16, sizeof(ast_token_index))))
+    if (!(scratch_arr = calloc(N, sizeof(astNodeIndex))))
         goto err;
 
     parser p = (parser) {
@@ -60,20 +62,20 @@ ast ast_parse(const char* source, const uint32_t len, int* err)
         .token_starts = token_starts,
         .tokens_len = tokens_len,
         .tok_i = 0,
-        .nodes = (ast_node_list) {
+        .nodes = (astNodeList) {
             .len = 0,
             .cap = estimated_node_count,
             .tags = nodes_tags,
             .main_tokens = main_tokens,
             .datas = nodes_datas,
         },
-        .extra_data = (parser_node_index_slice) { .len = 0, .cap = 16, .arr = extra_data_arr },
-        .scratch = (parser_node_index_slice) { .len = 0, .cap = 16, .arr = scratch_arr },
+        .extra_data = (parserNodeIndexSlice) { .len = 0, .cap = N, .arr = extra_data_arr },
+        .scratch = (parserNodeIndexSlice) { .len = 0, .cap = N, .arr = scratch_arr },
     };
 
     free(scratch_arr);
 
-    // TODO work
+    parse_root(&p);
 
     return (ast) {
         .source = source,
diff --git a/ast.h b/ast.h
index 728d274..7b96778 100644
--- a/ast.h
+++ b/ast.h
@@ -479,23 +479,23 @@ typedef enum {
     AST_NODE_TAG_ERROR_VALUE,
     /// `lhs!rhs`. main_token is the `!`.
     AST_NODE_TAG_ERROR_UNION,
-} ast_node_tag;
+} astNodeTag;
 
-typedef uint32_t ast_token_index;
-typedef uint32_t ast_node_index;
-typedef uint32_t ast_index;
+typedef int32_t astTokenIndex;
+typedef uint32_t astNodeIndex;
+typedef uint32_t astIndex;
 
 typedef struct {
-    ast_index lhs, rhs;
-} ast_data;
+    astIndex lhs, rhs;
+} astData;
 
 typedef struct {
     uint32_t len;
     uint32_t cap;
-    ast_node_tag* tags;
-    ast_token_index* main_tokens;
-    ast_data* datas;
-} ast_node_list;
+    astNodeTag* tags;
+    astTokenIndex* main_tokens;
+    astData* datas;
+} astNodeList;
 
 typedef struct {
     const char* source;
@@ -503,13 +503,13 @@ typedef struct {
 
     struct {
         uint32_t len;
-        tokenizer_tag* tags;
-        ast_index* starts;
+        tokenizerTag* tags;
+        astIndex* starts;
     } tokens;
 
-    ast_node_list nodes;
+    astNodeList nodes;
 
-    ast_node_index* extra_data;
+    astNodeIndex* extra_data;
     uint32_t extra_data_len;
 } ast;
diff --git a/build.zig b/build.zig
index afbedcd..3f23427 100644
--- a/build.zig
+++ b/build.zig
@@ -1,6 +1,11 @@
 const std = @import("std");
 
-const c_lib_files = &[_][]const u8{ "tokenizer.c", "ast.c", "zig1.c" };
+const c_lib_files = &[_][]const u8{
+    "tokenizer.c",
+    "ast.c",
+    "zig1.c",
+    "parser.c",
+};
 const all_c_files = c_lib_files ++ &[_][]const u8{"main.c"};
 const cflags = &[_][]const u8{
     "-std=c11",
@@ -19,6 +24,7 @@ const cflags = &[_][]const u8{
     "-Wdouble-promotion",
     "-fstack-protector-all",
     "-Wimplicit-fallthrough",
+    "-Wno-unused-function", // TODO remove once refactoring is done
     //"-D_FORTIFY_SOURCE=2", // consider when optimization flags are enabled
 };
 
@@ -37,24 +43,21 @@ pub fn build(b: *std.Build) !void {
     if (std.mem.eql(u8, cc, "zig"))
         lib.addCSourceFiles(.{ .files = c_lib_files, .flags = cflags })
     else for (c_lib_files) |cfile| {
-        const objfile = try std.fmt.allocPrint(
-            b.allocator,
-            "{s}.o",
-            .{cfile[0 .. cfile.len - 2]},
-        );
         const cc1 = b.addSystemCommand(&.{cc});
-        cc1.addArgs(cflags);
-        cc1.addArg("-g");
-        cc1.addArgs(switch (optimize) {
-            .Debug => &.{"-O0"},
-            .ReleaseFast, .ReleaseSafe => &.{"-O3"}, // TODO ubsan?
-            .ReleaseSmall => &.{"-Os"},
+        cc1.addArgs(cflags ++ .{"-g"});
+        cc1.addArg(switch (optimize) {
+            .Debug => "-O0",
+            .ReleaseFast, .ReleaseSafe => "-O3", // ubsan?
+            .ReleaseSmall => "-Os",
         });
         cc1.addArg("-c");
        cc1.addFileArg(b.path(cfile));
        cc1.addArg("-o");
-        const obj = cc1.addOutputFileArg(objfile);
-        lib.addObjectFile(obj);
+        lib.addObjectFile(cc1.addOutputFileArg(try std.fmt.allocPrint(
+            b.allocator,
+            "{s}.o",
+            .{cfile[0 .. cfile.len - 2]},
+        )));
     }
 
     lib.linkLibC();
@@ -91,11 +94,13 @@ pub fn build(b: *std.Build) !void {
 
     const cppcheck = b.addSystemCommand(&.{"cppcheck"});
     cppcheck.addArgs(&.{
+        "--quiet",
+        "--error-exitcode=1",
         "--enable=all",
         "--suppress=missingIncludeSystem",
         "--suppress=checkersReport",
-        "--quiet",
         "--suppress=unusedFunction", // TODO remove after plumbing is done
+        "--suppress=unusedStructMember", // TODO remove after plumbing is done
     });
     for (all_c_files) |cfile| cppcheck.addFileArg(b.path(cfile));
     lint_step.dependOn(&cppcheck.step);
diff --git a/parser.c b/parser.c
new file mode 100644
index 0000000..aef725d
--- /dev/null
+++ b/parser.c
@@ -0,0 +1,190 @@
+#include
+
+#include "parser.h"
+
+typedef struct {
+    uint32_t len;
+    astNodeIndex lhs, rhs;
+    bool trailing;
+} members;
+
+typedef struct {
+    enum {
+        FIELD_STATE_NONE,
+        FIELD_STATE_SEEN,
+        FIELD_STATE_END // sets "end"
+    } tag;
+    union {
+        uint32_t end;
+    } payload;
+} field_state;
+
+static astTokenIndex next_token(parser* p)
+{
+    return ++p->tok_i;
+}
+
+static astTokenIndex eat_token(parser* p, tokenizerTag tag)
+{
+    return (p->token_tags[p->tok_i] == tag) ? next_token(p) : -1;
+}
+
+static members parse_container_members(parser* p)
+{
+    const uint32_t scratch_top = p->scratch.len;
+    members res = (members) { 0 };
+    // ast_token_index last_field;
+    while (eat_token(p, TOKENIZER_TAG_CONTAINER_DOC_COMMENT) != -1)
+        ;
+
+    // bool trailing = false;
+    while (1) {
+
+        // SKIP eat doc comments
+
+        switch (p->token_tags[p->tok_i]) {
+        case TOKENIZER_TAG_INVALID:
+        case TOKENIZER_TAG_INVALID_PERIODASTERISKS:
+        case TOKENIZER_TAG_IDENTIFIER:
+        case TOKENIZER_TAG_STRING_LITERAL:
+        case TOKENIZER_TAG_MULTILINE_STRING_LITERAL_LINE:
+        case TOKENIZER_TAG_CHAR_LITERAL:
+        case TOKENIZER_TAG_EOF:
+        case TOKENIZER_TAG_BUILTIN:
+        case TOKENIZER_TAG_BANG:
+        case TOKENIZER_TAG_PIPE:
+        case TOKENIZER_TAG_PIPE_PIPE:
+        case TOKENIZER_TAG_PIPE_EQUAL:
+        case TOKENIZER_TAG_EQUAL:
+        case TOKENIZER_TAG_EQUAL_EQUAL:
+        case TOKENIZER_TAG_EQUAL_ANGLE_BRACKET_RIGHT:
+        case TOKENIZER_TAG_BANG_EQUAL:
+        case TOKENIZER_TAG_L_PAREN:
+        case TOKENIZER_TAG_R_PAREN:
+        case TOKENIZER_TAG_SEMICOLON:
+        case TOKENIZER_TAG_PERCENT:
+        case TOKENIZER_TAG_PERCENT_EQUAL:
+        case TOKENIZER_TAG_L_BRACE:
+        case TOKENIZER_TAG_R_BRACE:
+        case TOKENIZER_TAG_L_BRACKET:
+        case TOKENIZER_TAG_R_BRACKET:
+        case TOKENIZER_TAG_PERIOD:
+        case TOKENIZER_TAG_PERIOD_ASTERISK:
+        case TOKENIZER_TAG_ELLIPSIS2:
+        case TOKENIZER_TAG_ELLIPSIS3:
+        case TOKENIZER_TAG_CARET:
+        case TOKENIZER_TAG_CARET_EQUAL:
+        case TOKENIZER_TAG_PLUS:
+        case TOKENIZER_TAG_PLUS_PLUS:
+        case TOKENIZER_TAG_PLUS_EQUAL:
+        case TOKENIZER_TAG_PLUS_PERCENT:
+        case TOKENIZER_TAG_PLUS_PERCENT_EQUAL:
+        case TOKENIZER_TAG_PLUS_PIPE:
+        case TOKENIZER_TAG_PLUS_PIPE_EQUAL:
+        case TOKENIZER_TAG_MINUS:
+        case TOKENIZER_TAG_MINUS_EQUAL:
+        case TOKENIZER_TAG_MINUS_PERCENT:
+        case TOKENIZER_TAG_MINUS_PERCENT_EQUAL:
+        case TOKENIZER_TAG_MINUS_PIPE:
+        case TOKENIZER_TAG_MINUS_PIPE_EQUAL:
+        case TOKENIZER_TAG_ASTERISK:
+        case TOKENIZER_TAG_ASTERISK_EQUAL:
+        case TOKENIZER_TAG_ASTERISK_ASTERISK:
+        case TOKENIZER_TAG_ASTERISK_PERCENT:
+        case TOKENIZER_TAG_ASTERISK_PERCENT_EQUAL:
+        case TOKENIZER_TAG_ASTERISK_PIPE:
+        case TOKENIZER_TAG_ASTERISK_PIPE_EQUAL:
+        case TOKENIZER_TAG_ARROW:
+        case TOKENIZER_TAG_COLON:
+        case TOKENIZER_TAG_SLASH:
+        case TOKENIZER_TAG_SLASH_EQUAL:
+        case TOKENIZER_TAG_COMMA:
+        case TOKENIZER_TAG_AMPERSAND:
+        case TOKENIZER_TAG_AMPERSAND_EQUAL:
+        case TOKENIZER_TAG_QUESTION_MARK:
+        case TOKENIZER_TAG_ANGLE_BRACKET_LEFT:
+        case TOKENIZER_TAG_ANGLE_BRACKET_LEFT_EQUAL:
+        case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT:
+        case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_EQUAL:
+        case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE:
+        case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE_EQUAL:
+        case TOKENIZER_TAG_ANGLE_BRACKET_RIGHT:
+        case TOKENIZER_TAG_ANGLE_BRACKET_RIGHT_EQUAL:
+        case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT:
+        case TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT_EQUAL:
+        case TOKENIZER_TAG_TILDE:
+        case TOKENIZER_TAG_NUMBER_LITERAL:
+        case TOKENIZER_TAG_DOC_COMMENT:
+        case TOKENIZER_TAG_CONTAINER_DOC_COMMENT:
+        case TOKENIZER_TAG_KEYWORD_ADDRSPACE:
+        case TOKENIZER_TAG_KEYWORD_ALIGN:
+        case TOKENIZER_TAG_KEYWORD_ALLOWZERO:
+        case TOKENIZER_TAG_KEYWORD_AND:
+        case TOKENIZER_TAG_KEYWORD_ANYFRAME:
+        case TOKENIZER_TAG_KEYWORD_ANYTYPE:
+        case TOKENIZER_TAG_KEYWORD_ASM:
+        case TOKENIZER_TAG_KEYWORD_ASYNC:
+        case TOKENIZER_TAG_KEYWORD_AWAIT:
+        case TOKENIZER_TAG_KEYWORD_BREAK:
+        case TOKENIZER_TAG_KEYWORD_CALLCONV:
+        case TOKENIZER_TAG_KEYWORD_CATCH:
+        case TOKENIZER_TAG_KEYWORD_COMPTIME:
+        case TOKENIZER_TAG_KEYWORD_CONST:
+        case TOKENIZER_TAG_KEYWORD_CONTINUE:
+        case TOKENIZER_TAG_KEYWORD_DEFER:
+        case TOKENIZER_TAG_KEYWORD_ELSE:
+        case TOKENIZER_TAG_KEYWORD_ENUM:
+        case TOKENIZER_TAG_KEYWORD_ERRDEFER:
+        case TOKENIZER_TAG_KEYWORD_ERROR:
+        case TOKENIZER_TAG_KEYWORD_EXPORT:
+        case TOKENIZER_TAG_KEYWORD_EXTERN:
+        case TOKENIZER_TAG_KEYWORD_FN:
+        case TOKENIZER_TAG_KEYWORD_FOR:
+        case TOKENIZER_TAG_KEYWORD_IF:
+        case TOKENIZER_TAG_KEYWORD_INLINE:
+        case TOKENIZER_TAG_KEYWORD_NOALIAS:
+        case TOKENIZER_TAG_KEYWORD_NOINLINE:
+        case TOKENIZER_TAG_KEYWORD_NOSUSPEND:
+        case TOKENIZER_TAG_KEYWORD_OPAQUE:
+        case TOKENIZER_TAG_KEYWORD_OR:
+        case TOKENIZER_TAG_KEYWORD_ORELSE:
+        case TOKENIZER_TAG_KEYWORD_PACKED:
+        case TOKENIZER_TAG_KEYWORD_RESUME:
+        case TOKENIZER_TAG_KEYWORD_RETURN:
+        case TOKENIZER_TAG_KEYWORD_LINKSECTION:
+        case TOKENIZER_TAG_KEYWORD_STRUCT:
+        case TOKENIZER_TAG_KEYWORD_SUSPEND:
+        case TOKENIZER_TAG_KEYWORD_SWITCH:
+        case TOKENIZER_TAG_KEYWORD_TEST:
+        case TOKENIZER_TAG_KEYWORD_THREADLOCAL:
+        case TOKENIZER_TAG_KEYWORD_TRY:
+        case TOKENIZER_TAG_KEYWORD_UNION:
+        case TOKENIZER_TAG_KEYWORD_UNREACHABLE:
+        case TOKENIZER_TAG_KEYWORD_USINGNAMESPACE:
+        case TOKENIZER_TAG_KEYWORD_VAR:
+        case TOKENIZER_TAG_KEYWORD_VOLATILE:
+        case TOKENIZER_TAG_KEYWORD_WHILE:;
+            const char* str = tokenizerTagString[p->token_tags[p->tok_i]];
+            fprintf(stderr, "token %s not implemented\n", str);
+            goto cleanup;
+        case TOKENIZER_TAG_KEYWORD_PUB:
+            p->tok_i++;
+            break;
+            // TODO do work
+        }
+    }
+
+cleanup:
+    p->scratch.len = scratch_top;
+    return res;
+}
+
+int parse_root(parser* p)
+{
+    // the root node lives at index 0; fill its fields, then bump len
+    p->nodes.tags[p->nodes.len] = AST_NODE_TAG_ROOT;
+    p->nodes.main_tokens[p->nodes.len] = 0;
+    p->nodes.len++;
+
+    // members root_members = parse_container_members(p);
+
+    return 0;
+}
diff --git a/parse.h b/parser.h
similarity index 52%
rename from parse.h
rename to parser.h
index 765fa98..9532bf6 100644
--- a/parse.h
+++ b/parser.h
@@ -9,22 +9,24 @@
 
 typedef struct {
     uint32_t len;
     uint32_t cap;
-    ast_node_index* arr;
-} parser_node_index_slice;
+    astNodeIndex* arr;
+} parserNodeIndexSlice;
 
 typedef struct {
     const char* source;
     const uint32_t source_len;
 
-    tokenizer_tag* token_tags;
-    ast_index* token_starts;
+    tokenizerTag* token_tags;
+    astIndex* token_starts;
     uint32_t tokens_len;
-    ast_token_index tok_i;
+    astTokenIndex tok_i;
 
-    ast_node_list nodes;
-    parser_node_index_slice extra_data;
-    parser_node_index_slice scratch;
+    astNodeList nodes;
+    parserNodeIndexSlice extra_data;
+    parserNodeIndexSlice scratch;
 } parser;
 
+int parse_root(parser*);
+
 #endif
diff --git a/tokenizer.c b/tokenizer.c
index a2df204..9852e53 100644
--- a/tokenizer.c
+++ b/tokenizer.c
@@ -7,10 +7,10 @@
 
 typedef struct {
     const char* keyword;
-    tokenizer_tag tag;
-} keyword_map;
+    tokenizerTag tag;
+} keywordMap;
 
-const keyword_map keywords[] = {
+const keywordMap keywords[] = {
     { "addrspace", TOKENIZER_TAG_KEYWORD_ADDRSPACE },
     { "align", TOKENIZER_TAG_KEYWORD_ALIGN },
     { "allowzero", TOKENIZER_TAG_KEYWORD_ALLOWZERO },
@@ -63,9 +63,9 @@ const keyword_map keywords[] = {
 };
 
 // TODO binary search
-static tokenizer_tag get_keyword(const char* bytes, const uint32_t len)
+static tokenizerTag get_keyword(const char* bytes, const uint32_t len)
 {
-    for (unsigned long i = 0; i < sizeof(keywords) / sizeof(keyword_map); i++) {
+    for (unsigned long i = 0; i < sizeof(keywords) / sizeof(keywordMap); i++) {
         size_t klen = strlen(keywords[i].keyword);
         size_t minlen = klen < len ? klen : len;
         int cmp = strncmp(bytes, keywords[i].keyword, minlen);
@@ -91,16 +91,16 @@ tokenizer tokenizer_init(const char* buffer, const uint32_t len)
     };
 }
 
-tokenizer_token tokenizer_next(tokenizer* self)
+tokenizerToken tokenizer_next(tokenizer* self)
 {
-    tokenizer_token result = (tokenizer_token) {
+    tokenizerToken result = (tokenizerToken) {
         .tag = TOKENIZER_TAG_INVALID,
         .loc = {
             .start = 0,
         },
     };
 
-    tokenizer_state state = TOKENIZER_STATE_START;
+    tokenizerState state = TOKENIZER_STATE_START;
 
 state:
     switch (state) {
@@ -108,7 +108,7 @@ state:
         switch (self->buffer[self->index]) {
         case 0:
            if (self->index == self->buffer_len) {
-                return (tokenizer_token) {
+                return (tokenizerToken) {
                    .tag = TOKENIZER_TAG_EOF,
                    .loc = {
                        .start = self->index,
@@ -455,7 +455,7 @@ state:
        default:; // Once we're at C23, this semicolon can be removed.
            const char* start = self->buffer + result.loc.start;
            uint32_t len = self->index - result.loc.start;
-            tokenizer_tag tag = get_keyword(start, len);
+            tokenizerTag tag = get_keyword(start, len);
            if (tag != TOKENIZER_TAG_INVALID) {
                result.tag = tag;
            }
@@ -856,7 +856,7 @@ state:
            state = TOKENIZER_STATE_INVALID;
            goto state;
        } else {
-            return (tokenizer_token) {
+            return (tokenizerToken) {
                .tag = TOKENIZER_TAG_EOF,
                .loc = {
                    .start = self->index,
@@ -930,7 +930,7 @@ state:
            state = TOKENIZER_STATE_INVALID;
            goto state;
        } else {
-            return (tokenizer_token) {
+            return (tokenizerToken) {
                .tag = TOKENIZER_TAG_EOF,
                .loc = {
                    .start = self->index,
diff --git a/tokenizer.h b/tokenizer.h
index d1dc242..2b04e30 100644
--- a/tokenizer.h
+++ b/tokenizer.h
@@ -4,130 +4,140 @@ #include
 #include
 
+#define FOREACH_TOKENIZER_TAG_ENUM(TAG) \
+    TAG(TOKENIZER_TAG_INVALID) \
+    TAG(TOKENIZER_TAG_INVALID_PERIODASTERISKS) \
+    TAG(TOKENIZER_TAG_IDENTIFIER) \
+    TAG(TOKENIZER_TAG_STRING_LITERAL) \
+    TAG(TOKENIZER_TAG_MULTILINE_STRING_LITERAL_LINE) \
+    TAG(TOKENIZER_TAG_CHAR_LITERAL) \
+    TAG(TOKENIZER_TAG_EOF) \
+    TAG(TOKENIZER_TAG_BUILTIN) \
+    TAG(TOKENIZER_TAG_BANG) \
+    TAG(TOKENIZER_TAG_PIPE) \
+    TAG(TOKENIZER_TAG_PIPE_PIPE) \
+    TAG(TOKENIZER_TAG_PIPE_EQUAL) \
+    TAG(TOKENIZER_TAG_EQUAL) \
+    TAG(TOKENIZER_TAG_EQUAL_EQUAL) \
+    TAG(TOKENIZER_TAG_EQUAL_ANGLE_BRACKET_RIGHT) \
+    TAG(TOKENIZER_TAG_BANG_EQUAL) \
+    TAG(TOKENIZER_TAG_L_PAREN) \
+    TAG(TOKENIZER_TAG_R_PAREN) \
+    TAG(TOKENIZER_TAG_SEMICOLON) \
+    TAG(TOKENIZER_TAG_PERCENT) \
+    TAG(TOKENIZER_TAG_PERCENT_EQUAL) \
+    TAG(TOKENIZER_TAG_L_BRACE) \
+    TAG(TOKENIZER_TAG_R_BRACE) \
+    TAG(TOKENIZER_TAG_L_BRACKET) \
+    TAG(TOKENIZER_TAG_R_BRACKET) \
+    TAG(TOKENIZER_TAG_PERIOD) \
+    TAG(TOKENIZER_TAG_PERIOD_ASTERISK) \
+    TAG(TOKENIZER_TAG_ELLIPSIS2) \
+    TAG(TOKENIZER_TAG_ELLIPSIS3) \
+    TAG(TOKENIZER_TAG_CARET) \
+    TAG(TOKENIZER_TAG_CARET_EQUAL) \
+    TAG(TOKENIZER_TAG_PLUS) \
+    TAG(TOKENIZER_TAG_PLUS_PLUS) \
+    TAG(TOKENIZER_TAG_PLUS_EQUAL) \
+    TAG(TOKENIZER_TAG_PLUS_PERCENT) \
+    TAG(TOKENIZER_TAG_PLUS_PERCENT_EQUAL) \
+    TAG(TOKENIZER_TAG_PLUS_PIPE) \
+    TAG(TOKENIZER_TAG_PLUS_PIPE_EQUAL) \
+    TAG(TOKENIZER_TAG_MINUS) \
+    TAG(TOKENIZER_TAG_MINUS_EQUAL) \
+    TAG(TOKENIZER_TAG_MINUS_PERCENT) \
+    TAG(TOKENIZER_TAG_MINUS_PERCENT_EQUAL) \
+    TAG(TOKENIZER_TAG_MINUS_PIPE) \
+    TAG(TOKENIZER_TAG_MINUS_PIPE_EQUAL) \
+    TAG(TOKENIZER_TAG_ASTERISK) \
+    TAG(TOKENIZER_TAG_ASTERISK_EQUAL) \
+    TAG(TOKENIZER_TAG_ASTERISK_ASTERISK) \
+    TAG(TOKENIZER_TAG_ASTERISK_PERCENT) \
+    TAG(TOKENIZER_TAG_ASTERISK_PERCENT_EQUAL) \
+    TAG(TOKENIZER_TAG_ASTERISK_PIPE) \
+    TAG(TOKENIZER_TAG_ASTERISK_PIPE_EQUAL) \
+    TAG(TOKENIZER_TAG_ARROW) \
+    TAG(TOKENIZER_TAG_COLON) \
+    TAG(TOKENIZER_TAG_SLASH) \
+    TAG(TOKENIZER_TAG_SLASH_EQUAL) \
+    TAG(TOKENIZER_TAG_COMMA) \
+    TAG(TOKENIZER_TAG_AMPERSAND) \
+    TAG(TOKENIZER_TAG_AMPERSAND_EQUAL) \
+    TAG(TOKENIZER_TAG_QUESTION_MARK) \
+    TAG(TOKENIZER_TAG_ANGLE_BRACKET_LEFT) \
+    TAG(TOKENIZER_TAG_ANGLE_BRACKET_LEFT_EQUAL) \
+    TAG(TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT) \
+    TAG(TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_EQUAL) \
+    TAG(TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE) \
+    TAG(TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE_EQUAL) \
+    TAG(TOKENIZER_TAG_ANGLE_BRACKET_RIGHT) \
+    TAG(TOKENIZER_TAG_ANGLE_BRACKET_RIGHT_EQUAL) \
+    TAG(TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT) \
+    TAG(TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT_EQUAL) \
+    TAG(TOKENIZER_TAG_TILDE) \
+    TAG(TOKENIZER_TAG_NUMBER_LITERAL) \
+    TAG(TOKENIZER_TAG_DOC_COMMENT) \
+    TAG(TOKENIZER_TAG_CONTAINER_DOC_COMMENT) \
+    TAG(TOKENIZER_TAG_KEYWORD_ADDRSPACE) \
+    TAG(TOKENIZER_TAG_KEYWORD_ALIGN) \
+    TAG(TOKENIZER_TAG_KEYWORD_ALLOWZERO) \
+    TAG(TOKENIZER_TAG_KEYWORD_AND) \
+    TAG(TOKENIZER_TAG_KEYWORD_ANYFRAME) \
+    TAG(TOKENIZER_TAG_KEYWORD_ANYTYPE) \
+    TAG(TOKENIZER_TAG_KEYWORD_ASM) \
+    TAG(TOKENIZER_TAG_KEYWORD_ASYNC) \
+    TAG(TOKENIZER_TAG_KEYWORD_AWAIT) \
+    TAG(TOKENIZER_TAG_KEYWORD_BREAK) \
+    TAG(TOKENIZER_TAG_KEYWORD_CALLCONV) \
+    TAG(TOKENIZER_TAG_KEYWORD_CATCH) \
+    TAG(TOKENIZER_TAG_KEYWORD_COMPTIME) \
+    TAG(TOKENIZER_TAG_KEYWORD_CONST) \
+    TAG(TOKENIZER_TAG_KEYWORD_CONTINUE) \
+    TAG(TOKENIZER_TAG_KEYWORD_DEFER) \
+    TAG(TOKENIZER_TAG_KEYWORD_ELSE) \
+    TAG(TOKENIZER_TAG_KEYWORD_ENUM) \
+    TAG(TOKENIZER_TAG_KEYWORD_ERRDEFER) \
+    TAG(TOKENIZER_TAG_KEYWORD_ERROR) \
+    TAG(TOKENIZER_TAG_KEYWORD_EXPORT) \
+    TAG(TOKENIZER_TAG_KEYWORD_EXTERN) \
+    TAG(TOKENIZER_TAG_KEYWORD_FN) \
+    TAG(TOKENIZER_TAG_KEYWORD_FOR) \
+    TAG(TOKENIZER_TAG_KEYWORD_IF) \
+    TAG(TOKENIZER_TAG_KEYWORD_INLINE) \
+    TAG(TOKENIZER_TAG_KEYWORD_NOALIAS) \
+    TAG(TOKENIZER_TAG_KEYWORD_NOINLINE) \
+    TAG(TOKENIZER_TAG_KEYWORD_NOSUSPEND) \
+    TAG(TOKENIZER_TAG_KEYWORD_OPAQUE) \
+    TAG(TOKENIZER_TAG_KEYWORD_OR) \
+    TAG(TOKENIZER_TAG_KEYWORD_ORELSE) \
+    TAG(TOKENIZER_TAG_KEYWORD_PACKED) \
+    TAG(TOKENIZER_TAG_KEYWORD_PUB) \
+    TAG(TOKENIZER_TAG_KEYWORD_RESUME) \
+    TAG(TOKENIZER_TAG_KEYWORD_RETURN) \
+    TAG(TOKENIZER_TAG_KEYWORD_LINKSECTION) \
+    TAG(TOKENIZER_TAG_KEYWORD_STRUCT) \
+    TAG(TOKENIZER_TAG_KEYWORD_SUSPEND) \
+    TAG(TOKENIZER_TAG_KEYWORD_SWITCH) \
+    TAG(TOKENIZER_TAG_KEYWORD_TEST) \
+    TAG(TOKENIZER_TAG_KEYWORD_THREADLOCAL) \
+    TAG(TOKENIZER_TAG_KEYWORD_TRY) \
+    TAG(TOKENIZER_TAG_KEYWORD_UNION) \
+    TAG(TOKENIZER_TAG_KEYWORD_UNREACHABLE) \
+    TAG(TOKENIZER_TAG_KEYWORD_USINGNAMESPACE) \
+    TAG(TOKENIZER_TAG_KEYWORD_VAR) \
+    TAG(TOKENIZER_TAG_KEYWORD_VOLATILE) \
+    TAG(TOKENIZER_TAG_KEYWORD_WHILE) \
+
+#define GENERATE_ENUM(ENUM) ENUM,
+#define GENERATE_STRING(STRING) #STRING,
+
 typedef enum {
-    TOKENIZER_TAG_INVALID,
-    TOKENIZER_TAG_INVALID_PERIODASTERISKS,
-    TOKENIZER_TAG_IDENTIFIER,
-    TOKENIZER_TAG_STRING_LITERAL,
-    TOKENIZER_TAG_MULTILINE_STRING_LITERAL_LINE,
-    TOKENIZER_TAG_CHAR_LITERAL,
-    TOKENIZER_TAG_EOF,
-    TOKENIZER_TAG_BUILTIN,
-    TOKENIZER_TAG_BANG,
-    TOKENIZER_TAG_PIPE,
-    TOKENIZER_TAG_PIPE_PIPE,
-    TOKENIZER_TAG_PIPE_EQUAL,
-    TOKENIZER_TAG_EQUAL,
-    TOKENIZER_TAG_EQUAL_EQUAL,
-    TOKENIZER_TAG_EQUAL_ANGLE_BRACKET_RIGHT,
-    TOKENIZER_TAG_BANG_EQUAL,
-    TOKENIZER_TAG_L_PAREN,
-    TOKENIZER_TAG_R_PAREN,
-    TOKENIZER_TAG_SEMICOLON,
-    TOKENIZER_TAG_PERCENT,
-    TOKENIZER_TAG_PERCENT_EQUAL,
-    TOKENIZER_TAG_L_BRACE,
-    TOKENIZER_TAG_R_BRACE,
-    TOKENIZER_TAG_L_BRACKET,
-    TOKENIZER_TAG_R_BRACKET,
-    TOKENIZER_TAG_PERIOD,
-    TOKENIZER_TAG_PERIOD_ASTERISK,
-    TOKENIZER_TAG_ELLIPSIS2,
-    TOKENIZER_TAG_ELLIPSIS3,
-    TOKENIZER_TAG_CARET,
-    TOKENIZER_TAG_CARET_EQUAL,
-    TOKENIZER_TAG_PLUS,
-    TOKENIZER_TAG_PLUS_PLUS,
-    TOKENIZER_TAG_PLUS_EQUAL,
-    TOKENIZER_TAG_PLUS_PERCENT,
-    TOKENIZER_TAG_PLUS_PERCENT_EQUAL,
-    TOKENIZER_TAG_PLUS_PIPE,
-    TOKENIZER_TAG_PLUS_PIPE_EQUAL,
-    TOKENIZER_TAG_MINUS,
-    TOKENIZER_TAG_MINUS_EQUAL,
-    TOKENIZER_TAG_MINUS_PERCENT,
-    TOKENIZER_TAG_MINUS_PERCENT_EQUAL,
-    TOKENIZER_TAG_MINUS_PIPE,
-    TOKENIZER_TAG_MINUS_PIPE_EQUAL,
-    TOKENIZER_TAG_ASTERISK,
-    TOKENIZER_TAG_ASTERISK_EQUAL,
-    TOKENIZER_TAG_ASTERISK_ASTERISK,
-    TOKENIZER_TAG_ASTERISK_PERCENT,
-    TOKENIZER_TAG_ASTERISK_PERCENT_EQUAL,
-    TOKENIZER_TAG_ASTERISK_PIPE,
-    TOKENIZER_TAG_ASTERISK_PIPE_EQUAL,
-    TOKENIZER_TAG_ARROW,
-    TOKENIZER_TAG_COLON,
-    TOKENIZER_TAG_SLASH,
-    TOKENIZER_TAG_SLASH_EQUAL,
-    TOKENIZER_TAG_COMMA,
-    TOKENIZER_TAG_AMPERSAND,
-    TOKENIZER_TAG_AMPERSAND_EQUAL,
-    TOKENIZER_TAG_QUESTION_MARK,
-    TOKENIZER_TAG_ANGLE_BRACKET_LEFT,
-    TOKENIZER_TAG_ANGLE_BRACKET_LEFT_EQUAL,
-    TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT,
-    TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_EQUAL,
-    TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE,
-    TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_LEFT_PIPE_EQUAL,
-    TOKENIZER_TAG_ANGLE_BRACKET_RIGHT,
-    TOKENIZER_TAG_ANGLE_BRACKET_RIGHT_EQUAL,
-    TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT,
-    TOKENIZER_TAG_ANGLE_BRACKET_ANGLE_BRACKET_RIGHT_EQUAL,
-    TOKENIZER_TAG_TILDE,
-    TOKENIZER_TAG_NUMBER_LITERAL,
-    TOKENIZER_TAG_DOC_COMMENT,
-    TOKENIZER_TAG_CONTAINER_DOC_COMMENT,
-    TOKENIZER_TAG_KEYWORD_ADDRSPACE,
-    TOKENIZER_TAG_KEYWORD_ALIGN,
-    TOKENIZER_TAG_KEYWORD_ALLOWZERO,
-    TOKENIZER_TAG_KEYWORD_AND,
-    TOKENIZER_TAG_KEYWORD_ANYFRAME,
-    TOKENIZER_TAG_KEYWORD_ANYTYPE,
-    TOKENIZER_TAG_KEYWORD_ASM,
-    TOKENIZER_TAG_KEYWORD_ASYNC,
-    TOKENIZER_TAG_KEYWORD_AWAIT,
-    TOKENIZER_TAG_KEYWORD_BREAK,
-    TOKENIZER_TAG_KEYWORD_CALLCONV,
-    TOKENIZER_TAG_KEYWORD_CATCH,
-    TOKENIZER_TAG_KEYWORD_COMPTIME,
-    TOKENIZER_TAG_KEYWORD_CONST,
-    TOKENIZER_TAG_KEYWORD_CONTINUE,
-    TOKENIZER_TAG_KEYWORD_DEFER,
-    TOKENIZER_TAG_KEYWORD_ELSE,
-    TOKENIZER_TAG_KEYWORD_ENUM,
-    TOKENIZER_TAG_KEYWORD_ERRDEFER,
-    TOKENIZER_TAG_KEYWORD_ERROR,
-    TOKENIZER_TAG_KEYWORD_EXPORT,
-    TOKENIZER_TAG_KEYWORD_EXTERN,
-    TOKENIZER_TAG_KEYWORD_FN,
-    TOKENIZER_TAG_KEYWORD_FOR,
-    TOKENIZER_TAG_KEYWORD_IF,
-    TOKENIZER_TAG_KEYWORD_INLINE,
-    TOKENIZER_TAG_KEYWORD_NOALIAS,
-    TOKENIZER_TAG_KEYWORD_NOINLINE,
-    TOKENIZER_TAG_KEYWORD_NOSUSPEND,
-    TOKENIZER_TAG_KEYWORD_OPAQUE,
-    TOKENIZER_TAG_KEYWORD_OR,
-    TOKENIZER_TAG_KEYWORD_ORELSE,
-    TOKENIZER_TAG_KEYWORD_PACKED,
-    TOKENIZER_TAG_KEYWORD_PUB,
-    TOKENIZER_TAG_KEYWORD_RESUME,
-    TOKENIZER_TAG_KEYWORD_RETURN,
-    TOKENIZER_TAG_KEYWORD_LINKSECTION,
-    TOKENIZER_TAG_KEYWORD_STRUCT,
-    TOKENIZER_TAG_KEYWORD_SUSPEND,
-    TOKENIZER_TAG_KEYWORD_SWITCH,
-    TOKENIZER_TAG_KEYWORD_TEST,
-    TOKENIZER_TAG_KEYWORD_THREADLOCAL,
-    TOKENIZER_TAG_KEYWORD_TRY,
-    TOKENIZER_TAG_KEYWORD_UNION,
-    TOKENIZER_TAG_KEYWORD_UNREACHABLE,
-    TOKENIZER_TAG_KEYWORD_USINGNAMESPACE,
-    TOKENIZER_TAG_KEYWORD_VAR,
-    TOKENIZER_TAG_KEYWORD_VOLATILE,
-    TOKENIZER_TAG_KEYWORD_WHILE,
-} tokenizer_tag;
+    FOREACH_TOKENIZER_TAG_ENUM(GENERATE_ENUM)
+} tokenizerTag;
+
+static const char *tokenizerTagString[] = {
+    FOREACH_TOKENIZER_TAG_ENUM(GENERATE_STRING)
+};
 
 typedef enum {
     TOKENIZER_STATE_START,
@@ -175,14 +185,14 @@ typedef enum {
     TOKENIZER_STATE_PERIOD_ASTERISK,
     TOKENIZER_STATE_SAW_AT_SIGN,
     TOKENIZER_STATE_INVALID,
-} tokenizer_state;
+} tokenizerState;
 
 typedef struct {
-    tokenizer_tag tag;
+    tokenizerTag tag;
     struct {
         uint32_t start, end;
     } loc;
-} tokenizer_token;
+} tokenizerToken;
 
 typedef struct {
     const char* buffer;
@@ -191,6 +201,6 @@ typedef struct {
 } tokenizer;
 
 tokenizer tokenizer_init(const char* buffer, uint32_t len);
-tokenizer_token tokenizer_next(tokenizer* self);
+tokenizerToken tokenizer_next(tokenizer* self);
 
 #endif