diff --git a/CMakeLists.txt b/CMakeLists.txt
index f11d25479b..cdeacca7f4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -29,6 +29,7 @@ set(ZIG_SOURCES
     "${CMAKE_SOURCE_DIR}/src/parser.cpp"
     "${CMAKE_SOURCE_DIR}/src/tokenizer.cpp"
     "${CMAKE_SOURCE_DIR}/src/util.cpp"
+    "${CMAKE_SOURCE_DIR}/src/codegen.cpp"
 )
 
 set(CONFIGURE_OUT_FILE "${CMAKE_BINARY_DIR}/config.h")
diff --git a/src/buffer.hpp b/src/buffer.hpp
index 88d9415d34..310093f16e 100644
--- a/src/buffer.hpp
+++ b/src/buffer.hpp
@@ -149,5 +149,14 @@ static inline Buf *buf_dirname(Buf *buf) {
     return buf_create_from_mem("", 0);
 }
 
+static inline uint32_t buf_hash(Buf *buf) {
+    // 32-bit FNV-1a hash
+    uint32_t h = 2166136261;
+    for (int i = 0; i < buf_len(buf); i += 1) {
+        h = h ^ ((uint8_t)buf->list.at(i));
+        h = h * 16777619;
+    }
+    return h;
+}
 
 #endif
diff --git a/src/codegen.cpp b/src/codegen.cpp
new file mode 100644
index 0000000000..70f4acfde8
--- /dev/null
+++ b/src/codegen.cpp
@@ -0,0 +1,87 @@
+#include "codegen.hpp"
+#include "hash_map.hpp"
+
+#include
+
+struct CodeGen {
+    AstNode *root;
+    HashMap<Buf *, AstNode *, buf_hash, buf_eql_buf> fn_decls;
+    ZigList<ErrorMsg> errors;
+};
+
+CodeGen *create_codegen(AstNode *root) {
+    CodeGen *g = allocate<CodeGen>(1);
+    g->root = root;
+    g->fn_decls.init(32);
+    return g;
+}
+
+static void add_node_error(CodeGen *g, AstNode *node, Buf *msg) {
+    g->errors.add_one();
+    ErrorMsg *last_msg = &g->errors.last();
+    last_msg->line_start = node->line;
+    last_msg->column_start = node->column;
+    last_msg->line_end = -1;
+    last_msg->column_end = -1;
+    last_msg->msg = msg;
+}
+
+static void analyze_node(CodeGen *g, AstNode *node) {
+    switch (node->type) {
+        case NodeTypeRoot:
+            for (int i = 0; i < node->data.root.fn_decls.length; i += 1) {
+                AstNode *child = node->data.root.fn_decls.at(i);
+                analyze_node(g, child);
+            }
+            break;
+        case NodeTypeFnDecl:
+            {
+                auto entry = g->fn_decls.maybe_get(&node->data.fn_decl.name);
+                if (entry) {
+                    add_node_error(g, node,
+                        buf_sprintf("redefinition of '%s'", buf_ptr(&node->data.fn_decl.name)));
+                } else {
+                    g->fn_decls.put(&node->data.fn_decl.name, node);
+                    for (int i = 0; i < node->data.fn_decl.params.length; i += 1) {
+                        AstNode *child = node->data.fn_decl.params.at(i);
+                        analyze_node(g, child);
+                    }
+                    analyze_node(g, node->data.fn_decl.return_type);
+                    analyze_node(g, node->data.fn_decl.body);
+                }
+                break;
+            }
+        case NodeTypeParamDecl:
+            analyze_node(g, node->data.param_decl.type);
+            break;
+        case NodeTypeType:
+            break;
+        case NodeTypePointerType:
+            break;
+        case NodeTypeBlock:
+            break;
+        case NodeTypeStatement:
+            break;
+        case NodeTypeExpressionStatement:
+            break;
+        case NodeTypeReturnStatement:
+            break;
+        case NodeTypeExpression:
+            break;
+        case NodeTypeFnCall:
+            break;
+    }
+}
+
+void semantic_analyze(CodeGen *g) {
+    // Pass 1: collect top-level fn declarations and report errors such as redefinitions.
+    analyze_node(g, g->root);
+}
+
+void code_gen(CodeGen *g) {
+
+}
+
+ZigList<ErrorMsg> *codegen_error_messages(CodeGen *g) {
+    return &g->errors;
+}
diff --git a/src/codegen.hpp b/src/codegen.hpp
new file mode 100644
index 0000000000..1cffdf585e
--- /dev/null
+++ b/src/codegen.hpp
@@ -0,0 +1,25 @@
+#ifndef ZIG_CODEGEN_HPP
+#define ZIG_CODEGEN_HPP
+
+#include "parser.hpp"
+
+struct CodeGen;
+
+struct ErrorMsg {
+    int line_start;
+    int column_start;
+    int line_end;
+    int column_end;
+    Buf *msg;
+};
+
+
+CodeGen *create_codegen(AstNode *root);
+
+void semantic_analyze(CodeGen *g);
+
+void code_gen(CodeGen *g);
+
+ZigList<ErrorMsg> *codegen_error_messages(CodeGen *g);
+
+#endif
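codegen.hpp above is the entire public surface of the new pass. The intended call sequence (the main.cpp hunk later in this patch wires it up the same way) is roughly the sketch below, where source and dir are hypothetical stand-ins for the input buffer and its directory:

    // Sketch of the intended driver flow, reusing the tokenizer/parser entry points as main.cpp does.
    ZigList<Token> *tokens = tokenize(source, dir);
    AstNode *root = ast_parse(source, tokens);
    CodeGen *g = create_codegen(root);
    semantic_analyze(g);                                   // pass 1: walk the AST, recording ErrorMsgs
    ZigList<ErrorMsg> *errors = codegen_error_messages(g);
    for (int i = 0; i < errors->length; i += 1) {
        ErrorMsg *err = &errors->at(i);
        fprintf(stderr, "line %d, column %d: %s\n",
                err->line_start, err->column_start, buf_ptr(err->msg));
    }
    if (errors->length == 0)
        code_gen(g);                                       // still a stub in this patch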
diff --git a/src/hash_map.hpp b/src/hash_map.hpp
new file mode 100644
index 0000000000..1e3f921e96
--- /dev/null
+++ b/src/hash_map.hpp
@@ -0,0 +1,220 @@
+#ifndef ZIG_HASH_MAP_HPP
+#define ZIG_HASH_MAP_HPP
+
+#include "util.hpp"
+
+#include
+
+template<typename K, typename V, uint32_t (*HashFunction)(K key), bool (*EqualFn)(K a, K b)>
+class HashMap {
+public:
+    void init(int capacity) {
+        init_capacity(capacity);
+    }
+    void deinit(void) {
+        free(_entries);
+    }
+
+    struct Entry {
+        bool used;
+        int distance_from_start_index;
+        K key;
+        V value;
+    };
+
+    void clear() {
+        for (int i = 0; i < _capacity; i += 1) {
+            _entries[i].used = false;
+        }
+        _size = 0;
+        _max_distance_from_start_index = 0;
+        _modification_count += 1;
+    }
+
+    int size() const {
+        return _size;
+    }
+
+    void put(const K &key, const V &value) {
+        _modification_count += 1;
+        internal_put(key, value);
+
+        // if we get too full (80%), double the capacity
+        if (_size * 5 >= _capacity * 4) {
+            Entry *old_entries = _entries;
+            int old_capacity = _capacity;
+            init_capacity(_capacity * 2);
+            // dump all of the old elements into the new table
+            for (int i = 0; i < old_capacity; i += 1) {
+                Entry *old_entry = &old_entries[i];
+                if (old_entry->used)
+                    internal_put(old_entry->key, old_entry->value);
+            }
+            free(old_entries);
+        }
+    }
+
+    const V &get(const K &key) const {
+        Entry *entry = internal_get(key);
+        if (!entry)
+            zig_panic("key not found");
+        return entry->value;
+    }
+
+    Entry *maybe_get(const K &key) const {
+        return internal_get(key);
+    }
+
+    void remove(const K &key) {
+        _modification_count += 1;
+        int start_index = key_to_index(key);
+        for (int roll_over = 0; roll_over <= _max_distance_from_start_index; roll_over += 1) {
+            int index = (start_index + roll_over) % _capacity;
+            Entry *entry = &_entries[index];
+
+            if (!entry->used)
+                zig_panic("key not found");
+
+            if (!EqualFn(entry->key, key))
+                continue;
+
+            for (; roll_over < _capacity; roll_over += 1) {
+                int next_index = (start_index + roll_over + 1) % _capacity;
+                Entry *next_entry = &_entries[next_index];
+                if (!next_entry->used || next_entry->distance_from_start_index == 0) {
+                    entry->used = false;
+                    _size -= 1;
+                    return;
+                }
+                *entry = *next_entry;
+                entry->distance_from_start_index -= 1;
+                entry = next_entry;
+            }
+            zig_panic("shifting everything in the table");
+        }
+        zig_panic("key not found");
+    }
+
+    class Iterator {
+    public:
+        Entry *next() {
+            if (_inital_modification_count != _table->_modification_count)
+                zig_panic("concurrent modification");
+            if (_count >= _table->size())
+                return NULL;
+            for (; _index < _table->_capacity; _index += 1) {
+                Entry *entry = &_table->_entries[_index];
+                if (entry->used) {
+                    _index += 1;
+                    _count += 1;
+                    return entry;
+                }
+            }
+            zig_panic("no next item");
+        }
+    private:
+        const HashMap * _table;
+        // how many items have we returned
+        int _count = 0;
+        // iterator through the entry array
+        int _index = 0;
+        // used to detect concurrent modification
+        uint32_t _inital_modification_count;
+        Iterator(const HashMap * table) :
+            _table(table), _inital_modification_count(table->_modification_count) {
+        }
+        friend HashMap;
+    };
+
+    // you must not modify the underlying HashMap while this iterator is still in use
+    Iterator entry_iterator() const {
+        return Iterator(this);
+    }
+
+private:
+
+    Entry *_entries;
+    int _capacity;
+    int _size;
+    int _max_distance_from_start_index;
+    // this is used to detect bugs where a hashtable is edited while an iterator is running.
+    uint32_t _modification_count = 0;
+
+    void init_capacity(int capacity) {
+        _capacity = capacity;
+        _entries = allocate<Entry>(_capacity);
+        _size = 0;
+        _max_distance_from_start_index = 0;
+        for (int i = 0; i < _capacity; i += 1) {
+            _entries[i].used = false;
+        }
+    }
+
+    void internal_put(K key, V value) {
+        int start_index = key_to_index(key);
+        for (int roll_over = 0, distance_from_start_index = 0;
+                roll_over < _capacity; roll_over += 1, distance_from_start_index += 1)
+        {
+            int index = (start_index + roll_over) % _capacity;
+            Entry *entry = &_entries[index];
+
+            if (entry->used && !EqualFn(entry->key, key)) {
+                if (entry->distance_from_start_index < distance_from_start_index) {
+                    // robin hood to the rescue
+                    Entry tmp = *entry;
+                    if (distance_from_start_index > _max_distance_from_start_index)
+                        _max_distance_from_start_index = distance_from_start_index;
+                    *entry = {
+                        true,
+                        distance_from_start_index,
+                        key,
+                        value,
+                    };
+                    key = tmp.key;
+                    value = tmp.value;
+                    distance_from_start_index = tmp.distance_from_start_index;
+                }
+                continue;
+            }
+
+            if (!entry->used) {
+                // adding an entry. otherwise overwriting old value with
+                // same key
+                _size += 1;
+            }
+
+            if (distance_from_start_index > _max_distance_from_start_index)
+                _max_distance_from_start_index = distance_from_start_index;
+            *entry = {
+                true,
+                distance_from_start_index,
+                key,
+                value,
+            };
+            return;
+        }
+        zig_panic("put into a full HashMap");
+    }
+
+
+    Entry *internal_get(const K &key) const {
+        int start_index = key_to_index(key);
+        for (int roll_over = 0; roll_over <= _max_distance_from_start_index; roll_over += 1) {
+            int index = (start_index + roll_over) % _capacity;
+            Entry *entry = &_entries[index];
+
+            if (!entry->used)
+                return NULL;
+
+            if (EqualFn(entry->key, key))
+                return entry;
+        }
+        return NULL;
+    }
+
+    int key_to_index(const K &key) const {
+        return (int)(HashFunction(key) % ((uint32_t)_capacity));
+    }
+};
+
+#endif
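hash_map.hpp is a flat, open-addressing table with robin-hood probing; the hash and equality functions are compile-time template parameters rather than stored function pointers. Paired with the buf_hash helper added to buffer.hpp earlier in this patch, a table keyed by Buf * can be declared along the lines of the sketch below. Here name and decl_node stand for a function's name buffer and its AST node, and buf_eql_buf stands for whatever Buf equality helper buffer.hpp already provides (the exact name is an assumption in this sketch):

    // Sketch: a name -> declaration table keyed by Buf *.
    // buf_eql_buf is assumed to be the existing Buf * equality helper in buffer.hpp.
    HashMap<Buf *, AstNode *, buf_hash, buf_eql_buf> fn_table;
    fn_table.init(32);
    fn_table.put(name, decl_node);               // insert, or overwrite an existing key
    auto *entry = fn_table.maybe_get(name);      // returns NULL when the key is absent
    if (entry)
        fprintf(stderr, "already declared: %s\n", buf_ptr(entry->key));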
diff --git a/src/main.cpp b/src/main.cpp
index ff7deadd80..cf94ce4a62 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -12,6 +12,7 @@
 #include "parser.hpp"
 #include "tokenizer.hpp"
 #include "error.hpp"
+#include "codegen.hpp"
 
 #include
 #include
@@ -75,24 +76,41 @@ static int build(const char *arg0, const char *in_file, const char *out_file, Zi
         cur_dir_path = buf_dirname(buf_create_from_str(in_file));
     }
 
-    Buf *in_data = fetch_file(in_f);
-
     fprintf(stderr, "Original source:\n");
     fprintf(stderr, "----------------\n");
+    Buf *in_data = fetch_file(in_f);
     fprintf(stderr, "%s\n", buf_ptr(in_data));
 
-    ZigList<Token> *tokens = tokenize(in_data, cur_dir_path);
-
     fprintf(stderr, "\nTokens:\n");
     fprintf(stderr, "---------\n");
+    ZigList<Token> *tokens = tokenize(in_data, cur_dir_path);
     print_tokens(in_data, tokens);
 
-    AstNode *root = ast_parse(in_data, tokens);
-    assert(root);
     fprintf(stderr, "\nAST:\n");
     fprintf(stderr, "------\n");
+    AstNode *root = ast_parse(in_data, tokens);
+    assert(root);
     ast_print(root, 0);
 
+    fprintf(stderr, "\nSemantic Analysis:\n");
+    fprintf(stderr, "--------------------\n");
+    CodeGen *codegen = create_codegen(root);
+    semantic_analyze(codegen);
+    ZigList<ErrorMsg> *errors = codegen_error_messages(codegen);
+    if (errors->length == 0) {
+        fprintf(stderr, "OK\n");
+    } else {
+        for (int i = 0; i < errors->length; i += 1) {
+            ErrorMsg *err = &errors->at(i);
+            fprintf(stderr, "Error: Line %d, column %d: %s\n", err->line_start, err->column_start,
+                    buf_ptr(err->msg));
+        }
+        return 1;
+    }
+
+    fprintf(stderr, "\nCode Generation:\n");
+    fprintf(stderr, "------------------\n");
+    code_gen(codegen);
 
     return 0;
 }
diff --git a/src/parser.cpp b/src/parser.cpp
index ae375f0173..ac1d9caf3d 100644
--- a/src/parser.cpp
+++ b/src/parser.cpp
@@ -133,9 +133,11 @@ struct ParseContext {
     ZigList<Token> *tokens;
 };
 
-static AstNode *ast_create_node(NodeType type) {
+static AstNode *ast_create_node(NodeType type, Token *first_token) {
     AstNode *node = allocate<AstNode>(1);
     node->type = type;
+    node->line = first_token->start_line;
+    node->column = first_token->start_column;
     return node;
 }
 
@@ -163,11 +165,11 @@ Type : token(Symbol) | PointerType;
 PointerType : token(Star) token(Const) Type | token(Star) token(Mut) Type;
 */
 static AstNode *ast_parse_type(ParseContext *pc, int token_index, int *new_token_index) {
-    AstNode *node = ast_create_node(NodeTypeType);
-
     Token *token = &pc->tokens->at(token_index);
     token_index += 1;
 
+    AstNode *node = ast_create_node(NodeTypeType, token);
+
     if (token->id == TokenIdSymbol) {
         node->data.type.type = AstNodeTypeTypePrimitive;
         ast_buf_from_token(pc, token, &node->data.type.primitive_name);
@@ -197,12 +199,13 @@ ParamDecl : token(Symbol) token(Colon) Type {
 };
 */
 static AstNode *ast_parse_param_decl(ParseContext *pc, int token_index, int *new_token_index) {
-    AstNode *node = ast_create_node(NodeTypeParamDecl);
-
     Token *param_name = &pc->tokens->at(token_index);
     token_index += 1;
     ast_expect_token(pc, param_name, TokenIdSymbol);
 
+    AstNode *node = ast_create_node(NodeTypeParamDecl, param_name);
+
+
     ast_buf_from_token(pc, param_name, &node->data.param_decl.name);
 
     Token *colon = &pc->tokens->at(token_index);
@@ -280,12 +283,13 @@ static void ast_parse_fn_call_param_list(ParseContext *pc, int token_index, int
 FnCall : token(Symbol) token(LParen) list(Expression, token(Comma)) token(RParen) ;
 */
 static AstNode *ast_parse_fn_call(ParseContext *pc, int token_index, int *new_token_index) {
-    AstNode *node = ast_create_node(NodeTypeFnCall);
-
     Token *fn_name = &pc->tokens->at(token_index);
     token_index += 1;
     ast_expect_token(pc, fn_name, TokenIdSymbol);
 
+    AstNode *node = ast_create_node(NodeTypeFnCall, fn_name);
+
+
     ast_buf_from_token(pc, fn_name, &node->data.fn_call.name);
 
     ast_parse_fn_call_param_list(pc, token_index, &token_index, &node->data.fn_call.params);
@@ -295,9 +299,8 @@ static AstNode *ast_parse_fn_call(ParseContext *pc, int token_index, int *new_to
 }
 
 static AstNode *ast_parse_expression(ParseContext *pc, int token_index, int *new_token_index) {
-    AstNode *node = ast_create_node(NodeTypeExpression);
-
     Token *token = &pc->tokens->at(token_index);
+    AstNode *node = ast_create_node(NodeTypeExpression, token);
     if (token->id == TokenIdSymbol) {
         node->data.expression.type = AstNodeExpressionTypeFnCall;
         node->data.expression.data.fn_call = ast_parse_fn_call(pc, token_index, &token_index);
@@ -329,9 +332,9 @@ Expression : token(Number) | token(String) | FnCall ;
 FnCall : token(Symbol) token(LParen) list(Expression, token(Comma)) token(RParen) ;
 */
 static AstNode *ast_parse_statement(ParseContext *pc, int token_index, int *new_token_index) {
-    AstNode *node = ast_create_node(NodeTypeStatement);
-
     Token *token = &pc->tokens->at(token_index);
+    AstNode *node = ast_create_node(NodeTypeStatement, token);
+
     if (token->id == TokenIdKeywordReturn) {
         token_index += 1;
         node->data.statement.type = AstNodeStatementTypeReturn;
@@ -362,12 +365,13 @@ static AstNode *ast_parse_statement(ParseContext *pc, int token_index, int *new_
 Block : token(LBrace) many(Statement) token(RBrace);
 */
 static AstNode *ast_parse_block(ParseContext *pc, int token_index, int *new_token_index) {
-    AstNode *node = ast_create_node(NodeTypeBlock);
-
     Token *l_brace = &pc->tokens->at(token_index);
     token_index += 1;
     ast_expect_token(pc, l_brace, TokenIdLBrace);
 
+    AstNode *node = ast_create_node(NodeTypeBlock, l_brace);
+
+
     for (;;) {
         Token *token = &pc->tokens->at(token_index);
         if (token->id == TokenIdRBrace) {
@@ -386,12 +390,13 @@ FnDecl : token(Fn) token(Symbol) ParamDeclList option(token(Arrow) Type) Block;
 */
 static AstNode *ast_parse_fn_decl(ParseContext *pc, int token_index, int *new_token_index) {
-    AstNode *node = ast_create_node(NodeTypeFnDecl);
-
     Token *fn_token = &pc->tokens->at(token_index);
     token_index += 1;
     ast_expect_token(pc, fn_token, TokenIdKeywordFn);
 
+    AstNode *node = ast_create_node(NodeTypeFnDecl, fn_token);
+
+
     Token *fn_name = &pc->tokens->at(token_index);
     token_index += 1;
     ast_expect_token(pc, fn_name, TokenIdSymbol);
 
@@ -437,7 +442,7 @@ static void ast_parse_fn_decl_list(ParseContext *pc, int token_index, ZigList
 AstNode *ast_parse(Buf *buf, ZigList<Token> *tokens) {
     ParseContext pc = {0};
     pc.buf = buf;
-    pc.root = ast_create_node(NodeTypeRoot);
+    pc.root = ast_create_node(NodeTypeRoot, &tokens->at(0));
     pc.tokens = tokens;
 
     int new_token_index;
diff --git a/src/parser.hpp b/src/parser.hpp
index 2a46f76698..7a00fb88f1 100644
--- a/src/parser.hpp
+++ b/src/parser.hpp
@@ -97,6 +97,8 @@ struct AstNodeFnCall {
 struct AstNode {
     enum NodeType type;
     AstNode *parent;
+    int line;
+    int column;
     union {
         AstNodeRoot root;
         AstNodeFnDecl fn_decl;
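Nothing in this patch exercises HashMap::entry_iterator() yet. Referring back to the fn_table sketched after the hash_map.hpp hunk, a full walk over the live entries would look roughly like this; mutating the table between next() calls trips the modification-count panic by design:

    // Sketch: visit every live entry in the table introduced by this patch.
    auto it = fn_table.entry_iterator();
    for (;;) {
        auto *entry = it.next();
        if (!entry)
            break;
        fprintf(stderr, "fn '%s'\n", buf_ptr(entry->key));
    }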