diff --git a/src/hash_map.hpp b/src/hash_map.hpp index 69e5568093..45440d61e7 100644 --- a/src/hash_map.hpp +++ b/src/hash_map.hpp @@ -19,45 +19,64 @@ public: init_capacity(capacity); } void deinit(void) { - heap::c_allocator.deallocate(_entries, _capacity); + _entries.deinit(); + heap::c_allocator.deallocate(_index_bytes, + _indexes_len * capacity_index_size(_indexes_len)); } struct Entry { K key; V value; - bool used; - int distance_from_start_index; }; void clear() { - for (int i = 0; i < _capacity; i += 1) { - _entries[i].used = false; - } - _size = 0; + _entries.clear(); + memset(_index_bytes, 0, _indexes_len * capacity_index_size(_indexes_len)); _max_distance_from_start_index = 0; _modification_count += 1; } - int size() const { - return _size; + size_t size() const { + return _entries.length; } void put(const K &key, const V &value) { _modification_count += 1; - internal_put(key, value); - // if we get too full (60%), double the capacity - if (_size * 5 >= _capacity * 3) { - Entry *old_entries = _entries; - int old_capacity = _capacity; - init_capacity(_capacity * 2); - // dump all of the old elements into the new table - for (int i = 0; i < old_capacity; i += 1) { - Entry *old_entry = &old_entries[i]; - if (old_entry->used) - internal_put(old_entry->key, old_entry->value); + // if we would get too full (60%), double the indexes size + if ((_entries.length + 1) * 5 >= _indexes_len * 3) { + heap::c_allocator.deallocate(_index_bytes, + _indexes_len * capacity_index_size(_indexes_len)); + _indexes_len *= 2; + size_t sz = capacity_index_size(_indexes_len); + // This zero initializes the bytes, setting them all empty. 
+ _index_bytes = heap::c_allocator.allocate(_indexes_len * sz); + _max_distance_from_start_index = 0; + for (size_t i = 0; i < _entries.length; i += 1) { + Entry *entry = &_entries.items[i]; + switch (sz) { + case 1: + put_index(key_to_index(entry->key), i, (uint8_t*)_index_bytes); + continue; + case 2: + put_index(key_to_index(entry->key), i, (uint16_t*)_index_bytes); + continue; + case 4: + put_index(key_to_index(entry->key), i, (uint32_t*)_index_bytes); + continue; + default: + put_index(key_to_index(entry->key), i, (size_t*)_index_bytes); + continue; + } } - heap::c_allocator.deallocate(old_entries, old_capacity); + } + + + switch (capacity_index_size(_indexes_len)) { + case 1: return internal_put(key, value, (uint8_t*)_index_bytes); + case 2: return internal_put(key, value, (uint16_t*)_index_bytes); + case 4: return internal_put(key, value, (uint32_t*)_index_bytes); + default: return internal_put(key, value, (size_t*)_index_bytes); } } @@ -81,40 +100,21 @@ public: return internal_get(key); } - void maybe_remove(const K &key) { - if (maybe_get(key)) { - remove(key); - } + bool remove(const K &key) { + bool deleted_something = maybe_remove(key); + if (!deleted_something) + zig_panic("key not found"); + return deleted_something; } - void remove(const K &key) { + bool maybe_remove(const K &key) { _modification_count += 1; - int start_index = key_to_index(key); - for (int roll_over = 0; roll_over <= _max_distance_from_start_index; roll_over += 1) { - int index = (start_index + roll_over) % _capacity; - Entry *entry = &_entries[index]; - - if (!entry->used) - zig_panic("key not found"); - - if (!EqualFn(entry->key, key)) - continue; - - for (; roll_over < _capacity; roll_over += 1) { - int next_index = (start_index + roll_over + 1) % _capacity; - Entry *next_entry = &_entries[next_index]; - if (!next_entry->used || next_entry->distance_from_start_index == 0) { - entry->used = false; - _size -= 1; - return; - } - *entry = *next_entry; - 
entry->distance_from_start_index -= 1; - entry = next_entry; - } - zig_panic("shifting everything in the table"); + switch (capacity_index_size(_indexes_len)) { + case 1: return internal_remove(key, (uint8_t*)_index_bytes); + case 2: return internal_remove(key, (uint16_t*)_index_bytes); + case 4: return internal_remove(key, (uint32_t*)_index_bytes); + default: return internal_remove(key, (size_t*)_index_bytes); } - zig_panic("key not found"); } class Iterator { @@ -122,24 +122,16 @@ public: Entry *next() { if (_inital_modification_count != _table->_modification_count) zig_panic("concurrent modification"); - if (_count >= _table->size()) - return NULL; - for (; _index < _table->_capacity; _index += 1) { - Entry *entry = &_table->_entries[_index]; - if (entry->used) { - _index += 1; - _count += 1; - return entry; - } - } - zig_panic("no next item"); + if (_index >= _table->_entries.length) + return nullptr; + Entry *entry = &_table->_entries.items[_index]; + _index += 1; + return entry; } private: const HashMap * _table; - // how many items have we returned - int _count = 0; // iterator through the entry array - int _index = 0; + size_t _index = 0; // used to detect concurrent modification uint32_t _inital_modification_count; Iterator(const HashMap * table) : @@ -154,89 +146,166 @@ public: } private: + // Maintains insertion order. + ZigList _entries; + // If _indexes_len is less than 2**8, this is an array of uint8_t. + // If _indexes_len is less than 2**16, it is an array of uint16_t. + // If _indexes_len is less than 2**32, it is an array of uint32_t. + // Otherwise it is size_t. + // It's off by 1. 0 means empty slot, 1 means index 0, etc. + uint8_t *_index_bytes; + // This is the number of indexes. When indexes are bytes, it equals number of bytes. + // When indexes are uint16_t, _indexes_len is half the number of bytes. 
+ size_t _indexes_len; - Entry *_entries; - int _capacity; - int _size; - int _max_distance_from_start_index; - // this is used to detect bugs where a hashtable is edited while an iterator is running. + size_t _max_distance_from_start_index; + // This is used to detect bugs where a hashtable is edited while an iterator is running. uint32_t _modification_count; - void init_capacity(int capacity) { - _capacity = capacity; - _entries = heap::c_allocator.allocate(_capacity); - _size = 0; + void init_capacity(size_t capacity) { + _entries = {}; + _entries.ensure_capacity(capacity); + // So that at capacity it will only be 60% full. + _indexes_len = capacity * 5 / 3; + size_t sz = capacity_index_size(_indexes_len); + // This zero initializes _index_bytes which sets them all to empty. + _index_bytes = heap::c_allocator.allocate(_indexes_len * sz); + _max_distance_from_start_index = 0; - for (int i = 0; i < _capacity; i += 1) { - _entries[i].used = false; - } + _modification_count = 0; } - void internal_put(K key, V value) { - int start_index = key_to_index(key); - for (int roll_over = 0, distance_from_start_index = 0; - roll_over < _capacity; roll_over += 1, distance_from_start_index += 1) + static size_t capacity_index_size(size_t len) { + if (len < UINT8_MAX) + return 1; + if (len < UINT16_MAX) + return 2; + if (len < UINT32_MAX) + return 4; + return sizeof(size_t); + } + + template + void internal_put(const K &key, const V &value, I *indexes) { + size_t start_index = key_to_index(key); + for (size_t roll_over = 0, distance_from_start_index = 0; + roll_over < _indexes_len; roll_over += 1, distance_from_start_index += 1) { - int index = (start_index + roll_over) % _capacity; - Entry *entry = &_entries[index]; - - if (entry->used && !EqualFn(entry->key, key)) { - if (entry->distance_from_start_index < distance_from_start_index) { - // robin hood to the rescue - Entry tmp = *entry; - if (distance_from_start_index > _max_distance_from_start_index) - 
_max_distance_from_start_index = distance_from_start_index; - *entry = { - key, - value, - true, - distance_from_start_index, - }; - key = tmp.key; - value = tmp.value; - distance_from_start_index = tmp.distance_from_start_index; - } - continue; + size_t index_index = (start_index + roll_over) % _indexes_len; + I index_data = indexes[index_index]; + if (index_data == 0) { + _entries.append({key, value}); + indexes[index_index] = _entries.length; + if (distance_from_start_index > _max_distance_from_start_index) + _max_distance_from_start_index = distance_from_start_index; + return; } - - if (!entry->used) { - // adding an entry. otherwise overwriting old value with - // same key - _size += 1; + Entry *entry = &_entries.items[index_data - 1]; + if (EqualFn(entry->key, key)) { + *entry = {key, value}; + if (distance_from_start_index > _max_distance_from_start_index) + _max_distance_from_start_index = distance_from_start_index; + return; } - - if (distance_from_start_index > _max_distance_from_start_index) - _max_distance_from_start_index = distance_from_start_index; - *entry = { - key, - value, - true, - distance_from_start_index, - }; - return; } - zig_panic("put into a full HashMap"); + zig_unreachable(); } + template + void put_index(size_t start_index, size_t entry_index, I *indexes) { + for (size_t roll_over = 0, distance_from_start_index = 0; + roll_over < _indexes_len; roll_over += 1, distance_from_start_index += 1) + { + size_t index_index = (start_index + roll_over) % _indexes_len; + if (indexes[index_index] == 0) { + indexes[index_index] = entry_index + 1; + if (distance_from_start_index > _max_distance_from_start_index) + _max_distance_from_start_index = distance_from_start_index; + return; + } + } + zig_unreachable(); + } Entry *internal_get(const K &key) const { - int start_index = key_to_index(key); - for (int roll_over = 0; roll_over <= _max_distance_from_start_index; roll_over += 1) { - int index = (start_index + roll_over) % _capacity; - Entry *entry 
= &_entries[index]; + switch (capacity_index_size(_indexes_len)) { + case 1: return internal_get2(key, (uint8_t*)_index_bytes); + case 2: return internal_get2(key, (uint16_t*)_index_bytes); + case 4: return internal_get2(key, (uint32_t*)_index_bytes); + default: return internal_get2(key, (size_t*)_index_bytes); + } + } - if (!entry->used) - return NULL; + template + Entry *internal_get2(const K &key, I *indexes) const { + size_t start_index = key_to_index(key); + for (size_t roll_over = 0; roll_over <= _max_distance_from_start_index; roll_over += 1) { + size_t index_index = (start_index + roll_over) % _indexes_len; + size_t index_data = indexes[index_index]; + if (index_data == 0) + return nullptr; + Entry *entry = &_entries.items[index_data - 1]; if (EqualFn(entry->key, key)) return entry; } - return NULL; + return nullptr; } - int key_to_index(const K &key) const { - return (int)(HashFunction(key) % ((uint32_t)_capacity)); + size_t key_to_index(const K &key) const { + return ((size_t)HashFunction(key)) % _indexes_len; + } + + template + bool internal_remove(const K &key, I *indexes) { + size_t start_index = key_to_index(key); + for (size_t roll_over = 0; roll_over <= _max_distance_from_start_index; roll_over += 1) { + size_t index_index = (start_index + roll_over) % _indexes_len; + size_t index_data = indexes[index_index]; + if (index_data == 0) + return false; + + size_t index = index_data - 1; + Entry *entry = &_entries.items[index]; + if (!EqualFn(entry->key, key)) + continue; + + indexes[index_index] = 0; + _entries.swap_remove(index); + if (_entries.length > 0 && _entries.length != index) { + // Because of the swap remove, now we need to update the index that was + // pointing to the last entry and is now pointing to this removed item slot. + update_entry_index(_entries.length, index, indexes); + } + + // Now we have to shift over the following indexes. 
+            roll_over += 1;
+            for (; roll_over < _indexes_len; roll_over += 1) {
+                size_t next_index = (start_index + roll_over) % _indexes_len;
+                if (indexes[next_index] == 0)
+                    break;
+                size_t next_start_index = key_to_index(_entries.items[indexes[next_index] - 1].key);
+                if (next_start_index == next_index)
+                    break;
+                indexes[(next_index + _indexes_len - 1) % _indexes_len] = indexes[next_index]; indexes[next_index] = 0;
+            }
+
+            return true;
+        }
+        return false;
+    }
+
+    template<typename I>
+    void update_entry_index(size_t old_entry_index, size_t new_entry_index, I *indexes) {
+        size_t start_index = key_to_index(_entries.items[new_entry_index].key);
+        for (size_t roll_over = 0; roll_over <= _max_distance_from_start_index; roll_over += 1) {
+            size_t index_index = (start_index + roll_over) % _indexes_len;
+            if (indexes[index_index] == old_entry_index + 1) {
+                indexes[index_index] = new_entry_index + 1;
+                return;
+            }
+        }
+        zig_unreachable();
+    }
 };
-
 #endif
diff --git a/test/stage1/behavior/type_info.zig b/test/stage1/behavior/type_info.zig
index 68ff3aa310..2685a3552e 100644
--- a/test/stage1/behavior/type_info.zig
+++ b/test/stage1/behavior/type_info.zig
@@ -251,14 +251,13 @@ fn testStruct() void {
 }
 
 const TestStruct = packed struct {
-    const Self = @This();
-
     fieldA: usize,
     fieldB: void,
     fieldC: *Self,
     fieldD: u32 = 4,
 
     pub fn foo(self: *const Self) void {}
+    const Self = @This();
};
 
 test "type info: function type info" {