diff --git a/lib/std/c/tokenizer.zig b/lib/std/c/tokenizer.zig index c3f50da13d..198c69e2a7 100644 --- a/lib/std/c/tokenizer.zig +++ b/lib/std/c/tokenizer.zig @@ -277,83 +277,79 @@ pub const Token = struct { }; // TODO extensions - pub const keywords = [_]Keyword{ - Keyword.init("auto", .Keyword_auto), - Keyword.init("break", .Keyword_break), - Keyword.init("case", .Keyword_case), - Keyword.init("char", .Keyword_char), - Keyword.init("const", .Keyword_const), - Keyword.init("continue", .Keyword_continue), - Keyword.init("default", .Keyword_default), - Keyword.init("do", .Keyword_do), - Keyword.init("double", .Keyword_double), - Keyword.init("else", .Keyword_else), - Keyword.init("enum", .Keyword_enum), - Keyword.init("extern", .Keyword_extern), - Keyword.init("float", .Keyword_float), - Keyword.init("for", .Keyword_for), - Keyword.init("goto", .Keyword_goto), - Keyword.init("if", .Keyword_if), - Keyword.init("int", .Keyword_int), - Keyword.init("long", .Keyword_long), - Keyword.init("register", .Keyword_register), - Keyword.init("return", .Keyword_return), - Keyword.init("short", .Keyword_short), - Keyword.init("signed", .Keyword_signed), - Keyword.init("sizeof", .Keyword_sizeof), - Keyword.init("static", .Keyword_static), - Keyword.init("struct", .Keyword_struct), - Keyword.init("switch", .Keyword_switch), - Keyword.init("typedef", .Keyword_typedef), - Keyword.init("union", .Keyword_union), - Keyword.init("unsigned", .Keyword_unsigned), - Keyword.init("void", .Keyword_void), - Keyword.init("volatile", .Keyword_volatile), - Keyword.init("while", .Keyword_while), + pub const keywords = std.ComptimeStringMap(Id, .{ + .{"auto", .Keyword_auto}, + .{"break", .Keyword_break}, + .{"case", .Keyword_case}, + .{"char", .Keyword_char}, + .{"const", .Keyword_const}, + .{"continue", .Keyword_continue}, + .{"default", .Keyword_default}, + .{"do", .Keyword_do}, + .{"double", .Keyword_double}, + .{"else", .Keyword_else}, + .{"enum", .Keyword_enum}, + .{"extern", .Keyword_extern}, + .{"float", .Keyword_float}, + .{"for", .Keyword_for}, + .{"goto", .Keyword_goto}, + .{"if", .Keyword_if}, + .{"int", .Keyword_int}, + .{"long", .Keyword_long}, + .{"register", .Keyword_register}, + .{"return", .Keyword_return}, + .{"short", .Keyword_short}, + .{"signed", .Keyword_signed}, + .{"sizeof", .Keyword_sizeof}, + .{"static", .Keyword_static}, + .{"struct", .Keyword_struct}, + .{"switch", .Keyword_switch}, + .{"typedef", .Keyword_typedef}, + .{"union", .Keyword_union}, + .{"unsigned", .Keyword_unsigned}, + .{"void", .Keyword_void}, + .{"volatile", .Keyword_volatile}, + .{"while", .Keyword_while}, // ISO C99 - Keyword.init("_Bool", .Keyword_bool), - Keyword.init("_Complex", .Keyword_complex), - Keyword.init("_Imaginary", .Keyword_imaginary), - Keyword.init("inline", .Keyword_inline), - Keyword.init("restrict", .Keyword_restrict), + .{"_Bool", .Keyword_bool}, + .{"_Complex", .Keyword_complex}, + .{"_Imaginary", .Keyword_imaginary}, + .{"inline", .Keyword_inline}, + .{"restrict", .Keyword_restrict}, // ISO C11 - Keyword.init("_Alignas", .Keyword_alignas), - Keyword.init("_Alignof", .Keyword_alignof), - Keyword.init("_Atomic", .Keyword_atomic), - Keyword.init("_Generic", .Keyword_generic), - Keyword.init("_Noreturn", .Keyword_noreturn), - Keyword.init("_Static_assert", .Keyword_static_assert), - Keyword.init("_Thread_local", .Keyword_thread_local), + .{"_Alignas", .Keyword_alignas}, + .{"_Alignof", .Keyword_alignof}, + .{"_Atomic", .Keyword_atomic}, + .{"_Generic", .Keyword_generic}, + .{"_Noreturn", .Keyword_noreturn}, + .{"_Static_assert", .Keyword_static_assert}, + .{"_Thread_local", .Keyword_thread_local}, // Preprocessor directives - Keyword.init("include", .Keyword_include), - Keyword.init("define", .Keyword_define), - Keyword.init("ifdef", .Keyword_ifdef), - Keyword.init("ifndef", .Keyword_ifndef), - Keyword.init("error", .Keyword_error), - Keyword.init("pragma", .Keyword_pragma), - }; + .{"include", .Keyword_include}, + .{"define", .Keyword_define}, + .{"ifdef", .Keyword_ifdef}, + .{"ifndef", .Keyword_ifndef}, + .{"error", .Keyword_error}, + .{"pragma", .Keyword_pragma}, + }); - // TODO perfect hash at comptime // TODO do this in the preprocessor pub fn getKeyword(bytes: []const u8, pp_directive: bool) ?Id { - var hash = std.hash_map.hashString(bytes); - for (keywords) |kw| { - if (kw.hash == hash and mem.eql(u8, kw.bytes, bytes)) { - switch (kw.id) { - .Keyword_include, - .Keyword_define, - .Keyword_ifdef, - .Keyword_ifndef, - .Keyword_error, - .Keyword_pragma, - => if (!pp_directive) return null, - else => {}, - } - return kw.id; + if (keywords.get(bytes)) |id| { + switch (id) { + .Keyword_include, + .Keyword_define, + .Keyword_ifdef, + .Keyword_ifndef, + .Keyword_error, + .Keyword_pragma, + => if (!pp_directive) return null, + else => {}, } + return id; } return null; } diff --git a/lib/std/comptime_string_map.zig b/lib/std/comptime_string_map.zig new file mode 100644 index 0000000000..313f1fcdda --- /dev/null +++ b/lib/std/comptime_string_map.zig @@ -0,0 +1,177 @@ +const std = @import("std.zig"); +const mem = std.mem; + +/// Like ComptimeStringHashMap but optimized for small sets of disparate string keys. +/// Works by separating the keys by length at comptime and only checking strings of +/// equal length at runtime. +/// +/// `kvs` expects a list literal containing list literals or an array/slice of structs +/// where `.@"0"` is the `[]const u8` key and `.@"1"` is the associated value of type `V`. +/// TODO: https://github.com/ziglang/zig/issues/4335 +pub fn ComptimeStringMap(comptime V: type, comptime kvs: var) type { + const precomputed = comptime blk: { + @setEvalBranchQuota(2000); + const KV = struct { + key: []const u8, + value: V, + }; + var sorted_kvs: [kvs.len]KV = undefined; + const lenAsc = (struct { + fn lenAsc(a: KV, b: KV) bool { + return a.key.len < b.key.len; + } + }).lenAsc; + for (kvs) |kv, i| { + if (V != void) { + sorted_kvs[i] = .{.key = kv.@"0", .value = kv.@"1"}; + } else { + sorted_kvs[i] = .{.key = kv.@"0", .value = {}}; + } + } + std.sort.sort(KV, &sorted_kvs, lenAsc); + const min_len = sorted_kvs[0].key.len; + const max_len = sorted_kvs[sorted_kvs.len - 1].key.len; + var len_indexes: [max_len + 1]usize = undefined; + var len: usize = 0; + var i: usize = 0; + while (len <= max_len) : (len += 1) { + // find the first keyword len == len + while (len > sorted_kvs[i].key.len) { + i += 1; + } + len_indexes[len] = i; + } + break :blk .{ + .min_len = min_len, + .max_len = max_len, + .sorted_kvs = sorted_kvs, + .len_indexes = len_indexes, + }; + }; + + return struct { + pub fn has(str: []const u8) bool { + return get(str) != null; + } + + pub fn get(str: []const u8) ?V { + if (str.len < precomputed.min_len or str.len > precomputed.max_len) + return null; + + var i = precomputed.len_indexes[str.len]; + while (true) { + const kv = precomputed.sorted_kvs[i]; + if (kv.key.len != str.len) + return null; + if (mem.eql(u8, kv.key, str)) + return kv.value; + i += 1; + if (i >= precomputed.sorted_kvs.len) + return null; + } + } + }; +} + +const TestEnum = enum { + A, + B, + C, + D, + E, +}; + +test "ComptimeStringMap list literal of list literals" { + const map = ComptimeStringMap(TestEnum, .{ + .{"these", .D}, + .{"have", .A}, + .{"nothing", .B}, + .{"incommon", .C}, + .{"samelen", .E}, + }); + + testMap(map); +} + +test "ComptimeStringMap array of structs" { + const KV = struct { + @"0": []const u8, + @"1": TestEnum, + }; + const map = ComptimeStringMap(TestEnum, [_]KV{ + .{.@"0" = "these", .@"1" = .D}, + .{.@"0" = "have", .@"1" = .A}, + .{.@"0" = "nothing", .@"1" = .B}, + .{.@"0" = "incommon", .@"1" = .C}, + .{.@"0" = "samelen", .@"1" = .E}, + }); + + testMap(map); +} + +test "ComptimeStringMap slice of structs" { + const KV = struct { + @"0": []const u8, + @"1": TestEnum, + }; + const slice: []const KV = &[_]KV{ + .{.@"0" = "these", .@"1" = .D}, + .{.@"0" = "have", .@"1" = .A}, + .{.@"0" = "nothing", .@"1" = .B}, + .{.@"0" = "incommon", .@"1" = .C}, + .{.@"0" = "samelen", .@"1" = .E}, + }; + const map = ComptimeStringMap(TestEnum, slice); + + testMap(map); +} + +fn testMap(comptime map: var) void { + std.testing.expectEqual(TestEnum.A, map.get("have").?); + std.testing.expectEqual(TestEnum.B, map.get("nothing").?); + std.testing.expect(null == map.get("missing")); + std.testing.expectEqual(TestEnum.D, map.get("these").?); + std.testing.expectEqual(TestEnum.E, map.get("samelen").?); + + std.testing.expect(!map.has("missing")); + std.testing.expect(map.has("these")); +} + +test "ComptimeStringMap void value type, slice of structs" { + const KV = struct { + @"0": []const u8, + }; + const slice: []const KV = &[_]KV{ + .{.@"0" = "these"}, + .{.@"0" = "have"}, + .{.@"0" = "nothing"}, + .{.@"0" = "incommon"}, + .{.@"0" = "samelen"}, + }; + const map = ComptimeStringMap(void, slice); + + testSet(map); +} + +test "ComptimeStringMap void value type, list literal of list literals" { + const map = ComptimeStringMap(void, .{ + .{"these"}, + .{"have"}, + .{"nothing"}, + .{"incommon"}, + .{"samelen"}, + }); + + testSet(map); +} + +fn testSet(comptime map: var) void { + std.testing.expectEqual({}, map.get("have").?); + std.testing.expectEqual({}, map.get("nothing").?); + std.testing.expect(null == map.get("missing")); + std.testing.expectEqual({}, map.get("these").?); + std.testing.expectEqual({}, map.get("samelen").?); + + std.testing.expect(!map.has("missing")); + std.testing.expect(map.has("these")); +} diff --git a/lib/std/meta.zig b/lib/std/meta.zig index 68426323b2..f5c4ab59fd 100644 --- a/lib/std/meta.zig +++ b/lib/std/meta.zig @@ -53,12 +53,37 @@ test "std.meta.tagName" { } pub fn stringToEnum(comptime T: type, str: []const u8) ?T { - inline for (@typeInfo(T).Enum.fields) |enumField| { - if (mem.eql(u8, str, enumField.name)) { - return @field(T, enumField.name); + // Using ComptimeStringMap here is more performant, but it will start to take too + // long to compile if the enum is large enough, due to the current limits of comptime + // performance when doing things like constructing lookup maps at comptime. + // TODO The '100' here is arbitrary and should be increased when possible: + // - https://github.com/ziglang/zig/issues/4055 + // - https://github.com/ziglang/zig/issues/3863 + if (@typeInfo(T).Enum.fields.len <= 100) { + const kvs = comptime build_kvs: { + // In order to generate an array of structs that play nice with anonymous + // list literals, we need to give them "0" and "1" field names. + // TODO https://github.com/ziglang/zig/issues/4335 + const EnumKV = struct { + @"0": []const u8, + @"1": T, + }; + var kvs_array: [@typeInfo(T).Enum.fields.len]EnumKV = undefined; + inline for (@typeInfo(T).Enum.fields) |enumField, i| { + kvs_array[i] = .{ .@"0" = enumField.name, .@"1" = @field(T, enumField.name) }; + } + break :build_kvs kvs_array[0..]; + }; + const map = std.ComptimeStringMap(T, kvs); + return map.get(str); + } else { + inline for (@typeInfo(T).Enum.fields) |enumField| { + if (mem.eql(u8, str, enumField.name)) { + return @field(T, enumField.name); + } } + return null; } - return null; } test "std.meta.stringToEnum" { diff --git a/lib/std/std.zig b/lib/std/std.zig index 9920ca3378..b1cab77109 100644 --- a/lib/std/std.zig +++ b/lib/std/std.zig @@ -8,6 +8,7 @@ pub const BloomFilter = @import("bloom_filter.zig").BloomFilter; pub const BufMap = @import("buf_map.zig").BufMap; pub const BufSet = @import("buf_set.zig").BufSet; pub const ChildProcess = @import("child_process.zig").ChildProcess; +pub const ComptimeStringMap = @import("comptime_string_map.zig").ComptimeStringMap; pub const DynLib = @import("dynamic_library.zig").DynLib; pub const HashMap = @import("hash_map.zig").HashMap; pub const Mutex = @import("mutex.zig").Mutex; diff --git a/lib/std/zig/tokenizer.zig b/lib/std/zig/tokenizer.zig index defb888027..64d7b7cd8d 100644 --- a/lib/std/zig/tokenizer.zig +++ b/lib/std/zig/tokenizer.zig @@ -10,115 +10,62 @@ pub const Token = struct { end: usize, }; - pub const Keyword = struct { - bytes: []const u8, - id: Id, - - fn init(bytes: []const u8, id: Id) Keyword { - return .{ - .bytes = bytes, - .id = id, - }; - } - }; - - pub const keywords = [_]Keyword{ - Keyword.init("align", .Keyword_align), - Keyword.init("allowzero", .Keyword_allowzero), - Keyword.init("and", .Keyword_and), - Keyword.init("anyframe", .Keyword_anyframe), - Keyword.init("asm", .Keyword_asm), - Keyword.init("async", .Keyword_async), - Keyword.init("await", .Keyword_await), - Keyword.init("break", .Keyword_break), - Keyword.init("callconv", .Keyword_callconv), - Keyword.init("catch", .Keyword_catch), - Keyword.init("comptime", .Keyword_comptime), - Keyword.init("const", .Keyword_const), - Keyword.init("continue", .Keyword_continue), - Keyword.init("defer", .Keyword_defer), - Keyword.init("else", .Keyword_else), - Keyword.init("enum", .Keyword_enum), - Keyword.init("errdefer", .Keyword_errdefer), - Keyword.init("error", .Keyword_error), - Keyword.init("export", .Keyword_export), - Keyword.init("extern", .Keyword_extern), - Keyword.init("false", .Keyword_false), - Keyword.init("fn", .Keyword_fn), - Keyword.init("for", .Keyword_for), - Keyword.init("if", .Keyword_if), - Keyword.init("inline", .Keyword_inline), - Keyword.init("noalias", .Keyword_noalias), - Keyword.init("noasync", .Keyword_nosuspend), // TODO: remove this - Keyword.init("noinline", .Keyword_noinline), - Keyword.init("nosuspend", .Keyword_nosuspend), - Keyword.init("null", .Keyword_null), - Keyword.init("or", .Keyword_or), - Keyword.init("orelse", .Keyword_orelse), - Keyword.init("packed", .Keyword_packed), - Keyword.init("pub", .Keyword_pub), - Keyword.init("resume", .Keyword_resume), - Keyword.init("return", .Keyword_return), - Keyword.init("linksection", .Keyword_linksection), - Keyword.init("struct", .Keyword_struct), - Keyword.init("suspend", .Keyword_suspend), - Keyword.init("switch", .Keyword_switch), - Keyword.init("test", .Keyword_test), - Keyword.init("threadlocal", .Keyword_threadlocal), - Keyword.init("true", .Keyword_true), - Keyword.init("try", .Keyword_try), - Keyword.init("undefined", .Keyword_undefined), - Keyword.init("union", .Keyword_union), - Keyword.init("unreachable", .Keyword_unreachable), - Keyword.init("usingnamespace", .Keyword_usingnamespace), - Keyword.init("var", .Keyword_var), - Keyword.init("volatile", .Keyword_volatile), - Keyword.init("while", .Keyword_while), - }; + pub const keywords = std.ComptimeStringMap(Id, .{ + .{"align", .Keyword_align}, + .{"allowzero", .Keyword_allowzero}, + .{"and", .Keyword_and}, + .{"anyframe", .Keyword_anyframe}, + .{"asm", .Keyword_asm}, + .{"async", .Keyword_async}, + .{"await", .Keyword_await}, + .{"break", .Keyword_break}, + .{"callconv", .Keyword_callconv}, + .{"catch", .Keyword_catch}, + .{"comptime", .Keyword_comptime}, + .{"const", .Keyword_const}, + .{"continue", .Keyword_continue}, + .{"defer", .Keyword_defer}, + .{"else", .Keyword_else}, + .{"enum", .Keyword_enum}, + .{"errdefer", .Keyword_errdefer}, + .{"error", .Keyword_error}, + .{"export", .Keyword_export}, + .{"extern", .Keyword_extern}, + .{"false", .Keyword_false}, + .{"fn", .Keyword_fn}, + .{"for", .Keyword_for}, + .{"if", .Keyword_if}, + .{"inline", .Keyword_inline}, + .{"noalias", .Keyword_noalias}, + .{"noasync", .Keyword_nosuspend}, // TODO: remove this + .{"noinline", .Keyword_noinline}, + .{"nosuspend", .Keyword_nosuspend}, + .{"null", .Keyword_null}, + .{"or", .Keyword_or}, + .{"orelse", .Keyword_orelse}, + .{"packed", .Keyword_packed}, + .{"pub", .Keyword_pub}, + .{"resume", .Keyword_resume}, + .{"return", .Keyword_return}, + .{"linksection", .Keyword_linksection}, + .{"struct", .Keyword_struct}, + .{"suspend", .Keyword_suspend}, + .{"switch", .Keyword_switch}, + .{"test", .Keyword_test}, + .{"threadlocal", .Keyword_threadlocal}, + .{"true", .Keyword_true}, + .{"try", .Keyword_try}, + .{"undefined", .Keyword_undefined}, + .{"union", .Keyword_union}, + .{"unreachable", .Keyword_unreachable}, + .{"usingnamespace", .Keyword_usingnamespace}, + .{"var", .Keyword_var}, + .{"volatile", .Keyword_volatile}, + .{"while", .Keyword_while}, + }); pub fn getKeyword(bytes: []const u8) ?Id { - const precomputed = comptime blk: { - @setEvalBranchQuota(2000); - var sorted_keywords = keywords; - const lenAsc = (struct { - fn lenAsc(a: Keyword, b: Keyword) bool { - return a.bytes.len < b.bytes.len; - } - }).lenAsc; - std.sort.sort(Keyword, &sorted_keywords, lenAsc); - const min_len = sorted_keywords[0].bytes.len; - const max_len = sorted_keywords[sorted_keywords.len - 1].bytes.len; - var len_indexes: [max_len + 1]usize = undefined; - var len: usize = 0; - var kw_i: usize = 0; - while (len <= max_len) : (len += 1) { - // find the first keyword len == len - while (len > sorted_keywords[kw_i].bytes.len) { - kw_i += 1; - } - len_indexes[len] = kw_i; - } - break :blk .{ - .min_len = min_len, - .max_len = max_len, - .sorted_keywords = sorted_keywords, - .len_indexes = len_indexes, - }; - }; - if (bytes.len < precomputed.min_len or bytes.len > precomputed.max_len) - return null; - - var i = precomputed.len_indexes[bytes.len]; - while (true) { - const kw = precomputed.sorted_keywords[i]; - if (kw.bytes.len != bytes.len) - return null; - if (mem.eql(u8, kw.bytes, bytes)) - return kw.id; - i += 1; - if (i >= precomputed.sorted_keywords.len) - return null; - } + return keywords.get(bytes); } pub const Id = enum {