commit 1e0de896b8343e82a46a23fc287eab4fce06b423 (tree)
parent 6e89692d81bd5d50a634c32a1588a7c505860f32
Author: Andrew Kelley <andrew@ziglang.org>
Date: Thu, 28 May 2020 23:06:40 -0400
Merge pull request #5452 from squeek502/comptime-string-map
Add std.ComptimeStringMap based on the tokenizer optimization in #5442
Diffstat:
5 files changed, 324 insertions(+), 178 deletions(-)
diff --git a/lib/std/c/tokenizer.zig b/lib/std/c/tokenizer.zig
@@ -277,83 +277,79 @@ pub const Token = struct {
};
// TODO extensions
- pub const keywords = [_]Keyword{
- Keyword.init("auto", .Keyword_auto),
- Keyword.init("break", .Keyword_break),
- Keyword.init("case", .Keyword_case),
- Keyword.init("char", .Keyword_char),
- Keyword.init("const", .Keyword_const),
- Keyword.init("continue", .Keyword_continue),
- Keyword.init("default", .Keyword_default),
- Keyword.init("do", .Keyword_do),
- Keyword.init("double", .Keyword_double),
- Keyword.init("else", .Keyword_else),
- Keyword.init("enum", .Keyword_enum),
- Keyword.init("extern", .Keyword_extern),
- Keyword.init("float", .Keyword_float),
- Keyword.init("for", .Keyword_for),
- Keyword.init("goto", .Keyword_goto),
- Keyword.init("if", .Keyword_if),
- Keyword.init("int", .Keyword_int),
- Keyword.init("long", .Keyword_long),
- Keyword.init("register", .Keyword_register),
- Keyword.init("return", .Keyword_return),
- Keyword.init("short", .Keyword_short),
- Keyword.init("signed", .Keyword_signed),
- Keyword.init("sizeof", .Keyword_sizeof),
- Keyword.init("static", .Keyword_static),
- Keyword.init("struct", .Keyword_struct),
- Keyword.init("switch", .Keyword_switch),
- Keyword.init("typedef", .Keyword_typedef),
- Keyword.init("union", .Keyword_union),
- Keyword.init("unsigned", .Keyword_unsigned),
- Keyword.init("void", .Keyword_void),
- Keyword.init("volatile", .Keyword_volatile),
- Keyword.init("while", .Keyword_while),
+ pub const keywords = std.ComptimeStringMap(Id, .{
+ .{"auto", .Keyword_auto},
+ .{"break", .Keyword_break},
+ .{"case", .Keyword_case},
+ .{"char", .Keyword_char},
+ .{"const", .Keyword_const},
+ .{"continue", .Keyword_continue},
+ .{"default", .Keyword_default},
+ .{"do", .Keyword_do},
+ .{"double", .Keyword_double},
+ .{"else", .Keyword_else},
+ .{"enum", .Keyword_enum},
+ .{"extern", .Keyword_extern},
+ .{"float", .Keyword_float},
+ .{"for", .Keyword_for},
+ .{"goto", .Keyword_goto},
+ .{"if", .Keyword_if},
+ .{"int", .Keyword_int},
+ .{"long", .Keyword_long},
+ .{"register", .Keyword_register},
+ .{"return", .Keyword_return},
+ .{"short", .Keyword_short},
+ .{"signed", .Keyword_signed},
+ .{"sizeof", .Keyword_sizeof},
+ .{"static", .Keyword_static},
+ .{"struct", .Keyword_struct},
+ .{"switch", .Keyword_switch},
+ .{"typedef", .Keyword_typedef},
+ .{"union", .Keyword_union},
+ .{"unsigned", .Keyword_unsigned},
+ .{"void", .Keyword_void},
+ .{"volatile", .Keyword_volatile},
+ .{"while", .Keyword_while},
// ISO C99
- Keyword.init("_Bool", .Keyword_bool),
- Keyword.init("_Complex", .Keyword_complex),
- Keyword.init("_Imaginary", .Keyword_imaginary),
- Keyword.init("inline", .Keyword_inline),
- Keyword.init("restrict", .Keyword_restrict),
+ .{"_Bool", .Keyword_bool},
+ .{"_Complex", .Keyword_complex},
+ .{"_Imaginary", .Keyword_imaginary},
+ .{"inline", .Keyword_inline},
+ .{"restrict", .Keyword_restrict},
// ISO C11
- Keyword.init("_Alignas", .Keyword_alignas),
- Keyword.init("_Alignof", .Keyword_alignof),
- Keyword.init("_Atomic", .Keyword_atomic),
- Keyword.init("_Generic", .Keyword_generic),
- Keyword.init("_Noreturn", .Keyword_noreturn),
- Keyword.init("_Static_assert", .Keyword_static_assert),
- Keyword.init("_Thread_local", .Keyword_thread_local),
+ .{"_Alignas", .Keyword_alignas},
+ .{"_Alignof", .Keyword_alignof},
+ .{"_Atomic", .Keyword_atomic},
+ .{"_Generic", .Keyword_generic},
+ .{"_Noreturn", .Keyword_noreturn},
+ .{"_Static_assert", .Keyword_static_assert},
+ .{"_Thread_local", .Keyword_thread_local},
// Preprocessor directives
- Keyword.init("include", .Keyword_include),
- Keyword.init("define", .Keyword_define),
- Keyword.init("ifdef", .Keyword_ifdef),
- Keyword.init("ifndef", .Keyword_ifndef),
- Keyword.init("error", .Keyword_error),
- Keyword.init("pragma", .Keyword_pragma),
- };
+ .{"include", .Keyword_include},
+ .{"define", .Keyword_define},
+ .{"ifdef", .Keyword_ifdef},
+ .{"ifndef", .Keyword_ifndef},
+ .{"error", .Keyword_error},
+ .{"pragma", .Keyword_pragma},
+ });
- // TODO perfect hash at comptime
// TODO do this in the preprocessor
pub fn getKeyword(bytes: []const u8, pp_directive: bool) ?Id {
- var hash = std.hash_map.hashString(bytes);
- for (keywords) |kw| {
- if (kw.hash == hash and mem.eql(u8, kw.bytes, bytes)) {
- switch (kw.id) {
- .Keyword_include,
- .Keyword_define,
- .Keyword_ifdef,
- .Keyword_ifndef,
- .Keyword_error,
- .Keyword_pragma,
- => if (!pp_directive) return null,
- else => {},
- }
- return kw.id;
+ if (keywords.get(bytes)) |id| {
+ switch (id) {
+ .Keyword_include,
+ .Keyword_define,
+ .Keyword_ifdef,
+ .Keyword_ifndef,
+ .Keyword_error,
+ .Keyword_pragma,
+ => if (!pp_directive) return null,
+ else => {},
}
+ return id;
}
return null;
}
diff --git a/lib/std/comptime_string_map.zig b/lib/std/comptime_string_map.zig
@@ -0,0 +1,177 @@
+const std = @import("std.zig");
+const mem = std.mem;
+
+/// Comptime-built lookup table from string keys to values of type `V`, optimized for
+/// small sets of disparate string keys.
+/// Works by sorting the keys by length at comptime and only comparing against keys of
+/// equal length at runtime.
+///
+/// `kvs` expects a list literal containing list literals or an array/slice of structs
+/// where `.@"0"` is the `[]const u8` key and `.@"1"` is the associated value of type `V`.
+/// When `V` is `void` only `.@"0"` is read, so entries may omit `.@"1"`.
+/// An empty array/slice of `kvs` produces a map in which every lookup misses.
+///
+/// The returned type exposes:
+/// - `has(str) bool`: whether `str` is one of the keys.
+/// - `get(str) ?V`: the value stored for `str`, or `null` when `str` is not a key.
+/// TODO: https://github.com/ziglang/zig/issues/4335
+pub fn ComptimeStringMap(comptime V: type, comptime kvs: var) type {
+    if (kvs.len == 0) {
+        // With no entries there is no shortest/longest key to precompute, and the
+        // `sorted_kvs[0]` access below would index a zero-length array. Return a map
+        // that never matches instead.
+        return struct {
+            pub fn has(str: []const u8) bool {
+                return false;
+            }
+
+            pub fn get(str: []const u8) ?V {
+                return null;
+            }
+        };
+    }
+
+    const precomputed = comptime blk: {
+        @setEvalBranchQuota(2000);
+        const KV = struct {
+            key: []const u8,
+            value: V,
+        };
+        var sorted_kvs: [kvs.len]KV = undefined;
+        const lenAsc = (struct {
+            fn lenAsc(a: KV, b: KV) bool {
+                return a.key.len < b.key.len;
+            }
+        }).lenAsc;
+        // Copy the caller's entries into a uniform KV array; for `void` values only
+        // the key field is read from each entry.
+        for (kvs) |kv, i| {
+            if (V != void) {
+                sorted_kvs[i] = .{ .key = kv.@"0", .value = kv.@"1" };
+            } else {
+                sorted_kvs[i] = .{ .key = kv.@"0", .value = {} };
+            }
+        }
+        // Order by key length so that all keys of one length are adjacent.
+        std.sort.sort(KV, &sorted_kvs, lenAsc);
+        const min_len = sorted_kvs[0].key.len;
+        const max_len = sorted_kvs[sorted_kvs.len - 1].key.len;
+        // len_indexes[len] is the index in sorted_kvs of the first key whose
+        // length is >= len.
+        var len_indexes: [max_len + 1]usize = undefined;
+        var len: usize = 0;
+        var i: usize = 0;
+        while (len <= max_len) : (len += 1) {
+            // skip every key shorter than `len`
+            while (len > sorted_kvs[i].key.len) {
+                i += 1;
+            }
+            len_indexes[len] = i;
+        }
+        break :blk .{
+            .min_len = min_len,
+            .max_len = max_len,
+            .sorted_kvs = sorted_kvs,
+            .len_indexes = len_indexes,
+        };
+    };
+
+    return struct {
+        /// Returns true when `str` is one of the keys.
+        pub fn has(str: []const u8) bool {
+            return get(str) != null;
+        }
+
+        /// Returns the value stored for `str`, or null when `str` is not a key.
+        pub fn get(str: []const u8) ?V {
+            // No key has this length; this also keeps `str.len` a valid index
+            // into `len_indexes`, which has `max_len + 1` entries.
+            if (str.len < precomputed.min_len or str.len > precomputed.max_len)
+                return null;
+
+            // Scan only the run of keys that starts at the first key of this length.
+            var i = precomputed.len_indexes[str.len];
+            while (true) {
+                const kv = precomputed.sorted_kvs[i];
+                // Reached a key of a different length: the run is exhausted.
+                if (kv.key.len != str.len)
+                    return null;
+                if (mem.eql(u8, kv.key, str))
+                    return kv.value;
+                i += 1;
+                if (i >= precomputed.sorted_kvs.len)
+                    return null;
+            }
+        }
+    };
+}
+
+/// Value type used by the non-void ComptimeStringMap tests in this file.
+const TestEnum = enum { A, B, C, D, E };
+
+test "ComptimeStringMap list literal of list literals" {
+    // Entries are given positionally: key first, value second.
+    testMap(ComptimeStringMap(TestEnum, .{
+        .{ "these", .D },
+        .{ "have", .A },
+        .{ "nothing", .B },
+        .{ "incommon", .C },
+        .{ "samelen", .E },
+    }));
+}
+
+test "ComptimeStringMap array of structs" {
+    // Entry type with the explicit "0"/"1" field names the map reads.
+    const Entry = struct {
+        @"0": []const u8,
+        @"1": TestEnum,
+    };
+
+    testMap(ComptimeStringMap(TestEnum, [_]Entry{
+        .{ .@"0" = "these", .@"1" = .D },
+        .{ .@"0" = "have", .@"1" = .A },
+        .{ .@"0" = "nothing", .@"1" = .B },
+        .{ .@"0" = "incommon", .@"1" = .C },
+        .{ .@"0" = "samelen", .@"1" = .E },
+    }));
+}
+
+test "ComptimeStringMap slice of structs" {
+    // Entry type with the explicit "0"/"1" field names the map reads.
+    const Entry = struct {
+        @"0": []const u8,
+        @"1": TestEnum,
+    };
+    // Same entries as the array test, but passed as a slice.
+    const entries: []const Entry = &[_]Entry{
+        .{ .@"0" = "these", .@"1" = .D },
+        .{ .@"0" = "have", .@"1" = .A },
+        .{ .@"0" = "nothing", .@"1" = .B },
+        .{ .@"0" = "incommon", .@"1" = .C },
+        .{ .@"0" = "samelen", .@"1" = .E },
+    };
+
+    testMap(ComptimeStringMap(TestEnum, entries));
+}
+
+/// Shared assertions for the `TestEnum`-valued maps: "have", "nothing", "these" and
+/// "samelen" must resolve to their values, and "missing" must not be found.
+fn testMap(comptime map: var) void {
+    const testing = std.testing;
+
+    // Lookups through `get`.
+    testing.expectEqual(TestEnum.A, map.get("have").?);
+    testing.expectEqual(TestEnum.B, map.get("nothing").?);
+    testing.expect(map.get("missing") == null);
+    testing.expectEqual(TestEnum.D, map.get("these").?);
+    testing.expectEqual(TestEnum.E, map.get("samelen").?);
+
+    // Membership through `has`.
+    testing.expect(map.has("missing") == false);
+    testing.expect(map.has("these"));
+}
+
+test "ComptimeStringMap void value type, slice of structs" {
+    // With a void value type the entries carry only the key field.
+    const Key = struct {
+        @"0": []const u8,
+    };
+    const keys: []const Key = &[_]Key{
+        .{ .@"0" = "these" },
+        .{ .@"0" = "have" },
+        .{ .@"0" = "nothing" },
+        .{ .@"0" = "incommon" },
+        .{ .@"0" = "samelen" },
+    };
+
+    testSet(ComptimeStringMap(void, keys));
+}
+
+test "ComptimeStringMap void value type, list literal of list literals" {
+    // Each entry is a one-element list literal holding just the key.
+    testSet(ComptimeStringMap(void, .{
+        .{"these"},
+        .{"have"},
+        .{"nothing"},
+        .{"incommon"},
+        .{"samelen"},
+    }));
+}
+
+/// Shared assertions for the `void`-valued maps: "have", "nothing", "these" and
+/// "samelen" must be present, and "missing" must not be found.
+fn testSet(comptime map: var) void {
+    const testing = std.testing;
+
+    // Present keys unwrap to the void value.
+    testing.expectEqual({}, map.get("have").?);
+    testing.expectEqual({}, map.get("nothing").?);
+    testing.expect(map.get("missing") == null);
+    testing.expectEqual({}, map.get("these").?);
+    testing.expectEqual({}, map.get("samelen").?);
+
+    // Membership through `has`.
+    testing.expect(map.has("missing") == false);
+    testing.expect(map.has("these"));
+}
diff --git a/lib/std/meta.zig b/lib/std/meta.zig
@@ -53,12 +53,37 @@ test "std.meta.tagName" {
}
pub fn stringToEnum(comptime T: type, str: []const u8) ?T {
- inline for (@typeInfo(T).Enum.fields) |enumField| {
- if (mem.eql(u8, str, enumField.name)) {
- return @field(T, enumField.name);
+ // Using ComptimeStringMap here is more performant, but it will start to take too
+ // long to compile if the enum is large enough, due to the current limits of comptime
+ // performance when doing things like constructing lookup maps at comptime.
+ // TODO The '100' here is arbitrary and should be increased when possible:
+ // - https://github.com/ziglang/zig/issues/4055
+ // - https://github.com/ziglang/zig/issues/3863
+ if (@typeInfo(T).Enum.fields.len <= 100) {
+ const kvs = comptime build_kvs: {
+ // In order to generate an array of structs that play nice with anonymous
+ // list literals, we need to give them "0" and "1" field names.
+ // TODO https://github.com/ziglang/zig/issues/4335
+ const EnumKV = struct {
+ @"0": []const u8,
+ @"1": T,
+ };
+ var kvs_array: [@typeInfo(T).Enum.fields.len]EnumKV = undefined;
+ inline for (@typeInfo(T).Enum.fields) |enumField, i| {
+ kvs_array[i] = .{ .@"0" = enumField.name, .@"1" = @field(T, enumField.name) };
+ }
+ break :build_kvs kvs_array[0..];
+ };
+ const map = std.ComptimeStringMap(T, kvs);
+ return map.get(str);
+ } else {
+ inline for (@typeInfo(T).Enum.fields) |enumField| {
+ if (mem.eql(u8, str, enumField.name)) {
+ return @field(T, enumField.name);
+ }
}
+ return null;
}
- return null;
}
test "std.meta.stringToEnum" {
diff --git a/lib/std/std.zig b/lib/std/std.zig
@@ -8,6 +8,7 @@ pub const BloomFilter = @import("bloom_filter.zig").BloomFilter;
pub const BufMap = @import("buf_map.zig").BufMap;
pub const BufSet = @import("buf_set.zig").BufSet;
pub const ChildProcess = @import("child_process.zig").ChildProcess;
+pub const ComptimeStringMap = @import("comptime_string_map.zig").ComptimeStringMap;
pub const DynLib = @import("dynamic_library.zig").DynLib;
pub const HashMap = @import("hash_map.zig").HashMap;
pub const Mutex = @import("mutex.zig").Mutex;
diff --git a/lib/std/zig/tokenizer.zig b/lib/std/zig/tokenizer.zig
@@ -10,115 +10,62 @@ pub const Token = struct {
end: usize,
};
- pub const Keyword = struct {
- bytes: []const u8,
- id: Id,
-
- fn init(bytes: []const u8, id: Id) Keyword {
- return .{
- .bytes = bytes,
- .id = id,
- };
- }
- };
-
- pub const keywords = [_]Keyword{
- Keyword.init("align", .Keyword_align),
- Keyword.init("allowzero", .Keyword_allowzero),
- Keyword.init("and", .Keyword_and),
- Keyword.init("anyframe", .Keyword_anyframe),
- Keyword.init("asm", .Keyword_asm),
- Keyword.init("async", .Keyword_async),
- Keyword.init("await", .Keyword_await),
- Keyword.init("break", .Keyword_break),
- Keyword.init("callconv", .Keyword_callconv),
- Keyword.init("catch", .Keyword_catch),
- Keyword.init("comptime", .Keyword_comptime),
- Keyword.init("const", .Keyword_const),
- Keyword.init("continue", .Keyword_continue),
- Keyword.init("defer", .Keyword_defer),
- Keyword.init("else", .Keyword_else),
- Keyword.init("enum", .Keyword_enum),
- Keyword.init("errdefer", .Keyword_errdefer),
- Keyword.init("error", .Keyword_error),
- Keyword.init("export", .Keyword_export),
- Keyword.init("extern", .Keyword_extern),
- Keyword.init("false", .Keyword_false),
- Keyword.init("fn", .Keyword_fn),
- Keyword.init("for", .Keyword_for),
- Keyword.init("if", .Keyword_if),
- Keyword.init("inline", .Keyword_inline),
- Keyword.init("noalias", .Keyword_noalias),
- Keyword.init("noasync", .Keyword_nosuspend), // TODO: remove this
- Keyword.init("noinline", .Keyword_noinline),
- Keyword.init("nosuspend", .Keyword_nosuspend),
- Keyword.init("null", .Keyword_null),
- Keyword.init("or", .Keyword_or),
- Keyword.init("orelse", .Keyword_orelse),
- Keyword.init("packed", .Keyword_packed),
- Keyword.init("pub", .Keyword_pub),
- Keyword.init("resume", .Keyword_resume),
- Keyword.init("return", .Keyword_return),
- Keyword.init("linksection", .Keyword_linksection),
- Keyword.init("struct", .Keyword_struct),
- Keyword.init("suspend", .Keyword_suspend),
- Keyword.init("switch", .Keyword_switch),
- Keyword.init("test", .Keyword_test),
- Keyword.init("threadlocal", .Keyword_threadlocal),
- Keyword.init("true", .Keyword_true),
- Keyword.init("try", .Keyword_try),
- Keyword.init("undefined", .Keyword_undefined),
- Keyword.init("union", .Keyword_union),
- Keyword.init("unreachable", .Keyword_unreachable),
- Keyword.init("usingnamespace", .Keyword_usingnamespace),
- Keyword.init("var", .Keyword_var),
- Keyword.init("volatile", .Keyword_volatile),
- Keyword.init("while", .Keyword_while),
- };
+ pub const keywords = std.ComptimeStringMap(Id, .{
+ .{"align", .Keyword_align},
+ .{"allowzero", .Keyword_allowzero},
+ .{"and", .Keyword_and},
+ .{"anyframe", .Keyword_anyframe},
+ .{"asm", .Keyword_asm},
+ .{"async", .Keyword_async},
+ .{"await", .Keyword_await},
+ .{"break", .Keyword_break},
+ .{"callconv", .Keyword_callconv},
+ .{"catch", .Keyword_catch},
+ .{"comptime", .Keyword_comptime},
+ .{"const", .Keyword_const},
+ .{"continue", .Keyword_continue},
+ .{"defer", .Keyword_defer},
+ .{"else", .Keyword_else},
+ .{"enum", .Keyword_enum},
+ .{"errdefer", .Keyword_errdefer},
+ .{"error", .Keyword_error},
+ .{"export", .Keyword_export},
+ .{"extern", .Keyword_extern},
+ .{"false", .Keyword_false},
+ .{"fn", .Keyword_fn},
+ .{"for", .Keyword_for},
+ .{"if", .Keyword_if},
+ .{"inline", .Keyword_inline},
+ .{"noalias", .Keyword_noalias},
+ .{"noasync", .Keyword_nosuspend}, // TODO: remove this alias; "noasync" currently maps to the same id as "nosuspend"
+ .{"noinline", .Keyword_noinline},
+ .{"nosuspend", .Keyword_nosuspend},
+ .{"null", .Keyword_null},
+ .{"or", .Keyword_or},
+ .{"orelse", .Keyword_orelse},
+ .{"packed", .Keyword_packed},
+ .{"pub", .Keyword_pub},
+ .{"resume", .Keyword_resume},
+ .{"return", .Keyword_return},
+ .{"linksection", .Keyword_linksection},
+ .{"struct", .Keyword_struct},
+ .{"suspend", .Keyword_suspend},
+ .{"switch", .Keyword_switch},
+ .{"test", .Keyword_test},
+ .{"threadlocal", .Keyword_threadlocal},
+ .{"true", .Keyword_true},
+ .{"try", .Keyword_try},
+ .{"undefined", .Keyword_undefined},
+ .{"union", .Keyword_union},
+ .{"unreachable", .Keyword_unreachable},
+ .{"usingnamespace", .Keyword_usingnamespace},
+ .{"var", .Keyword_var},
+ .{"volatile", .Keyword_volatile},
+ .{"while", .Keyword_while},
+ });
pub fn getKeyword(bytes: []const u8) ?Id {
- const precomputed = comptime blk: {
- @setEvalBranchQuota(2000);
- var sorted_keywords = keywords;
- const lenAsc = (struct {
- fn lenAsc(a: Keyword, b: Keyword) bool {
- return a.bytes.len < b.bytes.len;
- }
- }).lenAsc;
- std.sort.sort(Keyword, &sorted_keywords, lenAsc);
- const min_len = sorted_keywords[0].bytes.len;
- const max_len = sorted_keywords[sorted_keywords.len - 1].bytes.len;
- var len_indexes: [max_len + 1]usize = undefined;
- var len: usize = 0;
- var kw_i: usize = 0;
- while (len <= max_len) : (len += 1) {
- // find the first keyword len == len
- while (len > sorted_keywords[kw_i].bytes.len) {
- kw_i += 1;
- }
- len_indexes[len] = kw_i;
- }
- break :blk .{
- .min_len = min_len,
- .max_len = max_len,
- .sorted_keywords = sorted_keywords,
- .len_indexes = len_indexes,
- };
- };
- if (bytes.len < precomputed.min_len or bytes.len > precomputed.max_len)
- return null;
-
- var i = precomputed.len_indexes[bytes.len];
- while (true) {
- const kw = precomputed.sorted_keywords[i];
- if (kw.bytes.len != bytes.len)
- return null;
- if (mem.eql(u8, kw.bytes, bytes))
- return kw.id;
- i += 1;
- if (i >= precomputed.sorted_keywords.len)
- return null;
- }
+ return keywords.get(bytes);
}
pub const Id = enum {