commit 69ff89fd1207bc95adf7a349319973c6070ce540 (tree)
parent 0ecdbdb3cb3d9558b5d2dbd928ead124d98c74ca
Author: Andrew Kelley <andrew@ziglang.org>
Date: Mon, 25 May 2020 15:12:23 -0400
stage2 parser: heuristics to pre-allocate token arrays
throughput: 72.2 MiB/s => 75.3 MiB/s
I also tried the idea from the comment deleted in this commit
(ensureCapacity on the tokens list, then appendAssumeCapacity inside
the loop), and it made the throughput worse.
Diffstat:
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/lib/std/zig/parse.zig b/lib/std/zig/parse.zig
@@ -14,13 +14,16 @@ pub const Error = error{ParseError} || Allocator.Error;
/// Result should be freed with tree.deinit() when there are
/// no more references to any of the tokens or nodes.
pub fn parse(gpa: *Allocator, source: []const u8) Allocator.Error!*Tree {
- // TODO optimization idea: ensureCapacity on the tokens list and
- // then appendAssumeCapacity inside the loop.
var token_ids = std.ArrayList(Token.Id).init(gpa);
defer token_ids.deinit();
var token_locs = std.ArrayList(Token.Loc).init(gpa);
defer token_locs.deinit();
+ // Empirically, the zig std lib has an 8:1 ratio of source bytes to token count.
+ const estimated_token_count = source.len / 8;
+ try token_ids.ensureCapacity(estimated_token_count);
+ try token_locs.ensureCapacity(estimated_token_count);
+
var tokenizer = std.zig.Tokenizer.init(source);
while (true) {
const token = tokenizer.next();