From 6c6d9d9c2c385976105dd977b8edd4be0e4587e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Motiejus=20Jak=C5=A1tys?= Date: Wed, 6 Jul 2022 12:32:49 +0300 Subject: [PATCH] replace users_arr and groups_arr with smaller arrays RSS savings are trivial, it was not worth it. --- src/Corpus.zig | 117 ++++++++++++++++++++++++++----------------------- 1 file changed, 62 insertions(+), 55 deletions(-) diff --git a/src/Corpus.zig b/src/Corpus.zig index d0bc4ff..1a31d6d 100644 --- a/src/Corpus.zig +++ b/src/Corpus.zig @@ -40,35 +40,76 @@ pub fn init( if (usersConst.len >= math.maxInt(u32)) return error.TooMany; if (groupsConst.len >= math.maxInt(u32)) return error.TooMany; + var users = MultiArrayList(User){}; + var groups = MultiArrayList(Group){}; + var getgr_bufsize: usize = 0; + var getpw_bufsize: usize = 0; + var arena = ArenaAllocator.init(baseAllocator); var allocator = arena.allocator(); errdefer arena.deinit(); - var groups_arr = try allocator.alloc(Group, groupsConst.len); - var users_arr = try allocator.alloc(User, usersConst.len); + const NameIdx = struct { + name: []const u8, + idx: usize, + }; - var getgr_bufsize: usize = 0; - for (groupsConst) |*group, i| { - groups_arr[i] = try group.clone(allocator); - getgr_bufsize = math.max(getgr_bufsize, group.strlenZ()); - } - var getpw_bufsize: usize = 0; - for (usersConst) |*user, i| { - users_arr[i] = try user.clone(allocator); - getpw_bufsize = math.max(getpw_bufsize, user.strlenZ()); + const GidIdx = struct { + gid: u32, + idx: usize, + }; + + const Compare = struct { + fn name(_: void, a: NameIdx, b: NameIdx) bool { + var utf8_a = (unicode.Utf8View.init(a.name) catch unreachable).iterator(); + var utf8_b = (unicode.Utf8View.init(b.name) catch unreachable).iterator(); + while (utf8_a.nextCodepoint()) |codepoint_a| { + if (utf8_b.nextCodepoint()) |codepoint_b| { + if (codepoint_a == codepoint_b) { + continue; + } else return codepoint_a < codepoint_b; + } + + // b ran out first: b is a proper prefix of a, so a (the longer name) sorts first.
+ return true; + } + // a ran out first: a is a prefix of b, or the two names are equal + return false; + } + fn gid(_: void, a: GidIdx, b: GidIdx) bool { + return a.gid < b.gid; + } + }; + + { + var name_idx = try baseAllocator.alloc(NameIdx, usersConst.len); + defer baseAllocator.free(name_idx); + for (usersConst) |user, i| + name_idx[i] = NameIdx{ .name = user.name, .idx = i }; + sort.sort(NameIdx, name_idx, {}, Compare.name); + + try users.ensureTotalCapacity(allocator, usersConst.len); + for (name_idx) |entry| { + const user = try usersConst[entry.idx].clone(allocator); + users.appendAssumeCapacity(user); + getpw_bufsize = math.max(getpw_bufsize, user.strlenZ()); + } } - sort.sort(User, users_arr, {}, cmpUser); - sort.sort(Group, groups_arr, {}, cmpGroup); + { + var gid_idx = try baseAllocator.alloc(GidIdx, groupsConst.len); + defer baseAllocator.free(gid_idx); + for (groupsConst) |group, i| + gid_idx[i] = GidIdx{ .gid = group.gid, .idx = i }; + sort.sort(GidIdx, gid_idx, {}, Compare.gid); - var users = MultiArrayList(User){}; - try users.ensureTotalCapacity(allocator, users_arr.len); - for (users_arr) |user| - users.appendAssumeCapacity(user); - var groups = MultiArrayList(Group){}; - try groups.ensureTotalCapacity(allocator, groups_arr.len); - for (groups_arr) |group| - groups.appendAssumeCapacity(group); + try groups.ensureTotalCapacity(allocator, groupsConst.len); + for (gid_idx) |entry| { + const group = try groupsConst[entry.idx].clone(allocator); + groups.appendAssumeCapacity(group); + getgr_bufsize = math.max(getgr_bufsize, group.strlenZ()); + } + } // verify whatever comes to cmph are unique: user names var name2user = StringHashMap(u32).init(allocator); @@ -167,28 +208,6 @@ pub fn deinit(self: *Corpus) void { self.* = undefined; } -// cmpUser compares two users for sorting. By username's utf8 codepoints, ascending.
-fn cmpUser(_: void, a: User, b: User) bool { - var utf8_a = (unicode.Utf8View.init(a.name) catch unreachable).iterator(); - var utf8_b = (unicode.Utf8View.init(b.name) catch unreachable).iterator(); - while (utf8_a.nextCodepoint()) |codepoint_a| { - if (utf8_b.nextCodepoint()) |codepoint_b| { - if (codepoint_a == codepoint_b) { - continue; - } else return codepoint_a < codepoint_b; - } - - // a is a prefix of b. It is thus shorter. - return true; - } - // b is a prefix of a - return false; -} - -fn cmpGroup(_: void, a: Group, b: Group) bool { - return a.gid < b.gid; -} - fn testUser(name: []const u8) User { var result = mem.zeroes(User); result.name = name; @@ -198,18 +217,6 @@ fn testUser(name: []const u8) User { const testing = std.testing; const someMembers = @import("Group.zig").someMembers; -test "users compare function" { - const a = testUser("a"); - const b = testUser("b"); - const bb = testUser("bb"); - try testing.expect(cmpUser({}, a, b)); - try testing.expect(!cmpUser({}, b, a)); - try testing.expect(cmpUser({}, a, bb)); - try testing.expect(!cmpUser({}, bb, a)); - try testing.expect(cmpUser({}, bb, b)); - try testing.expect(!cmpUser({}, b, bb)); -} - pub fn testCorpus(allocator: Allocator) !Corpus { const users = [_]User{ User{ .uid = 0,