From 57ea6207d3cb2db706bdc06c14605e4b901736dd Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Wed, 26 Apr 2023 13:48:38 -0700 Subject: [PATCH] std.ArrayList: mark the appendNTimes methods inline The previous commit introduced an optimization to the LLVM backend that makes `@memset` lower more optimally when the element is comptime-known and has a repeating byte pattern. By making these functions inline, if the element parameter is comptime-known at the callsite, it will be comptime-known in the `@memset` call, causing more use of the LLVM `memset` intrinsic rather than an inline for loop when using the LLVM backend. This affects, for example, std.crypto.argon2, which calls appendNTimesAssumeCapacity with a `[128]u64` as the element. This is now lowered with a single `memset` call. --- lib/std/array_list.zig | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/lib/std/array_list.zig b/lib/std/array_list.zig index 1791482bc4..352aef14fc 100644 --- a/lib/std/array_list.zig +++ b/lib/std/array_list.zig @@ -306,18 +306,22 @@ pub fn ArrayListAligned(comptime T: type, comptime alignment: ?u29) type { /// Append a value to the list `n` times. /// Allocates more memory as necessary. /// Invalidates pointers if additional memory is needed. - pub fn appendNTimes(self: *Self, value: T, n: usize) Allocator.Error!void { + /// The function is inline so that a comptime-known `value` parameter will + /// have a more optimal memset codegen in case it has a repeated byte pattern. + pub inline fn appendNTimes(self: *Self, value: T, n: usize) Allocator.Error!void { const old_len = self.items.len; try self.resize(self.items.len + n); - mem.set(T, self.items[old_len..self.items.len], value); + @memset(self.items[old_len..self.items.len], value); } /// Append a value to the list `n` times. /// Asserts the capacity is enough. **Does not** invalidate pointers. - pub fn appendNTimesAssumeCapacity(self: *Self, value: T, n: usize) void { + /// The function is inline so that a comptime-known `value` parameter will + /// have a more optimal memset codegen in case it has a repeated byte pattern. + pub inline fn appendNTimesAssumeCapacity(self: *Self, value: T, n: usize) void { const new_len = self.items.len + n; assert(new_len <= self.capacity); - mem.set(T, self.items.ptr[self.items.len..new_len], value); + @memset(self.items.ptr[self.items.len..new_len], value); self.items.len = new_len; } @@ -766,19 +770,23 @@ pub fn ArrayListAlignedUnmanaged(comptime T: type, comptime alignment: ?u29) typ /// Append a value to the list `n` times. /// Allocates more memory as necessary. /// Invalidates pointers if additional memory is needed. - pub fn appendNTimes(self: *Self, allocator: Allocator, value: T, n: usize) Allocator.Error!void { + /// The function is inline so that a comptime-known `value` parameter will + /// have a more optimal memset codegen in case it has a repeated byte pattern. + pub inline fn appendNTimes(self: *Self, allocator: Allocator, value: T, n: usize) Allocator.Error!void { const old_len = self.items.len; try self.resize(allocator, self.items.len + n); - mem.set(T, self.items[old_len..self.items.len], value); + @memset(self.items[old_len..self.items.len], value); } /// Append a value to the list `n` times. /// **Does not** invalidate pointers. /// Asserts the capacity is enough. - pub fn appendNTimesAssumeCapacity(self: *Self, value: T, n: usize) void { + /// The function is inline so that a comptime-known `value` parameter will + /// have a more optimal memset codegen in case it has a repeated byte pattern. + pub inline fn appendNTimesAssumeCapacity(self: *Self, value: T, n: usize) void { const new_len = self.items.len + n; assert(new_len <= self.capacity); - mem.set(T, self.items.ptr[self.items.len..new_len], value); + @memset(self.items.ptr[self.items.len..new_len], value); self.items.len = new_len; }