commit 57ea6207d3cb2db706bdc06c14605e4b901736dd (tree)
parent 82fc360613e7070ffe7124fb070b151a382bd31b
Author: Andrew Kelley <andrew@ziglang.org>
Date: Wed, 26 Apr 2023 13:48:38 -0700
std.ArrayList: mark the appendNTimes methods inline
The previous commit introduced an optimization to the LLVM backend that
makes `@memset` lower more optimally when the element is comptime-known
and has a repeating byte pattern.
By making these functions inline, if the element parameter is
comptime-known at the callsite, it will also be comptime-known in the
`@memset` call, so the LLVM backend uses its `memset` intrinsic more
often instead of emitting a loop inline.
This affects, for example, std.crypto.argon2, which calls
appendNTimesAssumeCapacity with a `[128]u64` as the element. This is now
lowered with a single `memset` call.
Diffstat:
1 file changed, 16 insertions(+), 8 deletions(-)
diff --git a/lib/std/array_list.zig b/lib/std/array_list.zig
@@ -306,18 +306,22 @@ pub fn ArrayListAligned(comptime T: type, comptime alignment: ?u29) type {
/// Append a value to the list `n` times.
/// Allocates more memory as necessary.
/// Invalidates pointers if additional memory is needed.
- pub fn appendNTimes(self: *Self, value: T, n: usize) Allocator.Error!void {
+ /// The function is inline so that a comptime-known `value` parameter will
+ /// have a more optimal memset codegen in case it has a repeated byte pattern.
+ pub inline fn appendNTimes(self: *Self, value: T, n: usize) Allocator.Error!void {
const old_len = self.items.len;
try self.resize(self.items.len + n);
- mem.set(T, self.items[old_len..self.items.len], value);
+ @memset(self.items[old_len..self.items.len], value);
}
/// Append a value to the list `n` times.
/// Asserts the capacity is enough. **Does not** invalidate pointers.
- pub fn appendNTimesAssumeCapacity(self: *Self, value: T, n: usize) void {
+ /// The function is inline so that a comptime-known `value` parameter will
+ /// have a more optimal memset codegen in case it has a repeated byte pattern.
+ pub inline fn appendNTimesAssumeCapacity(self: *Self, value: T, n: usize) void {
const new_len = self.items.len + n;
assert(new_len <= self.capacity);
- mem.set(T, self.items.ptr[self.items.len..new_len], value);
+ @memset(self.items.ptr[self.items.len..new_len], value);
self.items.len = new_len;
}
@@ -766,19 +770,23 @@ pub fn ArrayListAlignedUnmanaged(comptime T: type, comptime alignment: ?u29) typ
/// Append a value to the list `n` times.
/// Allocates more memory as necessary.
/// Invalidates pointers if additional memory is needed.
- pub fn appendNTimes(self: *Self, allocator: Allocator, value: T, n: usize) Allocator.Error!void {
+ /// The function is inline so that a comptime-known `value` parameter will
+ /// have a more optimal memset codegen in case it has a repeated byte pattern.
+ pub inline fn appendNTimes(self: *Self, allocator: Allocator, value: T, n: usize) Allocator.Error!void {
const old_len = self.items.len;
try self.resize(allocator, self.items.len + n);
- mem.set(T, self.items[old_len..self.items.len], value);
+ @memset(self.items[old_len..self.items.len], value);
}
/// Append a value to the list `n` times.
/// **Does not** invalidate pointers.
/// Asserts the capacity is enough.
- pub fn appendNTimesAssumeCapacity(self: *Self, value: T, n: usize) void {
+ /// The function is inline so that a comptime-known `value` parameter will
+ /// have a more optimal memset codegen in case it has a repeated byte pattern.
+ pub inline fn appendNTimesAssumeCapacity(self: *Self, value: T, n: usize) void {
const new_len = self.items.len + n;
assert(new_len <= self.capacity);
- mem.set(T, self.items.ptr[self.items.len..new_len], value);
+ @memset(self.items.ptr[self.items.len..new_len], value);
self.items.len = new_len;
}