memcpy.zig (6452B) - Raw
const std = @import("std");
const assert = std.debug.assert;
const common = @import("./common.zig");
const builtin = @import("builtin");

// Export the `memcpy` symbol, except when the object format is C.
// `ReleaseSmall` builds and the `stage2_aarch64` backend get the byte-wise
// implementation; every other configuration gets the fast one.
comptime {
    if (builtin.object_format != .c) {
        const export_options: std.builtin.ExportOptions = .{
            .name = "memcpy",
            .linkage = common.linkage,
            .visibility = common.visibility,
        };

        const prefer_small = builtin.mode == .ReleaseSmall or
            builtin.zig_backend == .stage2_aarch64;

        if (prefer_small)
            @export(&memcpySmall, export_options)
        else
            @export(&memcpyFast, export_options);
    }
}

/// Unit used for the wide loads and stores below.
const Element = common.PreferredLoadStoreElement;

comptime {
    // The copy helpers in this file rely on a power-of-two element size.
    assert(std.math.isPowerOfTwo(@sizeOf(Element)));
}

/// Byte-at-a-time `memcpy`: copies `len` bytes from `src` to `dest` and
/// returns `dest`. The pointers are only unwrapped inside the loop, so they
/// are never dereferenced when `len` is zero.
fn memcpySmall(noalias dest: ?[*]u8, noalias src: ?[*]const u8, len: usize) callconv(.c) ?[*]u8 {
    @setRuntimeSafety(false);

    var index: usize = 0;
    while (index < len) : (index += 1) {
        dest.?[index] = src.?[index];
    }

    return dest;
}

/// Speed-oriented `memcpy`: copies `len` bytes from `src` to `dest` and
/// returns `dest`. Short lengths are handled by `copySmallLength`; anything it
/// declines is handled by `copyForwards`.
fn memcpyFast(noalias dest: ?[*]u8, noalias src: ?[*]const u8, len: usize) callconv(.c) ?[*]u8 {
    @setRuntimeSafety(false);

    const small_limit = 2 * @sizeOf(Element);

    const to = dest.?;
    const from = src.?;

    if (!copySmallLength(small_limit, to, from, len)) {
        copyForwards(to, from, len);
    }

    return dest;
}

/// Tries to copy `len` bytes using the short-length strategies.
///
/// Returns `true` if the copy was performed, or `false` if `len` is too large
/// for these strategies and the caller still has to do the copy.
inline fn copySmallLength(
    comptime small_limit: comptime_int,
    dest: [*]u8,
    src: [*]const u8,
    len: usize,
) bool {
    if (len < 16) {
        copyLessThan16(dest, src, len);
        return true;
    }

    // Only instantiate the 16..limit tiers when `small_limit` is large enough
    // for `copy16ToSmallLimit` to have a tier beyond the one for `len < 16`.
    if (comptime (std.math.log2(small_limit) + 1) / 2 > 2) {
        return copy16ToSmallLimit(small_limit, dest, src, len);
    }

    return false;
}

/// Copies `len` bytes from `src` to `dest`, where `len` is below 16.
inline fn copyLessThan16(
    dest: [*]u8,
    src: [*]const u8,
    len: usize,
) void {
    @setRuntimeSafety(false);

    if (len >= 4) {
        copyRange4(4, dest, src, len);
        return;
    }
    if (len == 0) return;

    // For lengths 1 to 3 the first, middle and last bytes together cover the
    // whole range (some of these indices coincide for lengths 1 and 2).
    const middle = len / 2;
    const final = len - 1;
    dest[0] = src[0];
    dest[middle] = src[middle];
    dest[final] = src[final];
}

/// Walks comptime-generated length tiers with limits `1 << (2 * p)` and copies
/// with `copyRange4` using a quarter of the first limit that exceeds `len`.
///
/// Returns `true` if a tier matched and the copy was performed, otherwise
/// `false`.
inline fn copy16ToSmallLimit(
    comptime small_limit: comptime_int,
    dest: [*]u8,
    src: [*]const u8,
    len: usize,
) bool {
    @setRuntimeSafety(false);

    const tier_end = (std.math.log2(small_limit) + 1) / 2 + 1;
    inline for (2..tier_end) |power| {
        const limit = 1 << (2 * power);
        if (len < limit) {
            copyRange4(limit / 4, dest, src, len);
            return true;
        }
    }
    return false;
}

/// Copies `len` bytes from `src` to `dest` in three steps: one
/// `@sizeOf(Element)`-byte copy at the start, a block copy that begins at the
/// next `@alignOf(Element)` boundary of `src`, and one `@sizeOf(Element)`-byte
/// copy at the end.
///
/// `memcpyFast` only calls this after `copySmallLength` has declined the
/// length, so `len` is expected to be large enough for the first and last
/// copies to stay within the buffers.
inline fn copyForwards(
    noalias dest: [*]u8,
    noalias src: [*]const u8,
    len: usize,
) void {
    @setRuntimeSafety(false);

    const element_size = @sizeOf(Element);
    const element_align = @alignOf(Element);

    copyFixedLength(dest, src, element_size);

    // Distance to the next aligned source address; this is a full
    // `element_align` when `src` is already aligned.
    const skip = element_align - @intFromPtr(src) % element_align;
    const remaining = len - skip;
    const aligned_src = src + skip;
    const shifted_dest = dest + skip;

    copyBlocksAlignedSource(@ptrCast(shifted_dest), @ptrCast(@alignCast(aligned_src)), remaining);

    // copy last `@sizeOf(Element)` bytes unconditionally, since block copy
    // methods only copy a multiple of `@sizeOf(Element)` bytes.
    const tail_start = len - element_size;
    copyFixedLength(dest + tail_start, src + tail_start, element_size);
}

/// Block copy for a source pointer with `Element` alignment and a destination
/// pointer with alignment 1; forwards to `copyBlocks`.
inline fn copyBlocksAlignedSource(
    noalias dest: [*]align(1) Element,
    noalias src: [*]const Element,
    max_bytes: usize,
) void {
    copyBlocks(dest, src, max_bytes);
}

/// Copies the largest multiple of `@sizeOf(T)` bytes from `src` to `dest`
/// that does not exceed `max_bytes`, where `T` is the child type of `src` and
/// `dest`.
/// `dest` and `src` must be pointers sharing one child type `T` (checked at
/// compile time); exactly `max_bytes / @sizeOf(T)` elements are copied.
inline fn copyBlocks(
    noalias dest: anytype,
    noalias src: anytype,
    max_bytes: usize,
) void {
    @setRuntimeSafety(false);

    const T = @typeInfo(@TypeOf(dest)).pointer.child;
    comptime assert(T == @typeInfo(@TypeOf(src)).pointer.child);

    // Integer division drops any trailing partial element.
    const block_count = max_bytes / @sizeOf(T);

    for (0..block_count) |index| {
        dest[index] = src[index];
    }
}

/// Copies exactly `len` bytes from `src` to `dest`, where `len` is a
/// comptime-known power of two. Neither pointer needs any alignment.
///
/// The unit used for the copy is chosen at compile time: `Element` when `len`
/// is at least `@sizeOf(Element)`, a byte vector of `len` lanes when `len` is
/// larger than `@sizeOf(usize)`, and otherwise an unsigned integer of
/// `len * 8` bits.
inline fn copyFixedLength(
    noalias dest: [*]u8,
    noalias src: [*]const u8,
    comptime len: comptime_int,
) void {
    @setRuntimeSafety(false);
    comptime assert(std.math.isPowerOfTwo(len));

    const Chunk = comptime chunk: {
        if (len >= @sizeOf(Element)) break :chunk Element;
        if (len > @sizeOf(usize)) break :chunk @Vector(len, u8);
        break :chunk @Type(.{ .int = .{ .signedness = .unsigned, .bits = len * 8 } });
    };

    const chunk_count = @divExact(len, @sizeOf(Chunk));

    // Reinterpret both buffers as alignment-1 arrays of `Chunk`.
    const to: [*]align(1) Chunk = @ptrCast(dest);
    const from: [*]align(1) const Chunk = @ptrCast(src);

    // Unrolled at compile time, one load/store pair per chunk.
    comptime var index = 0;
    inline while (index < chunk_count) : (index += 1) {
        to[index] = from[index];
    }
}

/// Copies `len` bytes from `src` to `dest`; `len` must be in the range
/// `[copy_len, 4 * copy_len)`.
/// Performs four fixed-size copies of `copy_len` bytes each: one at the
/// start, one at offset 0 or `copy_len`, and two at the mirrored positions
/// measured from the end. The copied ranges may overlap one another.
/// `copy_len` must be a power of two (checked at compile time).
inline fn copyRange4(
    comptime copy_len: comptime_int,
    noalias dest: [*]u8,
    noalias src: [*]const u8,
    len: usize,
) void {
    @setRuntimeSafety(false);
    comptime assert(std.math.isPowerOfTwo(copy_len));

    // `copy_len` is a power of two, so the mask yields either 0 or
    // `2 * copy_len`; halving it gives 0 or `copy_len`.
    const high_bit = len & (copy_len * 2);
    const second_start = high_bit / 2;

    const fourth_start = len - copy_len;
    const third_start = fourth_start - second_start;

    copyFixedLength(dest, src, copy_len);
    copyFixedLength(dest + second_start, src + second_start, copy_len);
    copyFixedLength(dest + third_start, src + third_start, copy_len);
    copyFixedLength(dest + fourth_start, src + fourth_start, copy_len);
}

test "memcpy" {
    if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;

    const Harness = struct {
        /// Runs `copy_func` for every length in `0..max_len` combined with
        /// every source and destination offset in `0..@alignOf(Element)`, and
        /// checks that the destination bytes equal the source bytes.
        fn testFunc(comptime copy_func: anytype) !void {
            const max_len = 1024;
            // Extra room so that every tested offset still leaves space for
            // the longest tested copy.
            const buffer_len = max_len + @alignOf(Element) - 1;

            var buffer: [buffer_len]u8 align(@alignOf(Element)) = undefined;
            for (0..buffer.len) |index| {
                buffer[index] = @intCast(index % 97);
            }
            var dest: [buffer_len]u8 align(@alignOf(Element)) = undefined;

            for (0..max_len) |copy_len| {
                for (0..@alignOf(Element)) |s_offset| {
                    for (0..@alignOf(Element)) |d_offset| {
                        // Reset the destination before each copy.
                        @memset(&dest, 0xff);
                        const source = buffer[s_offset..][0..copy_len];
                        const target = dest[d_offset..][0..copy_len];
                        _ = copy_func(@ptrCast(target.ptr), @ptrCast(source.ptr), source.len);
                        std.testing.expectEqualSlices(u8, source, target) catch |err| {
                            std.debug.print("error encountered for length={d}, s_offset={d}, d_offset={d}\n", .{
                                copy_len, s_offset, d_offset,
                            });
                            return err;
                        };
                    }
                }
            }
        }
    };

    try Harness.testFunc(memcpySmall);
    try Harness.testFunc(memcpyFast);
}