memmove.zig (7246B) - Raw
1 const std = @import("std"); 2 const common = @import("./common.zig"); 3 const builtin = @import("builtin"); 4 const assert = std.debug.assert; 5 const memcpy = @import("memcpy.zig"); 6 7 const Element = common.PreferredLoadStoreElement; 8 9 comptime { 10 if (builtin.object_format != .c) { 11 const export_options: std.builtin.ExportOptions = .{ 12 .name = "memmove", 13 .linkage = common.linkage, 14 .visibility = common.visibility, 15 }; 16 17 if (builtin.mode == .ReleaseSmall or builtin.zig_backend == .stage2_aarch64) 18 @export(&memmoveSmall, export_options) 19 else 20 @export(&memmoveFast, export_options); 21 } 22 } 23 24 fn memmoveSmall(opt_dest: ?[*]u8, opt_src: ?[*]const u8, len: usize) callconv(.c) ?[*]u8 { 25 const dest = opt_dest.?; 26 const src = opt_src.?; 27 28 if (@intFromPtr(dest) < @intFromPtr(src)) { 29 for (0..len) |i| { 30 dest[i] = src[i]; 31 } 32 } else { 33 for (0..len) |i| { 34 dest[len - 1 - i] = src[len - 1 - i]; 35 } 36 } 37 38 return dest; 39 } 40 41 fn memmoveFast(dest: ?[*]u8, src: ?[*]u8, len: usize) callconv(.c) ?[*]u8 { 42 @setRuntimeSafety(common.test_safety); 43 const small_limit = @max(2 * @sizeOf(Element), @sizeOf(Element)); 44 45 if (copySmallLength(small_limit, dest.?, src.?, len)) return dest; 46 47 const dest_address = @intFromPtr(dest); 48 const src_address = @intFromPtr(src); 49 50 if (src_address < dest_address) { 51 copyBackwards(dest.?, src.?, len); 52 } else { 53 copyForwards(dest.?, src.?, len); 54 } 55 56 return dest; 57 } 58 59 inline fn copySmallLength( 60 comptime small_limit: comptime_int, 61 dest: [*]u8, 62 src: [*]const u8, 63 len: usize, 64 ) bool { 65 if (len < 16) { 66 copyLessThan16(dest, src, len); 67 return true; 68 } 69 70 if (comptime 2 < (std.math.log2(small_limit) + 1) / 2) { 71 if (copy16ToSmallLimit(small_limit, dest, src, len)) return true; 72 } 73 74 return false; 75 } 76 77 inline fn copyLessThan16( 78 dest: [*]u8, 79 src: [*]const u8, 80 len: usize, 81 ) void { 82 @setRuntimeSafety(common.test_safety); 83 if (len < 4) { 84 if (len == 0) return; 85 const b = len / 2; 86 const d0 = src[0]; 87 const db = src[b]; 88 const de = src[len - 1]; 89 dest[0] = d0; 90 dest[b] = db; 91 dest[len - 1] = de; 92 return; 93 } 94 copyRange4(4, dest, src, len); 95 } 96 97 inline fn copy16ToSmallLimit( 98 comptime small_limit: comptime_int, 99 dest: [*]u8, 100 src: [*]const u8, 101 len: usize, 102 ) bool { 103 @setRuntimeSafety(common.test_safety); 104 inline for (2..(std.math.log2(small_limit) + 1) / 2 + 1) |p| { 105 const limit = 1 << (2 * p); 106 if (len < limit) { 107 copyRange4(limit / 4, dest, src, len); 108 return true; 109 } 110 } 111 return false; 112 } 113 114 /// copy `len` bytes from `src` to `dest`; `len` must be in the range 115 /// `[copy_len, 4 * copy_len)`. 116 inline fn copyRange4( 117 comptime copy_len: comptime_int, 118 dest: [*]u8, 119 src: [*]const u8, 120 len: usize, 121 ) void { 122 @setRuntimeSafety(common.test_safety); 123 comptime assert(std.math.isPowerOfTwo(copy_len)); 124 assert(len >= copy_len); 125 assert(len < 4 * copy_len); 126 127 const a = len & (copy_len * 2); 128 const b = a / 2; 129 130 const last = len - copy_len; 131 const pen = last - b; 132 133 const d0 = src[0..copy_len].*; 134 const d1 = src[b..][0..copy_len].*; 135 const d2 = src[pen..][0..copy_len].*; 136 const d3 = src[last..][0..copy_len].*; 137 138 // the slice dest[0..len] is needed to workaround -ODebug miscompilation 139 dest[0..len][0..copy_len].* = d0; 140 dest[b..][0..copy_len].* = d1; 141 dest[pen..][0..copy_len].* = d2; 142 dest[last..][0..copy_len].* = d3; 143 } 144 145 inline fn copyForwards( 146 dest: [*]u8, 147 src: [*]const u8, 148 len: usize, 149 ) void { 150 @setRuntimeSafety(common.test_safety); 151 assert(len >= 2 * @sizeOf(Element)); 152 153 const head = src[0..@sizeOf(Element)].*; 154 const tail = src[len - @sizeOf(Element) ..][0..@sizeOf(Element)].*; 155 const alignment_offset = @alignOf(Element) - @intFromPtr(src) % @alignOf(Element); 156 const n = len - alignment_offset; 157 const d = dest + alignment_offset; 158 const s = src + alignment_offset; 159 160 copyBlocksAlignedSource(@ptrCast(d), @ptrCast(@alignCast(s)), n); 161 162 // copy last `copy_size` bytes unconditionally, since block copy 163 // methods only copy a multiple of `copy_size` bytes. 164 dest[len - @sizeOf(Element) ..][0..@sizeOf(Element)].* = tail; 165 dest[0..@sizeOf(Element)].* = head; 166 } 167 168 inline fn copyBlocksAlignedSource( 169 dest: [*]align(1) Element, 170 src: [*]const Element, 171 max_bytes: usize, 172 ) void { 173 copyBlocks(dest, src, max_bytes); 174 } 175 176 /// Copies the largest multiple of `@sizeOf(T)` bytes from `src` to `dest`, 177 /// that is less than `max_bytes` where `T` is the child type of `src` and 178 /// `dest`; `max_bytes` must be at least `@sizeOf(T)`. 179 inline fn copyBlocks( 180 dest: anytype, 181 src: anytype, 182 max_bytes: usize, 183 ) void { 184 @setRuntimeSafety(common.test_safety); 185 186 const T = @typeInfo(@TypeOf(dest)).pointer.child; 187 comptime assert(T == @typeInfo(@TypeOf(src)).pointer.child); 188 189 const loop_count = max_bytes / @sizeOf(T); 190 191 for (dest[0..loop_count], src[0..loop_count]) |*d, s| { 192 d.* = s; 193 } 194 } 195 196 inline fn copyBackwards( 197 dest: [*]u8, 198 src: [*]const u8, 199 len: usize, 200 ) void { 201 const end_bytes = src[len - @sizeOf(Element) ..][0..@sizeOf(Element)].*; 202 const start_bytes = src[0..@sizeOf(Element)].*; 203 204 const d_addr: usize = std.mem.alignBackward(usize, @intFromPtr(dest) + len, @alignOf(Element)); 205 const d: [*]Element = @ptrFromInt(d_addr); 206 const n = d_addr - @intFromPtr(dest); 207 const s: [*]align(1) const Element = @ptrCast(src + n); 208 209 const loop_count = n / @sizeOf(Element); 210 var i: usize = 1; 211 while (i < loop_count + 1) : (i += 1) { 212 (d - i)[0] = (s - i)[0]; 213 } 214 215 dest[0..@sizeOf(Element)].* = start_bytes; 216 dest[len - @sizeOf(Element) ..][0..@sizeOf(Element)].* = end_bytes; 217 } 218 219 test memmoveFast { 220 if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; 221 222 const max_len = 1024; 223 var buffer: [max_len + @alignOf(Element) - 1]u8 = undefined; 224 for (&buffer, 0..) |*b, i| { 225 b.* = @intCast(i % 97); 226 } 227 228 var move_buffer: [max_len + @alignOf(Element) - 1]u8 align(@alignOf(Element)) = undefined; 229 230 for (0..max_len) |copy_len| { 231 for (0..@alignOf(Element)) |s_offset| { 232 for (0..@alignOf(Element)) |d_offset| { 233 for (&move_buffer, buffer) |*d, s| { 234 d.* = s; 235 } 236 const dest = move_buffer[d_offset..][0..copy_len]; 237 const src = move_buffer[s_offset..][0..copy_len]; 238 _ = memmoveFast(dest.ptr, src.ptr, copy_len); 239 std.testing.expectEqualSlices(u8, buffer[s_offset..][0..copy_len], dest) catch |e| { 240 std.debug.print( 241 "error occured with source offset {d} and destination offset {d}\n", 242 .{ 243 s_offset, 244 d_offset, 245 }, 246 ); 247 return e; 248 }; 249 } 250 } 251 } 252 }