zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

memcpy.zig (6452B) - Raw


      1 const std = @import("std");
      2 const assert = std.debug.assert;
      3 const common = @import("./common.zig");
      4 const builtin = @import("builtin");
      5 
      6 comptime {
      7     if (builtin.object_format != .c) {
      8         const export_options: std.builtin.ExportOptions = .{
      9             .name = "memcpy",
     10             .linkage = common.linkage,
     11             .visibility = common.visibility,
     12         };
     13 
     14         if (builtin.mode == .ReleaseSmall or builtin.zig_backend == .stage2_aarch64)
     15             @export(&memcpySmall, export_options)
     16         else
     17             @export(&memcpyFast, export_options);
     18     }
     19 }
     20 
     21 const Element = common.PreferredLoadStoreElement;
     22 
     23 comptime {
     24     assert(std.math.isPowerOfTwo(@sizeOf(Element)));
     25 }
     26 
     27 fn memcpySmall(noalias dest: ?[*]u8, noalias src: ?[*]const u8, len: usize) callconv(.c) ?[*]u8 {
     28     @setRuntimeSafety(false);
     29 
     30     for (0..len) |i| {
     31         dest.?[i] = src.?[i];
     32     }
     33 
     34     return dest;
     35 }
     36 
     37 fn memcpyFast(noalias dest: ?[*]u8, noalias src: ?[*]const u8, len: usize) callconv(.c) ?[*]u8 {
     38     @setRuntimeSafety(false);
     39 
     40     const small_limit = 2 * @sizeOf(Element);
     41 
     42     if (copySmallLength(small_limit, dest.?, src.?, len)) return dest;
     43 
     44     copyForwards(dest.?, src.?, len);
     45 
     46     return dest;
     47 }
     48 
     49 inline fn copySmallLength(
     50     comptime small_limit: comptime_int,
     51     dest: [*]u8,
     52     src: [*]const u8,
     53     len: usize,
     54 ) bool {
     55     if (len < 16) {
     56         copyLessThan16(dest, src, len);
     57         return true;
     58     }
     59 
     60     if (comptime 2 < (std.math.log2(small_limit) + 1) / 2) {
     61         if (copy16ToSmallLimit(small_limit, dest, src, len)) return true;
     62     }
     63 
     64     return false;
     65 }
     66 
     67 inline fn copyLessThan16(
     68     dest: [*]u8,
     69     src: [*]const u8,
     70     len: usize,
     71 ) void {
     72     @setRuntimeSafety(false);
     73     if (len < 4) {
     74         if (len == 0) return;
     75         dest[0] = src[0];
     76         dest[len / 2] = src[len / 2];
     77         dest[len - 1] = src[len - 1];
     78         return;
     79     }
     80     copyRange4(4, dest, src, len);
     81 }
     82 
     83 inline fn copy16ToSmallLimit(
     84     comptime small_limit: comptime_int,
     85     dest: [*]u8,
     86     src: [*]const u8,
     87     len: usize,
     88 ) bool {
     89     @setRuntimeSafety(false);
     90     inline for (2..(std.math.log2(small_limit) + 1) / 2 + 1) |p| {
     91         const limit = 1 << (2 * p);
     92         if (len < limit) {
     93             copyRange4(limit / 4, dest, src, len);
     94             return true;
     95         }
     96     }
     97     return false;
     98 }
     99 
    100 inline fn copyForwards(
    101     noalias dest: [*]u8,
    102     noalias src: [*]const u8,
    103     len: usize,
    104 ) void {
    105     @setRuntimeSafety(false);
    106 
    107     copyFixedLength(dest, src, @sizeOf(Element));
    108     const alignment_offset = @alignOf(Element) - @intFromPtr(src) % @alignOf(Element);
    109     const n = len - alignment_offset;
    110     const d = dest + alignment_offset;
    111     const s = src + alignment_offset;
    112 
    113     copyBlocksAlignedSource(@ptrCast(d), @ptrCast(@alignCast(s)), n);
    114 
    115     // copy last `@sizeOf(Element)` bytes unconditionally, since block copy
    116     // methods only copy a multiple of `@sizeOf(Element)` bytes.
    117     const offset = len - @sizeOf(Element);
    118     copyFixedLength(dest + offset, src + offset, @sizeOf(Element));
    119 }
    120 
    121 inline fn copyBlocksAlignedSource(
    122     noalias dest: [*]align(1) Element,
    123     noalias src: [*]const Element,
    124     max_bytes: usize,
    125 ) void {
    126     copyBlocks(dest, src, max_bytes);
    127 }
    128 
    129 /// Copies the largest multiple of `@sizeOf(T)` bytes from `src` to `dest`,
    130 /// that is less than `max_bytes` where `T` is the child type of `src` and
    131 /// `dest`.
    132 inline fn copyBlocks(
    133     noalias dest: anytype,
    134     noalias src: anytype,
    135     max_bytes: usize,
    136 ) void {
    137     @setRuntimeSafety(false);
    138 
    139     const T = @typeInfo(@TypeOf(dest)).pointer.child;
    140     comptime assert(T == @typeInfo(@TypeOf(src)).pointer.child);
    141 
    142     const loop_count = max_bytes / @sizeOf(T);
    143 
    144     for (dest[0..loop_count], src[0..loop_count]) |*d, s| {
    145         d.* = s;
    146     }
    147 }
    148 
    149 inline fn copyFixedLength(
    150     noalias dest: [*]u8,
    151     noalias src: [*]const u8,
    152     comptime len: comptime_int,
    153 ) void {
    154     @setRuntimeSafety(false);
    155     comptime assert(std.math.isPowerOfTwo(len));
    156 
    157     const T = if (len >= @sizeOf(Element))
    158         Element
    159     else if (len > @sizeOf(usize))
    160         @Vector(len, u8)
    161     else
    162         @Type(.{ .int = .{ .signedness = .unsigned, .bits = len * 8 } });
    163 
    164     const loop_count = @divExact(len, @sizeOf(T));
    165 
    166     const d: [*]align(1) T = @ptrCast(dest);
    167     const s: [*]align(1) const T = @ptrCast(src);
    168 
    169     inline for (0..loop_count) |i| {
    170         d[i] = s[i];
    171     }
    172 }
    173 
    174 /// copy `len` bytes from `src` to `dest`; `len` must be in the range
    175 /// `[copy_len, 4 * copy_len)`.
    176 inline fn copyRange4(
    177     comptime copy_len: comptime_int,
    178     noalias dest: [*]u8,
    179     noalias src: [*]const u8,
    180     len: usize,
    181 ) void {
    182     @setRuntimeSafety(false);
    183     comptime assert(std.math.isPowerOfTwo(copy_len));
    184 
    185     const a = len & (copy_len * 2);
    186     const b = a / 2;
    187 
    188     const last = len - copy_len;
    189     const pen = last - b;
    190 
    191     copyFixedLength(dest, src, copy_len);
    192     copyFixedLength(dest + b, src + b, copy_len);
    193     copyFixedLength(dest + pen, src + pen, copy_len);
    194     copyFixedLength(dest + last, src + last, copy_len);
    195 }
    196 
    197 test "memcpy" {
    198     if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest;
    199 
    200     const S = struct {
    201         fn testFunc(comptime copy_func: anytype) !void {
    202             const max_len = 1024;
    203             var buffer: [max_len + @alignOf(Element) - 1]u8 align(@alignOf(Element)) = undefined;
    204             for (&buffer, 0..) |*b, i| {
    205                 b.* = @intCast(i % 97);
    206             }
    207             var dest: [max_len + @alignOf(Element) - 1]u8 align(@alignOf(Element)) = undefined;
    208 
    209             for (0..max_len) |copy_len| {
    210                 for (0..@alignOf(Element)) |s_offset| {
    211                     for (0..@alignOf(Element)) |d_offset| {
    212                         @memset(&dest, 0xff);
    213                         const s = buffer[s_offset..][0..copy_len];
    214                         const d = dest[d_offset..][0..copy_len];
    215                         _ = copy_func(@ptrCast(d.ptr), @ptrCast(s.ptr), s.len);
    216                         std.testing.expectEqualSlices(u8, s, d) catch |e| {
    217                             std.debug.print("error encountered for length={d}, s_offset={d}, d_offset={d}\n", .{
    218                                 copy_len, s_offset, d_offset,
    219                             });
    220                             return e;
    221                         };
    222                     }
    223                 }
    224             }
    225         }
    226     };
    227 
    228     try S.testFunc(memcpySmall);
    229     try S.testFunc(memcpyFast);
    230 }