Merge pull request #22513 from ziglang/memcpy

enhance memcpy and remove redundant implementations
Andrew Kelley authored 2025-01-17 22:52:03 -05:00, committed by GitHub
51 changed files with 163 additions and 3267 deletions

View File

@@ -233,7 +233,6 @@ comptime {
_ = @import("compiler_rt/memcpy.zig");
_ = @import("compiler_rt/memset.zig");
_ = @import("compiler_rt/memmove.zig");
_ = @import("compiler_rt/memcmp.zig");
_ = @import("compiler_rt/bcmp.zig");
_ = @import("compiler_rt/ssp.zig");

View File

@@ -5,24 +5,169 @@ const builtin = @import("builtin");
comptime {
if (builtin.object_format != .c) {
@export(&memcpy, .{ .name = "memcpy", .linkage = common.linkage, .visibility = common.visibility });
@export(&memmove, .{ .name = "memmove", .linkage = common.linkage, .visibility = common.visibility });
}
}
// removed in this commit: the old byte-at-a-time memcpy
pub fn memcpy(noalias dest: ?[*]u8, noalias src: ?[*]const u8, len: usize) callconv(.C) ?[*]u8 {
@setRuntimeSafety(false);
if (len != 0) {
var d = dest.?;
var s = src.?;
var n = len;
while (true) {
d[0] = s[0];
n -= 1;
if (n == 0) break;
d += 1;
s += 1;
// added in this commit: an arch check plus a memcpy that forwards to the vectorized memmove below
const llvm_cannot_lower = switch (builtin.cpu.arch) {
.arm, .armeb, .thumb, .thumbeb => builtin.zig_backend == .stage2_llvm,
else => false,
};
fn memcpy(noalias opt_dest: ?[*]u8, noalias opt_src: ?[*]const u8, len: usize) callconv(.C) ?[*]u8 {
if (llvm_cannot_lower) {
for (0..len) |i| opt_dest.?[i] = opt_src.?[i];
return opt_dest;
} else {
return memmove(opt_dest, opt_src, len);
}
}
/// A port of https://github.com/facebook/folly/blob/1c8bc50e88804e2a7361a57cd9b551dd10f6c5fd/folly/memcpy.S
fn memmove(opt_dest: ?[*]u8, opt_src: ?[*]const u8, len: usize) callconv(.C) ?[*]u8 {
if (llvm_cannot_lower) {
if (@intFromPtr(opt_dest) < @intFromPtr(opt_src)) {
for (0..len) |i| opt_dest.?[i] = opt_src.?[i];
return opt_dest;
} else {
var index = len;
while (index != 0) {
index -= 1;
opt_dest.?[index] = opt_src.?[index];
}
return opt_dest;
}
}
if (len == 0) {
@branchHint(.unlikely);
return opt_dest;
}
const dest = opt_dest.?;
const src = opt_src.?;
if (len < 8) {
@branchHint(.unlikely);
if (len == 1) {
@branchHint(.unlikely);
dest[0] = src[0];
} else if (len >= 4) {
@branchHint(.unlikely);
blockCopy(dest, src, 4, len);
} else {
blockCopy(dest, src, 2, len);
}
return dest;
}
if (len > 32) {
@branchHint(.unlikely);
if (len > 256) {
@branchHint(.unlikely);
copyMove(dest, src, len);
return dest;
}
copyLong(dest, src, len);
return dest;
}
if (len > 16) {
@branchHint(.unlikely);
blockCopy(dest, src, 16, len);
return dest;
}
blockCopy(dest, src, 8, len);
return dest;
}
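// blockCopy covers block_size <= len <= 2 * block_size by loading the first and
// last block_size bytes and then storing both; the overlap in the middle is
// written twice, which is harmless, and both loads happen before either store,
// so overlapping ranges are also handled correctly.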
inline fn blockCopy(dest: [*]u8, src: [*]const u8, block_size: comptime_int, len: usize) void {
const first = @as(*align(1) const @Vector(block_size, u8), src[0..block_size]).*;
const second = @as(*align(1) const @Vector(block_size, u8), src[len - block_size ..][0..block_size]).*;
dest[0..block_size].* = first;
dest[len - block_size ..][0..block_size].* = second;
}
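// copyLong handles 32 < len <= 256 with pairs of 32-byte blocks copied inward
// from both ends; every source block it needs is loaded before any store is
// issued, so it remains correct for overlapping ranges.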
inline fn copyLong(dest: [*]u8, src: [*]const u8, len: usize) void {
var array: [8]@Vector(32, u8) = undefined;
inline for (.{ 64, 128, 192, 256 }, 0..) |N, i| {
array[i * 2] = src[(N / 2) - 32 ..][0..32].*;
array[(i * 2) + 1] = src[len - N / 2 ..][0..32].*;
if (len <= N) {
@branchHint(.unlikely);
for (0..i + 1) |j| {
dest[j * 32 ..][0..32].* = array[j * 2];
dest[len - ((j * 32) + 32) ..][0..32].* = array[(j * 2) + 1];
}
return;
}
}
}
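// copyMove picks a direction for potentially overlapping copies: forward is
// safe when src is at or above dest, or when the ranges do not overlap at all;
// only the case where dest falls inside the source range needs the backward
// routine.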
inline fn copyMove(dest: [*]u8, src: [*]const u8, len: usize) void {
if (@intFromPtr(src) >= @intFromPtr(dest)) {
@branchHint(.unlikely);
copyForward(dest, src, len);
} else if (@intFromPtr(src) + len > @intFromPtr(dest)) {
@branchHint(.unlikely);
overlapBwd(dest, src, len);
} else {
copyForward(dest, src, len);
}
}
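// copyForward copies 128 bytes per iteration; the final 32 source bytes are
// loaded up front so the trailing store is still correct if the loop has
// already overwritten them, and remainders larger than 32 bytes fall through
// to copyLong.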
inline fn copyForward(dest: [*]u8, src: [*]const u8, len: usize) void {
const tail: @Vector(32, u8) = src[len - 32 ..][0..32].*;
const N: usize = len & ~@as(usize, 127);
var i: usize = 0;
while (i < N) : (i += 128) {
dest[i..][0..32].* = src[i..][0..32].*;
dest[i + 32 ..][0..32].* = src[i + 32 ..][0..32].*;
dest[i + 64 ..][0..32].* = src[i + 64 ..][0..32].*;
dest[i + 96 ..][0..32].* = src[i + 96 ..][0..32].*;
}
if (len - i <= 32) {
@branchHint(.unlikely);
dest[len - 32 ..][0..32].* = tail;
} else {
copyLong(dest[i..], src[i..], len - i);
}
}
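// overlapBwd handles a dest range that overlaps the tail of src: it walks
// backwards in 128-byte chunks with 32-byte-aligned destination stores, having
// saved the first 128 and last 32 source bytes in registers up front, and
// writes those saved blocks after the loop.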
inline fn overlapBwd(dest: [*]u8, src: [*]const u8, len: usize) void {
var array: [5]@Vector(32, u8) = undefined;
array[0] = src[len - 32 ..][0..32].*;
inline for (1..5) |i| array[i] = src[(i - 1) << 5 ..][0..32].*;
const end: usize = (@intFromPtr(dest) + len - 32) & 31;
const range = len - end;
var s = src + range;
var d = dest + range;
while (@intFromPtr(s) > @intFromPtr(src + 128)) {
// zig fmt: off
const first = @as(*align(1) const @Vector(32, u8), @ptrCast(s - 32)).*;
const second = @as(*align(1) const @Vector(32, u8), @ptrCast(s - 64)).*;
const third = @as(*align(1) const @Vector(32, u8), @ptrCast(s - 96)).*;
const fourth = @as(*align(1) const @Vector(32, u8), @ptrCast(s - 128)).*;
@as(*align(32) @Vector(32, u8), @alignCast(@ptrCast(d - 32))).* = first;
@as(*align(32) @Vector(32, u8), @alignCast(@ptrCast(d - 64))).* = second;
@as(*align(32) @Vector(32, u8), @alignCast(@ptrCast(d - 96))).* = third;
@as(*align(32) @Vector(32, u8), @alignCast(@ptrCast(d - 128))).* = fourth;
// zig fmt: on
s -= 128;
d -= 128;
}
inline for (array[1..], 0..) |vec, i| dest[i * 32 ..][0..32].* = vec;
dest[len - 32 ..][0..32].* = array[0];
}
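The new routine follows the folly scheme referenced above: pick a size class, then cover the whole range with a fixed number of block loads followed by block stores, letting blocks overlap instead of running a byte-granular tail loop. A minimal C sketch of that head/tail trick for one size class (illustrative only; copy_8_to_16 and the fixed 8-byte block are assumptions for the example, not part of the commit):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Copy n bytes, 8 <= n <= 16, with two possibly-overlapping 8-byte blocks.
   memcpy with a constant size compiles to a single unaligned load/store on
   most targets, so there is no per-byte loop. */
static void copy_8_to_16(unsigned char *dst, const unsigned char *src, size_t n)
{
    uint64_t head, tail;
    memcpy(&head, src, 8);            /* first 8 bytes                  */
    memcpy(&tail, src + n - 8, 8);    /* last 8 bytes                   */
    memcpy(dst, &head, 8);            /* both loads precede the stores, */
    memcpy(dst + n - 8, &tail, 8);    /* so overlapping ranges work too */
}

The same pattern, with larger vectors and more size classes, is what blockCopy and copyLong above implement.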

View File

@@ -1,25 +0,0 @@
const std = @import("std");
const common = @import("./common.zig");
comptime {
@export(&memmove, .{ .name = "memmove", .linkage = common.linkage, .visibility = common.visibility });
}
pub fn memmove(dest: ?[*]u8, src: ?[*]const u8, n: usize) callconv(.C) ?[*]u8 {
@setRuntimeSafety(false);
if (@intFromPtr(dest) < @intFromPtr(src)) {
var index: usize = 0;
while (index != n) : (index += 1) {
dest.?[index] = src.?[index];
}
} else {
var index = n;
while (index != 0) {
index -= 1;
dest.?[index] = src.?[index];
}
}
return dest;
}

View File

@@ -1,186 +0,0 @@
/*
* memcpy - copy memory area
*
* Copyright (c) 2012-2020, Arm Limited.
* SPDX-License-Identifier: MIT
*/
/* Assumptions:
*
* ARMv8-a, AArch64, unaligned accesses.
*
*/
#define dstin x0
#define src x1
#define count x2
#define dst x3
#define srcend x4
#define dstend x5
#define A_l x6
#define A_lw w6
#define A_h x7
#define B_l x8
#define B_lw w8
#define B_h x9
#define C_l x10
#define C_lw w10
#define C_h x11
#define D_l x12
#define D_h x13
#define E_l x14
#define E_h x15
#define F_l x16
#define F_h x17
#define G_l count
#define G_h dst
#define H_l src
#define H_h srcend
#define tmp1 x14
/* This implementation of memcpy uses unaligned accesses and branchless
sequences to keep the code small, simple and improve performance.
Copies are split into 3 main cases: small copies of up to 32 bytes, medium
copies of up to 128 bytes, and large copies. The overhead of the overlap
check is negligible since it is only required for large copies.
Large copies use a software pipelined loop processing 64 bytes per iteration.
The destination pointer is 16-byte aligned to minimize unaligned accesses.
The loop tail is handled by always copying 64 bytes from the end.
*/
.global memcpy
.type memcpy,%function
memcpy:
add srcend, src, count
add dstend, dstin, count
cmp count, 128
b.hi .Lcopy_long
cmp count, 32
b.hi .Lcopy32_128
/* Small copies: 0..32 bytes. */
cmp count, 16
b.lo .Lcopy16
ldp A_l, A_h, [src]
ldp D_l, D_h, [srcend, -16]
stp A_l, A_h, [dstin]
stp D_l, D_h, [dstend, -16]
ret
/* Copy 8-15 bytes. */
.Lcopy16:
tbz count, 3, .Lcopy8
ldr A_l, [src]
ldr A_h, [srcend, -8]
str A_l, [dstin]
str A_h, [dstend, -8]
ret
.p2align 3
/* Copy 4-7 bytes. */
.Lcopy8:
tbz count, 2, .Lcopy4
ldr A_lw, [src]
ldr B_lw, [srcend, -4]
str A_lw, [dstin]
str B_lw, [dstend, -4]
ret
/* Copy 0..3 bytes using a branchless sequence. */
.Lcopy4:
cbz count, .Lcopy0
lsr tmp1, count, 1
ldrb A_lw, [src]
ldrb C_lw, [srcend, -1]
ldrb B_lw, [src, tmp1]
strb A_lw, [dstin]
strb B_lw, [dstin, tmp1]
strb C_lw, [dstend, -1]
.Lcopy0:
ret
.p2align 4
/* Medium copies: 33..128 bytes. */
.Lcopy32_128:
ldp A_l, A_h, [src]
ldp B_l, B_h, [src, 16]
ldp C_l, C_h, [srcend, -32]
ldp D_l, D_h, [srcend, -16]
cmp count, 64
b.hi .Lcopy128
stp A_l, A_h, [dstin]
stp B_l, B_h, [dstin, 16]
stp C_l, C_h, [dstend, -32]
stp D_l, D_h, [dstend, -16]
ret
.p2align 4
/* Copy 65..128 bytes. */
.Lcopy128:
ldp E_l, E_h, [src, 32]
ldp F_l, F_h, [src, 48]
cmp count, 96
b.ls .Lcopy96
ldp G_l, G_h, [srcend, -64]
ldp H_l, H_h, [srcend, -48]
stp G_l, G_h, [dstend, -64]
stp H_l, H_h, [dstend, -48]
.Lcopy96:
stp A_l, A_h, [dstin]
stp B_l, B_h, [dstin, 16]
stp E_l, E_h, [dstin, 32]
stp F_l, F_h, [dstin, 48]
stp C_l, C_h, [dstend, -32]
stp D_l, D_h, [dstend, -16]
ret
.p2align 4
/* Copy more than 128 bytes. */
.Lcopy_long:
/* Copy 16 bytes and then align dst to 16-byte alignment. */
ldp D_l, D_h, [src]
and tmp1, dstin, 15
bic dst, dstin, 15
sub src, src, tmp1
add count, count, tmp1 /* Count is now 16 too large. */
ldp A_l, A_h, [src, 16]
stp D_l, D_h, [dstin]
ldp B_l, B_h, [src, 32]
ldp C_l, C_h, [src, 48]
ldp D_l, D_h, [src, 64]!
subs count, count, 128 + 16 /* Test and readjust count. */
b.ls .Lcopy64_from_end
.Lloop64:
stp A_l, A_h, [dst, 16]
ldp A_l, A_h, [src, 16]
stp B_l, B_h, [dst, 32]
ldp B_l, B_h, [src, 32]
stp C_l, C_h, [dst, 48]
ldp C_l, C_h, [src, 48]
stp D_l, D_h, [dst, 64]!
ldp D_l, D_h, [src, 64]!
subs count, count, 64
b.hi .Lloop64
/* Write the last iteration and copy 64 bytes from the end. */
.Lcopy64_from_end:
ldp E_l, E_h, [srcend, -64]
stp A_l, A_h, [dst, 16]
ldp A_l, A_h, [srcend, -48]
stp B_l, B_h, [dst, 32]
ldp B_l, B_h, [srcend, -32]
stp C_l, C_h, [dst, 48]
ldp C_l, C_h, [srcend, -16]
stp D_l, D_h, [dst, 64]
stp E_l, E_h, [dstend, -64]
stp A_l, A_h, [dstend, -48]
stp B_l, B_h, [dstend, -32]
stp C_l, C_h, [dstend, -16]
ret
.size memcpy,.-memcpy
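The header comment of this removed AArch64 routine describes the same tactic at a larger scale: the main loop processes whole 64-byte chunks and the loop tail is handled by always copying the last 64 bytes from the end. A rough C sketch of that tail handling (illustrative only, assuming non-overlapping buffers and n >= 64):

#include <stddef.h>
#include <string.h>

/* Bulk-copy whole 64-byte chunks, then unconditionally recopy the last 64
   bytes; the fixed-size tail copy absorbs whatever remainder the loop left. */
static void copy_large(unsigned char *dst, const unsigned char *src, size_t n)
{
    size_t i;
    for (i = 0; i + 64 <= n; i += 64)
        memcpy(dst + i, src + i, 64);
    memcpy(dst + n - 64, src + n - 64, 64);
}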

View File

@@ -1,45 +0,0 @@
.syntax unified
.global __aeabi_memcpy8
.global __aeabi_memcpy4
.global __aeabi_memcpy
.global __aeabi_memmove8
.global __aeabi_memmove4
.global __aeabi_memmove
.type __aeabi_memcpy8,%function
.type __aeabi_memcpy4,%function
.type __aeabi_memcpy,%function
.type __aeabi_memmove8,%function
.type __aeabi_memmove4,%function
.type __aeabi_memmove,%function
__aeabi_memmove8:
__aeabi_memmove4:
__aeabi_memmove:
cmp r0, r1
bls 3f
cmp r2, #0
beq 2f
adds r0, r0, r2
adds r2, r1, r2
1: subs r2, r2, #1
ldrb r3, [r2]
subs r0, r0, #1
strb r3, [r0]
cmp r1, r2
bne 1b
2: bx lr
__aeabi_memcpy8:
__aeabi_memcpy4:
__aeabi_memcpy:
3: cmp r2, #0
beq 2f
adds r2, r1, r2
1: ldrb r3, [r1]
adds r1, r1, #1
strb r3, [r0]
adds r0, r0, #1
cmp r1, r2
bne 1b
2: bx lr

View File

@@ -1,479 +0,0 @@
/*
* Copyright (C) 2008 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* Optimized memcpy() for ARM.
*
* note that memcpy() always returns the destination pointer,
* so we have to preserve R0.
*/
/*
* This file has been modified from the original for use in musl libc.
* The main changes are: addition of .type memcpy,%function to make the
* code safely callable from thumb mode, adjusting the return
* instructions to be compatible with pre-thumb ARM cpus, removal of
* prefetch code that is not compatible with older cpus and support for
* building as thumb 2 and big-endian.
*/
.syntax unified
.global memcpy
.type memcpy,%function
memcpy:
/* The stack must always be 64-bits aligned to be compliant with the
* ARM ABI. Since we have to save R0, we might as well save R4
* which we can use for better pipelining of the reads below
*/
.fnstart
.save {r0, r4, lr}
stmfd sp!, {r0, r4, lr}
/* Making room for r5-r11 which will be spilled later */
.pad #28
sub sp, sp, #28
/* it simplifies things to take care of len<4 early */
cmp r2, #4
blo copy_last_3_and_return
/* compute the offset to align the source
* offset = (4-(src&3))&3 = -src & 3
*/
rsb r3, r1, #0
ands r3, r3, #3
beq src_aligned
/* align source to 32 bits. We need to insert 2 instructions between
* a ldr[b|h] and str[b|h] because byte and half-word instructions
* stall 2 cycles.
*/
movs r12, r3, lsl #31
sub r2, r2, r3 /* we know that r3 <= r2 because r2 >= 4 */
ldrbmi r3, [r1], #1
ldrbcs r4, [r1], #1
ldrbcs r12,[r1], #1
strbmi r3, [r0], #1
strbcs r4, [r0], #1
strbcs r12,[r0], #1
src_aligned:
/* see if src and dst are aligned together (congruent) */
eor r12, r0, r1
tst r12, #3
bne non_congruent
/* Use post-incriment mode for stm to spill r5-r11 to reserved stack
* frame. Don't update sp.
*/
stmea sp, {r5-r11}
/* align the destination to a cache-line */
rsb r3, r0, #0
ands r3, r3, #0x1C
beq congruent_aligned32
cmp r3, r2
andhi r3, r2, #0x1C
/* conditionnaly copies 0 to 7 words (length in r3) */
movs r12, r3, lsl #28
ldmcs r1!, {r4, r5, r6, r7} /* 16 bytes */
ldmmi r1!, {r8, r9} /* 8 bytes */
stmcs r0!, {r4, r5, r6, r7}
stmmi r0!, {r8, r9}
tst r3, #0x4
ldrne r10,[r1], #4 /* 4 bytes */
strne r10,[r0], #4
sub r2, r2, r3
congruent_aligned32:
/*
* here source is aligned to 32 bytes.
*/
cached_aligned32:
subs r2, r2, #32
blo less_than_32_left
/*
* We preload a cache-line up to 64 bytes ahead. On the 926, this will
* stall only until the requested world is fetched, but the linefill
* continues in the the background.
* While the linefill is going, we write our previous cache-line
* into the write-buffer (which should have some free space).
* When the linefill is done, the writebuffer will
* start dumping its content into memory
*
* While all this is going, we then load a full cache line into
* 8 registers, this cache line should be in the cache by now
* (or partly in the cache).
*
* This code should work well regardless of the source/dest alignment.
*
*/
/* Align the preload register to a cache-line because the cpu does
* "critical word first" (the first word requested is loaded first).
*/
@ bic r12, r1, #0x1F
@ add r12, r12, #64
1: ldmia r1!, { r4-r11 }
subs r2, r2, #32
/*
* NOTE: if r12 is more than 64 ahead of r1, the following ldrhi
* for ARM9 preload will not be safely guarded by the preceding subs.
* When it is safely guarded the only possibility to have SIGSEGV here
* is because the caller overstates the length.
*/
@ ldrhi r3, [r12], #32 /* cheap ARM9 preload */
stmia r0!, { r4-r11 }
bhs 1b
add r2, r2, #32
less_than_32_left:
/*
* less than 32 bytes left at this point (length in r2)
*/
/* skip all this if there is nothing to do, which should
* be a common case (if not executed the code below takes
* about 16 cycles)
*/
tst r2, #0x1F
beq 1f
/* conditionnaly copies 0 to 31 bytes */
movs r12, r2, lsl #28
ldmcs r1!, {r4, r5, r6, r7} /* 16 bytes */
ldmmi r1!, {r8, r9} /* 8 bytes */
stmcs r0!, {r4, r5, r6, r7}
stmmi r0!, {r8, r9}
movs r12, r2, lsl #30
ldrcs r3, [r1], #4 /* 4 bytes */
ldrhmi r4, [r1], #2 /* 2 bytes */
strcs r3, [r0], #4
strhmi r4, [r0], #2
tst r2, #0x1
ldrbne r3, [r1] /* last byte */
strbne r3, [r0]
/* we're done! restore everything and return */
1: ldmfd sp!, {r5-r11}
ldmfd sp!, {r0, r4, lr}
bx lr
/********************************************************************/
non_congruent:
/*
* here source is aligned to 4 bytes
* but destination is not.
*
* in the code below r2 is the number of bytes read
* (the number of bytes written is always smaller, because we have
* partial words in the shift queue)
*/
cmp r2, #4
blo copy_last_3_and_return
/* Use post-incriment mode for stm to spill r5-r11 to reserved stack
* frame. Don't update sp.
*/
stmea sp, {r5-r11}
/* compute shifts needed to align src to dest */
rsb r5, r0, #0
and r5, r5, #3 /* r5 = # bytes in partial words */
mov r12, r5, lsl #3 /* r12 = right */
rsb lr, r12, #32 /* lr = left */
/* read the first word */
ldr r3, [r1], #4
sub r2, r2, #4
/* write a partial word (0 to 3 bytes), such that destination
* becomes aligned to 32 bits (r5 = nb of words to copy for alignment)
*/
movs r5, r5, lsl #31
#if __ARMEB__
movmi r3, r3, ror #24
strbmi r3, [r0], #1
movcs r3, r3, ror #24
strbcs r3, [r0], #1
movcs r3, r3, ror #24
strbcs r3, [r0], #1
#else
strbmi r3, [r0], #1
movmi r3, r3, lsr #8
strbcs r3, [r0], #1
movcs r3, r3, lsr #8
strbcs r3, [r0], #1
movcs r3, r3, lsr #8
#endif
cmp r2, #4
blo partial_word_tail
#if __ARMEB__
mov r3, r3, lsr r12
mov r3, r3, lsl r12
#endif
/* Align destination to 32 bytes (cache line boundary) */
1: tst r0, #0x1c
beq 2f
ldr r5, [r1], #4
sub r2, r2, #4
#if __ARMEB__
mov r4, r5, lsr lr
orr r4, r4, r3
mov r3, r5, lsl r12
#else
mov r4, r5, lsl lr
orr r4, r4, r3
mov r3, r5, lsr r12
#endif
str r4, [r0], #4
cmp r2, #4
bhs 1b
blo partial_word_tail
/* copy 32 bytes at a time */
2: subs r2, r2, #32
blo less_than_thirtytwo
/* Use immediate mode for the shifts, because there is an extra cycle
* for register shifts, which could account for up to 50% of
* performance hit.
*/
cmp r12, #24
beq loop24
cmp r12, #8
beq loop8
loop16:
ldr r12, [r1], #4
1: mov r4, r12
ldmia r1!, { r5,r6,r7, r8,r9,r10,r11}
subs r2, r2, #32
ldrhs r12, [r1], #4
#if __ARMEB__
orr r3, r3, r4, lsr #16
mov r4, r4, lsl #16
orr r4, r4, r5, lsr #16
mov r5, r5, lsl #16
orr r5, r5, r6, lsr #16
mov r6, r6, lsl #16
orr r6, r6, r7, lsr #16
mov r7, r7, lsl #16
orr r7, r7, r8, lsr #16
mov r8, r8, lsl #16
orr r8, r8, r9, lsr #16
mov r9, r9, lsl #16
orr r9, r9, r10, lsr #16
mov r10, r10, lsl #16
orr r10, r10, r11, lsr #16
stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
mov r3, r11, lsl #16
#else
orr r3, r3, r4, lsl #16
mov r4, r4, lsr #16
orr r4, r4, r5, lsl #16
mov r5, r5, lsr #16
orr r5, r5, r6, lsl #16
mov r6, r6, lsr #16
orr r6, r6, r7, lsl #16
mov r7, r7, lsr #16
orr r7, r7, r8, lsl #16
mov r8, r8, lsr #16
orr r8, r8, r9, lsl #16
mov r9, r9, lsr #16
orr r9, r9, r10, lsl #16
mov r10, r10, lsr #16
orr r10, r10, r11, lsl #16
stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
mov r3, r11, lsr #16
#endif
bhs 1b
b less_than_thirtytwo
loop8:
ldr r12, [r1], #4
1: mov r4, r12
ldmia r1!, { r5,r6,r7, r8,r9,r10,r11}
subs r2, r2, #32
ldrhs r12, [r1], #4
#if __ARMEB__
orr r3, r3, r4, lsr #24
mov r4, r4, lsl #8
orr r4, r4, r5, lsr #24
mov r5, r5, lsl #8
orr r5, r5, r6, lsr #24
mov r6, r6, lsl #8
orr r6, r6, r7, lsr #24
mov r7, r7, lsl #8
orr r7, r7, r8, lsr #24
mov r8, r8, lsl #8
orr r8, r8, r9, lsr #24
mov r9, r9, lsl #8
orr r9, r9, r10, lsr #24
mov r10, r10, lsl #8
orr r10, r10, r11, lsr #24
stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
mov r3, r11, lsl #8
#else
orr r3, r3, r4, lsl #24
mov r4, r4, lsr #8
orr r4, r4, r5, lsl #24
mov r5, r5, lsr #8
orr r5, r5, r6, lsl #24
mov r6, r6, lsr #8
orr r6, r6, r7, lsl #24
mov r7, r7, lsr #8
orr r7, r7, r8, lsl #24
mov r8, r8, lsr #8
orr r8, r8, r9, lsl #24
mov r9, r9, lsr #8
orr r9, r9, r10, lsl #24
mov r10, r10, lsr #8
orr r10, r10, r11, lsl #24
stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
mov r3, r11, lsr #8
#endif
bhs 1b
b less_than_thirtytwo
loop24:
ldr r12, [r1], #4
1: mov r4, r12
ldmia r1!, { r5,r6,r7, r8,r9,r10,r11}
subs r2, r2, #32
ldrhs r12, [r1], #4
#if __ARMEB__
orr r3, r3, r4, lsr #8
mov r4, r4, lsl #24
orr r4, r4, r5, lsr #8
mov r5, r5, lsl #24
orr r5, r5, r6, lsr #8
mov r6, r6, lsl #24
orr r6, r6, r7, lsr #8
mov r7, r7, lsl #24
orr r7, r7, r8, lsr #8
mov r8, r8, lsl #24
orr r8, r8, r9, lsr #8
mov r9, r9, lsl #24
orr r9, r9, r10, lsr #8
mov r10, r10, lsl #24
orr r10, r10, r11, lsr #8
stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
mov r3, r11, lsl #24
#else
orr r3, r3, r4, lsl #8
mov r4, r4, lsr #24
orr r4, r4, r5, lsl #8
mov r5, r5, lsr #24
orr r5, r5, r6, lsl #8
mov r6, r6, lsr #24
orr r6, r6, r7, lsl #8
mov r7, r7, lsr #24
orr r7, r7, r8, lsl #8
mov r8, r8, lsr #24
orr r8, r8, r9, lsl #8
mov r9, r9, lsr #24
orr r9, r9, r10, lsl #8
mov r10, r10, lsr #24
orr r10, r10, r11, lsl #8
stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
mov r3, r11, lsr #24
#endif
bhs 1b
less_than_thirtytwo:
/* copy the last 0 to 31 bytes of the source */
rsb r12, lr, #32 /* we corrupted r12, recompute it */
add r2, r2, #32
cmp r2, #4
blo partial_word_tail
1: ldr r5, [r1], #4
sub r2, r2, #4
#if __ARMEB__
mov r4, r5, lsr lr
orr r4, r4, r3
mov r3, r5, lsl r12
#else
mov r4, r5, lsl lr
orr r4, r4, r3
mov r3, r5, lsr r12
#endif
str r4, [r0], #4
cmp r2, #4
bhs 1b
partial_word_tail:
/* we have a partial word in the input buffer */
movs r5, lr, lsl #(31-3)
#if __ARMEB__
movmi r3, r3, ror #24
strbmi r3, [r0], #1
movcs r3, r3, ror #24
strbcs r3, [r0], #1
movcs r3, r3, ror #24
strbcs r3, [r0], #1
#else
strbmi r3, [r0], #1
movmi r3, r3, lsr #8
strbcs r3, [r0], #1
movcs r3, r3, lsr #8
strbcs r3, [r0], #1
#endif
/* Refill spilled registers from the stack. Don't update sp. */
ldmfd sp, {r5-r11}
copy_last_3_and_return:
movs r2, r2, lsl #31 /* copy remaining 0, 1, 2 or 3 bytes */
ldrbmi r2, [r1], #1
ldrbcs r3, [r1], #1
ldrbcs r12,[r1]
strbmi r2, [r0], #1
strbcs r3, [r0], #1
strbcs r12,[r0]
/* we're done! restore sp and spilled registers and return */
add sp, sp, #28
ldmfd sp!, {r0, r4, lr}
bx lr

View File

@@ -1,32 +0,0 @@
.global memcpy
.global __memcpy_fwd
.hidden __memcpy_fwd
.type memcpy,@function
memcpy:
__memcpy_fwd:
push %esi
push %edi
mov 12(%esp),%edi
mov 16(%esp),%esi
mov 20(%esp),%ecx
mov %edi,%eax
cmp $4,%ecx
jc 1f
test $3,%edi
jz 1f
2: movsb
dec %ecx
test $3,%edi
jnz 2b
1: mov %ecx,%edx
shr $2,%ecx
rep
movsl
and $3,%edx
jz 1f
2: movsb
dec %edx
jnz 2b
1: pop %edi
pop %esi
ret

View File

@@ -1,22 +0,0 @@
.global memmove
.type memmove,@function
memmove:
mov 4(%esp),%eax
sub 8(%esp),%eax
cmp 12(%esp),%eax
.hidden __memcpy_fwd
jae __memcpy_fwd
push %esi
push %edi
mov 12(%esp),%edi
mov 16(%esp),%esi
mov 20(%esp),%ecx
lea -1(%edi,%ecx),%edi
lea -1(%esi,%ecx),%esi
std
rep movsb
cld
lea 1(%edi),%eax
pop %edi
pop %esi
ret

View File

@@ -1,124 +0,0 @@
#include <string.h>
#include <stdint.h>
#include <endian.h>
void *memcpy(void *restrict dest, const void *restrict src, size_t n)
{
unsigned char *d = dest;
const unsigned char *s = src;
#ifdef __GNUC__
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define LS >>
#define RS <<
#else
#define LS <<
#define RS >>
#endif
typedef uint32_t __attribute__((__may_alias__)) u32;
uint32_t w, x;
for (; (uintptr_t)s % 4 && n; n--) *d++ = *s++;
if ((uintptr_t)d % 4 == 0) {
for (; n>=16; s+=16, d+=16, n-=16) {
*(u32 *)(d+0) = *(u32 *)(s+0);
*(u32 *)(d+4) = *(u32 *)(s+4);
*(u32 *)(d+8) = *(u32 *)(s+8);
*(u32 *)(d+12) = *(u32 *)(s+12);
}
if (n&8) {
*(u32 *)(d+0) = *(u32 *)(s+0);
*(u32 *)(d+4) = *(u32 *)(s+4);
d += 8; s += 8;
}
if (n&4) {
*(u32 *)(d+0) = *(u32 *)(s+0);
d += 4; s += 4;
}
if (n&2) {
*d++ = *s++; *d++ = *s++;
}
if (n&1) {
*d = *s;
}
return dest;
}
if (n >= 32) switch ((uintptr_t)d % 4) {
case 1:
w = *(u32 *)s;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
n -= 3;
for (; n>=17; s+=16, d+=16, n-=16) {
x = *(u32 *)(s+1);
*(u32 *)(d+0) = (w LS 24) | (x RS 8);
w = *(u32 *)(s+5);
*(u32 *)(d+4) = (x LS 24) | (w RS 8);
x = *(u32 *)(s+9);
*(u32 *)(d+8) = (w LS 24) | (x RS 8);
w = *(u32 *)(s+13);
*(u32 *)(d+12) = (x LS 24) | (w RS 8);
}
break;
case 2:
w = *(u32 *)s;
*d++ = *s++;
*d++ = *s++;
n -= 2;
for (; n>=18; s+=16, d+=16, n-=16) {
x = *(u32 *)(s+2);
*(u32 *)(d+0) = (w LS 16) | (x RS 16);
w = *(u32 *)(s+6);
*(u32 *)(d+4) = (x LS 16) | (w RS 16);
x = *(u32 *)(s+10);
*(u32 *)(d+8) = (w LS 16) | (x RS 16);
w = *(u32 *)(s+14);
*(u32 *)(d+12) = (x LS 16) | (w RS 16);
}
break;
case 3:
w = *(u32 *)s;
*d++ = *s++;
n -= 1;
for (; n>=19; s+=16, d+=16, n-=16) {
x = *(u32 *)(s+3);
*(u32 *)(d+0) = (w LS 8) | (x RS 24);
w = *(u32 *)(s+7);
*(u32 *)(d+4) = (x LS 8) | (w RS 24);
x = *(u32 *)(s+11);
*(u32 *)(d+8) = (w LS 8) | (x RS 24);
w = *(u32 *)(s+15);
*(u32 *)(d+12) = (x LS 8) | (w RS 24);
}
break;
}
if (n&16) {
*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
}
if (n&8) {
*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
}
if (n&4) {
*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
}
if (n&2) {
*d++ = *s++; *d++ = *s++;
}
if (n&1) {
*d = *s;
}
return dest;
#endif
for (; n; n--) *d++ = *s++;
return dest;
}

View File

@@ -1,42 +0,0 @@
#include <string.h>
#include <stdint.h>
#ifdef __GNUC__
typedef __attribute__((__may_alias__)) size_t WT;
#define WS (sizeof(WT))
#endif
void *memmove(void *dest, const void *src, size_t n)
{
char *d = dest;
const char *s = src;
if (d==s) return d;
if ((uintptr_t)s-(uintptr_t)d-n <= -2*n) return memcpy(d, s, n);
if (d<s) {
#ifdef __GNUC__
if ((uintptr_t)s % WS == (uintptr_t)d % WS) {
while ((uintptr_t)d % WS) {
if (!n--) return dest;
*d++ = *s++;
}
for (; n>=WS; n-=WS, d+=WS, s+=WS) *(WT *)d = *(WT *)s;
}
#endif
for (; n; n--) *d++ = *s++;
} else {
#ifdef __GNUC__
if ((uintptr_t)s % WS == (uintptr_t)d % WS) {
while ((uintptr_t)(d+n) % WS) {
if (!n--) return dest;
d[n] = s[n];
}
while (n>=WS) n-=WS, *(WT *)(d+n) = *(WT *)(s+n);
}
#endif
while (n) n--, d[n] = s[n];
}
return dest;
}
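The unsigned test (uintptr_t)s-(uintptr_t)d-n <= -2*n in this removed memmove is a wrap-around trick: it holds exactly when the two buffers are at least n bytes apart in either direction, so the regions cannot overlap and deferring to memcpy is safe. A readable equivalent, assuming the address ranges themselves do not wrap around the address space (illustrative only; no_overlap is not a musl function):

#include <stddef.h>
#include <stdint.h>

/* Readable form of musl's overlap test: [d, d+n) and [s, s+n) are disjoint
   exactly when one region starts at or after the end of the other. */
static int no_overlap(const void *d, const void *s, size_t n)
{
    uintptr_t dp = (uintptr_t)d, sp = (uintptr_t)s;
    return sp >= dp + n || dp >= sp + n;
}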

View File

@@ -1,25 +0,0 @@
.global memcpy
.global __memcpy_fwd
.hidden __memcpy_fwd
.type memcpy,@function
memcpy:
__memcpy_fwd:
mov %rdi,%rax
cmp $8,%rdx
jc 1f
test $7,%edi
jz 1f
2: movsb
dec %rdx
test $7,%edi
jnz 2b
1: mov %rdx,%rcx
shr $3,%rcx
rep
movsq
and $7,%edx
jz 1f
2: movsb
dec %edx
jnz 2b
1: ret

View File

@@ -1,16 +0,0 @@
.global memmove
.type memmove,@function
memmove:
mov %rdi,%rax
sub %rsi,%rax
cmp %rdx,%rax
.hidden __memcpy_fwd
jae __memcpy_fwd
mov %rdx,%rcx
lea -1(%rdi,%rdx),%rdi
lea -1(%rsi,%rdx),%rsi
std
rep movsb
cld
lea 1(%rdi),%rax
ret

View File

@@ -1,70 +0,0 @@
#if __ARM_PCS_VFP
.syntax unified
.fpu vfp
.global fegetround
.type fegetround,%function
fegetround:
fmrx r0, fpscr
and r0, r0, #0xc00000
bx lr
.global __fesetround
.hidden __fesetround
.type __fesetround,%function
__fesetround:
fmrx r3, fpscr
bic r3, r3, #0xc00000
orr r3, r3, r0
fmxr fpscr, r3
mov r0, #0
bx lr
.global fetestexcept
.type fetestexcept,%function
fetestexcept:
and r0, r0, #0x1f
fmrx r3, fpscr
and r0, r0, r3
bx lr
.global feclearexcept
.type feclearexcept,%function
feclearexcept:
and r0, r0, #0x1f
fmrx r3, fpscr
bic r3, r3, r0
fmxr fpscr, r3
mov r0, #0
bx lr
.global feraiseexcept
.type feraiseexcept,%function
feraiseexcept:
and r0, r0, #0x1f
fmrx r3, fpscr
orr r3, r3, r0
fmxr fpscr, r3
mov r0, #0
bx lr
.global fegetenv
.type fegetenv,%function
fegetenv:
fmrx r3, fpscr
str r3, [r0]
mov r0, #0
bx lr
.global fesetenv
.type fesetenv,%function
fesetenv:
cmn r0, #1
moveq r3, #0
ldrne r3, [r0]
fmxr fpscr, r3
mov r0, #0
bx lr
#endif

View File

@@ -1,72 +0,0 @@
#ifndef __mips_soft_float
.set noreorder
.global feclearexcept
.type feclearexcept,@function
feclearexcept:
and $4, $4, 0x7c
cfc1 $5, $31
or $5, $5, $4
xor $5, $5, $4
ctc1 $5, $31
jr $ra
li $2, 0
.global feraiseexcept
.type feraiseexcept,@function
feraiseexcept:
and $4, $4, 0x7c
cfc1 $5, $31
or $5, $5, $4
ctc1 $5, $31
jr $ra
li $2, 0
.global fetestexcept
.type fetestexcept,@function
fetestexcept:
and $4, $4, 0x7c
cfc1 $2, $31
jr $ra
and $2, $2, $4
.global fegetround
.type fegetround,@function
fegetround:
cfc1 $2, $31
jr $ra
andi $2, $2, 3
.global __fesetround
.hidden __fesetround
.type __fesetround,@function
__fesetround:
cfc1 $5, $31
li $6, -4
and $5, $5, $6
or $5, $5, $4
ctc1 $5, $31
jr $ra
li $2, 0
.global fegetenv
.type fegetenv,@function
fegetenv:
cfc1 $5, $31
sw $5, 0($4)
jr $ra
li $2, 0
.global fesetenv
.type fesetenv,@function
fesetenv:
addiu $5, $4, 1
beq $5, $0, 1f
nop
lw $5, 0($4)
1: ctc1 $5, $31
jr $ra
li $2, 0
#endif

View File

@@ -1,72 +0,0 @@
#ifndef __mips_soft_float
.set noreorder
.global feclearexcept
.type feclearexcept,@function
feclearexcept:
and $4, $4, 0x7c
cfc1 $5, $31
or $5, $5, $4
xor $5, $5, $4
ctc1 $5, $31
jr $ra
li $2, 0
.global feraiseexcept
.type feraiseexcept,@function
feraiseexcept:
and $4, $4, 0x7c
cfc1 $5, $31
or $5, $5, $4
ctc1 $5, $31
jr $ra
li $2, 0
.global fetestexcept
.type fetestexcept,@function
fetestexcept:
and $4, $4, 0x7c
cfc1 $2, $31
jr $ra
and $2, $2, $4
.global fegetround
.type fegetround,@function
fegetround:
cfc1 $2, $31
jr $ra
andi $2, $2, 3
.global __fesetround
.hidden __fesetround
.type __fesetround,@function
__fesetround:
cfc1 $5, $31
li $6, -4
and $5, $5, $6
or $5, $5, $4
ctc1 $5, $31
jr $ra
li $2, 0
.global fegetenv
.type fegetenv,@function
fegetenv:
cfc1 $5, $31
sw $5, 0($4)
jr $ra
li $2, 0
.global fesetenv
.type fesetenv,@function
fesetenv:
daddiu $5, $4, 1
beq $5, $0, 1f
nop
lw $5, 0($4)
1: ctc1 $5, $31
jr $ra
li $2, 0
#endif

View File

@@ -1,71 +0,0 @@
#ifndef __mips_soft_float
.set noreorder
.global feclearexcept
.type feclearexcept,@function
feclearexcept:
and $4, $4, 0x7c
cfc1 $5, $31
or $5, $5, $4
xor $5, $5, $4
ctc1 $5, $31
jr $ra
li $2, 0
.global feraiseexcept
.type feraiseexcept,@function
feraiseexcept:
and $4, $4, 0x7c
cfc1 $5, $31
or $5, $5, $4
ctc1 $5, $31
jr $ra
li $2, 0
.global fetestexcept
.type fetestexcept,@function
fetestexcept:
and $4, $4, 0x7c
cfc1 $2, $31
jr $ra
and $2, $2, $4
.global fegetround
.type fegetround,@function
fegetround:
cfc1 $2, $31
jr $ra
andi $2, $2, 3
.global __fesetround
.hidden __fesetround
.type __fesetround,@function
__fesetround:
cfc1 $5, $31
li $6, -4
and $5, $5, $6
or $5, $5, $4
ctc1 $5, $31
jr $ra
li $2, 0
.global fegetenv
.type fegetenv,@function
fegetenv:
cfc1 $5, $31
sw $5, 0($4)
jr $ra
li $2, 0
.global fesetenv
.type fesetenv,@function
fesetenv:
addiu $5, $4, 1
beq $5, $0, 1f
nop
lw $5, 0($4)
1: ctc1 $5, $31
jr $ra
li $2, 0
#endif

View File

@@ -1,130 +0,0 @@
#if !defined(_SOFT_FLOAT) && !defined(__NO_FPRS__)
.global feclearexcept
.type feclearexcept,@function
feclearexcept:
andis. 3,3,0x3e00
/* if (r3 & FE_INVALID) r3 |= all_invalid_flags */
andis. 0,3,0x2000
stwu 1,-16(1)
beq- 0,1f
oris 3,3,0x01f8
ori 3,3,0x0700
1:
/*
* note: fpscr contains various fpu status and control
* flags and we dont check if r3 may alter other flags
* than the exception related ones
* ufpscr &= ~r3
*/
mffs 0
stfd 0,8(1)
lwz 9,12(1)
andc 9,9,3
stw 9,12(1)
lfd 0,8(1)
mtfsf 255,0
/* return 0 */
li 3,0
addi 1,1,16
blr
.global feraiseexcept
.type feraiseexcept,@function
feraiseexcept:
andis. 3,3,0x3e00
/* if (r3 & FE_INVALID) r3 |= software_invalid_flag */
andis. 0,3,0x2000
stwu 1,-16(1)
beq- 0,1f
ori 3,3,0x0400
1:
/* fpscr |= r3 */
mffs 0
stfd 0,8(1)
lwz 9,12(1)
or 9,9,3
stw 9,12(1)
lfd 0,8(1)
mtfsf 255,0
/* return 0 */
li 3,0
addi 1,1,16
blr
.global fetestexcept
.type fetestexcept,@function
fetestexcept:
andis. 3,3,0x3e00
/* return r3 & fpscr */
stwu 1,-16(1)
mffs 0
stfd 0,8(1)
lwz 9,12(1)
addi 1,1,16
and 3,3,9
blr
.global fegetround
.type fegetround,@function
fegetround:
/* return fpscr & 3 */
stwu 1,-16(1)
mffs 0
stfd 0,8(1)
lwz 3,12(1)
addi 1,1,16
clrlwi 3,3,30
blr
.global __fesetround
.hidden __fesetround
.type __fesetround,@function
__fesetround:
/*
* note: invalid input is not checked, r3 < 4 must hold
* fpscr = (fpscr & -4U) | r3
*/
stwu 1,-16(1)
mffs 0
stfd 0,8(1)
lwz 9,12(1)
clrrwi 9,9,2
or 9,9,3
stw 9,12(1)
lfd 0,8(1)
mtfsf 255,0
/* return 0 */
li 3,0
addi 1,1,16
blr
.global fegetenv
.type fegetenv,@function
fegetenv:
/* *r3 = fpscr */
mffs 0
stfd 0,0(3)
/* return 0 */
li 3,0
blr
.global fesetenv
.type fesetenv,@function
fesetenv:
cmpwi 3, -1
bne 1f
mflr 4
bl 2f
.zero 8
2: mflr 3
mtlr 4
1: /* fpscr = *r3 */
lfd 0,0(3)
mtfsf 255,0
/* return 0 */
li 3,0
blr
#endif

View File

@@ -1,56 +0,0 @@
#ifdef __riscv_flen
.global feclearexcept
.type feclearexcept, %function
feclearexcept:
csrc fflags, a0
li a0, 0
ret
.global feraiseexcept
.type feraiseexcept, %function
feraiseexcept:
csrs fflags, a0
li a0, 0
ret
.global fetestexcept
.type fetestexcept, %function
fetestexcept:
frflags t0
and a0, t0, a0
ret
.global fegetround
.type fegetround, %function
fegetround:
frrm a0
ret
.global __fesetround
.type __fesetround, %function
__fesetround:
fsrm t0, a0
li a0, 0
ret
.global fegetenv
.type fegetenv, %function
fegetenv:
frcsr t0
sw t0, 0(a0)
li a0, 0
ret
.global fesetenv
.type fesetenv, %function
fesetenv:
li t2, -1
li t1, 0
beq a0, t2, 1f
lw t1, 0(a0)
1: fscsr t1
li a0, 0
ret
#endif

View File

@@ -1,81 +0,0 @@
#if __SH_FPU_ANY__ || __SH4__
.global fegetround
.type fegetround, @function
fegetround:
sts fpscr, r0
rts
and #3, r0
.global __fesetround
.hidden __fesetround
.type __fesetround, @function
__fesetround:
sts fpscr, r0
mov #-4, r1
and r1, r0
or r4, r0
lds r0, fpscr
rts
mov #0, r0
.global fetestexcept
.type fetestexcept, @function
fetestexcept:
sts fpscr, r0
and r4, r0
rts
and #0x7c, r0
.global feclearexcept
.type feclearexcept, @function
feclearexcept:
mov r4, r0
and #0x7c, r0
not r0, r4
sts fpscr, r0
and r4, r0
lds r0, fpscr
rts
mov #0, r0
.global feraiseexcept
.type feraiseexcept, @function
feraiseexcept:
mov r4, r0
and #0x7c, r0
sts fpscr, r4
or r4, r0
lds r0, fpscr
rts
mov #0, r0
.global fegetenv
.type fegetenv, @function
fegetenv:
sts fpscr, r0
mov.l r0, @r4
rts
mov #0, r0
.global fesetenv
.type fesetenv, @function
fesetenv:
mov r4, r0
cmp/eq #-1, r0
bf 1f
! the default environment is complicated by the fact that we need to
! preserve the current precision bit, which we do not know a priori
sts fpscr, r0
mov #8, r1
swap.w r1, r1
bra 2f
and r1, r0
1: mov.l @r4, r0 ! non-default environment
2: lds r0, fpscr
rts
mov #0, r0
#endif

View File

@@ -1,3 +0,0 @@
#define __dlsym __dlsym_redir_time64
#define dlsym __dlsym_time64
#include "dlsym.s"

View File

@@ -1,55 +0,0 @@
.syntax unified
.text
.global __tlsdesc_static
.hidden __tlsdesc_static
.type __tlsdesc_static,%function
__tlsdesc_static:
ldr r0,[r0]
bx lr
.global __tlsdesc_dynamic
.hidden __tlsdesc_dynamic
.type __tlsdesc_dynamic,%function
__tlsdesc_dynamic:
push {r2,r3,ip,lr}
ldr r1,[r0]
ldr r2,[r1,#4] // r2 = offset
ldr r1,[r1] // r1 = modid
#if ((__ARM_ARCH_6K__ || __ARM_ARCH_6KZ__ || __ARM_ARCH_6ZK__) && !__thumb__) \
|| __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7
mrc p15,0,r0,c13,c0,3
#else
ldr r0,1f
add r0,r0,pc
ldr r0,[r0]
2:
#if __ARM_ARCH >= 5
blx r0 // r0 = tp
#else
#if __thumb__
add lr,pc,#1
#else
mov lr,pc
#endif
bx r0
#endif
#endif
ldr r3,[r0,#-4] // r3 = dtv
ldr ip,[r3,r1,LSL #2]
sub r0,ip,r0
add r0,r0,r2 // r0 = r3[r1]-r0+r2
#if __ARM_ARCH >= 5
pop {r2,r3,ip,pc}
#else
pop {r2,r3,ip,lr}
bx lr
#endif
#if ((__ARM_ARCH_6K__ || __ARM_ARCH_6KZ__ || __ARM_ARCH_6ZK__) && !__thumb__) \
|| __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7
#else
.align 2
1: .word __a_gettp_ptr - 2b
#endif

View File

@@ -1,3 +0,0 @@
#define __dlsym __dlsym_redir_time64
#define dlsym __dlsym_time64
#include "dlsym.s"

View File

@@ -1,3 +0,0 @@
#define __dlsym __dlsym_redir_time64
#define dlsym __dlsym_time64
#include "dlsym.s"

View File

@@ -1,3 +0,0 @@
#define __dlsym __dlsym_redir_time64
#define dlsym __dlsym_time64
#include "dlsym.s"

View File

@@ -1,3 +0,0 @@
#define __dlsym __dlsym_redir_time64
#define dlsym __dlsym_time64
#include "dlsym.s"

View File

@@ -1,3 +0,0 @@
#define __dlsym __dlsym_redir_time64
#define dlsym __dlsym_time64
#include "dlsym.s"

View File

@@ -1,3 +0,0 @@
#define __dlsym __dlsym_redir_time64
#define dlsym __dlsym_time64
#include "dlsym.s"

View File

@@ -1,3 +0,0 @@
#define __dlsym __dlsym_redir_time64
#define dlsym __dlsym_time64
#include "dlsym.s"

View File

@@ -1,3 +0,0 @@
#define __dlsym __dlsym_redir_time64
#define dlsym __dlsym_time64
#include "dlsym.s"

View File

@@ -1,50 +0,0 @@
.syntax unified
.global _longjmp
.global longjmp
.type _longjmp,%function
.type longjmp,%function
_longjmp:
longjmp:
mov ip,r0
movs r0,r1
moveq r0,#1
ldmia ip!, {v1,v2,v3,v4,v5,v6,sl,fp}
ldmia ip!, {r2,lr}
mov sp,r2
adr r1,1f
ldr r2,1f
ldr r1,[r1,r2]
#if __ARM_ARCH < 8
tst r1,#0x260
beq 3f
// HWCAP_ARM_FPA
tst r1,#0x20
beq 2f
ldc p2, cr4, [ip], #48
#endif
2: tst r1,#0x40
beq 2f
.fpu vfp
vldmia ip!, {d8-d15}
.fpu softvfp
.eabi_attribute 10, 0
.eabi_attribute 27, 0
#if __ARM_ARCH < 8
// HWCAP_ARM_IWMMXT
2: tst r1,#0x200
beq 3f
ldcl p1, cr10, [ip], #8
ldcl p1, cr11, [ip], #8
ldcl p1, cr12, [ip], #8
ldcl p1, cr13, [ip], #8
ldcl p1, cr14, [ip], #8
ldcl p1, cr15, [ip], #8
#endif
2:
3: bx lr
.hidden __hwcap
.align 2
1: .word __hwcap-1b

View File

@@ -1,52 +0,0 @@
.syntax unified
.global __setjmp
.global _setjmp
.global setjmp
.type __setjmp,%function
.type _setjmp,%function
.type setjmp,%function
__setjmp:
_setjmp:
setjmp:
mov ip,r0
stmia ip!,{v1,v2,v3,v4,v5,v6,sl,fp}
mov r2,sp
stmia ip!,{r2,lr}
mov r0,#0
adr r1,1f
ldr r2,1f
ldr r1,[r1,r2]
#if __ARM_ARCH < 8
tst r1,#0x260
beq 3f
// HWCAP_ARM_FPA
tst r1,#0x20
beq 2f
stc p2, cr4, [ip], #48
#endif
2: tst r1,#0x40
beq 2f
.fpu vfp
vstmia ip!, {d8-d15}
.fpu softvfp
.eabi_attribute 10, 0
.eabi_attribute 27, 0
#if __ARM_ARCH < 8
// HWCAP_ARM_IWMMXT
2: tst r1,#0x200
beq 3f
stcl p1, cr10, [ip], #8
stcl p1, cr11, [ip], #8
stcl p1, cr12, [ip], #8
stcl p1, cr13, [ip], #8
stcl p1, cr14, [ip], #8
stcl p1, cr15, [ip], #8
#endif
2:
3: bx lr
.hidden __hwcap
.align 2
1: .word __hwcap-1b

View File

@@ -1,34 +0,0 @@
.set noreorder
.global _longjmp
.global longjmp
.type _longjmp,@function
.type longjmp,@function
_longjmp:
longjmp:
move $2, $5
bne $2, $0, 1f
nop
addu $2, $2, 1
1:
#ifndef __mips_soft_float
l.d $f20, 56($4)
l.d $f22, 64($4)
l.d $f24, 72($4)
l.d $f26, 80($4)
l.d $f28, 88($4)
l.d $f30, 96($4)
#endif
lw $ra, 0($4)
lw $sp, 4($4)
lw $16, 8($4)
lw $17, 12($4)
lw $18, 16($4)
lw $19, 20($4)
lw $20, 24($4)
lw $21, 28($4)
lw $22, 32($4)
lw $23, 36($4)
lw $30, 40($4)
jr $ra
lw $28, 44($4)

View File

@@ -1,33 +0,0 @@
.set noreorder
.global __setjmp
.global _setjmp
.global setjmp
.type __setjmp,@function
.type _setjmp,@function
.type setjmp,@function
__setjmp:
_setjmp:
setjmp:
sw $ra, 0($4)
sw $sp, 4($4)
sw $16, 8($4)
sw $17, 12($4)
sw $18, 16($4)
sw $19, 20($4)
sw $20, 24($4)
sw $21, 28($4)
sw $22, 32($4)
sw $23, 36($4)
sw $30, 40($4)
sw $28, 44($4)
#ifndef __mips_soft_float
s.d $f20, 56($4)
s.d $f22, 64($4)
s.d $f24, 72($4)
s.d $f26, 80($4)
s.d $f28, 88($4)
s.d $f30, 96($4)
#endif
jr $ra
li $2, 0

View File

@@ -1,37 +0,0 @@
.set noreorder
.global _longjmp
.global longjmp
.type _longjmp,@function
.type longjmp,@function
_longjmp:
longjmp:
move $2, $5
bne $2, $0, 1f
nop
daddu $2, $2, 1
1:
#ifndef __mips_soft_float
ldc1 $24, 96($4)
ldc1 $25, 104($4)
ldc1 $26, 112($4)
ldc1 $27, 120($4)
ldc1 $28, 128($4)
ldc1 $29, 136($4)
ldc1 $30, 144($4)
ldc1 $31, 152($4)
#endif
ld $ra, 0($4)
ld $sp, 8($4)
ld $gp, 16($4)
ld $16, 24($4)
ld $17, 32($4)
ld $18, 40($4)
ld $19, 48($4)
ld $20, 56($4)
ld $21, 64($4)
ld $22, 72($4)
ld $23, 80($4)
ld $30, 88($4)
jr $ra
nop

View File

@@ -1,34 +0,0 @@
.set noreorder
.global __setjmp
.global _setjmp
.global setjmp
.type __setjmp,@function
.type _setjmp,@function
.type setjmp,@function
__setjmp:
_setjmp:
setjmp:
sd $ra, 0($4)
sd $sp, 8($4)
sd $gp, 16($4)
sd $16, 24($4)
sd $17, 32($4)
sd $18, 40($4)
sd $19, 48($4)
sd $20, 56($4)
sd $21, 64($4)
sd $22, 72($4)
sd $23, 80($4)
sd $30, 88($4)
#ifndef __mips_soft_float
sdc1 $24, 96($4)
sdc1 $25, 104($4)
sdc1 $26, 112($4)
sdc1 $27, 120($4)
sdc1 $28, 128($4)
sdc1 $29, 136($4)
sdc1 $30, 144($4)
sdc1 $31, 152($4)
#endif
jr $ra
li $2, 0

View File

@@ -1,36 +0,0 @@
.set noreorder
.global _longjmp
.global longjmp
.type _longjmp,@function
.type longjmp,@function
_longjmp:
longjmp:
move $2, $5
bne $2, $0, 1f
nop
addu $2, $2, 1
1:
#ifndef __mips_soft_float
ldc1 $24, 96($4)
ldc1 $25, 104($4)
ldc1 $26, 112($4)
ldc1 $27, 120($4)
ldc1 $28, 128($4)
ldc1 $29, 136($4)
ldc1 $30, 144($4)
ldc1 $31, 152($4)
#endif
ld $ra, 0($4)
ld $sp, 8($4)
ld $gp, 16($4)
ld $16, 24($4)
ld $17, 32($4)
ld $18, 40($4)
ld $19, 48($4)
ld $20, 56($4)
ld $21, 64($4)
ld $22, 72($4)
ld $23, 80($4)
ld $30, 88($4)
jr $ra
nop

View File

@@ -1,34 +0,0 @@
.set noreorder
.global __setjmp
.global _setjmp
.global setjmp
.type __setjmp,@function
.type _setjmp,@function
.type setjmp,@function
__setjmp:
_setjmp:
setjmp:
sd $ra, 0($4)
sd $sp, 8($4)
sd $gp, 16($4)
sd $16, 24($4)
sd $17, 32($4)
sd $18, 40($4)
sd $19, 48($4)
sd $20, 56($4)
sd $21, 64($4)
sd $22, 72($4)
sd $23, 80($4)
sd $30, 88($4)
#ifndef __mips_soft_float
sdc1 $24, 96($4)
sdc1 $25, 104($4)
sdc1 $26, 112($4)
sdc1 $27, 120($4)
sdc1 $28, 128($4)
sdc1 $29, 136($4)
sdc1 $30, 144($4)
sdc1 $31, 152($4)
#endif
jr $ra
li $2, 0

View File

@@ -1,99 +0,0 @@
.global _longjmp
.global longjmp
.type _longjmp,@function
.type longjmp,@function
_longjmp:
longjmp:
/*
* void longjmp(jmp_buf env, int val);
* put val into return register and restore the env saved in setjmp
* if val(r4) is 0, put 1 there.
*/
/* 0) move old return address into r0 */
lwz 0, 0(3)
/* 1) put it into link reg */
mtlr 0
/* 2 ) restore stack ptr */
lwz 1, 4(3)
/* 3) restore control reg */
lwz 0, 8(3)
mtcr 0
/* 4) restore r14-r31 */
lwz 14, 12(3)
lwz 15, 16(3)
lwz 16, 20(3)
lwz 17, 24(3)
lwz 18, 28(3)
lwz 19, 32(3)
lwz 20, 36(3)
lwz 21, 40(3)
lwz 22, 44(3)
lwz 23, 48(3)
lwz 24, 52(3)
lwz 25, 56(3)
lwz 26, 60(3)
lwz 27, 64(3)
lwz 28, 68(3)
lwz 29, 72(3)
lwz 30, 76(3)
lwz 31, 80(3)
#if defined(_SOFT_FLOAT) || defined(__NO_FPRS__)
mflr 0
bl 1f
.hidden __hwcap
.long __hwcap-.
1: mflr 4
lwz 5, 0(4)
lwzx 4, 4, 5
andis. 4, 4, 0x80
beq 1f
.long 0x11c35b01 /* evldd 14,88(3) */
.long 0x11e36301 /* ... */
.long 0x12036b01
.long 0x12237301
.long 0x12437b01
.long 0x12638301
.long 0x12838b01
.long 0x12a39301
.long 0x12c39b01
.long 0x12e3a301
.long 0x1303ab01
.long 0x1323b301
.long 0x1343bb01
.long 0x1363c301
.long 0x1383cb01
.long 0x13a3d301
.long 0x13c3db01
.long 0x13e3e301 /* evldd 31,224(3) */
.long 0x11a3eb01 /* evldd 13,232(3) */
1: mtlr 0
#else
lfd 14,88(3)
lfd 15,96(3)
lfd 16,104(3)
lfd 17,112(3)
lfd 18,120(3)
lfd 19,128(3)
lfd 20,136(3)
lfd 21,144(3)
lfd 22,152(3)
lfd 23,160(3)
lfd 24,168(3)
lfd 25,176(3)
lfd 26,184(3)
lfd 27,192(3)
lfd 28,200(3)
lfd 29,208(3)
lfd 30,216(3)
lfd 31,224(3)
#endif
/* 5) put val into return reg r3 */
mr 3, 4
/* 6) check if return value is 0, make it 1 in that case */
cmpwi cr7, 4, 0
bne cr7, 1f
li 3, 1
1:
blr
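The value fix-up spelled out in the comments above implements the ISO C rule that setjmp must never appear to return 0 from a longjmp: longjmp(env, 0) makes setjmp return 1 instead. A standard-C usage example of that convention (not tied to any of the removed ports):

#include <setjmp.h>
#include <stdio.h>

static jmp_buf env;

int main(void)
{
    int val = setjmp(env);
    if (val == 0) {
        puts("direct return from setjmp");
        longjmp(env, 0); /* asking for 0 still makes setjmp return 1 */
    }
    printf("returned via longjmp, val = %d\n", val); /* prints 1 */
    return 0;
}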

View File

@@ -1,93 +0,0 @@
.global ___setjmp
.hidden ___setjmp
.global __setjmp
.global _setjmp
.global setjmp
.type __setjmp,@function
.type _setjmp,@function
.type setjmp,@function
___setjmp:
__setjmp:
_setjmp:
setjmp:
/* 0) store IP int 0, then into the jmpbuf pointed to by r3 (first arg) */
mflr 0
stw 0, 0(3)
/* 1) store reg1 (SP) */
stw 1, 4(3)
/* 2) store cr */
mfcr 0
stw 0, 8(3)
/* 3) store r14-31 */
stw 14, 12(3)
stw 15, 16(3)
stw 16, 20(3)
stw 17, 24(3)
stw 18, 28(3)
stw 19, 32(3)
stw 20, 36(3)
stw 21, 40(3)
stw 22, 44(3)
stw 23, 48(3)
stw 24, 52(3)
stw 25, 56(3)
stw 26, 60(3)
stw 27, 64(3)
stw 28, 68(3)
stw 29, 72(3)
stw 30, 76(3)
stw 31, 80(3)
#if defined(_SOFT_FLOAT) || defined(__NO_FPRS__)
mflr 0
bl 1f
.hidden __hwcap
.long __hwcap-.
1: mflr 4
lwz 5, 0(4)
lwzx 4, 4, 5
andis. 4, 4, 0x80
beq 1f
.long 0x11c35b21 /* evstdd 14,88(3) */
.long 0x11e36321 /* ... */
.long 0x12036b21
.long 0x12237321
.long 0x12437b21
.long 0x12638321
.long 0x12838b21
.long 0x12a39321
.long 0x12c39b21
.long 0x12e3a321
.long 0x1303ab21
.long 0x1323b321
.long 0x1343bb21
.long 0x1363c321
.long 0x1383cb21
.long 0x13a3d321
.long 0x13c3db21
.long 0x13e3e321 /* evstdd 31,224(3) */
.long 0x11a3eb21 /* evstdd 13,232(3) */
1: mtlr 0
#else
stfd 14,88(3)
stfd 15,96(3)
stfd 16,104(3)
stfd 17,112(3)
stfd 18,120(3)
stfd 19,128(3)
stfd 20,136(3)
stfd 21,144(3)
stfd 22,152(3)
stfd 23,160(3)
stfd 24,168(3)
stfd 25,176(3)
stfd 26,184(3)
stfd 27,192(3)
stfd 28,200(3)
stfd 29,208(3)
stfd 30,216(3)
stfd 31,224(3)
#endif
/* 4) set return value to 0 */
li 3, 0
/* 5) return */
blr

View File

@@ -1,42 +0,0 @@
.global __longjmp
.global _longjmp
.global longjmp
.type __longjmp, %function
.type _longjmp, %function
.type longjmp, %function
__longjmp:
_longjmp:
longjmp:
ld s0, 0(a0)
ld s1, 8(a0)
ld s2, 16(a0)
ld s3, 24(a0)
ld s4, 32(a0)
ld s5, 40(a0)
ld s6, 48(a0)
ld s7, 56(a0)
ld s8, 64(a0)
ld s9, 72(a0)
ld s10, 80(a0)
ld s11, 88(a0)
ld sp, 96(a0)
ld ra, 104(a0)
#ifndef __riscv_float_abi_soft
fld fs0, 112(a0)
fld fs1, 120(a0)
fld fs2, 128(a0)
fld fs3, 136(a0)
fld fs4, 144(a0)
fld fs5, 152(a0)
fld fs6, 160(a0)
fld fs7, 168(a0)
fld fs8, 176(a0)
fld fs9, 184(a0)
fld fs10, 192(a0)
fld fs11, 200(a0)
#endif
seqz a0, a1
add a0, a0, a1
ret

View File

@@ -1,41 +0,0 @@
.global __setjmp
.global _setjmp
.global setjmp
.type __setjmp, %function
.type _setjmp, %function
.type setjmp, %function
__setjmp:
_setjmp:
setjmp:
sd s0, 0(a0)
sd s1, 8(a0)
sd s2, 16(a0)
sd s3, 24(a0)
sd s4, 32(a0)
sd s5, 40(a0)
sd s6, 48(a0)
sd s7, 56(a0)
sd s8, 64(a0)
sd s9, 72(a0)
sd s10, 80(a0)
sd s11, 88(a0)
sd sp, 96(a0)
sd ra, 104(a0)
#ifndef __riscv_float_abi_soft
fsd fs0, 112(a0)
fsd fs1, 120(a0)
fsd fs2, 128(a0)
fsd fs3, 136(a0)
fsd fs4, 144(a0)
fsd fs5, 152(a0)
fsd fs6, 160(a0)
fsd fs7, 168(a0)
fsd fs8, 176(a0)
fsd fs9, 184(a0)
fsd fs10, 192(a0)
fsd fs11, 200(a0)
#endif
li a0, 0
ret

View File

@@ -1,28 +0,0 @@
.global _longjmp
.global longjmp
.type _longjmp, @function
.type longjmp, @function
_longjmp:
longjmp:
mov.l @r4+, r8
mov.l @r4+, r9
mov.l @r4+, r10
mov.l @r4+, r11
mov.l @r4+, r12
mov.l @r4+, r13
mov.l @r4+, r14
mov.l @r4+, r15
lds.l @r4+, pr
#if __SH_FPU_ANY__ || __SH4__
fmov.s @r4+, fr12
fmov.s @r4+, fr13
fmov.s @r4+, fr14
fmov.s @r4+, fr15
#endif
tst r5, r5
movt r0
add r5, r0
rts
nop

View File

@@ -1,32 +0,0 @@
.global ___setjmp
.hidden ___setjmp
.global __setjmp
.global _setjmp
.global setjmp
.type __setjmp, @function
.type _setjmp, @function
.type setjmp, @function
___setjmp:
__setjmp:
_setjmp:
setjmp:
#if __SH_FPU_ANY__ || __SH4__
add #52, r4
fmov.s fr15, @-r4
fmov.s fr14, @-r4
fmov.s fr13, @-r4
fmov.s fr12, @-r4
#else
add #36, r4
#endif
sts.l pr, @-r4
mov.l r15, @-r4
mov.l r14, @-r4
mov.l r13, @-r4
mov.l r12, @-r4
mov.l r11, @-r4
mov.l r10, @-r4
mov.l r9, @-r4
mov.l r8, @-r4
rts
mov #0, r0

View File

@@ -1,186 +0,0 @@
/*
* memcpy - copy memory area
*
* Copyright (c) 2012-2020, Arm Limited.
* SPDX-License-Identifier: MIT
*/
/* Assumptions:
*
* ARMv8-a, AArch64, unaligned accesses.
*
*/
#define dstin x0
#define src x1
#define count x2
#define dst x3
#define srcend x4
#define dstend x5
#define A_l x6
#define A_lw w6
#define A_h x7
#define B_l x8
#define B_lw w8
#define B_h x9
#define C_l x10
#define C_lw w10
#define C_h x11
#define D_l x12
#define D_h x13
#define E_l x14
#define E_h x15
#define F_l x16
#define F_h x17
#define G_l count
#define G_h dst
#define H_l src
#define H_h srcend
#define tmp1 x14
/* This implementation of memcpy uses unaligned accesses and branchless
sequences to keep the code small, simple and improve performance.
Copies are split into 3 main cases: small copies of up to 32 bytes, medium
copies of up to 128 bytes, and large copies. The overhead of the overlap
check is negligible since it is only required for large copies.
Large copies use a software pipelined loop processing 64 bytes per iteration.
The destination pointer is 16-byte aligned to minimize unaligned accesses.
The loop tail is handled by always copying 64 bytes from the end.
*/
.global memcpy
.type memcpy,%function
memcpy:
add srcend, src, count
add dstend, dstin, count
cmp count, 128
b.hi .Lcopy_long
cmp count, 32
b.hi .Lcopy32_128
/* Small copies: 0..32 bytes. */
cmp count, 16
b.lo .Lcopy16
ldp A_l, A_h, [src]
ldp D_l, D_h, [srcend, -16]
stp A_l, A_h, [dstin]
stp D_l, D_h, [dstend, -16]
ret
/* Copy 8-15 bytes. */
.Lcopy16:
tbz count, 3, .Lcopy8
ldr A_l, [src]
ldr A_h, [srcend, -8]
str A_l, [dstin]
str A_h, [dstend, -8]
ret
.p2align 3
/* Copy 4-7 bytes. */
.Lcopy8:
tbz count, 2, .Lcopy4
ldr A_lw, [src]
ldr B_lw, [srcend, -4]
str A_lw, [dstin]
str B_lw, [dstend, -4]
ret
/* Copy 0..3 bytes using a branchless sequence. */
.Lcopy4:
cbz count, .Lcopy0
lsr tmp1, count, 1
ldrb A_lw, [src]
ldrb C_lw, [srcend, -1]
ldrb B_lw, [src, tmp1]
strb A_lw, [dstin]
strb B_lw, [dstin, tmp1]
strb C_lw, [dstend, -1]
.Lcopy0:
ret
.p2align 4
/* Medium copies: 33..128 bytes. */
.Lcopy32_128:
ldp A_l, A_h, [src]
ldp B_l, B_h, [src, 16]
ldp C_l, C_h, [srcend, -32]
ldp D_l, D_h, [srcend, -16]
cmp count, 64
b.hi .Lcopy128
stp A_l, A_h, [dstin]
stp B_l, B_h, [dstin, 16]
stp C_l, C_h, [dstend, -32]
stp D_l, D_h, [dstend, -16]
ret
.p2align 4
/* Copy 65..128 bytes. */
.Lcopy128:
ldp E_l, E_h, [src, 32]
ldp F_l, F_h, [src, 48]
cmp count, 96
b.ls .Lcopy96
ldp G_l, G_h, [srcend, -64]
ldp H_l, H_h, [srcend, -48]
stp G_l, G_h, [dstend, -64]
stp H_l, H_h, [dstend, -48]
.Lcopy96:
stp A_l, A_h, [dstin]
stp B_l, B_h, [dstin, 16]
stp E_l, E_h, [dstin, 32]
stp F_l, F_h, [dstin, 48]
stp C_l, C_h, [dstend, -32]
stp D_l, D_h, [dstend, -16]
ret
.p2align 4
/* Copy more than 128 bytes. */
.Lcopy_long:
/* Copy 16 bytes and then align dst to 16-byte alignment. */
ldp D_l, D_h, [src]
and tmp1, dstin, 15
bic dst, dstin, 15
sub src, src, tmp1
add count, count, tmp1 /* Count is now 16 too large. */
ldp A_l, A_h, [src, 16]
stp D_l, D_h, [dstin]
ldp B_l, B_h, [src, 32]
ldp C_l, C_h, [src, 48]
ldp D_l, D_h, [src, 64]!
subs count, count, 128 + 16 /* Test and readjust count. */
b.ls .Lcopy64_from_end
.Lloop64:
stp A_l, A_h, [dst, 16]
ldp A_l, A_h, [src, 16]
stp B_l, B_h, [dst, 32]
ldp B_l, B_h, [src, 32]
stp C_l, C_h, [dst, 48]
ldp C_l, C_h, [src, 48]
stp D_l, D_h, [dst, 64]!
ldp D_l, D_h, [src, 64]!
subs count, count, 64
b.hi .Lloop64
/* Write the last iteration and copy 64 bytes from the end. */
.Lcopy64_from_end:
ldp E_l, E_h, [srcend, -64]
stp A_l, A_h, [dst, 16]
ldp A_l, A_h, [srcend, -48]
stp B_l, B_h, [dst, 32]
ldp B_l, B_h, [srcend, -32]
stp C_l, C_h, [dst, 48]
ldp C_l, C_h, [srcend, -16]
stp D_l, D_h, [dst, 64]
stp E_l, E_h, [dstend, -64]
stp A_l, A_h, [dstend, -48]
stp B_l, B_h, [dstend, -32]
stp C_l, C_h, [dstend, -16]
ret
.size memcpy,.-memcpy

View File

@@ -1,115 +0,0 @@
/*
* memset - fill memory with a constant byte
*
* Copyright (c) 2012-2020, Arm Limited.
* SPDX-License-Identifier: MIT
*/
/* Assumptions:
*
* ARMv8-a, AArch64, Advanced SIMD, unaligned accesses.
*
*/
#define dstin x0
#define val x1
#define valw w1
#define count x2
#define dst x3
#define dstend x4
#define zva_val x5
.global memset
.type memset,%function
memset:
dup v0.16B, valw
add dstend, dstin, count
cmp count, 96
b.hi .Lset_long
cmp count, 16
b.hs .Lset_medium
mov val, v0.D[0]
/* Set 0..15 bytes. */
tbz count, 3, 1f
str val, [dstin]
str val, [dstend, -8]
ret
nop
1: tbz count, 2, 2f
str valw, [dstin]
str valw, [dstend, -4]
ret
2: cbz count, 3f
strb valw, [dstin]
tbz count, 1, 3f
strh valw, [dstend, -2]
3: ret
/* Set 17..96 bytes. */
.Lset_medium:
str q0, [dstin]
tbnz count, 6, .Lset96
str q0, [dstend, -16]
tbz count, 5, 1f
str q0, [dstin, 16]
str q0, [dstend, -32]
1: ret
.p2align 4
/* Set 64..96 bytes. Write 64 bytes from the start and
32 bytes from the end. */
.Lset96:
str q0, [dstin, 16]
stp q0, q0, [dstin, 32]
stp q0, q0, [dstend, -32]
ret
.p2align 4
.Lset_long:
and valw, valw, 255
bic dst, dstin, 15
str q0, [dstin]
cmp count, 160
ccmp valw, 0, 0, hs
b.ne .Lno_zva
#ifndef SKIP_ZVA_CHECK
mrs zva_val, dczid_el0
and zva_val, zva_val, 31
cmp zva_val, 4 /* ZVA size is 64 bytes. */
b.ne .Lno_zva
#endif
str q0, [dst, 16]
stp q0, q0, [dst, 32]
bic dst, dst, 63
sub count, dstend, dst /* Count is now 64 too large. */
sub count, count, 128 /* Adjust count and bias for loop. */
.p2align 4
.Lzva_loop:
add dst, dst, 64
dc zva, dst
subs count, count, 64
b.hi .Lzva_loop
stp q0, q0, [dstend, -64]
stp q0, q0, [dstend, -32]
ret
.Lno_zva:
sub count, dstend, dst /* Count is 16 too large. */
sub dst, dst, 16 /* Dst is biased by -32. */
sub count, count, 64 + 16 /* Adjust count and bias for loop. */
.Lno_zva_loop:
stp q0, q0, [dst, 32]
stp q0, q0, [dst, 64]!
subs count, count, 64
b.hi .Lno_zva_loop
stp q0, q0, [dstend, -64]
stp q0, q0, [dstend, -32]
ret
.size memset,.-memset

View File

@@ -1,479 +0,0 @@
/*
* Copyright (C) 2008 The Android Open Source Project
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* Optimized memcpy() for ARM.
*
* note that memcpy() always returns the destination pointer,
* so we have to preserve R0.
*/
/*
* This file has been modified from the original for use in musl libc.
* The main changes are: addition of .type memcpy,%function to make the
* code safely callable from thumb mode, adjusting the return
* instructions to be compatible with pre-thumb ARM cpus, removal of
* prefetch code that is not compatible with older cpus and support for
* building as thumb 2 and big-endian.
*/
.syntax unified
.global memcpy
.type memcpy,%function
memcpy:
/* The stack must always be 64-bits aligned to be compliant with the
* ARM ABI. Since we have to save R0, we might as well save R4
* which we can use for better pipelining of the reads below
*/
.fnstart
.save {r0, r4, lr}
stmfd sp!, {r0, r4, lr}
/* Making room for r5-r11 which will be spilled later */
.pad #28
sub sp, sp, #28
/* it simplifies things to take care of len<4 early */
cmp r2, #4
blo copy_last_3_and_return
/* compute the offset to align the source
* offset = (4-(src&3))&3 = -src & 3
*/
rsb r3, r1, #0
ands r3, r3, #3
beq src_aligned
/* align source to 32 bits. We need to insert 2 instructions between
* a ldr[b|h] and str[b|h] because byte and half-word instructions
* stall 2 cycles.
*/
movs r12, r3, lsl #31
sub r2, r2, r3 /* we know that r3 <= r2 because r2 >= 4 */
ldrbmi r3, [r1], #1
ldrbcs r4, [r1], #1
ldrbcs r12,[r1], #1
strbmi r3, [r0], #1
strbcs r4, [r0], #1
strbcs r12,[r0], #1
src_aligned:
/* see if src and dst are aligned together (congruent) */
eor r12, r0, r1
tst r12, #3
bne non_congruent
/* Use post-incriment mode for stm to spill r5-r11 to reserved stack
* frame. Don't update sp.
*/
stmea sp, {r5-r11}
/* align the destination to a cache-line */
rsb r3, r0, #0
ands r3, r3, #0x1C
beq congruent_aligned32
cmp r3, r2
andhi r3, r2, #0x1C
/* conditionnaly copies 0 to 7 words (length in r3) */
movs r12, r3, lsl #28
ldmcs r1!, {r4, r5, r6, r7} /* 16 bytes */
ldmmi r1!, {r8, r9} /* 8 bytes */
stmcs r0!, {r4, r5, r6, r7}
stmmi r0!, {r8, r9}
tst r3, #0x4
ldrne r10,[r1], #4 /* 4 bytes */
strne r10,[r0], #4
sub r2, r2, r3
congruent_aligned32:
/*
* here source is aligned to 32 bytes.
*/
cached_aligned32:
subs r2, r2, #32
blo less_than_32_left
/*
* We preload a cache-line up to 64 bytes ahead. On the 926, this will
* stall only until the requested world is fetched, but the linefill
* continues in the the background.
* While the linefill is going, we write our previous cache-line
* into the write-buffer (which should have some free space).
* When the linefill is done, the writebuffer will
* start dumping its content into memory
*
* While all this is going, we then load a full cache line into
* 8 registers, this cache line should be in the cache by now
* (or partly in the cache).
*
* This code should work well regardless of the source/dest alignment.
*
*/
/* Align the preload register to a cache-line because the cpu does
* "critical word first" (the first word requested is loaded first).
*/
@ bic r12, r1, #0x1F
@ add r12, r12, #64
1: ldmia r1!, { r4-r11 }
subs r2, r2, #32
/*
* NOTE: if r12 is more than 64 ahead of r1, the following ldrhi
* for ARM9 preload will not be safely guarded by the preceding subs.
* When it is safely guarded, the only way to get a SIGSEGV here
* is if the caller overstates the length.
*/
@ ldrhi r3, [r12], #32 /* cheap ARM9 preload */
stmia r0!, { r4-r11 }
bhs 1b
add r2, r2, #32
less_than_32_left:
/*
* less than 32 bytes left at this point (length in r2)
*/
/* skip all this if there is nothing to do, which should
* be a common case (when it does execute, the code below takes
* about 16 cycles)
*/
tst r2, #0x1F
beq 1f
/* conditionally copies 0 to 31 bytes */
movs r12, r2, lsl #28
ldmcs r1!, {r4, r5, r6, r7} /* 16 bytes */
ldmmi r1!, {r8, r9} /* 8 bytes */
stmcs r0!, {r4, r5, r6, r7}
stmmi r0!, {r8, r9}
movs r12, r2, lsl #30
ldrcs r3, [r1], #4 /* 4 bytes */
ldrhmi r4, [r1], #2 /* 2 bytes */
strcs r3, [r0], #4
strhmi r4, [r0], #2
tst r2, #0x1
ldrbne r3, [r1] /* last byte */
strbne r3, [r0]
/* we're done! restore everything and return */
1: ldmfd sp!, {r5-r11}
ldmfd sp!, {r0, r4, lr}
bx lr
/********************************************************************/
non_congruent:
/*
* here source is aligned to 4 bytes
* but destination is not.
*
* in the code below r2 is the number of bytes read
* (the number of bytes written is always smaller, because we have
* partial words in the shift queue)
*/
cmp r2, #4
blo copy_last_3_and_return
/* Use post-increment mode for stm to spill r5-r11 to the reserved stack
* frame. Don't update sp.
*/
stmea sp, {r5-r11}
/* compute shifts needed to align src to dest */
rsb r5, r0, #0
and r5, r5, #3 /* r5 = # bytes in partial words */
mov r12, r5, lsl #3 /* r12 = right */
rsb lr, r12, #32 /* lr = left */
/* read the first word */
ldr r3, [r1], #4
sub r2, r2, #4
/* write a partial word (0 to 3 bytes), such that destination
* becomes aligned to 32 bits (r5 = number of bytes to copy for alignment)
*/
movs r5, r5, lsl #31
#if __ARMEB__
movmi r3, r3, ror #24
strbmi r3, [r0], #1
movcs r3, r3, ror #24
strbcs r3, [r0], #1
movcs r3, r3, ror #24
strbcs r3, [r0], #1
#else
strbmi r3, [r0], #1
movmi r3, r3, lsr #8
strbcs r3, [r0], #1
movcs r3, r3, lsr #8
strbcs r3, [r0], #1
movcs r3, r3, lsr #8
#endif
cmp r2, #4
blo partial_word_tail
#if __ARMEB__
mov r3, r3, lsr r12
mov r3, r3, lsl r12
#endif
/* Align destination to 32 bytes (cache line boundary) */
1: tst r0, #0x1c
beq 2f
ldr r5, [r1], #4
sub r2, r2, #4
#if __ARMEB__
mov r4, r5, lsr lr
orr r4, r4, r3
mov r3, r5, lsl r12
#else
mov r4, r5, lsl lr
orr r4, r4, r3
mov r3, r5, lsr r12
#endif
str r4, [r0], #4
cmp r2, #4
bhs 1b
blo partial_word_tail
/* copy 32 bytes at a time */
2: subs r2, r2, #32
blo less_than_thirtytwo
/* Use immediate mode for the shifts, because there is an extra cycle
* for register shifts, which could account for up to a 50%
* performance hit.
*/
cmp r12, #24
beq loop24
cmp r12, #8
beq loop8
loop16:
ldr r12, [r1], #4
1: mov r4, r12
ldmia r1!, { r5,r6,r7, r8,r9,r10,r11}
subs r2, r2, #32
ldrhs r12, [r1], #4
#if __ARMEB__
orr r3, r3, r4, lsr #16
mov r4, r4, lsl #16
orr r4, r4, r5, lsr #16
mov r5, r5, lsl #16
orr r5, r5, r6, lsr #16
mov r6, r6, lsl #16
orr r6, r6, r7, lsr #16
mov r7, r7, lsl #16
orr r7, r7, r8, lsr #16
mov r8, r8, lsl #16
orr r8, r8, r9, lsr #16
mov r9, r9, lsl #16
orr r9, r9, r10, lsr #16
mov r10, r10, lsl #16
orr r10, r10, r11, lsr #16
stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
mov r3, r11, lsl #16
#else
orr r3, r3, r4, lsl #16
mov r4, r4, lsr #16
orr r4, r4, r5, lsl #16
mov r5, r5, lsr #16
orr r5, r5, r6, lsl #16
mov r6, r6, lsr #16
orr r6, r6, r7, lsl #16
mov r7, r7, lsr #16
orr r7, r7, r8, lsl #16
mov r8, r8, lsr #16
orr r8, r8, r9, lsl #16
mov r9, r9, lsr #16
orr r9, r9, r10, lsl #16
mov r10, r10, lsr #16
orr r10, r10, r11, lsl #16
stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
mov r3, r11, lsr #16
#endif
bhs 1b
b less_than_thirtytwo
loop8:
ldr r12, [r1], #4
1: mov r4, r12
ldmia r1!, { r5,r6,r7, r8,r9,r10,r11}
subs r2, r2, #32
ldrhs r12, [r1], #4
#if __ARMEB__
orr r3, r3, r4, lsr #24
mov r4, r4, lsl #8
orr r4, r4, r5, lsr #24
mov r5, r5, lsl #8
orr r5, r5, r6, lsr #24
mov r6, r6, lsl #8
orr r6, r6, r7, lsr #24
mov r7, r7, lsl #8
orr r7, r7, r8, lsr #24
mov r8, r8, lsl #8
orr r8, r8, r9, lsr #24
mov r9, r9, lsl #8
orr r9, r9, r10, lsr #24
mov r10, r10, lsl #8
orr r10, r10, r11, lsr #24
stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
mov r3, r11, lsl #8
#else
orr r3, r3, r4, lsl #24
mov r4, r4, lsr #8
orr r4, r4, r5, lsl #24
mov r5, r5, lsr #8
orr r5, r5, r6, lsl #24
mov r6, r6, lsr #8
orr r6, r6, r7, lsl #24
mov r7, r7, lsr #8
orr r7, r7, r8, lsl #24
mov r8, r8, lsr #8
orr r8, r8, r9, lsl #24
mov r9, r9, lsr #8
orr r9, r9, r10, lsl #24
mov r10, r10, lsr #8
orr r10, r10, r11, lsl #24
stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
mov r3, r11, lsr #8
#endif
bhs 1b
b less_than_thirtytwo
loop24:
ldr r12, [r1], #4
1: mov r4, r12
ldmia r1!, { r5,r6,r7, r8,r9,r10,r11}
subs r2, r2, #32
ldrhs r12, [r1], #4
#if __ARMEB__
orr r3, r3, r4, lsr #8
mov r4, r4, lsl #24
orr r4, r4, r5, lsr #8
mov r5, r5, lsl #24
orr r5, r5, r6, lsr #8
mov r6, r6, lsl #24
orr r6, r6, r7, lsr #8
mov r7, r7, lsl #24
orr r7, r7, r8, lsr #8
mov r8, r8, lsl #24
orr r8, r8, r9, lsr #8
mov r9, r9, lsl #24
orr r9, r9, r10, lsr #8
mov r10, r10, lsl #24
orr r10, r10, r11, lsr #8
stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
mov r3, r11, lsl #24
#else
orr r3, r3, r4, lsl #8
mov r4, r4, lsr #24
orr r4, r4, r5, lsl #8
mov r5, r5, lsr #24
orr r5, r5, r6, lsl #8
mov r6, r6, lsr #24
orr r6, r6, r7, lsl #8
mov r7, r7, lsr #24
orr r7, r7, r8, lsl #8
mov r8, r8, lsr #24
orr r8, r8, r9, lsl #8
mov r9, r9, lsr #24
orr r9, r9, r10, lsl #8
mov r10, r10, lsr #24
orr r10, r10, r11, lsl #8
stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
mov r3, r11, lsr #24
#endif
bhs 1b
less_than_thirtytwo:
/* copy the last 0 to 31 bytes of the source */
rsb r12, lr, #32 /* we corrupted r12, recompute it */
add r2, r2, #32
cmp r2, #4
blo partial_word_tail
1: ldr r5, [r1], #4
sub r2, r2, #4
#if __ARMEB__
mov r4, r5, lsr lr
orr r4, r4, r3
mov r3, r5, lsl r12
#else
mov r4, r5, lsl lr
orr r4, r4, r3
mov r3, r5, lsr r12
#endif
str r4, [r0], #4
cmp r2, #4
bhs 1b
partial_word_tail:
/* we have a partial word in the input buffer */
movs r5, lr, lsl #(31-3)
#if __ARMEB__
movmi r3, r3, ror #24
strbmi r3, [r0], #1
movcs r3, r3, ror #24
strbcs r3, [r0], #1
movcs r3, r3, ror #24
strbcs r3, [r0], #1
#else
strbmi r3, [r0], #1
movmi r3, r3, lsr #8
strbcs r3, [r0], #1
movcs r3, r3, lsr #8
strbcs r3, [r0], #1
#endif
/* Refill spilled registers from the stack. Don't update sp. */
ldmfd sp, {r5-r11}
copy_last_3_and_return:
movs r2, r2, lsl #31 /* copy remaining 0, 1, 2 or 3 bytes */
ldrbmi r2, [r1], #1
ldrbcs r3, [r1], #1
ldrbcs r12,[r1]
strbmi r2, [r0], #1
strbcs r3, [r0], #1
strbcs r12,[r0]
/* we're done! restore sp and spilled registers and return */
add sp, sp, #28
ldmfd sp!, {r0, r4, lr}
bx lr
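As an aside on the alignment step near the top of this routine: the `rsb r3, r1, #0; ands r3, r3, #3` pair computes how many bytes are needed to word-align the source, using the `-src & 3` identity noted in the comment. A minimal Zig sketch of that identity follows; the helper name is made up for illustration and is not part of this change.
const std = @import("std");
// Hypothetical helper mirroring the "rsb r3, r1, #0; ands r3, r3, #3" sequence:
// the number of bytes needed to reach the next 4-byte boundary is
// (4 - (addr & 3)) & 3, which in two's complement is the same as (-addr) & 3.
fn alignOffset(addr: usize) usize {
    return (0 -% addr) & 3;
}
test "alignOffset matches the longhand formula" {
    var addr: usize = 0;
    while (addr < 16) : (addr += 1) {
        try std.testing.expectEqual((4 - (addr & 3)) & 3, alignOffset(addr));
    }
}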

View File

@@ -1,128 +0,0 @@
#include <string.h>
#include <stdint.h>
#include <endian.h>
void *memcpy(void *restrict dest, const void *restrict src, size_t n)
{
#if defined(__wasm_bulk_memory__)
if (n > BULK_MEMORY_THRESHOLD)
return __builtin_memcpy(dest, src, n);
#endif
unsigned char *d = dest;
const unsigned char *s = src;
#ifdef __GNUC__
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define LS >>
#define RS <<
#else
#define LS <<
#define RS >>
#endif
typedef uint32_t __attribute__((__may_alias__)) u32;
uint32_t w, x;
for (; (uintptr_t)s % 4 && n; n--) *d++ = *s++;
if ((uintptr_t)d % 4 == 0) {
for (; n>=16; s+=16, d+=16, n-=16) {
*(u32 *)(d+0) = *(u32 *)(s+0);
*(u32 *)(d+4) = *(u32 *)(s+4);
*(u32 *)(d+8) = *(u32 *)(s+8);
*(u32 *)(d+12) = *(u32 *)(s+12);
}
if (n&8) {
*(u32 *)(d+0) = *(u32 *)(s+0);
*(u32 *)(d+4) = *(u32 *)(s+4);
d += 8; s += 8;
}
if (n&4) {
*(u32 *)(d+0) = *(u32 *)(s+0);
d += 4; s += 4;
}
if (n&2) {
*d++ = *s++; *d++ = *s++;
}
if (n&1) {
*d = *s;
}
return dest;
}
if (n >= 32) switch ((uintptr_t)d % 4) {
case 1:
w = *(u32 *)s;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
n -= 3;
for (; n>=17; s+=16, d+=16, n-=16) {
x = *(u32 *)(s+1);
*(u32 *)(d+0) = (w LS 24) | (x RS 8);
w = *(u32 *)(s+5);
*(u32 *)(d+4) = (x LS 24) | (w RS 8);
x = *(u32 *)(s+9);
*(u32 *)(d+8) = (w LS 24) | (x RS 8);
w = *(u32 *)(s+13);
*(u32 *)(d+12) = (x LS 24) | (w RS 8);
}
break;
case 2:
w = *(u32 *)s;
*d++ = *s++;
*d++ = *s++;
n -= 2;
for (; n>=18; s+=16, d+=16, n-=16) {
x = *(u32 *)(s+2);
*(u32 *)(d+0) = (w LS 16) | (x RS 16);
w = *(u32 *)(s+6);
*(u32 *)(d+4) = (x LS 16) | (w RS 16);
x = *(u32 *)(s+10);
*(u32 *)(d+8) = (w LS 16) | (x RS 16);
w = *(u32 *)(s+14);
*(u32 *)(d+12) = (x LS 16) | (w RS 16);
}
break;
case 3:
w = *(u32 *)s;
*d++ = *s++;
n -= 1;
for (; n>=19; s+=16, d+=16, n-=16) {
x = *(u32 *)(s+3);
*(u32 *)(d+0) = (w LS 8) | (x RS 24);
w = *(u32 *)(s+7);
*(u32 *)(d+4) = (x LS 8) | (w RS 24);
x = *(u32 *)(s+11);
*(u32 *)(d+8) = (w LS 8) | (x RS 24);
w = *(u32 *)(s+15);
*(u32 *)(d+12) = (x LS 8) | (w RS 24);
}
break;
}
if (n&16) {
*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
}
if (n&8) {
*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
}
if (n&4) {
*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
}
if (n&2) {
*d++ = *s++; *d++ = *s++;
}
if (n&1) {
*d = *s;
}
return dest;
#endif
for (; n; n--) *d++ = *s++;
return dest;
}
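The LS/RS macros above encode the same shift-and-merge idea as the ARM assembly: read whole aligned words from the source and splice two neighbouring words together to form each misaligned word of output, with the shift directions swapped on big-endian targets. A minimal little-endian Zig sketch of that splice (names are illustrative, not from this change):
const std = @import("std");
// Illustrative only: `lo` and `hi` are two consecutive aligned source words and
// `byte_off` (1..3) is the copy misalignment; the result is the unaligned word
// that starts `byte_off` bytes into `lo`, assuming little-endian layout.
fn mergeWords(lo: u32, hi: u32, byte_off: u32) u32 {
    std.debug.assert(byte_off >= 1 and byte_off <= 3);
    const right: u5 = @intCast(byte_off * 8);
    const left: u5 = @intCast(32 - byte_off * 8);
    return (lo >> right) | (hi << left);
}
test "mergeWords reproduces an unaligned load" {
    const bytes = [_]u8{ 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88 };
    const lo = std.mem.readInt(u32, bytes[0..4], .little);
    const hi = std.mem.readInt(u32, bytes[4..8], .little);
    const expected = std.mem.readInt(u32, bytes[1..5], .little);
    try std.testing.expectEqual(expected, mergeWords(lo, hi, 1));
}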

View File

@@ -1,46 +0,0 @@
#include <string.h>
#include <stdint.h>
#ifdef __GNUC__
typedef __attribute__((__may_alias__)) size_t WT;
#define WS (sizeof(WT))
#endif
void *memmove(void *dest, const void *src, size_t n)
{
#if defined(__wasm_bulk_memory__)
if (n > BULK_MEMORY_THRESHOLD)
return __builtin_memmove(dest, src, n);
#endif
char *d = dest;
const char *s = src;
if (d==s) return d;
if ((uintptr_t)s-(uintptr_t)d-n <= -2*n) return memcpy(d, s, n);
if (d<s) {
#ifdef __GNUC__
if ((uintptr_t)s % WS == (uintptr_t)d % WS) {
while ((uintptr_t)d % WS) {
if (!n--) return dest;
*d++ = *s++;
}
for (; n>=WS; n-=WS, d+=WS, s+=WS) *(WT *)d = *(WT *)s;
}
#endif
for (; n; n--) *d++ = *s++;
} else {
#ifdef __GNUC__
if ((uintptr_t)s % WS == (uintptr_t)d % WS) {
while ((uintptr_t)(d+n) % WS) {
if (!n--) return dest;
d[n] = s[n];
}
while (n>=WS) n-=WS, *(WT *)(d+n) = *(WT *)(s+n);
}
#endif
while (n) n--, d[n] = s[n];
}
return dest;
}
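The `(uintptr_t)s-(uintptr_t)d-n <= -2*n` test above is a wrapping-arithmetic way of asking whether the two n-byte ranges are disjoint, in which case the copy can be forwarded to memcpy. A small Zig sketch of that check, assuming sane (non-wrapping) address ranges; the helper name is made up:
const std = @import("std");
// Sketch of musl's overlap test in wrapping unsigned arithmetic:
// `s - d - n <= -2n (mod 2^W)` holds exactly when the two n-byte ranges
// do not overlap, so a plain forward copy is safe.
fn regionsDisjoint(dest: usize, src: usize, n: usize) bool {
    return (src -% dest -% n) <= (0 -% (2 *% n));
}
test "regionsDisjoint agrees with a direct range check" {
    const cases = [_][3]usize{
        .{ 100, 110, 8 }, // src after dest, gap >= n: disjoint
        .{ 100, 104, 8 }, // overlapping
        .{ 110, 100, 8 }, // dest after src, gap >= n: disjoint
        .{ 104, 100, 8 }, // overlapping
    };
    for (cases) |c| {
        const direct = (c[1] + c[2] <= c[0]) or (c[0] + c[2] <= c[1]);
        try std.testing.expectEqual(direct, regionsDisjoint(c[0], c[1], c[2]));
    }
}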

View File

@@ -45,29 +45,21 @@ pub const File = union(enum) {
/// Encodes symbol rank so that the following ordering applies:
/// * strong in object
/// * weak in object
/// * tentative in object
/// * strong in archive/dylib
/// * weak in object
/// * weak in archive/dylib
/// * tentative in object
/// * tentative in archive
/// * unclaimed
/// Ties are broken by file priority.
pub fn getSymbolRank(file: File, args: struct {
archive: bool = false,
weak: bool = false,
tentative: bool = false,
}) u32 {
if (file != .dylib and !args.archive) {
const base: u32 = blk: {
if (args.tentative) break :blk 3;
break :blk if (args.weak) 2 else 1;
};
return (base << 16) + file.getIndex();
}
const base: u32 = blk: {
if (args.tentative) break :blk 3;
break :blk if (args.weak) 2 else 1;
};
return base + (file.getIndex() << 24);
const archive_or_dylib = @as(u32, @intFromBool(file == .dylib or args.archive)) << 29;
const strength: u32 = if (args.tentative) 0b10 << 30 else if (args.weak) 0b01 << 30 else 0b00 << 30;
return strength | archive_or_dylib | file.getIndex();
}
pub fn getAtom(file: File, atom_index: Atom.Index) ?*Atom {
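The replacement body packs the whole ordering into a single u32 so an ordinary integer compare ranks symbols: two strength bits (strong, weak, tentative) above one archive/dylib bit above the file index. A standalone Zig sketch of that packing, with illustrative names rather than the linker's actual API, and assuming the file index fits in the low 29 bits:
const std = @import("std");
// Illustrative packing: strength in bits 30..31, archive/dylib flag in bit 29,
// file index in the low 29 bits, so a smaller value means a stronger claim.
fn packRank(tentative: bool, weak: bool, in_archive_or_dylib: bool, file_index: u32) u32 {
    const strength: u32 = if (tentative) 0b10 else if (weak) 0b01 else 0b00;
    const archive_bit: u32 = @intFromBool(in_archive_or_dylib);
    return (strength << 30) | (archive_bit << 29) | file_index;
}
test "strong beats weak beats tentative; objects beat archives; ties by index" {
    const strong_obj = packRank(false, false, false, 1);
    const strong_archive = packRank(false, false, true, 1);
    const weak_obj = packRank(false, true, false, 1);
    const tentative_obj = packRank(true, false, false, 1);
    try std.testing.expect(strong_obj < strong_archive);
    try std.testing.expect(strong_archive < weak_obj);
    try std.testing.expect(weak_obj < tentative_obj);
    try std.testing.expect(packRank(false, false, false, 1) < packRank(false, false, false, 2));
}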

View File

@@ -1899,25 +1899,18 @@ const src_files = [_][]const u8{
"musl/src/stdlib/strtol.c",
"musl/src/stdlib/wcstod.c",
"musl/src/stdlib/wcstol.c",
"musl/src/string/aarch64/memcpy.S",
"musl/src/string/aarch64/memset.S",
"musl/src/string/arm/__aeabi_memcpy.s",
"musl/src/string/arm/__aeabi_memset.s",
"musl/src/string/arm/memcpy.S",
"musl/src/string/bcmp.c",
"musl/src/string/bcopy.c",
"musl/src/string/bzero.c",
"musl/src/string/explicit_bzero.c",
"musl/src/string/i386/memcpy.s",
"musl/src/string/i386/memmove.s",
"musl/src/string/i386/memset.s",
"musl/src/string/index.c",
"musl/src/string/memccpy.c",
"musl/src/string/memchr.c",
"musl/src/string/memcmp.c",
"musl/src/string/memcpy.c",
"musl/src/string/memmem.c",
"musl/src/string/memmove.c",
"musl/src/string/mempcpy.c",
"musl/src/string/memrchr.c",
"musl/src/string/memset.c",
@@ -1981,8 +1974,6 @@ const src_files = [_][]const u8{
"musl/src/string/wmemcpy.c",
"musl/src/string/wmemmove.c",
"musl/src/string/wmemset.c",
"musl/src/string/x86_64/memcpy.s",
"musl/src/string/x86_64/memmove.s",
"musl/src/string/x86_64/memset.s",
"musl/src/temp/mkdtemp.c",
"musl/src/temp/mkostemp.c",

View File

@@ -694,9 +694,7 @@ const libc_top_half_src_files = [_][]const u8{
"wasi/libc-top-half/musl/src/string/memccpy.c",
"wasi/libc-top-half/musl/src/string/memchr.c",
"wasi/libc-top-half/musl/src/string/memcmp.c",
"wasi/libc-top-half/musl/src/string/memcpy.c",
"wasi/libc-top-half/musl/src/string/memmem.c",
"wasi/libc-top-half/musl/src/string/memmove.c",
"wasi/libc-top-half/musl/src/string/mempcpy.c",
"wasi/libc-top-half/musl/src/string/memrchr.c",
"wasi/libc-top-half/musl/src/string/memset.c",