Merge pull request #22513 from ziglang/memcpy
enhance memcpy and remove redundant implementations
lib/compiler_rt.zig
@@ -233,7 +233,6 @@ comptime {
     _ = @import("compiler_rt/memcpy.zig");
     _ = @import("compiler_rt/memset.zig");
-    _ = @import("compiler_rt/memmove.zig");
     _ = @import("compiler_rt/memcmp.zig");
     _ = @import("compiler_rt/bcmp.zig");
     _ = @import("compiler_rt/ssp.zig");
lib/compiler_rt/memcpy.zig
@@ -5,24 +5,169 @@ const builtin = @import("builtin");
 comptime {
     if (builtin.object_format != .c) {
         @export(&memcpy, .{ .name = "memcpy", .linkage = common.linkage, .visibility = common.visibility });
+        @export(&memmove, .{ .name = "memmove", .linkage = common.linkage, .visibility = common.visibility });
     }
 }
 
-pub fn memcpy(noalias dest: ?[*]u8, noalias src: ?[*]const u8, len: usize) callconv(.C) ?[*]u8 {
-    @setRuntimeSafety(false);
-
-    if (len != 0) {
-        var d = dest.?;
-        var s = src.?;
-        var n = len;
-        while (true) {
-            d[0] = s[0];
-            n -= 1;
-            if (n == 0) break;
-            d += 1;
-            s += 1;
-        }
-    }
-
-    return dest;
-}
+const llvm_cannot_lower = switch (builtin.cpu.arch) {
+    .arm, .armeb, .thumb, .thumbeb => builtin.zig_backend == .stage2_llvm,
+    else => false,
+};
+
+fn memcpy(noalias opt_dest: ?[*]u8, noalias opt_src: ?[*]const u8, len: usize) callconv(.C) ?[*]u8 {
+    if (llvm_cannot_lower) {
+        for (0..len) |i| opt_dest.?[i] = opt_src.?[i];
+        return opt_dest;
+    } else {
+        return memmove(opt_dest, opt_src, len);
+    }
+}
+
+/// A port of https://github.com/facebook/folly/blob/1c8bc50e88804e2a7361a57cd9b551dd10f6c5fd/folly/memcpy.S
+fn memmove(opt_dest: ?[*]u8, opt_src: ?[*]const u8, len: usize) callconv(.C) ?[*]u8 {
+    if (llvm_cannot_lower) {
+        if (@intFromPtr(opt_dest) < @intFromPtr(opt_src)) {
+            for (0..len) |i| opt_dest.?[i] = opt_src.?[i];
+            return opt_dest;
+        } else {
+            var index = len;
+            while (index != 0) {
+                index -= 1;
+                opt_dest.?[index] = opt_src.?[index];
+            }
+            return opt_dest;
+        }
+    }
+
+    if (len == 0) {
+        @branchHint(.unlikely);
+        return opt_dest;
+    }
+
+    const dest = opt_dest.?;
+    const src = opt_src.?;
+
+    if (len < 8) {
+        @branchHint(.unlikely);
+        if (len == 1) {
+            @branchHint(.unlikely);
+            dest[0] = src[0];
+        } else if (len >= 4) {
+            @branchHint(.unlikely);
+            blockCopy(dest, src, 4, len);
+        } else {
+            blockCopy(dest, src, 2, len);
+        }
+        return dest;
+    }
+
+    if (len > 32) {
+        @branchHint(.unlikely);
+        if (len > 256) {
+            @branchHint(.unlikely);
+            copyMove(dest, src, len);
+            return dest;
+        }
+        copyLong(dest, src, len);
+        return dest;
+    }
+
+    if (len > 16) {
+        @branchHint(.unlikely);
+        blockCopy(dest, src, 16, len);
+        return dest;
+    }
+
+    blockCopy(dest, src, 8, len);
+
+    return dest;
+}
+
+inline fn blockCopy(dest: [*]u8, src: [*]const u8, block_size: comptime_int, len: usize) void {
+    const first = @as(*align(1) const @Vector(block_size, u8), src[0..block_size]).*;
+    const second = @as(*align(1) const @Vector(block_size, u8), src[len - block_size ..][0..block_size]).*;
+    dest[0..block_size].* = first;
+    dest[len - block_size ..][0..block_size].* = second;
+}
+
+inline fn copyLong(dest: [*]u8, src: [*]const u8, len: usize) void {
+    var array: [8]@Vector(32, u8) = undefined;
+
+    inline for (.{ 64, 128, 192, 256 }, 0..) |N, i| {
+        array[i * 2] = src[(N / 2) - 32 ..][0..32].*;
+        array[(i * 2) + 1] = src[len - N / 2 ..][0..32].*;
+
+        if (len <= N) {
+            @branchHint(.unlikely);
+            for (0..i + 1) |j| {
+                dest[j * 32 ..][0..32].* = array[j * 2];
+                dest[len - ((j * 32) + 32) ..][0..32].* = array[(j * 2) + 1];
+            }
+            return;
+        }
+    }
+}
+
+inline fn copyMove(dest: [*]u8, src: [*]const u8, len: usize) void {
+    if (@intFromPtr(src) >= @intFromPtr(dest)) {
+        @branchHint(.unlikely);
+        copyForward(dest, src, len);
+    } else if (@intFromPtr(src) + len > @intFromPtr(dest)) {
+        @branchHint(.unlikely);
+        overlapBwd(dest, src, len);
+    } else {
+        copyForward(dest, src, len);
+    }
+}
+
+inline fn copyForward(dest: [*]u8, src: [*]const u8, len: usize) void {
+    const tail: @Vector(32, u8) = src[len - 32 ..][0..32].*;
+
+    const N: usize = len & ~@as(usize, 127);
+    var i: usize = 0;
+
+    while (i < N) : (i += 128) {
+        dest[i..][0..32].* = src[i..][0..32].*;
+        dest[i + 32 ..][0..32].* = src[i + 32 ..][0..32].*;
+        dest[i + 64 ..][0..32].* = src[i + 64 ..][0..32].*;
+        dest[i + 96 ..][0..32].* = src[i + 96 ..][0..32].*;
+    }
+
+    if (len - i <= 32) {
+        @branchHint(.unlikely);
+        dest[len - 32 ..][0..32].* = tail;
+    } else {
+        copyLong(dest[i..], src[i..], len - i);
+    }
+}
+
+inline fn overlapBwd(dest: [*]u8, src: [*]const u8, len: usize) void {
+    var array: [5]@Vector(32, u8) = undefined;
+    array[0] = src[len - 32 ..][0..32].*;
+    inline for (1..5) |i| array[i] = src[(i - 1) << 5 ..][0..32].*;
+
+    const end: usize = (@intFromPtr(dest) + len - 32) & 31;
+    const range = len - end;
+    var s = src + range;
+    var d = dest + range;
+
+    while (@intFromPtr(s) > @intFromPtr(src + 128)) {
+        // zig fmt: off
+        const first  = @as(*align(1) const @Vector(32, u8), @ptrCast(s - 32)).*;
+        const second = @as(*align(1) const @Vector(32, u8), @ptrCast(s - 64)).*;
+        const third  = @as(*align(1) const @Vector(32, u8), @ptrCast(s - 96)).*;
+        const fourth = @as(*align(1) const @Vector(32, u8), @ptrCast(s - 128)).*;
+
+        @as(*align(32) @Vector(32, u8), @alignCast(@ptrCast(d - 32))).*  = first;
+        @as(*align(32) @Vector(32, u8), @alignCast(@ptrCast(d - 64))).*  = second;
+        @as(*align(32) @Vector(32, u8), @alignCast(@ptrCast(d - 96))).*  = third;
+        @as(*align(32) @Vector(32, u8), @alignCast(@ptrCast(d - 128))).* = fourth;
+        // zig fmt: on
+
+        s -= 128;
+        d -= 128;
+    }
+
+    inline for (array[1..], 0..) |vec, i| dest[i * 32 ..][0..32].* = vec;
+    dest[len - 32 ..][0..32].* = array[0];
+}
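[Not part of the diff] The small-size strategy above leans on one invariant worth spelling out: for block_size <= len <= 2 * block_size, copying one block from the front and one (possibly overlapping) block from the back touches every byte of the destination, which is why memmove can dispatch on len without a per-byte loop. A minimal standalone sketch of that invariant, using a hypothetical copyOverlapping helper (Zig):

    const std = @import("std");

    // Re-statement of the overlapping head/tail trick used by blockCopy:
    // for block <= len <= 2*block, writing src[0..block] and
    // src[len-block..len] covers every byte of dest[0..len].
    fn copyOverlapping(comptime block: usize, dest: []u8, src: []const u8) void {
        const len = src.len;
        dest[0..block].* = src[0..block].*;
        dest[len - block ..][0..block].* = src[len - block ..][0..block].*;
    }

    test "head/tail blocks cover every length in [block, 2*block]" {
        var src: [16]u8 = undefined;
        for (&src, 0..) |*b, i| b.* = @intCast(i);
        var len: usize = 8;
        while (len <= 16) : (len += 1) {
            var dest = [_]u8{0xaa} ** 16;
            copyOverlapping(8, dest[0..len], src[0..len]);
            try std.testing.expectEqualSlices(u8, src[0..len], dest[0..len]);
        }
    }

The same reasoning covers the 2/4/8/16-byte blockCopy calls above and the 32-byte blocks inside copyLong.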
lib/compiler_rt/memmove.zig
@@ -1,25 +0,0 @@
const std = @import("std");
const common = @import("./common.zig");

comptime {
    @export(&memmove, .{ .name = "memmove", .linkage = common.linkage, .visibility = common.visibility });
}

pub fn memmove(dest: ?[*]u8, src: ?[*]const u8, n: usize) callconv(.C) ?[*]u8 {
    @setRuntimeSafety(false);

    if (@intFromPtr(dest) < @intFromPtr(src)) {
        var index: usize = 0;
        while (index != n) : (index += 1) {
            dest.?[index] = src.?[index];
        }
    } else {
        var index = n;
        while (index != 0) {
            index -= 1;
            dest.?[index] = src.?[index];
        }
    }

    return dest;
}
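[Not part of the diff] The deleted memmove picked its copy direction by comparing the raw pointers, exactly as the llvm_cannot_lower fallback in the new memcpy.zig still does. A tiny illustration of why the backward loop is required once dest overlaps src from above:

    const std = @import("std");

    // With dest > src, a forward byte loop would re-read bytes it has
    // already overwritten; iterating backward is safe.
    test "overlapping copy must run backward when dest > src" {
        var buf = [_]u8{ 1, 2, 3, 4, 5, 0, 0 };
        // Shift [1,2,3,4,5] right by two: dest = buf[2..7], src = buf[0..5].
        var i: usize = 5;
        while (i != 0) {
            i -= 1;
            buf[i + 2] = buf[i];
        }
        try std.testing.expectEqualSlices(u8, &[_]u8{ 1, 2, 1, 2, 3, 4, 5 }, &buf);
    }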
lib/libc/musl/src/string/aarch64/memcpy.S (vendored, 186 lines deleted)
@@ -1,186 +0,0 @@
/*
 * memcpy - copy memory area
 *
 * Copyright (c) 2012-2020, Arm Limited.
 * SPDX-License-Identifier: MIT
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses.
 *
 */

#define dstin x0
#define src x1
#define count x2
#define dst x3
#define srcend x4
#define dstend x5
#define A_l x6
#define A_lw w6
#define A_h x7
#define B_l x8
#define B_lw w8
#define B_h x9
#define C_l x10
#define C_lw w10
#define C_h x11
#define D_l x12
#define D_h x13
#define E_l x14
#define E_h x15
#define F_l x16
#define F_h x17
#define G_l count
#define G_h dst
#define H_l src
#define H_h srcend
#define tmp1 x14

/* This implementation of memcpy uses unaligned accesses and branchless
   sequences to keep the code small, simple and improve performance.

   Copies are split into 3 main cases: small copies of up to 32 bytes, medium
   copies of up to 128 bytes, and large copies. The overhead of the overlap
   check is negligible since it is only required for large copies.

   Large copies use a software pipelined loop processing 64 bytes per iteration.
   The destination pointer is 16-byte aligned to minimize unaligned accesses.
   The loop tail is handled by always copying 64 bytes from the end.
*/

.global memcpy
.type memcpy,%function
memcpy:
    add srcend, src, count
    add dstend, dstin, count
    cmp count, 128
    b.hi .Lcopy_long
    cmp count, 32
    b.hi .Lcopy32_128

    /* Small copies: 0..32 bytes. */
    cmp count, 16
    b.lo .Lcopy16
    ldp A_l, A_h, [src]
    ldp D_l, D_h, [srcend, -16]
    stp A_l, A_h, [dstin]
    stp D_l, D_h, [dstend, -16]
    ret

    /* Copy 8-15 bytes. */
.Lcopy16:
    tbz count, 3, .Lcopy8
    ldr A_l, [src]
    ldr A_h, [srcend, -8]
    str A_l, [dstin]
    str A_h, [dstend, -8]
    ret

    .p2align 3
    /* Copy 4-7 bytes. */
.Lcopy8:
    tbz count, 2, .Lcopy4
    ldr A_lw, [src]
    ldr B_lw, [srcend, -4]
    str A_lw, [dstin]
    str B_lw, [dstend, -4]
    ret

    /* Copy 0..3 bytes using a branchless sequence. */
.Lcopy4:
    cbz count, .Lcopy0
    lsr tmp1, count, 1
    ldrb A_lw, [src]
    ldrb C_lw, [srcend, -1]
    ldrb B_lw, [src, tmp1]
    strb A_lw, [dstin]
    strb B_lw, [dstin, tmp1]
    strb C_lw, [dstend, -1]
.Lcopy0:
    ret

    .p2align 4
    /* Medium copies: 33..128 bytes. */
.Lcopy32_128:
    ldp A_l, A_h, [src]
    ldp B_l, B_h, [src, 16]
    ldp C_l, C_h, [srcend, -32]
    ldp D_l, D_h, [srcend, -16]
    cmp count, 64
    b.hi .Lcopy128
    stp A_l, A_h, [dstin]
    stp B_l, B_h, [dstin, 16]
    stp C_l, C_h, [dstend, -32]
    stp D_l, D_h, [dstend, -16]
    ret

    .p2align 4
    /* Copy 65..128 bytes. */
.Lcopy128:
    ldp E_l, E_h, [src, 32]
    ldp F_l, F_h, [src, 48]
    cmp count, 96
    b.ls .Lcopy96
    ldp G_l, G_h, [srcend, -64]
    ldp H_l, H_h, [srcend, -48]
    stp G_l, G_h, [dstend, -64]
    stp H_l, H_h, [dstend, -48]
.Lcopy96:
    stp A_l, A_h, [dstin]
    stp B_l, B_h, [dstin, 16]
    stp E_l, E_h, [dstin, 32]
    stp F_l, F_h, [dstin, 48]
    stp C_l, C_h, [dstend, -32]
    stp D_l, D_h, [dstend, -16]
    ret

    .p2align 4
    /* Copy more than 128 bytes. */
.Lcopy_long:

    /* Copy 16 bytes and then align dst to 16-byte alignment. */

    ldp D_l, D_h, [src]
    and tmp1, dstin, 15
    bic dst, dstin, 15
    sub src, src, tmp1
    add count, count, tmp1 /* Count is now 16 too large. */
    ldp A_l, A_h, [src, 16]
    stp D_l, D_h, [dstin]
    ldp B_l, B_h, [src, 32]
    ldp C_l, C_h, [src, 48]
    ldp D_l, D_h, [src, 64]!
    subs count, count, 128 + 16 /* Test and readjust count. */
    b.ls .Lcopy64_from_end

.Lloop64:
    stp A_l, A_h, [dst, 16]
    ldp A_l, A_h, [src, 16]
    stp B_l, B_h, [dst, 32]
    ldp B_l, B_h, [src, 32]
    stp C_l, C_h, [dst, 48]
    ldp C_l, C_h, [src, 48]
    stp D_l, D_h, [dst, 64]!
    ldp D_l, D_h, [src, 64]!
    subs count, count, 64
    b.hi .Lloop64

    /* Write the last iteration and copy 64 bytes from the end. */
.Lcopy64_from_end:
    ldp E_l, E_h, [srcend, -64]
    stp A_l, A_h, [dst, 16]
    ldp A_l, A_h, [srcend, -48]
    stp B_l, B_h, [dst, 32]
    ldp B_l, B_h, [srcend, -32]
    stp C_l, C_h, [dst, 48]
    ldp C_l, C_h, [srcend, -16]
    stp D_l, D_h, [dst, 64]
    stp E_l, E_h, [dstend, -64]
    stp A_l, A_h, [dstend, -48]
    stp B_l, B_h, [dstend, -32]
    stp C_l, C_h, [dstend, -16]
    ret

.size memcpy,.-memcpy
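[Not part of the diff] The strategy comment above ("the loop tail is handled by always copying 64 bytes from the end") is the same idiom the Zig port uses in copyForward. A hedged sketch of the idea, with an illustrative chunk size:

    const std = @import("std");

    // After the unrolled forward loop, one final fixed-size copy anchored
    // at len - chunk handles any remainder with no byte loop; the final
    // chunk may overlap bytes already written, which is harmless for a
    // non-overlapping (memcpy-style) copy. Sketch only; `chunk` is an
    // illustrative size, not taken from the diff.
    fn copyForwardFixedTail(dest: [*]u8, src: [*]const u8, len: usize) void {
        const chunk = 64;
        std.debug.assert(len >= chunk);
        var i: usize = 0;
        while (i + chunk <= len) : (i += chunk) {
            dest[i..][0..chunk].* = src[i..][0..chunk].*;
        }
        // Final (possibly overlapping) chunk anchored at the end.
        dest[len - chunk ..][0..chunk].* = src[len - chunk ..][0..chunk].*;
    }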
lib/libc/musl/src/string/arm/__aeabi_memcpy.s (vendored, 45 lines deleted)
@@ -1,45 +0,0 @@
.syntax unified

.global __aeabi_memcpy8
.global __aeabi_memcpy4
.global __aeabi_memcpy
.global __aeabi_memmove8
.global __aeabi_memmove4
.global __aeabi_memmove

.type __aeabi_memcpy8,%function
.type __aeabi_memcpy4,%function
.type __aeabi_memcpy,%function
.type __aeabi_memmove8,%function
.type __aeabi_memmove4,%function
.type __aeabi_memmove,%function

__aeabi_memmove8:
__aeabi_memmove4:
__aeabi_memmove:
    cmp r0, r1
    bls 3f
    cmp r2, #0
    beq 2f
    adds r0, r0, r2
    adds r2, r1, r2
1:  subs r2, r2, #1
    ldrb r3, [r2]
    subs r0, r0, #1
    strb r3, [r0]
    cmp r1, r2
    bne 1b
2:  bx lr
__aeabi_memcpy8:
__aeabi_memcpy4:
__aeabi_memcpy:
3:  cmp r2, #0
    beq 2f
    adds r2, r1, r2
1:  ldrb r3, [r1]
    adds r1, r1, #1
    strb r3, [r0]
    adds r0, r0, #1
    cmp r1, r2
    bne 1b
2:  bx lr
lib/libc/musl/src/string/arm/memcpy.S (vendored, 479 lines deleted)
@@ -1,479 +0,0 @@
/*
 * Copyright (C) 2008 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */


/*
 * Optimized memcpy() for ARM.
 *
 * note that memcpy() always returns the destination pointer,
 * so we have to preserve R0.
 */

/*
 * This file has been modified from the original for use in musl libc.
 * The main changes are: addition of .type memcpy,%function to make the
 * code safely callable from thumb mode, adjusting the return
 * instructions to be compatible with pre-thumb ARM cpus, removal of
 * prefetch code that is not compatible with older cpus and support for
 * building as thumb 2 and big-endian.
 */

.syntax unified

.global memcpy
.type memcpy,%function
memcpy:
    /* The stack must always be 64-bits aligned to be compliant with the
     * ARM ABI. Since we have to save R0, we might as well save R4
     * which we can use for better pipelining of the reads below
     */
    .fnstart
    .save {r0, r4, lr}
    stmfd sp!, {r0, r4, lr}
    /* Making room for r5-r11 which will be spilled later */
    .pad #28
    sub sp, sp, #28

    /* it simplifies things to take care of len<4 early */
    cmp r2, #4
    blo copy_last_3_and_return

    /* compute the offset to align the source
     * offset = (4-(src&3))&3 = -src & 3
     */
    rsb r3, r1, #0
    ands r3, r3, #3
    beq src_aligned

    /* align source to 32 bits. We need to insert 2 instructions between
     * a ldr[b|h] and str[b|h] because byte and half-word instructions
     * stall 2 cycles.
     */
    movs r12, r3, lsl #31
    sub r2, r2, r3 /* we know that r3 <= r2 because r2 >= 4 */
    ldrbmi r3, [r1], #1
    ldrbcs r4, [r1], #1
    ldrbcs r12,[r1], #1
    strbmi r3, [r0], #1
    strbcs r4, [r0], #1
    strbcs r12,[r0], #1

src_aligned:

    /* see if src and dst are aligned together (congruent) */
    eor r12, r0, r1
    tst r12, #3
    bne non_congruent

    /* Use post-incriment mode for stm to spill r5-r11 to reserved stack
     * frame. Don't update sp.
     */
    stmea sp, {r5-r11}

    /* align the destination to a cache-line */
    rsb r3, r0, #0
    ands r3, r3, #0x1C
    beq congruent_aligned32
    cmp r3, r2
    andhi r3, r2, #0x1C

    /* conditionnaly copies 0 to 7 words (length in r3) */
    movs r12, r3, lsl #28
    ldmcs r1!, {r4, r5, r6, r7} /* 16 bytes */
    ldmmi r1!, {r8, r9} /* 8 bytes */
    stmcs r0!, {r4, r5, r6, r7}
    stmmi r0!, {r8, r9}
    tst r3, #0x4
    ldrne r10,[r1], #4 /* 4 bytes */
    strne r10,[r0], #4
    sub r2, r2, r3

congruent_aligned32:
    /*
     * here source is aligned to 32 bytes.
     */

cached_aligned32:
    subs r2, r2, #32
    blo less_than_32_left

    /*
     * We preload a cache-line up to 64 bytes ahead. On the 926, this will
     * stall only until the requested world is fetched, but the linefill
     * continues in the the background.
     * While the linefill is going, we write our previous cache-line
     * into the write-buffer (which should have some free space).
     * When the linefill is done, the writebuffer will
     * start dumping its content into memory
     *
     * While all this is going, we then load a full cache line into
     * 8 registers, this cache line should be in the cache by now
     * (or partly in the cache).
     *
     * This code should work well regardless of the source/dest alignment.
     *
     */

    /* Align the preload register to a cache-line because the cpu does
     * "critical word first" (the first word requested is loaded first).
     */
    @ bic r12, r1, #0x1F
    @ add r12, r12, #64

1:  ldmia r1!, { r4-r11 }
    subs r2, r2, #32

    /*
     * NOTE: if r12 is more than 64 ahead of r1, the following ldrhi
     * for ARM9 preload will not be safely guarded by the preceding subs.
     * When it is safely guarded the only possibility to have SIGSEGV here
     * is because the caller overstates the length.
     */
    @ ldrhi r3, [r12], #32 /* cheap ARM9 preload */
    stmia r0!, { r4-r11 }
    bhs 1b

    add r2, r2, #32

less_than_32_left:
    /*
     * less than 32 bytes left at this point (length in r2)
     */

    /* skip all this if there is nothing to do, which should
     * be a common case (if not executed the code below takes
     * about 16 cycles)
     */
    tst r2, #0x1F
    beq 1f

    /* conditionnaly copies 0 to 31 bytes */
    movs r12, r2, lsl #28
    ldmcs r1!, {r4, r5, r6, r7} /* 16 bytes */
    ldmmi r1!, {r8, r9} /* 8 bytes */
    stmcs r0!, {r4, r5, r6, r7}
    stmmi r0!, {r8, r9}
    movs r12, r2, lsl #30
    ldrcs r3, [r1], #4 /* 4 bytes */
    ldrhmi r4, [r1], #2 /* 2 bytes */
    strcs r3, [r0], #4
    strhmi r4, [r0], #2
    tst r2, #0x1
    ldrbne r3, [r1] /* last byte */
    strbne r3, [r0]

    /* we're done! restore everything and return */
1:  ldmfd sp!, {r5-r11}
    ldmfd sp!, {r0, r4, lr}
    bx lr

    /********************************************************************/

non_congruent:
    /*
     * here source is aligned to 4 bytes
     * but destination is not.
     *
     * in the code below r2 is the number of bytes read
     * (the number of bytes written is always smaller, because we have
     * partial words in the shift queue)
     */
    cmp r2, #4
    blo copy_last_3_and_return

    /* Use post-incriment mode for stm to spill r5-r11 to reserved stack
     * frame. Don't update sp.
     */
    stmea sp, {r5-r11}

    /* compute shifts needed to align src to dest */
    rsb r5, r0, #0
    and r5, r5, #3 /* r5 = # bytes in partial words */
    mov r12, r5, lsl #3 /* r12 = right */
    rsb lr, r12, #32 /* lr = left */

    /* read the first word */
    ldr r3, [r1], #4
    sub r2, r2, #4

    /* write a partial word (0 to 3 bytes), such that destination
     * becomes aligned to 32 bits (r5 = nb of words to copy for alignment)
     */
    movs r5, r5, lsl #31

#if __ARMEB__
    movmi r3, r3, ror #24
    strbmi r3, [r0], #1
    movcs r3, r3, ror #24
    strbcs r3, [r0], #1
    movcs r3, r3, ror #24
    strbcs r3, [r0], #1
#else
    strbmi r3, [r0], #1
    movmi r3, r3, lsr #8
    strbcs r3, [r0], #1
    movcs r3, r3, lsr #8
    strbcs r3, [r0], #1
    movcs r3, r3, lsr #8
#endif

    cmp r2, #4
    blo partial_word_tail

#if __ARMEB__
    mov r3, r3, lsr r12
    mov r3, r3, lsl r12
#endif

    /* Align destination to 32 bytes (cache line boundary) */
1:  tst r0, #0x1c
    beq 2f
    ldr r5, [r1], #4
    sub r2, r2, #4
#if __ARMEB__
    mov r4, r5, lsr lr
    orr r4, r4, r3
    mov r3, r5, lsl r12
#else
    mov r4, r5, lsl lr
    orr r4, r4, r3
    mov r3, r5, lsr r12
#endif
    str r4, [r0], #4
    cmp r2, #4
    bhs 1b
    blo partial_word_tail

    /* copy 32 bytes at a time */
2:  subs r2, r2, #32
    blo less_than_thirtytwo

    /* Use immediate mode for the shifts, because there is an extra cycle
     * for register shifts, which could account for up to 50% of
     * performance hit.
     */

    cmp r12, #24
    beq loop24
    cmp r12, #8
    beq loop8

loop16:
    ldr r12, [r1], #4
1:  mov r4, r12
    ldmia r1!, { r5,r6,r7, r8,r9,r10,r11}
    subs r2, r2, #32
    ldrhs r12, [r1], #4
#if __ARMEB__
    orr r3, r3, r4, lsr #16
    mov r4, r4, lsl #16
    orr r4, r4, r5, lsr #16
    mov r5, r5, lsl #16
    orr r5, r5, r6, lsr #16
    mov r6, r6, lsl #16
    orr r6, r6, r7, lsr #16
    mov r7, r7, lsl #16
    orr r7, r7, r8, lsr #16
    mov r8, r8, lsl #16
    orr r8, r8, r9, lsr #16
    mov r9, r9, lsl #16
    orr r9, r9, r10, lsr #16
    mov r10, r10, lsl #16
    orr r10, r10, r11, lsr #16
    stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
    mov r3, r11, lsl #16
#else
    orr r3, r3, r4, lsl #16
    mov r4, r4, lsr #16
    orr r4, r4, r5, lsl #16
    mov r5, r5, lsr #16
    orr r5, r5, r6, lsl #16
    mov r6, r6, lsr #16
    orr r6, r6, r7, lsl #16
    mov r7, r7, lsr #16
    orr r7, r7, r8, lsl #16
    mov r8, r8, lsr #16
    orr r8, r8, r9, lsl #16
    mov r9, r9, lsr #16
    orr r9, r9, r10, lsl #16
    mov r10, r10, lsr #16
    orr r10, r10, r11, lsl #16
    stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
    mov r3, r11, lsr #16
#endif
    bhs 1b
    b less_than_thirtytwo

loop8:
    ldr r12, [r1], #4
1:  mov r4, r12
    ldmia r1!, { r5,r6,r7, r8,r9,r10,r11}
    subs r2, r2, #32
    ldrhs r12, [r1], #4
#if __ARMEB__
    orr r3, r3, r4, lsr #24
    mov r4, r4, lsl #8
    orr r4, r4, r5, lsr #24
    mov r5, r5, lsl #8
    orr r5, r5, r6, lsr #24
    mov r6, r6, lsl #8
    orr r6, r6, r7, lsr #24
    mov r7, r7, lsl #8
    orr r7, r7, r8, lsr #24
    mov r8, r8, lsl #8
    orr r8, r8, r9, lsr #24
    mov r9, r9, lsl #8
    orr r9, r9, r10, lsr #24
    mov r10, r10, lsl #8
    orr r10, r10, r11, lsr #24
    stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
    mov r3, r11, lsl #8
#else
    orr r3, r3, r4, lsl #24
    mov r4, r4, lsr #8
    orr r4, r4, r5, lsl #24
    mov r5, r5, lsr #8
    orr r5, r5, r6, lsl #24
    mov r6, r6, lsr #8
    orr r6, r6, r7, lsl #24
    mov r7, r7, lsr #8
    orr r7, r7, r8, lsl #24
    mov r8, r8, lsr #8
    orr r8, r8, r9, lsl #24
    mov r9, r9, lsr #8
    orr r9, r9, r10, lsl #24
    mov r10, r10, lsr #8
    orr r10, r10, r11, lsl #24
    stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
    mov r3, r11, lsr #8
#endif
    bhs 1b
    b less_than_thirtytwo

loop24:
    ldr r12, [r1], #4
1:  mov r4, r12
    ldmia r1!, { r5,r6,r7, r8,r9,r10,r11}
    subs r2, r2, #32
    ldrhs r12, [r1], #4
#if __ARMEB__
    orr r3, r3, r4, lsr #8
    mov r4, r4, lsl #24
    orr r4, r4, r5, lsr #8
    mov r5, r5, lsl #24
    orr r5, r5, r6, lsr #8
    mov r6, r6, lsl #24
    orr r6, r6, r7, lsr #8
    mov r7, r7, lsl #24
    orr r7, r7, r8, lsr #8
    mov r8, r8, lsl #24
    orr r8, r8, r9, lsr #8
    mov r9, r9, lsl #24
    orr r9, r9, r10, lsr #8
    mov r10, r10, lsl #24
    orr r10, r10, r11, lsr #8
    stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
    mov r3, r11, lsl #24
#else
    orr r3, r3, r4, lsl #8
    mov r4, r4, lsr #24
    orr r4, r4, r5, lsl #8
    mov r5, r5, lsr #24
    orr r5, r5, r6, lsl #8
    mov r6, r6, lsr #24
    orr r6, r6, r7, lsl #8
    mov r7, r7, lsr #24
    orr r7, r7, r8, lsl #8
    mov r8, r8, lsr #24
    orr r8, r8, r9, lsl #8
    mov r9, r9, lsr #24
    orr r9, r9, r10, lsl #8
    mov r10, r10, lsr #24
    orr r10, r10, r11, lsl #8
    stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
    mov r3, r11, lsr #24
#endif
    bhs 1b

less_than_thirtytwo:
    /* copy the last 0 to 31 bytes of the source */
    rsb r12, lr, #32 /* we corrupted r12, recompute it */
    add r2, r2, #32
    cmp r2, #4
    blo partial_word_tail

1:  ldr r5, [r1], #4
    sub r2, r2, #4
#if __ARMEB__
    mov r4, r5, lsr lr
    orr r4, r4, r3
    mov r3, r5, lsl r12
#else
    mov r4, r5, lsl lr
    orr r4, r4, r3
    mov r3, r5, lsr r12
#endif
    str r4, [r0], #4
    cmp r2, #4
    bhs 1b

partial_word_tail:
    /* we have a partial word in the input buffer */
    movs r5, lr, lsl #(31-3)
#if __ARMEB__
    movmi r3, r3, ror #24
    strbmi r3, [r0], #1
    movcs r3, r3, ror #24
    strbcs r3, [r0], #1
    movcs r3, r3, ror #24
    strbcs r3, [r0], #1
#else
    strbmi r3, [r0], #1
    movmi r3, r3, lsr #8
    strbcs r3, [r0], #1
    movcs r3, r3, lsr #8
    strbcs r3, [r0], #1
#endif

    /* Refill spilled registers from the stack. Don't update sp. */
    ldmfd sp, {r5-r11}

copy_last_3_and_return:
    movs r2, r2, lsl #31 /* copy remaining 0, 1, 2 or 3 bytes */
    ldrbmi r2, [r1], #1
    ldrbcs r3, [r1], #1
    ldrbcs r12,[r1]
    strbmi r2, [r0], #1
    strbcs r3, [r0], #1
    strbcs r12,[r0]

    /* we're done! restore sp and spilled registers and return */
    add sp, sp, #28
    ldmfd sp!, {r0, r4, lr}
    bx lr
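[Not part of the diff] The non_congruent path above is the classic shift-and-merge copy: when src and dst disagree mod 4, read whole source words and stitch each output word from two neighbours. A little-endian Zig sketch of the idea (hypothetical helper; the asm's partial-word head/tail handling is omitted, and the final read may touch one word past the logical end, as the asm's tail code avoids):

    const std = @import("std");

    fn mergeCopyMisaligned(dst: [*]align(4) u8, src: [*]const u8, words: usize) void {
        const byte_off = @intFromPtr(src) % 4;
        std.debug.assert(byte_off != 0); // the congruent case uses plain word copies
        const rs: u5 = @intCast(byte_off * 8); // right shift: 8, 16 or 24
        const ls: u5 = @intCast(32 - byte_off * 8); // left shift: 24, 16 or 8
        const base: [*]const u32 = @ptrFromInt(@intFromPtr(src) - byte_off);
        const d: [*]u32 = @ptrCast(dst);
        var w = base[0] >> rs; // partial word carried between iterations
        for (0..words) |i| {
            const next = base[i + 1];
            d[i] = w | (next << ls); // little-endian merge of two source words
            w = next >> rs;
        }
    }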
lib/libc/musl/src/string/i386/memcpy.s (vendored, 32 lines deleted)
@@ -1,32 +0,0 @@
.global memcpy
.global __memcpy_fwd
.hidden __memcpy_fwd
.type memcpy,@function
memcpy:
__memcpy_fwd:
    push %esi
    push %edi
    mov 12(%esp),%edi
    mov 16(%esp),%esi
    mov 20(%esp),%ecx
    mov %edi,%eax
    cmp $4,%ecx
    jc 1f
    test $3,%edi
    jz 1f
2:  movsb
    dec %ecx
    test $3,%edi
    jnz 2b
1:  mov %ecx,%edx
    shr $2,%ecx
    rep
    movsl
    and $3,%edx
    jz 1f
2:  movsb
    dec %edx
    jnz 2b
1:  pop %edi
    pop %esi
    ret

lib/libc/musl/src/string/i386/memmove.s (vendored, 22 lines deleted)
@@ -1,22 +0,0 @@
.global memmove
.type memmove,@function
memmove:
    mov 4(%esp),%eax
    sub 8(%esp),%eax
    cmp 12(%esp),%eax
.hidden __memcpy_fwd
    jae __memcpy_fwd
    push %esi
    push %edi
    mov 12(%esp),%edi
    mov 16(%esp),%esi
    mov 20(%esp),%ecx
    lea -1(%edi,%ecx),%edi
    lea -1(%esi,%ecx),%esi
    std
    rep movsb
    cld
    lea 1(%edi),%eax
    pop %edi
    pop %esi
    ret
lib/libc/musl/src/string/memcpy.c (vendored, 124 lines deleted)
@@ -1,124 +0,0 @@
#include <string.h>
#include <stdint.h>
#include <endian.h>

void *memcpy(void *restrict dest, const void *restrict src, size_t n)
{
    unsigned char *d = dest;
    const unsigned char *s = src;

#ifdef __GNUC__

#if __BYTE_ORDER == __LITTLE_ENDIAN
#define LS >>
#define RS <<
#else
#define LS <<
#define RS >>
#endif

    typedef uint32_t __attribute__((__may_alias__)) u32;
    uint32_t w, x;

    for (; (uintptr_t)s % 4 && n; n--) *d++ = *s++;

    if ((uintptr_t)d % 4 == 0) {
        for (; n>=16; s+=16, d+=16, n-=16) {
            *(u32 *)(d+0) = *(u32 *)(s+0);
            *(u32 *)(d+4) = *(u32 *)(s+4);
            *(u32 *)(d+8) = *(u32 *)(s+8);
            *(u32 *)(d+12) = *(u32 *)(s+12);
        }
        if (n&8) {
            *(u32 *)(d+0) = *(u32 *)(s+0);
            *(u32 *)(d+4) = *(u32 *)(s+4);
            d += 8; s += 8;
        }
        if (n&4) {
            *(u32 *)(d+0) = *(u32 *)(s+0);
            d += 4; s += 4;
        }
        if (n&2) {
            *d++ = *s++; *d++ = *s++;
        }
        if (n&1) {
            *d = *s;
        }
        return dest;
    }

    if (n >= 32) switch ((uintptr_t)d % 4) {
    case 1:
        w = *(u32 *)s;
        *d++ = *s++;
        *d++ = *s++;
        *d++ = *s++;
        n -= 3;
        for (; n>=17; s+=16, d+=16, n-=16) {
            x = *(u32 *)(s+1);
            *(u32 *)(d+0) = (w LS 24) | (x RS 8);
            w = *(u32 *)(s+5);
            *(u32 *)(d+4) = (x LS 24) | (w RS 8);
            x = *(u32 *)(s+9);
            *(u32 *)(d+8) = (w LS 24) | (x RS 8);
            w = *(u32 *)(s+13);
            *(u32 *)(d+12) = (x LS 24) | (w RS 8);
        }
        break;
    case 2:
        w = *(u32 *)s;
        *d++ = *s++;
        *d++ = *s++;
        n -= 2;
        for (; n>=18; s+=16, d+=16, n-=16) {
            x = *(u32 *)(s+2);
            *(u32 *)(d+0) = (w LS 16) | (x RS 16);
            w = *(u32 *)(s+6);
            *(u32 *)(d+4) = (x LS 16) | (w RS 16);
            x = *(u32 *)(s+10);
            *(u32 *)(d+8) = (w LS 16) | (x RS 16);
            w = *(u32 *)(s+14);
            *(u32 *)(d+12) = (x LS 16) | (w RS 16);
        }
        break;
    case 3:
        w = *(u32 *)s;
        *d++ = *s++;
        n -= 1;
        for (; n>=19; s+=16, d+=16, n-=16) {
            x = *(u32 *)(s+3);
            *(u32 *)(d+0) = (w LS 8) | (x RS 24);
            w = *(u32 *)(s+7);
            *(u32 *)(d+4) = (x LS 8) | (w RS 24);
            x = *(u32 *)(s+11);
            *(u32 *)(d+8) = (w LS 8) | (x RS 24);
            w = *(u32 *)(s+15);
            *(u32 *)(d+12) = (x LS 8) | (w RS 24);
        }
        break;
    }
    if (n&16) {
        *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
        *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
        *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
        *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
    }
    if (n&8) {
        *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
        *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
    }
    if (n&4) {
        *d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
    }
    if (n&2) {
        *d++ = *s++; *d++ = *s++;
    }
    if (n&1) {
        *d = *s;
    }
    return dest;
#endif

    for (; n; n--) *d++ = *s++;
    return dest;
}
lib/libc/musl/src/string/memmove.c (vendored, 42 lines deleted)
@@ -1,42 +0,0 @@
#include <string.h>
#include <stdint.h>

#ifdef __GNUC__
typedef __attribute__((__may_alias__)) size_t WT;
#define WS (sizeof(WT))
#endif

void *memmove(void *dest, const void *src, size_t n)
{
    char *d = dest;
    const char *s = src;

    if (d==s) return d;
    if ((uintptr_t)s-(uintptr_t)d-n <= -2*n) return memcpy(d, s, n);

    if (d<s) {
#ifdef __GNUC__
        if ((uintptr_t)s % WS == (uintptr_t)d % WS) {
            while ((uintptr_t)d % WS) {
                if (!n--) return dest;
                *d++ = *s++;
            }
            for (; n>=WS; n-=WS, d+=WS, s+=WS) *(WT *)d = *(WT *)s;
        }
#endif
        for (; n; n--) *d++ = *s++;
    } else {
#ifdef __GNUC__
        if ((uintptr_t)s % WS == (uintptr_t)d % WS) {
            while ((uintptr_t)(d+n) % WS) {
                if (!n--) return dest;
                d[n] = s[n];
            }
            while (n>=WS) n-=WS, *(WT *)(d+n) = *(WT *)(s+n);
        }
#endif
        while (n) n--, d[n] = s[n];
    }

    return dest;
}
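[Not part of the diff] The one subtle line in the deleted memmove.c is the overlap gate `(uintptr_t)s-(uintptr_t)d-n <= -2*n`: in wrapping unsigned arithmetic it is true exactly when the two n-byte regions are at least n bytes apart in either direction, so the forward memcpy is safe. A Zig restatement plus a brute-force check:

    const std = @import("std");

    // musl's gate, with Zig's explicit wrapping operators.
    fn regionsDisjoint(d: usize, s: usize, n: usize) bool {
        return (s -% d -% n) <= (0 -% (2 *% n));
    }

    test "wraparound overlap test matches the naive one" {
        const n: usize = 8;
        var d: usize = 0;
        while (d < 40) : (d += 1) {
            var s: usize = 0;
            while (s < 40) : (s += 1) {
                const naive = (d + n <= s) or (s + n <= d);
                try std.testing.expectEqual(naive, regionsDisjoint(d, s, n));
            }
        }
    }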
lib/libc/musl/src/string/x86_64/memcpy.s (vendored, 25 lines deleted)
@@ -1,25 +0,0 @@
.global memcpy
.global __memcpy_fwd
.hidden __memcpy_fwd
.type memcpy,@function
memcpy:
__memcpy_fwd:
    mov %rdi,%rax
    cmp $8,%rdx
    jc 1f
    test $7,%edi
    jz 1f
2:  movsb
    dec %rdx
    test $7,%edi
    jnz 2b
1:  mov %rdx,%rcx
    shr $3,%rcx
    rep
    movsq
    and $7,%edx
    jz 1f
2:  movsb
    dec %edx
    jnz 2b
1:  ret

lib/libc/musl/src/string/x86_64/memmove.s (vendored, 16 lines deleted)
@@ -1,16 +0,0 @@
.global memmove
.type memmove,@function
memmove:
    mov %rdi,%rax
    sub %rsi,%rax
    cmp %rdx,%rax
.hidden __memcpy_fwd
    jae __memcpy_fwd
    mov %rdx,%rcx
    lea -1(%rdi,%rdx),%rdi
    lea -1(%rsi,%rdx),%rsi
    std
    rep movsb
    cld
    lea 1(%rdi),%rax
    ret
@@ -1,70 +0,0 @@
|
||||
#if __ARM_PCS_VFP
|
||||
|
||||
.syntax unified
|
||||
.fpu vfp
|
||||
|
||||
.global fegetround
|
||||
.type fegetround,%function
|
||||
fegetround:
|
||||
fmrx r0, fpscr
|
||||
and r0, r0, #0xc00000
|
||||
bx lr
|
||||
|
||||
.global __fesetround
|
||||
.hidden __fesetround
|
||||
.type __fesetround,%function
|
||||
__fesetround:
|
||||
fmrx r3, fpscr
|
||||
bic r3, r3, #0xc00000
|
||||
orr r3, r3, r0
|
||||
fmxr fpscr, r3
|
||||
mov r0, #0
|
||||
bx lr
|
||||
|
||||
.global fetestexcept
|
||||
.type fetestexcept,%function
|
||||
fetestexcept:
|
||||
and r0, r0, #0x1f
|
||||
fmrx r3, fpscr
|
||||
and r0, r0, r3
|
||||
bx lr
|
||||
|
||||
.global feclearexcept
|
||||
.type feclearexcept,%function
|
||||
feclearexcept:
|
||||
and r0, r0, #0x1f
|
||||
fmrx r3, fpscr
|
||||
bic r3, r3, r0
|
||||
fmxr fpscr, r3
|
||||
mov r0, #0
|
||||
bx lr
|
||||
|
||||
.global feraiseexcept
|
||||
.type feraiseexcept,%function
|
||||
feraiseexcept:
|
||||
and r0, r0, #0x1f
|
||||
fmrx r3, fpscr
|
||||
orr r3, r3, r0
|
||||
fmxr fpscr, r3
|
||||
mov r0, #0
|
||||
bx lr
|
||||
|
||||
.global fegetenv
|
||||
.type fegetenv,%function
|
||||
fegetenv:
|
||||
fmrx r3, fpscr
|
||||
str r3, [r0]
|
||||
mov r0, #0
|
||||
bx lr
|
||||
|
||||
.global fesetenv
|
||||
.type fesetenv,%function
|
||||
fesetenv:
|
||||
cmn r0, #1
|
||||
moveq r3, #0
|
||||
ldrne r3, [r0]
|
||||
fmxr fpscr, r3
|
||||
mov r0, #0
|
||||
bx lr
|
||||
|
||||
#endif
|
||||
@@ -1,72 +0,0 @@
|
||||
#ifndef __mips_soft_float
|
||||
|
||||
.set noreorder
|
||||
|
||||
.global feclearexcept
|
||||
.type feclearexcept,@function
|
||||
feclearexcept:
|
||||
and $4, $4, 0x7c
|
||||
cfc1 $5, $31
|
||||
or $5, $5, $4
|
||||
xor $5, $5, $4
|
||||
ctc1 $5, $31
|
||||
jr $ra
|
||||
li $2, 0
|
||||
|
||||
.global feraiseexcept
|
||||
.type feraiseexcept,@function
|
||||
feraiseexcept:
|
||||
and $4, $4, 0x7c
|
||||
cfc1 $5, $31
|
||||
or $5, $5, $4
|
||||
ctc1 $5, $31
|
||||
jr $ra
|
||||
li $2, 0
|
||||
|
||||
.global fetestexcept
|
||||
.type fetestexcept,@function
|
||||
fetestexcept:
|
||||
and $4, $4, 0x7c
|
||||
cfc1 $2, $31
|
||||
jr $ra
|
||||
and $2, $2, $4
|
||||
|
||||
.global fegetround
|
||||
.type fegetround,@function
|
||||
fegetround:
|
||||
cfc1 $2, $31
|
||||
jr $ra
|
||||
andi $2, $2, 3
|
||||
|
||||
.global __fesetround
|
||||
.hidden __fesetround
|
||||
.type __fesetround,@function
|
||||
__fesetround:
|
||||
cfc1 $5, $31
|
||||
li $6, -4
|
||||
and $5, $5, $6
|
||||
or $5, $5, $4
|
||||
ctc1 $5, $31
|
||||
jr $ra
|
||||
li $2, 0
|
||||
|
||||
.global fegetenv
|
||||
.type fegetenv,@function
|
||||
fegetenv:
|
||||
cfc1 $5, $31
|
||||
sw $5, 0($4)
|
||||
jr $ra
|
||||
li $2, 0
|
||||
|
||||
.global fesetenv
|
||||
.type fesetenv,@function
|
||||
fesetenv:
|
||||
addiu $5, $4, 1
|
||||
beq $5, $0, 1f
|
||||
nop
|
||||
lw $5, 0($4)
|
||||
1: ctc1 $5, $31
|
||||
jr $ra
|
||||
li $2, 0
|
||||
|
||||
#endif
|
||||
@@ -1,72 +0,0 @@
|
||||
#ifndef __mips_soft_float
|
||||
|
||||
.set noreorder
|
||||
|
||||
.global feclearexcept
|
||||
.type feclearexcept,@function
|
||||
feclearexcept:
|
||||
and $4, $4, 0x7c
|
||||
cfc1 $5, $31
|
||||
or $5, $5, $4
|
||||
xor $5, $5, $4
|
||||
ctc1 $5, $31
|
||||
jr $ra
|
||||
li $2, 0
|
||||
|
||||
.global feraiseexcept
|
||||
.type feraiseexcept,@function
|
||||
feraiseexcept:
|
||||
and $4, $4, 0x7c
|
||||
cfc1 $5, $31
|
||||
or $5, $5, $4
|
||||
ctc1 $5, $31
|
||||
jr $ra
|
||||
li $2, 0
|
||||
|
||||
.global fetestexcept
|
||||
.type fetestexcept,@function
|
||||
fetestexcept:
|
||||
and $4, $4, 0x7c
|
||||
cfc1 $2, $31
|
||||
jr $ra
|
||||
and $2, $2, $4
|
||||
|
||||
.global fegetround
|
||||
.type fegetround,@function
|
||||
fegetround:
|
||||
cfc1 $2, $31
|
||||
jr $ra
|
||||
andi $2, $2, 3
|
||||
|
||||
.global __fesetround
|
||||
.hidden __fesetround
|
||||
.type __fesetround,@function
|
||||
__fesetround:
|
||||
cfc1 $5, $31
|
||||
li $6, -4
|
||||
and $5, $5, $6
|
||||
or $5, $5, $4
|
||||
ctc1 $5, $31
|
||||
jr $ra
|
||||
li $2, 0
|
||||
|
||||
.global fegetenv
|
||||
.type fegetenv,@function
|
||||
fegetenv:
|
||||
cfc1 $5, $31
|
||||
sw $5, 0($4)
|
||||
jr $ra
|
||||
li $2, 0
|
||||
|
||||
.global fesetenv
|
||||
.type fesetenv,@function
|
||||
fesetenv:
|
||||
daddiu $5, $4, 1
|
||||
beq $5, $0, 1f
|
||||
nop
|
||||
lw $5, 0($4)
|
||||
1: ctc1 $5, $31
|
||||
jr $ra
|
||||
li $2, 0
|
||||
|
||||
#endif
|
||||
@@ -1,71 +0,0 @@
|
||||
#ifndef __mips_soft_float
|
||||
|
||||
.set noreorder
|
||||
.global feclearexcept
|
||||
.type feclearexcept,@function
|
||||
feclearexcept:
|
||||
and $4, $4, 0x7c
|
||||
cfc1 $5, $31
|
||||
or $5, $5, $4
|
||||
xor $5, $5, $4
|
||||
ctc1 $5, $31
|
||||
jr $ra
|
||||
li $2, 0
|
||||
|
||||
.global feraiseexcept
|
||||
.type feraiseexcept,@function
|
||||
feraiseexcept:
|
||||
and $4, $4, 0x7c
|
||||
cfc1 $5, $31
|
||||
or $5, $5, $4
|
||||
ctc1 $5, $31
|
||||
jr $ra
|
||||
li $2, 0
|
||||
|
||||
.global fetestexcept
|
||||
.type fetestexcept,@function
|
||||
fetestexcept:
|
||||
and $4, $4, 0x7c
|
||||
cfc1 $2, $31
|
||||
jr $ra
|
||||
and $2, $2, $4
|
||||
|
||||
.global fegetround
|
||||
.type fegetround,@function
|
||||
fegetround:
|
||||
cfc1 $2, $31
|
||||
jr $ra
|
||||
andi $2, $2, 3
|
||||
|
||||
.global __fesetround
|
||||
.hidden __fesetround
|
||||
.type __fesetround,@function
|
||||
__fesetround:
|
||||
cfc1 $5, $31
|
||||
li $6, -4
|
||||
and $5, $5, $6
|
||||
or $5, $5, $4
|
||||
ctc1 $5, $31
|
||||
jr $ra
|
||||
li $2, 0
|
||||
|
||||
.global fegetenv
|
||||
.type fegetenv,@function
|
||||
fegetenv:
|
||||
cfc1 $5, $31
|
||||
sw $5, 0($4)
|
||||
jr $ra
|
||||
li $2, 0
|
||||
|
||||
.global fesetenv
|
||||
.type fesetenv,@function
|
||||
fesetenv:
|
||||
addiu $5, $4, 1
|
||||
beq $5, $0, 1f
|
||||
nop
|
||||
lw $5, 0($4)
|
||||
1: ctc1 $5, $31
|
||||
jr $ra
|
||||
li $2, 0
|
||||
|
||||
#endif
|
||||
@@ -1,130 +0,0 @@
|
||||
#if !defined(_SOFT_FLOAT) && !defined(__NO_FPRS__)
|
||||
.global feclearexcept
|
||||
.type feclearexcept,@function
|
||||
feclearexcept:
|
||||
andis. 3,3,0x3e00
|
||||
/* if (r3 & FE_INVALID) r3 |= all_invalid_flags */
|
||||
andis. 0,3,0x2000
|
||||
stwu 1,-16(1)
|
||||
beq- 0,1f
|
||||
oris 3,3,0x01f8
|
||||
ori 3,3,0x0700
|
||||
1:
|
||||
/*
|
||||
* note: fpscr contains various fpu status and control
|
||||
* flags and we dont check if r3 may alter other flags
|
||||
* than the exception related ones
|
||||
* ufpscr &= ~r3
|
||||
*/
|
||||
mffs 0
|
||||
stfd 0,8(1)
|
||||
lwz 9,12(1)
|
||||
andc 9,9,3
|
||||
stw 9,12(1)
|
||||
lfd 0,8(1)
|
||||
mtfsf 255,0
|
||||
|
||||
/* return 0 */
|
||||
li 3,0
|
||||
addi 1,1,16
|
||||
blr
|
||||
|
||||
.global feraiseexcept
|
||||
.type feraiseexcept,@function
|
||||
feraiseexcept:
|
||||
andis. 3,3,0x3e00
|
||||
/* if (r3 & FE_INVALID) r3 |= software_invalid_flag */
|
||||
andis. 0,3,0x2000
|
||||
stwu 1,-16(1)
|
||||
beq- 0,1f
|
||||
ori 3,3,0x0400
|
||||
1:
|
||||
/* fpscr |= r3 */
|
||||
mffs 0
|
||||
stfd 0,8(1)
|
||||
lwz 9,12(1)
|
||||
or 9,9,3
|
||||
stw 9,12(1)
|
||||
lfd 0,8(1)
|
||||
mtfsf 255,0
|
||||
|
||||
/* return 0 */
|
||||
li 3,0
|
||||
addi 1,1,16
|
||||
blr
|
||||
|
||||
.global fetestexcept
|
||||
.type fetestexcept,@function
|
||||
fetestexcept:
|
||||
andis. 3,3,0x3e00
|
||||
/* return r3 & fpscr */
|
||||
stwu 1,-16(1)
|
||||
mffs 0
|
||||
stfd 0,8(1)
|
||||
lwz 9,12(1)
|
||||
addi 1,1,16
|
||||
and 3,3,9
|
||||
blr
|
||||
|
||||
.global fegetround
|
||||
.type fegetround,@function
|
||||
fegetround:
|
||||
/* return fpscr & 3 */
|
||||
stwu 1,-16(1)
|
||||
mffs 0
|
||||
stfd 0,8(1)
|
||||
lwz 3,12(1)
|
||||
addi 1,1,16
|
||||
clrlwi 3,3,30
|
||||
blr
|
||||
|
||||
.global __fesetround
|
||||
.hidden __fesetround
|
||||
.type __fesetround,@function
|
||||
__fesetround:
|
||||
/*
|
||||
* note: invalid input is not checked, r3 < 4 must hold
|
||||
* fpscr = (fpscr & -4U) | r3
|
||||
*/
|
||||
stwu 1,-16(1)
|
||||
mffs 0
|
||||
stfd 0,8(1)
|
||||
lwz 9,12(1)
|
||||
clrrwi 9,9,2
|
||||
or 9,9,3
|
||||
stw 9,12(1)
|
||||
lfd 0,8(1)
|
||||
mtfsf 255,0
|
||||
|
||||
/* return 0 */
|
||||
li 3,0
|
||||
addi 1,1,16
|
||||
blr
|
||||
|
||||
.global fegetenv
|
||||
.type fegetenv,@function
|
||||
fegetenv:
|
||||
/* *r3 = fpscr */
|
||||
mffs 0
|
||||
stfd 0,0(3)
|
||||
/* return 0 */
|
||||
li 3,0
|
||||
blr
|
||||
|
||||
.global fesetenv
|
||||
.type fesetenv,@function
|
||||
fesetenv:
|
||||
cmpwi 3, -1
|
||||
bne 1f
|
||||
mflr 4
|
||||
bl 2f
|
||||
.zero 8
|
||||
2: mflr 3
|
||||
mtlr 4
|
||||
1: /* fpscr = *r3 */
|
||||
lfd 0,0(3)
|
||||
mtfsf 255,0
|
||||
/* return 0 */
|
||||
li 3,0
|
||||
blr
|
||||
#endif
|
||||
@@ -1,56 +0,0 @@
|
||||
#ifdef __riscv_flen
|
||||
|
||||
.global feclearexcept
|
||||
.type feclearexcept, %function
|
||||
feclearexcept:
|
||||
csrc fflags, a0
|
||||
li a0, 0
|
||||
ret
|
||||
|
||||
.global feraiseexcept
|
||||
.type feraiseexcept, %function
|
||||
feraiseexcept:
|
||||
csrs fflags, a0
|
||||
li a0, 0
|
||||
ret
|
||||
|
||||
.global fetestexcept
|
||||
.type fetestexcept, %function
|
||||
fetestexcept:
|
||||
frflags t0
|
||||
and a0, t0, a0
|
||||
ret
|
||||
|
||||
.global fegetround
|
||||
.type fegetround, %function
|
||||
fegetround:
|
||||
frrm a0
|
||||
ret
|
||||
|
||||
.global __fesetround
|
||||
.type __fesetround, %function
|
||||
__fesetround:
|
||||
fsrm t0, a0
|
||||
li a0, 0
|
||||
ret
|
||||
|
||||
.global fegetenv
|
||||
.type fegetenv, %function
|
||||
fegetenv:
|
||||
frcsr t0
|
||||
sw t0, 0(a0)
|
||||
li a0, 0
|
||||
ret
|
||||
|
||||
.global fesetenv
|
||||
.type fesetenv, %function
|
||||
fesetenv:
|
||||
li t2, -1
|
||||
li t1, 0
|
||||
beq a0, t2, 1f
|
||||
lw t1, 0(a0)
|
||||
1: fscsr t1
|
||||
li a0, 0
|
||||
ret
|
||||
|
||||
#endif
|
||||
@@ -1,81 +0,0 @@
|
||||
#if __SH_FPU_ANY__ || __SH4__
|
||||
|
||||
.global fegetround
|
||||
.type fegetround, @function
|
||||
fegetround:
|
||||
sts fpscr, r0
|
||||
rts
|
||||
and #3, r0
|
||||
|
||||
.global __fesetround
|
||||
.hidden __fesetround
|
||||
.type __fesetround, @function
|
||||
__fesetround:
|
||||
sts fpscr, r0
|
||||
mov #-4, r1
|
||||
and r1, r0
|
||||
or r4, r0
|
||||
lds r0, fpscr
|
||||
rts
|
||||
mov #0, r0
|
||||
|
||||
.global fetestexcept
|
||||
.type fetestexcept, @function
|
||||
fetestexcept:
|
||||
sts fpscr, r0
|
||||
and r4, r0
|
||||
rts
|
||||
and #0x7c, r0
|
||||
|
||||
.global feclearexcept
|
||||
.type feclearexcept, @function
|
||||
feclearexcept:
|
||||
mov r4, r0
|
||||
and #0x7c, r0
|
||||
not r0, r4
|
||||
sts fpscr, r0
|
||||
and r4, r0
|
||||
lds r0, fpscr
|
||||
rts
|
||||
mov #0, r0
|
||||
|
||||
.global feraiseexcept
|
||||
.type feraiseexcept, @function
|
||||
feraiseexcept:
|
||||
mov r4, r0
|
||||
and #0x7c, r0
|
||||
sts fpscr, r4
|
||||
or r4, r0
|
||||
lds r0, fpscr
|
||||
rts
|
||||
mov #0, r0
|
||||
|
||||
.global fegetenv
|
||||
.type fegetenv, @function
|
||||
fegetenv:
|
||||
sts fpscr, r0
|
||||
mov.l r0, @r4
|
||||
rts
|
||||
mov #0, r0
|
||||
|
||||
.global fesetenv
|
||||
.type fesetenv, @function
|
||||
fesetenv:
|
||||
mov r4, r0
|
||||
cmp/eq #-1, r0
|
||||
bf 1f
|
||||
|
||||
! the default environment is complicated by the fact that we need to
|
||||
! preserve the current precision bit, which we do not know a priori
|
||||
sts fpscr, r0
|
||||
mov #8, r1
|
||||
swap.w r1, r1
|
||||
bra 2f
|
||||
and r1, r0
|
||||
|
||||
1: mov.l @r4, r0 ! non-default environment
|
||||
2: lds r0, fpscr
|
||||
rts
|
||||
mov #0, r0
|
||||
|
||||
#endif
|
||||
@@ -1,3 +0,0 @@
|
||||
#define __dlsym __dlsym_redir_time64
|
||||
#define dlsym __dlsym_time64
|
||||
#include "dlsym.s"
|
||||
@@ -1,55 +0,0 @@
|
||||
.syntax unified
|
||||
|
||||
.text
|
||||
.global __tlsdesc_static
|
||||
.hidden __tlsdesc_static
|
||||
.type __tlsdesc_static,%function
|
||||
__tlsdesc_static:
|
||||
ldr r0,[r0]
|
||||
bx lr
|
||||
|
||||
.global __tlsdesc_dynamic
|
||||
.hidden __tlsdesc_dynamic
|
||||
.type __tlsdesc_dynamic,%function
|
||||
__tlsdesc_dynamic:
|
||||
push {r2,r3,ip,lr}
|
||||
ldr r1,[r0]
|
||||
ldr r2,[r1,#4] // r2 = offset
|
||||
ldr r1,[r1] // r1 = modid
|
||||
|
||||
#if ((__ARM_ARCH_6K__ || __ARM_ARCH_6KZ__ || __ARM_ARCH_6ZK__) && !__thumb__) \
|
||||
|| __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7
|
||||
mrc p15,0,r0,c13,c0,3
|
||||
#else
|
||||
ldr r0,1f
|
||||
add r0,r0,pc
|
||||
ldr r0,[r0]
|
||||
2:
|
||||
#if __ARM_ARCH >= 5
|
||||
blx r0 // r0 = tp
|
||||
#else
|
||||
#if __thumb__
|
||||
add lr,pc,#1
|
||||
#else
|
||||
mov lr,pc
|
||||
#endif
|
||||
bx r0
|
||||
#endif
|
||||
#endif
|
||||
ldr r3,[r0,#-4] // r3 = dtv
|
||||
ldr ip,[r3,r1,LSL #2]
|
||||
sub r0,ip,r0
|
||||
add r0,r0,r2 // r0 = r3[r1]-r0+r2
|
||||
#if __ARM_ARCH >= 5
|
||||
pop {r2,r3,ip,pc}
|
||||
#else
|
||||
pop {r2,r3,ip,lr}
|
||||
bx lr
|
||||
#endif
|
||||
|
||||
#if ((__ARM_ARCH_6K__ || __ARM_ARCH_6KZ__ || __ARM_ARCH_6ZK__) && !__thumb__) \
|
||||
|| __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7
|
||||
#else
|
||||
.align 2
|
||||
1: .word __a_gettp_ptr - 2b
|
||||
#endif
|
||||
@@ -1,3 +0,0 @@
|
||||
#define __dlsym __dlsym_redir_time64
|
||||
#define dlsym __dlsym_time64
|
||||
#include "dlsym.s"
|
||||
@@ -1,3 +0,0 @@
|
||||
#define __dlsym __dlsym_redir_time64
|
||||
#define dlsym __dlsym_time64
|
||||
#include "dlsym.s"
|
||||
@@ -1,3 +0,0 @@
|
||||
#define __dlsym __dlsym_redir_time64
|
||||
#define dlsym __dlsym_time64
|
||||
#include "dlsym.s"
|
||||
@@ -1,3 +0,0 @@
|
||||
#define __dlsym __dlsym_redir_time64
|
||||
#define dlsym __dlsym_time64
|
||||
#include "dlsym.s"
|
||||
@@ -1,3 +0,0 @@
|
||||
#define __dlsym __dlsym_redir_time64
|
||||
#define dlsym __dlsym_time64
|
||||
#include "dlsym.s"
|
||||
@@ -1,3 +0,0 @@
|
||||
#define __dlsym __dlsym_redir_time64
|
||||
#define dlsym __dlsym_time64
|
||||
#include "dlsym.s"
|
||||
@@ -1,3 +0,0 @@
|
||||
#define __dlsym __dlsym_redir_time64
|
||||
#define dlsym __dlsym_time64
|
||||
#include "dlsym.s"
|
||||
@@ -1,3 +0,0 @@
|
||||
#define __dlsym __dlsym_redir_time64
|
||||
#define dlsym __dlsym_time64
|
||||
#include "dlsym.s"
|
||||
@@ -1,50 +0,0 @@
|
||||
.syntax unified
|
||||
.global _longjmp
|
||||
.global longjmp
|
||||
.type _longjmp,%function
|
||||
.type longjmp,%function
|
||||
_longjmp:
|
||||
longjmp:
|
||||
mov ip,r0
|
||||
movs r0,r1
|
||||
moveq r0,#1
|
||||
ldmia ip!, {v1,v2,v3,v4,v5,v6,sl,fp}
|
||||
ldmia ip!, {r2,lr}
|
||||
mov sp,r2
|
||||
|
||||
adr r1,1f
|
||||
ldr r2,1f
|
||||
ldr r1,[r1,r2]
|
||||
|
||||
#if __ARM_ARCH < 8
|
||||
tst r1,#0x260
|
||||
beq 3f
|
||||
// HWCAP_ARM_FPA
|
||||
tst r1,#0x20
|
||||
beq 2f
|
||||
ldc p2, cr4, [ip], #48
|
||||
#endif
|
||||
2: tst r1,#0x40
|
||||
beq 2f
|
||||
.fpu vfp
|
||||
vldmia ip!, {d8-d15}
|
||||
.fpu softvfp
|
||||
.eabi_attribute 10, 0
|
||||
.eabi_attribute 27, 0
|
||||
#if __ARM_ARCH < 8
|
||||
// HWCAP_ARM_IWMMXT
|
||||
2: tst r1,#0x200
|
||||
beq 3f
|
||||
ldcl p1, cr10, [ip], #8
|
||||
ldcl p1, cr11, [ip], #8
|
||||
ldcl p1, cr12, [ip], #8
|
||||
ldcl p1, cr13, [ip], #8
|
||||
ldcl p1, cr14, [ip], #8
|
||||
ldcl p1, cr15, [ip], #8
|
||||
#endif
|
||||
2:
|
||||
3: bx lr
|
||||
|
||||
.hidden __hwcap
|
||||
.align 2
|
||||
1: .word __hwcap-1b
|
||||
@@ -1,52 +0,0 @@
|
||||
.syntax unified
|
||||
.global __setjmp
|
||||
.global _setjmp
|
||||
.global setjmp
|
||||
.type __setjmp,%function
|
||||
.type _setjmp,%function
|
||||
.type setjmp,%function
|
||||
__setjmp:
|
||||
_setjmp:
|
||||
setjmp:
|
||||
mov ip,r0
|
||||
stmia ip!,{v1,v2,v3,v4,v5,v6,sl,fp}
|
||||
mov r2,sp
|
||||
stmia ip!,{r2,lr}
|
||||
mov r0,#0
|
||||
|
||||
adr r1,1f
|
||||
ldr r2,1f
|
||||
ldr r1,[r1,r2]
|
||||
|
||||
#if __ARM_ARCH < 8
|
||||
tst r1,#0x260
|
||||
beq 3f
|
||||
// HWCAP_ARM_FPA
|
||||
tst r1,#0x20
|
||||
beq 2f
|
||||
stc p2, cr4, [ip], #48
|
||||
#endif
|
||||
2: tst r1,#0x40
|
||||
beq 2f
|
||||
.fpu vfp
|
||||
vstmia ip!, {d8-d15}
|
||||
.fpu softvfp
|
||||
.eabi_attribute 10, 0
|
||||
.eabi_attribute 27, 0
|
||||
#if __ARM_ARCH < 8
|
||||
// HWCAP_ARM_IWMMXT
|
||||
2: tst r1,#0x200
|
||||
beq 3f
|
||||
stcl p1, cr10, [ip], #8
|
||||
stcl p1, cr11, [ip], #8
|
||||
stcl p1, cr12, [ip], #8
|
||||
stcl p1, cr13, [ip], #8
|
||||
stcl p1, cr14, [ip], #8
|
||||
stcl p1, cr15, [ip], #8
|
||||
#endif
|
||||
2:
|
||||
3: bx lr
|
||||
|
||||
.hidden __hwcap
|
||||
.align 2
|
||||
1: .word __hwcap-1b
|
||||
@@ -1,34 +0,0 @@
|
||||
.set noreorder
|
||||
|
||||
.global _longjmp
|
||||
.global longjmp
|
||||
.type _longjmp,@function
|
||||
.type longjmp,@function
|
||||
_longjmp:
|
||||
longjmp:
|
||||
move $2, $5
|
||||
bne $2, $0, 1f
|
||||
nop
|
||||
addu $2, $2, 1
|
||||
1:
|
||||
#ifndef __mips_soft_float
|
||||
l.d $f20, 56($4)
|
||||
l.d $f22, 64($4)
|
||||
l.d $f24, 72($4)
|
||||
l.d $f26, 80($4)
|
||||
l.d $f28, 88($4)
|
||||
l.d $f30, 96($4)
|
||||
#endif
|
||||
lw $ra, 0($4)
|
||||
lw $sp, 4($4)
|
||||
lw $16, 8($4)
|
||||
lw $17, 12($4)
|
||||
lw $18, 16($4)
|
||||
lw $19, 20($4)
|
||||
lw $20, 24($4)
|
||||
lw $21, 28($4)
|
||||
lw $22, 32($4)
|
||||
lw $23, 36($4)
|
||||
lw $30, 40($4)
|
||||
jr $ra
|
||||
lw $28, 44($4)
|
||||
@@ -1,33 +0,0 @@
|
||||
.set noreorder
|
||||
|
||||
.global __setjmp
|
||||
.global _setjmp
|
||||
.global setjmp
|
||||
.type __setjmp,@function
|
||||
.type _setjmp,@function
|
||||
.type setjmp,@function
|
||||
__setjmp:
|
||||
_setjmp:
|
||||
setjmp:
|
||||
sw $ra, 0($4)
|
||||
sw $sp, 4($4)
|
||||
sw $16, 8($4)
|
||||
sw $17, 12($4)
|
||||
sw $18, 16($4)
|
||||
sw $19, 20($4)
|
||||
sw $20, 24($4)
|
||||
sw $21, 28($4)
|
||||
sw $22, 32($4)
|
||||
sw $23, 36($4)
|
||||
sw $30, 40($4)
|
||||
sw $28, 44($4)
|
||||
#ifndef __mips_soft_float
|
||||
s.d $f20, 56($4)
|
||||
s.d $f22, 64($4)
|
||||
s.d $f24, 72($4)
|
||||
s.d $f26, 80($4)
|
||||
s.d $f28, 88($4)
|
||||
s.d $f30, 96($4)
|
||||
#endif
|
||||
jr $ra
|
||||
li $2, 0
|
||||
@@ -1,37 +0,0 @@
|
||||
.set noreorder
|
||||
.global _longjmp
|
||||
.global longjmp
|
||||
.type _longjmp,@function
|
||||
.type longjmp,@function
|
||||
_longjmp:
|
||||
longjmp:
|
||||
move $2, $5
|
||||
|
||||
bne $2, $0, 1f
|
||||
nop
|
||||
daddu $2, $2, 1
|
||||
1:
|
||||
#ifndef __mips_soft_float
|
||||
ldc1 $24, 96($4)
|
||||
ldc1 $25, 104($4)
|
||||
ldc1 $26, 112($4)
|
||||
ldc1 $27, 120($4)
|
||||
ldc1 $28, 128($4)
|
||||
ldc1 $29, 136($4)
|
||||
ldc1 $30, 144($4)
|
||||
ldc1 $31, 152($4)
|
||||
#endif
|
||||
ld $ra, 0($4)
|
||||
ld $sp, 8($4)
|
||||
ld $gp, 16($4)
|
||||
ld $16, 24($4)
|
||||
ld $17, 32($4)
|
||||
ld $18, 40($4)
|
||||
ld $19, 48($4)
|
||||
ld $20, 56($4)
|
||||
ld $21, 64($4)
|
||||
ld $22, 72($4)
|
||||
ld $23, 80($4)
|
||||
ld $30, 88($4)
|
||||
jr $ra
|
||||
nop
|
||||
@@ -1,34 +0,0 @@
|
||||
.set noreorder
|
||||
.global __setjmp
|
||||
.global _setjmp
|
||||
.global setjmp
|
||||
.type __setjmp,@function
|
||||
.type _setjmp,@function
|
||||
.type setjmp,@function
|
||||
__setjmp:
|
||||
_setjmp:
|
||||
setjmp:
|
||||
sd $ra, 0($4)
|
||||
sd $sp, 8($4)
|
||||
sd $gp, 16($4)
|
||||
sd $16, 24($4)
|
||||
sd $17, 32($4)
|
||||
sd $18, 40($4)
|
||||
sd $19, 48($4)
|
||||
sd $20, 56($4)
|
||||
sd $21, 64($4)
|
||||
sd $22, 72($4)
|
||||
sd $23, 80($4)
|
||||
sd $30, 88($4)
|
||||
#ifndef __mips_soft_float
|
||||
sdc1 $24, 96($4)
|
||||
sdc1 $25, 104($4)
|
||||
sdc1 $26, 112($4)
|
||||
sdc1 $27, 120($4)
|
||||
sdc1 $28, 128($4)
|
||||
sdc1 $29, 136($4)
|
||||
sdc1 $30, 144($4)
|
||||
sdc1 $31, 152($4)
|
||||
#endif
|
||||
jr $ra
|
||||
li $2, 0
|
||||
@@ -1,36 +0,0 @@
|
||||
.set noreorder
|
||||
.global _longjmp
|
||||
.global longjmp
|
||||
.type _longjmp,@function
|
||||
.type longjmp,@function
|
||||
_longjmp:
|
||||
longjmp:
|
||||
move $2, $5
|
||||
bne $2, $0, 1f
|
||||
nop
|
||||
addu $2, $2, 1
|
||||
1:
|
||||
#ifndef __mips_soft_float
|
||||
ldc1 $24, 96($4)
|
||||
ldc1 $25, 104($4)
|
||||
ldc1 $26, 112($4)
|
||||
ldc1 $27, 120($4)
|
||||
ldc1 $28, 128($4)
|
||||
ldc1 $29, 136($4)
|
||||
ldc1 $30, 144($4)
|
||||
ldc1 $31, 152($4)
|
||||
#endif
|
||||
ld $ra, 0($4)
|
||||
ld $sp, 8($4)
|
||||
ld $gp, 16($4)
|
||||
ld $16, 24($4)
|
||||
ld $17, 32($4)
|
||||
ld $18, 40($4)
|
||||
ld $19, 48($4)
|
||||
ld $20, 56($4)
|
||||
ld $21, 64($4)
|
||||
ld $22, 72($4)
|
||||
ld $23, 80($4)
|
||||
ld $30, 88($4)
|
||||
jr $ra
|
||||
nop
|
||||
@@ -1,34 +0,0 @@
|
||||
.set noreorder
|
||||
.global __setjmp
|
||||
.global _setjmp
|
||||
.global setjmp
|
||||
.type __setjmp,@function
|
||||
.type _setjmp,@function
|
||||
.type setjmp,@function
|
||||
__setjmp:
|
||||
_setjmp:
|
||||
setjmp:
|
||||
sd $ra, 0($4)
|
||||
sd $sp, 8($4)
|
||||
sd $gp, 16($4)
|
||||
sd $16, 24($4)
|
||||
sd $17, 32($4)
|
||||
sd $18, 40($4)
|
||||
sd $19, 48($4)
|
||||
sd $20, 56($4)
|
||||
sd $21, 64($4)
|
||||
sd $22, 72($4)
|
||||
sd $23, 80($4)
|
||||
sd $30, 88($4)
|
||||
#ifndef __mips_soft_float
|
||||
sdc1 $24, 96($4)
|
||||
sdc1 $25, 104($4)
|
||||
sdc1 $26, 112($4)
|
||||
sdc1 $27, 120($4)
|
||||
sdc1 $28, 128($4)
|
||||
sdc1 $29, 136($4)
|
||||
sdc1 $30, 144($4)
|
||||
sdc1 $31, 152($4)
|
||||
#endif
|
||||
jr $ra
|
||||
li $2, 0
|
||||
@@ -1,99 +0,0 @@
.global _longjmp
.global longjmp
.type _longjmp,@function
.type longjmp,@function
_longjmp:
longjmp:
/*
 * void longjmp(jmp_buf env, int val);
 * put val into return register and restore the env saved in setjmp
 * if val(r4) is 0, put 1 there.
 */
/* 0) move old return address into r0 */
lwz 0, 0(3)
/* 1) put it into link reg */
mtlr 0
/* 2) restore stack ptr */
lwz 1, 4(3)
/* 3) restore control reg */
lwz 0, 8(3)
mtcr 0
/* 4) restore r14-r31 */
lwz 14, 12(3)
lwz 15, 16(3)
lwz 16, 20(3)
lwz 17, 24(3)
lwz 18, 28(3)
lwz 19, 32(3)
lwz 20, 36(3)
lwz 21, 40(3)
lwz 22, 44(3)
lwz 23, 48(3)
lwz 24, 52(3)
lwz 25, 56(3)
lwz 26, 60(3)
lwz 27, 64(3)
lwz 28, 68(3)
lwz 29, 72(3)
lwz 30, 76(3)
lwz 31, 80(3)
#if defined(_SOFT_FLOAT) || defined(__NO_FPRS__)
mflr 0
bl 1f
.hidden __hwcap
.long __hwcap-.
1: mflr 4
lwz 5, 0(4)
lwzx 4, 4, 5
andis. 4, 4, 0x80
beq 1f
.long 0x11c35b01 /* evldd 14,88(3) */
.long 0x11e36301 /* ... */
.long 0x12036b01
.long 0x12237301
.long 0x12437b01
.long 0x12638301
.long 0x12838b01
.long 0x12a39301
.long 0x12c39b01
.long 0x12e3a301
.long 0x1303ab01
.long 0x1323b301
.long 0x1343bb01
.long 0x1363c301
.long 0x1383cb01
.long 0x13a3d301
.long 0x13c3db01
.long 0x13e3e301 /* evldd 31,224(3) */
.long 0x11a3eb01 /* evldd 13,232(3) */
1: mtlr 0
#else
lfd 14,88(3)
lfd 15,96(3)
lfd 16,104(3)
lfd 17,112(3)
lfd 18,120(3)
lfd 19,128(3)
lfd 20,136(3)
lfd 21,144(3)
lfd 22,152(3)
lfd 23,160(3)
lfd 24,168(3)
lfd 25,176(3)
lfd 26,184(3)
lfd 27,192(3)
lfd 28,200(3)
lfd 29,208(3)
lfd 30,216(3)
lfd 31,224(3)
#endif
/* 5) put val into return reg r3 */
mr 3, 4

/* 6) check if return value is 0, make it 1 in that case */
cmpwi cr7, 4, 0
bne cr7, 1f
li 3, 1
1:
blr
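Note: the soft-float path above cannot assume the assembler accepts SPE mnemonics, so it emits the evldd instructions as raw .long opcodes and gates them on a runtime hwcap probe. A hedged C sketch of that probe; the exact declaration of musl's hidden __hwcap symbol and the PPC_FEATURE_HAS_SPE value (0x00800000, matching the "andis. 4, 4, 0x80" mask) are assumptions here:

    #include <stdint.h>
    extern unsigned long __hwcap;              /* assumed: provided by libc startup */
    #define PPC_FEATURE_HAS_SPE 0x00800000UL   /* assumed hwcap bit, per the mask above */
    static int have_spe(void) {
        return (__hwcap & PPC_FEATURE_HAS_SPE) != 0;
    }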
@@ -1,93 +0,0 @@
.global ___setjmp
.hidden ___setjmp
.global __setjmp
.global _setjmp
.global setjmp
.type __setjmp,@function
.type _setjmp,@function
.type setjmp,@function
___setjmp:
__setjmp:
_setjmp:
setjmp:
/* 0) store IP int 0, then into the jmpbuf pointed to by r3 (first arg) */
mflr 0
stw 0, 0(3)
/* 1) store reg1 (SP) */
stw 1, 4(3)
/* 2) store cr */
mfcr 0
stw 0, 8(3)
/* 3) store r14-31 */
stw 14, 12(3)
stw 15, 16(3)
stw 16, 20(3)
stw 17, 24(3)
stw 18, 28(3)
stw 19, 32(3)
stw 20, 36(3)
stw 21, 40(3)
stw 22, 44(3)
stw 23, 48(3)
stw 24, 52(3)
stw 25, 56(3)
stw 26, 60(3)
stw 27, 64(3)
stw 28, 68(3)
stw 29, 72(3)
stw 30, 76(3)
stw 31, 80(3)
#if defined(_SOFT_FLOAT) || defined(__NO_FPRS__)
mflr 0
bl 1f
.hidden __hwcap
.long __hwcap-.
1: mflr 4
lwz 5, 0(4)
lwzx 4, 4, 5
andis. 4, 4, 0x80
beq 1f
.long 0x11c35b21 /* evstdd 14,88(3) */
.long 0x11e36321 /* ... */
.long 0x12036b21
.long 0x12237321
.long 0x12437b21
.long 0x12638321
.long 0x12838b21
.long 0x12a39321
.long 0x12c39b21
.long 0x12e3a321
.long 0x1303ab21
.long 0x1323b321
.long 0x1343bb21
.long 0x1363c321
.long 0x1383cb21
.long 0x13a3d321
.long 0x13c3db21
.long 0x13e3e321 /* evstdd 31,224(3) */
.long 0x11a3eb21 /* evstdd 13,232(3) */
1: mtlr 0
#else
stfd 14,88(3)
stfd 15,96(3)
stfd 16,104(3)
stfd 17,112(3)
stfd 18,120(3)
stfd 19,128(3)
stfd 20,136(3)
stfd 21,144(3)
stfd 22,152(3)
stfd 23,160(3)
stfd 24,168(3)
stfd 25,176(3)
stfd 26,184(3)
stfd 27,192(3)
stfd 28,200(3)
stfd 29,208(3)
stfd 30,216(3)
stfd 31,224(3)
#endif
/* 4) set return value to 0 */
li 3, 0
/* 5) return */
blr
@@ -1,42 +0,0 @@
.global __longjmp
.global _longjmp
.global longjmp
.type __longjmp, %function
.type _longjmp, %function
.type longjmp, %function
__longjmp:
_longjmp:
longjmp:
ld s0, 0(a0)
ld s1, 8(a0)
ld s2, 16(a0)
ld s3, 24(a0)
ld s4, 32(a0)
ld s5, 40(a0)
ld s6, 48(a0)
ld s7, 56(a0)
ld s8, 64(a0)
ld s9, 72(a0)
ld s10, 80(a0)
ld s11, 88(a0)
ld sp, 96(a0)
ld ra, 104(a0)

#ifndef __riscv_float_abi_soft
fld fs0, 112(a0)
fld fs1, 120(a0)
fld fs2, 128(a0)
fld fs3, 136(a0)
fld fs4, 144(a0)
fld fs5, 152(a0)
fld fs6, 160(a0)
fld fs7, 168(a0)
fld fs8, 176(a0)
fld fs9, 184(a0)
fld fs10, 192(a0)
fld fs11, 200(a0)
#endif

seqz a0, a1
add a0, a0, a1
ret
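Note: the two instructions before "ret" are a branchless form of the longjmp return-value rule. "seqz a0, a1" sets a0 to 1 exactly when val (a1) is zero, and the add folds the two cases together. Equivalent C (illustration only):

    /* Branchless "return val ? val : 1": (val == 0) is 1 only when
     * val is 0, so the sum is val when nonzero and 1 otherwise. */
    long val_or_one(long val) {
        return val + (val == 0);
    }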
@@ -1,41 +0,0 @@
.global __setjmp
.global _setjmp
.global setjmp
.type __setjmp, %function
.type _setjmp, %function
.type setjmp, %function
__setjmp:
_setjmp:
setjmp:
sd s0, 0(a0)
sd s1, 8(a0)
sd s2, 16(a0)
sd s3, 24(a0)
sd s4, 32(a0)
sd s5, 40(a0)
sd s6, 48(a0)
sd s7, 56(a0)
sd s8, 64(a0)
sd s9, 72(a0)
sd s10, 80(a0)
sd s11, 88(a0)
sd sp, 96(a0)
sd ra, 104(a0)

#ifndef __riscv_float_abi_soft
fsd fs0, 112(a0)
fsd fs1, 120(a0)
fsd fs2, 128(a0)
fsd fs3, 136(a0)
fsd fs4, 144(a0)
fsd fs5, 152(a0)
fsd fs6, 160(a0)
fsd fs7, 168(a0)
fsd fs8, 176(a0)
fsd fs9, 184(a0)
fsd fs10, 192(a0)
fsd fs11, 200(a0)
#endif

li a0, 0
ret
@@ -1,28 +0,0 @@
.global _longjmp
.global longjmp
.type _longjmp, @function
.type longjmp, @function
_longjmp:
longjmp:
mov.l @r4+, r8
mov.l @r4+, r9
mov.l @r4+, r10
mov.l @r4+, r11
mov.l @r4+, r12
mov.l @r4+, r13
mov.l @r4+, r14
mov.l @r4+, r15
lds.l @r4+, pr
#if __SH_FPU_ANY__ || __SH4__
fmov.s @r4+, fr12
fmov.s @r4+, fr13
fmov.s @r4+, fr14
fmov.s @r4+, fr15
#endif

tst r5, r5
movt r0
add r5, r0

rts
nop
@@ -1,32 +0,0 @@
.global ___setjmp
.hidden ___setjmp
.global __setjmp
.global _setjmp
.global setjmp
.type __setjmp, @function
.type _setjmp, @function
.type setjmp, @function
___setjmp:
__setjmp:
_setjmp:
setjmp:
#if __SH_FPU_ANY__ || __SH4__
add #52, r4
fmov.s fr15, @-r4
fmov.s fr14, @-r4
fmov.s fr13, @-r4
fmov.s fr12, @-r4
#else
add #36, r4
#endif
sts.l pr, @-r4
mov.l r15, @-r4
mov.l r14, @-r4
mov.l r13, @-r4
mov.l r12, @-r4
mov.l r11, @-r4
mov.l r10, @-r4
mov.l r9, @-r4
mov.l r8, @-r4
rts
mov #0, r0
@@ -1,186 +0,0 @@
/*
 * memcpy - copy memory area
 *
 * Copyright (c) 2012-2020, Arm Limited.
 * SPDX-License-Identifier: MIT
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses.
 *
 */

#define dstin x0
#define src x1
#define count x2
#define dst x3
#define srcend x4
#define dstend x5
#define A_l x6
#define A_lw w6
#define A_h x7
#define B_l x8
#define B_lw w8
#define B_h x9
#define C_l x10
#define C_lw w10
#define C_h x11
#define D_l x12
#define D_h x13
#define E_l x14
#define E_h x15
#define F_l x16
#define F_h x17
#define G_l count
#define G_h dst
#define H_l src
#define H_h srcend
#define tmp1 x14

/* This implementation of memcpy uses unaligned accesses and branchless
   sequences to keep the code small, simple and improve performance.

   Copies are split into 3 main cases: small copies of up to 32 bytes, medium
   copies of up to 128 bytes, and large copies. The overhead of the overlap
   check is negligible since it is only required for large copies.

   Large copies use a software pipelined loop processing 64 bytes per iteration.
   The destination pointer is 16-byte aligned to minimize unaligned accesses.
   The loop tail is handled by always copying 64 bytes from the end.
*/

.global memcpy
.type memcpy,%function
memcpy:
add srcend, src, count
add dstend, dstin, count
cmp count, 128
b.hi .Lcopy_long
cmp count, 32
b.hi .Lcopy32_128

/* Small copies: 0..32 bytes. */
cmp count, 16
b.lo .Lcopy16
ldp A_l, A_h, [src]
ldp D_l, D_h, [srcend, -16]
stp A_l, A_h, [dstin]
stp D_l, D_h, [dstend, -16]
ret

/* Copy 8-15 bytes. */
.Lcopy16:
tbz count, 3, .Lcopy8
ldr A_l, [src]
ldr A_h, [srcend, -8]
str A_l, [dstin]
str A_h, [dstend, -8]
ret

.p2align 3
/* Copy 4-7 bytes. */
.Lcopy8:
tbz count, 2, .Lcopy4
ldr A_lw, [src]
ldr B_lw, [srcend, -4]
str A_lw, [dstin]
str B_lw, [dstend, -4]
ret

/* Copy 0..3 bytes using a branchless sequence. */
.Lcopy4:
cbz count, .Lcopy0
lsr tmp1, count, 1
ldrb A_lw, [src]
ldrb C_lw, [srcend, -1]
ldrb B_lw, [src, tmp1]
strb A_lw, [dstin]
strb B_lw, [dstin, tmp1]
strb C_lw, [dstend, -1]
.Lcopy0:
ret

.p2align 4
/* Medium copies: 33..128 bytes. */
.Lcopy32_128:
ldp A_l, A_h, [src]
ldp B_l, B_h, [src, 16]
ldp C_l, C_h, [srcend, -32]
ldp D_l, D_h, [srcend, -16]
cmp count, 64
b.hi .Lcopy128
stp A_l, A_h, [dstin]
stp B_l, B_h, [dstin, 16]
stp C_l, C_h, [dstend, -32]
stp D_l, D_h, [dstend, -16]
ret

.p2align 4
/* Copy 65..128 bytes. */
.Lcopy128:
ldp E_l, E_h, [src, 32]
ldp F_l, F_h, [src, 48]
cmp count, 96
b.ls .Lcopy96
ldp G_l, G_h, [srcend, -64]
ldp H_l, H_h, [srcend, -48]
stp G_l, G_h, [dstend, -64]
stp H_l, H_h, [dstend, -48]
.Lcopy96:
stp A_l, A_h, [dstin]
stp B_l, B_h, [dstin, 16]
stp E_l, E_h, [dstin, 32]
stp F_l, F_h, [dstin, 48]
stp C_l, C_h, [dstend, -32]
stp D_l, D_h, [dstend, -16]
ret

.p2align 4
/* Copy more than 128 bytes. */
.Lcopy_long:

/* Copy 16 bytes and then align dst to 16-byte alignment. */

ldp D_l, D_h, [src]
and tmp1, dstin, 15
bic dst, dstin, 15
sub src, src, tmp1
add count, count, tmp1 /* Count is now 16 too large. */
ldp A_l, A_h, [src, 16]
stp D_l, D_h, [dstin]
ldp B_l, B_h, [src, 32]
ldp C_l, C_h, [src, 48]
ldp D_l, D_h, [src, 64]!
subs count, count, 128 + 16 /* Test and readjust count. */
b.ls .Lcopy64_from_end

.Lloop64:
stp A_l, A_h, [dst, 16]
ldp A_l, A_h, [src, 16]
stp B_l, B_h, [dst, 32]
ldp B_l, B_h, [src, 32]
stp C_l, C_h, [dst, 48]
ldp C_l, C_h, [src, 48]
stp D_l, D_h, [dst, 64]!
ldp D_l, D_h, [src, 64]!
subs count, count, 64
b.hi .Lloop64

/* Write the last iteration and copy 64 bytes from the end. */
.Lcopy64_from_end:
ldp E_l, E_h, [srcend, -64]
stp A_l, A_h, [dst, 16]
ldp A_l, A_h, [srcend, -48]
stp B_l, B_h, [dst, 32]
ldp B_l, B_h, [srcend, -32]
stp C_l, C_h, [dst, 48]
ldp C_l, C_h, [srcend, -16]
stp D_l, D_h, [dst, 64]
stp E_l, E_h, [dstend, -64]
stp A_l, A_h, [dstend, -48]
stp B_l, B_h, [dstend, -32]
stp C_l, C_h, [dstend, -16]
ret

.size memcpy,.-memcpy
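Note: the small-copy paths above (.Lcopy16/.Lcopy8) rely on one load from the start of the range and one from the end; for any len in [8, 16] the two possibly overlapping 8-byte blocks cover the whole buffer with no length loop. A C sketch of the technique (hypothetical helper, not the deleted code itself):

    #include <string.h>
    #include <stddef.h>
    /* Copy 8..16 bytes: the head and tail blocks overlap when
     * len < 16, and together they always cover [0, len). */
    static void copy_8_to_16(unsigned char *dst, const unsigned char *src, size_t len) {
        unsigned long long head, tail;
        memcpy(&head, src, 8);            /* first 8 bytes */
        memcpy(&tail, src + len - 8, 8);  /* last 8 bytes  */
        memcpy(dst, &head, 8);
        memcpy(dst + len - 8, &tail, 8);
    }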
@@ -1,115 +0,0 @@
/*
 * memset - fill memory with a constant byte
 *
 * Copyright (c) 2012-2020, Arm Limited.
 * SPDX-License-Identifier: MIT
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses.
 *
 */

#define dstin x0
#define val x1
#define valw w1
#define count x2
#define dst x3
#define dstend x4
#define zva_val x5

.global memset
.type memset,%function
memset:

dup v0.16B, valw
add dstend, dstin, count

cmp count, 96
b.hi .Lset_long
cmp count, 16
b.hs .Lset_medium
mov val, v0.D[0]

/* Set 0..15 bytes. */
tbz count, 3, 1f
str val, [dstin]
str val, [dstend, -8]
ret
nop
1: tbz count, 2, 2f
str valw, [dstin]
str valw, [dstend, -4]
ret
2: cbz count, 3f
strb valw, [dstin]
tbz count, 1, 3f
strh valw, [dstend, -2]
3: ret

/* Set 17..96 bytes. */
.Lset_medium:
str q0, [dstin]
tbnz count, 6, .Lset96
str q0, [dstend, -16]
tbz count, 5, 1f
str q0, [dstin, 16]
str q0, [dstend, -32]
1: ret

.p2align 4
/* Set 64..96 bytes. Write 64 bytes from the start and
   32 bytes from the end. */
.Lset96:
str q0, [dstin, 16]
stp q0, q0, [dstin, 32]
stp q0, q0, [dstend, -32]
ret

.p2align 4
.Lset_long:
and valw, valw, 255
bic dst, dstin, 15
str q0, [dstin]
cmp count, 160
ccmp valw, 0, 0, hs
b.ne .Lno_zva

#ifndef SKIP_ZVA_CHECK
mrs zva_val, dczid_el0
and zva_val, zva_val, 31
cmp zva_val, 4 /* ZVA size is 64 bytes. */
b.ne .Lno_zva
#endif
str q0, [dst, 16]
stp q0, q0, [dst, 32]
bic dst, dst, 63
sub count, dstend, dst /* Count is now 64 too large. */
sub count, count, 128 /* Adjust count and bias for loop. */

.p2align 4
.Lzva_loop:
add dst, dst, 64
dc zva, dst
subs count, count, 64
b.hi .Lzva_loop
stp q0, q0, [dstend, -64]
stp q0, q0, [dstend, -32]
ret

.Lno_zva:
sub count, dstend, dst /* Count is 16 too large. */
sub dst, dst, 16 /* Dst is biased by -32. */
sub count, count, 64 + 16 /* Adjust count and bias for loop. */
.Lno_zva_loop:
stp q0, q0, [dst, 32]
stp q0, q0, [dst, 64]!
subs count, count, 64
b.hi .Lno_zva_loop
stp q0, q0, [dstend, -64]
stp q0, q0, [dstend, -32]
ret

.size memset,.-memset
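Note: the zeroing fast path above uses "dc zva" to clear a whole writeback block per instruction, but only after checking dczid_el0: bits 3:0 encode log2(block size in 4-byte words) and bit 4, when set, prohibits DC ZVA, so "(dczid & 31) == 4" means a permitted 64-byte block. A sketch of the check in C with AArch64 inline assembly (assumes a GCC/Clang toolchain):

    #include <stdint.h>
    static int zva_block_is_64(void) {
        uint64_t dczid;
        __asm__("mrs %0, dczid_el0" : "=r"(dczid));
        /* low 4 bits: log2(size/4); bit 4 set: DC ZVA prohibited */
        return (dczid & 31) == 4;
    }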
@@ -1,479 +0,0 @@
/*
 * Copyright (C) 2008 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */


/*
 * Optimized memcpy() for ARM.
 *
 * note that memcpy() always returns the destination pointer,
 * so we have to preserve R0.
 */

/*
 * This file has been modified from the original for use in musl libc.
 * The main changes are: addition of .type memcpy,%function to make the
 * code safely callable from thumb mode, adjusting the return
 * instructions to be compatible with pre-thumb ARM cpus, removal of
 * prefetch code that is not compatible with older cpus and support for
 * building as thumb 2 and big-endian.
 */

.syntax unified

.global memcpy
.type memcpy,%function
memcpy:
/* The stack must always be 64-bits aligned to be compliant with the
 * ARM ABI. Since we have to save R0, we might as well save R4
 * which we can use for better pipelining of the reads below
 */
.fnstart
.save {r0, r4, lr}
stmfd sp!, {r0, r4, lr}
/* Making room for r5-r11 which will be spilled later */
.pad #28
sub sp, sp, #28

/* it simplifies things to take care of len<4 early */
cmp r2, #4
blo copy_last_3_and_return

/* compute the offset to align the source
 * offset = (4-(src&3))&3 = -src & 3
 */
rsb r3, r1, #0
ands r3, r3, #3
beq src_aligned

/* align source to 32 bits. We need to insert 2 instructions between
 * a ldr[b|h] and str[b|h] because byte and half-word instructions
 * stall 2 cycles.
 */
movs r12, r3, lsl #31
sub r2, r2, r3 /* we know that r3 <= r2 because r2 >= 4 */
ldrbmi r3, [r1], #1
ldrbcs r4, [r1], #1
ldrbcs r12,[r1], #1
strbmi r3, [r0], #1
strbcs r4, [r0], #1
strbcs r12,[r0], #1

src_aligned:

/* see if src and dst are aligned together (congruent) */
eor r12, r0, r1
tst r12, #3
bne non_congruent

/* Use post-incriment mode for stm to spill r5-r11 to reserved stack
 * frame. Don't update sp.
 */
stmea sp, {r5-r11}

/* align the destination to a cache-line */
rsb r3, r0, #0
ands r3, r3, #0x1C
beq congruent_aligned32
cmp r3, r2
andhi r3, r2, #0x1C

/* conditionnaly copies 0 to 7 words (length in r3) */
movs r12, r3, lsl #28
ldmcs r1!, {r4, r5, r6, r7} /* 16 bytes */
ldmmi r1!, {r8, r9} /* 8 bytes */
stmcs r0!, {r4, r5, r6, r7}
stmmi r0!, {r8, r9}
tst r3, #0x4
ldrne r10,[r1], #4 /* 4 bytes */
strne r10,[r0], #4
sub r2, r2, r3

congruent_aligned32:
/*
 * here source is aligned to 32 bytes.
 */

cached_aligned32:
subs r2, r2, #32
blo less_than_32_left

/*
 * We preload a cache-line up to 64 bytes ahead. On the 926, this will
 * stall only until the requested world is fetched, but the linefill
 * continues in the the background.
 * While the linefill is going, we write our previous cache-line
 * into the write-buffer (which should have some free space).
 * When the linefill is done, the writebuffer will
 * start dumping its content into memory
 *
 * While all this is going, we then load a full cache line into
 * 8 registers, this cache line should be in the cache by now
 * (or partly in the cache).
 *
 * This code should work well regardless of the source/dest alignment.
 *
 */

/* Align the preload register to a cache-line because the cpu does
 * "critical word first" (the first word requested is loaded first).
 */
@ bic r12, r1, #0x1F
@ add r12, r12, #64

1: ldmia r1!, { r4-r11 }
subs r2, r2, #32

/*
 * NOTE: if r12 is more than 64 ahead of r1, the following ldrhi
 * for ARM9 preload will not be safely guarded by the preceding subs.
 * When it is safely guarded the only possibility to have SIGSEGV here
 * is because the caller overstates the length.
 */
@ ldrhi r3, [r12], #32 /* cheap ARM9 preload */
stmia r0!, { r4-r11 }
bhs 1b

add r2, r2, #32

less_than_32_left:
/*
 * less than 32 bytes left at this point (length in r2)
 */

/* skip all this if there is nothing to do, which should
 * be a common case (if not executed the code below takes
 * about 16 cycles)
 */
tst r2, #0x1F
beq 1f

/* conditionnaly copies 0 to 31 bytes */
movs r12, r2, lsl #28
ldmcs r1!, {r4, r5, r6, r7} /* 16 bytes */
ldmmi r1!, {r8, r9} /* 8 bytes */
stmcs r0!, {r4, r5, r6, r7}
stmmi r0!, {r8, r9}
movs r12, r2, lsl #30
ldrcs r3, [r1], #4 /* 4 bytes */
ldrhmi r4, [r1], #2 /* 2 bytes */
strcs r3, [r0], #4
strhmi r4, [r0], #2
tst r2, #0x1
ldrbne r3, [r1] /* last byte */
strbne r3, [r0]

/* we're done! restore everything and return */
1: ldmfd sp!, {r5-r11}
ldmfd sp!, {r0, r4, lr}
bx lr

/********************************************************************/

non_congruent:
/*
 * here source is aligned to 4 bytes
 * but destination is not.
 *
 * in the code below r2 is the number of bytes read
 * (the number of bytes written is always smaller, because we have
 * partial words in the shift queue)
 */
cmp r2, #4
blo copy_last_3_and_return

/* Use post-incriment mode for stm to spill r5-r11 to reserved stack
 * frame. Don't update sp.
 */
stmea sp, {r5-r11}

/* compute shifts needed to align src to dest */
rsb r5, r0, #0
and r5, r5, #3 /* r5 = # bytes in partial words */
mov r12, r5, lsl #3 /* r12 = right */
rsb lr, r12, #32 /* lr = left */

/* read the first word */
ldr r3, [r1], #4
sub r2, r2, #4

/* write a partial word (0 to 3 bytes), such that destination
 * becomes aligned to 32 bits (r5 = nb of words to copy for alignment)
 */
movs r5, r5, lsl #31

#if __ARMEB__
movmi r3, r3, ror #24
strbmi r3, [r0], #1
movcs r3, r3, ror #24
strbcs r3, [r0], #1
movcs r3, r3, ror #24
strbcs r3, [r0], #1
#else
strbmi r3, [r0], #1
movmi r3, r3, lsr #8
strbcs r3, [r0], #1
movcs r3, r3, lsr #8
strbcs r3, [r0], #1
movcs r3, r3, lsr #8
#endif

cmp r2, #4
blo partial_word_tail

#if __ARMEB__
mov r3, r3, lsr r12
mov r3, r3, lsl r12
#endif

/* Align destination to 32 bytes (cache line boundary) */
1: tst r0, #0x1c
beq 2f
ldr r5, [r1], #4
sub r2, r2, #4
#if __ARMEB__
mov r4, r5, lsr lr
orr r4, r4, r3
mov r3, r5, lsl r12
#else
mov r4, r5, lsl lr
orr r4, r4, r3
mov r3, r5, lsr r12
#endif
str r4, [r0], #4
cmp r2, #4
bhs 1b
blo partial_word_tail

/* copy 32 bytes at a time */
2: subs r2, r2, #32
blo less_than_thirtytwo

/* Use immediate mode for the shifts, because there is an extra cycle
 * for register shifts, which could account for up to 50% of
 * performance hit.
 */

cmp r12, #24
beq loop24
cmp r12, #8
beq loop8

loop16:
ldr r12, [r1], #4
1: mov r4, r12
ldmia r1!, { r5,r6,r7, r8,r9,r10,r11}
subs r2, r2, #32
ldrhs r12, [r1], #4
#if __ARMEB__
orr r3, r3, r4, lsr #16
mov r4, r4, lsl #16
orr r4, r4, r5, lsr #16
mov r5, r5, lsl #16
orr r5, r5, r6, lsr #16
mov r6, r6, lsl #16
orr r6, r6, r7, lsr #16
mov r7, r7, lsl #16
orr r7, r7, r8, lsr #16
mov r8, r8, lsl #16
orr r8, r8, r9, lsr #16
mov r9, r9, lsl #16
orr r9, r9, r10, lsr #16
mov r10, r10, lsl #16
orr r10, r10, r11, lsr #16
stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
mov r3, r11, lsl #16
#else
orr r3, r3, r4, lsl #16
mov r4, r4, lsr #16
orr r4, r4, r5, lsl #16
mov r5, r5, lsr #16
orr r5, r5, r6, lsl #16
mov r6, r6, lsr #16
orr r6, r6, r7, lsl #16
mov r7, r7, lsr #16
orr r7, r7, r8, lsl #16
mov r8, r8, lsr #16
orr r8, r8, r9, lsl #16
mov r9, r9, lsr #16
orr r9, r9, r10, lsl #16
mov r10, r10, lsr #16
orr r10, r10, r11, lsl #16
stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
mov r3, r11, lsr #16
#endif
bhs 1b
b less_than_thirtytwo

loop8:
ldr r12, [r1], #4
1: mov r4, r12
ldmia r1!, { r5,r6,r7, r8,r9,r10,r11}
subs r2, r2, #32
ldrhs r12, [r1], #4
#if __ARMEB__
orr r3, r3, r4, lsr #24
mov r4, r4, lsl #8
orr r4, r4, r5, lsr #24
mov r5, r5, lsl #8
orr r5, r5, r6, lsr #24
mov r6, r6, lsl #8
orr r6, r6, r7, lsr #24
mov r7, r7, lsl #8
orr r7, r7, r8, lsr #24
mov r8, r8, lsl #8
orr r8, r8, r9, lsr #24
mov r9, r9, lsl #8
orr r9, r9, r10, lsr #24
mov r10, r10, lsl #8
orr r10, r10, r11, lsr #24
stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
mov r3, r11, lsl #8
#else
orr r3, r3, r4, lsl #24
mov r4, r4, lsr #8
orr r4, r4, r5, lsl #24
mov r5, r5, lsr #8
orr r5, r5, r6, lsl #24
mov r6, r6, lsr #8
orr r6, r6, r7, lsl #24
mov r7, r7, lsr #8
orr r7, r7, r8, lsl #24
mov r8, r8, lsr #8
orr r8, r8, r9, lsl #24
mov r9, r9, lsr #8
orr r9, r9, r10, lsl #24
mov r10, r10, lsr #8
orr r10, r10, r11, lsl #24
stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
mov r3, r11, lsr #8
#endif
bhs 1b
b less_than_thirtytwo

loop24:
ldr r12, [r1], #4
1: mov r4, r12
ldmia r1!, { r5,r6,r7, r8,r9,r10,r11}
subs r2, r2, #32
ldrhs r12, [r1], #4
#if __ARMEB__
orr r3, r3, r4, lsr #8
mov r4, r4, lsl #24
orr r4, r4, r5, lsr #8
mov r5, r5, lsl #24
orr r5, r5, r6, lsr #8
mov r6, r6, lsl #24
orr r6, r6, r7, lsr #8
mov r7, r7, lsl #24
orr r7, r7, r8, lsr #8
mov r8, r8, lsl #24
orr r8, r8, r9, lsr #8
mov r9, r9, lsl #24
orr r9, r9, r10, lsr #8
mov r10, r10, lsl #24
orr r10, r10, r11, lsr #8
stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
mov r3, r11, lsl #24
#else
orr r3, r3, r4, lsl #8
mov r4, r4, lsr #24
orr r4, r4, r5, lsl #8
mov r5, r5, lsr #24
orr r5, r5, r6, lsl #8
mov r6, r6, lsr #24
orr r6, r6, r7, lsl #8
mov r7, r7, lsr #24
orr r7, r7, r8, lsl #8
mov r8, r8, lsr #24
orr r8, r8, r9, lsl #8
mov r9, r9, lsr #24
orr r9, r9, r10, lsl #8
mov r10, r10, lsr #24
orr r10, r10, r11, lsl #8
stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
mov r3, r11, lsr #24
#endif
bhs 1b

less_than_thirtytwo:
/* copy the last 0 to 31 bytes of the source */
rsb r12, lr, #32 /* we corrupted r12, recompute it */
add r2, r2, #32
cmp r2, #4
blo partial_word_tail

1: ldr r5, [r1], #4
sub r2, r2, #4
#if __ARMEB__
mov r4, r5, lsr lr
orr r4, r4, r3
mov r3, r5, lsl r12
#else
mov r4, r5, lsl lr
orr r4, r4, r3
mov r3, r5, lsr r12
#endif
str r4, [r0], #4
cmp r2, #4
bhs 1b

partial_word_tail:
/* we have a partial word in the input buffer */
movs r5, lr, lsl #(31-3)
#if __ARMEB__
movmi r3, r3, ror #24
strbmi r3, [r0], #1
movcs r3, r3, ror #24
strbcs r3, [r0], #1
movcs r3, r3, ror #24
strbcs r3, [r0], #1
#else
strbmi r3, [r0], #1
movmi r3, r3, lsr #8
strbcs r3, [r0], #1
movcs r3, r3, lsr #8
strbcs r3, [r0], #1
#endif

/* Refill spilled registers from the stack. Don't update sp. */
ldmfd sp, {r5-r11}

copy_last_3_and_return:
movs r2, r2, lsl #31 /* copy remaining 0, 1, 2 or 3 bytes */
ldrbmi r2, [r1], #1
ldrbcs r3, [r1], #1
ldrbcs r12,[r1]
strbmi r2, [r0], #1
strbcs r3, [r0], #1
strbcs r12,[r0]

/* we're done! restore sp and spilled registers and return */
add sp, sp, #28
ldmfd sp!, {r0, r4, lr}
bx lr
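Note: the tail copy above is branchless in the ARM sense: "movs r2, r2, lsl #31" shifts bit 0 of the remaining count into the N flag and bit 1 into C, so the ldrbmi/ldrbcs pairs copy exactly the leftover 0 to 3 bytes under condition codes. The same logic in plain C (illustration only):

    #include <stddef.h>
    /* mi (N set) corresponds to n & 1; cs (C set) to n & 2 */
    static void copy_last_3(unsigned char *d, const unsigned char *s, size_t n) {
        if (n & 1) *d++ = *s++;
        if (n & 2) { d[0] = s[0]; d[1] = s[1]; }
    }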
lib/libc/wasi/libc-top-half/musl/src/string/memcpy.c
@@ -1,128 +0,0 @@
#include <string.h>
#include <stdint.h>
#include <endian.h>

void *memcpy(void *restrict dest, const void *restrict src, size_t n)
{
#if defined(__wasm_bulk_memory__)
	if (n > BULK_MEMORY_THRESHOLD)
		return __builtin_memcpy(dest, src, n);
#endif
	unsigned char *d = dest;
	const unsigned char *s = src;

#ifdef __GNUC__

#if __BYTE_ORDER == __LITTLE_ENDIAN
#define LS >>
#define RS <<
#else
#define LS <<
#define RS >>
#endif

	typedef uint32_t __attribute__((__may_alias__)) u32;
	uint32_t w, x;

	for (; (uintptr_t)s % 4 && n; n--) *d++ = *s++;

	if ((uintptr_t)d % 4 == 0) {
		for (; n>=16; s+=16, d+=16, n-=16) {
			*(u32 *)(d+0) = *(u32 *)(s+0);
			*(u32 *)(d+4) = *(u32 *)(s+4);
			*(u32 *)(d+8) = *(u32 *)(s+8);
			*(u32 *)(d+12) = *(u32 *)(s+12);
		}
		if (n&8) {
			*(u32 *)(d+0) = *(u32 *)(s+0);
			*(u32 *)(d+4) = *(u32 *)(s+4);
			d += 8; s += 8;
		}
		if (n&4) {
			*(u32 *)(d+0) = *(u32 *)(s+0);
			d += 4; s += 4;
		}
		if (n&2) {
			*d++ = *s++; *d++ = *s++;
		}
		if (n&1) {
			*d = *s;
		}
		return dest;
	}

	if (n >= 32) switch ((uintptr_t)d % 4) {
	case 1:
		w = *(u32 *)s;
		*d++ = *s++;
		*d++ = *s++;
		*d++ = *s++;
		n -= 3;
		for (; n>=17; s+=16, d+=16, n-=16) {
			x = *(u32 *)(s+1);
			*(u32 *)(d+0) = (w LS 24) | (x RS 8);
			w = *(u32 *)(s+5);
			*(u32 *)(d+4) = (x LS 24) | (w RS 8);
			x = *(u32 *)(s+9);
			*(u32 *)(d+8) = (w LS 24) | (x RS 8);
			w = *(u32 *)(s+13);
			*(u32 *)(d+12) = (x LS 24) | (w RS 8);
		}
		break;
	case 2:
		w = *(u32 *)s;
		*d++ = *s++;
		*d++ = *s++;
		n -= 2;
		for (; n>=18; s+=16, d+=16, n-=16) {
			x = *(u32 *)(s+2);
			*(u32 *)(d+0) = (w LS 16) | (x RS 16);
			w = *(u32 *)(s+6);
			*(u32 *)(d+4) = (x LS 16) | (w RS 16);
			x = *(u32 *)(s+10);
			*(u32 *)(d+8) = (w LS 16) | (x RS 16);
			w = *(u32 *)(s+14);
			*(u32 *)(d+12) = (x LS 16) | (w RS 16);
		}
		break;
	case 3:
		w = *(u32 *)s;
		*d++ = *s++;
		n -= 1;
		for (; n>=19; s+=16, d+=16, n-=16) {
			x = *(u32 *)(s+3);
			*(u32 *)(d+0) = (w LS 8) | (x RS 24);
			w = *(u32 *)(s+7);
			*(u32 *)(d+4) = (x LS 8) | (w RS 24);
			x = *(u32 *)(s+11);
			*(u32 *)(d+8) = (w LS 8) | (x RS 24);
			w = *(u32 *)(s+15);
			*(u32 *)(d+12) = (x LS 8) | (w RS 24);
		}
		break;
	}
	if (n&16) {
		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
	}
	if (n&8) {
		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
	}
	if (n&4) {
		*d++ = *s++; *d++ = *s++; *d++ = *s++; *d++ = *s++;
	}
	if (n&2) {
		*d++ = *s++; *d++ = *s++;
	}
	if (n&1) {
		*d = *s;
	}
	return dest;
#endif

	for (; n; n--) *d++ = *s++;
	return dest;
}
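Note: the LS/RS macros above look inverted at first glance. On a little-endian target the byte at the lower address lives in the less significant bits of a u32, so discarding the bytes already copied from w is a right shift, and the next word x contributes its low bytes via a left shift. A hedged check of the case-1 combination (hypothetical helper, not musl code):

    #include <stdint.h>
    /* Little-endian: (w LS 24) | (x RS 8) expands to (w >> 24) | (x << 8):
     * the surviving top byte of w becomes byte 0 of the output word and
     * the three low bytes of x become bytes 1..3. */
    static uint32_t combine_case1_le(uint32_t w, uint32_t x) {
        return (w >> 24) | (x << 8);
    }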
@@ -1,46 +0,0 @@
#include <string.h>
#include <stdint.h>

#ifdef __GNUC__
typedef __attribute__((__may_alias__)) size_t WT;
#define WS (sizeof(WT))
#endif

void *memmove(void *dest, const void *src, size_t n)
{
#if defined(__wasm_bulk_memory__)
	if (n > BULK_MEMORY_THRESHOLD)
		return __builtin_memmove(dest, src, n);
#endif
	char *d = dest;
	const char *s = src;

	if (d==s) return d;
	if ((uintptr_t)s-(uintptr_t)d-n <= -2*n) return memcpy(d, s, n);

	if (d<s) {
#ifdef __GNUC__
		if ((uintptr_t)s % WS == (uintptr_t)d % WS) {
			while ((uintptr_t)d % WS) {
				if (!n--) return dest;
				*d++ = *s++;
			}
			for (; n>=WS; n-=WS, d+=WS, s+=WS) *(WT *)d = *(WT *)s;
		}
#endif
		for (; n; n--) *d++ = *s++;
	} else {
#ifdef __GNUC__
		if ((uintptr_t)s % WS == (uintptr_t)d % WS) {
			while ((uintptr_t)(d+n) % WS) {
				if (!n--) return dest;
				d[n] = s[n];
			}
			while (n>=WS) n-=WS, *(WT *)(d+n) = *(WT *)(s+n);
		}
#endif
		while (n) n--, d[n] = s[n];
	}

	return dest;
}
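Note: the guard "(uintptr_t)s-(uintptr_t)d-n <= -2*n" above is an unsigned-wraparound test that holds exactly when the two ranges are at least n bytes apart in either direction, i.e. they do not overlap, so deferring to memcpy is safe. A more explicit form (equivalent in intent for n > 0; the d == s case is already handled on the line before):

    #include <stdint.h>
    #include <stddef.h>
    static int ranges_disjoint(const void *d, const void *s, size_t n) {
        uintptr_t dp = (uintptr_t)d, sp = (uintptr_t)s;
        return (sp > dp ? sp - dp : dp - sp) >= n;
    }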
@@ -45,29 +45,21 @@ pub const File = union(enum) {

/// Encodes symbol rank so that the following ordering applies:
/// * strong in object
/// * weak in object
/// * tentative in object
/// * strong in archive/dylib
/// * weak in object
/// * weak in archive/dylib
/// * tentative in object
/// * tentative in archive
/// * unclaimed
/// Ties are broken by file priority.
pub fn getSymbolRank(file: File, args: struct {
    archive: bool = false,
    weak: bool = false,
    tentative: bool = false,
}) u32 {
    if (file != .dylib and !args.archive) {
        const base: u32 = blk: {
            if (args.tentative) break :blk 3;
            break :blk if (args.weak) 2 else 1;
        };
        return (base << 16) + file.getIndex();
    }
    const base: u32 = blk: {
        if (args.tentative) break :blk 3;
        break :blk if (args.weak) 2 else 1;
    };
    return base + (file.getIndex() << 24);
    const archive_or_dylib = @as(u32, @intFromBool(file == .dylib or args.archive)) << 29;
    const strength: u32 = if (args.tentative) 0b10 << 30 else if (args.weak) 0b01 << 30 else 0b00 << 30;
    return strength | archive_or_dylib | file.getIndex();
}

pub fn getAtom(file: File, atom_index: Atom.Index) ?*Atom {
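Note: the replacement getSymbolRank packs the whole ordering into one u32 so that a plain unsigned compare ranks symbols: strength in the top two bits, the archive/dylib flag in bit 29, and the file index (priority) in the low bits. A hedged C restatement of the packing, with field widths copied from the Zig above:

    #include <stdint.h>
    /* strength2: 0b00 strong, 0b01 weak, 0b10 tentative */
    static uint32_t pack_rank(uint32_t strength2, int archive_or_dylib, uint32_t file_index) {
        return (strength2 << 30) | ((uint32_t)(archive_or_dylib != 0) << 29) | file_index;
    }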
@@ -1899,25 +1899,18 @@ const src_files = [_][]const u8{
    "musl/src/stdlib/strtol.c",
    "musl/src/stdlib/wcstod.c",
    "musl/src/stdlib/wcstol.c",
    "musl/src/string/aarch64/memcpy.S",
    "musl/src/string/aarch64/memset.S",
    "musl/src/string/arm/__aeabi_memcpy.s",
    "musl/src/string/arm/__aeabi_memset.s",
    "musl/src/string/arm/memcpy.S",
    "musl/src/string/bcmp.c",
    "musl/src/string/bcopy.c",
    "musl/src/string/bzero.c",
    "musl/src/string/explicit_bzero.c",
    "musl/src/string/i386/memcpy.s",
    "musl/src/string/i386/memmove.s",
    "musl/src/string/i386/memset.s",
    "musl/src/string/index.c",
    "musl/src/string/memccpy.c",
    "musl/src/string/memchr.c",
    "musl/src/string/memcmp.c",
    "musl/src/string/memcpy.c",
    "musl/src/string/memmem.c",
    "musl/src/string/memmove.c",
    "musl/src/string/mempcpy.c",
    "musl/src/string/memrchr.c",
    "musl/src/string/memset.c",
@@ -1981,8 +1974,6 @@ const src_files = [_][]const u8{
    "musl/src/string/wmemcpy.c",
    "musl/src/string/wmemmove.c",
    "musl/src/string/wmemset.c",
    "musl/src/string/x86_64/memcpy.s",
    "musl/src/string/x86_64/memmove.s",
    "musl/src/string/x86_64/memset.s",
    "musl/src/temp/mkdtemp.c",
    "musl/src/temp/mkostemp.c",
@@ -694,9 +694,7 @@ const libc_top_half_src_files = [_][]const u8{
    "wasi/libc-top-half/musl/src/string/memccpy.c",
    "wasi/libc-top-half/musl/src/string/memchr.c",
    "wasi/libc-top-half/musl/src/string/memcmp.c",
    "wasi/libc-top-half/musl/src/string/memcpy.c",
    "wasi/libc-top-half/musl/src/string/memmem.c",
    "wasi/libc-top-half/musl/src/string/memmove.c",
    "wasi/libc-top-half/musl/src/string/mempcpy.c",
    "wasi/libc-top-half/musl/src/string/memrchr.c",
    "wasi/libc-top-half/musl/src/string/memset.c",