update musl sources to 1.2.1

Andrew Kelley
2020-11-06 14:13:57 -07:00
parent dba94f453a
commit e5fab3ba68
157 changed files with 2371 additions and 1380 deletions

View File

@@ -98,12 +98,6 @@ static inline long __syscall6(long n, long a, long b, long c, long d, long e, long f)
__asm_syscall(R7_OPERAND, "0"(r0), "r"(r1), "r"(r2), "r"(r3), "r"(r4), "r"(r5));
}
#define VDSO_USEFUL
#define VDSO_CGT32_SYM "__vdso_clock_gettime"
#define VDSO_CGT32_VER "LINUX_2.6"
#define VDSO_CGT_SYM "__vdso_clock_gettime64"
#define VDSO_CGT_VER "LINUX_2.6"
#define SYSCALL_FADVISE_6_ARG
#define SYSCALL_IPC_BROKEN_MODE

View File

@@ -93,7 +93,7 @@ typedef struct __ucontext {
#define SIGTRAP 5
#define SIGABRT 6
#define SIGIOT SIGABRT
#define SIGSTKFLT 7
#define SIGEMT 7
#define SIGFPE 8
#define SIGKILL 9
#define SIGBUS 10

View File

@@ -18,26 +18,26 @@
static inline long __syscall0(long n)
{
register long r7 __asm__("$7");
register long r2 __asm__("$2") = n;
register long r2 __asm__("$2");
__asm__ __volatile__ (
"syscall"
: "+r"(r2), "=r"(r7)
:
"addu $2,$0,%2 ; syscall"
: "=&r"(r2), "=r"(r7)
: "ir"(n), "0"(r2)
: SYSCALL_CLOBBERLIST, "$8", "$9", "$10");
return r7 ? -r2 : r2;
return r7 && r2>0 ? -r2 : r2;
}
static inline long __syscall1(long n, long a)
{
register long r4 __asm__("$4") = a;
register long r7 __asm__("$7");
register long r2 __asm__("$2") = n;
register long r2 __asm__("$2");
__asm__ __volatile__ (
"syscall"
: "+r"(r2), "=r"(r7)
: "r"(r4)
"addu $2,$0,%2 ; syscall"
: "=&r"(r2), "=r"(r7)
: "ir"(n), "0"(r2), "r"(r4)
: SYSCALL_CLOBBERLIST, "$8", "$9", "$10");
return r7 ? -r2 : r2;
return r7 && r2>0 ? -r2 : r2;
}
static inline long __syscall2(long n, long a, long b)
@@ -45,13 +45,13 @@ static inline long __syscall2(long n, long a, long b)
register long r4 __asm__("$4") = a;
register long r5 __asm__("$5") = b;
register long r7 __asm__("$7");
register long r2 __asm__("$2") = n;
register long r2 __asm__("$2");
__asm__ __volatile__ (
"syscall"
: "+r"(r2), "=r"(r7)
: "r"(r4), "r"(r5)
"addu $2,$0,%2 ; syscall"
: "=&r"(r2), "=r"(r7)
: "ir"(n), "0"(r2), "r"(r4), "r"(r5)
: SYSCALL_CLOBBERLIST, "$8", "$9", "$10");
return r7 ? -r2 : r2;
return r7 && r2>0 ? -r2 : r2;
}
static inline long __syscall3(long n, long a, long b, long c)
@@ -60,13 +60,13 @@ static inline long __syscall3(long n, long a, long b, long c)
register long r5 __asm__("$5") = b;
register long r6 __asm__("$6") = c;
register long r7 __asm__("$7");
register long r2 __asm__("$2") = n;
register long r2 __asm__("$2");
__asm__ __volatile__ (
"syscall"
: "+r"(r2), "=r"(r7)
: "r"(r4), "r"(r5), "r"(r6)
"addu $2,$0,%2 ; syscall"
: "=&r"(r2), "=r"(r7)
: "ir"(n), "0"(r2), "r"(r4), "r"(r5), "r"(r6)
: SYSCALL_CLOBBERLIST, "$8", "$9", "$10");
return r7 ? -r2 : r2;
return r7 && r2>0 ? -r2 : r2;
}
static inline long __syscall4(long n, long a, long b, long c, long d)
@@ -75,13 +75,13 @@ static inline long __syscall4(long n, long a, long b, long c, long d)
register long r5 __asm__("$5") = b;
register long r6 __asm__("$6") = c;
register long r7 __asm__("$7") = d;
register long r2 __asm__("$2") = n;
register long r2 __asm__("$2");
__asm__ __volatile__ (
"syscall"
: "+r"(r2), "+r"(r7)
: "r"(r4), "r"(r5), "r"(r6)
"addu $2,$0,%2 ; syscall"
: "=&r"(r2), "+r"(r7)
: "ir"(n), "0"(r2), "r"(r4), "r"(r5), "r"(r6)
: SYSCALL_CLOBBERLIST, "$8", "$9", "$10");
return r7 ? -r2 : r2;
return r7 && r2>0 ? -r2 : r2;
}
static inline long __syscall5(long n, long a, long b, long c, long d, long e)
@@ -91,15 +91,15 @@ static inline long __syscall5(long n, long a, long b, long c, long d, long e)
register long r6 __asm__("$6") = c;
register long r7 __asm__("$7") = d;
register long r8 __asm__("$8") = e;
register long r2 __asm__("$2") = n;
register long r2 __asm__("$2");
__asm__ __volatile__ (
"subu $sp,$sp,32 ; sw $8,16($sp) ; "
"syscall ;"
"addu $2,$0,%3 ; syscall ;"
"addu $sp,$sp,32"
: "+r"(r2), "+r"(r7), "+r"(r8)
: "r"(r4), "r"(r5), "r"(r6)
: "=&r"(r2), "+r"(r7), "+r"(r8)
: "ir"(n), "0"(r2), "r"(r4), "r"(r5), "r"(r6)
: SYSCALL_CLOBBERLIST, "$9", "$10");
return r7 ? -r2 : r2;
return r7 && r2>0 ? -r2 : r2;
}
static inline long __syscall6(long n, long a, long b, long c, long d, long e, long f)
@@ -110,15 +110,15 @@ static inline long __syscall6(long n, long a, long b, long c, long d, long e, long f)
register long r7 __asm__("$7") = d;
register long r8 __asm__("$8") = e;
register long r9 __asm__("$9") = f;
register long r2 __asm__("$2") = n;
register long r2 __asm__("$2");
__asm__ __volatile__ (
"subu $sp,$sp,32 ; sw $8,16($sp) ; sw $9,20($sp) ; "
"syscall ;"
"addu $2,$0,%4 ; syscall ;"
"addu $sp,$sp,32"
: "+r"(r2), "+r"(r7), "+r"(r8), "+r"(r9)
: "r"(r4), "r"(r5), "r"(r6)
: "=&r"(r2), "+r"(r7), "+r"(r8), "+r"(r9)
: "ir"(n), "0"(r2), "r"(r4), "r"(r5), "r"(r6)
: SYSCALL_CLOBBERLIST, "$10");
return r7 ? -r2 : r2;
return r7 && r2>0 ? -r2 : r2;
}
static inline long __syscall7(long n, long a, long b, long c, long d, long e, long f, long g)
@@ -130,15 +130,15 @@ static inline long __syscall7(long n, long a, long b, long c, long d, long e, long f, long g)
register long r8 __asm__("$8") = e;
register long r9 __asm__("$9") = f;
register long r10 __asm__("$10") = g;
register long r2 __asm__("$2") = n;
register long r2 __asm__("$2");
__asm__ __volatile__ (
"subu $sp,$sp,32 ; sw $8,16($sp) ; sw $9,20($sp) ; sw $10,24($sp) ; "
"syscall ;"
"addu $2,$0,%5 ; syscall ;"
"addu $sp,$sp,32"
: "+r"(r2), "+r"(r7), "+r"(r8), "+r"(r9), "+r"(r10)
: "r"(r4), "r"(r5), "r"(r6)
: "=&r"(r2), "+r"(r7), "+r"(r8), "+r"(r9), "+r"(r10)
: "ir"(n), "0"(r2), "r"(r4), "r"(r5), "r"(r6)
: SYSCALL_CLOBBERLIST);
return r7 ? -r2 : r2;
return r7 && r2>0 ? -r2 : r2;
}
#define VDSO_USEFUL
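
Review note on the MIPS wrappers (the mips64 file below makes the same change with daddu): the syscall number is no longer pre-loaded into $2 by the compiler but materialized inside the asm from an "ir" input, and the error test now treats only positive $2 values as errno codes. A compilable single-argument sketch in the new style (MIPS32 only; the clobber list is an approximation of musl's SYSCALL_CLOBBERLIST, which this diff does not show):

static long raw_syscall1(long n, long a)
{
	register long r4 __asm__("$4") = a;
	register long r7 __asm__("$7");
	register long r2 __asm__("$2");
	__asm__ __volatile__ (
		/* load the number immediately before the trap */
		"addu $2,$0,%2 ; syscall"
		/* "=&r": early-clobber output; "0": input tied to it */
		: "=&r"(r2), "=r"(r7)
		: "ir"(n), "0"(r2), "r"(r4)
		: "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13",
		  "$14", "$15", "$24", "$25", "hi", "lo", "memory");
	/* $7 nonzero flags failure; only positive $2 is an errno code */
	return r7 && r2 > 0 ? -r2 : r2;
}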

View File

@@ -112,7 +112,7 @@ typedef struct __ucontext {
#define SIGTRAP 5
#define SIGABRT 6
#define SIGIOT SIGABRT
#define SIGSTKFLT 7
#define SIGEMT 7
#define SIGFPE 8
#define SIGKILL 9
#define SIGBUS 10

View File

@@ -16,26 +16,26 @@
static inline long __syscall0(long n)
{
register long r7 __asm__("$7");
register long r2 __asm__("$2") = n;
register long r2 __asm__("$2");
__asm__ __volatile__ (
"syscall"
: "+&r"(r2), "=r"(r7)
:
"daddu $2,$0,%2 ; syscall"
: "=&r"(r2), "=r"(r7)
: "ir"(n), "0"(r2)
: SYSCALL_CLOBBERLIST);
return r7 ? -r2 : r2;
return r7 && r2>0 ? -r2 : r2;
}
static inline long __syscall1(long n, long a)
{
register long r4 __asm__("$4") = a;
register long r7 __asm__("$7");
register long r2 __asm__("$2") = n;
register long r2 __asm__("$2");
__asm__ __volatile__ (
"syscall"
: "+&r"(r2), "=r"(r7)
: "r"(r4)
"daddu $2,$0,%2 ; syscall"
: "=&r"(r2), "=r"(r7)
: "ir"(n), "0"(r2), "r"(r4)
: SYSCALL_CLOBBERLIST);
return r7 ? -r2 : r2;
return r7 && r2>0 ? -r2 : r2;
}
static inline long __syscall2(long n, long a, long b)
@@ -43,14 +43,14 @@ static inline long __syscall2(long n, long a, long b)
register long r4 __asm__("$4") = a;
register long r5 __asm__("$5") = b;
register long r7 __asm__("$7");
register long r2 __asm__("$2") = n;
register long r2 __asm__("$2");
__asm__ __volatile__ (
"syscall"
: "+&r"(r2), "=r"(r7)
: "r"(r4), "r"(r5)
"daddu $2,$0,%2 ; syscall"
: "=&r"(r2), "=r"(r7)
: "ir"(n), "0"(r2), "r"(r4), "r"(r5)
: SYSCALL_CLOBBERLIST);
return r7 ? -r2 : r2;
return r7 && r2>0 ? -r2 : r2;
}
static inline long __syscall3(long n, long a, long b, long c)
@@ -59,14 +59,14 @@ static inline long __syscall3(long n, long a, long b, long c)
register long r5 __asm__("$5") = b;
register long r6 __asm__("$6") = c;
register long r7 __asm__("$7");
register long r2 __asm__("$2") = n;
register long r2 __asm__("$2");
__asm__ __volatile__ (
"syscall"
: "+&r"(r2), "=r"(r7)
: "r"(r4), "r"(r5), "r"(r6)
"daddu $2,$0,%2 ; syscall"
: "=&r"(r2), "=r"(r7)
: "ir"(n), "0"(r2), "r"(r4), "r"(r5), "r"(r6)
: SYSCALL_CLOBBERLIST);
return r7 ? -r2 : r2;
return r7 && r2>0 ? -r2 : r2;
}
static inline long __syscall4(long n, long a, long b, long c, long d)
@@ -75,14 +75,14 @@ static inline long __syscall4(long n, long a, long b, long c, long d)
register long r5 __asm__("$5") = b;
register long r6 __asm__("$6") = c;
register long r7 __asm__("$7") = d;
register long r2 __asm__("$2") = n;
register long r2 __asm__("$2");
__asm__ __volatile__ (
"syscall"
: "+&r"(r2), "+r"(r7)
: "r"(r4), "r"(r5), "r"(r6)
"daddu $2,$0,%2 ; syscall"
: "=&r"(r2), "+r"(r7)
: "ir"(n), "0"(r2), "r"(r4), "r"(r5), "r"(r6)
: SYSCALL_CLOBBERLIST);
return r7 ? -r2 : r2;
return r7 && r2>0 ? -r2 : r2;
}
static inline long __syscall5(long n, long a, long b, long c, long d, long e)
@@ -92,14 +92,14 @@ static inline long __syscall5(long n, long a, long b, long c, long d, long e)
register long r6 __asm__("$6") = c;
register long r7 __asm__("$7") = d;
register long r8 __asm__("$8") = e;
register long r2 __asm__("$2") = n;
register long r2 __asm__("$2");
__asm__ __volatile__ (
"syscall"
: "+&r"(r2), "+r"(r7)
: "r"(r4), "r"(r5), "r"(r6), "r"(r8)
"daddu $2,$0,%2 ; syscall"
: "=&r"(r2), "+r"(r7)
: "ir"(n), "0"(r2), "r"(r4), "r"(r5), "r"(r6), "r"(r8)
: SYSCALL_CLOBBERLIST);
return r7 ? -r2 : r2;
return r7 && r2>0 ? -r2 : r2;
}
static inline long __syscall6(long n, long a, long b, long c, long d, long e, long f)
@@ -110,14 +110,14 @@ static inline long __syscall6(long n, long a, long b, long c, long d, long e, long f)
register long r7 __asm__("$7") = d;
register long r8 __asm__("$8") = e;
register long r9 __asm__("$9") = f;
register long r2 __asm__("$2") = n;
register long r2 __asm__("$2");
__asm__ __volatile__ (
"syscall"
: "+&r"(r2), "+r"(r7)
: "r"(r4), "r"(r5), "r"(r6), "r"(r8), "r"(r9)
"daddu $2,$0,%2 ; syscall"
: "=&r"(r2), "+r"(r7)
: "ir"(n), "0"(r2), "r"(r4), "r"(r5), "r"(r6), "r"(r8), "r"(r9)
: SYSCALL_CLOBBERLIST);
return r7 ? -r2 : r2;
return r7 && r2>0 ? -r2 : r2;
}
#define VDSO_USEFUL

View File

@@ -4,7 +4,6 @@
#define MAP_NORESERVE 0x40
#undef MAP_LOCKED
#define MAP_LOCKED 0x80
#undef MAP_SYNC
#undef MCL_CURRENT
#define MCL_CURRENT 0x2000

View File

@@ -4,7 +4,6 @@
#define MAP_NORESERVE 0x40
#undef MAP_LOCKED
#define MAP_LOCKED 0x80
#undef MAP_SYNC
#undef MCL_CURRENT
#define MCL_CURRENT 0x2000

View File

@@ -24,11 +24,6 @@ struct in_addr inet_makeaddr(in_addr_t, in_addr_t);
in_addr_t inet_lnaof(struct in_addr);
in_addr_t inet_netof(struct in_addr);
#undef INET_ADDRSTRLEN
#undef INET6_ADDRSTRLEN
#define INET_ADDRSTRLEN 16
#define INET6_ADDRSTRLEN 46
#ifdef __cplusplus
}
#endif

View File

@@ -60,8 +60,6 @@ struct ipv6_mreq {
extern const struct in6_addr in6addr_any, in6addr_loopback;
#undef INET_ADDRSTRLEN
#undef INET6_ADDRSTRLEN
#define INET_ADDRSTRLEN 16
#define INET6_ADDRSTRLEN 46

View File

@@ -74,6 +74,9 @@ extern "C" {
#define PTHREAD_BARRIER_SERIAL_THREAD (-1)
#define PTHREAD_NULL ((pthread_t)0)
int pthread_create(pthread_t *__restrict, const pthread_attr_t *__restrict, void *(*)(void *), void *__restrict);
int pthread_detach(pthread_t);
_Noreturn void pthread_exit(void *);

View File

@@ -1,8 +1,9 @@
/* This file is sorted such that 'errors' which represent exceptional
* conditions under which a correct program may fail come first, followed
* by messages that indicate an incorrect program or system failure. The
* macro E() along with double-inclusion is used to ensure that ordering
* of the strings remains synchronized. */
/* The first entry is a catch-all for codes not enumerated here.
* This file is included multiple times to declare and define a structure
* with these messages, and then to define a lookup table translating
* error codes to offsets of corresponding fields in the structure. */
E(0, "No error information")
E(EILSEQ, "Illegal byte sequence")
E(EDOM, "Domain error")
@@ -101,5 +102,3 @@ E(EDQUOT, "Quota exceeded")
E(ENOMEDIUM, "No medium found")
E(EMEDIUMTYPE, "Wrong medium type")
E(EMULTIHOP, "Multihop attempted")
E(0, "No error information")

View File

@@ -1,30 +1,41 @@
#include <errno.h>
#include <stddef.h>
#include <string.h>
#include "locale_impl.h"
#define E(a,b) ((unsigned char)a),
static const unsigned char errid[] = {
/* mips has one error code outside of the 8-bit range due to a
* historical typo, so we just remap it. */
#if EDQUOT==1133
#define EDQUOT_ORIG 1133
#undef EDQUOT
#define EDQUOT 109
#endif
static const struct errmsgstr_t {
#define E(n, s) char str##n[sizeof(s)];
#include "__strerror.h"
#undef E
} errmsgstr = {
#define E(n, s) s,
#include "__strerror.h"
#undef E
};
#undef E
#define E(a,b) b "\0"
static const char errmsg[] =
static const unsigned short errmsgidx[] = {
#define E(n, s) [n] = offsetof(struct errmsgstr_t, str##n),
#include "__strerror.h"
;
#undef E
};
char *__strerror_l(int e, locale_t loc)
{
const char *s;
int i;
/* mips has one error code outside of the 8-bit range due to a
* historical typo, so we just remap it. */
if (EDQUOT==1133) {
if (e==109) e=-1;
else if (e==EDQUOT) e=109;
}
for (i=0; errid[i] && errid[i] != e; i++);
for (s=errmsg; i; s++, i--) for (; *s; s++);
#ifdef EDQUOT_ORIG
if (e==EDQUOT) e=0;
else if (e==EDQUOT_ORIG) e=EDQUOT;
#endif
if (e >= sizeof errmsgidx / sizeof *errmsgidx) e = 0;
s = (char *)&errmsgstr + errmsgidx[e];
return (char *)LCTRANS(s, LC_MESSAGES, loc);
}
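
The rewrite replaces the old packed string blob plus linear scan (the errid[]/errmsg walk) with a struct whose members hold the messages and a table of offsetof() values indexed directly by the error code, and hoists the MIPS EDQUOT remap into the preprocessor. A standalone sketch of the struct-plus-offset-table technique, with made-up two-entry data in place of __strerror.h:

#include <stdio.h>
#include <stddef.h>

/* expansion 1: one char-array member per message */
#define E(n, s) char str##n[sizeof(s)];
struct msgs { E(0, "ok") E(1, "bad") };
#undef E

/* expansion 2: initialize each member from the literal */
#define E(n, s) s,
static const struct msgs msgs = { E(0, "ok") E(1, "bad") };
#undef E

/* expansion 3: error code -> offset of its message */
#define E(n, s) [n] = offsetof(struct msgs, str##n),
static const unsigned short idx[] = { E(0, "ok") E(1, "bad") };
#undef E

int main(void)
{
	printf("%s\n", (const char *)&msgs + idx[1]); /* prints "bad" */
	return 0;
}

This keeps lookups O(1) and the data pointer-free (no relocations), at the cost of a sparse index table.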

View File

@@ -12,6 +12,8 @@ fegetround:
.type __fesetround, @function
__fesetround:
sts fpscr, r0
mov #-4, r1
and r1, r0
or r4, r0
lds r0, fpscr
rts
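
The bug being fixed: __fesetround previously only or'd the requested mode into FPSCR, so rounding-mode bits could be set but never cleared again. The added mov #-4, r1 / and r1, r0 clears the two-bit RM field first. As a C model of the fixed sequence:

/* fpscr = current FPSCR value, mode = new rounding mode (low 2 bits) */
static unsigned fesetround_model(unsigned fpscr, unsigned mode)
{
	fpscr &= ~3u;         /* and r1, r0  with r1 = -4 */
	return fpscr | mode;  /* or r4, r0 */
}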

View File

@@ -315,4 +315,19 @@ static inline int a_clz_64(uint64_t x)
}
#endif
#ifndef a_clz_32
#define a_clz_32 a_clz_32
static inline int a_clz_32(uint32_t x)
{
x >>= 1;
x |= x >> 1;
x |= x >> 2;
x |= x >> 4;
x |= x >> 8;
x |= x >> 16;
x++;
return 31-a_ctz_32(x);
}
#endif
#endif
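
The fallback derives count-leading-zeros from count-trailing-zeros: smearing x>>1 rightward yields 2^b - 1, where b is the index of the highest set bit of x, so the increment produces exactly 2^b and clz = 31 - ctz(2^b) = 31 - b. A standalone check, with the compiler builtin standing in for a_ctz_32:

#include <assert.h>
#include <stdint.h>

static int clz32(uint32_t x)
{
	x >>= 1;
	x |= x >> 1; x |= x >> 2; x |= x >> 4;
	x |= x >> 8; x |= x >> 16;
	x++;
	return 31 - __builtin_ctz(x);
}

int main(void)
{
	assert(clz32(1) == 31);
	assert(clz32(0x80000000u) == 0);
	assert(clz32(0x00ffff00u) == 8);
	return 0;
}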

View File

@@ -105,4 +105,9 @@ hidden void __dl_vseterr(const char *, va_list);
hidden ptrdiff_t __tlsdesc_static(), __tlsdesc_dynamic();
hidden extern int __malloc_replaced;
hidden extern int __aligned_alloc_replaced;
hidden void __malloc_donate(char *, char *);
hidden int __malloc_allzerop(void *);
#endif

View File

@@ -18,10 +18,11 @@ struct tls_module {
};
struct __libc {
int can_do_threads;
int threaded;
int secure;
volatile int threads_minus_1;
char can_do_threads;
char threaded;
char secure;
volatile signed char need_locks;
int threads_minus_1;
size_t *auxv;
struct tls_module *tls_head;
size_t tls_size, tls_align, tls_cnt;
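
Shrinking the flag fields to char and adding the need_locks byte gives hot paths a single-byte "does this process need locking at all" test; mallocng's glue.h further down defines MT as libc.need_locks. A hypothetical consumer sketching the intended gating (the signed -1 "last extra thread exited" state and its demotion to 0 are an assumption, not shown in this diff):

#include <pthread.h>

static struct { volatile signed char need_locks; } libc_model;
static pthread_mutex_t lk = PTHREAD_MUTEX_INITIALIZER;

static void lock_if_needed(void)
{
	signed char need = libc_model.need_locks;
	if (!need) return;          /* never multithreaded: skip lock */
	pthread_mutex_lock(&lk);
	if (need < 0) libc_model.need_locks = 0; /* threads all gone */
}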

View File

@@ -236,13 +236,13 @@ hidden int __rem_pio2(double,double*);
hidden double __sin(double,double,int);
hidden double __cos(double,double);
hidden double __tan(double,double,int);
hidden double __expo2(double);
hidden double __expo2(double,double);
hidden int __rem_pio2f(float,double*);
hidden float __sindf(double);
hidden float __cosdf(double);
hidden float __tandf(double,int);
hidden float __expo2f(float);
hidden float __expo2f(float,float);
hidden int __rem_pio2l(long double, long double *);
hidden long double __sinl(long double, long double, int);

View File

@@ -1,46 +0,0 @@
#ifndef MALLOC_IMPL_H
#define MALLOC_IMPL_H
#include <sys/mman.h>
hidden void *__expand_heap(size_t *);
hidden void __malloc_donate(char *, char *);
hidden void *__memalign(size_t, size_t);
struct chunk {
size_t psize, csize;
struct chunk *next, *prev;
};
struct bin {
volatile int lock[2];
struct chunk *head;
struct chunk *tail;
};
#define SIZE_ALIGN (4*sizeof(size_t))
#define SIZE_MASK (-SIZE_ALIGN)
#define OVERHEAD (2*sizeof(size_t))
#define MMAP_THRESHOLD (0x1c00*SIZE_ALIGN)
#define DONTCARE 16
#define RECLAIM 163840
#define CHUNK_SIZE(c) ((c)->csize & -2)
#define CHUNK_PSIZE(c) ((c)->psize & -2)
#define PREV_CHUNK(c) ((struct chunk *)((char *)(c) - CHUNK_PSIZE(c)))
#define NEXT_CHUNK(c) ((struct chunk *)((char *)(c) + CHUNK_SIZE(c)))
#define MEM_TO_CHUNK(p) (struct chunk *)((char *)(p) - OVERHEAD)
#define CHUNK_TO_MEM(c) (void *)((char *)(c) + OVERHEAD)
#define BIN_TO_CHUNK(i) (MEM_TO_CHUNK(&mal.bins[i].head))
#define C_INUSE ((size_t)1)
#define IS_MMAPPED(c) !((c)->csize & (C_INUSE))
hidden void __bin_chunk(struct chunk *);
hidden extern int __malloc_replaced;
#endif

View File

@@ -32,6 +32,6 @@ int __shgetc(FILE *f)
else
f->shend = f->rend;
f->shcnt = f->buf - f->rpos + cnt;
if (f->rpos[-1] != c) f->rpos[-1] = c;
if (f->rpos <= f->buf) f->rpos[-1] = c;
return c;
}

View File

@@ -60,8 +60,6 @@ hidden size_t __stdout_write(FILE *, const unsigned char *, size_t);
hidden off_t __stdio_seek(FILE *, off_t, int);
hidden int __stdio_close(FILE *);
hidden size_t __string_read(FILE *, unsigned char *, size_t);
hidden int __toread(FILE *);
hidden int __towrite(FILE *);

View File

@@ -1 +1 @@
#define VERSION "1.2.0"
#define VERSION "1.2.1"

View File

@@ -9,6 +9,14 @@
int msgctl(int q, int cmd, struct msqid_ds *buf)
{
#if IPC_TIME64
struct msqid_ds out, *orig;
if (cmd&IPC_TIME64) {
out = (struct msqid_ds){0};
orig = buf;
buf = &out;
}
#endif
#ifdef SYSCALL_IPC_BROKEN_MODE
struct msqid_ds tmp;
if (cmd == IPC_SET) {
@@ -32,6 +40,8 @@ int msgctl(int q, int cmd, struct msqid_ds *buf)
#endif
#if IPC_TIME64
if (r >= 0 && (cmd&IPC_TIME64)) {
buf = orig;
*buf = out;
IPC_HILO(buf, msg_stime);
IPC_HILO(buf, msg_rtime);
IPC_HILO(buf, msg_ctime);

View File

@@ -28,6 +28,14 @@ int semctl(int id, int num, int cmd, ...)
arg = va_arg(ap, union semun);
va_end(ap);
}
#if IPC_TIME64
struct semid_ds out, *orig;
if (cmd&IPC_TIME64) {
out = (struct semid_ds){0};
orig = arg.buf;
arg.buf = &out;
}
#endif
#ifdef SYSCALL_IPC_BROKEN_MODE
struct semid_ds tmp;
if (cmd == IPC_SET) {
@@ -51,6 +59,8 @@ int semctl(int id, int num, int cmd, ...)
#endif
#if IPC_TIME64
if (r >= 0 && (cmd&IPC_TIME64)) {
arg.buf = orig;
*arg.buf = out;
IPC_HILO(arg.buf, sem_otime);
IPC_HILO(arg.buf, sem_ctime);
}

View File

@@ -9,6 +9,14 @@
int shmctl(int id, int cmd, struct shmid_ds *buf)
{
#if IPC_TIME64
struct shmid_ds out, *orig;
if (cmd&IPC_TIME64) {
out = (struct shmid_ds){0};
orig = buf;
buf = &out;
}
#endif
#ifdef SYSCALL_IPC_BROKEN_MODE
struct shmid_ds tmp;
if (cmd == IPC_SET) {
@@ -32,6 +40,8 @@ int shmctl(int id, int cmd, struct shmid_ds *buf)
#endif
#if IPC_TIME64
if (r >= 0 && (cmd&IPC_TIME64)) {
buf = orig;
*buf = out;
IPC_HILO(buf, shm_atime);
IPC_HILO(buf, shm_dtime);
IPC_HILO(buf, shm_ctime);
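
msgctl, semctl, and shmctl all gain the same preamble: when cmd carries IPC_TIME64, the kernel is handed a zeroed scratch struct (so fields it does not write read back as zero), the result is copied into the caller's buffer on success, and IPC_HILO merges the split timestamp halves. IPC_HILO itself is not in this diff; the merge it presumably performs is:

#include <stdint.h>

/* combine a timestamp the kernel reports as two 32-bit halves
 * (the type name here is a stand-in for the demo) */
typedef int64_t time64_t;

static time64_t merge_hilo(uint32_t lo, uint32_t hi)
{
	return (time64_t)hi << 32 | lo;
}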

View File

@@ -38,55 +38,52 @@ int clock_adjtime (clockid_t clock_id, struct timex *utx)
{
int r = -ENOSYS;
#ifdef SYS_clock_adjtime64
if (SYS_clock_adjtime == SYS_clock_adjtime64 ||
(utx->modes & ADJ_SETOFFSET) && !IS32BIT(utx->time.tv_sec)) {
struct ktimex64 ktx = {
.modes = utx->modes,
.offset = utx->offset,
.freq = utx->freq,
.maxerror = utx->maxerror,
.esterror = utx->esterror,
.status = utx->status,
.constant = utx->constant,
.precision = utx->precision,
.tolerance = utx->tolerance,
.time_sec = utx->time.tv_sec,
.time_usec = utx->time.tv_usec,
.tick = utx->tick,
.ppsfreq = utx->ppsfreq,
.jitter = utx->jitter,
.shift = utx->shift,
.stabil = utx->stabil,
.jitcnt = utx->jitcnt,
.calcnt = utx->calcnt,
.errcnt = utx->errcnt,
.stbcnt = utx->stbcnt,
.tai = utx->tai,
};
r = __syscall(SYS_clock_adjtime, clock_id, &ktx);
if (r>=0) {
utx->modes = ktx.modes;
utx->offset = ktx.offset;
utx->freq = ktx.freq;
utx->maxerror = ktx.maxerror;
utx->esterror = ktx.esterror;
utx->status = ktx.status;
utx->constant = ktx.constant;
utx->precision = ktx.precision;
utx->tolerance = ktx.tolerance;
utx->time.tv_sec = ktx.time_sec;
utx->time.tv_usec = ktx.time_usec;
utx->tick = ktx.tick;
utx->ppsfreq = ktx.ppsfreq;
utx->jitter = ktx.jitter;
utx->shift = ktx.shift;
utx->stabil = ktx.stabil;
utx->jitcnt = ktx.jitcnt;
utx->calcnt = ktx.calcnt;
utx->errcnt = ktx.errcnt;
utx->stbcnt = ktx.stbcnt;
utx->tai = ktx.tai;
}
struct ktimex64 ktx = {
.modes = utx->modes,
.offset = utx->offset,
.freq = utx->freq,
.maxerror = utx->maxerror,
.esterror = utx->esterror,
.status = utx->status,
.constant = utx->constant,
.precision = utx->precision,
.tolerance = utx->tolerance,
.time_sec = utx->time.tv_sec,
.time_usec = utx->time.tv_usec,
.tick = utx->tick,
.ppsfreq = utx->ppsfreq,
.jitter = utx->jitter,
.shift = utx->shift,
.stabil = utx->stabil,
.jitcnt = utx->jitcnt,
.calcnt = utx->calcnt,
.errcnt = utx->errcnt,
.stbcnt = utx->stbcnt,
.tai = utx->tai,
};
r = __syscall(SYS_clock_adjtime64, clock_id, &ktx);
if (r>=0) {
utx->modes = ktx.modes;
utx->offset = ktx.offset;
utx->freq = ktx.freq;
utx->maxerror = ktx.maxerror;
utx->esterror = ktx.esterror;
utx->status = ktx.status;
utx->constant = ktx.constant;
utx->precision = ktx.precision;
utx->tolerance = ktx.tolerance;
utx->time.tv_sec = ktx.time_sec;
utx->time.tv_usec = ktx.time_usec;
utx->tick = ktx.tick;
utx->ppsfreq = ktx.ppsfreq;
utx->jitter = ktx.jitter;
utx->shift = ktx.shift;
utx->stabil = ktx.stabil;
utx->jitcnt = ktx.jitcnt;
utx->calcnt = ktx.calcnt;
utx->errcnt = ktx.errcnt;
utx->stbcnt = ktx.stbcnt;
utx->tai = ktx.tai;
}
if (SYS_clock_adjtime == SYS_clock_adjtime64 || r!=-ENOSYS)
return __syscall_ret(r);

View File

@@ -1,22 +0,0 @@
In principle, this memory allocator is roughly equivalent to Doug
Lea's dlmalloc with fine-grained locking.
malloc:
Uses a freelist binned by chunk size, with a bitmap to optimize
searching for the smallest non-empty bin which can satisfy an
allocation. If no free chunks are available, it creates a new chunk of
the requested size and attempts to merge it with any existing free
chunk immediately below the newly created chunk.
Whether the chunk was obtained from a bin or newly created, it's
likely to be larger than the requested allocation. malloc always
finishes its work by passing the new chunk to realloc, which will
split it into two chunks and free the tail portion.

View File

@@ -1,7 +0,0 @@
#include <stdlib.h>
#include "malloc_impl.h"
void *aligned_alloc(size_t align, size_t len)
{
return __memalign(align, len);
}

View File

@@ -0,0 +1,45 @@
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <errno.h>
#include "dynlink.h"
static size_t mal0_clear(char *p, size_t n)
{
const size_t pagesz = 4096; /* arbitrary */
if (n < pagesz) return n;
#ifdef __GNUC__
typedef uint64_t __attribute__((__may_alias__)) T;
#else
typedef unsigned char T;
#endif
char *pp = p + n;
size_t i = (uintptr_t)pp & (pagesz - 1);
for (;;) {
pp = memset(pp - i, 0, i);
if (pp - p < pagesz) return pp - p;
for (i = pagesz; i; i -= 2*sizeof(T), pp -= 2*sizeof(T))
if (((T *)pp)[-1] | ((T *)pp)[-2])
break;
}
}
static int allzerop(void *p)
{
return 0;
}
weak_alias(allzerop, __malloc_allzerop);
void *calloc(size_t m, size_t n)
{
if (n && m > (size_t)-1/n) {
errno = ENOMEM;
return 0;
}
n *= m;
void *p = malloc(n);
if (!p || (!__malloc_replaced && __malloc_allzerop(p)))
return p;
n = mal0_clear(p, n);
return memset(p, 0, n);
}
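
calloc now clears lazily from the end: mal0_clear walks backward a page at a time, zeroing only page-sized runs that actually contain nonzero bytes, and returns the prefix length the caller still has to memset. A standalone check of that contract, reusing the function above (4096 mirrors the "arbitrary" constant; the may_alias attribute is dropped since the demo controls alignment itself):

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

static size_t mal0_clear(char *p, size_t n)
{
	const size_t pagesz = 4096;
	if (n < pagesz) return n;
	typedef uint64_t T;
	char *pp = p + n;
	size_t i = (uintptr_t)pp & (pagesz - 1);
	for (;;) {
		pp = memset(pp - i, 0, i);
		if (pp - p < pagesz) return pp - p;
		for (i = pagesz; i; i -= 2*sizeof(T), pp -= 2*sizeof(T))
			if (((T *)pp)[-1] | ((T *)pp)[-2])
				break;
	}
}

int main(void)
{
	size_t n = 5 * 4096;
	char *p = aligned_alloc(4096, n);
	assert(p);
	memset(p, 1, 4096);            /* dirty first page only */
	memset(p + 4096, 0, n - 4096);
	size_t k = mal0_clear(p, n);   /* zeroes dirty tail pages */
	memset(p, 0, k);               /* calloc's final step */
	for (size_t i = 0; i < n; i++) assert(!p[i]);
	free(p);
	return 0;
}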

View File

@@ -1,71 +0,0 @@
#include <limits.h>
#include <stdint.h>
#include <errno.h>
#include <sys/mman.h>
#include "libc.h"
#include "syscall.h"
#include "malloc_impl.h"
/* This function returns true if the interval [old,new]
* intersects the 'len'-sized interval below &libc.auxv
* (interpreted as the main-thread stack) or below &b
* (the current stack). It is used to defend against
* buggy brk implementations that can cross the stack. */
static int traverses_stack_p(uintptr_t old, uintptr_t new)
{
const uintptr_t len = 8<<20;
uintptr_t a, b;
b = (uintptr_t)libc.auxv;
a = b > len ? b-len : 0;
if (new>a && old<b) return 1;
b = (uintptr_t)&b;
a = b > len ? b-len : 0;
if (new>a && old<b) return 1;
return 0;
}
/* Expand the heap in-place if brk can be used, or otherwise via mmap,
* using an exponential lower bound on growth by mmap to make
* fragmentation asymptotically irrelevant. The size argument is both
* an input and an output, since the caller needs to know the size
* allocated, which will be larger than requested due to page alignment
* and mmap minimum size rules. The caller is responsible for locking
* to prevent concurrent calls. */
void *__expand_heap(size_t *pn)
{
static uintptr_t brk;
static unsigned mmap_step;
size_t n = *pn;
if (n > SIZE_MAX/2 - PAGE_SIZE) {
errno = ENOMEM;
return 0;
}
n += -n & PAGE_SIZE-1;
if (!brk) {
brk = __syscall(SYS_brk, 0);
brk += -brk & PAGE_SIZE-1;
}
if (n < SIZE_MAX-brk && !traverses_stack_p(brk, brk+n)
&& __syscall(SYS_brk, brk+n)==brk+n) {
*pn = n;
brk += n;
return (void *)(brk-n);
}
size_t min = (size_t)PAGE_SIZE << mmap_step/2;
if (n < min) n = min;
void *area = __mmap(0, n, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
if (area == MAP_FAILED) return 0;
*pn = n;
mmap_step++;
return area;
}

View File

@@ -2,58 +2,102 @@
#include <stdint.h>
#include <limits.h>
#include <errno.h>
#include <sys/mman.h>
#include "libc.h"
#include "lock.h"
#include "malloc_impl.h"
#include "syscall.h"
#define ALIGN 16
/* This function returns true if the interval [old,new]
* intersects the 'len'-sized interval below &libc.auxv
* (interpreted as the main-thread stack) or below &b
* (the current stack). It is used to defend against
* buggy brk implementations that can cross the stack. */
static int traverses_stack_p(uintptr_t old, uintptr_t new)
{
const uintptr_t len = 8<<20;
uintptr_t a, b;
b = (uintptr_t)libc.auxv;
a = b > len ? b-len : 0;
if (new>a && old<b) return 1;
b = (uintptr_t)&b;
a = b > len ? b-len : 0;
if (new>a && old<b) return 1;
return 0;
}
static void *__simple_malloc(size_t n)
{
static char *cur, *end;
static uintptr_t brk, cur, end;
static volatile int lock[1];
size_t align=1, pad;
static unsigned mmap_step;
size_t align=1;
void *p;
if (n > SIZE_MAX/2) {
errno = ENOMEM;
return 0;
}
if (!n) n++;
while (align<n && align<ALIGN)
align += align;
LOCK(lock);
pad = -(uintptr_t)cur & align-1;
if (n <= SIZE_MAX/2 + ALIGN) n += pad;
cur += -cur & align-1;
if (n > end-cur) {
size_t m = n;
char *new = __expand_heap(&m);
if (!new) {
UNLOCK(lock);
return 0;
size_t req = n - (end-cur) + PAGE_SIZE-1 & -PAGE_SIZE;
if (!cur) {
brk = __syscall(SYS_brk, 0);
brk += -brk & PAGE_SIZE-1;
cur = end = brk;
}
if (new != end) {
cur = new;
n -= pad;
pad = 0;
if (brk == end && req < SIZE_MAX-brk
&& !traverses_stack_p(brk, brk+req)
&& __syscall(SYS_brk, brk+req)==brk+req) {
brk = end += req;
} else {
int new_area = 0;
req = n + PAGE_SIZE-1 & -PAGE_SIZE;
/* Only make a new area rather than individual mmap
* if wasted space would be over 1/8 of the map. */
if (req-n > req/8) {
/* Geometric area size growth up to 64 pages,
* bounding waste by 1/8 of the area. */
size_t min = PAGE_SIZE<<(mmap_step/2);
if (min-n > end-cur) {
if (req < min) {
req = min;
if (mmap_step < 12)
mmap_step++;
}
new_area = 1;
}
}
void *mem = __mmap(0, req, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
if (mem == MAP_FAILED || !new_area) {
UNLOCK(lock);
return mem==MAP_FAILED ? 0 : mem;
}
cur = (uintptr_t)mem;
end = cur + req;
}
end = new + m;
}
p = cur + pad;
p = (void *)cur;
cur += n;
UNLOCK(lock);
return p;
}
weak_alias(__simple_malloc, malloc);
static void *__simple_calloc(size_t m, size_t n)
{
if (n && m > (size_t)-1/n) {
errno = ENOMEM;
return 0;
}
return __simple_malloc(n * m);
}
weak_alias(__simple_calloc, calloc);
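
Review note: the rewritten bump allocator folds the old __expand_heap logic inline and does all of its rounding with the two power-of-two idioms seen above (cur += -cur & align-1; req = ... + PAGE_SIZE-1 & -PAGE_SIZE). Worked numbers:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uintptr_t x = 4097, a = 4096;
	/* distance from x up to the next multiple of a */
	assert((-x & (a - 1)) == 4095);
	/* x itself rounded up to the next multiple of a */
	assert(((x + a - 1) & -a) == 8192);
	return 0;
}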

View File

@@ -1,548 +0,0 @@
#define _GNU_SOURCE
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <stdint.h>
#include <errno.h>
#include <sys/mman.h>
#include "libc.h"
#include "atomic.h"
#include "pthread_impl.h"
#include "malloc_impl.h"
#if defined(__GNUC__) && defined(__PIC__)
#define inline inline __attribute__((always_inline))
#endif
static struct {
volatile uint64_t binmap;
struct bin bins[64];
volatile int free_lock[2];
} mal;
int __malloc_replaced;
/* Synchronization tools */
static inline void lock(volatile int *lk)
{
if (libc.threads_minus_1)
while(a_swap(lk, 1)) __wait(lk, lk+1, 1, 1);
}
static inline void unlock(volatile int *lk)
{
if (lk[0]) {
a_store(lk, 0);
if (lk[1]) __wake(lk, 1, 1);
}
}
static inline void lock_bin(int i)
{
lock(mal.bins[i].lock);
if (!mal.bins[i].head)
mal.bins[i].head = mal.bins[i].tail = BIN_TO_CHUNK(i);
}
static inline void unlock_bin(int i)
{
unlock(mal.bins[i].lock);
}
static int first_set(uint64_t x)
{
#if 1
return a_ctz_64(x);
#else
static const char debruijn64[64] = {
0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28,
62, 5, 39, 46, 44, 42, 22, 9, 24, 35, 59, 56, 49, 18, 29, 11,
63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10,
51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12
};
static const char debruijn32[32] = {
0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
};
if (sizeof(long) < 8) {
uint32_t y = x;
if (!y) {
y = x>>32;
return 32 + debruijn32[(y&-y)*0x076be629 >> 27];
}
return debruijn32[(y&-y)*0x076be629 >> 27];
}
return debruijn64[(x&-x)*0x022fdd63cc95386dull >> 58];
#endif
}
static const unsigned char bin_tab[60] = {
32,33,34,35,36,36,37,37,38,38,39,39,
40,40,40,40,41,41,41,41,42,42,42,42,43,43,43,43,
44,44,44,44,44,44,44,44,45,45,45,45,45,45,45,45,
46,46,46,46,46,46,46,46,47,47,47,47,47,47,47,47,
};
static int bin_index(size_t x)
{
x = x / SIZE_ALIGN - 1;
if (x <= 32) return x;
if (x < 512) return bin_tab[x/8-4];
if (x > 0x1c00) return 63;
return bin_tab[x/128-4] + 16;
}
static int bin_index_up(size_t x)
{
x = x / SIZE_ALIGN - 1;
if (x <= 32) return x;
x--;
if (x < 512) return bin_tab[x/8-4] + 1;
return bin_tab[x/128-4] + 17;
}
#if 0
void __dump_heap(int x)
{
struct chunk *c;
int i;
for (c = (void *)mal.heap; CHUNK_SIZE(c); c = NEXT_CHUNK(c))
fprintf(stderr, "base %p size %zu (%d) flags %d/%d\n",
c, CHUNK_SIZE(c), bin_index(CHUNK_SIZE(c)),
c->csize & 15,
NEXT_CHUNK(c)->psize & 15);
for (i=0; i<64; i++) {
if (mal.bins[i].head != BIN_TO_CHUNK(i) && mal.bins[i].head) {
fprintf(stderr, "bin %d: %p\n", i, mal.bins[i].head);
if (!(mal.binmap & 1ULL<<i))
fprintf(stderr, "missing from binmap!\n");
} else if (mal.binmap & 1ULL<<i)
fprintf(stderr, "binmap wrongly contains %d!\n", i);
}
}
#endif
static struct chunk *expand_heap(size_t n)
{
static int heap_lock[2];
static void *end;
void *p;
struct chunk *w;
/* The argument n already accounts for the caller's chunk
* overhead needs, but if the heap can't be extended in-place,
* we need room for an extra zero-sized sentinel chunk. */
n += SIZE_ALIGN;
lock(heap_lock);
p = __expand_heap(&n);
if (!p) {
unlock(heap_lock);
return 0;
}
/* If not just expanding existing space, we need to make a
* new sentinel chunk below the allocated space. */
if (p != end) {
/* Valid/safe because of the prologue increment. */
n -= SIZE_ALIGN;
p = (char *)p + SIZE_ALIGN;
w = MEM_TO_CHUNK(p);
w->psize = 0 | C_INUSE;
}
/* Record new heap end and fill in footer. */
end = (char *)p + n;
w = MEM_TO_CHUNK(end);
w->psize = n | C_INUSE;
w->csize = 0 | C_INUSE;
/* Fill in header, which may be new or may be replacing a
* zero-size sentinel header at the old end-of-heap. */
w = MEM_TO_CHUNK(p);
w->csize = n | C_INUSE;
unlock(heap_lock);
return w;
}
static int adjust_size(size_t *n)
{
/* Result of pointer difference must fit in ptrdiff_t. */
if (*n-1 > PTRDIFF_MAX - SIZE_ALIGN - PAGE_SIZE) {
if (*n) {
errno = ENOMEM;
return -1;
} else {
*n = SIZE_ALIGN;
return 0;
}
}
*n = (*n + OVERHEAD + SIZE_ALIGN - 1) & SIZE_MASK;
return 0;
}
static void unbin(struct chunk *c, int i)
{
if (c->prev == c->next)
a_and_64(&mal.binmap, ~(1ULL<<i));
c->prev->next = c->next;
c->next->prev = c->prev;
c->csize |= C_INUSE;
NEXT_CHUNK(c)->psize |= C_INUSE;
}
static int alloc_fwd(struct chunk *c)
{
int i;
size_t k;
while (!((k=c->csize) & C_INUSE)) {
i = bin_index(k);
lock_bin(i);
if (c->csize == k) {
unbin(c, i);
unlock_bin(i);
return 1;
}
unlock_bin(i);
}
return 0;
}
static int alloc_rev(struct chunk *c)
{
int i;
size_t k;
while (!((k=c->psize) & C_INUSE)) {
i = bin_index(k);
lock_bin(i);
if (c->psize == k) {
unbin(PREV_CHUNK(c), i);
unlock_bin(i);
return 1;
}
unlock_bin(i);
}
return 0;
}
/* pretrim - trims a chunk _prior_ to removing it from its bin.
* Must be called with i as the ideal bin for size n, j the bin
* for the _free_ chunk self, and bin j locked. */
static int pretrim(struct chunk *self, size_t n, int i, int j)
{
size_t n1;
struct chunk *next, *split;
/* We cannot pretrim if it would require re-binning. */
if (j < 40) return 0;
if (j < i+3) {
if (j != 63) return 0;
n1 = CHUNK_SIZE(self);
if (n1-n <= MMAP_THRESHOLD) return 0;
} else {
n1 = CHUNK_SIZE(self);
}
if (bin_index(n1-n) != j) return 0;
next = NEXT_CHUNK(self);
split = (void *)((char *)self + n);
split->prev = self->prev;
split->next = self->next;
split->prev->next = split;
split->next->prev = split;
split->psize = n | C_INUSE;
split->csize = n1-n;
next->psize = n1-n;
self->csize = n | C_INUSE;
return 1;
}
static void trim(struct chunk *self, size_t n)
{
size_t n1 = CHUNK_SIZE(self);
struct chunk *next, *split;
if (n >= n1 - DONTCARE) return;
next = NEXT_CHUNK(self);
split = (void *)((char *)self + n);
split->psize = n | C_INUSE;
split->csize = n1-n | C_INUSE;
next->psize = n1-n | C_INUSE;
self->csize = n | C_INUSE;
__bin_chunk(split);
}
void *malloc(size_t n)
{
struct chunk *c;
int i, j;
if (adjust_size(&n) < 0) return 0;
if (n > MMAP_THRESHOLD) {
size_t len = n + OVERHEAD + PAGE_SIZE - 1 & -PAGE_SIZE;
char *base = __mmap(0, len, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
if (base == (void *)-1) return 0;
c = (void *)(base + SIZE_ALIGN - OVERHEAD);
c->csize = len - (SIZE_ALIGN - OVERHEAD);
c->psize = SIZE_ALIGN - OVERHEAD;
return CHUNK_TO_MEM(c);
}
i = bin_index_up(n);
for (;;) {
uint64_t mask = mal.binmap & -(1ULL<<i);
if (!mask) {
c = expand_heap(n);
if (!c) return 0;
if (alloc_rev(c)) {
struct chunk *x = c;
c = PREV_CHUNK(c);
NEXT_CHUNK(x)->psize = c->csize =
x->csize + CHUNK_SIZE(c);
}
break;
}
j = first_set(mask);
lock_bin(j);
c = mal.bins[j].head;
if (c != BIN_TO_CHUNK(j)) {
if (!pretrim(c, n, i, j)) unbin(c, j);
unlock_bin(j);
break;
}
unlock_bin(j);
}
/* Now patch up in case we over-allocated */
trim(c, n);
return CHUNK_TO_MEM(c);
}
static size_t mal0_clear(char *p, size_t pagesz, size_t n)
{
#ifdef __GNUC__
typedef uint64_t __attribute__((__may_alias__)) T;
#else
typedef unsigned char T;
#endif
char *pp = p + n;
size_t i = (uintptr_t)pp & (pagesz - 1);
for (;;) {
pp = memset(pp - i, 0, i);
if (pp - p < pagesz) return pp - p;
for (i = pagesz; i; i -= 2*sizeof(T), pp -= 2*sizeof(T))
if (((T *)pp)[-1] | ((T *)pp)[-2])
break;
}
}
void *calloc(size_t m, size_t n)
{
if (n && m > (size_t)-1/n) {
errno = ENOMEM;
return 0;
}
n *= m;
void *p = malloc(n);
if (!p) return p;
if (!__malloc_replaced) {
if (IS_MMAPPED(MEM_TO_CHUNK(p)))
return p;
if (n >= PAGE_SIZE)
n = mal0_clear(p, PAGE_SIZE, n);
}
return memset(p, 0, n);
}
void *realloc(void *p, size_t n)
{
struct chunk *self, *next;
size_t n0, n1;
void *new;
if (!p) return malloc(n);
if (adjust_size(&n) < 0) return 0;
self = MEM_TO_CHUNK(p);
n1 = n0 = CHUNK_SIZE(self);
if (IS_MMAPPED(self)) {
size_t extra = self->psize;
char *base = (char *)self - extra;
size_t oldlen = n0 + extra;
size_t newlen = n + extra;
/* Crash on realloc of freed chunk */
if (extra & 1) a_crash();
if (newlen < PAGE_SIZE && (new = malloc(n-OVERHEAD))) {
n0 = n;
goto copy_free_ret;
}
newlen = (newlen + PAGE_SIZE-1) & -PAGE_SIZE;
if (oldlen == newlen) return p;
base = __mremap(base, oldlen, newlen, MREMAP_MAYMOVE);
if (base == (void *)-1)
goto copy_realloc;
self = (void *)(base + extra);
self->csize = newlen - extra;
return CHUNK_TO_MEM(self);
}
next = NEXT_CHUNK(self);
/* Crash on corrupted footer (likely from buffer overflow) */
if (next->psize != self->csize) a_crash();
/* Merge adjacent chunks if we need more space. This is not
* a waste of time even if we fail to get enough space, because our
* subsequent call to free would otherwise have to do the merge. */
if (n > n1 && alloc_fwd(next)) {
n1 += CHUNK_SIZE(next);
next = NEXT_CHUNK(next);
}
/* FIXME: find what's wrong here and reenable it..? */
if (0 && n > n1 && alloc_rev(self)) {
self = PREV_CHUNK(self);
n1 += CHUNK_SIZE(self);
}
self->csize = n1 | C_INUSE;
next->psize = n1 | C_INUSE;
/* If we got enough space, split off the excess and return */
if (n <= n1) {
//memmove(CHUNK_TO_MEM(self), p, n0-OVERHEAD);
trim(self, n);
return CHUNK_TO_MEM(self);
}
copy_realloc:
/* As a last resort, allocate a new chunk and copy to it. */
new = malloc(n-OVERHEAD);
if (!new) return 0;
copy_free_ret:
memcpy(new, p, n0-OVERHEAD);
free(CHUNK_TO_MEM(self));
return new;
}
void __bin_chunk(struct chunk *self)
{
struct chunk *next = NEXT_CHUNK(self);
size_t final_size, new_size, size;
int reclaim=0;
int i;
final_size = new_size = CHUNK_SIZE(self);
/* Crash on corrupted footer (likely from buffer overflow) */
if (next->psize != self->csize) a_crash();
for (;;) {
if (self->psize & next->csize & C_INUSE) {
self->csize = final_size | C_INUSE;
next->psize = final_size | C_INUSE;
i = bin_index(final_size);
lock_bin(i);
lock(mal.free_lock);
if (self->psize & next->csize & C_INUSE)
break;
unlock(mal.free_lock);
unlock_bin(i);
}
if (alloc_rev(self)) {
self = PREV_CHUNK(self);
size = CHUNK_SIZE(self);
final_size += size;
if (new_size+size > RECLAIM && (new_size+size^size) > size)
reclaim = 1;
}
if (alloc_fwd(next)) {
size = CHUNK_SIZE(next);
final_size += size;
if (new_size+size > RECLAIM && (new_size+size^size) > size)
reclaim = 1;
next = NEXT_CHUNK(next);
}
}
if (!(mal.binmap & 1ULL<<i))
a_or_64(&mal.binmap, 1ULL<<i);
self->csize = final_size;
next->psize = final_size;
unlock(mal.free_lock);
self->next = BIN_TO_CHUNK(i);
self->prev = mal.bins[i].tail;
self->next->prev = self;
self->prev->next = self;
/* Replace middle of large chunks with fresh zero pages */
if (reclaim) {
uintptr_t a = (uintptr_t)self + SIZE_ALIGN+PAGE_SIZE-1 & -PAGE_SIZE;
uintptr_t b = (uintptr_t)next - SIZE_ALIGN & -PAGE_SIZE;
#if 1
__madvise((void *)a, b-a, MADV_DONTNEED);
#else
__mmap((void *)a, b-a, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0);
#endif
}
unlock_bin(i);
}
static void unmap_chunk(struct chunk *self)
{
size_t extra = self->psize;
char *base = (char *)self - extra;
size_t len = CHUNK_SIZE(self) + extra;
/* Crash on double free */
if (extra & 1) a_crash();
__munmap(base, len);
}
void free(void *p)
{
if (!p) return;
struct chunk *self = MEM_TO_CHUNK(p);
if (IS_MMAPPED(self))
unmap_chunk(self);
else
__bin_chunk(self);
}
void __malloc_donate(char *start, char *end)
{
size_t align_start_up = (SIZE_ALIGN-1) & (-(uintptr_t)start - OVERHEAD);
size_t align_end_down = (SIZE_ALIGN-1) & (uintptr_t)end;
/* Getting past this condition ensures that the padding for alignment
* and header overhead will not overflow and will leave a nonzero
* multiple of SIZE_ALIGN bytes between start and end. */
if (end - start <= OVERHEAD + align_start_up + align_end_down)
return;
start += align_start_up + OVERHEAD;
end -= align_end_down;
struct chunk *c = MEM_TO_CHUNK(start), *n = MEM_TO_CHUNK(end);
c->psize = n->csize = C_INUSE;
c->csize = n->psize = C_INUSE | (end-start);
__bin_chunk(c);
}

View File

@@ -1,9 +0,0 @@
#include <malloc.h>
#include "malloc_impl.h"
hidden void *(*const __realloc_dep)(void *, size_t) = realloc;
size_t malloc_usable_size(void *p)
{
return p ? CHUNK_SIZE(MEM_TO_CHUNK(p)) - OVERHEAD : 0;
}

View File

@@ -0,0 +1,57 @@
#include <stdlib.h>
#include <errno.h>
#include "meta.h"
void *aligned_alloc(size_t align, size_t len)
{
if ((align & -align) != align) {
errno = EINVAL;
return 0;
}
if (len > SIZE_MAX - align || align >= (1ULL<<31)*UNIT) {
errno = ENOMEM;
return 0;
}
if (DISABLE_ALIGNED_ALLOC) {
errno = ENOMEM;
return 0;
}
if (align <= UNIT) align = UNIT;
unsigned char *p = malloc(len + align - UNIT);
struct meta *g = get_meta(p);
int idx = get_slot_index(p);
size_t stride = get_stride(g);
unsigned char *start = g->mem->storage + stride*idx;
unsigned char *end = g->mem->storage + stride*(idx+1) - IB;
size_t adj = -(uintptr_t)p & (align-1);
if (!adj) {
set_size(p, end, len);
return p;
}
p += adj;
uint32_t offset = (size_t)(p-g->mem->storage)/UNIT;
if (offset <= 0xffff) {
*(uint16_t *)(p-2) = offset;
p[-4] = 0;
} else {
// use a 32-bit offset if 16-bit doesn't fit. for this,
// 16-bit field must be zero, [-4] byte nonzero.
*(uint16_t *)(p-2) = 0;
*(uint32_t *)(p-8) = offset;
p[-4] = 1;
}
p[-3] = idx;
set_size(p, end, len);
// store offset to aligned enframing. this facilitates cycling
// offset and also iteration of heap for debugging/measurement.
// for extreme overalignment it won't fit but these are classless
// allocations anyway.
*(uint16_t *)(start - 2) = (size_t)(p-start)/UNIT;
start[-3] = 7<<5;
return p;
}
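
The in-band slot header this relies on: a 16-bit offset (in 16-byte units, back to the group header) at p-2, the slot index at p-3, and a flag byte at p-4 that switches to a 32-bit offset stored at p-8 when 16 bits do not suffice; get_meta in meta.h below is the reader. A sketch of the encode/decode pair (memcpy used so the demo makes no alignment assumptions):

#include <assert.h>
#include <stdint.h>
#include <string.h>

static void put_offset(unsigned char *p, uint32_t off)
{
	if (off <= 0xffff) {
		uint16_t o16 = off;
		memcpy(p - 2, &o16, 2);
		p[-4] = 0;
	} else {
		memset(p - 2, 0, 2);    /* 16-bit field must be zero */
		memcpy(p - 8, &off, 4); /* real offset lives at p-8 */
		p[-4] = 1;
	}
}

static uint32_t get_offset(const unsigned char *p)
{
	uint16_t o16;
	memcpy(&o16, p - 2, 2);
	if (!p[-4]) return o16;
	uint32_t o32;
	memcpy(&o32, p - 8, 4);
	return o32;
}

int main(void)
{
	unsigned char hdr[8];
	unsigned char *p = hdr + 8;
	put_offset(p, 70000);
	assert(get_offset(p) == 70000);
	return 0;
}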

View File

@@ -0,0 +1,39 @@
#include <stdlib.h>
#include <stdint.h>
#include <limits.h>
#include <string.h>
#include <sys/mman.h>
#include <errno.h>
#include "meta.h"
static void donate(unsigned char *base, size_t len)
{
uintptr_t a = (uintptr_t)base;
uintptr_t b = a + len;
a += -a & (UNIT-1);
b -= b & (UNIT-1);
memset(base, 0, len);
for (int sc=47; sc>0 && b>a; sc-=4) {
if (b-a < (size_classes[sc]+1)*UNIT) continue;
struct meta *m = alloc_meta();
m->avail_mask = 0;
m->freed_mask = 1;
m->mem = (void *)a;
m->mem->meta = m;
m->last_idx = 0;
m->freeable = 0;
m->sizeclass = sc;
m->maplen = 0;
*((unsigned char *)m->mem+UNIT-4) = 0;
*((unsigned char *)m->mem+UNIT-3) = 255;
m->mem->storage[size_classes[sc]*UNIT-4] = 0;
queue(&ctx.active[sc], m);
a += (size_classes[sc]+1)*UNIT;
}
}
void __malloc_donate(char *start, char *end)
{
donate((void *)start, end-start);
}

View File

@@ -0,0 +1,143 @@
#define _BSD_SOURCE
#include <stdlib.h>
#include <sys/mman.h>
#include "meta.h"
struct mapinfo {
void *base;
size_t len;
};
static struct mapinfo nontrivial_free(struct meta *, int);
static struct mapinfo free_group(struct meta *g)
{
struct mapinfo mi = { 0 };
int sc = g->sizeclass;
if (sc < 48) {
ctx.usage_by_class[sc] -= g->last_idx+1;
}
if (g->maplen) {
step_seq();
record_seq(sc);
mi.base = g->mem;
mi.len = g->maplen*4096UL;
} else {
void *p = g->mem;
struct meta *m = get_meta(p);
int idx = get_slot_index(p);
g->mem->meta = 0;
// not checking size/reserved here; it's intentionally invalid
mi = nontrivial_free(m, idx);
}
free_meta(g);
return mi;
}
static int okay_to_free(struct meta *g)
{
int sc = g->sizeclass;
if (!g->freeable) return 0;
// always free individual mmaps not suitable for reuse
if (sc >= 48 || get_stride(g) < UNIT*size_classes[sc])
return 1;
// always free groups allocated inside another group's slot
// since recreating them should not be expensive and they
// might be blocking freeing of a much larger group.
if (!g->maplen) return 1;
// if there is another non-full group, free this one to
// consolidate future allocations, reduce fragmentation.
if (g->next != g) return 1;
// free any group in a size class that's not bouncing
if (!is_bouncing(sc)) return 1;
size_t cnt = g->last_idx+1;
size_t usage = ctx.usage_by_class[sc];
// if usage is high enough that a larger count should be
// used, free the low-count group so a new one will be made.
if (9*cnt <= usage && cnt < 20)
return 1;
// otherwise, keep the last group in a bouncing class.
return 0;
}
static struct mapinfo nontrivial_free(struct meta *g, int i)
{
uint32_t self = 1u<<i;
int sc = g->sizeclass;
uint32_t mask = g->freed_mask | g->avail_mask;
if (mask+self == (2u<<g->last_idx)-1 && okay_to_free(g)) {
// any multi-slot group is necessarily on an active list
// here, but single-slot groups might or might not be.
if (g->next) {
assert(sc < 48);
int activate_new = (ctx.active[sc]==g);
dequeue(&ctx.active[sc], g);
if (activate_new && ctx.active[sc])
activate_group(ctx.active[sc]);
}
return free_group(g);
} else if (!mask) {
assert(sc < 48);
// might still be active if there were no allocations
// after last available slot was taken.
if (ctx.active[sc] != g) {
queue(&ctx.active[sc], g);
}
}
a_or(&g->freed_mask, self);
return (struct mapinfo){ 0 };
}
void free(void *p)
{
if (!p) return;
struct meta *g = get_meta(p);
int idx = get_slot_index(p);
size_t stride = get_stride(g);
unsigned char *start = g->mem->storage + stride*idx;
unsigned char *end = start + stride - IB;
get_nominal_size(p, end);
uint32_t self = 1u<<idx, all = (2u<<g->last_idx)-1;
((unsigned char *)p)[-3] = 255;
// invalidate offset to group header, and cycle offset of
// used region within slot if current offset is zero.
*(uint16_t *)((char *)p-2) = 0;
// release any whole pages contained in the slot to be freed
// unless it's a single-slot group that will be unmapped.
if (((uintptr_t)(start-1) ^ (uintptr_t)end) >= 2*PGSZ && g->last_idx) {
unsigned char *base = start + (-(uintptr_t)start & (PGSZ-1));
size_t len = (end-base) & -PGSZ;
if (len) madvise(base, len, MADV_FREE);
}
// atomic free without locking if this is neither first or last slot
for (;;) {
uint32_t freed = g->freed_mask;
uint32_t avail = g->avail_mask;
uint32_t mask = freed | avail;
assert(!(mask&self));
if (!freed || mask+self==all) break;
if (!MT)
g->freed_mask = freed+self;
else if (a_cas(&g->freed_mask, freed, freed+self)!=freed)
continue;
return;
}
wrlock();
struct mapinfo mi = nontrivial_free(g, idx);
unlock();
if (mi.len) munmap(mi.base, mi.len);
}
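
The fast path above frees a middle slot by setting its bit in freed_mask with a CAS retry loop, taking the malloc lock only for the first or last live slot of a group. A stand-in sketch of that loop using C11 atomics in place of musl's a_cas (and omitting the fully-free bailout for brevity):

#include <stdatomic.h>
#include <stdint.h>

/* returns 1 if the slot was freed without locking */
static int try_mark_freed(_Atomic uint32_t *freed_mask, uint32_t self)
{
	uint32_t freed = atomic_load(freed_mask);
	while (freed) { /* another slot already freed: lock-free is ok */
		if (atomic_compare_exchange_weak(freed_mask, &freed,
		                                 freed | self))
			return 1;
	}
	return 0;       /* first free in group: caller takes the lock */
}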

View File

@@ -0,0 +1,77 @@
#ifndef MALLOC_GLUE_H
#define MALLOC_GLUE_H
#include <stdint.h>
#include <sys/mman.h>
#include <pthread.h>
#include <unistd.h>
#include <elf.h>
#include <string.h>
#include "atomic.h"
#include "syscall.h"
#include "libc.h"
#include "lock.h"
#include "dynlink.h"
// use macros to appropriately namespace these.
#define size_classes __malloc_size_classes
#define ctx __malloc_context
#define alloc_meta __malloc_alloc_meta
#define is_allzero __malloc_allzerop
#define dump_heap __dump_heap
#if USE_REAL_ASSERT
#include <assert.h>
#else
#undef assert
#define assert(x) do { if (!(x)) a_crash(); } while(0)
#endif
#define brk(p) ((uintptr_t)__syscall(SYS_brk, p))
#define mmap __mmap
#define madvise __madvise
#define mremap __mremap
#define DISABLE_ALIGNED_ALLOC (__malloc_replaced && !__aligned_alloc_replaced)
static inline uint64_t get_random_secret()
{
uint64_t secret = (uintptr_t)&secret * 1103515245;
for (size_t i=0; libc.auxv[i]; i+=2)
if (libc.auxv[i]==AT_RANDOM)
memcpy(&secret, (char *)libc.auxv[i+1]+8, sizeof secret);
return secret;
}
#ifndef PAGESIZE
#define PAGESIZE PAGE_SIZE
#endif
#define MT (libc.need_locks)
#define RDLOCK_IS_EXCLUSIVE 1
__attribute__((__visibility__("hidden")))
extern int __malloc_lock[1];
#define LOCK_OBJ_DEF \
int __malloc_lock[1];
static inline void rdlock()
{
if (MT) LOCK(__malloc_lock);
}
static inline void wrlock()
{
if (MT) LOCK(__malloc_lock);
}
static inline void unlock()
{
UNLOCK(__malloc_lock);
}
static inline void upgradelock()
{
}
#endif

View File

@@ -0,0 +1,387 @@
#include <stdlib.h>
#include <stdint.h>
#include <limits.h>
#include <string.h>
#include <sys/mman.h>
#include <errno.h>
#include "meta.h"
LOCK_OBJ_DEF;
const uint16_t size_classes[] = {
1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 12, 15,
18, 20, 25, 31,
36, 42, 50, 63,
72, 84, 102, 127,
146, 170, 204, 255,
292, 340, 409, 511,
584, 682, 818, 1023,
1169, 1364, 1637, 2047,
2340, 2730, 3276, 4095,
4680, 5460, 6552, 8191,
};
static const uint8_t small_cnt_tab[][3] = {
{ 30, 30, 30 },
{ 31, 15, 15 },
{ 20, 10, 10 },
{ 31, 15, 7 },
{ 25, 12, 6 },
{ 21, 10, 5 },
{ 18, 8, 4 },
{ 31, 15, 7 },
{ 28, 14, 6 },
};
static const uint8_t med_cnt_tab[4] = { 28, 24, 20, 32 };
struct malloc_context ctx = { 0 };
struct meta *alloc_meta(void)
{
struct meta *m;
unsigned char *p;
if (!ctx.init_done) {
#ifndef PAGESIZE
ctx.pagesize = get_page_size();
#endif
ctx.secret = get_random_secret();
ctx.init_done = 1;
}
size_t pagesize = PGSZ;
if (pagesize < 4096) pagesize = 4096;
if ((m = dequeue_head(&ctx.free_meta_head))) return m;
if (!ctx.avail_meta_count) {
int need_unprotect = 1;
if (!ctx.avail_meta_area_count && ctx.brk!=-1) {
uintptr_t new = ctx.brk + pagesize;
int need_guard = 0;
if (!ctx.brk) {
need_guard = 1;
ctx.brk = brk(0);
// some ancient kernels returned _ebss
// instead of next page as initial brk.
ctx.brk += -ctx.brk & (pagesize-1);
new = ctx.brk + 2*pagesize;
}
if (brk(new) != new) {
ctx.brk = -1;
} else {
if (need_guard) mmap((void *)ctx.brk, pagesize,
PROT_NONE, MAP_ANON|MAP_PRIVATE|MAP_FIXED, -1, 0);
ctx.brk = new;
ctx.avail_meta_areas = (void *)(new - pagesize);
ctx.avail_meta_area_count = pagesize>>12;
need_unprotect = 0;
}
}
if (!ctx.avail_meta_area_count) {
size_t n = 2UL << ctx.meta_alloc_shift;
p = mmap(0, n*pagesize, PROT_NONE,
MAP_PRIVATE|MAP_ANON, -1, 0);
if (p==MAP_FAILED) return 0;
ctx.avail_meta_areas = p + pagesize;
ctx.avail_meta_area_count = (n-1)*(pagesize>>12);
ctx.meta_alloc_shift++;
}
p = ctx.avail_meta_areas;
if ((uintptr_t)p & (pagesize-1)) need_unprotect = 0;
if (need_unprotect)
if (mprotect(p, pagesize, PROT_READ|PROT_WRITE)
&& errno != ENOSYS)
return 0;
ctx.avail_meta_area_count--;
ctx.avail_meta_areas = p + 4096;
if (ctx.meta_area_tail) {
ctx.meta_area_tail->next = (void *)p;
} else {
ctx.meta_area_head = (void *)p;
}
ctx.meta_area_tail = (void *)p;
ctx.meta_area_tail->check = ctx.secret;
ctx.avail_meta_count = ctx.meta_area_tail->nslots
= (4096-sizeof(struct meta_area))/sizeof *m;
ctx.avail_meta = ctx.meta_area_tail->slots;
}
ctx.avail_meta_count--;
m = ctx.avail_meta++;
m->prev = m->next = 0;
return m;
}
static uint32_t try_avail(struct meta **pm)
{
struct meta *m = *pm;
uint32_t first;
if (!m) return 0;
uint32_t mask = m->avail_mask;
if (!mask) {
if (!m) return 0;
if (!m->freed_mask) {
dequeue(pm, m);
m = *pm;
if (!m) return 0;
} else {
m = m->next;
*pm = m;
}
mask = m->freed_mask;
// skip fully-free group unless it's the only one
// or it's a permanently non-freeable group
if (mask == (2u<<m->last_idx)-1 && m->freeable) {
m = m->next;
*pm = m;
mask = m->freed_mask;
}
// activate more slots in a not-fully-active group
// if needed, but only as a last resort. prefer using
// any other group with free slots. this avoids
// touching & dirtying as-yet-unused pages.
if (!(mask & ((2u<<m->mem->active_idx)-1))) {
if (m->next != m) {
m = m->next;
*pm = m;
} else {
int cnt = m->mem->active_idx + 2;
int size = size_classes[m->sizeclass]*UNIT;
int span = UNIT + size*cnt;
// activate up to next 4k boundary
while ((span^(span+size-1)) < 4096) {
cnt++;
span += size;
}
if (cnt > m->last_idx+1)
cnt = m->last_idx+1;
m->mem->active_idx = cnt-1;
}
}
mask = activate_group(m);
assert(mask);
decay_bounces(m->sizeclass);
}
first = mask&-mask;
m->avail_mask = mask-first;
return first;
}
static int alloc_slot(int, size_t);
static struct meta *alloc_group(int sc, size_t req)
{
size_t size = UNIT*size_classes[sc];
int i = 0, cnt;
unsigned char *p;
struct meta *m = alloc_meta();
if (!m) return 0;
size_t usage = ctx.usage_by_class[sc];
size_t pagesize = PGSZ;
int active_idx;
if (sc < 9) {
while (i<2 && 4*small_cnt_tab[sc][i] > usage)
i++;
cnt = small_cnt_tab[sc][i];
} else {
// lookup max number of slots fitting in power-of-two size
// from a table, along with number of factors of two we
// can divide out without a remainder or reaching 1.
cnt = med_cnt_tab[sc&3];
// reduce cnt to avoid excessive eager allocation.
while (!(cnt&1) && 4*cnt > usage)
cnt >>= 1;
// data structures don't support groups whose slot offsets
// in units don't fit in 16 bits.
while (size*cnt >= 65536*UNIT)
cnt >>= 1;
}
// If we selected a count of 1 above but it's not sufficient to use
// mmap, increase to 2. Then it might be; if not it will nest.
if (cnt==1 && size*cnt+UNIT <= pagesize/2) cnt = 2;
// All choices of size*cnt are "just below" a power of two, so anything
// larger than half the page size should be allocated as whole pages.
if (size*cnt+UNIT > pagesize/2) {
// check/update bounce counter to start/increase retention
// of freed maps, and inhibit use of low-count, odd-size
// small mappings and single-slot groups if activated.
int nosmall = is_bouncing(sc);
account_bounce(sc);
step_seq();
// since the following count reduction opportunities have
// an absolute memory usage cost, don't overdo them. count
// coarse usage as part of usage.
if (!(sc&1) && sc<32) usage += ctx.usage_by_class[sc+1];
// try to drop to a lower count if the one found above
// increases usage by more than 25%. these reduced counts
// roughly fill an integral number of pages, just not a
// power of two, limiting amount of unusable space.
if (4*cnt > usage && !nosmall) {
if (0);
else if ((sc&3)==1 && size*cnt>8*pagesize) cnt = 2;
else if ((sc&3)==2 && size*cnt>4*pagesize) cnt = 3;
else if ((sc&3)==0 && size*cnt>8*pagesize) cnt = 3;
else if ((sc&3)==0 && size*cnt>2*pagesize) cnt = 5;
}
size_t needed = size*cnt + UNIT;
needed += -needed & (pagesize-1);
// produce an individually-mmapped allocation if usage is low,
// bounce counter hasn't triggered, and either it saves memory
// or it avoids eager slot allocation without wasting too much.
if (!nosmall && cnt<=7) {
req += IB + UNIT;
req += -req & (pagesize-1);
if (req<size+UNIT || (req>=4*pagesize && 2*cnt>usage)) {
cnt = 1;
needed = req;
}
}
p = mmap(0, needed, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
if (p==MAP_FAILED) {
free_meta(m);
return 0;
}
m->maplen = needed>>12;
ctx.mmap_counter++;
active_idx = (4096-UNIT)/size-1;
if (active_idx > cnt-1) active_idx = cnt-1;
if (active_idx < 0) active_idx = 0;
} else {
int j = size_to_class(UNIT+cnt*size-IB);
int idx = alloc_slot(j, UNIT+cnt*size-IB);
if (idx < 0) {
free_meta(m);
return 0;
}
struct meta *g = ctx.active[j];
p = enframe(g, idx, UNIT*size_classes[j]-IB, ctx.mmap_counter);
m->maplen = 0;
p[-3] = (p[-3]&31) | (6<<5);
for (int i=0; i<=cnt; i++)
p[UNIT+i*size-4] = 0;
active_idx = cnt-1;
}
ctx.usage_by_class[sc] += cnt;
m->avail_mask = (2u<<active_idx)-1;
m->freed_mask = (2u<<(cnt-1))-1 - m->avail_mask;
m->mem = (void *)p;
m->mem->meta = m;
m->mem->active_idx = active_idx;
m->last_idx = cnt-1;
m->freeable = 1;
m->sizeclass = sc;
return m;
}
static int alloc_slot(int sc, size_t req)
{
uint32_t first = try_avail(&ctx.active[sc]);
if (first) return a_ctz_32(first);
struct meta *g = alloc_group(sc, req);
if (!g) return -1;
g->avail_mask--;
queue(&ctx.active[sc], g);
return 0;
}
void *malloc(size_t n)
{
if (size_overflows(n)) return 0;
struct meta *g;
uint32_t mask, first;
int sc;
int idx;
int ctr;
if (n >= MMAP_THRESHOLD) {
size_t needed = n + IB + UNIT;
void *p = mmap(0, needed, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANON, -1, 0);
if (p==MAP_FAILED) return 0;
wrlock();
step_seq();
g = alloc_meta();
if (!g) {
unlock();
munmap(p, needed);
return 0;
}
g->mem = p;
g->mem->meta = g;
g->last_idx = 0;
g->freeable = 1;
g->sizeclass = 63;
g->maplen = (needed+4095)/4096;
g->avail_mask = g->freed_mask = 0;
// use a global counter to cycle offset in
// individually-mmapped allocations.
ctx.mmap_counter++;
idx = 0;
goto success;
}
sc = size_to_class(n);
rdlock();
g = ctx.active[sc];
// use coarse size classes initially when there are not yet
// any groups of desired size. this allows counts of 2 or 3
// to be allocated at first rather than having to start with
// 7 or 5, the min counts for even size classes.
if (!g && sc>=4 && sc<32 && sc!=6 && !(sc&1) && !ctx.usage_by_class[sc]) {
size_t usage = ctx.usage_by_class[sc|1];
// if a new group may be allocated, count it toward
// usage in deciding if we can use coarse class.
if (!ctx.active[sc|1] || (!ctx.active[sc|1]->avail_mask
&& !ctx.active[sc|1]->freed_mask))
usage += 3;
if (usage <= 12)
sc |= 1;
g = ctx.active[sc];
}
for (;;) {
mask = g ? g->avail_mask : 0;
first = mask&-mask;
if (!first) break;
if (RDLOCK_IS_EXCLUSIVE || !MT)
g->avail_mask = mask-first;
else if (a_cas(&g->avail_mask, mask, mask-first)!=mask)
continue;
idx = a_ctz_32(first);
goto success;
}
upgradelock();
idx = alloc_slot(sc, n);
if (idx < 0) {
unlock();
return 0;
}
g = ctx.active[sc];
success:
ctr = ctx.mmap_counter;
unlock();
return enframe(g, idx, n, ctr);
}
int is_allzero(void *p)
{
struct meta *g = get_meta(p);
return g->sizeclass >= 48 ||
get_stride(g) < UNIT*size_classes[g->sizeclass];
}
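
The allocation fast path peels a slot out of avail_mask with first = mask & -mask and converts it to an index with a_ctz_32. Worked standalone, with the compiler builtin standing in:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t mask = 0xb4;           /* slots 2,4,5,7 available */
	uint32_t first = mask & -mask;  /* isolate lowest set bit */
	assert(first == 0x4);
	assert(__builtin_ctz(first) == 2); /* claimed slot index */
	assert((mask - first) == 0xb0);    /* remaining avail_mask */
	return 0;
}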

View File

@@ -0,0 +1,12 @@
#include <stdlib.h>
#include "meta.h"
size_t malloc_usable_size(void *p)
{
struct meta *g = get_meta(p);
int idx = get_slot_index(p);
size_t stride = get_stride(g);
unsigned char *start = g->mem->storage + stride*idx;
unsigned char *end = start + stride - IB;
return get_nominal_size(p, end);
}

View File

@@ -0,0 +1,288 @@
#ifndef MALLOC_META_H
#define MALLOC_META_H
#include <stdint.h>
#include <errno.h>
#include <limits.h>
#include "glue.h"
__attribute__((__visibility__("hidden")))
extern const uint16_t size_classes[];
#define MMAP_THRESHOLD 131052
#define UNIT 16
#define IB 4
struct group {
struct meta *meta;
unsigned char active_idx:5;
char pad[UNIT - sizeof(struct meta *) - 1];
unsigned char storage[];
};
struct meta {
struct meta *prev, *next;
struct group *mem;
volatile int avail_mask, freed_mask;
uintptr_t last_idx:5;
uintptr_t freeable:1;
uintptr_t sizeclass:6;
uintptr_t maplen:8*sizeof(uintptr_t)-12;
};
struct meta_area {
uint64_t check;
struct meta_area *next;
int nslots;
struct meta slots[];
};
struct malloc_context {
uint64_t secret;
#ifndef PAGESIZE
size_t pagesize;
#endif
int init_done;
unsigned mmap_counter;
struct meta *free_meta_head;
struct meta *avail_meta;
size_t avail_meta_count, avail_meta_area_count, meta_alloc_shift;
struct meta_area *meta_area_head, *meta_area_tail;
unsigned char *avail_meta_areas;
struct meta *active[48];
size_t usage_by_class[48];
uint8_t unmap_seq[32], bounces[32];
uint8_t seq;
uintptr_t brk;
};
__attribute__((__visibility__("hidden")))
extern struct malloc_context ctx;
#ifdef PAGESIZE
#define PGSZ PAGESIZE
#else
#define PGSZ ctx.pagesize
#endif
__attribute__((__visibility__("hidden")))
struct meta *alloc_meta(void);
__attribute__((__visibility__("hidden")))
int is_allzero(void *);
static inline void queue(struct meta **phead, struct meta *m)
{
assert(!m->next);
assert(!m->prev);
if (*phead) {
struct meta *head = *phead;
m->next = head;
m->prev = head->prev;
m->next->prev = m->prev->next = m;
} else {
m->prev = m->next = m;
*phead = m;
}
}
static inline void dequeue(struct meta **phead, struct meta *m)
{
if (m->next != m) {
m->prev->next = m->next;
m->next->prev = m->prev;
if (*phead == m) *phead = m->next;
} else {
*phead = 0;
}
m->prev = m->next = 0;
}
static inline struct meta *dequeue_head(struct meta **phead)
{
struct meta *m = *phead;
if (m) dequeue(phead, m);
return m;
}
static inline void free_meta(struct meta *m)
{
*m = (struct meta){0};
queue(&ctx.free_meta_head, m);
}
static inline uint32_t activate_group(struct meta *m)
{
assert(!m->avail_mask);
uint32_t mask, act = (2u<<m->mem->active_idx)-1;
do mask = m->freed_mask;
while (a_cas(&m->freed_mask, mask, mask&~act)!=mask);
return m->avail_mask = mask & act;
}
static inline int get_slot_index(const unsigned char *p)
{
return p[-3] & 31;
}
static inline struct meta *get_meta(const unsigned char *p)
{
assert(!((uintptr_t)p & 15));
int offset = *(const uint16_t *)(p - 2);
int index = get_slot_index(p);
if (p[-4]) {
assert(!offset);
offset = *(uint32_t *)(p - 8);
assert(offset > 0xffff);
}
const struct group *base = (const void *)(p - UNIT*offset - UNIT);
const struct meta *meta = base->meta;
assert(meta->mem == base);
assert(index <= meta->last_idx);
assert(!(meta->avail_mask & (1u<<index)));
assert(!(meta->freed_mask & (1u<<index)));
const struct meta_area *area = (void *)((uintptr_t)meta & -4096);
assert(area->check == ctx.secret);
if (meta->sizeclass < 48) {
assert(offset >= size_classes[meta->sizeclass]*index);
assert(offset < size_classes[meta->sizeclass]*(index+1));
} else {
assert(meta->sizeclass == 63);
}
if (meta->maplen) {
assert(offset <= meta->maplen*4096UL/UNIT - 1);
}
return (struct meta *)meta;
}
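
get_meta recovers all bookkeeping from a small in-band header just below the user pointer. The layout implied by the reads above (my reading of the code, not a documented interface) can be restated as two standalone helpers:

#include <stdint.h>
#include <stddef.h>

/* offset of the slot from the group's storage, in 16-byte UNITs;
 * a nonzero escape byte at p[-4] selects the 32-bit form at p-8 */
static size_t slot_offset_units(const unsigned char *p)
{
	if (p[-4])
		return *(const uint32_t *)(p - 8);
	return *(const uint16_t *)(p - 2);
}

/* the slot index lives in the low 5 bits of p[-3]; the high 3 bits
 * hold the reserved-size code consumed by get_nominal_size below */
static int slot_index_of(const unsigned char *p)
{
	return p[-3] & 31;
}

Since the group header occupies one UNIT below the first slot, the group base is then p - UNIT*offset - UNIT, and every derived value is asserted against the out-of-band meta before the pointer is trusted.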
static inline size_t get_nominal_size(const unsigned char *p, const unsigned char *end)
{
size_t reserved = p[-3] >> 5;
if (reserved >= 5) {
assert(reserved == 5);
reserved = *(const uint32_t *)(end-4);
assert(reserved >= 5);
assert(!end[-5]);
}
assert(reserved <= end-p);
assert(!*(end-reserved));
// also check the slot's overflow byte
assert(!*end);
return end-reserved-p;
}
static inline size_t get_stride(const struct meta *g)
{
if (!g->last_idx && g->maplen) {
return g->maplen*4096UL - UNIT;
} else {
return UNIT*size_classes[g->sizeclass];
}
}
static inline void set_size(unsigned char *p, unsigned char *end, size_t n)
{
int reserved = end-p-n;
if (reserved) end[-reserved] = 0;
if (reserved >= 5) {
*(uint32_t *)(end-4) = reserved;
end[-5] = 0;
reserved = 5;
}
p[-3] = (p[-3]&31) + (reserved<<5);
}
static inline void *enframe(struct meta *g, int idx, size_t n, int ctr)
{
size_t stride = get_stride(g);
size_t slack = (stride-IB-n)/UNIT;
unsigned char *p = g->mem->storage + stride*idx;
unsigned char *end = p+stride-IB;
// cycle offset within slot to increase interval to address
// reuse, facilitate trapping double-free.
int off = (p[-3] ? *(uint16_t *)(p-2) + 1 : ctr) & 255;
assert(!p[-4]);
if (off > slack) {
size_t m = slack;
m |= m>>1; m |= m>>2; m |= m>>4;
off &= m;
if (off > slack) off -= slack+1;
assert(off <= slack);
}
if (off) {
// store offset in unused header at offset zero
// if enframing at non-zero offset.
*(uint16_t *)(p-2) = off;
p[-3] = 7<<5;
p += UNIT*off;
// for nonzero offset there is no permanent check
// byte, so make one.
p[-4] = 0;
}
*(uint16_t *)(p-2) = (size_t)(p-g->mem->storage)/UNIT;
p[-3] = idx;
set_size(p, end, n);
return p;
}
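
The folding step keeps the cycled offset inside the slot's spare room. A worked trace with hypothetical numbers, slack = 5 spare UNITs and an incoming cycle value off = 14:

/* m   = 5 | 5>>1 = 7; 7 | 7>>2 = 7; 7 | 7>>4 = 7  (2^k-1 covering slack)
 * off = 14 & 7 = 6                                 (still > slack)
 * off = 6 - (5+1) = 0                              (now in [0, slack])
 */

Successive allocations of the same slot thus rotate the returned address by whole UNITs without overrunning the reserved space, which is what makes reuse-after-free and double frees likelier to trip the header asserts in get_meta.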
static inline int size_to_class(size_t n)
{
n = (n+IB-1)>>4;
if (n<10) return n;
n++;
int i = (28-a_clz_32(n))*4 + 8;
if (n>size_classes[i+1]) i+=2;
if (n>size_classes[i]) i++;
return i;
}
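
A worked example of the class computation for a 4000-byte request, assuming musl's size_classes table (where entries 24..27 are 146, 170, 204, 255):

/* n = (4000+IB-1)>>4 = 250
 * 250 >= 10, so n++ -> 251
 * i = (28 - clz32(251))*4 + 8 = (28-24)*4 + 8 = 24
 * 251 > size_classes[25] = 170  -> i = 26
 * 251 > size_classes[26] = 204  -> i = 27
 * class 27: stride = 255*UNIT = 4080 bytes, enough for 4000+IB
 */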
static inline int size_overflows(size_t n)
{
if (n >= SIZE_MAX/2 - 4096) {
errno = ENOMEM;
return 1;
}
return 0;
}
static inline void step_seq(void)
{
if (ctx.seq==255) {
for (int i=0; i<32; i++) ctx.unmap_seq[i] = 0;
ctx.seq = 1;
} else {
ctx.seq++;
}
}
static inline void record_seq(int sc)
{
if (sc-7U < 32) ctx.unmap_seq[sc-7] = ctx.seq;
}
static inline void account_bounce(int sc)
{
if (sc-7U < 32) {
int seq = ctx.unmap_seq[sc-7];
if (seq && ctx.seq-seq < 10) {
if (ctx.bounces[sc-7]+1 < 100)
ctx.bounces[sc-7]++;
else
ctx.bounces[sc-7] = 150;
}
}
}
static inline void decay_bounces(int sc)
{
if (sc-7U < 32 && ctx.bounces[sc-7])
ctx.bounces[sc-7]--;
}
static inline int is_bouncing(int sc)
{
return (sc-7U < 32 && ctx.bounces[sc-7] >= 100);
}
#endif
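
The seq/bounce machinery at the end is a small anti-thrash heuristic: if a size class gains a group within ten sequence steps of having had one unmapped, its per-class counter rises, and past 100 is_bouncing() reports the class as thrashing (the free path, not shown in this hunk, uses that to avoid eagerly unmapping). A self-contained simulation under those assumptions, with local state standing in for ctx:

#include <stdio.h>
#include <stdint.h>

static uint8_t seq = 1, unmap_seq[32], bounces[32];

static void record_unmap(int sc) { if (sc-7U < 32) unmap_seq[sc-7] = seq; }

static void bounce(int sc)
{
	if (sc-7U < 32) {
		int s = unmap_seq[sc-7];
		if (s && seq-s < 10)
			bounces[sc-7] = bounces[sc-7]+1 < 100 ? bounces[sc-7]+1 : 150;
	}
}

int main(void)
{
	for (int i = 0; i < 120; i++) { /* tight unmap/repopulate churn */
		record_unmap(8);
		seq++;
		bounce(8);
	}
	printf("class 8 bouncing: %d\n", bounces[8-7] >= 100); /* prints 1 */
	return 0;
}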

View File

@@ -0,0 +1,51 @@
#define _GNU_SOURCE
#include <stdlib.h>
#include <sys/mman.h>
#include <string.h>
#include "meta.h"
void *realloc(void *p, size_t n)
{
if (!p) return malloc(n);
if (size_overflows(n)) return 0;
struct meta *g = get_meta(p);
int idx = get_slot_index(p);
size_t stride = get_stride(g);
unsigned char *start = g->mem->storage + stride*idx;
unsigned char *end = start + stride - IB;
size_t old_size = get_nominal_size(p, end);
size_t avail_size = end-(unsigned char *)p;
void *new;
// only resize in-place if size class matches
if (n <= avail_size && n<MMAP_THRESHOLD
&& size_to_class(n)+1 >= g->sizeclass) {
set_size(p, end, n);
return p;
}
// use mremap if old and new size are both mmap-worthy
if (g->sizeclass>=48 && n>=MMAP_THRESHOLD) {
assert(g->sizeclass==63);
size_t base = (unsigned char *)p-start;
size_t needed = (n + base + UNIT + IB + 4095) & -4096;
new = g->maplen*4096UL == needed ? g->mem :
mremap(g->mem, g->maplen*4096UL, needed, MREMAP_MAYMOVE);
if (new!=MAP_FAILED) {
g->mem = new;
g->maplen = needed/4096;
p = g->mem->storage + base;
end = g->mem->storage + (needed - UNIT) - IB;
*end = 0;
set_size(p, end, n);
return p;
}
}
new = malloc(n);
if (!new) return 0;
memcpy(new, p, n < old_size ? n : old_size);
free(p);
return new;
}
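
For mmap-serviced slots the new mapping length is re-derived from the request plus the group header (one UNIT), the 4-byte in-band overhead (IB), and page rounding. Checking the arithmetic with hypothetical numbers, base = 0 and n = 1 MiB:

/* needed = (n + base + UNIT + IB + 4095) & -4096
 *        = (1048576 + 0 + 16 + 4 + 4095) & -4096
 *        = 1052672 bytes = 257 pages
 */

so maplen becomes 257, and p and end are re-derived from the possibly moved mapping before set_size stamps the new nominal size.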

View File

@@ -1,54 +1,7 @@
#define _BSD_SOURCE
#include <stdlib.h>
#include <stdint.h>
#include <errno.h>
#include "malloc_impl.h"
void *__memalign(size_t align, size_t len)
void *memalign(size_t align, size_t len)
{
unsigned char *mem, *new;
if ((align & -align) != align) {
errno = EINVAL;
return 0;
}
if (len > SIZE_MAX - align || __malloc_replaced) {
errno = ENOMEM;
return 0;
}
if (align <= SIZE_ALIGN)
return malloc(len);
if (!(mem = malloc(len + align-1)))
return 0;
new = (void *)((uintptr_t)mem + align-1 & -align);
if (new == mem) return mem;
struct chunk *c = MEM_TO_CHUNK(mem);
struct chunk *n = MEM_TO_CHUNK(new);
if (IS_MMAPPED(c)) {
/* Apply difference between aligned and original
* address to the "extra" field of mmapped chunk. */
n->psize = c->psize + (new-mem);
n->csize = c->csize - (new-mem);
return new;
}
struct chunk *t = NEXT_CHUNK(c);
/* Split the allocated chunk into two chunks. The aligned part
* that will be used has the size in its footer reduced by the
* difference between the aligned and original addresses, and
* the resulting size copied to its header. A new header and
* footer are written for the split-off part to be freed. */
n->psize = c->csize = C_INUSE | (new-mem);
n->csize = t->psize -= new-mem;
__bin_chunk(c);
return new;
return aligned_alloc(align, len);
}
weak_alias(__memalign, memalign);

View File

@@ -1,11 +1,10 @@
#include <stdlib.h>
#include <errno.h>
#include "malloc_impl.h"
int posix_memalign(void **res, size_t align, size_t len)
{
if (align < sizeof(void *)) return EINVAL;
void *mem = __memalign(align, len);
void *mem = aligned_alloc(align, len);
if (!mem) return errno;
*res = mem;
return 0;

View File

@@ -0,0 +1,4 @@
#include "dynlink.h"
int __malloc_replaced;
int __aligned_alloc_replaced;

View File

@@ -5,12 +5,13 @@ static const int k = 2043;
static const double kln2 = 0x1.62066151add8bp+10;
/* exp(x)/2 for x >= log(DBL_MAX), slightly better than 0.5*exp(x/2)*exp(x/2) */
double __expo2(double x)
double __expo2(double x, double sign)
{
double scale;
/* note that k is odd and scale*scale overflows */
INSERT_WORDS(scale, (uint32_t)(0x3ff + k/2) << 20, 0);
/* exp(x - k ln2) * 2**(k-1) */
return exp(x - kln2) * scale * scale;
/* in directed rounding correct sign before rounding or overflow is important */
return exp(x - kln2) * (sign * scale) * scale;
}
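
The scaling identity behind __expo2, spelled out for k = 2043 (so scale = 2^1021):

/* scale * scale = 2^2042 = 2^(k-1)
 * exp(x - k*ln2) * 2^(k-1) = exp(x) * 2^(-k) * 2^(k-1) = exp(x)/2
 */

Folding sign into the first multiply means that when the final multiply rounds or overflows under FE_UPWARD or FE_DOWNWARD, it does so with the sign already applied, so e.g. sinh of a large negative argument still rounds toward the correctly signed infinity.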

View File

@@ -5,12 +5,13 @@ static const int k = 235;
static const float kln2 = 0x1.45c778p+7f;
/* expf(x)/2 for x >= log(FLT_MAX), slightly better than 0.5f*expf(x/2)*expf(x/2) */
float __expo2f(float x)
float __expo2f(float x, float sign)
{
float scale;
/* note that k is odd and scale*scale overflows */
SET_FLOAT_WORD(scale, (uint32_t)(0x7f + k/2) << 23);
/* exp(x - k ln2) * 2**(k-1) */
return expf(x - kln2) * scale * scale;
/* in directed rounding correct sign before rounding or overflow is important */
return expf(x - kln2) * (sign * scale) * scale;
}

View File

@@ -36,6 +36,7 @@
*/
static const double
toint = 1.5/EPS,
pio4 = 0x1.921fb54442d18p-1,
invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
@@ -117,11 +118,23 @@ int __rem_pio2(double x, double *y)
}
if (ix < 0x413921fb) { /* |x| ~< 2^20*(pi/2), medium size */
medium:
/* rint(x/(pi/2)), Assume round-to-nearest. */
/* rint(x/(pi/2)) */
fn = (double_t)x*invpio2 + toint - toint;
n = (int32_t)fn;
r = x - fn*pio2_1;
w = fn*pio2_1t; /* 1st round, good to 85 bits */
/* Matters with directed rounding. */
if (predict_false(r - w < -pio4)) {
n--;
fn--;
r = x - fn*pio2_1;
w = fn*pio2_1t;
} else if (predict_false(r - w > pio4)) {
n++;
fn++;
r = x - fn*pio2_1;
w = fn*pio2_1t;
}
y[0] = r - w;
u.f = y[0];
ey = u.i>>52 & 0x7ff;
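
The fn line relies on the classic add-and-subtract rounding trick; a standalone sketch, with toint = 1.5/EPS = 1.5*2^52 as in the constants above:

#include <stdio.h>

int main(void)
{
	double toint = 1.5 * 0x1p52;   /* 1.5/EPS for double */
	volatile double x = 2.6;
	double fn = x + toint - toint; /* 3.0 under round-to-nearest */
	printf("%g\n", fn);
	return 0;
}

Under FE_TONEAREST this computes rint(x) exactly for arguments of moderate magnitude; under directed rounding it can land one integer off, which is precisely the case the new pio4 comparisons detect and repair.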

View File

@@ -35,6 +35,7 @@
*/
static const double
toint = 1.5/EPS,
pio4 = 0x1.921fb6p-1,
invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
pio2_1 = 1.57079631090164184570e+00, /* 0x3FF921FB, 0x50000000 */
pio2_1t = 1.58932547735281966916e-08; /* 0x3E5110b4, 0x611A6263 */
@@ -50,10 +51,20 @@ int __rem_pio2f(float x, double *y)
ix = u.i & 0x7fffffff;
/* 25+53 bit pi is good enough for medium size */
if (ix < 0x4dc90fdb) { /* |x| ~< 2^28*(pi/2), medium size */
/* Use a specialized rint() to get fn. Assume round-to-nearest. */
/* Use a specialized rint() to get fn. */
fn = (double_t)x*invpio2 + toint - toint;
n = (int32_t)fn;
*y = x - fn*pio2_1 - fn*pio2_1t;
/* Matters with directed rounding. */
if (predict_false(*y < -pio4)) {
n--;
fn--;
*y = x - fn*pio2_1 - fn*pio2_1t;
} else if (predict_false(*y > pio4)) {
n++;
fn++;
*y = x - fn*pio2_1 - fn*pio2_1t;
}
return n;
}
if(ix>=0x7f800000) { /* x is inf or NaN */

View File

@@ -44,6 +44,7 @@ pio2_1 = 1.57079632679597125389e+00, /* 0x3FF921FB, 0x54444000 */
pio2_2 = -1.07463465549783099519e-12, /* -0x12e7b967674000.0p-92 */
pio2_3 = 6.36831716351370313614e-25; /* 0x18a2e037074000.0p-133 */
static const long double
pio4 = 0x1.921fb54442d1846ap-1L,
invpio2 = 6.36619772367581343076e-01L, /* 0xa2f9836e4e44152a.0p-64 */
pio2_1t = -1.07463465549719416346e-12L, /* -0x973dcb3b399d747f.0p-103 */
pio2_2t = 6.36831716351095013979e-25L, /* 0xc51701b839a25205.0p-144 */
@@ -57,6 +58,7 @@ pio2_3t = -2.75299651904407171810e-37L; /* -0xbb5bf6c7ddd660ce.0p-185 */
#define NX 5
#define NY 3
static const long double
pio4 = 0x1.921fb54442d18469898cc51701b8p-1L,
invpio2 = 6.3661977236758134307553505349005747e-01L, /* 0x145f306dc9c882a53f84eafa3ea6a.0p-113 */
pio2_1 = 1.5707963267948966192292994253909555e+00L, /* 0x1921fb54442d18469800000000000.0p-112 */
pio2_1t = 2.0222662487959507323996846200947577e-21L, /* 0x13198a2e03707344a4093822299f3.0p-181 */
@@ -76,11 +78,23 @@ int __rem_pio2l(long double x, long double *y)
u.f = x;
ex = u.i.se & 0x7fff;
if (SMALL(u)) {
/* rint(x/(pi/2)), Assume round-to-nearest. */
/* rint(x/(pi/2)) */
fn = x*invpio2 + toint - toint;
n = QUOBITS(fn);
r = x-fn*pio2_1;
w = fn*pio2_1t; /* 1st round good to 102/180 bits (ld80/ld128) */
/* Matters with directed rounding. */
if (predict_false(r - w < -pio4)) {
n--;
fn--;
r = x - fn*pio2_1;
w = fn*pio2_1t;
} else if (predict_false(r - w > pio4)) {
n++;
fn++;
r = x - fn*pio2_1;
w = fn*pio2_1t;
}
y[0] = r-w;
u.f = y[0];
ey = u.i.se & 0x7fff;

View File

@@ -35,6 +35,6 @@ double cosh(double x)
/* |x| > log(DBL_MAX) or nan */
/* note: the result is stored to handle overflow */
t = __expo2(x);
t = __expo2(x, 1.0);
return t;
}

View File

@@ -28,6 +28,6 @@ float coshf(float x)
}
/* |x| > log(FLT_MAX) or nan */
t = __expo2f(x);
t = __expo2f(x, 1.0f);
return t;
}

View File

@@ -0,0 +1,7 @@
#include <math.h>
double fabs(double x)
{
__asm__ ("fabs" : "+t"(x));
return x;
}

View File

@@ -1,6 +0,0 @@
.global fabs
.type fabs,@function
fabs:
fldl 4(%esp)
fabs
ret

View File

@@ -0,0 +1,7 @@
#include <math.h>
float fabsf(float x)
{
__asm__ ("fabs" : "+t"(x));
return x;
}

View File

@@ -1,6 +0,0 @@
.global fabsf
.type fabsf,@function
fabsf:
flds 4(%esp)
fabs
ret

View File

@@ -0,0 +1,7 @@
#include <math.h>
long double fabsl(long double x)
{
__asm__ ("fabs" : "+t"(x));
return x;
}

View File

@@ -1,6 +0,0 @@
.global fabsl
.type fabsl,@function
fabsl:
fldt 4(%esp)
fabs
ret

View File

@@ -0,0 +1,10 @@
#include <math.h>
double fmod(double x, double y)
{
unsigned short fpsr;
// fprem does not introduce excess precision into x
do __asm__ ("fprem; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y));
while (fpsr & 0x400);
return x;
}
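
fprem reduces the exponent gap by at most 63 bits per execution and reports an incomplete reduction in the C2 status flag (bit 10, the 0x400 tested above), hence the loop. A quick call that exercises the retry path, since the quotient here is around 2^68:

#include <stdio.h>
#include <math.h>

int main(void)
{
	/* 2^70 mod 3 == 1 exactly, because 2^2 == 1 (mod 3) */
	printf("%g\n", fmod(0x1p70, 3.0)); /* prints 1 */
	return 0;
}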

View File

@@ -1,11 +0,0 @@
.global fmod
.type fmod,@function
fmod:
fldl 12(%esp)
fldl 4(%esp)
1: fprem
fnstsw %ax
sahf
jp 1b
fstp %st(1)
ret

View File

@@ -0,0 +1,10 @@
#include <math.h>
float fmodf(float x, float y)
{
unsigned short fpsr;
// fprem does not introduce excess precision into x
do __asm__ ("fprem; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y));
while (fpsr & 0x400);
return x;
}

View File

@@ -1,11 +0,0 @@
.global fmodf
.type fmodf,@function
fmodf:
flds 8(%esp)
flds 4(%esp)
1: fprem
fnstsw %ax
sahf
jp 1b
fstp %st(1)
ret

View File

@@ -0,0 +1,9 @@
#include <math.h>
long double fmodl(long double x, long double y)
{
unsigned short fpsr;
do __asm__ ("fprem; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y));
while (fpsr & 0x400);
return x;
}

View File

@@ -1,11 +0,0 @@
.global fmodl
.type fmodl,@function
fmodl:
fldt 16(%esp)
fldt 4(%esp)
1: fprem
fnstsw %ax
sahf
jp 1b
fstp %st(1)
ret

View File

@@ -0,0 +1,8 @@
#include <math.h>
long long llrint(double x)
{
long long r;
__asm__ ("fistpll %0" : "=m"(r) : "t"(x) : "st");
return r;
}

View File

@@ -1,8 +0,0 @@
.global llrint
.type llrint,@function
llrint:
fldl 4(%esp)
fistpll 4(%esp)
mov 4(%esp),%eax
mov 8(%esp),%edx
ret

View File

@@ -0,0 +1,8 @@
#include <math.h>
long long llrintf(float x)
{
long long r;
__asm__ ("fistpll %0" : "=m"(r) : "t"(x) : "st");
return r;
}

View File

@@ -1,9 +0,0 @@
.global llrintf
.type llrintf,@function
llrintf:
sub $8,%esp
flds 12(%esp)
fistpll (%esp)
pop %eax
pop %edx
ret

View File

@@ -0,0 +1,8 @@
#include <math.h>
long long llrintl(long double x)
{
long long r;
__asm__ ("fistpll %0" : "=m"(r) : "t"(x) : "st");
return r;
}

View File

@@ -1,8 +0,0 @@
.global llrintl
.type llrintl,@function
llrintl:
fldt 4(%esp)
fistpll 4(%esp)
mov 4(%esp),%eax
mov 8(%esp),%edx
ret

View File

@@ -0,0 +1,8 @@
#include <math.h>
long lrint(double x)
{
long r;
__asm__ ("fistpl %0" : "=m"(r) : "t"(x) : "st");
return r;
}

View File

@@ -1,7 +0,0 @@
.global lrint
.type lrint,@function
lrint:
fldl 4(%esp)
fistpl 4(%esp)
mov 4(%esp),%eax
ret

View File

@@ -0,0 +1,8 @@
#include <math.h>
long lrintf(float x)
{
long r;
__asm__ ("fistpl %0" : "=m"(r) : "t"(x) : "st");
return r;
}

View File

@@ -1,7 +0,0 @@
.global lrintf
.type lrintf,@function
lrintf:
flds 4(%esp)
fistpl 4(%esp)
mov 4(%esp),%eax
ret

View File

@@ -0,0 +1,8 @@
#include <math.h>
long lrintl(long double x)
{
long r;
__asm__ ("fistpl %0" : "=m"(r) : "t"(x) : "st");
return r;
}

View File

@@ -1,7 +0,0 @@
.global lrintl
.type lrintl,@function
lrintl:
fldt 4(%esp)
fistpl 4(%esp)
mov 4(%esp),%eax
ret

View File

@@ -0,0 +1,12 @@
#include <math.h>
double remainder(double x, double y)
{
unsigned short fpsr;
// fprem1 does not introduce excess precision into x
do __asm__ ("fprem1; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y));
while (fpsr & 0x400);
return x;
}
weak_alias(remainder, drem);

View File

@@ -1,14 +0,0 @@
.global remainder
.type remainder,@function
remainder:
.weak drem
.type drem,@function
drem:
fldl 12(%esp)
fldl 4(%esp)
1: fprem1
fnstsw %ax
sahf
jp 1b
fstp %st(1)
ret

View File

@@ -0,0 +1,12 @@
#include <math.h>
float remainderf(float x, float y)
{
unsigned short fpsr;
// fprem1 does not introduce excess precision into x
do __asm__ ("fprem1; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y));
while (fpsr & 0x400);
return x;
}
weak_alias(remainderf, dremf);

View File

@@ -1,14 +0,0 @@
.global remainderf
.type remainderf,@function
remainderf:
.weak dremf
.type dremf,@function
dremf:
flds 8(%esp)
flds 4(%esp)
1: fprem1
fnstsw %ax
sahf
jp 1b
fstp %st(1)
ret

View File

@@ -0,0 +1,9 @@
#include <math.h>
long double remainderl(long double x, long double y)
{
unsigned short fpsr;
do __asm__ ("fprem1; fnstsw %%ax" : "+t"(x), "=a"(fpsr) : "u"(y));
while (fpsr & 0x400);
return x;
}

View File

@@ -1,11 +0,0 @@
.global remainderl
.type remainderl,@function
remainderl:
fldt 16(%esp)
fldt 4(%esp)
1: fprem1
fnstsw %ax
sahf
jp 1b
fstp %st(1)
ret

View File

@@ -0,0 +1,7 @@
#include <math.h>
double rint(double x)
{
__asm__ ("frndint" : "+t"(x));
return x;
}

View File

@@ -1,6 +0,0 @@
.global rint
.type rint,@function
rint:
fldl 4(%esp)
frndint
ret

View File

@@ -0,0 +1,7 @@
#include <math.h>
float rintf(float x)
{
__asm__ ("frndint" : "+t"(x));
return x;
}

View File

@@ -1,6 +0,0 @@
.global rintf
.type rintf,@function
rintf:
flds 4(%esp)
frndint
ret

View File

@@ -0,0 +1,7 @@
#include <math.h>
long double rintl(long double x)
{
__asm__ ("frndint" : "+t"(x));
return x;
}

View File

@@ -1,6 +0,0 @@
.global rintl
.type rintl,@function
rintl:
fldt 4(%esp)
frndint
ret

View File

@@ -0,0 +1,15 @@
#include "libm.h"
double sqrt(double x)
{
union ldshape ux;
unsigned fpsr;
__asm__ ("fsqrt; fnstsw %%ax": "=t"(ux.f), "=a"(fpsr) : "0"(x));
if ((ux.i.m & 0x7ff) != 0x400)
return (double)ux.f;
/* Rounding to double would have encountered an exact halfway case.
Adjust mantissa downwards if fsqrt rounded up, else upwards.
(result of fsqrt could not have been exact) */
ux.i.m ^= (fpsr & 0x200) + 0x300;
return (double)ux.f;
}
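
Making the fixup arithmetic explicit (my reading of the code above): the extended-precision result's low 11 mantissa bits equal to 0x400 mark an exact halfway case for the subsequent 53-bit rounding, and C1 (fpsr & 0x200) records whether fsqrt rounded up:

/* rounded up:   m ^= 0x500  -> low bits 0x400 -> 0x100 (nudged down)
 * rounded down: m ^= 0x300  -> low bits 0x400 -> 0x700 (nudged up)
 */

Either way the value moves off the halfway point in the direction opposite to fsqrt's earlier rounding, so the final conversion to double rounds correctly.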

View File

@@ -1,21 +0,0 @@
.global sqrt
.type sqrt,@function
sqrt: fldl 4(%esp)
fsqrt
fnstsw %ax
sub $12,%esp
fld %st(0)
fstpt (%esp)
mov (%esp),%ecx
and $0x7ff,%ecx
cmp $0x400,%ecx
jnz 1f
and $0x200,%eax
sub $0x100,%eax
sub %eax,(%esp)
fstp %st(0)
fldt (%esp)
1: add $12,%esp
fstpl 4(%esp)
fldl 4(%esp)
ret

View File

@@ -0,0 +1,12 @@
#include <math.h>
float sqrtf(float x)
{
long double t;
/* The long double result has sufficient precision so that
* second rounding to float still keeps the returned value
* correctly rounded, see Pierre Roux, "Innocuous Double
* Rounding of Basic Arithmetic Operations". */
__asm__ ("fsqrt" : "=t"(t) : "0"(x));
return (float)t;
}

View File

@@ -1,7 +0,0 @@
.global sqrtf
.type sqrtf,@function
sqrtf: flds 4(%esp)
fsqrt
fstps 4(%esp)
flds 4(%esp)
ret

View File

@@ -0,0 +1,7 @@
#include <math.h>
long double sqrtl(long double x)
{
__asm__ ("fsqrt" : "+t"(x));
return x;
}

View File

@@ -1,5 +0,0 @@
.global sqrtl
.type sqrtl,@function
sqrtl: fldt 4(%esp)
fsqrt
ret

View File

@@ -0,0 +1,15 @@
#include <math.h>
#if __HAVE_68881__
long double sqrtl(long double x)
{
__asm__ ("fsqrt.x %1,%0" : "=f"(x) : "fm"(x));
return x;
}
#else
#include "../sqrtl.c"
#endif

View File

@@ -34,6 +34,6 @@ double sinh(double x)
/* |x| > log(DBL_MAX) or nan */
/* note: the result is stored to handle overflow */
t = 2*h*__expo2(absx);
t = __expo2(absx, 2*h);
return t;
}

View File

@@ -26,6 +26,6 @@ float sinhf(float x)
}
/* |x| > logf(FLT_MAX) or nan */
t = 2*h*__expo2f(absx);
t = __expo2f(absx, 2*h);
return t;
}

View File

@@ -0,0 +1,10 @@
#include <math.h>
double fabs(double x)
{
double t;
__asm__ ("pcmpeqd %0, %0" : "=x"(t)); // t = ~0
__asm__ ("psrlq $1, %0" : "+x"(t)); // t >>= 1
__asm__ ("andps %1, %0" : "+x"(x) : "x"(t)); // x &= t
return x;
}
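
The three instructions build the sign-clear mask entirely in registers: pcmpeqd compares a register with itself to produce all ones, and psrlq shifts a zero into the top bit, yielding 0x7fffffffffffffff without touching memory. A portable C restatement of the same computation (illustrative, not the musl source):

#include <stdint.h>
#include <string.h>

double fabs_bits(double x)
{
	uint64_t u;
	memcpy(&u, &x, sizeof u);  /* type-pun via memcpy, no aliasing UB */
	u &= UINT64_MAX >> 1;      /* clear the sign bit, keep the rest */
	memcpy(&x, &u, sizeof u);
	return x;
}

One plausible reason for building the mask in registers is avoiding a .rodata constant and its load; the float version below does the same with psrld for 32-bit lanes.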

View File

@@ -1,9 +0,0 @@
.global fabs
.type fabs,@function
fabs:
xor %eax,%eax
dec %rax
shr %rax
movq %rax,%xmm1
andpd %xmm1,%xmm0
ret

View File

@@ -0,0 +1,10 @@
#include <math.h>
float fabsf(float x)
{
float t;
__asm__ ("pcmpeqd %0, %0" : "=x"(t)); // t = ~0
__asm__ ("psrld $1, %0" : "+x"(t)); // t >>= 1
__asm__ ("andps %1, %0" : "+x"(x) : "x"(t)); // x &= t
return x;
}

View File

@@ -1,7 +0,0 @@
.global fabsf
.type fabsf,@function
fabsf:
mov $0x7fffffff,%eax
movq %rax,%xmm1
andps %xmm1,%xmm0
ret

View File

@@ -0,0 +1,7 @@
#include <math.h>
long double fabsl(long double x)
{
__asm__ ("fabs" : "+t"(x));
return x;
}

Some files were not shown because too many files have changed in this diff.