/programs/develop/ktcc/trunk/source/lib/Makefile |
---|
0,0 → 1,128 |
# |
# Tiny C Compiler Makefile for libtcc1.a |
# |
TOP = ..
include $(TOP)/Makefile
VPATH = $(top_srcdir)/lib $(top_srcdir)/win32/lib

# Native library build: when the caller did not pass TARGET, pick it from
# the configuration variables (presumably set by the included top-level
# Makefile -- confirm).  The first matching test wins.
ifndef TARGET
  ifdef CONFIG_WIN64
    TARGET = x86_64-win
  else ifdef CONFIG_WIN32
    TARGET = i386-win
  else ifeq ($(ARCH),i386)
    TARGET = i386
  else ifeq ($(ARCH),x86-64)
    TARGET = x86_64
  else ifeq ($(ARCH),arm)
    TARGET = arm
    # native ARM objects are compiled with the host compiler
    XCC = $(CC)
  else ifeq ($(ARCH),arm64)
    TARGET = arm64
  endif
endif
# Objects are placed in a directory named after the target.
DIR = $(TARGET)

# "native" archives into ../libtcc1.a using the tcc next to this directory;
# "cross" archives into $(DIR)/libtcc1.a using the $(TARGET)-prefixed tcc.
native: ../libtcc1.a
native: TCC = $(TOP)/tcc$(EXESUF)
cross: $(DIR)/libtcc1.a
cross: TCC = $(TOP)/$(TARGET)-tcc$(EXESUF)

# Bounds-checking object.  Kept as a recursively expanded variable because
# it is cleared further down for Darwin, after the lists below are written.
BCHECK_O = bcheck.o

# Object lists, one per supported target family.
I386_O   = libtcc1.o alloca86.o alloca86-bt.o $(BCHECK_O)
X86_64_O = libtcc1.o alloca86_64.o alloca86_64-bt.o $(BCHECK_O)
ARM_O    = libtcc1.o armeabi.o alloca-arm.o
ARM64_O  = lib-arm64.o
WIN32_O  = $(I386_O) crt1.o wincrt1.o dllcrt1.o dllmain.o chkstk.o
WIN64_O  = $(X86_64_O) crt1.o wincrt1.o dllcrt1.o dllmain.o chkstk.o

# build TCC runtime library to contain PIC code, so it can be linked
# into shared libraries
PICFLAGS = -fPIC

# don't compile with -fstack-protector-strong, TCC doesn't handle it
# correctly
CFLAGS := $(filter-out -fstack-protector-strong,$(CFLAGS))
# Per-target settings: object list (OBJ), target defines (TGT), the compiler
# used for the runtime objects (XCC) and, for the PE targets, the archiver
# (XAR).  An unknown TARGET aborts the parse.
ifeq ($(TARGET),i386-win)
  OBJ = $(addprefix $(DIR)/,$(WIN32_O))
  TGT = -DTCC_TARGET_I386 -DTCC_TARGET_PE
  XCC ?= $(TCC) -B$(top_srcdir)/win32 -I$(top_srcdir)/include
  XAR ?= $(DIR)/tiny_libmaker$(EXESUF)
  PICFLAGS =
else ifeq ($(TARGET),x86_64-win)
  OBJ = $(addprefix $(DIR)/,$(WIN64_O))
  TGT = -DTCC_TARGET_X86_64 -DTCC_TARGET_PE
  # NOTE(review): plain '=' here, while every other target uses '?='
  XCC = $(TCC) -B$(top_srcdir)/win32 -I$(top_srcdir)/include
  XAR ?= $(DIR)/tiny_libmaker$(EXESUF)
  PICFLAGS =
else ifeq ($(TARGET),i386)
  OBJ = $(addprefix $(DIR)/,$(I386_O))
  TGT = -DTCC_TARGET_I386
  XCC ?= $(TCC) -B$(TOP)
else ifeq ($(TARGET),x86_64)
  OBJ = $(addprefix $(DIR)/,$(X86_64_O))
  TGT = -DTCC_TARGET_X86_64
  XCC ?= $(TCC) -B$(TOP)
else ifeq ($(TARGET),arm)
  OBJ = $(addprefix $(DIR)/,$(ARM_O))
  TGT = -DTCC_TARGET_ARM
  XCC ?= $(TCC) -B$(TOP)
else ifeq ($(TARGET),arm64)
  OBJ = $(addprefix $(DIR)/,$(ARM64_O))
  TGT = -DTCC_TARGET_ARM64
  XCC ?= $(TCC) -B$(TOP)
else
  $(error libtcc1.a not supported on target '$(TARGET)')
endif
# Flags handed to $(XCC); "-b" is filtered out of the combined list.
XFLAGS = $(filter-out -b,$(CPPFLAGS) $(CFLAGS) $(PICFLAGS) $(TGT))

# Darwin: archive with tiny_libmaker and leave the bounds checker out.
ifeq ($(TARGETOS),Darwin)
 XAR = $(DIR)/tiny_libmaker$(EXESUF)
 XFLAGS += -D_ANSI_SOURCE
 BCHECK_O =
endif

# Fall back to the system archiver when no target chose one.
XAR ?= $(AR)

# These goals are commands, not files.
.PHONY: native cross clean

$(DIR)/libtcc1.a ../libtcc1.a : $(OBJ) $(XAR)
	$(XAR) rcs $@ $(OBJ)

$(DIR)/%.o : %.c
	$(XCC) -c $< -o $@ $(XFLAGS)

$(DIR)/%.o : %.S
	$(XCC) -c $< -o $@ $(XFLAGS)

# Host-built helper tools (e.g. tiny_libmaker) from win32/tools.
$(DIR)/%$(EXESUF) : $(TOP)/win32/tools/%.c
	$(CC) -o $@ $< $(XFLAGS) $(LDFLAGS)

# The stamp file guarantees the output directory exists before anything
# is written into it.
$(OBJ) $(XAR) : $(DIR)/exists

$(DIR)/exists :
	mkdir -p $(DIR)
	@echo $@ > $@

# Remove every per-target build directory, including "arm", which the
# previous list omitted although the arm target builds into it.
clean :
	rm -rfv i386-win x86_64-win i386 x86_64 arm arm64
/programs/develop/ktcc/trunk/source/lib/alloca-arm.S |
---|
0,0 → 1,11 |
/*
 * alloca for ARM: lowers sp by the amount passed in r0, rounds sp down to
 * a multiple of 8 and returns the new sp in r0.
 */
	.text
	.align	2
	.global	alloca
	.type	alloca, %function
alloca:
	sub	sp, sp, r0		/* sp = sp - r0 */
	bic	sp, sp, #7		/* clear the low three bits of sp */
	mov	r0, sp			/* result = new stack pointer */
	mov	pc, lr			/* return */
	.size	alloca, .-alloca
	.section	.note.GNU-stack,"",%progbits
/programs/develop/ktcc/trunk/source/lib/alloca86-bt.S |
---|
0,0 → 1,47 |
/* ---------------------------------------------- */
/* alloca86-bt.S */

/*
 * __bound_alloca (i386): carves a block out of the stack like alloca and
 * passes its address and the requested size to __bound_new_region.
 */
.globl __bound_alloca

__bound_alloca:
    pop     %edx                    /* edx = return address */
    pop     %eax                    /* eax = size argument */
    mov     %eax, %ecx              /* ecx = size before rounding */
    add     $3, %eax
    and     $-4, %eax               /* round up to a multiple of 4 */
    jz      bound_alloca86_done     /* rounded size is 0: return 0 */

#ifdef TCC_TARGET_PE
    /* PE only: step down 4096 bytes at a time, reading memory at each step */
bound_alloca86_probe:
    cmp     $4096, %eax
    jbe     bound_alloca86_carve
    test    %eax, -4096(%esp)
    sub     $4096, %esp
    sub     $4096, %eax
    jmp     bound_alloca86_probe
bound_alloca86_carve:
#endif

    sub     %eax, %esp
    mov     %esp, %eax              /* eax = start of the new block */

    push    %edx                    /* keep return address across the call */
    push    %eax                    /* keep block address across the call */
    push    %ecx                    /* second argument: size */
    push    %eax                    /* first argument: block address */
    call    __bound_new_region
    add     $8, %esp                /* drop the two arguments */
    pop     %eax
    pop     %edx

bound_alloca86_done:
    /* the first push refills the slot popped as the argument (presumably so
       the caller's own stack cleanup stays balanced); the second is the
       return address consumed by ret */
    push    %edx
    push    %edx
    ret

/* mark stack as nonexecutable */
#if defined __ELF__ && defined __linux__
    .section    .note.GNU-stack,"",@progbits
#endif
/* ---------------------------------------------- */
/programs/develop/ktcc/trunk/source/lib/alloca86.S |
---|
0,0 → 1,35 |
/* ---------------------------------------------- */
/* alloca86.S */

/*
 * alloca (i386): lowers esp by the size argument rounded up to a multiple
 * of 4 and returns the new esp in eax; returns 0 when the rounded size is 0.
 */
.globl alloca

alloca:
    pop     %edx                /* edx = return address */
    pop     %eax                /* eax = size argument */
    add     $3, %eax
    and     $-4, %eax           /* round up to a multiple of 4 */
    jz      alloca86_done

#ifdef TCC_TARGET_PE
    /* PE only: step down 4096 bytes at a time, reading memory at each step */
alloca86_probe:
    cmp     $4096, %eax
    jbe     alloca86_carve
    test    %eax, -4096(%esp)
    sub     $4096, %esp
    sub     $4096, %eax
    jmp     alloca86_probe
alloca86_carve:
#endif

    sub     %eax, %esp
    mov     %esp, %eax          /* result = new stack pointer */

alloca86_done:
    /* the first push refills the slot popped as the argument (presumably so
       the caller's own stack cleanup stays balanced); the second is the
       return address consumed by ret */
    push    %edx
    push    %edx
    ret

/* mark stack as nonexecutable */
#if defined __ELF__ && defined __linux__
    .section    .note.GNU-stack,"",@progbits
#endif
/* ---------------------------------------------- */
/programs/develop/ktcc/trunk/source/lib/alloca86_64-bt.S |
---|
0,0 → 1,60 |
/* ---------------------------------------------- */
/* alloca86_64-bt.S */

/*
 * __bound_alloca (x86-64): alloca variant for bounds-checked code.
 * The PE build only allocates; the other build also passes the new block
 * and the requested size to __bound_new_region.
 */
.globl __bound_alloca

__bound_alloca:

#ifdef TCC_TARGET_PE
    /* bound checking is not implemented */
    pop     %rdx                /* rdx = return address */
    mov     %rcx,%rax           /* size is taken from rcx */
    add     $15,%rax
    and     $-16,%rax           /* round up to a multiple of 16 */
    jz      p3

    /* step down 4096 bytes at a time, reading memory at each step */
p1:
    cmp     $4096,%rax
    jbe     p2
    test    %rax,-4096(%rsp)
    sub     $4096,%rsp
    sub     $4096,%rax
    jmp     p1
p2:

    sub     %rax,%rsp
    mov     %rsp,%rax
    add     $32,%rax            /* result is 32 bytes above the new rsp */

p3:
    push    %rdx
    ret
#else
    pop     %rdx                /* rdx = return address */
    mov     %rdi,%rax           /* size is taken from rdi */
    /* was "movl %rax,%rsi": the 32-bit suffix does not match 64-bit
       registers; a plain mov copies the full size */
    mov     %rax,%rsi           /* size, a second parm to the __bound_new_region */

    add     $15,%rax
    and     $-16,%rax           /* round up to a multiple of 16 */
    jz      p3

    sub     %rax,%rsp
    mov     %rsp,%rdi           /* pointer, a first parm to the __bound_new_region */
    mov     %rsp,%rax

    push    %rdx                /* keep return address across the call */
    push    %rax                /* keep block address across the call */
    call    __bound_new_region
    pop     %rax
    pop     %rdx

p3:
    push    %rdx
    ret
#endif

/* mark stack as nonexecutable */
#if defined __ELF__ && defined __linux__
    .section    .note.GNU-stack,"",@progbits
#endif
/* ---------------------------------------------- */
/programs/develop/ktcc/trunk/source/lib/alloca86_64.S |
---|
0,0 → 1,42 |
/* ---------------------------------------------- */
/* alloca86_64.S */

/*
 * alloca (x86-64): lowers rsp by the size rounded up to a multiple of 16
 * and returns the block address in rax; returns 0 when the rounded size
 * is 0.
 */
.globl alloca

alloca:
    pop     %rdx                /* rdx = return address */
#ifdef TCC_TARGET_PE
    mov     %rcx, %rax          /* PE build: size is taken from rcx */
#else
    mov     %rdi, %rax          /* other builds: size is taken from rdi */
#endif
    add     $15, %rax
    and     $-16, %rax          /* round up to a multiple of 16 */
    jz      alloca64_done

#ifdef TCC_TARGET_PE
    /* PE only: step down 4096 bytes at a time, reading memory at each step */
alloca64_probe:
    cmp     $4096, %rax
    jbe     alloca64_carve
    test    %rax, -4096(%rsp)
    sub     $4096, %rsp
    sub     $4096, %rax
    jmp     alloca64_probe
alloca64_carve:
#endif

    sub     %rax, %rsp
    mov     %rsp, %rax
#ifdef TCC_TARGET_PE
    add     $32, %rax           /* PE: result is 32 bytes above the new rsp */
#endif

alloca64_done:
    push    %rdx                /* put the return address back for ret */
    ret

/* mark stack as nonexecutable */
#if defined __ELF__ && defined __linux__
    .section    .note.GNU-stack,"",@progbits
#endif
/* ---------------------------------------------- */
/programs/develop/ktcc/trunk/source/lib/armeabi.c |
---|
0,0 → 1,489 |
/* TCC ARM runtime EABI |
Copyright (C) 2013 Thomas Preud'homme |
Permission is hereby granted, free of charge, to any person obtaining a copy |
of this software and associated documentation files (the "Software"), to deal |
in the Software without restriction, including without limitation the rights |
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
copies of the Software, and to permit persons to whom the Software is |
furnished to do so, subject to the following conditions: |
The above copyright notice and this permission notice shall be included in |
all copies or substantial portions of the software. |
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
THE SOFTWARE.*/ |
#include <limits.h> |
/* We rely on the little endianness and EABI calling convention for this to |
work */ |
/* A two-word value split into an unsigned low half and an unsigned high
   half, low half first. */
typedef struct double_unsigned_struct {
    unsigned int low;
    unsigned int high;
} double_unsigned_struct;
/* A two-word value whose high half is signed: unsigned low half first,
   then the signed high half. */
typedef struct unsigned_int_struct {
    unsigned int low;
    int high;
} unsigned_int_struct;
/* Defines `void <name>_return(<type> ret)` with an empty body.  The helpers
   in this file call it as their last action to hand back a struct value;
   presumably this relies on the EABI passing the argument in the registers
   a 64-bit result is expected in -- confirm against the ABI document. */
#define REGS_RETURN(name, type) \
  void name ## _return(type ret) { }
/* Float helper functions */

/* Field widths of the single and double precision encodings. */
#define FLOAT_EXP_BITS   8
#define FLOAT_FRAC_BITS  23
#define DOUBLE_EXP_BITS  11
#define DOUBLE_FRAC_BITS 52

/* Stored exponent of the value 1.0 for `type` (FLOAT or DOUBLE): 2^(exponent
   bits - 1) - 1. */
#define ONE_EXP(type) ((1 << ((type ## _EXP_BITS) - 1)) - 1)
REGS_RETURN(unsigned_int_struct, unsigned_int_struct) |
REGS_RETURN(double_unsigned_struct, double_unsigned_struct) |
/* float -> integer: (sign) 1.fraction x 2^(exponent - exp_for_one) */ |
/* float to [unsigned] long long conversion */ |
#define DEFINE__AEABI_F2XLZ(name, with_sign) \ |
void __aeabi_ ## name(unsigned val) \ |
{ \ |
int exp, high_shift, sign; \ |
double_unsigned_struct ret; \ |
\ |
/* compute sign */ \ |
sign = val >> 31; \ |
\ |
/* compute real exponent */ \ |
exp = val >> FLOAT_FRAC_BITS; \ |
exp &= (1 << FLOAT_EXP_BITS) - 1; \ |
exp -= ONE_EXP(FLOAT); \ |
\ |
/* undefined behavior if truncated value cannot be represented */ \ |
if (with_sign) { \ |
if (exp > 62) /* |val| too big, double cannot represent LLONG_MAX */ \ |
return; \ |
} else { \ |
if ((sign && exp >= 0) || exp > 63) /* if val < 0 || val too big */ \ |
return; \ |
} \ |
\ |
val &= (1 << FLOAT_FRAC_BITS) - 1; \ |
if (exp >= 32) { \ |
ret.high = 1 << (exp - 32); \ |
if (exp - 32 >= FLOAT_FRAC_BITS) { \ |
ret.high |= val << (exp - 32 - FLOAT_FRAC_BITS); \ |
ret.low = 0; \ |
} else { \ |
high_shift = FLOAT_FRAC_BITS - (exp - 32); \ |
ret.high |= val >> high_shift; \ |
ret.low = val << (32 - high_shift); \ |
} \ |
} else { \ |
ret.high = 0; \ |
ret.low = 1 << exp; \ |
if (exp > FLOAT_FRAC_BITS) \ |
ret.low |= val << (exp - FLOAT_FRAC_BITS); \ |
else \ |
ret.low |= val >> (FLOAT_FRAC_BITS - exp); \ |
} \ |
\ |
/* encode negative integer using 2's complement */ \ |
if (with_sign && sign) { \ |
ret.low = ~ret.low; \ |
ret.high = ~ret.high; \ |
if (ret.low == UINT_MAX) { \ |
ret.low = 0; \ |
ret.high++; \ |
} else \ |
ret.low++; \ |
} \ |
\ |
double_unsigned_struct_return(ret); \ |
} |
/* float to unsigned long long conversion */ |
DEFINE__AEABI_F2XLZ(f2ulz, 0) |
/* float to long long conversion */ |
DEFINE__AEABI_F2XLZ(f2lz, 1) |
/* double to [unsigned] long long conversion */ |
#define DEFINE__AEABI_D2XLZ(name, with_sign) \ |
void __aeabi_ ## name(double_unsigned_struct val) \ |
{ \ |
int exp, high_shift, sign; \ |
double_unsigned_struct ret; \ |
\ |
/* compute sign */ \ |
sign = val.high >> 31; \ |
\ |
/* compute real exponent */ \ |
exp = (val.high >> (DOUBLE_FRAC_BITS - 32)); \ |
exp &= (1 << DOUBLE_EXP_BITS) - 1; \ |
exp -= ONE_EXP(DOUBLE); \ |
\ |
/* undefined behavior if truncated value cannot be represented */ \ |
if (with_sign) { \ |
if (exp > 62) /* |val| too big, double cannot represent LLONG_MAX */ \ |
return; \ |
} else { \ |
if ((sign && exp >= 0) || exp > 63) /* if val < 0 || val too big */ \ |
return; \ |
} \ |
\ |
val.high &= (1 << (DOUBLE_FRAC_BITS - 32)) - 1; \ |
if (exp >= 32) { \ |
ret.high = 1 << (exp - 32); \ |
if (exp >= DOUBLE_FRAC_BITS) { \ |
high_shift = exp - DOUBLE_FRAC_BITS; \ |
ret.high |= val.high << high_shift; \ |
ret.high |= val.low >> (32 - high_shift); \ |
ret.low = val.low << high_shift; \ |
} else { \ |
high_shift = DOUBLE_FRAC_BITS - exp; \ |
ret.high |= val.high >> high_shift; \ |
ret.low = val.high << (32 - high_shift); \ |
ret.low |= val.low >> high_shift; \ |
} \ |
} else { \ |
ret.high = 0; \ |
ret.low = 1 << exp; \ |
if (exp > DOUBLE_FRAC_BITS - 32) { \ |
high_shift = exp - DOUBLE_FRAC_BITS - 32; \ |
ret.low |= val.high << high_shift; \ |
ret.low |= val.low >> (32 - high_shift); \ |
} else \ |
ret.low |= val.high >> (DOUBLE_FRAC_BITS - 32 - exp); \ |
} \ |
\ |
/* encode negative integer using 2's complement */ \ |
if (with_sign && sign) { \ |
ret.low = ~ret.low; \ |
ret.high = ~ret.high; \ |
if (ret.low == UINT_MAX) { \ |
ret.low = 0; \ |
ret.high++; \ |
} else \ |
ret.low++; \ |
} \ |
\ |
double_unsigned_struct_return(ret); \ |
} |
/* double to unsigned long long conversion */ |
DEFINE__AEABI_D2XLZ(d2ulz, 0) |
/* double to long long conversion */ |
DEFINE__AEABI_D2XLZ(d2lz, 1) |
/* long long to float conversion */ |
#define DEFINE__AEABI_XL2F(name, with_sign) \ |
unsigned __aeabi_ ## name(unsigned long long v) \ |
{ \ |
int s /* shift */, flb /* first lost bit */, sign = 0; \ |
unsigned p = 0 /* power */, ret; \ |
double_unsigned_struct val; \ |
\ |
/* fraction in negative float is encoded in 1's complement */ \ |
if (with_sign && (v & (1ULL << 63))) { \ |
sign = 1; \ |
v = ~v + 1; \ |
} \ |
val.low = v; \ |
val.high = v >> 32; \ |
/* fill fraction bits */ \ |
for (s = 31, p = 1 << 31; p && !(val.high & p); s--, p >>= 1); \ |
if (p) { \ |
ret = val.high & (p - 1); \ |
if (s < FLOAT_FRAC_BITS) { \ |
ret <<= FLOAT_FRAC_BITS - s; \ |
ret |= val.low >> (32 - (FLOAT_FRAC_BITS - s)); \ |
flb = (val.low >> (32 - (FLOAT_FRAC_BITS - s - 1))) & 1; \ |
} else { \ |
flb = (ret >> (s - FLOAT_FRAC_BITS - 1)) & 1; \ |
ret >>= s - FLOAT_FRAC_BITS; \ |
} \ |
s += 32; \ |
} else { \ |
for (s = 31, p = 1 << 31; p && !(val.low & p); s--, p >>= 1); \ |
if (p) { \ |
ret = val.low & (p - 1); \ |
if (s <= FLOAT_FRAC_BITS) { \ |
ret <<= FLOAT_FRAC_BITS - s; \ |
flb = 0; \ |
} else { \ |
flb = (ret >> (s - FLOAT_FRAC_BITS - 1)) & 1; \ |
ret >>= s - FLOAT_FRAC_BITS; \ |
} \ |
} else \ |
return 0; \ |
} \ |
if (flb) \ |
ret++; \ |
\ |
/* fill exponent bits */ \ |
ret |= (s + ONE_EXP(FLOAT)) << FLOAT_FRAC_BITS; \ |
\ |
/* fill sign bit */ \ |
ret |= sign << 31; \ |
\ |
return ret; \ |
} |
/* unsigned long long to float conversion */ |
DEFINE__AEABI_XL2F(ul2f, 0) |
/* long long to float conversion */ |
DEFINE__AEABI_XL2F(l2f, 1) |
/* long long to double conversion */ |
#define __AEABI_XL2D(name, with_sign) \ |
void __aeabi_ ## name(unsigned long long v) \ |
{ \ |
int s /* shift */, high_shift, sign = 0; \ |
unsigned tmp, p = 0; \ |
double_unsigned_struct val, ret; \ |
\ |
/* fraction in negative float is encoded in 1's complement */ \ |
if (with_sign && (v & (1ULL << 63))) { \ |
sign = 1; \ |
v = ~v + 1; \ |
} \ |
val.low = v; \ |
val.high = v >> 32; \ |
\ |
/* fill fraction bits */ \ |
for (s = 31, p = 1 << 31; p && !(val.high & p); s--, p >>= 1); \ |
if (p) { \ |
tmp = val.high & (p - 1); \ |
if (s < DOUBLE_FRAC_BITS - 32) { \ |
high_shift = DOUBLE_FRAC_BITS - 32 - s; \ |
ret.high = tmp << high_shift; \ |
ret.high |= val.low >> (32 - high_shift); \ |
ret.low = val.low << high_shift; \ |
} else { \ |
high_shift = s - (DOUBLE_FRAC_BITS - 32); \ |
ret.high = tmp >> high_shift; \ |
ret.low = tmp << (32 - high_shift); \ |
ret.low |= val.low >> high_shift; \ |
if ((val.low >> (high_shift - 1)) & 1) { \ |
if (ret.low == UINT_MAX) { \ |
ret.high++; \ |
ret.low = 0; \ |
} else \ |
ret.low++; \ |
} \ |
} \ |
s += 32; \ |
} else { \ |
for (s = 31, p = 1 << 31; p && !(val.low & p); s--, p >>= 1); \ |
if (p) { \ |
tmp = val.low & (p - 1); \ |
if (s <= DOUBLE_FRAC_BITS - 32) { \ |
high_shift = DOUBLE_FRAC_BITS - 32 - s; \ |
ret.high = tmp << high_shift; \ |
ret.low = 0; \ |
} else { \ |
high_shift = s - (DOUBLE_FRAC_BITS - 32); \ |
ret.high = tmp >> high_shift; \ |
ret.low = tmp << (32 - high_shift); \ |
} \ |
} else { \ |
ret.high = ret.low = 0; \ |
double_unsigned_struct_return(ret); \ |
} \ |
} \ |
\ |
/* fill exponent bits */ \ |
ret.high |= (s + ONE_EXP(DOUBLE)) << (DOUBLE_FRAC_BITS - 32); \ |
\ |
/* fill sign bit */ \ |
ret.high |= sign << 31; \ |
\ |
double_unsigned_struct_return(ret); \ |
} |
/* unsigned long long to double conversion */ |
__AEABI_XL2D(ul2d, 0) |
/* long long to double conversion */ |
__AEABI_XL2D(l2d, 1) |
/* Long long helper functions */ |
/* TODO: add error in case of den == 0 (see §4.3.1 and §4.3.2) */ |
/* Declares `type` as a quotient/remainder pair for a signed `basetype`:
   quot has the base type, rem its unsigned counterpart. */
#define define_aeabi_xdivmod_signed_type(basetype, type) \
  typedef struct type {                                  \
    basetype quot;          /* quotient */               \
    unsigned basetype rem;  /* remainder */              \
  } type
/* Declares `type` as a quotient/remainder pair whose two members both have
   the type `basetype`. */
#define define_aeabi_xdivmod_unsigned_type(basetype, type) \
  typedef struct type {                                    \
    basetype quot;  /* quotient */                         \
    basetype rem;   /* remainder */                        \
  } type
/* Generates `static inline rettype aeabi_<name>(type num, type den)`, an
   unsigned division by repeated subtraction of den scaled by powers of two.
   rettype must have quot and rem members; typemacro is the <limits.h>
   prefix whose _MAX bounds `type` (e.g. UINT).

   A zero divisor returns quotient 0 and the numerator as remainder instead
   of looping forever.
   NOTE(review): the TODO above asks for a proper division-by-zero error;
   this guard only guarantees termination. */
#define AEABI_UXDIVMOD(name,type, rettype, typemacro) \
static inline rettype aeabi_ ## name (type num, type den) \
{ \
  rettype ret; \
  type quot = 0; \
  \
  if (den == 0) { \
    ret.quot = 0; \
    ret.rem = num; \
    return ret; \
  } \
  \
  /* Increase quotient while it is less than numerator */ \
  while (num >= den) { \
    type q = 1; \
    \
    /* Find closest power of two.  The range test comes first so that */ \
    /* (q << 1) * den is only evaluated when it cannot wrap around */ \
    while (q * den <= typemacro ## _MAX / 2 && (q << 1) * den <= num) \
      q <<= 1; \
    \
    /* Compute difference between current quotient and numerator */ \
    num -= q * den; \
    quot += q; \
  } \
  ret.quot = quot; \
  ret.rem = num; \
  return ret; \
}
/* Generates the signed helper `void __aeabi_<name>(type numerator,
   type denominator)`.  It divides the magnitudes with aeabi_<uiname>(),
   negates the quotient when the operand signs differ, gives the remainder
   the sign of the numerator, and hands the pair back through
   <rettype>_return(). */
#define __AEABI_XDIVMOD(name, type, uiname, rettype, urettype, typemacro) \
void __aeabi_ ## name(type numerator, type denominator) \
{ \
  unsigned type num, den; \
  urettype uxdiv_ret; \
  rettype ret; \
  \
  /* magnitudes of both operands */ \
  num = (numerator >= 0) ? numerator : 0 - numerator; \
  den = (denominator >= 0) ? denominator : 0 - denominator; \
  uxdiv_ret = aeabi_ ## uiname(num, den); \
  \
  /* the sign bit of the xor is set exactly when the signs differ */ \
  if ((numerator ^ denominator) & typemacro ## _MIN) \
    ret.quot = 0 - uxdiv_ret.quot; \
  else \
    ret.quot = uxdiv_ret.quot; \
  \
  if (numerator < 0) \
    ret.rem = 0 - uxdiv_ret.rem; \
  else \
    ret.rem = uxdiv_ret.rem; \
  \
  rettype ## _return(ret); \
}
define_aeabi_xdivmod_signed_type(long long, lldiv_t); |
define_aeabi_xdivmod_unsigned_type(unsigned long long, ulldiv_t); |
define_aeabi_xdivmod_signed_type(int, idiv_t); |
define_aeabi_xdivmod_unsigned_type(unsigned, uidiv_t); |
REGS_RETURN(lldiv_t, lldiv_t) |
REGS_RETURN(ulldiv_t, ulldiv_t) |
REGS_RETURN(idiv_t, idiv_t) |
REGS_RETURN(uidiv_t, uidiv_t) |
AEABI_UXDIVMOD(uldivmod, unsigned long long, ulldiv_t, ULONG) |
__AEABI_XDIVMOD(ldivmod, long long, uldivmod, lldiv_t, ulldiv_t, LLONG) |
/* Unsigned 64-bit division helper: divides num by den with
   aeabi_uldivmod() and hands the quotient/remainder pair back through
   ulldiv_t_return(). */
void __aeabi_uldivmod(unsigned long long num, unsigned long long den)
{
  ulldiv_t_return(aeabi_uldivmod(num, den));
}
/* Logical left shift of the two-word value `val` by `shift` bits; the
   result is handed back through double_unsigned_struct_return(). */
void __aeabi_llsl(double_unsigned_struct val, int shift)
{
  double_unsigned_struct ret;

  /* a shift of a whole word or more first moves the low word up */
  if (shift >= 32) {
    val.high = val.low;
    val.low = 0;
    shift -= 32;
  }

  /* nothing left to shift: val is already the answer */
  if (shift <= 0) {
    double_unsigned_struct_return(val);
    return;
  }

  ret.high = (val.high << shift) | (val.low >> (32 - shift));
  ret.low = val.low << shift;
  double_unsigned_struct_return(ret);
}
#define aeabi_lsr(val, shift, fill, type) \ |
type ## _struct ret; \ |
\ |
if (shift >= 32) { \ |
val.low = val.high; \ |
val.high = fill; \ |
shift -= 32; \ |
} \ |
if (shift > 0) { \ |
ret.high = val.high >> shift; \ |
ret.low = (val.high << (32 - shift)) | (val.low >> shift); \ |
type ## _struct_return(ret); \ |
return; \ |
} \ |
type ## _struct_return(val); |
void __aeabi_llsr(double_unsigned_struct val, int shift) |
{ |
aeabi_lsr(val, shift, 0, double_unsigned); |
} |
void __aeabi_lasr(unsigned_int_struct val, int shift) |
{ |
aeabi_lsr(val, shift, val.high >> 31, unsigned_int); |
} |
/* Integer division functions */ |
/* 32-bit unsigned divider shared by the integer helpers below. */
AEABI_UXDIVMOD(uidivmod, unsigned, uidiv_t, UINT)

/* Signed 32-bit division: divides the magnitudes, then negates the
   quotient when the operands have different signs. */
int __aeabi_idiv(int numerator, int denominator)
{
  uidiv_t res;
  unsigned abs_num = (numerator >= 0) ? numerator : 0 - numerator;
  unsigned abs_den = (denominator >= 0) ? denominator : 0 - denominator;

  res = aeabi_uidivmod(abs_num, abs_den);

  /* the sign bit of the xor is set exactly when the signs differ */
  if ((numerator ^ denominator) & INT_MIN)
    res.quot = 0 - res.quot;

  return res.quot;
}
unsigned __aeabi_uidiv(unsigned num, unsigned den) |
{ |
return aeabi_uidivmod(num, den).quot; |
} |
/* Signed 32-bit quotient-and-remainder helper (__aeabi_idivmod). */
__AEABI_XDIVMOD(idivmod, int, uidivmod, idiv_t, uidiv_t, INT)

/* Unsigned 32-bit quotient-and-remainder helper: hands the pair computed
   by aeabi_uidivmod() back through uidiv_t_return(). */
void __aeabi_uidivmod(unsigned num, unsigned den)
{
  uidiv_t_return(aeabi_uidivmod(num, den));
}
/programs/develop/ktcc/trunk/source/lib/bcheck.c |
---|
0,0 → 1,950 |
/* |
* Tiny C Memory and bounds checker |
* |
* Copyright (c) 2002 Fabrice Bellard |
* |
* This library is free software; you can redistribute it and/or |
* modify it under the terms of the GNU Lesser General Public |
* License as published by the Free Software Foundation; either |
* version 2 of the License, or (at your option) any later version. |
* |
* This library is distributed in the hope that it will be useful, |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
* Lesser General Public License for more details. |
* |
* You should have received a copy of the GNU Lesser General Public |
* License along with this library; if not, write to the Free Software |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
*/ |
#include <stdlib.h> |
#include <stdio.h> |
#include <stdarg.h> |
#include <string.h> |
#if !defined(__FreeBSD__) && !defined(__FreeBSD_kernel__) \ |
&& !defined(__DragonFly__) && !defined(__OpenBSD__) && !defined(__NetBSD__) |
#include <malloc.h> |
#endif |
#if !defined(_WIN32) |
#include <unistd.h> |
#endif |
/* Uncomment to turn the dprintf() traces in this file into fprintf() calls. */
/* #define BOUND_DEBUG */

#if defined(BOUND_DEBUG)
# define dprintf(a...) fprintf(a)
#else
/* tracing disabled: the call and its arguments expand to nothing */
# define dprintf(a...)
#endif
/* define so that bound array is static (faster, but use memory if
   bound checking not used) */
/* #define BOUND_STATIC */

/* use malloc hooks. Currently the code cannot be reliable if no hooks */
#define CONFIG_TCC_MALLOC_HOOKS
#define HAVE_MEMALIGN

/* Environments listed here get both settings switched off again. */
#if defined(_WIN32) || defined(TCC_UCLIBC) \
    || defined(__UCLIBC__) || defined(__dietlibc__) \
    || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
    || defined(__DragonFly__) || defined(__OpenBSD__) || defined(__NetBSD__)
#warning Bound checking does not support malloc (etc.) in this environment.
#undef CONFIG_TCC_MALLOC_HOOKS
#undef HAVE_MEMALIGN
#endif
/* An address is split into three bit fields: T1 indexes the page table,
   T2 indexes an entry inside a page, T3 is what remains. */
#define BOUND_T1_BITS 13
#define BOUND_T2_BITS 11
#define BOUND_T3_BITS (8 * sizeof(size_t) - (BOUND_T1_BITS + BOUND_T2_BITS))
/* shift used when an entry index is turned into a byte offset */
#define BOUND_E_BITS  (sizeof(size_t))

#define BOUND_T1_SIZE (1 << BOUND_T1_BITS)
#define BOUND_T2_SIZE (1 << BOUND_T2_BITS)
#define BOUND_T3_SIZE (1 << BOUND_T3_BITS)

#define BOUND_T23_BITS (BOUND_T3_BITS + BOUND_T2_BITS)
#define BOUND_T23_SIZE (1 << BOUND_T23_BITS)

/* this pointer is generated when bound check is incorrect */
#define INVALID_POINTER ((void *)(-2))

/* size of an empty region */
#define EMPTY_SIZE (~(size_t)0)

/* size of an invalid region */
#define INVALID_SIZE 0
/* One tracked region; further regions are chained through `next`. */
typedef struct BoundEntry {
    size_t start;             /* first address of the region */
    size_t size;              /* size of the region */
    struct BoundEntry *next;  /* next region in the list, or NULL */
    size_t is_invalid;        /* true if pointers outside region are invalid */
} BoundEntry;
/* external interface */
void __bound_init(void);
void __bound_new_region(void *p, size_t size);
int __bound_delete_region(void *p);

#if defined(__attribute__)
/* an __attribute__ macro is defined in the system headers */
# undef __attribute__
#endif

/* calling convention applied to the pointer-checking entry points */
#define FASTCALL __attribute__((regparm(3)))

/* allocation wrappers; each takes the caller address as last argument */
void *__bound_malloc(size_t size, const void *caller);
void *__bound_memalign(size_t size, size_t align, const void *caller);
void *__bound_realloc(void *ptr, size_t size, const void *caller);
void __bound_free(void *ptr, const void *caller);

/* helpers defined later in this file */
static void *libc_malloc(size_t size);
static void libc_free(void *ptr);
static void install_malloc_hooks(void);
static void restore_malloc_hooks(void);
#ifdef CONFIG_TCC_MALLOC_HOOKS |
static void *saved_malloc_hook; |
static void *saved_free_hook; |
static void *saved_realloc_hook; |
static void *saved_memalign_hook; |
#endif |
/* TCC definitions */ |
extern char __bounds_start; /* start of static bounds table */ |
/* error message, just for TCC */ |
const char *__bound_error_msg; |
/* runtime error output */ |
extern void rt_error(size_t pc, const char *fmt, ...); |
#ifdef BOUND_STATIC |
static BoundEntry *__bound_t1[BOUND_T1_SIZE]; /* page table */ |
#else |
static BoundEntry **__bound_t1; /* page table */ |
#endif |
static BoundEntry *__bound_empty_t2; /* empty page, for unused pages */ |
static BoundEntry *__bound_invalid_t2; /* invalid page, for invalid pointers */ |
/* Walks the list starting at e1 looking for a region that contains p.
   On a hit the start/size of that region are swapped into e1, so the match
   ends up at the head, and e1 is returned.  Otherwise the invalid page is
   returned when e1 is flagged invalid, the empty page if not. */
static BoundEntry *__bound_find_region(BoundEntry *e1, void *p)
{
    BoundEntry *cur;
    size_t delta, swap;

    for (cur = e1; cur != NULL; cur = cur->next) {
        delta = (size_t)p - cur->start;
        if (delta > cur->size)
            continue;

        /* put region at the head */
        swap = e1->start;
        e1->start = cur->start;
        cur->start = swap;

        swap = e1->size;
        e1->size = cur->size;
        cur->size = swap;
        return e1;
    }

    /* no entry found: return empty entry or invalid entry */
    return e1->is_invalid ? __bound_invalid_t2 : __bound_empty_t2;
}
/* print a bound error message */ |
static void bound_error(const char *fmt, ...) |
{ |
__bound_error_msg = fmt; |
fprintf(stderr,"%s %s: %s\n", __FILE__, __FUNCTION__, fmt); |
*(int *)0 = 0; /* force a runtime error */ |
} |
/* Reports that the checker could not allocate memory for itself. */
static void bound_alloc_error(void)
{
    bound_error(
        "not enough memory for bound checking code");
}
/* return '(p + offset)' for pointer arithmetic (a pointer can reach |
the end of a region in this case */ |
void * FASTCALL __bound_ptr_add(void *p, size_t offset) |
{ |
size_t addr = (size_t)p; |
BoundEntry *e; |
__bound_init(); |
dprintf(stderr, "%s %s: %p %p\n", __FILE__, __FUNCTION__, p, offset); |
e = __bound_t1[addr >> (BOUND_T2_BITS + BOUND_T3_BITS)]; |
e = (BoundEntry *)((char *)e + |
((addr >> (BOUND_T3_BITS - BOUND_E_BITS)) & |
((BOUND_T2_SIZE - 1) << BOUND_E_BITS))); |
addr -= e->start; |
if (addr > e->size) { |
e = __bound_find_region(e, p); |
addr = (size_t)p - e->start; |
} |
addr += offset; |
if (addr >= e->size) { |
fprintf(stderr,"%s %s: %p is outside of the region\n", __FILE__, __FUNCTION__, p + offset); |
return INVALID_POINTER; /* return an invalid pointer */ |
} |
return p + offset; |
} |
/* return '(p + offset)' for pointer indirection (the resulting must |
be strictly inside the region */ |
#define BOUND_PTR_INDIR(dsize) \ |
void * FASTCALL __bound_ptr_indir ## dsize (void *p, size_t offset) \ |
{ \ |
size_t addr = (size_t)p; \ |
BoundEntry *e; \ |
\ |
dprintf(stderr, "%s %s: %p %p start\n", __FILE__, __FUNCTION__, p, offset); \ |
\ |
__bound_init(); \ |
e = __bound_t1[addr >> (BOUND_T2_BITS + BOUND_T3_BITS)]; \ |
e = (BoundEntry *)((char *)e + \ |
((addr >> (BOUND_T3_BITS - BOUND_E_BITS)) & \ |
((BOUND_T2_SIZE - 1) << BOUND_E_BITS))); \ |
addr -= e->start; \ |
if (addr > e->size) { \ |
e = __bound_find_region(e, p); \ |
addr = (size_t)p - e->start; \ |
} \ |
addr += offset + dsize; \ |
if (addr > e->size) { \ |
fprintf(stderr,"%s %s: %p is outside of the region\n", __FILE__, __FUNCTION__, p + offset); \ |
return INVALID_POINTER; /* return an invalid pointer */ \ |
} \ |
dprintf(stderr, "%s %s: return p+offset = %p\n", __FILE__, __FUNCTION__, p + offset); \ |
return p + offset; \ |
} |
BOUND_PTR_INDIR(1) |
BOUND_PTR_INDIR(2) |
BOUND_PTR_INDIR(4) |
BOUND_PTR_INDIR(8) |
BOUND_PTR_INDIR(12) |
BOUND_PTR_INDIR(16) |
/* return the frame pointer of the caller: stores
   __builtin_frame_address(1), converted to size_t, into fp */
#define GET_CALLER_FP(fp) \
{ \
    fp = (size_t)__builtin_frame_address(1); \
}
/* called when entering a function to add all the local regions */ |
void FASTCALL __bound_local_new(void *p1) |
{ |
size_t addr, size, fp, *p = p1; |
dprintf(stderr, "%s, %s start p1=%p\n", __FILE__, __FUNCTION__, p); |
GET_CALLER_FP(fp); |
for(;;) { |
addr = p[0]; |
if (addr == 0) |
break; |
addr += fp; |
size = p[1]; |
p += 2; |
__bound_new_region((void *)addr, size); |
} |
dprintf(stderr, "%s, %s end\n", __FILE__, __FUNCTION__); |
} |
/* called when leaving a function to delete all the local regions */ |
void FASTCALL __bound_local_delete(void *p1) |
{ |
size_t addr, fp, *p = p1; |
GET_CALLER_FP(fp); |
for(;;) { |
addr = p[0]; |
if (addr == 0) |
break; |
addr += fp; |
p += 2; |
__bound_delete_region((void *)addr); |
} |
} |
static BoundEntry *__bound_new_page(void) |
{ |
BoundEntry *page; |
size_t i; |
page = libc_malloc(sizeof(BoundEntry) * BOUND_T2_SIZE); |
if (!page) |
bound_alloc_error(); |
for(i=0;i<BOUND_T2_SIZE;i++) { |
/* put empty entries */ |
page[i].start = 0; |
page[i].size = EMPTY_SIZE; |
page[i].next = NULL; |
page[i].is_invalid = 0; |
} |
return page; |
} |
/* currently we use malloc(). Should use bound_new_page() */ |
static BoundEntry *bound_new_entry(void) |
{ |
BoundEntry *e; |
e = libc_malloc(sizeof(BoundEntry)); |
return e; |
} |
static void bound_free_entry(BoundEntry *e) |
{ |
libc_free(e); |
} |
static BoundEntry *get_page(size_t index) |
{ |
BoundEntry *page; |
page = __bound_t1[index]; |
if (!page || page == __bound_empty_t2 || page == __bound_invalid_t2) { |
/* create a new page if necessary */ |
page = __bound_new_page(); |
__bound_t1[index] = page; |
} |
return page; |
} |
/* mark a region as being invalid (can only be used during init) */ |
static void mark_invalid(size_t addr, size_t size) |
{ |
size_t start, end; |
BoundEntry *page; |
size_t t1_start, t1_end, i, j, t2_start, t2_end; |
start = addr; |
end = addr + size; |
t2_start = (start + BOUND_T3_SIZE - 1) >> BOUND_T3_BITS; |
if (end != 0) |
t2_end = end >> BOUND_T3_BITS; |
else |
t2_end = 1 << (BOUND_T1_BITS + BOUND_T2_BITS); |
#if 0 |
dprintf(stderr, "mark_invalid: start = %x %x\n", t2_start, t2_end); |
#endif |
/* first we handle full pages */ |
t1_start = (t2_start + BOUND_T2_SIZE - 1) >> BOUND_T2_BITS; |
t1_end = t2_end >> BOUND_T2_BITS; |
i = t2_start & (BOUND_T2_SIZE - 1); |
j = t2_end & (BOUND_T2_SIZE - 1); |
if (t1_start == t1_end) { |
page = get_page(t2_start >> BOUND_T2_BITS); |
for(; i < j; i++) { |
page[i].size = INVALID_SIZE; |
page[i].is_invalid = 1; |
} |
} else { |
if (i > 0) { |
page = get_page(t2_start >> BOUND_T2_BITS); |
for(; i < BOUND_T2_SIZE; i++) { |
page[i].size = INVALID_SIZE; |
page[i].is_invalid = 1; |
} |
} |
for(i = t1_start; i < t1_end; i++) { |
__bound_t1[i] = __bound_invalid_t2; |
} |
if (j != 0) { |
page = get_page(t1_end); |
for(i = 0; i < j; i++) { |
page[i].size = INVALID_SIZE; |
page[i].is_invalid = 1; |
} |
} |
} |
} |
void __bound_init(void) |
{ |
size_t i; |
BoundEntry *page; |
size_t start, size; |
size_t *p; |
static int inited; |
if (inited) |
return; |
inited = 1; |
dprintf(stderr, "%s, %s() start\n", __FILE__, __FUNCTION__); |
/* save malloc hooks and install bound check hooks */ |
install_malloc_hooks(); |
#ifndef BOUND_STATIC |
__bound_t1 = libc_malloc(BOUND_T1_SIZE * sizeof(BoundEntry *)); |
if (!__bound_t1) |
bound_alloc_error(); |
#endif |
__bound_empty_t2 = __bound_new_page(); |
for(i=0;i<BOUND_T1_SIZE;i++) { |
__bound_t1[i] = __bound_empty_t2; |
} |
page = __bound_new_page(); |
for(i=0;i<BOUND_T2_SIZE;i++) { |
/* put invalid entries */ |
page[i].start = 0; |
page[i].size = INVALID_SIZE; |
page[i].next = NULL; |
page[i].is_invalid = 1; |
} |
__bound_invalid_t2 = page; |
/* invalid pointer zone */ |
start = (size_t)INVALID_POINTER & ~(BOUND_T23_SIZE - 1); |
size = BOUND_T23_SIZE; |
mark_invalid(start, size); |
#if defined(CONFIG_TCC_MALLOC_HOOKS) |
/* malloc zone is also marked invalid. can only use that with |
* hooks because all libs should use the same malloc. The solution |
* would be to build a new malloc for tcc. |
* |
* usually heap (= malloc zone) comes right after bss, i.e. after _end, but |
* not always - either if we are running from under `tcc -b -run`, or if |
* address space randomization is turned on(a), heap start will be separated |
* from bss end. |
* |
* So sbrk(0) will be a good approximation for start_brk: |
* |
* - if we are a separately compiled program, __bound_init() runs early, |
* and sbrk(0) should be equal or very near to start_brk(b) (in case other |
* constructors malloc something), or |
* |
* - if we are running from under `tcc -b -run`, sbrk(0) will return |
* start of heap portion which is under this program control, and not |
* mark as invalid earlier allocated memory. |
* |
* |
* (a) /proc/sys/kernel/randomize_va_space = 2, on Linux; |
* usually turned on by default. |
* |
* (b) on Linux >= v3.3, the alternative is to read |
* start_brk from /proc/self/stat |
*/ |
start = (size_t)sbrk(0); |
size = 128 * 0x100000; |
mark_invalid(start, size); |
#endif |
/* add all static bound check values */ |
p = (size_t *)&__bounds_start; |
while (p[0] != 0) { |
__bound_new_region((void *)p[0], p[1]); |
p += 2; |
} |
dprintf(stderr, "%s, %s() end\n\n", __FILE__, __FUNCTION__); |
} |
/* Register the NULL-terminated pointer vector starting at 'p' as a
   checked region. The region covers every slot up to and including
   the terminating NULL entry. */
void __bound_main_arg(void **p)
{
    void *start = p;
    size_t length;

    /* step past the terminating NULL slot */
    while (*p++);
    length = (size_t)((char *)p - (char *)start);

    /* the format has two %p conversions: region start and region length */
    dprintf(stderr, "%s, %s calling __bound_new_region(%p, %p)\n",
            __FILE__, __FUNCTION__, start, (void *)length);
    __bound_new_region(start, length);
}
/* Shutdown counterpart of the bound checker: puts back the allocator
   hooks that were saved by install_malloc_hooks() (a no-op unless
   CONFIG_TCC_MALLOC_HOOKS is defined). */
void __bound_exit(void)
{
    restore_malloc_hooks();
}
/* Record the region (start, size) in table slot 'e'.
   An empty slot (start == 0) is filled in place. Otherwise the current
   head contents are moved into a freshly allocated chained entry and
   the new region becomes the head of the list. */
static inline void add_region(BoundEntry *e,
                              size_t start, size_t size)
{
    if (e->start != 0) {
        /* slot occupied: push the old head one step down the chain */
        BoundEntry *moved = bound_new_entry();

        moved->start = e->start;
        moved->size = e->size;
        moved->next = e->next;
        e->next = moved;
    }
    /* in both cases the new region ends up in the head slot */
    e->start = start;
    e->size = size;
}
/* create a new region. It should not already exist in the region list */ |
void __bound_new_region(void *p, size_t size) |
{ |
size_t start, end; |
BoundEntry *page, *e, *e2; |
size_t t1_start, t1_end, i, t2_start, t2_end; |
__bound_init(); |
dprintf(stderr, "%s, %s(%p, %p) start\n", |
__FILE__, __FUNCTION__, p, size); |
start = (size_t)p; |
end = start + size; |
t1_start = start >> (BOUND_T2_BITS + BOUND_T3_BITS); |
t1_end = end >> (BOUND_T2_BITS + BOUND_T3_BITS); |
/* start */ |
page = get_page(t1_start); |
t2_start = (start >> (BOUND_T3_BITS - BOUND_E_BITS)) & |
((BOUND_T2_SIZE - 1) << BOUND_E_BITS); |
t2_end = (end >> (BOUND_T3_BITS - BOUND_E_BITS)) & |
((BOUND_T2_SIZE - 1) << BOUND_E_BITS); |
e = (BoundEntry *)((char *)page + t2_start); |
add_region(e, start, size); |
if (t1_end == t1_start) { |
/* same ending page */ |
e2 = (BoundEntry *)((char *)page + t2_end); |
if (e2 > e) { |
e++; |
for(;e<e2;e++) { |
e->start = start; |
e->size = size; |
} |
add_region(e, start, size); |
} |
} else { |
/* mark until end of page */ |
e2 = page + BOUND_T2_SIZE; |
e++; |
for(;e<e2;e++) { |
e->start = start; |
e->size = size; |
} |
/* mark intermediate pages, if any */ |
for(i=t1_start+1;i<t1_end;i++) { |
page = get_page(i); |
e2 = page + BOUND_T2_SIZE; |
for(e=page;e<e2;e++) { |
e->start = start; |
e->size = size; |
} |
} |
/* last page */ |
page = get_page(t1_end); |
e2 = (BoundEntry *)((char *)page + t2_end); |
for(e=page;e<e2;e++) { |
e->start = start; |
e->size = size; |
} |
add_region(e, start, size); |
} |
dprintf(stderr, "%s, %s end\n", __FILE__, __FUNCTION__); |
} |
/* delete a region */ |
static inline void delete_region(BoundEntry *e, |
void *p, size_t empty_size) |
{ |
size_t addr; |
BoundEntry *e1; |
addr = (size_t)p; |
addr -= e->start; |
if (addr <= e->size) { |
/* region found is first one */ |
e1 = e->next; |
if (e1 == NULL) { |
/* no more region: mark it empty */ |
e->start = 0; |
e->size = empty_size; |
} else { |
/* copy next region in head */ |
e->start = e1->start; |
e->size = e1->size; |
e->next = e1->next; |
bound_free_entry(e1); |
} |
} else { |
/* find the matching region */ |
for(;;) { |
e1 = e; |
e = e->next; |
/* region not found: do nothing */ |
if (e == NULL) |
break; |
addr = (size_t)p - e->start; |
if (addr <= e->size) { |
/* found: remove entry */ |
e1->next = e->next; |
bound_free_entry(e); |
break; |
} |
} |
} |
} |
/* WARNING: 'p' must be the starting point of the region. */ |
/* return non zero if error */ |
int __bound_delete_region(void *p) |
{ |
size_t start, end, addr, size, empty_size; |
BoundEntry *page, *e, *e2; |
size_t t1_start, t1_end, t2_start, t2_end, i; |
__bound_init(); |
dprintf(stderr, "%s %s() start\n", __FILE__, __FUNCTION__); |
start = (size_t)p; |
t1_start = start >> (BOUND_T2_BITS + BOUND_T3_BITS); |
t2_start = (start >> (BOUND_T3_BITS - BOUND_E_BITS)) & |
((BOUND_T2_SIZE - 1) << BOUND_E_BITS); |
/* find region size */ |
page = __bound_t1[t1_start]; |
e = (BoundEntry *)((char *)page + t2_start); |
addr = start - e->start; |
if (addr > e->size) |
e = __bound_find_region(e, p); |
/* test if invalid region */ |
if (e->size == EMPTY_SIZE || (size_t)p != e->start) |
return -1; |
/* compute the size we put in invalid regions */ |
if (e->is_invalid) |
empty_size = INVALID_SIZE; |
else |
empty_size = EMPTY_SIZE; |
size = e->size; |
end = start + size; |
/* now we can free each entry */ |
t1_end = end >> (BOUND_T2_BITS + BOUND_T3_BITS); |
t2_end = (end >> (BOUND_T3_BITS - BOUND_E_BITS)) & |
((BOUND_T2_SIZE - 1) << BOUND_E_BITS); |
delete_region(e, p, empty_size); |
if (t1_end == t1_start) { |
/* same ending page */ |
e2 = (BoundEntry *)((char *)page + t2_end); |
if (e2 > e) { |
e++; |
for(;e<e2;e++) { |
e->start = 0; |
e->size = empty_size; |
} |
delete_region(e, p, empty_size); |
} |
} else { |
/* mark until end of page */ |
e2 = page + BOUND_T2_SIZE; |
e++; |
for(;e<e2;e++) { |
e->start = 0; |
e->size = empty_size; |
} |
/* mark intermediate pages, if any */ |
/* XXX: should free them */ |
for(i=t1_start+1;i<t1_end;i++) { |
page = get_page(i); |
e2 = page + BOUND_T2_SIZE; |
for(e=page;e<e2;e++) { |
e->start = 0; |
e->size = empty_size; |
} |
} |
/* last page */ |
page = get_page(t1_end); |
e2 = (BoundEntry *)((char *)page + t2_end); |
for(e=page;e<e2;e++) { |
e->start = 0; |
e->size = empty_size; |
} |
delete_region(e, p, empty_size); |
} |
dprintf(stderr, "%s %s() end\n", __FILE__, __FUNCTION__); |
return 0; |
} |
/* return the size of the region starting at p, or EMPTY_SIZE if non |
existent region. */ |
static size_t get_region_size(void *p) |
{ |
size_t addr = (size_t)p; |
BoundEntry *e; |
e = __bound_t1[addr >> (BOUND_T2_BITS + BOUND_T3_BITS)]; |
e = (BoundEntry *)((char *)e + |
((addr >> (BOUND_T3_BITS - BOUND_E_BITS)) & |
((BOUND_T2_SIZE - 1) << BOUND_E_BITS))); |
addr -= e->start; |
if (addr > e->size) |
e = __bound_find_region(e, p); |
if (e->start != (size_t)p) |
return EMPTY_SIZE; |
return e->size; |
} |
/* patched memory functions */ |
/* force compiler to perform stores coded up to this point */ |
#define barrier() __asm__ __volatile__ ("": : : "memory") |
/* Save the current allocator hooks and point them at the checked
   __bound_* replacements. Compiles to an empty function unless
   CONFIG_TCC_MALLOC_HOOKS is defined. */
static void install_malloc_hooks(void)
{
#ifdef CONFIG_TCC_MALLOC_HOOKS
    /* remember what was installed before us */
    saved_malloc_hook = __malloc_hook;
    saved_free_hook = __free_hook;
    saved_realloc_hook = __realloc_hook;
    saved_memalign_hook = __memalign_hook;

    /* divert the allocator entry points to the checked versions */
    __malloc_hook = __bound_malloc;
    __free_hook = __bound_free;
    __realloc_hook = __bound_realloc;
    __memalign_hook = __bound_memalign;

    /* make sure the stores above are emitted before continuing */
    barrier();
#endif
}

/* Put back the allocator hooks recorded by install_malloc_hooks().
   Compiles to an empty function unless CONFIG_TCC_MALLOC_HOOKS is
   defined. */
static void restore_malloc_hooks(void)
{
#ifdef CONFIG_TCC_MALLOC_HOOKS
    __malloc_hook = saved_malloc_hook;
    __free_hook = saved_free_hook;
    __realloc_hook = saved_realloc_hook;
    __memalign_hook = saved_memalign_hook;

    /* make sure the stores above are emitted before continuing */
    barrier();
#endif
}
/* Call the C library malloc() with the checking hooks temporarily
   removed, then reinstall them. Returns whatever malloc() returned. */
static void *libc_malloc(size_t size)
{
    void *block;

    restore_malloc_hooks();
    block = malloc(size);
    install_malloc_hooks();

    return block;
}

/* Call the C library free() with the checking hooks temporarily
   removed, then reinstall them. */
static void libc_free(void *ptr)
{
    restore_malloc_hooks();
    free(ptr);
    install_malloc_hooks();
}
/* XXX: we should use a malloc which ensures that it is unlikely that
   two malloc'ed blocks have the same address if calls to 'free' are
   made in between. */
/* Checked malloc: allocate 'size' bytes through the C library and
   register the block as a region. 'caller' is not used here.
   Returns NULL when the allocation fails or when 'size' is so large
   that the extra separator byte cannot be added. */
void *__bound_malloc(size_t size, const void *caller)
{
    void *ptr;

    /* size + 1 below would wrap around to 0 for the largest size_t */
    if (size == (size_t)-1)
        return NULL;

    /* we allocate one more byte to ensure the regions will be
       separated by at least one byte. With the glibc malloc, it may
       be in fact not necessary */
    ptr = libc_malloc(size + 1);
    if (!ptr)
        return NULL;

    /* size is printed through %p, so pass it as a pointer-typed value */
    dprintf(stderr, "%s, %s calling __bound_new_region(%p, %p)\n",
            __FILE__, __FUNCTION__, ptr, (void *)size);
    __bound_new_region(ptr, size);
    return ptr;
}
/* Checked memalign: allocate a block with the hooks removed and
   register 'size' bytes of it as a region. 'caller' is not used here.
   Without HAVE_MEMALIGN, alignments above 4 are refused (NULL) and
   plain malloc() is used otherwise.
   NOTE(review): the C library memalign() is called as
   memalign(size + 1, align), while glibc documents the order
   memalign(alignment, size). This function is also installed as
   __memalign_hook, so which parameter really carries the alignment
   needs to be confirmed against the callers before changing it. */
void *__bound_memalign(size_t size, size_t align, const void *caller)
{
    void *ptr;

    restore_malloc_hooks();
#ifdef HAVE_MEMALIGN
    /* we allocate one more byte to ensure the regions will be
       separated by at least one byte. With the glibc malloc, it may
       be in fact not necessary */
    ptr = memalign(size + 1, align);
#else
    if (align <= 4) {
        /* we suppose that malloc aligns to at least four bytes */
        ptr = malloc(size + 1);
    } else {
        /* XXX: handle it ? */
        ptr = NULL;
    }
#endif
    install_malloc_hooks();

    if (ptr != NULL) {
        dprintf(stderr, "%s, %s calling __bound_new_region(%p, %p)\n",
                __FILE__, __FUNCTION__, ptr, size);
        __bound_new_region(ptr, size);
    }
    return ptr;
}
/* Checked free: unregister the region starting at 'ptr' and release
   the block through the C library. A NULL 'ptr' is ignored. A pointer
   that does not start a known region is reported with bound_error().
   'caller' is not used here. */
void __bound_free(void *ptr, const void *caller)
{
    if (ptr != NULL) {
        if (__bound_delete_region(ptr) != 0)
            bound_error("freeing invalid region");
        libc_free(ptr);
    }
}
void *__bound_realloc(void *ptr, size_t size, const void *caller) |
{ |
void *ptr1; |
size_t old_size; |
if (size == 0) { |
__bound_free(ptr, caller); |
return NULL; |
} else { |
ptr1 = __bound_malloc(size, caller); |
if (ptr == NULL || ptr1 == NULL) |
return ptr1; |
old_size = get_region_size(ptr); |
if (old_size == EMPTY_SIZE) |
bound_error("realloc'ing invalid pointer"); |
memcpy(ptr1, ptr, old_size); |
__bound_free(ptr, caller); |
return ptr1; |
} |
} |
#ifndef CONFIG_TCC_MALLOC_HOOKS
/* Checked calloc: allocate a zero-filled array of 'nmemb' elements of
   'size' bytes each through __bound_malloc(). Returns NULL when the
   allocation fails or when nmemb * size does not fit in a size_t. */
void *__bound_calloc(size_t nmemb, size_t size)
{
    void *ptr;

    /* refuse requests whose byte count would wrap around */
    if (nmemb != 0 && size > (size_t)-1 / nmemb)
        return NULL;
    size = size * nmemb;
    ptr = __bound_malloc(size, NULL);
    if (!ptr)
        return NULL;
    memset(ptr, 0, size);
    return ptr;
}
#endif
#if 0
/* Debug helper (compiled out): print every non-empty table entry to
   stderr as "<base address>: <start>:<end> ..." with one line per
   entry and one start:end pair per chained region. */
static void bound_dump(void)
{
    BoundEntry *page, *e;
    size_t i, j;

    fprintf(stderr, "region dump:\n");
    for (i = 0; i < BOUND_T1_SIZE; i++) {
        page = __bound_t1[i];
        for (j = 0; j < BOUND_T2_SIZE; j++) {
            e = page + j;
            /* do not print invalid or empty entries */
            if (e->size != EMPTY_SIZE && e->start != 0) {
                /* size_t values are cast so that they match %lx */
                fprintf(stderr, "%08lx:",
                        (unsigned long)((i << (BOUND_T2_BITS + BOUND_T3_BITS)) +
                                        (j << BOUND_T3_BITS)));
                do {
                    fprintf(stderr, " %08lx:%08lx",
                            (unsigned long)e->start,
                            (unsigned long)(e->start + e->size));
                    e = e->next;
                } while (e != NULL);
                fprintf(stderr, "\n");
            }
        }
    }
}
#endif
/* some useful checked functions */ |
/* check that (p ... p + size - 1) lies inside 'p' region, if any */ |
static void __bound_check(const void *p, size_t size) |
{ |
if (size == 0) |
return; |
p = __bound_ptr_add((void *)p, size - 1); |
if (p == INVALID_POINTER) |
bound_error("invalid pointer"); |
} |
/* Checked memcpy: both ranges are validated with __bound_check() and
   any overlap between them is reported with bound_error() before the
   C library memcpy() is called. Returns the value returned by
   memcpy(). */
void *__bound_memcpy(void *dst, const void *src, size_t size)
{
    void *p;
    const char *s = src;
    const char *d = dst;

    /* size is printed through %p, so pass it as a pointer-typed value */
    dprintf(stderr, "%s %s: start, dst=%p src=%p size=%p\n",
            __FILE__, __FUNCTION__, dst, src, (void *)size);

    __bound_check(dst, size);
    __bound_check(src, size);
    /* check also region overlap, in both directions: the two ranges
       intersect when each one starts before the other one ends */
    if (s < d + size && d < s + size)
        bound_error("overlapping regions in memcpy()");

    p = memcpy(dst, src, size);

    dprintf(stderr, "%s %s: end, p=%p\n", __FILE__, __FUNCTION__, p);
    return p;
}
/* Checked memmove: validates both ranges, then defers to memmove().
   Overlap is allowed, so it is not tested here. */
void *__bound_memmove(void *dst, const void *src, size_t size)
{
    void *result;

    __bound_check(dst, size);
    __bound_check(src, size);
    result = memmove(dst, src, size);
    return result;
}

/* Checked memset: validates the destination range, then defers to
   memset(). */
void *__bound_memset(void *dst, int c, size_t size)
{
    void *result;

    __bound_check(dst, size);
    result = memset(dst, c, size);
    return result;
}
/* XXX: could be optimized */ |
int __bound_strlen(const char *s) |
{ |
const char *p; |
size_t len; |
len = 0; |
for(;;) { |
p = __bound_ptr_indir1((char *)s, len); |
if (p == INVALID_POINTER) |
bound_error("bad pointer in strlen()"); |
if (*p == '\0') |
break; |
len++; |
} |
return len; |
} |
/* Checked strcpy: measures 'src' with __bound_strlen() and copies the
   string including its terminator with __bound_memcpy(). Returns the
   value returned by __bound_memcpy(). */
char *__bound_strcpy(char *dst, const char *src)
{
    size_t len;
    void *p;

    dprintf(stderr, "%s %s: strcpy start, dst=%p src=%p\n",
            __FILE__, __FUNCTION__, dst, src);
    len = __bound_strlen(src);
    p = __bound_memcpy(dst, src, len + 1);
    /* the format has exactly three conversions: file, function, p */
    dprintf(stderr, "%s %s: strcpy end, p=%p\n",
            __FILE__, __FUNCTION__, p);
    return p;
}
/programs/develop/ktcc/trunk/source/lib/buildtcclib1.bat |
---|
0,0 → 1,2 |
:kos32-gcc -c libtcc1.c -DTCC_TARGET_I386 -ID:\VSProjects\msys-kos32-4.8.2\sdk\sources\newlib\libc\include |
D:\VSProjects\msys-kos32-4.8.2\ktcc\trunk\libc\kos32-tcc.exe libtcc1.c -c -DTCC_TARGET_I386 |
/programs/develop/ktcc/trunk/source/lib/lib-arm64.c |
---|
0,0 → 1,652 |
/* |
* TCC runtime library for arm64. |
* |
* Copyright (c) 2015 Edmund Grimley Evans |
* |
* Copying and distribution of this file, with or without modification, |
* are permitted in any medium without royalty provided the copyright |
* notice and this notice are preserved. This file is offered as-is, |
* without any warranty. |
*/ |
#include <stdint.h> |
#include <string.h> |
// Forwards the range [beg, end) unchanged to __arm64_clear_cache,
// which is not defined in this file.
void __clear_cache(void *beg, void *end)
{
    __arm64_clear_cache(beg, end);
}
// 128-bit value held as two 64-bit words. The helpers below treat x0
// as the low half and x1 as the high half (sign and exponent live in
// the top bits of x1).
typedef struct {
    uint64_t x0;
    uint64_t x1;
} u128_t;
static long double f3_zero(int sgn) |
{ |
long double f; |
u128_t x = { 0, (uint64_t)sgn << 63 }; |
memcpy(&f, &x, 16); |
return f; |
} |
static long double f3_infinity(int sgn) |
{ |
long double f; |
u128_t x = { 0, (uint64_t)sgn << 63 | 0x7fff000000000000 }; |
memcpy(&f, &x, 16); |
return f; |
} |
static long double f3_NaN(void) |
{ |
long double f; |
#if 0 |
// ARM's default NaN usually has just the top fraction bit set: |
u128_t x = { 0, 0x7fff800000000000 }; |
#else |
// GCC's library sets all fraction bits: |
u128_t x = { -1, 0x7fffffffffffffff }; |
#endif |
memcpy(&f, &x, 16); |
return f; |
} |
// Store into *f a NaN made from mantissa 'mnt' with the exponent field
// all ones, the top fraction bit set and the sign bit 'sgn'.
// Always returns 1.
static int fp3_convert_NaN(long double *f, int sgn, u128_t mnt)
{
    u128_t bits;

    bits.x0 = mnt.x0;
    bits.x1 = mnt.x1 | 0x7fff800000000000 | (uint64_t)sgn << 63;
    memcpy(f, &bits, 16);
    return 1;
}
// If either unpacked operand is a NaN, store the resulting NaN in *f
// and return 1; otherwise return 0 and leave *f alone.
// Priority: a NaN whose top fraction bit (bit 47 of x1) is clear wins
// over one with that bit set, and operand a wins over operand b.
static int fp3_detect_NaNs(long double *f,
                           int a_sgn, int a_exp, u128_t a,
                           int b_sgn, int b_exp, u128_t b)
{
    // NaN = maximum exponent with a non-zero fraction:
    int a_is_nan = a_exp == 32767 && (a.x0 | a.x1 << 16);
    int b_is_nan = b_exp == 32767 && (b.x0 | b.x1 << 16);

    // Detect signalling NaNs:
    if (a_is_nan && !(a.x1 >> 47 & 1))
        return fp3_convert_NaN(f, a_sgn, a);
    if (b_is_nan && !(b.x1 >> 47 & 1))
        return fp3_convert_NaN(f, b_sgn, b);

    // Detect quiet NaNs:
    if (a_is_nan)
        return fp3_convert_NaN(f, a_sgn, a);
    if (b_is_nan)
        return fp3_convert_NaN(f, b_sgn, b);

    return 0;
}
// Split f into sign (0/1), biased exponent and mantissa.
// For a non-zero exponent field the implicit bit (bit 48 of x1) is
// made explicit; for a zero exponent field the exponent is reported
// as 1 and no implicit bit is added.
static void f3_unpack(int *sgn, int32_t *exp, u128_t *mnt, long double f)
{
    u128_t raw;
    int32_t biased;

    memcpy(&raw, &f, 16);
    *sgn = raw.x1 >> 63;
    biased = raw.x1 >> 48 & 32767;
    // Keep only the 48 fraction bits of the high word:
    raw.x1 = raw.x1 << 16 >> 16;
    if (biased)
        raw.x1 |= (uint64_t)1 << 48;
    else
        biased = 1;
    *exp = biased;
    *mnt = raw;
}
// Shift mnt left until bit 63 of x1 is set, decreasing *exp by the
// number of positions shifted. A zero mantissa is returned unchanged
// and *exp is not touched.
static u128_t f3_normalise(int32_t *exp, u128_t mnt)
{
    int step;

    if ((mnt.x0 | mnt.x1) == 0)
        return mnt;

    // High word empty: move the low word up by 64 bits in one go:
    if (mnt.x1 == 0) {
        mnt.x1 = mnt.x0;
        mnt.x0 = 0;
        *exp -= 64;
    }

    // Try shifts of 32, 16, 8, 4, 2, 1 in turn:
    step = 32;
    while (step != 0) {
        if ((mnt.x1 >> (64 - step)) == 0) {
            mnt.x1 = mnt.x1 << step | mnt.x0 >> (64 - step);
            mnt.x0 = mnt.x0 << step;
            *exp -= step;
        }
        step >>= 1;
    }
    return mnt;
}
// Shift x right by sh bits; if any set bit is shifted out, the lowest
// bit of the result is set ("sticky" bit). sh <= 0 leaves x unchanged.
static u128_t f3_sticky_shift(int32_t sh, u128_t x)
{
    // Everything is shifted out: only the sticky bit can remain:
    if (sh >= 128) {
        x.x0 = !!(x.x0 | x.x1);
        x.x1 = 0;
        return x;
    }

    // Whole-word part of the shift:
    if (sh >= 64) {
        x.x0 = x.x1 | !!x.x0;
        x.x1 = 0;
        sh -= 64;
    }

    // Remaining shift of 1..63 bits:
    if (sh > 0) {
        x.x0 = x.x0 >> sh | x.x1 << (64 - sh) | !!(x.x0 << (64 - sh));
        x.x1 = x.x1 >> sh;
    }
    return x;
}
// Round the normalised mantissa x (top bit in bit 63 of x1) with
// exponent exp and sign sgn, and pack the result. Ties go to the even
// mantissa. exp <= 0 produces a result with a zero exponent field
// unless rounding carries into bit 48; exp >= 32767 after rounding
// gives an infinity.
static long double f3_round(int sgn, int32_t exp, u128_t x)
{
    long double f;
    int guard;

    if (exp > 0) {
        x = f3_sticky_shift(13, x);
    } else {
        x = f3_sticky_shift(14 - exp, x);
        exp = 0;
    }

    // The two lowest bits decide the rounding direction:
    guard = x.x0 & 3;
    x.x0 = x.x0 >> 2 | x.x1 << 62;
    x.x1 = x.x1 >> 2;

    // Round up when above half, or exactly half with an odd mantissa:
    if (guard == 3 || (guard == 2 && (x.x0 & 1))) {
        x.x0 += 1;
        if (x.x0 == 0) {
            // Carry into the high word:
            x.x1 += 1;
            if (x.x1 == (uint64_t)1 << 48) {
                exp = 1;
            } else if (x.x1 == (uint64_t)1 << 49) {
                exp += 1;
                x.x0 = x.x0 >> 1 | x.x1 << 63;
                x.x1 = x.x1 >> 1;
            }
        }
    }

    if (exp >= 32767)
        return f3_infinity(sgn);

    x.x1 = x.x1 << 16 >> 16 | (uint64_t)exp << 48 | (uint64_t)sgn << 63;
    memcpy(&f, &x, 16);
    return f;
}
static long double f3_add(long double fa, long double fb, int neg) |
{ |
u128_t a, b, x; |
int32_t a_exp, b_exp, x_exp; |
int a_sgn, b_sgn, x_sgn; |
long double fx; |
f3_unpack(&a_sgn, &a_exp, &a, fa); |
f3_unpack(&b_sgn, &b_exp, &b, fb); |
if (fp3_detect_NaNs(&fx, a_sgn, a_exp, a, b_sgn, b_exp, b)) |
return fx; |
b_sgn ^= neg; |
// Handle infinities and zeroes: |
if (a_exp == 32767 && b_exp == 32767 && a_sgn != b_sgn) |
return f3_NaN(); |
if (a_exp == 32767) |
return f3_infinity(a_sgn); |
if (b_exp == 32767) |
return f3_infinity(b_sgn); |
if (!(a.x0 | a.x1 | b.x0 | b.x1)) |
return f3_zero(a_sgn & b_sgn); |
a.x1 = a.x1 << 3 | a.x0 >> 61; |
a.x0 = a.x0 << 3; |
b.x1 = b.x1 << 3 | b.x0 >> 61; |
b.x0 = b.x0 << 3; |
if (a_exp <= b_exp) { |
a = f3_sticky_shift(b_exp - a_exp, a); |
a_exp = b_exp; |
} |
else { |
b = f3_sticky_shift(a_exp - b_exp, b); |
b_exp = a_exp; |
} |
x_sgn = a_sgn; |
x_exp = a_exp; |
if (a_sgn == b_sgn) { |
x.x0 = a.x0 + b.x0; |
x.x1 = a.x1 + b.x1 + (x.x0 < a.x0); |
} |
else { |
x.x0 = a.x0 - b.x0; |
x.x1 = a.x1 - b.x1 - (x.x0 > a.x0); |
if (x.x1 >> 63) { |
x_sgn ^= 1; |
x.x0 = -x.x0; |
x.x1 = -x.x1 - !!x.x0; |
} |
} |
if (!(x.x0 | x.x1)) |
return f3_zero(0); |
x = f3_normalise(&x_exp, x); |
return f3_round(x_sgn, x_exp + 12, x); |
} |
// a + b, computed by f3_add without negating b.
long double __addtf3(long double a, long double b)
{
    const int negate_b = 0;

    return f3_add(a, b, negate_b);
}

// a - b, computed by f3_add with the sign of b flipped.
long double __subtf3(long double a, long double b)
{
    const int negate_b = 1;

    return f3_add(a, b, negate_b);
}
long double __multf3(long double fa, long double fb) |
{ |
u128_t a, b, x; |
int32_t a_exp, b_exp, x_exp; |
int a_sgn, b_sgn, x_sgn; |
long double fx; |
f3_unpack(&a_sgn, &a_exp, &a, fa); |
f3_unpack(&b_sgn, &b_exp, &b, fb); |
if (fp3_detect_NaNs(&fx, a_sgn, a_exp, a, b_sgn, b_exp, b)) |
return fx; |
// Handle infinities and zeroes: |
if ((a_exp == 32767 && !(b.x0 | b.x1)) || |
(b_exp == 32767 && !(a.x0 | a.x1))) |
return f3_NaN(); |
if (a_exp == 32767 || b_exp == 32767) |
return f3_infinity(a_sgn ^ b_sgn); |
if (!(a.x0 | a.x1) || !(b.x0 | b.x1)) |
return f3_zero(a_sgn ^ b_sgn); |
a = f3_normalise(&a_exp, a); |
b = f3_normalise(&b_exp, b); |
x_sgn = a_sgn ^ b_sgn; |
x_exp = a_exp + b_exp - 16352; |
{ |
// Convert to base (1 << 30), discarding bottom 6 bits, which are zero, |
// so there are (32, 30, 30, 30) bits in (a3, a2, a1, a0): |
uint64_t a0 = a.x0 << 28 >> 34; |
uint64_t b0 = b.x0 << 28 >> 34; |
uint64_t a1 = a.x0 >> 36 | a.x1 << 62 >> 34; |
uint64_t b1 = b.x0 >> 36 | b.x1 << 62 >> 34; |
uint64_t a2 = a.x1 << 32 >> 34; |
uint64_t b2 = b.x1 << 32 >> 34; |
uint64_t a3 = a.x1 >> 32; |
uint64_t b3 = b.x1 >> 32; |
// Use 16 small multiplications and additions that do not overflow: |
uint64_t x0 = a0 * b0; |
uint64_t x1 = (x0 >> 30) + a0 * b1 + a1 * b0; |
uint64_t x2 = (x1 >> 30) + a0 * b2 + a1 * b1 + a2 * b0; |
uint64_t x3 = (x2 >> 30) + a0 * b3 + a1 * b2 + a2 * b1 + a3 * b0; |
uint64_t x4 = (x3 >> 30) + a1 * b3 + a2 * b2 + a3 * b1; |
uint64_t x5 = (x4 >> 30) + a2 * b3 + a3 * b2; |
uint64_t x6 = (x5 >> 30) + a3 * b3; |
// We now have (64, 30, 30, ...) bits in (x6, x5, x4, ...). |
// Take the top 128 bits, setting bottom bit if any lower bits were set: |
uint64_t y0 = (x5 << 34 | x4 << 34 >> 30 | x3 << 34 >> 60 | |
!!(x3 << 38 | (x2 | x1 | x0) << 34)); |
uint64_t y1 = x6; |
// Top bit may be zero. Renormalise: |
if (!(y1 >> 63)) { |
y1 = y1 << 1 | y0 >> 63; |
y0 = y0 << 1; |
--x_exp; |
} |
x.x0 = y0; |
x.x1 = y1; |
} |
return f3_round(x_sgn, x_exp, x); |
} |
long double __divtf3(long double fa, long double fb) |
{ |
u128_t a, b, x; |
int32_t a_exp, b_exp, x_exp; |
int a_sgn, b_sgn, x_sgn, i; |
long double fx; |
f3_unpack(&a_sgn, &a_exp, &a, fa); |
f3_unpack(&b_sgn, &b_exp, &b, fb); |
if (fp3_detect_NaNs(&fx, a_sgn, a_exp, a, b_sgn, b_exp, b)) |
return fx; |
// Handle infinities and zeroes: |
if ((a_exp == 32767 && b_exp == 32767) || |
(!(a.x0 | a.x1) && !(b.x0 | b.x1))) |
return f3_NaN(); |
if (a_exp == 32767 || !(b.x0 | b.x1)) |
return f3_infinity(a_sgn ^ b_sgn); |
if (!(a.x0 | a.x1) || b_exp == 32767) |
return f3_zero(a_sgn ^ b_sgn); |
a = f3_normalise(&a_exp, a); |
b = f3_normalise(&b_exp, b); |
x_sgn = a_sgn ^ b_sgn; |
x_exp = a_exp - b_exp + 16395; |
a.x0 = a.x0 >> 1 | a.x1 << 63; |
a.x1 = a.x1 >> 1; |
b.x0 = b.x0 >> 1 | b.x1 << 63; |
b.x1 = b.x1 >> 1; |
x.x0 = 0; |
x.x1 = 0; |
for (i = 0; i < 116; i++) { |
x.x1 = x.x1 << 1 | x.x0 >> 63; |
x.x0 = x.x0 << 1; |
if (a.x1 > b.x1 || (a.x1 == b.x1 && a.x0 >= b.x0)) { |
a.x1 = a.x1 - b.x1 - (a.x0 < b.x0); |
a.x0 = a.x0 - b.x0; |
x.x0 |= 1; |
} |
a.x1 = a.x1 << 1 | a.x0 >> 63; |
a.x0 = a.x0 << 1; |
} |
x.x0 |= !!(a.x0 | a.x1); |
x = f3_normalise(&x_exp, x); |
return f3_round(x_sgn, x_exp, x); |
} |
long double __extendsftf2(float f) |
{ |
long double fx; |
u128_t x; |
uint32_t a; |
uint64_t aa; |
memcpy(&a, &f, 4); |
aa = a; |
x.x0 = 0; |
if (!(a << 1)) |
x.x1 = aa << 32; |
else if (a << 1 >> 24 == 255) |
x.x1 = (0x7fff000000000000 | aa >> 31 << 63 | aa << 41 >> 16 | |
(uint64_t)!!(a << 9) << 47); |
else |
x.x1 = (aa >> 31 << 63 | ((aa >> 23 & 255) + 16256) << 48 | |
aa << 41 >> 16); |
memcpy(&fx, &x, 16); |
return fx; |
} |
long double __extenddftf2(double f) |
{ |
long double fx; |
u128_t x; |
uint64_t a; |
memcpy(&a, &f, 8); |
x.x0 = a << 60; |
if (!(a << 1)) |
x.x1 = a; |
else if (a << 1 >> 53 == 2047) |
x.x1 = (0x7fff000000000000 | a >> 63 << 63 | a << 12 >> 16 | |
(uint64_t)!!(a << 12) << 47); |
else |
x.x1 = a >> 63 << 63 | ((a >> 52 & 2047) + 15360) << 48 | a << 12 >> 16; |
memcpy(&fx, &x, 16); |
return fx; |
} |
float __trunctfsf2(long double f) |
{ |
u128_t mnt; |
int32_t exp; |
int sgn; |
uint32_t x; |
float fx; |
f3_unpack(&sgn, &exp, &mnt, f); |
if (exp == 32767 && (mnt.x0 | mnt.x1 << 16)) |
x = 0x7fc00000 | (uint32_t)sgn << 31 | (mnt.x1 >> 25 & 0x007fffff); |
else if (exp > 16510) |
x = 0x7f800000 | (uint32_t)sgn << 31; |
else if (exp < 16233) |
x = (uint32_t)sgn << 31; |
else { |
exp -= 16257; |
x = mnt.x1 >> 23 | !!(mnt.x0 | mnt.x1 << 41); |
if (exp < 0) { |
x = x >> -exp | !!(x << (32 + exp)); |
exp = 0; |
} |
if ((x & 3) == 3 || (x & 7) == 6) |
x += 4; |
x = ((x >> 2) + (exp << 23)) | (uint32_t)sgn << 31; |
} |
memcpy(&fx, &x, 4); |
return fx; |
} |
double __trunctfdf2(long double f) |
{ |
u128_t mnt; |
int32_t exp; |
int sgn; |
uint64_t x; |
double fx; |
f3_unpack(&sgn, &exp, &mnt, f); |
if (exp == 32767 && (mnt.x0 | mnt.x1 << 16)) |
x = (0x7ff8000000000000 | (uint64_t)sgn << 63 | |
mnt.x1 << 16 >> 12 | mnt.x0 >> 60); |
else if (exp > 17406) |
x = 0x7ff0000000000000 | (uint64_t)sgn << 63; |
else if (exp < 15308) |
x = (uint64_t)sgn << 63; |
else { |
exp -= 15361; |
x = mnt.x1 << 6 | mnt.x0 >> 58 | !!(mnt.x0 << 6); |
if (exp < 0) { |
x = x >> -exp | !!(x << (64 + exp)); |
exp = 0; |
} |
if ((x & 3) == 3 || (x & 7) == 6) |
x += 4; |
x = ((x >> 2) + ((uint64_t)exp << 52)) | (uint64_t)sgn << 63; |
} |
memcpy(&fx, &x, 8); |
return fx; |
} |
int32_t __fixtfsi(long double fa) |
{ |
u128_t a; |
int32_t a_exp; |
int a_sgn; |
int32_t x; |
f3_unpack(&a_sgn, &a_exp, &a, fa); |
if (a_exp < 16369) |
return 0; |
if (a_exp > 16413) |
return a_sgn ? -0x80000000 : 0x7fffffff; |
x = a.x1 >> (16431 - a_exp); |
return a_sgn ? -x : x; |
} |
int64_t __fixtfdi(long double fa) |
{ |
u128_t a; |
int32_t a_exp; |
int a_sgn; |
int64_t x; |
f3_unpack(&a_sgn, &a_exp, &a, fa); |
if (a_exp < 16383) |
return 0; |
if (a_exp > 16445) |
return a_sgn ? -0x8000000000000000 : 0x7fffffffffffffff; |
x = (a.x1 << 15 | a.x0 >> 49) >> (16446 - a_exp); |
return a_sgn ? -x : x; |
} |
uint32_t __fixunstfsi(long double fa) |
{ |
u128_t a; |
int32_t a_exp; |
int a_sgn; |
f3_unpack(&a_sgn, &a_exp, &a, fa); |
if (a_sgn || a_exp < 16369) |
return 0; |
if (a_exp > 16414) |
return -1; |
return a.x1 >> (16431 - a_exp); |
} |
uint64_t __fixunstfdi(long double fa) |
{ |
u128_t a; |
int32_t a_exp; |
int a_sgn; |
f3_unpack(&a_sgn, &a_exp, &a, fa); |
if (a_sgn || a_exp < 16383) |
return 0; |
if (a_exp > 16446) |
return -1; |
return (a.x1 << 15 | a.x0 >> 49) >> (16446 - a_exp); |
} |
long double __floatsitf(int32_t a) |
{ |
int sgn = 0; |
int exp = 16414; |
uint32_t mnt = a; |
u128_t x = { 0, 0 }; |
long double f; |
int i; |
if (a) { |
if (a < 0) { |
sgn = 1; |
mnt = -mnt; |
} |
for (i = 16; i; i >>= 1) |
if (!(mnt >> (32 - i))) { |
mnt <<= i; |
exp -= i; |
} |
x.x1 = ((uint64_t)sgn << 63 | (uint64_t)exp << 48 | |
(uint64_t)(mnt << 1) << 16); |
} |
memcpy(&f, &x, 16); |
return f; |
} |
long double __floatditf(int64_t a) |
{ |
int sgn = 0; |
int exp = 16446; |
uint64_t mnt = a; |
u128_t x = { 0, 0 }; |
long double f; |
int i; |
if (a) { |
if (a < 0) { |
sgn = 1; |
mnt = -mnt; |
} |
for (i = 32; i; i >>= 1) |
if (!(mnt >> (64 - i))) { |
mnt <<= i; |
exp -= i; |
} |
x.x0 = mnt << 49; |
x.x1 = (uint64_t)sgn << 63 | (uint64_t)exp << 48 | mnt << 1 >> 16; |
} |
memcpy(&f, &x, 16); |
return f; |
} |
long double __floatunsitf(uint32_t a) |
{ |
int exp = 16414; |
uint32_t mnt = a; |
u128_t x = { 0, 0 }; |
long double f; |
int i; |
if (a) { |
for (i = 16; i; i >>= 1) |
if (!(mnt >> (32 - i))) { |
mnt <<= i; |
exp -= i; |
} |
x.x1 = (uint64_t)exp << 48 | (uint64_t)(mnt << 1) << 16; |
} |
memcpy(&f, &x, 16); |
return f; |
} |
long double __floatunditf(uint64_t a) |
{ |
int exp = 16446; |
uint64_t mnt = a; |
u128_t x = { 0, 0 }; |
long double f; |
int i; |
if (a) { |
for (i = 32; i; i >>= 1) |
if (!(mnt >> (64 - i))) { |
mnt <<= i; |
exp -= i; |
} |
x.x0 = mnt << 49; |
x.x1 = (uint64_t)exp << 48 | mnt << 1 >> 16; |
} |
memcpy(&f, &x, 16); |
return f; |
} |
static int f3_cmp(long double fa, long double fb) |
{ |
u128_t a, b; |
memcpy(&a, &fa, 16); |
memcpy(&b, &fb, 16); |
return (!(a.x0 | a.x1 << 1 | b.x0 | b.x1 << 1) ? 0 : |
((a.x1 << 1 >> 49 == 0x7fff && (a.x0 | a.x1 << 16)) || |
(b.x1 << 1 >> 49 == 0x7fff && (b.x0 | b.x1 << 16))) ? 2 : |
a.x1 >> 63 != b.x1 >> 63 ? (int)(b.x1 >> 63) - (int)(a.x1 >> 63) : |
a.x1 < b.x1 ? (int)(a.x1 >> 63 << 1) - 1 : |
a.x1 > b.x1 ? 1 - (int)(a.x1 >> 63 << 1) : |
a.x0 < b.x0 ? (int)(a.x1 >> 63 << 1) - 1 : |
b.x0 < a.x0 ? 1 - (int)(a.x1 >> 63 << 1) : 0); |
} |
// Comparison entry points, all built on f3_cmp (0 equal, -1 less,
// 1 greater, 2 when either operand is a NaN).

// 0 when a equals b, 1 otherwise (including when either is a NaN).
int __eqtf2(long double a, long double b)
{
    int order = f3_cmp(a, b);

    return !!order;
}

// 0 when a equals b, 1 otherwise (including when either is a NaN).
int __netf2(long double a, long double b)
{
    int order = f3_cmp(a, b);

    return !!order;
}

// Negative when a < b; a NaN operand yields the positive value 2.
int __lttf2(long double a, long double b)
{
    int order = f3_cmp(a, b);

    return order;
}

// Zero or negative when a <= b; a NaN operand yields the positive
// value 2.
int __letf2(long double a, long double b)
{
    int order = f3_cmp(a, b);

    return order;
}

// Positive when a > b; a NaN operand yields the negative value -2.
int __gttf2(long double a, long double b)
{
    int reversed = f3_cmp(b, a);

    return -reversed;
}

// Zero or positive when a >= b; a NaN operand yields the negative
// value -2.
int __getf2(long double a, long double b)
{
    int reversed = f3_cmp(b, a);

    return -reversed;
}
/programs/develop/ktcc/trunk/source/lib/libtcc1.c |
---|
0,0 → 1,753 |
/* TCC runtime library. |
Parts of this code are (c) 2002 Fabrice Bellard |
Copyright (C) 1987, 1988, 1992, 1994, 1995 Free Software Foundation, Inc. |
This file is free software; you can redistribute it and/or modify it |
under the terms of the GNU General Public License as published by the |
Free Software Foundation; either version 2, or (at your option) any |
later version. |
In addition to the permissions in the GNU General Public License, the |
Free Software Foundation gives you unlimited permission to link the |
compiled version of this file into combinations with other programs, |
and to distribute those combinations without any restriction coming |
from the use of this file. (The General Public License restrictions |
do apply in other respects; for example, they cover modification of |
the file, and distribution when not linked into a combine |
executable.) |
This file is distributed in the hope that it will be useful, but |
WITHOUT ANY WARRANTY; without even the implied warranty of |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
General Public License for more details. |
You should have received a copy of the GNU General Public License |
along with this program; see the file COPYING. If not, write to |
the Free Software Foundation, 59 Temple Place - Suite 330, |
Boston, MA 02111-1307, USA. |
*/ |
//#include <stdint.h> |
/* size of a machine word in bits, and of an addressable unit in bits */
#define W_TYPE_SIZE   32
#define BITS_PER_UNIT 8

/* single word (W) and double word (DW) integer types */
typedef int Wtype;
typedef unsigned int UWtype;
typedef unsigned int USItype;
typedef long long DWtype;
typedef unsigned long long UDWtype;

/* the two words of a double word, low word first */
struct DWstruct {
    Wtype low, high;
};

/* a double word, accessible as a whole (ll) or word by word (s) */
typedef union
{
    struct DWstruct s;
    DWtype ll;
} DWunion;

typedef long double XFtype;

#define WORD_SIZE (sizeof (Wtype) * BITS_PER_UNIT)
#define HIGH_WORD_COEFF (((UDWtype) 1) << WORD_SIZE)
/* the following deal with IEEE single-precision numbers:
   sign in bit 31, 8 exponent bits from bit 23, 23 fraction bits */
#define EXCESS 126
#define SIGNBIT 0x80000000
#define HIDDEN (1 << 23)
#define SIGN(fp) ((fp) & SIGNBIT)
#define EXP(fp) (((fp) >> 23) & 0xFF)
#define MANT(fp) (((fp) & 0x7FFFFF) | HIDDEN)
#define PACK(s,e,m) ((s) | ((e) << 23) | (m))

/* the following deal with IEEE double-precision numbers; 'fp' is a
   union double_long, read through its upper/lower words or as 'll' */
#define EXCESSD 1022
#define HIDDEND (1 << 20)
#define EXPD(fp) (((fp.l.upper) >> 20) & 0x7FF)
#define SIGND(fp) ((fp.l.upper) & SIGNBIT)
#define MANTD(fp) (((((fp.l.upper) & 0xFFFFF) | HIDDEND) << 10) | \
                   (fp.l.lower >> 22))
#define HIDDEND_LL ((long long)1 << 52)
#define MANTD_LL(fp) ((fp.ll & (HIDDEND_LL-1)) | HIDDEND_LL)
#define PACKD_LL(s,e,m) (((long long)((s)+((e)<<20))<<32)|(m))

/* the following deal with x86 long double-precision numbers; 'fp' is
   a union ldouble_long */
#define EXCESSLD 16382
#define EXPLD(fp) (fp.l.upper & 0x7fff)
#define SIGNLD(fp) ((fp.l.upper) & 0x8000)

/* only for x86 */
/* long double viewed as a 64-bit lower part followed by a 16-bit
   upper part (the part read by EXPLD and SIGNLD) */
union ldouble_long {
    long double ld;
    struct {
        unsigned long long lower;
        unsigned short upper;
    } l;
};

/* double viewed as two 32-bit words or as one 64-bit integer */
union double_long {
    double d;
#if 1
    /* layout in use: lower word first */
    struct {
        unsigned int lower;
        int upper;
    } l;
#else
    /* alternative layout: upper word first */
    struct {
        int upper;
        unsigned int lower;
    } l;
#endif
    long long ll;
};

/* float viewed as a 32-bit unsigned integer */
union float_long {
    float f;
    unsigned int l;
};
/* XXX: we don't support several builtin supports for now */ |
#if !defined(TCC_TARGET_X86_64) && !defined(TCC_TARGET_ARM) |
/* XXX: use gcc/tcc intrinsic ? */ |
#if defined(TCC_TARGET_I386) |
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ |
__asm__ ("subl %5,%1\n\tsbbl %3,%0" \ |
: "=r" ((USItype) (sh)), \ |
"=&r" ((USItype) (sl)) \ |
: "0" ((USItype) (ah)), \ |
"g" ((USItype) (bh)), \ |
"1" ((USItype) (al)), \ |
"g" ((USItype) (bl))) |
#define umul_ppmm(w1, w0, u, v) \ |
__asm__ ("mull %3" \ |
: "=a" ((USItype) (w0)), \ |
"=d" ((USItype) (w1)) \ |
: "%0" ((USItype) (u)), \ |
"rm" ((USItype) (v))) |
#define udiv_qrnnd(q, r, n1, n0, dv) \ |
__asm__ ("divl %4" \ |
: "=a" ((USItype) (q)), \ |
"=d" ((USItype) (r)) \ |
: "0" ((USItype) (n0)), \ |
"1" ((USItype) (n1)), \ |
"rm" ((USItype) (dv))) |
#define count_leading_zeros(count, x) \ |
do { \ |
USItype __cbtmp; \ |
__asm__ ("bsrl %1,%0" \ |
: "=r" (__cbtmp) : "rm" ((USItype) (x))); \ |
(count) = __cbtmp ^ 31; \ |
} while (0) |
#else |
#error unsupported CPU type |
#endif |
/* most of this code is taken from libgcc2.c from gcc */ |
/*
 * Unsigned double-word division with optional remainder.
 *
 * n  - dividend, d - divisor, both split into a low and a high word
 *      through DWunion (n0/n1 and d0/d1 below).
 * rp - if non-null, receives the remainder; if null, only the quotient is
 *      produced.
 *
 * Returns the quotient, assembled at the end from q0 (low word) and q1
 * (high word).
 *
 * When the divisor is zero the code deliberately evaluates "1 / d0" so that
 * the host's divide-by-zero behaviour is triggered.
 *
 * udiv_qrnnd, count_leading_zeros, umul_ppmm and sub_ddmmss are macros
 * defined outside this block; judging by how they are used here they are
 * presumably the usual longlong.h primitives (two-word by one-word divide,
 * leading-zero count, one-word multiply with two-word result, two-word
 * subtract) - confirm against their definitions.
 */
static UDWtype __udivmoddi4 (UDWtype n, UDWtype d, UDWtype *rp) |
{ |
DWunion ww; |
DWunion nn, dd; |
DWunion rr; |
UWtype d0, d1, n0, n1, n2; |
UWtype q0, q1; |
UWtype b, bm; |
nn.ll = n; |
dd.ll = d; |
d0 = dd.s.low; |
d1 = dd.s.high; |
n0 = nn.s.low; |
n1 = nn.s.high; |
/* Two alternative implementations of the "divisor fits in one word" case
   follow; the preprocessor keeps exactly one of them.  The trailing
   "else" after the #endif pairs with whichever "if (d1 == 0)" survives. */
#if !defined(UDIV_NEEDS_NORMALIZATION) |
if (d1 == 0) |
{ |
if (d0 > n1) |
{ |
/* 0q = nn / 0D */ |
udiv_qrnnd (q0, n0, n1, n0, d0); |
q1 = 0; |
/* Remainder in n0. */ |
} |
else |
{ |
/* qq = NN / 0d */ |
if (d0 == 0) |
d0 = 1 / d0; /* Divide intentionally by zero. */ |
udiv_qrnnd (q1, n1, 0, n1, d0); |
udiv_qrnnd (q0, n0, n1, n0, d0); |
/* Remainder in n0. */ |
} |
if (rp != 0) |
{ |
rr.s.low = n0; |
rr.s.high = 0; |
*rp = rr.ll; |
} |
} |
#else /* UDIV_NEEDS_NORMALIZATION */ |
if (d1 == 0) |
{ |
if (d0 > n1) |
{ |
/* 0q = nn / 0D */ |
count_leading_zeros (bm, d0); |
if (bm != 0) |
{ |
/* Normalize, i.e. make the most significant bit of the |
denominator set. */ |
d0 = d0 << bm; |
n1 = (n1 << bm) | (n0 >> (W_TYPE_SIZE - bm)); |
n0 = n0 << bm; |
} |
udiv_qrnnd (q0, n0, n1, n0, d0); |
q1 = 0; |
/* Remainder in n0 >> bm. */ |
} |
else |
{ |
/* qq = NN / 0d */ |
if (d0 == 0) |
d0 = 1 / d0; /* Divide intentionally by zero. */ |
count_leading_zeros (bm, d0); |
if (bm == 0) |
{ |
/* From (n1 >= d0) /\ (the most significant bit of d0 is set), |
conclude (the most significant bit of n1 is set) /\ (the |
leading quotient digit q1 = 1). |
This special case is necessary, not an optimization. |
(Shifts counts of W_TYPE_SIZE are undefined.) */ |
n1 -= d0; |
q1 = 1; |
} |
else |
{ |
/* Normalize. */ |
b = W_TYPE_SIZE - bm; |
d0 = d0 << bm; |
n2 = n1 >> b; |
n1 = (n1 << bm) | (n0 >> b); |
n0 = n0 << bm; |
udiv_qrnnd (q1, n1, n2, n1, d0); |
} |
/* n1 != d0... */ |
udiv_qrnnd (q0, n0, n1, n0, d0); |
/* Remainder in n0 >> bm. */ |
} |
if (rp != 0) |
{ |
/* Undo the normalization shift before handing the remainder back. */
rr.s.low = n0 >> bm; |
rr.s.high = 0; |
*rp = rr.ll; |
} |
} |
#endif /* UDIV_NEEDS_NORMALIZATION */ |
else |
{ |
/* The divisor has a non-zero high word (d1 != 0). */
if (d1 > n1) |
{ |
/* 00 = nn / DD */ |
q0 = 0; |
q1 = 0; |
/* Remainder in n1n0. */ |
if (rp != 0) |
{ |
rr.s.low = n0; |
rr.s.high = n1; |
*rp = rr.ll; |
} |
} |
else |
{ |
/* 0q = NN / dd */ |
count_leading_zeros (bm, d1); |
if (bm == 0) |
{ |
/* From (n1 >= d1) /\ (the most significant bit of d1 is set), |
conclude (the most significant bit of n1 is set) /\ (the |
quotient digit q0 = 0 or 1). |
This special case is necessary, not an optimization. */ |
/* The condition on the next line takes advantage of that |
n1 >= d1 (true due to program flow). */ |
if (n1 > d1 || n0 >= d0) |
{ |
q0 = 1; |
sub_ddmmss (n1, n0, n1, n0, d1, d0); |
} |
else |
q0 = 0; |
q1 = 0; |
if (rp != 0) |
{ |
rr.s.low = n0; |
rr.s.high = n1; |
*rp = rr.ll; |
} |
} |
else |
{ |
UWtype m1, m0; |
/* Normalize. */ |
b = W_TYPE_SIZE - bm; |
d1 = (d1 << bm) | (d0 >> b); |
d0 = d0 << bm; |
n2 = n1 >> b; |
n1 = (n1 << bm) | (n0 >> b); |
n0 = n0 << bm; |
udiv_qrnnd (q0, n1, n2, n1, d1); |
umul_ppmm (m1, m0, q0, d0); |
/* If the product m1:m0 exceeds the partial remainder n1:n0, the
   quotient estimate was one too large: step it back and adjust
   the product accordingly. */
if (m1 > n1 || (m1 == n1 && m0 > n0)) |
{ |
q0--; |
sub_ddmmss (m1, m0, m1, m0, d1, d0); |
} |
q1 = 0; |
/* Remainder in (n1n0 - m1m0) >> bm. */ |
if (rp != 0) |
{ |
sub_ddmmss (n1, n0, n1, n0, m1, m0); |
rr.s.low = (n1 << b) | (n0 >> bm); |
rr.s.high = n1 >> bm; |
*rp = rr.ll; |
} |
} |
} |
} |
/* Assemble the two quotient words into the double-word result. */
ww.s.low = q0; |
ww.s.high = q1; |
return ww.ll; |
} |
#define __negdi2(a) (-(a)) |
long long __divdi3(long long u, long long v) |
{ |
int c = 0; |
DWunion uu, vv; |
DWtype w; |
uu.ll = u; |
vv.ll = v; |
if (uu.s.high < 0) { |
c = ~c; |
uu.ll = __negdi2 (uu.ll); |
} |
if (vv.s.high < 0) { |
c = ~c; |
vv.ll = __negdi2 (vv.ll); |
} |
w = __udivmoddi4 (uu.ll, vv.ll, (UDWtype *) 0); |
if (c) |
w = __negdi2 (w); |
return w; |
} |
long long __moddi3(long long u, long long v) |
{ |
int c = 0; |
DWunion uu, vv; |
DWtype w; |
uu.ll = u; |
vv.ll = v; |
if (uu.s.high < 0) { |
c = ~c; |
uu.ll = __negdi2 (uu.ll); |
} |
if (vv.s.high < 0) |
vv.ll = __negdi2 (vv.ll); |
__udivmoddi4 (uu.ll, vv.ll, (UDWtype *) &w); |
if (c) |
w = __negdi2 (w); |
return w; |
} |
unsigned long long __udivdi3(unsigned long long u, unsigned long long v) |
{ |
return __udivmoddi4 (u, v, (UDWtype *) 0); |
} |
unsigned long long __umoddi3(unsigned long long u, unsigned long long v) |
{ |
UDWtype w; |
__udivmoddi4 (u, v, &w); |
return w; |
} |
/* XXX: fix tcc's code generator to do this instead */
/*
 * 64-bit right shift of a signed value by b bits.
 *
 * When built with tcc the shift is composed from 32-bit operations on the
 * two halves of a DWunion; any other compiler simply evaluates a >> b.
 */
long long __ashrdi3(long long a, int b)
{
#ifdef __TINYC__
    DWunion v;

    v.ll = a;
    if (b == 0)
        return v.ll;            /* nothing to move */
    if (b < 32) {
        /* Bits leaving the high half enter the top of the low half. */
        v.s.low = ((unsigned)v.s.low >> b) | (v.s.high << (32 - b));
        v.s.high = v.s.high >> b;
    } else {
        /* The low half is taken entirely from the high half. */
        v.s.low = v.s.high >> (b - 32);
        v.s.high = v.s.high >> 31;
    }
    return v.ll;
#else
    return a >> b;
#endif
}
/* XXX: fix tcc's code generator to do this instead */
/*
 * 64-bit right shift of an unsigned value by b bits.
 *
 * When built with tcc the shift is composed from 32-bit operations on the
 * two halves of a DWunion; any other compiler simply evaluates a >> b.
 */
unsigned long long __lshrdi3(unsigned long long a, int b)
{
#ifdef __TINYC__
    DWunion v;

    v.ll = a;
    if (b == 0)
        return v.ll;            /* nothing to move */
    if (b < 32) {
        /* Bits leaving the high half enter the top of the low half. */
        v.s.low = ((unsigned)v.s.low >> b) | (v.s.high << (32 - b));
        v.s.high = (unsigned)v.s.high >> b;
    } else {
        /* The low half is taken entirely from the high half, which
           then becomes zero. */
        v.s.low = (unsigned)v.s.high >> (b - 32);
        v.s.high = 0;
    }
    return v.ll;
#else
    return a >> b;
#endif
}
/* XXX: fix tcc's code generator to do this instead */
/*
 * 64-bit left shift by b bits.
 *
 * When built with tcc the shift is composed from 32-bit operations on the
 * two halves of a DWunion; any other compiler simply evaluates a << b.
 */
long long __ashldi3(long long a, int b)
{
#ifdef __TINYC__
    DWunion v;

    v.ll = a;
    if (b == 0)
        return v.ll;            /* nothing to move */
    if (b < 32) {
        /* Bits leaving the low half enter the bottom of the high half. */
        v.s.high = ((unsigned)v.s.high << b) | ((unsigned)v.s.low >> (32 - b));
        v.s.low = (unsigned)v.s.low << b;
    } else {
        /* The high half is taken entirely from the low half, which
           then becomes zero. */
        v.s.high = (unsigned)v.s.low << (b - 32);
        v.s.low = 0;
    }
    return v.ll;
#else
    return a << b;
#endif
}
#ifndef COMMIT_4ad186c5ef61_IS_FIXED |
/*
 * Convert a long double to a long long with x87 instructions.
 *
 * Sequence: save the FPU control word into c0 (fnstcw), load a copy with
 * the bits 0x0C00 set (fldcw), store a 64-bit integer into ret (fistpll),
 * then reload the saved control word so the caller's FPU mode is unchanged.
 *
 * NOTE(review): 0x0C00 is presumably the x87 rounding-control field set to
 * "round toward zero" - confirm against the x87 documentation.
 * NOTE(review): no instruction in this block loads x explicitly; fistpll
 * appears to consume whatever is on top of the FPU stack, so this
 * presumably relies on how the compiler leaves x there - confirm.
 */
long long __tcc_cvt_ftol(long double x) |
{ |
unsigned c0, c1; |
long long ret; |
__asm__ __volatile__ ("fnstcw %0" : "=m" (c0)); |
c1 = c0 | 0x0C00; |
__asm__ __volatile__ ("fldcw %0" : : "m" (c1)); |
__asm__ __volatile__ ("fistpll %0" : "=m" (ret)); |
/* Put the original control word back. */
__asm__ __volatile__ ("fldcw %0" : : "m" (c0)); |
return ret; |
} |
#endif |
#endif /* !__x86_64__ */ |
/* XXX: fix tcc's code generator to do this instead */ |
float __floatundisf(unsigned long long a) |
{ |
DWunion uu; |
XFtype r; |
uu.ll = a; |
if (uu.s.high >= 0) { |
return (float)uu.ll; |
} else { |
r = (XFtype)uu.ll; |
r += 18446744073709551616.0; |
return (float)r; |
} |
} |
double __floatundidf(unsigned long long a) |
{ |
DWunion uu; |
XFtype r; |
uu.ll = a; |
if (uu.s.high >= 0) { |
return (double)uu.ll; |
} else { |
r = (XFtype)uu.ll; |
r += 18446744073709551616.0; |
return (double)r; |
} |
} |
long double __floatundixf(unsigned long long a) |
{ |
DWunion uu; |
XFtype r; |
uu.ll = a; |
if (uu.s.high >= 0) { |
return (long double)uu.ll; |
} else { |
r = (XFtype)uu.ll; |
r += 18446744073709551616.0; |
return (long double)r; |
} |
} |
unsigned long long __fixunssfdi (float a1) |
{ |
register union float_long fl1; |
register int exp; |
register unsigned long l; |
fl1.f = a1; |
if (fl1.l == 0) |
return (0); |
exp = EXP (fl1.l) - EXCESS - 24; |
l = MANT(fl1.l); |
if (exp >= 41) |
return (unsigned long long)-1; |
else if (exp >= 0) |
return (unsigned long long)l << exp; |
else if (exp >= -23) |
return l >> -exp; |
else |
return 0; |
} |
unsigned long long __fixunsdfdi (double a1) |
{ |
register union double_long dl1; |
register int exp; |
register unsigned long long l; |
dl1.d = a1; |
if (dl1.ll == 0) |
return (0); |
exp = EXPD (dl1) - EXCESSD - 53; |
l = MANTD_LL(dl1); |
if (exp >= 12) |
return (unsigned long long)-1; |
else if (exp >= 0) |
return l << exp; |
else if (exp >= -52) |
return l >> -exp; |
else |
return 0; |
} |
unsigned long long __fixunsxfdi (long double a1) |
{ |
register union ldouble_long dl1; |
register int exp; |
register unsigned long long l; |
dl1.ld = a1; |
if (dl1.l.lower == 0 && dl1.l.upper == 0) |
return (0); |
exp = EXPLD (dl1) - EXCESSLD - 64; |
l = dl1.l.lower; |
if (exp > 0) |
return (unsigned long long)-1; |
else if (exp >= -63) |
return l >> -exp; |
else |
return 0; |
} |
/* Convert a float to a signed 64-bit integer: __fixunssfdi converts a1
   when a1 >= 0 and -a1 otherwise, and the result is negated in the latter
   case. */
long long __fixsfdi (float a1)
{
    int nonneg = (a1 >= 0);
    long long mag;

    mag = __fixunssfdi(nonneg ? a1 : -a1);
    if (nonneg)
        return mag;
    return -mag;
}
/* Convert a double to a signed 64-bit integer: __fixunsdfdi converts a1
   when a1 >= 0 and -a1 otherwise, and the result is negated in the latter
   case. */
long long __fixdfdi (double a1)
{
    int nonneg = (a1 >= 0);
    long long mag;

    mag = __fixunsdfdi(nonneg ? a1 : -a1);
    if (nonneg)
        return mag;
    return -mag;
}
/* Convert a long double to a signed 64-bit integer: __fixunsxfdi converts
   a1 when a1 >= 0 and -a1 otherwise, and the result is negated in the
   latter case. */
long long __fixxfdi (long double a1)
{
    int nonneg = (a1 >= 0);
    long long mag;

    mag = __fixunsxfdi(nonneg ? a1 : -a1);
    if (nonneg)
        return mag;
    return -mag;
}
#if defined(TCC_TARGET_X86_64) && !defined(_WIN64) |
#ifndef __TINYC__ |
#include <stdlib.h> |
#include <stdio.h> |
#include <string.h> |
#else |
/* Avoid including stdlib.h because it is not easily available when |
cross compiling */ |
#include <stddef.h> /* size_t definition is needed for a x86_64-tcc to parse memset() */ |
extern void *malloc(unsigned long long); |
extern void *memset(void *s, int c, size_t n); |
extern void free(void*); |
extern void abort(void); |
#endif |
/* Selects where __va_arg fetches the next variadic argument from. */
enum __va_arg_type {
    __va_gen_reg,       /* register save area, indexed by gp_offset */
    __va_float_reg,     /* register save area, indexed by fp_offset */
    __va_stack          /* overflow argument area */
};
//This should be in sync with the declaration on our include/stdarg.h |
/* GCC compatible definition of va_list. */ |
typedef struct { |
/* Offset into reg_save_area of the next general-purpose slot; __va_arg
   uses the register area only while gp_offset + size <= 48. */
unsigned int gp_offset; |
/* Offset into reg_save_area of the next floating-point slot; __va_arg
   uses the register area only while fp_offset < 128 + 48. */
unsigned int fp_offset; |
/* __va_start reads overflow_offset from the saved copy and replaces it
   with the pointer fp + overflow_offset; afterwards only the pointer
   member is used. */
union { |
unsigned int overflow_offset; |
char *overflow_arg_area; |
}; |
/* Base of the register save area (set to fp - 176 - 16 by __va_start). */
char *reg_save_area; |
} __va_list_struct; |
#undef __va_start |
#undef __va_arg |
#undef __va_copy |
#undef __va_end |
/*
 * Initialise *ap for a variadic function whose frame pointer is fp.
 *
 * The structure is cleared, then overwritten with the copy stored 16 bytes
 * below fp.  The overflow offset found in that copy is turned into an
 * absolute pointer relative to fp, and the register save area is placed at
 * fp - 176 - 16.
 */
void __va_start(__va_list_struct *ap, void *fp)
{
    char *frame = (char *)fp;

    memset(ap, 0, sizeof(*ap));
    *ap = *(__va_list_struct *)(frame - 16);
    ap->overflow_arg_area = frame + ap->overflow_offset;
    ap->reg_save_area = frame - 176 - 16;
}
/*
 * Return a pointer to the next variadic argument and advance *ap.
 *
 * size and align are first rounded up to a multiple of 8.
 *  - __va_gen_reg:   served from reg_save_area while gp_offset + size <= 48.
 *  - __va_float_reg: served from reg_save_area (16 bytes per slot) while
 *                    fp_offset < 128 + 48; once exhausted the argument is
 *                    fetched from the overflow area with size forced to 8.
 *  - __va_stack, or a register class that is exhausted: served from the
 *                    overflow area.
 * Any other arg_type aborts (after a message on stderr when not built with
 * tcc).
 */
void *__va_arg(__va_list_struct *ap,
               enum __va_arg_type arg_type,
               int size, int align)
{
    size = (size + 7) & ~7;
    align = (align + 7) & ~7;

    if (arg_type == __va_gen_reg) {
        if (ap->gp_offset + size <= 48) {
            ap->gp_offset += size;
            return ap->reg_save_area + ap->gp_offset - size;
        }
    } else if (arg_type == __va_float_reg) {
        if (ap->fp_offset < 128 + 48) {
            ap->fp_offset += 16;
            return ap->reg_save_area + ap->fp_offset - 16;
        }
        size = 8;
    } else if (arg_type != __va_stack) {
#ifndef __TINYC__
        fprintf(stderr, "unknown ABI type for __va_arg\n");
#endif
        abort();
    }

    /* Overflow area: advance past the argument, align the new position,
       and hand back the address size bytes before it. */
    ap->overflow_arg_area += size;
    ap->overflow_arg_area = (char*)((intptr_t)(ap->overflow_arg_area + align - 1) & -(intptr_t)align);
    return ap->overflow_arg_area - size;
}
#endif /* __x86_64__ */ |
/* Flushing for tccrun */ |
/* __clear_cache(beginning, end) is provided per target: an empty function
   for x86-64 and i386, a cacheflush system call for ARM, and no definition
   at all (only a build warning) for any other target. */
#if defined(TCC_TARGET_X86_64) || defined(TCC_TARGET_I386) |
/* x86 variant: intentionally does nothing. */
void __clear_cache(void *beginning, void *end) |
{ |
} |
#elif defined(TCC_TARGET_ARM) |
#define _GNU_SOURCE |
#include <unistd.h> |
#include <sys/syscall.h> |
#include <stdio.h> |
/* ARM variant: passes the range [beginning, end) to the cacheflush
   system call. */
void __clear_cache(void *beginning, void *end) |
{ |
/* __ARM_NR_cacheflush is kernel private and should not be used in user space. |
* However, there is no ARM asm parser in tcc so we use it for now */ |
#if 1 |
syscall(__ARM_NR_cacheflush, beginning, end, 0); |
#else |
/* Disabled hand-written alternative to the syscall() call above. */
__asm__ ("push {r7}\n\t" |
"mov r7, #0xf0002\n\t" |
"mov r2, #0\n\t" |
"swi 0\n\t" |
"pop {r7}\n\t" |
"ret"); |
#endif |
} |
#else |
#warning __clear_cache not defined for this architecture, avoid using tcc -run |
#endif |
/programs/develop/ktcc/trunk/source/lib/testfp.c |
---|
0,0 → 1,510 |
/* |
* Test 128-bit floating-point arithmetic on arm64: |
* build with two different compilers and compare the output. |
* |
* Copyright (c) 2015 Edmund Grimley Evans |
* |
* Copying and distribution of this file, with or without modification, |
* are permitted in any medium without royalty provided the copyright |
* notice and this notice are preserved. This file is offered as-is, |
* without any warranty. |
*/ |
#include <stdint.h> |
#include <stdio.h> |
#include <stdlib.h> |
#include <string.h> |
/* check(x): evaluate x; when it is false, report the failing expression
   together with its source location and terminate the program. */
#define check(x) ((x) ? (void)0 : check_fail(#x, __FILE__, __LINE__))

/*
 * Report a failed check() and exit with status 1.
 *
 * assertion - text of the expression that evaluated to false
 * file      - source file name of the check
 * line      - source line number of the check
 *
 * The message goes to stdout without a trailing newline.  The line number
 * is an unsigned int and is therefore printed with %u.
 */
void check_fail(const char *assertion, const char *file, unsigned int line)
{
    printf("%s:%u: Check (%s) failed.", file, line, assertion);
    exit(1);
}
/* Raw 128-bit container used to move long double bit patterns around.
   x0 comes first in memory; the printing code treats x1 as the upper
   64 bits and x0 as the lower 64 bits. */
typedef struct {
    unsigned long long x0;
    unsigned long long x1;
} u128_t;
/* Reinterpret the 32-bit pattern x as a float (byte copy, no numeric
   conversion). */
float copy_fi(uint32_t x)
{
    float value;

    memcpy(&value, &x, sizeof(x));
    return value;
}
/* Reinterpret the 64-bit pattern x as a double (byte copy, no numeric
   conversion). */
double copy_di(uint64_t x)
{
    double value;

    memcpy(&value, &x, sizeof(x));
    return value;
}
/* Reinterpret the 128-bit pattern x as a long double by copying the bytes
   of x into the result. */
long double copy_ldi(u128_t x)
{
    long double value;

    memcpy(&value, &x, sizeof(x));
    return value;
}
/* Return the 32-bit pattern that represents the float f (byte copy, no
   numeric conversion). */
uint32_t copy_if(float f)
{
    uint32_t bits;

    memcpy(&bits, &f, sizeof(bits));
    return bits;
}
/* Return the 64-bit pattern that represents the double f (byte copy, no
   numeric conversion). */
uint64_t copy_id(double f)
{
    uint64_t bits;

    memcpy(&bits, &f, sizeof(bits));
    return bits;
}
/* Return the 128-bit pattern that represents the long double f by copying
   its bytes into a u128_t. */
u128_t copy_ild(long double f)
{
    u128_t bits;

    memcpy(&bits, &f, sizeof(bits));
    return bits;
}
long double make(int sgn, int exp, uint64_t high, uint64_t low) |
{ |
u128_t x = { low, |
(0x0000ffffffffffff & high) | |
(0x7fff000000000000 & (uint64_t)exp << 48) | |
(0x8000000000000000 & (uint64_t)sgn << 63) }; |
return copy_ldi(x); |
} |
void cmp(long double a, long double b) |
{ |
u128_t ax = copy_ild(a); |
u128_t bx = copy_ild(b); |
int eq = (a == b); |
int ne = (a != b); |
int lt = (a < b); |
int le = (a <= b); |
int gt = (a > b); |
int ge = (a >= b); |
check(eq == 0 || eq == 1); |
check(lt == 0 || lt == 1); |
check(gt == 0 || gt == 1); |
check(ne == !eq && le == (lt | eq) && ge == (gt | eq)); |
check(eq + lt + gt < 2); |
printf("cmp %016llx%016llx %016llx%016llx %d %d %d\n", |
ax.x1, ax.x0, bx.x1, bx.x0, lt, eq, gt); |
} |
/* Drive cmp() over a fixed set of operand pairs built with make(). */
void cmps(void)
{
    int sign, bit, lhs, rhs, expo, hi, lo;

    /* Exponent 0, zero fraction, every sign combination. */
    for (lhs = 0; lhs < 2; lhs++) {
        for (rhs = 0; rhs < 2; rhs++)
            cmp(make(lhs, 0, 0, 0), make(rhs, 0, 0, 0));
    }

    /* Exponent 32767 with a single fraction bit set, compared against 0
       in both operand orders. */
    for (sign = 0; sign < 2; sign++) {
        for (bit = 0; bit < 64; bit++) {
            long double f1 = make(sign, 32767, (uint64_t)1 << bit, 0);
            long double f2 = make(sign, 32767, 0, (uint64_t)1 << bit);

            cmp(f1, 0);
            cmp(f2, 0);
            cmp(0, f1);
            cmp(0, f2);
        }
    }

    /* Low bit of the index picks the sign, the remaining bits the
       exponent (0, 1 or 2). */
    for (lhs = 0; lhs < 6; lhs++) {
        for (rhs = 0; rhs < 6; rhs++)
            cmp(make(lhs & 1, lhs >> 1, 0, 0),
                make(rhs & 1, rhs >> 1, 0, 0));
    }

    /* Same sign and exponent; fraction words of 0 or 1 against a zero
       fraction, in both operand orders. */
    for (sign = 0; sign < 2; sign++) {
        for (expo = 0; expo < 2; expo++) {
            for (hi = 0; hi < 2; hi++) {
                for (lo = 0; lo < 2; lo++) {
                    cmp(make(sign, expo, hi, lo), make(sign, expo, 0, 0));
                    cmp(make(sign, expo, 0, 0), make(sign, expo, hi, lo));
                }
            }
        }
    }
}
void xop(const char *name, long double a, long double b, long double c) |
{ |
u128_t ax = copy_ild(a); |
u128_t bx = copy_ild(b); |
u128_t cx = copy_ild(c); |
printf("%s %016llx%016llx %016llx%016llx %016llx%016llx\n", |
name, ax.x1, ax.x0, bx.x1, bx.x0, cx.x1, cx.x0); |
} |
/* Compute a + b in long double and print it through xop() as "add". */
void fadd(long double a, long double b)
{
    long double sum = a + b;

    xop("add", a, b, sum);
}
/* Compute a - b in long double and print it through xop() as "sub". */
void fsub(long double a, long double b)
{
    long double difference = a - b;

    xop("sub", a, b, difference);
}
/* Compute a * b in long double and print it through xop() as "mul". */
void fmul(long double a, long double b)
{
    long double product = a * b;

    xop("mul", a, b, product);
}
/* Compute a / b in long double and print it through xop() as "div". */
void fdiv(long double a, long double b)
{
    long double quotient = a / b;

    xop("div", a, b, quotient);
}
/* Run add, sub, mul and div (in that order) on every ordered pair drawn
   from vals[0..count-1], rows outermost. */
static void nanz_pairwise(const long double *vals, int count)
{
    int row, col;

    for (row = 0; row < count; row++) {
        for (col = 0; col < count; col++) {
            fadd(vals[row], vals[col]);
            fsub(vals[row], vals[col]);
            fmul(vals[row], vals[col]);
            fdiv(vals[row], vals[col]);
        }
    }
}

/*
 * Exercise the four arithmetic operations on two operand sets: the first
 * mixes two large-exponent values with exponent-32767 patterns that have a
 * non-zero fraction, the second mixes two large-exponent values with
 * exponent-32767 zero-fraction patterns and exponent-0 zero-fraction
 * patterns of both signs.
 */
void nanz(void)
{
    // Check NaNs:
    {
        long double x[7];
        int n = 0;

        x[n++] = make(0, 32000, 0x95132b76effc, 0xd79035214b4f8d53);
        x[n++] = make(1, 32001, 0xbe71d7a51587, 0x30601c6815d6c3ac);
        x[n++] = make(0, 32767, 0, 1);
        x[n++] = make(0, 32767, (uint64_t)1 << 46, 0);
        x[n++] = make(1, 32767, (uint64_t)1 << 47, 0);
        x[n++] = make(1, 32767, 0x7596c7099ad5, 0xe25fed2c58f73fc9);
        x[n++] = make(0, 32767, 0x835d143360f9, 0x5e315efb35630666);
        check(n == sizeof(x) / sizeof(*x));
        nanz_pairwise(x, n);
    }

    // Check infinities and zeroes:
    {
        long double x[6];
        int n = 0;

        x[n++] = make(1, 32000, 0x62acda85f700, 0x47b6c9f35edc4044);
        x[n++] = make(0, 32001, 0x94b7abf55af7, 0x9f425fe354428e19);
        x[n++] = make(0, 32767, 0, 0);
        x[n++] = make(1, 32767, 0, 0);
        x[n++] = make(0, 0, 0, 0);
        x[n++] = make(1, 0, 0, 0);
        check(n == sizeof(x) / sizeof(*x));
        nanz_pairwise(x, n);
    }
}
/*
 * Addition/subtraction cases: operands whose exponents differ by -130..130,
 * subtractions in which leading fraction bits cancel, and sums whose
 * fraction is all ones.
 */
void adds(void)
{
    // Check shifting and add/sub:
    {
        int delta;

        for (delta = -130; delta <= 130; delta++) {
            /* Signs vary with the offset so that both effective
               additions and effective subtractions occur. */
            int s1 = ((uint32_t)delta % 3) < 1;
            int s2 = ((uint32_t)delta % 5) < 2;

            fadd(make(s1, 16384        , 0x502c065e4f71a65d, 0xd2f9bdb031f4f031),
                 make(s2, 16384 + delta, 0xae267395a9bc1033, 0xb56b5800da1ba448));
        }
    }

    // Check normalisation:
    {
        const uint64_t a0 = 0xc6bab0a6afbef5ed;
        const uint64_t a1 = 0x4f84136c4a2e9b52;
        const int ee[] = { 0, 1, 10000 };
        unsigned int e;
        int bit;

        for (e = 0; e < sizeof(ee) / sizeof(*ee); e++) {
            const int exp = ee[e];

            fsub(make(0, exp, a1, a0), make(0, 0, 0, 0));

            /* Upper word: the subtrahend keeps the bits of a1 from
               position "bit" upwards. */
            for (bit = 63; bit >= 0; bit--)
                fsub(make(0, exp, a1 | (((uint64_t)1 << bit) >> 1), a0),
                     make(0, exp, (a1 >> bit) << bit, 0));

            /* Lower word: same idea applied to a0. */
            for (bit = 63; bit >= 0; bit--)
                fsub(make(0, exp, a1, a0 | (((uint64_t)1 << bit) >> 1)),
                     make(0, exp, a1, (a0 >> bit) << bit));
        }
    }

    // Carry/overflow from rounding:
    {
        fadd(make(0, 114, -1, -1), make(0, 1, 0, 0));
        fadd(make(0, 32766, -1, -1), make(0, 32653, 0, 0));
        fsub(make(1, 32766, -1, -1), make(0, 32653, 0, 0));
    }
}
/*
 * Multiplication cases: extreme operands, a fixed value times descending
 * small exponents, rounding boundaries, and single-bit fractions.
 */
void muls(void)
{
    int expo, hibit, pos;

    {
        /* Exponent 32766 with an all-ones fraction, and exponent 0 with
           only the lowest fraction bit set. */
        const long double max = make(0, 32766, -1, -1);
        const long double min = make(0, 0, 0, 1);

        fmul(max, max);
        fmul(max, min);
        fmul(min, min);
    }

    for (expo = 117; expo > 0; expo--)
        fmul(make(0, 16268, 0x643dcea76edc, 0xe0877a598403627a),
             make(expo & 1, expo, 0, 0));

    fmul(make(0, 16383, -1, -3), make(0, 16383, 0, 1));
    // Round to next exponent:
    fmul(make(0, 16383, -1, -2), make(0, 16383, 0, 1));
    // Round from subnormal to normal:
    fmul(make(0, 1, -1, -1), make(0, 16382, 0, 0));

    /* One bit set in the upper word of the first operand times one bit
       set at position pos across the two words of the second operand. */
    for (hibit = 0; hibit < 2; hibit++) {
        for (pos = 0; pos < 112; pos++) {
            const uint64_t upper = pos < 64 ? 0 : (uint64_t)1 << (pos - 64);
            const uint64_t lower = pos < 64 ? (uint64_t)1 << pos : 0;

            fmul(make(0, 16383, (uint64_t)1 << hibit, 0),
                 make(0, 16383, upper, lower));
        }
    }
}
/*
 * Division cases: extreme operands, then an all-ones fraction divided by
 * divisors whose fraction has a growing run of low zero bits.
 */
void divs(void)
{
    int shift;

    {
        /* Exponent 32766 with an all-ones fraction, and exponent 0 with
           only the lowest fraction bit set. */
        const long double max = make(0, 32766, -1, -1);
        const long double min = make(0, 0, 0, 1);

        fdiv(max, max);
        fdiv(max, min);
        fdiv(min, max);
        fdiv(min, min);
    }

    /* Clear the low "shift" bits of the divisor's lower word. */
    for (shift = 0; shift < 64; shift++)
        fdiv(make(0, 16383, -1, -1),
             make(0, 16383, -1, (-(uint64_t)1) << shift));

    /* Lower word zero; clear the low "shift" bits of the upper word. */
    for (shift = 0; shift < 48; shift++)
        fdiv(make(0, 16383, -1, -1),
             make(0, 16383, (-(uint64_t)1) << shift, 0));
}
void cvtlsw(int32_t a) |
{ |
long double f = a; |
u128_t x = copy_ild(f); |
printf("cvtlsw %08lx %016llx%016llx\n", (long)(uint32_t)a, x.x1, x.x0); |
} |
void cvtlsx(int64_t a) |
{ |
long double f = a; |
u128_t x = copy_ild(f); |
printf("cvtlsx %016llx %016llx%016llx\n", |
(long long)(uint64_t)a, x.x1, x.x0); |
} |
void cvtluw(uint32_t a) |
{ |
long double f = a; |
u128_t x = copy_ild(f); |
printf("cvtluw %08lx %016llx%016llx\n", (long)a, x.x1, x.x0); |
} |
void cvtlux(uint64_t a) |
{ |
long double f = a; |
u128_t x = copy_ild(f); |
printf("cvtlux %016llx %016llx%016llx\n", (long long)a, x.x1, x.x0); |
} |
void cvtil(long double a) |
{ |
u128_t x = copy_ild(a); |
int32_t b1 = a; |
int64_t b2 = a; |
uint32_t b3 = a; |
uint64_t b4 = a; |
printf("cvtswl %016llx%016llx %08lx\n", |
x.x1, x.x0, (long)(uint32_t)b1); |
printf("cvtsxl %016llx%016llx %016llx\n", |
x.x1, x.x0, (long long)(uint64_t)b2); |
printf("cvtuwl %016llx%016llx %08lx\n", |
x.x1, x.x0, (long)b3); |
printf("cvtuxl %016llx%016llx %016llx\n", |
x.x1, x.x0, (long long)b4); |
} |
void cvtlf(float a) |
{ |
uint32_t ax = copy_if(a); |
long double b = a; |
u128_t bx = copy_ild(b); |
printf("cvtlf %08lx %016llx%016llx\n", (long)ax, bx.x1, bx.x0); |
} |
void cvtld(double a) |
{ |
uint64_t ax = copy_id(a); |
long double b = a; |
u128_t bx = copy_ild(b); |
printf("cvtld %016llx %016llx%016llx\n", (long long)ax, bx.x1, bx.x0); |
} |
void cvtfl(long double a) |
{ |
u128_t ax = copy_ild(a); |
float b = a; |
uint32_t bx = copy_if(b); |
printf("cvtfl %016llx%016llx %08lx\n", ax.x1, ax.x0, (long)bx); |
} |
void cvtdl(long double a) |
{ |
u128_t ax = copy_ild(a); |
double b = a; |
uint64_t bx = copy_id(b); |
printf("cvtdl %016llx%016llx %016llx\n", ax.x1, ax.x0, (long long)bx); |
} |
/*
 * Conversion cases, in output order:
 *   1. signed 32-bit   -> long double (cvtlsw)
 *   2. signed 64-bit   -> long double (cvtlsx)
 *   3. unsigned 32-bit -> long double (cvtluw)
 *   4. unsigned 64-bit -> long double (cvtlux)
 *   5. long double     -> all four integer types (cvtil)
 *   6. float           -> long double (cvtlf)
 *   7. double          -> long double (cvtld)
 *   8. long double     -> float (cvtfl)
 *   9. long double     -> double (cvtdl)
 */
void cvts(void)
{
    int i, j;

    /* Growing prefixes of a fixed 32-bit pattern, positive then negated,
       followed by 0x80000000. */
    {
        uint32_t x = 0xad040c5b;
        cvtlsw(0);
        for (i = 0; i < 31; i++)
            cvtlsw(x >> (31 - i));
        for (i = 0; i < 31; i++)
            cvtlsw(-(x >> (31 - i)));
        cvtlsw(0x80000000);
    }

    /* Same scheme with a 64-bit pattern. */
    {
        uint64_t x = 0xb630a248cad9afd2;
        cvtlsx(0);
        for (i = 0; i < 63; i++)
            cvtlsx(x >> (63 - i));
        for (i = 0; i < 63; i++)
            cvtlsx(-(x >> (63 - i)));
        cvtlsx(0x8000000000000000);
    }

    /* Unsigned variants: every prefix length up to the full width. */
    {
        uint32_t x = 0xad040c5b;
        cvtluw(0);
        for (i = 0; i < 32; i++)
            cvtluw(x >> (31 - i));
    }
    {
        uint64_t x = 0xb630a248cad9afd2;
        cvtlux(0);
        for (i = 0; i < 64; i++)
            cvtlux(x >> (63 - i));
    }

    /* long double -> integer, for both signs: exponent-32767 patterns,
       values around exponent 16383, then a sweep over 68 consecutive
       exponents starting at 16381. */
    for (i = 0; i < 2; i++) {
        cvtil(make(i, 32767, 0, 1));
        cvtil(make(i, 32767, (uint64_t)1 << 47, 0));
        cvtil(make(i, 32767, 123, 456));
        cvtil(make(i, 32767, 0, 0));
        cvtil(make(i, 16382, -1, -1));
        cvtil(make(i, 16383, -1, -1));
        cvtil(make(i, 16384, 0x7fffffffffff, -1));
        cvtil(make(i, 16384, 0x800000000000, 0));
        for (j = 0; j < 68; j++)
            cvtil(make(i, 16381 + j, 0xd4822c0a10ec, 0x1fe2f8b2669f5c9d));
    }

    /* float -> long double. */
    cvtlf(copy_fi(0x00000000));
    cvtlf(copy_fi(0x456789ab));
    cvtlf(copy_fi(0x7f800000));
    cvtlf(copy_fi(0x7f923456));
    cvtlf(copy_fi(0x7fdbcdef));
    cvtlf(copy_fi(0x80000000));
    cvtlf(copy_fi(0xabcdef12));
    cvtlf(copy_fi(0xff800000));
    cvtlf(copy_fi(0xff923456));
    cvtlf(copy_fi(0xffdbcdef));

    /* double -> long double. */
    cvtld(copy_di(0x0000000000000000));
    cvtld(copy_di(0x456789abcdef0123));
    cvtld(copy_di(0x7ff0000000000000));
    cvtld(copy_di(0x7ff123456789abcd));
    cvtld(copy_di(0x7ffabcdef1234567));
    cvtld(copy_di(0x8000000000000000));
    cvtld(copy_di(0xcdef123456789abc));
    cvtld(copy_di(0xfff0000000000000));
    cvtld(copy_di(0xfff123456789abcd));
    cvtld(copy_di(0xfffabcdef1234567));

    /* long double -> float, for both signs.  (A stray line-continuation
       backslash used to follow the opening brace of this loop.) */
    for (i = 0; i < 2; i++) {
        cvtfl(make(i, 0, 0, 0));
        cvtfl(make(i, 16232, -1, -1));
        cvtfl(make(i, 16233, 0, 0));
        cvtfl(make(i, 16233, 0, 1));
        cvtfl(make(i, 16383, 0xab0ffd000000, 0));
        cvtfl(make(i, 16383, 0xab0ffd000001, 0));
        cvtfl(make(i, 16383, 0xab0ffeffffff, 0));
        cvtfl(make(i, 16383, 0xab0fff000000, 0));
        cvtfl(make(i, 16383, 0xab0fff000001, 0));
        cvtfl(make(i, 16510, 0xfffffeffffff, -1));
        cvtfl(make(i, 16510, 0xffffff000000, 0));
        cvtfl(make(i, 16511, 0, 0));
        cvtfl(make(i, 32767, 0, 0));
        cvtfl(make(i, 32767, 0, 1));
        cvtfl(make(i, 32767, 0x4cbe01ac5f40, 0x75cee3c6afbb00b5));
        cvtfl(make(i, 32767, 0x800000000000, 1));
        cvtfl(make(i, 32767, 0xa11caaaf6a52, 0x696033e871eab099));
    }

    /* long double -> double, for both signs. */
    for (i = 0; i < 2; i++) {
        cvtdl(make(i, 0, 0, 0));
        cvtdl(make(i, 15307, -1, -1));
        cvtdl(make(i, 15308, 0, 0));
        cvtdl(make(i, 15308, 0, 1));
        cvtdl(make(i, 16383, 0xabc123abc0ff, 0xe800000000000000));
        cvtdl(make(i, 16383, 0xabc123abc0ff, 0xe800000000000001));
        cvtdl(make(i, 16383, 0xabc123abc0ff, 0xf7ffffffffffffff));
        cvtdl(make(i, 16383, 0xabc123abc0ff, 0xf800000000000000));
        cvtdl(make(i, 16383, 0xabc123abc0ff, 0xf800000000000001));
        cvtdl(make(i, 17406, 0xffffffffffff, 0xf7ffffffffffffff));
        cvtdl(make(i, 17406, 0xffffffffffff, 0xf800000000000000));
        cvtdl(make(i, 17407, 0, 0));
        cvtdl(make(i, 32767, 0, 0));
        cvtdl(make(i, 32767, 0, 1));
        cvtdl(make(i, 32767, 0x4cbe01ac5f40, 0x75cee3c6afbb00b5));
        cvtdl(make(i, 32767, 0x800000000000, 1));
        cvtdl(make(i, 32767, 0xa11caaaf6a52, 0x696033e871eab099));
    }
}
void tests(void) |
{ |
cmps(); |
nanz(); |
adds(); |
muls(); |
divs(); |
cvts(); |
} |
/* Entry point: runs the test groups when compiled for AArch64; on any
   other target it only prints an explanatory message.  Always returns 0. */
int main()
{
#ifndef __aarch64__
    printf("This test program is intended for a little-endian architecture\n"
           "with an IEEE-standard 128-bit long double.\n");
#else
    tests();
#endif
    return 0;
}