diff options
Diffstat (limited to 'lib')
138 files changed, 7346 insertions, 0 deletions
diff --git a/lib/Makefile.mk b/lib/Makefile.mk
new file mode 100644
index 000000000..0855879ea
--- /dev/null
+++ b/lib/Makefile.mk
@@ -0,0 +1,20 @@
+#===- lib/Makefile.mk --------------------------------------*- Makefile -*--===#
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+#===------------------------------------------------------------------------===#
+
+# Directory described by this fragment and its per-architecture subdirectories.
+# NOTE(review): presumably make/subdir.mk (included below) consumes all of the
+# variables set here -- confirm against that file.
+Dir := lib
+SubDirs := i386 ppc x86_64
+
+# Sources:  base names (directory part stripped) of every .c file directly
+#           under $(Dir).
+# ObjNames: the same names with the .c suffix replaced by .o.
+Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file)))
+ObjNames := $(Sources:%.c=%.o)
+Target := Generic
+
+# FIXME: use automatic dependencies?
+# For now the dependency list is every header directly under $(Dir).
+Dependencies := $(wildcard $(Dir)/*.h)
+
+include make/subdir.mk
diff --git a/lib/absvdi2.c b/lib/absvdi2.c
new file mode 100644
index 000000000..b49554ecc
--- /dev/null
+++ b/lib/absvdi2.c
@@ -0,0 +1,29 @@
+//===-- absvdi2.c - Implement __absvdi2 -----------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __absvdi2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+#include <stdlib.h>
+
+// Returns: the absolute value of a
+
+// Effects: aborts if a is the most negative di_int, whose absolute value
+//          cannot be represented
+
+di_int
+__absvdi2(di_int a)
+{
+    const int N = (int)(sizeof(di_int) * CHAR_BIT);
+    // Build the most negative value with an unsigned shift: shifting a signed
+    // 1 into the sign bit is undefined behavior.
+    if (a == (di_int)((du_int)1 << (N - 1)))
+        abort();
+    const di_int t = a >> (N - 1);  // t = a < 0 ? -1 : 0
+    return (a ^ t) - t;             // negate if t == -1
+}
diff --git a/lib/absvsi2.c b/lib/absvsi2.c
new file mode 100644
index 000000000..5b28da089
--- /dev/null
+++ b/lib/absvsi2.c
@@ -0,0 +1,29 @@
+//===-- absvsi2.c - Implement __absvsi2 -----------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __absvsi2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+#include <stdlib.h>
+
+// Returns: the absolute value of a
+
+// Effects: aborts if a is the most negative si_int, whose absolute value
+//          cannot be represented
+
+si_int
+__absvsi2(si_int a)
+{
+    const int N = (int)(sizeof(si_int) * CHAR_BIT);
+    // Build the most negative value with an unsigned shift: shifting a signed
+    // 1 into the sign bit is undefined behavior.
+    if (a == (si_int)((su_int)1 << (N - 1)))
+        abort();
+    const si_int t = a >> (N - 1);  // t = a < 0 ? -1 : 0
+    return (a ^ t) - t;             // negate if t == -1
+}
diff --git a/lib/absvti2.c b/lib/absvti2.c
new file mode 100644
index 000000000..c0869719d
--- /dev/null
+++ b/lib/absvti2.c
@@ -0,0 +1,33 @@
+//===-- absvti2.c - Implement __absvti2 -----------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __absvti2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#if __x86_64
+
+#include "int_lib.h"
+#include <stdlib.h>
+
+// Returns: the absolute value of a
+
+// Effects: aborts if a is the most negative ti_int, whose absolute value
+//          cannot be represented
+
+ti_int
+__absvti2(ti_int a)
+{
+    const int N = (int)(sizeof(ti_int) * CHAR_BIT);
+    // Build the most negative value with an unsigned shift: shifting a signed
+    // 1 into the sign bit is undefined behavior.
+    if (a == (ti_int)((tu_int)1 << (N - 1)))
+        abort();
+    const ti_int s = a >> (N - 1);  // s = a < 0 ? -1 : 0
+    return (a ^ s) - s;             // negate if s == -1
+}
+
+#endif
diff --git a/lib/addvdi3.c b/lib/addvdi3.c
new file mode 100644
index 000000000..88436e86c
--- /dev/null
+++ b/lib/addvdi3.c
@@ -0,0 +1,36 @@
+//===-- addvdi3.c - Implement __addvdi3 -----------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __addvdi3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+#include <stdlib.h>
+
+// Returns: a + b
+
+// Effects: aborts if a + b overflows
+
+di_int
+__addvdi3(di_int a, di_int b)
+{
+    // Form the sum in unsigned arithmetic, which wraps. A signed a + b that
+    // overflows is undefined behavior, and a compiler may then assume the
+    // checks below can never fire and remove them.
+    di_int s = (di_int)((du_int)a + (du_int)b);
+    if (b >= 0)
+    {
+        // Adding a non-negative b must not make the result smaller.
+        if (s < a)
+            abort();
+    }
+    else
+    {
+        // Adding a negative b must make the result strictly smaller.
+        if (s >= a)
+            abort();
+    }
+    return s;
+}
diff --git a/lib/addvsi3.c b/lib/addvsi3.c
new file mode 100644
index 000000000..0eb10c242
--- /dev/null
+++ b/lib/addvsi3.c
@@ -0,0 +1,36 @@
+//===-- addvsi3.c - Implement __addvsi3 -----------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __addvsi3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+#include <stdlib.h>
+
+// Returns: a + b
+
+// Effects: aborts if a + b overflows
+
+si_int
+__addvsi3(si_int a, si_int b)
+{
+    // Form the sum in unsigned arithmetic, which wraps. A signed a + b that
+    // overflows is undefined behavior, and a compiler may then assume the
+    // checks below can never fire and remove them.
+    si_int s = (si_int)((su_int)a + (su_int)b);
+    if (b >= 0)
+    {
+        // Adding a non-negative b must not make the result smaller.
+        if (s < a)
+            abort();
+    }
+    else
+    {
+        // Adding a negative b must make the result strictly smaller.
+        if (s >= a)
+            abort();
+    }
+    return s;
+}
diff --git a/lib/addvti3.c b/lib/addvti3.c
new file mode 100644
index 000000000..5c87d3e8c
--- /dev/null
+++ b/lib/addvti3.c
@@ -0,0 +1,40 @@
+//===-- addvti3.c - Implement __addvti3 -----------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __addvti3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#if __x86_64
+
+#include "int_lib.h"
+#include <stdlib.h>
+
+// Returns: a + b
+
+// Effects: aborts if a + b overflows
+
+ti_int
+__addvti3(ti_int a, ti_int b)
+{
+    // Form the sum in unsigned arithmetic, which wraps. A signed a + b that
+    // overflows is undefined behavior, and a compiler may then assume the
+    // checks below can never fire and remove them.
+    ti_int s = (ti_int)((tu_int)a + (tu_int)b);
+    if (b >= 0)
+    {
+        // Adding a non-negative b must not make the result smaller.
+        if (s < a)
+            abort();
+    }
+    else
+    {
+        // Adding a negative b must make the result strictly smaller.
+        if (s >= a)
+            abort();
+    }
+    return s;
+}
+
+#endif
diff --git a/lib/apple_versioning.c b/lib/apple_versioning.c
new file mode 100644
index 000000000..44373f4d7
--- /dev/null
+++ b/lib/apple_versioning.c
@@ -0,0 +1,146 @@
+//===-- apple_versioning.c - Adds versioning symbols for ld ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+
+
+#if __APPLE__
+  // NOT_HERE_BEFORE_10_6(sym) expands, for each listed OS release, to an
+  // exported (default-visibility) one-byte constant whose assembler name is
+  // "$ld$hide$os10.N$_" followed by sym.  The ppc variant covers 10.3, 10.4
+  // and 10.5; every other architecture covers 10.4 and 10.5.
+  // NOTE(review): presumably ld reads these names as "treat sym as absent
+  // when targeting that release" -- confirm against the ld documentation.
+  #if __ppc__
+    #define NOT_HERE_BEFORE_10_6(sym) \
+        extern const char sym##_tmp3 __asm("$ld$hide$os10.3$_" #sym ); \
+            __attribute__((visibility("default"))) const char sym##_tmp3 = 0; \
+        extern const char sym##_tmp4 __asm("$ld$hide$os10.4$_" #sym ); \
+            __attribute__((visibility("default"))) const char sym##_tmp4 = 0; \
+        extern const char sym##_tmp5 __asm("$ld$hide$os10.5$_" #sym ); \
+            __attribute__((visibility("default"))) const char sym##_tmp5 = 0;
+  #else
+    #define NOT_HERE_BEFORE_10_6(sym) \
+        extern const char sym##_tmp4 __asm("$ld$hide$os10.4$_" #sym ); \
+            __attribute__((visibility("default"))) const char sym##_tmp4 = 0; \
+        extern const char sym##_tmp5 __asm("$ld$hide$os10.5$_" #sym ); \
+            __attribute__((visibility("default"))) const char sym##_tmp5 = 0;
+  #endif
+
+
+//
+// Symbols in libSystem.dylib in 10.6 and later,
+// but are in libgcc_s.dylib in earlier versions
+//
+NOT_HERE_BEFORE_10_6(__absvdi2)
+NOT_HERE_BEFORE_10_6(__absvsi2)
+NOT_HERE_BEFORE_10_6(__absvti2)
+NOT_HERE_BEFORE_10_6(__addvdi3)
+NOT_HERE_BEFORE_10_6(__addvsi3)
+NOT_HERE_BEFORE_10_6(__addvti3)
+NOT_HERE_BEFORE_10_6(__ashldi3)
+NOT_HERE_BEFORE_10_6(__ashlti3)
+NOT_HERE_BEFORE_10_6(__ashrdi3)
+NOT_HERE_BEFORE_10_6(__ashrti3)
+NOT_HERE_BEFORE_10_6(__clear_cache)
+NOT_HERE_BEFORE_10_6(__clzdi2)
+NOT_HERE_BEFORE_10_6(__clzsi2)
+NOT_HERE_BEFORE_10_6(__clzti2)
+NOT_HERE_BEFORE_10_6(__cmpdi2)
+NOT_HERE_BEFORE_10_6(__cmpti2)
+NOT_HERE_BEFORE_10_6(__ctzdi2)
+NOT_HERE_BEFORE_10_6(__ctzsi2)
+NOT_HERE_BEFORE_10_6(__ctzti2)
+NOT_HERE_BEFORE_10_6(__divdc3)
+NOT_HERE_BEFORE_10_6(__divdi3)
+NOT_HERE_BEFORE_10_6(__divsc3)
+NOT_HERE_BEFORE_10_6(__divtc3)
+NOT_HERE_BEFORE_10_6(__divti3)
+NOT_HERE_BEFORE_10_6(__divxc3)
+NOT_HERE_BEFORE_10_6(__enable_execute_stack)
+NOT_HERE_BEFORE_10_6(__ffsdi2)
+NOT_HERE_BEFORE_10_6(__ffsti2)
+NOT_HERE_BEFORE_10_6(__fixdfdi)
+NOT_HERE_BEFORE_10_6(__fixdfti)
+NOT_HERE_BEFORE_10_6(__fixsfdi)
+NOT_HERE_BEFORE_10_6(__fixsfti)
+NOT_HERE_BEFORE_10_6(__fixtfdi)
+NOT_HERE_BEFORE_10_6(__fixunsdfdi)
+NOT_HERE_BEFORE_10_6(__fixunsdfsi)
+NOT_HERE_BEFORE_10_6(__fixunsdfti)
+NOT_HERE_BEFORE_10_6(__fixunssfdi)
+NOT_HERE_BEFORE_10_6(__fixunssfsi)
+NOT_HERE_BEFORE_10_6(__fixunssfti)
+NOT_HERE_BEFORE_10_6(__fixunstfdi)
+NOT_HERE_BEFORE_10_6(__fixunsxfdi)
+NOT_HERE_BEFORE_10_6(__fixunsxfsi)
+NOT_HERE_BEFORE_10_6(__fixunsxfti)
+NOT_HERE_BEFORE_10_6(__fixxfdi)
+NOT_HERE_BEFORE_10_6(__fixxfti)
+NOT_HERE_BEFORE_10_6(__floatdidf)
+NOT_HERE_BEFORE_10_6(__floatdisf)
+NOT_HERE_BEFORE_10_6(__floatditf)
+NOT_HERE_BEFORE_10_6(__floatdixf)
+NOT_HERE_BEFORE_10_6(__floattidf)
+NOT_HERE_BEFORE_10_6(__floattisf)
+NOT_HERE_BEFORE_10_6(__floattixf)
+NOT_HERE_BEFORE_10_6(__floatundidf)
+NOT_HERE_BEFORE_10_6(__floatundisf)
+NOT_HERE_BEFORE_10_6(__floatunditf)
+NOT_HERE_BEFORE_10_6(__floatundixf)
+NOT_HERE_BEFORE_10_6(__floatuntidf)
+NOT_HERE_BEFORE_10_6(__floatuntisf)
+NOT_HERE_BEFORE_10_6(__floatuntixf)
+NOT_HERE_BEFORE_10_6(__gcc_personality_v0)
+NOT_HERE_BEFORE_10_6(__lshrdi3)
+NOT_HERE_BEFORE_10_6(__lshrti3)
+NOT_HERE_BEFORE_10_6(__moddi3)
+NOT_HERE_BEFORE_10_6(__modti3)
+NOT_HERE_BEFORE_10_6(__muldc3)
+NOT_HERE_BEFORE_10_6(__muldi3)
+NOT_HERE_BEFORE_10_6(__mulsc3)
+NOT_HERE_BEFORE_10_6(__multc3)
+NOT_HERE_BEFORE_10_6(__multi3)
+NOT_HERE_BEFORE_10_6(__mulvdi3)
+NOT_HERE_BEFORE_10_6(__mulvsi3)
+NOT_HERE_BEFORE_10_6(__mulvti3)
+NOT_HERE_BEFORE_10_6(__mulxc3)
+NOT_HERE_BEFORE_10_6(__negdi2)
+NOT_HERE_BEFORE_10_6(__negti2)
+NOT_HERE_BEFORE_10_6(__negvdi2)
+NOT_HERE_BEFORE_10_6(__negvsi2)
+NOT_HERE_BEFORE_10_6(__negvti2)
+NOT_HERE_BEFORE_10_6(__paritydi2)
+NOT_HERE_BEFORE_10_6(__paritysi2)
+NOT_HERE_BEFORE_10_6(__parityti2)
+NOT_HERE_BEFORE_10_6(__popcountdi2)
+NOT_HERE_BEFORE_10_6(__popcountsi2)
+NOT_HERE_BEFORE_10_6(__popcountti2)
+NOT_HERE_BEFORE_10_6(__powidf2)
+NOT_HERE_BEFORE_10_6(__powisf2)
+NOT_HERE_BEFORE_10_6(__powitf2)
+NOT_HERE_BEFORE_10_6(__powixf2)
+NOT_HERE_BEFORE_10_6(__subvdi3)
+NOT_HERE_BEFORE_10_6(__subvsi3)
+NOT_HERE_BEFORE_10_6(__subvti3)
+NOT_HERE_BEFORE_10_6(__ucmpdi2)
+NOT_HERE_BEFORE_10_6(__ucmpti2)
+NOT_HERE_BEFORE_10_6(__udivdi3)
+NOT_HERE_BEFORE_10_6(__udivmoddi4)
+NOT_HERE_BEFORE_10_6(__udivmodti4)
+NOT_HERE_BEFORE_10_6(__udivti3)
+NOT_HERE_BEFORE_10_6(__umoddi3)
+NOT_HERE_BEFORE_10_6(__umodti3)
+
+
+// Additional symbols that are listed only when building for ppc.
+#if __ppc__
+NOT_HERE_BEFORE_10_6(__gcc_qadd)
+NOT_HERE_BEFORE_10_6(__gcc_qdiv)
+NOT_HERE_BEFORE_10_6(__gcc_qmul)
+NOT_HERE_BEFORE_10_6(__gcc_qsub)
+NOT_HERE_BEFORE_10_6(__trampoline_setup)
+#endif
+
+
+#endif // __APPLE__
+
diff --git a/lib/ashldi3.c b/lib/ashldi3.c
new file mode 100644
index 000000000..e9e24c31d
--- /dev/null
+++ b/lib/ashldi3.c
@@ -0,0 +1,40 @@
+//===-- ashldi3.c - Implement __ashldi3 -----------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __ashldi3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: a << b
+
+// Precondition: 0 <= b < bits_in_dword
+
+di_int
+__ashldi3(di_int a, si_int b)
+{
+    const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT);
+    dwords input;
+    dwords result;
+    input.all = a;
+    if (b & bits_in_word)  // bits_in_word <= b < bits_in_dword
+    {
+        // Every bit of the low word moves into (or past) the high word.
+        result.low = 0;
+        result.high = input.low << (b - bits_in_word);
+    }
+    else  // 0 <= b < bits_in_word
+    {
+        // b == 0 is returned early; otherwise the low word below would be
+        // shifted right by a full bits_in_word.
+        if (b == 0)
+            return a;
+        result.low = input.low << b;
+        // Shift the high word as an unsigned bit pattern: left-shifting a
+        // negative signed value is undefined behavior.  (dwords is declared in
+        // int_lib.h; the cast changes nothing if high is already unsigned.)
+        result.high = ((su_int)input.high << b) | (input.low >> (bits_in_word - b));
+    }
+    return result.all;
+}
diff --git a/lib/ashlti3.c b/lib/ashlti3.c
new file mode 100644
index 000000000..1b6968192
--- /dev/null
+++ b/lib/ashlti3.c
@@ -0,0 +1,44 @@
+//===-- ashlti3.c - Implement __ashlti3 -----------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __ashlti3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#if __x86_64
+
+#include "int_lib.h"
+
+// Returns: a << b
+
+// Precondition: 0 <= b < bits_in_tword
+
+ti_int
+__ashlti3(ti_int a, si_int b)
+{
+    const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT);
+    twords input;
+    twords result;
+    input.all = a;
+    if (b & bits_in_dword)  // bits_in_dword <= b < bits_in_tword
+    {
+        // Every bit of the low half moves into (or past) the high half.
+        result.low = 0;
+        result.high = input.low << (b - bits_in_dword);
+    }
+    else  // 0 <= b < bits_in_dword
+    {
+        // b == 0 is returned early; otherwise the low half below would be
+        // shifted right by a full bits_in_dword.
+        if (b == 0)
+            return a;
+        result.low = input.low << b;
+        // Shift the high half as an unsigned bit pattern: left-shifting a
+        // negative signed value is undefined behavior.
+        result.high = ((du_int)input.high << b) | (input.low >> (bits_in_dword - b));
+    }
+    return result.all;
+}
+
+#endif
diff --git a/lib/ashrdi3.c b/lib/ashrdi3.c
new file mode 100644
index 000000000..fc2e6d843
--- /dev/null
+++ b/lib/ashrdi3.c
@@ -0,0 +1,41 @@
+//===-- ashrdi3.c - Implement __ashrdi3 -----------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __ashrdi3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: arithmetic a >> b
+
+// Precondition: 0 <= b < bits_in_dword
+
+di_int
+__ashrdi3(di_int a, si_int b)
+{
+    const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT);
+    dwords input;
+    dwords result;
+    input.all = a;
+    if (b & bits_in_word)  // bits_in_word <= b < bits_in_dword
+    {
+        // result.high = input.high < 0 ? -1 : 0
+        result.high = input.high >> (bits_in_word - 1);
+        result.low = input.high >> (b - bits_in_word);
+    }
+    else  // 0 <= b < bits_in_word
+    {
+        // b == 0 is returned early; otherwise the high word below would be
+        // shifted left by a full bits_in_word.
+        if (b == 0)
+            return a;
+        result.high = input.high >> b;
+        // The bits leaving the high word are moved as an unsigned bit pattern:
+        // left-shifting a negative signed value is undefined behavior.
+        result.low = ((su_int)input.high << (bits_in_word - b)) | (input.low >> b);
+    }
+    return result.all;
+}
diff --git a/lib/ashrti3.c b/lib/ashrti3.c
new file mode 100644
index 000000000..bfdd2eb99
--- /dev/null
+++ b/lib/ashrti3.c
@@ -0,0 +1,45 @@
+//===-- ashrti3.c - Implement __ashrti3 -----------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __ashrti3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#if __x86_64
+
+#include "int_lib.h"
+
+// Returns: arithmetic a >> b
+
+// Precondition: 0 <= b < bits_in_tword
+
+ti_int
+__ashrti3(ti_int a, si_int b)
+{
+    const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT);
+    twords input;
+    twords result;
+    input.all = a;
+    if (b & bits_in_dword)  // bits_in_dword <= b < bits_in_tword
+    {
+        // result.high = input.high < 0 ? -1 : 0
+        result.high = input.high >> (bits_in_dword - 1);
+        result.low = input.high >> (b - bits_in_dword);
+    }
+    else  // 0 <= b < bits_in_dword
+    {
+        // b == 0 is returned early; otherwise the high half below would be
+        // shifted left by a full bits_in_dword.
+        if (b == 0)
+            return a;
+        result.high = input.high >> b;
+        // The bits leaving the high half are moved as an unsigned bit pattern:
+        // left-shifting a negative signed value is undefined behavior.
+        result.low = ((du_int)input.high << (bits_in_dword - b)) | (input.low >> b);
+    }
+    return result.all;
+}
+
+#endif
diff --git a/lib/clear_cache.c b/lib/clear_cache.c
new file mode 100644
index 000000000..8176c24e6
--- /dev/null
+++ b/lib/clear_cache.c
@@ -0,0 +1,38 @@
+//===-- clear_cache.c - Implement __clear_cache ---------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License.
See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include <stdlib.h>
+
+#if __APPLE__
+  #include <libkern/OSCacheControl.h>
+#endif
+
+//
+// The compiler generates calls to __clear_cache() when creating
+// trampoline functions on the stack for use with nested functions.
+// It is expected to invalidate the instruction cache for the
+// specified range.
+//
+// start: first byte of the range
+// end:   one past the last byte of the range (the length is end - start)
+//
+void __clear_cache(void* start, void* end)
+{
+#if __i386__ || __x86_64__
+//
+// Intel processors have a unified instruction and data cache
+// so there is nothing to do
+//
+    (void)start;
+    (void)end;
+#else
+  #if __APPLE__
+    // On Darwin, sys_icache_invalidate() provides this functionality.
+    // The length is computed on char pointers because ISO C does not define
+    // arithmetic on void pointers.
+    sys_icache_invalidate(start, (size_t)((char*)end - (char*)start));
+  #else
+    // No implementation for this target: abort rather than return with the
+    // instruction cache untouched.
+    (void)start;
+    (void)end;
+    abort();
+  #endif
+#endif
+}
+
diff --git a/lib/clzdi2.c b/lib/clzdi2.c
new file mode 100644
index 000000000..7e58a5aa7
--- /dev/null
+++ b/lib/clzdi2.c
@@ -0,0 +1,28 @@
+//===-- clzdi2.c - Implement __clzdi2 -------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __clzdi2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: the number of leading 0-bits
+
+// Precondition: a != 0
+
+si_int
+__clzdi2(di_int a)
+{
+    dwords x;
+    x.all = a;
+    // f is all ones when the high word is zero, otherwise 0.
+    const si_int f = -(x.high == 0);
+    // Count within the high word, or within the low word when the high word
+    // is zero; in the latter case add the width of the skipped high word.
+    return __builtin_clz((x.high & ~f) | (x.low & f)) +
+           (f & ((si_int)(sizeof(si_int) * CHAR_BIT)));
+}
diff --git a/lib/clzsi2.c b/lib/clzsi2.c
new file mode 100644
index 000000000..52062ab22
--- /dev/null
+++ b/lib/clzsi2.c
@@ -0,0 +1,51 @@
+//===-- clzsi2.c - Implement __clzsi2 -------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __clzsi2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: the number of leading 0-bits
+
+// Precondition: a != 0
+
+// Branch-free binary search: each step tests whether the upper half of the
+// remaining window is zero, adds that half's width to r if so, and otherwise
+// shifts the upper half down so the next step examines it.
+si_int
+__clzsi2(si_int a)
+{
+    su_int x = (su_int)a;
+    si_int t = ((x & 0xFFFF0000) == 0) << 4;  // if (x is small) t = 16 else 0
+    x >>= 16 - t;      // x = [0 - 0xFFFF]
+    su_int r = t;      // r = [0, 16]
+    // return r + clz(x)
+    t = ((x & 0xFF00) == 0) << 3;
+    x >>= 8 - t;       // x = [0 - 0xFF]
+    r += t;            // r = [0, 8, 16, 24]
+    // return r + clz(x)
+    t = ((x & 0xF0) == 0) << 2;
+    x >>= 4 - t;       // x = [0 - 0xF]
+    r += t;            // r = [0, 4, 8, 12, 16, 20, 24, 28]
+    // return r + clz(x)
+    t = ((x & 0xC) == 0) << 1;
+    x >>= 2 - t;       // x = [0 - 3]
+    r += t;            // r = [0 - 30] and is even
+    // return r + clz(x)
+// The branch-less return statement below is equivalent
+// to the following switch statement:
+//     switch (x)
+//     {
+//     case 0:
+//         return r + 2;
+//     case 1:
+//         return r + 1;
+//     case 2:
+//     case 3:
+//         return r;
+//     }
+    return r + ((2 - x) & -((x & 2) == 0));
+}
diff --git a/lib/clzti2.c b/lib/clzti2.c
new file mode 100644
index 000000000..b53106977
--- /dev/null
+++ b/lib/clzti2.c
@@ -0,0 +1,32 @@
+//===-- clzti2.c - Implement __clzti2 -------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __clzti2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#if __x86_64
+
+#include "int_lib.h"
+
+// Returns: the number of leading 0-bits
+
+// Precondition: a != 0
+
+si_int
+__clzti2(ti_int a)
+{
+    twords x;
+    x.all = a;
+    // f is all ones when the high half is zero, otherwise 0.
+    const di_int f = -(x.high == 0);
+    // Count within the high half, or within the low half when the high half
+    // is zero; in the latter case add the width of the skipped high half.
+    return __builtin_clzll((x.high & ~f) | (x.low & f)) +
+           ((si_int)f & ((si_int)(sizeof(di_int) * CHAR_BIT)));
+}
+
+#endif
diff --git a/lib/cmpdi2.c b/lib/cmpdi2.c
new file mode 100644
index 000000000..f398ea257
--- /dev/null
+++ b/lib/cmpdi2.c
@@ -0,0 +1,36 @@
+//===-- cmpdi2.c - Implement __cmpdi2 -------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __cmpdi2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Three-way comparison of a and b.
+//
+// Returns: 0 if a < b
+//          1 if a == b
+//          2 if a > b
+
+si_int
+__cmpdi2(di_int a, di_int b)
+{
+    dwords lhs;
+    dwords rhs;
+    lhs.all = a;
+    rhs.all = b;
+    // The high words decide unless they are equal; then the low words do.
+    if (lhs.high != rhs.high)
+        return lhs.high < rhs.high ? 0 : 2;
+    if (lhs.low != rhs.low)
+        return lhs.low < rhs.low ? 0 : 2;
+    return 1;
+}
diff --git a/lib/cmpti2.c b/lib/cmpti2.c
new file mode 100644
index 000000000..775977a74
--- /dev/null
+++ b/lib/cmpti2.c
@@ -0,0 +1,40 @@
+//===-- cmpti2.c - Implement __cmpti2 -------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __cmpti2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#if __x86_64
+
+#include "int_lib.h"
+
+// Three-way comparison of a and b.
+//
+// Returns: 0 if a < b
+//          1 if a == b
+//          2 if a > b
+
+si_int
+__cmpti2(ti_int a, ti_int b)
+{
+    twords lhs;
+    twords rhs;
+    lhs.all = a;
+    rhs.all = b;
+    // The high halves decide unless they are equal; then the low halves do.
+    if (lhs.high != rhs.high)
+        return lhs.high < rhs.high ? 0 : 2;
+    if (lhs.low != rhs.low)
+        return lhs.low < rhs.low ? 0 : 2;
+    return 1;
+}
+
+#endif
diff --git a/lib/ctzdi2.c b/lib/ctzdi2.c
new file mode 100644
index 000000000..8ea3ab319
--- /dev/null
+++ b/lib/ctzdi2.c
@@ -0,0 +1,28 @@
+//===-- ctzdi2.c - Implement __ctzdi2 -------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __ctzdi2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: the number of trailing 0-bits
+
+// Precondition: a != 0
+
+si_int
+__ctzdi2(di_int a)
+{
+    dwords x;
+    x.all = a;
+    // f is all ones when the low word is zero, otherwise 0.
+    const si_int f = -(x.low == 0);
+    // Count within the low word, or within the high word when the low word
+    // is zero; in the latter case add the width of the skipped low word.
+    return __builtin_ctz((x.high & f) | (x.low & ~f)) +
+           (f & ((si_int)(sizeof(si_int) * CHAR_BIT)));
+}
diff --git a/lib/ctzsi2.c b/lib/ctzsi2.c
new file mode 100644
index 000000000..1e6c2fe18
--- /dev/null
+++ b/lib/ctzsi2.c
@@ -0,0 +1,54 @@
+//===-- ctzsi2.c - Implement __ctzsi2 -------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __ctzsi2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: the number of trailing 0-bits
+
+// Precondition: a != 0
+
+// Branch-free binary search: each step tests whether the lower half of the
+// remaining window is zero and, if so, adds that half's width to r and shifts
+// it out so the next step examines the upper half.
+si_int
+__ctzsi2(si_int a)
+{
+    su_int x = (su_int)a;
+    si_int t = ((x & 0x0000FFFF) == 0) << 4;  // if (x has no small bits) t = 16 else 0
+    x >>= t;           // x = [0 - 0xFFFF] + higher garbage bits
+    su_int r = t;      // r = [0, 16]
+    // return r + ctz(x)
+    t = ((x & 0x00FF) == 0) << 3;
+    x >>= t;           // x = [0 - 0xFF] + higher garbage bits
+    r += t;            // r = [0, 8, 16, 24]
+    // return r + ctz(x)
+    t = ((x & 0x0F) == 0) << 2;
+    x >>= t;           // x = [0 - 0xF] + higher garbage bits
+    r += t;            // r = [0, 4, 8, 12, 16, 20, 24, 28]
+    // return r + ctz(x)
+    t = ((x & 0x3) == 0) << 1;
+    x >>= t;
+    x &= 3;            // x = [0 - 3]
+    r += t;            // r = [0 - 30] and is even
+    // return r + ctz(x)
+// The branch-less return statement below is equivalent
+// to the following switch statement:
+//     switch (x)
+//     {
+//     case 0:
+//         return r + 2;
+//     case 2:
+//         return r + 1;
+//     case 1:
+//     case 3:
+//         return r;
+//     }
+    return r + ((2 - (x >> 1)) & -((x & 1) == 0));
+}
diff --git a/lib/ctzti2.c b/lib/ctzti2.c
new file mode 100644
index 000000000..828db2662
--- /dev/null
+++ b/lib/ctzti2.c
@@ -0,0 +1,32 @@
+//===-- ctzti2.c - Implement __ctzti2 -------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __ctzti2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#if __x86_64
+
+#include "int_lib.h"
+
+// Returns: the number of trailing 0-bits
+
+// Precondition: a != 0
+
+si_int
+__ctzti2(ti_int a)
+{
+    twords x;
+    x.all = a;
+    // f is all ones when the low half is zero, otherwise 0.
+    const di_int f = -(x.low == 0);
+    // Count within the low half, or within the high half when the low half
+    // is zero; in the latter case add the width of the skipped low half.
+    return __builtin_ctzll((x.high & f) | (x.low & ~f)) +
+           ((si_int)f & ((si_int)(sizeof(di_int) * CHAR_BIT)));
+}
+
+#endif
diff --git a/lib/divdc3.c b/lib/divdc3.c
new file mode 100644
index 000000000..1b19e665f
--- /dev/null
+++ b/lib/divdc3.c
@@ -0,0 +1,58 @@
+//===-- divdc3.c - Implement __divdc3 -------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __divdc3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+#include <math.h>
+#include <complex.h>
+
+// Returns: the quotient of (a + ib) / (c + id)
+
+// __a, __b: real and imaginary parts of the dividend
+// __c, __d: real and imaginary parts of the divisor
+
+double _Complex
+__divdc3(double __a, double __b, double __c, double __d)
+{
+    // Scale the divisor by a power of two taken from the exponent of
+    // max(|c|, |d|) before squaring it; the same power is applied to the
+    // quotient below.  The scaling is skipped when that exponent is not finite.
+    int __ilogbw = 0;
+    double __logbw = logb(fmax(fabs(__c), fabs(__d)));
+    if (isfinite(__logbw))
+    {
+        __ilogbw = (int)__logbw;
+        __c = scalbn(__c, -__ilogbw);
+        __d = scalbn(__d, -__ilogbw);
+    }
+    double __denom = __c * __c + __d * __d;
+    double _Complex z;
+    __real__ z = scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);
+    __imag__ z = scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);
+    // Only when both parts came out NaN, try to recover an infinite or zero
+    // result.  The cases are tried in this order and at most one applies.
+    if (isnan(__real__ z) && isnan(__imag__ z))
+    {
+        // Zero divisor with a dividend that is not entirely NaN: the result
+        // is the dividend times an infinity carrying the sign of c.
+        if ((__denom == 0.0) && (!isnan(__a) || !isnan(__b)))
+        {
+            __real__ z = copysign(INFINITY, __c) * __a;
+            __imag__ z = copysign(INFINITY, __c) * __b;
+        }
+        // Infinite dividend, finite divisor: replace each dividend part by
+        // +-1 (infinite) or +-0 (finite) and scale the products by infinity.
+        else if ((isinf(__a) || isinf(__b)) && isfinite(__c) && isfinite(__d))
+        {
+            __a = copysign(isinf(__a) ? 1.0 : 0.0, __a);
+            __b = copysign(isinf(__b) ? 1.0 : 0.0, __b);
+            __real__ z = INFINITY * (__a * __c + __b * __d);
+            __imag__ z = INFINITY * (__b * __c - __a * __d);
+        }
+        // Infinite divisor, finite dividend: replace each divisor part by
+        // +-1 (infinite) or +-0 (finite) and scale the products by zero.
+        else if (isinf(__logbw) && __logbw > 0.0 && isfinite(__a) && isfinite(__b))
+        {
+            __c = copysign(isinf(__c) ? 1.0 : 0.0, __c);
+            __d = copysign(isinf(__d) ? 1.0 : 0.0, __d);
+            __real__ z = 0.0 * (__a * __c + __b * __d);
+            __imag__ z = 0.0 * (__b * __c - __a * __d);
+        }
+    }
+    return z;
+}
diff --git a/lib/divdi3.c b/lib/divdi3.c
new file mode 100644
index 000000000..9580156b9
--- /dev/null
+++ b/lib/divdi3.c
@@ -0,0 +1,30 @@
+//===-- divdi3.c - Implement __divdi3 -------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __divdi3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+du_int __udivmoddi4(du_int a, du_int b, du_int* rem);
+
+// Returns: a / b
+
+// The quotient is computed on the magnitudes of a and b by __udivmoddi4 and
+// then given the sign s_a ^ s_b.
+
+di_int
+__divdi3(di_int a, di_int b)
+{
+    const int bits_in_dword_m1 = (int)(sizeof(di_int) * CHAR_BIT) - 1;
+    di_int s_a = a >> bits_in_dword_m1;                     // s_a = a < 0 ? -1 : 0
+    di_int s_b = b >> bits_in_dword_m1;                     // s_b = b < 0 ? -1 : 0
+    // Take the magnitudes in unsigned arithmetic: negating the most negative
+    // di_int as a signed value overflows, which is undefined behavior.
+    du_int u_a = ((du_int)a ^ (du_int)s_a) - (du_int)s_a;   // negate if s_a == -1
+    du_int u_b = ((du_int)b ^ (du_int)s_b) - (du_int)s_b;   // negate if s_b == -1
+    s_a ^= s_b;                                             // sign of quotient
+    du_int q = __udivmoddi4(u_a, u_b, (du_int*)0);
+    return (di_int)((q ^ (du_int)s_a) - (du_int)s_a);       // negate if s_a == -1
+}
diff --git a/lib/divsc3.c b/lib/divsc3.c
new file mode 100644
index 000000000..bea15cdc4
--- /dev/null
+++ b/lib/divsc3.c
@@ -0,0 +1,58 @@
+//===-- divsc3.c - Implement __divsc3 -------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __divsc3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+#include <math.h>
+#include <complex.h>
+
+// Returns: the quotient of (a + ib) / (c + id)
+
+// __a, __b: real and imaginary parts of the dividend
+// __c, __d: real and imaginary parts of the divisor
+
+float _Complex
+__divsc3(float __a, float __b, float __c, float __d)
+{
+    // Scale the divisor by a power of two taken from the exponent of
+    // max(|c|, |d|) before squaring it; the same power is applied to the
+    // quotient below.  The scaling is skipped when that exponent is not finite.
+    int __ilogbw = 0;
+    float __logbw = logbf(fmaxf(fabsf(__c), fabsf(__d)));
+    if (isfinite(__logbw))
+    {
+        __ilogbw = (int)__logbw;
+        __c = scalbnf(__c, -__ilogbw);
+        __d = scalbnf(__d, -__ilogbw);
+    }
+    float __denom = __c * __c + __d * __d;
+    float _Complex z;
+    __real__ z = scalbnf((__a * __c + __b * __d) / __denom, -__ilogbw);
+    __imag__ z = scalbnf((__b * __c - __a * __d) / __denom, -__ilogbw);
+    // Only when both parts came out NaN, try to recover an infinite or zero
+    // result.  The cases are tried in this order and at most one applies.
+    if (isnan(__real__ z) && isnan(__imag__ z))
+    {
+        // Zero divisor with a dividend that is not entirely NaN: the result
+        // is the dividend times an infinity carrying the sign of c.
+        if ((__denom == 0) && (!isnan(__a) || !isnan(__b)))
+        {
+            __real__ z = copysignf(INFINITY, __c) * __a;
+            __imag__ z = copysignf(INFINITY, __c) * __b;
+        }
+        // Infinite dividend, finite divisor: replace each dividend part by
+        // +-1 (infinite) or +-0 (finite) and scale the products by infinity.
+        else if ((isinf(__a) || isinf(__b)) && isfinite(__c) && isfinite(__d))
+        {
+            __a = copysignf(isinf(__a) ? 1 : 0, __a);
+            __b = copysignf(isinf(__b) ? 1 : 0, __b);
+            __real__ z = INFINITY * (__a * __c + __b * __d);
+            __imag__ z = INFINITY * (__b * __c - __a * __d);
+        }
+        // Infinite divisor, finite dividend: replace each divisor part by
+        // +-1 (infinite) or +-0 (finite) and scale the products by zero.
+        else if (isinf(__logbw) && __logbw > 0 && isfinite(__a) && isfinite(__b))
+        {
+            __c = copysignf(isinf(__c) ? 1 : 0, __c);
+            __d = copysignf(isinf(__d) ? 1 : 0, __d);
+            __real__ z = 0 * (__a * __c + __b * __d);
+            __imag__ z = 0 * (__b * __c - __a * __d);
+        }
+    }
+    return z;
+}
diff --git a/lib/divsi3.c b/lib/divsi3.c
new file mode 100644
index 000000000..8d720eb1d
--- /dev/null
+++ b/lib/divsi3.c
@@ -0,0 +1,30 @@
+//===-- divsi3.c - Implement __divsi3 -------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __divsi3 for the compiler_rt library.
+// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +su_int __udivsi3(su_int n, su_int d); + +// Returns: a / b + +si_int +__divsi3(si_int a, si_int b) +{ + const int bits_in_word_m1 = (int)(sizeof(si_int) * CHAR_BIT) - 1; + si_int s_a = a >> bits_in_word_m1; // s_a = a < 0 ? -1 : 0 + si_int s_b = b >> bits_in_word_m1; // s_b = b < 0 ? -1 : 0 + a = (a ^ s_a) - s_a; // negate if s_a == -1 + b = (b ^ s_b) - s_b; // negate if s_b == -1 + s_a ^= s_b; // sign of quotient + return (__udivsi3(a, b) ^ s_a) - s_a; // negate if s_a == -1 +} diff --git a/lib/divti3.c b/lib/divti3.c new file mode 100644 index 000000000..b8eda254c --- /dev/null +++ b/lib/divti3.c @@ -0,0 +1,34 @@ +//===-- divti3.c - Implement __divti3 -------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __divti3 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#if __x86_64 + +#include "int_lib.h" + +tu_int __udivmodti4(tu_int a, tu_int b, tu_int* rem); + +// Returns: a / b + +ti_int +__divti3(ti_int a, ti_int b) +{ + const int bits_in_tword_m1 = (int)(sizeof(ti_int) * CHAR_BIT) - 1; + ti_int s_a = a >> bits_in_tword_m1; // s_a = a < 0 ? -1 : 0 + ti_int s_b = b >> bits_in_tword_m1; // s_b = b < 0 ? 
-1 : 0 + a = (a ^ s_a) - s_a; // negate if s_a == -1 + b = (b ^ s_b) - s_b; // negate if s_b == -1 + s_a ^= s_b; // sign of quotient + return (__udivmodti4(a, b, (tu_int*)0) ^ s_a) - s_a; // negate if s_a == -1 +} + +#endif diff --git a/lib/divxc3.c b/lib/divxc3.c new file mode 100644 index 000000000..4ee09b9db --- /dev/null +++ b/lib/divxc3.c @@ -0,0 +1,62 @@ +//===-- divxc3.c - Implement __divxc3 -------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __divxc3 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#if !_ARCH_PPC + +#include "int_lib.h" +#include <math.h> +#include <complex.h> + +// Returns: the quotient of (a + ib) / (c + id) + +long double _Complex +__divxc3(long double __a, long double __b, long double __c, long double __d) +{ + int __ilogbw = 0; + long double __logbw = logbl(fmaxl(fabsl(__c), fabsl(__d))); + if (isfinite(__logbw)) + { + __ilogbw = (int)__logbw; + __c = scalbnl(__c, -__ilogbw); + __d = scalbnl(__d, -__ilogbw); + } + long double __denom = __c * __c + __d * __d; + long double _Complex z; + __real__ z = scalbnl((__a * __c + __b * __d) / __denom, -__ilogbw); + __imag__ z = scalbnl((__b * __c - __a * __d) / __denom, -__ilogbw); + if (isnan(__real__ z) && isnan(__imag__ z)) + { + if ((__denom == 0) && (!isnan(__a) || !isnan(__b))) + { + __real__ z = copysignl(INFINITY, __c) * __a; + __imag__ z = copysignl(INFINITY, __c) * __b; + } + else if ((isinf(__a) || isinf(__b)) && isfinite(__c) && isfinite(__d)) + { + __a = copysignl(isinf(__a) ? 1 : 0, __a); + __b = copysignl(isinf(__b) ? 
1 : 0, __b); + __real__ z = INFINITY * (__a * __c + __b * __d); + __imag__ z = INFINITY * (__b * __c - __a * __d); + } + else if (isinf(__logbw) && __logbw > 0 && isfinite(__a) && isfinite(__b)) + { + __c = copysignl(isinf(__c) ? 1 : 0, __c); + __d = copysignl(isinf(__d) ? 1 : 0, __d); + __real__ z = 0 * (__a * __c + __b * __d); + __imag__ z = 0 * (__b * __c - __a * __d); + } + } + return z; +} + +#endif diff --git a/lib/enable_execute_stack.c b/lib/enable_execute_stack.c new file mode 100644 index 000000000..2c1878238 --- /dev/null +++ b/lib/enable_execute_stack.c @@ -0,0 +1,36 @@ +//===-- enable_execute_stack.c - Implement __enable_execute_stack ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include <stdint.h> +#include <sys/mman.h> + + +// +// The compiler generates calls to __enable_execute_stack() when creating +// trampoline functions on the stack for use with nested functions. +// It is expected to mark the page(s) containing the address +// and the next 48 bytes as executable. Since the stack is normally rw- +// that means changing the protection on those page(s) to rwx. 
// Headers needed by the two definitions below. <unistd.h> and <stdlib.h> are
// required by the non-Darwin page-size query in __enable_execute_stack.
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>

//
// The compiler generates calls to __enable_execute_stack() when creating
// trampoline functions on the stack for use with nested functions.
// It is expected to mark the page(s) containing the address
// and the next 48 bytes as executable.  Since the stack is normally rw-
// that means changing the protection on those page(s) to rwx.
//
// addr: address of the trampoline on the stack.
//
// The result of mprotect() is deliberately ignored: this function has no way
// to report failure to its caller.
//
void __enable_execute_stack(void* addr)
{
#if __APPLE__
    // On Darwin, pagesize is always 4096 bytes
    const uintptr_t pageSize = 4096;
#else
    // Ask the system for its page size; there is nothing sensible to do
    // without one, so give up if the query fails.
    const long queriedPageSize = sysconf(_SC_PAGESIZE);
    if (queriedPageSize <= 0)
        abort();
    const uintptr_t pageSize = (uintptr_t)queriedPageSize;
#endif
    const uintptr_t pageAlignMask = ~(pageSize-1);
    const uintptr_t p = (uintptr_t)addr;
    // First byte of the page holding addr ...
    unsigned char* startPage = (unsigned char*)(p & pageAlignMask);
    // ... and first byte of the page following the one that holds addr+48.
    unsigned char* endPage = (unsigned char*)((p+48+pageSize) & pageAlignMask);
    (void)mprotect(startPage, (size_t)(endPage-startPage),
                   PROT_READ | PROT_WRITE | PROT_EXEC);
}

//
// __eprintf() was used in an old version of <assert.h>.
// It can eventually go away, but it is needed when linking
// .o files built with the old <assert.h>.
//
// It should never be exported from a dylib, so it is marked
// visibility hidden.
//
// format is supplied by the caller and is passed straight to fprintf together
// with the three string arguments, in the order
// (assertion_expression, line, file). The message is written to stderr,
// stderr is flushed, and the process is aborted; this function does not
// return.
//
__attribute__((visibility("hidden")))
void __eprintf(const char* format, const char* assertion_expression,
               const char* line, const char* file)
{
    fprintf(stderr, format, assertion_expression, line, file);
    fflush(stderr);
    abort();
}
+// +//===----------------------------------------------------------------------===// +// +// This file implements __ffsdi2 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Returns: the index of the least significant 1-bit in a, or +// the value zero if a is zero. The least significant bit is index one. + +si_int +__ffsdi2(di_int a) +{ + dwords x; + x.all = a; + if (x.low == 0) + { + if (x.high == 0) + return 0; + return __builtin_ctz(x.high) + (1 + sizeof(si_int) * CHAR_BIT); + } + return __builtin_ctz(x.low) + 1; +} diff --git a/lib/ffsti2.c b/lib/ffsti2.c new file mode 100644 index 000000000..194c2cbb4 --- /dev/null +++ b/lib/ffsti2.c @@ -0,0 +1,35 @@ +//===-- ffsti2.c - Implement __ffsti2 -------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __ffsti2 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#if __x86_64 + +#include "int_lib.h" + +// Returns: the index of the least significant 1-bit in a, or +// the value zero if a is zero. The least significant bit is index one. 
+ +si_int +__ffsti2(ti_int a) +{ + twords x; + x.all = a; + if (x.low == 0) + { + if (x.high == 0) + return 0; + return __builtin_ctzll(x.high) + (1 + sizeof(di_int) * CHAR_BIT); + } + return __builtin_ctzll(x.low) + 1; +} + +#endif diff --git a/lib/fixdfdi.c b/lib/fixdfdi.c new file mode 100644 index 000000000..87368b7e5 --- /dev/null +++ b/lib/fixdfdi.c @@ -0,0 +1,41 @@ +//===-- fixdfdi.c - Implement __fixdfdi -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __fixdfdi for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Returns: convert a to a signed long long, rounding toward zero. + +// Assumption: double is a IEEE 64 bit floating point type +// su_int is a 32 bit integral type +// value in double is representable in di_int (no range checking performed) + +// seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + +di_int +__fixdfdi(double a) +{ + double_bits fb; + fb.f = a; + int e = ((fb.u.high & 0x7FF00000) >> 20) - 1023; + if (e < 0) + return 0; + di_int s = (si_int)(fb.u.high & 0x80000000) >> 31; + dwords r; + r.high = (fb.u.high & 0x000FFFFF) | 0x00100000; + r.low = fb.u.low; + if (e > 52) + r.all <<= (e - 52); + else + r.all >>= (52 - e); + return (r.all ^ s) - s; +} diff --git a/lib/fixdfti.c b/lib/fixdfti.c new file mode 100644 index 000000000..94fc8d3d1 --- /dev/null +++ b/lib/fixdfti.c @@ -0,0 +1,43 @@ +//===-- fixdfti.c - Implement __fixdfti -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements __fixdfti for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#if __x86_64 + +#include "int_lib.h" + +// Returns: convert a to a signed long long, rounding toward zero. + +// Assumption: double is a IEEE 64 bit floating point type +// su_int is a 32 bit integral type +// value in double is representable in ti_int (no range checking performed) + +// seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + +ti_int +__fixdfti(double a) +{ + double_bits fb; + fb.f = a; + int e = ((fb.u.high & 0x7FF00000) >> 20) - 1023; + if (e < 0) + return 0; + ti_int s = (si_int)(fb.u.high & 0x80000000) >> 31; + ti_int r = 0x0010000000000000uLL | (0x000FFFFFFFFFFFFFuLL & fb.u.all); + if (e > 52) + r <<= (e - 52); + else + r >>= (52 - e); + return (r ^ s) - s; +} + +#endif diff --git a/lib/fixsfdi.c b/lib/fixsfdi.c new file mode 100644 index 000000000..5458f0276 --- /dev/null +++ b/lib/fixsfdi.c @@ -0,0 +1,39 @@ +//===-- fixsfdi.c - Implement __fixsfdi -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __fixsfdi for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Returns: convert a to a signed long long, rounding toward zero. 
+ +// Assumption: float is a IEEE 32 bit floating point type +// su_int is a 32 bit integral type +// value in float is representable in di_int (no range checking performed) + +// seee eeee emmm mmmm mmmm mmmm mmmm mmmm + +di_int +__fixsfdi(float a) +{ + float_bits fb; + fb.f = a; + int e = ((fb.u & 0x7F800000) >> 23) - 127; + if (e < 0) + return 0; + di_int s = (si_int)(fb.u & 0x80000000) >> 31; + di_int r = (fb.u & 0x007FFFFF) | 0x00800000; + if (e > 23) + r <<= (e - 23); + else + r >>= (23 - e); + return (r ^ s) - s; +} diff --git a/lib/fixsfti.c b/lib/fixsfti.c new file mode 100644 index 000000000..8e3958757 --- /dev/null +++ b/lib/fixsfti.c @@ -0,0 +1,43 @@ +//===-- fixsfti.c - Implement __fixsfti -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __fixsfti for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#if __x86_64 + +#include "int_lib.h" + +// Returns: convert a to a signed long long, rounding toward zero. 
+ +// Assumption: float is a IEEE 32 bit floating point type +// su_int is a 32 bit integral type +// value in float is representable in ti_int (no range checking performed) + +// seee eeee emmm mmmm mmmm mmmm mmmm mmmm + +ti_int +__fixsfti(float a) +{ + float_bits fb; + fb.f = a; + int e = ((fb.u & 0x7F800000) >> 23) - 127; + if (e < 0) + return 0; + ti_int s = (si_int)(fb.u & 0x80000000) >> 31; + ti_int r = (fb.u & 0x007FFFFF) | 0x00800000; + if (e > 23) + r <<= (e - 23); + else + r >>= (23 - e); + return (r ^ s) - s; +} + +#endif diff --git a/lib/fixunsdfdi.c b/lib/fixunsdfdi.c new file mode 100644 index 000000000..f7e1041ad --- /dev/null +++ b/lib/fixunsdfdi.c @@ -0,0 +1,42 @@ +//===-- fixunsdfdi.c - Implement __fixunsdfdi -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __fixunsdfdi for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Returns: convert a to a unsigned long long, rounding toward zero. +// Negative values all become zero. 
+ +// Assumption: double is a IEEE 64 bit floating point type +// du_int is a 64 bit integral type +// value in double is representable in du_int or is negative +// (no range checking performed) + +// seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + +du_int +__fixunsdfdi(double a) +{ + double_bits fb; + fb.f = a; + int e = ((fb.u.high & 0x7FF00000) >> 20) - 1023; + if (e < 0 || (fb.u.high & 0x80000000)) + return 0; + udwords r; + r.high = (fb.u.high & 0x000FFFFF) | 0x00100000; + r.low = fb.u.low; + if (e > 52) + r.all <<= (e - 52); + else + r.all >>= (52 - e); + return r.all; +} diff --git a/lib/fixunsdfsi.c b/lib/fixunsdfsi.c new file mode 100644 index 000000000..383a35efe --- /dev/null +++ b/lib/fixunsdfsi.c @@ -0,0 +1,39 @@ +//===-- fixunsdfsi.c - Implement __fixunsdfsi -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __fixunsdfsi for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Returns: convert a to a unsigned int, rounding toward zero. +// Negative values all become zero. 
+ +// Assumption: double is a IEEE 64 bit floating point type +// su_int is a 32 bit integral type +// value in double is representable in su_int or is negative +// (no range checking performed) + +// seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + +su_int +__fixunsdfsi(double a) +{ + double_bits fb; + fb.f = a; + int e = ((fb.u.high & 0x7FF00000) >> 20) - 1023; + if (e < 0 || (fb.u.high & 0x80000000)) + return 0; + return ( + 0x80000000u | + ((fb.u.high & 0x000FFFFF) << 11) | + (fb.u.low >> 21) + ) >> (31 - e); +} diff --git a/lib/fixunsdfti.c b/lib/fixunsdfti.c new file mode 100644 index 000000000..4b88c6f6b --- /dev/null +++ b/lib/fixunsdfti.c @@ -0,0 +1,44 @@ +//===-- fixunsdfti.c - Implement __fixunsdfti -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __fixunsdfti for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#if __x86_64 + +#include "int_lib.h" + +// Returns: convert a to a unsigned long long, rounding toward zero. +// Negative values all become zero. 
+ +// Assumption: double is a IEEE 64 bit floating point type +// tu_int is a 64 bit integral type +// value in double is representable in tu_int or is negative +// (no range checking performed) + +// seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + +tu_int +__fixunsdfti(double a) +{ + double_bits fb; + fb.f = a; + int e = ((fb.u.high & 0x7FF00000) >> 20) - 1023; + if (e < 0 || (fb.u.high & 0x80000000)) + return 0; + tu_int r = 0x0010000000000000uLL | (fb.u.all & 0x000FFFFFFFFFFFFFuLL); + if (e > 52) + r <<= (e - 52); + else + r >>= (52 - e); + return r; +} + +#endif diff --git a/lib/fixunssfdi.c b/lib/fixunssfdi.c new file mode 100644 index 000000000..6495c68e7 --- /dev/null +++ b/lib/fixunssfdi.c @@ -0,0 +1,40 @@ +//===-- fixunssfdi.c - Implement __fixunssfdi -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __fixunssfdi for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Returns: convert a to a unsigned long long, rounding toward zero. +// Negative values all become zero. 
+ +// Assumption: float is a IEEE 32 bit floating point type +// du_int is a 64 bit integral type +// value in float is representable in du_int or is negative +// (no range checking performed) + +// seee eeee emmm mmmm mmmm mmmm mmmm mmmm + +du_int +__fixunssfdi(float a) +{ + float_bits fb; + fb.f = a; + int e = ((fb.u & 0x7F800000) >> 23) - 127; + if (e < 0 || (fb.u & 0x80000000)) + return 0; + du_int r = (fb.u & 0x007FFFFF) | 0x00800000; + if (e > 23) + r <<= (e - 23); + else + r >>= (23 - e); + return r; +} diff --git a/lib/fixunssfsi.c b/lib/fixunssfsi.c new file mode 100644 index 000000000..45c09cd53 --- /dev/null +++ b/lib/fixunssfsi.c @@ -0,0 +1,40 @@ +//===-- fixunssfsi.c - Implement __fixunssfsi -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __fixunssfsi for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Returns: convert a to a unsigned int, rounding toward zero. +// Negative values all become zero. 
+ +// Assumption: float is a IEEE 32 bit floating point type +// su_int is a 32 bit integral type +// value in float is representable in su_int or is negative +// (no range checking performed) + +// seee eeee emmm mmmm mmmm mmmm mmmm mmmm + +su_int +__fixunssfsi(float a) +{ + float_bits fb; + fb.f = a; + int e = ((fb.u & 0x7F800000) >> 23) - 127; + if (e < 0 || (fb.u & 0x80000000)) + return 0; + su_int r = (fb.u & 0x007FFFFF) | 0x00800000; + if (e > 23) + r <<= (e - 23); + else + r >>= (23 - e); + return r; +} diff --git a/lib/fixunssfti.c b/lib/fixunssfti.c new file mode 100644 index 000000000..b2f47a824 --- /dev/null +++ b/lib/fixunssfti.c @@ -0,0 +1,44 @@ +//===-- fixunssfti.c - Implement __fixunssfti -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __fixunssfti for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#if __x86_64 + +#include "int_lib.h" + +// Returns: convert a to a unsigned long long, rounding toward zero. +// Negative values all become zero. 
+ +// Assumption: float is a IEEE 32 bit floating point type +// tu_int is a 64 bit integral type +// value in float is representable in tu_int or is negative +// (no range checking performed) + +// seee eeee emmm mmmm mmmm mmmm mmmm mmmm + +tu_int +__fixunssfti(float a) +{ + float_bits fb; + fb.f = a; + int e = ((fb.u & 0x7F800000) >> 23) - 127; + if (e < 0 || (fb.u & 0x80000000)) + return 0; + tu_int r = (fb.u & 0x007FFFFF) | 0x00800000; + if (e > 23) + r <<= (e - 23); + else + r >>= (23 - e); + return r; +} + +#endif diff --git a/lib/fixunsxfdi.c b/lib/fixunsxfdi.c new file mode 100644 index 000000000..b0abb9362 --- /dev/null +++ b/lib/fixunsxfdi.c @@ -0,0 +1,40 @@ +//===-- fixunsxfdi.c - Implement __fixunsxfdi -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __fixunsxfdi for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#if !_ARCH_PPC + +#include "int_lib.h" + +// Returns: convert a to a unsigned long long, rounding toward zero. +// Negative values all become zero. 
+ +// Assumption: long double is an intel 80 bit floating point type padded with 6 bytes +// du_int is a 64 bit integral type +// value in long double is representable in du_int or is negative +// (no range checking performed) + +// gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | +// 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + +du_int +__fixunsxfdi(long double a) +{ + long_double_bits fb; + fb.f = a; + int e = (fb.u.high.low & 0x00007FFF) - 16383; + if (e < 0 || (fb.u.high.low & 0x00008000)) + return 0; + return fb.u.low.all >> (63 - e); +} + +#endif diff --git a/lib/fixunsxfsi.c b/lib/fixunsxfsi.c new file mode 100644 index 000000000..2c186a689 --- /dev/null +++ b/lib/fixunsxfsi.c @@ -0,0 +1,40 @@ +//===-- fixunsxfsi.c - Implement __fixunsxfsi -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __fixunsxfsi for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#if !_ARCH_PPC + +#include "int_lib.h" + +// Returns: convert a to a unsigned int, rounding toward zero. +// Negative values all become zero. 
+ +// Assumption: long double is an intel 80 bit floating point type padded with 6 bytes +// su_int is a 32 bit integral type +// value in long double is representable in su_int or is negative +// (no range checking performed) + +// gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | +// 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + +su_int +__fixunsxfsi(long double a) +{ + long_double_bits fb; + fb.f = a; + int e = (fb.u.high.low & 0x00007FFF) - 16383; + if (e < 0 || (fb.u.high.low & 0x00008000)) + return 0; + return fb.u.low.high >> (31 - e); +} + +#endif diff --git a/lib/fixunsxfti.c b/lib/fixunsxfti.c new file mode 100644 index 000000000..993594dfa --- /dev/null +++ b/lib/fixunsxfti.c @@ -0,0 +1,45 @@ +//===-- fixunsxfti.c - Implement __fixunsxfti -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __fixunsxfti for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#if __x86_64 + +#include "int_lib.h" + +// Returns: convert a to a unsigned long long, rounding toward zero. +// Negative values all become zero. 
+ +// Assumption: long double is an intel 80 bit floating point type padded with 6 bytes +// tu_int is a 64 bit integral type +// value in long double is representable in tu_int or is negative +// (no range checking performed) + +// gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | +// 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + +tu_int +__fixunsxfti(long double a) +{ + long_double_bits fb; + fb.f = a; + int e = (fb.u.high.low & 0x00007FFF) - 16383; + if (e < 0 || (fb.u.high.low & 0x00008000)) + return 0; + tu_int r = fb.u.low.all; + if (e > 63) + r <<= (e - 63); + else + r >>= (63 - e); + return r; +} + +#endif diff --git a/lib/fixxfdi.c b/lib/fixxfdi.c new file mode 100644 index 000000000..2a7d20814 --- /dev/null +++ b/lib/fixxfdi.c @@ -0,0 +1,41 @@ +//===-- fixxfdi.c - Implement __fixxfdi -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __fixxfdi for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#if !_ARCH_PPC + +#include "int_lib.h" + +// Returns: convert a to a signed long long, rounding toward zero. 
+ +// Assumption: long double is an intel 80 bit floating point type padded with 6 bytes +// su_int is a 32 bit integral type +// value in long double is representable in di_int (no range checking performed) + +// gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | +// 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + +di_int +__fixxfdi(long double a) +{ + long_double_bits fb; + fb.f = a; + int e = (fb.u.high.low & 0x00007FFF) - 16383; + if (e < 0) + return 0; + di_int s = -(si_int)((fb.u.high.low & 0x00008000) >> 15); + di_int r = fb.u.low.all; + r = (du_int)r >> (63 - e); + return (r ^ s) - s; +} + +#endif diff --git a/lib/fixxfti.c b/lib/fixxfti.c new file mode 100644 index 000000000..d9cb3eaf0 --- /dev/null +++ b/lib/fixxfti.c @@ -0,0 +1,44 @@ +//===-- fixxfti.c - Implement __fixxfti -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __fixxfti for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#if __x86_64 + +#include "int_lib.h" + +// Returns: convert a to a signed long long, rounding toward zero. 
+ +// Assumption: long double is an intel 80 bit floating point type padded with 6 bytes +// su_int is a 32 bit integral type +// value in long double is representable in ti_int (no range checking performed) + +// gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | +// 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + +ti_int +__fixxfti(long double a) +{ + long_double_bits fb; + fb.f = a; + int e = (fb.u.high.low & 0x00007FFF) - 16383; + if (e < 0) + return 0; + ti_int s = -(si_int)((fb.u.high.low & 0x00008000) >> 15); + ti_int r = fb.u.low.all; + if (e > 63) + r <<= (e - 63); + else + r >>= (63 - e); + return (r ^ s) - s; +} + +#endif diff --git a/lib/floatdidf.c b/lib/floatdidf.c new file mode 100644 index 000000000..f76d1b899 --- /dev/null +++ b/lib/floatdidf.c @@ -0,0 +1,102 @@ +//===-- floatdidf.c - Implement __floatdidf -------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __floatdidf for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" +#include <float.h> + +// Returns: convert a to a double, rounding toward even. + +// Assumption: double is a IEEE 64 bit floating point type +// di_int is a 64 bit integral type + +// seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + +#ifndef __SOFT_FP__ +// Support for systems that have hardware floating-point; we'll set the inexact flag +// as a side-effect of this computation. 
+#include <stdint.h> + +double +__floatdidf(di_int a) +{ + static const double twop52 = 0x1.0p52; + static const double twop32 = 0x1.0p32; + + union { int64_t x; double d; } low = { .d = twop52 }; + + const double high = (int32_t)(a >> 32) * twop32; + low.x |= a & INT64_C(0x00000000ffffffff); + + const double result = (high - twop52) + low.d; + return result; +} + +#else +// Support for systems that don't have hardware floating-point; there are no flags to +// set, and we don't want to code-gen to an unknown soft-float implementation. + +double +__floatdidf(di_int a) +{ + if (a == 0) + return 0.0; + const unsigned N = sizeof(di_int) * CHAR_BIT; + const di_int s = a >> (N-1); + a = (a ^ s) - s; + int sd = N - __builtin_clzll(a); // number of significant digits + int e = sd - 1; // exponent + if (sd > DBL_MANT_DIG) + { + // start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + // finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + // 12345678901234567890123456 + // 1 = msb 1 bit + // P = bit DBL_MANT_DIG-1 bits to the right of 1 + // Q = bit DBL_MANT_DIG bits to the right of 1 + // R = "or" of all bits to the right of Q + switch (sd) + { + case DBL_MANT_DIG + 1: + a <<= 1; + break; + case DBL_MANT_DIG + 2: + break; + default: + a = ((du_int)a >> (sd - (DBL_MANT_DIG+2))) | + ((a & ((du_int)(-1) >> ((N + DBL_MANT_DIG+2) - sd))) != 0); + }; + // finish: + a |= (a & 4) != 0; // Or P into R + ++a; // round - this step may add a significant bit + a >>= 2; // dump Q and R + // a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits + if (a & ((du_int)1 << DBL_MANT_DIG)) + { + a >>= 1; + ++e; + } + // a is now rounded to DBL_MANT_DIG bits + } + else + { + a <<= (DBL_MANT_DIG - sd); + // a is now rounded to DBL_MANT_DIG bits + } + double_bits fb; + fb.u.high = ((su_int)s & 0x80000000) | // sign + ((e + 1023) << 20) | // exponent + ((su_int)(a >> 32) & 0x000FFFFF); // mantissa-high + fb.u.low = (su_int)a; // mantissa-low + return 
fb.f; +} +#endif diff --git a/lib/floatdisf.c b/lib/floatdisf.c new file mode 100644 index 000000000..5a9284819 --- /dev/null +++ b/lib/floatdisf.c @@ -0,0 +1,76 @@ +//===-- floatdisf.c - Implement __floatdisf -------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __floatdisf for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" +#include <float.h> + +// Returns: convert a to a float, rounding toward even. + +// Assumption: float is a IEEE 32 bit floating point type +// di_int is a 64 bit integral type + +// seee eeee emmm mmmm mmmm mmmm mmmm mmmm + +float +__floatdisf(di_int a) +{ + if (a == 0) + return 0.0F; + const unsigned N = sizeof(di_int) * CHAR_BIT; + const di_int s = a >> (N-1); + a = (a ^ s) - s; + int sd = N - __builtin_clzll(a); // number of significant digits + int e = sd - 1; // exponent + if (sd > FLT_MANT_DIG) + { + // start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + // finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + // 12345678901234567890123456 + // 1 = msb 1 bit + // P = bit FLT_MANT_DIG-1 bits to the right of 1 + // Q = bit FLT_MANT_DIG bits to the right of 1 + // R = "or" of all bits to the right of Q + switch (sd) + { + case FLT_MANT_DIG + 1: + a <<= 1; + break; + case FLT_MANT_DIG + 2: + break; + default: + a = ((du_int)a >> (sd - (FLT_MANT_DIG+2))) | + ((a & ((du_int)(-1) >> ((N + FLT_MANT_DIG+2) - sd))) != 0); + }; + // finish: + a |= (a & 4) != 0; // Or P into R + ++a; // round - this step may add a significant bit + a >>= 2; // dump Q and R + // a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits + if (a & ((du_int)1 << FLT_MANT_DIG)) + { + a >>= 
1; + ++e; + } + // a is now rounded to FLT_MANT_DIG bits + } + else + { + a <<= (FLT_MANT_DIG - sd); + // a is now rounded to FLT_MANT_DIG bits + } + float_bits fb; + fb.u = ((su_int)s & 0x80000000) | // sign + ((e + 127) << 23) | // exponent + ((su_int)a & 0x007FFFFF); // mantissa + return fb.f; +} diff --git a/lib/floatdixf.c b/lib/floatdixf.c new file mode 100644 index 000000000..8d6911ec0 --- /dev/null +++ b/lib/floatdixf.c @@ -0,0 +1,43 @@ +//===-- floatdixf.c - Implement __floatdixf -------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __floatdixf for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#if !_ARCH_PPC + +#include "int_lib.h" + +// Returns: convert a to a long double, rounding toward even. 
+ +// Assumption: long double is a IEEE 80 bit floating point type padded to 128 bits +// di_int is a 64 bit integral type + +// gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | +// 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + +long double +__floatdixf(di_int a) +{ + if (a == 0) + return 0.0; + const unsigned N = sizeof(di_int) * CHAR_BIT; + const di_int s = a >> (N-1); + a = (a ^ s) - s; + int clz = __builtin_clzll(a); + int e = (N - 1) - clz ; // exponent + long_double_bits fb; + fb.u.high.low = ((su_int)s & 0x00008000) | // sign + (e + 16383); // exponent + fb.u.low.all = a << clz; // mantissa + return fb.f; +} + +#endif diff --git a/lib/floattidf.c b/lib/floattidf.c new file mode 100644 index 000000000..0f72b33f3 --- /dev/null +++ b/lib/floattidf.c @@ -0,0 +1,83 @@ +//===-- floattidf.c - Implement __floattidf -------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __floattidf for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#if __x86_64 + +#include "int_lib.h" +#include <float.h> + +// Returns: convert a to a double, rounding toward even. 
+ +// Assumption: double is a IEEE 64 bit floating point type +// ti_int is a 128 bit integral type + +// seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + +si_int __clzti2(ti_int a); + +double +__floattidf(ti_int a) +{ + if (a == 0) + return 0.0; + const unsigned N = sizeof(ti_int) * CHAR_BIT; + const ti_int s = a >> (N-1); + a = (a ^ s) - s; + int sd = N - __clzti2(a); // number of significant digits + int e = sd - 1; // exponent + if (sd > DBL_MANT_DIG) + { + // start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + // finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + // 12345678901234567890123456 + // 1 = msb 1 bit + // P = bit DBL_MANT_DIG-1 bits to the right of 1 + // Q = bit DBL_MANT_DIG bits to the right of 1 + // R = "or" of all bits to the right of Q + switch (sd) + { + case DBL_MANT_DIG + 1: + a <<= 1; + break; + case DBL_MANT_DIG + 2: + break; + default: + a = ((tu_int)a >> (sd - (DBL_MANT_DIG+2))) | + ((a & ((tu_int)(-1) >> ((N + DBL_MANT_DIG+2) - sd))) != 0); + }; + // finish: + a |= (a & 4) != 0; // Or P into R + ++a; // round - this step may add a significant bit + a >>= 2; // dump Q and R + // a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits + if (a & ((tu_int)1 << DBL_MANT_DIG)) + { + a >>= 1; + ++e; + } + // a is now rounded to DBL_MANT_DIG bits + } + else + { + a <<= (DBL_MANT_DIG - sd); + // a is now rounded to DBL_MANT_DIG bits + } + double_bits fb; + fb.u.high = ((su_int)s & 0x80000000) | // sign + ((e + 1023) << 20) | // exponent + ((su_int)(a >> 32) & 0x000FFFFF); // mantissa-high + fb.u.low = (su_int)a; // mantissa-low + return fb.f; +} + +#endif diff --git a/lib/floattisf.c b/lib/floattisf.c new file mode 100644 index 000000000..25bda9759 --- /dev/null +++ b/lib/floattisf.c @@ -0,0 +1,82 @@ +//===-- floattisf.c - Implement __floattisf -------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the 
University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __floattisf for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#if __x86_64 + +#include "int_lib.h" +#include <float.h> + +// Returns: convert a to a float, rounding toward even. + +// Assumption: float is a IEEE 32 bit floating point type +// ti_int is a 128 bit integral type + +// seee eeee emmm mmmm mmmm mmmm mmmm mmmm + +si_int __clzti2(ti_int a); + +float +__floattisf(ti_int a) +{ + if (a == 0) + return 0.0F; + const unsigned N = sizeof(ti_int) * CHAR_BIT; + const ti_int s = a >> (N-1); + a = (a ^ s) - s; + int sd = N - __clzti2(a); // number of significant digits + int e = sd - 1; // exponent + if (sd > FLT_MANT_DIG) + { + // start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + // finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + // 12345678901234567890123456 + // 1 = msb 1 bit + // P = bit FLT_MANT_DIG-1 bits to the right of 1 + // Q = bit FLT_MANT_DIG bits to the right of 1 + // R = "or" of all bits to the right of Q + switch (sd) + { + case FLT_MANT_DIG + 1: + a <<= 1; + break; + case FLT_MANT_DIG + 2: + break; + default: + a = ((tu_int)a >> (sd - (FLT_MANT_DIG+2))) | + ((a & ((tu_int)(-1) >> ((N + FLT_MANT_DIG+2) - sd))) != 0); + }; + // finish: + a |= (a & 4) != 0; // Or P into R + ++a; // round - this step may add a significant bit + a >>= 2; // dump Q and R + // a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits + if (a & ((tu_int)1 << FLT_MANT_DIG)) + { + a >>= 1; + ++e; + } + // a is now rounded to FLT_MANT_DIG bits + } + else + { + a <<= (FLT_MANT_DIG - sd); + // a is now rounded to FLT_MANT_DIG bits + } + float_bits fb; + fb.u = ((su_int)s & 0x80000000) | // sign + ((e + 127) << 23) | // exponent + ((su_int)a & 0x007FFFFF); // mantissa + return 
fb.f; +} + +#endif diff --git a/lib/floattixf.c b/lib/floattixf.c new file mode 100644 index 000000000..520b6ed08 --- /dev/null +++ b/lib/floattixf.c @@ -0,0 +1,83 @@ +//===-- floattixf.c - Implement __floattixf -------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __floattixf for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#if __x86_64 + +#include "int_lib.h" +#include <float.h> + +// Returns: convert a to a long double, rounding toward even. + +// Assumption: long double is a IEEE 80 bit floating point type padded to 128 bits +// ti_int is a 128 bit integral type + +// gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | +// 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + +si_int __clzti2(ti_int a); + +long double +__floattixf(ti_int a) +{ + if (a == 0) + return 0.0; + const unsigned N = sizeof(ti_int) * CHAR_BIT; + const ti_int s = a >> (N-1); + a = (a ^ s) - s; + int sd = N - __clzti2(a); // number of significant digits + int e = sd - 1; // exponent + if (sd > LDBL_MANT_DIG) + { + // start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + // finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + // 12345678901234567890123456 + // 1 = msb 1 bit + // P = bit LDBL_MANT_DIG-1 bits to the right of 1 + // Q = bit LDBL_MANT_DIG bits to the right of 1 + // R = "or" of all bits to the right of Q + switch (sd) + { + case LDBL_MANT_DIG + 1: + a <<= 1; + break; + case LDBL_MANT_DIG + 2: + break; + default: + a = ((tu_int)a >> (sd - (LDBL_MANT_DIG+2))) | + ((a & ((tu_int)(-1) >> ((N + LDBL_MANT_DIG+2) - sd))) != 0); + }; + // finish: + a |= (a & 4) 
!= 0; // Or P into R + ++a; // round - this step may add a significant bit + a >>= 2; // dump Q and R + // a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits + if (a & ((tu_int)1 << LDBL_MANT_DIG)) + { + a >>= 1; + ++e; + } + // a is now rounded to LDBL_MANT_DIG bits + } + else + { + a <<= (LDBL_MANT_DIG - sd); + // a is now rounded to LDBL_MANT_DIG bits + } + long_double_bits fb; + fb.u.high.low = ((su_int)s & 0x8000) | // sign + (e + 16383); // exponent + fb.u.low.all = (du_int)a; // mantissa + return fb.f; +} + +#endif diff --git a/lib/floatundidf.c b/lib/floatundidf.c new file mode 100644 index 000000000..f019d693b --- /dev/null +++ b/lib/floatundidf.c @@ -0,0 +1,101 @@ +//===-- floatundidf.c - Implement __floatundidf ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __floatundidf for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" +#include <float.h> + +// Returns: convert a to a double, rounding toward even. + +// Assumption: double is a IEEE 64 bit floating point type +// du_int is a 64 bit integral type + +// seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + +#ifndef __SOFT_FP__ +// Support for systems that have hardware floating-point; we'll set the inexact flag +// as a side-effect of this computation. 
+#include <stdint.h> + +double +__floatundidf(du_int a) +{ + static const double twop52 = 0x1.0p52; + static const double twop84 = 0x1.0p84; + static const double twop84_plus_twop52 = 0x1.00000001p84; + + union { uint64_t x; double d; } high = { .d = twop84 }; + union { uint64_t x; double d; } low = { .d = twop52 }; + + high.x |= a >> 32; + low.x |= a & UINT64_C(0x00000000ffffffff); + + const double result = (high.d - twop84_plus_twop52) + low.d; + return result; +} + +#else +// Support for systems that don't have hardware floating-point; there are no flags to +// set, and we don't want to code-gen to an unknown soft-float implementation. + +double +__floatundidf(du_int a) +{ + if (a == 0) + return 0.0; + const unsigned N = sizeof(du_int) * CHAR_BIT; + int sd = N - __builtin_clzll(a); // number of significant digits + int e = sd - 1; // exponent + if (sd > DBL_MANT_DIG) + { + // start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + // finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + // 12345678901234567890123456 + // 1 = msb 1 bit + // P = bit DBL_MANT_DIG-1 bits to the right of 1 + // Q = bit DBL_MANT_DIG bits to the right of 1 + // R = "or" of all bits to the right of Q + switch (sd) + { + case DBL_MANT_DIG + 1: + a <<= 1; + break; + case DBL_MANT_DIG + 2: + break; + default: + a = (a >> (sd - (DBL_MANT_DIG+2))) | + ((a & ((du_int)(-1) >> ((N + DBL_MANT_DIG+2) - sd))) != 0); + }; + // finish: + a |= (a & 4) != 0; // Or P into R + ++a; // round - this step may add a significant bit + a >>= 2; // dump Q and R + // a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits + if (a & ((du_int)1 << DBL_MANT_DIG)) + { + a >>= 1; + ++e; + } + // a is now rounded to DBL_MANT_DIG bits + } + else + { + a <<= (DBL_MANT_DIG - sd); + // a is now rounded to DBL_MANT_DIG bits + } + double_bits fb; + fb.u.high = ((e + 1023) << 20) | // exponent + ((su_int)(a >> 32) & 0x000FFFFF); // mantissa-high + fb.u.low = (su_int)a; // mantissa-low 
+ return fb.f; +} +#endif
\ No newline at end of file diff --git a/lib/floatundisf.c b/lib/floatundisf.c new file mode 100644 index 000000000..66099e025 --- /dev/null +++ b/lib/floatundisf.c @@ -0,0 +1,73 @@ +//===-- floatundisf.c - Implement __floatundisf ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __floatundisf for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" +#include <float.h> + +// Returns: convert a to a float, rounding toward even. + +// Assumption: float is a IEEE 32 bit floating point type +// du_int is a 64 bit integral type + +// seee eeee emmm mmmm mmmm mmmm mmmm mmmm + +float +__floatundisf(du_int a) +{ + if (a == 0) + return 0.0F; + const unsigned N = sizeof(du_int) * CHAR_BIT; + int sd = N - __builtin_clzll(a); // number of significant digits + int e = sd - 1; // exponent + if (sd > FLT_MANT_DIG) + { + // start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + // finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + // 12345678901234567890123456 + // 1 = msb 1 bit + // P = bit FLT_MANT_DIG-1 bits to the right of 1 + // Q = bit FLT_MANT_DIG bits to the right of 1 + // R = "or" of all bits to the right of Q + switch (sd) + { + case FLT_MANT_DIG + 1: + a <<= 1; + break; + case FLT_MANT_DIG + 2: + break; + default: + a = (a >> (sd - (FLT_MANT_DIG+2))) | + ((a & ((du_int)(-1) >> ((N + FLT_MANT_DIG+2) - sd))) != 0); + }; + // finish: + a |= (a & 4) != 0; // Or P into R + ++a; // round - this step may add a significant bit + a >>= 2; // dump Q and R + // a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits + if (a & ((du_int)1 << FLT_MANT_DIG)) + { + a >>= 1; + ++e; + } + // a is now rounded 
to FLT_MANT_DIG bits + } + else + { + a <<= (FLT_MANT_DIG - sd); + // a is now rounded to FLT_MANT_DIG bits + } + float_bits fb; + fb.u = ((e + 127) << 23) | // exponent + ((su_int)a & 0x007FFFFF); // mantissa + return fb.f; +} diff --git a/lib/floatundixf.c b/lib/floatundixf.c new file mode 100644 index 000000000..6e8c4839f --- /dev/null +++ b/lib/floatundixf.c @@ -0,0 +1,40 @@ +//===-- floatundixf.c - Implement __floatundixf ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __floatundixf for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#if !_ARCH_PPC + +#include "int_lib.h" + +// Returns: convert a to a long double, rounding toward even. + +// Assumption: long double is a IEEE 80 bit floating point type padded to 128 bits +// du_int is a 64 bit integral type + +// gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | +// 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + +long double +__floatundixf(du_int a) +{ + if (a == 0) + return 0.0; + const unsigned N = sizeof(du_int) * CHAR_BIT; + int clz = __builtin_clzll(a); + int e = (N - 1) - clz ; // exponent + long_double_bits fb; + fb.u.high.low = (e + 16383); // exponent + fb.u.low.all = a << clz; // mantissa + return fb.f; +} + +#endif diff --git a/lib/floatuntidf.c b/lib/floatuntidf.c new file mode 100644 index 000000000..1b87fcfc4 --- /dev/null +++ b/lib/floatuntidf.c @@ -0,0 +1,80 @@ +//===-- floatuntidf.c - Implement __floatuntidf ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements __floatuntidf for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#if __x86_64 + +#include "int_lib.h" +#include <float.h> + +// Returns: convert a to a double, rounding toward even. + +// Assumption: double is a IEEE 64 bit floating point type +// tu_int is a 128 bit integral type + +// seee eeee eeee mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + +si_int __clzti2(ti_int a); + +double +__floatuntidf(tu_int a) +{ + if (a == 0) + return 0.0; + const unsigned N = sizeof(tu_int) * CHAR_BIT; + int sd = N - __clzti2(a); // number of significant digits + int e = sd - 1; // exponent + if (sd > DBL_MANT_DIG) + { + // start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + // finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + // 12345678901234567890123456 + // 1 = msb 1 bit + // P = bit DBL_MANT_DIG-1 bits to the right of 1 + // Q = bit DBL_MANT_DIG bits to the right of 1 + // R = "or" of all bits to the right of Q + switch (sd) + { + case DBL_MANT_DIG + 1: + a <<= 1; + break; + case DBL_MANT_DIG + 2: + break; + default: + a = (a >> (sd - (DBL_MANT_DIG+2))) | + ((a & ((tu_int)(-1) >> ((N + DBL_MANT_DIG+2) - sd))) != 0); + }; + // finish: + a |= (a & 4) != 0; // Or P into R + ++a; // round - this step may add a significant bit + a >>= 2; // dump Q and R + // a is now rounded to DBL_MANT_DIG or DBL_MANT_DIG+1 bits + if (a & ((tu_int)1 << DBL_MANT_DIG)) + { + a >>= 1; + ++e; + } + // a is now rounded to DBL_MANT_DIG bits + } + else + { + a <<= (DBL_MANT_DIG - sd); + // a is now rounded to DBL_MANT_DIG bits + } + double_bits fb; + fb.u.high = ((e + 1023) << 20) | // exponent + ((su_int)(a >> 32) & 0x000FFFFF); // mantissa-high + fb.u.low = (su_int)a; // mantissa-low + return fb.f; +} + +#endif diff --git a/lib/floatuntisf.c 
b/lib/floatuntisf.c new file mode 100644 index 000000000..e75d99571 --- /dev/null +++ b/lib/floatuntisf.c @@ -0,0 +1,79 @@ +//===-- floatuntisf.c - Implement __floatuntisf ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __floatuntisf for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#if __x86_64 + +#include "int_lib.h" +#include <float.h> + +// Returns: convert a to a float, rounding toward even. + +// Assumption: float is a IEEE 32 bit floating point type +// tu_int is a 128 bit integral type + +// seee eeee emmm mmmm mmmm mmmm mmmm mmmm + +si_int __clzti2(ti_int a); + +float +__floatuntisf(tu_int a) +{ + if (a == 0) + return 0.0F; + const unsigned N = sizeof(tu_int) * CHAR_BIT; + int sd = N - __clzti2(a); // number of significant digits + int e = sd - 1; // exponent + if (sd > FLT_MANT_DIG) + { + // start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + // finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + // 12345678901234567890123456 + // 1 = msb 1 bit + // P = bit FLT_MANT_DIG-1 bits to the right of 1 + // Q = bit FLT_MANT_DIG bits to the right of 1 + // R = "or" of all bits to the right of Q + switch (sd) + { + case FLT_MANT_DIG + 1: + a <<= 1; + break; + case FLT_MANT_DIG + 2: + break; + default: + a = (a >> (sd - (FLT_MANT_DIG+2))) | + ((a & ((tu_int)(-1) >> ((N + FLT_MANT_DIG+2) - sd))) != 0); + }; + // finish: + a |= (a & 4) != 0; // Or P into R + ++a; // round - this step may add a significant bit + a >>= 2; // dump Q and R + // a is now rounded to FLT_MANT_DIG or FLT_MANT_DIG+1 bits + if (a & ((tu_int)1 << FLT_MANT_DIG)) + { + a >>= 1; + ++e; + } + // a is now rounded to FLT_MANT_DIG bits 
+ } + else + { + a <<= (FLT_MANT_DIG - sd); + // a is now rounded to FLT_MANT_DIG bits + } + float_bits fb; + fb.u = ((e + 127) << 23) | // exponent + ((su_int)a & 0x007FFFFF); // mantissa + return fb.f; +} + +#endif diff --git a/lib/floatuntixf.c b/lib/floatuntixf.c new file mode 100644 index 000000000..a28db10f6 --- /dev/null +++ b/lib/floatuntixf.c @@ -0,0 +1,80 @@ +//===-- floatuntixf.c - Implement __floatuntixf ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __floatuntixf for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#if __x86_64 + +#include "int_lib.h" +#include <float.h> + +// Returns: convert a to a long double, rounding toward even. + +// Assumption: long double is a IEEE 80 bit floating point type padded to 128 bits +// tu_int is a 128 bit integral type + +// gggg gggg gggg gggg gggg gggg gggg gggg | gggg gggg gggg gggg seee eeee eeee eeee | +// 1mmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm | mmmm mmmm mmmm mmmm mmmm mmmm mmmm mmmm + +si_int __clzti2(ti_int a); + +long double +__floatuntixf(tu_int a) +{ + if (a == 0) + return 0.0; + const unsigned N = sizeof(tu_int) * CHAR_BIT; + int sd = N - __clzti2(a); // number of significant digits + int e = sd - 1; // exponent + if (sd > LDBL_MANT_DIG) + { + // start: 0000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQxxxxxxxxxxxxxxxxxx + // finish: 000000000000000000000000000000000000001xxxxxxxxxxxxxxxxxxxxxxPQR + // 12345678901234567890123456 + // 1 = msb 1 bit + // P = bit LDBL_MANT_DIG-1 bits to the right of 1 + // Q = bit LDBL_MANT_DIG bits to the right of 1 + // R = "or" of all bits to the right of Q + switch (sd) + { + case LDBL_MANT_DIG + 1: + a <<= 1; + break; + case LDBL_MANT_DIG + 2: + 
break; + default: + a = (a >> (sd - (LDBL_MANT_DIG+2))) | + ((a & ((tu_int)(-1) >> ((N + LDBL_MANT_DIG+2) - sd))) != 0); + }; + // finish: + a |= (a & 4) != 0; // Or P into R + ++a; // round - this step may add a significant bit + a >>= 2; // dump Q and R + // a is now rounded to LDBL_MANT_DIG or LDBL_MANT_DIG+1 bits + if (a & ((tu_int)1 << LDBL_MANT_DIG)) + { + a >>= 1; + ++e; + } + // a is now rounded to LDBL_MANT_DIG bits + } + else + { + a <<= (LDBL_MANT_DIG - sd); + // a is now rounded to LDBL_MANT_DIG bits + } + long_double_bits fb; + fb.u.high.low = (e + 16383); // exponent + fb.u.low.all = (du_int)a; // mantissa + return fb.f; +} + +#endif diff --git a/lib/gcc_personality_v0.c b/lib/gcc_personality_v0.c new file mode 100644 index 000000000..55d1166b8 --- /dev/null +++ b/lib/gcc_personality_v0.c @@ -0,0 +1,237 @@ +//===-- gcc_personality_v0.c - Implement __gcc_personality_v0 -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
//
//===----------------------------------------------------------------------===//

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

//
// _Unwind_* stuff based on C++ ABI public documentation
// http://refspecs.freestandards.org/abi-eh-1.21.html
//
typedef enum {
    _URC_NO_REASON = 0,
    _URC_FOREIGN_EXCEPTION_CAUGHT = 1,
    _URC_FATAL_PHASE2_ERROR = 2,
    _URC_FATAL_PHASE1_ERROR = 3,
    _URC_NORMAL_STOP = 4,
    _URC_END_OF_STACK = 5,
    _URC_HANDLER_FOUND = 6,
    _URC_INSTALL_CONTEXT = 7,
    _URC_CONTINUE_UNWIND = 8
} _Unwind_Reason_Code;

typedef enum {
    _UA_SEARCH_PHASE = 1,
    _UA_CLEANUP_PHASE = 2,
    _UA_HANDLER_FRAME = 4,
    _UA_FORCE_UNWIND = 8,
    _UA_END_OF_STACK = 16
} _Unwind_Action;

typedef struct _Unwind_Context* _Unwind_Context_t;

struct _Unwind_Exception {
    uint64_t                exception_class;
    _Unwind_Reason_Code     (*exception_cleanup)(_Unwind_Reason_Code reason,
                                                 struct _Unwind_Exception* exc);
    uintptr_t               private_1;
    uintptr_t               private_2;
};

extern const uint8_t*    _Unwind_GetLanguageSpecificData(_Unwind_Context_t c);
extern void              _Unwind_SetGR(_Unwind_Context_t c, int i, uintptr_t n);
extern void              _Unwind_SetIP(_Unwind_Context_t, uintptr_t new_value);
extern uintptr_t         _Unwind_GetIP(_Unwind_Context_t context);
extern uintptr_t         _Unwind_GetRegionStart(_Unwind_Context_t context);


//
// Pointer encodings documented at:
//   http://refspecs.freestandards.org/LSB_1.3.0/gLSB/gLSB/ehframehdr.html
//
#define DW_EH_PE_omit      0xff  // no data follows

#define DW_EH_PE_absptr    0x00
#define DW_EH_PE_uleb128   0x01
#define DW_EH_PE_udata2    0x02
#define DW_EH_PE_udata4    0x03
#define DW_EH_PE_udata8    0x04
#define DW_EH_PE_sleb128   0x09
#define DW_EH_PE_sdata2    0x0A
#define DW_EH_PE_sdata4    0x0B
#define DW_EH_PE_sdata8    0x0C

#define DW_EH_PE_pcrel     0x10
#define DW_EH_PE_textrel   0x20
#define DW_EH_PE_datarel   0x30
#define DW_EH_PE_funcrel   0x40
#define DW_EH_PE_aligned   0x50
#define DW_EH_PE_indirect  0x80  // gcc extension



// Read a ULEB128 encoded value and advance the pointer.
//
// data: in/out cursor; on return *data points just past the last byte of the
//       encoded value.
// Returns the decoded value.  Each 7-bit group is widened to uintptr_t before
// it is shifted, so values wider than an int decode correctly.  Groups that
// would land entirely beyond the width of uintptr_t are consumed but ignored
// rather than shifted by an out-of-range amount.
static uintptr_t readULEB128(const uint8_t** data)
{
    uintptr_t result = 0;
    uintptr_t shift = 0;
    unsigned char byte;
    const uint8_t* p = *data;
    do {
        byte = *p++;
        if (shift < sizeof(uintptr_t) * 8)
            result |= (uintptr_t)(byte & 0x7f) << shift;
        shift += 7;
    } while (byte & 0x80);
    *data = p;
    return result;
}

// Read a pointer encoded value and advance the pointer.
//
// data:     in/out cursor; on return *data points just past the value read.
// encoding: DW_EH_PE_* byte.  The low nibble selects the storage format, bits
//           0x70 select what the value is relative to, and DW_EH_PE_indirect
//           requests one extra dereference of the result.
// Returns 0 without touching *data when encoding is DW_EH_PE_omit.
// Aborts on formats this routine does not implement (sleb128, and every
// relative mode other than absolute and pc-relative).
//
// Fixed-width values are fetched with memcpy because the table is a packed
// byte stream and nothing in it guarantees natural alignment.
static uintptr_t readEncodedPointer(const uint8_t** data, uint8_t encoding)
{
    const uint8_t* p = *data;
    uintptr_t result = 0;

    if ( encoding == DW_EH_PE_omit )
        return 0;

    // first get value
    switch (encoding & 0x0F) {
        case DW_EH_PE_absptr: {
            uintptr_t v;
            memcpy(&v, p, sizeof(v));
            result = v;
            p += sizeof(v);
            break;
        }
        case DW_EH_PE_uleb128:
            result = readULEB128(&p);
            break;
        case DW_EH_PE_udata2: {
            uint16_t v;
            memcpy(&v, p, sizeof(v));
            result = v;
            p += sizeof(v);
            break;
        }
        case DW_EH_PE_udata4: {
            uint32_t v;
            memcpy(&v, p, sizeof(v));
            result = v;
            p += sizeof(v);
            break;
        }
        case DW_EH_PE_udata8: {
            uint64_t v;
            memcpy(&v, p, sizeof(v));
            result = (uintptr_t)v;
            p += sizeof(v);
            break;
        }
        case DW_EH_PE_sdata2: {
            int16_t v;
            memcpy(&v, p, sizeof(v));
            result = (uintptr_t)(intptr_t)v;  // sign-extend
            p += sizeof(v);
            break;
        }
        case DW_EH_PE_sdata4: {
            int32_t v;
            memcpy(&v, p, sizeof(v));
            result = (uintptr_t)(intptr_t)v;  // sign-extend
            p += sizeof(v);
            break;
        }
        case DW_EH_PE_sdata8: {
            int64_t v;
            memcpy(&v, p, sizeof(v));
            result = (uintptr_t)v;
            p += sizeof(v);
            break;
        }
        case DW_EH_PE_sleb128:
        default:
            // not supported
            abort();
            break;
    }

    // then add relative offset
    switch ( encoding & 0x70 ) {
        case DW_EH_PE_absptr:
            // do nothing
            break;
        case DW_EH_PE_pcrel:
            // relative to the address the encoded value was read from
            result += (uintptr_t)(*data);
            break;
        case DW_EH_PE_textrel:
        case DW_EH_PE_datarel:
        case DW_EH_PE_funcrel:
        case DW_EH_PE_aligned:
        default:
            // not supported
            abort();
            break;
    }

    // then apply indirection
    if (encoding & DW_EH_PE_indirect) {
        result = *((uintptr_t*)result);
    }

    *data = p;
    return result;
}


//
// The C compiler makes references to __gcc_personality_v0 in
// the dwarf unwind information for translation units that use
// __attribute__((cleanup(xx))) on local
variables. +// This personality routine is called by the system unwinder +// on each frame as the stack is unwound during a C++ exception +// throw through a C function compiled with -fexceptions. +// +_Unwind_Reason_Code __gcc_personality_v0(int version, _Unwind_Action actions, + uint64_t exceptionClass, struct _Unwind_Exception* exceptionObject, + _Unwind_Context_t context) +{ + // Since C does not have catch clauses, there is nothing to do during + // phase 1 (the search phase). + if ( actions & _UA_SEARCH_PHASE ) + return _URC_CONTINUE_UNWIND; + + // There is nothing to do if there is no LSDA for this frame. + const uint8_t* lsda = _Unwind_GetLanguageSpecificData(context); + if ( lsda == NULL ) + return _URC_CONTINUE_UNWIND; + + uintptr_t pc = _Unwind_GetIP(context)-1; + uintptr_t funcStart = _Unwind_GetRegionStart(context); + uintptr_t pcOffset = pc - funcStart; + + // Parse LSDA header. + uint8_t lpStartEncoding = *lsda++; + if (lpStartEncoding != DW_EH_PE_omit) { + readEncodedPointer(&lsda, lpStartEncoding); + } + uint8_t ttypeEncoding = *lsda++; + if (ttypeEncoding != DW_EH_PE_omit) { + readULEB128(&lsda); + } + // Walk call-site table looking for range that includes current PC. + uint8_t callSiteEncoding = *lsda++; + uint32_t callSiteTableLength = readULEB128(&lsda); + const uint8_t* callSiteTableStart = lsda; + const uint8_t* callSiteTableEnd = callSiteTableStart + callSiteTableLength; + const uint8_t* p=callSiteTableStart; + while (p < callSiteTableEnd) { + uintptr_t start = readEncodedPointer(&p, callSiteEncoding); + uintptr_t length = readEncodedPointer(&p, callSiteEncoding); + uintptr_t landingPad = readEncodedPointer(&p, callSiteEncoding); + readULEB128(&p); // action value not used for C code + if ( landingPad == 0 ) + continue; // no landing pad for this entry + if ( (start <= pcOffset) && (pcOffset < (start+length)) ) { + // Found landing pad for the PC. + // Set Instruction Pointer to so we re-enter function + // at landing pad. 
The landing pad is created by the compiler + // to take two parameters in registers. + _Unwind_SetGR(context, __builtin_eh_return_data_regno(0), + (uintptr_t)exceptionObject); + _Unwind_SetGR(context, __builtin_eh_return_data_regno(1), 0); + _Unwind_SetIP(context, funcStart+landingPad); + return _URC_INSTALL_CONTEXT; + } + } + + // No landing pad found, continue unwinding. + return _URC_CONTINUE_UNWIND; +} + diff --git a/lib/i386/Makefile.mk b/lib/i386/Makefile.mk new file mode 100644 index 000000000..140ee9977 --- /dev/null +++ b/lib/i386/Makefile.mk @@ -0,0 +1,22 @@ +#===- lib/i386/Makefile.mk ---------------------------------*- Makefile -*--===# +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +#===------------------------------------------------------------------------===# + +Dir := lib/i386 +SubDirs := +OnlyArchs := i386 + +AsmSources := $(foreach file,$(wildcard $(Dir)/*.s),$(notdir $(file))) +Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file))) +ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.s=%.o) +Target := Optimized + +# FIXME: use automatic dependencies? +Dependencies := $(wildcard $(Dir)/*.h) + +include make/subdir.mk diff --git a/lib/i386/ashldi3.s b/lib/i386/ashldi3.s new file mode 100644 index 000000000..3de2dfc83 --- /dev/null +++ b/lib/i386/ashldi3.s @@ -0,0 +1,65 @@ +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. + +// di_int __ashldi3(di_int input, int count); + +// This routine has some extra memory traffic, loading the 64-bit input via two +// 32-bit loads, then immediately storing it back to the stack via a single 64-bit +// store. This is to avoid a write-small, read-large stall. +// However, if callers of this routine can be safely assumed to store the argument +// via a 64-bt store, this is unnecessary memory traffic, and should be avoided. 
// It can be turned off by defining the TRUST_CALLERS_USE_64_BIT_STORES macro.

#ifdef __i386__
#ifdef __SSE2__

// SSE2 variant: the 64-bit value is shifted in one xmm register and the result
// is returned in edx:eax.
.text
.align 4
.globl ___ashldi3
___ashldi3:
    movd        12(%esp),       %xmm2   // Load count
#ifndef TRUST_CALLERS_USE_64_BIT_STORES
    movd        4(%esp),        %xmm0
    movd        8(%esp),        %xmm1
    punpckldq   %xmm1,          %xmm0   // Load input
#else
    movq        4(%esp),        %xmm0   // Load input
#endif
    psllq       %xmm2,          %xmm0   // shift input by count
    movd        %xmm0,          %eax    // low word of the result
    psrlq       $32,            %xmm0
    movd        %xmm0,          %edx    // high word of the result
    ret

#else // Use GPRs instead of SSE2 instructions, if they aren't available.

// GPR variant: input is handled as high (edx) and low (eax) 32-bit words and
// the result is returned in edx:eax.
.text
.align 4
.globl ___ashldi3
___ashldi3:
    movl        12(%esp),       %ecx    // Load count
    movl        8(%esp),        %edx    // Load high
    movl        4(%esp),        %eax    // Load low

    testl       $0x20,          %ecx    // If count >= 32
    jnz         2f                      //    goto 2
    testl       $0x1f,          %ecx    // If count == 0
    jz          1f                      //    goto 1

    pushl       %ebx
    movl        %eax,           %ebx    // copy low
    shll        %cl,            %eax    // left shift low by count
    shll        %cl,            %edx    // left shift high by count
    neg         %cl                     // the shift below uses only the low 5 bits of cl
    shrl        %cl,            %ebx    // right shift low by 32 - count
    orl         %ebx,           %edx    // or the result into the high word
    popl        %ebx
1:  ret

2:  movl        %eax,           %edx    // Move low to high
    xorl        %eax,           %eax    // clear low
    shll        %cl,            %edx    // shift high by count - 32
    ret

#endif // __SSE2__
#endif // __i386__
// diff --git a/lib/i386/ashrdi3.s b/lib/i386/ashrdi3.s new file mode 100644 index 000000000..db01f2132 --- /dev/null +++ b/lib/i386/ashrdi3.s @@ -0,0 +1,75 @@
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
+ +// di_int __ashrdi3(di_int input, int count); + +#ifdef __i386__ +#ifdef __SSE2__ + +.text +.align 4 +.globl ___ashrdi3 +___ashrdi3: + movd 12(%esp), %xmm2 // Load count + movl 8(%esp), %eax +#ifndef TRUST_CALLERS_USE_64_BIT_STORES + movd 4(%esp), %xmm0 + movd 8(%esp), %xmm1 + punpckldq %xmm1, %xmm0 // Load input +#else + movq 4(%esp), %xmm0 // Load input +#endif + + psrlq %xmm2, %xmm0 // unsigned shift input by count + + testl %eax, %eax // check the sign-bit of the input + jns 1f // early out for positive inputs + + // If the input is negative, we need to construct the shifted sign bit + // to or into the result, as xmm does not have a signed right shift. + pcmpeqb %xmm1, %xmm1 // -1ULL + psrlq $58, %xmm1 // 0x3f + pandn %xmm1, %xmm2 // 63 - count + pcmpeqb %xmm1, %xmm1 // -1ULL + psubq %xmm1, %xmm2 // 64 - count + psllq %xmm2, %xmm1 // -1 << (64 - count) = leading sign bits + por %xmm1, %xmm0 + + // Move the result back to the general purpose registers and return +1: movd %xmm0, %eax + psrlq $32, %xmm0 + movd %xmm0, %edx + ret + +#else // Use GPRs instead of SSE2 instructions, if they aren't available. 
+ +.text +.align 4 +.globl ___ashrdi3 +___ashrdi3: + movl 12(%esp), %ecx // Load count + movl 8(%esp), %edx // Load high + movl 4(%esp), %eax // Load low + + testl $0x20, %ecx // If count >= 32 + jnz 2f // goto 2 + testl $0x1f, %ecx // If count == 0 + jz 1f // goto 1 + + pushl %ebx + movl %edx, %ebx // copy high + shrl %cl, %eax // right shift low by count + sarl %cl, %edx // right shift high by count + neg %cl + shll %cl, %ebx // left shift high by 32 - count + orl %ebx, %eax // or the result into the low word + popl %ebx +1: ret + +2: movl %edx, %eax // Move high to low + sarl $31, %edx // fill high with copies of the sign bit + sarl %cl, %eax // shift low by count - 32 + ret + +#endif // __SSE2__ +#endif // __i386__ diff --git a/lib/i386/divdi3.s b/lib/i386/divdi3.s new file mode 100644 index 000000000..3b50266d7 --- /dev/null +++ b/lib/i386/divdi3.s @@ -0,0 +1,160 @@ +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. + +// di_int __divdi3(di_int a, di_int b); + +// result = a / b. +// both inputs and the output are 64-bit signed integers. +// This will do whatever the underlying hardware is set to do on division by zero. +// No other exceptions are generated, as the divide cannot overflow. +// +// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware +// on x86_64. The performance goal is ~40 cycles per divide, which is faster than +// currently possible via simulation of integer divides on the x87 unit. +// +// Stephen Canon, December 2008 + +#ifdef __i386__ + +.text +.align 4 +.globl ___divdi3 +___divdi3: + +/* This is currently implemented by wrapping the unsigned divide up in an absolute + value, then restoring the correct sign at the end of the computation. This could + certainly be improved upon. */ + + pushl %esi + movl 20(%esp), %edx // high word of b + movl 16(%esp), %eax // low word of b + movl %edx, %ecx + sarl $31, %ecx // (b < 0) ?
-1 : 0 + xorl %ecx, %eax + xorl %ecx, %edx // EDX:EAX = (b < 0) ? not(b) : b + subl %ecx, %eax + sbbl %ecx, %edx // EDX:EAX = abs(b) + movl %edx, 20(%esp) + movl %eax, 16(%esp) // store abs(b) back to stack + movl %ecx, %esi // set aside sign of b + + movl 12(%esp), %edx // high word of a + movl 8(%esp), %eax // low word of a + movl %edx, %ecx + sarl $31, %ecx // (a < 0) ? -1 : 0 + xorl %ecx, %eax + xorl %ecx, %edx // EDX:EAX = (a < 0) ? not(a) : a + subl %ecx, %eax + sbbl %ecx, %edx // EDX:EAX = abs(a) + movl %edx, 12(%esp) + movl %eax, 8(%esp) // store abs(a) back to stack + xorl %ecx, %esi // sign of result = (sign of a) ^ (sign of b) + + pushl %ebx + movl 24(%esp), %ebx // Find the index i of the leading bit in b. + bsrl %ebx, %ecx // If the high word of b is zero, jump to + jz 9f // the code to handle that special case [9]. + + /* High word of b is known to be non-zero on this branch */ + + movl 20(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b + + shrl %cl, %eax // Practically, this means that bhi is given by: + shrl %eax // + notl %ecx // bhi = (high word of b) << (31 - i) | + shll %cl, %ebx // (low word of b) >> (1 + i) + orl %eax, %ebx // + movl 16(%esp), %edx // Load the high and low words of a, and jump + movl 12(%esp), %eax // to [1] if the high word is larger than bhi + cmpl %ebx, %edx // to avoid overflowing the upcoming divide.
+ jae 1f + + /* High word of a is less than (b >> (1 + i)) on this branch */ + + divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r + + pushl %edi + notl %ecx + shrl %eax + shrl %cl, %eax // q = qs >> (1 + i) + movl %eax, %edi + mull 24(%esp) // q*blo + movl 16(%esp), %ebx + movl 20(%esp), %ecx // ECX:EBX = a + subl %eax, %ebx + sbbl %edx, %ecx // ECX:EBX = a - q*blo + movl 28(%esp), %eax + imull %edi, %eax // q*bhi + subl %eax, %ecx // ECX:EBX = a - q*b + sbbl $0, %edi // decrement q if remainder is negative + xorl %edx, %edx + movl %edi, %eax + + addl %esi, %eax // Restore correct sign to result + adcl %esi, %edx + xorl %esi, %eax + xorl %esi, %edx + popl %edi // Restore callee-save registers + popl %ebx + popl %esi + retl // Return + + +1: /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ + + subl %ebx, %edx // subtract bhi from ahi so that divide will not + divl %ebx // overflow, and find q and r such that + // + // ahi:alo = (1:q)*bhi + r + // + // Note that q is a number in (31-i).(1+i) + // fix point.
+ + pushl %edi + notl %ecx + shrl %eax + orl $0x80000000, %eax + shrl %cl, %eax // q = (1:qs) >> (1 + i) + movl %eax, %edi + mull 24(%esp) // q*blo + movl 16(%esp), %ebx + movl 20(%esp), %ecx // ECX:EBX = a + subl %eax, %ebx + sbbl %edx, %ecx // ECX:EBX = a - q*blo + movl 28(%esp), %eax + imull %edi, %eax // q*bhi + subl %eax, %ecx // ECX:EBX = a - q*b + sbbl $0, %edi // decrement q if remainder is negative + xorl %edx, %edx + movl %edi, %eax + + addl %esi, %eax // Restore correct sign to result + adcl %esi, %edx + xorl %esi, %eax + xorl %esi, %edx + popl %edi // Restore callee-save registers + popl %ebx + popl %esi + retl // Return + + +9: /* High word of b is zero on this branch */ + + movl 16(%esp), %eax // Find qhi and rhi such that + movl 20(%esp), %ecx // + xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b + divl %ecx // + movl %eax, %ebx // + movl 12(%esp), %eax // Find qlo such that + divl %ecx // + movl %ebx, %edx // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b + + addl %esi, %eax // Restore correct sign to result + adcl %esi, %edx + xorl %esi, %eax + xorl %esi, %edx + popl %ebx // Restore callee-save registers + popl %esi + retl // Return + +#endif // __i386__ diff --git a/lib/i386/floatdidf.s b/lib/i386/floatdidf.s new file mode 100644 index 000000000..34736ac58 --- /dev/null +++ b/lib/i386/floatdidf.s @@ -0,0 +1,32 @@ +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+ +// double __floatdidf(di_int a); + +#ifdef __i386__ + +.const +.align 4 +twop52: .quad 0x4330000000000000 +twop32: .quad 0x41f0000000000000 + +#define REL_ADDR(_a) (_a)-0b(%eax) + +.text +.align 4 +.globl ___floatdidf +___floatdidf: + cvtsi2sd 8(%esp), %xmm1 + movss 4(%esp), %xmm0 // low 32 bits of a + calll 0f +0: popl %eax + mulsd REL_ADDR(twop32), %xmm1 // a_hi as a double (without rounding) + movsd REL_ADDR(twop52), %xmm2 // 0x1.0p52 + subsd %xmm2, %xmm1 // a_hi - 0x1p52 (no rounding occurs) + orpd %xmm2, %xmm0 // 0x1p52 + a_lo (no rounding occurs) + addsd %xmm1, %xmm0 // a_hi + a_lo (round happens here) + movsd %xmm0, 4(%esp) + fldl 4(%esp) + ret + +#endif // __i386__ diff --git a/lib/i386/floatdisf.s b/lib/i386/floatdisf.s new file mode 100644 index 000000000..20b80d1d8 --- /dev/null +++ b/lib/i386/floatdisf.s @@ -0,0 +1,30 @@ +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. + +// float __floatdisf(di_int a); + +// This routine has some extra memory traffic, loading the 64-bit input via two +// 32-bit loads, then immediately storing it back to the stack via a single 64-bit +// store. This is to avoid a write-small, read-large stall. +// However, if callers of this routine can be safely assumed to store the argument +// via a 64-bit store, this is unnecessary memory traffic, and should be avoided. +// It can be turned off by defining the TRUST_CALLERS_USE_64_BIT_STORES macro.
+ +#ifdef __i386__ + +.text +.align 4 +.globl ___floatdisf +___floatdisf: +#ifndef TRUST_CALLERS_USE_64_BIT_STORES + movd 4(%esp), %xmm0 + movd 8(%esp), %xmm1 + punpckldq %xmm1, %xmm0 + movq %xmm0, 4(%esp) +#endif + fildll 4(%esp) + fstps 4(%esp) + flds 4(%esp) + ret + +#endif // __i386__ diff --git a/lib/i386/floatdixf.s b/lib/i386/floatdixf.s new file mode 100644 index 000000000..71e2d2abc --- /dev/null +++ b/lib/i386/floatdixf.s @@ -0,0 +1,28 @@ +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. + +// long double __floatdixf(di_int a); + +#ifdef __i386__ + +// This routine has some extra memory traffic, loading the 64-bit input via two +// 32-bit loads, then immediately storing it back to the stack via a single 64-bit +// store. This is to avoid a write-small, read-large stall. +// However, if callers of this routine can be safely assumed to store the argument +// via a 64-bit store, this is unnecessary memory traffic, and should be avoided. +// It can be turned off by defining the TRUST_CALLERS_USE_64_BIT_STORES macro. + +.text +.align 4 +.globl ___floatdixf +___floatdixf: +#ifndef TRUST_CALLERS_USE_64_BIT_STORES + movd 4(%esp), %xmm0 + movd 8(%esp), %xmm1 + punpckldq %xmm1, %xmm0 + movq %xmm0, 4(%esp) +#endif + fildll 4(%esp) + ret + +#endif // __i386__
\ No newline at end of file diff --git a/lib/i386/floatundidf.s b/lib/i386/floatundidf.s new file mode 100644 index 000000000..d9be85343 --- /dev/null +++ b/lib/i386/floatundidf.s @@ -0,0 +1,43 @@ +//===-- floatundidf.s - Implement __floatundidf for i386 ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __floatundidf for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +// double __floatundidf(du_int a); + +#ifdef __i386__ + +.const +.align 4 +twop52: .quad 0x4330000000000000 +twop84_plus_twop52: + .quad 0x4530000000100000 +twop84: .quad 0x4530000000000000 + +#define REL_ADDR(_a) (_a)-0b(%eax) + +.text +.align 4 +.globl ___floatundidf +___floatundidf: + movss 8(%esp), %xmm1 // high 32 bits of a + movss 4(%esp), %xmm0 // low 32 bits of a + calll 0f +0: popl %eax + orpd REL_ADDR(twop84), %xmm1 // 0x1p84 + a_hi (no rounding occurs) + subsd REL_ADDR(twop84_plus_twop52), %xmm1 // a_hi - 0x1p52 (no rounding occurs) + orpd REL_ADDR(twop52), %xmm0 // 0x1p52 + a_lo (no rounding occurs) + addsd %xmm1, %xmm0 // a_hi + a_lo (round happens here) + movsd %xmm0, 4(%esp) + fldl 4(%esp) + ret + +#endif // __i386__ diff --git a/lib/i386/floatundisf.s b/lib/i386/floatundisf.s new file mode 100644 index 000000000..a1b29667c --- /dev/null +++ b/lib/i386/floatundisf.s @@ -0,0 +1,95 @@ +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. + +// float __floatundisf(du_int a); + +// Note that there is a hardware instruction, fildll, that does most of what +// this function needs to do. 
However, because of our ia32 ABI, it will take +// a write-small read-large stall, so the software implementation here is +// actually several cycles faster. + +// This is a branch-free implementation. A branchy implementation might be +// faster for the common case if you know something a priori about the input +// distribution. + +/* branch-free x87 implementation - one cycle slower than without x87. + +#ifdef __i386__ + +.const +.align 3 + + .quad 0x43f0000000000000 +twop64: .quad 0x0000000000000000 + +#define TWOp64 twop64-0b(%ecx,%eax,8) + +.text +.align 4 +.globl ___floatundisf +___floatundisf: + movl 8(%esp), %eax + movd 8(%esp), %xmm1 + movd 4(%esp), %xmm0 + punpckldq %xmm1, %xmm0 + calll 0f +0: popl %ecx + sarl $31, %eax + movq %xmm0, 4(%esp) + fildll 4(%esp) + faddl TWOp64 + fstps 4(%esp) + flds 4(%esp) + ret + +#endif // __i386__ + +*/ + +/* branch-free, x87-free implementation - faster at the expense of code size */ + +#ifdef __i386__ + +.const +.align 3 +twop52: .quad 0x4330000000000000 + .quad 0x0000000000000fff +sticky: .quad 0x0000000000000000 + .long 0x00000012 +twelve: .long 0x00000000 + +#define TWOp52 twop52-0b(%ecx) +#define STICKY sticky-0b(%ecx,%eax,8) + +.text +.align 4 +.globl ___floatundisf +___floatundisf: + movl 8(%esp), %eax + movd 8(%esp), %xmm1 + movd 4(%esp), %xmm0 + punpckldq %xmm1, %xmm0 + + calll 0f +0: popl %ecx + shrl %eax // high 31 bits of input as sint32 + addl $0x7ff80000, %eax + sarl $31, %eax // (big input) ? -1 : 0 + movsd STICKY, %xmm1 // (big input) ? 0xfff : 0 + movl $12, %edx + andl %eax, %edx // (big input) ? 12 : 0 + movd %edx, %xmm3 + andpd %xmm0, %xmm1 // (big input) ? input & 0xfff : 0 + movsd TWOp52, %xmm2 // 0x1.0p52 + psrlq %xmm3, %xmm0 // (big input) ? input >> 12 : input + orpd %xmm2, %xmm1 // 0x1.0p52 + ((big input) ? input & 0xfff : input) + orpd %xmm1, %xmm0 // 0x1.0p52 + ((big input) ? (input >> 12 | input & 0xfff) : input) + subsd %xmm2, %xmm0 // (double)((big input) ? 
(input >> 12 | input & 0xfff) : input) + cvtsd2ss %xmm0, %xmm0 // (float)((big input) ? (input >> 12 | input & 0xfff) : input) + pslld $23, %xmm3 + paddd %xmm3, %xmm0 // (float)input + movd %xmm0, 4(%esp) + flds 4(%esp) + ret + +#endif // __i386__ diff --git a/lib/i386/floatundixf.s b/lib/i386/floatundixf.s new file mode 100644 index 000000000..c24689aac --- /dev/null +++ b/lib/i386/floatundixf.s @@ -0,0 +1,34 @@ +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. + +// long double __floatundixf(du_int a); + +#ifdef __i386__ + +.const +.align 4 +twop52: .quad 0x4330000000000000 +twop84_plus_twop52_neg: + .quad 0xc530000000100000 +twop84: .quad 0x4530000000000000 + +#define REL_ADDR(_a) (_a)-0b(%eax) + +.text +.align 4 +.globl ___floatundixf +___floatundixf: + calll 0f +0: popl %eax + movss 8(%esp), %xmm0 // hi 32 bits of input + movss 4(%esp), %xmm1 // lo 32 bits of input + orpd REL_ADDR(twop84), %xmm0 // 2^84 + hi (as a double) + orpd REL_ADDR(twop52), %xmm1 // 2^52 + lo (as a double) + addsd REL_ADDR(twop84_plus_twop52_neg), %xmm0 // hi - 2^52 (no rounding occurs) + movsd %xmm1, 4(%esp) + fldl 4(%esp) + movsd %xmm0, 4(%esp) + faddl 4(%esp) + ret + +#endif // __i386__
\ No newline at end of file diff --git a/lib/i386/lshrdi3.s b/lib/i386/lshrdi3.s new file mode 100644 index 000000000..5992c2131 --- /dev/null +++ b/lib/i386/lshrdi3.s @@ -0,0 +1,65 @@ +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. + +// di_int __lshrdi3(di_int input, int count); + +// This routine has some extra memory traffic, loading the 64-bit input via two +// 32-bit loads, then immediately storing it back to the stack via a single 64-bit +// store. This is to avoid a write-small, read-large stall. +// However, if callers of this routine can be safely assumed to store the argument +// via a 64-bit store, this is unnecessary memory traffic, and should be avoided. +// It can be turned off by defining the TRUST_CALLERS_USE_64_BIT_STORES macro. + +#ifdef __i386__ +#ifdef __SSE2__ + +.text +.align 4 +.globl ___lshrdi3 +___lshrdi3: + movd 12(%esp), %xmm2 // Load count +#ifndef TRUST_CALLERS_USE_64_BIT_STORES + movd 4(%esp), %xmm0 + movd 8(%esp), %xmm1 + punpckldq %xmm1, %xmm0 // Load input +#else + movq 4(%esp), %xmm0 // Load input +#endif + psrlq %xmm2, %xmm0 // shift input by count + movd %xmm0, %eax + psrlq $32, %xmm0 + movd %xmm0, %edx + ret + +#else // Use GPRs instead of SSE2 instructions, if they aren't available.
+ +.text +.align 4 +.globl ___lshrdi3 +___lshrdi3: + movl 12(%esp), %ecx // Load count + movl 8(%esp), %edx // Load high + movl 4(%esp), %eax // Load low + + testl $0x20, %ecx // If count >= 32 + jnz 2f // goto 2 + testl $0x1f, %ecx // If count == 0 + jz 1f // goto 1 + + pushl %ebx + movl %edx, %ebx // copy high + shrl %cl, %eax // right shift low by count + shrl %cl, %edx // right shift high by count + neg %cl + shll %cl, %ebx // left shift high by 32 - count + orl %ebx, %eax // or the result into the low word + popl %ebx +1: ret + +2: movl %edx, %eax // Move high to low + xorl %edx, %edx // clear high + shrl %cl, %eax // shift low by count - 32 + ret + +#endif // __SSE2__ +#endif // __i386__ diff --git a/lib/i386/moddi3.s b/lib/i386/moddi3.s new file mode 100644 index 000000000..af1f38a13 --- /dev/null +++ b/lib/i386/moddi3.s @@ -0,0 +1,165 @@ +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. + +// di_int __moddi3(di_int a, di_int b); + +// result = remainder of a / b. +// both inputs and the output are 64-bit signed integers. +// This will do whatever the underlying hardware is set to do on division by zero. +// No other exceptions are generated, as the divide cannot overflow. +// +// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware +// on x86_64. The performance goal is ~40 cycles per divide, which is faster than +// currently possible via simulation of integer divides on the x87 unit. +// + +// Stephen Canon, December 2008 + +#ifdef __i386__ + +.text +.align 4 +.globl ___moddi3 +___moddi3: + +/* This is currently implemented by wrapping the unsigned modulus up in an absolute + value. This could certainly be improved upon. */ + + pushl %esi + movl 20(%esp), %edx // high word of b + movl 16(%esp), %eax // low word of b + movl %edx, %ecx + sarl $31, %ecx // (b < 0) ? -1 : 0 + xorl %ecx, %eax + xorl %ecx, %edx // EDX:EAX = (b < 0) ? 
not(b) : b + subl %ecx, %eax + sbbl %ecx, %edx // EDX:EAX = abs(b) + movl %edx, 20(%esp) + movl %eax, 16(%esp) // store abs(b) back to stack + + movl 12(%esp), %edx // high word of a + movl 8(%esp), %eax // low word of a + movl %edx, %ecx + sarl $31, %ecx // (a < 0) ? -1 : 0 + xorl %ecx, %eax + xorl %ecx, %edx // EDX:EAX = (a < 0) ? not(a) : a + subl %ecx, %eax + sbbl %ecx, %edx // EDX:EAX = abs(a) + movl %edx, 12(%esp) + movl %eax, 8(%esp) // store abs(a) back to stack + movl %ecx, %esi // set aside sign of a + + pushl %ebx + movl 24(%esp), %ebx // Find the index i of the leading bit in b. + bsrl %ebx, %ecx // If the high word of b is zero, jump to + jz 9f // the code to handle that special case [9]. + + /* High word of b is known to be non-zero on this branch */ + + movl 20(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b + + shrl %cl, %eax // Practically, this means that bhi is given by: + shrl %eax // + notl %ecx // bhi = (high word of b) << (31 - i) | + shll %cl, %ebx // (low word of b) >> (1 + i) + orl %eax, %ebx // + movl 16(%esp), %edx // Load the high and low words of a, and jump + movl 12(%esp), %eax // to [2] if the high word is larger than bhi + cmpl %ebx, %edx // to avoid overflowing the upcoming divide. + jae 2f + + /* High word of a is less than (b >> (1 + i)) on this branch */ + + divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r + + pushl %edi + notl %ecx + shrl %eax + shrl %cl, %eax // q = qs >> (1 + i) + movl %eax, %edi + mull 24(%esp) // q*blo + movl 16(%esp), %ebx + movl 20(%esp), %ecx // ECX:EBX = a + subl %eax, %ebx + sbbl %edx, %ecx // ECX:EBX = a - q*blo + movl 28(%esp), %eax + imull %edi, %eax // q*bhi + subl %eax, %ecx // ECX:EBX = a - q*b + + jnc 1f // if positive, this is the result.
+ addl 24(%esp), %ebx // otherwise + adcl 28(%esp), %ecx // ECX:EBX = a - (q-1)*b = result +1: movl %ebx, %eax + movl %ecx, %edx + + addl %esi, %eax // Restore correct sign to result + adcl %esi, %edx + xorl %esi, %eax + xorl %esi, %edx + popl %edi // Restore callee-save registers + popl %ebx + popl %esi + retl // Return + +2: /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ + + subl %ebx, %edx // subtract bhi from ahi so that divide will not + divl %ebx // overflow, and find q and r such that + // + // ahi:alo = (1:q)*bhi + r + // + // Note that q is a number in (31-i).(1+i) + // fix point. + + pushl %edi + notl %ecx + shrl %eax + orl $0x80000000, %eax + shrl %cl, %eax // q = (1:qs) >> (1 + i) + movl %eax, %edi + mull 24(%esp) // q*blo + movl 16(%esp), %ebx + movl 20(%esp), %ecx // ECX:EBX = a + subl %eax, %ebx + sbbl %edx, %ecx // ECX:EBX = a - q*blo + movl 28(%esp), %eax + imull %edi, %eax // q*bhi + subl %eax, %ecx // ECX:EBX = a - q*b + + jnc 3f // if positive, this is the result. 
+ addl 24(%esp), %ebx // otherwise + adcl 28(%esp), %ecx // ECX:EBX = a - (q-1)*b = result +3: movl %ebx, %eax + movl %ecx, %edx + + addl %esi, %eax // Restore correct sign to result + adcl %esi, %edx + xorl %esi, %eax + xorl %esi, %edx + popl %edi // Restore callee-save registers + popl %ebx + popl %esi + retl // Return + +9: /* High word of b is zero on this branch */ + + movl 16(%esp), %eax // Find qhi and rhi such that + movl 20(%esp), %ecx // + xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b + divl %ecx // + movl %eax, %ebx // + movl 12(%esp), %eax // Find rlo such that + divl %ecx // + movl %edx, %eax // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b + popl %ebx // + xorl %edx, %edx // and return 0:rlo + + addl %esi, %eax // Restore correct sign to result + adcl %esi, %edx + xorl %esi, %eax + xorl %esi, %edx + popl %esi + retl // Return + + +#endif // __i386__ diff --git a/lib/i386/muldi3.s b/lib/i386/muldi3.s new file mode 100644 index 000000000..9f29c3ced --- /dev/null +++ b/lib/i386/muldi3.s @@ -0,0 +1,28 @@ +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. + +// di_int __muldi3(di_int a, di_int b); + +#ifdef __i386__ + +.text +.align 4 +.globl ___muldi3 +___muldi3: + pushl %ebx + movl 16(%esp), %eax // b.lo + movl 12(%esp), %ecx // a.hi + imull %eax, %ecx // b.lo * a.hi + + movl 8(%esp), %edx // a.lo + movl 20(%esp), %ebx // b.hi + imull %edx, %ebx // a.lo * b.hi + + mull %edx // EDX:EAX = a.lo * b.lo + addl %ecx, %ebx // EBX = (a.lo*b.hi + a.hi*b.lo) + addl %ebx, %edx + + popl %ebx + retl + +#endif // __i386__ diff --git a/lib/i386/udivdi3.s b/lib/i386/udivdi3.s new file mode 100644 index 000000000..977cee36c --- /dev/null +++ b/lib/i386/udivdi3.s @@ -0,0 +1,113 @@ +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. + +// du_int __udivdi3(du_int a, du_int b); + +// result = a / b. 
+// both inputs and the output are 64-bit unsigned integers. +// This will do whatever the underlying hardware is set to do on division by zero. +// No other exceptions are generated, as the divide cannot overflow. +// +// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware +// on x86_64. The performance goal is ~40 cycles per divide, which is faster than +// currently possible via simulation of integer divides on the x87 unit. +// +// Stephen Canon, December 2008 + +#ifdef __i386__ + +.text +.align 4 +.globl ___udivdi3 +___udivdi3: + + pushl %ebx + movl 20(%esp), %ebx // Find the index i of the leading bit in b. + bsrl %ebx, %ecx // If the high word of b is zero, jump to + jz 9f // the code to handle that special case [9]. + + /* High word of b is known to be non-zero on this branch */ + + movl 16(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b + + shrl %cl, %eax // Practically, this means that bhi is given by: + shrl %eax // + notl %ecx // bhi = (high word of b) << (31 - i) | + shll %cl, %ebx // (low word of b) >> (1 + i) + orl %eax, %ebx // + movl 12(%esp), %edx // Load the high and low words of a, and jump + movl 8(%esp), %eax // to [1] if the high word is larger than bhi + cmpl %ebx, %edx // to avoid overflowing the upcoming divide. 
+ jae 1f + + /* High word of a is less than (b >> (1 + i)) on this branch */ + + divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r + + pushl %edi + notl %ecx + shrl %eax + shrl %cl, %eax // q = qs >> (1 + i) + movl %eax, %edi + mull 20(%esp) // q*blo + movl 12(%esp), %ebx + movl 16(%esp), %ecx // ECX:EBX = a + subl %eax, %ebx + sbbl %edx, %ecx // ECX:EBX = a - q*blo + movl 24(%esp), %eax + imull %edi, %eax // q*bhi + subl %eax, %ecx // ECX:EBX = a - q*b + sbbl $0, %edi // decrement q if remainder is negative + xorl %edx, %edx + movl %edi, %eax + popl %edi + popl %ebx + retl + + +1: /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ + + subl %ebx, %edx // subtract bhi from ahi so that divide will not + divl %ebx // overflow, and find q and r such that + // + // ahi:alo = (1:q)*bhi + r + // + // Note that q is a number in (31-i).(1+i) + // fix point. + + pushl %edi + notl %ecx + shrl %eax + orl $0x80000000, %eax + shrl %cl, %eax // q = (1:qs) >> (1 + i) + movl %eax, %edi + mull 20(%esp) // q*blo + movl 12(%esp), %ebx + movl 16(%esp), %ecx // ECX:EBX = a + subl %eax, %ebx + sbbl %edx, %ecx // ECX:EBX = a - q*blo + movl 24(%esp), %eax + imull %edi, %eax // q*bhi + subl %eax, %ecx // ECX:EBX = a - q*b + sbbl $0, %edi // decrement q if remainder is negative + xorl %edx, %edx + movl %edi, %eax + popl %edi + popl %ebx + retl + + +9: /* High word of b is zero on this branch */ + + movl 12(%esp), %eax // Find qhi and rhi such that + movl 16(%esp), %ecx // + xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b + divl %ecx // + movl %eax, %ebx // + movl 8(%esp), %eax // Find qlo such that + divl %ecx // + movl %ebx, %edx // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b + popl %ebx // + retl // and return qhi:qlo + +#endif // __i386__ diff --git a/lib/i386/umoddi3.s b/lib/i386/umoddi3.s new file mode 100644 index 000000000..2ddd71006 --- /dev/null +++ b/lib/i386/umoddi3.s @@ -0,0 +1,124 @@ +// This file is distributed under
the University of Illinois Open Source +// License. See LICENSE.TXT for details. + +// du_int __umoddi3(du_int a, du_int b); + +// result = remainder of a / b. +// both inputs and the output are 64-bit unsigned integers. +// This will do whatever the underlying hardware is set to do on division by zero. +// No other exceptions are generated, as the divide cannot overflow. +// +// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware +// on x86_64. The performance goal is ~40 cycles per divide, which is faster than +// currently possible via simulation of integer divides on the x87 unit. +// + +// Stephen Canon, December 2008 + +#ifdef __i386__ + +.text +.align 4 +.globl ___umoddi3 +___umoddi3: + + pushl %ebx + movl 20(%esp), %ebx // Find the index i of the leading bit in b. + bsrl %ebx, %ecx // If the high word of b is zero, jump to + jz 9f // the code to handle that special case [9]. + + /* High word of b is known to be non-zero on this branch */ + + movl 16(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b + + shrl %cl, %eax // Practically, this means that bhi is given by: + shrl %eax // + notl %ecx // bhi = (high word of b) << (31 - i) | + shll %cl, %ebx // (low word of b) >> (1 + i) + orl %eax, %ebx // + movl 12(%esp), %edx // Load the high and low words of a, and jump + movl 8(%esp), %eax // to [2] if the high word is larger than bhi + cmpl %ebx, %edx // to avoid overflowing the upcoming divide. + jae 2f + + /* High word of a is less than (b >> (1 + i)) on this branch */ + + divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r + + pushl %edi + notl %ecx + shrl %eax + shrl %cl, %eax // q = qs >> (1 + i) + movl %eax, %edi + mull 20(%esp) // q*blo + movl 12(%esp), %ebx + movl 16(%esp), %ecx // ECX:EBX = a + subl %eax, %ebx + sbbl %edx, %ecx // ECX:EBX = a - q*blo + movl 24(%esp), %eax + imull %edi, %eax // q*bhi + subl %eax, %ecx // ECX:EBX = a - q*b + + jnc 1f // if positive, this is the result.
+ addl 20(%esp), %ebx // otherwise + adcl 24(%esp), %ecx // ECX:EBX = a - (q-1)*b = result +1: movl %ebx, %eax + movl %ecx, %edx + + popl %edi + popl %ebx + retl + + +2: /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ + + subl %ebx, %edx // subtract bhi from ahi so that divide will not + divl %ebx // overflow, and find q and r such that + // + // ahi:alo = (1:q)*bhi + r + // + // Note that q is a number in (31-i).(1+i) + // fix point. + + pushl %edi + notl %ecx + shrl %eax + orl $0x80000000, %eax + shrl %cl, %eax // q = (1:qs) >> (1 + i) + movl %eax, %edi + mull 20(%esp) // q*blo + movl 12(%esp), %ebx + movl 16(%esp), %ecx // ECX:EBX = a + subl %eax, %ebx + sbbl %edx, %ecx // ECX:EBX = a - q*blo + movl 24(%esp), %eax + imull %edi, %eax // q*bhi + subl %eax, %ecx // ECX:EBX = a - q*b + + jnc 3f // if positive, this is the result. + addl 20(%esp), %ebx // otherwise + adcl 24(%esp), %ecx // ECX:EBX = a - (q-1)*b = result +3: movl %ebx, %eax + movl %ecx, %edx + + popl %edi + popl %ebx + retl + + + +9: /* High word of b is zero on this branch */ + + movl 12(%esp), %eax // Find qhi and rhi such that + movl 16(%esp), %ecx // + xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b + divl %ecx // + movl %eax, %ebx // + movl 8(%esp), %eax // Find rlo such that + divl %ecx // + movl %edx, %eax // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b + popl %ebx // + xorl %edx, %edx // and return 0:rlo + retl // + +#endif // __i386__ diff --git a/lib/int_lib.h b/lib/int_lib.h new file mode 100644 index 000000000..7d09c856f --- /dev/null +++ b/lib/int_lib.h @@ -0,0 +1,143 @@ +//===-- int_lib.h - configuration header for libgcc replacement -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a configuration header for libgcc replacement.
+// This file is not part of the interface of this library.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef INT_LIB_H
+#define INT_LIB_H
+
+// Assumption: signed integral is 2's complement
+// Assumption: right shift of signed negative is arithmetic shift
+
+#include <limits.h>
+
+// Byte order: _YUGA_LITTLE_ENDIAN and _YUGA_BIG_ENDIAN are defined together,
+// one as 1 and the other as 0.  They select the member order of the unions
+// below so that "low" and "high" overlay the matching half of "all".
+
+#ifdef __LITTLE_ENDIAN__
+#if __LITTLE_ENDIAN__
+#define _YUGA_LITTLE_ENDIAN 1
+#define _YUGA_BIG_ENDIAN 0
+#endif
+#endif
+
+#ifdef __BIG_ENDIAN__
+#if __BIG_ENDIAN__
+#define _YUGA_LITTLE_ENDIAN 0
+#define _YUGA_BIG_ENDIAN 1
+#endif
+#endif
+
+// Fallback for compilers that do not predefine __LITTLE_ENDIAN__ or
+// __BIG_ENDIAN__ but do provide __BYTE_ORDER__.
+#if !defined(_YUGA_LITTLE_ENDIAN) && !defined(_YUGA_BIG_ENDIAN)
+#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
+    defined(__ORDER_BIG_ENDIAN__)
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define _YUGA_LITTLE_ENDIAN 1
+#define _YUGA_BIG_ENDIAN 0
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define _YUGA_LITTLE_ENDIAN 0
+#define _YUGA_BIG_ENDIAN 1
+#endif
+#endif
+#endif
+
+#if !defined(_YUGA_LITTLE_ENDIAN) || !defined(_YUGA_BIG_ENDIAN)
+#error unable to determine endian
+#endif
+
+// Single-word (si/su) and double-word (di/du) integer types.
+
+typedef int si_int;
+typedef unsigned su_int;
+
+typedef long long di_int;
+typedef unsigned long long du_int;
+
+// A signed double word, also addressable as its low and high words.
+typedef union
+{
+    di_int all;
+    struct
+    {
+#if _YUGA_LITTLE_ENDIAN
+        su_int low;
+        si_int high;
+#else
+        si_int high;
+        su_int low;
+#endif
+    };
+} dwords;
+
+// An unsigned double word, also addressable as its low and high words.
+typedef union
+{
+    du_int all;
+    struct
+    {
+#if _YUGA_LITTLE_ENDIAN
+        su_int low;
+        su_int high;
+#else
+        su_int high;
+        su_int low;
+#endif
+    };
+} udwords;
+
+#if __x86_64
+
+// Quad-word (ti/tu) integer types; only declared when __x86_64 is set.
+
+typedef int ti_int __attribute__ ((mode (TI)));
+typedef unsigned tu_int __attribute__ ((mode (TI)));
+
+// A signed quad word, also addressable as its low and high double words.
+typedef union
+{
+    ti_int all;
+    struct
+    {
+#if _YUGA_LITTLE_ENDIAN
+        du_int low;
+        di_int high;
+#else
+        di_int high;
+        du_int low;
+#endif
+    };
+} twords;
+
+// An unsigned quad word, also addressable as its low and high double words.
+typedef union
+{
+    tu_int all;
+    struct
+    {
+#if _YUGA_LITTLE_ENDIAN
+        du_int low;
+        du_int high;
+#else
+        du_int high;
+        du_int low;
+#endif
+    };
+} utwords;
+
+#endif
+
+// Unions giving access to the bit patterns of the floating point types.
+
+typedef union
+{
+    su_int u;
+    float f;
+} float_bits;
+
+typedef union
+{
+    udwords u;
+    double f;
+} double_bits;
+
+typedef struct
+{
+#if _YUGA_LITTLE_ENDIAN
+    udwords low;
+    udwords high;
+#else
+    udwords high;
+    udwords low;
+#endif
+} uqwords;
+
+typedef union
+{
+    uqwords u;
+    long double f;
+} long_double_bits;
+
+#endif
diff --git a/lib/lshrdi3.c b/lib/lshrdi3.c
new file mode 100644
index 000000000..62eb1a4a0
--- /dev/null
+++ b/lib/lshrdi3.c
@@ -0,0 +1,40 @@
+//===-- lshrdi3.c - Implement __lshrdi3 -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __lshrdi3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: logical a >> b
+//
+// The shift is carried out on the two word-sized halves of a, so only
+// single-word shifts are used.
+
+// Precondition: 0 <= b < bits_in_dword
+
+di_int
+__lshrdi3(di_int a, si_int b)
+{
+    const int bits_in_word = (int)(sizeof(si_int) * CHAR_BIT);
+    udwords input;
+    udwords result;
+    input.all = a;
+    if (b & bits_in_word)  // bits_in_word <= b < bits_in_dword
+    {
+        // Every bit of the low word is shifted out; what remains comes from
+        // the high word only.
+        result.high = 0;
+        result.low = input.high >> (b - bits_in_word);
+    }
+    else  // 0 <= b < bits_in_word
+    {
+        // b == 0 must not reach the general case, which would shift
+        // input.high left by a full word.
+        if (b == 0)
+            return a;
+        result.high = input.high >> b;
+        result.low = (input.high << (bits_in_word - b)) | (input.low >> b);
+    }
+    return result.all;
+}
diff --git a/lib/lshrti3.c b/lib/lshrti3.c
new file mode 100644
index 000000000..99258cee1
--- /dev/null
+++ b/lib/lshrti3.c
@@ -0,0 +1,44 @@
+//===-- lshrti3.c - Implement __lshrti3 -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __lshrti3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#if __x86_64
+
+#include "int_lib.h"
+
+// Returns: logical a >> b
+//
+// The shift is carried out on the two double-word halves of a.
+
+// Precondition: 0 <= b < bits_in_tword
+
+ti_int
+__lshrti3(ti_int a, si_int b)
+{
+    const int bits_in_dword = (int)(sizeof(di_int) * CHAR_BIT);
+    utwords input;
+    utwords result;
+    input.all = a;
+    if (b & bits_in_dword)  // bits_in_dword <= b < bits_in_tword
+    {
+        // Every bit of the low half is shifted out; what remains comes from
+        // the high half only.
+        result.high = 0;
+        result.low = input.high >> (b - bits_in_dword);
+    }
+    else  // 0 <= b < bits_in_dword
+    {
+        // b == 0 must not reach the general case, which would shift
+        // input.high left by a full double word.
+        if (b == 0)
+            return a;
+        result.high = input.high >> b;
+        result.low = (input.high << (bits_in_dword - b)) | (input.low >> b);
+    }
+    return result.all;
+}
+
+#endif
diff --git a/lib/moddi3.c b/lib/moddi3.c
new file mode 100644
index 000000000..5e903914a
--- /dev/null
+++ b/lib/moddi3.c
@@ -0,0 +1,31 @@
+//===-- moddi3.c - Implement __moddi3 -------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __moddi3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+du_int __udivmoddi4(du_int a, du_int b, du_int* rem);
+
+// Returns: a % b
+//
+// Both operands are reduced to their magnitudes, the unsigned remainder is
+// taken with __udivmoddi4, and the sign of a is applied to the result.  The
+// magnitudes are formed in unsigned arithmetic so that the most negative
+// value does not cause signed overflow.
+
+di_int
+__moddi3(di_int a, di_int b)
+{
+    const int bits_in_dword_m1 = (int)(sizeof(di_int) * CHAR_BIT) - 1;
+    du_int s = (du_int)(b >> bits_in_dword_m1);  // s = b < 0 ? all ones : 0
+    const du_int b_abs = ((du_int)b ^ s) - s;    // negate if s is all ones
+    s = (du_int)(a >> bits_in_dword_m1);         // s = a < 0 ? all ones : 0
+    const du_int a_abs = ((du_int)a ^ s) - s;    // negate if s is all ones
+    du_int r;
+    __udivmoddi4(a_abs, b_abs, &r);
+    return (di_int)((r ^ s) - s);                // negate if s is all ones
+}
diff --git a/lib/modsi3.c b/lib/modsi3.c
new file mode 100644
index 000000000..d6dfe616d
--- /dev/null
+++ b/lib/modsi3.c
@@ -0,0 +1,22 @@
+//===-- modsi3.c - Implement __modsi3 -------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __modsi3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: a % b
+//
+// Computed from the quotient: a minus (a / b) times b.
+
+si_int
+__modsi3(si_int a, si_int b)
+{
+    return a - (a / b) * b;
+}
diff --git a/lib/modti3.c b/lib/modti3.c
new file mode 100644
index 000000000..91ff8aa5d
--- /dev/null
+++ b/lib/modti3.c
@@ -0,0 +1,35 @@
+//===-- modti3.c - Implement __modti3 -------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __modti3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#if __x86_64
+
+#include "int_lib.h"
+
+tu_int __udivmodti4(tu_int a, tu_int b, tu_int* rem);
+
+// Returns: a % b
+//
+// Both operands are reduced to their magnitudes, the unsigned remainder is
+// taken with __udivmodti4, and the sign of a is applied to the result.  The
+// magnitudes are formed in unsigned arithmetic so that the most negative
+// value does not cause signed overflow.
+
+ti_int
+__modti3(ti_int a, ti_int b)
+{
+    const int bits_in_tword_m1 = (int)(sizeof(ti_int) * CHAR_BIT) - 1;
+    tu_int s = (tu_int)(b >> bits_in_tword_m1);  // s = b < 0 ? all ones : 0
+    const tu_int b_abs = ((tu_int)b ^ s) - s;    // negate if s is all ones
+    s = (tu_int)(a >> bits_in_tword_m1);         // s = a < 0 ? all ones : 0
+    const tu_int a_abs = ((tu_int)a ^ s) - s;    // negate if s is all ones
+    tu_int r;
+    __udivmodti4(a_abs, b_abs, &r);
+    return (ti_int)((r ^ s) - s);                // negate if s is all ones
+}
+
+#endif
diff --git a/lib/muldc3.c b/lib/muldc3.c
new file mode 100644
index 000000000..b945e8e38
--- /dev/null
+++ b/lib/muldc3.c
@@ -0,0 +1,73 @@
+//===-- muldc3.c - Implement __muldc3 -------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __muldc3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+#include <math.h>
+#include <complex.h>
+
+// Returns: the product of a + ib and c + id
+//
+// The plain formula (ac - bd) + i(ad + bc) is evaluated first.  Only when
+// both parts of that result are NaN are the operands adjusted and the result
+// recomputed as INFINITY times the product of the adjusted operands.
+
+double _Complex
+__muldc3(double __a, double __b, double __c, double __d)
+{
+    double __ac = __a * __c;
+    double __bd = __b * __d;
+    double __ad = __a * __d;
+    double __bc = __b * __c;
+    double _Complex z;
+    __real__ z = __ac - __bd;
+    __imag__ z = __ad + __bc;
+    if (isnan(__real__ z) && isnan(__imag__ z))
+    {
+        int __recalc = 0;
+        // First operand is infinite: reduce its parts to signed 1 or 0 and
+        // replace NaN parts of the second operand with signed zeros.
+        if (isinf(__a) || isinf(__b))
+        {
+            __a = copysign(isinf(__a) ? 1 : 0, __a);
+            __b = copysign(isinf(__b) ? 1 : 0, __b);
+            if (isnan(__c))
+                __c = copysign(0, __c);
+            if (isnan(__d))
+                __d = copysign(0, __d);
+            __recalc = 1;
+        }
+        // Second operand is infinite: same treatment with the roles swapped.
+        if (isinf(__c) || isinf(__d))
+        {
+            __c = copysign(isinf(__c) ? 1 : 0, __c);
+            __d = copysign(isinf(__d) ? 1 : 0, __d);
+            if (isnan(__a))
+                __a = copysign(0, __a);
+            if (isnan(__b))
+                __b = copysign(0, __b);
+            __recalc = 1;
+        }
+        // Neither operand is infinite but a partial product overflowed:
+        // replace every NaN part with a signed zero.
+        if (!__recalc && (isinf(__ac) || isinf(__bd) ||
+                          isinf(__ad) || isinf(__bc)))
+        {
+            if (isnan(__a))
+                __a = copysign(0, __a);
+            if (isnan(__b))
+                __b = copysign(0, __b);
+            if (isnan(__c))
+                __c = copysign(0, __c);
+            if (isnan(__d))
+                __d = copysign(0, __d);
+            __recalc = 1;
+        }
+        if (__recalc)
+        {
+            __real__ z = INFINITY * (__a * __c - __b * __d);
+            __imag__ z = INFINITY * (__a * __d + __b * __c);
+        }
+    }
+    return z;
+}
diff --git a/lib/muldi3.c b/lib/muldi3.c
new file mode 100644
index 000000000..98299e6f0
--- /dev/null
+++ b/lib/muldi3.c
@@ -0,0 +1,53 @@
+//===-- muldi3.c - Implement __muldi3 -------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __muldi3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: a * b
+//
+// Full double-word product of two single words, assembled from the four
+// half-word partial products.
+
+static
+di_int
+__muldsi3(su_int a, su_int b)
+{
+    const int half_bits = (int)(sizeof(si_int) * CHAR_BIT) / 2;
+    const su_int half_mask = (su_int)~0 >> half_bits;
+    const su_int a_lo = a & half_mask;
+    const su_int a_hi = a >> half_bits;
+    const su_int b_lo = b & half_mask;
+    const su_int b_hi = b >> half_bits;
+    dwords product;
+
+    // low * low
+    product.low = a_lo * b_lo;
+    su_int carry = product.low >> half_bits;
+    product.low &= half_mask;
+
+    // first cross term: high(a) * low(b)
+    carry += a_hi * b_lo;
+    product.low += (carry & half_mask) << half_bits;
+    product.high = carry >> half_bits;
+
+    // second cross term: high(b) * low(a)
+    carry = product.low >> half_bits;
+    product.low &= half_mask;
+    carry += b_hi * a_lo;
+    product.low += (carry & half_mask) << half_bits;
+    product.high += carry >> half_bits;
+
+    // high * high lands entirely in the high word
+    product.high += a_hi * b_hi;
+    return product.all;
+}
+
+// Returns: a * b
+//
+// The low words are multiplied in full with __muldsi3; the two cross products
+// are added to the high word only.
+
+di_int
+__muldi3(di_int a, di_int b)
+{
+    dwords lhs;
+    dwords rhs;
+    dwords product;
+    lhs.all = a;
+    rhs.all = b;
+    product.all = __muldsi3(lhs.low, rhs.low);
+    product.high += lhs.high * rhs.low + lhs.low * rhs.high;
+    return product.all;
+}
diff --git a/lib/mulsc3.c b/lib/mulsc3.c
new file mode 100644
index 000000000..bc47a47ea
--- /dev/null
+++ b/lib/mulsc3.c
@@ -0,0 +1,73 @@
+//===-- mulsc3.c - Implement __mulsc3 -------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __mulsc3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+#include <math.h>
+#include <complex.h>
+
+// Returns: the product of a + ib and c + id
+//
+// The plain formula (ac - bd) + i(ad + bc) is evaluated first.  Only when
+// both parts of that result are NaN are the operands adjusted and the result
+// recomputed as INFINITY times the product of the adjusted operands.
+
+float _Complex
+__mulsc3(float __a, float __b, float __c, float __d)
+{
+    const float ac = __a * __c;
+    const float bd = __b * __d;
+    const float ad = __a * __d;
+    const float bc = __b * __c;
+    float _Complex product;
+    __real__ product = ac - bd;
+    __imag__ product = ad + bc;
+
+    // Nothing to repair unless both parts came out as NaN.
+    if (!isnan(__real__ product) || !isnan(__imag__ product))
+        return product;
+
+    int recompute = 0;
+
+    // First operand is infinite: reduce its parts to signed 1 or 0 and
+    // replace NaN parts of the second operand with signed zeros.
+    if (isinf(__a) || isinf(__b))
+    {
+        __a = copysignf(isinf(__a) ? 1.0f : 0.0f, __a);
+        __b = copysignf(isinf(__b) ? 1.0f : 0.0f, __b);
+        if (isnan(__c))
+            __c = copysignf(0.0f, __c);
+        if (isnan(__d))
+            __d = copysignf(0.0f, __d);
+        recompute = 1;
+    }
+
+    // Second operand is infinite: same treatment with the roles swapped.
+    if (isinf(__c) || isinf(__d))
+    {
+        __c = copysignf(isinf(__c) ? 1.0f : 0.0f, __c);
+        __d = copysignf(isinf(__d) ? 1.0f : 0.0f, __d);
+        if (isnan(__a))
+            __a = copysignf(0.0f, __a);
+        if (isnan(__b))
+            __b = copysignf(0.0f, __b);
+        recompute = 1;
+    }
+
+    // Neither operand is infinite but a partial product overflowed: replace
+    // every NaN part with a signed zero.
+    if (!recompute && (isinf(ac) || isinf(bd) || isinf(ad) || isinf(bc)))
+    {
+        if (isnan(__a))
+            __a = copysignf(0.0f, __a);
+        if (isnan(__b))
+            __b = copysignf(0.0f, __b);
+        if (isnan(__c))
+            __c = copysignf(0.0f, __c);
+        if (isnan(__d))
+            __d = copysignf(0.0f, __d);
+        recompute = 1;
+    }
+
+    if (recompute)
+    {
+        __real__ product = INFINITY * (__a * __c - __b * __d);
+        __imag__ product = INFINITY * (__a * __d + __b * __c);
+    }
+    return product;
+}
diff --git a/lib/multi3.c b/lib/multi3.c
new file mode 100644
index 000000000..8ba6550b6
--- /dev/null
+++ b/lib/multi3.c
@@ -0,0 +1,57 @@
+//===-- multi3.c - Implement __multi3 -------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __multi3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#if __x86_64
+
+#include "int_lib.h"
+
+// Returns: a * b
+//
+// Full quad-word product of two double words, assembled from the four
+// half-width partial products.
+
+static
+ti_int
+__mulddi3(du_int a, du_int b)
+{
+    const int half_bits = (int)(sizeof(di_int) * CHAR_BIT) / 2;
+    const du_int half_mask = (du_int)~0 >> half_bits;
+    const du_int a_lo = a & half_mask;
+    const du_int a_hi = a >> half_bits;
+    const du_int b_lo = b & half_mask;
+    const du_int b_hi = b >> half_bits;
+    twords product;
+
+    // low * low
+    product.low = a_lo * b_lo;
+    du_int carry = product.low >> half_bits;
+    product.low &= half_mask;
+
+    // first cross term: high(a) * low(b)
+    carry += a_hi * b_lo;
+    product.low += (carry & half_mask) << half_bits;
+    product.high = carry >> half_bits;
+
+    // second cross term: high(b) * low(a)
+    carry = product.low >> half_bits;
+    product.low &= half_mask;
+    carry += b_hi * a_lo;
+    product.low += (carry & half_mask) << half_bits;
+    product.high += carry >> half_bits;
+
+    // high * high lands entirely in the high half
+    product.high += a_hi * b_hi;
+    return product.all;
+}
+
+// Returns: a * b
+//
+// The low halves are multiplied in full with __mulddi3; the two cross
+// products are added to the high half only.
+
+ti_int
+__multi3(ti_int a, ti_int b)
+{
+    twords lhs;
+    twords rhs;
+    twords product;
+    lhs.all = a;
+    rhs.all = b;
+    product.all = __mulddi3(lhs.low, rhs.low);
+    product.high += lhs.high * rhs.low + lhs.low * rhs.high;
+    return product.all;
+}
+
+#endif
diff --git a/lib/mulvdi3.c b/lib/mulvdi3.c
new file mode 100644
index 000000000..862a8c006
--- /dev/null
+++ b/lib/mulvdi3.c
@@ -0,0 +1,56 @@
+//===-- mulvdi3.c - Implement __mulvdi3 -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __mulvdi3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+#include <stdlib.h>
+
+// Returns: a * b
+
+// Effects: aborts if a * b overflows
+
+di_int
+__mulvdi3(di_int a, di_int b)
+{
+    const int N = (int)(sizeof(di_int) * CHAR_BIT);
+    // The sign-bit pattern is built with an unsigned shift; shifting a signed
+    // 1 into the sign bit is undefined.
+    const di_int MIN = (di_int)((du_int)1 << (N-1));
+    const di_int MAX = ~MIN;
+    // MIN is handled before any absolute value is taken: of all products
+    // involving MIN, only MIN * 0 and MIN * 1 are accepted.
+    if (a == MIN)
+    {
+        if (b == 0 || b == 1)
+            return a * b;
+        abort();
+    }
+    if (b == MIN)
+    {
+        if (a == 0 || a == 1)
+            return a * b;
+        abort();
+    }
+    // sa and sb are -1 for a negative operand and 0 otherwise.
+    di_int sa = a >> (N - 1);
+    di_int abs_a = (a ^ sa) - sa;
+    di_int sb = b >> (N - 1);
+    di_int abs_b = (b ^ sb) - sb;
+    // A factor of magnitude 0 or 1 cannot overflow.
+    if (abs_a < 2 || abs_b < 2)
+        return a * b;
+    if (sa == sb)
+    {
+        // Same signs: the product is positive and must not exceed MAX.
+        if (abs_a > MAX / abs_b)
+            abort();
+    }
+    else
+    {
+        // Different signs: the product is negative and must not go below MIN.
+        if (abs_a > MIN / -abs_b)
+            abort();
+    }
+    return a * b;
+}
diff --git a/lib/mulvsi3.c b/lib/mulvsi3.c
new file mode 100644
index 000000000..cef0c3551
--- /dev/null
+++ b/lib/mulvsi3.c
@@ -0,0 +1,56 @@
+//===-- mulvsi3.c - Implement __mulvsi3 -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __mulvsi3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+#include <stdlib.h>
+
+// Returns: a * b
+
+// Effects: aborts if a * b overflows
+
+si_int
+__mulvsi3(si_int a, si_int b)
+{
+    const int N = (int)(sizeof(si_int) * CHAR_BIT);
+    // The sign-bit pattern is built with an unsigned shift; shifting a signed
+    // 1 into the sign bit is undefined.
+    const si_int MIN = (si_int)((su_int)1 << (N-1));
+    const si_int MAX = ~MIN;
+    // MIN is handled before any absolute value is taken: of all products
+    // involving MIN, only MIN * 0 and MIN * 1 are accepted.
+    if (a == MIN)
+    {
+        if (b == 0 || b == 1)
+            return a * b;
+        abort();
+    }
+    if (b == MIN)
+    {
+        if (a == 0 || a == 1)
+            return a * b;
+        abort();
+    }
+    // sa and sb are -1 for a negative operand and 0 otherwise.
+    si_int sa = a >> (N - 1);
+    si_int abs_a = (a ^ sa) - sa;
+    si_int sb = b >> (N - 1);
+    si_int abs_b = (b ^ sb) - sb;
+    // A factor of magnitude 0 or 1 cannot overflow.
+    if (abs_a < 2 || abs_b < 2)
+        return a * b;
+    if (sa == sb)
+    {
+        // Same signs: the product is positive and must not exceed MAX.
+        if (abs_a > MAX / abs_b)
+            abort();
+    }
+    else
+    {
+        // Different signs: the product is negative and must not go below MIN.
+        if (abs_a > MIN / -abs_b)
+            abort();
+    }
+    return a * b;
+}
diff --git a/lib/mulvti3.c b/lib/mulvti3.c
new file mode 100644
index 000000000..82efe8caa
--- /dev/null
+++ b/lib/mulvti3.c
@@ -0,0 +1,60 @@
+//===-- mulvti3.c - Implement __mulvti3 -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __mulvti3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#if __x86_64
+
+#include "int_lib.h"
+#include <stdlib.h>
+
+// Returns: a * b
+
+// Effects: aborts if a * b overflows
+
+ti_int
+__mulvti3(ti_int a, ti_int b)
+{
+    const int N = (int)(sizeof(ti_int) * CHAR_BIT);
+    // The sign-bit pattern is built with an unsigned shift; shifting a signed
+    // 1 into the sign bit is undefined.
+    const ti_int MIN = (ti_int)((tu_int)1 << (N-1));
+    const ti_int MAX = ~MIN;
+    // MIN is handled before any absolute value is taken: of all products
+    // involving MIN, only MIN * 0 and MIN * 1 are accepted.
+    if (a == MIN)
+    {
+        if (b == 0 || b == 1)
+            return a * b;
+        abort();
+    }
+    if (b == MIN)
+    {
+        if (a == 0 || a == 1)
+            return a * b;
+        abort();
+    }
+    // sa and sb are -1 for a negative operand and 0 otherwise.
+    ti_int sa = a >> (N - 1);
+    ti_int abs_a = (a ^ sa) - sa;
+    ti_int sb = b >> (N - 1);
+    ti_int abs_b = (b ^ sb) - sb;
+    // A factor of magnitude 0 or 1 cannot overflow.
+    if (abs_a < 2 || abs_b < 2)
+        return a * b;
+    if (sa == sb)
+    {
+        // Same signs: the product is positive and must not exceed MAX.
+        if (abs_a > MAX / abs_b)
+            abort();
+    }
+    else
+    {
+        // Different signs: the product is negative and must not go below MIN.
+        if (abs_a > MIN / -abs_b)
+            abort();
+    }
+    return a * b;
+}
+
+#endif
diff --git a/lib/mulxc3.c b/lib/mulxc3.c
new file mode 100644
index 000000000..e38ab0dcf
--- /dev/null
+++ b/lib/mulxc3.c
@@ -0,0 +1,77 @@
+//===-- mulxc3.c - Implement __mulxc3 -------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __mulxc3 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#if !_ARCH_PPC
+
+#include "int_lib.h"
+#include <math.h>
+#include <complex.h>
+
+// Returns: the product of a + ib and c + id
+//
+// The plain formula (ac - bd) + i(ad + bc) is evaluated first.  Only when
+// both parts of that result are NaN are the operands adjusted and the result
+// recomputed as INFINITY times the product of the adjusted operands.
+
+long double _Complex
+__mulxc3(long double __a, long double __b, long double __c, long double __d)
+{
+    const long double ac = __a * __c;
+    const long double bd = __b * __d;
+    const long double ad = __a * __d;
+    const long double bc = __b * __c;
+    long double _Complex product;
+    __real__ product = ac - bd;
+    __imag__ product = ad + bc;
+
+    // Nothing to repair unless both parts came out as NaN.
+    if (!isnan(__real__ product) || !isnan(__imag__ product))
+        return product;
+
+    int recompute = 0;
+
+    // First operand is infinite: reduce its parts to signed 1 or 0 and
+    // replace NaN parts of the second operand with signed zeros.
+    if (isinf(__a) || isinf(__b))
+    {
+        __a = copysignl(isinf(__a) ? 1.0L : 0.0L, __a);
+        __b = copysignl(isinf(__b) ? 1.0L : 0.0L, __b);
+        if (isnan(__c))
+            __c = copysignl(0.0L, __c);
+        if (isnan(__d))
+            __d = copysignl(0.0L, __d);
+        recompute = 1;
+    }
+
+    // Second operand is infinite: same treatment with the roles swapped.
+    if (isinf(__c) || isinf(__d))
+    {
+        __c = copysignl(isinf(__c) ? 1.0L : 0.0L, __c);
+        __d = copysignl(isinf(__d) ? 1.0L : 0.0L, __d);
+        if (isnan(__a))
+            __a = copysignl(0.0L, __a);
+        if (isnan(__b))
+            __b = copysignl(0.0L, __b);
+        recompute = 1;
+    }
+
+    // Neither operand is infinite but a partial product overflowed: replace
+    // every NaN part with a signed zero.
+    if (!recompute && (isinf(ac) || isinf(bd) || isinf(ad) || isinf(bc)))
+    {
+        if (isnan(__a))
+            __a = copysignl(0.0L, __a);
+        if (isnan(__b))
+            __b = copysignl(0.0L, __b);
+        if (isnan(__c))
+            __c = copysignl(0.0L, __c);
+        if (isnan(__d))
+            __d = copysignl(0.0L, __d);
+        recompute = 1;
+    }
+
+    if (recompute)
+    {
+        __real__ product = INFINITY * (__a * __c - __b * __d);
+        __imag__ product = INFINITY * (__a * __d + __b * __c);
+    }
+    return product;
+}
+
+#endif
diff --git a/lib/negdi2.c b/lib/negdi2.c
new file mode 100644
index 000000000..db2d865bd
--- /dev/null
+++ b/lib/negdi2.c
@@ -0,0 +1,22 @@
+//===-- negdi2.c - Implement __negdi2 -------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __negdi2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: -a
+//
+// No overflow check is made; the negation is done in unsigned arithmetic so
+// that the most negative value does not cause signed overflow.
+
+di_int
+__negdi2(di_int a)
+{
+    return (di_int)(0 - (du_int)a);
+}
diff --git a/lib/negti2.c b/lib/negti2.c
new file mode 100644
index 000000000..c1c8a1296
--- /dev/null
+++ b/lib/negti2.c
@@ -0,0 +1,26 @@
+//===-- negti2.c - Implement __negti2 -------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __negti2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#if __x86_64
+
+#include "int_lib.h"
+
+// Returns: -a
+//
+// No overflow check is made; the negation is done in unsigned arithmetic so
+// that the most negative value does not cause signed overflow.
+
+ti_int
+__negti2(ti_int a)
+{
+    return (ti_int)(0 - (tu_int)a);
+}
+
+#endif
diff --git a/lib/negvdi2.c b/lib/negvdi2.c
new file mode 100644
index 000000000..eb9f6e916
--- /dev/null
+++ b/lib/negvdi2.c
@@ -0,0 +1,28 @@
+//===-- negvdi2.c - Implement __negvdi2 -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __negvdi2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+#include <stdlib.h>
+
+// Returns: -a
+
+// Effects: aborts if -a overflows
+
+di_int
+__negvdi2(di_int a)
+{
+    // The sign-bit pattern is built with an unsigned shift; shifting a signed
+    // 1 into the sign bit is undefined.
+    const di_int MIN =
+        (di_int)((du_int)1 << ((int)(sizeof(di_int) * CHAR_BIT)-1));
+    // MIN is the only value whose negation is not representable.
+    if (a == MIN)
+        abort();
+    return -a;
+}
diff --git a/lib/negvsi2.c b/lib/negvsi2.c
new file mode 100644
index 000000000..7e42655e6
--- /dev/null
+++ b/lib/negvsi2.c
@@ -0,0 +1,28 @@
+//===-- negvsi2.c - Implement __negvsi2 -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __negvsi2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+#include <stdlib.h>
+
+// Returns: -a
+
+// Effects: aborts if -a overflows
+
+si_int
+__negvsi2(si_int a)
+{
+    // The sign-bit pattern is built with an unsigned shift; shifting a signed
+    // 1 into the sign bit is undefined.
+    const si_int MIN =
+        (si_int)((su_int)1 << ((int)(sizeof(si_int) * CHAR_BIT)-1));
+    // MIN is the only value whose negation is not representable.
+    if (a == MIN)
+        abort();
+    return -a;
+}
diff --git a/lib/negvti2.c b/lib/negvti2.c
new file mode 100644
index 000000000..ced05e885
--- /dev/null
+++ b/lib/negvti2.c
@@ -0,0 +1,32 @@
+//===-- negvti2.c - Implement __negvti2 -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __negvti2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#if __x86_64
+
+#include "int_lib.h"
+#include <stdlib.h>
+
+// Returns: -a
+
+// Effects: aborts if -a overflows
+
+ti_int
+__negvti2(ti_int a)
+{
+    // The sign-bit pattern is built with an unsigned shift; shifting a signed
+    // 1 into the sign bit is undefined.
+    const ti_int MIN =
+        (ti_int)((tu_int)1 << ((int)(sizeof(ti_int) * CHAR_BIT)-1));
+    // MIN is the only value whose negation is not representable.
+    if (a == MIN)
+        abort();
+    return -a;
+}
+
+#endif
diff --git a/lib/paritydi2.c b/lib/paritydi2.c
new file mode 100644
index 000000000..ea7c56588
--- /dev/null
+++ b/lib/paritydi2.c
@@ -0,0 +1,26 @@
+//===-- paritydi2.c - Implement __paritydi2 -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __paritydi2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: 1 if number of bits is odd else returns 0
+//
+// XOR-ing the two words keeps the parity of the whole value, so the
+// single-word routine finishes the job.
+
+si_int __paritysi2(si_int a);
+
+si_int
+__paritydi2(di_int a)
+{
+    dwords x;
+    x.all = a;
+    return __paritysi2(x.high ^ x.low);
+}
diff --git a/lib/paritysi2.c b/lib/paritysi2.c
new file mode 100644
index 000000000..c38f9cf2a
--- /dev/null
+++ b/lib/paritysi2.c
@@ -0,0 +1,26 @@
+//===-- paritysi2.c - Implement __paritysi2 -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __paritysi2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: 1 if number of bits is odd else returns 0
+//
+// The value is folded onto itself with XOR until its parity is carried by the
+// low 4 bits; 0x6996 then serves as a 16-entry table holding the parity of
+// each 4-bit value.
+
+si_int
+__paritysi2(si_int a)
+{
+    su_int folded = (su_int)a;
+    folded = folded ^ (folded >> 16);
+    folded = folded ^ (folded >> 8);
+    folded = folded ^ (folded >> 4);
+    const su_int nibble = folded & 0xF;
+    return (0x6996 >> nibble) & 1;
+}
diff --git a/lib/parityti2.c b/lib/parityti2.c
new file mode 100644
index 000000000..6842a5192
--- /dev/null
+++ b/lib/parityti2.c
@@ -0,0 +1,30 @@
+//===-- parityti2.c - Implement __parityti2 -------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __parityti2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#if __x86_64
+
+#include "int_lib.h"
+
+// Returns: 1 if number of bits is odd else returns 0
+//
+// XOR-ing the two halves keeps the parity of the whole value, so the
+// double-word routine finishes the job.
+
+si_int __paritydi2(di_int a);
+
+si_int
+__parityti2(ti_int a)
+{
+    twords halves;
+    halves.all = a;
+    return __paritydi2(halves.high ^ halves.low);
+}
+
+#endif
diff --git a/lib/popcountdi2.c b/lib/popcountdi2.c
new file mode 100644
index 000000000..338bbb3ca
--- /dev/null
+++ b/lib/popcountdi2.c
@@ -0,0 +1,35 @@
+//===-- popcountdi2.c - Implement __popcountdi2 ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __popcountdi2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: count of 1 bits
+//
+// Bit counts are accumulated in parallel in fields of doubling width, then
+// the per-byte counts are added together.
+
+si_int
+__popcountdi2(di_int a)
+{
+    const du_int pairs   = 0x5555555555555555uLL;
+    const du_int nibbles = 0x3333333333333333uLL;
+    const du_int bytes   = 0x0F0F0F0F0F0F0F0FuLL;
+
+    du_int wide = (du_int)a;
+    // Each 2-bit field now holds the count of its two bits.
+    wide = wide - ((wide >> 1) & pairs);
+    // Each 4-bit field now holds the count of its four bits.
+    wide = ((wide >> 2) & nibbles) + (wide & nibbles);
+    // Each byte now holds the count of its eight bits (0..8).
+    wide = (wide + (wide >> 4)) & bytes;
+
+    // Fold the upper half onto the lower half; only the low 32 bits are kept.
+    su_int narrow = (su_int)(wide + (wide >> 32));
+    // Fold again; only the low 16 bits remain meaningful.
+    narrow = narrow + (narrow >> 16);
+    // The last fold leaves the total (at most 64) in the low 7 bits.
+    return (narrow + (narrow >> 8)) & 0x0000007F;
+}
diff --git a/lib/popcountsi2.c b/lib/popcountsi2.c
new file mode 100644
index 000000000..68220aef9
--- /dev/null
+++ b/lib/popcountsi2.c
@@ -0,0 +1,32 @@
+//===-- popcountsi2.c - Implement __popcountsi2 ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __popcountsi2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: count of 1 bits
+//
+// Bit counts are accumulated in parallel in fields of doubling width, then
+// the per-byte counts are added together.
+
+si_int
+__popcountsi2(si_int a)
+{
+    const su_int pairs   = 0x55555555;
+    const su_int nibbles = 0x33333333;
+    const su_int bytes   = 0x0F0F0F0F;
+
+    su_int bits = (su_int)a;
+    // Each 2-bit field now holds the count of its two bits.
+    bits = bits - ((bits >> 1) & pairs);
+    // Each 4-bit field now holds the count of its four bits.
+    bits = ((bits >> 2) & nibbles) + (bits & nibbles);
+    // Each byte now holds the count of its eight bits (0..8).
+    bits = (bits + (bits >> 4)) & bytes;
+    // Fold the upper half onto the lower half; only the low 16 bits remain
+    // meaningful.
+    bits = bits + (bits >> 16);
+    // The last fold leaves the total (at most 32) in the low 6 bits.
+    return (bits + (bits >> 8)) & 0x0000003F;
+}
diff --git a/lib/popcountti2.c b/lib/popcountti2.c
new file mode 100644
index 000000000..9ac052d91
--- /dev/null
+++ b/lib/popcountti2.c
@@ -0,0 +1,43 @@
+//===-- popcountti2.c - Implement __popcountti2 ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __popcountti2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#if __x86_64
+
+#include "int_lib.h"
+
+// Returns: count of 1 bits
+//
+// Bit counts are accumulated in parallel in fields of doubling width, then
+// the per-byte counts are added together.
+
+si_int
+__popcountti2(ti_int a)
+{
+    // 128-bit masks assembled from two identical 64-bit halves.
+    const tu_int pairs   = ((tu_int)0x5555555555555555uLL << 64) |
+                           0x5555555555555555uLL;
+    const tu_int nibbles = ((tu_int)0x3333333333333333uLL << 64) |
+                           0x3333333333333333uLL;
+    const tu_int bytes   = ((tu_int)0x0F0F0F0F0F0F0F0FuLL << 64) |
+                           0x0F0F0F0F0F0F0F0FuLL;
+
+    tu_int wide = (tu_int)a;
+    // Each 2-bit field now holds the count of its two bits.
+    wide = wide - ((wide >> 1) & pairs);
+    // Each 4-bit field now holds the count of its four bits.
+    wide = ((wide >> 2) & nibbles) + (wide & nibbles);
+    // Each byte now holds the count of its eight bits (0..8).
+    wide = (wide + (wide >> 4)) & bytes;
+
+    // Fold 128 -> 64 bits, then 64 -> 32 bits, keeping the low part each time.
+    du_int half = (du_int)(wide + (wide >> 64));
+    su_int narrow = (su_int)(half + (half >> 32));
+    // Fold again; only the low 16 bits remain meaningful.
+    narrow = narrow + (narrow >> 16);
+    // The last fold leaves the total (at most 128) in the low 8 bits.
+    return (narrow + (narrow >> 8)) & 0xFF;
+}
+
+#endif
diff --git a/lib/powidf2.c b/lib/powidf2.c
new file mode 100644
index 000000000..3237e266d
--- /dev/null
+++ b/lib/powidf2.c
@@ -0,0 +1,33 @@
+//===-- powidf2.c - Implement __powidf2 -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __powidf2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: a ^ b
+//
+// Square-and-multiply over the bits of b.  For negative b the same loop runs
+// (b is halved toward zero) and the reciprocal of the result is returned.
+
+double
+__powidf2(double a, si_int b)
+{
+    const int negative_exponent = b < 0;
+    double result = 1;
+    for (;;)
+    {
+        if (b & 1)
+            result *= a;
+        b /= 2;
+        if (b == 0)
+            break;
+        a *= a;
+    }
+    if (negative_exponent)
+        return 1 / result;
+    return result;
+}
diff --git a/lib/powisf2.c b/lib/powisf2.c
new file mode 100644
index 000000000..fcaaa1f26
--- /dev/null
+++ b/lib/powisf2.c
@@ -0,0 +1,33 @@
+//===-- powisf2.c - Implement __powisf2 -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __powisf2 for the compiler_rt library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "int_lib.h"
+
+// Returns: a ^ b
+//
+// Square-and-multiply over the bits of b.  For negative b the same loop runs
+// (b is halved toward zero) and the reciprocal of the result is returned.
+
+float
+__powisf2(float a, si_int b)
+{
+    const int negative_exponent = b < 0;
+    float result = 1;
+    for (;;)
+    {
+        if (b & 1)
+            result *= a;
+        b /= 2;
+        if (b == 0)
+            break;
+        a *= a;
+    }
+    if (negative_exponent)
+        return 1 / result;
+    return result;
+}
diff --git a/lib/powitf2.c b/lib/powitf2.c
new file mode 100644
index 000000000..34aa2cb6a
--- /dev/null
+++ b/lib/powitf2.c
@@ -0,0 +1,37 @@
+//===-- powitf2.c - Implement __powitf2 -----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements __powitf2 for the compiler_rt library.
+// +//===----------------------------------------------------------------------===// + +#if _ARCH_PPC + +#include "int_lib.h" + +// Returns: a ^ b + +long double +__powitf2(long double a, si_int b) +{ + const int recip = b < 0; + long double r = 1; + while (1) + { + if (b & 1) + r *= a; + b /= 2; + if (b == 0) + break; + a *= a; + } + return recip ? 1/r : r; +} + +#endif diff --git a/lib/powixf2.c b/lib/powixf2.c new file mode 100644 index 000000000..07e145a44 --- /dev/null +++ b/lib/powixf2.c @@ -0,0 +1,37 @@ +//===-- powixf2.cpp - Implement __powixf2 ---------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __powixf2 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#if !_ARCH_PPC + +#include "int_lib.h" + +// Returns: a ^ b + +long double +__powixf2(long double a, si_int b) +{ + const int recip = b < 0; + long double r = 1; + while (1) + { + if (b & 1) + r *= a; + b /= 2; + if (b == 0) + break; + a *= a; + } + return recip ? 
1/r : r; +} + +#endif diff --git a/lib/ppc/DD.h b/lib/ppc/DD.h new file mode 100644 index 000000000..32acecd5f --- /dev/null +++ b/lib/ppc/DD.h @@ -0,0 +1,46 @@ +#ifndef __DD_HEADER +#define __DD_HEADER + +#include <stdint.h> + +typedef union { + long double ld; + struct { + double hi; + double lo; + }; +} DD; + +typedef union { + double d; + uint64_t x; +} doublebits; + +#define LOWORDER(xy,xHi,xLo,yHi,yLo) \ + (((((xHi)*(yHi) - (xy)) + (xHi)*(yLo)) + (xLo)*(yHi)) + (xLo)*(yLo)) + +static inline double __attribute__((always_inline)) +fabs(double x) +{ + doublebits result = { .d = x }; + result.x &= UINT64_C(0x7fffffffffffffff); + return result.d; +} + +static inline double __attribute__((always_inline)) +high26bits(double x) +{ + doublebits result = { .d = x }; + result.x &= UINT64_C(0xfffffffff8000000); + return result.d; +} + +static inline int __attribute__((always_inline)) +different_sign(double x, double y) +{ + doublebits xsignbit = { .d = x }, ysignbit = { .d = y }; + int result = (int)(xsignbit.x >> 63) ^ (int)(ysignbit.x >> 63); + return result; +} + +#endif // __DD_HEADER diff --git a/lib/ppc/Makefile.mk b/lib/ppc/Makefile.mk new file mode 100644 index 000000000..5d0f2b3df --- /dev/null +++ b/lib/ppc/Makefile.mk @@ -0,0 +1,22 @@ +#===- lib/ppc/Makefile.mk ----------------------------------*- Makefile -*--===# +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +#===------------------------------------------------------------------------===# + +Dir := lib/ppc +SubDirs := +OnlyArchs := ppc + +AsmSources := $(foreach file,$(wildcard $(Dir)/*.s),$(notdir $(file))) +Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file))) +ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.s=%.o) +Target := Optimized + +# FIXME: use automatic dependencies? 
+Dependencies := $(wildcard $(Dir)/*.h) + +include make/subdir.mk diff --git a/lib/ppc/divtc3.c b/lib/ppc/divtc3.c new file mode 100644 index 000000000..bec6b5353 --- /dev/null +++ b/lib/ppc/divtc3.c @@ -0,0 +1,88 @@ +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. + +#include "DD.h" +#include <math.h> + +#define makeFinite(x) { \ + (x).hi = __builtin_copysign(isinf((x).hi) ? 1.0 : 0.0, (x).hi); \ + (x).lo = 0.0; \ + } + +long double __gcc_qadd(long double, long double); +long double __gcc_qsub(long double, long double); +long double __gcc_qmul(long double, long double); +long double __gcc_qdiv(long double, long double); + +long double _Complex +__divtc3(long double a, long double b, long double c, long double d) +{ + DD cDD = { .ld = c }; + DD dDD = { .ld = d }; + + int ilogbw = 0; + const double logbw = logb(__builtin_fmax( __builtin_fabs(cDD.hi), __builtin_fabs(dDD.hi) )); + + if (isfinite(logbw)) + { + ilogbw = (int)logbw; + + cDD.hi = scalbn(cDD.hi, -ilogbw); + cDD.lo = scalbn(cDD.lo, -ilogbw); + dDD.hi = scalbn(dDD.hi, -ilogbw); + dDD.lo = scalbn(dDD.lo, -ilogbw); + } + + const long double denom = __gcc_qadd(__gcc_qmul(cDD.ld, cDD.ld), __gcc_qmul(dDD.ld, dDD.ld)); + const long double realNumerator = __gcc_qadd(__gcc_qmul(a,cDD.ld), __gcc_qmul(b,dDD.ld)); + const long double imagNumerator = __gcc_qsub(__gcc_qmul(b,cDD.ld), __gcc_qmul(a,dDD.ld)); + + DD real = { .ld = __gcc_qdiv(realNumerator, denom) }; + DD imag = { .ld = __gcc_qdiv(imagNumerator, denom) }; + + real.hi = scalbn(real.hi, -ilogbw); + real.lo = scalbn(real.lo, -ilogbw); + imag.hi = scalbn(imag.hi, -ilogbw); + imag.lo = scalbn(imag.lo, -ilogbw); + + if (isnan(real.hi) && isnan(imag.hi)) + { + DD aDD = { .ld = a }; + DD bDD = { .ld = b }; + DD rDD = { .ld = denom }; + + if ((rDD.hi == 0.0) && (!isnan(aDD.hi) || !isnan(bDD.hi))) + { + real.hi = __builtin_copysign(INFINITY,cDD.hi) * aDD.hi; + real.lo = 0.0; + imag.hi = 
__builtin_copysign(INFINITY,cDD.hi) * bDD.hi; + imag.lo = 0.0; + } + + else if ((isinf(aDD.hi) || isinf(bDD.hi)) && isfinite(cDD.hi) && isfinite(dDD.hi)) + { + makeFinite(aDD); + makeFinite(bDD); + real.hi = INFINITY * (aDD.hi*cDD.hi + bDD.hi*dDD.hi); + real.lo = 0.0; + imag.hi = INFINITY * (bDD.hi*cDD.hi - aDD.hi*dDD.hi); + imag.lo = 0.0; + } + + else if ((isinf(cDD.hi) || isinf(dDD.hi)) && isfinite(aDD.hi) && isfinite(bDD.hi)) + { + makeFinite(cDD); + makeFinite(dDD); + real.hi = __builtin_copysign(0.0,(aDD.hi*cDD.hi + bDD.hi*dDD.hi)); + real.lo = 0.0; + imag.hi = __builtin_copysign(0.0,(bDD.hi*cDD.hi - aDD.hi*dDD.hi)); + imag.lo = 0.0; + } + } + + long double _Complex z; + __real__ z = real.ld; + __imag__ z = imag.ld; + + return z; +} diff --git a/lib/ppc/fixtfdi.c b/lib/ppc/fixtfdi.c new file mode 100644 index 000000000..c95241701 --- /dev/null +++ b/lib/ppc/fixtfdi.c @@ -0,0 +1,100 @@ +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. + +// int64_t __fixtfdi(long double x); NOTE: the definition below is declared to return uint64_t. +// This file implements the PowerPC 128-bit double-double -> int64_t conversion + +#include "DD.h" +#include <stdint.h> + +uint64_t __fixtfdi(long double input) +{ + const DD x = { .ld = input }; + const doublebits hibits = { .d = x.hi }; + + const uint32_t absHighWord = (uint32_t)(hibits.x >> 32) & UINT32_C(0x7fffffff); + const uint32_t absHighWordMinusOne = absHighWord - UINT32_C(0x3ff00000); + + // If (1.0 - tiny) <= |input| < 0x1.0p63 (the sign bit was masked off above): + if (UINT32_C(0x03f00000) > absHighWordMinusOne) + { + // Do an unsigned conversion of the absolute value, then restore the sign. + const int unbiasedHeadExponent = absHighWordMinusOne >> 20; + + int64_t result = hibits.x & INT64_C(0x000fffffffffffff); // mantissa(hi) + result |= INT64_C(0x0010000000000000); // mantissa(hi) with implicit bit + result <<= 10; // mantissa(hi) with one leading zero bit.
+ + const int64_t hiNegationMask = ((int64_t)(hibits.x)) >> 63; + + // If the tail is non-zero, we need to patch in the tail bits. + if (0.0 != x.lo) + { + const doublebits lobits = { .d = x.lo }; + int64_t tailMantissa = lobits.x & INT64_C(0x000fffffffffffff); + tailMantissa |= INT64_C(0x0010000000000000); + + // At this point we have the mantissa of |tail| + // We need to negate it if head and tail have different signs. + const int64_t loNegationMask = ((int64_t)(lobits.x)) >> 63; + const int64_t negationMask = loNegationMask ^ hiNegationMask; + tailMantissa = (tailMantissa ^ negationMask) - negationMask; + + // Now we have the mantissa of tail as a signed 2s-complement integer + + const int biasedTailExponent = (int)(lobits.x >> 52) & 0x7ff; + + // Shift the tail mantissa into the right position, accounting for the + // bias of 10 that we shifted the head mantissa by. + tailMantissa >>= (unbiasedHeadExponent - (biasedTailExponent - (1023 - 10))); + + result += tailMantissa; + } + + result >>= (62 - unbiasedHeadExponent); + + // Restore the sign of the result and return + result = (result ^ hiNegationMask) - hiNegationMask; + return result; + + } + + // Edge cases handled here: + + // |x| < 1, result is zero. + if (1.0 > __builtin_fabs(x.hi)) + return INT64_C(0); + + // x very close to INT64_MIN, care must be taken to see which side we are on. + if (x.hi == -0x1.0p63) { + + int64_t result = INT64_MIN; + + if (0.0 < x.lo) + { + // If the tail is positive, the correct result is something other than INT64_MIN. + // we'll need to figure out what it is. 
+ + const doublebits lobits = { .d = x.lo }; + int64_t tailMantissa = lobits.x & INT64_C(0x000fffffffffffff); + tailMantissa |= INT64_C(0x0010000000000000); + + // Now we negate the tailMantissa + tailMantissa = (tailMantissa ^ INT64_C(-1)) + INT64_C(1); + + // And shift it by the appropriate amount + const int biasedTailExponent = (int)(lobits.x >> 52) & 0x7ff; + tailMantissa >>= 1075 - biasedTailExponent; + + result -= tailMantissa; + } + + return result; + } + + // Signed overflows, infinities, and NaNs (a NaN fails the > test and yields INT64_MIN) + if (x.hi > 0.0) + return INT64_MAX; + else + return INT64_MIN; +} diff --git a/lib/ppc/fixunstfdi.c b/lib/ppc/fixunstfdi.c new file mode 100644 index 000000000..35ce3a912 --- /dev/null +++ b/lib/ppc/fixunstfdi.c @@ -0,0 +1,58 @@ +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. + +// uint64_t __fixunstfdi(long double x); +// This file implements the PowerPC 128-bit double-double -> uint64_t conversion + +#include "DD.h" +#include <stdint.h> + +uint64_t __fixunstfdi(long double input) +{ + const DD x = { .ld = input }; + const doublebits hibits = { .d = x.hi }; + + const uint32_t highWordMinusOne = (uint32_t)(hibits.x >> 32) - UINT32_C(0x3ff00000); + + // If (1.0 - tiny) <= input < 0x1.0p64: + if (UINT32_C(0x04000000) > highWordMinusOne) + { + const int unbiasedHeadExponent = highWordMinusOne >> 20; + + uint64_t result = hibits.x & UINT64_C(0x000fffffffffffff); // mantissa(hi) + result |= UINT64_C(0x0010000000000000); // mantissa(hi) with implicit bit + result <<= 11; // mantissa(hi) left-aligned in the uint64 field. + + // If the tail is non-zero, we need to patch in the tail bits.
+ if (0.0 != x.lo) + { + const doublebits lobits = { .d = x.lo }; + int64_t tailMantissa = lobits.x & INT64_C(0x000fffffffffffff); + tailMantissa |= INT64_C(0x0010000000000000); + + // At this point we have the mantissa of |tail| + + const int64_t negationMask = ((int64_t)(lobits.x)) >> 63; + tailMantissa = (tailMantissa ^ negationMask) - negationMask; + + // Now we have the mantissa of tail as a signed 2s-complement integer + + const int biasedTailExponent = (int)(lobits.x >> 52) & 0x7ff; + + // Shift the tail mantissa into the right position, accounting for the + // bias of 11 that we shifted the head mantissa by. + tailMantissa >>= (unbiasedHeadExponent - (biasedTailExponent - (1023 - 11))); + + result += tailMantissa; + } + + result >>= (63 - unbiasedHeadExponent); + return result; + } + + // Edge cases are handled here, with saturation. + if (1.0 > x.hi) + return UINT64_C(0); + else + return UINT64_MAX; +} diff --git a/lib/ppc/floatditf.c b/lib/ppc/floatditf.c new file mode 100644 index 000000000..081dc8cbe --- /dev/null +++ b/lib/ppc/floatditf.c @@ -0,0 +1,34 @@ +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. + +// long double __floatditf(long long x); +// This file implements the PowerPC long long -> long double conversion + +#include "DD.h" +#include <stdint.h> + +long double __floatditf(int64_t a) { + + static const double twop32 = 0x1.0p32; + static const double twop52 = 0x1.0p52; + + doublebits low = { .d = twop52 }; + low.x |= a & UINT64_C(0x00000000ffffffff); // 0x1.0p52 + low 32 bits of a. + + const double high_addend = (double)((int32_t)(a >> 32))*twop32 - twop52; + + // At this point, we have two double precision numbers + // high_addend and low.d, and we wish to return their sum + // as a canonicalized long double: + + // This implementation sets the inexact flag spuriously. + // This could be avoided, but at some substantial cost. 
+ + DD result; + + result.hi = high_addend + low.d; + result.lo = (high_addend - result.hi) + low.d; + + return result.ld; + +} diff --git a/lib/ppc/floatunditf.c b/lib/ppc/floatunditf.c new file mode 100644 index 000000000..63f0b4467 --- /dev/null +++ b/lib/ppc/floatunditf.c @@ -0,0 +1,40 @@ +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. + +// long double __floatunditf(unsigned long long x); +// This file implements the PowerPC unsigned long long -> long double conversion + +#include "DD.h" +#include <stdint.h> + +long double __floatunditf(uint64_t a) { + + // Begins with an exact copy of the code from __floatundidf + + static const double twop52 = 0x1.0p52; + static const double twop84 = 0x1.0p84; + static const double twop84_plus_twop52 = 0x1.00000001p84; + + doublebits high = { .d = twop84 }; + doublebits low = { .d = twop52 }; + + high.x |= a >> 32; // 0x1.0p84 + high 32 bits of a + low.x |= a & UINT64_C(0x00000000ffffffff); // 0x1.0p52 + low 32 bits of a + + const double high_addend = high.d - twop84_plus_twop52; + + // At this point, we have two double precision numbers + // high_addend and low.d, and we wish to return their sum + // as a canonicalized long double: + + // This implementation sets the inexact flag spuriously. + // This could be avoided, but at some substantial cost. + + DD result; + + result.hi = high_addend + low.d; + result.lo = (high_addend - result.hi) + low.d; + + return result.ld; + +} diff --git a/lib/ppc/gcc_qadd.c b/lib/ppc/gcc_qadd.c new file mode 100644 index 000000000..eb3fdd11e --- /dev/null +++ b/lib/ppc/gcc_qadd.c @@ -0,0 +1,74 @@ +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. + +// long double __gcc_qadd(long double x, long double y); +// This file implements the PowerPC 128-bit double-double add operation. +// This implementation is shamelessly cribbed from Apple's DDRT, circa 1993(!) 
+ +#include "DD.h" + +long double __gcc_qadd(long double x, long double y) +{ + static const uint32_t infinityHi = UINT32_C(0x7ff00000); + + DD dst = { .ld = x }, src = { .ld = y }; + + register double A = dst.hi, a = dst.lo, + B = src.hi, b = src.lo; + + // If both operands are zero: + if ((A == 0.0) && (B == 0.0)) { + dst.hi = A + B; + dst.lo = 0.0; + return dst.ld; + } + + // If either operand is NaN or infinity: + const doublebits abits = { .d = A }; + const doublebits bbits = { .d = B }; + if ((((uint32_t)(abits.x >> 32) & infinityHi) == infinityHi) || + (((uint32_t)(bbits.x >> 32) & infinityHi) == infinityHi)) { + dst.hi = A + B; + dst.lo = 0.0; + return dst.ld; + } + + // If the computation overflows: + // This may be playing things a little bit fast and loose, but it will do for a start. + const double testForOverflow = A + (B + (a + b)); + const doublebits testbits = { .d = testForOverflow }; + if (((uint32_t)(testbits.x >> 32) & infinityHi) == infinityHi) { + dst.hi = testForOverflow; + dst.lo = 0.0; + return dst.ld; + } + + double H, h; + double T, t; + double W, w; + double Y; + + H = B + (A - (A + B)); + T = b + (a - (a + b)); + h = A + (B - (A + B)); + t = a + (b - (a + b)); + + if (fabs(A) <= fabs(B)) + w = (a + b) + h; + else + w = (a + b) + H; + + W = (A + B) + w; + Y = (A + B) - W; + Y += w; + + if (fabs(a) <= fabs(b)) + w = t + Y; + else + w = T + Y; + + dst.hi = Y = W + w; + dst.lo = (W - Y) + w; + + return dst.ld; +} diff --git a/lib/ppc/gcc_qdiv.c b/lib/ppc/gcc_qdiv.c new file mode 100644 index 000000000..53e6c55ec --- /dev/null +++ b/lib/ppc/gcc_qdiv.c @@ -0,0 +1,53 @@ +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. + +// long double __gcc_qdiv(long double x, long double y); +// This file implements the PowerPC 128-bit double-double division operation. +// This implementation is shamelessly cribbed from Apple's DDRT, circa 1993(!) 
+ +#include "DD.h" + +long double __gcc_qdiv(long double a, long double b) +{ + static const uint32_t infinityHi = UINT32_C(0x7ff00000); + DD dst = { .ld = a }, src = { .ld = b }; + + register double x = dst.hi, x1 = dst.lo, + y = src.hi, y1 = src.lo; + + double yHi, yLo, qHi, qLo; + double yq, tmp, q; + + q = x / y; + + // Detect special cases + if (q == 0.0) { + dst.hi = q; + dst.lo = 0.0; + return dst.ld; + } + + const doublebits qBits = { .d = q }; + if (((uint32_t)(qBits.x >> 32) & infinityHi) == infinityHi) { + dst.hi = q; + dst.lo = 0.0; + return dst.ld; + } + + yHi = high26bits(y); + qHi = high26bits(q); + + yq = y * q; + yLo = y - yHi; + qLo = q - qHi; + + tmp = LOWORDER(yq, yHi, yLo, qHi, qLo); + tmp = (x - yq) - tmp; + tmp = ((tmp + x1) - y1 * q) / y; + x = q + tmp; + + dst.lo = (q - x) + tmp; + dst.hi = x; + + return dst.ld; +} diff --git a/lib/ppc/gcc_qmul.c b/lib/ppc/gcc_qmul.c new file mode 100644 index 000000000..26d899ee8 --- /dev/null +++ b/lib/ppc/gcc_qmul.c @@ -0,0 +1,51 @@ +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. + +// long double __gcc_qmul(long double x, long double y); +// This file implements the PowerPC 128-bit double-double multiply operation. +// This implementation is shamelessly cribbed from Apple's DDRT, circa 1993(!) + +#include "DD.h" + +long double __gcc_qmul(long double x, long double y) +{ + static const uint32_t infinityHi = UINT32_C(0x7ff00000); + DD dst = { .ld = x }, src = { .ld = y }; + + register double A = dst.hi, a = dst.lo, + B = src.hi, b = src.lo; + + double aHi, aLo, bHi, bLo; + double ab, tmp, tau; + + ab = A * B; + + // Detect special cases + if (ab == 0.0) { + dst.hi = ab; + dst.lo = 0.0; + return dst.ld; + } + + const doublebits abBits = { .d = ab }; + if (((uint32_t)(abBits.x >> 32) & infinityHi) == infinityHi) { + dst.hi = ab; + dst.lo = 0.0; + return dst.ld; + } + + // Generic cases handled here. 
+ aHi = high26bits(A); + bHi = high26bits(B); + aLo = A - aHi; + bLo = B - bHi; + + tmp = LOWORDER(ab, aHi, aLo, bHi, bLo); + tmp += (A * b + a * B); + tau = ab + tmp; + + dst.lo = (ab - tau) + tmp; + dst.hi = tau; + + return dst.ld; +} diff --git a/lib/ppc/gcc_qsub.c b/lib/ppc/gcc_qsub.c new file mode 100644 index 000000000..f77deaa4f --- /dev/null +++ b/lib/ppc/gcc_qsub.c @@ -0,0 +1,74 @@ +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. + +// long double __gcc_qsub(long double x, long double y); +// This file implements the PowerPC 128-bit double-double subtract operation, computed as x + (-y). +// This implementation is shamelessly cribbed from Apple's DDRT, circa 1993(!) + +#include "DD.h" + +long double __gcc_qsub(long double x, long double y) +{ + static const uint32_t infinityHi = UINT32_C(0x7ff00000); + + DD dst = { .ld = x }, src = { .ld = y }; + + register double A = dst.hi, a = dst.lo, + B = -src.hi, b = -src.lo; + + // If both operands are zero: + if ((A == 0.0) && (B == 0.0)) { + dst.hi = A + B; + dst.lo = 0.0; + return dst.ld; + } + + // If either operand is NaN or infinity: + const doublebits abits = { .d = A }; + const doublebits bbits = { .d = B }; + if ((((uint32_t)(abits.x >> 32) & infinityHi) == infinityHi) || + (((uint32_t)(bbits.x >> 32) & infinityHi) == infinityHi)) { + dst.hi = A + B; + dst.lo = 0.0; + return dst.ld; + } + + // If the computation overflows: + // This may be playing things a little bit fast and loose, but it will do for a start.
+ const double testForOverflow = A + (B + (a + b)); + const doublebits testbits = { .d = testForOverflow }; + if (((uint32_t)(testbits.x >> 32) & infinityHi) == infinityHi) { + dst.hi = testForOverflow; + dst.lo = 0.0; + return dst.ld; + } + + double H, h; + double T, t; + double W, w; + double Y; + + H = B + (A - (A + B)); + T = b + (a - (a + b)); + h = A + (B - (A + B)); + t = a + (b - (a + b)); + + if (fabs(A) <= fabs(B)) + w = (a + b) + h; + else + w = (a + b) + H; + + W = (A + B) + w; + Y = (A + B) - W; + Y += w; + + if (fabs(a) <= fabs(b)) + w = t + Y; + else + w = T + Y; + + dst.hi = Y = W + w; + dst.lo = (W - Y) + w; + + return dst.ld; +} diff --git a/lib/ppc/multc3.c b/lib/ppc/multc3.c new file mode 100644 index 000000000..d5a77b178 --- /dev/null +++ b/lib/ppc/multc3.c @@ -0,0 +1,92 @@ +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. + +#include "DD.h" +#include <math.h> + +#define makeFinite(x) { \ + (x).hi = __builtin_copysign(isinf((x).hi) ? 
1.0 : 0.0, (x).hi); \ + (x).lo = 0.0; \ + } + +#define zeroNaN(x) { \ + if (isnan((x).hi)) { \ + (x).hi = __builtin_copysign(0.0, (x).hi); \ + (x).lo = 0.0; \ + } \ + } + +long double __gcc_qadd(long double, long double); +long double __gcc_qsub(long double, long double); +long double __gcc_qmul(long double, long double); + +long double _Complex +__multc3(long double a, long double b, long double c, long double d) +{ + long double ac = __gcc_qmul(a,c); + long double bd = __gcc_qmul(b,d); + long double ad = __gcc_qmul(a,d); + long double bc = __gcc_qmul(b,c); + + DD real = { .ld = __gcc_qsub(ac,bd) }; + DD imag = { .ld = __gcc_qadd(ad,bc) }; + + if (isnan(real.hi) && isnan(imag.hi)) + { + int recalc = 0; + + DD aDD = { .ld = a }; + DD bDD = { .ld = b }; + DD cDD = { .ld = c }; + DD dDD = { .ld = d }; + + if (isinf(aDD.hi) || isinf(bDD.hi)) + { + makeFinite(aDD); + makeFinite(bDD); + zeroNaN(cDD); + zeroNaN(dDD); + recalc = 1; + } + + if (isinf(cDD.hi) || isinf(dDD.hi)) + { + makeFinite(cDD); + makeFinite(dDD); + zeroNaN(aDD); + zeroNaN(bDD); + recalc = 1; + } + + if (!recalc) + { + DD acDD = { .ld = ac }; + DD bdDD = { .ld = bd }; + DD adDD = { .ld = ad }; + DD bcDD = { .ld = bc }; + + if (isinf(acDD.hi) || isinf(bdDD.hi) || isinf(adDD.hi) || isinf(bcDD.hi)) + { + zeroNaN(aDD); + zeroNaN(bDD); + zeroNaN(cDD); + zeroNaN(dDD); + recalc = 1; + } + } + + if (recalc) + { + real.hi = INFINITY * (aDD.hi*cDD.hi - bDD.hi*dDD.hi); + real.lo = 0.0; + imag.hi = INFINITY * (aDD.hi*dDD.hi + bDD.hi*cDD.hi); + imag.lo = 0.0; + } + } + + long double _Complex z; + __real__ z = real.ld; + __imag__ z = imag.ld; + + return z; +} diff --git a/lib/ppc/restFP.s b/lib/ppc/restFP.s new file mode 100644 index 000000000..6b8428a57 --- /dev/null +++ b/lib/ppc/restFP.s @@ -0,0 +1,43 @@ +//===-- restFP.s - Implement restFP ---------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + + +// +// Helper function used by compiler to restore ppc floating point registers at +// the end of the function epilog. This function returns to the address +// in the LR slot. So a function epilog must branch (b) not branch and link +// (bl) to this function. +// If the compiler wants to restore f27..f31, it does a "b restFP+52" +// +// This function should never be exported by a shared library. Each linkage +// unit carries its own copy of this function. +// + .globl restFP + .private_extern restFP +restFP: lfd f14,-144(r1) + lfd f15,-136(r1) + lfd f16,-128(r1) + lfd f17,-120(r1) + lfd f18,-112(r1) + lfd f19,-104(r1) + lfd f20,-96(r1) + lfd f21,-88(r1) + lfd f22,-80(r1) + lfd f23,-72(r1) + lfd f24,-64(r1) + lfd f25,-56(r1) + lfd f26,-48(r1) + lfd f27,-40(r1) + lfd f28,-32(r1) + lfd f29,-24(r1) + lfd f30,-16(r1) + lfd f31,-8(r1) + lwz r0,8(r1) + mtlr r0 + blr diff --git a/lib/ppc/saveFP.s b/lib/ppc/saveFP.s new file mode 100644 index 000000000..41a912771 --- /dev/null +++ b/lib/ppc/saveFP.s @@ -0,0 +1,40 @@ +//===-- saveFP.s - Implement saveFP ---------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + + +// +// Helper function used by compiler to save ppc floating point registers in +// function prologs. This routine also saves r0 in the LR slot. +// If the compiler wants to save f27..f31, it does a "bl saveFP+52" +// +// This function should never be exported by a shared library. Each linkage +// unit carries its own copy of this function.
+// + .globl saveFP + .private_extern saveFP +saveFP: stfd f14,-144(r1) + stfd f15,-136(r1) + stfd f16,-128(r1) + stfd f17,-120(r1) + stfd f18,-112(r1) + stfd f19,-104(r1) + stfd f20,-96(r1) + stfd f21,-88(r1) + stfd f22,-80(r1) + stfd f23,-72(r1) + stfd f24,-64(r1) + stfd f25,-56(r1) + stfd f26,-48(r1) + stfd f27,-40(r1) + stfd f28,-32(r1) + stfd f29,-24(r1) + stfd f30,-16(r1) + stfd f31,-8(r1) + stw r0,8(r1) + blr diff --git a/lib/subvdi3.c b/lib/subvdi3.c new file mode 100644 index 000000000..2548b081e --- /dev/null +++ b/lib/subvdi3.c @@ -0,0 +1,36 @@ +//===-- subvdi3.c - Implement __subvdi3 -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __subvdi3 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" +#include <stdlib.h> + +// Returns: a - b + +// Effects: aborts if a - b overflows + +di_int +__subvdi3(di_int a, di_int b) +{ + di_int s = (di_int)((du_int)a - (du_int)b); // subtract as unsigned: wraps instead of signed-overflow UB, so the checks below stay valid + if (b >= 0) + { + if (s > a) + abort(); + } + else + { + if (s <= a) + abort(); + } + return s; +} diff --git a/lib/subvsi3.c b/lib/subvsi3.c new file mode 100644 index 000000000..53c141b47 --- /dev/null +++ b/lib/subvsi3.c @@ -0,0 +1,36 @@ +//===-- subvsi3.c - Implement __subvsi3 -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __subvsi3 for the compiler_rt library.
+// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" +#include <stdlib.h> + +// Returns: a - b + +// Effects: aborts if a - b overflows + +si_int +__subvsi3(si_int a, si_int b) +{ + si_int s = (si_int)((su_int)a - (su_int)b); // subtract as unsigned: wraps instead of signed-overflow UB, so the checks below stay valid + if (b >= 0) + { + if (s > a) + abort(); + } + else + { + if (s <= a) + abort(); + } + return s; +} diff --git a/lib/subvti3.c b/lib/subvti3.c new file mode 100644 index 000000000..b130c8ceb --- /dev/null +++ b/lib/subvti3.c @@ -0,0 +1,40 @@ +//===-- subvti3.c - Implement __subvti3 -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __subvti3 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#if __x86_64 + +#include "int_lib.h" +#include <stdlib.h> + +// Returns: a - b + +// Effects: aborts if a - b overflows + +ti_int +__subvti3(ti_int a, ti_int b) +{ + ti_int s = (ti_int)((tu_int)a - (tu_int)b); // subtract as unsigned: wraps instead of signed-overflow UB, so the checks below stay valid + if (b >= 0) + { + if (s > a) + abort(); + } + else + { + if (s <= a) + abort(); + } + return s; +} + +#endif diff --git a/lib/trampoline_setup.c b/lib/trampoline_setup.c new file mode 100644 index 000000000..a8906bcc0 --- /dev/null +++ b/lib/trampoline_setup.c @@ -0,0 +1,46 @@ +//===----- trampoline_setup.c - Implement __trampoline_setup -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#include <stdint.h> +#include <stdlib.h> + +extern void __clear_cache(void* start, void* end); + +// +// The ppc compiler generates calls to __trampoline_setup() when creating +// trampoline functions on the stack for use with nested functions. +// This function creates a custom 40-byte trampoline function on the stack +// which loads r11 with a pointer to the outer function's locals +// and then jumps to the target nested function. +// +#if __ppc__ +void __trampoline_setup(uint32_t* trampOnStack, int trampSizeAllocated, + const void* realFunc, void* localsPtr) +{ + // should never happen, but if compiler did not allocate + // enough space on stack for the trampoline, abort + if ( trampSizeAllocated < 40 ) + abort(); + + // create trampoline + trampOnStack[0] = 0x7c0802a6; // mflr r0 + trampOnStack[1] = 0x4800000d; // bl Lbase + trampOnStack[2] = (uint32_t)realFunc; + trampOnStack[3] = (uint32_t)localsPtr; + trampOnStack[4] = 0x7d6802a6; // Lbase: mflr r11 + trampOnStack[5] = 0x818b0000; // lwz r12,0(r11) + trampOnStack[6] = 0x7c0803a6; // mtlr r0 + trampOnStack[7] = 0x7d8903a6; // mtctr r12 + trampOnStack[8] = 0x816b0004; // lwz r11,4(r11) + trampOnStack[9] = 0x4e800420; // bctr + + // clear instruction cache + __clear_cache(trampOnStack, &trampOnStack[10]); +} +#endif // __ppc__ diff --git a/lib/ucmpdi2.c b/lib/ucmpdi2.c new file mode 100644 index 000000000..dec476fac --- /dev/null +++ b/lib/ucmpdi2.c @@ -0,0 +1,36 @@ +//===-- ucmpdi2.c - Implement __ucmpdi2 -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __ucmpdi2 for the compiler_rt library. 
+// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Returns: if (a < b) returns 0 +// if (a == b) returns 1 +// if (a > b) returns 2 + +si_int +__ucmpdi2(du_int a, du_int b) +{ + udwords x; + x.all = a; + udwords y; + y.all = b; + if (x.high < y.high) + return 0; + if (x.high > y.high) + return 2; + if (x.low < y.low) + return 0; + if (x.low > y.low) + return 2; + return 1; +} diff --git a/lib/ucmpti2.c b/lib/ucmpti2.c new file mode 100644 index 000000000..826360c09 --- /dev/null +++ b/lib/ucmpti2.c @@ -0,0 +1,40 @@ +//===-- ucmpti2.c - Implement __ucmpti2 -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __ucmpti2 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#if __x86_64 + +#include "int_lib.h" + +// Returns: if (a < b) returns 0 +// if (a == b) returns 1 +// if (a > b) returns 2 + +si_int +__ucmpti2(tu_int a, tu_int b) +{ + utwords x; + x.all = a; + utwords y; + y.all = b; + if (x.high < y.high) + return 0; + if (x.high > y.high) + return 2; + if (x.low < y.low) + return 0; + if (x.low > y.low) + return 2; + return 1; +} + +#endif diff --git a/lib/udivdi3.c b/lib/udivdi3.c new file mode 100644 index 000000000..473be64fa --- /dev/null +++ b/lib/udivdi3.c @@ -0,0 +1,24 @@ +//===-- udivdi3.c - Implement __udivdi3 -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __udivdi3 for the compiler_rt library. 
+// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +du_int __udivmoddi4(du_int a, du_int b, du_int* rem); + +// Returns: a / b + +du_int +__udivdi3(du_int a, du_int b) +{ + return __udivmoddi4(a, b, 0); +} diff --git a/lib/udivmoddi4.c b/lib/udivmoddi4.c new file mode 100644 index 000000000..ad4d2aa22 --- /dev/null +++ b/lib/udivmoddi4.c @@ -0,0 +1,236 @@ +//===-- udivmoddi4.c - Implement __udivmoddi4 -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __udivmoddi4 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Effects: if rem != 0, *rem = a % b +// Returns: a / b + +// Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide + +du_int +__udivmoddi4(du_int a, du_int b, du_int* rem) +{ + const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT; + const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT; + udwords n; + n.all = a; + udwords d; + d.all = b; + udwords q; + udwords r; + unsigned sr; + // special cases, X is unknown, K != 0 + if (n.high == 0) + { + if (d.high == 0) + { + // 0 X + // --- + // 0 X + if (rem) + *rem = n.low % d.low; + return n.low / d.low; + } + // 0 X + // --- + // K X + if (rem) + *rem = n.low; + return 0; + } + // n.high != 0 + if (d.low == 0) + { + if (d.high == 0) + { + // K X + // --- + // 0 0 + if (rem) + *rem = n.high % d.low; + return n.high / d.low; + } + // d.high != 0 + if (n.low == 0) + { + // K 0 + // --- + // K 0 + if (rem) + { + r.high = n.high % d.high; + r.low = 0; + *rem = r.all; + } + return n.high / d.high; + } + // K K + // --- + // K 0 + if ((d.high & (d.high - 1)) == 0) // if d is a power of 2 + { + 
if (rem) + { + r.low = n.low; + r.high = n.high & (d.high - 1); + *rem = r.all; + } + return n.high >> __builtin_ctz(d.high); + } + // K K + // --- + // K 0 + sr = __builtin_clz(d.high) - __builtin_clz(n.high); + // 0 <= sr <= n_uword_bits - 2 or sr large + if (sr > n_uword_bits - 2) + { + if (rem) + *rem = n.all; + return 0; + } + ++sr; + // 1 <= sr <= n_uword_bits - 1 + // q.all = n.all << (n_udword_bits - sr); + q.low = 0; + q.high = n.low << (n_uword_bits - sr); + // r.all = n.all >> sr; + r.high = n.high >> sr; + r.low = (n.high << (n_uword_bits - sr)) | (n.low >> sr); + } + else // d.low != 0 + { + if (d.high == 0) + { + // K X + // --- + // 0 K + if ((d.low & (d.low - 1)) == 0) // if d is a power of 2 + { + if (rem) + *rem = n.low & (d.low - 1); + if (d.low == 1) + return n.all; + unsigned sr = __builtin_ctz(d.low); + q.high = n.high >> sr; + q.low = (n.high << (n_uword_bits - sr)) | (n.low >> sr); + return q.all; + } + // K X + // --- + // 0 K + sr = 1 + n_uword_bits + __builtin_clz(d.low) - __builtin_clz(n.high); + // 2 <= sr <= n_udword_bits - 1 + // q.all = n.all << (n_udword_bits - sr); + // r.all = n.all >> sr; + // if (sr == n_uword_bits) + // { + // q.low = 0; + // q.high = n.low; + // r.high = 0; + // r.low = n.high; + // } + // else if (sr < n_uword_bits) // 2 <= sr <= n_uword_bits - 1 + // { + // q.low = 0; + // q.high = n.low << (n_uword_bits - sr); + // r.high = n.high >> sr; + // r.low = (n.high << (n_uword_bits - sr)) | (n.low >> sr); + // } + // else // n_uword_bits + 1 <= sr <= n_udword_bits - 1 + // { + // q.low = n.low << (n_udword_bits - sr); + // q.high = (n.high << (n_udword_bits - sr)) | + // (n.low >> (sr - n_uword_bits)); + // r.high = 0; + // r.low = n.high >> (sr - n_uword_bits); + // } + q.low = (n.low << (n_udword_bits - sr)) & + ((si_int)(n_uword_bits - sr) >> (n_uword_bits-1)); + q.high = ((n.low << ( n_uword_bits - sr)) & + ((si_int)(sr - n_uword_bits - 1) >> (n_uword_bits-1))) | + (((n.high << (n_udword_bits - sr)) | + (n.low 
>> (sr - n_uword_bits))) & + ((si_int)(n_uword_bits - sr) >> (n_uword_bits-1))); + r.high = (n.high >> sr) & + ((si_int)(sr - n_uword_bits) >> (n_uword_bits-1)); + r.low = ((n.high >> (sr - n_uword_bits)) & + ((si_int)(n_uword_bits - sr - 1) >> (n_uword_bits-1))) | + (((n.high << (n_uword_bits - sr)) | + (n.low >> sr)) & + ((si_int)(sr - n_uword_bits) >> (n_uword_bits-1))); + } + else + { + // K X + // --- + // K K + sr = __builtin_clz(d.high) - __builtin_clz(n.high); + // 0 <= sr <= n_uword_bits - 1 or sr large + if (sr > n_uword_bits - 1) + { + if (rem) + *rem = n.all; + return 0; + } + ++sr; + // 1 <= sr <= n_uword_bits + // q.all = n.all << (n_udword_bits - sr); + q.low = 0; + q.high = n.low << (n_uword_bits - sr); + // r.all = n.all >> sr; + // if (sr < n_uword_bits) + // { + // r.high = n.high >> sr; + // r.low = (n.high << (n_uword_bits - sr)) | (n.low >> sr); + // } + // else + // { + // r.high = 0; + // r.low = n.high; + // } + r.high = (n.high >> sr) & + ((si_int)(sr - n_uword_bits) >> (n_uword_bits-1)); + r.low = (n.high << (n_uword_bits - sr)) | + ((n.low >> sr) & + ((si_int)(sr - n_uword_bits) >> (n_uword_bits-1))); + } + } + // Not a special case + // q and r are initialized with: + // q.all = n.all << (n_udword_bits - sr); + // r.all = n.all >> sr; + // 1 <= sr <= n_udword_bits - 1 + su_int carry = 0; + for (; sr > 0; --sr) + { + // r:q = ((r:q) << 1) | carry + r.high = (r.high << 1) | (r.low >> (n_uword_bits - 1)); + r.low = (r.low << 1) | (q.high >> (n_uword_bits - 1)); + q.high = (q.high << 1) | (q.low >> (n_uword_bits - 1)); + q.low = (q.low << 1) | carry; + // carry = 0; + // if (r.all >= d.all) + // { + // r.all -= d.all; + // carry = 1; + // } + const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1); + carry = s & 1; + r.all -= d.all & s; + } + q.all = (q.all << 1) | carry; + if (rem) + *rem = r.all; + return q.all; +} diff --git a/lib/udivmodti4.c b/lib/udivmodti4.c new file mode 100644 index 000000000..79cf6f2fc --- /dev/null 
+++ b/lib/udivmodti4.c @@ -0,0 +1,241 @@ +//===-- udivmodti4.c - Implement __udivmodti4 -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __udivmodti4 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#if __x86_64 + +#include "int_lib.h" + +// Effects: if rem != 0, *rem = a % b +// Returns: a / b + +// Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide + +tu_int +__udivmodti4(tu_int a, tu_int b, tu_int* rem) +{ + const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT; + const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT; + utwords n; + n.all = a; + utwords d; + d.all = b; + utwords q; + utwords r; + unsigned sr; + // special cases, X is unknown, K != 0 + if (n.high == 0) + { + if (d.high == 0) + { + // 0 X + // --- + // 0 X + if (rem) + *rem = n.low % d.low; + return n.low / d.low; + } + // 0 X + // --- + // K X + if (rem) + *rem = n.low; + return 0; + } + // n.high != 0 + if (d.low == 0) + { + if (d.high == 0) + { + // K X + // --- + // 0 0 + if (rem) + *rem = n.high % d.low; + return n.high / d.low; + } + // d.high != 0 + if (n.low == 0) + { + // K 0 + // --- + // K 0 + if (rem) + { + r.high = n.high % d.high; + r.low = 0; + *rem = r.all; + } + return n.high / d.high; + } + // K K + // --- + // K 0 + if ((d.high & (d.high - 1)) == 0) // if d is a power of 2 + { + if (rem) + { + r.low = n.low; + r.high = n.high & (d.high - 1); + *rem = r.all; + } + return n.high >> __builtin_ctzll(d.high); + } + // K K + // --- + // K 0 + sr = __builtin_clzll(d.high) - __builtin_clzll(n.high); + // 0 <= sr <= n_udword_bits - 2 or sr large + if (sr > n_udword_bits - 2) + { + if (rem) + *rem = n.all; + return 0; + } + ++sr; + // 1 <= sr 
<= n_udword_bits - 1 + // q.all = n.all << (n_utword_bits - sr); + q.low = 0; + q.high = n.low << (n_udword_bits - sr); + // r.all = n.all >> sr; + r.high = n.high >> sr; + r.low = (n.high << (n_udword_bits - sr)) | (n.low >> sr); + } + else // d.low != 0 + { + if (d.high == 0) + { + // K X + // --- + // 0 K + if ((d.low & (d.low - 1)) == 0) // if d is a power of 2 + { + if (rem) + *rem = n.low & (d.low - 1); + if (d.low == 1) + return n.all; + unsigned sr = __builtin_ctzll(d.low); + q.high = n.high >> sr; + q.low = (n.high << (n_udword_bits - sr)) | (n.low >> sr); + return q.all; + } + // K X + // --- + // 0 K + sr = 1 + n_udword_bits + __builtin_clzll(d.low) + - __builtin_clzll(n.high); + // 2 <= sr <= n_utword_bits - 1 + // q.all = n.all << (n_utword_bits - sr); + // r.all = n.all >> sr; + // if (sr == n_udword_bits) + // { + // q.low = 0; + // q.high = n.low; + // r.high = 0; + // r.low = n.high; + // } + // else if (sr < n_udword_bits) // 2 <= sr <= n_udword_bits - 1 + // { + // q.low = 0; + // q.high = n.low << (n_udword_bits - sr); + // r.high = n.high >> sr; + // r.low = (n.high << (n_udword_bits - sr)) | (n.low >> sr); + // } + // else // n_udword_bits + 1 <= sr <= n_utword_bits - 1 + // { + // q.low = n.low << (n_utword_bits - sr); + // q.high = (n.high << (n_utword_bits - sr)) | + // (n.low >> (sr - n_udword_bits)); + // r.high = 0; + // r.low = n.high >> (sr - n_udword_bits); + // } + q.low = (n.low << (n_utword_bits - sr)) & + ((di_int)(int)(n_udword_bits - sr) >> (n_udword_bits-1)); + q.high = ((n.low << ( n_udword_bits - sr)) & + ((di_int)(int)(sr - n_udword_bits - 1) >> (n_udword_bits-1))) | + (((n.high << (n_utword_bits - sr)) | + (n.low >> (sr - n_udword_bits))) & + ((di_int)(int)(n_udword_bits - sr) >> (n_udword_bits-1))); + r.high = (n.high >> sr) & + ((di_int)(int)(sr - n_udword_bits) >> (n_udword_bits-1)); + r.low = ((n.high >> (sr - n_udword_bits)) & + ((di_int)(int)(n_udword_bits - sr - 1) >> (n_udword_bits-1))) | + (((n.high << 
(n_udword_bits - sr)) | + (n.low >> sr)) & + ((di_int)(int)(sr - n_udword_bits) >> (n_udword_bits-1))); + } + else + { + // K X + // --- + // K K + sr = __builtin_clzll(d.high) - __builtin_clzll(n.high); + // 0 <= sr <= n_udword_bits - 1 or sr large + if (sr > n_udword_bits - 1) + { + if (rem) + *rem = n.all; + return 0; + } + ++sr; + // 1 <= sr <= n_udword_bits + // q.all = n.all << (n_utword_bits - sr); + q.low = 0; + q.high = n.low << (n_udword_bits - sr); + // r.all = n.all >> sr; + // if (sr < n_udword_bits) + // { + // r.high = n.high >> sr; + // r.low = (n.high << (n_udword_bits - sr)) | (n.low >> sr); + // } + // else + // { + // r.high = 0; + // r.low = n.high; + // } + r.high = (n.high >> sr) & + ((di_int)(int)(sr - n_udword_bits) >> (n_udword_bits-1)); + r.low = (n.high << (n_udword_bits - sr)) | + ((n.low >> sr) & + ((di_int)(int)(sr - n_udword_bits) >> (n_udword_bits-1))); + } + } + // Not a special case + // q and r are initialized with: + // q.all = n.all << (n_utword_bits - sr); + // r.all = n.all >> sr; + // 1 <= sr <= n_utword_bits - 1 + su_int carry = 0; + for (; sr > 0; --sr) + { + // r:q = ((r:q) << 1) | carry + r.high = (r.high << 1) | (r.low >> (n_udword_bits - 1)); + r.low = (r.low << 1) | (q.high >> (n_udword_bits - 1)); + q.high = (q.high << 1) | (q.low >> (n_udword_bits - 1)); + q.low = (q.low << 1) | carry; + // carry = 0; + // if (r.all >= d.all) + // { + // r.all -= d.all; + // carry = 1; + // } + const ti_int s = (ti_int)(d.all - r.all - 1) >> (n_utword_bits - 1); + carry = s & 1; + r.all -= d.all & s; + } + q.all = (q.all << 1) | carry; + if (rem) + *rem = r.all; + return q.all; +} + +#endif diff --git a/lib/udivsi3.c b/lib/udivsi3.c new file mode 100644 index 000000000..16af22858 --- /dev/null +++ b/lib/udivsi3.c @@ -0,0 +1,61 @@ +//===-- udivsi3.c - Implement __udivsi3 -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// 
License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __udivsi3 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Returns: a / b + +// Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide + +su_int +__udivsi3(su_int n, su_int d) +{ + const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT; + su_int q; + su_int r; + unsigned sr; + // special cases + if (d == 0) + return 0; // ?! + if (n == 0) + return 0; + sr = __builtin_clz(d) - __builtin_clz(n); + // 0 <= sr <= n_uword_bits - 1 or sr large + if (sr > n_uword_bits - 1) // d > r + return 0; + if (sr == n_uword_bits - 1) // d == 1 + return n; + ++sr; + // 1 <= sr <= n_uword_bits - 1 + // Not a special case + q = n << (n_uword_bits - sr); + r = n >> sr; + su_int carry = 0; + for (; sr > 0; --sr) + { + // r:q = ((r:q) << 1) | carry + r = (r << 1) | (q >> (n_uword_bits - 1)); + q = (q << 1) | carry; + // carry = 0; + // if (r.all >= d.all) + // { + // r.all -= d.all; + // carry = 1; + // } + const si_int s = (si_int)(d - r - 1) >> (n_uword_bits - 1); + carry = s & 1; + r -= d & s; + } + q = (q << 1) | carry; + return q; +} diff --git a/lib/udivti3.c b/lib/udivti3.c new file mode 100644 index 000000000..785da055c --- /dev/null +++ b/lib/udivti3.c @@ -0,0 +1,28 @@ +//===-- udivti3.c - Implement __udivti3 -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __udivti3 for the compiler_rt library. 
+// +//===----------------------------------------------------------------------===// + +#if __x86_64 + +#include "int_lib.h" + +tu_int __udivmodti4(tu_int a, tu_int b, tu_int* rem); + +// Returns: a / b + +tu_int +__udivti3(tu_int a, tu_int b) +{ + return __udivmodti4(a, b, 0); +} + +#endif diff --git a/lib/umoddi3.c b/lib/umoddi3.c new file mode 100644 index 000000000..81994d5b9 --- /dev/null +++ b/lib/umoddi3.c @@ -0,0 +1,26 @@ +//===-- umoddi3.c - Implement __umoddi3 -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __umoddi3 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +du_int __udivmoddi4(du_int a, du_int b, du_int* rem); + +// Returns: a % b + +du_int +__umoddi3(du_int a, du_int b) +{ + du_int r; + __udivmoddi4(a, b, &r); + return r; +} diff --git a/lib/umodsi3.c b/lib/umodsi3.c new file mode 100644 index 000000000..06ab39a5f --- /dev/null +++ b/lib/umodsi3.c @@ -0,0 +1,22 @@ +//===-- umodsi3.c - Implement __umodsi3 -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __umodsi3 for the compiler_rt library. 
+// +//===----------------------------------------------------------------------===// + +#include "int_lib.h" + +// Returns: a % b + +su_int +__umodsi3(su_int a, su_int b) +{ + return a - (a / b) * b; +} diff --git a/lib/umodti3.c b/lib/umodti3.c new file mode 100644 index 000000000..57b09efa3 --- /dev/null +++ b/lib/umodti3.c @@ -0,0 +1,30 @@ +//===-- umodti3.c - Implement __umodti3 -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements __umodti3 for the compiler_rt library. +// +//===----------------------------------------------------------------------===// + +#if __x86_64 + +#include "int_lib.h" + +tu_int __udivmodti4(tu_int a, tu_int b, tu_int* rem); + +// Returns: a % b + +tu_int +__umodti3(tu_int a, tu_int b) +{ + tu_int r; + __udivmodti4(a, b, &r); + return r; +} + +#endif diff --git a/lib/x86_64/Makefile.mk b/lib/x86_64/Makefile.mk new file mode 100644 index 000000000..f5f545e0e --- /dev/null +++ b/lib/x86_64/Makefile.mk @@ -0,0 +1,22 @@ +#===- lib/x86_64/Makefile.mk -------------------------------*- Makefile -*--===# +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +#===------------------------------------------------------------------------===# + +Dir := lib/x86_64 +SubDirs := +OnlyArchs := x86_64 + +AsmSources := $(foreach file,$(wildcard $(Dir)/*.s),$(notdir $(file))) +Sources := $(foreach file,$(wildcard $(Dir)/*.c),$(notdir $(file))) +ObjNames := $(Sources:%.c=%.o) $(AsmSources:%.s=%.o) +Target := Optimized + +# FIXME: use automatic dependencies? 
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.

// double __floatdidf(di_int a);
//
// Returns: a converted to double, using the compiler's own integer to
// floating-point conversion.

#if defined(__x86_64__)

#include <stdint.h>

double __floatdidf(int64_t a)
{
    const double converted = (double)a;
    return converted;
}

#endif // __x86_64__
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.

// float __floatdisf(di_int a);
//
// Returns: a converted to float, using the compiler's own integer to
// floating-point conversion.

#if defined(__x86_64__)

#include <stdint.h>

float __floatdisf(int64_t a)
{
    const float converted = (float)a;
    return converted;
}

#endif // __x86_64__
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.

// long double __floatdixf(di_int a);
//
// Returns: a converted to long double, using the compiler's own integer to
// floating-point conversion.

#ifdef __x86_64__

#include <stdint.h>

long double __floatdixf(int64_t a)
{
    return (long double)a;
}

#endif // __x86_64__
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.

// float __floatundisf(du_int a);
//
// Converts an unsigned 64-bit integer to single precision.  The input is read
// from %rdi and the result is left in %xmm0.  cvtsi2ssq converts a *signed*
// 64-bit integer, so inputs with bit 63 set take a separate path that halves
// the value first and doubles the converted result.

#ifdef __x86_64__

.literal4
two: .single 2.0                  // constant used to undo the halving below

#define REL_ADDR(_a)	(_a)(%rip)   // %rip-relative addressing of a label

.text
.align 4
.globl ___floatundisf
___floatundisf:
	movq		$1,			%rsi    // mask for the lowest bit of a
	testq		%rdi,		%rdi
	js			1f                  // bit 63 set: value does not fit a signed 64-bit int
	cvtsi2ssq	%rdi,		%xmm0   // bit 63 clear: signed conversion is already correct
	ret

1:	andq		%rdi,		%rsi    // rsi = a & 1, the bit the shift below drops
	shrq		%rdi                // rdi = a >> 1 (logical), bit 63 is now clear
	orq			%rsi,		%rdi    // keep the dropped bit in bit 0 so it still affects rounding
	cvtsi2ssq	%rdi,		%xmm0   // convert the halved value
	mulss	REL_ADDR(two),	%xmm0   // multiply by 2.0 to restore the magnitude
	ret

#endif // __x86_64__
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.

// long double __floatundixf(du_int a);
//
// Converts an unsigned 64-bit integer to x87 extended precision.  The input is
// read from %rdi and the result is left on top of the x87 register stack.
// fildq loads a *signed* 64-bit integer, so when bit 63 of the input is set
// the loaded value is 2^64 too small and 2^64 is added back.

#ifdef __x86_64__

.const
.align 4
twop64: .quad 0x43f0000000000000   // bit pattern of the double 2^64

#define REL_ADDR(_a)	(_a)(%rip)   // %rip-relative addressing of a label

.text
.align 4
.globl ___floatundixf
___floatundixf:
	movq	%rdi,	 -8(%rsp)      // spill a just below %rsp (no stack adjustment;
	                               // presumably relies on the ABI red zone - confirm)
	fildq	-8(%rsp)               // load it as a signed 64-bit integer
	test	%rdi,		%rdi
	js		1f                     // bit 63 set: the signed load needs correcting
	ret
1:	faddl	REL_ADDR(twop64)       // add 2^64 to recover the unsigned value
	ret

#endif // __x86_64__


/* Branch-free implementation is ever so slightly slower, but more beautiful.
   It is likely superior for inlining, so I kept it around for future reference.

#ifdef __x86_64__

.const
.align 4
twop52: .quad 0x4330000000000000
twop84_plus_twop52_neg:
		.quad 0xc530000000100000
twop84: .quad 0x4530000000000000

#define REL_ADDR(_a)	(_a)(%rip)

.text
.align 4
.globl ___floatundixf
___floatundixf:
	movl	%edi,				%esi			// low 32 bits of input
	shrq	$32,				%rdi			// hi 32 bits of input
	orq		REL_ADDR(twop84),	%rdi			// 2^84 + hi (as a double)
	orq		REL_ADDR(twop52),	%rsi			// 2^52 + lo (as a double)
	movq	%rdi,			 -8(%rsp)
	movq	%rsi,			-16(%rsp)
	fldl	REL_ADDR(twop84_plus_twop52_neg)
	faddl	-8(%rsp)	// hi - 2^52 (as double extended, no rounding occurs)
	faddl	-16(%rsp)	// hi + lo (as double extended)
	ret

#endif // __x86_64__

*/
\ No newline at end of file |