summaryrefslogtreecommitdiff
path: root/libgcc/config/spu
diff options
context:
space:
mode:
Diffstat (limited to 'libgcc/config/spu')
-rw-r--r--libgcc/config/spu/divmodti4.c188
-rw-r--r--libgcc/config/spu/divv2df3.c195
-rw-r--r--libgcc/config/spu/float_disf.c31
-rw-r--r--libgcc/config/spu/float_unsdidf.c54
-rw-r--r--libgcc/config/spu/float_unsdisf.c31
-rw-r--r--libgcc/config/spu/float_unssidf.c45
-rw-r--r--libgcc/config/spu/mfc_multi_tag_release.c72
-rw-r--r--libgcc/config/spu/mfc_multi_tag_reserve.c84
-rw-r--r--libgcc/config/spu/mfc_tag_release.c59
-rw-r--r--libgcc/config/spu/mfc_tag_reserve.c51
-rw-r--r--libgcc/config/spu/mfc_tag_table.c39
-rw-r--r--libgcc/config/spu/multi3.c119
-rw-r--r--libgcc/config/spu/t-elf24
13 files changed, 992 insertions, 0 deletions
diff --git a/libgcc/config/spu/divmodti4.c b/libgcc/config/spu/divmodti4.c
new file mode 100644
index 00000000000..c63fb6b393c
--- /dev/null
+++ b/libgcc/config/spu/divmodti4.c
@@ -0,0 +1,188 @@
+/* Copyright (C) 2008, 2009, 2011 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <spu_intrinsics.h>
+
+typedef unsigned int UTItype __attribute__ ((mode (TI)));
+typedef int TItype __attribute__ ((mode (TI)));
+TItype __divti3 (TItype u, TItype v);
+TItype __modti3 (TItype u, TItype v);
+UTItype __udivti3 (UTItype u, UTItype v);
+UTItype __umodti3 (UTItype u, UTItype v);
+UTItype __udivmodti4 (UTItype u, UTItype v, UTItype *w);
+
+union qword_UTItype
+ {
+ qword q;
+ UTItype t;
+ };
+
+inline static qword
+si_from_UTItype (UTItype t)
+{
+ union qword_UTItype u;
+ u.t = t;
+ return u.q;
+}
+
+inline static UTItype
+si_to_UTItype (qword q)
+{
+ union qword_UTItype u;
+ u.q = q;
+ return u.t;
+}
+
+inline static unsigned int
+count_leading_zeros (UTItype x)
+{
+ qword c = si_clz (*(qword *) & x);
+ qword cmp0 = si_cgti (c, 31);
+ qword cmp1 = si_and (cmp0, si_shlqbyi (cmp0, 4));
+ qword cmp2 = si_and (cmp1, si_shlqbyi (cmp0, 8));
+ qword s = si_a (c, si_and (cmp0, si_shlqbyi (c, 4)));
+ s = si_a (s, si_and (cmp1, si_shlqbyi (c, 8)));
+ s = si_a (s, si_and (cmp2, si_shlqbyi (c, 12)));
+ return si_to_uint (s);
+}
+
+/* Based on implementation of udivmodsi4, which is essentially
+ * an optimized version of libgcc/udivmodsi4.c
+ clz %7,%2
+ clz %4,%1
+ il %5,1
+ fsmbi %0,0
+ sf %7,%4,%7
+ ori %3,%1,0
+ shl %5,%5,%7
+ shl %4,%2,%7
+1: or %8,%0,%5
+ rotmi %5,%5,-1
+ clgt %6,%4,%3
+ sf %7,%4,%3
+ rotmi %4,%4,-1
+ selb %0,%8,%0,%6
+ selb %3,%7,%3,%6
+3: brnz %5,1b
+ */
+
+UTItype
+__udivmodti4 (UTItype num, UTItype den, UTItype * rp)
+{
+ qword shift =
+ si_from_uint (count_leading_zeros (den) - count_leading_zeros (num));
+ qword n0 = si_from_UTItype (num);
+ qword d0 = si_from_UTItype (den);
+ qword bit = si_andi (si_fsmbi (1), 1);
+ qword r0 = si_il (0);
+ qword m1 = si_fsmbi (0x000f);
+ qword mask, r1, n1;
+
+ d0 = si_shlqbybi (si_shlqbi (d0, shift), shift);
+ bit = si_shlqbybi (si_shlqbi (bit, shift), shift);
+
+ do
+ {
+ r1 = si_or (r0, bit);
+
+ // n1 = n0 - d0 in TImode
+ n1 = si_bg (d0, n0);
+ n1 = si_shlqbyi (n1, 4);
+ n1 = si_sf (m1, n1);
+ n1 = si_bgx (d0, n0, n1);
+ n1 = si_shlqbyi (n1, 4);
+ n1 = si_sf (m1, n1);
+ n1 = si_bgx (d0, n0, n1);
+ n1 = si_shlqbyi (n1, 4);
+ n1 = si_sf (m1, n1);
+ n1 = si_sfx (d0, n0, n1);
+
+ mask = si_fsm (si_cgti (n1, -1));
+ r0 = si_selb (r0, r1, mask);
+ n0 = si_selb (n0, n1, mask);
+ bit = si_rotqmbii (bit, -1);
+ d0 = si_rotqmbii (d0, -1);
+ }
+ while (si_to_uint (si_orx (bit)));
+ if (rp)
+ *rp = si_to_UTItype (n0);
+ return si_to_UTItype (r0);
+}
+
+UTItype
+__udivti3 (UTItype n, UTItype d)
+{
+ return __udivmodti4 (n, d, (UTItype *)0);
+}
+
+UTItype
+__umodti3 (UTItype n, UTItype d)
+{
+ UTItype w;
+ __udivmodti4 (n, d, &w);
+ return w;
+}
+
+TItype
+__divti3 (TItype n, TItype d)
+{
+ int c = 0;
+ TItype w;
+
+ if (n < 0)
+ {
+ c = ~c;
+ n = -n;
+ }
+ if (d < 0)
+ {
+ c = ~c;
+ d = -d;
+ }
+
+ w = __udivmodti4 (n, d, (UTItype *)0);
+ if (c)
+ w = -w;
+ return w;
+}
+
+TItype
+__modti3 (TItype n, TItype d)
+{
+ int c = 0;
+ TItype w;
+
+ if (n < 0)
+ {
+ c = ~c;
+ n = -n;
+ }
+ if (d < 0)
+ {
+ c = ~c;
+ d = -d;
+ }
+
+ __udivmodti4 (n, d, (UTItype *) &w);
+ if (c)
+ w = -w;
+ return w;
+}
diff --git a/libgcc/config/spu/divv2df3.c b/libgcc/config/spu/divv2df3.c
new file mode 100644
index 00000000000..9d5e1a594e1
--- /dev/null
+++ b/libgcc/config/spu/divv2df3.c
@@ -0,0 +1,195 @@
+/* Copyright (C) 2009 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <spu_intrinsics.h>
+
+vector double __divv2df3 (vector double a_in, vector double b_in);
+
+/* __divv2df3 divides the vector dividend a by the vector divisor b and
+ returns the resulting vector quotient. Maximum error about 0.5 ulp
+ over entire double range including denorms, compared to true result
+ in round-to-nearest rounding mode. Handles Inf or NaN operands and
+ results correctly. */
+
+vector double
+__divv2df3 (vector double a_in, vector double b_in)
+{
+ /* Variables */
+ vec_int4 exp, exp_bias;
+ vec_uint4 no_underflow, overflow;
+ vec_float4 mant_bf, inv_bf;
+ vec_ullong2 exp_a, exp_b;
+ vec_ullong2 a_nan, a_zero, a_inf, a_denorm, a_denorm0;
+ vec_ullong2 b_nan, b_zero, b_inf, b_denorm, b_denorm0;
+ vec_ullong2 nan;
+ vec_uint4 a_exp, b_exp;
+ vec_ullong2 a_mant_0, b_mant_0;
+ vec_ullong2 a_exp_1s, b_exp_1s;
+ vec_ullong2 sign_exp_mask;
+
+ vec_double2 a, b;
+ vec_double2 mant_a, mant_b, inv_b, q0, q1, q2, mult;
+
+ /* Constants */
+ vec_uint4 exp_mask_u32 = spu_splats((unsigned int)0x7FF00000);
+ vec_uchar16 splat_hi = (vec_uchar16){0,1,2,3, 0,1,2,3, 8, 9,10,11, 8,9,10,11};
+ vec_uchar16 swap_32 = (vec_uchar16){4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11};
+ vec_ullong2 exp_mask = spu_splats(0x7FF0000000000000ULL);
+ vec_ullong2 sign_mask = spu_splats(0x8000000000000000ULL);
+ vec_float4 onef = spu_splats(1.0f);
+ vec_double2 one = spu_splats(1.0);
+ vec_double2 exp_53 = (vec_double2)spu_splats(0x0350000000000000ULL);
+
+ sign_exp_mask = spu_or(sign_mask, exp_mask);
+
+ /* Extract the floating point components from each of the operands including
+ * exponent and mantissa.
+ */
+ a_exp = (vec_uint4)spu_and((vec_uint4)a_in, exp_mask_u32);
+ a_exp = spu_shuffle(a_exp, a_exp, splat_hi);
+ b_exp = (vec_uint4)spu_and((vec_uint4)b_in, exp_mask_u32);
+ b_exp = spu_shuffle(b_exp, b_exp, splat_hi);
+
+ a_mant_0 = (vec_ullong2)spu_cmpeq((vec_uint4)spu_andc((vec_ullong2)a_in, sign_exp_mask), 0);
+ a_mant_0 = spu_and(a_mant_0, spu_shuffle(a_mant_0, a_mant_0, swap_32));
+
+ b_mant_0 = (vec_ullong2)spu_cmpeq((vec_uint4)spu_andc((vec_ullong2)b_in, sign_exp_mask), 0);
+ b_mant_0 = spu_and(b_mant_0, spu_shuffle(b_mant_0, b_mant_0, swap_32));
+
+ a_exp_1s = (vec_ullong2)spu_cmpeq(a_exp, exp_mask_u32);
+ b_exp_1s = (vec_ullong2)spu_cmpeq(b_exp, exp_mask_u32);
+
+ /* Identify all possible special values that must be accomodated including:
+ * +-denorm, +-0, +-infinity, and NaNs.
+ */
+ a_denorm0= (vec_ullong2)spu_cmpeq(a_exp, 0);
+ a_nan = spu_andc(a_exp_1s, a_mant_0);
+ a_zero = spu_and (a_denorm0, a_mant_0);
+ a_inf = spu_and (a_exp_1s, a_mant_0);
+ a_denorm = spu_andc(a_denorm0, a_zero);
+
+ b_denorm0= (vec_ullong2)spu_cmpeq(b_exp, 0);
+ b_nan = spu_andc(b_exp_1s, b_mant_0);
+ b_zero = spu_and (b_denorm0, b_mant_0);
+ b_inf = spu_and (b_exp_1s, b_mant_0);
+ b_denorm = spu_andc(b_denorm0, b_zero);
+
+ /* Scale denorm inputs to into normalized numbers by conditionally scaling the
+ * input parameters.
+ */
+ a = spu_sub(spu_or(a_in, exp_53), spu_sel(exp_53, a_in, sign_mask));
+ a = spu_sel(a_in, a, a_denorm);
+
+ b = spu_sub(spu_or(b_in, exp_53), spu_sel(exp_53, b_in, sign_mask));
+ b = spu_sel(b_in, b, b_denorm);
+
+ /* Extract the divisor and dividend exponent and force parameters into the signed
+ * range [1.0,2.0) or [-1.0,2.0).
+ */
+ exp_a = spu_and((vec_ullong2)a, exp_mask);
+ exp_b = spu_and((vec_ullong2)b, exp_mask);
+
+ mant_a = spu_sel(a, one, (vec_ullong2)exp_mask);
+ mant_b = spu_sel(b, one, (vec_ullong2)exp_mask);
+
+ /* Approximate the single reciprocal of b by using
+ * the single precision reciprocal estimate followed by one
+ * single precision iteration of Newton-Raphson.
+ */
+ mant_bf = spu_roundtf(mant_b);
+ inv_bf = spu_re(mant_bf);
+ inv_bf = spu_madd(spu_nmsub(mant_bf, inv_bf, onef), inv_bf, inv_bf);
+
+ /* Perform 2 more Newton-Raphson iterations in double precision. The
+ * result (q1) is in the range (0.5, 2.0).
+ */
+ inv_b = spu_extend(inv_bf);
+ inv_b = spu_madd(spu_nmsub(mant_b, inv_b, one), inv_b, inv_b);
+ q0 = spu_mul(mant_a, inv_b);
+ q1 = spu_madd(spu_nmsub(mant_b, q0, mant_a), inv_b, q0);
+
+ /* Determine the exponent correction factor that must be applied
+ * to q1 by taking into account the exponent of the normalized inputs
+ * and the scale factors that were applied to normalize them.
+ */
+ exp = spu_rlmaska(spu_sub((vec_int4)exp_a, (vec_int4)exp_b), -20);
+ exp = spu_add(exp, (vec_int4)spu_add(spu_and((vec_int4)a_denorm, -0x34), spu_and((vec_int4)b_denorm, 0x34)));
+
+ /* Bias the quotient exponent depending on the sign of the exponent correction
+ * factor so that a single multiplier will ensure the entire double precision
+ * domain (including denorms) can be achieved.
+ *
+ * exp bias q1 adjust exp
+ * ===== ======== ==========
+ * positive 2^+65 -65
+ * negative 2^-64 +64
+ */
+ exp_bias = spu_xor(spu_rlmaska(exp, -31), 64);
+ exp = spu_sub(exp, exp_bias);
+
+ q1 = spu_sel(q1, (vec_double2)spu_add((vec_int4)q1, spu_sl(exp_bias, 20)), exp_mask);
+
+ /* Compute a multiplier (mult) to applied to the quotient (q1) to produce the
+ * expected result. On overflow, clamp the multiplier to the maximum non-infinite
+ * number in case the rounding mode is not round-to-nearest.
+ */
+ exp = spu_add(exp, 0x3FF);
+ no_underflow = spu_cmpgt(exp, 0);
+ overflow = spu_cmpgt(exp, 0x7FE);
+ exp = spu_and(spu_sl(exp, 20), (vec_int4)no_underflow);
+ exp = spu_and(exp, (vec_int4)exp_mask);
+
+ mult = spu_sel((vec_double2)exp, (vec_double2)(spu_add((vec_uint4)exp_mask, -1)), (vec_ullong2)overflow);
+
+ /* Handle special value conditions. These include:
+ *
+ * 1) IF either operand is a NaN OR both operands are 0 or INFINITY THEN a NaN
+ * results.
+ * 2) ELSE IF the dividend is an INFINITY OR the divisor is 0 THEN a INFINITY results.
+ * 3) ELSE IF the dividend is 0 OR the divisor is INFINITY THEN a 0 results.
+ */
+ mult = spu_andc(mult, (vec_double2)spu_or(a_zero, b_inf));
+ mult = spu_sel(mult, (vec_double2)exp_mask, spu_or(a_inf, b_zero));
+
+ nan = spu_or(a_nan, b_nan);
+ nan = spu_or(nan, spu_and(a_zero, b_zero));
+ nan = spu_or(nan, spu_and(a_inf, b_inf));
+
+ mult = spu_or(mult, (vec_double2)nan);
+
+ /* Scale the final quotient */
+
+ q2 = spu_mul(q1, mult);
+
+ return (q2);
+}
+
+
+/* We use the same function for vector and scalar division. Provide the
+ scalar entry point as an alias. */
+double __divdf3 (double a, double b)
+ __attribute__ ((__alias__ ("__divv2df3")));
+
+/* Some toolchain builds used the __fast_divdf3 name for this helper function.
+ Provide this as another alternate entry point for compatibility. */
+double __fast_divdf3 (double a, double b)
+ __attribute__ ((__alias__ ("__divv2df3")));
+
diff --git a/libgcc/config/spu/float_disf.c b/libgcc/config/spu/float_disf.c
new file mode 100644
index 00000000000..0f4fe3d8e29
--- /dev/null
+++ b/libgcc/config/spu/float_disf.c
@@ -0,0 +1,31 @@
+/* Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Prototype. */
+float __floatdisf (long long x);
+
+float __floatdisf (long long x)
+{
+ /* The SPU back-end now generates inline code for this conversion.
+ This file is solely used to provide the __floatdisf functions
+ for objects generated with prior versions of GCC. */
+ return x;
+}
diff --git a/libgcc/config/spu/float_unsdidf.c b/libgcc/config/spu/float_unsdidf.c
new file mode 100644
index 00000000000..4fdf0b88a2b
--- /dev/null
+++ b/libgcc/config/spu/float_unsdidf.c
@@ -0,0 +1,54 @@
+/* Copyright (C) 2006, 2008, 2009 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <spu_intrinsics.h>
+const unsigned char __didf_scale[16] __attribute__ ((__aligned__ (16))) = {
+ 0x00, 0x00, 0x04, 0x3e,
+ 0x00, 0x00, 0x04, 0x1e,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00
+};
+const unsigned char __didf_pat[16] __attribute__ ((__aligned__ (16))) = {
+ 0x02, 0x03, 0x10, 0x11,
+ 0x12, 0x13, 0x80, 0x80,
+ 0x06, 0x07, 0x14, 0x15,
+ 0x16, 0x17, 0x80, 0x80
+};
+
+/* double __float_unsdidf (unsigned long long int)
+ Construct two exact doubles representing the high and low parts (in
+ parallel), then add them. */
+qword __float_unsdidf (qword DI);
+qword
+__float_unsdidf (qword DI)
+{
+ qword t0, t1, t2, t3, t4, t5, t6, t7, t8;
+ t0 = si_clz (DI);
+ t1 = si_shl (DI, t0);
+ t2 = si_ceqi (t0, 32);
+ t3 = si_sf (t0, *(const qword *) __didf_scale);
+ t4 = si_a (t1, t1);
+ t5 = si_andc (t3, t2);
+ t6 = si_shufb (t5, t4, *(const qword *) __didf_pat);
+ t7 = si_shlqbii (t6, 4);
+ t8 = si_shlqbyi (t7, 8);
+ return si_dfa (t7, t8);
+}
diff --git a/libgcc/config/spu/float_unsdisf.c b/libgcc/config/spu/float_unsdisf.c
new file mode 100644
index 00000000000..7af120ecc8c
--- /dev/null
+++ b/libgcc/config/spu/float_unsdisf.c
@@ -0,0 +1,31 @@
+/* Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Prototype. */
+float __floatundisf (unsigned long long x);
+
+float __floatundisf (unsigned long long x)
+{
+ /* The SPU back-end now generates inline code for this conversion.
+ This file is solely used to provide the __floatundisf function
+ for objects generated with prior versions of GCC. */
+ return x;
+}
diff --git a/libgcc/config/spu/float_unssidf.c b/libgcc/config/spu/float_unssidf.c
new file mode 100644
index 00000000000..b255f81af55
--- /dev/null
+++ b/libgcc/config/spu/float_unssidf.c
@@ -0,0 +1,45 @@
+/* Copyright (C) 2006, 2008, 2009 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <spu_intrinsics.h>
+const unsigned char __sidf_pat[16] __attribute__ ((__aligned__ (16))) = {
+ 0x02, 0x03, 0x10, 0x11,
+ 0x12, 0x13, 0x80, 0x80,
+ 0x06, 0x07, 0x14, 0x15,
+ 0x16, 0x17, 0x80, 0x80
+};
+
+/* double __float_unssidf (unsigned int SI) */
+qword __float_unssidf (qword SI);
+qword
+__float_unssidf (qword SI)
+{
+ qword t0, t1, t2, t3, t4, t5, t6, t7;
+ t0 = si_clz (SI);
+ t1 = si_il (1054);
+ t2 = si_shl (SI, t0);
+ t3 = si_ceqi (t0, 32);
+ t4 = si_sf (t0, t1);
+ t5 = si_a (t2, t2);
+ t6 = si_andc (t4, t3);
+ t7 = si_shufb (t6, t5, *(const qword *) __sidf_pat);
+ return si_shlqbii (t7, 4);
+}
diff --git a/libgcc/config/spu/mfc_multi_tag_release.c b/libgcc/config/spu/mfc_multi_tag_release.c
new file mode 100644
index 00000000000..62eb2beeb8f
--- /dev/null
+++ b/libgcc/config/spu/mfc_multi_tag_release.c
@@ -0,0 +1,72 @@
+/* Copyright (C) 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include <spu_mfcio.h>
+extern vector unsigned int __mfc_tag_table;
+
+/* Release a sequential group of tags from exclusive use. The sequential
+ group of tags is the range starting from <first_tag> through
+ <first_tag>+<number_of_tags>-1. Upon sucessful release, MFC_DMA_TAG_VALID
+ is returned and the tags become available for future reservation.
+
+ If the specified tags were not previously reserved, no action is
+ taken and MFC_DMA_TAG_INVALID is returned. */
+
+unsigned int
+__mfc_multi_tag_release (unsigned int first_tag, unsigned int number_of_tags)
+{
+ vector unsigned int table_copy, tmp, tmp1;
+ vector unsigned int one = (vector unsigned int)
+ { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF };
+ vector unsigned int is_invalid;
+ unsigned int last_tag;
+ vector unsigned int has_been_reserved;
+
+ last_tag = first_tag + number_of_tags;
+
+ table_copy = spu_sl (one, number_of_tags);
+ table_copy = spu_rl (table_copy, -last_tag);
+ table_copy = spu_xor (table_copy, -1);
+
+ /* Make sure the tags are in range and valid. */
+ tmp = spu_cmpgt (spu_promote(last_tag, 0), 32);
+ tmp1 = spu_cmpgt (spu_promote(number_of_tags, 0), 32);
+ is_invalid = spu_cmpgt (spu_promote(first_tag, 0), 31);
+
+ /* All bits are set to 1 if invalid, 0 if valid. */
+ is_invalid = spu_or (tmp, is_invalid);
+ is_invalid = spu_or (tmp1, is_invalid);
+
+ /* check whether these tags have been reserved */
+ tmp = spu_rlmask (one, (int)-number_of_tags);
+ tmp1 = spu_sl (__mfc_tag_table, first_tag);
+ has_been_reserved = spu_cmpgt(tmp1, tmp);
+
+ is_invalid = spu_or (has_been_reserved, is_invalid);
+
+ table_copy = spu_sel (__mfc_tag_table, table_copy, table_copy);
+ __mfc_tag_table = spu_sel (table_copy, __mfc_tag_table, is_invalid);
+
+ return spu_extract (is_invalid, 0);
+}
+
diff --git a/libgcc/config/spu/mfc_multi_tag_reserve.c b/libgcc/config/spu/mfc_multi_tag_reserve.c
new file mode 100644
index 00000000000..06d70259276
--- /dev/null
+++ b/libgcc/config/spu/mfc_multi_tag_reserve.c
@@ -0,0 +1,84 @@
+/* Copyright (C) 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include <spu_mfcio.h>
+extern vector unsigned int __mfc_tag_table;
+
+/* Reserve a sequential group of tags for exclusive use. The number of
+ tags to be reserved is specified by the <number_of_tags> parameter.
+ This routine returns the first tag ID for a sequential list of
+ available tags and marks them as reserved. The reserved group
+ of tags is in the range starting from the returned tag through
+ the returned tag + <number_of_tags>-1.
+
+ If the number of tags requested exceeds the number of available
+ sequential tags, then MFC_DMA_TAG_INVALID is returned indicating
+ that the request could not be serviced. */
+
+unsigned int
+__mfc_multi_tag_reserve (unsigned int number_of_tags)
+{
+ vector unsigned int table_copy;
+ vector unsigned int one = (vector unsigned int)
+ { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF };
+ vector unsigned int count_busy, is_valid;
+ vector unsigned int count_total;
+ vector unsigned int count_avail = (vector unsigned int) { 0, 0, 0, 0 };
+ vector unsigned int index = (vector unsigned int) { 0, 0, 0, 0 };
+
+ table_copy = __mfc_tag_table;
+
+
+ /* count_busy: number of consecutive busy tags
+ count_avail: number of consecutive free tags
+ table_copy: temporary copy of the tag table
+ count_total: sum of count_busy and count_avail
+ index: index of the current working tag */
+ do
+ {
+ table_copy = spu_sl (table_copy, count_avail);
+
+ count_busy = spu_cntlz (table_copy);
+ table_copy = spu_sl (table_copy, count_busy);
+ count_avail = spu_cntlz (spu_xor(table_copy, -1));
+ count_total = spu_add (count_busy, count_avail);
+ index = spu_add (index, count_total);
+ }
+ while (spu_extract (count_avail, 0) < number_of_tags
+ && spu_extract (table_copy, 0) != 0);
+
+ index = spu_sub (index, count_avail);
+
+ /* is_valid is set to 0xFFFFFFFF if table_copy == 0, 0 otherwise. */
+ is_valid = spu_cmpeq (table_copy, 0);
+ index = spu_sel (index, is_valid, is_valid);
+
+ /* Now I need to actually mark the tags as used. */
+ table_copy = spu_sl (one, number_of_tags);
+ table_copy = spu_rl (table_copy, -number_of_tags - spu_extract (index, 0));
+ table_copy = spu_sel (table_copy, __mfc_tag_table, table_copy);
+ __mfc_tag_table = spu_sel (table_copy, __mfc_tag_table, is_valid);
+
+ return spu_extract (index, 0);
+}
+
diff --git a/libgcc/config/spu/mfc_tag_release.c b/libgcc/config/spu/mfc_tag_release.c
new file mode 100644
index 00000000000..d59c5713053
--- /dev/null
+++ b/libgcc/config/spu/mfc_tag_release.c
@@ -0,0 +1,59 @@
+/* Copyright (C) 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include <spu_mfcio.h>
+extern vector unsigned int __mfc_tag_table;
+
+/* Release the specified DMA tag from exclusive use. Once released, the
+ tag is available for future reservation. Upon sucessful release,
+ MFC_DMA_TAG_VALID is returned. If the specified tag is not in the
+ range 0 to 31, or had not been reserved, no action is taken and
+ MFC_DMA_TAG_INVALID is returned. */
+
+unsigned int
+__mfc_tag_release (unsigned int tag)
+{
+ vector unsigned int is_invalid;
+ vector unsigned int mask = (vector unsigned int)
+ { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
+ vector signed int zero = (vector signed int) { 0, 0, 0, 0 };
+
+ vector signed int has_been_reserved;
+
+ /* Check if the tag is out of range. */
+ is_invalid = spu_cmpgt (spu_promote (tag, 0), 31);
+
+ /* Check whether the tag has been reserved, set to all 1 if has not
+ been reserved, 0 otherwise. */
+ has_been_reserved = (vector signed int) spu_rl (__mfc_tag_table, tag);
+ has_been_reserved = (vector signed int) spu_cmpgt (zero, has_been_reserved);
+
+ /* Set invalid. */
+ is_invalid = spu_or ((vector unsigned int) has_been_reserved, is_invalid);
+
+ mask = spu_rlmask (mask, (int)(-tag));
+ __mfc_tag_table = spu_or (__mfc_tag_table, mask);
+
+ return spu_extract(is_invalid, 0);
+}
+
diff --git a/libgcc/config/spu/mfc_tag_reserve.c b/libgcc/config/spu/mfc_tag_reserve.c
new file mode 100644
index 00000000000..23b4817c74f
--- /dev/null
+++ b/libgcc/config/spu/mfc_tag_reserve.c
@@ -0,0 +1,51 @@
+/* Copyright (C) 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include <spu_mfcio.h>
+extern vector unsigned int __mfc_tag_table;
+
+/* Reserves a DMA tag for exclusive use. This routine returns an available
+ tag id in the range 0 to 31 and marks the tag as reserved. If no tags
+ are available, MFC_DMA_TAG_INVALID is returned indicating that all tags
+ are already reserved. */
+
+unsigned int
+__mfc_tag_reserve (void)
+{
+ vector unsigned int mask = (vector unsigned int)
+ { 0x80000000, 0x80000000, 0x80000000, 0x80000000 };
+ vector unsigned int count_zeros, is_valid;
+ vector signed int count_neg;
+
+ count_zeros = spu_cntlz (__mfc_tag_table);
+ count_neg = spu_sub (0, (vector signed int) count_zeros);
+
+ mask = spu_rlmask (mask, (vector signed int) count_neg);
+ __mfc_tag_table = spu_andc (__mfc_tag_table, mask);
+
+ is_valid = spu_cmpeq (count_zeros, 32);
+ count_zeros = spu_sel (count_zeros, is_valid, is_valid);
+
+ return spu_extract (count_zeros, 0);
+}
+
diff --git a/libgcc/config/spu/mfc_tag_table.c b/libgcc/config/spu/mfc_tag_table.c
new file mode 100644
index 00000000000..bd08c580c18
--- /dev/null
+++ b/libgcc/config/spu/mfc_tag_table.c
@@ -0,0 +1,39 @@
+/* Copyright (C) 2007, 2009 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* The free tag table used by the MFC tag manager, with tag0
+ reserved for the overlay manager. */
+__vector unsigned int
+__mfc_tag_table = (__vector unsigned int) { 0x7FFFFFFF, -1, -1, -1 };
+
+/* Arrange to release tag0 if overlays are not present. */
+static void __mfc_tag_init (void) __attribute__ ((constructor));
+
+static void
+__mfc_tag_init (void)
+{
+ extern void _ovly_table __attribute__ ((weak));
+
+ if (&_ovly_table == 0)
+ __mfc_tag_table = (__vector unsigned int) { -1, -1, -1, -1 };
+}
diff --git a/libgcc/config/spu/multi3.c b/libgcc/config/spu/multi3.c
new file mode 100644
index 00000000000..b8b0e90ee25
--- /dev/null
+++ b/libgcc/config/spu/multi3.c
@@ -0,0 +1,119 @@
+/* Copyright (C) 2008, 2009 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <spu_intrinsics.h>
+
+typedef int TItype __attribute__ ((mode (TI)));
+
+union qword_TItype
+ {
+ qword q;
+ TItype t;
+ };
+
+inline static qword
+si_from_TItype (TItype t)
+{
+ union qword_TItype u;
+ u.t = t;
+ return u.q;
+}
+
+inline static TItype
+si_to_TItype (qword q)
+{
+ union qword_TItype u;
+ u.q = q;
+ return u.t;
+}
+
+/* A straight forward vectorization and unrolling of
+ * short l[8], r[8];
+ * TItype total = 0;
+ * for (i = 0; i < 8; i++)
+ * for (j = 0; j < 8; j++)
+ * total += (TItype)((l[7-i] * r[7-j]) << (16 * (i + j)));
+ */
+TItype
+__multi3 (TItype l, TItype r)
+{
+ qword u = si_from_TItype (l);
+ qword v = si_from_TItype (r);
+ qword splat0 = si_shufb (v, v, si_ilh (0x0001));
+ qword splat1 = si_shufb (v, v, si_ilh (0x0203));
+ qword splat2 = si_shufb (v, v, si_ilh (0x0405));
+ qword splat3 = si_shufb (v, v, si_ilh (0x0607));
+ qword splat4 = si_shufb (v, v, si_ilh (0x0809));
+ qword splat5 = si_shufb (v, v, si_ilh (0x0a0b));
+ qword splat6 = si_shufb (v, v, si_ilh (0x0c0d));
+ qword splat7 = si_shufb (v, v, si_ilh (0x0e0f));
+
+ qword part0l = si_shlqbyi (si_mpyu (u, splat0), 14);
+ qword part1h = si_shlqbyi (si_mpyhhu (u, splat1), 14);
+ qword part1l = si_shlqbyi (si_mpyu (u, splat1), 12);
+ qword part2h = si_shlqbyi (si_mpyhhu (u, splat2), 12);
+ qword part2l = si_shlqbyi (si_mpyu (u, splat2), 10);
+ qword part3h = si_shlqbyi (si_mpyhhu (u, splat3), 10);
+ qword part3l = si_shlqbyi (si_mpyu (u, splat3), 8);
+ qword part4h = si_shlqbyi (si_mpyhhu (u, splat4), 8);
+ qword part4l = si_shlqbyi (si_mpyu (u, splat4), 6);
+ qword part5h = si_shlqbyi (si_mpyhhu (u, splat5), 6);
+ qword part5l = si_shlqbyi (si_mpyu (u, splat5), 4);
+ qword part6h = si_shlqbyi (si_mpyhhu (u, splat6), 4);
+ qword part6l = si_shlqbyi (si_mpyu (u, splat6), 2);
+ qword part7h = si_shlqbyi (si_mpyhhu (u, splat7), 2);
+ qword part7l = si_mpyu (u, splat7);
+
+ qword carry, total0, total1, total2, total3, total4;
+ qword total5, total6, total7, total8, total9, total10;
+ qword total;
+
+ total0 = si_a (si_a (si_a (part0l, part1h), si_a (part1l, part2h)), part7l);
+ total1 = si_a (part2l, part3h);
+ total2 = si_a (part3l, part4h);
+ total3 = si_a (part4l, part5h);
+ total4 = si_a (part5l, part6h);
+ total5 = si_a (part6l, part7h);
+ total6 = si_a (total0, total1);
+ total7 = si_a (total2, total3);
+ total8 = si_a (total4, total5);
+ total9 = si_a (total6, total7);
+ total10 = si_a (total8, total9);
+
+ carry = si_cg (part2l, part3h);
+ carry = si_a (carry, si_cg (part3l, part4h));
+ carry = si_a (carry, si_cg (part4l, part5h));
+ carry = si_a (carry, si_cg (part5l, part6h));
+ carry = si_a (carry, si_cg (part6l, part7h));
+ carry = si_a (carry, si_cg (total0, total1));
+ carry = si_a (carry, si_cg (total2, total3));
+ carry = si_a (carry, si_cg (total4, total5));
+ carry = si_a (carry, si_cg (total6, total7));
+ carry = si_a (carry, si_cg (total8, total9));
+ carry = si_shlqbyi (carry, 4);
+
+ total = si_cg (total10, carry);
+ total = si_shlqbyi (total, 4);
+ total = si_cgx (total10, carry, total);
+ total = si_shlqbyi (total, 4);
+ total = si_addx (total10, carry, total);
+ return si_to_TItype (total);
+}
diff --git a/libgcc/config/spu/t-elf b/libgcc/config/spu/t-elf
index 130d5610297..83616c1ca7d 100644
--- a/libgcc/config/spu/t-elf
+++ b/libgcc/config/spu/t-elf
@@ -2,6 +2,30 @@
# FIXME: This is the default.
CRTSTUFF_T_CFLAGS =
+# We exclude those because the libgcc2.c default versions do not support
+# the SPU single-precision format (round towards zero). We provide our
+# own versions below and/or via direct expansion.
+LIB2ADD = _floatdisf _floatundisf _floattisf _floatunstisf
+
+LIB2ADD_ST = $(srcdir)/config/spu/float_unssidf.c \
+ $(srcdir)/config/spu/float_unsdidf.c \
+ $(srcdir)/config/spu/float_unsdisf.c \
+ $(srcdir)/config/spu/float_disf.c \
+ $(srcdir)/config/spu/mfc_tag_table.c \
+ $(srcdir)/config/spu/mfc_tag_reserve.c \
+ $(srcdir)/config/spu/mfc_tag_release.c \
+ $(srcdir)/config/spu/mfc_multi_tag_reserve.c \
+ $(srcdir)/config/spu/mfc_multi_tag_release.c \
+ $(srcdir)/config/spu/multi3.c \
+ $(srcdir)/config/spu/divmodti4.c \
+ $(srcdir)/config/spu/divv2df3.c
+
+# Build TImode conversion routines to support Fortran 128-bit
+# integer data types.
+LIB2_SIDITI_CONV_FUNCS = yes
+
+HOST_LIBGCC2_CFLAGS += -mwarn-reloc -D__IN_LIBGCC2
+
# Neither gcc or newlib seem to have a standard way to generate multiple
# crt*.o files. So we don't use the standard crt0.o name anymore.