diff options
author | bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4> | 2011-11-06 15:16:32 +0000 |
---|---|---|
committer | bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4> | 2011-11-06 15:16:32 +0000 |
commit | 31ba6c3ff2311bad9422246f49d59c532cbb5078 (patch) | |
tree | 6e862e3ea14b2edf93a92c404a0d9b29f3f9ba65 /gcc/config/sparc | |
parent | bab85b65e545231656361b997a81fb8a44b266b4 (diff) | |
download | gcc-31ba6c3ff2311bad9422246f49d59c532cbb5078.tar.gz |
2011-11-06 Basile Starynkevitch <basile@starynkevitch.net>
MELT branch merged with trunk rev 181026 using svnmerge
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/melt-branch@181034 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config/sparc')
-rw-r--r-- | gcc/config/sparc/lb1spc.asm | 784 | ||||
-rw-r--r-- | gcc/config/sparc/lb1spl.asm | 246 | ||||
-rw-r--r-- | gcc/config/sparc/libgcc-sparc-glibc.ver | 93 | ||||
-rw-r--r-- | gcc/config/sparc/sparc-protos.h | 1 | ||||
-rw-r--r-- | gcc/config/sparc/sparc.c | 418 | ||||
-rw-r--r-- | gcc/config/sparc/sparc.h | 1 | ||||
-rw-r--r-- | gcc/config/sparc/sparc.md | 145 | ||||
-rw-r--r-- | gcc/config/sparc/t-elf | 6 | ||||
-rw-r--r-- | gcc/config/sparc/t-leon | 6 | ||||
-rw-r--r-- | gcc/config/sparc/t-leon3 | 3 | ||||
-rw-r--r-- | gcc/config/sparc/t-linux | 5 | ||||
-rw-r--r-- | gcc/config/sparc/t-linux64 | 9 | ||||
-rw-r--r-- | gcc/config/sparc/t-netbsd64 | 5 |
13 files changed, 503 insertions, 1219 deletions
diff --git a/gcc/config/sparc/lb1spc.asm b/gcc/config/sparc/lb1spc.asm deleted file mode 100644 index b60bd5740e7..00000000000 --- a/gcc/config/sparc/lb1spc.asm +++ /dev/null @@ -1,784 +0,0 @@ -/* This is an assembly language implementation of mulsi3, divsi3, and modsi3 - for the sparc processor. - - These routines are derived from the SPARC Architecture Manual, version 8, - slightly edited to match the desired calling convention, and also to - optimize them for our purposes. */ - -#ifdef L_mulsi3 -.text - .align 4 - .global .umul - .proc 4 -.umul: - or %o0, %o1, %o4 ! logical or of multiplier and multiplicand - mov %o0, %y ! multiplier to Y register - andncc %o4, 0xfff, %o5 ! mask out lower 12 bits - be mul_shortway ! can do it the short way - andcc %g0, %g0, %o4 ! zero the partial product and clear NV cc - ! - ! long multiply - ! - mulscc %o4, %o1, %o4 ! first iteration of 33 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 ! 32nd iteration - mulscc %o4, %g0, %o4 ! last iteration only shifts - ! the upper 32 bits of product are wrong, but we do not care - retl - rd %y, %o0 - ! - ! short multiply - ! -mul_shortway: - mulscc %o4, %o1, %o4 ! first iteration of 13 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 - mulscc %o4, %o1, %o4 ! 12th iteration - mulscc %o4, %g0, %o4 ! last iteration only shifts - rd %y, %o5 - sll %o4, 12, %o4 ! left shift partial product by 12 bits - srl %o5, 20, %o5 ! right shift partial product by 20 bits - retl - or %o5, %o4, %o0 ! merge for true product -#endif - -#ifdef L_divsi3 -/* - * Division and remainder, from Appendix E of the SPARC Version 8 - * Architecture Manual, with fixes from Gordon Irlam. - */ - -/* - * Input: dividend and divisor in %o0 and %o1 respectively. - * - * m4 parameters: - * .div name of function to generate - * div div=div => %o0 / %o1; div=rem => %o0 % %o1 - * true true=true => signed; true=false => unsigned - * - * Algorithm parameters: - * N how many bits per iteration we try to get (4) - * WORDSIZE total number of bits (32) - * - * Derived constants: - * TOPBITS number of bits in the top decade of a number - * - * Important variables: - * Q the partial quotient under development (initially 0) - * R the remainder so far, initially the dividend - * ITER number of main division loop iterations required; - * equal to ceil(log2(quotient) / N). Note that this - * is the log base (2^N) of the quotient. - * V the current comparand, initially divisor*2^(ITER*N-1) - * - * Cost: - * Current estimate for non-large dividend is - * ceil(log2(quotient) / N) * (10 + 7N/2) + C - * A large dividend is one greater than 2^(31-TOPBITS) and takes a - * different path, as the upper bits of the quotient must be developed - * one bit at a time. - */ - .global .udiv - .align 4 - .proc 4 - .text -.udiv: - b ready_to_divide - mov 0, %g3 ! result is always positive - - .global .div - .align 4 - .proc 4 - .text -.div: - ! compute sign of result; if neither is negative, no problem - orcc %o1, %o0, %g0 ! either negative? - bge ready_to_divide ! no, go do the divide - xor %o1, %o0, %g3 ! compute sign in any case - tst %o1 - bge 1f - tst %o0 - ! %o1 is definitely negative; %o0 might also be negative - bge ready_to_divide ! if %o0 not negative... - sub %g0, %o1, %o1 ! in any case, make %o1 nonneg -1: ! %o0 is negative, %o1 is nonnegative - sub %g0, %o0, %o0 ! make %o0 nonnegative - - -ready_to_divide: - - ! Ready to divide. Compute size of quotient; scale comparand. - orcc %o1, %g0, %o5 - bne 1f - mov %o0, %o3 - - ! Divide by zero trap. If it returns, return 0 (about as - ! wrong as possible, but that is what SunOS does...). - ta 0x2 ! ST_DIV0 - retl - clr %o0 - -1: - cmp %o3, %o5 ! if %o1 exceeds %o0, done - blu got_result ! (and algorithm fails otherwise) - clr %o2 - sethi %hi(1 << (32 - 4 - 1)), %g1 - cmp %o3, %g1 - blu not_really_big - clr %o4 - - ! Here the dividend is >= 2**(31-N) or so. We must be careful here, - ! as our usual N-at-a-shot divide step will cause overflow and havoc. - ! The number of bits in the result here is N*ITER+SC, where SC <= N. - ! Compute ITER in an unorthodox manner: know we need to shift V into - ! the top decade: so do not even bother to compare to R. - 1: - cmp %o5, %g1 - bgeu 3f - mov 1, %g2 - sll %o5, 4, %o5 - b 1b - add %o4, 1, %o4 - - ! Now compute %g2. - 2: addcc %o5, %o5, %o5 - bcc not_too_big - add %g2, 1, %g2 - - ! We get here if the %o1 overflowed while shifting. - ! This means that %o3 has the high-order bit set. - ! Restore %o5 and subtract from %o3. - sll %g1, 4, %g1 ! high order bit - srl %o5, 1, %o5 ! rest of %o5 - add %o5, %g1, %o5 - b do_single_div - sub %g2, 1, %g2 - - not_too_big: - 3: cmp %o5, %o3 - blu 2b - nop - be do_single_div - nop - /* NB: these are commented out in the V8-SPARC manual as well */ - /* (I do not understand this) */ - ! %o5 > %o3: went too far: back up 1 step - ! srl %o5, 1, %o5 - ! dec %g2 - ! do single-bit divide steps - ! - ! We have to be careful here. We know that %o3 >= %o5, so we can do the - ! first divide step without thinking. BUT, the others are conditional, - ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- - ! order bit set in the first step, just falling into the regular - ! division loop will mess up the first time around. - ! So we unroll slightly... - do_single_div: - subcc %g2, 1, %g2 - bl end_regular_divide - nop - sub %o3, %o5, %o3 - mov 1, %o2 - b end_single_divloop - nop - single_divloop: - sll %o2, 1, %o2 - bl 1f - srl %o5, 1, %o5 - ! %o3 >= 0 - sub %o3, %o5, %o3 - b 2f - add %o2, 1, %o2 - 1: ! %o3 < 0 - add %o3, %o5, %o3 - sub %o2, 1, %o2 - 2: - end_single_divloop: - subcc %g2, 1, %g2 - bge single_divloop - tst %o3 - b,a end_regular_divide - -not_really_big: -1: - sll %o5, 4, %o5 - cmp %o5, %o3 - bleu 1b - addcc %o4, 1, %o4 - be got_result - sub %o4, 1, %o4 - - tst %o3 ! set up for initial iteration -divloop: - sll %o2, 4, %o2 - ! depth 1, accumulated bits 0 - bl L1.16 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 2, accumulated bits 1 - bl L2.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 3, accumulated bits 3 - bl L3.19 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits 7 - bl L4.23 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (7*2+1), %o2 - -L4.23: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (7*2-1), %o2 - - -L3.19: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits 5 - bl L4.21 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (5*2+1), %o2 - -L4.21: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (5*2-1), %o2 - -L2.17: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 3, accumulated bits 1 - bl L3.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits 3 - bl L4.19 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (3*2+1), %o2 - -L4.19: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (3*2-1), %o2 - -L3.17: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits 1 - bl L4.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (1*2+1), %o2 - -L4.17: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (1*2-1), %o2 - -L1.16: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 2, accumulated bits -1 - bl L2.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 3, accumulated bits -1 - bl L3.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits -1 - bl L4.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-1*2+1), %o2 - -L4.15: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-1*2-1), %o2 - -L3.15: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits -3 - bl L4.13 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-3*2+1), %o2 - -L4.13: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-3*2-1), %o2 - -L2.15: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 3, accumulated bits -3 - bl L3.13 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits -5 - bl L4.11 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-5*2+1), %o2 - -L4.11: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-5*2-1), %o2 - -L3.13: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits -7 - bl L4.9 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-7*2+1), %o2 - -L4.9: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-7*2-1), %o2 - - 9: -end_regular_divide: - subcc %o4, 1, %o4 - bge divloop - tst %o3 - bl,a got_result - ! non-restoring fixup here (one instruction only!) - sub %o2, 1, %o2 - - -got_result: - ! check to see if answer should be < 0 - tst %g3 - bl,a 1f - sub %g0, %o2, %o2 -1: - retl - mov %o2, %o0 -#endif - -#ifdef L_modsi3 -/* This implementation was taken from glibc: - * - * Input: dividend and divisor in %o0 and %o1 respectively. - * - * Algorithm parameters: - * N how many bits per iteration we try to get (4) - * WORDSIZE total number of bits (32) - * - * Derived constants: - * TOPBITS number of bits in the top decade of a number - * - * Important variables: - * Q the partial quotient under development (initially 0) - * R the remainder so far, initially the dividend - * ITER number of main division loop iterations required; - * equal to ceil(log2(quotient) / N). Note that this - * is the log base (2^N) of the quotient. - * V the current comparand, initially divisor*2^(ITER*N-1) - * - * Cost: - * Current estimate for non-large dividend is - * ceil(log2(quotient) / N) * (10 + 7N/2) + C - * A large dividend is one greater than 2^(31-TOPBITS) and takes a - * different path, as the upper bits of the quotient must be developed - * one bit at a time. - */ -.text - .align 4 - .global .urem - .proc 4 -.urem: - b divide - mov 0, %g3 ! result always positive - - .align 4 - .global .rem - .proc 4 -.rem: - ! compute sign of result; if neither is negative, no problem - orcc %o1, %o0, %g0 ! either negative? - bge 2f ! no, go do the divide - mov %o0, %g3 ! sign of remainder matches %o0 - tst %o1 - bge 1f - tst %o0 - ! %o1 is definitely negative; %o0 might also be negative - bge 2f ! if %o0 not negative... - sub %g0, %o1, %o1 ! in any case, make %o1 nonneg -1: ! %o0 is negative, %o1 is nonnegative - sub %g0, %o0, %o0 ! make %o0 nonnegative -2: - - ! Ready to divide. Compute size of quotient; scale comparand. -divide: - orcc %o1, %g0, %o5 - bne 1f - mov %o0, %o3 - - ! Divide by zero trap. If it returns, return 0 (about as - ! wrong as possible, but that is what SunOS does...). - ta 0x2 !ST_DIV0 - retl - clr %o0 - -1: - cmp %o3, %o5 ! if %o1 exceeds %o0, done - blu got_result ! (and algorithm fails otherwise) - clr %o2 - sethi %hi(1 << (32 - 4 - 1)), %g1 - cmp %o3, %g1 - blu not_really_big - clr %o4 - - ! Here the dividend is >= 2**(31-N) or so. We must be careful here, - ! as our usual N-at-a-shot divide step will cause overflow and havoc. - ! The number of bits in the result here is N*ITER+SC, where SC <= N. - ! Compute ITER in an unorthodox manner: know we need to shift V into - ! the top decade: so do not even bother to compare to R. - 1: - cmp %o5, %g1 - bgeu 3f - mov 1, %g2 - sll %o5, 4, %o5 - b 1b - add %o4, 1, %o4 - - ! Now compute %g2. - 2: addcc %o5, %o5, %o5 - bcc not_too_big - add %g2, 1, %g2 - - ! We get here if the %o1 overflowed while shifting. - ! This means that %o3 has the high-order bit set. - ! Restore %o5 and subtract from %o3. - sll %g1, 4, %g1 ! high order bit - srl %o5, 1, %o5 ! rest of %o5 - add %o5, %g1, %o5 - b do_single_div - sub %g2, 1, %g2 - - not_too_big: - 3: cmp %o5, %o3 - blu 2b - nop - be do_single_div - nop - /* NB: these are commented out in the V8-SPARC manual as well */ - /* (I do not understand this) */ - ! %o5 > %o3: went too far: back up 1 step - ! srl %o5, 1, %o5 - ! dec %g2 - ! do single-bit divide steps - ! - ! We have to be careful here. We know that %o3 >= %o5, so we can do the - ! first divide step without thinking. BUT, the others are conditional, - ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high- - ! order bit set in the first step, just falling into the regular - ! division loop will mess up the first time around. - ! So we unroll slightly... - do_single_div: - subcc %g2, 1, %g2 - bl end_regular_divide - nop - sub %o3, %o5, %o3 - mov 1, %o2 - b end_single_divloop - nop - single_divloop: - sll %o2, 1, %o2 - bl 1f - srl %o5, 1, %o5 - ! %o3 >= 0 - sub %o3, %o5, %o3 - b 2f - add %o2, 1, %o2 - 1: ! %o3 < 0 - add %o3, %o5, %o3 - sub %o2, 1, %o2 - 2: - end_single_divloop: - subcc %g2, 1, %g2 - bge single_divloop - tst %o3 - b,a end_regular_divide - -not_really_big: -1: - sll %o5, 4, %o5 - cmp %o5, %o3 - bleu 1b - addcc %o4, 1, %o4 - be got_result - sub %o4, 1, %o4 - - tst %o3 ! set up for initial iteration -divloop: - sll %o2, 4, %o2 - ! depth 1, accumulated bits 0 - bl L1.16 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 2, accumulated bits 1 - bl L2.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 3, accumulated bits 3 - bl L3.19 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits 7 - bl L4.23 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (7*2+1), %o2 -L4.23: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (7*2-1), %o2 - -L3.19: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits 5 - bl L4.21 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (5*2+1), %o2 - -L4.21: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (5*2-1), %o2 - -L2.17: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 3, accumulated bits 1 - bl L3.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits 3 - bl L4.19 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (3*2+1), %o2 - -L4.19: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (3*2-1), %o2 - -L3.17: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits 1 - bl L4.17 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (1*2+1), %o2 - -L4.17: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (1*2-1), %o2 - -L1.16: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 2, accumulated bits -1 - bl L2.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 3, accumulated bits -1 - bl L3.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits -1 - bl L4.15 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-1*2+1), %o2 - -L4.15: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-1*2-1), %o2 - -L3.15: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits -3 - bl L4.13 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-3*2+1), %o2 - -L4.13: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-3*2-1), %o2 - -L2.15: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 3, accumulated bits -3 - bl L3.13 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - ! depth 4, accumulated bits -5 - bl L4.11 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-5*2+1), %o2 - -L4.11: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-5*2-1), %o2 - -L3.13: - ! remainder is negative - addcc %o3,%o5,%o3 - ! depth 4, accumulated bits -7 - bl L4.9 - srl %o5,1,%o5 - ! remainder is positive - subcc %o3,%o5,%o3 - b 9f - add %o2, (-7*2+1), %o2 - -L4.9: - ! remainder is negative - addcc %o3,%o5,%o3 - b 9f - add %o2, (-7*2-1), %o2 - - 9: -end_regular_divide: - subcc %o4, 1, %o4 - bge divloop - tst %o3 - bl,a got_result - ! non-restoring fixup here (one instruction only!) - add %o3, %o1, %o3 - -got_result: - ! check to see if answer should be < 0 - tst %g3 - bl,a 1f - sub %g0, %o3, %o3 -1: - retl - mov %o3, %o0 - -#endif - diff --git a/gcc/config/sparc/lb1spl.asm b/gcc/config/sparc/lb1spl.asm deleted file mode 100644 index 973401f8018..00000000000 --- a/gcc/config/sparc/lb1spl.asm +++ /dev/null @@ -1,246 +0,0 @@ -/* This is an assembly language implementation of mulsi3, divsi3, and modsi3 - for the sparclite processor. - - These routines are all from the SPARClite User's Guide, slightly edited - to match the desired calling convention, and also to optimize them. */ - -#ifdef L_udivsi3 -.text - .align 4 - .global .udiv - .proc 04 -.udiv: - wr %g0,%g0,%y ! Not a delayed write for sparclite - tst %g0 - divscc %o0,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - retl - divscc %g1,%o1,%o0 -#endif - -#ifdef L_umodsi3 -.text - .align 4 - .global .urem - .proc 04 -.urem: - wr %g0,%g0,%y ! Not a delayed write for sparclite - tst %g0 - divscc %o0,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - divscc %g1,%o1,%g1 - bl 1f - rd %y,%o0 - retl - nop -1: retl - add %o0,%o1,%o0 -#endif - -#ifdef L_divsi3 -.text - .align 4 - .global .div - .proc 04 -! ??? This routine could be made faster if was optimized, and if it was -! rewritten to only calculate the quotient. -.div: - wr %g0,%g0,%y ! Not a delayed write for sparclite - mov %o1,%o4 - tst %o1 - bl,a 1f - sub %g0,%o4,%o4 -1: tst %o0 - bl,a 2f - mov -1,%y -2: divscc %o0,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - be 6f - mov %y,%o3 - bg 4f - addcc %o3,%o4,%g0 - be,a 6f - mov %g0,%o3 - tst %o0 - bl 5f - tst %g1 - ba 5f - add %o3,%o4,%o3 -4: subcc %o3,%o4,%g0 - be,a 6f - mov %g0,%o3 - tst %o0 - bge 5f - tst %g1 - sub %o3,%o4,%o3 -5: bl,a 6f - add %g1,1,%g1 -6: tst %o1 - bl,a 7f - sub %g0,%g1,%g1 -7: retl - mov %g1,%o0 ! Quotient is in %g1. -#endif - -#ifdef L_modsi3 -.text - .align 4 - .global .rem - .proc 04 -! ??? This routine could be made faster if was optimized, and if it was -! rewritten to only calculate the remainder. -.rem: - wr %g0,%g0,%y ! Not a delayed write for sparclite - mov %o1,%o4 - tst %o1 - bl,a 1f - sub %g0,%o4,%o4 -1: tst %o0 - bl,a 2f - mov -1,%y -2: divscc %o0,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - divscc %g1,%o4,%g1 - be 6f - mov %y,%o3 - bg 4f - addcc %o3,%o4,%g0 - be,a 6f - mov %g0,%o3 - tst %o0 - bl 5f - tst %g1 - ba 5f - add %o3,%o4,%o3 -4: subcc %o3,%o4,%g0 - be,a 6f - mov %g0,%o3 - tst %o0 - bge 5f - tst %g1 - sub %o3,%o4,%o3 -5: bl,a 6f - add %g1,1,%g1 -6: tst %o1 - bl,a 7f - sub %g0,%g1,%g1 -7: retl - mov %o3,%o0 ! Remainder is in %o3. -#endif diff --git a/gcc/config/sparc/libgcc-sparc-glibc.ver b/gcc/config/sparc/libgcc-sparc-glibc.ver deleted file mode 100644 index 91138d3795e..00000000000 --- a/gcc/config/sparc/libgcc-sparc-glibc.ver +++ /dev/null @@ -1,93 +0,0 @@ -# Copyright (C) 2002, 2006, 2008 Free Software Foundation, Inc. -# -# This file is part of GCC. -# -# GCC is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3, or (at your option) -# any later version. -# -# GCC is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with GCC; see the file COPYING3. If not see -# <http://www.gnu.org/licenses/>. - -# In order to work around the very problems that force us to now generally -# create a libgcc.so, glibc reexported a number of routines from libgcc.a. -# By now choosing the same version tags for these specific routines, we -# maintain enough binary compatibility to allow future versions of glibc -# to defer implementation of these routines to libgcc.so via DT_AUXILIARY. - -%exclude { - __divdi3 - __moddi3 - __udivdi3 - __umoddi3 - __register_frame - __register_frame_table - __deregister_frame - __register_frame_info - __deregister_frame_info - __frame_state_for - __register_frame_info_table -} - -%ifdef __arch64__ -%define GLIBC_VER GLIBC_2.2 -%else -%define GLIBC_VER GLIBC_2.0 -%endif -%inherit GCC_3.0 GLIBC_VER -GLIBC_VER { - # Sampling of DImode arithmetic used by (at least) i386 and m68k. - __divdi3 - __moddi3 - __udivdi3 - __umoddi3 - - # Exception handling support functions used by most everyone. - __register_frame - __register_frame_table - __deregister_frame - __register_frame_info - __deregister_frame_info - __frame_state_for - __register_frame_info_table -} - -%if !defined (__arch64__) && defined (__LONG_DOUBLE_128__) - -# long double 128 bit support from 32-bit libgcc_s.so.1 is only available -# when configured with --with-long-double-128. Make sure all the -# symbols are available at @@GCC_LDBL_* versions to make it clear -# there is a configurable symbol set. - -%exclude { - __fixtfdi - __fixunstfdi - __floatditf - - __divtc3 - __multc3 - __powitf2 -} - -%inherit GCC_LDBL_3.0 GCC_3.0 -GCC_LDBL_3.0 { - __fixtfdi - __fixunstfdi - __floatditf -} - -%inherit GCC_LDBL_4.0.0 GCC_4.0.0 -GCC_LDBL_4.0.0 { - __divtc3 - __multc3 - __powitf2 -} - -%endif diff --git a/gcc/config/sparc/sparc-protos.h b/gcc/config/sparc/sparc-protos.h index 108e105cbea..b9a094e160a 100644 --- a/gcc/config/sparc/sparc-protos.h +++ b/gcc/config/sparc/sparc-protos.h @@ -108,6 +108,7 @@ extern const char *output_v8plus_mult (rtx, rtx *, const char *); extern void sparc_expand_vector_init (rtx, rtx); extern void sparc_expand_vec_perm_bmask(enum machine_mode, rtx); extern bool sparc_expand_conditional_move (enum machine_mode, rtx *); +extern void sparc_expand_vcond (enum machine_mode, rtx *, int, int); #endif /* RTX_CODE */ #endif /* __SPARC_PROTOS_H__ */ diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c index 3883dbd21d8..5d22fc0313e 100644 --- a/gcc/config/sparc/sparc.c +++ b/gcc/config/sparc/sparc.c @@ -3440,7 +3440,7 @@ sparc_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict) REG+REG address, then only one of them gets converted to an offsettable address. */ if (mode == TFmode - && ! (TARGET_FPU && TARGET_ARCH64 && TARGET_HARD_QUAD)) + && ! (TARGET_ARCH64 && TARGET_HARD_QUAD)) return 0; /* We prohibit REG + REG on ARCH32 if not optimizing for @@ -11279,20 +11279,366 @@ output_v8plus_mult (rtx insn, rtx *operands, const char *name) } } +static void +vector_init_bshuffle (rtx target, rtx *locs, int n_elts, enum machine_mode mode, + enum machine_mode inner_mode) +{ + rtx mid_target, r0_high, r0_low, r1_high, r1_low; + enum machine_mode partial_mode; + int bmask, i, idxs[8]; + + partial_mode = (mode == V4HImode + ? V2HImode + : (mode == V8QImode + ? V4QImode : mode)); + + r0_high = r0_low = NULL_RTX; + r1_high = r1_low = NULL_RTX; + + /* Move the pieces into place, as needed, and calculate the nibble + indexes for the bmask calculation. After we execute this loop the + locs[] array is no longer needed. Therefore, to simplify things, + we set entries that have been processed already to NULL_RTX. */ + + for (i = 0; i < n_elts; i++) + { + int j; + + if (locs[i] == NULL_RTX) + continue; + + if (!r0_low) + { + r0_low = locs[i]; + idxs[i] = 0x7; + } + else if (!r1_low) + { + r1_low = locs[i]; + idxs[i] = 0xf; + } + else if (!r0_high) + { + r0_high = gen_highpart (partial_mode, r0_low); + emit_move_insn (r0_high, gen_lowpart (partial_mode, locs[i])); + idxs[i] = 0x3; + } + else if (!r1_high) + { + r1_high = gen_highpart (partial_mode, r1_low); + emit_move_insn (r1_high, gen_lowpart (partial_mode, locs[i])); + idxs[i] = 0xb; + } + else + gcc_unreachable (); + + for (j = i + 1; j < n_elts; j++) + { + if (locs[j] == locs[i]) + { + locs[j] = NULL_RTX; + idxs[j] = idxs[i]; + } + } + locs[i] = NULL_RTX; + } + + bmask = 0; + for (i = 0; i < n_elts; i++) + { + int v = idxs[i]; + + switch (GET_MODE_SIZE (inner_mode)) + { + case 2: + bmask <<= 8; + bmask |= (((v - 1) << 4) | v); + break; + + case 1: + bmask <<= 4; + bmask |= v; + break; + + default: + gcc_unreachable (); + } + } + + emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), CONST0_RTX (SImode), + force_reg (SImode, GEN_INT (bmask)))); + + mid_target = target; + if (GET_MODE_SIZE (mode) == 4) + { + mid_target = gen_reg_rtx (mode == V2HImode + ? V4HImode : V8QImode); + } + + if (!r1_low) + r1_low = r0_low; + + switch (GET_MODE (mid_target)) + { + case V4HImode: + emit_insn (gen_bshufflev4hi_vis (mid_target, r0_low, r1_low)); + break; + case V8QImode: + emit_insn (gen_bshufflev8qi_vis (mid_target, r0_low, r1_low)); + break; + default: + gcc_unreachable (); + } + + if (mid_target != target) + emit_move_insn (target, gen_lowpart (partial_mode, mid_target)); +} + +static bool +vector_init_move_words (rtx target, rtx vals, enum machine_mode mode, + enum machine_mode inner_mode) +{ + switch (mode) + { + case V1SImode: + case V1DImode: + emit_move_insn (gen_lowpart (inner_mode, target), + gen_lowpart (inner_mode, XVECEXP (vals, 0, 0))); + return true; + + case V2SImode: + emit_move_insn (gen_highpart (SImode, target), XVECEXP (vals, 0, 0)); + emit_move_insn (gen_lowpart (SImode, target), XVECEXP (vals, 0, 1)); + return true; + + default: + break; + } + return false; +} + +/* Move the elements in rtvec VALS into registers compatible with MODE. + Store the rtx for these regs into the corresponding array entry of + LOCS. */ +static void +vector_init_prepare_elts (rtx vals, int n_elts, rtx *locs, enum machine_mode mode, + enum machine_mode inner_mode) +{ + enum machine_mode loc_mode; + int i; + + switch (mode) + { + case V2HImode: + loc_mode = V4HImode; + break; + + case V4QImode: + loc_mode = V8QImode; + break; + + case V4HImode: + case V8QImode: + loc_mode = mode; + break; + + default: + gcc_unreachable (); + } + + gcc_assert (GET_MODE_SIZE (inner_mode) <= 4); + for (i = 0; i < n_elts; i++) + { + rtx dst, elt = XVECEXP (vals, 0, i); + int j; + + /* Did we see this already? If so just record it's location. */ + dst = NULL_RTX; + for (j = 0; j < i; j++) + { + if (XVECEXP (vals, 0, j) == elt) + { + dst = locs[j]; + break; + } + } + + if (! dst) + { + enum rtx_code code = GET_CODE (elt); + + dst = gen_reg_rtx (loc_mode); + + /* We use different strategies based upon whether the element + is in memory or in a register. When we start in a register + and we're VIS3 capable, it's always cheaper to use the VIS3 + int-->fp register moves since we avoid having to use stack + memory. */ + if ((TARGET_VIS3 && (code == REG || code == SUBREG)) + || (CONSTANT_P (elt) + && (const_zero_operand (elt, inner_mode) + || const_all_ones_operand (elt, inner_mode)))) + { + elt = convert_modes (SImode, inner_mode, elt, true); + + emit_clobber (dst); + emit_move_insn (gen_lowpart (SImode, dst), elt); + } + else + { + rtx m = elt; + + if (CONSTANT_P (elt)) + { + m = force_const_mem (inner_mode, elt); + } + else if (code != MEM) + { + rtx stk = assign_stack_temp (inner_mode, GET_MODE_SIZE(inner_mode), 0); + emit_move_insn (stk, elt); + m = stk; + } + + switch (loc_mode) + { + case V4HImode: + emit_insn (gen_zero_extend_v4hi_vis (dst, m)); + break; + case V8QImode: + emit_insn (gen_zero_extend_v8qi_vis (dst, m)); + break; + default: + gcc_unreachable (); + } + } + } + locs[i] = dst; + } +} + +static void +sparc_expand_vector_init_vis2 (rtx target, rtx *locs, int n_elts, int n_unique, + enum machine_mode mode, + enum machine_mode inner_mode) +{ + if (n_unique <= 4) + { + vector_init_bshuffle (target, locs, n_elts, mode, inner_mode); + } + else + { + int i; + + gcc_assert (mode == V8QImode); + + emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode), + force_reg (SImode, GEN_INT (7)), + CONST0_RTX (SImode))); + i = n_elts - 1; + emit_insn (gen_faligndatav8qi_vis (target, locs[i], locs[i])); + while (--i >= 0) + emit_insn (gen_faligndatav8qi_vis (target, locs[i], target)); + } +} + +static void +sparc_expand_vector_init_vis1 (rtx target, rtx *locs, int n_elts, int n_unique, + enum machine_mode mode) +{ + enum machine_mode full_mode = mode; + rtx (*emitter)(rtx, rtx, rtx); + int alignaddr_val, i; + rtx tmp = target; + + if (n_unique == 1 && mode == V8QImode) + { + rtx t2, t2_low, t1; + + t1 = gen_reg_rtx (V4QImode); + emit_move_insn (t1, gen_lowpart (V4QImode, locs[0])); + + t2 = gen_reg_rtx (V8QImode); + t2_low = gen_lowpart (V4QImode, t2); + + /* xxxxxxAA --> xxxxxxxxxxxxAAAA + xxxxAAAA --> xxxxxxxxAAAAAAAA + AAAAAAAA --> AAAAAAAAAAAAAAAA */ + emit_insn (gen_fpmerge_vis (t2, t1, t1)); + emit_move_insn (t1, t2_low); + emit_insn (gen_fpmerge_vis (t2, t1, t1)); + emit_move_insn (t1, t2_low); + emit_insn (gen_fpmerge_vis (target, t1, t1)); + return; + } + + switch (mode) + { + case V2HImode: + full_mode = V4HImode; + /* FALLTHRU */ + case V4HImode: + emitter = gen_faligndatav4hi_vis; + alignaddr_val = 6; + break; + + case V4QImode: + full_mode = V8QImode; + /* FALLTHRU */ + case V8QImode: + emitter = gen_faligndatav8qi_vis; + alignaddr_val = 7; + break; + + default: + gcc_unreachable (); + } + + if (full_mode != mode) + tmp = gen_reg_rtx (full_mode); + + emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode), + force_reg (SImode, GEN_INT (alignaddr_val)), + CONST0_RTX (SImode))); + + i = n_elts - 1; + emit_insn (emitter (tmp, locs[i], locs[i])); + while (--i >= 0) + emit_insn (emitter (tmp, locs[i], tmp)); + + if (tmp != target) + emit_move_insn (target, gen_highpart (mode, tmp)); +} + void sparc_expand_vector_init (rtx target, rtx vals) { enum machine_mode mode = GET_MODE (target); enum machine_mode inner_mode = GET_MODE_INNER (mode); int n_elts = GET_MODE_NUNITS (mode); - int i, n_var = 0; - rtx mem; + int i, n_var = 0, n_unique = 0; + rtx locs[8]; + + gcc_assert (n_elts <= 8); for (i = 0; i < n_elts; i++) { rtx x = XVECEXP (vals, 0, i); + bool found = false; + int j; + if (!CONSTANT_P (x)) n_var++; + + for (j = 0; j < i; j++) + { + if (rtx_equal_p (x, XVECEXP (vals, 0, j))) + { + found = true; + break; + } + } + if (!found) + n_unique++; } if (n_var == 0) @@ -11301,12 +11647,16 @@ sparc_expand_vector_init (rtx target, rtx vals) return; } - mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0); - for (i = 0; i < n_elts; i++) - emit_move_insn (adjust_address_nv (mem, inner_mode, - i * GET_MODE_SIZE (inner_mode)), - XVECEXP (vals, 0, i)); - emit_move_insn (target, mem); + if (vector_init_move_words (target, vals, mode, inner_mode)) + return; + + vector_init_prepare_elts (vals, n_elts, locs, mode, inner_mode); + + if (TARGET_VIS2) + sparc_expand_vector_init_vis2 (target, locs, n_elts, n_unique, + mode, inner_mode); + else + sparc_expand_vector_init_vis1 (target, locs, n_elts, n_unique, mode); } static reg_class_t @@ -11380,12 +11730,16 @@ sparc_expand_conditional_move (enum machine_mode mode, rtx *operands) rtx cc_reg, dst, cmp; cmp = operands[1]; - cmp_mode = GET_MODE (XEXP (cmp, 0)); - if (cmp_mode == DImode && !TARGET_ARCH64) + if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64) return false; - dst = operands[0]; + if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD) + cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc); + + cmp_mode = GET_MODE (XEXP (cmp, 0)); + rc = GET_CODE (cmp); + dst = operands[0]; if (! rtx_equal_p (operands[2], dst) && ! rtx_equal_p (operands[3], dst)) { @@ -11404,9 +11758,6 @@ sparc_expand_conditional_move (enum machine_mode mode, rtx *operands) rc = reverse_condition (rc); } - if (cmp_mode == TFmode && !TARGET_HARD_QUAD) - cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc); - if (XEXP (cmp, 1) == const0_rtx && GET_CODE (XEXP (cmp, 0)) == REG && cmp_mode == DImode @@ -11426,4 +11777,41 @@ sparc_expand_conditional_move (enum machine_mode mode, rtx *operands) return true; } +void +sparc_expand_vcond (enum machine_mode mode, rtx *operands, int ccode, int fcode) +{ + rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr; + enum rtx_code code = GET_CODE (operands[3]); + + mask = gen_reg_rtx (Pmode); + cop0 = operands[4]; + cop1 = operands[5]; + if (code == LT || code == GE) + { + rtx t; + + code = swap_condition (code); + t = cop0; cop0 = cop1; cop1 = t; + } + + gsr = gen_rtx_REG (DImode, SPARC_GSR_REG); + + fcmp = gen_rtx_UNSPEC (Pmode, + gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)), + fcode); + + cmask = gen_rtx_UNSPEC (DImode, + gen_rtvec (2, mask, gsr), + ccode); + + bshuf = gen_rtx_UNSPEC (mode, + gen_rtvec (3, operands[1], operands[2], gsr), + UNSPEC_BSHUFFLE); + + emit_insn (gen_rtx_SET (VOIDmode, mask, fcmp)); + emit_insn (gen_rtx_SET (VOIDmode, gsr, cmask)); + + emit_insn (gen_rtx_SET (VOIDmode, operands[0], bshuf)); +} + #include "gt-sparc.h" diff --git a/gcc/config/sparc/sparc.h b/gcc/config/sparc/sparc.h index 65b45271890..e8707f50577 100644 --- a/gcc/config/sparc/sparc.h +++ b/gcc/config/sparc/sparc.h @@ -328,6 +328,7 @@ extern enum cmodel sparc_cmodel; %{mcpu=sparclite:-Asparclite} \ %{mcpu=sparclite86x:-Asparclite} \ %{mcpu=f930:-Asparclite} %{mcpu=f934:-Asparclite} \ +%{mcpu=v8:-Av8} \ %{mv8plus:-Av8plus} \ %{mcpu=v9:-Av9} \ %{mcpu=ultrasparc:%{!mv8plus:-Av9a}} \ diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md index 6dd390952c3..7452f96c9d3 100644 --- a/gcc/config/sparc/sparc.md +++ b/gcc/config/sparc/sparc.md @@ -92,6 +92,7 @@ (UNSPEC_MUL8 86) (UNSPEC_MUL8SU 87) (UNSPEC_MULDSU 88) + (UNSPEC_SHORT_LOAD 89) ]) (define_constants @@ -2041,8 +2042,8 @@ }) (define_insn "*movsf_insn" - [(set (match_operand:SF 0 "nonimmediate_operand" "=d,d,f, *r,*r,*r,*r, f, f,*r, m, m") - (match_operand:SF 1 "input_operand" "G,C,f,*rR, Q, S, f,*r, m, m, f,*rG"))] + [(set (match_operand:SF 0 "nonimmediate_operand" "=d,d,f, *r,*r,*r,*r, f,f,*r,m, m") + (match_operand:SF 1 "input_operand" "G,C,f,*rR, Q, S, f,*r,m, m,f,*rG"))] "(register_operand (operands[0], SFmode) || register_or_zero_or_all_ones_operand (operands[1], SFmode))" { @@ -2138,8 +2139,8 @@ }) (define_insn "*movdf_insn_sp32" - [(set (match_operand:DF 0 "nonimmediate_operand" "=b,b,e,e,*r, f, e,T,W,U,T, f, *r, o,o") - (match_operand:DF 1 "input_operand" "G,C,e,e, f,*r,W#F,G,e,T,U,o#F,*roGF,*rG,f"))] + [(set (match_operand:DF 0 "nonimmediate_operand" "=b,b,e,e,*r, f, e,T,W,U,T, f, *r, o,o") + (match_operand:DF 1 "input_operand" "G,C,e,e, f,*r,W#F,G,e,T,U,o#F,*roF,*rG,f"))] "! TARGET_ARCH64 && (register_operand (operands[0], DFmode) || register_or_zero_or_all_ones_operand (operands[1], DFmode))" @@ -2166,7 +2167,7 @@ (define_insn "*movdf_insn_sp64" [(set (match_operand:DF 0 "nonimmediate_operand" "=b,b,e,*r, e, e,W, *r,*r, m,*r") - (match_operand:DF 1 "input_operand" "G,C,e, e,*r,W#F,e,*rG, m,*rG, F"))] + (match_operand:DF 1 "input_operand" "G,C,e, e,*r,W#F,e,*rG, m,*rG, F"))] "TARGET_ARCH64 && (register_operand (operands[0], DFmode) || register_or_zero_or_all_ones_operand (operands[1], DFmode))" @@ -2191,9 +2192,8 @@ (define_split [(set (match_operand:DF 0 "register_operand" "") (match_operand:DF 1 "const_double_operand" ""))] - "TARGET_FPU - && (GET_CODE (operands[0]) == REG - && SPARC_INT_REG_P (REGNO (operands[0]))) + "REG_P (operands[0]) + && SPARC_INT_REG_P (REGNO (operands[0])) && ! const_zero_operand (operands[1], GET_MODE (operands[0])) && reload_completed" [(clobber (const_int 0))] @@ -2378,45 +2378,30 @@ }) (define_insn "*movtf_insn_sp32" - [(set (match_operand:TF 0 "nonimmediate_operand" "=b,e,o,U,r") - (match_operand:TF 1 "input_operand" "G,oe,GeUr,o,roG"))] - "TARGET_FPU - && ! TARGET_ARCH64 - && (register_operand (operands[0], TFmode) - || register_or_zero_operand (operands[1], TFmode))" - "#" - [(set_attr "length" "4")]) - -;; Exactly the same as above, except that all `e' cases are deleted. -;; This is necessary to prevent reload from ever trying to use a `e' reg -;; when -mno-fpu. - -(define_insn "*movtf_insn_sp32_no_fpu" - [(set (match_operand:TF 0 "nonimmediate_operand" "=o,U,o,r,o") - (match_operand:TF 1 "input_operand" "G,o,U,roG,r"))] - "! TARGET_FPU - && ! TARGET_ARCH64 + [(set (match_operand:TF 0 "nonimmediate_operand" "=b, e,o, o,U, r") + (match_operand:TF 1 "input_operand" " G,oe,e,rGU,o,roG"))] + "! TARGET_ARCH64 && (register_operand (operands[0], TFmode) || register_or_zero_operand (operands[1], TFmode))" "#" - [(set_attr "length" "4")]) + [(set_attr "length" "4,4,4,4,4,4") + (set_attr "cpu_feature" "fpu,fpu,fpu,*,*,*")]) (define_insn "*movtf_insn_sp64" - [(set (match_operand:TF 0 "nonimmediate_operand" "=b,e,o,r") - (match_operand:TF 1 "input_operand" "G,oe,Ger,roG"))] - "TARGET_FPU - && TARGET_ARCH64 + [(set (match_operand:TF 0 "nonimmediate_operand" "=b, e,o, o, r") + (match_operand:TF 1 "input_operand" "G,oe,e,rG,roG"))] + "TARGET_ARCH64 && ! TARGET_HARD_QUAD && (register_operand (operands[0], TFmode) || register_or_zero_operand (operands[1], TFmode))" "#" - [(set_attr "length" "2")]) + [(set_attr "length" "2,2,2,2,2") + (set_attr "cpu_feature" "fpu,fpu,fpu,*,*")]) (define_insn "*movtf_insn_sp64_hq" - [(set (match_operand:TF 0 "nonimmediate_operand" "=b,e,e,m,o,r") - (match_operand:TF 1 "input_operand" "G,e,m,e,rG,roG"))] - "TARGET_FPU - && TARGET_ARCH64 + [(set (match_operand:TF 0 "nonimmediate_operand" "=b,e,e,m, o, r") + (match_operand:TF 1 "input_operand" "G,e,m,e,rG,roG"))] + "TARGET_ARCH64 && TARGET_HARD_QUAD && (register_operand (operands[0], TFmode) || register_or_zero_operand (operands[1], TFmode))" @@ -2430,16 +2415,6 @@ [(set_attr "type" "*,fpmove,fpload,fpstore,*,*") (set_attr "length" "2,*,*,*,2,2")]) -(define_insn "*movtf_insn_sp64_no_fpu" - [(set (match_operand:TF 0 "nonimmediate_operand" "=r,o") - (match_operand:TF 1 "input_operand" "orG,rG"))] - "! TARGET_FPU - && TARGET_ARCH64 - && (register_operand (operands[0], TFmode) - || register_or_zero_operand (operands[1], TFmode))" - "#" - [(set_attr "length" "2")]) - ;; Now all the splits to handle multi-insn TF mode moves. (define_split [(set (match_operand:TF 0 "register_operand" "") @@ -7856,6 +7831,48 @@ DONE; }) +(define_expand "zero_extend_v8qi_vis" + [(set (match_operand:V8QI 0 "register_operand" "") + (unspec:V8QI [(match_operand:QI 1 "memory_operand" "")] + UNSPEC_SHORT_LOAD))] + "TARGET_VIS" +{ + if (! REG_P (XEXP (operands[1], 0))) + { + rtx addr = force_reg (Pmode, XEXP (operands[1], 0)); + operands[1] = replace_equiv_address (operands[1], addr); + } +}) + +(define_expand "zero_extend_v4hi_vis" + [(set (match_operand:V4HI 0 "register_operand" "") + (unspec:V4HI [(match_operand:HI 1 "memory_operand" "")] + UNSPEC_SHORT_LOAD))] + "TARGET_VIS" +{ + if (! REG_P (XEXP (operands[1], 0))) + { + rtx addr = force_reg (Pmode, XEXP (operands[1], 0)); + operands[1] = replace_equiv_address (operands[1], addr); + } +}) + +(define_insn "*zero_extend_v8qi_<P:mode>_insn" + [(set (match_operand:V8QI 0 "register_operand" "=e") + (unspec:V8QI [(mem:QI + (match_operand:P 1 "register_operand" "r"))] + UNSPEC_SHORT_LOAD))] + "TARGET_VIS" + "ldda\t[%1] 0xd0, %0") + +(define_insn "*zero_extend_v4hi_<P:mode>_insn" + [(set (match_operand:V4HI 0 "register_operand" "=e") + (unspec:V4HI [(mem:HI + (match_operand:P 1 "register_operand" "r"))] + UNSPEC_SHORT_LOAD))] + "TARGET_VIS" + "ldda\t[%1] 0xd2, %0") + (define_expand "vec_init<mode>" [(match_operand:VMALL 0 "register_operand" "") (match_operand:VMALL 1 "" "")] @@ -8299,6 +8316,36 @@ [(set_attr "type" "fpmul") (set_attr "fptype" "double")]) +(define_expand "vcond<mode><mode>" + [(match_operand:GCM 0 "register_operand" "") + (match_operand:GCM 1 "register_operand" "") + (match_operand:GCM 2 "register_operand" "") + (match_operator 3 "" + [(match_operand:GCM 4 "register_operand" "") + (match_operand:GCM 5 "register_operand" "")])] + "TARGET_VIS3" +{ + sparc_expand_vcond (<MODE>mode, operands, + UNSPEC_CMASK<gcm_name>, + UNSPEC_FCMP); + DONE; +}) + +(define_expand "vconduv8qiv8qi" + [(match_operand:V8QI 0 "register_operand" "") + (match_operand:V8QI 1 "register_operand" "") + (match_operand:V8QI 2 "register_operand" "") + (match_operator 3 "" + [(match_operand:V8QI 4 "register_operand" "") + (match_operand:V8QI 5 "register_operand" "")])] + "TARGET_VIS3" +{ + sparc_expand_vcond (V8QImode, operands, + UNSPEC_CMASK8, + UNSPEC_FUCMP); + DONE; +}) + (define_insn "array8<P:mode>_vis" [(set (match_operand:P 0 "register_operand" "=r") (unspec:P [(match_operand:P 1 "register_or_zero_operand" "rJ") @@ -8452,7 +8499,7 @@ ;; Conditional moves are possible via fcmpX --> cmaskX -> bshuffle (define_insn "cmask8<P:mode>_vis" [(set (reg:DI GSR_REG) - (unspec:DI [(match_operand:P 0 "register_operand" "r") + (unspec:DI [(match_operand:P 0 "register_or_zero_operand" "rJ") (reg:DI GSR_REG)] UNSPEC_CMASK8))] "TARGET_VIS3" @@ -8460,7 +8507,7 @@ (define_insn "cmask16<P:mode>_vis" [(set (reg:DI GSR_REG) - (unspec:DI [(match_operand:P 0 "register_operand" "r") + (unspec:DI [(match_operand:P 0 "register_or_zero_operand" "rJ") (reg:DI GSR_REG)] UNSPEC_CMASK16))] "TARGET_VIS3" @@ -8468,7 +8515,7 @@ (define_insn "cmask32<P:mode>_vis" [(set (reg:DI GSR_REG) - (unspec:DI [(match_operand:P 0 "register_operand" "r") + (unspec:DI [(match_operand:P 0 "register_or_zero_operand" "rJ") (reg:DI GSR_REG)] UNSPEC_CMASK32))] "TARGET_VIS3" diff --git a/gcc/config/sparc/t-elf b/gcc/config/sparc/t-elf index 7073bcb7721..e9acfe3693e 100644 --- a/gcc/config/sparc/t-elf +++ b/gcc/config/sparc/t-elf @@ -17,12 +17,6 @@ # along with GCC; see the file COPYING3. If not see # <http://www.gnu.org/licenses/>. -LIB1ASMSRC = sparc/lb1spc.asm -LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 - MULTILIB_OPTIONS = msoft-float mcpu=v8 mflat MULTILIB_DIRNAMES = soft v8 flat MULTILIB_MATCHES = msoft-float=mno-fpu - -LIBGCC = stmp-multilib -INSTALL_LIBGCC = install-multilib diff --git a/gcc/config/sparc/t-leon b/gcc/config/sparc/t-leon index 4f9d0a9e797..25fc61136a9 100644 --- a/gcc/config/sparc/t-leon +++ b/gcc/config/sparc/t-leon @@ -16,15 +16,9 @@ # along with GCC; see the file COPYING3. If not see # <http://www.gnu.org/licenses/>. -LIB1ASMSRC = sparc/lb1spc.asm -LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 - # Multilibs for LEON # LEON is a SPARC-V8, but the AT697 implementation has a bug in the # V8-specific instructions. MULTILIB_OPTIONS = mcpu=v7 msoft-float mflat MULTILIB_DIRNAMES = v7 soft flat MULTILIB_MATCHES = mcpu?v7=mv7 msoft-float=mno-fpu - -LIBGCC = stmp-multilib -INSTALL_LIBGCC = install-multilib diff --git a/gcc/config/sparc/t-leon3 b/gcc/config/sparc/t-leon3 index 0e7e45cc594..acdd1f2c67b 100644 --- a/gcc/config/sparc/t-leon3 +++ b/gcc/config/sparc/t-leon3 @@ -20,6 +20,3 @@ MULTILIB_OPTIONS = msoft-float MULTILIB_DIRNAMES = soft MULTILIB_MATCHES = msoft-float=mno-fpu - -LIBGCC = stmp-multilib -INSTALL_LIBGCC = install-multilib diff --git a/gcc/config/sparc/t-linux b/gcc/config/sparc/t-linux deleted file mode 100644 index 30daa376614..00000000000 --- a/gcc/config/sparc/t-linux +++ /dev/null @@ -1,5 +0,0 @@ -# Override t-slibgcc-elf-ver to export some libgcc symbols with -# the symbol versions that glibc used. -# Avoid the t-linux version file. -SHLIB_MAPFILES = $$(libgcc_objdir)/libgcc-std.ver \ - $(srcdir)/config/sparc/libgcc-sparc-glibc.ver diff --git a/gcc/config/sparc/t-linux64 b/gcc/config/sparc/t-linux64 index 74d04898d9f..d9dfad66ce7 100644 --- a/gcc/config/sparc/t-linux64 +++ b/gcc/config/sparc/t-linux64 @@ -1,5 +1,5 @@ # Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, -# 2006, 2010 Free Software Foundation, Inc. +# 2006, 2010, 2011 Free Software Foundation, Inc. # # This file is part of GCC. # @@ -27,10 +27,3 @@ MULTILIB_OPTIONS = m64/m32 MULTILIB_DIRNAMES = 64 32 MULTILIB_OSDIRNAMES = ../lib64 $(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib) - -LIBGCC = stmp-multilib -INSTALL_LIBGCC = install-multilib - -CRTSTUFF_T_CFLAGS = `if test x$$($(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) \ - -print-multi-os-directory) \ - = x../lib64; then echo -mcmodel=medany; fi` diff --git a/gcc/config/sparc/t-netbsd64 b/gcc/config/sparc/t-netbsd64 index 0fddb0ffe87..bc783c19366 100644 --- a/gcc/config/sparc/t-netbsd64 +++ b/gcc/config/sparc/t-netbsd64 @@ -1,8 +1,5 @@ -# Disable multilib fow now, as NetBSD/sparc64 does not ship with +# Disable multilib for now, as NetBSD/sparc64 does not ship with # a 32-bit environment. #MULTILIB_OPTIONS = m32/m64 #MULTILIB_DIRNAMES = 32 64 #MULTILIB_MATCHES = - -#LIBGCC = stmp-multilib -#INSTALL_LIBGCC = install-multilib |