summaryrefslogtreecommitdiff
path: root/gcc/config/sparc
diff options
context:
space:
mode:
authorbstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>2011-11-06 15:16:32 +0000
committerbstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>2011-11-06 15:16:32 +0000
commit31ba6c3ff2311bad9422246f49d59c532cbb5078 (patch)
tree6e862e3ea14b2edf93a92c404a0d9b29f3f9ba65 /gcc/config/sparc
parentbab85b65e545231656361b997a81fb8a44b266b4 (diff)
downloadgcc-31ba6c3ff2311bad9422246f49d59c532cbb5078.tar.gz
2011-11-06 Basile Starynkevitch <basile@starynkevitch.net>
MELT branch merged with trunk rev 181026 using svnmerge git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/melt-branch@181034 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config/sparc')
-rw-r--r--gcc/config/sparc/lb1spc.asm784
-rw-r--r--gcc/config/sparc/lb1spl.asm246
-rw-r--r--gcc/config/sparc/libgcc-sparc-glibc.ver93
-rw-r--r--gcc/config/sparc/sparc-protos.h1
-rw-r--r--gcc/config/sparc/sparc.c418
-rw-r--r--gcc/config/sparc/sparc.h1
-rw-r--r--gcc/config/sparc/sparc.md145
-rw-r--r--gcc/config/sparc/t-elf6
-rw-r--r--gcc/config/sparc/t-leon6
-rw-r--r--gcc/config/sparc/t-leon33
-rw-r--r--gcc/config/sparc/t-linux5
-rw-r--r--gcc/config/sparc/t-linux649
-rw-r--r--gcc/config/sparc/t-netbsd645
13 files changed, 503 insertions, 1219 deletions
diff --git a/gcc/config/sparc/lb1spc.asm b/gcc/config/sparc/lb1spc.asm
deleted file mode 100644
index b60bd5740e7..00000000000
--- a/gcc/config/sparc/lb1spc.asm
+++ /dev/null
@@ -1,784 +0,0 @@
-/* This is an assembly language implementation of mulsi3, divsi3, and modsi3
- for the sparc processor.
-
- These routines are derived from the SPARC Architecture Manual, version 8,
- slightly edited to match the desired calling convention, and also to
- optimize them for our purposes. */
-
-#ifdef L_mulsi3
-.text
- .align 4
- .global .umul
- .proc 4
-.umul:
- or %o0, %o1, %o4 ! logical or of multiplier and multiplicand
- mov %o0, %y ! multiplier to Y register
- andncc %o4, 0xfff, %o5 ! mask out lower 12 bits
- be mul_shortway ! can do it the short way
- andcc %g0, %g0, %o4 ! zero the partial product and clear NV cc
- !
- ! long multiply
- !
- mulscc %o4, %o1, %o4 ! first iteration of 33
- mulscc %o4, %o1, %o4
- mulscc %o4, %o1, %o4
- mulscc %o4, %o1, %o4
- mulscc %o4, %o1, %o4
- mulscc %o4, %o1, %o4
- mulscc %o4, %o1, %o4
- mulscc %o4, %o1, %o4
- mulscc %o4, %o1, %o4
- mulscc %o4, %o1, %o4
- mulscc %o4, %o1, %o4
- mulscc %o4, %o1, %o4
- mulscc %o4, %o1, %o4
- mulscc %o4, %o1, %o4
- mulscc %o4, %o1, %o4
- mulscc %o4, %o1, %o4
- mulscc %o4, %o1, %o4
- mulscc %o4, %o1, %o4
- mulscc %o4, %o1, %o4
- mulscc %o4, %o1, %o4
- mulscc %o4, %o1, %o4
- mulscc %o4, %o1, %o4
- mulscc %o4, %o1, %o4
- mulscc %o4, %o1, %o4
- mulscc %o4, %o1, %o4
- mulscc %o4, %o1, %o4
- mulscc %o4, %o1, %o4
- mulscc %o4, %o1, %o4
- mulscc %o4, %o1, %o4
- mulscc %o4, %o1, %o4
- mulscc %o4, %o1, %o4
- mulscc %o4, %o1, %o4 ! 32nd iteration
- mulscc %o4, %g0, %o4 ! last iteration only shifts
- ! the upper 32 bits of product are wrong, but we do not care
- retl
- rd %y, %o0
- !
- ! short multiply
- !
-mul_shortway:
- mulscc %o4, %o1, %o4 ! first iteration of 13
- mulscc %o4, %o1, %o4
- mulscc %o4, %o1, %o4
- mulscc %o4, %o1, %o4
- mulscc %o4, %o1, %o4
- mulscc %o4, %o1, %o4
- mulscc %o4, %o1, %o4
- mulscc %o4, %o1, %o4
- mulscc %o4, %o1, %o4
- mulscc %o4, %o1, %o4
- mulscc %o4, %o1, %o4
- mulscc %o4, %o1, %o4 ! 12th iteration
- mulscc %o4, %g0, %o4 ! last iteration only shifts
- rd %y, %o5
- sll %o4, 12, %o4 ! left shift partial product by 12 bits
- srl %o5, 20, %o5 ! right shift partial product by 20 bits
- retl
- or %o5, %o4, %o0 ! merge for true product
-#endif
-
-#ifdef L_divsi3
-/*
- * Division and remainder, from Appendix E of the SPARC Version 8
- * Architecture Manual, with fixes from Gordon Irlam.
- */
-
-/*
- * Input: dividend and divisor in %o0 and %o1 respectively.
- *
- * m4 parameters:
- * .div name of function to generate
- * div div=div => %o0 / %o1; div=rem => %o0 % %o1
- * true true=true => signed; true=false => unsigned
- *
- * Algorithm parameters:
- * N how many bits per iteration we try to get (4)
- * WORDSIZE total number of bits (32)
- *
- * Derived constants:
- * TOPBITS number of bits in the top decade of a number
- *
- * Important variables:
- * Q the partial quotient under development (initially 0)
- * R the remainder so far, initially the dividend
- * ITER number of main division loop iterations required;
- * equal to ceil(log2(quotient) / N). Note that this
- * is the log base (2^N) of the quotient.
- * V the current comparand, initially divisor*2^(ITER*N-1)
- *
- * Cost:
- * Current estimate for non-large dividend is
- * ceil(log2(quotient) / N) * (10 + 7N/2) + C
- * A large dividend is one greater than 2^(31-TOPBITS) and takes a
- * different path, as the upper bits of the quotient must be developed
- * one bit at a time.
- */
- .global .udiv
- .align 4
- .proc 4
- .text
-.udiv:
- b ready_to_divide
- mov 0, %g3 ! result is always positive
-
- .global .div
- .align 4
- .proc 4
- .text
-.div:
- ! compute sign of result; if neither is negative, no problem
- orcc %o1, %o0, %g0 ! either negative?
- bge ready_to_divide ! no, go do the divide
- xor %o1, %o0, %g3 ! compute sign in any case
- tst %o1
- bge 1f
- tst %o0
- ! %o1 is definitely negative; %o0 might also be negative
- bge ready_to_divide ! if %o0 not negative...
- sub %g0, %o1, %o1 ! in any case, make %o1 nonneg
-1: ! %o0 is negative, %o1 is nonnegative
- sub %g0, %o0, %o0 ! make %o0 nonnegative
-
-
-ready_to_divide:
-
- ! Ready to divide. Compute size of quotient; scale comparand.
- orcc %o1, %g0, %o5
- bne 1f
- mov %o0, %o3
-
- ! Divide by zero trap. If it returns, return 0 (about as
- ! wrong as possible, but that is what SunOS does...).
- ta 0x2 ! ST_DIV0
- retl
- clr %o0
-
-1:
- cmp %o3, %o5 ! if %o1 exceeds %o0, done
- blu got_result ! (and algorithm fails otherwise)
- clr %o2
- sethi %hi(1 << (32 - 4 - 1)), %g1
- cmp %o3, %g1
- blu not_really_big
- clr %o4
-
- ! Here the dividend is >= 2**(31-N) or so. We must be careful here,
- ! as our usual N-at-a-shot divide step will cause overflow and havoc.
- ! The number of bits in the result here is N*ITER+SC, where SC <= N.
- ! Compute ITER in an unorthodox manner: know we need to shift V into
- ! the top decade: so do not even bother to compare to R.
- 1:
- cmp %o5, %g1
- bgeu 3f
- mov 1, %g2
- sll %o5, 4, %o5
- b 1b
- add %o4, 1, %o4
-
- ! Now compute %g2.
- 2: addcc %o5, %o5, %o5
- bcc not_too_big
- add %g2, 1, %g2
-
- ! We get here if the %o1 overflowed while shifting.
- ! This means that %o3 has the high-order bit set.
- ! Restore %o5 and subtract from %o3.
- sll %g1, 4, %g1 ! high order bit
- srl %o5, 1, %o5 ! rest of %o5
- add %o5, %g1, %o5
- b do_single_div
- sub %g2, 1, %g2
-
- not_too_big:
- 3: cmp %o5, %o3
- blu 2b
- nop
- be do_single_div
- nop
- /* NB: these are commented out in the V8-SPARC manual as well */
- /* (I do not understand this) */
- ! %o5 > %o3: went too far: back up 1 step
- ! srl %o5, 1, %o5
- ! dec %g2
- ! do single-bit divide steps
- !
- ! We have to be careful here. We know that %o3 >= %o5, so we can do the
- ! first divide step without thinking. BUT, the others are conditional,
- ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
- ! order bit set in the first step, just falling into the regular
- ! division loop will mess up the first time around.
- ! So we unroll slightly...
- do_single_div:
- subcc %g2, 1, %g2
- bl end_regular_divide
- nop
- sub %o3, %o5, %o3
- mov 1, %o2
- b end_single_divloop
- nop
- single_divloop:
- sll %o2, 1, %o2
- bl 1f
- srl %o5, 1, %o5
- ! %o3 >= 0
- sub %o3, %o5, %o3
- b 2f
- add %o2, 1, %o2
- 1: ! %o3 < 0
- add %o3, %o5, %o3
- sub %o2, 1, %o2
- 2:
- end_single_divloop:
- subcc %g2, 1, %g2
- bge single_divloop
- tst %o3
- b,a end_regular_divide
-
-not_really_big:
-1:
- sll %o5, 4, %o5
- cmp %o5, %o3
- bleu 1b
- addcc %o4, 1, %o4
- be got_result
- sub %o4, 1, %o4
-
- tst %o3 ! set up for initial iteration
-divloop:
- sll %o2, 4, %o2
- ! depth 1, accumulated bits 0
- bl L1.16
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 2, accumulated bits 1
- bl L2.17
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 3, accumulated bits 3
- bl L3.19
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 4, accumulated bits 7
- bl L4.23
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (7*2+1), %o2
-
-L4.23:
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (7*2-1), %o2
-
-
-L3.19:
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 4, accumulated bits 5
- bl L4.21
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (5*2+1), %o2
-
-L4.21:
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (5*2-1), %o2
-
-L2.17:
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 3, accumulated bits 1
- bl L3.17
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 4, accumulated bits 3
- bl L4.19
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (3*2+1), %o2
-
-L4.19:
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (3*2-1), %o2
-
-L3.17:
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 4, accumulated bits 1
- bl L4.17
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (1*2+1), %o2
-
-L4.17:
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (1*2-1), %o2
-
-L1.16:
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 2, accumulated bits -1
- bl L2.15
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 3, accumulated bits -1
- bl L3.15
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 4, accumulated bits -1
- bl L4.15
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (-1*2+1), %o2
-
-L4.15:
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (-1*2-1), %o2
-
-L3.15:
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 4, accumulated bits -3
- bl L4.13
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (-3*2+1), %o2
-
-L4.13:
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (-3*2-1), %o2
-
-L2.15:
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 3, accumulated bits -3
- bl L3.13
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 4, accumulated bits -5
- bl L4.11
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (-5*2+1), %o2
-
-L4.11:
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (-5*2-1), %o2
-
-L3.13:
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 4, accumulated bits -7
- bl L4.9
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (-7*2+1), %o2
-
-L4.9:
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (-7*2-1), %o2
-
- 9:
-end_regular_divide:
- subcc %o4, 1, %o4
- bge divloop
- tst %o3
- bl,a got_result
- ! non-restoring fixup here (one instruction only!)
- sub %o2, 1, %o2
-
-
-got_result:
- ! check to see if answer should be < 0
- tst %g3
- bl,a 1f
- sub %g0, %o2, %o2
-1:
- retl
- mov %o2, %o0
-#endif
-
-#ifdef L_modsi3
-/* This implementation was taken from glibc:
- *
- * Input: dividend and divisor in %o0 and %o1 respectively.
- *
- * Algorithm parameters:
- * N how many bits per iteration we try to get (4)
- * WORDSIZE total number of bits (32)
- *
- * Derived constants:
- * TOPBITS number of bits in the top decade of a number
- *
- * Important variables:
- * Q the partial quotient under development (initially 0)
- * R the remainder so far, initially the dividend
- * ITER number of main division loop iterations required;
- * equal to ceil(log2(quotient) / N). Note that this
- * is the log base (2^N) of the quotient.
- * V the current comparand, initially divisor*2^(ITER*N-1)
- *
- * Cost:
- * Current estimate for non-large dividend is
- * ceil(log2(quotient) / N) * (10 + 7N/2) + C
- * A large dividend is one greater than 2^(31-TOPBITS) and takes a
- * different path, as the upper bits of the quotient must be developed
- * one bit at a time.
- */
-.text
- .align 4
- .global .urem
- .proc 4
-.urem:
- b divide
- mov 0, %g3 ! result always positive
-
- .align 4
- .global .rem
- .proc 4
-.rem:
- ! compute sign of result; if neither is negative, no problem
- orcc %o1, %o0, %g0 ! either negative?
- bge 2f ! no, go do the divide
- mov %o0, %g3 ! sign of remainder matches %o0
- tst %o1
- bge 1f
- tst %o0
- ! %o1 is definitely negative; %o0 might also be negative
- bge 2f ! if %o0 not negative...
- sub %g0, %o1, %o1 ! in any case, make %o1 nonneg
-1: ! %o0 is negative, %o1 is nonnegative
- sub %g0, %o0, %o0 ! make %o0 nonnegative
-2:
-
- ! Ready to divide. Compute size of quotient; scale comparand.
-divide:
- orcc %o1, %g0, %o5
- bne 1f
- mov %o0, %o3
-
- ! Divide by zero trap. If it returns, return 0 (about as
- ! wrong as possible, but that is what SunOS does...).
- ta 0x2 !ST_DIV0
- retl
- clr %o0
-
-1:
- cmp %o3, %o5 ! if %o1 exceeds %o0, done
- blu got_result ! (and algorithm fails otherwise)
- clr %o2
- sethi %hi(1 << (32 - 4 - 1)), %g1
- cmp %o3, %g1
- blu not_really_big
- clr %o4
-
- ! Here the dividend is >= 2**(31-N) or so. We must be careful here,
- ! as our usual N-at-a-shot divide step will cause overflow and havoc.
- ! The number of bits in the result here is N*ITER+SC, where SC <= N.
- ! Compute ITER in an unorthodox manner: know we need to shift V into
- ! the top decade: so do not even bother to compare to R.
- 1:
- cmp %o5, %g1
- bgeu 3f
- mov 1, %g2
- sll %o5, 4, %o5
- b 1b
- add %o4, 1, %o4
-
- ! Now compute %g2.
- 2: addcc %o5, %o5, %o5
- bcc not_too_big
- add %g2, 1, %g2
-
- ! We get here if the %o1 overflowed while shifting.
- ! This means that %o3 has the high-order bit set.
- ! Restore %o5 and subtract from %o3.
- sll %g1, 4, %g1 ! high order bit
- srl %o5, 1, %o5 ! rest of %o5
- add %o5, %g1, %o5
- b do_single_div
- sub %g2, 1, %g2
-
- not_too_big:
- 3: cmp %o5, %o3
- blu 2b
- nop
- be do_single_div
- nop
- /* NB: these are commented out in the V8-SPARC manual as well */
- /* (I do not understand this) */
- ! %o5 > %o3: went too far: back up 1 step
- ! srl %o5, 1, %o5
- ! dec %g2
- ! do single-bit divide steps
- !
- ! We have to be careful here. We know that %o3 >= %o5, so we can do the
- ! first divide step without thinking. BUT, the others are conditional,
- ! and are only done if %o3 >= 0. Because both %o3 and %o5 may have the high-
- ! order bit set in the first step, just falling into the regular
- ! division loop will mess up the first time around.
- ! So we unroll slightly...
- do_single_div:
- subcc %g2, 1, %g2
- bl end_regular_divide
- nop
- sub %o3, %o5, %o3
- mov 1, %o2
- b end_single_divloop
- nop
- single_divloop:
- sll %o2, 1, %o2
- bl 1f
- srl %o5, 1, %o5
- ! %o3 >= 0
- sub %o3, %o5, %o3
- b 2f
- add %o2, 1, %o2
- 1: ! %o3 < 0
- add %o3, %o5, %o3
- sub %o2, 1, %o2
- 2:
- end_single_divloop:
- subcc %g2, 1, %g2
- bge single_divloop
- tst %o3
- b,a end_regular_divide
-
-not_really_big:
-1:
- sll %o5, 4, %o5
- cmp %o5, %o3
- bleu 1b
- addcc %o4, 1, %o4
- be got_result
- sub %o4, 1, %o4
-
- tst %o3 ! set up for initial iteration
-divloop:
- sll %o2, 4, %o2
- ! depth 1, accumulated bits 0
- bl L1.16
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 2, accumulated bits 1
- bl L2.17
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 3, accumulated bits 3
- bl L3.19
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 4, accumulated bits 7
- bl L4.23
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (7*2+1), %o2
-L4.23:
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (7*2-1), %o2
-
-L3.19:
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 4, accumulated bits 5
- bl L4.21
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (5*2+1), %o2
-
-L4.21:
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (5*2-1), %o2
-
-L2.17:
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 3, accumulated bits 1
- bl L3.17
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 4, accumulated bits 3
- bl L4.19
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (3*2+1), %o2
-
-L4.19:
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (3*2-1), %o2
-
-L3.17:
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 4, accumulated bits 1
- bl L4.17
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (1*2+1), %o2
-
-L4.17:
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (1*2-1), %o2
-
-L1.16:
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 2, accumulated bits -1
- bl L2.15
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 3, accumulated bits -1
- bl L3.15
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 4, accumulated bits -1
- bl L4.15
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (-1*2+1), %o2
-
-L4.15:
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (-1*2-1), %o2
-
-L3.15:
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 4, accumulated bits -3
- bl L4.13
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (-3*2+1), %o2
-
-L4.13:
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (-3*2-1), %o2
-
-L2.15:
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 3, accumulated bits -3
- bl L3.13
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- ! depth 4, accumulated bits -5
- bl L4.11
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (-5*2+1), %o2
-
-L4.11:
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (-5*2-1), %o2
-
-L3.13:
- ! remainder is negative
- addcc %o3,%o5,%o3
- ! depth 4, accumulated bits -7
- bl L4.9
- srl %o5,1,%o5
- ! remainder is positive
- subcc %o3,%o5,%o3
- b 9f
- add %o2, (-7*2+1), %o2
-
-L4.9:
- ! remainder is negative
- addcc %o3,%o5,%o3
- b 9f
- add %o2, (-7*2-1), %o2
-
- 9:
-end_regular_divide:
- subcc %o4, 1, %o4
- bge divloop
- tst %o3
- bl,a got_result
- ! non-restoring fixup here (one instruction only!)
- add %o3, %o1, %o3
-
-got_result:
- ! check to see if answer should be < 0
- tst %g3
- bl,a 1f
- sub %g0, %o3, %o3
-1:
- retl
- mov %o3, %o0
-
-#endif
-
diff --git a/gcc/config/sparc/lb1spl.asm b/gcc/config/sparc/lb1spl.asm
deleted file mode 100644
index 973401f8018..00000000000
--- a/gcc/config/sparc/lb1spl.asm
+++ /dev/null
@@ -1,246 +0,0 @@
-/* This is an assembly language implementation of mulsi3, divsi3, and modsi3
- for the sparclite processor.
-
- These routines are all from the SPARClite User's Guide, slightly edited
- to match the desired calling convention, and also to optimize them. */
-
-#ifdef L_udivsi3
-.text
- .align 4
- .global .udiv
- .proc 04
-.udiv:
- wr %g0,%g0,%y ! Not a delayed write for sparclite
- tst %g0
- divscc %o0,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- retl
- divscc %g1,%o1,%o0
-#endif
-
-#ifdef L_umodsi3
-.text
- .align 4
- .global .urem
- .proc 04
-.urem:
- wr %g0,%g0,%y ! Not a delayed write for sparclite
- tst %g0
- divscc %o0,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- divscc %g1,%o1,%g1
- bl 1f
- rd %y,%o0
- retl
- nop
-1: retl
- add %o0,%o1,%o0
-#endif
-
-#ifdef L_divsi3
-.text
- .align 4
- .global .div
- .proc 04
-! ??? This routine could be made faster if was optimized, and if it was
-! rewritten to only calculate the quotient.
-.div:
- wr %g0,%g0,%y ! Not a delayed write for sparclite
- mov %o1,%o4
- tst %o1
- bl,a 1f
- sub %g0,%o4,%o4
-1: tst %o0
- bl,a 2f
- mov -1,%y
-2: divscc %o0,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- be 6f
- mov %y,%o3
- bg 4f
- addcc %o3,%o4,%g0
- be,a 6f
- mov %g0,%o3
- tst %o0
- bl 5f
- tst %g1
- ba 5f
- add %o3,%o4,%o3
-4: subcc %o3,%o4,%g0
- be,a 6f
- mov %g0,%o3
- tst %o0
- bge 5f
- tst %g1
- sub %o3,%o4,%o3
-5: bl,a 6f
- add %g1,1,%g1
-6: tst %o1
- bl,a 7f
- sub %g0,%g1,%g1
-7: retl
- mov %g1,%o0 ! Quotient is in %g1.
-#endif
-
-#ifdef L_modsi3
-.text
- .align 4
- .global .rem
- .proc 04
-! ??? This routine could be made faster if was optimized, and if it was
-! rewritten to only calculate the remainder.
-.rem:
- wr %g0,%g0,%y ! Not a delayed write for sparclite
- mov %o1,%o4
- tst %o1
- bl,a 1f
- sub %g0,%o4,%o4
-1: tst %o0
- bl,a 2f
- mov -1,%y
-2: divscc %o0,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- divscc %g1,%o4,%g1
- be 6f
- mov %y,%o3
- bg 4f
- addcc %o3,%o4,%g0
- be,a 6f
- mov %g0,%o3
- tst %o0
- bl 5f
- tst %g1
- ba 5f
- add %o3,%o4,%o3
-4: subcc %o3,%o4,%g0
- be,a 6f
- mov %g0,%o3
- tst %o0
- bge 5f
- tst %g1
- sub %o3,%o4,%o3
-5: bl,a 6f
- add %g1,1,%g1
-6: tst %o1
- bl,a 7f
- sub %g0,%g1,%g1
-7: retl
- mov %o3,%o0 ! Remainder is in %o3.
-#endif
diff --git a/gcc/config/sparc/libgcc-sparc-glibc.ver b/gcc/config/sparc/libgcc-sparc-glibc.ver
deleted file mode 100644
index 91138d3795e..00000000000
--- a/gcc/config/sparc/libgcc-sparc-glibc.ver
+++ /dev/null
@@ -1,93 +0,0 @@
-# Copyright (C) 2002, 2006, 2008 Free Software Foundation, Inc.
-#
-# This file is part of GCC.
-#
-# GCC is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 3, or (at your option)
-# any later version.
-#
-# GCC is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with GCC; see the file COPYING3. If not see
-# <http://www.gnu.org/licenses/>.
-
-# In order to work around the very problems that force us to now generally
-# create a libgcc.so, glibc reexported a number of routines from libgcc.a.
-# By now choosing the same version tags for these specific routines, we
-# maintain enough binary compatibility to allow future versions of glibc
-# to defer implementation of these routines to libgcc.so via DT_AUXILIARY.
-
-%exclude {
- __divdi3
- __moddi3
- __udivdi3
- __umoddi3
- __register_frame
- __register_frame_table
- __deregister_frame
- __register_frame_info
- __deregister_frame_info
- __frame_state_for
- __register_frame_info_table
-}
-
-%ifdef __arch64__
-%define GLIBC_VER GLIBC_2.2
-%else
-%define GLIBC_VER GLIBC_2.0
-%endif
-%inherit GCC_3.0 GLIBC_VER
-GLIBC_VER {
- # Sampling of DImode arithmetic used by (at least) i386 and m68k.
- __divdi3
- __moddi3
- __udivdi3
- __umoddi3
-
- # Exception handling support functions used by most everyone.
- __register_frame
- __register_frame_table
- __deregister_frame
- __register_frame_info
- __deregister_frame_info
- __frame_state_for
- __register_frame_info_table
-}
-
-%if !defined (__arch64__) && defined (__LONG_DOUBLE_128__)
-
-# long double 128 bit support from 32-bit libgcc_s.so.1 is only available
-# when configured with --with-long-double-128. Make sure all the
-# symbols are available at @@GCC_LDBL_* versions to make it clear
-# there is a configurable symbol set.
-
-%exclude {
- __fixtfdi
- __fixunstfdi
- __floatditf
-
- __divtc3
- __multc3
- __powitf2
-}
-
-%inherit GCC_LDBL_3.0 GCC_3.0
-GCC_LDBL_3.0 {
- __fixtfdi
- __fixunstfdi
- __floatditf
-}
-
-%inherit GCC_LDBL_4.0.0 GCC_4.0.0
-GCC_LDBL_4.0.0 {
- __divtc3
- __multc3
- __powitf2
-}
-
-%endif
diff --git a/gcc/config/sparc/sparc-protos.h b/gcc/config/sparc/sparc-protos.h
index 108e105cbea..b9a094e160a 100644
--- a/gcc/config/sparc/sparc-protos.h
+++ b/gcc/config/sparc/sparc-protos.h
@@ -108,6 +108,7 @@ extern const char *output_v8plus_mult (rtx, rtx *, const char *);
extern void sparc_expand_vector_init (rtx, rtx);
extern void sparc_expand_vec_perm_bmask(enum machine_mode, rtx);
extern bool sparc_expand_conditional_move (enum machine_mode, rtx *);
+extern void sparc_expand_vcond (enum machine_mode, rtx *, int, int);
#endif /* RTX_CODE */
#endif /* __SPARC_PROTOS_H__ */
diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c
index 3883dbd21d8..5d22fc0313e 100644
--- a/gcc/config/sparc/sparc.c
+++ b/gcc/config/sparc/sparc.c
@@ -3440,7 +3440,7 @@ sparc_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict)
REG+REG address, then only one of them gets converted to an
offsettable address. */
if (mode == TFmode
- && ! (TARGET_FPU && TARGET_ARCH64 && TARGET_HARD_QUAD))
+ && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
return 0;
/* We prohibit REG + REG on ARCH32 if not optimizing for
@@ -11279,20 +11279,366 @@ output_v8plus_mult (rtx insn, rtx *operands, const char *name)
}
}
+static void
+vector_init_bshuffle (rtx target, rtx *locs, int n_elts, enum machine_mode mode,
+ enum machine_mode inner_mode)
+{
+ rtx mid_target, r0_high, r0_low, r1_high, r1_low;
+ enum machine_mode partial_mode;
+ int bmask, i, idxs[8];
+
+ partial_mode = (mode == V4HImode
+ ? V2HImode
+ : (mode == V8QImode
+ ? V4QImode : mode));
+
+ r0_high = r0_low = NULL_RTX;
+ r1_high = r1_low = NULL_RTX;
+
+ /* Move the pieces into place, as needed, and calculate the nibble
+ indexes for the bmask calculation. After we execute this loop the
+ locs[] array is no longer needed. Therefore, to simplify things,
+ we set entries that have been processed already to NULL_RTX. */
+
+ for (i = 0; i < n_elts; i++)
+ {
+ int j;
+
+ if (locs[i] == NULL_RTX)
+ continue;
+
+ if (!r0_low)
+ {
+ r0_low = locs[i];
+ idxs[i] = 0x7;
+ }
+ else if (!r1_low)
+ {
+ r1_low = locs[i];
+ idxs[i] = 0xf;
+ }
+ else if (!r0_high)
+ {
+ r0_high = gen_highpart (partial_mode, r0_low);
+ emit_move_insn (r0_high, gen_lowpart (partial_mode, locs[i]));
+ idxs[i] = 0x3;
+ }
+ else if (!r1_high)
+ {
+ r1_high = gen_highpart (partial_mode, r1_low);
+ emit_move_insn (r1_high, gen_lowpart (partial_mode, locs[i]));
+ idxs[i] = 0xb;
+ }
+ else
+ gcc_unreachable ();
+
+ for (j = i + 1; j < n_elts; j++)
+ {
+ if (locs[j] == locs[i])
+ {
+ locs[j] = NULL_RTX;
+ idxs[j] = idxs[i];
+ }
+ }
+ locs[i] = NULL_RTX;
+ }
+
+ bmask = 0;
+ for (i = 0; i < n_elts; i++)
+ {
+ int v = idxs[i];
+
+ switch (GET_MODE_SIZE (inner_mode))
+ {
+ case 2:
+ bmask <<= 8;
+ bmask |= (((v - 1) << 4) | v);
+ break;
+
+ case 1:
+ bmask <<= 4;
+ bmask |= v;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+
+ emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), CONST0_RTX (SImode),
+ force_reg (SImode, GEN_INT (bmask))));
+
+ mid_target = target;
+ if (GET_MODE_SIZE (mode) == 4)
+ {
+ mid_target = gen_reg_rtx (mode == V2HImode
+ ? V4HImode : V8QImode);
+ }
+
+ if (!r1_low)
+ r1_low = r0_low;
+
+ switch (GET_MODE (mid_target))
+ {
+ case V4HImode:
+ emit_insn (gen_bshufflev4hi_vis (mid_target, r0_low, r1_low));
+ break;
+ case V8QImode:
+ emit_insn (gen_bshufflev8qi_vis (mid_target, r0_low, r1_low));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ if (mid_target != target)
+ emit_move_insn (target, gen_lowpart (partial_mode, mid_target));
+}
+
+static bool
+vector_init_move_words (rtx target, rtx vals, enum machine_mode mode,
+ enum machine_mode inner_mode)
+{
+ switch (mode)
+ {
+ case V1SImode:
+ case V1DImode:
+ emit_move_insn (gen_lowpart (inner_mode, target),
+ gen_lowpart (inner_mode, XVECEXP (vals, 0, 0)));
+ return true;
+
+ case V2SImode:
+ emit_move_insn (gen_highpart (SImode, target), XVECEXP (vals, 0, 0));
+ emit_move_insn (gen_lowpart (SImode, target), XVECEXP (vals, 0, 1));
+ return true;
+
+ default:
+ break;
+ }
+ return false;
+}
+
+/* Move the elements in rtvec VALS into registers compatible with MODE.
+ Store the rtx for these regs into the corresponding array entry of
+ LOCS. */
+static void
+vector_init_prepare_elts (rtx vals, int n_elts, rtx *locs, enum machine_mode mode,
+ enum machine_mode inner_mode)
+{
+ enum machine_mode loc_mode;
+ int i;
+
+ switch (mode)
+ {
+ case V2HImode:
+ loc_mode = V4HImode;
+ break;
+
+ case V4QImode:
+ loc_mode = V8QImode;
+ break;
+
+ case V4HImode:
+ case V8QImode:
+ loc_mode = mode;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ gcc_assert (GET_MODE_SIZE (inner_mode) <= 4);
+ for (i = 0; i < n_elts; i++)
+ {
+ rtx dst, elt = XVECEXP (vals, 0, i);
+ int j;
+
+ /* Did we see this already? If so just record it's location. */
+ dst = NULL_RTX;
+ for (j = 0; j < i; j++)
+ {
+ if (XVECEXP (vals, 0, j) == elt)
+ {
+ dst = locs[j];
+ break;
+ }
+ }
+
+ if (! dst)
+ {
+ enum rtx_code code = GET_CODE (elt);
+
+ dst = gen_reg_rtx (loc_mode);
+
+ /* We use different strategies based upon whether the element
+ is in memory or in a register. When we start in a register
+ and we're VIS3 capable, it's always cheaper to use the VIS3
+ int-->fp register moves since we avoid having to use stack
+ memory. */
+ if ((TARGET_VIS3 && (code == REG || code == SUBREG))
+ || (CONSTANT_P (elt)
+ && (const_zero_operand (elt, inner_mode)
+ || const_all_ones_operand (elt, inner_mode))))
+ {
+ elt = convert_modes (SImode, inner_mode, elt, true);
+
+ emit_clobber (dst);
+ emit_move_insn (gen_lowpart (SImode, dst), elt);
+ }
+ else
+ {
+ rtx m = elt;
+
+ if (CONSTANT_P (elt))
+ {
+ m = force_const_mem (inner_mode, elt);
+ }
+ else if (code != MEM)
+ {
+ rtx stk = assign_stack_temp (inner_mode, GET_MODE_SIZE(inner_mode), 0);
+ emit_move_insn (stk, elt);
+ m = stk;
+ }
+
+ switch (loc_mode)
+ {
+ case V4HImode:
+ emit_insn (gen_zero_extend_v4hi_vis (dst, m));
+ break;
+ case V8QImode:
+ emit_insn (gen_zero_extend_v8qi_vis (dst, m));
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+ }
+ locs[i] = dst;
+ }
+}
+
+static void
+sparc_expand_vector_init_vis2 (rtx target, rtx *locs, int n_elts, int n_unique,
+ enum machine_mode mode,
+ enum machine_mode inner_mode)
+{
+ if (n_unique <= 4)
+ {
+ vector_init_bshuffle (target, locs, n_elts, mode, inner_mode);
+ }
+ else
+ {
+ int i;
+
+ gcc_assert (mode == V8QImode);
+
+ emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
+ force_reg (SImode, GEN_INT (7)),
+ CONST0_RTX (SImode)));
+ i = n_elts - 1;
+ emit_insn (gen_faligndatav8qi_vis (target, locs[i], locs[i]));
+ while (--i >= 0)
+ emit_insn (gen_faligndatav8qi_vis (target, locs[i], target));
+ }
+}
+
+static void
+sparc_expand_vector_init_vis1 (rtx target, rtx *locs, int n_elts, int n_unique,
+ enum machine_mode mode)
+{
+ enum machine_mode full_mode = mode;
+ rtx (*emitter)(rtx, rtx, rtx);
+ int alignaddr_val, i;
+ rtx tmp = target;
+
+ if (n_unique == 1 && mode == V8QImode)
+ {
+ rtx t2, t2_low, t1;
+
+ t1 = gen_reg_rtx (V4QImode);
+ emit_move_insn (t1, gen_lowpart (V4QImode, locs[0]));
+
+ t2 = gen_reg_rtx (V8QImode);
+ t2_low = gen_lowpart (V4QImode, t2);
+
+ /* xxxxxxAA --> xxxxxxxxxxxxAAAA
+ xxxxAAAA --> xxxxxxxxAAAAAAAA
+ AAAAAAAA --> AAAAAAAAAAAAAAAA */
+ emit_insn (gen_fpmerge_vis (t2, t1, t1));
+ emit_move_insn (t1, t2_low);
+ emit_insn (gen_fpmerge_vis (t2, t1, t1));
+ emit_move_insn (t1, t2_low);
+ emit_insn (gen_fpmerge_vis (target, t1, t1));
+ return;
+ }
+
+ switch (mode)
+ {
+ case V2HImode:
+ full_mode = V4HImode;
+ /* FALLTHRU */
+ case V4HImode:
+ emitter = gen_faligndatav4hi_vis;
+ alignaddr_val = 6;
+ break;
+
+ case V4QImode:
+ full_mode = V8QImode;
+ /* FALLTHRU */
+ case V8QImode:
+ emitter = gen_faligndatav8qi_vis;
+ alignaddr_val = 7;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (full_mode != mode)
+ tmp = gen_reg_rtx (full_mode);
+
+ emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
+ force_reg (SImode, GEN_INT (alignaddr_val)),
+ CONST0_RTX (SImode)));
+
+ i = n_elts - 1;
+ emit_insn (emitter (tmp, locs[i], locs[i]));
+ while (--i >= 0)
+ emit_insn (emitter (tmp, locs[i], tmp));
+
+ if (tmp != target)
+ emit_move_insn (target, gen_highpart (mode, tmp));
+}
+
void
sparc_expand_vector_init (rtx target, rtx vals)
{
enum machine_mode mode = GET_MODE (target);
enum machine_mode inner_mode = GET_MODE_INNER (mode);
int n_elts = GET_MODE_NUNITS (mode);
- int i, n_var = 0;
- rtx mem;
+ int i, n_var = 0, n_unique = 0;
+ rtx locs[8];
+
+ gcc_assert (n_elts <= 8);
for (i = 0; i < n_elts; i++)
{
rtx x = XVECEXP (vals, 0, i);
+ bool found = false;
+ int j;
+
if (!CONSTANT_P (x))
n_var++;
+
+ for (j = 0; j < i; j++)
+ {
+ if (rtx_equal_p (x, XVECEXP (vals, 0, j)))
+ {
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ n_unique++;
}
if (n_var == 0)
@@ -11301,12 +11647,16 @@ sparc_expand_vector_init (rtx target, rtx vals)
return;
}
- mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
- for (i = 0; i < n_elts; i++)
- emit_move_insn (adjust_address_nv (mem, inner_mode,
- i * GET_MODE_SIZE (inner_mode)),
- XVECEXP (vals, 0, i));
- emit_move_insn (target, mem);
+ if (vector_init_move_words (target, vals, mode, inner_mode))
+ return;
+
+ vector_init_prepare_elts (vals, n_elts, locs, mode, inner_mode);
+
+ if (TARGET_VIS2)
+ sparc_expand_vector_init_vis2 (target, locs, n_elts, n_unique,
+ mode, inner_mode);
+ else
+ sparc_expand_vector_init_vis1 (target, locs, n_elts, n_unique, mode);
}
static reg_class_t
@@ -11380,12 +11730,16 @@ sparc_expand_conditional_move (enum machine_mode mode, rtx *operands)
rtx cc_reg, dst, cmp;
cmp = operands[1];
- cmp_mode = GET_MODE (XEXP (cmp, 0));
- if (cmp_mode == DImode && !TARGET_ARCH64)
+ if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
return false;
- dst = operands[0];
+ if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
+ cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
+
+ cmp_mode = GET_MODE (XEXP (cmp, 0));
+ rc = GET_CODE (cmp);
+ dst = operands[0];
if (! rtx_equal_p (operands[2], dst)
&& ! rtx_equal_p (operands[3], dst))
{
@@ -11404,9 +11758,6 @@ sparc_expand_conditional_move (enum machine_mode mode, rtx *operands)
rc = reverse_condition (rc);
}
- if (cmp_mode == TFmode && !TARGET_HARD_QUAD)
- cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
-
if (XEXP (cmp, 1) == const0_rtx
&& GET_CODE (XEXP (cmp, 0)) == REG
&& cmp_mode == DImode
@@ -11426,4 +11777,41 @@ sparc_expand_conditional_move (enum machine_mode mode, rtx *operands)
return true;
}
+void
+sparc_expand_vcond (enum machine_mode mode, rtx *operands, int ccode, int fcode)
+{
+ rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
+ enum rtx_code code = GET_CODE (operands[3]);
+
+ mask = gen_reg_rtx (Pmode);
+ cop0 = operands[4];
+ cop1 = operands[5];
+ if (code == LT || code == GE)
+ {
+ rtx t;
+
+ code = swap_condition (code);
+ t = cop0; cop0 = cop1; cop1 = t;
+ }
+
+ gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
+
+ fcmp = gen_rtx_UNSPEC (Pmode,
+ gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
+ fcode);
+
+ cmask = gen_rtx_UNSPEC (DImode,
+ gen_rtvec (2, mask, gsr),
+ ccode);
+
+ bshuf = gen_rtx_UNSPEC (mode,
+ gen_rtvec (3, operands[1], operands[2], gsr),
+ UNSPEC_BSHUFFLE);
+
+ emit_insn (gen_rtx_SET (VOIDmode, mask, fcmp));
+ emit_insn (gen_rtx_SET (VOIDmode, gsr, cmask));
+
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], bshuf));
+}
+
#include "gt-sparc.h"
diff --git a/gcc/config/sparc/sparc.h b/gcc/config/sparc/sparc.h
index 65b45271890..e8707f50577 100644
--- a/gcc/config/sparc/sparc.h
+++ b/gcc/config/sparc/sparc.h
@@ -328,6 +328,7 @@ extern enum cmodel sparc_cmodel;
%{mcpu=sparclite:-Asparclite} \
%{mcpu=sparclite86x:-Asparclite} \
%{mcpu=f930:-Asparclite} %{mcpu=f934:-Asparclite} \
+%{mcpu=v8:-Av8} \
%{mv8plus:-Av8plus} \
%{mcpu=v9:-Av9} \
%{mcpu=ultrasparc:%{!mv8plus:-Av9a}} \
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index 6dd390952c3..7452f96c9d3 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -92,6 +92,7 @@
(UNSPEC_MUL8 86)
(UNSPEC_MUL8SU 87)
(UNSPEC_MULDSU 88)
+ (UNSPEC_SHORT_LOAD 89)
])
(define_constants
@@ -2041,8 +2042,8 @@
})
(define_insn "*movsf_insn"
- [(set (match_operand:SF 0 "nonimmediate_operand" "=d,d,f, *r,*r,*r,*r, f, f,*r, m, m")
- (match_operand:SF 1 "input_operand" "G,C,f,*rR, Q, S, f,*r, m, m, f,*rG"))]
+ [(set (match_operand:SF 0 "nonimmediate_operand" "=d,d,f, *r,*r,*r,*r, f,f,*r,m, m")
+ (match_operand:SF 1 "input_operand" "G,C,f,*rR, Q, S, f,*r,m, m,f,*rG"))]
"(register_operand (operands[0], SFmode)
|| register_or_zero_or_all_ones_operand (operands[1], SFmode))"
{
@@ -2138,8 +2139,8 @@
})
(define_insn "*movdf_insn_sp32"
- [(set (match_operand:DF 0 "nonimmediate_operand" "=b,b,e,e,*r, f, e,T,W,U,T, f, *r, o,o")
- (match_operand:DF 1 "input_operand" "G,C,e,e, f,*r,W#F,G,e,T,U,o#F,*roGF,*rG,f"))]
+ [(set (match_operand:DF 0 "nonimmediate_operand" "=b,b,e,e,*r, f, e,T,W,U,T, f, *r, o,o")
+ (match_operand:DF 1 "input_operand" "G,C,e,e, f,*r,W#F,G,e,T,U,o#F,*roF,*rG,f"))]
"! TARGET_ARCH64
&& (register_operand (operands[0], DFmode)
|| register_or_zero_or_all_ones_operand (operands[1], DFmode))"
@@ -2166,7 +2167,7 @@
(define_insn "*movdf_insn_sp64"
[(set (match_operand:DF 0 "nonimmediate_operand" "=b,b,e,*r, e, e,W, *r,*r, m,*r")
- (match_operand:DF 1 "input_operand" "G,C,e, e,*r,W#F,e,*rG, m,*rG, F"))]
+ (match_operand:DF 1 "input_operand" "G,C,e, e,*r,W#F,e,*rG, m,*rG, F"))]
"TARGET_ARCH64
&& (register_operand (operands[0], DFmode)
|| register_or_zero_or_all_ones_operand (operands[1], DFmode))"
@@ -2191,9 +2192,8 @@
(define_split
[(set (match_operand:DF 0 "register_operand" "")
(match_operand:DF 1 "const_double_operand" ""))]
- "TARGET_FPU
- && (GET_CODE (operands[0]) == REG
- && SPARC_INT_REG_P (REGNO (operands[0])))
+ "REG_P (operands[0])
+ && SPARC_INT_REG_P (REGNO (operands[0]))
&& ! const_zero_operand (operands[1], GET_MODE (operands[0]))
&& reload_completed"
[(clobber (const_int 0))]
@@ -2378,45 +2378,30 @@
})
(define_insn "*movtf_insn_sp32"
- [(set (match_operand:TF 0 "nonimmediate_operand" "=b,e,o,U,r")
- (match_operand:TF 1 "input_operand" "G,oe,GeUr,o,roG"))]
- "TARGET_FPU
- && ! TARGET_ARCH64
- && (register_operand (operands[0], TFmode)
- || register_or_zero_operand (operands[1], TFmode))"
- "#"
- [(set_attr "length" "4")])
-
-;; Exactly the same as above, except that all `e' cases are deleted.
-;; This is necessary to prevent reload from ever trying to use a `e' reg
-;; when -mno-fpu.
-
-(define_insn "*movtf_insn_sp32_no_fpu"
- [(set (match_operand:TF 0 "nonimmediate_operand" "=o,U,o,r,o")
- (match_operand:TF 1 "input_operand" "G,o,U,roG,r"))]
- "! TARGET_FPU
- && ! TARGET_ARCH64
+ [(set (match_operand:TF 0 "nonimmediate_operand" "=b, e,o, o,U, r")
+ (match_operand:TF 1 "input_operand" " G,oe,e,rGU,o,roG"))]
+ "! TARGET_ARCH64
&& (register_operand (operands[0], TFmode)
|| register_or_zero_operand (operands[1], TFmode))"
"#"
- [(set_attr "length" "4")])
+ [(set_attr "length" "4,4,4,4,4,4")
+ (set_attr "cpu_feature" "fpu,fpu,fpu,*,*,*")])
(define_insn "*movtf_insn_sp64"
- [(set (match_operand:TF 0 "nonimmediate_operand" "=b,e,o,r")
- (match_operand:TF 1 "input_operand" "G,oe,Ger,roG"))]
- "TARGET_FPU
- && TARGET_ARCH64
+ [(set (match_operand:TF 0 "nonimmediate_operand" "=b, e,o, o, r")
+ (match_operand:TF 1 "input_operand" "G,oe,e,rG,roG"))]
+ "TARGET_ARCH64
&& ! TARGET_HARD_QUAD
&& (register_operand (operands[0], TFmode)
|| register_or_zero_operand (operands[1], TFmode))"
"#"
- [(set_attr "length" "2")])
+ [(set_attr "length" "2,2,2,2,2")
+ (set_attr "cpu_feature" "fpu,fpu,fpu,*,*")])
(define_insn "*movtf_insn_sp64_hq"
- [(set (match_operand:TF 0 "nonimmediate_operand" "=b,e,e,m,o,r")
- (match_operand:TF 1 "input_operand" "G,e,m,e,rG,roG"))]
- "TARGET_FPU
- && TARGET_ARCH64
+ [(set (match_operand:TF 0 "nonimmediate_operand" "=b,e,e,m, o, r")
+ (match_operand:TF 1 "input_operand" "G,e,m,e,rG,roG"))]
+ "TARGET_ARCH64
&& TARGET_HARD_QUAD
&& (register_operand (operands[0], TFmode)
|| register_or_zero_operand (operands[1], TFmode))"
@@ -2430,16 +2415,6 @@
[(set_attr "type" "*,fpmove,fpload,fpstore,*,*")
(set_attr "length" "2,*,*,*,2,2")])
-(define_insn "*movtf_insn_sp64_no_fpu"
- [(set (match_operand:TF 0 "nonimmediate_operand" "=r,o")
- (match_operand:TF 1 "input_operand" "orG,rG"))]
- "! TARGET_FPU
- && TARGET_ARCH64
- && (register_operand (operands[0], TFmode)
- || register_or_zero_operand (operands[1], TFmode))"
- "#"
- [(set_attr "length" "2")])
-
;; Now all the splits to handle multi-insn TF mode moves.
(define_split
[(set (match_operand:TF 0 "register_operand" "")
@@ -7856,6 +7831,48 @@
DONE;
})
+(define_expand "zero_extend_v8qi_vis"
+ [(set (match_operand:V8QI 0 "register_operand" "")
+ (unspec:V8QI [(match_operand:QI 1 "memory_operand" "")]
+ UNSPEC_SHORT_LOAD))]
+ "TARGET_VIS"
+{
+ if (! REG_P (XEXP (operands[1], 0)))
+ {
+ rtx addr = force_reg (Pmode, XEXP (operands[1], 0));
+ operands[1] = replace_equiv_address (operands[1], addr);
+ }
+})
+
+(define_expand "zero_extend_v4hi_vis"
+ [(set (match_operand:V4HI 0 "register_operand" "")
+ (unspec:V4HI [(match_operand:HI 1 "memory_operand" "")]
+ UNSPEC_SHORT_LOAD))]
+ "TARGET_VIS"
+{
+ if (! REG_P (XEXP (operands[1], 0)))
+ {
+ rtx addr = force_reg (Pmode, XEXP (operands[1], 0));
+ operands[1] = replace_equiv_address (operands[1], addr);
+ }
+})
+
+(define_insn "*zero_extend_v8qi_<P:mode>_insn"
+ [(set (match_operand:V8QI 0 "register_operand" "=e")
+ (unspec:V8QI [(mem:QI
+ (match_operand:P 1 "register_operand" "r"))]
+ UNSPEC_SHORT_LOAD))]
+ "TARGET_VIS"
+ "ldda\t[%1] 0xd0, %0")
+
+(define_insn "*zero_extend_v4hi_<P:mode>_insn"
+ [(set (match_operand:V4HI 0 "register_operand" "=e")
+ (unspec:V4HI [(mem:HI
+ (match_operand:P 1 "register_operand" "r"))]
+ UNSPEC_SHORT_LOAD))]
+ "TARGET_VIS"
+ "ldda\t[%1] 0xd2, %0")
+
(define_expand "vec_init<mode>"
[(match_operand:VMALL 0 "register_operand" "")
(match_operand:VMALL 1 "" "")]
@@ -8299,6 +8316,36 @@
[(set_attr "type" "fpmul")
(set_attr "fptype" "double")])
+(define_expand "vcond<mode><mode>"
+ [(match_operand:GCM 0 "register_operand" "")
+ (match_operand:GCM 1 "register_operand" "")
+ (match_operand:GCM 2 "register_operand" "")
+ (match_operator 3 ""
+ [(match_operand:GCM 4 "register_operand" "")
+ (match_operand:GCM 5 "register_operand" "")])]
+ "TARGET_VIS3"
+{
+ sparc_expand_vcond (<MODE>mode, operands,
+ UNSPEC_CMASK<gcm_name>,
+ UNSPEC_FCMP);
+ DONE;
+})
+
+(define_expand "vconduv8qiv8qi"
+ [(match_operand:V8QI 0 "register_operand" "")
+ (match_operand:V8QI 1 "register_operand" "")
+ (match_operand:V8QI 2 "register_operand" "")
+ (match_operator 3 ""
+ [(match_operand:V8QI 4 "register_operand" "")
+ (match_operand:V8QI 5 "register_operand" "")])]
+ "TARGET_VIS3"
+{
+ sparc_expand_vcond (V8QImode, operands,
+ UNSPEC_CMASK8,
+ UNSPEC_FUCMP);
+ DONE;
+})
+
(define_insn "array8<P:mode>_vis"
[(set (match_operand:P 0 "register_operand" "=r")
(unspec:P [(match_operand:P 1 "register_or_zero_operand" "rJ")
@@ -8452,7 +8499,7 @@
;; Conditional moves are possible via fcmpX --> cmaskX -> bshuffle
(define_insn "cmask8<P:mode>_vis"
[(set (reg:DI GSR_REG)
- (unspec:DI [(match_operand:P 0 "register_operand" "r")
+ (unspec:DI [(match_operand:P 0 "register_or_zero_operand" "rJ")
(reg:DI GSR_REG)]
UNSPEC_CMASK8))]
"TARGET_VIS3"
@@ -8460,7 +8507,7 @@
(define_insn "cmask16<P:mode>_vis"
[(set (reg:DI GSR_REG)
- (unspec:DI [(match_operand:P 0 "register_operand" "r")
+ (unspec:DI [(match_operand:P 0 "register_or_zero_operand" "rJ")
(reg:DI GSR_REG)]
UNSPEC_CMASK16))]
"TARGET_VIS3"
@@ -8468,7 +8515,7 @@
(define_insn "cmask32<P:mode>_vis"
[(set (reg:DI GSR_REG)
- (unspec:DI [(match_operand:P 0 "register_operand" "r")
+ (unspec:DI [(match_operand:P 0 "register_or_zero_operand" "rJ")
(reg:DI GSR_REG)]
UNSPEC_CMASK32))]
"TARGET_VIS3"
diff --git a/gcc/config/sparc/t-elf b/gcc/config/sparc/t-elf
index 7073bcb7721..e9acfe3693e 100644
--- a/gcc/config/sparc/t-elf
+++ b/gcc/config/sparc/t-elf
@@ -17,12 +17,6 @@
# along with GCC; see the file COPYING3. If not see
# <http://www.gnu.org/licenses/>.
-LIB1ASMSRC = sparc/lb1spc.asm
-LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3
-
MULTILIB_OPTIONS = msoft-float mcpu=v8 mflat
MULTILIB_DIRNAMES = soft v8 flat
MULTILIB_MATCHES = msoft-float=mno-fpu
-
-LIBGCC = stmp-multilib
-INSTALL_LIBGCC = install-multilib
diff --git a/gcc/config/sparc/t-leon b/gcc/config/sparc/t-leon
index 4f9d0a9e797..25fc61136a9 100644
--- a/gcc/config/sparc/t-leon
+++ b/gcc/config/sparc/t-leon
@@ -16,15 +16,9 @@
# along with GCC; see the file COPYING3. If not see
# <http://www.gnu.org/licenses/>.
-LIB1ASMSRC = sparc/lb1spc.asm
-LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3
-
# Multilibs for LEON
# LEON is a SPARC-V8, but the AT697 implementation has a bug in the
# V8-specific instructions.
MULTILIB_OPTIONS = mcpu=v7 msoft-float mflat
MULTILIB_DIRNAMES = v7 soft flat
MULTILIB_MATCHES = mcpu?v7=mv7 msoft-float=mno-fpu
-
-LIBGCC = stmp-multilib
-INSTALL_LIBGCC = install-multilib
diff --git a/gcc/config/sparc/t-leon3 b/gcc/config/sparc/t-leon3
index 0e7e45cc594..acdd1f2c67b 100644
--- a/gcc/config/sparc/t-leon3
+++ b/gcc/config/sparc/t-leon3
@@ -20,6 +20,3 @@
MULTILIB_OPTIONS = msoft-float
MULTILIB_DIRNAMES = soft
MULTILIB_MATCHES = msoft-float=mno-fpu
-
-LIBGCC = stmp-multilib
-INSTALL_LIBGCC = install-multilib
diff --git a/gcc/config/sparc/t-linux b/gcc/config/sparc/t-linux
deleted file mode 100644
index 30daa376614..00000000000
--- a/gcc/config/sparc/t-linux
+++ /dev/null
@@ -1,5 +0,0 @@
-# Override t-slibgcc-elf-ver to export some libgcc symbols with
-# the symbol versions that glibc used.
-# Avoid the t-linux version file.
-SHLIB_MAPFILES = $$(libgcc_objdir)/libgcc-std.ver \
- $(srcdir)/config/sparc/libgcc-sparc-glibc.ver
diff --git a/gcc/config/sparc/t-linux64 b/gcc/config/sparc/t-linux64
index 74d04898d9f..d9dfad66ce7 100644
--- a/gcc/config/sparc/t-linux64
+++ b/gcc/config/sparc/t-linux64
@@ -1,5 +1,5 @@
# Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004,
-# 2006, 2010 Free Software Foundation, Inc.
+# 2006, 2010, 2011 Free Software Foundation, Inc.
#
# This file is part of GCC.
#
@@ -27,10 +27,3 @@
MULTILIB_OPTIONS = m64/m32
MULTILIB_DIRNAMES = 64 32
MULTILIB_OSDIRNAMES = ../lib64 $(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib)
-
-LIBGCC = stmp-multilib
-INSTALL_LIBGCC = install-multilib
-
-CRTSTUFF_T_CFLAGS = `if test x$$($(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) \
- -print-multi-os-directory) \
- = x../lib64; then echo -mcmodel=medany; fi`
diff --git a/gcc/config/sparc/t-netbsd64 b/gcc/config/sparc/t-netbsd64
index 0fddb0ffe87..bc783c19366 100644
--- a/gcc/config/sparc/t-netbsd64
+++ b/gcc/config/sparc/t-netbsd64
@@ -1,8 +1,5 @@
-# Disable multilib fow now, as NetBSD/sparc64 does not ship with
+# Disable multilib for now, as NetBSD/sparc64 does not ship with
# a 32-bit environment.
#MULTILIB_OPTIONS = m32/m64
#MULTILIB_DIRNAMES = 32 64
#MULTILIB_MATCHES =
-
-#LIBGCC = stmp-multilib
-#INSTALL_LIBGCC = install-multilib