2011-11-06 Basile Starynkevitch <basile@starynkevitch.net>

MELT branch merged with trunk rev 181026 using svnmerge

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/melt-branch@181034 138bc75d-0d04-0410-961f-82ee72b054a4
author: bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4> 2011-11-06 15:16:32 +0000
committer: bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4> 2011-11-06 15:16:32 +0000
commit: 31ba6c3ff2311bad9422246f49d59c532cbb5078 (patch)
tree: 6e862e3ea14b2edf93a92c404a0d9b29f3f9ba65 /gcc/config/sparc
parent: bab85b65e545231656361b997a81fb8a44b266b4 (diff)
download: gcc-31ba6c3ff2311bad9422246f49d59c532cbb5078.tar.gz
13 files changed, 503 insertions, 1219 deletions
diff --git a/gcc/config/sparc/lb1spc.asm b/gcc/config/sparc/lb1spc.asm
deleted file mode 100644
index b60bd5740e7..00000000000
--- a/gcc/config/sparc/lb1spc.asm
+++ /dev/null
@@ -1,784 +0,0 @@
-/* This is an assembly language implementation of mulsi3, divsi3, and modsi3
-   for the sparc processor.
-
-   These routines are derived from the SPARC Architecture Manual, version 8,
-   slightly edited to match the desired calling convention, and also to
-   optimize them for our purposes.  */
-
-#ifdef L_mulsi3
-.text
-	.align 4
-	.global .umul
-	.proc 4
-.umul:
-	or	%o0, %o1, %o4	! logical or of multiplier and multiplicand
-	mov	%o0, %y		! multiplier to Y register
-	andncc	%o4, 0xfff, %o5	! mask out lower 12 bits
-	be	mul_shortway	! can do it the short way
-	andcc	%g0, %g0, %o4	! zero the partial product and clear NV cc
-	!
-	! long multiply
-	!
-	mulscc	%o4, %o1, %o4	! first iteration of 33
-	mulscc	%o4, %o1, %o4
-	mulscc	%o4, %o1, %o4
-	mulscc	%o4, %o1, %o4
-	mulscc	%o4, %o1, %o4
-	mulscc	%o4, %o1, %o4
-	mulscc	%o4, %o1, %o4
-	mulscc	%o4, %o1, %o4
-	mulscc	%o4, %o1, %o4
-	mulscc	%o4, %o1, %o4
-	mulscc	%o4, %o1, %o4
-	mulscc	%o4, %o1, %o4
-	mulscc	%o4, %o1, %o4
-	mulscc	%o4, %o1, %o4
-	mulscc	%o4, %o1, %o4
-	mulscc	%o4, %o1, %o4
-	mulscc	%o4, %o1, %o4
-	mulscc	%o4, %o1, %o4
-	mulscc	%o4, %o1, %o4
-	mulscc	%o4, %o1, %o4
-	mulscc	%o4, %o1, %o4
-	mulscc	%o4, %o1, %o4
-	mulscc	%o4, %o1, %o4
-	mulscc	%o4, %o1, %o4
-	mulscc	%o4, %o1, %o4
-	mulscc	%o4, %o1, %o4
-	mulscc	%o4, %o1, %o4
-	mulscc	%o4, %o1, %o4
-	mulscc	%o4, %o1, %o4
-	mulscc	%o4, %o1, %o4
-	mulscc	%o4, %o1, %o4
-	mulscc	%o4, %o1, %o4	! 32nd iteration
-	mulscc	%o4, %g0, %o4	! last iteration only shifts
-	! the upper 32 bits of product are wrong, but we do not care
-	retl
-	rd	%y, %o0
-	!
-	! short multiply
-	!
-mul_shortway:
-	mulscc	%o4, %o1, %o4	! first iteration of 13
-	mulscc	%o4, %o1, %o4
-	mulscc	%o4, %o1, %o4
-	mulscc	%o4, %o1, %o4
-	mulscc	%o4, %o1, %o4
-	mulscc	%o4, %o1, %o4
-	mulscc	%o4, %o1, %o4
-	mulscc	%o4, %o1, %o4
-	mulscc	%o4, %o1, %o4
-	mulscc	%o4, %o1, %o4
-	mulscc	%o4, %o1, %o4
-	mulscc	%o4, %o1, %o4	! 12th iteration
-	mulscc	%o4, %g0, %o4	! last iteration only shifts
-	rd	%y, %o5
-	sll	%o4, 12, %o4	! left shift partial product by 12 bits
-	srl	%o5, 20, %o5	! right shift partial product by 20 bits
-	retl
-	or	%o5, %o4, %o0	! merge for true product
-#endif
-
-#ifdef L_divsi3
-/*
- * Division and remainder, from Appendix E of the SPARC Version 8
- * Architecture Manual, with fixes from Gordon Irlam.
- */
-
-/*
- * Input: dividend and divisor in %o0 and %o1 respectively.
- *
- * m4 parameters:
- *  .div	name of function to generate
- *  div		div=div => %o0 / %o1; div=rem => %o0 % %o1
- *  true		true=true => signed; true=false => unsigned
- *
- * Algorithm parameters:
- *  N		how many bits per iteration we try to get (4)
- *  WORDSIZE	total number of bits (32)
- *
- * Derived constants:
- *  TOPBITS	number of bits in the top decade of a number
- *
- * Important variables:
- *  Q		the partial quotient under development (initially 0)
- *  R		the remainder so far, initially the dividend
- *  ITER	number of main division loop iterations required;
- *		equal to ceil(log2(quotient) / N).  Note that this
- *		is the log base (2^N) of the quotient.
- *  V		the current comparand, initially divisor*2^(ITER*N-1)
- *
- * Cost:
- *  Current estimate for non-large dividend is
- *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
- *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
- *  different path, as the upper bits of the quotient must be developed
- *  one bit at a time.
- */
-        .global .udiv
-        .align 4
-        .proc 4
-        .text
-.udiv:
-         b ready_to_divide
-         mov 0, %g3             ! result is always positive
-
-        .global .div
-        .align 4
-        .proc 4
-        .text
-.div:
-	! compute sign of result; if neither is negative, no problem
-	orcc	%o1, %o0, %g0	! either negative?
-	bge	ready_to_divide	! no, go do the divide
-	xor	%o1, %o0, %g3	! compute sign in any case
-	tst	%o1
-	bge	1f
-	tst	%o0
-	! %o1 is definitely negative; %o0 might also be negative
-	bge	ready_to_divide	! if %o0 not negative...
-	sub	%g0, %o1, %o1	! in any case, make %o1 nonneg
-1:	! %o0 is negative, %o1 is nonnegative
-	sub	%g0, %o0, %o0	! make %o0 nonnegative
-
-
-ready_to_divide:
-
-	! Ready to divide.  Compute size of quotient; scale comparand.
-	orcc	%o1, %g0, %o5
-	bne	1f
-	mov	%o0, %o3
-
-	! Divide by zero trap.  If it returns, return 0 (about as
-	! wrong as possible, but that is what SunOS does...).
-	ta	0x2    		! ST_DIV0
-	retl
-	clr	%o0
-
-1:
-	cmp	%o3, %o5		! if %o1 exceeds %o0, done
-	blu	got_result		! (and algorithm fails otherwise)
-	clr	%o2
-	sethi	%hi(1 << (32 - 4 - 1)), %g1
-	cmp	%o3, %g1
-	blu	not_really_big
-	clr	%o4
-
-	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
-	! as our usual N-at-a-shot divide step will cause overflow and havoc.
-	! The number of bits in the result here is N*ITER+SC, where SC <= N.
-	! Compute ITER in an unorthodox manner: know we need to shift V into
-	! the top decade: so do not even bother to compare to R.
-	1:
-		cmp	%o5, %g1
-		bgeu	3f
-		mov	1, %g2
-		sll	%o5, 4, %o5
-		b	1b
-		add	%o4, 1, %o4
-
-	! Now compute %g2.
-	2:	addcc	%o5, %o5, %o5
-		bcc	not_too_big
-		add	%g2, 1, %g2
-
-		! We get here if the %o1 overflowed while shifting.
-		! This means that %o3 has the high-order bit set.
-		! Restore %o5 and subtract from %o3.
-		sll	%g1, 4, %g1	! high order bit
-		srl	%o5, 1, %o5	! rest of %o5
-		add	%o5, %g1, %o5
-		b	do_single_div
-		sub	%g2, 1, %g2
-
-	not_too_big:
-	3:	cmp	%o5, %o3
-		blu	2b
-		nop
-		be	do_single_div
-		nop
-	/* NB: these are commented out in the V8-SPARC manual as well */
-	/* (I do not understand this) */
-	! %o5 > %o3: went too far: back up 1 step
-	!	srl	%o5, 1, %o5
-	!	dec	%g2
-	! do single-bit divide steps
-	!
-	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
-	! first divide step without thinking.  BUT, the others are conditional,
-	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
-	! order bit set in the first step, just falling into the regular
-	! division loop will mess up the first time around.
-	! So we unroll slightly...
-	do_single_div:
-		subcc	%g2, 1, %g2
-		bl	end_regular_divide
-		nop
-		sub	%o3, %o5, %o3
-		mov	1, %o2
-		b	end_single_divloop
-		nop
-	single_divloop:
-		sll	%o2, 1, %o2
-		bl	1f
-		srl	%o5, 1, %o5
-		! %o3 >= 0
-		sub	%o3, %o5, %o3
-		b	2f
-		add	%o2, 1, %o2
-	1:	! %o3 < 0
-		add	%o3, %o5, %o3
-		sub	%o2, 1, %o2
-	2:
-	end_single_divloop:
-		subcc	%g2, 1, %g2
-		bge	single_divloop
-		tst	%o3
-		b,a	end_regular_divide
-
-not_really_big:
-1:
-	sll	%o5, 4, %o5
-	cmp	%o5, %o3
-	bleu	1b
-	addcc	%o4, 1, %o4
-	be	got_result
-	sub	%o4, 1, %o4
-
-	tst	%o3	! set up for initial iteration
-divloop:
-	sll	%o2, 4, %o2
-	! depth 1, accumulated bits 0
-	bl	L1.16
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	! depth 2, accumulated bits 1
-	bl	L2.17
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	! depth 3, accumulated bits 3
-	bl	L3.19
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	! depth 4, accumulated bits 7
-	bl	L4.23
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	add	%o2, (7*2+1), %o2
-	
-L4.23:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	add	%o2, (7*2-1), %o2
-	
-	
-L3.19:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	! depth 4, accumulated bits 5
-	bl	L4.21
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	add	%o2, (5*2+1), %o2
-	
-L4.21:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	add	%o2, (5*2-1), %o2
-	
-L2.17:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	! depth 3, accumulated bits 1
-	bl	L3.17
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	! depth 4, accumulated bits 3
-	bl	L4.19
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	add	%o2, (3*2+1), %o2
-	
-L4.19:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	add	%o2, (3*2-1), %o2
-
-L3.17:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	! depth 4, accumulated bits 1
-	bl	L4.17
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	add	%o2, (1*2+1), %o2
-
-L4.17:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	add	%o2, (1*2-1), %o2
-	
-L1.16:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	! depth 2, accumulated bits -1
-	bl	L2.15
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	! depth 3, accumulated bits -1
-	bl	L3.15
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	! depth 4, accumulated bits -1
-	bl	L4.15
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	add	%o2, (-1*2+1), %o2
-	
-L4.15:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	add	%o2, (-1*2-1), %o2
-	
-L3.15:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	! depth 4, accumulated bits -3
-	bl	L4.13
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	add	%o2, (-3*2+1), %o2
-	
-L4.13:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	add	%o2, (-3*2-1), %o2
-	
-L2.15:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	! depth 3, accumulated bits -3
-	bl	L3.13
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	! depth 4, accumulated bits -5
-	bl	L4.11
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	add	%o2, (-5*2+1), %o2
-	
-L4.11:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	add	%o2, (-5*2-1), %o2
-	
-L3.13:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	! depth 4, accumulated bits -7
-	bl	L4.9
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	add	%o2, (-7*2+1), %o2
-
-L4.9:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	add	%o2, (-7*2-1), %o2
-	
-	9:
-end_regular_divide:
-	subcc	%o4, 1, %o4
-	bge	divloop
-	tst	%o3
-	bl,a	got_result
-	! non-restoring fixup here (one instruction only!)
-	sub	%o2, 1, %o2
-
-
-got_result:
-	! check to see if answer should be < 0
-	tst	%g3
-	bl,a	1f
-	sub %g0, %o2, %o2
-1:
-	retl
-	mov %o2, %o0
-#endif
-
-#ifdef L_modsi3
-/* This implementation was taken from glibc:
- *
- * Input: dividend and divisor in %o0 and %o1 respectively.
- *
- * Algorithm parameters:
- *  N		how many bits per iteration we try to get (4)
- *  WORDSIZE	total number of bits (32)
- *
- * Derived constants:
- *  TOPBITS	number of bits in the top decade of a number
- *
- * Important variables:
- *  Q		the partial quotient under development (initially 0)
- *  R		the remainder so far, initially the dividend
- *  ITER	number of main division loop iterations required;
- *		equal to ceil(log2(quotient) / N).  Note that this
- *		is the log base (2^N) of the quotient.
- *  V		the current comparand, initially divisor*2^(ITER*N-1)
- *
- * Cost:
- *  Current estimate for non-large dividend is
- *	ceil(log2(quotient) / N) * (10 + 7N/2) + C
- *  A large dividend is one greater than 2^(31-TOPBITS) and takes a
- *  different path, as the upper bits of the quotient must be developed
- *  one bit at a time.
- */
-.text
-	.align 4
-	.global	.urem
-	.proc 4
-.urem:
-	b	divide
-	mov	0, %g3		! result always positive
-
-        .align 4
-	.global .rem
-	.proc 4
-.rem:
-	! compute sign of result; if neither is negative, no problem
-	orcc	%o1, %o0, %g0	! either negative?
-	bge	2f			! no, go do the divide
-	mov	%o0, %g3		! sign of remainder matches %o0
-	tst	%o1
-	bge	1f
-	tst	%o0
-	! %o1 is definitely negative; %o0 might also be negative
-	bge	2f			! if %o0 not negative...
-	sub	%g0, %o1, %o1	! in any case, make %o1 nonneg
-1:	! %o0 is negative, %o1 is nonnegative
-	sub	%g0, %o0, %o0	! make %o0 nonnegative
-2:
-
-	! Ready to divide.  Compute size of quotient; scale comparand.
-divide:
-	orcc	%o1, %g0, %o5
-	bne	1f
-	mov	%o0, %o3
-
-		! Divide by zero trap.  If it returns, return 0 (about as
-		! wrong as possible, but that is what SunOS does...).
-		ta	0x2   !ST_DIV0
-		retl
-		clr	%o0
-
-1:
-	cmp	%o3, %o5		! if %o1 exceeds %o0, done
-	blu	got_result		! (and algorithm fails otherwise)
-	clr	%o2
-	sethi	%hi(1 << (32 - 4 - 1)), %g1
-	cmp	%o3, %g1
-	blu	not_really_big
-	clr	%o4
-
-	! Here the dividend is >= 2**(31-N) or so.  We must be careful here,
-	! as our usual N-at-a-shot divide step will cause overflow and havoc.
-	! The number of bits in the result here is N*ITER+SC, where SC <= N.
-	! Compute ITER in an unorthodox manner: know we need to shift V into
-	! the top decade: so do not even bother to compare to R.
-	1:
-		cmp	%o5, %g1
-		bgeu	3f
-		mov	1, %g2
-		sll	%o5, 4, %o5
-		b	1b
-		add	%o4, 1, %o4
-
-	! Now compute %g2.
-	2:	addcc	%o5, %o5, %o5
-		bcc	not_too_big
-		add	%g2, 1, %g2
-
-		! We get here if the %o1 overflowed while shifting.
-		! This means that %o3 has the high-order bit set.
-		! Restore %o5 and subtract from %o3.
-		sll	%g1, 4, %g1	! high order bit
-		srl	%o5, 1, %o5		! rest of %o5
-		add	%o5, %g1, %o5
-		b	do_single_div
-		sub	%g2, 1, %g2
-
-	not_too_big:
-	3:	cmp	%o5, %o3
-		blu	2b
-		nop
-		be	do_single_div
-		nop
-	/* NB: these are commented out in the V8-SPARC manual as well */
-	/* (I do not understand this) */
-	! %o5 > %o3: went too far: back up 1 step
-	!	srl	%o5, 1, %o5
-	!	dec	%g2
-	! do single-bit divide steps
-	!
-	! We have to be careful here.  We know that %o3 >= %o5, so we can do the
-	! first divide step without thinking.  BUT, the others are conditional,
-	! and are only done if %o3 >= 0.  Because both %o3 and %o5 may have the high-
-	! order bit set in the first step, just falling into the regular
-	! division loop will mess up the first time around.
-	! So we unroll slightly...
-	do_single_div:
-		subcc	%g2, 1, %g2
-		bl	end_regular_divide
-		nop
-		sub	%o3, %o5, %o3
-		mov	1, %o2
-		b	end_single_divloop
-		nop
-	single_divloop:
-		sll	%o2, 1, %o2
-		bl	1f
-		srl	%o5, 1, %o5
-		! %o3 >= 0
-		sub	%o3, %o5, %o3
-		b	2f
-		add	%o2, 1, %o2
-	1:	! %o3 < 0
-		add	%o3, %o5, %o3
-		sub	%o2, 1, %o2
-	2:
-	end_single_divloop:
-		subcc	%g2, 1, %g2
-		bge	single_divloop
-		tst	%o3
-		b,a	end_regular_divide
-
-not_really_big:
-1:
-	sll	%o5, 4, %o5
-	cmp	%o5, %o3
-	bleu	1b
-	addcc	%o4, 1, %o4
-	be	got_result
-	sub	%o4, 1, %o4
-
-	tst	%o3	! set up for initial iteration
-divloop:
-	sll	%o2, 4, %o2
-		! depth 1, accumulated bits 0
-	bl	L1.16
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	! depth 2, accumulated bits 1
-	bl	L2.17
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	! depth 3, accumulated bits 3
-	bl	L3.19
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	! depth 4, accumulated bits 7
-	bl	L4.23
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	add	%o2, (7*2+1), %o2
-L4.23:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	add	%o2, (7*2-1), %o2
-	
-L3.19:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	! depth 4, accumulated bits 5
-	bl	L4.21
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	add	%o2, (5*2+1), %o2
-	
-L4.21:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	add	%o2, (5*2-1), %o2
-	
-L2.17:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	! depth 3, accumulated bits 1
-	bl	L3.17
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	! depth 4, accumulated bits 3
-	bl	L4.19
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	add	%o2, (3*2+1), %o2
-	
-L4.19:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	add	%o2, (3*2-1), %o2
-	
-L3.17:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	! depth 4, accumulated bits 1
-	bl	L4.17
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	add	%o2, (1*2+1), %o2
-	
-L4.17:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	add	%o2, (1*2-1), %o2
-	
-L1.16:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	! depth 2, accumulated bits -1
-	bl	L2.15
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	! depth 3, accumulated bits -1
-	bl	L3.15
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	! depth 4, accumulated bits -1
-	bl	L4.15
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	add	%o2, (-1*2+1), %o2
-	
-L4.15:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	add	%o2, (-1*2-1), %o2
-	
-L3.15:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	! depth 4, accumulated bits -3
-	bl	L4.13
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	add	%o2, (-3*2+1), %o2
-	
-L4.13:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	add	%o2, (-3*2-1), %o2
-	
-L2.15:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	! depth 3, accumulated bits -3
-	bl	L3.13
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	! depth 4, accumulated bits -5
-	bl	L4.11
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	add	%o2, (-5*2+1), %o2
-	
-L4.11:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	add	%o2, (-5*2-1), %o2
-	
-L3.13:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	! depth 4, accumulated bits -7
-	bl	L4.9
-	srl	%o5,1,%o5
-	! remainder is positive
-	subcc	%o3,%o5,%o3
-	b	9f
-	add	%o2, (-7*2+1), %o2
-	
-L4.9:
-	! remainder is negative
-	addcc	%o3,%o5,%o3
-	b	9f
-	add	%o2, (-7*2-1), %o2
-	
-	9:
-end_regular_divide:
-	subcc	%o4, 1, %o4
-	bge	divloop
-	tst	%o3
-	bl,a	got_result
-	! non-restoring fixup here (one instruction only!)
-	add	%o3, %o1, %o3
-
-got_result:
-	! check to see if answer should be < 0
-	tst	%g3
-	bl,a	1f
-	sub %g0, %o3, %o3
-1:
-	retl
-	mov %o3, %o0
-
-#endif
-
diff --git a/gcc/config/sparc/lb1spl.asm b/gcc/config/sparc/lb1spl.asm
deleted file mode 100644
index 973401f8018..00000000000
--- a/gcc/config/sparc/lb1spl.asm
+++ /dev/null
@@ -1,246 +0,0 @@
-/* This is an assembly language implementation of mulsi3, divsi3, and modsi3
-   for the sparclite processor.
-
-   These routines are all from the SPARClite User's Guide, slightly edited
-   to match the desired calling convention, and also to optimize them.  */
-
-#ifdef L_udivsi3
-.text
-	.align 4
-	.global .udiv
-	.proc	04
-.udiv:
-	wr	%g0,%g0,%y	! Not a delayed write for sparclite
-	tst	%g0
-	divscc	%o0,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	retl
-	divscc	%g1,%o1,%o0
-#endif
-
-#ifdef L_umodsi3
-.text
-	.align 4
-	.global .urem
-	.proc	04
-.urem:
-	wr	%g0,%g0,%y	! Not a delayed write for sparclite
-	tst	%g0
-	divscc	%o0,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	divscc	%g1,%o1,%g1
-	bl 1f
-	rd	%y,%o0
-	retl
-	nop
-1:	retl
-	add	%o0,%o1,%o0
-#endif
-
-#ifdef L_divsi3
-.text
-	.align 4
-	.global .div
-	.proc	04
-! ??? This routine could be made faster if was optimized, and if it was
-! rewritten to only calculate the quotient.
-.div:
-	wr	%g0,%g0,%y	! Not a delayed write for sparclite
-	mov	%o1,%o4
-	tst	%o1
-	bl,a	1f
-	sub	%g0,%o4,%o4
-1:	tst	%o0
-	bl,a	2f
-	mov	-1,%y
-2:	divscc	%o0,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	be	6f
-	mov	%y,%o3
-	bg	4f
-	addcc	%o3,%o4,%g0
-	be,a	6f
-	mov	%g0,%o3
-	tst	%o0
-	bl	5f
-	tst	%g1
-	ba	5f
-	add	%o3,%o4,%o3
-4:	subcc	%o3,%o4,%g0
-	be,a	6f
-	mov	%g0,%o3
-	tst	%o0
-	bge	5f
-	tst	%g1
-	sub	%o3,%o4,%o3
-5:	bl,a	6f
-	add	%g1,1,%g1
-6:	tst	%o1
-	bl,a	7f
-	sub	%g0,%g1,%g1
-7:	retl
-	mov	%g1,%o0		! Quotient is in %g1.
-#endif
-
-#ifdef L_modsi3
-.text
-	.align 4
-	.global .rem
-	.proc	04
-! ??? This routine could be made faster if was optimized, and if it was
-! rewritten to only calculate the remainder.
-.rem:
-	wr	%g0,%g0,%y	! Not a delayed write for sparclite
-	mov	%o1,%o4
-	tst	%o1
-	bl,a	1f
-	sub	%g0,%o4,%o4
-1:	tst	%o0
-	bl,a	2f
-	mov	-1,%y
-2:	divscc	%o0,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	divscc	%g1,%o4,%g1
-	be	6f
-	mov	%y,%o3
-	bg	4f
-	addcc	%o3,%o4,%g0
-	be,a	6f
-	mov	%g0,%o3
-	tst	%o0
-	bl	5f
-	tst	%g1
-	ba	5f
-	add	%o3,%o4,%o3
-4:	subcc	%o3,%o4,%g0
-	be,a	6f
-	mov	%g0,%o3
-	tst	%o0
-	bge	5f
-	tst	%g1
-	sub	%o3,%o4,%o3
-5:	bl,a	6f
-	add	%g1,1,%g1
-6:	tst	%o1
-	bl,a	7f
-	sub	%g0,%g1,%g1
-7:	retl
-	mov	%o3,%o0		! Remainder is in %o3.
-#endif
diff --git a/gcc/config/sparc/libgcc-sparc-glibc.ver b/gcc/config/sparc/libgcc-sparc-glibc.ver
deleted file mode 100644
index 91138d3795e..00000000000
--- a/gcc/config/sparc/libgcc-sparc-glibc.ver
+++ /dev/null
@@ -1,93 +0,0 @@
-# Copyright (C) 2002, 2006, 2008 Free Software Foundation, Inc.
-#
-# This file is part of GCC.
-#
-# GCC is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 3, or (at your option)
-# any later version.
-#
-# GCC is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with GCC; see the file COPYING3.  If not see
-# <http://www.gnu.org/licenses/>.
-
-# In order to work around the very problems that force us to now generally
-# create a libgcc.so, glibc reexported a number of routines from libgcc.a.
-# By now choosing the same version tags for these specific routines, we
-# maintain enough binary compatibility to allow future versions of glibc
-# to defer implementation of these routines to libgcc.so via DT_AUXILIARY.
-
-%exclude {
-  __divdi3
-  __moddi3
-  __udivdi3
-  __umoddi3
-  __register_frame
-  __register_frame_table
-  __deregister_frame
-  __register_frame_info
-  __deregister_frame_info
-  __frame_state_for
-  __register_frame_info_table
-}
-
-%ifdef __arch64__
-%define GLIBC_VER GLIBC_2.2
-%else
-%define GLIBC_VER GLIBC_2.0
-%endif
-%inherit GCC_3.0 GLIBC_VER
-GLIBC_VER {
-  # Sampling of DImode arithmetic used by (at least) i386 and m68k.
-  __divdi3
-  __moddi3
-  __udivdi3
-  __umoddi3
-
-  # Exception handling support functions used by most everyone.
-  __register_frame
-  __register_frame_table
-  __deregister_frame
-  __register_frame_info
-  __deregister_frame_info
-  __frame_state_for
-  __register_frame_info_table
-}
-
-%if !defined (__arch64__) && defined (__LONG_DOUBLE_128__)
-
-# long double 128 bit support from 32-bit libgcc_s.so.1 is only available
-# when configured with --with-long-double-128.  Make sure all the
-# symbols are available at @@GCC_LDBL_* versions to make it clear
-# there is a configurable symbol set.
-
-%exclude {
-  __fixtfdi
-  __fixunstfdi
-  __floatditf
-
-  __divtc3
-  __multc3
-  __powitf2
-}
-
-%inherit GCC_LDBL_3.0 GCC_3.0
-GCC_LDBL_3.0 {
-  __fixtfdi
-  __fixunstfdi
-  __floatditf
-}
-
-%inherit GCC_LDBL_4.0.0 GCC_4.0.0
-GCC_LDBL_4.0.0 {
-  __divtc3
-  __multc3
-  __powitf2
-}
-
-%endif
diff --git a/gcc/config/sparc/sparc-protos.h b/gcc/config/sparc/sparc-protos.h
index 108e105cbea..b9a094e160a 100644
--- a/gcc/config/sparc/sparc-protos.h
+++ b/gcc/config/sparc/sparc-protos.h
@@ -108,6 +108,7 @@ extern const char *output_v8plus_mult (rtx, rtx *, const char *);
 extern void sparc_expand_vector_init (rtx, rtx);
 extern void sparc_expand_vec_perm_bmask(enum machine_mode, rtx);
 extern bool sparc_expand_conditional_move (enum machine_mode, rtx *);
+extern void sparc_expand_vcond (enum machine_mode, rtx *, int, int);
 #endif /* RTX_CODE */
 
 #endif /* __SPARC_PROTOS_H__ */
diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c
index 3883dbd21d8..5d22fc0313e 100644
--- a/gcc/config/sparc/sparc.c
+++ b/gcc/config/sparc/sparc.c
@@ -3440,7 +3440,7 @@ sparc_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict)
 	     REG+REG address, then only one of them gets converted to an
 	     offsettable address.  */
 	  if (mode == TFmode
-	      && ! (TARGET_FPU && TARGET_ARCH64 && TARGET_HARD_QUAD))
+	      && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
 	    return 0;
 
 	  /* We prohibit REG + REG on ARCH32 if not optimizing for
@@ -11279,20 +11279,366 @@ output_v8plus_mult (rtx insn, rtx *operands, const char *name)
     }
 }
 
+static void
+vector_init_bshuffle (rtx target, rtx *locs, int n_elts, enum machine_mode mode,
+		      enum machine_mode inner_mode)
+{
+  rtx mid_target, r0_high, r0_low, r1_high, r1_low;
+  enum machine_mode partial_mode;
+  int bmask, i, idxs[8];
+
+  partial_mode = (mode == V4HImode
+		  ? V2HImode
+		  : (mode == V8QImode
+		     ? V4QImode : mode));
+
+  r0_high = r0_low = NULL_RTX;
+  r1_high = r1_low = NULL_RTX;
+
+  /* Move the pieces into place, as needed, and calculate the nibble
+     indexes for the bmask calculation.  After we execute this loop the
+     locs[] array is no longer needed.  Therefore, to simplify things,
+     we set entries that have been processed already to NULL_RTX.  */
+
+  for (i = 0; i < n_elts; i++)
+    {
+      int j;
+
+      if (locs[i] == NULL_RTX)
+	continue;
+
+      if (!r0_low)
+	{
+	  r0_low = locs[i];
+	  idxs[i] = 0x7;
+	}
+      else if (!r1_low)
+	{
+	  r1_low = locs[i];
+	  idxs[i] = 0xf;
+	}
+      else if (!r0_high)
+	{
+	  r0_high = gen_highpart (partial_mode, r0_low);
+	  emit_move_insn (r0_high, gen_lowpart (partial_mode, locs[i]));
+	  idxs[i] = 0x3;
+	}
+      else if (!r1_high)
+	{
+	  r1_high = gen_highpart (partial_mode, r1_low);
+	  emit_move_insn (r1_high, gen_lowpart (partial_mode, locs[i]));
+	  idxs[i] = 0xb;
+	}
+      else
+	gcc_unreachable ();
+
+      for (j = i + 1; j < n_elts; j++)
+	{
+	  if (locs[j] == locs[i])
+	    {
+	      locs[j] = NULL_RTX;
+	      idxs[j] = idxs[i];
+	    }
+	}
+      locs[i] = NULL_RTX;
+    }
+
+  bmask = 0;
+  for (i = 0; i < n_elts; i++)
+    {
+      int v = idxs[i];
+
+      switch (GET_MODE_SIZE (inner_mode))
+	{
+	case 2:
+	  bmask <<= 8;
+	  bmask |= (((v - 1) << 4) | v);
+	  break;
+
+	case 1:
+	  bmask <<= 4;
+	  bmask |= v;
+	  break;
+
+	default:
+	  gcc_unreachable ();
+	}
+    }
+
+  emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), CONST0_RTX (SImode),
+			      force_reg (SImode, GEN_INT (bmask))));
+
+  mid_target = target;
+  if (GET_MODE_SIZE (mode) == 4)
+    {
+      mid_target = gen_reg_rtx (mode == V2HImode
+				? V4HImode : V8QImode);
+    }
+
+  if (!r1_low)
+    r1_low = r0_low;
+
+  switch (GET_MODE (mid_target))
+    {
+    case V4HImode:
+      emit_insn (gen_bshufflev4hi_vis (mid_target, r0_low, r1_low));
+      break;
+    case V8QImode:
+      emit_insn (gen_bshufflev8qi_vis (mid_target, r0_low, r1_low));
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  if (mid_target != target)
+    emit_move_insn (target, gen_lowpart (partial_mode, mid_target));
+}
+
+static bool
+vector_init_move_words (rtx target, rtx vals, enum machine_mode mode,
+			enum machine_mode inner_mode)
+{
+  switch (mode)
+    {
+    case V1SImode:
+    case V1DImode:
+      emit_move_insn (gen_lowpart (inner_mode, target),
+		      gen_lowpart (inner_mode, XVECEXP (vals, 0, 0)));
+      return true;
+
+    case V2SImode:
+      emit_move_insn (gen_highpart (SImode, target), XVECEXP (vals, 0, 0));
+      emit_move_insn (gen_lowpart (SImode, target), XVECEXP (vals, 0, 1));
+      return true;
+
+    default:
+      break;
+    }
+  return false;
+}
+
+/* Move the elements in rtvec VALS into registers compatible with MODE.
+   Store the rtx for these regs into the corresponding array entry of
+   LOCS.  */
+static void
+vector_init_prepare_elts (rtx vals, int n_elts, rtx *locs, enum machine_mode mode,
+			  enum machine_mode inner_mode)
+{
+  enum machine_mode loc_mode;
+  int i;
+
+  switch (mode)
+    {
+    case V2HImode:
+      loc_mode = V4HImode;
+      break;
+
+    case V4QImode:
+      loc_mode = V8QImode;
+      break;
+
+    case V4HImode:
+    case V8QImode:
+      loc_mode = mode;
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  gcc_assert (GET_MODE_SIZE (inner_mode) <= 4);
+  for (i = 0; i < n_elts; i++)
+    {
+      rtx dst, elt = XVECEXP (vals, 0, i);
+      int j;
+
+      /* Did we see this already?  If so just record its location.  */
+      dst = NULL_RTX;
+      for (j = 0; j < i; j++)
+	{
+	  if (XVECEXP (vals, 0, j) == elt)
+	    {
+	      dst = locs[j];
+	      break;
+	    }
+	}
+
+      if (! dst)
+	{
+	  enum rtx_code code = GET_CODE (elt);
+
+	  dst = gen_reg_rtx (loc_mode);
+
+	  /* We use different strategies based upon whether the element
+	     is in memory or in a register.  When we start in a register
+	     and we're VIS3 capable, it's always cheaper to use the VIS3
+	     int-->fp register moves since we avoid having to use stack
+	     memory.  */
+	  if ((TARGET_VIS3 && (code == REG || code == SUBREG))
+	      || (CONSTANT_P (elt)
+		  && (const_zero_operand (elt, inner_mode)
+		      || const_all_ones_operand (elt, inner_mode))))
+	    {
+	      elt = convert_modes (SImode, inner_mode, elt, true);
+
+	      emit_clobber (dst);
+	      emit_move_insn (gen_lowpart (SImode, dst), elt);
+	    }
+	  else
+	    {
+	      rtx m = elt;
+
+	      if (CONSTANT_P (elt))
+		{
+		  m = force_const_mem (inner_mode, elt);
+		}
+	      else if (code != MEM)
+		{
+		  rtx stk = assign_stack_temp (inner_mode, GET_MODE_SIZE(inner_mode), 0);
+		  emit_move_insn (stk, elt);
+		  m = stk;
+		}
+
+	      switch (loc_mode)
+		{
+		case V4HImode:
+		  emit_insn (gen_zero_extend_v4hi_vis (dst, m));
+		  break;
+		case V8QImode:
+		  emit_insn (gen_zero_extend_v8qi_vis (dst, m));
+		  break;
+		default:
+		  gcc_unreachable ();
+		}
+	    }
+	}
+      locs[i] = dst;
+    }
+}
+
+static void
+sparc_expand_vector_init_vis2 (rtx target, rtx *locs, int n_elts, int n_unique,
+			       enum machine_mode mode,
+			       enum machine_mode inner_mode)
+{
+  if (n_unique <= 4)
+    {
+      vector_init_bshuffle (target, locs, n_elts, mode, inner_mode);
+    }
+  else
+    {
+      int i;
+
+      gcc_assert (mode == V8QImode);
+
+      emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
+				      force_reg (SImode, GEN_INT (7)),
+				      CONST0_RTX (SImode)));
+      i = n_elts - 1;
+      emit_insn (gen_faligndatav8qi_vis (target, locs[i], locs[i]));
+      while (--i >= 0)
+	emit_insn (gen_faligndatav8qi_vis (target, locs[i], target));
+    }
+}
+
+static void
+sparc_expand_vector_init_vis1 (rtx target, rtx *locs, int n_elts, int n_unique,
+			       enum machine_mode mode)
+{
+  enum machine_mode full_mode = mode;
+  rtx (*emitter)(rtx, rtx, rtx);
+  int alignaddr_val, i;
+  rtx tmp = target;
+
+  if (n_unique == 1 && mode == V8QImode)
+    {
+      rtx t2, t2_low, t1;
+
+      t1 = gen_reg_rtx (V4QImode);
+      emit_move_insn (t1, gen_lowpart (V4QImode, locs[0]));
+
+      t2 = gen_reg_rtx (V8QImode);
+      t2_low = gen_lowpart (V4QImode, t2);
+
+      /* xxxxxxAA --> xxxxxxxxxxxxAAAA
+         xxxxAAAA --> xxxxxxxxAAAAAAAA
+         AAAAAAAA --> AAAAAAAAAAAAAAAA */
+      emit_insn (gen_fpmerge_vis (t2, t1, t1));
+      emit_move_insn (t1, t2_low);
+      emit_insn (gen_fpmerge_vis (t2, t1, t1));
+      emit_move_insn (t1, t2_low);
+      emit_insn (gen_fpmerge_vis (target, t1, t1));
+      return;
+    }
+
+  switch (mode)
+    {
+    case V2HImode:
+      full_mode = V4HImode;
+      /* FALLTHRU */
+    case V4HImode:
+      emitter = gen_faligndatav4hi_vis;
+      alignaddr_val = 6;
+      break;
+
+    case V4QImode:
+      full_mode = V8QImode;
+      /* FALLTHRU */
+    case V8QImode:
+      emitter = gen_faligndatav8qi_vis;
+      alignaddr_val = 7;
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  if (full_mode != mode)
+    tmp = gen_reg_rtx (full_mode);
+
+  emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
+				  force_reg (SImode, GEN_INT (alignaddr_val)),
+				  CONST0_RTX (SImode)));
+
+  i = n_elts - 1;
+  emit_insn (emitter (tmp, locs[i], locs[i]));
+  while (--i >= 0)
+    emit_insn (emitter (tmp, locs[i], tmp));
+
+  if (tmp != target)
+    emit_move_insn (target, gen_highpart (mode, tmp));
+}
+
 void
 sparc_expand_vector_init (rtx target, rtx vals)
 {
   enum machine_mode mode = GET_MODE (target);
   enum machine_mode inner_mode = GET_MODE_INNER (mode);
   int n_elts = GET_MODE_NUNITS (mode);
-  int i, n_var = 0;
-  rtx mem;
+  int i, n_var = 0, n_unique = 0;
+  rtx locs[8];
+
+  gcc_assert (n_elts <= 8);
 
   for (i = 0; i < n_elts; i++)
     {
       rtx x = XVECEXP (vals, 0, i);
+      bool found = false;
+      int j;
+
       if (!CONSTANT_P (x))
 	n_var++;
+
+      for (j = 0; j < i; j++)
+	{
+	  if (rtx_equal_p (x, XVECEXP (vals, 0, j)))
+	    {
+	      found = true;
+	      break;
+	    }
+	}
+      if (!found)
+	n_unique++;
     }
 
   if (n_var == 0)
@@ -11301,12 +11647,16 @@ sparc_expand_vector_init (rtx target, rtx vals)
       return;
     }
 
-  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
-  for (i = 0; i < n_elts; i++)
-    emit_move_insn (adjust_address_nv (mem, inner_mode,
-				    i * GET_MODE_SIZE (inner_mode)),
-		    XVECEXP (vals, 0, i));
-  emit_move_insn (target, mem);
+  if (vector_init_move_words (target, vals, mode, inner_mode))
+    return;
+
+  vector_init_prepare_elts (vals, n_elts, locs, mode, inner_mode);
+
+  if (TARGET_VIS2)
+    sparc_expand_vector_init_vis2 (target, locs, n_elts, n_unique,
+				   mode, inner_mode);
+  else
+    sparc_expand_vector_init_vis1 (target, locs, n_elts, n_unique, mode);
 }
 
 static reg_class_t
@@ -11380,12 +11730,16 @@ sparc_expand_conditional_move (enum machine_mode mode, rtx *operands)
   rtx cc_reg, dst, cmp;
 
   cmp = operands[1];
-  cmp_mode = GET_MODE (XEXP (cmp, 0));
-  if (cmp_mode == DImode && !TARGET_ARCH64)
+  if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
     return false;
 
-  dst = operands[0];
+  if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
+    cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
+
+  cmp_mode = GET_MODE (XEXP (cmp, 0));
+  rc = GET_CODE (cmp);
 
+  dst = operands[0];
   if (! rtx_equal_p (operands[2], dst)
       && ! rtx_equal_p (operands[3], dst))
     {
@@ -11404,9 +11758,6 @@ sparc_expand_conditional_move (enum machine_mode mode, rtx *operands)
         rc = reverse_condition (rc);
     }
 
-  if (cmp_mode == TFmode && !TARGET_HARD_QUAD)
-    cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
-
   if (XEXP (cmp, 1) == const0_rtx
       && GET_CODE (XEXP (cmp, 0)) == REG
       && cmp_mode == DImode
@@ -11426,4 +11777,41 @@ sparc_expand_conditional_move (enum machine_mode mode, rtx *operands)
   return true;
 }
 
+void
+sparc_expand_vcond (enum machine_mode mode, rtx *operands, int ccode, int fcode)
+{
+  rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
+  enum rtx_code code = GET_CODE (operands[3]);
+
+  mask = gen_reg_rtx (Pmode);
+  cop0 = operands[4];
+  cop1 = operands[5];
+  if (code == LT || code == GE)
+    {
+      rtx t;
+
+      code = swap_condition (code);
+      t = cop0; cop0 = cop1; cop1 = t;
+    }
+
+  gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
+
+  fcmp = gen_rtx_UNSPEC (Pmode,
+			 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
+			 fcode);
+
+  cmask = gen_rtx_UNSPEC (DImode,
+			  gen_rtvec (2, mask, gsr),
+			  ccode);
+
+  bshuf = gen_rtx_UNSPEC (mode,
+			  gen_rtvec (3, operands[1], operands[2], gsr),
+			  UNSPEC_BSHUFFLE);
+
+  emit_insn (gen_rtx_SET (VOIDmode, mask, fcmp));
+  emit_insn (gen_rtx_SET (VOIDmode, gsr, cmask));
+
+  emit_insn (gen_rtx_SET (VOIDmode, operands[0], bshuf));
+}
+
 #include "gt-sparc.h"
diff --git a/gcc/config/sparc/sparc.h b/gcc/config/sparc/sparc.h
index 65b45271890..e8707f50577 100644
--- a/gcc/config/sparc/sparc.h
+++ b/gcc/config/sparc/sparc.h
@@ -328,6 +328,7 @@ extern enum cmodel sparc_cmodel;
 %{mcpu=sparclite:-Asparclite} \
 %{mcpu=sparclite86x:-Asparclite} \
 %{mcpu=f930:-Asparclite} %{mcpu=f934:-Asparclite} \
+%{mcpu=v8:-Av8} \
 %{mv8plus:-Av8plus} \
 %{mcpu=v9:-Av9} \
 %{mcpu=ultrasparc:%{!mv8plus:-Av9a}} \
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index 6dd390952c3..7452f96c9d3 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -92,6 +92,7 @@
    (UNSPEC_MUL8			86)
    (UNSPEC_MUL8SU		87)
    (UNSPEC_MULDSU		88)
+   (UNSPEC_SHORT_LOAD		89)
   ])
 
 (define_constants
@@ -2041,8 +2042,8 @@
 })
 
 (define_insn "*movsf_insn"
-  [(set (match_operand:SF 0 "nonimmediate_operand" "=d,d,f, *r,*r,*r,*r, f, f,*r, m,   m")
-	(match_operand:SF 1 "input_operand"         "G,C,f,*rR, Q, S, f,*r, m, m, f,*rG"))]
+  [(set (match_operand:SF 0 "nonimmediate_operand" "=d,d,f, *r,*r,*r,*r, f,f,*r,m,  m")
+	(match_operand:SF 1 "input_operand"         "G,C,f,*rR, Q, S, f,*r,m, m,f,*rG"))]
   "(register_operand (operands[0], SFmode)
     || register_or_zero_or_all_ones_operand (operands[1], SFmode))"
 {
@@ -2138,8 +2139,8 @@
 })
 
 (define_insn "*movdf_insn_sp32"
-  [(set (match_operand:DF 0 "nonimmediate_operand" "=b,b,e,e,*r, f,  e,T,W,U,T,  f,   *r,  o,o")
-        (match_operand:DF 1 "input_operand"         "G,C,e,e, f,*r,W#F,G,e,T,U,o#F,*roGF,*rG,f"))]
+  [(set (match_operand:DF 0 "nonimmediate_operand" "=b,b,e,e,*r, f,  e,T,W,U,T,  f,  *r,  o,o")
+	(match_operand:DF 1 "input_operand"         "G,C,e,e, f,*r,W#F,G,e,T,U,o#F,*roF,*rG,f"))]
   "! TARGET_ARCH64
    && (register_operand (operands[0], DFmode)
        || register_or_zero_or_all_ones_operand (operands[1], DFmode))"
@@ -2166,7 +2167,7 @@
 
 (define_insn "*movdf_insn_sp64"
   [(set (match_operand:DF 0 "nonimmediate_operand" "=b,b,e,*r, e,  e,W, *r,*r,  m,*r")
-        (match_operand:DF 1 "input_operand"         "G,C,e, e,*r,W#F,e,*rG, m,*rG, F"))]
+	(match_operand:DF 1 "input_operand"         "G,C,e, e,*r,W#F,e,*rG, m,*rG, F"))]
   "TARGET_ARCH64
    && (register_operand (operands[0], DFmode)
        || register_or_zero_or_all_ones_operand (operands[1], DFmode))"
@@ -2191,9 +2192,8 @@
 (define_split
   [(set (match_operand:DF 0 "register_operand" "")
         (match_operand:DF 1 "const_double_operand" ""))]
-  "TARGET_FPU
-   && (GET_CODE (operands[0]) == REG
-       && SPARC_INT_REG_P (REGNO (operands[0])))
+  "REG_P (operands[0])
+   && SPARC_INT_REG_P (REGNO (operands[0]))
    && ! const_zero_operand (operands[1], GET_MODE (operands[0]))
    && reload_completed"
   [(clobber (const_int 0))]
@@ -2378,45 +2378,30 @@
 })
 
 (define_insn "*movtf_insn_sp32"
-  [(set (match_operand:TF 0 "nonimmediate_operand" "=b,e,o,U,r")
-	(match_operand:TF 1 "input_operand"    "G,oe,GeUr,o,roG"))]
-  "TARGET_FPU
-   && ! TARGET_ARCH64
-   && (register_operand (operands[0], TFmode)
-       || register_or_zero_operand (operands[1], TFmode))"
-  "#"
-  [(set_attr "length" "4")])
-
-;; Exactly the same as above, except that all `e' cases are deleted.
-;; This is necessary to prevent reload from ever trying to use a `e' reg
-;; when -mno-fpu.
-
-(define_insn "*movtf_insn_sp32_no_fpu"
-  [(set (match_operand:TF 0 "nonimmediate_operand" "=o,U,o,r,o")
-	(match_operand:TF 1 "input_operand"    "G,o,U,roG,r"))]
-  "! TARGET_FPU
-   && ! TARGET_ARCH64
+  [(set (match_operand:TF 0 "nonimmediate_operand" "=b, e,o,  o,U,  r")
+	(match_operand:TF 1 "input_operand"        " G,oe,e,rGU,o,roG"))]
+  "! TARGET_ARCH64
    && (register_operand (operands[0], TFmode)
        || register_or_zero_operand (operands[1], TFmode))"
   "#"
-  [(set_attr "length" "4")])
+  [(set_attr "length" "4,4,4,4,4,4")
+   (set_attr "cpu_feature" "fpu,fpu,fpu,*,*,*")])
 
 (define_insn "*movtf_insn_sp64"
-  [(set (match_operand:TF 0 "nonimmediate_operand" "=b,e,o,r")
-        (match_operand:TF 1 "input_operand"    "G,oe,Ger,roG"))]
-  "TARGET_FPU
-   && TARGET_ARCH64
+  [(set (match_operand:TF 0 "nonimmediate_operand" "=b, e,o, o,  r")
+	(match_operand:TF 1 "input_operand"         "G,oe,e,rG,roG"))]
+  "TARGET_ARCH64
    && ! TARGET_HARD_QUAD
    && (register_operand (operands[0], TFmode)
        || register_or_zero_operand (operands[1], TFmode))"
   "#"
-  [(set_attr "length" "2")])
+  [(set_attr "length" "2,2,2,2,2")
+   (set_attr "cpu_feature" "fpu,fpu,fpu,*,*")])
 
 (define_insn "*movtf_insn_sp64_hq"
-  [(set (match_operand:TF 0 "nonimmediate_operand" "=b,e,e,m,o,r")
-        (match_operand:TF 1 "input_operand"    "G,e,m,e,rG,roG"))]
-  "TARGET_FPU
-   && TARGET_ARCH64
+  [(set (match_operand:TF 0 "nonimmediate_operand" "=b,e,e,m, o,  r")
+	(match_operand:TF 1 "input_operand"         "G,e,m,e,rG,roG"))]
+  "TARGET_ARCH64
    && TARGET_HARD_QUAD
    && (register_operand (operands[0], TFmode)
        || register_or_zero_operand (operands[1], TFmode))"
@@ -2430,16 +2415,6 @@
   [(set_attr "type" "*,fpmove,fpload,fpstore,*,*")
    (set_attr "length" "2,*,*,*,2,2")])
 
-(define_insn "*movtf_insn_sp64_no_fpu"
-  [(set (match_operand:TF 0 "nonimmediate_operand" "=r,o")
-        (match_operand:TF 1 "input_operand"    "orG,rG"))]
-  "! TARGET_FPU
-   && TARGET_ARCH64
-   && (register_operand (operands[0], TFmode)
-       || register_or_zero_operand (operands[1], TFmode))"
-  "#"
-  [(set_attr "length" "2")])
-
 ;; Now all the splits to handle multi-insn TF mode moves.
 (define_split
   [(set (match_operand:TF 0 "register_operand" "")
@@ -7856,6 +7831,48 @@
   DONE;
 })
 
+(define_expand "zero_extend_v8qi_vis"
+  [(set (match_operand:V8QI 0 "register_operand" "")
+        (unspec:V8QI [(match_operand:QI 1 "memory_operand" "")]
+                     UNSPEC_SHORT_LOAD))]
+  "TARGET_VIS"
+{
+  if (! REG_P (XEXP (operands[1], 0)))
+    {
+      rtx addr = force_reg (Pmode, XEXP (operands[1], 0));
+      operands[1] = replace_equiv_address (operands[1], addr);
+    }
+})
+
+(define_expand "zero_extend_v4hi_vis"
+  [(set (match_operand:V4HI 0 "register_operand" "")
+        (unspec:V4HI [(match_operand:HI 1 "memory_operand" "")]
+                     UNSPEC_SHORT_LOAD))]
+  "TARGET_VIS"
+{
+  if (! REG_P (XEXP (operands[1], 0)))
+    {
+      rtx addr = force_reg (Pmode, XEXP (operands[1], 0));
+      operands[1] = replace_equiv_address (operands[1], addr);
+    }
+})
+
+(define_insn "*zero_extend_v8qi_<P:mode>_insn"
+  [(set (match_operand:V8QI 0 "register_operand" "=e")
+        (unspec:V8QI [(mem:QI
+                       (match_operand:P 1 "register_operand" "r"))]
+                     UNSPEC_SHORT_LOAD))]
+  "TARGET_VIS"
+  "ldda\t[%1] 0xd0, %0")
+
+(define_insn "*zero_extend_v4hi_<P:mode>_insn"
+  [(set (match_operand:V4HI 0 "register_operand" "=e")
+        (unspec:V4HI [(mem:HI
+                       (match_operand:P 1 "register_operand" "r"))]
+                     UNSPEC_SHORT_LOAD))]
+  "TARGET_VIS"
+  "ldda\t[%1] 0xd2, %0")
+
 (define_expand "vec_init<mode>"
   [(match_operand:VMALL 0 "register_operand" "")
    (match_operand:VMALL 1 "" "")]
@@ -8299,6 +8316,36 @@
   [(set_attr "type" "fpmul")
    (set_attr "fptype" "double")])
 
+(define_expand "vcond<mode><mode>"
+  [(match_operand:GCM 0 "register_operand" "")
+   (match_operand:GCM 1 "register_operand" "")
+   (match_operand:GCM 2 "register_operand" "")
+   (match_operator 3 ""
+     [(match_operand:GCM 4 "register_operand" "")
+      (match_operand:GCM 5 "register_operand" "")])]
+  "TARGET_VIS3"
+{
+  sparc_expand_vcond (<MODE>mode, operands,
+                      UNSPEC_CMASK<gcm_name>,
+                      UNSPEC_FCMP);
+  DONE;
+})
+
+(define_expand "vconduv8qiv8qi"
+  [(match_operand:V8QI 0 "register_operand" "")
+   (match_operand:V8QI 1 "register_operand" "")
+   (match_operand:V8QI 2 "register_operand" "")
+   (match_operator 3 ""
+     [(match_operand:V8QI 4 "register_operand" "")
+      (match_operand:V8QI 5 "register_operand" "")])]
+  "TARGET_VIS3"
+{
+  sparc_expand_vcond (V8QImode, operands,
+                      UNSPEC_CMASK8,
+                      UNSPEC_FUCMP);
+  DONE;
+})
+
 (define_insn "array8<P:mode>_vis"
   [(set (match_operand:P 0 "register_operand" "=r")
         (unspec:P [(match_operand:P 1 "register_or_zero_operand" "rJ")
@@ -8452,7 +8499,7 @@
 ;; Conditional moves are possible via fcmpX --> cmaskX -> bshuffle
 (define_insn "cmask8<P:mode>_vis"
   [(set (reg:DI GSR_REG)
-        (unspec:DI [(match_operand:P 0 "register_operand" "r")
+        (unspec:DI [(match_operand:P 0 "register_or_zero_operand" "rJ")
 	            (reg:DI GSR_REG)]
                    UNSPEC_CMASK8))]
   "TARGET_VIS3"
@@ -8460,7 +8507,7 @@
 
 (define_insn "cmask16<P:mode>_vis"
   [(set (reg:DI GSR_REG)
-        (unspec:DI [(match_operand:P 0 "register_operand" "r")
+        (unspec:DI [(match_operand:P 0 "register_or_zero_operand" "rJ")
 	            (reg:DI GSR_REG)]
                    UNSPEC_CMASK16))]
   "TARGET_VIS3"
@@ -8468,7 +8515,7 @@
 
 (define_insn "cmask32<P:mode>_vis"
   [(set (reg:DI GSR_REG)
-        (unspec:DI [(match_operand:P 0 "register_operand" "r")
+        (unspec:DI [(match_operand:P 0 "register_or_zero_operand" "rJ")
 	            (reg:DI GSR_REG)]
                    UNSPEC_CMASK32))]
   "TARGET_VIS3"
diff --git a/gcc/config/sparc/t-elf b/gcc/config/sparc/t-elf
index 7073bcb7721..e9acfe3693e 100644
--- a/gcc/config/sparc/t-elf
+++ b/gcc/config/sparc/t-elf
@@ -17,12 +17,6 @@
 # along with GCC; see the file COPYING3.  If not see
 # <http://www.gnu.org/licenses/>.
 
-LIB1ASMSRC = sparc/lb1spc.asm
-LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3
-
 MULTILIB_OPTIONS = msoft-float mcpu=v8 mflat
 MULTILIB_DIRNAMES = soft v8 flat
 MULTILIB_MATCHES = msoft-float=mno-fpu
-
-LIBGCC = stmp-multilib
-INSTALL_LIBGCC = install-multilib
diff --git a/gcc/config/sparc/t-leon b/gcc/config/sparc/t-leon
index 4f9d0a9e797..25fc61136a9 100644
--- a/gcc/config/sparc/t-leon
+++ b/gcc/config/sparc/t-leon
@@ -16,15 +16,9 @@
 # along with GCC; see the file COPYING3.  If not see
 # <http://www.gnu.org/licenses/>.
 
-LIB1ASMSRC = sparc/lb1spc.asm
-LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3
-
 # Multilibs for LEON
 # LEON is a SPARC-V8, but the AT697 implementation has a bug in the
 # V8-specific instructions.
 MULTILIB_OPTIONS = mcpu=v7 msoft-float mflat
 MULTILIB_DIRNAMES = v7 soft flat
 MULTILIB_MATCHES = mcpu?v7=mv7 msoft-float=mno-fpu
-
-LIBGCC = stmp-multilib
-INSTALL_LIBGCC = install-multilib
diff --git a/gcc/config/sparc/t-leon3 b/gcc/config/sparc/t-leon3
index 0e7e45cc594..acdd1f2c67b 100644
--- a/gcc/config/sparc/t-leon3
+++ b/gcc/config/sparc/t-leon3
@@ -20,6 +20,3 @@
 MULTILIB_OPTIONS = msoft-float
 MULTILIB_DIRNAMES = soft
 MULTILIB_MATCHES = msoft-float=mno-fpu
-
-LIBGCC = stmp-multilib
-INSTALL_LIBGCC = install-multilib
diff --git a/gcc/config/sparc/t-linux b/gcc/config/sparc/t-linux
deleted file mode 100644
index 30daa376614..00000000000
--- a/gcc/config/sparc/t-linux
+++ /dev/null
@@ -1,5 +0,0 @@
-# Override t-slibgcc-elf-ver to export some libgcc symbols with
-# the symbol versions that glibc used.
-# Avoid the t-linux version file.
-SHLIB_MAPFILES = $$(libgcc_objdir)/libgcc-std.ver \
-		 $(srcdir)/config/sparc/libgcc-sparc-glibc.ver
diff --git a/gcc/config/sparc/t-linux64 b/gcc/config/sparc/t-linux64
index 74d04898d9f..d9dfad66ce7 100644
--- a/gcc/config/sparc/t-linux64
+++ b/gcc/config/sparc/t-linux64
@@ -1,5 +1,5 @@
 # Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004,
-# 2006, 2010 Free Software Foundation, Inc.
+# 2006, 2010, 2011 Free Software Foundation, Inc.
 #
 # This file is part of GCC.
 #
@@ -27,10 +27,3 @@
 MULTILIB_OPTIONS = m64/m32
 MULTILIB_DIRNAMES = 64 32
 MULTILIB_OSDIRNAMES = ../lib64 $(if $(wildcard $(shell echo $(SYSTEM_HEADER_DIR))/../../usr/lib32),../lib32,../lib)
-
-LIBGCC = stmp-multilib
-INSTALL_LIBGCC = install-multilib
-
-CRTSTUFF_T_CFLAGS = `if test x$$($(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) \
-				 -print-multi-os-directory) \
-			= x../lib64; then echo -mcmodel=medany; fi`
diff --git a/gcc/config/sparc/t-netbsd64 b/gcc/config/sparc/t-netbsd64
index 0fddb0ffe87..bc783c19366 100644
--- a/gcc/config/sparc/t-netbsd64
+++ b/gcc/config/sparc/t-netbsd64
@@ -1,8 +1,5 @@
-# Disable multilib fow now, as NetBSD/sparc64 does not ship with
+# Disable multilib for now, as NetBSD/sparc64 does not ship with
 # a 32-bit environment.
 #MULTILIB_OPTIONS = m32/m64
 #MULTILIB_DIRNAMES = 32 64
 #MULTILIB_MATCHES =
-
-#LIBGCC = stmp-multilib
-#INSTALL_LIBGCC = install-multilib
author	bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>	2011-11-06 15:16:32 +0000
committer	bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>	2011-11-06 15:16:32 +0000
commit	31ba6c3ff2311bad9422246f49d59c532cbb5078 (patch)
tree	6e862e3ea14b2edf93a92c404a0d9b29f3f9ba65 /gcc/config/sparc
parent	bab85b65e545231656361b997a81fb8a44b266b4 (diff)
download	gcc-31ba6c3ff2311bad9422246f49d59c532cbb5078.tar.gz