ARM: make assembly files also compile as Thumb2 by adding IT blocks (no-ops in ARM mode).

ARM's UAL syntax allows the same assembly file to be assembled in both ARM and Thumb mode. Conditional execution is handled by requiring IT blocks, which Thumb needs, and which are essentially ignored when assembling for ARM.

git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@194429 91177308-0d34-0410-b5e6-96231b3b80d8
author: Tim Northover <tnorthover@apple.com> 2013-11-11 22:50:13 +0000
committer: Tim Northover <tnorthover@apple.com> 2013-11-11 22:50:13 +0000
commit: abd768d337ffc997392323a70c8c53e459173d9b (patch)
tree: e933bb0adf35ef9af6f75b3db1176d5416bd541b
parent: 6f94d718a4edee9ca6718cb09bf913cbabaa2422 (diff)
download: compiler-rt-abd768d337ffc997392323a70c8c53e459173d9b.tar.gz
8 files changed, 33 insertions, 7 deletions
diff --git a/lib/arm/comparesf2.S b/lib/arm/comparesf2.S
index ee1820339..ce6f4b9ef 100644
--- a/lib/arm/comparesf2.S
+++ b/lib/arm/comparesf2.S
@@ -59,12 +59,14 @@ DEFINE_COMPILERRT_FUNCTION(__nesf2)
     
     // Next, we check if a and b have the same or different signs.  If they have
     // opposite signs, this eor will set the N flag.
+    it ne
     eorsne  r12,    r0, r1
     
     // If a and b are equal (either both zeros or bit identical; again, we're
     // ignoring NaNs for now), this subtract will zero out r0.  If they have the
     // same sign, the flags are updated as they would be for a comparison of the
     // absolute values of a and b.
+    it pl
     subspl  r0,     r2, r3
     
     // If a is smaller in magnitude than b and both have the same sign, place
@@ -77,23 +79,27 @@ DEFINE_COMPILERRT_FUNCTION(__nesf2)
     // still clear from the shift argument in orrs; if a is positive and b
     // negative, this places 0 in r0; if a is negative and b positive, -1 is
     // placed in r0.
+    it lo
     mvnlo   r0,         r1, asr #31
 
     // If a is greater in magnitude than b and both have the same sign, place
     // the sign of b in r0.  Thus, if both are negative and a < b, -1 is placed
     // in r0, which is the desired result.  Conversely, if both are positive
     // and a > b, zero is placed in r0.
+    it hi
     movhi   r0,         r1, asr #31
     
     // If you've been keeping track, at this point r0 contains -1 if a < b and
     // 0 if a >= b.  All that remains to be done is to set it to 1 if a > b.
     // If a == b, then the Z flag is set, so we can get the correct final value
     // into r0 by simply or'ing with 1 if Z is clear.
-	orrne	r0,     r0, #1
+    it ne
+    orrne	r0,     r0, #1
     
     // Finally, we need to deal with NaNs.  If either argument is NaN, replace
     // the value in r0 with 1.
     cmp     r2,         #0xff000000
+    ite ls
     cmpls   r3,         #0xff000000
     movhi   r0,         #1
     bx      lr
@@ -108,12 +114,18 @@ DEFINE_COMPILERRT_FUNCTION(__gtsf2)
     mov     r2,         r0, lsl #1
     mov     r3,         r1, lsl #1
     orrs    r12,    r2, r3, lsr #1
+    it ne
     eorsne  r12,    r0, r1
+    it pl
     subspl  r0,     r2, r3
+    it lo
     mvnlo   r0,         r1, asr #31
+    it hi
     movhi   r0,         r1, asr #31
-	orrne	r0,     r0, #1
+    it ne
+    orrne	r0,     r0, #1
     cmp     r2,         #0xff000000
+    ite ls
     cmpls   r3,         #0xff000000
     movhi   r0,         #-1
     bx      lr
@@ -125,6 +137,7 @@ DEFINE_COMPILERRT_FUNCTION(__unordsf2)
     mov     r3,         r1, lsl #1
     mov     r0,         #0
     cmp     r2,         #0xff000000
+    ite ls
     cmpls   r3,         #0xff000000
     movhi   r0,         #1
     bx      lr
diff --git a/lib/arm/switch16.S b/lib/arm/switch16.S
index e8f08c49c..9c3f0cf99 100644
--- a/lib/arm/switch16.S
+++ b/lib/arm/switch16.S
@@ -34,8 +34,9 @@ DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch16)
 	ldrh    ip, [lr, #-1]           // get first 16-bit word in table
 	cmp     r0, ip                  // compare with index
 	add     r0, lr, r0, lsl #1      // compute address of element in table
-	ldrshcc r0, [r0, #1]            // load 16-bit element if r0 is in range
 	add     ip, lr, ip, lsl #1      // compute address of last element in table
+	ite lo
+	ldrshlo r0, [r0, #1]            // load 16-bit element if r0 is in range
 	ldrshhs r0, [ip, #1]            // load 16-bit element if r0 out of range
 	add     ip, lr, r0, lsl #1      // compute label = lr + element*2
 	bx      ip                      // jump to computed label
diff --git a/lib/arm/switch32.S b/lib/arm/switch32.S
index 7008fccb1..3152dfa1d 100644
--- a/lib/arm/switch32.S
+++ b/lib/arm/switch32.S
@@ -34,9 +34,10 @@ DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch32)
 	ldr     ip, [lr, #-1]            // get first 32-bit word in table
 	cmp     r0, ip                   // compare with index
 	add     r0, lr, r0, lsl #2       // compute address of element in table
-	ldrcc   r0, [r0, #3]             // load 32-bit element if r0 is in range
 	add     ip, lr, ip, lsl #2       // compute address of last element in table
-	ldrcs   r0, [ip, #3]             // load 32-bit element if r0 out of range
+	ite lo
+	ldrlo   r0, [r0, #3]             // load 32-bit element if r0 is in range
+	ldrhs   r0, [ip, #3]             // load 32-bit element if r0 out of range
 	add     ip, lr, r0               // compute label = lr + element
 	bx      ip                       // jump to computed label
 
diff --git a/lib/arm/switch8.S b/lib/arm/switch8.S
index e784b4082..15729ebc3 100644
--- a/lib/arm/switch8.S
+++ b/lib/arm/switch8.S
@@ -33,7 +33,8 @@
 DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switch8)
 	ldrb    ip, [lr, #-1]           // get first byte in table
 	cmp     r0, ip                  // signed compare with index
-	ldrsbcc r0, [lr, r0]            // get indexed byte out of table
+	ite lo
+	ldrsblo r0, [lr, r0]            // get indexed byte out of table
 	ldrsbhs r0, [lr, ip]            // if out of range, use last entry in table
 	add     ip, lr, r0, lsl #1      // compute label = lr + element*2
 	bx      ip                      // jump to computed label
diff --git a/lib/arm/switchu8.S b/lib/arm/switchu8.S
index 19bed2f66..0a4efac88 100644
--- a/lib/arm/switchu8.S
+++ b/lib/arm/switchu8.S
@@ -33,7 +33,8 @@
 DEFINE_COMPILERRT_PRIVATE_FUNCTION(__switchu8)
 	ldrb    ip, [lr, #-1]           // get first byte in table
 	cmp     r0, ip                  // compare with index
-	ldrbcc  r0, [lr, r0]            // get indexed byte out of table
+	ite lo
+	ldrblo  r0, [lr, r0]            // get indexed byte out of table
 	ldrbhs  r0, [lr, ip]            // if out of range, use last entry in table
 	add     ip, lr, r0, lsl #1      // compute label = lr + element*2
 	bx      ip                      // jump to computed label
diff --git a/lib/arm/udivmodsi4.S b/lib/arm/udivmodsi4.S
index 5fe53fe5f..aee277667 100644
--- a/lib/arm/udivmodsi4.S
+++ b/lib/arm/udivmodsi4.S
@@ -74,14 +74,17 @@ LOCAL_LABEL(mainLoop):
 //  this way, we can merge the two branches which is a substantial win for
 //  such a tight loop on current ARM architectures.
     subs    r,      a,  b, lsl i
+    itt hs
     orrhs   q,      q,one, lsl i
     movhs   a,      r
+    it ne
     subsne  i,      i, #1
     bhi     LOCAL_LABEL(mainLoop)
 
 //  Do the final test subtraction and update of quotient (i == 0), as it is
 //  not performed in the main loop.
     subs    r,      a,  b
+    itt hs
     orrhs   q,      #1
     movhs   a,      r
 
diff --git a/lib/arm/udivsi3.S b/lib/arm/udivsi3.S
index 1c1582510..2bb14123c 100644
--- a/lib/arm/udivsi3.S
+++ b/lib/arm/udivsi3.S
@@ -73,14 +73,17 @@ LOCAL_LABEL(mainLoop):
 //  this way, we can merge the two branches which is a substantial win for
 //  such a tight loop on current ARM architectures.
     subs    r,      a,  b, lsl i
+    itt hs
     orrhs   q,      q,one, lsl i
     movhs   a,      r
+    it ne
     subsne  i,      i, #1
     bhi     LOCAL_LABEL(mainLoop)
 
 //  Do the final test subtraction and update of quotient (i == 0), as it is
 //  not performed in the main loop.
     subs    r,      a,  b
+    it hs
     orrhs   q,      #1
 
 LOCAL_LABEL(return):
diff --git a/lib/arm/umodsi3.S b/lib/arm/umodsi3.S
index 188edf304..092a4f1a2 100644
--- a/lib/arm/umodsi3.S
+++ b/lib/arm/umodsi3.S
@@ -57,13 +57,16 @@ LOCAL_LABEL(mainLoop):
 //  this way, we can merge the two branches which is a substantial win for
 //  such a tight loop on current ARM architectures.
     subs    r,      a,  b, lsl i
+    it hs
     movhs   a,      r
+    it ne
     subsne  i,      i, #1
     bhi     LOCAL_LABEL(mainLoop)
 
 //  Do the final test subtraction and update of remainder (i == 0), as it is
 //  not performed in the main loop.
     subs    r,      a,  b
+    it hs
     movhs   a,      r
     bx      lr
 #endif
author	Tim Northover <tnorthover@apple.com>	2013-11-11 22:50:13 +0000
committer	Tim Northover <tnorthover@apple.com>	2013-11-11 22:50:13 +0000
commit	abd768d337ffc997392323a70c8c53e459173d9b (patch)
tree	e933bb0adf35ef9af6f75b3db1176d5416bd541b
parent	6f94d718a4edee9ca6718cb09bf913cbabaa2422 (diff)
download	compiler-rt-abd768d337ffc997392323a70c8c53e459173d9b.tar.gz