summaryrefslogtreecommitdiff
path: root/asm_arm.h
diff options
context:
space:
mode:
Diffstat (limited to 'asm_arm.h')
-rw-r--r--asm_arm.h85
1 files changed, 72 insertions, 13 deletions
diff --git a/asm_arm.h b/asm_arm.h
index 4b6362a..e8a718f 100644
--- a/asm_arm.h
+++ b/asm_arm.h
@@ -21,9 +21,10 @@
static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) {
int lo,hi;
- asm volatile("smull %0,%1,%2,%3;\n"
+ asm volatile("smull\t%0, %1, %2, %3"
: "=&r"(lo),"=&r"(hi)
- : "%r"(x),"r"(y));
+ : "%r"(x),"r"(y)
+ : "cc");
return(hi);
}
@@ -37,26 +38,84 @@ static inline ogg_int32_t MULT30(ogg_int32_t x, ogg_int32_t y) {
static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
int lo,hi;
- asm volatile("smull %0,%1,%2,%3;\n"
- "mov %0,%0, lsr #15;\n"
- "orr %1,%0,%1, lsl #17;\n"
+ asm volatile("smull %0, %1, %2, %3\n\t"
+ "movs %0, %0, lsr #15\n\t"
+ "adc %1, %0, %1, lsl #17\n\t"
: "=&r"(lo),"=&r"(hi)
- : "%r"(x),"r"(y));
+ : "%r"(x),"r"(y)
+ : "cc");
return(hi);
}
static inline ogg_int32_t CLIP_TO_15(ogg_int32_t x) {
- asm volatile("subs r0,%0,#32768;\n"
- "movpl %0,#0x7f00;\n"
- "orrpl %0,%0,#0xff;\n"
- "adds r0,%0,#32768;\n"
- "movmi %0,#0x8000;\n"
- : "+r"(x)
+ int tmp;
+ asm volatile("subs %1, %0, #32768\n\t"
+ "movpl %0, #0x7f00\n\t"
+ "orrpl %0, %0, #0xff\n"
+ "adds %1, %0, #32768\n\t"
+ "movmi %0, #0x8000"
+ : "+r"(x),"=r"(tmp)
:
- : "r0","cc");
+ : "cc");
return(x);
}
+#define MB() asm volatile ("" : : : "memory")
+
+static inline void XPROD32(ogg_int32_t a, ogg_int32_t b,
+ ogg_int32_t t, ogg_int32_t v,
+ ogg_int32_t *x, ogg_int32_t *y)
+{
+ int x1, y1, l;
+ asm( "smull %0, %1, %4, %6\n\t"
+ "smlal %0, %1, %5, %7\n\t"
+ "rsb %3, %4, #0\n\t"
+ "smull %0, %2, %5, %6\n\t"
+ "smlal %0, %2, %3, %7"
+ : "=&r" (l), "=&r" (x1), "=&r" (y1), "=r" (a)
+ : "3" (a), "r" (b), "r" (t), "r" (v)
+ : "cc" );
+ *x = x1;
+ MB();
+ *y = y1;
+}
+
+static inline void XPROD31(ogg_int32_t a, ogg_int32_t b,
+ ogg_int32_t t, ogg_int32_t v,
+ ogg_int32_t *x, ogg_int32_t *y)
+{
+ int x1, y1, l;
+ asm( "smull %0, %1, %4, %6\n\t"
+ "smlal %0, %1, %5, %7\n\t"
+ "rsb %3, %4, #0\n\t"
+ "smull %0, %2, %5, %6\n\t"
+ "smlal %0, %2, %3, %7"
+ : "=&r" (l), "=&r" (x1), "=&r" (y1), "=r" (a)
+ : "3" (a), "r" (b), "r" (t), "r" (v)
+ : "cc" );
+ *x = x1 << 1;
+ MB();
+ *y = y1 << 1;
+}
+
+static inline void XNPROD31(ogg_int32_t a, ogg_int32_t b,
+ ogg_int32_t t, ogg_int32_t v,
+ ogg_int32_t *x, ogg_int32_t *y)
+{
+ int x1, y1, l;
+ asm( "rsb %2, %4, #0\n\t"
+ "smull %0, %1, %3, %5\n\t"
+ "smlal %0, %1, %2, %6\n\t"
+ "smull %0, %2, %4, %5\n\t"
+ "smlal %0, %2, %3, %6"
+ : "=&r" (l), "=&r" (x1), "=&r" (y1)
+ : "r" (a), "r" (b), "r" (t), "r" (v)
+ : "cc" );
+ *x = x1 << 1;
+ MB();
+ *y = y1 << 1;
+}
+
#endif
#ifndef _V_LSP_MATH_ASM