diff options
Diffstat (limited to 'libswscale/x86/rgb2rgb_template.c')
-rw-r--r-- | libswscale/x86/rgb2rgb_template.c | 368 |
1 file changed, 137 insertions, 231 deletions
diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c index 594524d9ed..7e5ffdf8d1 100644 --- a/libswscale/x86/rgb2rgb_template.c +++ b/libswscale/x86/rgb2rgb_template.c @@ -73,25 +73,24 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int sr __asm__ volatile("movq %0, %%mm7"::"m"(mask32a):"memory"); while (s < mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movd %1, %%mm0 \n\t" - "punpckldq 3%1, %%mm0 \n\t" - "movd 6%1, %%mm1 \n\t" - "punpckldq 9%1, %%mm1 \n\t" - "movd 12%1, %%mm2 \n\t" - "punpckldq 15%1, %%mm2 \n\t" - "movd 18%1, %%mm3 \n\t" - "punpckldq 21%1, %%mm3 \n\t" + PREFETCH" 32(%1) \n\t" + "movd (%1), %%mm0 \n\t" + "punpckldq 3(%1), %%mm0 \n\t" + "movd 6(%1), %%mm1 \n\t" + "punpckldq 9(%1), %%mm1 \n\t" + "movd 12(%1), %%mm2 \n\t" + "punpckldq 15(%1), %%mm2 \n\t" + "movd 18(%1), %%mm3 \n\t" + "punpckldq 21(%1), %%mm3 \n\t" "por %%mm7, %%mm0 \n\t" "por %%mm7, %%mm1 \n\t" "por %%mm7, %%mm2 \n\t" "por %%mm7, %%mm3 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - MOVNTQ" %%mm1, 8%0 \n\t" - MOVNTQ" %%mm2, 16%0 \n\t" - MOVNTQ" %%mm3, 24%0" - :"=m"(*dest) - :"m"(*s) + MOVNTQ" %%mm0, (%0) \n\t" + MOVNTQ" %%mm1, 8(%0) \n\t" + MOVNTQ" %%mm2, 16(%0) \n\t" + MOVNTQ" %%mm3, 24(%0)" + :: "r"(dest), "r"(s) :"memory"); dest += 32; s += 24; @@ -138,9 +137,9 @@ static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int sr "pand "MANGLE(mask24hhhh)", %%mm5\n\t" \ "por %%mm5, %%mm4 \n\t" \ \ - MOVNTQ" %%mm0, %0 \n\t" \ - MOVNTQ" %%mm1, 8%0 \n\t" \ - MOVNTQ" %%mm4, 16%0" + MOVNTQ" %%mm0, (%0) \n\t" \ + MOVNTQ" %%mm1, 8(%0) \n\t" \ + MOVNTQ" %%mm4, 16(%0)" static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size) @@ -154,18 +153,17 @@ static inline void RENAME(rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int sr mm_end = end - 31; while (s < mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movq %1, %%mm0 \n\t" - "movq 8%1, %%mm1 \n\t" - "movq 16%1, %%mm4 \n\t" - "movq 24%1, 
%%mm5 \n\t" + PREFETCH" 32(%1) \n\t" + "movq (%1), %%mm0 \n\t" + "movq 8(%1), %%mm1 \n\t" + "movq 16(%1), %%mm4 \n\t" + "movq 24(%1), %%mm5 \n\t" "movq %%mm0, %%mm2 \n\t" "movq %%mm1, %%mm3 \n\t" "movq %%mm4, %%mm6 \n\t" "movq %%mm5, %%mm7 \n\t" STORE_BGR24_MMX - :"=m"(*dest) - :"m"(*s) + :: "r"(dest), "r"(s) :"memory"); dest += 24; s += 32; @@ -198,19 +196,18 @@ static inline void RENAME(rgb15to16)(const uint8_t *src, uint8_t *dst, int src_s mm_end = end - 15; while (s<mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movq %1, %%mm0 \n\t" - "movq 8%1, %%mm2 \n\t" + PREFETCH" 32(%1) \n\t" + "movq (%1), %%mm0 \n\t" + "movq 8(%1), %%mm2 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm2, %%mm3 \n\t" "pand %%mm4, %%mm0 \n\t" "pand %%mm4, %%mm2 \n\t" "paddw %%mm1, %%mm0 \n\t" "paddw %%mm3, %%mm2 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - MOVNTQ" %%mm2, 8%0" - :"=m"(*d) - :"m"(*s) + MOVNTQ" %%mm0, (%0) \n\t" + MOVNTQ" %%mm2, 8(%0)" + :: "r"(d), "r"(s) ); d+=16; s+=16; @@ -243,9 +240,9 @@ static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, int src_s mm_end = end - 15; while (s<mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movq %1, %%mm0 \n\t" - "movq 8%1, %%mm2 \n\t" + PREFETCH" 32(%1) \n\t" + "movq (%1), %%mm0 \n\t" + "movq 8(%1), %%mm2 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm2, %%mm3 \n\t" "psrlq $1, %%mm0 \n\t" @@ -256,10 +253,9 @@ static inline void RENAME(rgb16to15)(const uint8_t *src, uint8_t *dst, int src_s "pand %%mm6, %%mm3 \n\t" "por %%mm1, %%mm0 \n\t" "por %%mm3, %%mm2 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - MOVNTQ" %%mm2, 8%0" - :"=m"(*d) - :"m"(*s) + MOVNTQ" %%mm0, (%0) \n\t" + MOVNTQ" %%mm2, 8(%0)" + :: "r"(d), "r"(s) ); d+=16; s+=16; @@ -287,7 +283,6 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, int src_s uint16_t *d = (uint16_t *)dst; end = s + src_size; mm_end = end - 15; -#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster) __asm__ 
volatile( "movq %3, %%mm5 \n\t" "movq %4, %%mm6 \n\t" @@ -322,47 +317,6 @@ static inline void RENAME(rgb32to16)(const uint8_t *src, uint8_t *dst, int src_s : "+r" (d), "+r"(s) : "r" (mm_end), "m" (mask3216g), "m" (mask3216br), "m" (mul3216) ); -#else - __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); - __asm__ volatile( - "movq %0, %%mm7 \n\t" - "movq %1, %%mm6 \n\t" - ::"m"(red_16mask),"m"(green_16mask)); - while (s < mm_end) { - __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movd %1, %%mm0 \n\t" - "movd 4%1, %%mm3 \n\t" - "punpckldq 8%1, %%mm0 \n\t" - "punpckldq 12%1, %%mm3 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm0, %%mm2 \n\t" - "movq %%mm3, %%mm4 \n\t" - "movq %%mm3, %%mm5 \n\t" - "psrlq $3, %%mm0 \n\t" - "psrlq $3, %%mm3 \n\t" - "pand %2, %%mm0 \n\t" - "pand %2, %%mm3 \n\t" - "psrlq $5, %%mm1 \n\t" - "psrlq $5, %%mm4 \n\t" - "pand %%mm6, %%mm1 \n\t" - "pand %%mm6, %%mm4 \n\t" - "psrlq $8, %%mm2 \n\t" - "psrlq $8, %%mm5 \n\t" - "pand %%mm7, %%mm2 \n\t" - "pand %%mm7, %%mm5 \n\t" - "por %%mm1, %%mm0 \n\t" - "por %%mm4, %%mm3 \n\t" - "por %%mm2, %%mm0 \n\t" - "por %%mm5, %%mm3 \n\t" - "psllq $16, %%mm3 \n\t" - "por %%mm3, %%mm0 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); - d += 4; - s += 16; - } -#endif __asm__ volatile(SFENCE:::"memory"); __asm__ volatile(EMMS:::"memory"); while (s < end) { @@ -386,11 +340,11 @@ static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int sr mm_end = end - 15; while (s < mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movd %1, %%mm0 \n\t" - "movd 4%1, %%mm3 \n\t" - "punpckldq 8%1, %%mm0 \n\t" - "punpckldq 12%1, %%mm3 \n\t" + PREFETCH" 32(%1) \n\t" + "movd (%1), %%mm0 \n\t" + "movd 4(%1), %%mm3 \n\t" + "punpckldq 8(%1), %%mm0 \n\t" + "punpckldq 12(%1), %%mm3 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm0, %%mm2 \n\t" "movq %%mm3, %%mm4 \n\t" @@ -413,8 +367,8 @@ static inline void RENAME(rgb32tobgr16)(const uint8_t *src, uint8_t *dst, int sr "por %%mm5, %%mm3 \n\t" 
"psllq $16, %%mm3 \n\t" "por %%mm3, %%mm0 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); + MOVNTQ" %%mm0, (%0) \n\t" + :: "r"(d),"r"(s),"m"(blue_16mask):"memory"); d += 4; s += 16; } @@ -434,7 +388,6 @@ static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, int src_s uint16_t *d = (uint16_t *)dst; end = s + src_size; mm_end = end - 15; -#if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it is slightly faster) __asm__ volatile( "movq %3, %%mm5 \n\t" "movq %4, %%mm6 \n\t" @@ -469,47 +422,6 @@ static inline void RENAME(rgb32to15)(const uint8_t *src, uint8_t *dst, int src_s : "+r" (d), "+r"(s) : "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215) ); -#else - __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); - __asm__ volatile( - "movq %0, %%mm7 \n\t" - "movq %1, %%mm6 \n\t" - ::"m"(red_15mask),"m"(green_15mask)); - while (s < mm_end) { - __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movd %1, %%mm0 \n\t" - "movd 4%1, %%mm3 \n\t" - "punpckldq 8%1, %%mm0 \n\t" - "punpckldq 12%1, %%mm3 \n\t" - "movq %%mm0, %%mm1 \n\t" - "movq %%mm0, %%mm2 \n\t" - "movq %%mm3, %%mm4 \n\t" - "movq %%mm3, %%mm5 \n\t" - "psrlq $3, %%mm0 \n\t" - "psrlq $3, %%mm3 \n\t" - "pand %2, %%mm0 \n\t" - "pand %2, %%mm3 \n\t" - "psrlq $6, %%mm1 \n\t" - "psrlq $6, %%mm4 \n\t" - "pand %%mm6, %%mm1 \n\t" - "pand %%mm6, %%mm4 \n\t" - "psrlq $9, %%mm2 \n\t" - "psrlq $9, %%mm5 \n\t" - "pand %%mm7, %%mm2 \n\t" - "pand %%mm7, %%mm5 \n\t" - "por %%mm1, %%mm0 \n\t" - "por %%mm4, %%mm3 \n\t" - "por %%mm2, %%mm0 \n\t" - "por %%mm5, %%mm3 \n\t" - "psllq $16, %%mm3 \n\t" - "por %%mm3, %%mm0 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); - d += 4; - s += 16; - } -#endif __asm__ volatile(SFENCE:::"memory"); __asm__ volatile(EMMS:::"memory"); while (s < end) { @@ -533,11 +445,11 @@ static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int sr 
mm_end = end - 15; while (s < mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movd %1, %%mm0 \n\t" - "movd 4%1, %%mm3 \n\t" - "punpckldq 8%1, %%mm0 \n\t" - "punpckldq 12%1, %%mm3 \n\t" + PREFETCH" 32(%1) \n\t" + "movd (%1), %%mm0 \n\t" + "movd 4(%1), %%mm3 \n\t" + "punpckldq 8(%1), %%mm0 \n\t" + "punpckldq 12(%1), %%mm3 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm0, %%mm2 \n\t" "movq %%mm3, %%mm4 \n\t" @@ -560,8 +472,8 @@ static inline void RENAME(rgb32tobgr15)(const uint8_t *src, uint8_t *dst, int sr "por %%mm5, %%mm3 \n\t" "psllq $16, %%mm3 \n\t" "por %%mm3, %%mm0 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); + MOVNTQ" %%mm0, (%0) \n\t" + ::"r"(d),"r"(s),"m"(blue_15mask):"memory"); d += 4; s += 16; } @@ -588,11 +500,11 @@ static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int sr mm_end = end - 11; while (s < mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movd %1, %%mm0 \n\t" - "movd 3%1, %%mm3 \n\t" - "punpckldq 6%1, %%mm0 \n\t" - "punpckldq 9%1, %%mm3 \n\t" + PREFETCH" 32(%1) \n\t" + "movd (%1), %%mm0 \n\t" + "movd 3(%1), %%mm3 \n\t" + "punpckldq 6(%1), %%mm0 \n\t" + "punpckldq 9(%1), %%mm3 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm0, %%mm2 \n\t" "movq %%mm3, %%mm4 \n\t" @@ -615,8 +527,8 @@ static inline void RENAME(rgb24tobgr16)(const uint8_t *src, uint8_t *dst, int sr "por %%mm5, %%mm3 \n\t" "psllq $16, %%mm3 \n\t" "por %%mm3, %%mm0 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); + MOVNTQ" %%mm0, (%0) \n\t" + ::"r"(d),"r"(s),"m"(blue_16mask):"memory"); d += 4; s += 12; } @@ -645,11 +557,11 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, int src_s mm_end = end - 15; while (s < mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movd %1, %%mm0 \n\t" - "movd 3%1, %%mm3 \n\t" - "punpckldq 6%1, %%mm0 \n\t" - "punpckldq 9%1, %%mm3 \n\t" + PREFETCH" 32(%1) \n\t" + "movd (%1), %%mm0 \n\t" + "movd 3(%1), %%mm3 \n\t" + "punpckldq 6(%1), %%mm0 
\n\t" + "punpckldq 9(%1), %%mm3 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm0, %%mm2 \n\t" "movq %%mm3, %%mm4 \n\t" @@ -672,8 +584,8 @@ static inline void RENAME(rgb24to16)(const uint8_t *src, uint8_t *dst, int src_s "por %%mm5, %%mm3 \n\t" "psllq $16, %%mm3 \n\t" "por %%mm3, %%mm0 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); + MOVNTQ" %%mm0, (%0) \n\t" + ::"r"(d),"r"(s),"m"(blue_16mask):"memory"); d += 4; s += 12; } @@ -702,11 +614,11 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int sr mm_end = end - 11; while (s < mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movd %1, %%mm0 \n\t" - "movd 3%1, %%mm3 \n\t" - "punpckldq 6%1, %%mm0 \n\t" - "punpckldq 9%1, %%mm3 \n\t" + PREFETCH" 32(%1) \n\t" + "movd (%1), %%mm0 \n\t" + "movd 3(%1), %%mm3 \n\t" + "punpckldq 6(%1), %%mm0 \n\t" + "punpckldq 9(%1), %%mm3 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm0, %%mm2 \n\t" "movq %%mm3, %%mm4 \n\t" @@ -729,8 +641,8 @@ static inline void RENAME(rgb24tobgr15)(const uint8_t *src, uint8_t *dst, int sr "por %%mm5, %%mm3 \n\t" "psllq $16, %%mm3 \n\t" "por %%mm3, %%mm0 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); + MOVNTQ" %%mm0, (%0) \n\t" + ::"r"(d),"r"(s),"m"(blue_15mask):"memory"); d += 4; s += 12; } @@ -759,11 +671,11 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_s mm_end = end - 15; while (s < mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movd %1, %%mm0 \n\t" - "movd 3%1, %%mm3 \n\t" - "punpckldq 6%1, %%mm0 \n\t" - "punpckldq 9%1, %%mm3 \n\t" + PREFETCH" 32(%1) \n\t" + "movd (%1), %%mm0 \n\t" + "movd 3(%1), %%mm3 \n\t" + "punpckldq 6(%1), %%mm0 \n\t" + "punpckldq 9(%1), %%mm3 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm0, %%mm2 \n\t" "movq %%mm3, %%mm4 \n\t" @@ -786,8 +698,8 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_s "por %%mm5, %%mm3 \n\t" "psllq $16, %%mm3 \n\t" "por %%mm3, %%mm0 \n\t" - MOVNTQ" 
%%mm0, %0 \n\t" - :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); + MOVNTQ" %%mm0, (%0) \n\t" + ::"r"(d),"r"(s),"m"(blue_15mask):"memory"); d += 4; s += 12; } @@ -812,10 +724,10 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr mm_end = end - 7; while (s < mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movq %1, %%mm0 \n\t" - "movq %1, %%mm1 \n\t" - "movq %1, %%mm2 \n\t" + PREFETCH" 32(%1) \n\t" + "movq (%1), %%mm0 \n\t" + "movq (%1), %%mm1 \n\t" + "movq (%1), %%mm2 \n\t" "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" @@ -844,9 +756,9 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr "movq %%mm0, %%mm6 \n\t" "movq %%mm3, %%mm7 \n\t" - "movq 8%1, %%mm0 \n\t" - "movq 8%1, %%mm1 \n\t" - "movq 8%1, %%mm2 \n\t" + "movq 8(%1), %%mm0 \n\t" + "movq 8(%1), %%mm1 \n\t" + "movq 8(%1), %%mm2 \n\t" "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" @@ -873,7 +785,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr "por %%mm5, %%mm3 \n\t" :"=m"(*d) - :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r),"m"(mmx_null) + :"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null) :"memory"); /* borrowed 32 to 24 */ __asm__ volatile( @@ -889,8 +801,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr STORE_BGR24_MMX - :"=m"(*d) - :"m"(*s) + :: "r"(d), "m"(*s) :"memory"); d += 24; s += 8; @@ -917,10 +828,10 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr mm_end = end - 7; while (s < mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movq %1, %%mm0 \n\t" - "movq %1, %%mm1 \n\t" - "movq %1, %%mm2 \n\t" + PREFETCH" 32(%1) \n\t" + "movq (%1), %%mm0 \n\t" + "movq (%1), %%mm1 \n\t" + "movq (%1), %%mm2 \n\t" "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" @@ -950,9 +861,9 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr "movq %%mm0, 
%%mm6 \n\t" "movq %%mm3, %%mm7 \n\t" - "movq 8%1, %%mm0 \n\t" - "movq 8%1, %%mm1 \n\t" - "movq 8%1, %%mm2 \n\t" + "movq 8(%1), %%mm0 \n\t" + "movq 8(%1), %%mm1 \n\t" + "movq 8(%1), %%mm2 \n\t" "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" @@ -979,7 +890,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr "por %%mm4, %%mm3 \n\t" "por %%mm5, %%mm3 \n\t" :"=m"(*d) - :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null) + :"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null) :"memory"); /* borrowed 32 to 24 */ __asm__ volatile( @@ -995,8 +906,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr STORE_BGR24_MMX - :"=m"(*d) - :"m"(*s) + :: "r"(d), "m"(*s) :"memory"); d += 24; s += 8; @@ -1028,8 +938,8 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr "movq %%mm0, %%mm3 \n\t" \ "punpcklwd %%mm2, %%mm0 \n\t" /* FF R1 G1 B1 FF R0 G0 B0 */ \ "punpckhwd %%mm2, %%mm3 \n\t" /* FF R3 G3 B3 FF R2 G2 B2 */ \ - MOVNTQ" %%mm0, %0 \n\t" \ - MOVNTQ" %%mm3, 8%0 \n\t" \ + MOVNTQ" %%mm0, (%0) \n\t" \ + MOVNTQ" %%mm3, 8(%0) \n\t" \ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_size) { @@ -1044,10 +954,10 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s mm_end = end - 3; while (s < mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movq %1, %%mm0 \n\t" - "movq %1, %%mm1 \n\t" - "movq %1, %%mm2 \n\t" + PREFETCH" 32(%1) \n\t" + "movq (%1), %%mm0 \n\t" + "movq (%1), %%mm1 \n\t" + "movq (%1), %%mm2 \n\t" "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" @@ -1056,8 +966,7 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s "pmulhw %5, %%mm1 \n\t" "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t" PACK_RGB32 - :"=m"(*d) - :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r),"m"(mul15_mid) + ::"r"(d),"r"(s),"m"(mask15b),"m"(mask15g),"m"(mask15r) ,"m"(mul15_mid) 
:"memory"); d += 16; s += 4; @@ -1087,10 +996,10 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s mm_end = end - 3; while (s < mm_end) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movq %1, %%mm0 \n\t" - "movq %1, %%mm1 \n\t" - "movq %1, %%mm2 \n\t" + PREFETCH" 32(%1) \n\t" + "movq (%1), %%mm0 \n\t" + "movq (%1), %%mm1 \n\t" + "movq (%1), %%mm2 \n\t" "pand %2, %%mm0 \n\t" "pand %3, %%mm1 \n\t" "pand %4, %%mm2 \n\t" @@ -1100,8 +1009,7 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s "pmulhw "MANGLE(mul16_mid)", %%mm1 \n\t" "pmulhw "MANGLE(mul15_hi)", %%mm2 \n\t" PACK_RGB32 - :"=m"(*d) - :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mul15_mid) + ::"r"(d),"r"(s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mul15_mid) :"memory"); d += 16; s += 4; @@ -2029,8 +1937,8 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, int srcStride1, int srcStride2, int dstStride1, int dstStride2) { - x86_reg y; - int x,w,h; + x86_reg x, y; + int w,h; w=width/2; h=height/2; __asm__ volatile( PREFETCH" %0 \n\t" @@ -2042,11 +1950,11 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, x=0; for (;x<w-31;x+=32) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movq %1, %%mm0 \n\t" - "movq 8%1, %%mm2 \n\t" - "movq 16%1, %%mm4 \n\t" - "movq 24%1, %%mm6 \n\t" + PREFETCH" 32(%1,%2) \n\t" + "movq (%1,%2), %%mm0 \n\t" + "movq 8(%1,%2), %%mm2 \n\t" + "movq 16(%1,%2), %%mm4 \n\t" + "movq 24(%1,%2), %%mm6 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm2, %%mm3 \n\t" "movq %%mm4, %%mm5 \n\t" @@ -2059,16 +1967,15 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, "punpckhbw %%mm5, %%mm5 \n\t" "punpcklbw %%mm6, %%mm6 \n\t" "punpckhbw %%mm7, %%mm7 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - MOVNTQ" %%mm1, 8%0 \n\t" - MOVNTQ" %%mm2, 16%0 \n\t" - MOVNTQ" %%mm3, 24%0 \n\t" - MOVNTQ" %%mm4, 32%0 \n\t" - MOVNTQ" %%mm5, 40%0 \n\t" - MOVNTQ" %%mm6, 48%0 \n\t" - 
MOVNTQ" %%mm7, 56%0" - :"=m"(d[2*x]) - :"m"(s1[x]) + MOVNTQ" %%mm0, (%0,%2,2) \n\t" + MOVNTQ" %%mm1, 8(%0,%2,2) \n\t" + MOVNTQ" %%mm2, 16(%0,%2,2) \n\t" + MOVNTQ" %%mm3, 24(%0,%2,2) \n\t" + MOVNTQ" %%mm4, 32(%0,%2,2) \n\t" + MOVNTQ" %%mm5, 40(%0,%2,2) \n\t" + MOVNTQ" %%mm6, 48(%0,%2,2) \n\t" + MOVNTQ" %%mm7, 56(%0,%2,2)" + :: "r"(d), "r"(s1), "r"(x) :"memory"); } for (;x<w;x++) d[2*x]=d[2*x+1]=s1[x]; @@ -2079,11 +1986,11 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, x=0; for (;x<w-31;x+=32) { __asm__ volatile( - PREFETCH" 32%1 \n\t" - "movq %1, %%mm0 \n\t" - "movq 8%1, %%mm2 \n\t" - "movq 16%1, %%mm4 \n\t" - "movq 24%1, %%mm6 \n\t" + PREFETCH" 32(%1,%2) \n\t" + "movq (%1,%2), %%mm0 \n\t" + "movq 8(%1,%2), %%mm2 \n\t" + "movq 16(%1,%2), %%mm4 \n\t" + "movq 24(%1,%2), %%mm6 \n\t" "movq %%mm0, %%mm1 \n\t" "movq %%mm2, %%mm3 \n\t" "movq %%mm4, %%mm5 \n\t" @@ -2096,16 +2003,15 @@ static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, "punpckhbw %%mm5, %%mm5 \n\t" "punpcklbw %%mm6, %%mm6 \n\t" "punpckhbw %%mm7, %%mm7 \n\t" - MOVNTQ" %%mm0, %0 \n\t" - MOVNTQ" %%mm1, 8%0 \n\t" - MOVNTQ" %%mm2, 16%0 \n\t" - MOVNTQ" %%mm3, 24%0 \n\t" - MOVNTQ" %%mm4, 32%0 \n\t" - MOVNTQ" %%mm5, 40%0 \n\t" - MOVNTQ" %%mm6, 48%0 \n\t" - MOVNTQ" %%mm7, 56%0" - :"=m"(d[2*x]) - :"m"(s2[x]) + MOVNTQ" %%mm0, (%0,%2,2) \n\t" + MOVNTQ" %%mm1, 8(%0,%2,2) \n\t" + MOVNTQ" %%mm2, 16(%0,%2,2) \n\t" + MOVNTQ" %%mm3, 24(%0,%2,2) \n\t" + MOVNTQ" %%mm4, 32(%0,%2,2) \n\t" + MOVNTQ" %%mm5, 40(%0,%2,2) \n\t" + MOVNTQ" %%mm6, 48(%0,%2,2) \n\t" + MOVNTQ" %%mm7, 56(%0,%2,2)" + :: "r"(d), "r"(s2), "r"(x) :"memory"); } for (;x<w;x++) d[2*x]=d[2*x+1]=s2[x]; |