author | Ronald S. Bultje <rsbultje@gmail.com> | 2010-09-29 17:42:26 +0000
---|---|---
committer | Ronald S. Bultje <rsbultje@gmail.com> | 2010-09-29 17:42:26 +0000
commit | a52ffc3f54e6ba9417513edf9a75c66dfcb93ebb (patch) |
tree | f22d3341074abde55d0bc75b0fffd48c2a75d8a0 /libavcodec/x86 |
parent | fc7c40c2bd95cc6f338806bdbf077732a74f5cf2 (diff) |
download | ffmpeg-a52ffc3f54e6ba9417513edf9a75c66dfcb93ebb.tar.gz |
Move static inline function to a macro, so that constant propagation in
inline asm works for gcc-3.x also (hopefully). Should fix gcc-3.x FATE
breakage after r25254.
Originally committed as revision 25262 to svn://svn.ffmpeg.org/ffmpeg/trunk
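Background for the change, with a minimal sketch: GCC's `"i"` constraint requires an operand that is a compile-time immediate. Inside an `av_always_inline` function the constraint is only satisfiable if the compiler propagates the constant argument into the inlined body before checking the asm, which gcc-3.x did not reliably do; a macro sidesteps the optimizer entirely because the preprocessor pastes the literal in before compilation. The names below (`load_ref_inline`, `LOAD_REF`) are illustrative, not from the patch:

```c
#include <stdint.h>

/* Inline-function form: "i"(off) only compiles when the compiler
 * proves `off` constant after inlining.  gcc-3.x (and any compiler
 * at -O0) may reject this with "impossible constraint in 'asm'".   */
static inline void load_ref_inline(const uint8_t *p, const int off)
{
    __asm__ volatile("movd %a1(%0), %%mm0" :: "r"(p), "i"(off));
}

/* Macro form: the preprocessor substitutes a literal for `off`,
 * so the "i" constraint sees a constant on any compiler.           */
#define LOAD_REF(p, off) \
    __asm__ volatile("movd %a1(%0), %%mm0" :: "r"(p), "i"(off))
```

The `%aN` operand modifier used throughout the patch prints operand N as a bare address constant (no `$` prefix), which is what lets these immediates feed x86 addressing modes such as `%a3(%0,%2)`.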
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- | libavcodec/x86/h264dsp_mmx.c | 230
1 file changed, 113 insertions(+), 117 deletions(-)
```diff
diff --git a/libavcodec/x86/h264dsp_mmx.c b/libavcodec/x86/h264dsp_mmx.c
index d449efdcca..401a488cb5 100644
--- a/libavcodec/x86/h264dsp_mmx.c
+++ b/libavcodec/x86/h264dsp_mmx.c
@@ -63,123 +63,119 @@ void ff_h264_idct_add8_sse2 (uint8_t **dest, const int *block_offset, DCTEL
 /***********************************/
 /* deblocking */
 
-static av_always_inline void h264_loop_filter_strength_iteration_mmx2(int16_t bS[2][4][4], uint8_t nnz[40],
-                                                                      int8_t ref[2][40], int16_t mv[2][40][2],
-                                                                      int bidir, int edges, int step,
-                                                                      int mask_mv, int dir, const int d_idx,
-                                                                      const uint64_t mask_dir)
-{
-    x86_reg b_idx;
-    mask_mv <<= 3;
-    for( b_idx=0; b_idx<edges; b_idx+=step ) {
-        if (!mask_dir)
-        __asm__ volatile(
-                "pxor %%mm0, %%mm0 \n\t"
-                ::
-        );
-        if(!(mask_mv & b_idx)) {
-            if(bidir) {
-                __asm__ volatile(
-                    "movd %a3(%0,%2), %%mm2 \n"
-                    "punpckldq %a4(%0,%2), %%mm2 \n" // { ref0[bn], ref1[bn] }
-                    "pshufw $0x44, 12(%0,%2), %%mm0 \n" // { ref0[b], ref0[b] }
-                    "pshufw $0x44, 52(%0,%2), %%mm1 \n" // { ref1[b], ref1[b] }
-                    "pshufw $0x4E, %%mm2, %%mm3 \n"
-                    "psubb %%mm2, %%mm0 \n" // { ref0[b]!=ref0[bn], ref0[b]!=ref1[bn] }
-                    "psubb %%mm3, %%mm1 \n" // { ref1[b]!=ref1[bn], ref1[b]!=ref0[bn] }
-
-                    "por %%mm1, %%mm0 \n"
-                    "movq %a5(%1,%2,4), %%mm1 \n"
-                    "movq %a6(%1,%2,4), %%mm2 \n"
-                    "movq %%mm1, %%mm3 \n"
-                    "movq %%mm2, %%mm4 \n"
-                    "psubw 48(%1,%2,4), %%mm1 \n"
-                    "psubw 56(%1,%2,4), %%mm2 \n"
-                    "psubw 208(%1,%2,4), %%mm3 \n"
-                    "psubw 216(%1,%2,4), %%mm4 \n"
-                    "packsswb %%mm2, %%mm1 \n"
-                    "packsswb %%mm4, %%mm3 \n"
-                    "paddb %%mm6, %%mm1 \n"
-                    "paddb %%mm6, %%mm3 \n"
-                    "psubusb %%mm5, %%mm1 \n" // abs(mv[b] - mv[bn]) >= limit
-                    "psubusb %%mm5, %%mm3 \n"
-                    "packsswb %%mm3, %%mm1 \n"
-
-                    "por %%mm1, %%mm0 \n"
-                    "movq %a7(%1,%2,4), %%mm1 \n"
-                    "movq %a8(%1,%2,4), %%mm2 \n"
-                    "movq %%mm1, %%mm3 \n"
-                    "movq %%mm2, %%mm4 \n"
-                    "psubw 48(%1,%2,4), %%mm1 \n"
-                    "psubw 56(%1,%2,4), %%mm2 \n"
-                    "psubw 208(%1,%2,4), %%mm3 \n"
-                    "psubw 216(%1,%2,4), %%mm4 \n"
-                    "packsswb %%mm2, %%mm1 \n"
-                    "packsswb %%mm4, %%mm3 \n"
-                    "paddb %%mm6, %%mm1 \n"
-                    "paddb %%mm6, %%mm3 \n"
-                    "psubusb %%mm5, %%mm1 \n" // abs(mv[b] - mv[bn]) >= limit
-                    "psubusb %%mm5, %%mm3 \n"
-                    "packsswb %%mm3, %%mm1 \n"
-
-                    "pshufw $0x4E, %%mm1, %%mm1 \n"
-                    "por %%mm1, %%mm0 \n"
-                    "pshufw $0x4E, %%mm0, %%mm1 \n"
-                    "pminub %%mm1, %%mm0 \n"
-                    ::"r"(ref),
-                      "r"(mv),
-                      "r"(b_idx),
-                      "i"(d_idx+12),
-                      "i"(d_idx+52),
-                      "i"(d_idx*4+48),
-                      "i"(d_idx*4+56),
-                      "i"(d_idx*4+208),
-                      "i"(d_idx*4+216)
-                );
-            } else {
-                __asm__ volatile(
-                    "movd 12(%0,%2), %%mm0 \n"
-                    "psubb %a3(%0,%2), %%mm0 \n" // ref[b] != ref[bn]
-                    "movq 48(%1,%2,4), %%mm1 \n"
-                    "movq 56(%1,%2,4), %%mm2 \n"
-                    "psubw %a4(%1,%2,4), %%mm1 \n"
-                    "psubw %a5(%1,%2,4), %%mm2 \n"
-                    "packsswb %%mm2, %%mm1 \n"
-                    "paddb %%mm6, %%mm1 \n"
-                    "psubusb %%mm5, %%mm1 \n" // abs(mv[b] - mv[bn]) >= limit
-                    "packsswb %%mm1, %%mm1 \n"
-                    "por %%mm1, %%mm0 \n"
-                    ::"r"(ref),
-                      "r"(mv),
-                      "r"(b_idx),
-                      "i"(d_idx+12),
-                      "i"(d_idx*4+48),
-                      "i"(d_idx*4+56)
-                );
-            }
-        }
-        __asm__ volatile(
-            "movd 12(%0,%1), %%mm1 \n"
-            "por %a2(%0,%1), %%mm1 \n" // nnz[b] || nnz[bn]
-            ::"r"(nnz),
-              "r"(b_idx),
-              "i"(d_idx+12)
-        );
-        __asm__ volatile(
-            "pminub %%mm7, %%mm1 \n"
-            "pminub %%mm7, %%mm0 \n"
-            "psllw $1, %%mm1 \n"
-            "pxor %%mm2, %%mm2 \n"
-            "pmaxub %%mm0, %%mm1 \n"
-            "punpcklbw %%mm2, %%mm1 \n"
-            "movq %%mm1, %a1(%0,%2) \n"
-            ::"r"(bS),
-              "i"(32*dir),
-              "r"(b_idx)
-            :"memory"
-        );
-    }
-}
+#define h264_loop_filter_strength_iteration_mmx2(bS, nz, ref, mv, bidir, edges, step, mask_mv, dir, d_idx, mask_dir) \
+    do { \
+        x86_reg b_idx; \
+        mask_mv <<= 3; \
+        for( b_idx=0; b_idx<edges; b_idx+=step ) { \
+            if (!mask_dir) \
+            __asm__ volatile( \
+                    "pxor %%mm0, %%mm0 \n\t" \
+                    :: \
+            ); \
+            if(!(mask_mv & b_idx)) { \
+                if(bidir) { \
+                    __asm__ volatile( \
+                        "movd %a3(%0,%2), %%mm2 \n" \
+                        "punpckldq %a4(%0,%2), %%mm2 \n" /* { ref0[bn], ref1[bn] } */ \
+                        "pshufw $0x44, 12(%0,%2), %%mm0 \n" /* { ref0[b], ref0[b] } */ \
+                        "pshufw $0x44, 52(%0,%2), %%mm1 \n" /* { ref1[b], ref1[b] } */ \
+                        "pshufw $0x4E, %%mm2, %%mm3 \n" \
+                        "psubb %%mm2, %%mm0 \n" /* { ref0[b]!=ref0[bn], ref0[b]!=ref1[bn] } */ \
+                        "psubb %%mm3, %%mm1 \n" /* { ref1[b]!=ref1[bn], ref1[b]!=ref0[bn] } */ \
+                        \
+                        "por %%mm1, %%mm0 \n" \
+                        "movq %a5(%1,%2,4), %%mm1 \n" \
+                        "movq %a6(%1,%2,4), %%mm2 \n" \
+                        "movq %%mm1, %%mm3 \n" \
+                        "movq %%mm2, %%mm4 \n" \
+                        "psubw 48(%1,%2,4), %%mm1 \n" \
+                        "psubw 56(%1,%2,4), %%mm2 \n" \
+                        "psubw 208(%1,%2,4), %%mm3 \n" \
+                        "psubw 216(%1,%2,4), %%mm4 \n" \
+                        "packsswb %%mm2, %%mm1 \n" \
+                        "packsswb %%mm4, %%mm3 \n" \
+                        "paddb %%mm6, %%mm1 \n" \
+                        "paddb %%mm6, %%mm3 \n" \
+                        "psubusb %%mm5, %%mm1 \n" /* abs(mv[b] - mv[bn]) >= limit */ \
+                        "psubusb %%mm5, %%mm3 \n" \
+                        "packsswb %%mm3, %%mm1 \n" \
+                        \
+                        "por %%mm1, %%mm0 \n" \
+                        "movq %a7(%1,%2,4), %%mm1 \n" \
+                        "movq %a8(%1,%2,4), %%mm2 \n" \
+                        "movq %%mm1, %%mm3 \n" \
+                        "movq %%mm2, %%mm4 \n" \
+                        "psubw 48(%1,%2,4), %%mm1 \n" \
+                        "psubw 56(%1,%2,4), %%mm2 \n" \
+                        "psubw 208(%1,%2,4), %%mm3 \n" \
+                        "psubw 216(%1,%2,4), %%mm4 \n" \
+                        "packsswb %%mm2, %%mm1 \n" \
+                        "packsswb %%mm4, %%mm3 \n" \
+                        "paddb %%mm6, %%mm1 \n" \
+                        "paddb %%mm6, %%mm3 \n" \
+                        "psubusb %%mm5, %%mm1 \n" /* abs(mv[b] - mv[bn]) >= limit */ \
+                        "psubusb %%mm5, %%mm3 \n" \
+                        "packsswb %%mm3, %%mm1 \n" \
+                        \
+                        "pshufw $0x4E, %%mm1, %%mm1 \n" \
+                        "por %%mm1, %%mm0 \n" \
+                        "pshufw $0x4E, %%mm0, %%mm1 \n" \
+                        "pminub %%mm1, %%mm0 \n" \
+                        ::"r"(ref), \
+                          "r"(mv), \
+                          "r"(b_idx), \
+                          "i"(d_idx+12), \
+                          "i"(d_idx+52), \
+                          "i"(d_idx*4+48), \
+                          "i"(d_idx*4+56), \
+                          "i"(d_idx*4+208), \
+                          "i"(d_idx*4+216) \
+                    ); \
+                } else { \
+                    __asm__ volatile( \
+                        "movd 12(%0,%2), %%mm0 \n" \
+                        "psubb %a3(%0,%2), %%mm0 \n" /* ref[b] != ref[bn] */ \
+                        "movq 48(%1,%2,4), %%mm1 \n" \
+                        "movq 56(%1,%2,4), %%mm2 \n" \
+                        "psubw %a4(%1,%2,4), %%mm1 \n" \
+                        "psubw %a5(%1,%2,4), %%mm2 \n" \
+                        "packsswb %%mm2, %%mm1 \n" \
+                        "paddb %%mm6, %%mm1 \n" \
+                        "psubusb %%mm5, %%mm1 \n" /* abs(mv[b] - mv[bn]) >= limit */ \
+                        "packsswb %%mm1, %%mm1 \n" \
+                        "por %%mm1, %%mm0 \n" \
+                        ::"r"(ref), \
+                          "r"(mv), \
+                          "r"(b_idx), \
+                          "i"(d_idx+12), \
+                          "i"(d_idx*4+48), \
+                          "i"(d_idx*4+56) \
+                    ); \
+                } \
+            } \
+            __asm__ volatile( \
+                "movd 12(%0,%1), %%mm1 \n" \
+                "por %a2(%0,%1), %%mm1 \n" /* nnz[b] || nnz[bn] */ \
+                ::"r"(nnz), \
+                  "r"(b_idx), \
+                  "i"(d_idx+12) \
+            ); \
+            __asm__ volatile( \
+                "pminub %%mm7, %%mm1 \n" \
+                "pminub %%mm7, %%mm0 \n" \
+                "psllw $1, %%mm1 \n" \
+                "pxor %%mm2, %%mm2 \n" \
+                "pmaxub %%mm0, %%mm1 \n" \
+                "punpcklbw %%mm2, %%mm1 \n" \
+                "movq %%mm1, %a1(%0,%2) \n" \
+                ::"r"(bS), \
+                  "i"(32*dir), \
+                  "r"(b_idx) \
+                :"memory" \
+            ); \
+        } \
+    } while (0)
 
 static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40],
                                             int8_t ref[2][40], int16_t mv[2][40][2],
                                             int bidir, int edges, int step,
                                             int mask_mv0, int mask_mv1, int field )
 {
```
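The hunk ends before the body of `h264_loop_filter_strength_mmx2`, but the mechanism is visible from the macro's signature: each expansion site passes `d_idx` and `mask_dir` as literals, so operands like `"i"(d_idx*4+208)` reduce to immediates via preprocessing and constant arithmetic, with no dependence on cross-function constant propagation. A sketch of what an expansion site plausibly looks like (the argument values here are assumptions, not taken from this commit):

```c
/* Hypothetical call: dir, d_idx and mask_dir are literal constants,
 * so every "i" constraint in the macro body folds to an immediate
 * even on gcc-3.x.  Actual operand values live outside this hunk.  */
h264_loop_filter_strength_iteration_mmx2(bS, nnz, ref, mv, bidir,
                                         edges, step, mask_mv1,
                                         /* dir      */ 1,
                                         /* d_idx    */ -8,
                                         /* mask_dir */ 0);
```

Note that because the macro body is pasted into the caller, names like `nnz` in the asm operand lists resolve against the caller's scope at each expansion site.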