diff options
Diffstat (limited to 'libavcodec/i386')
28 files changed, 309 insertions, 309 deletions
diff --git a/libavcodec/i386/cavsdsp_mmx.c b/libavcodec/i386/cavsdsp_mmx.c index 7489630eeb..15cafd6dc0 100644 --- a/libavcodec/i386/cavsdsp_mmx.c +++ b/libavcodec/i386/cavsdsp_mmx.c @@ -35,7 +35,7 @@ static inline void cavs_idct8_1d(int16_t *block, uint64_t bias) { - asm volatile( + __asm__ volatile( "movq 112(%0), %%mm4 \n\t" /* mm4 = src7 */ "movq 16(%0), %%mm5 \n\t" /* mm5 = src1 */ "movq 80(%0), %%mm2 \n\t" /* mm2 = src5 */ @@ -120,7 +120,7 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) cavs_idct8_1d(block+4*i, ff_pw_4); - asm volatile( + __asm__ volatile( "psraw $3, %%mm7 \n\t" "psraw $3, %%mm6 \n\t" "psraw $3, %%mm5 \n\t" @@ -150,7 +150,7 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) for(i=0; i<2; i++){ cavs_idct8_1d(b2+4*i, ff_pw_64); - asm volatile( + __asm__ volatile( "psraw $7, %%mm7 \n\t" "psraw $7, %%mm6 \n\t" "psraw $7, %%mm5 \n\t" @@ -175,7 +175,7 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) add_pixels_clamped_mmx(b2, dst, stride); /* clear block */ - asm volatile( + __asm__ volatile( "pxor %%mm7, %%mm7 \n\t" "movq %%mm7, (%0) \n\t" "movq %%mm7, 8(%0) \n\t" @@ -275,7 +275,7 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) src -= 2*srcStride;\ \ while(w--){\ - asm volatile(\ + __asm__ volatile(\ "pxor %%mm7, %%mm7 \n\t"\ "movd (%0), %%mm0 \n\t"\ "add %2, %0 \n\t"\ @@ -306,7 +306,7 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) : "memory"\ );\ if(h==16){\ - asm volatile(\ + __asm__ volatile(\ VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\ VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\ VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\ @@ -328,7 +328,7 @@ static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) #define QPEL_CAVS(OPNAME, OP, MMX)\ static void OPNAME ## cavs_qpel8_h_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ int h=8;\ - asm volatile(\ + __asm__ volatile(\ "pxor %%mm7, %%mm7 \n\t"\ "movq %5, %%mm6 \n\t"\ "1: \n\t"\ diff --git a/libavcodec/i386/cpuid.c b/libavcodec/i386/cpuid.c index 230ff26c0b..2f2a669eee 100644 --- a/libavcodec/i386/cpuid.c +++ b/libavcodec/i386/cpuid.c @@ -28,7 +28,7 @@ /* ebx saving is necessary for PIC. gcc seems unable to see it alone */ #define cpuid(index,eax,ebx,ecx,edx)\ - asm volatile\ + __asm__ volatile\ ("mov %%"REG_b", %%"REG_S"\n\t"\ "cpuid\n\t"\ "xchg %%"REG_b", %%"REG_S\ @@ -44,7 +44,7 @@ int mm_support(void) int max_std_level, max_ext_level, std_caps=0, ext_caps=0; x86_reg a, c; - asm volatile ( + __asm__ volatile ( /* See if CPUID instruction is supported ... */ /* ... Get copies of EFLAGS into eax and ecx */ "pushf\n\t" diff --git a/libavcodec/i386/dsputil_h264_template_mmx.c b/libavcodec/i386/dsputil_h264_template_mmx.c index a2daa0ba10..0bf8732e35 100644 --- a/libavcodec/i386/dsputil_h264_template_mmx.c +++ b/libavcodec/i386/dsputil_h264_template_mmx.c @@ -47,7 +47,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* rnd_reg = rnd ? &ff_pw_4 : &ff_pw_3; - asm volatile( + __asm__ volatile( "movd %0, %%mm5\n\t" "movq %1, %%mm4\n\t" "movq %2, %%mm6\n\t" /* mm6 = rnd */ @@ -58,13 +58,13 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* :: "rm"(x+y), "m"(ff_pw_8), "m"(*rnd_reg)); for(i=0; i<h; i++) { - asm volatile( + __asm__ volatile( /* mm0 = src[0..7], mm1 = src[1..8] */ "movq %0, %%mm0\n\t" "movq %1, %%mm2\n\t" :: "m"(src[0]), "m"(src[dxy])); - asm volatile( + __asm__ volatile( /* [mm0,mm1] = A * src[0..7] */ /* [mm2,mm3] = B * src[1..8] */ "movq %%mm0, %%mm1\n\t" @@ -98,7 +98,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* /* general case, bilinear */ rnd_reg = rnd ? &ff_pw_32.a : &ff_pw_28.a; - asm volatile("movd %2, %%mm4\n\t" + __asm__ volatile("movd %2, %%mm4\n\t" "movd %3, %%mm6\n\t" "punpcklwd %%mm4, %%mm4\n\t" "punpcklwd %%mm6, %%mm6\n\t" @@ -119,7 +119,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* "movq %%mm4, %0\n\t" : "=m" (AA), "=m" (DD) : "rm" (x), "rm" (y), "m" (ff_pw_64)); - asm volatile( + __asm__ volatile( /* mm0 = src[0..7], mm1 = src[1..8] */ "movq %0, %%mm0\n\t" "movq %1, %%mm1\n\t" @@ -128,7 +128,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* for(i=0; i<h; i++) { src += stride; - asm volatile( + __asm__ volatile( /* mm2 = A * src[0..3] + B * src[1..4] */ /* mm3 = A * src[4..7] + B * src[5..8] */ "movq %%mm0, %%mm2\n\t" @@ -145,7 +145,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* "paddw %%mm0, %%mm3\n\t" : : "m" (AA)); - asm volatile( + __asm__ volatile( /* [mm2,mm3] += C * src[0..7] */ "movq %0, %%mm0\n\t" "movq %%mm0, %%mm1\n\t" @@ -157,7 +157,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* "paddw %%mm1, %%mm3\n\t" : : "m" (src[0])); - asm volatile( + __asm__ volatile( /* [mm2,mm3] += D * src[1..8] */ "movq %1, %%mm1\n\t" "movq %%mm1, %%mm0\n\t" @@ -171,7 +171,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* "movq %0, %%mm0\n\t" : : "m" (src[0]), "m" (src[1]), "m" (DD)); - asm volatile( + __asm__ volatile( /* dst[0..7] = ([mm2,mm3] + 32) >> 6 */ "paddw %1, %%mm2\n\t" "paddw %1, %%mm3\n\t" @@ -187,7 +187,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y) { - asm volatile( + __asm__ volatile( "pxor %%mm7, %%mm7 \n\t" "movd %5, %%mm2 \n\t" "movd %6, %%mm3 \n\t" @@ -259,7 +259,7 @@ static void H264_CHROMA_MC2_TMPL(uint8_t *dst/*align 2*/, uint8_t *src/*align 1* int tmp = ((1<<16)-1)*x + 8; int CD= tmp*y; int AB= (tmp<<3) - CD; - asm volatile( + __asm__ volatile( /* mm5 = {A,B,A,B} */ /* mm6 = {C,D,C,D} */ "movd %0, %%mm5\n\t" @@ -274,7 +274,7 @@ static void H264_CHROMA_MC2_TMPL(uint8_t *dst/*align 2*/, uint8_t *src/*align 1* :: "r"(AB), "r"(CD), "m"(src[0])); - asm volatile( + __asm__ volatile( "1:\n\t" "add %4, %1\n\t" /* mm1 = A * src[0,1] + B * src[1,2] */ diff --git a/libavcodec/i386/dsputil_h264_template_ssse3.c b/libavcodec/i386/dsputil_h264_template_ssse3.c index 5345ccc1d8..e29e05e7c8 100644 --- a/libavcodec/i386/dsputil_h264_template_ssse3.c +++ b/libavcodec/i386/dsputil_h264_template_ssse3.c @@ -37,7 +37,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* if(y==0 || x==0) { /* 1 dimensional filter only */ - asm volatile( + __asm__ volatile( "movd %0, %%xmm7 \n\t" "movq %1, %%xmm6 \n\t" "pshuflw $0, %%xmm7, %%xmm7 \n\t" @@ -47,7 +47,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* ); if(x) { - asm volatile( + __asm__ volatile( "1: \n\t" "movq (%1), %%xmm0 \n\t" "movq 1(%1), %%xmm1 \n\t" @@ -75,7 +75,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* :"r"((x86_reg)stride) ); } else { - asm volatile( + __asm__ volatile( "1: \n\t" "movq (%1), %%xmm0 \n\t" "movq (%1,%3), %%xmm1 \n\t" @@ -107,7 +107,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* } /* general case, bilinear */ - asm volatile( + __asm__ volatile( "movd %0, %%xmm7 \n\t" "movd %1, %%xmm6 \n\t" "movdqa %2, %%xmm5 \n\t" @@ -118,7 +118,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* :: "r"((x*255+8)*(8-y)), "r"((x*255+8)*y), "m"(*(rnd?&ff_pw_32:&ff_pw_28)) ); - asm volatile( + __asm__ volatile( "movq (%1), %%xmm0 \n\t" "movq 1(%1), %%xmm1 \n\t" "punpcklbw %%xmm1, %%xmm0 \n\t" @@ -160,7 +160,7 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1* static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y) { - asm volatile( + __asm__ volatile( "movd %0, %%mm7 \n\t" "movd %1, %%mm6 \n\t" "movq %2, %%mm5 \n\t" @@ -169,7 +169,7 @@ static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1* :: "r"((x*255+8)*(8-y)), "r"((x*255+8)*y), "m"(ff_pw_32) ); - asm volatile( + __asm__ volatile( "movd (%1), %%mm0 \n\t" "punpcklbw 1(%1), %%mm0 \n\t" "add %3, %1 \n\t" diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c index 6e1a93d711..f15eac987f 100644 --- a/libavcodec/i386/dsputil_mmx.c +++ b/libavcodec/i386/dsputil_mmx.c @@ -70,28 +70,28 @@ DECLARE_ALIGNED_8 (const uint64_t, ff_pb_FC ) = 0xFCFCFCFCFCFCFCFCULL; DECLARE_ALIGNED_16(const double, ff_pd_1[2]) = { 1.0, 1.0 }; DECLARE_ALIGNED_16(const double, ff_pd_2[2]) = { 2.0, 2.0 }; -#define JUMPALIGN() asm volatile (ASMALIGN(3)::) -#define MOVQ_ZERO(regd) asm volatile ("pxor %%" #regd ", %%" #regd ::) +#define JUMPALIGN() __asm__ volatile (ASMALIGN(3)::) +#define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%" #regd ", %%" #regd ::) #define MOVQ_BFE(regd) \ - asm volatile ( \ + __asm__ volatile ( \ "pcmpeqd %%" #regd ", %%" #regd " \n\t"\ "paddb %%" #regd ", %%" #regd " \n\t" ::) #ifndef PIC -#define MOVQ_BONE(regd) asm volatile ("movq %0, %%" #regd " \n\t" ::"m"(ff_bone)) -#define MOVQ_WTWO(regd) asm volatile ("movq %0, %%" #regd " \n\t" ::"m"(ff_wtwo)) +#define MOVQ_BONE(regd) __asm__ volatile ("movq %0, %%" #regd " \n\t" ::"m"(ff_bone)) +#define MOVQ_WTWO(regd) __asm__ volatile ("movq %0, %%" #regd " \n\t" ::"m"(ff_wtwo)) #else // for shared library it's better to use this way for accessing constants // pcmpeqd -> -1 #define MOVQ_BONE(regd) \ - asm volatile ( \ + __asm__ volatile ( \ "pcmpeqd %%" #regd ", %%" #regd " \n\t" \ "psrlw $15, %%" #regd " \n\t" \ "packuswb %%" #regd ", %%" #regd " \n\t" ::) #define MOVQ_WTWO(regd) \ - asm volatile ( \ + __asm__ volatile ( \ "pcmpeqd %%" #regd ", %%" #regd " \n\t" \ "psrlw $15, %%" #regd " \n\t" \ "psllw $1, %%" #regd " \n\t"::) @@ -223,7 +223,7 @@ void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size p = block; pix = pixels; /* unrolled loop */ - asm volatile( + __asm__ volatile( "movq %3, %%mm0 \n\t" "movq 8%3, %%mm1 \n\t" "movq 16%3, %%mm2 \n\t" @@ -248,7 +248,7 @@ void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size // if here would be an exact copy of the code above // compiler would generate some very strange code // thus using "r" - asm volatile( + __asm__ volatile( "movq (%3), %%mm0 \n\t" "movq 8(%3), %%mm1 \n\t" "movq 16(%3), %%mm2 \n\t" @@ -299,7 +299,7 @@ void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size MOVQ_ZERO(mm7); i = 4; do { - asm volatile( + __asm__ volatile( "movq (%2), %%mm0 \n\t" "movq 8(%2), %%mm1 \n\t" "movq 16(%2), %%mm2 \n\t" @@ -330,7 +330,7 @@ void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size static void put_pixels4_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h) { - asm volatile( + __asm__ volatile( "lea (%3, %3), %%"REG_a" \n\t" ASMALIGN(3) "1: \n\t" @@ -356,7 +356,7 @@ static void put_pixels4_mmx(uint8_t *block, const uint8_t *pixels, int line_size static void put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h) { - asm volatile( + __asm__ volatile( "lea (%3, %3), %%"REG_a" \n\t" ASMALIGN(3) "1: \n\t" @@ -382,7 +382,7 @@ static void put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, int line_size static void put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h) { - asm volatile( + __asm__ volatile( "lea (%3, %3), %%"REG_a" \n\t" ASMALIGN(3) "1: \n\t" @@ -416,7 +416,7 @@ static void put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, int line_siz static void put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_size, int h) { - asm volatile( + __asm__ volatile( "1: \n\t" "movdqu (%1), %%xmm0 \n\t" "movdqu (%1,%3), %%xmm1 \n\t" @@ -438,7 +438,7 @@ static void put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_si static void avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_size, int h) { - asm volatile( + __asm__ volatile( "1: \n\t" "movdqu (%1), %%xmm0 \n\t" "movdqu (%1,%3), %%xmm1 \n\t" @@ -464,7 +464,7 @@ static void avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_si static void clear_blocks_mmx(DCTELEM *blocks) { - asm volatile( + __asm__ volatile( "pxor %%mm7, %%mm7 \n\t" "mov $-128*6, %%"REG_a" \n\t" "1: \n\t" @@ -481,7 +481,7 @@ static void clear_blocks_mmx(DCTELEM *blocks) static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){ x86_reg i=0; - asm volatile( + __asm__ volatile( "jmp 2f \n\t" "1: \n\t" "movq (%1, %0), %%mm0 \n\t" @@ -505,7 +505,7 @@ static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){ static void add_bytes_l2_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ x86_reg i=0; - asm volatile( + __asm__ volatile( "jmp 2f \n\t" "1: \n\t" "movq (%2, %0), %%mm0 \n\t" @@ -600,7 +600,7 @@ static void h263_v_loop_filter_mmx(uint8_t *src, int stride, int qscale){ if(ENABLE_ANY_H263) { const int strength= ff_h263_loop_filter_strength[qscale]; - asm volatile( + __asm__ volatile( H263_LOOP_FILTER @@ -618,7 +618,7 @@ static void h263_v_loop_filter_mmx(uint8_t *src, int stride, int qscale){ } static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){ - asm volatile( //FIXME could save 1 instruction if done as 8x4 ... + __asm__ volatile( //FIXME could save 1 instruction if done as 8x4 ... "movd %4, %%mm0 \n\t" "movd %5, %%mm1 \n\t" "movd %6, %%mm2 \n\t" @@ -656,7 +656,7 @@ static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){ transpose4x4(btemp , src , 8, stride); transpose4x4(btemp+4, src + 4*stride, 8, stride); - asm volatile( + __asm__ volatile( H263_LOOP_FILTER // 5 3 4 6 : "+m" (temp[0]), @@ -666,7 +666,7 @@ static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){ : "g" (2*strength), "m"(ff_pb_FC) ); - asm volatile( + __asm__ volatile( "movq %%mm5, %%mm1 \n\t" "movq %%mm4, %%mm0 \n\t" "punpcklbw %%mm3, %%mm5 \n\t" @@ -711,7 +711,7 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w) ptr = buf; if(w==8) { - asm volatile( + __asm__ volatile( "1: \n\t" "movd (%0), %%mm0 \n\t" "punpcklbw %%mm0, %%mm0 \n\t" @@ -732,7 +732,7 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w) } else { - asm volatile( + __asm__ volatile( "1: \n\t" "movd (%0), %%mm0 \n\t" "punpcklbw %%mm0, %%mm0 \n\t" @@ -757,7 +757,7 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w) for(i=0;i<w;i+=4) { /* top and bottom (and hopefully also the corners) */ ptr= buf - (i + 1) * wrap - w; - asm volatile( + __asm__ volatile( "1: \n\t" "movq (%1, %0), %%mm0 \n\t" "movq %%mm0, (%0) \n\t" @@ -771,7 +771,7 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w) : "r" ((x86_reg)buf - (x86_reg)ptr - w), "r" ((x86_reg)-wrap), "r" ((x86_reg)-wrap*3), "r" (ptr+width+2*w) ); ptr= last_line + (i + 1) * wrap - w; - asm volatile( + __asm__ volatile( "1: \n\t" "movq (%1, %0), %%mm0 \n\t" "movq %%mm0, (%0) \n\t" @@ -792,7 +792,7 @@ static void add_png_paeth_prediction_##cpu(uint8_t *dst, uint8_t *src, uint8_t * {\ x86_reg i = -bpp;\ x86_reg end = w-3;\ - asm volatile(\ + __asm__ volatile(\ "pxor %%mm7, %%mm7 \n"\ "movd (%1,%0), %%mm0 \n"\ "movd (%2,%0), %%mm1 \n"\ @@ -886,7 +886,7 @@ PAETH(ssse3, ABS3_SSSE3) static void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ uint64_t temp;\ \ - asm volatile(\ + __asm__ volatile(\ "pxor %%mm7, %%mm7 \n\t"\ "1: \n\t"\ "movq (%0), %%mm0 \n\t" /* ABCDEFGH */\ @@ -1025,7 +1025,7 @@ static void OPNAME ## mpeg4_qpel16_h_lowpass_3dnow(uint8_t *dst, uint8_t *src, i temp[13]= (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]);\ temp[14]= (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]);\ temp[15]= (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]);\ - asm volatile(\ + __asm__ volatile(\ "movq (%0), %%mm0 \n\t"\ "movq 8(%0), %%mm1 \n\t"\ "paddw %2, %%mm0 \n\t"\ @@ -1051,7 +1051,7 @@ static void OPNAME ## mpeg4_qpel16_h_lowpass_3dnow(uint8_t *dst, uint8_t *src, i }\ \ static void OPNAME ## mpeg4_qpel8_h_lowpass_mmx2(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ - asm volatile(\ + __asm__ volatile(\ "pxor %%mm7, %%mm7 \n\t"\ "1: \n\t"\ "movq (%0), %%mm0 \n\t" /* ABCDEFGH */\ @@ -1128,7 +1128,7 @@ static void OPNAME ## mpeg4_qpel8_h_lowpass_3dnow(uint8_t *dst, uint8_t *src, in temp[ 5]= (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 8]);\ temp[ 6]= (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 8])*3 - (src[ 3]+src[ 7]);\ temp[ 7]= (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 8])*6 + (src[ 5]+src[ 7])*3 - (src[ 4]+src[ 6]);\ - asm volatile(\ + __asm__ volatile(\ "movq (%0), %%mm0 \n\t"\ "movq 8(%0), %%mm1 \n\t"\ "paddw %2, %%mm0 \n\t"\ @@ -1153,7 +1153,7 @@ static void OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int count= 17;\ \ /*FIXME unroll */\ - asm volatile(\ + __asm__ volatile(\ "pxor %%mm7, %%mm7 \n\t"\ "1: \n\t"\ "movq (%0), %%mm0 \n\t"\ @@ -1181,7 +1181,7 @@ static void OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, count=4;\ \ /*FIXME reorder for speed */\ - asm volatile(\ + __asm__ volatile(\ /*"pxor %%mm7, %%mm7 \n\t"*/\ "1: \n\t"\ "movq (%0), %%mm0 \n\t"\ @@ -1231,7 +1231,7 @@ static void OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int count= 9;\ \ /*FIXME unroll */\ - asm volatile(\ + __asm__ volatile(\ "pxor %%mm7, %%mm7 \n\t"\ "1: \n\t"\ "movq (%0), %%mm0 \n\t"\ @@ -1253,7 +1253,7 @@ static void OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, count=2;\ \ /*FIXME reorder for speed */\ - asm volatile(\ + __asm__ volatile(\ /*"pxor %%mm7, %%mm7 \n\t"*/\ "1: \n\t"\ "movq (%0), %%mm0 \n\t"\ @@ -1620,7 +1620,7 @@ static void gmc_mmx(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int o src = edge_buf; } - asm volatile( + __asm__ volatile( "movd %0, %%mm6 \n\t" "pxor %%mm7, %%mm7 \n\t" "punpcklwd %%mm6, %%mm6 \n\t" @@ -1639,7 +1639,7 @@ static void gmc_mmx(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int o oys - dyys + dyxs*(x+3) }; for(y=0; y<h; y++){ - asm volatile( + __asm__ volatile( "movq %0, %%mm4 \n\t" "movq %1, %%mm5 \n\t" "paddw %2, %%mm4 \n\t" @@ -1652,7 +1652,7 @@ static void gmc_mmx(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int o : "m"(*dxy4), "m"(*dyy4) ); - asm volatile( + __asm__ volatile( "movq %%mm6, %%mm2 \n\t" "movq %%mm6, %%mm1 \n\t" "psubw %%mm4, %%mm2 \n\t" @@ -1701,7 +1701,7 @@ static void gmc_mmx(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int o static void name(void *mem, int stride, int h){\ const uint8_t *p= mem;\ do{\ - asm volatile(#op" %0" :: "m"(*p));\ + __asm__ volatile(#op" %0" :: "m"(*p));\ p+= stride;\ }while(--h);\ } @@ -1787,9 +1787,9 @@ static void ff_idct_xvid_mmx2_add(uint8_t *dest, int line_size, DCTELEM *block) static void vorbis_inverse_coupling_3dnow(float *mag, float *ang, int blocksize) { int i; - asm volatile("pxor %%mm7, %%mm7":); + __asm__ volatile("pxor %%mm7, %%mm7":); for(i=0; i<blocksize; i+=2) { - asm volatile( + __asm__ volatile( "movq %0, %%mm0 \n\t" "movq %1, %%mm1 \n\t" "movq %%mm0, %%mm2 \n\t" @@ -1809,18 +1809,18 @@ static void vorbis_inverse_coupling_3dnow(float *mag, float *ang, int blocksize) ::"memory" ); } - asm volatile("femms"); + __asm__ volatile("femms"); } static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize) { int i; - asm volatile( + __asm__ volatile( "movaps %0, %%xmm5 \n\t" ::"m"(ff_pdw_80000000[0]) ); for(i=0; i<blocksize; i+=4) { - asm volatile( + __asm__ volatile( "movaps %0, %%xmm0 \n\t" "movaps %1, %%xmm1 \n\t" "xorps %%xmm2, %%xmm2 \n\t" @@ -1846,7 +1846,7 @@ static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize) #define IF0(x) #define MIX5(mono,stereo)\ - asm volatile(\ + __asm__ volatile(\ "movss 0(%2), %%xmm5 \n"\ "movss 8(%2), %%xmm6 \n"\ "movss 24(%2), %%xmm7 \n"\ @@ -1879,7 +1879,7 @@ static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize) ); #define MIX_MISC(stereo)\ - asm volatile(\ + __asm__ volatile(\ "1: \n"\ "movaps (%3,%0), %%xmm0 \n"\ stereo("movaps %%xmm0, %%xmm1 \n")\ @@ -1919,7 +1919,7 @@ static void ac3_downmix_sse(float (*samples)[256], float (*matrix)[2], int out_c } else { DECLARE_ALIGNED_16(float, matrix_simd[in_ch][2][4]); j = 2*in_ch*sizeof(float); - asm volatile( + __asm__ volatile( "1: \n" "sub $8, %0 \n" "movss (%2,%0), %%xmm6 \n" @@ -1943,7 +1943,7 @@ static void ac3_downmix_sse(float (*samples)[256], float (*matrix)[2], int out_c static void vector_fmul_3dnow(float *dst, const float *src, int len){ x86_reg i = (len-4)*4; - asm volatile( + __asm__ volatile( "1: \n\t" "movq (%1,%0), %%mm0 \n\t" "movq 8(%1,%0), %%mm1 \n\t" @@ -1961,7 +1961,7 @@ static void vector_fmul_3dnow(float *dst, const float *src, int len){ } static void vector_fmul_sse(float *dst, const float *src, int len){ x86_reg i = (len-8)*4; - asm volatile( + __asm__ volatile( "1: \n\t" "movaps (%1,%0), %%xmm0 \n\t" "movaps 16(%1,%0), %%xmm1 \n\t" @@ -1979,7 +1979,7 @@ static void vector_fmul_sse(float *dst, const float *src, int len){ static void vector_fmul_reverse_3dnow2(float *dst, const float *src0, const float *src1, int len){ x86_reg i = len*4-16; - asm volatile( + __asm__ volatile( "1: \n\t" "pswapd 8(%1), %%mm0 \n\t" "pswapd (%1), %%mm1 \n\t" @@ -1993,11 +1993,11 @@ static void vector_fmul_reverse_3dnow2(float *dst, const float *src0, const floa :"+r"(i), "+r"(src1) :"r"(dst), "r"(src0) ); - asm volatile("femms"); + __asm__ volatile("femms"); } static void vector_fmul_reverse_sse(float *dst, const float *src0, const float *src1, int len){ x86_reg i = len*4-32; - asm volatile( + __asm__ volatile( "1: \n\t" "movaps 16(%1), %%xmm0 \n\t" "movaps (%1), %%xmm1 \n\t" @@ -2020,7 +2020,7 @@ static void vector_fmul_add_add_3dnow(float *dst, const float *src0, const float x86_reg i = (len-4)*4; if(step == 2 && src3 == 0){ dst += (len-4)*2; - asm volatile( + __asm__ volatile( "1: \n\t" "movq (%2,%0), %%mm0 \n\t" "movq 8(%2,%0), %%mm1 \n\t" @@ -2043,7 +2043,7 @@ static void vector_fmul_add_add_3dnow(float *dst, const float *src0, const float ); } else if(step == 1 && src3 == 0){ - asm volatile( + __asm__ volatile( "1: \n\t" "movq (%2,%0), %%mm0 \n\t" "movq 8(%2,%0), %%mm1 \n\t" @@ -2062,14 +2062,14 @@ static void vector_fmul_add_add_3dnow(float *dst, const float *src0, const float } else ff_vector_fmul_add_add_c(dst, src0, src1, src2, src3, len, step); - asm volatile("femms"); + __asm__ volatile("femms"); } static void vector_fmul_add_add_sse(float *dst, const float *src0, const float *src1, const float *src2, int src3, int len, int step){ x86_reg i = (len-8)*4; if(step == 2 && src3 == 0){ dst += (len-8)*2; - asm volatile( + __asm__ volatile( "1: \n\t" "movaps (%2,%0), %%xmm0 \n\t" "movaps 16(%2,%0), %%xmm1 \n\t" @@ -2100,7 +2100,7 @@ static void vector_fmul_add_add_sse(float *dst, const float *src0, const float * ); } else if(step == 1 && src3 == 0){ - asm volatile( + __asm__ volatile( "1: \n\t" "movaps (%2,%0), %%xmm0 \n\t" "movaps 16(%2,%0), %%xmm1 \n\t" @@ -2127,7 +2127,7 @@ static void vector_fmul_window_3dnow2(float *dst, const float *src0, const float if(add_bias == 0){ x86_reg i = -len*4; x86_reg j = len*4-8; - asm volatile( + __asm__ volatile( "1: \n" "pswapd (%5,%1), %%mm1 \n" "movq (%5,%0), %%mm0 \n" @@ -2162,7 +2162,7 @@ static void vector_fmul_window_sse(float *dst, const float *src0, const float *s if(add_bias == 0){ x86_reg i = -len*4; x86_reg j = len*4-16; - asm volatile( + __asm__ volatile( "1: \n" "movaps (%5,%1), %%xmm1 \n" "movaps (%5,%0), %%xmm0 \n" @@ -2195,7 +2195,7 @@ static void vector_fmul_window_sse(float *dst, const float *src0, const float *s static void int32_to_float_fmul_scalar_sse(float *dst, const int *src, float mul, int len) { x86_reg i = -4*len; - asm volatile( + __asm__ volatile( "movss %3, %%xmm4 \n" "shufps $0, %%xmm4, %%xmm4 \n" "1: \n" @@ -2219,7 +2219,7 @@ static void int32_to_float_fmul_scalar_sse(float *dst, const int *src, float mul static void int32_to_float_fmul_scalar_sse2(float *dst, const int *src, float mul, int len) { x86_reg i = -4*len; - asm volatile( + __asm__ volatile( "movss %3, %%xmm4 \n" "shufps $0, %%xmm4, %%xmm4 \n" "1: \n" @@ -2238,7 +2238,7 @@ static void int32_to_float_fmul_scalar_sse2(float *dst, const int *src, float mu static void float_to_int16_3dnow(int16_t *dst, const float *src, long len){ // not bit-exact: pf2id uses different rounding than C and SSE - asm volatile( + __asm__ volatile( "add %0 , %0 \n\t" "lea (%2,%0,2) , %2 \n\t" "add %0 , %1 \n\t" @@ -2259,7 +2259,7 @@ static void float_to_int16_3dnow(int16_t *dst, const float *src, long len){ ); } static void float_to_int16_sse(int16_t *dst, const float *src, long len){ - asm volatile( + __asm__ volatile( "add %0 , %0 \n\t" "lea (%2,%0,2) , %2 \n\t" "add %0 , %1 \n\t" @@ -2281,7 +2281,7 @@ static void float_to_int16_sse(int16_t *dst, const float *src, long len){ } static void float_to_int16_sse2(int16_t *dst, const float *src, long len){ - asm volatile( + __asm__ volatile( "add %0 , %0 \n\t" "lea (%2,%0,2) , %2 \n\t" "add %0 , %1 \n\t" @@ -2326,7 +2326,7 @@ static void float_to_int16_interleave_##cpu(int16_t *dst, const float **src, lon else if(channels==2){\ const float *src0 = src[0];\ const float *src1 = src[1];\ - asm volatile(\ + __asm__ volatile(\ "shl $2, %0 \n"\ "add %0, %1 \n"\ "add %0, %2 \n"\ @@ -2412,7 +2412,7 @@ static void add_int16_sse2(int16_t * v1, int16_t * v2, int order) x86_reg o = -(order << 1); v1 += order; v2 += order; - asm volatile( + __asm__ volatile( "1: \n\t" "movdqu (%1,%2), %%xmm0 \n\t" "movdqu 16(%1,%2), %%xmm1 \n\t" @@ -2431,7 +2431,7 @@ static void sub_int16_sse2(int16_t * v1, int16_t * v2, int order) x86_reg o = -(order << 1); v1 += order; v2 += order; - asm volatile( + __asm__ volatile( "1: \n\t" "movdqa (%0,%2), %%xmm0 \n\t" "movdqa 16(%0,%2), %%xmm2 \n\t" @@ -2456,7 +2456,7 @@ static int32_t scalarproduct_int16_sse2(int16_t * v1, int16_t * v2, int order, i v1 += order; v2 += order; sh = shift; - asm volatile( + __asm__ volatile( "pxor %%xmm7, %%xmm7 \n\t" "1: \n\t" "movdqu (%0,%3), %%xmm0 \n\t" diff --git a/libavcodec/i386/dsputil_mmx.h b/libavcodec/i386/dsputil_mmx.h index f095975128..5f81cb88d9 100644 --- a/libavcodec/i386/dsputil_mmx.h +++ b/libavcodec/i386/dsputil_mmx.h @@ -127,7 +127,7 @@ extern const double ff_pd_2[2]; #endif #define MOVQ_WONE(regd) \ - asm volatile ( \ + __asm__ volatile ( \ "pcmpeqd %%" #regd ", %%" #regd " \n\t" \ "psrlw $15, %%" #regd ::) diff --git a/libavcodec/i386/dsputil_mmx_avg.h b/libavcodec/i386/dsputil_mmx_avg.h index 7509b75e58..a3f2068f0a 100644 --- a/libavcodec/i386/dsputil_mmx_avg.h +++ b/libavcodec/i386/dsputil_mmx_avg.h @@ -33,7 +33,7 @@ */ static void DEF(put_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) { - asm volatile( + __asm__ volatile( "lea (%3, %3), %%"REG_a" \n\t" "1: \n\t" "movq (%1), %%mm0 \n\t" @@ -61,7 +61,7 @@ static void DEF(put_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_ static void DEF(put_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) { - asm volatile( + __asm__ volatile( "testl $1, %0 \n\t" " jz 1f \n\t" "movd (%1), %%mm0 \n\t" @@ -112,7 +112,7 @@ static void DEF(put_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int static void DEF(put_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) { - asm volatile( + __asm__ volatile( "testl $1, %0 \n\t" " jz 1f \n\t" "movq (%1), %%mm0 \n\t" @@ -162,7 +162,7 @@ static void DEF(put_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int static void DEF(put_no_rnd_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) { - asm volatile( + __asm__ volatile( "pcmpeqb %%mm6, %%mm6 \n\t" "testl $1, %0 \n\t" " jz 1f \n\t" @@ -232,7 +232,7 @@ static void DEF(put_no_rnd_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src static void DEF(avg_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) { - asm volatile( + __asm__ volatile( "testl $1, %0 \n\t" " jz 1f \n\t" "movd (%1), %%mm0 \n\t" @@ -284,7 +284,7 @@ static void DEF(avg_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int static void DEF(avg_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) { - asm volatile( + __asm__ volatile( "testl $1, %0 \n\t" " jz 1f \n\t" "movq (%1), %%mm0 \n\t" @@ -339,7 +339,7 @@ static void DEF(avg_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int static void DEF(put_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) { - asm volatile( + __asm__ volatile( "lea (%3, %3), %%"REG_a" \n\t" "1: \n\t" "movq (%1), %%mm0 \n\t" @@ -379,7 +379,7 @@ static void DEF(put_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line static void DEF(put_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) { - asm volatile( + __asm__ volatile( "testl $1, %0 \n\t" " jz 1f \n\t" "movq (%1), %%mm0 \n\t" @@ -427,7 +427,7 @@ static void DEF(put_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int static void DEF(avg_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) { - asm volatile( + __asm__ volatile( "testl $1, %0 \n\t" " jz 1f \n\t" "movq (%1), %%mm0 \n\t" @@ -481,7 +481,7 @@ static void DEF(avg_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int static void DEF(put_no_rnd_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) { - asm volatile( + __asm__ volatile( "pcmpeqb %%mm6, %%mm6 \n\t" "testl $1, %0 \n\t" " jz 1f \n\t" @@ -556,7 +556,7 @@ static void DEF(put_no_rnd_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *sr static void DEF(put_no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) { MOVQ_BONE(mm6); - asm volatile( + __asm__ volatile( "lea (%3, %3), %%"REG_a" \n\t" "1: \n\t" "movq (%1), %%mm0 \n\t" @@ -592,7 +592,7 @@ static void DEF(put_no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, in static void DEF(put_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) { - asm volatile( + __asm__ volatile( "lea (%3, %3), %%"REG_a" \n\t" "movq (%1), %%mm0 \n\t" "sub %3, %2 \n\t" @@ -624,7 +624,7 @@ static void DEF(put_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_ static void DEF(put_no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) { MOVQ_BONE(mm6); - asm volatile( + __asm__ volatile( "lea (%3, %3), %%"REG_a" \n\t" "movq (%1), %%mm0 \n\t" "sub %3, %2 \n\t" @@ -656,7 +656,7 @@ static void DEF(put_no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, in static void DEF(avg_pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h) { - asm volatile( + __asm__ volatile( "lea (%3, %3), %%"REG_a" \n\t" "1: \n\t" "movq (%2), %%mm0 \n\t" @@ -684,7 +684,7 @@ static void DEF(avg_pixels8)(uint8_t *block, const uint8_t *pixels, int line_siz static void DEF(avg_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) { - asm volatile( + __asm__ volatile( "lea (%3, %3), %%"REG_a" \n\t" "1: \n\t" "movq (%1), %%mm0 \n\t" @@ -716,7 +716,7 @@ static void DEF(avg_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_ static void DEF(avg_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) { - asm volatile( + __asm__ volatile( "lea (%3, %3), %%"REG_a" \n\t" "movq (%1), %%mm0 \n\t" "sub %3, %2 \n\t" @@ -757,7 +757,7 @@ static void DEF(avg_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_ static void DEF(avg_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) { MOVQ_BONE(mm6); - asm volatile( + __asm__ volatile( "lea (%3, %3), %%"REG_a" \n\t" "movq (%1), %%mm0 \n\t" PAVGB" 1(%1), %%mm0 \n\t" @@ -798,7 +798,7 @@ static void DEF(avg_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line static void DEF(avg_pixels4)(uint8_t *block, const uint8_t *pixels, int line_size, int h) { do { - asm volatile( + __asm__ volatile( "movd (%1), %%mm0 \n\t" "movd (%1, %2), %%mm1 \n\t" "movd (%1, %2, 2), %%mm2 \n\t" @@ -852,7 +852,7 @@ static void DEF(avg_pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int lin #define QPEL_2TAP_L3(OPNAME) \ static void DEF(OPNAME ## 2tap_qpel16_l3)(uint8_t *dst, uint8_t *src, int stride, int h, int off1, int off2){\ - asm volatile(\ + __asm__ volatile(\ "1: \n\t"\ "movq (%1,%2), %%mm0 \n\t"\ "movq 8(%1,%2), %%mm1 \n\t"\ @@ -874,7 +874,7 @@ static void DEF(OPNAME ## 2tap_qpel16_l3)(uint8_t *dst, uint8_t *src, int stride );\ }\ static void DEF(OPNAME ## 2tap_qpel8_l3)(uint8_t *dst, uint8_t *src, int stride, int h, int off1, int off2){\ - asm volatile(\ + __asm__ volatile(\ "1: \n\t"\ "movq (%1,%2), %%mm0 \n\t"\ PAVGB" (%1,%3), %%mm0 \n\t"\ diff --git a/libavcodec/i386/dsputil_mmx_qns.h b/libavcodec/i386/dsputil_mmx_qns.h index af726edbfb..1f484e74f0 100644 --- a/libavcodec/i386/dsputil_mmx_qns.h +++ b/libavcodec/i386/dsputil_mmx_qns.h @@ -36,7 +36,7 @@ static int DEF(try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[ scale<<= 16 + SCALE_OFFSET - BASIS_SHIFT + RECON_SHIFT; SET_RND(mm6); - asm volatile( + __asm__ volatile( "pxor %%mm7, %%mm7 \n\t" "movd %4, %%mm5 \n\t" "punpcklwd %%mm5, %%mm5 \n\t" @@ -77,7 +77,7 @@ static void DEF(add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale) if(FFABS(scale) < MAX_ABS){ scale<<= 16 + SCALE_OFFSET - BASIS_SHIFT + RECON_SHIFT; SET_RND(mm6); - asm volatile( + __asm__ volatile( "movd %3, %%mm5 \n\t" "punpcklwd %%mm5, %%mm5 \n\t" "punpcklwd %%mm5, %%mm5 \n\t" diff --git a/libavcodec/i386/dsputil_mmx_rnd.h b/libavcodec/i386/dsputil_mmx_rnd.h index 22413104ac..5ef06da285 100644 --- a/libavcodec/i386/dsputil_mmx_rnd.h +++ b/libavcodec/i386/dsputil_mmx_rnd.h @@ -32,7 +32,7 @@ static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) { MOVQ_BFE(mm6); - asm volatile( + __asm__ volatile( "lea (%3, %3), %%"REG_a" \n\t" ASMALIGN(3) "1: \n\t" @@ -64,7 +64,7 @@ static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line static void av_unused DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) { MOVQ_BFE(mm6); - asm volatile( + __asm__ volatile( "testl $1, %0 \n\t" " jz 1f \n\t" "movq (%1), %%mm0 \n\t" @@ -114,7 +114,7 @@ static void av_unused DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) { MOVQ_BFE(mm6); - asm volatile( + __asm__ volatile( "lea (%3, %3), %%"REG_a" \n\t" ASMALIGN(3) "1: \n\t" @@ -160,7 +160,7 @@ static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int lin static void av_unused DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) { MOVQ_BFE(mm6); - asm volatile( + __asm__ volatile( "testl $1, %0 \n\t" " jz 1f \n\t" "movq (%1), %%mm0 \n\t" @@ -209,7 +209,7 @@ static void av_unused DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) { MOVQ_BFE(mm6); - asm volatile( + __asm__ volatile( "lea (%3, %3), %%"REG_a" \n\t" "movq (%1), %%mm0 \n\t" ASMALIGN(3) @@ -239,7 +239,7 @@ static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin { MOVQ_ZERO(mm7); SET_RND(mm6); // =2 for rnd and =1 for no_rnd version - asm volatile( + __asm__ volatile( "movq (%1), %%mm0 \n\t" "movq 1(%1), %%mm4 \n\t" "movq %%mm0, %%mm1 \n\t" @@ -307,7 +307,7 @@ static void av_unused DEF(avg, pixels4)(uint8_t *block, const uint8_t *pixels, i MOVQ_BFE(mm6); JUMPALIGN(); do { - asm volatile( + __asm__ volatile( "movd %0, %%mm0 \n\t" "movd %1, %%mm1 \n\t" PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) @@ -327,7 +327,7 @@ static void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, int line_si MOVQ_BFE(mm6); JUMPALIGN(); do { - asm volatile( + __asm__ volatile( "movq %0, %%mm0 \n\t" "movq %1, %%mm1 \n\t" PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) @@ -346,7 +346,7 @@ static void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, int line_s MOVQ_BFE(mm6); JUMPALIGN(); do { - asm volatile( + __asm__ volatile( "movq %0, %%mm0 \n\t" "movq %1, %%mm1 \n\t" PAVGB(%%mm0, %%mm1, %%mm2, %%mm6) @@ -369,7 +369,7 @@ static void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line MOVQ_BFE(mm6); JUMPALIGN(); do { - asm volatile( + __asm__ volatile( "movq %1, %%mm0 \n\t" "movq 1%1, %%mm1 \n\t" "movq %0, %%mm3 \n\t" @@ -389,7 +389,7 @@ static av_unused void DEF(avg, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t MOVQ_BFE(mm6); JUMPALIGN(); do { - asm volatile( + __asm__ volatile( "movq %1, %%mm0 \n\t" "movq %2, %%mm1 \n\t" "movq %0, %%mm3 \n\t" @@ -410,7 +410,7 @@ static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int lin MOVQ_BFE(mm6); JUMPALIGN(); do { - asm volatile( + __asm__ volatile( "movq %1, %%mm0 \n\t" "movq 1%1, %%mm1 \n\t" "movq %0, %%mm3 \n\t" @@ -436,7 +436,7 @@ static av_unused void DEF(avg, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t MOVQ_BFE(mm6); JUMPALIGN(); do { - asm volatile( + __asm__ volatile( "movq %1, %%mm0 \n\t" "movq %2, %%mm1 \n\t" "movq %0, %%mm3 \n\t" @@ -461,7 +461,7 @@ static av_unused void DEF(avg, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) { MOVQ_BFE(mm6); - asm volatile( + __asm__ volatile( "lea (%3, %3), %%"REG_a" \n\t" "movq (%1), %%mm0 \n\t" ASMALIGN(3) @@ -502,7 +502,7 @@ static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin { MOVQ_ZERO(mm7); SET_RND(mm6); // =2 for rnd and =1 for no_rnd version - asm volatile( + __asm__ volatile( "movq (%1), %%mm0 \n\t" "movq 1(%1), %%mm4 \n\t" "movq %%mm0, %%mm1 \n\t" diff --git a/libavcodec/i386/dsputilenc_mmx.c b/libavcodec/i386/dsputilenc_mmx.c index 8c7c4f07f7..f2e2b704af 100644 --- a/libavcodec/i386/dsputilenc_mmx.c +++ b/libavcodec/i386/dsputilenc_mmx.c @@ -30,7 +30,7 @@ static void get_pixels_mmx(DCTELEM *block, const uint8_t *pixels, int line_size) { - asm volatile( + __asm__ volatile( "mov $-128, %%"REG_a" \n\t" "pxor %%mm7, %%mm7 \n\t" ASMALIGN(4) @@ -58,7 +58,7 @@ static void get_pixels_mmx(DCTELEM *block, const uint8_t *pixels, int line_size) static void get_pixels_sse2(DCTELEM *block, const uint8_t *pixels, int line_size) { - asm volatile( + __asm__ volatile( "pxor %%xmm7, %%xmm7 \n\t" "movq (%0), %%xmm0 \n\t" "movq (%0, %2), %%xmm1 \n\t" @@ -92,7 +92,7 @@ static void get_pixels_sse2(DCTELEM *block, const uint8_t *pixels, int line_size static inline void diff_pixels_mmx(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride) { - asm volatile( + __asm__ volatile( "pxor %%mm7, %%mm7 \n\t" "mov $-128, %%"REG_a" \n\t" ASMALIGN(4) @@ -124,7 +124,7 @@ static int pix_sum16_mmx(uint8_t * pix, int line_size){ int sum; x86_reg index= -line_size*h; - asm volatile( + __asm__ volatile( "pxor %%mm7, %%mm7 \n\t" "pxor %%mm6, %%mm6 \n\t" "1: \n\t" @@ -159,7 +159,7 @@ static int pix_sum16_mmx(uint8_t * pix, int line_size){ static int pix_norm1_mmx(uint8_t *pix, int line_size) { int tmp; - asm volatile ( + __asm__ volatile ( "movl $16,%%ecx\n" "pxor %%mm0,%%mm0\n" "pxor %%mm7,%%mm7\n" @@ -202,7 +202,7 @@ static int pix_norm1_mmx(uint8_t *pix, int line_size) { static int sse8_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) { int tmp; - asm volatile ( + __asm__ volatile ( "movl %4,%%ecx\n" "shr $1,%%ecx\n" "pxor %%mm0,%%mm0\n" /* mm0 = 0 */ @@ -263,7 +263,7 @@ static int sse8_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int static int sse16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) { int tmp; - asm volatile ( + __asm__ volatile ( "movl %4,%%ecx\n" "pxor %%mm0,%%mm0\n" /* mm0 = 0 */ "pxor %%mm7,%%mm7\n" /* mm7 holds the sum */ @@ -323,7 +323,7 @@ static int sse16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int static int sse16_sse2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) { int tmp; - asm volatile ( + __asm__ volatile ( "shr $1,%2\n" "pxor %%xmm0,%%xmm0\n" /* mm0 = 0 */ "pxor %%xmm7,%%xmm7\n" /* mm7 holds the sum */ @@ -385,7 +385,7 @@ static int sse16_sse2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in static int hf_noise8_mmx(uint8_t * pix1, int line_size, int h) { int tmp; - asm volatile ( + __asm__ volatile ( "movl %3,%%ecx\n" "pxor %%mm7,%%mm7\n" "pxor %%mm6,%%mm6\n" @@ -511,7 +511,7 @@ static int hf_noise8_mmx(uint8_t * pix1, int line_size, int h) { static int hf_noise16_mmx(uint8_t * pix1, int line_size, int h) { int tmp; uint8_t * pix= pix1; - asm volatile ( + __asm__ volatile ( "movl %3,%%ecx\n" "pxor %%mm7,%%mm7\n" "pxor %%mm6,%%mm6\n" @@ -673,7 +673,7 @@ static int vsad_intra16_mmx(void *v, uint8_t * pix, uint8_t * dummy, int line_si "paddw " #in0 ", %%mm6\n" - asm volatile ( + __asm__ volatile ( "movl %3,%%ecx\n" "pxor %%mm6,%%mm6\n" "pxor %%mm7,%%mm7\n" @@ -719,7 +719,7 @@ static int vsad_intra16_mmx2(void *v, uint8_t * pix, uint8_t * dummy, int line_s "paddw " #in1 ", " #in0 "\n"\ "paddw " #in0 ", %%mm6\n" - asm volatile ( + __asm__ volatile ( "movl %3,%%ecx\n" "pxor %%mm6,%%mm6\n" "pxor %%mm7,%%mm7\n" @@ -782,7 +782,7 @@ static int vsad16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in "paddw " #in0 ", %%mm6\n" - asm volatile ( + __asm__ volatile ( "movl %4,%%ecx\n" "pxor %%mm6,%%mm6\n" "pcmpeqw %%mm7,%%mm7\n" @@ -845,7 +845,7 @@ static int vsad16_mmx2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, i "paddw " #in1 ", " #in0 "\n"\ "paddw " #in0 ", %%mm6\n" - asm volatile ( + __asm__ volatile ( "movl %4,%%ecx\n" "pxor %%mm6,%%mm6\n" "pcmpeqw %%mm7,%%mm7\n" @@ -881,7 +881,7 @@ static int vsad16_mmx2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, i static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ x86_reg i=0; - asm volatile( + __asm__ volatile( "1: \n\t" "movq (%2, %0), %%mm0 \n\t" "movq (%1, %0), %%mm1 \n\t" @@ -905,7 +905,7 @@ static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t x86_reg i=0; uint8_t l, lt; - asm volatile( + __asm__ volatile( "1: \n\t" "movq -1(%1, %0), %%mm0 \n\t" // LT "movq (%1, %0), %%mm1 \n\t" // T @@ -946,7 +946,7 @@ static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t #define DIFF_PIXELS_8(m0,m1,mm,p1,p2,stride,temp) {\ uint8_t *p1b=p1, *p2b=p2;\ - asm volatile(\ + __asm__ volatile(\ DIFF_PIXELS_1(m0, mm##0, mm##7, (%1), (%2))\ DIFF_PIXELS_1(m0, mm##1, mm##7, (%1,%3), (%2,%3))\ DIFF_PIXELS_1(m0, mm##2, mm##7, (%1,%3,2), (%2,%3,2))\ @@ -1069,7 +1069,7 @@ static int hadamard8_diff_##cpu(void *s, uint8_t *src1, uint8_t *src2, int strid \ DIFF_PIXELS_4x8(src1, src2, stride, temp[0]);\ \ - asm volatile(\ + __asm__ volatile(\ HADAMARD48\ \ "movq %%mm7, 96(%1) \n\t"\ @@ -1087,7 +1087,7 @@ static int hadamard8_diff_##cpu(void *s, uint8_t *src1, uint8_t *src2, int strid \ DIFF_PIXELS_4x8(src1+4, src2+4, stride, temp[4]);\ \ - asm volatile(\ + __asm__ volatile(\ HADAMARD48\ \ "movq %%mm7, 96(%1) \n\t"\ @@ -1152,7 +1152,7 @@ static int hadamard8_diff_##cpu(void *s, uint8_t *src1, uint8_t *src2, int strid \ DIFF_PIXELS_8x8(src1, src2, stride, temp[0]);\ \ - asm volatile(\ + __asm__ volatile(\ HADAMARD8(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm6, %%xmm7)\ TRANSPOSE8(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm6, %%xmm7, (%1))\ HADAMARD8(%%xmm0, %%xmm5, %%xmm7, %%xmm3, %%xmm6, %%xmm4, %%xmm2, %%xmm1)\ @@ -1219,7 +1219,7 @@ HADAMARD8_DIFF_SSE2(ssse3) #define DCT_SAD_FUNC(cpu) \ static int sum_abs_dctelem_##cpu(DCTELEM *block){\ int sum;\ - asm volatile(\ + __asm__ volatile(\ DCT_SAD\ :"=r"(sum)\ :"r"(block)\ @@ -1256,7 +1256,7 @@ DCT_SAD_FUNC(ssse3) static int ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2, int size){ int sum; x86_reg i=size; - asm volatile( + __asm__ volatile( "pxor %%mm4, %%mm4 \n" "1: \n" "sub $8, %0 \n" diff --git a/libavcodec/i386/fdct_mmx.c b/libavcodec/i386/fdct_mmx.c index 9e017a656a..eb79ee7b8b 100644 --- a/libavcodec/i386/fdct_mmx.c +++ b/libavcodec/i386/fdct_mmx.c @@ -371,7 +371,7 @@ FDCT_COL(sse2, xmm, movdqa) static av_always_inline void fdct_row_sse2(const int16_t *in, int16_t *out) { - asm volatile( + __asm__ volatile( #define FDCT_ROW_SSE2_H1(i,t) \ "movq " #i "(%0), %%xmm2 \n\t" \ "movq " #i "+8(%0), %%xmm0 \n\t" \ diff --git a/libavcodec/i386/fft_3dn2.c b/libavcodec/i386/fft_3dn2.c index f474f6efa9..1f30edc99d 100644 --- a/libavcodec/i386/fft_3dn2.c +++ b/libavcodec/i386/fft_3dn2.c @@ -46,7 +46,7 @@ void ff_fft_calc_3dn2(FFTContext *s, FFTComplex *z) int n = 1<<s->nbits; int i; ff_fft_dispatch_interleave_3dn2(z, s->nbits); - asm volatile("femms"); + __asm__ volatile("femms"); if(n <= 8) for(i=0; i<n; i+=2) FFSWAP(FFTSample, z[i].im, z[i+1].re); @@ -69,11 +69,11 @@ void ff_imdct_half_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *inpu in1 = input; in2 = input + n2 - 1; #ifdef EMULATE_3DNOWEXT - asm volatile("movd %0, %%mm7" ::"r"(1<<31)); + __asm__ volatile("movd %0, %%mm7" ::"r"(1<<31)); #endif for(k = 0; k < n4; k++) { // FIXME a single block is faster, but gcc 2.95 and 3.4.x on 32bit can't compile it - asm volatile( + __asm__ volatile( "movd %0, %%mm0 \n" "movd %2, %%mm1 \n" "punpckldq %1, %%mm0 \n" @@ -94,7 +94,7 @@ void ff_imdct_half_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *inpu ::"m"(in2[-2*k]), "m"(in1[2*k]), "m"(tcos[k]), "m"(tsin[k]) ); - asm volatile( + __asm__ volatile( "movq %%mm0, %0 \n\t" :"=m"(z[revtab[k]]) ); @@ -117,7 +117,7 @@ void ff_imdct_half_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *inpu /* post rotation */ j = -n2; k = n2-8; - asm volatile( + __asm__ volatile( "1: \n" CMUL(%0, %%mm0, %%mm1) CMUL(%1, %%mm2, %%mm3) @@ -140,7 +140,7 @@ void ff_imdct_half_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *inpu :"r"(z+n8), "r"(tcos+n8), "r"(tsin+n8) :"memory" ); - asm volatile("femms"); + __asm__ volatile("femms"); } void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *input) @@ -153,7 +153,7 @@ void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *inpu j = -n; k = n-8; - asm volatile( + __asm__ volatile( "movq %4, %%mm7 \n" "1: \n" PSWAPD((%2,%1), %%mm0) @@ -168,6 +168,6 @@ void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output, const FFTSample *inpu :"r"(output+n4), "r"(output+n4*3), "m"(*m1m1) ); - asm volatile("femms"); + __asm__ volatile("femms"); } diff --git a/libavcodec/i386/fft_sse.c b/libavcodec/i386/fft_sse.c index 924677008c..deced3b929 100644 --- a/libavcodec/i386/fft_sse.c +++ b/libavcodec/i386/fft_sse.c @@ -36,7 +36,7 @@ void ff_fft_calc_sse(FFTContext *s, FFTComplex *z) if(n <= 16) { x86_reg i = -8*n; - asm volatile( + __asm__ volatile( "1: \n" "movaps (%0,%1), %%xmm0 \n" "movaps %%xmm0, %%xmm1 \n" @@ -58,7 +58,7 @@ void ff_fft_permute_sse(FFTContext *s, FFTComplex *z) int n = 1 << s->nbits; int i; for(i=0; i<n; i+=2) { - asm volatile( + __asm__ volatile( "movaps %2, %%xmm0 \n" "movlps %%xmm0, %0 \n" "movhps %%xmm0, %1 \n" @@ -84,7 +84,7 @@ void ff_imdct_half_sse(MDCTContext *s, FFTSample *output, const FFTSample *input /* pre rotation */ for(k=n8-2; k>=0; k-=2) { - asm volatile( + __asm__ volatile( "movaps (%2,%1,2), %%xmm0 \n" // { z[k].re, z[k].im, z[k+1].re, z[k+1].im } "movaps -16(%2,%0,2), %%xmm1 \n" // { z[-k-2].re, z[-k-2].im, z[-k-1].re, z[-k-1].im } "movaps %%xmm0, %%xmm2 \n" @@ -111,7 +111,7 @@ void ff_imdct_half_sse(MDCTContext *s, FFTSample *output, const FFTSample *input #ifdef ARCH_X86_64 // if we have enough regs, don't let gcc make the luts latency-bound // but if not, latency is faster than spilling - asm("movlps %%xmm0, %0 \n" + __asm__("movlps %%xmm0, %0 \n" "movhps %%xmm0, %1 \n" "movlps %%xmm1, %2 \n" "movhps %%xmm1, %3 \n" @@ -121,10 +121,10 @@ void ff_imdct_half_sse(MDCTContext *s, FFTSample *output, const FFTSample *input "=m"(z[revtab[ k+1]]) ); #else - asm("movlps %%xmm0, %0" :"=m"(z[revtab[-k-2]])); - asm("movhps %%xmm0, %0" :"=m"(z[revtab[-k-1]])); - asm("movlps %%xmm1, %0" :"=m"(z[revtab[ k ]])); - asm("movhps %%xmm1, %0" :"=m"(z[revtab[ k+1]])); + __asm__("movlps %%xmm0, %0" :"=m"(z[revtab[-k-2]])); + __asm__("movhps %%xmm0, %0" :"=m"(z[revtab[-k-1]])); + __asm__("movlps %%xmm1, %0" :"=m"(z[revtab[ k ]])); + __asm__("movhps %%xmm1, %0" :"=m"(z[revtab[ k+1]])); #endif } @@ -146,7 +146,7 @@ void ff_imdct_half_sse(MDCTContext *s, FFTSample *output, const FFTSample *input j = -n2; k = n2-16; - asm volatile( + __asm__ volatile( "1: \n" CMUL(%0, %%xmm0, %%xmm1) CMUL(%1, %%xmm4, %%xmm5) @@ -181,7 +181,7 @@ void ff_imdct_calc_sse(MDCTContext *s, FFTSample *output, const FFTSample *input j = -n; k = n-16; - asm volatile( + __asm__ volatile( "movaps %4, %%xmm7 \n" "1: \n" "movaps (%2,%1), %%xmm0 \n" diff --git a/libavcodec/i386/flacdsp_mmx.c b/libavcodec/i386/flacdsp_mmx.c index 1f12bc402f..01c0d7ae8a 100644 --- a/libavcodec/i386/flacdsp_mmx.c +++ b/libavcodec/i386/flacdsp_mmx.c @@ -28,7 +28,7 @@ static void apply_welch_window_sse2(const int32_t *data, int len, double *w_data int n2 = len>>1; x86_reg i = -n2*sizeof(int32_t); x86_reg j = n2*sizeof(int32_t); - asm volatile( + __asm__ volatile( "movsd %0, %%xmm7 \n\t" "movapd "MANGLE(ff_pd_1)", %%xmm6 \n\t" "movapd "MANGLE(ff_pd_2)", %%xmm5 \n\t" @@ -38,7 +38,7 @@ static void apply_welch_window_sse2(const int32_t *data, int len, double *w_data ::"m"(c) ); #define WELCH(MOVPD, offset)\ - asm volatile(\ + __asm__ volatile(\ "1: \n\t"\ "movapd %%xmm7, %%xmm1 \n\t"\ "mulpd %%xmm1, %%xmm1 \n\t"\ @@ -84,7 +84,7 @@ void ff_flac_compute_autocorr_sse2(const int32_t *data, int len, int lag, for(j=0; j<lag; j+=2){ x86_reg i = -len*sizeof(double); if(j == lag-2) { - asm volatile( + __asm__ volatile( "movsd "MANGLE(ff_pd_1)", %%xmm0 \n\t" "movsd "MANGLE(ff_pd_1)", %%xmm1 \n\t" "movsd "MANGLE(ff_pd_1)", %%xmm2 \n\t" @@ -113,7 +113,7 @@ void ff_flac_compute_autocorr_sse2(const int32_t *data, int len, int lag, :"r"(data1+len), "r"(data1+len-j) ); } else { - asm volatile( + __asm__ volatile( "movsd "MANGLE(ff_pd_1)", %%xmm0 \n\t" "movsd "MANGLE(ff_pd_1)", %%xmm1 \n\t" "1: \n\t" diff --git a/libavcodec/i386/h264_i386.h b/libavcodec/i386/h264_i386.h index 5cd03a519b..ed62dd6c2e 100644 --- a/libavcodec/i386/h264_i386.h +++ b/libavcodec/i386/h264_i386.h @@ -43,7 +43,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, int minusstart= -(int)significant_coeff_ctx_base; int minusindex= 4-(int)index; int coeff_count; - asm volatile( + __asm__ volatile( "movl "RANGE "(%3), %%esi \n\t" "movl "LOW "(%3), %%ebx \n\t" @@ -96,7 +96,7 @@ static int decode_significance_8x8_x86(CABACContext *c, int minusindex= 4-(int)index; int coeff_count; x86_reg last=0; - asm volatile( + __asm__ volatile( "movl "RANGE "(%3), %%esi \n\t" "movl "LOW "(%3), %%ebx \n\t" diff --git a/libavcodec/i386/h264dsp_mmx.c b/libavcodec/i386/h264dsp_mmx.c index f94f7088cf..bb9c82d612 100644 --- a/libavcodec/i386/h264dsp_mmx.c +++ b/libavcodec/i386/h264dsp_mmx.c @@ -57,14 +57,14 @@ DECLARE_ALIGNED_8 (static const uint64_t, ff_pb_7_3 ) = 0x0307030703070307ULL; static void ff_h264_idct_add_mmx(uint8_t *dst, int16_t *block, int stride) { /* Load dct coeffs */ - asm volatile( + __asm__ volatile( "movq (%0), %%mm0 \n\t" "movq 8(%0), %%mm1 \n\t" "movq 16(%0), %%mm2 \n\t" "movq 24(%0), %%mm3 \n\t" :: "r"(block) ); - asm volatile( + __asm__ volatile( /* mm1=s02+s13 mm2=s02-s13 mm4=d02+d13 mm0=d02-d13 */ IDCT4_1D( %%mm2, %%mm1, %%mm0, %%mm3, %%mm4 ) @@ -80,7 +80,7 @@ static void ff_h264_idct_add_mmx(uint8_t *dst, int16_t *block, int stride) "pxor %%mm7, %%mm7 \n\t" :: "m"(ff_pw_32)); - asm volatile( + __asm__ volatile( STORE_DIFF_4P( %%mm0, %%mm1, %%mm7) "add %1, %0 \n\t" STORE_DIFF_4P( %%mm2, %%mm1, %%mm7) @@ -95,7 +95,7 @@ static void ff_h264_idct_add_mmx(uint8_t *dst, int16_t *block, int stride) static inline void h264_idct8_1d(int16_t *block) { - asm volatile( + __asm__ volatile( "movq 112(%0), %%mm7 \n\t" "movq 80(%0), %%mm0 \n\t" "movq 48(%0), %%mm3 \n\t" @@ -166,7 +166,7 @@ static void ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) h264_idct8_1d(block+4*i); - asm volatile( + __asm__ volatile( "movq %%mm7, %0 \n\t" TRANSPOSE4( %%mm0, %%mm2, %%mm4, %%mm6, %%mm7 ) "movq %%mm0, 8(%1) \n\t" @@ -188,7 +188,7 @@ static void ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) for(i=0; i<2; i++){ h264_idct8_1d(b2+4*i); - asm volatile( + __asm__ volatile( "psraw $6, %%mm7 \n\t" "psraw $6, %%mm6 \n\t" "psraw $6, %%mm5 \n\t" @@ -269,7 +269,7 @@ static void ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) static void ff_h264_idct8_add_sse2(uint8_t *dst, int16_t *block, int stride) { - asm volatile( + __asm__ volatile( "movdqa 0x10(%1), %%xmm1 \n" "movdqa 0x20(%1), %%xmm2 \n" "movdqa 0x30(%1), %%xmm3 \n" @@ -304,7 +304,7 @@ static void ff_h264_idct8_add_sse2(uint8_t *dst, int16_t *block, int stride) static void ff_h264_idct_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride) { int dc = (block[0] + 32) >> 6; - asm volatile( + __asm__ volatile( "movd %0, %%mm0 \n\t" "pshufw $0, %%mm0, %%mm0 \n\t" "pxor %%mm1, %%mm1 \n\t" @@ -313,7 +313,7 @@ static void ff_h264_idct_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride) "packuswb %%mm1, %%mm1 \n\t" ::"r"(dc) ); - asm volatile( + __asm__ volatile( "movd %0, %%mm2 \n\t" "movd %1, %%mm3 \n\t" "movd %2, %%mm4 \n\t" @@ -341,7 +341,7 @@ static void ff_h264_idct8_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride) { int dc = (block[0] + 32) >> 6; int y; - asm volatile( + __asm__ volatile( "movd %0, %%mm0 \n\t" "pshufw $0, %%mm0, %%mm0 \n\t" "pxor %%mm1, %%mm1 \n\t" @@ -351,7 +351,7 @@ static void ff_h264_idct8_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride) ::"r"(dc) ); for(y=2; y--; dst += 4*stride){ - asm volatile( + __asm__ volatile( "movq %0, %%mm2 \n\t" "movq %1, %%mm3 \n\t" "movq %2, %%mm4 \n\t" @@ -463,7 +463,7 @@ static inline void h264_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alph { DECLARE_ALIGNED_8(uint64_t, tmp0[2]); - asm volatile( + __asm__ volatile( "movq (%1,%3), %%mm0 \n\t" //p1 "movq (%1,%3,2), %%mm1 \n\t" //p0 "movq (%2), %%mm2 \n\t" //q0 @@ -540,7 +540,7 @@ static void h264_h_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha, in static inline void h264_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha1, int beta1, int8_t *tc0) { - asm volatile( + __asm__ volatile( "movq (%0), %%mm0 \n\t" //p1 "movq (%0,%2), %%mm1 \n\t" //p0 "movq (%1), %%mm2 \n\t" //q0 @@ -586,7 +586,7 @@ static void h264_h_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha, static inline void h264_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int alpha1, int beta1) { - asm volatile( + __asm__ volatile( "movq (%0), %%mm0 \n\t" "movq (%0,%2), %%mm1 \n\t" "movq (%1), %%mm2 \n\t" @@ -628,7 +628,7 @@ static void h264_h_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int a static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2], int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field ) { int dir; - asm volatile( + __asm__ volatile( "pxor %%mm7, %%mm7 \n\t" "movq %0, %%mm6 \n\t" "movq %1, %%mm5 \n\t" @@ -636,7 +636,7 @@ static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40] ::"m"(ff_pb_1), "m"(ff_pb_3), "m"(ff_pb_7) ); if(field) - asm volatile( + __asm__ volatile( "movq %0, %%mm5 \n\t" "movq %1, %%mm4 \n\t" ::"m"(ff_pb_3_1), "m"(ff_pb_7_3) @@ -650,14 +650,14 @@ static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40] DECLARE_ALIGNED_8(const uint64_t, mask_dir) = dir ? 0 : 0xffffffffffffffffULL; int b_idx, edge, l; for( b_idx=12, edge=0; edge<edges; edge+=step, b_idx+=8*step ) { - asm volatile( + __asm__ volatile( "pand %0, %%mm0 \n\t" ::"m"(mask_dir) ); if(!(mask_mv & edge)) { - asm volatile("pxor %%mm0, %%mm0 \n\t":); + __asm__ volatile("pxor %%mm0, %%mm0 \n\t":); for( l = bidir; l >= 0; l-- ) { - asm volatile( + __asm__ volatile( "movd %0, %%mm1 \n\t" "punpckldq %1, %%mm1 \n\t" "movq %%mm1, %%mm2 \n\t" @@ -688,7 +688,7 @@ static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40] ); } } - asm volatile( + __asm__ volatile( "movd %0, %%mm1 \n\t" "por %1, %%mm1 \n\t" "punpcklbw %%mm7, %%mm1 \n\t" @@ -696,7 +696,7 @@ static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40] ::"m"(nnz[b_idx]), "m"(nnz[b_idx+d_idx]) ); - asm volatile( + __asm__ volatile( "pcmpeqw %%mm7, %%mm0 \n\t" "pcmpeqw %%mm7, %%mm0 \n\t" "psrlw $15, %%mm0 \n\t" // nonzero -> 1 @@ -713,7 +713,7 @@ static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40] edges = 4; step = 1; } - asm volatile( + __asm__ volatile( "movq (%0), %%mm0 \n\t" "movq 8(%0), %%mm1 \n\t" "movq 16(%0), %%mm2 \n\t" @@ -774,7 +774,7 @@ static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40] static av_noinline void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ int h=4;\ \ - asm volatile(\ + __asm__ volatile(\ "pxor %%mm7, %%mm7 \n\t"\ "movq %5, %%mm4 \n\t"\ "movq %6, %%mm5 \n\t"\ @@ -813,14 +813,14 @@ static av_noinline void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uin }\ static av_noinline void OPNAME ## h264_qpel4_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\ int h=4;\ - asm volatile(\ + __asm__ volatile(\ "pxor %%mm7, %%mm7 \n\t"\ "movq %0, %%mm4 \n\t"\ "movq %1, %%mm5 \n\t"\ :: "m"(ff_pw_5), "m"(ff_pw_16)\ );\ do{\ - asm volatile(\ + __asm__ volatile(\ "movd -1(%0), %%mm1 \n\t"\ "movd (%0), %%mm2 \n\t"\ "movd 1(%0), %%mm3 \n\t"\ @@ -857,7 +857,7 @@ static av_noinline void OPNAME ## h264_qpel4_h_lowpass_l2_ ## MMX(uint8_t *dst, }\ static av_noinline void OPNAME ## h264_qpel4_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ src -= 2*srcStride;\ - asm volatile(\ + __asm__ volatile(\ "pxor %%mm7, %%mm7 \n\t"\ "movd (%0), %%mm0 \n\t"\ "add %2, %0 \n\t"\ @@ -889,7 +889,7 @@ static av_noinline void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, in int w=3;\ src -= 2*srcStride+2;\ while(w--){\ - asm volatile(\ + __asm__ volatile(\ "pxor %%mm7, %%mm7 \n\t"\ "movd (%0), %%mm0 \n\t"\ "add %2, %0 \n\t"\ @@ -919,7 +919,7 @@ static av_noinline void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, in src += 4 - 9*srcStride;\ }\ tmp -= 3*4;\ - asm volatile(\ + __asm__ volatile(\ "1: \n\t"\ "movq (%0), %%mm0 \n\t"\ "paddw 10(%0), %%mm0 \n\t"\ @@ -948,7 +948,7 @@ static av_noinline void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, in \ static av_noinline void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ int h=8;\ - asm volatile(\ + __asm__ volatile(\ "pxor %%mm7, %%mm7 \n\t"\ "movq %5, %%mm6 \n\t"\ "1: \n\t"\ @@ -1005,13 +1005,13 @@ static av_noinline void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uin \ static av_noinline void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\ int h=8;\ - asm volatile(\ + __asm__ volatile(\ "pxor %%mm7, %%mm7 \n\t"\ "movq %0, %%mm6 \n\t"\ :: "m"(ff_pw_5)\ );\ do{\ - asm volatile(\ + __asm__ volatile(\ "movq (%0), %%mm0 \n\t"\ "movq 1(%0), %%mm2 \n\t"\ "movq %%mm0, %%mm1 \n\t"\ @@ -1071,7 +1071,7 @@ static av_noinline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, src -= 2*srcStride;\ \ while(w--){\ - asm volatile(\ + __asm__ volatile(\ "pxor %%mm7, %%mm7 \n\t"\ "movd (%0), %%mm0 \n\t"\ "add %2, %0 \n\t"\ @@ -1102,7 +1102,7 @@ static av_noinline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, : "memory"\ );\ if(h==16){\ - asm volatile(\ + __asm__ volatile(\ QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\ QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\ QPEL_H264V(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\ @@ -1125,7 +1125,7 @@ static av_always_inline void OPNAME ## h264_qpel8or16_hv1_lowpass_ ## MMX(int16_ int w = (size+8)>>2;\ src -= 2*srcStride+2;\ while(w--){\ - asm volatile(\ + __asm__ volatile(\ "pxor %%mm7, %%mm7 \n\t"\ "movd (%0), %%mm0 \n\t"\ "add %2, %0 \n\t"\ @@ -1155,7 +1155,7 @@ static av_always_inline void OPNAME ## h264_qpel8or16_hv1_lowpass_ ## MMX(int16_ : "memory"\ );\ if(size==16){\ - asm volatile(\ + __asm__ volatile(\ QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 8*48)\ QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 9*48)\ QPEL_H264HV(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, 10*48)\ @@ -1177,7 +1177,7 @@ static av_always_inline void OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(uint8_ int w = size>>4;\ do{\ int h = size;\ - asm volatile(\ + __asm__ volatile(\ "1: \n\t"\ "movq (%0), %%mm0 \n\t"\ "movq 8(%0), %%mm3 \n\t"\ @@ -1261,7 +1261,7 @@ static void OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, \ static av_noinline void OPNAME ## pixels4_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\ {\ - asm volatile(\ + __asm__ volatile(\ "movq (%1), %%mm0 \n\t"\ "movq 24(%1), %%mm1 \n\t"\ "psraw $5, %%mm0 \n\t"\ @@ -1291,7 +1291,7 @@ static av_noinline void OPNAME ## pixels4_l2_shift5_ ## MMX(uint8_t *dst, int16_ static av_noinline void OPNAME ## pixels8_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\ {\ do{\ - asm volatile(\ + __asm__ volatile(\ "movq (%1), %%mm0 \n\t"\ "movq 8(%1), %%mm1 \n\t"\ "movq 48(%1), %%mm2 \n\t"\ @@ -1325,7 +1325,7 @@ static void OPNAME ## pixels16_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, u #define QPEL_H264_H16_XMM(OPNAME, OP, MMX)\ static av_noinline void OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\ int h=16;\ - asm volatile(\ + __asm__ volatile(\ "pxor %%xmm15, %%xmm15 \n\t"\ "movdqa %6, %%xmm14 \n\t"\ "movdqa %7, %%xmm13 \n\t"\ @@ -1403,13 +1403,13 @@ static av_noinline void OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, #define QPEL_H264_H_XMM(OPNAME, OP, MMX)\ static av_noinline void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\ int h=8;\ - asm volatile(\ + __asm__ volatile(\ "pxor %%xmm7, %%xmm7 \n\t"\ "movdqa %0, %%xmm6 \n\t"\ :: "m"(ff_pw_5)\ );\ do{\ - asm volatile(\ + __asm__ volatile(\ "lddqu -5(%0), %%xmm1 \n\t"\ "movdqa %%xmm1, %%xmm0 \n\t"\ "punpckhbw %%xmm7, %%xmm1 \n\t"\ @@ -1450,7 +1450,7 @@ QPEL_H264_H16_XMM(OPNAME, OP, MMX)\ \ static av_noinline void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ int h=8;\ - asm volatile(\ + __asm__ volatile(\ "pxor %%xmm7, %%xmm7 \n\t"\ "movdqa %5, %%xmm6 \n\t"\ "1: \n\t"\ @@ -1501,7 +1501,7 @@ static void OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, static av_noinline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\ src -= 2*srcStride;\ \ - asm volatile(\ + __asm__ volatile(\ "pxor %%xmm7, %%xmm7 \n\t"\ "movq (%0), %%xmm0 \n\t"\ "add %2, %0 \n\t"\ @@ -1532,7 +1532,7 @@ static av_noinline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, : "memory"\ );\ if(h==16){\ - asm volatile(\ + __asm__ volatile(\ QPEL_H264V_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, OP)\ QPEL_H264V_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, OP)\ QPEL_H264V_XMM(%%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, OP)\ @@ -1560,7 +1560,7 @@ static av_always_inline void put_h264_qpel8or16_hv1_lowpass_sse2(int16_t *tmp, u int w = (size+8)>>3; src -= 2*srcStride+2; while(w--){ - asm volatile( + __asm__ volatile( "pxor %%xmm7, %%xmm7 \n\t" "movq (%0), %%xmm0 \n\t" "add %2, %0 \n\t" @@ -1590,7 +1590,7 @@ static av_always_inline void put_h264_qpel8or16_hv1_lowpass_sse2(int16_t *tmp, u : "memory" ); if(size==16){ - asm volatile( + __asm__ volatile( QPEL_H264HV_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, 8*48) QPEL_H264HV_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, 9*48) QPEL_H264HV_XMM(%%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, 10*48) @@ -1613,7 +1613,7 @@ static av_always_inline void put_h264_qpel8or16_hv1_lowpass_sse2(int16_t *tmp, u static av_always_inline void OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, int dstStride, int tmpStride, int size){\ int h = size;\ if(size == 16){\ - asm volatile(\ + __asm__ volatile(\ "1: \n\t"\ "movdqa 32(%0), %%xmm4 \n\t"\ "movdqa 16(%0), %%xmm5 \n\t"\ @@ -1668,7 +1668,7 @@ static av_always_inline void OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(uint8_ : "memory"\ );\ }else{\ - asm volatile(\ + __asm__ volatile(\ "1: \n\t"\ "movdqa 16(%0), %%xmm1 \n\t"\ "movdqa (%0), %%xmm0 \n\t"\ @@ -2022,7 +2022,7 @@ static inline void ff_h264_weight_WxH_mmx2(uint8_t *dst, int stride, int log2_de int x, y; offset <<= log2_denom; offset += (1 << log2_denom) >> 1; - asm volatile( + __asm__ volatile( "movd %0, %%mm4 \n\t" "movd %1, %%mm5 \n\t" "movd %2, %%mm6 \n\t" @@ -2033,7 +2033,7 @@ static inline void ff_h264_weight_WxH_mmx2(uint8_t *dst, int stride, int log2_de ); for(y=0; y<h; y+=2){ for(x=0; x<w; x+=4){ - asm volatile( + __asm__ volatile( "movd %0, %%mm0 \n\t" "movd %1, %%mm1 \n\t" "punpcklbw %%mm7, %%mm0 \n\t" @@ -2060,7 +2060,7 @@ static inline void ff_h264_biweight_WxH_mmx2(uint8_t *dst, uint8_t *src, int str { int x, y; offset = ((offset + 1) | 1) << log2_denom; - asm volatile( + __asm__ volatile( "movd %0, %%mm3 \n\t" "movd %1, %%mm4 \n\t" "movd %2, %%mm5 \n\t" @@ -2073,7 +2073,7 @@ static inline void ff_h264_biweight_WxH_mmx2(uint8_t *dst, uint8_t *src, int str ); for(y=0; y<h; y++){ for(x=0; x<w; x+=4){ - asm volatile( + __asm__ volatile( "movd %0, %%mm0 \n\t" "movd %1, %%mm1 \n\t" "punpcklbw %%mm7, %%mm0 \n\t" diff --git a/libavcodec/i386/idct_mmx_xvid.c b/libavcodec/i386/idct_mmx_xvid.c index 9f90b5d8fd..d4fdd7a54a 100644 --- a/libavcodec/i386/idct_mmx_xvid.c +++ b/libavcodec/i386/idct_mmx_xvid.c @@ -483,7 +483,7 @@ DECLARE_ALIGNED(8, static const int16_t, tab_i_04_xmm[32*4]) = { void ff_idct_xvid_mmx(short *block){ -asm volatile( +__asm__ volatile( //# Process each row DCT_8_INV_ROW_MMX(0*16(%0), 0*16(%0), 64*0(%2), 8*0(%1)) DCT_8_INV_ROW_MMX(1*16(%0), 1*16(%0), 64*1(%2), 8*1(%1)) @@ -506,7 +506,7 @@ asm volatile( void ff_idct_xvid_mmx2(short *block){ -asm volatile( +__asm__ volatile( //# Process each row DCT_8_INV_ROW_XMM(0*16(%0), 0*16(%0), 64*0(%2), 8*0(%1)) DCT_8_INV_ROW_XMM(1*16(%0), 1*16(%0), 64*1(%2), 8*1(%1)) diff --git a/libavcodec/i386/idct_sse2_xvid.c b/libavcodec/i386/idct_sse2_xvid.c index 3c6c3b3105..be4f2115e2 100644 --- a/libavcodec/i386/idct_sse2_xvid.c +++ b/libavcodec/i386/idct_sse2_xvid.c @@ -341,7 +341,7 @@ DECLARE_ASM_CONST(16, int32_t, walkenIdctRounders[]) = { inline void ff_idct_xvid_sse2(short *block) { - asm volatile( + __asm__ volatile( "movq "MANGLE(m127)", %%mm0 \n\t" iMTX_MULT("(%0)", MANGLE(iTab1), ROUND(walkenIdctRounders), PUT_EVEN(ROW0)) iMTX_MULT("1*16(%0)", MANGLE(iTab2), ROUND(walkenIdctRounders+1*16), PUT_ODD(ROW1)) diff --git a/libavcodec/i386/mathops.h b/libavcodec/i386/mathops.h index 46c2d638f0..2ae24fcaac 100644 --- a/libavcodec/i386/mathops.h +++ b/libavcodec/i386/mathops.h @@ -24,7 +24,7 @@ #ifdef FRAC_BITS # define MULL(ra, rb) \ - ({ int rt, dummy; asm (\ + ({ int rt, dummy; __asm__ (\ "imull %3 \n\t"\ "shrdl %4, %%edx, %%eax \n\t"\ : "=a"(rt), "=d"(dummy)\ @@ -34,12 +34,12 @@ #define MULH(ra, rb) \ ({ int rt, dummy;\ - asm ("imull %3\n\t" : "=d"(rt), "=a"(dummy): "a" ((int)ra), "rm" ((int)rb));\ + __asm__ ("imull %3\n\t" : "=d"(rt), "=a"(dummy): "a" ((int)ra), "rm" ((int)rb));\ rt; }) #define MUL64(ra, rb) \ ({ int64_t rt;\ - asm ("imull %2\n\t" : "=A"(rt) : "a" ((int)ra), "g" ((int)rb));\ + __asm__ ("imull %2\n\t" : "=A"(rt) : "a" ((int)ra), "g" ((int)rb));\ rt; }) #endif /* AVCODEC_I386_MATHOPS_H */ diff --git a/libavcodec/i386/mmx.h b/libavcodec/i386/mmx.h index 197e059a1c..1f0e8a9d3c 100644 --- a/libavcodec/i386/mmx.h +++ b/libavcodec/i386/mmx.h @@ -43,25 +43,25 @@ typedef union { #define mmx_i2r(op,imm,reg) \ - asm volatile (#op " %0, %%" #reg \ + __asm__ volatile (#op " %0, %%" #reg \ : /* nothing */ \ : "i" (imm) ) #define mmx_m2r(op,mem,reg) \ - asm volatile (#op " %0, %%" #reg \ + __asm__ volatile (#op " %0, %%" #reg \ : /* nothing */ \ : "m" (mem)) #define mmx_r2m(op,reg,mem) \ - asm volatile (#op " %%" #reg ", %0" \ + __asm__ volatile (#op " %%" #reg ", %0" \ : "=m" (mem) \ : /* nothing */ ) #define mmx_r2r(op,regs,regd) \ - asm volatile (#op " %" #regs ", %" #regd) + __asm__ volatile (#op " %" #regs ", %" #regd) -#define emms() asm volatile ("emms") +#define emms() __asm__ volatile ("emms") #define movd_m2r(var,reg) mmx_m2r (movd, var, reg) #define movd_r2m(reg,var) mmx_r2m (movd, reg, var) @@ -200,16 +200,16 @@ typedef union { #define mmx_m2ri(op,mem,reg,imm) \ - asm volatile (#op " %1, %0, %%" #reg \ + __asm__ volatile (#op " %1, %0, %%" #reg \ : /* nothing */ \ : "m" (mem), "i" (imm)) #define mmx_r2ri(op,regs,regd,imm) \ - asm volatile (#op " %0, %%" #regs ", %%" #regd \ + __asm__ volatile (#op " %0, %%" #regs ", %%" #regd \ : /* nothing */ \ : "i" (imm) ) #define mmx_fetch(mem,hint) \ - asm volatile ("prefetch" #hint " %0" \ + __asm__ volatile ("prefetch" #hint " %0" \ : /* nothing */ \ : "m" (mem)) @@ -240,7 +240,7 @@ typedef union { #define pminub_r2r(regs,regd) mmx_r2r (pminub, regs, regd) #define pmovmskb(mmreg,reg) \ - asm volatile ("movmskps %" #mmreg ", %" #reg) + __asm__ volatile ("movmskps %" #mmreg ", %" #reg) #define pmulhuw_m2r(var,reg) mmx_m2r (pmulhuw, var, reg) #define pmulhuw_r2r(regs,regd) mmx_r2r (pmulhuw, regs, regd) @@ -256,7 +256,7 @@ typedef union { #define pshufw_m2r(var,reg,imm) mmx_m2ri(pshufw, var, reg, imm) #define pshufw_r2r(regs,regd,imm) mmx_r2ri(pshufw, regs, regd, imm) -#define sfence() asm volatile ("sfence\n\t") +#define sfence() __asm__ volatile ("sfence\n\t") /* SSE2 */ #define pshufhw_m2r(var,reg,imm) mmx_m2ri(pshufhw, var, reg, imm) diff --git a/libavcodec/i386/motion_est_mmx.c b/libavcodec/i386/motion_est_mmx.c index 0e111f9f97..16291612a5 100644 --- a/libavcodec/i386/motion_est_mmx.c +++ b/libavcodec/i386/motion_est_mmx.c @@ -36,7 +36,7 @@ DECLARE_ASM_CONST(8, uint64_t, bone)= 0x0101010101010101LL; static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) { x86_reg len= -(stride*h); - asm volatile( + __asm__ volatile( ASMALIGN(4) "1: \n\t" "movq (%1, %%"REG_a"), %%mm0 \n\t" @@ -71,7 +71,7 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) static inline void sad8_1_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) { - asm volatile( + __asm__ volatile( ASMALIGN(4) "1: \n\t" "movq (%1), %%mm0 \n\t" @@ -92,7 +92,7 @@ static inline void sad8_1_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) static int sad16_sse2(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h) { int ret; - asm volatile( + __asm__ volatile( "pxor %%xmm6, %%xmm6 \n\t" ASMALIGN(4) "1: \n\t" @@ -109,7 +109,7 @@ static int sad16_sse2(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h) : "+r" (h), "+r" (blk1), "+r" (blk2) : "r" ((x86_reg)stride) ); - asm volatile( + __asm__ volatile( "movhlps %%xmm6, %%xmm0 \n\t" "paddw %%xmm0, %%xmm6 \n\t" "movd %%xmm6, %0 \n\t" @@ -120,7 +120,7 @@ static int sad16_sse2(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h) static inline void sad8_x2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) { - asm volatile( + __asm__ volatile( ASMALIGN(4) "1: \n\t" "movq (%1), %%mm0 \n\t" @@ -142,7 +142,7 @@ static inline void sad8_x2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h static inline void sad8_y2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) { - asm volatile( + __asm__ volatile( "movq (%1), %%mm0 \n\t" "add %3, %1 \n\t" ASMALIGN(4) @@ -167,7 +167,7 @@ static inline void sad8_y2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) { - asm volatile( + __asm__ volatile( "movq "MANGLE(bone)", %%mm5 \n\t" "movq (%1), %%mm0 \n\t" "pavgb 1(%1), %%mm0 \n\t" @@ -198,7 +198,7 @@ static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int stride, int h) { x86_reg len= -(stride*h); - asm volatile( + __asm__ volatile( ASMALIGN(4) "1: \n\t" "movq (%1, %%"REG_a"), %%mm0 \n\t" @@ -236,7 +236,7 @@ static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) { x86_reg len= -(stride*h); - asm volatile( + __asm__ volatile( "movq (%1, %%"REG_a"), %%mm0 \n\t" "movq 1(%1, %%"REG_a"), %%mm2 \n\t" "movq %%mm0, %%mm1 \n\t" @@ -289,7 +289,7 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) static inline int sum_mmx(void) { int ret; - asm volatile( + __asm__ volatile( "movq %%mm6, %%mm0 \n\t" "psrlq $32, %%mm6 \n\t" "paddw %%mm0, %%mm6 \n\t" @@ -305,7 +305,7 @@ static inline int sum_mmx(void) static inline int sum_mmx2(void) { int ret; - asm volatile( + __asm__ volatile( "movd %%mm6, %0 \n\t" : "=r" (ret) ); @@ -326,7 +326,7 @@ static inline void sad8_y2a_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h) static int sad8_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ {\ assert(h==8);\ - asm volatile("pxor %%mm7, %%mm7 \n\t"\ + __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\ "pxor %%mm6, %%mm6 \n\t":);\ \ sad8_1_ ## suf(blk1, blk2, stride, 8);\ @@ -336,7 +336,7 @@ static int sad8_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h static int sad8_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ {\ assert(h==8);\ - asm volatile("pxor %%mm7, %%mm7 \n\t"\ + __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\ "pxor %%mm6, %%mm6 \n\t"\ "movq %0, %%mm5 \n\t"\ :: "m"(round_tab[1]) \ @@ -350,7 +350,7 @@ static int sad8_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, in static int sad8_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ {\ assert(h==8);\ - asm volatile("pxor %%mm7, %%mm7 \n\t"\ + __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\ "pxor %%mm6, %%mm6 \n\t"\ "movq %0, %%mm5 \n\t"\ :: "m"(round_tab[1]) \ @@ -364,7 +364,7 @@ static int sad8_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, in static int sad8_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ {\ assert(h==8);\ - asm volatile("pxor %%mm7, %%mm7 \n\t"\ + __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\ "pxor %%mm6, %%mm6 \n\t"\ ::);\ \ @@ -375,7 +375,7 @@ static int sad8_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, i \ static int sad16_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ {\ - asm volatile("pxor %%mm7, %%mm7 \n\t"\ + __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\ "pxor %%mm6, %%mm6 \n\t":);\ \ sad8_1_ ## suf(blk1 , blk2 , stride, h);\ @@ -385,7 +385,7 @@ static int sad16_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int }\ static int sad16_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ {\ - asm volatile("pxor %%mm7, %%mm7 \n\t"\ + __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\ "pxor %%mm6, %%mm6 \n\t"\ "movq %0, %%mm5 \n\t"\ :: "m"(round_tab[1]) \ @@ -398,7 +398,7 @@ static int sad16_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, i }\ static int sad16_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ {\ - asm volatile("pxor %%mm7, %%mm7 \n\t"\ + __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\ "pxor %%mm6, %%mm6 \n\t"\ "movq %0, %%mm5 \n\t"\ :: "m"(round_tab[1]) \ @@ -411,7 +411,7 @@ static int sad16_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, i }\ static int sad16_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\ {\ - asm volatile("pxor %%mm7, %%mm7 \n\t"\ + __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\ "pxor %%mm6, %%mm6 \n\t"\ ::);\ \ diff --git a/libavcodec/i386/mpegvideo_mmx.c b/libavcodec/i386/mpegvideo_mmx.c index 4ff730243b..90c61be0db 100644 --- a/libavcodec/i386/mpegvideo_mmx.c +++ b/libavcodec/i386/mpegvideo_mmx.c @@ -55,7 +55,7 @@ static void dct_unquantize_h263_intra_mmx(MpegEncContext *s, else nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; //printf("%d %d ", qmul, qadd); -asm volatile( +__asm__ volatile( "movd %1, %%mm6 \n\t" //qmul "packssdw %%mm6, %%mm6 \n\t" "packssdw %%mm6, %%mm6 \n\t" @@ -118,7 +118,7 @@ static void dct_unquantize_h263_inter_mmx(MpegEncContext *s, nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; //printf("%d %d ", qmul, qadd); -asm volatile( +__asm__ volatile( "movd %1, %%mm6 \n\t" //qmul "packssdw %%mm6, %%mm6 \n\t" "packssdw %%mm6, %%mm6 \n\t" @@ -214,7 +214,7 @@ static void dct_unquantize_mpeg1_intra_mmx(MpegEncContext *s, block0 = block[0] * s->c_dc_scale; /* XXX: only mpeg1 */ quant_matrix = s->intra_matrix; -asm volatile( +__asm__ volatile( "pcmpeqw %%mm7, %%mm7 \n\t" "psrlw $15, %%mm7 \n\t" "movd %2, %%mm6 \n\t" @@ -277,7 +277,7 @@ static void dct_unquantize_mpeg1_inter_mmx(MpegEncContext *s, nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1; quant_matrix = s->inter_matrix; -asm volatile( +__asm__ volatile( "pcmpeqw %%mm7, %%mm7 \n\t" "psrlw $15, %%mm7 \n\t" "movd %2, %%mm6 \n\t" @@ -349,7 +349,7 @@ static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s, else block0 = block[0] * s->c_dc_scale; quant_matrix = s->intra_matrix; -asm volatile( +__asm__ volatile( "pcmpeqw %%mm7, %%mm7 \n\t" "psrlw $15, %%mm7 \n\t" "movd %2, %%mm6 \n\t" @@ -410,7 +410,7 @@ static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s, else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; quant_matrix = s->inter_matrix; -asm volatile( +__asm__ volatile( "pcmpeqw %%mm7, %%mm7 \n\t" "psrlq $48, %%mm7 \n\t" "movd %2, %%mm6 \n\t" @@ -482,7 +482,7 @@ static void denoise_dct_mmx(MpegEncContext *s, DCTELEM *block){ s->dct_count[intra]++; - asm volatile( + __asm__ volatile( "pxor %%mm7, %%mm7 \n\t" "1: \n\t" "pxor %%mm0, %%mm0 \n\t" @@ -536,7 +536,7 @@ static void denoise_dct_sse2(MpegEncContext *s, DCTELEM *block){ s->dct_count[intra]++; - asm volatile( + __asm__ volatile( "pxor %%xmm7, %%xmm7 \n\t" "1: \n\t" "pxor %%xmm0, %%xmm0 \n\t" diff --git a/libavcodec/i386/mpegvideo_mmx_template.c b/libavcodec/i386/mpegvideo_mmx_template.c index c6b989420a..a1aae5fdd4 100644 --- a/libavcodec/i386/mpegvideo_mmx_template.c +++ b/libavcodec/i386/mpegvideo_mmx_template.c @@ -117,13 +117,13 @@ static int RENAME(dct_quantize)(MpegEncContext *s, /* note: block[0] is assumed to be positive */ if (!s->h263_aic) { #if 1 - asm volatile ( + __asm__ volatile ( "mul %%ecx \n\t" : "=d" (level), "=a"(dummy) : "a" ((block[0]>>2) + q), "c" (ff_inverse[q<<1]) ); #else - asm volatile ( + __asm__ volatile ( "xorl %%edx, %%edx \n\t" "divw %%cx \n\t" "movzwl %%ax, %%eax \n\t" @@ -149,7 +149,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s, if((s->out_format == FMT_H263 || s->out_format == FMT_H261) && s->mpeg_quant==0){ - asm volatile( + __asm__ volatile( "movd %%"REG_a", "MM"3 \n\t" // last_non_zero_p1 SPREADW(MM"3") "pxor "MM"7, "MM"7 \n\t" // 0 @@ -182,7 +182,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s, "r" (inv_zigzag_direct16+64), "r" (temp_block+64) ); }else{ // FMT_H263 - asm volatile( + __asm__ volatile( "movd %%"REG_a", "MM"3 \n\t" // last_non_zero_p1 SPREADW(MM"3") "pxor "MM"7, "MM"7 \n\t" // 0 @@ -214,7 +214,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s, "r" (inv_zigzag_direct16+64), "r" (temp_block+64) ); } - asm volatile( + __asm__ volatile( "movd %1, "MM"1 \n\t" // max_qcoeff SPREADW(MM"1") "psubusw "MM"1, "MM"4 \n\t" diff --git a/libavcodec/i386/simple_idct_mmx.c b/libavcodec/i386/simple_idct_mmx.c index 04b28f50c1..6306fcbd44 100644 --- a/libavcodec/i386/simple_idct_mmx.c +++ b/libavcodec/i386/simple_idct_mmx.c @@ -212,7 +212,7 @@ static inline void idct(int16_t *block) DECLARE_ALIGNED(8, int64_t, align_tmp[16]); int16_t * const temp= (int16_t*)align_tmp; - asm volatile( + __asm__ volatile( #if 0 //Alternative, simpler variant #define ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \ diff --git a/libavcodec/i386/snowdsp_mmx.c b/libavcodec/i386/snowdsp_mmx.c index 2fb76cf0d4..93119787fd 100644 --- a/libavcodec/i386/snowdsp_mmx.c +++ b/libavcodec/i386/snowdsp_mmx.c @@ -38,7 +38,7 @@ void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){ // calculate b[0] correctly afterwards. i = 0; - asm volatile( + __asm__ volatile( "pcmpeqd %%xmm7, %%xmm7 \n\t" "pcmpeqd %%xmm3, %%xmm3 \n\t" "psllw $1, %%xmm3 \n\t" @@ -46,7 +46,7 @@ void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){ "psllw $13, %%xmm3 \n\t" ::); for(; i<w_l-15; i+=16){ - asm volatile( + __asm__ volatile( "movdqu (%1), %%xmm1 \n\t" "movdqu 16(%1), %%xmm5 \n\t" "movdqu 2(%1), %%xmm2 \n\t" @@ -77,7 +77,7 @@ void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){ dst[i] = dst[i] - (b[i] + b[i + 1]); } for(; i<w_r-15; i+=16){ - asm volatile( + __asm__ volatile( "movdqu (%1), %%xmm1 \n\t" "movdqu 16(%1), %%xmm5 \n\t" "movdqu 2(%1), %%xmm2 \n\t" @@ -102,14 +102,14 @@ void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){ IDWTELEM b_0 = b[0]; i = 0; - asm volatile( + __asm__ volatile( "psllw $15, %%xmm7 \n\t" "pcmpeqw %%xmm6, %%xmm6 \n\t" "psrlw $13, %%xmm6 \n\t" "paddw %%xmm7, %%xmm6 \n\t" ::); for(; i<w_l-15; i+=16){ - asm volatile( + __asm__ volatile( "movdqu (%1), %%xmm0 \n\t" "movdqu 16(%1), %%xmm4 \n\t" "movdqu 2(%1), %%xmm1 \n\t" @@ -150,7 +150,7 @@ void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){ temp[i] = src[i] - ((-W_AM*(b[i] + b[i+1]))>>W_AS); } for(; i<w_r-7; i+=8){ - asm volatile( + __asm__ volatile( "movdqu 2(%1), %%xmm2 \n\t" "movdqu 18(%1), %%xmm6 \n\t" "paddw (%1), %%xmm2 \n\t" @@ -180,7 +180,7 @@ void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){ b[i] = b[i>>1]; } for (i-=62; i>=0; i-=64){ - asm volatile( + __asm__ volatile( "movdqa (%1), %%xmm0 \n\t" "movdqa 16(%1), %%xmm2 \n\t" "movdqa 32(%1), %%xmm4 \n\t" @@ -224,7 +224,7 @@ void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){ i = 1; b[0] = b[0] - ((W_DM * 2 * ref[1]+W_DO)>>W_DS); - asm volatile( + __asm__ volatile( "pcmpeqw %%mm7, %%mm7 \n\t" "pcmpeqw %%mm3, %%mm3 \n\t" "psllw $1, %%mm3 \n\t" @@ -232,7 +232,7 @@ void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){ "psllw $13, %%mm3 \n\t" ::); for(; i<w_l-7; i+=8){ - asm volatile( + __asm__ volatile( "movq (%1), %%mm2 \n\t" "movq 8(%1), %%mm6 \n\t" "paddw 2(%1), %%mm2 \n\t" @@ -257,7 +257,7 @@ void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){ i = 0; for(; i<w_r-7; i+=8){ - asm volatile( + __asm__ volatile( "movq (%1), %%mm2 \n\t" "movq 8(%1), %%mm6 \n\t" "paddw 2(%1), %%mm2 \n\t" @@ -280,14 +280,14 @@ void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){ i = 1; b[0] = b[0] + (((2 * ref[1] + W_BO) + 4 * b[0]) >> W_BS); - asm volatile( + __asm__ volatile( "psllw $15, %%mm7 \n\t" "pcmpeqw %%mm6, %%mm6 \n\t" "psrlw $13, %%mm6 \n\t" "paddw %%mm7, %%mm6 \n\t" ::); for(; i<w_l-7; i+=8){ - asm volatile( + __asm__ volatile( "movq (%1), %%mm0 \n\t" "movq 8(%1), %%mm4 \n\t" "movq 2(%1), %%mm1 \n\t" @@ -324,7 +324,7 @@ void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){ i = 0; for(; i<w_r-7; i+=8){ - asm volatile( + __asm__ volatile( "movq 2(%1), %%mm2 \n\t" "movq 10(%1), %%mm6 \n\t" "paddw (%1), %%mm2 \n\t" @@ -354,7 +354,7 @@ void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){ b[i] = b[i>>1]; } for (i-=30; i>=0; i-=32){ - asm volatile( + __asm__ volatile( "movq (%1), %%mm0 \n\t" "movq 8(%1), %%mm2 \n\t" "movq 16(%1), %%mm4 \n\t" @@ -448,7 +448,7 @@ void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, } i+=i; - asm volatile ( + __asm__ volatile ( "jmp 2f \n\t" "1: \n\t" snow_vertical_compose_sse2_load("%4","xmm0","xmm2","xmm4","xmm6") @@ -544,7 +544,7 @@ void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, I b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS; } i+=i; - asm volatile( + __asm__ volatile( "jmp 2f \n\t" "1: \n\t" @@ -606,7 +606,7 @@ void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, I #define snow_inner_add_yblock_sse2_header \ IDWTELEM * * dst_array = sb->line + src_y;\ x86_reg tmp;\ - asm volatile(\ + __asm__ volatile(\ "mov %7, %%"REG_c" \n\t"\ "mov %6, %2 \n\t"\ "mov %4, %%"REG_S" \n\t"\ @@ -759,7 +759,7 @@ snow_inner_add_yblock_sse2_end_16 #define snow_inner_add_yblock_mmx_header \ IDWTELEM * * dst_array = sb->line + src_y;\ x86_reg tmp;\ - asm volatile(\ + __asm__ volatile(\ "mov %7, %%"REG_c" \n\t"\ "mov %6, %2 \n\t"\ "mov %4, %%"REG_S" \n\t"\ diff --git a/libavcodec/i386/vc1dsp_mmx.c b/libavcodec/i386/vc1dsp_mmx.c index 2bbe3fa465..b5db0ed9e4 100644 --- a/libavcodec/i386/vc1dsp_mmx.c +++ b/libavcodec/i386/vc1dsp_mmx.c @@ -74,7 +74,7 @@ static void vc1_put_ver_16b_shift2_mmx(int16_t *dst, const uint8_t *src, x86_reg stride, int rnd, int64_t shift) { - asm volatile( + __asm__ volatile( "mov $3, %%"REG_c" \n\t" LOAD_ROUNDER_MMX("%5") "movq "MANGLE(ff_pw_9)", %%mm6 \n\t" @@ -114,7 +114,7 @@ static void vc1_put_hor_16b_shift2_mmx(uint8_t *dst, x86_reg stride, src -= 1; rnd -= (-1+9+9-1)*1024; /* Add -1024 bias */ - asm volatile( + __asm__ volatile( LOAD_ROUNDER_MMX("%4") "movq "MANGLE(ff_pw_128)", %%mm6\n\t" "movq "MANGLE(ff_pw_9)", %%mm5 \n\t" @@ -155,7 +155,7 @@ static void vc1_put_shift2_mmx(uint8_t *dst, const uint8_t *src, x86_reg stride, int rnd, x86_reg offset) { rnd = 8-rnd; - asm volatile( + __asm__ volatile( "mov $8, %%"REG_c" \n\t" LOAD_ROUNDER_MMX("%5") "movq "MANGLE(ff_pw_9)", %%mm6\n\t" @@ -264,7 +264,7 @@ vc1_put_ver_16b_ ## NAME ## _mmx(int16_t *dst, const uint8_t *src, \ { \ int h = 8; \ src -= src_stride; \ - asm volatile( \ + __asm__ volatile( \ LOAD_ROUNDER_MMX("%5") \ "movq "MANGLE(ff_pw_53)", %%mm5\n\t" \ "movq "MANGLE(ff_pw_18)", %%mm6\n\t" \ @@ -320,7 +320,7 @@ vc1_put_hor_16b_ ## NAME ## _mmx(uint8_t *dst, x86_reg stride, \ int h = 8; \ src -= 1; \ rnd -= (-4+58+13-3)*256; /* Add -256 bias */ \ - asm volatile( \ + __asm__ volatile( \ LOAD_ROUNDER_MMX("%4") \ "movq "MANGLE(ff_pw_18)", %%mm6 \n\t" \ "movq "MANGLE(ff_pw_53)", %%mm5 \n\t" \ @@ -358,7 +358,7 @@ vc1_put_## NAME ## _mmx(uint8_t *dst, const uint8_t *src, \ int h = 8; \ src -= offset; \ rnd = 32-rnd; \ - asm volatile ( \ + __asm__ volatile ( \ LOAD_ROUNDER_MMX("%6") \ "movq "MANGLE(ff_pw_53)", %%mm5 \n\t" \ "movq "MANGLE(ff_pw_18)", %%mm6 \n\t" \ @@ -412,7 +412,7 @@ static void vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride, static const vc1_mspel_mc_filter_8bits vc1_put_shift_8bits[] = { NULL, vc1_put_shift1_mmx, vc1_put_shift2_mmx, vc1_put_shift3_mmx }; - asm volatile( + __asm__ volatile( "pxor %%mm0, %%mm0 \n\t" ::: "memory" ); diff --git a/libavcodec/i386/vp3dsp_mmx.c b/libavcodec/i386/vp3dsp_mmx.c index 6304e91594..e7571c0b9e 100644 --- a/libavcodec/i386/vp3dsp_mmx.c +++ b/libavcodec/i386/vp3dsp_mmx.c @@ -250,7 +250,7 @@ void ff_vp3_idct_mmx(int16_t *output_data) #define I(x) AV_STRINGIFY(16* x )"(%0)" #define J(x) AV_STRINGIFY(16*(x-4) + 8)"(%0)" - asm volatile ( + __asm__ volatile ( RowIDCT() Transpose() diff --git a/libavcodec/i386/vp3dsp_sse2.c b/libavcodec/i386/vp3dsp_sse2.c index f162ed5999..82670c74ef 100644 --- a/libavcodec/i386/vp3dsp_sse2.c +++ b/libavcodec/i386/vp3dsp_sse2.c @@ -161,7 +161,7 @@ void ff_vp3_idct_sse2(int16_t *input_data) #define O(x) I(x) #define C(x) AV_STRINGIFY(16*(x-1))"(%1)" - asm volatile ( + __asm__ volatile ( VP3_1D_IDCT_SSE2(NOP, NOP) TRANSPOSE8(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm6, %%xmm7, (%0)) |