diff options
Diffstat (limited to 'libavcodec/liba52/resample_mmx.c')
-rw-r--r-- | libavcodec/liba52/resample_mmx.c | 38 |
1 files changed, 19 insertions, 19 deletions
diff --git a/libavcodec/liba52/resample_mmx.c b/libavcodec/liba52/resample_mmx.c index 315322e3ca..a4e313c606 100644 --- a/libavcodec/liba52/resample_mmx.c +++ b/libavcodec/liba52/resample_mmx.c @@ -1,10 +1,10 @@ // MMX optimizations from Michael Niedermayer (michaelni@gmx.at) (under GPL) -/* optimization TODO / NOTES - movntq is slightly faster (0.5% with the current test.c benchmark) +/* optimization TODO / NOTES + movntq is slightly faster (0.5% with the current test.c benchmark) (but thats just test.c so that needs to be testd in reallity) - and it would mean (C / MMX2 / MMX / 3DNOW) versions + and it would mean (C / MMX2 / MMX / 3DNOW) versions */ static uint64_t __attribute__((aligned(8))) attribute_used magicF2W= 0x43c0000043c00000LL; @@ -128,7 +128,7 @@ static int a52_resample_3F_to_5_MMX(float * _f, int16_t * s16){ "movq %%mm0, (%0, %%edi) \n\t" "movq %%mm1, 8(%0, %%edi) \n\t" "movq %%mm3, 16(%0, %%edi) \n\t" - + "movd 1032(%1, %%esi), %%mm1 \n\t" "punpckldq 12(%1, %%esi), %%mm1\n\t" "movd 2060(%1, %%esi), %%mm2 \n\t" @@ -142,7 +142,7 @@ static int a52_resample_3F_to_5_MMX(float * _f, int16_t * s16){ "packssdw %%mm3, %%mm2 \n\t" "movq %%mm0, 24(%0, %%edi) \n\t" "movq %%mm2, 32(%0, %%edi) \n\t" - + "addl $16, %%esi \n\t" " jnz 1b \n\t" "emms \n\t" @@ -235,7 +235,7 @@ static int a52_resample_3F_2R_to_5_MMX(float * _f, int16_t * s16){ "movq %%mm0, (%0, %%edi) \n\t" "movq %%mm2, 8(%0, %%edi) \n\t" "movq %%mm4, 16(%0, %%edi) \n\t" - + "movd 3080(%1, %%esi), %%mm0 \n\t" "punpckldq 4104(%1, %%esi), %%mm0\n\t" "movd 1032(%1, %%esi), %%mm1 \n\t" @@ -252,7 +252,7 @@ static int a52_resample_3F_2R_to_5_MMX(float * _f, int16_t * s16){ "packssdw %%mm3, %%mm2 \n\t" "movq %%mm0, 24(%0, %%edi) \n\t" "movq %%mm2, 32(%0, %%edi) \n\t" - + "addl $16, %%esi \n\t" " jnz 1b \n\t" "emms \n\t" @@ -311,12 +311,12 @@ static int a52_resample_STEREO_LFE_to_6_MMX(float * _f, int16_t * s16){ "1: \n\t" "movq 1024(%1, %%esi), %%mm0 \n\t" "movq 2048(%1, %%esi), %%mm1 \n\t" - "movq (%1, %%esi), %%mm5 \n\t" + "movq (%1, %%esi), %%mm5 \n\t" "psubd %%mm7, %%mm0 \n\t" "psubd %%mm7, %%mm1 \n\t" "psubd %%mm7, %%mm5 \n\t" "leal (%%esi, %%esi, 2), %%edi \n\t" - + "pxor %%mm4, %%mm4 \n\t" "packssdw %%mm5, %%mm0 \n\t" // FfAa "packssdw %%mm4, %%mm1 \n\t" // 00Bb @@ -326,7 +326,7 @@ static int a52_resample_STEREO_LFE_to_6_MMX(float * _f, int16_t * s16){ "punpckldq %%mm4, %%mm3 \n\t" // f0XX "punpckldq %%mm6, %%mm0 \n\t" // 00ba "punpckhdq %%mm1, %%mm3 \n\t" // BAf0 - + "movq %%mm0, (%0, %%edi) \n\t" // 00ba "punpckhdq %%mm4, %%mm0 \n\t" // F000 "movq %%mm3, 8(%0, %%edi) \n\t" // BAf0 @@ -350,13 +350,13 @@ static int a52_resample_3F_LFE_to_6_MMX(float * _f, int16_t * s16){ "movq 1024(%1, %%esi), %%mm0 \n\t" "movq 3072(%1, %%esi), %%mm1 \n\t" "movq 2048(%1, %%esi), %%mm4 \n\t" - "movq (%1, %%esi), %%mm5 \n\t" + "movq (%1, %%esi), %%mm5 \n\t" "psubd %%mm7, %%mm0 \n\t" "psubd %%mm7, %%mm1 \n\t" "psubd %%mm7, %%mm4 \n\t" "psubd %%mm7, %%mm5 \n\t" "leal (%%esi, %%esi, 2), %%edi \n\t" - + "packssdw %%mm4, %%mm0 \n\t" // EeAa "packssdw %%mm5, %%mm1 \n\t" // FfBb "movq %%mm0, %%mm2 \n\t" // EeAa @@ -365,7 +365,7 @@ static int a52_resample_3F_LFE_to_6_MMX(float * _f, int16_t * s16){ "movq %%mm0, %%mm1 \n\t" // BAba "punpckldq %%mm6, %%mm0 \n\t" // 00ba "punpckhdq %%mm1, %%mm1 \n\t" // BABA - + "movq %%mm0, (%0, %%edi) \n\t" "punpckhdq %%mm2, %%mm0 \n\t" // FE00 "punpckldq %%mm1, %%mm2 \n\t" // BAfe @@ -391,14 +391,14 @@ static int a52_resample_2F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){ "movq 2048(%1, %%esi), %%mm1 \n\t" "movq 3072(%1, %%esi), %%mm2 \n\t" "movq 4096(%1, %%esi), %%mm3 \n\t" - "movq (%1, %%esi), %%mm5 \n\t" + "movq (%1, %%esi), %%mm5 \n\t" "psubd %%mm7, %%mm0 \n\t" "psubd %%mm7, %%mm1 \n\t" "psubd %%mm7, %%mm2 \n\t" "psubd %%mm7, %%mm3 \n\t" "psubd %%mm7, %%mm5 \n\t" "leal (%%esi, %%esi, 2), %%edi \n\t" - + "packssdw %%mm2, %%mm0 \n\t" // CcAa "packssdw %%mm3, %%mm1 \n\t" // DdBb "packssdw %%mm5, %%mm5 \n\t" // FfFf @@ -413,7 +413,7 @@ static int a52_resample_2F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){ "punpckhdq %%mm1, %%mm1 \n\t" // BABA "punpckldq %%mm1, %%mm4 \n\t" // BAf0 "punpckhdq %%mm3, %%mm2 \n\t" // F0DC - + "movq %%mm0, (%0, %%edi) \n\t" "movq %%mm4, 8(%0, %%edi) \n\t" "movq %%mm2, 16(%0, %%edi) \n\t" @@ -438,7 +438,7 @@ static int a52_resample_3F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){ "movq 4096(%1, %%esi), %%mm2 \n\t" "movq 5120(%1, %%esi), %%mm3 \n\t" "movq 2048(%1, %%esi), %%mm4 \n\t" - "movq (%1, %%esi), %%mm5 \n\t" + "movq (%1, %%esi), %%mm5 \n\t" "psubd %%mm7, %%mm0 \n\t" "psubd %%mm7, %%mm1 \n\t" "psubd %%mm7, %%mm2 \n\t" @@ -446,7 +446,7 @@ static int a52_resample_3F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){ "psubd %%mm7, %%mm4 \n\t" "psubd %%mm7, %%mm5 \n\t" "leal (%%esi, %%esi, 2), %%edi \n\t" - + "packssdw %%mm2, %%mm0 \n\t" // CcAa "packssdw %%mm3, %%mm1 \n\t" // DdBb "packssdw %%mm4, %%mm4 \n\t" // EeEe @@ -461,7 +461,7 @@ static int a52_resample_3F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){ "punpckhdq %%mm1, %%mm1 \n\t" // BABA "punpckldq %%mm1, %%mm4 \n\t" // BAfe "punpckhdq %%mm3, %%mm2 \n\t" // FEDC - + "movq %%mm0, (%0, %%edi) \n\t" "movq %%mm4, 8(%0, %%edi) \n\t" "movq %%mm2, 16(%0, %%edi) \n\t" |