40 files changed, 807 insertions, 525 deletions
diff --git a/common.mak b/common.mak
index 4dee65d86e..68909e63dd 100644
--- a/common.mak
+++ b/common.mak
@@ -11,7 +11,7 @@ ifndef V
 Q      = @
 ECHO   = printf "$(1)\t%s\n" $(2)
 BRIEF  = CC CXX AS YASM AR LD HOSTCC STRIP CP
-SILENT = DEPCC DEPAS DEPHOSTCC YASMDEP RM RANLIB
+SILENT = DEPCC DEPAS DEPHOSTCC DEPYASM RM RANLIB
 MSG    = $@
 M      = @$(call ECHO,$(TAG),$@);
 $(foreach VAR,$(BRIEF), \
@@ -35,7 +35,7 @@ LDFLAGS    := $(ALLFFLIBS:%=-Llib%) $(LDFLAGS)
 
 define COMPILE
        $(call $(1)DEP,$(1))
-       $($(1)) $($(1)FLAGS) $($(1)_DEPFLAGS) -c $($(1)_O) $<
+       $($(1)) $($(1)FLAGS) $($(1)_DEPFLAGS) $($(1)_C) $($(1)_O) $<
 endef
 
 COMPILE_C = $(call COMPILE,CC)
@@ -55,7 +55,7 @@ COMPILE_S = $(call COMPILE,AS)
 	$(COMPILE_S)
 
 %.ho: %.h
-	$(CC) $(CPPFLAGS) $(CFLAGS) -c -o $@ -x c $<
+	$(CC) $(CCFLAGS) -c $(CC_O) -x c $<
 
 %.ver: %.v
 	$(Q)sed 's/$$MAJOR/$($(basename $(@F))_VERSION_MAJOR)/' $^ > $@
diff --git a/configure b/configure
index 0b27b6b7b7..1445b82a20 100755
--- a/configure
+++ b/configure
@@ -673,11 +673,15 @@ cc_o(){
     eval printf '%s\\n' $CC_O
 }
 
+cc_e(){
+    eval printf '%s\\n' $CC_E
+}
+
 check_cc(){
     log check_cc "$@"
     cat > $TMPC
     log_file $TMPC
-    check_cmd $cc $CPPFLAGS $CFLAGS "$@" -c $(cc_o $TMPO) $TMPC
+    check_cmd $cc $CPPFLAGS $CFLAGS "$@" $CC_C $(cc_o $TMPO) $TMPC
 }
 
 check_cxx(){
@@ -691,14 +695,14 @@ check_cpp(){
     log check_cpp "$@"
     cat > $TMPC
     log_file $TMPC
-    check_cmd $cc $CPPFLAGS $CFLAGS "$@" -E -o $TMPO $TMPC
+    check_cmd $cc $CPPFLAGS $CFLAGS "$@" $(cc_e $TMPO) $TMPC
 }
 
 check_as(){
     log check_as "$@"
     cat > $TMPC
     log_file $TMPC
-    check_cmd $as $CPPFLAGS $ASFLAGS "$@" -c -o $TMPO $TMPC
+    check_cmd $as $CPPFLAGS $ASFLAGS "$@" $AS_C -o $TMPO $TMPC
 }
 
 check_asm(){
@@ -1043,7 +1047,6 @@ PROGRAM_LIST="
 CONFIG_LIST="
     $COMPONENT_LIST
     $PROGRAM_LIST
-    ac3dsp
     avcodec
     avdevice
     avfilter
@@ -1061,7 +1064,6 @@ CONFIG_LIST="
     fft
     fontconfig
     frei0r
-    gcrypt
     gnutls
     gpl
     gray
@@ -1107,8 +1109,6 @@ CONFIG_LIST="
     mdct
     memalign_hack
     memory_poisoning
-    mpegaudiodsp
-    nettle
     network
     nonfree
     openal
@@ -1116,11 +1116,9 @@ CONFIG_LIST="
     pic
     postproc
     rdft
-    rtpdec
     runtime_cpudetect
     safe_bitstream_reader
     shared
-    sinewin
     small
     sram
     static
@@ -1341,7 +1339,9 @@ HAVE_LIST="
 # options emitted with CONFIG_ prefix but not available on command line
 CONFIG_EXTRA="
     aandcttables
+    ac3dsp
     avutil
+    gcrypt
     golomb
     gplv3
     h264chroma
@@ -1351,6 +1351,10 @@ CONFIG_EXTRA="
     huffman
     lgplv3
     lpc
+    mpegaudiodsp
+    nettle
+    rtpdec
+    sinewin
     vp3dsp
 "
 
@@ -1738,7 +1742,7 @@ v4l_indev_deps="linux_videodev_h"
 v4l2_indev_deps_any="linux_videodev2_h sys_videoio_h"
 vfwcap_indev_deps="capCreateCaptureWindow vfwcap_defines"
 vfwcap_indev_extralibs="-lavicap32"
-x11_grab_device_indev_deps="x11grab"
+x11grab_indev_deps="x11grab"
 
 # protocols
 bluray_protocol_deps="libbluray"
@@ -1968,10 +1972,14 @@ asflags_filter=echo
 cflags_filter=echo
 ldflags_filter=echo
 
+AS_C='-c'
 AS_O='-o $@'
+CC_C='-c'
+CC_E='-E -o $@'
 CC_O='-o $@'
 CXX_O='-o $@'
 LD_O='-o $@'
+HOSTCC_C='-c'
 HOSTCC_O='-o $@'
 
 host_cflags='-D_ISOC99_SOURCE -D_XOPEN_SOURCE=600 -O3 -g'
@@ -2331,7 +2339,8 @@ probe_cc(){
     pfx=$1
     _cc=$2
 
-    unset _type _ident _cc_o _flags _cflags _ldflags _depflags _DEPCMD _DEPFLAGS
+    unset _type _ident _cc_c _cc_e _cc_o _flags _cflags _ldflags
+    unset _depflags _DEPCMD _DEPFLAGS
     _flags_filter=echo
 
     if $_cc -v 2>&1 | grep -q '^gcc.*LLVM'; then
@@ -2394,6 +2403,7 @@ probe_cc(){
         _ident=$($_cc -version | head -n1 | tr -s ' ')
         _flags='--gcc --abi=eabi -me'
         _cflags='-D__gnuc_va_list=va_list -D__USER_LABEL_PREFIX__='
+        _cc_e='-ppl -fe=$@'
         _cc_o='-fe=$@'
         as_default="${cross_prefix}gcc"
         ld_default="${cross_prefix}gcc"
@@ -2445,6 +2455,8 @@ probe_cc(){
 }
 
 set_ccvars(){
+    eval ${1}_C=\${_cc_c-\${${1}_C}}
+    eval ${1}_E=\${_cc_e-\${${1}_E}}
     eval ${1}_O=\${_cc_o-\${${1}_O}}
 
     if [ -n "$_depflags" ]; then
@@ -3874,7 +3886,7 @@ DEPCCFLAGS=$DEPCCFLAGS \$(CPPFLAGS)
 DEPAS=$as
 DEPASFLAGS=$DEPASFLAGS \$(CPPFLAGS)
 YASM=$yasmexe
-YASMDEP=$yasmexe
+DEPYASM=$yasmexe
 AR=$ar
 RANLIB=$ranlib
 CP=cp -p
@@ -3884,7 +3896,9 @@ CPPFLAGS=$CPPFLAGS
 CFLAGS=$CFLAGS
 CXXFLAGS=$CXXFLAGS
 ASFLAGS=$ASFLAGS
+AS_C=$AS_C
 AS_O=$AS_O
+CC_C=$CC_C
 CC_O=$CC_O
 CXX_O=$CXX_O
 LD_O=$LD_O
@@ -3919,6 +3933,7 @@ DEPHOSTCCFLAGS=$DEPHOSTCCFLAGS \$(HOSTCCFLAGS)
 HOSTCCDEP=$HOSTCCDEP
 HOSTCCDEP_FLAGS=$HOSTCCDEP_FLAGS
 HOSTCC_DEPFLAGS=$HOSTCC_DEPFLAGS
+HOSTCC_C=$HOSTCC_C
 HOSTCC_O=$HOSTCC_O
 TARGET_EXEC=$target_exec
 TARGET_PATH=$target_path
diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c
index d448a8a4c8..b6ffac8da4 100644
--- a/libavcodec/vc1dec.c
+++ b/libavcodec/vc1dec.c
@@ -1050,9 +1050,10 @@ static void vc1_mc_4mv_chroma4(VC1Context *v)
         if ((edges&8) && s->mb_y == (s->mb_height - 1))        \
             mquant = v->altpq;                                 \
         if (!mquant || mquant > 31) {                          \
-            av_log(v->s.avctx, AV_LOG_ERROR, "invalid mquant %d\n", mquant);   \
-            mquant = 1;                                \
-        }                                              \
+            av_log(v->s.avctx, AV_LOG_ERROR,                   \
+                   "Overriding invalid mquant %d\n", mquant);  \
+            mquant = 1;                                        \
+        }                                                      \
     }
 
 /**
@@ -4944,15 +4945,17 @@ static void vc1_draw_sprites(VC1Context *v, SpriteData* sd)
                 int      iline  = s->current_picture.f.linesize[plane];
                 int      ycoord = yoff[sprite] + yadv[sprite] * row;
                 int      yline  = ycoord >> 16;
+                int      next_line;
                 ysub[sprite] = ycoord & 0xFFFF;
                 if (sprite) {
                     iplane = s->last_picture.f.data[plane];
                     iline  = s->last_picture.f.linesize[plane];
                 }
+                next_line = FFMIN(yline + 1, (v->sprite_height >> !!plane) - 1) * iline;
                 if (!(xoff[sprite] & 0xFFFF) && xadv[sprite] == 1 << 16) {
                         src_h[sprite][0] = iplane + (xoff[sprite] >> 16) +  yline      * iline;
                     if (ysub[sprite])
-                        src_h[sprite][1] = iplane + (xoff[sprite] >> 16) + FFMIN(yline + 1, (v->sprite_height>>!!plane)-1) * iline;
+                        src_h[sprite][1] = iplane + (xoff[sprite] >> 16) + next_line;
                 } else {
                     if (sr_cache[sprite][0] != yline) {
                         if (sr_cache[sprite][1] == yline) {
@@ -4964,7 +4967,9 @@ static void vc1_draw_sprites(VC1Context *v, SpriteData* sd)
                         }
                     }
                     if (ysub[sprite] && sr_cache[sprite][1] != yline + 1) {
-                        v->vc1dsp.sprite_h(v->sr_rows[sprite][1], iplane + FFMIN(yline + 1, (v->sprite_height>>!!plane)-1) * iline, xoff[sprite], xadv[sprite], width);
+                        v->vc1dsp.sprite_h(v->sr_rows[sprite][1],
+                                           iplane + next_line, xoff[sprite],
+                                           xadv[sprite], width);
                         sr_cache[sprite][1] = yline + 1;
                     }
                     src_h[sprite][0] = v->sr_rows[sprite][0];
@@ -5581,8 +5586,10 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
         mb_height = s->mb_height >> v->field_mode;
         for (i = 0; i <= n_slices; i++) {
             if (i > 0 &&  slices[i - 1].mby_start >= mb_height) {
-                if(v->field_mode <= 0) {
-                    av_log(v->s.avctx, AV_LOG_ERROR, "invalid end_mb_y %d\n", slices[i - 1].mby_start);
+                if (v->field_mode <= 0) {
+                    av_log(v->s.avctx, AV_LOG_ERROR, "Slice %d starts beyond "
+                           "picture boundary (%d >= %d)\n", i,
+                           slices[i - 1].mby_start, mb_height);
                     continue;
                 }
                 v->second_field = 1;
@@ -5597,13 +5604,13 @@ static int vc1_decode_frame(AVCodecContext *avctx, void *data,
                 v->pic_header_flag = 0;
                 if (v->field_mode && i == n_slices1 + 2) {
                     if (ff_vc1_parse_frame_header_adv(v, &s->gb) < 0) {
-                        av_log(v->s.avctx, AV_LOG_ERROR, "slice header damaged\n");
+                        av_log(v->s.avctx, AV_LOG_ERROR, "Field header damaged\n");
                         continue;
                     }
                 } else if (get_bits1(&s->gb)) {
                     v->pic_header_flag = 1;
                     if (ff_vc1_parse_frame_header_adv(v, &s->gb) < 0) {
-                        av_log(v->s.avctx, AV_LOG_ERROR, "slice header damaged\n");
+                        av_log(v->s.avctx, AV_LOG_ERROR, "Slice header damaged\n");
                         continue;
                     }
                 }
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index 1488389572..85753e07f5 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -2137,10 +2137,10 @@ void ff_ ## OP ## _h264_chroma_mc ## NUM ## _ ## DEPTH ## _ ## OPT      \
                                       (uint8_t *dst, uint8_t *src,      \
                                        int stride, int h, int x, int y);
 
-CHROMA_MC(put, 2, 10, mmxext)
-CHROMA_MC(avg, 2, 10, mmxext)
-CHROMA_MC(put, 4, 10, mmxext)
-CHROMA_MC(avg, 4, 10, mmxext)
+CHROMA_MC(put, 2, 10, mmx2)
+CHROMA_MC(avg, 2, 10, mmx2)
+CHROMA_MC(put, 4, 10, mmx2)
+CHROMA_MC(avg, 4, 10, mmx2)
 CHROMA_MC(put, 8, 10, sse2)
 CHROMA_MC(avg, 8, 10, sse2)
 CHROMA_MC(put, 8, 10, avx)
@@ -2841,10 +2841,10 @@ static void dsputil_init_mmx2(DSPContext *c, AVCodecContext *avctx,
         c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_mmx2;
     }
     if (bit_depth == 10 && CONFIG_H264CHROMA) {
-        c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_10_mmxext;
-        c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_10_mmxext;
-        c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_10_mmxext;
-        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_10_mmxext;
+        c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_10_mmx2;
+        c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_10_mmx2;
+        c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_10_mmx2;
+        c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_10_mmx2;
     }
 
     c->add_hfyu_median_prediction   = ff_add_hfyu_median_prediction_mmx2;
diff --git a/libavcodec/x86/fft_mmx.asm b/libavcodec/x86/fft_mmx.asm
index 5e5004ca97..a709287f48 100644
--- a/libavcodec/x86/fft_mmx.asm
+++ b/libavcodec/x86/fft_mmx.asm
@@ -517,23 +517,23 @@ INIT_MMX 3dnow
 FFT48_3DN
 
 
-%define Z(x) [zq + o1q*(x&6) + mmsize*(x&1)]
-%define Z2(x) [zq + o3q + mmsize*(x&1)]
-%define ZH(x) [zq + o1q*(x&6) + mmsize*(x&1) + mmsize/2]
-%define Z2H(x) [zq + o3q + mmsize*(x&1) + mmsize/2]
+%define Z(x) [zcq + o1q*(x&6) + mmsize*(x&1)]
+%define Z2(x) [zcq + o3q + mmsize*(x&1)]
+%define ZH(x) [zcq + o1q*(x&6) + mmsize*(x&1) + mmsize/2]
+%define Z2H(x) [zcq + o3q + mmsize*(x&1) + mmsize/2]
 
 %macro DECL_PASS 2+ ; name, payload
 align 16
 %1:
-DEFINE_ARGS z, w, n, o1, o3
+DEFINE_ARGS zc, w, n, o1, o3
     lea o3q, [nq*3]
     lea o1q, [nq*8]
     shl o3q, 4
 .loop:
     %2
-    add zq, mmsize*2
-    add wq, mmsize
-    sub nd, mmsize/8
+    add zcq, mmsize*2
+    add  wq, mmsize
+    sub  nd, mmsize/8
     jg .loop
     rep ret
 %endmacro
@@ -748,7 +748,7 @@ section .text
 
 ; On x86_32, this function does the register saving and restoring for all of fft.
 ; The others pass args in registers and don't spill anything.
-cglobal fft_dispatch%2, 2,5,8, z, nbits
+cglobal fft_dispatch%2, 2,5,8, zc, nbits
     FFT_DISPATCH fullsuffix, nbits
     RET
 %endmacro ; DECL_FFT
diff --git a/libavcodec/x86/h264_chromamc_10bit.asm b/libavcodec/x86/h264_chromamc_10bit.asm
index 3f7c513069..370c7b5a46 100644
--- a/libavcodec/x86/h264_chromamc_10bit.asm
+++ b/libavcodec/x86/h264_chromamc_10bit.asm
@@ -60,10 +60,10 @@ SECTION .text
 ;-----------------------------------------------------------------------------
 ; void put/avg_h264_chroma_mc8(pixel *dst, pixel *src, int stride, int h, int mx, int my)
 ;-----------------------------------------------------------------------------
-%macro CHROMA_MC8 2
+%macro CHROMA_MC8 1
 ; put/avg_h264_chroma_mc8_*(uint8_t *dst /*align 8*/, uint8_t *src /*align 1*/,
 ;                              int stride, int h, int mx, int my)
-cglobal %1_h264_chroma_mc8_10_%2, 6,7,8
+cglobal %1_h264_chroma_mc8_10, 6,7,8
     movsxdifnidn  r2, r2d
     mov          r6d, r5d
     or           r6d, r4d
@@ -173,8 +173,8 @@ cglobal %1_h264_chroma_mc8_10_%2, 6,7,8
     add           r0, r2
 %endmacro
 
-%macro CHROMA_MC4 2
-cglobal %1_h264_chroma_mc4_10_%2, 6,6,7
+%macro CHROMA_MC4 1
+cglobal %1_h264_chroma_mc4_10, 6,6,7
     movsxdifnidn  r2, r2d
     movd          m2, r4m         ; x
     movd          m3, r5m         ; y
@@ -203,8 +203,8 @@ cglobal %1_h264_chroma_mc4_10_%2, 6,6,7
 ;-----------------------------------------------------------------------------
 ; void put/avg_h264_chroma_mc2(pixel *dst, pixel *src, int stride, int h, int mx, int my)
 ;-----------------------------------------------------------------------------
-%macro CHROMA_MC2 2
-cglobal %1_h264_chroma_mc2_10_%2, 6,7
+%macro CHROMA_MC2 1
+cglobal %1_h264_chroma_mc2_10, 6,7
     movsxdifnidn  r2, r2d
     mov          r6d, r4d
     shl          r4d, 16
@@ -250,24 +250,24 @@ cglobal %1_h264_chroma_mc2_10_%2, 6,7
 %endmacro
 
 %define CHROMAMC_AVG  NOTHING
-INIT_XMM
-CHROMA_MC8 put, sse2
+INIT_XMM sse2
+CHROMA_MC8 put
 %if HAVE_AVX
-INIT_AVX
-CHROMA_MC8 put, avx
+INIT_XMM avx
+CHROMA_MC8 put
 %endif
-INIT_MMX
-CHROMA_MC4 put, mmxext
-CHROMA_MC2 put, mmxext
+INIT_MMX mmx2
+CHROMA_MC4 put
+CHROMA_MC2 put
 
 %define CHROMAMC_AVG  AVG
 %define PAVG          pavgw
-INIT_XMM
-CHROMA_MC8 avg, sse2
+INIT_XMM sse2
+CHROMA_MC8 avg
 %if HAVE_AVX
-INIT_AVX
-CHROMA_MC8 avg, avx
+INIT_XMM avx
+CHROMA_MC8 avg
 %endif
-INIT_MMX
-CHROMA_MC4 avg, mmxext
-CHROMA_MC2 avg, mmxext
+INIT_MMX mmx2
+CHROMA_MC4 avg
+CHROMA_MC2 avg
diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
index 2f33f320a1..ea422e3b3a 100644
--- a/libavcodec/x86/h264_deblock.asm
+++ b/libavcodec/x86/h264_deblock.asm
@@ -282,8 +282,8 @@ cextern pb_A1
 ;-----------------------------------------------------------------------------
 ; void deblock_v_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
 ;-----------------------------------------------------------------------------
-%macro DEBLOCK_LUMA 1
-cglobal deblock_v_luma_8_%1, 5,5,10
+%macro DEBLOCK_LUMA 0
+cglobal deblock_v_luma_8, 5,5,10
     movd    m8, [r4] ; tc0
     lea     r4, [r1*3]
     dec     r2d        ; alpha-1
@@ -327,8 +327,8 @@ cglobal deblock_v_luma_8_%1, 5,5,10
 ;-----------------------------------------------------------------------------
 ; void deblock_h_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
 ;-----------------------------------------------------------------------------
-INIT_MMX
-cglobal deblock_h_luma_8_%1, 5,9
+INIT_MMX cpuname
+cglobal deblock_h_luma_8, 5,9
     movsxd r7,  r1d
     lea    r8,  [r7+r7*2]
     lea    r6,  [r0-4]
@@ -355,7 +355,7 @@ cglobal deblock_h_luma_8_%1, 5,9
 %if WIN64
     mov    [rsp+0x20], r4
 %endif
-    call   deblock_v_luma_8_%1
+    call   deblock_v_luma_8
 
     ; transpose 16x4 -> original space  (only the middle 4 rows were changed by the filter)
     add    r6, 2
@@ -384,26 +384,26 @@ cglobal deblock_h_luma_8_%1, 5,9
     RET
 %endmacro
 
-INIT_XMM
-DEBLOCK_LUMA sse2
+INIT_XMM sse2
+DEBLOCK_LUMA
 %if HAVE_AVX
-INIT_AVX
-DEBLOCK_LUMA avx
+INIT_XMM avx
+DEBLOCK_LUMA
 %endif
 
 %else
 
-%macro DEBLOCK_LUMA 3
+%macro DEBLOCK_LUMA 2
 ;-----------------------------------------------------------------------------
 ; void deblock_v8_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
 ;-----------------------------------------------------------------------------
-cglobal deblock_%2_luma_8_%1, 5,5
+cglobal deblock_%1_luma_8, 5,5
     lea     r4, [r1*3]
     dec     r2     ; alpha-1
     neg     r4
     dec     r3     ; beta-1
     add     r4, r0 ; pix-3*stride
-    %assign pad 2*%3+12-(stack_offset&15)
+    %assign pad 2*%2+12-(stack_offset&15)
     SUB     esp, pad
 
     mova    m0, [r4+r1]   ; p1
@@ -417,7 +417,7 @@ cglobal deblock_%2_luma_8_%1, 5,5
     movd    m4, [r3] ; tc0
     punpcklbw m4, m4
     punpcklbw m4, m4 ; tc = 4x tc0[3], 4x tc0[2], 4x tc0[1], 4x tc0[0]
-    mova   [esp+%3], m4 ; tc
+    mova   [esp+%2], m4 ; tc
     pcmpgtb m4, m3
     mova    m3, [r4] ; p2
     pand    m4, m7
@@ -425,7 +425,7 @@ cglobal deblock_%2_luma_8_%1, 5,5
 
     DIFF_GT2 m1, m3, m5, m6, m7 ; |p2-p0| > beta-1
     pand    m6, m4
-    pand    m4, [esp+%3] ; tc
+    pand    m4, [esp+%2] ; tc
     psubb   m7, m4, m6
     pand    m6, m4
     LUMA_Q1 m0, m3, [r4], [r4+r1], m6, m4
@@ -433,7 +433,7 @@ cglobal deblock_%2_luma_8_%1, 5,5
     mova    m4, [r0+2*r1] ; q2
     DIFF_GT2 m2, m4, m5, m6, m3 ; |q2-q0| > beta-1
     pand    m6, [esp] ; mask
-    mova    m5, [esp+%3] ; tc
+    mova    m5, [esp+%2] ; tc
     psubb   m7, m6
     pand    m5, m6
     mova    m3, [r0+r1]
@@ -448,8 +448,8 @@ cglobal deblock_%2_luma_8_%1, 5,5
 ;-----------------------------------------------------------------------------
 ; void deblock_h_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
 ;-----------------------------------------------------------------------------
-INIT_MMX
-cglobal deblock_h_luma_8_%1, 0,5
+INIT_MMX cpuname
+cglobal deblock_h_luma_8, 0,5
     mov    r0, r0mp
     mov    r3, r1m
     lea    r4, [r3*3]
@@ -472,11 +472,11 @@ cglobal deblock_h_luma_8_%1, 0,5
     PUSH   dword r2m
     PUSH   dword 16
     PUSH   dword r0
-    call   deblock_%2_luma_8_%1
-%ifidn %2, v8
+    call   deblock_%1_luma_8
+%ifidn %1, v8
     add    dword [esp   ], 8 ; pix_tmp+0x38
     add    dword [esp+16], 2 ; tc0+2
-    call   deblock_%2_luma_8_%1
+    call   deblock_%1_luma_8
 %endif
     ADD    esp, 20
 
@@ -503,13 +503,13 @@ cglobal deblock_h_luma_8_%1, 0,5
     RET
 %endmacro ; DEBLOCK_LUMA
 
-INIT_MMX
-DEBLOCK_LUMA mmxext, v8, 8
-INIT_XMM
-DEBLOCK_LUMA sse2, v, 16
+INIT_MMX mmx2
+DEBLOCK_LUMA v8, 8
+INIT_XMM sse2
+DEBLOCK_LUMA v, 16
 %if HAVE_AVX
-INIT_AVX
-DEBLOCK_LUMA avx, v, 16
+INIT_XMM avx
+DEBLOCK_LUMA v, 16
 %endif
 
 %endif ; ARCH
@@ -612,7 +612,7 @@ DEBLOCK_LUMA avx, v, 16
     %define mask1p mask1q
 %endmacro
 
-%macro DEBLOCK_LUMA_INTRA 2
+%macro DEBLOCK_LUMA_INTRA 1
     %define p1 m0
     %define p0 m1
     %define q0 m2
@@ -647,7 +647,7 @@ DEBLOCK_LUMA avx, v, 16
 ;-----------------------------------------------------------------------------
 ; void deblock_v_luma_intra( uint8_t *pix, int stride, int alpha, int beta )
 ;-----------------------------------------------------------------------------
-cglobal deblock_%2_luma_intra_8_%1, 4,6,16
+cglobal deblock_%1_luma_intra_8, 4,6,16
 %if ARCH_X86_64 == 0
     sub     esp, 0x60
 %endif
@@ -704,12 +704,12 @@ cglobal deblock_%2_luma_intra_8_%1, 4,6,16
 %endif
     RET
 
-INIT_MMX
+INIT_MMX cpuname
 %if ARCH_X86_64
 ;-----------------------------------------------------------------------------
 ; void deblock_h_luma_intra( uint8_t *pix, int stride, int alpha, int beta )
 ;-----------------------------------------------------------------------------
-cglobal deblock_h_luma_intra_8_%1, 4,9
+cglobal deblock_h_luma_intra_8, 4,9
     movsxd r7,  r1d
     lea    r8,  [r7*3]
     lea    r6,  [r0-4]
@@ -725,7 +725,7 @@ cglobal deblock_h_luma_intra_8_%1, 4,9
 
     lea    r0,  [pix_tmp+0x40]
     mov    r1,  0x10
-    call   deblock_v_luma_intra_8_%1
+    call   deblock_v_luma_intra_8
 
     ; transpose 16x6 -> original space (but we can't write only 6 pixels, so really 16x8)
     lea    r5, [r6+r8]
@@ -738,7 +738,7 @@ cglobal deblock_h_luma_intra_8_%1, 4,9
     add    rsp, 0x88
     RET
 %else
-cglobal deblock_h_luma_intra_8_%1, 2,4
+cglobal deblock_h_luma_intra_8, 2,4
     lea    r3,  [r1*3]
     sub    r0,  4
     lea    r2,  [r0+r3]
@@ -757,10 +757,10 @@ cglobal deblock_h_luma_intra_8_%1, 2,4
     PUSH   dword r2m
     PUSH   dword 16
     PUSH   r0
-    call   deblock_%2_luma_intra_8_%1
-%ifidn %2, v8
+    call   deblock_%1_luma_intra_8
+%ifidn %1, v8
     add    dword [rsp], 8 ; pix_tmp+8
-    call   deblock_%2_luma_intra_8_%1
+    call   deblock_%1_luma_intra_8
 %endif
     ADD    esp, 16
 
@@ -779,18 +779,18 @@ cglobal deblock_h_luma_intra_8_%1, 2,4
 %endif ; ARCH_X86_64
 %endmacro ; DEBLOCK_LUMA_INTRA
 
-INIT_XMM
-DEBLOCK_LUMA_INTRA sse2, v
+INIT_XMM sse2
+DEBLOCK_LUMA_INTRA v
 %if HAVE_AVX
-INIT_AVX
-DEBLOCK_LUMA_INTRA avx , v
+INIT_XMM avx
+DEBLOCK_LUMA_INTRA v
 %endif
 %if ARCH_X86_64 == 0
-INIT_MMX
-DEBLOCK_LUMA_INTRA mmxext, v8
+INIT_MMX mmx2
+DEBLOCK_LUMA_INTRA v8
 %endif
 
-INIT_MMX
+INIT_MMX mmx2
 
 %macro CHROMA_V_START 0
     dec    r2d      ; alpha-1
@@ -815,13 +815,13 @@ INIT_MMX
 ;-----------------------------------------------------------------------------
 ; void ff_deblock_v_chroma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
 ;-----------------------------------------------------------------------------
-cglobal deblock_v_chroma_8_mmxext, 5,6
+cglobal deblock_v_chroma_8, 5,6
     CHROMA_V_START
     movq  m0, [t5]
     movq  m1, [t5+r1]
     movq  m2, [r0]
     movq  m3, [r0+r1]
-    call ff_chroma_inter_body_mmxext
+    call ff_chroma_inter_body_mmx2
     movq  [t5+r1], m1
     movq  [r0], m2
     RET
@@ -829,7 +829,7 @@ cglobal deblock_v_chroma_8_mmxext, 5,6
 ;-----------------------------------------------------------------------------
 ; void ff_deblock_h_chroma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
 ;-----------------------------------------------------------------------------
-cglobal deblock_h_chroma_8_mmxext, 5,7
+cglobal deblock_h_chroma_8, 5,7
 %if UNIX64
     %define buf0 [rsp-24]
     %define buf1 [rsp-16]
@@ -859,7 +859,7 @@ cglobal deblock_h_chroma_8_mmxext, 5,7
     RET
 
 ALIGN 16
-ff_chroma_inter_body_mmxext:
+ff_chroma_inter_body_mmx2:
     LOAD_MASK  r2d, r3d
     movd       m6, [r4] ; tc0
     punpcklbw  m6, m6
@@ -886,13 +886,13 @@ ff_chroma_inter_body_mmxext:
 ;-----------------------------------------------------------------------------
 ; void ff_deblock_v_chroma_intra( uint8_t *pix, int stride, int alpha, int beta )
 ;-----------------------------------------------------------------------------
-cglobal deblock_v_chroma_intra_8_mmxext, 4,5
+cglobal deblock_v_chroma_intra_8, 4,5
     CHROMA_V_START
     movq  m0, [t5]
     movq  m1, [t5+r1]
     movq  m2, [r0]
     movq  m3, [r0+r1]
-    call ff_chroma_intra_body_mmxext
+    call ff_chroma_intra_body_mmx2
     movq  [t5+r1], m1
     movq  [r0], m2
     RET
@@ -900,15 +900,15 @@ cglobal deblock_v_chroma_intra_8_mmxext, 4,5
 ;-----------------------------------------------------------------------------
 ; void ff_deblock_h_chroma_intra( uint8_t *pix, int stride, int alpha, int beta )
 ;-----------------------------------------------------------------------------
-cglobal deblock_h_chroma_intra_8_mmxext, 4,6
+cglobal deblock_h_chroma_intra_8, 4,6
     CHROMA_H_START
     TRANSPOSE4x8_LOAD  bw, wd, dq, PASS8ROWS(t5, r0, r1, t6)
-    call ff_chroma_intra_body_mmxext
+    call ff_chroma_intra_body_mmx2
     TRANSPOSE8x4B_STORE PASS8ROWS(t5, r0, r1, t6)
     RET
 
 ALIGN 16
-ff_chroma_intra_body_mmxext:
+ff_chroma_intra_body_mmx2:
     LOAD_MASK r2d, r3d
     movq   m5, m1
     movq   m6, m2
diff --git a/libavcodec/x86/h264_deblock_10bit.asm b/libavcodec/x86/h264_deblock_10bit.asm
index caf270163c..d625eee4a4 100644
--- a/libavcodec/x86/h264_deblock_10bit.asm
+++ b/libavcodec/x86/h264_deblock_10bit.asm
@@ -151,11 +151,11 @@ cextern pw_4
 %endif
 %endmacro
 
-%macro DEBLOCK_LUMA 1
+%macro DEBLOCK_LUMA 0
 ;-----------------------------------------------------------------------------
 ; void deblock_v_luma( uint16_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
 ;-----------------------------------------------------------------------------
-cglobal deblock_v_luma_10_%1, 5,5,8*(mmsize/16)
+cglobal deblock_v_luma_10, 5,5,8*(mmsize/16)
     %assign pad 5*mmsize+12-(stack_offset&15)
     %define tcm [rsp]
     %define ms1 [rsp+mmsize]
@@ -210,7 +210,7 @@ cglobal deblock_v_luma_10_%1, 5,5,8*(mmsize/16)
     ADD         rsp, pad
     RET
 
-cglobal deblock_h_luma_10_%1, 5,6,8*(mmsize/16)
+cglobal deblock_h_luma_10, 5,6,8*(mmsize/16)
     %assign pad 7*mmsize+12-(stack_offset&15)
     %define tcm [rsp]
     %define ms1 [rsp+mmsize]
@@ -301,7 +301,6 @@ cglobal deblock_h_luma_10_%1, 5,6,8*(mmsize/16)
     RET
 %endmacro
 
-INIT_XMM
 %if ARCH_X86_64
 ; in:  m0=p1, m1=p0, m2=q0, m3=q1, m8=p2, m9=q2
 ;      m12=alpha, m13=beta
@@ -339,8 +338,8 @@ INIT_XMM
     SWAP         3, 9
 %endmacro
 
-%macro DEBLOCK_LUMA_64 1
-cglobal deblock_v_luma_10_%1, 5,5,15
+%macro DEBLOCK_LUMA_64 0
+cglobal deblock_v_luma_10, 5,5,15
     %define p2 m8
     %define p1 m0
     %define p0 m1
@@ -377,7 +376,7 @@ cglobal deblock_v_luma_10_%1, 5,5,15
     jg .loop
     REP_RET
 
-cglobal deblock_h_luma_10_%1, 5,7,15
+cglobal deblock_h_luma_10, 5,7,15
     shl        r2d, 2
     shl        r3d, 2
     LOAD_AB    m12, m13, r2d, r3d
@@ -417,11 +416,11 @@ cglobal deblock_h_luma_10_%1, 5,7,15
     REP_RET
 %endmacro
 
-INIT_XMM
-DEBLOCK_LUMA_64 sse2
+INIT_XMM sse2
+DEBLOCK_LUMA_64
 %if HAVE_AVX
-INIT_AVX
-DEBLOCK_LUMA_64 avx
+INIT_XMM avx
+DEBLOCK_LUMA_64
 %endif
 %endif
 
@@ -604,8 +603,8 @@ DEBLOCK_LUMA_64 avx
 ;-----------------------------------------------------------------------------
 ; void deblock_v_luma_intra( uint16_t *pix, int stride, int alpha, int beta )
 ;-----------------------------------------------------------------------------
-%macro DEBLOCK_LUMA_INTRA_64 1
-cglobal deblock_v_luma_intra_10_%1, 4,7,16
+%macro DEBLOCK_LUMA_INTRA_64 0
+cglobal deblock_v_luma_intra_10, 4,7,16
     %define t0 m1
     %define t1 m2
     %define t2 m4
@@ -655,7 +654,7 @@ cglobal deblock_v_luma_intra_10_%1, 4,7,16
 ;-----------------------------------------------------------------------------
 ; void deblock_h_luma_intra( uint16_t *pix, int stride, int alpha, int beta )
 ;-----------------------------------------------------------------------------
-cglobal deblock_h_luma_intra_10_%1, 4,7,16
+cglobal deblock_h_luma_intra_10, 4,7,16
     %define t0 m15
     %define t1 m14
     %define t2 m2
@@ -714,20 +713,20 @@ cglobal deblock_h_luma_intra_10_%1, 4,7,16
     RET
 %endmacro
 
-INIT_XMM
-DEBLOCK_LUMA_INTRA_64 sse2
+INIT_XMM sse2
+DEBLOCK_LUMA_INTRA_64
 %if HAVE_AVX
-INIT_AVX
-DEBLOCK_LUMA_INTRA_64 avx
+INIT_XMM avx
+DEBLOCK_LUMA_INTRA_64
 %endif
 
 %endif
 
-%macro DEBLOCK_LUMA_INTRA 1
+%macro DEBLOCK_LUMA_INTRA 0
 ;-----------------------------------------------------------------------------
 ; void deblock_v_luma_intra( uint16_t *pix, int stride, int alpha, int beta )
 ;-----------------------------------------------------------------------------
-cglobal deblock_v_luma_intra_10_%1, 4,7,8*(mmsize/16)
+cglobal deblock_v_luma_intra_10, 4,7,8*(mmsize/16)
     LUMA_INTRA_INIT 3
     lea     r4, [r1*4]
     lea     r5, [r1*3]
@@ -755,7 +754,7 @@ cglobal deblock_v_luma_intra_10_%1, 4,7,8*(mmsize/16)
 ;-----------------------------------------------------------------------------
 ; void deblock_h_luma_intra( uint16_t *pix, int stride, int alpha, int beta )
 ;-----------------------------------------------------------------------------
-cglobal deblock_h_luma_intra_10_%1, 4,7,8*(mmsize/16)
+cglobal deblock_h_luma_intra_10, 4,7,8*(mmsize/16)
     LUMA_INTRA_INIT 8
 %if mmsize == 8
     lea     r4, [r1*3]
@@ -797,16 +796,16 @@ cglobal deblock_h_luma_intra_10_%1, 4,7,8*(mmsize/16)
 %endmacro
 
 %if ARCH_X86_64 == 0
-INIT_MMX
-DEBLOCK_LUMA mmxext
-DEBLOCK_LUMA_INTRA mmxext
-INIT_XMM
-DEBLOCK_LUMA sse2
-DEBLOCK_LUMA_INTRA sse2
+INIT_MMX mmx2
+DEBLOCK_LUMA
+DEBLOCK_LUMA_INTRA
+INIT_XMM sse2
+DEBLOCK_LUMA
+DEBLOCK_LUMA_INTRA
 %if HAVE_AVX
-INIT_AVX
-DEBLOCK_LUMA avx
-DEBLOCK_LUMA_INTRA avx
+INIT_XMM avx
+DEBLOCK_LUMA
+DEBLOCK_LUMA_INTRA
 %endif
 %endif
 
@@ -849,11 +848,11 @@ DEBLOCK_LUMA_INTRA avx
     psraw       %1, 6
 %endmacro
 
-%macro DEBLOCK_CHROMA 1
+%macro DEBLOCK_CHROMA 0
 ;-----------------------------------------------------------------------------
 ; void deblock_v_chroma( uint16_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
 ;-----------------------------------------------------------------------------
-cglobal deblock_v_chroma_10_%1, 5,7-(mmsize/16),8*(mmsize/16)
+cglobal deblock_v_chroma_10, 5,7-(mmsize/16),8*(mmsize/16)
     mov         r5, r0
     sub         r0, r1
     sub         r0, r1
@@ -887,7 +886,7 @@ cglobal deblock_v_chroma_10_%1, 5,7-(mmsize/16),8*(mmsize/16)
 ;-----------------------------------------------------------------------------
 ; void deblock_v_chroma_intra( uint16_t *pix, int stride, int alpha, int beta )
 ;-----------------------------------------------------------------------------
-cglobal deblock_v_chroma_intra_10_%1, 4,6-(mmsize/16),8*(mmsize/16)
+cglobal deblock_v_chroma_intra_10, 4,6-(mmsize/16),8*(mmsize/16)
     mov         r4, r0
     sub         r0, r1
     sub         r0, r1
@@ -914,12 +913,12 @@ cglobal deblock_v_chroma_intra_10_%1, 4,6-(mmsize/16),8*(mmsize/16)
 %endmacro
 
 %if ARCH_X86_64 == 0
-INIT_MMX
-DEBLOCK_CHROMA mmxext
+INIT_MMX mmx2
+DEBLOCK_CHROMA
 %endif
-INIT_XMM
-DEBLOCK_CHROMA sse2
+INIT_XMM sse2
+DEBLOCK_CHROMA
 %if HAVE_AVX
-INIT_AVX
-DEBLOCK_CHROMA avx
+INIT_XMM avx
+DEBLOCK_CHROMA
 %endif
diff --git a/libavcodec/x86/h264_idct_10bit.asm b/libavcodec/x86/h264_idct_10bit.asm
index a126573347..67bc68ec5c 100644
--- a/libavcodec/x86/h264_idct_10bit.asm
+++ b/libavcodec/x86/h264_idct_10bit.asm
@@ -72,25 +72,25 @@ SECTION .text
     STORE_DIFFx2 m2, m3, m4, m5, %1, %3
 %endmacro
 
-%macro IDCT_ADD_10 1
-cglobal h264_idct_add_10_%1, 3,3
+%macro IDCT_ADD_10 0
+cglobal h264_idct_add_10, 3,3
     IDCT4_ADD_10 r0, r1, r2
     RET
 %endmacro
 
-INIT_XMM
-IDCT_ADD_10 sse2
+INIT_XMM sse2
+IDCT_ADD_10
 %if HAVE_AVX
-INIT_AVX
-IDCT_ADD_10 avx
+INIT_XMM avx
+IDCT_ADD_10
 %endif
 
 ;-----------------------------------------------------------------------------
 ; h264_idct_add16(pixel *dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8])
 ;-----------------------------------------------------------------------------
 ;;;;;;; NO FATE SAMPLES TRIGGER THIS
-%macro ADD4x4IDCT 1
-add4x4_idct_%1:
+%macro ADD4x4IDCT 0
+add4x4_idct %+ SUFFIX:
     add   r5, r0
     mova  m0, [r2+ 0]
     mova  m1, [r2+16]
@@ -107,52 +107,52 @@ add4x4_idct_%1:
     ret
 %endmacro
 
-INIT_XMM
+INIT_XMM sse2
 ALIGN 16
-ADD4x4IDCT sse2
+ADD4x4IDCT
 %if HAVE_AVX
-INIT_AVX
+INIT_XMM avx
 ALIGN 16
-ADD4x4IDCT avx
+ADD4x4IDCT
 %endif
 
-%macro ADD16_OP 3
-    cmp          byte [r4+%3], 0
-    jz .skipblock%2
-    mov         r5d, [r1+%2*4]
-    call add4x4_idct_%1
-.skipblock%2:
-%if %2<15
+%macro ADD16_OP 2
+    cmp          byte [r4+%2], 0
+    jz .skipblock%1
+    mov         r5d, [r1+%1*4]
+    call add4x4_idct %+ SUFFIX
+.skipblock%1:
+%if %1<15
     add          r2, 64
 %endif
 %endmacro
 
-%macro IDCT_ADD16_10 1
-cglobal h264_idct_add16_10_%1, 5,6
-    ADD16_OP %1, 0, 4+1*8
-    ADD16_OP %1, 1, 5+1*8
-    ADD16_OP %1, 2, 4+2*8
-    ADD16_OP %1, 3, 5+2*8
-    ADD16_OP %1, 4, 6+1*8
-    ADD16_OP %1, 5, 7+1*8
-    ADD16_OP %1, 6, 6+2*8
-    ADD16_OP %1, 7, 7+2*8
-    ADD16_OP %1, 8, 4+3*8
-    ADD16_OP %1, 9, 5+3*8
-    ADD16_OP %1, 10, 4+4*8
-    ADD16_OP %1, 11, 5+4*8
-    ADD16_OP %1, 12, 6+3*8
-    ADD16_OP %1, 13, 7+3*8
-    ADD16_OP %1, 14, 6+4*8
-    ADD16_OP %1, 15, 7+4*8
+%macro IDCT_ADD16_10 0
+cglobal h264_idct_add16_10, 5,6
+    ADD16_OP 0, 4+1*8
+    ADD16_OP 1, 5+1*8
+    ADD16_OP 2, 4+2*8
+    ADD16_OP 3, 5+2*8
+    ADD16_OP 4, 6+1*8
+    ADD16_OP 5, 7+1*8
+    ADD16_OP 6, 6+2*8
+    ADD16_OP 7, 7+2*8
+    ADD16_OP 8, 4+3*8
+    ADD16_OP 9, 5+3*8
+    ADD16_OP 10, 4+4*8
+    ADD16_OP 11, 5+4*8
+    ADD16_OP 12, 6+3*8
+    ADD16_OP 13, 7+3*8
+    ADD16_OP 14, 6+4*8
+    ADD16_OP 15, 7+4*8
     REP_RET
 %endmacro
 
-INIT_XMM
-IDCT_ADD16_10 sse2
+INIT_XMM sse2
+IDCT_ADD16_10
 %if HAVE_AVX
-INIT_AVX
-IDCT_ADD16_10 avx
+INIT_XMM avx
+IDCT_ADD16_10
 %endif
 
 ;-----------------------------------------------------------------------------
@@ -185,8 +185,8 @@ IDCT_ADD16_10 avx
     mova [%1+%3  ], m4
 %endmacro
 
-INIT_MMX
-cglobal h264_idct_dc_add_10_mmx2,3,3
+INIT_MMX mmx2
+cglobal h264_idct_dc_add_10,3,3
     movd      m0, [r1]
     paddd     m0, [pd_32]
     psrad     m0, 6
@@ -199,8 +199,8 @@ cglobal h264_idct_dc_add_10_mmx2,3,3
 ;-----------------------------------------------------------------------------
 ; void h264_idct8_dc_add(pixel *dst, dctcoef *block, int stride)
 ;-----------------------------------------------------------------------------
-%macro IDCT8_DC_ADD 1
-cglobal h264_idct8_dc_add_10_%1,3,3,7
+%macro IDCT8_DC_ADD 0
+cglobal h264_idct8_dc_add_10,3,3,7
     mov      r1d, [r1]
     add       r1, 32
     sar       r1, 6
@@ -214,45 +214,45 @@ cglobal h264_idct8_dc_add_10_%1,3,3,7
     RET
 %endmacro
 
-INIT_XMM
-IDCT8_DC_ADD sse2
+INIT_XMM sse2
+IDCT8_DC_ADD
 %if HAVE_AVX
-INIT_AVX
-IDCT8_DC_ADD avx
+INIT_XMM avx
+IDCT8_DC_ADD
 %endif
 
 ;-----------------------------------------------------------------------------
 ; h264_idct_add16intra(pixel *dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8])
 ;-----------------------------------------------------------------------------
-%macro AC 2
-.ac%2
-    mov  r5d, [r1+(%2+0)*4]
-    call add4x4_idct_%1
-    mov  r5d, [r1+(%2+1)*4]
+%macro AC 1
+.ac%1
+    mov  r5d, [r1+(%1+0)*4]
+    call add4x4_idct %+ SUFFIX
+    mov  r5d, [r1+(%1+1)*4]
     add  r2, 64
-    call add4x4_idct_%1
+    call add4x4_idct %+ SUFFIX
     add  r2, 64
-    jmp .skipadd%2
+    jmp .skipadd%1
 %endmacro
 
 %assign last_block 16
-%macro ADD16_OP_INTRA 3
-    cmp      word [r4+%3], 0
-    jnz .ac%2
+%macro ADD16_OP_INTRA 2
+    cmp      word [r4+%2], 0
+    jnz .ac%1
     mov      r5d, [r2+ 0]
     or       r5d, [r2+64]
-    jz .skipblock%2
-    mov      r5d, [r1+(%2+0)*4]
-    call idct_dc_add_%1
-.skipblock%2:
-%if %2<last_block-2
+    jz .skipblock%1
+    mov      r5d, [r1+(%1+0)*4]
+    call idct_dc_add %+ SUFFIX
+.skipblock%1:
+%if %1<last_block-2
     add       r2, 128
 %endif
-.skipadd%2:
+.skipadd%1:
 %endmacro
 
-%macro IDCT_ADD16INTRA_10 1
-idct_dc_add_%1:
+%macro IDCT_ADD16INTRA_10 0
+idct_dc_add %+ SUFFIX:
     add       r5, r0
     movq      m0, [r2+ 0]
     movhps    m0, [r2+64]
@@ -265,46 +265,46 @@ idct_dc_add_%1:
     IDCT_DC_ADD_OP_10 r5, r3, r6
     ret
 
-cglobal h264_idct_add16intra_10_%1,5,7,8
-    ADD16_OP_INTRA %1, 0, 4+1*8
-    ADD16_OP_INTRA %1, 2, 4+2*8
-    ADD16_OP_INTRA %1, 4, 6+1*8
-    ADD16_OP_INTRA %1, 6, 6+2*8
-    ADD16_OP_INTRA %1, 8, 4+3*8
-    ADD16_OP_INTRA %1, 10, 4+4*8
-    ADD16_OP_INTRA %1, 12, 6+3*8
-    ADD16_OP_INTRA %1, 14, 6+4*8
+cglobal h264_idct_add16intra_10,5,7,8
+    ADD16_OP_INTRA 0, 4+1*8
+    ADD16_OP_INTRA 2, 4+2*8
+    ADD16_OP_INTRA 4, 6+1*8
+    ADD16_OP_INTRA 6, 6+2*8
+    ADD16_OP_INTRA 8, 4+3*8
+    ADD16_OP_INTRA 10, 4+4*8
+    ADD16_OP_INTRA 12, 6+3*8
+    ADD16_OP_INTRA 14, 6+4*8
     REP_RET
-    AC %1, 8
-    AC %1, 10
-    AC %1, 12
-    AC %1, 14
-    AC %1, 0
-    AC %1, 2
-    AC %1, 4
-    AC %1, 6
+    AC 8
+    AC 10
+    AC 12
+    AC 14
+    AC 0
+    AC 2
+    AC 4
+    AC 6
 %endmacro
 
-INIT_XMM
-IDCT_ADD16INTRA_10 sse2
+INIT_XMM sse2
+IDCT_ADD16INTRA_10
 %if HAVE_AVX
-INIT_AVX
-IDCT_ADD16INTRA_10 avx
+INIT_XMM avx
+IDCT_ADD16INTRA_10
 %endif
 
 %assign last_block 36
 ;-----------------------------------------------------------------------------
 ; h264_idct_add8(pixel **dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8])
 ;-----------------------------------------------------------------------------
-%macro IDCT_ADD8 1
-cglobal h264_idct_add8_10_%1,5,8,7
+%macro IDCT_ADD8 0
+cglobal h264_idct_add8_10,5,8,7
 %if ARCH_X86_64
     mov      r7, r0
 %endif
     add      r2, 1024
     mov      r0, [r0]
-    ADD16_OP_INTRA %1, 16, 4+ 6*8
-    ADD16_OP_INTRA %1, 18, 4+ 7*8
+    ADD16_OP_INTRA 16, 4+ 6*8
+    ADD16_OP_INTRA 18, 4+ 7*8
     add      r2, 1024-128*2
 %if ARCH_X86_64
     mov      r0, [r7+gprsize]
@@ -312,21 +312,21 @@ cglobal h264_idct_add8_10_%1,5,8,7
     mov      r0, r0m
     mov      r0, [r0+gprsize]
 %endif
-    ADD16_OP_INTRA %1, 32, 4+11*8
-    ADD16_OP_INTRA %1, 34, 4+12*8
+    ADD16_OP_INTRA 32, 4+11*8
+    ADD16_OP_INTRA 34, 4+12*8
     REP_RET
-    AC %1, 16
-    AC %1, 18
-    AC %1, 32
-    AC %1, 34
+    AC 16
+    AC 18
+    AC 32
+    AC 34
 
 %endmacro ; IDCT_ADD8
 
-INIT_XMM
-IDCT_ADD8 sse2
+INIT_XMM sse2
+IDCT_ADD8
 %if HAVE_AVX
-INIT_AVX
-IDCT_ADD8 avx
+INIT_XMM avx
+IDCT_ADD8
 %endif
 
 ;-----------------------------------------------------------------------------
@@ -432,19 +432,19 @@ IDCT_ADD8 avx
     STORE_DIFFx2 m0, m1, m6, m7, %1, %3
 %endmacro
 
-%macro IDCT8_ADD 1
-cglobal h264_idct8_add_10_%1, 3,4,16
+%macro IDCT8_ADD 0
+cglobal h264_idct8_add_10, 3,4,16
 %if UNIX64 == 0
     %assign pad 16-gprsize-(stack_offset&15)
     sub  rsp, pad
-    call h264_idct8_add1_10_%1
+    call h264_idct8_add1_10 %+ SUFFIX
     add  rsp, pad
     RET
 %endif
 
 ALIGN 16
 ; TODO: does not need to use stack
-h264_idct8_add1_10_%1:
+h264_idct8_add1_10 %+ SUFFIX:
 %assign pad 256+16-gprsize
     sub          rsp, pad
     add   dword [r1], 32
@@ -499,31 +499,31 @@ h264_idct8_add1_10_%1:
     ret
 %endmacro
 
-INIT_XMM
-IDCT8_ADD sse2
+INIT_XMM sse2
+IDCT8_ADD
 %if HAVE_AVX
-INIT_AVX
-IDCT8_ADD avx
+INIT_XMM avx
+IDCT8_ADD
 %endif
 
 ;-----------------------------------------------------------------------------
 ; h264_idct8_add4(pixel **dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8])
 ;-----------------------------------------------------------------------------
 ;;;;;;; NO FATE SAMPLES TRIGGER THIS
-%macro IDCT8_ADD4_OP 3
-    cmp       byte [r4+%3], 0
-    jz .skipblock%2
-    mov      r0d, [r6+%2*4]
+%macro IDCT8_ADD4_OP 2
+    cmp       byte [r4+%2], 0
+    jz .skipblock%1
+    mov      r0d, [r6+%1*4]
     add       r0, r5
-    call h264_idct8_add1_10_%1
-.skipblock%2:
-%if %2<12
+    call h264_idct8_add1_10 %+ SUFFIX
+.skipblock%1:
+%if %1<12
     add       r1, 256
 %endif
 %endmacro
 
-%macro IDCT8_ADD4 1
-cglobal h264_idct8_add4_10_%1, 0,7,16
+%macro IDCT8_ADD4 0
+cglobal h264_idct8_add4_10, 0,7,16
     %assign pad 16-gprsize-(stack_offset&15)
     SUB      rsp, pad
     mov       r5, r0mp
@@ -531,17 +531,17 @@ cglobal h264_idct8_add4_10_%1, 0,7,16
     mov       r1, r2mp
     mov      r2d, r3m
     movifnidn r4, r4mp
-    IDCT8_ADD4_OP %1,  0, 4+1*8
-    IDCT8_ADD4_OP %1,  4, 6+1*8
-    IDCT8_ADD4_OP %1,  8, 4+3*8
-    IDCT8_ADD4_OP %1, 12, 6+3*8
+    IDCT8_ADD4_OP  0, 4+1*8
+    IDCT8_ADD4_OP  4, 6+1*8
+    IDCT8_ADD4_OP  8, 4+3*8
+    IDCT8_ADD4_OP 12, 6+3*8
     ADD       rsp, pad
     RET
 %endmacro ; IDCT8_ADD4
 
-INIT_XMM
-IDCT8_ADD4 sse2
+INIT_XMM sse2
+IDCT8_ADD4
 %if HAVE_AVX
-INIT_AVX
-IDCT8_ADD4 avx
+INIT_XMM avx
+IDCT8_ADD4
 %endif
diff --git a/libavcodec/x86/h264dsp_mmx.c b/libavcodec/x86/h264dsp_mmx.c
index be7224aed4..d767c687ad 100644
--- a/libavcodec/x86/h264dsp_mmx.c
+++ b/libavcodec/x86/h264dsp_mmx.c
@@ -249,12 +249,12 @@ void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *pix, in
                                                                 int alpha, int beta);
 
 #define LF_FUNCS(type, depth)\
-LF_FUNC (h,  chroma,       depth, mmxext)\
-LF_IFUNC(h,  chroma_intra, depth, mmxext)\
-LF_FUNC (v,  chroma,       depth, mmxext)\
-LF_IFUNC(v,  chroma_intra, depth, mmxext)\
-LF_FUNC (h,  luma,         depth, mmxext)\
-LF_IFUNC(h,  luma_intra,   depth, mmxext)\
+LF_FUNC (h,  chroma,       depth, mmx2)\
+LF_IFUNC(h,  chroma_intra, depth, mmx2)\
+LF_FUNC (v,  chroma,       depth, mmx2)\
+LF_IFUNC(v,  chroma_intra, depth, mmx2)\
+LF_FUNC (h,  luma,         depth, mmx2)\
+LF_IFUNC(h,  luma_intra,   depth, mmx2)\
 LF_FUNC (h,  luma,         depth, sse2)\
 LF_IFUNC(h,  luma_intra,   depth, sse2)\
 LF_FUNC (v,  luma,         depth, sse2)\
@@ -276,24 +276,24 @@ LF_FUNCS( uint8_t,  8)
 LF_FUNCS(uint16_t, 10)
 
 #if ARCH_X86_32 && HAVE_YASM
-LF_FUNC (v8, luma,             8, mmxext)
-static void ff_deblock_v_luma_8_mmxext(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+LF_FUNC (v8, luma,             8, mmx2)
+static void ff_deblock_v_luma_8_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
 {
     if((tc0[0] & tc0[1]) >= 0)
-        ff_deblock_v8_luma_8_mmxext(pix+0, stride, alpha, beta, tc0);
+        ff_deblock_v8_luma_8_mmx2(pix+0, stride, alpha, beta, tc0);
     if((tc0[2] & tc0[3]) >= 0)
-        ff_deblock_v8_luma_8_mmxext(pix+8, stride, alpha, beta, tc0+2);
+        ff_deblock_v8_luma_8_mmx2(pix+8, stride, alpha, beta, tc0+2);
 }
-LF_IFUNC(v8, luma_intra,        8, mmxext)
-static void ff_deblock_v_luma_intra_8_mmxext(uint8_t *pix, int stride, int alpha, int beta)
+LF_IFUNC(v8, luma_intra,        8, mmx2)
+static void ff_deblock_v_luma_intra_8_mmx2(uint8_t *pix, int stride, int alpha, int beta)
 {
-    ff_deblock_v8_luma_intra_8_mmxext(pix+0, stride, alpha, beta);
-    ff_deblock_v8_luma_intra_8_mmxext(pix+8, stride, alpha, beta);
+    ff_deblock_v8_luma_intra_8_mmx2(pix+0, stride, alpha, beta);
+    ff_deblock_v8_luma_intra_8_mmx2(pix+8, stride, alpha, beta);
 }
 #endif /* ARCH_X86_32 */
 
-LF_FUNC (v,  luma,            10, mmxext)
-LF_IFUNC(v,  luma_intra,      10, mmxext)
+LF_FUNC (v,  luma,            10, mmx2)
+LF_IFUNC(v,  luma_intra,      10, mmx2)
 
 /***********************************/
 /* weighted prediction */
@@ -373,17 +373,17 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chrom
                 c->h264_idct_add8  = ff_h264_idct_add8_8_mmx2;
             c->h264_idct_add16intra= ff_h264_idct_add16intra_8_mmx2;
 
-            c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_8_mmxext;
-            c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_8_mmxext;
+            c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_8_mmx2;
+            c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_8_mmx2;
             if (chroma_format_idc == 1) {
-                c->h264_h_loop_filter_chroma= ff_deblock_h_chroma_8_mmxext;
-                c->h264_h_loop_filter_chroma_intra= ff_deblock_h_chroma_intra_8_mmxext;
+                c->h264_h_loop_filter_chroma= ff_deblock_h_chroma_8_mmx2;
+                c->h264_h_loop_filter_chroma_intra= ff_deblock_h_chroma_intra_8_mmx2;
             }
 #if ARCH_X86_32
-            c->h264_v_loop_filter_luma= ff_deblock_v_luma_8_mmxext;
-            c->h264_h_loop_filter_luma= ff_deblock_h_luma_8_mmxext;
-            c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmxext;
-            c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmxext;
+            c->h264_v_loop_filter_luma= ff_deblock_v_luma_8_mmx2;
+            c->h264_h_loop_filter_luma= ff_deblock_h_luma_8_mmx2;
+            c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmx2;
+            c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmx2;
 #endif
             c->weight_h264_pixels_tab[0]= ff_h264_weight_16_mmx2;
             c->weight_h264_pixels_tab[1]= ff_h264_weight_8_mmx2;
@@ -436,12 +436,12 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chrom
     if (mm_flags & AV_CPU_FLAG_MMX) {
         if (mm_flags & AV_CPU_FLAG_MMX2) {
 #if ARCH_X86_32
-            c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_10_mmxext;
-            c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_10_mmxext;
-            c->h264_v_loop_filter_luma= ff_deblock_v_luma_10_mmxext;
-            c->h264_h_loop_filter_luma= ff_deblock_h_luma_10_mmxext;
-            c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_mmxext;
-            c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_mmxext;
+            c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_10_mmx2;
+            c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_10_mmx2;
+            c->h264_v_loop_filter_luma= ff_deblock_v_luma_10_mmx2;
+            c->h264_h_loop_filter_luma= ff_deblock_h_luma_10_mmx2;
+            c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_mmx2;
+            c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_mmx2;
 #endif
             c->h264_idct_dc_add= ff_h264_idct_dc_add_10_mmx2;
             if (mm_flags&AV_CPU_FLAG_SSE2) {
diff --git a/libavcodec/x86/rv34dsp.asm b/libavcodec/x86/rv34dsp.asm
index ae04e82c2d..78d8c92b0b 100644
--- a/libavcodec/x86/rv34dsp.asm
+++ b/libavcodec/x86/rv34dsp.asm
@@ -46,7 +46,7 @@ SECTION .text
 %endmacro
 
 %macro rv34_idct 1
-cglobal rv34_idct_%1_mmx2, 1, 2, 0
+cglobal rv34_idct_%1, 1, 2, 0
     movsx   r1, word [r0]
     IDCT_DC r1
     movd    m0, r1d
@@ -58,14 +58,15 @@ cglobal rv34_idct_%1_mmx2, 1, 2, 0
     REP_RET
 %endmacro
 
-INIT_MMX
+INIT_MMX mmx2
 %define IDCT_DC IDCT_DC_ROUND
 rv34_idct dc
 %define IDCT_DC IDCT_DC_NOROUND
 rv34_idct dc_noround
 
 ; ff_rv34_idct_dc_add_mmx(uint8_t *dst, int stride, int dc);
-cglobal rv34_idct_dc_add_mmx, 3, 3
+INIT_MMX mmx
+cglobal rv34_idct_dc_add, 3, 3
     ; calculate DC
     IDCT_DC_ROUND r2
     pxor       m1, m1
@@ -167,8 +168,8 @@ cglobal rv34_idct_add, 3,3,0, d, s, b
     ret
 
 ; ff_rv34_idct_dc_add_sse4(uint8_t *dst, int stride, int dc);
-INIT_XMM
-cglobal rv34_idct_dc_add_sse4, 3, 3, 6
+INIT_XMM sse4
+cglobal rv34_idct_dc_add, 3, 3, 6
     ; load data
     IDCT_DC_ROUND r2
     pxor       m1, m1
diff --git a/libavcodec/x86/vp3dsp.asm b/libavcodec/x86/vp3dsp.asm
index 46bd9d8f86..dcd55f5f8b 100644
--- a/libavcodec/x86/vp3dsp.asm
+++ b/libavcodec/x86/vp3dsp.asm
@@ -102,8 +102,8 @@ SECTION .text
     mov  [r0+r3  -1], r2w
 %endmacro
 
-INIT_MMX
-cglobal vp3_v_loop_filter_mmx2, 3, 4
+INIT_MMX mmx2
+cglobal vp3_v_loop_filter, 3, 4
 %if ARCH_X86_64
     movsxd        r1, r1d
 %endif
@@ -120,7 +120,7 @@ cglobal vp3_v_loop_filter_mmx2, 3, 4
     movq     [r0   ], m3
     RET
 
-cglobal vp3_h_loop_filter_mmx2, 3, 4
+cglobal vp3_h_loop_filter, 3, 4
 %if ARCH_X86_64
     movsxd        r1, r1d
 %endif
@@ -354,38 +354,6 @@ cglobal vp3_h_loop_filter_mmx2, 3, 4
     movq        I(2), m2
 %endmacro
 
-%macro VP3_IDCT_mmx 1
-    ; eax = quantized input
-    ; ebx = dequantizer matrix
-    ; ecx = IDCT constants
-    ;  M(I) = ecx + MaskOffset(0) + I * 8
-    ;  C(I) = ecx + CosineOffset(32) + (I-1) * 8
-    ; edx = output
-    ; r0..r7 = mm0..mm7
-%define OC_8 [pw_8]
-%define C(x) [vp3_idct_data+16*(x-1)]
-
-    ; at this point, function has completed dequantization + dezigzag +
-    ; partial transposition; now do the idct itself
-%define I(x) [%1+16* x     ]
-%define J(x) [%1+16*(x-4)+8]
-    RowIDCT
-    Transpose
-
-%define I(x) [%1+16* x   +64]
-%define J(x) [%1+16*(x-4)+72]
-    RowIDCT
-    Transpose
-
-%define I(x) [%1+16*x]
-%define J(x) [%1+16*x]
-    ColumnIDCT
-
-%define I(x) [%1+16*x+8]
-%define J(x) [%1+16*x+8]
-    ColumnIDCT
-%endmacro
-
 %macro VP3_1D_IDCT_SSE2 0
     movdqa        m2, I(3)      ; xmm2 = i3
     movdqa        m6, C(3)      ; xmm6 = c3
@@ -501,7 +469,8 @@ cglobal vp3_h_loop_filter_mmx2, 3, 4
     movdqa      O(7), m%8
 %endmacro
 
-%macro VP3_IDCT_sse2 1
+%macro VP3_IDCT 1
+%if mmsize == 16
 %define I(x) [%1+16*x]
 %define O(x) [%1+16*x]
 %define C(x) [vp3_idct_data+16*(x-1)]
@@ -519,11 +488,42 @@ cglobal vp3_h_loop_filter_mmx2, 3, 4
 %define ADD(x)   paddsw x, [pw_8]
         VP3_1D_IDCT_SSE2
         PUT_BLOCK 0, 1, 2, 3, 4, 5, 6, 7
+%else ; mmsize == 8
+    ; eax = quantized input
+    ; ebx = dequantizer matrix
+    ; ecx = IDCT constants
+    ;  M(I) = ecx + MaskOffset(0) + I * 8
+    ;  C(I) = ecx + CosineOffset(32) + (I-1) * 8
+    ; edx = output
+    ; r0..r7 = mm0..mm7
+%define OC_8 [pw_8]
+%define C(x) [vp3_idct_data+16*(x-1)]
+
+    ; at this point, function has completed dequantization + dezigzag +
+    ; partial transposition; now do the idct itself
+%define I(x) [%1+16* x     ]
+%define J(x) [%1+16*(x-4)+8]
+    RowIDCT
+    Transpose
+
+%define I(x) [%1+16* x   +64]
+%define J(x) [%1+16*(x-4)+72]
+    RowIDCT
+    Transpose
+
+%define I(x) [%1+16*x]
+%define J(x) [%1+16*x]
+    ColumnIDCT
+
+%define I(x) [%1+16*x+8]
+%define J(x) [%1+16*x+8]
+    ColumnIDCT
+%endif ; mmsize == 16/8
 %endmacro
 
-%macro vp3_idct_funcs 1
-cglobal vp3_idct_put_%1, 3, 4, 9
-    VP3_IDCT_%1   r2
+%macro vp3_idct_funcs 0
+cglobal vp3_idct_put, 3, 4, 9
+    VP3_IDCT      r2
 
     movsxdifnidn  r1, r1d
     mova          m4, [pb_80]
@@ -565,8 +565,8 @@ cglobal vp3_idct_put_%1, 3, 4, 9
 %endrep
     RET
 
-cglobal vp3_idct_add_%1, 3, 4, 9
-    VP3_IDCT_%1   r2
+cglobal vp3_idct_add, 3, 4, 9
+    VP3_IDCT      r2
 
     mov           r3, 4
     pxor          m4, m4
@@ -607,10 +607,13 @@ cglobal vp3_idct_add_%1, 3, 4, 9
     RET
 %endmacro
 
-INIT_MMX
-vp3_idct_funcs mmx
-INIT_XMM
-vp3_idct_funcs sse2
+%if ARCH_X86_32
+INIT_MMX mmx
+vp3_idct_funcs
+%endif
+
+INIT_XMM sse2
+vp3_idct_funcs
 
 %macro DC_ADD 0
     movq          m2, [r0     ]
@@ -631,8 +634,8 @@ vp3_idct_funcs sse2
     movq   [r0+r3  ], m5
 %endmacro
 
-INIT_MMX
-cglobal vp3_idct_dc_add_mmx2, 3, 4
+INIT_MMX mmx2
+cglobal vp3_idct_dc_add, 3, 4
 %if ARCH_X86_64
     movsxd        r1, r1d
 %endif
diff --git a/libavcodec/x86/vp3dsp_init.c b/libavcodec/x86/vp3dsp_init.c
index 3ae2a90e57..e0366d0f06 100644
--- a/libavcodec/x86/vp3dsp_init.c
+++ b/libavcodec/x86/vp3dsp_init.c
@@ -41,11 +41,13 @@ av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags)
 #if HAVE_YASM
     int cpuflags = av_get_cpu_flags();
 
+#if ARCH_X86_32
     if (HAVE_MMX && cpuflags & AV_CPU_FLAG_MMX) {
         c->idct_put  = ff_vp3_idct_put_mmx;
         c->idct_add  = ff_vp3_idct_add_mmx;
         c->idct_perm = FF_PARTTRANS_IDCT_PERM;
     }
+#endif
 
     if (HAVE_MMX2 && cpuflags & AV_CPU_FLAG_MMX2) {
         c->idct_dc_add = ff_vp3_idct_dc_add_mmx2;
diff --git a/libavcodec/x86/vp56dsp.asm b/libavcodec/x86/vp56dsp.asm
index 2d409bfca1..21ff041d61 100644
--- a/libavcodec/x86/vp56dsp.asm
+++ b/libavcodec/x86/vp56dsp.asm
@@ -27,7 +27,8 @@ cextern pw_64
 
 SECTION .text
 
-%macro DIAG4_MMX 6
+%macro DIAG4 6
+%if mmsize == 8
     movq          m0, [%1+%2]
     movq          m1, [%1+%3]
     movq          m3, m0
@@ -64,9 +65,7 @@ SECTION .text
     psraw         m3, 7
     packuswb      m0, m3
     movq        [%6], m0
-%endmacro
-
-%macro DIAG4_SSE2 6
+%else ; mmsize == 16
     movq          m0, [%1+%2]
     movq          m1, [%1+%3]
     punpcklbw     m0, m7
@@ -86,9 +85,11 @@ SECTION .text
     psraw         m0, 7
     packuswb      m0, m0
     movq        [%6], m0
+%endif ; mmsize == 8/16
 %endmacro
 
-%macro SPLAT4REGS_MMX 0
+%macro SPLAT4REGS 0
+%if mmsize == 8
     movq         m5, m3
     punpcklwd    m3, m3
     movq         m4, m3
@@ -102,9 +103,7 @@ SECTION .text
     movq [rsp+8*12], m4
     movq [rsp+8*13], m5
     movq [rsp+8*14], m2
-%endmacro
-
-%macro SPLAT4REGS_SSE2 0
+%else ; mmsize == 16
     pshuflw      m4, m3, 0x0
     pshuflw      m5, m3, 0x55
     pshuflw      m6, m3, 0xAA
@@ -113,15 +112,16 @@ SECTION .text
     punpcklqdq   m5, m5
     punpcklqdq   m6, m6
     punpcklqdq   m3, m3
+%endif ; mmsize == 8/16
 %endmacro
 
-%macro vp6_filter_diag4 2
+%macro vp6_filter_diag4 0
 ; void ff_vp6_filter_diag4_<opt>(uint8_t *dst, uint8_t *src, int stride,
 ;                                const int16_t h_weight[4], const int16_t v_weights[4])
-cglobal vp6_filter_diag4_%1, 5, 7, %2
+cglobal vp6_filter_diag4, 5, 7, 8
     mov          r5, rsp         ; backup stack pointer
     and         rsp, ~(mmsize-1) ; align stack
-%ifidn %1, sse2
+%if mmsize == 16
     sub         rsp, 8*11
 %else
     sub         rsp, 8*15
@@ -162,12 +162,10 @@ cglobal vp6_filter_diag4_%1, 5, 7, %2
     RET
 %endmacro
 
-INIT_MMX
-%define DIAG4      DIAG4_MMX
-%define SPLAT4REGS SPLAT4REGS_MMX
-vp6_filter_diag4 mmx,  0
+%if ARCH_X86_32
+INIT_MMX mmx
+vp6_filter_diag4
+%endif
 
-INIT_XMM
-%define DIAG4      DIAG4_SSE2
-%define SPLAT4REGS SPLAT4REGS_SSE2
-vp6_filter_diag4 sse2, 8
+INIT_XMM sse2
+vp6_filter_diag4
diff --git a/libavcodec/x86/vp56dsp_init.c b/libavcodec/x86/vp56dsp_init.c
index 87fc935315..9108aa446d 100644
--- a/libavcodec/x86/vp56dsp_init.c
+++ b/libavcodec/x86/vp56dsp_init.c
@@ -36,9 +36,11 @@ av_cold void ff_vp56dsp_init_x86(VP56DSPContext* c, enum CodecID codec)
     int mm_flags = av_get_cpu_flags();
 
     if (CONFIG_VP6_DECODER && codec == CODEC_ID_VP6) {
+#if ARCH_X86_32
         if (mm_flags & AV_CPU_FLAG_MMX) {
             c->vp6_filter_diag4 = ff_vp6_filter_diag4_mmx;
         }
+#endif
 
         if (mm_flags & AV_CPU_FLAG_SSE2) {
             c->vp6_filter_diag4 = ff_vp6_filter_diag4_sse2;
diff --git a/libavdevice/Makefile b/libavdevice/Makefile
index 8f6f843349..efffa8bbcd 100644
--- a/libavdevice/Makefile
+++ b/libavdevice/Makefile
@@ -35,7 +35,7 @@ OBJS-$(CONFIG_SNDIO_OUTDEV)              += sndio_common.o sndio_enc.o
 OBJS-$(CONFIG_V4L2_INDEV)                += v4l2.o timefilter.o
 OBJS-$(CONFIG_V4L_INDEV)                 += v4l.o
 OBJS-$(CONFIG_VFWCAP_INDEV)              += vfwcap.o
-OBJS-$(CONFIG_X11_GRAB_DEVICE_INDEV)     += x11grab.o
+OBJS-$(CONFIG_X11GRAB_INDEV)             += x11grab.o
 
 # external libraries
 OBJS-$(CONFIG_LIBCDIO_INDEV)             += libcdio.o
diff --git a/libavdevice/alldevices.c b/libavdevice/alldevices.c
index 092e6c547c..7789058bb1 100644
--- a/libavdevice/alldevices.c
+++ b/libavdevice/alldevices.c
@@ -55,7 +55,7 @@ void avdevice_register_all(void)
     REGISTER_INDEV    (V4L2, v4l2);
 //    REGISTER_INDEV    (V4L, v4l
     REGISTER_INDEV    (VFWCAP, vfwcap);
-    REGISTER_INDEV    (X11_GRAB_DEVICE, x11_grab_device);
+    REGISTER_INDEV    (X11GRAB, x11grab);
 
     /* external libraries */
     REGISTER_INDEV    (LIBCDIO, libcdio);
diff --git a/libavdevice/avdevice.h b/libavdevice/avdevice.h
index 1c96dab144..93a044f270 100644
--- a/libavdevice/avdevice.h
+++ b/libavdevice/avdevice.h
@@ -36,7 +36,7 @@
  * (de)muxers in libavdevice are of the AVFMT_NOFILE type (they use their own
  * I/O functions). The filename passed to avformat_open_input() often does not
  * refer to an actually existing file, but has some special device-specific
- * meaning - e.g. for the x11grab device it is the display name.
+ * meaning - e.g. for x11grab it is the display name.
  *
  * To use libavdevice, simply call avdevice_register_all() to register all
  * compiled muxers and demuxers. They all use standard libavformat API.
diff --git a/libavdevice/fbdev.c b/libavdevice/fbdev.c
index 40eed68ea2..93f7ce3763 100644
--- a/libavdevice/fbdev.c
+++ b/libavdevice/fbdev.c
@@ -39,6 +39,7 @@
 #include "libavutil/log.h"
 #include "libavutil/mem.h"
 #include "libavutil/opt.h"
+#include "libavutil/time.h"
 #include "libavutil/parseutils.h"
 #include "libavutil/pixdesc.h"
 #include "avdevice.h"
diff --git a/libavdevice/jack_audio.c b/libavdevice/jack_audio.c
index 33ee19ce73..c9ef23e68a 100644
--- a/libavdevice/jack_audio.c
+++ b/libavdevice/jack_audio.c
@@ -27,6 +27,7 @@
 #include "libavutil/log.h"
 #include "libavutil/fifo.h"
 #include "libavutil/opt.h"
+#include "libavutil/time.h"
 #include "libavcodec/avcodec.h"
 #include "libavformat/avformat.h"
 #include "libavformat/internal.h"
diff --git a/libavdevice/oss_audio.c b/libavdevice/oss_audio.c
index 53acba3181..531d0b6225 100644
--- a/libavdevice/oss_audio.c
+++ b/libavdevice/oss_audio.c
@@ -36,6 +36,7 @@
 
 #include "libavutil/log.h"
 #include "libavutil/opt.h"
+#include "libavutil/time.h"
 #include "libavcodec/avcodec.h"
 #include "avdevice.h"
 #include "libavformat/internal.h"
diff --git a/libavdevice/x11grab.c b/libavdevice/x11grab.c
index cc9268ec9a..6202bbc3f4 100644
--- a/libavdevice/x11grab.c
+++ b/libavdevice/x11grab.c
@@ -41,6 +41,7 @@
 #include "libavutil/log.h"
 #include "libavutil/opt.h"
 #include "libavutil/parseutils.h"
+#include "libavutil/time.h"
 #include <time.h>
 #include <X11/X.h>
 #include <X11/Xlib.h>
@@ -56,8 +57,7 @@
 /**
  * X11 Device Demuxer context
  */
-struct x11_grab
-{
+struct x11grab {
     const AVClass *class;    /**< Class for private options. */
     int frame_size;          /**< Size in bytes of a grabbed frame */
     AVRational time_base;    /**< Time base */
@@ -84,10 +84,10 @@ struct x11_grab
 /**
  * Draw grabbing region window
  *
- * @param s x11_grab context
+ * @param s x11grab context
  */
 static void
-x11grab_draw_region_win(struct x11_grab *s)
+x11grab_draw_region_win(struct x11grab *s)
 {
     Display *dpy = s->dpy;
     int screen;
@@ -109,10 +109,10 @@ x11grab_draw_region_win(struct x11_grab *s)
 /**
  * Initialize grabbing region window
  *
- * @param s x11_grab context
+ * @param s x11grab context
  */
 static void
-x11grab_region_win_init(struct x11_grab *s)
+x11grab_region_win_init(struct x11grab *s)
 {
     Display *dpy = s->dpy;
     int screen;
@@ -154,7 +154,7 @@ x11grab_region_win_init(struct x11_grab *s)
 static int
 x11grab_read_header(AVFormatContext *s1)
 {
-    struct x11_grab *x11grab = s1->priv_data;
+    struct x11grab *x11grab = s1->priv_data;
     Display *dpy;
     AVStream *st = NULL;
     enum PixelFormat input_pixfmt;
@@ -330,7 +330,7 @@ out:
  *          coordinates
  */
 static void
-paint_mouse_pointer(XImage *image, struct x11_grab *s)
+paint_mouse_pointer(XImage *image, struct x11grab *s)
 {
     int x_off = s->x_off;
     int y_off = s->y_off;
@@ -444,7 +444,7 @@ xget_zpixmap(Display *dpy, Drawable d, XImage *image, int x, int y)
 static int
 x11grab_read_packet(AVFormatContext *s1, AVPacket *pkt)
 {
-    struct x11_grab *s = s1->priv_data;
+    struct x11grab *s = s1->priv_data;
     Display *dpy = s->dpy;
     XImage *image = s->image;
     int x_off = s->x_off;
@@ -554,7 +554,7 @@ x11grab_read_packet(AVFormatContext *s1, AVPacket *pkt)
 static int
 x11grab_read_close(AVFormatContext *s1)
 {
-    struct x11_grab *x11grab = s1->priv_data;
+    struct x11grab *x11grab = s1->priv_data;
 
     /* Detach cleanly from shared mem */
     if (x11grab->use_shm) {
@@ -578,7 +578,7 @@ x11grab_read_close(AVFormatContext *s1)
     return 0;
 }
 
-#define OFFSET(x) offsetof(struct x11_grab, x)
+#define OFFSET(x) offsetof(struct x11grab, x)
 #define DEC AV_OPT_FLAG_DECODING_PARAM
 static const AVOption options[] = {
     { "video_size", "A string describing frame size, such as 640x480 or hd720.", OFFSET(width), AV_OPT_TYPE_IMAGE_SIZE, {.str = "vga"}, 0, 0, DEC },
@@ -599,10 +599,10 @@ static const AVClass x11_class = {
 };
 
 /** x11 grabber device demuxer declaration */
-AVInputFormat ff_x11_grab_device_demuxer = {
+AVInputFormat ff_x11grab_demuxer = {
     .name           = "x11grab",
     .long_name      = NULL_IF_CONFIG_SMALL("X11grab"),
-    .priv_data_size = sizeof(struct x11_grab),
+    .priv_data_size = sizeof(struct x11grab),
     .read_header    = x11grab_read_header,
     .read_packet    = x11grab_read_packet,
     .read_close     = x11grab_read_close,
diff --git a/libavfilter/version.h b/libavfilter/version.h
index 7d9b378863..6b5fa89ed9 100644
--- a/libavfilter/version.h
+++ b/libavfilter/version.h
@@ -29,7 +29,7 @@
 #include "libavutil/avutil.h"
 
 #define LIBAVFILTER_VERSION_MAJOR  3
-#define LIBAVFILTER_VERSION_MINOR  4
+#define LIBAVFILTER_VERSION_MINOR  5
 #define LIBAVFILTER_VERSION_MICRO 100
 
 #define LIBAVFILTER_VERSION_INT AV_VERSION_INT(LIBAVFILTER_VERSION_MAJOR, \
diff --git a/libavformat/hls.c b/libavformat/hls.c
index 9d4a2bc39f..11b6ab1273 100644
--- a/libavformat/hls.c
+++ b/libavformat/hls.c
@@ -420,8 +420,6 @@ reload:
     ret = ffurl_read(v->input, buf, buf_size);
     if (ret > 0)
         return ret;
-    if (ret < 0 && ret != AVERROR_EOF)
-        return ret;
     ffurl_close(v->input);
     v->input = NULL;
     v->cur_seq_no++;
diff --git a/libavformat/mp3dec.c b/libavformat/mp3dec.c
index f6db77a8d2..3d52c3fe86 100644
--- a/libavformat/mp3dec.c
+++ b/libavformat/mp3dec.c
@@ -196,7 +196,6 @@ static int mp3_read_packet(AVFormatContext *s, AVPacket *pkt)
     MP3Context *mp3 = s->priv_data;
     int ret, size;
     int64_t pos;
-    //    AVStream *st = s->streams[0];
 
     size= MP3_PACKET_SIZE;
     pos = avio_tell(s->pb);
@@ -204,15 +203,15 @@ static int mp3_read_packet(AVFormatContext *s, AVPacket *pkt)
         size= FFMIN(size, mp3->filesize - pos);
 
     ret= av_get_packet(s->pb, pkt, size);
-
-    pkt->flags &= ~AV_PKT_FLAG_CORRUPT;
-    pkt->stream_index = 0;
     if (ret <= 0) {
         if(ret<0)
             return ret;
         return AVERROR_EOF;
     }
 
+    pkt->flags &= ~AV_PKT_FLAG_CORRUPT;
+    pkt->stream_index = 0;
+
     if (ret >= ID3v1_TAG_SIZE &&
         memcmp(&pkt->data[ret - ID3v1_TAG_SIZE], "TAG", 3) == 0)
         ret -= ID3v1_TAG_SIZE;
diff --git a/libavformat/rtpdec.c b/libavformat/rtpdec.c
index 3a272e33c7..989695ee78 100644
--- a/libavformat/rtpdec.c
+++ b/libavformat/rtpdec.c
@@ -21,6 +21,7 @@
 
 #include "libavutil/mathematics.h"
 #include "libavutil/avstring.h"
+#include "libavutil/time.h"
 #include "libavcodec/get_bits.h"
 #include "avformat.h"
 #include "mpegts.h"
diff --git a/libavformat/rtsp.c b/libavformat/rtsp.c
index c00ff99686..cbcda2e71d 100644
--- a/libavformat/rtsp.c
+++ b/libavformat/rtsp.c
@@ -27,6 +27,7 @@
 #include "libavutil/random_seed.h"
 #include "libavutil/dict.h"
 #include "libavutil/opt.h"
+#include "libavutil/time.h"
 #include "avformat.h"
 #include "avio_internal.h"
 
diff --git a/libavformat/rtspdec.c b/libavformat/rtspdec.c
index d34da53ded..f642a56644 100644
--- a/libavformat/rtspdec.c
+++ b/libavformat/rtspdec.c
@@ -23,6 +23,7 @@
 #include "libavutil/intreadwrite.h"
 #include "libavutil/mathematics.h"
 #include "libavutil/random_seed.h"
+#include "libavutil/time.h"
 #include "avformat.h"
 
 #include "internal.h"
diff --git a/libavformat/rtspenc.c b/libavformat/rtspenc.c
index ece879eee1..927dd38d56 100644
--- a/libavformat/rtspenc.c
+++ b/libavformat/rtspenc.c
@@ -31,6 +31,7 @@
 #include "avio_internal.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/avstring.h"
+#include "libavutil/time.h"
 #include "url.h"
 
 #define SDP_MAX_SIZE 16384
diff --git a/libavformat/sapenc.c b/libavformat/sapenc.c
index c1923f31c6..c1d8db3761 100644
--- a/libavformat/sapenc.c
+++ b/libavformat/sapenc.c
@@ -24,6 +24,7 @@
 #include "libavutil/random_seed.h"
 #include "libavutil/avstring.h"
 #include "libavutil/intreadwrite.h"
+#include "libavutil/time.h"
 #include "internal.h"
 #include "network.h"
 #include "os_support.h"
diff --git a/libavformat/tls.c b/libavformat/tls.c
index 908bd505aa..38dd70c9df 100644
--- a/libavformat/tls.c
+++ b/libavformat/tls.c
@@ -266,7 +266,7 @@ static int tls_read(URLContext *h, uint8_t *buf, int size)
         if (ret > 0)
             return ret;
         if (ret == 0)
-            return AVERROR(EIO);
+            return AVERROR_EOF;
         if ((ret = do_tls_poll(h, ret)) < 0)
             return ret;
     }
@@ -281,7 +281,7 @@ static int tls_write(URLContext *h, const uint8_t *buf, int size)
         if (ret > 0)
             return ret;
         if (ret == 0)
-            return AVERROR(EIO);
+            return AVERROR_EOF;
         if ((ret = do_tls_poll(h, ret)) < 0)
             return ret;
     }
diff --git a/libavformat/utils.c b/libavformat/utils.c
index b753598950..eda5e28721 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -37,6 +37,7 @@
 #include "libavutil/avstring.h"
 #include "libavutil/mathematics.h"
 #include "libavutil/parseutils.h"
+#include "libavutil/time.h"
 #include "libavutil/timestamp.h"
 #include "riff.h"
 #include "audiointerleave.h"
@@ -2765,9 +2766,7 @@ int avformat_find_stream_info(AVFormatContext *ic, AVDictionary **options)
             if (tb_unreliable(st->codec) && st->info->duration_count > 15 && st->info->duration_gcd > FFMAX(1, st->time_base.den/(500LL*st->time_base.num)) && !st->r_frame_rate.num)
                 av_reduce(&st->r_frame_rate.num, &st->r_frame_rate.den, st->time_base.den, st->time_base.num * st->info->duration_gcd, INT_MAX);
             if (st->info->duration_count && !st->r_frame_rate.num
-               && tb_unreliable(st->codec) /*&&
-               //FIXME we should not special-case MPEG-2, but this needs testing with non-MPEG-2 ...
-               st->time_base.num*duration_sum[i]/st->info->duration_count*101LL > st->time_base.den*/){
+                && tb_unreliable(st->codec)) {
                 int num = 0;
                 double best_error= 0.01;
 
diff --git a/libavformat/wv.c b/libavformat/wv.c
index 767f4f48ef..a80cad776d 100644
--- a/libavformat/wv.c
+++ b/libavformat/wv.c
@@ -90,17 +90,17 @@ static int wv_read_block_header(AVFormatContext *ctx, AVIOContext *pb, int appen
     if(!append){
         tag = avio_rl32(pb);
         if (tag != MKTAG('w', 'v', 'p', 'k'))
-            return -1;
+            return AVERROR_INVALIDDATA;
         size = avio_rl32(pb);
         if(size < 24 || size > WV_BLOCK_LIMIT){
             av_log(ctx, AV_LOG_ERROR, "Incorrect block size %i\n", size);
-            return -1;
+            return AVERROR_INVALIDDATA;
         }
         wc->blksize = size;
         ver = avio_rl16(pb);
         if(ver < 0x402 || ver > 0x410){
             av_log(ctx, AV_LOG_ERROR, "Unsupported version %03X\n", ver);
-            return -1;
+            return AVERROR_PATCHWELCOME;
         }
         avio_r8(pb); // track no
         avio_r8(pb); // track sub index
@@ -128,7 +128,7 @@ static int wv_read_block_header(AVFormatContext *ctx, AVIOContext *pb, int appen
         int64_t block_end = avio_tell(pb) + wc->blksize - 24;
         if(!pb->seekable){
             av_log(ctx, AV_LOG_ERROR, "Cannot determine additional parameters\n");
-            return -1;
+            return AVERROR_INVALIDDATA;
         }
         while(avio_tell(pb) < block_end){
             int id, size;
@@ -141,7 +141,7 @@ static int wv_read_block_header(AVFormatContext *ctx, AVIOContext *pb, int appen
             case 0xD:
                 if(size <= 1){
                     av_log(ctx, AV_LOG_ERROR, "Insufficient channel information\n");
-                    return -1;
+                    return AVERROR_INVALIDDATA;
                 }
                 chan = avio_r8(pb);
                 switch(size - 2){
@@ -164,7 +164,7 @@ static int wv_read_block_header(AVFormatContext *ctx, AVIOContext *pb, int appen
                     break;
                 default:
                     av_log(ctx, AV_LOG_ERROR, "Invalid channel info size %d\n", size);
-                    return -1;
+                    return AVERROR_INVALIDDATA;
                 }
                 break;
             case 0x27:
@@ -178,7 +178,7 @@ static int wv_read_block_header(AVFormatContext *ctx, AVIOContext *pb, int appen
         }
         if(rate == -1){
             av_log(ctx, AV_LOG_ERROR, "Cannot determine custom sampling rate\n");
-            return -1;
+            return AVERROR_INVALIDDATA;
         }
         avio_seek(pb, block_end - wc->blksize + 24, SEEK_SET);
     }
@@ -189,15 +189,15 @@ static int wv_read_block_header(AVFormatContext *ctx, AVIOContext *pb, int appen
 
     if(wc->flags && bpp != wc->bpp){
         av_log(ctx, AV_LOG_ERROR, "Bits per sample differ, this block: %i, header block: %i\n", bpp, wc->bpp);
-        return -1;
+        return AVERROR_INVALIDDATA;
     }
     if(wc->flags && !wc->multichannel && chan != wc->chan){
         av_log(ctx, AV_LOG_ERROR, "Channels differ, this block: %i, header block: %i\n", chan, wc->chan);
-        return -1;
+        return AVERROR_INVALIDDATA;
     }
     if(wc->flags && rate != -1 && rate != wc->rate){
         av_log(ctx, AV_LOG_ERROR, "Sampling rate differ, this block: %i, header block: %i\n", rate, wc->rate);
-        return -1;
+        return AVERROR_INVALIDDATA;
     }
     wc->blksize = size - 24;
     return 0;
@@ -208,11 +208,12 @@ static int wv_read_header(AVFormatContext *s)
     AVIOContext *pb = s->pb;
     WVContext *wc = s->priv_data;
     AVStream *st;
+    int ret;
 
     wc->block_parsed = 0;
     for(;;){
-        if(wv_read_block_header(s, pb, 0) < 0)
-            return -1;
+        if ((ret = wv_read_block_header(s, pb, 0)) < 0)
+            return ret;
         if(!AV_RN32(wc->extra))
             avio_skip(pb, wc->blksize - 24);
         else
@@ -222,7 +223,7 @@ static int wv_read_header(AVFormatContext *s)
     /* now we are ready: build format streams */
     st = avformat_new_stream(s, NULL);
     if (!st)
-        return -1;
+        return AVERROR(ENOMEM);
     st->codec->codec_type = AVMEDIA_TYPE_AUDIO;
     st->codec->codec_id = CODEC_ID_WAVPACK;
     st->codec->channels = wc->chan;
@@ -254,10 +255,10 @@ static int wv_read_packet(AVFormatContext *s,
     uint32_t block_samples;
 
     if (url_feof(s->pb))
-        return AVERROR(EIO);
+        return AVERROR_EOF;
     if(wc->block_parsed){
-        if(wv_read_block_header(s, s->pb, 0) < 0)
-            return -1;
+        if ((ret = wv_read_block_header(s, s->pb, 0)) < 0)
+            return ret;
     }
 
     pos = wc->pos;
@@ -275,7 +276,7 @@ static int wv_read_packet(AVFormatContext *s,
     while(!(wc->flags & WV_END_BLOCK)){
         if(avio_rl32(s->pb) != MKTAG('w', 'v', 'p', 'k')){
             av_free_packet(pkt);
-            return -1;
+            return AVERROR_INVALIDDATA;
         }
         if((ret = av_append_packet(s->pb, pkt, 4)) < 0){
             av_free_packet(pkt);
@@ -285,14 +286,14 @@ static int wv_read_packet(AVFormatContext *s,
         if(size < 24 || size > WV_BLOCK_LIMIT){
             av_free_packet(pkt);
             av_log(s, AV_LOG_ERROR, "Incorrect block size %d\n", size);
-            return -1;
+            return AVERROR_INVALIDDATA;
         }
         wc->blksize = size;
         ver = avio_rl16(s->pb);
         if(ver < 0x402 || ver > 0x410){
             av_free_packet(pkt);
             av_log(s, AV_LOG_ERROR, "Unsupported version %03X\n", ver);
-            return -1;
+            return AVERROR_PATCHWELCOME;
         }
         avio_r8(s->pb); // track no
         avio_r8(s->pb); // track sub index
@@ -304,9 +305,9 @@ static int wv_read_packet(AVFormatContext *s,
         }
         memcpy(wc->extra, pkt->data + pkt->size - WV_EXTRA_SIZE, WV_EXTRA_SIZE);
 
-        if(wv_read_block_header(s, s->pb, 1) < 0){
+        if ((ret = wv_read_block_header(s, s->pb, 1)) < 0){
             av_free_packet(pkt);
-            return -1;
+            return ret;
         }
         ret = av_append_packet(s->pb, pkt, wc->blksize);
         if(ret < 0){
@@ -345,14 +346,14 @@ static int wv_read_seek(AVFormatContext *s, int stream_index, int64_t timestamp,
     }
     /* if timestamp is out of bounds, return error */
     if(timestamp < 0 || timestamp >= s->duration)
-        return -1;
+        return AVERROR(EINVAL);
 
     pos = avio_tell(s->pb);
     do{
         ret = av_read_frame(s, pkt);
         if (ret < 0){
             avio_seek(s->pb, pos, SEEK_SET);
-            return -1;
+            return ret;
         }
         pts = pkt->pts;
         av_free_packet(pkt);
diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm
index c80e0a1c1a..95d707bd7b 100644
--- a/libavutil/x86/x86inc.asm
+++ b/libavutil/x86/x86inc.asm
@@ -36,8 +36,8 @@
 
 %define program_name ff
 
-%define UNIX64 0
 %define WIN64  0
+%define UNIX64 0
 %if ARCH_X86_64
     %ifidn __OUTPUT_FORMAT__,win32
         %define WIN64  1
@@ -54,11 +54,6 @@
     %define mangle(x) x
 %endif
 
-; FIXME: All of the 64bit asm functions that take a stride as an argument
-; via register, assume that the high dword of that register is filled with 0.
-; This is true in practice (since we never do any 64bit arithmetic on strides,
-; and x264's strides are all positive), but is not guaranteed by the ABI.
-
 ; Name of the .rodata section.
 %macro SECTION_RODATA 0-1 16
     ; Kludge: Something on OS X fails to align .rodata even given an align
@@ -152,34 +147,38 @@ CPU amdnop
 ; registers:
 ; rN and rNq are the native-size register holding function argument N
 ; rNd, rNw, rNb are dword, word, and byte size
+; rNh is the high 8 bits of the word size
 ; rNm is the original location of arg N (a register or on the stack), dword
 ; rNmp is native size
 
-%macro DECLARE_REG 5-6
+%macro DECLARE_REG 2-3
     %define r%1q %2
-    %define r%1d %3
-    %define r%1w %4
-    %define r%1b %5
-    %if %0 == 5
-        %define r%1m  %3
+    %define r%1d %2d
+    %define r%1w %2w
+    %define r%1b %2b
+    %define r%1h %2h
+    %if %0 == 2
+        %define r%1m  %2d
         %define r%1mp %2
     %elif ARCH_X86_64 ; memory
-        %define r%1m [rsp + stack_offset + %6]
+        %define r%1m [rsp + stack_offset + %3]
         %define r%1mp qword r %+ %1 %+ m
     %else
-        %define r%1m [esp + stack_offset + %6]
+        %define r%1m [esp + stack_offset + %3]
         %define r%1mp dword r %+ %1 %+ m
     %endif
     %define r%1  %2
 %endmacro
 
-%macro DECLARE_REG_SIZE 2
+%macro DECLARE_REG_SIZE 3
     %define r%1q r%1
     %define e%1q r%1
     %define r%1d e%1
     %define e%1d e%1
     %define r%1w %1
     %define e%1w %1
+    %define r%1h %3
+    %define e%1h %3
     %define r%1b %2
     %define e%1b %2
 %if ARCH_X86_64 == 0
@@ -187,13 +186,13 @@ CPU amdnop
 %endif
 %endmacro
 
-DECLARE_REG_SIZE ax, al
-DECLARE_REG_SIZE bx, bl
-DECLARE_REG_SIZE cx, cl
-DECLARE_REG_SIZE dx, dl
-DECLARE_REG_SIZE si, sil
-DECLARE_REG_SIZE di, dil
-DECLARE_REG_SIZE bp, bpl
+DECLARE_REG_SIZE ax, al, ah
+DECLARE_REG_SIZE bx, bl, bh
+DECLARE_REG_SIZE cx, cl, ch
+DECLARE_REG_SIZE dx, dl, dh
+DECLARE_REG_SIZE si, sil, null
+DECLARE_REG_SIZE di, dil, null
+DECLARE_REG_SIZE bp, bpl, null
 
 ; t# defines for when per-arch register allocation is more complex than just function arguments
 
@@ -211,6 +210,7 @@ DECLARE_REG_SIZE bp, bpl
         %define t%1q t%1 %+ q
         %define t%1d t%1 %+ d
         %define t%1w t%1 %+ w
+        %define t%1h t%1 %+ h
         %define t%1b t%1 %+ b
         %rotate 1
     %endrep
@@ -300,6 +300,7 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
             CAT_UNDEF arg_name %+ %%i, q
             CAT_UNDEF arg_name %+ %%i, d
             CAT_UNDEF arg_name %+ %%i, w
+            CAT_UNDEF arg_name %+ %%i, h
             CAT_UNDEF arg_name %+ %%i, b
             CAT_UNDEF arg_name %+ %%i, m
             CAT_UNDEF arg_name %+ %%i, mp
@@ -315,6 +316,7 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
         %xdefine %1q r %+ %%i %+ q
         %xdefine %1d r %+ %%i %+ d
         %xdefine %1w r %+ %%i %+ w
+        %xdefine %1h r %+ %%i %+ h
         %xdefine %1b r %+ %%i %+ b
         %xdefine %1m r %+ %%i %+ m
         %xdefine %1mp r %+ %%i %+ mp
@@ -328,21 +330,21 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
 
 %if WIN64 ; Windows x64 ;=================================================
 
-DECLARE_REG 0,  rcx, ecx,  cx,   cl
-DECLARE_REG 1,  rdx, edx,  dx,   dl
-DECLARE_REG 2,  R8,  R8D,  R8W,  R8B
-DECLARE_REG 3,  R9,  R9D,  R9W,  R9B
-DECLARE_REG 4,  R10, R10D, R10W, R10B, 40
-DECLARE_REG 5,  R11, R11D, R11W, R11B, 48
-DECLARE_REG 6,  rax, eax,  ax,   al,   56
-DECLARE_REG 7,  rdi, edi,  di,   dil,  64
-DECLARE_REG 8,  rsi, esi,  si,   sil,  72
-DECLARE_REG 9,  rbx, ebx,  bx,   bl,   80
-DECLARE_REG 10, rbp, ebp,  bp,   bpl,  88
-DECLARE_REG 11, R12, R12D, R12W, R12B, 96
-DECLARE_REG 12, R13, R13D, R13W, R13B, 104
-DECLARE_REG 13, R14, R14D, R14W, R14B, 112
-DECLARE_REG 14, R15, R15D, R15W, R15B, 120
+DECLARE_REG 0,  rcx
+DECLARE_REG 1,  rdx
+DECLARE_REG 2,  R8
+DECLARE_REG 3,  R9
+DECLARE_REG 4,  R10, 40
+DECLARE_REG 5,  R11, 48
+DECLARE_REG 6,  rax, 56
+DECLARE_REG 7,  rdi, 64
+DECLARE_REG 8,  rsi, 72
+DECLARE_REG 9,  rbx, 80
+DECLARE_REG 10, rbp, 88
+DECLARE_REG 11, R12, 96
+DECLARE_REG 12, R13, 104
+DECLARE_REG 13, R14, 112
+DECLARE_REG 14, R15, 120
 
 %macro PROLOGUE 2-4+ 0 ; #args, #regs, #xmm_regs, arg_names...
     %assign num_args %1
@@ -389,6 +391,8 @@ DECLARE_REG 14, R15, R15D, R15W, R15B, 120
     %assign xmm_regs_used 0
 %endmacro
 
+%define has_epilogue regs_used > 7 || xmm_regs_used > 6 || mmsize == 32
+
 %macro RET 0
     WIN64_RESTORE_XMM_INTERNAL rsp
     POP_IF_USED 14, 13, 12, 11, 10, 9, 8, 7
@@ -398,31 +402,23 @@ DECLARE_REG 14, R15, R15D, R15W, R15B, 120
     ret
 %endmacro
 
-%macro REP_RET 0
-    %if regs_used > 7 || xmm_regs_used > 6 || mmsize == 32
-        RET
-    %else
-        rep ret
-    %endif
-%endmacro
-
 %elif ARCH_X86_64 ; *nix x64 ;=============================================
 
-DECLARE_REG 0,  rdi, edi,  di,   dil
-DECLARE_REG 1,  rsi, esi,  si,   sil
-DECLARE_REG 2,  rdx, edx,  dx,   dl
-DECLARE_REG 3,  rcx, ecx,  cx,   cl
-DECLARE_REG 4,  R8,  R8D,  R8W,  R8B
-DECLARE_REG 5,  R9,  R9D,  R9W,  R9B
-DECLARE_REG 6,  rax, eax,  ax,   al,   8
-DECLARE_REG 7,  R10, R10D, R10W, R10B, 16
-DECLARE_REG 8,  R11, R11D, R11W, R11B, 24
-DECLARE_REG 9,  rbx, ebx,  bx,   bl,   32
-DECLARE_REG 10, rbp, ebp,  bp,   bpl,  40
-DECLARE_REG 11, R12, R12D, R12W, R12B, 48
-DECLARE_REG 12, R13, R13D, R13W, R13B, 56
-DECLARE_REG 13, R14, R14D, R14W, R14B, 64
-DECLARE_REG 14, R15, R15D, R15W, R15B, 72
+DECLARE_REG 0,  rdi
+DECLARE_REG 1,  rsi
+DECLARE_REG 2,  rdx
+DECLARE_REG 3,  rcx
+DECLARE_REG 4,  R8
+DECLARE_REG 5,  R9
+DECLARE_REG 6,  rax, 8
+DECLARE_REG 7,  R10, 16
+DECLARE_REG 8,  R11, 24
+DECLARE_REG 9,  rbx, 32
+DECLARE_REG 10, rbp, 40
+DECLARE_REG 11, R12, 48
+DECLARE_REG 12, R13, 56
+DECLARE_REG 13, R14, 64
+DECLARE_REG 14, R15, 72
 
 %macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names...
     %assign num_args %1
@@ -434,6 +430,8 @@ DECLARE_REG 14, R15, R15D, R15W, R15B, 72
     DEFINE_ARGS %4
 %endmacro
 
+%define has_epilogue regs_used > 9 || mmsize == 32
+
 %macro RET 0
     POP_IF_USED 14, 13, 12, 11, 10, 9
 %if mmsize == 32
@@ -442,23 +440,15 @@ DECLARE_REG 14, R15, R15D, R15W, R15B, 72
     ret
 %endmacro
 
-%macro REP_RET 0
-    %if regs_used > 9 || mmsize == 32
-        RET
-    %else
-        rep ret
-    %endif
-%endmacro
-
 %else ; X86_32 ;==============================================================
 
-DECLARE_REG 0, eax, eax, ax, al,   4
-DECLARE_REG 1, ecx, ecx, cx, cl,   8
-DECLARE_REG 2, edx, edx, dx, dl,   12
-DECLARE_REG 3, ebx, ebx, bx, bl,   16
-DECLARE_REG 4, esi, esi, si, null, 20
-DECLARE_REG 5, edi, edi, di, null, 24
-DECLARE_REG 6, ebp, ebp, bp, null, 28
+DECLARE_REG 0, eax, 4
+DECLARE_REG 1, ecx, 8
+DECLARE_REG 2, edx, 12
+DECLARE_REG 3, ebx, 16
+DECLARE_REG 4, esi, 20
+DECLARE_REG 5, edi, 24
+DECLARE_REG 6, ebp, 28
 %define rsp esp
 
 %macro DECLARE_ARG 1-*
@@ -474,6 +464,9 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
 %macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names...
     %assign num_args %1
     %assign regs_used %2
+    %if num_args > 7
+        %assign num_args 7
+    %endif
     %if regs_used > 7
         %assign regs_used 7
     %endif
@@ -483,6 +476,8 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
     DEFINE_ARGS %4
 %endmacro
 
+%define has_epilogue regs_used > 3 || mmsize == 32
+
 %macro RET 0
     POP_IF_USED 6, 5, 4, 3
 %if mmsize == 32
@@ -491,14 +486,6 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
     ret
 %endmacro
 
-%macro REP_RET 0
-    %if regs_used > 3 || mmsize == 32
-        RET
-    %else
-        rep ret
-    %endif
-%endmacro
-
 %endif ;======================================================================
 
 %if WIN64 == 0
@@ -508,6 +495,23 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
 %endmacro
 %endif
 
+%macro REP_RET 0
+    %if has_epilogue
+        RET
+    %else
+        rep ret
+    %endif
+%endmacro
+
+%macro TAIL_CALL 2 ; callee, is_nonadjacent
+    %if has_epilogue
+        call %1
+        RET
+    %elif %2
+        jmp %1
+    %endif
+%endmacro
+
 ;=============================================================================
 ; arch-independent part
 ;=============================================================================
@@ -597,6 +601,8 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
 %assign cpuflags_avx      (1<<11)| cpuflags_sse42
 %assign cpuflags_xop      (1<<12)| cpuflags_avx
 %assign cpuflags_fma4     (1<<13)| cpuflags_avx
+%assign cpuflags_avx2     (1<<14)| cpuflags_avx
+%assign cpuflags_fma3     (1<<15)| cpuflags_avx
 
 %assign cpuflags_cache32  (1<<16)
 %assign cpuflags_cache64  (1<<17)
@@ -605,6 +611,9 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits
 %assign cpuflags_misalign (1<<20)
 %assign cpuflags_aligned  (1<<21) ; not a cpu feature, but a function variant
 %assign cpuflags_atom     (1<<22)
+%assign cpuflags_bmi1     (1<<23)
+%assign cpuflags_bmi2     (1<<24)|cpuflags_bmi1
+%assign cpuflags_tbm      (1<<25)|cpuflags_bmi1
 
 %define    cpuflag(x) ((cpuflags & (cpuflags_ %+ x)) == (cpuflags_ %+ x))
 %define notcpuflag(x) ((cpuflags & (cpuflags_ %+ x)) != (cpuflags_ %+ x))
@@ -875,25 +884,38 @@ INIT_XMM
 %endrep
 %undef i
 
+%macro CHECK_AVX_INSTR_EMU 3-*
+    %xdefine %%opcode %1
+    %xdefine %%dst %2
+    %rep %0-2
+        %ifidn %%dst, %3
+            %error non-avx emulation of ``%%opcode'' is not supported
+        %endif
+        %rotate 1
+    %endrep
+%endmacro
+
 ;%1 == instruction
 ;%2 == 1 if float, 0 if int
 ;%3 == 1 if 4-operand (xmm, xmm, xmm, imm), 0 if 2- or 3-operand (xmm, xmm, xmm)
 ;%4 == number of operands given
 ;%5+: operands
 %macro RUN_AVX_INSTR 6-7+
-    %ifid %5
-        %define %%size sizeof%5
+    %ifid %6
+        %define %%sizeofreg sizeof%6
+    %elifid %5
+        %define %%sizeofreg sizeof%5
     %else
-        %define %%size mmsize
+        %define %%sizeofreg mmsize
     %endif
-    %if %%size==32
-        %if %0 >= 7
+    %if %%sizeofreg==32
+        %if %4>=3
             v%1 %5, %6, %7
         %else
             v%1 %5, %6
         %endif
     %else
-        %if %%size==8
+        %if %%sizeofreg==8
             %define %%regmov movq
         %elif %2
             %define %%regmov movaps
@@ -903,16 +925,17 @@ INIT_XMM
 
         %if %4>=3+%3
             %ifnidn %5, %6
-                %if avx_enabled && sizeof%5==16
+                %if avx_enabled && %%sizeofreg==16
                     v%1 %5, %6, %7
                 %else
+                    CHECK_AVX_INSTR_EMU {%1 %5, %6, %7}, %5, %7
                     %%regmov %5, %6
                     %1 %5, %7
                 %endif
             %else
                 %1 %5, %7
             %endif
-        %elif %3
+        %elif %4>=3
             %1 %5, %6, %7
         %else
             %1 %5, %6
@@ -943,7 +966,7 @@ INIT_XMM
 
 ;%1 == instruction
 ;%2 == 1 if float, 0 if int
-;%3 == 1 if 4-operand (xmm, xmm, xmm, imm), 0 if 3-operand (xmm, xmm, xmm)
+;%3 == 1 if 4-operand (xmm, xmm, xmm, imm), 0 if 2- or 3-operand (xmm, xmm, xmm)
 ;%4 == 1 if symmetric (i.e. doesn't matter which src arg is which), 0 if not
 %macro AVX_INSTR 4
     %macro %1 2-9 fnord, fnord, fnord, %1, %2, %3, %4
@@ -1008,6 +1031,9 @@ AVX_INSTR mulsd, 1, 0, 1
 AVX_INSTR mulss, 1, 0, 1
 AVX_INSTR orpd, 1, 0, 1
 AVX_INSTR orps, 1, 0, 1
+AVX_INSTR pabsb, 0, 0, 0
+AVX_INSTR pabsw, 0, 0, 0
+AVX_INSTR pabsd, 0, 0, 0
 AVX_INSTR packsswb, 0, 0, 0
 AVX_INSTR packssdw, 0, 0, 0
 AVX_INSTR packuswb, 0, 0, 0
@@ -1059,6 +1085,7 @@ AVX_INSTR pminsd, 0, 0, 1
 AVX_INSTR pminub, 0, 0, 1
 AVX_INSTR pminuw, 0, 0, 1
 AVX_INSTR pminud, 0, 0, 1
+AVX_INSTR pmovmskb, 0, 0, 0
 AVX_INSTR pmulhuw, 0, 0, 1
 AVX_INSTR pmulhrsw, 0, 0, 1
 AVX_INSTR pmulhw, 0, 0, 1
@@ -1069,6 +1096,9 @@ AVX_INSTR pmuldq, 0, 0, 1
 AVX_INSTR por, 0, 0, 1
 AVX_INSTR psadbw, 0, 0, 1
 AVX_INSTR pshufb, 0, 0, 0
+AVX_INSTR pshufd, 0, 1, 0
+AVX_INSTR pshufhw, 0, 1, 0
+AVX_INSTR pshuflw, 0, 1, 0
 AVX_INSTR psignb, 0, 0, 0
 AVX_INSTR psignw, 0, 0, 0
 AVX_INSTR psignd, 0, 0, 0
@@ -1090,6 +1120,7 @@ AVX_INSTR psubsb, 0, 0, 0
 AVX_INSTR psubsw, 0, 0, 0
 AVX_INSTR psubusb, 0, 0, 0
 AVX_INSTR psubusw, 0, 0, 0
+AVX_INSTR ptest, 0, 0, 0
 AVX_INSTR punpckhbw, 0, 0, 0
 AVX_INSTR punpckhwd, 0, 0, 0
 AVX_INSTR punpckhdq, 0, 0, 0
@@ -1154,3 +1185,7 @@ FMA_INSTR  fmaddps,   mulps, addps
 FMA_INSTR  pmacsdd,  pmulld, paddd
 FMA_INSTR  pmacsww,  pmullw, paddw
 FMA_INSTR pmadcswd, pmaddwd, paddd
+
+; tzcnt is equivalent to "rep bsf" and is backwards-compatible with bsf.
+; This lets us use tzcnt without bumping the yasm version requirement yet.
+%define tzcnt rep bsf
diff --git a/library.mak b/library.mak
index 53f1ee7741..5cc3252ddd 100644
--- a/library.mak
+++ b/library.mak
@@ -17,7 +17,7 @@ $(SUBDIR)%-test.o: $(SUBDIR)%.c
 	$(COMPILE_C)
 
 $(SUBDIR)x86/%.o: $(SUBDIR)x86/%.asm
-	$(YASMDEP) $(YASMFLAGS) -I $(<D)/ -M -o $@ $< > $(@:.o=.d)
+	$(DEPYASM) $(YASMFLAGS) -I $(<D)/ -M -o $@ $< > $(@:.o=.d)
 	$(YASM) $(YASMFLAGS) -I $(<D)/ -o $@ $<
 
 $(OBJS) $(OBJS:.o=.s) $(SUBDIR)%.ho $(TESTOBJS): CPPFLAGS += -DHAVE_AV_CONFIG_H
diff --git a/tests/audiogen.c b/tests/audiogen.c
index 5818797a65..07f0be32eb 100644
--- a/tests/audiogen.c
+++ b/tests/audiogen.c
@@ -144,8 +144,8 @@ int main(int argc, char **argv)
     int nb_channels = 2;
     char *ext;
 
-    if (argc < 2 || argc > 4) {
-        printf("usage: %s file [<sample rate> [<channels>]]\n"
+    if (argc < 2 || argc > 5) {
+        printf("usage: %s file [<sample rate> [<channels>] [<random seed>]]\n"
                "generate a test raw 16 bit audio stream\n"
                "If the file extension is .wav a WAVE header will be added.\n"
                "default: 44100 Hz stereo\n", argv[0]);
@@ -168,6 +168,9 @@ int main(int argc, char **argv)
         }
     }
 
+    if (argc > 4)
+        seed = atoi(argv[4]);
+
     outfile = fopen(argv[1], "wb");
     if (!outfile) {
         perror(argv[1]);
diff --git a/tests/fate/filter.mak b/tests/fate/filter.mak
index cd309abc4d..5e4c1537b8 100644
--- a/tests/fate/filter.mak
+++ b/tests/fate/filter.mak
@@ -1,3 +1,25 @@
+FATE_AMIX += fate-filter-amix-simple
+fate-filter-amix-simple: CMD = ffmpeg -filter_complex amix -i $(SRC) -ss 3 -i $(SRC1) -f f32le -
+fate-filter-amix-simple: REF = $(SAMPLES)/filter/amix_simple.pcm
+
+FATE_AMIX += fate-filter-amix-first
+fate-filter-amix-first: CMD = ffmpeg -filter_complex amix=duration=first -ss 4 -i $(SRC) -i $(SRC1) -f f32le -
+fate-filter-amix-first: REF = $(SAMPLES)/filter/amix_first.pcm
+
+FATE_AMIX += fate-filter-amix-transition
+fate-filter-amix-transition: tests/data/asynth-44100-2-3.wav
+fate-filter-amix-transition: SRC2 = $(TARGET_PATH)/tests/data/asynth-44100-2-3.wav
+fate-filter-amix-transition: CMD = ffmpeg -filter_complex amix=inputs=3:dropout_transition=0.5 -i $(SRC) -ss 2 -i $(SRC1) -ss 4 -i $(SRC2) -f f32le -
+fate-filter-amix-transition: REF = $(SAMPLES)/filter/amix_transition.pcm
+
+$(FATE_AMIX): tests/data/asynth-44100-2.wav tests/data/asynth-44100-2-2.wav
+$(FATE_AMIX): SRC  = $(TARGET_PATH)/tests/data/asynth-44100-2.wav
+$(FATE_AMIX): SRC1 = $(TARGET_PATH)/tests/data/asynth-44100-2-2.wav
+$(FATE_AMIX): CMP  = oneoff
+
+FATE_FILTER += $(FATE_AMIX)
+FATE_SAMPLES_AVCONV += $(FATE_AMIX)
+
 FATE_ASYNCTS += fate-filter-asyncts
 fate-filter-asyncts: SRC = $(SAMPLES)/nellymoser/nellymoser-discont.flv
 fate-filter-asyncts: CMD = pcm -i $(SRC) -af aresample=min_comp=0.001:min_hard_comp=0.1
@@ -7,4 +29,18 @@ fate-filter-asyncts: REF = $(SAMPLES)/nellymoser/nellymoser-discont.pcm
 FATE_FILTER += $(FATE_ASYNCTS)
 FATE_SAMPLES_AVCONV += $(FATE_ASYNCTS)
 
+fate-filter-delogo: CMD = framecrc -i $(SAMPLES)/real/rv30.rm -vf delogo=show=0:x=290:y=25:w=26:h=16 -an
+
+FATE_FILTER += fate-filter-delogo
+FATE_SAMPLES_AVCONV += fate-filter-delogo
+
+FATE_YADIF += fate-filter-yadif-mode0
+fate-filter-yadif-mode0: CMD = framecrc -i $(SAMPLES)/mpeg2/mpeg2_field_encoding.ts -vf yadif=0
+
+FATE_YADIF += fate-filter-yadif-mode1
+fate-filter-yadif-mode1: CMD = framecrc -i $(SAMPLES)/mpeg2/mpeg2_field_encoding.ts -vf yadif=1
+
+FATE_FILTER += $(FATE_YADIF)
+FATE_SAMPLES_AVCONV += $(FATE_YADIF)
+
 fate-filter: $(FATE_FILTER)
diff --git a/tests/ref/fate/filter-delogo b/tests/ref/fate/filter-delogo
new file mode 100644
index 0000000000..e0f24cd8ce
--- /dev/null
+++ b/tests/ref/fate/filter-delogo
@@ -0,0 +1,110 @@
+#tb 0: 32768/982057
+0,          0,          0,        1,   126720, 0x689de87e
+0,          1,          1,        1,   126720, 0x3db9e91c
+0,          2,          2,        1,   126720, 0x3db9e91c
+0,          3,          3,        1,   126720, 0x3db9e91c
+0,          4,          4,        1,   126720, 0xfa6ae95e
+0,          5,          5,        1,   126720, 0x5bcbf0e6
+0,          6,          6,        1,   126720, 0x94a0f126
+0,          7,          7,        1,   126720, 0x0250f106
+0,          8,          8,        1,   126720, 0xcf6ab4bc
+0,          9,          9,        1,   126720, 0x44aeb57c
+0,         10,         10,        1,   126720, 0x33b0b5bc
+0,         11,         11,        1,   126720, 0xc4bab591
+0,         12,         12,        1,   126720, 0xa492b5ec
+0,         13,         13,        1,   126720, 0x1459b85c
+0,         14,         14,        1,   126720, 0x806fb8dc
+0,         15,         15,        1,   126720, 0xd241b871
+0,         16,         16,        1,   126720, 0x698eb5cc
+0,         17,         17,        1,   126720, 0x4719aa98
+0,         18,         18,        1,   126720, 0x9ca1962c
+0,         19,         19,        1,   126720, 0x18cda460
+0,         20,         20,        1,   126720, 0xc230b716
+0,         21,         21,        1,   126720, 0x8451a4e2
+0,         22,         22,        1,   126720, 0x59e9a7ea
+0,         23,         23,        1,   126720, 0xc77ca73d
+0,         24,         24,        1,   126720, 0x725fb976
+0,         25,         25,        1,   126720, 0xb30da3b3
+0,         26,         26,        1,   126720, 0x7af2ea86
+0,         27,         27,        1,   126720, 0x40d4b4eb
+0,         28,         28,        1,   126720, 0x49d00307
+0,         29,         29,        1,   126720, 0x44c8848e
+0,         30,         30,        1,   126720, 0xc6990101
+0,         31,         31,        1,   126720, 0x2e01b963
+0,         32,         32,        1,   126720, 0xd0e903f0
+0,         33,         33,        1,   126720, 0x3457d592
+0,         34,         34,        1,   126720, 0x4f1ddb3c
+0,         35,         35,        1,   126720, 0x3980ace5
+0,         36,         36,        1,   126720, 0xb1e37954
+0,         37,         37,        1,   126720, 0x619fc554
+0,         38,         38,        1,   126720, 0x945fb39e
+0,         39,         39,        1,   126720, 0xb1d5e0ce
+0,         40,         40,        1,   126720, 0xf26e1dcc
+0,         41,         41,        1,   126720, 0x04d5783e
+0,         42,         42,        1,   126720, 0xbaa0479e
+0,         43,         43,        1,   126720, 0x20d88b01
+0,         44,         44,        1,   126720, 0x59d99901
+0,         45,         45,        1,   126720, 0x1c6e09f6
+0,         46,         46,        1,   126720, 0xeec50fc5
+0,         47,         47,        1,   126720, 0xb3a92827
+0,         48,         48,        1,   126720, 0xf62dd2b6
+0,         49,         49,        1,   126720, 0x75b1e619
+0,         50,         50,        1,   126720, 0x6bbce2c0
+0,         51,         51,        1,   126720, 0xd93e023c
+0,         52,         52,        1,   126720, 0xbbe8e7c2
+0,         53,         53,        1,   126720, 0x2272ec17
+0,         54,         54,        1,   126720, 0xf5e4ee6e
+0,         55,         55,        1,   126720, 0x751d2607
+0,         56,         56,        1,   126720, 0x44c499c9
+0,         57,         57,        1,   126720, 0xddccd842
+0,         58,         58,        1,   126720, 0x508dd214
+0,         59,         59,        1,   126720, 0x8eb10272
+0,         60,         60,        1,   126720, 0x7224b1c6
+0,         61,         61,        1,   126720, 0x50ff456c
+0,         62,         62,        1,   126720, 0xa81e2731
+0,         63,         63,        1,   126720, 0x7e50456d
+0,         64,         64,        1,   126720, 0x44802978
+0,         65,         65,        1,   126720, 0x86e88743
+0,         66,         66,        1,   126720, 0x0b1087d6
+0,         67,         67,        1,   126720, 0xb0227d21
+0,         68,         68,        1,   126720, 0x29d10bd2
+0,         69,         69,        1,   126720, 0x04b43afa
+0,         70,         70,        1,   126720, 0xb48e9698
+0,         71,         71,        1,   126720, 0x75d760fb
+0,         72,         72,        1,   126720, 0xa2ab1fdb
+0,         73,         73,        1,   126720, 0xec30a5ee
+0,         74,         74,        1,   126720, 0xbdab7c8c
+0,         75,         75,        1,   126720, 0xac5c3f2c
+0,         76,         76,        1,   126720, 0xce6350be
+0,         77,         77,        1,   126720, 0xb109657a
+0,         78,         78,        1,   126720, 0x723865a4
+0,         79,         79,        1,   126720, 0xa9869124
+0,         80,         80,        1,   126720, 0xc41af558
+0,         81,         81,        1,   126720, 0xcbe6a402
+0,         82,         82,        1,   126720, 0xb6735ecb
+0,         83,         83,        1,   126720, 0xba3059f2
+0,         84,         84,        1,   126720, 0xe7d63b8d
+0,         85,         85,        1,   126720, 0x8f115906
+0,         86,         86,        1,   126720, 0xaf6a8dcb
+0,         87,         87,        1,   126720, 0xb73e846e
+0,         88,         88,        1,   126720, 0xedd6380f
+0,         89,         89,        1,   126720, 0xd9026acf
+0,         90,         90,        1,   126720, 0xa03a650b
+0,         91,         91,        1,   126720, 0x262765bc
+0,         92,         92,        1,   126720, 0xaaa9ded1
+0,         93,         93,        1,   126720, 0xe4f42665
+0,         94,         94,        1,   126720, 0x78daf760
+0,         95,         95,        1,   126720, 0x3b0c6ef8
+0,         96,         96,        1,   126720, 0xb745df80
+0,         97,         97,        1,   126720, 0x08e57b90
+0,         98,         98,        1,   126720, 0x6f883ab0
+0,         99,         99,        1,   126720, 0x934b4dd5
+0,        100,        100,        1,   126720, 0x762f108f
+0,        101,        101,        1,   126720, 0x91ee0f2b
+0,        102,        102,        1,   126720, 0x9af6e5e8
+0,        103,        103,        1,   126720, 0xdcd95e0a
+0,        104,        104,        1,   126720, 0x22c33a6e
+0,        105,        105,        1,   126720, 0x21c1b7f4
+0,        106,        106,        1,   126720, 0x0a66a1ed
+0,        107,        107,        1,   126720, 0x53fea81b
+0,        108,        108,        1,   126720, 0x597f5567
diff --git a/tests/ref/fate/filter-yadif-mode0 b/tests/ref/fate/filter-yadif-mode0
new file mode 100644
index 0000000000..a25ea3e8c8
--- /dev/null
+++ b/tests/ref/fate/filter-yadif-mode0
@@ -0,0 +1,32 @@
+#tb 0: 1/25
+0,          9,          9,        1,   622080, 0x1511cae9
+0,         10,         10,        1,   622080, 0x6e77e746
+0,         11,         11,        1,   622080, 0x89aac777
+0,         12,         12,        1,   622080, 0x7e0a9335
+0,         13,         13,        1,   622080, 0x5f34759b
+0,         14,         14,        1,   622080, 0xfac498a6
+0,         15,         15,        1,   622080, 0xe60e7a9e
+0,         16,         16,        1,   622080, 0x44875bbd
+0,         17,         17,        1,   622080, 0xfa761aab
+0,         18,         18,        1,   622080, 0x59be119c
+0,         19,         19,        1,   622080, 0x21316b36
+0,         20,         20,        1,   622080, 0x929fde5b
+0,         21,         21,        1,   622080, 0xfca8990c
+0,         22,         22,        1,   622080, 0x1ec87d02
+0,         23,         23,        1,   622080, 0x5768eea0
+0,         24,         24,        1,   622080, 0x1a0894ab
+0,         25,         25,        1,   622080, 0xb4e61323
+0,         26,         26,        1,   622080, 0xb773341a
+0,         27,         27,        1,   622080, 0x8a914cf7
+0,         28,         28,        1,   622080, 0xf1cfbc7d
+0,         29,         29,        1,   622080, 0xebaeb317
+0,         30,         30,        1,   622080, 0xbae9adf4
+0,         31,         31,        1,   622080, 0x593544fd
+0,         32,         32,        1,   622080, 0x2cd8ec0b
+0,         33,         33,        1,   622080, 0x8032d9d4
+0,         34,         34,        1,   622080, 0x5c67ace7
+0,         35,         35,        1,   622080, 0x95714528
+0,         36,         36,        1,   622080, 0xa11cbed2
+0,         37,         37,        1,   622080, 0x7389f8f1
+0,         38,         38,        1,   622080, 0xa694f3f2
+0,         39,         39,        1,   622080, 0xac3a3d09
diff --git a/tests/ref/fate/filter-yadif-mode1 b/tests/ref/fate/filter-yadif-mode1
new file mode 100644
index 0000000000..1d13ebe8cb
--- /dev/null
+++ b/tests/ref/fate/filter-yadif-mode1
@@ -0,0 +1,34 @@
+#tb 0: 1/25
+0,          9,          9,        1,   622080, 0x1511cae9
+0,         10,         10,        1,   622080, 0xb88ca855
+0,         11,         11,        1,   622080, 0x6e77e746
+0,         12,         12,        1,   622080, 0x5da19198
+0,         13,         13,        1,   622080, 0xee31c8a8
+0,         14,         14,        1,   622080, 0xcbb7aac5
+0,         15,         15,        1,   622080, 0x19972f1a
+0,         16,         16,        1,   622080, 0xac7d34b9
+0,         17,         17,        1,   622080, 0x4adfe592
+0,         18,         18,        1,   622080, 0x5d738330
+0,         19,         19,        1,   622080, 0xb60b4447
+0,         20,         20,        1,   622080, 0x1e11acf4
+0,         21,         21,        1,   622080, 0x5ed635d0
+0,         22,         22,        1,   622080, 0x939857af
+0,         23,         23,        1,   622080, 0x530b28fd
+0,         24,         24,        1,   622080, 0x3bc0d5d3
+0,         25,         25,        1,   622080, 0x77e0fe99
+0,         26,         26,        1,   622080, 0xd2151c1e
+0,         27,         27,        1,   622080, 0xe021a815
+0,         28,         28,        1,   622080, 0xceae4f12
+0,         29,         29,        1,   622080, 0x4c2f3330
+0,         30,         30,        1,   622080, 0xf534c392
+0,         31,         31,        1,   622080, 0x88f01c11
+0,         32,         32,        1,   622080, 0x654d5df2
+0,         33,         33,        1,   622080, 0x89ef6f8a
+0,         34,         34,        1,   622080, 0x78a7b5f1
+0,         35,         35,        1,   622080, 0x8152d67f
+0,         36,         36,        1,   622080, 0x6590ff5f
+0,         37,         37,        1,   622080, 0x51d2be96
+0,         38,         38,        1,   622080, 0x483f65f7
+0,         39,         39,        1,   622080, 0x7a69143d
+0,         40,         40,        1,   622080, 0xeccc58ff
+0,         41,         41,        1,   622080, 0xc4d2c370