summaryrefslogtreecommitdiff
path: root/libavcodec/x86
diff options
context:
space:
mode:
Diffstat (limited to 'libavcodec/x86')
-rw-r--r--libavcodec/x86/Makefile11
-rw-r--r--libavcodec/x86/ac3dsp.asm8
-rw-r--r--libavcodec/x86/ac3dsp_init.c13
-rw-r--r--libavcodec/x86/cabac.h18
-rw-r--r--libavcodec/x86/cavsdsp.c8
-rw-r--r--libavcodec/x86/constants.c8
-rw-r--r--libavcodec/x86/constants.h8
-rw-r--r--libavcodec/x86/dct32.asm10
-rw-r--r--libavcodec/x86/deinterlace.asm8
-rw-r--r--libavcodec/x86/dirac_dwt.c202
-rw-r--r--libavcodec/x86/dirac_dwt.h30
-rw-r--r--libavcodec/x86/diracdsp_mmx.c104
-rw-r--r--libavcodec/x86/diracdsp_mmx.h47
-rw-r--r--libavcodec/x86/diracdsp_yasm.asm264
-rw-r--r--libavcodec/x86/dnxhdenc.c8
-rw-r--r--libavcodec/x86/dsputil.asm11
-rw-r--r--libavcodec/x86/dsputil_init.c75
-rw-r--r--libavcodec/x86/dsputil_mmx.c166
-rw-r--r--libavcodec/x86/dsputil_qns_template.c10
-rw-r--r--libavcodec/x86/dsputil_x86.h17
-rw-r--r--libavcodec/x86/dsputilenc.asm8
-rw-r--r--libavcodec/x86/dsputilenc_mmx.c31
-rw-r--r--libavcodec/x86/dwt_yasm.asm306
-rw-r--r--libavcodec/x86/fdct.c12
-rw-r--r--libavcodec/x86/fft.asm22
-rw-r--r--libavcodec/x86/fft.h8
-rw-r--r--libavcodec/x86/fft_init.c8
-rw-r--r--libavcodec/x86/fmtconvert.asm10
-rw-r--r--libavcodec/x86/fmtconvert_init.c12
-rw-r--r--libavcodec/x86/fpel.asm8
-rw-r--r--libavcodec/x86/fpel_mmx.c8
-rw-r--r--libavcodec/x86/h263_loopfilter.asm10
-rw-r--r--libavcodec/x86/h264_chromamc.asm8
-rw-r--r--libavcodec/x86/h264_chromamc_10bit.asm4
-rw-r--r--libavcodec/x86/h264_deblock.asm20
-rw-r--r--libavcodec/x86/h264_deblock_10bit.asm8
-rw-r--r--libavcodec/x86/h264_i386.h8
-rw-r--r--libavcodec/x86/h264_idct.asm8
-rw-r--r--libavcodec/x86/h264_idct_10bit.asm16
-rw-r--r--libavcodec/x86/h264_intrapred.asm8
-rw-r--r--libavcodec/x86/h264_intrapred_10bit.asm26
-rw-r--r--libavcodec/x86/h264_intrapred_init.c8
-rw-r--r--libavcodec/x86/h264_qpel.c16
-rw-r--r--libavcodec/x86/h264_qpel_8bit.asm8
-rw-r--r--libavcodec/x86/h264_weight.asm15
-rw-r--r--libavcodec/x86/h264chroma_init.c8
-rw-r--r--libavcodec/x86/h264dsp_init.c10
-rw-r--r--libavcodec/x86/hpeldsp.asm15
-rw-r--r--libavcodec/x86/hpeldsp_init.c10
-rw-r--r--libavcodec/x86/hpeldsp_mmx.c8
-rw-r--r--libavcodec/x86/hpeldsp_rnd_template.c8
-rw-r--r--libavcodec/x86/idct_mmx_xvid.c8
-rw-r--r--libavcodec/x86/idct_sse2_xvid.c9
-rw-r--r--libavcodec/x86/idct_xvid.h8
-rw-r--r--libavcodec/x86/imdct36.asm12
-rw-r--r--libavcodec/x86/lpc.c9
-rw-r--r--libavcodec/x86/mathops.h10
-rw-r--r--libavcodec/x86/mlpdsp.c9
-rw-r--r--libavcodec/x86/motion_est.c26
-rw-r--r--libavcodec/x86/mpeg4qpel.asm10
-rw-r--r--libavcodec/x86/mpegaudiodsp.c27
-rw-r--r--libavcodec/x86/mpegvideo.c20
-rw-r--r--libavcodec/x86/mpegvideoenc.c10
-rw-r--r--libavcodec/x86/mpegvideoenc_template.c21
-rw-r--r--libavcodec/x86/pngdsp_init.c8
-rw-r--r--libavcodec/x86/proresdsp.asm132
-rw-r--r--libavcodec/x86/proresdsp_init.c13
-rw-r--r--libavcodec/x86/qpel.asm8
-rw-r--r--libavcodec/x86/rnd_mmx.c8
-rw-r--r--libavcodec/x86/rnd_template.c8
-rw-r--r--libavcodec/x86/rv40dsp_init.c8
-rw-r--r--libavcodec/x86/sbrdsp.asm184
-rw-r--r--libavcodec/x86/sbrdsp_init.c19
-rw-r--r--libavcodec/x86/simple_idct.c9
-rw-r--r--libavcodec/x86/snowdsp.c902
-rw-r--r--libavcodec/x86/v210-init.c48
-rw-r--r--libavcodec/x86/v210.asm88
-rw-r--r--libavcodec/x86/vc1dsp.asm8
-rw-r--r--libavcodec/x86/vc1dsp.h8
-rw-r--r--libavcodec/x86/vc1dsp_mmx.c1
-rw-r--r--libavcodec/x86/videodsp.asm8
-rw-r--r--libavcodec/x86/videodsp_init.c26
-rw-r--r--libavcodec/x86/vorbisdsp.asm8
-rw-r--r--libavcodec/x86/vorbisdsp_init.c8
-rw-r--r--libavcodec/x86/vp3dsp.asm8
-rw-r--r--libavcodec/x86/vp3dsp_init.c69
-rw-r--r--libavcodec/x86/vp56_arith.h8
-rw-r--r--libavcodec/x86/vp56dsp.asm8
-rw-r--r--libavcodec/x86/vp56dsp_init.c8
-rw-r--r--libavcodec/x86/vp8dsp.asm8
-rw-r--r--libavcodec/x86/vp8dsp_init.c8
-rw-r--r--libavcodec/x86/w64xmmtest.c7
92 files changed, 2939 insertions, 547 deletions
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 663144228a..6d51b161ec 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -18,9 +18,11 @@ OBJS-$(CONFIG_MPEGVIDEO) += x86/mpegvideo.o
OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoenc.o
OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp_init.o
OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp_init.o
+OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp_init.o
OBJS-$(CONFIG_RV30_DECODER) += x86/rv34dsp_init.o
OBJS-$(CONFIG_RV40_DECODER) += x86/rv34dsp_init.o \
x86/rv40dsp_init.o
+OBJS-$(CONFIG_V210_DECODER) += x86/v210-init.o
OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o
OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_init.o
OBJS-$(CONFIG_VIDEODSP) += x86/videodsp_init.o
@@ -29,6 +31,7 @@ OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp_init.o
OBJS-$(CONFIG_VP5_DECODER) += x86/vp56dsp_init.o
OBJS-$(CONFIG_VP6_DECODER) += x86/vp56dsp_init.o
OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp_init.o
+OBJS-$(CONFIG_WEBP_DECODER) += x86/vp8dsp_init.o
OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o
MMX-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_init.o \
@@ -42,9 +45,12 @@ MMX-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_init.o \
MMX-OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc_mmx.o \
x86/motion_est.o
+MMX-OBJS-$(CONFIG_DIRAC_DECODER) += x86/dirac_dwt.o
MMX-OBJS-$(CONFIG_HPELDSP) += x86/fpel_mmx.o \
x86/hpeldsp_mmx.o \
x86/rnd_mmx.o
+MMX-OBJS-$(CONFIG_SNOW_DECODER) += x86/snowdsp.o
+MMX-OBJS-$(CONFIG_SNOW_ENCODER) += x86/snowdsp.o
MMX-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_mmx.o
YASM-OBJS += x86/deinterlace.o \
@@ -53,6 +59,8 @@ YASM-OBJS += x86/deinterlace.o \
YASM-OBJS-$(CONFIG_AAC_DECODER) += x86/sbrdsp.o
YASM-OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp.o
YASM-OBJS-$(CONFIG_DCT) += x86/dct32.o
+YASM-OBJS-$(CONFIG_DIRAC_DECODER) += x86/diracdsp_mmx.o x86/diracdsp_yasm.o\
+ x86/dwt_yasm.o
YASM-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil.o \
x86/fpel.o \
x86/mpeg4qpel.o \
@@ -80,12 +88,15 @@ YASM-OBJS-$(CONFIG_HPELDSP) += x86/fpel.o \
YASM-OBJS-$(CONFIG_MPEGAUDIODSP) += x86/imdct36.o
YASM-OBJS-$(CONFIG_PNG_DECODER) += x86/pngdsp.o
YASM-OBJS-$(CONFIG_PRORES_DECODER) += x86/proresdsp.o
+YASM-OBJS-$(CONFIG_PRORES_LGPL_DECODER) += x86/proresdsp.o
YASM-OBJS-$(CONFIG_RV30_DECODER) += x86/rv34dsp.o
YASM-OBJS-$(CONFIG_RV40_DECODER) += x86/rv34dsp.o \
x86/rv40dsp.o
+YASM-OBJS-$(CONFIG_V210_DECODER) += x86/v210.o
YASM-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp.o
YASM-OBJS-$(CONFIG_VIDEODSP) += x86/videodsp.o
YASM-OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp.o
YASM-OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp.o
YASM-OBJS-$(CONFIG_VP6_DECODER) += x86/vp56dsp.o
YASM-OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp.o
+YASM-OBJS-$(CONFIG_WEBP_DECODER) += x86/vp8dsp.o
diff --git a/libavcodec/x86/ac3dsp.asm b/libavcodec/x86/ac3dsp.asm
index b43231855c..89a64f50d4 100644
--- a/libavcodec/x86/ac3dsp.asm
+++ b/libavcodec/x86/ac3dsp.asm
@@ -2,20 +2,20 @@
;* x86-optimized AC-3 DSP utils
;* Copyright (c) 2011 Justin Ruggles
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
diff --git a/libavcodec/x86/ac3dsp_init.c b/libavcodec/x86/ac3dsp_init.c
index a6f93406bb..db1505e014 100644
--- a/libavcodec/x86/ac3dsp_init.c
+++ b/libavcodec/x86/ac3dsp_init.c
@@ -2,20 +2,20 @@
* x86-optimized AC-3 DSP utils
* Copyright (c) 2011 Justin Ruggles
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -51,6 +51,11 @@ void ff_ac3_extract_exponents_3dnow(uint8_t *exp, int32_t *coef, int nb_coefs);
void ff_ac3_extract_exponents_sse2 (uint8_t *exp, int32_t *coef, int nb_coefs);
void ff_ac3_extract_exponents_ssse3(uint8_t *exp, int32_t *coef, int nb_coefs);
+#if ARCH_X86_32 && defined(__INTEL_COMPILER)
+# undef HAVE_7REGS
+# define HAVE_7REGS 0
+#endif
+
#if HAVE_SSE_INLINE && HAVE_7REGS
#define IF1(x) x
diff --git a/libavcodec/x86/cabac.h b/libavcodec/x86/cabac.h
index a74cf0bf1f..2c9f77e3ff 100644
--- a/libavcodec/x86/cabac.h
+++ b/libavcodec/x86/cabac.h
@@ -73,10 +73,7 @@
"test "lowword" , "lowword" \n\t"\
"jnz 2f \n\t"\
"mov "byte" , %%"REG_c" \n\t"\
- "cmp "end" , %%"REG_c" \n\t"\
- "jge 1f \n\t"\
"add"OPSIZE" $2 , "byte" \n\t"\
- "1: \n\t"\
"movzwl (%%"REG_c") , "tmp" \n\t"\
"lea -1("low") , %%ecx \n\t"\
"xor "low" , %%ecx \n\t"\
@@ -93,6 +90,7 @@
#else /* BROKEN_RELOCATIONS */
#define TABLES_ARG
+#define RIP_ARG
#if HAVE_FAST_CMOV
#define BRANCHLESS_GET_CABAC_UPDATE(ret, low, range, tmp)\
@@ -134,10 +132,7 @@
"test "lowword" , "lowword" \n\t"\
" jnz 2f \n\t"\
"mov "byte" , %%"REG_c" \n\t"\
- "cmp "end" , %%"REG_c" \n\t"\
- "jge 1f \n\t"\
"add"OPSIZE" $2 , "byte" \n\t"\
- "1: \n\t"\
"movzwl (%%"REG_c") , "tmp" \n\t"\
"lea -1("low") , %%ecx \n\t"\
"xor "low" , %%ecx \n\t"\
@@ -155,7 +150,8 @@
#endif /* BROKEN_RELOCATIONS */
-#if HAVE_7REGS
+#if HAVE_7REGS && !(defined(__i386) && defined(__clang__) && (__clang_major__<2 || (__clang_major__==2 && __clang_minor__<10)))\
+ && !( !defined(__clang__) && defined(__llvm__) && __GNUC__==4 && __GNUC_MINOR__==2 && __GNUC_PATCHLEVEL__<=1)
#define get_cabac_inline get_cabac_inline_x86
static av_always_inline int get_cabac_inline_x86(CABACContext *c,
uint8_t *const state)
@@ -178,11 +174,12 @@ static av_always_inline int get_cabac_inline_x86(CABACContext *c,
AV_STRINGIFY(H264_LPS_RANGE_OFFSET),
AV_STRINGIFY(H264_MLPS_STATE_OFFSET),
"%8")
- : "=&r"(bit), "+&r"(c->low), "+&r"(c->range), "=&q"(tmp)
+ : "=&r"(bit), "=&r"(c->low), "=&r"(c->range), "=&q"(tmp)
: "r"(state), "r"(c),
"i"(offsetof(CABACContext, bytestream)),
"i"(offsetof(CABACContext, bytestream_end))
TABLES_ARG
+ ,"1"(c->low), "2"(c->range)
: "%"REG_c, "memory"
);
return bit & 1;
@@ -211,10 +208,9 @@ static av_always_inline int get_cabac_bypass_sign_x86(CABACContext *c, int val)
"movzwl (%1), %%edx \n\t"
"bswap %%edx \n\t"
"shrl $15, %%edx \n\t"
+ "add $2, %1 \n\t"
"addl %%edx, %%eax \n\t"
- "cmp %c5(%2), %1 \n\t"
- "jge 1f \n\t"
- "add"OPSIZE" $2, %c4(%2) \n\t"
+ "mov %1, %c4(%2) \n\t"
"1: \n\t"
"movl %%eax, %c3(%2) \n\t"
diff --git a/libavcodec/x86/cavsdsp.c b/libavcodec/x86/cavsdsp.c
index 68273461e8..0354333382 100644
--- a/libavcodec/x86/cavsdsp.c
+++ b/libavcodec/x86/cavsdsp.c
@@ -5,20 +5,20 @@
* MMX-optimized DSP functions, based on H.264 optimizations by
* Michael Niedermayer and Loren Merritt
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/constants.c b/libavcodec/x86/constants.c
index 5b8d1b224f..3bba80bd87 100644
--- a/libavcodec/x86/constants.c
+++ b/libavcodec/x86/constants.c
@@ -1,20 +1,20 @@
/*
* MMX/SSE constants used across x86 dsp optimizations.
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/constants.h b/libavcodec/x86/constants.h
index f38fbe3425..8097bc49f0 100644
--- a/libavcodec/x86/constants.h
+++ b/libavcodec/x86/constants.h
@@ -1,20 +1,20 @@
/*
* MMX/SSE constants used across x86 dsp optimizations.
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/dct32.asm b/libavcodec/x86/dct32.asm
index 9c147b9c00..6fd5ba350d 100644
--- a/libavcodec/x86/dct32.asm
+++ b/libavcodec/x86/dct32.asm
@@ -2,20 +2,20 @@
;* 32 point SSE-optimized DCT transform
;* Copyright (c) 2010 Vitor Sessak
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
@@ -192,6 +192,7 @@ ps_p1p1m1m1: dd 0, 0, 0x80000000, 0x80000000, 0, 0, 0x80000000, 0x80000000
INIT_YMM avx
SECTION_TEXT
+%if HAVE_AVX_EXTERNAL
; void ff_dct32_float_avx(FFTSample *out, const FFTSample *in)
cglobal dct32_float, 2,3,8, out, in, tmp
; pass 1
@@ -264,6 +265,7 @@ cglobal dct32_float, 2,3,8, out, in, tmp
INIT_XMM
PASS6_AND_PERMUTE
RET
+%endif
%if ARCH_X86_64
%define SPILL SWAP
diff --git a/libavcodec/x86/deinterlace.asm b/libavcodec/x86/deinterlace.asm
index b2828f3f17..3812dbe601 100644
--- a/libavcodec/x86/deinterlace.asm
+++ b/libavcodec/x86/deinterlace.asm
@@ -3,20 +3,20 @@
;* Copyright (c) 2010 Vitor Sessak
;* Copyright (c) 2002 Michael Niedermayer
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
diff --git a/libavcodec/x86/dirac_dwt.c b/libavcodec/x86/dirac_dwt.c
new file mode 100644
index 0000000000..04c514f4fd
--- /dev/null
+++ b/libavcodec/x86/dirac_dwt.c
@@ -0,0 +1,202 @@
+/*
+ * MMX optimized discrete wavelet transform
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ * Copyright (c) 2010 David Conrad
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/x86/asm.h"
+#include "dsputil_x86.h"
+#include "dirac_dwt.h"
+
+#define COMPOSE_VERTICAL(ext, align) \
+void ff_vertical_compose53iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width); \
+void ff_vertical_compose_dirac53iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width); \
+void ff_vertical_compose_dd137iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, int width); \
+void ff_vertical_compose_dd97iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, int width); \
+void ff_vertical_compose_haar##ext(IDWTELEM *b0, IDWTELEM *b1, int width); \
+void ff_horizontal_compose_haar0i##ext(IDWTELEM *b, IDWTELEM *tmp, int w);\
+void ff_horizontal_compose_haar1i##ext(IDWTELEM *b, IDWTELEM *tmp, int w);\
+\
+static void vertical_compose53iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width) \
+{ \
+ int i, width_align = width&~(align-1); \
+\
+ for(i=width_align; i<width; i++) \
+ b1[i] = COMPOSE_53iL0(b0[i], b1[i], b2[i]); \
+\
+ ff_vertical_compose53iL0##ext(b0, b1, b2, width_align); \
+} \
+\
+static void vertical_compose_dirac53iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width) \
+{ \
+ int i, width_align = width&~(align-1); \
+\
+ for(i=width_align; i<width; i++) \
+ b1[i] = COMPOSE_DIRAC53iH0(b0[i], b1[i], b2[i]); \
+\
+ ff_vertical_compose_dirac53iH0##ext(b0, b1, b2, width_align); \
+} \
+\
+static void vertical_compose_dd137iL0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, \
+ IDWTELEM *b3, IDWTELEM *b4, int width) \
+{ \
+ int i, width_align = width&~(align-1); \
+\
+ for(i=width_align; i<width; i++) \
+ b2[i] = COMPOSE_DD137iL0(b0[i], b1[i], b2[i], b3[i], b4[i]); \
+\
+ ff_vertical_compose_dd137iL0##ext(b0, b1, b2, b3, b4, width_align); \
+} \
+\
+static void vertical_compose_dd97iH0##ext(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, \
+ IDWTELEM *b3, IDWTELEM *b4, int width) \
+{ \
+ int i, width_align = width&~(align-1); \
+\
+ for(i=width_align; i<width; i++) \
+ b2[i] = COMPOSE_DD97iH0(b0[i], b1[i], b2[i], b3[i], b4[i]); \
+\
+ ff_vertical_compose_dd97iH0##ext(b0, b1, b2, b3, b4, width_align); \
+} \
+static void vertical_compose_haar##ext(IDWTELEM *b0, IDWTELEM *b1, int width) \
+{ \
+ int i, width_align = width&~(align-1); \
+\
+ for(i=width_align; i<width; i++) { \
+ b0[i] = COMPOSE_HAARiL0(b0[i], b1[i]); \
+ b1[i] = COMPOSE_HAARiH0(b1[i], b0[i]); \
+ } \
+\
+ ff_vertical_compose_haar##ext(b0, b1, width_align); \
+} \
+static void horizontal_compose_haar0i##ext(IDWTELEM *b, IDWTELEM *tmp, int w)\
+{\
+ int w2= w>>1;\
+ int x= w2 - (w2&(align-1));\
+ ff_horizontal_compose_haar0i##ext(b, tmp, w);\
+\
+ for (; x < w2; x++) {\
+ b[2*x ] = tmp[x];\
+ b[2*x+1] = COMPOSE_HAARiH0(b[x+w2], tmp[x]);\
+ }\
+}\
+static void horizontal_compose_haar1i##ext(IDWTELEM *b, IDWTELEM *tmp, int w)\
+{\
+ int w2= w>>1;\
+ int x= w2 - (w2&(align-1));\
+ ff_horizontal_compose_haar1i##ext(b, tmp, w);\
+\
+ for (; x < w2; x++) {\
+ b[2*x ] = (tmp[x] + 1)>>1;\
+ b[2*x+1] = (COMPOSE_HAARiH0(b[x+w2], tmp[x]) + 1)>>1;\
+ }\
+}\
+\
+
+#if HAVE_YASM
+#if !ARCH_X86_64
+COMPOSE_VERTICAL(_mmx, 4)
+#endif
+COMPOSE_VERTICAL(_sse2, 8)
+
+
+void ff_horizontal_compose_dd97i_ssse3(IDWTELEM *b, IDWTELEM *tmp, int w);
+
+static void horizontal_compose_dd97i_ssse3(IDWTELEM *b, IDWTELEM *tmp, int w)
+{
+ int w2= w>>1;
+ int x= w2 - (w2&7);
+ ff_horizontal_compose_dd97i_ssse3(b, tmp, w);
+
+ for (; x < w2; x++) {
+ b[2*x ] = (tmp[x] + 1)>>1;
+ b[2*x+1] = (COMPOSE_DD97iH0(tmp[x-1], tmp[x], b[x+w2], tmp[x+1], tmp[x+2]) + 1)>>1;
+ }
+}
+#endif
+
+void ff_spatial_idwt_init_mmx(DWTContext *d, enum dwt_type type)
+{
+#if HAVE_YASM
+ int mm_flags = av_get_cpu_flags();
+
+#if !ARCH_X86_64
+ if (!(mm_flags & AV_CPU_FLAG_MMX))
+ return;
+
+ switch (type) {
+ case DWT_DIRAC_DD9_7:
+ d->vertical_compose_l0 = (void*)vertical_compose53iL0_mmx;
+ d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0_mmx;
+ break;
+ case DWT_DIRAC_LEGALL5_3:
+ d->vertical_compose_l0 = (void*)vertical_compose53iL0_mmx;
+ d->vertical_compose_h0 = (void*)vertical_compose_dirac53iH0_mmx;
+ break;
+ case DWT_DIRAC_DD13_7:
+ d->vertical_compose_l0 = (void*)vertical_compose_dd137iL0_mmx;
+ d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0_mmx;
+ break;
+ case DWT_DIRAC_HAAR0:
+ d->vertical_compose = (void*)vertical_compose_haar_mmx;
+ d->horizontal_compose = horizontal_compose_haar0i_mmx;
+ break;
+ case DWT_DIRAC_HAAR1:
+ d->vertical_compose = (void*)vertical_compose_haar_mmx;
+ d->horizontal_compose = horizontal_compose_haar1i_mmx;
+ break;
+ }
+#endif
+
+ if (!(mm_flags & AV_CPU_FLAG_SSE2))
+ return;
+
+ switch (type) {
+ case DWT_DIRAC_DD9_7:
+ d->vertical_compose_l0 = (void*)vertical_compose53iL0_sse2;
+ d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0_sse2;
+ break;
+ case DWT_DIRAC_LEGALL5_3:
+ d->vertical_compose_l0 = (void*)vertical_compose53iL0_sse2;
+ d->vertical_compose_h0 = (void*)vertical_compose_dirac53iH0_sse2;
+ break;
+ case DWT_DIRAC_DD13_7:
+ d->vertical_compose_l0 = (void*)vertical_compose_dd137iL0_sse2;
+ d->vertical_compose_h0 = (void*)vertical_compose_dd97iH0_sse2;
+ break;
+ case DWT_DIRAC_HAAR0:
+ d->vertical_compose = (void*)vertical_compose_haar_sse2;
+ d->horizontal_compose = horizontal_compose_haar0i_sse2;
+ break;
+ case DWT_DIRAC_HAAR1:
+ d->vertical_compose = (void*)vertical_compose_haar_sse2;
+ d->horizontal_compose = horizontal_compose_haar1i_sse2;
+ break;
+ }
+
+ if (!(mm_flags & AV_CPU_FLAG_SSSE3))
+ return;
+
+ switch (type) {
+ case DWT_DIRAC_DD9_7:
+ d->horizontal_compose = horizontal_compose_dd97i_ssse3;
+ break;
+ }
+#endif // HAVE_YASM
+}
diff --git a/libavcodec/x86/dirac_dwt.h b/libavcodec/x86/dirac_dwt.h
new file mode 100644
index 0000000000..126b29029f
--- /dev/null
+++ b/libavcodec/x86/dirac_dwt.h
@@ -0,0 +1,30 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_X86_DIRAC_DWT_H
+#define AVCODEC_X86_DIRAC_DWT_H
+
+#include "libavcodec/dirac_dwt.h"
+
+void ff_horizontal_compose_dd97i_end_c(IDWTELEM *b, IDWTELEM *tmp, int w2, int x);
+void ff_horizontal_compose_haar1i_end_c(IDWTELEM *b, IDWTELEM *tmp, int w2, int x);
+void ff_horizontal_compose_haar0i_end_c(IDWTELEM *b, IDWTELEM *tmp, int w2, int x);
+
+void ff_spatial_idwt_init_mmx(DWTContext *d, enum dwt_type type);
+
+#endif
diff --git a/libavcodec/x86/diracdsp_mmx.c b/libavcodec/x86/diracdsp_mmx.c
new file mode 100644
index 0000000000..a28bb82060
--- /dev/null
+++ b/libavcodec/x86/diracdsp_mmx.c
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2010 David Conrad
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "dsputil_x86.h"
+#include "diracdsp_mmx.h"
+
+void ff_put_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
+void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
+void ff_put_signed_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
+void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
+
+#define HPEL_FILTER(MMSIZE, EXT) \
+ void ff_dirac_hpel_filter_v_ ## EXT(uint8_t *, const uint8_t *, int, int); \
+ void ff_dirac_hpel_filter_h_ ## EXT(uint8_t *, const uint8_t *, int); \
+ \
+ static void dirac_hpel_filter_ ## EXT(uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, \
+ const uint8_t *src, int stride, int width, int height) \
+ { \
+ while( height-- ) \
+ { \
+ ff_dirac_hpel_filter_v_ ## EXT(dstv-MMSIZE, src-MMSIZE, stride, width+MMSIZE+5); \
+ ff_dirac_hpel_filter_h_ ## EXT(dsth, src, width); \
+ ff_dirac_hpel_filter_h_ ## EXT(dstc, dstv, width); \
+ \
+ dsth += stride; \
+ dstv += stride; \
+ dstc += stride; \
+ src += stride; \
+ } \
+ }
+
+#if !ARCH_X86_64
+HPEL_FILTER(8, mmx)
+#endif
+HPEL_FILTER(16, sse2)
+
+#define PIXFUNC(PFX, IDX, EXT) \
+ /*MMXDISABLEDc->PFX ## _dirac_pixels_tab[0][IDX] = ff_ ## PFX ## _dirac_pixels8_ ## EXT;*/ \
+ c->PFX ## _dirac_pixels_tab[1][IDX] = ff_ ## PFX ## _dirac_pixels16_ ## EXT; \
+ c->PFX ## _dirac_pixels_tab[2][IDX] = ff_ ## PFX ## _dirac_pixels32_ ## EXT
+
+void ff_diracdsp_init_mmx(DiracDSPContext* c)
+{
+ int mm_flags = av_get_cpu_flags();
+
+ if (!(mm_flags & AV_CPU_FLAG_MMX))
+ return;
+
+#if HAVE_YASM
+ c->add_dirac_obmc[0] = ff_add_dirac_obmc8_mmx;
+#if !ARCH_X86_64
+ c->add_dirac_obmc[1] = ff_add_dirac_obmc16_mmx;
+ c->add_dirac_obmc[2] = ff_add_dirac_obmc32_mmx;
+ c->dirac_hpel_filter = dirac_hpel_filter_mmx;
+ c->add_rect_clamped = ff_add_rect_clamped_mmx;
+ c->put_signed_rect_clamped = ff_put_signed_rect_clamped_mmx;
+#endif
+#endif
+
+#if HAVE_MMX_INLINE
+ PIXFUNC(put, 0, mmx);
+ PIXFUNC(avg, 0, mmx);
+#endif
+
+#if HAVE_MMXEXT_INLINE
+ if (mm_flags & AV_CPU_FLAG_MMX2) {
+ PIXFUNC(avg, 0, mmxext);
+ }
+#endif
+
+ if (mm_flags & AV_CPU_FLAG_SSE2) {
+#if HAVE_YASM
+ c->dirac_hpel_filter = dirac_hpel_filter_sse2;
+ c->add_rect_clamped = ff_add_rect_clamped_sse2;
+ c->put_signed_rect_clamped = ff_put_signed_rect_clamped_sse2;
+
+ c->add_dirac_obmc[1] = ff_add_dirac_obmc16_sse2;
+ c->add_dirac_obmc[2] = ff_add_dirac_obmc32_sse2;
+#endif
+#if HAVE_SSE2_INLINE
+ c->put_dirac_pixels_tab[1][0] = ff_put_dirac_pixels16_sse2;
+ c->avg_dirac_pixels_tab[1][0] = ff_avg_dirac_pixels16_sse2;
+ c->put_dirac_pixels_tab[2][0] = ff_put_dirac_pixels32_sse2;
+ c->avg_dirac_pixels_tab[2][0] = ff_avg_dirac_pixels32_sse2;
+#endif
+ }
+}
diff --git a/libavcodec/x86/diracdsp_mmx.h b/libavcodec/x86/diracdsp_mmx.h
new file mode 100644
index 0000000000..89858544f3
--- /dev/null
+++ b/libavcodec/x86/diracdsp_mmx.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2010 David Conrad
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_X86_DIRACDSP_H
+#define AVCODEC_X86_DIRACDSP_H
+
+#include "libavcodec/diracdsp.h"
+
+void ff_diracdsp_init_mmx(DiracDSPContext* c);
+
+DECL_DIRAC_PIXOP(put, mmx);
+DECL_DIRAC_PIXOP(avg, mmx);
+DECL_DIRAC_PIXOP(avg, mmxext);
+
+void ff_put_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h);
+void ff_avg_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h);
+void ff_put_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h);
+void ff_avg_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h);
+
+void ff_add_rect_clamped_mmx(uint8_t *, const uint16_t *, int, const int16_t *, int, int, int);
+void ff_add_rect_clamped_sse2(uint8_t *, const uint16_t *, int, const int16_t *, int, int, int);
+
+void ff_add_dirac_obmc8_mmx(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
+void ff_add_dirac_obmc16_mmx(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
+void ff_add_dirac_obmc32_mmx(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
+
+void ff_add_dirac_obmc16_sse2(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
+void ff_add_dirac_obmc32_sse2(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
+
+#endif
diff --git a/libavcodec/x86/diracdsp_yasm.asm b/libavcodec/x86/diracdsp_yasm.asm
new file mode 100644
index 0000000000..3e9765b42d
--- /dev/null
+++ b/libavcodec/x86/diracdsp_yasm.asm
@@ -0,0 +1,264 @@
+;******************************************************************************
+;* Copyright (c) 2010 David Conrad
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* 51, Inc., Foundation Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+pw_3: times 8 dw 3
+pw_7: times 8 dw 7
+pw_16: times 8 dw 16
+pw_32: times 8 dw 32
+pb_128: times 16 db 128
+
+section .text
+
+%macro UNPACK_ADD 6
+ mov%5 %1, %3
+ mov%6 m5, %4
+ mova m4, %1
+ mova %2, m5
+ punpcklbw %1, m7
+ punpcklbw m5, m7
+ punpckhbw m4, m7
+ punpckhbw %2, m7
+ paddw %1, m5
+ paddw %2, m4
+%endmacro
+
+%macro HPEL_FILTER 1
+; dirac_hpel_filter_v_sse2(uint8_t *dst, uint8_t *src, int stride, int width);
+cglobal dirac_hpel_filter_v_%1, 4,6,8, dst, src, stride, width, src0, stridex3
+ mov src0q, srcq
+ lea stridex3q, [3*strideq]
+ sub src0q, stridex3q
+ pxor m7, m7
+.loop:
+ ; 7*(src[0] + src[1])
+ UNPACK_ADD m0, m1, [srcq], [srcq + strideq], a,a
+ pmullw m0, [pw_7]
+ pmullw m1, [pw_7]
+
+ ; 3*( ... + src[-2] + src[3])
+ UNPACK_ADD m2, m3, [src0q + strideq], [srcq + stridex3q], a,a
+ paddw m0, m2
+ paddw m1, m3
+ pmullw m0, [pw_3]
+ pmullw m1, [pw_3]
+
+ ; ... - 7*(src[-1] + src[2])
+ UNPACK_ADD m2, m3, [src0q + strideq*2], [srcq + strideq*2], a,a
+ pmullw m2, [pw_7]
+ pmullw m3, [pw_7]
+ psubw m0, m2
+ psubw m1, m3
+
+ ; ... - (src[-3] + src[4])
+ UNPACK_ADD m2, m3, [src0q], [srcq + strideq*4], a,a
+ psubw m0, m2
+ psubw m1, m3
+
+ paddw m0, [pw_16]
+ paddw m1, [pw_16]
+ psraw m0, 5
+ psraw m1, 5
+ packuswb m0, m1
+ mova [dstq], m0
+ add dstq, mmsize
+ add srcq, mmsize
+ add src0q, mmsize
+ sub widthd, mmsize
+ jg .loop
+ RET
+
+; dirac_hpel_filter_h_sse2(uint8_t *dst, uint8_t *src, int width);
+cglobal dirac_hpel_filter_h_%1, 3,3,8, dst, src, width
+ dec widthd
+ pxor m7, m7
+ and widthd, ~(mmsize-1)
+.loop:
+ ; 7*(src[0] + src[1])
+ UNPACK_ADD m0, m1, [srcq + widthq], [srcq + widthq + 1], u,u
+ pmullw m0, [pw_7]
+ pmullw m1, [pw_7]
+
+ ; 3*( ... + src[-2] + src[3])
+ UNPACK_ADD m2, m3, [srcq + widthq - 2], [srcq + widthq + 3], u,u
+ paddw m0, m2
+ paddw m1, m3
+ pmullw m0, [pw_3]
+ pmullw m1, [pw_3]
+
+ ; ... - 7*(src[-1] + src[2])
+ UNPACK_ADD m2, m3, [srcq + widthq - 1], [srcq + widthq + 2], u,u
+ pmullw m2, [pw_7]
+ pmullw m3, [pw_7]
+ psubw m0, m2
+ psubw m1, m3
+
+ ; ... - (src[-3] + src[4])
+ UNPACK_ADD m2, m3, [srcq + widthq - 3], [srcq + widthq + 4], u,u
+ psubw m0, m2
+ psubw m1, m3
+
+ paddw m0, [pw_16]
+ paddw m1, [pw_16]
+ psraw m0, 5
+ psraw m1, 5
+ packuswb m0, m1
+ mova [dstq + widthq], m0
+ sub widthd, mmsize
+ jge .loop
+ RET
+%endmacro
+
+%macro PUT_RECT 1
+; void put_rect_clamped(uint8_t *dst, int dst_stride, int16_t *src, int src_stride, int width, int height)
+cglobal put_signed_rect_clamped_%1, 5,9,3, dst, dst_stride, src, src_stride, w, dst2, src2
+ mova m0, [pb_128]
+ add wd, (mmsize-1)
+ and wd, ~(mmsize-1)
+
+%if ARCH_X86_64
+ movsxd dst_strideq, dst_strided
+ movsxd src_strideq, src_strided
+ mov r7d, r5m
+ mov r8d, wd
+ %define wspill r8d
+ %define hd r7d
+%else
+ mov r4m, wd
+ %define wspill r4m
+ %define hd r5mp
+%endif
+
+.loopy
+ lea src2q, [srcq+src_strideq*2]
+ lea dst2q, [dstq+dst_strideq]
+.loopx:
+ sub wd, mmsize
+ mova m1, [srcq +2*wq]
+ mova m2, [src2q+2*wq]
+ packsswb m1, [srcq +2*wq+mmsize]
+ packsswb m2, [src2q+2*wq+mmsize]
+ paddb m1, m0
+ paddb m2, m0
+ mova [dstq +wq], m1
+ mova [dst2q+wq], m2
+ jg .loopx
+
+ lea srcq, [srcq+src_strideq*4]
+ lea dstq, [dstq+dst_strideq*2]
+ sub hd, 2
+ mov wd, wspill
+ jg .loopy
+ RET
+%endm
+
+%macro ADD_RECT 1
+; void add_rect_clamped(uint8_t *dst, uint16_t *src, int stride, int16_t *idwt, int idwt_stride, int width, int height)
+cglobal add_rect_clamped_%1, 7,9,3, dst, src, stride, idwt, idwt_stride, w, h
+ mova m0, [pw_32]
+ add wd, (mmsize-1)
+ and wd, ~(mmsize-1)
+
+%if ARCH_X86_64
+ movsxd strideq, strided
+ movsxd idwt_strideq, idwt_strided
+ mov r8d, wd
+ %define wspill r8d
+%else
+ mov r5m, wd
+ %define wspill r5m
+%endif
+
+.loop:
+ sub wd, mmsize
+ movu m1, [srcq +2*wq] ; FIXME: ensure alignment
+ paddw m1, m0
+ psraw m1, 6
+ movu m2, [srcq +2*wq+mmsize] ; FIXME: ensure alignment
+ paddw m2, m0
+ psraw m2, 6
+ paddw m1, [idwtq+2*wq]
+ paddw m2, [idwtq+2*wq+mmsize]
+ packuswb m1, m2
+ mova [dstq +wq], m1
+ jg .loop
+
+ lea srcq, [srcq + 2*strideq]
+ add dstq, strideq
+ lea idwtq, [idwtq+ 2*idwt_strideq]
+ sub hd, 1
+ mov wd, wspill
+ jg .loop
+ RET
+%endm
+
+%macro ADD_OBMC 2
+; void add_obmc(uint16_t *dst, uint8_t *src, int stride, uint8_t *obmc_weight, int yblen)
+cglobal add_dirac_obmc%1_%2, 6,6,5, dst, src, stride, obmc, yblen
+ pxor m4, m4
+.loop:
+%assign i 0
+%rep %1 / mmsize
+ mova m0, [srcq+i]
+ mova m1, m0
+ punpcklbw m0, m4
+ punpckhbw m1, m4
+ mova m2, [obmcq+i]
+ mova m3, m2
+ punpcklbw m2, m4
+ punpckhbw m3, m4
+ pmullw m0, m2
+ pmullw m1, m3
+ movu m2, [dstq+2*i]
+ movu m3, [dstq+2*i+mmsize]
+ paddw m0, m2
+ paddw m1, m3
+ movu [dstq+2*i], m0
+ movu [dstq+2*i+mmsize], m1
+%assign i i+mmsize
+%endrep
+ lea srcq, [srcq+strideq]
+ lea dstq, [dstq+2*strideq]
+ add obmcq, 32
+ sub yblend, 1
+ jg .loop
+ RET
+%endm
+
+INIT_MMX
+%if ARCH_X86_64 == 0
+PUT_RECT mmx
+ADD_RECT mmx
+
+HPEL_FILTER mmx
+ADD_OBMC 32, mmx
+ADD_OBMC 16, mmx
+%endif
+ADD_OBMC 8, mmx
+
+INIT_XMM
+PUT_RECT sse2
+ADD_RECT sse2
+
+HPEL_FILTER sse2
+ADD_OBMC 32, sse2
+ADD_OBMC 16, sse2
diff --git a/libavcodec/x86/dnxhdenc.c b/libavcodec/x86/dnxhdenc.c
index 9f71c82e91..349fbb0825 100644
--- a/libavcodec/x86/dnxhdenc.c
+++ b/libavcodec/x86/dnxhdenc.c
@@ -4,20 +4,20 @@
*
* VC-3 encoder funded by the British Broadcasting Corporation
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/dsputil.asm b/libavcodec/x86/dsputil.asm
index 5d73ff8ee4..77069e20f8 100644
--- a/libavcodec/x86/dsputil.asm
+++ b/libavcodec/x86/dsputil.asm
@@ -1,21 +1,23 @@
;******************************************************************************
;* MMX optimized DSP utils
;* Copyright (c) 2008 Loren Merritt
+;* Copyright (c) 2003-2013 Michael Niedermayer
+;* Copyright (c) 2013 Daniel Kang
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
@@ -605,6 +607,7 @@ cglobal bswap32_buf, 3,4,3
cglobal bswap32_buf, 3,4,5
mov r3, r1
%endif
+ or r3, r0
and r3, 15
jz .start_align
BSWAP_LOOPS u
diff --git a/libavcodec/x86/dsputil_init.c b/libavcodec/x86/dsputil_init.c
index c2bf403104..7b49fd623d 100644
--- a/libavcodec/x86/dsputil_init.c
+++ b/libavcodec/x86/dsputil_init.c
@@ -1,18 +1,21 @@
/*
- * This file is part of Libav.
+ * Copyright (c) 2000, 2001 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
*
- * Libav is free software; you can redistribute it and/or
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -542,24 +545,11 @@ static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx,
c->clear_block = ff_clear_block_mmx;
c->clear_blocks = ff_clear_blocks_mmx;
c->draw_edges = ff_draw_edges_mmx;
-
- switch (avctx->idct_algo) {
- case FF_IDCT_AUTO:
- case FF_IDCT_SIMPLEMMX:
- c->idct_put = ff_simple_idct_put_mmx;
- c->idct_add = ff_simple_idct_add_mmx;
- c->idct = ff_simple_idct_mmx;
- c->idct_permutation_type = FF_SIMPLE_IDCT_PERM;
- break;
- case FF_IDCT_XVIDMMX:
- c->idct_put = ff_idct_xvid_mmx_put;
- c->idct_add = ff_idct_xvid_mmx_add;
- c->idct = ff_idct_xvid_mmx;
- break;
- }
}
+#if CONFIG_VIDEODSP && (ARCH_X86_32 || !HAVE_YASM)
c->gmc = ff_gmc_mmx;
+#endif
c->add_bytes = ff_add_bytes_mmx;
#endif /* HAVE_MMX_INLINE */
@@ -577,16 +567,6 @@ static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx,
static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx,
int mm_flags)
{
-#if HAVE_MMXEXT_INLINE
- const int high_bit_depth = avctx->bits_per_raw_sample > 8;
-
- if (!high_bit_depth && avctx->idct_algo == FF_IDCT_XVIDMMX) {
- c->idct_put = ff_idct_xvid_mmxext_put;
- c->idct_add = ff_idct_xvid_mmxext_add;
- c->idct = ff_idct_xvid_mmxext;
- }
-#endif /* HAVE_MMXEXT_INLINE */
-
#if HAVE_MMXEXT_EXTERNAL
SET_QPEL_FUNCS(avg_qpel, 0, 16, mmxext, );
SET_QPEL_FUNCS(avg_qpel, 1, 8, mmxext, );
@@ -627,6 +607,12 @@ static av_cold void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx,
c->vector_clipf = ff_vector_clipf_sse;
#endif /* HAVE_SSE_INLINE */
+
+#if HAVE_YASM
+#if HAVE_INLINE_ASM && CONFIG_VIDEODSP
+ c->gmc = ff_gmc_sse;
+#endif
+#endif /* HAVE_YASM */
}
static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx,
@@ -695,8 +681,37 @@ av_cold void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx)
c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_cmov;
#endif
- if (mm_flags & AV_CPU_FLAG_MMX)
+ if (mm_flags & AV_CPU_FLAG_MMX) {
+#if HAVE_INLINE_ASM
+ const int idct_algo = avctx->idct_algo;
+
+ if (avctx->lowres == 0 && avctx->bits_per_raw_sample <= 8) {
+ if (idct_algo == FF_IDCT_AUTO || idct_algo == FF_IDCT_SIMPLEMMX) {
+ c->idct_put = ff_simple_idct_put_mmx;
+ c->idct_add = ff_simple_idct_add_mmx;
+ c->idct = ff_simple_idct_mmx;
+ c->idct_permutation_type = FF_SIMPLE_IDCT_PERM;
+ } else if (idct_algo == FF_IDCT_XVIDMMX) {
+ if (mm_flags & AV_CPU_FLAG_SSE2) {
+ c->idct_put = ff_idct_xvid_sse2_put;
+ c->idct_add = ff_idct_xvid_sse2_add;
+ c->idct = ff_idct_xvid_sse2;
+ c->idct_permutation_type = FF_SSE2_IDCT_PERM;
+ } else if (mm_flags & AV_CPU_FLAG_MMXEXT) {
+ c->idct_put = ff_idct_xvid_mmxext_put;
+ c->idct_add = ff_idct_xvid_mmxext_add;
+ c->idct = ff_idct_xvid_mmxext;
+ } else {
+ c->idct_put = ff_idct_xvid_mmx_put;
+ c->idct_add = ff_idct_xvid_mmx_add;
+ c->idct = ff_idct_xvid_mmx;
+ }
+ }
+ }
+#endif /* HAVE_INLINE_ASM */
+
dsputil_init_mmx(c, avctx, mm_flags);
+ }
if (mm_flags & AV_CPU_FLAG_MMXEXT)
dsputil_init_mmxext(c, avctx, mm_flags);
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index b0e48e3117..421a6c2090 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -3,30 +3,33 @@
* Copyright (c) 2000, 2001 Fabrice Bellard
* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
*
- * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
- *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
*/
#include "config.h"
+#include "libavutil/avassert.h"
#include "libavutil/cpu.h"
#include "libavutil/x86/asm.h"
+#include "libavcodec/videodsp.h"
#include "constants.h"
#include "dsputil_x86.h"
+#include "diracdsp_mmx.h"
#if HAVE_INLINE_ASM
@@ -317,7 +320,7 @@ void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
: "+r"(ptr)
: "r"((x86_reg)wrap), "r"((x86_reg)width), "r"(ptr + wrap * height)
);
- } else {
+ } else if(w==16){
__asm__ volatile (
"1: \n\t"
"movd (%0), %%mm0 \n\t"
@@ -338,6 +341,25 @@ void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
: "+r"(ptr)
: "r"((x86_reg)wrap), "r"((x86_reg)width), "r"(ptr + wrap * height)
);
+ } else {
+ av_assert1(w == 4);
+ __asm__ volatile (
+ "1: \n\t"
+ "movd (%0), %%mm0 \n\t"
+ "punpcklbw %%mm0, %%mm0 \n\t"
+ "punpcklwd %%mm0, %%mm0 \n\t"
+ "movd %%mm0, -4(%0) \n\t"
+ "movd -4(%0, %2), %%mm1 \n\t"
+ "punpcklbw %%mm1, %%mm1 \n\t"
+ "punpckhwd %%mm1, %%mm1 \n\t"
+ "punpckhdq %%mm1, %%mm1 \n\t"
+ "movd %%mm1, (%0, %2) \n\t"
+ "add %1, %0 \n\t"
+ "cmp %3, %0 \n\t"
+ "jb 1b \n\t"
+ : "+r"(ptr)
+ : "r"((x86_reg)wrap), "r"((x86_reg)width), "r"(ptr + wrap * height)
+ );
}
/* top and bottom (and hopefully also the corners) */
@@ -383,10 +405,15 @@ void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height,
}
}
-void ff_gmc_mmx(uint8_t *dst, uint8_t *src,
- int stride, int h, int ox, int oy,
- int dxx, int dxy, int dyx, int dyy,
- int shift, int r, int width, int height)
+typedef void emulated_edge_mc_func(uint8_t *dst, const uint8_t *src,
+ ptrdiff_t linesize, int block_w, int block_h,
+ int src_x, int src_y, int w, int h);
+
+static av_always_inline void gmc(uint8_t *dst, uint8_t *src,
+ int stride, int h, int ox, int oy,
+ int dxx, int dxy, int dyx, int dyy,
+ int shift, int r, int width, int height,
+ emulated_edge_mc_func *emu_edge_fn)
{
const int w = 8;
const int ix = ox >> (16 + shift);
@@ -401,19 +428,24 @@ void ff_gmc_mmx(uint8_t *dst, uint8_t *src,
const uint16_t dxy4[4] = { dxys, dxys, dxys, dxys };
const uint16_t dyy4[4] = { dyys, dyys, dyys, dyys };
const uint64_t shift2 = 2 * shift;
+#define MAX_STRIDE 4096U
+#define MAX_H 8U
+ uint8_t edge_buf[(MAX_H + 1) * MAX_STRIDE];
int x, y;
const int dxw = (dxx - (1 << (16 + shift))) * (w - 1);
const int dyh = (dyy - (1 << (16 + shift))) * (h - 1);
const int dxh = dxy * (h - 1);
const int dyw = dyx * (w - 1);
+ int need_emu = (unsigned)ix >= width - w ||
+ (unsigned)iy >= height - h;
+
if ( // non-constant fullpel offset (3% of blocks)
((ox ^ (ox + dxw)) | (ox ^ (ox + dxh)) | (ox ^ (ox + dxw + dxh)) |
(oy ^ (oy + dyw)) | (oy ^ (oy + dyh)) | (oy ^ (oy + dyw + dyh))) >> (16 + shift)
// uses more than 16 bits of subpel mv (only at huge resolution)
- || (dxx | dxy | dyx | dyy) & 15 ||
- (unsigned)ix >= width - w ||
- (unsigned)iy >= height - h) {
+ || (dxx | dxy | dyx | dyy) & 15
+ || (need_emu && (h > MAX_H || stride > MAX_STRIDE))) {
// FIXME could still use mmx for some of the rows
ff_gmc_c(dst, src, stride, h, ox, oy, dxx, dxy, dyx, dyy,
shift, r, width, height);
@@ -421,6 +453,10 @@ void ff_gmc_mmx(uint8_t *dst, uint8_t *src,
}
src += ix + iy * stride;
+ if (need_emu) {
+ emu_edge_fn(edge_buf, src, stride, w + 1, h + 1, ix, iy, width, height);
+ src = edge_buf;
+ }
__asm__ volatile (
"movd %0, %%mm6 \n\t"
@@ -499,6 +535,108 @@ void ff_gmc_mmx(uint8_t *dst, uint8_t *src,
}
}
+#if CONFIG_VIDEODSP
+#if HAVE_YASM
+#if ARCH_X86_32
+void ff_gmc_mmx(uint8_t *dst, uint8_t *src,
+ int stride, int h, int ox, int oy,
+ int dxx, int dxy, int dyx, int dyy,
+ int shift, int r, int width, int height)
+{
+ gmc(dst, src, stride, h, ox, oy, dxx, dxy, dyx, dyy, shift, r,
+ width, height, &ff_emulated_edge_mc_8);
+}
+#endif
+void ff_gmc_sse(uint8_t *dst, uint8_t *src,
+ int stride, int h, int ox, int oy,
+ int dxx, int dxy, int dyx, int dyy,
+ int shift, int r, int width, int height)
+{
+ gmc(dst, src, stride, h, ox, oy, dxx, dxy, dyx, dyy, shift, r,
+ width, height, &ff_emulated_edge_mc_8);
+}
+#else
+void ff_gmc_mmx(uint8_t *dst, uint8_t *src,
+ int stride, int h, int ox, int oy,
+ int dxx, int dxy, int dyx, int dyy,
+ int shift, int r, int width, int height)
+{
+ gmc(dst, src, stride, h, ox, oy, dxx, dxy, dyx, dyy, shift, r,
+ width, height, &ff_emulated_edge_mc_8);
+}
+#endif
+#endif
+
+#if CONFIG_DIRAC_DECODER
+#define DIRAC_PIXOP(OPNAME2, OPNAME, EXT)\
+void ff_ ## OPNAME2 ## _dirac_pixels8_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
+{\
+ if (h&3)\
+ ff_ ## OPNAME2 ## _dirac_pixels8_c(dst, src, stride, h);\
+ else\
+ OPNAME ## _pixels8_ ## EXT(dst, src[0], stride, h);\
+}\
+void ff_ ## OPNAME2 ## _dirac_pixels16_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
+{\
+ if (h&3)\
+ ff_ ## OPNAME2 ## _dirac_pixels16_c(dst, src, stride, h);\
+ else\
+ OPNAME ## _pixels16_ ## EXT(dst, src[0], stride, h);\
+}\
+void ff_ ## OPNAME2 ## _dirac_pixels32_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
+{\
+ if (h&3) {\
+ ff_ ## OPNAME2 ## _dirac_pixels32_c(dst, src, stride, h);\
+ } else {\
+ OPNAME ## _pixels16_ ## EXT(dst , src[0] , stride, h);\
+ OPNAME ## _pixels16_ ## EXT(dst+16, src[0]+16, stride, h);\
+ }\
+}
+
+#if HAVE_MMX_INLINE
+PIXELS16(static, ff_avg, , , _mmxext)
+DIRAC_PIXOP(put, ff_put, mmx)
+DIRAC_PIXOP(avg, ff_avg, mmx)
+#endif
+
+#if HAVE_YASM
+DIRAC_PIXOP(avg, ff_avg, mmxext)
+
+void ff_put_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
+{
+ if (h&3)
+ ff_put_dirac_pixels16_c(dst, src, stride, h);
+ else
+ ff_put_pixels16_sse2(dst, src[0], stride, h);
+}
+void ff_avg_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
+{
+ if (h&3)
+ ff_avg_dirac_pixels16_c(dst, src, stride, h);
+ else
+ ff_avg_pixels16_sse2(dst, src[0], stride, h);
+}
+void ff_put_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
+{
+ if (h&3) {
+ ff_put_dirac_pixels32_c(dst, src, stride, h);
+ } else {
+ ff_put_pixels16_sse2(dst , src[0] , stride, h);
+ ff_put_pixels16_sse2(dst+16, src[0]+16, stride, h);
+ }
+}
+void ff_avg_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
+{
+ if (h&3) {
+ ff_avg_dirac_pixels32_c(dst, src, stride, h);
+ } else {
+ ff_avg_pixels16_sse2(dst , src[0] , stride, h);
+ ff_avg_pixels16_sse2(dst+16, src[0]+16, stride, h);
+ }
+}
+#endif
+#endif
+
void ff_vector_clipf_sse(float *dst, const float *src,
float min, float max, int len)
{
diff --git a/libavcodec/x86/dsputil_qns_template.c b/libavcodec/x86/dsputil_qns_template.c
index 20a40a175e..bde6b0a606 100644
--- a/libavcodec/x86/dsputil_qns_template.c
+++ b/libavcodec/x86/dsputil_qns_template.c
@@ -5,20 +5,20 @@
* MMX optimization by Michael Niedermayer <michaelni@gmx.at>
* 3DNow! and SSSE3 optimization by Zuxy Meng <zuxy.meng@gmail.com>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -28,7 +28,7 @@ static int DEF(try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[
{
x86_reg i=0;
- assert(FFABS(scale) < MAX_ABS);
+ av_assert2(FFABS(scale) < MAX_ABS);
scale<<= 16 + SCALE_OFFSET - BASIS_SHIFT + RECON_SHIFT;
SET_RND(mm6);
diff --git a/libavcodec/x86/dsputil_x86.h b/libavcodec/x86/dsputil_x86.h
index c8615b2472..356b2c142f 100644
--- a/libavcodec/x86/dsputil_x86.h
+++ b/libavcodec/x86/dsputil_x86.h
@@ -2,20 +2,20 @@
* MMX optimized DSP utils
* Copyright (c) 2007 Aurelien Jacobs <aurel@gnuage.org>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -129,6 +129,11 @@ void ff_gmc_mmx(uint8_t *dst, uint8_t *src,
int dxx, int dxy, int dyx, int dyy,
int shift, int r, int width, int height);
+void ff_gmc_sse(uint8_t *dst, uint8_t *src,
+ int stride, int h, int ox, int oy,
+ int dxx, int dxy, int dyx, int dyy,
+ int shift, int r, int width, int height);
+
void ff_vector_clipf_sse(float *dst, const float *src,
float min, float max, int len);
@@ -162,6 +167,10 @@ void ff_put_pixels8_xy2_mmx(uint8_t *block, const uint8_t *pixels,
void ff_put_pixels16_xy2_mmx(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
+
+void ff_mmx_idct(int16_t *block);
+void ff_mmxext_idct(int16_t *block);
+
void ff_deinterlace_line_mmx(uint8_t *dst,
const uint8_t *lum_m4, const uint8_t *lum_m3,
const uint8_t *lum_m2, const uint8_t *lum_m1,
diff --git a/libavcodec/x86/dsputilenc.asm b/libavcodec/x86/dsputilenc.asm
index 7e4fd8152c..1839bee24a 100644
--- a/libavcodec/x86/dsputilenc.asm
+++ b/libavcodec/x86/dsputilenc.asm
@@ -4,20 +4,20 @@
;* Copyright (c) 2000, 2001 Fabrice Bellard
;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;*****************************************************************************
diff --git a/libavcodec/x86/dsputilenc_mmx.c b/libavcodec/x86/dsputilenc_mmx.c
index 8537f7ab58..51026d10e2 100644
--- a/libavcodec/x86/dsputilenc_mmx.c
+++ b/libavcodec/x86/dsputilenc_mmx.c
@@ -5,20 +5,20 @@
*
* MMX optimization by Nick Kurshev <nickols_k@mail.ru>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -424,8 +424,8 @@ static int nsse8_mmx(void *p, uint8_t * pix1, uint8_t * pix2, int line_size, int
static int vsad_intra16_mmx(void *v, uint8_t * pix, uint8_t * dummy, int line_size, int h) {
int tmp;
- assert( (((int)pix) & 7) == 0);
- assert((line_size &7) ==0);
+ av_assert2( (((int)pix) & 7) == 0);
+ av_assert2((line_size &7) ==0);
#define SUM(in0, in1, out0, out1) \
"movq (%0), %%mm2\n"\
@@ -487,8 +487,8 @@ static int vsad_intra16_mmxext(void *v, uint8_t *pix, uint8_t *dummy,
{
int tmp;
- assert( (((int)pix) & 7) == 0);
- assert((line_size &7) ==0);
+ av_assert2( (((int)pix) & 7) == 0);
+ av_assert2((line_size &7) ==0);
#define SUM(in0, in1, out0, out1) \
"movq (%0), " #out0 "\n"\
@@ -527,9 +527,9 @@ static int vsad_intra16_mmxext(void *v, uint8_t *pix, uint8_t *dummy,
static int vsad16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {
int tmp;
- assert( (((int)pix1) & 7) == 0);
- assert( (((int)pix2) & 7) == 0);
- assert((line_size &7) ==0);
+ av_assert2( (((int)pix1) & 7) == 0);
+ av_assert2( (((int)pix2) & 7) == 0);
+ av_assert2((line_size &7) ==0);
#define SUM(in0, in1, out0, out1) \
"movq (%0),%%mm2\n"\
@@ -607,9 +607,9 @@ static int vsad16_mmxext(void *v, uint8_t *pix1, uint8_t *pix2,
{
int tmp;
- assert( (((int)pix1) & 7) == 0);
- assert( (((int)pix2) & 7) == 0);
- assert((line_size &7) ==0);
+ av_assert2( (((int)pix1) & 7) == 0);
+ av_assert2( (((int)pix2) & 7) == 0);
+ av_assert2((line_size &7) ==0);
#define SUM(in0, in1, out0, out1) \
"movq (%0)," #out0 "\n"\
@@ -661,8 +661,9 @@ static int vsad16_mmxext(void *v, uint8_t *pix1, uint8_t *pix2,
}
#undef SUM
-static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
+static void diff_bytes_mmx(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w){
x86_reg i=0;
+ if(w>=16)
__asm__ volatile(
"1: \n\t"
"movq (%2, %0), %%mm0 \n\t"
diff --git a/libavcodec/x86/dwt_yasm.asm b/libavcodec/x86/dwt_yasm.asm
new file mode 100644
index 0000000000..5253abc6c8
--- /dev/null
+++ b/libavcodec/x86/dwt_yasm.asm
@@ -0,0 +1,306 @@
+;******************************************************************************
+;* MMX optimized discrete wavelet trasnform
+;* Copyright (c) 2010 David Conrad
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* 51, Inc., Foundation Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+pw_1: times 8 dw 1
+pw_2: times 8 dw 2
+pw_8: times 8 dw 8
+pw_16: times 8 dw 16
+pw_1991: times 4 dw 9,-1
+
+section .text
+
+; %1 -= (%2 + %3 + 2)>>2 %4 is pw_2
+%macro COMPOSE_53iL0 4
+ paddw %2, %3
+ paddw %2, %4
+ psraw %2, 2
+ psubw %1, %2
+%endm
+
+; m1 = %1 + (-m0 + 9*m1 + 9*%2 -%3 + 8)>>4
+; if %4 is supplied, %1 is loaded unaligned from there
+; m2: clobbered m3: pw_8 m4: pw_1991
+%macro COMPOSE_DD97iH0 3-4
+ paddw m0, %3
+ paddw m1, %2
+ psubw m0, m3
+ mova m2, m1
+ punpcklwd m1, m0
+ punpckhwd m2, m0
+ pmaddwd m1, m4
+ pmaddwd m2, m4
+%if %0 > 3
+ movu %1, %4
+%endif
+ psrad m1, 4
+ psrad m2, 4
+ packssdw m1, m2
+ paddw m1, %1
+%endm
+
+%macro COMPOSE_VERTICAL 1
+; void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
+; int width)
+cglobal vertical_compose53iL0_%1, 4,4,1, b0, b1, b2, width
+ mova m2, [pw_2]
+%if ARCH_X86_64
+ mov widthd, widthd
+%endif
+.loop:
+ sub widthq, mmsize/2
+ mova m1, [b0q+2*widthq]
+ mova m0, [b1q+2*widthq]
+ COMPOSE_53iL0 m0, m1, [b2q+2*widthq], m2
+ mova [b1q+2*widthq], m0
+ jg .loop
+ REP_RET
+
+; void vertical_compose_dirac53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
+; int width)
+cglobal vertical_compose_dirac53iH0_%1, 4,4,1, b0, b1, b2, width
+ mova m1, [pw_1]
+%if ARCH_X86_64
+ mov widthd, widthd
+%endif
+.loop:
+ sub widthq, mmsize/2
+ mova m0, [b0q+2*widthq]
+ paddw m0, [b2q+2*widthq]
+ paddw m0, m1
+ psraw m0, 1
+ paddw m0, [b1q+2*widthq]
+ mova [b1q+2*widthq], m0
+ jg .loop
+ REP_RET
+
+; void vertical_compose_dd97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
+; IDWTELEM *b3, IDWTELEM *b4, int width)
+cglobal vertical_compose_dd97iH0_%1, 6,6,5, b0, b1, b2, b3, b4, width
+ mova m3, [pw_8]
+ mova m4, [pw_1991]
+%if ARCH_X86_64
+ mov widthd, widthd
+%endif
+.loop:
+ sub widthq, mmsize/2
+ mova m0, [b0q+2*widthq]
+ mova m1, [b1q+2*widthq]
+ COMPOSE_DD97iH0 [b2q+2*widthq], [b3q+2*widthq], [b4q+2*widthq]
+ mova [b2q+2*widthq], m1
+ jg .loop
+ REP_RET
+
+; void vertical_compose_dd137iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
+; IDWTELEM *b3, IDWTELEM *b4, int width)
+cglobal vertical_compose_dd137iL0_%1, 6,6,6, b0, b1, b2, b3, b4, width
+ mova m3, [pw_16]
+ mova m4, [pw_1991]
+%if ARCH_X86_64
+ mov widthd, widthd
+%endif
+.loop:
+ sub widthq, mmsize/2
+ mova m0, [b0q+2*widthq]
+ mova m1, [b1q+2*widthq]
+ mova m5, [b2q+2*widthq]
+ paddw m0, [b4q+2*widthq]
+ paddw m1, [b3q+2*widthq]
+ psubw m0, m3
+ mova m2, m1
+ punpcklwd m1, m0
+ punpckhwd m2, m0
+ pmaddwd m1, m4
+ pmaddwd m2, m4
+ psrad m1, 5
+ psrad m2, 5
+ packssdw m1, m2
+ psubw m5, m1
+ mova [b2q+2*widthq], m5
+ jg .loop
+ REP_RET
+
+; void vertical_compose_haar(IDWTELEM *b0, IDWTELEM *b1, int width)
+cglobal vertical_compose_haar_%1, 3,4,3, b0, b1, width
+ mova m3, [pw_1]
+%if ARCH_X86_64
+ mov widthd, widthd
+%endif
+.loop:
+ sub widthq, mmsize/2
+ mova m1, [b1q+2*widthq]
+ mova m0, [b0q+2*widthq]
+ mova m2, m1
+ paddw m1, m3
+ psraw m1, 1
+ psubw m0, m1
+ mova [b0q+2*widthq], m0
+ paddw m2, m0
+ mova [b1q+2*widthq], m2
+ jg .loop
+ REP_RET
+%endmacro
+
+; extend the left and right edges of the tmp array by %1 and %2 respectively
+%macro EDGE_EXTENSION 3
+ mov %3, [tmpq]
+%assign %%i 1
+%rep %1
+ mov [tmpq-2*%%i], %3
+ %assign %%i %%i+1
+%endrep
+ mov %3, [tmpq+2*w2q-2]
+%assign %%i 0
+%rep %2
+ mov [tmpq+2*w2q+2*%%i], %3
+ %assign %%i %%i+1
+%endrep
+%endmacro
+
+
+%macro HAAR_HORIZONTAL 2
+; void horizontal_compose_haari(IDWTELEM *b, IDWTELEM *tmp, int width)
+cglobal horizontal_compose_haar%2i_%1, 3,6,4, b, tmp, w, x, w2, b_w2
+ mov w2d, wd
+ xor xq, xq
+ shr w2d, 1
+ lea b_w2q, [bq+wq]
+ mova m3, [pw_1]
+.lowpass_loop:
+ movu m1, [b_w2q + 2*xq]
+ mova m0, [bq + 2*xq]
+ paddw m1, m3
+ psraw m1, 1
+ psubw m0, m1
+ mova [tmpq + 2*xq], m0
+ add xq, mmsize/2
+ cmp xq, w2q
+ jl .lowpass_loop
+
+ xor xq, xq
+ and w2q, ~(mmsize/2 - 1)
+ cmp w2q, mmsize/2
+ jl .end
+
+.highpass_loop:
+ movu m1, [b_w2q + 2*xq]
+ mova m0, [tmpq + 2*xq]
+ paddw m1, m0
+
+ ; shift and interleave
+%if %2 == 1
+ paddw m0, m3
+ paddw m1, m3
+ psraw m0, 1
+ psraw m1, 1
+%endif
+ mova m2, m0
+ punpcklwd m0, m1
+ punpckhwd m2, m1
+ mova [bq+4*xq], m0
+ mova [bq+4*xq+mmsize], m2
+
+ add xq, mmsize/2
+ cmp xq, w2q
+ jl .highpass_loop
+.end:
+ REP_RET
+%endmacro
+
+
+INIT_XMM
+; void horizontal_compose_dd97i(IDWTELEM *b, IDWTELEM *tmp, int width)
+cglobal horizontal_compose_dd97i_ssse3, 3,6,8, b, tmp, w, x, w2, b_w2
+ mov w2d, wd
+ xor xd, xd
+ shr w2d, 1
+ lea b_w2q, [bq+wq]
+ movu m4, [bq+wq]
+ mova m7, [pw_2]
+ pslldq m4, 14
+.lowpass_loop:
+ movu m1, [b_w2q + 2*xq]
+ mova m0, [bq + 2*xq]
+ mova m2, m1
+ palignr m1, m4, 14
+ mova m4, m2
+ COMPOSE_53iL0 m0, m1, m2, m7
+ mova [tmpq + 2*xq], m0
+ add xd, mmsize/2
+ cmp xd, w2d
+ jl .lowpass_loop
+
+ EDGE_EXTENSION 1, 2, xw
+ ; leave the last up to 7 (sse) or 3 (mmx) values for C
+ xor xd, xd
+ and w2d, ~(mmsize/2 - 1)
+ cmp w2d, mmsize/2
+ jl .end
+
+ mova m7, [tmpq-mmsize]
+ mova m0, [tmpq]
+ mova m5, [pw_1]
+ mova m3, [pw_8]
+ mova m4, [pw_1991]
+.highpass_loop:
+ mova m6, m0
+ palignr m0, m7, 14
+ mova m7, [tmpq + 2*xq + 16]
+ mova m1, m7
+ mova m2, m7
+ palignr m1, m6, 2
+ palignr m2, m6, 4
+ COMPOSE_DD97iH0 m0, m6, m2, [b_w2q + 2*xq]
+ mova m0, m7
+ mova m7, m6
+
+ ; shift and interleave
+ paddw m6, m5
+ paddw m1, m5
+ psraw m6, 1
+ psraw m1, 1
+ mova m2, m6
+ punpcklwd m6, m1
+ punpckhwd m2, m1
+ mova [bq+4*xq], m6
+ mova [bq+4*xq+mmsize], m2
+
+ add xd, mmsize/2
+ cmp xd, w2d
+ jl .highpass_loop
+.end:
+ REP_RET
+
+
+%if ARCH_X86_64 == 0
+INIT_MMX
+COMPOSE_VERTICAL mmx
+HAAR_HORIZONTAL mmx, 0
+HAAR_HORIZONTAL mmx, 1
+%endif
+
+;;INIT_XMM
+INIT_XMM
+COMPOSE_VERTICAL sse2
+HAAR_HORIZONTAL sse2, 0
+HAAR_HORIZONTAL sse2, 1
diff --git a/libavcodec/x86/fdct.c b/libavcodec/x86/fdct.c
index bd76648b98..d35245dbbe 100644
--- a/libavcodec/x86/fdct.c
+++ b/libavcodec/x86/fdct.c
@@ -13,20 +13,20 @@
* a page about fdct at http://www.geocities.com/ssavekar/dct.htm
* Skal's fdct at http://skal.planet-d.net/coding/dct.html
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -70,7 +70,7 @@ DECLARE_ALIGNED(16, static const int16_t, fdct_one_corr)[8] = { X8(1) };
DECLARE_ALIGNED(8, static const int32_t, fdct_r_row)[2] = {RND_FRW_ROW, RND_FRW_ROW };
-static struct
+static const struct
{
DECLARE_ALIGNED(16, const int32_t, fdct_r_row_sse2)[4];
} fdct_r_row_sse2 =
@@ -153,7 +153,7 @@ DECLARE_ALIGNED(8, static const int16_t, tab_frw_01234567)[] = { // forward_dct
29692, -12299, 26722, -31521,
};
-static struct
+static const struct
{
DECLARE_ALIGNED(16, const int16_t, tab_frw_01234567_sse2)[256];
} tab_frw_01234567_sse2 =
diff --git a/libavcodec/x86/fft.asm b/libavcodec/x86/fft.asm
index c87752bae8..5071741d63 100644
--- a/libavcodec/x86/fft.asm
+++ b/libavcodec/x86/fft.asm
@@ -6,20 +6,20 @@
;* This algorithm (though not any of the implementation details) is
;* based on libdjbfft by D. J. Bernstein.
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
@@ -36,6 +36,8 @@
%define pointer resd
%endif
+SECTION_RODATA
+
struc FFTContext
.nbits: resd 1
.reverse: resd 1
@@ -51,8 +53,6 @@ struc FFTContext
.imdcthalf:pointer 1
endstruc
-SECTION_RODATA
-
%define M_SQRT1_2 0.70710678118654752440
%define M_COS_PI_1_8 0.923879532511287
%define M_COS_PI_3_8 0.38268343236509
@@ -305,6 +305,7 @@ IF%1 mova Z(1), m5
INIT_YMM avx
+%if HAVE_AVX_EXTERNAL
align 16
fft8_avx:
mova m0, Z(0)
@@ -394,6 +395,8 @@ fft32_interleave_avx:
jg .deint_loop
ret
+%endif
+
INIT_XMM sse
align 16
@@ -537,6 +540,7 @@ DEFINE_ARGS zc, w, n, o1, o3
INIT_YMM avx
+%if HAVE_AVX_EXTERNAL
%macro INTERL_AVX 5
vunpckhps %3, %2, %1
vunpcklps %2, %2, %1
@@ -558,6 +562,7 @@ cglobal fft_calc, 2,5,8
FFT_DISPATCH _interleave %+ SUFFIX, r1
REP_RET
+%endif
INIT_XMM sse
@@ -776,9 +781,11 @@ align 8
dispatch_tab %+ fullsuffix: pointer list_of_fft
%endmacro ; DECL_FFT
+%if HAVE_AVX_EXTERNAL
INIT_YMM avx
DECL_FFT 6
DECL_FFT 6, _interleave
+%endif
INIT_XMM sse
DECL_FFT 5
DECL_FFT 5, _interleave
@@ -1080,4 +1087,7 @@ DECL_IMDCT POSROTATESHUF_3DNOW
%endif
INIT_YMM avx
+
+%if HAVE_AVX_EXTERNAL
DECL_IMDCT POSROTATESHUF_AVX
+%endif
diff --git a/libavcodec/x86/fft.h b/libavcodec/x86/fft.h
index 6e80b95d11..3f8b21d95b 100644
--- a/libavcodec/x86/fft.h
+++ b/libavcodec/x86/fft.h
@@ -1,18 +1,18 @@
/*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/fft_init.c b/libavcodec/x86/fft_init.c
index 77f5ba96f8..bfa794743b 100644
--- a/libavcodec/x86/fft_init.c
+++ b/libavcodec/x86/fft_init.c
@@ -1,18 +1,18 @@
/*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/fmtconvert.asm b/libavcodec/x86/fmtconvert.asm
index 8267bd47dc..60078e2c2c 100644
--- a/libavcodec/x86/fmtconvert.asm
+++ b/libavcodec/x86/fmtconvert.asm
@@ -2,20 +2,20 @@
;* x86 optimized Format Conversion Utils
;* Copyright (c) 2008 Loren Merritt
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
@@ -32,7 +32,7 @@ SECTION_TEXT
%endmacro
;---------------------------------------------------------------------------------
-; void int32_to_float_fmul_scalar(float *dst, const int *src, float mul, int len);
+; void int32_to_float_fmul_scalar(float *dst, const int32_t *src, float mul, int len);
;---------------------------------------------------------------------------------
%macro INT32_TO_FLOAT_FMUL_SCALAR 1
%if UNIX64
diff --git a/libavcodec/x86/fmtconvert_init.c b/libavcodec/x86/fmtconvert_init.c
index 24c81bdbe0..340c192500 100644
--- a/libavcodec/x86/fmtconvert_init.c
+++ b/libavcodec/x86/fmtconvert_init.c
@@ -5,20 +5,20 @@
*
* MMX optimization by Nick Kurshev <nickols_k@mail.ru>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -30,8 +30,8 @@
#if HAVE_YASM
-void ff_int32_to_float_fmul_scalar_sse (float *dst, const int *src, float mul, int len);
-void ff_int32_to_float_fmul_scalar_sse2(float *dst, const int *src, float mul, int len);
+void ff_int32_to_float_fmul_scalar_sse (float *dst, const int32_t *src, float mul, int len);
+void ff_int32_to_float_fmul_scalar_sse2(float *dst, const int32_t *src, float mul, int len);
void ff_float_to_int16_3dnow(int16_t *dst, const float *src, long len);
void ff_float_to_int16_sse (int16_t *dst, const float *src, long len);
diff --git a/libavcodec/x86/fpel.asm b/libavcodec/x86/fpel.asm
index 43b039d0e0..dc363d1a63 100644
--- a/libavcodec/x86/fpel.asm
+++ b/libavcodec/x86/fpel.asm
@@ -4,20 +4,20 @@
;* Copyright (c) 2003-2013 Michael Niedermayer
;* Copyright (c) 2013 Daniel Kang
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
diff --git a/libavcodec/x86/fpel_mmx.c b/libavcodec/x86/fpel_mmx.c
index 1ae8f86466..384ab89d9c 100644
--- a/libavcodec/x86/fpel_mmx.c
+++ b/libavcodec/x86/fpel_mmx.c
@@ -4,20 +4,20 @@
* Copyright (c) 2000, 2001 Fabrice Bellard
* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/h263_loopfilter.asm b/libavcodec/x86/h263_loopfilter.asm
index a940aad07a..a21baf1629 100644
--- a/libavcodec/x86/h263_loopfilter.asm
+++ b/libavcodec/x86/h263_loopfilter.asm
@@ -1,20 +1,22 @@
;******************************************************************************
;* MMX-optimized H.263 loop filter
+;* Copyright (c) 2003-2013 Michael Niedermayer
+;* Copyright (c) 2013 Daniel Kang
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
diff --git a/libavcodec/x86/h264_chromamc.asm b/libavcodec/x86/h264_chromamc.asm
index b7b18e03f8..32681aafd7 100644
--- a/libavcodec/x86/h264_chromamc.asm
+++ b/libavcodec/x86/h264_chromamc.asm
@@ -3,20 +3,20 @@
;* Copyright (c) 2005 Zoltan Hidvegi <hzoli -a- hzoli -d- com>,
;* 2005-2008 Loren Merritt
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
diff --git a/libavcodec/x86/h264_chromamc_10bit.asm b/libavcodec/x86/h264_chromamc_10bit.asm
index aec7678d75..b850551781 100644
--- a/libavcodec/x86/h264_chromamc_10bit.asm
+++ b/libavcodec/x86/h264_chromamc_10bit.asm
@@ -251,8 +251,10 @@ cglobal %1_h264_chroma_mc2_10, 6,7
%define CHROMAMC_AVG NOTHING
INIT_XMM sse2
CHROMA_MC8 put
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
CHROMA_MC8 put
+%endif
INIT_MMX mmxext
CHROMA_MC4 put
CHROMA_MC2 put
@@ -260,8 +262,10 @@ CHROMA_MC2 put
%define CHROMAMC_AVG AVG
INIT_XMM sse2
CHROMA_MC8 avg
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
CHROMA_MC8 avg
+%endif
INIT_MMX mmxext
CHROMA_MC4 avg
CHROMA_MC2 avg
diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
index fc6c983052..d58e16ca89 100644
--- a/libavcodec/x86/h264_deblock.asm
+++ b/libavcodec/x86/h264_deblock.asm
@@ -7,20 +7,20 @@
;* Jason Garrett-Glaser <darkshikari@gmail.com>
;* Oskar Arvidsson <oskar@irock.se>
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
@@ -389,8 +389,10 @@ cglobal deblock_h_luma_8, 5,9
INIT_XMM sse2
DEBLOCK_LUMA
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
DEBLOCK_LUMA
+%endif
%else
@@ -502,8 +504,10 @@ INIT_MMX mmxext
DEBLOCK_LUMA v8, 8
INIT_XMM sse2
DEBLOCK_LUMA v, 16
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
DEBLOCK_LUMA v, 16
+%endif
%endif ; ARCH
@@ -773,8 +777,10 @@ cglobal deblock_h_luma_intra_8, 2,4,8,0x80
INIT_XMM sse2
DEBLOCK_LUMA_INTRA v
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
DEBLOCK_LUMA_INTRA v
+%endif
%if ARCH_X86_64 == 0
INIT_MMX mmxext
DEBLOCK_LUMA_INTRA v8
@@ -835,7 +841,11 @@ cglobal deblock_h_chroma_8, 5,7
TRANSPOSE4x8_LOAD bw, wd, dq, PASS8ROWS(t5, r0, r1, t6)
movq buf0, m0
movq buf1, m3
- call ff_chroma_inter_body_mmxext
+ LOAD_MASK r2d, r3d
+ movd m6, [r4] ; tc0
+ punpcklbw m6, m6
+ pand m7, m6
+ DEBLOCK_P0_Q0
movq m0, buf0
movq m3, buf1
TRANSPOSE8x4B_STORE PASS8ROWS(t5, r0, r1, t6)
diff --git a/libavcodec/x86/h264_deblock_10bit.asm b/libavcodec/x86/h264_deblock_10bit.asm
index 3b81ef6fcf..d63ca02c7a 100644
--- a/libavcodec/x86/h264_deblock_10bit.asm
+++ b/libavcodec/x86/h264_deblock_10bit.asm
@@ -417,9 +417,11 @@ cglobal deblock_h_luma_10, 5,7,15
INIT_XMM sse2
DEBLOCK_LUMA_64
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
DEBLOCK_LUMA_64
%endif
+%endif
%macro SWAPMOVA 2
%ifid %1
@@ -712,8 +714,10 @@ cglobal deblock_h_luma_intra_10, 4,7,16
INIT_XMM sse2
DEBLOCK_LUMA_INTRA_64
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
DEBLOCK_LUMA_INTRA_64
+%endif
%endif
@@ -797,10 +801,12 @@ DEBLOCK_LUMA_INTRA
INIT_XMM sse2
DEBLOCK_LUMA
DEBLOCK_LUMA_INTRA
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
DEBLOCK_LUMA
DEBLOCK_LUMA_INTRA
%endif
+%endif
; in: %1=p0, %2=q0, %3=p1, %4=q1, %5=mask, %6=tmp, %7=tmp
; out: %1=p0', %2=q0'
@@ -911,5 +917,7 @@ DEBLOCK_CHROMA
%endif
INIT_XMM sse2
DEBLOCK_CHROMA
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
DEBLOCK_CHROMA
+%endif
diff --git a/libavcodec/x86/h264_i386.h b/libavcodec/x86/h264_i386.h
index bb881c35df..0dc0a7cb0f 100644
--- a/libavcodec/x86/h264_i386.h
+++ b/libavcodec/x86/h264_i386.h
@@ -2,20 +2,20 @@
* H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
* Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/h264_idct.asm b/libavcodec/x86/h264_idct.asm
index 3ced8af40e..17bf794bce 100644
--- a/libavcodec/x86/h264_idct.asm
+++ b/libavcodec/x86/h264_idct.asm
@@ -9,20 +9,20 @@
;* Holger Lubitz <hal@duncan.ol.sub.de>
;* Min Chen <chenm001.163.com>
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;*****************************************************************************
diff --git a/libavcodec/x86/h264_idct_10bit.asm b/libavcodec/x86/h264_idct_10bit.asm
index 4e51d2b5d0..88fdb843a6 100644
--- a/libavcodec/x86/h264_idct_10bit.asm
+++ b/libavcodec/x86/h264_idct_10bit.asm
@@ -83,8 +83,10 @@ cglobal h264_idct_add_10, 3,3
INIT_XMM sse2
IDCT_ADD_10
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
IDCT_ADD_10
+%endif
;-----------------------------------------------------------------------------
; h264_idct_add16(pixel *dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8])
@@ -115,9 +117,11 @@ add4x4_idct %+ SUFFIX:
INIT_XMM sse2
ALIGN 16
ADD4x4IDCT
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
ALIGN 16
ADD4x4IDCT
+%endif
%macro ADD16_OP 2
cmp byte [r4+%2], 0
@@ -153,8 +157,10 @@ cglobal h264_idct_add16_10, 5,6
INIT_XMM sse2
IDCT_ADD16_10
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
IDCT_ADD16_10
+%endif
;-----------------------------------------------------------------------------
; void h264_idct_dc_add(pixel *dst, dctcoef *block, int stride)
@@ -218,8 +224,10 @@ cglobal h264_idct8_dc_add_10,3,4,7
INIT_XMM sse2
IDCT8_DC_ADD
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
IDCT8_DC_ADD
+%endif
;-----------------------------------------------------------------------------
; h264_idct_add16intra(pixel *dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8])
@@ -289,8 +297,10 @@ cglobal h264_idct_add16intra_10,5,7,8
INIT_XMM sse2
IDCT_ADD16INTRA_10
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
IDCT_ADD16INTRA_10
+%endif
%assign last_block 36
;-----------------------------------------------------------------------------
@@ -324,8 +334,10 @@ cglobal h264_idct_add8_10,5,8,7
INIT_XMM sse2
IDCT_ADD8
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
IDCT_ADD8
+%endif
;-----------------------------------------------------------------------------
; void h264_idct8_add(pixel *dst, dctcoef *block, int stride)
@@ -531,8 +543,10 @@ h264_idct8_add1_10 %+ SUFFIX:
INIT_XMM sse2
IDCT8_ADD
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
IDCT8_ADD
+%endif
;-----------------------------------------------------------------------------
; h264_idct8_add4(pixel **dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8])
@@ -569,5 +583,7 @@ cglobal h264_idct8_add4_10, 0,7,16
INIT_XMM sse2
IDCT8_ADD4
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
IDCT8_ADD4
+%endif
diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
index b9db3f450f..5c0dff429a 100644
--- a/libavcodec/x86/h264_intrapred.asm
+++ b/libavcodec/x86/h264_intrapred.asm
@@ -5,20 +5,20 @@
;* Copyright (c) 2010 Loren Merritt
;* Copyright (c) 2010 Ronald S. Bultje
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
diff --git a/libavcodec/x86/h264_intrapred_10bit.asm b/libavcodec/x86/h264_intrapred_10bit.asm
index 1b7974b790..db2b25ce57 100644
--- a/libavcodec/x86/h264_intrapred_10bit.asm
+++ b/libavcodec/x86/h264_intrapred_10bit.asm
@@ -82,8 +82,10 @@ INIT_XMM sse2
PRED4x4_DR
INIT_XMM ssse3
PRED4x4_DR
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
PRED4x4_DR
+%endif
;-----------------------------------------------------------------------------
; void pred4x4_vertical_right(pixel *src, const pixel *topright, int stride)
@@ -119,8 +121,10 @@ INIT_XMM sse2
PRED4x4_VR
INIT_XMM ssse3
PRED4x4_VR
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
PRED4x4_VR
+%endif
;-----------------------------------------------------------------------------
; void pred4x4_horizontal_down(pixel *src, const pixel *topright, int stride)
@@ -159,8 +163,10 @@ INIT_XMM sse2
PRED4x4_HD
INIT_XMM ssse3
PRED4x4_HD
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
PRED4x4_HD
+%endif
;-----------------------------------------------------------------------------
; void pred4x4_dc(pixel *src, const pixel *topright, int stride)
@@ -228,8 +234,10 @@ cglobal pred4x4_down_left_10, 3, 3
INIT_XMM sse2
PRED4x4_DL
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
PRED4x4_DL
+%endif
;-----------------------------------------------------------------------------
; void pred4x4_vertical_left(pixel *src, const pixel *topright, int stride)
@@ -255,8 +263,10 @@ cglobal pred4x4_vertical_left_10, 3, 3
INIT_XMM sse2
PRED4x4_VL
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
PRED4x4_VL
+%endif
;-----------------------------------------------------------------------------
; void pred4x4_horizontal_up(pixel *src, const pixel *topright, int stride)
@@ -563,8 +573,10 @@ cglobal pred8x8l_top_dc_10, 4, 4, 6
INIT_XMM sse2
PRED8x8L_TOP_DC
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
PRED8x8L_TOP_DC
+%endif
;-----------------------------------------------------------------------------
;void pred8x8l_dc(pixel *src, int has_topleft, int has_topright, int stride)
@@ -620,8 +632,10 @@ cglobal pred8x8l_dc_10, 4, 6, 6
INIT_XMM sse2
PRED8x8L_DC
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
PRED8x8L_DC
+%endif
;-----------------------------------------------------------------------------
; void pred8x8l_vertical(pixel *src, int has_topleft, int has_topright, int stride)
@@ -653,8 +667,10 @@ cglobal pred8x8l_vertical_10, 4, 4, 6
INIT_XMM sse2
PRED8x8L_VERTICAL
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
PRED8x8L_VERTICAL
+%endif
;-----------------------------------------------------------------------------
; void pred8x8l_horizontal(uint8_t *src, int has_topleft, int has_topright, int stride)
@@ -707,8 +723,10 @@ INIT_XMM sse2
PRED8x8L_HORIZONTAL
INIT_XMM ssse3
PRED8x8L_HORIZONTAL
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
PRED8x8L_HORIZONTAL
+%endif
;-----------------------------------------------------------------------------
;void pred8x8l_down_left(pixel *src, int has_topleft, int has_topright, int stride)
@@ -773,8 +791,10 @@ INIT_XMM sse2
PRED8x8L_DOWN_LEFT
INIT_XMM ssse3
PRED8x8L_DOWN_LEFT
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
PRED8x8L_DOWN_LEFT
+%endif
;-----------------------------------------------------------------------------
;void pred8x8l_down_right(pixel *src, int has_topleft, int has_topright, int stride)
@@ -845,8 +865,10 @@ INIT_XMM sse2
PRED8x8L_DOWN_RIGHT
INIT_XMM ssse3
PRED8x8L_DOWN_RIGHT
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
PRED8x8L_DOWN_RIGHT
+%endif
;-----------------------------------------------------------------------------
; void pred8x8l_vertical_right(pixel *src, int has_topleft, int has_topright, int stride)
@@ -913,8 +935,10 @@ INIT_XMM sse2
PRED8x8L_VERTICAL_RIGHT
INIT_XMM ssse3
PRED8x8L_VERTICAL_RIGHT
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
PRED8x8L_VERTICAL_RIGHT
+%endif
;-----------------------------------------------------------------------------
; void pred8x8l_horizontal_up(pixel *src, int has_topleft, int has_topright, int stride)
@@ -972,8 +996,10 @@ INIT_XMM sse2
PRED8x8L_HORIZONTAL_UP
INIT_XMM ssse3
PRED8x8L_HORIZONTAL_UP
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
PRED8x8L_HORIZONTAL_UP
+%endif
;-----------------------------------------------------------------------------
diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
index 84ffd4f9eb..f5b5e3ef39 100644
--- a/libavcodec/x86/h264_intrapred_init.c
+++ b/libavcodec/x86/h264_intrapred_init.c
@@ -1,20 +1,20 @@
/*
* Copyright (c) 2010 Jason Garrett-Glaser
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/h264_qpel.c b/libavcodec/x86/h264_qpel.c
index b00983e021..c4fdf5b692 100644
--- a/libavcodec/x86/h264_qpel.c
+++ b/libavcodec/x86/h264_qpel.c
@@ -2,20 +2,20 @@
* Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt
* Copyright (c) 2011 Daniel Kang
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -338,7 +338,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## MMX(uint8_t *dst, uint8_t *
DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\
uint8_t * const halfHV= temp;\
int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
- assert(((int)temp & 7) == 0);\
+ av_assert2(((int)temp & 7) == 0);\
ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, halfHV, stride, SIZE);\
}\
@@ -348,7 +348,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, uint8_t *
DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\
uint8_t * const halfHV= temp;\
int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
- assert(((int)temp & 7) == 0);\
+ av_assert2(((int)temp & 7) == 0);\
ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
ff_ ## OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, halfHV, stride, SIZE);\
}\
@@ -358,7 +358,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## MMX(uint8_t *dst, uint8_t *
DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\
uint8_t * const halfHV= temp;\
int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
- assert(((int)temp & 7) == 0);\
+ av_assert2(((int)temp & 7) == 0);\
ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
ff_ ## OPNAME ## pixels ## SIZE ## _l2_shift5_mmxext(dst, halfV+2, halfHV, stride, SIZE, SIZE);\
}\
@@ -368,7 +368,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, uint8_t *
DECLARE_ALIGNED(ALIGN, uint8_t, temp)[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE];\
uint8_t * const halfHV= temp;\
int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
- assert(((int)temp & 7) == 0);\
+ av_assert2(((int)temp & 7) == 0);\
ff_put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
ff_ ## OPNAME ## pixels ## SIZE ## _l2_shift5_mmxext(dst, halfV+3, halfHV, stride, SIZE, SIZE);\
}\
diff --git a/libavcodec/x86/h264_qpel_8bit.asm b/libavcodec/x86/h264_qpel_8bit.asm
index bc6c72541b..2d287ba443 100644
--- a/libavcodec/x86/h264_qpel_8bit.asm
+++ b/libavcodec/x86/h264_qpel_8bit.asm
@@ -6,20 +6,20 @@
;*
;* Authors: Daniel Kang <daniel.d.kang@gmail.com>
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
diff --git a/libavcodec/x86/h264_weight.asm b/libavcodec/x86/h264_weight.asm
index 646acdffe3..4759a063a6 100644
--- a/libavcodec/x86/h264_weight.asm
+++ b/libavcodec/x86/h264_weight.asm
@@ -4,20 +4,20 @@
;* Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt
;* Copyright (C) 2010 Eli Friedman <eli.friedman@gmail.com>
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
@@ -135,6 +135,13 @@ WEIGHT_FUNC_HALF_MM 8, 8
add off_regd, 1
or off_regd, 1
add r4, 1
+ cmp r5, 128
+ jne .normal
+ sar r5, 1
+ sar r6, 1
+ sar off_regd, 1
+ sub r4, 1
+.normal
%if cpuflag(ssse3)
movd m4, r5d
movd m0, r6d
diff --git a/libavcodec/x86/h264chroma_init.c b/libavcodec/x86/h264chroma_init.c
index 1ed34c0da8..c07fe948e0 100644
--- a/libavcodec/x86/h264chroma_init.c
+++ b/libavcodec/x86/h264chroma_init.c
@@ -1,18 +1,18 @@
/*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/h264dsp_init.c b/libavcodec/x86/h264dsp_init.c
index cfd8bb93b5..0255c0354f 100644
--- a/libavcodec/x86/h264dsp_init.c
+++ b/libavcodec/x86/h264dsp_init.c
@@ -1,20 +1,20 @@
/*
* Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -211,6 +211,7 @@ H264_BIWEIGHT_10_SSE(4, 10)
av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
const int chroma_format_idc)
{
+#if HAVE_YASM
int mm_flags = av_get_cpu_flags();
if (chroma_format_idc == 1 && EXTERNAL_MMXEXT(mm_flags))
@@ -370,4 +371,5 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
}
}
}
+#endif
}
diff --git a/libavcodec/x86/hpeldsp.asm b/libavcodec/x86/hpeldsp.asm
index ec04d99e77..1a572a3e66 100644
--- a/libavcodec/x86/hpeldsp.asm
+++ b/libavcodec/x86/hpeldsp.asm
@@ -1,20 +1,27 @@
;******************************************************************************
+;*
+;* Copyright (c) 2000-2001 Fabrice Bellard <fabrice@bellard.org>
+;* Copyright (c) Nick Kurshev <nickols_k@mail.ru>
+;* Copyright (c) 2002 Michael Niedermayer <michaelni@gmx.at>
+;* Copyright (c) 2002 Zdenek Kabelac <kabi@informatics.muni.cz>
+;* Copyright (c) 2013 Daniel Kang
+;*
;* MMX optimized hpel functions
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
diff --git a/libavcodec/x86/hpeldsp_init.c b/libavcodec/x86/hpeldsp_init.c
index 14be9a5060..b3e0d11a50 100644
--- a/libavcodec/x86/hpeldsp_init.c
+++ b/libavcodec/x86/hpeldsp_init.c
@@ -3,20 +3,20 @@
* Copyright (c) 2000, 2001 Fabrice Bellard
* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
* MMX optimization by Nick Kurshev <nickols_k@mail.ru>
@@ -254,7 +254,7 @@ void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags)
{
int mm_flags = av_get_cpu_flags();
- if (mm_flags & AV_CPU_FLAG_MMX)
+ if (HAVE_MMX && mm_flags & AV_CPU_FLAG_MMX)
hpeldsp_init_mmx(c, flags, mm_flags);
if (mm_flags & AV_CPU_FLAG_MMXEXT)
diff --git a/libavcodec/x86/hpeldsp_mmx.c b/libavcodec/x86/hpeldsp_mmx.c
index fece265da8..50db36dc1b 100644
--- a/libavcodec/x86/hpeldsp_mmx.c
+++ b/libavcodec/x86/hpeldsp_mmx.c
@@ -3,20 +3,20 @@
*
* Copyright (c) 2001 Fabrice Bellard
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/hpeldsp_rnd_template.c b/libavcodec/x86/hpeldsp_rnd_template.c
index 516a03aec2..94e06d820a 100644
--- a/libavcodec/x86/hpeldsp_rnd_template.c
+++ b/libavcodec/x86/hpeldsp_rnd_template.c
@@ -7,20 +7,20 @@
* mostly rewritten by Michael Niedermayer <michaelni@gmx.at>
* and improved by Zdenek Kabelac <kabi@users.sf.net>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/idct_mmx_xvid.c b/libavcodec/x86/idct_mmx_xvid.c
index 7c97b9541b..f66519b642 100644
--- a/libavcodec/x86/idct_mmx_xvid.c
+++ b/libavcodec/x86/idct_mmx_xvid.c
@@ -22,20 +22,20 @@
*
* conversion to gcc syntax by Michael Niedermayer
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
- * along with Libav; if not, write to the Free Software Foundation,
+ * along with FFmpeg; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/idct_sse2_xvid.c b/libavcodec/x86/idct_sse2_xvid.c
index da2c772ad1..ee2a08d73a 100644
--- a/libavcodec/x86/idct_sse2_xvid.c
+++ b/libavcodec/x86/idct_sse2_xvid.c
@@ -9,7 +9,7 @@
*
* Originally from dct/x86_asm/fdct_sse2_skal.asm in Xvid.
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
* Vertical pass is an implementation of the scheme:
* Loeffler C., Ligtenberg A., and Moschytz C.S.:
@@ -23,22 +23,21 @@
*
* More details at http://skal.planet-d.net/coding/dct.html
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
- * along with Libav; if not, write to the Free Software Foundation,
+ * along with FFmpeg; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#include "libavutil/internal.h"
#include "libavutil/mem.h"
#include "libavutil/x86/asm.h"
#include "idct_xvid.h"
diff --git a/libavcodec/x86/idct_xvid.h b/libavcodec/x86/idct_xvid.h
index aea28bab96..7a2847b864 100644
--- a/libavcodec/x86/idct_xvid.h
+++ b/libavcodec/x86/idct_xvid.h
@@ -1,20 +1,20 @@
/*
* XVID MPEG-4 VIDEO CODEC
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/imdct36.asm b/libavcodec/x86/imdct36.asm
index 633fcd9d59..d311fbe1a0 100644
--- a/libavcodec/x86/imdct36.asm
+++ b/libavcodec/x86/imdct36.asm
@@ -2,20 +2,20 @@
;* 36 point SSE-optimized IMDCT transform
;* Copyright (c) 2011 Vitor Sessak
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
@@ -370,8 +370,10 @@ DEFINE_IMDCT
INIT_XMM ssse3
DEFINE_IMDCT
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
DEFINE_IMDCT
+%endif
INIT_XMM sse
@@ -716,5 +718,7 @@ cglobal four_imdct36_float, 5,5,16, out, buf, in, win, tmp
INIT_XMM sse
DEFINE_FOUR_IMDCT
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
DEFINE_FOUR_IMDCT
+%endif
diff --git a/libavcodec/x86/lpc.c b/libavcodec/x86/lpc.c
index 7a37b88582..7dd35d4cd8 100644
--- a/libavcodec/x86/lpc.c
+++ b/libavcodec/x86/lpc.c
@@ -2,27 +2,26 @@
* MMX optimized LPC DSP utils
* Copyright (c) 2007 Loren Merritt
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/x86/asm.h"
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
-#include "libavutil/internal.h"
#include "libavutil/mem.h"
#include "libavcodec/lpc.h"
diff --git a/libavcodec/x86/mathops.h b/libavcodec/x86/mathops.h
index 32c88705e5..9c48afeb20 100644
--- a/libavcodec/x86/mathops.h
+++ b/libavcodec/x86/mathops.h
@@ -2,20 +2,20 @@
* simple math operations
* Copyright (c) 2006 Michael Niedermayer <michaelni@gmx.at> et al
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -74,7 +74,7 @@ static av_always_inline av_const int64_t MUL64(int a, int b)
static inline av_const int mid_pred(int a, int b, int c)
{
int i=b;
- __asm__ volatile(
+ __asm__ (
"cmp %2, %1 \n\t"
"cmovg %1, %0 \n\t"
"cmovg %2, %1 \n\t"
diff --git a/libavcodec/x86/mlpdsp.c b/libavcodec/x86/mlpdsp.c
index 7bd2c12a9e..81cab5a5e9 100644
--- a/libavcodec/x86/mlpdsp.c
+++ b/libavcodec/x86/mlpdsp.c
@@ -2,25 +2,24 @@
* MLP DSP functions x86-optimized
* Copyright (c) 2009 Ramiro Polla
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/attributes.h"
-#include "libavutil/internal.h"
#include "libavutil/x86/asm.h"
#include "libavcodec/mlpdsp.h"
#include "libavcodec/mlp.h"
diff --git a/libavcodec/x86/motion_est.c b/libavcodec/x86/motion_est.c
index 8ef72112dc..416cbb0719 100644
--- a/libavcodec/x86/motion_est.c
+++ b/libavcodec/x86/motion_est.c
@@ -5,25 +5,25 @@
*
* mostly by Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/attributes.h"
-#include "libavutil/internal.h"
+#include "libavutil/avassert.h"
#include "libavutil/mem.h"
#include "libavutil/x86/asm.h"
#include "dsputil_x86.h"
@@ -40,7 +40,7 @@ DECLARE_ASM_CONST(8, uint64_t, bone)= 0x0101010101010101LL;
static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{
- x86_reg len= -(stride*h);
+ x86_reg len= -(x86_reg)stride*h;
__asm__ volatile(
".p2align 4 \n\t"
"1: \n\t"
@@ -203,7 +203,7 @@ static inline void sad8_4_mmxext(uint8_t *blk1, uint8_t *blk2,
static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int stride, int h)
{
- x86_reg len= -(stride*h);
+ x86_reg len= -(x86_reg)stride*h;
__asm__ volatile(
".p2align 4 \n\t"
"1: \n\t"
@@ -241,7 +241,7 @@ static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int
static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
{
- x86_reg len= -(stride*h);
+ x86_reg len= -(x86_reg)stride*h;
__asm__ volatile(
"movq (%1, %%"REG_a"), %%mm0 \n\t"
"movq 1(%1, %%"REG_a"), %%mm2 \n\t"
@@ -331,7 +331,7 @@ static inline void sad8_y2a_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
#define PIX_SAD(suf)\
static int sad8_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
- assert(h==8);\
+ av_assert2(h==8);\
__asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t":);\
\
@@ -341,7 +341,7 @@ static int sad8_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h
}\
static int sad8_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
- assert(h==8);\
+ av_assert2(h==8);\
__asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
"movq %0, %%mm5 \n\t"\
@@ -355,7 +355,7 @@ static int sad8_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, in
\
static int sad8_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
- assert(h==8);\
+ av_assert2(h==8);\
__asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
"movq %0, %%mm5 \n\t"\
@@ -369,7 +369,7 @@ static int sad8_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, in
\
static int sad8_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
{\
- assert(h==8);\
+ av_assert2(h==8);\
__asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
::);\
@@ -466,7 +466,7 @@ av_cold void ff_dsputil_init_pix_mmx(DSPContext *c, AVCodecContext *avctx)
c->pix_abs[1][3] = sad8_xy2_mmxext;
}
}
- if ((mm_flags & AV_CPU_FLAG_SSE2) && !(mm_flags & AV_CPU_FLAG_3DNOW)) {
+ if ((mm_flags & AV_CPU_FLAG_SSE2) && !(mm_flags & AV_CPU_FLAG_3DNOW) && avctx->codec_id != AV_CODEC_ID_SNOW) {
c->sad[0]= sad16_sse2;
}
#endif /* HAVE_INLINE_ASM */
diff --git a/libavcodec/x86/mpeg4qpel.asm b/libavcodec/x86/mpeg4qpel.asm
index df20ea9dc6..ca52375a76 100644
--- a/libavcodec/x86/mpeg4qpel.asm
+++ b/libavcodec/x86/mpeg4qpel.asm
@@ -1,21 +1,23 @@
;******************************************************************************
;* mpeg4 qpel
+;* Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
;* Copyright (c) 2008 Loren Merritt
+;* Copyright (c) 2013 Daniel Kang
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
diff --git a/libavcodec/x86/mpegaudiodsp.c b/libavcodec/x86/mpegaudiodsp.c
index 79a29ce506..287d8ff126 100644
--- a/libavcodec/x86/mpegaudiodsp.c
+++ b/libavcodec/x86/mpegaudiodsp.c
@@ -2,20 +2,20 @@
* MMX optimized MP3 decoding functions
* Copyright (c) 2010 Vitor Sessak
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -26,11 +26,16 @@
#include "libavutil/x86/cpu.h"
#include "libavcodec/mpegaudiodsp.h"
-void ff_imdct36_float_sse(float *out, float *buf, float *in, float *win);
-void ff_imdct36_float_sse2(float *out, float *buf, float *in, float *win);
-void ff_imdct36_float_sse3(float *out, float *buf, float *in, float *win);
-void ff_imdct36_float_ssse3(float *out, float *buf, float *in, float *win);
-void ff_imdct36_float_avx(float *out, float *buf, float *in, float *win);
+#define DECL(CPU)\
+static void imdct36_blocks_ ## CPU(float *out, float *buf, float *in, int count, int switch_point, int block_type);\
+void ff_imdct36_float_ ## CPU(float *out, float *buf, float *in, float *win);
+
+DECL(sse)
+DECL(sse2)
+DECL(sse3)
+DECL(ssse3)
+DECL(avx)
+
void ff_four_imdct36_float_sse(float *out, float *buf, float *in, float *win,
float *tmpbuf);
void ff_four_imdct36_float_avx(float *out, float *buf, float *in, float *win,
@@ -217,11 +222,15 @@ static void imdct36_blocks_ ## CPU1(float *out, float *buf, float *in, \
} \
}
+#if HAVE_SSE
DECL_IMDCT_BLOCKS(sse,sse)
DECL_IMDCT_BLOCKS(sse2,sse)
DECL_IMDCT_BLOCKS(sse3,sse)
DECL_IMDCT_BLOCKS(ssse3,sse)
+#endif
+#if HAVE_AVX_EXTERNAL
DECL_IMDCT_BLOCKS(avx,avx)
+#endif
#endif /* HAVE_YASM */
av_cold void ff_mpadsp_init_x86(MPADSPContext *s)
diff --git a/libavcodec/x86/mpegvideo.c b/libavcodec/x86/mpegvideo.c
index 68f3b62fdd..f8cabe5ddb 100644
--- a/libavcodec/x86/mpegvideo.c
+++ b/libavcodec/x86/mpegvideo.c
@@ -2,20 +2,20 @@
* Optimized for ia32 CPUs by Nick Kurshev <nickols_k@mail.ru>
* h263, mpeg1, mpeg2 dequantizer & draw_edges by Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -35,7 +35,7 @@ static void dct_unquantize_h263_intra_mmx(MpegEncContext *s,
qmul = qscale << 1;
- assert(s->block_last_index[n]>=0 || s->h263_aic);
+ av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
if (!s->h263_aic) {
if (n < 4)
@@ -111,7 +111,7 @@ static void dct_unquantize_h263_inter_mmx(MpegEncContext *s,
qmul = qscale << 1;
qadd = (qscale - 1) | 1;
- assert(s->block_last_index[n]>=0 || s->h263_aic);
+ av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
@@ -171,7 +171,7 @@ static void dct_unquantize_mpeg1_intra_mmx(MpegEncContext *s,
const uint16_t *quant_matrix;
int block0;
- assert(s->block_last_index[n]>=0);
+ av_assert2(s->block_last_index[n]>=0);
nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1;
@@ -239,7 +239,7 @@ static void dct_unquantize_mpeg1_inter_mmx(MpegEncContext *s,
x86_reg nCoeffs;
const uint16_t *quant_matrix;
- assert(s->block_last_index[n]>=0);
+ av_assert2(s->block_last_index[n]>=0);
nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1;
@@ -306,7 +306,7 @@ static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s,
const uint16_t *quant_matrix;
int block0;
- assert(s->block_last_index[n]>=0);
+ av_assert2(s->block_last_index[n]>=0);
if(s->alternate_scan) nCoeffs= 63; //FIXME
else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
@@ -371,7 +371,7 @@ static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s,
x86_reg nCoeffs;
const uint16_t *quant_matrix;
- assert(s->block_last_index[n]>=0);
+ av_assert2(s->block_last_index[n]>=0);
if(s->alternate_scan) nCoeffs= 63; //FIXME
else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
diff --git a/libavcodec/x86/mpegvideoenc.c b/libavcodec/x86/mpegvideoenc.c
index b1b22f5de2..c841bccc86 100644
--- a/libavcodec/x86/mpegvideoenc.c
+++ b/libavcodec/x86/mpegvideoenc.c
@@ -2,20 +2,20 @@
* The simplest mpeg encoder (well, it was the simplest!)
* Copyright (c) 2000,2001 Fabrice Bellard
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -81,7 +81,7 @@ extern uint16_t ff_inv_zigzag_direct16[64];
#include "mpegvideoenc_template.c"
#endif /* HAVE_SSSE3_INLINE */
-av_cold void ff_MPV_encode_init_x86(MpegEncContext *s)
+av_cold void ff_dct_encode_init_x86(MpegEncContext *s)
{
const int dct_algo = s->avctx->dct_algo;
diff --git a/libavcodec/x86/mpegvideoenc_template.c b/libavcodec/x86/mpegvideoenc_template.c
index 4d3fb7445b..1e0505ea3c 100644
--- a/libavcodec/x86/mpegvideoenc_template.c
+++ b/libavcodec/x86/mpegvideoenc_template.c
@@ -3,20 +3,20 @@
*
* Copyright (c) 2002 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -100,7 +100,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
const uint16_t *qmat, *bias;
LOCAL_ALIGNED_16(int16_t, temp_block, [64]);
- assert((7&(int)(&temp_block[0])) == 0); //did gcc align it correctly?
+ av_assert2((7&(int)(&temp_block[0])) == 0); //did gcc align it correctly?
//s->fdct (block);
RENAMEl(ff_fdct) (block); //cannot be anything else ...
@@ -110,10 +110,15 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
if (s->mb_intra) {
int dummy;
- if (n < 4)
+ if (n < 4){
q = s->y_dc_scale;
- else
+ bias = s->q_intra_matrix16[qscale][1];
+ qmat = s->q_intra_matrix16[qscale][0];
+ }else{
q = s->c_dc_scale;
+ bias = s->q_chroma_intra_matrix16[qscale][1];
+ qmat = s->q_chroma_intra_matrix16[qscale][0];
+ }
/* note: block[0] is assumed to be positive */
if (!s->h263_aic) {
__asm__ volatile (
@@ -128,8 +133,6 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
block[0]=0; //avoid fake overflow
// temp_block[0] = (block[0] + (q >> 1)) / q;
last_non_zero_p1 = 1;
- bias = s->q_intra_matrix16[qscale][1];
- qmat = s->q_intra_matrix16[qscale][0];
} else {
last_non_zero_p1 = 0;
bias = s->q_inter_matrix16[qscale][1];
diff --git a/libavcodec/x86/pngdsp_init.c b/libavcodec/x86/pngdsp_init.c
index ec6c238bf9..4c54ed30cf 100644
--- a/libavcodec/x86/pngdsp_init.c
+++ b/libavcodec/x86/pngdsp_init.c
@@ -2,20 +2,20 @@
* x86 PNG optimizations.
* Copyright (c) 2008 Loren Merrit <lorenm@u.washington.edu>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/proresdsp.asm b/libavcodec/x86/proresdsp.asm
index 855f2094b7..aedacc2cdc 100644
--- a/libavcodec/x86/proresdsp.asm
+++ b/libavcodec/x86/proresdsp.asm
@@ -1,23 +1,24 @@
;******************************************************************************
;* x86-SIMD-optimized IDCT for prores
-;* this is identical to "simple" IDCT except for the clip range
+;* this is identical to "simple" IDCT written by Michael Niedermayer
+;* except for the clip range
;*
;* Copyright (c) 2011 Ronald S. Bultje <rsbultje@gmail.com>
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* 51, Inc., Foundation Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
@@ -47,10 +48,10 @@ w1_plus_w5: times 4 dw W1sh2, +W5sh2
w5_min_w1: times 4 dw W5sh2, -W1sh2
w5_plus_w7: times 4 dw W5sh2, +W7sh2
w7_min_w5: times 4 dw W7sh2, -W5sh2
-row_round: times 8 dw (1<<14)
+pw_88: times 8 dw 0x2008
+cextern pw_1
cextern pw_4
-cextern pw_8
cextern pw_512
cextern pw_1019
@@ -91,14 +92,12 @@ section .text align=16
; a2 -= W6 * row[2];
; a3 -= W2 * row[2];
%ifidn %1, col
- paddw m10,[pw_8]
+ paddw m10,[pw_88]
%endif
- SBUTTERFLY3 wd, 0, 1, 10, 8 ; { row[0], row[2] }[0-3]/[4-7]
%ifidn %1, row
- psubw m10,[row_round]
+ paddw m10,[pw_1]
%endif
- SIGNEXTEND m8, m9, m14 ; { row[2] }[0-3] / [4-7]
- SIGNEXTEND m10, m11, m14 ; { row[0] }[0-3] / [4-7]
+ SBUTTERFLY3 wd, 0, 1, 10, 8 ; { row[0], row[2] }[0-3]/[4-7]
pmaddwd m2, m0, [w4_plus_w6]
pmaddwd m3, m1, [w4_plus_w6]
pmaddwd m4, m0, [w4_min_w6]
@@ -107,75 +106,33 @@ section .text align=16
pmaddwd m7, m1, [w4_min_w2]
pmaddwd m0, [w4_plus_w2]
pmaddwd m1, [w4_plus_w2]
- pslld m2, 2
- pslld m3, 2
- pslld m4, 2
- pslld m5, 2
- pslld m6, 2
- pslld m7, 2
- pslld m0, 2
- pslld m1, 2
; a0: -1*row[0]-1*row[2]
; a1: -1*row[0]
; a2: -1*row[0]
; a3: -1*row[0]+1*row[2]
- psubd m2, m10 ; a1[0-3]
- psubd m3, m11 ; a1[4-7]
- psubd m4, m10 ; a2[0-3]
- psubd m5, m11 ; a2[4-7]
- psubd m0, m10
- psubd m1, m11
- psubd m6, m10
- psubd m7, m11
- psubd m0, m8 ; a0[0-3]
- psubd m1, m9 ; a0[4-7]
- paddd m6, m8 ; a3[0-3]
- paddd m7, m9 ; a3[4-7]
; a0 += W4*row[4] + W6*row[6]; i.e. -1*row[4]
; a1 -= W4*row[4] + W2*row[6]; i.e. -1*row[4]-1*row[6]
; a2 -= W4*row[4] - W2*row[6]; i.e. -1*row[4]+1*row[6]
; a3 += W4*row[4] - W6*row[6]; i.e. -1*row[4]
SBUTTERFLY3 wd, 8, 9, 13, 12 ; { row[4], row[6] }[0-3]/[4-7]
- SIGNEXTEND m13, m14, m10 ; { row[4] }[0-3] / [4-7]
pmaddwd m10, m8, [w4_plus_w6]
pmaddwd m11, m9, [w4_plus_w6]
- pslld m10, 2
- pslld m11, 2
- psubd m10, m13
- psubd m11, m14
paddd m0, m10 ; a0[0-3]
paddd m1, m11 ; a0[4-7]
pmaddwd m10, m8, [w4_min_w6]
pmaddwd m11, m9, [w4_min_w6]
- pslld m10, 2
- pslld m11, 2
- psubd m10, m13
- psubd m11, m14
paddd m6, m10 ; a3[0-3]
paddd m7, m11 ; a3[4-7]
pmaddwd m10, m8, [w4_min_w2]
pmaddwd m11, m9, [w4_min_w2]
pmaddwd m8, [w4_plus_w2]
pmaddwd m9, [w4_plus_w2]
- pslld m10, 2
- pslld m11, 2
- pslld m8, 2
- pslld m9, 2
- psubd m10, m13
- psubd m11, m14
- psubd m8, m13
- psubd m9, m14
psubd m4, m10 ; a2[0-3] intermediate
psubd m5, m11 ; a2[4-7] intermediate
psubd m2, m8 ; a1[0-3] intermediate
psubd m3, m9 ; a1[4-7] intermediate
- SIGNEXTEND m12, m13, m10 ; { row[6] }[0-3] / [4-7]
- psubd m4, m12 ; a2[0-3]
- psubd m5, m13 ; a2[4-7]
- paddd m2, m12 ; a1[0-3]
- paddd m3, m13 ; a1[4-7]
; load/store
mova [r2+ 0], m0
@@ -206,8 +163,6 @@ section .text align=16
; b3 = MUL(W7, row[1]);
; MAC(b3, -W5, row[3]);
SBUTTERFLY3 wd, 0, 1, 10, 8 ; { row[1], row[3] }[0-3]/[4-7]
- SIGNEXTEND m10, m11, m12 ; { row[1] }[0-3] / [4-7]
- SIGNEXTEND m8, m9, m12 ; { row[3] }[0-3] / [4-7]
pmaddwd m2, m0, [w3_min_w7]
pmaddwd m3, m1, [w3_min_w7]
pmaddwd m4, m0, [w5_min_w1]
@@ -216,35 +171,11 @@ section .text align=16
pmaddwd m7, m1, [w7_min_w5]
pmaddwd m0, [w1_plus_w3]
pmaddwd m1, [w1_plus_w3]
- pslld m2, 2
- pslld m3, 2
- pslld m4, 2
- pslld m5, 2
- pslld m6, 2
- pslld m7, 2
- pslld m0, 2
- pslld m1, 2
; b0: +1*row[1]+2*row[3]
; b1: +2*row[1]-1*row[3]
; b2: -1*row[1]-1*row[3]
; b3: +1*row[1]+1*row[3]
- psubd m2, m8
- psubd m3, m9
- paddd m0, m8
- paddd m1, m9
- paddd m8, m10 ; { row[1] + row[3] }[0-3]
- paddd m9, m11 ; { row[1] + row[3] }[4-7]
- paddd m10, m10
- paddd m11, m11
- paddd m0, m8 ; b0[0-3]
- paddd m1, m9 ; b0[4-7]
- paddd m2, m10 ; b1[0-3]
- paddd m3, m11 ; b2[4-7]
- psubd m4, m8 ; b2[0-3]
- psubd m5, m9 ; b2[4-7]
- paddd m6, m8 ; b3[0-3]
- paddd m7, m9 ; b3[4-7]
; MAC(b0, W5, row[5]);
; MAC(b0, W7, row[7]);
@@ -255,38 +186,16 @@ section .text align=16
; MAC(b3, W3, row[5]);
; MAC(b3, -W1, row[7]);
SBUTTERFLY3 wd, 8, 9, 13, 14 ; { row[5], row[7] }[0-3]/[4-7]
- SIGNEXTEND m13, m12, m11 ; { row[5] }[0-3] / [4-7]
- SIGNEXTEND m14, m11, m10 ; { row[7] }[0-3] / [4-7]
; b0: -1*row[5]+1*row[7]
; b1: -1*row[5]+1*row[7]
; b2: +1*row[5]+2*row[7]
; b3: +2*row[5]-1*row[7]
- paddd m4, m13
- paddd m5, m12
- paddd m6, m13
- paddd m7, m12
- psubd m13, m14 ; { row[5] - row[7] }[0-3]
- psubd m12, m11 ; { row[5] - row[7] }[4-7]
- paddd m14, m14
- paddd m11, m11
- psubd m0, m13
- psubd m1, m12
- psubd m2, m13
- psubd m3, m12
- paddd m4, m14
- paddd m5, m11
- paddd m6, m13
- paddd m7, m12
pmaddwd m10, m8, [w1_plus_w5]
pmaddwd m11, m9, [w1_plus_w5]
pmaddwd m12, m8, [w5_plus_w7]
pmaddwd m13, m9, [w5_plus_w7]
- pslld m10, 2
- pslld m11, 2
- pslld m12, 2
- pslld m13, 2
psubd m2, m10 ; b1[0-3]
psubd m3, m11 ; b1[4-7]
paddd m0, m12 ; b0[0-3]
@@ -295,10 +204,6 @@ section .text align=16
pmaddwd m13, m9, [w7_plus_w3]
pmaddwd m8, [w3_min_w1]
pmaddwd m9, [w3_min_w1]
- pslld m12, 2
- pslld m13, 2
- pslld m8, 2
- pslld m9, 2
paddd m4, m12 ; b2[0-3]
paddd m5, m13 ; b2[4-7]
paddd m6, m8 ; b3[0-3]
@@ -345,7 +250,7 @@ cglobal prores_idct_put_10, 4, 4, %1
pmullw m13,[r3+64]
pmullw m12,[r3+96]
- IDCT_1D row, 17
+ IDCT_1D row, 15
; transpose for second part of IDCT
TRANSPOSE8x8W 8, 0, 1, 2, 4, 11, 9, 10, 3
@@ -360,20 +265,11 @@ cglobal prores_idct_put_10, 4, 4, %1
; for (i = 0; i < 8; i++)
; idctSparseColAdd(dest + i, line_size, block + i);
- IDCT_1D col, 20
+ IDCT_1D col, 18
; clip/store
- mova m6, [pw_512]
mova m3, [pw_4]
mova m5, [pw_1019]
- paddw m8, m6
- paddw m0, m6
- paddw m1, m6
- paddw m2, m6
- paddw m4, m6
- paddw m11, m6
- paddw m9, m6
- paddw m10, m6
pmaxsw m8, m3
pmaxsw m0, m3
pmaxsw m1, m3
@@ -422,7 +318,9 @@ INIT_XMM sse2
idct_put_fn 16
INIT_XMM sse4
idct_put_fn 16
+%if HAVE_AVX_EXTERNAL
INIT_XMM avx
idct_put_fn 16
+%endif
%endif
diff --git a/libavcodec/x86/proresdsp_init.c b/libavcodec/x86/proresdsp_init.c
index 8bff85fbe8..47fe7a4933 100644
--- a/libavcodec/x86/proresdsp_init.c
+++ b/libavcodec/x86/proresdsp_init.c
@@ -3,20 +3,20 @@
*
* Copyright (c) 2010-2011 Maxim Poliakovski
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -32,11 +32,14 @@ void ff_prores_idct_put_10_sse4(uint16_t *dst, int linesize,
void ff_prores_idct_put_10_avx (uint16_t *dst, int linesize,
int16_t *block, const int16_t *qmat);
-av_cold void ff_proresdsp_x86_init(ProresDSPContext *dsp)
+av_cold void ff_proresdsp_x86_init(ProresDSPContext *dsp, AVCodecContext *avctx)
{
#if ARCH_X86_64
int flags = av_get_cpu_flags();
+ if(avctx->flags & CODEC_FLAG_BITEXACT)
+ return;
+
if (EXTERNAL_SSE2(flags)) {
dsp->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
dsp->idct_put = ff_prores_idct_put_10_sse2;
diff --git a/libavcodec/x86/qpel.asm b/libavcodec/x86/qpel.asm
index c90b3932af..c2ffb86717 100644
--- a/libavcodec/x86/qpel.asm
+++ b/libavcodec/x86/qpel.asm
@@ -4,20 +4,20 @@
;* Copyright (c) 2003-2013 Michael Niedermayer
;* Copyright (c) 2013 Daniel Kang
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
diff --git a/libavcodec/x86/rnd_mmx.c b/libavcodec/x86/rnd_mmx.c
index db4515a9c5..326e2f395b 100644
--- a/libavcodec/x86/rnd_mmx.c
+++ b/libavcodec/x86/rnd_mmx.c
@@ -1,18 +1,18 @@
/*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/rnd_template.c b/libavcodec/x86/rnd_template.c
index e9a5a45799..e37fc19506 100644
--- a/libavcodec/x86/rnd_template.c
+++ b/libavcodec/x86/rnd_template.c
@@ -7,20 +7,20 @@
* mostly rewritten by Michael Niedermayer <michaelni@gmx.at>
* and improved by Zdenek Kabelac <kabi@users.sf.net>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/rv40dsp_init.c b/libavcodec/x86/rv40dsp_init.c
index a90f57fe10..6e0f69e452 100644
--- a/libavcodec/x86/rv40dsp_init.c
+++ b/libavcodec/x86/rv40dsp_init.c
@@ -2,20 +2,20 @@
* RV40 decoder motion compensation functions x86-optimised
* Copyright (c) 2008 Konstantin Shishkov
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/sbrdsp.asm b/libavcodec/x86/sbrdsp.asm
index 36a0918473..fc519bfb02 100644
--- a/libavcodec/x86/sbrdsp.asm
+++ b/libavcodec/x86/sbrdsp.asm
@@ -26,6 +26,12 @@ SECTION_RODATA
ps_mask times 2 dd 1<<31, 0
ps_mask2 times 2 dd 0, 1<<31
ps_neg times 4 dd 1<<31
+ps_noise0 times 2 dd 1.0, 0.0,
+ps_noise2 times 2 dd -1.0, 0.0
+ps_noise13 dd 0.0, 1.0, 0.0, -1.0
+ dd 0.0, -1.0, 0.0, 1.0
+ dd 0.0, 1.0, 0.0, -1.0
+cextern sbr_noise_table
SECTION_TEXT
@@ -136,7 +142,6 @@ cglobal sbr_hf_gen, 4,4,8, X_high, X_low, alpha0, alpha1, BW, S, E
mulps m2, bw ; (a1[0] a1[1])*bw*bw = (a0 a1)
mova m3, m1
mova m4, m2
- mova m7, [ps_mask]
; Set pointers
%if ARCH_X86_64 == 0 || WIN64
@@ -156,30 +161,28 @@ cglobal sbr_hf_gen, 4,4,8, X_high, X_low, alpha0, alpha1, BW, S, E
shl start, 3 ; offset from num loops
mova m0, [X_lowq + start]
- movlhps m1, m1 ; (a2 a3 a2 a3)
- movlhps m2, m2 ; (a0 a1 a0 a1)
- shufps m3, m3, q0101 ; (a3 a2 a3 a2)
- shufps m4, m4, q0101 ; (a1 a0 a1 a0)
- xorps m3, m7 ; (-a3 a2 -a3 a2)
- xorps m4, m7 ; (-a1 a0 -a1 a0)
+ shufps m3, m3, q1111
+ shufps m4, m4, q1111
+ xorps m3, [ps_mask]
+ shufps m1, m1, q0000
+ shufps m2, m2, q0000
+ xorps m4, [ps_mask]
.loop2:
- mova m5, m0
+ movu m7, [X_lowq + start + 8] ; BbCc
mova m6, m0
- shufps m0, m0, q2200 ; {Xl[-2][0],",Xl[-1][0],"}
- shufps m5, m5, q3311 ; {Xl[-2][1],",Xl[-1][1],"}
- mulps m0, m2
- mulps m5, m4
- mova m7, m6
- addps m5, m0
- mova m0, [X_lowq + start + 2*2*4]
- shufps m6, m0, q0022 ; {Xl[-1][0],",Xl[0][0],"}
- shufps m7, m0, q1133 ; {Xl[-1][1],",Xl[1][1],"}
- mulps m6, m1
+ mova m5, m7
+ shufps m0, m0, q2301 ; aAbB
+ shufps m7, m7, q2301 ; bBcC
+ mulps m0, m4
mulps m7, m3
- addps m5, m6
+ mulps m6, m2
+ mulps m5, m1
+ addps m7, m0
+ mova m0, [X_lowq + start +16] ; CcDd
addps m7, m0
- addps m5, m7
- mova [X_highq + start], m5
+ addps m6, m5
+ addps m7, m6
+ mova [X_highq + start], m7
add start, 16
jnz .loop2
RET
@@ -246,33 +249,47 @@ cglobal sbr_neg_odd_64, 1,2,4,z
jne .loop
REP_RET
-INIT_XMM sse2
; sbr_qmf_deint_bfly(float *v, const float *src0, const float *src1)
+%macro SBR_QMF_DEINT_BFLY 0
cglobal sbr_qmf_deint_bfly, 3,5,8, v,src0,src1,vrev,c
mov cq, 64*4-2*mmsize
lea vrevq, [vq + 64*4]
.loop:
mova m0, [src0q+cq]
mova m1, [src1q]
- mova m2, [src0q+cq+mmsize]
- mova m3, [src1q+mmsize]
- pshufd m4, m0, q0123
- pshufd m5, m1, q0123
- pshufd m6, m2, q0123
- pshufd m7, m3, q0123
- addps m3, m4
+ mova m4, [src0q+cq+mmsize]
+ mova m5, [src1q+mmsize]
+%if cpuflag(sse2)
+ pshufd m2, m0, q0123
+ pshufd m3, m1, q0123
+ pshufd m6, m4, q0123
+ pshufd m7, m5, q0123
+%else
+ shufps m2, m0, m0, q0123
+ shufps m3, m1, m1, q0123
+ shufps m6, m4, m4, q0123
+ shufps m7, m5, m5, q0123
+%endif
+ addps m5, m2
subps m0, m7
addps m1, m6
- subps m2, m5
+ subps m4, m3
mova [vrevq], m1
- mova [vrevq+mmsize], m3
+ mova [vrevq+mmsize], m5
mova [vq+cq], m0
- mova [vq+cq+mmsize], m2
+ mova [vq+cq+mmsize], m4
add src1q, 2*mmsize
add vrevq, 2*mmsize
sub cq, 2*mmsize
jge .loop
REP_RET
+%endmacro
+
+INIT_XMM sse
+SBR_QMF_DEINT_BFLY
+
+INIT_XMM sse2
+SBR_QMF_DEINT_BFLY
INIT_XMM sse2
cglobal sbr_qmf_pre_shuffle, 1,4,6,z
@@ -303,3 +320,106 @@ cglobal sbr_qmf_pre_shuffle, 1,4,6,z
movq m2, [zq]
movq [r2q], m2
REP_RET
+
+%ifdef PIC
+%define NREGS 1
+%if UNIX64
+%define NOISE_TABLE r6q ; r5q is m_max
+%else
+%define NOISE_TABLE r5q
+%endif
+%else
+%define NREGS 0
+%define NOISE_TABLE sbr_noise_table
+%endif
+
+%macro LOAD_NST 1
+%ifdef PIC
+ lea NOISE_TABLE, [%1]
+ mova m0, [kxq + NOISE_TABLE]
+%else
+ mova m0, [kxq + %1]
+%endif
+%endmacro
+
+INIT_XMM sse2
+; sbr_hf_apply_noise_0(float (*Y)[2], const float *s_m,
+; const float *q_filt, int noise,
+; int kx, int m_max)
+cglobal sbr_hf_apply_noise_0, 5,5+NREGS+UNIX64,8, Y,s_m,q_filt,noise,kx,m_max
+ mova m0, [ps_noise0]
+ jmp apply_noise_main
+
+; sbr_hf_apply_noise_1(float (*Y)[2], const float *s_m,
+; const float *q_filt, int noise,
+; int kx, int m_max)
+cglobal sbr_hf_apply_noise_1, 5,5+NREGS+UNIX64,8, Y,s_m,q_filt,noise,kx,m_max
+ and kxq, 1
+ shl kxq, 4
+ LOAD_NST ps_noise13
+ jmp apply_noise_main
+
+; sbr_hf_apply_noise_2(float (*Y)[2], const float *s_m,
+; const float *q_filt, int noise,
+; int kx, int m_max)
+cglobal sbr_hf_apply_noise_2, 5,5+NREGS+UNIX64,8, Y,s_m,q_filt,noise,kx,m_max
+ mova m0, [ps_noise2]
+ jmp apply_noise_main
+
+; sbr_hf_apply_noise_3(float (*Y)[2], const float *s_m,
+; const float *q_filt, int noise,
+; int kx, int m_max)
+cglobal sbr_hf_apply_noise_3, 5,5+NREGS+UNIX64,8, Y,s_m,q_filt,noise,kx,m_max
+ and kxq, 1
+ shl kxq, 4
+ LOAD_NST ps_noise13+16
+
+apply_noise_main:
+%if ARCH_X86_64 == 0 || WIN64
+ mov kxd, m_maxm
+%define count kxq
+%else
+%define count m_maxq
+%endif
+ dec noiseq
+ shl count, 2
+%ifdef PIC
+ lea NOISE_TABLE, [sbr_noise_table]
+%endif
+ lea Yq, [Yq + 2*count]
+ add s_mq, count
+ add q_filtq, count
+ shl noiseq, 3
+ pxor m5, m5
+ neg count
+.loop:
+ mova m1, [q_filtq + count]
+ movu m3, [noiseq + NOISE_TABLE + 1*mmsize]
+ movu m4, [noiseq + NOISE_TABLE + 2*mmsize]
+ add noiseq, 2*mmsize
+ and noiseq, 0x1ff<<3
+ punpckhdq m2, m1, m1
+ punpckldq m1, m1
+ mulps m1, m3 ; m2 = q_filt[m] * ff_sbr_noise_table[noise]
+ mulps m2, m4 ; m2 = q_filt[m] * ff_sbr_noise_table[noise]
+ mova m3, [s_mq + count]
+ ; TODO: replace by a vpermd in AVX2
+ punpckhdq m4, m3, m3
+ punpckldq m3, m3
+ pcmpeqd m6, m3, m5 ; m6 == 0
+ pcmpeqd m7, m4, m5 ; m7 == 0
+ mulps m3, m0 ; s_m[m] * phi_sign
+ mulps m4, m0 ; s_m[m] * phi_sign
+ pand m1, m6
+ pand m2, m7
+ movu m6, [Yq + 2*count]
+ movu m7, [Yq + 2*count + mmsize]
+ addps m3, m1
+ addps m4, m2
+ addps m6, m3
+ addps m7, m4
+ movu [Yq + 2*count], m6
+ movu [Yq + 2*count + mmsize], m7
+ add count, mmsize
+ jl .loop
+ RET
diff --git a/libavcodec/x86/sbrdsp_init.c b/libavcodec/x86/sbrdsp_init.c
index e2eaa70d8a..0bc4a6183e 100644
--- a/libavcodec/x86/sbrdsp_init.c
+++ b/libavcodec/x86/sbrdsp_init.c
@@ -34,9 +34,23 @@ void ff_sbr_hf_gen_sse(float (*X_high)[2], const float (*X_low)[2],
float bw, int start, int end);
void ff_sbr_neg_odd_64_sse(float *z);
void ff_sbr_qmf_post_shuffle_sse(float W[32][2], const float *z);
+void ff_sbr_qmf_deint_bfly_sse(float *v, const float *src0, const float *src1);
void ff_sbr_qmf_deint_bfly_sse2(float *v, const float *src0, const float *src1);
void ff_sbr_qmf_pre_shuffle_sse2(float *z);
+void ff_sbr_hf_apply_noise_0_sse2(float (*Y)[2], const float *s_m,
+ const float *q_filt, int noise,
+ int kx, int m_max);
+void ff_sbr_hf_apply_noise_1_sse2(float (*Y)[2], const float *s_m,
+ const float *q_filt, int noise,
+ int kx, int m_max);
+void ff_sbr_hf_apply_noise_2_sse2(float (*Y)[2], const float *s_m,
+ const float *q_filt, int noise,
+ int kx, int m_max);
+void ff_sbr_hf_apply_noise_3_sse2(float (*Y)[2], const float *s_m,
+ const float *q_filt, int noise,
+ int kx, int m_max);
+
av_cold void ff_sbrdsp_init_x86(SBRDSPContext *s)
{
int mm_flags = av_get_cpu_flags();
@@ -48,10 +62,15 @@ av_cold void ff_sbrdsp_init_x86(SBRDSPContext *s)
s->hf_g_filt = ff_sbr_hf_g_filt_sse;
s->hf_gen = ff_sbr_hf_gen_sse;
s->qmf_post_shuffle = ff_sbr_qmf_post_shuffle_sse;
+ s->qmf_deint_bfly = ff_sbr_qmf_deint_bfly_sse;
}
if (EXTERNAL_SSE2(mm_flags)) {
s->qmf_deint_bfly = ff_sbr_qmf_deint_bfly_sse2;
s->qmf_pre_shuffle = ff_sbr_qmf_pre_shuffle_sse2;
+ s->hf_apply_noise[0] = ff_sbr_hf_apply_noise_0_sse2;
+ s->hf_apply_noise[1] = ff_sbr_hf_apply_noise_1_sse2;
+ s->hf_apply_noise[2] = ff_sbr_hf_apply_noise_2_sse2;
+ s->hf_apply_noise[3] = ff_sbr_hf_apply_noise_3_sse2;
}
}
diff --git a/libavcodec/x86/simple_idct.c b/libavcodec/x86/simple_idct.c
index 36f0b477e0..c790ef57e9 100644
--- a/libavcodec/x86/simple_idct.c
+++ b/libavcodec/x86/simple_idct.c
@@ -3,24 +3,23 @@
*
* Copyright (c) 2001, 2002 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavcodec/simple_idct.h"
-#include "libavutil/internal.h"
#include "libavutil/mem.h"
#include "dsputil_x86.h"
diff --git a/libavcodec/x86/snowdsp.c b/libavcodec/x86/snowdsp.c
new file mode 100644
index 0000000000..735e7905a0
--- /dev/null
+++ b/libavcodec/x86/snowdsp.c
@@ -0,0 +1,902 @@
+/*
+ * MMX and SSE2 optimized snow DSP utils
+ * Copyright (c) 2005-2006 Robert Edele <yartrebo@earthlink.net>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/cpu.h"
+#include "libavutil/x86/asm.h"
+#include "libavcodec/avcodec.h"
+#include "libavcodec/snow.h"
+#include "libavcodec/snow_dwt.h"
+#include "dsputil_x86.h"
+
+#if HAVE_INLINE_ASM
+
+static void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, IDWTELEM *temp, int width){
+ const int w2= (width+1)>>1;
+ const int w_l= (width>>1);
+ const int w_r= w2 - 1;
+ int i;
+
+ { // Lift 0
+ IDWTELEM * const ref = b + w2 - 1;
+ IDWTELEM b_0 = b[0]; //By allowing the first entry in b[0] to be calculated twice
+ // (the first time erroneously), we allow the SSE2 code to run an extra pass.
+ // The savings in code and time are well worth having to store this value and
+ // calculate b[0] correctly afterwards.
+
+ i = 0;
+ __asm__ volatile(
+ "pcmpeqd %%xmm7, %%xmm7 \n\t"
+ "pcmpeqd %%xmm3, %%xmm3 \n\t"
+ "psllw $1, %%xmm3 \n\t"
+ "paddw %%xmm7, %%xmm3 \n\t"
+ "psllw $13, %%xmm3 \n\t"
+ ::);
+ for(; i<w_l-15; i+=16){
+ __asm__ volatile(
+ "movdqu (%1), %%xmm1 \n\t"
+ "movdqu 16(%1), %%xmm5 \n\t"
+ "movdqu 2(%1), %%xmm2 \n\t"
+ "movdqu 18(%1), %%xmm6 \n\t"
+ "paddw %%xmm1, %%xmm2 \n\t"
+ "paddw %%xmm5, %%xmm6 \n\t"
+ "paddw %%xmm7, %%xmm2 \n\t"
+ "paddw %%xmm7, %%xmm6 \n\t"
+ "pmulhw %%xmm3, %%xmm2 \n\t"
+ "pmulhw %%xmm3, %%xmm6 \n\t"
+ "paddw (%0), %%xmm2 \n\t"
+ "paddw 16(%0), %%xmm6 \n\t"
+ "movdqa %%xmm2, (%0) \n\t"
+ "movdqa %%xmm6, 16(%0) \n\t"
+ :: "r"(&b[i]), "r"(&ref[i])
+ : "memory"
+ );
+ }
+ snow_horizontal_compose_lift_lead_out(i, b, b, ref, width, w_l, 0, W_DM, W_DO, W_DS);
+ b[0] = b_0 - ((W_DM * 2 * ref[1]+W_DO)>>W_DS);
+ }
+
+ { // Lift 1
+ IDWTELEM * const dst = b+w2;
+
+ i = 0;
+ for(; (((x86_reg)&dst[i]) & 0x1F) && i<w_r; i++){
+ dst[i] = dst[i] - (b[i] + b[i + 1]);
+ }
+ for(; i<w_r-15; i+=16){
+ __asm__ volatile(
+ "movdqu (%1), %%xmm1 \n\t"
+ "movdqu 16(%1), %%xmm5 \n\t"
+ "movdqu 2(%1), %%xmm2 \n\t"
+ "movdqu 18(%1), %%xmm6 \n\t"
+ "paddw %%xmm1, %%xmm2 \n\t"
+ "paddw %%xmm5, %%xmm6 \n\t"
+ "movdqa (%0), %%xmm0 \n\t"
+ "movdqa 16(%0), %%xmm4 \n\t"
+ "psubw %%xmm2, %%xmm0 \n\t"
+ "psubw %%xmm6, %%xmm4 \n\t"
+ "movdqa %%xmm0, (%0) \n\t"
+ "movdqa %%xmm4, 16(%0) \n\t"
+ :: "r"(&dst[i]), "r"(&b[i])
+ : "memory"
+ );
+ }
+ snow_horizontal_compose_lift_lead_out(i, dst, dst, b, width, w_r, 1, W_CM, W_CO, W_CS);
+ }
+
+ { // Lift 2
+ IDWTELEM * const ref = b+w2 - 1;
+ IDWTELEM b_0 = b[0];
+
+ i = 0;
+ __asm__ volatile(
+ "psllw $15, %%xmm7 \n\t"
+ "pcmpeqw %%xmm6, %%xmm6 \n\t"
+ "psrlw $13, %%xmm6 \n\t"
+ "paddw %%xmm7, %%xmm6 \n\t"
+ ::);
+ for(; i<w_l-15; i+=16){
+ __asm__ volatile(
+ "movdqu (%1), %%xmm0 \n\t"
+ "movdqu 16(%1), %%xmm4 \n\t"
+ "movdqu 2(%1), %%xmm1 \n\t"
+ "movdqu 18(%1), %%xmm5 \n\t" //FIXME try aligned reads and shifts
+ "paddw %%xmm6, %%xmm0 \n\t"
+ "paddw %%xmm6, %%xmm4 \n\t"
+ "paddw %%xmm7, %%xmm1 \n\t"
+ "paddw %%xmm7, %%xmm5 \n\t"
+ "pavgw %%xmm1, %%xmm0 \n\t"
+ "pavgw %%xmm5, %%xmm4 \n\t"
+ "psubw %%xmm7, %%xmm0 \n\t"
+ "psubw %%xmm7, %%xmm4 \n\t"
+ "psraw $1, %%xmm0 \n\t"
+ "psraw $1, %%xmm4 \n\t"
+ "movdqa (%0), %%xmm1 \n\t"
+ "movdqa 16(%0), %%xmm5 \n\t"
+ "paddw %%xmm1, %%xmm0 \n\t"
+ "paddw %%xmm5, %%xmm4 \n\t"
+ "psraw $2, %%xmm0 \n\t"
+ "psraw $2, %%xmm4 \n\t"
+ "paddw %%xmm1, %%xmm0 \n\t"
+ "paddw %%xmm5, %%xmm4 \n\t"
+ "movdqa %%xmm0, (%0) \n\t"
+ "movdqa %%xmm4, 16(%0) \n\t"
+ :: "r"(&b[i]), "r"(&ref[i])
+ : "memory"
+ );
+ }
+ snow_horizontal_compose_liftS_lead_out(i, b, b, ref, width, w_l);
+ b[0] = b_0 + ((2 * ref[1] + W_BO-1 + 4 * b_0) >> W_BS);
+ }
+
+ { // Lift 3
+ IDWTELEM * const src = b+w2;
+
+ i = 0;
+ for(; (((x86_reg)&temp[i]) & 0x1F) && i<w_r; i++){
+ temp[i] = src[i] - ((-W_AM*(b[i] + b[i+1]))>>W_AS);
+ }
+ for(; i<w_r-7; i+=8){
+ __asm__ volatile(
+ "movdqu 2(%1), %%xmm2 \n\t"
+ "movdqu 18(%1), %%xmm6 \n\t"
+ "paddw (%1), %%xmm2 \n\t"
+ "paddw 16(%1), %%xmm6 \n\t"
+ "movdqu (%0), %%xmm0 \n\t"
+ "movdqu 16(%0), %%xmm4 \n\t"
+ "paddw %%xmm2, %%xmm0 \n\t"
+ "paddw %%xmm6, %%xmm4 \n\t"
+ "psraw $1, %%xmm2 \n\t"
+ "psraw $1, %%xmm6 \n\t"
+ "paddw %%xmm0, %%xmm2 \n\t"
+ "paddw %%xmm4, %%xmm6 \n\t"
+ "movdqa %%xmm2, (%2) \n\t"
+ "movdqa %%xmm6, 16(%2) \n\t"
+ :: "r"(&src[i]), "r"(&b[i]), "r"(&temp[i])
+ : "memory"
+ );
+ }
+ snow_horizontal_compose_lift_lead_out(i, temp, src, b, width, w_r, 1, -W_AM, W_AO+1, W_AS);
+ }
+
+ {
+ snow_interleave_line_header(&i, width, b, temp);
+
+ for (; (i & 0x3E) != 0x3E; i-=2){
+ b[i+1] = temp[i>>1];
+ b[i] = b[i>>1];
+ }
+ for (i-=62; i>=0; i-=64){
+ __asm__ volatile(
+ "movdqa (%1), %%xmm0 \n\t"
+ "movdqa 16(%1), %%xmm2 \n\t"
+ "movdqa 32(%1), %%xmm4 \n\t"
+ "movdqa 48(%1), %%xmm6 \n\t"
+ "movdqa (%1), %%xmm1 \n\t"
+ "movdqa 16(%1), %%xmm3 \n\t"
+ "movdqa 32(%1), %%xmm5 \n\t"
+ "movdqa 48(%1), %%xmm7 \n\t"
+ "punpcklwd (%2), %%xmm0 \n\t"
+ "punpcklwd 16(%2), %%xmm2 \n\t"
+ "punpcklwd 32(%2), %%xmm4 \n\t"
+ "punpcklwd 48(%2), %%xmm6 \n\t"
+ "movdqa %%xmm0, (%0) \n\t"
+ "movdqa %%xmm2, 32(%0) \n\t"
+ "movdqa %%xmm4, 64(%0) \n\t"
+ "movdqa %%xmm6, 96(%0) \n\t"
+ "punpckhwd (%2), %%xmm1 \n\t"
+ "punpckhwd 16(%2), %%xmm3 \n\t"
+ "punpckhwd 32(%2), %%xmm5 \n\t"
+ "punpckhwd 48(%2), %%xmm7 \n\t"
+ "movdqa %%xmm1, 16(%0) \n\t"
+ "movdqa %%xmm3, 48(%0) \n\t"
+ "movdqa %%xmm5, 80(%0) \n\t"
+ "movdqa %%xmm7, 112(%0) \n\t"
+ :: "r"(&(b)[i]), "r"(&(b)[i>>1]), "r"(&(temp)[i>>1])
+ : "memory"
+ );
+ }
+ }
+}
+
+static void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, IDWTELEM *temp, int width){
+ const int w2= (width+1)>>1;
+ const int w_l= (width>>1);
+ const int w_r= w2 - 1;
+ int i;
+
+ { // Lift 0
+ IDWTELEM * const ref = b + w2 - 1;
+
+ i = 1;
+ b[0] = b[0] - ((W_DM * 2 * ref[1]+W_DO)>>W_DS);
+ __asm__ volatile(
+ "pcmpeqw %%mm7, %%mm7 \n\t"
+ "pcmpeqw %%mm3, %%mm3 \n\t"
+ "psllw $1, %%mm3 \n\t"
+ "paddw %%mm7, %%mm3 \n\t"
+ "psllw $13, %%mm3 \n\t"
+ ::);
+ for(; i<w_l-7; i+=8){
+ __asm__ volatile(
+ "movq (%1), %%mm2 \n\t"
+ "movq 8(%1), %%mm6 \n\t"
+ "paddw 2(%1), %%mm2 \n\t"
+ "paddw 10(%1), %%mm6 \n\t"
+ "paddw %%mm7, %%mm2 \n\t"
+ "paddw %%mm7, %%mm6 \n\t"
+ "pmulhw %%mm3, %%mm2 \n\t"
+ "pmulhw %%mm3, %%mm6 \n\t"
+ "paddw (%0), %%mm2 \n\t"
+ "paddw 8(%0), %%mm6 \n\t"
+ "movq %%mm2, (%0) \n\t"
+ "movq %%mm6, 8(%0) \n\t"
+ :: "r"(&b[i]), "r"(&ref[i])
+ : "memory"
+ );
+ }
+ snow_horizontal_compose_lift_lead_out(i, b, b, ref, width, w_l, 0, W_DM, W_DO, W_DS);
+ }
+
+ { // Lift 1
+ IDWTELEM * const dst = b+w2;
+
+ i = 0;
+ for(; i<w_r-7; i+=8){
+ __asm__ volatile(
+ "movq (%1), %%mm2 \n\t"
+ "movq 8(%1), %%mm6 \n\t"
+ "paddw 2(%1), %%mm2 \n\t"
+ "paddw 10(%1), %%mm6 \n\t"
+ "movq (%0), %%mm0 \n\t"
+ "movq 8(%0), %%mm4 \n\t"
+ "psubw %%mm2, %%mm0 \n\t"
+ "psubw %%mm6, %%mm4 \n\t"
+ "movq %%mm0, (%0) \n\t"
+ "movq %%mm4, 8(%0) \n\t"
+ :: "r"(&dst[i]), "r"(&b[i])
+ : "memory"
+ );
+ }
+ snow_horizontal_compose_lift_lead_out(i, dst, dst, b, width, w_r, 1, W_CM, W_CO, W_CS);
+ }
+
+ { // Lift 2
+ IDWTELEM * const ref = b+w2 - 1;
+
+ i = 1;
+ b[0] = b[0] + (((2 * ref[1] + W_BO) + 4 * b[0]) >> W_BS);
+ __asm__ volatile(
+ "psllw $15, %%mm7 \n\t"
+ "pcmpeqw %%mm6, %%mm6 \n\t"
+ "psrlw $13, %%mm6 \n\t"
+ "paddw %%mm7, %%mm6 \n\t"
+ ::);
+ for(; i<w_l-7; i+=8){
+ __asm__ volatile(
+ "movq (%1), %%mm0 \n\t"
+ "movq 8(%1), %%mm4 \n\t"
+ "movq 2(%1), %%mm1 \n\t"
+ "movq 10(%1), %%mm5 \n\t"
+ "paddw %%mm6, %%mm0 \n\t"
+ "paddw %%mm6, %%mm4 \n\t"
+ "paddw %%mm7, %%mm1 \n\t"
+ "paddw %%mm7, %%mm5 \n\t"
+ "pavgw %%mm1, %%mm0 \n\t"
+ "pavgw %%mm5, %%mm4 \n\t"
+ "psubw %%mm7, %%mm0 \n\t"
+ "psubw %%mm7, %%mm4 \n\t"
+ "psraw $1, %%mm0 \n\t"
+ "psraw $1, %%mm4 \n\t"
+ "movq (%0), %%mm1 \n\t"
+ "movq 8(%0), %%mm5 \n\t"
+ "paddw %%mm1, %%mm0 \n\t"
+ "paddw %%mm5, %%mm4 \n\t"
+ "psraw $2, %%mm0 \n\t"
+ "psraw $2, %%mm4 \n\t"
+ "paddw %%mm1, %%mm0 \n\t"
+ "paddw %%mm5, %%mm4 \n\t"
+ "movq %%mm0, (%0) \n\t"
+ "movq %%mm4, 8(%0) \n\t"
+ :: "r"(&b[i]), "r"(&ref[i])
+ : "memory"
+ );
+ }
+ snow_horizontal_compose_liftS_lead_out(i, b, b, ref, width, w_l);
+ }
+
+ { // Lift 3
+ IDWTELEM * const src = b+w2;
+ i = 0;
+
+ for(; i<w_r-7; i+=8){
+ __asm__ volatile(
+ "movq 2(%1), %%mm2 \n\t"
+ "movq 10(%1), %%mm6 \n\t"
+ "paddw (%1), %%mm2 \n\t"
+ "paddw 8(%1), %%mm6 \n\t"
+ "movq (%0), %%mm0 \n\t"
+ "movq 8(%0), %%mm4 \n\t"
+ "paddw %%mm2, %%mm0 \n\t"
+ "paddw %%mm6, %%mm4 \n\t"
+ "psraw $1, %%mm2 \n\t"
+ "psraw $1, %%mm6 \n\t"
+ "paddw %%mm0, %%mm2 \n\t"
+ "paddw %%mm4, %%mm6 \n\t"
+ "movq %%mm2, (%2) \n\t"
+ "movq %%mm6, 8(%2) \n\t"
+ :: "r"(&src[i]), "r"(&b[i]), "r"(&temp[i])
+ : "memory"
+ );
+ }
+ snow_horizontal_compose_lift_lead_out(i, temp, src, b, width, w_r, 1, -W_AM, W_AO+1, W_AS);
+ }
+
+ {
+ snow_interleave_line_header(&i, width, b, temp);
+
+ for (; (i & 0x1E) != 0x1E; i-=2){
+ b[i+1] = temp[i>>1];
+ b[i] = b[i>>1];
+ }
+ for (i-=30; i>=0; i-=32){
+ __asm__ volatile(
+ "movq (%1), %%mm0 \n\t"
+ "movq 8(%1), %%mm2 \n\t"
+ "movq 16(%1), %%mm4 \n\t"
+ "movq 24(%1), %%mm6 \n\t"
+ "movq (%1), %%mm1 \n\t"
+ "movq 8(%1), %%mm3 \n\t"
+ "movq 16(%1), %%mm5 \n\t"
+ "movq 24(%1), %%mm7 \n\t"
+ "punpcklwd (%2), %%mm0 \n\t"
+ "punpcklwd 8(%2), %%mm2 \n\t"
+ "punpcklwd 16(%2), %%mm4 \n\t"
+ "punpcklwd 24(%2), %%mm6 \n\t"
+ "movq %%mm0, (%0) \n\t"
+ "movq %%mm2, 16(%0) \n\t"
+ "movq %%mm4, 32(%0) \n\t"
+ "movq %%mm6, 48(%0) \n\t"
+ "punpckhwd (%2), %%mm1 \n\t"
+ "punpckhwd 8(%2), %%mm3 \n\t"
+ "punpckhwd 16(%2), %%mm5 \n\t"
+ "punpckhwd 24(%2), %%mm7 \n\t"
+ "movq %%mm1, 8(%0) \n\t"
+ "movq %%mm3, 24(%0) \n\t"
+ "movq %%mm5, 40(%0) \n\t"
+ "movq %%mm7, 56(%0) \n\t"
+ :: "r"(&b[i]), "r"(&b[i>>1]), "r"(&temp[i>>1])
+ : "memory"
+ );
+ }
+ }
+}
+
+#if HAVE_7REGS
+#define snow_vertical_compose_sse2_load_add(op,r,t0,t1,t2,t3)\
+ ""op" ("r",%%"REG_d"), %%"t0" \n\t"\
+ ""op" 16("r",%%"REG_d"), %%"t1" \n\t"\
+ ""op" 32("r",%%"REG_d"), %%"t2" \n\t"\
+ ""op" 48("r",%%"REG_d"), %%"t3" \n\t"
+
+#define snow_vertical_compose_sse2_load(r,t0,t1,t2,t3)\
+ snow_vertical_compose_sse2_load_add("movdqa",r,t0,t1,t2,t3)
+
+#define snow_vertical_compose_sse2_add(r,t0,t1,t2,t3)\
+ snow_vertical_compose_sse2_load_add("paddw",r,t0,t1,t2,t3)
+
+#define snow_vertical_compose_r2r_sub(s0,s1,s2,s3,t0,t1,t2,t3)\
+ "psubw %%"s0", %%"t0" \n\t"\
+ "psubw %%"s1", %%"t1" \n\t"\
+ "psubw %%"s2", %%"t2" \n\t"\
+ "psubw %%"s3", %%"t3" \n\t"
+
+#define snow_vertical_compose_sse2_store(w,s0,s1,s2,s3)\
+ "movdqa %%"s0", ("w",%%"REG_d") \n\t"\
+ "movdqa %%"s1", 16("w",%%"REG_d") \n\t"\
+ "movdqa %%"s2", 32("w",%%"REG_d") \n\t"\
+ "movdqa %%"s3", 48("w",%%"REG_d") \n\t"
+
+#define snow_vertical_compose_sra(n,t0,t1,t2,t3)\
+ "psraw $"n", %%"t0" \n\t"\
+ "psraw $"n", %%"t1" \n\t"\
+ "psraw $"n", %%"t2" \n\t"\
+ "psraw $"n", %%"t3" \n\t"
+
+#define snow_vertical_compose_r2r_add(s0,s1,s2,s3,t0,t1,t2,t3)\
+ "paddw %%"s0", %%"t0" \n\t"\
+ "paddw %%"s1", %%"t1" \n\t"\
+ "paddw %%"s2", %%"t2" \n\t"\
+ "paddw %%"s3", %%"t3" \n\t"
+
+#define snow_vertical_compose_r2r_pmulhw(s0,s1,s2,s3,t0,t1,t2,t3)\
+ "pmulhw %%"s0", %%"t0" \n\t"\
+ "pmulhw %%"s1", %%"t1" \n\t"\
+ "pmulhw %%"s2", %%"t2" \n\t"\
+ "pmulhw %%"s3", %%"t3" \n\t"
+
+#define snow_vertical_compose_sse2_move(s0,s1,s2,s3,t0,t1,t2,t3)\
+ "movdqa %%"s0", %%"t0" \n\t"\
+ "movdqa %%"s1", %%"t1" \n\t"\
+ "movdqa %%"s2", %%"t2" \n\t"\
+ "movdqa %%"s3", %%"t3" \n\t"
+
+static void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
+ x86_reg i = width;
+
+ while(i & 0x1F)
+ {
+ i--;
+ b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
+ b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
+ b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
+ b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
+ }
+ i+=i;
+
+ __asm__ volatile (
+ "jmp 2f \n\t"
+ "1: \n\t"
+ snow_vertical_compose_sse2_load("%4","xmm0","xmm2","xmm4","xmm6")
+ snow_vertical_compose_sse2_add("%6","xmm0","xmm2","xmm4","xmm6")
+
+
+ "pcmpeqw %%xmm0, %%xmm0 \n\t"
+ "pcmpeqw %%xmm2, %%xmm2 \n\t"
+ "paddw %%xmm2, %%xmm2 \n\t"
+ "paddw %%xmm0, %%xmm2 \n\t"
+ "psllw $13, %%xmm2 \n\t"
+ snow_vertical_compose_r2r_add("xmm0","xmm0","xmm0","xmm0","xmm1","xmm3","xmm5","xmm7")
+ snow_vertical_compose_r2r_pmulhw("xmm2","xmm2","xmm2","xmm2","xmm1","xmm3","xmm5","xmm7")
+ snow_vertical_compose_sse2_add("%5","xmm1","xmm3","xmm5","xmm7")
+ snow_vertical_compose_sse2_store("%5","xmm1","xmm3","xmm5","xmm7")
+ snow_vertical_compose_sse2_load("%4","xmm0","xmm2","xmm4","xmm6")
+ snow_vertical_compose_sse2_add("%3","xmm1","xmm3","xmm5","xmm7")
+ snow_vertical_compose_r2r_sub("xmm1","xmm3","xmm5","xmm7","xmm0","xmm2","xmm4","xmm6")
+ snow_vertical_compose_sse2_store("%4","xmm0","xmm2","xmm4","xmm6")
+
+ "pcmpeqw %%xmm7, %%xmm7 \n\t"
+ "pcmpeqw %%xmm5, %%xmm5 \n\t"
+ "psllw $15, %%xmm7 \n\t"
+ "psrlw $13, %%xmm5 \n\t"
+ "paddw %%xmm7, %%xmm5 \n\t"
+ snow_vertical_compose_r2r_add("xmm5","xmm5","xmm5","xmm5","xmm0","xmm2","xmm4","xmm6")
+ "movq (%2,%%"REG_d"), %%xmm1 \n\t"
+ "movq 8(%2,%%"REG_d"), %%xmm3 \n\t"
+ "paddw %%xmm7, %%xmm1 \n\t"
+ "paddw %%xmm7, %%xmm3 \n\t"
+ "pavgw %%xmm1, %%xmm0 \n\t"
+ "pavgw %%xmm3, %%xmm2 \n\t"
+ "movq 16(%2,%%"REG_d"), %%xmm1 \n\t"
+ "movq 24(%2,%%"REG_d"), %%xmm3 \n\t"
+ "paddw %%xmm7, %%xmm1 \n\t"
+ "paddw %%xmm7, %%xmm3 \n\t"
+ "pavgw %%xmm1, %%xmm4 \n\t"
+ "pavgw %%xmm3, %%xmm6 \n\t"
+ snow_vertical_compose_r2r_sub("xmm7","xmm7","xmm7","xmm7","xmm0","xmm2","xmm4","xmm6")
+ snow_vertical_compose_sra("1","xmm0","xmm2","xmm4","xmm6")
+ snow_vertical_compose_sse2_add("%3","xmm0","xmm2","xmm4","xmm6")
+
+ snow_vertical_compose_sra("2","xmm0","xmm2","xmm4","xmm6")
+ snow_vertical_compose_sse2_add("%3","xmm0","xmm2","xmm4","xmm6")
+ snow_vertical_compose_sse2_store("%3","xmm0","xmm2","xmm4","xmm6")
+ snow_vertical_compose_sse2_add("%1","xmm0","xmm2","xmm4","xmm6")
+ snow_vertical_compose_sse2_move("xmm0","xmm2","xmm4","xmm6","xmm1","xmm3","xmm5","xmm7")
+ snow_vertical_compose_sra("1","xmm0","xmm2","xmm4","xmm6")
+ snow_vertical_compose_r2r_add("xmm1","xmm3","xmm5","xmm7","xmm0","xmm2","xmm4","xmm6")
+ snow_vertical_compose_sse2_add("%2","xmm0","xmm2","xmm4","xmm6")
+ snow_vertical_compose_sse2_store("%2","xmm0","xmm2","xmm4","xmm6")
+
+ "2: \n\t"
+ "sub $64, %%"REG_d" \n\t"
+ "jge 1b \n\t"
+ :"+d"(i)
+ :"r"(b0),"r"(b1),"r"(b2),"r"(b3),"r"(b4),"r"(b5));
+}
+
+#define snow_vertical_compose_mmx_load_add(op,r,t0,t1,t2,t3)\
+ ""op" ("r",%%"REG_d"), %%"t0" \n\t"\
+ ""op" 8("r",%%"REG_d"), %%"t1" \n\t"\
+ ""op" 16("r",%%"REG_d"), %%"t2" \n\t"\
+ ""op" 24("r",%%"REG_d"), %%"t3" \n\t"
+
+#define snow_vertical_compose_mmx_load(r,t0,t1,t2,t3)\
+ snow_vertical_compose_mmx_load_add("movq",r,t0,t1,t2,t3)
+
+#define snow_vertical_compose_mmx_add(r,t0,t1,t2,t3)\
+ snow_vertical_compose_mmx_load_add("paddw",r,t0,t1,t2,t3)
+
+#define snow_vertical_compose_mmx_store(w,s0,s1,s2,s3)\
+ "movq %%"s0", ("w",%%"REG_d") \n\t"\
+ "movq %%"s1", 8("w",%%"REG_d") \n\t"\
+ "movq %%"s2", 16("w",%%"REG_d") \n\t"\
+ "movq %%"s3", 24("w",%%"REG_d") \n\t"
+
+#define snow_vertical_compose_mmx_move(s0,s1,s2,s3,t0,t1,t2,t3)\
+ "movq %%"s0", %%"t0" \n\t"\
+ "movq %%"s1", %%"t1" \n\t"\
+ "movq %%"s2", %%"t2" \n\t"\
+ "movq %%"s3", %%"t3" \n\t"
+
+
+static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
+ x86_reg i = width;
+ while(i & 15)
+ {
+ i--;
+ b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
+ b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
+ b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
+ b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
+ }
+ i+=i;
+ __asm__ volatile(
+ "jmp 2f \n\t"
+ "1: \n\t"
+
+ snow_vertical_compose_mmx_load("%4","mm1","mm3","mm5","mm7")
+ snow_vertical_compose_mmx_add("%6","mm1","mm3","mm5","mm7")
+ "pcmpeqw %%mm0, %%mm0 \n\t"
+ "pcmpeqw %%mm2, %%mm2 \n\t"
+ "paddw %%mm2, %%mm2 \n\t"
+ "paddw %%mm0, %%mm2 \n\t"
+ "psllw $13, %%mm2 \n\t"
+ snow_vertical_compose_r2r_add("mm0","mm0","mm0","mm0","mm1","mm3","mm5","mm7")
+ snow_vertical_compose_r2r_pmulhw("mm2","mm2","mm2","mm2","mm1","mm3","mm5","mm7")
+ snow_vertical_compose_mmx_add("%5","mm1","mm3","mm5","mm7")
+ snow_vertical_compose_mmx_store("%5","mm1","mm3","mm5","mm7")
+ snow_vertical_compose_mmx_load("%4","mm0","mm2","mm4","mm6")
+ snow_vertical_compose_mmx_add("%3","mm1","mm3","mm5","mm7")
+ snow_vertical_compose_r2r_sub("mm1","mm3","mm5","mm7","mm0","mm2","mm4","mm6")
+ snow_vertical_compose_mmx_store("%4","mm0","mm2","mm4","mm6")
+ "pcmpeqw %%mm7, %%mm7 \n\t"
+ "pcmpeqw %%mm5, %%mm5 \n\t"
+ "psllw $15, %%mm7 \n\t"
+ "psrlw $13, %%mm5 \n\t"
+ "paddw %%mm7, %%mm5 \n\t"
+ snow_vertical_compose_r2r_add("mm5","mm5","mm5","mm5","mm0","mm2","mm4","mm6")
+ "movq (%2,%%"REG_d"), %%mm1 \n\t"
+ "movq 8(%2,%%"REG_d"), %%mm3 \n\t"
+ "paddw %%mm7, %%mm1 \n\t"
+ "paddw %%mm7, %%mm3 \n\t"
+ "pavgw %%mm1, %%mm0 \n\t"
+ "pavgw %%mm3, %%mm2 \n\t"
+ "movq 16(%2,%%"REG_d"), %%mm1 \n\t"
+ "movq 24(%2,%%"REG_d"), %%mm3 \n\t"
+ "paddw %%mm7, %%mm1 \n\t"
+ "paddw %%mm7, %%mm3 \n\t"
+ "pavgw %%mm1, %%mm4 \n\t"
+ "pavgw %%mm3, %%mm6 \n\t"
+ snow_vertical_compose_r2r_sub("mm7","mm7","mm7","mm7","mm0","mm2","mm4","mm6")
+ snow_vertical_compose_sra("1","mm0","mm2","mm4","mm6")
+ snow_vertical_compose_mmx_add("%3","mm0","mm2","mm4","mm6")
+
+ snow_vertical_compose_sra("2","mm0","mm2","mm4","mm6")
+ snow_vertical_compose_mmx_add("%3","mm0","mm2","mm4","mm6")
+ snow_vertical_compose_mmx_store("%3","mm0","mm2","mm4","mm6")
+ snow_vertical_compose_mmx_add("%1","mm0","mm2","mm4","mm6")
+ snow_vertical_compose_mmx_move("mm0","mm2","mm4","mm6","mm1","mm3","mm5","mm7")
+ snow_vertical_compose_sra("1","mm0","mm2","mm4","mm6")
+ snow_vertical_compose_r2r_add("mm1","mm3","mm5","mm7","mm0","mm2","mm4","mm6")
+ snow_vertical_compose_mmx_add("%2","mm0","mm2","mm4","mm6")
+ snow_vertical_compose_mmx_store("%2","mm0","mm2","mm4","mm6")
+
+ "2: \n\t"
+ "sub $32, %%"REG_d" \n\t"
+ "jge 1b \n\t"
+ :"+d"(i)
+ :"r"(b0),"r"(b1),"r"(b2),"r"(b3),"r"(b4),"r"(b5));
+}
+#endif //HAVE_7REGS
+
+#define snow_inner_add_yblock_sse2_header \
+ IDWTELEM * * dst_array = sb->line + src_y;\
+ x86_reg tmp;\
+ __asm__ volatile(\
+ "mov %7, %%"REG_c" \n\t"\
+ "mov %6, %2 \n\t"\
+ "mov %4, %%"REG_S" \n\t"\
+ "pxor %%xmm7, %%xmm7 \n\t" /* 0 */\
+ "pcmpeqd %%xmm3, %%xmm3 \n\t"\
+ "psllw $15, %%xmm3 \n\t"\
+ "psrlw $12, %%xmm3 \n\t" /* FRAC_BITS >> 1 */\
+ "1: \n\t"\
+ "mov %1, %%"REG_D" \n\t"\
+ "mov (%%"REG_D"), %%"REG_D" \n\t"\
+ "add %3, %%"REG_D" \n\t"
+
+#define snow_inner_add_yblock_sse2_start_8(out_reg1, out_reg2, ptr_offset, s_offset)\
+ "mov "PTR_SIZE"*"ptr_offset"(%%"REG_a"), %%"REG_d"; \n\t"\
+ "movq (%%"REG_d"), %%"out_reg1" \n\t"\
+ "movq (%%"REG_d", %%"REG_c"), %%"out_reg2" \n\t"\
+ "punpcklbw %%xmm7, %%"out_reg1" \n\t"\
+ "punpcklbw %%xmm7, %%"out_reg2" \n\t"\
+ "movq "s_offset"(%%"REG_S"), %%xmm0 \n\t"\
+ "movq "s_offset"+16(%%"REG_S"), %%xmm4 \n\t"\
+ "punpcklbw %%xmm7, %%xmm0 \n\t"\
+ "punpcklbw %%xmm7, %%xmm4 \n\t"\
+ "pmullw %%xmm0, %%"out_reg1" \n\t"\
+ "pmullw %%xmm4, %%"out_reg2" \n\t"
+
+#define snow_inner_add_yblock_sse2_start_16(out_reg1, out_reg2, ptr_offset, s_offset)\
+ "mov "PTR_SIZE"*"ptr_offset"(%%"REG_a"), %%"REG_d"; \n\t"\
+ "movq (%%"REG_d"), %%"out_reg1" \n\t"\
+ "movq 8(%%"REG_d"), %%"out_reg2" \n\t"\
+ "punpcklbw %%xmm7, %%"out_reg1" \n\t"\
+ "punpcklbw %%xmm7, %%"out_reg2" \n\t"\
+ "movq "s_offset"(%%"REG_S"), %%xmm0 \n\t"\
+ "movq "s_offset"+8(%%"REG_S"), %%xmm4 \n\t"\
+ "punpcklbw %%xmm7, %%xmm0 \n\t"\
+ "punpcklbw %%xmm7, %%xmm4 \n\t"\
+ "pmullw %%xmm0, %%"out_reg1" \n\t"\
+ "pmullw %%xmm4, %%"out_reg2" \n\t"
+
+#define snow_inner_add_yblock_sse2_accum_8(ptr_offset, s_offset) \
+ snow_inner_add_yblock_sse2_start_8("xmm2", "xmm6", ptr_offset, s_offset)\
+ "paddusw %%xmm2, %%xmm1 \n\t"\
+ "paddusw %%xmm6, %%xmm5 \n\t"
+
+#define snow_inner_add_yblock_sse2_accum_16(ptr_offset, s_offset) \
+ snow_inner_add_yblock_sse2_start_16("xmm2", "xmm6", ptr_offset, s_offset)\
+ "paddusw %%xmm2, %%xmm1 \n\t"\
+ "paddusw %%xmm6, %%xmm5 \n\t"
+
+#define snow_inner_add_yblock_sse2_end_common1\
+ "add $32, %%"REG_S" \n\t"\
+ "add %%"REG_c", %0 \n\t"\
+ "add %%"REG_c", "PTR_SIZE"*3(%%"REG_a");\n\t"\
+ "add %%"REG_c", "PTR_SIZE"*2(%%"REG_a");\n\t"\
+ "add %%"REG_c", "PTR_SIZE"*1(%%"REG_a");\n\t"\
+ "add %%"REG_c", (%%"REG_a") \n\t"
+
+#define snow_inner_add_yblock_sse2_end_common2\
+ "jnz 1b \n\t"\
+ :"+m"(dst8),"+m"(dst_array),"=&r"(tmp)\
+ :\
+ "rm"((x86_reg)(src_x<<1)),"m"(obmc),"a"(block),"m"(b_h),"m"(src_stride):\
+ "%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d"");
+
+#define snow_inner_add_yblock_sse2_end_8\
+ "sal $1, %%"REG_c" \n\t"\
+ "add"OPSIZE" $"PTR_SIZE"*2, %1 \n\t"\
+ snow_inner_add_yblock_sse2_end_common1\
+ "sar $1, %%"REG_c" \n\t"\
+ "sub $2, %2 \n\t"\
+ snow_inner_add_yblock_sse2_end_common2
+
+#define snow_inner_add_yblock_sse2_end_16\
+ "add"OPSIZE" $"PTR_SIZE"*1, %1 \n\t"\
+ snow_inner_add_yblock_sse2_end_common1\
+ "dec %2 \n\t"\
+ snow_inner_add_yblock_sse2_end_common2
+
+static void inner_add_yblock_bw_8_obmc_16_bh_even_sse2(const uint8_t *obmc, const x86_reg obmc_stride, uint8_t * * block, int b_w, x86_reg b_h,
+ int src_x, int src_y, x86_reg src_stride, slice_buffer * sb, int add, uint8_t * dst8){
+snow_inner_add_yblock_sse2_header
+snow_inner_add_yblock_sse2_start_8("xmm1", "xmm5", "3", "0")
+snow_inner_add_yblock_sse2_accum_8("2", "8")
+snow_inner_add_yblock_sse2_accum_8("1", "128")
+snow_inner_add_yblock_sse2_accum_8("0", "136")
+
+ "mov %0, %%"REG_d" \n\t"
+ "movdqa (%%"REG_D"), %%xmm0 \n\t"
+ "movdqa %%xmm1, %%xmm2 \n\t"
+
+ "punpckhwd %%xmm7, %%xmm1 \n\t"
+ "punpcklwd %%xmm7, %%xmm2 \n\t"
+ "paddd %%xmm2, %%xmm0 \n\t"
+ "movdqa 16(%%"REG_D"), %%xmm2 \n\t"
+ "paddd %%xmm1, %%xmm2 \n\t"
+ "paddd %%xmm3, %%xmm0 \n\t"
+ "paddd %%xmm3, %%xmm2 \n\t"
+
+ "mov %1, %%"REG_D" \n\t"
+ "mov "PTR_SIZE"(%%"REG_D"), %%"REG_D";\n\t"
+ "add %3, %%"REG_D" \n\t"
+
+ "movdqa (%%"REG_D"), %%xmm4 \n\t"
+ "movdqa %%xmm5, %%xmm6 \n\t"
+ "punpckhwd %%xmm7, %%xmm5 \n\t"
+ "punpcklwd %%xmm7, %%xmm6 \n\t"
+ "paddd %%xmm6, %%xmm4 \n\t"
+ "movdqa 16(%%"REG_D"), %%xmm6 \n\t"
+ "paddd %%xmm5, %%xmm6 \n\t"
+ "paddd %%xmm3, %%xmm4 \n\t"
+ "paddd %%xmm3, %%xmm6 \n\t"
+
+ "psrad $8, %%xmm0 \n\t" /* FRAC_BITS. */
+ "psrad $8, %%xmm2 \n\t" /* FRAC_BITS. */
+ "packssdw %%xmm2, %%xmm0 \n\t"
+ "packuswb %%xmm7, %%xmm0 \n\t"
+ "movq %%xmm0, (%%"REG_d") \n\t"
+
+ "psrad $8, %%xmm4 \n\t" /* FRAC_BITS. */
+ "psrad $8, %%xmm6 \n\t" /* FRAC_BITS. */
+ "packssdw %%xmm6, %%xmm4 \n\t"
+ "packuswb %%xmm7, %%xmm4 \n\t"
+ "movq %%xmm4, (%%"REG_d",%%"REG_c");\n\t"
+snow_inner_add_yblock_sse2_end_8
+}
+
+static void inner_add_yblock_bw_16_obmc_32_sse2(const uint8_t *obmc, const x86_reg obmc_stride, uint8_t * * block, int b_w, x86_reg b_h,
+ int src_x, int src_y, x86_reg src_stride, slice_buffer * sb, int add, uint8_t * dst8){
+snow_inner_add_yblock_sse2_header
+snow_inner_add_yblock_sse2_start_16("xmm1", "xmm5", "3", "0")
+snow_inner_add_yblock_sse2_accum_16("2", "16")
+snow_inner_add_yblock_sse2_accum_16("1", "512")
+snow_inner_add_yblock_sse2_accum_16("0", "528")
+
+ "mov %0, %%"REG_d" \n\t"
+ "psrlw $4, %%xmm1 \n\t"
+ "psrlw $4, %%xmm5 \n\t"
+ "paddw (%%"REG_D"), %%xmm1 \n\t"
+ "paddw 16(%%"REG_D"), %%xmm5 \n\t"
+ "paddw %%xmm3, %%xmm1 \n\t"
+ "paddw %%xmm3, %%xmm5 \n\t"
+ "psraw $4, %%xmm1 \n\t" /* FRAC_BITS. */
+ "psraw $4, %%xmm5 \n\t" /* FRAC_BITS. */
+ "packuswb %%xmm5, %%xmm1 \n\t"
+
+ "movdqu %%xmm1, (%%"REG_d") \n\t"
+
+snow_inner_add_yblock_sse2_end_16
+}
+
+#define snow_inner_add_yblock_mmx_header \
+ IDWTELEM * * dst_array = sb->line + src_y;\
+ x86_reg tmp;\
+ __asm__ volatile(\
+ "mov %7, %%"REG_c" \n\t"\
+ "mov %6, %2 \n\t"\
+ "mov %4, %%"REG_S" \n\t"\
+ "pxor %%mm7, %%mm7 \n\t" /* 0 */\
+ "pcmpeqd %%mm3, %%mm3 \n\t"\
+ "psllw $15, %%mm3 \n\t"\
+ "psrlw $12, %%mm3 \n\t" /* FRAC_BITS >> 1 */\
+ "1: \n\t"\
+ "mov %1, %%"REG_D" \n\t"\
+ "mov (%%"REG_D"), %%"REG_D" \n\t"\
+ "add %3, %%"REG_D" \n\t"
+
+#define snow_inner_add_yblock_mmx_start(out_reg1, out_reg2, ptr_offset, s_offset, d_offset)\
+ "mov "PTR_SIZE"*"ptr_offset"(%%"REG_a"), %%"REG_d"; \n\t"\
+ "movd "d_offset"(%%"REG_d"), %%"out_reg1" \n\t"\
+ "movd "d_offset"+4(%%"REG_d"), %%"out_reg2" \n\t"\
+ "punpcklbw %%mm7, %%"out_reg1" \n\t"\
+ "punpcklbw %%mm7, %%"out_reg2" \n\t"\
+ "movd "s_offset"(%%"REG_S"), %%mm0 \n\t"\
+ "movd "s_offset"+4(%%"REG_S"), %%mm4 \n\t"\
+ "punpcklbw %%mm7, %%mm0 \n\t"\
+ "punpcklbw %%mm7, %%mm4 \n\t"\
+ "pmullw %%mm0, %%"out_reg1" \n\t"\
+ "pmullw %%mm4, %%"out_reg2" \n\t"
+
+#define snow_inner_add_yblock_mmx_accum(ptr_offset, s_offset, d_offset) \
+ snow_inner_add_yblock_mmx_start("mm2", "mm6", ptr_offset, s_offset, d_offset)\
+ "paddusw %%mm2, %%mm1 \n\t"\
+ "paddusw %%mm6, %%mm5 \n\t"
+
+#define snow_inner_add_yblock_mmx_mix(read_offset, write_offset)\
+ "mov %0, %%"REG_d" \n\t"\
+ "psrlw $4, %%mm1 \n\t"\
+ "psrlw $4, %%mm5 \n\t"\
+ "paddw "read_offset"(%%"REG_D"), %%mm1 \n\t"\
+ "paddw "read_offset"+8(%%"REG_D"), %%mm5 \n\t"\
+ "paddw %%mm3, %%mm1 \n\t"\
+ "paddw %%mm3, %%mm5 \n\t"\
+ "psraw $4, %%mm1 \n\t"\
+ "psraw $4, %%mm5 \n\t"\
+ "packuswb %%mm5, %%mm1 \n\t"\
+ "movq %%mm1, "write_offset"(%%"REG_d") \n\t"
+
+#define snow_inner_add_yblock_mmx_end(s_step)\
+ "add $"s_step", %%"REG_S" \n\t"\
+ "add %%"REG_c", "PTR_SIZE"*3(%%"REG_a");\n\t"\
+ "add %%"REG_c", "PTR_SIZE"*2(%%"REG_a");\n\t"\
+ "add %%"REG_c", "PTR_SIZE"*1(%%"REG_a");\n\t"\
+ "add %%"REG_c", (%%"REG_a") \n\t"\
+ "add"OPSIZE " $"PTR_SIZE"*1, %1 \n\t"\
+ "add %%"REG_c", %0 \n\t"\
+ "dec %2 \n\t"\
+ "jnz 1b \n\t"\
+ :"+m"(dst8),"+m"(dst_array),"=&r"(tmp)\
+ :\
+ "rm"((x86_reg)(src_x<<1)),"m"(obmc),"a"(block),"m"(b_h),"m"(src_stride):\
+ "%"REG_c"","%"REG_S"","%"REG_D"","%"REG_d"");
+
+static void inner_add_yblock_bw_8_obmc_16_mmx(const uint8_t *obmc, const x86_reg obmc_stride, uint8_t * * block, int b_w, x86_reg b_h,
+ int src_x, int src_y, x86_reg src_stride, slice_buffer * sb, int add, uint8_t * dst8){
+snow_inner_add_yblock_mmx_header
+snow_inner_add_yblock_mmx_start("mm1", "mm5", "3", "0", "0")
+snow_inner_add_yblock_mmx_accum("2", "8", "0")
+snow_inner_add_yblock_mmx_accum("1", "128", "0")
+snow_inner_add_yblock_mmx_accum("0", "136", "0")
+snow_inner_add_yblock_mmx_mix("0", "0")
+snow_inner_add_yblock_mmx_end("16")
+}
+
+static void inner_add_yblock_bw_16_obmc_32_mmx(const uint8_t *obmc, const x86_reg obmc_stride, uint8_t * * block, int b_w, x86_reg b_h,
+ int src_x, int src_y, x86_reg src_stride, slice_buffer * sb, int add, uint8_t * dst8){
+snow_inner_add_yblock_mmx_header
+snow_inner_add_yblock_mmx_start("mm1", "mm5", "3", "0", "0")
+snow_inner_add_yblock_mmx_accum("2", "16", "0")
+snow_inner_add_yblock_mmx_accum("1", "512", "0")
+snow_inner_add_yblock_mmx_accum("0", "528", "0")
+snow_inner_add_yblock_mmx_mix("0", "0")
+
+snow_inner_add_yblock_mmx_start("mm1", "mm5", "3", "8", "8")
+snow_inner_add_yblock_mmx_accum("2", "24", "8")
+snow_inner_add_yblock_mmx_accum("1", "520", "8")
+snow_inner_add_yblock_mmx_accum("0", "536", "8")
+snow_inner_add_yblock_mmx_mix("16", "8")
+snow_inner_add_yblock_mmx_end("32")
+}
+
+static void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
+ int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
+
+ if (b_w == 16)
+ inner_add_yblock_bw_16_obmc_32_sse2(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
+ else if (b_w == 8 && obmc_stride == 16) {
+ if (!(b_h & 1))
+ inner_add_yblock_bw_8_obmc_16_bh_even_sse2(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
+ else
+ inner_add_yblock_bw_8_obmc_16_mmx(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
+ } else
+ ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
+}
+
+static void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
+ int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
+ if (b_w == 16)
+ inner_add_yblock_bw_16_obmc_32_mmx(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
+ else if (b_w == 8 && obmc_stride == 16)
+ inner_add_yblock_bw_8_obmc_16_mmx(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
+ else
+ ff_snow_inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
+}
+
+#endif /* HAVE_INLINE_ASM */
+
+void ff_dwt_init_x86(SnowDWTContext *c)
+{
+#if HAVE_INLINE_ASM
+ int mm_flags = av_get_cpu_flags();
+
+ if (mm_flags & AV_CPU_FLAG_MMX) {
+ if(mm_flags & AV_CPU_FLAG_SSE2 & 0){
+ c->horizontal_compose97i = ff_snow_horizontal_compose97i_sse2;
+#if HAVE_7REGS
+ c->vertical_compose97i = ff_snow_vertical_compose97i_sse2;
+#endif
+ c->inner_add_yblock = ff_snow_inner_add_yblock_sse2;
+ }
+ else{
+ if (mm_flags & AV_CPU_FLAG_MMXEXT) {
+ c->horizontal_compose97i = ff_snow_horizontal_compose97i_mmx;
+#if HAVE_7REGS
+ c->vertical_compose97i = ff_snow_vertical_compose97i_mmx;
+#endif
+ }
+ c->inner_add_yblock = ff_snow_inner_add_yblock_mmx;
+ }
+ }
+#endif /* HAVE_INLINE_ASM */
+}
diff --git a/libavcodec/x86/v210-init.c b/libavcodec/x86/v210-init.c
new file mode 100644
index 0000000000..02c5eaa2c2
--- /dev/null
+++ b/libavcodec/x86/v210-init.c
@@ -0,0 +1,48 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/cpu.h"
+#include "libavcodec/v210dec.h"
+
+extern void ff_v210_planar_unpack_unaligned_ssse3(const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width);
+extern void ff_v210_planar_unpack_unaligned_avx(const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width);
+
+extern void ff_v210_planar_unpack_aligned_ssse3(const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width);
+extern void ff_v210_planar_unpack_aligned_avx(const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width);
+
+av_cold void v210_x86_init(V210DecContext *s)
+{
+ int cpu_flags = av_get_cpu_flags();
+
+#if HAVE_YASM
+ if (s->aligned_input) {
+ if (cpu_flags & AV_CPU_FLAG_SSSE3)
+ s->unpack_frame = ff_v210_planar_unpack_aligned_ssse3;
+
+ if (HAVE_AVX_EXTERNAL && cpu_flags & AV_CPU_FLAG_AVX)
+ s->unpack_frame = ff_v210_planar_unpack_aligned_avx;
+ }
+ else {
+ if (cpu_flags & AV_CPU_FLAG_SSSE3)
+ s->unpack_frame = ff_v210_planar_unpack_unaligned_ssse3;
+
+ if (HAVE_AVX_EXTERNAL && cpu_flags & AV_CPU_FLAG_AVX)
+ s->unpack_frame = ff_v210_planar_unpack_unaligned_avx;
+ }
+#endif
+}
diff --git a/libavcodec/x86/v210.asm b/libavcodec/x86/v210.asm
new file mode 100644
index 0000000000..547312678a
--- /dev/null
+++ b/libavcodec/x86/v210.asm
@@ -0,0 +1,88 @@
+;******************************************************************************
+;* V210 SIMD unpack
+;* Copyright (c) 2011 Loren Merritt <lorenm@u.washington.edu>
+;* Copyright (c) 2011 Kieran Kunhya <kieran@kunhya.com>
+;*
+;* This file is part of Libav.
+;*
+;* Libav is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* Libav is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with Libav; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+SECTION_RODATA
+
+v210_mask: times 4 dd 0x3ff
+v210_mult: dw 64,4,64,4,64,4,64,4
+v210_luma_shuf: db 8,9,0,1,2,3,12,13,4,5,6,7,-1,-1,-1,-1
+v210_chroma_shuf: db 0,1,8,9,6,7,-1,-1,2,3,4,5,12,13,-1,-1
+
+SECTION .text
+
+%macro v210_planar_unpack 2
+
+; v210_planar_unpack(const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width)
+cglobal v210_planar_unpack_%1_%2, 5, 5, 7
+ movsxdifnidn r4, r4d
+ lea r1, [r1+2*r4]
+ add r2, r4
+ add r3, r4
+ neg r4
+
+ mova m3, [v210_mult]
+ mova m4, [v210_mask]
+ mova m5, [v210_luma_shuf]
+ mova m6, [v210_chroma_shuf]
+.loop
+%ifidn %1, unaligned
+ movu m0, [r0]
+%else
+ mova m0, [r0]
+%endif
+
+ pmullw m1, m0, m3
+ psrld m0, 10
+ psrlw m1, 6 ; u0 v0 y1 y2 v1 u2 y4 y5
+ pand m0, m4 ; y0 __ u1 __ y3 __ v2 __
+
+ shufps m2, m1, m0, 0x8d ; y1 y2 y4 y5 y0 __ y3 __
+ pshufb m2, m5 ; y0 y1 y2 y3 y4 y5 __ __
+ movu [r1+2*r4], m2
+
+ shufps m1, m0, 0xd8 ; u0 v0 v1 u2 u1 __ v2 __
+ pshufb m1, m6 ; u0 u1 u2 __ v0 v1 v2 __
+ movq [r2+r4], m1
+ movhps [r3+r4], m1
+
+ add r0, mmsize
+ add r4, 6
+ jl .loop
+
+ REP_RET
+%endmacro
+
+INIT_XMM
+v210_planar_unpack unaligned, ssse3
+%if HAVE_AVX_EXTERNAL
+INIT_AVX
+v210_planar_unpack unaligned, avx
+%endif
+
+INIT_XMM
+v210_planar_unpack aligned, ssse3
+%if HAVE_AVX_EXTERNAL
+INIT_AVX
+v210_planar_unpack aligned, avx
+%endif
diff --git a/libavcodec/x86/vc1dsp.asm b/libavcodec/x86/vc1dsp.asm
index adf08d7d84..546688cf9d 100644
--- a/libavcodec/x86/vc1dsp.asm
+++ b/libavcodec/x86/vc1dsp.asm
@@ -2,20 +2,20 @@
;* VC1 deblocking optimizations
;* Copyright (c) 2009 David Conrad
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
diff --git a/libavcodec/x86/vc1dsp.h b/libavcodec/x86/vc1dsp.h
index 9b6c8ada26..fdd4de1813 100644
--- a/libavcodec/x86/vc1dsp.h
+++ b/libavcodec/x86/vc1dsp.h
@@ -1,20 +1,20 @@
/*
* VC-1 and WMV3 decoder - X86 DSP init functions
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/vc1dsp_mmx.c b/libavcodec/x86/vc1dsp_mmx.c
index 15fd2c830c..5ceacd348e 100644
--- a/libavcodec/x86/vc1dsp_mmx.c
+++ b/libavcodec/x86/vc1dsp_mmx.c
@@ -25,7 +25,6 @@
*/
#include "libavutil/cpu.h"
-#include "libavutil/internal.h"
#include "libavutil/mem.h"
#include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
diff --git a/libavcodec/x86/videodsp.asm b/libavcodec/x86/videodsp.asm
index 34f007f8d2..3f2f01a3c4 100644
--- a/libavcodec/x86/videodsp.asm
+++ b/libavcodec/x86/videodsp.asm
@@ -2,20 +2,20 @@
;* Core video DSP functions
;* Copyright (c) 2012 Ronald S. Bultje <rsbultje@gmail.com>
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
diff --git a/libavcodec/x86/videodsp_init.c b/libavcodec/x86/videodsp_init.c
index 48f42d5be5..902450ea4d 100644
--- a/libavcodec/x86/videodsp_init.c
+++ b/libavcodec/x86/videodsp_init.c
@@ -1,25 +1,27 @@
/*
+ * Copyright (C) 2002-2012 Michael Niedermayer
* Copyright (C) 2012 Ronald S. Bultje
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "libavutil/attributes.h"
+#include "libavutil/avassert.h"
#include "libavutil/common.h"
#include "libavutil/cpu.h"
#include "libavutil/mem.h"
@@ -36,19 +38,25 @@ extern emu_edge_core_func ff_emu_edge_core_mmx;
extern emu_edge_core_func ff_emu_edge_core_sse;
static av_always_inline void emulated_edge_mc(uint8_t *buf, const uint8_t *src,
- ptrdiff_t linesize,
+ ptrdiff_t linesize_arg,
int block_w, int block_h,
int src_x, int src_y,
int w, int h,
emu_edge_core_func *core_fn)
{
int start_y, start_x, end_y, end_x, src_y_add = 0;
+ int linesize = linesize_arg;
+
+ if(!w || !h)
+ return;
if (src_y >= h) {
- src_y_add = h - 1 - src_y;
+ src -= src_y*linesize;
+ src_y_add = h - 1;
src_y = h - 1;
} else if (src_y <= -block_h) {
- src_y_add = 1 - block_h - src_y;
+ src -= src_y*linesize;
+ src_y_add = 1 - block_h;
src_y = 1 - block_h;
}
if (src_x >= w) {
@@ -63,8 +71,8 @@ static av_always_inline void emulated_edge_mc(uint8_t *buf, const uint8_t *src,
start_x = FFMAX(0, -src_x);
end_y = FFMIN(block_h, h-src_y);
end_x = FFMIN(block_w, w-src_x);
- assert(start_x < end_x && block_w > 0);
- assert(start_y < end_y && block_h > 0);
+ av_assert2(start_x < end_x && block_w > 0);
+ av_assert2(start_y < end_y && block_h > 0);
// fill in the to-be-copied part plus all above/below
src += (src_y_add + start_y) * linesize + start_x;
diff --git a/libavcodec/x86/vorbisdsp.asm b/libavcodec/x86/vorbisdsp.asm
index c54650eef5..b25d838868 100644
--- a/libavcodec/x86/vorbisdsp.asm
+++ b/libavcodec/x86/vorbisdsp.asm
@@ -2,20 +2,20 @@
;* Vorbis x86 optimizations
;* Copyright (C) 2006 Loren Merritt <lorenm@u.washington.edu>
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
diff --git a/libavcodec/x86/vorbisdsp_init.c b/libavcodec/x86/vorbisdsp_init.c
index 762d3eeca9..08a2c096f0 100644
--- a/libavcodec/x86/vorbisdsp_init.c
+++ b/libavcodec/x86/vorbisdsp_init.c
@@ -1,20 +1,20 @@
/*
* Copyright (C) 2006 Loren Merritt <lorenm@u.washington.edu>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/vp3dsp.asm b/libavcodec/x86/vp3dsp.asm
index fc8a047224..24496ae8cf 100644
--- a/libavcodec/x86/vp3dsp.asm
+++ b/libavcodec/x86/vp3dsp.asm
@@ -2,20 +2,20 @@
;* MMX/SSE2-optimized functions for the VP3 decoder
;* Copyright (c) 2007 Aurelien Jacobs <aurel@gnuage.org>
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
diff --git a/libavcodec/x86/vp3dsp_init.c b/libavcodec/x86/vp3dsp_init.c
index cc52fbccdd..252b40a432 100644
--- a/libavcodec/x86/vp3dsp_init.c
+++ b/libavcodec/x86/vp3dsp_init.c
@@ -1,18 +1,20 @@
/*
- * This file is part of Libav.
+ * Copyright (c) 2009 David Conrad <lessen42@gmail.com>
*
- * Libav is free software; you can redistribute it and/or
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
@@ -21,6 +23,7 @@
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/x86/cpu.h"
+#include "libavutil/x86/asm.h"
#include "libavcodec/avcodec.h"
#include "libavcodec/dsputil.h"
#include "libavcodec/vp3dsp.h"
@@ -40,10 +43,68 @@ void ff_vp3_v_loop_filter_mmxext(uint8_t *src, int stride,
void ff_vp3_h_loop_filter_mmxext(uint8_t *src, int stride,
int *bounding_values);
+#if HAVE_INLINE_ASM
+
+#define MOVQ_BFE(regd) \
+ __asm__ volatile ( \
+ "pcmpeqd %%"#regd", %%"#regd" \n\t" \
+ "paddb %%"#regd", %%"#regd" \n\t" ::)
+
+#define PAVGBP_MMX_NO_RND(rega, regb, regr, regc, regd, regp) \
+ "movq "#rega", "#regr" \n\t" \
+ "movq "#regc", "#regp" \n\t" \
+ "pand "#regb", "#regr" \n\t" \
+ "pand "#regd", "#regp" \n\t" \
+ "pxor "#rega", "#regb" \n\t" \
+ "pxor "#regc", "#regd" \n\t" \
+ "pand %%mm6, "#regb" \n\t" \
+ "pand %%mm6, "#regd" \n\t" \
+ "psrlq $1, "#regb" \n\t" \
+ "psrlq $1, "#regd" \n\t" \
+ "paddb "#regb", "#regr" \n\t" \
+ "paddb "#regd", "#regp" \n\t"
+
+static void put_vp_no_rnd_pixels8_l2_mmx(uint8_t *dst, const uint8_t *a, const uint8_t *b, ptrdiff_t stride, int h)
+{
+// START_TIMER
+ MOVQ_BFE(mm6);
+ __asm__ volatile(
+ "1: \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq (%2), %%mm1 \n\t"
+ "movq (%1,%4), %%mm2 \n\t"
+ "movq (%2,%4), %%mm3 \n\t"
+ PAVGBP_MMX_NO_RND(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
+ "movq %%mm4, (%3) \n\t"
+ "movq %%mm5, (%3,%4) \n\t"
+
+ "movq (%1,%4,2), %%mm0 \n\t"
+ "movq (%2,%4,2), %%mm1 \n\t"
+ "movq (%1,%5), %%mm2 \n\t"
+ "movq (%2,%5), %%mm3 \n\t"
+ "lea (%1,%4,4), %1 \n\t"
+ "lea (%2,%4,4), %2 \n\t"
+ PAVGBP_MMX_NO_RND(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
+ "movq %%mm4, (%3,%4,2) \n\t"
+ "movq %%mm5, (%3,%5) \n\t"
+ "lea (%3,%4,4), %3 \n\t"
+ "subl $4, %0 \n\t"
+ "jnz 1b \n\t"
+ :"+r"(h), "+r"(a), "+r"(b), "+r"(dst)
+ :"r"((x86_reg)stride), "r"((x86_reg)3L*stride)
+ :"memory");
+// STOP_TIMER("put_vp_no_rnd_pixels8_l2_mmx")
+}
+#endif /* HAVE_INLINE_ASM */
+
av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags)
{
int cpuflags = av_get_cpu_flags();
+#if HAVE_INLINE_ASM
+ c->put_no_rnd_pixels_l2 = put_vp_no_rnd_pixels8_l2_mmx;
+#endif /* HAVE_INLINE_ASM */
+
#if ARCH_X86_32
if (EXTERNAL_MMX(cpuflags)) {
c->idct_put = ff_vp3_idct_put_mmx;
diff --git a/libavcodec/x86/vp56_arith.h b/libavcodec/x86/vp56_arith.h
index 0a693684af..e71dbf8ed0 100644
--- a/libavcodec/x86/vp56_arith.h
+++ b/libavcodec/x86/vp56_arith.h
@@ -4,20 +4,20 @@
* Copyright (C) 2006 Aurelien Jacobs <aurel@gnuage.org>
* Copyright (C) 2010 Eli Friedman
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/vp56dsp.asm b/libavcodec/x86/vp56dsp.asm
index 80f8ca5f38..3d874ea62a 100644
--- a/libavcodec/x86/vp56dsp.asm
+++ b/libavcodec/x86/vp56dsp.asm
@@ -3,20 +3,20 @@
;* Copyright (C) 2009 Sebastien Lucas <sebastien.lucas@gmail.com>
;* Copyright (C) 2009 Zuxy Meng <zuxy.meng@gmail.com>
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
diff --git a/libavcodec/x86/vp56dsp_init.c b/libavcodec/x86/vp56dsp_init.c
index 689ae352c6..defc63b0ea 100644
--- a/libavcodec/x86/vp56dsp_init.c
+++ b/libavcodec/x86/vp56dsp_init.c
@@ -3,20 +3,20 @@
* Copyright (C) 2009 Sebastien Lucas <sebastien.lucas@gmail.com>
* Copyright (C) 2009 Zuxy Meng <zuxy.meng@gmail.com>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm
index 1d7aadc809..ca07333795 100644
--- a/libavcodec/x86/vp8dsp.asm
+++ b/libavcodec/x86/vp8dsp.asm
@@ -3,20 +3,20 @@
;* Copyright (c) 2010 Ronald S. Bultje <rsbultje@gmail.com>
;* Copyright (c) 2010 Jason Garrett-Glaser <darkshikari@gmail.com>
;*
-;* This file is part of Libav.
+;* This file is part of FFmpeg.
;*
-;* Libav is free software; you can redistribute it and/or
+;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
-;* Libav is distributed in the hope that it will be useful,
+;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
-;* License along with Libav; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
diff --git a/libavcodec/x86/vp8dsp_init.c b/libavcodec/x86/vp8dsp_init.c
index 629a7bc77b..745d95a54d 100644
--- a/libavcodec/x86/vp8dsp_init.c
+++ b/libavcodec/x86/vp8dsp_init.c
@@ -3,20 +3,20 @@
* Copyright (c) 2010 Ronald S. Bultje <rsbultje@gmail.com>
* Copyright (c) 2010 Jason Garrett-Glaser <darkshikari@gmail.com>
*
- * This file is part of Libav.
+ * This file is part of FFmpeg.
*
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
diff --git a/libavcodec/x86/w64xmmtest.c b/libavcodec/x86/w64xmmtest.c
index a8b6803daf..f6e3de9496 100644
--- a/libavcodec/x86/w64xmmtest.c
+++ b/libavcodec/x86/w64xmmtest.c
@@ -65,6 +65,13 @@ wrap(avcodec_encode_audio2(AVCodecContext *avctx,
got_packet_ptr);
}
+wrap(avcodec_encode_video(AVCodecContext *avctx,
+ uint8_t *buf, int buf_size,
+ const AVFrame *pict))
+{
+ testxmmclobbers(avcodec_encode_video, avctx, buf, buf_size, pict);
+}
+
wrap(avcodec_encode_subtitle(AVCodecContext *avctx,
uint8_t *buf, int buf_size,
const AVSubtitle *sub))