summaryrefslogtreecommitdiff
path: root/libavcodec/x86/h264_qpel_mmx.c
diff options
context:
space:
mode:
authorMichael Niedermayer <michaelni@gmx.at>2011-07-04 00:34:44 +0200
committerMichael Niedermayer <michaelni@gmx.at>2011-07-04 00:45:21 +0200
commit976a8b217986fecdbe1fdcaa3e14ce9c3c92eb25 (patch)
treed31a42173318b29419733ec4634c1f6f07cdce6c /libavcodec/x86/h264_qpel_mmx.c
parent2a375bb400febf8c1a2dfa87c29fd4185663454c (diff)
parent556f8a066cb33241bf29e85d7e24c9acf7ea9043 (diff)
downloadffmpeg-976a8b217986fecdbe1fdcaa3e14ce9c3c92eb25.tar.gz
Merge remote-tracking branch 'qatar/master'
* qatar/master: (40 commits) H.264: template left MB handling H.264: faster fill_decode_caches H.264: faster write_back_* H.264: faster fill_filter_caches H.264: make filter_mb_fast support the case of unavailable top mb Do not include log.h in avutil.h Do not include pixfmt.h in avutil.h Do not include rational.h in avutil.h Do not include mathematics.h in avutil.h Do not include intfloat_readwrite.h in avutil.h Remove return statements following infinite loops without break RTSP: Doxygen comment cleanup doxygen: Escape '\' in Doxygen documentation. md5: cosmetics md5: use AV_WL32 to write result md5: add fate test md5: include correct headers md5: fix test program doxygen: Drop array size declarations from Doxygen parameter names. doxygen: Fix parameter names to match the function prototypes. ... Conflicts: libavcodec/x86/dsputil_mmx.c libavformat/flvenc.c libavformat/oggenc.c libavformat/wtv.c Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/h264_qpel_mmx.c')
-rw-r--r--libavcodec/x86/h264_qpel_mmx.c98
1 files changed, 98 insertions, 0 deletions
diff --git a/libavcodec/x86/h264_qpel_mmx.c b/libavcodec/x86/h264_qpel_mmx.c
index f5af44e82f..4dac05cfa3 100644
--- a/libavcodec/x86/h264_qpel_mmx.c
+++ b/libavcodec/x86/h264_qpel_mmx.c
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt
+ * Copyright (c) 2011 Daniel Kang
*
* This file is part of FFmpeg.
*
@@ -1199,3 +1200,100 @@ H264_MC_816(H264_MC_HV, sse2)
H264_MC_816(H264_MC_H, ssse3)
H264_MC_816(H264_MC_HV, ssse3)
#endif
+
+
+
+//10bit
+#define LUMA_MC_OP(OP, NUM, DEPTH, TYPE, OPT) \
+void ff_ ## OP ## _h264_qpel ## NUM ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT \
+ (uint8_t *dst, uint8_t *src, int stride);
+
+#define LUMA_MC_ALL(DEPTH, TYPE, OPT) \
+ LUMA_MC_OP(put, 4, DEPTH, TYPE, OPT) \
+ LUMA_MC_OP(avg, 4, DEPTH, TYPE, OPT) \
+ LUMA_MC_OP(put, 8, DEPTH, TYPE, OPT) \
+ LUMA_MC_OP(avg, 8, DEPTH, TYPE, OPT) \
+ LUMA_MC_OP(put, 16, DEPTH, TYPE, OPT) \
+ LUMA_MC_OP(avg, 16, DEPTH, TYPE, OPT)
+
+#define LUMA_MC_816(DEPTH, TYPE, OPT) \
+ LUMA_MC_OP(put, 8, DEPTH, TYPE, OPT) \
+ LUMA_MC_OP(avg, 8, DEPTH, TYPE, OPT) \
+ LUMA_MC_OP(put, 16, DEPTH, TYPE, OPT) \
+ LUMA_MC_OP(avg, 16, DEPTH, TYPE, OPT)
+
+LUMA_MC_ALL(10, mc00, mmxext)
+LUMA_MC_ALL(10, mc10, mmxext)
+LUMA_MC_ALL(10, mc20, mmxext)
+LUMA_MC_ALL(10, mc30, mmxext)
+LUMA_MC_ALL(10, mc01, mmxext)
+LUMA_MC_ALL(10, mc11, mmxext)
+LUMA_MC_ALL(10, mc21, mmxext)
+LUMA_MC_ALL(10, mc31, mmxext)
+LUMA_MC_ALL(10, mc02, mmxext)
+LUMA_MC_ALL(10, mc12, mmxext)
+LUMA_MC_ALL(10, mc22, mmxext)
+LUMA_MC_ALL(10, mc32, mmxext)
+LUMA_MC_ALL(10, mc03, mmxext)
+LUMA_MC_ALL(10, mc13, mmxext)
+LUMA_MC_ALL(10, mc23, mmxext)
+LUMA_MC_ALL(10, mc33, mmxext)
+
+LUMA_MC_816(10, mc00, sse2)
+LUMA_MC_816(10, mc10, sse2)
+LUMA_MC_816(10, mc10, sse2_cache64)
+LUMA_MC_816(10, mc10, ssse3_cache64)
+LUMA_MC_816(10, mc20, sse2)
+LUMA_MC_816(10, mc20, sse2_cache64)
+LUMA_MC_816(10, mc20, ssse3_cache64)
+LUMA_MC_816(10, mc30, sse2)
+LUMA_MC_816(10, mc30, sse2_cache64)
+LUMA_MC_816(10, mc30, ssse3_cache64)
+LUMA_MC_816(10, mc01, sse2)
+LUMA_MC_816(10, mc11, sse2)
+LUMA_MC_816(10, mc21, sse2)
+LUMA_MC_816(10, mc31, sse2)
+LUMA_MC_816(10, mc02, sse2)
+LUMA_MC_816(10, mc12, sse2)
+LUMA_MC_816(10, mc22, sse2)
+LUMA_MC_816(10, mc32, sse2)
+LUMA_MC_816(10, mc03, sse2)
+LUMA_MC_816(10, mc13, sse2)
+LUMA_MC_816(10, mc23, sse2)
+LUMA_MC_816(10, mc33, sse2)
+
+#define QPEL16_OPMC(OP, MC, MMX)\
+void ff_ ## OP ## _h264_qpel16_ ## MC ## _10_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst , src , stride);\
+ ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst+16, src+16, stride);\
+ src += 8*stride;\
+ dst += 8*stride;\
+ ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst , src , stride);\
+ ff_ ## OP ## _h264_qpel8_ ## MC ## _10_ ## MMX(dst+16, src+16, stride);\
+}
+
+#define QPEL16_OP(MC, MMX)\
+QPEL16_OPMC(put, MC, MMX)\
+QPEL16_OPMC(avg, MC, MMX)
+
+#define QPEL16(MMX)\
+QPEL16_OP(mc00, MMX)\
+QPEL16_OP(mc01, MMX)\
+QPEL16_OP(mc02, MMX)\
+QPEL16_OP(mc03, MMX)\
+QPEL16_OP(mc10, MMX)\
+QPEL16_OP(mc11, MMX)\
+QPEL16_OP(mc12, MMX)\
+QPEL16_OP(mc13, MMX)\
+QPEL16_OP(mc20, MMX)\
+QPEL16_OP(mc21, MMX)\
+QPEL16_OP(mc22, MMX)\
+QPEL16_OP(mc23, MMX)\
+QPEL16_OP(mc30, MMX)\
+QPEL16_OP(mc31, MMX)\
+QPEL16_OP(mc32, MMX)\
+QPEL16_OP(mc33, MMX)
+
+#if ARCH_X86_32 // ARCH_X86_64 implies sse2+
+QPEL16(mmxext)
+#endif