summaryrefslogtreecommitdiff
path: root/libavcodec
diff options
context:
space:
mode:
author Michael Niedermayer <michaelni@gmx.at> 2013-02-07 13:09:35 +0100
committer Michael Niedermayer <michaelni@gmx.at> 2013-02-07 13:09:35 +0100
commit c4e394e46008c9e208f666d156f49f7ba500d73a (patch)
tree 4c70acf89ede1f9d24ec3056bb2d586f98a12bcd /libavcodec
parent b5884db437eebe136efef63a8186b7b4b51b5525 (diff)
parent 79dad2a932534d1155079f937649e099f9e5cc27 (diff)
download ffmpeg-c4e394e46008c9e208f666d156f49f7ba500d73a.tar.gz
Merge commit '79dad2a932534d1155079f937649e099f9e5cc27'
* commit '79dad2a932534d1155079f937649e099f9e5cc27':
  dsputil: Separate h264chroma

Conflicts:
  libavcodec/dsputil_template.c
  libavcodec/ppc/dsputil_ppc.c
  libavcodec/vc1dec.c
  libavcodec/vc1dsp.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r-- libavcodec/Makefile | 1
-rw-r--r-- libavcodec/arm/Makefile | 4
-rw-r--r-- libavcodec/arm/dsputil_init_neon.c | 18
-rw-r--r-- libavcodec/arm/h264chroma_init_arm.c | 51
-rw-r--r-- libavcodec/cavs.c | 27
-rw-r--r-- libavcodec/cavs.h | 2
-rw-r--r-- libavcodec/dsputil.c | 7
-rw-r--r-- libavcodec/dsputil.h | 7
-rw-r--r-- libavcodec/dsputil_template.c | 118
-rw-r--r-- libavcodec/h264.c | 5
-rw-r--r-- libavcodec/h264.h | 2
-rw-r--r-- libavcodec/h264_mb_template.c | 12
-rw-r--r-- libavcodec/h264chroma.c | 64
-rw-r--r-- libavcodec/h264chroma.h | 38
-rw-r--r-- libavcodec/h264chroma_template.c | 142
-rw-r--r-- libavcodec/mpegvideo.c | 10
-rw-r--r-- libavcodec/mpegvideo.h | 2
-rw-r--r-- libavcodec/ppc/Makefile | 1
-rw-r--r-- libavcodec/ppc/dsputil_altivec.h | 2
-rw-r--r-- libavcodec/ppc/dsputil_ppc.c | 2
-rw-r--r-- libavcodec/ppc/h264_qpel.c | 20
-rw-r--r-- libavcodec/ppc/h264_qpel_template.c | 268
-rw-r--r-- libavcodec/ppc/h264chroma_init.c | 64
-rw-r--r-- libavcodec/ppc/h264chroma_template.c | 289
-rw-r--r-- libavcodec/ppc/vc1dsp_altivec.c | 4
-rw-r--r-- libavcodec/rv30dsp.c | 11
-rw-r--r-- libavcodec/rv34dsp.h | 1
-rw-r--r-- libavcodec/sh4/Makefile | 2
-rw-r--r-- libavcodec/sh4/dsputil_align.c | 8
-rw-r--r-- libavcodec/sh4/h264chroma_init.c | 132
-rw-r--r-- libavcodec/sh4/qpel.c | 91
-rw-r--r-- libavcodec/vc1.h | 2
-rw-r--r-- libavcodec/vc1dec.c | 25
-rw-r--r-- libavcodec/vc1dsp.c | 3
-rw-r--r-- libavcodec/vc1dsp.h | 1
-rw-r--r-- libavcodec/vp56.c | 3
-rw-r--r-- libavcodec/vp56.h | 2
-rw-r--r-- libavcodec/vp6.c | 6
-rw-r--r-- libavcodec/x86/Makefile | 3
-rw-r--r-- libavcodec/x86/dsputil_mmx.c | 98
-rw-r--r-- libavcodec/x86/h264chroma_init.c | 116
41 files changed, 981 insertions, 683 deletions
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index a2014f8957..f08c83c47f 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -48,6 +48,7 @@ FFT-OBJS-$(CONFIG_HARDCODED_TABLES) += cos_tables.o cos_fixed_tables.o
OBJS-$(CONFIG_FFT) += avfft.o fft_fixed.o fft_float.o \
$(FFT-OBJS-yes)
OBJS-$(CONFIG_GOLOMB) += golomb.o
+OBJS-$(CONFIG_H264CHROMA) += h264chroma.o
OBJS-$(CONFIG_H264DSP) += h264dsp.o h264idct.o
OBJS-$(CONFIG_H264PRED) += h264pred.o
OBJS-$(CONFIG_H264QPEL) += h264qpel.o
diff --git a/libavcodec/arm/Makefile b/libavcodec/arm/Makefile
index 0c372a428a..1c91d62ca8 100644
--- a/libavcodec/arm/Makefile
+++ b/libavcodec/arm/Makefile
@@ -26,6 +26,7 @@ ARMV6-OBJS-$(CONFIG_VP8_DECODER) += arm/vp8_armv6.o \
arm/vp8dsp_init_armv6.o \
arm/vp8dsp_armv6.o
+OBJS-$(CONFIG_H264CHROMA) += arm/h264chroma_init_arm.o
OBJS-$(CONFIG_H264DSP) += arm/h264dsp_init_arm.o
OBJS-$(CONFIG_H264PRED) += arm/h264pred_init_arm.o
OBJS-$(CONFIG_H264QPEL) += arm/h264qpel_init_arm.o
@@ -67,9 +68,9 @@ NEON-OBJS-$(CONFIG_MDCT) += arm/mdct_neon.o \
NEON-OBJS-$(CONFIG_RDFT) += arm/rdft_neon.o \
+NEON-OBJS-$(CONFIG_H264CHROMA) += arm/h264cmc_neon.o
NEON-OBJS-$(CONFIG_H264DSP) += arm/h264dsp_neon.o \
arm/h264idct_neon.o \
- arm/h264cmc_neon.o \
NEON-OBJS-$(CONFIG_H264PRED) += arm/h264pred_neon.o \
@@ -87,7 +88,6 @@ NEON-OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_neon.o
NEON-OBJS-$(CONFIG_RV30_DECODER) += arm/rv34dsp_neon.o
NEON-OBJS-$(CONFIG_RV40_DECODER) += arm/rv34dsp_neon.o \
arm/rv40dsp_neon.o \
- arm/h264cmc_neon.o \
NEON-OBJS-$(CONFIG_VORBIS_DECODER) += arm/vorbisdsp_neon.o
diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c
index 559a8414a1..4e44034e06 100644
--- a/libavcodec/arm/dsputil_init_neon.c
+++ b/libavcodec/arm/dsputil_init_neon.c
@@ -64,14 +64,6 @@ void ff_add_pixels_clamped_neon(const int16_t *, uint8_t *, int);
void ff_put_pixels_clamped_neon(const int16_t *, uint8_t *, int);
void ff_put_signed_pixels_clamped_neon(const int16_t *, uint8_t *, int);
-void ff_put_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
-void ff_put_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
-void ff_put_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int);
-
-void ff_avg_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
-void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
-void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int);
-
void ff_vector_clipf_neon(float *dst, const float *src, float min, float max,
int len);
void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min,
@@ -139,16 +131,6 @@ av_cold void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
c->put_pixels_clamped = ff_put_pixels_clamped_neon;
c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_neon;
- if (CONFIG_H264_DECODER && !high_bit_depth) {
- c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_neon;
- c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_neon;
- c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_neon;
-
- c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_neon;
- c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_neon;
- c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_neon;
- }
-
c->vector_clipf = ff_vector_clipf_neon;
c->vector_clip_int32 = ff_vector_clip_int32_neon;
diff --git a/libavcodec/arm/h264chroma_init_arm.c b/libavcodec/arm/h264chroma_init_arm.c
new file mode 100644
index 0000000000..13f7e0d702
--- /dev/null
+++ b/libavcodec/arm/h264chroma_init_arm.c
@@ -0,0 +1,51 @@
+/*
+ * ARM NEON optimised H.264 chroma functions
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/arm/cpu.h"
+#include "libavcodec/h264chroma.h"
+
+void ff_put_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int); /* -> put_h264_chroma_pixels_tab[0] */
+void ff_put_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int); /* -> put_h264_chroma_pixels_tab[1] */
+void ff_put_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int); /* -> put_h264_chroma_pixels_tab[2] */
+
+void ff_avg_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int); /* -> avg_h264_chroma_pixels_tab[0] */
+void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int); /* -> avg_h264_chroma_pixels_tab[1] */
+void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int); /* -> avg_h264_chroma_pixels_tab[2] */
+
+av_cold void ff_h264chroma_init_arm(H264ChromaContext *c, int bit_depth)
+{
+    int flags = av_get_cpu_flags();
+    /* The NEON kernels are installed only on CPUs reporting NEON and for
+     * bit depths up to 8; in every other case c is left untouched. */
+    if (!have_neon(flags) || bit_depth > 8)
+        return;
+
+    c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_neon;
+    c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_neon;
+    c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_neon;
+    c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_neon;
+    c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_neon;
+    c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_neon;
+}
diff --git a/libavcodec/cavs.c b/libavcodec/cavs.c
index bcdeb7c228..adf0ecaa56 100644
--- a/libavcodec/cavs.c
+++ b/libavcodec/cavs.c
@@ -28,6 +28,7 @@
#include "avcodec.h"
#include "get_bits.h"
#include "golomb.h"
+#include "h264chroma.h"
#include "mathops.h"
#include "cavs.h"
@@ -464,30 +465,35 @@ void ff_cavs_inter(AVSContext *h, enum cavs_mb mb_type) {
if(ff_cavs_partition_flags[mb_type] == 0){ // 16x16
mc_part_std(h, 8, 0, h->cy, h->cu, h->cv, 0, 0,
h->cdsp.put_cavs_qpel_pixels_tab[0],
- h->dsp.put_h264_chroma_pixels_tab[0],
+ h->h264chroma.put_h264_chroma_pixels_tab[0],
h->cdsp.avg_cavs_qpel_pixels_tab[0],
- h->dsp.avg_h264_chroma_pixels_tab[0],&h->mv[MV_FWD_X0]);
+ h->h264chroma.avg_h264_chroma_pixels_tab[0],
+ &h->mv[MV_FWD_X0]);
}else{
mc_part_std(h, 4, 0, h->cy, h->cu, h->cv, 0, 0,
h->cdsp.put_cavs_qpel_pixels_tab[1],
- h->dsp.put_h264_chroma_pixels_tab[1],
+ h->h264chroma.put_h264_chroma_pixels_tab[1],
h->cdsp.avg_cavs_qpel_pixels_tab[1],
- h->dsp.avg_h264_chroma_pixels_tab[1],&h->mv[MV_FWD_X0]);
+ h->h264chroma.avg_h264_chroma_pixels_tab[1],
+ &h->mv[MV_FWD_X0]);
mc_part_std(h, 4, 0, h->cy, h->cu, h->cv, 4, 0,
h->cdsp.put_cavs_qpel_pixels_tab[1],
- h->dsp.put_h264_chroma_pixels_tab[1],
+ h->h264chroma.put_h264_chroma_pixels_tab[1],
h->cdsp.avg_cavs_qpel_pixels_tab[1],
- h->dsp.avg_h264_chroma_pixels_tab[1],&h->mv[MV_FWD_X1]);
+ h->h264chroma.avg_h264_chroma_pixels_tab[1],
+ &h->mv[MV_FWD_X1]);
mc_part_std(h, 4, 0, h->cy, h->cu, h->cv, 0, 4,
h->cdsp.put_cavs_qpel_pixels_tab[1],
- h->dsp.put_h264_chroma_pixels_tab[1],
+ h->h264chroma.put_h264_chroma_pixels_tab[1],
h->cdsp.avg_cavs_qpel_pixels_tab[1],
- h->dsp.avg_h264_chroma_pixels_tab[1],&h->mv[MV_FWD_X2]);
+ h->h264chroma.avg_h264_chroma_pixels_tab[1],
+ &h->mv[MV_FWD_X2]);
mc_part_std(h, 4, 0, h->cy, h->cu, h->cv, 4, 4,
h->cdsp.put_cavs_qpel_pixels_tab[1],
- h->dsp.put_h264_chroma_pixels_tab[1],
+ h->h264chroma.put_h264_chroma_pixels_tab[1],
h->cdsp.avg_cavs_qpel_pixels_tab[1],
- h->dsp.avg_h264_chroma_pixels_tab[1],&h->mv[MV_FWD_X3]);
+ h->h264chroma.avg_h264_chroma_pixels_tab[1],
+ &h->mv[MV_FWD_X3]);
}
}
@@ -722,6 +728,7 @@ av_cold int ff_cavs_init(AVCodecContext *avctx) {
AVSContext *h = avctx->priv_data;
ff_dsputil_init(&h->dsp, avctx);
+ ff_h264chroma_init(&h->h264chroma, 8);
ff_videodsp_init(&h->vdsp, 8);
ff_cavsdsp_init(&h->cdsp, avctx);
ff_init_scantable_permutation(h->dsp.idct_permutation,
diff --git a/libavcodec/cavs.h b/libavcodec/cavs.h
index 73e6bfe8c6..b0cdb8f200 100644
--- a/libavcodec/cavs.h
+++ b/libavcodec/cavs.h
@@ -24,6 +24,7 @@
#include "cavsdsp.h"
#include "dsputil.h"
+#include "h264chroma.h"
#include "get_bits.h"
#include "videodsp.h"
@@ -161,6 +162,7 @@ typedef struct AVSFrame {
typedef struct AVSContext {
AVCodecContext *avctx;
DSPContext dsp;
+ H264ChromaContext h264chroma;
VideoDSPContext vdsp;
CAVSDSPContext cdsp;
GetBitContext gb;
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 1fbdd869fb..878a6be113 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -2893,13 +2893,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->clear_blocks = FUNCC(clear_blocks ## dct , depth);\
c->add_pixels8 = FUNCC(add_pixels8 ## dct , depth);\
c->add_pixels4 = FUNCC(add_pixels4 ## dct , depth);\
-\
- c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8 , depth);\
- c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4 , depth);\
- c->put_h264_chroma_pixels_tab[2] = FUNCC(put_h264_chroma_mc2 , depth);\
- c->avg_h264_chroma_pixels_tab[0] = FUNCC(avg_h264_chroma_mc8 , depth);\
- c->avg_h264_chroma_pixels_tab[1] = FUNCC(avg_h264_chroma_mc4 , depth);\
- c->avg_h264_chroma_pixels_tab[2] = FUNCC(avg_h264_chroma_mc2 , depth)
switch (avctx->bits_per_raw_sample) {
case 9:
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index f0158cdf2a..248446eb3a 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -147,7 +147,6 @@ void clear_blocks_c(int16_t *blocks);
typedef void (*op_pixels_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, ptrdiff_t line_size, int h);
typedef void (*tpel_mc_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int w, int h);
typedef void (*qpel_mc_func)(uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);
-typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y);
typedef void (*op_fill_func)(uint8_t *block/*align width (8 or 16)*/, uint8_t value, int line_size, int h);
@@ -325,12 +324,6 @@ typedef struct DSPContext {
qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
qpel_mc_func put_mspel_pixels_tab[8];
- /**
- * h264 Chroma MC
- */
- h264_chroma_mc_func put_h264_chroma_pixels_tab[3];
- h264_chroma_mc_func avg_h264_chroma_pixels_tab[3];
-
me_cmp_func pix_abs[2][4];
/* huffyuv specific */
diff --git a/libavcodec/dsputil_template.c b/libavcodec/dsputil_template.c
index 201aa55828..e174e5ff6e 100644
--- a/libavcodec/dsputil_template.c
+++ b/libavcodec/dsputil_template.c
@@ -463,124 +463,6 @@ PIXOP2(put, op_put)
#undef op_avg
#undef op_put
-#define H264_CHROMA_MC(OPNAME, OP)\
-static void FUNCC(OPNAME ## h264_chroma_mc2)(uint8_t *p_dst/*align 8*/, uint8_t *p_src/*align 1*/, int stride, int h, int x, int y){\
- pixel *dst = (pixel*)p_dst;\
- pixel *src = (pixel*)p_src;\
- const int A=(8-x)*(8-y);\
- const int B=( x)*(8-y);\
- const int C=(8-x)*( y);\
- const int D=( x)*( y);\
- int i;\
- stride >>= sizeof(pixel)-1;\
- \
- av_assert2(x<8 && y<8 && x>=0 && y>=0);\
-\
- if(D){\
- for(i=0; i<h; i++){\
- OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
- OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
- dst+= stride;\
- src+= stride;\
- }\
- }else{\
- const int E= B+C;\
- const int step= C ? stride : 1;\
- for(i=0; i<h; i++){\
- OP(dst[0], (A*src[0] + E*src[step+0]));\
- OP(dst[1], (A*src[1] + E*src[step+1]));\
- dst+= stride;\
- src+= stride;\
- }\
- }\
-}\
-\
-static void FUNCC(OPNAME ## h264_chroma_mc4)(uint8_t *p_dst/*align 8*/, uint8_t *p_src/*align 1*/, int stride, int h, int x, int y){\
- pixel *dst = (pixel*)p_dst;\
- pixel *src = (pixel*)p_src;\
- const int A=(8-x)*(8-y);\
- const int B=( x)*(8-y);\
- const int C=(8-x)*( y);\
- const int D=( x)*( y);\
- int i;\
- stride >>= sizeof(pixel)-1;\
- \
- av_assert2(x<8 && y<8 && x>=0 && y>=0);\
-\
- if(D){\
- for(i=0; i<h; i++){\
- OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
- OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
- OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
- OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
- dst+= stride;\
- src+= stride;\
- }\
- }else{\
- const int E= B+C;\
- const int step= C ? stride : 1;\
- for(i=0; i<h; i++){\
- OP(dst[0], (A*src[0] + E*src[step+0]));\
- OP(dst[1], (A*src[1] + E*src[step+1]));\
- OP(dst[2], (A*src[2] + E*src[step+2]));\
- OP(dst[3], (A*src[3] + E*src[step+3]));\
- dst+= stride;\
- src+= stride;\
- }\
- }\
-}\
-\
-static void FUNCC(OPNAME ## h264_chroma_mc8)(uint8_t *p_dst/*align 8*/, uint8_t *p_src/*align 1*/, int stride, int h, int x, int y){\
- pixel *dst = (pixel*)p_dst;\
- pixel *src = (pixel*)p_src;\
- const int A=(8-x)*(8-y);\
- const int B=( x)*(8-y);\
- const int C=(8-x)*( y);\
- const int D=( x)*( y);\
- int i;\
- stride >>= sizeof(pixel)-1;\
- \
- av_assert2(x<8 && y<8 && x>=0 && y>=0);\
-\
- if(D){\
- for(i=0; i<h; i++){\
- OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
- OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
- OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
- OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
- OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
- OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
- OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
- OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
- dst+= stride;\
- src+= stride;\
- }\
- }else{\
- const int E= B+C;\
- const int step= C ? stride : 1;\
- for(i=0; i<h; i++){\
- OP(dst[0], (A*src[0] + E*src[step+0]));\
- OP(dst[1], (A*src[1] + E*src[step+1]));\
- OP(dst[2], (A*src[2] + E*src[step+2]));\
- OP(dst[3], (A*src[3] + E*src[step+3]));\
- OP(dst[4], (A*src[4] + E*src[step+4]));\
- OP(dst[5], (A*src[5] + E*src[step+5]));\
- OP(dst[6], (A*src[6] + E*src[step+6]));\
- OP(dst[7], (A*src[7] + E*src[step+7]));\
- dst+= stride;\
- src+= stride;\
- }\
- }\
-}
-
-#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
-#define op_put(a, b) a = (((b) + 32)>>6)
-
-H264_CHROMA_MC(put_ , op_put)
-H264_CHROMA_MC(avg_ , op_avg)
-#undef op_avg
-#undef op_put
-
void FUNCC(ff_put_pixels8x8)(uint8_t *dst, uint8_t *src, int stride) {
FUNCC(put_pixels8)(dst, src, stride, 8);
}
diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 34cd8c0658..22e37c3ba9 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -37,6 +37,7 @@
#include "mpegvideo.h"
#include "h264.h"
#include "h264data.h"
+#include "h264chroma.h"
#include "h264_mvpred.h"
#include "golomb.h"
#include "mathops.h"
@@ -996,6 +997,8 @@ static av_cold void common_init(H264Context *h)
h->cur_chroma_format_idc = 1;
ff_h264dsp_init(&h->h264dsp, 8, 1);
+ av_assert0(h->sps.bit_depth_chroma == 0);
+ ff_h264chroma_init(&h->h264chroma, h->sps.bit_depth_chroma);
ff_h264qpel_init(&h->h264qpel, 8);
ff_h264_pred_init(&h->hpc, s->codec_id, 8, 1);
@@ -2476,6 +2479,7 @@ static int h264_set_parameter_from_sps(H264Context *h)
ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma,
h->sps.chroma_format_idc);
+ ff_h264chroma_init(&h->h264chroma, h->sps.bit_depth_chroma);
ff_h264qpel_init(&h->h264qpel, h->sps.bit_depth_luma);
ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma,
h->sps.chroma_format_idc);
@@ -2633,6 +2637,7 @@ static int h264_slice_header_init(H264Context *h, int reinit)
memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
c->h264dsp = h->h264dsp;
+ c->h264chroma = h->h264chroma;
c->h264qpel = h->h264qpel;
c->sps = h->sps;
c->pps = h->pps;
diff --git a/libavcodec/h264.h b/libavcodec/h264.h
index 96148e5095..e4e5ec286d 100644
--- a/libavcodec/h264.h
+++ b/libavcodec/h264.h
@@ -31,6 +31,7 @@
#include "libavutil/intreadwrite.h"
#include "cabac.h"
#include "mpegvideo.h"
+#include "h264chroma.h"
#include "h264dsp.h"
#include "h264pred.h"
#include "h264qpel.h"
@@ -258,6 +259,7 @@ typedef struct MMCO {
typedef struct H264Context {
MpegEncContext s;
H264DSPContext h264dsp;
+ H264ChromaContext h264chroma;
H264QpelContext h264qpel;
int pixel_shift; ///< 0 for 8-bit H264, 1 for high-bit-depth H264
int chroma_qp[2]; // QPc
diff --git a/libavcodec/h264_mb_template.c b/libavcodec/h264_mb_template.c
index 34e63dcc32..679021f437 100644
--- a/libavcodec/h264_mb_template.c
+++ b/libavcodec/h264_mb_template.c
@@ -173,14 +173,14 @@ static av_noinline void FUNC(hl_decode_mb)(H264Context *h)
} else if (is_h264) {
if (chroma422) {
FUNC(hl_motion_422)(h, dest_y, dest_cb, dest_cr,
- s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
- s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
+ s->me.qpel_put, h->h264chroma.put_h264_chroma_pixels_tab,
+ s->me.qpel_avg, h->h264chroma.avg_h264_chroma_pixels_tab,
h->h264dsp.weight_h264_pixels_tab,
h->h264dsp.biweight_h264_pixels_tab);
} else {
FUNC(hl_motion_420)(h, dest_y, dest_cb, dest_cr,
- s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
- s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
+ s->me.qpel_put, h->h264chroma.put_h264_chroma_pixels_tab,
+ s->me.qpel_avg, h->h264chroma.avg_h264_chroma_pixels_tab,
h->h264dsp.weight_h264_pixels_tab,
h->h264dsp.biweight_h264_pixels_tab);
}
@@ -357,8 +357,8 @@ static av_noinline void FUNC(hl_decode_mb_444)(H264Context *h)
linesize, 0, 1, SIMPLE, PIXEL_SHIFT);
} else {
FUNC(hl_motion_444)(h, dest[0], dest[1], dest[2],
- s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
- s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
+ s->me.qpel_put, h->h264chroma.put_h264_chroma_pixels_tab,
+ s->me.qpel_avg, h->h264chroma.avg_h264_chroma_pixels_tab,
h->h264dsp.weight_h264_pixels_tab,
h->h264dsp.biweight_h264_pixels_tab);
}
diff --git a/libavcodec/h264chroma.c b/libavcodec/h264chroma.c
new file mode 100644
index 0000000000..31004a597b
--- /dev/null
+++ b/libavcodec/h264chroma.c
@@ -0,0 +1,64 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "h264chroma.h"
+
+#define BIT_DEPTH 8 /* the template is included once per supported bit depth */
+#include "h264chroma_template.c" /* expected to provide the *_8_c kernels named by SET_CHROMA(8) */
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 9
+#include "h264chroma_template.c" /* expected to provide the *_9_c kernels named by SET_CHROMA(9) */
+#undef BIT_DEPTH
+
+#define BIT_DEPTH 10
+#include "h264chroma_template.c" /* expected to provide the *_10_c kernels named by SET_CHROMA(10) */
+#undef BIT_DEPTH
+
+/* Store the six C kernels generated for <depth>-bit samples into c. */
+#define SET_CHROMA(depth) do { \
+    c->put_h264_chroma_pixels_tab[0] = put_h264_chroma_mc8_ ## depth ## _c; \
+    c->put_h264_chroma_pixels_tab[1] = put_h264_chroma_mc4_ ## depth ## _c; \
+    c->put_h264_chroma_pixels_tab[2] = put_h264_chroma_mc2_ ## depth ## _c; \
+    c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_ ## depth ## _c; \
+    c->avg_h264_chroma_pixels_tab[1] = avg_h264_chroma_mc4_ ## depth ## _c; \
+    c->avg_h264_chroma_pixels_tab[2] = avg_h264_chroma_mc2_ ## depth ## _c; \
+} while (0)
+
+/* Fill c with the C kernels for bit_depth (10 and 9 have their own sets,
+ * any other value selects the 8-bit set), then call the init hook behind
+ * each ARCH_* flag so that it can replace entries. */
+void ff_h264chroma_init(H264ChromaContext *c, int bit_depth)
+{
+    switch (bit_depth) {
+    case 10: SET_CHROMA(10); break;
+    case 9:  SET_CHROMA(9);  break;
+    default: SET_CHROMA(8);  break;
+    }
+
+    /* Hooks run after the C defaults are in place. */
+    if (ARCH_ARM)
+        ff_h264chroma_init_arm(c, bit_depth);
+    if (ARCH_PPC)
+        ff_h264chroma_init_ppc(c, bit_depth);
+    if (ARCH_SH4)
+        ff_h264chroma_init_sh4(c, bit_depth);
+    if (ARCH_X86)
+        ff_h264chroma_init_x86(c, bit_depth);
+}
diff --git a/libavcodec/h264chroma.h b/libavcodec/h264chroma.h
new file mode 100644
index 0000000000..4e035b0c38
--- /dev/null
+++ b/libavcodec/h264chroma.h
@@ -0,0 +1,38 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_H264CHROMA_H
+#define AVCODEC_H264CHROMA_H
+
+#include <stdint.h>
+
+typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y); /* h: rows to process; the C kernels assert 0 <= x, y < 8 */
+
+typedef struct H264ChromaContext {
+ h264_chroma_mc_func put_h264_chroma_pixels_tab[3]; ///< [0] mc8, [1] mc4, [2] mc2; C kernels store the result in dst
+ h264_chroma_mc_func avg_h264_chroma_pixels_tab[3]; ///< same slot order; C kernels average the result with dst
+} H264ChromaContext;
+
+void ff_h264chroma_init(H264ChromaContext *c, int bit_depth); ///< sets both tables: C kernels first, then the per-arch hooks below
+
+void ff_h264chroma_init_arm(H264ChromaContext *c, int bit_depth); ///< called from ff_h264chroma_init() when ARCH_ARM is set
+void ff_h264chroma_init_ppc(H264ChromaContext *c, int bit_depth); ///< called from ff_h264chroma_init() when ARCH_PPC is set
+void ff_h264chroma_init_sh4(H264ChromaContext *c, int bit_depth); ///< called from ff_h264chroma_init() when ARCH_SH4 is set
+void ff_h264chroma_init_x86(H264ChromaContext *c, int bit_depth); ///< called from ff_h264chroma_init() when ARCH_X86 is set
+
+#endif /* AVCODEC_H264CHROMA_H */
diff --git a/libavcodec/h264chroma_template.c b/libavcodec/h264chroma_template.c
new file mode 100644
index 0000000000..93559d7c68
--- /dev/null
+++ b/libavcodec/h264chroma_template.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2000, 2001 Fabrice Bellard
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/avassert.h"
+
+#include "bit_depth_template.c"
+
+#define H264_CHROMA_MC(OPNAME, OP) /* defines the OPNAME mc2, mc4 and mc8 kernels; OP(dst, sum) writes one pixel from its weighted sum */\
+static void FUNCC(OPNAME ## h264_chroma_mc2)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){\
+ pixel *dst = (pixel*)_dst;\
+ pixel *src = (pixel*)_src;\
+ const int A=(8-x)*(8-y); /* A..D: weights of the 2x2 source neighbourhood; A+B+C+D == 64 */\
+ const int B=( x)*(8-y);\
+ const int C=(8-x)*( y);\
+ const int D=( x)*( y);\
+ int i;\
+ stride >>= sizeof(pixel)-1; /* stride indexes pixel pointers below; shift is 0 for 1-byte pixels, 1 for 2-byte pixels */\
+ \
+ av_assert2(x<8 && y<8 && x>=0 && y>=0);\
+\
+ if(D){ /* x and y both non-zero: all four neighbours contribute */\
+ for(i=0; i<h; i++){\
+ OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
+ OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
+ dst+= stride;\
+ src+= stride;\
+ }\
+ }else{ /* x == 0 or y == 0: one neighbour besides src[i] is enough */\
+ const int E= B+C; /* at least one of B, C is zero on this path */\
+ const int step= C ? stride : 1; /* neighbour in the next row if C != 0, otherwise in the next column */\
+ for(i=0; i<h; i++){\
+ OP(dst[0], (A*src[0] + E*src[step+0]));\
+ OP(dst[1], (A*src[1] + E*src[step+1]));\
+ dst+= stride;\
+ src+= stride;\
+ }\
+ }\
+}\
+\
+static void FUNCC(OPNAME ## h264_chroma_mc4)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){ /* same scheme as mc2, four pixels per row */\
+ pixel *dst = (pixel*)_dst;\
+ pixel *src = (pixel*)_src;\
+ const int A=(8-x)*(8-y);\
+ const int B=( x)*(8-y);\
+ const int C=(8-x)*( y);\
+ const int D=( x)*( y);\
+ int i;\
+ stride >>= sizeof(pixel)-1;\
+ \
+ av_assert2(x<8 && y<8 && x>=0 && y>=0);\
+\
+ if(D){\
+ for(i=0; i<h; i++){\
+ OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
+ OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
+ OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
+ OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
+ dst+= stride;\
+ src+= stride;\
+ }\
+ }else{\
+ const int E= B+C;\
+ const int step= C ? stride : 1;\
+ for(i=0; i<h; i++){\
+ OP(dst[0], (A*src[0] + E*src[step+0]));\
+ OP(dst[1], (A*src[1] + E*src[step+1]));\
+ OP(dst[2], (A*src[2] + E*src[step+2]));\
+ OP(dst[3], (A*src[3] + E*src[step+3]));\
+ dst+= stride;\
+ src+= stride;\
+ }\
+ }\
+}\
+\
+static void FUNCC(OPNAME ## h264_chroma_mc8)(uint8_t *_dst/*align 8*/, uint8_t *_src/*align 1*/, int stride, int h, int x, int y){ /* same scheme as mc2, eight pixels per row */\
+ pixel *dst = (pixel*)_dst;\
+ pixel *src = (pixel*)_src;\
+ const int A=(8-x)*(8-y);\
+ const int B=( x)*(8-y);\
+ const int C=(8-x)*( y);\
+ const int D=( x)*( y);\
+ int i;\
+ stride >>= sizeof(pixel)-1;\
+ \
+ av_assert2(x<8 && y<8 && x>=0 && y>=0);\
+\
+ if(D){\
+ for(i=0; i<h; i++){\
+ OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
+ OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
+ OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
+ OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
+ OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
+ OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
+ OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
+ OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
+ dst+= stride;\
+ src+= stride;\
+ }\
+ }else{\
+ const int E= B+C;\
+ const int step= C ? stride : 1;\
+ for(i=0; i<h; i++){\
+ OP(dst[0], (A*src[0] + E*src[step+0]));\
+ OP(dst[1], (A*src[1] + E*src[step+1]));\
+ OP(dst[2], (A*src[2] + E*src[step+2]));\
+ OP(dst[3], (A*src[3] + E*src[step+3]));\
+ OP(dst[4], (A*src[4] + E*src[step+4]));\
+ OP(dst[5], (A*src[5] + E*src[step+5]));\
+ OP(dst[6], (A*src[6] + E*src[step+6]));\
+ OP(dst[7], (A*src[7] + E*src[step+7]));\
+ dst+= stride;\
+ src+= stride;\
+ }\
+ }\
+}
+
+#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1) /* scale b as op_put does, then take the rounded-up mean with the current a */
+#define op_put(a, b) a = (((b) + 32)>>6) /* b is a sum with total weight 64: divide by 64, rounding to nearest */
+
+H264_CHROMA_MC(put_ , op_put) /* put_h264_chroma_mc2/4/8 for the current bit depth */
+H264_CHROMA_MC(avg_ , op_avg) /* avg_h264_chroma_mc2/4/8 for the current bit depth */
+#undef op_avg
+#undef op_put
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index 0c7b02b3c7..f4c651688e 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -30,6 +30,7 @@
#include "libavutil/imgutils.h"
#include "avcodec.h"
#include "dsputil.h"
+#include "h264chroma.h"
#include "internal.h"
#include "mathops.h"
#include "mpegvideo.h"
@@ -181,6 +182,7 @@ const uint8_t *avpriv_mpv_find_start_code(const uint8_t *av_restrict p,
av_cold int ff_dct_common_init(MpegEncContext *s)
{
ff_dsputil_init(&s->dsp, s->avctx);
+ ff_h264chroma_init(&s->h264chroma, 8); //for lowres
ff_videodsp_init(&s->vdsp, s->avctx->bits_per_raw_sample);
s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
@@ -2388,7 +2390,7 @@ static inline void MPV_motion_lowres(MpegEncContext *s,
s->mv[dir][2 * i + j][1],
block_s, mb_y);
}
- pix_op = s->dsp.avg_h264_chroma_pixels_tab;
+ pix_op = s->h264chroma.avg_h264_chroma_pixels_tab;
}
} else {
for (i = 0; i < 2; i++) {
@@ -2399,7 +2401,7 @@ static inline void MPV_motion_lowres(MpegEncContext *s,
2 * block_s, mb_y >> 1);
// after put we make avg of the same block
- pix_op = s->dsp.avg_h264_chroma_pixels_tab;
+ pix_op = s->h264chroma.avg_h264_chroma_pixels_tab;
// opposite parity is always in the same
// frame if this is second field
@@ -2620,11 +2622,11 @@ void MPV_decode_mb_internal(MpegEncContext *s, int16_t block[12][64],
}
if(lowres_flag){
- h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;
+ h264_chroma_mc_func *op_pix = s->h264chroma.put_h264_chroma_pixels_tab;
if (s->mv_dir & MV_DIR_FORWARD) {
MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.f.data, op_pix);
- op_pix = s->dsp.avg_h264_chroma_pixels_tab;
+ op_pix = s->h264chroma.avg_h264_chroma_pixels_tab;
}
if (s->mv_dir & MV_DIR_BACKWARD) {
MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.f.data, op_pix);
diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h
index 0675004948..ba352de3fd 100644
--- a/libavcodec/mpegvideo.h
+++ b/libavcodec/mpegvideo.h
@@ -31,6 +31,7 @@
#include "avcodec.h"
#include "dsputil.h"
#include "get_bits.h"
+#include "h264chroma.h"
#include "put_bits.h"
#include "ratecontrol.h"
#include "parser.h"
@@ -359,6 +360,7 @@ typedef struct MpegEncContext {
int h263_long_vectors; ///< use horrible h263v1 long vector mode
DSPContext dsp; ///< pointers for accelerated dsp functions
+ H264ChromaContext h264chroma;
VideoDSPContext vdsp;
int f_code; ///< forward MV resolution
int b_code; ///< backward MV resolution for B Frames (mpeg4)
diff --git a/libavcodec/ppc/Makefile b/libavcodec/ppc/Makefile
index e152483e7c..a2ce9ebafb 100644
--- a/libavcodec/ppc/Makefile
+++ b/libavcodec/ppc/Makefile
@@ -1,6 +1,7 @@
OBJS += ppc/dsputil_ppc.o \
ppc/videodsp_ppc.o \
+OBJS-$(CONFIG_H264CHROMA) += ppc/h264chroma_init.o
OBJS-$(CONFIG_H264QPEL) += ppc/h264_qpel.o
OBJS-$(CONFIG_VORBIS_DECODER) += ppc/vorbisdsp_altivec.o
OBJS-$(CONFIG_VP3DSP) += ppc/vp3dsp_altivec.o
diff --git a/libavcodec/ppc/dsputil_altivec.h b/libavcodec/ppc/dsputil_altivec.h
index f23ceaf337..0e769ab060 100644
--- a/libavcodec/ppc/dsputil_altivec.h
+++ b/libavcodec/ppc/dsputil_altivec.h
@@ -36,8 +36,6 @@ void ff_gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h,
void ff_idct_put_altivec(uint8_t *dest, int line_size, int16_t *block);
void ff_idct_add_altivec(uint8_t *dest, int line_size, int16_t *block);
-void ff_dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx);
-
void ff_dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx);
void ff_float_init_altivec(DSPContext* c, AVCodecContext *avctx);
void ff_int_init_altivec(DSPContext* c, AVCodecContext *avctx);
diff --git a/libavcodec/ppc/dsputil_ppc.c b/libavcodec/ppc/dsputil_ppc.c
index 24aa88f173..6fce90438b 100644
--- a/libavcodec/ppc/dsputil_ppc.c
+++ b/libavcodec/ppc/dsputil_ppc.c
@@ -165,8 +165,6 @@ av_cold void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx)
}
#if HAVE_ALTIVEC
- if(CONFIG_H264_DECODER) ff_dsputil_h264_init_ppc(c, avctx);
-
if (mm_flags & AV_CPU_FLAG_ALTIVEC) {
ff_dsputil_init_altivec(c, avctx);
ff_int_init_altivec(c, avctx);
diff --git a/libavcodec/ppc/h264_qpel.c b/libavcodec/ppc/h264_qpel.c
index 26e333effb..8938783a47 100644
--- a/libavcodec/ppc/h264_qpel.c
+++ b/libavcodec/ppc/h264_qpel.c
@@ -33,8 +33,6 @@
#define AVG_OP_U8_ALTIVEC(d, s, dst) d = vec_avg(dst, s)
#define OP_U8_ALTIVEC PUT_OP_U8_ALTIVEC
-#define PREFIX_h264_chroma_mc8_altivec put_h264_chroma_mc8_altivec
-#define PREFIX_h264_chroma_mc8_num altivec_put_h264_chroma_mc8_num
#define PREFIX_h264_qpel16_h_lowpass_altivec put_h264_qpel16_h_lowpass_altivec
#define PREFIX_h264_qpel16_h_lowpass_num altivec_put_h264_qpel16_h_lowpass_num
#define PREFIX_h264_qpel16_v_lowpass_altivec put_h264_qpel16_v_lowpass_altivec
@@ -43,8 +41,6 @@
#define PREFIX_h264_qpel16_hv_lowpass_num altivec_put_h264_qpel16_hv_lowpass_num
#include "h264_qpel_template.c"
#undef OP_U8_ALTIVEC
-#undef PREFIX_h264_chroma_mc8_altivec
-#undef PREFIX_h264_chroma_mc8_num
#undef PREFIX_h264_qpel16_h_lowpass_altivec
#undef PREFIX_h264_qpel16_h_lowpass_num
#undef PREFIX_h264_qpel16_v_lowpass_altivec
@@ -53,8 +49,6 @@
#undef PREFIX_h264_qpel16_hv_lowpass_num
#define OP_U8_ALTIVEC AVG_OP_U8_ALTIVEC
-#define PREFIX_h264_chroma_mc8_altivec avg_h264_chroma_mc8_altivec
-#define PREFIX_h264_chroma_mc8_num altivec_avg_h264_chroma_mc8_num
#define PREFIX_h264_qpel16_h_lowpass_altivec avg_h264_qpel16_h_lowpass_altivec
#define PREFIX_h264_qpel16_h_lowpass_num altivec_avg_h264_qpel16_h_lowpass_num
#define PREFIX_h264_qpel16_v_lowpass_altivec avg_h264_qpel16_v_lowpass_altivec
@@ -63,8 +57,6 @@
#define PREFIX_h264_qpel16_hv_lowpass_num altivec_avg_h264_qpel16_hv_lowpass_num
#include "h264_qpel_template.c"
#undef OP_U8_ALTIVEC
-#undef PREFIX_h264_chroma_mc8_altivec
-#undef PREFIX_h264_chroma_mc8_num
#undef PREFIX_h264_qpel16_h_lowpass_altivec
#undef PREFIX_h264_qpel16_h_lowpass_num
#undef PREFIX_h264_qpel16_v_lowpass_altivec
@@ -273,18 +265,6 @@ static inline void avg_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
H264_MC(put_, 16, altivec)
H264_MC(avg_, 16, altivec)
-
-void ff_dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx)
-{
- const int high_bit_depth = avctx->bits_per_raw_sample > 8;
-
- if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) {
- if (!high_bit_depth) {
- c->put_h264_chroma_pixels_tab[0] = put_h264_chroma_mc8_altivec;
- c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_altivec;
- }
- }
-}
#endif /* HAVE_ALTIVEC */
av_cold void ff_h264qpel_init_ppc(H264QpelContext *c, int bit_depth)
diff --git a/libavcodec/ppc/h264_qpel_template.c b/libavcodec/ppc/h264_qpel_template.c
index 5a08e0e27f..cfc4560267 100644
--- a/libavcodec/ppc/h264_qpel_template.c
+++ b/libavcodec/ppc/h264_qpel_template.c
@@ -26,274 +26,6 @@
#define ASSERT_ALIGNED(ptr) ;
#endif
-/* this code assume that stride % 16 == 0 */
-
-#define CHROMA_MC8_ALTIVEC_CORE(BIAS1, BIAS2) \
- vsrc2ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc2uc);\
- vsrc3ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc3uc);\
-\
- psum = vec_mladd(vA, vsrc0ssH, BIAS1);\
- psum = vec_mladd(vB, vsrc1ssH, psum);\
- psum = vec_mladd(vC, vsrc2ssH, psum);\
- psum = vec_mladd(vD, vsrc3ssH, psum);\
- psum = BIAS2(psum);\
- psum = vec_sr(psum, v6us);\
-\
- vdst = vec_ld(0, dst);\
- ppsum = (vec_u8)vec_pack(psum, psum);\
- vfdst = vec_perm(vdst, ppsum, fperm);\
-\
- OP_U8_ALTIVEC(fsum, vfdst, vdst);\
-\
- vec_st(fsum, 0, dst);\
-\
- vsrc0ssH = vsrc2ssH;\
- vsrc1ssH = vsrc3ssH;\
-\
- dst += stride;\
- src += stride;
-
-#define CHROMA_MC8_ALTIVEC_CORE_SIMPLE \
-\
- vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc0uc);\
- vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc1uc);\
-\
- psum = vec_mladd(vA, vsrc0ssH, v32ss);\
- psum = vec_mladd(vE, vsrc1ssH, psum);\
- psum = vec_sr(psum, v6us);\
-\
- vdst = vec_ld(0, dst);\
- ppsum = (vec_u8)vec_pack(psum, psum);\
- vfdst = vec_perm(vdst, ppsum, fperm);\
-\
- OP_U8_ALTIVEC(fsum, vfdst, vdst);\
-\
- vec_st(fsum, 0, dst);\
-\
- dst += stride;\
- src += stride;
-
-#define noop(a) a
-#define add28(a) vec_add(v28ss, a)
-
-#ifdef PREFIX_h264_chroma_mc8_altivec
-static void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src,
- int stride, int h, int x, int y) {
- DECLARE_ALIGNED(16, signed int, ABCD)[4] =
- {((8 - x) * (8 - y)),
- (( x) * (8 - y)),
- ((8 - x) * ( y)),
- (( x) * ( y))};
- register int i;
- vec_u8 fperm;
- const vec_s32 vABCD = vec_ld(0, ABCD);
- const vec_s16 vA = vec_splat((vec_s16)vABCD, 1);
- const vec_s16 vB = vec_splat((vec_s16)vABCD, 3);
- const vec_s16 vC = vec_splat((vec_s16)vABCD, 5);
- const vec_s16 vD = vec_splat((vec_s16)vABCD, 7);
- LOAD_ZERO;
- const vec_s16 v32ss = vec_sl(vec_splat_s16(1),vec_splat_u16(5));
- const vec_u16 v6us = vec_splat_u16(6);
- register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1;
- register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;
-
- vec_u8 vsrcAuc, av_uninit(vsrcBuc), vsrcperm0, vsrcperm1;
- vec_u8 vsrc0uc, vsrc1uc;
- vec_s16 vsrc0ssH, vsrc1ssH;
- vec_u8 vsrcCuc, vsrc2uc, vsrc3uc;
- vec_s16 vsrc2ssH, vsrc3ssH, psum;
- vec_u8 vdst, ppsum, vfdst, fsum;
-
- if (((unsigned long)dst) % 16 == 0) {
- fperm = (vec_u8){0x10, 0x11, 0x12, 0x13,
- 0x14, 0x15, 0x16, 0x17,
- 0x08, 0x09, 0x0A, 0x0B,
- 0x0C, 0x0D, 0x0E, 0x0F};
- } else {
- fperm = (vec_u8){0x00, 0x01, 0x02, 0x03,
- 0x04, 0x05, 0x06, 0x07,
- 0x18, 0x19, 0x1A, 0x1B,
- 0x1C, 0x1D, 0x1E, 0x1F};
- }
-
- vsrcAuc = vec_ld(0, src);
-
- if (loadSecond)
- vsrcBuc = vec_ld(16, src);
- vsrcperm0 = vec_lvsl(0, src);
- vsrcperm1 = vec_lvsl(1, src);
-
- vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0);
- if (reallyBadAlign)
- vsrc1uc = vsrcBuc;
- else
- vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);
-
- vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc0uc);
- vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc1uc);
-
- if (ABCD[3]) {
- if (!loadSecond) {// -> !reallyBadAlign
- for (i = 0 ; i < h ; i++) {
- vsrcCuc = vec_ld(stride + 0, src);
- vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
- vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);
-
- CHROMA_MC8_ALTIVEC_CORE(v32ss, noop)
- }
- } else {
- vec_u8 vsrcDuc;
- for (i = 0 ; i < h ; i++) {
- vsrcCuc = vec_ld(stride + 0, src);
- vsrcDuc = vec_ld(stride + 16, src);
- vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
- if (reallyBadAlign)
- vsrc3uc = vsrcDuc;
- else
- vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);
-
- CHROMA_MC8_ALTIVEC_CORE(v32ss, noop)
- }
- }
- } else {
- const vec_s16 vE = vec_add(vB, vC);
- if (ABCD[2]) { // x == 0 B == 0
- if (!loadSecond) {// -> !reallyBadAlign
- for (i = 0 ; i < h ; i++) {
- vsrcCuc = vec_ld(stride + 0, src);
- vsrc1uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
- CHROMA_MC8_ALTIVEC_CORE_SIMPLE
-
- vsrc0uc = vsrc1uc;
- }
- } else {
- vec_u8 vsrcDuc;
- for (i = 0 ; i < h ; i++) {
- vsrcCuc = vec_ld(stride + 0, src);
- vsrcDuc = vec_ld(stride + 15, src);
- vsrc1uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
- CHROMA_MC8_ALTIVEC_CORE_SIMPLE
-
- vsrc0uc = vsrc1uc;
- }
- }
- } else { // y == 0 C == 0
- if (!loadSecond) {// -> !reallyBadAlign
- for (i = 0 ; i < h ; i++) {
- vsrcCuc = vec_ld(0, src);
- vsrc0uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
- vsrc1uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);
-
- CHROMA_MC8_ALTIVEC_CORE_SIMPLE
- }
- } else {
- vec_u8 vsrcDuc;
- for (i = 0 ; i < h ; i++) {
- vsrcCuc = vec_ld(0, src);
- vsrcDuc = vec_ld(15, src);
- vsrc0uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
- if (reallyBadAlign)
- vsrc1uc = vsrcDuc;
- else
- vsrc1uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);
-
- CHROMA_MC8_ALTIVEC_CORE_SIMPLE
- }
- }
- }
- }
-}
-#endif
-
-/* this code assume that stride % 16 == 0 */
-#ifdef PREFIX_no_rnd_vc1_chroma_mc8_altivec
-static void PREFIX_no_rnd_vc1_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) {
- DECLARE_ALIGNED(16, signed int, ABCD)[4] =
- {((8 - x) * (8 - y)),
- (( x) * (8 - y)),
- ((8 - x) * ( y)),
- (( x) * ( y))};
- register int i;
- vec_u8 fperm;
- const vec_s32 vABCD = vec_ld(0, ABCD);
- const vec_s16 vA = vec_splat((vec_s16)vABCD, 1);
- const vec_s16 vB = vec_splat((vec_s16)vABCD, 3);
- const vec_s16 vC = vec_splat((vec_s16)vABCD, 5);
- const vec_s16 vD = vec_splat((vec_s16)vABCD, 7);
- LOAD_ZERO;
- const vec_s16 v28ss = vec_sub(vec_sl(vec_splat_s16(1),vec_splat_u16(5)),vec_splat_s16(4));
- const vec_u16 v6us = vec_splat_u16(6);
- register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1;
- register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;
-
- vec_u8 vsrcAuc, av_uninit(vsrcBuc), vsrcperm0, vsrcperm1;
- vec_u8 vsrc0uc, vsrc1uc;
- vec_s16 vsrc0ssH, vsrc1ssH;
- vec_u8 vsrcCuc, vsrc2uc, vsrc3uc;
- vec_s16 vsrc2ssH, vsrc3ssH, psum;
- vec_u8 vdst, ppsum, vfdst, fsum;
-
- if (((unsigned long)dst) % 16 == 0) {
- fperm = (vec_u8){0x10, 0x11, 0x12, 0x13,
- 0x14, 0x15, 0x16, 0x17,
- 0x08, 0x09, 0x0A, 0x0B,
- 0x0C, 0x0D, 0x0E, 0x0F};
- } else {
- fperm = (vec_u8){0x00, 0x01, 0x02, 0x03,
- 0x04, 0x05, 0x06, 0x07,
- 0x18, 0x19, 0x1A, 0x1B,
- 0x1C, 0x1D, 0x1E, 0x1F};
- }
-
- vsrcAuc = vec_ld(0, src);
-
- if (loadSecond)
- vsrcBuc = vec_ld(16, src);
- vsrcperm0 = vec_lvsl(0, src);
- vsrcperm1 = vec_lvsl(1, src);
-
- vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0);
- if (reallyBadAlign)
- vsrc1uc = vsrcBuc;
- else
- vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);
-
- vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v, (vec_u8)vsrc0uc);
- vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v, (vec_u8)vsrc1uc);
-
- if (!loadSecond) {// -> !reallyBadAlign
- for (i = 0 ; i < h ; i++) {
-
-
- vsrcCuc = vec_ld(stride + 0, src);
-
- vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
- vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);
-
- CHROMA_MC8_ALTIVEC_CORE(vec_splat_s16(0), add28)
- }
- } else {
- vec_u8 vsrcDuc;
- for (i = 0 ; i < h ; i++) {
- vsrcCuc = vec_ld(stride + 0, src);
- vsrcDuc = vec_ld(stride + 16, src);
-
- vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
- if (reallyBadAlign)
- vsrc3uc = vsrcDuc;
- else
- vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);
-
- CHROMA_MC8_ALTIVEC_CORE(vec_splat_s16(0), add28)
- }
- }
-}
-#endif
-
-#undef noop
-#undef add28
-#undef CHROMA_MC8_ALTIVEC_CORE
-
/* this code assume stride % 16 == 0 */
#ifdef PREFIX_h264_qpel16_h_lowpass_altivec
static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) {
diff --git a/libavcodec/ppc/h264chroma_init.c b/libavcodec/ppc/h264chroma_init.c
new file mode 100644
index 0000000000..f9e2a76cca
--- /dev/null
+++ b/libavcodec/ppc/h264chroma_init.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavcodec/h264chroma.h"
+
+#if HAVE_ALTIVEC
+#include "libavutil/cpu.h"
+#include "libavutil/intreadwrite.h"
+#include "libavutil/ppc/types_altivec.h"
+#include "libavutil/ppc/util_altivec.h"
+#include "dsputil_altivec.h"
+
+#define PUT_OP_U8_ALTIVEC(d, s, dst) d = s
+#define AVG_OP_U8_ALTIVEC(d, s, dst) d = vec_avg(dst, s)
+
+#define OP_U8_ALTIVEC PUT_OP_U8_ALTIVEC
+#define PREFIX_h264_chroma_mc8_altivec put_h264_chroma_mc8_altivec
+#define PREFIX_h264_chroma_mc8_num altivec_put_h264_chroma_mc8_num
+#include "h264chroma_template.c"
+#undef OP_U8_ALTIVEC
+#undef PREFIX_h264_chroma_mc8_altivec
+#undef PREFIX_h264_chroma_mc8_num
+
+#define OP_U8_ALTIVEC AVG_OP_U8_ALTIVEC
+#define PREFIX_h264_chroma_mc8_altivec avg_h264_chroma_mc8_altivec
+#define PREFIX_h264_chroma_mc8_num altivec_avg_h264_chroma_mc8_num
+#include "h264chroma_template.c"
+#undef OP_U8_ALTIVEC
+#undef PREFIX_h264_chroma_mc8_altivec
+#undef PREFIX_h264_chroma_mc8_num
+#endif /* HAVE_ALTIVEC */
+
+av_cold void ff_h264chroma_init_ppc(H264ChromaContext *c, int bit_depth)
+{
+#if HAVE_ALTIVEC
+ const int high_bit_depth = bit_depth > 8;
+
+ if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) {
+ if (!high_bit_depth) {
+ c->put_h264_chroma_pixels_tab[0] = put_h264_chroma_mc8_altivec;
+ c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_altivec;
+ }
+ }
+#endif /* HAVE_ALTIVEC */
+}
diff --git a/libavcodec/ppc/h264chroma_template.c b/libavcodec/ppc/h264chroma_template.c
new file mode 100644
index 0000000000..7436e118c5
--- /dev/null
+++ b/libavcodec/ppc/h264chroma_template.c
@@ -0,0 +1,289 @@
+/*
+ * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/mem.h"
+
+/* this code assumes that stride % 16 == 0 */
+
+#define CHROMA_MC8_ALTIVEC_CORE(BIAS1, BIAS2) \
+ vsrc2ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc2uc);\
+ vsrc3ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc3uc);\
+\
+ psum = vec_mladd(vA, vsrc0ssH, BIAS1);\
+ psum = vec_mladd(vB, vsrc1ssH, psum);\
+ psum = vec_mladd(vC, vsrc2ssH, psum);\
+ psum = vec_mladd(vD, vsrc3ssH, psum);\
+ psum = BIAS2(psum);\
+ psum = vec_sr(psum, v6us);\
+\
+ vdst = vec_ld(0, dst);\
+ ppsum = (vec_u8)vec_pack(psum, psum);\
+ vfdst = vec_perm(vdst, ppsum, fperm);\
+\
+ OP_U8_ALTIVEC(fsum, vfdst, vdst);\
+\
+ vec_st(fsum, 0, dst);\
+\
+ vsrc0ssH = vsrc2ssH;\
+ vsrc1ssH = vsrc3ssH;\
+\
+ dst += stride;\
+ src += stride;
+
+#define CHROMA_MC8_ALTIVEC_CORE_SIMPLE \
+\
+ vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc0uc);\
+ vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc1uc);\
+\
+ psum = vec_mladd(vA, vsrc0ssH, v32ss);\
+ psum = vec_mladd(vE, vsrc1ssH, psum);\
+ psum = vec_sr(psum, v6us);\
+\
+ vdst = vec_ld(0, dst);\
+ ppsum = (vec_u8)vec_pack(psum, psum);\
+ vfdst = vec_perm(vdst, ppsum, fperm);\
+\
+ OP_U8_ALTIVEC(fsum, vfdst, vdst);\
+\
+ vec_st(fsum, 0, dst);\
+\
+ dst += stride;\
+ src += stride;
+
+#define noop(a) a
+#define add28(a) vec_add(v28ss, a)
+
+#ifdef PREFIX_h264_chroma_mc8_altivec
+static void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src,
+ int stride, int h, int x, int y) {
+ DECLARE_ALIGNED(16, signed int, ABCD)[4] =
+ {((8 - x) * (8 - y)),
+ (( x) * (8 - y)),
+ ((8 - x) * ( y)),
+ (( x) * ( y))};
+ register int i;
+ vec_u8 fperm;
+ const vec_s32 vABCD = vec_ld(0, ABCD);
+ const vec_s16 vA = vec_splat((vec_s16)vABCD, 1);
+ const vec_s16 vB = vec_splat((vec_s16)vABCD, 3);
+ const vec_s16 vC = vec_splat((vec_s16)vABCD, 5);
+ const vec_s16 vD = vec_splat((vec_s16)vABCD, 7);
+ LOAD_ZERO;
+ const vec_s16 v32ss = vec_sl(vec_splat_s16(1),vec_splat_u16(5));
+ const vec_u16 v6us = vec_splat_u16(6);
+ register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1;
+ register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;
+
+ vec_u8 vsrcAuc, av_uninit(vsrcBuc), vsrcperm0, vsrcperm1;
+ vec_u8 vsrc0uc, vsrc1uc;
+ vec_s16 vsrc0ssH, vsrc1ssH;
+ vec_u8 vsrcCuc, vsrc2uc, vsrc3uc;
+ vec_s16 vsrc2ssH, vsrc3ssH, psum;
+ vec_u8 vdst, ppsum, vfdst, fsum;
+
+ if (((unsigned long)dst) % 16 == 0) {
+ fperm = (vec_u8){0x10, 0x11, 0x12, 0x13,
+ 0x14, 0x15, 0x16, 0x17,
+ 0x08, 0x09, 0x0A, 0x0B,
+ 0x0C, 0x0D, 0x0E, 0x0F};
+ } else {
+ fperm = (vec_u8){0x00, 0x01, 0x02, 0x03,
+ 0x04, 0x05, 0x06, 0x07,
+ 0x18, 0x19, 0x1A, 0x1B,
+ 0x1C, 0x1D, 0x1E, 0x1F};
+ }
+
+ vsrcAuc = vec_ld(0, src);
+
+ if (loadSecond)
+ vsrcBuc = vec_ld(16, src);
+ vsrcperm0 = vec_lvsl(0, src);
+ vsrcperm1 = vec_lvsl(1, src);
+
+ vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0);
+ if (reallyBadAlign)
+ vsrc1uc = vsrcBuc;
+ else
+ vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);
+
+ vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc0uc);
+ vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc1uc);
+
+ if (ABCD[3]) {
+ if (!loadSecond) {// -> !reallyBadAlign
+ for (i = 0 ; i < h ; i++) {
+ vsrcCuc = vec_ld(stride + 0, src);
+ vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
+ vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);
+
+ CHROMA_MC8_ALTIVEC_CORE(v32ss, noop)
+ }
+ } else {
+ vec_u8 vsrcDuc;
+ for (i = 0 ; i < h ; i++) {
+ vsrcCuc = vec_ld(stride + 0, src);
+ vsrcDuc = vec_ld(stride + 16, src);
+ vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
+ if (reallyBadAlign)
+ vsrc3uc = vsrcDuc;
+ else
+ vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);
+
+ CHROMA_MC8_ALTIVEC_CORE(v32ss, noop)
+ }
+ }
+ } else {
+ const vec_s16 vE = vec_add(vB, vC);
+ if (ABCD[2]) { // x == 0 B == 0
+ if (!loadSecond) {// -> !reallyBadAlign
+ for (i = 0 ; i < h ; i++) {
+ vsrcCuc = vec_ld(stride + 0, src);
+ vsrc1uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
+ CHROMA_MC8_ALTIVEC_CORE_SIMPLE
+
+ vsrc0uc = vsrc1uc;
+ }
+ } else {
+ vec_u8 vsrcDuc;
+ for (i = 0 ; i < h ; i++) {
+ vsrcCuc = vec_ld(stride + 0, src);
+ vsrcDuc = vec_ld(stride + 15, src);
+ vsrc1uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
+ CHROMA_MC8_ALTIVEC_CORE_SIMPLE
+
+ vsrc0uc = vsrc1uc;
+ }
+ }
+ } else { // y == 0 C == 0
+ if (!loadSecond) {// -> !reallyBadAlign
+ for (i = 0 ; i < h ; i++) {
+ vsrcCuc = vec_ld(0, src);
+ vsrc0uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
+ vsrc1uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);
+
+ CHROMA_MC8_ALTIVEC_CORE_SIMPLE
+ }
+ } else {
+ vec_u8 vsrcDuc;
+ for (i = 0 ; i < h ; i++) {
+ vsrcCuc = vec_ld(0, src);
+ vsrcDuc = vec_ld(15, src);
+ vsrc0uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
+ if (reallyBadAlign)
+ vsrc1uc = vsrcDuc;
+ else
+ vsrc1uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);
+
+ CHROMA_MC8_ALTIVEC_CORE_SIMPLE
+ }
+ }
+ }
+ }
+}
+#endif
+
+/* this code assumes that stride % 16 == 0 */
+#ifdef PREFIX_no_rnd_vc1_chroma_mc8_altivec
+static void PREFIX_no_rnd_vc1_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) {
+ DECLARE_ALIGNED(16, signed int, ABCD)[4] =
+ {((8 - x) * (8 - y)),
+ (( x) * (8 - y)),
+ ((8 - x) * ( y)),
+ (( x) * ( y))};
+ register int i;
+ vec_u8 fperm;
+ const vec_s32 vABCD = vec_ld(0, ABCD);
+ const vec_s16 vA = vec_splat((vec_s16)vABCD, 1);
+ const vec_s16 vB = vec_splat((vec_s16)vABCD, 3);
+ const vec_s16 vC = vec_splat((vec_s16)vABCD, 5);
+ const vec_s16 vD = vec_splat((vec_s16)vABCD, 7);
+ LOAD_ZERO;
+ const vec_s16 v28ss = vec_sub(vec_sl(vec_splat_s16(1),vec_splat_u16(5)),vec_splat_s16(4));
+ const vec_u16 v6us = vec_splat_u16(6);
+ register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1;
+ register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;
+
+ vec_u8 vsrcAuc, av_uninit(vsrcBuc), vsrcperm0, vsrcperm1;
+ vec_u8 vsrc0uc, vsrc1uc;
+ vec_s16 vsrc0ssH, vsrc1ssH;
+ vec_u8 vsrcCuc, vsrc2uc, vsrc3uc;
+ vec_s16 vsrc2ssH, vsrc3ssH, psum;
+ vec_u8 vdst, ppsum, vfdst, fsum;
+
+ if (((unsigned long)dst) % 16 == 0) {
+ fperm = (vec_u8){0x10, 0x11, 0x12, 0x13,
+ 0x14, 0x15, 0x16, 0x17,
+ 0x08, 0x09, 0x0A, 0x0B,
+ 0x0C, 0x0D, 0x0E, 0x0F};
+ } else {
+ fperm = (vec_u8){0x00, 0x01, 0x02, 0x03,
+ 0x04, 0x05, 0x06, 0x07,
+ 0x18, 0x19, 0x1A, 0x1B,
+ 0x1C, 0x1D, 0x1E, 0x1F};
+ }
+
+ vsrcAuc = vec_ld(0, src);
+
+ if (loadSecond)
+ vsrcBuc = vec_ld(16, src);
+ vsrcperm0 = vec_lvsl(0, src);
+ vsrcperm1 = vec_lvsl(1, src);
+
+ vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0);
+ if (reallyBadAlign)
+ vsrc1uc = vsrcBuc;
+ else
+ vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);
+
+ vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v, (vec_u8)vsrc0uc);
+ vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v, (vec_u8)vsrc1uc);
+
+ if (!loadSecond) {// -> !reallyBadAlign
+ for (i = 0 ; i < h ; i++) {
+
+
+ vsrcCuc = vec_ld(stride + 0, src);
+
+ vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
+ vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);
+
+ CHROMA_MC8_ALTIVEC_CORE(vec_splat_s16(0), add28)
+ }
+ } else {
+ vec_u8 vsrcDuc;
+ for (i = 0 ; i < h ; i++) {
+ vsrcCuc = vec_ld(stride + 0, src);
+ vsrcDuc = vec_ld(stride + 16, src);
+
+ vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
+ if (reallyBadAlign)
+ vsrc3uc = vsrcDuc;
+ else
+ vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);
+
+ CHROMA_MC8_ALTIVEC_CORE(vec_splat_s16(0), add28)
+ }
+ }
+}
+#endif
+
+#undef noop
+#undef add28
+#undef CHROMA_MC8_ALTIVEC_CORE
diff --git a/libavcodec/ppc/vc1dsp_altivec.c b/libavcodec/ppc/vc1dsp_altivec.c
index 5bbb20ffc4..1a0d7c08dd 100644
--- a/libavcodec/ppc/vc1dsp_altivec.c
+++ b/libavcodec/ppc/vc1dsp_altivec.c
@@ -326,13 +326,13 @@ static void vc1_inv_trans_8x4_altivec(uint8_t *dest, int stride, int16_t *block)
#define OP_U8_ALTIVEC PUT_OP_U8_ALTIVEC
#define PREFIX_no_rnd_vc1_chroma_mc8_altivec put_no_rnd_vc1_chroma_mc8_altivec
-#include "h264_qpel_template.c"
+#include "h264chroma_template.c"
#undef OP_U8_ALTIVEC
#undef PREFIX_no_rnd_vc1_chroma_mc8_altivec
#define OP_U8_ALTIVEC AVG_OP_U8_ALTIVEC
#define PREFIX_no_rnd_vc1_chroma_mc8_altivec avg_no_rnd_vc1_chroma_mc8_altivec
-#include "h264_qpel_template.c"
+#include "h264chroma_template.c"
#undef OP_U8_ALTIVEC
#undef PREFIX_no_rnd_vc1_chroma_mc8_altivec
diff --git a/libavcodec/rv30dsp.c b/libavcodec/rv30dsp.c
index 028c98af12..2f54a5fbe8 100644
--- a/libavcodec/rv30dsp.c
+++ b/libavcodec/rv30dsp.c
@@ -26,6 +26,7 @@
#include "avcodec.h"
#include "dsputil.h"
+#include "h264chroma.h"
#include "h264qpel.h"
#include "rv34dsp.h"
@@ -254,9 +255,11 @@ RV30_MC(avg_, 8)
RV30_MC(avg_, 16)
av_cold void ff_rv30dsp_init(RV34DSPContext *c, DSPContext* dsp) {
+ H264ChromaContext h264chroma;
H264QpelContext qpel;
ff_rv34dsp_init(c, dsp);
+ ff_h264chroma_init(&h264chroma, 8);
ff_h264qpel_init(&qpel, 8);
c->put_pixels_tab[0][ 0] = qpel.put_h264_qpel_pixels_tab[0][0];
@@ -296,8 +299,8 @@ av_cold void ff_rv30dsp_init(RV34DSPContext *c, DSPContext* dsp) {
c->avg_pixels_tab[1][ 9] = avg_rv30_tpel8_mc12_c;
c->avg_pixels_tab[1][10] = avg_rv30_tpel8_mc22_c;
- c->put_chroma_pixels_tab[0] = dsp->put_h264_chroma_pixels_tab[0];
- c->put_chroma_pixels_tab[1] = dsp->put_h264_chroma_pixels_tab[1];
- c->avg_chroma_pixels_tab[0] = dsp->avg_h264_chroma_pixels_tab[0];
- c->avg_chroma_pixels_tab[1] = dsp->avg_h264_chroma_pixels_tab[1];
+ c->put_chroma_pixels_tab[0] = h264chroma.put_h264_chroma_pixels_tab[0];
+ c->put_chroma_pixels_tab[1] = h264chroma.put_h264_chroma_pixels_tab[1];
+ c->avg_chroma_pixels_tab[0] = h264chroma.avg_h264_chroma_pixels_tab[0];
+ c->avg_chroma_pixels_tab[1] = h264chroma.avg_h264_chroma_pixels_tab[1];
}
diff --git a/libavcodec/rv34dsp.h b/libavcodec/rv34dsp.h
index e04a47d35d..6d2f8345cc 100644
--- a/libavcodec/rv34dsp.h
+++ b/libavcodec/rv34dsp.h
@@ -28,6 +28,7 @@
#define AVCODEC_RV34DSP_H
#include "dsputil.h"
+#include "h264chroma.h"
typedef void (*rv40_weight_func)(uint8_t *dst/*align width (8 or 16)*/,
uint8_t *src1/*align width (8 or 16)*/,
diff --git a/libavcodec/sh4/Makefile b/libavcodec/sh4/Makefile
index aa17eabd6e..f907408d9f 100644
--- a/libavcodec/sh4/Makefile
+++ b/libavcodec/sh4/Makefile
@@ -1,3 +1,5 @@
OBJS += sh4/dsputil_align.o \
sh4/dsputil_sh4.o \
sh4/idct_sh4.o \
+
+OBJS-$(CONFIG_H264CHROMA) += sh4/h264chroma_init.o \
diff --git a/libavcodec/sh4/dsputil_align.c b/libavcodec/sh4/dsputil_align.c
index 736942512d..d59a12ec56 100644
--- a/libavcodec/sh4/dsputil_align.c
+++ b/libavcodec/sh4/dsputil_align.c
@@ -369,14 +369,6 @@ av_cold void ff_dsputil_init_align(DSPContext *c, AVCodecContext *avctx)
/* dspfunc(avg_no_rnd_qpel, 1, 8); */
#undef dspfunc
- if (!high_bit_depth) {
- c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_sh4;
- c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_sh4;
- c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_sh4;
- c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_sh4;
- c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_sh4;
- c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_sh4;
- }
c->put_mspel_pixels_tab[0]= put_mspel8_mc00_sh4;
c->put_mspel_pixels_tab[1]= put_mspel8_mc10_sh4;
diff --git a/libavcodec/sh4/h264chroma_init.c b/libavcodec/sh4/h264chroma_init.c
new file mode 100644
index 0000000000..d15f0ae34e
--- /dev/null
+++ b/libavcodec/sh4/h264chroma_init.c
@@ -0,0 +1,132 @@
+/*
+ * aligned/packed access motion
+ *
+ * Copyright (c) 2001-2003 BERO <bero@geocities.co.jp>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <assert.h>
+#include <stdint.h>
+
+#include "libavutil/attributes.h"
+#include "libavcodec/h264chroma.h"
+
+#define H264_CHROMA_MC(OPNAME, OP)\
+static void OPNAME ## h264_chroma_mc2_sh4(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
+ const int A=(8-x)*(8-y);\
+ const int B=( x)*(8-y);\
+ const int C=(8-x)*( y);\
+ const int D=( x)*( y);\
+ \
+ assert(x<8 && y<8 && x>=0 && y>=0);\
+\
+ do {\
+ int t0,t1,t2,t3; \
+ uint8_t *s0 = src; \
+ uint8_t *s1 = src+stride; \
+ t0 = *s0++; t2 = *s1++; \
+ t1 = *s0++; t3 = *s1++; \
+ OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\
+ t0 = *s0++; t2 = *s1++; \
+ OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\
+ dst+= stride;\
+ src+= stride;\
+ }while(--h);\
+}\
+\
+static void OPNAME ## h264_chroma_mc4_sh4(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
+ const int A=(8-x)*(8-y);\
+ const int B=( x)*(8-y);\
+ const int C=(8-x)*( y);\
+ const int D=( x)*( y);\
+ \
+ assert(x<8 && y<8 && x>=0 && y>=0);\
+\
+ do {\
+ int t0,t1,t2,t3; \
+ uint8_t *s0 = src; \
+ uint8_t *s1 = src+stride; \
+ t0 = *s0++; t2 = *s1++; \
+ t1 = *s0++; t3 = *s1++; \
+ OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\
+ t0 = *s0++; t2 = *s1++; \
+ OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\
+ t1 = *s0++; t3 = *s1++; \
+ OP(dst[2], (A*t0 + B*t1 + C*t2 + D*t3));\
+ t0 = *s0++; t2 = *s1++; \
+ OP(dst[3], (A*t1 + B*t0 + C*t3 + D*t2));\
+ dst+= stride;\
+ src+= stride;\
+ }while(--h);\
+}\
+\
+static void OPNAME ## h264_chroma_mc8_sh4(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
+ const int A=(8-x)*(8-y);\
+ const int B=( x)*(8-y);\
+ const int C=(8-x)*( y);\
+ const int D=( x)*( y);\
+ \
+ assert(x<8 && y<8 && x>=0 && y>=0);\
+\
+ do {\
+ int t0,t1,t2,t3; \
+ uint8_t *s0 = src; \
+ uint8_t *s1 = src+stride; \
+ t0 = *s0++; t2 = *s1++; \
+ t1 = *s0++; t3 = *s1++; \
+ OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\
+ t0 = *s0++; t2 = *s1++; \
+ OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\
+ t1 = *s0++; t3 = *s1++; \
+ OP(dst[2], (A*t0 + B*t1 + C*t2 + D*t3));\
+ t0 = *s0++; t2 = *s1++; \
+ OP(dst[3], (A*t1 + B*t0 + C*t3 + D*t2));\
+ t1 = *s0++; t3 = *s1++; \
+ OP(dst[4], (A*t0 + B*t1 + C*t2 + D*t3));\
+ t0 = *s0++; t2 = *s1++; \
+ OP(dst[5], (A*t1 + B*t0 + C*t3 + D*t2));\
+ t1 = *s0++; t3 = *s1++; \
+ OP(dst[6], (A*t0 + B*t1 + C*t2 + D*t3));\
+ t0 = *s0++; t2 = *s1++; \
+ OP(dst[7], (A*t1 + B*t0 + C*t3 + D*t2));\
+ dst+= stride;\
+ src+= stride;\
+ }while(--h);\
+}
+
+#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
+#define op_put(a, b) a = (((b) + 32)>>6)
+
+H264_CHROMA_MC(put_ , op_put)
+H264_CHROMA_MC(avg_ , op_avg)
+#undef op_avg
+#undef op_put
+
+av_cold void ff_h264chroma_init_sh4(H264ChromaContext *c, int bit_depth)
+{
+ const int high_bit_depth = bit_depth > 8;
+
+ if (!high_bit_depth) {
+ c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_sh4;
+ c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_sh4;
+ c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_sh4;
+ c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_sh4;
+ c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_sh4;
+ c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_sh4;
+ }
+}
diff --git a/libavcodec/sh4/qpel.c b/libavcodec/sh4/qpel.c
index 115ccbf0c8..c6d032bd07 100644
--- a/libavcodec/sh4/qpel.c
+++ b/libavcodec/sh4/qpel.c
@@ -359,97 +359,6 @@ static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y
}while(--h);
}
-#define H264_CHROMA_MC(OPNAME, OP)\
-static void OPNAME ## h264_chroma_mc2_sh4(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
- const int A=(8-x)*(8-y);\
- const int B=( x)*(8-y);\
- const int C=(8-x)*( y);\
- const int D=( x)*( y);\
- \
- assert(x<8 && y<8 && x>=0 && y>=0);\
-\
- do {\
- int t0,t1,t2,t3; \
- uint8_t *s0 = src; \
- uint8_t *s1 = src+stride; \
- t0 = *s0++; t2 = *s1++; \
- t1 = *s0++; t3 = *s1++; \
- OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\
- t0 = *s0++; t2 = *s1++; \
- OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\
- dst+= stride;\
- src+= stride;\
- }while(--h);\
-}\
-\
-static void OPNAME ## h264_chroma_mc4_sh4(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
- const int A=(8-x)*(8-y);\
- const int B=( x)*(8-y);\
- const int C=(8-x)*( y);\
- const int D=( x)*( y);\
- \
- assert(x<8 && y<8 && x>=0 && y>=0);\
-\
- do {\
- int t0,t1,t2,t3; \
- uint8_t *s0 = src; \
- uint8_t *s1 = src+stride; \
- t0 = *s0++; t2 = *s1++; \
- t1 = *s0++; t3 = *s1++; \
- OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\
- t0 = *s0++; t2 = *s1++; \
- OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\
- t1 = *s0++; t3 = *s1++; \
- OP(dst[2], (A*t0 + B*t1 + C*t2 + D*t3));\
- t0 = *s0++; t2 = *s1++; \
- OP(dst[3], (A*t1 + B*t0 + C*t3 + D*t2));\
- dst+= stride;\
- src+= stride;\
- }while(--h);\
-}\
-\
-static void OPNAME ## h264_chroma_mc8_sh4(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
- const int A=(8-x)*(8-y);\
- const int B=( x)*(8-y);\
- const int C=(8-x)*( y);\
- const int D=( x)*( y);\
- \
- assert(x<8 && y<8 && x>=0 && y>=0);\
-\
- do {\
- int t0,t1,t2,t3; \
- uint8_t *s0 = src; \
- uint8_t *s1 = src+stride; \
- t0 = *s0++; t2 = *s1++; \
- t1 = *s0++; t3 = *s1++; \
- OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\
- t0 = *s0++; t2 = *s1++; \
- OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\
- t1 = *s0++; t3 = *s1++; \
- OP(dst[2], (A*t0 + B*t1 + C*t2 + D*t3));\
- t0 = *s0++; t2 = *s1++; \
- OP(dst[3], (A*t1 + B*t0 + C*t3 + D*t2));\
- t1 = *s0++; t3 = *s1++; \
- OP(dst[4], (A*t0 + B*t1 + C*t2 + D*t3));\
- t0 = *s0++; t2 = *s1++; \
- OP(dst[5], (A*t1 + B*t0 + C*t3 + D*t2));\
- t1 = *s0++; t3 = *s1++; \
- OP(dst[6], (A*t0 + B*t1 + C*t2 + D*t3));\
- t0 = *s0++; t2 = *s1++; \
- OP(dst[7], (A*t1 + B*t0 + C*t3 + D*t2));\
- dst+= stride;\
- src+= stride;\
- }while(--h);\
-}
-
-#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
-#define op_put(a, b) a = (((b) + 32)>>6)
-
-H264_CHROMA_MC(put_ , op_put)
-H264_CHROMA_MC(avg_ , op_avg)
-#undef op_avg
-#undef op_put
-
#define QPEL_MC(r, OPNAME, RND, OP) \
static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
diff --git a/libavcodec/vc1.h b/libavcodec/vc1.h
index e32a507a65..3a580dc228 100644
--- a/libavcodec/vc1.h
+++ b/libavcodec/vc1.h
@@ -24,6 +24,7 @@
#define AVCODEC_VC1_H
#include "avcodec.h"
+#include "h264chroma.h"
#include "mpegvideo.h"
#include "intrax8.h"
#include "vc1dsp.h"
@@ -181,6 +182,7 @@ enum FrameCodingMode {
typedef struct VC1Context{
MpegEncContext s;
IntraX8Context x8;
+ H264ChromaContext h264chroma;
VC1DSPContext vc1dsp;
int bits;
diff --git a/libavcodec/vc1dec.c b/libavcodec/vc1dec.c
index 80eaca9e7c..013e96b107 100644
--- a/libavcodec/vc1dec.c
+++ b/libavcodec/vc1dec.c
@@ -31,6 +31,7 @@
#include "avcodec.h"
#include "mpegvideo.h"
#include "h263.h"
+#include "h264chroma.h"
#include "vc1.h"
#include "vc1data.h"
#include "vc1acdata.h"
@@ -334,6 +335,7 @@ static void vc1_mc_1mv(VC1Context *v, int dir)
{
MpegEncContext *s = &v->s;
DSPContext *dsp = &v->s.dsp;
+ H264ChromaContext *h264chroma = &v->h264chroma;
uint8_t *srcY, *srcU, *srcV;
int dxy, mx, my, uvmx, uvmy, src_x, src_y, uvsrc_x, uvsrc_y;
int off, off_uv;
@@ -525,8 +527,8 @@ static void vc1_mc_1mv(VC1Context *v, int dir)
uvmx = (uvmx & 3) << 1;
uvmy = (uvmy & 3) << 1;
if (!v->rnd) {
- dsp->put_h264_chroma_pixels_tab[0](s->dest[1] + off_uv, srcU, s->uvlinesize, 8, uvmx, uvmy);
- dsp->put_h264_chroma_pixels_tab[0](s->dest[2] + off_uv, srcV, s->uvlinesize, 8, uvmx, uvmy);
+ h264chroma->put_h264_chroma_pixels_tab[0](s->dest[1] + off_uv, srcU, s->uvlinesize, 8, uvmx, uvmy);
+ h264chroma->put_h264_chroma_pixels_tab[0](s->dest[2] + off_uv, srcV, s->uvlinesize, 8, uvmx, uvmy);
} else {
v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[1] + off_uv, srcU, s->uvlinesize, 8, uvmx, uvmy);
v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[2] + off_uv, srcV, s->uvlinesize, 8, uvmx, uvmy);
@@ -780,7 +782,7 @@ static av_always_inline int get_chroma_mv(int *mvx, int *mvy, int *a, int flag,
static void vc1_mc_4mv_chroma(VC1Context *v, int dir)
{
MpegEncContext *s = &v->s;
- DSPContext *dsp = &v->s.dsp;
+ H264ChromaContext *h264chroma = &v->h264chroma;
uint8_t *srcU, *srcV;
int uvmx, uvmy, uvsrc_x, uvsrc_y;
int k, tx = 0, ty = 0;
@@ -932,8 +934,8 @@ static void vc1_mc_4mv_chroma(VC1Context *v, int dir)
uvmx = (uvmx & 3) << 1;
uvmy = (uvmy & 3) << 1;
if (!v->rnd) {
- dsp->put_h264_chroma_pixels_tab[0](s->dest[1] + off, srcU, s->uvlinesize, 8, uvmx, uvmy);
- dsp->put_h264_chroma_pixels_tab[0](s->dest[2] + off, srcV, s->uvlinesize, 8, uvmx, uvmy);
+ h264chroma->put_h264_chroma_pixels_tab[0](s->dest[1] + off, srcU, s->uvlinesize, 8, uvmx, uvmy);
+ h264chroma->put_h264_chroma_pixels_tab[0](s->dest[2] + off, srcV, s->uvlinesize, 8, uvmx, uvmy);
} else {
v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[1] + off, srcU, s->uvlinesize, 8, uvmx, uvmy);
v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[2] + off, srcV, s->uvlinesize, 8, uvmx, uvmy);
@@ -945,7 +947,7 @@ static void vc1_mc_4mv_chroma(VC1Context *v, int dir)
static void vc1_mc_4mv_chroma4(VC1Context *v)
{
MpegEncContext *s = &v->s;
- DSPContext *dsp = &v->s.dsp;
+ H264ChromaContext *h264chroma = &v->h264chroma;
uint8_t *srcU, *srcV;
int uvsrc_x, uvsrc_y;
int uvmx_field[4], uvmy_field[4];
@@ -1018,8 +1020,8 @@ static void vc1_mc_4mv_chroma4(VC1Context *v)
}
}
if (!v->rnd) {
- dsp->put_h264_chroma_pixels_tab[1](s->dest[1] + off, srcU, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
- dsp->put_h264_chroma_pixels_tab[1](s->dest[2] + off, srcV, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
+ h264chroma->put_h264_chroma_pixels_tab[1](s->dest[1] + off, srcU, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
+ h264chroma->put_h264_chroma_pixels_tab[1](s->dest[2] + off, srcV, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
} else {
v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[1](s->dest[1] + off, srcU, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[1](s->dest[2] + off, srcV, s->uvlinesize << fieldmv, 4, uvmx_field[i], uvmy_field[i]);
@@ -1848,6 +1850,7 @@ static void vc1_interp_mc(VC1Context *v)
{
MpegEncContext *s = &v->s;
DSPContext *dsp = &v->s.dsp;
+ H264ChromaContext *h264chroma = &v->h264chroma;
uint8_t *srcY, *srcU, *srcV;
int dxy, mx, my, uvmx, uvmy, src_x, src_y, uvsrc_x, uvsrc_y;
int off, off_uv;
@@ -1977,8 +1980,8 @@ static void vc1_interp_mc(VC1Context *v)
uvmx = (uvmx & 3) << 1;
uvmy = (uvmy & 3) << 1;
if (!v->rnd) {
- dsp->avg_h264_chroma_pixels_tab[0](s->dest[1] + off_uv, srcU, s->uvlinesize, 8, uvmx, uvmy);
- dsp->avg_h264_chroma_pixels_tab[0](s->dest[2] + off_uv, srcV, s->uvlinesize, 8, uvmx, uvmy);
+ h264chroma->avg_h264_chroma_pixels_tab[0](s->dest[1] + off_uv, srcU, s->uvlinesize, 8, uvmx, uvmy);
+ h264chroma->avg_h264_chroma_pixels_tab[0](s->dest[2] + off_uv, srcV, s->uvlinesize, 8, uvmx, uvmy);
} else {
v->vc1dsp.avg_no_rnd_vc1_chroma_pixels_tab[0](s->dest[1] + off_uv, srcU, s->uvlinesize, 8, uvmx, uvmy);
v->vc1dsp.avg_no_rnd_vc1_chroma_pixels_tab[0](s->dest[2] + off_uv, srcV, s->uvlinesize, 8, uvmx, uvmy);
@@ -5195,6 +5198,8 @@ static av_cold int vc1_decode_init(AVCodecContext *avctx)
// again once we know all necessary settings.
// That this is necessary might indicate a bug.
ff_vc1_decode_end(avctx);
+
+ ff_h264chroma_init(&v->h264chroma, 8);
ff_vc1dsp_init(&v->vc1dsp);
if (avctx->codec_id == AV_CODEC_ID_WMV3 || avctx->codec_id == AV_CODEC_ID_WMV3IMAGE) {
diff --git a/libavcodec/vc1dsp.c b/libavcodec/vc1dsp.c
index 9ddd7c7b96..7ad63afd78 100644
--- a/libavcodec/vc1dsp.c
+++ b/libavcodec/vc1dsp.c
@@ -25,9 +25,10 @@
*
*/
-#include "vc1dsp.h"
#include "libavutil/avassert.h"
#include "libavutil/common.h"
+#include "h264chroma.h"
+#include "vc1dsp.h"
/** Apply overlap transform to horizontal edge
diff --git a/libavcodec/vc1dsp.h b/libavcodec/vc1dsp.h
index 1d4c813528..6540eff0b8 100644
--- a/libavcodec/vc1dsp.h
+++ b/libavcodec/vc1dsp.h
@@ -29,6 +29,7 @@
#define AVCODEC_VC1DSP_H
#include "dsputil.h"
+#include "h264chroma.h"
typedef void (*vc1op_pixels_func)(uint8_t *block/*align width (8 or 16)*/, const uint8_t *pixels/*align 1*/, int line_size, int h);
diff --git a/libavcodec/vp56.c b/libavcodec/vp56.c
index 6cc377036c..b4af2cac71 100644
--- a/libavcodec/vp56.c
+++ b/libavcodec/vp56.c
@@ -26,7 +26,7 @@
#include "avcodec.h"
#include "bytestream.h"
#include "internal.h"
-
+#include "h264chroma.h"
#include "vp56.h"
#include "vp56data.h"
@@ -706,6 +706,7 @@ av_cold void ff_vp56_init_context(AVCodecContext *avctx, VP56Context *s,
avctx->pix_fmt = has_alpha ? AV_PIX_FMT_YUVA420P : AV_PIX_FMT_YUV420P;
ff_dsputil_init(&s->dsp, avctx);
+ ff_h264chroma_init(&s->h264chroma, 8);
ff_videodsp_init(&s->vdsp, 8);
ff_vp3dsp_init(&s->vp3dsp, avctx->flags);
ff_vp56dsp_init(&s->vp56dsp, avctx->codec->id);
diff --git a/libavcodec/vp56.h b/libavcodec/vp56.h
index 223a202a03..14c130f430 100644
--- a/libavcodec/vp56.h
+++ b/libavcodec/vp56.h
@@ -30,6 +30,7 @@
#include "dsputil.h"
#include "get_bits.h"
#include "bytestream.h"
+#include "h264chroma.h"
#include "videodsp.h"
#include "vp3dsp.h"
#include "vp56dsp.h"
@@ -95,6 +96,7 @@ typedef struct VP56Model {
struct vp56_context {
AVCodecContext *avctx;
DSPContext dsp;
+ H264ChromaContext h264chroma;
VideoDSPContext vdsp;
VP3DSPContext vp3dsp;
VP56DSPContext vp56dsp;
diff --git a/libavcodec/vp6.c b/libavcodec/vp6.c
index c6ca13c5c0..04afdf8236 100644
--- a/libavcodec/vp6.c
+++ b/libavcodec/vp6.c
@@ -536,8 +536,8 @@ static void vp6_filter_diag2(VP56Context *s, uint8_t *dst, uint8_t *src,
int stride, int h_weight, int v_weight)
{
uint8_t *tmp = s->edge_emu_buffer+16;
- s->dsp.put_h264_chroma_pixels_tab[0](tmp, src, stride, 9, h_weight, 0);
- s->dsp.put_h264_chroma_pixels_tab[0](dst, tmp, stride, 8, 0, v_weight);
+ s->h264chroma.put_h264_chroma_pixels_tab[0](tmp, src, stride, 9, h_weight, 0);
+ s->h264chroma.put_h264_chroma_pixels_tab[0](dst, tmp, stride, 8, 0, v_weight);
}
static void vp6_filter(VP56Context *s, uint8_t *dst, uint8_t *src,
@@ -583,7 +583,7 @@ static void vp6_filter(VP56Context *s, uint8_t *dst, uint8_t *src,
}
} else {
if (!x8 || !y8) {
- s->dsp.put_h264_chroma_pixels_tab[0](dst, src+offset1, stride, 8, x8, y8);
+ s->h264chroma.put_h264_chroma_pixels_tab[0](dst, src + offset1, stride, 8, x8, y8);
} else {
vp6_filter_diag2(s, dst, src+offset1 + ((mv.x^mv.y)>>31), stride, x8, y8);
}
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 23348a6b3d..111fdedf47 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -51,7 +51,8 @@ YASM-OBJS-$(CONFIG_DIRAC_DECODER) += x86/diracdsp_mmx.o x86/diracdsp_yasm.o
x86/dwt_yasm.o
YASM-OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc.o
YASM-OBJS-$(CONFIG_FFT) += x86/fft.o
-YASM-OBJS-$(CONFIG_H264CHROMA) += x86/h264_chromamc.o \
+YASM-OBJS-$(CONFIG_H264CHROMA) += x86/h264chroma_init.o \
+ x86/h264_chromamc.o \
x86/h264_chromamc_10bit.o
YASM-OBJS-$(CONFIG_H264DSP) += x86/h264_deblock.o \
x86/h264_deblock_10bit.o \
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
index 75edf4a7fe..b528d4ba95 100644
--- a/libavcodec/x86/dsputil_mmx.c
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -1527,49 +1527,6 @@ void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
-void ff_put_h264_chroma_mc8_rnd_mmx (uint8_t *dst, uint8_t *src,
- int stride, int h, int x, int y);
-void ff_avg_h264_chroma_mc8_rnd_mmxext(uint8_t *dst, uint8_t *src,
- int stride, int h, int x, int y);
-void ff_avg_h264_chroma_mc8_rnd_3dnow(uint8_t *dst, uint8_t *src,
- int stride, int h, int x, int y);
-
-void ff_put_h264_chroma_mc4_mmx (uint8_t *dst, uint8_t *src,
- int stride, int h, int x, int y);
-void ff_avg_h264_chroma_mc4_mmxext (uint8_t *dst, uint8_t *src,
- int stride, int h, int x, int y);
-void ff_avg_h264_chroma_mc4_3dnow (uint8_t *dst, uint8_t *src,
- int stride, int h, int x, int y);
-
-void ff_put_h264_chroma_mc2_mmxext (uint8_t *dst, uint8_t *src,
- int stride, int h, int x, int y);
-void ff_avg_h264_chroma_mc2_mmxext (uint8_t *dst, uint8_t *src,
- int stride, int h, int x, int y);
-
-void ff_put_h264_chroma_mc8_rnd_ssse3(uint8_t *dst, uint8_t *src,
- int stride, int h, int x, int y);
-void ff_put_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src,
- int stride, int h, int x, int y);
-
-void ff_avg_h264_chroma_mc8_rnd_ssse3(uint8_t *dst, uint8_t *src,
- int stride, int h, int x, int y);
-void ff_avg_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src,
- int stride, int h, int x, int y);
-
-#define CHROMA_MC(OP, NUM, DEPTH, OPT) \
-void ff_ ## OP ## _h264_chroma_mc ## NUM ## _ ## DEPTH ## _ ## OPT \
- (uint8_t *dst, uint8_t *src, \
- int stride, int h, int x, int y);
-
-CHROMA_MC(put, 2, 10, mmxext)
-CHROMA_MC(avg, 2, 10, mmxext)
-CHROMA_MC(put, 4, 10, mmxext)
-CHROMA_MC(avg, 4, 10, mmxext)
-CHROMA_MC(put, 8, 10, sse2)
-CHROMA_MC(avg, 8, 10, sse2)
-CHROMA_MC(put, 8, 10, avx)
-CHROMA_MC(avg, 8, 10, avx)
-
#if HAVE_INLINE_ASM
/* CAVS-specific */
@@ -1859,11 +1816,6 @@ static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx,
#endif /* HAVE_INLINE_ASM */
#if HAVE_YASM
- if (!high_bit_depth && CONFIG_H264CHROMA) {
- c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_rnd_mmx;
- c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_mmx;
- }
-
c->vector_clip_int32 = ff_vector_clip_int32_mmx;
#endif
@@ -1920,19 +1872,6 @@ static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx,
c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_mmxext;
}
- if (!high_bit_depth && CONFIG_H264CHROMA) {
- c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_mmxext;
- c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_mmxext;
- c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_mmxext;
- c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_mmxext;
- }
- if (bit_depth == 10 && CONFIG_H264CHROMA) {
- c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_10_mmxext;
- c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_10_mmxext;
- c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_10_mmxext;
- c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_10_mmxext;
- }
-
/* slower than cmov version on AMD */
if (!(mm_flags & AV_CPU_FLAG_3DNOW))
c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmxext;
@@ -1985,11 +1924,6 @@ static av_cold void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx,
c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_exact_3dnow;
c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_exact_3dnow;
}
-
- if (!high_bit_depth && CONFIG_H264CHROMA) {
- c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_3dnow;
- c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_3dnow;
- }
#endif /* HAVE_YASM */
}
@@ -2042,13 +1976,6 @@ static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx,
}
}
- if (bit_depth == 10) {
- if (CONFIG_H264CHROMA) {
- c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_sse2;
- c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_sse2;
- }
- }
-
c->scalarproduct_int16 = ff_scalarproduct_int16_sse2;
c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_sse2;
if (mm_flags & AV_CPU_FLAG_ATOM) {
@@ -2069,14 +1996,6 @@ static av_cold void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx,
int mm_flags)
{
#if HAVE_SSSE3_EXTERNAL
- const int high_bit_depth = avctx->bits_per_raw_sample > 8;
-
- if (!high_bit_depth && CONFIG_H264CHROMA) {
- c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_rnd_ssse3;
- c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_ssse3;
- c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_ssse3;
- c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_ssse3;
- }
c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3;
if (mm_flags & AV_CPU_FLAG_SSE4) // not really sse4, just slow on Conroe
c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_sse4;
@@ -2099,20 +2018,6 @@ static av_cold void dsputil_init_sse4(DSPContext *c, AVCodecContext *avctx,
#endif /* HAVE_SSE4_EXTERNAL */
}
-static av_cold void dsputil_init_avx(DSPContext *c, AVCodecContext *avctx, int mm_flags)
-{
-#if HAVE_AVX_EXTERNAL
- const int bit_depth = avctx->bits_per_raw_sample;
-
- if (bit_depth == 10) {
- if (CONFIG_H264CHROMA) {
- c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_avx;
- c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_avx;
- }
- }
-#endif /* HAVE_AVX_EXTERNAL */
-}
-
av_cold void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx)
{
int mm_flags = av_get_cpu_flags();
@@ -2185,9 +2090,6 @@ av_cold void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx)
if (mm_flags & AV_CPU_FLAG_SSE4)
dsputil_init_sse4(c, avctx, mm_flags);
- if (mm_flags & AV_CPU_FLAG_AVX)
- dsputil_init_avx(c, avctx, mm_flags);
-
if (CONFIG_ENCODERS)
ff_dsputilenc_init_mmx(c, avctx);
}
diff --git a/libavcodec/x86/h264chroma_init.c b/libavcodec/x86/h264chroma_init.c
new file mode 100644
index 0000000000..b9783d6693
--- /dev/null
+++ b/libavcodec/x86/h264chroma_init.c
@@ -0,0 +1,116 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "config.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/h264chroma.h"
+
+void ff_put_h264_chroma_mc8_rnd_mmx (uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+void ff_avg_h264_chroma_mc8_rnd_mmxext(uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+void ff_avg_h264_chroma_mc8_rnd_3dnow(uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+
+void ff_put_h264_chroma_mc4_mmx (uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+void ff_avg_h264_chroma_mc4_mmxext (uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+void ff_avg_h264_chroma_mc4_3dnow (uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+
+void ff_put_h264_chroma_mc2_mmxext (uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+void ff_avg_h264_chroma_mc2_mmxext (uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+
+void ff_put_h264_chroma_mc8_rnd_ssse3(uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+void ff_put_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+
+void ff_avg_h264_chroma_mc8_rnd_ssse3(uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+void ff_avg_h264_chroma_mc4_ssse3 (uint8_t *dst, uint8_t *src,
+ int stride, int h, int x, int y);
+
+#define CHROMA_MC(OP, NUM, DEPTH, OPT) \
+void ff_ ## OP ## _h264_chroma_mc ## NUM ## _ ## DEPTH ## _ ## OPT \
+ (uint8_t *dst, uint8_t *src, \
+ int stride, int h, int x, int y); /* expands to a prototype only: ff_<OP>_h264_chroma_mc<NUM>_<DEPTH>_<OPT>; the definitions live outside this file */
+
+CHROMA_MC(put, 2, 10, mmxext) /* DEPTH is 10 in every instantiation below; these names are assigned in the bit_depth == 10 branches of ff_h264chroma_init_x86 */
+CHROMA_MC(avg, 2, 10, mmxext)
+CHROMA_MC(put, 4, 10, mmxext)
+CHROMA_MC(avg, 4, 10, mmxext)
+CHROMA_MC(put, 8, 10, sse2)
+CHROMA_MC(avg, 8, 10, sse2)
+CHROMA_MC(put, 8, 10, avx)
+CHROMA_MC(avg, 8, 10, avx)
+
+void ff_h264chroma_init_x86(H264ChromaContext *c, int bit_depth) /* Fill c's put/avg chroma MC tables with x86 implementations chosen from the CPU flags and bit_depth. */
+{
+ int high_bit_depth = bit_depth > 8;
+ int mm_flags = av_get_cpu_flags(); /* the checks below run in order, so a later matching branch overwrites entries set by an earlier one */
+
+ if (EXTERNAL_MMX(mm_flags) && !high_bit_depth) { /* NOTE(review): EXTERNAL_* presumably test both the CPU flag and external-asm availability - confirm in libavutil/x86/cpu.h */
+ c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_rnd_mmx;
+ c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_mmx;
+ }
+
+ if (EXTERNAL_AMD3DNOW(mm_flags) && !high_bit_depth) { /* checked before MMXEXT, so these avg entries are replaced when both conditions hold */
+ c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_3dnow;
+ c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_3dnow;
+ }
+
+ if (EXTERNAL_MMXEXT(mm_flags) && !high_bit_depth) { /* bit_depth <= 8: all three avg entries plus the mc2 put entry */
+ c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_mmxext;
+ c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_mmxext;
+ c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_mmxext;
+ c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_mmxext;
+ }
+
+ if (EXTERNAL_MMXEXT(mm_flags) && bit_depth == 10) { /* only exactly 10 selects the *_10_* functions; other depths above 8 get no x86 entries from this function */
+ c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_10_mmxext;
+ c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_10_mmxext;
+ c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_10_mmxext;
+ c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_10_mmxext;
+ }
+
+ if (EXTERNAL_SSE2(mm_flags) && bit_depth == 10) { /* 10-bit mc8 entries; replaced by the AVX branch at the end when that also matches */
+ c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_sse2;
+ c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_sse2;
+ }
+
+ if (EXTERNAL_SSSE3(mm_flags) && !high_bit_depth) { /* bit_depth <= 8: replaces the mc8/mc4 put and avg entries set by the MMX/3DNow!/MMXEXT branches */
+ c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_rnd_ssse3;
+ c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_rnd_ssse3;
+ c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_ssse3;
+ c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_ssse3;
+ }
+
+ if (EXTERNAL_AVX(mm_flags) && bit_depth == 10) {
+ // AVX implies !cache64.
+ // TODO: Port cache(32|64) detection from x264.
+ c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_avx;
+ c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_avx;
+ }
+}