diff options
author | James Almer <jamrial@gmail.com> | 2017-01-08 11:48:05 -0300 |
---|---|---|
committer | James Almer <jamrial@gmail.com> | 2017-01-12 22:53:05 -0300 |
commit | 47f212329e5d73c81e2c67acd6a481bc0fe687b2 (patch) | |
tree | 5457e6f0cad40c63b67f86331659f880b4e576b8 /libavcodec | |
parent | cf9ef839606dd50f779c395d8a277de143f7e5b2 (diff) | |
download | ffmpeg-47f212329e5d73c81e2c67acd6a481bc0fe687b2.tar.gz |
huffyuvdsp: move functions only used by huffyuv from lossless_videodsp
Signed-off-by: James Almer <jamrial@gmail.com>
Diffstat (limited to 'libavcodec')
-rw-r--r-- | libavcodec/huffyuvdec.c | 8 | ||||
-rw-r--r-- | libavcodec/huffyuvdsp.c | 36 | ||||
-rw-r--r-- | libavcodec/huffyuvdsp.h | 13 | ||||
-rw-r--r-- | libavcodec/lagarith.c | 2 | ||||
-rw-r--r-- | libavcodec/lossless_videodsp.c | 36 | ||||
-rw-r--r-- | libavcodec/lossless_videodsp.h | 9 | ||||
-rw-r--r-- | libavcodec/magicyuv.c | 2 | ||||
-rw-r--r-- | libavcodec/ppc/lossless_videodsp_altivec.c | 2 | ||||
-rw-r--r-- | libavcodec/utvideodec.c | 2 | ||||
-rw-r--r-- | libavcodec/vble.c | 2 | ||||
-rw-r--r-- | libavcodec/x86/huffyuvdsp.asm | 137 | ||||
-rw-r--r-- | libavcodec/x86/huffyuvdsp_init.c | 13 | ||||
-rw-r--r-- | libavcodec/x86/lossless_videodsp.asm | 136 | ||||
-rw-r--r-- | libavcodec/x86/lossless_videodsp_init.c | 14 |
14 files changed, 208 insertions, 204 deletions
diff --git a/libavcodec/huffyuvdec.c b/libavcodec/huffyuvdec.c index d0682040b3..c79dda4c90 100644 --- a/libavcodec/huffyuvdec.c +++ b/libavcodec/huffyuvdec.c @@ -297,8 +297,8 @@ static av_cold int decode_init(AVCodecContext *avctx) if (ret < 0) return ret; - ff_huffyuvdsp_init(&s->hdsp); - ff_llviddsp_init(&s->llviddsp, avctx); + ff_huffyuvdsp_init(&s->hdsp, avctx); + ff_llviddsp_init(&s->llviddsp); memset(s->vlc, 0, 4 * sizeof(VLC)); s->interlaced = avctx->height > 288; @@ -891,7 +891,7 @@ static void add_bytes(HYuvContext *s, uint8_t *dst, uint8_t *src, int w) if (s->bps <= 8) { s->llviddsp.add_bytes(dst, src, w); } else { - s->llviddsp.add_int16((uint16_t*)dst, (const uint16_t*)src, s->n - 1, w); + s->hdsp.add_int16((uint16_t*)dst, (const uint16_t*)src, s->n - 1, w); } } @@ -900,7 +900,7 @@ static void add_median_prediction(HYuvContext *s, uint8_t *dst, const uint8_t *s if (s->bps <= 8) { s->llviddsp.add_median_pred(dst, src, diff, w, left, left_top); } else { - s->llviddsp.add_hfyu_median_pred_int16((uint16_t *)dst, (const uint16_t *)src, (const uint16_t *)diff, s->n-1, w, left, left_top); + s->hdsp.add_hfyu_median_pred_int16((uint16_t *)dst, (const uint16_t *)src, (const uint16_t *)diff, s->n-1, w, left, left_top); } } static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, diff --git a/libavcodec/huffyuvdsp.c b/libavcodec/huffyuvdsp.c index 2adfc19ff3..759ffda0b8 100644 --- a/libavcodec/huffyuvdsp.c +++ b/libavcodec/huffyuvdsp.c @@ -23,6 +23,36 @@ #include "mathops.h" #include "huffyuvdsp.h" +static void add_int16_c(uint16_t *dst, const uint16_t *src, unsigned mask, int w){ + long i; + unsigned long pw_lsb = (mask >> 1) * 0x0001000100010001ULL; + unsigned long pw_msb = pw_lsb + 0x0001000100010001ULL; + for (i = 0; i <= w - (int)sizeof(long)/2; i += sizeof(long)/2) { + long a = *(long*)(src+i); + long b = *(long*)(dst+i); + *(long*)(dst+i) = ((a&pw_lsb) + (b&pw_lsb)) ^ ((a^b)&pw_msb); + } + for(; i<w; i++) + dst[i] = (dst[i] + src[i]) & mask; +} + +static void add_hfyu_median_pred_int16_c(uint16_t *dst, const uint16_t *src, const uint16_t *diff, unsigned mask, int w, int *left, int *left_top){ + int i; + uint16_t l, lt; + + l = *left; + lt = *left_top; + + for(i=0; i<w; i++){ + l = (mid_pred(l, src[i], (l + src[i] - lt) & mask) + diff[i]) & mask; + lt = src[i]; + dst[i] = l; + } + + *left = l; + *left_top = lt; +} + static void add_hfyu_left_pred_bgr32_c(uint8_t *dst, const uint8_t *src, intptr_t w, uint8_t *left) { @@ -47,10 +77,12 @@ static void add_hfyu_left_pred_bgr32_c(uint8_t *dst, const uint8_t *src, left[A] = a; } -av_cold void ff_huffyuvdsp_init(HuffYUVDSPContext *c) +av_cold void ff_huffyuvdsp_init(HuffYUVDSPContext *c, AVCodecContext *avctx) { + c->add_int16 = add_int16_c; + c->add_hfyu_median_pred_int16 = add_hfyu_median_pred_int16_c; c->add_hfyu_left_pred_bgr32 = add_hfyu_left_pred_bgr32_c; if (ARCH_X86) - ff_huffyuvdsp_init_x86(c); + ff_huffyuvdsp_init_x86(c, avctx); } diff --git a/libavcodec/huffyuvdsp.h b/libavcodec/huffyuvdsp.h index eaad1affaf..7680f2ec9c 100644 --- a/libavcodec/huffyuvdsp.h +++ b/libavcodec/huffyuvdsp.h @@ -21,6 +21,7 @@ #include <stdint.h> #include "config.h" +#include "avcodec.h" #if HAVE_BIGENDIAN #define B 3 @@ -35,12 +36,18 @@ #endif typedef struct HuffYUVDSPContext { + void (*add_int16)(uint16_t *dst/*align 16*/, const uint16_t *src/*align 16*/, + unsigned mask, int w); + + void (*add_hfyu_median_pred_int16)(uint16_t *dst, const uint16_t *top, + const uint16_t *diff, unsigned mask, + int w, int *left, int *left_top); void (*add_hfyu_left_pred_bgr32)(uint8_t *dst, const uint8_t *src, intptr_t w, uint8_t *left); } HuffYUVDSPContext; -void ff_huffyuvdsp_init(HuffYUVDSPContext *c); -void ff_huffyuvdsp_init_ppc(HuffYUVDSPContext *c); -void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c); +void ff_huffyuvdsp_init(HuffYUVDSPContext *c, AVCodecContext *avctx); +void ff_huffyuvdsp_init_ppc(HuffYUVDSPContext *c, AVCodecContext *avctx); +void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c, AVCodecContext *avctx); #endif /* AVCODEC_HUFFYUVDSP_H */ diff --git a/libavcodec/lagarith.c b/libavcodec/lagarith.c index 96a4b5c653..f03305fdd4 100644 --- a/libavcodec/lagarith.c +++ b/libavcodec/lagarith.c @@ -725,7 +725,7 @@ static av_cold int lag_decode_init(AVCodecContext *avctx) LagarithContext *l = avctx->priv_data; l->avctx = avctx; - ff_llviddsp_init(&l->llviddsp, avctx); + ff_llviddsp_init(&l->llviddsp); return 0; } diff --git a/libavcodec/lossless_videodsp.c b/libavcodec/lossless_videodsp.c index b93d4e7214..54ce677dd8 100644 --- a/libavcodec/lossless_videodsp.c +++ b/libavcodec/lossless_videodsp.c @@ -79,36 +79,6 @@ static int add_left_pred_c(uint8_t *dst, const uint8_t *src, intptr_t w, return acc; } -static void add_int16_c(uint16_t *dst, const uint16_t *src, unsigned mask, int w){ - long i; - unsigned long pw_lsb = (mask >> 1) * 0x0001000100010001ULL; - unsigned long pw_msb = pw_lsb + 0x0001000100010001ULL; - for (i = 0; i <= w - (int)sizeof(long)/2; i += sizeof(long)/2) { - long a = *(long*)(src+i); - long b = *(long*)(dst+i); - *(long*)(dst+i) = ((a&pw_lsb) + (b&pw_lsb)) ^ ((a^b)&pw_msb); - } - for(; i<w; i++) - dst[i] = (dst[i] + src[i]) & mask; -} - -static void add_hfyu_median_pred_int16_c(uint16_t *dst, const uint16_t *src, const uint16_t *diff, unsigned mask, int w, int *left, int *left_top){ - int i; - uint16_t l, lt; - - l = *left; - lt = *left_top; - - for(i=0; i<w; i++){ - l = (mid_pred(l, src[i], (l + src[i] - lt) & mask) + diff[i]) & mask; - lt = src[i]; - dst[i] = l; - } - - *left = l; - *left_top = lt; -} - static int add_hfyu_left_pred_int16_c(uint16_t *dst, const uint16_t *src, unsigned mask, int w, unsigned acc){ int i; @@ -129,16 +99,14 @@ static int add_hfyu_left_pred_int16_c(uint16_t *dst, const uint16_t *src, unsign } -void ff_llviddsp_init(LLVidDSPContext *c, AVCodecContext *avctx) +void ff_llviddsp_init(LLVidDSPContext *c) { c->add_bytes = add_bytes_c; c->add_median_pred = add_median_pred_c; c->add_left_pred = add_left_pred_c; - c->add_int16 = add_int16_c; c->add_hfyu_left_pred_int16 = add_hfyu_left_pred_int16_c; - c->add_hfyu_median_pred_int16 = add_hfyu_median_pred_int16_c; if (ARCH_X86) - ff_llviddsp_init_x86(c, avctx); + ff_llviddsp_init_x86(c); } diff --git a/libavcodec/lossless_videodsp.h b/libavcodec/lossless_videodsp.h index 7f3168339f..b8105d3815 100644 --- a/libavcodec/lossless_videodsp.h +++ b/libavcodec/lossless_videodsp.h @@ -34,14 +34,11 @@ typedef struct LLVidDSPContext { int (*add_left_pred)(uint8_t *dst, const uint8_t *src, intptr_t w, int left); - void (*add_int16)(uint16_t *dst/*align 16*/, const uint16_t *src/*align 16*/, unsigned mask, int w); - - void (*add_hfyu_median_pred_int16)(uint16_t *dst, const uint16_t *top, const uint16_t *diff, unsigned mask, int w, int *left, int *left_top); int (*add_hfyu_left_pred_int16)(uint16_t *dst, const uint16_t *src, unsigned mask, int w, unsigned left); } LLVidDSPContext; -void ff_llviddsp_init(LLVidDSPContext *llviddsp, AVCodecContext *avctx); -void ff_llviddsp_init_x86(LLVidDSPContext *llviddsp, AVCodecContext *avctx); -void ff_llviddsp_init_ppc(LLVidDSPContext *llviddsp, AVCodecContext *avctx); +void ff_llviddsp_init(LLVidDSPContext *llviddsp); +void ff_llviddsp_init_x86(LLVidDSPContext *llviddsp); +void ff_llviddsp_init_ppc(LLVidDSPContext *llviddsp); #endif //AVCODEC_LOSSLESS_VIDEODSP_H diff --git a/libavcodec/magicyuv.c b/libavcodec/magicyuv.c index 4e78ff1e21..ac0737caa4 100644 --- a/libavcodec/magicyuv.c +++ b/libavcodec/magicyuv.c @@ -697,7 +697,7 @@ static int magy_init_thread_copy(AVCodecContext *avctx) static av_cold int magy_decode_init(AVCodecContext *avctx) { MagicYUVContext *s = avctx->priv_data; - ff_llviddsp_init(&s->llviddsp, avctx); + ff_llviddsp_init(&s->llviddsp); return 0; } diff --git a/libavcodec/ppc/lossless_videodsp_altivec.c b/libavcodec/ppc/lossless_videodsp_altivec.c index e17abaa8f9..c388dc33af 100644 --- a/libavcodec/ppc/lossless_videodsp_altivec.c +++ b/libavcodec/ppc/lossless_videodsp_altivec.c @@ -51,7 +51,7 @@ static void add_bytes_altivec(uint8_t *dst, uint8_t *src, intptr_t w) } #endif /* HAVE_ALTIVEC */ -av_cold void ff_llviddsp_init_ppc(LLVidDSPContext *c, AVCodecContext *avctx) +av_cold void ff_llviddsp_init_ppc(LLVidDSPContext *c) { #if HAVE_ALTIVEC if (!PPC_ALTIVEC(av_get_cpu_flags())) diff --git a/libavcodec/utvideodec.c b/libavcodec/utvideodec.c index 7d1d35b76a..38de2c8cc5 100644 --- a/libavcodec/utvideodec.c +++ b/libavcodec/utvideodec.c @@ -827,7 +827,7 @@ static av_cold int decode_init(AVCodecContext *avctx) c->avctx = avctx; ff_bswapdsp_init(&c->bdsp); - ff_llviddsp_init(&c->llviddsp, avctx); + ff_llviddsp_init(&c->llviddsp); if (avctx->extradata_size >= 16) { av_log(avctx, AV_LOG_DEBUG, "Encoder version %d.%d.%d.%d\n", diff --git a/libavcodec/vble.c b/libavcodec/vble.c index 7598d306cc..4a07ab3fa5 100644 --- a/libavcodec/vble.c +++ b/libavcodec/vble.c @@ -185,7 +185,7 @@ static av_cold int vble_decode_init(AVCodecContext *avctx) /* Stash for later use */ ctx->avctx = avctx; - ff_llviddsp_init(&ctx->llviddsp, avctx); + ff_llviddsp_init(&ctx->llviddsp); avctx->pix_fmt = AV_PIX_FMT_YUV420P; avctx->bits_per_raw_sample = 8; diff --git a/libavcodec/x86/huffyuvdsp.asm b/libavcodec/x86/huffyuvdsp.asm index 0befd3baa8..0d8cae354a 100644 --- a/libavcodec/x86/huffyuvdsp.asm +++ b/libavcodec/x86/huffyuvdsp.asm @@ -24,6 +24,78 @@ SECTION .text + +%macro INT16_LOOP 2 ; %1 = a/u (aligned/unaligned), %2 = add/sub + movd m4, maskd + SPLATW m4, m4 + add wd, wd + test wq, 2*mmsize - 1 + jz %%.tomainloop + push tmpq +%%.wordloop: + sub wq, 2 +%ifidn %2, add + mov tmpw, [srcq+wq] + add tmpw, [dstq+wq] +%else + mov tmpw, [src1q+wq] + sub tmpw, [src2q+wq] +%endif + and tmpw, maskw + mov [dstq+wq], tmpw + test wq, 2*mmsize - 1 + jnz %%.wordloop + pop tmpq +%%.tomainloop: +%ifidn %2, add + add srcq, wq +%else + add src1q, wq + add src2q, wq +%endif + add dstq, wq + neg wq + jz %%.end +%%.loop: +%ifidn %2, add + mov%1 m0, [srcq+wq] + mov%1 m1, [dstq+wq] + mov%1 m2, [srcq+wq+mmsize] + mov%1 m3, [dstq+wq+mmsize] +%else + mov%1 m0, [src1q+wq] + mov%1 m1, [src2q+wq] + mov%1 m2, [src1q+wq+mmsize] + mov%1 m3, [src2q+wq+mmsize] +%endif + p%2w m0, m1 + p%2w m2, m3 + pand m0, m4 + pand m2, m4 + mov%1 [dstq+wq] , m0 + mov%1 [dstq+wq+mmsize], m2 + add wq, 2*mmsize + jl %%.loop +%%.end: + RET +%endmacro + +%if ARCH_X86_32 +INIT_MMX mmx +cglobal add_int16, 4,4,5, dst, src, mask, w, tmp + INT16_LOOP a, add +%endif + +INIT_XMM sse2 +cglobal add_int16, 4,4,5, dst, src, mask, w, tmp + test srcq, mmsize-1 + jnz .unaligned + test dstq, mmsize-1 + jnz .unaligned + INT16_LOOP a, add +.unaligned: + INT16_LOOP u, add + ; void add_hfyu_left_pred_bgr32(uint8_t *dst, const uint8_t *src, ; intptr_t w, uint8_t *left) %macro LEFT_BGR32 0 @@ -63,3 +135,68 @@ LEFT_BGR32 %endif INIT_XMM sse2 LEFT_BGR32 + +; void add_hfyu_median_prediction_mmxext(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int mask, int w, int *left, int *left_top) +INIT_MMX mmxext +cglobal add_hfyu_median_pred_int16, 7,7,0, dst, top, diff, mask, w, left, left_top + add wd, wd + movd mm6, maskd + SPLATW mm6, mm6 + movq mm0, [topq] + movq mm2, mm0 + movd mm4, [left_topq] + psllq mm2, 16 + movq mm1, mm0 + por mm4, mm2 + movd mm3, [leftq] + psubw mm0, mm4 ; t-tl + add dstq, wq + add topq, wq + add diffq, wq + neg wq + jmp .skip +.loop: + movq mm4, [topq+wq] + movq mm0, mm4 + psllq mm4, 16 + por mm4, mm1 + movq mm1, mm0 ; t + psubw mm0, mm4 ; t-tl +.skip: + movq mm2, [diffq+wq] +%assign i 0 +%rep 4 + movq mm4, mm0 + paddw mm4, mm3 ; t-tl+l + pand mm4, mm6 + movq mm5, mm3 + pmaxsw mm3, mm1 + pminsw mm5, mm1 + pminsw mm3, mm4 + pmaxsw mm3, mm5 ; median + paddw mm3, mm2 ; +residual + pand mm3, mm6 +%if i==0 + movq mm7, mm3 + psllq mm7, 48 +%else + movq mm4, mm3 + psrlq mm7, 16 + psllq mm4, 48 + por mm7, mm4 +%endif +%if i<3 + psrlq mm0, 16 + psrlq mm1, 16 + psrlq mm2, 16 +%endif +%assign i i+1 +%endrep + movq [dstq+wq], mm7 + add wq, 8 + jl .loop + movzx r2d, word [dstq-2] + mov [leftq], r2d + movzx r2d, word [topq-2] + mov [left_topq], r2d + RET diff --git a/libavcodec/x86/huffyuvdsp_init.c b/libavcodec/x86/huffyuvdsp_init.c index fc87c3844b..f72d759ef2 100644 --- a/libavcodec/x86/huffyuvdsp_init.c +++ b/libavcodec/x86/huffyuvdsp_init.c @@ -21,24 +21,35 @@ #include "config.h" #include "libavutil/attributes.h" #include "libavutil/cpu.h" +#include "libavutil/pixdesc.h" #include "libavutil/x86/asm.h" #include "libavutil/x86/cpu.h" #include "libavcodec/huffyuvdsp.h" +void ff_add_int16_mmx(uint16_t *dst, const uint16_t *src, unsigned mask, int w); +void ff_add_int16_sse2(uint16_t *dst, const uint16_t *src, unsigned mask, int w); void ff_add_hfyu_left_pred_bgr32_mmx(uint8_t *dst, const uint8_t *src, intptr_t w, uint8_t *left); void ff_add_hfyu_left_pred_bgr32_sse2(uint8_t *dst, const uint8_t *src, intptr_t w, uint8_t *left); +void ff_add_hfyu_median_pred_int16_mmxext(uint16_t *dst, const uint16_t *top, const uint16_t *diff, unsigned mask, int w, int *left, int *left_top); -av_cold void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c) +av_cold void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c, AVCodecContext *avctx) { int cpu_flags = av_get_cpu_flags(); + const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(avctx->pix_fmt); if (ARCH_X86_32 && EXTERNAL_MMX(cpu_flags)) { c->add_hfyu_left_pred_bgr32 = ff_add_hfyu_left_pred_bgr32_mmx; + c->add_int16 = ff_add_int16_mmx; + } + + if (EXTERNAL_MMXEXT(cpu_flags) && pix_desc && pix_desc->comp[0].depth<16) { + c->add_hfyu_median_pred_int16 = ff_add_hfyu_median_pred_int16_mmxext; } if (EXTERNAL_SSE2(cpu_flags)) { + c->add_int16 = ff_add_int16_sse2; c->add_hfyu_left_pred_bgr32 = ff_add_hfyu_left_pred_bgr32_sse2; } } diff --git a/libavcodec/x86/lossless_videodsp.asm b/libavcodec/x86/lossless_videodsp.asm index bcc40ec061..f82f04f7fa 100644 --- a/libavcodec/x86/lossless_videodsp.asm +++ b/libavcodec/x86/lossless_videodsp.asm @@ -217,77 +217,6 @@ ADD_BYTES INIT_XMM sse2 ADD_BYTES -%macro INT16_LOOP 2 ; %1 = a/u (aligned/unaligned), %2 = add/sub - movd m4, maskd - SPLATW m4, m4 - add wd, wd - test wq, 2*mmsize - 1 - jz %%.tomainloop - push tmpq -%%.wordloop: - sub wq, 2 -%ifidn %2, add - mov tmpw, [srcq+wq] - add tmpw, [dstq+wq] -%else - mov tmpw, [src1q+wq] - sub tmpw, [src2q+wq] -%endif - and tmpw, maskw - mov [dstq+wq], tmpw - test wq, 2*mmsize - 1 - jnz %%.wordloop - pop tmpq -%%.tomainloop: -%ifidn %2, add - add srcq, wq -%else - add src1q, wq - add src2q, wq -%endif - add dstq, wq - neg wq - jz %%.end -%%.loop: -%ifidn %2, add - mov%1 m0, [srcq+wq] - mov%1 m1, [dstq+wq] - mov%1 m2, [srcq+wq+mmsize] - mov%1 m3, [dstq+wq+mmsize] -%else - mov%1 m0, [src1q+wq] - mov%1 m1, [src2q+wq] - mov%1 m2, [src1q+wq+mmsize] - mov%1 m3, [src2q+wq+mmsize] -%endif - p%2w m0, m1 - p%2w m2, m3 - pand m0, m4 - pand m2, m4 - mov%1 [dstq+wq] , m0 - mov%1 [dstq+wq+mmsize], m2 - add wq, 2*mmsize - jl %%.loop -%%.end: - RET -%endmacro - -%if ARCH_X86_32 -INIT_MMX mmx -cglobal add_int16, 4,4,5, dst, src, mask, w, tmp - INT16_LOOP a, add -%endif - -INIT_XMM sse2 -cglobal add_int16, 4,4,5, dst, src, mask, w, tmp - test srcq, mmsize-1 - jnz .unaligned - test dstq, mmsize-1 - jnz .unaligned - INT16_LOOP a, add -.unaligned: - INT16_LOOP u, add - %macro ADD_HFYU_LEFT_LOOP_INT16 2 ; %1 = dst alignment (a/u), %2 = src alignment (a/u) add wd, wd add srcq, wq @@ -359,68 +288,3 @@ cglobal add_hfyu_left_pred_int16, 4,4,8, dst, src, mask, w, left ADD_HFYU_LEFT_LOOP_INT16 u, a .src_unaligned: ADD_HFYU_LEFT_LOOP_INT16 u, u - -; void add_hfyu_median_prediction_mmxext(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int mask, int w, int *left, int *left_top) -INIT_MMX mmxext -cglobal add_hfyu_median_pred_int16, 7,7,0, dst, top, diff, mask, w, left, left_top - add wd, wd - movd mm6, maskd - SPLATW mm6, mm6 - movq mm0, [topq] - movq mm2, mm0 - movd mm4, [left_topq] - psllq mm2, 16 - movq mm1, mm0 - por mm4, mm2 - movd mm3, [leftq] - psubw mm0, mm4 ; t-tl - add dstq, wq - add topq, wq - add diffq, wq - neg wq - jmp .skip -.loop: - movq mm4, [topq+wq] - movq mm0, mm4 - psllq mm4, 16 - por mm4, mm1 - movq mm1, mm0 ; t - psubw mm0, mm4 ; t-tl -.skip: - movq mm2, [diffq+wq] -%assign i 0 -%rep 4 - movq mm4, mm0 - paddw mm4, mm3 ; t-tl+l - pand mm4, mm6 - movq mm5, mm3 - pmaxsw mm3, mm1 - pminsw mm5, mm1 - pminsw mm3, mm4 - pmaxsw mm3, mm5 ; median - paddw mm3, mm2 ; +residual - pand mm3, mm6 -%if i==0 - movq mm7, mm3 - psllq mm7, 48 -%else - movq mm4, mm3 - psrlq mm7, 16 - psllq mm4, 48 - por mm7, mm4 -%endif -%if i<3 - psrlq mm0, 16 - psrlq mm1, 16 - psrlq mm2, 16 -%endif -%assign i i+1 -%endrep - movq [dstq+wq], mm7 - add wq, 8 - jl .loop - movzx r2d, word [dstq-2] - mov [leftq], r2d - movzx r2d, word [topq-2] - mov [left_topq], r2d - RET diff --git a/libavcodec/x86/lossless_videodsp_init.c b/libavcodec/x86/lossless_videodsp_init.c index 2dc662d8b1..dbb63a1f48 100644 --- a/libavcodec/x86/lossless_videodsp_init.c +++ b/libavcodec/x86/lossless_videodsp_init.c @@ -21,7 +21,6 @@ #include "config.h" #include "libavutil/x86/asm.h" #include "../lossless_videodsp.h" -#include "libavutil/pixdesc.h" #include "libavutil/x86/cpu.h" void ff_add_bytes_mmx(uint8_t *dst, uint8_t *src, intptr_t w); @@ -39,11 +38,8 @@ int ff_add_left_pred_ssse3(uint8_t *dst, const uint8_t *src, int ff_add_left_pred_sse4(uint8_t *dst, const uint8_t *src, intptr_t w, int left); -void ff_add_int16_mmx(uint16_t *dst, const uint16_t *src, unsigned mask, int w); -void ff_add_int16_sse2(uint16_t *dst, const uint16_t *src, unsigned mask, int w); int ff_add_hfyu_left_pred_int16_ssse3(uint16_t *dst, const uint16_t *src, unsigned mask, int w, unsigned acc); int ff_add_hfyu_left_pred_int16_sse4(uint16_t *dst, const uint16_t *src, unsigned mask, int w, unsigned acc); -void ff_add_hfyu_median_pred_int16_mmxext(uint16_t *dst, const uint16_t *top, const uint16_t *diff, unsigned mask, int w, int *left, int *left_top); #if HAVE_INLINE_ASM && HAVE_7REGS && ARCH_X86_32 static void add_median_pred_cmov(uint8_t *dst, const uint8_t *top, @@ -83,10 +79,9 @@ static void add_median_pred_cmov(uint8_t *dst, const uint8_t *top, } #endif -void ff_llviddsp_init_x86(LLVidDSPContext *c, AVCodecContext *avctx) +void ff_llviddsp_init_x86(LLVidDSPContext *c) { int cpu_flags = av_get_cpu_flags(); - const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(avctx->pix_fmt); #if HAVE_INLINE_ASM && HAVE_7REGS && ARCH_X86_32 if (cpu_flags & AV_CPU_FLAG_CMOV) @@ -95,7 +90,6 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c, AVCodecContext *avctx) if (ARCH_X86_32 && EXTERNAL_MMX(cpu_flags)) { c->add_bytes = ff_add_bytes_mmx; - c->add_int16 = ff_add_int16_mmx; } if (ARCH_X86_32 && EXTERNAL_MMXEXT(cpu_flags)) { @@ -104,15 +98,9 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c, AVCodecContext *avctx) c->add_median_pred = ff_add_median_pred_mmxext; } - if (EXTERNAL_MMXEXT(cpu_flags) && pix_desc && pix_desc->comp[0].depth<16) { - c->add_hfyu_median_pred_int16 = ff_add_hfyu_median_pred_int16_mmxext; - } - if (EXTERNAL_SSE2(cpu_flags)) { c->add_bytes = ff_add_bytes_sse2; c->add_median_pred = ff_add_median_pred_sse2; - - c->add_int16 = ff_add_int16_sse2; } if (EXTERNAL_SSSE3(cpu_flags)) { |