diff options
author | James Almer <jamrial@gmail.com> | 2017-10-21 12:28:39 -0300 |
---|---|---|
committer | James Almer <jamrial@gmail.com> | 2017-10-21 12:28:39 -0300 |
commit | 53eea3a5693af41ccb281fccb5a37905f522b9ad (patch) | |
tree | b720d9a363067e5fc0e7ce5c4842ecc9a062fe17 | |
parent | 2904db90458a1253e4aea6844ba9a59ac11923b6 (diff) | |
parent | 307eb1a8ee363db1fcf869e427a8deb6d9538881 (diff) | |
download | ffmpeg-53eea3a5693af41ccb281fccb5a37905f522b9ad.tar.gz |
Merge commit '307eb1a8ee363db1fcf869e427a8deb6d9538881'
* commit '307eb1a8ee363db1fcf869e427a8deb6d9538881':
x86: vp8dsp: port FILTER_BILINEAR macro to cpuflags
Merged-by: James Almer <jamrial@gmail.com>
-rw-r--r-- | libavcodec/x86/vp8dsp.asm | 143 |
1 files changed, 66 insertions, 77 deletions
diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm index e303b80293..75de5690a1 100644 --- a/libavcodec/x86/vp8dsp.asm +++ b/libavcodec/x86/vp8dsp.asm @@ -664,6 +664,37 @@ INIT_XMM sse2 FILTER_V 8 %macro FILTER_BILINEAR 1 +%if cpuflag(ssse3) +cglobal put_vp8_bilinear%1_v, 7, 7, 5, dst, dststride, src, srcstride, height, picreg, my + shl myd, 4 +%ifdef PIC + lea picregq, [bilinear_filter_vb_m] +%endif + pxor m4, m4 + mova m3, [bilinear_filter_vb+myq-16] +.nextrow: + movh m0, [srcq+srcstrideq*0] + movh m1, [srcq+srcstrideq*1] + movh m2, [srcq+srcstrideq*2] + punpcklbw m0, m1 + punpcklbw m1, m2 + pmaddubsw m0, m3 + pmaddubsw m1, m3 + psraw m0, 2 + psraw m1, 2 + pavgw m0, m4 + pavgw m1, m4 +%if mmsize==8 + packuswb m0, m0 + packuswb m1, m1 + movh [dstq+dststrideq*0], m0 + movh [dstq+dststrideq*1], m1 +%else + packuswb m0, m1 + movh [dstq+dststrideq*0], m0 + movhps [dstq+dststrideq*1], m0 +%endif +%else ; cpuflag(ssse3) cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, picreg, my shl myd, 4 %ifdef PIC @@ -701,6 +732,7 @@ cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, p movh [dstq+dststrideq*0], m0 movhps [dstq+dststrideq*1], m0 %endif +%endif ; cpuflag(ssse3) lea dstq, [dstq+dststrideq*2] lea srcq, [srcq+srcstrideq*2] @@ -708,6 +740,37 @@ cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, p jg .nextrow REP_RET +%if cpuflag(ssse3) +cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 5, dst, dststride, src, srcstride, height, mx, picreg + shl mxd, 4 +%ifdef PIC + lea picregq, [bilinear_filter_vb_m] +%endif + pxor m4, m4 + mova m2, [filter_h2_shuf] + mova m3, [bilinear_filter_vb+mxq-16] +.nextrow: + movu m0, [srcq+srcstrideq*0] + movu m1, [srcq+srcstrideq*1] + pshufb m0, m2 + pshufb m1, m2 + pmaddubsw m0, m3 + pmaddubsw m1, m3 + psraw m0, 2 + psraw m1, 2 + pavgw m0, m4 + pavgw m1, m4 +%if mmsize==8 + packuswb m0, m0 + packuswb m1, m1 + movh [dstq+dststrideq*0], m0 + movh [dstq+dststrideq*1], m1 +%else + packuswb m0, m1 + movh [dstq+dststrideq*0], m0 + movhps [dstq+dststrideq*1], m0 +%endif +%else ; cpuflag(ssse3) cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, height, mx, picreg shl mxd, 4 %ifdef PIC @@ -746,6 +809,7 @@ cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 7, dst, dststride, src, srcstride movh [dstq+dststrideq*0], m0 movhps [dstq+dststrideq*1], m0 %endif +%endif ; cpuflag(ssse3) lea dstq, [dstq+dststrideq*2] lea srcq, [srcq+srcstrideq*2] @@ -758,85 +822,10 @@ INIT_MMX mmxext FILTER_BILINEAR 4 INIT_XMM sse2 FILTER_BILINEAR 8 - -%macro FILTER_BILINEAR_SSSE3 1 -cglobal put_vp8_bilinear%1_v, 7, 7, 5, dst, dststride, src, srcstride, height, picreg, my - shl myd, 4 -%ifdef PIC - lea picregq, [bilinear_filter_vb_m] -%endif - pxor m4, m4 - mova m3, [bilinear_filter_vb+myq-16] -.nextrow: - movh m0, [srcq+srcstrideq*0] - movh m1, [srcq+srcstrideq*1] - movh m2, [srcq+srcstrideq*2] - punpcklbw m0, m1 - punpcklbw m1, m2 - pmaddubsw m0, m3 - pmaddubsw m1, m3 - psraw m0, 2 - psraw m1, 2 - pavgw m0, m4 - pavgw m1, m4 -%if mmsize==8 - packuswb m0, m0 - packuswb m1, m1 - movh [dstq+dststrideq*0], m0 - movh [dstq+dststrideq*1], m1 -%else - packuswb m0, m1 - movh [dstq+dststrideq*0], m0 - movhps [dstq+dststrideq*1], m0 -%endif - - lea dstq, [dstq+dststrideq*2] - lea srcq, [srcq+srcstrideq*2] - sub heightd, 2 - jg .nextrow - REP_RET - -cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 5, dst, dststride, src, srcstride, height, mx, picreg - shl mxd, 4 -%ifdef PIC - lea picregq, [bilinear_filter_vb_m] -%endif - pxor m4, m4 - mova m2, [filter_h2_shuf] - mova m3, [bilinear_filter_vb+mxq-16] -.nextrow: - movu m0, [srcq+srcstrideq*0] - movu m1, [srcq+srcstrideq*1] - pshufb m0, m2 - pshufb m1, m2 - pmaddubsw m0, m3 - pmaddubsw m1, m3 - psraw m0, 2 - psraw m1, 2 - pavgw m0, m4 - pavgw m1, m4 -%if mmsize==8 - packuswb m0, m0 - packuswb m1, m1 - movh [dstq+dststrideq*0], m0 - movh [dstq+dststrideq*1], m1 -%else - packuswb m0, m1 - movh [dstq+dststrideq*0], m0 - movhps [dstq+dststrideq*1], m0 -%endif - - lea dstq, [dstq+dststrideq*2] - lea srcq, [srcq+srcstrideq*2] - sub heightd, 2 - jg .nextrow - REP_RET -%endmacro - INIT_MMX ssse3 -FILTER_BILINEAR_SSSE3 4 +FILTER_BILINEAR 4 INIT_XMM ssse3 -FILTER_BILINEAR_SSSE3 8 +FILTER_BILINEAR 8 INIT_MMX mmx cglobal put_vp8_pixels8, 5, 5, 0, dst, dststride, src, srcstride, height |