diff options
author | Johann <johannkoenig@google.com> | 2017-02-02 15:28:16 -0800 |
---|---|---|
committer | Johann Koenig <johannkoenig@google.com> | 2017-02-07 15:03:28 +0000 |
commit | 537949a9df4c06f0f6f8ee087b917c5fdde6155c (patch) | |
tree | e8b8f1b61b3f370582d4368631a39c12d45fc84c | |
parent | 85f3a82355a16ded505c8c50d85bfca0f55782c7 (diff) | |
download | libvpx-537949a9df4c06f0f6f8ee087b917c5fdde6155c.tar.gz |
block_error_fp highbd sse2: use tran_low_t for coeff
BUG=webm:1365
Change-Id: Id2ed3ebaaaa6a4b68628c23e08b64ea5f1341761
-rw-r--r-- | test/avg_test.cc | 4 | ||||
-rw-r--r-- | vp9/common/vp9_rtcd_defs.pl | 2 | ||||
-rw-r--r-- | vp9/encoder/x86/vp9_error_sse2.asm | 26 | ||||
-rw-r--r-- | vp9/vp9cx.mk | 3 |
4 files changed, 17 insertions, 18 deletions
diff --git a/test/avg_test.cc b/test/avg_test.cc
index f634c7a26..612aff018 100644
--- a/test/avg_test.cc
+++ b/test/avg_test.cc
@@ -446,16 +446,12 @@ INSTANTIATE_TEST_CASE_P(SSE2, SatdTest,
 make_tuple(256, &vpx_satd_sse2),
 make_tuple(1024, &vpx_satd_sse2)));
 
-// TODO(jianj): Remove the highbitdepth flag once the SIMD functions are
-// in place.
-#if !CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
 SSE2, BlockErrorTest,
 ::testing::Values(make_tuple(16, &vp9_block_error_fp_sse2),
 make_tuple(64, &vp9_block_error_fp_sse2),
 make_tuple(256, &vp9_block_error_fp_sse2),
 make_tuple(1024, &vp9_block_error_fp_sse2)));
-#endif // !CONFIG_VP9_HIGHBITDEPTH
 #endif // HAVE_SSE2
 
 #if HAVE_NEON
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index ecdce7c34..87aaecb23 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -133,7 +133,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
 specialize qw/vp9_highbd_block_error_8bit sse2 avx/;
 
 add_proto qw/int64_t vp9_block_error_fp/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size";
-specialize qw/vp9_block_error_fp/;
+specialize qw/vp9_block_error_fp sse2/;
 
 add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
 
diff --git a/vp9/encoder/x86/vp9_error_sse2.asm b/vp9/encoder/x86/vp9_error_sse2.asm
index 5b0238272..dcedf913d 100644
--- a/vp9/encoder/x86/vp9_error_sse2.asm
+++ b/vp9/encoder/x86/vp9_error_sse2.asm
@@ -11,9 +11,12 @@
 %define private_prefix vp9
 
 %include "third_party/x86inc/x86inc.asm"
+%include "vpx_dsp/x86/bitdepth_conversion_sse2.asm"
 
 SECTION .text
 
+%if CONFIG_VP9_HIGHBITDEPTH
+%else
 ; int64_t vp9_block_error(int16_t *coeff, int16_t *dqcoeff, intptr_t block_size,
 ; int64_t *ssz)
 
@@ -74,23 +77,25 @@ cglobal block_error, 3, 3, 8, uqc, dqc, size, ssz
 movd edx, m5
 %endif
 RET
+%endif ; CONFIG_VP9_HIGHBITDEPTH
 
-; Compute the sum of squared difference between two int16_t vectors.
-; int64_t vp9_block_error_fp(int16_t *coeff, int16_t *dqcoeff,
+; Compute the sum of squared difference between two tran_low_t vectors.
+; Vectors are converted (if necessary) to int16_t for calculations.
+; int64_t vp9_block_error_fp(tran_low_t *coeff, tran_low_t *dqcoeff,
 ; intptr_t block_size)
 
 INIT_XMM sse2
 cglobal block_error_fp, 3, 3, 6, uqc, dqc, size
 pxor m4, m4 ; sse accumulator
 pxor m5, m5 ; dedicated zero register
-lea uqcq, [uqcq+sizeq*2]
-lea dqcq, [dqcq+sizeq*2]
-neg sizeq
 .loop:
-mova m2, [uqcq+sizeq*2]
-mova m0, [dqcq+sizeq*2]
-mova m3, [uqcq+sizeq*2+mmsize]
-mova m1, [dqcq+sizeq*2+mmsize]
+LOAD_TRAN_LOW 2, uqcq, 0
+LOAD_TRAN_LOW 0, dqcq, 0
+LOAD_TRAN_LOW 3, uqcq, 1
+LOAD_TRAN_LOW 1, dqcq, 1
+INCREMENT_ELEMENTS_TRAN_LOW uqcq, 16
+INCREMENT_ELEMENTS_TRAN_LOW dqcq, 16
+sub sizeq, 16
 psubw m0, m2
 psubw m1, m3
 ; individual errors are max. 15bit+sign, so squares are 30bit, and
@@ -106,8 +111,7 @@ cglobal block_error_fp, 3, 3, 6, uqc, dqc, size
 punpckhdq m1, m5
 paddq m4, m3
 paddq m4, m1
-add sizeq, mmsize
-jl .loop
+jnz .loop
 
 ; accumulate horizontally and store in return value
 movhlps m5, m4
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index 87d9a775b..ad33fa1b4 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -107,11 +107,10 @@ VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_block_error_intrin_sse2.c
 endif
 
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.asm
+VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_error_sse2.asm
 ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
 VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_error_sse2.asm
 VP9_CX_SRCS-$(HAVE_AVX) += encoder/x86/vp9_highbd_error_avx.asm
-else
-VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_error_sse2.asm
 endif
 
 ifeq ($(ARCH_X86_64),yes)