summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Johann <johannkoenig@google.com> 2017-02-02 15:28:16 -0800
committer Johann Koenig <johannkoenig@google.com> 2017-02-07 15:03:28 +0000
commit 537949a9df4c06f0f6f8ee087b917c5fdde6155c (patch)
tree e8b8f1b61b3f370582d4368631a39c12d45fc84c
parent 85f3a82355a16ded505c8c50d85bfca0f55782c7 (diff)
download libvpx-537949a9df4c06f0f6f8ee087b917c5fdde6155c.tar.gz
block_error_fp highbd sse2: use tran_low_t for coeff
BUG=webm:1365
Change-Id: Id2ed3ebaaaa6a4b68628c23e08b64ea5f1341761
-rw-r--r-- test/avg_test.cc 4
-rw-r--r-- vp9/common/vp9_rtcd_defs.pl 2
-rw-r--r-- vp9/encoder/x86/vp9_error_sse2.asm 26
-rw-r--r-- vp9/vp9cx.mk 3
4 files changed, 17 insertions, 18 deletions
diff --git a/test/avg_test.cc b/test/avg_test.cc
index f634c7a26..612aff018 100644
--- a/test/avg_test.cc
+++ b/test/avg_test.cc
@@ -446,16 +446,12 @@ INSTANTIATE_TEST_CASE_P(SSE2, SatdTest,
make_tuple(256, &vpx_satd_sse2),
make_tuple(1024, &vpx_satd_sse2)));
-// TODO(jianj): Remove the highbitdepth flag once the SIMD functions are
-// in place.
-#if !CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
SSE2, BlockErrorTest,
::testing::Values(make_tuple(16, &vp9_block_error_fp_sse2),
make_tuple(64, &vp9_block_error_fp_sse2),
make_tuple(256, &vp9_block_error_fp_sse2),
make_tuple(1024, &vp9_block_error_fp_sse2)));
-#endif // !CONFIG_VP9_HIGHBITDEPTH
#endif // HAVE_SSE2
#if HAVE_NEON
diff --git a/vp9/common/vp9_rtcd_defs.pl b/vp9/common/vp9_rtcd_defs.pl
index ecdce7c34..87aaecb23 100644
--- a/vp9/common/vp9_rtcd_defs.pl
+++ b/vp9/common/vp9_rtcd_defs.pl
@@ -133,7 +133,7 @@ if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {
specialize qw/vp9_highbd_block_error_8bit sse2 avx/;
add_proto qw/int64_t vp9_block_error_fp/, "const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size";
- specialize qw/vp9_block_error_fp/;
+ specialize qw/vp9_block_error_fp sse2/;
add_proto qw/void vp9_quantize_fp/, "const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan";
diff --git a/vp9/encoder/x86/vp9_error_sse2.asm b/vp9/encoder/x86/vp9_error_sse2.asm
index 5b0238272..dcedf913d 100644
--- a/vp9/encoder/x86/vp9_error_sse2.asm
+++ b/vp9/encoder/x86/vp9_error_sse2.asm
@@ -11,9 +11,12 @@
%define private_prefix vp9
%include "third_party/x86inc/x86inc.asm"
+%include "vpx_dsp/x86/bitdepth_conversion_sse2.asm"
SECTION .text
+%if CONFIG_VP9_HIGHBITDEPTH
+%else
; int64_t vp9_block_error(int16_t *coeff, int16_t *dqcoeff, intptr_t block_size,
; int64_t *ssz)
@@ -74,23 +77,25 @@ cglobal block_error, 3, 3, 8, uqc, dqc, size, ssz
movd edx, m5
%endif
RET
+%endif ; CONFIG_VP9_HIGHBITDEPTH
-; Compute the sum of squared difference between two int16_t vectors.
-; int64_t vp9_block_error_fp(int16_t *coeff, int16_t *dqcoeff,
+; Compute the sum of squared difference between two tran_low_t vectors.
+; Vectors are converted (if necessary) to int16_t for calculations.
+; int64_t vp9_block_error_fp(tran_low_t *coeff, tran_low_t *dqcoeff,
; intptr_t block_size)
INIT_XMM sse2
cglobal block_error_fp, 3, 3, 6, uqc, dqc, size
pxor m4, m4 ; sse accumulator
pxor m5, m5 ; dedicated zero register
- lea uqcq, [uqcq+sizeq*2]
- lea dqcq, [dqcq+sizeq*2]
- neg sizeq
.loop:
- mova m2, [uqcq+sizeq*2]
- mova m0, [dqcq+sizeq*2]
- mova m3, [uqcq+sizeq*2+mmsize]
- mova m1, [dqcq+sizeq*2+mmsize]
+ LOAD_TRAN_LOW 2, uqcq, 0
+ LOAD_TRAN_LOW 0, dqcq, 0
+ LOAD_TRAN_LOW 3, uqcq, 1
+ LOAD_TRAN_LOW 1, dqcq, 1
+ INCREMENT_ELEMENTS_TRAN_LOW uqcq, 16
+ INCREMENT_ELEMENTS_TRAN_LOW dqcq, 16
+ sub sizeq, 16
psubw m0, m2
psubw m1, m3
; individual errors are max. 15bit+sign, so squares are 30bit, and
@@ -106,8 +111,7 @@ cglobal block_error_fp, 3, 3, 6, uqc, dqc, size
punpckhdq m1, m5
paddq m4, m3
paddq m4, m1
- add sizeq, mmsize
- jl .loop
+ jnz .loop
; accumulate horizontally and store in return value
movhlps m5, m4
diff --git a/vp9/vp9cx.mk b/vp9/vp9cx.mk
index 87d9a775b..ad33fa1b4 100644
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -107,11 +107,10 @@ VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_block_error_intrin_sse2.c
endif
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.asm
+VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_error_sse2.asm
ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_error_sse2.asm
VP9_CX_SRCS-$(HAVE_AVX) += encoder/x86/vp9_highbd_error_avx.asm
-else
-VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_error_sse2.asm
endif
ifeq ($(ARCH_X86_64),yes)