summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohann <johann.koenig@duck.com>2018-11-13 14:25:59 -0800
committerPascal Massimino <skal@google.com>2018-12-03 22:59:12 +0000
commit5173d4ee6f51cf01058f141c291fba39a5520aed (patch)
tree73e03f328b0785633080ee67da323fb7db78b894
parent5b081219c9d6311a383af83182d6db43cef3cbcf (diff)
downloadlibwebp-5173d4ee6f51cf01058f141c291fba39a5520aed.tar.gz
neon IsFlat
Move IsFlat to its own header. This allows it to continue to be inlined.
Using the RTCD and creating a distinct function slows down ARM builds.

        flower  mug
C       3.59    2.12
NEON    3.47    2.01

BUG=b/118740850
Change-Id: Id77e8f76d9e9790c498806e7070bbe37c10bc2e9
-rw-r--r--src/dsp/Makefile.am1
-rw-r--r--src/dsp/quant.h70
-rw-r--r--src/enc/quant_enc.c15
3 files changed, 72 insertions, 14 deletions
diff --git a/src/dsp/Makefile.am b/src/dsp/Makefile.am
index 8cd3e53c..9f67f5b3 100644
--- a/src/dsp/Makefile.am
+++ b/src/dsp/Makefile.am
@@ -40,6 +40,7 @@ ENC_SOURCES =
ENC_SOURCES += cost.c
ENC_SOURCES += enc.c
ENC_SOURCES += lossless_enc.c
+ENC_SOURCES += quant.h
ENC_SOURCES += ssim.c
libwebpdspdecode_sse41_la_SOURCES =
diff --git a/src/dsp/quant.h b/src/dsp/quant.h
new file mode 100644
index 00000000..5ba6f9c3
--- /dev/null
+++ b/src/dsp/quant.h
@@ -0,0 +1,70 @@
+// Copyright 2018 Google Inc. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the COPYING file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS. All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+// -----------------------------------------------------------------------------
+
+#ifndef WEBP_DSP_QUANT_H_
+#define WEBP_DSP_QUANT_H_
+
+#include "src/dsp/dsp.h"
+#include "src/webp/types.h"
+
+#if defined(WEBP_USE_NEON) && !defined(WEBP_ANDROID_NEON) && \
+ !defined(WEBP_HAVE_NEON_RTCD)
+#include <arm_neon.h>
+
+#define IsFlat IsFlat_NEON
+
+// Adds up the four 32-bit lanes of 'a'. Lane 0 of the returned vector holds
+// the total (modulo 2^32); callers are expected to read only that lane.
+static uint32x2_t horizontal_add_uint32x4(const uint32x4_t a) {
+  // Widening pairwise add: two 64-bit lanes holding a[0]+a[1] and a[2]+a[3].
+  const uint64x2_t pair_sums = vpaddlq_u32(a);
+  // View each 64-bit partial sum as a pair of 32-bit lanes.
+  const uint32x2_t lo = vreinterpret_u32_u64(vget_low_u64(pair_sums));
+  const uint32x2_t hi = vreinterpret_u32_u64(vget_high_u64(pair_sums));
+  return vadd_u32(lo, hi);
+}
+
+// NEON version of IsFlat.
+// Returns 1 if the 'num_blocks' consecutive blocks of 16 coefficients starting
+// at 'levels' contain at most 'thresh' non-zero AC coefficients in total, and
+// 0 otherwise. The first coefficient of each block (DC) is not counted.
+static WEBP_INLINE int IsFlat(const int16_t* levels, int num_blocks,
+                              int thresh) {
+  // All bits set, so that vtstq_s16(x, tst_ones) flags every lane where x != 0.
+  const int16x8_t tst_ones = vdupq_n_s16(-1);
+  // Four 32-bit partial counts of non-zero coefficients.
+  uint32x4_t sum = vdupq_n_u32(0);
+  int i;  // declared before the loop: no C99 for-loop declaration (C89 builds)
+
+  for (i = 0; i < num_blocks; ++i) {
+    // Set DC to zero.
+    const int16x8_t a_0 = vsetq_lane_s16(0, vld1q_s16(levels), 0);
+    const int16x8_t a_1 = vld1q_s16(levels + 8);
+
+    // Reduce each lane to 1 (non-zero coefficient) or 0 (zero coefficient).
+    const uint16x8_t b_0 = vshrq_n_u16(vtstq_s16(a_0, tst_ones), 15);
+    const uint16x8_t b_1 = vshrq_n_u16(vtstq_s16(a_1, tst_ones), 15);
+
+    // Pairwise-add the 0/1 flags and accumulate them into the 32-bit counts.
+    sum = vpadalq_u16(sum, b_0);
+    sum = vpadalq_u16(sum, b_1);
+
+    levels += 16;
+  }
+  // Flat when the overall count of non-zero AC coefficients is within 'thresh'.
+  return thresh >= (int32_t)vget_lane_u32(horizontal_add_uint32x4(sum), 0);
+}
+
+#else
+
+#define IsFlat IsFlat_C
+
+// Plain-C version of IsFlat.
+// Walks 'num_blocks' consecutive blocks of 16 coefficients starting at
+// 'levels' and counts the non-zero AC coefficients (index 0 of each block,
+// the DC, is skipped). Returns 0 as soon as the running count exceeds
+// 'thresh', and 1 if all blocks were scanned without exceeding it.
+static WEBP_INLINE int IsFlat(const int16_t* levels, int num_blocks,
+                              int thresh) {
+  int nz_count = 0;  // non-zero AC coefficients seen so far
+  int block;
+  // TODO(skal): refine positional scoring?
+  for (block = 0; block < num_blocks; ++block, levels += 16) {
+    int k;
+    // Start at 1: omit DC, we're only interested in AC.
+    for (k = 1; k < 16; ++k) {
+      if (levels[k] != 0) ++nz_count;
+      // Checked after every coefficient so the scan can stop early.
+      if (nz_count > thresh) return 0;
+    }
+  }
+  return 1;
+}
+
+#endif // defined(WEBP_USE_NEON) && !defined(WEBP_ANDROID_NEON) &&
+ // !defined(WEBP_HAVE_NEON_RTCD)
+
+#endif // WEBP_DSP_QUANT_H_
diff --git a/src/enc/quant_enc.c b/src/enc/quant_enc.c
index a02ff405..03c682e3 100644
--- a/src/enc/quant_enc.c
+++ b/src/enc/quant_enc.c
@@ -15,6 +15,7 @@
#include <math.h>
#include <stdlib.h> // for abs()
+#include "src/dsp/quant.h"
#include "src/enc/vp8i_enc.h"
#include "src/enc/cost_enc.h"
@@ -977,20 +978,6 @@ static void SwapOut(VP8EncIterator* const it) {
SwapPtr(&it->yuv_out_, &it->yuv_out2_);
}
-static WEBP_INLINE int IsFlat(const int16_t* levels, int num_blocks,
- int thresh) {
- int score = 0;
- while (num_blocks-- > 0) { // TODO(skal): refine positional scoring?
- int i;
- for (i = 1; i < 16; ++i) { // omit DC, we're only interested in AC
- score += (levels[i] != 0);
- if (score > thresh) return 0;
- }
- levels += 16;
- }
- return 1;
-}
-
static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* rd) {
const int kNumBlocks = 16;
VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_];