summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Scott LaVarnway <slavarnway@google.com> 2017-07-03 06:10:50 -0700
committer Scott LaVarnway <slavarnway@google.com> 2017-07-05 15:56:01 -0700
commit e6e3ec335c17190b14b5727f93f21b7be0313643 (patch)
tree c23d36bb369eb8c420f5108226909092625e6c2b
parent 168a3a9e28cb862bd3abac2241eb35aca25071d6 (diff)
download libwebp-e6e3ec335c17190b14b5727f93f21b7be0313643.tar.gz
wasm: Add Truemotion
BUG=webp:352 Change-Id: Ie65e155ac2d8253a5706ee85e830ec220a12ab4b
-rw-r--r-- src/dsp/dec_wasm.c 67
1 files changed, 67 insertions, 0 deletions
diff --git a/src/dsp/dec_wasm.c b/src/dsp/dec_wasm.c
index 8b24ae8a..b8937f09 100644
--- a/src/dsp/dec_wasm.c
+++ b/src/dsp/dec_wasm.c
@@ -39,6 +39,19 @@ static WEBP_INLINE uint8x16 get_8_bytes(uint8_t* dst) {
return a;
}
+// Loads 4 bytes from 'dst' into lanes 0..3 of a 16-lane byte vector.
+// Lanes 4..15 are zeroed so that whole-vector operations applied to the
+// result (such as the byte -> int16 widening done by TrueMotion) never
+// consume indeterminate values.
+static WEBP_INLINE uint8x16 get_4_bytes(uint8_t* dst) {
+  uint8x16 a = (uint8x16){0};
+  memcpy(&a, dst, 4);
+  return a;
+}
+
+// Returns a vector whose eight 16-bit lanes all hold 'val' (converted to the
+// 16-bit lane type). The result is built from a fully initialized vector
+// literal instead of shuffling a vector in which only lane 0 was written, so
+// no uninitialized lane is ever read.
+static WEBP_INLINE int16x8 splat_int16(int val) {
+  const int16_t v = (int16_t)val;
+  return (int16x8){v, v, v, v, v, v, v, v};
+}
+
static WEBP_INLINE uint8x16 splat_uint8(uint32_t val) {
uint8x16 a;
a[0] = val;
@@ -58,6 +71,11 @@ static WEBP_INLINE int16x8 _unpacklo_epi8(const int8x16 a, const int8x16 b) {
6, 22, 7, 23);
}
+// Interleaves the upper eight bytes of 'a' and 'b':
+//   a[8], b[8], a[9], b[9], ..., a[15], b[15]
+// (indices 16..31 in the shuffle mask select lanes of 'b').
+static WEBP_INLINE int16x8 _unpackhi_epi8(const int8x16 a, const int8x16 b) {
+  const int8x16 interleaved = __builtin_shufflevector(
+      a, b,
+      8, 24, 9, 25, 10, 26, 11, 27,
+      12, 28, 13, 29, 14, 30, 15, 31);
+  return interleaved;
+}
+
static WEBP_INLINE int32x4 _unpacklo_epi16(const int16x8 a, const int16x8 b) {
return __builtin_shufflevector(a, b, 0, 8, 1, 9, 2, 10, 3, 11);
}
@@ -112,6 +130,7 @@ static WEBP_INLINE uint8x16 int16x8_to_uint8x16_sat(int16x8 x) {
28, 30);
return final;
}
+
//------------------------------------------------------------------------------
// Transforms (Paragraph 14.4)
@@ -471,6 +490,51 @@ static void VL4(uint8_t* dst) { // Vertical-Left
#undef AVG2
#undef AVG3
+// TrueMotion predictor for a size x size block located at 'dst'.
+// For every row, the difference between the pixel to the left of the row
+// (row[-1]) and the pixel above-left of the block (above[-1]) is added to the
+// widened top row, and the sums are packed back to bytes with
+// int16x8_to_uint8x16_sat() before being stored.
+// 'size' of 4 or 8 selects the small paths; any other value takes the
+// 16-wide path.
+static void TrueMotion(uint8_t* dst, uint32_t size) {
+  const uint8x16 zero = (uint8x16){0};
+  uint8_t* const above = dst - BPS;  // row directly above the block
+  uint8_t* row = dst;                // current output row
+  int j;
+
+  switch (size) {
+    case 4: {
+      // Widen the top pixels to 16 bits once, outside the row loop.
+      const uint8x16 above8 = get_4_bytes(above);
+      const int16x8 above16 = (int16x8)_unpacklo_epi8(above8, zero);
+      for (j = 0; j < 4; ++j) {
+        const int delta = row[-1] - above[-1];
+        const int16x8 delta16 = splat_int16(delta);
+        const uint32x4 packed =
+            (uint32x4)int16x8_to_uint8x16_sat(delta16 + above16);
+        // Only the first 32-bit lane (4 pixels) is written.
+        WebPUint32ToMem(row, packed[0]);
+        row += BPS;
+      }
+      break;
+    }
+    case 8: {
+      const uint8x16 above8 = get_8_bytes(above);
+      const int16x8 above16 = (int16x8)_unpacklo_epi8(above8, zero);
+      for (j = 0; j < 8; ++j) {
+        const int delta = row[-1] - above[-1];
+        const int16x8 delta16 = splat_int16(delta);
+        const uint8x16 packed =
+            (uint8x16)int16x8_to_uint8x16_sat(delta16 + above16);
+        memcpy(row, &packed, 8);
+        row += BPS;
+      }
+      break;
+    }
+    default: {
+      // 16-wide: process the top row as two halves of eight 16-bit lanes.
+      const uint8x16 above8 = get_16_bytes(above);
+      const int16x8 above16_lo = (int16x8)_unpacklo_epi8(above8, zero);
+      const int16x8 above16_hi = (int16x8)_unpackhi_epi8(above8, zero);
+      for (j = 0; j < 16; ++j) {
+        const int delta = row[-1] - above[-1];
+        const int16x8 delta16 = splat_int16(delta);
+        const uint8x16 packed_lo =
+            (uint8x16)int16x8_to_uint8x16_sat(delta16 + above16_lo);
+        const uint8x16 packed_hi =
+            (uint8x16)int16x8_to_uint8x16_sat(delta16 + above16_hi);
+        // Join the first eight bytes of each packed half into one row.
+        const uint8x16 packed = (uint8x16)__builtin_shufflevector(
+            packed_lo, packed_hi,
+            0, 1, 2, 3, 4, 5, 6, 7,
+            16, 17, 18, 19, 20, 21, 22, 23);
+        memcpy(row, &packed, 16);
+        row += BPS;
+      }
+      break;
+    }
+  }
+}
+
+// TrueMotion prediction with a block size of 4.
+static void TM4(uint8_t* dst) {
+  TrueMotion(dst, 4);
+}
+// TrueMotion prediction with a block size of 8.
+static void TM8uv(uint8_t* dst) {
+  TrueMotion(dst, 8);
+}
+// TrueMotion prediction with a block size of 16.
+static void TM16(uint8_t* dst) {
+  TrueMotion(dst, 16);
+}
+
//------------------------------------------------------------------------------
// Luma 16x16
@@ -636,6 +700,7 @@ extern void VP8DspInitWASM(void);
WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitWASM(void) {
VP8Transform = Transform;
+ VP8PredLuma4[1] = TM4;
VP8PredLuma4[2] = VE4;
VP8PredLuma4[4] = RD4;
VP8PredLuma4[5] = VR4;
@@ -643,6 +708,7 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitWASM(void) {
VP8PredLuma4[7] = VL4;
VP8PredLuma16[0] = DC16;
+ VP8PredLuma16[1] = TM16;
VP8PredLuma16[2] = VE16;
VP8PredLuma16[3] = HE16;
VP8PredLuma16[4] = DC16NoTop;
@@ -650,6 +716,7 @@ WEBP_TSAN_IGNORE_FUNCTION void VP8DspInitWASM(void) {
VP8PredLuma16[6] = DC16NoTopLeft;
VP8PredChroma8[0] = DC8uv;
+ VP8PredChroma8[1] = TM8uv;
VP8PredChroma8[2] = VE8uv;
VP8PredChroma8[3] = HE8uv;
VP8PredChroma8[4] = DC8uvNoTop;