summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Pascal Massimino <pascal.massimino@gmail.com> 2018-04-11 15:17:14 +0200
committer: James Zern <jzern@google.com> 2018-04-11 21:25:06 +0000
commit: c1cb86af5f9cdea7afe2fff54d0e04b085d1e186 (patch)
tree: 6c98ddeef1fa99a0c07493730269a8e86f21d8f8
parent: e577feb7c2108e0ac2ef115bf14a2f719d6e0889 (diff)
download: libwebp-c1cb86af5f9cdea7afe2fff54d0e04b085d1e186.tar.gz
fix 16b overflow in SSE2
The 'accum' variable can be larger than 15b for large rescale values.

Assert triggered:
  src/dsp/rescaler_sse2.c:249: RescalerExportRowExpand_SSE2: Assertion `v >= 0 && v <= 255' failed.
  src/dsp/rescaler_sse2.c:350: RescalerExportRowShrink_SSE2: Assertion `v >= 0 && v <= 255' failed.

-> fall back to C implementation in this case for now

Change-Id: I7ea1cb72301cafc1459be403f6a6f4e3cbc89bb1
-rw-r--r-- src/dsp/rescaler_sse2.c 20
1 file changed, 9 insertions, 11 deletions
diff --git a/src/dsp/rescaler_sse2.c b/src/dsp/rescaler_sse2.c
index f93b204f..64c50dea 100644
--- a/src/dsp/rescaler_sse2.c
+++ b/src/dsp/rescaler_sse2.c
@@ -36,7 +36,7 @@ static void LoadTwoPixels_SSE2(const uint8_t* const src, __m128i* out) {
}
// input: 8 bytes ABCDEFGH -> output: A0B0C0D0E0F0G0H0
-static void LoadHeightPixels_SSE2(const uint8_t* const src, __m128i* out) {
+static void LoadEightPixels_SSE2(const uint8_t* const src, __m128i* out) {
const __m128i zero = _mm_setzero_si128();
const __m128i A = _mm_loadl_epi64((const __m128i*)(src)); // ABCDEFGH
*out = _mm_unpacklo_epi8(A, zero);
@@ -50,13 +50,15 @@ static void RescalerImportRowExpand_SSE2(WebPRescaler* const wrk,
int accum = x_add;
__m128i cur_pixels;
+ // SSE2 implementation only works with 16b signed arithmetic at max.
+ if (wrk->src_width < 8 || accum >= (1 << 15)) {
+ WebPRescalerImportRowExpand_C(wrk, src);
+ return;
+ }
+
assert(!WebPRescalerInputDone(wrk));
assert(wrk->x_expand);
if (wrk->num_channels == 4) {
- if (wrk->src_width < 2) {
- WebPRescalerImportRowExpand_C(wrk, src);
- return;
- }
LoadTwoPixels_SSE2(src, &cur_pixels);
src += 4;
while (1) {
@@ -75,11 +77,7 @@ static void RescalerImportRowExpand_SSE2(WebPRescaler* const wrk,
} else {
int left;
const uint8_t* const src_limit = src + wrk->src_width - 8;
- if (wrk->src_width < 8) {
- WebPRescalerImportRowExpand_C(wrk, src);
- return;
- }
- LoadHeightPixels_SSE2(src, &cur_pixels);
+ LoadEightPixels_SSE2(src, &cur_pixels);
src += 7;
left = 7;
while (1) {
@@ -94,7 +92,7 @@ static void RescalerImportRowExpand_SSE2(WebPRescaler* const wrk,
if (--left) {
cur_pixels = _mm_srli_si128(cur_pixels, 2);
} else if (src <= src_limit) {
- LoadHeightPixels_SSE2(src, &cur_pixels);
+ LoadEightPixels_SSE2(src, &cur_pixels);
src += 7;
left = 7;
} else { // tail