diff options
author | Pascal Massimino <pascal.massimino@gmail.com> | 2018-04-11 15:17:14 +0200 |
---|---|---|
committer | James Zern <jzern@google.com> | 2018-04-11 21:25:06 +0000 |
commit | c1cb86af5f9cdea7afe2fff54d0e04b085d1e186 (patch) | |
tree | 6c98ddeef1fa99a0c07493730269a8e86f21d8f8 | |
parent | e577feb7c2108e0ac2ef115bf14a2f719d6e0889 (diff) | |
download | libwebp-c1cb86af5f9cdea7afe2fff54d0e04b085d1e186.tar.gz |
fix 16b overflow in SSE2
The 'accum' variable can exceed 15 bits for large rescale values,
which overflows the signed 16-bit arithmetic used by the SSE2 code.
Assert triggered:
src/dsp/rescaler_sse2.c:249: RescalerExportRowExpand_SSE2: Assertion `v >= 0 && v <= 255' failed.
src/dsp/rescaler_sse2.c:350: RescalerExportRowShrink_SSE2: Assertion `v >= 0 && v <= 255' failed.
-> fall back to C implementation in this case for now
Change-Id: I7ea1cb72301cafc1459be403f6a6f4e3cbc89bb1
-rw-r--r-- | src/dsp/rescaler_sse2.c | 20 |
1 files changed, 9 insertions, 11 deletions
diff --git a/src/dsp/rescaler_sse2.c b/src/dsp/rescaler_sse2.c index f93b204f..64c50dea 100644 --- a/src/dsp/rescaler_sse2.c +++ b/src/dsp/rescaler_sse2.c @@ -36,7 +36,7 @@ static void LoadTwoPixels_SSE2(const uint8_t* const src, __m128i* out) { } // input: 8 bytes ABCDEFGH -> output: A0B0C0D0E0F0G0H0 -static void LoadHeightPixels_SSE2(const uint8_t* const src, __m128i* out) { +static void LoadEightPixels_SSE2(const uint8_t* const src, __m128i* out) { const __m128i zero = _mm_setzero_si128(); const __m128i A = _mm_loadl_epi64((const __m128i*)(src)); // ABCDEFGH *out = _mm_unpacklo_epi8(A, zero); @@ -50,13 +50,15 @@ static void RescalerImportRowExpand_SSE2(WebPRescaler* const wrk, int accum = x_add; __m128i cur_pixels; + // SSE2 implementation only works with 16b signed arithmetic at max. + if (wrk->src_width < 8 || accum >= (1 << 15)) { + WebPRescalerImportRowExpand_C(wrk, src); + return; + } + assert(!WebPRescalerInputDone(wrk)); assert(wrk->x_expand); if (wrk->num_channels == 4) { - if (wrk->src_width < 2) { - WebPRescalerImportRowExpand_C(wrk, src); - return; - } LoadTwoPixels_SSE2(src, &cur_pixels); src += 4; while (1) { @@ -75,11 +77,7 @@ static void RescalerImportRowExpand_SSE2(WebPRescaler* const wrk, } else { int left; const uint8_t* const src_limit = src + wrk->src_width - 8; - if (wrk->src_width < 8) { - WebPRescalerImportRowExpand_C(wrk, src); - return; - } - LoadHeightPixels_SSE2(src, &cur_pixels); + LoadEightPixels_SSE2(src, &cur_pixels); src += 7; left = 7; while (1) { @@ -94,7 +92,7 @@ static void RescalerImportRowExpand_SSE2(WebPRescaler* const wrk, if (--left) { cur_pixels = _mm_srli_si128(cur_pixels, 2); } else if (src <= src_limit) { - LoadHeightPixels_SSE2(src, &cur_pixels); + LoadEightPixels_SSE2(src, &cur_pixels); src += 7; left = 7; } else { // tail |