diff options
author | Liang Qi <liang.qi@theqtcompany.com> | 2015-12-08 09:06:25 +0100 |
---|---|---|
committer | Liang Qi <liang.qi@theqtcompany.com> | 2015-12-08 20:43:30 +0000 |
commit | fa4ed3621c9e50441b98e6c52c9bbe3ad9438248 (patch) | |
tree | 33a5739b577def87b5ec563f3dd7a9f0132431ad | |
parent | 531c30ea66d2b8fe517c89bed149a938e16332ec (diff) | |
download | qtimageformats-fa4ed3621c9e50441b98e6c52c9bbe3ad9438248.tar.gz |
libwebp: update to 0.4.4
This commit imports libwebp 0.4.4, including AUTHORS, COPYING, ChangeLog,
NEWS, PATENTS, README and the src directory. In src, only header
and source files are included.
The patches required to build it in Qt will follow in separate
commit(s).
Change-Id: Ib12593f56553e5358548c224d1855273f3fd2318
Reviewed-by: Lars Knoll <lars.knoll@theqtcompany.com>
Reviewed-by: aavit <eirik.aavitsland@theqtcompany.com>
-rw-r--r-- | src/3rdparty/libwebp/AUTHORS | 1 | ||||
-rw-r--r-- | src/3rdparty/libwebp/ChangeLog | 21 | ||||
-rw-r--r-- | src/3rdparty/libwebp/NEWS | 7 | ||||
-rw-r--r-- | src/3rdparty/libwebp/PATENTS | 2 | ||||
-rw-r--r-- | src/3rdparty/libwebp/README | 4 | ||||
-rw-r--r-- | src/3rdparty/libwebp/src/dec/buffer.c | 22 | ||||
-rw-r--r-- | src/3rdparty/libwebp/src/dec/io.c | 32 | ||||
-rw-r--r-- | src/3rdparty/libwebp/src/dec/vp8i.h | 2 | ||||
-rw-r--r-- | src/3rdparty/libwebp/src/dec/vp8l.c | 15 | ||||
-rw-r--r-- | src/3rdparty/libwebp/src/dsp/dec_neon.c | 32 | ||||
-rw-r--r-- | src/3rdparty/libwebp/src/dsp/dsp.h | 14 | ||||
-rw-r--r-- | src/3rdparty/libwebp/src/dsp/enc_mips32.c | 354 | ||||
-rw-r--r-- | src/3rdparty/libwebp/src/dsp/lossless_mips32.c | 34 | ||||
-rw-r--r-- | src/3rdparty/libwebp/src/enc/histogram.c | 13 | ||||
-rw-r--r-- | src/3rdparty/libwebp/src/enc/picture_rescale.c | 14 | ||||
-rw-r--r-- | src/3rdparty/libwebp/src/enc/vp8enci.h | 2 | ||||
-rw-r--r-- | src/3rdparty/libwebp/src/utils/endian_inl.h | 12 | ||||
-rw-r--r-- | src/3rdparty/libwebp/src/utils/rescaler.c | 615 | ||||
-rw-r--r-- | src/3rdparty/libwebp/src/utils/rescaler.h | 47 | ||||
-rw-r--r-- | src/3rdparty/libwebp/src/utils/utils.h | 2 |
20 files changed, 760 insertions, 485 deletions
diff --git a/src/3rdparty/libwebp/AUTHORS b/src/3rdparty/libwebp/AUTHORS index 5767e90..70423cb 100644 --- a/src/3rdparty/libwebp/AUTHORS +++ b/src/3rdparty/libwebp/AUTHORS @@ -16,6 +16,7 @@ Contributors: - Pascal Massimino (pascal dot massimino at gmail dot com) - Paweł Hajdan, Jr (phajdan dot jr at chromium dot org) - Pierre Joye (pierre dot php at gmail dot com) +- Sam Clegg (sbc at chromium dot org) - Scott LaVarnway (slavarnway at google dot com) - Scott Talbot (s at chikachow dot org) - Slobodan Prijic (slobodan dot prijic at imgtec dot com) diff --git a/src/3rdparty/libwebp/ChangeLog b/src/3rdparty/libwebp/ChangeLog index 2914cf5..0c362ad 100644 --- a/src/3rdparty/libwebp/ChangeLog +++ b/src/3rdparty/libwebp/ChangeLog @@ -1,3 +1,24 @@ +46e18c0 vwebp: fix incorrect clipping w/NO_BLEND +fcfde90 update issue tracker url +8c3fb33 update AUTHORS +808d4a6 update NEWS +6286404 bump version to 0.4.4 +b8b314a doc/webp-container-spec: update repo browser link +c3953e3 fix typo: constitutes -> constitute +cd377e2 Use __has_builtin to check clang support +e2e8980 wicdec: fix alpha detection w/64bpp BGRA/RGBA +5c3fe77 iosbuild: fix linking with Xcode 7 / iOS SDK 9 +f9f5498 VP8LAllocateHistogramSet: align histogram[] entries +3026db2 Loosen the buffer size checks for Y/U/V/A too. +d089362 loosen the padding check on buffer size +53d22c5 dec_neon: add whitespace around stringizing operator +8bcc4d4 dsp/mips: add whitespace around stringizing operator +d49c44f Container spec: clarify ordering of ALPH chunk. +382de22 msvc: fix pointer type warning in BitsLog2Floor +84ecd9d FlattenSimilarBlocks should only be tried when blending is possible. 
+f55ebbb backport rescaler fix +2ff633c fix mips2 build target +326b5fb update ChangeLog (tag: v0.4.3, origin/0.4.3, 0.4.3) a661e50 Disable NEON code on Native Client fcd94e9 update ChangeLog (tag: v0.4.3-rc1) 569fe57 update NEWS diff --git a/src/3rdparty/libwebp/NEWS b/src/3rdparty/libwebp/NEWS index 61d0d6c..8f5a039 100644 --- a/src/3rdparty/libwebp/NEWS +++ b/src/3rdparty/libwebp/NEWS @@ -1,3 +1,10 @@ +- 10/15/15: version 0.4.4 + This is a binary compatible release. + * rescaling out-of-bounds read fix (issue #254) + * various build fixes and improvements (issues #253, #259, #262, #267, #268) + * container documentation update + * gif2webp transparency fix (issue #245) + - 3/3/15: version 0.4.3 This is a binary compatible release. * Android / gcc / iOS / MSVS build fixes and improvements diff --git a/src/3rdparty/libwebp/PATENTS b/src/3rdparty/libwebp/PATENTS index 79d17d7..caedf60 100644 --- a/src/3rdparty/libwebp/PATENTS +++ b/src/3rdparty/libwebp/PATENTS @@ -17,7 +17,7 @@ or agree to the institution of patent litigation or any other patent enforcement activity against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that any of these implementations of WebM or any code incorporated within any of these implementations of WebM -constitutes direct or contributory patent infringement, or inducement of +constitute direct or contributory patent infringement, or inducement of patent infringement, then any patent rights granted to you under this License for these implementations of WebM shall terminate as of the date such litigation is filed. 
diff --git a/src/3rdparty/libwebp/README b/src/3rdparty/libwebp/README index c3f32c5..dd50185 100644 --- a/src/3rdparty/libwebp/README +++ b/src/3rdparty/libwebp/README @@ -4,7 +4,7 @@ \__\__/\____/\_____/__/ ____ ___ / _/ / \ \ / _ \/ _/ / \_/ / / \ \ __/ \__ - \____/____/\_____/_____/____/v0.4.3 + \____/____/\_____/_____/____/v0.4.4 Description: ============ @@ -596,7 +596,7 @@ Bugs: ===== Please report all bugs to our issue tracker: - http://code.google.com/p/webp/issues + https://bugs.chromium.org/p/webp Patches welcome! See this page to get started: http://www.webmproject.org/code/contribute/submitting-patches/ diff --git a/src/3rdparty/libwebp/src/dec/buffer.c b/src/3rdparty/libwebp/src/dec/buffer.c index 42feac7..2129312 100644 --- a/src/3rdparty/libwebp/src/dec/buffer.c +++ b/src/3rdparty/libwebp/src/dec/buffer.c @@ -33,6 +33,11 @@ static int IsValidColorspace(int webp_csp_mode) { return (webp_csp_mode >= MODE_RGB && webp_csp_mode < MODE_LAST); } +// strictly speaking, the very last (or first, if flipped) row +// doesn't require padding. 
+#define MIN_BUFFER_SIZE(WIDTH, HEIGHT, STRIDE) \ + (uint64_t)(STRIDE) * ((HEIGHT) - 1) + (WIDTH) + static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) { int ok = 1; const WEBP_CSP_MODE mode = buffer->colorspace; @@ -42,20 +47,22 @@ static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) { ok = 0; } else if (!WebPIsRGBMode(mode)) { // YUV checks const WebPYUVABuffer* const buf = &buffer->u.YUVA; + const int uv_width = (width + 1) / 2; + const int uv_height = (height + 1) / 2; const int y_stride = abs(buf->y_stride); const int u_stride = abs(buf->u_stride); const int v_stride = abs(buf->v_stride); const int a_stride = abs(buf->a_stride); - const uint64_t y_size = (uint64_t)y_stride * height; - const uint64_t u_size = (uint64_t)u_stride * ((height + 1) / 2); - const uint64_t v_size = (uint64_t)v_stride * ((height + 1) / 2); - const uint64_t a_size = (uint64_t)a_stride * height; + const uint64_t y_size = MIN_BUFFER_SIZE(width, height, y_stride); + const uint64_t u_size = MIN_BUFFER_SIZE(uv_width, uv_height, u_stride); + const uint64_t v_size = MIN_BUFFER_SIZE(uv_width, uv_height, v_stride); + const uint64_t a_size = MIN_BUFFER_SIZE(width, height, a_stride); ok &= (y_size <= buf->y_size); ok &= (u_size <= buf->u_size); ok &= (v_size <= buf->v_size); ok &= (y_stride >= width); - ok &= (u_stride >= (width + 1) / 2); - ok &= (v_stride >= (width + 1) / 2); + ok &= (u_stride >= uv_width); + ok &= (v_stride >= uv_width); ok &= (buf->y != NULL); ok &= (buf->u != NULL); ok &= (buf->v != NULL); @@ -67,13 +74,14 @@ static VP8StatusCode CheckDecBuffer(const WebPDecBuffer* const buffer) { } else { // RGB checks const WebPRGBABuffer* const buf = &buffer->u.RGBA; const int stride = abs(buf->stride); - const uint64_t size = (uint64_t)stride * height; + const uint64_t size = MIN_BUFFER_SIZE(width, height, stride); ok &= (size <= buf->size); ok &= (stride >= width * kModeBpp[mode]); ok &= (buf->rgba != NULL); } return ok ? 
VP8_STATUS_OK : VP8_STATUS_INVALID_PARAM; } +#undef MIN_BUFFER_SIZE static VP8StatusCode AllocateBuffer(WebPDecBuffer* const buffer) { const int w = buffer->width; diff --git a/src/3rdparty/libwebp/src/dec/io.c b/src/3rdparty/libwebp/src/dec/io.c index 8094e44..b2e72f0 100644 --- a/src/3rdparty/libwebp/src/dec/io.c +++ b/src/3rdparty/libwebp/src/dec/io.c @@ -322,37 +322,31 @@ static int InitYUVRescaler(const VP8Io* const io, WebPDecParams* const p) { const size_t work_size = 2 * out_width; // scratch memory for luma rescaler const size_t uv_work_size = 2 * uv_out_width; // and for each u/v ones size_t tmp_size; - int32_t* work; + rescaler_t* work; tmp_size = (work_size + 2 * uv_work_size) * sizeof(*work); if (has_alpha) { tmp_size += work_size * sizeof(*work); } - p->memory = WebPSafeCalloc(1ULL, tmp_size); + p->memory = WebPSafeMalloc(1ULL, tmp_size); if (p->memory == NULL) { return 0; // memory error } - work = (int32_t*)p->memory; + work = (rescaler_t*)p->memory; WebPRescalerInit(&p->scaler_y, io->mb_w, io->mb_h, buf->y, out_width, out_height, buf->y_stride, 1, - io->mb_w, out_width, io->mb_h, out_height, work); WebPRescalerInit(&p->scaler_u, uv_in_width, uv_in_height, buf->u, uv_out_width, uv_out_height, buf->u_stride, 1, - uv_in_width, uv_out_width, - uv_in_height, uv_out_height, work + work_size); WebPRescalerInit(&p->scaler_v, uv_in_width, uv_in_height, buf->v, uv_out_width, uv_out_height, buf->v_stride, 1, - uv_in_width, uv_out_width, - uv_in_height, uv_out_height, work + work_size + uv_work_size); p->emit = EmitRescaledYUV; if (has_alpha) { WebPRescalerInit(&p->scaler_a, io->mb_w, io->mb_h, buf->a, out_width, out_height, buf->a_stride, 1, - io->mb_w, out_width, io->mb_h, out_height, work + work_size + 2 * uv_work_size); p->emit_alpha = EmitRescaledAlphaYUV; WebPInitAlphaProcessing(); @@ -375,9 +369,9 @@ static int ExportRGB(WebPDecParams* const p, int y_pos) { WebPRescalerHasPendingOutput(&p->scaler_u)) { assert(p->last_y + y_pos + num_lines_out < 
p->output->height); assert(p->scaler_u.y_accum == p->scaler_v.y_accum); - WebPRescalerExportRow(&p->scaler_y, 0); - WebPRescalerExportRow(&p->scaler_u, 0); - WebPRescalerExportRow(&p->scaler_v, 0); + WebPRescalerExportRow(&p->scaler_y); + WebPRescalerExportRow(&p->scaler_u); + WebPRescalerExportRow(&p->scaler_v); convert(p->scaler_y.dst, p->scaler_u.dst, p->scaler_v.dst, dst, p->scaler_y.dst_width); dst += buf->stride; @@ -425,7 +419,7 @@ static int ExportAlpha(WebPDecParams* const p, int y_pos) { while (WebPRescalerHasPendingOutput(&p->scaler_a)) { int i; assert(p->last_y + y_pos + num_lines_out < p->output->height); - WebPRescalerExportRow(&p->scaler_a, 0); + WebPRescalerExportRow(&p->scaler_a); for (i = 0; i < width; ++i) { const uint32_t alpha_value = p->scaler_a.dst[i]; dst[4 * i] = alpha_value; @@ -458,7 +452,7 @@ static int ExportAlphaRGBA4444(WebPDecParams* const p, int y_pos) { while (WebPRescalerHasPendingOutput(&p->scaler_a)) { int i; assert(p->last_y + y_pos + num_lines_out < p->output->height); - WebPRescalerExportRow(&p->scaler_a, 0); + WebPRescalerExportRow(&p->scaler_a); for (i = 0; i < width; ++i) { // Fill in the alpha value (converted to 4 bits). 
const uint32_t alpha_value = p->scaler_a.dst[i] >> 4; @@ -495,7 +489,7 @@ static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) { const int uv_in_width = (io->mb_w + 1) >> 1; const int uv_in_height = (io->mb_h + 1) >> 1; const size_t work_size = 2 * out_width; // scratch memory for one rescaler - int32_t* work; // rescalers work area + rescaler_t* work; // rescalers work area uint8_t* tmp; // tmp storage for scaled YUV444 samples before RGB conversion size_t tmp_size1, tmp_size2, total_size; @@ -506,30 +500,26 @@ static int InitRGBRescaler(const VP8Io* const io, WebPDecParams* const p) { tmp_size2 += out_width; } total_size = tmp_size1 * sizeof(*work) + tmp_size2 * sizeof(*tmp); - p->memory = WebPSafeCalloc(1ULL, total_size); + p->memory = WebPSafeMalloc(1ULL, total_size); if (p->memory == NULL) { return 0; // memory error } - work = (int32_t*)p->memory; + work = (rescaler_t*)p->memory; tmp = (uint8_t*)(work + tmp_size1); WebPRescalerInit(&p->scaler_y, io->mb_w, io->mb_h, tmp + 0 * out_width, out_width, out_height, 0, 1, - io->mb_w, out_width, io->mb_h, out_height, work + 0 * work_size); WebPRescalerInit(&p->scaler_u, uv_in_width, uv_in_height, tmp + 1 * out_width, out_width, out_height, 0, 1, - io->mb_w, 2 * out_width, io->mb_h, 2 * out_height, work + 1 * work_size); WebPRescalerInit(&p->scaler_v, uv_in_width, uv_in_height, tmp + 2 * out_width, out_width, out_height, 0, 1, - io->mb_w, 2 * out_width, io->mb_h, 2 * out_height, work + 2 * work_size); p->emit = EmitRescaledRGB; if (has_alpha) { WebPRescalerInit(&p->scaler_a, io->mb_w, io->mb_h, tmp + 3 * out_width, out_width, out_height, 0, 1, - io->mb_w, out_width, io->mb_h, out_height, work + 3 * work_size); p->emit_alpha = EmitRescaledAlphaRGB; if (p->output->colorspace == MODE_RGBA_4444 || diff --git a/src/3rdparty/libwebp/src/dec/vp8i.h b/src/3rdparty/libwebp/src/dec/vp8i.h index a02d9ff..0e6c8f5 100644 --- a/src/3rdparty/libwebp/src/dec/vp8i.h +++ b/src/3rdparty/libwebp/src/dec/vp8i.h @@ 
-31,7 +31,7 @@ extern "C" { // version numbers #define DEC_MAJ_VERSION 0 #define DEC_MIN_VERSION 4 -#define DEC_REV_VERSION 3 +#define DEC_REV_VERSION 4 // intra prediction modes enum { B_DC_PRED = 0, // 4x4 modes diff --git a/src/3rdparty/libwebp/src/dec/vp8l.c b/src/3rdparty/libwebp/src/dec/vp8l.c index e2780e5..2fa5f40 100644 --- a/src/3rdparty/libwebp/src/dec/vp8l.c +++ b/src/3rdparty/libwebp/src/dec/vp8l.c @@ -390,13 +390,13 @@ static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) { const int in_height = io->mb_h; const int out_height = io->scaled_height; const uint64_t work_size = 2 * num_channels * (uint64_t)out_width; - int32_t* work; // Rescaler work area. - const uint64_t scaled_data_size = num_channels * (uint64_t)out_width; + rescaler_t* work; // Rescaler work area. + const uint64_t scaled_data_size = (uint64_t)out_width; uint32_t* scaled_data; // Temporary storage for scaled BGRA data. const uint64_t memory_size = sizeof(*dec->rescaler) + work_size * sizeof(*work) + scaled_data_size * sizeof(*scaled_data); - uint8_t* memory = (uint8_t*)WebPSafeCalloc(memory_size, sizeof(*memory)); + uint8_t* memory = (uint8_t*)WebPSafeMalloc(memory_size, sizeof(*memory)); if (memory == NULL) { dec->status_ = VP8_STATUS_OUT_OF_MEMORY; return 0; @@ -406,13 +406,12 @@ static int AllocateAndInitRescaler(VP8LDecoder* const dec, VP8Io* const io) { dec->rescaler = (WebPRescaler*)memory; memory += sizeof(*dec->rescaler); - work = (int32_t*)memory; + work = (rescaler_t*)memory; memory += work_size * sizeof(*work); scaled_data = (uint32_t*)memory; WebPRescalerInit(dec->rescaler, in_width, in_height, (uint8_t*)scaled_data, - out_width, out_height, 0, num_channels, - in_width, out_width, in_height, out_height, work); + out_width, out_height, 0, num_channels, work); return 1; } @@ -427,7 +426,7 @@ static int Export(WebPRescaler* const rescaler, WEBP_CSP_MODE colorspace, int num_lines_out = 0; while (WebPRescalerHasPendingOutput(rescaler)) { uint8_t* const dst 
= rgba + num_lines_out * rgba_stride; - WebPRescalerExportRow(rescaler, 0); + WebPRescalerExportRow(rescaler); WebPMultARGBRow(src, dst_width, 1); VP8LConvertFromBGRA(src, dst_width, colorspace, dst); ++num_lines_out; @@ -545,7 +544,7 @@ static int ExportYUVA(const VP8LDecoder* const dec, int y_pos) { const int dst_width = rescaler->dst_width; int num_lines_out = 0; while (WebPRescalerHasPendingOutput(rescaler)) { - WebPRescalerExportRow(rescaler, 0); + WebPRescalerExportRow(rescaler); WebPMultARGBRow(src, dst_width, 1); ConvertToYUVA(src, dst_width, y_pos, dec->output_); ++y_pos; diff --git a/src/3rdparty/libwebp/src/dsp/dec_neon.c b/src/3rdparty/libwebp/src/dsp/dec_neon.c index 9c5bc1c..4afae07 100644 --- a/src/3rdparty/libwebp/src/dsp/dec_neon.c +++ b/src/3rdparty/libwebp/src/dsp/dec_neon.c @@ -24,24 +24,24 @@ // Load/Store vertical edge #define LOAD8x4(c1, c2, c3, c4, b1, b2, stride) \ - "vld4.8 {" #c1"[0], " #c2"[0], " #c3"[0], " #c4"[0]}," #b1 "," #stride"\n" \ - "vld4.8 {" #c1"[1], " #c2"[1], " #c3"[1], " #c4"[1]}," #b2 "," #stride"\n" \ - "vld4.8 {" #c1"[2], " #c2"[2], " #c3"[2], " #c4"[2]}," #b1 "," #stride"\n" \ - "vld4.8 {" #c1"[3], " #c2"[3], " #c3"[3], " #c4"[3]}," #b2 "," #stride"\n" \ - "vld4.8 {" #c1"[4], " #c2"[4], " #c3"[4], " #c4"[4]}," #b1 "," #stride"\n" \ - "vld4.8 {" #c1"[5], " #c2"[5], " #c3"[5], " #c4"[5]}," #b2 "," #stride"\n" \ - "vld4.8 {" #c1"[6], " #c2"[6], " #c3"[6], " #c4"[6]}," #b1 "," #stride"\n" \ - "vld4.8 {" #c1"[7], " #c2"[7], " #c3"[7], " #c4"[7]}," #b2 "," #stride"\n" + "vld4.8 {" #c1 "[0]," #c2 "[0]," #c3 "[0]," #c4 "[0]}," #b1 "," #stride "\n" \ + "vld4.8 {" #c1 "[1]," #c2 "[1]," #c3 "[1]," #c4 "[1]}," #b2 "," #stride "\n" \ + "vld4.8 {" #c1 "[2]," #c2 "[2]," #c3 "[2]," #c4 "[2]}," #b1 "," #stride "\n" \ + "vld4.8 {" #c1 "[3]," #c2 "[3]," #c3 "[3]," #c4 "[3]}," #b2 "," #stride "\n" \ + "vld4.8 {" #c1 "[4]," #c2 "[4]," #c3 "[4]," #c4 "[4]}," #b1 "," #stride "\n" \ + "vld4.8 {" #c1 "[5]," #c2 "[5]," #c3 "[5]," #c4 "[5]}," #b2 
"," #stride "\n" \ + "vld4.8 {" #c1 "[6]," #c2 "[6]," #c3 "[6]," #c4 "[6]}," #b1 "," #stride "\n" \ + "vld4.8 {" #c1 "[7]," #c2 "[7]," #c3 "[7]," #c4 "[7]}," #b2 "," #stride "\n" #define STORE8x2(c1, c2, p, stride) \ - "vst2.8 {" #c1"[0], " #c2"[0]}," #p "," #stride " \n" \ - "vst2.8 {" #c1"[1], " #c2"[1]}," #p "," #stride " \n" \ - "vst2.8 {" #c1"[2], " #c2"[2]}," #p "," #stride " \n" \ - "vst2.8 {" #c1"[3], " #c2"[3]}," #p "," #stride " \n" \ - "vst2.8 {" #c1"[4], " #c2"[4]}," #p "," #stride " \n" \ - "vst2.8 {" #c1"[5], " #c2"[5]}," #p "," #stride " \n" \ - "vst2.8 {" #c1"[6], " #c2"[6]}," #p "," #stride " \n" \ - "vst2.8 {" #c1"[7], " #c2"[7]}," #p "," #stride " \n" + "vst2.8 {" #c1 "[0], " #c2 "[0]}," #p "," #stride " \n" \ + "vst2.8 {" #c1 "[1], " #c2 "[1]}," #p "," #stride " \n" \ + "vst2.8 {" #c1 "[2], " #c2 "[2]}," #p "," #stride " \n" \ + "vst2.8 {" #c1 "[3], " #c2 "[3]}," #p "," #stride " \n" \ + "vst2.8 {" #c1 "[4], " #c2 "[4]}," #p "," #stride " \n" \ + "vst2.8 {" #c1 "[5], " #c2 "[5]}," #p "," #stride " \n" \ + "vst2.8 {" #c1 "[6], " #c2 "[6]}," #p "," #stride " \n" \ + "vst2.8 {" #c1 "[7], " #c2 "[7]}," #p "," #stride " \n" #if !defined(WORK_AROUND_GCC) diff --git a/src/3rdparty/libwebp/src/dsp/dsp.h b/src/3rdparty/libwebp/src/dsp/dsp.h index 2409bae..a2c3951 100644 --- a/src/3rdparty/libwebp/src/dsp/dsp.h +++ b/src/3rdparty/libwebp/src/dsp/dsp.h @@ -36,14 +36,9 @@ extern "C" { # define LOCAL_GCC_PREREQ(maj, min) 0 #endif -#ifdef __clang__ -# define LOCAL_CLANG_VERSION ((__clang_major__ << 8) | __clang_minor__) -# define LOCAL_CLANG_PREREQ(maj, min) \ - (LOCAL_CLANG_VERSION >= (((maj) << 8) | (min))) -#else -# define LOCAL_CLANG_VERSION 0 -# define LOCAL_CLANG_PREREQ(maj, min) 0 -#endif // __clang__ +#ifndef __has_builtin +# define __has_builtin(x) 0 +#endif #if defined(_MSC_VER) && _MSC_VER > 1310 && \ (defined(_M_X64) || defined(_M_IX86)) @@ -73,7 +68,8 @@ extern "C" { #define WEBP_USE_NEON #endif -#if defined(__mips__) && !defined(__mips64) && 
(__mips_isa_rev < 6) +#if defined(__mips__) && !defined(__mips64) && \ + defined(__mips_isa_rev) && (__mips_isa_rev >= 1) && (__mips_isa_rev < 6) #define WEBP_USE_MIPS32 #if (__mips_isa_rev >= 2) #define WEBP_USE_MIPS32_R2 diff --git a/src/3rdparty/libwebp/src/dsp/enc_mips32.c b/src/3rdparty/libwebp/src/dsp/enc_mips32.c index def9a16..6cede18 100644 --- a/src/3rdparty/libwebp/src/dsp/enc_mips32.c +++ b/src/3rdparty/libwebp/src/dsp/enc_mips32.c @@ -34,26 +34,26 @@ static const int kC2 = 35468; // TEMP0..TEMP3 - registers for corresponding tmp elements // TEMP4..TEMP5 - temporary registers #define VERTICAL_PASS(A, B, C, D, TEMP4, TEMP0, TEMP1, TEMP2, TEMP3) \ - "lh %[temp16], "#A"(%[temp20]) \n\t" \ - "lh %[temp18], "#B"(%[temp20]) \n\t" \ - "lh %[temp17], "#C"(%[temp20]) \n\t" \ - "lh %[temp19], "#D"(%[temp20]) \n\t" \ - "addu %["#TEMP4"], %[temp16], %[temp18] \n\t" \ - "subu %[temp16], %[temp16], %[temp18] \n\t" \ - "mul %["#TEMP0"], %[temp17], %[kC2] \n\t" \ - "mul %[temp18], %[temp19], %[kC1] \n\t" \ - "mul %[temp17], %[temp17], %[kC1] \n\t" \ - "mul %[temp19], %[temp19], %[kC2] \n\t" \ - "sra %["#TEMP0"], %["#TEMP0"], 16 \n\n" \ - "sra %[temp18], %[temp18], 16 \n\n" \ - "sra %[temp17], %[temp17], 16 \n\n" \ - "sra %[temp19], %[temp19], 16 \n\n" \ - "subu %["#TEMP2"], %["#TEMP0"], %[temp18] \n\t" \ - "addu %["#TEMP3"], %[temp17], %[temp19] \n\t" \ - "addu %["#TEMP0"], %["#TEMP4"], %["#TEMP3"] \n\t" \ - "addu %["#TEMP1"], %[temp16], %["#TEMP2"] \n\t" \ - "subu %["#TEMP2"], %[temp16], %["#TEMP2"] \n\t" \ - "subu %["#TEMP3"], %["#TEMP4"], %["#TEMP3"] \n\t" + "lh %[temp16], " #A "(%[temp20]) \n\t" \ + "lh %[temp18], " #B "(%[temp20]) \n\t" \ + "lh %[temp17], " #C "(%[temp20]) \n\t" \ + "lh %[temp19], " #D "(%[temp20]) \n\t" \ + "addu %[" #TEMP4 "], %[temp16], %[temp18] \n\t" \ + "subu %[temp16], %[temp16], %[temp18] \n\t" \ + "mul %[" #TEMP0 "], %[temp17], %[kC2] \n\t" \ + "mul %[temp18], %[temp19], %[kC1] \n\t" \ + "mul %[temp17], %[temp17], %[kC1] \n\t" \ + "mul 
%[temp19], %[temp19], %[kC2] \n\t" \ + "sra %[" #TEMP0 "], %[" #TEMP0 "], 16 \n\n" \ + "sra %[temp18], %[temp18], 16 \n\n" \ + "sra %[temp17], %[temp17], 16 \n\n" \ + "sra %[temp19], %[temp19], 16 \n\n" \ + "subu %[" #TEMP2 "], %[" #TEMP0 "], %[temp18] \n\t" \ + "addu %[" #TEMP3 "], %[temp17], %[temp19] \n\t" \ + "addu %[" #TEMP0 "], %[" #TEMP4 "], %[" #TEMP3 "] \n\t" \ + "addu %[" #TEMP1 "], %[temp16], %[" #TEMP2 "] \n\t" \ + "subu %[" #TEMP2 "], %[temp16], %[" #TEMP2 "] \n\t" \ + "subu %[" #TEMP3 "], %[" #TEMP4 "], %[" #TEMP3 "] \n\t" // macro for one horizontal pass in ITransformOne // MUL and STORE macros inlined @@ -61,59 +61,59 @@ static const int kC2 = 35468; // temp0..temp15 holds tmp[0]..tmp[15] // A..D - offsets in bytes to load from ref and store to dst buffer // TEMP0, TEMP4, TEMP8 and TEMP12 - registers for corresponding tmp elements -#define HORIZONTAL_PASS(A, B, C, D, TEMP0, TEMP4, TEMP8, TEMP12) \ - "addiu %["#TEMP0"], %["#TEMP0"], 4 \n\t" \ - "addu %[temp16], %["#TEMP0"], %["#TEMP8"] \n\t" \ - "subu %[temp17], %["#TEMP0"], %["#TEMP8"] \n\t" \ - "mul %["#TEMP0"], %["#TEMP4"], %[kC2] \n\t" \ - "mul %["#TEMP8"], %["#TEMP12"], %[kC1] \n\t" \ - "mul %["#TEMP4"], %["#TEMP4"], %[kC1] \n\t" \ - "mul %["#TEMP12"], %["#TEMP12"], %[kC2] \n\t" \ - "sra %["#TEMP0"], %["#TEMP0"], 16 \n\t" \ - "sra %["#TEMP8"], %["#TEMP8"], 16 \n\t" \ - "sra %["#TEMP4"], %["#TEMP4"], 16 \n\t" \ - "sra %["#TEMP12"], %["#TEMP12"], 16 \n\t" \ - "subu %[temp18], %["#TEMP0"], %["#TEMP8"] \n\t" \ - "addu %[temp19], %["#TEMP4"], %["#TEMP12"] \n\t" \ - "addu %["#TEMP0"], %[temp16], %[temp19] \n\t" \ - "addu %["#TEMP4"], %[temp17], %[temp18] \n\t" \ - "subu %["#TEMP8"], %[temp17], %[temp18] \n\t" \ - "subu %["#TEMP12"], %[temp16], %[temp19] \n\t" \ - "lw %[temp20], 0(%[args]) \n\t" \ - "sra %["#TEMP0"], %["#TEMP0"], 3 \n\t" \ - "sra %["#TEMP4"], %["#TEMP4"], 3 \n\t" \ - "sra %["#TEMP8"], %["#TEMP8"], 3 \n\t" \ - "sra %["#TEMP12"], %["#TEMP12"], 3 \n\t" \ - "lbu %[temp16], "#A"(%[temp20]) 
\n\t" \ - "lbu %[temp17], "#B"(%[temp20]) \n\t" \ - "lbu %[temp18], "#C"(%[temp20]) \n\t" \ - "lbu %[temp19], "#D"(%[temp20]) \n\t" \ - "addu %["#TEMP0"], %[temp16], %["#TEMP0"] \n\t" \ - "addu %["#TEMP4"], %[temp17], %["#TEMP4"] \n\t" \ - "addu %["#TEMP8"], %[temp18], %["#TEMP8"] \n\t" \ - "addu %["#TEMP12"], %[temp19], %["#TEMP12"] \n\t" \ - "slt %[temp16], %["#TEMP0"], $zero \n\t" \ - "slt %[temp17], %["#TEMP4"], $zero \n\t" \ - "slt %[temp18], %["#TEMP8"], $zero \n\t" \ - "slt %[temp19], %["#TEMP12"], $zero \n\t" \ - "movn %["#TEMP0"], $zero, %[temp16] \n\t" \ - "movn %["#TEMP4"], $zero, %[temp17] \n\t" \ - "movn %["#TEMP8"], $zero, %[temp18] \n\t" \ - "movn %["#TEMP12"], $zero, %[temp19] \n\t" \ - "addiu %[temp20], $zero, 255 \n\t" \ - "slt %[temp16], %["#TEMP0"], %[temp20] \n\t" \ - "slt %[temp17], %["#TEMP4"], %[temp20] \n\t" \ - "slt %[temp18], %["#TEMP8"], %[temp20] \n\t" \ - "slt %[temp19], %["#TEMP12"], %[temp20] \n\t" \ - "movz %["#TEMP0"], %[temp20], %[temp16] \n\t" \ - "movz %["#TEMP4"], %[temp20], %[temp17] \n\t" \ - "lw %[temp16], 8(%[args]) \n\t" \ - "movz %["#TEMP8"], %[temp20], %[temp18] \n\t" \ - "movz %["#TEMP12"], %[temp20], %[temp19] \n\t" \ - "sb %["#TEMP0"], "#A"(%[temp16]) \n\t" \ - "sb %["#TEMP4"], "#B"(%[temp16]) \n\t" \ - "sb %["#TEMP8"], "#C"(%[temp16]) \n\t" \ - "sb %["#TEMP12"], "#D"(%[temp16]) \n\t" +#define HORIZONTAL_PASS(A, B, C, D, TEMP0, TEMP4, TEMP8, TEMP12) \ + "addiu %[" #TEMP0 "], %[" #TEMP0 "], 4 \n\t" \ + "addu %[temp16], %[" #TEMP0 "], %[" #TEMP8 "] \n\t" \ + "subu %[temp17], %[" #TEMP0 "], %[" #TEMP8 "] \n\t" \ + "mul %[" #TEMP0 "], %[" #TEMP4 "], %[kC2] \n\t" \ + "mul %[" #TEMP8 "], %[" #TEMP12 "], %[kC1] \n\t" \ + "mul %[" #TEMP4 "], %[" #TEMP4 "], %[kC1] \n\t" \ + "mul %[" #TEMP12 "], %[" #TEMP12 "], %[kC2] \n\t" \ + "sra %[" #TEMP0 "], %[" #TEMP0 "], 16 \n\t" \ + "sra %[" #TEMP8 "], %[" #TEMP8 "], 16 \n\t" \ + "sra %[" #TEMP4 "], %[" #TEMP4 "], 16 \n\t" \ + "sra %[" #TEMP12 "], %[" #TEMP12 "], 16 \n\t" \ + "subu 
%[temp18], %[" #TEMP0 "], %[" #TEMP8 "] \n\t" \ + "addu %[temp19], %[" #TEMP4 "], %[" #TEMP12 "] \n\t" \ + "addu %[" #TEMP0 "], %[temp16], %[temp19] \n\t" \ + "addu %[" #TEMP4 "], %[temp17], %[temp18] \n\t" \ + "subu %[" #TEMP8 "], %[temp17], %[temp18] \n\t" \ + "subu %[" #TEMP12 "], %[temp16], %[temp19] \n\t" \ + "lw %[temp20], 0(%[args]) \n\t" \ + "sra %[" #TEMP0 "], %[" #TEMP0 "], 3 \n\t" \ + "sra %[" #TEMP4 "], %[" #TEMP4 "], 3 \n\t" \ + "sra %[" #TEMP8 "], %[" #TEMP8 "], 3 \n\t" \ + "sra %[" #TEMP12 "], %[" #TEMP12 "], 3 \n\t" \ + "lbu %[temp16], " #A "(%[temp20]) \n\t" \ + "lbu %[temp17], " #B "(%[temp20]) \n\t" \ + "lbu %[temp18], " #C "(%[temp20]) \n\t" \ + "lbu %[temp19], " #D "(%[temp20]) \n\t" \ + "addu %[" #TEMP0 "], %[temp16], %[" #TEMP0 "] \n\t" \ + "addu %[" #TEMP4 "], %[temp17], %[" #TEMP4 "] \n\t" \ + "addu %[" #TEMP8 "], %[temp18], %[" #TEMP8 "] \n\t" \ + "addu %[" #TEMP12 "], %[temp19], %[" #TEMP12 "] \n\t" \ + "slt %[temp16], %[" #TEMP0 "], $zero \n\t" \ + "slt %[temp17], %[" #TEMP4 "], $zero \n\t" \ + "slt %[temp18], %[" #TEMP8 "], $zero \n\t" \ + "slt %[temp19], %[" #TEMP12 "], $zero \n\t" \ + "movn %[" #TEMP0 "], $zero, %[temp16] \n\t" \ + "movn %[" #TEMP4 "], $zero, %[temp17] \n\t" \ + "movn %[" #TEMP8 "], $zero, %[temp18] \n\t" \ + "movn %[" #TEMP12 "], $zero, %[temp19] \n\t" \ + "addiu %[temp20], $zero, 255 \n\t" \ + "slt %[temp16], %[" #TEMP0 "], %[temp20] \n\t" \ + "slt %[temp17], %[" #TEMP4 "], %[temp20] \n\t" \ + "slt %[temp18], %[" #TEMP8 "], %[temp20] \n\t" \ + "slt %[temp19], %[" #TEMP12 "], %[temp20] \n\t" \ + "movz %[" #TEMP0 "], %[temp20], %[temp16] \n\t" \ + "movz %[" #TEMP4 "], %[temp20], %[temp17] \n\t" \ + "lw %[temp16], 8(%[args]) \n\t" \ + "movz %[" #TEMP8 "], %[temp20], %[temp18] \n\t" \ + "movz %[" #TEMP12 "], %[temp20], %[temp19] \n\t" \ + "sb %[" #TEMP0 "], " #A "(%[temp16]) \n\t" \ + "sb %[" #TEMP4 "], " #B "(%[temp16]) \n\t" \ + "sb %[" #TEMP8 "], " #C "(%[temp16]) \n\t" \ + "sb %[" #TEMP12 "], " #D "(%[temp16]) \n\t" 
// Does one or two inverse transforms. static WEBP_INLINE void ITransformOne(const uint8_t* ref, const int16_t* in, @@ -164,9 +164,9 @@ static void ITransform(const uint8_t* ref, const int16_t* in, // K - offset in bytes (kZigzag[n] * 4) // N - offset in bytes (n * 2) #define QUANTIZE_ONE(J, K, N) \ - "lh %[temp0], "#J"(%[ppin]) \n\t" \ - "lhu %[temp1], "#J"(%[ppsharpen]) \n\t" \ - "lw %[temp2], "#K"(%[ppzthresh]) \n\t" \ + "lh %[temp0], " #J "(%[ppin]) \n\t" \ + "lhu %[temp1], " #J "(%[ppsharpen]) \n\t" \ + "lw %[temp2], " #K "(%[ppzthresh]) \n\t" \ "sra %[sign], %[temp0], 15 \n\t" \ "xor %[coeff], %[temp0], %[sign] \n\t" \ "subu %[coeff], %[coeff], %[sign] \n\t" \ @@ -175,9 +175,9 @@ static void ITransform(const uint8_t* ref, const int16_t* in, "addiu %[temp5], $zero, 0 \n\t" \ "addiu %[level], $zero, 0 \n\t" \ "beqz %[temp4], 2f \n\t" \ - "lhu %[temp1], "#J"(%[ppiq]) \n\t" \ - "lw %[temp2], "#K"(%[ppbias]) \n\t" \ - "lhu %[temp3], "#J"(%[ppq]) \n\t" \ + "lhu %[temp1], " #J "(%[ppiq]) \n\t" \ + "lw %[temp2], " #K "(%[ppbias]) \n\t" \ + "lhu %[temp3], " #J "(%[ppq]) \n\t" \ "mul %[level], %[coeff], %[temp1] \n\t" \ "addu %[level], %[level], %[temp2] \n\t" \ "sra %[level], %[level], 17 \n\t" \ @@ -187,8 +187,8 @@ static void ITransform(const uint8_t* ref, const int16_t* in, "subu %[level], %[level], %[sign] \n\t" \ "mul %[temp5], %[level], %[temp3] \n\t" \ "2: \n\t" \ - "sh %[temp5], "#J"(%[ppin]) \n\t" \ - "sh %[level], "#N"(%[pout]) \n\t" + "sh %[temp5], " #J "(%[ppin]) \n\t" \ + "sh %[level], " #N "(%[pout]) \n\t" static int QuantizeBlock(int16_t in[16], int16_t out[16], const VP8Matrix* const mtx) { @@ -249,14 +249,14 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16], // E..H - offsets in bytes to store first results to tmp buffer // E1..H1 - offsets in bytes to store second results to tmp buffer #define HORIZONTAL_PASS(A, B, C, D, E, F, G, H, E1, F1, G1, H1) \ - "lbu %[temp0], "#A"(%[a]) \n\t" \ - "lbu %[temp1], "#B"(%[a]) \n\t" \ - "lbu %[temp2], 
"#C"(%[a]) \n\t" \ - "lbu %[temp3], "#D"(%[a]) \n\t" \ - "lbu %[temp4], "#A"(%[b]) \n\t" \ - "lbu %[temp5], "#B"(%[b]) \n\t" \ - "lbu %[temp6], "#C"(%[b]) \n\t" \ - "lbu %[temp7], "#D"(%[b]) \n\t" \ + "lbu %[temp0], " #A "(%[a]) \n\t" \ + "lbu %[temp1], " #B "(%[a]) \n\t" \ + "lbu %[temp2], " #C "(%[a]) \n\t" \ + "lbu %[temp3], " #D "(%[a]) \n\t" \ + "lbu %[temp4], " #A "(%[b]) \n\t" \ + "lbu %[temp5], " #B "(%[b]) \n\t" \ + "lbu %[temp6], " #C "(%[b]) \n\t" \ + "lbu %[temp7], " #D "(%[b]) \n\t" \ "addu %[temp8], %[temp0], %[temp2] \n\t" \ "subu %[temp0], %[temp0], %[temp2] \n\t" \ "addu %[temp2], %[temp1], %[temp3] \n\t" \ @@ -273,14 +273,14 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16], "subu %[temp3], %[temp3], %[temp6] \n\t" \ "addu %[temp6], %[temp4], %[temp5] \n\t" \ "subu %[temp4], %[temp4], %[temp5] \n\t" \ - "sw %[temp7], "#E"(%[tmp]) \n\t" \ - "sw %[temp2], "#H"(%[tmp]) \n\t" \ - "sw %[temp8], "#F"(%[tmp]) \n\t" \ - "sw %[temp0], "#G"(%[tmp]) \n\t" \ - "sw %[temp1], "#E1"(%[tmp]) \n\t" \ - "sw %[temp3], "#H1"(%[tmp]) \n\t" \ - "sw %[temp6], "#F1"(%[tmp]) \n\t" \ - "sw %[temp4], "#G1"(%[tmp]) \n\t" + "sw %[temp7], " #E "(%[tmp]) \n\t" \ + "sw %[temp2], " #H "(%[tmp]) \n\t" \ + "sw %[temp8], " #F "(%[tmp]) \n\t" \ + "sw %[temp0], " #G "(%[tmp]) \n\t" \ + "sw %[temp1], " #E1 "(%[tmp]) \n\t" \ + "sw %[temp3], " #H1 "(%[tmp]) \n\t" \ + "sw %[temp6], " #F1 "(%[tmp]) \n\t" \ + "sw %[temp4], " #G1 "(%[tmp]) \n\t" // macro for one vertical pass in Disto4x4 (TTransform) // two calls of function TTransform are merged into single one @@ -295,10 +295,10 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16], // A1..D1 - offsets in bytes to load second results from tmp buffer // E..H - offsets in bytes to load from w buffer #define VERTICAL_PASS(A, B, C, D, A1, B1, C1, D1, E, F, G, H) \ - "lw %[temp0], "#A1"(%[tmp]) \n\t" \ - "lw %[temp1], "#C1"(%[tmp]) \n\t" \ - "lw %[temp2], "#B1"(%[tmp]) \n\t" \ - "lw %[temp3], "#D1"(%[tmp]) \n\t" \ + "lw 
%[temp0], " #A1 "(%[tmp]) \n\t" \ + "lw %[temp1], " #C1 "(%[tmp]) \n\t" \ + "lw %[temp2], " #B1 "(%[tmp]) \n\t" \ + "lw %[temp3], " #D1 "(%[tmp]) \n\t" \ "addu %[temp8], %[temp0], %[temp1] \n\t" \ "subu %[temp0], %[temp0], %[temp1] \n\t" \ "addu %[temp1], %[temp2], %[temp3] \n\t" \ @@ -319,18 +319,18 @@ static int QuantizeBlock(int16_t in[16], int16_t out[16], "subu %[temp1], %[temp1], %[temp5] \n\t" \ "subu %[temp0], %[temp0], %[temp6] \n\t" \ "subu %[temp8], %[temp8], %[temp7] \n\t" \ - "lhu %[temp4], "#E"(%[w]) \n\t" \ - "lhu %[temp5], "#F"(%[w]) \n\t" \ - "lhu %[temp6], "#G"(%[w]) \n\t" \ - "lhu %[temp7], "#H"(%[w]) \n\t" \ + "lhu %[temp4], " #E "(%[w]) \n\t" \ + "lhu %[temp5], " #F "(%[w]) \n\t" \ + "lhu %[temp6], " #G "(%[w]) \n\t" \ + "lhu %[temp7], " #H "(%[w]) \n\t" \ "madd %[temp4], %[temp3] \n\t" \ "madd %[temp5], %[temp1] \n\t" \ "madd %[temp6], %[temp0] \n\t" \ "madd %[temp7], %[temp8] \n\t" \ - "lw %[temp0], "#A"(%[tmp]) \n\t" \ - "lw %[temp1], "#C"(%[tmp]) \n\t" \ - "lw %[temp2], "#B"(%[tmp]) \n\t" \ - "lw %[temp3], "#D"(%[tmp]) \n\t" \ + "lw %[temp0], " #A "(%[tmp]) \n\t" \ + "lw %[temp1], " #C "(%[tmp]) \n\t" \ + "lw %[temp2], " #B "(%[tmp]) \n\t" \ + "lw %[temp3], " #D "(%[tmp]) \n\t" \ "addu %[temp8], %[temp0], %[temp1] \n\t" \ "subu %[temp0], %[temp0], %[temp1] \n\t" \ "addu %[temp1], %[temp2], %[temp3] \n\t" \ @@ -407,71 +407,71 @@ static int Disto16x16(const uint8_t* const a, const uint8_t* const b, // temp0..temp15 holds tmp[0]..tmp[15] // A..D - offsets in bytes to load from src and ref buffers // TEMP0..TEMP3 - registers for corresponding tmp elements -#define HORIZONTAL_PASS(A, B, C, D, TEMP0, TEMP1, TEMP2, TEMP3) \ - "lw %["#TEMP1"], 0(%[args]) \n\t" \ - "lw %["#TEMP2"], 4(%[args]) \n\t" \ - "lbu %[temp16], "#A"(%["#TEMP1"]) \n\t" \ - "lbu %[temp17], "#A"(%["#TEMP2"]) \n\t" \ - "lbu %[temp18], "#B"(%["#TEMP1"]) \n\t" \ - "lbu %[temp19], "#B"(%["#TEMP2"]) \n\t" \ - "subu %[temp20], %[temp16], %[temp17] \n\t" \ - "lbu %[temp16], 
"#C"(%["#TEMP1"]) \n\t" \ - "lbu %[temp17], "#C"(%["#TEMP2"]) \n\t" \ - "subu %["#TEMP0"], %[temp18], %[temp19] \n\t" \ - "lbu %[temp18], "#D"(%["#TEMP1"]) \n\t" \ - "lbu %[temp19], "#D"(%["#TEMP2"]) \n\t" \ - "subu %["#TEMP1"], %[temp16], %[temp17] \n\t" \ - "subu %["#TEMP2"], %[temp18], %[temp19] \n\t" \ - "addu %["#TEMP3"], %[temp20], %["#TEMP2"] \n\t" \ - "subu %["#TEMP2"], %[temp20], %["#TEMP2"] \n\t" \ - "addu %[temp20], %["#TEMP0"], %["#TEMP1"] \n\t" \ - "subu %["#TEMP0"], %["#TEMP0"], %["#TEMP1"] \n\t" \ - "mul %[temp16], %["#TEMP2"], %[c5352] \n\t" \ - "mul %[temp17], %["#TEMP2"], %[c2217] \n\t" \ - "mul %[temp18], %["#TEMP0"], %[c5352] \n\t" \ - "mul %[temp19], %["#TEMP0"], %[c2217] \n\t" \ - "addu %["#TEMP1"], %["#TEMP3"], %[temp20] \n\t" \ - "subu %[temp20], %["#TEMP3"], %[temp20] \n\t" \ - "sll %["#TEMP0"], %["#TEMP1"], 3 \n\t" \ - "sll %["#TEMP2"], %[temp20], 3 \n\t" \ - "addiu %[temp16], %[temp16], 1812 \n\t" \ - "addiu %[temp17], %[temp17], 937 \n\t" \ - "addu %[temp16], %[temp16], %[temp19] \n\t" \ - "subu %[temp17], %[temp17], %[temp18] \n\t" \ - "sra %["#TEMP1"], %[temp16], 9 \n\t" \ - "sra %["#TEMP3"], %[temp17], 9 \n\t" +#define HORIZONTAL_PASS(A, B, C, D, TEMP0, TEMP1, TEMP2, TEMP3) \ + "lw %[" #TEMP1 "], 0(%[args]) \n\t" \ + "lw %[" #TEMP2 "], 4(%[args]) \n\t" \ + "lbu %[temp16], " #A "(%[" #TEMP1 "]) \n\t" \ + "lbu %[temp17], " #A "(%[" #TEMP2 "]) \n\t" \ + "lbu %[temp18], " #B "(%[" #TEMP1 "]) \n\t" \ + "lbu %[temp19], " #B "(%[" #TEMP2 "]) \n\t" \ + "subu %[temp20], %[temp16], %[temp17] \n\t" \ + "lbu %[temp16], " #C "(%[" #TEMP1 "]) \n\t" \ + "lbu %[temp17], " #C "(%[" #TEMP2 "]) \n\t" \ + "subu %[" #TEMP0 "], %[temp18], %[temp19] \n\t" \ + "lbu %[temp18], " #D "(%[" #TEMP1 "]) \n\t" \ + "lbu %[temp19], " #D "(%[" #TEMP2 "]) \n\t" \ + "subu %[" #TEMP1 "], %[temp16], %[temp17] \n\t" \ + "subu %[" #TEMP2 "], %[temp18], %[temp19] \n\t" \ + "addu %[" #TEMP3 "], %[temp20], %[" #TEMP2 "] \n\t" \ + "subu %[" #TEMP2 "], %[temp20], %[" #TEMP2 "] 
\n\t" \ + "addu %[temp20], %[" #TEMP0 "], %[" #TEMP1 "] \n\t" \ + "subu %[" #TEMP0 "], %[" #TEMP0 "], %[" #TEMP1 "] \n\t" \ + "mul %[temp16], %[" #TEMP2 "], %[c5352] \n\t" \ + "mul %[temp17], %[" #TEMP2 "], %[c2217] \n\t" \ + "mul %[temp18], %[" #TEMP0 "], %[c5352] \n\t" \ + "mul %[temp19], %[" #TEMP0 "], %[c2217] \n\t" \ + "addu %[" #TEMP1 "], %[" #TEMP3 "], %[temp20] \n\t" \ + "subu %[temp20], %[" #TEMP3 "], %[temp20] \n\t" \ + "sll %[" #TEMP0 "], %[" #TEMP1 "], 3 \n\t" \ + "sll %[" #TEMP2 "], %[temp20], 3 \n\t" \ + "addiu %[temp16], %[temp16], 1812 \n\t" \ + "addiu %[temp17], %[temp17], 937 \n\t" \ + "addu %[temp16], %[temp16], %[temp19] \n\t" \ + "subu %[temp17], %[temp17], %[temp18] \n\t" \ + "sra %[" #TEMP1 "], %[temp16], 9 \n\t" \ + "sra %[" #TEMP3 "], %[temp17], 9 \n\t" // macro for one vertical pass in FTransform // temp0..temp15 holds tmp[0]..tmp[15] // A..D - offsets in bytes to store to out buffer // TEMP0, TEMP4, TEMP8 and TEMP12 - registers for corresponding tmp elements -#define VERTICAL_PASS(A, B, C, D, TEMP0, TEMP4, TEMP8, TEMP12) \ - "addu %[temp16], %["#TEMP0"], %["#TEMP12"] \n\t" \ - "subu %[temp19], %["#TEMP0"], %["#TEMP12"] \n\t" \ - "addu %[temp17], %["#TEMP4"], %["#TEMP8"] \n\t" \ - "subu %[temp18], %["#TEMP4"], %["#TEMP8"] \n\t" \ - "mul %["#TEMP8"], %[temp19], %[c2217] \n\t" \ - "mul %["#TEMP12"], %[temp18], %[c2217] \n\t" \ - "mul %["#TEMP4"], %[temp19], %[c5352] \n\t" \ - "mul %[temp18], %[temp18], %[c5352] \n\t" \ - "addiu %[temp16], %[temp16], 7 \n\t" \ - "addu %["#TEMP0"], %[temp16], %[temp17] \n\t" \ - "sra %["#TEMP0"], %["#TEMP0"], 4 \n\t" \ - "addu %["#TEMP12"], %["#TEMP12"], %["#TEMP4"] \n\t" \ - "subu %["#TEMP4"], %[temp16], %[temp17] \n\t" \ - "sra %["#TEMP4"], %["#TEMP4"], 4 \n\t" \ - "addiu %["#TEMP8"], %["#TEMP8"], 30000 \n\t" \ - "addiu %["#TEMP12"], %["#TEMP12"], 12000 \n\t" \ - "addiu %["#TEMP8"], %["#TEMP8"], 21000 \n\t" \ - "subu %["#TEMP8"], %["#TEMP8"], %[temp18] \n\t" \ - "sra %["#TEMP12"], %["#TEMP12"], 16 \n\t" \ - 
"sra %["#TEMP8"], %["#TEMP8"], 16 \n\t" \ - "addiu %[temp16], %["#TEMP12"], 1 \n\t" \ - "movn %["#TEMP12"], %[temp16], %[temp19] \n\t" \ - "sh %["#TEMP0"], "#A"(%[temp20]) \n\t" \ - "sh %["#TEMP4"], "#C"(%[temp20]) \n\t" \ - "sh %["#TEMP8"], "#D"(%[temp20]) \n\t" \ - "sh %["#TEMP12"], "#B"(%[temp20]) \n\t" +#define VERTICAL_PASS(A, B, C, D, TEMP0, TEMP4, TEMP8, TEMP12) \ + "addu %[temp16], %[" #TEMP0 "], %[" #TEMP12 "] \n\t" \ + "subu %[temp19], %[" #TEMP0 "], %[" #TEMP12 "] \n\t" \ + "addu %[temp17], %[" #TEMP4 "], %[" #TEMP8 "] \n\t" \ + "subu %[temp18], %[" #TEMP4 "], %[" #TEMP8 "] \n\t" \ + "mul %[" #TEMP8 "], %[temp19], %[c2217] \n\t" \ + "mul %[" #TEMP12 "], %[temp18], %[c2217] \n\t" \ + "mul %[" #TEMP4 "], %[temp19], %[c5352] \n\t" \ + "mul %[temp18], %[temp18], %[c5352] \n\t" \ + "addiu %[temp16], %[temp16], 7 \n\t" \ + "addu %[" #TEMP0 "], %[temp16], %[temp17] \n\t" \ + "sra %[" #TEMP0 "], %[" #TEMP0 "], 4 \n\t" \ + "addu %[" #TEMP12 "], %[" #TEMP12 "], %[" #TEMP4 "] \n\t" \ + "subu %[" #TEMP4 "], %[temp16], %[temp17] \n\t" \ + "sra %[" #TEMP4 "], %[" #TEMP4 "], 4 \n\t" \ + "addiu %[" #TEMP8 "], %[" #TEMP8 "], 30000 \n\t" \ + "addiu %[" #TEMP12 "], %[" #TEMP12 "], 12000 \n\t" \ + "addiu %[" #TEMP8 "], %[" #TEMP8 "], 21000 \n\t" \ + "subu %[" #TEMP8 "], %[" #TEMP8 "], %[temp18] \n\t" \ + "sra %[" #TEMP12 "], %[" #TEMP12 "], 16 \n\t" \ + "sra %[" #TEMP8 "], %[" #TEMP8 "], 16 \n\t" \ + "addiu %[temp16], %[" #TEMP12 "], 1 \n\t" \ + "movn %[" #TEMP12 "], %[temp16], %[temp19] \n\t" \ + "sh %[" #TEMP0 "], " #A "(%[temp20]) \n\t" \ + "sh %[" #TEMP4 "], " #C "(%[temp20]) \n\t" \ + "sh %[" #TEMP8 "], " #D "(%[temp20]) \n\t" \ + "sh %[" #TEMP12 "], " #B "(%[temp20]) \n\t" static void FTransform(const uint8_t* src, const uint8_t* ref, int16_t* out) { int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8; @@ -622,14 +622,14 @@ int VP8GetResidualCostMIPS32(int ctx0, const VP8Residual* const res) { } #define GET_SSE_INNER(A, B, C, D) \ - "lbu %[temp0], 
"#A"(%[a]) \n\t" \ - "lbu %[temp1], "#A"(%[b]) \n\t" \ - "lbu %[temp2], "#B"(%[a]) \n\t" \ - "lbu %[temp3], "#B"(%[b]) \n\t" \ - "lbu %[temp4], "#C"(%[a]) \n\t" \ - "lbu %[temp5], "#C"(%[b]) \n\t" \ - "lbu %[temp6], "#D"(%[a]) \n\t" \ - "lbu %[temp7], "#D"(%[b]) \n\t" \ + "lbu %[temp0], " #A "(%[a]) \n\t" \ + "lbu %[temp1], " #A "(%[b]) \n\t" \ + "lbu %[temp2], " #B "(%[a]) \n\t" \ + "lbu %[temp3], " #B "(%[b]) \n\t" \ + "lbu %[temp4], " #C "(%[a]) \n\t" \ + "lbu %[temp5], " #C "(%[b]) \n\t" \ + "lbu %[temp6], " #D "(%[a]) \n\t" \ + "lbu %[temp7], " #D "(%[b]) \n\t" \ "subu %[temp0], %[temp0], %[temp1] \n\t" \ "subu %[temp2], %[temp2], %[temp3] \n\t" \ "subu %[temp4], %[temp4], %[temp5] \n\t" \ diff --git a/src/3rdparty/libwebp/src/dsp/lossless_mips32.c b/src/3rdparty/libwebp/src/dsp/lossless_mips32.c index 1308580..5562c41 100644 --- a/src/3rdparty/libwebp/src/dsp/lossless_mips32.c +++ b/src/3rdparty/libwebp/src/dsp/lossless_mips32.c @@ -285,28 +285,28 @@ static VP8LStreaks HuffmanCostCombinedCount(const uint32_t* X, // literal_ and successive histograms could be unaligned // so we must use ulw and usw #define ADD_TO_OUT(A, B, C, D, E, P0, P1, P2) \ - "ulw %[temp0], "#A"(%["#P0"]) \n\t" \ - "ulw %[temp1], "#B"(%["#P0"]) \n\t" \ - "ulw %[temp2], "#C"(%["#P0"]) \n\t" \ - "ulw %[temp3], "#D"(%["#P0"]) \n\t" \ - "ulw %[temp4], "#A"(%["#P1"]) \n\t" \ - "ulw %[temp5], "#B"(%["#P1"]) \n\t" \ - "ulw %[temp6], "#C"(%["#P1"]) \n\t" \ - "ulw %[temp7], "#D"(%["#P1"]) \n\t" \ + "ulw %[temp0], " #A "(%[" #P0 "]) \n\t" \ + "ulw %[temp1], " #B "(%[" #P0 "]) \n\t" \ + "ulw %[temp2], " #C "(%[" #P0 "]) \n\t" \ + "ulw %[temp3], " #D "(%[" #P0 "]) \n\t" \ + "ulw %[temp4], " #A "(%[" #P1 "]) \n\t" \ + "ulw %[temp5], " #B "(%[" #P1 "]) \n\t" \ + "ulw %[temp6], " #C "(%[" #P1 "]) \n\t" \ + "ulw %[temp7], " #D "(%[" #P1 "]) \n\t" \ "addu %[temp4], %[temp4], %[temp0] \n\t" \ "addu %[temp5], %[temp5], %[temp1] \n\t" \ "addu %[temp6], %[temp6], %[temp2] \n\t" \ "addu %[temp7], %[temp7], 
%[temp3] \n\t" \ - "addiu %["#P0"], %["#P0"], 16 \n\t" \ - ".if "#E" == 1 \n\t" \ - "addiu %["#P1"], %["#P1"], 16 \n\t" \ + "addiu %[" #P0 "], %[" #P0 "], 16 \n\t" \ + ".if " #E " == 1 \n\t" \ + "addiu %[" #P1 "], %[" #P1 "], 16 \n\t" \ ".endif \n\t" \ - "usw %[temp4], "#A"(%["#P2"]) \n\t" \ - "usw %[temp5], "#B"(%["#P2"]) \n\t" \ - "usw %[temp6], "#C"(%["#P2"]) \n\t" \ - "usw %[temp7], "#D"(%["#P2"]) \n\t" \ - "addiu %["#P2"], %["#P2"], 16 \n\t" \ - "bne %["#P0"], %[LoopEnd], 1b \n\t" \ + "usw %[temp4], " #A "(%[" #P2 "]) \n\t" \ + "usw %[temp5], " #B "(%[" #P2 "]) \n\t" \ + "usw %[temp6], " #C "(%[" #P2 "]) \n\t" \ + "usw %[temp7], " #D "(%[" #P2 "]) \n\t" \ + "addiu %[" #P2 "], %[" #P2 "], 16 \n\t" \ + "bne %[" #P0 "], %[LoopEnd], 1b \n\t" \ ".set pop \n\t" \ #define ASM_END_COMMON_0 \ diff --git a/src/3rdparty/libwebp/src/enc/histogram.c b/src/3rdparty/libwebp/src/enc/histogram.c index 7c6abb4..a2266b4 100644 --- a/src/3rdparty/libwebp/src/enc/histogram.c +++ b/src/3rdparty/libwebp/src/enc/histogram.c @@ -20,6 +20,9 @@ #include "../dsp/lossless.h" #include "../utils/utils.h" +#define ALIGN_CST 15 +#define DO_ALIGN(PTR) ((uintptr_t)((PTR) + ALIGN_CST) & ~ALIGN_CST) + #define MAX_COST 1.e38 // Number of partitions for the three dominant (literal, red and blue) symbol @@ -101,9 +104,9 @@ VP8LHistogram* VP8LAllocateHistogram(int cache_bits) { VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits) { int i; VP8LHistogramSet* set; - const size_t total_size = sizeof(*set) - + sizeof(*set->histograms) * size - + (size_t)VP8LGetHistogramSize(cache_bits) * size; + const int histo_size = VP8LGetHistogramSize(cache_bits); + const size_t total_size = + sizeof(*set) + size * (sizeof(*set->histograms) + histo_size + ALIGN_CST); uint8_t* memory = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*memory)); if (memory == NULL) return NULL; @@ -114,12 +117,12 @@ VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits) { set->max_size = size; set->size = size; 
for (i = 0; i < size; ++i) { + memory = (uint8_t*)DO_ALIGN(memory); set->histograms[i] = (VP8LHistogram*)memory; // literal_ won't necessary be aligned. set->histograms[i]->literal_ = (uint32_t*)(memory + sizeof(VP8LHistogram)); VP8LHistogramInit(set->histograms[i], cache_bits); - // There's no padding/alignment between successive histograms. - memory += VP8LGetHistogramSize(cache_bits); + memory += histo_size; } return set; } diff --git a/src/3rdparty/libwebp/src/enc/picture_rescale.c b/src/3rdparty/libwebp/src/enc/picture_rescale.c index de52848..9e45551 100644 --- a/src/3rdparty/libwebp/src/enc/picture_rescale.c +++ b/src/3rdparty/libwebp/src/enc/picture_rescale.c @@ -175,17 +175,13 @@ static void RescalePlane(const uint8_t* src, int src_width, int src_height, int src_stride, uint8_t* dst, int dst_width, int dst_height, int dst_stride, - int32_t* const work, + rescaler_t* const work, int num_channels) { WebPRescaler rescaler; int y = 0; WebPRescalerInit(&rescaler, src_width, src_height, dst, dst_width, dst_height, dst_stride, - num_channels, - src_width, dst_width, - src_height, dst_height, - work); - memset(work, 0, 2 * dst_width * num_channels * sizeof(*work)); + num_channels, work); while (y < src_height) { y += WebPRescalerImport(&rescaler, src_height - y, src + y * src_stride, src_stride); @@ -209,7 +205,7 @@ static void AlphaMultiplyY(WebPPicture* const pic, int inverse) { int WebPPictureRescale(WebPPicture* pic, int width, int height) { WebPPicture tmp; int prev_width, prev_height; - int32_t* work; + rescaler_t* work; if (pic == NULL) return 0; prev_width = pic->width; @@ -231,7 +227,7 @@ int WebPPictureRescale(WebPPicture* pic, int width, int height) { if (!WebPPictureAlloc(&tmp)) return 0; if (!pic->use_argb) { - work = (int32_t*)WebPSafeMalloc(2ULL * width, sizeof(*work)); + work = (rescaler_t*)WebPSafeMalloc(2ULL * width, sizeof(*work)); if (work == NULL) { WebPPictureFree(&tmp); return 0; @@ -259,7 +255,7 @@ int WebPPictureRescale(WebPPicture* pic, 
int width, int height) { tmp.v, HALVE(width), HALVE(height), tmp.uv_stride, work, 1); } else { - work = (int32_t*)WebPSafeMalloc(2ULL * width * 4, sizeof(*work)); + work = (rescaler_t*)WebPSafeMalloc(2ULL * width * 4, sizeof(*work)); if (work == NULL) { WebPPictureFree(&tmp); return 0; diff --git a/src/3rdparty/libwebp/src/enc/vp8enci.h b/src/3rdparty/libwebp/src/enc/vp8enci.h index 74c8f70..20f58c6 100644 --- a/src/3rdparty/libwebp/src/enc/vp8enci.h +++ b/src/3rdparty/libwebp/src/enc/vp8enci.h @@ -30,7 +30,7 @@ extern "C" { // version numbers #define ENC_MAJ_VERSION 0 #define ENC_MIN_VERSION 4 -#define ENC_REV_VERSION 3 +#define ENC_REV_VERSION 4 // intra prediction modes enum { B_DC_PRED = 0, // 4x4 modes diff --git a/src/3rdparty/libwebp/src/utils/endian_inl.h b/src/3rdparty/libwebp/src/utils/endian_inl.h index cd56c37..e11260f 100644 --- a/src/3rdparty/libwebp/src/utils/endian_inl.h +++ b/src/3rdparty/libwebp/src/utils/endian_inl.h @@ -35,14 +35,14 @@ #endif #if !defined(HAVE_CONFIG_H) -// clang-3.3 and gcc-4.3 have builtin functions for swap32/swap64 -#if LOCAL_GCC_PREREQ(4,3) || LOCAL_CLANG_PREREQ(3,3) +#if LOCAL_GCC_PREREQ(4,8) || __has_builtin(__builtin_bswap16) +#define HAVE_BUILTIN_BSWAP16 +#endif +#if LOCAL_GCC_PREREQ(4,3) || __has_builtin(__builtin_bswap32) #define HAVE_BUILTIN_BSWAP32 -#define HAVE_BUILTIN_BSWAP64 #endif -// clang-3.3 and gcc-4.8 have a builtin function for swap16 -#if LOCAL_GCC_PREREQ(4,8) || LOCAL_CLANG_PREREQ(3,3) -#define HAVE_BUILTIN_BSWAP16 +#if LOCAL_GCC_PREREQ(4,3) || __has_builtin(__builtin_bswap64) +#define HAVE_BUILTIN_BSWAP64 #endif #endif // !HAVE_CONFIG_H diff --git a/src/3rdparty/libwebp/src/utils/rescaler.c b/src/3rdparty/libwebp/src/utils/rescaler.c index fad9c6b..3a43229 100644 --- a/src/3rdparty/libwebp/src/utils/rescaler.c +++ b/src/3rdparty/libwebp/src/utils/rescaler.c @@ -13,77 +13,192 @@ #include <assert.h> #include <stdlib.h> +#include <string.h> #include "./rescaler.h" #include "../dsp/dsp.h" 
//------------------------------------------------------------------------------ // Implementations of critical functions ImportRow / ExportRow -void (*WebPRescalerImportRow)(WebPRescaler* const wrk, - const uint8_t* const src, int channel) = NULL; -void (*WebPRescalerExportRow)(WebPRescaler* const wrk, int x_out) = NULL; +// Import a row of data and save its contribution in the rescaler. +// 'channel' denotes the channel number to be imported. 'Expand' corresponds to +// the wrk->x_expand case. Otherwise, 'Shrink' is to be used. +typedef void (*WebPRescalerImportRowFunc)(WebPRescaler* const wrk, + const uint8_t* src); +static WebPRescalerImportRowFunc WebPRescalerImportRowExpand; +static WebPRescalerImportRowFunc WebPRescalerImportRowShrink; -#define RFIX 30 -#define MULT_FIX(x, y) (((int64_t)(x) * (y) + (1 << (RFIX - 1))) >> RFIX) +// Export one row (starting at x_out position) from rescaler. +// 'Expand' corresponds to the wrk->y_expand case. +// Otherwise 'Shrink' is to be used +typedef void (*WebPRescalerExportRowFunc)(WebPRescaler* const wrk); +static WebPRescalerExportRowFunc WebPRescalerExportRowExpand; +static WebPRescalerExportRowFunc WebPRescalerExportRowShrink; -static void ImportRowC(WebPRescaler* const wrk, - const uint8_t* const src, int channel) { +#define WEBP_RESCALER_RFIX 32 // fixed-point precision for multiplies +#define WEBP_RESCALER_ONE (1ull << WEBP_RESCALER_RFIX) +#define WEBP_RESCALER_FRAC(x, y) \ + ((uint32_t)(((uint64_t)(x) << WEBP_RESCALER_RFIX) / (y))) +#define ROUNDER (WEBP_RESCALER_ONE >> 1) +#define MULT_FIX(x, y) (((uint64_t)(x) * (y) + ROUNDER) >> WEBP_RESCALER_RFIX) + +static void ImportRowExpandC(WebPRescaler* const wrk, const uint8_t* src) { const int x_stride = wrk->num_channels; const int x_out_max = wrk->dst_width * wrk->num_channels; - int x_in = channel; - int x_out; - int accum = 0; - if (!wrk->x_expand) { - int sum = 0; - for (x_out = channel; x_out < x_out_max; x_out += x_stride) { + int channel; + 
assert(!WebPRescalerInputDone(wrk)); + assert(wrk->x_expand); + for (channel = 0; channel < x_stride; ++channel) { + int x_in = channel; + int x_out = channel; + // simple bilinear interpolation + int accum = wrk->x_add; + int left = src[x_in]; + int right = (wrk->src_width > 1) ? src[x_in + x_stride] : left; + x_in += x_stride; + while (1) { + wrk->frow[x_out] = right * wrk->x_add + (left - right) * accum; + x_out += x_stride; + if (x_out >= x_out_max) break; + accum -= wrk->x_sub; + if (accum < 0) { + left = right; + x_in += x_stride; + assert(x_in < wrk->src_width * x_stride); + right = src[x_in]; + accum += wrk->x_add; + } + } + assert(wrk->x_sub == 0 /* <- special case for src_width=1 */ || accum == 0); + } +} + +static void ImportRowShrinkC(WebPRescaler* const wrk, const uint8_t* src) { + const int x_stride = wrk->num_channels; + const int x_out_max = wrk->dst_width * wrk->num_channels; + int channel; + assert(!WebPRescalerInputDone(wrk)); + assert(!wrk->x_expand); + for (channel = 0; channel < x_stride; ++channel) { + int x_in = channel; + int x_out = channel; + uint32_t sum = 0; + int accum = 0; + while (x_out < x_out_max) { + uint32_t base = 0; accum += wrk->x_add; - for (; accum > 0; accum -= wrk->x_sub) { - sum += src[x_in]; + while (accum > 0) { + accum -= wrk->x_sub; + assert(x_in < wrk->src_width * x_stride); + base = src[x_in]; + sum += base; x_in += x_stride; } { // Emit next horizontal pixel. 
- const int32_t base = src[x_in]; - const int32_t frac = base * (-accum); - x_in += x_stride; - wrk->frow[x_out] = (sum + base) * wrk->x_sub - frac; + const rescaler_t frac = base * (-accum); + wrk->frow[x_out] = sum * wrk->x_sub - frac; // fresh fractional start for next pixel sum = (int)MULT_FIX(frac, wrk->fx_scale); } + x_out += x_stride; } - } else { // simple bilinear interpolation - int left = src[channel], right = src[channel]; - for (x_out = channel; x_out < x_out_max; x_out += x_stride) { - if (accum < 0) { - left = right; - x_in += x_stride; - right = src[x_in]; - accum += wrk->x_add; - } - wrk->frow[x_out] = right * wrk->x_add + (left - right) * accum; - accum -= wrk->x_sub; - } + assert(accum == 0); } - // Accumulate the contribution of the new row. - for (x_out = channel; x_out < x_out_max; x_out += x_stride) { - wrk->irow[x_out] += wrk->frow[x_out]; +} + +//------------------------------------------------------------------------------ +// Row export + +static void ExportRowExpandC(WebPRescaler* const wrk) { + int x_out; + uint8_t* const dst = wrk->dst; + rescaler_t* const irow = wrk->irow; + const int x_out_max = wrk->dst_width * wrk->num_channels; + const rescaler_t* const frow = wrk->frow; + assert(!WebPRescalerOutputDone(wrk)); + assert(wrk->y_accum <= 0); + assert(wrk->y_expand); + assert(wrk->y_sub != 0); + if (wrk->y_accum == 0) { + for (x_out = 0; x_out < x_out_max; ++x_out) { + const uint32_t J = frow[x_out]; + const int v = (int)MULT_FIX(J, wrk->fy_scale); + assert(v >= 0 && v <= 255); + dst[x_out] = v; + } + } else { + const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub); + const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B); + for (x_out = 0; x_out < x_out_max; ++x_out) { + const uint64_t I = (uint64_t)A * frow[x_out] + + (uint64_t)B * irow[x_out]; + const uint32_t J = (uint32_t)((I + ROUNDER) >> WEBP_RESCALER_RFIX); + const int v = (int)MULT_FIX(J, wrk->fy_scale); + assert(v >= 0 && v <= 255); + dst[x_out] = v; + } } } -static 
void ExportRowC(WebPRescaler* const wrk, int x_out) { - if (wrk->y_accum <= 0) { - uint8_t* const dst = wrk->dst; - int32_t* const irow = wrk->irow; - const int32_t* const frow = wrk->frow; - const int yscale = wrk->fy_scale * (-wrk->y_accum); - const int x_out_max = wrk->dst_width * wrk->num_channels; - for (; x_out < x_out_max; ++x_out) { - const int frac = (int)MULT_FIX(frow[x_out], yscale); +static void ExportRowShrinkC(WebPRescaler* const wrk) { + int x_out; + uint8_t* const dst = wrk->dst; + rescaler_t* const irow = wrk->irow; + const int x_out_max = wrk->dst_width * wrk->num_channels; + const rescaler_t* const frow = wrk->frow; + const uint32_t yscale = wrk->fy_scale * (-wrk->y_accum); + assert(!WebPRescalerOutputDone(wrk)); + assert(wrk->y_accum <= 0); + assert(!wrk->y_expand); + if (yscale) { + for (x_out = 0; x_out < x_out_max; ++x_out) { + const uint32_t frac = (uint32_t)MULT_FIX(frow[x_out], yscale); const int v = (int)MULT_FIX(irow[x_out] - frac, wrk->fxy_scale); - dst[x_out] = (!(v & ~0xff)) ? v : (v < 0) ? 
0 : 255; + assert(v >= 0 && v <= 255); + dst[x_out] = v; irow[x_out] = frac; // new fractional start } + } else { + for (x_out = 0; x_out < x_out_max; ++x_out) { + const int v = (int)MULT_FIX(irow[x_out], wrk->fxy_scale); + assert(v >= 0 && v <= 255); + dst[x_out] = v; + irow[x_out] = 0; + } + } +} + +//------------------------------------------------------------------------------ +// Main entry calls + +void WebPRescalerImportRow(WebPRescaler* const wrk, const uint8_t* src) { + assert(!WebPRescalerInputDone(wrk)); + if (!wrk->x_expand) { + WebPRescalerImportRowShrink(wrk, src); + } else { + WebPRescalerImportRowExpand(wrk, src); + } +} + +void WebPRescalerExportRow(WebPRescaler* const wrk) { + if (wrk->y_accum <= 0) { + assert(!WebPRescalerOutputDone(wrk)); + if (wrk->y_expand) { + WebPRescalerExportRowExpand(wrk); + } else if (wrk->fxy_scale) { + WebPRescalerExportRowShrink(wrk); + } else { // very special case for src = dst = 1x1 + int i; + assert(wrk->src_width == 1 && wrk->dst_width <= 2); + assert(wrk->src_height == 1 && wrk->dst_height == 1); + for (i = 0; i < wrk->num_channels * wrk->dst_width; ++i) { + wrk->dst[i] = wrk->irow[i]; + wrk->irow[i] = 0; + } + } wrk->y_accum += wrk->y_add; wrk->dst += wrk->dst_stride; + ++wrk->dst_y; } } @@ -92,23 +207,25 @@ static void ExportRowC(WebPRescaler* const wrk, int x_out) { #if defined(WEBP_USE_MIPS32) -static void ImportRowMIPS(WebPRescaler* const wrk, - const uint8_t* const src, int channel) { +static void ImportRowShrinkMIPS(WebPRescaler* const wrk, const uint8_t* src) { const int x_stride = wrk->num_channels; const int x_out_max = wrk->dst_width * wrk->num_channels; const int fx_scale = wrk->fx_scale; const int x_add = wrk->x_add; const int x_sub = wrk->x_sub; - int* frow = wrk->frow + channel; - int* irow = wrk->irow + channel; - const uint8_t* src1 = src + channel; - int temp1, temp2, temp3; - int base, frac, sum; - int accum, accum1; const int x_stride1 = x_stride << 2; - int loop_c = x_out_max - channel; + 
int channel; + assert(!wrk->x_expand); + assert(!WebPRescalerInputDone(wrk)); + + for (channel = 0; channel < x_stride; ++channel) { + const uint8_t* src1 = src + channel; + rescaler_t* frow = wrk->frow + channel; + int temp1, temp2, temp3; + int base, frac, sum; + int accum, accum1; + int loop_c = x_out_max - channel; - if (!wrk->x_expand) { __asm__ volatile ( "li %[temp1], 0x8000 \n\t" "li %[temp2], 0x10000 \n\t" @@ -116,179 +233,295 @@ static void ImportRowMIPS(WebPRescaler* const wrk, "li %[accum], 0 \n\t" "1: \n\t" "addu %[accum], %[accum], %[x_add] \n\t" + "li %[base], 0 \n\t" "blez %[accum], 3f \n\t" "2: \n\t" - "lbu %[temp3], 0(%[src1]) \n\t" + "lbu %[base], 0(%[src1]) \n\t" "subu %[accum], %[accum], %[x_sub] \n\t" "addu %[src1], %[src1], %[x_stride] \n\t" - "addu %[sum], %[sum], %[temp3] \n\t" + "addu %[sum], %[sum], %[base] \n\t" "bgtz %[accum], 2b \n\t" "3: \n\t" - "lbu %[base], 0(%[src1]) \n\t" - "addu %[src1], %[src1], %[x_stride] \n\t" "negu %[accum1], %[accum] \n\t" "mul %[frac], %[base], %[accum1] \n\t" - "addu %[temp3], %[sum], %[base] \n\t" - "mul %[temp3], %[temp3], %[x_sub] \n\t" - "lw %[base], 0(%[irow]) \n\t" + "mul %[temp3], %[sum], %[x_sub] \n\t" "subu %[loop_c], %[loop_c], %[x_stride] \n\t" - "sll %[accum1], %[frac], 2 \n\t" "mult %[temp1], %[temp2] \n\t" - "madd %[accum1], %[fx_scale] \n\t" + "maddu %[frac], %[fx_scale] \n\t" "mfhi %[sum] \n\t" "subu %[temp3], %[temp3], %[frac] \n\t" "sw %[temp3], 0(%[frow]) \n\t" - "add %[base], %[base], %[temp3] \n\t" - "sw %[base], 0(%[irow]) \n\t" - "addu %[irow], %[irow], %[x_stride1] \n\t" "addu %[frow], %[frow], %[x_stride1] \n\t" "bgtz %[loop_c], 1b \n\t" + : [accum]"=&r"(accum), [src1]"+r"(src1), [temp3]"=&r"(temp3), + [sum]"=&r"(sum), [base]"=&r"(base), [frac]"=&r"(frac), + [frow]"+r"(frow), [accum1]"=&r"(accum1), + [temp2]"=&r"(temp2), [temp1]"=&r"(temp1) + : [x_stride]"r"(x_stride), [fx_scale]"r"(fx_scale), + [x_sub]"r"(x_sub), [x_add]"r"(x_add), + [loop_c]"r"(loop_c), [x_stride1]"r"(x_stride1) 
+ : "memory", "hi", "lo" + ); + assert(accum == 0); + } +} + +static void ImportRowExpandMIPS(WebPRescaler* const wrk, const uint8_t* src) { + const int x_stride = wrk->num_channels; + const int x_out_max = wrk->dst_width * wrk->num_channels; + const int x_add = wrk->x_add; + const int x_sub = wrk->x_sub; + const int src_width = wrk->src_width; + const int x_stride1 = x_stride << 2; + int channel; + assert(wrk->x_expand); + assert(!WebPRescalerInputDone(wrk)); - : [accum] "=&r" (accum), [src1] "+r" (src1), [temp3] "=&r" (temp3), - [sum] "=&r" (sum), [base] "=&r" (base), [frac] "=&r" (frac), - [frow] "+r" (frow), [irow] "+r" (irow), [accum1] "=&r" (accum1), - [temp2] "=&r" (temp2), [temp1] "=&r" (temp1) - : [x_stride] "r" (x_stride), [fx_scale] "r" (fx_scale), - [x_sub] "r" (x_sub), [x_add] "r" (x_add), - [loop_c] "r" (loop_c), [x_stride1] "r" (x_stride1) + for (channel = 0; channel < x_stride; ++channel) { + const uint8_t* src1 = src + channel; + rescaler_t* frow = wrk->frow + channel; + int temp1, temp2, temp3, temp4; + int frac; + int accum; + int x_out = channel; + + __asm__ volatile ( + "addiu %[temp3], %[src_width], -1 \n\t" + "lbu %[temp2], 0(%[src1]) \n\t" + "addu %[src1], %[src1], %[x_stride] \n\t" + "bgtz %[temp3], 0f \n\t" + "addiu %[temp1], %[temp2], 0 \n\t" + "b 3f \n\t" + "0: \n\t" + "lbu %[temp1], 0(%[src1]) \n\t" + "3: \n\t" + "addiu %[accum], %[x_add], 0 \n\t" + "1: \n\t" + "subu %[temp3], %[temp2], %[temp1] \n\t" + "mul %[temp3], %[temp3], %[accum] \n\t" + "mul %[temp4], %[temp1], %[x_add] \n\t" + "addu %[temp3], %[temp4], %[temp3] \n\t" + "sw %[temp3], 0(%[frow]) \n\t" + "addu %[frow], %[frow], %[x_stride1] \n\t" + "addu %[x_out], %[x_out], %[x_stride] \n\t" + "subu %[temp3], %[x_out], %[x_out_max] \n\t" + "bgez %[temp3], 2f \n\t" + "subu %[accum], %[accum], %[x_sub] \n\t" + "bgez %[accum], 4f \n\t" + "addiu %[temp2], %[temp1], 0 \n\t" + "addu %[src1], %[src1], %[x_stride] \n\t" + "lbu %[temp1], 0(%[src1]) \n\t" + "addu %[accum], %[accum], 
%[x_add] \n\t" + "4: \n\t" + "b 1b \n\t" + "2: \n\t" + : [src1]"+r"(src1), [accum]"=&r"(accum), [temp1]"=&r"(temp1), + [temp2]"=&r"(temp2), [temp3]"=&r"(temp3), [temp4]"=&r"(temp4), + [x_out]"+r"(x_out), [frac]"=&r"(frac), [frow]"+r"(frow) + : [x_stride]"r"(x_stride), [x_add]"r"(x_add), [x_sub]"r"(x_sub), + [x_stride1]"r"(x_stride1), [src_width]"r"(src_width), + [x_out_max]"r"(x_out_max) + : "memory", "hi", "lo" + ); + assert(wrk->x_sub == 0 /* <- special case for src_width=1 */ || accum == 0); + } +} + +//------------------------------------------------------------------------------ +// Row export + +static void ExportRowExpandMIPS(WebPRescaler* const wrk) { + uint8_t* dst = wrk->dst; + rescaler_t* irow = wrk->irow; + const int x_out_max = wrk->dst_width * wrk->num_channels; + const rescaler_t* frow = wrk->frow; + int temp0, temp1, temp3, temp4, temp5, loop_end; + const int temp2 = (int)wrk->fy_scale; + const int temp6 = x_out_max << 2; + assert(!WebPRescalerOutputDone(wrk)); + assert(wrk->y_accum <= 0); + assert(wrk->y_expand); + assert(wrk->y_sub != 0); + if (wrk->y_accum == 0) { + __asm__ volatile ( + "li %[temp3], 0x10000 \n\t" + "li %[temp4], 0x8000 \n\t" + "addu %[loop_end], %[frow], %[temp6] \n\t" + "1: \n\t" + "lw %[temp0], 0(%[frow]) \n\t" + "addiu %[dst], %[dst], 1 \n\t" + "addiu %[frow], %[frow], 4 \n\t" + "mult %[temp3], %[temp4] \n\t" + "maddu %[temp0], %[temp2] \n\t" + "mfhi %[temp5] \n\t" + "sb %[temp5], -1(%[dst]) \n\t" + "bne %[frow], %[loop_end], 1b \n\t" + : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3), + [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow), + [dst]"+r"(dst), [loop_end]"=&r"(loop_end) + : [temp2]"r"(temp2), [temp6]"r"(temp6) : "memory", "hi", "lo" ); } else { + const uint32_t B = WEBP_RESCALER_FRAC(-wrk->y_accum, wrk->y_sub); + const uint32_t A = (uint32_t)(WEBP_RESCALER_ONE - B); __asm__ volatile ( - "lbu %[temp1], 0(%[src1]) \n\t" - "move %[temp2], %[temp1] \n\t" - "li %[accum], 0 \n\t" - "1: \n\t" - 
"bgez %[accum], 2f \n\t" - "move %[temp2], %[temp1] \n\t" - "addu %[src1], %[x_stride] \n\t" - "lbu %[temp1], 0(%[src1]) \n\t" - "addu %[accum], %[x_add] \n\t" - "2: \n\t" - "subu %[temp3], %[temp2], %[temp1] \n\t" - "mul %[temp3], %[temp3], %[accum] \n\t" - "mul %[base], %[temp1], %[x_add] \n\t" - "subu %[accum], %[accum], %[x_sub] \n\t" - "lw %[frac], 0(%[irow]) \n\t" - "subu %[loop_c], %[loop_c], %[x_stride] \n\t" - "addu %[temp3], %[base], %[temp3] \n\t" - "sw %[temp3], 0(%[frow]) \n\t" - "addu %[frow], %[x_stride1] \n\t" - "addu %[frac], %[temp3] \n\t" - "sw %[frac], 0(%[irow]) \n\t" - "addu %[irow], %[x_stride1] \n\t" - "bgtz %[loop_c], 1b \n\t" - - : [src1] "+r" (src1), [accum] "=&r" (accum), [temp1] "=&r" (temp1), - [temp2] "=&r" (temp2), [temp3] "=&r" (temp3), [base] "=&r" (base), - [frac] "=&r" (frac), [frow] "+r" (frow), [irow] "+r" (irow) - : [x_stride] "r" (x_stride), [x_add] "r" (x_add), [x_sub] "r" (x_sub), - [x_stride1] "r" (x_stride1), [loop_c] "r" (loop_c) + "li %[temp3], 0x10000 \n\t" + "li %[temp4], 0x8000 \n\t" + "addu %[loop_end], %[frow], %[temp6] \n\t" + "1: \n\t" + "lw %[temp0], 0(%[frow]) \n\t" + "lw %[temp1], 0(%[irow]) \n\t" + "addiu %[dst], %[dst], 1 \n\t" + "mult %[temp3], %[temp4] \n\t" + "maddu %[A], %[temp0] \n\t" + "maddu %[B], %[temp1] \n\t" + "addiu %[frow], %[frow], 4 \n\t" + "addiu %[irow], %[irow], 4 \n\t" + "mfhi %[temp5] \n\t" + "mult %[temp3], %[temp4] \n\t" + "maddu %[temp5], %[temp2] \n\t" + "mfhi %[temp5] \n\t" + "sb %[temp5], -1(%[dst]) \n\t" + "bne %[frow], %[loop_end], 1b \n\t" + : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3), + [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow), + [irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end) + : [temp2]"r"(temp2), [temp6]"r"(temp6), [A]"r"(A), [B]"r"(B) : "memory", "hi", "lo" ); } } -static void ExportRowMIPS(WebPRescaler* const wrk, int x_out) { - if (wrk->y_accum <= 0) { - uint8_t* const dst = wrk->dst; - int32_t* const irow = wrk->irow; - 
const int32_t* const frow = wrk->frow; - const int yscale = wrk->fy_scale * (-wrk->y_accum); - const int x_out_max = wrk->dst_width * wrk->num_channels; - // if wrk->fxy_scale can fit into 32 bits use optimized code, - // otherwise use C code - if ((wrk->fxy_scale >> 32) == 0) { - int temp0, temp1, temp3, temp4, temp5, temp6, temp7, loop_end; - const int temp2 = (int)(wrk->fxy_scale); - const int temp8 = x_out_max << 2; - uint8_t* dst_t = (uint8_t*)dst; - int32_t* irow_t = (int32_t*)irow; - const int32_t* frow_t = (const int32_t*)frow; - - __asm__ volatile( - "addiu %[temp6], $zero, -256 \n\t" - "addiu %[temp7], $zero, 255 \n\t" - "li %[temp3], 0x10000 \n\t" - "li %[temp4], 0x8000 \n\t" - "addu %[loop_end], %[frow_t], %[temp8] \n\t" - "1: \n\t" - "lw %[temp0], 0(%[frow_t]) \n\t" - "mult %[temp3], %[temp4] \n\t" - "addiu %[frow_t], %[frow_t], 4 \n\t" - "sll %[temp0], %[temp0], 2 \n\t" - "madd %[temp0], %[yscale] \n\t" - "mfhi %[temp1] \n\t" - "lw %[temp0], 0(%[irow_t]) \n\t" - "addiu %[dst_t], %[dst_t], 1 \n\t" - "addiu %[irow_t], %[irow_t], 4 \n\t" - "subu %[temp0], %[temp0], %[temp1] \n\t" - "mult %[temp3], %[temp4] \n\t" - "sll %[temp0], %[temp0], 2 \n\t" - "madd %[temp0], %[temp2] \n\t" - "mfhi %[temp5] \n\t" - "sw %[temp1], -4(%[irow_t]) \n\t" - "and %[temp0], %[temp5], %[temp6] \n\t" - "slti %[temp1], %[temp5], 0 \n\t" - "beqz %[temp0], 2f \n\t" - "xor %[temp5], %[temp5], %[temp5] \n\t" - "movz %[temp5], %[temp7], %[temp1] \n\t" - "2: \n\t" - "sb %[temp5], -1(%[dst_t]) \n\t" - "bne %[frow_t], %[loop_end], 1b \n\t" - - : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3), - [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [temp6]"=&r"(temp6), - [temp7]"=&r"(temp7), [frow_t]"+r"(frow_t), [irow_t]"+r"(irow_t), - [dst_t]"+r"(dst_t), [loop_end]"=&r"(loop_end) - : [temp2]"r"(temp2), [yscale]"r"(yscale), [temp8]"r"(temp8) - : "memory", "hi", "lo" - ); - wrk->y_accum += wrk->y_add; - wrk->dst += wrk->dst_stride; - } else { - ExportRowC(wrk, x_out); - } +static 
void ExportRowShrinkMIPS(WebPRescaler* const wrk) { + const int x_out_max = wrk->dst_width * wrk->num_channels; + uint8_t* dst = wrk->dst; + rescaler_t* irow = wrk->irow; + const rescaler_t* frow = wrk->frow; + const int yscale = wrk->fy_scale * (-wrk->y_accum); + int temp0, temp1, temp3, temp4, temp5, loop_end; + const int temp2 = (int)wrk->fxy_scale; + const int temp6 = x_out_max << 2; + + assert(!WebPRescalerOutputDone(wrk)); + assert(wrk->y_accum <= 0); + assert(!wrk->y_expand); + assert(wrk->fxy_scale != 0); + if (yscale) { + __asm__ volatile ( + "li %[temp3], 0x10000 \n\t" + "li %[temp4], 0x8000 \n\t" + "addu %[loop_end], %[frow], %[temp6] \n\t" + "1: \n\t" + "lw %[temp0], 0(%[frow]) \n\t" + "mult %[temp3], %[temp4] \n\t" + "addiu %[frow], %[frow], 4 \n\t" + "maddu %[temp0], %[yscale] \n\t" + "mfhi %[temp1] \n\t" + "lw %[temp0], 0(%[irow]) \n\t" + "addiu %[dst], %[dst], 1 \n\t" + "addiu %[irow], %[irow], 4 \n\t" + "subu %[temp0], %[temp0], %[temp1] \n\t" + "mult %[temp3], %[temp4] \n\t" + "maddu %[temp0], %[temp2] \n\t" + "mfhi %[temp5] \n\t" + "sw %[temp1], -4(%[irow]) \n\t" + "sb %[temp5], -1(%[dst]) \n\t" + "bne %[frow], %[loop_end], 1b \n\t" + : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3), + [temp4]"=&r"(temp4), [temp5]"=&r"(temp5), [frow]"+r"(frow), + [irow]"+r"(irow), [dst]"+r"(dst), [loop_end]"=&r"(loop_end) + : [temp2]"r"(temp2), [yscale]"r"(yscale), [temp6]"r"(temp6) + : "memory", "hi", "lo" + ); + } else { + __asm__ volatile ( + "li %[temp3], 0x10000 \n\t" + "li %[temp4], 0x8000 \n\t" + "addu %[loop_end], %[irow], %[temp6] \n\t" + "1: \n\t" + "lw %[temp0], 0(%[irow]) \n\t" + "addiu %[dst], %[dst], 1 \n\t" + "addiu %[irow], %[irow], 4 \n\t" + "mult %[temp3], %[temp4] \n\t" + "maddu %[temp0], %[temp2] \n\t" + "mfhi %[temp5] \n\t" + "sw $zero, -4(%[irow]) \n\t" + "sb %[temp5], -1(%[dst]) \n\t" + "bne %[irow], %[loop_end], 1b \n\t" + : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1), [temp3]"=&r"(temp3), + [temp4]"=&r"(temp4), 
[temp5]"=&r"(temp5), [irow]"+r"(irow), + [dst]"+r"(dst), [loop_end]"=&r"(loop_end) + : [temp2]"r"(temp2), [temp6]"r"(temp6) + : "memory", "hi", "lo" + ); } } + #endif // WEBP_USE_MIPS32 //------------------------------------------------------------------------------ void WebPRescalerInit(WebPRescaler* const wrk, int src_width, int src_height, - uint8_t* const dst, int dst_width, int dst_height, - int dst_stride, int num_channels, int x_add, int x_sub, - int y_add, int y_sub, int32_t* const work) { + uint8_t* const dst, + int dst_width, int dst_height, int dst_stride, + int num_channels, rescaler_t* const work) { + const int x_add = src_width, x_sub = dst_width; + const int y_add = src_height, y_sub = dst_height; wrk->x_expand = (src_width < dst_width); + wrk->y_expand = (src_height < dst_height); wrk->src_width = src_width; wrk->src_height = src_height; wrk->dst_width = dst_width; wrk->dst_height = dst_height; + wrk->src_y = 0; + wrk->dst_y = 0; wrk->dst = dst; wrk->dst_stride = dst_stride; wrk->num_channels = num_channels; + // for 'x_expand', we use bilinear interpolation - wrk->x_add = wrk->x_expand ? (x_sub - 1) : x_add - x_sub; + wrk->x_add = wrk->x_expand ? (x_sub - 1) : x_add; wrk->x_sub = wrk->x_expand ? (x_add - 1) : x_sub; - wrk->y_accum = y_add; - wrk->y_add = y_add; - wrk->y_sub = y_sub; - wrk->fx_scale = (1 << RFIX) / x_sub; - wrk->fy_scale = (1 << RFIX) / y_sub; - wrk->fxy_scale = wrk->x_expand ? - ((int64_t)dst_height << RFIX) / (x_sub * src_height) : - ((int64_t)dst_height << RFIX) / (x_add * src_height); + if (!wrk->x_expand) { // fx_scale is not used otherwise + wrk->fx_scale = WEBP_RESCALER_FRAC(1, wrk->x_sub); + } + // vertical scaling parameters + wrk->y_add = wrk->y_expand ? y_add - 1 : y_add; + wrk->y_sub = wrk->y_expand ? y_sub - 1 : y_sub; + wrk->y_accum = wrk->y_expand ? wrk->y_sub : wrk->y_add; + if (!wrk->y_expand) { + // this is WEBP_RESCALER_FRAC(dst_height, x_add * y_add) without the cast. 
+ const uint64_t ratio = + (uint64_t)dst_height * WEBP_RESCALER_ONE / (wrk->x_add * wrk->y_add); + if (ratio != (uint32_t)ratio) { + // We can't represent the ratio with the current fixed-point precision. + // => We special-case fxy_scale = 0, in WebPRescalerExportRow(). + wrk->fxy_scale = 0; + } else { + wrk->fxy_scale = (uint32_t)ratio; + } + wrk->fy_scale = WEBP_RESCALER_FRAC(1, wrk->y_sub); + } else { + wrk->fy_scale = WEBP_RESCALER_FRAC(1, wrk->x_add); + // wrk->fxy_scale is unused here. + } wrk->irow = work; wrk->frow = work + num_channels * dst_width; + memset(work, 0, 2 * dst_width * num_channels * sizeof(*work)); - if (WebPRescalerImportRow == NULL) { - WebPRescalerImportRow = ImportRowC; - WebPRescalerExportRow = ExportRowC; + if (WebPRescalerImportRowExpand == NULL) { + WebPRescalerImportRowExpand = ImportRowExpandC; + WebPRescalerImportRowShrink = ImportRowShrinkC; + WebPRescalerExportRowExpand = ExportRowExpandC; + WebPRescalerExportRowShrink = ExportRowShrinkC; if (VP8GetCPUInfo != NULL) { #if defined(WEBP_USE_MIPS32) if (VP8GetCPUInfo(kMIPS32)) { - WebPRescalerImportRow = ImportRowMIPS; - WebPRescalerExportRow = ExportRowMIPS; + WebPRescalerImportRowExpand = ImportRowExpandMIPS; + WebPRescalerImportRowShrink = ImportRowShrinkMIPS; + WebPRescalerExportRowExpand = ExportRowExpandMIPS; + WebPRescalerExportRowShrink = ExportRowShrinkMIPS; } #endif } @@ -296,7 +529,10 @@ void WebPRescalerInit(WebPRescaler* const wrk, int src_width, int src_height, } #undef MULT_FIX -#undef RFIX +#undef WEBP_RESCALER_RFIX +#undef WEBP_RESCALER_ONE +#undef WEBP_RESCALER_FRAC +#undef ROUNDER //------------------------------------------------------------------------------ // all-in-one calls @@ -309,11 +545,20 @@ int WebPRescaleNeededLines(const WebPRescaler* const wrk, int max_num_lines) { int WebPRescalerImport(WebPRescaler* const wrk, int num_lines, const uint8_t* src, int src_stride) { int total_imported = 0; - while (total_imported < num_lines && wrk->y_accum > 0) { - 
int channel; - for (channel = 0; channel < wrk->num_channels; ++channel) { - WebPRescalerImportRow(wrk, src, channel); + while (total_imported < num_lines && !WebPRescalerHasPendingOutput(wrk)) { + if (wrk->y_expand) { + rescaler_t* const tmp = wrk->irow; + wrk->irow = wrk->frow; + wrk->frow = tmp; + } + WebPRescalerImportRow(wrk, src); + if (!wrk->y_expand) { // Accumulate the contribution of the new row. + int x; + for (x = 0; x < wrk->num_channels * wrk->dst_width; ++x) { + wrk->irow[x] += wrk->frow[x]; + } } + ++wrk->src_y; src += src_stride; ++total_imported; wrk->y_accum -= wrk->y_sub; @@ -324,7 +569,7 @@ int WebPRescalerImport(WebPRescaler* const wrk, int num_lines, int WebPRescalerExport(WebPRescaler* const rescaler) { int total_exported = 0; while (WebPRescalerHasPendingOutput(rescaler)) { - WebPRescalerExportRow(rescaler, 0); + WebPRescalerExportRow(rescaler); ++total_exported; } return total_exported; diff --git a/src/3rdparty/libwebp/src/utils/rescaler.h b/src/3rdparty/libwebp/src/utils/rescaler.h index a6f3787..8244cfe 100644 --- a/src/3rdparty/libwebp/src/utils/rescaler.h +++ b/src/3rdparty/libwebp/src/utils/rescaler.h @@ -21,20 +21,23 @@ extern "C" { #include "../webp/types.h" // Structure used for on-the-fly rescaling +typedef uint32_t rescaler_t; // type for side-buffer typedef struct { int x_expand; // true if we're expanding in the x direction + int y_expand; // true if we're expanding in the y direction int num_channels; // bytes to jump between pixels - int fy_scale, fx_scale; // fixed-point scaling factor - int64_t fxy_scale; // '' - // we need hpel-precise add/sub increments, for the downsampled U/V planes. 
+ uint32_t fx_scale; // fixed-point scaling factors + uint32_t fy_scale; // '' + uint32_t fxy_scale; // '' int y_accum; // vertical accumulator - int y_add, y_sub; // vertical increments (add ~= src, sub ~= dst) - int x_add, x_sub; // horizontal increments (add ~= src, sub ~= dst) + int y_add, y_sub; // vertical increments + int x_add, x_sub; // horizontal increments int src_width, src_height; // source dimensions int dst_width, dst_height; // destination dimensions + int src_y, dst_y; // row counters for input and output uint8_t* dst; int dst_stride; - int32_t* irow, *frow; // work buffer + rescaler_t* irow, *frow; // work buffer } WebPRescaler; // Initialize a rescaler given scratch area 'work' and dimensions of src & dst. @@ -43,9 +46,7 @@ void WebPRescalerInit(WebPRescaler* const rescaler, uint8_t* const dst, int dst_width, int dst_height, int dst_stride, int num_channels, - int x_add, int x_sub, - int y_add, int y_sub, - int32_t* const work); + rescaler_t* const work); // Returns the number of input lines needed next to produce one output line, // considering that the maximum available input lines are 'max_num_lines'. @@ -57,21 +58,29 @@ int WebPRescaleNeededLines(const WebPRescaler* const rescaler, int WebPRescalerImport(WebPRescaler* const rescaler, int num_rows, const uint8_t* src, int src_stride); -// Import a row of data and save its contribution in the rescaler. -// 'channel' denotes the channel number to be imported. -extern void (*WebPRescalerImportRow)(WebPRescaler* const wrk, - const uint8_t* const src, int channel); +// Export as many rows as possible. Return the numbers of rows written. +int WebPRescalerExport(WebPRescaler* const rescaler); +void WebPRescalerImportRow(WebPRescaler* const wrk, + const uint8_t* src); // Export one row (starting at x_out position) from rescaler. 
-extern void (*WebPRescalerExportRow)(WebPRescaler* const wrk, int x_out); +void WebPRescalerExportRow(WebPRescaler* const wrk); -// Return true if there is pending output rows ready. +// Return true if input is finished static WEBP_INLINE -int WebPRescalerHasPendingOutput(const WebPRescaler* const rescaler) { - return (rescaler->y_accum <= 0); +int WebPRescalerInputDone(const WebPRescaler* const rescaler) { + return (rescaler->src_y >= rescaler->src_height); +} +// Return true if output is finished +static WEBP_INLINE +int WebPRescalerOutputDone(const WebPRescaler* const rescaler) { + return (rescaler->dst_y >= rescaler->dst_height); } -// Export as many rows as possible. Return the numbers of rows written. -int WebPRescalerExport(WebPRescaler* const rescaler); +// Return true if there are pending output rows ready. +static WEBP_INLINE +int WebPRescalerHasPendingOutput(const WebPRescaler* const rescaler) { + return !WebPRescalerOutputDone(rescaler) && (rescaler->y_accum <= 0); +} //------------------------------------------------------------------------------ diff --git a/src/3rdparty/libwebp/src/utils/utils.h b/src/3rdparty/libwebp/src/utils/utils.h index f2c498a..0bbbcab 100644 --- a/src/3rdparty/libwebp/src/utils/utils.h +++ b/src/3rdparty/libwebp/src/utils/utils.h @@ -90,7 +90,7 @@ static WEBP_INLINE int BitsLog2Floor(uint32_t n) { #pragma intrinsic(_BitScanReverse) static WEBP_INLINE int BitsLog2Floor(uint32_t n) { - uint32_t first_set_bit; + unsigned long first_set_bit; _BitScanReverse(&first_set_bit, n); return first_set_bit; } |