diff options
author | Allan Sandfeld Jensen <allan.jensen@digia.com> | 2013-06-05 18:43:34 +0200 |
---|---|---|
committer | The Qt Project <gerrit-noreply@qt-project.org> | 2013-06-06 09:41:17 +0200 |
commit | 9769275688aec7a53b55b66bde961b18c32c6d51 (patch) | |
tree | 2b947f7ccfa0f43980e63ec373ceccb7215f78a0 | |
parent | 43d40815e5ed44a3dbc89380ae4b8975c4f86cb7 (diff) | |
download | qtwebkit-9769275688aec7a53b55b66bde961b18c32c6d51.tar.gz |
Make PNGImageDecoder::rowAvailable auto-vectorizable
https://bugs.webkit.org/show_bug.cgi?id=116151
Reviewed by Benjamin Poulain.
Changed the main loops under PNGImageDecoder::rowAvailable so that they
avoid branches and non-sequential table lookups.
Together with automatic vectorization by the compiler, this provides around
a 4x speed-up with AVX or a 2x speed-up on generic x64, shaving 12-40% off
PNG decoding in general.
* platform/graphics/Color.cpp:
(WebCore::premultipliedARGBFromColor):
* platform/graphics/Color.h:
(WebCore::fastDivideBy255):
* platform/graphics/filters/FEBlend.cpp:
* platform/image-decoders/png/PNGImageDecoder.cpp:
(WebCore::setPixelRGB):
(WebCore::setPixelRGBA):
(WebCore::setPixelPremultipliedRGBA):
(WebCore::PNGImageDecoder::rowAvailable):
Change-Id: Ic3d6cefda505ebceb37165d0a7aeb3cd2fe3e2c9
git-svn-id: http://svn.webkit.org/repository/webkit/trunk@150252 268f45cc-cd09-0410-ab3c-d52691b4dbfc
Reviewed-by: Jocelyn Turcotte <jocelyn.turcotte@digia.com>
4 files changed, 62 insertions, 28 deletions
diff --git a/Source/WebCore/platform/graphics/Color.cpp b/Source/WebCore/platform/graphics/Color.cpp index 04dc73ea6..51c710007 100644 --- a/Source/WebCore/platform/graphics/Color.cpp +++ b/Source/WebCore/platform/graphics/Color.cpp @@ -434,9 +434,9 @@ RGBA32 premultipliedARGBFromColor(const Color& color) unsigned alpha = color.alpha(); if (alpha < 255) { pixelColor = Color::createUnchecked( - (color.red() * alpha + 254) / 255, - (color.green() * alpha + 254) / 255, - (color.blue() * alpha + 254) / 255, + fastDivideBy255(color.red() * alpha + 254), + fastDivideBy255(color.green() * alpha + 254), + fastDivideBy255(color.blue() * alpha + 254), alpha).rgb(); } else pixelColor = color.rgb(); diff --git a/Source/WebCore/platform/graphics/Color.h b/Source/WebCore/platform/graphics/Color.h index 87ca23375..9e4ce8c11 100644 --- a/Source/WebCore/platform/graphics/Color.h +++ b/Source/WebCore/platform/graphics/Color.h @@ -211,6 +211,14 @@ inline Color blend(const Color& from, const Color& to, double progress, bool ble blend(from.alpha(), to.alpha(), progress)); } +inline uint16_t fastDivideBy255(uint16_t value) +{ + // This is an approximate algorithm for division by 255, but it gives accurate results for 16bit values. + uint16_t approximation = value >> 8; + uint16_t remainder = value - (approximation * 255) + 1; + return approximation + (remainder >> 8); +} + #if USE(CG) CGColorRef cachedCGColor(const Color&, ColorSpace); #endif diff --git a/Source/WebCore/platform/graphics/filters/FEBlend.cpp b/Source/WebCore/platform/graphics/filters/FEBlend.cpp index d5897c392..bf527ff53 100644 --- a/Source/WebCore/platform/graphics/filters/FEBlend.cpp +++ b/Source/WebCore/platform/graphics/filters/FEBlend.cpp @@ -63,14 +63,6 @@ bool FEBlend::setBlendMode(BlendModeType mode) return true; } -static inline unsigned char fastDivideBy255(uint16_t value) -{ - // This is an approximate algorithm for division by 255, but it gives accurate results for 16bit values. 
- uint16_t quotient = value >> 8; - uint16_t remainder = value - (quotient * 255) + 1; - return quotient + (remainder >> 8); -} - inline unsigned char feBlendNormal(unsigned char colorA, unsigned char colorB, unsigned char alphaA, unsigned char) { return fastDivideBy255((255 - alphaA) * colorB + colorA * 255); diff --git a/Source/WebCore/platform/image-decoders/png/PNGImageDecoder.cpp b/Source/WebCore/platform/image-decoders/png/PNGImageDecoder.cpp index 7984c9ae8..776acce2b 100644 --- a/Source/WebCore/platform/image-decoders/png/PNGImageDecoder.cpp +++ b/Source/WebCore/platform/image-decoders/png/PNGImageDecoder.cpp @@ -40,6 +40,7 @@ #include "config.h" #include "PNGImageDecoder.h" +#include "Color.h" #include "PlatformInstrumentation.h" #include "png.h" #include <wtf/OwnArrayPtr.h> @@ -402,6 +403,29 @@ void PNGImageDecoder::headerAvailable() } } +static inline void setPixelRGB(ImageFrame::PixelData* dest, png_bytep pixel) +{ + *dest = 0xFF000000U | pixel[0] << 16 | pixel[1] << 8 | pixel[2]; +} + +static inline void setPixelRGBA(ImageFrame::PixelData* dest, png_bytep pixel, unsigned char& nonTrivialAlphaMask) +{ + unsigned char a = pixel[3]; + *dest = a << 24 | pixel[0] << 16 | pixel[1] << 8 | pixel[2]; + nonTrivialAlphaMask |= (255 - a); +} + +static inline void setPixelPremultipliedRGBA(ImageFrame::PixelData* dest, png_bytep pixel, unsigned char& nonTrivialAlphaMask) +{ + unsigned char a = pixel[3]; + unsigned char r = fastDivideBy255(pixel[0] * a); + unsigned char g = fastDivideBy255(pixel[1] * a); + unsigned char b = fastDivideBy255(pixel[2] * a); + + *dest = a << 24 | r << 16 | g << 8 | b; + nonTrivialAlphaMask |= (255 - a); +} + void PNGImageDecoder::rowAvailable(unsigned char* rowBuffer, unsigned rowIndex, int) { if (m_frameBufferCache.isEmpty()) @@ -501,27 +525,37 @@ void PNGImageDecoder::rowAvailable(unsigned char* rowBuffer, unsigned rowIndex, // Write the decoded row pixels to the frame buffer. 
ImageFrame::PixelData* address = buffer.getAddr(0, y); int width = scaledSize().width(); - bool nonTrivialAlpha = false; + unsigned char nonTrivialAlphaMask = 0; #if ENABLE(IMAGE_DECODER_DOWN_SAMPLING) - for (int x = 0; x < width; ++x) { - png_bytep pixel = row + (m_scaled ? m_scaledColumns[x] : x) * colorChannels; - unsigned alpha = hasAlpha ? pixel[3] : 255; - buffer.setRGBA(address++, pixel[0], pixel[1], pixel[2], alpha); - nonTrivialAlpha |= alpha < 255; - } -#else - ASSERT(!m_scaled); - png_bytep pixel = row; - for (int x = 0; x < width; ++x, pixel += colorChannels) { - unsigned alpha = hasAlpha ? pixel[3] : 255; - buffer.setRGBA(address++, pixel[0], pixel[1], pixel[2], alpha); - nonTrivialAlpha |= alpha < 255; - } + if (m_scaled) { + for (int x = 0; x < width; ++x) { + png_bytep pixel = row + m_scaledColumns[x] * colorChannels; + unsigned alpha = hasAlpha ? pixel[3] : 255; + buffer.setRGBA(address++, pixel[0], pixel[1], pixel[2], alpha); + nonTrivialAlphaMask |= (255 - alpha); + } + } else #endif + { + png_bytep pixel = row; + if (hasAlpha) { + if (buffer.premultiplyAlpha()) { + for (int x = 0; x < width; ++x, pixel += 4) + setPixelPremultipliedRGBA(address++, pixel, nonTrivialAlphaMask); + } else { + for (int x = 0; x < width; ++x, pixel += 4) + setPixelRGBA(address++, pixel, nonTrivialAlphaMask); + } + } else { + for (int x = 0; x < width; ++x, pixel += 3) + setPixelRGB(address++, pixel); + } + } + - if (nonTrivialAlpha && !buffer.hasAlpha()) - buffer.setHasAlpha(nonTrivialAlpha); + if (nonTrivialAlphaMask && !buffer.hasAlpha()) + buffer.setHasAlpha(true); } void PNGImageDecoder::pngComplete() |