Make PNGImageDecoder::rowAvailable auto-vectorizable

https://bugs.webkit.org/show_bug.cgi?id=116151

Reviewed by Benjamin Poulain.

Changed the main loops under PNGImageDecoder::rowAvailable so that they avoid branches and non-sequential table lookups. Together with automatic vectorization by the compiler, this provides around a 4x speed-up with AVX or a 2x speed-up on generic x64, shaving 12-40% off PNG decoding in general.

* platform/graphics/Color.cpp:
(WebCore::premultipliedARGBFromColor):
* platform/graphics/Color.h:
(WebCore::fastDivideBy255):
* platform/graphics/filters/FEBlend.cpp:
* platform/image-decoders/png/PNGImageDecoder.cpp:
(WebCore::setPixelRGB):
(WebCore::setPixelRGBA):
(WebCore::setPixelPremultipliedRGBA):
(WebCore::PNGImageDecoder::rowAvailable):

Change-Id: Ic3d6cefda505ebceb37165d0a7aeb3cd2fe3e2c9
git-svn-id: http://svn.webkit.org/repository/webkit/trunk@150252 268f45cc-cd09-0410-ab3c-d52691b4dbfc
Reviewed-by: Jocelyn Turcotte <jocelyn.turcotte@digia.com>
author: Allan Sandfeld Jensen <allan.jensen@digia.com> 2013-06-05 18:43:34 +0200
committer: The Qt Project <gerrit-noreply@qt-project.org> 2013-06-06 09:41:17 +0200
commit: 9769275688aec7a53b55b66bde961b18c32c6d51 (patch)
tree: 2b947f7ccfa0f43980e63ec373ceccb7215f78a0
parent: 43d40815e5ed44a3dbc89380ae4b8975c4f86cb7 (diff)
download: qtwebkit-9769275688aec7a53b55b66bde961b18c32c6d51.tar.gz
4 files changed, 62 insertions, 28 deletions
diff --git a/Source/WebCore/platform/graphics/Color.cpp b/Source/WebCore/platform/graphics/Color.cpp
index 04dc73ea6..51c710007 100644
--- a/Source/WebCore/platform/graphics/Color.cpp
+++ b/Source/WebCore/platform/graphics/Color.cpp
@@ -434,9 +434,9 @@ RGBA32 premultipliedARGBFromColor(const Color& color)
     unsigned alpha = color.alpha();
     if (alpha < 255) {
         pixelColor = Color::createUnchecked(
-            (color.red() * alpha  + 254) / 255,
-            (color.green() * alpha  + 254) / 255,
-            (color.blue() * alpha  + 254) / 255,
+            fastDivideBy255(color.red() * alpha + 254),
+            fastDivideBy255(color.green() * alpha + 254),
+            fastDivideBy255(color.blue() * alpha + 254),
             alpha).rgb();
     } else
          pixelColor = color.rgb();
diff --git a/Source/WebCore/platform/graphics/Color.h b/Source/WebCore/platform/graphics/Color.h
index 87ca23375..9e4ce8c11 100644
--- a/Source/WebCore/platform/graphics/Color.h
+++ b/Source/WebCore/platform/graphics/Color.h
@@ -211,6 +211,14 @@ inline Color blend(const Color& from, const Color& to, double progress, bool ble
                  blend(from.alpha(), to.alpha(), progress));
 }
 
+inline uint16_t fastDivideBy255(uint16_t value)
+{
+    // This is an approximate algorithm for division by 255, but it gives accurate results for 16bit values.
+    uint16_t approximation = value >> 8;
+    uint16_t remainder = value - (approximation * 255) + 1;
+    return approximation + (remainder >> 8);
+}
+
 #if USE(CG)
 CGColorRef cachedCGColor(const Color&, ColorSpace);
 #endif
diff --git a/Source/WebCore/platform/graphics/filters/FEBlend.cpp b/Source/WebCore/platform/graphics/filters/FEBlend.cpp
index d5897c392..bf527ff53 100644
--- a/Source/WebCore/platform/graphics/filters/FEBlend.cpp
+++ b/Source/WebCore/platform/graphics/filters/FEBlend.cpp
@@ -63,14 +63,6 @@ bool FEBlend::setBlendMode(BlendModeType mode)
     return true;
 }
 
-static inline unsigned char fastDivideBy255(uint16_t value)
-{
-    // This is an approximate algorithm for division by 255, but it gives accurate results for 16bit values.
-    uint16_t quotient = value >> 8;
-    uint16_t remainder = value - (quotient * 255) + 1;
-    return quotient + (remainder >> 8);
-}
-
 inline unsigned char feBlendNormal(unsigned char colorA, unsigned char colorB, unsigned char alphaA, unsigned char)
 {
     return fastDivideBy255((255 - alphaA) * colorB + colorA * 255);
diff --git a/Source/WebCore/platform/image-decoders/png/PNGImageDecoder.cpp b/Source/WebCore/platform/image-decoders/png/PNGImageDecoder.cpp
index 7984c9ae8..776acce2b 100644
--- a/Source/WebCore/platform/image-decoders/png/PNGImageDecoder.cpp
+++ b/Source/WebCore/platform/image-decoders/png/PNGImageDecoder.cpp
@@ -40,6 +40,7 @@
 #include "config.h"
 #include "PNGImageDecoder.h"
 
+#include "Color.h"
 #include "PlatformInstrumentation.h"
 #include "png.h"
 #include <wtf/OwnArrayPtr.h>
@@ -402,6 +403,29 @@ void PNGImageDecoder::headerAvailable()
     }
 }
 
+static inline void setPixelRGB(ImageFrame::PixelData* dest, png_bytep pixel)
+{
+    *dest = 0xFF000000U | pixel[0] << 16 | pixel[1] << 8 | pixel[2];
+}
+
+static inline void setPixelRGBA(ImageFrame::PixelData* dest, png_bytep pixel, unsigned char& nonTrivialAlphaMask)
+{
+    unsigned char a = pixel[3];
+    *dest = a << 24 | pixel[0] << 16 | pixel[1] << 8 | pixel[2];
+    nonTrivialAlphaMask |= (255 - a);
+}
+
+static inline void setPixelPremultipliedRGBA(ImageFrame::PixelData* dest, png_bytep pixel, unsigned char& nonTrivialAlphaMask)
+{
+    unsigned char a = pixel[3];
+    unsigned char r = fastDivideBy255(pixel[0] * a);
+    unsigned char g = fastDivideBy255(pixel[1] * a);
+    unsigned char b = fastDivideBy255(pixel[2] * a);
+
+    *dest = a << 24 | r << 16 | g << 8 | b;
+    nonTrivialAlphaMask |= (255 - a);
+}
+
 void PNGImageDecoder::rowAvailable(unsigned char* rowBuffer, unsigned rowIndex, int)
 {
     if (m_frameBufferCache.isEmpty())
@@ -501,27 +525,37 @@ void PNGImageDecoder::rowAvailable(unsigned char* rowBuffer, unsigned rowIndex,
     // Write the decoded row pixels to the frame buffer.
     ImageFrame::PixelData* address = buffer.getAddr(0, y);
     int width = scaledSize().width();
-    bool nonTrivialAlpha = false;
+    unsigned char nonTrivialAlphaMask = 0;
 
 #if ENABLE(IMAGE_DECODER_DOWN_SAMPLING)
-    for (int x = 0; x < width; ++x) {
-        png_bytep pixel = row + (m_scaled ? m_scaledColumns[x] : x) * colorChannels;
-        unsigned alpha = hasAlpha ? pixel[3] : 255;
-        buffer.setRGBA(address++, pixel[0], pixel[1], pixel[2], alpha);
-        nonTrivialAlpha |= alpha < 255;
-    }
-#else
-    ASSERT(!m_scaled);
-    png_bytep pixel = row;
-    for (int x = 0; x < width; ++x, pixel += colorChannels) {
-        unsigned alpha = hasAlpha ? pixel[3] : 255;
-        buffer.setRGBA(address++, pixel[0], pixel[1], pixel[2], alpha);
-        nonTrivialAlpha |= alpha < 255;
-    }
+    if (m_scaled) {
+        for (int x = 0; x < width; ++x) {
+            png_bytep pixel = row + m_scaledColumns[x] * colorChannels;
+            unsigned alpha = hasAlpha ? pixel[3] : 255;
+            buffer.setRGBA(address++, pixel[0], pixel[1], pixel[2], alpha);
+            nonTrivialAlphaMask |= (255 - alpha);
+        }
+    } else
 #endif
+    {
+        png_bytep pixel = row;
+        if (hasAlpha) {
+            if (buffer.premultiplyAlpha()) {
+                for (int x = 0; x < width; ++x, pixel += 4)
+                    setPixelPremultipliedRGBA(address++, pixel, nonTrivialAlphaMask);
+            } else {
+                for (int x = 0; x < width; ++x, pixel += 4)
+                    setPixelRGBA(address++, pixel, nonTrivialAlphaMask);
+            }
+        } else {
+            for (int x = 0; x < width; ++x, pixel += 3)
+                setPixelRGB(address++, pixel);
+        }
+    }
+
 
-    if (nonTrivialAlpha && !buffer.hasAlpha())
-        buffer.setHasAlpha(nonTrivialAlpha);
+    if (nonTrivialAlphaMask && !buffer.hasAlpha())
+        buffer.setHasAlpha(true);
 }
 
 void PNGImageDecoder::pngComplete()
author	Allan Sandfeld Jensen <allan.jensen@digia.com>	2013-06-05 18:43:34 +0200
committer	The Qt Project <gerrit-noreply@qt-project.org>	2013-06-06 09:41:17 +0200
commit	9769275688aec7a53b55b66bde961b18c32c6d51 (patch)
tree	2b947f7ccfa0f43980e63ec373ceccb7215f78a0
parent	43d40815e5ed44a3dbc89380ae4b8975c4f86cb7 (diff)
download	qtwebkit-9769275688aec7a53b55b66bde961b18c32c6d51.tar.gz