summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Siarhei Siamashka <siarhei.siamashka@nokia.com> 2010-09-08 09:30:23 +0300
committer Siarhei Siamashka <siarhei.siamashka@nokia.com> 2010-10-11 01:07:01 +0300
commit c748650d700c2f18f1587f06ada3b58d6ddc18d3 (patch)
tree 63106df479ae5e8cd6c58f4145d3fd9fe365ede1
parent a520c15e1134d9e801bc2ab461a3c5ade60544f2 (diff)
download pixman-c748650d700c2f18f1587f06ada3b58d6ddc18d3.tar.gz
Use more unrolling for scaled src_0565_0565 with nearest filter
Benchmark from Intel Core i7 860:

== before ==
op=1, src_fmt=10020565, dst_fmt=10020565, speed=1335.29 MPix/s

== after ==
op=1, src_fmt=10020565, dst_fmt=10020565, speed=1550.96 MPix/s

== performance of nonscaled src_0565_0565 operation as a reference ==
op=1, src_fmt=10020565, dst_fmt=10020565, speed=2401.31 MPix/s

Benchmark from ARM Cortex-A8:

== before ==
op=1, src_fmt=10020565, dst_fmt=10020565, speed=81.79 MPix/s

== after ==
op=1, src_fmt=10020565, dst_fmt=10020565, speed=89.55 MPix/s

== performance of nonscaled src_0565_0565 operation as a reference ==
op=1, src_fmt=10020565, dst_fmt=10020565, speed=197.44 MPix/s
-rw-r--r-- pixman/pixman-fast-path.c 51
1 files changed, 48 insertions, 3 deletions
diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c
index c210919..5d5fa95 100644
--- a/pixman/pixman-fast-path.c
+++ b/pixman/pixman-fast-path.c
@@ -1399,15 +1399,60 @@ FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, SRC, COVER);
FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE);
FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD);
FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL);
-FAST_NEAREST (565_565_cover, 0565, 0565, uint16_t, uint16_t, SRC, COVER);
-FAST_NEAREST (565_565_none, 0565, 0565, uint16_t, uint16_t, SRC, NONE);
-FAST_NEAREST (565_565_pad, 0565, 0565, uint16_t, uint16_t, SRC, PAD);
FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL);
FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER);
FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE);
FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD);
FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL);
+/* Use more unrolling for src_0565_0565 because it is typically CPU bound */
+static force_inline void
+scaled_nearest_scanline_565_565_SRC (uint16_t * dst, /* destination pixels; one is written per unit of w */
+ uint16_t * src, /* source pixels, indexed by pixman_fixed_to_int (vx) */
+ int32_t w, /* number of destination pixels to write; assumed >= 0 - TODO confirm against caller */
+ pixman_fixed_t vx, /* source x coordinate used for the first pixel */
+ pixman_fixed_t unit_x, /* increment added to vx after every sampled pixel */
+ pixman_fixed_t max_vx) /* not referenced anywhere in this body */
+{
+ uint16_t tmp1, tmp2, tmp3, tmp4;
+ while ((w -= 4) >= 0) /* four pixels per pass; for w >= 0 on entry the loop exits with w in [-4, -1] */
+ {
+ tmp1 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+ tmp2 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+ tmp3 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+ tmp4 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+ *dst++ = tmp1; /* all four loads above are done before the first store */
+ *dst++ = tmp2;
+ *dst++ = tmp3;
+ *dst++ = tmp4;
+ }
+ if (w & 2) /* w is negative here, but its low two bits still equal the leftover count (two's complement) */
+ {
+ tmp1 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+ tmp2 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+ *dst++ = tmp1;
+ *dst++ = tmp2;
+ }
+ if (w & 1) /* one last pixel when the original width was odd */
+ *dst++ = src[pixman_fixed_to_int (vx)];
+}
+
+FAST_NEAREST_MAINLOOP (565_565_cover_SRC, /* COVER case, using the unrolled 565->565 scanline function */
+ scaled_nearest_scanline_565_565_SRC,
+ uint16_t, uint16_t, COVER);
+FAST_NEAREST_MAINLOOP (565_565_none_SRC, /* NONE case, same scanline function */
+ scaled_nearest_scanline_565_565_SRC,
+ uint16_t, uint16_t, NONE);
+FAST_NEAREST_MAINLOOP (565_565_pad_SRC, /* PAD case, same scanline function; NORMAL still uses FAST_NEAREST */
+ scaled_nearest_scanline_565_565_SRC,
+ uint16_t, uint16_t, PAD);
+
static force_inline uint32_t
fetch_nearest (pixman_repeat_t src_repeat,
pixman_format_code_t format,