diff options
author | Siarhei Siamashka <siarhei.siamashka@nokia.com> | 2010-09-08 09:30:23 +0300 |
---|---|---|
committer | Siarhei Siamashka <siarhei.siamashka@nokia.com> | 2010-10-11 01:07:01 +0300 |
commit | c748650d700c2f18f1587f06ada3b58d6ddc18d3 (patch) | |
tree | 63106df479ae5e8cd6c58f4145d3fd9fe365ede1 | |
parent | a520c15e1134d9e801bc2ab461a3c5ade60544f2 (diff) | |
download | pixman-c748650d700c2f18f1587f06ada3b58d6ddc18d3.tar.gz |
Use more unrolling for scaled src_0565_0565 with nearest filter
Benchmark from Intel Core i7 860:
== before ==
op=1, src_fmt=10020565, dst_fmt=10020565, speed=1335.29 MPix/s
== after ==
op=1, src_fmt=10020565, dst_fmt=10020565, speed=1550.96 MPix/s
== performance of nonscaled src_0565_0565 operation as a reference ==
op=1, src_fmt=10020565, dst_fmt=10020565, speed=2401.31 MPix/s
Benchmark from ARM Cortex-A8:
== before ==
op=1, src_fmt=10020565, dst_fmt=10020565, speed=81.79 MPix/s
== after ==
op=1, src_fmt=10020565, dst_fmt=10020565, speed=89.55 MPix/s
== performance of nonscaled src_0565_0565 operation as a reference ==
op=1, src_fmt=10020565, dst_fmt=10020565, speed=197.44 MPix/s
-rw-r--r-- | pixman/pixman-fast-path.c | 51 |
1 files changed, 48 insertions, 3 deletions
```diff
diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c
index c210919..5d5fa95 100644
--- a/pixman/pixman-fast-path.c
+++ b/pixman/pixman-fast-path.c
@@ -1399,15 +1399,60 @@ FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, SRC, COVER);
 FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE);
 FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD);
 FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL);
-FAST_NEAREST (565_565_cover, 0565, 0565, uint16_t, uint16_t, SRC, COVER);
-FAST_NEAREST (565_565_none, 0565, 0565, uint16_t, uint16_t, SRC, NONE);
-FAST_NEAREST (565_565_pad, 0565, 0565, uint16_t, uint16_t, SRC, PAD);
 FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL);
 FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER);
 FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE);
 FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD);
 FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL);
 
+/* Use more unrolling for src_0565_0565 because it is typically CPU bound */
+static force_inline void
+scaled_nearest_scanline_565_565_SRC (uint16_t * dst,
+                                     uint16_t * src,
+                                     int32_t w,
+                                     pixman_fixed_t vx,
+                                     pixman_fixed_t unit_x,
+                                     pixman_fixed_t max_vx)
+{
+    uint16_t tmp1, tmp2, tmp3, tmp4;
+    while ((w -= 4) >= 0)
+    {
+        tmp1 = src[pixman_fixed_to_int (vx)];
+        vx += unit_x;
+        tmp2 = src[pixman_fixed_to_int (vx)];
+        vx += unit_x;
+        tmp3 = src[pixman_fixed_to_int (vx)];
+        vx += unit_x;
+        tmp4 = src[pixman_fixed_to_int (vx)];
+        vx += unit_x;
+        *dst++ = tmp1;
+        *dst++ = tmp2;
+        *dst++ = tmp3;
+        *dst++ = tmp4;
+    }
+    if (w & 2)
+    {
+        tmp1 = src[pixman_fixed_to_int (vx)];
+        vx += unit_x;
+        tmp2 = src[pixman_fixed_to_int (vx)];
+        vx += unit_x;
+        *dst++ = tmp1;
+        *dst++ = tmp2;
+    }
+    if (w & 1)
+        *dst++ = src[pixman_fixed_to_int (vx)];
+}
+
+FAST_NEAREST_MAINLOOP (565_565_cover_SRC,
+                       scaled_nearest_scanline_565_565_SRC,
+                       uint16_t, uint16_t, COVER);
+FAST_NEAREST_MAINLOOP (565_565_none_SRC,
+                       scaled_nearest_scanline_565_565_SRC,
+                       uint16_t, uint16_t, NONE);
+FAST_NEAREST_MAINLOOP (565_565_pad_SRC,
+                       scaled_nearest_scanline_565_565_SRC,
+                       uint16_t, uint16_t, PAD);
+
 static force_inline uint32_t
 fetch_nearest (pixman_repeat_t src_repeat,
                pixman_format_code_t format,
```