summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSiarhei Siamashka <siarhei.siamashka@nokia.com>2011-03-07 03:10:43 +0200
committerSiarhei Siamashka <siarhei.siamashka@nokia.com>2011-03-12 21:24:40 +0200
commitf3e17872f5522e25da8e32de83e62bee8cc198d7 (patch)
tree33ef1f11076d3f6f9b5210612af8ca306db62c32
parentbb3d1b67fd0f42ae00af811c624ea1c44541034d (diff)
downloadpixman-f3e17872f5522e25da8e32de83e62bee8cc198d7.tar.gz
ARM: common macro for nearest scaling fast paths
The code of nearest scaled 'src_0565_0565' function was generalized and moved to a common macro, so that it can be reused for other fast paths.
-rw-r--r--pixman/pixman-arm-simd-asm.S60
1 files changed, 36 insertions, 24 deletions
diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
index dd1366d..a9775e2 100644
--- a/pixman/pixman-arm-simd-asm.S
+++ b/pixman/pixman-arm-simd-asm.S
@@ -331,15 +331,29 @@ pixman_asm_function pixman_composite_over_n_8_8888_asm_armv6
.endfunc
/*
- * Note: This function is only using armv4t instructions (not even armv6),
+ * Note: This code is only using armv5te instructions (not even armv6),
* but is scheduled for ARM Cortex-A8 pipeline. So it might need to
* be split into a few variants, tuned for each microarchitecture.
*
* TODO: In order to get good performance on ARM9/ARM11 cores (which don't
* have efficient write combining), it needs to be changed to use 16-byte
* aligned writes using STM instruction.
+ *
+ * Nearest scanline scaler macro template uses the following arguments:
+ * fname - name of the function to generate
+ * bpp_shift - (1 << bpp_shift) is the size of pixel in bytes
+ * t - type suffix for LDR/STR instructions
+ * prefetch_distance - prefetch in the source image by that many
+ * pixels ahead
+ * prefetch_braking_distance - stop prefetching when that many pixels are
+ * remaining before the end of scanline
*/
-pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
+
+.macro generate_nearest_scanline_func fname, bpp_shift, t, \
+ prefetch_distance, \
+ prefetch_braking_distance
+
+pixman_asm_function fname
W .req r0
DST .req r1
SRC .req r2
@@ -352,35 +366,29 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
ldr UNIT_X, [sp]
push {r4, r5, r6, r7}
- mvn VXMASK, #1
+ mvn VXMASK, #((1 << bpp_shift) - 1)
/* define helper macro */
.macro scale_2_pixels
- ldrh TMP1, [SRC, TMP1]
- and TMP2, VXMASK, VX, lsr #15
+ ldr&t TMP1, [SRC, TMP1]
+ and TMP2, VXMASK, VX, lsr #(16 - bpp_shift)
add VX, VX, UNIT_X
- strh TMP1, [DST], #2
+ str&t TMP1, [DST], #(1 << bpp_shift)
- ldrh TMP2, [SRC, TMP2]
- and TMP1, VXMASK, VX, lsr #15
+ ldr&t TMP2, [SRC, TMP2]
+ and TMP1, VXMASK, VX, lsr #(16 - bpp_shift)
add VX, VX, UNIT_X
- strh TMP2, [DST], #2
+ str&t TMP2, [DST], #(1 << bpp_shift)
.endm
- /*
- * stop prefetch before reaching the end of scanline (a good behaving
- * value selected based on some benchmarks with short scanlines)
- */
- #define PREFETCH_BRAKING_DISTANCE 32
-
/* now do the scaling */
- and TMP1, VXMASK, VX, lsr #15
+ and TMP1, VXMASK, VX, lsr #(16 - bpp_shift)
add VX, VX, UNIT_X
- subs W, #(8 + PREFETCH_BRAKING_DISTANCE)
+ subs W, W, #(8 + prefetch_braking_distance)
blt 2f
- /* set prefetch distance to 80 pixels ahead */
- add PF_OFFS, VX, UNIT_X, lsl #6
- add PF_OFFS, PF_OFFS, UNIT_X, lsl #4
+ /* calculate prefetch offset */
+ mov PF_OFFS, #prefetch_distance
+ mla PF_OFFS, UNIT_X, PF_OFFS, VX
1: /* main loop, process 8 pixels per iteration with prefetch */
subs W, W, #8
add PF_OFFS, UNIT_X, lsl #3
@@ -388,10 +396,10 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
scale_2_pixels
scale_2_pixels
scale_2_pixels
- pld [SRC, PF_OFFS, lsr #15]
+ pld [SRC, PF_OFFS, lsr #(16 - bpp_shift)]
bge 1b
2:
- subs W, #(4 - 8 - PREFETCH_BRAKING_DISTANCE)
+ subs W, W, #(4 - 8 - prefetch_braking_distance)
blt 2f
1: /* process the remaining pixels */
scale_2_pixels
@@ -404,8 +412,8 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
scale_2_pixels
2:
tst W, #1
- ldrneh TMP1, [SRC, TMP1]
- strneh TMP1, [DST], #2
+ ldrne&t TMP1, [SRC, TMP1]
+ strne&t TMP1, [DST]
/* cleanup helper macro */
.purgem scale_2_pixels
.unreq DST
@@ -421,3 +429,7 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
pop {r4, r5, r6, r7}
bx lr
.endfunc
+.endm
+
+generate_nearest_scanline_func \
+ pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32