summaryrefslogtreecommitdiff
path: root/pixman/pixman-arm-neon-asm.h
diff options
context:
space:
mode:
authorSiarhei Siamashka <siarhei.siamashka@nokia.com>2009-12-09 23:49:04 +0200
committerSiarhei Siamashka <siarhei.siamashka@nokia.com>2009-12-16 20:55:54 +0200
commit24cd286af6f4507eb9937ced6d9998d296c77a0a (patch)
tree8d2dff8f414b1fc851fdc73fda861d92d31e9798 /pixman/pixman-arm-neon-asm.h
parentae8d9df6248445170702c244cd60f894aa761267 (diff)
downloadpixman-24cd286af6f4507eb9937ced6d9998d296c77a0a.tar.gz
ARM: macro template for single scanline compositing functions
Existing template already supports 2D images processing, but pixman also needs some NEON optimized functions for improving performance when compositing is decoupled into "fetch -> process -> store" stages and done via temporary scanline buffer. That's why a new simplified template which deals only with the generation of single scanline processing functions is handy.
Diffstat (limited to 'pixman/pixman-arm-neon-asm.h')
-rw-r--r--pixman/pixman-arm-neon-asm.h119
1 files changed, 119 insertions, 0 deletions
diff --git a/pixman/pixman-arm-neon-asm.h b/pixman/pixman-arm-neon-asm.h
index c7a5f14..56c3fae 100644
--- a/pixman/pixman-arm-neon-asm.h
+++ b/pixman/pixman-arm-neon-asm.h
@@ -780,6 +780,125 @@ fname:
.endfunc
.endm
+/*
+ * A simplified variant of function generation template for a single
+ * scanline processing (for implementing pixman combine functions)
+ */
+.macro generate_composite_function_single_scanline fname, \
+ src_bpp_, \
+ mask_bpp_, \
+ dst_w_bpp_, \
+ flags, \
+ pixblock_size_, \
+ init, \
+ cleanup, \
+ process_pixblock_head, \
+ process_pixblock_tail, \
+ process_pixblock_tail_head, \
+ dst_w_basereg_ = 28, \
+ dst_r_basereg_ = 4, \
+ src_basereg_ = 0, \
+ mask_basereg_ = 24
+
+ .func fname
+ .global fname
+ /* For ELF format also set function visibility to hidden */
+#ifdef __ELF__
+ .hidden fname
+ .type fname, %function
+#endif
+fname:
+ .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE
+/*
+ * Make some macro arguments globally visible and accessible
+ * from other macros
+ */
+ .set src_bpp, src_bpp_
+ .set mask_bpp, mask_bpp_
+ .set dst_w_bpp, dst_w_bpp_
+ .set pixblock_size, pixblock_size_
+ .set dst_w_basereg, dst_w_basereg_
+ .set dst_r_basereg, dst_r_basereg_
+ .set src_basereg, src_basereg_
+ .set mask_basereg, mask_basereg_
+/*
+ * Assign symbolic names to registers
+ */
+ W .req r0 /* width (is updated during processing) */
+ DST_W .req r1 /* destination buffer pointer for writes */
+ SRC .req r2 /* source buffer pointer */
+ DST_R .req ip /* destination buffer pointer for reads */
+ MASK .req r3 /* mask pointer */
+
+.if (((flags) & FLAG_DST_READWRITE) != 0)
+ .set dst_r_bpp, dst_w_bpp
+.else
+ .set dst_r_bpp, 0
+.endif
+.if (((flags) & FLAG_DEINTERLEAVE_32BPP) != 0)
+ .set DEINTERLEAVE_32BPP_ENABLED, 1
+.else
+ .set DEINTERLEAVE_32BPP_ENABLED, 0
+.endif
+
+ init
+ mov DST_R, DST_W
+
+ cmp W, #pixblock_size
+ blt 8f
+
+ ensure_destination_ptr_alignment process_pixblock_head, \
+ process_pixblock_tail, \
+ process_pixblock_tail_head
+
+ subs W, W, #pixblock_size
+ blt 7f
+
+ /* Implement "head (tail_head) ... (tail_head) tail" loop pattern */
+ pixld_a pixblock_size, dst_r_bpp, \
+ (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R
+ pixld pixblock_size, src_bpp, \
+ (src_basereg - pixblock_size * src_bpp / 64), SRC
+ pixld pixblock_size, mask_bpp, \
+ (mask_basereg - pixblock_size * mask_bpp / 64), MASK
+ process_pixblock_head
+ subs W, W, #pixblock_size
+ blt 2f
+1:
+ process_pixblock_tail_head
+ subs W, W, #pixblock_size
+ bge 1b
+2:
+ process_pixblock_tail
+ pixst_a pixblock_size, dst_w_bpp, \
+ (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W
+7:
+ /* Process the remaining trailing pixels in the scanline (dst aligned) */
+ process_trailing_pixels 0, 1, \
+ process_pixblock_head, \
+ process_pixblock_tail, \
+ process_pixblock_tail_head
+
+ cleanup
+ bx lr /* exit */
+8:
+ /* Process the remaining trailing pixels in the scanline (dst unaligned) */
+ process_trailing_pixels 0, 0, \
+ process_pixblock_head, \
+ process_pixblock_tail, \
+ process_pixblock_tail_head
+
+ cleanup
+ bx lr /* exit */
+
+ .unreq SRC
+ .unreq MASK
+ .unreq DST_R
+ .unreq DST_W
+ .unreq W
+ .endfunc
+.endm
+
.macro default_init
.endm