summary refs log tree commit diff
path: root/powerpc
diff options
context:
space:
mode:
author Vadim Barkov <neverscaired@gmail.com> 2017-01-15 20:20:29 +0300
committer Vadim Barkov <neverscaired@gmail.com> 2017-01-15 20:20:29 +0300
commit 91acd4baf8041cf2df2ab790e98a8e8025a727cc (patch)
tree 198a09cc3a8a6cb5832cba02283bf4d10764bb3c /powerpc
parent 6ff408d423b0fa13e735a93ea9876e39ec8176c8 (diff)
download libpng-91acd4baf8041cf2df2ab790e98a8e8025a727cc.tar.gz
Implemented png_read_filter_row_up_vsx
Diffstat (limited to 'powerpc')
-rw-r--r-- powerpc/filter_vsx_intrinsics.c 57
1 files changed, 41 insertions, 16 deletions
diff --git a/powerpc/filter_vsx_intrinsics.c b/powerpc/filter_vsx_intrinsics.c
index 8e4ef2930..fe1483a13 100644
--- a/powerpc/filter_vsx_intrinsics.c
+++ b/powerpc/filter_vsx_intrinsics.c
@@ -18,18 +18,20 @@
/* This code requires -maltivec and -mabi=altivec on the command line: */
#if PNG_POWERPC_VSX_IMPLEMENTATION == 1 /* intrinsics code from pngpriv.h */
-/* libpng row pointers are not necessarily aligned to any particular boundary,
- * however this code will only work with appropriate alignment. arm/arm_init.c
- * checks for this (and will not compile unless it is done). This code uses
- * variants of png_aligncast to avoid compiler warnings.
- */
-#define png_ptr(type,pointer) png_aligncast(type *,pointer)
-#define png_ptrc(type,pointer) png_aligncastconst(const type *,pointer)
-
-/*#include <altivec.h>*/
+#include <altivec.h>
#if PNG_POWERPC_VSX_OPT > 0
+/* Each function in this file looks at up to 3 pixels (a, b, c) to predict
+ * the 4th (d). They are positioned like this:
+ *    prev:  c b
+ *    row:   a d
+ * The Sub filter predicts d=a, Avg predicts d=(a+b)/2, and Paeth predicts d
+ * to be whichever of a, b, or c is closest to p=a+b-c.
+ * (This description is taken from ../intel/filter_sse2_intrinsics.c.)
+ */
+
+
void png_read_filter_row_up_vsx(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{
@@ -38,27 +40,50 @@ void png_read_filter_row_up_vsx(png_row_infop row_info, png_bytep row,
png_bytep rp = row;
png_const_bytep pp = prev_row;
- for (i = 0; i < istop; i++)
+ vector unsigned char rp_vec;
+ vector unsigned char pp_vec;
+
+ /* Process 16 bytes per iteration with vector instructions while at least 16 bytes remain */
+ while( istop >= 16 )
{
- *rp = (png_byte)(((int)(*rp) + (int)(*pp++)) & 0xff);
- rp++;
+ rp_vec = vec_ld(0,rp);
+ pp_vec = vec_ld(0,pp);
+
+ rp_vec = vec_add(rp_vec,pp_vec);
+
+ vec_st(rp_vec,0,rp);
+
+ pp += 16;
+ rp += 16;
+ istop -= 16;
}
+ if(istop % 16 > 0)
+ {
+ /* If the row's byte count is not a multiple of 16,
+ * process the remaining bytes one at a time with the scalar loop.
+ */
+ for (i = 0; i < istop; i++)
+ {
+ *rp = (png_byte)(((int)(*rp) + (int)(*pp++)) & 0xff);
+ rp++;
+ }
+ }
}
void png_read_filter_row_sub4_vsx(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{
+ const unsigned int bpp = 4;
png_size_t i;
png_size_t istop = row_info->rowbytes;
- unsigned int bpp = (row_info->pixel_depth + 7) >> 3;
png_bytep rp = row + bpp;
PNG_UNUSED(prev_row)
for (i = bpp; i < istop; i++)
{
- *rp = (png_byte)(((int)(*rp) + (int)(*(rp-bpp))) & 0xff);
+ *rp = (png_byte)(((int)(*rp) + (int)(*(rp-4))) & 0xff);
rp++;
}
}
@@ -66,16 +91,16 @@ void png_read_filter_row_sub4_vsx(png_row_infop row_info, png_bytep row,
void png_read_filter_row_sub3_vsx(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{
+ const unsigned int bpp = 4;
png_size_t i;
png_size_t istop = row_info->rowbytes;
- unsigned int bpp = (row_info->pixel_depth + 7) >> 3;
png_bytep rp = row + bpp;
PNG_UNUSED(prev_row)
for (i = bpp; i < istop; i++)
{
- *rp = (png_byte)(((int)(*rp) + (int)(*(rp-bpp))) & 0xff);
+ *rp = (png_byte)(((int)(*rp) + (int)(*(rp-3))) & 0xff);
rp++;
}
}