summary refs log tree commit diff
path: root/powerpc/filter_vsx_intrinsics.c
diff options
context:
space:
mode:
author Vadim Barkov <neverscaired@gmail.com> 2017-01-16 19:45:17 +0300
committer Vadim Barkov <neverscaired@gmail.com> 2017-01-16 19:45:17 +0300
commit ccef5edbfe128415bdc6cbe73da868707d4ccc16 (patch)
tree 65f47ec84b426404ebc7ed8cdd76df73b39b355f /powerpc/filter_vsx_intrinsics.c
parent 91acd4baf8041cf2df2ab790e98a8e8025a727cc (diff)
download libpng-ccef5edbfe128415bdc6cbe73da868707d4ccc16.tar.gz
Fixed bug with unaligned input on VSX filter_up
Diffstat (limited to 'powerpc/filter_vsx_intrinsics.c')
-rw-r--r-- powerpc/filter_vsx_intrinsics.c 55
1 files changed, 25 insertions, 30 deletions
diff --git a/powerpc/filter_vsx_intrinsics.c b/powerpc/filter_vsx_intrinsics.c
index fe1483a13..833f9cc44 100644
--- a/powerpc/filter_vsx_intrinsics.c
+++ b/powerpc/filter_vsx_intrinsics.c
@@ -36,26 +36,37 @@ void png_read_filter_row_up_vsx(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{
png_size_t i;
- png_size_t istop = row_info->rowbytes;
+ png_size_t unaligned_top = (png_size_t)row % 16;
+ png_size_t istop = row_info->rowbytes - unaligned_top;
png_bytep rp = row;
png_const_bytep pp = prev_row;
vector unsigned char rp_vec;
vector unsigned char pp_vec;
+ /* Altivec operations require 16-byte aligned data
+ * but input can be unaligned. So we calculate
+ * unaligned part as usual.
+ */
+ for (i = 0; i < unaligned_top; i++)
+ {
+ *rp = (png_byte)(((int)(*rp) + (int)(*pp++)) & 0xff);
+ rp++;
+ }
+
/* Using SIMD while we can */
while( istop >= 16 )
{
- rp_vec = vec_ld(0,rp);
- pp_vec = vec_ld(0,pp);
-
- rp_vec = vec_add(rp_vec,pp_vec);
-
- vec_st(rp_vec,0,rp);
-
- pp += 16;
- rp += 16;
- istop -= 16;
+ rp_vec = vec_ld(0,rp);
+ pp_vec = vec_ld(0,pp);
+
+ rp_vec = vec_add(rp_vec,pp_vec);
+
+ vec_st(rp_vec,0,rp);
+
+ pp += 16;
+ rp += 16;
+ istop -= 16;
}
if(istop % 16 > 0)
@@ -68,10 +79,11 @@ void png_read_filter_row_up_vsx(png_row_infop row_info, png_bytep row,
*rp = (png_byte)(((int)(*rp) + (int)(*pp++)) & 0xff);
rp++;
}
- }
}
-void png_read_filter_row_sub4_vsx(png_row_infop row_info, png_bytep row,
+}
+
+void png_read_filter_row_sub_vsx(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{
const unsigned int bpp = 4;
@@ -88,23 +100,6 @@ void png_read_filter_row_sub4_vsx(png_row_infop row_info, png_bytep row,
}
}
-void png_read_filter_row_sub3_vsx(png_row_infop row_info, png_bytep row,
- png_const_bytep prev_row)
-{
- const unsigned int bpp = 4;
- png_size_t i;
- png_size_t istop = row_info->rowbytes;
- png_bytep rp = row + bpp;
-
- PNG_UNUSED(prev_row)
-
- for (i = bpp; i < istop; i++)
- {
- *rp = (png_byte)(((int)(*rp) + (int)(*(rp-3))) & 0xff);
- rp++;
- }
-}
-
void png_read_filter_row_avg4_vsx(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{