summaryrefslogtreecommitdiff
path: root/powerpc/filter_vsx_intrinsics.c
diff options
context:
space:
mode:
authorVadim Barkov <neverscaired@gmail.com>2017-02-09 18:57:53 +0000
committerVadim Barkov <neverscaired@gmail.com>2017-02-09 18:57:53 +0000
commit99d7285f206685aa80eea874cc17d0a6304761c2 (patch)
tree28e5c57fb93af3ed98e5b41258f766ee95714e1b /powerpc/filter_vsx_intrinsics.c
parentd4bdca45b37662a0c25c316829a9594b10d0fc1f (diff)
downloadlibpng-99d7285f206685aa80eea874cc17d0a6304761c2.tar.gz
Fixed filter_paeth for PowerPC VSX
Now all tests are fine for ppc64le
Diffstat (limited to 'powerpc/filter_vsx_intrinsics.c')
-rw-r--r--powerpc/filter_vsx_intrinsics.c270
1 files changed, 161 insertions, 109 deletions
diff --git a/powerpc/filter_vsx_intrinsics.c b/powerpc/filter_vsx_intrinsics.c
index 9376bc885..e1ba064a6 100644
--- a/powerpc/filter_vsx_intrinsics.c
+++ b/powerpc/filter_vsx_intrinsics.c
@@ -448,6 +448,32 @@ void png_read_filter_row_avg3_vsx(png_row_infop row_info, png_bytep row,
*rp++ = (png_byte)a;\
}
+#define VEC_CHAR_ZERO (vector unsigned char){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}
+#ifdef __LITTLE_ENDIAN__
+
+# define VEC_CHAR_TO_SHORT1_4 (vector unsigned char){ 4,16, 5,16, 6,16, 7,16,16,16,16,16,16,16,16,16}
+# define VEC_CHAR_TO_SHORT2_4 (vector unsigned char){ 8,16, 9,16,10,16,11,16,16,16,16,16,16,16,16,16}
+# define VEC_CHAR_TO_SHORT3_4 (vector unsigned char){12,16,13,16,14,16,15,16,16,16,16,16,16,16,16,16}
+
+# define VEC_SHORT_TO_CHAR1_4 (vector unsigned char){16,16,16,16, 0, 2, 4, 6,16,16,16,16,16,16,16,16}
+# define VEC_SHORT_TO_CHAR2_4 (vector unsigned char){16,16,16,16,16,16,16,16, 0, 2, 4, 6,16,16,16,16}
+# define VEC_SHORT_TO_CHAR3_4 (vector unsigned char){16,16,16,16,16,16,16,16,16,16,16,16, 0, 2, 4, 6}
+
+# define VEC_CHAR_TO_SHORT1_3 (vector unsigned char){ 3,16, 4,16, 5,16,16,16,16,16,16,16,16,16,16,16}
+# define VEC_CHAR_TO_SHORT2_3 (vector unsigned char){ 6,16, 7,16, 8,16,16,16,16,16,16,16,16,16,16,16}
+# define VEC_CHAR_TO_SHORT3_3 (vector unsigned char){ 9,16,10,16,11,16,16,16,16,16,16,16,16,16,16,16}
+# define VEC_CHAR_TO_SHORT4_3 (vector unsigned char){12,16,13,16,14,16,16,16,16,16,16,16,16,16,16,16}
+
+# define VEC_SHORT_TO_CHAR1_3 (vector unsigned char){16,16,16, 0, 2, 4,16,16,16,16,16,16,16,16,16,16}
+# define VEC_SHORT_TO_CHAR2_3 (vector unsigned char){16,16,16,16,16,16, 0, 2, 4,16,16,16,16,16,16,16}
+# define VEC_SHORT_TO_CHAR3_3 (vector unsigned char){16,16,16,16,16,16,16,16,16, 0, 2, 4,16,16,16,16}
+# define VEC_SHORT_TO_CHAR4_3 (vector unsigned char){16,16,16,16,16,16,16,16,16,16,16,16, 0, 2, 4,16}
+
+#endif
+
+#define vsx_char_to_short(vec,offset,bpp) (vector unsigned short)vec_perm((vec),VEC_CHAR_ZERO,VEC_CHAR_TO_SHORT##offset##_##bpp)
+#define vsx_short_to_char(vec,offset,bpp) vec_perm((vector unsigned char)(vec),VEC_CHAR_ZERO,VEC_SHORT_TO_CHAR##offset##_##bpp)
+
void png_read_filter_row_paeth4_vsx(png_row_infop row_info, png_bytep row,
png_const_bytep prev_row)
{
@@ -456,10 +482,9 @@ void png_read_filter_row_paeth4_vsx(png_row_infop row_info, png_bytep row,
int a, b, c, pa, pb, pc, p;
vector unsigned char rp_vec;
vector unsigned char pp_vec;
- vector unsigned char a_vec,b_vec,c_vec,nearest_vec;
- vector signed char pa_vec,pb_vec,pc_vec;
- vector unsigned char pa_vec_abs,pb_vec_abs,pc_vec_abs,smallest_vec;
vector unsigned char zero_vec = {0};
+ vector unsigned short a_vec,b_vec,c_vec,nearest_vec;
+ vector signed short pa_vec,pb_vec,pc_vec,smallest_vec;
declare_common_vars(row_info,row,prev_row,bpp)
rp -= bpp;
@@ -493,56 +518,68 @@ void png_read_filter_row_paeth4_vsx(png_row_infop row_info, png_bytep row,
rp_vec = vec_ld(0,rp);
vec_ld_unaligned(pp_vec,pp);
- a_vec = vec_perm(rp_vec , zero_vec , VEC_SELECT1_4);
- b_vec = vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT1_4);
- c_vec = vec_perm(pp_vec , zero_vec , VEC_SELECT1_4);
- pa_vec = (vector signed char) vec_sub(b_vec,c_vec);
- pb_vec = (vector signed char) vec_sub(a_vec , c_vec);
- pc_vec = (vector signed char) vec_add(pa_vec,pb_vec);
- pa_vec_abs = (vector unsigned char)vec_abs(pa_vec);
- pb_vec_abs = (vector unsigned char)vec_abs(pb_vec);
- pc_vec_abs = (vector unsigned char)vec_abs(pc_vec);
- smallest_vec = vec_min(pc_vec_abs, vec_min(pa_vec_abs,pb_vec_abs));
+ a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT1_4),1,4);
+ b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT1_4),1,4);
+ c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT1_4),1,4);
+ pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
+ pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
+ pc_vec = vec_add(pa_vec,pb_vec);
+ pa_vec = vec_abs(pa_vec);
+ pb_vec = vec_abs(pb_vec);
+ pc_vec = vec_abs(pc_vec);
+ smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
nearest_vec = if_then_else(
- vec_cmpeq(pa_vec_abs,smallest_vec),
+ vec_cmpeq(pa_vec,smallest_vec),
a_vec,
- if_then_else(vec_cmpeq(pb_vec_abs,smallest_vec),b_vec,c_vec)
- );
- rp_vec = vec_add(rp_vec, nearest_vec);
-
- a_vec = vec_perm(rp_vec , zero_vec , VEC_SELECT2_4);
- b_vec = vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT2_4);
- c_vec = vec_perm(pp_vec , zero_vec , VEC_SELECT2_4);
- pa_vec = (vector signed char) vec_sub(b_vec,c_vec);
- pb_vec = (vector signed char) vec_sub(a_vec , c_vec);
- pc_vec = (vector signed char) vec_add(pa_vec,pb_vec);
- pa_vec_abs = (vector unsigned char)vec_abs(pa_vec);
- pb_vec_abs = (vector unsigned char)vec_abs(pb_vec);
- pc_vec_abs = (vector unsigned char)vec_abs(pc_vec);
- smallest_vec = vec_min(pc_vec_abs, vec_min(pa_vec_abs,pb_vec_abs));
+ if_then_else(
+ vec_cmpeq(pb_vec,smallest_vec),
+ b_vec,
+ c_vec
+ )
+ );
+ rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,1,4)));
+
+ a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT2_4),2,4);
+ b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT2_4),2,4);
+ c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT2_4),2,4);
+ pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
+ pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
+ pc_vec = vec_add(pa_vec,pb_vec);
+ pa_vec = vec_abs(pa_vec);
+ pb_vec = vec_abs(pb_vec);
+ pc_vec = vec_abs(pc_vec);
+ smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
nearest_vec = if_then_else(
- vec_cmpeq(pa_vec_abs,smallest_vec),
+ vec_cmpeq(pa_vec,smallest_vec),
a_vec,
- if_then_else(vec_cmpeq(pb_vec_abs,smallest_vec),b_vec,c_vec)
- );
- rp_vec = vec_add(rp_vec, nearest_vec);
-
- a_vec = vec_perm(rp_vec , zero_vec , VEC_SELECT3_4);
- b_vec = vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT3_4);
- c_vec = vec_perm(pp_vec , zero_vec , VEC_SELECT3_4);
- pa_vec = (vector signed char) vec_sub(b_vec,c_vec);
- pb_vec = (vector signed char) vec_sub(a_vec , c_vec);
- pc_vec = (vector signed char) vec_add(pa_vec,pb_vec);
- pa_vec_abs = (vector unsigned char)vec_abs(pa_vec);
- pb_vec_abs = (vector unsigned char)vec_abs(pb_vec);
- pc_vec_abs = (vector unsigned char)vec_abs(pc_vec);
- smallest_vec = vec_min(pc_vec_abs, vec_min(pa_vec_abs,pb_vec_abs));
+ if_then_else(
+ vec_cmpeq(pb_vec,smallest_vec),
+ b_vec,
+ c_vec
+ )
+ );
+ rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,2,4)));
+
+ a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT3_4),3,4);
+ b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT3_4),3,4);
+ c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT3_4),3,4);
+ pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
+ pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
+ pc_vec = vec_add(pa_vec,pb_vec);
+ pa_vec = vec_abs(pa_vec);
+ pb_vec = vec_abs(pb_vec);
+ pc_vec = vec_abs(pc_vec);
+ smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
nearest_vec = if_then_else(
- vec_cmpeq(pa_vec_abs,smallest_vec),
+ vec_cmpeq(pa_vec,smallest_vec),
a_vec,
- if_then_else(vec_cmpeq(pb_vec_abs,smallest_vec),b_vec,c_vec)
- );
- rp_vec = vec_add(rp_vec, nearest_vec);
+ if_then_else(
+ vec_cmpeq(pb_vec,smallest_vec),
+ b_vec,
+ c_vec
+ )
+ );
+ rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,3,4)));
vec_st(rp_vec,0,rp);
@@ -566,10 +603,9 @@ void png_read_filter_row_paeth3_vsx(png_row_infop row_info, png_bytep row,
int a, b, c, pa, pb, pc, p;
vector unsigned char rp_vec;
vector unsigned char pp_vec;
- vector unsigned char a_vec,b_vec,c_vec,nearest_vec;
- vector signed char pa_vec,pb_vec,pc_vec;
- vector unsigned char pa_vec_abs,pb_vec_abs,pc_vec_abs,smallest_vec;
vector unsigned char zero_vec = {0};
+ vector unsigned short a_vec,b_vec,c_vec,nearest_vec;
+ vector signed short pa_vec,pb_vec,pc_vec,smallest_vec;
declare_common_vars(row_info,row,prev_row,bpp)
rp -= bpp;
@@ -603,73 +639,89 @@ void png_read_filter_row_paeth3_vsx(png_row_infop row_info, png_bytep row,
rp_vec = vec_ld(0,rp);
vec_ld_unaligned(pp_vec,pp);
- a_vec = vec_perm(rp_vec , zero_vec , VEC_SELECT1_3);
- b_vec = vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT1_3);
- c_vec = vec_perm(pp_vec , zero_vec , VEC_SELECT1_3);
- pa_vec = (vector signed char) vec_sub(b_vec,c_vec);
- pb_vec = (vector signed char) vec_sub(a_vec , c_vec);
- pc_vec = (vector signed char) vec_add(pa_vec,pb_vec);
- pa_vec_abs = (vector unsigned char)vec_abs(pa_vec);
- pb_vec_abs = (vector unsigned char)vec_abs(pb_vec);
- pc_vec_abs = (vector unsigned char)vec_abs(pc_vec);
- smallest_vec = vec_min(pc_vec_abs, vec_min(pa_vec_abs,pb_vec_abs));
+ a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT1_3),1,3);
+ b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT1_3),1,3);
+ c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT1_3),1,3);
+ pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
+ pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
+ pc_vec = vec_add(pa_vec,pb_vec);
+ pa_vec = vec_abs(pa_vec);
+ pb_vec = vec_abs(pb_vec);
+ pc_vec = vec_abs(pc_vec);
+ smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
nearest_vec = if_then_else(
- vec_cmpeq(pa_vec_abs,smallest_vec),
+ vec_cmpeq(pa_vec,smallest_vec),
a_vec,
- if_then_else(vec_cmpeq(pb_vec_abs,smallest_vec),b_vec,c_vec)
- );
- rp_vec = vec_add(rp_vec, nearest_vec);
-
- a_vec = vec_perm(rp_vec , zero_vec , VEC_SELECT2_3);
- b_vec = vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT2_3);
- c_vec = vec_perm(pp_vec , zero_vec , VEC_SELECT2_3);
- pa_vec = (vector signed char) vec_sub(b_vec,c_vec);
- pb_vec = (vector signed char) vec_sub(a_vec , c_vec);
- pc_vec = (vector signed char) vec_add(pa_vec,pb_vec);
- pa_vec_abs = (vector unsigned char)vec_abs(pa_vec);
- pb_vec_abs = (vector unsigned char)vec_abs(pb_vec);
- pc_vec_abs = (vector unsigned char)vec_abs(pc_vec);
- smallest_vec = vec_min(pc_vec_abs, vec_min(pa_vec_abs,pb_vec_abs));
+ if_then_else(
+ vec_cmpeq(pb_vec,smallest_vec),
+ b_vec,
+ c_vec
+ )
+ );
+ rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,1,3)));
+
+ a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT2_3),2,3);
+ b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT2_3),2,3);
+ c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT2_3),2,3);
+ pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
+ pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
+ pc_vec = vec_add(pa_vec,pb_vec);
+ pa_vec = vec_abs(pa_vec);
+ pb_vec = vec_abs(pb_vec);
+ pc_vec = vec_abs(pc_vec);
+ smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
nearest_vec = if_then_else(
- vec_cmpeq(pa_vec_abs,smallest_vec),
+ vec_cmpeq(pa_vec,smallest_vec),
a_vec,
- if_then_else(vec_cmpeq(pb_vec_abs,smallest_vec),b_vec,c_vec)
- );
- rp_vec = vec_add(rp_vec, nearest_vec);
-
- a_vec = vec_perm(rp_vec , zero_vec , VEC_SELECT3_3);
- b_vec = vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT3_3);
- c_vec = vec_perm(pp_vec , zero_vec , VEC_SELECT3_3);
- pa_vec = (vector signed char) vec_sub(b_vec,c_vec);
- pb_vec = (vector signed char) vec_sub(a_vec , c_vec);
- pc_vec = (vector signed char) vec_add(pa_vec,pb_vec);
- pa_vec_abs = (vector unsigned char)vec_abs(pa_vec);
- pb_vec_abs = (vector unsigned char)vec_abs(pb_vec);
- pc_vec_abs = (vector unsigned char)vec_abs(pc_vec);
- smallest_vec = vec_min(pc_vec_abs, vec_min(pa_vec_abs,pb_vec_abs));
+ if_then_else(
+ vec_cmpeq(pb_vec,smallest_vec),
+ b_vec,
+ c_vec
+ )
+ );
+ rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,2,3)));
+
+ a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT3_3),3,3);
+ b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT3_3),3,3);
+ c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT3_3),3,3);
+ pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
+ pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
+ pc_vec = vec_add(pa_vec,pb_vec);
+ pa_vec = vec_abs(pa_vec);
+ pb_vec = vec_abs(pb_vec);
+ pc_vec = vec_abs(pc_vec);
+ smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
nearest_vec = if_then_else(
- vec_cmpeq(pa_vec_abs,smallest_vec),
+ vec_cmpeq(pa_vec,smallest_vec),
a_vec,
- if_then_else(vec_cmpeq(pb_vec_abs,smallest_vec),b_vec,c_vec)
- );
- rp_vec = vec_add(rp_vec, nearest_vec);
-
- a_vec = vec_perm(rp_vec , zero_vec , VEC_SELECT4_3);
- b_vec = vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT4_3);
- c_vec = vec_perm(pp_vec , zero_vec , VEC_SELECT4_3);
- pa_vec = (vector signed char) vec_sub(b_vec,c_vec);
- pb_vec = (vector signed char) vec_sub(a_vec , c_vec);
- pc_vec = (vector signed char) vec_add(pa_vec,pb_vec);
- pa_vec_abs = (vector unsigned char)vec_abs(pa_vec);
- pb_vec_abs = (vector unsigned char)vec_abs(pb_vec);
- pc_vec_abs = (vector unsigned char)vec_abs(pc_vec);
- smallest_vec = vec_min(pc_vec_abs, vec_min(pa_vec_abs,pb_vec_abs));
+ if_then_else(
+ vec_cmpeq(pb_vec,smallest_vec),
+ b_vec,
+ c_vec
+ )
+ );
+ rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,3,3)));
+
+ a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT4_3),4,3);
+ b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT4_3),4,3);
+ c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT4_3),4,3);
+ pa_vec = (vector signed short) vec_sub(b_vec,c_vec);
+ pb_vec = (vector signed short) vec_sub(a_vec , c_vec);
+ pc_vec = vec_add(pa_vec,pb_vec);
+ pa_vec = vec_abs(pa_vec);
+ pb_vec = vec_abs(pb_vec);
+ pc_vec = vec_abs(pc_vec);
+ smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec));
nearest_vec = if_then_else(
- vec_cmpeq(pa_vec_abs,smallest_vec),
+ vec_cmpeq(pa_vec,smallest_vec),
a_vec,
- if_then_else(vec_cmpeq(pb_vec_abs,smallest_vec),b_vec,c_vec)
- );
- rp_vec = vec_add(rp_vec, nearest_vec);
+ if_then_else(
+ vec_cmpeq(pb_vec,smallest_vec),
+ b_vec,
+ c_vec
+ )
+ );
+ rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,4,3)));
vec_st(rp_vec,0,rp);