diff options
author | Vadim Barkov <neverscaired@gmail.com> | 2017-02-09 18:57:53 +0000 |
---|---|---|
committer | Vadim Barkov <neverscaired@gmail.com> | 2017-02-09 18:57:53 +0000 |
commit | 99d7285f206685aa80eea874cc17d0a6304761c2 (patch) | |
tree | 28e5c57fb93af3ed98e5b41258f766ee95714e1b /powerpc/filter_vsx_intrinsics.c | |
parent | d4bdca45b37662a0c25c316829a9594b10d0fc1f (diff) | |
download | libpng-99d7285f206685aa80eea874cc17d0a6304761c2.tar.gz |
Fixed filter_paeth for PowerPC VSX
Now all tests are fine for ppc64le
Diffstat (limited to 'powerpc/filter_vsx_intrinsics.c')
-rw-r--r-- | powerpc/filter_vsx_intrinsics.c | 270 |
1 files changed, 161 insertions, 109 deletions
diff --git a/powerpc/filter_vsx_intrinsics.c b/powerpc/filter_vsx_intrinsics.c index 9376bc885..e1ba064a6 100644 --- a/powerpc/filter_vsx_intrinsics.c +++ b/powerpc/filter_vsx_intrinsics.c @@ -448,6 +448,32 @@ void png_read_filter_row_avg3_vsx(png_row_infop row_info, png_bytep row, *rp++ = (png_byte)a;\ } +#define VEC_CHAR_ZERO (vector unsigned char){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} +#ifdef __LITTLE_ENDIAN__ + +# define VEC_CHAR_TO_SHORT1_4 (vector unsigned char){ 4,16, 5,16, 6,16, 7,16,16,16,16,16,16,16,16,16} +# define VEC_CHAR_TO_SHORT2_4 (vector unsigned char){ 8,16, 9,16,10,16,11,16,16,16,16,16,16,16,16,16} +# define VEC_CHAR_TO_SHORT3_4 (vector unsigned char){12,16,13,16,14,16,15,16,16,16,16,16,16,16,16,16} + +# define VEC_SHORT_TO_CHAR1_4 (vector unsigned char){16,16,16,16, 0, 2, 4, 6,16,16,16,16,16,16,16,16} +# define VEC_SHORT_TO_CHAR2_4 (vector unsigned char){16,16,16,16,16,16,16,16, 0, 2, 4, 6,16,16,16,16} +# define VEC_SHORT_TO_CHAR3_4 (vector unsigned char){16,16,16,16,16,16,16,16,16,16,16,16, 0, 2, 4, 6} + +# define VEC_CHAR_TO_SHORT1_3 (vector unsigned char){ 3,16, 4,16, 5,16,16,16,16,16,16,16,16,16,16,16} +# define VEC_CHAR_TO_SHORT2_3 (vector unsigned char){ 6,16, 7,16, 8,16,16,16,16,16,16,16,16,16,16,16} +# define VEC_CHAR_TO_SHORT3_3 (vector unsigned char){ 9,16,10,16,11,16,16,16,16,16,16,16,16,16,16,16} +# define VEC_CHAR_TO_SHORT4_3 (vector unsigned char){12,16,13,16,14,16,16,16,16,16,16,16,16,16,16,16} + +# define VEC_SHORT_TO_CHAR1_3 (vector unsigned char){16,16,16, 0, 2, 4,16,16,16,16,16,16,16,16,16,16} +# define VEC_SHORT_TO_CHAR2_3 (vector unsigned char){16,16,16,16,16,16, 0, 2, 4,16,16,16,16,16,16,16} +# define VEC_SHORT_TO_CHAR3_3 (vector unsigned char){16,16,16,16,16,16,16,16,16, 0, 2, 4,16,16,16,16} +# define VEC_SHORT_TO_CHAR4_3 (vector unsigned char){16,16,16,16,16,16,16,16,16,16,16,16, 0, 2, 4,16} + +#endif + +#define vsx_char_to_short(vec,offset,bpp) (vector unsigned short)vec_perm((vec),VEC_CHAR_ZERO,VEC_CHAR_TO_SHORT##offset##_##bpp) +#define vsx_short_to_char(vec,offset,bpp) vec_perm((vector unsigned char)(vec),VEC_CHAR_ZERO,VEC_SHORT_TO_CHAR##offset##_##bpp) + void png_read_filter_row_paeth4_vsx(png_row_infop row_info, png_bytep row, png_const_bytep prev_row) { @@ -456,10 +482,9 @@ void png_read_filter_row_paeth4_vsx(png_row_infop row_info, png_bytep row, int a, b, c, pa, pb, pc, p; vector unsigned char rp_vec; vector unsigned char pp_vec; - vector unsigned char a_vec,b_vec,c_vec,nearest_vec; - vector signed char pa_vec,pb_vec,pc_vec; - vector unsigned char pa_vec_abs,pb_vec_abs,pc_vec_abs,smallest_vec; vector unsigned char zero_vec = {0}; + vector unsigned short a_vec,b_vec,c_vec,nearest_vec; + vector signed short pa_vec,pb_vec,pc_vec,smallest_vec; declare_common_vars(row_info,row,prev_row,bpp) rp -= bpp; @@ -493,56 +518,68 @@ void png_read_filter_row_paeth4_vsx(png_row_infop row_info, png_bytep row, rp_vec = vec_ld(0,rp); vec_ld_unaligned(pp_vec,pp); - a_vec = vec_perm(rp_vec , zero_vec , VEC_SELECT1_4); - b_vec = vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT1_4); - c_vec = vec_perm(pp_vec , zero_vec , VEC_SELECT1_4); - pa_vec = (vector signed char) vec_sub(b_vec,c_vec); - pb_vec = (vector signed char) vec_sub(a_vec , c_vec); - pc_vec = (vector signed char) vec_add(pa_vec,pb_vec); - pa_vec_abs = (vector unsigned char)vec_abs(pa_vec); - pb_vec_abs = (vector unsigned char)vec_abs(pb_vec); - pc_vec_abs = (vector unsigned char)vec_abs(pc_vec); - smallest_vec = vec_min(pc_vec_abs, vec_min(pa_vec_abs,pb_vec_abs)); + a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT1_4),1,4); + b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT1_4),1,4); + c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT1_4),1,4); + pa_vec = (vector signed short) vec_sub(b_vec,c_vec); + pb_vec = (vector signed short) vec_sub(a_vec , c_vec); + pc_vec = vec_add(pa_vec,pb_vec); + pa_vec = vec_abs(pa_vec); + pb_vec = vec_abs(pb_vec); + pc_vec = vec_abs(pc_vec); + smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec)); nearest_vec = if_then_else( - vec_cmpeq(pa_vec_abs,smallest_vec), + vec_cmpeq(pa_vec,smallest_vec), a_vec, - if_then_else(vec_cmpeq(pb_vec_abs,smallest_vec),b_vec,c_vec) - ); - rp_vec = vec_add(rp_vec, nearest_vec); - - a_vec = vec_perm(rp_vec , zero_vec , VEC_SELECT2_4); - b_vec = vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT2_4); - c_vec = vec_perm(pp_vec , zero_vec , VEC_SELECT2_4); - pa_vec = (vector signed char) vec_sub(b_vec,c_vec); - pb_vec = (vector signed char) vec_sub(a_vec , c_vec); - pc_vec = (vector signed char) vec_add(pa_vec,pb_vec); - pa_vec_abs = (vector unsigned char)vec_abs(pa_vec); - pb_vec_abs = (vector unsigned char)vec_abs(pb_vec); - pc_vec_abs = (vector unsigned char)vec_abs(pc_vec); - smallest_vec = vec_min(pc_vec_abs, vec_min(pa_vec_abs,pb_vec_abs)); + if_then_else( + vec_cmpeq(pb_vec,smallest_vec), + b_vec, + c_vec + ) + ); + rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,1,4))); + + a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT2_4),2,4); + b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT2_4),2,4); + c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT2_4),2,4); + pa_vec = (vector signed short) vec_sub(b_vec,c_vec); + pb_vec = (vector signed short) vec_sub(a_vec , c_vec); + pc_vec = vec_add(pa_vec,pb_vec); + pa_vec = vec_abs(pa_vec); + pb_vec = vec_abs(pb_vec); + pc_vec = vec_abs(pc_vec); + smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec)); nearest_vec = if_then_else( - vec_cmpeq(pa_vec_abs,smallest_vec), + vec_cmpeq(pa_vec,smallest_vec), a_vec, - if_then_else(vec_cmpeq(pb_vec_abs,smallest_vec),b_vec,c_vec) - ); - rp_vec = vec_add(rp_vec, nearest_vec); - - a_vec = vec_perm(rp_vec , zero_vec , VEC_SELECT3_4); - b_vec = vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT3_4); - c_vec = vec_perm(pp_vec , zero_vec , VEC_SELECT3_4); - pa_vec = (vector signed char) vec_sub(b_vec,c_vec); - pb_vec = (vector signed char) vec_sub(a_vec , c_vec); - pc_vec = (vector signed char) vec_add(pa_vec,pb_vec); - pa_vec_abs = (vector unsigned char)vec_abs(pa_vec); - pb_vec_abs = (vector unsigned char)vec_abs(pb_vec); - pc_vec_abs = (vector unsigned char)vec_abs(pc_vec); - smallest_vec = vec_min(pc_vec_abs, vec_min(pa_vec_abs,pb_vec_abs)); + if_then_else( + vec_cmpeq(pb_vec,smallest_vec), + b_vec, + c_vec + ) + ); + rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,2,4))); + + a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT3_4),3,4); + b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT3_4),3,4); + c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT3_4),3,4); + pa_vec = (vector signed short) vec_sub(b_vec,c_vec); + pb_vec = (vector signed short) vec_sub(a_vec , c_vec); + pc_vec = vec_add(pa_vec,pb_vec); + pa_vec = vec_abs(pa_vec); + pb_vec = vec_abs(pb_vec); + pc_vec = vec_abs(pc_vec); + smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec)); nearest_vec = if_then_else( - vec_cmpeq(pa_vec_abs,smallest_vec), + vec_cmpeq(pa_vec,smallest_vec), a_vec, - if_then_else(vec_cmpeq(pb_vec_abs,smallest_vec),b_vec,c_vec) - ); - rp_vec = vec_add(rp_vec, nearest_vec); + if_then_else( + vec_cmpeq(pb_vec,smallest_vec), + b_vec, + c_vec + ) + ); + rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,3,4))); vec_st(rp_vec,0,rp); @@ -566,10 +603,9 @@ void png_read_filter_row_paeth3_vsx(png_row_infop row_info, png_bytep row, int a, b, c, pa, pb, pc, p; vector unsigned char rp_vec; vector unsigned char pp_vec; - vector unsigned char a_vec,b_vec,c_vec,nearest_vec; - vector signed char pa_vec,pb_vec,pc_vec; - vector unsigned char pa_vec_abs,pb_vec_abs,pc_vec_abs,smallest_vec; vector unsigned char zero_vec = {0}; + vector unsigned short a_vec,b_vec,c_vec,nearest_vec; + vector signed short pa_vec,pb_vec,pc_vec,smallest_vec; declare_common_vars(row_info,row,prev_row,bpp) rp -= bpp; @@ -603,73 +639,89 @@ void png_read_filter_row_paeth3_vsx(png_row_infop row_info, png_bytep row, rp_vec = vec_ld(0,rp); vec_ld_unaligned(pp_vec,pp); - a_vec = vec_perm(rp_vec , zero_vec , VEC_SELECT1_3); - b_vec = vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT1_3); - c_vec = vec_perm(pp_vec , zero_vec , VEC_SELECT1_3); - pa_vec = (vector signed char) vec_sub(b_vec,c_vec); - pb_vec = (vector signed char) vec_sub(a_vec , c_vec); - pc_vec = (vector signed char) vec_add(pa_vec,pb_vec); - pa_vec_abs = (vector unsigned char)vec_abs(pa_vec); - pb_vec_abs = (vector unsigned char)vec_abs(pb_vec); - pc_vec_abs = (vector unsigned char)vec_abs(pc_vec); - smallest_vec = vec_min(pc_vec_abs, vec_min(pa_vec_abs,pb_vec_abs)); + a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT1_3),1,3); + b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT1_3),1,3); + c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT1_3),1,3); + pa_vec = (vector signed short) vec_sub(b_vec,c_vec); + pb_vec = (vector signed short) vec_sub(a_vec , c_vec); + pc_vec = vec_add(pa_vec,pb_vec); + pa_vec = vec_abs(pa_vec); + pb_vec = vec_abs(pb_vec); + pc_vec = vec_abs(pc_vec); + smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec)); nearest_vec = if_then_else( - vec_cmpeq(pa_vec_abs,smallest_vec), + vec_cmpeq(pa_vec,smallest_vec), a_vec, - if_then_else(vec_cmpeq(pb_vec_abs,smallest_vec),b_vec,c_vec) - ); - rp_vec = vec_add(rp_vec, nearest_vec); - - a_vec = vec_perm(rp_vec , zero_vec , VEC_SELECT2_3); - b_vec = vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT2_3); - c_vec = vec_perm(pp_vec , zero_vec , VEC_SELECT2_3); - pa_vec = (vector signed char) vec_sub(b_vec,c_vec); - pb_vec = (vector signed char) vec_sub(a_vec , c_vec); - pc_vec = (vector signed char) vec_add(pa_vec,pb_vec); - pa_vec_abs = (vector unsigned char)vec_abs(pa_vec); - pb_vec_abs = (vector unsigned char)vec_abs(pb_vec); - pc_vec_abs = (vector unsigned char)vec_abs(pc_vec); - smallest_vec = vec_min(pc_vec_abs, vec_min(pa_vec_abs,pb_vec_abs)); + if_then_else( + vec_cmpeq(pb_vec,smallest_vec), + b_vec, + c_vec + ) + ); + rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,1,3))); + + a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT2_3),2,3); + b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT2_3),2,3); + c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT2_3),2,3); + pa_vec = (vector signed short) vec_sub(b_vec,c_vec); + pb_vec = (vector signed short) vec_sub(a_vec , c_vec); + pc_vec = vec_add(pa_vec,pb_vec); + pa_vec = vec_abs(pa_vec); + pb_vec = vec_abs(pb_vec); + pc_vec = vec_abs(pc_vec); + smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec)); nearest_vec = if_then_else( - vec_cmpeq(pa_vec_abs,smallest_vec), + vec_cmpeq(pa_vec,smallest_vec), a_vec, - if_then_else(vec_cmpeq(pb_vec_abs,smallest_vec),b_vec,c_vec) - ); - rp_vec = vec_add(rp_vec, nearest_vec); - - a_vec = vec_perm(rp_vec , zero_vec , VEC_SELECT3_3); - b_vec = vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT3_3); - c_vec = vec_perm(pp_vec , zero_vec , VEC_SELECT3_3); - pa_vec = (vector signed char) vec_sub(b_vec,c_vec); - pb_vec = (vector signed char) vec_sub(a_vec , c_vec); - pc_vec = (vector signed char) vec_add(pa_vec,pb_vec); - pa_vec_abs = (vector unsigned char)vec_abs(pa_vec); - pb_vec_abs = (vector unsigned char)vec_abs(pb_vec); - pc_vec_abs = (vector unsigned char)vec_abs(pc_vec); - smallest_vec = vec_min(pc_vec_abs, vec_min(pa_vec_abs,pb_vec_abs)); + if_then_else( + vec_cmpeq(pb_vec,smallest_vec), + b_vec, + c_vec + ) + ); + rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,2,3))); + + a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT3_3),3,3); + b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT3_3),3,3); + c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT3_3),3,3); + pa_vec = (vector signed short) vec_sub(b_vec,c_vec); + pb_vec = (vector signed short) vec_sub(a_vec , c_vec); + pc_vec = vec_add(pa_vec,pb_vec); + pa_vec = vec_abs(pa_vec); + pb_vec = vec_abs(pb_vec); + pc_vec = vec_abs(pc_vec); + smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec)); nearest_vec = if_then_else( - vec_cmpeq(pa_vec_abs,smallest_vec), + vec_cmpeq(pa_vec,smallest_vec), a_vec, - if_then_else(vec_cmpeq(pb_vec_abs,smallest_vec),b_vec,c_vec) - ); - rp_vec = vec_add(rp_vec, nearest_vec); - - a_vec = vec_perm(rp_vec , zero_vec , VEC_SELECT4_3); - b_vec = vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT4_3); - c_vec = vec_perm(pp_vec , zero_vec , VEC_SELECT4_3); - pa_vec = (vector signed char) vec_sub(b_vec,c_vec); - pb_vec = (vector signed char) vec_sub(a_vec , c_vec); - pc_vec = (vector signed char) vec_add(pa_vec,pb_vec); - pa_vec_abs = (vector unsigned char)vec_abs(pa_vec); - pb_vec_abs = (vector unsigned char)vec_abs(pb_vec); - pc_vec_abs = (vector unsigned char)vec_abs(pc_vec); - smallest_vec = vec_min(pc_vec_abs, vec_min(pa_vec_abs,pb_vec_abs)); + if_then_else( + vec_cmpeq(pb_vec,smallest_vec), + b_vec, + c_vec + ) + ); + rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,3,3))); + + a_vec = vsx_char_to_short(vec_perm(rp_vec , zero_vec , VEC_SELECT4_3),4,3); + b_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_AVG_SELECT4_3),4,3); + c_vec = vsx_char_to_short(vec_perm(pp_vec , zero_vec , VEC_SELECT4_3),4,3); + pa_vec = (vector signed short) vec_sub(b_vec,c_vec); + pb_vec = (vector signed short) vec_sub(a_vec , c_vec); + pc_vec = vec_add(pa_vec,pb_vec); + pa_vec = vec_abs(pa_vec); + pb_vec = vec_abs(pb_vec); + pc_vec = vec_abs(pc_vec); + smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec)); nearest_vec = if_then_else( - vec_cmpeq(pa_vec_abs,smallest_vec), + vec_cmpeq(pa_vec,smallest_vec), a_vec, - if_then_else(vec_cmpeq(pb_vec_abs,smallest_vec),b_vec,c_vec) - ); - rp_vec = vec_add(rp_vec, nearest_vec); + if_then_else( + vec_cmpeq(pb_vec,smallest_vec), + b_vec, + c_vec + ) + ); + rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,4,3))); vec_st(rp_vec,0,rp); |