From c3bbb94b4c76292837a3b79322f8a678f106394f Mon Sep 17 00:00:00 2001
From: Matt Turner <mattst88@gmail.com>
Date: Thu, 27 Oct 2022 15:10:30 -0400
Subject: Revert "Fix signed-unsigned semantics in reduce_32"

This reverts commit aaf59b0338fbd4b9142794254261f8d0a018b60c.

This commit regressed the scaling-test unit test, by apparently allowing
the compiler to emit fused multiply-add instructions in cases they
wouldn't have been allowed before. While using gcc's -ffp-contract=...
flag avoids the issue on amd64, it does not on at least aarch64 and
ppc64.

This is unfortunate, because the commit being reverted resolved
https://gitlab.freedesktop.org/pixman/pixman/-/issues/43 so we will
reintroduce this failure, but after more than a year without a fix for
the unit test, I think it's time to bite the bullet.

Fixes: https://gitlab.freedesktop.org/pixman/pixman/-/issues/49
---
 pixman/pixman-bits-image.c | 24 +++++++++----------
 pixman/pixman-filter.c     | 31 +++++++-----------------
 test/scaling-test.c        | 59 +++++++++-------------------------------------
 3 files changed, 32 insertions(+), 82 deletions(-)

diff --git a/pixman/pixman-bits-image.c b/pixman/pixman-bits-image.c
index 6f65420..f050f35 100644
--- a/pixman/pixman-bits-image.c
+++ b/pixman/pixman-bits-image.c
@@ -209,15 +209,15 @@ static force_inline void reduce_32(unsigned int satot, unsigned int srtot,
 {
     uint32_t *ret = p;
 
-    satot = (int32_t)(satot + 0x8000) / 65536;
-    srtot = (int32_t)(srtot + 0x8000) / 65536;
-    sgtot = (int32_t)(sgtot + 0x8000) / 65536;
-    sbtot = (int32_t)(sbtot + 0x8000) / 65536;
+    satot = (satot + 0x8000) >> 16;
+    srtot = (srtot + 0x8000) >> 16;
+    sgtot = (sgtot + 0x8000) >> 16;
+    sbtot = (sbtot + 0x8000) >> 16;
 
-    satot = CLIP ((int32_t)satot, 0, 0xff);
-    srtot = CLIP ((int32_t)srtot, 0, 0xff);
-    sgtot = CLIP ((int32_t)sgtot, 0, 0xff);
-    sbtot = CLIP ((int32_t)sbtot, 0, 0xff);
+    satot = CLIP (satot, 0, 0xff);
+    srtot = CLIP (srtot, 0, 0xff);
+    sgtot = CLIP (sgtot, 0, 0xff);
+    sbtot = CLIP (sbtot, 0, 0xff);
 
     *ret = ((satot << 24) | (srtot << 16) | (sgtot <<  8) | (sbtot));
 }
@@ -240,10 +240,10 @@ static force_inline void reduce_float(unsigned int satot, unsigned int srtot,
 {
     argb_t *ret = p;
 
-    ret->a = CLIP ((int32_t)satot / 65536.f, 0.f, 1.f);
-    ret->r = CLIP ((int32_t)srtot / 65536.f, 0.f, 1.f);
-    ret->g = CLIP ((int32_t)sgtot / 65536.f, 0.f, 1.f);
-    ret->b = CLIP ((int32_t)sbtot / 65536.f, 0.f, 1.f);
+    ret->a = CLIP (satot / 65536.f, 0.f, 1.f);
+    ret->r = CLIP (srtot / 65536.f, 0.f, 1.f);
+    ret->g = CLIP (sgtot / 65536.f, 0.f, 1.f);
+    ret->b = CLIP (sbtot / 65536.f, 0.f, 1.f);
 }
 
 typedef void (* accumulate_pixel_t) (unsigned int *satot, unsigned int *srtot,
diff --git a/pixman/pixman-filter.c b/pixman/pixman-filter.c
index fa94532..5f3b752 100644
--- a/pixman/pixman-filter.c
+++ b/pixman/pixman-filter.c
@@ -237,14 +237,11 @@ create_1d_filter (int              width,
 		  pixman_kernel_t  sample,
 		  double           scale,
 		  int              n_phases,
-		  pixman_fixed_t *pstart,
-		  pixman_fixed_t *pend
-		  )
+		  pixman_fixed_t *p)
 {
-    pixman_fixed_t *p = pstart;
     double step;
     int i;
-    if(width <= 0) return;
+
     step = 1.0 / n_phases;
 
     for (i = 0; i < n_phases; ++i)
@@ -261,7 +258,7 @@ create_1d_filter (int              width,
 
 	x1 = ceil (frac - width / 2.0 - 0.5);
 	x2 = x1 + width;
-    assert( p >= pstart && p + (x2 - x1) <= pend ); /* assert validity of the following loop */
+
 	total = 0;
         for (x = x1; x < x2; ++x)
         {
@@ -290,10 +287,8 @@ create_1d_filter (int              width,
 
 	/* Normalize, with error diffusion */
 	p -= width;
-	assert(p >= pstart && p + (x2 - x1) <= pend); /* assert validity of the following loop */
-
-    total = 65536.0 / total;
-    new_total = 0;
+        total = 65536.0 / total;
+        new_total = 0;
 	e = 0.0;
 	for (x = x1; x < x2; ++x)
 	{
@@ -309,8 +304,6 @@ create_1d_filter (int              width,
 	 * at the first sample, since that is the only one that
 	 * hasn't had any error diffused into it.
 	 */
-
-	assert(p - width >= pstart && p - width < pend); /* assert... */
 	*(p - width) += pixman_fixed_1 - new_total;
     }
 }
@@ -472,16 +465,10 @@ pixman_filter_create_separable_convolution (int             *n_values,
     params[2] = pixman_int_to_fixed (subsample_bits_x);
     params[3] = pixman_int_to_fixed (subsample_bits_y);
 
-    {
-        pixman_fixed_t
-            *xparams = params+4,
-            *yparams = xparams + width*subsample_x,
-            *endparams = params + *n_values;
-        create_1d_filter(width, reconstruct_x, sample_x, sx, subsample_x,
-                         xparams, yparams);
-        create_1d_filter(height, reconstruct_y, sample_y, sy, subsample_y,
-                         yparams, endparams);
-    }
+    create_1d_filter (width, reconstruct_x, sample_x, sx, subsample_x,
+		      params + 4);
+    create_1d_filter (height, reconstruct_y, sample_y, sy, subsample_y,
+		      params + 4 + width * subsample_x);
 
 #ifdef PIXMAN_GNUPLOT
     gnuplot_filter(width, subsample_x, params + 4);
diff --git a/test/scaling-test.c b/test/scaling-test.c
index 4c22011..0ece611 100644
--- a/test/scaling-test.c
+++ b/test/scaling-test.c
@@ -45,35 +45,6 @@ get_format (int bpp)
     }
 }
 
-/* sets up "some kind of separable convolution" using prng values */
-void setup_separable_convulution(pixman_image_t* image, pixman_fixed_t scale_x, pixman_fixed_t scale_y, int verbose) {
-  int num_filters = PIXMAN_KERNEL_LANCZOS3_STRETCHED + 1;
-  int max_subsample_bits = 4;
-  struct Params {
-    pixman_fixed_t scale_x, scale_y;
-    pixman_kernel_t reconstruct_x, reconstruct_y;
-    pixman_kernel_t sample_x, sample_y;
-    int subsample_bits_x, subsample_bits_y;
-  } params = {
-      scale_x, scale_y,
-      prng_rand_n(num_filters), prng_rand_n(num_filters),
-      prng_rand_n(num_filters), prng_rand_n(num_filters),
-      prng_rand_n(max_subsample_bits + 1), prng_rand_n(max_subsample_bits + 1),
-  };
-  int num_params;
-  pixman_fixed_t *filter;
-  if(verbose) {
-    printf("separable convolution kernels=%d %d %d %d subsample_bits=%d %d\n", params.reconstruct_x, params.reconstruct_y, params.sample_x, params.sample_y, params.subsample_bits_x, params.subsample_bits_y);
-  }
-  filter = pixman_filter_create_separable_convolution(&num_params,
-                                                      params.scale_x, params.scale_y,
-                                                      params.reconstruct_x, params.reconstruct_y,
-                                                      params.sample_x, params.sample_y,
-                                                      params.subsample_bits_x, params.subsample_bits_y);
-  pixman_image_set_filter(image, PIXMAN_FILTER_SEPARABLE_CONVOLUTION, filter, num_params);
-  free(filter);
-}
-
 uint32_t
 test_composite (int      testnum,
 		int      verbose)
@@ -275,23 +246,15 @@ test_composite (int      testnum,
     }
     pixman_image_set_repeat (src_img, repeat);
 
-    switch (prng_rand_n (5)) {
-    case 0: pixman_image_set_filter(src_img, PIXMAN_FILTER_BILINEAR, NULL, 0); break;
-    case 1: pixman_image_set_filter(src_img, PIXMAN_FILTER_NEAREST, NULL, 0); break;
-    default:
-      /* this gets a larger range as samples distribute over quite a few possible settings */
-      setup_separable_convulution(src_img, scale_x, scale_y, verbose);
-      break;
-    }
+    if (prng_rand_n (2))
+	pixman_image_set_filter (src_img, PIXMAN_FILTER_NEAREST, NULL, 0);
+    else
+	pixman_image_set_filter (src_img, PIXMAN_FILTER_BILINEAR, NULL, 0);
 
-    switch (prng_rand_n (5)) {
-    case 0: pixman_image_set_filter(mask_img, PIXMAN_FILTER_BILINEAR, NULL, 0); break;
-    case 1: pixman_image_set_filter(mask_img, PIXMAN_FILTER_NEAREST, NULL, 0); break;
-    default:
-      /* this gets a larger range as samples distribute over quite a few possible settings */
-      setup_separable_convulution(mask_img, scale_x, scale_y, verbose);
-      break;
-    }
+    if (prng_rand_n (2))
+	pixman_image_set_filter (mask_img, PIXMAN_FILTER_NEAREST, NULL, 0);
+    else
+	pixman_image_set_filter (mask_img, PIXMAN_FILTER_BILINEAR, NULL, 0);
 
     if (prng_rand_n (8) == 0)
     {
@@ -417,7 +380,7 @@ test_composite (int      testnum,
                             src_x, src_y, mask_x, mask_y, dst_x, dst_y, w, h);
 
     crc32 = compute_crc32_for_image (0, dst_img);
-
+    
     if (verbose)
 	print_image (dst_img);
 
@@ -444,9 +407,9 @@ test_composite (int      testnum,
 }
 
 #if BILINEAR_INTERPOLATION_BITS == 7
-#define CHECKSUM 0xD3589272
+#define CHECKSUM 0x92E0F068
 #elif BILINEAR_INTERPOLATION_BITS == 4
-#define CHECKSUM 0x0FD4B248
+#define CHECKSUM 0x8EFFA1E5
 #else
 #define CHECKSUM 0x00000000
 #endif
-- 
cgit v1.2.1