From c3bbb94b4c76292837a3b79322f8a678f106394f Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Thu, 27 Oct 2022 15:10:30 -0400 Subject: Revert "Fix signed-unsigned semantics in reduce_32" This reverts commit aaf59b0338fbd4b9142794254261f8d0a018b60c. This commit regressed the scaling-test unit test, by apparently allowing the compiler to emit fused multiply-add instructions in cases they wouldn't have been allowed before. While using gcc's -ffp-contract=... flag avoids the issue on amd64, it does not on at least aarch64 and ppc64. This is unfortunate, because the commit being reverted resolved https://gitlab.freedesktop.org/pixman/pixman/-/issues/43 so we will reintroduce this failure, but after more than a year without a fix for the unit test, I think it's time to bite the bullet. Fixes: https://gitlab.freedesktop.org/pixman/pixman/-/issues/49 --- pixman/pixman-bits-image.c | 24 +++++++++---------- pixman/pixman-filter.c | 31 +++++++----------------- test/scaling-test.c | 59 +++++++++------------------------------------- 3 files changed, 32 insertions(+), 82 deletions(-) diff --git a/pixman/pixman-bits-image.c b/pixman/pixman-bits-image.c index 6f65420..f050f35 100644 --- a/pixman/pixman-bits-image.c +++ b/pixman/pixman-bits-image.c @@ -209,15 +209,15 @@ static force_inline void reduce_32(unsigned int satot, unsigned int srtot, { uint32_t *ret = p; - satot = (int32_t)(satot + 0x8000) / 65536; - srtot = (int32_t)(srtot + 0x8000) / 65536; - sgtot = (int32_t)(sgtot + 0x8000) / 65536; - sbtot = (int32_t)(sbtot + 0x8000) / 65536; + satot = (satot + 0x8000) >> 16; + srtot = (srtot + 0x8000) >> 16; + sgtot = (sgtot + 0x8000) >> 16; + sbtot = (sbtot + 0x8000) >> 16; - satot = CLIP ((int32_t)satot, 0, 0xff); - srtot = CLIP ((int32_t)srtot, 0, 0xff); - sgtot = CLIP ((int32_t)sgtot, 0, 0xff); - sbtot = CLIP ((int32_t)sbtot, 0, 0xff); + satot = CLIP (satot, 0, 0xff); + srtot = CLIP (srtot, 0, 0xff); + sgtot = CLIP (sgtot, 0, 0xff); + sbtot = CLIP (sbtot, 0, 0xff); *ret = ((satot << 24) | (srtot << 16) | (sgtot << 8) | (sbtot)); } @@ -240,10 +240,10 @@ static force_inline void reduce_float(unsigned int satot, unsigned int srtot, { argb_t *ret = p; - ret->a = CLIP ((int32_t)satot / 65536.f, 0.f, 1.f); - ret->r = CLIP ((int32_t)srtot / 65536.f, 0.f, 1.f); - ret->g = CLIP ((int32_t)sgtot / 65536.f, 0.f, 1.f); - ret->b = CLIP ((int32_t)sbtot / 65536.f, 0.f, 1.f); + ret->a = CLIP (satot / 65536.f, 0.f, 1.f); + ret->r = CLIP (srtot / 65536.f, 0.f, 1.f); + ret->g = CLIP (sgtot / 65536.f, 0.f, 1.f); + ret->b = CLIP (sbtot / 65536.f, 0.f, 1.f); } typedef void (* accumulate_pixel_t) (unsigned int *satot, unsigned int *srtot, diff --git a/pixman/pixman-filter.c b/pixman/pixman-filter.c index fa94532..5f3b752 100644 --- a/pixman/pixman-filter.c +++ b/pixman/pixman-filter.c @@ -237,14 +237,11 @@ create_1d_filter (int width, pixman_kernel_t sample, double scale, int n_phases, - pixman_fixed_t *pstart, - pixman_fixed_t *pend - ) + pixman_fixed_t *p) { - pixman_fixed_t *p = pstart; double step; int i; - if(width <= 0) return; + step = 1.0 / n_phases; for (i = 0; i < n_phases; ++i) @@ -261,7 +258,7 @@ create_1d_filter (int width, x1 = ceil (frac - width / 2.0 - 0.5); x2 = x1 + width; - assert( p >= pstart && p + (x2 - x1) <= pend ); /* assert validity of the following loop */ + total = 0; for (x = x1; x < x2; ++x) { @@ -290,10 +287,8 @@ create_1d_filter (int width, /* Normalize, with error diffusion */ p -= width; - assert(p >= pstart && p + (x2 - x1) <= pend); /* assert validity of the following loop */ - - total = 65536.0 / total; - new_total = 0; + total = 65536.0 / total; + new_total = 0; e = 0.0; for (x = x1; x < x2; ++x) { @@ -309,8 +304,6 @@ create_1d_filter (int width, * at the first sample, since that is the only one that * hasn't had any error diffused into it. */ - - assert(p - width >= pstart && p - width < pend); /* assert... */ *(p - width) += pixman_fixed_1 - new_total; } } @@ -472,16 +465,10 @@ pixman_filter_create_separable_convolution (int *n_values, params[2] = pixman_int_to_fixed (subsample_bits_x); params[3] = pixman_int_to_fixed (subsample_bits_y); - { - pixman_fixed_t - *xparams = params+4, - *yparams = xparams + width*subsample_x, - *endparams = params + *n_values; - create_1d_filter(width, reconstruct_x, sample_x, sx, subsample_x, - xparams, yparams); - create_1d_filter(height, reconstruct_y, sample_y, sy, subsample_y, - yparams, endparams); - } + create_1d_filter (width, reconstruct_x, sample_x, sx, subsample_x, + params + 4); + create_1d_filter (height, reconstruct_y, sample_y, sy, subsample_y, + params + 4 + width * subsample_x); #ifdef PIXMAN_GNUPLOT gnuplot_filter(width, subsample_x, params + 4); diff --git a/test/scaling-test.c b/test/scaling-test.c index 4c22011..0ece611 100644 --- a/test/scaling-test.c +++ b/test/scaling-test.c @@ -45,35 +45,6 @@ get_format (int bpp) } } -/* sets up "some kind of separable convolution" using prng values */ -void setup_separable_convulution(pixman_image_t* image, pixman_fixed_t scale_x, pixman_fixed_t scale_y, int verbose) { - int num_filters = PIXMAN_KERNEL_LANCZOS3_STRETCHED + 1; - int max_subsample_bits = 4; - struct Params { - pixman_fixed_t scale_x, scale_y; - pixman_kernel_t reconstruct_x, reconstruct_y; - pixman_kernel_t sample_x, sample_y; - int subsample_bits_x, subsample_bits_y; - } params = { - scale_x, scale_y, - prng_rand_n(num_filters), prng_rand_n(num_filters), - prng_rand_n(num_filters), prng_rand_n(num_filters), - prng_rand_n(max_subsample_bits + 1), prng_rand_n(max_subsample_bits + 1), - }; - int num_params; - pixman_fixed_t *filter; - if(verbose) { - printf("separable convolution kernels=%d %d %d %d subsample_bits=%d %d\n", params.reconstruct_x, params.reconstruct_y, params.sample_x, params.sample_y, params.subsample_bits_x, params.subsample_bits_y); - } - filter = pixman_filter_create_separable_convolution(&num_params, - params.scale_x, params.scale_y, - params.reconstruct_x, params.reconstruct_y, - params.sample_x, params.sample_y, - params.subsample_bits_x, params.subsample_bits_y); - pixman_image_set_filter(image, PIXMAN_FILTER_SEPARABLE_CONVOLUTION, filter, num_params); - free(filter); -} - uint32_t test_composite (int testnum, int verbose) @@ -275,23 +246,15 @@ test_composite (int testnum, } pixman_image_set_repeat (src_img, repeat); - switch (prng_rand_n (5)) { - case 0: pixman_image_set_filter(src_img, PIXMAN_FILTER_BILINEAR, NULL, 0); break; - case 1: pixman_image_set_filter(src_img, PIXMAN_FILTER_NEAREST, NULL, 0); break; - default: - /* this gets a larger range as samples distribute over quite a few possible settings */ - setup_separable_convulution(src_img, scale_x, scale_y, verbose); - break; - } + if (prng_rand_n (2)) + pixman_image_set_filter (src_img, PIXMAN_FILTER_NEAREST, NULL, 0); + else + pixman_image_set_filter (src_img, PIXMAN_FILTER_BILINEAR, NULL, 0); - switch (prng_rand_n (5)) { - case 0: pixman_image_set_filter(mask_img, PIXMAN_FILTER_BILINEAR, NULL, 0); break; - case 1: pixman_image_set_filter(mask_img, PIXMAN_FILTER_NEAREST, NULL, 0); break; - default: - /* this gets a larger range as samples distribute over quite a few possible settings */ - setup_separable_convulution(mask_img, scale_x, scale_y, verbose); - break; - } + if (prng_rand_n (2)) + pixman_image_set_filter (mask_img, PIXMAN_FILTER_NEAREST, NULL, 0); + else + pixman_image_set_filter (mask_img, PIXMAN_FILTER_BILINEAR, NULL, 0); if (prng_rand_n (8) == 0) { @@ -417,7 +380,7 @@ test_composite (int testnum, src_x, src_y, mask_x, mask_y, dst_x, dst_y, w, h); crc32 = compute_crc32_for_image (0, dst_img); - + if (verbose) print_image (dst_img); @@ -444,9 +407,9 @@ test_composite (int testnum, } #if BILINEAR_INTERPOLATION_BITS == 7 -#define CHECKSUM 0xD3589272 +#define CHECKSUM 0x92E0F068 #elif BILINEAR_INTERPOLATION_BITS == 4 -#define CHECKSUM 0x0FD4B248 +#define CHECKSUM 0x8EFFA1E5 #else #define CHECKSUM 0x00000000 #endif -- cgit v1.2.1