author     Carsten Haitzler (Rasterman) <raster@rasterman.com>   2019-03-09 15:19:28 +0000
committer  Carsten Haitzler (Rasterman) <raster@rasterman.com>   2019-03-09 15:21:46 +0000
commit     4758f06e637239f981eedbaaf8c0d613b78e4417 (patch)
tree       01e220a40497a9288cc2390fdc37e4e7e9cd5bb9
parent     f234a2b6c77af85c3a272cb417257cf1bc531b4f (diff)
download   efl-4758f06e637239f981eedbaaf8c0d613b78e4417.tar.gz
solve neon rotation issue by moving to the tiled rotator
the tiled rotator is faster no matter what. this will fix D8099 by moving to tiled rotation and nuking the neon code, and we end up being faster anyway in all cases. @fix
-rw-r--r--   configure.ac                                |  20
-rw-r--r--   src/lib/evas/common/evas_convert_rgb_32.c   | 547
2 files changed, 215 insertions(+), 352 deletions(-)
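The gist of the change: instead of hand-tuned NEON, the rotate walks the destination in cache-line-sized strips, so the strided source reads needed for each strip stay inside a small working set. Below is a minimal standalone sketch of that idea, assuming a 64-byte cache line; rot90_strip and rot90_tiled are illustrative names, not from this commit, and the cache-line alignment of the leading/trailing strips done by the real blt_rotated_90_* is omitted for brevity.

   /* sketch: tiled 90-degree rotate, dst(x, y) = src(h - 1 - y, x) */
   #include <stdint.h>

   #define CACHE_LINE 64 /* assumed cache line size (TILE_CACHE_LINE_SIZE in evas) */

   static void
   rot90_strip(uint32_t *dst, int dst_stride,
               const uint32_t *src, int src_stride, int w, int h)
   {
      int x, y;

      for (y = 0; y < h; y++)
        {
           /* bottom source row maps to destination column 0 */
           const uint32_t *s = src + (h - y - 1);
           uint32_t *d = dst + (dst_stride * y);

           for (x = 0; x < w; x++)
             {
                *d++ = *s;       /* sequential dst writes ... */
                s += src_stride; /* ... strided src reads, but only w rows per strip */
             }
        }
   }

   static void
   rot90_tiled(uint32_t *dst, int dst_stride,
               const uint32_t *src, int src_stride, int w, int h)
   {
      const int tile = CACHE_LINE / sizeof(uint32_t); /* pixels per strip */
      int x;

      /* full strips: each covers one cache line of every dst row */
      for (x = 0; x + tile <= w; x += tile)
        rot90_strip(dst + x, dst_stride, src + (src_stride * x),
                    src_stride, tile, h);
      /* remainder strip, if w is not a multiple of the tile width */
      if (x < w)
        rot90_strip(dst + x, dst_stride, src + (src_stride * x),
                    src_stride, w - x, h);
   }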
diff --git a/configure.ac b/configure.ac
index 34d6abb4a0..9157bfd338 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2538,19 +2538,6 @@ AC_ARG_ENABLE([pixman-image-scale-sample],
],
[have_pixman_image_scale_sample="no"])
-# Tile rotate
-AC_ARG_ENABLE([tile-rotate],
- [AS_HELP_STRING([--enable-tile-rotate],[Enable tiled rotate algorithm. @<:@default=disabled@:>@])],
- [
- if test "x${enableval}" = "xyes" ; then
- have_tile_rotate="yes"
- CFOPT_WARNING="xyes"
- else
- have_tile_rotate="no"
- fi
- ],
- [have_tile_rotate="no"])
-
# Ecore Buffer
AC_ARG_ENABLE([ecore-buffer],
[AS_HELP_STRING([--enable-ecore-buffer],[enable ecore-buffer. @<:@default=disabled@:>@])],
@@ -2984,13 +2971,6 @@ AC_CHECK_LIB([m], [lround],
### Configuration
-## Tile rotation
-
-if test "x${have_tile_rotate}" = "xyes" ; then
- AC_DEFINE(TILE_ROTATE, 1, [Enable tiled rotate algorithm])
-fi
-
-
## dither options
AC_ARG_WITH([evas-dither-mask],
diff --git a/src/lib/evas/common/evas_convert_rgb_32.c b/src/lib/evas/common/evas_convert_rgb_32.c
index 89789b2ac5..11671466b2 100644
--- a/src/lib/evas/common/evas_convert_rgb_32.c
+++ b/src/lib/evas/common/evas_convert_rgb_32.c
@@ -1,9 +1,13 @@
#include "evas_common_private.h"
#include "evas_convert_rgb_32.h"
#ifdef BUILD_NEON
-#include <arm_neon.h>
+# include <arm_neon.h>
#endif
+// tiled rotate is faster in every case i've tested, so just use this
+// by default.
+#define TILE_ROTATE 1
+
void
evas_common_convert_rgba_to_32bpp_rgb_8888 (DATA32 *src, DATA8 *dst, int src_jump, int dst_jump, int w, int h, int dith_x EINA_UNUSED, int dith_y EINA_UNUSED, DATA8 *pal EINA_UNUSED)
{
@@ -19,9 +23,9 @@ evas_common_convert_rgba_to_32bpp_rgb_8888 (DATA32 *src, DATA8 *dst, int src_jum
for (y = 0; y < h; y++)
{
- func(src_ptr, dst_ptr, w);
- src_ptr += w + src_jump;
- dst_ptr += w + dst_jump;
+ func(src_ptr, dst_ptr, w);
+ src_ptr += w + src_jump;
+ dst_ptr += w + dst_jump;
}
return;
}
@@ -44,234 +48,205 @@ evas_common_convert_rgba_to_32bpp_rgb_8888_rot_180 (DATA32 *src, DATA8 *dst, int
}
#ifdef TILE_ROTATE
-#ifdef BUILD_NEON
-#define ROT90_QUAD_COPY_LOOP(pix_type) \
- if (evas_common_cpu_has_feature(CPU_FEATURE_NEON)) \
- { \
- if((w%4) == 0) \
- { \
- int klght = 4 * src_stride; \
- for(y = 0; y < h; y++) \
- { \
- const pix_type *s = &(src[(h - y - 1)]); \
- pix_type *d = &(dst[(dst_stride * y)]); \
- pix_type *ptr1 = s; \
- pix_type *ptr2 = ptr1 + src_stride; \
- pix_type *ptr3 = ptr2 + src_stride; \
- pix_type *ptr4 = ptr3 + src_stride; \
- for(x = 0; x < w; x+=4) \
- { \
- pix_type s_array[4] = {*ptr1, *ptr2, *ptr3, *ptr4}; \
- vst1q_s32(d, vld1q_s32(s_array)); \
- d += 4; \
- ptr1 += klght; \
- ptr2 += klght; \
- ptr3 += klght; \
- ptr4 += klght; \
- } \
- } \
+# ifdef BUILD_NEON
+# define ROT90_QUAD_COPY_LOOP(pix_type) \
+ if (evas_common_cpu_has_feature(CPU_FEATURE_NEON)) { \
+ if ((w % 4) == 0) { \
+ int klght = 4 * src_stride; \
+ for (y = 0; y < h; y++) { \
+ const pix_type *s = &(src[h - y - 1]); \
+ pix_type *d = &(dst[dst_stride * y]); \
+ pix_type *ptr1 = s; \
+ pix_type *ptr2 = ptr1 + src_stride; \
+ pix_type *ptr3 = ptr2 + src_stride; \
+ pix_type *ptr4 = ptr3 + src_stride; \
+ for(x = 0; x < w; x += 4) { \
+ pix_type s_array[4] = { *ptr1, *ptr2, *ptr3, *ptr4 }; \
+ vst1q_s32(d, vld1q_s32(s_array)); \
+ d += 4; \
+ ptr1 += klght; \
+ ptr2 += klght; \
+ ptr3 += klght; \
+ ptr4 += klght; \
+ } \
+ } \
} \
- else \
- { \
- for (y = 0; y < h; y++) \
- { \
- const pix_type *s = &(src[(h - y - 1)]); \
- pix_type *d = &(dst[(dst_stride * y)]); \
- for (x = 0; x < w; x++) \
- { \
- *d++ = *s; \
- s += src_stride; \
- } \
- } \
+ else { \
+ for (y = 0; y < h; y++) { \
+ const pix_type *s = &(src[h - y - 1]); \
+ pix_type *d = &(dst[dst_stride * y]); \
+ for (x = 0; x < w; x++) { \
+ *d++ = *s; \
+ s += src_stride; \
+ } \
+ } \
} \
} \
else
-#define ROT270_QUAD_COPY_LOOP(pix_type) \
- if (evas_common_cpu_has_feature(CPU_FEATURE_NEON)) \
- { \
- if((w%4) == 0) \
- { \
- int klght = 4 * src_stride; \
- for(y = 0; y < h; y++) \
- { \
- const pix_type *s = &(src[(src_stride * (w - 1)) + y]); \
- pix_type *d = &(dst[(dst_stride * y)]); \
- pix_type *ptr1 = s; \
- pix_type *ptr2 = ptr1 + src_stride; \
- pix_type *ptr3 = ptr2 + src_stride; \
- pix_type *ptr4 = ptr3 + src_stride; \
- for(x = 0; x < w; x+=4) \
- { \
- pix_type s_array[4] = {*ptr1, *ptr2, *ptr3, *ptr4}; \
- vst1q_s32(d, vld1q_s32(s_array)); \
- d += 4; \
- ptr1 += klght; \
- ptr2 += klght; \
- ptr3 += klght; \
- ptr4 += klght; \
- } \
- } \
+# define ROT270_QUAD_COPY_LOOP(pix_type) \
+ if (evas_common_cpu_has_feature(CPU_FEATURE_NEON)) { \
+ if ((w % 4) == 0) { \
+ int klght = 4 * src_stride; \
+ for (y = 0; y < h; y++) { \
+ const pix_type *s = &(src[(src_stride * (w - 1)) + y]); \
+ pix_type *d = &(dst[dst_stride * y]); \
+ pix_type *ptr1 = s; \
+ pix_type *ptr2 = ptr1 + src_stride; \
+ pix_type *ptr3 = ptr2 + src_stride; \
+ pix_type *ptr4 = ptr3 + src_stride; \
+ for(x = 0; x < w; x+=4) { \
+ pix_type s_array[4] = { *ptr1, *ptr2, *ptr3, *ptr4 }; \
+ vst1q_s32(d, vld1q_s32(s_array)); \
+ d += 4; \
+ ptr1 += klght; \
+ ptr2 += klght; \
+ ptr3 += klght; \
+ ptr4 += klght; \
+ } \
+ } \
} \
- else \
- { \
- for (y = 0; y < h; y++) \
- { \
- const pix_type *s = &(src[(src_stride * (w - 1)) + y]); \
- pix_type *d = &(dst[(dst_stride * y)]); \
- for (x = 0; x < w; x++) \
- { \
- *d++ = *s; \
- s += src_stride; \
- } \
- } \
+ else { \
+ for (y = 0; y < h; y++) { \
+ const pix_type *s = &(src[(src_stride * (w - 1)) + y]); \
+ pix_type *d = &(dst[dst_stride * y]); \
+ for (x = 0; x < w; x++) { \
+ *d++ = *s; \
+ s += src_stride; \
+ } \
+ } \
} \
} \
else
-#else
-#define ROT90_QUAD_COPY_LOOP(pix_type)
-#define ROT270_QUAD_COPY_LOOP(pix_type)
-#endif
-#define FAST_SIMPLE_ROTATE(suffix, pix_type) \
+# else
+# define ROT90_QUAD_COPY_LOOP(pix_type)
+# define ROT270_QUAD_COPY_LOOP(pix_type)
+# endif
+
+# define FAST_SIMPLE_ROTATE(suffix, pix_type) \
static void \
- blt_rotated_90_trivial_##suffix(pix_type * restrict dst, \
- int dst_stride, \
+ blt_rotated_90_trivial_##suffix(pix_type * restrict dst, \
+ int dst_stride, \
const pix_type * restrict src, \
- int src_stride, \
- int w, \
- int h) \
+ int src_stride, \
+ int w, \
+ int h) \
{ \
int x, y; \
- ROT90_QUAD_COPY_LOOP(pix_type) \
- { \
- for (y = 0; y < h; y++) \
- { \
- const pix_type *s = &(src[(h - y - 1)]); \
- pix_type *d = &(dst[(dst_stride * y)]); \
- for (x = 0; x < w; x++) \
- { \
- *d++ = *s; \
- s += src_stride; \
- } \
- } \
+ ROT90_QUAD_COPY_LOOP(pix_type) { \
+ for (y = 0; y < h; y++) { \
+ const pix_type *s = &(src[h - y - 1]); \
+ pix_type *d = &(dst[dst_stride * y]); \
+ for (x = 0; x < w; x++) { \
+ *d++ = *s; \
+ s += src_stride; \
+ } \
+ } \
} \
} \
static void \
- blt_rotated_270_trivial_##suffix(pix_type * restrict dst, \
- int dst_stride, \
+ blt_rotated_270_trivial_##suffix(pix_type * restrict dst, \
+ int dst_stride, \
const pix_type * restrict src, \
- int src_stride, \
- int w, \
- int h) \
+ int src_stride, \
+ int w, \
+ int h) \
{ \
int x, y; \
- ROT270_QUAD_COPY_LOOP(pix_type) \
- { \
- for(y = 0; y < h; y++) \
- { \
- const pix_type *s = &(src[(src_stride * (w - 1)) + y]); \
- pix_type *d = &(dst[(dst_stride * y)]); \
- for (x = 0; x < w; x++) \
- { \
- *d++ = *s; \
- s -= src_stride; \
- } \
- } \
+ ROT270_QUAD_COPY_LOOP(pix_type) { \
+ for (y = 0; y < h; y++) { \
+ const pix_type *s = &(src[(src_stride * (w - 1)) + y]); \
+ pix_type *d = &(dst[dst_stride * y]); \
+ for (x = 0; x < w; x++) { \
+ *d++ = *s; \
+ s -= src_stride; \
+ } \
+ } \
} \
} \
static void \
- blt_rotated_90_##suffix(pix_type * restrict dst, \
- int dst_stride, \
+ blt_rotated_90_##suffix(pix_type * restrict dst, \
+ int dst_stride, \
const pix_type * restrict src, \
- int src_stride, \
- int w, \
- int h) \
+ int src_stride, \
+ int w, \
+ int h) \
{ \
int x, leading_pixels = 0, trailing_pixels = 0; \
const int TILE_SIZE = TILE_CACHE_LINE_SIZE / sizeof(pix_type); \
- if ((uintptr_t)dst & (TILE_CACHE_LINE_SIZE - 1)) \
- { \
- leading_pixels = TILE_SIZE - \
- (((uintptr_t)dst & (TILE_CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
- if (leading_pixels > w) \
- leading_pixels = w; \
- blt_rotated_90_trivial_##suffix(dst, \
- dst_stride, \
- src, \
- src_stride, \
- leading_pixels, \
- h); \
- dst += leading_pixels; \
- src += leading_pixels * src_stride; \
- w -= leading_pixels; \
- } \
- if ((uintptr_t)(dst + w) & (TILE_CACHE_LINE_SIZE - 1)) \
- { \
- trailing_pixels = (((uintptr_t)(dst + w) & \
- (TILE_CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
- if (trailing_pixels > w) \
- trailing_pixels = w; \
- w -= trailing_pixels; \
- } \
- for (x = 0; x < w; x += TILE_SIZE) \
- { \
- blt_rotated_90_trivial_##suffix(dst + x, \
- dst_stride, \
- &(src[(src_stride * x)]), \
- src_stride, \
- TILE_SIZE, \
- h); \
- } \
+ if ((uintptr_t)dst & (TILE_CACHE_LINE_SIZE - 1)) { \
+ leading_pixels = TILE_SIZE - \
+ (((uintptr_t)dst & (TILE_CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
+ if (leading_pixels > w) leading_pixels = w; \
+ blt_rotated_90_trivial_##suffix(dst, \
+ dst_stride, \
+ src, \
+ src_stride, \
+ leading_pixels, \
+ h); \
+ dst += leading_pixels; \
+ src += leading_pixels * src_stride; \
+ w -= leading_pixels; \
+ } \
+ if ((uintptr_t)(dst + w) & (TILE_CACHE_LINE_SIZE - 1)) { \
+ trailing_pixels = (((uintptr_t)(dst + w) & \
+ (TILE_CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
+ if (trailing_pixels > w) trailing_pixels = w; \
+ w -= trailing_pixels; \
+ } \
+ for (x = 0; x < w; x += TILE_SIZE) { \
+ blt_rotated_90_trivial_##suffix(dst + x, \
+ dst_stride, \
+ &(src[src_stride * x]), \
+ src_stride, \
+ TILE_SIZE, \
+ h); \
+ } \
if (trailing_pixels) \
blt_rotated_90_trivial_##suffix(dst + w, \
dst_stride, \
- &(src[(w * src_stride)]), \
+ &(src[src_stride * w]), \
src_stride, \
trailing_pixels, \
h); \
} \
static void \
- blt_rotated_270_##suffix(pix_type * restrict dst, \
- int dst_stride, \
+ blt_rotated_270_##suffix(pix_type * restrict dst, \
+ int dst_stride, \
const pix_type * restrict src, \
- int src_stride, \
- int w, \
- int h) \
+ int src_stride, \
+ int w, \
+ int h) \
{ \
int x, leading_pixels = 0, trailing_pixels = 0; \
const int TILE_SIZE = TILE_CACHE_LINE_SIZE / sizeof(pix_type); \
- if ((uintptr_t)dst & (TILE_CACHE_LINE_SIZE - 1)) \
- { \
- leading_pixels = TILE_SIZE - \
- (((uintptr_t)dst & (TILE_CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
- if (leading_pixels > w) \
- leading_pixels = w; \
- blt_rotated_270_trivial_##suffix(dst, \
- dst_stride, \
- &(src[(src_stride * (w - leading_pixels))]), \
- src_stride, \
- leading_pixels, \
- h); \
- dst += leading_pixels; \
- w -= leading_pixels; \
- } \
- if ((uintptr_t)(dst + w) & (TILE_CACHE_LINE_SIZE - 1)) \
- { \
- trailing_pixels = (((uintptr_t)(dst + w) & \
- (TILE_CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
- if (trailing_pixels > w) \
- trailing_pixels = w; \
- w -= trailing_pixels; \
- src += trailing_pixels * src_stride; \
- } \
- for (x = 0; x < w; x += TILE_SIZE) \
- { \
- blt_rotated_270_trivial_##suffix(dst + x, \
- dst_stride, \
- &(src[(src_stride * (w - x - TILE_SIZE))]), \
- src_stride, \
- TILE_SIZE, \
- h); \
- } \
+ if ((uintptr_t)dst & (TILE_CACHE_LINE_SIZE - 1)) { \
+ leading_pixels = TILE_SIZE - \
+ (((uintptr_t)dst & (TILE_CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
+ if (leading_pixels > w) leading_pixels = w; \
+ blt_rotated_270_trivial_##suffix(dst, \
+ dst_stride, \
+ &(src[src_stride * (w - leading_pixels)]), \
+ src_stride, \
+ leading_pixels, \
+ h); \
+ dst += leading_pixels; \
+ w -= leading_pixels; \
+ } \
+ if ((uintptr_t)(dst + w) & (TILE_CACHE_LINE_SIZE - 1)) { \
+ trailing_pixels = (((uintptr_t)(dst + w) & \
+ (TILE_CACHE_LINE_SIZE - 1)) / sizeof(pix_type)); \
+ if (trailing_pixels > w) trailing_pixels = w; \
+ w -= trailing_pixels; \
+ src += trailing_pixels * src_stride; \
+ } \
+ for (x = 0; x < w; x += TILE_SIZE) { \
+ blt_rotated_270_trivial_##suffix(dst + x, \
+ dst_stride, \
+ &(src[src_stride * (w - x - TILE_SIZE)]), \
+ src_stride, \
+ TILE_SIZE, \
+ h); \
+ } \
if (trailing_pixels) \
blt_rotated_270_trivial_##suffix(dst + w, \
dst_stride, \
@@ -288,12 +263,13 @@ void
evas_common_convert_rgba_to_32bpp_rgb_8888_rot_270 (DATA32 *src, DATA8 *dst, int src_jump, int dst_jump, int w, int h, int dith_x EINA_UNUSED, int dith_y EINA_UNUSED, DATA8 *pal EINA_UNUSED)
{
#ifdef TILE_ROTATE
- blt_rotated_270_8888((DATA8 *)dst, dst_jump+w, (const DATA8 *)src, src_jump+h, w, h) ;
+ blt_rotated_270_8888((DATA32 *)dst, dst_jump + w,
+ src, src_jump + h,
+ w, h);
#else
- DATA32 *src_ptr;
- DATA32 *dst_ptr;
+   DATA32 *src_ptr, *dst_ptr;
int x, y;
-
+
dst_ptr = (DATA32 *)dst;
CONVERT_LOOP_START_ROT_270();
@@ -305,15 +281,32 @@ evas_common_convert_rgba_to_32bpp_rgb_8888_rot_270 (DATA32 *src, DATA8 *dst, int
return;
}
+/* speed measuring code - enable when optimizing to compare
+#include <time.h>
+static double
+get_time(void)
+{
+ struct timespec t;
+
+ clock_gettime(CLOCK_MONOTONIC, &t);
+ return (double)t.tv_sec + (((double)t.tv_nsec) / 1000000000.0);
+}
+*/
+
void
evas_common_convert_rgba_to_32bpp_rgb_8888_rot_90 (DATA32 *src, DATA8 *dst, int src_jump, int dst_jump, int w, int h, int dith_x EINA_UNUSED, int dith_y EINA_UNUSED, DATA8 *pal EINA_UNUSED)
{
+/*
+ static double tt = 0.0;
+ static unsigned long long pt = 0;
+ double t0 = get_time();
+ */
#ifdef TILE_ROTATE
- blt_rotated_90_8888((DATA8 *)dst, dst_jump+w, (const DATA8 *)src, src_jump+h, w, h) ;
+ blt_rotated_90_8888((DATA32 *)dst, dst_jump + w,
+ src, src_jump + h,
+ w, h);
#else
-# ifndef BUILD_NEON
- DATA32 *src_ptr;
- DATA32 *dst_ptr;
+ DATA32 *src_ptr, *dst_ptr;
int x, y;
dst_ptr = (DATA32 *)dst;
@@ -322,117 +315,19 @@ evas_common_convert_rgba_to_32bpp_rgb_8888_rot_90 (DATA32 *src, DATA8 *dst, int
*dst_ptr = *src_ptr;
CONVERT_LOOP_END_ROT_90();
-# elif defined BUILD_NEON_INTRINSICS
- DATA32 *src_ptr;
- DATA32 *dst_ptr;
- int x, y;
-
- dst_ptr = (DATA32 *)dst;
- CONVERT_LOOP_START_ROT_90();
-
- *dst_ptr = *src_ptr;
-
- CONVERT_LOOP_END_ROT_90();
-# else
- if ((w & 1) || (h & 1))
- {
- /* Rarely (if ever) if ever: so slow path is fine */
- DATA32 *src_ptr;
- DATA32 *dst_ptr;
- int x, y;
-
- dst_ptr = (DATA32 *)dst;
- CONVERT_LOOP_START_ROT_90();
-
- *dst_ptr = *src_ptr;
-
- CONVERT_LOOP_END_ROT_90();
- }
- else
- {
-# define AP "convert_rgba32_rot_90_"
- asm volatile (
- ".fpu neon \n\t"
- " mov %[s1], %[src] \n\t"
- " add %[s1], %[s1], %[h],lsl #2 \n\t"
- " sub %[s1], #8 \n\t"
-
- " mov %[s2], %[src] \n\t"
- " add %[s2], %[s2], %[h], lsl #3 \n\t"
- " add %[s2], %[s2], %[sjmp], lsr #1 \n\t"
- " sub %[s2], #8 \n\t"
-
- " mov %[d1], %[dst] \n\t"
-
- " add %[d2], %[d1], %[djmp] \n\t"
- " add %[d2], %[d2], %[w], lsl #2 \n\t"
-
- " mov %[sadv], %[h], lsl #3 \n\t"
- " add %[sadv], %[sadv], %[sjmp], lsl #1\n\t"
-
- " mov %[y], #0 \n\t"
- " mov %[x], #0 \n\t"
- AP"loop: \n\t"
- " vld1.u32 d0, [%[s1]] \n\t"
- " vld1.u32 d1, [%[s2]] \n\t"
- " add %[x], #2 \n\t"
- " add %[s1], %[sadv] \n\t"
- " add %[s2], %[sadv] \n\t"
- " vtrn.u32 d0, d1 \n\t"
- " cmp %[x], %[w] \n\t"
- " vst1.u32 d1, [%[d1]]! \n\t"
- " vst1.u32 d0, [%[d2]]! \n\t"
- " blt "AP"loop \n\t"
-
- " mov %[x], #0 \n\t"
- " add %[d1], %[djmp] \n\t"
- " add %[d1], %[d1], %[w], lsl #2 \n\t"
- " add %[d2], %[djmp] \n\t"
- " add %[d2], %[d2], %[w], lsl #2 \n\t"
-
- " mov %[s1], %[src] \n\t"
- " add %[s1], %[s1], %[h], lsl #2 \n\t"
- " sub %[s1], %[s1], %[y], lsl #2 \n\t"
- " sub %[s1], #16 \n\t"
-
- " add %[s2], %[s1], %[h], lsl #2 \n\t"
- " add %[s2], %[s2], %[sjmp], lsl #2 \n\t"
-
- " add %[y], #2 \n\t"
-
- " cmp %[y], %[h] \n\t"
- " blt "AP"loop \n\t"
-
- : // Out
- : [s1] "r" (1),
- [s2] "r" (11),
- [d1] "r" (2),
- [d2] "r" (12),
- [src] "r" (src),
- [dst] "r" (dst),
- [x] "r" (3),
- [y] "r" (4),
- [w] "r" (w),
- [h] "r" (h),
- [sadv] "r" (5),
- [sjmp] "r" (src_jump * 4),
- [djmp] "r" (dst_jump * 4 * 2)
- : "d0", "d1", "memory", "cc"// Clober
-
-
- );
- }
-# undef AP
-# endif
#endif
- return;
+/*
+ double t1 = get_time();
+ tt += t1 - t0;
+ pt += (w * h);
+ printf("%1.2f mpix/sec (%1.9f @ %1.9f)\n", (double)pt / (tt * 1000000), tt, t1);
+*/
}
void
evas_common_convert_rgba_to_32bpp_rgbx_8888 (DATA32 *src, DATA8 *dst, int src_jump, int dst_jump, int w, int h, int dith_x EINA_UNUSED, int dith_y EINA_UNUSED, DATA8 *pal EINA_UNUSED)
{
- DATA32 *src_ptr;
- DATA32 *dst_ptr;
+ DATA32 *src_ptr, *dst_ptr;
int x, y;
dst_ptr = (DATA32 *)dst;
@@ -449,8 +344,7 @@ evas_common_convert_rgba_to_32bpp_rgbx_8888 (DATA32 *src, DATA8 *dst, int src_ju
void
evas_common_convert_rgba_to_32bpp_rgbx_8888_rot_180 (DATA32 *src, DATA8 *dst, int src_jump, int dst_jump, int w, int h, int dith_x EINA_UNUSED, int dith_y EINA_UNUSED, DATA8 *pal EINA_UNUSED)
{
- DATA32 *src_ptr;
- DATA32 *dst_ptr;
+ DATA32 *src_ptr, *dst_ptr;
int x, y;
dst_ptr = (DATA32 *)dst;
@@ -467,8 +361,7 @@ evas_common_convert_rgba_to_32bpp_rgbx_8888_rot_180 (DATA32 *src, DATA8 *dst, in
void
evas_common_convert_rgba_to_32bpp_rgbx_8888_rot_270 (DATA32 *src, DATA8 *dst, int src_jump, int dst_jump, int w, int h, int dith_x EINA_UNUSED, int dith_y EINA_UNUSED, DATA8 *pal EINA_UNUSED)
{
- DATA32 *src_ptr;
- DATA32 *dst_ptr;
+ DATA32 *src_ptr, *dst_ptr;
int x, y;
dst_ptr = (DATA32 *)dst;
@@ -485,8 +378,7 @@ evas_common_convert_rgba_to_32bpp_rgbx_8888_rot_270 (DATA32 *src, DATA8 *dst, in
void
evas_common_convert_rgba_to_32bpp_rgbx_8888_rot_90 (DATA32 *src, DATA8 *dst, int src_jump, int dst_jump, int w, int h, int dith_x EINA_UNUSED, int dith_y EINA_UNUSED, DATA8 *pal EINA_UNUSED)
{
- DATA32 *src_ptr;
- DATA32 *dst_ptr;
+ DATA32 *src_ptr, *dst_ptr;
int x, y;
dst_ptr = (DATA32 *)dst;
@@ -503,8 +395,7 @@ evas_common_convert_rgba_to_32bpp_rgbx_8888_rot_90 (DATA32 *src, DATA8 *dst, int
void
evas_common_convert_rgba_to_32bpp_bgr_8888 (DATA32 *src, DATA8 *dst, int src_jump, int dst_jump, int w, int h, int dith_x EINA_UNUSED, int dith_y EINA_UNUSED, DATA8 *pal EINA_UNUSED)
{
- DATA32 *src_ptr;
- DATA32 *dst_ptr;
+ DATA32 *src_ptr, *dst_ptr;
int x, y;
dst_ptr = (DATA32 *)dst;
@@ -520,8 +411,7 @@ evas_common_convert_rgba_to_32bpp_bgr_8888 (DATA32 *src, DATA8 *dst, int src_jum
void
evas_common_convert_rgba_to_32bpp_bgr_8888_rot_180 (DATA32 *src, DATA8 *dst, int src_jump, int dst_jump, int w, int h, int dith_x EINA_UNUSED, int dith_y EINA_UNUSED, DATA8 *pal EINA_UNUSED)
{
- DATA32 *src_ptr;
- DATA32 *dst_ptr;
+ DATA32 *src_ptr, *dst_ptr;
int x, y;
dst_ptr = (DATA32 *)dst;
@@ -537,8 +427,7 @@ evas_common_convert_rgba_to_32bpp_bgr_8888_rot_180 (DATA32 *src, DATA8 *dst, int
void
evas_common_convert_rgba_to_32bpp_bgr_8888_rot_270 (DATA32 *src, DATA8 *dst, int src_jump, int dst_jump, int w, int h, int dith_x EINA_UNUSED, int dith_y EINA_UNUSED, DATA8 *pal EINA_UNUSED)
{
- DATA32 *src_ptr;
- DATA32 *dst_ptr;
+ DATA32 *src_ptr, *dst_ptr;
int x, y;
dst_ptr = (DATA32 *)dst;
@@ -554,8 +443,7 @@ evas_common_convert_rgba_to_32bpp_bgr_8888_rot_270 (DATA32 *src, DATA8 *dst, int
void
evas_common_convert_rgba_to_32bpp_bgr_8888_rot_90 (DATA32 *src, DATA8 *dst, int src_jump, int dst_jump, int w, int h, int dith_x EINA_UNUSED, int dith_y EINA_UNUSED, DATA8 *pal EINA_UNUSED)
{
- DATA32 *src_ptr;
- DATA32 *dst_ptr;
+ DATA32 *src_ptr, *dst_ptr;
int x, y;
dst_ptr = (DATA32 *)dst;
@@ -571,8 +459,7 @@ evas_common_convert_rgba_to_32bpp_bgr_8888_rot_90 (DATA32 *src, DATA8 *dst, int
void
evas_common_convert_rgba_to_32bpp_bgrx_8888 (DATA32 *src, DATA8 *dst, int src_jump, int dst_jump, int w, int h, int dith_x EINA_UNUSED, int dith_y EINA_UNUSED, DATA8 *pal EINA_UNUSED)
{
- DATA32 *src_ptr;
- DATA32 *dst_ptr;
+ DATA32 *src_ptr, *dst_ptr;
int x, y;
dst_ptr = (DATA32 *)dst;
@@ -588,8 +475,7 @@ evas_common_convert_rgba_to_32bpp_bgrx_8888 (DATA32 *src, DATA8 *dst, int src_ju
void
evas_common_convert_rgba_to_32bpp_bgrx_8888_rot_180 (DATA32 *src, DATA8 *dst, int src_jump, int dst_jump, int w, int h, int dith_x EINA_UNUSED, int dith_y EINA_UNUSED, DATA8 *pal EINA_UNUSED)
{
- DATA32 *src_ptr;
- DATA32 *dst_ptr;
+ DATA32 *src_ptr, *dst_ptr;
int x, y;
dst_ptr = (DATA32 *)dst;
@@ -605,8 +491,7 @@ evas_common_convert_rgba_to_32bpp_bgrx_8888_rot_180 (DATA32 *src, DATA8 *dst, in
void
evas_common_convert_rgba_to_32bpp_bgrx_8888_rot_270 (DATA32 *src, DATA8 *dst, int src_jump, int dst_jump, int w, int h, int dith_x EINA_UNUSED, int dith_y EINA_UNUSED, DATA8 *pal EINA_UNUSED)
{
- DATA32 *src_ptr;
- DATA32 *dst_ptr;
+ DATA32 *src_ptr, *dst_ptr;
int x, y;
dst_ptr = (DATA32 *)dst;
@@ -622,8 +507,7 @@ evas_common_convert_rgba_to_32bpp_bgrx_8888_rot_270 (DATA32 *src, DATA8 *dst, in
void
evas_common_convert_rgba_to_32bpp_bgrx_8888_rot_90 (DATA32 *src, DATA8 *dst, int src_jump, int dst_jump, int w, int h, int dith_x EINA_UNUSED, int dith_y EINA_UNUSED, DATA8 *pal EINA_UNUSED)
{
- DATA32 *src_ptr;
- DATA32 *dst_ptr;
+ DATA32 *src_ptr, *dst_ptr;
int x, y;
dst_ptr = (DATA32 *)dst;
@@ -639,8 +523,7 @@ evas_common_convert_rgba_to_32bpp_bgrx_8888_rot_90 (DATA32 *src, DATA8 *dst, int
void
evas_common_convert_rgba_to_32bpp_rgb_666(DATA32 *src, DATA8 *dst, int src_jump, int dst_jump, int w, int h, int dith_x EINA_UNUSED, int dith_y EINA_UNUSED, DATA8 *pal EINA_UNUSED)
{
- DATA32 *src_ptr;
- DATA32 *dst_ptr;
+ DATA32 *src_ptr, *dst_ptr;
int x, y;
dst_ptr = (DATA32 *)dst;