diff options
author | Neil Roberts <neil@linux.intel.com> | 2011-11-01 13:10:59 +0000 |
---|---|---|
committer | Neil Roberts <neil@linux.intel.com> | 2011-11-16 16:32:11 +0000 |
commit | 037c0aa88cb8eb74f21ce24c80467b909028d2f6 (patch) | |
tree | 6113491fed1b96315e70b053e8ec8f25970b57cc | |
parent | 436a7a45da0298f72d5b839601c60a5d23108a8a (diff) | |
download | cogl-037c0aa88cb8eb74f21ce24c80467b909028d2f6.tar.gz |
Move POPCOUNTL to cogl-util
This moves the POPCOUNTL macro from cogl-winsys-glx to cogl-util and
renames it to _cogl_util_popcountl so that it can be used in more
places. The fallback function for when the GCC builtin is not
available has been replaced with an 8-bit lookup table because the
HAKMEM implementation doesn't look like it would work when longs are
64-bit so it's not suitable for a general purpose function on 64-bit
architectures. Some of the pages regarding population counts seem to
suggest that using a lookup table is the fastest method anyway.
Reviewed-by: Robert Bragg <robert@linux.intel.com>
-rw-r--r-- | cogl/cogl-util.c | 20 | ||||
-rw-r--r-- | cogl/cogl-util.h | 34 | ||||
-rw-r--r-- | cogl/winsys/cogl-winsys-glx.c | 21 |
3 files changed, 55 insertions, 20 deletions
diff --git a/cogl/cogl-util.c b/cogl/cogl-util.c
index 1cb38143..487a762c 100644
--- a/cogl/cogl-util.c
+++ b/cogl/cogl-util.c
@@ -100,3 +100,23 @@ _cogl_util_ffsl_wrapper (long int num)
 }
 
 #endif /* COGL_UTIL_HAVE_BUILTIN_FFSL */
+
+#ifndef COGL_UTIL_HAVE_BUILTIN_POPCOUNTL
+
+const unsigned char
+_cogl_util_popcount_table[256] =
+  {
+    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4,
+    2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4,
+    2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
+    4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5,
+    3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
+    4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
+  };
+
+#endif /* COGL_UTIL_HAVE_BUILTIN_POPCOUNTL */
diff --git a/cogl/cogl-util.h b/cogl/cogl-util.h
index d38167c6..256ecaea 100644
--- a/cogl/cogl-util.h
+++ b/cogl/cogl-util.h
@@ -100,6 +100,12 @@ _cogl_util_one_at_a_time_hash (unsigned int hash,
 unsigned int
 _cogl_util_one_at_a_time_mix (unsigned int hash);
 
+/* These two builtins are available since GCC 3.4 */
+#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
+#define COGL_UTIL_HAVE_BUILTIN_FFSL
+#define COGL_UTIL_HAVE_BUILTIN_POPCOUNTL
+#endif
+
 /* The 'ffs' function is part of C99 so it isn't always available */
 #ifdef HAVE_FFS
 #define _cogl_util_ffs ffs
@@ -110,9 +116,8 @@ _cogl_util_ffs (int num);
 
 /* The 'ffsl' function is non-standard but GCC has a builtin for it
    since 3.4 which we can use */
-#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
+#ifdef COGL_UTIL_HAVE_BUILTIN_FFSL
 #define _cogl_util_ffsl __builtin_ffsl
-#define COGL_UTIL_HAVE_BUILTIN_FFSL
 #else
 /* If ints and longs are the same size we can just use ffs. Hopefully
    the compiler will optimise away this conditional */
@@ -121,7 +126,30 @@ _cogl_util_ffs (int num);
    _cogl_util_ffsl_wrapper (x))
 int
 _cogl_util_ffsl_wrapper (long int num);
-#endif
+#endif /* COGL_UTIL_HAVE_BUILTIN_FFSL */
+
+#ifdef COGL_UTIL_HAVE_BUILTIN_POPCOUNTL
+#define _cogl_util_popcountl __builtin_popcountl
+#else
+extern const unsigned char _cogl_util_popcount_table[256];
+
+/* There are many ways of doing popcount but doing a table lookup
+   seems to be the most robust against different sizes for long. Some
+   pages seem to claim it's the fastest method anyway. */
+static inline int
+_cogl_util_popcountl (unsigned long num)
+{
+  int i;
+  int sum = 0;
+
+  /* Let's hope GCC will unroll this loop.. */
+  for (i = 0; i < sizeof (num); i++)
+    sum += _cogl_util_popcount_table[(num >> (i * 8)) & 0xff];
+
+  return sum;
+}
+
+#endif /* COGL_UTIL_HAVE_BUILTIN_POPCOUNTL */
 
 #ifdef COGL_HAS_GLIB_SUPPORT
 #define _COGL_RETURN_IF_FAIL(EXPR) g_return_if_fail(EXPR)
diff --git a/cogl/winsys/cogl-winsys-glx.c b/cogl/winsys/cogl-winsys-glx.c
index 05e2dac1..0dce4741 100644
--- a/cogl/winsys/cogl-winsys-glx.c
+++ b/cogl/winsys/cogl-winsys-glx.c
@@ -49,6 +49,7 @@
 #include "cogl-onscreen-private.h"
 #include "cogl-swap-chain-private.h"
 #include "cogl-xlib-renderer.h"
+#include "cogl-util.h"
 
 #include <stdlib.h>
 #include <sys/types.h>
@@ -1697,22 +1698,6 @@ should_use_rectangle (CoglContext *context)
   return context->rectangle_state == COGL_WINSYS_RECTANGLE_STATE_ENABLE;
 }
 
-/* GCC's population count builtin is available since version 3.4 */
-#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
-#define POPCOUNTL(n) __builtin_popcountl(n)
-#else
-/* HAKMEM 169 */
-static int
-hakmem_popcountl (unsigned long n)
-{
-  unsigned long tmp;
-
-  tmp = n - ((n >> 1) & 033333333333) - ((n >> 2) & 011111111111);
-  return ((tmp + (tmp >> 3)) & 030707070707) % 63;
-}
-#define POPCOUNTL(n) hakmem_popcountl(n)
-#endif
-
 static gboolean
 try_create_glx_pixmap (CoglContext *context,
                        CoglTexturePixmapX11 *tex_pixmap,
@@ -1765,7 +1750,9 @@ try_create_glx_pixmap (CoglContext *context,
    * number of 1-bits in color masks against the color depth requested
    * by the client.
    */
-  if (POPCOUNTL(visual->red_mask|visual->green_mask|visual->blue_mask) == depth)
+  if (_cogl_util_popcountl (visual->red_mask |
+                            visual->green_mask |
+                            visual->blue_mask) == depth)
     attribs[i++] = GLX_TEXTURE_FORMAT_RGB_EXT;
   else
     attribs[i++] = GLX_TEXTURE_FORMAT_RGBA_EXT; |