summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Neil Roberts <neil@linux.intel.com> 2011-11-01 13:10:59 +0000
committer Neil Roberts <neil@linux.intel.com> 2011-11-16 16:32:11 +0000
commit 037c0aa88cb8eb74f21ce24c80467b909028d2f6 (patch)
tree 6113491fed1b96315e70b053e8ec8f25970b57cc
parent 436a7a45da0298f72d5b839601c60a5d23108a8a (diff)
download cogl-037c0aa88cb8eb74f21ce24c80467b909028d2f6.tar.gz
Move POPCOUNTL to cogl-util
This moves the POPCOUNTL macro from cogl-winsys-glx to cogl-util and renames it to _cogl_util_popcountl so that it can be used in more places.

The fallback function for when the GCC builtin is not available has been replaced with an 8-bit lookup table because the HAKMEM implementation doesn't look like it would work when longs are 64-bit, so it's not suitable for a general purpose function on 64-bit architectures. Some of the pages regarding population counts seem to suggest that using a lookup table is the fastest method anyway.

Reviewed-by: Robert Bragg <robert@linux.intel.com>
-rw-r--r-- cogl/cogl-util.c 20
-rw-r--r-- cogl/cogl-util.h 34
-rw-r--r-- cogl/winsys/cogl-winsys-glx.c 21
3 files changed, 55 insertions, 20 deletions
diff --git a/cogl/cogl-util.c b/cogl/cogl-util.c
index 1cb38143..487a762c 100644
--- a/cogl/cogl-util.c
+++ b/cogl/cogl-util.c
@@ -100,3 +100,23 @@ _cogl_util_ffsl_wrapper (long int num)
}
#endif /* COGL_UTIL_HAVE_BUILTIN_FFSL */
+
+#ifndef COGL_UTIL_HAVE_BUILTIN_POPCOUNTL
+/* Fallback lookup table for _cogl_util_popcountl, only compiled when
+   COGL_UTIL_HAVE_BUILTIN_POPCOUNTL is not defined. Entry N holds the
+   number of bits set in the 8-bit value N, so the table has one entry
+   for each of the 256 possible byte values. */
+const unsigned char
+_cogl_util_popcount_table[256] =
+ {
+ 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4,
+ 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4,
+ 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
+ 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5,
+ 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6,
+ 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+ 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
+ };
+
+#endif /* COGL_UTIL_HAVE_BUILTIN_POPCOUNTL */
diff --git a/cogl/cogl-util.h b/cogl/cogl-util.h
index d38167c6..256ecaea 100644
--- a/cogl/cogl-util.h
+++ b/cogl/cogl-util.h
@@ -100,6 +100,12 @@ _cogl_util_one_at_a_time_hash (unsigned int hash,
unsigned int
_cogl_util_one_at_a_time_mix (unsigned int hash);
+/* These two builtins are available since GCC 3.4 */
+#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
+#define COGL_UTIL_HAVE_BUILTIN_FFSL
+#define COGL_UTIL_HAVE_BUILTIN_POPCOUNTL
+#endif
+
/* The 'ffs' function is part of C99 so it isn't always available */
#ifdef HAVE_FFS
#define _cogl_util_ffs ffs
@@ -110,9 +116,8 @@ _cogl_util_ffs (int num);
/* The 'ffsl' function is non-standard but GCC has a builtin for it
since 3.4 which we can use */
-#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
+#ifdef COGL_UTIL_HAVE_BUILTIN_FFSL
#define _cogl_util_ffsl __builtin_ffsl
-#define COGL_UTIL_HAVE_BUILTIN_FFSL
#else
/* If ints and longs are the same size we can just use ffs. Hopefully
the compiler will optimise away this conditional */
@@ -121,7 +126,30 @@ _cogl_util_ffs (int num);
_cogl_util_ffsl_wrapper (x))
int
_cogl_util_ffsl_wrapper (long int num);
-#endif
+#endif /* COGL_UTIL_HAVE_BUILTIN_FFSL */
+
+#ifdef COGL_UTIL_HAVE_BUILTIN_POPCOUNTL
+#define _cogl_util_popcountl __builtin_popcountl
+#else
+extern const unsigned char _cogl_util_popcount_table[256];
+
+/* Fallback population count used when the GCC builtin is not
+   available: returns the number of bits set in @num by summing the
+   _cogl_util_popcount_table entry for each byte of the value.
+   There are many ways of doing popcount but doing a table lookup
+   seems to be the most robust against different sizes for long. Some
+   pages seem to claim it's the fastest method anyway. */
+static inline int
+_cogl_util_popcountl (unsigned long num)
+{
+ int i;
+ int sum = 0;
+
+ /* Visit sizeof (num) bytes, least significant first, so the loop
+    adapts to whatever width long has; each step shifts by a multiple
+    of 8 and masks with 0xff, i.e. it assumes 8-bit bytes.
+    NOTE(review): i is an int compared against the size_t result of
+    sizeof, which may trigger a sign-compare warning - confirm this is
+    acceptable for the project's warning flags.
+    Let's hope GCC will unroll this loop.. */
+ for (i = 0; i < sizeof (num); i++)
+ sum += _cogl_util_popcount_table[(num >> (i * 8)) & 0xff];
+
+ return sum;
+}
+
+#endif /* COGL_UTIL_HAVE_BUILTIN_POPCOUNTL */
#ifdef COGL_HAS_GLIB_SUPPORT
#define _COGL_RETURN_IF_FAIL(EXPR) g_return_if_fail(EXPR)
diff --git a/cogl/winsys/cogl-winsys-glx.c b/cogl/winsys/cogl-winsys-glx.c
index 05e2dac1..0dce4741 100644
--- a/cogl/winsys/cogl-winsys-glx.c
+++ b/cogl/winsys/cogl-winsys-glx.c
@@ -49,6 +49,7 @@
#include "cogl-onscreen-private.h"
#include "cogl-swap-chain-private.h"
#include "cogl-xlib-renderer.h"
+#include "cogl-util.h"
#include <stdlib.h>
#include <sys/types.h>
@@ -1697,22 +1698,6 @@ should_use_rectangle (CoglContext *context)
return context->rectangle_state == COGL_WINSYS_RECTANGLE_STATE_ENABLE;
}
-/* GCC's population count builtin is available since version 3.4 */
-#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
-#define POPCOUNTL(n) __builtin_popcountl(n)
-#else
-/* HAKMEM 169 */
-static int
-hakmem_popcountl (unsigned long n)
-{
- unsigned long tmp;
-
- tmp = n - ((n >> 1) & 033333333333) - ((n >> 2) & 011111111111);
- return ((tmp + (tmp >> 3)) & 030707070707) % 63;
-}
-#define POPCOUNTL(n) hakmem_popcountl(n)
-#endif
-
static gboolean
try_create_glx_pixmap (CoglContext *context,
CoglTexturePixmapX11 *tex_pixmap,
@@ -1765,7 +1750,9 @@ try_create_glx_pixmap (CoglContext *context,
* number of 1-bits in color masks against the color depth requested
* by the client.
*/
- if (POPCOUNTL(visual->red_mask|visual->green_mask|visual->blue_mask) == depth)
+ if (_cogl_util_popcountl (visual->red_mask |
+ visual->green_mask |
+ visual->blue_mask) == depth)
attribs[i++] = GLX_TEXTURE_FORMAT_RGB_EXT;
else
attribs[i++] = GLX_TEXTURE_FORMAT_RGBA_EXT;