summary | refs | log | tree | commit | diff
path: root/rts
diff options
context:
space:
mode:
author: Simon Marlow <marlowsd@gmail.com> 2016-04-23 22:14:43 +0100
committer: Simon Marlow <marlowsd@gmail.com> 2016-04-26 16:00:43 +0100
commit: 24864ba5587c1a0447beabae90529e8bb4fa117a (patch)
tree: fe0ecf67674cc1a98d6aa88f6947ef8798ee48b0 /rts
parent: d396996298939f647c22b547bc01f1b00e6e2fd9 (diff)
download: haskell-24864ba5587c1a0447beabae90529e8bb4fa117a.tar.gz
Use __builtin_clz() to implement log_2()
A microoptimisation in the block allocator.
Diffstat (limited to 'rts')
-rw-r--r-- rts/sm/BlockAlloc.c 32
1 files changed, 21 insertions, 11 deletions
diff --git a/rts/sm/BlockAlloc.c b/rts/sm/BlockAlloc.c
index a633726935..1c83de9ded 100644
--- a/rts/sm/BlockAlloc.c
+++ b/rts/sm/BlockAlloc.c
@@ -199,31 +199,41 @@ initGroup(bdescr *head)
}
}
-// There are quicker non-loopy ways to do log_2, but we expect n to be
-// usually small, and MAX_FREE_LIST is also small, so the loop version
-// might well be the best choice here.
+// log base 2 (floor), needs to support up to 2^MAX_FREE_LIST
STATIC_INLINE nat
-log_2_ceil(W_ n)
+log_2(W_ n)
{
+#if defined(__GNUC__)
+ return __builtin_clzl(n) ^ (sizeof(StgWord)*8 - 1);
+ // generates good code on x86. __builtin_clz() compiles to bsr+xor, but
+ // we want just bsr, so the xor here cancels out gcc's xor.
+#else
W_ i, x;
- x = 1;
+ x = n;
for (i=0; i < MAX_FREE_LIST; i++) {
- if (x >= n) return i;
- x = x << 1;
+ x = x >> 1;
+ if (x == 0) return i;
}
return MAX_FREE_LIST;
+#endif
}
+// log base 2 (ceiling), needs to support up to 2^MAX_FREE_LIST
STATIC_INLINE nat
-log_2(W_ n)
+log_2_ceil(W_ n)
{
+#if defined(__GNUC__)
+ nat r = log_2(n);
+ return (n & (n-1)) ? r+1 : r;
+#else
W_ i, x;
- x = n;
+ x = 1;
for (i=0; i < MAX_FREE_LIST; i++) {
- x = x >> 1;
- if (x == 0) return i;
+ if (x >= n) return i;
+ x = x << 1;
}
return MAX_FREE_LIST;
+#endif
}
STATIC_INLINE void