summaryrefslogtreecommitdiff
path: root/rts/PrimOps.cmm
diff options
context:
space:
mode:
author Simon Marlow <simonmar@microsoft.com> 2007-07-27 10:41:57 +0000
committer Simon Marlow <simonmar@microsoft.com> 2007-07-27 10:41:57 +0000
commit 6015a94f9108a502150565577b66c23650796639 (patch)
tree 20d499d1a9644c2c98374d99f511a4a1c2cb7d1d /rts/PrimOps.cmm
parent 04d444716b2e5415fb8f13771e49f1192ef8c8f8 (diff)
download haskell-6015a94f9108a502150565577b66c23650796639.tar.gz
Pointer Tagging
This patch implements pointer tagging as per our ICFP'07 paper "Faster laziness using dynamic pointer tagging". It improves performance by 10-15% for most workloads, including GHC itself.

The original patches were by Alexey Rodriguez Yakushev <mrchebas@gmail.com>, with additions and improvements by me. I've re-recorded the development as a single patch.

The basic idea is this: we use the low 2 bits of a pointer to a heap object (3 bits on a 64-bit architecture) to encode some information about the object pointed to. For a constructor, we encode the "tag" of the constructor (e.g. True vs. False); for a function closure, its arity. This enables some decisions to be made without dereferencing the pointer, which speeds up some common operations. In particular, it enables us to avoid costly indirect jumps in many cases.

More information in the commentary: http://hackage.haskell.org/trac/ghc/wiki/Commentary/Rts/HaskellExecution/PointerTagging
Diffstat (limited to 'rts/PrimOps.cmm')
-rw-r--r-- rts/PrimOps.cmm 9
1 files changed, 6 insertions, 3 deletions
diff --git a/rts/PrimOps.cmm b/rts/PrimOps.cmm
index 7c75fca0e8..cb8626e5dd 100644
--- a/rts/PrimOps.cmm
+++ b/rts/PrimOps.cmm
@@ -1869,7 +1869,7 @@ unpackClosurezh_fast
// TODO: Consider the absence of ptrs or nonptrs as a special case ?
W_ info, ptrs, nptrs, p, ptrs_arr, nptrs_arr;
- info = %GET_STD_INFO(R1);
+ info = %GET_STD_INFO(UNTAG(R1));
// Some closures have non-standard layout, so we omit those here.
W_ type;
@@ -1899,6 +1899,9 @@ out:
ALLOC_PRIM (ptrs_arr_sz + nptrs_arr_sz, R1_PTR, unpackClosurezh_fast);
+ W_ clos;
+ clos = UNTAG(R1);
+
ptrs_arr = Hp - nptrs_arr_sz - ptrs_arr_sz + WDS(1);
nptrs_arr = Hp - nptrs_arr_sz + WDS(1);
@@ -1907,7 +1910,7 @@ out:
p = 0;
for:
if(p < ptrs) {
- W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(R1,p);
+ W_[ptrs_arr + SIZEOF_StgMutArrPtrs + WDS(p)] = StgClosure_payload(clos,p);
p = p + 1;
goto for;
}
@@ -1917,7 +1920,7 @@ for:
p = 0;
for2:
if(p < nptrs) {
- W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(R1, p+ptrs);
+ W_[BYTE_ARR_CTS(nptrs_arr) + WDS(p)] = StgClosure_payload(clos, p+ptrs);
p = p + 1;
goto for2;
}