author     Herbert Valerio Riedel <hvr@gnu.org>    2018-01-26 13:07:17 -0500
committer  Ben Gamari <ben@smart-cactus.org>       2018-01-26 14:37:28 -0500
commit     31c260f3967d2c06063c962a98475058daa45c6d (patch)
tree       0c2ce731c6e84f62d0f8b2ef7ee5d37bc3ffafec /compiler
parent     e7dcc7085315ea8ebc2d6808fde2d9c37fd10c67 (diff)
download   haskell-31c260f3967d2c06063c962a98475058daa45c6d.tar.gz
Add ptr-eq short-cut to `compareByteArrays#` primitive
This is an obvious optimisation whose overhead is negligible, but which
significantly simplifies the common uses of `compareByteArrays#`: those
would otherwise require making *careful* use of `reallyUnsafePtrEquality#`
or the (equally fragile) `byteArrayContents#`, which can result in less
optimal assembly code being generated.

Test Plan: carefully examined generated cmm/asm code; validate via phab

Reviewers: alexbiehl, bgamari, simonmar

Reviewed By: bgamari, simonmar

Subscribers: rwbarton, thomie, carter

Differential Revision: https://phabricator.haskell.org/D4319
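For context, here is a minimal sketch of the caller-side work-around the
message alludes to (the `cmpPrefix` helper and module name are hypothetical,
not part of this patch): the unlifted `ByteArray#`s must be coerced to a
lifted type before `reallyUnsafePtrEquality#` will accept them, which is
exactly the fragile step this commit makes unnecessary.

    {-# LANGUAGE MagicHash #-}

    module CmpPrefixSketch (cmpPrefix) where

    import GHC.Exts
      ( ByteArray#, Int#, compareByteArrays#
      , isTrue#, reallyUnsafePtrEquality#, unsafeCoerce# )

    -- Manual ptr-eq short-cut: return 0# (equal) when both arguments
    -- are the same heap object, and only fall back to the memcmp-backed
    -- primitive when the pointers differ. The unsafeCoerce# to () is
    -- the fragile part the commit message warns about.
    cmpPrefix :: ByteArray# -> ByteArray# -> Int# -> Int#
    cmpPrefix ba1 ba2 n
      | isTrue# (reallyUnsafePtrEquality# (unsafeCoerce# ba1 :: ())
                                          (unsafeCoerce# ba2 :: ()))
      = 0#
      | otherwise
      = compareByteArrays# ba1 0# ba2 0# n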
Diffstat (limited to 'compiler')
-rw-r--r--  compiler/codeGen/StgCmmPrim.hs  43
1 file changed, 43 insertions(+), 0 deletions(-)
diff --git a/compiler/codeGen/StgCmmPrim.hs b/compiler/codeGen/StgCmmPrim.hs
index 948af2aba0..8ec132b1d3 100644
--- a/compiler/codeGen/StgCmmPrim.hs
+++ b/compiler/codeGen/StgCmmPrim.hs
@@ -1746,8 +1746,51 @@ doCompareByteArraysOp res ba1 ba1_off ba2 ba2_off n = do
dflags <- getDynFlags
ba1_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba1 (arrWordsHdrSize dflags)) ba1_off
ba2_p <- assignTempE $ cmmOffsetExpr dflags (cmmOffsetB dflags ba2 (arrWordsHdrSize dflags)) ba2_off
+
+ -- short-cut in case of equal pointers, avoiding a costly
+ -- subroutine call to the memcmp(3) routine; the Cmm logic below
+ -- results in assembly code being generated for
+ --
+ -- cmpPrefix10 :: ByteArray# -> ByteArray# -> Int#
+ -- cmpPrefix10 ba1 ba2 = compareByteArrays# ba1 0# ba2 0# 10#
+ --
+ -- that looks like
+ --
+ -- leaq 16(%r14),%rax
+ -- leaq 16(%rsi),%rbx
+ -- xorl %ecx,%ecx
+ -- cmpq %rbx,%rax
+ -- je l_ptr_eq
+ --
+ -- ; NB: the common case (unequal pointers) falls through
+ -- ; the conditional jump, and therefore matches the
+ -- ; usual static branch prediction convention of modern cpus
+ --
+ -- subq $8,%rsp
+ -- movq %rbx,%rsi
+ -- movq %rax,%rdi
+ -- movl $10,%edx
+ -- xorl %eax,%eax
+ -- call memcmp
+ -- addq $8,%rsp
+ -- movslq %eax,%rax
+ -- movq %rax,%rcx
+ -- l_ptr_eq:
+ -- movq %rcx,%rbx
+ -- jmp *(%rbp)
+
+ l_ptr_eq <- newBlockId
+ l_ptr_ne <- newBlockId
+
+ emit (mkAssign (CmmLocal res) (zeroExpr dflags))
+ emit (mkCbranch (cmmEqWord dflags ba1_p ba2_p)
+ l_ptr_eq l_ptr_ne (Just False))
+
+ emitLabel l_ptr_ne
emitMemcmpCall res ba1_p ba2_p n 1
+ emitLabel l_ptr_eq
+
-- ----------------------------------------------------------------------------
-- Copying byte arrays
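Usage note (illustrative, not part of the patch; the boxed `ByteArray`
wrapper comes from the `primitive` package and `comparePrefix` is a
hypothetical name): with the short-cut in place, ordinary boxed callers get
the fast path for free, e.g. `comparePrefix ba ba 10` now answers `EQ`
without ever reaching memcmp.

    {-# LANGUAGE MagicHash #-}

    module ComparePrefixSketch (comparePrefix) where

    import Data.Primitive.ByteArray (ByteArray (..))
    import GHC.Exts (Int (I#), compareByteArrays#)

    -- Ordering on the first n bytes of two byte arrays; the sign of
    -- the memcmp-style result from compareByteArrays# is turned into
    -- an Ordering by comparing it against zero.
    comparePrefix :: ByteArray -> ByteArray -> Int -> Ordering
    comparePrefix (ByteArray ba1#) (ByteArray ba2#) (I# n#) =
      compare (I# (compareByteArrays# ba1# 0# ba2# 0# n#)) 0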