summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGabor Greif <ggreif@gmail.com>2018-01-29 14:34:25 +0100
committerGabor Greif <ggreif@gmail.com>2019-04-15 17:19:03 -0400
commitbe05bd8168b0ea65d63dc0093a5c8781a2528500 (patch)
treed45ed24579c4d084c73884da9589da25b8dcf7d8
parented94d3450cbb6ec7a31d9aa37efb7fe93d0559cf (diff)
downloadhaskell-be05bd8168b0ea65d63dc0093a5c8781a2528500.tar.gz
asm-emit-time IND_STATIC elimination
When a new closure identifier is being established to a local or exported closure already emitted into the same module, refrain from adding an IND_STATIC closure, and instead emit an assembly-language alias. Inter-module IND_STATIC objects still remain, and need to be addressed by other measures. Binary-size savings on nofib are around 0.1%.
-rw-r--r--compiler/cmm/CLabel.hs138
-rw-r--r--compiler/codeGen/StgCmmBind.hs4
-rw-r--r--compiler/llvmGen/Llvm/Types.hs1
-rw-r--r--compiler/llvmGen/LlvmCodeGen/Base.hs32
-rw-r--r--compiler/llvmGen/LlvmCodeGen/Data.hs34
-rw-r--r--compiler/llvmGen/LlvmCodeGen/Ppr.hs2
-rw-r--r--compiler/nativeGen/PPC/Ppr.hs11
-rw-r--r--compiler/nativeGen/SPARC/Ppr.hs11
-rw-r--r--compiler/nativeGen/X86/Ppr.hs12
-rw-r--r--testsuite/tests/codeGen/should_compile/Makefile15
-rw-r--r--testsuite/tests/codeGen/should_compile/T15155.stdout2
-rw-r--r--testsuite/tests/codeGen/should_compile/T15155l.hs8
-rw-r--r--testsuite/tests/codeGen/should_compile/T15155l.stdout2
-rw-r--r--testsuite/tests/codeGen/should_compile/all.T9
14 files changed, 269 insertions, 12 deletions
diff --git a/compiler/cmm/CLabel.hs b/compiler/cmm/CLabel.hs
index bc26490700..81a226d65f 100644
--- a/compiler/cmm/CLabel.hs
+++ b/compiler/cmm/CLabel.hs
@@ -98,7 +98,7 @@ module CLabel (
needsCDecl, maybeLocalBlockLabel, externallyVisibleCLabel,
isMathFun,
isCFunctionLabel, isGcPtrLabel, labelDynamic,
- isLocalCLabel,
+ isLocalCLabel, mayRedirectTo,
-- * Conversions
toClosureLbl, toSlowEntryLbl, toEntryLbl, toInfoLbl, hasHaskellName,
@@ -1432,3 +1432,139 @@ pprDynamicLinkerAsmLabel platform dllInfo lbl =
SymbolPtr -> text ".LC_" <> ppr lbl
GotSymbolPtr -> ppr lbl <> text "@got"
GotSymbolOffset -> ppr lbl <> text "@gotoff"
+
+-- Figure out whether `symbol` may serve as an alias
+-- to `target` within one compilation unit.
+--
+-- This is true if any of these holds:
+-- * `target` is a module-internal haskell name.
+-- * `target` is an exported name, but comes from the same
+-- module as `symbol`
+--
+-- These are sufficient conditions for establishing e.g. a
+-- GNU assembly alias ('.equiv' directive). Sadly, there is
+-- no such thing as an alias to an imported symbol (conf.
+-- http://blog.omega-prime.co.uk/2011/07/06/the-sad-state-of-symbol-aliases/)
+-- See note [emit-time elimination of static indirections].
+--
+-- Precondition is that both labels represent the
+-- same semantic value.
+
+mayRedirectTo :: CLabel -> CLabel -> Bool
+mayRedirectTo symbol target
+ | Just nam <- haskellName
+ , staticClosureLabel
+ , isExternalName nam
+ , Just mod <- nameModule_maybe nam
+ , Just anam <- hasHaskellName symbol
+ , Just amod <- nameModule_maybe anam
+ = amod == mod
+
+ | Just nam <- haskellName
+ , staticClosureLabel
+ , isInternalName nam
+ = True
+
+ | otherwise = False
+ where staticClosureLabel = isStaticClosureLabel target
+ haskellName = hasHaskellName target
+
+
+{-
+Note [emit-time elimination of static indirections]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+As described in #15155, certain static values are repesentationally
+equivalent, e.g. 'cast'ed values (when created by 'newtype' wrappers).
+
+ newtype A = A Int
+ {-# NOINLINE a #-}
+ a = A 42
+
+a1_rYB :: Int
+[GblId, Caf=NoCafRefs, Unf=OtherCon []]
+a1_rYB = GHC.Types.I# 42#
+
+a [InlPrag=NOINLINE] :: A
+[GblId, Unf=OtherCon []]
+a = a1_rYB `cast` (Sym (T15155.N:A[0]) :: Int ~R# A)
+
+Formerly we created static indirections for these (IND_STATIC), which
+consist of a statically allocated forwarding closure that contains
+the (possibly tagged) indirectee. (See CMM/assembly below.)
+This approach is suboptimal for two reasons:
+ (a) they occupy extra space,
+ (b) they need to be entered in order to obtain the indirectee,
+ thus they cannot be tagged.
+
+Fortunately there is a common case where static indirections can be
+eliminated while emitting assembly (native or LLVM), viz. when the
+indirectee is in the same module (object file) as the symbol that
+points to it. In this case an assembly-level identification can
+be created ('.equiv' directive), and as such the same object will
+be assigned two names in the symbol table. Any of the identified
+symbols can be referenced by a tagged pointer.
+
+Currently the 'mayRedirectTo' predicate will
+give a clue whether a label can be equated with another, already
+emitted, label (which can in turn be an alias). The general mechanics
+is that we identify data (IND_STATIC closures) that are amenable
+to aliasing while pretty-printing of assembly output, and emit the
+'.equiv' directive instead of static data in such a case.
+
+Here is a sketch how the output is massaged:
+
+ Consider
+newtype A = A Int
+{-# NOINLINE a #-}
+a = A 42 -- I# 42# is the indirectee
+ -- 'a' is exported
+
+ results in STG
+
+a1_rXq :: GHC.Types.Int
+[GblId, Caf=NoCafRefs, Unf=OtherCon []] =
+ CCS_DONT_CARE GHC.Types.I#! [42#];
+
+T15155.a [InlPrag=NOINLINE] :: T15155.A
+[GblId, Unf=OtherCon []] =
+ CAF_ccs \ u [] a1_rXq;
+
+ and CMM
+
+[section ""data" . a1_rXq_closure" {
+ a1_rXq_closure:
+ const GHC.Types.I#_con_info;
+ const 42;
+ }]
+
+[section ""data" . T15155.a_closure" {
+ T15155.a_closure:
+ const stg_IND_STATIC_info;
+ const a1_rXq_closure+1;
+ const 0;
+ const 0;
+ }]
+
+The emitted assembly is
+
+#### INDIRECTEE
+a1_rXq_closure: -- module local haskell value
+ .quad GHC.Types.I#_con_info -- an Int
+ .quad 42
+
+#### BEFORE
+.globl T15155.a_closure -- exported newtype wrapped value
+T15155.a_closure:
+ .quad stg_IND_STATIC_info -- the closure info
+ .quad a1_rXq_closure+1 -- indirectee ('+1' being the tag)
+ .quad 0
+ .quad 0
+
+#### AFTER
+.globl T15155.a_closure -- exported newtype wrapped value
+.equiv a1_rXq_closure,T15155.a_closure -- both are shared
+
+The transformation is performed because
+ T15155.a_closure `mayRedirectTo` a1_rXq_closure+1
+returns True.
+-}
diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs
index d134dfd677..f8bdc0d37e 100644
--- a/compiler/codeGen/StgCmmBind.hs
+++ b/compiler/codeGen/StgCmmBind.hs
@@ -78,7 +78,9 @@ cgTopRhsClosure dflags rec id ccs upd_flag args body =
-- closure pointing directly to the indirectee. This is exactly
-- what the CAF will eventually evaluate to anyway, we're just
-- shortcutting the whole process, and generating a lot less code
- -- (#7308)
+ -- (#7308). Eventually the IND_STATIC closure will be eliminated
+ -- by assembly '.equiv' directives, where possible (#15155).
+ -- See note [emit-time elimination of static indirections] in CLabel.
--
-- Note: we omit the optimisation when this binding is part of a
-- recursive group, because the optimisation would inhibit the black
diff --git a/compiler/llvmGen/Llvm/Types.hs b/compiler/llvmGen/Llvm/Types.hs
index 9bcceb599d..6e349d813f 100644
--- a/compiler/llvmGen/Llvm/Types.hs
+++ b/compiler/llvmGen/Llvm/Types.hs
@@ -185,6 +185,7 @@ pprSpecialStatic :: LlvmStatic -> SDoc
pprSpecialStatic (LMBitc v t) =
ppr (pLower t) <> text ", bitcast (" <> ppr v <> text " to " <> ppr t
<> char ')'
+pprSpecialStatic v@(LMStaticPointer x) = ppr (pLower $ getVarType x) <> comma <+> ppr v
pprSpecialStatic stat = ppr stat
diff --git a/compiler/llvmGen/LlvmCodeGen/Base.hs b/compiler/llvmGen/LlvmCodeGen/Base.hs
index 5ebb7b3830..15101c82ee 100644
--- a/compiler/llvmGen/LlvmCodeGen/Base.hs
+++ b/compiler/llvmGen/LlvmCodeGen/Base.hs
@@ -31,7 +31,7 @@ module LlvmCodeGen.Base (
strCLabel_llvm, strDisplayName_llvm, strProcedureName_llvm,
getGlobalPtr, generateExternDecls,
- aliasify,
+ aliasify, llvmDefLabel
) where
#include "HsVersions.h"
@@ -57,6 +57,7 @@ import UniqSupply
import ErrUtils
import qualified Stream
+import Data.Maybe (fromJust)
import Control.Monad (ap)
-- ----------------------------------------------------------------------------
@@ -376,7 +377,7 @@ ghcInternalFunctions = do
mk "newSpark" (llvmWord dflags) [i8Ptr, i8Ptr]
where
mk n ret args = do
- let n' = fsLit n `appendFS` fsLit "$def"
+ let n' = llvmDefLabel $ fsLit n
decl = LlvmFunctionDecl n' ExternallyVisible CC_Ccc ret
FixedArgs (tysToParams args) Nothing
renderLlvm $ ppLlvmFunctionDecl decl
@@ -436,12 +437,17 @@ getGlobalPtr llvmLbl = do
let mkGlbVar lbl ty = LMGlobalVar lbl (LMPointer ty) Private Nothing Nothing
case m_ty of
-- Directly reference if we have seen it already
- Just ty -> return $ mkGlbVar (llvmLbl `appendFS` fsLit "$def") ty Global
+ Just ty -> return $ mkGlbVar (llvmDefLabel llvmLbl) ty Global
-- Otherwise use a forward alias of it
Nothing -> do
saveAlias llvmLbl
return $ mkGlbVar llvmLbl i8 Alias
+-- | Derive the definition label. It has an identified
+-- structure type.
+llvmDefLabel :: LMString -> LMString
+llvmDefLabel = (`appendFS` fsLit "$def")
+
-- | Generate definitions for aliases forward-referenced by @getGlobalPtr@.
--
-- Must be called at a point where we are sure that no new global definitions
@@ -472,10 +478,28 @@ generateExternDecls = do
-- | Here we take a global variable definition, rename it with a
-- @$def@ suffix, and generate the appropriate alias.
aliasify :: LMGlobal -> LlvmM [LMGlobal]
+-- See note [emit-time elimination of static indirections] in CLabel.
+-- Here we obtain the indirectee's precise type and introduce
+-- fresh aliases to both the precise typed label (lbl$def) and the i8*
+-- typed (regular) label of it with the matching new names.
+aliasify (LMGlobal (LMGlobalVar lbl ty@LMAlias{} link sect align Alias)
+ (Just orig)) = do
+ let defLbl = llvmDefLabel lbl
+ LMStaticPointer (LMGlobalVar origLbl _ oLnk Nothing Nothing Alias) = orig
+ defOrigLbl = llvmDefLabel origLbl
+ orig' = LMStaticPointer (LMGlobalVar origLbl i8Ptr oLnk Nothing Nothing Alias)
+ origType <- funLookup origLbl
+ let defOrig = LMBitc (LMStaticPointer (LMGlobalVar defOrigLbl
+ (pLift $ fromJust origType) oLnk
+ Nothing Nothing Alias))
+ (pLift ty)
+ pure [ LMGlobal (LMGlobalVar defLbl ty link sect align Alias) (Just defOrig)
+ , LMGlobal (LMGlobalVar lbl i8Ptr link sect align Alias) (Just orig')
+ ]
aliasify (LMGlobal var val) = do
let LMGlobalVar lbl ty link sect align const = var
- defLbl = lbl `appendFS` fsLit "$def"
+ defLbl = llvmDefLabel lbl
defVar = LMGlobalVar defLbl ty Internal sect align const
defPtrVar = LMGlobalVar defLbl (LMPointer ty) link Nothing Nothing const
diff --git a/compiler/llvmGen/LlvmCodeGen/Data.hs b/compiler/llvmGen/LlvmCodeGen/Data.hs
index cabfe76762..3651a88cc6 100644
--- a/compiler/llvmGen/LlvmCodeGen/Data.hs
+++ b/compiler/llvmGen/LlvmCodeGen/Data.hs
@@ -32,12 +32,41 @@ import qualified Data.ByteString as BS
structStr :: LMString
structStr = fsLit "_struct"
+-- | The LLVM visibility of the label
+linkage :: CLabel -> LlvmLinkageType
+linkage lbl = if externallyVisibleCLabel lbl
+ then ExternallyVisible else Internal
+
-- ----------------------------------------------------------------------------
-- * Top level
--
-- | Pass a CmmStatic section to an equivalent Llvm code.
genLlvmData :: (Section, CmmStatics) -> LlvmM LlvmData
+-- See note [emit-time elimination of static indirections] in CLabel.
+genLlvmData (_, Statics alias [CmmStaticLit (CmmLabel lbl), CmmStaticLit ind, _, _])
+ | lbl == mkIndStaticInfoLabel
+ , let labelInd (CmmLabelOff l _) = Just l
+ labelInd (CmmLabel l) = Just l
+ labelInd _ = Nothing
+ , Just ind' <- labelInd ind
+ , alias `mayRedirectTo` ind' = do
+ label <- strCLabel_llvm alias
+ label' <- strCLabel_llvm ind'
+ let link = linkage alias
+ link' = linkage ind'
+ -- the LLVM type we give the alias is an empty struct type
+ -- but it doesn't really matter, as the pointer is only
+ -- used for (bit/int)casting.
+ tyAlias = LMAlias (label `appendFS` structStr, LMStructU [])
+
+ aliasDef = LMGlobalVar label tyAlias link Nothing Nothing Alias
+ -- we don't know the type of the indirectee here
+ indType = panic "will be filled by 'aliasify', later"
+ orig = LMStaticPointer $ LMGlobalVar label' indType link' Nothing Nothing Alias
+
+ pure ([LMGlobal aliasDef $ Just orig], [tyAlias])
+
genLlvmData (sec, Statics lbl xs) = do
label <- strCLabel_llvm lbl
static <- mapM genData xs
@@ -45,11 +74,10 @@ genLlvmData (sec, Statics lbl xs) = do
let types = map getStatType static
strucTy = LMStruct types
- tyAlias = LMAlias ((label `appendFS` structStr), strucTy)
+ tyAlias = LMAlias (label `appendFS` structStr, strucTy)
struct = Just $ LMStaticStruc static tyAlias
- link = if (externallyVisibleCLabel lbl)
- then ExternallyVisible else Internal
+ link = linkage lbl
align = case sec of
Section CString _ -> Just 1
_ -> Nothing
diff --git a/compiler/llvmGen/LlvmCodeGen/Ppr.hs b/compiler/llvmGen/LlvmCodeGen/Ppr.hs
index c1378aa1fd..3f29133e59 100644
--- a/compiler/llvmGen/LlvmCodeGen/Ppr.hs
+++ b/compiler/llvmGen/LlvmCodeGen/Ppr.hs
@@ -71,7 +71,7 @@ pprLlvmCmmDecl (CmmProc mb_info entry_lbl live (ListGraph blks))
let fun = LlvmFunction funDec funArgs llvmStdFunAttrs funSect
prefix lmblocks
name = decName $ funcDecl fun
- defName = name `appendFS` fsLit "$def"
+ defName = llvmDefLabel name
funcDecl' = (funcDecl fun) { decName = defName }
fun' = fun { funcDecl = funcDecl' }
funTy = LMFunction funcDecl'
diff --git a/compiler/nativeGen/PPC/Ppr.hs b/compiler/nativeGen/PPC/Ppr.hs
index 8e9ff95b9f..be6402e57a 100644
--- a/compiler/nativeGen/PPC/Ppr.hs
+++ b/compiler/nativeGen/PPC/Ppr.hs
@@ -27,6 +27,7 @@ import Hoopl.Label
import BlockId
import CLabel
+import PprCmmExpr ()
import Unique ( pprUniqueAlways, getUnique )
import Platform
@@ -119,6 +120,16 @@ pprBasicBlock info_env (BasicBlock blockid instrs)
pprDatas :: CmmStatics -> SDoc
+-- See note [emit-time elimination of static indirections] in CLabel.
+pprDatas (Statics alias [CmmStaticLit (CmmLabel lbl), CmmStaticLit ind, _, _])
+ | lbl == mkIndStaticInfoLabel
+ , let labelInd (CmmLabelOff l _) = Just l
+ labelInd (CmmLabel l) = Just l
+ labelInd _ = Nothing
+ , Just ind' <- labelInd ind
+ , alias `mayRedirectTo` ind'
+ = pprGloblDecl alias
+ $$ text ".equiv" <+> ppr alias <> comma <> ppr (CmmLabel ind')
pprDatas (Statics lbl dats) = vcat (pprLabel lbl : map pprData dats)
pprData :: CmmStatic -> SDoc
diff --git a/compiler/nativeGen/SPARC/Ppr.hs b/compiler/nativeGen/SPARC/Ppr.hs
index 6b441819a6..0619956f39 100644
--- a/compiler/nativeGen/SPARC/Ppr.hs
+++ b/compiler/nativeGen/SPARC/Ppr.hs
@@ -102,6 +102,16 @@ pprBasicBlock info_env (BasicBlock blockid instrs)
pprDatas :: CmmStatics -> SDoc
+-- See note [emit-time elimination of static indirections] in CLabel.
+pprDatas (Statics alias [CmmStaticLit (CmmLabel lbl), CmmStaticLit ind, _, _])
+ | lbl == mkIndStaticInfoLabel
+ , let labelInd (CmmLabelOff l _) = Just l
+ labelInd (CmmLabel l) = Just l
+ labelInd _ = Nothing
+ , Just ind' <- labelInd ind
+ , alias `mayRedirectTo` ind'
+ = pprGloblDecl alias
+ $$ text ".equiv" <+> ppr alias <> comma <> ppr (CmmLabel ind')
pprDatas (Statics lbl dats) = vcat (pprLabel lbl : map pprData dats)
pprData :: CmmStatic -> SDoc
@@ -634,4 +644,3 @@ pp_comma_lbracket = text ",["
pp_comma_a :: SDoc
pp_comma_a = text ",a"
-
diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs
index ccfaff4753..bf28d29be9 100644
--- a/compiler/nativeGen/X86/Ppr.hs
+++ b/compiler/nativeGen/X86/Ppr.hs
@@ -145,7 +145,19 @@ pprBasicBlock info_env (BasicBlock blockid instrs)
(l@LOCATION{} : _) -> pprInstr l
_other -> empty
+
pprDatas :: (Alignment, CmmStatics) -> SDoc
+-- See note [emit-time elimination of static indirections] in CLabel.
+pprDatas (_, Statics alias [CmmStaticLit (CmmLabel lbl), CmmStaticLit ind, _, _])
+ | lbl == mkIndStaticInfoLabel
+ , let labelInd (CmmLabelOff l _) = Just l
+ labelInd (CmmLabel l) = Just l
+ labelInd _ = Nothing
+ , Just ind' <- labelInd ind
+ , alias `mayRedirectTo` ind'
+ = pprGloblDecl alias
+ $$ text ".equiv" <+> ppr alias <> comma <> ppr (CmmLabel ind')
+
pprDatas (align, (Statics lbl dats))
= vcat (pprAlign align : pprLabel lbl : map pprData dats)
diff --git a/testsuite/tests/codeGen/should_compile/Makefile b/testsuite/tests/codeGen/should_compile/Makefile
index c0729443c9..8aa7917751 100644
--- a/testsuite/tests/codeGen/should_compile/Makefile
+++ b/testsuite/tests/codeGen/should_compile/Makefile
@@ -43,3 +43,18 @@ T15723:
'$(TEST_HC)' $(TEST_HC_OPTS) -prof -fPIC -fexternal-dynamic-refs -fforce-recomp -O2 -c T15723A.hs -o T15723A.o
'$(TEST_HC)' $(TEST_HC_OPTS) -prof -fPIC -fexternal-dynamic-refs -fforce-recomp -O2 -c T15723B.hs -o T15723B.o
'$(TEST_HC)' $(TEST_HC_OPTS) -dynamic -shared T15723B.o -o T15723B.so
+
+## check that there are two assembly equates
+# mentioning T15155.a_closure (def and use)
+T15155:
+ '$(TEST_HC)' $(TEST_HC_OPTS) -c -O0 -ddump-asm T15155l.hs | grep -F ".equiv " \
+ | grep -F "T15155.a_closure" | wc -l | sed -e 's/ *//g' | grep "2" ; echo $$?
+
+## check that there are two "$def" aliases:
+# - one that bitcasts to %T15155_a_closure_struct*
+# - and the other which bitcasts from %T15155_a_closure_struct*
+##
+T15155l:
+ '$(TEST_HC)' $(TEST_HC_OPTS) -c -O0 -ddump-llvm T15155l.hs 2>/dev/null \
+ | grep -F "= alias %T15155_" | grep -E "@T15155_[ab]_closure.def = " | grep -F "%T15155_a_closure_struct*" \
+ | wc -l | sed -e 's/ *//g' | grep "2"; echo $$?
diff --git a/testsuite/tests/codeGen/should_compile/T15155.stdout b/testsuite/tests/codeGen/should_compile/T15155.stdout
new file mode 100644
index 0000000000..389e262145
--- /dev/null
+++ b/testsuite/tests/codeGen/should_compile/T15155.stdout
@@ -0,0 +1,2 @@
+2
+0
diff --git a/testsuite/tests/codeGen/should_compile/T15155l.hs b/testsuite/tests/codeGen/should_compile/T15155l.hs
new file mode 100644
index 0000000000..643610bc06
--- /dev/null
+++ b/testsuite/tests/codeGen/should_compile/T15155l.hs
@@ -0,0 +1,8 @@
+module T15155 (a, B(..), b) where
+
+newtype A = A Int
+newtype B = B A
+
+{-# NOINLINE a #-}
+a = A 42
+b = B a
diff --git a/testsuite/tests/codeGen/should_compile/T15155l.stdout b/testsuite/tests/codeGen/should_compile/T15155l.stdout
new file mode 100644
index 0000000000..389e262145
--- /dev/null
+++ b/testsuite/tests/codeGen/should_compile/T15155l.stdout
@@ -0,0 +1,2 @@
+2
+0
diff --git a/testsuite/tests/codeGen/should_compile/all.T b/testsuite/tests/codeGen/should_compile/all.T
index 45924efc33..547814a0d6 100644
--- a/testsuite/tests/codeGen/should_compile/all.T
+++ b/testsuite/tests/codeGen/should_compile/all.T
@@ -44,7 +44,7 @@ test('T14999',
# Verify that we optimize away redundant jumps for unordered comparisons.
test('T15196',
- [ unless(arch('x86_64'),skip),
+ [ unless(arch('x86_64'), skip),
only_ways('normal'),
], makefile_test, [])
@@ -52,3 +52,10 @@ test('T15723',
[ unless(have_profiling(), skip),
unless(have_dynamic(), skip),
], makefile_test, [])
+
+test('T15155',
+ [ unless(have_ncg(), skip)
+ ], makefile_test, [])
+
+test('T15155l', when(unregisterised(), skip),
+ makefile_test, [])