diff options
author | Gabor Greif <ggreif@gmail.com> | 2018-01-29 14:34:25 +0100 |
---|---|---|
committer | Gabor Greif <ggreif@gmail.com> | 2019-04-15 17:19:03 -0400 |
commit | be05bd8168b0ea65d63dc0093a5c8781a2528500 (patch) | |
tree | d45ed24579c4d084c73884da9589da25b8dcf7d8 | |
parent | ed94d3450cbb6ec7a31d9aa37efb7fe93d0559cf (diff) | |
download | haskell-be05bd8168b0ea65d63dc0093a5c8781a2528500.tar.gz |
asm-emit-time IND_STATIC elimination
When a new closure identifier is being established to a
local or exported closure already emitted into the same
module, refrain from adding an IND_STATIC closure, and
instead emit an assembly-language alias.
Inter-module IND_STATIC objects still remain, and need to be
addressed by other measures.
Binary-size savings on nofib are around 0.1%.
-rw-r--r-- | compiler/cmm/CLabel.hs | 138 | ||||
-rw-r--r-- | compiler/codeGen/StgCmmBind.hs | 4 | ||||
-rw-r--r-- | compiler/llvmGen/Llvm/Types.hs | 1 | ||||
-rw-r--r-- | compiler/llvmGen/LlvmCodeGen/Base.hs | 32 | ||||
-rw-r--r-- | compiler/llvmGen/LlvmCodeGen/Data.hs | 34 | ||||
-rw-r--r-- | compiler/llvmGen/LlvmCodeGen/Ppr.hs | 2 | ||||
-rw-r--r-- | compiler/nativeGen/PPC/Ppr.hs | 11 | ||||
-rw-r--r-- | compiler/nativeGen/SPARC/Ppr.hs | 11 | ||||
-rw-r--r-- | compiler/nativeGen/X86/Ppr.hs | 12 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_compile/Makefile | 15 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_compile/T15155.stdout | 2 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_compile/T15155l.hs | 8 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_compile/T15155l.stdout | 2 | ||||
-rw-r--r-- | testsuite/tests/codeGen/should_compile/all.T | 9 |
14 files changed, 269 insertions, 12 deletions
diff --git a/compiler/cmm/CLabel.hs b/compiler/cmm/CLabel.hs index bc26490700..81a226d65f 100644 --- a/compiler/cmm/CLabel.hs +++ b/compiler/cmm/CLabel.hs @@ -98,7 +98,7 @@ module CLabel ( needsCDecl, maybeLocalBlockLabel, externallyVisibleCLabel, isMathFun, isCFunctionLabel, isGcPtrLabel, labelDynamic, - isLocalCLabel, + isLocalCLabel, mayRedirectTo, -- * Conversions toClosureLbl, toSlowEntryLbl, toEntryLbl, toInfoLbl, hasHaskellName, @@ -1432,3 +1432,139 @@ pprDynamicLinkerAsmLabel platform dllInfo lbl = SymbolPtr -> text ".LC_" <> ppr lbl GotSymbolPtr -> ppr lbl <> text "@got" GotSymbolOffset -> ppr lbl <> text "@gotoff" + +-- Figure out whether `symbol` may serve as an alias +-- to `target` within one compilation unit. +-- +-- This is true if any of these holds: +-- * `target` is a module-internal haskell name. +-- * `target` is an exported name, but comes from the same +-- module as `symbol` +-- +-- These are sufficient conditions for establishing e.g. a +-- GNU assembly alias ('.equiv' directive). Sadly, there is +-- no such thing as an alias to an imported symbol (conf. +-- http://blog.omega-prime.co.uk/2011/07/06/the-sad-state-of-symbol-aliases/) +-- See note [emit-time elimination of static indirections]. +-- +-- Precondition is that both labels represent the +-- same semantic value. + +mayRedirectTo :: CLabel -> CLabel -> Bool +mayRedirectTo symbol target + | Just nam <- haskellName + , staticClosureLabel + , isExternalName nam + , Just mod <- nameModule_maybe nam + , Just anam <- hasHaskellName symbol + , Just amod <- nameModule_maybe anam + = amod == mod + + | Just nam <- haskellName + , staticClosureLabel + , isInternalName nam + = True + + | otherwise = False + where staticClosureLabel = isStaticClosureLabel target + haskellName = hasHaskellName target + + +{- +Note [emit-time elimination of static indirections] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +As described in #15155, certain static values are repesentationally +equivalent, e.g. 'cast'ed values (when created by 'newtype' wrappers). + + newtype A = A Int + {-# NOINLINE a #-} + a = A 42 + +a1_rYB :: Int +[GblId, Caf=NoCafRefs, Unf=OtherCon []] +a1_rYB = GHC.Types.I# 42# + +a [InlPrag=NOINLINE] :: A +[GblId, Unf=OtherCon []] +a = a1_rYB `cast` (Sym (T15155.N:A[0]) :: Int ~R# A) + +Formerly we created static indirections for these (IND_STATIC), which +consist of a statically allocated forwarding closure that contains +the (possibly tagged) indirectee. (See CMM/assembly below.) +This approach is suboptimal for two reasons: + (a) they occupy extra space, + (b) they need to be entered in order to obtain the indirectee, + thus they cannot be tagged. + +Fortunately there is a common case where static indirections can be +eliminated while emitting assembly (native or LLVM), viz. when the +indirectee is in the same module (object file) as the symbol that +points to it. In this case an assembly-level identification can +be created ('.equiv' directive), and as such the same object will +be assigned two names in the symbol table. Any of the identified +symbols can be referenced by a tagged pointer. + +Currently the 'mayRedirectTo' predicate will +give a clue whether a label can be equated with another, already +emitted, label (which can in turn be an alias). The general mechanics +is that we identify data (IND_STATIC closures) that are amenable +to aliasing while pretty-printing of assembly output, and emit the +'.equiv' directive instead of static data in such a case. + +Here is a sketch how the output is massaged: + + Consider +newtype A = A Int +{-# NOINLINE a #-} +a = A 42 -- I# 42# is the indirectee + -- 'a' is exported + + results in STG + +a1_rXq :: GHC.Types.Int +[GblId, Caf=NoCafRefs, Unf=OtherCon []] = + CCS_DONT_CARE GHC.Types.I#! [42#]; + +T15155.a [InlPrag=NOINLINE] :: T15155.A +[GblId, Unf=OtherCon []] = + CAF_ccs \ u [] a1_rXq; + + and CMM + +[section ""data" . a1_rXq_closure" { + a1_rXq_closure: + const GHC.Types.I#_con_info; + const 42; + }] + +[section ""data" . T15155.a_closure" { + T15155.a_closure: + const stg_IND_STATIC_info; + const a1_rXq_closure+1; + const 0; + const 0; + }] + +The emitted assembly is + +#### INDIRECTEE +a1_rXq_closure: -- module local haskell value + .quad GHC.Types.I#_con_info -- an Int + .quad 42 + +#### BEFORE +.globl T15155.a_closure -- exported newtype wrapped value +T15155.a_closure: + .quad stg_IND_STATIC_info -- the closure info + .quad a1_rXq_closure+1 -- indirectee ('+1' being the tag) + .quad 0 + .quad 0 + +#### AFTER +.globl T15155.a_closure -- exported newtype wrapped value +.equiv a1_rXq_closure,T15155.a_closure -- both are shared + +The transformation is performed because + T15155.a_closure `mayRedirectTo` a1_rXq_closure+1 +returns True. +-} diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs index d134dfd677..f8bdc0d37e 100644 --- a/compiler/codeGen/StgCmmBind.hs +++ b/compiler/codeGen/StgCmmBind.hs @@ -78,7 +78,9 @@ cgTopRhsClosure dflags rec id ccs upd_flag args body = -- closure pointing directly to the indirectee. This is exactly -- what the CAF will eventually evaluate to anyway, we're just -- shortcutting the whole process, and generating a lot less code - -- (#7308) + -- (#7308). Eventually the IND_STATIC closure will be eliminated + -- by assembly '.equiv' directives, where possible (#15155). + -- See note [emit-time elimination of static indirections] in CLabel. -- -- Note: we omit the optimisation when this binding is part of a -- recursive group, because the optimisation would inhibit the black diff --git a/compiler/llvmGen/Llvm/Types.hs b/compiler/llvmGen/Llvm/Types.hs index 9bcceb599d..6e349d813f 100644 --- a/compiler/llvmGen/Llvm/Types.hs +++ b/compiler/llvmGen/Llvm/Types.hs @@ -185,6 +185,7 @@ pprSpecialStatic :: LlvmStatic -> SDoc pprSpecialStatic (LMBitc v t) = ppr (pLower t) <> text ", bitcast (" <> ppr v <> text " to " <> ppr t <> char ')' +pprSpecialStatic v@(LMStaticPointer x) = ppr (pLower $ getVarType x) <> comma <+> ppr v pprSpecialStatic stat = ppr stat diff --git a/compiler/llvmGen/LlvmCodeGen/Base.hs b/compiler/llvmGen/LlvmCodeGen/Base.hs index 5ebb7b3830..15101c82ee 100644 --- a/compiler/llvmGen/LlvmCodeGen/Base.hs +++ b/compiler/llvmGen/LlvmCodeGen/Base.hs @@ -31,7 +31,7 @@ module LlvmCodeGen.Base ( strCLabel_llvm, strDisplayName_llvm, strProcedureName_llvm, getGlobalPtr, generateExternDecls, - aliasify, + aliasify, llvmDefLabel ) where #include "HsVersions.h" @@ -57,6 +57,7 @@ import UniqSupply import ErrUtils import qualified Stream +import Data.Maybe (fromJust) import Control.Monad (ap) -- ---------------------------------------------------------------------------- @@ -376,7 +377,7 @@ ghcInternalFunctions = do mk "newSpark" (llvmWord dflags) [i8Ptr, i8Ptr] where mk n ret args = do - let n' = fsLit n `appendFS` fsLit "$def" + let n' = llvmDefLabel $ fsLit n decl = LlvmFunctionDecl n' ExternallyVisible CC_Ccc ret FixedArgs (tysToParams args) Nothing renderLlvm $ ppLlvmFunctionDecl decl @@ -436,12 +437,17 @@ getGlobalPtr llvmLbl = do let mkGlbVar lbl ty = LMGlobalVar lbl (LMPointer ty) Private Nothing Nothing case m_ty of -- Directly reference if we have seen it already - Just ty -> return $ mkGlbVar (llvmLbl `appendFS` fsLit "$def") ty Global + Just ty -> return $ mkGlbVar (llvmDefLabel llvmLbl) ty Global -- Otherwise use a forward alias of it Nothing -> do saveAlias llvmLbl return $ mkGlbVar llvmLbl i8 Alias +-- | Derive the definition label. It has an identified +-- structure type. +llvmDefLabel :: LMString -> LMString +llvmDefLabel = (`appendFS` fsLit "$def") + -- | Generate definitions for aliases forward-referenced by @getGlobalPtr@. -- -- Must be called at a point where we are sure that no new global definitions @@ -472,10 +478,28 @@ generateExternDecls = do -- | Here we take a global variable definition, rename it with a -- @$def@ suffix, and generate the appropriate alias. aliasify :: LMGlobal -> LlvmM [LMGlobal] +-- See note [emit-time elimination of static indirections] in CLabel. +-- Here we obtain the indirectee's precise type and introduce +-- fresh aliases to both the precise typed label (lbl$def) and the i8* +-- typed (regular) label of it with the matching new names. +aliasify (LMGlobal (LMGlobalVar lbl ty@LMAlias{} link sect align Alias) + (Just orig)) = do + let defLbl = llvmDefLabel lbl + LMStaticPointer (LMGlobalVar origLbl _ oLnk Nothing Nothing Alias) = orig + defOrigLbl = llvmDefLabel origLbl + orig' = LMStaticPointer (LMGlobalVar origLbl i8Ptr oLnk Nothing Nothing Alias) + origType <- funLookup origLbl + let defOrig = LMBitc (LMStaticPointer (LMGlobalVar defOrigLbl + (pLift $ fromJust origType) oLnk + Nothing Nothing Alias)) + (pLift ty) + pure [ LMGlobal (LMGlobalVar defLbl ty link sect align Alias) (Just defOrig) + , LMGlobal (LMGlobalVar lbl i8Ptr link sect align Alias) (Just orig') + ] aliasify (LMGlobal var val) = do let LMGlobalVar lbl ty link sect align const = var - defLbl = lbl `appendFS` fsLit "$def" + defLbl = llvmDefLabel lbl defVar = LMGlobalVar defLbl ty Internal sect align const defPtrVar = LMGlobalVar defLbl (LMPointer ty) link Nothing Nothing const diff --git a/compiler/llvmGen/LlvmCodeGen/Data.hs b/compiler/llvmGen/LlvmCodeGen/Data.hs index cabfe76762..3651a88cc6 100644 --- a/compiler/llvmGen/LlvmCodeGen/Data.hs +++ b/compiler/llvmGen/LlvmCodeGen/Data.hs @@ -32,12 +32,41 @@ import qualified Data.ByteString as BS structStr :: LMString structStr = fsLit "_struct" +-- | The LLVM visibility of the label +linkage :: CLabel -> LlvmLinkageType +linkage lbl = if externallyVisibleCLabel lbl + then ExternallyVisible else Internal + -- ---------------------------------------------------------------------------- -- * Top level -- -- | Pass a CmmStatic section to an equivalent Llvm code. genLlvmData :: (Section, CmmStatics) -> LlvmM LlvmData +-- See note [emit-time elimination of static indirections] in CLabel. +genLlvmData (_, Statics alias [CmmStaticLit (CmmLabel lbl), CmmStaticLit ind, _, _]) + | lbl == mkIndStaticInfoLabel + , let labelInd (CmmLabelOff l _) = Just l + labelInd (CmmLabel l) = Just l + labelInd _ = Nothing + , Just ind' <- labelInd ind + , alias `mayRedirectTo` ind' = do + label <- strCLabel_llvm alias + label' <- strCLabel_llvm ind' + let link = linkage alias + link' = linkage ind' + -- the LLVM type we give the alias is an empty struct type + -- but it doesn't really matter, as the pointer is only + -- used for (bit/int)casting. + tyAlias = LMAlias (label `appendFS` structStr, LMStructU []) + + aliasDef = LMGlobalVar label tyAlias link Nothing Nothing Alias + -- we don't know the type of the indirectee here + indType = panic "will be filled by 'aliasify', later" + orig = LMStaticPointer $ LMGlobalVar label' indType link' Nothing Nothing Alias + + pure ([LMGlobal aliasDef $ Just orig], [tyAlias]) + genLlvmData (sec, Statics lbl xs) = do label <- strCLabel_llvm lbl static <- mapM genData xs @@ -45,11 +74,10 @@ genLlvmData (sec, Statics lbl xs) = do let types = map getStatType static strucTy = LMStruct types - tyAlias = LMAlias ((label `appendFS` structStr), strucTy) + tyAlias = LMAlias (label `appendFS` structStr, strucTy) struct = Just $ LMStaticStruc static tyAlias - link = if (externallyVisibleCLabel lbl) - then ExternallyVisible else Internal + link = linkage lbl align = case sec of Section CString _ -> Just 1 _ -> Nothing diff --git a/compiler/llvmGen/LlvmCodeGen/Ppr.hs b/compiler/llvmGen/LlvmCodeGen/Ppr.hs index c1378aa1fd..3f29133e59 100644 --- a/compiler/llvmGen/LlvmCodeGen/Ppr.hs +++ b/compiler/llvmGen/LlvmCodeGen/Ppr.hs @@ -71,7 +71,7 @@ pprLlvmCmmDecl (CmmProc mb_info entry_lbl live (ListGraph blks)) let fun = LlvmFunction funDec funArgs llvmStdFunAttrs funSect prefix lmblocks name = decName $ funcDecl fun - defName = name `appendFS` fsLit "$def" + defName = llvmDefLabel name funcDecl' = (funcDecl fun) { decName = defName } fun' = fun { funcDecl = funcDecl' } funTy = LMFunction funcDecl' diff --git a/compiler/nativeGen/PPC/Ppr.hs b/compiler/nativeGen/PPC/Ppr.hs index 8e9ff95b9f..be6402e57a 100644 --- a/compiler/nativeGen/PPC/Ppr.hs +++ b/compiler/nativeGen/PPC/Ppr.hs @@ -27,6 +27,7 @@ import Hoopl.Label import BlockId import CLabel +import PprCmmExpr () import Unique ( pprUniqueAlways, getUnique ) import Platform @@ -119,6 +120,16 @@ pprBasicBlock info_env (BasicBlock blockid instrs) pprDatas :: CmmStatics -> SDoc +-- See note [emit-time elimination of static indirections] in CLabel. +pprDatas (Statics alias [CmmStaticLit (CmmLabel lbl), CmmStaticLit ind, _, _]) + | lbl == mkIndStaticInfoLabel + , let labelInd (CmmLabelOff l _) = Just l + labelInd (CmmLabel l) = Just l + labelInd _ = Nothing + , Just ind' <- labelInd ind + , alias `mayRedirectTo` ind' + = pprGloblDecl alias + $$ text ".equiv" <+> ppr alias <> comma <> ppr (CmmLabel ind') pprDatas (Statics lbl dats) = vcat (pprLabel lbl : map pprData dats) pprData :: CmmStatic -> SDoc diff --git a/compiler/nativeGen/SPARC/Ppr.hs b/compiler/nativeGen/SPARC/Ppr.hs index 6b441819a6..0619956f39 100644 --- a/compiler/nativeGen/SPARC/Ppr.hs +++ b/compiler/nativeGen/SPARC/Ppr.hs @@ -102,6 +102,16 @@ pprBasicBlock info_env (BasicBlock blockid instrs) pprDatas :: CmmStatics -> SDoc +-- See note [emit-time elimination of static indirections] in CLabel. +pprDatas (Statics alias [CmmStaticLit (CmmLabel lbl), CmmStaticLit ind, _, _]) + | lbl == mkIndStaticInfoLabel + , let labelInd (CmmLabelOff l _) = Just l + labelInd (CmmLabel l) = Just l + labelInd _ = Nothing + , Just ind' <- labelInd ind + , alias `mayRedirectTo` ind' + = pprGloblDecl alias + $$ text ".equiv" <+> ppr alias <> comma <> ppr (CmmLabel ind') pprDatas (Statics lbl dats) = vcat (pprLabel lbl : map pprData dats) pprData :: CmmStatic -> SDoc @@ -634,4 +644,3 @@ pp_comma_lbracket = text ",[" pp_comma_a :: SDoc pp_comma_a = text ",a" - diff --git a/compiler/nativeGen/X86/Ppr.hs b/compiler/nativeGen/X86/Ppr.hs index ccfaff4753..bf28d29be9 100644 --- a/compiler/nativeGen/X86/Ppr.hs +++ b/compiler/nativeGen/X86/Ppr.hs @@ -145,7 +145,19 @@ pprBasicBlock info_env (BasicBlock blockid instrs) (l@LOCATION{} : _) -> pprInstr l _other -> empty + pprDatas :: (Alignment, CmmStatics) -> SDoc +-- See note [emit-time elimination of static indirections] in CLabel. +pprDatas (_, Statics alias [CmmStaticLit (CmmLabel lbl), CmmStaticLit ind, _, _]) + | lbl == mkIndStaticInfoLabel + , let labelInd (CmmLabelOff l _) = Just l + labelInd (CmmLabel l) = Just l + labelInd _ = Nothing + , Just ind' <- labelInd ind + , alias `mayRedirectTo` ind' + = pprGloblDecl alias + $$ text ".equiv" <+> ppr alias <> comma <> ppr (CmmLabel ind') + pprDatas (align, (Statics lbl dats)) = vcat (pprAlign align : pprLabel lbl : map pprData dats) diff --git a/testsuite/tests/codeGen/should_compile/Makefile b/testsuite/tests/codeGen/should_compile/Makefile index c0729443c9..8aa7917751 100644 --- a/testsuite/tests/codeGen/should_compile/Makefile +++ b/testsuite/tests/codeGen/should_compile/Makefile @@ -43,3 +43,18 @@ T15723: '$(TEST_HC)' $(TEST_HC_OPTS) -prof -fPIC -fexternal-dynamic-refs -fforce-recomp -O2 -c T15723A.hs -o T15723A.o '$(TEST_HC)' $(TEST_HC_OPTS) -prof -fPIC -fexternal-dynamic-refs -fforce-recomp -O2 -c T15723B.hs -o T15723B.o '$(TEST_HC)' $(TEST_HC_OPTS) -dynamic -shared T15723B.o -o T15723B.so + +## check that there are two assembly equates +# mentioning T15155.a_closure (def and use) +T15155: + '$(TEST_HC)' $(TEST_HC_OPTS) -c -O0 -ddump-asm T15155l.hs | grep -F ".equiv " \ + | grep -F "T15155.a_closure" | wc -l | sed -e 's/ *//g' | grep "2" ; echo $$? + +## check that there are two "$def" aliases: +# - one that bitcasts to %T15155_a_closure_struct* +# - and the other which bitcasts from %T15155_a_closure_struct* +## +T15155l: + '$(TEST_HC)' $(TEST_HC_OPTS) -c -O0 -ddump-llvm T15155l.hs 2>/dev/null \ + | grep -F "= alias %T15155_" | grep -E "@T15155_[ab]_closure.def = " | grep -F "%T15155_a_closure_struct*" \ + | wc -l | sed -e 's/ *//g' | grep "2"; echo $$? diff --git a/testsuite/tests/codeGen/should_compile/T15155.stdout b/testsuite/tests/codeGen/should_compile/T15155.stdout new file mode 100644 index 0000000000..389e262145 --- /dev/null +++ b/testsuite/tests/codeGen/should_compile/T15155.stdout @@ -0,0 +1,2 @@ +2 +0 diff --git a/testsuite/tests/codeGen/should_compile/T15155l.hs b/testsuite/tests/codeGen/should_compile/T15155l.hs new file mode 100644 index 0000000000..643610bc06 --- /dev/null +++ b/testsuite/tests/codeGen/should_compile/T15155l.hs @@ -0,0 +1,8 @@ +module T15155 (a, B(..), b) where + +newtype A = A Int +newtype B = B A + +{-# NOINLINE a #-} +a = A 42 +b = B a diff --git a/testsuite/tests/codeGen/should_compile/T15155l.stdout b/testsuite/tests/codeGen/should_compile/T15155l.stdout new file mode 100644 index 0000000000..389e262145 --- /dev/null +++ b/testsuite/tests/codeGen/should_compile/T15155l.stdout @@ -0,0 +1,2 @@ +2 +0 diff --git a/testsuite/tests/codeGen/should_compile/all.T b/testsuite/tests/codeGen/should_compile/all.T index 45924efc33..547814a0d6 100644 --- a/testsuite/tests/codeGen/should_compile/all.T +++ b/testsuite/tests/codeGen/should_compile/all.T @@ -44,7 +44,7 @@ test('T14999', # Verify that we optimize away redundant jumps for unordered comparisons. test('T15196', - [ unless(arch('x86_64'),skip), + [ unless(arch('x86_64'), skip), only_ways('normal'), ], makefile_test, []) @@ -52,3 +52,10 @@ test('T15723', [ unless(have_profiling(), skip), unless(have_dynamic(), skip), ], makefile_test, []) + +test('T15155', + [ unless(have_ncg(), skip) + ], makefile_test, []) + +test('T15155l', when(unregisterised(), skip), + makefile_test, []) |