summaryrefslogtreecommitdiff
path: root/compiler
diff options
context:
space:
mode:
authorMichal Terepeta <michal.terepeta@gmail.com>2017-10-29 20:49:32 -0400
committerBen Gamari <ben@smart-cactus.org>2017-10-29 21:51:05 -0400
commitcca2d6b78f97bfb79bef4dc3f75d6c4d15b94680 (patch)
tree9be80ec91082ad99ba79d21a6cd0aac68309a236 /compiler
parent85aa1f4253163985fe07d172f8da73b784bb7b4b (diff)
downloadhaskell-cca2d6b78f97bfb79bef4dc3f75d6c4d15b94680.tar.gz
Allow packing constructor fields
This is another step for fixing #13825 and is based on D38 by Simon Marlow. The change allows storing multiple constructor fields within the same word. This currently applies only to `Float`s, e.g., ``` data Foo = Foo {-# UNPACK #-} !Float {-# UNPACK #-} !Float ``` on 64-bit arch, will now store both fields within the same constructor word. For `WordX/IntX` we'll need to introduce new primop types. Main changes: - We now use sizes in bytes when we compute the offsets for constructor fields in `StgCmmLayout` and introduce padding if necessary (word-sized fields are still word-aligned) - `ByteCodeGen` had to be updated to correctly construct the data types. This required some new bytecode instructions to allow pushing things that are not full words onto the stack (and updating `Interpreter.c`). Note that we only use the packed stuff when constructing data types (i.e., for `PACK`), in all other cases the behavior should not change. - `RtClosureInspect` was changed to handle the new layout when extracting subterms. This seems to be used by things like `:print`. I've also added a test for this. - I deviated slightly from Simon's approach and use `PrimRep` instead of `ArgRep` for computing the size of fields. This seemed more natural and in the future we'll probably want to introduce new primitive types (e.g., `Int8#`) and `PrimRep` seems like a better place to do that (where we already have `Int64Rep` for example). `ArgRep` on the other hand seems to be more focused on calling functions. Signed-off-by: Michal Terepeta <michal.terepeta@gmail.com> Test Plan: ./validate Reviewers: bgamari, simonmar, austin, hvr, goldfire, erikd Reviewed By: bgamari Subscribers: maoe, rwbarton, thomie GHC Trac Issues: #13825 Differential Revision: https://phabricator.haskell.org/D3809
Diffstat (limited to 'compiler')
-rw-r--r--compiler/cmm/CmmCallConv.hs5
-rw-r--r--compiler/cmm/SMRep.hs9
-rw-r--r--compiler/codeGen/StgCmmBind.hs2
-rw-r--r--compiler/codeGen/StgCmmCon.hs15
-rw-r--r--compiler/codeGen/StgCmmHeap.hs15
-rw-r--r--compiler/codeGen/StgCmmLayout.hs93
-rw-r--r--compiler/coreSyn/CoreLint.hs4
-rw-r--r--compiler/ghci/ByteCodeAsm.hs15
-rw-r--r--compiler/ghci/ByteCodeGen.hs84
-rw-r--r--compiler/ghci/ByteCodeInstr.hs52
-rw-r--r--compiler/ghci/RtClosureInspect.hs89
-rw-r--r--compiler/main/Constants.hs4
-rw-r--r--compiler/types/TyCon.hs35
13 files changed, 317 insertions, 105 deletions
diff --git a/compiler/cmm/CmmCallConv.hs b/compiler/cmm/CmmCallConv.hs
index 0e89ce79f8..c32710e1b0 100644
--- a/compiler/cmm/CmmCallConv.hs
+++ b/compiler/cmm/CmmCallConv.hs
@@ -131,9 +131,10 @@ assignStack dflags offset arg_ty args = assign_stk offset [] (reverse args)
assign_stk offset assts (r:rs)
= assign_stk off' ((r, StackParam off') : assts) rs
where w = typeWidth (arg_ty r)
- size = (((widthInBytes w - 1) `div` word_size) + 1) * word_size
off' = offset + size
- word_size = wORD_SIZE dflags
+ -- Stack arguments always take a whole number of words, we never
+ -- pack them unlike constructor fields.
+ size = roundUpToWords dflags (widthInBytes w)
-----------------------------------------------------------------------------
-- Local information about the registers available
diff --git a/compiler/cmm/SMRep.hs b/compiler/cmm/SMRep.hs
index 34048fe116..1469ae1bd3 100644
--- a/compiler/cmm/SMRep.hs
+++ b/compiler/cmm/SMRep.hs
@@ -9,7 +9,7 @@ module SMRep (
-- * Words and bytes
WordOff, ByteOff,
wordsToBytes, bytesToWordsRoundUp,
- roundUpToWords,
+ roundUpToWords, roundUpTo,
StgWord, fromStgWord, toStgWord,
StgHalfWord, fromStgHalfWord, toStgHalfWord,
@@ -79,8 +79,11 @@ type ByteOff = Int
-- | Round up the given byte count to the next byte count that's a
-- multiple of the machine's word size.
roundUpToWords :: DynFlags -> ByteOff -> ByteOff
-roundUpToWords dflags n =
- (n + (wORD_SIZE dflags - 1)) .&. (complement (wORD_SIZE dflags - 1))
+roundUpToWords dflags n = roundUpTo n (wORD_SIZE dflags)
+
+-- | Round up @base@ to a multiple of @size@.
+roundUpTo :: ByteOff -> ByteOff -> ByteOff
+roundUpTo base size = (base + (size - 1)) .&. (complement (size - 1))
-- | Convert the given number of words to a number of bytes.
--
diff --git a/compiler/codeGen/StgCmmBind.hs b/compiler/codeGen/StgCmmBind.hs
index 8b2e998b5e..13f908e846 100644
--- a/compiler/codeGen/StgCmmBind.hs
+++ b/compiler/codeGen/StgCmmBind.hs
@@ -112,7 +112,7 @@ cgTopRhsClosure dflags rec id ccs _ upd_flag args body =
-- BUILD THE OBJECT, AND GENERATE INFO TABLE (IF NECESSARY)
; emitDataLits closure_label closure_rep
- ; let fv_details :: [(NonVoid Id, VirtualHpOffset)]
+ ; let fv_details :: [(NonVoid Id, ByteOff)]
(_, _, fv_details) = mkVirtHeapOffsets dflags (isLFThunk lf_info) []
-- Don't drop the non-void args until the closure info has been made
; forkClosureBody (closureCodeBody True id closure_info ccs
diff --git a/compiler/codeGen/StgCmmCon.hs b/compiler/codeGen/StgCmmCon.hs
index 1540d00715..a38f7bce37 100644
--- a/compiler/codeGen/StgCmmCon.hs
+++ b/compiler/codeGen/StgCmmCon.hs
@@ -79,9 +79,16 @@ cgTopRhsCon dflags id con args =
-- LAY IT OUT
; let
+ is_thunk = False
(tot_wds, -- #ptr_wds + #nonptr_wds
ptr_wds, -- #ptr_wds
- nv_args_w_offsets) = mkVirtConstrOffsets dflags (addArgReps args)
+ nv_args_w_offsets) =
+ mkVirtHeapOffsetsWithPadding dflags is_thunk (addArgReps args)
+
+ mk_payload (Padding len _) = return (CmmInt 0 (widthFromBytes len))
+ mk_payload (FieldOff arg _) = do
+ CmmLit lit <- getArgAmode arg
+ return lit
nonptr_wds = tot_wds - ptr_wds
@@ -90,10 +97,8 @@ cgTopRhsCon dflags id con args =
-- needs to poke around inside it.
info_tbl = mkDataConInfoTable dflags con True ptr_wds nonptr_wds
- get_lit (arg, _offset) = do { CmmLit lit <- getArgAmode arg
- ; return lit }
- ; payload <- mapM get_lit nv_args_w_offsets
+ ; payload <- mapM mk_payload nv_args_w_offsets
-- NB1: nv_args_w_offsets is sorted into ptrs then non-ptrs
-- NB2: all the amodes should be Lits!
-- TODO (osa): Why?
@@ -264,7 +269,7 @@ bindConArgs (DataAlt con) base args
-- The binding below forces the masking out of the tag bits
-- when accessing the constructor field.
- bind_arg :: (NonVoid Id, VirtualHpOffset) -> FCode (Maybe LocalReg)
+ bind_arg :: (NonVoid Id, ByteOff) -> FCode (Maybe LocalReg)
bind_arg (arg@(NonVoid b), offset)
| isDeadBinder b =
-- Do not load unused fields from objects to local variables.
diff --git a/compiler/codeGen/StgCmmHeap.hs b/compiler/codeGen/StgCmmHeap.hs
index 15dcaa2d89..790453619c 100644
--- a/compiler/codeGen/StgCmmHeap.hs
+++ b/compiler/codeGen/StgCmmHeap.hs
@@ -221,24 +221,11 @@ mkStaticClosure :: DynFlags -> CLabel -> CostCentreStack -> [CmmLit]
mkStaticClosure dflags info_lbl ccs payload padding static_link_field saved_info_field
= [CmmLabel info_lbl]
++ staticProfHdr dflags ccs
- ++ concatMap (padLitToWord dflags) payload
+ ++ payload
++ padding
++ static_link_field
++ saved_info_field
--- JD: Simon had elided this padding, but without it the C back end asserts
--- failure. Maybe it's a bad assertion, and this padding is indeed unnecessary?
-padLitToWord :: DynFlags -> CmmLit -> [CmmLit]
-padLitToWord dflags lit = lit : padding pad_length
- where width = typeWidth (cmmLitType dflags lit)
- pad_length = wORD_SIZE dflags - widthInBytes width :: Int
-
- padding n | n <= 0 = []
- | n `rem` 2 /= 0 = CmmInt 0 W8 : padding (n-1)
- | n `rem` 4 /= 0 = CmmInt 0 W16 : padding (n-2)
- | n `rem` 8 /= 0 = CmmInt 0 W32 : padding (n-4)
- | otherwise = CmmInt 0 W64 : padding (n-8)
-
-----------------------------------------------------------
-- Heap overflow checking
-----------------------------------------------------------
diff --git a/compiler/codeGen/StgCmmLayout.hs b/compiler/codeGen/StgCmmLayout.hs
index aeb01242e7..5111b93bc5 100644
--- a/compiler/codeGen/StgCmmLayout.hs
+++ b/compiler/codeGen/StgCmmLayout.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
-----------------------------------------------------------------------------
@@ -17,7 +18,12 @@ module StgCmmLayout (
slowCall, directCall,
- mkVirtHeapOffsets, mkVirtConstrOffsets, mkVirtConstrSizes, getHpRelOffset,
+ FieldOffOrPadding(..),
+ mkVirtHeapOffsets,
+ mkVirtHeapOffsetsWithPadding,
+ mkVirtConstrOffsets,
+ mkVirtConstrSizes,
+ getHpRelOffset,
ArgRep(..), toArgRep, argRepSizeW -- re-exported from StgCmmArgRep
) where
@@ -44,7 +50,7 @@ import CmmInfo
import CLabel
import StgSyn
import Id
-import TyCon ( PrimRep(..) )
+import TyCon ( PrimRep(..), primRepSizeB )
import BasicTypes ( RepArity )
import DynFlags
import Module
@@ -387,26 +393,33 @@ getHpRelOffset virtual_offset
hp_usg <- getHpUsage
return (cmmRegOffW dflags hpReg (hpRel (realHp hp_usg) virtual_offset))
-mkVirtHeapOffsets
+data FieldOffOrPadding a
+ = FieldOff (NonVoid a) -- Something that needs an offset.
+ ByteOff -- Offset in bytes.
+ | Padding ByteOff -- Length of padding in bytes.
+ ByteOff -- Offset in bytes.
+
+mkVirtHeapOffsetsWithPadding
:: DynFlags
-> Bool -- True <=> is a thunk
- -> [NonVoid (PrimRep,a)] -- Things to make offsets for
- -> (WordOff, -- _Total_ number of words allocated
- WordOff, -- Number of words allocated for *pointers*
- [(NonVoid a, ByteOff)])
+ -> [NonVoid (PrimRep, a)] -- Things to make offsets for
+ -> ( WordOff -- Total number of words allocated
+ , WordOff -- Number of words allocated for *pointers*
+ , [FieldOffOrPadding a] -- Either an offset or padding.
+ )
-- Things with their offsets from start of object in order of
-- increasing offset; BUT THIS MAY BE DIFFERENT TO INPUT ORDER
-- First in list gets lowest offset, which is initial offset + 1.
--
--- mkVirtHeapOffsets always returns boxed things with smaller offsets
+-- mkVirtHeapOffsetsWithPadding always returns boxed things with smaller offsets
-- than the unboxed things
-mkVirtHeapOffsets dflags is_thunk things
- = ASSERT(not (any (isVoidRep . fst . fromNonVoid) things))
- ( bytesToWordsRoundUp dflags tot_bytes
+mkVirtHeapOffsetsWithPadding dflags is_thunk things =
+ ASSERT(not (any (isVoidRep . fst . fromNonVoid) things))
+ ( tot_wds
, bytesToWordsRoundUp dflags bytes_of_ptrs
- , ptrs_w_offsets ++ non_ptrs_w_offsets
+ , concat (ptrs_w_offsets ++ non_ptrs_w_offsets) ++ final_pad
)
where
hdr_words | is_thunk = thunkHdrSize dflags
@@ -420,10 +433,58 @@ mkVirtHeapOffsets dflags is_thunk things
(tot_bytes, non_ptrs_w_offsets) =
mapAccumL computeOffset bytes_of_ptrs non_ptrs
- computeOffset bytes_so_far nv_thing
- = (bytes_so_far + wordsToBytes dflags (argRepSizeW dflags (toArgRep rep)),
- (NonVoid thing, hdr_bytes + bytes_so_far))
- where (rep,thing) = fromNonVoid nv_thing
+ tot_wds = bytesToWordsRoundUp dflags tot_bytes
+
+ final_pad_size = tot_wds * word_size - tot_bytes
+ final_pad
+ | final_pad_size > 0 = [(Padding final_pad_size
+ (hdr_bytes + tot_bytes))]
+ | otherwise = []
+
+ word_size = wORD_SIZE dflags
+
+ computeOffset bytes_so_far nv_thing =
+ (new_bytes_so_far, with_padding field_off)
+ where
+ (rep, thing) = fromNonVoid nv_thing
+
+ -- Size of the field in bytes.
+ !sizeB = primRepSizeB dflags rep
+
+ -- Align the start offset (eg, 2-byte value should be 2-byte aligned).
+ -- But not more than to a word.
+ !align = min word_size sizeB
+ !start = roundUpTo bytes_so_far align
+ !padding = start - bytes_so_far
+
+ -- Final offset is:
+ -- size of header + bytes_so_far + padding
+ !final_offset = hdr_bytes + bytes_so_far + padding
+ !new_bytes_so_far = start + sizeB
+ field_off = FieldOff (NonVoid thing) final_offset
+
+ with_padding field_off
+ | padding == 0 = [field_off]
+ | otherwise = [ Padding padding (hdr_bytes + bytes_so_far)
+ , field_off
+ ]
+
+
+mkVirtHeapOffsets
+ :: DynFlags
+ -> Bool -- True <=> is a thunk
+ -> [NonVoid (PrimRep,a)] -- Things to make offsets for
+ -> (WordOff, -- _Total_ number of words allocated
+ WordOff, -- Number of words allocated for *pointers*
+ [(NonVoid a, ByteOff)])
+mkVirtHeapOffsets dflags is_thunk things =
+ ( tot_wds
+ , ptr_wds
+ , [ (field, offset) | (FieldOff field offset) <- things_offsets ]
+ )
+ where
+ (tot_wds, ptr_wds, things_offsets) =
+ mkVirtHeapOffsetsWithPadding dflags is_thunk things
-- | Just like mkVirtHeapOffsets, but for constructors
mkVirtConstrOffsets
diff --git a/compiler/coreSyn/CoreLint.hs b/compiler/coreSyn/CoreLint.hs
index 96c34852ba..20354ec530 100644
--- a/compiler/coreSyn/CoreLint.hs
+++ b/compiler/coreSyn/CoreLint.hs
@@ -1677,8 +1677,8 @@ lintCoercion co@(UnivCo prov r ty1 ty2)
= do { dflags <- getDynFlags
; checkWarnL (isUnBoxed rep1 == isUnBoxed rep2)
(report "between unboxed and boxed value")
- ; checkWarnL (TyCon.primRepSizeW dflags rep1
- == TyCon.primRepSizeW dflags rep2)
+ ; checkWarnL (TyCon.primRepSizeB dflags rep1
+ == TyCon.primRepSizeB dflags rep2)
(report "between unboxed values of different size")
; let fl = liftM2 (==) (TyCon.primRepIsFloat rep1)
(TyCon.primRepIsFloat rep2)
diff --git a/compiler/ghci/ByteCodeAsm.hs b/compiler/ghci/ByteCodeAsm.hs
index edb18df382..920bc4ac2b 100644
--- a/compiler/ghci/ByteCodeAsm.hs
+++ b/compiler/ghci/ByteCodeAsm.hs
@@ -351,6 +351,12 @@ assembleI dflags i = case i of
PUSH_L o1 -> emit bci_PUSH_L [SmallOp o1]
PUSH_LL o1 o2 -> emit bci_PUSH_LL [SmallOp o1, SmallOp o2]
PUSH_LLL o1 o2 o3 -> emit bci_PUSH_LLL [SmallOp o1, SmallOp o2, SmallOp o3]
+ PUSH8 o1 -> emit bci_PUSH8 [SmallOp o1]
+ PUSH16 o1 -> emit bci_PUSH16 [SmallOp o1]
+ PUSH32 o1 -> emit bci_PUSH32 [SmallOp o1]
+ PUSH8_W o1 -> emit bci_PUSH8_W [SmallOp o1]
+ PUSH16_W o1 -> emit bci_PUSH16_W [SmallOp o1]
+ PUSH32_W o1 -> emit bci_PUSH32_W [SmallOp o1]
PUSH_G nm -> do p <- ptr (BCOPtrName nm)
emit bci_PUSH_G [Op p]
PUSH_PRIMOP op -> do p <- ptr (BCOPtrPrimOp op)
@@ -365,6 +371,15 @@ assembleI dflags i = case i of
-> do let ul_bco = assembleBCO dflags proto
p <- ioptr (liftM BCOPtrBCO ul_bco)
emit (push_alts pk) [Op p]
+ PUSH_PAD8 -> emit bci_PUSH_PAD8 []
+ PUSH_PAD16 -> emit bci_PUSH_PAD16 []
+ PUSH_PAD32 -> emit bci_PUSH_PAD32 []
+ PUSH_UBX8 lit -> do np <- literal lit
+ emit bci_PUSH_UBX8 [Op np]
+ PUSH_UBX16 lit -> do np <- literal lit
+ emit bci_PUSH_UBX16 [Op np]
+ PUSH_UBX32 lit -> do np <- literal lit
+ emit bci_PUSH_UBX32 [Op np]
PUSH_UBX lit nws -> do np <- literal lit
emit bci_PUSH_UBX [Op np, SmallOp nws]
diff --git a/compiler/ghci/ByteCodeGen.hs b/compiler/ghci/ByteCodeGen.hs
index c7b96a83a0..697dc63b43 100644
--- a/compiler/ghci/ByteCodeGen.hs
+++ b/compiler/ghci/ByteCodeGen.hs
@@ -47,8 +47,9 @@ import Unique
import FastString
import Panic
import StgCmmClosure ( NonVoid(..), fromNonVoid, nonVoidIds )
-import StgCmmLayout ( ArgRep(..), toArgRep, argRepSizeW,
- mkVirtHeapOffsets, mkVirtConstrOffsets )
+import StgCmmLayout ( ArgRep(..), FieldOffOrPadding(..),
+ toArgRep, argRepSizeW,
+ mkVirtHeapOffsetsWithPadding, mkVirtConstrOffsets )
import SMRep hiding (WordOff, ByteOff, wordsToBytes)
import Bitmap
import OrdList
@@ -455,6 +456,9 @@ truncIntegral16 w
| otherwise
= fromIntegral w
+trunc16B :: ByteOff -> Word16
+trunc16B = truncIntegral16
+
trunc16W :: WordOff -> Word16
trunc16W = truncIntegral16
@@ -798,10 +802,13 @@ mkConAppCode orig_d _ p con args_r_to_l =
, not (isVoidRep prim_rep)
]
is_thunk = False
- (_, _, args_offsets) = mkVirtHeapOffsets dflags is_thunk non_voids
+ (_, _, args_offsets) =
+ mkVirtHeapOffsetsWithPadding dflags is_thunk non_voids
- do_pushery !d ((arg, _) : args) = do
- (push, arg_bytes) <- pushAtom d p (fromNonVoid arg)
+ do_pushery !d (arg : args) = do
+ (push, arg_bytes) <- case arg of
+ (Padding l _) -> pushPadding l
+ (FieldOff a _) -> pushConstrAtom d p (fromNonVoid a)
more_push_code <- do_pushery (d + arg_bytes) args
return (push `appOL` more_push_code)
do_pushery !d [] = do
@@ -926,7 +933,8 @@ doCase d s p (_,scrut) bndr alts is_unboxed_tuple
| otherwise = wordSize dflags
-- depth of stack after the return value has been pushed
- d_bndr = d + ret_frame_size_b + idSizeB dflags bndr
+ d_bndr =
+ d + ret_frame_size_b + wordsToBytes dflags (idSizeW dflags bndr)
-- depth of stack after the extra info table for an unboxed return
-- has been pushed, if any. This is the stack depth at the
@@ -1127,8 +1135,7 @@ generateCCall d0 s p (CCallSpec target cconv safety) fn args_r_to_l
code_n_reps <- pargs d0 args_r_to_l
let
(pushs_arg, a_reps_pushed_r_to_l) = unzip code_n_reps
- a_reps_sizeW =
- WordOff (sum (map (primRepSizeW dflags) a_reps_pushed_r_to_l))
+ a_reps_sizeW = sum (map (repSizeWords dflags) a_reps_pushed_r_to_l)
push_args = concatOL pushs_arg
!d_after_args = d0 + wordsToBytes dflags a_reps_sizeW
@@ -1218,7 +1225,7 @@ generateCCall d0 s p (CCallSpec target cconv safety) fn args_r_to_l
-- Push the return placeholder. For a call returning nothing,
-- this is a V (tag).
- r_sizeW = WordOff (primRepSizeW dflags r_rep)
+ r_sizeW = repSizeWords dflags r_rep
d_after_r = d_after_Addr + wordsToBytes dflags r_sizeW
push_r =
if returns_void
@@ -1472,12 +1479,20 @@ pushAtom d p (AnnVar var)
| Just d_v <- lookupBCEnv_maybe var p -- var is a local variable
= do dflags <- getDynFlags
- -- Currently this code assumes that @szb@ is a multiple of full words.
- -- It'll need to change to support, e.g., sub-word constructor fields.
- let !szb = idSizeB dflags var
- !szw = bytesToWords dflags szb -- szb is a multiple of words
- l = trunc16W $ bytesToWords dflags (d - d_v) + szw - 1
- return (toOL (genericReplicate szw (PUSH_L l)), szb)
+
+ let !szb = idSizeCon dflags var
+ with_instr instr = do
+ let !off_b = trunc16B $ d - d_v
+ return (unitOL (instr off_b), wordSize dflags)
+
+ case szb of
+ 1 -> with_instr PUSH8_W
+ 2 -> with_instr PUSH16_W
+ 4 -> with_instr PUSH32_W
+ _ -> do
+ let !szw = bytesToWords dflags szb
+ !off_w = trunc16W $ bytesToWords dflags (d - d_v) + szw - 1
+ return (toOL (genericReplicate szw (PUSH_L off_w)), szb)
-- d - d_v offset from TOS to the first slot of the object
--
-- d - d_v + sz - 1 offset from the TOS of the last slot of the object
@@ -1492,7 +1507,7 @@ pushAtom d p (AnnVar var)
ptrToWordPtr $ fromRemotePtr ptr
Nothing -> do
dflags <- getDynFlags
- let sz = idSizeB dflags var
+ let sz = idSizeCon dflags var
MASSERT( sz == wordSize dflags )
return (unitOL (PUSH_G (getName var)), sz)
@@ -1525,6 +1540,36 @@ pushAtom _ _ expr
(pprCoreExpr (deAnnotate' expr))
+-- | Push an atom for constructor (i.e., PACK instruction) onto the stack.
+-- This is slightly different to @pushAtom@ due to the fact that we allow
+-- packing constructor fields. See also @mkConAppCode@ and @pushPadding@.
+pushConstrAtom
+ :: StackDepth -> BCEnv -> AnnExpr' Id DVarSet -> BcM (BCInstrList, ByteOff)
+
+pushConstrAtom _ _ (AnnLit lit@(MachFloat _)) =
+ return (unitOL (PUSH_UBX32 lit), 4)
+
+pushConstrAtom d p (AnnVar v)
+ | Just d_v <- lookupBCEnv_maybe v p = do -- v is a local variable
+ dflags <- getDynFlags
+ let !szb = idSizeCon dflags v
+ done instr = do
+ let !off = trunc16B $ d - d_v
+ return (unitOL (instr off), szb)
+ case szb of
+ 1 -> done PUSH8
+ 2 -> done PUSH16
+ 4 -> done PUSH32
+ _ -> pushAtom d p (AnnVar v)
+
+pushConstrAtom d p expr = pushAtom d p expr
+
+pushPadding :: Int -> BcM (BCInstrList, ByteOff)
+pushPadding 1 = return (unitOL (PUSH_PAD8), 1)
+pushPadding 2 = return (unitOL (PUSH_PAD16), 2)
+pushPadding 4 = return (unitOL (PUSH_PAD32), 4)
+pushPadding x = panic $ "pushPadding x=" ++ show x
+
-- -----------------------------------------------------------------------------
-- Given a bunch of alts code and their discrs, do the donkey work
-- of making a multiway branch using a switch tree.
@@ -1669,8 +1714,8 @@ lookupBCEnv_maybe = Map.lookup
idSizeW :: DynFlags -> Id -> WordOff
idSizeW dflags = WordOff . argRepSizeW dflags . bcIdArgRep
-idSizeB :: DynFlags -> Id -> ByteOff
-idSizeB dflags = wordsToBytes dflags . idSizeW dflags
+idSizeCon :: DynFlags -> Id -> ByteOff
+idSizeCon dflags = ByteOff . primRepSizeB dflags . bcIdPrimRep
bcIdArgRep :: Id -> ArgRep
bcIdArgRep = toArgRep . bcIdPrimRep
@@ -1682,6 +1727,9 @@ bcIdPrimRep id
| otherwise
= pprPanic "bcIdPrimRep" (ppr id <+> dcolon <+> ppr (idType id))
+repSizeWords :: DynFlags -> PrimRep -> WordOff
+repSizeWords dflags rep = WordOff $ argRepSizeW dflags (toArgRep rep)
+
isFollowableArg :: ArgRep -> Bool
isFollowableArg P = True
isFollowableArg _ = False
diff --git a/compiler/ghci/ByteCodeInstr.hs b/compiler/ghci/ByteCodeInstr.hs
index 7ef82206cb..07dcd2222a 100644
--- a/compiler/ghci/ByteCodeInstr.hs
+++ b/compiler/ghci/ByteCodeInstr.hs
@@ -62,6 +62,23 @@ data BCInstr
| PUSH_LL !Word16 !Word16{-2 offsets-}
| PUSH_LLL !Word16 !Word16 !Word16{-3 offsets-}
+ -- Push the specified local as a 8, 16, 32 bit value onto the stack. (i.e.,
+ -- the stack will grow by 8, 16 or 32 bits)
+ | PUSH8 !Word16
+ | PUSH16 !Word16
+ | PUSH32 !Word16
+
+ -- Push the specifiec local as a 8, 16, 32 bit value onto the stack, but the
+ -- value will take the whole word on the stack (i.e., the stack will gorw by
+ -- a word)
+ -- This is useful when extracting a packed constructor field for further use.
+ -- Currently we expect all values on the stack to take full words, except for
+ -- the ones used for PACK (i.e., actually constracting new data types, in
+ -- which case we use PUSH{8,16,32})
+ | PUSH8_W !Word16
+ | PUSH16_W !Word16
+ | PUSH32_W !Word16
+
-- Push a ptr (these all map to PUSH_G really)
| PUSH_G Name
| PUSH_PRIMOP PrimOp
@@ -71,8 +88,16 @@ data BCInstr
| PUSH_ALTS (ProtoBCO Name)
| PUSH_ALTS_UNLIFTED (ProtoBCO Name) ArgRep
+ -- Pushing 8, 16 and 32 bits of padding (for constructors).
+ | PUSH_PAD8
+ | PUSH_PAD16
+ | PUSH_PAD32
+
-- Pushing literals
- | PUSH_UBX Literal Word16
+ | PUSH_UBX8 Literal
+ | PUSH_UBX16 Literal
+ | PUSH_UBX32 Literal
+ | PUSH_UBX Literal Word16
-- push this int/float/double/addr, on the stack. Word16
-- is # of words to copy from literal pool. Eitherness reflects
-- the difficulty of dealing with MachAddr here, mostly due to
@@ -194,6 +219,12 @@ instance Outputable BCInstr where
ppr (PUSH_L offset) = text "PUSH_L " <+> ppr offset
ppr (PUSH_LL o1 o2) = text "PUSH_LL " <+> ppr o1 <+> ppr o2
ppr (PUSH_LLL o1 o2 o3) = text "PUSH_LLL" <+> ppr o1 <+> ppr o2 <+> ppr o3
+ ppr (PUSH8 offset) = text "PUSH8 " <+> ppr offset
+ ppr (PUSH16 offset) = text "PUSH16 " <+> ppr offset
+ ppr (PUSH32 offset) = text "PUSH32 " <+> ppr offset
+ ppr (PUSH8_W offset) = text "PUSH8_W " <+> ppr offset
+ ppr (PUSH16_W offset) = text "PUSH16_W " <+> ppr offset
+ ppr (PUSH32_W offset) = text "PUSH32_W " <+> ppr offset
ppr (PUSH_G nm) = text "PUSH_G " <+> ppr nm
ppr (PUSH_PRIMOP op) = text "PUSH_G " <+> text "GHC.PrimopWrappers."
<> ppr op
@@ -201,6 +232,13 @@ instance Outputable BCInstr where
ppr (PUSH_ALTS bco) = hang (text "PUSH_ALTS") 2 (ppr bco)
ppr (PUSH_ALTS_UNLIFTED bco pk) = hang (text "PUSH_ALTS_UNLIFTED" <+> ppr pk) 2 (ppr bco)
+ ppr PUSH_PAD8 = text "PUSH_PAD8"
+ ppr PUSH_PAD16 = text "PUSH_PAD16"
+ ppr PUSH_PAD32 = text "PUSH_PAD32"
+
+ ppr (PUSH_UBX8 lit) = text "PUSH_UBX8" <+> ppr lit
+ ppr (PUSH_UBX16 lit) = text "PUSH_UBX16" <+> ppr lit
+ ppr (PUSH_UBX32 lit) = text "PUSH_UBX32" <+> ppr lit
ppr (PUSH_UBX lit nw) = text "PUSH_UBX" <+> parens (ppr nw) <+> ppr lit
ppr PUSH_APPLY_N = text "PUSH_APPLY_N"
ppr PUSH_APPLY_V = text "PUSH_APPLY_V"
@@ -269,11 +307,23 @@ bciStackUse STKCHECK{} = 0
bciStackUse PUSH_L{} = 1
bciStackUse PUSH_LL{} = 2
bciStackUse PUSH_LLL{} = 3
+bciStackUse PUSH8{} = 1 -- overapproximation
+bciStackUse PUSH16{} = 1 -- overapproximation
+bciStackUse PUSH32{} = 1 -- overapproximation on 64bit arch
+bciStackUse PUSH8_W{} = 1 -- takes exactly 1 word
+bciStackUse PUSH16_W{} = 1 -- takes exactly 1 word
+bciStackUse PUSH32_W{} = 1 -- takes exactly 1 word
bciStackUse PUSH_G{} = 1
bciStackUse PUSH_PRIMOP{} = 1
bciStackUse PUSH_BCO{} = 1
bciStackUse (PUSH_ALTS bco) = 2 + protoBCOStackUse bco
bciStackUse (PUSH_ALTS_UNLIFTED bco _) = 2 + protoBCOStackUse bco
+bciStackUse (PUSH_PAD8) = 1 -- overapproximation
+bciStackUse (PUSH_PAD16) = 1 -- overapproximation
+bciStackUse (PUSH_PAD32) = 1 -- overapproximation on 64bit arch
+bciStackUse (PUSH_UBX8 _) = 1 -- overapproximation
+bciStackUse (PUSH_UBX16 _) = 1 -- overapproximation
+bciStackUse (PUSH_UBX32 _) = 1 -- overapproximation on 64bit arch
bciStackUse (PUSH_UBX _ nw) = fromIntegral nw
bciStackUse PUSH_APPLY_N{} = 1
bciStackUse PUSH_APPLY_V{} = 1
diff --git a/compiler/ghci/RtClosureInspect.hs b/compiler/ghci/RtClosureInspect.hs
index 63d1886b4d..b85322d60e 100644
--- a/compiler/ghci/RtClosureInspect.hs
+++ b/compiler/ghci/RtClosureInspect.hs
@@ -60,6 +60,7 @@ import GHC.Arr ( Array(..) )
import GHC.Char
import GHC.Exts
import GHC.IO ( IO(..) )
+import SMRep ( roundUpTo )
import Control.Monad
import Data.Maybe
@@ -71,6 +72,7 @@ import Data.Sequence (viewl, ViewL(..))
import Foreign
import System.IO.Unsafe
+
---------------------------------------------
-- * A representation of semi evaluated Terms
---------------------------------------------
@@ -148,11 +150,13 @@ data ClosureType = Constr
| Other Int
deriving (Show, Eq)
+data ClosureNonPtrs = ClosureNonPtrs ByteArray#
+
data Closure = Closure { tipe :: ClosureType
, infoPtr :: Ptr ()
, infoTable :: StgInfoTable
, ptrs :: Array Int HValue
- , nonPtrs :: [Word]
+ , nonPtrs :: ClosureNonPtrs
}
instance Outputable ClosureType where
@@ -184,8 +188,7 @@ getClosureData dflags a =
let tipe = readCType (InfoTable.tipe itbl)
elems = fromIntegral (InfoTable.ptrs itbl)
ptrsList = Array 0 (elems - 1) elems ptrs
- nptrs_data = [W# (indexWordArray# nptrs i)
- | I# i <- [0.. fromIntegral (InfoTable.nptrs itbl)-1] ]
+ nptrs_data = ClosureNonPtrs nptrs
ASSERT(elems >= 0) return ()
ptrsList `seq`
return (Closure tipe iptr0 itbl ptrsList nptrs_data)
@@ -793,47 +796,75 @@ cvObtainTerm hsc_env max_depth force old_ty hval = runTR hsc_env $ do
extractSubTerms :: (Type -> HValue -> TcM Term)
-> Closure -> [Type] -> TcM [Term]
-extractSubTerms recurse clos = liftM thdOf3 . go 0 (nonPtrs clos)
+extractSubTerms recurse clos = liftM thdOf3 . go 0 0
where
- go ptr_i ws [] = return (ptr_i, ws, [])
- go ptr_i ws (ty:tys)
+ !(ClosureNonPtrs array) = nonPtrs clos
+
+ go ptr_i arr_i [] = return (ptr_i, arr_i, [])
+ go ptr_i arr_i (ty:tys)
| Just (tc, elem_tys) <- tcSplitTyConApp_maybe ty
, isUnboxedTupleTyCon tc
-- See Note [Unboxed tuple RuntimeRep vars] in TyCon
- = do (ptr_i, ws, terms0) <- go ptr_i ws (dropRuntimeRepArgs elem_tys)
- (ptr_i, ws, terms1) <- go ptr_i ws tys
- return (ptr_i, ws, unboxedTupleTerm ty terms0 : terms1)
+ = do (ptr_i, arr_i, terms0) <-
+ go ptr_i arr_i (dropRuntimeRepArgs elem_tys)
+ (ptr_i, arr_i, terms1) <- go ptr_i arr_i tys
+ return (ptr_i, arr_i, unboxedTupleTerm ty terms0 : terms1)
| otherwise
= case typePrimRepArgs ty of
[rep_ty] -> do
- (ptr_i, ws, term0) <- go_rep ptr_i ws ty rep_ty
- (ptr_i, ws, terms1) <- go ptr_i ws tys
- return (ptr_i, ws, term0 : terms1)
+ (ptr_i, arr_i, term0) <- go_rep ptr_i arr_i ty rep_ty
+ (ptr_i, arr_i, terms1) <- go ptr_i arr_i tys
+ return (ptr_i, arr_i, term0 : terms1)
rep_tys -> do
- (ptr_i, ws, terms0) <- go_unary_types ptr_i ws rep_tys
- (ptr_i, ws, terms1) <- go ptr_i ws tys
- return (ptr_i, ws, unboxedTupleTerm ty terms0 : terms1)
+ (ptr_i, arr_i, terms0) <- go_unary_types ptr_i arr_i rep_tys
+ (ptr_i, arr_i, terms1) <- go ptr_i arr_i tys
+ return (ptr_i, arr_i, unboxedTupleTerm ty terms0 : terms1)
- go_unary_types ptr_i ws [] = return (ptr_i, ws, [])
- go_unary_types ptr_i ws (rep_ty:rep_tys) = do
+ go_unary_types ptr_i arr_i [] = return (ptr_i, arr_i, [])
+ go_unary_types ptr_i arr_i (rep_ty:rep_tys) = do
tv <- newVar liftedTypeKind
- (ptr_i, ws, term0) <- go_rep ptr_i ws tv rep_ty
- (ptr_i, ws, terms1) <- go_unary_types ptr_i ws rep_tys
- return (ptr_i, ws, term0 : terms1)
-
- go_rep ptr_i ws ty rep
- | isGcPtrRep rep
- = do t <- appArr (recurse ty) (ptrs clos) ptr_i
- return (ptr_i + 1, ws, t)
- | otherwise
- = do dflags <- getDynFlags
- let (ws0, ws1) = splitAt (primRepSizeW dflags rep) ws
- return (ptr_i, ws1, Prim ty ws0)
+ (ptr_i, arr_i, term0) <- go_rep ptr_i arr_i tv rep_ty
+ (ptr_i, arr_i, terms1) <- go_unary_types ptr_i arr_i rep_tys
+ return (ptr_i, arr_i, term0 : terms1)
+
+ go_rep ptr_i arr_i ty rep
+ | isGcPtrRep rep = do
+ t <- appArr (recurse ty) (ptrs clos) ptr_i
+ return (ptr_i + 1, arr_i, t)
+ | otherwise = do
+ -- This is a bit involved since we allow packing multiple fields
+ -- within a single word. See also
+ -- StgCmmLayout.mkVirtHeapOffsetsWithPadding
+ dflags <- getDynFlags
+ let word_size = wORD_SIZE dflags
+ size_b = primRepSizeB dflags rep
+ -- Fields are always aligned.
+ !aligned_idx = roundUpTo arr_i size_b
+ !new_arr_i = aligned_idx + size_b
+ ws
+ | size_b < word_size = [index size_b array aligned_idx]
+ | otherwise =
+ let (q, r) = size_b `quotRem` word_size
+ in ASSERT( r == 0 )
+ [ W# (indexWordArray# array i)
+ | o <- [0.. q - 1]
+ , let !(I# i) = (aligned_idx + o) `quot` word_size
+ ]
+ return (ptr_i, new_arr_i, Prim ty ws)
unboxedTupleTerm ty terms
= Term ty (Right (tupleDataCon Unboxed (length terms)))
(error "unboxedTupleTerm: no HValue for unboxed tuple") terms
+ index item_size_b array (I# index_b) =
+ case item_size_b of
+ -- indexWord*Array# functions take offsets dependent not in bytes,
+ -- but in multiples of an element's size.
+ 1 -> W# (indexWord8Array# array index_b)
+ 2 -> W# (indexWord16Array# array (index_b `quotInt#` 2#))
+ 4 -> W# (indexWord32Array# array (index_b `quotInt#` 4#))
+ _ -> panic ("Weird byte-index: " ++ show (I# index_b))
+
-- Fast, breadth-first Type reconstruction
------------------------------------------
diff --git a/compiler/main/Constants.hs b/compiler/main/Constants.hs
index 06d93128e7..7eda130917 100644
--- a/compiler/main/Constants.hs
+++ b/compiler/main/Constants.hs
@@ -38,5 +38,9 @@ mAX_SOLVER_ITERATIONS = 4
wORD64_SIZE :: Int
wORD64_SIZE = 8
+-- Size of float in bytes.
+fLOAT_SIZE :: Int
+fLOAT_SIZE = 4
+
tARGET_MAX_CHAR :: Int
tARGET_MAX_CHAR = 0x10ffff
diff --git a/compiler/types/TyCon.hs b/compiler/types/TyCon.hs
index 103c824bca..596c5f37ef 100644
--- a/compiler/types/TyCon.hs
+++ b/compiler/types/TyCon.hs
@@ -114,7 +114,8 @@ module TyCon(
-- * Primitive representations of Types
PrimRep(..), PrimElemRep(..),
isVoidRep, isGcPtrRep,
- primRepSizeW, primElemRepSizeB,
+ primRepSizeB,
+ primElemRepSizeB,
primRepIsFloat,
-- * Recursion breaking
@@ -1340,19 +1341,25 @@ isGcPtrRep LiftedRep = True
isGcPtrRep UnliftedRep = True
isGcPtrRep _ = False
--- | Find the size of a 'PrimRep', in words
-primRepSizeW :: DynFlags -> PrimRep -> Int
-primRepSizeW _ IntRep = 1
-primRepSizeW _ WordRep = 1
-primRepSizeW dflags Int64Rep = wORD64_SIZE `quot` wORD_SIZE dflags
-primRepSizeW dflags Word64Rep = wORD64_SIZE `quot` wORD_SIZE dflags
-primRepSizeW _ FloatRep = 1 -- NB. might not take a full word
-primRepSizeW dflags DoubleRep = dOUBLE_SIZE dflags `quot` wORD_SIZE dflags
-primRepSizeW _ AddrRep = 1
-primRepSizeW _ LiftedRep = 1
-primRepSizeW _ UnliftedRep = 1
-primRepSizeW _ VoidRep = 0
-primRepSizeW dflags (VecRep len rep) = len * primElemRepSizeB rep `quot` wORD_SIZE dflags
+-- | The size of a 'PrimRep' in bytes.
+--
+-- This applies also when used in a constructor, where we allow packing the
+-- fields. For instance, in @data Foo = Foo Float# Float#@ the two fields will
+-- take only 8 bytes, which for 64-bit arch will be equal to 1 word.
+-- See also mkVirtHeapOffsetsWithPadding for details of how data fields are
+-- layed out.
+primRepSizeB :: DynFlags -> PrimRep -> Int
+primRepSizeB dflags IntRep = wORD_SIZE dflags
+primRepSizeB dflags WordRep = wORD_SIZE dflags
+primRepSizeB _ Int64Rep = wORD64_SIZE
+primRepSizeB _ Word64Rep = wORD64_SIZE
+primRepSizeB _ FloatRep = fLOAT_SIZE
+primRepSizeB dflags DoubleRep = dOUBLE_SIZE dflags
+primRepSizeB dflags AddrRep = wORD_SIZE dflags
+primRepSizeB dflags LiftedRep = wORD_SIZE dflags
+primRepSizeB dflags UnliftedRep = wORD_SIZE dflags
+primRepSizeB _ VoidRep = 0
+primRepSizeB _ (VecRep len rep) = len * primElemRepSizeB rep
primElemRepSizeB :: PrimElemRep -> Int
primElemRepSizeB Int8ElemRep = 1