From 6dc22bfa9d0026c09abf94e44f04fb9e761d4e54 Mon Sep 17 00:00:00 2001
From: Paolo Capriotti <p.capriotti@gmail.com>
Date: Thu, 5 Apr 2012 09:52:18 +0100
Subject: Support large SLIDE instructions.

The bytecode generator used to keep track of the stack depth with a
16-bit counter, which could overflow for very large BCOs, resulting in
incorrect bytecode.

This commit switches to a word-sized counter, and eagerly panics
whenever an operand is too big, instead of truncating the result.

This allows us to work around the 16-bit limitation in the case of SLIDE
instructions, since we can simply factor it into multiple SLIDEs with
smaller arguments.
---
 compiler/ghci/ByteCodeGen.lhs | 110 +++++++++++++++++++++++++-----------------
 1 file changed, 66 insertions(+), 44 deletions(-)

diff --git a/compiler/ghci/ByteCodeGen.lhs b/compiler/ghci/ByteCodeGen.lhs
index 046d6ec132..c8b1b303b5 100644
--- a/compiler/ghci/ByteCodeGen.lhs
+++ b/compiler/ghci/ByteCodeGen.lhs
@@ -131,11 +131,11 @@ coreExprToBCOs dflags this_mod expr
 
 type BCInstrList = OrdList BCInstr
 
-type Sequel = Word16 -- back off to this depth before ENTER
+type Sequel = Word -- back off to this depth before ENTER
 
 -- Maps Ids to the offset from the stack _base_ so we don't have
 -- to mess with it after each push/pop.
-type BCEnv = Map Id Word16 -- To find vars on the stack
+type BCEnv = Map Id Word -- To find vars on the stack
 
 {-
 ppBCEnv :: BCEnv -> SDoc
@@ -298,10 +298,10 @@ schemeR_wrk fvs nm original_body (args, body)
                  arity bitmap_size bitmap False{-not alts-})
 
 -- introduce break instructions for ticked expressions
-schemeER_wrk :: Word16 -> BCEnv -> AnnExpr' Id VarSet -> BcM BCInstrList
+schemeER_wrk :: Word -> BCEnv -> AnnExpr' Id VarSet -> BcM BCInstrList
 schemeER_wrk d p rhs
   | AnnTick (Breakpoint tick_no fvs) (_annot, newRhs) <- rhs
-  = do  code <- schemeE d 0 p newRhs
+  = do  code <- schemeE (fromIntegral d) 0 p newRhs
         arr <- getBreakArray
         this_mod <- getCurrentModule
         let idOffSets = getVarOffSets d p fvs
@@ -315,16 +315,23 @@ schemeER_wrk d p rhs
                          BA arr# ->
                              BRK_FUN arr# (fromIntegral tick_no) breakInfo
         return $ breakInstr `consOL` code
-   | otherwise = schemeE d 0 p rhs
+   | otherwise = schemeE (fromIntegral d) 0 p rhs
 
-getVarOffSets :: Word16 -> BCEnv -> [Id] -> [(Id, Word16)]
+getVarOffSets :: Word -> BCEnv -> [Id] -> [(Id, Word16)]
 getVarOffSets d p = catMaybes . map (getOffSet d p)
 
-getOffSet :: Word16 -> BCEnv -> Id -> Maybe (Id, Word16)
+getOffSet :: Word -> BCEnv -> Id -> Maybe (Id, Word16)
 getOffSet d env id
    = case lookupBCEnv_maybe id env of
         Nothing     -> Nothing
-        Just offset -> Just (id, d - offset)
+        Just offset -> Just (id, trunc16 $ d - offset)
+
+trunc16 :: Word -> Word16
+trunc16 w
+    | w > fromIntegral (maxBound :: Word16)
+    = panic "stack depth overflow"
+    | otherwise
+    = fromIntegral w
 
 fvsToEnv :: BCEnv -> VarSet -> [Id]
 -- Takes the free variables of a right-hand side, and
@@ -342,7 +349,7 @@ fvsToEnv p fvs = [v | v <- varSetElems fvs,
 -- -----------------------------------------------------------------------------
 -- schemeE
 
-returnUnboxedAtom :: Word16 -> Sequel -> BCEnv
+returnUnboxedAtom :: Word -> Sequel -> BCEnv
                  -> AnnExpr' Id VarSet -> CgRep
                  -> BcM BCInstrList
 -- Returning an unlifted value.
@@ -355,7 +362,7 @@ returnUnboxedAtom d s p e e_rep
 
 -- Compile code to apply the given expression to the remaining args
 -- on the stack, returning a HNF.
-schemeE :: Word16 -> Sequel -> BCEnv -> AnnExpr' Id VarSet -> BcM BCInstrList
+schemeE :: Word -> Sequel -> BCEnv -> AnnExpr' Id VarSet -> BcM BCInstrList
 
 schemeE d s p e
    | Just e' <- bcView e
@@ -404,7 +411,7 @@ schemeE d s p (AnnLet binds (_,body))
          -- after the closures have been allocated in the heap (but not
          -- filled in), and pointers to them parked on the stack.
          p'    = Map.insertList (zipE xs (mkStackOffsets d (genericReplicate n_binds 1))) p
-         d'    = d + n_binds
+         d'    = d + fromIntegral n_binds
          zipE  = zipEqual "schemeE"
 
          -- ToDo: don't build thunks for things with no free variables
@@ -415,7 +422,7 @@ schemeE d s p (AnnLet binds (_,body))
                      | otherwise  = MKPAP
          build_thunk dd (fv:fvs) size bco off arity = do
               (push_code, pushed_szw) <- pushAtom dd p' (AnnVar fv)
-              more_push_code <- build_thunk (dd+pushed_szw) fvs size bco off arity
+              more_push_code <- build_thunk (dd + fromIntegral pushed_szw) fvs size bco off arity
               return (push_code `appOL` more_push_code)
 
          alloc_code = toOL (zipWith mkAlloc sizes arities)
@@ -542,7 +549,7 @@ schemeE _ _ _ expr
 -- 4.  Otherwise, it must be a function call.  Push the args
 --     right to left, SLIDE and ENTER.
 
-schemeT :: Word16       -- Stack depth
+schemeT :: Word         -- Stack depth
         -> Sequel       -- Sequel depth
         -> BCEnv        -- stack env
         -> AnnExpr' Id VarSet
@@ -561,7 +568,7 @@ schemeT d s p app
    = do (push, arg_words) <- pushAtom d p arg
         tagToId_sequence <- implement_tagToId constr_names
         return (push `appOL`  tagToId_sequence
-                       `appOL`  mkSLIDE 1 (d+arg_words-s)
+                       `appOL`  mkSLIDE 1 (d - s + fromIntegral arg_words)
                        `snocOL` ENTER)
 
    -- Case 1
@@ -625,7 +632,7 @@ schemeT d s p app
 -- Generate code to build a constructor application,
 -- leaving it on top of the stack
 
-mkConAppCode :: Word16 -> Sequel -> BCEnv
+mkConAppCode :: Word -> Sequel -> BCEnv
              -> DataCon                 -- The data constructor
              -> [AnnExpr' Id VarSet]    -- Args, in *reverse* order
              -> BcM BCInstrList
@@ -646,12 +653,12 @@ mkConAppCode orig_d _ p con args_r_to_l
 
       do_pushery d (arg:args)
          = do (push, arg_words) <- pushAtom d p arg
-              more_push_code <- do_pushery (d+arg_words) args
+              more_push_code <- do_pushery (d + fromIntegral arg_words) args
               return (push `appOL` more_push_code)
       do_pushery d []
          = return (unitOL (PACK con n_arg_words))
          where
-           n_arg_words = d - orig_d
+           n_arg_words = trunc16 $ d - orig_d
 
 
 -- -----------------------------------------------------------------------------
@@ -662,19 +669,19 @@ mkConAppCode orig_d _ p con args_r_to_l
 -- returned, even if it is a pointed type.  We always just return.
 
 unboxedTupleReturn
-        :: Word16 -> Sequel -> BCEnv
+        :: Word -> Sequel -> BCEnv
         -> AnnExpr' Id VarSet -> BcM BCInstrList
 unboxedTupleReturn d s p arg = do
   (push, sz) <- pushAtom d p arg
   return (push                      `appOL`
-          mkSLIDE sz (d-s)          `snocOL`
+          mkSLIDE sz (d - s)        `snocOL`
           RETURN_UBX (atomRep arg))
 
 -- -----------------------------------------------------------------------------
 -- Generate code for a tail-call
 
 doTailCall
-        :: Word16 -> Sequel -> BCEnv
+        :: Word -> Sequel -> BCEnv
         -> Id -> [AnnExpr' Id VarSet]
         -> BcM BCInstrList
 doTailCall init_d s p fn args
@@ -685,7 +692,7 @@ doTailCall init_d s p fn args
         (push_fn, sz) <- pushAtom d p (AnnVar fn)
         ASSERT( sz == 1 ) return ()
         return (push_fn `appOL` (
-                  mkSLIDE ((d-init_d) + 1) (init_d - s) `appOL`
+                  mkSLIDE (trunc16 $ d - init_d + 1) (init_d - s) `appOL`
                   unitOL ENTER))
   do_pushes d args reps = do
       let (push_apply, n, rest_of_reps) = findPushSeq reps
@@ -698,7 +705,7 @@ doTailCall init_d s p fn args
   push_seq d [] = return (d, nilOL)
   push_seq d (arg:args) = do
     (push_code, sz) <- pushAtom d p arg
-    (final_d, more_push_code) <- push_seq (d+sz) args
+    (final_d, more_push_code) <- push_seq (d + fromIntegral sz) args
     return (final_d, push_code `appOL` more_push_code)
 
 -- v. similar to CgStackery.findMatch, ToDo: merge
@@ -731,7 +738,7 @@ findPushSeq _
 -- -----------------------------------------------------------------------------
 -- Case expressions
 
-doCase  :: Word16 -> Sequel -> BCEnv
+doCase  :: Word -> Sequel -> BCEnv
         -> AnnExpr Id VarSet -> Id -> [AnnAlt Id VarSet]
         -> Bool  -- True <=> is an unboxed tuple case, don't enter the result
         -> BcM BCInstrList
@@ -741,10 +748,12 @@ doCase d s p (_,scrut) bndr alts is_unboxed_tuple
         -- underneath it is the pointer to the alt_code BCO.
         -- When an alt is entered, it assumes the returned value is
         -- on top of the itbl.
+        ret_frame_sizeW :: Word
         ret_frame_sizeW = 2
 
         -- An unlifted value gets an extra info table pushed on top
         -- when it is returned.
+        unlifted_itbl_sizeW :: Word
         unlifted_itbl_sizeW | isAlgCase = 0
                             | otherwise = 1
 
@@ -758,7 +767,7 @@ doCase d s p (_,scrut) bndr alts is_unboxed_tuple
 
         -- Env in which to compile the alts, not including
         -- any vars bound by the alts themselves
-        p_alts = Map.insert bndr (d_bndr - 1) p
+        p_alts = Map.insert bndr (fromIntegral d_bndr - 1) p
 
         bndr_ty = idType bndr
         isAlgCase = not (isUnLiftedType bndr_ty) && not is_unboxed_tuple
@@ -788,8 +797,8 @@ doCase d s p (_,scrut) bndr alts is_unboxed_tuple
                         p_alts
              in do
              MASSERT(isAlgCase)
-             rhs_code <- schemeE (d_alts+size) s p' rhs
-             return (my_discr alt, unitOL (UNPACK size) `appOL` rhs_code)
+             rhs_code <- schemeE (d_alts + size) s p' rhs
+             return (my_discr alt, unitOL (UNPACK (trunc16 size)) `appOL` rhs_code)
 	   where
 	     real_bndrs = filterOut isTyVar bndrs
 
@@ -828,7 +837,7 @@ doCase d s p (_,scrut) bndr alts is_unboxed_tuple
         -- really want a bitmap up to depth (d-s).  This affects compilation of
         -- case-of-case expressions, which is the only time we can be compiling a
         -- case expression with s /= 0.
-        bitmap_size = d-s
+        bitmap_size = trunc16 $ d-s
         bitmap_size' :: Int
         bitmap_size' = fromIntegral bitmap_size
         bitmap = intsToReverseBitmap bitmap_size'{-size-}
@@ -839,7 +848,7 @@ doCase d s p (_,scrut) bndr alts is_unboxed_tuple
           spread (id, offset)
                 | isFollowableArg (idCgRep id) = [ rel_offset ]
                 | otherwise = []
-                where rel_offset = d - offset - 1
+                where rel_offset = trunc16 $ d - fromIntegral offset - 1
 
      in do
      alt_stuff <- mapM codeAlt alts
@@ -852,7 +861,9 @@ doCase d s p (_,scrut) bndr alts is_unboxed_tuple
      -- in
 --     trace ("case: bndr = " ++ showSDocDebug (ppr bndr) ++ "\ndepth = " ++ show d ++ "\nenv = \n" ++ showSDocDebug (ppBCEnv p) ++
 --            "\n      bitmap = " ++ show bitmap) $ do
-     scrut_code <- schemeE (d + ret_frame_sizeW) (d + ret_frame_sizeW) p scrut
+     scrut_code <- schemeE (d + ret_frame_sizeW)
+                           (d + ret_frame_sizeW)
+                           p scrut
      alt_bco' <- emitBc alt_bco
      let push_alts
             | isAlgCase = PUSH_ALTS alt_bco'
@@ -869,7 +880,7 @@ doCase d s p (_,scrut) bndr alts is_unboxed_tuple
 -- (machine) code for the ccall, and create bytecodes to call that and
 -- then return in the right way.
 
-generateCCall :: Word16 -> Sequel       -- stack and sequel depths
+generateCCall :: Word -> Sequel         -- stack and sequel depths
               -> BCEnv
               -> CCallSpec              -- where to call
               -> Id                     -- of target, for type info
@@ -896,25 +907,25 @@ generateCCall d0 s p (CCallSpec target cconv safety) fn args_r_to_l
                     -- contains.
                     Just t
                      | t == arrayPrimTyCon || t == mutableArrayPrimTyCon
-                       -> do rest <- pargs (d + addr_sizeW) az
+                       -> do rest <- pargs (d + fromIntegral addr_sizeW) az
                              code <- parg_ArrayishRep (fromIntegral arrPtrsHdrSize) d p a
                              return ((code,AddrRep):rest)
 
                      | t == byteArrayPrimTyCon || t == mutableByteArrayPrimTyCon
-                       -> do rest <- pargs (d + addr_sizeW) az
+                       -> do rest <- pargs (d + fromIntegral addr_sizeW) az
                              code <- parg_ArrayishRep (fromIntegral arrWordsHdrSize) d p a
                              return ((code,AddrRep):rest)
 
                     -- Default case: push taggedly, but otherwise intact.
                     _
                        -> do (code_a, sz_a) <- pushAtom d p a
-                             rest <- pargs (d+sz_a) az
+                             rest <- pargs (d + fromIntegral sz_a) az
                              return ((code_a, atomPrimRep a) : rest)
 
          -- Do magic for Ptr/Byte arrays.  Push a ptr to the array on
          -- the stack but then advance it over the headers, so as to
          -- point to the payload.
-         parg_ArrayishRep :: Word16 -> Word16 -> BCEnv -> AnnExpr' Id VarSet
+         parg_ArrayishRep :: Word16 -> Word -> BCEnv -> AnnExpr' Id VarSet
                           -> BcM BCInstrList
          parg_ArrayishRep hdrSize d p a
             = do (push_fo, _) <- pushAtom d p a
@@ -1016,14 +1027,14 @@ generateCCall d0 s p (CCallSpec target cconv safety) fn args_r_to_l
          (push_Addr, d_after_Addr)
             | is_static
             = (toOL [PUSH_UBX (Right static_target_addr) addr_sizeW],
-               d_after_args + addr_sizeW)
+               d_after_args + fromIntegral addr_sizeW)
             | otherwise -- is already on the stack
             = (nilOL, d_after_args)
 
          -- Push the return placeholder.  For a call returning nothing,
          -- this is a VoidArg (tag).
          r_sizeW   = fromIntegral (primRepSizeW r_rep)
-         d_after_r = d_after_Addr + r_sizeW
+         d_after_r = d_after_Addr + fromIntegral r_sizeW
          r_lit     = mkDummyLiteral r_rep
          push_r    = (if   returns_void
                       then nilOL
@@ -1035,7 +1046,7 @@ generateCCall d0 s p (CCallSpec target cconv safety) fn args_r_to_l
          -- instruction needs to describe the chunk of stack containing
          -- the ccall args to the GC, so it needs to know how large it
          -- is.  See comment in Interpreter.c with the CCALL instruction.
-         stk_offset   = d_after_r - s
+         stk_offset   = trunc16 $ d_after_r - s
 
      -- in
      -- the only difference in libffi mode is that we prepare a cif
@@ -1050,7 +1061,7 @@ generateCCall d0 s p (CCallSpec target cconv safety) fn args_r_to_l
          do_call      = unitOL (CCALL stk_offset (castFunPtrToPtr addr_of_marshaller)
                                  (fromIntegral (fromEnum (playInterruptible safety))))
          -- slide and return
-         wrapup       = mkSLIDE r_sizeW (d_after_r - r_sizeW - s)
+         wrapup       = mkSLIDE r_sizeW (d_after_r - fromIntegral r_sizeW - s)
                         `snocOL` RETURN_UBX (primRepToCgRep r_rep)
      --in
          --trace (show (arg1_offW, args_offW  ,  (map cgRepSizeW a_reps) )) $
@@ -1150,7 +1161,7 @@ implement_tagToId names
 -- to 5 and not to 4.  Stack locations are numbered from zero, so a
 -- depth 6 stack has valid words 0 .. 5.
 
-pushAtom :: Word16 -> BCEnv -> AnnExpr' Id VarSet -> BcM (BCInstrList, Word16)
+pushAtom :: Word -> BCEnv -> AnnExpr' Id VarSet -> BcM (BCInstrList, Word16)
 
 pushAtom d p e
    | Just e' <- bcView e
@@ -1170,7 +1181,7 @@ pushAtom d p (AnnVar v)
    = return (unitOL (PUSH_PRIMOP primop), 1)
 
    | Just d_v <- lookupBCEnv_maybe v p  -- v is a local variable
-   = let l = d - d_v + sz - 2
+   = let l = trunc16 $ d - d_v + fromIntegral sz - 2
      in return (toOL (genericReplicate sz (PUSH_L l)), sz)
          -- d - d_v                 the number of words between the TOS
          --                         and the 1st slot of the object
@@ -1401,7 +1412,7 @@ instance Outputable Discr where
    ppr NoDiscr    = text "DEF"
 
 
-lookupBCEnv_maybe :: Id -> BCEnv -> Maybe Word16
+lookupBCEnv_maybe :: Id -> BCEnv -> Maybe Word
 lookupBCEnv_maybe = Map.lookup
 
 idSizeW :: Id -> Int
@@ -1417,8 +1428,19 @@ unboxedTupleException
             "  Workaround: use -fobject-code, or compile this module to .o separately."))
 
 
-mkSLIDE :: Word16 -> Word16 -> OrdList BCInstr
-mkSLIDE n d = if d == 0 then nilOL else unitOL (SLIDE n d)
+mkSLIDE :: Word16 -> Word -> OrdList BCInstr
+mkSLIDE n d
+    -- if the amount to slide doesn't fit in a word,
+    -- generate multiple slide instructions
+    | d > fromIntegral limit
+    = SLIDE n limit `consOL` mkSLIDE n (d - fromIntegral limit)
+    | d == 0
+    = nilOL
+    | otherwise
+    = if d == 0 then nilOL else unitOL (SLIDE n $ fromIntegral d)
+    where
+        limit :: Word16
+        limit = maxBound
 
 splitApp :: AnnExpr' Var ann -> (AnnExpr' Var ann, [AnnExpr' Var ann])
         -- The arguments are returned in *right-to-left* order
@@ -1465,7 +1487,7 @@ isPtrAtom e = atomRep e == PtrArg
 -- Let szsw be the sizes in words of some items pushed onto the stack,
 -- which has initial depth d'.  Return the values which the stack environment
 -- should map these items to.
-mkStackOffsets :: Word16 -> [Word16] -> [Word16]
+mkStackOffsets :: Word -> [Word] -> [Word]
 mkStackOffsets original_depth szsw
    = map (subtract 1) (tail (scanl (+) original_depth szsw))
 
-- 
cgit v1.2.1