diff options
Diffstat (limited to 'testsuite/tests/perf/should_run')
31 files changed, 998 insertions, 0 deletions
diff --git a/testsuite/tests/perf/should_run/3586.hs b/testsuite/tests/perf/should_run/3586.hs new file mode 100644 index 0000000000..968f2eba27 --- /dev/null +++ b/testsuite/tests/perf/should_run/3586.hs @@ -0,0 +1,20 @@ +{-# LANGUAGE BangPatterns #-} +{-# OPTIONS -fvia-C -optc-O3 -fexcess-precision -optc-msse3 #-} + +import Control.Monad.ST +import Data.Array.ST +import Data.Array.Base + +main = print $ runST + (do arr <- newArray (1,2000000) 137.0 :: ST s (STUArray s Int Double) + go arr 2000000 0.0 ) + + +go :: STUArray s Int Double -> Int -> Double -> ST s Double +go !a i !acc + | i < 1 = return acc + | otherwise = do + b <- unsafeRead a i + unsafeWrite a i (b+3.0) + c <- unsafeRead a i + go a (i-1) (c+acc) diff --git a/testsuite/tests/perf/should_run/3586.stdout b/testsuite/tests/perf/should_run/3586.stdout new file mode 100644 index 0000000000..626282f10c --- /dev/null +++ b/testsuite/tests/perf/should_run/3586.stdout @@ -0,0 +1 @@ +2.79999863e8 diff --git a/testsuite/tests/perf/should_run/Makefile b/testsuite/tests/perf/should_run/Makefile new file mode 100644 index 0000000000..e839342f90 --- /dev/null +++ b/testsuite/tests/perf/should_run/Makefile @@ -0,0 +1,36 @@ +TOP=../../.. +include $(TOP)/mk/boilerplate.mk +include $(TOP)/mk/test.mk + +define runT3736 +./T3736 $1 +RTS -t --machine-readable 2>&1 | grep '"bytes allocated"' | sed -e 's/.*, "//' -e 's/".*//' +endef +.PHONY: T3736 +T3736: + $(RM) -f T3736.hi T3736.o T3736 + '$(TEST_HC)' $(TEST_HC_OPTS) -v0 -O --make T3736 -rtsopts +# Check ALLOC1 is > 100 just to check with have sane results, and if so, +# the test passes if the two numbers are equal. We could check that the +# actual numbers are in the range we expect too (on the various +# platforms), but we don't currently. + ALLOC1=`$(call runT3736,1)`; ALLOC2=`$(call runT3736,2)`; if [ "$$ALLOC1" -gt 100 ] && [ "$$ALLOC1" -eq "$$ALLOC2" ]; then echo Match; else echo "Mismatch: $$ALLOC1 $$ALLOC2"; fi + +.PHONY: T2902 +T2902: + $(RM) -f T2902_A T2902_B + $(RM) -f T2902_A.hi T2902_B.hi + $(RM) -f T2902_A.o T2902_B.o + $(RM) -f T2902_A_PairingSum.hi T2902_B_PairingSum.hi T2902_Sum.hi + $(RM) -f T2902_A_PairingSum.o T2902_B_PairingSum.o T2902_Sum.o + '$(TEST_HC)' $(TEST_HC_OPTS) -v0 -O --make T2902_A -rtsopts + '$(TEST_HC)' $(TEST_HC_OPTS) -v0 -O --make T2902_B -rtsopts + BAA=`./T2902_A +RTS -t --machine-readable 2>&1 | grep '"bytes allocated"' | sed -e 's/.*, "//' -e 's/")//'`; BAB=`./T2902_B +RTS -t --machine-readable 2>&1 | grep '"bytes allocated"' | sed -e 's/.*, "//' -e 's/")//'`; [ "$$BAA" = "" ] && echo 'T2902_A: No "bytes allocated"'; [ "$$BAA" = "$$BAB" ] || echo "T2902: Mismatch in \"bytes allocated\": $$BAA $$BAB" + +.PHONY: T149 +T149: + $(RM) -f T149_A T149_A.hi T149_A.o + $(RM) -f T149_B T149_B.hi T149_B.o + '$(TEST_HC)' $(TEST_HC_OPTS) -v0 -O --make T149_A -rtsopts + '$(TEST_HC)' $(TEST_HC_OPTS) -v0 -O --make T149_B -rtsopts + BAA=`./T149_A +RTS -t --machine-readable 2>&1 | grep '"bytes allocated"' | sed -e 's/.*, "//' -e 's/")//'`; BAB=`./T149_B +RTS -t --machine-readable 2>&1 | grep '"bytes allocated"' | sed -e 's/.*, "//' -e 's/")//'`; [ "$$BAA" = "" ] && echo 'T149_A: No "bytes allocated"'; [ "$$BAA" = "$$BAB" ] || echo "T149: Mismatch in \"bytes allocated\": $$BAA $$BAB" + diff --git a/testsuite/tests/perf/should_run/MethSharing.hs b/testsuite/tests/perf/should_run/MethSharing.hs new file mode 100644 index 0000000000..fb69bd4509 --- /dev/null +++ b/testsuite/tests/perf/should_run/MethSharing.hs @@ -0,0 +1,97 @@ +module Main where + +-- This test works efficiently because the full laziness +-- pass now floats out applications +-- \x -> f y (x+1) +-- It'll float out the (f y) if that's a redex + +loop :: Double -> [Int] -> Double +{-# NOINLINE loop #-} +loop x [] = x +loop x (n:ns) = x `seq` loop (x ^ n) ns + +main = print $ loop 1 (replicate 10000000 5) + +---------------------------------------------------- +{- Roman's message of May 2010 + +I tried running nofib with -fno-method-sharing (we discussed this at some point). These are the results: + +-------------------------------------------------------------------------------- + Program Size Allocs Runtime Elapsed +-------------------------------------------------------------------------------- + + Min -0.3% -25.0% -12.5% -9.9% + Max +0.2% +159.1% +90.0% +84.7% + Geometric Mean -0.0% +2.2% +6.8% +5.1% + +This is the worst program: + + simple +0.2% +159.1% +65.3% +63.9% + +I looked at it a bit and came up with this small example: + +---- +loop :: Double -> [Int] -> Double +{-# NOINLINE loop #-} +loop x [] = x +loop x (n:ns) = x `seq` loop (x ^ n) ns + +main = print $ loop 1 (replicate 10000000 5) +---- + +This is over 2x slower with -fno-method-sharing. The culprit is, of +course, (^). Here is the difference: + +Without -fno-method-sharing: + +---- +^_rVB :: GHC.Types.Double -> GHC.Types.Int -> GHC.Types.Double ^_rVB = + GHC.Real.^ + @ GHC.Types.Double + @ GHC.Types.Int + GHC.Float.$fNumDouble + GHC.Real.$fIntegralInt + +Main.loop [InlPrag=NOINLINE (sat-args=2), Occ=LoopBreaker] + :: GHC.Types.Double -> [GHC.Types.Int] -> GHC.Types.Double Main.loop = + \ (x1_aat :: GHC.Types.Double) (ds_drG :: [GHC.Types.Int]) -> + case ds_drG of _ { + [] -> x1_aat; + : n_aav ns_aaw -> + case x1_aat of x2_aau { GHC.Types.D# ipv_srQ -> + Main.loop (^_rVB x2_aau n_aav) ns_aaw + } + } +---- + +With: + +---- +Main.loop [InlPrag=NOINLINE (sat-args=2), Occ=LoopBreaker] + :: GHC.Types.Double -> [GHC.Types.Int] -> GHC.Types.Double Main.loop = + \ (x1_aat :: GHC.Types.Double) (ds_drD :: [GHC.Types.Int]) -> + case ds_drD of _ { + [] -> x1_aat; + : n_aav ns_aaw -> + case x1_aat of x2_aau { GHC.Types.D# ipv_srN -> + Main.loop + (GHC.Real.^ + @ GHC.Types.Double + @ GHC.Types.Int + GHC.Float.$fNumDouble + GHC.Real.$fIntegralInt + x2_aau + n_aav) + ns_aaw + } + } +---- + +This is a bit disappointing. I would have expected GHC to float out +the application of (^) to the two dictionaries during full laziness +(note that (^) has arity 2 so the application is oversaturated). Why +doesn't that happen? SetLevels (if this is the right place to look) +has this: + +-}
\ No newline at end of file diff --git a/testsuite/tests/perf/should_run/MethSharing.stdout b/testsuite/tests/perf/should_run/MethSharing.stdout new file mode 100644 index 0000000000..d3827e75a5 --- /dev/null +++ b/testsuite/tests/perf/should_run/MethSharing.stdout @@ -0,0 +1 @@ +1.0 diff --git a/testsuite/tests/perf/should_run/T149_A.hs b/testsuite/tests/perf/should_run/T149_A.hs new file mode 100644 index 0000000000..dd745460c0 --- /dev/null +++ b/testsuite/tests/perf/should_run/T149_A.hs @@ -0,0 +1,25 @@ +module Main (main) where + +-- See Trac #149 + +-- Curently (with GHC 7.0) the CSE works, just, +-- but it's delicate. + + +import System.CPUTime + +main :: IO () +main = print $ playerMostOccur1 [1..m] + +m :: Int +m = 22 + +playerMostOccur1 :: [Int] -> Int +playerMostOccur1 [a] = a +playerMostOccur1 (x:xs) + | numOccur x (x:xs) > numOccur (playerMostOccur1 xs) xs = x + | otherwise = playerMostOccur1 xs + +numOccur :: Int -> [Int] -> Int +numOccur i is = length $ filter (i ==) is + diff --git a/testsuite/tests/perf/should_run/T149_B.hs b/testsuite/tests/perf/should_run/T149_B.hs new file mode 100644 index 0000000000..fcc87cdf55 --- /dev/null +++ b/testsuite/tests/perf/should_run/T149_B.hs @@ -0,0 +1,26 @@ +module Main (main) where + +-- See Trac #149 + +-- Curently (with GHC 7.0) the CSE works, just, +-- but it's delicate. + + +import System.CPUTime + +main :: IO () +main = print $ playerMostOccur2 [1..m] + +m :: Int +m = 22 + +playerMostOccur2 :: [Int] -> Int +playerMostOccur2 [a] = a +playerMostOccur2 (x:xs) + | numOccur x (x:xs) > numOccur pmo xs = x + | otherwise = pmo + where pmo = playerMostOccur2 xs + +numOccur :: Int -> [Int] -> Int +numOccur i is = length $ filter (i ==) is + diff --git a/testsuite/tests/perf/should_run/T2902_A.hs b/testsuite/tests/perf/should_run/T2902_A.hs new file mode 100644 index 0000000000..c0939104f3 --- /dev/null +++ b/testsuite/tests/perf/should_run/T2902_A.hs @@ -0,0 +1,18 @@ + +{-# LANGUAGE UnicodeSyntax #-} + +module Main (main) where + +import T2902_A_PairingSum + +f :: Int -> PSum Int Int +f n = unions $ fmap g [1..n] + where + g m = unions $ fmap fromList + [ zip [m..n] $ repeat 1 + , zip [m,2+m..n] $ repeat 2 + , zip [m,3+m..n] $ repeat 3 + ] + +main ∷ IO () +main = print $ take 20 $ toList $ f 20 diff --git a/testsuite/tests/perf/should_run/T2902_A_PairingSum.hs b/testsuite/tests/perf/should_run/T2902_A_PairingSum.hs new file mode 100644 index 0000000000..a5dd0e7803 --- /dev/null +++ b/testsuite/tests/perf/should_run/T2902_A_PairingSum.hs @@ -0,0 +1,49 @@ + +{-# LANGUAGE UnicodeSyntax, MultiParamTypeClasses, FlexibleInstances #-} + +module T2902_A_PairingSum (Sum(..), PSum) where + +import T2902_Sum + +data PSum a b = Empty | Tree a b [(PSum a b)] + +instance (Ord a, Num b) ⇒ Sum PSum a b where + insert = insertX + union = unionX + unions = unionsX + extractMin = extractMinX + fromList = fromListX + toList = toListX + +insertX ∷ (Ord a, Num b) ⇒ a → b → PSum a b → PSum a b +insertX v r = unionX $ Tree v r [] + +unionX ∷ (Ord a, Num b) ⇒ PSum a b → PSum a b → PSum a b +unionX x Empty = x +unionX Empty x = x +unionX x@(Tree v r xs) y@(Tree w s ys) = + case compare v w of + LT → Tree v r (y:xs) + GT → Tree w s (x:ys) + EQ → case r + s of + 0 → z + t → insertX v t z + where z = unionX (unionsX xs) (unionsX ys) + +unionsX ∷ (Ord a, Num b) ⇒ [PSum a b] → PSum a b +unionsX [] = Empty +unionsX [x] = x +unionsX (x : y : zs) = unionX (unionX x y) (unionsX zs) + +extractMinX ∷ (Ord a, Num b) ⇒ PSum a b → ((a,b), PSum a b) +extractMinX Empty = undefined +extractMinX (Tree v r xs) = ((v,r), unionsX xs) + +fromListX ∷ (Ord a, Num b) ⇒ [(a,b)] → PSum a b +fromListX [] = Empty +fromListX ((v,r):xs) = insertX v r $ fromListX xs + +toListX ∷ (Ord a, Num b) ⇒ PSum a b → [(a,b)] +toListX Empty = [] +toListX x = let (y, z) = extractMinX x in y : toListX z + diff --git a/testsuite/tests/perf/should_run/T2902_B.hs b/testsuite/tests/perf/should_run/T2902_B.hs new file mode 100644 index 0000000000..c6558c625b --- /dev/null +++ b/testsuite/tests/perf/should_run/T2902_B.hs @@ -0,0 +1,18 @@ + +{-# LANGUAGE UnicodeSyntax #-} + +module Main (main) where + +import T2902_B_PairingSum + +f :: Int -> PSum Int Int +f n = unions $ fmap g [1..n] + where + g m = unions $ fmap fromList + [ zip [m..n] $ repeat 1 + , zip [m,2+m..n] $ repeat 2 + , zip [m,3+m..n] $ repeat 3 + ] + +main ∷ IO () +main = print $ take 20 $ toList $ f 20 diff --git a/testsuite/tests/perf/should_run/T2902_B_PairingSum.hs b/testsuite/tests/perf/should_run/T2902_B_PairingSum.hs new file mode 100644 index 0000000000..5276da818b --- /dev/null +++ b/testsuite/tests/perf/should_run/T2902_B_PairingSum.hs @@ -0,0 +1,37 @@ + +{-# LANGUAGE UnicodeSyntax, MultiParamTypeClasses, FlexibleInstances #-} + +module T2902_B_PairingSum (Sum(..), PSum) where + +import T2902_Sum + +data PSum a b = Empty | Tree a b [PSum a b] + +instance (Ord a, Num b) ⇒ Sum PSum a b where + + insert v r = union $ Tree v r [] + + union x Empty = x + union Empty x = x + union x@(Tree v r xs) y@(Tree w s ys) = + case compare v w of + LT → Tree v r (y:xs) + GT → Tree w s (x:ys) + EQ → case r + s of + 0 → z + t → insert v t z + where z = union (unions xs) (unions ys) + + unions [] = Empty + unions [x] = x + unions (x : y : zs) = union (union x y) (unions zs) + + extractMin Empty = undefined + extractMin (Tree v r xs) = ((v,r), unions xs) + + fromList [] = Empty + fromList ((v,r):xs) = insert v r $ fromList xs + + toList Empty = [] + toList x = let (y, z) = extractMin x in y : toList z + diff --git a/testsuite/tests/perf/should_run/T2902_Sum.hs b/testsuite/tests/perf/should_run/T2902_Sum.hs new file mode 100644 index 0000000000..9be6b10568 --- /dev/null +++ b/testsuite/tests/perf/should_run/T2902_Sum.hs @@ -0,0 +1,14 @@ + +{-# LANGUAGE UnicodeSyntax, MultiParamTypeClasses #-} + +module T2902_Sum (Sum(..)) where + +class Sum c a b where + insert ∷ a → b → c a b → c a b + union ∷ c a b → c a b → c a b + unions ∷ [c a b] → c a b + extractMin ∷ c a b → ((a,b), c a b) + + fromList ∷ [(a,b)] → c a b + toList ∷ c a b → [(a,b)] + diff --git a/testsuite/tests/perf/should_run/T3245.hs b/testsuite/tests/perf/should_run/T3245.hs new file mode 100644 index 0000000000..f52fc27303 --- /dev/null +++ b/testsuite/tests/perf/should_run/T3245.hs @@ -0,0 +1,50 @@ +{-# LANGUAGE ExistentialQuantification #-} + +-- The second version (count2) took ages with GHC 6.12 +-- because the typeOf function was not properly memoised + +import Data.Typeable +import System.CPUTime + +size :: Int +size = 40000 -- This was big enough to take 5 seconds in + -- the bad case on my machine. + +data Any = forall a. (Typeable a) => Any a + +int_type, int_list_type :: TypeRep +int_type = typeOf (undefined :: Int) +int_list_type = typeOf (undefined :: [Int]) + +count1 :: [Any] -> Int +count1 [] = 0 +count1 (Any x:xs) = count1 xs + (if typeOf x == int_type then 1 else 0) + +doTime x = do + start <- getCPUTime + putStr "Result: " + print x + stop <- getCPUTime + putStr "Time(sec): " + print (round $ fromIntegral (stop - start) / 1e12) + -- The 'round' rounds to an integral number of seconds + -- Should be zero if things are working right! + +main = do + let list = [MkT | i <- [1..size :: Int]] + putStrLn "count1" + let x = map Any list + doTime $ count1 x + doTime $ count1 x + doTime $ count1 x + putStrLn "" + putStrLn "count2" + let x = map (Any . (:[])) list + doTime $ count1 x + doTime $ count1 x + doTime $ count1 x + +data T = MkT +tcname :: TyCon +tcname = mkTyCon "T" +instance Typeable T where { typeOf _ = mkTyConApp tcname [] } diff --git a/testsuite/tests/perf/should_run/T3245.stdout b/testsuite/tests/perf/should_run/T3245.stdout new file mode 100644 index 0000000000..bcff7f8fd1 --- /dev/null +++ b/testsuite/tests/perf/should_run/T3245.stdout @@ -0,0 +1,15 @@ +count1 +Result: 0 +Time(sec): 0 +Result: 0 +Time(sec): 0 +Result: 0 +Time(sec): 0 + +count2 +Result: 0 +Time(sec): 0 +Result: 0 +Time(sec): 0 +Result: 0 +Time(sec): 0 diff --git a/testsuite/tests/perf/should_run/T3736.hs b/testsuite/tests/perf/should_run/T3736.hs new file mode 100644 index 0000000000..e812109611 --- /dev/null +++ b/testsuite/tests/perf/should_run/T3736.hs @@ -0,0 +1,212 @@ +{-# OPTIONS_GHC -funbox-strict-fields -O #-} +{-# LANGUAGE ExistentialQuantification #-} + +{- OPTIONS_GHC -ddump-simpl -ddump-asm -} + +module Main (main) where + +import GHC.Float (float2Int, int2Float) + +import System.Environment + +import Prelude hiding (null + ,lines,unlines + ,writeFile + ) + +import Control.Exception (assert, bracket, ) + +import Foreign.Marshal.Array (advancePtr) +import Foreign.Ptr (minusPtr) +import Foreign.Storable (Storable(..)) + +import Control.Monad (when) + +import System.IO (openBinaryFile, hClose, + hPutBuf, + Handle, IOMode(..)) + +import System.IO.Unsafe (unsafePerformIO) + +import Foreign.Ptr (Ptr) +import Foreign.ForeignPtr (ForeignPtr, withForeignPtr, ) +import Foreign.Marshal.Array (copyArray) + +import qualified Foreign.ForeignPtr as F + +main :: IO () +main = do args <- getArgs + case args of + ["1"] -> mainMonolithic1Generator + ["2"] -> mainMonolithic1Composed + _ -> error "Huh?" + +type Phase = (Float, Float, Float) + +{-# INLINE saw #-} +saw :: Num a => a -> a +saw t = 1-2*t + +{-# INLINE fraction #-} +fraction :: Float -> Float +fraction x = x - int2Float (float2Int x) + +{-# INLINE generator0Freq #-} +generator0Freq :: Float -> Float -> Maybe (Float, Float) +generator0Freq freq = + \p -> Just (saw p, fraction (p+freq)) + +infixl 6 `mix`, `mixGen` + +{-# INLINE mix #-} +mix :: + (Num y) => + (s -> Maybe (y, s)) -> + (t -> Maybe (y, t)) -> + ((s,t) -> Maybe (y, (s,t))) +mix f g (s0,t0) = + do (a,s1) <- f s0 + (b,t1) <- g t0 + return ((a+b), (s1,t1)) + +data Generator a = + forall s. + Generator (s -> Maybe (a, s)) s + +{-# INLINE runGeneratorMonolithic #-} +runGeneratorMonolithic :: Int -> Generator Float -> Vector Float +runGeneratorMonolithic size' (Generator f s) = + fst $ unfoldrN size' f s + +{- SPECIALISE INLINE generator0Gen :: Float -> Float -> Generator Float -} +{-# INLINE generator0Gen #-} +generator0Gen :: Float -> Float -> Generator Float +generator0Gen freq phase = + Generator (\p -> Just (saw p, fraction (p+freq))) phase + +{- SPECIALISE INLINE mixGen :: Generator Float -> Generator Float -> Generator Float -} +{-# INLINE mixGen #-} +mixGen :: + (Num y) => + Generator y -> + Generator y -> + Generator y +mixGen (Generator f s) (Generator g t) = + Generator (\(s0,t0) -> + do (a,s1) <- f s0 + (b,t1) <- g t0 + return ((a+b), (s1,t1))) (s,t) + +{-# INLINE dl #-} +dl :: Phase +dl = (0.01008, 0.01003, 0.00990) + +{-# INLINE initPhase2 #-} +initPhase2 :: (Phase, Phase) +initPhase2 = + ((0,0.7,0.1), (0.3,0.4,0.6)) + + +size :: Int +size = 10000000 + + +mainMonolithic1Composed :: IO () +mainMonolithic1Composed = + writeFile "speed.f32" + (fst $ unfoldrN size + (let (f0,f1,f2) = dl + in generator0Freq f0 `mix` + generator0Freq f1 `mix` + generator0Freq f2) + (let (p0,p1,p2) = fst initPhase2 + in ((p0,p1),p2))) + +mainMonolithic1Generator :: IO () +mainMonolithic1Generator = + writeFile "speed.f32" + (runGeneratorMonolithic size + (let (f0,f1,f2) = dl + (p0,p1,p2) = fst initPhase2 + in generator0Gen f0 p0 `mixGen` + generator0Gen f1 p1 `mixGen` + generator0Gen f2 p2)) + +empty :: (Storable a) => Vector a +empty = unsafeCreate 0 $ const $ return () +{-# NOINLINE empty #-} + +null :: Vector a -> Bool +null (SV _ _ l) = assert (l >= 0) $ l <= 0 +{-# INLINE null #-} + +unfoldrN :: (Storable b) => Int -> (a -> Maybe (b, a)) -> a -> (Vector b, Maybe a) +unfoldrN n f x0 = + if n <= 0 + then (empty, Just x0) + else unsafePerformIO $ createAndTrim' n $ \p -> go p n x0 + where + go = arguments2 $ \p i -> \x -> + if i == 0 + then return (0, n-i, Just x) + else + case f x of + Nothing -> return (0, n-i, Nothing) + Just (w,x') -> do poke p w + go (incPtr p) (i-1) x' +{-# INLINE unfoldrN #-} + +hPut :: (Storable a) => Handle -> Vector a -> IO () +hPut h v = + when (not (null v)) $ + withStartPtr v $ \ ptrS l -> + let ptrE = advancePtr ptrS l + in hPutBuf h ptrS (minusPtr ptrE ptrS) + +writeFile :: (Storable a) => FilePath -> Vector a -> IO () +writeFile f txt = + bracket (openBinaryFile f WriteMode) hClose + (\h -> hPut h txt) + +data Vector a = SV {-# UNPACK #-} !(ForeignPtr a) + {-# UNPACK #-} !Int -- offset + {-# UNPACK #-} !Int -- length + +withStartPtr :: Storable a => Vector a -> (Ptr a -> Int -> IO b) -> IO b +withStartPtr (SV x s l) f = + withForeignPtr x $ \p -> f (p `advancePtr` s) l +{-# INLINE withStartPtr #-} + +incPtr :: (Storable a) => Ptr a -> Ptr a +incPtr v = advancePtr v 1 +{-# INLINE incPtr #-} + +unsafeCreate :: (Storable a) => Int -> (Ptr a -> IO ()) -> Vector a +unsafeCreate l f = unsafePerformIO (create l f) +{-# INLINE unsafeCreate #-} + +create :: (Storable a) => Int -> (Ptr a -> IO ()) -> IO (Vector a) +create l f = do + fp <- mallocForeignPtrArray l + withForeignPtr fp $ \p -> f p + return $! SV fp 0 l + +createAndTrim' :: (Storable a) => Int + -> (Ptr a -> IO (Int, Int, b)) + -> IO (Vector a, b) +createAndTrim' l f = do + fp <- mallocForeignPtrArray l + withForeignPtr fp $ \p -> do + (off, l', res) <- f p + if assert (l' <= l) $ l' >= l + then return $! (SV fp 0 l, res) + else do ps <- create l' $ \p' -> copyArray p' (p `advancePtr` off) l' + return $! (ps, res) + +{-# INLINE arguments2 #-} +arguments2 :: (a -> b -> x) -> a -> b -> x +arguments2 f = \a b -> (f $! a) $! b + +{-# INLINE mallocForeignPtrArray #-} +mallocForeignPtrArray :: Storable a => Int -> IO (F.ForeignPtr a) +mallocForeignPtrArray = F.mallocForeignPtrArray diff --git a/testsuite/tests/perf/should_run/T3736.stdout b/testsuite/tests/perf/should_run/T3736.stdout new file mode 100644 index 0000000000..1796dc2720 --- /dev/null +++ b/testsuite/tests/perf/should_run/T3736.stdout @@ -0,0 +1 @@ +Match diff --git a/testsuite/tests/perf/should_run/T3738.hs b/testsuite/tests/perf/should_run/T3738.hs new file mode 100644 index 0000000000..1b3141c0f5 --- /dev/null +++ b/testsuite/tests/perf/should_run/T3738.hs @@ -0,0 +1,10 @@ + +module Main where + +import T3738a + +{-# INLINE bar #-} +bar :: Int -> [Int] +bar x = map (+ 2) (foo x) + +main = print (bar 2 !! 10000) diff --git a/testsuite/tests/perf/should_run/T3738.stdout b/testsuite/tests/perf/should_run/T3738.stdout new file mode 100644 index 0000000000..7ed6ff82de --- /dev/null +++ b/testsuite/tests/perf/should_run/T3738.stdout @@ -0,0 +1 @@ +5 diff --git a/testsuite/tests/perf/should_run/T3738a.hs b/testsuite/tests/perf/should_run/T3738a.hs new file mode 100644 index 0000000000..b27451d12d --- /dev/null +++ b/testsuite/tests/perf/should_run/T3738a.hs @@ -0,0 +1,6 @@ + +module T3738a where + +{-# INLINE foo #-} +foo :: Num a => a -> [a] +foo x = map (+ 1) (repeat x) diff --git a/testsuite/tests/perf/should_run/T4321.hs b/testsuite/tests/perf/should_run/T4321.hs new file mode 100644 index 0000000000..b8a0dbc4a1 --- /dev/null +++ b/testsuite/tests/perf/should_run/T4321.hs @@ -0,0 +1,15 @@ + +-- In 6.13 this stack overflowed + +module Main (main) where + +main :: IO () +main = let n = 1000000 + in print $ integrate n (1 / fromIntegral n) + +integrate :: Int -> Double -> Double +integrate n h = h * (sum (map area [1..n])) + where area :: Int -> Double + area i = let x = h * (fromIntegral i - 0.5) + in 4 / (1 + x*x) + diff --git a/testsuite/tests/perf/should_run/T4321.stdout b/testsuite/tests/perf/should_run/T4321.stdout new file mode 100644 index 0000000000..ce98a198a7 --- /dev/null +++ b/testsuite/tests/perf/should_run/T4321.stdout @@ -0,0 +1 @@ +3.1415926535897643 diff --git a/testsuite/tests/perf/should_run/T4830.hs b/testsuite/tests/perf/should_run/T4830.hs new file mode 100644 index 0000000000..e345ffc9cd --- /dev/null +++ b/testsuite/tests/perf/should_run/T4830.hs @@ -0,0 +1,15 @@ +-- Compile with O2; SpecConstr should fire nicely +-- and eliminate all allocation in inner loop + +module Main where + +foo :: Int -> Maybe (Double,Double) -> Double +foo _ Nothing = 0 +foo 0 (Just (x,y)) = x+y +foo n (Just (x,y)) = let r = f x y in r `seq` foo (n-1) (Just r) + where + f x y | x <= y = (x,y) + | otherwise = (y,x) + +main = print (foo 1000000 (Just (1,2))) + diff --git a/testsuite/tests/perf/should_run/T4830.stdout b/testsuite/tests/perf/should_run/T4830.stdout new file mode 100644 index 0000000000..9f55b2ccb5 --- /dev/null +++ b/testsuite/tests/perf/should_run/T4830.stdout @@ -0,0 +1 @@ +3.0 diff --git a/testsuite/tests/perf/should_run/T4978.hs b/testsuite/tests/perf/should_run/T4978.hs new file mode 100644 index 0000000000..6413b01fdc --- /dev/null +++ b/testsuite/tests/perf/should_run/T4978.hs @@ -0,0 +1,125 @@ +module Main (main) where + +import qualified Data.ByteString as S +import qualified Data.ByteString.Lazy as L +import Data.ByteString.Internal (inlinePerformIO) +import qualified Data.ByteString.Internal as S +import Data.Monoid +import Foreign + +newtype Builder = Builder { + runBuilder :: (Buffer -> [S.ByteString]) -> Buffer -> [S.ByteString] + } + +instance Monoid Builder where + mempty = empty + {-# INLINE mempty #-} + mappend = append + {-# INLINE mappend #-} + mconcat = foldr mappend mempty + {-# INLINE mconcat #-} + +empty :: Builder +empty = Builder (\ k b -> b `seq` k b) +{-# INLINE empty #-} + +singleton :: Word8 -> Builder +singleton = writeN 1 . flip poke +{-# INLINE singleton #-} + +append :: Builder -> Builder -> Builder +append (Builder f) (Builder g) = Builder (f . g) +{-# INLINE [0] append #-} + +-- Our internal buffer type +data Buffer = Buffer {-# UNPACK #-} !(ForeignPtr Word8) + {-# UNPACK #-} !Int -- offset + {-# UNPACK #-} !Int -- used bytes + {-# UNPACK #-} !Int -- length left + +-- | /O(1)./ Pop the 'S.ByteString' we have constructed so far, if any, +-- yielding a new chunk in the result lazy 'L.ByteString'. +flush :: Builder +flush = Builder $ \ k buf@(Buffer p o u l) -> + if u == 0 + then k buf + else S.PS p o u : k (Buffer p (o+u) 0 l) + +-- | /O(n)./ Extract a lazy 'L.ByteString' from a 'Builder'. +-- The construction work takes place if and when the relevant part of +-- the lazy 'L.ByteString' is demanded. +-- +toLazyByteString :: Builder -> L.ByteString +toLazyByteString m = L.fromChunks $ unsafePerformIO $ do + buf <- newBuffer defaultSize + return (runBuilder (m `append` flush) (const []) buf) +{-# INLINE toLazyByteString #-} + +defaultSize :: Int +defaultSize = 32 * k - overhead + where k = 1024 + overhead = 2 * sizeOf (undefined :: Int) + +-- | Sequence an IO operation on the buffer +unsafeLiftIO :: (Buffer -> IO Buffer) -> Builder +unsafeLiftIO f = Builder $ \ k buf -> inlinePerformIO $ do + buf' <- f buf + return (k buf') +{-# INLINE unsafeLiftIO #-} + +-- | Get the size of the buffer +withSize :: (Int -> Builder) -> Builder +withSize f = Builder $ \ k buf@(Buffer _ _ _ l) -> runBuilder (f l) k buf + +-- | Ensure that there are at least @n@ many bytes available. +ensureFree :: Int -> Builder +ensureFree n = n `seq` withSize $ \ l -> + if n <= l then empty else + flush `append` unsafeLiftIO (const (newBuffer (max n defaultSize))) +{-# INLINE [0] ensureFree #-} + +-- | Ensure that @n@ many bytes are available, and then use @f@ to write some +-- bytes into the memory. +writeN :: Int -> (Ptr Word8 -> IO ()) -> Builder +writeN n f = ensureFree n `append` unsafeLiftIO (writeNBuffer n f) +{-# INLINE [0] writeN #-} + +writeNBuffer :: Int -> (Ptr Word8 -> IO ()) -> Buffer -> IO Buffer +writeNBuffer n f (Buffer fp o u l) = do + withForeignPtr fp (\p -> f (p `plusPtr` (o+u))) + return (Buffer fp o (u+n) (l-n)) +{-# INLINE writeNBuffer #-} + +newBuffer :: Int -> IO Buffer +newBuffer size = do + fp <- S.mallocByteString size + return $! Buffer fp 0 0 size +{-# INLINE newBuffer #-} + +-- Merge buffer bounds checks. +{-# RULES +"append/writeN" forall a b (f::Ptr Word8 -> IO ()) + (g::Ptr Word8 -> IO ()) ws. + append (writeN a f) (append (writeN b g) ws) = + append (writeN (a+b) (\p -> f p >> g (p `plusPtr` a))) ws + +"writeN/writeN" forall a b (f::Ptr Word8 -> IO ()) + (g::Ptr Word8 -> IO ()). + append (writeN a f) (writeN b g) = + writeN (a+b) (\p -> f p >> g (p `plusPtr` a)) + +"ensureFree/ensureFree" forall a b . + append (ensureFree a) (ensureFree b) = ensureFree (max a b) + #-} + +-- Test case + +-- Argument must be a multiple of 4. +test :: Int -> Builder +test 0 = mempty +test n = singleton 1 `mappend` + (singleton 2 `mappend` + (singleton 3 `mappend` + (singleton 4 `mappend` test (n-4)))) + +main = print $ L.length $ toLazyByteString $ test 10000000 diff --git a/testsuite/tests/perf/should_run/T4978.stdout b/testsuite/tests/perf/should_run/T4978.stdout new file mode 100644 index 0000000000..825319e1c5 --- /dev/null +++ b/testsuite/tests/perf/should_run/T4978.stdout @@ -0,0 +1 @@ +10000000 diff --git a/testsuite/tests/perf/should_run/T5113.hs b/testsuite/tests/perf/should_run/T5113.hs new file mode 100644 index 0000000000..e87bcb6cad --- /dev/null +++ b/testsuite/tests/perf/should_run/T5113.hs @@ -0,0 +1,31 @@ +{-# LANGUAGE BangPatterns #-} +module Main where + +import Data.Array.Base (unsafeRead, unsafeWrite) +import Data.Array.ST +import Data.Array.Unboxed +import Control.Monad.ST + +main = print (divisorCounts 1000000 ! 342) + +isqrt :: Int -> Int +isqrt n = floor (sqrt $ fromIntegral n) + +divisorCounts :: Int -> UArray Int Int +divisorCounts n = runSTUArray $ do + let !rt = isqrt n + darr <- newArray (0,n) 1 :: ST s (STUArray s Int Int) + let inc i = unsafeRead darr i >>= \k -> unsafeWrite darr i (k+1) + note step i + | i > n = return () + | otherwise = do + inc i + note step (i+step) + count j + | j > rt = return () + | otherwise = do + note (2*j) (j*j) + count (j+2) + note 2 4 + count 3 + return darr diff --git a/testsuite/tests/perf/should_run/T5113.stdout b/testsuite/tests/perf/should_run/T5113.stdout new file mode 100644 index 0000000000..0cfbf08886 --- /dev/null +++ b/testsuite/tests/perf/should_run/T5113.stdout @@ -0,0 +1 @@ +2 diff --git a/testsuite/tests/perf/should_run/T5205.hs b/testsuite/tests/perf/should_run/T5205.hs new file mode 100644 index 0000000000..215dd42647 --- /dev/null +++ b/testsuite/tests/perf/should_run/T5205.hs @@ -0,0 +1,13 @@ + +import Control.Concurrent +import Control.Monad + +main :: IO () +main = do t <- forkIO (x >> x) + threadDelay 1000000 + killThread t + putStrLn "Done" + +x :: IO () +x = forever yield + diff --git a/testsuite/tests/perf/should_run/T5205.stdout b/testsuite/tests/perf/should_run/T5205.stdout new file mode 100644 index 0000000000..a965a70ed4 --- /dev/null +++ b/testsuite/tests/perf/should_run/T5205.stdout @@ -0,0 +1 @@ +Done diff --git a/testsuite/tests/perf/should_run/all.T b/testsuite/tests/perf/should_run/all.T new file mode 100644 index 0000000000..04133979df --- /dev/null +++ b/testsuite/tests/perf/should_run/all.T @@ -0,0 +1,148 @@ +# Tests that newArray/newArray_ is being optimised correctly + +# fortunately the values here are mostly independent of the wordsize, +# because the test allocates an unboxed array of doubles. + +test('3586', + [stats_num_field('peak_megabytes_allocated', 17, + 18), + # expected value: 17 (amd64/Linux) + stats_num_field('bytes allocated', 16000000, + 17000000), + # expected value: 16835544 (amd64/Linux) + only_ways(['normal']) + ], + compile_and_run, + ['-O']) + +test('T4830', + [stats_num_field('bytes allocated', 60000, + 200000), + # expected value: 127,000 (amd64/Linux) + only_ways(['normal']) + ], + compile_and_run, + ['-O2']) + +test('T3245', normal, compile_and_run, ['-O']) + +# Test that bytestring reading/writing isn't over-allocating. We had +# a bug in hGetBufNonBlocking in 6.13 that triggered this. +# +test('lazy-bs-alloc', + [stats_num_field('peak_megabytes_allocated', 1, + 3), + # expected value: 2 (amd64/Linux) + stats_num_field('bytes allocated', 400000, + 600000), + # expected value: 489776 (amd64/Linux) + only_ways(['normal']), + extra_run_opts('../../numeric/should_run/arith011.stdout'), + ignore_output + ], + # use a suitably big file, without bloating the repo with a new one: + compile_and_run, + ['-O']) + +# Get reproducible floating-point results on x86 +if config.arch == 'i386': + sse2_opts = '-msse2' +else: + sse2_opts = '' + +test('T4321', omit_ways(['ghci']), compile_and_run, ['-O ' + sse2_opts]) + +test('T3736', normal, run_command, ['$MAKE -s --no-print-directory T3736']) +test('T3738', + [stats_num_field('peak_megabytes_allocated', 1, + 1), + # expected value: 1 (amd64/Linux) + # expected value: 45648 (x86/Linux): + if_wordsize(32, + stats_num_field('bytes allocated', 40000, + 50000)), + if_wordsize(64, + stats_num_field('bytes allocated', 40000, + 60000)), + # expected value: 49400 (amd64/Linux) + only_ways(['normal']) + ], + compile_and_run, + ['-O']) + +test('MethSharing', + [stats_num_field('peak_megabytes_allocated', 1, + 1), + # expected value: 1 (amd64/Linux) + # expected value: 2685858140 (x86/OS X): + if_wordsize(32, + stats_num_field('bytes allocated', 300000000, + 400000000)), + # expected: 360940756 (x86/Linux) + if_wordsize(64, + stats_num_field('bytes allocated', 600000000, + 700000000)), + # expected: 640067672 (amd64/Linux) + only_ways(['normal']) + ], + compile_and_run, + ['-O']) +test('T2902', + extra_clean(['T2902_A', 'T2902_B', + 'T2902_A.hi', 'T2902_B.hi', + 'T2902_A.o', 'T2902_B.o', + 'T2902_A_PairingSum.hi', 'T2902_B_PairingSum.hi', + 'T2902_A_PairingSum.o', 'T2902_B_PairingSum.o', + 'T2902_Sum.hi', + 'T2902_Sum.o']), + run_command, + ['$MAKE -s --no-print-directory T2902']) +test('T149', + [expect_broken(149), + extra_clean(['T149_A', 'T149_B', + 'T149_A.hi', 'T149_B.hi', + 'T149_A.o', 'T149_B.o'])], + run_command, + ['$MAKE -s --no-print-directory T149']) + +test('T5113', + [ + if_wordsize(32, + stats_num_field('bytes allocated', 3000000, + 5000000)), + if_wordsize(64, + stats_num_field('bytes allocated', 8000000, + 9000000)), + only_ways(['normal']) + ], + compile_and_run, + ['-O']) + + +test('T4978', + [if_wordsize(32, + stats_num_field('bytes allocated', 9000000, + 11000000)), + if_wordsize(64, + stats_num_field('bytes allocated', 9000000, + 11000000)), + # expected value: 10137680 (amd64/Linux) + only_ways(['normal']) + ], + compile_and_run, + ['-O2']) + +test('T5205', + [if_wordsize(32, + stats_num_field('bytes allocated', 40000, + 50000)), + # expected value: 47088 (x86/Darwin) + if_wordsize(64, + stats_num_field('bytes allocated', 40000, + 60000)), + # expected value: 51320 (amd64/Linux) + only_ways(['normal']) + ], + compile_and_run, + ['-O']) + diff --git a/testsuite/tests/perf/should_run/lazy-bs-alloc.hs b/testsuite/tests/perf/should_run/lazy-bs-alloc.hs new file mode 100644 index 0000000000..76850c67d0 --- /dev/null +++ b/testsuite/tests/perf/should_run/lazy-bs-alloc.hs @@ -0,0 +1,9 @@ +module Main (main) where + +import System.Environment (getArgs) +import qualified Data.ByteString.Lazy as L + +main :: IO () +main = do + (file : _) <- getArgs + L.readFile file >>= L.putStr |