diff options
Diffstat (limited to 'testsuite/tests/perf/should_run')
59 files changed, 1471 insertions, 0 deletions
diff --git a/testsuite/tests/perf/should_run/Conversions.hs b/testsuite/tests/perf/should_run/Conversions.hs new file mode 100644 index 0000000000..8432727965 --- /dev/null +++ b/testsuite/tests/perf/should_run/Conversions.hs @@ -0,0 +1,21 @@ +{-# LANGUAGE BangPatterns #-} + +-- | Tests that conversions between various primitive types (e.g. +-- Word, Double, etc) doesn't allocate. +module Main (main) where + +import Data.Word + +-- Repeatedly convert Words to Doubles +loop :: Floating a => Word -> a +loop n = go 0 0.0 + where + go i !acc | i < n = go (i+1) (acc + fromIntegral i) + | otherwise = acc +{-# SPECIALISE loop :: Word -> Float #-} +{-# SPECIALISE loop :: Word -> Double #-} + +main :: IO () +main = do + print (loop 1000000 :: Float) + print (loop 1000000 :: Double) diff --git a/testsuite/tests/perf/should_run/Conversions.stdout b/testsuite/tests/perf/should_run/Conversions.stdout new file mode 100644 index 0000000000..2fe5b4dc9d --- /dev/null +++ b/testsuite/tests/perf/should_run/Conversions.stdout @@ -0,0 +1,2 @@ +4.9994036e11 +4.999995e11 diff --git a/testsuite/tests/perf/should_run/Makefile b/testsuite/tests/perf/should_run/Makefile new file mode 100644 index 0000000000..e839342f90 --- /dev/null +++ b/testsuite/tests/perf/should_run/Makefile @@ -0,0 +1,36 @@ +TOP=../../.. +include $(TOP)/mk/boilerplate.mk +include $(TOP)/mk/test.mk + +define runT3736 +./T3736 $1 +RTS -t --machine-readable 2>&1 | grep '"bytes allocated"' | sed -e 's/.*, "//' -e 's/".*//' +endef +.PHONY: T3736 +T3736: + $(RM) -f T3736.hi T3736.o T3736 + '$(TEST_HC)' $(TEST_HC_OPTS) -v0 -O --make T3736 -rtsopts +# Check ALLOC1 is > 100 just to check with have sane results, and if so, +# the test passes if the two numbers are equal. We could check that the +# actual numbers are in the range we expect too (on the various +# platforms), but we don't currently. + ALLOC1=`$(call runT3736,1)`; ALLOC2=`$(call runT3736,2)`; if [ "$$ALLOC1" -gt 100 ] && [ "$$ALLOC1" -eq "$$ALLOC2" ]; then echo Match; else echo "Mismatch: $$ALLOC1 $$ALLOC2"; fi + +.PHONY: T2902 +T2902: + $(RM) -f T2902_A T2902_B + $(RM) -f T2902_A.hi T2902_B.hi + $(RM) -f T2902_A.o T2902_B.o + $(RM) -f T2902_A_PairingSum.hi T2902_B_PairingSum.hi T2902_Sum.hi + $(RM) -f T2902_A_PairingSum.o T2902_B_PairingSum.o T2902_Sum.o + '$(TEST_HC)' $(TEST_HC_OPTS) -v0 -O --make T2902_A -rtsopts + '$(TEST_HC)' $(TEST_HC_OPTS) -v0 -O --make T2902_B -rtsopts + BAA=`./T2902_A +RTS -t --machine-readable 2>&1 | grep '"bytes allocated"' | sed -e 's/.*, "//' -e 's/")//'`; BAB=`./T2902_B +RTS -t --machine-readable 2>&1 | grep '"bytes allocated"' | sed -e 's/.*, "//' -e 's/")//'`; [ "$$BAA" = "" ] && echo 'T2902_A: No "bytes allocated"'; [ "$$BAA" = "$$BAB" ] || echo "T2902: Mismatch in \"bytes allocated\": $$BAA $$BAB" + +.PHONY: T149 +T149: + $(RM) -f T149_A T149_A.hi T149_A.o + $(RM) -f T149_B T149_B.hi T149_B.o + '$(TEST_HC)' $(TEST_HC_OPTS) -v0 -O --make T149_A -rtsopts + '$(TEST_HC)' $(TEST_HC_OPTS) -v0 -O --make T149_B -rtsopts + BAA=`./T149_A +RTS -t --machine-readable 2>&1 | grep '"bytes allocated"' | sed -e 's/.*, "//' -e 's/")//'`; BAB=`./T149_B +RTS -t --machine-readable 2>&1 | grep '"bytes allocated"' | sed -e 's/.*, "//' -e 's/")//'`; [ "$$BAA" = "" ] && echo 'T149_A: No "bytes allocated"'; [ "$$BAA" = "$$BAB" ] || echo "T149: Mismatch in \"bytes allocated\": $$BAA $$BAB" + diff --git a/testsuite/tests/perf/should_run/MethSharing.hs b/testsuite/tests/perf/should_run/MethSharing.hs new file mode 100644 index 0000000000..fb69bd4509 --- /dev/null +++ b/testsuite/tests/perf/should_run/MethSharing.hs @@ -0,0 +1,97 @@ +module Main where + +-- This test works efficiently because the full laziness +-- pass now floats out applications +-- \x -> f y (x+1) +-- It'll float out the (f y) if that's a redex + +loop :: Double -> [Int] -> Double +{-# NOINLINE loop #-} +loop x [] = x +loop x (n:ns) = x `seq` loop (x ^ n) ns + +main = print $ loop 1 (replicate 10000000 5) + +---------------------------------------------------- +{- Roman's message of May 2010 + +I tried running nofib with -fno-method-sharing (we discussed this at some point). These are the results: + +-------------------------------------------------------------------------------- + Program Size Allocs Runtime Elapsed +-------------------------------------------------------------------------------- + + Min -0.3% -25.0% -12.5% -9.9% + Max +0.2% +159.1% +90.0% +84.7% + Geometric Mean -0.0% +2.2% +6.8% +5.1% + +This is the worst program: + + simple +0.2% +159.1% +65.3% +63.9% + +I looked at it a bit and came up with this small example: + +---- +loop :: Double -> [Int] -> Double +{-# NOINLINE loop #-} +loop x [] = x +loop x (n:ns) = x `seq` loop (x ^ n) ns + +main = print $ loop 1 (replicate 10000000 5) +---- + +This is over 2x slower with -fno-method-sharing. The culprit is, of +course, (^). Here is the difference: + +Without -fno-method-sharing: + +---- +^_rVB :: GHC.Types.Double -> GHC.Types.Int -> GHC.Types.Double ^_rVB = + GHC.Real.^ + @ GHC.Types.Double + @ GHC.Types.Int + GHC.Float.$fNumDouble + GHC.Real.$fIntegralInt + +Main.loop [InlPrag=NOINLINE (sat-args=2), Occ=LoopBreaker] + :: GHC.Types.Double -> [GHC.Types.Int] -> GHC.Types.Double Main.loop = + \ (x1_aat :: GHC.Types.Double) (ds_drG :: [GHC.Types.Int]) -> + case ds_drG of _ { + [] -> x1_aat; + : n_aav ns_aaw -> + case x1_aat of x2_aau { GHC.Types.D# ipv_srQ -> + Main.loop (^_rVB x2_aau n_aav) ns_aaw + } + } +---- + +With: + +---- +Main.loop [InlPrag=NOINLINE (sat-args=2), Occ=LoopBreaker] + :: GHC.Types.Double -> [GHC.Types.Int] -> GHC.Types.Double Main.loop = + \ (x1_aat :: GHC.Types.Double) (ds_drD :: [GHC.Types.Int]) -> + case ds_drD of _ { + [] -> x1_aat; + : n_aav ns_aaw -> + case x1_aat of x2_aau { GHC.Types.D# ipv_srN -> + Main.loop + (GHC.Real.^ + @ GHC.Types.Double + @ GHC.Types.Int + GHC.Float.$fNumDouble + GHC.Real.$fIntegralInt + x2_aau + n_aav) + ns_aaw + } + } +---- + +This is a bit disappointing. I would have expected GHC to float out +the application of (^) to the two dictionaries during full laziness +(note that (^) has arity 2 so the application is oversaturated). Why +doesn't that happen? SetLevels (if this is the right place to look) +has this: + +-}
\ No newline at end of file diff --git a/testsuite/tests/perf/should_run/MethSharing.stdout b/testsuite/tests/perf/should_run/MethSharing.stdout new file mode 100644 index 0000000000..d3827e75a5 --- /dev/null +++ b/testsuite/tests/perf/should_run/MethSharing.stdout @@ -0,0 +1 @@ +1.0 diff --git a/testsuite/tests/perf/should_run/T149_A.hs b/testsuite/tests/perf/should_run/T149_A.hs new file mode 100644 index 0000000000..dd745460c0 --- /dev/null +++ b/testsuite/tests/perf/should_run/T149_A.hs @@ -0,0 +1,25 @@ +module Main (main) where + +-- See Trac #149 + +-- Curently (with GHC 7.0) the CSE works, just, +-- but it's delicate. + + +import System.CPUTime + +main :: IO () +main = print $ playerMostOccur1 [1..m] + +m :: Int +m = 22 + +playerMostOccur1 :: [Int] -> Int +playerMostOccur1 [a] = a +playerMostOccur1 (x:xs) + | numOccur x (x:xs) > numOccur (playerMostOccur1 xs) xs = x + | otherwise = playerMostOccur1 xs + +numOccur :: Int -> [Int] -> Int +numOccur i is = length $ filter (i ==) is + diff --git a/testsuite/tests/perf/should_run/T149_B.hs b/testsuite/tests/perf/should_run/T149_B.hs new file mode 100644 index 0000000000..fcc87cdf55 --- /dev/null +++ b/testsuite/tests/perf/should_run/T149_B.hs @@ -0,0 +1,26 @@ +module Main (main) where + +-- See Trac #149 + +-- Curently (with GHC 7.0) the CSE works, just, +-- but it's delicate. + + +import System.CPUTime + +main :: IO () +main = print $ playerMostOccur2 [1..m] + +m :: Int +m = 22 + +playerMostOccur2 :: [Int] -> Int +playerMostOccur2 [a] = a +playerMostOccur2 (x:xs) + | numOccur x (x:xs) > numOccur pmo xs = x + | otherwise = pmo + where pmo = playerMostOccur2 xs + +numOccur :: Int -> [Int] -> Int +numOccur i is = length $ filter (i ==) is + diff --git a/testsuite/tests/perf/should_run/T2902_A.hs b/testsuite/tests/perf/should_run/T2902_A.hs new file mode 100644 index 0000000000..c0939104f3 --- /dev/null +++ b/testsuite/tests/perf/should_run/T2902_A.hs @@ -0,0 +1,18 @@ + +{-# LANGUAGE UnicodeSyntax #-} + +module Main (main) where + +import T2902_A_PairingSum + +f :: Int -> PSum Int Int +f n = unions $ fmap g [1..n] + where + g m = unions $ fmap fromList + [ zip [m..n] $ repeat 1 + , zip [m,2+m..n] $ repeat 2 + , zip [m,3+m..n] $ repeat 3 + ] + +main ∷ IO () +main = print $ take 20 $ toList $ f 20 diff --git a/testsuite/tests/perf/should_run/T2902_A_PairingSum.hs b/testsuite/tests/perf/should_run/T2902_A_PairingSum.hs new file mode 100644 index 0000000000..6dc5fb484e --- /dev/null +++ b/testsuite/tests/perf/should_run/T2902_A_PairingSum.hs @@ -0,0 +1,49 @@ + +{-# LANGUAGE UnicodeSyntax, MultiParamTypeClasses, FlexibleInstances #-} + +module T2902_A_PairingSum (Sum(..), PSum) where + +import T2902_Sum + +data PSum a b = Empty | Tree a b [(PSum a b)] + +instance (Ord a, Eq b, Num b) ⇒ Sum PSum a b where + insert = insertX + union = unionX + unions = unionsX + extractMin = extractMinX + fromList = fromListX + toList = toListX + +insertX ∷ (Ord a, Eq b, Num b) ⇒ a → b → PSum a b → PSum a b +insertX v r = unionX $ Tree v r [] + +unionX ∷ (Ord a, Eq b, Num b) ⇒ PSum a b → PSum a b → PSum a b +unionX x Empty = x +unionX Empty x = x +unionX x@(Tree v r xs) y@(Tree w s ys) = + case compare v w of + LT → Tree v r (y:xs) + GT → Tree w s (x:ys) + EQ → case r + s of + 0 → z + t → insertX v t z + where z = unionX (unionsX xs) (unionsX ys) + +unionsX ∷ (Ord a, Eq b, Num b) ⇒ [PSum a b] → PSum a b +unionsX [] = Empty +unionsX [x] = x +unionsX (x : y : zs) = unionX (unionX x y) (unionsX zs) + +extractMinX ∷ (Ord a, Eq b, Num b) ⇒ PSum a b → ((a,b), PSum a b) +extractMinX Empty = undefined +extractMinX (Tree v r xs) = ((v,r), unionsX xs) + +fromListX ∷ (Ord a, Eq b, Num b) ⇒ [(a,b)] → PSum a b +fromListX [] = Empty +fromListX ((v,r):xs) = insertX v r $ fromListX xs + +toListX ∷ (Ord a, Eq b, Num b) ⇒ PSum a b → [(a,b)] +toListX Empty = [] +toListX x = let (y, z) = extractMinX x in y : toListX z + diff --git a/testsuite/tests/perf/should_run/T2902_B.hs b/testsuite/tests/perf/should_run/T2902_B.hs new file mode 100644 index 0000000000..c6558c625b --- /dev/null +++ b/testsuite/tests/perf/should_run/T2902_B.hs @@ -0,0 +1,18 @@ + +{-# LANGUAGE UnicodeSyntax #-} + +module Main (main) where + +import T2902_B_PairingSum + +f :: Int -> PSum Int Int +f n = unions $ fmap g [1..n] + where + g m = unions $ fmap fromList + [ zip [m..n] $ repeat 1 + , zip [m,2+m..n] $ repeat 2 + , zip [m,3+m..n] $ repeat 3 + ] + +main ∷ IO () +main = print $ take 20 $ toList $ f 20 diff --git a/testsuite/tests/perf/should_run/T2902_B_PairingSum.hs b/testsuite/tests/perf/should_run/T2902_B_PairingSum.hs new file mode 100644 index 0000000000..baf5885798 --- /dev/null +++ b/testsuite/tests/perf/should_run/T2902_B_PairingSum.hs @@ -0,0 +1,37 @@ + +{-# LANGUAGE UnicodeSyntax, MultiParamTypeClasses, FlexibleInstances #-} + +module T2902_B_PairingSum (Sum(..), PSum) where + +import T2902_Sum + +data PSum a b = Empty | Tree a b [PSum a b] + +instance (Ord a, Eq b, Num b) ⇒ Sum PSum a b where + + insert v r = union $ Tree v r [] + + union x Empty = x + union Empty x = x + union x@(Tree v r xs) y@(Tree w s ys) = + case compare v w of + LT → Tree v r (y:xs) + GT → Tree w s (x:ys) + EQ → case r + s of + 0 → z + t → insert v t z + where z = union (unions xs) (unions ys) + + unions [] = Empty + unions [x] = x + unions (x : y : zs) = union (union x y) (unions zs) + + extractMin Empty = undefined + extractMin (Tree v r xs) = ((v,r), unions xs) + + fromList [] = Empty + fromList ((v,r):xs) = insert v r $ fromList xs + + toList Empty = [] + toList x = let (y, z) = extractMin x in y : toList z + diff --git a/testsuite/tests/perf/should_run/T2902_Sum.hs b/testsuite/tests/perf/should_run/T2902_Sum.hs new file mode 100644 index 0000000000..9be6b10568 --- /dev/null +++ b/testsuite/tests/perf/should_run/T2902_Sum.hs @@ -0,0 +1,14 @@ + +{-# LANGUAGE UnicodeSyntax, MultiParamTypeClasses #-} + +module T2902_Sum (Sum(..)) where + +class Sum c a b where + insert ∷ a → b → c a b → c a b + union ∷ c a b → c a b → c a b + unions ∷ [c a b] → c a b + extractMin ∷ c a b → ((a,b), c a b) + + fromList ∷ [(a,b)] → c a b + toList ∷ c a b → [(a,b)] + diff --git a/testsuite/tests/perf/should_run/T3245.hs b/testsuite/tests/perf/should_run/T3245.hs new file mode 100644 index 0000000000..d345fed38b --- /dev/null +++ b/testsuite/tests/perf/should_run/T3245.hs @@ -0,0 +1,47 @@ +{-# LANGUAGE ExistentialQuantification, DeriveDataTypeable #-} + +-- The second version (count2) took ages with GHC 6.12 +-- because the typeOf function was not properly memoised + +import Data.Typeable +import System.CPUTime + +size :: Int +size = 40000 -- This was big enough to take 5 seconds in + -- the bad case on my machine. + +data Any = forall a. (Typeable a) => Any a + +int_type, int_list_type :: TypeRep +int_type = typeOf (undefined :: Int) +int_list_type = typeOf (undefined :: [Int]) + +count1 :: [Any] -> Int +count1 [] = 0 +count1 (Any x:xs) = count1 xs + (if typeOf x == int_type then 1 else 0) + +doTime x = do + start <- getCPUTime + putStr "Result: " + print x + stop <- getCPUTime + putStr "Time(sec): " + print (round $ fromIntegral (stop - start) / 1e12) + -- The 'round' rounds to an integral number of seconds + -- Should be zero if things are working right! + +main = do + let list = [MkT | i <- [1..size :: Int]] + putStrLn "count1" + let x = map Any list + doTime $ count1 x + doTime $ count1 x + doTime $ count1 x + putStrLn "" + putStrLn "count2" + let x = map (Any . (:[])) list + doTime $ count1 x + doTime $ count1 x + doTime $ count1 x + +data T = MkT deriving Typeable diff --git a/testsuite/tests/perf/should_run/T3245.stdout b/testsuite/tests/perf/should_run/T3245.stdout new file mode 100644 index 0000000000..bcff7f8fd1 --- /dev/null +++ b/testsuite/tests/perf/should_run/T3245.stdout @@ -0,0 +1,15 @@ +count1 +Result: 0 +Time(sec): 0 +Result: 0 +Time(sec): 0 +Result: 0 +Time(sec): 0 + +count2 +Result: 0 +Time(sec): 0 +Result: 0 +Time(sec): 0 +Result: 0 +Time(sec): 0 diff --git a/testsuite/tests/perf/should_run/T3586.hs b/testsuite/tests/perf/should_run/T3586.hs new file mode 100644 index 0000000000..968f2eba27 --- /dev/null +++ b/testsuite/tests/perf/should_run/T3586.hs @@ -0,0 +1,20 @@ +{-# LANGUAGE BangPatterns #-} +{-# OPTIONS -fvia-C -optc-O3 -fexcess-precision -optc-msse3 #-} + +import Control.Monad.ST +import Data.Array.ST +import Data.Array.Base + +main = print $ runST + (do arr <- newArray (1,2000000) 137.0 :: ST s (STUArray s Int Double) + go arr 2000000 0.0 ) + + +go :: STUArray s Int Double -> Int -> Double -> ST s Double +go !a i !acc + | i < 1 = return acc + | otherwise = do + b <- unsafeRead a i + unsafeWrite a i (b+3.0) + c <- unsafeRead a i + go a (i-1) (c+acc) diff --git a/testsuite/tests/perf/should_run/T3586.stdout b/testsuite/tests/perf/should_run/T3586.stdout new file mode 100644 index 0000000000..626282f10c --- /dev/null +++ b/testsuite/tests/perf/should_run/T3586.stdout @@ -0,0 +1 @@ +2.79999863e8 diff --git a/testsuite/tests/perf/should_run/T3736.hs b/testsuite/tests/perf/should_run/T3736.hs new file mode 100644 index 0000000000..3a399ada0f --- /dev/null +++ b/testsuite/tests/perf/should_run/T3736.hs @@ -0,0 +1,212 @@ +{-# OPTIONS_GHC -funbox-strict-fields -O #-} +{-# LANGUAGE ExistentialQuantification #-} + +{- OPTIONS_GHC -ddump-simpl -ddump-asm -} + +module Main (main) where + +import GHC.Float (float2Int, int2Float) + +import System.Environment + +import Prelude hiding (null + ,lines,unlines + ,writeFile + ) + +import Control.Exception (assert, bracket, ) + +import Foreign.Marshal.Array (advancePtr) +import Foreign.Ptr (minusPtr) +import Foreign.Storable (Storable(..)) + +import Control.Monad (when) + +import System.IO (openBinaryFile, hClose, + hPutBuf, + Handle, IOMode(..)) + +import System.IO.Unsafe (unsafePerformIO) + +import Foreign.Ptr (Ptr) +import Foreign.ForeignPtr (ForeignPtr, withForeignPtr, ) +import Foreign.Marshal.Array (copyArray) + +import qualified Foreign.ForeignPtr as F + +main :: IO () +main = do args <- getArgs + case args of + ["1"] -> mainMonolithic1Generator + ["2"] -> mainMonolithic1Composed + _ -> error "Huh?" + +type Phase = (Float, Float, Float) + +{-# INLINE saw #-} +saw :: Num a => a -> a +saw t = 1-2*t + +{-# INLINE fraction #-} +fraction :: Float -> Float +fraction x = x - int2Float (float2Int x) + +{-# INLINE generator0Freq #-} +generator0Freq :: Float -> Float -> Maybe (Float, Float) +generator0Freq freq = + \p -> Just (saw p, fraction (p+freq)) + +infixl 6 `mix`, `mixGen` + +{-# INLINE mix #-} +mix :: + (Num y) => + (s -> Maybe (y, s)) -> + (t -> Maybe (y, t)) -> + ((s,t) -> Maybe (y, (s,t))) +mix f g (s0,t0) = + do (a,s1) <- f s0 + (b,t1) <- g t0 + return ((a+b), (s1,t1)) + +data Generator a = + forall s. + Generator (s -> Maybe (a, s)) s + +{-# INLINE runGeneratorMonolithic #-} +runGeneratorMonolithic :: Int -> Generator Float -> Vector Float +runGeneratorMonolithic size' (Generator f s) = + fst $ unfoldrN size' f s + +{- SPECIALISE INLINE generator0Gen :: Float -> Float -> Generator Float -} +{-# INLINE generator0Gen #-} +generator0Gen :: Float -> Float -> Generator Float +generator0Gen freq phase = + Generator (\p -> Just (saw p, fraction (p+freq))) phase + +{- SPECIALISE INLINE mixGen :: Generator Float -> Generator Float -> Generator Float -} +{-# INLINE mixGen #-} +mixGen :: + (Num y) => + Generator y -> + Generator y -> + Generator y +mixGen (Generator f s) (Generator g t) = + Generator (\(s0,t0) -> + do (a,s1) <- f s0 + (b,t1) <- g t0 + return ((a+b), (s1,t1))) (s,t) + +{-# INLINE dl #-} +dl :: Phase +dl = (0.01008, 0.01003, 0.00990) + +{-# INLINE initPhase2 #-} +initPhase2 :: (Phase, Phase) +initPhase2 = + ((0,0.7,0.1), (0.3,0.4,0.6)) + + +size :: Int +size = 10000000 + + +mainMonolithic1Composed :: IO () +mainMonolithic1Composed = + writeFile "T3736.speed.f32" + (fst $ unfoldrN size + (let (f0,f1,f2) = dl + in generator0Freq f0 `mix` + generator0Freq f1 `mix` + generator0Freq f2) + (let (p0,p1,p2) = fst initPhase2 + in ((p0,p1),p2))) + +mainMonolithic1Generator :: IO () +mainMonolithic1Generator = + writeFile "T3736.speed.f32" + (runGeneratorMonolithic size + (let (f0,f1,f2) = dl + (p0,p1,p2) = fst initPhase2 + in generator0Gen f0 p0 `mixGen` + generator0Gen f1 p1 `mixGen` + generator0Gen f2 p2)) + +empty :: (Storable a) => Vector a +empty = unsafeCreate 0 $ const $ return () +{-# NOINLINE empty #-} + +null :: Vector a -> Bool +null (SV _ _ l) = assert (l >= 0) $ l <= 0 +{-# INLINE null #-} + +unfoldrN :: (Storable b) => Int -> (a -> Maybe (b, a)) -> a -> (Vector b, Maybe a) +unfoldrN n f x0 = + if n <= 0 + then (empty, Just x0) + else unsafePerformIO $ createAndTrim' n $ \p -> go p n x0 + where + go = arguments2 $ \p i -> \x -> + if i == 0 + then return (0, n-i, Just x) + else + case f x of + Nothing -> return (0, n-i, Nothing) + Just (w,x') -> do poke p w + go (incPtr p) (i-1) x' +{-# INLINE unfoldrN #-} + +hPut :: (Storable a) => Handle -> Vector a -> IO () +hPut h v = + when (not (null v)) $ + withStartPtr v $ \ ptrS l -> + let ptrE = advancePtr ptrS l + in hPutBuf h ptrS (minusPtr ptrE ptrS) + +writeFile :: (Storable a) => FilePath -> Vector a -> IO () +writeFile f txt = + bracket (openBinaryFile f WriteMode) hClose + (\h -> hPut h txt) + +data Vector a = SV {-# UNPACK #-} !(ForeignPtr a) + {-# UNPACK #-} !Int -- offset + {-# UNPACK #-} !Int -- length + +withStartPtr :: Storable a => Vector a -> (Ptr a -> Int -> IO b) -> IO b +withStartPtr (SV x s l) f = + withForeignPtr x $ \p -> f (p `advancePtr` s) l +{-# INLINE withStartPtr #-} + +incPtr :: (Storable a) => Ptr a -> Ptr a +incPtr v = advancePtr v 1 +{-# INLINE incPtr #-} + +unsafeCreate :: (Storable a) => Int -> (Ptr a -> IO ()) -> Vector a +unsafeCreate l f = unsafePerformIO (create l f) +{-# INLINE unsafeCreate #-} + +create :: (Storable a) => Int -> (Ptr a -> IO ()) -> IO (Vector a) +create l f = do + fp <- mallocForeignPtrArray l + withForeignPtr fp $ \p -> f p + return $! SV fp 0 l + +createAndTrim' :: (Storable a) => Int + -> (Ptr a -> IO (Int, Int, b)) + -> IO (Vector a, b) +createAndTrim' l f = do + fp <- mallocForeignPtrArray l + withForeignPtr fp $ \p -> do + (off, l', res) <- f p + if assert (l' <= l) $ l' >= l + then return $! (SV fp 0 l, res) + else do ps <- create l' $ \p' -> copyArray p' (p `advancePtr` off) l' + return $! (ps, res) + +{-# INLINE arguments2 #-} +arguments2 :: (a -> b -> x) -> a -> b -> x +arguments2 f = \a b -> (f $! a) $! b + +{-# INLINE mallocForeignPtrArray #-} +mallocForeignPtrArray :: Storable a => Int -> IO (F.ForeignPtr a) +mallocForeignPtrArray = F.mallocForeignPtrArray diff --git a/testsuite/tests/perf/should_run/T3736.stdout b/testsuite/tests/perf/should_run/T3736.stdout new file mode 100644 index 0000000000..1796dc2720 --- /dev/null +++ b/testsuite/tests/perf/should_run/T3736.stdout @@ -0,0 +1 @@ +Match diff --git a/testsuite/tests/perf/should_run/T3738.hs b/testsuite/tests/perf/should_run/T3738.hs new file mode 100644 index 0000000000..1b3141c0f5 --- /dev/null +++ b/testsuite/tests/perf/should_run/T3738.hs @@ -0,0 +1,10 @@ + +module Main where + +import T3738a + +{-# INLINE bar #-} +bar :: Int -> [Int] +bar x = map (+ 2) (foo x) + +main = print (bar 2 !! 10000) diff --git a/testsuite/tests/perf/should_run/T3738.stdout b/testsuite/tests/perf/should_run/T3738.stdout new file mode 100644 index 0000000000..7ed6ff82de --- /dev/null +++ b/testsuite/tests/perf/should_run/T3738.stdout @@ -0,0 +1 @@ +5 diff --git a/testsuite/tests/perf/should_run/T3738a.hs b/testsuite/tests/perf/should_run/T3738a.hs new file mode 100644 index 0000000000..b27451d12d --- /dev/null +++ b/testsuite/tests/perf/should_run/T3738a.hs @@ -0,0 +1,6 @@ + +module T3738a where + +{-# INLINE foo #-} +foo :: Num a => a -> [a] +foo x = map (+ 1) (repeat x) diff --git a/testsuite/tests/perf/should_run/T4321.hs b/testsuite/tests/perf/should_run/T4321.hs new file mode 100644 index 0000000000..b8a0dbc4a1 --- /dev/null +++ b/testsuite/tests/perf/should_run/T4321.hs @@ -0,0 +1,15 @@ + +-- In 6.13 this stack overflowed + +module Main (main) where + +main :: IO () +main = let n = 1000000 + in print $ integrate n (1 / fromIntegral n) + +integrate :: Int -> Double -> Double +integrate n h = h * (sum (map area [1..n])) + where area :: Int -> Double + area i = let x = h * (fromIntegral i - 0.5) + in 4 / (1 + x*x) + diff --git a/testsuite/tests/perf/should_run/T4321.stdout b/testsuite/tests/perf/should_run/T4321.stdout new file mode 100644 index 0000000000..ce98a198a7 --- /dev/null +++ b/testsuite/tests/perf/should_run/T4321.stdout @@ -0,0 +1 @@ +3.1415926535897643 diff --git a/testsuite/tests/perf/should_run/T4474a.hs b/testsuite/tests/perf/should_run/T4474a.hs new file mode 100644 index 0000000000..ef70a6ae69 --- /dev/null +++ b/testsuite/tests/perf/should_run/T4474a.hs @@ -0,0 +1,40 @@ +{-# LANGUAGE BangPatterns #-} + +module Main where + +data Tree = Leaf !Int | Fork !Tree !Tree deriving Show + +fullTree 0 = Leaf 1 +fullTree n = let t = fullTree (n - 1) in Fork t t + +flatListNaive (Leaf n) = [n] +flatListNaive (Fork a b) = flatListNaive a ++ flatListNaive b + +flatListCons t = flat t [] + where + flat (Leaf n) ns = n : ns + flat (Fork a b) ns = flat a (flat b ns) + +flatListCons2 t = flat t [] + where + flat (Leaf n) = \ns -> n : ns + flat (Fork a b) = \ns -> flat a (flat b ns) + +flatListCons3 t = flat t [] + where + flat (Leaf n) = (n :) + flat (Fork a b) = flat a . flat b + +flatDList (Leaf n) = (n :) +flatDList (Fork a b) = flatDList a . flatDList b + +sumList l = loop 0 l + where loop !c [] = c + loop !c (h:t) = loop (c + h) t + +sumDList l = loop 0 (l []) + where loop !c [] = c + loop !c (h : t) = loop (c + h) t + +main = print $ sumList $ flatListCons $ fullTree 26 + diff --git a/testsuite/tests/perf/should_run/T4474a.stdout b/testsuite/tests/perf/should_run/T4474a.stdout new file mode 100644 index 0000000000..e6c68622ac --- /dev/null +++ b/testsuite/tests/perf/should_run/T4474a.stdout @@ -0,0 +1 @@ +67108864 diff --git a/testsuite/tests/perf/should_run/T4474b.hs b/testsuite/tests/perf/should_run/T4474b.hs new file mode 100644 index 0000000000..fd931b4223 --- /dev/null +++ b/testsuite/tests/perf/should_run/T4474b.hs @@ -0,0 +1,40 @@ +{-# LANGUAGE BangPatterns #-} + +module Main where + +data Tree = Leaf !Int | Fork !Tree !Tree deriving Show + +fullTree 0 = Leaf 1 +fullTree n = let t = fullTree (n - 1) in Fork t t + +flatListNaive (Leaf n) = [n] +flatListNaive (Fork a b) = flatListNaive a ++ flatListNaive b + +flatListCons t = flat t [] + where + flat (Leaf n) ns = n : ns + flat (Fork a b) ns = flat a (flat b ns) + +flatListCons2 t = flat t [] + where + flat (Leaf n) = \ns -> n : ns + flat (Fork a b) = \ns -> flat a (flat b ns) + +flatListCons3 t = flat t [] + where + flat (Leaf n) = (n :) + flat (Fork a b) = flat a . flat b + +flatDList (Leaf n) = (n :) +flatDList (Fork a b) = flatDList a . flatDList b + +sumList l = loop 0 l + where loop !c [] = c + loop !c (h:t) = loop (c + h) t + +sumDList l = loop 0 (l []) + where loop !c [] = c + loop !c (h : t) = loop (c + h) t + +main = print $ sumList $ flatListCons2 $ fullTree 26 + diff --git a/testsuite/tests/perf/should_run/T4474b.stdout b/testsuite/tests/perf/should_run/T4474b.stdout new file mode 100644 index 0000000000..e6c68622ac --- /dev/null +++ b/testsuite/tests/perf/should_run/T4474b.stdout @@ -0,0 +1 @@ +67108864 diff --git a/testsuite/tests/perf/should_run/T4474c.hs b/testsuite/tests/perf/should_run/T4474c.hs new file mode 100644 index 0000000000..116a21322e --- /dev/null +++ b/testsuite/tests/perf/should_run/T4474c.hs @@ -0,0 +1,40 @@ +{-# LANGUAGE BangPatterns #-} + +module Main where + +data Tree = Leaf !Int | Fork !Tree !Tree deriving Show + +fullTree 0 = Leaf 1 +fullTree n = let t = fullTree (n - 1) in Fork t t + +flatListNaive (Leaf n) = [n] +flatListNaive (Fork a b) = flatListNaive a ++ flatListNaive b + +flatListCons t = flat t [] + where + flat (Leaf n) ns = n : ns + flat (Fork a b) ns = flat a (flat b ns) + +flatListCons2 t = flat t [] + where + flat (Leaf n) = \ns -> n : ns + flat (Fork a b) = \ns -> flat a (flat b ns) + +flatListCons3 t = flat t [] + where + flat (Leaf n) = (n :) + flat (Fork a b) = flat a . flat b + +flatDList (Leaf n) = (n :) +flatDList (Fork a b) = flatDList a . flatDList b + +sumList l = loop 0 l + where loop !c [] = c + loop !c (h:t) = loop (c + h) t + +sumDList l = loop 0 (l []) + where loop !c [] = c + loop !c (h : t) = loop (c + h) t + +main = print $ sumList $ flatListCons3 $ fullTree 26 + diff --git a/testsuite/tests/perf/should_run/T4474c.stdout b/testsuite/tests/perf/should_run/T4474c.stdout new file mode 100644 index 0000000000..e6c68622ac --- /dev/null +++ b/testsuite/tests/perf/should_run/T4474c.stdout @@ -0,0 +1 @@ +67108864 diff --git a/testsuite/tests/perf/should_run/T4830.hs b/testsuite/tests/perf/should_run/T4830.hs new file mode 100644 index 0000000000..e345ffc9cd --- /dev/null +++ b/testsuite/tests/perf/should_run/T4830.hs @@ -0,0 +1,15 @@ +-- Compile with O2; SpecConstr should fire nicely +-- and eliminate all allocation in inner loop + +module Main where + +foo :: Int -> Maybe (Double,Double) -> Double +foo _ Nothing = 0 +foo 0 (Just (x,y)) = x+y +foo n (Just (x,y)) = let r = f x y in r `seq` foo (n-1) (Just r) + where + f x y | x <= y = (x,y) + | otherwise = (y,x) + +main = print (foo 1000000 (Just (1,2))) + diff --git a/testsuite/tests/perf/should_run/T4830.stdout b/testsuite/tests/perf/should_run/T4830.stdout new file mode 100644 index 0000000000..9f55b2ccb5 --- /dev/null +++ b/testsuite/tests/perf/should_run/T4830.stdout @@ -0,0 +1 @@ +3.0 diff --git a/testsuite/tests/perf/should_run/T4978.hs b/testsuite/tests/perf/should_run/T4978.hs new file mode 100644 index 0000000000..b661edc483 --- /dev/null +++ b/testsuite/tests/perf/should_run/T4978.hs @@ -0,0 +1,126 @@ +module Main (main) where + +import qualified Data.ByteString as S +import qualified Data.ByteString.Lazy as L +import Data.ByteString.Internal (inlinePerformIO) +import qualified Data.ByteString.Internal as S +import Data.Monoid +import Foreign +import System.IO.Unsafe + +newtype Builder = Builder { + runBuilder :: (Buffer -> [S.ByteString]) -> Buffer -> [S.ByteString] + } + +instance Monoid Builder where + mempty = empty + {-# INLINE mempty #-} + mappend = append + {-# INLINE mappend #-} + mconcat = foldr mappend mempty + {-# INLINE mconcat #-} + +empty :: Builder +empty = Builder (\ k b -> b `seq` k b) +{-# INLINE empty #-} + +singleton :: Word8 -> Builder +singleton = writeN 1 . flip poke +{-# INLINE singleton #-} + +append :: Builder -> Builder -> Builder +append (Builder f) (Builder g) = Builder (f . g) +{-# INLINE [0] append #-} + +-- Our internal buffer type +data Buffer = Buffer {-# UNPACK #-} !(ForeignPtr Word8) + {-# UNPACK #-} !Int -- offset + {-# UNPACK #-} !Int -- used bytes + {-# UNPACK #-} !Int -- length left + +-- | /O(1)./ Pop the 'S.ByteString' we have constructed so far, if any, +-- yielding a new chunk in the result lazy 'L.ByteString'. +flush :: Builder +flush = Builder $ \ k buf@(Buffer p o u l) -> + if u == 0 + then k buf + else S.PS p o u : k (Buffer p (o+u) 0 l) + +-- | /O(n)./ Extract a lazy 'L.ByteString' from a 'Builder'. +-- The construction work takes place if and when the relevant part of +-- the lazy 'L.ByteString' is demanded. +-- +toLazyByteString :: Builder -> L.ByteString +toLazyByteString m = L.fromChunks $ unsafePerformIO $ do + buf <- newBuffer defaultSize + return (runBuilder (m `append` flush) (const []) buf) +{-# INLINE toLazyByteString #-} + +defaultSize :: Int +defaultSize = 32 * k - overhead + where k = 1024 + overhead = 2 * sizeOf (undefined :: Int) + +-- | Sequence an IO operation on the buffer +unsafeLiftIO :: (Buffer -> IO Buffer) -> Builder +unsafeLiftIO f = Builder $ \ k buf -> inlinePerformIO $ do + buf' <- f buf + return (k buf') +{-# INLINE unsafeLiftIO #-} + +-- | Get the size of the buffer +withSize :: (Int -> Builder) -> Builder +withSize f = Builder $ \ k buf@(Buffer _ _ _ l) -> runBuilder (f l) k buf + +-- | Ensure that there are at least @n@ many bytes available. +ensureFree :: Int -> Builder +ensureFree n = n `seq` withSize $ \ l -> + if n <= l then empty else + flush `append` unsafeLiftIO (const (newBuffer (max n defaultSize))) +{-# INLINE [0] ensureFree #-} + +-- | Ensure that @n@ many bytes are available, and then use @f@ to write some +-- bytes into the memory. +writeN :: Int -> (Ptr Word8 -> IO ()) -> Builder +writeN n f = ensureFree n `append` unsafeLiftIO (writeNBuffer n f) +{-# INLINE [0] writeN #-} + +writeNBuffer :: Int -> (Ptr Word8 -> IO ()) -> Buffer -> IO Buffer +writeNBuffer n f (Buffer fp o u l) = do + withForeignPtr fp (\p -> f (p `plusPtr` (o+u))) + return (Buffer fp o (u+n) (l-n)) +{-# INLINE writeNBuffer #-} + +newBuffer :: Int -> IO Buffer +newBuffer size = do + fp <- S.mallocByteString size + return $! Buffer fp 0 0 size +{-# INLINE newBuffer #-} + +-- Merge buffer bounds checks. +{-# RULES +"append/writeN" forall a b (f::Ptr Word8 -> IO ()) + (g::Ptr Word8 -> IO ()) ws. + append (writeN a f) (append (writeN b g) ws) = + append (writeN (a+b) (\p -> f p >> g (p `plusPtr` a))) ws + +"writeN/writeN" forall a b (f::Ptr Word8 -> IO ()) + (g::Ptr Word8 -> IO ()). + append (writeN a f) (writeN b g) = + writeN (a+b) (\p -> f p >> g (p `plusPtr` a)) + +"ensureFree/ensureFree" forall a b . + append (ensureFree a) (ensureFree b) = ensureFree (max a b) + #-} + +-- Test case + +-- Argument must be a multiple of 4. +test :: Int -> Builder +test 0 = mempty +test n = singleton 1 `mappend` + (singleton 2 `mappend` + (singleton 3 `mappend` + (singleton 4 `mappend` test (n-4)))) + +main = print $ L.length $ toLazyByteString $ test 10000000 diff --git a/testsuite/tests/perf/should_run/T4978.stdout b/testsuite/tests/perf/should_run/T4978.stdout new file mode 100644 index 0000000000..825319e1c5 --- /dev/null +++ b/testsuite/tests/perf/should_run/T4978.stdout @@ -0,0 +1 @@ +10000000 diff --git a/testsuite/tests/perf/should_run/T5113.hs b/testsuite/tests/perf/should_run/T5113.hs new file mode 100644 index 0000000000..e87bcb6cad --- /dev/null +++ b/testsuite/tests/perf/should_run/T5113.hs @@ -0,0 +1,31 @@ +{-# LANGUAGE BangPatterns #-} +module Main where + +import Data.Array.Base (unsafeRead, unsafeWrite) +import Data.Array.ST +import Data.Array.Unboxed +import Control.Monad.ST + +main = print (divisorCounts 1000000 ! 342) + +isqrt :: Int -> Int +isqrt n = floor (sqrt $ fromIntegral n) + +divisorCounts :: Int -> UArray Int Int +divisorCounts n = runSTUArray $ do + let !rt = isqrt n + darr <- newArray (0,n) 1 :: ST s (STUArray s Int Int) + let inc i = unsafeRead darr i >>= \k -> unsafeWrite darr i (k+1) + note step i + | i > n = return () + | otherwise = do + inc i + note step (i+step) + count j + | j > rt = return () + | otherwise = do + note (2*j) (j*j) + count (j+2) + note 2 4 + count 3 + return darr diff --git a/testsuite/tests/perf/should_run/T5113.stdout b/testsuite/tests/perf/should_run/T5113.stdout new file mode 100644 index 0000000000..0cfbf08886 --- /dev/null +++ b/testsuite/tests/perf/should_run/T5113.stdout @@ -0,0 +1 @@ +2 diff --git a/testsuite/tests/perf/should_run/T5205.hs b/testsuite/tests/perf/should_run/T5205.hs new file mode 100644 index 0000000000..215dd42647 --- /dev/null +++ b/testsuite/tests/perf/should_run/T5205.hs @@ -0,0 +1,13 @@ + +import Control.Concurrent +import Control.Monad + +main :: IO () +main = do t <- forkIO (x >> x) + threadDelay 1000000 + killThread t + putStrLn "Done" + +x :: IO () +x = forever yield + diff --git a/testsuite/tests/perf/should_run/T5205.stdout b/testsuite/tests/perf/should_run/T5205.stdout new file mode 100644 index 0000000000..a965a70ed4 --- /dev/null +++ b/testsuite/tests/perf/should_run/T5205.stdout @@ -0,0 +1 @@ +Done diff --git a/testsuite/tests/perf/should_run/T5237.hs b/testsuite/tests/perf/should_run/T5237.hs new file mode 100644 index 0000000000..6a12f5ecea --- /dev/null +++ b/testsuite/tests/perf/should_run/T5237.hs @@ -0,0 +1,15 @@ +{-# LANGUAGE BangPatterns #-} +module Main (main) where + +-- Test that the rewrite rules for small exponents fire (#5237). +-- If they don't fire, this will allocate much. + +fun :: Double -> Double +fun x = go 0 1.0 + where + go !acc z + | x < z = acc + | otherwise = go (acc + 1/z^4) (z+1.0) + +main :: IO () +main = print (fun 1e7) diff --git a/testsuite/tests/perf/should_run/T5237.stdout b/testsuite/tests/perf/should_run/T5237.stdout new file mode 100644 index 0000000000..a620a54dd7 --- /dev/null +++ b/testsuite/tests/perf/should_run/T5237.stdout @@ -0,0 +1 @@ +1.082323233710861 diff --git a/testsuite/tests/perf/should_run/T5536.hs b/testsuite/tests/perf/should_run/T5536.hs new file mode 100644 index 0000000000..3259c46675 --- /dev/null +++ b/testsuite/tests/perf/should_run/T5536.hs @@ -0,0 +1,5 @@ + +main :: IO () +main = do writeFile "T5536.data" (replicate 10000000 'a') + readFile "T5536.data" >>= putStr + diff --git a/testsuite/tests/perf/should_run/T5549.hs b/testsuite/tests/perf/should_run/T5549.hs new file mode 100644 index 0000000000..cab0dc0226 --- /dev/null +++ b/testsuite/tests/perf/should_run/T5549.hs @@ -0,0 +1,27 @@ +module Main where +-- See Trac #5549 +-- The issue here is allocating integer constants inside a loop + + +lcs3 :: Eq a => [a] -> [a] -> [a] +lcs3 a b = fst $ aux (a, length a) (b, length b) + where + aux (_,0) _ = ([],0) + aux _ (_,0) = ([],0) + aux (a@(ha:as),la) (b@(hb:bs), lb) + | ha == hb = let (s,n) = aux (as,la-1) (bs,lb-1) in (ha : s, n+1) + | otherwise = + let (sa,na) = aux (as,la-1) (b,lb) + (sb,nb) = aux (a,la) (bs,lb-1) in + if na > nb then (sa,na) else (sb,nb) + +f :: Integer -> Integer -> Integer +f acc 0 = acc +f acc n = g (acc + 1) (n-1) + +g :: Integer -> Integer -> Integer +g acc 0 = acc +g acc n = f (acc -1) (n-1) + +main = do putStrLn . show $ f 0 100000000 + putStrLn . show $ lcs3 [1..20] [10..20] diff --git a/testsuite/tests/perf/should_run/T5549.stdout b/testsuite/tests/perf/should_run/T5549.stdout new file mode 100644 index 0000000000..95a4feb269 --- /dev/null +++ b/testsuite/tests/perf/should_run/T5549.stdout @@ -0,0 +1,2 @@ +0 +[10,11,12,13,14,15,16,17,18,19,20] diff --git a/testsuite/tests/perf/should_run/T7257.hs b/testsuite/tests/perf/should_run/T7257.hs new file mode 100644 index 0000000000..ef65db3a56 --- /dev/null +++ b/testsuite/tests/perf/should_run/T7257.hs @@ -0,0 +1,30 @@ +{-# LANGUAGE BangPatterns #-} +module Main where + +import qualified Data.ByteString as S +import Data.IORef +import Control.Monad + +makeBs :: Int -> S.ByteString +makeBs n = S.replicate n (fromIntegral n) + +doStuff :: IORef [S.ByteString] -> Int -> IO () +doStuff ref n = do + let !bs = makeBs n + modifyIORef ref (bs:) +{-# NOINLINE doStuff #-} + +undo :: IORef [S.ByteString] -> IO () +undo ref = do + h <- atomicModifyIORef ref (\(x:xs) -> (xs,x)) + S.length h `seq` return () + +main = do + ref <- newIORef [S.empty] + let fn n = do + doStuff ref n + when (rem 5 n /= 0 ) $ undo ref + + mapM_ fn (take 5000000 $ cycle [1..100]) + var <- readIORef ref + print $ length var diff --git a/testsuite/tests/perf/should_run/T7257.stdout b/testsuite/tests/perf/should_run/T7257.stdout new file mode 100644 index 0000000000..26aece761a --- /dev/null +++ b/testsuite/tests/perf/should_run/T7257.stdout @@ -0,0 +1 @@ +100001 diff --git a/testsuite/tests/perf/should_run/T7436.hs b/testsuite/tests/perf/should_run/T7436.hs new file mode 100644 index 0000000000..9f615da9ed --- /dev/null +++ b/testsuite/tests/perf/should_run/T7436.hs @@ -0,0 +1,22 @@ +{-# LANGUAGE DeriveFunctor, DeriveFoldable #-} +module Main where + +import Prelude hiding (foldr) +import Data.Foldable + +data List a = Nil | Cons a (List a) + deriving (Functor, Foldable) + +mkList :: Int -> List Int +mkList 0 = Nil +mkList n = Cons n (mkList (n-1)) + +main :: IO () +main = print $ foldr (\x y -> y) "end" (mkList n) + where n = 40000 + -- Increase this to increase the difference between good and bad + -- Eg 6000 takes a lot longer + -- The biggest difference is not allocation or bytes used, + -- but execution time! + + diff --git a/testsuite/tests/perf/should_run/T7436.stdout b/testsuite/tests/perf/should_run/T7436.stdout new file mode 100644 index 0000000000..e0deb4bed2 --- /dev/null +++ b/testsuite/tests/perf/should_run/T7436.stdout @@ -0,0 +1 @@ +"end" diff --git a/testsuite/tests/perf/should_run/T7507.hs b/testsuite/tests/perf/should_run/T7507.hs new file mode 100644 index 0000000000..04f156d871 --- /dev/null +++ b/testsuite/tests/perf/should_run/T7507.hs @@ -0,0 +1,14 @@ +module Main where + +import Data.Int +import System.Environment + +bitcount x = if x > 0 + then let (d,m) = divMod x 2 in bitcount d + m + else 0 + +main = print $ sum $ map bitcount + [ 0 :: Int64 .. 2^20 - 1 ] + + + diff --git a/testsuite/tests/perf/should_run/T7507.stdout b/testsuite/tests/perf/should_run/T7507.stdout new file mode 100644 index 0000000000..a6ec9d96e5 --- /dev/null +++ b/testsuite/tests/perf/should_run/T7507.stdout @@ -0,0 +1 @@ +10485760 diff --git a/testsuite/tests/perf/should_run/T7797.hs b/testsuite/tests/perf/should_run/T7797.hs new file mode 100644 index 0000000000..9329a408a7 --- /dev/null +++ b/testsuite/tests/perf/should_run/T7797.hs @@ -0,0 +1,15 @@ +{-# LANGUAGE ExistentialQuantification #-} +module Main where + +import T7797a + +data Box = forall a. (Size a) => Box a a + +box = Box (go 10000000) (go 10000000) where + go :: Int -> [Int] + go 0 = [] + go n = 1 : go (n - 1) +{-# NOINLINE box #-} + +main = print $ case box of + Box l r -> size l r diff --git a/testsuite/tests/perf/should_run/T7797.stdout b/testsuite/tests/perf/should_run/T7797.stdout new file mode 100644 index 0000000000..573541ac97 --- /dev/null +++ b/testsuite/tests/perf/should_run/T7797.stdout @@ -0,0 +1 @@ +0 diff --git a/testsuite/tests/perf/should_run/T7797a.hs b/testsuite/tests/perf/should_run/T7797a.hs new file mode 100644 index 0000000000..d06df97a5d --- /dev/null +++ b/testsuite/tests/perf/should_run/T7797a.hs @@ -0,0 +1,12 @@ +module T7797a where + +class Size t where + size :: t -> t -> Int + burg :: t -> t + +instance (Ord a, Num a) => Size [a] where + {-# SPECIALISE instance Size [Int] #-} + size (x:xs) (y:ys) | x+y > 4 = size xs ys + | otherwise = size xs ys + size _ _ = 0 + burg = error "urk" diff --git a/testsuite/tests/perf/should_run/T7850.hs b/testsuite/tests/perf/should_run/T7850.hs new file mode 100644 index 0000000000..d3124db3c2 --- /dev/null +++ b/testsuite/tests/perf/should_run/T7850.hs @@ -0,0 +1,11 @@ + +{-# LANGUAGE BangPatterns #-} + +main :: IO () +main = print $ ack 4 1 + +ack :: Int -> Int -> Int +ack 0 !n = n+1 +ack m 0 = ack (m-1) 1 +ack m n = ack (m-1) $ ack m (n-1) + diff --git a/testsuite/tests/perf/should_run/T7850.stdout b/testsuite/tests/perf/should_run/T7850.stdout new file mode 100644 index 0000000000..b07de00606 --- /dev/null +++ b/testsuite/tests/perf/should_run/T7850.stdout @@ -0,0 +1 @@ +65533 diff --git a/testsuite/tests/perf/should_run/T7954.hs b/testsuite/tests/perf/should_run/T7954.hs new file mode 100644 index 0000000000..2b86d2fc39 --- /dev/null +++ b/testsuite/tests/perf/should_run/T7954.hs @@ -0,0 +1,7 @@ +module Main where + +norm :: [Double] -> Double +norm = sqrt . sum . map (\x -> x*x) + +main :: IO () +main = print (norm (enumFromTo 0 10000000) > 100) diff --git a/testsuite/tests/perf/should_run/T7954.stdout b/testsuite/tests/perf/should_run/T7954.stdout new file mode 100644 index 0000000000..0ca95142bb --- /dev/null +++ b/testsuite/tests/perf/should_run/T7954.stdout @@ -0,0 +1 @@ +True diff --git a/testsuite/tests/perf/should_run/T876.hs b/testsuite/tests/perf/should_run/T876.hs new file mode 100644 index 0000000000..398859f86d --- /dev/null +++ b/testsuite/tests/perf/should_run/T876.hs @@ -0,0 +1,11 @@ +-- This test allocates a lot more if length is +-- not a good consumer + +module Main where +import System.Environment (getArgs) + +foo :: Int -> Int +foo n = sum [ length [i..n] | i <- [1..n] ] + +main = do { [arg] <- getArgs + ; print (foo (read arg)) } diff --git a/testsuite/tests/perf/should_run/T876.stdout b/testsuite/tests/perf/should_run/T876.stdout new file mode 100644 index 0000000000..b9d569380c --- /dev/null +++ b/testsuite/tests/perf/should_run/T876.stdout @@ -0,0 +1 @@ +50005000 diff --git a/testsuite/tests/perf/should_run/all.T b/testsuite/tests/perf/should_run/all.T new file mode 100644 index 0000000000..b44d30755f --- /dev/null +++ b/testsuite/tests/perf/should_run/all.T @@ -0,0 +1,298 @@ +# Tests that newArray/newArray_ is being optimised correctly + +# fortunately the values here are mostly independent of the wordsize, +# because the test allocates an unboxed array of doubles. + +test('T3586', + [stats_num_field('peak_megabytes_allocated', (17, 1)), + # expected value: 17 (amd64/Linux) + stats_num_field('bytes allocated', (16835544, 5)), + # expected value: 16835544 (amd64/Linux) + only_ways(['normal']) + ], + compile_and_run, + ['-O']) + +test('T4830', + [stats_num_field('bytes allocated', + [(wordsize(64), 99264, 1), + # 127000 (amd64/Linux) + # 2013-02-07: 99264 (amd64/Linux) + (wordsize(32), 70646, 2)]), + # 2013-02-10: 69744 (x86/Windows) + # 2013-02-10: 71548 (x86/OSX) + only_ways(['normal']) + ], + compile_and_run, + ['-O2']) + +test('T3245', normal, compile_and_run, ['-O']) + +# Test that bytestring reading/writing isn't over-allocating. We had +# a bug in hGetBufNonBlocking in 6.13 that triggered this. +# +test('lazy-bs-alloc', + [stats_num_field('peak_megabytes_allocated', (2, 1)), + # expected value: 2 (amd64/Linux) + stats_num_field('bytes allocated', + [(wordsize(64), 425400, 1), + # 489776 (amd64/Linux) + # 2013-02-07: 429744 (amd64/Linux) + # 2013-12-12: 425400 (amd64/Linux) + (wordsize(32), 417738, 1)]), + # 2013-02-10: 421296 (x86/Windows) + # 2013-02-10: 414180 (x86/OSX) + only_ways(['normal']), + extra_run_opts('../../numeric/should_run/arith011.stdout'), + ignore_output + ], + # use a suitably big file, without bloating the repo with a new one: + compile_and_run, + ['-O']) + +test('T876', + [stats_num_field('bytes allocated', + [(wordsize(64), 1263712 , 5), + # 2013-02-14: 1263712 (x86_64/Linux) + (wordsize(32), 663712, 5)]), + only_ways(['normal']), + extra_run_opts('10000') + ], + compile_and_run, + ['-O']) + +# Get reproducible floating-point results on x86 +if config.arch == 'i386': + sse2_opts = '-msse2' +else: + sse2_opts = '' + +test('T4321', + omit_ways(['ghci']), + compile_and_run, ['-O ' + sse2_opts]) + +test('T3736', + extra_clean(['T3736.speed.f32']), + run_command, + ['$MAKE -s --no-print-directory T3736']) +test('T3738', + [extra_clean(['T3738a.hi', 'T3738a.o']), + stats_num_field('peak_megabytes_allocated', (1, 0)), + # expected value: 1 (amd64/Linux) + stats_num_field('bytes allocated', + [(wordsize(32), 45648, 5), + # expected value: 45648 (x86/Linux) + (wordsize(64), 49400, 5)]), + # expected value: 49400 (amd64/Linux) + only_ways(['normal']) + ], + compile_and_run, + ['-O']) + +test('MethSharing', + [stats_num_field('peak_megabytes_allocated', (1, 0)), + # expected value: 1 (amd64/Linux) + stats_num_field('bytes allocated', + [(wordsize(32), 360940756, 5), + # expected value: 2685858140 (x86/OS X) + # expected: 360940756 (x86/Linux) + (wordsize(64), 640067672, 5)]), + # expected: 640067672 (amd64/Linux) + only_ways(['normal']) + ], + compile_and_run, + ['-O']) +test('T2902', + extra_clean(['T2902_A', 'T2902_B', + 'T2902_A.hi', 'T2902_B.hi', + 'T2902_A.o', 'T2902_B.o', + 'T2902_A_PairingSum.hi', 'T2902_B_PairingSum.hi', + 'T2902_A_PairingSum.o', 'T2902_B_PairingSum.o', + 'T2902_Sum.hi', + 'T2902_Sum.o']), + run_command, + ['$MAKE -s --no-print-directory T2902']) +test('T149', + [ # expect_broken(149), + # working (2 Jul 2013, x86-64/Linux) + extra_clean(['T149_A', 'T149_B', + 'T149_A.hi', 'T149_B.hi', + 'T149_A.o', 'T149_B.o'])], + run_command, + ['$MAKE -s --no-print-directory T149']) + +test('T5113', + [stats_num_field('bytes allocated', + [(wordsize(32), 4000000, 5), + (wordsize(64), 8000000, 5)]), + only_ways(['normal']) + ], + compile_and_run, + ['-O']) + + +test('T4978', + [stats_num_field('bytes allocated', + [(wordsize(32), 10000000, 5), + (wordsize(64), 10137680, 5)]), + # expected value: 10137680 (amd64/Linux) + only_ways(['normal']) + ], + compile_and_run, + ['-O2']) + +test('T5205', + [stats_num_field('bytes allocated', + [(wordsize(32), 47088, 5), + # expected value: 47088 (x86/Darwin) + (wordsize(64), 51320, 5)]), + # expected value: 51320 (amd64/Linux) + only_ways(['normal', 'optasm']) + ], + compile_and_run, + ['']) + +test('T5549', + [stats_num_field('bytes allocated', + [(wordsize(32), 3362958676, 5), + # expected value: 3362958676 (Windows) + (wordsize(64), 6725846120, 5)]), + # expected value: 6725846120 (amd64/Linux) + only_ways(['normal']) + ], + compile_and_run, + ['-O']) + +test('T4474a', + [stats_num_field('bytes allocated', + [(wordsize(32), 2405242767, 5), + (wordsize(64), 4831890304, 5)]), + # expected value: 4831890304 (amd64/OSX) + only_ways(['normal']) + ], + compile_and_run, + ['-O']) +test('T4474b', + [stats_num_field('bytes allocated', + [(wordsize(32), 2405242767, 5), + (wordsize(64), 4831890304, 5)]), + # expected value: 4831890304 (amd64/OSX) + only_ways(['normal']) + ], + compile_and_run, + ['-O']) +test('T4474c', + [stats_num_field('bytes allocated', + [(wordsize(32), 2405242767, 5), + (wordsize(64), 4831890304, 5)]), + # expected value: 4831890304 (amd64/OSX) + only_ways(['normal']) + ], + compile_and_run, + ['-O']) + +test('T5237', + [stats_num_field('bytes allocated', + [(wordsize(32), 78328, 5), + # expected value: 78328 (i386/Linux) + (wordsize(64), 104176, 5)]), + # expected value: 110888 (amd64/Linux) + # expected value: 104176 (amd64/Linux) + only_ways(['normal']) + ], + compile_and_run, + ['-O ' + sse2_opts]) + +test('T5536', + [stats_num_field('bytes allocated', + [(wordsize(32), 446260520, 1), + # 1246287228 (i386/Linux) + # 446328556 (i386/Windows) + # 446192484 (i386/OSX) + (wordsize(64), 892399040, 5)]), + # expected value: 2492589480 (amd64/Linux) + # 17/1/13: 892399040 (x86_64/Linux) + # (new demand analyser) + extra_clean(['T5536.data']), + ignore_output, + only_ways(['normal']) + ], + compile_and_run, + ['-O']) + +test('T7257', + [stats_num_field('bytes allocated', + [(wordsize(32), 1150000000, 10), + # expected value: 1246287228 (i386/Linux) + (wordsize(64), 1774893760, 5)]), + # 2012-09-21: 1774893760 (amd64/Linux) + stats_num_field('peak_megabytes_allocated', + [(wordsize(32), 217, 5), + # 2012-10-08: 217 (x86/Linux) + (wordsize(64), 227, 5)]), + # 2012-09-21: 227 (amd64/Linux) + + only_ways(['normal']) + ], + compile_and_run, ['-O']) + +test('Conversions', + [stats_num_field('bytes allocated', + [(wordsize(32), 78374, 2), + # 2012-12-18: 55316 Guessed 64-bit value / 2 + # 2013-02-10: 77472 (x86/OSX) + # 2013-02-10: 79276 (x86/Windows) + (wordsize(64), 110632, 5)]), + # 2012-12-18: 109608 (amd64/OS X) + + only_ways(['normal']) + ], + compile_and_run, ['-O -msse2']) + +test('T7507', omit_ways(['ghci']), compile_and_run, ['-O']) +# For 7507, stack overflow is the bad case + +test('T7436', + [stats_num_field('max_bytes_used', + [(wordsize(64), 60360, 1), + # 127000 (amd64/Linux) + # 2013-02-07: 60360 (amd64/Linux) + (wordsize(32), 58434, 1)]), + # 2013-02-10: 58032 (x86/Windows) + # 2013-02-10: 58836 (x86/OSX) + only_ways(['normal']) + ], + compile_and_run, + ['-O']) + +test('T7797', + [stats_num_field('bytes allocated', + [(wordsize(32), 240044984, 5), + # expected value: 2685858140 (x86/OS X) + # expected: 360940756 (x86/Linux) + # expected: 240044984 (x86/Windows, 64bit machine) + (wordsize(64), 480050944, 5)]), + # expected: 480050944 (amd64/Linux) + extra_clean(['T7797a.hi', 'T7797a.o']), + only_ways(['normal']) + ], + compile_and_run, + ['-O']) + +test('T7954', + [stats_num_field('bytes allocated', + [(wordsize(32), 1380051408, 10), + (wordsize(64), 2720051528, 10)]), + only_ways(['normal']) + ], + compile_and_run, + ['-O']) + +test('T7850', + [stats_num_field('peak_megabytes_allocated', + [(wordsize(32), 2, 10), + (wordsize(64), 4, 10)]), + only_ways(['normal'])], + compile_and_run, + ['-O']) + diff --git a/testsuite/tests/perf/should_run/lazy-bs-alloc.hs b/testsuite/tests/perf/should_run/lazy-bs-alloc.hs new file mode 100644 index 0000000000..76850c67d0 --- /dev/null +++ b/testsuite/tests/perf/should_run/lazy-bs-alloc.hs @@ -0,0 +1,9 @@ +module Main (main) where + +import System.Environment (getArgs) +import qualified Data.ByteString.Lazy as L + +main :: IO () +main = do + (file : _) <- getArgs + L.readFile file >>= L.putStr |