summaryrefslogtreecommitdiff
path: root/testsuite/tests/perf/should_run
diff options
context:
space:
mode:
Diffstat (limited to 'testsuite/tests/perf/should_run')
-rw-r--r--testsuite/tests/perf/should_run/3586.hs20
-rw-r--r--testsuite/tests/perf/should_run/3586.stdout1
-rw-r--r--testsuite/tests/perf/should_run/Makefile36
-rw-r--r--testsuite/tests/perf/should_run/MethSharing.hs97
-rw-r--r--testsuite/tests/perf/should_run/MethSharing.stdout1
-rw-r--r--testsuite/tests/perf/should_run/T149_A.hs25
-rw-r--r--testsuite/tests/perf/should_run/T149_B.hs26
-rw-r--r--testsuite/tests/perf/should_run/T2902_A.hs18
-rw-r--r--testsuite/tests/perf/should_run/T2902_A_PairingSum.hs49
-rw-r--r--testsuite/tests/perf/should_run/T2902_B.hs18
-rw-r--r--testsuite/tests/perf/should_run/T2902_B_PairingSum.hs37
-rw-r--r--testsuite/tests/perf/should_run/T2902_Sum.hs14
-rw-r--r--testsuite/tests/perf/should_run/T3245.hs50
-rw-r--r--testsuite/tests/perf/should_run/T3245.stdout15
-rw-r--r--testsuite/tests/perf/should_run/T3736.hs212
-rw-r--r--testsuite/tests/perf/should_run/T3736.stdout1
-rw-r--r--testsuite/tests/perf/should_run/T3738.hs10
-rw-r--r--testsuite/tests/perf/should_run/T3738.stdout1
-rw-r--r--testsuite/tests/perf/should_run/T3738a.hs6
-rw-r--r--testsuite/tests/perf/should_run/T4321.hs15
-rw-r--r--testsuite/tests/perf/should_run/T4321.stdout1
-rw-r--r--testsuite/tests/perf/should_run/T4830.hs15
-rw-r--r--testsuite/tests/perf/should_run/T4830.stdout1
-rw-r--r--testsuite/tests/perf/should_run/T4978.hs125
-rw-r--r--testsuite/tests/perf/should_run/T4978.stdout1
-rw-r--r--testsuite/tests/perf/should_run/T5113.hs31
-rw-r--r--testsuite/tests/perf/should_run/T5113.stdout1
-rw-r--r--testsuite/tests/perf/should_run/T5205.hs13
-rw-r--r--testsuite/tests/perf/should_run/T5205.stdout1
-rw-r--r--testsuite/tests/perf/should_run/all.T148
-rw-r--r--testsuite/tests/perf/should_run/lazy-bs-alloc.hs9
31 files changed, 998 insertions, 0 deletions
diff --git a/testsuite/tests/perf/should_run/3586.hs b/testsuite/tests/perf/should_run/3586.hs
new file mode 100644
index 0000000000..968f2eba27
--- /dev/null
+++ b/testsuite/tests/perf/should_run/3586.hs
@@ -0,0 +1,20 @@
+{-# LANGUAGE BangPatterns #-}
+{-# OPTIONS -fvia-C -optc-O3 -fexcess-precision -optc-msse3 #-}
+
+import Control.Monad.ST
+import Data.Array.ST
+import Data.Array.Base
+
+main = print $ runST
+ (do arr <- newArray (1,2000000) 137.0 :: ST s (STUArray s Int Double)
+ go arr 2000000 0.0 )
+
+
+go :: STUArray s Int Double -> Int -> Double -> ST s Double
+go !a i !acc
+ | i < 1 = return acc
+ | otherwise = do
+ b <- unsafeRead a i
+ unsafeWrite a i (b+3.0)
+ c <- unsafeRead a i
+ go a (i-1) (c+acc)
diff --git a/testsuite/tests/perf/should_run/3586.stdout b/testsuite/tests/perf/should_run/3586.stdout
new file mode 100644
index 0000000000..626282f10c
--- /dev/null
+++ b/testsuite/tests/perf/should_run/3586.stdout
@@ -0,0 +1 @@
+2.79999863e8
diff --git a/testsuite/tests/perf/should_run/Makefile b/testsuite/tests/perf/should_run/Makefile
new file mode 100644
index 0000000000..e839342f90
--- /dev/null
+++ b/testsuite/tests/perf/should_run/Makefile
@@ -0,0 +1,36 @@
+TOP=../../..
+include $(TOP)/mk/boilerplate.mk
+include $(TOP)/mk/test.mk
+
+define runT3736
+./T3736 $1 +RTS -t --machine-readable 2>&1 | grep '"bytes allocated"' | sed -e 's/.*, "//' -e 's/".*//'
+endef
+.PHONY: T3736
+T3736:
+ $(RM) -f T3736.hi T3736.o T3736
+ '$(TEST_HC)' $(TEST_HC_OPTS) -v0 -O --make T3736 -rtsopts
+# Check ALLOC1 is > 100 just to check with have sane results, and if so,
+# the test passes if the two numbers are equal. We could check that the
+# actual numbers are in the range we expect too (on the various
+# platforms), but we don't currently.
+ ALLOC1=`$(call runT3736,1)`; ALLOC2=`$(call runT3736,2)`; if [ "$$ALLOC1" -gt 100 ] && [ "$$ALLOC1" -eq "$$ALLOC2" ]; then echo Match; else echo "Mismatch: $$ALLOC1 $$ALLOC2"; fi
+
+.PHONY: T2902
+T2902:
+ $(RM) -f T2902_A T2902_B
+ $(RM) -f T2902_A.hi T2902_B.hi
+ $(RM) -f T2902_A.o T2902_B.o
+ $(RM) -f T2902_A_PairingSum.hi T2902_B_PairingSum.hi T2902_Sum.hi
+ $(RM) -f T2902_A_PairingSum.o T2902_B_PairingSum.o T2902_Sum.o
+ '$(TEST_HC)' $(TEST_HC_OPTS) -v0 -O --make T2902_A -rtsopts
+ '$(TEST_HC)' $(TEST_HC_OPTS) -v0 -O --make T2902_B -rtsopts
+ BAA=`./T2902_A +RTS -t --machine-readable 2>&1 | grep '"bytes allocated"' | sed -e 's/.*, "//' -e 's/")//'`; BAB=`./T2902_B +RTS -t --machine-readable 2>&1 | grep '"bytes allocated"' | sed -e 's/.*, "//' -e 's/")//'`; [ "$$BAA" = "" ] && echo 'T2902_A: No "bytes allocated"'; [ "$$BAA" = "$$BAB" ] || echo "T2902: Mismatch in \"bytes allocated\": $$BAA $$BAB"
+
+.PHONY: T149
+T149:
+ $(RM) -f T149_A T149_A.hi T149_A.o
+ $(RM) -f T149_B T149_B.hi T149_B.o
+ '$(TEST_HC)' $(TEST_HC_OPTS) -v0 -O --make T149_A -rtsopts
+ '$(TEST_HC)' $(TEST_HC_OPTS) -v0 -O --make T149_B -rtsopts
+ BAA=`./T149_A +RTS -t --machine-readable 2>&1 | grep '"bytes allocated"' | sed -e 's/.*, "//' -e 's/")//'`; BAB=`./T149_B +RTS -t --machine-readable 2>&1 | grep '"bytes allocated"' | sed -e 's/.*, "//' -e 's/")//'`; [ "$$BAA" = "" ] && echo 'T149_A: No "bytes allocated"'; [ "$$BAA" = "$$BAB" ] || echo "T149: Mismatch in \"bytes allocated\": $$BAA $$BAB"
+
diff --git a/testsuite/tests/perf/should_run/MethSharing.hs b/testsuite/tests/perf/should_run/MethSharing.hs
new file mode 100644
index 0000000000..fb69bd4509
--- /dev/null
+++ b/testsuite/tests/perf/should_run/MethSharing.hs
@@ -0,0 +1,97 @@
+module Main where
+
+-- This test works efficiently because the full laziness
+-- pass now floats out applications
+-- \x -> f y (x+1)
+-- It'll float out the (f y) if that's a redex
+
+loop :: Double -> [Int] -> Double
+{-# NOINLINE loop #-}
+loop x [] = x
+loop x (n:ns) = x `seq` loop (x ^ n) ns
+
+main = print $ loop 1 (replicate 10000000 5)
+
+----------------------------------------------------
+{- Roman's message of May 2010
+
+I tried running nofib with -fno-method-sharing (we discussed this at some point). These are the results:
+
+--------------------------------------------------------------------------------
+ Program Size Allocs Runtime Elapsed
+--------------------------------------------------------------------------------
+
+ Min -0.3% -25.0% -12.5% -9.9%
+ Max +0.2% +159.1% +90.0% +84.7%
+ Geometric Mean -0.0% +2.2% +6.8% +5.1%
+
+This is the worst program:
+
+ simple +0.2% +159.1% +65.3% +63.9%
+
+I looked at it a bit and came up with this small example:
+
+----
+loop :: Double -> [Int] -> Double
+{-# NOINLINE loop #-}
+loop x [] = x
+loop x (n:ns) = x `seq` loop (x ^ n) ns
+
+main = print $ loop 1 (replicate 10000000 5)
+----
+
+This is over 2x slower with -fno-method-sharing. The culprit is, of
+course, (^). Here is the difference:
+
+Without -fno-method-sharing:
+
+----
+^_rVB :: GHC.Types.Double -> GHC.Types.Int -> GHC.Types.Double ^_rVB =
+ GHC.Real.^
+ @ GHC.Types.Double
+ @ GHC.Types.Int
+ GHC.Float.$fNumDouble
+ GHC.Real.$fIntegralInt
+
+Main.loop [InlPrag=NOINLINE (sat-args=2), Occ=LoopBreaker]
+ :: GHC.Types.Double -> [GHC.Types.Int] -> GHC.Types.Double Main.loop =
+ \ (x1_aat :: GHC.Types.Double) (ds_drG :: [GHC.Types.Int]) ->
+ case ds_drG of _ {
+ [] -> x1_aat;
+ : n_aav ns_aaw ->
+ case x1_aat of x2_aau { GHC.Types.D# ipv_srQ ->
+ Main.loop (^_rVB x2_aau n_aav) ns_aaw
+ }
+ }
+----
+
+With:
+
+----
+Main.loop [InlPrag=NOINLINE (sat-args=2), Occ=LoopBreaker]
+ :: GHC.Types.Double -> [GHC.Types.Int] -> GHC.Types.Double Main.loop =
+ \ (x1_aat :: GHC.Types.Double) (ds_drD :: [GHC.Types.Int]) ->
+ case ds_drD of _ {
+ [] -> x1_aat;
+ : n_aav ns_aaw ->
+ case x1_aat of x2_aau { GHC.Types.D# ipv_srN ->
+ Main.loop
+ (GHC.Real.^
+ @ GHC.Types.Double
+ @ GHC.Types.Int
+ GHC.Float.$fNumDouble
+ GHC.Real.$fIntegralInt
+ x2_aau
+ n_aav)
+ ns_aaw
+ }
+ }
+----
+
+This is a bit disappointing. I would have expected GHC to float out
+the application of (^) to the two dictionaries during full laziness
+(note that (^) has arity 2 so the application is oversaturated). Why
+doesn't that happen? SetLevels (if this is the right place to look)
+has this:
+
+-} \ No newline at end of file
diff --git a/testsuite/tests/perf/should_run/MethSharing.stdout b/testsuite/tests/perf/should_run/MethSharing.stdout
new file mode 100644
index 0000000000..d3827e75a5
--- /dev/null
+++ b/testsuite/tests/perf/should_run/MethSharing.stdout
@@ -0,0 +1 @@
+1.0
diff --git a/testsuite/tests/perf/should_run/T149_A.hs b/testsuite/tests/perf/should_run/T149_A.hs
new file mode 100644
index 0000000000..dd745460c0
--- /dev/null
+++ b/testsuite/tests/perf/should_run/T149_A.hs
@@ -0,0 +1,25 @@
+module Main (main) where
+
+-- See Trac #149
+
+-- Curently (with GHC 7.0) the CSE works, just,
+-- but it's delicate.
+
+
+import System.CPUTime
+
+main :: IO ()
+main = print $ playerMostOccur1 [1..m]
+
+m :: Int
+m = 22
+
+playerMostOccur1 :: [Int] -> Int
+playerMostOccur1 [a] = a
+playerMostOccur1 (x:xs)
+ | numOccur x (x:xs) > numOccur (playerMostOccur1 xs) xs = x
+ | otherwise = playerMostOccur1 xs
+
+numOccur :: Int -> [Int] -> Int
+numOccur i is = length $ filter (i ==) is
+
diff --git a/testsuite/tests/perf/should_run/T149_B.hs b/testsuite/tests/perf/should_run/T149_B.hs
new file mode 100644
index 0000000000..fcc87cdf55
--- /dev/null
+++ b/testsuite/tests/perf/should_run/T149_B.hs
@@ -0,0 +1,26 @@
+module Main (main) where
+
+-- See Trac #149
+
+-- Curently (with GHC 7.0) the CSE works, just,
+-- but it's delicate.
+
+
+import System.CPUTime
+
+main :: IO ()
+main = print $ playerMostOccur2 [1..m]
+
+m :: Int
+m = 22
+
+playerMostOccur2 :: [Int] -> Int
+playerMostOccur2 [a] = a
+playerMostOccur2 (x:xs)
+ | numOccur x (x:xs) > numOccur pmo xs = x
+ | otherwise = pmo
+ where pmo = playerMostOccur2 xs
+
+numOccur :: Int -> [Int] -> Int
+numOccur i is = length $ filter (i ==) is
+
diff --git a/testsuite/tests/perf/should_run/T2902_A.hs b/testsuite/tests/perf/should_run/T2902_A.hs
new file mode 100644
index 0000000000..c0939104f3
--- /dev/null
+++ b/testsuite/tests/perf/should_run/T2902_A.hs
@@ -0,0 +1,18 @@
+
+{-# LANGUAGE UnicodeSyntax #-}
+
+module Main (main) where
+
+import T2902_A_PairingSum
+
+f :: Int -> PSum Int Int
+f n = unions $ fmap g [1..n]
+ where
+ g m = unions $ fmap fromList
+ [ zip [m..n] $ repeat 1
+ , zip [m,2+m..n] $ repeat 2
+ , zip [m,3+m..n] $ repeat 3
+ ]
+
+main ∷ IO ()
+main = print $ take 20 $ toList $ f 20
diff --git a/testsuite/tests/perf/should_run/T2902_A_PairingSum.hs b/testsuite/tests/perf/should_run/T2902_A_PairingSum.hs
new file mode 100644
index 0000000000..a5dd0e7803
--- /dev/null
+++ b/testsuite/tests/perf/should_run/T2902_A_PairingSum.hs
@@ -0,0 +1,49 @@
+
+{-# LANGUAGE UnicodeSyntax, MultiParamTypeClasses, FlexibleInstances #-}
+
+module T2902_A_PairingSum (Sum(..), PSum) where
+
+import T2902_Sum
+
+data PSum a b = Empty | Tree a b [(PSum a b)]
+
+instance (Ord a, Num b) ⇒ Sum PSum a b where
+ insert = insertX
+ union = unionX
+ unions = unionsX
+ extractMin = extractMinX
+ fromList = fromListX
+ toList = toListX
+
+insertX ∷ (Ord a, Num b) ⇒ a → b → PSum a b → PSum a b
+insertX v r = unionX $ Tree v r []
+
+unionX ∷ (Ord a, Num b) ⇒ PSum a b → PSum a b → PSum a b
+unionX x Empty = x
+unionX Empty x = x
+unionX x@(Tree v r xs) y@(Tree w s ys) =
+ case compare v w of
+ LT → Tree v r (y:xs)
+ GT → Tree w s (x:ys)
+ EQ → case r + s of
+ 0 → z
+ t → insertX v t z
+ where z = unionX (unionsX xs) (unionsX ys)
+
+unionsX ∷ (Ord a, Num b) ⇒ [PSum a b] → PSum a b
+unionsX [] = Empty
+unionsX [x] = x
+unionsX (x : y : zs) = unionX (unionX x y) (unionsX zs)
+
+extractMinX ∷ (Ord a, Num b) ⇒ PSum a b → ((a,b), PSum a b)
+extractMinX Empty = undefined
+extractMinX (Tree v r xs) = ((v,r), unionsX xs)
+
+fromListX ∷ (Ord a, Num b) ⇒ [(a,b)] → PSum a b
+fromListX [] = Empty
+fromListX ((v,r):xs) = insertX v r $ fromListX xs
+
+toListX ∷ (Ord a, Num b) ⇒ PSum a b → [(a,b)]
+toListX Empty = []
+toListX x = let (y, z) = extractMinX x in y : toListX z
+
diff --git a/testsuite/tests/perf/should_run/T2902_B.hs b/testsuite/tests/perf/should_run/T2902_B.hs
new file mode 100644
index 0000000000..c6558c625b
--- /dev/null
+++ b/testsuite/tests/perf/should_run/T2902_B.hs
@@ -0,0 +1,18 @@
+
+{-# LANGUAGE UnicodeSyntax #-}
+
+module Main (main) where
+
+import T2902_B_PairingSum
+
+f :: Int -> PSum Int Int
+f n = unions $ fmap g [1..n]
+ where
+ g m = unions $ fmap fromList
+ [ zip [m..n] $ repeat 1
+ , zip [m,2+m..n] $ repeat 2
+ , zip [m,3+m..n] $ repeat 3
+ ]
+
+main ∷ IO ()
+main = print $ take 20 $ toList $ f 20
diff --git a/testsuite/tests/perf/should_run/T2902_B_PairingSum.hs b/testsuite/tests/perf/should_run/T2902_B_PairingSum.hs
new file mode 100644
index 0000000000..5276da818b
--- /dev/null
+++ b/testsuite/tests/perf/should_run/T2902_B_PairingSum.hs
@@ -0,0 +1,37 @@
+
+{-# LANGUAGE UnicodeSyntax, MultiParamTypeClasses, FlexibleInstances #-}
+
+module T2902_B_PairingSum (Sum(..), PSum) where
+
+import T2902_Sum
+
+data PSum a b = Empty | Tree a b [PSum a b]
+
+instance (Ord a, Num b) ⇒ Sum PSum a b where
+
+ insert v r = union $ Tree v r []
+
+ union x Empty = x
+ union Empty x = x
+ union x@(Tree v r xs) y@(Tree w s ys) =
+ case compare v w of
+ LT → Tree v r (y:xs)
+ GT → Tree w s (x:ys)
+ EQ → case r + s of
+ 0 → z
+ t → insert v t z
+ where z = union (unions xs) (unions ys)
+
+ unions [] = Empty
+ unions [x] = x
+ unions (x : y : zs) = union (union x y) (unions zs)
+
+ extractMin Empty = undefined
+ extractMin (Tree v r xs) = ((v,r), unions xs)
+
+ fromList [] = Empty
+ fromList ((v,r):xs) = insert v r $ fromList xs
+
+ toList Empty = []
+ toList x = let (y, z) = extractMin x in y : toList z
+
diff --git a/testsuite/tests/perf/should_run/T2902_Sum.hs b/testsuite/tests/perf/should_run/T2902_Sum.hs
new file mode 100644
index 0000000000..9be6b10568
--- /dev/null
+++ b/testsuite/tests/perf/should_run/T2902_Sum.hs
@@ -0,0 +1,14 @@
+
+{-# LANGUAGE UnicodeSyntax, MultiParamTypeClasses #-}
+
+module T2902_Sum (Sum(..)) where
+
+class Sum c a b where
+ insert ∷ a → b → c a b → c a b
+ union ∷ c a b → c a b → c a b
+ unions ∷ [c a b] → c a b
+ extractMin ∷ c a b → ((a,b), c a b)
+
+ fromList ∷ [(a,b)] → c a b
+ toList ∷ c a b → [(a,b)]
+
diff --git a/testsuite/tests/perf/should_run/T3245.hs b/testsuite/tests/perf/should_run/T3245.hs
new file mode 100644
index 0000000000..f52fc27303
--- /dev/null
+++ b/testsuite/tests/perf/should_run/T3245.hs
@@ -0,0 +1,50 @@
+{-# LANGUAGE ExistentialQuantification #-}
+
+-- The second version (count2) took ages with GHC 6.12
+-- because the typeOf function was not properly memoised
+
+import Data.Typeable
+import System.CPUTime
+
+size :: Int
+size = 40000 -- This was big enough to take 5 seconds in
+ -- the bad case on my machine.
+
+data Any = forall a. (Typeable a) => Any a
+
+int_type, int_list_type :: TypeRep
+int_type = typeOf (undefined :: Int)
+int_list_type = typeOf (undefined :: [Int])
+
+count1 :: [Any] -> Int
+count1 [] = 0
+count1 (Any x:xs) = count1 xs + (if typeOf x == int_type then 1 else 0)
+
+doTime x = do
+ start <- getCPUTime
+ putStr "Result: "
+ print x
+ stop <- getCPUTime
+ putStr "Time(sec): "
+ print (round $ fromIntegral (stop - start) / 1e12)
+ -- The 'round' rounds to an integral number of seconds
+ -- Should be zero if things are working right!
+
+main = do
+ let list = [MkT | i <- [1..size :: Int]]
+ putStrLn "count1"
+ let x = map Any list
+ doTime $ count1 x
+ doTime $ count1 x
+ doTime $ count1 x
+ putStrLn ""
+ putStrLn "count2"
+ let x = map (Any . (:[])) list
+ doTime $ count1 x
+ doTime $ count1 x
+ doTime $ count1 x
+
+data T = MkT
+tcname :: TyCon
+tcname = mkTyCon "T"
+instance Typeable T where { typeOf _ = mkTyConApp tcname [] }
diff --git a/testsuite/tests/perf/should_run/T3245.stdout b/testsuite/tests/perf/should_run/T3245.stdout
new file mode 100644
index 0000000000..bcff7f8fd1
--- /dev/null
+++ b/testsuite/tests/perf/should_run/T3245.stdout
@@ -0,0 +1,15 @@
+count1
+Result: 0
+Time(sec): 0
+Result: 0
+Time(sec): 0
+Result: 0
+Time(sec): 0
+
+count2
+Result: 0
+Time(sec): 0
+Result: 0
+Time(sec): 0
+Result: 0
+Time(sec): 0
diff --git a/testsuite/tests/perf/should_run/T3736.hs b/testsuite/tests/perf/should_run/T3736.hs
new file mode 100644
index 0000000000..e812109611
--- /dev/null
+++ b/testsuite/tests/perf/should_run/T3736.hs
@@ -0,0 +1,212 @@
+{-# OPTIONS_GHC -funbox-strict-fields -O #-}
+{-# LANGUAGE ExistentialQuantification #-}
+
+{- OPTIONS_GHC -ddump-simpl -ddump-asm -}
+
+module Main (main) where
+
+import GHC.Float (float2Int, int2Float)
+
+import System.Environment
+
+import Prelude hiding (null
+ ,lines,unlines
+ ,writeFile
+ )
+
+import Control.Exception (assert, bracket, )
+
+import Foreign.Marshal.Array (advancePtr)
+import Foreign.Ptr (minusPtr)
+import Foreign.Storable (Storable(..))
+
+import Control.Monad (when)
+
+import System.IO (openBinaryFile, hClose,
+ hPutBuf,
+ Handle, IOMode(..))
+
+import System.IO.Unsafe (unsafePerformIO)
+
+import Foreign.Ptr (Ptr)
+import Foreign.ForeignPtr (ForeignPtr, withForeignPtr, )
+import Foreign.Marshal.Array (copyArray)
+
+import qualified Foreign.ForeignPtr as F
+
+main :: IO ()
+main = do args <- getArgs
+ case args of
+ ["1"] -> mainMonolithic1Generator
+ ["2"] -> mainMonolithic1Composed
+ _ -> error "Huh?"
+
+type Phase = (Float, Float, Float)
+
+{-# INLINE saw #-}
+saw :: Num a => a -> a
+saw t = 1-2*t
+
+{-# INLINE fraction #-}
+fraction :: Float -> Float
+fraction x = x - int2Float (float2Int x)
+
+{-# INLINE generator0Freq #-}
+generator0Freq :: Float -> Float -> Maybe (Float, Float)
+generator0Freq freq =
+ \p -> Just (saw p, fraction (p+freq))
+
+infixl 6 `mix`, `mixGen`
+
+{-# INLINE mix #-}
+mix ::
+ (Num y) =>
+ (s -> Maybe (y, s)) ->
+ (t -> Maybe (y, t)) ->
+ ((s,t) -> Maybe (y, (s,t)))
+mix f g (s0,t0) =
+ do (a,s1) <- f s0
+ (b,t1) <- g t0
+ return ((a+b), (s1,t1))
+
+data Generator a =
+ forall s.
+ Generator (s -> Maybe (a, s)) s
+
+{-# INLINE runGeneratorMonolithic #-}
+runGeneratorMonolithic :: Int -> Generator Float -> Vector Float
+runGeneratorMonolithic size' (Generator f s) =
+ fst $ unfoldrN size' f s
+
+{- SPECIALISE INLINE generator0Gen :: Float -> Float -> Generator Float -}
+{-# INLINE generator0Gen #-}
+generator0Gen :: Float -> Float -> Generator Float
+generator0Gen freq phase =
+ Generator (\p -> Just (saw p, fraction (p+freq))) phase
+
+{- SPECIALISE INLINE mixGen :: Generator Float -> Generator Float -> Generator Float -}
+{-# INLINE mixGen #-}
+mixGen ::
+ (Num y) =>
+ Generator y ->
+ Generator y ->
+ Generator y
+mixGen (Generator f s) (Generator g t) =
+ Generator (\(s0,t0) ->
+ do (a,s1) <- f s0
+ (b,t1) <- g t0
+ return ((a+b), (s1,t1))) (s,t)
+
+{-# INLINE dl #-}
+dl :: Phase
+dl = (0.01008, 0.01003, 0.00990)
+
+{-# INLINE initPhase2 #-}
+initPhase2 :: (Phase, Phase)
+initPhase2 =
+ ((0,0.7,0.1), (0.3,0.4,0.6))
+
+
+size :: Int
+size = 10000000
+
+
+mainMonolithic1Composed :: IO ()
+mainMonolithic1Composed =
+ writeFile "speed.f32"
+ (fst $ unfoldrN size
+ (let (f0,f1,f2) = dl
+ in generator0Freq f0 `mix`
+ generator0Freq f1 `mix`
+ generator0Freq f2)
+ (let (p0,p1,p2) = fst initPhase2
+ in ((p0,p1),p2)))
+
+mainMonolithic1Generator :: IO ()
+mainMonolithic1Generator =
+ writeFile "speed.f32"
+ (runGeneratorMonolithic size
+ (let (f0,f1,f2) = dl
+ (p0,p1,p2) = fst initPhase2
+ in generator0Gen f0 p0 `mixGen`
+ generator0Gen f1 p1 `mixGen`
+ generator0Gen f2 p2))
+
+empty :: (Storable a) => Vector a
+empty = unsafeCreate 0 $ const $ return ()
+{-# NOINLINE empty #-}
+
+null :: Vector a -> Bool
+null (SV _ _ l) = assert (l >= 0) $ l <= 0
+{-# INLINE null #-}
+
+unfoldrN :: (Storable b) => Int -> (a -> Maybe (b, a)) -> a -> (Vector b, Maybe a)
+unfoldrN n f x0 =
+ if n <= 0
+ then (empty, Just x0)
+ else unsafePerformIO $ createAndTrim' n $ \p -> go p n x0
+ where
+ go = arguments2 $ \p i -> \x ->
+ if i == 0
+ then return (0, n-i, Just x)
+ else
+ case f x of
+ Nothing -> return (0, n-i, Nothing)
+ Just (w,x') -> do poke p w
+ go (incPtr p) (i-1) x'
+{-# INLINE unfoldrN #-}
+
+hPut :: (Storable a) => Handle -> Vector a -> IO ()
+hPut h v =
+ when (not (null v)) $
+ withStartPtr v $ \ ptrS l ->
+ let ptrE = advancePtr ptrS l
+ in hPutBuf h ptrS (minusPtr ptrE ptrS)
+
+writeFile :: (Storable a) => FilePath -> Vector a -> IO ()
+writeFile f txt =
+ bracket (openBinaryFile f WriteMode) hClose
+ (\h -> hPut h txt)
+
+data Vector a = SV {-# UNPACK #-} !(ForeignPtr a)
+ {-# UNPACK #-} !Int -- offset
+ {-# UNPACK #-} !Int -- length
+
+withStartPtr :: Storable a => Vector a -> (Ptr a -> Int -> IO b) -> IO b
+withStartPtr (SV x s l) f =
+ withForeignPtr x $ \p -> f (p `advancePtr` s) l
+{-# INLINE withStartPtr #-}
+
+incPtr :: (Storable a) => Ptr a -> Ptr a
+incPtr v = advancePtr v 1
+{-# INLINE incPtr #-}
+
+unsafeCreate :: (Storable a) => Int -> (Ptr a -> IO ()) -> Vector a
+unsafeCreate l f = unsafePerformIO (create l f)
+{-# INLINE unsafeCreate #-}
+
+create :: (Storable a) => Int -> (Ptr a -> IO ()) -> IO (Vector a)
+create l f = do
+ fp <- mallocForeignPtrArray l
+ withForeignPtr fp $ \p -> f p
+ return $! SV fp 0 l
+
+createAndTrim' :: (Storable a) => Int
+ -> (Ptr a -> IO (Int, Int, b))
+ -> IO (Vector a, b)
+createAndTrim' l f = do
+ fp <- mallocForeignPtrArray l
+ withForeignPtr fp $ \p -> do
+ (off, l', res) <- f p
+ if assert (l' <= l) $ l' >= l
+ then return $! (SV fp 0 l, res)
+ else do ps <- create l' $ \p' -> copyArray p' (p `advancePtr` off) l'
+ return $! (ps, res)
+
+{-# INLINE arguments2 #-}
+arguments2 :: (a -> b -> x) -> a -> b -> x
+arguments2 f = \a b -> (f $! a) $! b
+
+{-# INLINE mallocForeignPtrArray #-}
+mallocForeignPtrArray :: Storable a => Int -> IO (F.ForeignPtr a)
+mallocForeignPtrArray = F.mallocForeignPtrArray
diff --git a/testsuite/tests/perf/should_run/T3736.stdout b/testsuite/tests/perf/should_run/T3736.stdout
new file mode 100644
index 0000000000..1796dc2720
--- /dev/null
+++ b/testsuite/tests/perf/should_run/T3736.stdout
@@ -0,0 +1 @@
+Match
diff --git a/testsuite/tests/perf/should_run/T3738.hs b/testsuite/tests/perf/should_run/T3738.hs
new file mode 100644
index 0000000000..1b3141c0f5
--- /dev/null
+++ b/testsuite/tests/perf/should_run/T3738.hs
@@ -0,0 +1,10 @@
+
+module Main where
+
+import T3738a
+
+{-# INLINE bar #-}
+bar :: Int -> [Int]
+bar x = map (+ 2) (foo x)
+
+main = print (bar 2 !! 10000)
diff --git a/testsuite/tests/perf/should_run/T3738.stdout b/testsuite/tests/perf/should_run/T3738.stdout
new file mode 100644
index 0000000000..7ed6ff82de
--- /dev/null
+++ b/testsuite/tests/perf/should_run/T3738.stdout
@@ -0,0 +1 @@
+5
diff --git a/testsuite/tests/perf/should_run/T3738a.hs b/testsuite/tests/perf/should_run/T3738a.hs
new file mode 100644
index 0000000000..b27451d12d
--- /dev/null
+++ b/testsuite/tests/perf/should_run/T3738a.hs
@@ -0,0 +1,6 @@
+
+module T3738a where
+
+{-# INLINE foo #-}
+foo :: Num a => a -> [a]
+foo x = map (+ 1) (repeat x)
diff --git a/testsuite/tests/perf/should_run/T4321.hs b/testsuite/tests/perf/should_run/T4321.hs
new file mode 100644
index 0000000000..b8a0dbc4a1
--- /dev/null
+++ b/testsuite/tests/perf/should_run/T4321.hs
@@ -0,0 +1,15 @@
+
+-- In 6.13 this stack overflowed
+
+module Main (main) where
+
+main :: IO ()
+main = let n = 1000000
+ in print $ integrate n (1 / fromIntegral n)
+
+integrate :: Int -> Double -> Double
+integrate n h = h * (sum (map area [1..n]))
+ where area :: Int -> Double
+ area i = let x = h * (fromIntegral i - 0.5)
+ in 4 / (1 + x*x)
+
diff --git a/testsuite/tests/perf/should_run/T4321.stdout b/testsuite/tests/perf/should_run/T4321.stdout
new file mode 100644
index 0000000000..ce98a198a7
--- /dev/null
+++ b/testsuite/tests/perf/should_run/T4321.stdout
@@ -0,0 +1 @@
+3.1415926535897643
diff --git a/testsuite/tests/perf/should_run/T4830.hs b/testsuite/tests/perf/should_run/T4830.hs
new file mode 100644
index 0000000000..e345ffc9cd
--- /dev/null
+++ b/testsuite/tests/perf/should_run/T4830.hs
@@ -0,0 +1,15 @@
+-- Compile with O2; SpecConstr should fire nicely
+-- and eliminate all allocation in inner loop
+
+module Main where
+
+foo :: Int -> Maybe (Double,Double) -> Double
+foo _ Nothing = 0
+foo 0 (Just (x,y)) = x+y
+foo n (Just (x,y)) = let r = f x y in r `seq` foo (n-1) (Just r)
+ where
+ f x y | x <= y = (x,y)
+ | otherwise = (y,x)
+
+main = print (foo 1000000 (Just (1,2)))
+
diff --git a/testsuite/tests/perf/should_run/T4830.stdout b/testsuite/tests/perf/should_run/T4830.stdout
new file mode 100644
index 0000000000..9f55b2ccb5
--- /dev/null
+++ b/testsuite/tests/perf/should_run/T4830.stdout
@@ -0,0 +1 @@
+3.0
diff --git a/testsuite/tests/perf/should_run/T4978.hs b/testsuite/tests/perf/should_run/T4978.hs
new file mode 100644
index 0000000000..6413b01fdc
--- /dev/null
+++ b/testsuite/tests/perf/should_run/T4978.hs
@@ -0,0 +1,125 @@
+module Main (main) where
+
+import qualified Data.ByteString as S
+import qualified Data.ByteString.Lazy as L
+import Data.ByteString.Internal (inlinePerformIO)
+import qualified Data.ByteString.Internal as S
+import Data.Monoid
+import Foreign
+
+newtype Builder = Builder {
+ runBuilder :: (Buffer -> [S.ByteString]) -> Buffer -> [S.ByteString]
+ }
+
+instance Monoid Builder where
+ mempty = empty
+ {-# INLINE mempty #-}
+ mappend = append
+ {-# INLINE mappend #-}
+ mconcat = foldr mappend mempty
+ {-# INLINE mconcat #-}
+
+empty :: Builder
+empty = Builder (\ k b -> b `seq` k b)
+{-# INLINE empty #-}
+
+singleton :: Word8 -> Builder
+singleton = writeN 1 . flip poke
+{-# INLINE singleton #-}
+
+append :: Builder -> Builder -> Builder
+append (Builder f) (Builder g) = Builder (f . g)
+{-# INLINE [0] append #-}
+
+-- Our internal buffer type
+data Buffer = Buffer {-# UNPACK #-} !(ForeignPtr Word8)
+ {-# UNPACK #-} !Int -- offset
+ {-# UNPACK #-} !Int -- used bytes
+ {-# UNPACK #-} !Int -- length left
+
+-- | /O(1)./ Pop the 'S.ByteString' we have constructed so far, if any,
+-- yielding a new chunk in the result lazy 'L.ByteString'.
+flush :: Builder
+flush = Builder $ \ k buf@(Buffer p o u l) ->
+ if u == 0
+ then k buf
+ else S.PS p o u : k (Buffer p (o+u) 0 l)
+
+-- | /O(n)./ Extract a lazy 'L.ByteString' from a 'Builder'.
+-- The construction work takes place if and when the relevant part of
+-- the lazy 'L.ByteString' is demanded.
+--
+toLazyByteString :: Builder -> L.ByteString
+toLazyByteString m = L.fromChunks $ unsafePerformIO $ do
+ buf <- newBuffer defaultSize
+ return (runBuilder (m `append` flush) (const []) buf)
+{-# INLINE toLazyByteString #-}
+
+defaultSize :: Int
+defaultSize = 32 * k - overhead
+ where k = 1024
+ overhead = 2 * sizeOf (undefined :: Int)
+
+-- | Sequence an IO operation on the buffer
+unsafeLiftIO :: (Buffer -> IO Buffer) -> Builder
+unsafeLiftIO f = Builder $ \ k buf -> inlinePerformIO $ do
+ buf' <- f buf
+ return (k buf')
+{-# INLINE unsafeLiftIO #-}
+
+-- | Get the size of the buffer
+withSize :: (Int -> Builder) -> Builder
+withSize f = Builder $ \ k buf@(Buffer _ _ _ l) -> runBuilder (f l) k buf
+
+-- | Ensure that there are at least @n@ many bytes available.
+ensureFree :: Int -> Builder
+ensureFree n = n `seq` withSize $ \ l ->
+ if n <= l then empty else
+ flush `append` unsafeLiftIO (const (newBuffer (max n defaultSize)))
+{-# INLINE [0] ensureFree #-}
+
+-- | Ensure that @n@ many bytes are available, and then use @f@ to write some
+-- bytes into the memory.
+writeN :: Int -> (Ptr Word8 -> IO ()) -> Builder
+writeN n f = ensureFree n `append` unsafeLiftIO (writeNBuffer n f)
+{-# INLINE [0] writeN #-}
+
+writeNBuffer :: Int -> (Ptr Word8 -> IO ()) -> Buffer -> IO Buffer
+writeNBuffer n f (Buffer fp o u l) = do
+ withForeignPtr fp (\p -> f (p `plusPtr` (o+u)))
+ return (Buffer fp o (u+n) (l-n))
+{-# INLINE writeNBuffer #-}
+
+newBuffer :: Int -> IO Buffer
+newBuffer size = do
+ fp <- S.mallocByteString size
+ return $! Buffer fp 0 0 size
+{-# INLINE newBuffer #-}
+
+-- Merge buffer bounds checks.
+{-# RULES
+"append/writeN" forall a b (f::Ptr Word8 -> IO ())
+ (g::Ptr Word8 -> IO ()) ws.
+ append (writeN a f) (append (writeN b g) ws) =
+ append (writeN (a+b) (\p -> f p >> g (p `plusPtr` a))) ws
+
+"writeN/writeN" forall a b (f::Ptr Word8 -> IO ())
+ (g::Ptr Word8 -> IO ()).
+ append (writeN a f) (writeN b g) =
+ writeN (a+b) (\p -> f p >> g (p `plusPtr` a))
+
+"ensureFree/ensureFree" forall a b .
+ append (ensureFree a) (ensureFree b) = ensureFree (max a b)
+ #-}
+
+-- Test case
+
+-- Argument must be a multiple of 4.
+test :: Int -> Builder
+test 0 = mempty
+test n = singleton 1 `mappend`
+ (singleton 2 `mappend`
+ (singleton 3 `mappend`
+ (singleton 4 `mappend` test (n-4))))
+
+main = print $ L.length $ toLazyByteString $ test 10000000
diff --git a/testsuite/tests/perf/should_run/T4978.stdout b/testsuite/tests/perf/should_run/T4978.stdout
new file mode 100644
index 0000000000..825319e1c5
--- /dev/null
+++ b/testsuite/tests/perf/should_run/T4978.stdout
@@ -0,0 +1 @@
+10000000
diff --git a/testsuite/tests/perf/should_run/T5113.hs b/testsuite/tests/perf/should_run/T5113.hs
new file mode 100644
index 0000000000..e87bcb6cad
--- /dev/null
+++ b/testsuite/tests/perf/should_run/T5113.hs
@@ -0,0 +1,31 @@
+{-# LANGUAGE BangPatterns #-}
+module Main where
+
+import Data.Array.Base (unsafeRead, unsafeWrite)
+import Data.Array.ST
+import Data.Array.Unboxed
+import Control.Monad.ST
+
+main = print (divisorCounts 1000000 ! 342)
+
+isqrt :: Int -> Int
+isqrt n = floor (sqrt $ fromIntegral n)
+
+divisorCounts :: Int -> UArray Int Int
+divisorCounts n = runSTUArray $ do
+ let !rt = isqrt n
+ darr <- newArray (0,n) 1 :: ST s (STUArray s Int Int)
+ let inc i = unsafeRead darr i >>= \k -> unsafeWrite darr i (k+1)
+ note step i
+ | i > n = return ()
+ | otherwise = do
+ inc i
+ note step (i+step)
+ count j
+ | j > rt = return ()
+ | otherwise = do
+ note (2*j) (j*j)
+ count (j+2)
+ note 2 4
+ count 3
+ return darr
diff --git a/testsuite/tests/perf/should_run/T5113.stdout b/testsuite/tests/perf/should_run/T5113.stdout
new file mode 100644
index 0000000000..0cfbf08886
--- /dev/null
+++ b/testsuite/tests/perf/should_run/T5113.stdout
@@ -0,0 +1 @@
+2
diff --git a/testsuite/tests/perf/should_run/T5205.hs b/testsuite/tests/perf/should_run/T5205.hs
new file mode 100644
index 0000000000..215dd42647
--- /dev/null
+++ b/testsuite/tests/perf/should_run/T5205.hs
@@ -0,0 +1,13 @@
+
+import Control.Concurrent
+import Control.Monad
+
+main :: IO ()
+main = do t <- forkIO (x >> x)
+ threadDelay 1000000
+ killThread t
+ putStrLn "Done"
+
+x :: IO ()
+x = forever yield
+
diff --git a/testsuite/tests/perf/should_run/T5205.stdout b/testsuite/tests/perf/should_run/T5205.stdout
new file mode 100644
index 0000000000..a965a70ed4
--- /dev/null
+++ b/testsuite/tests/perf/should_run/T5205.stdout
@@ -0,0 +1 @@
+Done
diff --git a/testsuite/tests/perf/should_run/all.T b/testsuite/tests/perf/should_run/all.T
new file mode 100644
index 0000000000..04133979df
--- /dev/null
+++ b/testsuite/tests/perf/should_run/all.T
@@ -0,0 +1,148 @@
+# Tests that newArray/newArray_ is being optimised correctly
+
+# fortunately the values here are mostly independent of the wordsize,
+# because the test allocates an unboxed array of doubles.
+
+test('3586',
+ [stats_num_field('peak_megabytes_allocated', 17,
+ 18),
+ # expected value: 17 (amd64/Linux)
+ stats_num_field('bytes allocated', 16000000,
+ 17000000),
+ # expected value: 16835544 (amd64/Linux)
+ only_ways(['normal'])
+ ],
+ compile_and_run,
+ ['-O'])
+
+test('T4830',
+ [stats_num_field('bytes allocated', 60000,
+ 200000),
+ # expected value: 127,000 (amd64/Linux)
+ only_ways(['normal'])
+ ],
+ compile_and_run,
+ ['-O2'])
+
+test('T3245', normal, compile_and_run, ['-O'])
+
+# Test that bytestring reading/writing isn't over-allocating. We had
+# a bug in hGetBufNonBlocking in 6.13 that triggered this.
+#
+test('lazy-bs-alloc',
+ [stats_num_field('peak_megabytes_allocated', 1,
+ 3),
+ # expected value: 2 (amd64/Linux)
+ stats_num_field('bytes allocated', 400000,
+ 600000),
+ # expected value: 489776 (amd64/Linux)
+ only_ways(['normal']),
+ extra_run_opts('../../numeric/should_run/arith011.stdout'),
+ ignore_output
+ ],
+ # use a suitably big file, without bloating the repo with a new one:
+ compile_and_run,
+ ['-O'])
+
+# Get reproducible floating-point results on x86
+if config.arch == 'i386':
+ sse2_opts = '-msse2'
+else:
+ sse2_opts = ''
+
+test('T4321', omit_ways(['ghci']), compile_and_run, ['-O ' + sse2_opts])
+
+test('T3736', normal, run_command, ['$MAKE -s --no-print-directory T3736'])
+test('T3738',
+ [stats_num_field('peak_megabytes_allocated', 1,
+ 1),
+ # expected value: 1 (amd64/Linux)
+ # expected value: 45648 (x86/Linux):
+ if_wordsize(32,
+ stats_num_field('bytes allocated', 40000,
+ 50000)),
+ if_wordsize(64,
+ stats_num_field('bytes allocated', 40000,
+ 60000)),
+ # expected value: 49400 (amd64/Linux)
+ only_ways(['normal'])
+ ],
+ compile_and_run,
+ ['-O'])
+
+test('MethSharing',
+ [stats_num_field('peak_megabytes_allocated', 1,
+ 1),
+ # expected value: 1 (amd64/Linux)
+ # expected value: 2685858140 (x86/OS X):
+ if_wordsize(32,
+ stats_num_field('bytes allocated', 300000000,
+ 400000000)),
+ # expected: 360940756 (x86/Linux)
+ if_wordsize(64,
+ stats_num_field('bytes allocated', 600000000,
+ 700000000)),
+ # expected: 640067672 (amd64/Linux)
+ only_ways(['normal'])
+ ],
+ compile_and_run,
+ ['-O'])
+test('T2902',
+ extra_clean(['T2902_A', 'T2902_B',
+ 'T2902_A.hi', 'T2902_B.hi',
+ 'T2902_A.o', 'T2902_B.o',
+ 'T2902_A_PairingSum.hi', 'T2902_B_PairingSum.hi',
+ 'T2902_A_PairingSum.o', 'T2902_B_PairingSum.o',
+ 'T2902_Sum.hi',
+ 'T2902_Sum.o']),
+ run_command,
+ ['$MAKE -s --no-print-directory T2902'])
+test('T149',
+ [expect_broken(149),
+ extra_clean(['T149_A', 'T149_B',
+ 'T149_A.hi', 'T149_B.hi',
+ 'T149_A.o', 'T149_B.o'])],
+ run_command,
+ ['$MAKE -s --no-print-directory T149'])
+
+test('T5113',
+ [
+ if_wordsize(32,
+ stats_num_field('bytes allocated', 3000000,
+ 5000000)),
+ if_wordsize(64,
+ stats_num_field('bytes allocated', 8000000,
+ 9000000)),
+ only_ways(['normal'])
+ ],
+ compile_and_run,
+ ['-O'])
+
+
+test('T4978',
+ [if_wordsize(32,
+ stats_num_field('bytes allocated', 9000000,
+ 11000000)),
+ if_wordsize(64,
+ stats_num_field('bytes allocated', 9000000,
+ 11000000)),
+ # expected value: 10137680 (amd64/Linux)
+ only_ways(['normal'])
+ ],
+ compile_and_run,
+ ['-O2'])
+
+test('T5205',
+ [if_wordsize(32,
+ stats_num_field('bytes allocated', 40000,
+ 50000)),
+ # expected value: 47088 (x86/Darwin)
+ if_wordsize(64,
+ stats_num_field('bytes allocated', 40000,
+ 60000)),
+ # expected value: 51320 (amd64/Linux)
+ only_ways(['normal'])
+ ],
+ compile_and_run,
+ ['-O'])
+
diff --git a/testsuite/tests/perf/should_run/lazy-bs-alloc.hs b/testsuite/tests/perf/should_run/lazy-bs-alloc.hs
new file mode 100644
index 0000000000..76850c67d0
--- /dev/null
+++ b/testsuite/tests/perf/should_run/lazy-bs-alloc.hs
@@ -0,0 +1,9 @@
+module Main (main) where
+
+import System.Environment (getArgs)
+import qualified Data.ByteString.Lazy as L
+
+main :: IO ()
+main = do
+ (file : _) <- getArgs
+ L.readFile file >>= L.putStr