summaryrefslogtreecommitdiff
path: root/testsuite/tests/simplCore/should_compile/spec001.hs
blob: 5fb9685d0574a38878a29710c84a6c7e3a21e9f7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
{-# LANGUAGE CPP, UnboxedTuples, MagicHash, StandaloneDeriving, DeriveDataTypeable #-}
{-# OPTIONS_GHC -O #-}

-- In GHC 6.4, compiling this module gave a Core Lint failure following the
-- specialiser, because a function was floated out that had a RULE that
-- mentioned another function (unpack, in fact), but the latter wasn't
-- floated because we didn't take the RULES into account properly.  Result:
-- a variable out of scope.

-- It's hard to cut this test down.


module Data.PackedString.Latin1 (
        -- * The @PackedString@ type
        PackedString,      -- abstract, instances: Eq, Ord, Show, Typeable

         -- * Converting to and from @PackedString@s
        pack,
        unpack,

        -- * I\/O with @PackedString@s
        hPut, hGet,

        -- * List-like manipulation functions
        nil,
        cons,
        head,
        tail,
        null,
        append,
        length,
        index,
        map,
        filter,
        reverse,
        concat,
        elem,
        substr,
        take,
        drop,
        splitAt,
        foldl,
        foldr,
        takeWhile,
        dropWhile,
        span,
        break,
        lines,
        unlines,
        words,
        unwords,
        split,
        splitWith,
        join,
--      unpackList, -- eek, otherwise it gets thrown away by the simplifier

    ) where

import qualified Prelude
import Prelude hiding (
        head,
        tail,
        null,
        length,
        (!!),
        map,
        filter,
        reverse,
        concat,
        elem,
        take,
        drop,
        foldl,
        foldr,
        splitAt,
        takeWhile,
        dropWhile,
        span,
        break,
        lines,
        unlines,
        words,
        unwords,
        join
 )

import GHC.Exts
import GHC.IO (IO(..))
import Foreign
import Data.Typeable
import Data.Char
import qualified Data.List
import System.IO

-- -----------------------------------------------------------------------------
-- PackedString type declaration

-- | A compact alternative to 'String' that supports a number of
-- efficient operations.  Only Latin1 (8-bit) characters can be stored
-- in a 'PackedString'.
data PackedString
  = PS
      {-# UNPACK #-} !Int                 -- offset of the first character in the buffer
      {-# UNPACK #-} !Int                 -- number of characters in the string
      {-# UNPACK #-} !(ForeignPtr Word8)  -- buffer holding the character bytes
        -- This representation is fairly efficient and can be converted
        -- to/from a StorableArray.  With the ForeignPtr unpacked, the
        -- Addr# ends up stored directly in the PS constructor.

-- Perhaps making a slice should be conditional on the ratio of the
-- slice/string size to limit memory leaks.

-- Two packed strings are equal exactly when 'comparePS' reports 'EQ'.
instance Eq PackedString where
   x == y = case comparePS x y of
               EQ -> True
               _  -> False

-- Ordering is delegated entirely to 'comparePS'.
instance Ord PackedString where
   compare lhs rhs = comparePS lhs rhs

-- | Lexicographically compare the contents of two 'PackedString's, byte
-- by byte.  When one string is a proper prefix of the other, the shorter
-- string is the smaller one.
comparePS :: PackedString -> PackedString -> Ordering
comparePS (PS off1 len1 fp1) (PS off2 len2 fp2)
  = inlinePerformIO $
        withForeignPtr fp1 $ \p1 ->
        withForeignPtr fp2 $ \p2 ->
        -- The scan must start at index 0.  Starting at len1 would make
        -- the first guard of 'cmp' fire immediately, so no byte would
        -- ever be compared.
        cmp (p1 `plusPtr` off1) (p2 `plusPtr` off2) 0
  where
    -- @cmp p1 p2 n@ compares the two buffers from index @n@ onwards.
    cmp :: Ptr Word8 -> Ptr Word8 -> Int -> IO Ordering
    cmp p1 p2 n
      -- Ran off the end of the first string: equal if the second one
      -- ends here too, otherwise the first is a proper prefix.
      | n >= len1 = if n >= len2 then return EQ else return LT
      -- Only the second string ended: it is a proper prefix of the first.
      | n >= len2 = return GT
      | otherwise = do
          a <- peekElemOff p1 n
          b <- peekElemOff p2 n
          case a `compare` b of
                EQ -> cmp p1 p2 (n+1)
                LT -> return LT
                GT -> return GT

--instance Read PackedString: ToDo

-- A 'PackedString' is shown exactly like the 'String' it unpacks to.
instance Show PackedString where
    showsPrec prec str rest = showsPrec prec (unpack str) rest

-- 'Typeable' instance for 'PackedString', obtained via standalone deriving.
deriving instance Typeable PackedString

-- -----------------------------------------------------------------------------
-- Constructor functions

-- | The 'nil' value is the empty string: offset 0, length 0, backed by
-- a foreign pointer wrapping the null pointer.
nil :: PackedString
nil = inlinePerformIO $
  newForeignPtr_ nullPtr >>= \emptyBuf ->
    return (PS 0 0 emptyBuf)

-- | The 'cons' function prepends the given character to the given
-- string.  It currently goes through an intermediate 'String'.
cons :: Char -> PackedString -> PackedString
cons ch rest = pack $ ch : unpack rest -- ToDo:better

-- | Build a 'PackedString' with the given length field from a 'String':
-- a buffer of @len@ bytes is allocated and the characters of the string
-- are written into it, starting at index 0.
--
-- NOTE(review): nothing here checks that @len@ matches the length of
-- @str@; callers are presumably expected to guarantee it -- confirm.
packLen :: Int -> String -> PackedString
packLen len str = inlinePerformIO $
  mallocForeignPtrBytes len >>= \buf ->
    withForeignPtr buf $ \dest ->
      fill_it_in dest 0 str >> return (PS 0 len buf)

-- Write the characters of a string into consecutive elements of the
-- buffer, starting at the given index.  Each character is converted to
-- a byte with 'c2w' before being stored.
fill_it_in :: Ptr Word8 -> Int -> String -> IO ()
fill_it_in _   _  []          = return ()
fill_it_in buf ix (ch : rest) =
  pokeElemOff buf ix (c2w ch) >> fill_it_in buf (ix + 1) rest

-- | Convert a 'String' into a 'PackedString'.  The length of the list
-- is computed first and then handed to 'packLen'.
pack :: String -> PackedString
pack chars = packLen n chars
  where
    n = Prelude.length chars

-- Convert a byte to the character with the same code point.
-- Written as a lambda so the definition keeps zero arguments on its
-- left-hand side, as before.
{-# INLINE w2c #-}
w2c :: Word8 -> Char
w2c = \byte -> chr (fromIntegral byte)
-- Convert a character to a byte via its code point ('ord') and
-- 'fromIntegral'.  Written as a lambda so the definition keeps zero
-- arguments on its left-hand side, as before.
{-# INLINE c2w #-}
c2w :: Char -> Word8
c2w = \ch -> fromIntegral (ord ch)

-- -----------------------------------------------------------------------------
-- List-mimicking functions for PackedStrings

-- | The 'length' function returns the number of characters in the
-- string, read directly from the length field of the constructor.
-- Analogous to 'Prelude.length'.
length :: PackedString -> Int
length ps = case ps of
  PS _ n _ -> n

-- | The 'index' function returns the character at the given (zero-based)
-- position, or calls 'error' when the position is negative or not
-- smaller than the length of the string.
index :: PackedString -> Int -> Char
index ps i =
  if i < 0 || i >= length ps
    then error "Data.PackedString.Latin1.index: index out of range"
    else unsafeIndex ps i

-- Read the character at the given position without any bounds check.
-- The result is forced before being returned.
unsafeIndex :: PackedString -> Int -> Char
unsafeIndex (PS start _ buf) pos =
  withPackedString buf $ \base ->
    peekElemOff (base `plusPtr` start) pos >>= \byte ->
      return $! w2c byte

-- | The 'head' function returns the first character of a
-- 'PackedString', or calls 'error' if the string is empty.
head :: PackedString -> Char
head ps =
  if length ps > 0
    then index ps 0
    else error "Data.PackedString.Latin1.head: head []"

-- | The 'tail' function returns everything but the first character of
-- a 'PackedString', or calls 'error' if the string is empty.  The tail
-- of a one-character string is 'nil'.
tail :: PackedString -> PackedString
tail ps = case length ps of
  n | n <= 0    -> error "Data.PackedString.Latin1.tail: tail []"
    | n == 1    -> nil
    | otherwise -> substr ps 1 (n - 1)

-- | The 'null' function returns True iff the string has length zero.
null :: PackedString -> Bool
null ps = length ps == 0

-- | The 'append' function appends the second string onto the first.
-- If either argument is empty the other one is returned unchanged.
append :: PackedString -> PackedString -> PackedString
append front back =
  if null front
    then back
    else if null back
      then front
      else concat [front, back]

-- | The 'map' function applies a function to each character in the
-- string.  The result is packed with the same length as the input.
map :: (Char -> Char) -> PackedString -> PackedString
map f ps = packLen n mapped
  where
    n      = length ps
    mapped = Prelude.map f (unpack ps)

-- | The 'filter' function keeps only the characters that satisfy the
-- predicate.
filter :: (Char -> Bool) -> PackedString -> PackedString {-or String?-}
filter keep ps = pack (Prelude.filter keep (unpack ps))

-- | The 'foldl' function is a left fold over the characters of a
-- 'PackedString', implemented with 'Prelude.foldl' on the unpacked
-- string.
foldl :: (a -> Char -> a) -> a -> PackedString -> a
foldl step acc ps = Prelude.foldl step acc (unpack ps)

-- | The 'foldr' function is a right fold over the characters of a
-- 'PackedString', implemented with 'Prelude.foldr' on the unpacked
-- string.
foldr :: (Char -> a -> a) -> a -> PackedString -> a
foldr step zero ps = Prelude.foldr step zero (unpack ps) -- no intermediate list, we hope

-- | The 'take' function takes the first @n@ characters of a
-- 'PackedString'.  If @n@ is zero or negative the result is empty; if
-- @n@ exceeds the length of the string, the whole string is returned.
take :: Int -> PackedString -> PackedString
take n ps = substr ps 0 (count - 1)
  where
    -- Clamp into [0, length ps] so that the slice can neither get a
    -- negative length nor claim bytes beyond the end of the string.
    count = max 0 (min n (length ps))

-- | The 'drop' function drops the first @n@ characters of a
-- 'PackedString'.  If @n@ is zero or negative the whole string is
-- returned; if @n@ exceeds the length of the string the result is empty.
drop    :: Int -> PackedString -> PackedString
drop n ps = substr ps start (len - 1)
  where
    len   = length ps
    -- Clamp into [0, len] so that the slice can neither start before
    -- the beginning of the string nor end up with a negative length.
    start = max 0 (min n len)

-- | The 'splitAt' function splits a 'PackedString' at a given index,
-- returning the result of 'take' paired with the result of 'drop'.
splitAt :: Int -> PackedString -> (PackedString, PackedString)
splitAt n ps = (prefix, suffix)
  where
    prefix = take n ps
    suffix = drop n ps

-- | The 'takeWhile' function returns the longest prefix whose
-- characters all satisfy the predicate; analogous to
-- 'Prelude.takeWhile'.
takeWhile :: (Char -> Bool) -> PackedString -> PackedString
takeWhile keep ps = pack prefix
  where
    prefix = Prelude.takeWhile keep (unpack ps)

-- | The 'dropWhile' function removes the longest prefix whose
-- characters all satisfy the predicate; analogous to
-- 'Prelude.dropWhile'.
dropWhile :: (Char -> Bool) -> PackedString -> PackedString
dropWhile skip ps = pack suffix
  where
    suffix = Prelude.dropWhile skip (unpack ps)

-- | The 'elem' function returns True iff the given character occurs in
-- the string.
elem :: Char -> PackedString -> Bool
elem ch ps = Prelude.elem ch (unpack ps)

-- | The 'span' function returns a pair made of the result of
-- 'takeWhile' and the result of 'dropWhile' for the same predicate.
span :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
span p ps = (matching, remainder)
  where
    matching  = takeWhile p ps
    remainder = dropWhile p ps

-- | The 'break' function splits a string just before the first
-- character that satisfies the predicate; it is 'span' with the
-- predicate negated.
break :: (Char -> Bool) -> PackedString -> (PackedString, PackedString)
break p ps = span (\ch -> not (p ch)) ps

-- | The 'lines' function splits the input at every newline character.
lines :: PackedString -> [PackedString]
lines ps = splitWith (== '\n') ps

-- | The 'unlines' function concatenates the input list with a newline
-- placed between consecutive elements.
unlines :: [PackedString] -> PackedString
unlines pss = join newline pss
  where
    newline = pack "\n"

-- | The 'words' function splits the input at every character for which
-- 'isSpace' holds and discards the empty pieces; analogous to
-- 'Prelude.words'.
words :: PackedString -> [PackedString]
words ps = [piece | piece <- splitWith isSpace ps, not (null piece)]

-- | The 'unwords' function concatenates the input list with a single
-- space placed between consecutive elements; analogous to
-- 'Prelude.unwords'.
unwords :: [PackedString] -> PackedString
unwords pss = join blank pss
  where
    blank = pack " "

-- | The 'reverse' function reverses the string, going through an
-- intermediate 'String'.
reverse :: PackedString -> PackedString
reverse ps = pack (Prelude.reverse (unpack ps))

-- | The 'concat' function concatenates a list of 'PackedString's by
-- unpacking every element and packing the combined 'String'.
concat :: [PackedString] -> PackedString
concat pss = pack (Prelude.concatMap unpack pss)

------------------------------------------------------------

-- | The 'join' function takes a separator and a list of
-- 'PackedString's, puts the separator between each pair of adjacent
-- elements, and concatenates the result.
join :: PackedString -> [PackedString] -> PackedString
join filler pss = concat (Data.List.intersperse filler pss)

-- ToDo: the obvious generalisation
{-
  Some properties that hold:

  * split x ls = ls'
      where False = any (map (x `elem`) ls')

  * join (pack [x]) (split x ls) = ls
-}

-- | The 'split' function splits the input string on each occurrence of
-- the given 'Char'.  It is 'splitWith' specialised to an equality test.
split :: Char -> PackedString -> [PackedString]
split c = splitWith isSeparator
  where
    isSeparator ch = ch == c

-- | The 'splitWith' function splits the input string at every character
-- that satisfies the predicate.  An empty input yields the empty list.
splitWith :: (Char -> Bool) -> PackedString -> [PackedString]
splitWith pred (PS off len fp)
  | len == 0  = []
  | otherwise = splitWith' pred off len fp

-- Worker for 'splitWith' operating on the unpacked components of a
-- 'PackedString': runs 'splitLoop' from index 0 over the buffer.
splitWith' :: (Char -> Bool) -> Int -> Int -> ForeignPtr Word8 -> [PackedString]
splitWith' pred off len fp =
  withPackedString fp (\base -> splitLoop pred base 0 off len fp)

-- Scan loop behind 'splitWith''.  @idx@ counts characters examined so
-- far within the current piece, which starts at offset @off@ and has
-- @len@ characters left to look at.
--
-- The first equation never produces a result: its guard always
-- evaluates to False, but evaluating it forces @p@, @idx@, @off@ and
-- @fp@ (presumably to make the loop strict in these arguments).
splitLoop pred p idx off len fp
        | p `seq` idx `seq` off `seq` fp `seq` False = undefined
splitLoop pred p idx off len fp
        -- Reached the end of the input: the remainder is the last piece.
        | idx >= len  = return [PS off idx fp]
        | otherwise = do
                w <- peekElemOff p (off+idx)
                if pred (w2c w)
                   -- Separator found: emit the piece before it and carry
                   -- on with the rest of the string after the separator.
                   then return (PS off idx fp :
                                  splitWith' pred (off+idx+1) (len-idx-1) fp)
                   else splitLoop pred p (idx+1) off len fp

-- -----------------------------------------------------------------------------
-- Local utility functions

-- The definition of @substr@ is essentially:
-- @take (end - begin + 1) (drop begin str)@.

-- | The 'substr' function takes a 'PackedString' and two indices and
-- returns the substring between (and including) these indices.  The
-- result shares the buffer of the input; no bounds checking is done.
substr :: PackedString -> Int -> Int -> PackedString
substr (PS off _ fp) begin end = PS sliceOff sliceLen fp
  where
    sliceOff = off + begin
    sliceLen = end - begin + 1

-- -----------------------------------------------------------------------------
-- hPut

-- | Write a 'PackedString' to the given 'Handle'.
--
-- NOTE: the bytes of the string are written as they are, i.e. the
-- output is in Latin-1.
--
hPut :: Handle -> PackedString -> IO ()
hPut h (PS off l fp) =
  withForeignPtr fp (\base -> hPutBuf h (base `plusPtr` off) l)

-- -----------------------------------------------------------------------------
-- hGet

-- | Read a 'PackedString' of up to the given number of bytes directly
-- from the specified 'Handle'.  The length of the result is whatever
-- 'hGetBuf' reports as read.  This is far more efficient than reading
-- the characters into a 'String' and then using 'pack'.
--
-- NOTE: as with 'hPut', the string representation in the file is
-- assumed to be Latin-1.
hGet :: Handle -> Int -> IO PackedString
hGet h i =
  mallocForeignPtrBytes i >>= \buf ->
    withForeignPtr buf $ \dest ->
      hGetBuf h dest i >>= \got ->
        return (PS 0 got buf)

-- -----------------------------------------------------------------------------
-- unpacking

-- Convert a 'PackedString' into a 'String'.  The list is produced by
-- applying 'build' to the fold 'unpackFoldr'.
-- NOTE(review): presumably written this way so that it can take part in
-- foldr/build list fusion -- confirm.
{-# INLINE unpack #-}
unpack :: PackedString -> String
unpack ps = build (unpackFoldr ps)

-- Rewrite rule, active from phase 1 onwards: 'unpackFoldr' applied to
-- (:) and [] is replaced by the direct list builder 'unpackList'.
{-# RULES
"unpack-list"  [1]  forall p  . unpackFoldr p (:) [] = unpackList p
 #-}

-- Build the 'String' for a 'PackedString' directly.  The buffer is read
-- from the last index down to 0, consing each character onto the
-- accumulator, so the resulting list is in the original order.
unpackList :: PackedString -> [Char]
unpackList (PS off len fp) =
   withPackedString fp $ \base ->
      let go _ (-1) acc = return acc
          go q n    acc =
             peekElemOff q n >>= \byte ->
                go q (n-1) (w2c byte : acc)
      in go (base `plusPtr` off) (len-1) []

-- Right fold over the characters of a 'PackedString': the buffer is
-- read from the last index down to 0 and each character is combined
-- with the accumulator using @f@, starting from @c@.
--
-- The INLINE [0] pragma holds back inlining until phase 0.
-- NOTE(review): presumably so that the "unpack-list" rule gets a chance
-- to fire on calls to this function first -- confirm.
{-# INLINE [0] unpackFoldr #-}
unpackFoldr :: PackedString -> (Char -> a -> a) -> a -> a
unpackFoldr (PS off len fp) f c =
   withPackedString fp $ \p -> do
      -- Index -1 means every character has been consumed.
      let loop p (-1) acc = return acc
          loop p n acc = do
             a <- peekElemOff p n
             loop p (n-1) (w2c a `f` acc)
      loop (p `plusPtr` off) (len-1) c

-- -----------------------------------------------------------------------------
-- Utils

-- Same idea as unsafePerformIO, except that this version gets inlined:
-- the IO action is run against 'realWorld#' and only its result is kept.
{-# INLINE inlinePerformIO #-}
inlinePerformIO :: IO a -> a
inlinePerformIO (IO action) =
  case action realWorld# of
    (# _, result #) -> result

-- Run an IO action on the raw pointer behind a 'ForeignPtr' and return
-- its result as a pure value, via 'inlinePerformIO'.
withPackedString :: ForeignPtr a -> (Ptr a -> IO b) -> b
withPackedString fp action = inlinePerformIO $ withForeignPtr fp action