blob: 218e885dac65e258e8db313f6b260d44df1c7098 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
|
-- Break up a string into words in parallel.
-- Based on the presentation "Breaking Sequential Habits of Thought", Guy Steele.
-- http://groups.csail.mit.edu/mac/users/gjs/6.945/readings/MITApril2009Steele.pdf
--
-- NOTE: This is a naive implementation, and I haven't benchmarked it.
-- Using parallel arrays in Seg probably isn't helpful for performance,
-- but it's a stress test for the vectoriser.
--
-- If we actually cared about performance we wouldn't want to recursively
-- subdivide the string right down to individual characters.
--
{-# LANGUAGE ParallelArrays, ParallelListComp #-}
{-# OPTIONS -fvectorise #-}
module WordsVect
( wordsOfPArray
, wordCountOfPArray )
where
import qualified Data.Array.Parallel.Prelude.Word8 as W
import Data.Array.Parallel.Prelude.Word8 (Word8)
import Data.Array.Parallel.Prelude.Int as I
import Data.Array.Parallel
import qualified Prelude as Prel
-- We can't use the Prelude Char and String types in vectorised code yet..
type Char = Word8
char_space = W.fromInt 32
type String = [: Char :]
-- | Word state
data State
= Chunk String
| Seg String -- initial word chunk
[:String:] -- complete words in the middle of the segment
String -- final word chunk
-- | Compose two wordstates.
plusState :: State -> State -> State
plusState str1 str2
= case (str1, str2) of
(Chunk as, Chunk bs) -> Chunk (as +:+ bs)
(Chunk as, Seg bl bss br) -> Seg (as +:+ bl) bss br
(Seg al ass ar, Chunk bs) -> Seg al ass (ar +:+ bs)
(Seg al ass ar, Seg bl bss br) -> Seg al (ass +:+ joinEmpty [:ar +:+ bl:] +:+ bss) br
joinEmpty :: [:[:Word8:]:] -> [:[:Word8:]:]
joinEmpty ws
| lengthP ws I.== 1 && lengthP (ws !: 0) I.== 0 = [::]
| otherwise = ws
-- | Convert a single char to a wordstate.
stateOfChar :: Char -> State
stateOfChar c
| c W.== char_space = Seg [::] [::] [::]
| otherwise = Chunk [:c:]
-- | Break this string into words.
stateOfString :: String -> State
stateOfString str
= let len = lengthP str
result
| len I.== 0 = Chunk [::]
| len I.== 1 = stateOfChar (str !: 0)
| otherwise
= let half = len `div` 2
s1 = sliceP 0 half str
s2 = sliceP half (len I.- half) str
in plusState (stateOfString s1) (stateOfString s2)
in result
-- | Count the number of words in a string.
countWordsOfState :: State -> Int
countWordsOfState state
= case state of
Chunk c -> wordsInChunkArr c
Seg c1 ws c2 -> wordsInChunkArr c1 I.+ lengthP ws I.+ wordsInChunkArr c2
wordsInChunkArr :: [:Word8:] -> Int
wordsInChunkArr arr
| lengthP arr I.== 0 = 0
| otherwise = 1
-- | Flatten a state back to an array of Word8s,
-- inserting spaces between the words.
flattenState :: State -> [:Word8:]
flattenState ss
= case ss of
Chunk s -> s
Seg w1 ws w2
-> w1
+:+ [:char_space:]
+:+ concatP [: w +:+ [:char_space:] | w <- ws :]
+:+ w2
-- Interface ------------------------------------------------------------------
-- | Break up an array of chars into words then flatten it back.
wordsOfPArray :: PArray Word8 -> PArray Word8
{-# NOINLINE wordsOfPArray #-}
wordsOfPArray arr
= let str = fromPArrayP arr
state = stateOfString str
strOut = flattenState state
in toPArrayP strOut
-- | Count the number of words in an array
wordCountOfPArray :: PArray Word8 -> Int
{-# NOINLINE wordCountOfPArray #-}
wordCountOfPArray arr
= let str = fromPArrayP arr
state = stateOfString str
in countWordsOfState state
|