testsuite/tests/dph/words/WordsVect.hs


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123


-- Break up a string into words in parallel.
-- 	Based on the presentation "Breaking Sequential Habits of Thought", Guy Steele.
--	http://groups.csail.mit.edu/mac/users/gjs/6.945/readings/MITApril2009Steele.pdf
--
-- NOTE: This is a naive implementation, and I haven't benchmarked it.
--       Using parallel arrays in Seg probably isn't helpful for performance,
--       but it's a stress test for the vectoriser.
--
--       If we actually cared about performance we wouldn't want to recursively
--       subdivide the string right down to individual characters.
--
{-# LANGUAGE ParallelArrays, ParallelListComp #-}
{-# OPTIONS -fvectorise #-}

module WordsVect
	( wordsOfPArray
	, wordCountOfPArray )
where
import qualified Data.Array.Parallel.Prelude.Word8	as W
import Data.Array.Parallel.Prelude.Word8		(Word8)
import Data.Array.Parallel.Prelude.Int
import Data.Array.Parallel

import qualified Prelude as Prel


-- We can't use the Prelude Char and String types in vectorised code yet..
type Char	= Word8
char_space	= W.fromInt 32

type String	= [: Char :]


-- | Word state
data State
	= Chunk	String
	| Seg 	String 		-- initial word chunk
		[:String:] 	-- complete words in the middle of the segment
		String		-- final word chunk


-- | Compose two wordstates.
plusState :: State -> State -> State
plusState str1 str2
 = case (str1, str2) of
	(Chunk as, Chunk bs)		-> Chunk (as +:+ bs)
	(Chunk as, Seg bl bss br)	-> Seg (as +:+ bl) bss br
	(Seg al ass ar,	Chunk bs)	-> Seg al ass (ar +:+ bs)
	(Seg al ass ar,	Seg bl bss br)	-> Seg al (ass +:+ joinEmpty [:ar +:+ bl:] +:+ bss) br

joinEmpty :: [:[:Word8:]:] -> [:[:Word8:]:]
joinEmpty ws 
	| lengthP ws == 1 && lengthP (ws !: 0) == 0	= [::]
	| otherwise					= ws


-- | Convert a single char to a wordstate.
stateOfChar :: Char -> State
stateOfChar c
	| c W.== char_space	= Seg [::] [::] [::]
	| otherwise		= Chunk [:c:]
	
	
-- | Break this string into words.
stateOfString :: String -> State
stateOfString str
 = let 	len	= lengthP str
   	result
	 | len == 0	= Chunk [::]
	 | len == 1	= stateOfChar (str !: 0)
	 | otherwise	
	 =  let	half	= len `div` 2
		s1	= sliceP 0    half       str
		s2	= sliceP half (len-half) str
	    in	plusState (stateOfString s1) (stateOfString s2)
    in	result


-- | Count the number of words in a string.
countWordsOfState :: State -> Int
countWordsOfState state
 = case state of
	Chunk c		-> wordsInChunkArr c
	Seg c1 ws c2 	-> wordsInChunkArr c1 + lengthP ws + wordsInChunkArr c2
	
wordsInChunkArr :: [:Word8:] -> Int
wordsInChunkArr arr
	| lengthP arr == 0	= 0
	| otherwise		= 1


-- | Flatten a state back to an array of Word8s,
--	inserting spaces between the words.
flattenState :: State -> [:Word8:]
flattenState ss
 = case ss of
	Chunk s	-> s

	Seg   w1 ws w2	
		->  w1 
		+:+ [:char_space:]
		+:+ concatP [: w +:+ [:char_space:] | w <- ws :]
		+:+ w2

-- Interface ------------------------------------------------------------------

-- | Break up an array of chars into words then flatten it back.
wordsOfPArray :: PArray Word8 -> PArray Word8
wordsOfPArray arr
 = let	str	= fromPArrayP arr
	state	= stateOfString str
	strOut	= flattenState state
   in	toPArrayP strOut


-- | Count the number of words in an array
wordCountOfPArray :: PArray Word8 -> Int
wordCountOfPArray arr
 = let	str	= fromPArrayP arr
	state	= stateOfString str
   in	countWordsOfState state