summaryrefslogtreecommitdiff
path: root/libraries/base/GHC/IO/Encoding/UTF32.hs
diff options
context:
space:
mode:
Diffstat (limited to 'libraries/base/GHC/IO/Encoding/UTF32.hs')
-rw-r--r--libraries/base/GHC/IO/Encoding/UTF32.hs286
1 files changed, 153 insertions, 133 deletions
diff --git a/libraries/base/GHC/IO/Encoding/UTF32.hs b/libraries/base/GHC/IO/Encoding/UTF32.hs
index 379f76066b..9319234ca7 100644
--- a/libraries/base/GHC/IO/Encoding/UTF32.hs
+++ b/libraries/base/GHC/IO/Encoding/UTF32.hs
@@ -3,6 +3,7 @@
, BangPatterns
, NondecreasingIndentation
, MagicHash
+ , UnboxedTuples
#-}
{-# OPTIONS_GHC -funbox-strict-fields #-}
@@ -61,68 +62,70 @@ mkUTF32 cfm = TextEncoding { textEncodingName = "UTF-32",
mkTextDecoder = utf32_DF cfm,
mkTextEncoder = utf32_EF cfm }
-utf32_DF :: CodingFailureMode -> IO (TextDecoder (Maybe DecodeBuffer))
+utf32_DF :: CodingFailureMode -> IO (TextDecoder (Maybe DecodeBuffer#))
utf32_DF cfm = do
seen_bom <- newIORef Nothing
- return (BufferCodec {
- encode = utf32_decode seen_bom,
- recover = recoverDecode cfm,
- close = return (),
- getState = readIORef seen_bom,
- setState = writeIORef seen_bom
+ return (BufferCodec# {
+ encode# = utf32_decode seen_bom,
+ recover# = recoverDecode# cfm,
+ close# = return (),
+ getState# = readIORef seen_bom,
+ setState# = writeIORef seen_bom
})
utf32_EF :: CodingFailureMode -> IO (TextEncoder Bool)
utf32_EF cfm = do
done_bom <- newIORef False
- return (BufferCodec {
- encode = utf32_encode done_bom,
- recover = recoverEncode cfm,
- close = return (),
- getState = readIORef done_bom,
- setState = writeIORef done_bom
+ return (BufferCodec# {
+ encode# = utf32_encode done_bom,
+ recover# = recoverEncode# cfm,
+ close# = return (),
+ getState# = readIORef done_bom,
+ setState# = writeIORef done_bom
})
-utf32_encode :: IORef Bool -> EncodeBuffer
+utf32_encode :: IORef Bool -> EncodeBuffer#
utf32_encode done_bom input
output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow, bufSize=os }
+ st0
= do
- b <- readIORef done_bom
- if b then utf32_native_encode input output
+ let !(# st1, b #) = unIO (readIORef done_bom) st0
+ if b then utf32_native_encode input output st1
else if os - ow < 4
- then return (OutputUnderflow, input,output)
+ then (# st1,OutputUnderflow,input,output #)
else do
- writeIORef done_bom True
- writeWord8Buf oraw ow bom0
- writeWord8Buf oraw (ow+1) bom1
- writeWord8Buf oraw (ow+2) bom2
- writeWord8Buf oraw (ow+3) bom3
- utf32_native_encode input output{ bufR = ow+4 }
-
-utf32_decode :: IORef (Maybe DecodeBuffer) -> DecodeBuffer
+ let !(# st2, () #) = unIO (writeIORef done_bom True) st1
+ !(# st3, () #) = unIO (writeWord8Buf oraw ow bom0) st2
+ !(# st4, () #) = unIO (writeWord8Buf oraw (ow+1) bom1) st3
+ !(# st5, () #) = unIO (writeWord8Buf oraw (ow+2) bom2) st4
+ !(# st6, () #) = unIO (writeWord8Buf oraw (ow+3) bom3) st5
+ utf32_native_encode input output{ bufR = ow+4 } st6
+
+utf32_decode :: IORef (Maybe DecodeBuffer#) -> DecodeBuffer#
utf32_decode seen_bom
input@Buffer{ bufRaw=iraw, bufL=ir, bufR=iw, bufSize=_ }
output
+ st0
= do
- mb <- readIORef seen_bom
+ let !(# st1, mb #) = unIO (readIORef seen_bom) st0
case mb of
- Just decode -> decode input output
+ Just decode -> decode input output st1
Nothing ->
- if iw - ir < 4 then return (InputUnderflow, input,output) else do
- c0 <- readWord8Buf iraw ir
- c1 <- readWord8Buf iraw (ir+1)
- c2 <- readWord8Buf iraw (ir+2)
- c3 <- readWord8Buf iraw (ir+3)
+ if iw - ir < 4 then (# st1,InputUnderflow,input,output #) else do
+ let !(# st2, c0 #) = unIO (readWord8Buf iraw ir ) st1
+ !(# st3, c1 #) = unIO (readWord8Buf iraw (ir+1)) st2
+ !(# st4, c2 #) = unIO (readWord8Buf iraw (ir+2)) st3
+ !(# st5, c3 #) = unIO (readWord8Buf iraw (ir+3)) st4
case () of
- _ | c0 == bom0 && c1 == bom1 && c2 == bom2 && c3 == bom3 -> do
- writeIORef seen_bom (Just utf32be_decode)
- utf32be_decode input{ bufL= ir+4 } output
- _ | c0 == bom3 && c1 == bom2 && c2 == bom1 && c3 == bom0 -> do
- writeIORef seen_bom (Just utf32le_decode)
- utf32le_decode input{ bufL= ir+4 } output
- | otherwise -> do
- writeIORef seen_bom (Just utf32_native_decode)
- utf32_native_decode input output
+ _ | c0 == bom0 && c1 == bom1 && c2 == bom2 && c3 == bom3 ->
+ let !(# st6, () #) = unIO (writeIORef seen_bom (Just utf32be_decode)) st5
+ in utf32be_decode input{ bufL= ir+4 } output st6
+ _ | c0 == bom3 && c1 == bom2 && c2 == bom1 && c3 == bom0 ->
+ let !(# st6, () #) = unIO (writeIORef seen_bom (Just utf32le_decode)) st5
+ in utf32le_decode input{ bufL= ir+4 } output st6
+ | otherwise ->
+ let !(# st6, () #) = unIO (writeIORef seen_bom (Just utf32_native_decode)) st5
+ in utf32_native_decode input output st6
bom0, bom1, bom2, bom3 :: Word8
@@ -132,10 +135,10 @@ bom2 = 0xfe
bom3 = 0xff
-- choose UTF-32BE by default for UTF-32 output
-utf32_native_decode :: DecodeBuffer
+utf32_native_decode :: DecodeBuffer#
utf32_native_decode = utf32be_decode
-utf32_native_encode :: EncodeBuffer
+utf32_native_encode :: EncodeBuffer#
utf32_native_encode = utf32be_encode
-- -----------------------------------------------------------------------------
@@ -152,22 +155,22 @@ mkUTF32be cfm = TextEncoding { textEncodingName = "UTF-32BE",
utf32be_DF :: CodingFailureMode -> IO (TextDecoder ())
utf32be_DF cfm =
- return (BufferCodec {
- encode = utf32be_decode,
- recover = recoverDecode cfm,
- close = return (),
- getState = return (),
- setState = const $ return ()
+ return (BufferCodec# {
+ encode# = utf32be_decode,
+ recover# = recoverDecode# cfm,
+ close# = return (),
+ getState# = return (),
+ setState# = const $ return ()
})
utf32be_EF :: CodingFailureMode -> IO (TextEncoder ())
utf32be_EF cfm =
- return (BufferCodec {
- encode = utf32be_encode,
- recover = recoverEncode cfm,
- close = return (),
- getState = return (),
- setState = const $ return ()
+ return (BufferCodec# {
+ encode# = utf32be_encode,
+ recover# = recoverEncode# cfm,
+ close# = return (),
+ getState# = return (),
+ setState# = const $ return ()
})
@@ -182,128 +185,145 @@ mkUTF32le cfm = TextEncoding { textEncodingName = "UTF-32LE",
utf32le_DF :: CodingFailureMode -> IO (TextDecoder ())
utf32le_DF cfm =
- return (BufferCodec {
- encode = utf32le_decode,
- recover = recoverDecode cfm,
- close = return (),
- getState = return (),
- setState = const $ return ()
+ return (BufferCodec# {
+ encode# = utf32le_decode,
+ recover# = recoverDecode# cfm,
+ close# = return (),
+ getState# = return (),
+ setState# = const $ return ()
})
utf32le_EF :: CodingFailureMode -> IO (TextEncoder ())
utf32le_EF cfm =
- return (BufferCodec {
- encode = utf32le_encode,
- recover = recoverEncode cfm,
- close = return (),
- getState = return (),
- setState = const $ return ()
+ return (BufferCodec# {
+ encode# = utf32le_encode,
+ recover# = recoverEncode# cfm,
+ close# = return (),
+ getState# = return (),
+ setState# = const $ return ()
})
-utf32be_decode :: DecodeBuffer
+utf32be_decode :: DecodeBuffer#
utf32be_decode
input@Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ }
output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os }
+ st
= let
- loop !ir !ow
- | ow >= os = done OutputUnderflow ir ow
- | iw - ir < 4 = done InputUnderflow ir ow
+ loop :: Int -> Int -> DecodingBuffer#
+ loop !ir !ow st0
+ | ow >= os = done OutputUnderflow ir ow st0
+ | iw - ir < 4 = done InputUnderflow ir ow st0
| otherwise = do
- c0 <- readWord8Buf iraw ir
- c1 <- readWord8Buf iraw (ir+1)
- c2 <- readWord8Buf iraw (ir+2)
- c3 <- readWord8Buf iraw (ir+3)
+ let !(# st1, c0 #) = unIO (readWord8Buf iraw ir ) st0
+ !(# st2, c1 #) = unIO (readWord8Buf iraw (ir+1)) st1
+ !(# st3, c2 #) = unIO (readWord8Buf iraw (ir+2)) st2
+ !(# st4, c3 #) = unIO (readWord8Buf iraw (ir+3)) st3
let x1 = chr4 c0 c1 c2 c3
- if not (validate x1) then invalid else do
- ow' <- writeCharBuf oraw ow x1
- loop (ir+4) ow'
+ if not (validate x1) then invalid st4 else do
+ let !(# st5, ow' #) = unIO (writeCharBuf oraw ow x1) st4
+ loop (ir+4) ow' st5
where
- invalid = done InvalidSequence ir ow
+ invalid :: DecodingBuffer#
+ invalid st' = done InvalidSequence ir ow st'
-- lambda-lifted, to avoid thunks being built in the inner-loop:
- done why !ir !ow = return (why,
- if ir == iw then input{ bufL=0, bufR=0 }
- else input{ bufL=ir },
- output{ bufR=ow })
+ {-# NOINLINE done #-}
+ done :: CodingProgress -> Int -> Int -> DecodingBuffer#
+ done why !ir !ow st' =
+ let !ri = if ir == iw then input{ bufL=0, bufR=0 } else input{ bufL=ir }
+ !ro = output{ bufR=ow }
+ in (# st', why, ri, ro #)
in
- loop ir0 ow0
+ loop ir0 ow0 st
-utf32le_decode :: DecodeBuffer
+utf32le_decode :: DecodeBuffer#
utf32le_decode
input@Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ }
output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os }
+ st
= let
- loop !ir !ow
- | ow >= os = done OutputUnderflow ir ow
- | iw - ir < 4 = done InputUnderflow ir ow
+ loop :: Int -> Int -> DecodingBuffer#
+ loop !ir !ow st0
+ | ow >= os = done OutputUnderflow ir ow st0
+ | iw - ir < 4 = done InputUnderflow ir ow st0
| otherwise = do
- c0 <- readWord8Buf iraw ir
- c1 <- readWord8Buf iraw (ir+1)
- c2 <- readWord8Buf iraw (ir+2)
- c3 <- readWord8Buf iraw (ir+3)
+ let !(# st1, c0 #) = unIO (readWord8Buf iraw ir ) st0
+ !(# st2, c1 #) = unIO (readWord8Buf iraw (ir+1)) st1
+ !(# st3, c2 #) = unIO (readWord8Buf iraw (ir+2)) st2
+ !(# st4, c3 #) = unIO (readWord8Buf iraw (ir+3)) st3
let x1 = chr4 c3 c2 c1 c0
- if not (validate x1) then invalid else do
- ow' <- writeCharBuf oraw ow x1
- loop (ir+4) ow'
+ if not (validate x1) then invalid st4 else do
+ let !(# st5, ow' #) = unIO (writeCharBuf oraw ow x1) st4
+ loop (ir+4) ow' st5
where
- invalid = done InvalidSequence ir ow
+ invalid :: DecodingBuffer#
+ invalid st' = done InvalidSequence ir ow st'
-- lambda-lifted, to avoid thunks being built in the inner-loop:
- done why !ir !ow = return (why,
- if ir == iw then input{ bufL=0, bufR=0 }
- else input{ bufL=ir },
- output{ bufR=ow })
+ {-# NOINLINE done #-}
+ done :: CodingProgress -> Int -> Int -> DecodingBuffer#
+ done why !ir !ow st' =
+ let !ri = if ir == iw then input{ bufL=0, bufR=0 } else input{ bufL=ir }
+ !ro = output{ bufR=ow }
+ in (# st', why, ri, ro #)
in
- loop ir0 ow0
+ loop ir0 ow0 st
-utf32be_encode :: EncodeBuffer
+utf32be_encode :: EncodeBuffer#
utf32be_encode
input@Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ }
output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os }
+ st
= let
- done why !ir !ow = return (why,
- if ir == iw then input{ bufL=0, bufR=0 }
- else input{ bufL=ir },
- output{ bufR=ow })
- loop !ir !ow
- | ir >= iw = done InputUnderflow ir ow
- | os - ow < 4 = done OutputUnderflow ir ow
+ {-# NOINLINE done #-}
+ done :: CodingProgress -> Int -> Int -> EncodingBuffer#
+ done why !ir !ow st' =
+ let !ri = if ir == iw then input{ bufL=0, bufR=0 } else input{ bufL=ir }
+ !ro = output{ bufR=ow }
+ in (# st', why, ri, ro #)
+ loop :: Int -> Int -> EncodingBuffer#
+ loop !ir !ow st0
+ | ir >= iw = done InputUnderflow ir ow st0
+ | os - ow < 4 = done OutputUnderflow ir ow st0
| otherwise = do
- (c,ir') <- readCharBuf iraw ir
- if isSurrogate c then done InvalidSequence ir ow else do
+ let !(# st1, (c,ir') #) = unIO (readCharBuf iraw ir) st0
+ if isSurrogate c then done InvalidSequence ir ow st1 else do
let (c0,c1,c2,c3) = ord4 c
- writeWord8Buf oraw ow c0
- writeWord8Buf oraw (ow+1) c1
- writeWord8Buf oraw (ow+2) c2
- writeWord8Buf oraw (ow+3) c3
- loop ir' (ow+4)
+ !(# st2, () #) = unIO (writeWord8Buf oraw ow c0) st1
+ !(# st3, () #) = unIO (writeWord8Buf oraw (ow+1) c1) st2
+ !(# st4, () #) = unIO (writeWord8Buf oraw (ow+2) c2) st3
+ !(# st5, () #) = unIO (writeWord8Buf oraw (ow+3) c3) st4
+ loop ir' (ow+4) st5
in
- loop ir0 ow0
+ loop ir0 ow0 st
-utf32le_encode :: EncodeBuffer
+utf32le_encode :: EncodeBuffer#
utf32le_encode
input@Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ }
output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os }
+ st
= let
- done why !ir !ow = return (why,
- if ir == iw then input{ bufL=0, bufR=0 }
- else input{ bufL=ir },
- output{ bufR=ow })
- loop !ir !ow
- | ir >= iw = done InputUnderflow ir ow
- | os - ow < 4 = done OutputUnderflow ir ow
+ done :: CodingProgress -> Int -> Int -> EncodingBuffer#
+ done why !ir !ow st' =
+ let !ri = if ir == iw then input{ bufL=0, bufR=0 } else input{ bufL=ir }
+ !ro = output{ bufR=ow }
+ in (# st', why, ri, ro #)
+ loop :: Int -> Int -> EncodingBuffer#
+ loop !ir !ow st0
+ | ir >= iw = done InputUnderflow ir ow st0
+ | os - ow < 4 = done OutputUnderflow ir ow st0
| otherwise = do
- (c,ir') <- readCharBuf iraw ir
- if isSurrogate c then done InvalidSequence ir ow else do
+ let !(# st1, (c,ir') #) = unIO (readCharBuf iraw ir) st0
+ if isSurrogate c then done InvalidSequence ir ow st1 else do
let (c0,c1,c2,c3) = ord4 c
- writeWord8Buf oraw ow c3
- writeWord8Buf oraw (ow+1) c2
- writeWord8Buf oraw (ow+2) c1
- writeWord8Buf oraw (ow+3) c0
- loop ir' (ow+4)
+ !(# st2, () #) = unIO (writeWord8Buf oraw ow c3) st1
+ !(# st3, () #) = unIO (writeWord8Buf oraw (ow+1) c2) st2
+ !(# st4, () #) = unIO (writeWord8Buf oraw (ow+2) c1) st3
+ !(# st5, () #) = unIO (writeWord8Buf oraw (ow+3) c0) st4
+ loop ir' (ow+4) st5
in
- loop ir0 ow0
+ loop ir0 ow0 st
chr4 :: Word8 -> Word8 -> Word8 -> Word8 -> Char
chr4 (W8# x1#) (W8# x2#) (W8# x3#) (W8# x4#) =