summaryrefslogtreecommitdiff
path: root/libraries/base/GHC/IO/Encoding/UTF16.hs
diff options
context:
space:
mode:
Diffstat (limited to 'libraries/base/GHC/IO/Encoding/UTF16.hs')
-rw-r--r--libraries/base/GHC/IO/Encoding/UTF16.hs314
1 files changed, 167 insertions, 147 deletions
diff --git a/libraries/base/GHC/IO/Encoding/UTF16.hs b/libraries/base/GHC/IO/Encoding/UTF16.hs
index a0878d4fce..a592d4c47c 100644
--- a/libraries/base/GHC/IO/Encoding/UTF16.hs
+++ b/libraries/base/GHC/IO/Encoding/UTF16.hs
@@ -3,6 +3,7 @@
, BangPatterns
, NondecreasingIndentation
, MagicHash
+ , UnboxedTuples
#-}
{-# OPTIONS_GHC -funbox-strict-fields #-}
@@ -61,64 +62,66 @@ mkUTF16 cfm = TextEncoding { textEncodingName = "UTF-16",
mkTextDecoder = utf16_DF cfm,
mkTextEncoder = utf16_EF cfm }
-utf16_DF :: CodingFailureMode -> IO (TextDecoder (Maybe DecodeBuffer))
+utf16_DF :: CodingFailureMode -> IO (TextDecoder (Maybe DecodeBuffer#))
utf16_DF cfm = do
seen_bom <- newIORef Nothing
- return (BufferCodec {
- encode = utf16_decode seen_bom,
- recover = recoverDecode cfm,
- close = return (),
- getState = readIORef seen_bom,
- setState = writeIORef seen_bom
+ return (BufferCodec# {
+ encode# = utf16_decode seen_bom,
+ recover# = recoverDecode# cfm,
+ close# = return (),
+ getState# = readIORef seen_bom,
+ setState# = writeIORef seen_bom
})
utf16_EF :: CodingFailureMode -> IO (TextEncoder Bool)
utf16_EF cfm = do
done_bom <- newIORef False
- return (BufferCodec {
- encode = utf16_encode done_bom,
- recover = recoverEncode cfm,
- close = return (),
- getState = readIORef done_bom,
- setState = writeIORef done_bom
+ return (BufferCodec# {
+ encode# = utf16_encode done_bom,
+ recover# = recoverEncode# cfm,
+ close# = return (),
+ getState# = readIORef done_bom,
+ setState# = writeIORef done_bom
})
-utf16_encode :: IORef Bool -> EncodeBuffer
+utf16_encode :: IORef Bool -> EncodeBuffer#
utf16_encode done_bom input
output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow, bufSize=os }
+ st0
= do
- b <- readIORef done_bom
- if b then utf16_native_encode input output
+ let !(# st1, b #) = unIO (readIORef done_bom) st0
+ if b then utf16_native_encode input output st1
else if os - ow < 2
- then return (OutputUnderflow,input,output)
+ then (# st1,OutputUnderflow,input,output #)
else do
- writeIORef done_bom True
- writeWord8Buf oraw ow bom1
- writeWord8Buf oraw (ow+1) bom2
- utf16_native_encode input output{ bufR = ow+2 }
+ let !(# st2, () #) = unIO (writeIORef done_bom True) st1
+ !(# st3, () #) = unIO (writeWord8Buf oraw ow bom1) st2
+ !(# st4, () #) = unIO (writeWord8Buf oraw (ow+1) bom2) st3
+ utf16_native_encode input output{ bufR = ow+2 } st4
-utf16_decode :: IORef (Maybe DecodeBuffer) -> DecodeBuffer
+utf16_decode :: IORef (Maybe DecodeBuffer#) -> DecodeBuffer#
utf16_decode seen_bom
input@Buffer{ bufRaw=iraw, bufL=ir, bufR=iw, bufSize=_ }
output
+ st0
= do
- mb <- readIORef seen_bom
+ let !(# st1, mb #) = unIO (readIORef seen_bom) st0
case mb of
- Just decode -> decode input output
+ Just decode -> decode input output st1
Nothing ->
- if iw - ir < 2 then return (InputUnderflow,input,output) else do
- c0 <- readWord8Buf iraw ir
- c1 <- readWord8Buf iraw (ir+1)
+ if iw - ir < 2 then (# st1,InputUnderflow,input,output #) else do
+ let !(# st2, c0 #) = unIO (readWord8Buf iraw ir ) st1
+ !(# st3, c1 #) = unIO (readWord8Buf iraw (ir+1)) st2
case () of
- _ | c0 == bomB && c1 == bomL -> do
- writeIORef seen_bom (Just utf16be_decode)
- utf16be_decode input{ bufL= ir+2 } output
- | c0 == bomL && c1 == bomB -> do
- writeIORef seen_bom (Just utf16le_decode)
- utf16le_decode input{ bufL= ir+2 } output
- | otherwise -> do
- writeIORef seen_bom (Just utf16_native_decode)
- utf16_native_decode input output
+ _ | c0 == bomB && c1 == bomL ->
+ let !(# st4, () #) = unIO (writeIORef seen_bom (Just utf16be_decode)) st3
+ in utf16be_decode input{ bufL= ir+2 } output st4
+ | c0 == bomL && c1 == bomB ->
+ let !(# st4, () #) = unIO (writeIORef seen_bom (Just utf16le_decode)) st3
+ in utf16le_decode input{ bufL= ir+2 } output st4
+ | otherwise ->
+ let !(# st4, () #) = unIO (writeIORef seen_bom (Just utf16_native_decode)) st3
+ in utf16_native_decode input output st4
bomB, bomL, bom1, bom2 :: Word8
@@ -126,10 +129,10 @@ bomB = 0xfe
bomL = 0xff
-- choose UTF-16BE by default for UTF-16 output
-utf16_native_decode :: DecodeBuffer
+utf16_native_decode :: DecodeBuffer#
utf16_native_decode = utf16be_decode
-utf16_native_encode :: EncodeBuffer
+utf16_native_encode :: EncodeBuffer#
utf16_native_encode = utf16be_encode
bom1 = bomB
@@ -149,22 +152,22 @@ mkUTF16be cfm = TextEncoding { textEncodingName = "UTF-16BE",
utf16be_DF :: CodingFailureMode -> IO (TextDecoder ())
utf16be_DF cfm =
- return (BufferCodec {
- encode = utf16be_decode,
- recover = recoverDecode cfm,
- close = return (),
- getState = return (),
- setState = const $ return ()
+ return (BufferCodec# {
+ encode# = utf16be_decode,
+ recover# = recoverDecode# cfm,
+ close# = return (),
+ getState# = return (),
+ setState# = const $ return ()
})
utf16be_EF :: CodingFailureMode -> IO (TextEncoder ())
utf16be_EF cfm =
- return (BufferCodec {
- encode = utf16be_encode,
- recover = recoverEncode cfm,
- close = return (),
- getState = return (),
- setState = const $ return ()
+ return (BufferCodec# {
+ encode# = utf16be_encode,
+ recover# = recoverEncode# cfm,
+ close# = return (),
+ getState# = return (),
+ setState# = const $ return ()
})
utf16le :: TextEncoding
@@ -178,114 +181,127 @@ mkUTF16le cfm = TextEncoding { textEncodingName = "UTF16-LE",
utf16le_DF :: CodingFailureMode -> IO (TextDecoder ())
utf16le_DF cfm =
- return (BufferCodec {
- encode = utf16le_decode,
- recover = recoverDecode cfm,
- close = return (),
- getState = return (),
- setState = const $ return ()
+ return (BufferCodec# {
+ encode# = utf16le_decode,
+ recover# = recoverDecode# cfm,
+ close# = return (),
+ getState# = return (),
+ setState# = const $ return ()
})
utf16le_EF :: CodingFailureMode -> IO (TextEncoder ())
utf16le_EF cfm =
- return (BufferCodec {
- encode = utf16le_encode,
- recover = recoverEncode cfm,
- close = return (),
- getState = return (),
- setState = const $ return ()
+ return (BufferCodec# {
+ encode# = utf16le_encode,
+ recover# = recoverEncode# cfm,
+ close# = return (),
+ getState# = return (),
+ setState# = const $ return ()
})
-utf16be_decode :: DecodeBuffer
+utf16be_decode :: DecodeBuffer#
utf16be_decode
input@Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ }
output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os }
+ st
= let
- loop !ir !ow
- | ow >= os = done OutputUnderflow ir ow
- | ir >= iw = done InputUnderflow ir ow
- | ir + 1 == iw = done InputUnderflow ir ow
+ loop !ir !ow st0
+ | ow >= os = done OutputUnderflow ir ow st0
+ | ir >= iw = done InputUnderflow ir ow st0
+ | ir + 1 == iw = done InputUnderflow ir ow st0
| otherwise = do
- c0 <- readWord8Buf iraw ir
- c1 <- readWord8Buf iraw (ir+1)
+ let !(# st1, c0 #) = unIO (readWord8Buf iraw ir ) st0
+ !(# st2, c1 #) = unIO (readWord8Buf iraw (ir+1)) st1
let x1 = fromIntegral c0 `shiftL` 8 + fromIntegral c1
if validate1 x1
- then do ow' <- writeCharBuf oraw ow (unsafeChr (fromIntegral x1))
- loop (ir+2) ow'
- else if iw - ir < 4 then done InputUnderflow ir ow else do
- c2 <- readWord8Buf iraw (ir+2)
- c3 <- readWord8Buf iraw (ir+3)
- let x2 = fromIntegral c2 `shiftL` 8 + fromIntegral c3
- if not (validate2 x1 x2) then invalid else do
- ow' <- writeCharBuf oraw ow (chr2 x1 x2)
- loop (ir+4) ow'
+ then let !(# st3, ow' #) = unIO (writeCharBuf oraw ow (unsafeChr (fromIntegral x1))) st2
+ in loop (ir+2) ow' st3
+ else if iw - ir < 4 then done InputUnderflow ir ow st2 else do
+ let !(# st3, c2 #) = unIO (readWord8Buf iraw (ir+2)) st2
+ !(# st4, c3 #) = unIO (readWord8Buf iraw (ir+3)) st3
+ x2 = fromIntegral c2 `shiftL` 8 + fromIntegral c3
+ if not (validate2 x1 x2) then invalid st4 else do
+ let !(# st5, ow' #) = unIO (writeCharBuf oraw ow (chr2 x1 x2)) st4
+ loop (ir+4) ow' st5
where
- invalid = done InvalidSequence ir ow
+ invalid :: DecodingBuffer#
+ invalid st' = done InvalidSequence ir ow st'
-- lambda-lifted, to avoid thunks being built in the inner-loop:
- done why !ir !ow = return (why,
- if ir == iw then input{ bufL=0, bufR=0 }
- else input{ bufL=ir },
- output{ bufR=ow })
+ {-# NOINLINE done #-}
+ done :: CodingProgress -> Int -> Int -> DecodingBuffer#
+ done why !ir !ow st' =
+ let !ri = if ir == iw then input { bufL = 0, bufR = 0 } else input { bufL = ir }
+ !ro = output{ bufR = ow }
+ in (# st', why, ri, ro #)
in
- loop ir0 ow0
+ loop ir0 ow0 st
-utf16le_decode :: DecodeBuffer
+utf16le_decode :: DecodeBuffer#
utf16le_decode
input@Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ }
output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os }
+ st
= let
- loop !ir !ow
- | ow >= os = done OutputUnderflow ir ow
- | ir >= iw = done InputUnderflow ir ow
- | ir + 1 == iw = done InputUnderflow ir ow
+ loop :: Int -> Int -> DecodingBuffer#
+ loop !ir !ow st0
+ | ow >= os = done OutputUnderflow ir ow st0
+ | ir >= iw = done InputUnderflow ir ow st0
+ | ir + 1 == iw = done InputUnderflow ir ow st0
| otherwise = do
- c0 <- readWord8Buf iraw ir
- c1 <- readWord8Buf iraw (ir+1)
- let x1 = fromIntegral c1 `shiftL` 8 + fromIntegral c0
+ let !(# st1, c0 #) = unIO (readWord8Buf iraw ir ) st0
+ !(# st2, c1 #) = unIO (readWord8Buf iraw (ir+1)) st1
+ x1 = fromIntegral c1 `shiftL` 8 + fromIntegral c0
if validate1 x1
- then do ow' <- writeCharBuf oraw ow (unsafeChr (fromIntegral x1))
- loop (ir+2) ow'
- else if iw - ir < 4 then done InputUnderflow ir ow else do
- c2 <- readWord8Buf iraw (ir+2)
- c3 <- readWord8Buf iraw (ir+3)
- let x2 = fromIntegral c3 `shiftL` 8 + fromIntegral c2
- if not (validate2 x1 x2) then invalid else do
- ow' <- writeCharBuf oraw ow (chr2 x1 x2)
- loop (ir+4) ow'
+ then let !(# st3, ow' #) = unIO (writeCharBuf oraw ow (unsafeChr (fromIntegral x1))) st2
+ in loop (ir+2) ow' st3
+ else if iw - ir < 4 then done InputUnderflow ir ow st2 else do
+ let !(# st3, c2 #) = unIO (readWord8Buf iraw (ir+2)) st2
+ !(# st4, c3 #) = unIO (readWord8Buf iraw (ir+3)) st3
+ x2 = fromIntegral c3 `shiftL` 8 + fromIntegral c2
+ if not (validate2 x1 x2) then invalid st4 else do
+ let !(# st5, ow' #) = unIO (writeCharBuf oraw ow (chr2 x1 x2)) st4
+ loop (ir+4) ow' st5
where
- invalid = done InvalidSequence ir ow
+ invalid :: DecodingBuffer#
+ invalid st' = done InvalidSequence ir ow st'
-- lambda-lifted, to avoid thunks being built in the inner-loop:
- done why !ir !ow = return (why,
- if ir == iw then input{ bufL=0, bufR=0 }
- else input{ bufL=ir },
- output{ bufR=ow })
+ {-# NOINLINE done #-}
+ done :: CodingProgress -> Int -> Int -> DecodingBuffer#
+ done why !ir !ow st' =
+ let !ri = if ir == iw then input{ bufL = 0, bufR = 0 } else input{ bufL = ir }
+ !ro = output{ bufR = ow }
+ in (# st', why, ri, ro #)
in
- loop ir0 ow0
+ loop ir0 ow0 st
-utf16be_encode :: EncodeBuffer
+utf16be_encode :: EncodeBuffer#
utf16be_encode
input@Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ }
output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os }
+ st
= let
- done why !ir !ow = return (why,
- if ir == iw then input{ bufL=0, bufR=0 }
- else input{ bufL=ir },
- output{ bufR=ow })
- loop !ir !ow
- | ir >= iw = done InputUnderflow ir ow
- | os - ow < 2 = done OutputUnderflow ir ow
+ {-# NOINLINE done #-}
+ done :: CodingProgress -> Int -> Int -> EncodingBuffer#
+ done why !ir !ow st' =
+ let !ri = if ir == iw then input{ bufL = 0, bufR = 0 } else input{ bufL=ir }
+ !ro = output{ bufR=ow }
+ in (# st', why, ri, ro #)
+ loop :: Int -> Int -> EncodingBuffer#
+ loop !ir !ow st0
+ | ir >= iw = done InputUnderflow ir ow st0
+ | os - ow < 2 = done OutputUnderflow ir ow st0
| otherwise = do
- (c,ir') <- readCharBuf iraw ir
+ let !(# st1, (c,ir') #) = unIO (readCharBuf iraw ir) st0
case ord c of
- x | x < 0x10000 -> if isSurrogate c then done InvalidSequence ir ow else do
- writeWord8Buf oraw ow (fromIntegral (x `shiftR` 8))
- writeWord8Buf oraw (ow+1) (fromIntegral x)
- loop ir' (ow+2)
+ x | x < 0x10000 -> if isSurrogate c then done InvalidSequence ir ow st1 else do
+ let !(# st2, () #) = unIO (writeWord8Buf oraw ow (fromIntegral (x `shiftR` 8))) st1
+ !(# st3, () #) = unIO (writeWord8Buf oraw (ow+1) (fromIntegral x)) st2
+ loop ir' (ow+2) st3
| otherwise -> do
- if os - ow < 4 then done OutputUnderflow ir ow else do
+ if os - ow < 4 then done OutputUnderflow ir ow st1 else do
let
n1 = x - 0x10000
c1 = fromIntegral (n1 `shiftR` 18 + 0xD8)
@@ -294,35 +310,39 @@ utf16be_encode
c3 = fromIntegral (n2 `shiftR` 8 + 0xDC)
c4 = fromIntegral n2
--
- writeWord8Buf oraw ow c1
- writeWord8Buf oraw (ow+1) c2
- writeWord8Buf oraw (ow+2) c3
- writeWord8Buf oraw (ow+3) c4
- loop ir' (ow+4)
+ !(# st2, () #) = unIO (writeWord8Buf oraw ow c1) st1
+ !(# st3, () #) = unIO (writeWord8Buf oraw (ow+1) c2) st2
+ !(# st4, () #) = unIO (writeWord8Buf oraw (ow+2) c3) st3
+ !(# st5, () #) = unIO (writeWord8Buf oraw (ow+3) c4) st4
+ loop ir' (ow+4) st5
in
- loop ir0 ow0
+ loop ir0 ow0 st
-utf16le_encode :: EncodeBuffer
+utf16le_encode :: EncodeBuffer#
utf16le_encode
input@Buffer{ bufRaw=iraw, bufL=ir0, bufR=iw, bufSize=_ }
output@Buffer{ bufRaw=oraw, bufL=_, bufR=ow0, bufSize=os }
+ st
= let
- done why !ir !ow = return (why,
- if ir == iw then input{ bufL=0, bufR=0 }
- else input{ bufL=ir },
- output{ bufR=ow })
- loop !ir !ow
- | ir >= iw = done InputUnderflow ir ow
- | os - ow < 2 = done OutputUnderflow ir ow
+ {-# NOINLINE done #-}
+ done :: CodingProgress -> Int -> Int -> EncodingBuffer#
+ done why !ir !ow st' =
+ let !ri = if ir == iw then input{ bufL = 0, bufR = 0 } else input{ bufL = ir }
+ !ro = output{ bufR = ow }
+ in (# st', why, ri, ro #)
+ loop :: Int -> Int -> EncodingBuffer#
+ loop !ir !ow st0
+ | ir >= iw = done InputUnderflow ir ow st0
+ | os - ow < 2 = done OutputUnderflow ir ow st0
| otherwise = do
- (c,ir') <- readCharBuf iraw ir
+ let !(# st1, (c,ir') #) = unIO (readCharBuf iraw ir) st0
case ord c of
- x | x < 0x10000 -> if isSurrogate c then done InvalidSequence ir ow else do
- writeWord8Buf oraw ow (fromIntegral x)
- writeWord8Buf oraw (ow+1) (fromIntegral (x `shiftR` 8))
- loop ir' (ow+2)
+ x | x < 0x10000 -> if isSurrogate c then done InvalidSequence ir ow st1 else do
+ let !(# st2, () #) = unIO (writeWord8Buf oraw ow (fromIntegral x)) st1
+ !(# st3, () #) = unIO (writeWord8Buf oraw (ow+1) (fromIntegral (x `shiftR` 8))) st2
+ loop ir' (ow+2) st3
| otherwise ->
- if os - ow < 4 then done OutputUnderflow ir ow else do
+ if os - ow < 4 then done OutputUnderflow ir ow st1 else do
let
n1 = x - 0x10000
c1 = fromIntegral (n1 `shiftR` 18 + 0xD8)
@@ -331,13 +351,13 @@ utf16le_encode
c3 = fromIntegral (n2 `shiftR` 8 + 0xDC)
c4 = fromIntegral n2
--
- writeWord8Buf oraw ow c2
- writeWord8Buf oraw (ow+1) c1
- writeWord8Buf oraw (ow+2) c4
- writeWord8Buf oraw (ow+3) c3
- loop ir' (ow+4)
+ !(# st2, () #) = unIO (writeWord8Buf oraw ow c2) st1
+ !(# st3, () #) = unIO (writeWord8Buf oraw (ow+1) c1) st2
+ !(# st4, () #) = unIO (writeWord8Buf oraw (ow+2) c4) st3
+ !(# st5, () #) = unIO (writeWord8Buf oraw (ow+3) c3) st4
+ loop ir' (ow+4) st5
in
- loop ir0 ow0
+ loop ir0 ow0 st
chr2 :: Word16 -> Word16 -> Char
chr2 (W16# a#) (W16# b#) = C# (chr# (upper# +# lower# +# 0x10000#))