summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBodigrim <andrew.lelechenko@gmail.com>2021-07-10 15:23:08 +0100
committerMarge Bot <ben+marge-bot@smart-cactus.org>2021-08-05 07:14:04 -0400
commit1f8090933268b1ca071bc4a8a35b0f1828a76fce (patch)
treeb665f1c2f6511c8df466a9e783df57b1dd667098
parent7a9d8803cfde3c42da4b27a7b89bdcb2ac870e3f (diff)
downloadhaskell-1f8090933268b1ca071bc4a8a35b0f1828a76fce.tar.gz
Add Data.ByteArray, derived from primitive
-rw-r--r--libraries/base/Data/ByteArray.hs242
-rw-r--r--libraries/base/base.cabal1
-rw-r--r--libraries/base/changelog.md2
3 files changed, 245 insertions, 0 deletions
diff --git a/libraries/base/Data/ByteArray.hs b/libraries/base/Data/ByteArray.hs
new file mode 100644
index 0000000000..47562c0ce7
--- /dev/null
+++ b/libraries/base/Data/ByteArray.hs
@@ -0,0 +1,242 @@
+-- |
+-- Module : Data.ByteArray
+-- Copyright : (c) Roman Leshchinskiy 2009-2012
+-- License : BSD-style
+--
+-- Maintainer : libraries@haskell.org
+-- Portability : non-portable
+--
+-- Derived from @primitive@ package.
+
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE MagicHash #-}
+{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE UnboxedTuples #-}
+
+module Data.ByteArray (
+ ByteArray(..)
+) where
+
+import Data.Bits ((.&.), unsafeShiftR)
+import Data.Data (mkNoRepType, Data(..))
+import qualified Data.Foldable as F
+import Data.Semigroup
+import GHC.Show (intToDigit)
+import GHC.Exts
+import GHC.ST (ST(..), runST)
+import GHC.Word (Word8(..))
+
+-- | Boxed wrapper for 'ByteArray#'.
+--
+-- Since 'ByteArray#' is an unlifted type and not a member of kind 'Data.Kind.Type',
+-- things like @[ByteArray#]@ or @IO ByteArray#@ are ill-typed. To work around this
+-- inconvenience this module provides a standard boxed wrapper, inhabiting 'Data.Kind.Type'.
+-- Clients are expected to use 'ByteArray' in higher-level APIs,
+-- but wrap and unwrap 'ByteArray' internally as they please
+-- and use functions from "GHC.Exts".
+--
+-- @since 4.17.0.0
+data ByteArray = ByteArray ByteArray#
+
+data MutableByteArray s = MutableByteArray (MutableByteArray# s)
+
+-- | Create a new mutable byte array of the specified size in bytes.
+--
+-- /Note:/ this function does not check if the input is non-negative.
+newByteArray :: Int -> ST s (MutableByteArray s)
+{-# INLINE newByteArray #-}
+newByteArray (I# n#) =
+ ST (\s# -> case newByteArray# n# s# of
+ (# s'#, arr# #) -> (# s'#, MutableByteArray arr# #))
+
+-- | Convert a mutable byte array to an immutable one without copying. The
+-- array should not be modified after the conversion.
+unsafeFreezeByteArray :: MutableByteArray s -> ST s ByteArray
+{-# INLINE unsafeFreezeByteArray #-}
+unsafeFreezeByteArray (MutableByteArray arr#) =
+ ST (\s# -> case unsafeFreezeByteArray# arr# s# of
+ (# s'#, arr'# #) -> (# s'#, ByteArray arr'# #))
+
+-- | Size of the byte array in bytes.
+sizeofByteArray :: ByteArray -> Int
+{-# INLINE sizeofByteArray #-}
+sizeofByteArray (ByteArray arr#) = I# (sizeofByteArray# arr#)
+
+-- | Read byte at specific index.
+indexByteArray :: ByteArray -> Int -> Word8
+{-# INLINE indexByteArray #-}
+indexByteArray (ByteArray arr#) (I# i#) = W8# (indexWord8Array# arr# i#)
+
+-- | Write byte at specific index.
+writeByteArray :: MutableByteArray s -> Int -> Word8 -> ST s ()
+{-# INLINE writeByteArray #-}
+writeByteArray (MutableByteArray arr#) (I# i#) (W8# x#) =
+ ST (\s# -> case writeWord8Array# arr# i# x# s# of
+ s'# -> (# s'#, () #))
+
+-- | Explode 'ByteArray' into a list of bytes.
+byteArrayToList :: ByteArray -> [Word8]
+{-# INLINE byteArrayToList #-}
+byteArrayToList arr = go 0
+ where
+ go i
+ | i < maxI = indexByteArray arr i : go (i+1)
+ | otherwise = []
+ maxI = sizeofByteArray arr
+
+-- | Create a 'ByteArray' from a list of a known length. If the length
+-- of the list does not match the given length, this throws an exception.
+byteArrayFromListN :: Int -> [Word8] -> ByteArray
+byteArrayFromListN n ys = runST $ do
+ marr <- newByteArray n
+ let go !ix [] = if ix == n
+ then return ()
+ else error $ "Data.ByteArray.byteArrayFromListN: list length less than specified size"
+ go !ix (x : xs) = if ix < n
+ then do
+ writeByteArray marr ix x
+ go (ix + 1) xs
+ else error $ "Data.ByteArray.byteArrayFromListN: list length greater than specified size"
+ go 0 ys
+ unsafeFreezeByteArray marr
+
+-- | Copy a slice of an immutable byte array to a mutable byte array.
+--
+-- /Note:/ this function does not do bounds or overlap checking.
+copyByteArray
+ :: MutableByteArray s -- ^ destination array
+ -> Int -- ^ offset into destination array
+ -> ByteArray -- ^ source array
+ -> Int -- ^ offset into source array
+ -> Int -- ^ number of bytes to copy
+ -> ST s ()
+{-# INLINE copyByteArray #-}
+copyByteArray (MutableByteArray dst#) (I# doff#) (ByteArray src#) (I# soff#) (I# sz#) =
+ ST (\s# -> case copyByteArray# src# soff# dst# doff# sz# s# of
+ s'# -> (# s'#, () #))
+
+instance Data ByteArray where
+ toConstr _ = error "toConstr"
+ gunfold _ _ = error "gunfold"
+ dataTypeOf _ = mkNoRepType "Data.ByteArray.ByteArray"
+
+instance Show ByteArray where
+ showsPrec _ ba =
+ showString "[" . go 0
+ where
+ showW8 :: Word8 -> String -> String
+ showW8 !w s =
+ '0'
+ : 'x'
+ : intToDigit (fromIntegral (unsafeShiftR w 4))
+ : intToDigit (fromIntegral (w .&. 0x0F))
+ : s
+ go i
+ | i < sizeofByteArray ba = comma . showW8 (indexByteArray ba i :: Word8) . go (i+1)
+ | otherwise = showChar ']'
+ where
+ comma | i == 0 = id
+ | otherwise = showString ", "
+
+-- | Compare prefixes of given length.
+compareByteArraysFromBeginning :: ByteArray -> ByteArray -> Int -> Ordering
+{-# INLINE compareByteArraysFromBeginning #-}
+compareByteArraysFromBeginning (ByteArray ba1#) (ByteArray ba2#) (I# n#)
+ = compare (I# (compareByteArrays# ba1# 0# ba2# 0# n#)) 0
+
+-- | Do two byte arrays share the same pointer?
+sameByteArray :: ByteArray# -> ByteArray# -> Bool
+sameByteArray ba1 ba2 =
+ case reallyUnsafePtrEquality# (unsafeCoerce# ba1 :: ()) (unsafeCoerce# ba2 :: ()) of
+ r -> isTrue# r
+
+instance Eq ByteArray where
+ ba1@(ByteArray ba1#) == ba2@(ByteArray ba2#)
+ | sameByteArray ba1# ba2# = True
+ | n1 /= n2 = False
+ | otherwise = compareByteArraysFromBeginning ba1 ba2 n1 == EQ
+ where
+ n1 = sizeofByteArray ba1
+ n2 = sizeofByteArray ba2
+
+-- | Non-lexicographic ordering. This compares the lengths of
+-- the byte arrays first and uses a lexicographic ordering if
+-- the lengths are equal. Subject to change between major versions.
+instance Ord ByteArray where
+ ba1@(ByteArray ba1#) `compare` ba2@(ByteArray ba2#)
+ | sameByteArray ba1# ba2# = EQ
+ | n1 /= n2 = n1 `compare` n2
+ | otherwise = compareByteArraysFromBeginning ba1 ba2 n1
+ where
+ n1 = sizeofByteArray ba1
+ n2 = sizeofByteArray ba2
+-- The primop compareByteArrays# (invoked from 'compareByteArraysFromBeginning')
+-- performs a check for pointer equality as well. However, it
+-- is included here because it is likely better to check for pointer equality
+-- before checking for length equality. Getting the length requires deferencing
+-- the pointers, which could cause accesses to memory that is not in the cache.
+-- By contrast, a pointer equality check is always extremely cheap.
+
+-- | Append two byte arrays.
+appendByteArray :: ByteArray -> ByteArray -> ByteArray
+appendByteArray a b = runST $ do
+ marr <- newByteArray (sizeofByteArray a + sizeofByteArray b)
+ copyByteArray marr 0 a 0 (sizeofByteArray a)
+ copyByteArray marr (sizeofByteArray a) b 0 (sizeofByteArray b)
+ unsafeFreezeByteArray marr
+
+-- | Concatenate a list of 'ByteArray's.
+concatByteArray :: [ByteArray] -> ByteArray
+concatByteArray arrs = runST $ do
+ let len = calcLength arrs 0
+ marr <- newByteArray len
+ pasteByteArrays marr 0 arrs
+ unsafeFreezeByteArray marr
+
+-- | Dump immutable 'ByteArray's into a mutable one, starting from a given offset.
+pasteByteArrays :: MutableByteArray s -> Int -> [ByteArray] -> ST s ()
+pasteByteArrays !_ !_ [] = return ()
+pasteByteArrays !marr !ix (x : xs) = do
+ copyByteArray marr ix x 0 (sizeofByteArray x)
+ pasteByteArrays marr (ix + sizeofByteArray x) xs
+
+-- | Compute total length of 'ByteArray's, increased by accumulator.
+calcLength :: [ByteArray] -> Int -> Int
+calcLength [] !n = n
+calcLength (x : xs) !n = calcLength xs (sizeofByteArray x + n)
+
+-- | An array of zero length.
+emptyByteArray :: ByteArray
+emptyByteArray = runST (newByteArray 0 >>= unsafeFreezeByteArray)
+
+-- | Replicate 'ByteArray' given number of times and concatenate all together.
+replicateByteArray :: Int -> ByteArray -> ByteArray
+replicateByteArray n arr = runST $ do
+ marr <- newByteArray (n * sizeofByteArray arr)
+ let go i = if i < n
+ then do
+ copyByteArray marr (i * sizeofByteArray arr) arr 0 (sizeofByteArray arr)
+ go (i + 1)
+ else return ()
+ go 0
+ unsafeFreezeByteArray marr
+
+instance Semigroup ByteArray where
+ (<>) = appendByteArray
+ sconcat = mconcat . F.toList
+ stimes i arr
+ | itgr < 1 = emptyByteArray
+ | itgr <= (fromIntegral (maxBound :: Int)) = replicateByteArray (fromIntegral itgr) arr
+ | otherwise = error "Data.ByteArray#stimes: cannot allocate the requested amount of memory"
+ where itgr = toInteger i :: Integer
+
+instance Monoid ByteArray where
+ mempty = emptyByteArray
+ mconcat = concatByteArray
+
+instance IsList ByteArray where
+ type Item ByteArray = Word8
+
+ toList = byteArrayToList
+ fromList xs = byteArrayFromListN (length xs) xs
+ fromListN = byteArrayFromListN
diff --git a/libraries/base/base.cabal b/libraries/base/base.cabal
index bfaad7dcf5..7e09412839 100644
--- a/libraries/base/base.cabal
+++ b/libraries/base/base.cabal
@@ -118,6 +118,7 @@ Library
Data.Bitraversable
Data.Bits
Data.Bool
+ Data.ByteArray
Data.Char
Data.Coerce
Data.Complex
diff --git a/libraries/base/changelog.md b/libraries/base/changelog.md
index c8a1611508..edf876cb26 100644
--- a/libraries/base/changelog.md
+++ b/libraries/base/changelog.md
@@ -11,6 +11,8 @@
* Introduce `GHC.ExecutablePath.executablePath`, which is more robust than
`getExecutablePath` in cases when the executable has been deleted.
+ * Add `Data.ByteArray` module, providing a boxed `ByteArray#`.
+
## 4.16.0.0 *TBA*
* Make it possible to promote `Natural`s and remove the separate `Nat` kind.