11 files changed, 360 insertions, 71 deletions
diff --git a/compiler/prelude/PrelNames.hs b/compiler/prelude/PrelNames.hs
index 978c6f5b26..4f5ecde4f2 100644
--- a/compiler/prelude/PrelNames.hs
+++ b/compiler/prelude/PrelNames.hs
@@ -343,8 +343,9 @@ basicKnownKeyNames
         groupWithName,
 
         -- Strings and lists
-        unpackCStringName,
-        unpackCStringFoldrName, unpackCStringUtf8Name,
+        unpackCStringName, unpackCStringUtf8Name,
+        unpackCStringFoldrName, unpackCStringFoldrUtf8Name,
+        cstringLengthName,
 
         -- Overloaded lists
         isListClassName,
@@ -704,11 +705,12 @@ ioDataCon_RDR :: RdrName
 ioDataCon_RDR           = nameRdrName ioDataConName
 
 eqString_RDR, unpackCString_RDR, unpackCStringFoldr_RDR,
-    unpackCStringUtf8_RDR :: RdrName
+    unpackCStringFoldrUtf8_RDR, unpackCStringUtf8_RDR :: RdrName
 eqString_RDR            = nameRdrName eqStringName
 unpackCString_RDR       = nameRdrName unpackCStringName
 unpackCStringFoldr_RDR  = nameRdrName unpackCStringFoldrName
 unpackCStringUtf8_RDR   = nameRdrName unpackCStringUtf8Name
+unpackCStringFoldrUtf8_RDR  = nameRdrName unpackCStringFoldrUtf8Name
 
 newStablePtr_RDR :: RdrName
 newStablePtr_RDR        = nameRdrName newStablePtrName
@@ -1001,11 +1003,13 @@ modIntName = varQual gHC_CLASSES (fsLit "modInt#") modIntIdKey
 
 -- Base strings Strings
 unpackCStringName, unpackCStringFoldrName,
-    unpackCStringUtf8Name, eqStringName :: Name
+    unpackCStringUtf8Name, unpackCStringFoldrUtf8Name,
+    eqStringName, cstringLengthName :: Name
 unpackCStringName       = varQual gHC_CSTRING (fsLit "unpackCString#") unpackCStringIdKey
 unpackCStringFoldrName  = varQual gHC_CSTRING (fsLit "unpackFoldrCString#") unpackCStringFoldrIdKey
 unpackCStringUtf8Name   = varQual gHC_CSTRING (fsLit "unpackCStringUtf8#") unpackCStringUtf8IdKey
 eqStringName            = varQual gHC_BASE (fsLit "eqString")  eqStringIdKey
+unpackCStringFoldrUtf8Name  = varQual gHC_CSTRING (fsLit "unpackFoldrCStringUtf8#") unpackCStringFoldrUtf8IdKey
 
 -- The 'inline' function
 inlineIdName :: Name
@@ -2080,7 +2084,8 @@ wildCardKey, absentErrorIdKey, augmentIdKey, appendIdKey,
     runtimeErrorIdKey, patErrorIdKey, voidPrimIdKey,
     realWorldPrimIdKey, recConErrorIdKey,
     unpackCStringUtf8IdKey, unpackCStringAppendIdKey,
-    unpackCStringFoldrIdKey, unpackCStringIdKey,
+    unpackCStringFoldrIdKey, unpackCStringFoldrUtf8IdKey,
+    unpackCStringIdKey,
     typeErrorIdKey, divIntIdKey, modIntIdKey,
     absentSumFieldErrorIdKey :: Unique
 
@@ -2103,12 +2108,14 @@ recConErrorIdKey              = mkPreludeMiscIdUnique 16
 unpackCStringUtf8IdKey        = mkPreludeMiscIdUnique 17
 unpackCStringAppendIdKey      = mkPreludeMiscIdUnique 18
 unpackCStringFoldrIdKey       = mkPreludeMiscIdUnique 19
+
 unpackCStringIdKey            = mkPreludeMiscIdUnique 20
-voidPrimIdKey                 = mkPreludeMiscIdUnique 21
-typeErrorIdKey                = mkPreludeMiscIdUnique 22
-divIntIdKey                   = mkPreludeMiscIdUnique 23
-modIntIdKey                   = mkPreludeMiscIdUnique 24
-absentSumFieldErrorIdKey      = mkPreludeMiscIdUnique 9
+unpackCStringFoldrUtf8IdKey   = mkPreludeMiscIdUnique 21
+voidPrimIdKey                 = mkPreludeMiscIdUnique 22
+typeErrorIdKey                = mkPreludeMiscIdUnique 23
+divIntIdKey                   = mkPreludeMiscIdUnique 24
+modIntIdKey                   = mkPreludeMiscIdUnique 25
+cstringLengthIdKey            = mkPreludeMiscIdUnique 26
 
 unsafeCoerceIdKey, concatIdKey, filterIdKey, zipIdKey, bindIOIdKey,
     returnIOIdKey, newStablePtrIdKey,
diff --git a/compiler/prelude/PrelRules.hs b/compiler/prelude/PrelRules.hs
index ffee79da36..370a026768 100644
--- a/compiler/prelude/PrelRules.hs
+++ b/compiler/prelude/PrelRules.hs
@@ -1206,7 +1206,10 @@ builtinRules :: [CoreRule]
 builtinRules
   = [BuiltinRule { ru_name = fsLit "AppendLitString",
                    ru_fn = unpackCStringFoldrName,
-                   ru_nargs = 4, ru_try = match_append_lit },
+                   ru_nargs = 4, ru_try = match_append_lit_C },
+     BuiltinRule { ru_name = fsLit "AppendLitStringUtf8",
+                   ru_fn = unpackCStringFoldrUtf8Name,
+                   ru_nargs = 4, ru_try = match_append_lit_utf8 },
      BuiltinRule { ru_name = fsLit "EqString", ru_fn = eqStringName,
                    ru_nargs = 2, ru_try = match_eq_string },
      BuiltinRule { ru_name = fsLit "Inline", ru_fn = inlineIdName,
@@ -1378,11 +1381,22 @@ builtinNaturalRules =
 
 ---------------------------------------------------
 -- The rule is this:
---      unpackFoldrCString# "foo" c (unpackFoldrCString# "baz" c n)
---      =  unpackFoldrCString# "foobaz" c n
+--      unpackFoldrCString*# "foo"# c (unpackFoldrCString*# "baz"# c n)
+--      =  unpackFoldrCString*# "foobaz"# c n
+--
+-- See also Note [String literals in GHC] in CString.hs
+
+-- CString version
+match_append_lit_C :: RuleFun
+match_append_lit_C = match_append_lit unpackCStringFoldrIdKey
 
-match_append_lit :: RuleFun
-match_append_lit _ id_unf _
+-- CStringUTF8 version
+match_append_lit_utf8 :: RuleFun
+match_append_lit_utf8 = match_append_lit unpackCStringFoldrUtf8IdKey
+
+{-# INLINE match_append_lit #-}
+match_append_lit :: Unique -> RuleFun
+match_append_lit foldVariant _ id_unf _
         [ Type ty1
         , lit1
         , c1
@@ -1395,12 +1409,13 @@ match_append_lit _ id_unf _
                         `App` lit2
                         `App` c2
                         `App` n) <- stripTicksTop tickishFloatable e2
-  , unpk `hasKey` unpackCStringFoldrIdKey
-  , cheapEqExpr' tickishFloatable c1 c2
-  , (c1Ticks, c1') <- stripTicksTop tickishFloatable c1
-  , c2Ticks <- stripTicksTopT tickishFloatable c2
+  , unpk `hasKey` foldVariant
   , Just (LitString s1) <- exprIsLiteral_maybe id_unf lit1
   , Just (LitString s2) <- exprIsLiteral_maybe id_unf lit2
+  , let freeVars = (mkInScopeSet (exprFreeVars c1 `unionVarSet` exprFreeVars c2))
+    in eqExpr freeVars c1 c2
+  , (c1Ticks, c1') <- stripTicksTop tickishFloatable c1
+  , c2Ticks <- stripTicksTopT tickishFloatable c2
   = ASSERT( ty1 `eqType` ty2 )
     Just $ mkTicks strTicks
          $ Var unpk `App` Type ty1
@@ -1408,17 +1423,23 @@ match_append_lit _ id_unf _
                     `App` mkTicks (c1Ticks ++ c2Ticks) c1'
                     `App` n
 
-match_append_lit _ _ _ _ = Nothing
+match_append_lit _ _ _ _ _ = Nothing
 
 ---------------------------------------------------
 -- The rule is this:
 --      eqString (unpackCString# (Lit s1)) (unpackCString# (Lit s2)) = s1==s2
+-- Also  matches unpackCStringUtf8#
 
 match_eq_string :: RuleFun
 match_eq_string _ id_unf _
         [Var unpk1 `App` lit1, Var unpk2 `App` lit2]
-  | unpk1 `hasKey` unpackCStringIdKey
-  , unpk2 `hasKey` unpackCStringIdKey
+  | unpk_key1 <- getUnique unpk1
+  , unpk_key2 <- getUnique unpk2
+  , unpk_key1 == unpk_key2
+  -- For now we insist the literals have to agree in their encoding
+  -- to keep the rule simple. But we could check if the decoded strings
+  -- compare equal in here as well.
+  , unpk_key1 `elem` [unpackCStringUtf8IdKey, unpackCStringIdKey]
   , Just (LitString s1) <- exprIsLiteral_maybe id_unf lit1
   , Just (LitString s2) <- exprIsLiteral_maybe id_unf lit2
   = Just (if s1 == s2 then trueValBool else falseValBool)
diff --git a/libraries/base/GHC/Base.hs b/libraries/base/GHC/Base.hs
index cd0c7e2c15..dae86a9484 100644
--- a/libraries/base/GHC/Base.hs
+++ b/libraries/base/GHC/Base.hs
@@ -1637,7 +1637,13 @@ a `iShiftRL#` b | isTrue# (b >=# WORD_SIZE_IN_BITS#) = 0#
 "unpack-list"  [1]  forall a   . unpackFoldrCString# a (:) [] = unpackCString# a
 "unpack-append"     forall a n . unpackFoldrCString# a (:) n  = unpackAppendCString# a n
 
--- There's a built-in rule (in PrelRules.hs) for
+"unpack-utf8"       [~1] forall a   . unpackCStringUtf8# a             = build (unpackFoldrCStringUtf8# a)
+"unpack-list-utf8"  [1]  forall a   . unpackFoldrCStringUtf8# a (:) [] = unpackCStringUtf8# a
+"unpack-append-utf8"     forall a n . unpackFoldrCStringUtf8# a (:) n  = unpackAppendCStringUtf8# a n
+
+-- There's a built-in rule (in GHC.Core.Op.ConstantFold) for
 --      unpackFoldr "foo" c (unpackFoldr "baz" c n)  =  unpackFoldr "foobaz" c n
 
+-- See also the Note [String literals in GHC] in CString.hs
+
   #-}
diff --git a/libraries/base/GHC/List.hs b/libraries/base/GHC/List.hs
index 6f6d9d670a..f75b9426fb 100644
--- a/libraries/base/GHC/List.hs
+++ b/libraries/base/GHC/List.hs
@@ -860,7 +860,7 @@ elem _ []       = False
 elem x (y:ys)   = x==y || elem x ys
 {-# NOINLINE [1] elem #-}
 {-# RULES
-"elem/build"    forall x (g :: forall b . Eq a => (a -> b -> b) -> b -> b)
+"elem/build"    forall x (g :: forall b . (a -> b -> b) -> b -> b)
    . elem x (build g) = g (\ y r -> (x == y) || r) False
  #-}
 #endif
@@ -874,7 +874,7 @@ notElem _ []    =  True
 notElem x (y:ys)=  x /= y && notElem x ys
 {-# NOINLINE [1] notElem #-}
 {-# RULES
-"notElem/build" forall x (g :: forall b . Eq a => (a -> b -> b) -> b -> b)
+"notElem/build" forall x (g :: forall b . (a -> b -> b) -> b -> b)
    . notElem x (build g) = g (\ y r -> (x /= y) && r) True
  #-}
 #endif
diff --git a/libraries/base/changelog.md b/libraries/base/changelog.md
index 953f80740d..a760f97970 100644
--- a/libraries/base/changelog.md
+++ b/libraries/base/changelog.md
@@ -1,5 +1,18 @@
 # Changelog for [`base` package](http://hackage.haskell.org/package/base)
 
+## 4.14.2.0 *Mar 2021*
+
+  * Add `Generic` instances to `Fingerprint`, `GiveGCStats`, `GCFlags`,
+    `ConcFlags`, `DebugFlags`, `CCFlags`, `DoHeapProfile`, `ProfFlags`,
+    `DoTrace`, `TraceFlags`, `TickyFlags`, `ParFlags`, `RTSFlags`, `RTSStats`,
+    `GCStats`, `ByteOrder`, `GeneralCategory`, `SrcLoc`
+
+  * Add rules `unpackUtf8`, `unpack-listUtf8` and `unpack-appendUtf8` to `GHC.Base`.
+    They correspond to their ascii versions and hopefully make it easier
+    for libraries to handle utf8 encoded strings efficiently.
+
+  * An issue with list fusion and `elem` was fixed. `elem` applied to known
+    small lists will now compile to a simple case statement more often.
 ## 4.14.1.0 *Jul 2020*
 
   * Bundled with GHC 8.10.2
diff --git a/libraries/base/tests/perf/Makefile b/libraries/base/tests/perf/Makefile
new file mode 100644
index 0000000000..28840dc0b9
--- /dev/null
+++ b/libraries/base/tests/perf/Makefile
@@ -0,0 +1,15 @@
+# This Makefile runs the tests using GHC's testsuite framework.  It
+# assumes the package is part of a GHC build tree with the testsuite
+# installed in ../../../testsuite.
+
+TOP=../../../../testsuite
+include $(TOP)/mk/boilerplate.mk
+include $(TOP)/mk/test.mk
+
+
+T17752:
+	'$(TEST_HC)' $(TEST_HC_OPTS) -O --make T17752 -rtsopts -ddump-simpl -ddump-to-file -dsuppress-uniques -dsuppress-all
+	# All occurences of elem should be optimized away.
+	# For strings these should result in loops after inlining foldCString.
+	# For lists it should result in a case expression.
+	echo $$(cat T17752.dump-simpl | grep "elem" -A4 )
diff --git a/libraries/base/tests/perf/T17752.hs b/libraries/base/tests/perf/T17752.hs
new file mode 100644
index 0000000000..f7f136e1f4
--- /dev/null
+++ b/libraries/base/tests/perf/T17752.hs
@@ -0,0 +1,18 @@
+module T17752 where
+
+-- All occurences of elem should be optimized away.
+-- For strings these should result in loops after inlining foldCString.
+-- For lists it should result in a case expression.
+
+-- Should compile to a pattern match if the rules fire
+isElemList    x = x `elem` ['a','b','c']
+isNotElemList x = x `elem` ['x','y','z']
+
+isOneOfThese x = x `elem` [1,2,3,4,5::Int]
+isNotOneOfThese x = x `notElem` [1,2,3,4,5::Int]
+
+isElemString x = elem x "foo"
+isNotElemString x = notElem x "bar"
+
+isElemStringUtf x = elem x "foö"
+isNotElemStringUtf x = notElem x "bär"
diff --git a/libraries/base/tests/perf/T17752.stdout b/libraries/base/tests/perf/T17752.stdout
new file mode 100644
index 0000000000..4d70e27e02
--- /dev/null
+++ b/libraries/base/tests/perf/T17752.stdout
@@ -0,0 +1,2 @@
+[1 of 1] Compiling T17752           ( T17752.hs, T17752.o )
+
diff --git a/libraries/base/tests/perf/all.T b/libraries/base/tests/perf/all.T
new file mode 100644
index 0000000000..61f42f5420
--- /dev/null
+++ b/libraries/base/tests/perf/all.T
@@ -0,0 +1,5 @@
+#--------------------------------------
+# Check specialization of elem via rules
+#--------------------------------------
+
+test('T17752', [only_ways(['normal'])] , makefile_test, ['T17752'])
diff --git a/libraries/ghc-prim/GHC/CString.hs b/libraries/ghc-prim/GHC/CString.hs
index 0e6199f30f..fd0e3f6625 100644
--- a/libraries/ghc-prim/GHC/CString.hs
+++ b/libraries/ghc-prim/GHC/CString.hs
@@ -16,13 +16,78 @@
 -----------------------------------------------------------------------------
 
 module GHC.CString (
+        -- * Ascii variants
         unpackCString#, unpackAppendCString#, unpackFoldrCString#,
-        unpackCStringUtf8#, unpackNBytes#
+        cstringLength#,
+
+        -- * Utf variants
+        unpackCStringUtf8#, unpackAppendCStringUtf8#, unpackFoldrCStringUtf8#,
+
+        -- * Other
+        unpackNBytes#,
     ) where
 
 import GHC.Types
 import GHC.Prim
 
+{-
+Note [String literals in GHC]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+String literals get quite a bit of special handling in GHC.  This Note
+summarises the moving parts.
+
+* Desugaring: see GHC.HsToCore.Match.Literal.dsLit, which in
+  turn calls GHC.Core.Make.mkStringExprFS.
+
+  The desugarer desugars the Haskell literal "foo" into Core
+     GHC.CString.unpackCString# "foo"#
+  where "foo"# is primitive string literal (of type Addr#).
+
+  When the string cannot be encoded as a C string, we use UTF8:
+     GHC.CString.unpackCStringUtf8# "foo"#
+
+* The library module ghc-prim:GHC.CString has a bunch of functions that
+  work over primitive strings, including GHC.CString.unpackCString#
+
+* GHC.Core.Op.ConstantFold has some RULES that optimise certain string
+  operations on literal strings. For example:
+
+    + Constant folding the desugared form of ("foo" ++ "bar")
+      into ("foobar")
+    + Comparing strings
+    + and more
+
+* GHC.Base has a number of regular rules for String literals.
+
+  + a rule "eqString": (==) @String = eqString
+    where GHC.Base.eqString :: String -> String -> Bool
+
+    ConstantFold has a RULE for eqString on literals:
+     eqString (Lit "foo"#) (Lit "bar"#) --> False
+
+    This allows compile time evaluation of things like "foo" == "bar"
+
+  + A bunch of rules to promote fusion:
+
+    "unpack"       [~1] forall a   . unpackCString# a             = build (unpackFoldrCString# a)
+    "unpack-list"  [1]  forall a   . unpackFoldrCString# a (:) [] = unpackCString# a
+    "unpack-append"     forall a n . unpackFoldrCString# a (:) n  = unpackAppendCString# a n
+
+    And UTF8 variants of these rules.
+
+* We allow primitive (unlifted) literal strings to be top-level
+  bindings, breaking out usual rule.  See GHC.Core
+  Note [Core top-level string literals]
+
+* TODO: There is work on a special code-gen path for top-level boxed strings
+     str :: [Char]
+     str = unpackCString# "foo"#
+  so that they can all share a common code pointer
+
+  There is a WIP MR on gitlab for this: !3012
+
+-}
+
 -----------------------------------------------------------------------------
 -- Unpacking C strings
 -----------------------------------------------------------------------------
@@ -70,6 +135,49 @@ Moreover, we want to make it CONLIKE, so that:
 All of this goes for unpackCStringUtf8# too.
 -}
 
+{- Note [Inlining of unpackFoldrCString]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Usually the unpack-list rule turns unpackFoldrCString# into unpackCString#
+It also has a BuiltInRule in PrelRules.hs:
+     unpackFoldrCString# "foo" c (unpackFoldrCString# "baz" c n)
+       =  unpackFoldrCString# "foobaz" c n
+
+We use NOINLINE [0] on the grounds that, unlike
+unpackCString#, there *is* some point in inlining
+unpackFoldrCString#, because we get better code for the
+higher-order function call.
+
+This can cause a code size increase but it was minimal
+when looking at nofib.
+
+This is especially important for elem which then results in an
+allocation free loop.
+
+  Note [unpackCString# iterating over addr]
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+When unpacking unpackCString# and friends repeatedly return a cons cell
+containing:
+* The current character we just unpacked.
+* A thunk to unpack the rest of the string.
+
+In order to minimize the size of the thunk we do not index of
+the start of the string, offsetting into it, but instead increment
+the addr and always use offset 0#.
+
+This works since these two expressions will read from the same address.
+* `indexCharOffAddr# a i`
+* `indexCharOffAddr (a `plusAddr#` i) 0#`
+
+This way we avoid the need for the thunks to close over both the start of
+the string and the current offset, saving a word for each character unpacked.
+
+This has the additional advantage the we can guarantee that only the
+increment will happen in the loop.
+
+-}
+
 unpackCString# :: Addr# -> [Char]
 {-# NOINLINE CONLIKE unpackCString# #-}
 unpackCString# addr
@@ -93,60 +201,63 @@ unpackAppendCString# addr rest
       where
         !ch = indexCharOffAddr# addr nh
 
-unpackFoldrCString# :: Addr# -> (Char  -> a -> a) -> a -> a
-
--- Usually the unpack-list rule turns unpackFoldrCString# into unpackCString#
-
--- It also has a BuiltInRule in PrelRules.hs:
---      unpackFoldrCString# "foo" c (unpackFoldrCString# "baz" c n)
---        =  unpackFoldrCString# "foobaz" c n
-
-{-# NOINLINE unpackFoldrCString# #-}
--- At one stage I had NOINLINE [0] on the grounds that, unlike
--- unpackCString#, there *is* some point in inlining
--- unpackFoldrCString#, because we get better code for the
--- higher-order function call.  BUT there may be a lot of
--- literal strings, and making a separate 'unpack' loop for
--- each is highly gratuitous.  See nofib/real/anna/PrettyPrint.
-
-unpackFoldrCString# addr f z
-  = unpack 0#
+-- Usually the unpack-list rule turns unpackFoldrCString# into unpackCString#.
+-- See Note [String literals in GHC] for more details.
+-- See [Inlining of unpackFoldrCString]
+{-# NOINLINE[0] unpackFoldrCString# #-}
+unpackFoldrCString# :: Addr# -> (Char -> a -> a) -> a -> a
+unpackFoldrCString# str f z_init = go str z_init
   where
-    unpack nh
+    go addr z
       | isTrue# (ch `eqChar#` '\0'#) = z
-      | True                         = C# ch `f` unpack (nh +# 1#)
+      | True                         = C# ch `f` go (addr `plusAddr#` 1#) z
       where
-        !ch = indexCharOffAddr# addr nh
+        -- See Note [unpackCString# iterating over addr]
+        !ch = indexCharOffAddr# addr 0#
 
 -- There's really no point in inlining this for the same reasons as
 -- unpackCString. See Note [Inlining unpackCString#] above for details.
 unpackCStringUtf8# :: Addr# -> [Char]
 {-# NOINLINE CONLIKE unpackCStringUtf8# #-}
 unpackCStringUtf8# addr
-  = unpack 0#
+    | isTrue# (ch `eqChar#` '\0'#  ) = []
+    | True =
+        let !byte_count = getByteCount ch
+            !utf_ch = unpackUtf8Char# byte_count ch addr
+            !addr' = addr `plusBytes` byte_count
+        in  C# utf_ch : unpackCStringUtf8# addr'
+      where
+        -- See Note [unpackCString# iterating over addr]
+        !ch = indexCharOffAddr# addr 0#
+
+
+unpackAppendCStringUtf8# :: Addr# -> [Char] -> [Char]
+{-# NOINLINE unpackAppendCStringUtf8# #-}
+     -- See the NOINLINE note on unpackCString#
+unpackAppendCStringUtf8# addr rest
+    | isTrue# (ch `eqChar#` '\0'#) = rest
+    | True =
+        let !byte_count = getByteCount ch
+            !utf_ch = unpackUtf8Char# byte_count ch addr
+            !addr' = (addr `plusBytes` byte_count)
+        in  C# utf_ch : unpackAppendCStringUtf8# addr' rest
+      where
+        -- See Note [unpackCString# iterating over addr]
+        !ch = indexCharOffAddr# addr 0#
+
+-- See Note [Inlining of unpackFoldrCString]
+{-# NOINLINE[0] unpackFoldrCStringUtf8# #-}
+unpackFoldrCStringUtf8# :: Addr# -> (Char -> a -> a) -> a -> a
+unpackFoldrCStringUtf8# addr_init f z_init
+  = go addr_init z_init
   where
-    -- We take care to strictly evaluate the character decoding as
-    -- indexCharOffAddr# is marked with the can_fail flag and
-    -- consequently GHC won't evaluate the expression unless it is absolutely
-    -- needed.
-    unpack nh
-      | isTrue# (ch `eqChar#` '\0'#  ) = []
-      | isTrue# (ch `leChar#` '\x7F'#) = C# ch : unpack (nh +# 1#)
-      | isTrue# (ch `leChar#` '\xDF'#) =
-          let !c = C# (chr# (((ord# ch                                  -# 0xC0#) `uncheckedIShiftL#`  6#) +#
-                              (ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#)))
-          in c : unpack (nh +# 2#)
-      | isTrue# (ch `leChar#` '\xEF'#) =
-          let !c = C# (chr# (((ord# ch                                  -# 0xE0#) `uncheckedIShiftL#` 12#) +#
-                             ((ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#) `uncheckedIShiftL#`  6#) +#
-                              (ord# (indexCharOffAddr# addr (nh +# 2#)) -# 0x80#)))
-          in c : unpack (nh +# 3#)
-      | True                           =
-          let !c = C# (chr# (((ord# ch                                  -# 0xF0#) `uncheckedIShiftL#` 18#) +#
-                             ((ord# (indexCharOffAddr# addr (nh +# 1#)) -# 0x80#) `uncheckedIShiftL#` 12#) +#
-                             ((ord# (indexCharOffAddr# addr (nh +# 2#)) -# 0x80#) `uncheckedIShiftL#`  6#) +#
-                              (ord# (indexCharOffAddr# addr (nh +# 3#)) -# 0x80#)))
-          in c : unpack (nh +# 4#)
+    go addr z
+      | isTrue# (ch `eqChar#` '\0'#) = z
+      | True =
+          let !byte_count = getByteCount ch
+              !utf_ch = unpackUtf8Char# byte_count ch addr
+              !addr' = (addr `plusBytes` byte_count)
+          in C# utf_ch `f` go addr' z
       where
         !ch = indexCharOffAddr# addr nh
 
@@ -163,3 +274,75 @@ unpackNBytes#  addr len# = unpack [] (len# -# 1#)
          case indexCharOffAddr# addr i# of
             ch -> unpack (C# ch : acc) (i# -# 1#)
 
+-- The return type is not correct here. We really want CSize,
+-- but that type is defined in base. However, CSize should always
+-- match the size of a machine word (I hope), so this is probably
+-- alright on all platforms that GHC supports.
+foreign import ccall unsafe "strlen" c_strlen :: Addr# -> Int#
+
+-- | Compute the length of a NUL-terminated string. This address
+-- must refer to immutable memory. GHC includes a built-in rule for
+-- constant folding when the argument is a statically-known literal.
+-- That is, a core-to-core pass reduces the expression
+-- @cstringLength# "hello"#@ to the constant @5#@.
+cstringLength# :: Addr# -> Int#
+{-# INLINE[0] cstringLength# #-}
+cstringLength# = c_strlen
+
+
+------------------------------
+--- UTF8 decoding utilities
+------------------------------
+--
+-- These functions make explicit the logic that was originally
+-- part of unpackCStringUtf8. Since we want the same support for ascii
+-- and non-ascii a variety of functions needs the same logic. Instead
+-- of C&P'in the decoding logic all over we have it here once, and then
+-- force GHC to inline it.
+--
+-- All the overhead of the Bytes argument and calls goes away once all is
+-- said and done. And what remains is readable code in Haskell land and
+-- performant code in the resulting binary.
+
+data Bytes = One | Two | Three | Four
+
+{-# INLINE getByteCount #-}
+getByteCount :: Char# -> Bytes
+getByteCount ch
+    | isTrue# (ch `leChar#` '\x7F'#) = One
+    | isTrue# (ch `leChar#` '\xDF'#) = Two
+    | isTrue# (ch `leChar#` '\xEF'#) = Three
+    | True                           = Four
+
+{-# INLINE plusBytes #-}
+plusBytes :: Addr# -> Bytes -> Addr#
+plusBytes addr bytes =
+  case bytes of
+    One   -> addr `plusAddr#` 1#
+    Two   -> addr `plusAddr#` 2#
+    Three -> addr `plusAddr#` 3#
+    Four  -> addr `plusAddr#` 4#
+
+-- | Take the current address, read unicode char of the given size.
+-- We obviously want the number of bytes, but we have to read one
+-- byte to determine the number of bytes for the current codepoint
+-- so we might as well reuse it and avoid a read.
+--
+-- Side Note: We don't dare to decode all 4 possibilities at once.
+-- Reading past the end of the addr might trigger an exception.
+-- For this reason we really have to check the width first and only
+-- decode after.
+{-# INLINE unpackUtf8Char# #-}
+unpackUtf8Char# :: Bytes -> Char# -> Addr# -> Char#
+unpackUtf8Char# bytes ch addr =
+  case bytes of
+    One -> ch
+    Two ->   (chr# (((ord# ch                                           -# 0xC0#) `uncheckedIShiftL#`  6#) +#
+                     (ord# (indexCharOffAddr# (addr `plusAddr#` 1#) 0#) -# 0x80#)))
+    Three -> (chr# (((ord# ch                                           -# 0xE0#) `uncheckedIShiftL#` 12#) +#
+                    ((ord# (indexCharOffAddr# (addr `plusAddr#` 1#) 0#) -# 0x80#) `uncheckedIShiftL#`  6#) +#
+                     (ord# (indexCharOffAddr# (addr `plusAddr#` 2#) 0#) -# 0x80#)))
+    Four ->  (chr# (((ord# ch                                           -# 0xF0#) `uncheckedIShiftL#` 18#) +#
+                    ((ord# (indexCharOffAddr# (addr `plusAddr#` 1#) 0#) -# 0x80#) `uncheckedIShiftL#` 12#) +#
+                    ((ord# (indexCharOffAddr# (addr `plusAddr#` 2#) 0#) -# 0x80#) `uncheckedIShiftL#`  6#) +#
+                     (ord# (indexCharOffAddr# (addr `plusAddr#` 3#) 0#) -# 0x80#)))
diff --git a/libraries/ghc-prim/changelog.md b/libraries/ghc-prim/changelog.md
index 96c648e620..1b52f06260 100644
--- a/libraries/ghc-prim/changelog.md
+++ b/libraries/ghc-prim/changelog.md
@@ -1,4 +1,23 @@
-## 0.6.1
+## 0.6.2 (edit as necessary)
+
+- Shipped with GHC 8.12.1
+
+- Add known-key `cstringLength#` to `GHC.CString`. This is just the
+  C function `strlen`, but a built-in rewrite rule allows GHC to
+  compute the result at compile time when the argument is known.
+
+- In order to support unicode better the following functions in `GHC.CString`
+  gained UTF8 counterparts:
+
+        unpackAppendCStringUtf8# :: Addr# -> [Char] -> [Char]
+        unpackFoldrCStringUtf8# :: Addr# -> (Char -> a -> a) -> a -> a
+
+- unpackFoldrCString* variants can now inline in phase [0].
+
+  If the folding function is known this allows for unboxing of the
+  Char argument resulting in much faster code.
+
+## 0.6.1 (edit as necessary)
 
 - Shipped with GHC 8.10.1