diff options
author | Sylvain Henry <sylvain@haskus.fr> | 2020-08-11 13:15:41 +0200 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2020-09-01 12:39:36 -0400 |
commit | 4b4fbc58d37d37457144014ef82bdd928de175df (patch) | |
tree | 9b49838986f07b5843e13f33ad2f6fd19d83f987 /compiler | |
parent | 884245dd29265b7bee12cda8c915da9c916251ce (diff) | |
download | haskell-4b4fbc58d37d37457144014ef82bdd928de175df.tar.gz |
Remove "Ord FastString" instance
FastStrings can be compared in 2 ways: by Unique or lexically. We don't
want to bless one particular way with an "Ord" instance because it leads
to bugs (#18562) or to suboptimal code (e.g. using lexical comparison
while a Unique comparison would suffice).
UTF-8 encoding has the advantage that sorting strings by their encoded
bytes also sorts them by their Unicode code points, without having to
decode the actual code points. BUT GHC uses Modified UTF-8 which
diverges from UTF-8 by encoding \0 as 0xC080 instead of 0x00 (to avoid
null bytes in the middle of a String so that the string can still be
null-terminated). This patch adds a new `utf8CompareShortByteString`
function that performs sorting by bytes but that also takes Modified
UTF-8 into account. It is much more performant than decoding the strings
into [Char] to perform comparisons (which we did in the previous patch).
Bump haddock submodule
Diffstat (limited to 'compiler')
30 files changed, 236 insertions, 112 deletions
diff --git a/compiler/GHC/Builtin/Types/Literals.hs b/compiler/GHC/Builtin/Types/Literals.hs index ef6fb962fd..cbb9f9e847 100644 --- a/compiler/GHC/Builtin/Types/Literals.hs +++ b/compiler/GHC/Builtin/Types/Literals.hs @@ -32,6 +32,7 @@ import GHC.Core.Coercion ( Role(..) ) import GHC.Tc.Types.Constraint ( Xi ) import GHC.Core.Coercion.Axiom ( CoAxiomRule(..), BuiltInSynFamily(..), TypeEqn ) import GHC.Types.Name ( Name, BuiltInSyntax(..) ) +import GHC.Types.Unique.FM import GHC.Builtin.Types import GHC.Builtin.Types.Prim ( mkTemplateAnonTyConBinders ) import GHC.Builtin.Names @@ -50,7 +51,6 @@ import GHC.Builtin.Names , typeSymbolAppendFamNameKey ) import GHC.Data.FastString -import qualified Data.Map as Map import Data.Maybe ( isJust ) import Control.Monad ( guard ) import Data.List ( isPrefixOf, isSuffixOf ) @@ -401,7 +401,7 @@ axCmpSymbolDef = s2' <- isStrLitTy s2 t2' <- isStrLitTy t2 return (mkTyConApp typeSymbolCmpTyCon [s1,t1] === - ordering (compare s2' t2')) } + ordering (lexicalCompareFS s2' t2')) } axAppendSymbolDef = CoAxiomRule { coaxrName = fsLit "AppendSymbolDef" @@ -457,8 +457,8 @@ axAppendSymbol0L = mkAxiom1 "Concat0L" -- The list of built-in type family axioms that GHC uses. -- If you define new axioms, make sure to include them in this list. -- See Note [Adding built-in type families] -typeNatCoAxiomRules :: Map.Map FastString CoAxiomRule -typeNatCoAxiomRules = Map.fromList $ map (\x -> (coaxrName x, x)) +typeNatCoAxiomRules :: UniqFM FastString CoAxiomRule +typeNatCoAxiomRules = listToUFM $ map (\x -> (coaxrName x, x)) [ axAddDef , axMulDef , axExpDef @@ -706,7 +706,7 @@ matchFamCmpNat _ = Nothing matchFamCmpSymbol :: [Type] -> Maybe (CoAxiomRule, [Type], Type) matchFamCmpSymbol [s,t] | Just x <- mbX, Just y <- mbY = - Just (axCmpSymbolDef, [s,t], ordering (compare x y)) + Just (axCmpSymbolDef, [s,t], ordering (lexicalCompareFS x y)) | tcEqType s t = Just (axCmpSymbolRefl, [s], ordering EQ) where mbX = isStrLitTy s mbY = isStrLitTy t diff --git a/compiler/GHC/Cmm/CLabel.hs b/compiler/GHC/Cmm/CLabel.hs index 5f2eb565c5..2782da2ea4 100644 --- a/compiler/GHC/Cmm/CLabel.hs +++ b/compiler/GHC/Cmm/CLabel.hs @@ -311,19 +311,19 @@ instance Ord CLabel where compare (CmmLabel a1 b1 c1 d1) (CmmLabel a2 b2 c2 d2) = compare a1 a2 `thenCmp` compare b1 b2 `thenCmp` - compare c1 c2 `thenCmp` + uniqCompareFS c1 c2 `thenCmp` compare d1 d2 compare (RtsLabel a1) (RtsLabel a2) = compare a1 a2 compare (LocalBlockLabel u1) (LocalBlockLabel u2) = nonDetCmpUnique u1 u2 compare (ForeignLabel a1 b1 c1 d1) (ForeignLabel a2 b2 c2 d2) = - compare a1 a2 `thenCmp` + uniqCompareFS a1 a2 `thenCmp` compare b1 b2 `thenCmp` compare c1 c2 `thenCmp` compare d1 d2 compare (AsmTempLabel u1) (AsmTempLabel u2) = nonDetCmpUnique u1 u2 compare (AsmTempDerivedLabel a1 b1) (AsmTempDerivedLabel a2 b2) = compare a1 a2 `thenCmp` - compare b1 b2 + uniqCompareFS b1 b2 compare (StringLitLabel u1) (StringLitLabel u2) = nonDetCmpUnique u1 u2 compare (CC_Label a1) (CC_Label a2) = @@ -451,13 +451,11 @@ data RtsLabelInfo | RtsApInfoTable Bool{-updatable-} Int{-arity-} -- ^ AP thunks | RtsApEntry Bool{-updatable-} Int{-arity-} - | RtsPrimOp PrimOp - | RtsApFast FastString -- ^ _fast versions of generic apply + | RtsPrimOp PrimOp + | RtsApFast NonDetFastString -- ^ _fast versions of generic apply | RtsSlowFastTickyCtr String - deriving (Eq, Ord) - -- NOTE: Eq on PtrString compares the pointer only, so this isn't - -- a real equality. + deriving (Eq,Ord) -- | What type of Cmm label we're dealing with. @@ -708,7 +706,7 @@ mkCCLabel cc = CC_Label cc mkCCSLabel ccs = CCS_Label ccs mkRtsApFastLabel :: FastString -> CLabel -mkRtsApFastLabel str = RtsLabel (RtsApFast str) +mkRtsApFastLabel str = RtsLabel (RtsApFast (NonDetFastString str)) mkRtsSlowFastTickyCtrLabel :: String -> CLabel mkRtsSlowFastTickyCtrLabel pat = RtsLabel (RtsSlowFastTickyCtr pat) @@ -1308,7 +1306,7 @@ pprCLabel_common platform = \case (LocalBlockLabel u) -> tempLabelPrefixOrUnderscore platform <> text "blk_" <> pprUniqueAlways u - (RtsLabel (RtsApFast str)) -> ftext str <> text "_fast" + (RtsLabel (RtsApFast (NonDetFastString str))) -> ftext str <> text "_fast" (RtsLabel (RtsSelectorInfoTable upd_reqd offset)) -> hcat [text "stg_sel_", text (show offset), diff --git a/compiler/GHC/Core/Coercion/Axiom.hs b/compiler/GHC/Core/Coercion/Axiom.hs index a1ace3ccfd..2bf31f8cc1 100644 --- a/compiler/GHC/Core/Coercion/Axiom.hs +++ b/compiler/GHC/Core/Coercion/Axiom.hs @@ -568,7 +568,9 @@ instance Eq CoAxiomRule where x == y = coaxrName x == coaxrName y instance Ord CoAxiomRule where - compare x y = compare (coaxrName x) (coaxrName y) + -- we compare lexically to avoid non-deterministic output when sets of rules + -- are printed + compare x y = lexicalCompareFS (coaxrName x) (coaxrName y) instance Outputable CoAxiomRule where ppr = ppr . coaxrName diff --git a/compiler/GHC/Core/Map.hs b/compiler/GHC/Core/Map.hs index 092a5a9773..ad5cd6fd53 100644 --- a/compiler/GHC/Core/Map.hs +++ b/compiler/GHC/Core/Map.hs @@ -55,6 +55,7 @@ import GHC.Utils.Panic import qualified Data.Map as Map import qualified Data.IntMap as IntMap +import GHC.Types.Unique.FM import GHC.Types.Var.Env import GHC.Types.Name.Env import Control.Monad( (>=>) ) @@ -605,7 +606,7 @@ fdT k m = foldTM k (tm_var m) ------------------------ data TyLitMap a = TLM { tlm_number :: Map.Map Integer a - , tlm_string :: Map.Map FastString a + , tlm_string :: UniqFM FastString a } instance TrieMap TyLitMap where @@ -617,27 +618,27 @@ instance TrieMap TyLitMap where mapTM = mapTyLit emptyTyLitMap :: TyLitMap a -emptyTyLitMap = TLM { tlm_number = Map.empty, tlm_string = Map.empty } +emptyTyLitMap = TLM { tlm_number = Map.empty, tlm_string = emptyUFM } mapTyLit :: (a->b) -> TyLitMap a -> TyLitMap b mapTyLit f (TLM { tlm_number = tn, tlm_string = ts }) - = TLM { tlm_number = Map.map f tn, tlm_string = Map.map f ts } + = TLM { tlm_number = Map.map f tn, tlm_string = mapUFM f ts } lkTyLit :: TyLit -> TyLitMap a -> Maybe a lkTyLit l = case l of NumTyLit n -> tlm_number >.> Map.lookup n - StrTyLit n -> tlm_string >.> Map.lookup n + StrTyLit n -> tlm_string >.> (`lookupUFM` n) xtTyLit :: TyLit -> XT a -> TyLitMap a -> TyLitMap a xtTyLit l f m = case l of - NumTyLit n -> m { tlm_number = tlm_number m |> Map.alter f n } - StrTyLit n -> m { tlm_string = tlm_string m |> Map.alter f n } + NumTyLit n -> m { tlm_number = Map.alter f n (tlm_number m) } + StrTyLit n -> m { tlm_string = alterUFM f (tlm_string m) n } foldTyLit :: (a -> b -> b) -> TyLitMap a -> b -> b -foldTyLit l m = flip (Map.foldr l) (tlm_string m) - . flip (Map.foldr l) (tlm_number m) +foldTyLit l m = flip (foldUFM l) (tlm_string m) + . flip (Map.foldr l) (tlm_number m) ------------------------------------------------- -- | @TypeMap a@ is a map from 'Type' to @a@. If you are a client, this diff --git a/compiler/GHC/Core/Opt/Monad.hs b/compiler/GHC/Core/Opt/Monad.hs index 54487652f0..7e37592878 100644 --- a/compiler/GHC/Core/Opt/Monad.hs +++ b/compiler/GHC/Core/Opt/Monad.hs @@ -528,7 +528,7 @@ cmpEqTick :: Tick -> Tick -> Ordering cmpEqTick (PreInlineUnconditionally a) (PreInlineUnconditionally b) = a `compare` b cmpEqTick (PostInlineUnconditionally a) (PostInlineUnconditionally b) = a `compare` b cmpEqTick (UnfoldingDone a) (UnfoldingDone b) = a `compare` b -cmpEqTick (RuleFired a) (RuleFired b) = a `compare` b +cmpEqTick (RuleFired a) (RuleFired b) = a `uniqCompareFS` b cmpEqTick (EtaExpansion a) (EtaExpansion b) = a `compare` b cmpEqTick (EtaReduction a) (EtaReduction b) = a `compare` b cmpEqTick (BetaReduction a) (BetaReduction b) = a `compare` b diff --git a/compiler/GHC/Core/Rules.hs b/compiler/GHC/Core/Rules.hs index 4ad942c212..1ca0c67ebb 100644 --- a/compiler/GHC/Core/Rules.hs +++ b/compiler/GHC/Core/Rules.hs @@ -69,7 +69,7 @@ import GHC.Data.Maybe import GHC.Data.Bag import GHC.Utils.Misc import Data.List -import Data.Ord +import Data.Function ( on ) import Control.Monad ( guard ) {- @@ -271,7 +271,7 @@ pprRulesForUser :: [CoreRule] -> SDoc pprRulesForUser rules = withPprStyle defaultUserStyle $ pprRules $ - sortBy (comparing ruleName) $ + sortBy (lexicalCompareFS `on` ruleName) $ tidyRules emptyTidyEnv rules {- diff --git a/compiler/GHC/Core/TyCo/Rep.hs b/compiler/GHC/Core/TyCo/Rep.hs index 5931d8c94a..5215b5ce2e 100644 --- a/compiler/GHC/Core/TyCo/Rep.hs +++ b/compiler/GHC/Core/TyCo/Rep.hs @@ -252,7 +252,13 @@ instance Outputable Type where data TyLit = NumTyLit Integer | StrTyLit FastString - deriving (Eq, Ord, Data.Data) + deriving (Eq, Data.Data) + +instance Ord TyLit where + compare (NumTyLit _) (StrTyLit _) = LT + compare (StrTyLit _) (NumTyLit _) = GT + compare (NumTyLit x) (NumTyLit y) = compare x y + compare (StrTyLit x) (StrTyLit y) = uniqCompareFS x y instance Outputable TyLit where ppr = pprTyLit diff --git a/compiler/GHC/Data/FastString.hs b/compiler/GHC/Data/FastString.hs index 1907ef91c9..604c8f3e25 100644 --- a/compiler/GHC/Data/FastString.hs +++ b/compiler/GHC/Data/FastString.hs @@ -2,6 +2,7 @@ {-# LANGUAGE BangPatterns, CPP, MagicHash, UnboxedTuples, GeneralizedNewtypeDeriving #-} +{-# LANGUAGE DeriveDataTypeable #-} {-# OPTIONS_GHC -O2 -funbox-strict-fields #-} -- We always optimise this, otherwise performance of a non-optimised -- compiler is severely affected @@ -12,8 +13,12 @@ -- ['FastString'] -- -- * A compact, hash-consed, representation of character strings. --- * Comparison is O(1), and you can get a 'GHC.Types.Unique.Unique' from them. -- * Generated by 'fsLit'. +-- * You can get a 'GHC.Types.Unique.Unique' from them. +-- * Equality test is O(1) (it uses the Unique). +-- * Comparison is O(1) or O(n): +-- * O(n) but deterministic with lexical comparison (`lexicalCompareFS`) +-- * O(1) but non-deterministic with Unique comparison (`uniqCompareFS`) -- * Turn into 'GHC.Utils.Outputable.SDoc' with 'GHC.Utils.Outputable.ftext'. -- -- ['PtrString'] @@ -50,6 +55,8 @@ module GHC.Data.FastString -- * FastStrings FastString(..), -- not abstract, for now. + NonDetFastString (..), + LexicalFastString (..), -- ** Construction fsLit, @@ -74,6 +81,8 @@ module GHC.Data.FastString consFS, nilFS, isUnderscoreFS, + lexicalCompareFS, + uniqCompareFS, -- ** Outputting hPutFS, @@ -135,7 +144,7 @@ import GHC.Base (unpackCString#,unpackNBytes#) import GHC.Exts import GHC.IO --- | Gives the UTF-8 encoded bytes corresponding to a 'FastString' +-- | Gives the Modified UTF-8 encoded bytes corresponding to a 'FastString' bytesFS, fastStringToByteString :: FastString -> ByteString bytesFS = fastStringToByteString @@ -196,17 +205,10 @@ data FastString = FastString { instance Eq FastString where f1 == f2 = uniq f1 == uniq f2 -instance Ord FastString where - -- Compares lexicographically, not by unique - a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False } - a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False } - a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True } - a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True } - max x y | x >= y = x - | otherwise = y - min x y | x <= y = x - | otherwise = y - compare a b = cmpFS a b +-- We don't provide any "Ord FastString" instance to force you to think about +-- which ordering you want: +-- * lexical: deterministic, O(n). Cf lexicalCompareFS and LexicalFastString. +-- * by unique: non-deterministic, O(1). Cf uniqCompareFS and NonDetFastString. instance IsString FastString where fromString = fsLit @@ -231,12 +233,51 @@ instance Data FastString where instance NFData FastString where rnf fs = seq fs () --- | Compare FastString lexicographically -cmpFS :: FastString -> FastString -> Ordering -cmpFS fs1 fs2 = +-- | Compare FastString lexically +-- +-- If you don't care about the lexical ordering, use `uniqCompareFS` instead. +lexicalCompareFS :: FastString -> FastString -> Ordering +lexicalCompareFS fs1 fs2 = if uniq fs1 == uniq fs2 then EQ else - compare (unpackFS fs1) (unpackFS fs2) - -- compare as String, not as ShortByteString (cf #18562) + utf8CompareShortByteString (fs_sbs fs1) (fs_sbs fs2) + -- perform a lexical comparison taking into account the Modified UTF-8 + -- encoding we use (cf #18562) + +-- | Compare FastString by their Unique (not lexically). +-- +-- Much cheaper than `lexicalCompareFS` but non-deterministic! +uniqCompareFS :: FastString -> FastString -> Ordering +uniqCompareFS fs1 fs2 = compare (uniq fs1) (uniq fs2) + +-- | Non-deterministic FastString +-- +-- This is a simple FastString wrapper with an Ord instance using +-- `uniqCompareFS` (i.e. which compares FastStrings on their Uniques). Hence it +-- is not deterministic from one run to the other. +newtype NonDetFastString + = NonDetFastString FastString + deriving (Eq,Data) + +instance Ord NonDetFastString where + compare (NonDetFastString fs1) (NonDetFastString fs2) = uniqCompareFS fs1 fs2 + +instance Show NonDetFastString where + show (NonDetFastString fs) = show fs + +-- | Lexical FastString +-- +-- This is a simple FastString wrapper with an Ord instance using +-- `lexicalCompareFS` (i.e. which compares FastStrings on their String +-- representation). Hence it is deterministic from one run to the other. +newtype LexicalFastString + = LexicalFastString FastString + deriving (Eq,Data) + +instance Ord LexicalFastString where + compare (LexicalFastString fs1) (LexicalFastString fs2) = lexicalCompareFS fs1 fs2 + +instance Show LexicalFastString where + show (LexicalFastString fs) = show fs -- ----------------------------------------------------------------------------- -- Construction diff --git a/compiler/GHC/Driver/Backpack.hs b/compiler/GHC/Driver/Backpack.hs index bfc58a3f42..743ce77926 100644 --- a/compiler/GHC/Driver/Backpack.hs +++ b/compiler/GHC/Driver/Backpack.hs @@ -560,7 +560,7 @@ msgInclude (i,n) uid = do -- ---------------------------------------------------------------------------- -- Conversion from PackageName to HsComponentId -type PackageNameMap a = Map PackageName a +type PackageNameMap a = UniqFM PackageName a -- For now, something really simple, since we're not actually going -- to use this for anything @@ -569,7 +569,7 @@ unitDefines (L _ HsUnit{ hsunitName = L _ pn@(PackageName fs) }) = (pn, HsComponentId pn (Indefinite (UnitId fs))) bkpPackageNameMap :: [LHsUnit PackageName] -> PackageNameMap HsComponentId -bkpPackageNameMap units = Map.fromList (map unitDefines units) +bkpPackageNameMap units = listToUFM (map unitDefines units) renameHsUnits :: UnitState -> PackageNameMap HsComponentId -> [LHsUnit PackageName] -> [LHsUnit HsComponentId] renameHsUnits pkgstate m units = map (fmap renameHsUnit) units @@ -577,7 +577,7 @@ renameHsUnits pkgstate m units = map (fmap renameHsUnit) units renamePackageName :: PackageName -> HsComponentId renamePackageName pn = - case Map.lookup pn m of + case lookupUFM m pn of Nothing -> case lookupPackageName pkgstate pn of Nothing -> error "no package name" diff --git a/compiler/GHC/Hs/Lit.hs b/compiler/GHC/Hs/Lit.hs index a529b138fb..4ceaac4264 100644 --- a/compiler/GHC/Hs/Lit.hs +++ b/compiler/GHC/Hs/Lit.hs @@ -223,7 +223,7 @@ instance Ord OverLitVal where compare (HsFractional f1) (HsFractional f2) = f1 `compare` f2 compare (HsFractional _) (HsIntegral _) = GT compare (HsFractional _) (HsIsString _ _) = LT - compare (HsIsString _ s1) (HsIsString _ s2) = s1 `compare` s2 + compare (HsIsString _ s1) (HsIsString _ s2) = s1 `uniqCompareFS` s2 compare (HsIsString _ _) (HsIntegral _) = GT compare (HsIsString _ _) (HsFractional _) = GT diff --git a/compiler/GHC/Iface/Ext/Ast.hs b/compiler/GHC/Iface/Ext/Ast.hs index 452e649f5d..448ae5dc54 100644 --- a/compiler/GHC/Iface/Ext/Ast.hs +++ b/compiler/GHC/Iface/Ext/Ast.hs @@ -342,7 +342,7 @@ enrichHie ts (hsGrp, imports, exports, _) ev_bs insts tcs = , exps ] - modulify file xs' = do + modulify (HiePath file) xs' = do top_ev_asts <- toHie $ EvBindContext ModuleScope Nothing @@ -363,12 +363,12 @@ enrichHie ts (hsGrp, imports, exports, _) ev_bs insts tcs = case mergeSortAsts $ moduleNode : xs of [x] -> return x - xs -> panicDoc "enrichHie: mergeSortAsts returned more than one result" (ppr $ map nodeSpan xs) + xs -> panicDoc "enrichHie: mergeSortAsts retur:ed more than one result" (ppr $ map nodeSpan xs) asts' <- sequence $ M.mapWithKey modulify $ M.fromListWith (++) - $ map (\x -> (srcSpanFile (nodeSpan x),[x])) flat_asts + $ map (\x -> (HiePath (srcSpanFile (nodeSpan x)),[x])) flat_asts let asts = HieASTs $ resolveTyVarScopes asts' return asts diff --git a/compiler/GHC/Iface/Ext/Debug.hs b/compiler/GHC/Iface/Ext/Debug.hs index 903413eaab..b10b4c982c 100644 --- a/compiler/GHC/Iface/Ext/Debug.hs +++ b/compiler/GHC/Iface/Ext/Debug.hs @@ -11,7 +11,6 @@ import GHC.Prelude import GHC.Types.SrcLoc import GHC.Unit.Module -import GHC.Data.FastString import GHC.Utils.Outputable import GHC.Iface.Ext.Types @@ -28,7 +27,7 @@ type Diff a = a -> a -> [SDoc] diffFile :: Diff HieFile diffFile = diffAsts eqDiff `on` (getAsts . hie_asts) -diffAsts :: (Outputable a, Eq a, Ord a) => Diff a -> Diff (M.Map FastString (HieAST a)) +diffAsts :: (Outputable a, Eq a, Ord a) => Diff a -> Diff (M.Map HiePath (HieAST a)) diffAsts f = diffList (diffAst f) `on` M.elems diffAst :: (Outputable a, Eq a,Ord a) => Diff a -> Diff (HieAST a) @@ -106,7 +105,7 @@ validAst (Node _ span children) = do -- | Look for any identifiers which occur outside of their supposed scopes. -- Returns a list of error messages. -validateScopes :: Module -> M.Map FastString (HieAST a) -> [SDoc] +validateScopes :: Module -> M.Map HiePath (HieAST a) -> [SDoc] validateScopes mod asts = validScopes ++ validEvs where refMap = generateReferencesMap asts diff --git a/compiler/GHC/Iface/Ext/Types.hs b/compiler/GHC/Iface/Ext/Types.hs index fe11bd094c..75331a273e 100644 --- a/compiler/GHC/Iface/Ext/Types.hs +++ b/compiler/GHC/Iface/Ext/Types.hs @@ -10,13 +10,15 @@ For more information see https://gitlab.haskell.org/ghc/ghc/wikis/hie-files {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE PatternSynonyms #-} + module GHC.Iface.Ext.Types where import GHC.Prelude import GHC.Settings.Config import GHC.Utils.Binary -import GHC.Data.FastString ( FastString ) +import GHC.Data.FastString import GHC.Builtin.Utils import GHC.Iface.Type import GHC.Unit.Module ( ModuleName, Module ) @@ -211,9 +213,18 @@ instance Binary (HieArgs TypeIndex) where put_ bh (HieArgs xs) = put_ bh xs get bh = HieArgs <$> get bh --- | Mapping from filepaths (represented using 'FastString') to the --- corresponding AST -newtype HieASTs a = HieASTs { getAsts :: (M.Map FastString (HieAST a)) } + +-- A HiePath is just a lexical FastString. We use a lexical FastString to avoid +-- non-determinism when printing or storing HieASTs which are sorted by their +-- HiePath. +type HiePath = LexicalFastString + +{-# COMPLETE HiePath #-} +pattern HiePath :: FastString -> HiePath +pattern HiePath fs = LexicalFastString fs + +-- | Mapping from filepaths to the corresponding AST +newtype HieASTs a = HieASTs { getAsts :: M.Map HiePath (HieAST a) } deriving (Functor, Foldable, Traversable) instance Binary (HieASTs TypeIndex) where @@ -285,13 +296,35 @@ instance Binary NodeOrigin where put_ bh b = putByte bh (fromIntegral (fromEnum b)) get bh = do x <- getByte bh; pure $! (toEnum (fromIntegral x)) +-- | A node annotation +data NodeAnnotation = NodeAnnotation + { nodeAnnotConstr :: !FastString -- ^ name of the AST node constructor + , nodeAnnotType :: !FastString -- ^ name of the AST node Type + } + deriving (Eq) + +instance Ord NodeAnnotation where + compare (NodeAnnotation c0 t0) (NodeAnnotation c1 t1) + = mconcat [uniqCompareFS c0 c1, uniqCompareFS t0 t1] + +instance Outputable NodeAnnotation where + ppr (NodeAnnotation c t) = ppr (c,t) + +instance Binary NodeAnnotation where + put_ bh (NodeAnnotation c t) = do + put_ bh c + put_ bh t + get bh = NodeAnnotation + <$> get bh + <*> get bh + -- | The information stored in one AST node. -- -- The type parameter exists to provide flexibility in representation of types -- (see Note [Efficient serialization of redundant type info]). data NodeInfo a = NodeInfo - { nodeAnnotations :: S.Set (FastString,FastString) - -- ^ (name of the AST node constructor, name of the AST node Type) + { nodeAnnotations :: S.Set NodeAnnotation + -- ^ Annotations , nodeType :: [a] -- ^ The Haskell types of this node, if any. diff --git a/compiler/GHC/Iface/Ext/Utils.hs b/compiler/GHC/Iface/Ext/Utils.hs index d1b6db6fb7..9245a11f7b 100644 --- a/compiler/GHC/Iface/Ext/Utils.hs +++ b/compiler/GHC/Iface/Ext/Utils.hs @@ -85,7 +85,7 @@ foldType f (Roll t) = f $ fmap (foldType f) t selectPoint :: HieFile -> (Int,Int) -> Maybe (HieAST Int) selectPoint hf (sl,sc) = getFirst $ - flip foldMap (M.toList (getAsts $ hie_asts hf)) $ \(fs,ast) -> First $ + flip foldMap (M.toList (getAsts $ hie_asts hf)) $ \(HiePath fs,ast) -> First $ case selectSmallestContaining (sp fs) ast of Nothing -> Nothing Just ast' -> Just ast' @@ -248,12 +248,12 @@ getTypeIndex t return $ HCastTy i go (CoercionTy _) = return HCoercionTy -resolveTyVarScopes :: M.Map FastString (HieAST a) -> M.Map FastString (HieAST a) +resolveTyVarScopes :: M.Map HiePath (HieAST a) -> M.Map HiePath (HieAST a) resolveTyVarScopes asts = M.map go asts where go ast = resolveTyVarScopeLocal ast asts -resolveTyVarScopeLocal :: HieAST a -> M.Map FastString (HieAST a) -> HieAST a +resolveTyVarScopeLocal :: HieAST a -> M.Map HiePath (HieAST a) -> HieAST a resolveTyVarScopeLocal ast asts = go ast where resolveNameScope dets = dets{identInfo = @@ -278,12 +278,12 @@ resolveTyVarScopeLocal ast asts = go ast where idents = M.map resolveNameScope $ nodeIdentifiers i -getNameBinding :: Name -> M.Map FastString (HieAST a) -> Maybe Span +getNameBinding :: Name -> M.Map HiePath (HieAST a) -> Maybe Span getNameBinding n asts = do (_,msp) <- getNameScopeAndBinding n asts msp -getNameScope :: Name -> M.Map FastString (HieAST a) -> Maybe [Scope] +getNameScope :: Name -> M.Map HiePath (HieAST a) -> Maybe [Scope] getNameScope n asts = do (scopes,_) <- getNameScopeAndBinding n asts return scopes @@ -291,10 +291,10 @@ getNameScope n asts = do getNameBindingInClass :: Name -> Span - -> M.Map FastString (HieAST a) + -> M.Map HiePath (HieAST a) -> Maybe Span getNameBindingInClass n sp asts = do - ast <- M.lookup (srcSpanFile sp) asts + ast <- M.lookup (HiePath (srcSpanFile sp)) asts getFirst $ foldMap First $ do child <- flattenAst ast dets <- maybeToList @@ -304,11 +304,11 @@ getNameBindingInClass n sp asts = do getNameScopeAndBinding :: Name - -> M.Map FastString (HieAST a) + -> M.Map HiePath (HieAST a) -> Maybe ([Scope], Maybe Span) getNameScopeAndBinding n asts = case nameSrcSpan n of RealSrcSpan sp _ -> do -- @Maybe - ast <- M.lookup (srcSpanFile sp) asts + ast <- M.lookup (HiePath (srcSpanFile sp)) asts defNode <- selectLargestContainedBy sp ast getFirst $ foldMap First $ do -- @[] node <- flattenAst defNode @@ -369,9 +369,9 @@ selectSmallestContaining sp node | sp `containsSpan` nodeSpan node = Nothing | otherwise = Nothing -definedInAsts :: M.Map FastString (HieAST a) -> Name -> Bool +definedInAsts :: M.Map HiePath (HieAST a) -> Name -> Bool definedInAsts asts n = case nameSrcSpan n of - RealSrcSpan sp _ -> srcSpanFile sp `elem` M.keys asts + RealSrcSpan sp _ -> M.member (HiePath (srcSpanFile sp)) asts _ -> False getEvidenceBindDeps :: ContextInfo -> [Name] @@ -515,7 +515,7 @@ mergeSortAsts = go . map pure mergePairs (xs:ys:xss) = mergeAsts xs ys : mergePairs xss simpleNodeInfo :: FastString -> FastString -> NodeInfo a -simpleNodeInfo cons typ = NodeInfo (S.singleton (cons, typ)) [] M.empty +simpleNodeInfo cons typ = NodeInfo (S.singleton (NodeAnnotation cons typ)) [] M.empty locOnly :: Monad m => SrcSpan -> ReaderT NodeOrigin m [HieAST a] locOnly (RealSrcSpan span _) = do @@ -568,7 +568,7 @@ makeTypeNode x spn etyp = do org <- ask pure $ case spn of RealSrcSpan span _ -> - [Node (mkSourcedNodeInfo org $ NodeInfo (S.singleton (cons,typ)) [etyp] M.empty) span []] + [Node (mkSourcedNodeInfo org $ NodeInfo (S.singleton (NodeAnnotation cons typ)) [etyp] M.empty) span []] _ -> [] where cons = mkFastString . show . toConstr $ x diff --git a/compiler/GHC/Iface/Make.hs b/compiler/GHC/Iface/Make.hs index a179beff18..3fd0eaac29 100644 --- a/compiler/GHC/Iface/Make.hs +++ b/compiler/GHC/Iface/Make.hs @@ -292,7 +292,7 @@ mkIface_ hsc_env mi_final_exts = (), mi_ext_fields = emptyExtensibleFields } where - cmp_rule = comparing ifRuleName + cmp_rule = lexicalCompareFS `on` ifRuleName -- Compare these lexicographically by OccName, *not* by unique, -- because the latter is not stable across compilations: cmp_inst = comparing (nameOccName . ifDFun) diff --git a/compiler/GHC/Iface/Recomp.hs b/compiler/GHC/Iface/Recomp.hs index 2ffb094b11..7e72633622 100644 --- a/compiler/GHC/Iface/Recomp.hs +++ b/compiler/GHC/Iface/Recomp.hs @@ -39,6 +39,7 @@ import GHC.Utils.Outputable as Outputable import GHC.Types.Unique import GHC.Utils.Misc hiding ( eqListBy ) import GHC.Data.Maybe +import GHC.Data.FastString import GHC.Utils.Binary import GHC.Utils.Fingerprint import GHC.Utils.Exception @@ -1081,11 +1082,11 @@ getOrphanHashes hsc_env mods = do sortDependencies :: Dependencies -> Dependencies sortDependencies d - = Deps { dep_mods = sortBy (compare `on` (moduleNameFS . gwib_mod)) (dep_mods d), + = Deps { dep_mods = sortBy (lexicalCompareFS `on` (moduleNameFS . gwib_mod)) (dep_mods d), dep_pkgs = sortBy (compare `on` fst) (dep_pkgs d), dep_orphs = sortBy stableModuleCmp (dep_orphs d), dep_finsts = sortBy stableModuleCmp (dep_finsts d), - dep_plgins = sortBy (compare `on` moduleNameFS) (dep_plgins d) } + dep_plgins = sortBy (lexicalCompareFS `on` moduleNameFS) (dep_plgins d) } {- ************************************************************************ diff --git a/compiler/GHC/IfaceToCore.hs b/compiler/GHC/IfaceToCore.hs index de32ae9e15..ae6461ce3a 100644 --- a/compiler/GHC/IfaceToCore.hs +++ b/compiler/GHC/IfaceToCore.hs @@ -81,7 +81,6 @@ import GHC.Fingerprint import qualified GHC.Data.BooleanFormula as BF import Control.Monad -import qualified Data.Map as Map {- This module takes @@ -1752,7 +1751,7 @@ tcIfaceCoAxiomRule :: IfLclName -> IfL CoAxiomRule -- there are a fixed set of CoAxiomRules, -- currently enumerated in typeNatCoAxiomRules tcIfaceCoAxiomRule n - = case Map.lookup n typeNatCoAxiomRules of + = case lookupUFM typeNatCoAxiomRules n of Just ax -> return ax _ -> pprPanic "tcIfaceCoAxiomRule" (ppr n) diff --git a/compiler/GHC/Tc/Gen/Expr.hs b/compiler/GHC/Tc/Gen/Expr.hs index f1030538e0..6ef43c634e 100644 --- a/compiler/GHC/Tc/Gen/Expr.hs +++ b/compiler/GHC/Tc/Gen/Expr.hs @@ -84,12 +84,11 @@ import GHC.Utils.Panic import GHC.Data.FastString import Control.Monad import GHC.Core.Class(classTyCon) -import GHC.Types.Unique.Set ( nonDetEltsUniqSet ) +import GHC.Types.Unique.Set import qualified GHC.LanguageExtensions as LangExt import Data.Function import Data.List (partition, sortBy, groupBy, intersect) -import qualified Data.Set as Set {- ************************************************************************ @@ -2818,11 +2817,11 @@ badFieldsUpd rbinds data_cons -- For each field, which constructors contain the field? membership :: [(FieldLabelString, [Bool])] membership = sortMembership $ - map (\fld -> (fld, map (Set.member fld) fieldLabelSets)) $ + map (\fld -> (fld, map (elementOfUniqSet fld) fieldLabelSets)) $ map (occNameFS . rdrNameOcc . rdrNameAmbiguousFieldOcc . unLoc . hsRecFieldLbl . unLoc) rbinds - fieldLabelSets :: [Set.Set FieldLabelString] - fieldLabelSets = map (Set.fromList . map flLabel . conLikeFieldLabels) data_cons + fieldLabelSets :: [UniqSet FieldLabelString] + fieldLabelSets = map (mkUniqSet . map flLabel . conLikeFieldLabels) data_cons -- Sort in order of increasing number of True, so that a smaller -- conflicting set can be found. diff --git a/compiler/GHC/Tc/TyCl.hs b/compiler/GHC/Tc/TyCl.hs index 113fadd20d..50eeb60930 100644 --- a/compiler/GHC/Tc/TyCl.hs +++ b/compiler/GHC/Tc/TyCl.hs @@ -61,6 +61,7 @@ import GHC.Types.Id import GHC.Types.Var import GHC.Types.Var.Env import GHC.Types.Var.Set +import GHC.Data.FastString import GHC.Unit import GHC.Types.Name import GHC.Types.Name.Set @@ -3947,7 +3948,7 @@ checkValidTyCon tc data_cons = tyConDataCons tc groups = equivClasses cmp_fld (concatMap get_fields data_cons) - cmp_fld (f1,_) (f2,_) = flLabel f1 `compare` flLabel f2 + cmp_fld (f1,_) (f2,_) = flLabel f1 `uniqCompareFS` flLabel f2 get_fields con = dataConFieldLabels con `zip` repeat con -- dataConFieldLabels may return the empty list, which is fine diff --git a/compiler/GHC/Types/CostCentre.hs b/compiler/GHC/Types/CostCentre.hs index 730c469a04..0bb615a1c4 100644 --- a/compiler/GHC/Types/CostCentre.hs +++ b/compiler/GHC/Types/CostCentre.hs @@ -30,7 +30,6 @@ import GHC.Types.Unique import GHC.Utils.Outputable import GHC.Types.SrcLoc import GHC.Data.FastString -import GHC.Utils.Misc import GHC.Types.CostCentre.State import Data.Data @@ -95,7 +94,11 @@ cmpCostCentre (AllCafsCC {cc_mod = m1}) (AllCafsCC {cc_mod = m2}) cmpCostCentre NormalCC {cc_flavour = f1, cc_mod = m1, cc_name = n1} NormalCC {cc_flavour = f2, cc_mod = m2, cc_name = n2} -- first key is module name, then centre name, then flavour - = (m1 `compare` m2) `thenCmp` (n1 `compare` n2) `thenCmp` (f1 `compare` f2) + = mconcat + [ m1 `compare` m2 + , n1 `lexicalCompareFS` n2 -- compare lexically to avoid non-determinism + , f1 `compare` f2 + ] cmpCostCentre other_1 other_2 = let diff --git a/compiler/GHC/Types/Literal.hs b/compiler/GHC/Types/Literal.hs index 2c5471c52b..16d4539630 100644 --- a/compiler/GHC/Types/Literal.hs +++ b/compiler/GHC/Types/Literal.hs @@ -671,7 +671,7 @@ cmpLit (LitString a) (LitString b) = a `compare` b cmpLit (LitNullAddr) (LitNullAddr) = EQ cmpLit (LitFloat a) (LitFloat b) = a `compare` b cmpLit (LitDouble a) (LitDouble b) = a `compare` b -cmpLit (LitLabel a _ _) (LitLabel b _ _) = a `compare` b +cmpLit (LitLabel a _ _) (LitLabel b _ _) = a `uniqCompareFS` b cmpLit (LitNumber nt1 a) (LitNumber nt2 b) | nt1 == nt2 = a `compare` b | otherwise = nt1 `compare` nt2 diff --git a/compiler/GHC/Types/Name/Occurrence.hs b/compiler/GHC/Types/Name/Occurrence.hs index 9756900ad8..83037a0704 100644 --- a/compiler/GHC/Types/Name/Occurrence.hs +++ b/compiler/GHC/Types/Name/Occurrence.hs @@ -239,7 +239,7 @@ instance Eq OccName where instance Ord OccName where -- Compares lexicographically, *not* by Unique of the string compare (OccName sp1 s1) (OccName sp2 s2) - = (s1 `compare` s2) `thenCmp` (sp1 `compare` sp2) + = (s1 `lexicalCompareFS` s2) `thenCmp` (sp1 `compare` sp2) instance Data OccName where -- don't traverse? diff --git a/compiler/GHC/Types/SrcLoc.hs b/compiler/GHC/Types/SrcLoc.hs index 46a362026f..30db87d827 100644 --- a/compiler/GHC/Types/SrcLoc.hs +++ b/compiler/GHC/Types/SrcLoc.hs @@ -147,7 +147,7 @@ this is the obvious stuff: -- -- Represents a single point within a file data RealSrcLoc - = SrcLoc FastString -- A precise location (file name) + = SrcLoc LexicalFastString -- A precise location (file name) {-# UNPACK #-} !Int -- line number, begins at 1 {-# UNPACK #-} !Int -- column number, begins at 1 deriving (Eq, Ord) @@ -244,7 +244,7 @@ mkSrcLoc :: FastString -> Int -> Int -> SrcLoc mkSrcLoc x line col = RealSrcLoc (mkRealSrcLoc x line col) Nothing mkRealSrcLoc :: FastString -> Int -> Int -> RealSrcLoc -mkRealSrcLoc x line col = SrcLoc x line col +mkRealSrcLoc x line col = SrcLoc (LexicalFastString x) line col getBufPos :: SrcLoc -> Maybe BufPos getBufPos (RealSrcLoc _ mbpos) = mbpos @@ -262,7 +262,7 @@ mkGeneralSrcLoc = UnhelpfulLoc -- | Gives the filename of the 'RealSrcLoc' srcLocFile :: RealSrcLoc -> FastString -srcLocFile (SrcLoc fname _ _) = fname +srcLocFile (SrcLoc (LexicalFastString fname) _ _) = fname -- | Raises an error when used on a "bad" 'SrcLoc' srcLocLine :: RealSrcLoc -> Int @@ -309,7 +309,7 @@ lookupSrcSpan (RealSrcSpan l _) = Map.lookup l lookupSrcSpan (UnhelpfulSpan _) = const Nothing instance Outputable RealSrcLoc where - ppr (SrcLoc src_path src_line src_col) + ppr (SrcLoc (LexicalFastString src_path) src_line src_col) = hcat [ pprFastFilePath src_path <> colon , int src_line <> colon , int src_col ] @@ -458,7 +458,7 @@ srcLocSpan (UnhelpfulLoc str) = UnhelpfulSpan (UnhelpfulOther str) srcLocSpan (RealSrcLoc l mb) = RealSrcSpan (realSrcLocSpan l) (fmap (\b -> BufSpan b b) mb) realSrcLocSpan :: RealSrcLoc -> RealSrcSpan -realSrcLocSpan (SrcLoc file line col) = RealSrcSpan' file line col line col +realSrcLocSpan (SrcLoc (LexicalFastString file) line col) = RealSrcSpan' file line col line col -- | Create a 'SrcSpan' between two points in a file mkRealSrcSpan :: RealSrcLoc -> RealSrcLoc -> RealSrcSpan diff --git a/compiler/GHC/Unit/Info.hs b/compiler/GHC/Unit/Info.hs index 034b61e145..abb2122ef0 100644 --- a/compiler/GHC/Unit/Info.hs +++ b/compiler/GHC/Unit/Info.hs @@ -86,15 +86,11 @@ mapUnitInfo f = mapGenericUnitInfo id -- module name (fmap (mapGenUnit f)) -- instantiating modules --- TODO: there's no need for these to be FastString, as we don't need the uniq --- feature, but ghc doesn't currently have convenient support for any --- other compact string types, e.g. plain ByteString or Text. - -newtype PackageId = PackageId FastString deriving (Eq, Ord) +newtype PackageId = PackageId FastString deriving (Eq) newtype PackageName = PackageName { unPackageName :: FastString } - deriving (Eq, Ord) + deriving (Eq) instance Uniquable PackageId where getUnique (PackageId n) = getUnique n diff --git a/compiler/GHC/Unit/Module/Name.hs b/compiler/GHC/Unit/Module/Name.hs index ad09fa7549..76c40f6a87 100644 --- a/compiler/GHC/Unit/Module/Name.hs +++ b/compiler/GHC/Unit/Module/Name.hs @@ -59,7 +59,7 @@ instance NFData ModuleName where stableModuleNameCmp :: ModuleName -> ModuleName -> Ordering -- ^ Compares module names lexically, rather than by their 'Unique's -stableModuleNameCmp n1 n2 = moduleNameFS n1 `compare` moduleNameFS n2 +stableModuleNameCmp n1 n2 = moduleNameFS n1 `lexicalCompareFS` moduleNameFS n2 pprModuleName :: ModuleName -> SDoc pprModuleName (ModuleName nm) = diff --git a/compiler/GHC/Unit/State.hs b/compiler/GHC/Unit/State.hs index 6e3a53310c..c95c9e4031 100644 --- a/compiler/GHC/Unit/State.hs +++ b/compiler/GHC/Unit/State.hs @@ -415,7 +415,7 @@ data UnitState = UnitState { -- | A mapping of 'PackageName' to 'IndefUnitId'. This is used when -- users refer to packages in Backpack includes. - packageNameMap :: Map PackageName IndefUnitId, + packageNameMap :: UniqFM PackageName IndefUnitId, -- | A mapping from database unit keys to wired in unit ids. wireMap :: Map UnitId UnitId, @@ -460,7 +460,7 @@ emptyUnitState :: UnitState emptyUnitState = UnitState { unitInfoMap = Map.empty, preloadClosure = emptyUniqSet, - packageNameMap = Map.empty, + packageNameMap = emptyUFM, wireMap = Map.empty, unwireMap = Map.empty, preloadUnits = [], @@ -533,7 +533,7 @@ unsafeLookupUnitId state uid = case lookupUnitId state uid of -- | Find the unit we know about with the given package name (e.g. @foo@), if any -- (NB: there might be a locally defined unit name which overrides this) lookupPackageName :: UnitState -> PackageName -> Maybe IndefUnitId -lookupPackageName pkgstate n = Map.lookup n (packageNameMap pkgstate) +lookupPackageName pkgstate n = lookupUFM (packageNameMap pkgstate) n -- | Search for units with a given package ID (e.g. \"foo-0.1\") searchPackageId :: UnitState -> PackageId -> [UnitInfo] @@ -1587,10 +1587,9 @@ mkUnitState ctx printer cfg = do -- likely to actually happen. return (updateVisibilityMap wired_map plugin_vis_map2) - let pkgname_map = foldl' add Map.empty pkgs2 - where add pn_map p - = Map.insert (unitPackageName p) (unitInstanceOf p) pn_map - + let pkgname_map = listToUFM [ (unitPackageName p, unitInstanceOf p) + | p <- pkgs2 + ] -- The explicitUnits accurately reflects the set of units we have turned -- on; as such, it also is the only way one can come up with requirements. -- The requirement context is directly based off of this: we simply diff --git a/compiler/GHC/Unit/Types.hs b/compiler/GHC/Unit/Types.hs index f80a3b5b9d..aa725b429c 100644 --- a/compiler/GHC/Unit/Types.hs +++ b/compiler/GHC/Unit/Types.hs @@ -290,7 +290,7 @@ instance Eq (GenInstantiatedUnit unit) where u1 == u2 = instUnitKey u1 == instUnitKey u2 instance Ord (GenInstantiatedUnit unit) where - u1 `compare` u2 = instUnitFS u1 `compare` instUnitFS u2 + u1 `compare` u2 = instUnitFS u1 `uniqCompareFS` instUnitFS u2 instance Binary InstantiatedUnit where put_ bh indef = do @@ -328,7 +328,7 @@ instance NFData Unit where -- | Compares unit ids lexically, rather than by their 'Unique's stableUnitCmp :: Unit -> Unit -> Ordering -stableUnitCmp p1 p2 = unitFS p1 `compare` unitFS p2 +stableUnitCmp p1 p2 = unitFS p1 `lexicalCompareFS` unitFS p2 instance Outputable Unit where ppr pk = pprUnit pk @@ -504,7 +504,9 @@ instance Eq UnitId where uid1 == uid2 = getUnique uid1 == getUnique uid2 instance Ord UnitId where - u1 `compare` u2 = unitIdFS u1 `compare` unitIdFS u2 + -- we compare lexically to avoid non-deterministic output when sets of + -- unit-ids are printed (dependencies, etc.) + u1 `compare` u2 = unitIdFS u1 `lexicalCompareFS` unitIdFS u2 instance Uniquable UnitId where getUnique = getUnique . unitIdFS diff --git a/compiler/GHC/Utils/Binary.hs b/compiler/GHC/Utils/Binary.hs index d73939c53c..d4afa624cf 100644 --- a/compiler/GHC/Utils/Binary.hs +++ b/compiler/GHC/Utils/Binary.hs @@ -5,6 +5,8 @@ {-# LANGUAGE GADTs #-} {-# LANGUAGE MultiWayIf #-} {-# LANGUAGE BangPatterns #-} +{-# LANGUAGE StandaloneDeriving #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# OPTIONS_GHC -O2 -funbox-strict-fields #-} -- We always optimise this, otherwise performance of a non-optimised @@ -1207,6 +1209,9 @@ instance Binary FastString where case getUserData bh of UserData { ud_get_fs = get_fs } -> get_fs bh +deriving instance Binary NonDetFastString +deriving instance Binary LexicalFastString + -- Here to avoid loop instance Binary LeftOrRight where put_ bh CLeft = putByte bh 0 diff --git a/compiler/GHC/Utils/Encoding.hs b/compiler/GHC/Utils/Encoding.hs index 24637a3bff..592b0bd40e 100644 --- a/compiler/GHC/Utils/Encoding.hs +++ b/compiler/GHC/Utils/Encoding.hs @@ -19,6 +19,7 @@ module GHC.Utils.Encoding ( utf8DecodeChar, utf8DecodeByteString, utf8DecodeShortByteString, + utf8CompareShortByteString, utf8DecodeStringLazy, utf8EncodeChar, utf8EncodeString, @@ -164,6 +165,38 @@ utf8DecodeStringLazy fp offset (I# len#) -- withForeignPtr would provide here. That's why we use touchForeignPtr to -- keep the fp alive until the last character has actually been decoded. +utf8CompareShortByteString :: ShortByteString -> ShortByteString -> Ordering +utf8CompareShortByteString (SBS a1) (SBS a2) = go 0# 0# + -- UTF-8 has the property that sorting by bytes values also sorts by + -- code-points. + -- BUT we use "Modified UTF-8" which encodes \0 as 0xC080 so this property + -- doesn't hold and we must explicitly check this case here. + -- Note that decoding every code point would also work but it would be much + -- more costly. + where + !sz1 = sizeofByteArray# a1 + !sz2 = sizeofByteArray# a2 + go off1 off2 + | isTrue# ((off1 >=# sz1) `andI#` (off2 >=# sz2)) = EQ + | isTrue# (off1 >=# sz1) = LT + | isTrue# (off2 >=# sz2) = GT + | otherwise = + let !b1_1 = indexWord8Array# a1 off1 + !b2_1 = indexWord8Array# a2 off2 + in case b1_1 of + 0xC0## -> case b2_1 of + 0xC0## -> go (off1 +# 1#) (off2 +# 1#) + _ -> case indexWord8Array# a1 (off1 +# 1#) of + 0x80## -> LT + _ -> go (off1 +# 1#) (off2 +# 1#) + _ -> case b2_1 of + 0xC0## -> case indexWord8Array# a2 (off2 +# 1#) of + 0x80## -> GT + _ -> go (off1 +# 1#) (off2 +# 1#) + _ | isTrue# (b1_1 `gtWord#` b2_1) -> GT + | isTrue# (b1_1 `ltWord#` b2_1) -> LT + | otherwise -> go (off1 +# 1#) (off2 +# 1#) + utf8DecodeShortByteString :: ShortByteString -> [Char] utf8DecodeShortByteString (SBS ba#) = unsafeDupablePerformIO $ diff --git a/compiler/GHC/Utils/Outputable.hs b/compiler/GHC/Utils/Outputable.hs index af146ed72a..8723f16233 100644 --- a/compiler/GHC/Utils/Outputable.hs +++ b/compiler/GHC/Utils/Outputable.hs @@ -1,5 +1,8 @@ {-# LANGUAGE LambdaCase #-} {-# LANGUAGE PatternSynonyms #-} +{-# LANGUAGE StandaloneDeriving #-} +{-# LANGUAGE DerivingStrategies #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} {- (c) The University of Glasgow 2006-2012 @@ -926,6 +929,9 @@ instance Outputable FastString where ppr fs = ftext fs -- Prints an unadorned string, -- no double quotes or anything +deriving newtype instance Outputable NonDetFastString +deriving newtype instance Outputable LexicalFastString + instance (Outputable key, Outputable elt) => Outputable (M.Map key elt) where ppr m = ppr (M.toList m) instance (Outputable elt) => Outputable (IM.IntMap elt) where |