diff options
author | Ben Gamari <bgamari.foss@gmail.com> | 2015-09-08 08:38:40 -0500 |
---|---|---|
committer | Austin Seipp <austin@well-typed.com> | 2015-09-08 08:39:05 -0500 |
commit | e4a73f4fa1cc9681aee3ce13ee15073deed54635 (patch) | |
tree | 4ae445764e5c7615705238cafb8533de9c359f2b /libraries/base/Data/Char.hs | |
parent | 8be43dd966c9c56e530eab266d6bf2710f9b07f4 (diff) | |
download | haskell-e4a73f4fa1cc9681aee3ce13ee15073deed54635.tar.gz |
Move GeneralCategory et al to GHC.Unicode
This allows these to be used from Text.Read.Lex without import cycles.
Reviewed By: thomie, austin
Differential Revision: https://phabricator.haskell.org/D1121
GHC Trac Issues: #10444
Diffstat (limited to 'libraries/base/Data/Char.hs')
-rw-r--r-- | libraries/base/Data/Char.hs | 209 |
1 files changed, 1 insertions, 208 deletions
diff --git a/libraries/base/Data/Char.hs b/libraries/base/Data/Char.hs index e4e7fbfcb8..c8dd9331c6 100644 --- a/libraries/base/Data/Char.hs +++ b/libraries/base/Data/Char.hs @@ -53,14 +53,12 @@ module Data.Char ) where import GHC.Base -import GHC.Arr (Ix) import GHC.Char import GHC.Real (fromIntegral) import GHC.Show -import GHC.Read (Read, readLitChar, lexLitChar) +import GHC.Read (readLitChar, lexLitChar) import GHC.Unicode import GHC.Num -import GHC.Enum -- $setup -- Allow the use of Prelude in doctests. @@ -105,121 +103,6 @@ digitToInt c hexl = ord c - ord 'a' hexu = ord c - ord 'A' --- | Unicode General Categories (column 2 of the UnicodeData table) in --- the order they are listed in the Unicode standard (the Unicode --- Character Database, in particular). --- --- ==== __Examples__ --- --- Basic usage: --- --- >>> :t OtherLetter --- OtherLetter :: GeneralCategory --- --- 'Eq' instance: --- --- >>> UppercaseLetter == UppercaseLetter --- True --- >>> UppercaseLetter == LowercaseLetter --- False --- --- 'Ord' instance: --- --- >>> NonSpacingMark <= MathSymbol --- True --- --- 'Enum' instance: --- --- >>> enumFromTo ModifierLetter SpacingCombiningMark --- [ModifierLetter,OtherLetter,NonSpacingMark,SpacingCombiningMark] --- --- 'Read' instance: --- --- >>> read "DashPunctuation" :: GeneralCategory --- DashPunctuation --- >>> read "17" :: GeneralCategory --- *** Exception: Prelude.read: no parse --- --- 'Show' instance: --- --- >>> show EnclosingMark --- "EnclosingMark" --- --- 'Bounded' instance: --- --- >>> minBound :: GeneralCategory --- UppercaseLetter --- >>> maxBound :: GeneralCategory --- NotAssigned --- --- 'Ix' instance: --- --- >>> import Data.Ix ( index ) --- >>> index (OtherLetter,Control) FinalQuote --- 12 --- >>> index (OtherLetter,Control) Format --- *** Exception: Error in array index --- -data GeneralCategory - = UppercaseLetter -- ^ Lu: Letter, Uppercase - | LowercaseLetter -- ^ Ll: Letter, Lowercase - | TitlecaseLetter -- ^ Lt: Letter, Titlecase 
- | ModifierLetter -- ^ Lm: Letter, Modifier - | OtherLetter -- ^ Lo: Letter, Other - | NonSpacingMark -- ^ Mn: Mark, Non-Spacing - | SpacingCombiningMark -- ^ Mc: Mark, Spacing Combining - | EnclosingMark -- ^ Me: Mark, Enclosing - | DecimalNumber -- ^ Nd: Number, Decimal - | LetterNumber -- ^ Nl: Number, Letter - | OtherNumber -- ^ No: Number, Other - | ConnectorPunctuation -- ^ Pc: Punctuation, Connector - | DashPunctuation -- ^ Pd: Punctuation, Dash - | OpenPunctuation -- ^ Ps: Punctuation, Open - | ClosePunctuation -- ^ Pe: Punctuation, Close - | InitialQuote -- ^ Pi: Punctuation, Initial quote - | FinalQuote -- ^ Pf: Punctuation, Final quote - | OtherPunctuation -- ^ Po: Punctuation, Other - | MathSymbol -- ^ Sm: Symbol, Math - | CurrencySymbol -- ^ Sc: Symbol, Currency - | ModifierSymbol -- ^ Sk: Symbol, Modifier - | OtherSymbol -- ^ So: Symbol, Other - | Space -- ^ Zs: Separator, Space - | LineSeparator -- ^ Zl: Separator, Line - | ParagraphSeparator -- ^ Zp: Separator, Paragraph - | Control -- ^ Cc: Other, Control - | Format -- ^ Cf: Other, Format - | Surrogate -- ^ Cs: Other, Surrogate - | PrivateUse -- ^ Co: Other, Private Use - | NotAssigned -- ^ Cn: Other, Not Assigned - deriving (Eq, Ord, Enum, Read, Show, Bounded, Ix) - --- | The Unicode general category of the character. This relies on the --- 'Enum' instance of 'GeneralCategory', which must remain in the --- same order as the categories are presented in the Unicode --- standard. 
--- --- ==== __Examples__ --- --- Basic usage: --- --- >>> generalCategory 'a' --- LowercaseLetter --- >>> generalCategory 'A' --- UppercaseLetter --- >>> generalCategory '0' --- DecimalNumber --- >>> generalCategory '%' --- OtherPunctuation --- >>> generalCategory '♥' --- OtherSymbol --- >>> generalCategory '\31' --- Control --- >>> generalCategory ' ' --- Space --- -generalCategory :: Char -> GeneralCategory -generalCategory c = toEnum $ fromIntegral $ wgencat $ fromIntegral $ ord c - -- derived character classifiers -- | Selects alphabetic Unicode characters (lower-case, upper-case and @@ -360,96 +243,6 @@ isNumber c = case generalCategory c of OtherNumber -> True _ -> False --- | Selects Unicode punctuation characters, including various kinds --- of connectors, brackets and quotes. --- --- This function returns 'True' if its argument has one of the --- following 'GeneralCategory's, or 'False' otherwise: --- --- * 'ConnectorPunctuation' --- * 'DashPunctuation' --- * 'OpenPunctuation' --- * 'ClosePunctuation' --- * 'InitialQuote' --- * 'FinalQuote' --- * 'OtherPunctuation' --- --- These classes are defined in the --- <http://www.unicode.org/reports/tr44/tr44-14.html#GC_Values_Table Unicode Character Database>, --- part of the Unicode standard. The same document defines what is --- and is not a \"Punctuation\". --- --- ==== __Examples__ --- --- Basic usage: --- --- >>> isPunctuation 'a' --- False --- >>> isPunctuation '7' --- False --- >>> isPunctuation '♥' --- False --- >>> isPunctuation '"' --- True --- >>> isPunctuation '?' --- True --- >>> isPunctuation '—' --- True --- -isPunctuation :: Char -> Bool -isPunctuation c = case generalCategory c of - ConnectorPunctuation -> True - DashPunctuation -> True - OpenPunctuation -> True - ClosePunctuation -> True - InitialQuote -> True - FinalQuote -> True - OtherPunctuation -> True - _ -> False - --- | Selects Unicode symbol characters, including mathematical and --- currency symbols. 
--- --- This function returns 'True' if its argument has one of the --- following 'GeneralCategory's, or 'False' otherwise: --- --- * 'MathSymbol' --- * 'CurrencySymbol' --- * 'ModifierSymbol' --- * 'OtherSymbol' --- --- These classes are defined in the --- <http://www.unicode.org/reports/tr44/tr44-14.html#GC_Values_Table Unicode Character Database>, --- part of the Unicode standard. The same document defines what is --- and is not a \"Symbol\". --- --- ==== __Examples__ --- --- Basic usage: --- --- >>> isSymbol 'a' --- False --- >>> isSymbol '6' --- False --- >>> isSymbol '=' --- True --- --- The definition of \"math symbol\" may be a little --- counter-intuitive depending on one's background: --- --- >>> isSymbol '+' --- True --- >>> isSymbol '-' --- False --- -isSymbol :: Char -> Bool -isSymbol c = case generalCategory c of - MathSymbol -> True - CurrencySymbol -> True - ModifierSymbol -> True - OtherSymbol -> True - _ -> False - -- | Selects Unicode space and separator characters. -- -- This function returns 'True' if its argument has one of the |