diff options
Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/Makefile | 20 | ||||
-rw-r--r-- | compiler/cbits/unicode.c | 5 | ||||
-rw-r--r-- | compiler/parser/Lexer.x | 6 | ||||
-rw-r--r-- | compiler/utils/Unicode.hs | 69 |
4 files changed, 81 insertions, 19 deletions
diff --git a/compiler/Makefile b/compiler/Makefile index 5e503eac14..be304f9a42 100644 --- a/compiler/Makefile +++ b/compiler/Makefile @@ -483,10 +483,13 @@ endif # the interpreter is supported on this platform. ifeq "$(bootstrapped)" "YES" -SRC_HC_OPTS += -package hpc -package bytestring -PKG_DEPENDS += hpc bytestring +SRC_HC_OPTS += -package bytestring +PKG_DEPENDS += bytestring endif +SRC_HC_OPTS += -package hpc +PKG_DEPENDS += hpc + ifeq "$(GhcWithInterpreter) $(bootstrapped)" "YES YES" # Yes, include the interepreter and Template Haskell extensions @@ -756,13 +759,6 @@ primop-usage.hs-incl: prelude/primops.txt #----------------------------------------------------------------------------- # Linking -# Include libghccompat in stage1 only. In stage2 onwards, all these -# libraries will be available from the main libraries. - -ifeq "$(stage)" "1" -include $(GHC_COMPAT_DIR)/compat.mk -endif - ifeq "$(GhcUnregisterised)" "NO" ifeq "$(HOSTPLATFORM)" "ia64-unknown-linux" # needed for generating proper relocation in large binaries: trac #856 @@ -1089,12 +1085,8 @@ ifneq "$(BootingFromHc)" "YES" $(MKDEPENDHS) -M -optdep-f -optdep.depend-BASE $(foreach way,$(WAYS),-optdep-s -optdep$(way)) $(foreach obj,$(MKDEPENDHS_OBJ_SUFFICES),-osuf $(obj)) $(MKDEPENDHS_OPTS) $(filter-out -split-objs, $(MKDEPENDHS_HC_OPTS)) $(HS_SRCS) endif $(MKDEPENDC) -f .depend-BASE $(MKDEPENDC_OPTS) $(foreach way,$(WAYS),-s $(way)) -- $(CC_OPTS) -- $(C_SRCS) - $(PERL) -pe 'binmode(stdin); binmode(stdout); s@(\S*[._]o)@stage$(stage)/$$1@g; s@(\S*[._]hi)@stage$(stage)/$$1@g; s@^.*/compat.*$$@@g;' <.depend-BASE >.depend-$(stage) + $(PERL) -pe 'binmode(stdin); binmode(stdout); s@(\S*[._]o)@stage$(stage)/$$1@g; s@(\S*[._]hi)@stage$(stage)/$$1@g;' <.depend-BASE >.depend-$(stage) # The binmode stuff tells perl not to add stupid ^M's to the output -# -# The /lib/compat replacement is to workaround a bug in the -# -optdep--exclude-module flag in GHC 6.4. It is not required for any -# other version of GHC, but doesn't do any harm. ifeq "$(MakefileDeps)" "YES" $(CONFIG_HS) : Makefile diff --git a/compiler/cbits/unicode.c b/compiler/cbits/unicode.c new file mode 100644 index 0000000000..c239e21c77 --- /dev/null +++ b/compiler/cbits/unicode.c @@ -0,0 +1,5 @@ +#if __GLASGOW_HASKELL__ < 605 +#if __GLASGOW_HASKELL__ != 604 || __GHC_PATCHLEVEL__ == 0 +#include "WCsubst.c" +#endif +#endif diff --git a/compiler/parser/Lexer.x b/compiler/parser/Lexer.x index b65668e2dd..2b86fd7331 100644 --- a/compiler/parser/Lexer.x +++ b/compiler/parser/Lexer.x @@ -60,11 +60,7 @@ import Data.Char ( chr, ord, isSpace ) import Data.Ratio import Debug.Trace -#if __GLASGOW_HASKELL__ >= 605 -import Data.Char ( GeneralCategory(..), generalCategory, isPrint, isUpper ) -#else -import Compat.Unicode ( GeneralCategory(..), generalCategory, isPrint, isUpper ) -#endif +import Unicode ( GeneralCategory(..), generalCategory, isPrint, isUpper ) } $unispace = \x05 -- Trick Alex into handling Unicode. See alexGetChar. diff --git a/compiler/utils/Unicode.hs b/compiler/utils/Unicode.hs new file mode 100644 index 0000000000..c12dc4600f --- /dev/null +++ b/compiler/utils/Unicode.hs @@ -0,0 +1,69 @@ +{-# OPTIONS -cpp #-} +module Unicode ( + GeneralCategory(..), generalCategory, isPrint, isUpper + ) where + +#if __GLASGOW_HASKELL__ > 604 + +import Data.Char (GeneralCategory(..), generalCategory,isPrint,isUpper) + +#else + +import Foreign.C ( CInt ) +import Data.Char ( ord ) + +-- | Unicode General Categories (column 2 of the UnicodeData table) +-- in the order they are listed in the Unicode standard. + +data GeneralCategory + = UppercaseLetter -- Lu Letter, Uppercase + | LowercaseLetter -- Ll Letter, Lowercase + | TitlecaseLetter -- Lt Letter, Titlecase + | ModifierLetter -- Lm Letter, Modifier + | OtherLetter -- Lo Letter, Other + | NonSpacingMark -- Mn Mark, Non-Spacing + | SpacingCombiningMark -- Mc Mark, Spacing Combining + | EnclosingMark -- Me Mark, Enclosing + | DecimalNumber -- Nd Number, Decimal + | LetterNumber -- Nl Number, Letter + | OtherNumber -- No Number, Other + | ConnectorPunctuation -- Pc Punctuation, Connector + | DashPunctuation -- Pd Punctuation, Dash + | OpenPunctuation -- Ps Punctuation, Open + | ClosePunctuation -- Pe Punctuation, Close + | InitialQuote -- Pi Punctuation, Initial quote + | FinalQuote -- Pf Punctuation, Final quote + | OtherPunctuation -- Po Punctuation, Other + | MathSymbol -- Sm Symbol, Math + | CurrencySymbol -- Sc Symbol, Currency + | ModifierSymbol -- Sk Symbol, Modifier + | OtherSymbol -- So Symbol, Other + | Space -- Zs Separator, Space + | LineSeparator -- Zl Separator, Line + | ParagraphSeparator -- Zp Separator, Paragraph + | Control -- Cc Other, Control + | Format -- Cf Other, Format + | Surrogate -- Cs Other, Surrogate + | PrivateUse -- Co Other, Private Use + | NotAssigned -- Cn Other, Not Assigned + deriving (Eq, Ord, Enum, Read, Show, Bounded) + +-- | Retrieves the general Unicode category of the character. +generalCategory :: Char -> GeneralCategory +generalCategory c = toEnum $ fromIntegral $ wgencat $ fromIntegral $ ord c + +foreign import ccall unsafe "u_gencat" + wgencat :: CInt -> CInt + +isPrint :: Char -> Bool +isPrint c = iswprint (fromIntegral (ord c)) /= 0 + +isUpper :: Char -> Bool +isUpper c = iswupper (fromIntegral (ord c)) /= 0 + +foreign import ccall unsafe "u_iswprint" + iswprint :: CInt -> CInt + +foreign import ccall unsafe "u_iswupper" + iswupper :: CInt -> CInt +#endif |