diff options
author | Ben Gamari <ben@smart-cactus.org> | 2020-02-06 18:23:30 -0500 |
---|---|---|
committer | Marge Bot <ben+marge-bot@smart-cactus.org> | 2020-02-12 17:22:37 -0500 |
commit | f5ffd8d9ec776db708e690c4fdbf671afa8df48f (patch) | |
tree | 7092307f960d4025da0d9cf0094bf5179fb8aea2 /libraries | |
parent | 059c3c9d7c84fc37c69e9f414ff736d47081e72c (diff) | |
download | haskell-f5ffd8d9ec776db708e690c4fdbf671afa8df48f.tar.gz |
base: Expose GHC.Unicode.unicodeVersion
This exposes a Data.Version.Version representing the version of the
Unicode database used by `base`. This should clear up some confusion I
have seen in tickets about which Unicode version a given GHC release
can be expected to support.
While in town I also regenerated the Unicode tables from version 12.0.0
of the Unicode database; this is the same version as before, not an
update to a newer one. Strangely, the file cited in the README no longer
existed. Consequently, I used
https://www.unicode.org/Public/12.0.0/ucd/UnicodeData.txt and was
slightly surprised to find a few changes in the generated table.
Diffstat (limited to 'libraries')
-rw-r--r-- | libraries/base/Data/Version.hs-boot | 12 | ||||
-rw-r--r-- | libraries/base/GHC/Unicode.hs | 7 | ||||
-rw-r--r-- | libraries/base/cbits/README.Unicode | 8 | ||||
-rw-r--r-- | libraries/base/cbits/WCsubst.c | 23 | ||||
-rwxr-xr-x[-rw-r--r--] | libraries/base/cbits/ubconfc | 20 | ||||
-rw-r--r-- | libraries/base/include/UnicodeVersion.h | 7 |
6 files changed, 65 insertions, 12 deletions
diff --git a/libraries/base/Data/Version.hs-boot b/libraries/base/Data/Version.hs-boot new file mode 100644 index 0000000000..63726cf6af --- /dev/null +++ b/libraries/base/Data/Version.hs-boot @@ -0,0 +1,12 @@ +{-# LANGUAGE NoImplicitPrelude #-} + +module Data.Version + ( Version + , makeVersion + ) where + +import GHC.Base + +data Version + +makeVersion :: [Int] -> Version diff --git a/libraries/base/GHC/Unicode.hs b/libraries/base/GHC/Unicode.hs index 9d11b37d0c..6fba91f0e2 100644 --- a/libraries/base/GHC/Unicode.hs +++ b/libraries/base/GHC/Unicode.hs @@ -19,6 +19,7 @@ ----------------------------------------------------------------------------- module GHC.Unicode ( + unicodeVersion, GeneralCategory (..), generalCategory, isAscii, isLatin1, isControl, isAsciiUpper, isAsciiLower, @@ -36,12 +37,18 @@ import GHC.Real import GHC.Enum ( Enum (..), Bounded (..) ) import GHC.Ix ( Ix (..) ) import GHC.Num +import {-# SOURCE #-} Data.Version -- Data.Char.chr already imports this and we need to define a Show instance -- for GeneralCategory import GHC.Show ( Show ) #include "HsBaseConfig.h" +#include "UnicodeVersion.h" + +-- | Version of Unicode standard used by @base@. 
+unicodeVersion :: Version +unicodeVersion = makeVersion UNICODE_VERSION_NUMS -- | Unicode General Categories (column 2 of the UnicodeData table) in -- the order they are listed in the Unicode standard (the Unicode diff --git a/libraries/base/cbits/README.Unicode b/libraries/base/cbits/README.Unicode index 6cc18464cd..1eef278c96 100644 --- a/libraries/base/cbits/README.Unicode +++ b/libraries/base/cbits/README.Unicode @@ -1,8 +1,12 @@ +Generating GHC's Unicode table +============================== WCsubst.c is generated with: - sh ubconfc < UnicodeData.txt > WCsubst.c + sh ubconfc 12.0.0 < UnicodeData.txt > WCsubst.c where UnicodeData.txt came from - https://www.unicode.org/Public/12.0.0/ucd/UnicodeData-12.0.0d4.txt + https://www.unicode.org/Public/12.0.0/ucd/UnicodeData.txt + +Don't forget to mention the update in the User's Guide. diff --git a/libraries/base/cbits/WCsubst.c b/libraries/base/cbits/WCsubst.c index 9940405a8e..aa58dc244c 100644 --- a/libraries/base/cbits/WCsubst.c +++ b/libraries/base/cbits/WCsubst.c @@ -1,6 +1,6 @@ /*------------------------------------------------------------------------- This is an automatically generated file: do not edit -Generated by ubconfc at Tue Aug 14 10:04:18 UTC 2018 +Generated by ubconfc at Mon Feb 10 11:42:08 EST 2020 @generated -------------------------------------------------------------------------*/ @@ -90,7 +90,7 @@ struct _charblock_ #define GENCAT_MN 2097152 #define GENCAT_LO 16384 #define MAX_UNI_CHAR 1114109 -#define NUM_BLOCKS 3349 +#define NUM_BLOCKS 3352 #define NUM_CONVBLOCKS 1326 #define NUM_SPACEBLOCKS 7 #define NUM_LAT1BLOCKS 63 @@ -1485,7 +1485,8 @@ static const struct _charblock_ allchars[]={ {5112, 6, &rule110}, {5120, 1, &rule7}, {5121, 620, &rule14}, - {5741, 2, &rule2}, + {5741, 1, &rule13}, + {5742, 1, &rule2}, {5743, 17, &rule14}, {5760, 1, &rule1}, {5761, 26, &rule14}, @@ -2799,8 +2800,8 @@ static const struct _charblock_ allchars[]={ {43444, 2, &rule124}, {43446, 4, &rule92}, {43450, 2, 
&rule124}, - {43452, 1, &rule92}, - {43453, 4, &rule124}, + {43452, 2, &rule92}, + {43454, 3, &rule124}, {43457, 13, &rule2}, {43471, 1, &rule91}, {43472, 10, &rule8}, @@ -3302,14 +3303,14 @@ static const struct _charblock_ allchars[]={ {71935, 1, &rule14}, {72096, 8, &rule14}, {72106, 39, &rule14}, - {72145, 1, &rule124}, - {72146, 1, &rule92}, - {72147, 1, &rule124}, + {72145, 3, &rule124}, {72148, 4, &rule92}, {72154, 2, &rule92}, {72156, 4, &rule124}, {72160, 1, &rule92}, - {72161, 3, &rule14}, + {72161, 1, &rule14}, + {72162, 1, &rule2}, + {72163, 1, &rule14}, {72164, 1, &rule124}, {72192, 1, &rule14}, {72193, 10, &rule92}, @@ -3545,7 +3546,8 @@ static const struct _charblock_ allchars[]={ {123184, 7, &rule92}, {123191, 7, &rule91}, {123200, 10, &rule8}, - {123214, 2, &rule14}, + {123214, 1, &rule14}, + {123215, 1, &rule13}, {123584, 44, &rule14}, {123628, 4, &rule92}, {123632, 10, &rule8}, @@ -3556,6 +3558,7 @@ static const struct _charblock_ allchars[]={ {125184, 34, &rule203}, {125218, 34, &rule204}, {125252, 7, &rule92}, + {125259, 1, &rule91}, {125264, 10, &rule8}, {125278, 2, &rule2}, {126065, 59, &rule17}, diff --git a/libraries/base/cbits/ubconfc b/libraries/base/cbits/ubconfc index 4d325866bb..cd29641c58 100644..100755 --- a/libraries/base/cbits/ubconfc +++ b/libraries/base/cbits/ubconfc @@ -17,6 +17,26 @@ # Output the file header +VERSION="$1" +if [ -z "$VERSION" ]; then + echo "Usage: $0 [unicode version]" + exit 1 +fi + +# This file is #included from GHC.Unicode and is used to define +# GHC.Unicode.unicodeVersion. 
+cat > $(dirname $0)/../include/UnicodeVersion.h <<EOF +#if 0 +This is an automatically generated file: do not edit +Generated by `basename $0` at `date` +@generated +#endif + +#define UNICODE_VERSION_NUMS [$(echo $VERSION | sed 's/\./,/g')] +EOF + +exec > $(dirname $0)/WCsubst.c + echo "/*-------------------------------------------------------------------------" echo "This is an automatically generated file: do not edit" echo "Generated by `basename $0` at `date`" diff --git a/libraries/base/include/UnicodeVersion.h b/libraries/base/include/UnicodeVersion.h new file mode 100644 index 0000000000..14852682ac --- /dev/null +++ b/libraries/base/include/UnicodeVersion.h @@ -0,0 +1,7 @@ +#if 0 +This is an automatically generated file: do not edit +Generated by ubconfc at Mon Feb 10 11:42:08 EST 2020 +@generated +#endif + +#define UNICODE_VERSION_NUMS [12,0,0] |