summary refs log tree commit diff
path: root/ucp.h
diff options
context:
space:
mode:
author ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2012-08-25 11:36:15 +0000
committer ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2012-08-25 11:36:15 +0000
commit 39f01fd0ae3cc442ec0114e31b6a256172e8288c (patch)
tree a1a59625f17f928e67d56ba2c747fab4c4e62cd7 /ucp.h
parent 1e22d8f74de1ebf5bea6c8b07a3b79f457fcf419 (diff)
download pcre-39f01fd0ae3cc442ec0114e31b6a256172e8288c.tar.gz
Upgrade \X to match an extended grapheme cluster
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1011 2f5784b3-3f2a-0410-8824-cb99058d5e15
Diffstat (limited to 'ucp.h')
-rw-r--r-- ucp.h 28
1 files changed, 26 insertions, 2 deletions
diff --git a/ucp.h b/ucp.h
index 59c3bec..8c3afde 100644
--- a/ucp.h
+++ b/ucp.h
@@ -7,7 +7,11 @@
/* This file contains definitions of the property values that are returned by
the UCD access macros. New values that are added for new releases of Unicode
-should always be at the end of each enum, for backwards compatibility. */
+should always be at the end of each enum, for backwards compatibility.
+
+IMPORTANT: Note also that the specific numeric values of the enums have to be
+the same as the values that are generated by the maint/MultiStage2.py script,
+where the equivalent property descriptive names are listed in vectors. */
/* These are the general character categories. */
@@ -21,7 +25,7 @@ enum {
ucp_Z /* Separator */
};
-/* These are the particular character types. */
+/* These are the particular character categories. */
enum {
ucp_Cc, /* Control */
@@ -56,6 +60,26 @@ enum {
ucp_Zs /* Space separator */
};
+/* These are the grapheme break properties. The code that processes them
+assumes that every value is less than 16. If more values are added that take
+the number to 16 or more, that code will have to be rewritten. */
+
+enum {
+ ucp_gbCR, /* 0 */
+ ucp_gbLF, /* 1 */
+ ucp_gbControl, /* 2 */
+ ucp_gbExtend, /* 3 */
+ ucp_gbPrepend, /* 4 */
+ ucp_gbSpacingMark, /* 5 */
+ ucp_gbL, /* 6 Hangul syllable type L */
+ ucp_gbV, /* 7 Hangul syllable type V */
+ ucp_gbT, /* 8 Hangul syllable type T */
+ ucp_gbLV, /* 9 Hangul syllable type LV */
+ ucp_gbLVT, /* 10 Hangul syllable type LVT */
+ ucp_gbOther, /* 11 */
+ ucp_gbCount /* 12 Not a property: the number of properties listed above */
+};
+
/* These are the script identifications. */
enum {