diff options
author | Karl Williamson <khw@cpan.org> | 2016-01-15 22:20:30 -0700 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2016-01-19 15:08:59 -0700 |
commit | 6dc808648618929bd0388e2c65377210e6044fd8 (patch) | |
tree | 1c5a1f5f6549d62d212137ad03fd8449db900658 /charclass_invlists.h | |
parent | 3c84a230ed64f5ef68617ef355b5b35bf93a3951 (diff) | |
download | perl-6dc808648618929bd0388e2c65377210e6044fd8.tar.gz |
regen/mk_invlists.pl: Keep internal enum values last
Most Unicode properties have a finite set of possible values. Most, for
example, are binary, they can be either true or false, but nothing in
between. Others have more possibilities (and still others, like Name,
are not restricted at all). The Word Break property, for example, can
take on a restricted set of values, currently 19 in all, that indicate
what type, for purposes of word breaking, the character is.
In implementing things like Word Break, Perl adds some internal-only
values, like EDGE, which means matching like /^/ or /$/. By using
these synthetic values, we don't need to have extra code for edge
cases.
These properties are implemented using C enums. Prior to this commit,
the actual numeric values for each enum were mostly arbitrary, with the
synthetic ones intermixed with the official ones. This commit changes
that so the synthetic ones are all higher numbers than any official ones,
and the order they appear in the generating code will be the numerical
order they have, so that the program has control of their order.
Diffstat (limited to 'charclass_invlists.h')
-rw-r--r-- | charclass_invlists.h | 236 |
1 files changed, 118 insertions, 118 deletions
diff --git a/charclass_invlists.h b/charclass_invlists.h index edcba31f11..03d949c6f5 100644 --- a/charclass_invlists.h +++ b/charclass_invlists.h @@ -2211,17 +2211,17 @@ typedef enum { GCB_Other = 0, GCB_CR = 1, GCB_Control = 2, - GCB_EDGE = 3, - GCB_Extend = 4, - GCB_L = 5, - GCB_LF = 6, - GCB_LV = 7, - GCB_LVT = 8, - GCB_Prepend = 9, - GCB_Regional_Indicator = 10, - GCB_SpacingMark = 11, - GCB_T = 12, - GCB_V = 13 + GCB_Extend = 3, + GCB_L = 4, + GCB_LF = 5, + GCB_LV = 6, + GCB_LVT = 7, + GCB_Prepend = 8, + GCB_Regional_Indicator = 9, + GCB_SpacingMark = 10, + GCB_T = 11, + GCB_V = 12, + GCB_EDGE = 13 } GCB_enum; static const GCB_enum _Perl_GCB_invmap[] = { /* for ASCII/Latin1 */ @@ -9111,18 +9111,18 @@ typedef enum { SB_ATerm = 1, SB_CR = 2, SB_Close = 3, - SB_EDGE = 4, - SB_Extend = 5, - SB_Format = 6, - SB_LF = 7, - SB_Lower = 8, - SB_Numeric = 9, - SB_OLetter = 10, - SB_SContinue = 11, - SB_STerm = 12, - SB_Sep = 13, - SB_Sp = 14, - SB_Upper = 15 + SB_Extend = 4, + SB_Format = 5, + SB_LF = 6, + SB_Lower = 7, + SB_Numeric = 8, + SB_OLetter = 9, + SB_SContinue = 10, + SB_STerm = 11, + SB_Sep = 12, + SB_Sp = 13, + SB_Upper = 14, + SB_EDGE = 15 } SB_enum; static const SB_enum _Perl_SB_invmap[] = { /* for ASCII/Latin1 */ @@ -13581,22 +13581,22 @@ typedef enum { WB_ALetter = 1, WB_CR = 2, WB_Double_Quote = 3, - WB_EDGE = 4, - WB_Extend = 5, - WB_ExtendNumLet = 6, - WB_Format = 7, - WB_Hebrew_Letter = 8, - WB_Katakana = 9, - WB_LF = 10, - WB_MidLetter = 11, - WB_MidNum = 12, - WB_MidNumLet = 13, - WB_Newline = 14, - WB_Numeric = 15, - WB_Perl_Tailored_HSpace = 16, - WB_Regional_Indicator = 17, - WB_Single_Quote = 18, - WB_UNKNOWN = 19 + WB_Extend = 4, + WB_ExtendNumLet = 5, + WB_Format = 6, + WB_Hebrew_Letter = 7, + WB_Katakana = 8, + WB_LF = 9, + WB_MidLetter = 10, + WB_MidNum = 11, + WB_MidNumLet = 12, + WB_Newline = 13, + WB_Numeric = 14, + WB_Perl_Tailored_HSpace = 15, + WB_Regional_Indicator = 16, + WB_Single_Quote = 17, + WB_UNKNOWN = 18, + WB_EDGE = 19 } 
WB_enum; static const WB_enum _Perl_WB_invmap[] = { /* for ASCII/Latin1 */ @@ -26905,17 +26905,17 @@ typedef enum { GCB_Other = 0, GCB_CR = 1, GCB_Control = 2, - GCB_EDGE = 3, - GCB_Extend = 4, - GCB_L = 5, - GCB_LF = 6, - GCB_LV = 7, - GCB_LVT = 8, - GCB_Prepend = 9, - GCB_Regional_Indicator = 10, - GCB_SpacingMark = 11, - GCB_T = 12, - GCB_V = 13 + GCB_Extend = 3, + GCB_L = 4, + GCB_LF = 5, + GCB_LV = 6, + GCB_LVT = 7, + GCB_Prepend = 8, + GCB_Regional_Indicator = 9, + GCB_SpacingMark = 10, + GCB_T = 11, + GCB_V = 12, + GCB_EDGE = 13 } GCB_enum; static const GCB_enum _Perl_GCB_invmap[] = { /* for EBCDIC 1047 */ @@ -33879,18 +33879,18 @@ typedef enum { SB_ATerm = 1, SB_CR = 2, SB_Close = 3, - SB_EDGE = 4, - SB_Extend = 5, - SB_Format = 6, - SB_LF = 7, - SB_Lower = 8, - SB_Numeric = 9, - SB_OLetter = 10, - SB_SContinue = 11, - SB_STerm = 12, - SB_Sep = 13, - SB_Sp = 14, - SB_Upper = 15 + SB_Extend = 4, + SB_Format = 5, + SB_LF = 6, + SB_Lower = 7, + SB_Numeric = 8, + SB_OLetter = 9, + SB_SContinue = 10, + SB_STerm = 11, + SB_Sep = 12, + SB_Sp = 13, + SB_Upper = 14, + SB_EDGE = 15 } SB_enum; static const SB_enum _Perl_SB_invmap[] = { /* for EBCDIC 1047 */ @@ -38396,22 +38396,22 @@ typedef enum { WB_ALetter = 1, WB_CR = 2, WB_Double_Quote = 3, - WB_EDGE = 4, - WB_Extend = 5, - WB_ExtendNumLet = 6, - WB_Format = 7, - WB_Hebrew_Letter = 8, - WB_Katakana = 9, - WB_LF = 10, - WB_MidLetter = 11, - WB_MidNum = 12, - WB_MidNumLet = 13, - WB_Newline = 14, - WB_Numeric = 15, - WB_Perl_Tailored_HSpace = 16, - WB_Regional_Indicator = 17, - WB_Single_Quote = 18, - WB_UNKNOWN = 19 + WB_Extend = 4, + WB_ExtendNumLet = 5, + WB_Format = 6, + WB_Hebrew_Letter = 7, + WB_Katakana = 8, + WB_LF = 9, + WB_MidLetter = 10, + WB_MidNum = 11, + WB_MidNumLet = 12, + WB_Newline = 13, + WB_Numeric = 14, + WB_Perl_Tailored_HSpace = 15, + WB_Regional_Indicator = 16, + WB_Single_Quote = 17, + WB_UNKNOWN = 18, + WB_EDGE = 19 } WB_enum; static const WB_enum _Perl_WB_invmap[] = { /* for EBCDIC 1047 */ 
@@ -51901,17 +51901,17 @@ typedef enum { GCB_Other = 0, GCB_CR = 1, GCB_Control = 2, - GCB_EDGE = 3, - GCB_Extend = 4, - GCB_L = 5, - GCB_LF = 6, - GCB_LV = 7, - GCB_LVT = 8, - GCB_Prepend = 9, - GCB_Regional_Indicator = 10, - GCB_SpacingMark = 11, - GCB_T = 12, - GCB_V = 13 + GCB_Extend = 3, + GCB_L = 4, + GCB_LF = 5, + GCB_LV = 6, + GCB_LVT = 7, + GCB_Prepend = 8, + GCB_Regional_Indicator = 9, + GCB_SpacingMark = 10, + GCB_T = 11, + GCB_V = 12, + GCB_EDGE = 13 } GCB_enum; static const GCB_enum _Perl_GCB_invmap[] = { /* for EBCDIC 037 */ @@ -58863,18 +58863,18 @@ typedef enum { SB_ATerm = 1, SB_CR = 2, SB_Close = 3, - SB_EDGE = 4, - SB_Extend = 5, - SB_Format = 6, - SB_LF = 7, - SB_Lower = 8, - SB_Numeric = 9, - SB_OLetter = 10, - SB_SContinue = 11, - SB_STerm = 12, - SB_Sep = 13, - SB_Sp = 14, - SB_Upper = 15 + SB_Extend = 4, + SB_Format = 5, + SB_LF = 6, + SB_Lower = 7, + SB_Numeric = 8, + SB_OLetter = 9, + SB_SContinue = 10, + SB_STerm = 11, + SB_Sep = 12, + SB_Sp = 13, + SB_Upper = 14, + SB_EDGE = 15 } SB_enum; static const SB_enum _Perl_SB_invmap[] = { /* for EBCDIC 037 */ @@ -63372,22 +63372,22 @@ typedef enum { WB_ALetter = 1, WB_CR = 2, WB_Double_Quote = 3, - WB_EDGE = 4, - WB_Extend = 5, - WB_ExtendNumLet = 6, - WB_Format = 7, - WB_Hebrew_Letter = 8, - WB_Katakana = 9, - WB_LF = 10, - WB_MidLetter = 11, - WB_MidNum = 12, - WB_MidNumLet = 13, - WB_Newline = 14, - WB_Numeric = 15, - WB_Perl_Tailored_HSpace = 16, - WB_Regional_Indicator = 17, - WB_Single_Quote = 18, - WB_UNKNOWN = 19 + WB_Extend = 4, + WB_ExtendNumLet = 5, + WB_Format = 6, + WB_Hebrew_Letter = 7, + WB_Katakana = 8, + WB_LF = 9, + WB_MidLetter = 10, + WB_MidNum = 11, + WB_MidNumLet = 12, + WB_Newline = 13, + WB_Numeric = 14, + WB_Perl_Tailored_HSpace = 15, + WB_Regional_Indicator = 16, + WB_Single_Quote = 17, + WB_UNKNOWN = 18, + WB_EDGE = 19 } WB_enum; static const WB_enum _Perl_WB_invmap[] = { /* for EBCDIC 037 */ @@ -74618,5 +74618,5 @@ static const UV XPosixXDigit_invlist[] = { /* for 
EBCDIC 037 */ * cd8623059fc882357fcf6fdea363d1c927af6ad1506e5aea5d383070344183f6 lib/unicore/mktables * 462c9aaa608fb2014cd9649af1c5c009485c60b9c8b15b89401fdc10cf6161c6 lib/unicore/version * 913d2f93f3cb6cdf1664db888bf840bc4eb074eef824e082fceda24a9445e60c regen/charset_translations.pl - * 214ab3909a11fcc57cb6ee0611897342109b5a895b2b42d5227b80d948744a0a regen/mk_invlists.pl + * fc9308a68257fc2184997cf2a25fc143c780c62accc2377d35381a1be5306dc0 regen/mk_invlists.pl * ex: set ro: */ |