diff options
210 files changed, 2323 insertions, 669 deletions
@@ -1493,7 +1493,9 @@ lib/unicore/In/YiRadica.pl Unicode character database lib/unicore/In/YiSyllab.pl Unicode character database lib/unicore/Index.txt Unicode character database lib/unicore/Is.pl Unicode character database -lib/unicore/Is/2.pl Unicode character database +lib/unicore/Is/_CanonDC.pl Unicode character database +lib/unicore/Is/_CaseIgn.pl Unicode character database +lib/unicore/Is/_CombAbo.pl Unicode character database lib/unicore/Is/Alnum.pl Unicode character database lib/unicore/Is/Alpha.pl Unicode character database lib/unicore/Is/Alphabet.pl Unicode character database @@ -1501,7 +1503,7 @@ lib/unicore/Is/Any.pl Unicode character database lib/unicore/Is/Arabic.pl Unicode character database lib/unicore/Is/Armenian.pl Unicode character database lib/unicore/Is/ASCII.pl Unicode character database -lib/unicore/Is/ASCIIHex.pl Unicode character database +lib/unicore/Is/AsciiHex.pl Unicode character database lib/unicore/Is/Assigned.pl Unicode character database lib/unicore/Is/Bengali.pl Unicode character database lib/unicore/Is/BidiAL.pl Unicode character database @@ -1529,8 +1531,6 @@ lib/unicore/Is/Bopomofo.pl Unicode character database lib/unicore/Is/C.pl Unicode character database lib/unicore/Is/Canadian.pl Unicode character database lib/unicore/Is/Canon.pl Unicode character database -lib/unicore/Is/CanonDCI.pl Unicode character database -lib/unicore/Is/CaseIgno.pl Unicode character database lib/unicore/Is/Cc.pl Unicode character database lib/unicore/Is/Cf.pl Unicode character database lib/unicore/Is/Cherokee.pl Unicode character database @@ -1538,7 +1538,6 @@ lib/unicore/Is/Cn.pl Unicode character database lib/unicore/Is/Cntrl.pl Unicode character database lib/unicore/Is/Co.pl Unicode character database lib/unicore/Is/Common.pl Unicode character database -lib/unicore/Is/CombAbov.pl Unicode character database lib/unicore/Is/Compat.pl Unicode character database lib/unicore/Is/Cs.pl Unicode character database lib/unicore/Is/Cyrillic.pl Unicode character database @@ -1577,15 +1576,16 @@ lib/unicore/Is/Hebrew.pl Unicode character database lib/unicore/Is/HexDigit.pl Unicode character database lib/unicore/Is/Hiragana.pl Unicode character database lib/unicore/Is/Hyphen.pl Unicode character database -lib/unicore/Is/IDContin.pl Unicode character database +lib/unicore/Is/IdContin.pl Unicode character database lib/unicore/Is/Ideograp.pl Unicode character database -lib/unicore/Is/IDStart.pl Unicode character database +lib/unicore/Is/IdStart.pl Unicode character database lib/unicore/Is/Inherite.pl Unicode character database lib/unicore/Is/JoinCont.pl Unicode character database lib/unicore/Is/Kannada.pl Unicode character database lib/unicore/Is/Katakana.pl Unicode character database lib/unicore/Is/Khmer.pl Unicode character database lib/unicore/Is/L.pl Unicode character database +lib/unicore/Is/L_.pl Unicode character database lib/unicore/Is/Lao.pl Unicode character database lib/unicore/Is/Latin.pl Unicode character database lib/unicore/Is/LbrkAI.pl Unicode character database @@ -1693,6 +1693,7 @@ lib/unicore/Name.pl Unicode character database lib/unicore/NamesList.html Unicode character database lib/unicore/NamesList.txt Unicode character database lib/unicore/Number.pl Unicode character database +lib/unicore/Properties Built-in \p{...} / \P{...} property list lib/unicore/PropList.html Unicode character database lib/unicore/PropList.txt Unicode character database lib/unicore/README.perl Unicode character database diff --git a/lib/unicore/ArabLink.pl b/lib/unicore/ArabLink.pl index 9e65a9715e..1be0855380 100644 --- a/lib/unicore/ArabLink.pl +++ b/lib/unicore/ArabLink.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; diff --git a/lib/unicore/ArabLnkGrp.pl b/lib/unicore/ArabLnkGrp.pl index 104c30e2bd..a183712d3a 100644 --- a/lib/unicore/ArabLnkGrp.pl +++ b/lib/unicore/ArabLnkGrp.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; diff --git a/lib/unicore/Bidirectional.pl b/lib/unicore/Bidirectional.pl index d9e1f58f60..a158c17da1 100644 --- a/lib/unicore/Bidirectional.pl +++ b/lib/unicore/Bidirectional.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; diff --git a/lib/unicore/Blocks.pl b/lib/unicore/Blocks.pl index 751c9cecb8..9349041263 100644 --- a/lib/unicore/Blocks.pl +++ b/lib/unicore/Blocks.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; diff --git a/lib/unicore/Category.pl b/lib/unicore/Category.pl index c56d9ad46e..bb2272e537 100644 --- a/lib/unicore/Category.pl +++ b/lib/unicore/Category.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; diff --git a/lib/unicore/CombiningClass.pl b/lib/unicore/CombiningClass.pl index 07c1f602b6..86afd9d54a 100644 --- a/lib/unicore/CombiningClass.pl +++ b/lib/unicore/CombiningClass.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; diff --git a/lib/unicore/Decomposition.pl b/lib/unicore/Decomposition.pl index 021066a920..ed2bd4bc0b 100644 --- a/lib/unicore/Decomposition.pl +++ b/lib/unicore/Decomposition.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; diff --git a/lib/unicore/In.pl b/lib/unicore/In.pl index aa04c0ac61..00f16e1855 100644 --- a/lib/unicore/In.pl +++ b/lib/unicore/In.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! ## @@ -16,11 +16,11 @@ 'Alphabetic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms' => 'Alphabet', }, 'ar' => { + 'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?A' => 'ArabicP2', 'Armenian' => 'Armenian', 'Arabic' => 'Arabic', 'Arrows' => 'Arrows', - 'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?A' => 'ArabicPr', - 'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?B' => 'ArabicP2', + 'Arabic(?:[-_]|\s+)?Presentation(?:[-_]|\s+)?Forms(?:[-_]|\s+)?B' => 'ArabicPr', }, 'ba' => { 'Basic(?:[-_]|\s+)?Latin' => 'BasicLat', @@ -32,9 +32,9 @@ 'Block(?:[-_]|\s+)?Elements' => 'BlockEle', }, 'bo' => { - 'Bopomofo' => 'Bopomof2', - 'Bopomofo(?:[-_]|\s+)?Extended' => 'Bopomofo', + 'Bopomofo(?:[-_]|\s+)?Extended' => 'Bopomof2', 'Box(?:[-_]|\s+)?Drawing' => 'BoxDrawi', + 'Bopomofo' => 'Bopomofo', }, 'br' => { 'Braille(?:[-_]|\s+)?Patterns' => 'BrailleP', @@ -46,21 +46,21 @@ 'Cherokee' => 'Cherokee', }, 'cj' => { + 'Cjk(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Extension(?:[-_]|\s+)?A' => 'CjkUnif2', 'Cjk(?:[-_]|\s+)?Radicals(?:[-_]|\s+)?Supplement' => 'CjkRadic', + 'Cjk(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Ideographs' => 'CjkComp3', + 'Cjk(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Supplement' => 'CjkComp4', + 'Cjk(?:[-_]|\s+)?Compatibility' => 'CjkCompa', + 'Cjk(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs' => 'CjkUnifi', 'Cjk(?:[-_]|\s+)?Symbols(?:[-_]|\s+)?And(?:[-_]|\s+)?Punctuation' => 'CjkSymbo', - 'Cjk(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Extension(?:[-_]|\s+)?A' => 'CjkUnif2', + 'Cjk(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Forms' => 'CjkComp2', 'Cjk(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Extension(?:[-_]|\s+)?B' => 'CjkUnif3', - 'Cjk(?:[-_]|\s+)?Unified(?:[-_]|\s+)?Ideographs' => 'CjkUnifi', - 'Cjk(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Ideographs' => 'CjkComp2', - 'Cjk(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Ideographs(?:[-_]|\s+)?Supplement' => 'CjkComp4', - 'Cjk(?:[-_]|\s+)?Compatibility' => 'CjkComp3', - 'Cjk(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Forms' => 'CjkCompa', }, 'co' => { - 'Combining(?:[-_]|\s+)?Marks(?:[-_]|\s+)?For(?:[-_]|\s+)?Symbols' => 'Combini2', - 'Combining(?:[-_]|\s+)?Half(?:[-_]|\s+)?Marks' => 'Combini3', - 'Combining(?:[-_]|\s+)?Diacritical(?:[-_]|\s+)?Marks' => 'Combinin', + 'Combining(?:[-_]|\s+)?Diacritical(?:[-_]|\s+)?Marks' => 'Combini2', + 'Combining(?:[-_]|\s+)?Half(?:[-_]|\s+)?Marks' => 'Combinin', 'Control(?:[-_]|\s+)?Pictures' => 'ControlP', + 'Combining(?:[-_]|\s+)?Marks(?:[-_]|\s+)?For(?:[-_]|\s+)?Symbols' => 'Combini3', }, 'cu' => { 'Currency(?:[-_]|\s+)?Symbols' => 'Currency', @@ -69,15 +69,15 @@ 'Cyrillic' => 'Cyrillic', }, 'de' => { - 'Devanagari' => 'Devanaga', 'Deseret' => 'Deseret', + 'Devanagari' => 'Devanaga', }, 'di' => { 'Dingbats' => 'Dingbats', }, 'en' => { - 'Enclosed(?:[-_]|\s+)?Alphanumerics' => 'Enclose2', - 'Enclosed(?:[-_]|\s+)?Cjk(?:[-_]|\s+)?Letters(?:[-_]|\s+)?And(?:[-_]|\s+)?Months' => 'Enclosed', + 'Enclosed(?:[-_]|\s+)?Alphanumerics' => 'Enclosed', + 'Enclosed(?:[-_]|\s+)?Cjk(?:[-_]|\s+)?Letters(?:[-_]|\s+)?And(?:[-_]|\s+)?Months' => 'Enclose2', }, 'et' => { 'Ethiopic' => 'Ethiopic', @@ -95,12 +95,12 @@ 'Greek' => 'Greek', }, 'gu' => { - 'Gurmukhi' => 'Gurmukhi', 'Gujarati' => 'Gujarati', + 'Gurmukhi' => 'Gurmukhi', }, 'ha' => { - 'Hangul(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Jamo' => 'HangulCo', 'Hangul(?:[-_]|\s+)?Syllables' => 'HangulSy', + 'Hangul(?:[-_]|\s+)?Compatibility(?:[-_]|\s+)?Jamo' => 'HangulCo', 'Halfwidth(?:[-_]|\s+)?And(?:[-_]|\s+)?Fullwidth(?:[-_]|\s+)?Forms' => 'Halfwidt', 'Hangul(?:[-_]|\s+)?Jamo' => 'HangulJa', }, @@ -108,9 +108,9 @@ 'Hebrew' => 'Hebrew', }, 'hi' => { - 'High(?:[-_]|\s+)?Private(?:[-_]|\s+)?Use(?:[-_]|\s+)?Surrogates' => 'HighPriv', - 'Hiragana' => 'Hiragana', 'High(?:[-_]|\s+)?Surrogates' => 'HighSurr', + 'Hiragana' => 'Hiragana', + 'High(?:[-_]|\s+)?Private(?:[-_]|\s+)?Use(?:[-_]|\s+)?Surrogates' => 'HighPriv', }, 'id' => { 'Ideographic(?:[-_]|\s+)?Description(?:[-_]|\s+)?Characters' => 'Ideograp', @@ -119,20 +119,20 @@ 'Ipa(?:[-_]|\s+)?Extensions' => 'IpaExten', }, 'ka' => { - 'Katakana' => 'Katakana', - 'Kangxi(?:[-_]|\s+)?Radicals' => 'KangxiRa', 'Kannada' => 'Kannada', 'Kanbun' => 'Kanbun', + 'Kangxi(?:[-_]|\s+)?Radicals' => 'KangxiRa', + 'Katakana' => 'Katakana', }, 'kh' => { 'Khmer' => 'Khmer', }, 'la' => { - 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?Additional' => 'LatinExt', - 'Latin(?:[-_]|\s+)?1(?:[-_]|\s+)?Supplement' => 'Latin1Su', 'Lao' => 'Lao', + 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?B' => 'LatinExt', 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?A' => 'LatinEx2', - 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?B' => 'LatinEx3', + 'Latin(?:[-_]|\s+)?Extended(?:[-_]|\s+)?Additional' => 'LatinEx3', + 'Latin(?:[-_]|\s+)?1(?:[-_]|\s+)?Supplement' => 'Latin1Su', }, 'le' => { 'Letterlike(?:[-_]|\s+)?Symbols' => 'Letterli', @@ -146,8 +146,8 @@ 'Malayalam' => 'Malayala', }, 'mi' => { - 'Miscellaneous(?:[-_]|\s+)?Symbols' => 'Miscell2', - 'Miscellaneous(?:[-_]|\s+)?Technical' => 'Miscella', + 'Miscellaneous(?:[-_]|\s+)?Technical' => 'Miscell2', + 'Miscellaneous(?:[-_]|\s+)?Symbols' => 'Miscella', }, 'mo' => { 'Mongolian' => 'Mongolia', @@ -186,8 +186,8 @@ 'Small(?:[-_]|\s+)?Form(?:[-_]|\s+)?Variants' => 'SmallFor', }, 'sp' => { - 'Specials' => 'Specials', 'Spacing(?:[-_]|\s+)?Modifier(?:[-_]|\s+)?Letters' => 'SpacingM', + 'Specials' => 'Specials', }, 'su' => { 'Superscripts(?:[-_]|\s+)?And(?:[-_]|\s+)?Subscripts' => 'Superscr', @@ -196,8 +196,8 @@ 'Syriac' => 'Syriac', }, 'ta' => { - 'Tags' => 'Tags', 'Tamil' => 'Tamil', + 'Tags' => 'Tags', }, 'te' => { 'Telugu' => 'Telugu', @@ -213,7 +213,7 @@ 'Unified(?:[-_]|\s+)?Canadian(?:[-_]|\s+)?Aboriginal(?:[-_]|\s+)?Syllabics' => 'UnifiedC', }, 'yi' => { - 'Yi(?:[-_]|\s+)?Radicals' => 'YiRadica', 'Yi(?:[-_]|\s+)?Syllables' => 'YiSyllab', + 'Yi(?:[-_]|\s+)?Radicals' => 'YiRadica', }, ); diff --git a/lib/unicore/Is.pl b/lib/unicore/Is.pl index 5f39461503..91debeedcc 100644 --- a/lib/unicore/Is.pl +++ b/lib/unicore/Is.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! ## @@ -58,8 +58,8 @@ 'DCwide' => 'DCwide', 'Digit' => 'Digit', 'Graph' => 'Graph', - 'L' => '2', - 'L&' => 'L', + 'L' => 'L', + 'L&' => 'L_', 'LbrkAI' => 'LbrkAI', 'LbrkAL' => 'LbrkAL', 'LbrkB2' => 'LbrkB2', @@ -129,16 +129,16 @@ 'Zl' => 'Zl', 'Zp' => 'Zp', 'Zs' => 'Zs', - '_CanonDCIJ' => 'CanonDCI', - '_CaseIgnorable' => 'CaseIgno', - '_CombAbove' => 'CombAbov', + '_CanonDCIJ' => '_CanonDC', + '_CaseIgnorable' => '_CaseIgn', + '_CombAbove' => '_CombAbo', ); ## Mappings from regex to filename in ./Is/ %utf8::IsPat = ( 'al' => { - 'Alphabetic' => 'Alphabet', 'All' => 'Any', + 'Alphabetic' => 'Alphabet', }, 'an' => { 'Any' => 'Any', @@ -148,7 +148,7 @@ 'Arabic' => 'Arabic', }, 'as' => { - 'ASCII(?:[-_]|\s+)?Hex(?:[-_]|\s+)?Digit' => 'ASCIIHex', + 'Ascii(?:[-_]|\s+)?Hex(?:[-_]|\s+)?Digit' => 'AsciiHex', 'Assigned' => 'Assigned', }, 'be' => { @@ -170,9 +170,9 @@ 'Close(?:[-_]|\s+)?Punctuation' => 'Pe', }, 'co' => { - 'Connector(?:[-_]|\s+)?Punctuation' => 'Pc', 'Control' => 'Cc', 'Common' => 'Common', + 'Connector(?:[-_]|\s+)?Punctuation' => 'Pc', }, 'cu' => { 'Currency(?:[-_]|\s+)?Symbol' => 'Sc', @@ -185,9 +185,9 @@ 'Dash' => 'Dash', }, 'de' => { + 'Deseret' => 'Deseret', 'Devanagari' => 'Devanaga', 'Decimal(?:[-_]|\s+)?Number' => 'Nd', - 'Deseret' => 'Deseret', }, 'di' => { 'Diacritic' => 'Diacriti', @@ -217,12 +217,12 @@ 'Greek' => 'Greek', }, 'gu' => { - 'Gurmukhi' => 'Gurmukhi', 'Gujarati' => 'Gujarati', + 'Gurmukhi' => 'Gurmukhi', }, 'ha' => { - 'Han' => 'Han', 'Hangul' => 'Hangul', + 'Han' => 'Han', }, 'he' => { 'Hebrew' => 'Hebrew', @@ -236,8 +236,8 @@ }, 'id' => { 'Ideographic' => 'Ideograp', - 'ID(?:[-_]|\s+)?Continue' => 'IDContin', - 'ID(?:[-_]|\s+)?Start' => 'IDStart', + 'Id(?:[-_]|\s+)?Continue' => 'IdContin', + 'Id(?:[-_]|\s+)?Start' => 'IdStart', }, 'in' => { 'Inherited' => 'Inherite', @@ -247,44 +247,44 @@ 'Join(?:[-_]|\s+)?Control' => 'JoinCont', }, 'ka' => { - 'Katakana' => 'Katakana', 'Kannada' => 'Kannada', + 'Katakana' => 'Katakana', }, 'kh' => { 'Khmer' => 'Khmer', }, 'la' => { - 'Latin' => 'Latin', 'Lao' => 'Lao', + 'Latin' => 'Latin', }, 'le' => { 'Letter(?:[-_]|\s+)?Number' => 'Nl', - 'Letter' => '2', + 'Letter' => 'L', }, 'li' => { 'Line(?:[-_]|\s+)?Separator' => 'Zl', }, 'lo' => { - 'Lowercase(?:[-_]|\s+)?Letter' => 'Ll', 'Lowercase' => 'Lowercas', + 'Lowercase(?:[-_]|\s+)?Letter' => 'Ll', }, 'ma' => { - 'Math(?:[-_]|\s+)?Symbol' => 'Sm', - 'Mark' => 'M', 'Math' => 'Math', 'Malayalam' => 'Malayala', + 'Mark' => 'M', + 'Math(?:[-_]|\s+)?Symbol' => 'Sm', }, 'mo' => { - 'Modifier(?:[-_]|\s+)?Letter' => 'Lm', 'Modifier(?:[-_]|\s+)?Symbol' => 'Sk', 'Mongolian' => 'Mongolia', + 'Modifier(?:[-_]|\s+)?Letter' => 'Lm', }, 'my' => { 'Myanmar' => 'Myanmar', }, 'no' => { - 'Non(?:[-_]|\s+)?Spacing(?:[-_]|\s+)?Mark' => 'Mn', 'Noncharacter(?:[-_]|\s+)?Code(?:[-_]|\s+)?Point' => 'Nonchara', + 'Non(?:[-_]|\s+)?Spacing(?:[-_]|\s+)?Mark' => 'Mn', }, 'nu' => { 'Number' => 'N', @@ -302,15 +302,15 @@ 'Oriya' => 'Oriya', }, 'ot' => { - 'Other(?:[-_]|\s+)?Math' => 'OtherMat', 'Other(?:[-_]|\s+)?Punctuation' => 'Po', - 'Other(?:[-_]|\s+)?Lowercase' => 'OtherLow', 'Other(?:[-_]|\s+)?Uppercase' => 'OtherUpp', - 'Other(?:[-_]|\s+)?Letter' => 'Lo', 'Other(?:[-_]|\s+)?Alphabetic' => 'OtherAlp', 'Other(?:[-_]|\s+)?Symbol' => 'So', 'Other(?:[-_]|\s+)?Number' => 'No', 'Other' => 'C', + 'Other(?:[-_]|\s+)?Math' => 'OtherMat', + 'Other(?:[-_]|\s+)?Letter' => 'Lo', + 'Other(?:[-_]|\s+)?Lowercase' => 'OtherLow', }, 'pa' => { 'Paragraph(?:[-_]|\s+)?Separator' => 'Zp', @@ -348,26 +348,26 @@ 'Tamil' => 'Tamil', }, 'te' => { - 'Terminal(?:[-_]|\s+)?Punctuation' => 'Terminal', 'Telugu' => 'Telugu', + 'Terminal(?:[-_]|\s+)?Punctuation' => 'Terminal', }, 'th' => { 'Thaana' => 'Thaana', 'Thai' => 'Thai', }, 'ti' => { - 'Titlecase(?:[-_]|\s+)?Letter' => 'Lt', 'Tibetan' => 'Tibetan', + 'Titlecase(?:[-_]|\s+)?Letter' => 'Lt', }, 'un' => { 'Unassigned' => 'Cn', }, 'up' => { - 'Uppercase(?:[-_]|\s+)?Letter' => 'Lu', 'Uppercase' => 'Uppercas', + 'Uppercase(?:[-_]|\s+)?Letter' => 'Lu', }, 'wh' => { - 'White(?:[-_]|\s+)?space' => 'WhiteSpa', + 'White(?:[-_]|\s+)?Space' => 'WhiteSpa', }, 'yi' => { 'Yi' => 'Yi', diff --git a/lib/unicore/Is/2.pl b/lib/unicore/Is/2.pl deleted file mode 100644 index 45bee6afc3..0000000000 --- a/lib/unicore/Is/2.pl +++ /dev/null @@ -1,298 +0,0 @@ -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. -# Any changes made here will be lost! - -return <<'END'; -0041 005A -0061 007A -00AA -00B5 -00BA -00C0 00D6 -00D8 00F6 -00F8 021F -0222 0233 -0250 02AD -02B0 02B8 -02BB 02C1 -02D0 02D1 -02E0 02E4 -02EE -037A -0386 -0388 038A -038C -038E 03A1 -03A3 03CE -03D0 03D7 -03DA 03F5 -0400 0481 -048C 04C4 -04C7 04C8 -04CB 04CC -04D0 04F5 -04F8 04F9 -0531 0556 -0559 -0561 0587 -05D0 05EA -05F0 05F2 -0621 063A -0640 064A -0671 06D3 -06D5 -06E5 06E6 -06FA 06FC -0710 -0712 072C -0780 07A5 -0905 0939 -093D -0950 -0958 0961 -0985 098C -098F 0990 -0993 09A8 -09AA 09B0 -09B2 -09B6 09B9 -09DC 09DD -09DF 09E1 -09F0 09F1 -0A05 0A0A -0A0F 0A10 -0A13 0A28 -0A2A 0A30 -0A32 0A33 -0A35 0A36 -0A38 0A39 -0A59 0A5C -0A5E -0A72 0A74 -0A85 0A8B -0A8D -0A8F 0A91 -0A93 0AA8 -0AAA 0AB0 -0AB2 0AB3 -0AB5 0AB9 -0ABD -0AD0 -0AE0 -0B05 0B0C -0B0F 0B10 -0B13 0B28 -0B2A 0B30 -0B32 0B33 -0B36 0B39 -0B3D -0B5C 0B5D -0B5F 0B61 -0B85 0B8A -0B8E 0B90 -0B92 0B95 -0B99 0B9A -0B9C -0B9E 0B9F -0BA3 0BA4 -0BA8 0BAA -0BAE 0BB5 -0BB7 0BB9 -0C05 0C0C -0C0E 0C10 -0C12 0C28 -0C2A 0C33 -0C35 0C39 -0C60 0C61 -0C85 0C8C -0C8E 0C90 -0C92 0CA8 -0CAA 0CB3 -0CB5 0CB9 -0CDE -0CE0 0CE1 -0D05 0D0C -0D0E 0D10 -0D12 0D28 -0D2A 0D39 -0D60 0D61 -0D85 0D96 -0D9A 0DB1 -0DB3 0DBB -0DBD -0DC0 0DC6 -0E01 0E30 -0E32 0E33 -0E40 0E46 -0E81 0E82 -0E84 -0E87 0E88 -0E8A -0E8D -0E94 0E97 -0E99 0E9F -0EA1 0EA3 -0EA5 -0EA7 -0EAA 0EAB -0EAD 0EB0 -0EB2 0EB3 -0EBD -0EC0 0EC4 -0EC6 -0EDC 0EDD -0F00 -0F40 0F47 -0F49 0F6A -0F88 0F8B -1000 1021 -1023 1027 -1029 102A -1050 1055 -10A0 10C5 -10D0 10F6 -1100 1159 -115F 11A2 -11A8 11F9 -1200 1206 -1208 1246 -1248 -124A 124D -1250 1256 -1258 -125A 125D -1260 1286 -1288 -128A 128D -1290 12AE -12B0 -12B2 12B5 -12B8 12BE -12C0 -12C2 12C5 -12C8 12CE -12D0 12D6 -12D8 12EE -12F0 130E -1310 -1312 1315 -1318 131E -1320 1346 -1348 135A -13A0 13F4 -1401 166C -166F 1676 -1681 169A -16A0 16EA -1780 17B3 -1820 1877 -1880 18A8 -1E00 1E9B -1EA0 1EF9 -1F00 1F15 -1F18 1F1D -1F20 1F45 -1F48 1F4D -1F50 1F57 -1F59 -1F5B -1F5D -1F5F 1F7D -1F80 1FB4 -1FB6 1FBC -1FBE -1FC2 1FC4 -1FC6 1FCC -1FD0 1FD3 -1FD6 1FDB -1FE0 1FEC -1FF2 1FF4 -1FF6 1FFC -207F -2102 -2107 -210A 2113 -2115 -2119 211D -2124 -2126 -2128 -212A 212D -212F 2131 -2133 2139 -3005 3006 -3031 3035 -3041 3094 -309D 309E -30A1 30FA -30FC 30FE -3105 312C -3131 318E -31A0 31B7 -3400 4DB5 -4E00 9FA5 -A000 A48C -AC00 D7A3 -F900 FA2D -FB00 FB06 -FB13 FB17 -FB1D -FB1F FB28 -FB2A FB36 -FB38 FB3C -FB3E -FB40 FB41 -FB43 FB44 -FB46 FBB1 -FBD3 FD3D -FD50 FD8F -FD92 FDC7 -FDF0 FDFB -FE70 FE72 -FE74 -FE76 FEFC -FF21 FF3A -FF41 FF5A -FF66 FFBE -FFC2 FFC7 -FFCA FFCF -FFD2 FFD7 -FFDA FFDC -10300 1031E -10330 10349 -10400 10425 -10428 1044D -1D400 1D454 -1D456 1D49C -1D49E 1D49F -1D4A2 -1D4A5 1D4A6 -1D4A9 1D4AC -1D4AE 1D4B9 -1D4BB -1D4BD 1D4C0 -1D4C2 1D4C3 -1D4C5 1D505 -1D507 1D50A -1D50D 1D514 -1D516 1D51C -1D51E 1D539 -1D53B 1D53E -1D540 1D544 -1D546 -1D54A 1D550 -1D552 1D6A3 -1D6A8 1D6C0 -1D6C2 1D6DA -1D6DC 1D6FA -1D6FC 1D714 -1D716 1D734 -1D736 1D74E -1D750 1D76E -1D770 1D788 -1D78A 1D7A8 -1D7AA 1D7C2 -1D7C4 1D7C9 -20000 2A6D6 -2F800 2FA1D -END diff --git a/lib/unicore/Is/ASCII.pl b/lib/unicore/Is/ASCII.pl index 38371c41e5..c40837d58b 100644 --- a/lib/unicore/Is/ASCII.pl +++ b/lib/unicore/Is/ASCII.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{ASCII} +# +# Meaning: [[:ASCII:]] +# return <<'END'; 0000 007F END diff --git a/lib/unicore/Is/Alnum.pl b/lib/unicore/Is/Alnum.pl index ba30997909..9fdf74eb47 100644 --- a/lib/unicore/Is/Alnum.pl +++ b/lib/unicore/Is/Alnum.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Alnum} +# +# Meaning: [[:Alnum:]] +# return <<'END'; 0030 0039 0041 005A diff --git a/lib/unicore/Is/Alpha.pl b/lib/unicore/Is/Alpha.pl index 6e71b2dc5e..c0923d8a3e 100644 --- a/lib/unicore/Is/Alpha.pl +++ b/lib/unicore/Is/Alpha.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Alpha} +# +# Meaning: [[:Alpha:]] +# return <<'END'; 0041 005A 0061 007A diff --git a/lib/unicore/Is/Alphabet.pl b/lib/unicore/Is/Alphabet.pl index 39657f636f..d7462dd280 100644 --- a/lib/unicore/Is/Alphabet.pl +++ b/lib/unicore/Is/Alphabet.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Alphabetic} (and fuzzy permutations) +# +# Meaning: [\p{L}\p{OtherAlphabetic}] +# return <<'END'; 0041 005A 0061 007A diff --git a/lib/unicore/Is/Any.pl b/lib/unicore/Is/Any.pl index 6fbc564158..45a06365bf 100644 --- a/lib/unicore/Is/Any.pl +++ b/lib/unicore/Is/Any.pl @@ -1,7 +1,14 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Any} (and fuzzy permutations) +# \p{All} (and fuzzy permutations) +# +# Meaning: [\x{0000}-\x{10FFFF}] +# return <<'END'; 0000 10FFFF END diff --git a/lib/unicore/Is/Arabic.pl b/lib/unicore/Is/Arabic.pl index 10b15cdbca..4b66297c51 100644 --- a/lib/unicore/Is/Arabic.pl +++ b/lib/unicore/Is/Arabic.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Arabic} (and fuzzy permutations) +# +# Meaning: Script 'ARABIC' +# return <<'END'; 0621 063A ARABIC 0641 064A ARABIC diff --git a/lib/unicore/Is/Armenian.pl b/lib/unicore/Is/Armenian.pl index 3c296f138b..567452d377 100644 --- a/lib/unicore/Is/Armenian.pl +++ b/lib/unicore/Is/Armenian.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Armenian} (and fuzzy permutations) +# +# Meaning: Script 'ARMENIAN' +# return <<'END'; 0531 0556 ARMENIAN 0559 ARMENIAN diff --git a/lib/unicore/Is/ASCIIHex.pl b/lib/unicore/Is/AsciiHex.pl index f05281cbf8..ef8a62da0a 100644 --- a/lib/unicore/Is/ASCIIHex.pl +++ b/lib/unicore/Is/AsciiHex.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{AsciiHexDigit} (and fuzzy permutations) +# +# Meaning: Extended property 'ASCII_Hex_Digit' +# return <<'END'; 0030 0039 ASCII_Hex_Digit 0041 0046 ASCII_Hex_Digit diff --git a/lib/unicore/Is/Assigned.pl b/lib/unicore/Is/Assigned.pl index f304e3d684..3646421d65 100644 --- a/lib/unicore/Is/Assigned.pl +++ b/lib/unicore/Is/Assigned.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Assigned} (and fuzzy permutations) +# +# Meaning: All assigned code points +# return <<'END'; 0000 021F 0222 0233 diff --git a/lib/unicore/Is/Bengali.pl b/lib/unicore/Is/Bengali.pl index 540b1c125f..8a04b2024e 100644 --- a/lib/unicore/Is/Bengali.pl +++ b/lib/unicore/Is/Bengali.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Bengali} (and fuzzy permutations) +# +# Meaning: Script 'BENGALI' +# return <<'END'; 0981 BENGALI 0985 098C BENGALI diff --git a/lib/unicore/Is/BidiAL.pl b/lib/unicore/Is/BidiAL.pl index ab60d14b14..872a1a60f8 100644 --- a/lib/unicore/Is/BidiAL.pl +++ b/lib/unicore/Is/BidiAL.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{BidiAL} +# +# Meaning: Bi-directional category 'AL' +# return <<'END'; 061B 061F diff --git a/lib/unicore/Is/BidiAN.pl b/lib/unicore/Is/BidiAN.pl index 9213e1cf8d..6c401fce9b 100644 --- a/lib/unicore/Is/BidiAN.pl +++ b/lib/unicore/Is/BidiAN.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{BidiAN} +# +# Meaning: Bi-directional category 'AN' +# return <<'END'; 0660 0669 066B 066C diff --git a/lib/unicore/Is/BidiB.pl b/lib/unicore/Is/BidiB.pl index 448c7f7665..d0c069c1b1 100644 --- a/lib/unicore/Is/BidiB.pl +++ b/lib/unicore/Is/BidiB.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{BidiB} +# +# Meaning: Bi-directional category 'B' +# return <<'END'; 000A 000D diff --git a/lib/unicore/Is/BidiBN.pl b/lib/unicore/Is/BidiBN.pl index ff79c3b671..f0e171b2d2 100644 --- a/lib/unicore/Is/BidiBN.pl +++ b/lib/unicore/Is/BidiBN.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{BidiBN} +# +# Meaning: Bi-directional category 'BN' +# return <<'END'; 0000 0008 000E 001B diff --git a/lib/unicore/Is/BidiCS.pl b/lib/unicore/Is/BidiCS.pl index 924351cea9..129154e496 100644 --- a/lib/unicore/Is/BidiCS.pl +++ b/lib/unicore/Is/BidiCS.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{BidiCS} +# +# Meaning: Bi-directional category 'CS' +# return <<'END'; 002C 002E diff --git a/lib/unicore/Is/BidiCont.pl b/lib/unicore/Is/BidiCont.pl index 0ef690eead..62a70c8da7 100644 --- a/lib/unicore/Is/BidiCont.pl +++ b/lib/unicore/Is/BidiCont.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{BidiControl} (and fuzzy permutations) +# +# Meaning: Extended property 'Bidi_Control' +# return <<'END'; 200E 200F Bidi_Control 202A 202E Bidi_Control diff --git a/lib/unicore/Is/BidiEN.pl b/lib/unicore/Is/BidiEN.pl index 8bd7ee4ccb..3834487af5 100644 --- a/lib/unicore/Is/BidiEN.pl +++ b/lib/unicore/Is/BidiEN.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{BidiEN} +# +# Meaning: Bi-directional category 'EN' +# return <<'END'; 0030 0039 00B2 00B3 diff --git a/lib/unicore/Is/BidiES.pl b/lib/unicore/Is/BidiES.pl index 3d033bb5d7..e62c1288c3 100644 --- a/lib/unicore/Is/BidiES.pl +++ b/lib/unicore/Is/BidiES.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{BidiES} +# +# Meaning: Bi-directional category 'ES' +# return <<'END'; 002F FF0F diff --git a/lib/unicore/Is/BidiET.pl b/lib/unicore/Is/BidiET.pl index 56a4a5f6bb..092ac54307 100644 --- a/lib/unicore/Is/BidiET.pl +++ b/lib/unicore/Is/BidiET.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{BidiET} +# +# Meaning: Bi-directional category 'ET' +# return <<'END'; 0023 0025 002B diff --git a/lib/unicore/Is/BidiL.pl b/lib/unicore/Is/BidiL.pl index 872a6cc311..bb13e01ad7 100644 --- a/lib/unicore/Is/BidiL.pl +++ b/lib/unicore/Is/BidiL.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{BidiL} +# +# Meaning: Bi-directional category 'L' +# return <<'END'; 0041 005A 0061 007A diff --git a/lib/unicore/Is/BidiLRE.pl b/lib/unicore/Is/BidiLRE.pl index aabf153f12..208ec0a28c 100644 --- a/lib/unicore/Is/BidiLRE.pl +++ b/lib/unicore/Is/BidiLRE.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{BidiLRE} +# +# Meaning: Bi-directional category 'LRE' +# return <<'END'; 202A END diff --git a/lib/unicore/Is/BidiLRO.pl b/lib/unicore/Is/BidiLRO.pl index 8e246626e6..f85446bd22 100644 --- a/lib/unicore/Is/BidiLRO.pl +++ b/lib/unicore/Is/BidiLRO.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{BidiLRO} +# +# Meaning: Bi-directional category 'LRO' +# return <<'END'; 202D END diff --git a/lib/unicore/Is/BidiNSM.pl b/lib/unicore/Is/BidiNSM.pl index 3d3a5e1755..303cdb88fc 100644 --- a/lib/unicore/Is/BidiNSM.pl +++ b/lib/unicore/Is/BidiNSM.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{BidiNSM} +# +# Meaning: Bi-directional category 'NSM' +# return <<'END'; 0300 034E 0360 0362 diff --git a/lib/unicore/Is/BidiON.pl b/lib/unicore/Is/BidiON.pl index fc94845f5e..7a205bf0d2 100644 --- a/lib/unicore/Is/BidiON.pl +++ b/lib/unicore/Is/BidiON.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{BidiON} +# +# Meaning: Bi-directional category 'ON' +# return <<'END'; 0021 0022 0026 002A diff --git a/lib/unicore/Is/BidiPDF.pl b/lib/unicore/Is/BidiPDF.pl index 514c525559..35a417f77f 100644 --- a/lib/unicore/Is/BidiPDF.pl +++ b/lib/unicore/Is/BidiPDF.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{BidiPDF} +# +# Meaning: Bi-directional category 'PDF' +# return <<'END'; 202C END diff --git a/lib/unicore/Is/BidiR.pl b/lib/unicore/Is/BidiR.pl index 785fa68ced..44d2445ba3 100644 --- a/lib/unicore/Is/BidiR.pl +++ b/lib/unicore/Is/BidiR.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{BidiR} +# +# Meaning: Bi-directional category 'R' +# return <<'END'; 05BE 05C0 diff --git a/lib/unicore/Is/BidiRLE.pl b/lib/unicore/Is/BidiRLE.pl index 94d64fcf6e..f85f09af41 100644 --- a/lib/unicore/Is/BidiRLE.pl +++ b/lib/unicore/Is/BidiRLE.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{BidiRLE} +# +# Meaning: Bi-directional category 'RLE' +# return <<'END'; 202B END diff --git a/lib/unicore/Is/BidiRLO.pl b/lib/unicore/Is/BidiRLO.pl index 1ddc022277..03629888de 100644 --- a/lib/unicore/Is/BidiRLO.pl +++ b/lib/unicore/Is/BidiRLO.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{BidiRLO} +# +# Meaning: Bi-directional category 'RLO' +# return <<'END'; 202E END diff --git a/lib/unicore/Is/BidiS.pl b/lib/unicore/Is/BidiS.pl index c091b7f920..a7ba061798 100644 --- a/lib/unicore/Is/BidiS.pl +++ b/lib/unicore/Is/BidiS.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{BidiS} +# +# Meaning: Bi-directional category 'S' +# return <<'END'; 0009 000B diff --git a/lib/unicore/Is/BidiWS.pl b/lib/unicore/Is/BidiWS.pl index c29ceac0c4..44414f0d5c 100644 --- a/lib/unicore/Is/BidiWS.pl +++ b/lib/unicore/Is/BidiWS.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{BidiWS} +# +# Meaning: Bi-directional category 'WS' +# return <<'END'; 000C 0020 diff --git a/lib/unicore/Is/Blank.pl b/lib/unicore/Is/Blank.pl index c30f42780c..73abe8220e 100644 --- a/lib/unicore/Is/Blank.pl +++ b/lib/unicore/Is/Blank.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Blank} +# +# Meaning: [[:Blank:]] +# return <<'END'; 0009 0020 diff --git a/lib/unicore/Is/Bopomofo.pl b/lib/unicore/Is/Bopomofo.pl index f78f022aa3..f4f1b70f78 100644 --- a/lib/unicore/Is/Bopomofo.pl +++ b/lib/unicore/Is/Bopomofo.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Bopomofo} (and fuzzy permutations) +# +# Meaning: Script 'BOPOMOFO' +# return <<'END'; 3105 312C BOPOMOFO 31A0 31B7 BOPOMOFO diff --git a/lib/unicore/Is/C.pl b/lib/unicore/Is/C.pl index e59fc12f03..f9f7420d23 100644 --- a/lib/unicore/Is/C.pl +++ b/lib/unicore/Is/C.pl @@ -1,7 +1,14 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{C} +# \p{Other} (and fuzzy permutations) +# +# Meaning: Major Category 'C' +# return <<'END'; 0000 001F 007F 009F diff --git a/lib/unicore/Is/Canadian.pl b/lib/unicore/Is/Canadian.pl index 16e082d90a..dd223f1ff3 100644 --- a/lib/unicore/Is/Canadian.pl +++ b/lib/unicore/Is/Canadian.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{CanadianAboriginal} (and fuzzy permutations) +# +# Meaning: Script 'CANADIAN-ABORIGINAL' +# return <<'END'; 1401 166C CANADIAN-ABORIGINAL 166F 1676 CANADIAN-ABORIGINAL diff --git a/lib/unicore/Is/Canon.pl b/lib/unicore/Is/Canon.pl index 4d757c68a2..f01176318c 100644 --- a/lib/unicore/Is/Canon.pl +++ b/lib/unicore/Is/Canon.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Canon} +# +# Meaning: Decomposes to multiple characters +# return <<'END'; 00C0 00C5 00C7 00CF diff --git a/lib/unicore/Is/CanonDCI.pl b/lib/unicore/Is/CanonDCI.pl deleted file mode 100644 index cbde5da1f6..0000000000 --- a/lib/unicore/Is/CanonDCI.pl +++ /dev/null @@ -1,10 +0,0 @@ -# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. -# Any changes made here will be lost! - -return <<'END'; -0069 006A -012F -1E2D -1ECB -END diff --git a/lib/unicore/Is/Cc.pl b/lib/unicore/Is/Cc.pl index 3b5c61fc59..0d654abf50 100644 --- a/lib/unicore/Is/Cc.pl +++ b/lib/unicore/Is/Cc.pl @@ -1,7 +1,14 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Cc} +# \p{Control} (and fuzzy permutations) +# +# Meaning: General Category 'Cc' +# return <<'END'; 0000 001F 007F 009F diff --git a/lib/unicore/Is/Cf.pl b/lib/unicore/Is/Cf.pl index c877b18bd9..efc1336eb1 100644 --- a/lib/unicore/Is/Cf.pl +++ b/lib/unicore/Is/Cf.pl @@ -1,7 +1,14 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Cf} +# \p{Format} (and fuzzy permutations) +# +# Meaning: General Category 'Cf' +# return <<'END'; 070F 180B 180E diff --git a/lib/unicore/Is/Cherokee.pl b/lib/unicore/Is/Cherokee.pl index 8ede678917..9546fd7b21 100644 --- a/lib/unicore/Is/Cherokee.pl +++ b/lib/unicore/Is/Cherokee.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Cherokee} (and fuzzy permutations) +# +# Meaning: Script 'CHEROKEE' +# return <<'END'; 13A0 13F4 CHEROKEE END diff --git a/lib/unicore/Is/Cn.pl b/lib/unicore/Is/Cn.pl index 4b3b029724..b2598e7f41 100644 --- a/lib/unicore/Is/Cn.pl +++ b/lib/unicore/Is/Cn.pl @@ -1,7 +1,14 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Cn} +# \p{Unassigned} (and fuzzy permutations) +# +# Meaning: General Category 'Cn' [not functional in Perl] +# return <<'END'; 0220 0221 0234 024F diff --git a/lib/unicore/Is/Cntrl.pl b/lib/unicore/Is/Cntrl.pl index 3bd0913247..cb64dffc01 100644 --- a/lib/unicore/Is/Cntrl.pl +++ b/lib/unicore/Is/Cntrl.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Cntrl} +# +# Meaning: [[:Cntrl:]] +# return <<'END'; 0000 001F 007F 009F diff --git a/lib/unicore/Is/Co.pl b/lib/unicore/Is/Co.pl index 368bc30f6b..46da434ec9 100644 --- a/lib/unicore/Is/Co.pl +++ b/lib/unicore/Is/Co.pl @@ -1,7 +1,14 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Co} +# \p{PrivateUse} (and fuzzy permutations) +# +# Meaning: General Category 'Co' +# return <<'END'; E000 F8FF F0000 FFFFD diff --git a/lib/unicore/Is/Common.pl b/lib/unicore/Is/Common.pl index 3caaeed180..39156aec8b 100644 --- a/lib/unicore/Is/Common.pl +++ b/lib/unicore/Is/Common.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Common} (and fuzzy permutations) +# +# Meaning: Pseudo-Script of codepoints not in other Unicode scripts +# return <<'END'; 0000 0040 005B 0060 diff --git a/lib/unicore/Is/Compat.pl b/lib/unicore/Is/Compat.pl index b4d97a016c..0d8519e037 100644 --- a/lib/unicore/Is/Compat.pl +++ b/lib/unicore/Is/Compat.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Compat} +# +# Meaning: Compatible with a more-basic character +# return <<'END'; 00A0 00A8 diff --git a/lib/unicore/Is/Cs.pl b/lib/unicore/Is/Cs.pl index 1d0caab81f..a6a181fd2b 100644 --- a/lib/unicore/Is/Cs.pl +++ b/lib/unicore/Is/Cs.pl @@ -1,7 +1,14 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Cs} +# \p{Surrogate} (and fuzzy permutations) +# +# Meaning: General Category 'Cs' +# return <<'END'; D800 DFFF END diff --git a/lib/unicore/Is/Cyrillic.pl b/lib/unicore/Is/Cyrillic.pl index 846a362697..c2da17985c 100644 --- a/lib/unicore/Is/Cyrillic.pl +++ b/lib/unicore/Is/Cyrillic.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Cyrillic} (and fuzzy permutations) +# +# Meaning: Script 'CYRILLIC' +# return <<'END'; 0400 0481 CYRILLIC 0483 0486 CYRILLIC diff --git a/lib/unicore/Is/DCcircle.pl b/lib/unicore/Is/DCcircle.pl index fa80aa5664..8cdd900855 100644 --- a/lib/unicore/Is/DCcircle.pl +++ b/lib/unicore/Is/DCcircle.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{DCcircle} +# +# Meaning: Compatible with 'circle' +# return <<'END'; 2460 2473 24B6 24EA diff --git a/lib/unicore/Is/DCcompat.pl b/lib/unicore/Is/DCcompat.pl index 8df8c0a1b6..dedd1db6f9 100644 --- a/lib/unicore/Is/DCcompat.pl +++ b/lib/unicore/Is/DCcompat.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{DCcompat} +# +# Meaning: Compatible with 'compat' +# return <<'END'; 00A8 00AF diff --git a/lib/unicore/Is/DCfinal.pl b/lib/unicore/Is/DCfinal.pl index 0c368dabca..35b7a2bbd1 100644 --- a/lib/unicore/Is/DCfinal.pl +++ b/lib/unicore/Is/DCfinal.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{DCfinal} +# +# Meaning: Compatible with 'final' +# return <<'END'; FB51 FB53 diff --git a/lib/unicore/Is/DCfont.pl b/lib/unicore/Is/DCfont.pl index 9adac97a97..d854833279 100644 --- a/lib/unicore/Is/DCfont.pl +++ b/lib/unicore/Is/DCfont.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{DCfont} +# +# Meaning: Compatible with 'font' +# return <<'END'; 2102 210A 2113 diff --git a/lib/unicore/Is/DCfracti.pl b/lib/unicore/Is/DCfracti.pl index 9e174a86ff..829f92c6a3 100644 --- a/lib/unicore/Is/DCfracti.pl +++ b/lib/unicore/Is/DCfracti.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{DCfraction} +# +# Meaning: Compatible with 'fraction' +# return <<'END'; 00BC 00BE 2153 215F diff --git a/lib/unicore/Is/DCinitia.pl b/lib/unicore/Is/DCinitia.pl index 07fcacc47d..d1806fc575 100644 --- a/lib/unicore/Is/DCinitia.pl +++ b/lib/unicore/Is/DCinitia.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{DCinitial} +# +# Meaning: Compatible with 'initial' +# return <<'END'; FB54 FB58 diff --git a/lib/unicore/Is/DCisolat.pl b/lib/unicore/Is/DCisolat.pl index 689e83c481..4d6c1d67bf 100644 --- a/lib/unicore/Is/DCisolat.pl +++ b/lib/unicore/Is/DCisolat.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{DCisolated} +# +# Meaning: Compatible with 'isolated' +# return <<'END'; FB50 FB52 diff --git a/lib/unicore/Is/DCmedial.pl b/lib/unicore/Is/DCmedial.pl index 2b5ccb9893..7fd7ee2561 100644 --- a/lib/unicore/Is/DCmedial.pl +++ b/lib/unicore/Is/DCmedial.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{DCmedial} +# +# Meaning: Compatible with 'medial' +# return <<'END'; FB55 FB59 diff --git a/lib/unicore/Is/DCnarrow.pl b/lib/unicore/Is/DCnarrow.pl index 55a4c60ad0..ed77c2e0be 100644 --- a/lib/unicore/Is/DCnarrow.pl +++ b/lib/unicore/Is/DCnarrow.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{DCnarrow} +# +# Meaning: Compatible with 'narrow' +# return <<'END'; FF61 FFBE FFC2 FFC7 diff --git a/lib/unicore/Is/DCnoBrea.pl b/lib/unicore/Is/DCnoBrea.pl index cd0c258445..57874cf570 100644 --- a/lib/unicore/Is/DCnoBrea.pl +++ b/lib/unicore/Is/DCnoBrea.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{DCnoBreak} +# +# Meaning: Compatible with 'noBreak' +# return <<'END'; 00A0 0F0C diff --git a/lib/unicore/Is/DCsmall.pl b/lib/unicore/Is/DCsmall.pl index 56c1d0fb8f..aa89e45fca 100644 --- a/lib/unicore/Is/DCsmall.pl +++ b/lib/unicore/Is/DCsmall.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{DCsmall} +# +# Meaning: Compatible with 'small' +# return <<'END'; FE50 FE52 FE54 FE66 diff --git a/lib/unicore/Is/DCsquare.pl b/lib/unicore/Is/DCsquare.pl index 175ce4054d..07dc325373 100644 --- a/lib/unicore/Is/DCsquare.pl +++ b/lib/unicore/Is/DCsquare.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{DCsquare} +# +# Meaning: Compatible with 'square' +# return <<'END'; 3300 3357 3371 3376 diff --git a/lib/unicore/Is/DCsub.pl b/lib/unicore/Is/DCsub.pl index eeff3e1317..ea5467b1f3 100644 --- a/lib/unicore/Is/DCsub.pl +++ b/lib/unicore/Is/DCsub.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{DCsub} +# +# Meaning: Compatible with 'sub' +# return <<'END'; 2080 208E END diff --git a/lib/unicore/Is/DCsuper.pl b/lib/unicore/Is/DCsuper.pl index 9a7b2f513b..13c7dd7485 100644 --- a/lib/unicore/Is/DCsuper.pl +++ b/lib/unicore/Is/DCsuper.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{DCsuper} +# +# Meaning: Compatible with 'super' +# return <<'END'; 00AA 00B2 00B3 diff --git a/lib/unicore/Is/DCvertic.pl b/lib/unicore/Is/DCvertic.pl index 28a7b0c4c9..8fa6b8c7ba 100644 --- a/lib/unicore/Is/DCvertic.pl +++ b/lib/unicore/Is/DCvertic.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{DCvertical} +# +# Meaning: Compatible with 'vertical' +# return <<'END'; FE30 FE44 END diff --git a/lib/unicore/Is/DCwide.pl b/lib/unicore/Is/DCwide.pl index 63d6a0de54..eb3f542cdf 100644 --- a/lib/unicore/Is/DCwide.pl +++ b/lib/unicore/Is/DCwide.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{DCwide} +# +# Meaning: Compatible with 'wide' +# return <<'END'; 3000 FF01 FF5E diff --git a/lib/unicore/Is/Dash.pl b/lib/unicore/Is/Dash.pl index be50e7aa34..00f3589682 100644 --- a/lib/unicore/Is/Dash.pl +++ b/lib/unicore/Is/Dash.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Dash} (and fuzzy permutations) +# +# Meaning: Extended property 'Dash' +# return <<'END'; 002D Dash 00AD Dash diff --git a/lib/unicore/Is/Deseret.pl b/lib/unicore/Is/Deseret.pl index ac4dc6b364..210e7a1661 100644 --- a/lib/unicore/Is/Deseret.pl +++ b/lib/unicore/Is/Deseret.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Deseret} (and fuzzy permutations) +# +# Meaning: Script 'DESERET' +# return <<'END'; 10400 10425 DESERET 10428 1044D DESERET diff --git a/lib/unicore/Is/Devanaga.pl b/lib/unicore/Is/Devanaga.pl index 50212d78a3..904708bc35 100644 --- a/lib/unicore/Is/Devanaga.pl +++ b/lib/unicore/Is/Devanaga.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Devanagari} (and fuzzy permutations) +# +# Meaning: Script 'DEVANAGARI' +# return <<'END'; 0901 0903 DEVANAGARI 0905 0939 DEVANAGARI diff --git a/lib/unicore/Is/Diacriti.pl b/lib/unicore/Is/Diacriti.pl index e83be3f0cc..ddd09eccc8 100644 --- a/lib/unicore/Is/Diacriti.pl +++ b/lib/unicore/Is/Diacriti.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Diacritic} (and fuzzy permutations) +# +# Meaning: Extended property 'Diacritic' +# return <<'END'; 005E Diacritic 0060 Diacritic diff --git a/lib/unicore/Is/Digit.pl b/lib/unicore/Is/Digit.pl index b0bd5c44cc..d494682d09 100644 --- a/lib/unicore/Is/Digit.pl +++ b/lib/unicore/Is/Digit.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Digit} +# +# Meaning: [[:Digit:]] +# return <<'END'; 0030 0039 0660 0669 diff --git a/lib/unicore/Is/Ethiopic.pl b/lib/unicore/Is/Ethiopic.pl index 5eb51ba19d..2627f83170 100644 --- a/lib/unicore/Is/Ethiopic.pl +++ b/lib/unicore/Is/Ethiopic.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Ethiopic} (and fuzzy permutations) +# +# Meaning: Script 'ETHIOPIC' +# return <<'END'; 1200 1206 ETHIOPIC 1208 1246 ETHIOPIC diff --git a/lib/unicore/Is/Extender.pl b/lib/unicore/Is/Extender.pl index f759d10a45..587cc5b540 100644 --- a/lib/unicore/Is/Extender.pl +++ b/lib/unicore/Is/Extender.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Extender} (and fuzzy permutations) +# +# Meaning: Extended property 'Extender' +# return <<'END'; 00B7 Extender 02D0 02D1 Extender diff --git a/lib/unicore/Is/Georgian.pl b/lib/unicore/Is/Georgian.pl index 2b5c27b92e..94d1445730 100644 --- a/lib/unicore/Is/Georgian.pl +++ b/lib/unicore/Is/Georgian.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Georgian} (and fuzzy permutations) +# +# Meaning: Script 'GEORGIAN' +# return <<'END'; 10A0 10C5 GEORGIAN 10D0 10F6 GEORGIAN diff --git a/lib/unicore/Is/Gothic.pl b/lib/unicore/Is/Gothic.pl index 4be2bd371a..6a25e6c234 100644 --- a/lib/unicore/Is/Gothic.pl +++ b/lib/unicore/Is/Gothic.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Gothic} (and fuzzy permutations) +# +# Meaning: Script 'GOTHIC' +# return <<'END'; 10330 1034A GOTHIC END diff --git a/lib/unicore/Is/Graph.pl b/lib/unicore/Is/Graph.pl index 1dd2402bb5..cc76eb2a23 100644 --- a/lib/unicore/Is/Graph.pl +++ b/lib/unicore/Is/Graph.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Graph} +# +# Meaning: [[:Graph:]] +# return <<'END'; 0021 007E 00A1 021F diff --git a/lib/unicore/Is/Greek.pl b/lib/unicore/Is/Greek.pl index ca3a22cafd..177a0d68c8 100644 --- a/lib/unicore/Is/Greek.pl +++ b/lib/unicore/Is/Greek.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Greek} (and fuzzy permutations) +# +# Meaning: Script 'GREEK' +# return <<'END'; 00B5 GREEK 037A GREEK diff --git a/lib/unicore/Is/Gujarati.pl b/lib/unicore/Is/Gujarati.pl index 066360f892..ef62ccc7fd 100644 --- a/lib/unicore/Is/Gujarati.pl +++ b/lib/unicore/Is/Gujarati.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Gujarati} (and fuzzy permutations) +# +# Meaning: Script 'GUJARATI' +# return <<'END'; 0A81 0A83 GUJARATI 0A85 0A8B GUJARATI diff --git a/lib/unicore/Is/Gurmukhi.pl b/lib/unicore/Is/Gurmukhi.pl index 4b5f4fdc43..4b16a9e433 100644 --- a/lib/unicore/Is/Gurmukhi.pl +++ b/lib/unicore/Is/Gurmukhi.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Gurmukhi} (and fuzzy permutations) +# +# Meaning: Script 'GURMUKHI' +# return <<'END'; 0A02 GURMUKHI 0A05 0A0A GURMUKHI diff --git a/lib/unicore/Is/Han.pl b/lib/unicore/Is/Han.pl index 4a6480088c..549ce1e5d6 100644 --- a/lib/unicore/Is/Han.pl +++ b/lib/unicore/Is/Han.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Han} (and fuzzy permutations) +# +# Meaning: Script 'HAN' +# return <<'END'; 2E80 2E99 HAN 2E9B 2EF3 HAN diff --git a/lib/unicore/Is/Hangul.pl b/lib/unicore/Is/Hangul.pl index 893cfc13cb..df24cd20a1 100644 --- a/lib/unicore/Is/Hangul.pl +++ b/lib/unicore/Is/Hangul.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Hangul} (and fuzzy permutations) +# +# Meaning: Script 'HANGUL' +# return <<'END'; 1100 1159 HANGUL 115F 11A2 HANGUL diff --git a/lib/unicore/Is/Hebrew.pl b/lib/unicore/Is/Hebrew.pl index d67aae746e..a4fb1eb435 100644 --- a/lib/unicore/Is/Hebrew.pl +++ b/lib/unicore/Is/Hebrew.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Hebrew} (and fuzzy permutations) +# +# Meaning: Script 'HEBREW' +# return <<'END'; 05D0 05EA HEBREW 05F0 05F2 HEBREW diff --git a/lib/unicore/Is/HexDigit.pl b/lib/unicore/Is/HexDigit.pl index 25a8e46105..38b30718f8 100644 --- a/lib/unicore/Is/HexDigit.pl +++ b/lib/unicore/Is/HexDigit.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{HexDigit} (and fuzzy permutations) +# +# Meaning: Extended property 'Hex_Digit' +# return <<'END'; 0030 0039 Hex_Digit 0041 0046 Hex_Digit diff --git a/lib/unicore/Is/Hiragana.pl b/lib/unicore/Is/Hiragana.pl index cfd901bcf6..8731f2b84b 100644 --- a/lib/unicore/Is/Hiragana.pl +++ b/lib/unicore/Is/Hiragana.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Hiragana} (and fuzzy permutations) +# +# Meaning: Script 'HIRAGANA' +# return <<'END'; 3041 3094 HIRAGANA 309D 309E HIRAGANA diff --git a/lib/unicore/Is/Hyphen.pl b/lib/unicore/Is/Hyphen.pl index 769b016a46..e868bcaf93 100644 --- a/lib/unicore/Is/Hyphen.pl +++ b/lib/unicore/Is/Hyphen.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Hyphen} (and fuzzy permutations) +# +# Meaning: Extended property 'Hyphen' +# return <<'END'; 002D Hyphen 00AD Hyphen diff --git a/lib/unicore/Is/IDContin.pl b/lib/unicore/Is/IdContin.pl index 294edd057d..365e88643c 100644 --- a/lib/unicore/Is/IDContin.pl +++ b/lib/unicore/Is/IdContin.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{IdContinue} (and fuzzy permutations) +# +# Meaning: [\p{ID_Start}\p{Mn}\p{Mc}\p{Nd}\p{Pc}] +# return <<'END'; 0030 0039 0041 005A diff --git a/lib/unicore/Is/IDStart.pl b/lib/unicore/Is/IdStart.pl index 76be64a6f8..4d89817fac 100644 --- a/lib/unicore/Is/IDStart.pl +++ b/lib/unicore/Is/IdStart.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{IdStart} (and fuzzy permutations) +# +# Meaning: [\p{Ll}\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{Nl}] +# return <<'END'; 0041 005A 0061 007A diff --git a/lib/unicore/Is/Ideograp.pl b/lib/unicore/Is/Ideograp.pl index aab71a3cbf..505bbb9ab5 100644 --- a/lib/unicore/Is/Ideograp.pl +++ b/lib/unicore/Is/Ideograp.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Ideographic} (and fuzzy permutations) +# +# Meaning: Extended property 'Ideographic' +# return <<'END'; 3006 3007 Ideographic 3021 3029 Ideographic diff --git a/lib/unicore/Is/Inherite.pl b/lib/unicore/Is/Inherite.pl index 4bbdbb260a..d52a465c54 100644 --- a/lib/unicore/Is/Inherite.pl +++ b/lib/unicore/Is/Inherite.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Inherited} (and fuzzy permutations) +# +# Meaning: Script 'INHERITED' +# return <<'END'; 0300 034E INHERITED 0360 0362 INHERITED diff --git a/lib/unicore/Is/JoinCont.pl b/lib/unicore/Is/JoinCont.pl index 0c6a305ed6..9e8278a13f 100644 --- a/lib/unicore/Is/JoinCont.pl +++ b/lib/unicore/Is/JoinCont.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{JoinControl} (and fuzzy permutations) +# +# Meaning: Extended property 'Join_Control' +# return <<'END'; 200C 200D Join_Control END diff --git a/lib/unicore/Is/Kannada.pl b/lib/unicore/Is/Kannada.pl index c3f3db9c77..3fa302ff36 100644 --- a/lib/unicore/Is/Kannada.pl +++ b/lib/unicore/Is/Kannada.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Kannada} (and fuzzy permutations) +# +# Meaning: Script 'KANNADA' +# return <<'END'; 0C82 0C83 KANNADA 0C85 0C8C KANNADA diff --git a/lib/unicore/Is/Katakana.pl b/lib/unicore/Is/Katakana.pl index 3e1d26a368..1a55151f3f 100644 --- a/lib/unicore/Is/Katakana.pl +++ b/lib/unicore/Is/Katakana.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Katakana} (and fuzzy permutations) +# +# Meaning: Script 'KATAKANA' +# return <<'END'; 30A1 30FA KATAKANA 30FD 30FE KATAKANA diff --git a/lib/unicore/Is/Khmer.pl b/lib/unicore/Is/Khmer.pl index 6fdf98e10d..108bafb3ab 100644 --- a/lib/unicore/Is/Khmer.pl +++ b/lib/unicore/Is/Khmer.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Khmer} (and fuzzy permutations) +# +# Meaning: Script 'KHMER' +# return <<'END'; 1780 17D3 KHMER 17E0 17E9 KHMER diff --git a/lib/unicore/Is/L.pl b/lib/unicore/Is/L.pl index f9cdb5624b..663fd1e28b 100644 --- a/lib/unicore/Is/L.pl +++ b/lib/unicore/Is/L.pl @@ -1,7 +1,14 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{L} +# \p{Letter} (and fuzzy permutations) +# +# Meaning: Major Category 'L' +# return <<'END'; 0041 005A 0061 007A @@ -10,11 +17,15 @@ return <<'END'; 00BA 00C0 00D6 00D8 00F6 -00F8 01BA -01BC 01BF -01C4 021F +00F8 021F 0222 0233 0250 02AD +02B0 02B8 +02BB 02C1 +02D0 02D1 +02E0 02E4 +02EE +037A 0386 0388 038A 038C @@ -29,8 +40,160 @@ return <<'END'; 04D0 04F5 04F8 04F9 0531 0556 +0559 0561 0587 +05D0 05EA +05F0 05F2 +0621 063A +0640 064A +0671 06D3 +06D5 +06E5 06E6 +06FA 06FC +0710 +0712 072C +0780 07A5 +0905 0939 +093D +0950 +0958 0961 +0985 098C +098F 0990 +0993 09A8 +09AA 09B0 +09B2 +09B6 09B9 +09DC 09DD +09DF 09E1 +09F0 09F1 +0A05 0A0A +0A0F 0A10 +0A13 0A28 +0A2A 0A30 +0A32 0A33 +0A35 0A36 +0A38 0A39 +0A59 0A5C +0A5E +0A72 0A74 +0A85 0A8B +0A8D +0A8F 0A91 +0A93 0AA8 +0AAA 0AB0 +0AB2 0AB3 +0AB5 0AB9 +0ABD +0AD0 +0AE0 +0B05 0B0C +0B0F 0B10 +0B13 0B28 +0B2A 0B30 +0B32 0B33 +0B36 0B39 +0B3D +0B5C 0B5D +0B5F 0B61 +0B85 0B8A +0B8E 0B90 +0B92 0B95 +0B99 0B9A +0B9C +0B9E 0B9F +0BA3 0BA4 +0BA8 0BAA +0BAE 0BB5 +0BB7 0BB9 +0C05 0C0C +0C0E 0C10 +0C12 0C28 +0C2A 0C33 +0C35 0C39 +0C60 0C61 +0C85 0C8C +0C8E 0C90 +0C92 0CA8 +0CAA 0CB3 +0CB5 0CB9 +0CDE +0CE0 0CE1 +0D05 0D0C +0D0E 0D10 +0D12 0D28 +0D2A 0D39 +0D60 0D61 +0D85 0D96 +0D9A 0DB1 +0DB3 0DBB +0DBD +0DC0 0DC6 +0E01 0E30 +0E32 0E33 +0E40 0E46 +0E81 0E82 +0E84 +0E87 0E88 +0E8A +0E8D +0E94 0E97 +0E99 0E9F +0EA1 0EA3 +0EA5 +0EA7 +0EAA 0EAB +0EAD 0EB0 +0EB2 0EB3 +0EBD +0EC0 0EC4 +0EC6 +0EDC 0EDD +0F00 +0F40 0F47 +0F49 0F6A +0F88 0F8B +1000 1021 +1023 1027 +1029 102A +1050 1055 10A0 10C5 +10D0 10F6 +1100 1159 +115F 11A2 +11A8 11F9 +1200 1206 +1208 1246 +1248 +124A 124D +1250 1256 +1258 +125A 125D +1260 1286 +1288 +128A 128D +1290 12AE +12B0 +12B2 12B5 +12B8 12BE +12C0 +12C2 12C5 +12C8 12CE +12D0 12D6 +12D8 12EE +12F0 130E +1310 +1312 1315 +1318 131E +1320 1346 +1348 135A +13A0 13F4 +1401 166C +166F 1676 +1681 169A +16A0 16EA +1780 17B3 +1820 1877 +1880 18A8 1E00 1E9B 1EA0 1EF9 1F00 1F15 @@ -63,12 +226,47 @@ return <<'END'; 2128 212A 212D 212F 2131 -2133 2134 -2139 +2133 2139 +3005 3006 +3031 3035 +3041 3094 +309D 309E +30A1 30FA +30FC 30FE +3105 312C +3131 318E +31A0 31B7 +3400 4DB5 +4E00 9FA5 +A000 A48C +AC00 D7A3 +F900 FA2D FB00 FB06 FB13 FB17 +FB1D +FB1F FB28 +FB2A FB36 +FB38 FB3C +FB3E +FB40 FB41 +FB43 FB44 +FB46 FBB1 +FBD3 FD3D +FD50 FD8F +FD92 FDC7 +FDF0 FDFB +FE70 FE72 +FE74 +FE76 FEFC FF21 FF3A FF41 FF5A +FF66 FFBE +FFC2 FFC7 +FFCA FFCF +FFD2 FFD7 +FFDA FFDC +10300 1031E +10330 10349 10400 10425 10428 1044D 1D400 1D454 @@ -102,4 +300,6 @@ FF41 FF5A 1D78A 1D7A8 1D7AA 1D7C2 1D7C4 1D7C9 +20000 2A6D6 +2F800 2FA1D END diff --git a/lib/unicore/Is/L_.pl b/lib/unicore/Is/L_.pl new file mode 100644 index 0000000000..b2e89eb889 --- /dev/null +++ b/lib/unicore/Is/L_.pl @@ -0,0 +1,111 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by ./mktables from e.g. Unicode.txt. +# Any changes made here will be lost! + +# +# This file supports: +# \p{L&} +# +# Meaning: [\p{Ll}\p{Lu}\p{Lt}] +# +return <<'END'; +0041 005A +0061 007A +00AA +00B5 +00BA +00C0 00D6 +00D8 00F6 +00F8 01BA +01BC 01BF +01C4 021F +0222 0233 +0250 02AD +0386 +0388 038A +038C +038E 03A1 +03A3 03CE +03D0 03D7 +03DA 03F5 +0400 0481 +048C 04C4 +04C7 04C8 +04CB 04CC +04D0 04F5 +04F8 04F9 +0531 0556 +0561 0587 +10A0 10C5 +1E00 1E9B +1EA0 1EF9 +1F00 1F15 +1F18 1F1D +1F20 1F45 +1F48 1F4D +1F50 1F57 +1F59 +1F5B +1F5D +1F5F 1F7D +1F80 1FB4 +1FB6 1FBC +1FBE +1FC2 1FC4 +1FC6 1FCC +1FD0 1FD3 +1FD6 1FDB +1FE0 1FEC +1FF2 1FF4 +1FF6 1FFC +207F +2102 +2107 +210A 2113 +2115 +2119 211D +2124 +2126 +2128 +212A 212D +212F 2131 +2133 2134 +2139 +FB00 FB06 +FB13 FB17 +FF21 FF3A +FF41 FF5A +10400 10425 +10428 1044D +1D400 1D454 +1D456 1D49C +1D49E 1D49F +1D4A2 +1D4A5 1D4A6 +1D4A9 1D4AC +1D4AE 1D4B9 +1D4BB +1D4BD 1D4C0 +1D4C2 1D4C3 +1D4C5 1D505 +1D507 1D50A +1D50D 1D514 +1D516 1D51C +1D51E 1D539 +1D53B 1D53E +1D540 1D544 +1D546 +1D54A 1D550 +1D552 1D6A3 +1D6A8 1D6C0 +1D6C2 1D6DA +1D6DC 1D6FA +1D6FC 1D714 +1D716 1D734 +1D736 1D74E +1D750 1D76E +1D770 1D788 +1D78A 1D7A8 +1D7AA 1D7C2 +1D7C4 1D7C9 +END diff --git a/lib/unicore/Is/Lao.pl b/lib/unicore/Is/Lao.pl index 33ae4ff8ed..e4822238f1 100644 --- a/lib/unicore/Is/Lao.pl +++ b/lib/unicore/Is/Lao.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Lao} (and fuzzy permutations) +# +# Meaning: Script 'LAO' +# return <<'END'; 0E81 0E82 LAO 0E84 LAO diff --git a/lib/unicore/Is/Latin.pl b/lib/unicore/Is/Latin.pl index 246e827a40..4c26446105 100644 --- a/lib/unicore/Is/Latin.pl +++ b/lib/unicore/Is/Latin.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Latin} (and fuzzy permutations) +# +# Meaning: Script 'LATIN' +# return <<'END'; 0041 005A LATIN 0061 007A LATIN diff --git a/lib/unicore/Is/LbrkAI.pl b/lib/unicore/Is/LbrkAI.pl index 662aabbbe4..36e3e17951 100644 --- a/lib/unicore/Is/LbrkAI.pl +++ b/lib/unicore/Is/LbrkAI.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{LbrkAI} +# +# Meaning: Linebreak category 'AI' +# return <<'END'; 00A1 00A7 00A8 diff --git a/lib/unicore/Is/LbrkAL.pl b/lib/unicore/Is/LbrkAL.pl index 1f57d148f1..59b3c4ae4b 100644 --- a/lib/unicore/Is/LbrkAL.pl +++ b/lib/unicore/Is/LbrkAL.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{LbrkAL} +# +# Meaning: Linebreak category 'AL' +# return <<'END'; 0023 0026 diff --git a/lib/unicore/Is/LbrkB2.pl b/lib/unicore/Is/LbrkB2.pl index 5e3e1becef..dee0b69208 100644 --- a/lib/unicore/Is/LbrkB2.pl +++ b/lib/unicore/Is/LbrkB2.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{LbrkB2} +# +# Meaning: Linebreak category 'B2' +# return <<'END'; 2014 END diff --git a/lib/unicore/Is/LbrkBA.pl b/lib/unicore/Is/LbrkBA.pl index 7ea38cdca3..fcc8c6186b 100644 --- a/lib/unicore/Is/LbrkBA.pl +++ b/lib/unicore/Is/LbrkBA.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{LbrkBA} +# +# Meaning: Linebreak category 'BA' +# return <<'END'; 0009 007C diff --git a/lib/unicore/Is/LbrkBB.pl b/lib/unicore/Is/LbrkBB.pl index e684b8493c..5f71338f50 100644 --- a/lib/unicore/Is/LbrkBB.pl +++ b/lib/unicore/Is/LbrkBB.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{LbrkBB} +# +# Meaning: Linebreak category 'BB' +# return <<'END'; 00B4 02C8 diff --git a/lib/unicore/Is/LbrkBK.pl b/lib/unicore/Is/LbrkBK.pl index 9558ecfaf9..27b43897ca 100644 --- a/lib/unicore/Is/LbrkBK.pl +++ b/lib/unicore/Is/LbrkBK.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{LbrkBK} +# +# Meaning: Linebreak category 'BK' +# return <<'END'; 000C 2028 2029 diff --git a/lib/unicore/Is/LbrkCB.pl b/lib/unicore/Is/LbrkCB.pl index e482d7d05d..2a71fd52aa 100644 --- a/lib/unicore/Is/LbrkCB.pl +++ b/lib/unicore/Is/LbrkCB.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{LbrkCB} +# +# Meaning: Linebreak category 'CB' +# return <<'END'; FFFC END diff --git a/lib/unicore/Is/LbrkCL.pl b/lib/unicore/Is/LbrkCL.pl index 259024a697..beeeadc061 100644 --- a/lib/unicore/Is/LbrkCL.pl +++ b/lib/unicore/Is/LbrkCL.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{LbrkCL} +# +# Meaning: Linebreak category 'CL' +# return <<'END'; 0029 005D diff --git a/lib/unicore/Is/LbrkCM.pl b/lib/unicore/Is/LbrkCM.pl index f26ca32ccf..1db78a9901 100644 --- a/lib/unicore/Is/LbrkCM.pl +++ b/lib/unicore/Is/LbrkCM.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{LbrkCM} +# +# Meaning: Linebreak category 'CM' +# return <<'END'; 0000 0008 000B diff --git a/lib/unicore/Is/LbrkCR.pl b/lib/unicore/Is/LbrkCR.pl index 02b9af153a..22a470259c 100644 --- a/lib/unicore/Is/LbrkCR.pl +++ b/lib/unicore/Is/LbrkCR.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{LbrkCR} +# +# Meaning: Linebreak category 'CR' +# return <<'END'; 000D END diff --git a/lib/unicore/Is/LbrkEX.pl b/lib/unicore/Is/LbrkEX.pl index 7d5949ebe9..48626e02a6 100644 --- a/lib/unicore/Is/LbrkEX.pl +++ b/lib/unicore/Is/LbrkEX.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{LbrkEX} +# +# Meaning: Linebreak category 'EX' +# return <<'END'; 0021 003F diff --git a/lib/unicore/Is/LbrkGL.pl b/lib/unicore/Is/LbrkGL.pl index 3a88b9da11..ae2f909cb5 100644 --- a/lib/unicore/Is/LbrkGL.pl +++ b/lib/unicore/Is/LbrkGL.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{LbrkGL} +# +# Meaning: Linebreak category 'GL' +# return <<'END'; 00A0 0F0C diff --git a/lib/unicore/Is/LbrkHY.pl b/lib/unicore/Is/LbrkHY.pl index 6c9f8c7e54..09cfd3651a 100644 --- a/lib/unicore/Is/LbrkHY.pl +++ b/lib/unicore/Is/LbrkHY.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{LbrkHY} +# +# Meaning: Linebreak category 'HY' +# return <<'END'; 002D END diff --git a/lib/unicore/Is/LbrkID.pl b/lib/unicore/Is/LbrkID.pl index 1ef996b11d..f91dd0ec2d 100644 --- a/lib/unicore/Is/LbrkID.pl +++ b/lib/unicore/Is/LbrkID.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{LbrkID} +# +# Meaning: Linebreak category 'ID' +# return <<'END'; 1100 1159 115F diff --git a/lib/unicore/Is/LbrkIN.pl b/lib/unicore/Is/LbrkIN.pl index 97f962dc19..e2920eb0e6 100644 --- a/lib/unicore/Is/LbrkIN.pl +++ b/lib/unicore/Is/LbrkIN.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{LbrkIN} +# +# Meaning: Linebreak category 'IN' +# return <<'END'; 2024 2026 END diff --git a/lib/unicore/Is/LbrkIS.pl b/lib/unicore/Is/LbrkIS.pl index 231623e170..32159badd4 100644 --- a/lib/unicore/Is/LbrkIS.pl +++ b/lib/unicore/Is/LbrkIS.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{LbrkIS} +# +# Meaning: Linebreak category 'IS' +# return <<'END'; 002C 002E diff --git a/lib/unicore/Is/LbrkLF.pl b/lib/unicore/Is/LbrkLF.pl index b0f7d53826..84d9ef433d 100644 --- a/lib/unicore/Is/LbrkLF.pl +++ b/lib/unicore/Is/LbrkLF.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{LbrkLF} +# +# Meaning: Linebreak category 'LF' +# return <<'END'; 000A END diff --git a/lib/unicore/Is/LbrkNS.pl b/lib/unicore/Is/LbrkNS.pl index 9704402243..829d01b48e 100644 --- a/lib/unicore/Is/LbrkNS.pl +++ b/lib/unicore/Is/LbrkNS.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{LbrkNS} +# +# Meaning: Linebreak category 'NS' +# return <<'END'; 0E5A 0E5B 17D4 diff --git a/lib/unicore/Is/LbrkNU.pl b/lib/unicore/Is/LbrkNU.pl index f5b8d9c793..bfecec3da3 100644 --- a/lib/unicore/Is/LbrkNU.pl +++ b/lib/unicore/Is/LbrkNU.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{LbrkNU} +# +# Meaning: Linebreak category 'NU' +# return <<'END'; 0030 0039 0660 0669 diff --git a/lib/unicore/Is/LbrkOP.pl b/lib/unicore/Is/LbrkOP.pl index c9148cd862..6560490a59 100644 --- a/lib/unicore/Is/LbrkOP.pl +++ b/lib/unicore/Is/LbrkOP.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{LbrkOP} +# +# Meaning: Linebreak category 'OP' +# return <<'END'; 0028 005B diff --git a/lib/unicore/Is/LbrkPO.pl b/lib/unicore/Is/LbrkPO.pl index ee94f310f5..0ea55480cc 100644 --- a/lib/unicore/Is/LbrkPO.pl +++ b/lib/unicore/Is/LbrkPO.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{LbrkPO} +# +# Meaning: Linebreak category 'PO' +# return <<'END'; 0025 00A2 diff --git a/lib/unicore/Is/LbrkPR.pl b/lib/unicore/Is/LbrkPR.pl index 8343f7c496..be6c3885bf 100644 --- a/lib/unicore/Is/LbrkPR.pl +++ b/lib/unicore/Is/LbrkPR.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{LbrkPR} +# +# Meaning: Linebreak category 'PR' +# return <<'END'; 0024 002B diff --git a/lib/unicore/Is/LbrkQU.pl b/lib/unicore/Is/LbrkQU.pl index 3ac9a3473a..f23ef75d9e 100644 --- a/lib/unicore/Is/LbrkQU.pl +++ b/lib/unicore/Is/LbrkQU.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{LbrkQU} +# +# Meaning: Linebreak category 'QU' +# return <<'END'; 0022 0027 diff --git a/lib/unicore/Is/LbrkSA.pl b/lib/unicore/Is/LbrkSA.pl index e513c68c6d..fc3d98c582 100644 --- a/lib/unicore/Is/LbrkSA.pl +++ b/lib/unicore/Is/LbrkSA.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{LbrkSA} +# +# Meaning: Linebreak category 'SA' +# return <<'END'; 0E01 0E30 0E32 0E33 diff --git a/lib/unicore/Is/LbrkSG.pl b/lib/unicore/Is/LbrkSG.pl index 1d0caab81f..a5acf16112 100644 --- a/lib/unicore/Is/LbrkSG.pl +++ b/lib/unicore/Is/LbrkSG.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{LbrkSG} +# +# Meaning: Linebreak category 'SG' +# return <<'END'; D800 DFFF END diff --git a/lib/unicore/Is/LbrkSP.pl b/lib/unicore/Is/LbrkSP.pl index 50aea35478..c21e46de31 100644 --- a/lib/unicore/Is/LbrkSP.pl +++ b/lib/unicore/Is/LbrkSP.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{LbrkSP} +# +# Meaning: Linebreak category 'SP' +# return <<'END'; 0020 END diff --git a/lib/unicore/Is/LbrkSY.pl b/lib/unicore/Is/LbrkSY.pl index 970e4148e0..554b30232c 100644 --- a/lib/unicore/Is/LbrkSY.pl +++ b/lib/unicore/Is/LbrkSY.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{LbrkSY} +# +# Meaning: Linebreak category 'SY' +# return <<'END'; 002F END diff --git a/lib/unicore/Is/LbrkXX.pl b/lib/unicore/Is/LbrkXX.pl index 368bc30f6b..6ab9fcfc4d 100644 --- a/lib/unicore/Is/LbrkXX.pl +++ b/lib/unicore/Is/LbrkXX.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{LbrkXX} +# +# Meaning: Linebreak category 'XX' +# return <<'END'; E000 F8FF F0000 FFFFD diff --git a/lib/unicore/Is/LbrkZW.pl b/lib/unicore/Is/LbrkZW.pl index 8566bccb12..a338cbae6d 100644 --- a/lib/unicore/Is/LbrkZW.pl +++ b/lib/unicore/Is/LbrkZW.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{LbrkZW} +# +# Meaning: Linebreak category 'ZW' +# return <<'END'; 200B END diff --git a/lib/unicore/Is/Ll.pl b/lib/unicore/Is/Ll.pl index 3a06ff4cf0..1cecfe7890 100644 --- a/lib/unicore/Is/Ll.pl +++ b/lib/unicore/Is/Ll.pl @@ -1,7 +1,14 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Ll} +# \p{LowercaseLetter} (and fuzzy permutations) +# +# Meaning: General Category 'Ll' +# return <<'END'; 0061 007A 00AA diff --git a/lib/unicore/Is/Lm.pl b/lib/unicore/Is/Lm.pl index f0f95f9882..7cbb55e60a 100644 --- a/lib/unicore/Is/Lm.pl +++ b/lib/unicore/Is/Lm.pl @@ -1,7 +1,14 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Lm} +# \p{ModifierLetter} (and fuzzy permutations) +# +# Meaning: General Category 'Lm' +# return <<'END'; 02B0 02B8 02BB 02C1 diff --git a/lib/unicore/Is/Lo.pl b/lib/unicore/Is/Lo.pl index fedd6fe8cd..b0b46e69f5 100644 --- a/lib/unicore/Is/Lo.pl +++ b/lib/unicore/Is/Lo.pl @@ -1,7 +1,14 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Lo} +# \p{OtherLetter} (and fuzzy permutations) +# +# Meaning: General Category 'Lo' +# return <<'END'; 01BB 01C0 01C3 diff --git a/lib/unicore/Is/Lower.pl b/lib/unicore/Is/Lower.pl index 3a06ff4cf0..084a4b22c7 100644 --- a/lib/unicore/Is/Lower.pl +++ b/lib/unicore/Is/Lower.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Lower} +# +# Meaning: [[:Lower:]] +# return <<'END'; 0061 007A 00AA diff --git a/lib/unicore/Is/Lowercas.pl b/lib/unicore/Is/Lowercas.pl index 8df4e288b4..969d821dfa 100644 --- a/lib/unicore/Is/Lowercas.pl +++ b/lib/unicore/Is/Lowercas.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Lowercase} (and fuzzy permutations) +# +# Meaning: [\p{Ll}\p{OtherLowercase}] +# return <<'END'; 0061 007A 00AA diff --git a/lib/unicore/Is/Lt.pl b/lib/unicore/Is/Lt.pl index 31e2aa1b7f..ed8af130c0 100644 --- a/lib/unicore/Is/Lt.pl +++ b/lib/unicore/Is/Lt.pl @@ -1,7 +1,14 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Lt} +# \p{TitlecaseLetter} (and fuzzy permutations) +# +# Meaning: General Category 'Lt' +# return <<'END'; 01C5 01C8 diff --git a/lib/unicore/Is/Lu.pl b/lib/unicore/Is/Lu.pl index be285b89ca..7ded16b508 100644 --- a/lib/unicore/Is/Lu.pl +++ b/lib/unicore/Is/Lu.pl @@ -1,7 +1,14 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Lu} +# \p{UppercaseLetter} (and fuzzy permutations) +# +# Meaning: General Category 'Lu' +# return <<'END'; 0041 005A 00C0 00D6 diff --git a/lib/unicore/Is/M.pl b/lib/unicore/Is/M.pl index 35b2a1ea84..a4cd30ad3e 100644 --- a/lib/unicore/Is/M.pl +++ b/lib/unicore/Is/M.pl @@ -1,7 +1,14 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{M} +# \p{Mark} (and fuzzy permutations) +# +# Meaning: Major Category 'M' +# return <<'END'; 0300 034E 0360 0362 diff --git a/lib/unicore/Is/Malayala.pl b/lib/unicore/Is/Malayala.pl index 3b9ea5b048..de2ec26851 100644 --- a/lib/unicore/Is/Malayala.pl +++ b/lib/unicore/Is/Malayala.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Malayalam} (and fuzzy permutations) +# +# Meaning: Script 'MALAYALAM' +# return <<'END'; 0D02 0D03 MALAYALAM 0D05 0D0C MALAYALAM diff --git a/lib/unicore/Is/Math.pl b/lib/unicore/Is/Math.pl index e917e0d965..389a73d37f 100644 --- a/lib/unicore/Is/Math.pl +++ b/lib/unicore/Is/Math.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Math} (and fuzzy permutations) +# +# Meaning: [\p{Sm}\p{OtherMath}] +# return <<'END'; 0028 002B 002D diff --git a/lib/unicore/Is/Mc.pl b/lib/unicore/Is/Mc.pl index 886eaa2308..5cb32b8448 100644 --- a/lib/unicore/Is/Mc.pl +++ b/lib/unicore/Is/Mc.pl @@ -1,7 +1,14 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Mc} +# \p{SpacingMark} (and fuzzy permutations) +# +# Meaning: General Category 'Mc' +# return <<'END'; 0903 093E 0940 diff --git a/lib/unicore/Is/Me.pl b/lib/unicore/Is/Me.pl index 574648fc01..3afdefde2b 100644 --- a/lib/unicore/Is/Me.pl +++ b/lib/unicore/Is/Me.pl @@ -1,7 +1,14 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Me} +# \p{EnclosingMark} (and fuzzy permutations) +# +# Meaning: General Category 'Me' +# return <<'END'; 0488 0489 06DD 06DE diff --git a/lib/unicore/Is/Mirrored.pl b/lib/unicore/Is/Mirrored.pl index 236ede1c33..2c25ac3dd0 100644 --- a/lib/unicore/Is/Mirrored.pl +++ b/lib/unicore/Is/Mirrored.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Mirrored} +# +# Meaning: Mirrored in bidirectional text +# return <<'END'; 0028 0029 003C diff --git a/lib/unicore/Is/Mn.pl b/lib/unicore/Is/Mn.pl index 150f38f972..c86c640d82 100644 --- a/lib/unicore/Is/Mn.pl +++ b/lib/unicore/Is/Mn.pl @@ -1,7 +1,14 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Mn} +# \p{NonSpacingMark} (and fuzzy permutations) +# +# Meaning: General Category 'Mn' +# return <<'END'; 0300 034E 0360 0362 diff --git a/lib/unicore/Is/Mongolia.pl b/lib/unicore/Is/Mongolia.pl index bb9ce7bc39..b440c67282 100644 --- a/lib/unicore/Is/Mongolia.pl +++ b/lib/unicore/Is/Mongolia.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Mongolian} (and fuzzy permutations) +# +# Meaning: Script 'MONGOLIAN' +# return <<'END'; 1810 1819 MONGOLIAN 1820 1877 MONGOLIAN diff --git a/lib/unicore/Is/Myanmar.pl b/lib/unicore/Is/Myanmar.pl index d2a95e88cd..7428b5123d 100644 --- a/lib/unicore/Is/Myanmar.pl +++ b/lib/unicore/Is/Myanmar.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Myanmar} (and fuzzy permutations) +# +# Meaning: Script 'MYANMAR' +# return <<'END'; 1000 1021 MYANMAR 1023 1027 MYANMAR diff --git a/lib/unicore/Is/N.pl b/lib/unicore/Is/N.pl index 108ac9d86b..57b4170541 100644 --- a/lib/unicore/Is/N.pl +++ b/lib/unicore/Is/N.pl @@ -1,7 +1,14 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{N} +# \p{Number} (and fuzzy permutations) +# +# Meaning: Major Category 'N' +# return <<'END'; 0030 0039 00B2 00B3 diff --git a/lib/unicore/Is/Nd.pl b/lib/unicore/Is/Nd.pl index b0bd5c44cc..c6fd1334a7 100644 --- a/lib/unicore/Is/Nd.pl +++ b/lib/unicore/Is/Nd.pl @@ -1,7 +1,14 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Nd} +# \p{DecimalNumber} (and fuzzy permutations) +# +# Meaning: General Category 'Nd' +# return <<'END'; 0030 0039 0660 0669 diff --git a/lib/unicore/Is/Nl.pl b/lib/unicore/Is/Nl.pl index 899422037a..690bed4a44 100644 --- a/lib/unicore/Is/Nl.pl +++ b/lib/unicore/Is/Nl.pl @@ -1,7 +1,14 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Nl} +# \p{LetterNumber} (and fuzzy permutations) +# +# Meaning: General Category 'Nl' +# return <<'END'; 16EE 16F0 2160 2183 diff --git a/lib/unicore/Is/No.pl b/lib/unicore/Is/No.pl index 3b514fc9ba..f67310ab15 100644 --- a/lib/unicore/Is/No.pl +++ b/lib/unicore/Is/No.pl @@ -1,7 +1,14 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{No} +# \p{OtherNumber} (and fuzzy permutations) +# +# Meaning: General Category 'No' +# return <<'END'; 00B2 00B3 00B9 diff --git a/lib/unicore/Is/Nonchara.pl b/lib/unicore/Is/Nonchara.pl index 31467b338f..a46dc4ee1d 100644 --- a/lib/unicore/Is/Nonchara.pl +++ b/lib/unicore/Is/Nonchara.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{NoncharacterCodePoint} (and fuzzy permutations) +# +# Meaning: Extended property 'Noncharacter_Code_Point' +# return <<'END'; FDD0 FDEF Noncharacter_Code_Point FFFE FFFF Noncharacter_Code_Point diff --git a/lib/unicore/Is/Ogham.pl b/lib/unicore/Is/Ogham.pl index e812dfb0de..c44acffbda 100644 --- a/lib/unicore/Is/Ogham.pl +++ b/lib/unicore/Is/Ogham.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Ogham} (and fuzzy permutations) +# +# Meaning: Script 'OGHAM' +# return <<'END'; 1681 169A OGHAM END diff --git a/lib/unicore/Is/OldItali.pl b/lib/unicore/Is/OldItali.pl index d53ef71bb2..8dc65e7f56 100644 --- a/lib/unicore/Is/OldItali.pl +++ b/lib/unicore/Is/OldItali.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{OldItalic} (and fuzzy permutations) +# +# Meaning: Script 'OLD-ITALIC' +# return <<'END'; 10300 1031E OLD-ITALIC END diff --git a/lib/unicore/Is/Oriya.pl b/lib/unicore/Is/Oriya.pl index 3846d82129..3a4959e04e 100644 --- a/lib/unicore/Is/Oriya.pl +++ b/lib/unicore/Is/Oriya.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Oriya} (and fuzzy permutations) +# +# Meaning: Script 'ORIYA' +# return <<'END'; 0B01 0B03 ORIYA 0B05 0B0C ORIYA diff --git a/lib/unicore/Is/OtherAlp.pl b/lib/unicore/Is/OtherAlp.pl index 8b1b780475..82ef36b600 100644 --- a/lib/unicore/Is/OtherAlp.pl +++ b/lib/unicore/Is/OtherAlp.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{OtherAlphabetic} (and fuzzy permutations) +# +# Meaning: Extended property 'Other_Alphabetic' +# return <<'END'; 0345 Other_Alphabetic 05B0 05B9 Other_Alphabetic diff --git a/lib/unicore/Is/OtherLow.pl b/lib/unicore/Is/OtherLow.pl index 960aac710a..67d48ce4bd 100644 --- a/lib/unicore/Is/OtherLow.pl +++ b/lib/unicore/Is/OtherLow.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{OtherLowercase} (and fuzzy permutations) +# +# Meaning: Extended property 'Other_Lowercase' +# return <<'END'; 02B0 02B8 Other_Lowercase 02C0 02C1 Other_Lowercase diff --git a/lib/unicore/Is/OtherMat.pl b/lib/unicore/Is/OtherMat.pl index 008ff6f57a..199bbf05d4 100644 --- a/lib/unicore/Is/OtherMat.pl +++ b/lib/unicore/Is/OtherMat.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{OtherMath} (and fuzzy permutations) +# +# Meaning: Extended property 'Other_Math' +# return <<'END'; 0028 002A Other_Math 002D Other_Math diff --git a/lib/unicore/Is/OtherUpp.pl b/lib/unicore/Is/OtherUpp.pl index 777a0febec..3e66567dee 100644 --- a/lib/unicore/Is/OtherUpp.pl +++ b/lib/unicore/Is/OtherUpp.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{OtherUppercase} (and fuzzy permutations) +# +# Meaning: Extended property 'Other_Uppercase' +# return <<'END'; 2160 216F Other_Uppercase 24B6 24CF Other_Uppercase diff --git a/lib/unicore/Is/P.pl b/lib/unicore/Is/P.pl index 4f3f989bab..599bc300db 100644 --- a/lib/unicore/Is/P.pl +++ b/lib/unicore/Is/P.pl @@ -1,7 +1,14 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{P} +# \p{Punctuation} (and fuzzy permutations) +# +# Meaning: Major Category 'P' +# return <<'END'; 0021 0023 0025 002A diff --git a/lib/unicore/Is/Pc.pl b/lib/unicore/Is/Pc.pl index f8e38015e4..04a8c1f471 100644 --- a/lib/unicore/Is/Pc.pl +++ b/lib/unicore/Is/Pc.pl @@ -1,7 +1,14 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Pc} +# \p{ConnectorPunctuation} (and fuzzy permutations) +# +# Meaning: General Category 'Pc' +# return <<'END'; 005F 203F 2040 diff --git a/lib/unicore/Is/Pd.pl b/lib/unicore/Is/Pd.pl index ad116ba49d..453ec5a671 100644 --- a/lib/unicore/Is/Pd.pl +++ b/lib/unicore/Is/Pd.pl @@ -1,7 +1,14 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Pd} +# \p{DashPunctuation} (and fuzzy permutations) +# +# Meaning: General Category 'Pd' +# return <<'END'; 002D 00AD diff --git a/lib/unicore/Is/Pe.pl b/lib/unicore/Is/Pe.pl index 8a9abe9ac4..2be04aec70 100644 --- a/lib/unicore/Is/Pe.pl +++ b/lib/unicore/Is/Pe.pl @@ -1,7 +1,14 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Pe} +# \p{ClosePunctuation} (and fuzzy permutations) +# +# Meaning: General Category 'Pe' +# return <<'END'; 0029 005D diff --git a/lib/unicore/Is/Pf.pl b/lib/unicore/Is/Pf.pl index 942d56ba02..b8c60da170 100644 --- a/lib/unicore/Is/Pf.pl +++ b/lib/unicore/Is/Pf.pl @@ -1,7 +1,14 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Pf} +# \p{FinalPunctuation} (and fuzzy permutations) +# +# Meaning: General Category 'Pf' +# return <<'END'; 00BB 2019 diff --git a/lib/unicore/Is/Pi.pl b/lib/unicore/Is/Pi.pl index 844566a728..868d4fb1ee 100644 --- a/lib/unicore/Is/Pi.pl +++ b/lib/unicore/Is/Pi.pl @@ -1,7 +1,14 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Pi} +# \p{InitialPunctuation} (and fuzzy permutations) +# +# Meaning: General Category 'Pi' +# return <<'END'; 00AB 2018 diff --git a/lib/unicore/Is/Po.pl b/lib/unicore/Is/Po.pl index c13f5e132f..c24a8f423f 100644 --- a/lib/unicore/Is/Po.pl +++ b/lib/unicore/Is/Po.pl @@ -1,7 +1,14 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Po} +# \p{OtherPunctuation} (and fuzzy permutations) +# +# Meaning: General Category 'Po' +# return <<'END'; 0021 0023 0025 0027 diff --git a/lib/unicore/Is/Print.pl b/lib/unicore/Is/Print.pl index 8d832d9c5f..5c0a3c7848 100644 --- a/lib/unicore/Is/Print.pl +++ b/lib/unicore/Is/Print.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Print} +# +# Meaning: [[:Print:]] +# return <<'END'; 0020 007E 00A0 021F diff --git a/lib/unicore/Is/Ps.pl b/lib/unicore/Is/Ps.pl index c9148cd862..8c29336da3 100644 --- a/lib/unicore/Is/Ps.pl +++ b/lib/unicore/Is/Ps.pl @@ -1,7 +1,14 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Ps} +# \p{OpenPunctuation} (and fuzzy permutations) +# +# Meaning: General Category 'Ps' +# return <<'END'; 0028 005B diff --git a/lib/unicore/Is/Punct.pl b/lib/unicore/Is/Punct.pl index 4f3f989bab..ca2cdbbf0c 100644 --- a/lib/unicore/Is/Punct.pl +++ b/lib/unicore/Is/Punct.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Punct} +# +# Meaning: [[:Punct:]] +# return <<'END'; 0021 0023 0025 002A diff --git a/lib/unicore/Is/Quotatio.pl b/lib/unicore/Is/Quotatio.pl index faf87ae1c4..70e80f8124 100644 --- a/lib/unicore/Is/Quotatio.pl +++ b/lib/unicore/Is/Quotatio.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{QuotationMark} (and fuzzy permutations) +# +# Meaning: Extended property 'Quotation_Mark' +# return <<'END'; 0022 Quotation_Mark 0027 Quotation_Mark diff --git a/lib/unicore/Is/Runic.pl b/lib/unicore/Is/Runic.pl index 24f1f2b3d5..08fdd5b073 100644 --- a/lib/unicore/Is/Runic.pl +++ b/lib/unicore/Is/Runic.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Runic} (and fuzzy permutations) +# +# Meaning: Script 'RUNIC' +# return <<'END'; 16A0 16EA RUNIC 16EE 16F0 RUNIC diff --git a/lib/unicore/Is/S.pl b/lib/unicore/Is/S.pl index dd0d3cb682..5e51785f5a 100644 --- a/lib/unicore/Is/S.pl +++ b/lib/unicore/Is/S.pl @@ -1,7 +1,14 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{S} +# \p{Symbol} (and fuzzy permutations) +# +# Meaning: Major Category 'S' +# return <<'END'; 0024 002B diff --git a/lib/unicore/Is/Sc.pl b/lib/unicore/Is/Sc.pl index 64c5ef45c4..b9818c2435 100644 --- a/lib/unicore/Is/Sc.pl +++ b/lib/unicore/Is/Sc.pl @@ -1,7 +1,14 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Sc} +# \p{CurrencySymbol} (and fuzzy permutations) +# +# Meaning: General Category 'Sc' +# return <<'END'; 0024 00A2 00A5 diff --git a/lib/unicore/Is/Sinhala.pl b/lib/unicore/Is/Sinhala.pl index aa6973ae21..d7de9bbc58 100644 --- a/lib/unicore/Is/Sinhala.pl +++ b/lib/unicore/Is/Sinhala.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Sinhala} (and fuzzy permutations) +# +# Meaning: Script 'SINHALA' +# return <<'END'; 0D82 0D83 SINHALA 0D85 0D96 SINHALA diff --git a/lib/unicore/Is/Sk.pl b/lib/unicore/Is/Sk.pl index 89e0afae83..47febf5d7e 100644 --- a/lib/unicore/Is/Sk.pl +++ b/lib/unicore/Is/Sk.pl @@ -1,7 +1,14 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Sk} +# \p{ModifierSymbol} (and fuzzy permutations) +# +# Meaning: General Category 'Sk' +# return <<'END'; 005E 0060 diff --git a/lib/unicore/Is/Sm.pl b/lib/unicore/Is/Sm.pl index 1e4798ebaa..5b423bfe1e 100644 --- a/lib/unicore/Is/Sm.pl +++ b/lib/unicore/Is/Sm.pl @@ -1,7 +1,14 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Sm} +# \p{MathSymbol} (and fuzzy permutations) +# +# Meaning: General Category 'Sm' +# return <<'END'; 002B 003C 003E diff --git a/lib/unicore/Is/So.pl b/lib/unicore/Is/So.pl index 9dd20c31b2..7cb9987598 100644 --- a/lib/unicore/Is/So.pl +++ b/lib/unicore/Is/So.pl @@ -1,7 +1,14 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{So} +# \p{OtherSymbol} (and fuzzy permutations) +# +# Meaning: General Category 'So' +# return <<'END'; 00A6 00A7 00A9 diff --git a/lib/unicore/Is/Space.pl b/lib/unicore/Is/Space.pl index 329b169078..ed97335526 100644 --- a/lib/unicore/Is/Space.pl +++ b/lib/unicore/Is/Space.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Space} +# +# Meaning: [[:Space:]] +# return <<'END'; 0009 000D 0020 diff --git a/lib/unicore/Is/SpacePer.pl b/lib/unicore/Is/SpacePer.pl index 8aad047666..b84be26cd2 100644 --- a/lib/unicore/Is/SpacePer.pl +++ b/lib/unicore/Is/SpacePer.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{SpacePerl} +# +# Meaning: \s +# return <<'END'; 0009 000A 000C 000D diff --git a/lib/unicore/Is/Syriac.pl b/lib/unicore/Is/Syriac.pl index 5812afcd02..355493a957 100644 --- a/lib/unicore/Is/Syriac.pl +++ b/lib/unicore/Is/Syriac.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Syriac} (and fuzzy permutations) +# +# Meaning: Script 'SYRIAC' +# return <<'END'; 0710 072C SYRIAC 0730 074A SYRIAC diff --git a/lib/unicore/Is/Tamil.pl b/lib/unicore/Is/Tamil.pl index 5cc188bf26..aaad4ac10b 100644 --- a/lib/unicore/Is/Tamil.pl +++ b/lib/unicore/Is/Tamil.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Tamil} (and fuzzy permutations) +# +# Meaning: Script 'TAMIL' +# return <<'END'; 0B82 0B83 TAMIL 0B85 0B8A TAMIL diff --git a/lib/unicore/Is/Telugu.pl b/lib/unicore/Is/Telugu.pl index 23ca7afa7e..97d051d22a 100644 --- a/lib/unicore/Is/Telugu.pl +++ b/lib/unicore/Is/Telugu.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Telugu} (and fuzzy permutations) +# +# Meaning: Script 'TELUGU' +# return <<'END'; 0C01 0C03 TELUGU 0C05 0C0C TELUGU diff --git a/lib/unicore/Is/Terminal.pl b/lib/unicore/Is/Terminal.pl index 3eebfe1ca8..4bfbd11025 100644 --- a/lib/unicore/Is/Terminal.pl +++ b/lib/unicore/Is/Terminal.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{TerminalPunctuation} (and fuzzy permutations) +# +# Meaning: Extended property 'Terminal_Punctuation' +# return <<'END'; 0021 Terminal_Punctuation 002C Terminal_Punctuation diff --git a/lib/unicore/Is/Thaana.pl b/lib/unicore/Is/Thaana.pl index 40037e4ac8..5007ea77a3 100644 --- a/lib/unicore/Is/Thaana.pl +++ b/lib/unicore/Is/Thaana.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Thaana} (and fuzzy permutations) +# +# Meaning: Script 'THAANA' +# return <<'END'; 0780 07B0 THAANA END diff --git a/lib/unicore/Is/Thai.pl b/lib/unicore/Is/Thai.pl index 2fe22fe7bd..1b72367d0e 100644 --- a/lib/unicore/Is/Thai.pl +++ b/lib/unicore/Is/Thai.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Thai} (and fuzzy permutations) +# +# Meaning: Script 'THAI' +# return <<'END'; 0E01 0E3A THAI 0E40 0E4E THAI diff --git a/lib/unicore/Is/Tibetan.pl b/lib/unicore/Is/Tibetan.pl index 55c1f67b6c..89c6c7cb6d 100644 --- a/lib/unicore/Is/Tibetan.pl +++ b/lib/unicore/Is/Tibetan.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Tibetan} (and fuzzy permutations) +# +# Meaning: Script 'TIBETAN' +# return <<'END'; 0F00 TIBETAN 0F18 0F19 TIBETAN diff --git a/lib/unicore/Is/Title.pl b/lib/unicore/Is/Title.pl index 31e2aa1b7f..b13f42b354 100644 --- a/lib/unicore/Is/Title.pl +++ b/lib/unicore/Is/Title.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Title} +# +# Meaning: [[:Title:]] +# return <<'END'; 01C5 01C8 diff --git a/lib/unicore/Is/Upper.pl b/lib/unicore/Is/Upper.pl index be285b89ca..6b3d280df5 100644 --- a/lib/unicore/Is/Upper.pl +++ b/lib/unicore/Is/Upper.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Upper} +# +# Meaning: [[:Upper:]] +# return <<'END'; 0041 005A 00C0 00D6 diff --git a/lib/unicore/Is/Uppercas.pl b/lib/unicore/Is/Uppercas.pl index 4a1db5d2aa..c216b45ea7 100644 --- a/lib/unicore/Is/Uppercas.pl +++ b/lib/unicore/Is/Uppercas.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Uppercase} (and fuzzy permutations) +# +# Meaning: [\p{Lu}\p{Other_Uppercase}] +# return <<'END'; 0041 005A 00C0 00D6 diff --git a/lib/unicore/Is/WhiteSpa.pl b/lib/unicore/Is/WhiteSpa.pl index 0966372cc0..18c90dea1f 100644 --- a/lib/unicore/Is/WhiteSpa.pl +++ b/lib/unicore/Is/WhiteSpa.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{WhiteSpace} (and fuzzy permutations) +# +# Meaning: Extended property 'White_space' +# return <<'END'; 0009 000D White_space 0020 White_space diff --git a/lib/unicore/Is/Word.pl b/lib/unicore/Is/Word.pl index aeb0b41003..c65866bd3e 100644 --- a/lib/unicore/Is/Word.pl +++ b/lib/unicore/Is/Word.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Word} +# +# Meaning: [[:Word:]] +# return <<'END'; 0030 0039 0041 005A diff --git a/lib/unicore/Is/XDigit.pl b/lib/unicore/Is/XDigit.pl index d899f402a8..3d3594e089 100644 --- a/lib/unicore/Is/XDigit.pl +++ b/lib/unicore/Is/XDigit.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{XDigit} +# +# Meaning: [[:XDigit:]] +# return <<'END'; 0030 0039 0041 0046 diff --git a/lib/unicore/Is/Yi.pl b/lib/unicore/Is/Yi.pl index bef6a330a8..169d739e95 100644 --- a/lib/unicore/Is/Yi.pl +++ b/lib/unicore/Is/Yi.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Yi} (and fuzzy permutations) +# +# Meaning: Script 'YI' +# return <<'END'; A000 A48C YI A490 A4A1 YI diff --git a/lib/unicore/Is/Z.pl b/lib/unicore/Is/Z.pl index 7cb38c03c7..4c13a1e269 100644 --- a/lib/unicore/Is/Z.pl +++ b/lib/unicore/Is/Z.pl @@ -1,7 +1,14 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Z} +# \p{Separator} (and fuzzy permutations) +# +# Meaning: Major Category 'Z' +# return <<'END'; 0020 00A0 diff --git a/lib/unicore/Is/Zl.pl b/lib/unicore/Is/Zl.pl index b8d1d3a82a..80f59025b5 100644 --- a/lib/unicore/Is/Zl.pl +++ b/lib/unicore/Is/Zl.pl @@ -1,7 +1,14 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Zl} +# \p{LineSeparator} (and fuzzy permutations) +# +# Meaning: General Category 'Zl' +# return <<'END'; 2028 END diff --git a/lib/unicore/Is/Zp.pl b/lib/unicore/Is/Zp.pl index cd5b7c3305..908dbb9326 100644 --- a/lib/unicore/Is/Zp.pl +++ b/lib/unicore/Is/Zp.pl @@ -1,7 +1,14 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Zp} +# \p{ParagraphSeparator} (and fuzzy permutations) +# +# Meaning: General Category 'Zp' +# return <<'END'; 2029 END diff --git a/lib/unicore/Is/Zs.pl b/lib/unicore/Is/Zs.pl index 55eb499229..593fa23fd8 100644 --- a/lib/unicore/Is/Zs.pl +++ b/lib/unicore/Is/Zs.pl @@ -1,7 +1,14 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{Zs} +# \p{SpaceSeparator} (and fuzzy permutations) +# +# Meaning: General Category 'Zs' +# return <<'END'; 0020 00A0 diff --git a/lib/unicore/Is/_CanonDC.pl b/lib/unicore/Is/_CanonDC.pl new file mode 100644 index 0000000000..849aef2208 --- /dev/null +++ b/lib/unicore/Is/_CanonDC.pl @@ -0,0 +1,16 @@ +# !!!!!!! DO NOT EDIT THIS FILE !!!!!!! +# This file is built by ./mktables from e.g. Unicode.txt. +# Any changes made here will be lost! + +# +# This file supports: +# \p{_CanonDCIJ} +# +# Meaning: (for internal casefolding use) +# +return <<'END'; +0069 006A +012F +1E2D +1ECB +END diff --git a/lib/unicore/Is/CaseIgno.pl b/lib/unicore/Is/_CaseIgn.pl index 3fc58ec6f2..db919574ea 100644 --- a/lib/unicore/Is/CaseIgno.pl +++ b/lib/unicore/Is/_CaseIgn.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{_CaseIgnorable} +# +# Meaning: (for internal casefolding use) +# return <<'END'; 00AD 0300 034E diff --git a/lib/unicore/Is/CombAbov.pl b/lib/unicore/Is/_CombAbo.pl index e8213bc9d4..678ccc74df 100644 --- a/lib/unicore/Is/CombAbov.pl +++ b/lib/unicore/Is/_CombAbo.pl @@ -1,7 +1,13 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! +# +# This file supports: +# \p{_CombAbove} +# +# Meaning: (for internal casefolding use) +# return <<'END'; 0300 0314 033D 0344 diff --git a/lib/unicore/JamoShort.pl b/lib/unicore/JamoShort.pl index a9b7cec27d..08559e37d0 100644 --- a/lib/unicore/JamoShort.pl +++ b/lib/unicore/JamoShort.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; diff --git a/lib/unicore/Lbrk.pl b/lib/unicore/Lbrk.pl index 9436b4431c..ba16fcc0df 100644 --- a/lib/unicore/Lbrk.pl +++ b/lib/unicore/Lbrk.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; diff --git a/lib/unicore/Makefile b/lib/unicore/Makefile index 2aa20a9956..948e982dbe 100644 --- a/lib/unicore/Makefile +++ b/lib/unicore/Makefile @@ -3,3 +3,4 @@ all: clean: rm -f *.pl */*.pl + rm -f Properties diff --git a/lib/unicore/Name.pl b/lib/unicore/Name.pl index 0e02d9e7e1..a5ed116426 100644 --- a/lib/unicore/Name.pl +++ b/lib/unicore/Name.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; diff --git a/lib/unicore/Number.pl b/lib/unicore/Number.pl index b2378a34cd..99af073110 100644 --- a/lib/unicore/Number.pl +++ b/lib/unicore/Number.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; diff --git a/lib/unicore/Properties b/lib/unicore/Properties new file mode 100644 index 0000000000..377fa5a392 --- /dev/null +++ b/lib/unicore/Properties @@ -0,0 +1,331 @@ +## +## This file created by ./mktables +## List of built-in \p{...}/\P{...} properties. +## +## '*' means name may be 'fuzzy' +## + + \p{ASCII} [[:ASCII:]] +* \p{All} Alias for \p{Any} ([\x{0000}-\x{10FFFF}]) + \p{Alnum} [[:Alnum:]] +* \p{Alphabetic} [\p{L}\p{OtherAlphabetic}] + \p{Alpha} [[:Alpha:]] +* \p{Any} [\x{0000}-\x{10FFFF}] +* \p{Arabic} Script 'ARABIC' +* \p{Armenian} Script 'ARMENIAN' +* \p{AsciiHexDigit} Extended property 'ASCII_Hex_Digit' +* \p{Assigned} All assigned code points +* \p{Bengali} Script 'BENGALI' + \p{BidiAL} Bi-directional category 'AL' + \p{BidiAN} Bi-directional category 'AN' + \p{BidiBN} Bi-directional category 'BN' + \p{BidiB} Bi-directional category 'B' + \p{BidiCS} Bi-directional category 'CS' +* \p{BidiControl} Extended property 'Bidi_Control' + \p{BidiEN} Bi-directional category 'EN' + \p{BidiES} Bi-directional category 'ES' + \p{BidiET} Bi-directional category 'ET' + \p{BidiLRE} Bi-directional category 'LRE' + \p{BidiLRO} Bi-directional category 'LRO' + \p{BidiL} Bi-directional category 'L' + \p{BidiNSM} Bi-directional category 'NSM' + \p{BidiON} Bi-directional category 'ON' + \p{BidiPDF} Bi-directional category 'PDF' + \p{BidiRLE} Bi-directional category 'RLE' + \p{BidiRLO} Bi-directional category 'RLO' + \p{BidiR} Bi-directional category 'R' + \p{BidiS} Bi-directional category 'S' + \p{BidiWS} Bi-directional category 'WS' + \p{Blank} [[:Blank:]] +* \p{Bopomofo} Script 'BOPOMOFO' +* \p{CanadianAboriginal} Script 'CANADIAN-ABORIGINAL' + \p{Canon} Decomposes to multiple characters + \p{Cc} General Category 'Cc' + \p{Cf} General Category 'Cf' +* \p{Cherokee} Script 'CHEROKEE' +* \p{ClosePunctuation} Alias for \p{Pe} (General Category 'Pe') + \p{Cntrl} [[:Cntrl:]] + \p{Cn} General Category 'Cn' [not functional in Perl] +* \p{Common} Pseudo-Script of codepoints not in other Unicode scripts + \p{Compat} Compatible with a more-basic character +* \p{ConnectorPunctuation} Alias for \p{Pc} (General Category 'Pc') +* \p{Control} Alias for \p{Cc} (General Category 'Cc') + \p{Co} General Category 'Co' + \p{Cs} General Category 'Cs' +* \p{CurrencySymbol} Alias for \p{Sc} (General Category 'Sc') +* \p{Cyrillic} Script 'CYRILLIC' + \p{C} Major Category 'C' + \p{DCcircle} Compatible with 'circle' + \p{DCcompat} Compatible with 'compat' + \p{DCfinal} Compatible with 'final' + \p{DCfont} Compatible with 'font' + \p{DCfraction} Compatible with 'fraction' + \p{DCinitial} Compatible with 'initial' + \p{DCisolated} Compatible with 'isolated' + \p{DCmedial} Compatible with 'medial' + \p{DCnarrow} Compatible with 'narrow' + \p{DCnoBreak} Compatible with 'noBreak' + \p{DCsmall} Compatible with 'small' + \p{DCsquare} Compatible with 'square' + \p{DCsub} Compatible with 'sub' + \p{DCsuper} Compatible with 'super' + \p{DCvertical} Compatible with 'vertical' + \p{DCwide} Compatible with 'wide' +* \p{DashPunctuation} Alias for \p{Pd} (General Category 'Pd') +* \p{Dash} Extended property 'Dash' +* \p{DecimalNumber} Alias for \p{Nd} (General Category 'Nd') +* \p{Deseret} Script 'DESERET' +* \p{Devanagari} Script 'DEVANAGARI' +* \p{Diacritic} Extended property 'Diacritic' + \p{Digit} [[:Digit:]] +* \p{EnclosingMark} Alias for \p{Me} (General Category 'Me') +* \p{Ethiopic} Script 'ETHIOPIC' +* \p{Extender} Extended property 'Extender' +* \p{FinalPunctuation} Alias for \p{Pf} (General Category 'Pf') +* \p{Format} Alias for \p{Cf} (General Category 'Cf') +* \p{Georgian} Script 'GEORGIAN' +* \p{Gothic} Script 'GOTHIC' + \p{Graph} [[:Graph:]] +* \p{Greek} Script 'GREEK' +* \p{Gujarati} Script 'GUJARATI' +* \p{Gurmukhi} Script 'GURMUKHI' +* \p{Hangul} Script 'HANGUL' +* \p{Han} Script 'HAN' +* \p{Hebrew} Script 'HEBREW' +* \p{HexDigit} Extended property 'Hex_Digit' +* \p{Hiragana} Script 'HIRAGANA' +* \p{Hyphen} Extended property 'Hyphen' +* \p{IdContinue} [\p{ID_Start}\p{Mn}\p{Mc}\p{Nd}\p{Pc}] +* \p{IdStart} [\p{Ll}\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{Nl}] +* \p{Ideographic} Extended property 'Ideographic' +* \p{InAlphabeticPresentationForms} Block 'Alphabetic Presentation Forms' +* \p{InArabicPresentationFormsA} Block 'Arabic Presentation Forms-A' +* \p{InArabicPresentationFormsB} Block 'Arabic Presentation Forms-B' +* \p{InArabic} Block 'Arabic' +* \p{InArmenian} Block 'Armenian' +* \p{InArrows} Block 'Arrows' +* \p{InBasicLatin} Block 'Basic Latin' +* \p{InBengali} Block 'Bengali' +* \p{InBlockElements} Block 'Block Elements' +* \p{InBopomofoExtended} Block 'Bopomofo Extended' +* \p{InBopomofo} Block 'Bopomofo' +* \p{InBoxDrawing} Block 'Box Drawing' +* \p{InBraillePatterns} Block 'Braille Patterns' +* \p{InByzantineMusicalSymbols} Block 'Byzantine Musical Symbols' +* \p{InCherokee} Block 'Cherokee' +* \p{InCjkCompatibilityForms} Block 'CJK Compatibility Forms' +* \p{InCjkCompatibilityIdeographsSupplement} Block 'CJK Compatibility Ideographs Supplement' +* \p{InCjkCompatibilityIdeographs} Block 'CJK Compatibility Ideographs' +* \p{InCjkCompatibility} Block 'CJK Compatibility' +* \p{InCjkRadicalsSupplement} Block 'CJK Radicals Supplement' +* \p{InCjkSymbolsAndPunctuation} Block 'CJK Symbols and Punctuation' +* \p{InCjkUnifiedIdeographsExtensionA} Block 'CJK Unified Ideographs Extension A' +* \p{InCjkUnifiedIdeographsExtensionB} Block 'CJK Unified Ideographs Extension B' +* \p{InCjkUnifiedIdeographs} Block 'CJK Unified Ideographs' +* \p{InCombiningDiacriticalMarks} Block 'Combining Diacritical Marks' +* \p{InCombiningHalfMarks} Block 'Combining Half Marks' +* \p{InCombiningMarksForSymbols} Block 'Combining Marks for Symbols' +* \p{InControlPictures} Block 'Control Pictures' +* \p{InCurrencySymbols} Block 'Currency Symbols' +* \p{InCyrillic} Block 'Cyrillic' +* \p{InDeseret} Block 'Deseret' +* \p{InDevanagari} Block 'Devanagari' +* \p{InDingbats} Block 'Dingbats' +* \p{InEnclosedAlphanumerics} Block 'Enclosed Alphanumerics' +* \p{InEnclosedCjkLettersAndMonths} Block 'Enclosed CJK Letters and Months' +* \p{InEthiopic} Block 'Ethiopic' +* \p{InGeneralPunctuation} Block 'General Punctuation' +* \p{InGeometricShapes} Block 'Geometric Shapes' +* \p{InGeorgian} Block 'Georgian' +* \p{InGothic} Block 'Gothic' +* \p{InGreekExtended} Block 'Greek Extended' +* \p{InGreek} Block 'Greek' +* \p{InGujarati} Block 'Gujarati' +* \p{InGurmukhi} Block 'Gurmukhi' +* \p{InHalfwidthAndFullwidthForms} Block 'Halfwidth and Fullwidth Forms' +* \p{InHangulCompatibilityJamo} Block 'Hangul Compatibility Jamo' +* \p{InHangulJamo} Block 'Hangul Jamo' +* \p{InHangulSyllables} Block 'Hangul Syllables' +* \p{InHebrew} Block 'Hebrew' +* \p{InHighPrivateUseSurrogates} Block 'High Private Use Surrogates' +* \p{InHighSurrogates} Block 'High Surrogates' +* \p{InHiragana} Block 'Hiragana' +* \p{InIdeographicDescriptionCharacters} Block 'Ideographic Description Characters' +* \p{InIpaExtensions} Block 'IPA Extensions' +* \p{InKanbun} Block 'Kanbun' +* \p{InKangxiRadicals} Block 'Kangxi Radicals' +* \p{InKannada} Block 'Kannada' +* \p{InKatakana} Block 'Katakana' +* \p{InKhmer} Block 'Khmer' +* \p{InLao} Block 'Lao' +* \p{InLatin1Supplement} Block 'Latin-1 Supplement' +* \p{InLatinExtendedAdditional} Block 'Latin Extended Additional' +* \p{InLatinExtendedA} Block 'Latin Extended-A' +* \p{InLatinExtendedB} Block 'Latin Extended-B' +* \p{InLetterlikeSymbols} Block 'Letterlike Symbols' +* \p{InLowSurrogates} Block 'Low Surrogates' +* \p{InMalayalam} Block 'Malayalam' +* \p{InMathematicalAlphanumericSymbols} Block 'Mathematical Alphanumeric Symbols' +* \p{InMathematicalOperators} Block 'Mathematical Operators' +* \p{InMiscellaneousSymbols} Block 'Miscellaneous Symbols' +* \p{InMiscellaneousTechnical} Block 'Miscellaneous Technical' +* \p{InMongolian} Block 'Mongolian' +* \p{InMusicalSymbols} Block 'Musical Symbols' +* \p{InMyanmar} Block 'Myanmar' +* \p{InNumberForms} Block 'Number Forms' +* \p{InOgham} Block 'Ogham' +* \p{InOldItalic} Block 'Old Italic' +* \p{InOpticalCharacterRecognition} Block 'Optical Character Recognition' +* \p{InOriya} Block 'Oriya' +* \p{InPrivateUse} Block 'Private Use' +* \p{InRunic} Block 'Runic' +* \p{InSinhala} Block 'Sinhala' +* \p{InSmallFormVariants} Block 'Small Form Variants' +* \p{InSpacingModifierLetters} Block 'Spacing Modifier Letters' +* \p{InSpecials} Block 'Specials' +* \p{InSuperscriptsAndSubscripts} Block 'Superscripts and Subscripts' +* \p{InSyriac} Block 'Syriac' +* \p{InTags} Block 'Tags' +* \p{InTamil} Block 'Tamil' +* \p{InTelugu} Block 'Telugu' +* \p{InThaana} Block 'Thaana' +* \p{InThai} Block 'Thai' +* \p{InTibetan} Block 'Tibetan' +* \p{InUnifiedCanadianAboriginalSyllabics} Block 'Unified Canadian Aboriginal Syllabics' +* \p{InYiRadicals} Block 'Yi Radicals' +* \p{InYiSyllables} Block 'Yi Syllables' +* \p{Inherited} Script 'INHERITED' +* \p{InitialPunctuation} Alias for \p{Pi} (General Category 'Pi') +* \p{JoinControl} Extended property 'Join_Control' +* \p{Kannada} Script 'KANNADA' +* \p{Katakana} Script 'KATAKANA' +* \p{Khmer} Script 'KHMER' + \p{L&} [\p{Ll}\p{Lu}\p{Lt}] +* \p{Lao} Script 'LAO' +* \p{Latin} Script 'LATIN' + \p{LbrkAI} Linebreak category 'AI' + \p{LbrkAL} Linebreak category 'AL' + \p{LbrkB2} Linebreak category 'B2' + \p{LbrkBA} Linebreak category 'BA' + \p{LbrkBB} Linebreak category 'BB' + \p{LbrkBK} Linebreak category 'BK' + \p{LbrkCB} Linebreak category 'CB' + \p{LbrkCL} Linebreak category 'CL' + \p{LbrkCM} Linebreak category 'CM' + \p{LbrkCR} Linebreak category 'CR' + \p{LbrkEX} Linebreak category 'EX' + \p{LbrkGL} Linebreak category 'GL' + \p{LbrkHY} Linebreak category 'HY' + \p{LbrkID} Linebreak category 'ID' + \p{LbrkIN} Linebreak category 'IN' + \p{LbrkIS} Linebreak category 'IS' + \p{LbrkLF} Linebreak category 'LF' + \p{LbrkNS} Linebreak category 'NS' + \p{LbrkNU} Linebreak category 'NU' + \p{LbrkOP} Linebreak category 'OP' + \p{LbrkPO} Linebreak category 'PO' + \p{LbrkPR} Linebreak category 'PR' + \p{LbrkQU} Linebreak category 'QU' + \p{LbrkSA} Linebreak category 'SA' + \p{LbrkSG} Linebreak category 'SG' + \p{LbrkSP} Linebreak category 'SP' + \p{LbrkSY} Linebreak category 'SY' + \p{LbrkXX} Linebreak category 'XX' + \p{LbrkZW} Linebreak category 'ZW' +* \p{LetterNumber} Alias for \p{Nl} (General Category 'Nl') +* \p{Letter} Alias for \p{L} (Major Category 'L') +* \p{LineSeparator} Alias for \p{Zl} (General Category 'Zl') + \p{Ll} General Category 'Ll' + \p{Lm} General Category 'Lm' +* \p{LowercaseLetter} Alias for \p{Ll} (General Category 'Ll') +* \p{Lowercase} [\p{Ll}\p{OtherLowercase}] + \p{Lower} [[:Lower:]] + \p{Lo} General Category 'Lo' + \p{Lt} General Category 'Lt' + \p{Lu} General Category 'Lu' + \p{L} Major Category 'L' +* \p{Malayalam} Script 'MALAYALAM' +* \p{Mark} Alias for \p{M} (Major Category 'M') +* \p{MathSymbol} Alias for \p{Sm} (General Category 'Sm') +* \p{Math} [\p{Sm}\p{OtherMath}] + \p{Mc} General Category 'Mc' + \p{Me} General Category 'Me' + \p{Mirrored} Mirrored in bidirectional text + \p{Mn} General Category 'Mn' +* \p{ModifierLetter} Alias for \p{Lm} (General Category 'Lm') +* \p{ModifierSymbol} Alias for \p{Sk} (General Category 'Sk') +* \p{Mongolian} Script 'MONGOLIAN' +* \p{Myanmar} Script 'MYANMAR' + \p{M} Major Category 'M' + \p{Nd} General Category 'Nd' + \p{Nl} General Category 'Nl' +* \p{NonSpacingMark} Alias for \p{Mn} (General Category 'Mn') +* \p{NoncharacterCodePoint} Extended property 'Noncharacter_Code_Point' + \p{No} General Category 'No' +* \p{Number} Alias for \p{N} (Major Category 'N') + \p{N} Major Category 'N' +* \p{Ogham} Script 'OGHAM' +* \p{OldItalic} Script 'OLD-ITALIC' +* \p{OpenPunctuation} Alias for \p{Ps} (General Category 'Ps') +* \p{Oriya} Script 'ORIYA' +* \p{OtherAlphabetic} Extended property 'Other_Alphabetic' +* \p{OtherLetter} Alias for \p{Lo} (General Category 'Lo') +* \p{OtherLowercase} Extended property 'Other_Lowercase' +* \p{OtherMath} Extended property 'Other_Math' +* \p{OtherNumber} Alias for \p{No} (General Category 'No') +* \p{OtherPunctuation} Alias for \p{Po} (General Category 'Po') +* \p{OtherSymbol} Alias for \p{So} (General Category 'So') +* \p{OtherUppercase} Extended property 'Other_Uppercase' +* \p{Other} Alias for \p{C} (Major Category 'C') +* \p{ParagraphSeparator} Alias for \p{Zp} (General Category 'Zp') + \p{Pc} General Category 'Pc' + \p{Pd} General Category 'Pd' + \p{Pe} General Category 'Pe' + \p{Pf} General Category 'Pf' + \p{Pi} General Category 'Pi' + \p{Po} General Category 'Po' + \p{Print} [[:Print:]] +* \p{PrivateUse} Alias for \p{Co} (General Category 'Co') + \p{Ps} General Category 'Ps' +* \p{Punctuation} Alias for \p{P} (Major Category 'P') + \p{Punct} [[:Punct:]] + \p{P} Major Category 'P' +* \p{QuotationMark} Extended property 'Quotation_Mark' +* \p{Runic} Script 'RUNIC' + \p{Sc} General Category 'Sc' +* \p{Separator} Alias for \p{Z} (Major Category 'Z') +* \p{Sinhala} Script 'SINHALA' + \p{Sk} General Category 'Sk' + \p{Sm} General Category 'Sm' + \p{So} General Category 'So' + \p{SpacePerl} \s +* \p{SpaceSeparator} Alias for \p{Zs} (General Category 'Zs') + \p{Space} [[:Space:]] +* \p{SpacingMark} Alias for \p{Mc} (General Category 'Mc') +* \p{Surrogate} Alias for \p{Cs} (General Category 'Cs') +* \p{Symbol} Alias for \p{S} (Major Category 'S') +* \p{Syriac} Script 'SYRIAC' + \p{S} Major Category 'S' +* \p{Tamil} Script 'TAMIL' +* \p{Telugu} Script 'TELUGU' +* \p{TerminalPunctuation} Extended property 'Terminal_Punctuation' +* \p{Thaana} Script 'THAANA' +* \p{Thai} Script 'THAI' +* \p{Tibetan} Script 'TIBETAN' +* \p{TitlecaseLetter} Alias for \p{Lt} (General Category 'Lt') + \p{Title} [[:Title:]] +* \p{Unassigned} Alias for \p{Cn} (General Category 'Cn' [not functional in Perl]) +* \p{UppercaseLetter} Alias for \p{Lu} (General Category 'Lu') +* \p{Uppercase} [\p{Lu}\p{Other_Uppercase}] + \p{Upper} [[:Upper:]] +* \p{WhiteSpace} Extended property 'White_space' + \p{Word} [[:Word:]] + \p{XDigit} [[:XDigit:]] +* \p{Yi} Script 'YI' + \p{Zl} General Category 'Zl' + \p{Zp} General Category 'Zp' + \p{Zs} General Category 'Zs' + \p{Z} Major Category 'Z' + \p{_CanonDCIJ} (for internal casefolding use) + \p{_CaseIgnorable} (for internal casefolding use) + \p{_CombAbove} (for internal casefolding use) diff --git a/lib/unicore/Scripts.pl b/lib/unicore/Scripts.pl index 33130d55c6..25164ce61d 100644 --- a/lib/unicore/Scripts.pl +++ b/lib/unicore/Scripts.pl @@ -1,5 +1,5 @@ # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! -# This file is built by mktables from e.g. Unicode.txt. +# This file is built by ./mktables from e.g. Unicode.txt. # Any changes made here will be lost! return <<'END'; diff --git a/lib/unicore/mktables b/lib/unicore/mktables index 637a83dfb2..1e1f7ed48b 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -29,7 +29,6 @@ while (@ARGV) my $LastUnicodeCodepoint = 0x10FFFF; # As of Unicode 3.1.1. -my $now = localtime; my $HEADER=<<"EOF"; # !!!!!!! DO NOT EDIT THIS FILE !!!!!!! # This file is built by $0 from e.g. Unicode.txt. @@ -51,15 +50,46 @@ sub RANGE_START() { 0 } ## index into range element sub RANGE_END() { 1 } ## index into range element sub RANGE_NAME() { 2 } ## index into range element +## Conceptually, these should really be folded into the 'Table' objects my %TableInfo; +my %TableDesc; my %FuzzyNames; my %AliasInfo; ## +## Turn something like +## OLD-ITALIC +## to +## OldItalic +## +sub CanonicalName($) +{ + my $name = lc shift; + $name =~ s/(?<![a-z])(\w)/\u$1/g; + $name =~ s/[_\W]+//g; + return $name; +} + +## +## Turn something like +## OLD-ITALIC +## to +## Old_Italic +## +sub CanonicalNameForPattern($) +{ + my $name = lc shift; + $name =~ s/(?<![a-z])(\w)/\u$1/g; + $name =~ s/[_\W]+/_/; + return $name; +} + + +## ## Associates a property ("Greek", "Lu", "Assigned",...) with a Table. ## ## Called like: -## New_Prop(In => 'Greek', $Table, AllowFuzzy => 1); +## New_Prop(In => 'Greek', $Table, Desc => 'Greek Block', Fuzzy => 1); ## ## Normally, these parameters are set when the Table is created (when the ## Table->New constructor is called), but there are times when it needs to @@ -74,7 +104,10 @@ sub New_Prop($$$@) ## remaining args are optional key/val my %Args = @_; - my $AllowFuzzy = delete $Args{AllowFuzzy}; + my $Fuzzy = delete $Args{Fuzzy}; + my $Desc = delete $Args{Desc}; # description + + $Name = CanonicalNameForPattern($Name) if $Fuzzy; ## sanity check a few args if (%Args or ($Type ne 'Is' and $Type ne 'In') or not ref $Table) { @@ -84,7 +117,8 @@ sub New_Prop($$$@) if (not $TableInfo{$Type}->{$Name}) { $TableInfo{$Type}->{$Name} = $Table; - if ($AllowFuzzy) { + $TableDesc{$Type}->{$Name} = $Desc; + if ($Fuzzy) { $FuzzyNames{$Type}->{$Name} = $Name; } } @@ -95,9 +129,10 @@ sub New_Prop($$$@) ## Creates a new Table object. ## ## Args are key/value pairs: -## In => Name -- Name of "In" property to be associated with -## Is => Name -- Name of "Is" property to be associated with -## AllowFuzzy => Boolean -- True if name can be accessed "fuzzily" +## In => Name -- Name of "In" property to be associated with +## Is => Name -- Name of "Is" property to be associated with +## Fuzzy => Boolean -- True if name can be accessed "fuzzily" +## Desc => String -- Description of the property ## ## No args are required. ## @@ -108,12 +143,13 @@ sub Table::New my $Table = bless [], $class; - my $AllowFuzzy = delete $Args{AllowFuzzy}; + my $Fuzzy = delete $Args{Fuzzy}; + my $Desc = delete $Args{Desc}; for my $Type ('Is', 'In') { if (my $Name = delete $Args{$Type}) { - New_Prop($Type => $Name, $Table, AllowFuzzy => $AllowFuzzy); + New_Prop($Type => $Name, $Table, Desc => $Desc, Fuzzy => $Fuzzy); } } @@ -329,11 +365,13 @@ sub Table::Merge ## ## Given a filename, write a representation of the Table to a file. +## May have an optional comment as a 2nd arg. ## sub Table::Write { - my $Table = shift; #self + my $Table = shift; #self my $filename = shift; + my $comment = shift; print "$filename\n" if $Verbose; @@ -342,6 +380,11 @@ sub Table::Write } print OUT $HEADER; + if (defined $comment) { + $comment =~ s/\s+\Z//; + $comment =~ s/^/# /gm; + print OUT "#\n$comment\n#\n"; + } print OUT "return <<'END';\n"; for my $set (@$Table) @@ -368,9 +411,9 @@ sub Table::Write ## ## Called like: -## New_Alias(Is => 'All', SameAs => 'Any', AllowFuzzy => 1); +## New_Alias(Is => 'All', SameAs => 'Any', Fuzzy => 1); ## -## The args must be in that order, although the AllowFuzzy pair may be omitted. +## The args must be in that order, although the Fuzzy pair may be omitted. ## ## This creates 'IsAll' as an alias for 'IsAny' ## @@ -378,13 +421,13 @@ sub New_Alias($$$@) { my $Type = shift; ## "Is" or "In" my $Alias = shift; - my $SameAs = shift; + my $SameAs = shift; # expecting "SameAs" -- just ignored my $Name = shift; ## remaining args are optional key/val my %Args = @_; - my $AllowFuzzy = delete $Args{AllowFuzzy}; + my $Fuzzy = delete $Args{Fuzzy}; ## sanity check a few args if (%Args or ($Type ne 'Is' and $Type ne 'In') or $SameAs ne 'SameAs') { @@ -398,29 +441,17 @@ sub New_Alias($$$@) confess "$0: already have original $Type => $Alias; can't make alias"; } $AliasInfo{$Type}->{$Name} = $Alias; - if ($AllowFuzzy) { + if ($Fuzzy) { $FuzzyNames{$Type}->{$Alias} = $Name; } } -## -## Turn something like -## OLD-ITALIC -## to -## Old_Italic -## -sub CanonicalName($) -{ - my $name = lc shift; - $name =~ s/\W+/_/; - $name =~ s/(?<![a-z])(\w)/\u$1/g; - return $name; -} - ## All assigned code points -my $Assigned = Table->New(Is => 'Assigned', AllowFuzzy => 1); +my $Assigned = Table->New(Is => 'Assigned', + Desc => "All assigned code points", + Fuzzy => 1); my $Name = Table->New(); ## all characters, individually by name my $General = Table->New(); ## all characters, grouped by category @@ -436,32 +467,38 @@ sub Unicode_Txt() my $Deco = Table->New(); my $Comb = Table->New(); my $Number = Table->New(); - my $Mirrored = Table->New(Is => 'Mirrored', AllowFuzzy => 0); + my $Mirrored = Table->New(Is => 'Mirrored', + Desc => "Mirrored in bidirectional text", + Fuzzy => 0); my %DC; my %Bidi; my %Deco; - $Deco{Canon} = Table->New(Is => 'Canon', AllowFuzzy => 0); - $Deco{Compat} = Table->New(Is => 'Compat', AllowFuzzy => 0); + $Deco{Canon} = Table->New(Is => 'Canon', + Desc => 'Decomposes to multiple characters', + Fuzzy => 0); + $Deco{Compat} = Table->New(Is => 'Compat', + Desc => 'Compatible with a more-basic character', + Fuzzy => 0); ## Initialize Perl-generated categories - $Cat{Alnum} = Table->New(Is => 'Alnum', AllowFuzzy => 0); - $Cat{Alpha} = Table->New(Is => 'Alpha', AllowFuzzy => 0); - $Cat{ASCII} = Table->New(Is => 'ASCII', AllowFuzzy => 0); - $Cat{Blank} = Table->New(Is => 'Blank', AllowFuzzy => 0); - $Cat{Cntrl} = Table->New(Is => 'Cntrl', AllowFuzzy => 0); - $Cat{Digit} = Table->New(Is => 'Digit', AllowFuzzy => 0); - $Cat{Graph} = Table->New(Is => 'Graph', AllowFuzzy => 0); - $Cat{Lower} = Table->New(Is => 'Lower', AllowFuzzy => 0); - $Cat{Print} = Table->New(Is => 'Print', AllowFuzzy => 0); - $Cat{Punct} = Table->New(Is => 'Punct', AllowFuzzy => 0); - $Cat{SpacePerl} = Table->New(Is => 'SpacePerl', AllowFuzzy => 0); - $Cat{Space} = Table->New(Is => 'Space', AllowFuzzy => 0); - $Cat{Title} = Table->New(Is => 'Title', AllowFuzzy => 0); - $Cat{Upper} = Table->New(Is => 'Upper', AllowFuzzy => 0); - $Cat{Word} = Table->New(Is => 'Word' , AllowFuzzy => 0); - $Cat{XDigit} = Table->New(Is => 'XDigit', AllowFuzzy => 0); - ## Categories from Unicode.txt are auto-initialized in gencat() + ## (Categories from Unicode.txt are auto-initialized in gencat) + $Cat{Alnum} = Table->New(Is => 'Alnum', Desc => "[[:Alnum:]]", Fuzzy => 0); + $Cat{Alpha} = Table->New(Is => 'Alpha', Desc => "[[:Alpha:]]", Fuzzy => 0); + $Cat{ASCII} = Table->New(Is => 'ASCII', Desc => "[[:ASCII:]]", Fuzzy => 0); + $Cat{Blank} = Table->New(Is => 'Blank', Desc => "[[:Blank:]]", Fuzzy => 0); + $Cat{Cntrl} = Table->New(Is => 'Cntrl', Desc => "[[:Cntrl:]]", Fuzzy => 0); + $Cat{Digit} = Table->New(Is => 'Digit', Desc => "[[:Digit:]]", Fuzzy => 0); + $Cat{Graph} = Table->New(Is => 'Graph', Desc => "[[:Graph:]]", Fuzzy => 0); + $Cat{Lower} = Table->New(Is => 'Lower', Desc => "[[:Lower:]]", Fuzzy => 0); + $Cat{Print} = Table->New(Is => 'Print', Desc => "[[:Print:]]", Fuzzy => 0); + $Cat{Punct} = Table->New(Is => 'Punct', Desc => "[[:Punct:]]", Fuzzy => 0); + $Cat{Space} = Table->New(Is => 'Space', Desc => "[[:Space:]]", Fuzzy => 0); + $Cat{Title} = Table->New(Is => 'Title', Desc => "[[:Title:]]", Fuzzy => 0); + $Cat{Upper} = Table->New(Is => 'Upper', Desc => "[[:Upper:]]", Fuzzy => 0); + $Cat{XDigit} = Table->New(Is => 'XDigit', Desc => "[[:XDigit:]]", Fuzzy => 0); + $Cat{Word} = Table->New(Is => 'Word', Desc => "[[:Word:]]", Fuzzy => 0); + $Cat{SpacePerl} = Table->New(Is => 'SpacePerl', Desc => '\s', Fuzzy => 0); my %To; $To{Upper} = Table->New(); @@ -483,11 +520,15 @@ sub Unicode_Txt() $General->$op($code, $cat); ## add to the sub category (e.g. "Lu", "Nd", "Cf", ..) - $Cat{$cat} ||= Table->New(Is => $cat, AllowFuzzy => 0); + $Cat{$cat} ||= Table->New(Is => $cat, + Desc => "General Category '$cat'", + Fuzzy => 0); $Cat{$cat}->$op($code); ## add to the major category (e.g. "L", "N", "C", ...) - $Cat{$MajorCat} ||= Table->New(Is => $MajorCat, AllowFuzzy => 0); + $Cat{$MajorCat} ||= Table->New(Is => $MajorCat, + Desc => "Major Category '$MajorCat'", + Fuzzy => 0); $Cat{$MajorCat}->$op($code); ($General{$name} ||= Table->New)->$op($code, $name); @@ -549,7 +590,9 @@ sub Unicode_Txt() ## decomp. char an "i" or "j" (for \p{_CanonDCIJ}) ## This is filled in as we go.... - my $CombAbove = Table->New(Is => '_CombAbove', AllowFuzzy => 0); + my $CombAbove = Table->New(Is => '_CombAbove', + Desc => '(for internal casefolding use)', + Fuzzy => 0); while (<IN>) { @@ -595,7 +638,7 @@ sub Unicode_Txt() { $name = $1; gencat($name, $cat, $code, $2 eq 'First' ? 'Append' : 'Extend'); - #New_Prop(In => $name, $General{$name}, AllowFuzzy => 1); + #New_Prop(In => $name, $General{$name}, Fuzzy => 1); } else { @@ -614,7 +657,9 @@ sub Unicode_Txt() $Mirrored->Append($code) if $mirrored eq "Y"; - $Bidi{$bidi} ||= Table->New(Is => "Bidi$bidi", AllowFuzzy => 0); + $Bidi{$bidi} ||= Table->New(Is => "Bidi$bidi", + Desc => "Bi-directional category '$bidi'", + Fuzzy => 0); $Bidi{$bidi}->Append($code); if ($deco) @@ -624,7 +669,9 @@ sub Unicode_Txt() { $Deco{Compat}->Append($code); - $DC{$1} ||= Table->New(Is => "DC$1", AllowFuzzy => 0); + $DC{$1} ||= Table->New(Is => "DC$1", + Desc => "Compatible with '$1'", + Fuzzy => 0); $DC{$1}->Append($code); } else @@ -641,10 +688,13 @@ sub Unicode_Txt() ## $Cat{Cn} = $Assigned->Invert; ## Cn is everything that doesn't exist - New_Prop(Is => 'Cn', $Cat{Cn}, AllowFuzzy => 0); + New_Prop(Is => 'Cn', + $Cat{Cn}, + Desc => "General Category 'Cn' [not functional in Perl]", + Fuzzy => 0); ## Unassigned is the same as 'Cn' - New_Alias(Is => 'Unassigned', SameAs => 'Cn', AllowFuzzy => 1); + New_Alias(Is => 'Unassigned', SameAs => 'Cn', Fuzzy => 1); $Cat{C}->Replace($Cat{C}->Merge($Cat{Cn})); ## Now merge in Cn into C @@ -652,13 +702,17 @@ sub Unicode_Txt() # L& is Ll, Lu, and Lt. New_Prop(Is => 'L&', Table->Merge(@Cat{qw[Ll Lu Lt]}), - AllowFuzzy => 0); + Desc => '[\p{Ll}\p{Lu}\p{Lt}]', + Fuzzy => 0); ## Any and All are all code points. - my $Any = Table->New(Is => 'Any', AllowFuzzy => 1); + my $Any = Table->New(Is => 'Any', + Desc => sprintf("[\\x{0000}-\\x{%X}]", + $LastUnicodeCodepoint), + Fuzzy => 1); $Any->RawAppendRange(0, $LastUnicodeCodepoint); - New_Alias(Is => 'All', SameAs => 'Any', AllowFuzzy => 1); + New_Alias(Is => 'All', SameAs => 'Any', Fuzzy => 1); ## ## Build special properties for Perl's internal case-folding needs: @@ -673,11 +727,14 @@ sub Unicode_Txt() Table->Merge($Cat{Mn}, 0x00AD, #SOFT HYPHEN 0x2010), #HYPHEN - AllowFuzzy => 0); + Desc => '(for internal casefolding use)', + Fuzzy => 0); ## \p{_CanonDCIJ} is fairly complex... - my $CanonCDIJ = Table->New(Is => '_CanonDCIJ', AllowFuzzy => 0); + my $CanonCDIJ = Table->New(Is => '_CanonDCIJ', + Desc => '(for internal casefolding use)', + Fuzzy => 0); ## It contains the ASCII 'i' and 'j'.... $CanonCDIJ->Append(0x0069); # ASCII ord("i") $CanonCDIJ->Append(0x006A); # ASCII ord("j") @@ -740,7 +797,9 @@ sub LineBrk_Txt() $Lbrk->Append($first, $lbrk); - $Lbrk{$lbrk} ||= Table->New(Is => "Lbrk$lbrk", AllowFuzzy => 0); + $Lbrk{$lbrk} ||= Table->New(Is => "Lbrk$lbrk", + Desc => "Linebreak category '$lbrk'", + Fuzzy => 0); $Lbrk{$lbrk}->Append($first); if ($last) { @@ -831,8 +890,9 @@ sub Scripts_txt() my ($first, $last, $name) = @$script; $Scripts->Append($first, $name); - $Script{$name} ||= Table->New(Is => CanonicalName($name), - AllowFuzzy => 1); + $Script{$name} ||= Table->New(Is => $name, + Desc => "Script '$name'", + Fuzzy => 1); $Script{$name}->Append($first, $name); if ($last) { @@ -847,7 +907,10 @@ sub Scripts_txt() ## ## ***shouldn't this be intersected with \p{Assigned}? ****** ## - New_Prop(Is => 'Common', $Scripts->Invert, AllowFuzzy => 1); + New_Prop(Is => 'Common', + $Scripts->Invert, + Desc => 'Pseudo-Script of codepoints not in other Unicode scripts', + Fuzzy => 1); } ## @@ -887,7 +950,9 @@ sub Blocks_txt() $Blocks->Append($first, $name); - $Blocks{$name} ||= Table->New(In=>CanonicalName($name), AllowFuzzy=>1); + $Blocks{$name} ||= Table->New(In => $name, + Desc => "Block '$name'", + Fuzzy => 1); $Blocks{$name}->Append($first, $name); if ($last and $last != $first) { @@ -934,7 +999,9 @@ sub PropList_txt() my ($first, $last, $name) = @$prop; $Props->Append($first, $name); - $Prop{$name} ||= Table->New(Is => $name, AllowFuzzy => 1); + $Prop{$name} ||= Table->New(Is => $name, + Desc => "Extended property '$name'", + Fuzzy => 1); $Prop{$name}->Append($first, $name); if ($last) { @@ -944,34 +1011,40 @@ sub PropList_txt() } # Alphabetic is L and Other_Alphabetic. - New_Prop(Is => 'Alphabetic', + New_Prop(Is => 'Alphabetic', Table->Merge($Cat{L}, $Prop{Other_Alphabetic}), - AllowFuzzy => 1); + Desc => '[\p{L}\p{OtherAlphabetic}]', # use canonical names here + Fuzzy => 1); # Lowercase is Ll and Other_Lowercase. - New_Prop(Is => 'Lowercase', + New_Prop(Is => 'Lowercase', Table->Merge($Cat{Ll}, $Prop{Other_Lowercase}), - AllowFuzzy => 1); + Desc => '[\p{Ll}\p{OtherLowercase}]', # use canonical names here + Fuzzy => 1); # Uppercase is Lu and Other_Uppercase. New_Prop(Is => 'Uppercase', Table->Merge($Cat{Lu}, $Prop{Other_Uppercase}), - AllowFuzzy => 1); + Desc => '[\p{Lu}\p{Other_Uppercase}]', # use canonical names here + Fuzzy => 1); # Math is Sm and Other_Math. New_Prop(Is => 'Math', Table->Merge($Cat{Sm}, $Prop{Other_Math}), - AllowFuzzy => 1); + Desc => '[\p{Sm}\p{OtherMath}]', # use canonical names here + Fuzzy => 1); # ID_Start is Ll, Lu, Lt, Lm, Lo, and Nl. New_Prop(Is => 'ID_Start', Table->Merge(@Cat{qw[Ll Lu Lt Lm Lo Nl]}), - AllowFuzzy => 1); + Desc => '[\p{Ll}\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{Nl}]', + Fuzzy => 1); # ID_Continue is ID_Start, Mn, Mc, Nd, and Pc. New_Prop(Is => 'ID_Continue', Table->Merge(@Cat{qw[Ll Lu Lt Lm Lo Nl Mn Mc Nd Pc ]}), - AllowFuzzy => 1); + Desc => '[\p{ID_Start}\p{Mn}\p{Mc}\p{Nd}\p{Pc}]', + Fuzzy => 1); } sub Make_GC_Aliases() @@ -1029,7 +1102,7 @@ sub Make_GC_Aliases() ## make the aliases.... while (my ($Alias, $Name) = each %Is) { - New_Alias(Is => $Alias, SameAs => $Name, AllowFuzzy => 1); + New_Alias(Is => $Alias, SameAs => $Name, Fuzzy => 1); } } @@ -1043,6 +1116,8 @@ sub Make_GC_Aliases() ## sub WriteAllMappings() { + my @MAP; + for my $Type ('In', 'Is') { my %Filenames; @@ -1054,35 +1129,103 @@ sub WriteAllMappings() ## ## First write all the files to the $Type/ directory ## - while (my ($Name, $Table) = each %{$TableInfo{$Type}}) + for my $Name (sort { length $a <=> length $b } keys %{$TableInfo{$Type}}) { - ## Need an 8.3 safe filename. - my $filename = $Name; - $filename =~ s/[_\W]+(\w*)/\u$1/g; + my $Table = $TableInfo{$Type}->{$Name}; + + ## Need an 8.3 safe filename (which means "an 8 safe" $filename) + my $filename = $FuzzyNames{$Type}->{$Name} ? CanonicalName($Name): $Name; + $filename =~ s/[^\w_]+/_/g; # "L&" -> "L_" substr($filename, 8) = '' if length($filename) > 8; ## ## Make sure the filename doesn't conflict with something we ## might have already written. If we have, say, - ## Greek_Extended1 - ## Greek_Extended2 + ## GreekExtended1 + ## GreekExtended2 ## they become - ## Greek_Ex - ## Greek_E2 + ## GreekExt + ## GreekEx2 ## while (my $num = $Filenames{lc $filename}++) { $num++; ## so filenames with numbers start with '2', which ## just looks more natural. - substr($filename, -length($num)) = $num; + ## Want to append $num, but if it'll make the filename longer + ## than 8 characters, pre-truncate $filename so that the result + ## is acceptable. + my $delta = length($filename) + length($num) - 8; + if ($delta > 0) { + substr($filename, -$delta) = $num; + } else { + $filename .= $num; + } } + $Exact{$Name} = $filename; + + ## + ## Construct a nice comment to add to the file, and build data + ## for the "./Properties" file along the way. + ## + my $Comment; + { + my $Desc = $TableDesc{$Type}->{$Name} || ""; + ## get list of names this table is reference by + my @Supported = $Name; + while (my ($Orig, $Alias) = each %{ $AliasInfo{$Type} }) + { + if ($Orig eq $Name) { + push @Supported, $Alias; + } + } + + my $TypeToShow = $Type eq 'Is' ? "" : $Type; + my $OrigProp; + + $Comment = "This file supports:\n"; + for my $N (@Supported) + { + my $IsFuzzy = $FuzzyNames{$Type}->{$N}; + my $CName = $IsFuzzy ? CanonicalName($N): $N; + my $Prop = "\\p{$TypeToShow$CName}"; + $OrigProp = $Prop if not $OrigProp; #cache for aliases + if ($IsFuzzy) { + $Comment .= "\t$Prop (and fuzzy permutations)\n"; + } else { + $Comment .= "\t$Prop\n"; + } + my $MyDesc = ($N eq $Name) ? $Desc : "Alias for $OrigProp ($Desc)"; + + push @MAP, sprintf("%s %-42s %s\n", + $IsFuzzy ? '*' : ' ', $Prop, $MyDesc); + } + if ($Desc) { + $Comment .= "\nMeaning: $Desc\n"; + } + + } ## ## Okay, write the file... ## - $Exact{$Name} = $filename; - $Table->Write("$Type/$filename.pl"); + $Table->Write("$Type/$filename.pl", $Comment); + } + + ## + ## Write out the map + ## + if (not open MAP, ">Properties") { + die "$0: can't write Properties: $!\n"; } + print MAP "##\n"; + print MAP "## This file created by $0\n"; + print MAP "## List of built-in \\p{...}/\\P{...} properties.\n"; + print MAP "##\n"; + print MAP "## '*' means name may be 'fuzzy'\n"; + print MAP "##\n"; + print MAP "\n"; + print MAP sort { substr($a,2) cmp substr($b, 2) } @MAP; + close MAP; ## ## Build %Pat |