summary refs log tree commit diff
diff options
context:
space:
mode:
author Karl Williamson <khw@cpan.org> 2019-03-06 17:50:23 -0700
committer Karl Williamson <khw@cpan.org> 2019-03-07 13:14:26 -0700
commit 21c34e9717de1e37a5e25d2e7d748da66c781488 (patch)
tree 86ce71d42ceff26db39bfd0c754a2aa45197da2b
parent b37d10f658c300104241001e3f5de1f31d62b22f (diff)
download perl-21c34e9717de1e37a5e25d2e7d748da66c781488.tar.gz
Add hook for Unicode private use override
I am starting to write a Unicode::Private_Use module which will allow one to specify the Unicode properties of private use code points, thus making them actually useful. This commit adds a hook to regcomp.c to accommodate this module. The changes are pretty minimal. This way we don't have to wait another release cycle to get it out there. I don't want to document this interface until it's proven.
-rw-r--r-- MANIFEST 1
-rw-r--r-- charclass_invlists.h 2
-rw-r--r-- embedvar.h 2
-rw-r--r-- lib/unicore/uni_keywords.pl 1266
-rw-r--r-- lib/warnings.pm 17
-rw-r--r-- perlapi.h 2
-rw-r--r-- perlvars.h 1
-rw-r--r-- pod/perldiag.pod 5
-rw-r--r-- regcomp.c 58
-rw-r--r-- regen/mk_invlists.pl 36
-rw-r--r-- regen/warnings.pl 4
-rw-r--r-- uni_keywords.h 2
-rw-r--r-- warnings.h 4
13 files changed, 1391 insertions, 9 deletions
diff --git a/MANIFEST b/MANIFEST
index 1bc8bc54f6..4466caf308 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -4771,6 +4771,7 @@ lib/unicore/ScriptExtensions.txt Unicode character database
lib/unicore/Scripts.txt Unicode character database
lib/unicore/SpecialCasing.txt Unicode character database
lib/unicore/StandardizedVariants.txt Unicode character database
+lib/unicore/uni_keywords.pl Indices into array in charclass_invlists.h
lib/unicore/UnicodeData.txt Unicode character database
lib/unicore/version The version of the Unicode
lib/unicore/VerticalOrientation.txt Unicode character database
diff --git a/charclass_invlists.h b/charclass_invlists.h
index 5da5c7c8a6..557f021a5b 100644
--- a/charclass_invlists.h
+++ b/charclass_invlists.h
@@ -390181,5 +390181,5 @@ static const U8 WB_table[23][23] = {
* 93cc868487ef3345596041bcb90c302b1b056733bb95233101bc10dc2dbe36b4 lib/unicore/version
* 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl
* 03e51b0f07beebd5da62ab943899aa4934eee1f792fa27c1fb638c33bf4ac6ea regen/mk_PL_charclass.pl
- * 2fb19108265d37fc38920cd3f775c5c9fc25ae8b95be3bf197f47da1dc4989f4 regen/mk_invlists.pl
+ * d62728e4cfcba3e6550ff12ad924f4811ca2077ebec705b9fecabed716764414 regen/mk_invlists.pl
* ex: set ro: */
diff --git a/embedvar.h b/embedvar.h
index 420664d68c..35cf8f2191 100644
--- a/embedvar.h
+++ b/embedvar.h
@@ -371,6 +371,8 @@
#define PL_GNonFinalFold (my_vars->GNonFinalFold)
#define PL_Posix_ptrs (my_vars->GPosix_ptrs)
#define PL_GPosix_ptrs (my_vars->GPosix_ptrs)
+#define PL_Private_Use (my_vars->GPrivate_Use)
+#define PL_GPrivate_Use (my_vars->GPrivate_Use)
#define PL_SB_invlist (my_vars->GSB_invlist)
#define PL_GSB_invlist (my_vars->GSB_invlist)
#define PL_SCX_invlist (my_vars->GSCX_invlist)
diff --git a/lib/unicore/uni_keywords.pl b/lib/unicore/uni_keywords.pl
new file mode 100644
index 0000000000..43dea59e02
--- /dev/null
+++ b/lib/unicore/uni_keywords.pl
@@ -0,0 +1,1266 @@
+# -*- buffer-read-only: t -*-
+# !!!!!!! DO NOT EDIT THIS FILE !!!!!!!
+# This file is built by regen/mk_invlists.pl from Unicode::UCD.
+# Any changes made here will be lost!
+
+%utf8::uni_prop_ptrs_indices = (
+ '_perl_any_folds' => 1151,
+ '_perl_charname_begin' => 1152,
+ '_perl_charname_continue' => 1153,
+ '_perl_folds_to_multi_char' => 1154,
+ '_perl_idcont' => 1155,
+ '_perl_idstart' => 1156,
+ '_perl_is_in_multi_char_fold' => 1157,
+ '_perl_nchar' => 1158,
+ '_perl_non_final_folds' => 1159,
+ '_perl_patws' => 1160,
+ '_perl_problematic_locale_foldeds_start' => 1161,
+ '_perl_problematic_locale_folds' => 1162,
+ '_perl_quotemeta' => 1163,
+ '_perl_surrogate' => 1164,
+ 'adlm' => 1,
+ 'aegeannumbers' => 2,
+ 'age=10' => 3,
+ 'age=11' => 4,
+ 'age=12' => 5,
+ 'age=2' => 6,
+ 'age=2.1' => 7,
+ 'age=3' => 8,
+ 'age=3.1' => 9,
+ 'age=3.2' => 10,
+ 'age=4' => 11,
+ 'age=4.1' => 12,
+ 'age=5' => 13,
+ 'age=5.1' => 14,
+ 'age=5.2' => 15,
+ 'age=6' => 16,
+ 'age=6.1' => 17,
+ 'age=6.2' => 18,
+ 'age=6.3' => 19,
+ 'age=7' => 20,
+ 'age=8' => 21,
+ 'age=9' => 22,
+ 'age=na' => 23,
+ 'aghb' => 24,
+ 'ahex' => 938,
+ 'ahom' => 25,
+ 'alchemical' => 26,
+ 'all' => 27,
+ 'alnum' => 1126,
+ 'alpha' => 1127,
+ 'alphabetic' => 1127,
+ 'alphabeticpf' => 28,
+ 'ancientgreekmusic' => 29,
+ 'ancientgreeknumbers' => 30,
+ 'ancientsymbols' => 31,
+ 'any' => 32,
+ 'arab' => 33,
+ 'arabicexta' => 34,
+ 'arabicmath' => 35,
+ 'arabicpfa' => 36,
+ 'arabicpfb' => 37,
+ 'arabicsup' => 38,
+ 'armi' => 39,
+ 'armn' => 40,
+ 'arrows' => 41,
+ 'ascii' => 42,
+ 'asciihexdigit' => 938,
+ 'assigned' => 43,
+ 'avst' => 44,
+ 'bali' => 45,
+ 'bamu' => 46,
+ 'bamumsup' => 47,
+ 'basiclatin' => 42,
+ 'bass' => 48,
+ 'batk' => 49,
+ 'bc=al' => 50,
+ 'bc=an' => 51,
+ 'bc=b' => 52,
+ 'bc=bn' => 53,
+ 'bc=cs' => 54,
+ 'bc=en' => 55,
+ 'bc=es' => 56,
+ 'bc=et' => 57,
+ 'bc=fsi' => 58,
+ 'bc=l' => 59,
+ 'bc=lre' => 60,
+ 'bc=lri' => 61,
+ 'bc=lro' => 62,
+ 'bc=nsm' => 63,
+ 'bc=on' => 64,
+ 'bc=pdf' => 65,
+ 'bc=pdi' => 66,
+ 'bc=r' => 67,
+ 'bc=rle' => 68,
+ 'bc=rli' => 69,
+ 'bc=rlo' => 70,
+ 'bc=s' => 71,
+ 'bc=ws' => 72,
+ 'beng' => 73,
+ 'bhks' => 74,
+ 'bidic' => 75,
+ 'bidim' => 76,
+ 'blank' => 1128,
+ 'blockelements' => 77,
+ 'bopo' => 78,
+ 'bopomofoext' => 79,
+ 'boxdrawing' => 80,
+ 'bpt=c' => 81,
+ 'bpt=n' => 82,
+ 'bpt=o' => 83,
+ 'brah' => 84,
+ 'brai' => 85,
+ 'bugi' => 86,
+ 'buhd' => 87,
+ 'byzantinemusic' => 88,
+ 'c' => 89,
+ 'cakm' => 90,
+ 'cans' => 91,
+ 'cari' => 92,
+ 'cased' => 93,
+ 'casedletter' => 94,
+ 'cc' => 1129,
+ 'ccc=0' => 95,
+ 'ccc=1' => 96,
+ 'ccc=10' => 97,
+ 'ccc=103' => 98,
+ 'ccc=107' => 99,
+ 'ccc=11' => 100,
+ 'ccc=118' => 101,
+ 'ccc=12' => 102,
+ 'ccc=122' => 103,
+ 'ccc=129' => 104,
+ 'ccc=13' => 105,
+ 'ccc=130' => 106,
+ 'ccc=132' => 107,
+ 'ccc=14' => 108,
+ 'ccc=15' => 109,
+ 'ccc=16' => 110,
+ 'ccc=17' => 111,
+ 'ccc=18' => 112,
+ 'ccc=19' => 113,
+ 'ccc=20' => 114,
+ 'ccc=202' => 115,
+ 'ccc=21' => 116,
+ 'ccc=214' => 117,
+ 'ccc=216' => 118,
+ 'ccc=22' => 119,
+ 'ccc=23' => 120,
+ 'ccc=24' => 121,
+ 'ccc=25' => 122,
+ 'ccc=26' => 123,
+ 'ccc=27' => 124,
+ 'ccc=28' => 125,
+ 'ccc=29' => 126,
+ 'ccc=30' => 127,
+ 'ccc=31' => 128,
+ 'ccc=32' => 129,
+ 'ccc=33' => 130,
+ 'ccc=34' => 131,
+ 'ccc=35' => 132,
+ 'ccc=36' => 133,
+ 'ccc=7' => 134,
+ 'ccc=8' => 135,
+ 'ccc=84' => 136,
+ 'ccc=9' => 137,
+ 'ccc=91' => 138,
+ 'ccc=a' => 139,
+ 'ccc=al' => 140,
+ 'ccc=ar' => 141,
+ 'ccc=b' => 142,
+ 'ccc=bl' => 143,
+ 'ccc=br' => 144,
+ 'ccc=da' => 145,
+ 'ccc=db' => 146,
+ 'ccc=is' => 147,
+ 'ccc=l' => 148,
+ 'ccc=r' => 149,
+ 'ce' => 150,
+ 'cf' => 151,
+ 'cham' => 152,
+ 'cher' => 153,
+ 'cherokeesup' => 154,
+ 'chesssymbols' => 155,
+ 'ci' => 156,
+ 'cjk' => 157,
+ 'cjkcompat' => 158,
+ 'cjkcompatforms' => 159,
+ 'cjkcompatideographs' => 160,
+ 'cjkcompatideographssup' => 161,
+ 'cjkexta' => 162,
+ 'cjkextb' => 163,
+ 'cjkextc' => 164,
+ 'cjkextd' => 165,
+ 'cjkexte' => 166,
+ 'cjkextf' => 167,
+ 'cjkradicalssup' => 168,
+ 'cjkstrokes' => 169,
+ 'cjksymbols' => 170,
+ 'cn' => 171,
+ 'cntrl' => 1129,
+ 'co' => 172,
+ 'compatjamo' => 173,
+ 'compex' => 174,
+ 'control' => 1129,
+ 'controlpictures' => 175,
+ 'copt' => 176,
+ 'copticepactnumbers' => 177,
+ 'countingrod' => 178,
+ 'cprt' => 179,
+ 'cuneiformnumbers' => 180,
+ 'currencysymbols' => 181,
+ 'cwcf' => 182,
+ 'cwcm' => 183,
+ 'cwkcf' => 184,
+ 'cwl' => 185,
+ 'cwt' => 186,
+ 'cwu' => 187,
+ 'cypriotsyllabary' => 188,
+ 'cyrillicexta' => 189,
+ 'cyrillicextb' => 190,
+ 'cyrillicextc' => 191,
+ 'cyrillicsup' => 192,
+ 'cyrl' => 193,
+ 'dash' => 194,
+ 'decimalnumber' => 1130,
+ 'dep' => 195,
+ 'deva' => 196,
+ 'devanagariext' => 197,
+ 'di' => 198,
+ 'dia' => 199,
+ 'diacriticals' => 200,
+ 'diacriticalsext' => 201,
+ 'diacriticalsforsymbols' => 202,
+ 'diacriticalssup' => 203,
+ 'digit' => 1130,
+ 'dingbats' => 204,
+ 'dogr' => 205,
+ 'domino' => 206,
+ 'dsrt' => 207,
+ 'dt=can' => 208,
+ 'dt=com' => 209,
+ 'dt=enc' => 210,
+ 'dt=fin' => 211,
+ 'dt=font' => 212,
+ 'dt=fra' => 213,
+ 'dt=init' => 214,
+ 'dt=iso' => 215,
+ 'dt=med' => 216,
+ 'dt=nar' => 217,
+ 'dt=nb' => 218,
+ 'dt=noncanon' => 219,
+ 'dt=none' => 220,
+ 'dt=sml' => 221,
+ 'dt=sqr' => 222,
+ 'dt=sub' => 223,
+ 'dt=sup' => 224,
+ 'dt=vert' => 225,
+ 'dupl' => 226,
+ 'ea=a' => 228,
+ 'ea=f' => 229,
+ 'ea=h' => 230,
+ 'ea=n' => 231,
+ 'ea=na' => 232,
+ 'ea=w' => 233,
+ 'earlydynasticcuneiform' => 227,
+ 'egyp' => 234,
+ 'egyptianhieroglyphformatcontrols' => 235,
+ 'elba' => 236,
+ 'elym' => 237,
+ 'emoticons' => 238,
+ 'enclosedalphanum' => 239,
+ 'enclosedalphanumsup' => 240,
+ 'enclosedcjk' => 241,
+ 'enclosedideographicsup' => 242,
+ 'ethi' => 243,
+ 'ethiopicext' => 244,
+ 'ethiopicexta' => 245,
+ 'ethiopicsup' => 246,
+ 'ext' => 247,
+ 'gcb=cn' => 248,
+ 'gcb=ex' => 249,
+ 'gcb=l' => 250,
+ 'gcb=pp' => 251,
+ 'gcb=sm' => 252,
+ 'gcb=t' => 253,
+ 'gcb=v' => 254,
+ 'gcb=xx' => 255,
+ 'geometricshapes' => 256,
+ 'geometricshapesext' => 257,
+ 'geor' => 258,
+ 'georgianext' => 259,
+ 'georgiansup' => 260,
+ 'glag' => 261,
+ 'glagoliticsup' => 262,
+ 'gong' => 263,
+ 'gonm' => 264,
+ 'goth' => 265,
+ 'gran' => 266,
+ 'graph' => 1131,
+ 'grbase' => 267,
+ 'greekext' => 268,
+ 'grek' => 269,
+ 'grext' => 270,
+ 'gujr' => 271,
+ 'guru' => 272,
+ 'halfandfullforms' => 273,
+ 'halfmarks' => 274,
+ 'han' => 275,
+ 'hang' => 276,
+ 'hano' => 277,
+ 'hatr' => 278,
+ 'hebr' => 279,
+ 'hex' => 1138,
+ 'hexdigit' => 1138,
+ 'highpusurrogates' => 280,
+ 'highsurrogates' => 281,
+ 'hira' => 282,
+ 'hluw' => 283,
+ 'hmng' => 284,
+ 'hmnp' => 285,
+ 'horizspace' => 1128,
+ 'hst=na' => 286,
+ 'hung' => 287,
+ 'hyphen_perl_aux' => 288,
+ 'idc' => 289,
+ 'ideo' => 290,
+ 'ideographicsymbols' => 291,
+ 'ids' => 292,
+ 'idsb' => 293,
+ 'idst' => 294,
+ 'in=1.1' => 485,
+ 'in=10' => 482,
+ 'in=11' => 483,
+ 'in=12' => 484,
+ 'in=2' => 486,
+ 'in=2.1' => 487,
+ 'in=3' => 488,
+ 'in=3.1' => 489,
+ 'in=3.2' => 490,
+ 'in=4' => 491,
+ 'in=4.1' => 492,
+ 'in=5' => 493,
+ 'in=5.1' => 494,
+ 'in=5.2' => 495,
+ 'in=6' => 496,
+ 'in=6.1' => 497,
+ 'in=6.2' => 498,
+ 'in=6.3' => 499,
+ 'in=7' => 500,
+ 'in=8' => 501,
+ 'in=9' => 502,
+ 'inadlam' => 295,
+ 'inahom' => 296,
+ 'inanatolianhieroglyphs' => 297,
+ 'inarabic' => 298,
+ 'inarmenian' => 299,
+ 'inavestan' => 300,
+ 'inbalinese' => 301,
+ 'inbamum' => 302,
+ 'inbassavah' => 303,
+ 'inbatak' => 304,
+ 'inbengali' => 305,
+ 'inbhaiksuki' => 306,
+ 'inbopomofo' => 307,
+ 'inbrahmi' => 308,
+ 'inbuginese' => 309,
+ 'inbuhid' => 310,
+ 'incarian' => 311,
+ 'incaucasianalbanian' => 312,
+ 'inchakma' => 313,
+ 'incham' => 314,
+ 'incherokee' => 315,
+ 'incoptic' => 316,
+ 'incuneiform' => 317,
+ 'incyrillic' => 318,
+ 'indevanagari' => 319,
+ 'indicnumberforms' => 320,
+ 'indicsiyaqnumbers' => 321,
+ 'indogra' => 322,
+ 'induployan' => 323,
+ 'inegyptianhieroglyphs' => 324,
+ 'inelbasan' => 325,
+ 'inelymaic' => 326,
+ 'inethiopic' => 327,
+ 'ingeorgian' => 328,
+ 'inglagolitic' => 329,
+ 'ingothic' => 330,
+ 'ingrantha' => 331,
+ 'ingreek' => 332,
+ 'ingujarati' => 333,
+ 'ingunjalagondi' => 334,
+ 'ingurmukhi' => 335,
+ 'inhangul' => 336,
+ 'inhanifirohingya' => 337,
+ 'inhanunoo' => 338,
+ 'inhatran' => 339,
+ 'inhebrew' => 340,
+ 'inhiragana' => 341,
+ 'inidc' => 342,
+ 'inimperialaramaic' => 343,
+ 'ininscriptionalpahlavi' => 344,
+ 'ininscriptionalparthian' => 345,
+ 'injavanese' => 346,
+ 'inkaithi' => 347,
+ 'inkannada' => 348,
+ 'inkatakana' => 349,
+ 'inkharoshthi' => 350,
+ 'inkhmer' => 351,
+ 'inkhojki' => 352,
+ 'inkhudawadi' => 353,
+ 'inlao' => 354,
+ 'inlepcha' => 355,
+ 'inlimbu' => 356,
+ 'inlineara' => 357,
+ 'inlycian' => 358,
+ 'inlydian' => 359,
+ 'inmahajani' => 360,
+ 'inmakasar' => 361,
+ 'inmalayalam' => 362,
+ 'inmandaic' => 363,
+ 'inmanichaean' => 364,
+ 'inmarchen' => 365,
+ 'inmasaramgondi' => 366,
+ 'inmedefaidrin' => 367,
+ 'inmeeteimayek' => 368,
+ 'inmendekikakui' => 369,
+ 'inmeroiticcursive' => 370,
+ 'inmiao' => 371,
+ 'inmodi' => 372,
+ 'inmongolian' => 373,
+ 'inmro' => 374,
+ 'inmultani' => 375,
+ 'inmyanmar' => 376,
+ 'innabataean' => 377,
+ 'innandinagari' => 378,
+ 'innewa' => 379,
+ 'innewtailue' => 380,
+ 'innko' => 381,
+ 'innushu' => 382,
+ 'innyiakengpuachuehmong' => 383,
+ 'inogham' => 384,
+ 'inoldhungarian' => 385,
+ 'inolditalic' => 386,
+ 'inoldpermic' => 387,
+ 'inoldpersian' => 388,
+ 'inoldsogdian' => 389,
+ 'inoldturkic' => 390,
+ 'inoriya' => 391,
+ 'inosage' => 392,
+ 'inosmanya' => 393,
+ 'inpahawhhmong' => 394,
+ 'inpaucinhau' => 395,
+ 'inpc=bottom' => 396,
+ 'inpc=bottomandleft' => 397,
+ 'inpc=bottomandright' => 398,
+ 'inpc=left' => 399,
+ 'inpc=leftandright' => 400,
+ 'inpc=na' => 401,
+ 'inpc=overstruck' => 402,
+ 'inpc=right' => 403,
+ 'inpc=top' => 404,
+ 'inpc=topandbottom' => 405,
+ 'inpc=topandbottomandright' => 406,
+ 'inpc=topandleft' => 407,
+ 'inpc=topandleftandright' => 408,
+ 'inpc=topandright' => 409,
+ 'inphagspa' => 410,
+ 'inphoenician' => 411,
+ 'inpsalterpahlavi' => 412,
+ 'inpunctuation' => 413,
+ 'inrejang' => 414,
+ 'inrunic' => 415,
+ 'insamaritan' => 416,
+ 'insaurashtra' => 417,
+ 'insc=avagraha' => 418,
+ 'insc=bindu' => 419,
+ 'insc=brahmijoiningnumber' => 420,
+ 'insc=cantillationmark' => 421,
+ 'insc=consonant' => 422,
+ 'insc=consonantdead' => 423,
+ 'insc=consonantfinal' => 424,
+ 'insc=consonantheadletter' => 425,
+ 'insc=consonantinitialpostfixed' => 426,
+ 'insc=consonantkiller' => 427,
+ 'insc=consonantmedial' => 428,
+ 'insc=consonantplaceholder' => 429,
+ 'insc=consonantprecedingrepha' => 430,
+ 'insc=consonantprefixed' => 431,
+ 'insc=consonantsubjoined' => 432,
+ 'insc=consonantsucceedingrepha' => 433,
+ 'insc=consonantwithstacker' => 434,
+ 'insc=geminationmark' => 435,
+ 'insc=invisiblestacker' => 436,
+ 'insc=modifyingletter' => 437,
+ 'insc=nonjoiner' => 438,
+ 'insc=nukta' => 439,
+ 'insc=number' => 440,
+ 'insc=numberjoiner' => 441,
+ 'insc=other' => 442,
+ 'insc=purekiller' => 443,
+ 'insc=registershifter' => 444,
+ 'insc=syllablemodifier' => 445,
+ 'insc=toneletter' => 446,
+ 'insc=tonemark' => 447,
+ 'insc=virama' => 448,
+ 'insc=visarga' => 449,
+ 'insc=vowel' => 450,
+ 'insc=voweldependent' => 451,
+ 'insc=vowelindependent' => 452,
+ 'insharada' => 453,
+ 'insiddham' => 454,
+ 'insinhala' => 455,
+ 'insogdian' => 456,
+ 'insorasompeng' => 457,
+ 'insoyombo' => 458,
+ 'insundanese' => 459,
+ 'insylotinagri' => 460,
+ 'insyriac' => 461,
+ 'intagalog' => 462,
+ 'intagbanwa' => 463,
+ 'intaile' => 464,
+ 'intaitham' => 465,
+ 'intaiviet' => 466,
+ 'intakri' => 467,
+ 'intamil' => 468,
+ 'intangut' => 469,
+ 'intelugu' => 470,
+ 'inthaana' => 471,
+ 'inthai' => 472,
+ 'intibetan' => 473,
+ 'intifinagh' => 474,
+ 'intirhuta' => 475,
+ 'inugaritic' => 476,
+ 'invai' => 477,
+ 'invs' => 478,
+ 'inwancho' => 479,
+ 'inwarangciti' => 480,
+ 'inzanabazarsquare' => 481,
+ 'ipaext' => 503,
+ 'ital' => 504,
+ 'jamo' => 505,
+ 'jamoexta' => 506,
+ 'jamoextb' => 507,
+ 'java' => 508,
+ 'jg=africanfeh' => 509,
+ 'jg=africannoon' => 510,
+ 'jg=africanqaf' => 511,
+ 'jg=ain' => 512,
+ 'jg=alaph' => 513,
+ 'jg=alef' => 514,
+ 'jg=beh' => 515,
+ 'jg=beth' => 516,
+ 'jg=burushaskiyehbarree' => 517,
+ 'jg=dal' => 518,
+ 'jg=dalathrish' => 519,
+ 'jg=e' => 520,
+ 'jg=farsiyeh' => 521,
+ 'jg=fe' => 522,
+ 'jg=feh' => 523,
+ 'jg=finalsemkath' => 524,
+ 'jg=gaf' => 525,
+ 'jg=gamal' => 526,
+ 'jg=hah' => 527,
+ 'jg=hamzaonhehgoal' => 528,
+ 'jg=hanifirohingyakinnaya' => 529,
+ 'jg=hanifirohingyapa' => 530,
+ 'jg=he' => 531,
+ 'jg=heh' => 532,
+ 'jg=hehgoal' => 533,
+ 'jg=heth' => 534,
+ 'jg=kaf' => 535,
+ 'jg=kaph' => 536,
+ 'jg=khaph' => 537,
+ 'jg=knottedheh' => 538,
+ 'jg=lam' => 539,
+ 'jg=lamadh' => 540,
+ 'jg=malayalambha' => 541,
+ 'jg=malayalamja' => 542,
+ 'jg=malayalamlla' => 543,
+ 'jg=malayalamllla' => 544,
+ 'jg=malayalamnga' => 545,
+ 'jg=malayalamnna' => 546,
+ 'jg=malayalamnnna' => 547,
+ 'jg=malayalamnya' => 548,
+ 'jg=malayalamra' => 549,
+ 'jg=malayalamssa' => 550,
+ 'jg=malayalamtta' => 551,
+ 'jg=manichaeanaleph' => 552,
+ 'jg=manichaeanayin' => 553,
+ 'jg=manichaeanbeth' => 554,
+ 'jg=manichaeandaleth' => 555,
+ 'jg=manichaeandhamedh' => 556,
+ 'jg=manichaeanfive' => 557,
+ 'jg=manichaeangimel' => 558,
+ 'jg=manichaeanheth' => 559,
+ 'jg=manichaeanhundred' => 560,
+ 'jg=manichaeankaph' => 561,
+ 'jg=manichaeanlamedh' => 562,
+ 'jg=manichaeanmem' => 563,
+ 'jg=manichaeannun' => 564,
+ 'jg=manichaeanone' => 565,
+ 'jg=manichaeanpe' => 566,
+ 'jg=manichaeanqoph' => 567,
+ 'jg=manichaeanresh' => 568,
+ 'jg=manichaeansadhe' => 569,
+ 'jg=manichaeansamekh' => 570,
+ 'jg=manichaeantaw' => 571,
+ 'jg=manichaeanten' => 572,
+ 'jg=manichaeanteth' => 573,
+ 'jg=manichaeanthamedh' => 574,
+ 'jg=manichaeantwenty' => 575,
+ 'jg=manichaeanwaw' => 576,
+ 'jg=manichaeanyodh' => 577,
+ 'jg=manichaeanzayin' => 578,
+ 'jg=meem' => 579,
+ 'jg=mim' => 580,
+ 'jg=nojoininggroup' => 581,
+ 'jg=noon' => 582,
+ 'jg=nun' => 583,
+ 'jg=nya' => 584,
+ 'jg=pe' => 585,
+ 'jg=qaf' => 586,
+ 'jg=qaph' => 587,
+ 'jg=reh' => 588,
+ 'jg=reversedpe' => 589,
+ 'jg=rohingyayeh' => 590,
+ 'jg=sad' => 591,
+ 'jg=sadhe' => 592,
+ 'jg=seen' => 593,
+ 'jg=semkath' => 594,
+ 'jg=shin' => 595,
+ 'jg=straightwaw' => 596,
+ 'jg=swashkaf' => 597,
+ 'jg=syriacwaw' => 598,
+ 'jg=tah' => 599,
+ 'jg=taw' => 600,
+ 'jg=tehmarbuta' => 601,
+ 'jg=teth' => 602,
+ 'jg=waw' => 603,
+ 'jg=yeh' => 604,
+ 'jg=yehbarree' => 605,
+ 'jg=yehwithtail' => 606,
+ 'jg=yudh' => 607,
+ 'jg=yudhhe' => 608,
+ 'jg=zain' => 609,
+ 'jg=zhain' => 610,
+ 'joinc' => 611,
+ 'jt=c' => 612,
+ 'jt=d' => 613,
+ 'jt=l' => 614,
+ 'jt=r' => 615,
+ 'jt=t' => 616,
+ 'jt=u' => 617,
+ 'kali' => 618,
+ 'kana' => 619,
+ 'kanaexta' => 620,
+ 'kanasup' => 621,
+ 'kanbun' => 622,
+ 'kangxi' => 623,
+ 'katakanaext' => 624,
+ 'khar' => 625,
+ 'khmersymbols' => 626,
+ 'khmr' => 627,
+ 'khoj' => 628,
+ 'knda' => 629,
+ 'kthi' => 630,
+ 'l' => 631,
+ 'l_' => 94,
+ 'l_amp_' => 94,
+ 'lana' => 632,
+ 'lao' => 633,
+ 'latin1' => 634,
+ 'latinexta' => 635,
+ 'latinextadditional' => 636,
+ 'latinextb' => 637,
+ 'latinextc' => 638,
+ 'latinextd' => 639,
+ 'latinexte' => 640,
+ 'latn' => 641,
+ 'lb=ai' => 642,
+ 'lb=al' => 643,
+ 'lb=b2' => 644,
+ 'lb=ba' => 645,
+ 'lb=bb' => 646,
+ 'lb=bk' => 647,
+ 'lb=cb' => 648,
+ 'lb=cj' => 649,
+ 'lb=cl' => 650,
+ 'lb=cm' => 651,
+ 'lb=cp' => 652,
+ 'lb=cr' => 653,
+ 'lb=eb' => 654,
+ 'lb=em' => 655,
+ 'lb=ex' => 656,
+ 'lb=gl' => 657,
+ 'lb=h2' => 658,
+ 'lb=h3' => 659,
+ 'lb=hl' => 660,
+ 'lb=hy' => 661,
+ 'lb=id' => 662,
+ 'lb=in' => 663,
+ 'lb=is' => 664,
+ 'lb=lf' => 665,
+ 'lb=nl' => 666,
+ 'lb=ns' => 667,
+ 'lb=nu' => 668,
+ 'lb=op' => 669,
+ 'lb=po' => 670,
+ 'lb=pr' => 671,
+ 'lb=qu' => 672,
+ 'lb=sa' => 673,
+ 'lb=sg_perl_aux' => 674,
+ 'lb=sp' => 675,
+ 'lb=sy' => 676,
+ 'lb=wj' => 677,
+ 'lb=xx' => 678,
+ 'lb=zw' => 679,
+ 'lb=zwj' => 680,
+ 'lc' => 94,
+ 'lepc' => 681,
+ 'letterlikesymbols' => 682,
+ 'limb' => 683,
+ 'lina' => 684,
+ 'linb' => 685,
+ 'linearbideograms' => 686,
+ 'linearbsyllabary' => 687,
+ 'lisu' => 688,
+ 'll' => 692,
+ 'lm' => 689,
+ 'lo' => 690,
+ 'loe' => 691,
+ 'lower' => 1132,
+ 'lowercase' => 1132,
+ 'lowercaseletter' => 692,
+ 'lowsurrogates' => 693,
+ 'lt' => 1089,
+ 'lu' => 1095,
+ 'lyci' => 694,
+ 'lydi' => 695,
+ 'm' => 696,
+ 'mahj' => 697,
+ 'mahjong' => 698,
+ 'maka' => 699,
+ 'mand' => 700,
+ 'mani' => 701,
+ 'marc' => 702,
+ 'math' => 703,
+ 'mathalphanum' => 704,
+ 'mathoperators' => 705,
+ 'mayannumerals' => 706,
+ 'mc' => 707,
+ 'me' => 708,
+ 'medf' => 709,
+ 'meeteimayekext' => 710,
+ 'mend' => 711,
+ 'merc' => 712,
+ 'mero' => 713,
+ 'miao' => 714,
+ 'miscarrows' => 715,
+ 'miscmathsymbolsa' => 716,
+ 'miscmathsymbolsb' => 717,
+ 'miscpictographs' => 718,
+ 'miscsymbols' => 719,
+ 'misctechnical' => 720,
+ 'mlym' => 721,
+ 'mn' => 722,
+ 'modi' => 723,
+ 'modifierletters' => 724,
+ 'modifiertoneletters' => 725,
+ 'mong' => 726,
+ 'mongoliansup' => 727,
+ 'mro' => 728,
+ 'mtei' => 729,
+ 'mult' => 730,
+ 'music' => 731,
+ 'myanmarexta' => 732,
+ 'myanmarextb' => 733,
+ 'mymr' => 734,
+ 'n' => 735,
+ 'nand' => 736,
+ 'narb' => 737,
+ 'nb' => 738,
+ 'nbat' => 739,
+ 'nd' => 1130,
+ 'newa' => 740,
+ 'nfcqc=m' => 741,
+ 'nfcqc=y' => 742,
+ 'nfdqc=y' => 743,
+ 'nfkcqc=n' => 744,
+ 'nfkcqc=y' => 745,
+ 'nfkdqc=n' => 746,
+ 'nko' => 747,
+ 'nl' => 748,
+ 'no' => 749,
+ 'nshu' => 750,
+ 'nt=di' => 751,
+ 'nt=nu' => 752,
+ 'numberforms' => 753,
+ 'nv=0' => 754,
+ 'nv=1' => 755,
+ 'nv=1/10' => 780,
+ 'nv=1/12' => 781,
+ 'nv=1/16' => 782,
+ 'nv=1/160' => 783,
+ 'nv=1/2' => 784,
+ 'nv=1/20' => 785,
+ 'nv=1/3' => 786,
+ 'nv=1/32' => 787,
+ 'nv=1/320' => 788,
+ 'nv=1/4' => 789,
+ 'nv=1/40' => 790,
+ 'nv=1/5' => 791,
+ 'nv=1/6' => 792,
+ 'nv=1/64' => 793,
+ 'nv=1/7' => 794,
+ 'nv=1/8' => 795,
+ 'nv=1/80' => 796,
+ 'nv=1/9' => 797,
+ 'nv=10' => 756,
+ 'nv=100' => 757,
+ 'nv=1000' => 758,
+ 'nv=10000' => 759,
+ 'nv=100000' => 760,
+ 'nv=1000000' => 761,
+ 'nv=10000000' => 762,
+ 'nv=100000000' => 763,
+ 'nv=10000000000' => 764,
+ 'nv=1000000000000' => 765,
+ 'nv=11' => 766,
+ 'nv=11/12' => 767,
+ 'nv=11/2' => 768,
+ 'nv=12' => 769,
+ 'nv=13' => 770,
+ 'nv=13/2' => 771,
+ 'nv=14' => 772,
+ 'nv=15' => 773,
+ 'nv=15/2' => 774,
+ 'nv=16' => 775,
+ 'nv=17' => 776,
+ 'nv=17/2' => 777,
+ 'nv=18' => 778,
+ 'nv=19' => 779,
+ 'nv=2' => 798,
+ 'nv=2/3' => 815,
+ 'nv=2/5' => 816,
+ 'nv=20' => 799,
+ 'nv=200' => 800,
+ 'nv=2000' => 801,
+ 'nv=20000' => 802,
+ 'nv=200000' => 803,
+ 'nv=20000000' => 804,
+ 'nv=21' => 805,
+ 'nv=216000' => 806,
+ 'nv=22' => 807,
+ 'nv=23' => 808,
+ 'nv=24' => 809,
+ 'nv=25' => 810,
+ 'nv=26' => 811,
+ 'nv=27' => 812,
+ 'nv=28' => 813,
+ 'nv=29' => 814,
+ 'nv=3' => 817,
+ 'nv=3/16' => 832,
+ 'nv=3/2' => 833,
+ 'nv=3/20' => 834,
+ 'nv=3/4' => 835,
+ 'nv=3/5' => 836,
+ 'nv=3/64' => 837,
+ 'nv=3/8' => 838,
+ 'nv=3/80' => 839,
+ 'nv=30' => 818,
+ 'nv=300' => 819,
+ 'nv=3000' => 820,
+ 'nv=30000' => 821,
+ 'nv=300000' => 822,
+ 'nv=31' => 823,
+ 'nv=32' => 824,
+ 'nv=33' => 825,
+ 'nv=34' => 826,
+ 'nv=35' => 827,
+ 'nv=36' => 828,
+ 'nv=37' => 829,
+ 'nv=38' => 830,
+ 'nv=39' => 831,
+ 'nv=4' => 840,
+ 'nv=4/5' => 856,
+ 'nv=40' => 841,
+ 'nv=400' => 842,
+ 'nv=4000' => 843,
+ 'nv=40000' => 844,
+ 'nv=400000' => 845,
+ 'nv=41' => 846,
+ 'nv=42' => 847,
+ 'nv=43' => 848,
+ 'nv=432000' => 849,
+ 'nv=44' => 850,
+ 'nv=45' => 851,
+ 'nv=46' => 852,
+ 'nv=47' => 853,
+ 'nv=48' => 854,
+ 'nv=49' => 855,
+ 'nv=5' => 857,
+ 'nv=5/12' => 863,
+ 'nv=5/2' => 864,
+ 'nv=5/6' => 865,
+ 'nv=5/8' => 866,
+ 'nv=50' => 858,
+ 'nv=500' => 859,
+ 'nv=5000' => 860,
+ 'nv=50000' => 861,
+ 'nv=500000' => 862,
+ 'nv=6' => 867,
+ 'nv=60' => 868,
+ 'nv=600' => 869,
+ 'nv=6000' => 870,
+ 'nv=60000' => 871,
+ 'nv=600000' => 872,
+ 'nv=7' => 873,
+ 'nv=7/12' => 879,
+ 'nv=7/2' => 880,
+ 'nv=7/8' => 881,
+ 'nv=70' => 874,
+ 'nv=700' => 875,
+ 'nv=7000' => 876,
+ 'nv=70000' => 877,
+ 'nv=700000' => 878,
+ 'nv=8' => 882,
+ 'nv=80' => 883,
+ 'nv=800' => 884,
+ 'nv=8000' => 885,
+ 'nv=80000' => 886,
+ 'nv=800000' => 887,
+ 'nv=9' => 888,
+ 'nv=9/2' => 894,
+ 'nv=90' => 889,
+ 'nv=900' => 890,
+ 'nv=9000' => 891,
+ 'nv=90000' => 892,
+ 'nv=900000' => 893,
+ 'nv=_minus_1/2' => 896,
+ 'nv=nan' => 895,
+ 'ocr' => 897,
+ 'ogam' => 898,
+ 'olck' => 899,
+ 'orkh' => 900,
+ 'ornamentaldingbats' => 901,
+ 'orya' => 902,
+ 'osge' => 903,
+ 'osma' => 904,
+ 'ottomansiyaqnumbers' => 905,
+ 'p' => 906,
+ 'palm' => 907,
+ 'patsyn' => 908,
+ 'pauc' => 909,
+ 'pc' => 910,
+ 'pcm' => 911,
+ 'pd' => 912,
+ 'pe' => 913,
+ 'perlspace' => 935,
+ 'perlword' => 937,
+ 'perm' => 914,
+ 'pf' => 915,
+ 'phag' => 916,
+ 'phaistos' => 917,
+ 'phli' => 918,
+ 'phlp' => 919,
+ 'phnx' => 920,
+ 'phoneticext' => 921,
+ 'phoneticextsup' => 922,
+ 'pi' => 923,
+ 'playingcards' => 924,
+ 'po' => 925,
+ 'posixalnum' => 926,
+ 'posixalpha' => 927,
+ 'posixblank' => 928,
+ 'posixcntrl' => 929,
+ 'posixdigit' => 930,
+ 'posixgraph' => 931,
+ 'posixlower' => 932,
+ 'posixprint' => 933,
+ 'posixpunct' => 934,
+ 'posixspace' => 935,
+ 'posixupper' => 936,
+ 'posixword' => 937,
+ 'posixxdigit' => 938,
+ 'print' => 1133,
+ 'prti' => 939,
+ 'ps' => 940,
+ 'pua' => 941,
+ 'qaai' => 942,
+ 'qmark' => 943,
+ 'radical' => 944,
+ 'ri' => 945,
+ 'rjng' => 946,
+ 'rohg' => 947,
+ 'rumi' => 948,
+ 'runr' => 949,
+ 's' => 950,
+ 'samr' => 951,
+ 'sarb' => 952,
+ 'saur' => 953,
+ 'sb=at' => 954,
+ 'sb=cl' => 955,
+ 'sb=ex' => 956,
+ 'sb=fo' => 957,
+ 'sb=le' => 958,
+ 'sb=lo' => 959,
+ 'sb=sc' => 960,
+ 'sb=se' => 961,
+ 'sb=sp' => 962,
+ 'sb=st' => 963,
+ 'sb=up' => 964,
+ 'sb=xx' => 965,
+ 'sc' => 966,
+ 'sc=adlm' => 967,
+ 'sc=arab' => 968,
+ 'sc=armn' => 969,
+ 'sc=beng' => 970,
+ 'sc=bopo' => 971,
+ 'sc=bugi' => 972,
+ 'sc=buhd' => 973,
+ 'sc=cakm' => 974,
+ 'sc=copt' => 975,
+ 'sc=cprt' => 976,
+ 'sc=cyrl' => 977,
+ 'sc=deva' => 978,
+ 'sc=dogr' => 979,
+ 'sc=dupl' => 980,
+ 'sc=geor' => 981,
+ 'sc=glag' => 982,
+ 'sc=gong' => 983,
+ 'sc=gonm' => 984,
+ 'sc=gran' => 985,
+ 'sc=grek' => 986,
+ 'sc=gujr' => 987,
+ 'sc=guru' => 988,
+ 'sc=han' => 989,
+ 'sc=hang' => 990,
+ 'sc=hano' => 991,
+ 'sc=hira' => 992,
+ 'sc=java' => 993,
+ 'sc=kali' => 994,
+ 'sc=kana' => 995,
+ 'sc=khoj' => 996,
+ 'sc=knda' => 997,
+ 'sc=kthi' => 998,
+ 'sc=latn' => 999,
+ 'sc=limb' => 1000,
+ 'sc=lina' => 1001,
+ 'sc=linb' => 1002,
+ 'sc=mahj' => 1003,
+ 'sc=mand' => 1004,
+ 'sc=mani' => 1005,
+ 'sc=mlym' => 1006,
+ 'sc=modi' => 1007,
+ 'sc=mong' => 1008,
+ 'sc=mult' => 1009,
+ 'sc=mymr' => 1010,
+ 'sc=nand' => 1011,
+ 'sc=orya' => 1012,
+ 'sc=perm' => 1013,
+ 'sc=phag' => 1014,
+ 'sc=phlp' => 1015,
+ 'sc=qaai' => 1016,
+ 'sc=rohg' => 1017,
+ 'sc=shrd' => 1018,
+ 'sc=sind' => 1019,
+ 'sc=sinh' => 1020,
+ 'sc=sogd' => 1021,
+ 'sc=sylo' => 1022,
+ 'sc=syrc' => 1023,
+ 'sc=tagb' => 1024,
+ 'sc=takr' => 1025,
+ 'sc=tale' => 1026,
+ 'sc=taml' => 1027,
+ 'sc=telu' => 1028,
+ 'sc=tglg' => 1029,
+ 'sc=thaa' => 1030,
+ 'sc=tirh' => 1031,
+ 'sc=yi' => 1032,
+ 'sc=zyyy' => 1033,
+ 'sd' => 1034,
+ 'sgnw' => 1035,
+ 'shaw' => 1036,
+ 'shorthandformatcontrols' => 1037,
+ 'shrd' => 1038,
+ 'sidd' => 1039,
+ 'sind' => 1040,
+ 'sinh' => 1041,
+ 'sinhalaarchaicnumbers' => 1042,
+ 'sk' => 1043,
+ 'sm' => 1044,
+ 'smallforms' => 1045,
+ 'smallkanaext' => 1046,
+ 'so' => 1047,
+ 'sogd' => 1048,
+ 'sogo' => 1049,
+ 'sora' => 1050,
+ 'soyo' => 1051,
+ 'space' => 1135,
+ 'spaceperl' => 1135,
+ 'specials' => 1052,
+ 'sterm' => 1053,
+ 'sund' => 1054,
+ 'sundanesesup' => 1055,
+ 'suparrowsa' => 1056,
+ 'suparrowsb' => 1057,
+ 'suparrowsc' => 1058,
+ 'superandsub' => 1059,
+ 'supmathoperators' => 1060,
+ 'suppuaa' => 1061,
+ 'suppuab' => 1062,
+ 'suppunctuation' => 1063,
+ 'supsymbolsandpictographs' => 1064,
+ 'suttonsignwriting' => 1065,
+ 'sylo' => 1066,
+ 'symbolsandpictographsexta' => 1067,
+ 'syrc' => 1068,
+ 'syriacsup' => 1069,
+ 'tagb' => 1070,
+ 'tags' => 1071,
+ 'taixuanjing' => 1072,
+ 'takr' => 1073,
+ 'tale' => 1074,
+ 'talu' => 1075,
+ 'tamilsup' => 1076,
+ 'taml' => 1077,
+ 'tang' => 1078,
+ 'tangutcomponents' => 1079,
+ 'tavt' => 1080,
+ 'telu' => 1081,
+ 'term' => 1082,
+ 'tfng' => 1083,
+ 'tglg' => 1084,
+ 'thaa' => 1085,
+ 'thai' => 1086,
+ 'tibt' => 1087,
+ 'tirh' => 1088,
+ 'title' => 1089,
+ 'titlecase' => 1089,
+ 'titlecaseletter' => 1089,
+ 'transportandmap' => 1090,
+ 'ucas' => 1091,
+ 'ucasext' => 1092,
+ 'ugar' => 1093,
+ 'uideo' => 1094,
+ 'upper' => 1136,
+ 'uppercase' => 1136,
+ 'uppercaseletter' => 1095,
+ 'vai' => 1096,
+ 'vedicext' => 1097,
+ 'verticalforms' => 1098,
+ 'vertspace' => 1099,
+ 'vo=r' => 1100,
+ 'vo=tr' => 1101,
+ 'vo=tu' => 1102,
+ 'vo=u' => 1103,
+ 'vs' => 1104,
+ 'vssup' => 1105,
+ 'wara' => 1106,
+ 'wb=dq' => 1107,
+ 'wb=eb' => 1108,
+ 'wb=ex' => 1109,
+ 'wb=extend' => 1110,
+ 'wb=fo' => 1111,
+ 'wb=ka' => 1112,
+ 'wb=le' => 1113,
+ 'wb=mb' => 1114,
+ 'wb=ml' => 1115,
+ 'wb=mn' => 1116,
+ 'wb=nl' => 1117,
+ 'wb=nu' => 1118,
+ 'wb=sq' => 1119,
+ 'wb=wsegspace' => 1120,
+ 'wb=xx' => 1121,
+ 'wcho' => 1122,
+ 'whitespace' => 1135,
+ 'word' => 1137,
+ 'wspace' => 1135,
+ 'xdigit' => 1138,
+ 'xidc' => 1123,
+ 'xids' => 1124,
+ 'xpeo' => 1125,
+ 'xperlspace' => 1135,
+ 'xposixalnum' => 1126,
+ 'xposixalpha' => 1127,
+ 'xposixblank' => 1128,
+ 'xposixcntrl' => 1129,
+ 'xposixdigit' => 1130,
+ 'xposixgraph' => 1131,
+ 'xposixlower' => 1132,
+ 'xposixprint' => 1133,
+ 'xposixpunct' => 1134,
+ 'xposixspace' => 1135,
+ 'xposixupper' => 1136,
+ 'xposixword' => 1137,
+ 'xposixxdigit' => 1138,
+ 'xsux' => 1139,
+ 'yi' => 1140,
+ 'yijing' => 1141,
+ 'yiradicals' => 1142,
+ 'yisyllables' => 1143,
+ 'z' => 1144,
+ 'zanb' => 1145,
+ 'zl' => 1146,
+ 'zp' => 1147,
+ 'zs' => 1148,
+ 'zyyy' => 1149,
+ 'zzzz' => 1150,
+);
+
+1;
+
+# Generated from:
+# d34c77b7e7a94986cc82056919f4ffc1503b73091561e16bd8ef145ac0fd04c5 lib/Unicode/UCD.pm
+# fdc543859433c4578ec7991d8c3238a3638d59962eb0db446d0228cccb7a48da lib/unicore/ArabicShaping.txt
+# 530831b8bdbb2a8a6bc1f2eef88224991513c7371c16537d0a7e09386a2cd326 lib/unicore/BidiBrackets.txt
+# f07f44839398bc33792d988f14fcbcaf382abc3b79119e7e0bb083104260b890 lib/unicore/BidiMirroring.txt
+# a1a3ca4381eb91f7b65afe7cb7df615cdcf67993fef4b486585f66b349993a10 lib/unicore/Blocks.txt
+# d6caa458afc9a76d86bc2ef26a5423f4e663981904de450c97dd53ee0ab5c125 lib/unicore/CaseFolding.txt
+# 08a52bd7f893f5c9a34a6d4cc3a0b2d7a44293ee45fb15d581e729b9f8584dfa lib/unicore/CompositionExclusions.txt
+# 269ff9d35cbe3cb05dfadb262a6d9d89d7ad43ac6ecd7ec71f47ec036e174912 lib/unicore/DAge.txt
+# b559c5ac28549ec48b9f6f14d189b503d35e33158db6b5f63e6103f51aded2cd lib/unicore/DCoreProperties.txt
+# 8b4210273920c013e736332434222b73786e6c468a360710f6799eeace9b3d38 lib/unicore/DNormalizationProps.txt
+# f7e61d7cd168d4496c6019148579af3adfd6e0dac8fa7cd98d8617bcd8732b28 lib/unicore/EastAsianWidth.txt
+# cb048696ab01434bac59c30e5cb1f682d95022c972d8d1ebd5ea79b3841ba66c lib/unicore/EmojiData.txt
+# af2928fa86e77dc2d91ed268270ca13895ede2bf29e9f0f7533f1873c0aeb4b5 lib/unicore/EquivalentUnifiedIdeograph.txt
+# 2791813c07488ad72b3943bbcb2eb85bb03222271829c13bf25f79edfb214330 lib/unicore/HangulSyllableType.txt
+# 1df3666ef987e492b4323a9cd907738e41656e6428752f4b1f6fd95f9bfc8adb lib/unicore/IndicPositionalCategory.txt
+# b3932cf25d7ea43e1bf6be6d45094d4552f753f6b651a7a869464028f1aa88a1 lib/unicore/IndicSyllabicCategory.txt
+# 035d78d5e1fb60c3b44fb7049a96c660a6d045be27ac8f4425abfbc3733724de lib/unicore/Jamo.txt
+# 5afea13d57e5b11c761a9b2553806b12a5fe4e96b729398d9bf0dd94b7e6d7eb lib/unicore/LineBreak.txt
+# 46cce9be0639999e6a903bfa9f1313def3c361c7ae68f411cef244132ee8097b lib/unicore/NameAliases.txt
+# 0911bfd49367059219a74c69fb2c1440d256869e713619a2a231afd24357ec6e lib/unicore/NamedSequences.txt
+# adf1323156ef56014549698eab7b482a3682c8e02c251a7c19f68e427b391189 lib/unicore/PropList.txt
+# 648e1cb9942f02b9358edc52d523803efa491ff576aa7063d4b9aebdef7d7217 lib/unicore/PropValueAliases.txt
+# 402e075e4b9d56bf46d32c950d693e5bb52c1a324a652f732401dbf5a9a4d269 lib/unicore/PropertyAliases.txt
+# b77739d4b7c35069c10f8e156b71c88c5955800240c22e3a32b2ed3ea9620e5f lib/unicore/ScriptExtensions.txt
+# bd327cbb8bffd2e4b40b1e9842c9a7d641d1c4e91c34a41020cf174f3a1c86a1 lib/unicore/Scripts.txt
+# 22b1bff498f08633f72468d46eebb65e30dc325fbb6822dde319cb605af014c2 lib/unicore/SpecialCasing.txt
+# 57913fdb1c1271fa9fd51dea08d49370f33d5bfda6bbaba348f44d90029ea11d lib/unicore/UnicodeData.txt
+# 36900e28ff01fa810d8a7e0848e1075289975188525f2c1d14487d561bbf595a lib/unicore/VerticalOrientation.txt
+# 238635e1dfcc190345ffbcab734f7017dea05d2a66f2291745c11067893689a7 lib/unicore/auxiliary/GCBTest.txt
+# b49f36b98cdd4c996fc7971b8b4d25e3c13b45f0a71f4ad0c5e70a9cebe0b956 lib/unicore/auxiliary/GraphemeBreakProperty.txt
+# d7ae54c2704f27b3a8f40ae934c2f6f3ee87bec20a865e0f9689bc1cd3f92335 lib/unicore/auxiliary/LBTest.txt
+# a9fafe0badc760379957a282bbe3d82d5a6ba113911b89501781fd6b6ad66377 lib/unicore/auxiliary/SBTest.txt
+# 5bf7faf5898403b6adad2d94f1a9ddf26de650cc7a8f2ee00590f05bbea9bd03 lib/unicore/auxiliary/SentenceBreakProperty.txt
+# 12e02e6158929ae29c9b47f76ab95d68194b1d0c295d7b4f6df31487590222a5 lib/unicore/auxiliary/WBTest.txt
+# 9be679d9772dc4842697a3a5b6e1845022f876c1f230ecea38f1be5d6d5673d8 lib/unicore/auxiliary/WordBreakProperty.txt
+# 9e6955b173a13a8a91e3c1b468c9d7e122c588aa288f707bfcef35790a828f8a lib/unicore/extracted/DBidiClass.txt
+# 80e94d5bb24639c9f322bd3b4b8159a8e9a7c33ac45076eff9cd3ecb0b9b812e lib/unicore/extracted/DBinaryProperties.txt
+# 6dc031734cd287af8b33c99435d41f6c836e8e08a7a6cb6cb07e8453cbebdb3b lib/unicore/extracted/DCombiningClass.txt
+# b143bb87f041dcb69335806cb0a1a6b2328eb25cdfa04fe6fcd4c77c6a06fe82 lib/unicore/extracted/DDecompositionType.txt
+# f63f08d36d01eb45f34ff8d2b36681252211f92db690fa1258a399a9a86df3bb lib/unicore/extracted/DEastAsianWidth.txt
+# 6fa015f11f19a9407d590827276f9884c1ee4c71fbc826a09008f7d9d7c1b255 lib/unicore/extracted/DGeneralCategory.txt
+# 1b24a2d586a8b91c0768d4da8919634463bdf126b7479a8e76b85d007cbef8a0 lib/unicore/extracted/DJoinGroup.txt
+# 8095565a527d3d0918a17e30a7bcc0c7864ac3ad748defc21027b3e1ad436368 lib/unicore/extracted/DJoinType.txt
+# 5f98a68d5aa76b06052fe16312d2fdf17547fd69c4e648532845e11beaf2002b lib/unicore/extracted/DLineBreak.txt
+# 8011b1733f6d2cc17576b2a1e20067eca51205e8e0b740d00ebd52afbec2ea39 lib/unicore/extracted/DNumType.txt
+# 8b677a4cf17a4c6949632146fa8a99aaa65689a41b9dd231f81783e81600ff36 lib/unicore/extracted/DNumValues.txt
+# bdc891efa8fc8425685f8c773bbfba47247c2d3612c9926b096fc7f794a5ffc2 lib/unicore/mktables
+# 93cc868487ef3345596041bcb90c302b1b056733bb95233101bc10dc2dbe36b4 lib/unicore/version
+# 4bb677187a1a64e39d48f2e341b5ecb6c99857e49d7a79cf503bd8a3c709999b regen/charset_translations.pl
+# 03e51b0f07beebd5da62ab943899aa4934eee1f792fa27c1fb638c33bf4ac6ea regen/mk_PL_charclass.pl
+# d62728e4cfcba3e6550ff12ad924f4811ca2077ebec705b9fecabed716764414 regen/mk_invlists.pl
+# ex: set ro:
diff --git a/lib/warnings.pm b/lib/warnings.pm
index 8b0137c7de..ca008d241f 100644
--- a/lib/warnings.pm
+++ b/lib/warnings.pm
@@ -5,7 +5,7 @@
package warnings;
-our $VERSION = "1.43";
+our $VERSION = "1.44";
# Verify that we're called correctly so that warnings will work.
# Can't use Carp, since Carp uses us!
@@ -101,6 +101,9 @@ our %Offsets = (
'experimental::alpha_assertions' => 134,
'experimental::script_run' => 136,
'shadow' => 138,
+
+ # Warnings Categories added in Perl 5.029
+ 'experimental::private_use' => 140,
);
our %Bits = (
@@ -114,13 +117,14 @@ our %Bits = (
'digit' => "\x00\x00\x00\x00\x00\x00\x00\x40\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [31]
'exec' => "\x00\x40\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [7]
'exiting' => "\x40\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [3]
- 'experimental' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x40\x55\x51\x15\x50\x01", # [51..56,58..62,66..68]
+ 'experimental' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x40\x55\x51\x15\x50\x11", # [51..56,58..62,66..68,70]
'experimental::alpha_assertions' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x40\x00", # [67]
'experimental::bitwise' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00", # [58]
'experimental::const_attr' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x40\x00\x00\x00", # [59]
'experimental::declared_refs' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00", # [66]
'experimental::lexical_subs' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00", # [52]
'experimental::postderef' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x40\x00\x00\x00\x00", # [55]
+ 'experimental::private_use' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10", # [70]
'experimental::re_strict' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00", # [60]
'experimental::refaliasing' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00", # [61]
'experimental::regex_sets' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00", # [53]
@@ -187,13 +191,14 @@ our %DeadBits = (
'digit' => "\x00\x00\x00\x00\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [31]
'exec' => "\x00\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [7]
'exiting' => "\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [3]
- 'experimental' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80\xaa\xa2\x2a\xa0\x02", # [51..56,58..62,66..68]
+ 'experimental' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80\xaa\xa2\x2a\xa0\x22", # [51..56,58..62,66..68,70]
'experimental::alpha_assertions' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80\x00", # [67]
'experimental::bitwise' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00", # [58]
'experimental::const_attr' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80\x00\x00\x00", # [59]
'experimental::declared_refs' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20\x00", # [66]
'experimental::lexical_subs' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00", # [52]
'experimental::postderef' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80\x00\x00\x00\x00", # [55]
+ 'experimental::private_use' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20", # [70]
'experimental::re_strict' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00", # [60]
'experimental::refaliasing' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00", # [61]
'experimental::regex_sets' => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00", # [53]
@@ -251,8 +256,8 @@ our %DeadBits = (
# These are used by various things, including our own tests
our $NONE = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
-our $DEFAULT = "\x10\x01\x00\x00\x00\x50\x04\x00\x00\x00\x00\x00\x00\x55\x51\x55\x50\x01", # [2,4,22,23,25,52..56,58..63,66..68]
-our $LAST_BIT = 140 ;
+our $DEFAULT = "\x10\x01\x00\x00\x00\x50\x04\x00\x00\x00\x00\x00\x00\x55\x51\x55\x50\x11", # [2,4,22,23,25,52..56,58..63,66..68,70]
+our $LAST_BIT = 142 ;
our $BYTES = 18 ;
sub Croaker
@@ -806,6 +811,8 @@ The current hierarchy is:
| |
| +- experimental::postderef
| |
+ | +- experimental::private_use
+ | |
| +- experimental::re_strict
| |
| +- experimental::refaliasing
diff --git a/perlapi.h b/perlapi.h
index 66f5ac5f73..4cfbafecdf 100644
--- a/perlapi.h
+++ b/perlapi.h
@@ -123,6 +123,8 @@ END_EXTERN_C
#define PL_NonFinalFold (*Perl_GNonFinalFold_ptr(NULL))
#undef PL_Posix_ptrs
#define PL_Posix_ptrs (*Perl_GPosix_ptrs_ptr(NULL))
+#undef PL_Private_Use
+#define PL_Private_Use (*Perl_GPrivate_Use_ptr(NULL))
#undef PL_SB_invlist
#define PL_SB_invlist (*Perl_GSB_invlist_ptr(NULL))
#undef PL_SCX_invlist
diff --git a/perlvars.h b/perlvars.h
index edc96c46a5..466c515ebb 100644
--- a/perlvars.h
+++ b/perlvars.h
@@ -304,6 +304,7 @@ PERLVAR(G, utf8_charname_continue, SV *)
PERLVAR(G, utf8_mark, SV *)
PERLVARI(G, InBitmap, SV *, NULL)
PERLVAR(G, CCC_non0_non230, SV *)
+PERLVAR(G, Private_Use, SV *)
/* Definitions of user-defined \p{} properties, as the subs that define them
* are only called once */
diff --git a/pod/perldiag.pod b/pod/perldiag.pod
index 82d3e4e768..f254e96d8b 100644
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -6130,6 +6130,11 @@ the feature:
(F) The function indicated isn't implemented on this architecture,
according to the probings of Configure.
+=item The private_use feature is experimental
+
+(S experimental::private_use) This feature is actually a hook for future
+use.
+
=item The regex_sets feature is experimental
(S experimental::regex_sets) This warning is emitted if you
diff --git a/regcomp.c b/regcomp.c
index 5cb8c6a56c..12e2454b69 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -21775,6 +21775,7 @@ Perl_init_uniprops(pTHX)
PL_utf8_foldclosures = _new_invlist_C_array(_Perl_IVCF_invlist);
PL_utf8_mark = _new_invlist_C_array(uni_prop_ptrs[UNI_M]);
PL_CCC_non0_non230 = _new_invlist_C_array(_Perl_CCC_non0_non230_invlist);
+ PL_Private_Use = _new_invlist_C_array(uni_prop_ptrs[UNI_CO]);
#ifdef UNI_XIDC
/* The below are used only by deprecated functions. They could be removed */
@@ -23036,10 +23037,65 @@ Perl_parse_uniprop_string(pTHX_
/* Create and return the inversion list */
prop_definition =_new_invlist_C_array(uni_prop_ptrs[table_index]);
+ sv_2mortal(prop_definition);
+
+
+ /* See if there is a private use override to add to this definition */
+ {
+ COPHH * hinthash = (IN_PERL_COMPILETIME)
+ ? CopHINTHASH_get(&PL_compiling)
+ : CopHINTHASH_get(PL_curcop);
+ SV * pu_overrides = cophh_fetch_pv(hinthash, "private_use", 0, 0);
+
+ if (UNLIKELY(pu_overrides && SvPOK(pu_overrides))) {
+
+ /* See if there is an element in the hints hash for this table */
+ SV * pu_lookup = Perl_newSVpvf(aTHX_ "%d=", table_index);
+ const char * pos = strstr(SvPVX(pu_overrides), SvPVX(pu_lookup));
+
+ if (pos) {
+ bool dummy;
+ SV * pu_definition;
+ SV * pu_invlist;
+ SV * expanded_prop_definition =
+ sv_2mortal(invlist_clone(prop_definition, NULL));
+
+ /* If so, its definition is the string from here to the next
+ * \a character. And its format is the same as a user-defined
+ * property */
+ pos += SvCUR(pu_lookup);
+ pu_definition = newSVpvn(pos, strchr(pos, '\a') - pos);
+ pu_invlist = handle_user_defined_property(lookup_name,
+ lookup_len,
+ 0, /* Not UTF-8 */
+ 0, /* Not folded */
+ runtime,
+ pu_definition,
+ &dummy,
+ msg,
+ level);
+ if (TAINT_get) {
+ if (SvCUR(msg) > 0) sv_catpvs(msg, "; ");
+ sv_catpvs(msg, "Insecure private-use override");
+ goto append_name_to_msg;
+ }
+
+ /* For now, as a safety measure, make sure that it doesn't
+ * override non-private-use code points */
+ _invlist_intersection(pu_invlist, PL_Private_Use, &pu_invlist);
+
+ /* Add it to the list to be returned */
+ _invlist_union(prop_definition, pu_invlist,
+ &expanded_prop_definition);
+ prop_definition = expanded_prop_definition;
+ Perl_ck_warner_d(aTHX_ packWARN(WARN_EXPERIMENTAL__PRIVATE_USE), "The private_use feature is experimental");
+ }
+ }
+ }
+
if (invert_return) {
_invlist_invert(prop_definition);
}
- sv_2mortal(prop_definition);
return prop_definition;
diff --git a/regen/mk_invlists.pl b/regen/mk_invlists.pl
index 61148282b5..f6515ef567 100644
--- a/regen/mk_invlists.pl
+++ b/regen/mk_invlists.pl
@@ -3012,6 +3012,42 @@ my @sources = qw(regen/mk_invlists.pl
read_only_bottom_close_and_rename($out_fh, \@sources);
+use Data::Dumper;
+my %name_to_index;
+for my $i (0 .. @enums - 1) {
+ my $loose_name = $enums[$i] =~ s/^$table_name_prefix//r;
+ $loose_name = lc $loose_name;
+ $loose_name =~ s/__/=/;
+ $loose_name =~ s/_dot_/./;
+ $loose_name =~ s/_slash_/\//g;
+ $name_to_index{$loose_name} = $i + 1;
+}
+# unsanitize, exclude &, maybe add these before sanitize
+for my $i (0 .. @perl_prop_synonyms - 1) {
+ my $loose_name_pair = $perl_prop_synonyms[$i] =~ s/#\s*define\s*//r;
+ $loose_name_pair =~ s/\b$table_name_prefix//g;
+ $loose_name_pair = lc $loose_name_pair;
+ $loose_name_pair =~ s/__/=/g;
+ $loose_name_pair =~ s/_dot_/./g;
+ $loose_name_pair =~ s/_slash_/\//g;
+ my ($synonym, $primary) = split / +/, $loose_name_pair;
+ $name_to_index{$synonym} = $name_to_index{$primary};
+}
+
+my $uni_pl = open_new('lib/unicore/uni_keywords.pl', '>',
+ {style => '*', by => 'regen/mk_invlists.pl',
+ from => "Unicode::UCD"});
+{
+ print $uni_pl "\%utf8::uni_prop_ptrs_indices = (\n";
+ for my $name (sort keys %name_to_index) {
+ print STDERR __LINE__, $name, "\n" unless defined $name_to_index{$name};
+ print $uni_pl " '$name' => $name_to_index{$name},\n";
+ }
+ print $uni_pl ");\n\n1;\n";
+}
+
+read_only_bottom_close_and_rename($uni_pl, \@sources);
+
require './regen/mph.pl';
sub token_name
diff --git a/regen/warnings.pl b/regen/warnings.pl
index 504d86288e..ff08a2f0ce 100644
--- a/regen/warnings.pl
+++ b/regen/warnings.pl
@@ -16,7 +16,7 @@
#
# This script is normally invoked from regen.pl.
-$VERSION = '1.43';
+$VERSION = '1.44';
BEGIN {
require './regen/regen_lib.pl';
@@ -111,6 +111,8 @@ my $tree = {
[ 5.027, DEFAULT_ON ],
'experimental::alpha_assertions' =>
[ 5.027, DEFAULT_ON ],
+ 'experimental::private_use' =>
+ [ 5.029, DEFAULT_ON ],
}],
'missing' => [ 5.021, DEFAULT_OFF],
diff --git a/uni_keywords.h b/uni_keywords.h
index 4660ddf44c..a242c551b4 100644
--- a/uni_keywords.h
+++ b/uni_keywords.h
@@ -7129,6 +7129,6 @@ MPH_VALt match_uniprop( const unsigned char * const key, const U16 key_len ) {
* 93cc868487ef3345596041bcb90c302b1b056733bb95233101bc10dc2dbe36b4 lib/unicore/version
* 2680b9254eb236c5c090f11b149605043e8c8433661b96efc4a42fb4709342a5 regen/charset_translations.pl
* 03e51b0f07beebd5da62ab943899aa4934eee1f792fa27c1fb638c33bf4ac6ea regen/mk_PL_charclass.pl
- * 2fb19108265d37fc38920cd3f775c5c9fc25ae8b95be3bf197f47da1dc4989f4 regen/mk_invlists.pl
+ * d62728e4cfcba3e6550ff12ad924f4811ca2077ebec705b9fecabed716764414 regen/mk_invlists.pl
* c56b78df81e0f96632246052d71580b212546ca02ba4075158965e11d892f21e regen/mph.pl
* ex: set ro: */
diff --git a/warnings.h b/warnings.h
index d076e7acc1..ba15ba94da 100644
--- a/warnings.h
+++ b/warnings.h
@@ -121,6 +121,10 @@
#define WARN_EXPERIMENTAL__SCRIPT_RUN 68
#define WARN_SHADOW 69
+/* Warnings Categories added in Perl 5.029 */
+
+#define WARN_EXPERIMENTAL__PRIVATE_USE 70
+
#define WARNsize 18
#define WARN_ALLstring "\125\125\125\125\125\125\125\125\125\125\125\125\125\125\125\125\125\125"
#define WARN_NONEstring "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"